Relicense Sortix to the ISC license.
I hereby relicense all my work on Sortix under the ISC license as below.
All Sortix contributions by other people are already under this license,
are not substantial enough to be copyrightable, or have been removed.
All imported code from other projects is compatible with this license.
All GPL licensed code from other projects had previously been removed.
Copyright 2011-2016 Jonas 'Sortie' Termansen and contributors.
Permission to use, copy, modify, and distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
2016-03-02 17:38:16 -05:00
|
|
|
/*
 * Copyright (c) 2014, 2015, 2018, 2021 Jonas 'Sortie' Termansen.
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * sort.c
 * Sort, merge, or sequence check text files.
 */
|
2014-03-07 20:02:02 -05:00
|
|
|
|
2021-06-12 19:36:11 -04:00
|
|
|
#include <ctype.h>
|
2018-04-07 19:43:27 -04:00
|
|
|
#include <err.h>
|
2014-03-07 20:02:02 -05:00
|
|
|
#include <errno.h>
|
2021-06-12 19:36:11 -04:00
|
|
|
#include <inttypes.h>
|
|
|
|
#include <limits.h>
|
2014-03-07 20:02:02 -05:00
|
|
|
#include <locale.h>
|
2016-02-28 18:40:20 -05:00
|
|
|
#include <stdbool.h>
|
2014-03-07 20:02:02 -05:00
|
|
|
#include <stdint.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
2021-06-12 19:36:11 -04:00
|
|
|
/* Ordering modifier bits. They are OR'ed together in the global `modifiers`
   and in the per-key start/end modifier sets. The letter noted for each flag
   is the one that sets it in the option and key-modifier parsers. */
#define MODIFIER_BLANK (1 << 0) /* b: ignore leading blanks */
#define MODIFIER_DICTIONARY (1 << 1) /* d: dictionary order */
#define MODIFIER_IGNORE_CASE (1 << 2) /* f: fold case */
#define MODIFIER_GENERAL_NUMERIC (1 << 3) /* g: compare as strtod values */
#define MODIFIER_HUMAN (1 << 4) /* h: numbers with magnitude suffixes */
#define MODIFIER_IGNORE_NONPRINTING (1 << 5) /* i: ignore nonprinting */
#define MODIFIER_MONTH (1 << 6) /* M: month names */
#define MODIFIER_NUMERIC (1 << 7) /* n: decimal numbers */
#define MODIFIER_REVERSE (1 << 8) /* r: reverse the result */
#define MODIFIER_RANDOM (1 << 9) /* R: random order */
#define MODIFIER_VERSION (1 << 10) /* V: version order */
#define MODIFIER_UNIQUE (1 << 11) /* set internally for -u when checking */
|
|
|
|
|
|
|
|
/* Field separator character; only consulted when separator_is_blank is
   false. */
static char separator;
/* Whether fields are delimited by runs of blanks rather than by `separator`.
   This is the default until a separator is parsed. */
static bool separator_is_blank = true;
/* Global MODIFIER_* bits collected from the command line options. */
static int modifiers;
|
|
|
|
|
|
|
|
/* A sort key: the part of each line that is compared, plus the MODIFIER_*
   bits given on the start and end positions of the key definition. */
struct key
{
	/* Zero-based index of the field where the key begins. */
	size_t start_field;
	/* Zero-based character offset into the start field. */
	size_t first_character;
	/* Zero-based index of the field where the key ends; SIZE_MAX - 1 when
	   the key has no end position. */
	size_t end_field;
	/* Zero-based offset of the last character within the end field;
	   SIZE_MAX means through the end of the field. */
	size_t last_character;
	/* MODIFIER_* bits attached to the start position. */
	int start_modifiers;
	/* MODIFIER_* bits attached to the end position. */
	int end_modifiers;
};
|
2014-03-07 20:02:02 -05:00
|
|
|
|
2021-06-12 19:36:11 -04:00
|
|
|
/* Heap-allocated array of the sort keys in the order they were given. */
static struct key* keys;
/* Number of entries in `keys`. */
static size_t keys_count;
|
|
|
|
|
|
|
|
static size_t field_length(const char* string)
|
2018-04-08 13:33:17 -04:00
|
|
|
{
|
2021-06-12 19:36:11 -04:00
|
|
|
size_t length = 0;
|
|
|
|
if ( separator_is_blank )
|
|
|
|
{
|
|
|
|
while ( string[length] && isblank((unsigned char) string[length]) )
|
|
|
|
length++;
|
|
|
|
}
|
|
|
|
while ( string[length] &&
|
|
|
|
(separator_is_blank ?
|
|
|
|
!isblank((unsigned char) string[length]) :
|
|
|
|
string[length] != separator) )
|
|
|
|
length++;
|
|
|
|
return length;
|
|
|
|
}
|
|
|
|
|
|
|
|
static size_t separator_length(const char* string)
|
|
|
|
{
|
|
|
|
if ( separator_is_blank )
|
2018-04-08 13:33:17 -04:00
|
|
|
return 0;
|
2021-06-12 19:36:11 -04:00
|
|
|
return string[0] == separator ? 1 : 0;
|
2018-04-08 13:33:17 -04:00
|
|
|
}
|
|
|
|
|
2021-06-12 19:36:11 -04:00
|
|
|
/* Compares the leading floating point values of two strings as parsed by
   strtod(). Returns -1, 0 or 1. Strings without a number parse as 0. */
static int strdoublecmp(const char* a, const char* b)
{
	double a_value = strtod(a, NULL);
	double b_value = strtod(b, NULL);
	if ( a_value < b_value )
		return -1;
	if ( a_value > b_value )
		return 1;
	return 0;
}
|
|
|
|
|
2021-06-12 19:36:11 -04:00
|
|
|
/* Returns the month number (1 for January through 12 for December) whose
   three-letter English abbreviation starts `string`, compared without regard
   to case, or 0 if there is no such month. */
static int month_index(const char* string)
{
	static const char* const months[] =
	{
		"Jan", "Feb", "Mar", "Apr", "May", "Jun",
		"Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
	};
	const int months_count = sizeof(months) / sizeof(months[0]);
	for ( int i = 0; i < months_count; i++ )
	{
		if ( strncasecmp(string, months[i], 3) == 0 )
			return i + 1;
	}
	return 0;
}
|
|
|
|
|
2021-06-12 19:36:11 -04:00
|
|
|
/* Compares two strings by the month they begin with, as determined by
   month_index(). Strings that do not begin with a month sort before all
   months. Returns -1, 0 or 1. */
static int strmonthcmp(const char* a, const char* b)
{
	int a_month = month_index(a);
	int b_month = month_index(b);
	if ( a_month < b_month )
		return -1;
	if ( a_month > b_month )
		return 1;
	return 0;
}
|
|
|
|
|
2021-06-12 19:36:11 -04:00
|
|
|
/* Skips leading whitespace, an optional '-' sign and leading zeros. Stores
   whether a sign was seen in *negative and returns the first character after
   the skipped prefix. */
static const char* strnumcmp_skip_prefix(const char* string, bool* negative)
{
	while ( isspace((unsigned char) *string) )
		string++;
	*negative = *string == '-';
	if ( *negative )
		string++;
	while ( *string == '0' )
		string++;
	return string;
}

/* Returns the length of the run of decimal digits starting at `string`. */
static size_t strnumcmp_digits(const char* string)
{
	size_t length = 0;
	while ( isdigit((unsigned char) string[length]) )
		length++;
	return length;
}

/* If `string` begins with '.', returns the length of the '.' plus the digits
   that follow it and stores in *nonzero whether any of those digits is not
   '0'. Otherwise returns 0 and stores false. */
static size_t strnumcmp_fraction(const char* string, bool* nonzero)
{
	*nonzero = false;
	if ( string[0] != '.' )
		return 0;
	size_t length = 1;
	while ( isdigit((unsigned char) string[length]) )
	{
		if ( string[length] != '0' )
			*nonzero = true;
		length++;
	}
	return length;
}

/* Compares the decimal numbers at the start of `a` and `b` digit by digit, so
   numbers of any length and precision are handled without conversion.
   Leading whitespace is skipped and an optional '-' sign is honored. If
   `human` is true, a magnitude suffix (one of "YZEPTGMK", or 'k') directly
   after the number takes precedence over the digits, and a string without a
   number sorts before one with a number. Returns -1, 0 or 1. */
static int strnumcmp(const char* a, const char* b, bool human)
{
	bool a_negative;
	bool b_negative;
	a = strnumcmp_skip_prefix(a, &a_negative);
	b = strnumcmp_skip_prefix(b, &b_negative);

	// Lengths of the integer parts, leading zeros excluded.
	size_t a_length = strnumcmp_digits(a);
	size_t b_length = strnumcmp_digits(b);

	// Lengths of the fractional parts including the '.', if any.
	bool a_nonzero;
	bool b_nonzero;
	size_t a_fractionals = strnumcmp_fraction(a + a_length, &a_nonzero);
	size_t b_fractionals = strnumcmp_fraction(b + b_length, &b_nonzero);

	// All zero representations are equal regardless of sign.
	if ( !human && !a_length && !a_nonzero && !b_length && !b_nonzero )
		return 0;

	if ( a_negative && !b_negative )
		return -1;
	if ( !a_negative && b_negative )
		return 1;

	// Both numbers share a sign from here on. `bigger` is the result when
	// a has the larger absolute value.
	int bigger = a_negative ? -1 : 1;

	if ( human )
	{
		bool a_has_number = a_length || a_fractionals;
		bool b_has_number = b_length || b_fractionals;
		if ( !a_has_number && b_has_number )
			return -bigger;
		if ( a_has_number && !b_has_number )
			return bigger;
		// Ordered from the largest magnitude to the smallest, so an earlier
		// position means a larger unit. A missing or unknown unit maps to
		// the terminating NUL, which is after every real unit.
		static const char magnitudes[] = "YZEPTGMK";
		const char* no_magnitude = magnitudes + strlen(magnitudes);
		unsigned char a_unit = (unsigned char) a[a_length + a_fractionals];
		unsigned char b_unit = (unsigned char) b[b_length + b_fractionals];
		if ( a_unit == 'k' )
			a_unit = 'K';
		if ( b_unit == 'k' )
			b_unit = 'K';
		const char* a_magnitude = strchr(magnitudes, a_unit);
		const char* b_magnitude = strchr(magnitudes, b_unit);
		if ( !a_magnitude )
			a_magnitude = no_magnitude;
		if ( !b_magnitude )
			b_magnitude = no_magnitude;
		if ( a_magnitude > b_magnitude )
			return -bigger;
		if ( a_magnitude < b_magnitude )
			return bigger;
	}

	// With leading zeros gone, more integer digits means a larger value.
	if ( a_length < b_length )
		return -bigger;
	if ( a_length > b_length )
		return bigger;
	for ( size_t i = 0; i < a_length; i++ )
	{
		if ( a[i] < b[i] )
			return -bigger;
		if ( a[i] > b[i] )
			return bigger;
	}

	// Compare the fractional digits, padding the shorter one with zeros.
	// Index 0 of each fractional part is the '.' itself.
	for ( size_t i = 1; i < a_fractionals || i < b_fractionals; i++ )
	{
		char a_digit = i < a_fractionals ? a[a_length + i] : '0';
		char b_digit = i < b_fractionals ? b[b_length + i] : '0';
		if ( a_digit < b_digit )
			return -bigger;
		if ( a_digit > b_digit )
			return bigger;
	}

	return 0;
}
|
|
|
|
|
2021-06-12 19:36:11 -04:00
|
|
|
/* Returns 1 if `c` is significant in dictionary order, that is if it is a
   blank, an ASCII letter or an ASCII digit, and 0 otherwise. */
static int is_dictionary(int c)
{
	if ( isblank(c) )
		return 1;
	if ( 'a' <= c && c <= 'z' )
		return 1;
	if ( 'A' <= c && c <= 'Z' )
		return 1;
	if ( '0' <= c && c <= '9' )
		return 1;
	return 0;
}
|
|
|
|
|
2021-06-12 19:36:11 -04:00
|
|
|
/* Compares two strings byte by byte with optional filtering.
   dictionary: skip characters for which is_dictionary() is false.
   uppercase: compare the toupper() of each character.
   printable: skip characters for which isprint() is false.
   Returns -1, 0 or 1. */
static int string_compare(const char* a,
                          const char* b,
                          bool dictionary,
                          bool uppercase,
                          bool printable)
{
	size_t a_index = 0;
	size_t b_index = 0;
	for ( ; ; )
	{
		unsigned char a_char = (unsigned char) a[a_index];
		unsigned char b_char = (unsigned char) b[b_index];
		if ( !a_char && !b_char )
			return 0;
		if ( uppercase )
		{
			a_char = toupper(a_char);
			b_char = toupper(b_char);
		}
		// Step over ignored characters one string at a time, then look at
		// the new pair from the top.
		if ( a_char &&
		     ((dictionary && !is_dictionary(a_char)) ||
		      (printable && !isprint(a_char))) )
		{
			a_index++;
			continue;
		}
		if ( b_char &&
		     ((dictionary && !is_dictionary(b_char)) ||
		      (printable && !isprint(b_char))) )
		{
			b_index++;
			continue;
		}
		if ( a_char < b_char )
			return -1;
		if ( a_char > b_char )
			return 1;
		// Never advance past a terminating NUL.
		if ( a_char )
			a_index++;
		if ( b_char )
			b_index++;
	}
}
|
|
|
|
|
2021-06-12 19:36:11 -04:00
|
|
|
static int relate_field(char* a, char* b, int field_modifiers)
|
2014-03-07 20:02:02 -05:00
|
|
|
{
|
2021-06-12 19:36:11 -04:00
|
|
|
int rel = 0;
|
|
|
|
if ( field_modifiers & MODIFIER_HUMAN )
|
|
|
|
rel = strnumcmp(a, b, true);
|
|
|
|
else if ( field_modifiers & MODIFIER_GENERAL_NUMERIC )
|
|
|
|
rel = strdoublecmp(a, b);
|
|
|
|
else if ( field_modifiers & MODIFIER_MONTH )
|
|
|
|
rel = strmonthcmp(a, b);
|
|
|
|
else if ( field_modifiers & MODIFIER_NUMERIC )
|
|
|
|
rel = strnumcmp(a, b, false);
|
|
|
|
else if ( field_modifiers & MODIFIER_VERSION )
|
|
|
|
rel = strverscmp(a, b);
|
|
|
|
else if ( field_modifiers & (MODIFIER_DICTIONARY |
|
|
|
|
MODIFIER_IGNORE_CASE |
|
|
|
|
MODIFIER_IGNORE_NONPRINTING) )
|
|
|
|
rel = string_compare(a, b, field_modifiers & MODIFIER_DICTIONARY,
|
|
|
|
field_modifiers & MODIFIER_IGNORE_CASE,
|
|
|
|
field_modifiers & MODIFIER_IGNORE_NONPRINTING);
|
|
|
|
else
|
|
|
|
rel = strcoll(a, b);
|
|
|
|
if ( field_modifiers & MODIFIER_REVERSE )
|
|
|
|
rel = rel < 0 ? 1 : 0 < rel ? -1 : 0;
|
|
|
|
return rel;
|
2014-03-07 20:02:02 -05:00
|
|
|
}
|
|
|
|
|
2021-06-12 19:36:11 -04:00
|
|
|
static int relate(char* a, char* b, int modifiers)
|
2014-03-07 20:02:02 -05:00
|
|
|
{
|
2021-06-12 19:36:11 -04:00
|
|
|
size_t a_len = strlen(a);
|
|
|
|
size_t b_len = strlen(b);
|
|
|
|
bool was_trivial = true;
|
|
|
|
for ( size_t key_index = 0; key_index < keys_count; key_index++ )
|
|
|
|
{
|
|
|
|
const struct key* key = &keys[key_index];
|
|
|
|
size_t a_start = a_len;
|
|
|
|
size_t b_start = b_len;
|
|
|
|
size_t a_end = a_len;
|
|
|
|
size_t b_end = b_len;
|
|
|
|
int start_modifiers = key->start_modifiers | key->end_modifiers ?
|
|
|
|
key->start_modifiers : modifiers;
|
|
|
|
int end_modifiers = key->start_modifiers | key->end_modifiers ?
|
|
|
|
key->end_modifiers : modifiers;
|
|
|
|
for ( size_t field = 0, a_offset = 0, b_offset = 0;
|
|
|
|
(field <= key->start_field || field <= key->end_field) &&
|
|
|
|
(a_offset < a_len || b_offset < b_len);
|
|
|
|
field++ )
|
|
|
|
{
|
|
|
|
size_t a_field = a_offset;
|
|
|
|
size_t b_field = b_offset;
|
|
|
|
size_t a_field_length = field_length(a + a_field);
|
|
|
|
size_t b_field_length = field_length(b + b_field);
|
|
|
|
a_offset += a_field_length;
|
|
|
|
b_offset += b_field_length;
|
|
|
|
a_offset += separator_length(a + a_offset);
|
|
|
|
b_offset += separator_length(b + b_offset);
|
|
|
|
if ( field == key->start_field )
|
|
|
|
{
|
|
|
|
size_t a_start_field = a_field;
|
|
|
|
size_t a_start_field_length = a_field_length;
|
|
|
|
size_t b_start_field = b_field;
|
|
|
|
size_t b_start_field_length = b_field_length;
|
|
|
|
if ( start_modifiers & MODIFIER_BLANK )
|
|
|
|
{
|
|
|
|
while ( a_start_field_length &&
|
|
|
|
isblank((unsigned char) a[a_start_field]) )
|
|
|
|
{
|
|
|
|
a_start_field++;
|
|
|
|
a_start_field_length--;
|
|
|
|
}
|
|
|
|
while ( b_start_field_length &&
|
|
|
|
isblank((unsigned char) b[b_start_field]) )
|
|
|
|
{
|
|
|
|
b_start_field++;
|
|
|
|
b_start_field_length--;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
size_t a_left = a_len - a_start_field;
|
|
|
|
size_t b_left = b_len - b_start_field;
|
|
|
|
size_t a_first = key->first_character <= a_left ?
|
|
|
|
key->first_character : a_left;
|
|
|
|
size_t b_first = key->first_character <= b_left ?
|
|
|
|
key->first_character : b_left;
|
|
|
|
a_start = a_start_field + a_first;
|
|
|
|
b_start = b_start_field + b_first;
|
|
|
|
}
|
|
|
|
if ( field == key->end_field )
|
|
|
|
{
|
|
|
|
size_t a_end_field = a_field;
|
|
|
|
size_t a_end_field_length = a_field_length;
|
|
|
|
size_t b_end_field = b_field;
|
|
|
|
size_t b_end_field_length = b_field_length;
|
|
|
|
if ( end_modifiers & MODIFIER_BLANK )
|
|
|
|
{
|
|
|
|
while ( a_end_field_length &&
|
|
|
|
isblank((unsigned char) a[a_end_field]) )
|
|
|
|
{
|
|
|
|
a_end_field++;
|
|
|
|
a_end_field_length--;
|
|
|
|
}
|
|
|
|
while ( b_end_field_length &&
|
|
|
|
isblank((unsigned char) b[b_end_field]) )
|
|
|
|
{
|
|
|
|
b_end_field++;
|
|
|
|
b_end_field_length--;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
size_t a_last = key->last_character <= a_end_field_length ?
|
|
|
|
key->last_character : a_end_field_length;
|
|
|
|
size_t b_last = key->last_character <= b_end_field_length ?
|
|
|
|
key->last_character : b_end_field_length;
|
|
|
|
a_end = a_end_field + a_last + 1;
|
|
|
|
b_end = b_end_field + b_last + 1;
|
|
|
|
if ( a_len < a_end )
|
|
|
|
a_end = a_len;
|
|
|
|
if ( b_len < b_end )
|
|
|
|
b_end = b_len;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if ( a_end < a_start )
|
|
|
|
a_start = a_end;
|
|
|
|
if ( b_end < b_start )
|
|
|
|
b_start = b_end;
|
|
|
|
char a_separator = a[a_end];
|
|
|
|
char b_separator = b[b_end];
|
|
|
|
a[a_end] = '\0';
|
|
|
|
b[b_end] = '\0';
|
|
|
|
int key_modifiers = start_modifiers | end_modifiers;
|
|
|
|
if ( a_start != 0 || b_start != 0 || a_end != a_len || b_end != b_len ||
|
|
|
|
(key_modifiers & ~MODIFIER_RANDOM) != 0 )
|
|
|
|
was_trivial = false;
|
|
|
|
int rel = relate_field(a + a_start, b + b_start, key_modifiers);
|
|
|
|
a[a_end] = a_separator;
|
|
|
|
b[b_end] = b_separator;
|
|
|
|
if ( rel != 0 )
|
|
|
|
return rel;
|
|
|
|
}
|
|
|
|
if ( was_trivial || (modifiers & MODIFIER_UNIQUE) )
|
|
|
|
return 0;
|
|
|
|
return strcoll(a, b);
|
2014-03-07 20:02:02 -05:00
|
|
|
}
|
|
|
|
|
2021-06-12 19:36:11 -04:00
|
|
|
static int compare(char* a, char* b)
|
2014-03-07 20:02:02 -05:00
|
|
|
{
|
2021-06-12 19:36:11 -04:00
|
|
|
int rel = relate(a, b, modifiers & ~MODIFIER_REVERSE);
|
|
|
|
if ( modifiers & MODIFIER_REVERSE )
|
|
|
|
rel = rel < 0 ? 1 : 0 < rel ? -1 : 0;
|
|
|
|
return rel;
|
2014-03-07 20:02:02 -05:00
|
|
|
}
|
|
|
|
|
2021-06-12 19:36:11 -04:00
|
|
|
/* qsort() comparison callback for an array of lines: each argument points to
   a `char*` element, and the pointed-to lines are passed to compare(). */
static int indirect_compare(const void* a_ptr, const void* b_ptr)
{
	char* const* a_element = (char* const*) a_ptr;
	char* const* b_element = (char* const*) b_ptr;
	return compare(*a_element, *b_element);
}
|
|
|
|
|
|
|
|
static size_t pick_uniform(size_t upper)
|
|
|
|
{
|
|
|
|
if ( upper < 2 )
|
|
|
|
return 0;
|
|
|
|
size_t minimum = -upper % upper;
|
|
|
|
size_t selection;
|
|
|
|
do arc4random_buf(&selection, sizeof(selection));
|
|
|
|
while ( selection < minimum );
|
|
|
|
return selection % upper;
|
2014-03-07 20:02:02 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Reading state for a list of input files that is consumed as if it were a
   single stream of lines. */
struct input_stream
{
	/* Paths of the input files; "-" means the standard input. */
	const char* const* files;
	/* Index in `files` of the file currently being read. */
	size_t files_current;
	/* Number of entries in `files`; if 0 the standard input is read. */
	size_t files_length;
	/* The open file for files[files_current], or NULL if not yet opened. */
	FILE* current_file;
	/* Path of the file that the most recently returned line came from. */
	const char* last_file_path;
	/* Line number of the most recently returned line within its file. */
	uintmax_t last_line_number;
};
|
|
|
|
|
2018-04-07 19:43:27 -04:00
|
|
|
/* Reads the next record terminated by `delim` from `fp` and returns it as a
   newly allocated string with the delimiter removed; the caller frees it.
   Returns NULL when there is nothing more to read. Exits with status 2 on a
   read error, using `fpname` to identify the file in the diagnostic. */
static char* read_line(FILE* fp, const char* fpname, int delim)
{
	char* line = NULL;
	size_t line_size = 0;
	ssize_t amount = getdelim(&line, &line_size, delim, fp);
	if ( amount < 0 )
	{
		// Report the failure before calling free() so errno still
		// describes the read error when err() prints it.
		if ( ferror(fp) )
			err(2, "read: %s", fpname);
		free(line);
		return NULL;
	}
	// The final record of a file might not end with the delimiter.
	if ( (unsigned char) line[amount-1] == (unsigned char) delim )
		line[amount-1] = '\0';
	return line;
}
|
|
|
|
|
2018-04-07 19:43:27 -04:00
|
|
|
static char* read_input_stream_line(struct input_stream* is, int delim)
|
2014-03-07 20:02:02 -05:00
|
|
|
{
|
|
|
|
if ( !is->files_length )
|
|
|
|
{
|
|
|
|
char* result = read_line(stdin, "<stdin>", delim);
|
|
|
|
is->last_file_path = "-";
|
2018-04-07 19:43:27 -04:00
|
|
|
if ( result )
|
|
|
|
is->last_line_number++;
|
2014-03-07 20:02:02 -05:00
|
|
|
return result;
|
|
|
|
}
|
|
|
|
while ( is->files_current < is->files_length )
|
|
|
|
{
|
|
|
|
const char* path = is->files[is->files_current];
|
|
|
|
if ( !is->current_file )
|
|
|
|
{
|
|
|
|
is->last_line_number = 0;
|
|
|
|
if ( !strcmp(path, "-") )
|
|
|
|
is->current_file = stdin;
|
|
|
|
else if ( !(is->current_file = fopen(path, "r")) )
|
2018-04-07 19:43:27 -04:00
|
|
|
err(2, "%s", path);
|
2014-03-07 20:02:02 -05:00
|
|
|
}
|
|
|
|
char* result = read_line(is->current_file, path, delim);
|
|
|
|
if ( !result )
|
|
|
|
{
|
|
|
|
if ( is->current_file != stdin )
|
|
|
|
fclose(is->current_file);
|
|
|
|
is->current_file = NULL;
|
|
|
|
is->files_current++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
is->last_file_path = path;
|
|
|
|
is->last_line_number++;
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2018-04-07 19:43:27 -04:00
|
|
|
/* Reads every remaining line of the input stream. Returns a newly allocated
   array of newly allocated lines, or NULL if there were no lines, and stores
   the number of lines in *result_num_lines. Exits with status 2 if memory
   cannot be allocated. */
static char** read_input_stream_lines(size_t* result_num_lines,
                                      struct input_stream* is,
                                      int delim)
{
	char** lines = NULL;
	size_t lines_used = 0;
	size_t lines_length = 0;

	char* line;
	while ( (line = read_input_stream_line(is, delim)) )
	{
		if ( lines_used == lines_length )
		{
			// Double the capacity; the first allocation has room for
			// 2 * 64 lines. reallocarray() detects overflow of the size.
			size_t old_lines_length = lines_length ? lines_length : 64;
			char** new_lines = reallocarray(lines, old_lines_length,
			                                2 * sizeof(char*));
			if ( !new_lines )
				err(2, "malloc");
			lines = new_lines;
			lines_length = 2 * old_lines_length;
		}
		lines[lines_used++] = line;
	}

	*result_num_lines = lines_used;
	return lines;
}
|
|
|
|
|
2021-06-12 19:36:11 -04:00
|
|
|
static size_t parse_modifiers(const char* keystring, int* modifiers)
|
|
|
|
{
|
|
|
|
size_t offset = 0;
|
|
|
|
while ( keystring[offset] )
|
|
|
|
{
|
|
|
|
switch ( keystring[offset] )
|
|
|
|
{
|
|
|
|
case 'b': *modifiers |= MODIFIER_BLANK; break;
|
|
|
|
case 'd': *modifiers |= MODIFIER_DICTIONARY; break;
|
|
|
|
case 'f': *modifiers |= MODIFIER_IGNORE_CASE; break;
|
|
|
|
case 'g': *modifiers |= MODIFIER_GENERAL_NUMERIC; break;
|
|
|
|
case 'h': *modifiers |= MODIFIER_HUMAN; break;
|
|
|
|
case 'i': *modifiers |= MODIFIER_IGNORE_NONPRINTING; break;
|
|
|
|
case 'M': *modifiers |= MODIFIER_MONTH; break;
|
|
|
|
case 'n': *modifiers |= MODIFIER_NUMERIC; break;
|
|
|
|
case 'R': *modifiers |= MODIFIER_RANDOM; break;
|
|
|
|
case 'r': *modifiers |= MODIFIER_REVERSE; break;
|
|
|
|
case 'V': *modifiers |= MODIFIER_VERSION; break;
|
|
|
|
default: return offset;
|
|
|
|
}
|
|
|
|
offset++;
|
|
|
|
}
|
|
|
|
return offset;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void check_modifiers(int modifiers)
|
|
|
|
{
|
|
|
|
modifiers &= ~(MODIFIER_BLANK | MODIFIER_UNIQUE | MODIFIER_REVERSE);
|
|
|
|
bool incompatible = false;
|
|
|
|
if ( modifiers & (MODIFIER_DICTIONARY |
|
|
|
|
MODIFIER_IGNORE_CASE |
|
|
|
|
MODIFIER_IGNORE_NONPRINTING) )
|
|
|
|
{
|
|
|
|
if ( modifiers & ~(MODIFIER_DICTIONARY |
|
|
|
|
MODIFIER_IGNORE_CASE |
|
|
|
|
MODIFIER_IGNORE_NONPRINTING) )
|
|
|
|
incompatible = true;
|
|
|
|
}
|
|
|
|
else if ( modifiers & MODIFIER_NUMERIC )
|
|
|
|
{
|
|
|
|
if ( modifiers & ~(MODIFIER_NUMERIC | MODIFIER_IGNORE_CASE) )
|
|
|
|
incompatible = true;
|
|
|
|
}
|
|
|
|
else if ( modifiers && 1 << (ffs(modifiers) - 1) != modifiers )
|
|
|
|
incompatible = true;
|
|
|
|
if ( incompatible )
|
|
|
|
{
|
|
|
|
char options[sizeof(int) * CHAR_BIT];
|
|
|
|
size_t offset = 0;
|
|
|
|
if ( modifiers & MODIFIER_DICTIONARY ) options[offset++] = 'd';
|
|
|
|
if ( modifiers & MODIFIER_IGNORE_CASE ) options[offset++] = 'f';
|
|
|
|
if ( modifiers & MODIFIER_GENERAL_NUMERIC ) options[offset++] = 'g';
|
|
|
|
if ( modifiers & MODIFIER_HUMAN ) options[offset++] = 'h';
|
|
|
|
if ( modifiers & MODIFIER_IGNORE_NONPRINTING ) options[offset++] = 'i';
|
|
|
|
if ( modifiers & MODIFIER_MONTH ) options[offset++] = 'M';
|
|
|
|
if ( modifiers & MODIFIER_NUMERIC ) options[offset++] = 'n';
|
|
|
|
if ( modifiers & MODIFIER_RANDOM ) options[offset++] = 'R';
|
|
|
|
if ( modifiers & MODIFIER_VERSION ) options[offset++] = 'V';
|
|
|
|
options[offset] = '\0';
|
|
|
|
errx(2, "options '-%s' are incompatible", options);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void parse_key(const char* keystring)
|
|
|
|
{
|
|
|
|
|
|
|
|
const char* str = keystring;
|
|
|
|
if ( !(keys = reallocarray(keys, sizeof(struct key), ++keys_count)) )
|
|
|
|
err(2, "malloc");
|
|
|
|
struct key* key = &keys[keys_count - 1];
|
|
|
|
memset(key, 0, sizeof(*key));
|
|
|
|
for ( size_t i = 0; str[i]; i++ )
|
|
|
|
{
|
|
|
|
if ( isblank((unsigned char) str[i]) ||
|
|
|
|
str[i] == '-' || str[i] == '+' )
|
|
|
|
errx(2, "invalid key: %s", keystring);
|
|
|
|
}
|
|
|
|
char* end;
|
|
|
|
uintmax_t value;
|
|
|
|
errno = 0;
|
|
|
|
value = strtoumax(str, &end, 10);
|
|
|
|
if ( end == str || errno || value != (size_t) value )
|
|
|
|
errx(2, "invalid key starting field: %s", keystring);
|
|
|
|
str += end - str;
|
|
|
|
if ( value == 0 )
|
|
|
|
errx(2, "invalid key with zero field: %s", keystring);
|
|
|
|
key->start_field = value - 1;
|
|
|
|
if ( *str == '.' )
|
|
|
|
{
|
|
|
|
str++;
|
|
|
|
errno = 0;
|
|
|
|
value = strtoumax(str, &end, 10);
|
|
|
|
if ( end == str || errno || value != (size_t) value )
|
|
|
|
errx(2, "invalid key starting character offset: %s", keystring);
|
|
|
|
str += end - str;
|
|
|
|
if ( value == 0 )
|
|
|
|
errx(2, "invalid key with zero character offset: %s", keystring);
|
|
|
|
key->first_character = value - 1;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
key->first_character = 0;
|
|
|
|
str += parse_modifiers(str, &key->start_modifiers);
|
|
|
|
if ( !*str )
|
|
|
|
{
|
|
|
|
key->end_field = SIZE_MAX - 1;
|
|
|
|
key->last_character = SIZE_MAX;
|
|
|
|
key->end_modifiers = 0;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if ( *str != ',' )
|
|
|
|
errx(2, "invalid key modifier '%c': %s", *str, keystring);
|
|
|
|
str++;
|
|
|
|
errno = 0;
|
|
|
|
value = strtoumax(str, &end, 10);
|
|
|
|
if ( end == str || errno || value != (size_t) value )
|
|
|
|
errx(2, "invalid key ending field: %s", keystring);
|
|
|
|
key->end_field = value - 1;
|
|
|
|
if ( value == 0 )
|
|
|
|
errx(2, "invalid key with zero field: %s", keystring);
|
|
|
|
str += end - str;
|
|
|
|
if ( *str == '.' )
|
|
|
|
{
|
|
|
|
str++;
|
|
|
|
errno = 0;
|
|
|
|
value = strtoumax(str, &end, 10);
|
|
|
|
if ( end == str || errno || value != (size_t) value )
|
|
|
|
errx(2, "invalid key ending character offset: %s", keystring);
|
|
|
|
str += end - str;
|
|
|
|
if ( value == 0 )
|
|
|
|
key->last_character = SIZE_MAX;
|
|
|
|
else
|
|
|
|
key->last_character = value - 1;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
key->last_character = SIZE_MAX;
|
|
|
|
str += parse_modifiers(str, &key->end_modifiers);
|
|
|
|
if ( *str )
|
|
|
|
errx(2, "invalid key modifier '%c': %s", *str, keystring);
|
|
|
|
check_modifiers(key->start_modifiers | key->end_modifiers);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void parse_separator(const char* parameter)
|
|
|
|
{
|
|
|
|
if ( strlen(parameter) != 1 )
|
|
|
|
errx(2, "invalid separator: %s", parameter);
|
|
|
|
separator = parameter[0];
|
|
|
|
separator_is_blank = false;
|
|
|
|
}
|
|
|
|
|
2014-03-07 20:02:02 -05:00
|
|
|
/* Removes the NULL entries from the first *argc elements of *argv, keeping
   the order of the remaining entries, and stores the new count in *argc.
   The vacated elements at the end are set to NULL. The array is compacted in
   place in a single pass and no element at or after the original *argc is
   accessed. */
static void compact_arguments(int* argc, char*** argv)
{
	char** vector = *argv;
	int count = *argc;
	int kept = 0;
	for ( int i = 0; i < count; i++ )
	{
		if ( vector[i] )
			vector[kept++] = vector[i];
	}
	for ( int i = kept; i < count; i++ )
		vector[i] = NULL;
	*argc = kept;
}
|
|
|
|
|
|
|
|
int main(int argc, char* argv[])
|
|
|
|
{
|
|
|
|
setlocale(LC_ALL, "");
|
|
|
|
|
|
|
|
bool check = false;
|
|
|
|
bool check_quiet = false;
|
|
|
|
bool merge = false;
|
|
|
|
const char* output = NULL;
|
2021-06-12 19:36:11 -04:00
|
|
|
const char* parameter = NULL;
|
2014-03-07 20:02:02 -05:00
|
|
|
bool unique = false;
|
|
|
|
bool zero_terminated = false;
|
|
|
|
|
|
|
|
for ( int i = 1; i < argc; i++ )
|
|
|
|
{
|
|
|
|
const char* arg = argv[i];
|
|
|
|
if ( arg[0] != '-' || !arg[1] )
|
|
|
|
continue;
|
|
|
|
argv[i] = NULL;
|
|
|
|
if ( !strcmp(arg, "--") )
|
|
|
|
break;
|
|
|
|
if ( arg[1] != '-' )
|
|
|
|
{
|
2016-02-28 18:40:20 -05:00
|
|
|
char c;
|
|
|
|
while ( (c = *++arg) ) switch ( c )
|
2014-03-07 20:02:02 -05:00
|
|
|
{
|
2021-06-12 19:36:11 -04:00
|
|
|
case 'b': modifiers |= MODIFIER_BLANK; break;
|
2018-04-07 19:43:27 -04:00
|
|
|
case 'C': check = true, check_quiet = true; break;
|
|
|
|
case 'c': check = true, check_quiet = false; break;
|
2021-06-12 19:36:11 -04:00
|
|
|
case 'd': modifiers |= MODIFIER_DICTIONARY; break;
|
|
|
|
case 'f': modifiers |= MODIFIER_IGNORE_CASE; break;
|
|
|
|
case 'g': modifiers |= MODIFIER_GENERAL_NUMERIC; break;
|
|
|
|
case 'i': modifiers |= MODIFIER_IGNORE_NONPRINTING; break;
|
|
|
|
case 'k':
|
|
|
|
if ( !*(parameter = arg + 1) )
|
|
|
|
{
|
|
|
|
if ( i + 1 == argc )
|
|
|
|
errx(2, "parameter requires an argument -- 'k'");
|
|
|
|
parameter = argv[i+1];
|
|
|
|
argv[++i] = NULL;
|
|
|
|
}
|
|
|
|
parse_key(parameter);
|
|
|
|
arg = "k";
|
|
|
|
break;
|
|
|
|
case 'h': modifiers |= MODIFIER_HUMAN; break;
|
2014-03-07 20:02:02 -05:00
|
|
|
case 'm': merge = true; break;
|
2021-06-12 19:36:11 -04:00
|
|
|
case 'M': modifiers |= MODIFIER_MONTH; break;
|
|
|
|
case 'n': modifiers |= MODIFIER_NUMERIC; break;
|
2014-03-07 20:02:02 -05:00
|
|
|
case 'o':
|
|
|
|
if ( !*(output = arg + 1) )
|
|
|
|
{
|
|
|
|
if ( i + 1 == argc )
|
2018-04-07 19:43:27 -04:00
|
|
|
errx(2, "option requires an argument -- 'o'");
|
2014-03-07 20:02:02 -05:00
|
|
|
output = argv[i+1];
|
|
|
|
argv[++i] = NULL;
|
|
|
|
}
|
|
|
|
arg = "o";
|
|
|
|
break;
|
2021-06-12 19:36:11 -04:00
|
|
|
case 'R': modifiers |= MODIFIER_RANDOM; break;
|
|
|
|
case 'r': modifiers |= MODIFIER_REVERSE; break;
|
|
|
|
case 't':
|
|
|
|
if ( !*(parameter = arg + 1) )
|
|
|
|
{
|
|
|
|
if ( i + 1 == argc )
|
|
|
|
errx(2, "parameter requires an argument -- 't'");
|
|
|
|
parameter = argv[i+1];
|
|
|
|
argv[++i] = NULL;
|
|
|
|
}
|
|
|
|
parse_separator(parameter);
|
|
|
|
arg = "t";
|
|
|
|
break;
|
2014-03-07 20:02:02 -05:00
|
|
|
case 'u': unique = true; break;
|
2021-06-12 19:36:11 -04:00
|
|
|
case 'V': modifiers |= MODIFIER_VERSION; break;
|
2014-03-07 20:02:02 -05:00
|
|
|
case 'z': zero_terminated = true; break;
|
|
|
|
default:
|
2018-04-07 19:43:27 -04:00
|
|
|
errx(2, "unknown option -- '%c'", c);
|
2014-03-07 20:02:02 -05:00
|
|
|
}
|
|
|
|
}
|
2021-06-12 19:36:11 -04:00
|
|
|
else if ( !strcmp(arg, "--ignore-leading-blanks") )
|
|
|
|
modifiers |= MODIFIER_BLANK;
|
|
|
|
else if ( !strcmp(arg, "--dictionary-order") )
|
|
|
|
modifiers |= MODIFIER_DICTIONARY;
|
|
|
|
else if ( !strcmp(arg, "--ignore-case") )
|
|
|
|
modifiers |= MODIFIER_IGNORE_CASE;
|
|
|
|
else if ( !strcmp(arg, "--general-numeric-sort") )
|
|
|
|
modifiers |= MODIFIER_GENERAL_NUMERIC;
|
|
|
|
else if ( !strcmp(arg, "--ignore-nonprinting") )
|
|
|
|
modifiers |= MODIFIER_IGNORE_NONPRINTING;
|
|
|
|
else if ( !strcmp(arg, "--human-numeric-sort") )
|
|
|
|
modifiers |= MODIFIER_HUMAN;
|
|
|
|
else if ( !strcmp(arg, "--month-sort") )
|
|
|
|
modifiers |= MODIFIER_MONTH;
|
|
|
|
else if ( !strncmp(arg, "--key=", strlen("--key=")) )
|
|
|
|
parse_key(arg + strlen("--key="));
|
|
|
|
else if ( !strcmp(arg, "--key") )
|
|
|
|
{
|
|
|
|
if ( i + 1 == argc )
|
|
|
|
errx(2, "option '--key' requires an argument");
|
|
|
|
parse_key(argv[i+1]);
|
|
|
|
argv[++i] = NULL;
|
|
|
|
}
|
|
|
|
else if ( !strncmp(arg, "--field-separator=", strlen("--field-separator=")) )
|
|
|
|
parse_separator(arg + strlen("--field-separator="));
|
|
|
|
else if ( !strcmp(arg, "--field-separator") )
|
|
|
|
{
|
|
|
|
if ( i + 1 == argc )
|
|
|
|
errx(2, "option '--field-separator' requires an argument");
|
|
|
|
parse_separator(argv[i+1]);
|
|
|
|
argv[++i] = NULL;
|
|
|
|
}
|
|
|
|
else if ( !strcmp(arg, "--sort") ||
|
|
|
|
!strncmp(arg, "--sort=", strlen("--sort=")) )
|
|
|
|
{
|
|
|
|
if ( !strncmp(arg, "--sort=", strlen("--sort=")) )
|
|
|
|
parameter = arg + strlen("--sort=");
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if ( i + 1 == argc )
|
|
|
|
errx(2, "option '--sort' requires an argument");
|
|
|
|
parameter = argv[i+1];
|
|
|
|
argv[++i] = NULL;
|
|
|
|
}
|
|
|
|
if ( !strcmp(parameter, "general-numeric") )
|
|
|
|
modifiers |= MODIFIER_GENERAL_NUMERIC;
|
|
|
|
else if ( !strcmp(parameter, "human-numeric") )
|
|
|
|
modifiers |= MODIFIER_HUMAN;
|
|
|
|
else if ( !strcmp(parameter, "month") )
|
|
|
|
modifiers |= MODIFIER_MONTH;
|
|
|
|
else if ( !strcmp(parameter, "numeric") )
|
|
|
|
modifiers |= MODIFIER_NUMERIC;
|
|
|
|
else if ( !strcmp(parameter, "random") )
|
|
|
|
modifiers |= MODIFIER_RANDOM;
|
|
|
|
else if ( !strcmp(parameter, "version") )
|
|
|
|
modifiers |= MODIFIER_VERSION;
|
|
|
|
else
|
|
|
|
errx(1, "invalid sort: %s", parameter);
|
|
|
|
}
|
2014-03-07 20:02:02 -05:00
|
|
|
else if ( !strcmp(arg, "--check") ||
|
|
|
|
!strcmp(arg, "--check=diagnose-first") )
|
|
|
|
check = true, check_quiet = false;
|
|
|
|
else if ( !strcmp(arg, "--check=quiet") ||
|
|
|
|
!strcmp(arg, "--check=silent") )
|
|
|
|
check = true, check_quiet = true;
|
|
|
|
else if ( !strcmp(arg, "--merge") )
|
|
|
|
merge = true;
|
2021-06-12 19:36:11 -04:00
|
|
|
else if ( !strcmp(arg, "--numeric-sort") )
|
|
|
|
modifiers |= MODIFIER_NUMERIC;
|
2014-03-07 20:02:02 -05:00
|
|
|
else if ( !strncmp(arg, "--output=", strlen("--output=")) )
|
|
|
|
output = arg + strlen("--output=");
|
|
|
|
else if ( !strcmp(arg, "--output") )
|
|
|
|
{
|
|
|
|
if ( i + 1 == argc )
|
2018-04-07 19:43:27 -04:00
|
|
|
errx(2, "option '--output' requires an argument");
|
2014-03-07 20:02:02 -05:00
|
|
|
output = argv[i+1];
|
|
|
|
argv[++i] = NULL;
|
|
|
|
}
|
2018-04-08 13:33:17 -04:00
|
|
|
else if ( !strcmp(arg, "--random-sort") )
|
2021-06-12 19:36:11 -04:00
|
|
|
modifiers |= MODIFIER_RANDOM;
|
2014-03-07 20:02:02 -05:00
|
|
|
else if ( !strcmp(arg, "--reverse") )
|
2021-06-12 19:36:11 -04:00
|
|
|
modifiers |= MODIFIER_REVERSE;
|
2014-03-07 20:02:02 -05:00
|
|
|
else if ( !strcmp(arg, "--unique") )
|
|
|
|
unique = true;
|
|
|
|
else if ( !strcmp(arg, "--version-sort") )
|
2021-06-12 19:36:11 -04:00
|
|
|
modifiers |= MODIFIER_VERSION;
|
2014-03-07 20:02:02 -05:00
|
|
|
else if ( !strcmp(arg, "--zero-terminated") )
|
|
|
|
zero_terminated = true;
|
|
|
|
else
|
2018-04-07 19:43:27 -04:00
|
|
|
errx(2, "unknown option: %s", arg);
|
2014-03-07 20:02:02 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
compact_arguments(&argc, &argv);
|
|
|
|
|
2018-04-07 19:43:27 -04:00
|
|
|
if ( check_quiet && output )
|
2021-06-12 19:36:11 -04:00
|
|
|
errx(2, "the -C and -o options are incompatible");
|
2018-04-07 19:43:27 -04:00
|
|
|
if ( check && output )
|
2021-06-12 19:36:11 -04:00
|
|
|
errx(2, "the -c and -o options are incompatible");
|
|
|
|
if ( check_quiet && (modifiers & MODIFIER_RANDOM) )
|
|
|
|
errx(2, "the -C and -R options are incompatible");
|
|
|
|
if ( check && (modifiers & MODIFIER_RANDOM) )
|
|
|
|
errx(2, "the -c and -R options are incompatible");
|
|
|
|
|
|
|
|
check_modifiers(modifiers);
|
2014-03-07 20:02:02 -05:00
|
|
|
|
|
|
|
int delim = zero_terminated ? '\0' : '\n';
|
|
|
|
|
2021-06-12 19:36:11 -04:00
|
|
|
if ( !keys_count )
|
|
|
|
{
|
|
|
|
keys = malloc(sizeof(struct key));
|
|
|
|
if ( !keys )
|
|
|
|
err(2, "malloc");
|
|
|
|
keys->start_field = 0;
|
|
|
|
keys->first_character = 0;
|
|
|
|
keys->end_field = SIZE_MAX - 1;
|
|
|
|
keys->last_character = SIZE_MAX;
|
|
|
|
keys->start_modifiers = 0;
|
|
|
|
keys->end_modifiers = 0;
|
|
|
|
keys_count = 1;
|
|
|
|
}
|
2014-03-07 20:02:02 -05:00
|
|
|
|
|
|
|
struct input_stream is;
|
|
|
|
memset(&is, 0, sizeof(is));
|
2016-02-28 18:40:20 -05:00
|
|
|
is.files = (const char* const*) (argv + 1);
|
2014-03-07 20:02:02 -05:00
|
|
|
is.files_current = 0;
|
|
|
|
is.files_length = argc - 1;
|
|
|
|
|
|
|
|
if ( check )
|
|
|
|
{
|
2021-06-12 19:36:11 -04:00
|
|
|
if ( unique )
|
|
|
|
modifiers |= MODIFIER_UNIQUE;
|
|
|
|
|
2014-03-07 20:02:02 -05:00
|
|
|
int needed_relation = unique ? 1 : 0;
|
|
|
|
char* prev_line = NULL;
|
2016-02-28 18:40:20 -05:00
|
|
|
char* line;
|
|
|
|
while ( (line = read_input_stream_line(&is, delim)) )
|
2014-03-07 20:02:02 -05:00
|
|
|
{
|
|
|
|
if ( prev_line && compare(line, prev_line) < needed_relation )
|
|
|
|
{
|
2018-04-07 19:43:27 -04:00
|
|
|
if ( check_quiet )
|
|
|
|
return 1;
|
|
|
|
errx(1, "%s:%ju: disorder: %s", is.last_file_path,
|
|
|
|
is.last_line_number, line);
|
2014-03-07 20:02:02 -05:00
|
|
|
}
|
|
|
|
free(prev_line);
|
|
|
|
prev_line = line;
|
|
|
|
}
|
|
|
|
free(prev_line);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
(void) merge;
|
|
|
|
|
|
|
|
size_t lines_used = 0;
|
|
|
|
char** lines = read_input_stream_lines(&lines_used, &is, delim);
|
|
|
|
|
2021-06-12 19:36:11 -04:00
|
|
|
if ( !(modifiers & MODIFIER_RANDOM) || unique )
|
|
|
|
qsort(lines, lines_used, sizeof(*lines), indirect_compare);
|
2018-04-08 13:33:17 -04:00
|
|
|
|
2021-06-12 19:36:11 -04:00
|
|
|
if ( (modifiers & MODIFIER_RANDOM) )
|
2018-04-08 13:33:17 -04:00
|
|
|
{
|
|
|
|
if ( unique )
|
|
|
|
{
|
|
|
|
size_t o = 0;
|
|
|
|
for ( size_t i = 0; i < lines_used; i++ )
|
|
|
|
{
|
|
|
|
if ( o && compare(lines[i], lines[o - 1]) == 0 )
|
|
|
|
continue;
|
|
|
|
lines[o++] = lines[i];
|
|
|
|
}
|
|
|
|
lines_used = o;
|
|
|
|
}
|
|
|
|
for ( size_t i = 0; i < lines_used; i++ )
|
|
|
|
{
|
|
|
|
size_t left = lines_used - i;
|
|
|
|
size_t choice = i + pick_uniform(left);
|
|
|
|
if ( choice != i )
|
|
|
|
{
|
|
|
|
char* tmp = lines[i];
|
|
|
|
lines[i] = lines[choice];
|
|
|
|
lines[choice] = tmp;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2014-03-07 20:02:02 -05:00
|
|
|
|
2018-04-07 19:43:27 -04:00
|
|
|
if ( output && !freopen(output, "w", stdout) )
|
|
|
|
err(2, "%s", output);
|
|
|
|
|
2021-06-12 19:36:11 -04:00
|
|
|
if ( unique )
|
|
|
|
modifiers |= MODIFIER_UNIQUE;
|
|
|
|
|
2014-03-07 20:02:02 -05:00
|
|
|
for ( size_t i = 0; i < lines_used; i++ )
|
|
|
|
{
|
|
|
|
if ( unique && i && compare(lines[i-1], lines[i]) == 0 )
|
|
|
|
continue;
|
2018-04-07 19:43:27 -04:00
|
|
|
if ( fputs(lines[i], stdout) == EOF || fputc(delim, stdout) == EOF )
|
|
|
|
err(2, "%s", output ? output : "<stdout>");
|
2014-03-07 20:02:02 -05:00
|
|
|
}
|
2018-04-07 19:43:27 -04:00
|
|
|
if ( fflush(stdout) == EOF )
|
|
|
|
err(2, "%s", output ? output : "<stdout>");
|
2014-03-07 20:02:02 -05:00
|
|
|
}
|
|
|
|
|
2018-04-07 19:43:27 -04:00
|
|
|
return 0;
|
2014-03-07 20:02:02 -05:00
|
|
|
}
|