Relicense Sortix to the ISC license.
I hereby relicense all my work on Sortix under the ISC license as below.
All Sortix contributions by other people are already under this license,
are not substantial enough to be copyrightable, or have been removed.
All imported code from other projects is compatible with this license.
All GPL licensed code from other projects had previously been removed.
Copyright 2011-2016 Jonas 'Sortie' Termansen and contributors.
Permission to use, copy, modify, and distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
2016-03-02 17:38:16 -05:00
|
|
|
/*
 * Copyright (c) 2014, 2015, 2018 Jonas 'Sortie' Termansen.
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * sort.c
 * Sort, merge, or sequence check text files.
 */
|
2014-03-07 20:02:02 -05:00
|
|
|
|
2018-04-07 19:43:27 -04:00
|
|
|
#include <err.h>
|
2014-03-07 20:02:02 -05:00
|
|
|
#include <errno.h>
|
|
|
|
#include <locale.h>
|
2016-02-28 18:40:20 -05:00
|
|
|
#include <stdbool.h>
|
2014-03-07 20:02:02 -05:00
|
|
|
#include <stdint.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
// TODO: Implement all the features mandated by POSIX.
|
|
|
|
// TODO: Implement the useful GNU extensions.
|
|
|
|
|
2018-04-08 13:33:17 -04:00
|
|
|
static size_t pick_uniform(size_t upper)
|
|
|
|
{
|
|
|
|
if ( upper < 2 )
|
|
|
|
return 0;
|
|
|
|
size_t minimum = -upper % upper;
|
|
|
|
size_t selection;
|
|
|
|
do arc4random_buf(&selection, sizeof(selection));
|
|
|
|
while ( selection < minimum );
|
|
|
|
return selection % upper;
|
|
|
|
}
|
|
|
|
|
2018-04-07 19:43:27 -04:00
|
|
|
// Reverse the direction of a comparison result, normalized to -1, 0 or 1.
// The sign is tested rather than negated, so no value of rel (including
// INT_MIN) can overflow.
static int flip_comparison(int rel)
{
	return rel < 0 ? 1 : 0 < rel ? -1 : 0;
}
|
|
|
|
|
2018-04-07 19:43:27 -04:00
|
|
|
// Adapt a string comparison function to array element pointers: a_ptr and
// b_ptr each point at a string pointer (as qsort passes them for an array of
// strings), which is loaded and handed to compare. Returns what compare
// returns.
static int indirect_compare(int (*compare)(const char*, const char*),
                            const void* a_ptr, const void* b_ptr)
{
	const char* a = *(const char* const*) a_ptr;
	const char* b = *(const char* const*) b_ptr;
	return compare(a, b);
}
|
|
|
|
|
2018-04-07 19:43:27 -04:00
|
|
|
// Compare two lines using the collation order of the current locale.
// Returns a negative, zero or positive value like strcoll.
static int compare_line(const char* a, const char* b)
{
	return strcoll(a, b);
}
|
|
|
|
|
2018-04-07 19:43:27 -04:00
|
|
|
static int indirect_compare_line(const void* a_ptr, const void* b_ptr)
|
2014-03-07 20:02:02 -05:00
|
|
|
{
|
|
|
|
return indirect_compare(compare_line, a_ptr, b_ptr);
|
|
|
|
}
|
|
|
|
|
2018-04-07 19:43:27 -04:00
|
|
|
// Compare two lines in the opposite direction of compare_line. The result is
// normalized to -1, 0 or 1 by flip_comparison.
static int compare_line_reverse(const char* a, const char* b)
{
	return flip_comparison(compare_line(a, b));
}
|
|
|
|
|
2018-04-07 19:43:27 -04:00
|
|
|
static int indirect_compare_line_reverse(const void* a_ptr, const void* b_ptr)
|
2014-03-07 20:02:02 -05:00
|
|
|
{
|
2015-05-17 12:52:04 -04:00
|
|
|
return indirect_compare(compare_line_reverse, a_ptr, b_ptr);
|
2014-03-07 20:02:02 -05:00
|
|
|
}
|
|
|
|
|
2018-04-07 19:43:27 -04:00
|
|
|
// Compare two lines as version strings using strverscmp.
// Returns a negative, zero or positive value like strverscmp.
static int compare_version(const char* a, const char* b)
{
	return strverscmp(a, b);
}
|
|
|
|
|
2018-04-07 19:43:27 -04:00
|
|
|
static int indirect_compare_version(const void* a_ptr, const void* b_ptr)
|
2014-03-07 20:02:02 -05:00
|
|
|
{
|
|
|
|
return indirect_compare(compare_version, a_ptr, b_ptr);
|
|
|
|
}
|
|
|
|
|
2018-04-07 19:43:27 -04:00
|
|
|
// Compare two lines in the opposite direction of compare_version. The result
// is normalized to -1, 0 or 1 by flip_comparison.
static int compare_version_reverse(const char* a, const char* b)
{
	return flip_comparison(compare_version(a, b));
}
|
|
|
|
|
2018-04-07 19:43:27 -04:00
|
|
|
static int indirect_compare_version_reverse(const void* a_ptr, const void* b_ptr)
|
2014-03-07 20:02:02 -05:00
|
|
|
{
|
2015-05-17 12:52:04 -04:00
|
|
|
return indirect_compare(compare_version_reverse, a_ptr, b_ptr);
|
2014-03-07 20:02:02 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
// Reading position within the sequence of input files, which are read one
// line at a time as if they were concatenated.
struct input_stream
{
	// Paths of the input files, where "-" designates the standard input.
	const char* const* files;
	// Index into files of the file currently being read.
	size_t files_current;
	// Number of entries in files. If zero, only the standard input is read.
	size_t files_length;
	// The opened files[files_current], or NULL if it has not been opened yet.
	FILE* current_file;
	// Path of the file that the most recently read line came from.
	const char* last_file_path;
	// Number of lines read so far from the current file, which is the
	// 1-based line number of the most recently read line.
	uintmax_t last_line_number;
};
|
|
|
|
|
2018-04-07 19:43:27 -04:00
|
|
|
// Read the next record from fp, which extends up to and including the next
// delim byte, or to the end of the file if no delimiter follows. Returns a
// newly allocated string that the caller must free, with the trailing
// delimiter removed if there was one, or NULL at the end of the file. If
// reading fails, an error naming fpname is printed and the program exits
// with status 2.
static char* read_line(FILE* fp, const char* fpname, int delim)
{
	char* line = NULL;
	size_t line_size = 0;
	ssize_t amount = getdelim(&line, &line_size, delim, fp);
	if ( amount < 0 )
	{
		// Remember why getdelim failed, as free is not guaranteed to leave
		// errno alone and err reports whatever errno holds when called.
		int errnum = errno;
		// getdelim may have allocated a buffer even though it failed.
		free(line);
		if ( ferror(fp) )
		{
			errno = errnum;
			err(2, "read: %s", fpname);
		}
		return NULL;
	}
	// The final record of a file might not end with a delimiter, so only
	// remove it if present. Both sides are converted to unsigned char so the
	// comparison doesn't depend on whether char is signed.
	if ( (unsigned char) line[amount-1] == (unsigned char) delim )
		line[amount-1] = '\0';
	return line;
}
|
|
|
|
|
2018-04-07 19:43:27 -04:00
|
|
|
static char* read_input_stream_line(struct input_stream* is, int delim)
|
2014-03-07 20:02:02 -05:00
|
|
|
{
|
|
|
|
if ( !is->files_length )
|
|
|
|
{
|
|
|
|
char* result = read_line(stdin, "<stdin>", delim);
|
|
|
|
is->last_file_path = "-";
|
2018-04-07 19:43:27 -04:00
|
|
|
if ( result )
|
|
|
|
is->last_line_number++;
|
2014-03-07 20:02:02 -05:00
|
|
|
return result;
|
|
|
|
}
|
|
|
|
while ( is->files_current < is->files_length )
|
|
|
|
{
|
|
|
|
const char* path = is->files[is->files_current];
|
|
|
|
if ( !is->current_file )
|
|
|
|
{
|
|
|
|
is->last_line_number = 0;
|
|
|
|
if ( !strcmp(path, "-") )
|
|
|
|
is->current_file = stdin;
|
|
|
|
else if ( !(is->current_file = fopen(path, "r")) )
|
2018-04-07 19:43:27 -04:00
|
|
|
err(2, "%s", path);
|
2014-03-07 20:02:02 -05:00
|
|
|
}
|
|
|
|
char* result = read_line(is->current_file, path, delim);
|
|
|
|
if ( !result )
|
|
|
|
{
|
|
|
|
if ( is->current_file != stdin )
|
|
|
|
fclose(is->current_file);
|
|
|
|
is->current_file = NULL;
|
|
|
|
is->files_current++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
is->last_file_path = path;
|
|
|
|
is->last_line_number++;
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2018-04-07 19:43:27 -04:00
|
|
|
// Read all the remaining lines of the input stream into a newly allocated
// array. Returns the array, whose lines and itself the caller owns, and stores
// the number of lines in *result_num_lines. The returned array is NULL if
// there were no lines. The program exits with status 2 if memory allocation
// fails.
static char** read_input_stream_lines(size_t* result_num_lines,
                                      struct input_stream* is,
                                      int delim)
{
	char** lines = NULL;
	size_t lines_used = 0;
	size_t lines_length = 0;

	char* line;
	while ( (line = read_input_stream_line(is, delim)) )
	{
		if ( lines_used == lines_length )
		{
			// Double the capacity, where the first allocation has room for
			// 128 lines. reallocarray fails rather than letting the size
			// computation overflow.
			size_t old_lines_length = lines_length ? lines_length : 64;
			char** new_lines = (char**) reallocarray(lines, old_lines_length,
			                                         2 * sizeof(char*));
			if ( !new_lines )
				err(2, "malloc");
			lines = new_lines;
			lines_length = 2 * old_lines_length;
		}
		lines[lines_used++] = line;
	}

	*result_num_lines = lines_used;
	return lines;
}
|
|
|
|
|
|
|
|
// Remove the NULL entries from the argument vector in place, keeping the other
// arguments in their original order. *argc is lowered to the number of
// arguments kept and the vacated entries at the end are set to NULL.
static void compact_arguments(int* argc, char*** argv)
{
	char** args = *argv;
	// Move each kept argument down to the next free slot in a single pass.
	int kept = 0;
	for ( int i = 0; i < *argc; i++ )
		if ( args[i] )
			args[kept++] = args[i];
	// Clear the leftover slots so the vector stays NULL terminated.
	for ( int i = kept; i < *argc; i++ )
		args[i] = NULL;
	*argc = kept;
}
|
|
|
|
|
|
|
|
int main(int argc, char* argv[])
|
|
|
|
{
|
|
|
|
setlocale(LC_ALL, "");
|
|
|
|
|
|
|
|
bool check = false;
|
|
|
|
bool check_quiet = false;
|
|
|
|
bool merge = false;
|
|
|
|
const char* output = NULL;
|
2018-04-08 13:33:17 -04:00
|
|
|
bool random = false;
|
2014-03-07 20:02:02 -05:00
|
|
|
bool reverse = false;
|
|
|
|
bool unique = false;
|
|
|
|
bool version_sort = false;
|
|
|
|
bool zero_terminated = false;
|
|
|
|
|
|
|
|
for ( int i = 1; i < argc; i++ )
|
|
|
|
{
|
|
|
|
const char* arg = argv[i];
|
|
|
|
if ( arg[0] != '-' || !arg[1] )
|
|
|
|
continue;
|
|
|
|
argv[i] = NULL;
|
|
|
|
if ( !strcmp(arg, "--") )
|
|
|
|
break;
|
|
|
|
if ( arg[1] != '-' )
|
|
|
|
{
|
2016-02-28 18:40:20 -05:00
|
|
|
char c;
|
|
|
|
while ( (c = *++arg) ) switch ( c )
|
2014-03-07 20:02:02 -05:00
|
|
|
{
|
2018-04-07 19:43:27 -04:00
|
|
|
case 'C': check = true, check_quiet = true; break;
|
|
|
|
case 'c': check = true, check_quiet = false; break;
|
2014-03-07 20:02:02 -05:00
|
|
|
case 'm': merge = true; break;
|
|
|
|
case 'o':
|
|
|
|
if ( !*(output = arg + 1) )
|
|
|
|
{
|
|
|
|
if ( i + 1 == argc )
|
2018-04-07 19:43:27 -04:00
|
|
|
errx(2, "option requires an argument -- 'o'");
|
2014-03-07 20:02:02 -05:00
|
|
|
output = argv[i+1];
|
|
|
|
argv[++i] = NULL;
|
|
|
|
}
|
|
|
|
arg = "o";
|
|
|
|
break;
|
2018-04-08 13:33:17 -04:00
|
|
|
case 'R': random = true; break;
|
2014-03-07 20:02:02 -05:00
|
|
|
case 'r': reverse = true; break;
|
|
|
|
case 'u': unique = true; break;
|
|
|
|
case 'V': version_sort = true; break;
|
|
|
|
case 'z': zero_terminated = true; break;
|
|
|
|
default:
|
2018-04-07 19:43:27 -04:00
|
|
|
errx(2, "unknown option -- '%c'", c);
|
2014-03-07 20:02:02 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
else if ( !strcmp(arg, "--check") ||
|
|
|
|
!strcmp(arg, "--check=diagnose-first") )
|
|
|
|
check = true, check_quiet = false;
|
|
|
|
else if ( !strcmp(arg, "--check=quiet") ||
|
|
|
|
!strcmp(arg, "--check=silent") )
|
|
|
|
check = true, check_quiet = true;
|
|
|
|
else if ( !strcmp(arg, "--merge") )
|
|
|
|
merge = true;
|
|
|
|
else if ( !strncmp(arg, "--output=", strlen("--output=")) )
|
|
|
|
output = arg + strlen("--output=");
|
|
|
|
else if ( !strcmp(arg, "--output") )
|
|
|
|
{
|
|
|
|
if ( i + 1 == argc )
|
2018-04-07 19:43:27 -04:00
|
|
|
errx(2, "option '--output' requires an argument");
|
2014-03-07 20:02:02 -05:00
|
|
|
output = argv[i+1];
|
|
|
|
argv[++i] = NULL;
|
|
|
|
}
|
2018-04-08 13:33:17 -04:00
|
|
|
else if ( !strcmp(arg, "--random-sort") )
|
|
|
|
random = true;
|
2014-03-07 20:02:02 -05:00
|
|
|
else if ( !strcmp(arg, "--reverse") )
|
|
|
|
reverse = true;
|
|
|
|
else if ( !strcmp(arg, "--unique") )
|
|
|
|
unique = true;
|
|
|
|
else if ( !strcmp(arg, "--version-sort") )
|
|
|
|
version_sort = true;
|
|
|
|
else if ( !strcmp(arg, "--zero-terminated") )
|
|
|
|
zero_terminated = true;
|
|
|
|
else
|
2018-04-07 19:43:27 -04:00
|
|
|
errx(2, "unknown option: %s", arg);
|
2014-03-07 20:02:02 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
compact_arguments(&argc, &argv);
|
|
|
|
|
2018-04-07 19:43:27 -04:00
|
|
|
if ( check_quiet && output )
|
|
|
|
errx(1, "the -C and -o options are incompatible");
|
|
|
|
if ( check && output )
|
|
|
|
errx(1, "the -c and -o options are incompatible");
|
2018-04-08 13:33:17 -04:00
|
|
|
if ( check_quiet && random )
|
|
|
|
errx(1, "the -C and -R options are incompatible");
|
|
|
|
if ( check && random )
|
|
|
|
errx(1, "the -c and -R options are incompatible");
|
2014-03-07 20:02:02 -05:00
|
|
|
|
|
|
|
int delim = zero_terminated ? '\0' : '\n';
|
|
|
|
|
|
|
|
int (*compare)(const char*, const char*);
|
|
|
|
int (*qsort_compare)(const void*, const void*);
|
|
|
|
|
|
|
|
if ( version_sort && reverse )
|
|
|
|
compare = compare_version_reverse,
|
|
|
|
qsort_compare = indirect_compare_version_reverse;
|
|
|
|
else if ( version_sort )
|
|
|
|
compare = compare_version,
|
|
|
|
qsort_compare = indirect_compare_version;
|
|
|
|
else if ( reverse )
|
|
|
|
compare = compare_line_reverse,
|
|
|
|
qsort_compare = indirect_compare_line_reverse;
|
|
|
|
else
|
|
|
|
compare = compare_line,
|
|
|
|
qsort_compare = indirect_compare_line;
|
|
|
|
|
|
|
|
struct input_stream is;
|
|
|
|
memset(&is, 0, sizeof(is));
|
2016-02-28 18:40:20 -05:00
|
|
|
is.files = (const char* const*) (argv + 1);
|
2014-03-07 20:02:02 -05:00
|
|
|
is.files_current = 0;
|
|
|
|
is.files_length = argc - 1;
|
|
|
|
|
|
|
|
if ( check )
|
|
|
|
{
|
|
|
|
int needed_relation = unique ? 1 : 0;
|
|
|
|
char* prev_line = NULL;
|
2016-02-28 18:40:20 -05:00
|
|
|
char* line;
|
|
|
|
while ( (line = read_input_stream_line(&is, delim)) )
|
2014-03-07 20:02:02 -05:00
|
|
|
{
|
|
|
|
if ( prev_line && compare(line, prev_line) < needed_relation )
|
|
|
|
{
|
2018-04-07 19:43:27 -04:00
|
|
|
if ( check_quiet )
|
|
|
|
return 1;
|
|
|
|
errx(1, "%s:%ju: disorder: %s", is.last_file_path,
|
|
|
|
is.last_line_number, line);
|
2014-03-07 20:02:02 -05:00
|
|
|
}
|
|
|
|
free(prev_line);
|
|
|
|
prev_line = line;
|
|
|
|
}
|
|
|
|
free(prev_line);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
(void) merge;
|
|
|
|
|
|
|
|
size_t lines_used = 0;
|
|
|
|
char** lines = read_input_stream_lines(&lines_used, &is, delim);
|
|
|
|
|
2018-04-08 13:33:17 -04:00
|
|
|
if ( !random || unique )
|
|
|
|
qsort(lines, lines_used, sizeof(*lines), qsort_compare);
|
|
|
|
|
|
|
|
if ( random )
|
|
|
|
{
|
|
|
|
if ( unique )
|
|
|
|
{
|
|
|
|
size_t o = 0;
|
|
|
|
for ( size_t i = 0; i < lines_used; i++ )
|
|
|
|
{
|
|
|
|
if ( o && compare(lines[i], lines[o - 1]) == 0 )
|
|
|
|
continue;
|
|
|
|
lines[o++] = lines[i];
|
|
|
|
}
|
|
|
|
lines_used = o;
|
|
|
|
}
|
|
|
|
for ( size_t i = 0; i < lines_used; i++ )
|
|
|
|
{
|
|
|
|
size_t left = lines_used - i;
|
|
|
|
size_t choice = i + pick_uniform(left);
|
|
|
|
if ( choice != i )
|
|
|
|
{
|
|
|
|
char* tmp = lines[i];
|
|
|
|
lines[i] = lines[choice];
|
|
|
|
lines[choice] = tmp;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2014-03-07 20:02:02 -05:00
|
|
|
|
2018-04-07 19:43:27 -04:00
|
|
|
if ( output && !freopen(output, "w", stdout) )
|
|
|
|
err(2, "%s", output);
|
|
|
|
|
2014-03-07 20:02:02 -05:00
|
|
|
for ( size_t i = 0; i < lines_used; i++ )
|
|
|
|
{
|
|
|
|
if ( unique && i && compare(lines[i-1], lines[i]) == 0 )
|
|
|
|
continue;
|
2018-04-07 19:43:27 -04:00
|
|
|
if ( fputs(lines[i], stdout) == EOF || fputc(delim, stdout) == EOF )
|
|
|
|
err(2, "%s", output ? output : "<stdout>");
|
2014-03-07 20:02:02 -05:00
|
|
|
}
|
2018-04-07 19:43:27 -04:00
|
|
|
if ( fflush(stdout) == EOF )
|
|
|
|
err(2, "%s", output ? output : "<stdout>");
|
2014-03-07 20:02:02 -05:00
|
|
|
}
|
|
|
|
|
2018-04-07 19:43:27 -04:00
|
|
|
return 0;
|
2014-03-07 20:02:02 -05:00
|
|
|
}
|