/*
 * Mirror of https://gitlab.com/sortix/sortix.git
 * Synced 2023-02-13 20:55:38 -05:00.
 * 385 lines, 9.5 KiB, C.
 */
/*
 * Copyright (c) 2014, 2015, 2018 Jonas 'Sortie' Termansen.
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * sort.c
 * Sort, merge, or sequence check text files.
 */
|
|
|
|
#include <err.h>
|
|
#include <errno.h>
|
|
#include <locale.h>
|
|
#include <stdbool.h>
|
|
#include <stdint.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
// TODO: Implement all the features mandated by POSIX.
// TODO: Implement the useful GNU extensions.
|
|
|
|
static size_t pick_uniform(size_t upper)
|
|
{
|
|
if ( upper < 2 )
|
|
return 0;
|
|
size_t minimum = -upper % upper;
|
|
size_t selection;
|
|
do arc4random_buf(&selection, sizeof(selection));
|
|
while ( selection < minimum );
|
|
return selection % upper;
|
|
}
|
|
|
|
// Invert the direction of a three-way comparison result.
//
// Any negative rel becomes 1, any positive rel becomes -1, and zero stays
// zero, so the result is always normalized to -1, 0 or 1.
static int flip_comparison(int rel)
{
	if ( rel < 0 )
		return 1;
	if ( rel > 0 )
		return -1;
	return 0;
}
|
|
|
|
// Apply a string comparison to two array elements that are themselves
// pointers to strings.
//
// a_ptr and b_ptr each point at a `const char*`; both are dereferenced once
// and the resulting strings are handed to compare, whose result is returned.
static int indirect_compare(int (*compare)(const char*, const char*),
                            const void* a_ptr, const void* b_ptr)
{
	const char* const* lhs = a_ptr;
	const char* const* rhs = b_ptr;
	return compare(*lhs, *rhs);
}
|
|
|
|
// Compare two lines according to the collation order of the current locale
// (strcoll). Returns negative, zero or positive like strcoll does.
static int compare_line(const char* a, const char* b)
{
	int order = strcoll(a, b);
	return order;
}
|
|
|
|
static int indirect_compare_line(const void* a_ptr, const void* b_ptr)
|
|
{
|
|
return indirect_compare(compare_line, a_ptr, b_ptr);
|
|
}
|
|
|
|
// Compare two lines with compare_line and return the opposite ordering,
// normalized to -1, 0 or 1 by flip_comparison.
static int compare_line_reverse(const char* a, const char* b)
{
	int forward = compare_line(a, b);
	return flip_comparison(forward);
}
|
|
|
|
static int indirect_compare_line_reverse(const void* a_ptr, const void* b_ptr)
|
|
{
|
|
return indirect_compare(compare_line_reverse, a_ptr, b_ptr);
|
|
}
|
|
|
|
// Compare two lines as version strings using strverscmp. Returns negative,
// zero or positive like strverscmp does.
static int compare_version(const char* a, const char* b)
{
	int order = strverscmp(a, b);
	return order;
}
|
|
|
|
static int indirect_compare_version(const void* a_ptr, const void* b_ptr)
|
|
{
|
|
return indirect_compare(compare_version, a_ptr, b_ptr);
|
|
}
|
|
|
|
// Compare two lines with compare_version and return the opposite ordering,
// normalized to -1, 0 or 1 by flip_comparison.
static int compare_version_reverse(const char* a, const char* b)
{
	int forward = compare_version(a, b);
	return flip_comparison(forward);
}
|
|
|
|
static int indirect_compare_version_reverse(const void* a_ptr, const void* b_ptr)
|
|
{
|
|
return indirect_compare(compare_version_reverse, a_ptr, b_ptr);
|
|
}
|
|
|
|
// Reading state for the concatenation of all input files.
struct input_stream
{
	// Paths of the input files; "-" selects standard input.
	const char* const* files;
	// Index into files of the file currently being read.
	size_t files_current;
	// Number of entries in files; zero means read standard input only.
	size_t files_length;
	// Open handle for files[files_current], or NULL if it is not opened yet.
	FILE* current_file;
	// Path of the file that supplied the most recently returned line.
	const char* last_file_path;
	// Line number, within its file, of the most recently returned line.
	uintmax_t last_line_number;
};
|
|
|
|
// Read the next delim-terminated record from fp.
//
// Returns a freshly allocated string that the caller must free, with the
// trailing delimiter (if present) replaced by a NUL terminator. Returns NULL
// when getdelim yields nothing more without a stream error. If the stream is
// in an error state, the program exits with status 2 and a diagnostic that
// names fpname.
static char* read_line(FILE* fp, const char* fpname, int delim)
{
	char* buffer = NULL;
	size_t buffer_size = 0;
	ssize_t length = getdelim(&buffer, &buffer_size, delim, fp);
	if ( 0 <= length )
	{
		// Strip the delimiter; the final record of a file may lack one.
		char* last = &buffer[length - 1];
		if ( (unsigned char) *last == (unsigned char) delim )
			*last = '\0';
		return buffer;
	}
	// getdelim may have allocated a buffer even though it failed.
	free(buffer);
	if ( ferror(fp) )
		err(2, "read: %s", fpname);
	return NULL;
}
|
|
|
|
static char* read_input_stream_line(struct input_stream* is, int delim)
|
|
{
|
|
if ( !is->files_length )
|
|
{
|
|
char* result = read_line(stdin, "<stdin>", delim);
|
|
is->last_file_path = "-";
|
|
if ( result )
|
|
is->last_line_number++;
|
|
return result;
|
|
}
|
|
while ( is->files_current < is->files_length )
|
|
{
|
|
const char* path = is->files[is->files_current];
|
|
if ( !is->current_file )
|
|
{
|
|
is->last_line_number = 0;
|
|
if ( !strcmp(path, "-") )
|
|
is->current_file = stdin;
|
|
else if ( !(is->current_file = fopen(path, "r")) )
|
|
err(2, "%s", path);
|
|
}
|
|
char* result = read_line(is->current_file, path, delim);
|
|
if ( !result )
|
|
{
|
|
if ( is->current_file != stdin )
|
|
fclose(is->current_file);
|
|
is->current_file = NULL;
|
|
is->files_current++;
|
|
continue;
|
|
}
|
|
is->last_file_path = path;
|
|
is->last_line_number++;
|
|
return result;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
static char** read_input_stream_lines(size_t* result_num_lines,
|
|
struct input_stream* is,
|
|
int delim)
|
|
{
|
|
char** lines = NULL;
|
|
size_t lines_used = 0;
|
|
size_t lines_length = 0;
|
|
|
|
char* line;
|
|
while ( (line = read_input_stream_line(is, delim)) )
|
|
{
|
|
if ( lines_used == lines_length )
|
|
{
|
|
size_t old_lines_length = lines_length ? lines_length : 64;
|
|
char** new_lines = (char**) reallocarray(lines, old_lines_length,
|
|
2 * sizeof(char*));
|
|
if ( !new_lines )
|
|
err(2, "malloc");
|
|
lines = new_lines;
|
|
lines_length = 2 * old_lines_length;
|
|
}
|
|
lines[lines_used++] = line;
|
|
}
|
|
|
|
return *result_num_lines = lines_used, lines;
|
|
}
|
|
|
|
// Remove the NULL entries from an argument vector in place.
//
// The surviving entries keep their relative order and are moved to the front
// of *argv, *argc is reduced to the number of survivors, and every vacated
// slot up to the original *argc is set to NULL so the vector stays
// NULL-terminated. This is a single pass over the vector rather than shifting
// the whole tail once per removed entry, and it never reads (*argv)[*argc].
static void compact_arguments(int* argc, char*** argv)
{
	const int count = *argc;
	if ( count <= 0 )
		return;
	char** args = *argv;
	int kept = 0;
	for ( int i = 0; i < count; i++ )
	{
		if ( args[i] )
			args[kept++] = args[i];
	}
	for ( int i = kept; i < count; i++ )
		args[i] = NULL;
	*argc = kept;
}
|
|
|
|
int main(int argc, char* argv[])
|
|
{
|
|
setlocale(LC_ALL, "");
|
|
|
|
bool check = false;
|
|
bool check_quiet = false;
|
|
bool merge = false;
|
|
const char* output = NULL;
|
|
bool random = false;
|
|
bool reverse = false;
|
|
bool unique = false;
|
|
bool version_sort = false;
|
|
bool zero_terminated = false;
|
|
|
|
for ( int i = 1; i < argc; i++ )
|
|
{
|
|
const char* arg = argv[i];
|
|
if ( arg[0] != '-' || !arg[1] )
|
|
continue;
|
|
argv[i] = NULL;
|
|
if ( !strcmp(arg, "--") )
|
|
break;
|
|
if ( arg[1] != '-' )
|
|
{
|
|
char c;
|
|
while ( (c = *++arg) ) switch ( c )
|
|
{
|
|
case 'C': check = true, check_quiet = true; break;
|
|
case 'c': check = true, check_quiet = false; break;
|
|
case 'm': merge = true; break;
|
|
case 'o':
|
|
if ( !*(output = arg + 1) )
|
|
{
|
|
if ( i + 1 == argc )
|
|
errx(2, "option requires an argument -- 'o'");
|
|
output = argv[i+1];
|
|
argv[++i] = NULL;
|
|
}
|
|
arg = "o";
|
|
break;
|
|
case 'R': random = true; break;
|
|
case 'r': reverse = true; break;
|
|
case 'u': unique = true; break;
|
|
case 'V': version_sort = true; break;
|
|
case 'z': zero_terminated = true; break;
|
|
default:
|
|
errx(2, "unknown option -- '%c'", c);
|
|
}
|
|
}
|
|
else if ( !strcmp(arg, "--check") ||
|
|
!strcmp(arg, "--check=diagnose-first") )
|
|
check = true, check_quiet = false;
|
|
else if ( !strcmp(arg, "--check=quiet") ||
|
|
!strcmp(arg, "--check=silent") )
|
|
check = true, check_quiet = true;
|
|
else if ( !strcmp(arg, "--merge") )
|
|
merge = true;
|
|
else if ( !strncmp(arg, "--output=", strlen("--output=")) )
|
|
output = arg + strlen("--output=");
|
|
else if ( !strcmp(arg, "--output") )
|
|
{
|
|
if ( i + 1 == argc )
|
|
errx(2, "option '--output' requires an argument");
|
|
output = argv[i+1];
|
|
argv[++i] = NULL;
|
|
}
|
|
else if ( !strcmp(arg, "--random-sort") )
|
|
random = true;
|
|
else if ( !strcmp(arg, "--reverse") )
|
|
reverse = true;
|
|
else if ( !strcmp(arg, "--unique") )
|
|
unique = true;
|
|
else if ( !strcmp(arg, "--version-sort") )
|
|
version_sort = true;
|
|
else if ( !strcmp(arg, "--zero-terminated") )
|
|
zero_terminated = true;
|
|
else
|
|
errx(2, "unknown option: %s", arg);
|
|
}
|
|
|
|
compact_arguments(&argc, &argv);
|
|
|
|
if ( check_quiet && output )
|
|
errx(1, "the -C and -o options are incompatible");
|
|
if ( check && output )
|
|
errx(1, "the -c and -o options are incompatible");
|
|
if ( check_quiet && random )
|
|
errx(1, "the -C and -R options are incompatible");
|
|
if ( check && random )
|
|
errx(1, "the -c and -R options are incompatible");
|
|
|
|
int delim = zero_terminated ? '\0' : '\n';
|
|
|
|
int (*compare)(const char*, const char*);
|
|
int (*qsort_compare)(const void*, const void*);
|
|
|
|
if ( version_sort && reverse )
|
|
compare = compare_version_reverse,
|
|
qsort_compare = indirect_compare_version_reverse;
|
|
else if ( version_sort )
|
|
compare = compare_version,
|
|
qsort_compare = indirect_compare_version;
|
|
else if ( reverse )
|
|
compare = compare_line_reverse,
|
|
qsort_compare = indirect_compare_line_reverse;
|
|
else
|
|
compare = compare_line,
|
|
qsort_compare = indirect_compare_line;
|
|
|
|
struct input_stream is;
|
|
memset(&is, 0, sizeof(is));
|
|
is.files = (const char* const*) (argv + 1);
|
|
is.files_current = 0;
|
|
is.files_length = argc - 1;
|
|
|
|
if ( check )
|
|
{
|
|
int needed_relation = unique ? 1 : 0;
|
|
char* prev_line = NULL;
|
|
char* line;
|
|
while ( (line = read_input_stream_line(&is, delim)) )
|
|
{
|
|
if ( prev_line && compare(line, prev_line) < needed_relation )
|
|
{
|
|
if ( check_quiet )
|
|
return 1;
|
|
errx(1, "%s:%ju: disorder: %s", is.last_file_path,
|
|
is.last_line_number, line);
|
|
}
|
|
free(prev_line);
|
|
prev_line = line;
|
|
}
|
|
free(prev_line);
|
|
}
|
|
else
|
|
{
|
|
(void) merge;
|
|
|
|
size_t lines_used = 0;
|
|
char** lines = read_input_stream_lines(&lines_used, &is, delim);
|
|
|
|
if ( !random || unique )
|
|
qsort(lines, lines_used, sizeof(*lines), qsort_compare);
|
|
|
|
if ( random )
|
|
{
|
|
if ( unique )
|
|
{
|
|
size_t o = 0;
|
|
for ( size_t i = 0; i < lines_used; i++ )
|
|
{
|
|
if ( o && compare(lines[i], lines[o - 1]) == 0 )
|
|
continue;
|
|
lines[o++] = lines[i];
|
|
}
|
|
lines_used = o;
|
|
}
|
|
for ( size_t i = 0; i < lines_used; i++ )
|
|
{
|
|
size_t left = lines_used - i;
|
|
size_t choice = i + pick_uniform(left);
|
|
if ( choice != i )
|
|
{
|
|
char* tmp = lines[i];
|
|
lines[i] = lines[choice];
|
|
lines[choice] = tmp;
|
|
}
|
|
}
|
|
}
|
|
|
|
if ( output && !freopen(output, "w", stdout) )
|
|
err(2, "%s", output);
|
|
|
|
for ( size_t i = 0; i < lines_used; i++ )
|
|
{
|
|
if ( unique && i && compare(lines[i-1], lines[i]) == 0 )
|
|
continue;
|
|
if ( fputs(lines[i], stdout) == EOF || fputc(delim, stdout) == EOF )
|
|
err(2, "%s", output ? output : "<stdout>");
|
|
}
|
|
if ( fflush(stdout) == EOF )
|
|
err(2, "%s", output ? output : "<stdout>");
|
|
}
|
|
|
|
return 0;
|
|
}
|