diff --git a/doc/user-guide b/doc/user-guide
index 1c11fe42..7e16e81b 100644
--- a/doc/user-guide
+++ b/doc/user-guide
@@ -185,6 +185,7 @@ Sortix comes with a number of home-made programs. Here is an overview:
* `rmdir` - remove empty directory
* `sh` - alias for the shell
* `snake` - remake of the classic snake game
+* `sort` - sort lines of text files
* `tail` - display end of file
* `time` - measure program running time
* `true` - exit with a success status
diff --git a/utils/.gitignore b/utils/.gitignore
index 3d56985b..26a0a487 100644
--- a/utils/.gitignore
+++ b/utils/.gitignore
@@ -33,6 +33,7 @@ pwd
rm
rmdir
sh
+sort
tail
time
true
diff --git a/utils/Makefile b/utils/Makefile
index 76cb89fa..3143ed50 100644
--- a/utils/Makefile
+++ b/utils/Makefile
@@ -49,6 +49,7 @@ pwd \
rm \
rmdir \
sh \
+sort \
tail \
time \
true \
diff --git a/utils/sort.cpp b/utils/sort.cpp
new file mode 100644
index 00000000..5c9cf412
--- /dev/null
+++ b/utils/sort.cpp
@@ -0,0 +1,457 @@
+/*******************************************************************************
+
+ Copyright(C) Jonas 'Sortie' Termansen 2014.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program. If not, see .
+
+ sort.cpp
+ Sort, merge, or sequence check text files.
+
+*******************************************************************************/
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+// TODO: Implement all the features mandated by POSIX.
+// TODO: Implement the useful GNU extensions.
+
+int flip_comparison(int rel)
+{
+ return rel < 0 ? 1 : 0 < rel ? -1 : 0;
+}
+
+int indirect_compare(int (*compare)(const char*, const char*),
+ const void* a_ptr, const void* b_ptr)
+{
+ const char* a = *(const char* const*) a_ptr;
+ const char* b = *(const char* const*) b_ptr;
+ return compare(a, b);
+}
+
+int compare_line(const char* a, const char* b)
+{
+ return strcoll(a, b);
+}
+
+int indirect_compare_line(const void* a_ptr, const void* b_ptr)
+{
+ return indirect_compare(compare_line, a_ptr, b_ptr);
+}
+
+int compare_line_reverse(const char* a, const char* b)
+{
+ return flip_comparison(compare_line(a, b));
+}
+
+int indirect_compare_line_reverse(const void* a_ptr, const void* b_ptr)
+{
+ return indirect_compare(compare_line, a_ptr, b_ptr);
+}
+
+int compare_version(const char* a, const char* b)
+{
+ return strverscmp(a, b);
+}
+
+int indirect_compare_version(const void* a_ptr, const void* b_ptr)
+{
+ return indirect_compare(compare_version, a_ptr, b_ptr);
+}
+
+int compare_version_reverse(const char* a, const char* b)
+{
+ return flip_comparison(compare_version(a, b));
+}
+
+int indirect_compare_version_reverse(const void* a_ptr, const void* b_ptr)
+{
+ return indirect_compare(compare_version, a_ptr, b_ptr);
+}
+
+struct input_stream
+{
+ const char* const* files;
+ size_t files_current;
+ size_t files_length;
+ FILE* current_file;
+ const char* last_file_path;
+ uintmax_t last_line_number;
+ bool result_status;
+};
+
+char* read_line(FILE* fp, const char* fpname, int delim)
+{
+ char* line = NULL;
+ size_t line_size = 0;
+ ssize_t amount = getdelim(&line, &line_size, delim, fp);
+ if ( amount < 0 )
+ {
+ if ( ferror(fp) )
+ error(0, errno, "read: `%s'", fpname);
+ return NULL;
+ }
+ if ( amount && (unsigned char) line[amount-1] == (unsigned char) delim )
+ line[amount-1] = '\0';
+ return line;
+}
+
+char* read_input_stream_line(struct input_stream* is, int delim)
+{
+ if ( !is->files_length )
+ {
+ char* result = read_line(stdin, "", delim);
+ if ( ferror(stdin) )
+ is->result_status = false;
+ is->last_file_path = "-";
+ is->last_line_number++;
+ return result;
+ }
+ while ( is->files_current < is->files_length )
+ {
+ const char* path = is->files[is->files_current];
+ if ( !is->current_file )
+ {
+ is->last_line_number = 0;
+ if ( !strcmp(path, "-") )
+ is->current_file = stdin;
+ else if ( !(is->current_file = fopen(path, "r")) )
+ {
+ error(0, errno, "`%s'", path);
+ is->result_status = false;
+ is->files_current++;
+ continue;
+ }
+ }
+ char* result = read_line(is->current_file, path, delim);
+ if ( !result )
+ {
+ if ( ferror(is->current_file) )
+ {
+ error(0, errno, "reading: `%s'", path);
+ is->result_status = false;
+ }
+ if ( is->current_file != stdin )
+ fclose(is->current_file);
+ is->current_file = NULL;
+ is->files_current++;
+ continue;
+ }
+ is->last_file_path = path;
+ is->last_line_number++;
+ return result;
+ }
+ return NULL;
+}
+
+char** read_input_stream_lines(size_t* result_num_lines,
+ struct input_stream* is,
+ int delim)
+{
+ char** lines = NULL;
+ size_t lines_used = 0;
+ size_t lines_length = 0;
+
+ while ( char* line = read_input_stream_line(is, delim) )
+ {
+ if ( lines_used == lines_length )
+ {
+ size_t new_lines_length = lines_length ? 2 * lines_length : 64;
+ size_t new_lines_size = sizeof(char*) * new_lines_length;
+ char** new_lines = (char**) realloc(lines, new_lines_size);
+ if ( !new_lines )
+ {
+ error(0, errno, "realloc");
+ free(line);
+ is->result_status = false;
+ return *result_num_lines = lines_used, lines;
+ }
+ lines = new_lines;
+ lines_length = new_lines_length;
+ }
+ lines[lines_used++] = line;
+ }
+
+ return *result_num_lines = lines_used, lines;
+}
+
+static void compact_arguments(int* argc, char*** argv)
+{
+ for ( int i = 0; i < *argc; i++ )
+ {
+ while ( i < *argc && !(*argv)[i] )
+ {
+ for ( int n = i; n < *argc; n++ )
+ (*argv)[n] = (*argv)[n+1];
+ (*argc)--;
+ }
+ }
+}
+
+static void help(FILE* fp, const char* argv0)
+{
+ fprintf(fp, "Usage: %s [OPTION]... [FILE]...\n", argv0);
+ fprintf(fp, "Write sorted concatenation of all FILE(s) to standard output.\n");
+ fprintf(fp, "\n");
+ fprintf(fp, "Mandatory arguments to long options are mandatory for short options too.\n");
+ fprintf(fp, "Ordering options:\n");
+ fprintf(fp, "\n");
+#if 0
+ fprintf(fp, " -b, --ignore-leading-blanks ignore leading blanks\n");
+ fprintf(fp, " -d, --dictionary-order consider only blanks and alphanumeric characters\n");
+ fprintf(fp, " -f, --ignore-case fold lower case to upper case characters\n");
+ fprintf(fp, " -g, --general-numeric-sort compare according to general numerical value\n");
+ fprintf(fp, " -i, --ignore-nonprinting consider only printable characters\n");
+ fprintf(fp, " -M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n");
+ fprintf(fp, " -h, --human-numeric-sort compare human readable numbers (e.g., 2K 1G)\n");
+ fprintf(fp, " -n, --numeric-sort compare according to string numerical value\n");
+ fprintf(fp, " -R, --random-sort sort by random hash of keys\n");
+ fprintf(fp, " --random-source=FILE get random bytes from FILE\n");
+#endif
+ fprintf(fp, " -r, --reverse reverse the result of comparisons\n");
+#if 0
+ fprintf(fp, " --sort=WORD sort according to WORD:\n");
+ fprintf(fp, " general-numeric -g, human-numeric -h, month -M,\n");
+ fprintf(fp, " numeric -n, random -R, version -V\n");
+#endif
+ fprintf(fp, " -V, --version-sort natural sort of (version) numbers within text\n");
+ fprintf(fp, "\n");
+ fprintf(fp, "Other options:\n");
+ fprintf(fp, "\n");
+#if 0
+ fprintf(fp, " --batch-size=NMERGE merge at most NMERGE inputs at once;\n");
+ fprintf(fp, " for more use temp files\n");
+#endif
+ fprintf(fp, " -c, --check, --check=diagnose-first check for sorted input; do not sort\n");
+ fprintf(fp, " -C, --check=quiet, --check=silent like -c, but do not report first bad line\n");
+#if 0
+ fprintf(fp, " --compress-program=PROG compress temporaries with PROG;\n");
+ fprintf(fp, " decompress them with PROG -d\n");
+ fprintf(fp, " --debug annotate the part of the line used to sort,\n");
+ fprintf(fp, " and warn about questionable usage to stderr\n");
+ fprintf(fp, " --files0-from=F read input from the files specified by\n");
+ fprintf(fp, " NUL-terminated names in file F;\n");
+ fprintf(fp, " If F is - then read names from standard input\n");
+ fprintf(fp, " -k, --key=POS1[,POS2] start a key at POS1 (origin 1), end it at POS2\n");
+ fprintf(fp, " (default end of line). See POS syntax below\n");
+#endif
+ fprintf(fp, " -m, --merge merge already sorted files; do not sort\n");
+ fprintf(fp, " -o, --output=FILE write result to FILE instead of standard output\n");
+#if 0
+ fprintf(fp, " -s, --stable stabilize sort by disabling last-resort comparison\n");
+ fprintf(fp, " -S, --buffer-size=SIZE use SIZE for main memory buffer\n");
+ fprintf(fp, " -t, --field-separator=SEP use SEP instead of non-blank to blank transition\n");
+ fprintf(fp, " -T, --temporary-directory=DIR use DIR for temporaries, not $TMPDIR or /tmp;\n");
+ fprintf(fp, " multiple options specify multiple directories\n");
+ fprintf(fp, " --parallel=N change the number of sorts run concurrently to N\n");
+#endif
+ fprintf(fp, " -u, --unique with -c, check for strict ordering;\n");
+ fprintf(fp, " without -c, output only the first of an equal run\n");
+ fprintf(fp, " -z, --zero-terminated end lines with 0 byte, not newline\n");
+ fprintf(fp, " --help display this help and exit\n");
+ fprintf(fp, " --version output version information and exit\n");
+ fprintf(fp, "\n");
+#if 0
+ fprintf(fp, "POS is F[.C][OPTS], where F is the field number and C the character position\n");
+ fprintf(fp, "in the field; both are origin 1. If neither -t nor -b is in effect, characters\n");
+ fprintf(fp, "in a field are counted from the beginning of the preceding whitespace. OPTS is\n");
+ fprintf(fp, "one or more single-letter ordering options, which override global ordering\n");
+ fprintf(fp, "options for that key. If no key is given, use the entire line as the key.\n");
+ fprintf(fp, "\n");
+ fprintf(fp, "SIZE may be followed by the following multiplicative suffixes:\n");
+ fprintf(fp, "%% 1%% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n");
+ fprintf(fp, "\n");
+#endif
+ fprintf(fp, "With no FILE, or when FILE is -, read standard input.\n");
+ fprintf(fp, "\n");
+ fprintf(fp, "*** WARNING ***\n");
+ fprintf(fp, "The locale specified by the environment affects sort order.\n");
+ fprintf(fp, "Set LC_ALL=C to get the traditional sort order that uses\n");
+ fprintf(fp, "native byte values.\n");
+}
+
+static void version(FILE* fp, const char* argv0)
+{
+ fprintf(fp, "%s (Sortix) %s\n", argv0, VERSIONSTR);
+ fprintf(fp, "License GPLv3+: GNU GPL version 3 or later .\n");
+ fprintf(fp, "This is free software: you are free to change and redistribute it.\n");
+ fprintf(fp, "There is NO WARRANTY, to the extent permitted by law.\n");
+}
+
+int main(int argc, char* argv[])
+{
+ setlocale(LC_ALL, "");
+
+ bool check = false;
+ bool check_quiet = false;
+ bool merge = false;
+ const char* output = NULL;
+ bool reverse = false;
+ bool unique = false;
+ bool version_sort = false;
+ bool zero_terminated = false;
+
+ const char* argv0 = argv[0];
+ for ( int i = 1; i < argc; i++ )
+ {
+ const char* arg = argv[i];
+ if ( arg[0] != '-' || !arg[1] )
+ continue;
+ argv[i] = NULL;
+ if ( !strcmp(arg, "--") )
+ break;
+ if ( arg[1] != '-' )
+ {
+ while ( char c = *++arg ) switch ( c )
+ {
+ case 'c': check = true; break;
+ case 'C': check = check_quiet = false; break;
+ case 'm': merge = true; break;
+ case 'o':
+ if ( !*(output = arg + 1) )
+ {
+ if ( i + 1 == argc )
+ {
+ error(0, 0, "option requires an argument -- 'o'");
+ fprintf(stderr, "Try `%s --help' for more information.\n", argv[0]);
+ exit(125);
+ }
+ output = argv[i+1];
+ argv[++i] = NULL;
+ }
+ arg = "o";
+ break;
+ case 'r': reverse = true; break;
+ case 'u': unique = true; break;
+ case 'V': version_sort = true; break;
+ case 'z': zero_terminated = true; break;
+ default:
+ fprintf(stderr, "%s: unknown option -- '%c'\n", argv0, c);
+ help(stderr, argv0);
+ exit(1);
+ }
+ }
+ else if ( !strcmp(arg, "--help") )
+ help(stdout, argv0), exit(0);
+ else if ( !strcmp(arg, "--version") )
+ version(stdout, argv0), exit(0);
+ else if ( !strcmp(arg, "--check") ||
+ !strcmp(arg, "--check=diagnose-first") )
+ check = true, check_quiet = false;
+ else if ( !strcmp(arg, "--check=quiet") ||
+ !strcmp(arg, "--check=silent") )
+ check = true, check_quiet = true;
+ else if ( !strcmp(arg, "--merge") )
+ merge = true;
+ else if ( !strncmp(arg, "--output=", strlen("--output=")) )
+ output = arg + strlen("--output=");
+ else if ( !strcmp(arg, "--output") )
+ {
+ if ( i + 1 == argc )
+ {
+ error(0, 0, "option '--output' requires an argument");
+ fprintf(stderr, "Try `%s --help' for more information.\n", argv[0]);
+ exit(125);
+ }
+ output = argv[i+1];
+ argv[++i] = NULL;
+ }
+ else if ( !strcmp(arg, "--reverse") )
+ reverse = true;
+ else if ( !strcmp(arg, "--unique") )
+ unique = true;
+ else if ( !strcmp(arg, "--version-sort") )
+ version_sort = true;
+ else if ( !strcmp(arg, "--zero-terminated") )
+ zero_terminated = true;
+ else
+ {
+ fprintf(stderr, "%s: unknown option: %s\n", argv0, arg);
+ help(stderr, argv0);
+ exit(1);
+ }
+ }
+
+ compact_arguments(&argc, &argv);
+
+ if ( output && !freopen(output, "w", stdout) )
+ error(2, errno, "`%s'", output);
+
+ int delim = zero_terminated ? '\0' : '\n';
+
+ int (*compare)(const char*, const char*);
+ int (*qsort_compare)(const void*, const void*);
+
+ if ( version_sort && reverse )
+ compare = compare_version_reverse,
+ qsort_compare = indirect_compare_version_reverse;
+ else if ( version_sort )
+ compare = compare_version,
+ qsort_compare = indirect_compare_version;
+ else if ( reverse )
+ compare = compare_line_reverse,
+ qsort_compare = indirect_compare_line_reverse;
+ else
+ compare = compare_line,
+ qsort_compare = indirect_compare_line;
+
+ struct input_stream is;
+ memset(&is, 0, sizeof(is));
+ is.files = argv + 1;
+ is.files_current = 0;
+ is.files_length = argc - 1;
+ is.result_status = true;
+
+ if ( check )
+ {
+ int needed_relation = unique ? 1 : 0;
+ char* prev_line = NULL;
+ while ( char* line = read_input_stream_line(&is, delim) )
+ {
+ if ( prev_line && compare(line, prev_line) < needed_relation )
+ {
+ if ( !check_quiet )
+ error(0, errno, "%s:%ju: disorder: %s", is.last_file_path,
+ is.last_line_number, line);
+ exit(1);
+ }
+ free(prev_line);
+ prev_line = line;
+ }
+ free(prev_line);
+ }
+ else
+ {
+ (void) merge;
+
+ size_t lines_used = 0;
+ char** lines = read_input_stream_lines(&lines_used, &is, delim);
+
+ qsort(lines, lines_used, sizeof(*lines), qsort_compare);
+
+ for ( size_t i = 0; i < lines_used; i++ )
+ {
+ if ( unique && i && compare(lines[i-1], lines[i]) == 0 )
+ continue;
+ fputs(lines[i], stdout);
+ fputc(delim, stdout);
+ }
+ }
+
+ return is.result_status ? 0 : 2;
+}