diff --git a/utils/.gitignore b/utils/.gitignore
index c86a23ab..6ace2e45 100644
--- a/utils/.gitignore
+++ b/utils/.gitignore
@@ -33,4 +33,5 @@ time
type
uname
uptime
+wc
which
diff --git a/utils/Makefile b/utils/Makefile
index edbb48e2..8c394648 100644
--- a/utils/Makefile
+++ b/utils/Makefile
@@ -49,6 +49,7 @@ type \
time \
uname \
uptime \
+wc \
which \
INSTALLBINARIES:=$(addprefix $(DESTDIR)$(BINDIR)/,$(BINARIES))
diff --git a/utils/wc.cpp b/utils/wc.cpp
new file mode 100644
index 00000000..a657860b
--- /dev/null
+++ b/utils/wc.cpp
@@ -0,0 +1,311 @@
+/*******************************************************************************
+
+ Copyright(C) Jonas 'Sortie' Termansen 2013.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program. If not, see <http://www.gnu.org/licenses/>.
+
+ wc.cpp
+ Counts bytes, characters, words and lines.
+
+*******************************************************************************/
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+// Fallback version string printed by version() when VERSIONSTR has not
+// already been defined (presumably by the build system - confirm).
+#if !defined(VERSIONSTR)
+#define VERSIONSTR "unknown version"
+#endif
+
+// Bit flags selecting which counts print_stat() emits. Each count has its own
+// bit so that any combination can be requested on the command line.
+const int FLAG_PRINT_NUM_BYTES = 1 << 0;
+const int FLAG_PRINT_NUM_CHARACTERS = 1 << 1;
+const int FLAG_PRINT_NUM_WORDS = 1 << 2;
+const int FLAG_PRINT_NUM_LINES = 1 << 3;
+// Not a count selector: makes print_stat() print the bare number without any
+// padding. main() sets it when exactly one count is selected and at most one
+// operand remains.
+const int FLAG_PRINT_COMPACT = 1 << 4;
+
+// Counts printed when main() saw no count option at all.
+const int DEFAULT_FLAGS =
+ FLAG_PRINT_NUM_BYTES | FLAG_PRINT_NUM_WORDS | FLAG_PRINT_NUM_LINES;
+
+// The statistics gathered for one input (or summed over several inputs when
+// used as the running total).
+struct word_count
+{
+ // Every byte returned by fgetc, whether or not it decoded to a character.
+ uintmax_t num_bytes;
+ // Multibyte characters that mbrtowc decoded successfully.
+ uintmax_t num_characters;
+ // Runs of non-whitespace characters (as classified by iswspace).
+ uintmax_t num_words;
+ // Newline characters, plus one if the input ends in an unterminated line.
+ uintmax_t num_lines;
+};
+
+// Reads fp byte by byte until fgetc reports EOF and returns the number of
+// bytes, characters, words and lines seen.
+//
+// Bytes are fed one at a time to mbrtowc, so the character count follows the
+// current locale. A byte that makes the sequence invalid is counted as a byte
+// only and the decoder state is reset. A word is a maximal run of characters
+// for which iswspace is false. A final line without a terminating newline is
+// counted as a line too.
+//
+// The caller is expected to check ferror(fp) afterwards; this function does
+// not distinguish a read error from end of file.
+static struct word_count count_words(FILE* fp)
+{
+	struct word_count counts;
+	memset(&counts, 0, sizeof(counts));
+
+	mbstate_t decode_state;
+	memset(&decode_state, 0, sizeof(decode_state));
+
+	bool in_word = false;
+	bool in_line = false;
+
+	for ( int next = fgetc(fp); next != EOF; next = fgetc(fp) )
+	{
+		counts.num_bytes++;
+
+		char byte = (char) ((unsigned char) next);
+		wchar_t wc;
+		size_t result = mbrtowc(&wc, &byte, 1, &decode_state);
+
+		// Invalid sequence: forget the partial state and start afresh with
+		// the next byte.
+		if ( result == (size_t) -1 )
+		{
+			memset(&decode_state, 0, sizeof(decode_state));
+			continue;
+		}
+
+		// Incomplete sequence: more bytes are needed before a character is
+		// available.
+		if ( result == (size_t) -2 )
+			continue;
+
+		// TODO: Is this strictly speaking needed?
+		if ( result == 0 )
+			wc = L'\0';
+
+		counts.num_characters++;
+
+		// A word ends at the first whitespace character following it.
+		if ( !iswspace(wc) )
+			in_word = true;
+		else if ( in_word )
+		{
+			counts.num_words++;
+			in_word = false;
+		}
+
+		// A line ends at each newline; anything else means a line is open.
+		if ( wc == L'\n' )
+		{
+			counts.num_lines++;
+			in_line = false;
+		}
+		else
+			in_line = true;
+	}
+
+	// Account for a word and/or line still open at end of input.
+	if ( in_word )
+		counts.num_words++;
+	if ( in_line )
+		counts.num_lines++;
+
+	return counts;
+}
+
+// Writes a single count to fp, but only if the bit `cond` is set in `flags`.
+//
+// With FLAG_PRINT_COMPACT the bare number is written. Otherwise values below
+// 100000 are right-aligned in a six character column (which always leaves at
+// least one leading blank), and larger values are written surrounded by
+// blanks so neighbouring columns cannot run together.
+static void print_stat(FILE* fp, uintmax_t value, int flags, int cond)
+{
+	if ( (flags & cond) == 0 )
+		return;
+
+	if ( flags & FLAG_PRINT_COMPACT )
+		fprintf(fp, "%ju", value);
+	else if ( value < 100000 )
+		fprintf(fp, "%6ju", value);
+	else
+		fprintf(fp, " %ju ", value);
+}
+
+// Writes one output row to fp: the counts selected by `flags`, followed by
+// " path" when path is non-NULL, followed by a newline.
+//
+// The columns appear in the order the help text promises: newline, word,
+// character, byte. Which of them are actually printed, and how they are
+// padded, is decided by print_stat().
+static
+void print_stats(struct word_count stats, FILE* fp, int flags, const char* path)
+{
+	// TODO: Proper columnization of large values will require knowing all the
+	//       row values in advance - so we'll have to remember the statistics
+	//       for every file we process before printing!
+	print_stat(fp, stats.num_lines, flags, FLAG_PRINT_NUM_LINES);
+	print_stat(fp, stats.num_words, flags, FLAG_PRINT_NUM_WORDS);
+	// Characters go before bytes so the output matches the documented order.
+	print_stat(fp, stats.num_characters, flags, FLAG_PRINT_NUM_CHARACTERS);
+	print_stat(fp, stats.num_bytes, flags, FLAG_PRINT_NUM_BYTES);
+	if ( path )
+		fprintf(fp, " %s", path);
+	fprintf(fp, "\n");
+}
+
+// Writes the usage synopsis and the option summary to fp.
+// argv0 is the program name shown in the synopsis line.
+static void usage(FILE* fp, const char* argv0)
+{
+	// The program accepts file operands, so the synopsis has to show them.
+	fprintf(fp, "Usage: %s [OPTION]... [FILE]...\n", argv0);
+	fputs("Print newline, word, and byte counts for each FILE, and a total line if\n", fp);
+	fputs("more than one FILE is specified. With no FILE, or when FILE is -,\n", fp);
+	fputs("read standard input. A word is a non-zero-length sequence of characters\n", fp);
+	fputs("delimited by white space.\n", fp);
+	fputs("The options below may be used to select which counts are printed, always in\n", fp);
+	fputs("the following order: newline, word, character, byte.\n", fp);
+	fputs("\n", fp);
+	fputs(" -c, --bytes print the byte counts\n", fp);
+	fputs(" -m, --chars print the character counts\n", fp);
+	fputs(" -l, --lines print the newline counts\n", fp);
+	fputs(" -w, --words print the word counts\n", fp);
+	fputs(" --help display this help and exit\n", fp);
+	fputs(" --usage display this help and exit\n", fp);
+	fputs(" --version output version information and exit\n", fp);
+}
+
+// Writes the help text to fp. At present this is exactly the output of
+// usage(); it exists as a separate entry point for the --help option.
+static void help(FILE* fp, const char* argv0)
+{
+ usage(fp, argv0);
+}
+
+// Writes the program name, the VERSIONSTR version string and the licence
+// notice to fp. argv0 is the program name to show.
+static void version(FILE* fp, const char* argv0)
+{
+	fprintf(fp, "%s (Sortix) %s\n", argv0, VERSIONSTR);
+	// The licence line must name where the licence can be read; without the
+	// URL the sentence ends in a dangling "or later .".
+	fprintf(fp, "License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>.\n");
+	fprintf(fp, "This is free software: you are free to change and redistribute it.\n");
+	fprintf(fp, "There is NO WARRANTY, to the extent permitted by law.\n");
+}
+
+// Removes every NULL entry from the argument vector, keeping the remaining
+// entries in their original order, and stores the new entry count in *argc.
+//
+// The vector is compacted in a single pass. The slots freed at the end are
+// set to NULL, so the vector stays NULL-terminated whenever anything was
+// removed. Only indices below the incoming *argc are read or written.
+static void compact_arguments(int* argc, char*** argv)
+{
+	char** vector = *argv;
+	int count = *argc;
+
+	// Slide each surviving entry down to the next free slot.
+	int kept = 0;
+	for ( int i = 0; i < count; i++ )
+		if ( vector[i] )
+			vector[kept++] = vector[i];
+
+	// Clear the now unused tail so no stale duplicates remain.
+	for ( int i = kept; i < count; i++ )
+		vector[i] = NULL;
+
+	*argc = kept;
+}
+
+// Counts the contents of the already opened stream fp and prints one row of
+// statistics for it to stdout.
+//
+// path is the name printed after the counts; it may be NULL, in which case no
+// name is printed. If total is non-NULL the counts of this stream are added
+// to it. Directories are not read: an EISDIR diagnostic and a row of zeros
+// are printed instead.
+//
+// Returns true if the stream was read without error, false if it refers to a
+// directory or a read error occurred (a row is printed in either case).
+bool word_count_file(FILE* fp, const char* path, int flags,
+                     struct word_count* total)
+{
+	// path is NULL when standard input is read without any operand; it must
+	// not be handed to a %s conversion in that state.
+	const char* display_name = path ? path : "standard input";
+
+	struct stat st;
+	if ( fstat(fileno(fp), &st) == 0 && S_ISDIR(st.st_mode) )
+	{
+		struct word_count empty;
+		memset(&empty, 0, sizeof(empty));
+		error(0, EISDIR, "`%s'", display_name);
+		print_stats(empty, stdout, flags, path);
+		return false;
+	}
+
+	struct word_count counts = count_words(fp);
+
+	// TODO: Possible overflow here!
+	if ( total )
+	{
+		total->num_bytes += counts.num_bytes;
+		total->num_characters += counts.num_characters;
+		total->num_words += counts.num_words;
+		total->num_lines += counts.num_lines;
+	}
+
+	// A read error still prints what was counted so far, after the diagnostic.
+	bool read_ok = !ferror(fp);
+	if ( !read_ok )
+		error(0, errno, "`%s'", display_name);
+	print_stats(counts, stdout, flags, path);
+	return read_ok;
+}
+
+// Counts every operand in argv[1..argc-1] and prints one row per operand.
+//
+// With no operands, standard input is counted and printed without a name.
+// The operand "-" means standard input. With two or more operands a final
+// "total" row is printed that sums all operands that could be counted,
+// including standard input.
+//
+// An operand that cannot be opened produces a diagnostic and a row of zeros.
+// Returns non-zero if every operand was processed without error, zero
+// otherwise.
+int word_count_files(int argc, char* argv[], int flags)
+{
+	if ( argc <= 1 )
+		return word_count_file(stdin, NULL, flags, NULL);
+
+	struct word_count total_count;
+	memset(&total_count, 0, sizeof(total_count));
+
+	bool success = true;
+	for ( int i = 1; i < argc; i++ )
+	{
+		if ( !strcmp(argv[i], "-") )
+		{
+			// Standard input is an operand like any other and therefore
+			// contributes to the total row as well.
+			if ( !word_count_file(stdin, "-", flags, &total_count) )
+				success = false;
+			continue;
+		}
+
+		FILE* fp = fopen(argv[i], "r");
+		if ( !fp )
+		{
+			error(0, errno, "`%s'", argv[i]);
+			struct word_count word_count;
+			memset(&word_count, 0, sizeof(word_count));
+			print_stats(word_count, stdout, flags, argv[i]);
+			success = false;
+			continue;
+		}
+
+		if ( !word_count_file(fp, argv[i], flags, &total_count) )
+			success = false;
+
+		fclose(fp);
+	}
+
+	// argc counts the program name, so 3 means at least two operands.
+	if ( 3 <= argc )
+		print_stats(total_count, stdout, flags, "total");
+
+	return success;
+}
+
+// Program entry point: parses the options, then counts the remaining operands.
+//
+// Options are recognised anywhere on the command line up to a "--" argument.
+// Each recognised option is replaced by NULL in argv and later squeezed out
+// by compact_arguments(), so that only the program name and the operands
+// remain. A lone "-" is an operand (standard input), not an option.
+//
+// Exits with status 0 if everything was counted successfully and 1 otherwise
+// (including on an unknown option).
+int main(int argc, char* argv[])
+{
+	setlocale(LC_ALL, "");
+
+	int flags = 0;
+
+	const char* argv0 = argv[0];
+	// Start at 1: argv[0] is the program name and must never be parsed (and
+	// removed) as an option, even if it happens to begin with a dash.
+	for ( int i = 1; i < argc; i++ )
+	{
+		const char* arg = argv[i];
+		if ( arg[0] != '-' || !arg[1] )
+			continue;
+		argv[i] = NULL;
+		if ( !strcmp(arg, "--") )
+			break;
+		if ( arg[1] != '-' )
+		{
+			// A cluster of short options such as -lw.
+			while ( char c = *++arg ) switch ( c )
+			{
+			case 'c': flags |= FLAG_PRINT_NUM_BYTES; break;
+			case 'l': flags |= FLAG_PRINT_NUM_LINES; break;
+			case 'm': flags |= FLAG_PRINT_NUM_CHARACTERS; break;
+			case 'w': flags |= FLAG_PRINT_NUM_WORDS; break;
+			default:
+				fprintf(stderr, "%s: unknown option -- '%c'\n", argv0, c);
+				usage(stderr, argv0);
+				exit(1);
+			}
+		}
+		else if ( !strcmp(arg, "--help") )
+			help(stdout, argv0), exit(0);
+		else if ( !strcmp(arg, "--usage") )
+			usage(stdout, argv0), exit(0);
+		else if ( !strcmp(arg, "--version") )
+			version(stdout, argv0), exit(0);
+		else if ( !strcmp(arg, "--bytes") )
+			flags |= FLAG_PRINT_NUM_BYTES;
+		else if ( !strcmp(arg, "--chars") )
+			flags |= FLAG_PRINT_NUM_CHARACTERS;
+		else if ( !strcmp(arg, "--lines") )
+			flags |= FLAG_PRINT_NUM_LINES;
+		else if ( !strcmp(arg, "--words") )
+			flags |= FLAG_PRINT_NUM_WORDS;
+		else
+		{
+			fprintf(stderr, "%s: unknown option: %s\n", argv0, arg);
+			usage(stderr, argv0);
+			exit(1);
+		}
+	}
+
+	compact_arguments(&argc, &argv);
+
+	if ( !flags )
+		flags = DEFAULT_FLAGS;
+
+	// If exactly one count is selected (flags equals its own lowest set bit)
+	// and there is at most one operand, there is nothing to align against,
+	// so the number is printed without padding.
+	if ( flags && flags == 1 << (ffs(flags)-1) && argc <= 2 )
+		flags |= FLAG_PRINT_COMPACT;
+
+	return word_count_files(argc, argv, flags) ? 0 : 1;
+}