mirror of
https://gitlab.com/sortix/sortix.git
synced 2023-02-13 20:55:38 -05:00
Add wc(1).
This commit is contained in:
parent
3e5a6644c8
commit
51d965d6eb
3 changed files with 313 additions and 0 deletions
1
utils/.gitignore
vendored
1
utils/.gitignore
vendored
|
@ -33,4 +33,5 @@ time
|
|||
type
|
||||
uname
|
||||
uptime
|
||||
wc
|
||||
which
|
||||
|
|
|
@ -49,6 +49,7 @@ type \
|
|||
time \
|
||||
uname \
|
||||
uptime \
|
||||
wc \
|
||||
which \
|
||||
|
||||
INSTALLBINARIES:=$(addprefix $(DESTDIR)$(BINDIR)/,$(BINARIES))
|
||||
|
|
311
utils/wc.cpp
Normal file
311
utils/wc.cpp
Normal file
|
@ -0,0 +1,311 @@
|
|||
/*******************************************************************************
|
||||
|
||||
Copyright(C) Jonas 'Sortie' Termansen 2013.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation, either version 3 of the License, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along with
|
||||
this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
wc.cpp
|
||||
Counts bytes, characters, words and lines.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <assert.h>
|
||||
#include <ctype.h>
|
||||
#include <errno.h>
|
||||
#include <error.h>
|
||||
#include <locale.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <wchar.h>
|
||||
#include <wctype.h>
|
||||
|
||||
// Fallback version string, used by version() when the build does not define
// VERSIONSTR itself.
#ifndef VERSIONSTR
#define VERSIONSTR "unknown version"
#endif
|
||||
|
||||
// Bit flags selecting which counters get printed.
const int FLAG_PRINT_NUM_BYTES = 0x01;
const int FLAG_PRINT_NUM_CHARACTERS = 0x02;
const int FLAG_PRINT_NUM_WORDS = 0x04;
const int FLAG_PRINT_NUM_LINES = 0x08;
// Print the bare number without any column padding.
const int FLAG_PRINT_COMPACT = 0x10;

// Counters printed when the command line selects none explicitly.
const int DEFAULT_FLAGS =
	FLAG_PRINT_NUM_LINES | FLAG_PRINT_NUM_WORDS | FLAG_PRINT_NUM_BYTES;
|
||||
|
||||
// Counters gathered for one input stream, or accumulated over several.
struct word_count
{
	uintmax_t num_bytes;      // bytes read
	uintmax_t num_characters; // successfully decoded (multibyte) characters
	uintmax_t num_words;      // runs of non-whitespace characters
	uintmax_t num_lines;      // lines seen
};
|
||||
|
||||
static struct word_count count_words(FILE* fp)
|
||||
{
|
||||
struct word_count stats;
|
||||
memset(&stats, 0, sizeof(stats));
|
||||
|
||||
mbstate_t mbstate;
|
||||
memset(&mbstate, 0, sizeof(mbstate));
|
||||
|
||||
bool word_begun = false;
|
||||
bool line_begun = false;
|
||||
|
||||
int ic;
|
||||
while ( (ic = fgetc(fp)) != EOF )
|
||||
{
|
||||
stats.num_bytes++;
|
||||
|
||||
char c = (char) ((unsigned char) ic);
|
||||
|
||||
wchar_t wc;
|
||||
size_t num_converted = mbrtowc(&wc, &c, 1, &mbstate);
|
||||
if ( num_converted == (size_t) -1 )
|
||||
{
|
||||
memset(&mbstate, 0, sizeof(mbstate));
|
||||
continue;
|
||||
}
|
||||
if ( num_converted == (size_t) -2 )
|
||||
continue;
|
||||
// TODO: Is this strictly speaking needed?
|
||||
if ( !num_converted )
|
||||
wc = L'\0';
|
||||
|
||||
stats.num_characters++;
|
||||
word_begun = !iswspace(wc) ||
|
||||
(word_begun ? (stats.num_words++, false) : false);
|
||||
line_begun = wc != L'\n' || (stats.num_lines++, false);
|
||||
}
|
||||
|
||||
if ( word_begun )
|
||||
stats.num_words++;
|
||||
if ( line_begun )
|
||||
stats.num_lines++;
|
||||
|
||||
return stats;
|
||||
}
|
||||
|
||||
static void print_stat(FILE* fp, uintmax_t value, int flags, int cond)
|
||||
{
|
||||
if ( !(flags & cond) )
|
||||
return;
|
||||
if ( flags & FLAG_PRINT_COMPACT )
|
||||
{
|
||||
fprintf(fp, "%ju", value);
|
||||
return;
|
||||
}
|
||||
if ( value < 100000 )
|
||||
{
|
||||
fprintf(fp, "%6ju", value);
|
||||
return;
|
||||
}
|
||||
fprintf(fp, " %ju ", value);
|
||||
}
|
||||
|
||||
static
|
||||
void print_stats(struct word_count stats, FILE* fp, int flags, const char* path)
|
||||
{
|
||||
// TODO: Proper columnization of large values will require knowing all the
|
||||
// row values in advance - so we'll have to remember the statistics
|
||||
// for every file we process before printing!
|
||||
print_stat(fp, stats.num_lines, flags, FLAG_PRINT_NUM_LINES);
|
||||
print_stat(fp, stats.num_words, flags, FLAG_PRINT_NUM_WORDS);
|
||||
print_stat(fp, stats.num_bytes, flags, FLAG_PRINT_NUM_BYTES);
|
||||
print_stat(fp, stats.num_characters, flags, FLAG_PRINT_NUM_CHARACTERS);
|
||||
if ( path )
|
||||
fprintf(fp, " %s", path);
|
||||
fprintf(fp, "\n");
|
||||
}
|
||||
|
||||
// Writes the usage and option summary to fp.
//
// fp:    stream receiving the text (stdout for --help/--usage, stderr after an
//        option error).
// argv0: program name shown in the synopsis line.
//
// The synopsis lists the FILE operands the description refers to, and the
// documented column order matches what this program actually prints: lines,
// words, bytes, then characters.
static void usage(FILE* fp, const char* argv0)
{
	fprintf(fp, "Usage: %s [OPTION]... [FILE]...\n", argv0);
	fprintf(fp, "Print newline, word, and byte counts for each FILE, and a total line if\n");
	fprintf(fp, "more than one FILE is specified. With no FILE, or when FILE is -,\n");
	fprintf(fp, "read standard input. A word is a non-zero-length sequence of characters\n");
	fprintf(fp, "delimited by white space.\n");
	fprintf(fp, "The options below may be used to select which counts are printed, always in\n");
	fprintf(fp, "the following order: newline, word, byte, character.\n");
	fprintf(fp, "\n");
	fprintf(fp, "  -c, --bytes            print the byte counts\n");
	fprintf(fp, "  -m, --chars            print the character counts\n");
	fprintf(fp, "  -l, --lines            print the newline counts\n");
	fprintf(fp, "  -w, --words            print the word counts\n");
	fprintf(fp, "      --help             display this help and exit\n");
	fprintf(fp, "      --usage            display this help and exit\n");
	fprintf(fp, "      --version          output version information and exit\n");
}
|
||||
|
||||
static void help(FILE* fp, const char* argv0)
|
||||
{
|
||||
usage(fp, argv0);
|
||||
}
|
||||
|
||||
static void version(FILE* fp, const char* argv0)
|
||||
{
|
||||
fprintf(fp, "%s (Sortix) %s\n", argv0, VERSIONSTR);
|
||||
fprintf(fp, "License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>.\n");
|
||||
fprintf(fp, "This is free software: you are free to change and redistribute it.\n");
|
||||
fprintf(fp, "There is NO WARRANTY, to the extent permitted by law.\n");
|
||||
}
|
||||
|
||||
// Removes every NULL entry from the first *argc elements of *argv, keeping the
// remaining entries in their original order, and stores the new count in
// *argc.
//
// argc: in/out, number of entries to consider / number of entries kept.
// argv: in/out, pointer to the argument vector; the vector is edited in place.
//
// The vacated slots at the end of the counted range are set to NULL. This is
// done in a single pass and never reads beyond the counted range, so it does
// not depend on a NULL terminator following the last entry.
static void compact_arguments(int* argc, char*** argv)
{
	char** vector = *argv;
	const int original_count = *argc;

	// Slide every surviving entry down to the next free slot.
	int kept = 0;
	for ( int i = 0; i < original_count; i++ )
		if ( vector[i] )
			vector[kept++] = vector[i];

	// Clear the slots that no longer hold a live argument.
	for ( int i = kept; i < original_count; i++ )
		vector[i] = NULL;

	if ( kept < original_count )
		*argc = kept;
}
|
||||
|
||||
bool word_count_file(FILE* fp, const char* path, int flags,
|
||||
struct word_count* total)
|
||||
{
|
||||
struct stat st;
|
||||
if ( fstat(fileno(fp), &st) == 0 && S_ISDIR(st.st_mode) )
|
||||
{
|
||||
struct word_count word_count;
|
||||
memset(&word_count, 0, sizeof(word_count));
|
||||
error(0, EISDIR, "`%s'", path);
|
||||
print_stats(word_count, stdout, flags, path);
|
||||
return false;
|
||||
}
|
||||
struct word_count word_count = count_words(fp);
|
||||
// TODO: Possible overflow here!
|
||||
if ( total )
|
||||
{
|
||||
total->num_bytes += word_count.num_bytes;
|
||||
total->num_characters += word_count.num_characters;
|
||||
total->num_words += word_count.num_words;
|
||||
total->num_lines += word_count.num_lines;
|
||||
}
|
||||
if ( ferror(fp) )
|
||||
{
|
||||
error(0, errno, "`%s'", path);
|
||||
print_stats(word_count, stdout, flags, path);
|
||||
return false;
|
||||
}
|
||||
print_stats(word_count, stdout, flags, path);
|
||||
return true;
|
||||
}
|
||||
|
||||
int word_count_files(int argc, char* argv[], int flags)
|
||||
{
|
||||
if ( argc <= 1 )
|
||||
return word_count_file(stdin, NULL, flags, NULL);
|
||||
|
||||
struct word_count total_count;
|
||||
memset(&total_count, 0, sizeof(total_count));
|
||||
|
||||
bool success = true;
|
||||
for ( int i = 1; i < argc; i++ )
|
||||
{
|
||||
if ( !strcmp(argv[i], "-") )
|
||||
{
|
||||
if ( !word_count_file(stdin, "-", flags, NULL) )
|
||||
success = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
FILE* fp = fopen(argv[i], "r");
|
||||
if ( !fp )
|
||||
{
|
||||
error(0, errno, "`%s'", argv[i]);
|
||||
struct word_count word_count;
|
||||
memset(&word_count, 0, sizeof(word_count));
|
||||
print_stats(word_count, stdout, flags, argv[i]);
|
||||
success = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
if ( !word_count_file(fp, argv[i], flags, &total_count) )
|
||||
success = false;
|
||||
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
if ( 3 <= argc )
|
||||
print_stats(total_count, stdout, flags, "total");
|
||||
|
||||
return success;
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
setlocale(LC_ALL, "");
|
||||
|
||||
int flags = 0;
|
||||
|
||||
const char* argv0 = argv[0];
|
||||
for ( int i = 0; i < argc; i++ )
|
||||
{
|
||||
const char* arg = argv[i];
|
||||
if ( arg[0] != '-' || !arg[1] )
|
||||
continue;
|
||||
argv[i] = NULL;
|
||||
if ( !strcmp(arg, "--") )
|
||||
break;
|
||||
if ( arg[1] != '-' )
|
||||
{
|
||||
while ( char c = *++arg ) switch ( c )
|
||||
{
|
||||
case 'c': flags |= FLAG_PRINT_NUM_BYTES; break;
|
||||
case 'l': flags |= FLAG_PRINT_NUM_LINES; break;
|
||||
case 'm': flags |= FLAG_PRINT_NUM_CHARACTERS; break;
|
||||
case 'w': flags |= FLAG_PRINT_NUM_WORDS; break;
|
||||
default:
|
||||
fprintf(stderr, "%s: unknown option -- '%c'\n", argv0, c);
|
||||
usage(stderr, argv0);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
else if ( !strcmp(arg, "--help") )
|
||||
help(stdout, argv0), exit(0);
|
||||
else if ( !strcmp(arg, "--usage") )
|
||||
usage(stdout, argv0), exit(0);
|
||||
else if ( !strcmp(arg, "--version") )
|
||||
version(stdout, argv0), exit(0);
|
||||
else if ( !strcmp(arg, "--bytes") )
|
||||
flags |= FLAG_PRINT_NUM_BYTES;
|
||||
else if ( !strcmp(arg, "--chars") )
|
||||
flags |= FLAG_PRINT_NUM_CHARACTERS;
|
||||
else if ( !strcmp(arg, "--lines") )
|
||||
flags |= FLAG_PRINT_NUM_LINES;
|
||||
else if ( !strcmp(arg, "--words") )
|
||||
flags |= FLAG_PRINT_NUM_WORDS;
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "%s: unknown option: %s\n", argv0, arg);
|
||||
usage(stderr, argv0);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
compact_arguments(&argc, &argv);
|
||||
|
||||
if ( !flags )
|
||||
flags = DEFAULT_FLAGS;
|
||||
|
||||
if ( flags && flags == 1 << (ffs(flags)-1) && argc <= 2 )
|
||||
flags |= FLAG_PRINT_COMPACT;
|
||||
|
||||
return word_count_files(argc, argv, flags) ? 0 : 1;
|
||||
}
|
Loading…
Reference in a new issue