From 0738e9bf3c9b055a3055205e337a821152579a89 Mon Sep 17 00:00:00 2001 From: Jonas 'Sortie' Termansen Date: Thu, 17 Oct 2013 20:55:54 +0200 Subject: [PATCH] Add du(1). --- utils/.gitignore | 1 + utils/Makefile | 1 + utils/du.cpp | 487 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 489 insertions(+) create mode 100644 utils/du.cpp diff --git a/utils/.gitignore b/utils/.gitignore index 6ace2e45..ffd29a92 100644 --- a/utils/.gitignore +++ b/utils/.gitignore @@ -8,6 +8,7 @@ colormake column cp date +du echo editor find diff --git a/utils/Makefile b/utils/Makefile index 8c394648..4771f322 100644 --- a/utils/Makefile +++ b/utils/Makefile @@ -24,6 +24,7 @@ colormake \ column \ cp \ date \ +du \ echo \ editor \ find \ diff --git a/utils/du.cpp b/utils/du.cpp new file mode 100644 index 00000000..226e2d04 --- /dev/null +++ b/utils/du.cpp @@ -0,0 +1,487 @@ +/******************************************************************************* + + Copyright(C) Jonas 'Sortie' Termansen 2013. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + du.cpp + Estimate file space usage. + +*******************************************************************************/ + +// TODO: Currently hardlinks count twice! + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if !defined(VERSIONSTR) +#define VERSIONSTR "unknown version" +#endif + +const int FLAG_ALL = 1 << 0; +const int FLAG_SUMMARIZE = 1 << 1; +const int FLAG_SAME_DEVICE = 1 << 2; +const int FLAG_APPARENT_SIZE = 1 << 3; +const int FLAG_TOTAL = 1 << 4; +const int FLAG_HUMAN_READABLE = 1 << 5; +const int FLAG_SI = 1 << 6; +const int FLAG_SEPARATE_DIRS = 1 << 7; +const int FLAG_IS_OPERAND = 1 << 8; + +enum symbolic_dereference +{ + SYMBOLIC_DEREFERENCE_NONE, + SYMBOLIC_DEREFERENCE_ARGUMENTS, + SYMBOLIC_DEREFERENCE_ALWAYS, +}; + +static bool string_has_prefix(const char* string, const char* prefix) +{ + return !strncmp(string, prefix, strlen(prefix)); +} + +static char* append_to_path(const char* path, const char* elem) +{ + size_t path_length = strlen(path); + if ( !path_length ) + return errno = EINVAL, (char*) NULL; + size_t elem_length = strlen(elem); + size_t result_length = path_length + elem_length; + const char* extra_slash = ""; + if ( path[path_length-1] != '/' ) + { + result_length += 1; + extra_slash = "/"; + } + char* result = (char*) malloc(sizeof(char) * (result_length+1)); + if ( !result ) + return NULL; + stpcpy(stpcpy(stpcpy(result, path), extra_slash), elem); + return result; +} + +static uintmax_t size_to_blocks(off_t size, uintmax_t block_size) +{ + uintmax_t umax_size = (uintmax_t) size; + return umax_size / block_size + (umax_size % block_size ? 1 : 0); +} + +static uintmax_t parse_block_size(const char* block_size_str) +{ + if ( !block_size_str[0] ) + return 0; + const char* endptr; + uintmax_t ret = strtoumax((char*) block_size_str, (char**) &endptr, 0); + if ( !ret && block_size_str[0] == '0' ) + return 0; + if ( endptr[0] && endptr[1] && (endptr[1] != 'B' || endptr[2]) ) + return 0; + uintmax_t magnitude = 1; + uintmax_t exponent = endptr[0] && endptr[1] == 'B' ? 1000 : 1024; + char prefixes[] = { '\0', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y' }; + size_t num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]); + size_t prefix_index; + for ( prefix_index = 0; + *endptr != prefixes[prefix_index] && prefix_index < num_prefixes; + prefix_index++ ) + magnitude *= exponent; + if ( prefix_index == num_prefixes ) + return 0; + if ( !ret ) + ret = 1; + return ret * magnitude; +} + +static void print_disk_usage(uintmax_t num_bytes, uintmax_t block_size, + int flags, const char* path) +{ + if ( flags & FLAG_HUMAN_READABLE ) + { + uintmax_t value = num_bytes; + uintmax_t exponent = flags & FLAG_SI ? 1000 : 1024; + char prefixes[] = { '\0', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y' }; + size_t num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]); + size_t prefix_index = 0; + while ( exponent <= value && prefix_index + 1 < num_prefixes) + { + value /= exponent; + prefix_index++; + } + char prefix_str[2] = { prefixes[prefix_index], '\0' }; + printf("%ju%s\t%s\n", value, prefix_str, path); + } + else + { + uintmax_t num_blocks = size_to_blocks(num_bytes, block_size); + printf("%ju\t%s\n", num_blocks, path); + } +} + +static +bool disk_usage_file_at(int relfd, + const char* relpath, + const char* path, + int flags, + enum symbolic_dereference symbolic_dereference, + uintmax_t block_size, + uintmax_t* total_bytes_ptr, + uintmax_t* num_bytes_ptr, + dev_t expected_dev = 0, + mode_t* result_mode_ptr = NULL) +{ + bool flag_all = flags & FLAG_ALL; + bool flag_is_operand = flags & FLAG_IS_OPERAND; + bool flag_same_device = flags & FLAG_SAME_DEVICE; + bool flag_separate_dirs = flags & FLAG_SEPARATE_DIRS; + bool flag_summarize = flags & FLAG_SUMMARIZE; + + bool print_if_file = flag_is_operand || (!flag_summarize && flag_all); + bool print_if_dir = (flags & FLAG_IS_OPERAND) || !(flags & FLAG_SUMMARIZE); + + bool follow_symlinks = + symbolic_dereference == SYMBOLIC_DEREFERENCE_ALWAYS || + (flag_is_operand && symbolic_dereference == SYMBOLIC_DEREFERENCE_ARGUMENTS); + + int open_flags = O_RDONLY | (!follow_symlinks ? O_NOFOLLOW : 0); + int fd = openat(relfd, relpath, open_flags); + if ( fd < 0 ) + { + if ( errno == ELOOP && !follow_symlinks ) + { + if ( print_if_file ) + print_disk_usage(0, block_size, flags, path); + if ( num_bytes_ptr ) + *num_bytes_ptr = 0; + if ( result_mode_ptr ) + *result_mode_ptr = S_IFLNK; + return true; + } + return error(0, errno, "cannot access `%s'", path), false; + } + + struct stat st; + if ( fstat(fd, &st) != 0 ) + { + error(0, errno, "stat: `%s'", path); + close(fd); + return false; + } + + if ( result_mode_ptr ) + *result_mode_ptr = st.st_mode; + + if ( flag_is_operand ) + expected_dev = st.st_dev; + else if ( flag_same_device && st.st_dev != expected_dev ) + return true; + + uintmax_t num_bytes = flags & FLAG_APPARENT_SIZE ? + (uintmax_t) st.st_size : + st.st_blocks * 512; + + if ( !S_ISDIR(st.st_mode) ) + { + if ( print_if_file ) + print_disk_usage(num_bytes, block_size, flags, path); + if ( num_bytes_ptr ) + *num_bytes_ptr = num_bytes; + if ( total_bytes_ptr ) + *total_bytes_ptr += num_bytes; + close(fd); + return true; + } + + if ( total_bytes_ptr ) + *total_bytes_ptr += num_bytes; + + DIR* dir = fdopendir(fd); + if ( !dir ) + { + error(0, errno, "fdopendir(%i): `%s'", fd, path); + close(fd); + return false; + } + + bool success = true; + while ( struct dirent* entry = readdir(dir) ) + { + if ( !strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..") ) + continue; + char* new_path = append_to_path(path, entry->d_name); + if ( !new_path ) + { + error(0, errno, "malloc: `%s/%s'", path, entry->d_name); + continue; + } + int new_flags = flags & ~FLAG_IS_OPERAND; + uintmax_t new_num_bytes = 0; + mode_t new_mode = 0; + if ( !disk_usage_file_at(dirfd(dir), entry->d_name, new_path, new_flags, + symbolic_dereference, block_size, + total_bytes_ptr, &new_num_bytes, expected_dev, + &new_mode) ) + success = false; + if ( !flag_separate_dirs || !S_ISDIR(new_mode) ) + num_bytes += new_num_bytes; + free(new_path); + } + + if ( num_bytes_ptr ) + *num_bytes_ptr = num_bytes; + +#if defined(__sortix__) + if ( derror(dir) ) + { + error(0, errno, "reading directory `%s'", path); + closedir(dir); + return false; + } +#endif + + if ( print_if_dir ) + print_disk_usage(num_bytes, block_size, flags, path); + + closedir(dir); + + return success; +} + +static void usage(FILE* fp, const char* argv0) +{ + fprintf(fp, "Usage: %s [OPTION]... [FILE]...\n", argv0); + fprintf(fp, "Summarize disk usage of each FILE, recursively for directories.\n"); + fprintf(fp, "\n"); + fprintf(fp, "Mandatory arguments to long options are mandatory for short options too.\n"); + fprintf(fp, " -a, --all write counts for all files, not just directories\n"); + fprintf(fp, " --apparent-size print apparent sizes, rather than disk usage; although\n"); + fprintf(fp, " the apparent size is usually smaller, it may be\n"); + fprintf(fp, " larger due to holes in (`sparse') files, internal\n"); + fprintf(fp, " fragmentation, indirect blocks, and the like\n"); + fprintf(fp, " -B, --block-size=SIZE scale sizes by SIZE before printing them. E.g.,\n"); + fprintf(fp, " `-BM' prints sizes in units of 1,048,576 bytes.\n"); + fprintf(fp, " See SIZE format below.\n"); + fprintf(fp, " -b, --bytes equivalent to `--apparent-size --block-size=1'\n"); + fprintf(fp, " -c, --total produce a grand total\n"); + fprintf(fp, " -D, --dereference-args dereference only symlinks that are listed on the\n"); + fprintf(fp, " command line\n"); + fprintf(fp, " -H equivalent to --dereference-args (-D)\n"); + fprintf(fp, " -h, --human-readable print sizes in human readable format (e.g., 1K 234M 2G)\n"); + fprintf(fp, " --si like -h, but use powers of 1000 not 1024\n"); + fprintf(fp, " -k like --block-size=1K\n"); + fprintf(fp, " -m like --block-size=1M\n"); + fprintf(fp, " -L, --dereference dereference all symbolic links\n"); + fprintf(fp, " -P, --no-dereference don't follow any symbolic links (this is the default)\n"); + fprintf(fp, " -S, --separate-dirs do not include size of subdirectories\n"); + fprintf(fp, " -s, --summarize display only a total for each argument\n"); + fprintf(fp, " -x, --one-file-system skip directories on different file systems\n"); + fprintf(fp, " --help display this help and exit\n"); + fprintf(fp, " --usage display this help and exit\n"); + fprintf(fp, " --version output version information and exit\n"); + fprintf(fp, "\n"); + fprintf(fp, "Display values are in units of the first available SIZE from --block-size,\n"); + fprintf(fp, "and the DU_BLOCK_SIZE, BLOCK_SIZE and BLOCKSIZE environment variables.\n"); + fprintf(fp, "Otherwise, units default to 1024 bytes (or 512 if POSIXLY_CORRECT is set).\n"); + fprintf(fp, "\n"); + fprintf(fp, "SIZE may be (or may be an integer optionally followed by) one of following:\n"); + fprintf(fp, "KB 1000, K 1024, MB 1000*1000, M 1024*1024, and so on for G, T, P, E, Z, Y.\n"); +} + +static void help(FILE* fp, const char* argv0) +{ + usage(fp, argv0); +} + +static void version(FILE* fp, const char* argv0) +{ + fprintf(fp, "%s (Sortix) %s\n", argv0, VERSIONSTR); + fprintf(fp, "License GPLv3+: GNU GPL version 3 or later .\n"); + fprintf(fp, "This is free software: you are free to change and redistribute it.\n"); + fprintf(fp, "There is NO WARRANTY, to the extent permitted by law.\n"); +} + +static void compact_arguments(int* argc, char*** argv) +{ + for ( int i = 0; i < *argc; i++ ) + while ( i < *argc && !(*argv)[i] ) + { + for ( int n = i; n < *argc; n++ ) + (*argv)[n] = (*argv)[n+1]; + (*argc)--; + } +} + +static +bool disk_usage_files(int argc, + char* argv[], + int flags, + enum symbolic_dereference symbolic_dereference, + uintmax_t block_size) +{ + bool success = true; + + uintmax_t total_bytes = 0; + + if ( argc <= 1 ) + { + if ( !disk_usage_file_at(AT_FDCWD, ".", ".", flags | FLAG_IS_OPERAND, + symbolic_dereference, block_size, &total_bytes, + NULL) ) + success = false; + } + else for ( int i = 1; i < argc; i++ ) + { + const char* path = argv[i]; + if ( !disk_usage_file_at(AT_FDCWD, path, path, flags | FLAG_IS_OPERAND, + symbolic_dereference, block_size, &total_bytes, + NULL) ) + success = false; + } + + if ( flags & FLAG_TOTAL ) + print_disk_usage(total_bytes, block_size, flags, "total"); + + return success; +} + +static uintmax_t get_default_block_size() +{ + uintmax_t result = 0; + if ( !result && getenv("DU_BLOCK_SIZE") ) + result = parse_block_size(getenv("DU_BLOCK_SIZE")); + if ( !result && getenv("BLOCK_SIZE") ) + result = parse_block_size(getenv("BLOCK_SIZE")); + if ( !result && getenv("BLOCKSIZE") ) + result = parse_block_size(getenv("BLOCKSIZE")); + if ( !result && getenv("POSIXLY_CORRECT") ) + result = 512; + if ( !result ) + result = 1024; + return result; +} + +int main(int argc, char* argv[]) +{ + int flags = 0; + enum symbolic_dereference symbolic_dereference = SYMBOLIC_DEREFERENCE_NONE; + uintmax_t block_size = get_default_block_size(); + + const char* argv0 = argv[0]; + for ( int i = 0; i < argc; i++ ) + { + const char* arg = argv[i]; + if ( arg[0] != '-' || !arg[1] ) + continue; + argv[i] = NULL; + if ( !strcmp(arg, "--") ) + break; + if ( arg[1] != '-' ) + { + while ( char c = *++arg ) switch ( c ) + { + case 'a': flags |= FLAG_ALL; break; + case 'b': flags |= FLAG_APPARENT_SIZE, block_size = 1; break; + case 'B': + if ( !arg[1] ) + goto next_is_block_size_operand; + if ( !(block_size = parse_block_size(arg+1)) ) + error(1, 0, "invalid block size `%s'", arg); + arg += strlen(arg)-1; + break; + case 'c': flags |= FLAG_TOTAL; break; + case 'D': symbolic_dereference = SYMBOLIC_DEREFERENCE_ARGUMENTS; break; + case 'h': flags |= FLAG_HUMAN_READABLE; break; + case 'H': symbolic_dereference = SYMBOLIC_DEREFERENCE_ARGUMENTS; break; + case 'k': block_size = 1024; break; + case 'L': symbolic_dereference = SYMBOLIC_DEREFERENCE_ALWAYS; break; + case 'm': block_size = 1024*1024; break; + case 'P': symbolic_dereference = SYMBOLIC_DEREFERENCE_NONE; break; + case 's': flags |= FLAG_SUMMARIZE; break; + case 'S': flags |= FLAG_SEPARATE_DIRS; break; + case 'x': flags |= FLAG_SAME_DEVICE; break; + default: + fprintf(stderr, "%s: unknown option -- '%c'\n", argv0, c); + usage(stderr, argv0); + exit(1); + } + } + else if ( !strcmp(arg, "--help") ) + help(stdout, argv0), exit(0); + else if ( !strcmp(arg, "--usage") ) + usage(stdout, argv0), exit(0); + else if ( !strcmp(arg, "--version") ) + version(stdout, argv0), exit(0); + else if ( !strcmp(arg, "--all") ) + flags |= FLAG_ALL; + else if ( !strcmp(arg, "--apparent-size") ) + flags |= FLAG_APPARENT_SIZE; + else if ( !strcmp(arg, "--block-size") ) + { + next_is_block_size_operand: + if ( i + 1 == argc ) + error(1, 0, "expected operand after `%s'", arg); + const char* block_size_str = argv[++i]; + argv[i] = NULL; + if ( !(block_size = parse_block_size(block_size_str)) ) + error(1, 0, "invalid block size `%s'", block_size_str); + } + else if ( string_has_prefix(arg, "--block-size=") ) + { + const char* block_size_str = arg + strlen("--block-size="); + if ( !(block_size = parse_block_size(block_size_str)) ) + error(1, 0, "invalid block size `%s'", block_size_str); + } + else if ( !strcmp(arg, "--bytes") ) + flags |= FLAG_APPARENT_SIZE, block_size = 1; + else if ( !strcmp(arg, "--dereference") ) + symbolic_dereference = SYMBOLIC_DEREFERENCE_ALWAYS; + else if ( !strcmp(arg, "--dereference-args") ) + symbolic_dereference = SYMBOLIC_DEREFERENCE_ARGUMENTS; + else if ( !strcmp(arg, "--human-readable") ) + flags |= FLAG_HUMAN_READABLE; + else if ( !strcmp(arg, "--no-dereference") ) + symbolic_dereference = SYMBOLIC_DEREFERENCE_NONE; + else if ( !strcmp(arg, "--one-file-system") ) + flags |= FLAG_SAME_DEVICE; + else if ( !strcmp(arg, "--separate-dirs") ) + flags |= FLAG_SEPARATE_DIRS; + else if ( !strcmp(arg, "--si") ) + flags |= FLAG_HUMAN_READABLE | FLAG_SI; + else if ( !strcmp(arg, "--summarize") ) + flags |= FLAG_SUMMARIZE; + else if ( !strcmp(arg, "--total") ) + flags |= FLAG_TOTAL; + else + { + fprintf(stderr, "%s: unknown option: %s\n", argv0, arg); + usage(stderr, argv0); + exit(1); + } + } + + compact_arguments(&argc, &argv); + + return disk_usage_files(argc, argv, flags, symbolic_dereference, + block_size) ? 0 : 1; +}