/*******************************************************************************
Copyright(C) Jonas 'Sortie' Termansen 2013.
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
du.cpp
Estimate file space usage.
*******************************************************************************/
// TODO: Currently hardlinks count twice!
#include <sys/stat.h>
#include <sys/types.h>
#include <dirent.h>
#include <errno.h>
#include <error.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
// Bit flags describing the requested behavior. They are combined into a single
// `flags` integer that is passed down through the disk usage functions.

// -a, --all: also print entries that are not directories.
const int FLAG_ALL = 1 << 0;
// -s, --summarize: only print the entries given as operands.
const int FLAG_SUMMARIZE = 1 << 1;
// -x, --one-file-system: skip entries on a different device than the operand.
const int FLAG_SAME_DEVICE = 1 << 2;
// --apparent-size, -b: count st_size rather than st_blocks * 512.
const int FLAG_APPARENT_SIZE = 1 << 3;
// -c, --total: print a final "total" line.
const int FLAG_TOTAL = 1 << 4;
// -h, --human-readable: print scaled values with a unit letter.
const int FLAG_HUMAN_READABLE = 1 << 5;
// --si: human readable output uses powers of 1000 instead of 1024.
const int FLAG_SI = 1 << 6;
// -S, --separate-dirs: do not add the size of subdirectories to their parent.
const int FLAG_SEPARATE_DIRS = 1 << 7;
// Internal: the entry was named on the command line. Set by disk_usage_files
// and cleared again when recursing into directory entries.
const int FLAG_IS_OPERAND = 1 << 8;
// Policy for following symbolic links when opening an entry.
enum symbolic_dereference
{
// Never follow symbolic links (-P, --no-dereference; the initial value in main).
SYMBOLIC_DEREFERENCE_NONE,
// Follow symbolic links only for command line operands (-D, -H,
// --dereference-args).
SYMBOLIC_DEREFERENCE_ARGUMENTS,
// Follow every symbolic link (-L, --dereference).
SYMBOLIC_DEREFERENCE_ALWAYS,
};
// Returns whether `string` starts with `prefix`. An empty prefix matches any
// string.
static bool string_has_prefix(const char* string, const char* prefix)
{
	size_t prefix_length = strlen(prefix);
	return strncmp(string, prefix, prefix_length) == 0;
}
// Joins `path` and `elem` into a newly allocated string, inserting a single
// '/' between them unless `path` already ends with one.
//
// Returns NULL with errno set to EINVAL if `path` is empty, or NULL if the
// allocation fails. The caller owns the returned string and must free() it.
static char* append_to_path(const char* path, const char* elem)
{
	size_t path_length = strlen(path);
	if ( path_length == 0 )
	{
		errno = EINVAL;
		return NULL;
	}

	size_t elem_length = strlen(elem);
	bool needs_slash = path[path_length-1] != '/';
	size_t joined_length = path_length + elem_length + (needs_slash ? 1 : 0);

	char* joined = (char*) malloc(sizeof(char) * (joined_length+1));
	if ( !joined )
		return NULL;

	// Assemble the pieces left to right; the final copy carries the terminator.
	char* cursor = joined;
	memcpy(cursor, path, path_length);
	cursor += path_length;
	if ( needs_slash )
		*cursor++ = '/';
	memcpy(cursor, elem, elem_length + 1);
	return joined;
}
// Converts a byte count into a number of blocks of `block_size` bytes,
// rounding any partial block up to a whole one. `block_size` must be non-zero.
static uintmax_t size_to_blocks(off_t size, uintmax_t block_size)
{
	uintmax_t bytes = (uintmax_t) size;
	uintmax_t blocks = bytes / block_size;
	if ( bytes % block_size != 0 )
		blocks++;
	return blocks;
}
// Parses a block size specification such as "512", "K", "4K" or "2MB".
//
// The string is an optional integer (parsed by strtoumax with base 0) followed
// by an optional unit letter from K, M, G, T, P, E, Z, Y. A unit letter alone
// means one unit. A unit followed by 'B' uses powers of 1000, otherwise powers
// of 1024 are used.
//
// Returns the block size in bytes, or 0 if the string is empty, explicitly
// zero, malformed, or too large to be represented in an uintmax_t.
static uintmax_t parse_block_size(const char* block_size_str)
{
	if ( !block_size_str[0] )
		return 0;

	char* suffix;
	uintmax_t ret = strtoumax(block_size_str, &suffix, 0);

	// An explicit zero is not a usable block size.
	if ( !ret && block_size_str[0] == '0' )
		return 0;

	// The suffix must be empty, a single letter, or a letter followed by 'B'.
	if ( suffix[0] && suffix[1] && (suffix[1] != 'B' || suffix[2]) )
		return 0;

	uintmax_t magnitude = 1;
	uintmax_t exponent = suffix[0] && suffix[1] == 'B' ? 1000 : 1024;
	static const char prefixes[] =
		{ '\0', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y' };
	size_t num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]);

	// Check the index before reading the table so an unknown unit letter never
	// reads past the end of the array.
	size_t prefix_index;
	for ( prefix_index = 0;
	      prefix_index < num_prefixes && suffix[0] != prefixes[prefix_index];
	      prefix_index++ )
	{
		// A unit too large for uintmax_t is rejected rather than wrapping.
		if ( UINTMAX_MAX / exponent < magnitude )
			return 0;
		magnitude *= exponent;
	}
	if ( prefix_index == num_prefixes )
		return 0;

	// A bare unit such as "K" means one of that unit.
	if ( !ret )
		ret = 1;

	if ( UINTMAX_MAX / magnitude < ret )
		return 0;
	return ret * magnitude;
}
// Prints one line of output for `path`.
//
// With FLAG_HUMAN_READABLE, `num_bytes` is repeatedly divided by 1000 (with
// FLAG_SI) or 1024 until it is smaller than that base or the largest unit is
// reached, and is printed followed by the unit letter. Otherwise the size is
// printed as a count of `block_size` byte blocks, rounded up.
static void print_disk_usage(uintmax_t num_bytes, uintmax_t block_size,
                             int flags, const char* path)
{
	if ( !(flags & FLAG_HUMAN_READABLE) )
	{
		printf("%ju\t%s\n", size_to_blocks(num_bytes, block_size), path);
		return;
	}

	static const char unit_letters[] =
		{ '\0', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y' };
	const size_t last_unit = sizeof(unit_letters) / sizeof(unit_letters[0]) - 1;
	const uintmax_t base = (flags & FLAG_SI) ? 1000 : 1024;

	uintmax_t scaled = num_bytes;
	size_t unit = 0;
	for ( ; base <= scaled && unit < last_unit; unit++ )
		scaled /= base;

	// The first unit is the NUL character, which yields an empty suffix.
	char suffix[2] = { unit_letters[unit], '\0' };
	printf("%ju%s\t%s\n", scaled, suffix, path);
}
// Measures the entry `relpath`, opened relative to the directory descriptor
// `relfd`, and recurses into it if it is a directory.
//
// relfd                 directory descriptor that `relpath` is relative to.
// relpath               name passed to openat().
// path                  name used in output and diagnostics.
// flags                 combination of the FLAG_* constants.
// symbolic_dereference  policy for following symbolic links.
// block_size            block size used when printing sizes.
// total_bytes_ptr       if non-NULL, the size of every counted entry is added
//                       to the pointed-to value.
// num_bytes_ptr         if non-NULL, receives the size of this entry, which
//                       for a directory includes its contents (subdirectories
//                       are excluded with FLAG_SEPARATE_DIRS).
// expected_dev          device of the operand being traversed; replaced by the
//                       entry's own device when FLAG_IS_OPERAND is set.
// result_mode_ptr       if non-NULL, receives the st_mode of the entry, or
//                       S_IFLNK for a symbolic link that was not followed.
//
// Returns true on success and false if this entry or anything below it could
// not be examined; a diagnostic is printed with error() for each failure.
static
bool disk_usage_file_at(int relfd,
                        const char* relpath,
                        const char* path,
                        int flags,
                        enum symbolic_dereference symbolic_dereference,
                        uintmax_t block_size,
                        uintmax_t* total_bytes_ptr,
                        uintmax_t* num_bytes_ptr,
                        dev_t expected_dev = 0,
                        mode_t* result_mode_ptr = NULL)
{
	bool flag_all = flags & FLAG_ALL;
	bool flag_is_operand = flags & FLAG_IS_OPERAND;
	bool flag_same_device = flags & FLAG_SAME_DEVICE;
	bool flag_separate_dirs = flags & FLAG_SEPARATE_DIRS;
	bool flag_summarize = flags & FLAG_SUMMARIZE;

	// Operands are always printed. Other entries are printed unless
	// summarizing, and non-directories additionally require FLAG_ALL.
	bool print_if_file = flag_is_operand || (!flag_summarize && flag_all);
	bool print_if_dir = flag_is_operand || !flag_summarize;

	bool follow_symlinks =
		symbolic_dereference == SYMBOLIC_DEREFERENCE_ALWAYS ||
		(flag_is_operand &&
		 symbolic_dereference == SYMBOLIC_DEREFERENCE_ARGUMENTS);

	int open_flags = O_RDONLY | (!follow_symlinks ? O_NOFOLLOW : 0);
	int fd = openat(relfd, relpath, open_flags);
	if ( fd < 0 )
	{
		// With O_NOFOLLOW, ELOOP is taken to mean the entry is a symbolic
		// link. It is reported as occupying zero bytes.
		if ( errno == ELOOP && !follow_symlinks )
		{
			if ( print_if_file )
				print_disk_usage(0, block_size, flags, path);
			if ( num_bytes_ptr )
				*num_bytes_ptr = 0;
			if ( result_mode_ptr )
				*result_mode_ptr = S_IFLNK;
			return true;
		}
		return error(0, errno, "cannot access `%s'", path), false;
	}

	struct stat st;
	if ( fstat(fd, &st) != 0 )
	{
		error(0, errno, "stat: `%s'", path);
		close(fd);
		return false;
	}

	if ( result_mode_ptr )
		*result_mode_ptr = st.st_mode;

	if ( flag_is_operand )
		expected_dev = st.st_dev;
	else if ( flag_same_device && st.st_dev != expected_dev )
	{
		// The entry is on another device and is skipped. The descriptor
		// opened above must still be released on this path.
		close(fd);
		return true;
	}

	// Block devices count as zero bytes. Otherwise use the apparent size or
	// the allocated 512-byte blocks.
	uintmax_t num_bytes = S_ISBLK(st.st_mode) ?
	                      0 :
	                      flags & FLAG_APPARENT_SIZE ?
	                      (uintmax_t) st.st_size :
	                      st.st_blocks * 512;

	if ( !S_ISDIR(st.st_mode) )
	{
		if ( print_if_file )
			print_disk_usage(num_bytes, block_size, flags, path);
		if ( num_bytes_ptr )
			*num_bytes_ptr = num_bytes;
		if ( total_bytes_ptr )
			*total_bytes_ptr += num_bytes;
		close(fd);
		return true;
	}

	// Only the directory's own size is added here; each child adds its own
	// size to the total when it is visited.
	if ( total_bytes_ptr )
		*total_bytes_ptr += num_bytes;

	DIR* dir = fdopendir(fd);
	if ( !dir )
	{
		error(0, errno, "fdopendir(%i): `%s'", fd, path);
		close(fd);
		return false;
	}

	bool success = true;
	while ( struct dirent* entry = readdir(dir) )
	{
		if ( !strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..") )
			continue;

		char* new_path = append_to_path(path, entry->d_name);
		if ( !new_path )
		{
			error(0, errno, "malloc: `%s/%s'", path, entry->d_name);
			continue;
		}

		// Children are never operands themselves.
		int new_flags = flags & ~FLAG_IS_OPERAND;
		uintmax_t new_num_bytes = 0;
		mode_t new_mode = 0;
		if ( !disk_usage_file_at(dirfd(dir), entry->d_name, new_path, new_flags,
		                         symbolic_dereference, block_size,
		                         total_bytes_ptr, &new_num_bytes, expected_dev,
		                         &new_mode) )
			success = false;

		if ( !flag_separate_dirs || !S_ISDIR(new_mode) )
			num_bytes += new_num_bytes;

		free(new_path);
	}

	if ( num_bytes_ptr )
		*num_bytes_ptr = num_bytes;

#if defined(__sortix__)
	if ( derror(dir) && errno != ENOTDIR )
	{
		error(0, errno, "reading directory `%s'", path);
		closedir(dir);
		return false;
	}
#endif

	if ( print_if_dir )
		print_disk_usage(num_bytes, block_size, flags, path);

	// closedir() also releases the descriptor handed over to fdopendir().
	closedir(dir);
	return success;
}
// Writes the usage message to `fp`, using `argv0` as the program name in the
// first line.
static void help(FILE* fp, const char* argv0)
{
	// Everything after the usage line is fixed text, written verbatim.
	static const char* const lines[] =
	{
		"Summarize disk usage of each FILE, recursively for directories.\n",
		"\n",
		"Mandatory arguments to long options are mandatory for short options too.\n",
		" -a, --all write counts for all files, not just directories\n",
		" --apparent-size print apparent sizes, rather than disk usage; although\n",
		" the apparent size is usually smaller, it may be\n",
		" larger due to holes in (`sparse') files, internal\n",
		" fragmentation, indirect blocks, and the like\n",
		" -B, --block-size=SIZE scale sizes by SIZE before printing them. E.g.,\n",
		" `-BM' prints sizes in units of 1,048,576 bytes.\n",
		" See SIZE format below.\n",
		" -b, --bytes equivalent to `--apparent-size --block-size=1'\n",
		" -c, --total produce a grand total\n",
		" -D, --dereference-args dereference only symlinks that are listed on the\n",
		" command line\n",
		" -H equivalent to --dereference-args (-D)\n",
		" -h, --human-readable print sizes in human readable format (e.g., 1K 234M 2G)\n",
		" --si like -h, but use powers of 1000 not 1024\n",
		" -k like --block-size=1K\n",
		" -m like --block-size=1M\n",
		" -L, --dereference dereference all symbolic links\n",
		" -P, --no-dereference don't follow any symbolic links (this is the default)\n",
		" -S, --separate-dirs do not include size of subdirectories\n",
		" -s, --summarize display only a total for each argument\n",
		" -x, --one-file-system skip directories on different file systems\n",
		" --help display this help and exit\n",
		" --version output version information and exit\n",
		"\n",
		"Display values are in units of the first available SIZE from --block-size,\n",
		"and the DU_BLOCK_SIZE, BLOCK_SIZE and BLOCKSIZE environment variables.\n",
		"Otherwise, units default to 1024 bytes (or 512 if POSIXLY_CORRECT is set).\n",
		"\n",
		"SIZE may be (or may be an integer optionally followed by) one of following:\n",
		"KB 1000, K 1024, MB 1000*1000, M 1024*1024, and so on for G, T, P, E, Z, Y.\n",
	};

	fprintf(fp, "Usage: %s [OPTION]... [FILE]...\n", argv0);
	for ( size_t i = 0; i < sizeof(lines) / sizeof(lines[0]); i++ )
		fputs(lines[i], fp);
}
// Writes the program name, the version string (VERSIONSTR) and the license
// notice to `fp`.
static void version(FILE* fp, const char* argv0)
{
	fprintf(fp, "%s (Sortix) %s\n", argv0, VERSIONSTR);
	// The license line names where the license text can be found; the URL had
	// been lost, leaving a dangling "or later .".
	fprintf(fp, "License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>.\n");
	fprintf(fp, "This is free software: you are free to change and redistribute it.\n");
	fprintf(fp, "There is NO WARRANTY, to the extent permitted by law.\n");
}
// Removes the NULL entries from the first *argc elements of *argv, keeping the
// remaining entries in order, and lowers *argc to the number of entries kept.
//
// The vacated slots at the end are filled with the element found at
// (*argv)[*argc], which for the argv given to main is the terminating NULL.
// That element is only read if at least one entry is removed.
static void compact_arguments(int* argc, char*** argv)
{
	char** args = *argv;
	int total = *argc;

	// Slide every non-NULL entry down to the next free slot.
	int kept = 0;
	for ( int source = 0; source < total; source++ )
		if ( args[source] )
			args[kept++] = args[source];

	if ( kept < total )
	{
		char* terminator = args[total];
		for ( int slot = kept; slot < total; slot++ )
			args[slot] = terminator;
		*argc = kept;
	}
}
// Reports the disk usage of every operand in argv[1..argc-1], or of the
// current directory "." if there are no operands. With FLAG_TOTAL a final
// "total" line covering all operands is printed.
//
// Returns true only if every operand was processed without error.
static
bool disk_usage_files(int argc,
                      char* argv[],
                      int flags,
                      enum symbolic_dereference symbolic_dereference,
                      uintmax_t block_size)
{
	const int operand_flags = flags | FLAG_IS_OPERAND;
	uintmax_t grand_total = 0;
	bool all_ok = true;

	if ( 1 < argc )
	{
		for ( int index = 1; index < argc; index++ )
		{
			const char* operand = argv[index];
			bool ok = disk_usage_file_at(AT_FDCWD, operand, operand,
			                             operand_flags, symbolic_dereference,
			                             block_size, &grand_total, NULL);
			if ( !ok )
				all_ok = false;
		}
	}
	else
	{
		bool ok = disk_usage_file_at(AT_FDCWD, ".", ".", operand_flags,
		                             symbolic_dereference, block_size,
		                             &grand_total, NULL);
		if ( !ok )
			all_ok = false;
	}

	if ( flags & FLAG_TOTAL )
		print_disk_usage(grand_total, block_size, flags, "total");

	return all_ok;
}
static uintmax_t get_default_block_size()
{
uintmax_t result = 0;
if ( !result && getenv("DU_BLOCK_SIZE") )
result = parse_block_size(getenv("DU_BLOCK_SIZE"));
if ( !result && getenv("BLOCK_SIZE") )
result = parse_block_size(getenv("BLOCK_SIZE"));
if ( !result && getenv("BLOCKSIZE") )
result = parse_block_size(getenv("BLOCKSIZE"));
if ( !result && getenv("POSIXLY_CORRECT") )
result = 512;
if ( !result )
result = 1024;
return result;
}
// Program entry point: parses the options, removes them from argv, and then
// reports disk usage for the remaining operands. Returns 0 if
// disk_usage_files succeeded and 1 otherwise.
int main(int argc, char* argv[])
{
int flags = 0;
enum symbolic_dereference symbolic_dereference = SYMBOLIC_DEREFERENCE_NONE;
// Start from the environment's block size; options below may override it.
uintmax_t block_size = get_default_block_size();
const char* argv0 = argv[0];
for ( int i = 1; i < argc; i++ )
{
const char* arg = argv[i];
// Anything not starting with '-', and a lone "-", is an operand and is kept.
if ( arg[0] != '-' || !arg[1] )
continue;
// Options are replaced with NULL so compact_arguments can drop them later.
argv[i] = NULL;
// "--" ends option parsing; everything after it is kept as operands.
if ( !strcmp(arg, "--") )
break;
if ( arg[1] != '-' )
{
// A single argument may bundle several short options, e.g. "-sh".
while ( char c = *++arg ) switch ( c )
{
case 'a': flags |= FLAG_ALL; break;
case 'b': flags |= FLAG_APPARENT_SIZE, block_size = 1; break;
case 'B':
// "-B" alone takes its value from the next argument; this jumps into the
// "--block-size" branch below, which consumes that argument.
if ( !arg[1] )
goto next_is_block_size_operand;
// Otherwise the rest of this argument is the value ("-BM").
if ( !(block_size = parse_block_size(arg+1)) )
error(1, 0, "invalid block size `%s'", arg);
// Move to the last character so the enclosing loop stops at the terminator.
arg += strlen(arg)-1;
break;
case 'c': flags |= FLAG_TOTAL; break;
case 'D': symbolic_dereference = SYMBOLIC_DEREFERENCE_ARGUMENTS; break;
case 'h': flags |= FLAG_HUMAN_READABLE; break;
case 'H': symbolic_dereference = SYMBOLIC_DEREFERENCE_ARGUMENTS; break;
case 'k': block_size = 1024; break;
case 'L': symbolic_dereference = SYMBOLIC_DEREFERENCE_ALWAYS; break;
case 'm': block_size = 1024*1024; break;
case 'P': symbolic_dereference = SYMBOLIC_DEREFERENCE_NONE; break;
case 's': flags |= FLAG_SUMMARIZE; break;
case 'S': flags |= FLAG_SEPARATE_DIRS; break;
case 'x': flags |= FLAG_SAME_DEVICE; break;
default:
fprintf(stderr, "%s: unknown option -- '%c'\n", argv0, c);
help(stderr, argv0);
exit(1);
}
}
else if ( !strcmp(arg, "--help") )
help(stdout, argv0), exit(0);
else if ( !strcmp(arg, "--version") )
version(stdout, argv0), exit(0);
else if ( !strcmp(arg, "--all") )
flags |= FLAG_ALL;
else if ( !strcmp(arg, "--apparent-size") )
flags |= FLAG_APPARENT_SIZE;
else if ( !strcmp(arg, "--block-size") )
{
// Also reached by goto from the "-B" case above, with arg pointing at "B".
next_is_block_size_operand:
// NOTE(review): error() with a non-zero first argument is presumably expected
// to terminate the program here, as the code below relies on argv[i+1]
// existing -- confirm against the error() implementation in use.
if ( i + 1 == argc )
error(1, 0, "expected operand after `%s'", arg);
// Consume the following argument as the value and drop it from the operands.
const char* block_size_str = argv[++i];
argv[i] = NULL;
if ( !(block_size = parse_block_size(block_size_str)) )
error(1, 0, "invalid block size `%s'", block_size_str);
}
else if ( string_has_prefix(arg, "--block-size=") )
{
const char* block_size_str = arg + strlen("--block-size=");
if ( !(block_size = parse_block_size(block_size_str)) )
error(1, 0, "invalid block size `%s'", block_size_str);
}
else if ( !strcmp(arg, "--bytes") )
flags |= FLAG_APPARENT_SIZE, block_size = 1;
else if ( !strcmp(arg, "--dereference") )
symbolic_dereference = SYMBOLIC_DEREFERENCE_ALWAYS;
else if ( !strcmp(arg, "--dereference-args") )
symbolic_dereference = SYMBOLIC_DEREFERENCE_ARGUMENTS;
else if ( !strcmp(arg, "--human-readable") )
flags |= FLAG_HUMAN_READABLE;
else if ( !strcmp(arg, "--no-dereference") )
symbolic_dereference = SYMBOLIC_DEREFERENCE_NONE;
else if ( !strcmp(arg, "--one-file-system") )
flags |= FLAG_SAME_DEVICE;
else if ( !strcmp(arg, "--separate-dirs") )
flags |= FLAG_SEPARATE_DIRS;
else if ( !strcmp(arg, "--si") )
flags |= FLAG_HUMAN_READABLE | FLAG_SI;
else if ( !strcmp(arg, "--summarize") )
flags |= FLAG_SUMMARIZE;
else if ( !strcmp(arg, "--total") )
flags |= FLAG_TOTAL;
else
{
fprintf(stderr, "%s: unknown option: %s\n", argv0, arg);
help(stderr, argv0);
exit(1);
}
}
// Remove the NULLed-out option slots so only argv[0] and the operands remain.
compact_arguments(&argc, &argv);
return disk_usage_files(argc, argv, flags, symbolic_dereference,
block_size) ? 0 : 1;
}