polybar/src/utils/string.cpp

345 lines
9.2 KiB
C++
Raw Normal View History

#include "utils/string.hpp"
#include <algorithm>
2017-01-12 19:28:44 +00:00
#include <iomanip>
2016-11-02 19:22:45 +00:00
#include <sstream>
2016-11-25 12:55:15 +00:00
#include <utility>
2016-11-02 19:22:45 +00:00
2016-11-19 05:22:44 +00:00
POLYBAR_NS
2016-11-02 19:22:45 +00:00
namespace string_util {
/**
 * Tell whether needle occurs somewhere inside haystack (case-sensitive)
 *
 * @param haystack String that is searched
 * @param needle Substring to look for
 * @return true if needle was found in haystack
 */
bool contains(const string& haystack, const string& needle) {
  const auto position = haystack.find(needle);
  return position != string::npos;
}
/**
 * Check if haystack ends with suffix
 *
 * @param haystack String whose tail is inspected
 * @param suffix Expected ending
 * @return true if the last suffix.length() characters of haystack equal suffix
 */
bool ends_with(const string& haystack, const string& suffix) {
  const auto total_len = haystack.length();
  const auto suffix_len = suffix.length();
  // A suffix longer than the string itself can never match
  return suffix_len <= total_len && haystack.compare(total_len - suffix_len, suffix_len, suffix) == 0;
}
/**
 * Case-insensitive variant of contains()
 *
 * Both strings are passed through lower() before the substring search.
 */
bool contains_ignore_case(const string& haystack, const string& needle) {
  const string folded_haystack = lower(haystack);
  const string folded_needle = lower(needle);
  return folded_haystack.find(folded_needle) != string::npos;
}
2016-11-02 19:22:45 +00:00
/**
 * Convert string to uppercase
 *
 * Works byte by byte using toupper(), so only characters the current C
 * locale knows how to convert are changed; all other bytes are copied as is.
 *
 * @param s Input string
 * @return Copy of s with every convertible character in upper case
 */
string upper(const string& s) {
  string str(s);
  for (auto& c : str) {
    // toupper() is only defined for values representable as unsigned char
    // (or EOF). A plain char may be negative for non-ASCII bytes, so convert
    // to unsigned char first.
    c = static_cast<char>(toupper(static_cast<unsigned char>(c)));
  }
  return str;
}
/**
 * Convert string to lowercase
 *
 * Works byte by byte using tolower(), so only characters the current C
 * locale knows how to convert are changed; all other bytes are copied as is.
 *
 * @param s Input string
 * @return Copy of s with every convertible character in lower case
 */
string lower(const string& s) {
  string str(s);
  for (auto& c : str) {
    // tolower() is only defined for values representable as unsigned char
    // (or EOF). A plain char may be negative for non-ASCII bytes, so convert
    // to unsigned char first.
    c = static_cast<char>(tolower(static_cast<unsigned char>(c)));
  }
  return str;
}
/**
 * Case-insensitive equality test
 *
 * @return true if s1 and s2 are equal after both were passed through lower()
 */
bool compare(const string& s1, const string& s2) {
  const string lhs = lower(s1);
  const string rhs = lower(s2);
  return lhs == rhs;
}
/**
 * Replace first occurrence of needle in haystack
 *
 * @param haystack Input string, left untouched
 * @param needle Substring to look for
 * @param replacement Text that is put in place of needle
 * @param start Index at which the search begins
 * @param end If not string::npos, the match is only replaced when it starts
 *            before this index
 * @return Copy of haystack with at most one replacement applied
 */
string replace(const string& haystack, const string& needle, const string& replacement, size_t start, size_t end) {
  string result(haystack);

  // Swapping a string for itself is a no-op
  if (needle == replacement) {
    return result;
  }

  const string::size_type hit = result.find(needle, start);
  if (hit == string::npos) {
    return result;
  }

  // The match has to begin before the upper bound (if one was given)
  if (end != string::npos && hit >= end) {
    return result;
  }

  result.replace(hit, needle.length(), replacement);
  return result;
}
/**
 * Replace all occurrences of needle in haystack
 *
 * The search continues right after each inserted replacement, so text
 * produced by a replacement is never scanned again.
 *
 * @param haystack Input string, left untouched
 * @param needle Substring to look for. An empty needle matches nothing and
 *               the input is returned unchanged.
 * @param replacement Text that is put in place of every match
 * @param start Index at which the search begins
 * @param end If not string::npos, only matches that lie completely in front
 *            of this index (in the string as modified so far) are replaced
 * @return Copy of haystack with the replacements applied
 */
string replace_all(
    const string& haystack, const string& needle, const string& replacement, size_t start, size_t end) {
  string result{haystack};

  // An empty needle "matches" at every position, so the loop below would
  // never terminate on a non-empty haystack.
  if (needle.empty()) {
    return result;
  }

  string::size_type pos;
  while ((pos = result.find(needle, start)) != string::npos &&
         (end == string::npos || pos + needle.length() <= end)) {
    result.replace(pos, needle.length(), replacement);
    // Skip over the text that was just inserted
    start = pos + replacement.length();
  }

  return result;
}
/**
 * Collapse every run of consecutive needle characters into a single one
 *
 * @param haystack Input string, left untouched
 * @param needle Character whose repetitions are collapsed
 * @return Copy of haystack in which needle never appears twice in a row
 */
string squeeze(const string& haystack, char needle) {
  string result = haystack;
  // Single pass: an element is dropped when both it and the element kept
  // right before it are the needle.
  const auto new_end = std::unique(
      result.begin(), result.end(), [needle](char prev, char cur) { return prev == needle && cur == needle; });
  result.erase(new_end, result.end());
  return result;
}
/**
 * Remove all occurrences of needle in haystack
 *
 * @param haystack Input string, left untouched
 * @param needle Character to remove
 * @return Copy of haystack without any needle characters
 */
string strip(const string& haystack, char needle) {
  string str(haystack);
  // Erase-remove: one linear pass instead of searching from the beginning
  // and shifting the tail once per removed character.
  str.erase(std::remove(str.begin(), str.end(), needle), str.end());
  return str;
}
/**
 * Remove trailing newline
 *
 * Only a single '\n' at the very end is removed; any other content
 * (including further newlines in front of it) is kept.
 *
 * @param haystack Input string, left untouched
 * @return Copy of haystack without its final '\n', if it had one
 */
string strip_trailing_newline(const string& haystack) {
  string str(haystack);
  // The emptiness check is required: an empty string has no last character
  // to inspect.
  if (!str.empty() && str.back() == '\n') {
    str.pop_back();
  }
  return str;
}
config_parser: Introduce stricter syntax conventions (#1377) This is the next step to merge #1237 in stages. Currently there are barely any restrictions on how the config can be written. This causes things like config files with DOS line endings to not be parsed properly (#1366) because polybar splits by `\n` and when parsing section headers, it can't deal with the `\r` at the end of the line and thus doesn't recognize any section headers. With this PR we introduce some rules as to what characters are allowed in section names and keys. Note: When talking about spaces I refer to any character for which `isspace()` returns `true`. The rules are as follows: * A section name or a key name cannot contain any spaces as well as any of there characters:`"'=;#[](){}:.$\%` * Spaces at the beginning and end of lines are always ignored when parsing * Comment lines start with `;` or `#` and last for the whole line. The whole line will be ignored by the parser. You cannot start a comment at the end of a line. * Section headers have the following form `[HEADER_NAME]` * Key-value lines look like this: `KEY_NAME{SPACES}={SPACES}VALUE_STRING` where `{SPACES}` represents any number of spaces. `VALUE_STRING` can contain any characters. If it is *surrounded* with double quotes (`"`), those quotes will be removed, this can be used to add spaces to the beginning or end of the value * Empty lines are lines with only spaces in them * If the line has any other form, it is a syntax error This will introduce the following breaking changes because of how underdefined the config syntax was before: * `key = ""` will get treated as an empty string instead of the literal * string `""` * Any section or key name with forbidden characters will now be syntax errors. * Certain strings will be forbidden as section names: `self`, `root`, * `BAR`. Because they have a special meaning inside references and so a * section `[root]` can never be referenced. 
This replaces the current parser implementation with a new more robust one that will later be expanded to also check for dependency cycles and allow for values that contain references mixed with other strings. This PR also now expands the config paths given over the command line so that `--config=~/.config/polybar/config` resolves properly. Closes #1032 Closes #1694 * config_parser: Add skeleton with tests First step in the config_parser develoment. Only tests functions that are easily testable without many outside dependencies. Integration tests will follow. * config_parser: Implement parse_header * config_parser: Implement get_line_type * feat(string): Add trim functions with predicate Not only trimming based on single character matching but based on a freely specifiable predicate. Will be used to trim all spaces (based on isspace) * config_parser: Implement parse_key * config_parser: Implement parse_line for valid lines * config_parser: Throw exception on invalid lines * config_parser: Remove line_no and file_index from parse_line Cleaner to let the caller catch and fill in the line number and file path * string: Clear up misleading description of trim Before, trim would remove all characters that *didn't* match the predicate and thus the predicate isspace wouldn't work correctly. But because we used the inverse (isnospace_pred) it all worked out, but if the function was used with any other function, it wouldn't have given the desired output * config_parser: Implement parse_file * config_parser: Switch operation to config_parser This changes the way the config is invoked. Now main.cpp creates a config_parser object which then returns the singleton config object from the parse method. Subsequent calls to config::make will return the already created config object as before The config_parser does not yet have all the functionality of the old parser: `inherit` directives are not yet resolved. 
Other than that all the old functionality is implemented (creating sectionmap and applying include-file) Any sort of dependency detection (except for include-file) are still missing * config: Move xrm initialization to constructor config_parser handles the detection of xrdb references and passes that info to the config object. This finally allows us to delete the config::parse_file function because everything in it has been implemented (except for xrdb detection and file error handling) * refactor(config_parser): Cleanup * config_parser: Set config data after initialization Looks much cleaner this way * config_parser: Expand include-file paths * config_parser: Init xrm if the config uses %{xrdb references * config_parser: Use same type of maps as in old impl Polybar has some weird, not yet fixed, inheriting behaviour and it changes depending on the order in which the config stores its data. Using the same type of maps ensures that the behaviour stays the same. * refactor(config_parser): Clearer invalid name error message * config_parser: Don't allow reserved section names Sections with the names 'self', 'BAR', 'root' could never be referenced because those strings have a special meaning inside references * config_parser: Handle inherit directives This uses the old copy_inherited function, so this still suffers from crashes if there are cyclic dependencies. This also fixes the behaviour where any key that starts with 'inherit' would be treated as an inherit directive * config_parser: Clearer dependency cycle error message * refactor(config_parser): Handle file errors when parsing This removes the need to check if the file exists separately * fix(config): expand config file path Now paths using ~ and environment variables can be used as the config path * fix(config): Properly recognize xrdb references * config_parser: Make messages more informative * doc(config): Improve commenting Comments now describe what the config_parser actually does instead of what it will do. 
We also now follow the rule that single line comments inside functions should use `//` comments * refactor: Move else on same line as curly braces * fix(config_parser): Don't duplicate paths in `files` * refactor(config_parser): Use else if for clarity * fix(config): Undefined behavior in syntax_error Before the custom what() method produced undefined behavior because the returned string became invalid once the function returned. * refactor(config): descriptive name for useless lines is_valid could easily be confused as meaning syntactically invalid without it being clarified in a comment * refactor(config): Use separate strings instead of key_value Takes just as much space and is much better to read * fix(config_parser): TestCase -> TestSuite and fix macro call Ref: #1644 * config_parser: use const string& in method args * config_parser: Improve comments * config_parser: Incorporate review comments
2019-08-06 17:41:31 +00:00
/**
 * Trims all characters that match pred from the left
 *
 * @param value String to trim (taken by value and returned after trimming)
 * @param pred Returns true for characters that should be removed
 * @return value without its leading run of matching characters
 */
string ltrim(string value, function<bool(char)> pred) {
  // A lambda is used to negate the predicate; std::not1 is deprecated since
  // C++17 and removed in C++20.
  const auto first_kept = std::find_if(value.begin(), value.end(), [&pred](char c) { return !pred(c); });
  value.erase(value.begin(), first_kept);
  return value;
}
/**
 * Trims all characters that match pred from the right
 *
 * @param value String to trim (taken by value and returned after trimming)
 * @param pred Returns true for characters that should be removed
 * @return value without its trailing run of matching characters
 */
string rtrim(string value, function<bool(char)> pred) {
  // A lambda is used to negate the predicate; std::not1 is deprecated since
  // C++17 and removed in C++20.
  const auto last_kept = std::find_if(value.rbegin(), value.rend(), [&pred](char c) { return !pred(c); });
  // base() of the reverse iterator points one past the last kept character
  value.erase(last_kept.base(), value.end());
  return value;
}
/**
 * Trims all characters that match pred from both sides
 *
 * The right side is trimmed first, then the left side.
 *
 * @param value String to trim
 * @param pred Returns true for characters that should be removed
 */
string trim(string value, function<bool(char)> pred) {
  string without_tail = rtrim(std::move(value), pred);
  return ltrim(std::move(without_tail), pred);
}
2016-11-02 19:22:45 +00:00
/**
 * Remove needle from the start of the string
 *
 * All leading occurrences of needle are removed.
 *
 * @param value String to trim; its contents are moved into the result
 * @param needle Character to strip from the front
 * @return The trimmed string (empty if value consisted only of needle)
 */
string ltrim(string&& value, const char& needle) {
  // find_first_not_of() yields npos when the string is empty or consists of
  // nothing but needle; erase(0, npos) then clears the whole string. This
  // removes the leading run in one step and never inspects an empty string.
  value.erase(0, value.find_first_not_of(needle));
  return std::move(value);
}
/**
 * Remove needle from the end of the string
 *
 * All trailing occurrences of needle are removed.
 *
 * @param value String to trim; its contents are moved into the result
 * @param needle Character to strip from the back
 * @return The trimmed string (empty if value consisted only of needle)
 */
string rtrim(string&& value, const char& needle) {
  // find_last_not_of() yields npos when the string is empty or consists of
  // nothing but needle; npos + 1 wraps around to 0, so erase() then clears
  // the whole string. Otherwise everything after the last kept character is
  // removed in one step.
  value.erase(value.find_last_not_of(needle) + 1);
  return std::move(value);
}
/**
 * Remove needle from the start and end of the string
 *
 * The left side is trimmed first, then the right side.
 *
 * @param value String to trim; its contents are moved into the result
 * @param needle Character to strip from both ends
 */
string trim(string&& value, const char& needle) {
  if (!value.empty()) {
    return rtrim(ltrim(std::move(value), needle), needle);
  }
  // Nothing to trim
  return "";
}
/**
 * Counts the number of codepoints in a utf8 encoded string.
 *
 * @param value UTF-8 encoded input
 * @return Number of bytes in value that are not continuation bytes
 */
size_t char_len(const string& value) {
  // Continuation bytes have the bit pattern 10xxxxxx. Every other byte
  // starts a new codepoint, so masking with 11000000 (0xc0) and comparing
  // against 10000000 (0x80) tells the two apart.
  size_t codepoints = 0;
  for (const char byte : value) {
    if ((byte & 0xc0) != 0x80) {
      ++codepoints;
    }
  }
  return codepoints;
}
/**
 * Cuts a utf8 string down to at most len codepoints.
 *
 * Codepoints are not the same as user-perceived characters (grapheme
 * clusters), but counting them is close enough and avoids depending on
 * something like ICU.
 *
 * @param value UTF-8 encoded string; its contents are moved into the result
 * @param len Maximum number of codepoints to keep
 */
string utf8_truncate(string&& value, size_t len) {
  if (value.empty()) {
    return "";
  }

  // Bytes of the form 10xxxxxx (mask 0xc0, value 0x80) continue the previous
  // codepoint; every other byte starts a new one.
  const auto starts_codepoint = [](char c) { return (c & 0xc0) != 0x80; };

  const auto last = value.end();
  auto cut = value.begin();
  size_t kept = 0;
  while (kept < len && cut != last) {
    // Step over the lead byte, then over its continuation bytes
    ++cut;
    cut = std::find_if(cut, last, starts_codepoint);
    ++kept;
  }

  // Everything from the first surplus codepoint onwards is dropped
  value.erase(cut, last);
  return std::move(value);
}
2016-11-02 19:22:45 +00:00
/**
 * Join all strings in vector into a single string separated by delim
 *
 * The delimiter is only inserted once the accumulated result is non-empty,
 * so empty strings at the beginning of the vector do not produce a leading
 * delimiter.
 *
 * @param strs Strings to concatenate, in order
 * @param delim Separator placed between the pieces
 */
string join(const vector<string>& strs, const string& delim) {
  string joined;
  for (const auto& piece : strs) {
    if (!joined.empty()) {
      joined += delim;
    }
    joined += piece;
  }
  return joined;
}
/**
 * Explode string by delim, ignore empty tokens
 *
 * @param s String to split
 * @param delim Separator character; runs of it count as a single separator
 * @return The non-empty pieces of s in order of appearance
 */
vector<string> split(const string& s, char delim) {
  std::vector<std::string> tokens;

  // Jump to the first character that is not a delimiter
  auto token_start = s.find_first_not_of(delim);
  while (token_start != std::string::npos) {
    // The token ends at the next delimiter (or at the end of the string)
    const auto token_end = s.find_first_of(delim, token_start);
    tokens.emplace_back(s.substr(token_start, token_end - token_start));
    token_start = s.find_first_not_of(delim, token_end);
  }

  return tokens;
}
/**
 * Explode string by delim, include empty tokens
 *
 * @param str String to split
 * @param delimiters Separator character
 * @return All pieces of str in order of appearance; always contains at least
 *         one element (the text after the last separator, possibly empty)
 */
std::vector<std::string> tokenize(const string& str, char delimiters) {
  std::vector<std::string> tokens;
  std::string::size_type token_start = 0;

  for (auto hit = str.find_first_of(delimiters); hit != std::string::npos;
       hit = str.find_first_of(delimiters, token_start)) {
    tokens.emplace_back(str.substr(token_start, hit - token_start));
    // Continue right behind the separator
    token_start = hit + 1;
  }

  // Whatever follows the last separator forms the final token
  tokens.emplace_back(str.substr(token_start));
  return tokens;
}
/**
 * Find the nth occurrence of needle in haystack starting from pos
 *
 * @param haystack String that is searched
 * @param pos Index at which the search begins
 * @param needle Substring to look for
 * @param nth Which occurrence to report; 1 means the first one
 * @return Index of the requested occurrence, or string::npos if there are
 *         fewer occurrences
 */
size_t find_nth(const string& haystack, size_t pos, const string& needle, size_t nth) {
  size_t found_pos = haystack.find(needle, pos);
  // Hop from one match to the next until the counter reaches 1 or the
  // matches run out. Each follow-up search starts one character behind the
  // previous match.
  while (nth != 1 && found_pos != string::npos) {
    --nth;
    found_pos = haystack.find(needle, found_pos + 1);
  }
  return found_pos;
}
/**
 * Create a floating point string
 *
 * The value is always formatted in fixed notation with the given number of
 * decimals.
 *
 * @param value Number to format
 * @param precision Number of digits after the decimal separator
 * @param fixed If false, a trailing ".00" is removed from the result (so a
 *              whole number formatted with two decimals loses its fraction)
 * @param locale Name of the locale used for formatting; the classic "C"
 *               locale is used if empty
 * @return The formatted number
 */
string floating_point(double value, size_t precision, bool fixed, const string& locale) {
  std::stringstream ss;
  ss.imbue(!locale.empty() ? std::locale(locale.c_str()) : std::locale::classic());
  ss << std::fixed << std::setprecision(static_cast<int>(precision)) << value;

  string result = ss.str();
  if (!fixed) {
    // Only strip ".00" when it is the very end of the text. Removing the
    // first ".00" found anywhere corrupts values such as "1.001" (-> "11")
    // or "1.000" (-> "10").
    const string zero_fraction{".00"};
    const auto tail_len = zero_fraction.length();
    if (result.length() >= tail_len &&
        result.compare(result.length() - tail_len, tail_len, zero_fraction) == 0) {
      result.erase(result.length() - tail_len);
    }
  }
  return result;
}
/**
 * Create a MiB filesize string
 *
 * @param kibibytes Size in KiB
 * @param precision Number of decimals in the output
 * @param locale Locale name forwarded to floating_point()
 * @return The size converted to MiB, followed by " MiB"
 */
string filesize_mib(unsigned long long kibibytes, size_t precision, const string& locale) {
  const double mebibytes = kibibytes / 1024.0;
  return floating_point(mebibytes, precision, true, locale) + " MiB";
}
/**
 * Create a GiB filesize string
 *
 * @param kibibytes Size in KiB
 * @param precision Number of decimals in the output
 * @param locale Locale name forwarded to floating_point()
 * @return The size converted to GiB, followed by " GiB"
 */
string filesize_gib(unsigned long long kibibytes, size_t precision, const string& locale) {
  const double mebibytes = kibibytes / 1024.0;
  const double gibibytes = mebibytes / 1024.0;
  return floating_point(gibibytes, precision, true, locale) + " GiB";
}
/**
 * Create a GiB string, if the value in GiB is >= 1.0. Otherwise, create a MiB string.
 *
 * @param kibibytes Size in KiB
 * @param precision_mib Number of decimals used for the MiB form
 * @param precision_gib Number of decimals used for the GiB form
 * @param locale Locale name forwarded to the formatting functions
 */
string filesize_gib_mib(
    unsigned long long kibibytes, size_t precision_mib, size_t precision_gib, const string& locale) {
  // Number of KiB in one GiB
  const unsigned long long kib_per_gib = 1024 * 1024;
  return kibibytes >= kib_per_gib ? filesize_gib(kibibytes, precision_gib, locale)
                                  : filesize_mib(kibibytes, precision_mib, locale);
}
2017-01-12 19:28:44 +00:00
/**
* Create a filesize string by converting given bytes to highest unit possible
*/
2017-03-12 13:49:10 +00:00
string filesize(unsigned long long bytes, size_t precision, bool fixed, const string& locale) {
vector<string> suffixes{"TB", "GB", "MB", "KB"};
string suffix{"B"};
double value = bytes;
while (!suffixes.empty() && value >= 1024.0) {
suffix = suffixes.back();
suffixes.pop_back();
2017-03-12 13:49:10 +00:00
value /= 1024.0;
}
2017-03-12 13:49:10 +00:00
return floating_point(value, precision, fixed, locale) + " " + suffix;
}
2016-11-02 19:22:45 +00:00
/**
* Compute string hash
*/
2016-11-25 12:55:15 +00:00
hash_type hash(const string& src) {
2016-11-02 19:22:45 +00:00
return std::hash<string>()(src);
}
config_parser: Introduce stricter syntax conventions (#1377) This is the next step to merge #1237 in stages. Currently there are barely any restrictions on how the config can be written. This causes things like config files with DOS line endings to not be parsed properly (#1366) because polybar splits by `\n` and when parsing section headers, it can't deal with the `\r` at the end of the line and thus doesn't recognize any section headers. With this PR we introduce some rules as to what characters are allowed in section names and keys. Note: When talking about spaces I refer to any character for which `isspace()` returns `true`. The rules are as follows: * A section name or a key name cannot contain any spaces as well as any of there characters:`"'=;#[](){}:.$\%` * Spaces at the beginning and end of lines are always ignored when parsing * Comment lines start with `;` or `#` and last for the whole line. The whole line will be ignored by the parser. You cannot start a comment at the end of a line. * Section headers have the following form `[HEADER_NAME]` * Key-value lines look like this: `KEY_NAME{SPACES}={SPACES}VALUE_STRING` where `{SPACES}` represents any number of spaces. `VALUE_STRING` can contain any characters. If it is *surrounded* with double quotes (`"`), those quotes will be removed, this can be used to add spaces to the beginning or end of the value * Empty lines are lines with only spaces in them * If the line has any other form, it is a syntax error This will introduce the following breaking changes because of how underdefined the config syntax was before: * `key = ""` will get treated as an empty string instead of the literal * string `""` * Any section or key name with forbidden characters will now be syntax errors. * Certain strings will be forbidden as section names: `self`, `root`, * `BAR`. Because they have a special meaning inside references and so a * section `[root]` can never be referenced. 
This replaces the current parser implementation with a new more robust one that will later be expanded to also check for dependency cycles and allow for values that contain references mixed with other strings. This PR also now expands the config paths given over the command line so that `--config=~/.config/polybar/config` resolves properly. Closes #1032 Closes #1694 * config_parser: Add skeleton with tests First step in the config_parser develoment. Only tests functions that are easily testable without many outside dependencies. Integration tests will follow. * config_parser: Implement parse_header * config_parser: Implement get_line_type * feat(string): Add trim functions with predicate Not only trimming based on single character matching but based on a freely specifiable predicate. Will be used to trim all spaces (based on isspace) * config_parser: Implement parse_key * config_parser: Implement parse_line for valid lines * config_parser: Throw exception on invalid lines * config_parser: Remove line_no and file_index from parse_line Cleaner to let the caller catch and fill in the line number and file path * string: Clear up misleading description of trim Before, trim would remove all characters that *didn't* match the predicate and thus the predicate isspace wouldn't work correctly. But because we used the inverse (isnospace_pred) it all worked out, but if the function was used with any other function, it wouldn't have given the desired output * config_parser: Implement parse_file * config_parser: Switch operation to config_parser This changes the way the config is invoked. Now main.cpp creates a config_parser object which then returns the singleton config object from the parse method. Subsequent calls to config::make will return the already created config object as before The config_parser does not yet have all the functionality of the old parser: `inherit` directives are not yet resolved. 
Other than that all the old functionality is implemented (creating sectionmap and applying include-file) Any sort of dependency detection (except for include-file) are still missing * config: Move xrm initialization to constructor config_parser handles the detection of xrdb references and passes that info to the config object. This finally allows us to delete the config::parse_file function because everything in it has been implemented (except for xrdb detection and file error handling) * refactor(config_parser): Cleanup * config_parser: Set config data after initialization Looks much cleaner this way * config_parser: Expand include-file paths * config_parser: Init xrm if the config uses %{xrdb references * config_parser: Use same type of maps as in old impl Polybar has some weird, not yet fixed, inheriting behaviour and it changes depending on the order in which the config stores its data. Using the same type of maps ensures that the behaviour stays the same. * refactor(config_parser): Clearer invalid name error message * config_parser: Don't allow reserved section names Sections with the names 'self', 'BAR', 'root' could never be referenced because those strings have a special meaning inside references * config_parser: Handle inherit directives This uses the old copy_inherited function, so this still suffers from crashes if there are cyclic dependencies. This also fixes the behaviour where any key that starts with 'inherit' would be treated as an inherit directive * config_parser: Clearer dependency cycle error message * refactor(config_parser): Handle file errors when parsing This removes the need to check if the file exists separately * fix(config): expand config file path Now paths using ~ and environment variables can be used as the config path * fix(config): Properly recognize xrdb references * config_parser: Make messages more informative * doc(config): Improve commenting Comments now describe what the config_parser actually does instead of what it will do. 
We also now follow the rule that single line comments inside functions should use `//` comments * refactor: Move else on same line as curly braces * fix(config_parser): Don't duplicate paths in `files` * refactor(config_parser): Use else if for clarity * fix(config): Undefined behavior in syntax_error Before the custom what() method produced undefined behavior because the returned string became invalid once the function returned. * refactor(config): descriptive name for useless lines is_valid could easily be confused as meaning syntactically invalid without it being clarified in a comment * refactor(config): Use separate strings instead of key_value Takes just as much space and is much better to read * fix(config_parser): TestCase -> TestSuite and fix macro call Ref: #1644 * config_parser: use const string& in method args * config_parser: Improve comments * config_parser: Incorporate review comments
2019-08-06 17:41:31 +00:00
} // namespace string_util
2016-11-02 19:22:45 +00:00
2016-11-19 05:22:44 +00:00
POLYBAR_NS_END