diff --git a/include/cairo/context.hpp b/include/cairo/context.hpp index b0589d0f..0434e6f5 100644 --- a/include/cairo/context.hpp +++ b/include/cairo/context.hpp @@ -166,9 +166,10 @@ namespace cairo { string utf8 = t.contents; string_util::unicode_charlist chars; - bool success = string_util::utf8_to_ucs4(utf8, chars); + bool valid = string_util::utf8_to_ucs4(utf8, chars); - if (!success) { + // The conversion already removed any invalid chunks. We should probably log a warning though. + if (!valid) { sstream hex; hex << std::hex << std::setw(2) << std::setfill('0'); diff --git a/include/utils/string.hpp b/include/utils/string.hpp index 7d0a3dc5..45eaec06 100644 --- a/include/utils/string.hpp +++ b/include/utils/string.hpp @@ -84,14 +84,7 @@ string trim(string&& value, const char& needle = ' '); size_t char_len(const string& value); string utf8_truncate(string&& value, size_t len); -/** - * @brief Create a UCS-4 codepoint from a utf-8 encoded string - */ [[nodiscard]] bool utf8_to_ucs4(const string& src, unicode_charlist& result_list); - -/** - * @brief Convert a UCS-4 codepoint to a utf-8 encoded string - */ size_t ucs4_to_utf8(std::array& utf8, unsigned int ucs); string join(const vector& strs, const string& delim); diff --git a/tests/unit_tests/utils/string.cpp b/tests/unit_tests/utils/string.cpp index e95be7c4..de7a1825 100644 --- a/tests/unit_tests/utils/string.cpp +++ b/tests/unit_tests/utils/string.cpp @@ -188,8 +188,8 @@ TEST_P(Utf8ToUCS4AsciiTest, correctness) { string_util::unicode_charlist result_list{}; string str = GetParam(); - bool success = string_util::utf8_to_ucs4(str, result_list); - ASSERT_TRUE(success); + bool valid = string_util::utf8_to_ucs4(str, result_list); + ASSERT_TRUE(valid); ASSERT_EQ(str.size(), result_list.size()); @@ -206,18 +206,20 @@ TEST_P(Utf8ToUCS4AsciiTest, correctness) { } } +// String containing a single codepoint and the expected numerical codepoint using single_test_t = std::pair; class Utf8ToUCS4SingleTest : public testing::TestWithParam {}; const vector utf8_to_ucs4_single_list = { - {" ", 0x20}, {"\u007f", 0x7f}, // End of 1 byte range - {"\u0080", 0x80}, // Start of 2 byte range - {"\u07ff", 0x7ff}, // End of 2 byte range - {"\u0800", 0x800}, // Start of 3 byte range - {"\uffff", 0xffff}, // End of 3 byte range - {"\U00010000", 0x10000}, // Start of 4 byte range - {"\U0010ffff", 0x10ffff}, // End of 4 byte range - {"\U0001f600", 0x1f600}, // Grinning face emoji + {" ", 0x20}, // Single ASCII character + {"\u007f", 0x7f}, // End of 1 byte range + {"\u0080", 0x80}, // Start of 2 byte range + {"\u07ff", 0x7ff}, // End of 2 byte range + {"\u0800", 0x800}, // Start of 3 byte range + {"\uffff", 0xffff}, // End of 3 byte range + {"\U00010000", 0x10000}, // Start of 4 byte range + {"\U0010ffff", 0x10ffff}, // End of 4 byte range + {"\U0001f600", 0x1f600}, // Grinning face emoji }; INSTANTIATE_TEST_SUITE_P(Inst, Utf8ToUCS4SingleTest, testing::ValuesIn(utf8_to_ucs4_single_list)); @@ -229,8 +231,8 @@ TEST_P(Utf8ToUCS4SingleTest, correctness) { string_util::unicode_charlist result_list{}; const auto [str, codepoint] = GetParam(); - bool success = string_util::utf8_to_ucs4(str, result_list); - ASSERT_TRUE(success); + bool valid = string_util::utf8_to_ucs4(str, result_list); + ASSERT_TRUE(valid); ASSERT_EQ(1, result_list.size()); @@ -262,8 +264,8 @@ INSTANTIATE_TEST_SUITE_P(Inst, Utf8ToUCS4InvalidTest, testing::ValuesIn(utf8_to_ TEST_P(Utf8ToUCS4InvalidTest, correctness) { string_util::unicode_charlist result_list{}; const auto str = GetParam(); - bool success = string_util::utf8_to_ucs4(str, result_list); - EXPECT_FALSE(success); + bool valid = string_util::utf8_to_ucs4(str, result_list); + EXPECT_FALSE(valid); EXPECT_EQ(0, result_list.size()); } @@ -273,8 +275,8 @@ TEST_P(Utf8ToUCS4InvalidTest, correctness) { TEST(String, utf8ToUCS4Partial) { string_util::unicode_charlist result_list{}; string str = "\xe0\x70\x80"; // a valid ascii character between two invalid characters - bool success = string_util::utf8_to_ucs4(str, result_list); - EXPECT_FALSE(success); + bool valid = string_util::utf8_to_ucs4(str, result_list); + EXPECT_FALSE(valid); EXPECT_EQ(1, result_list.size()); EXPECT_EQ(0x70, result_list[0].codepoint);