From 93dd7f36f2066ec52137178ee52052f293e5e743 Mon Sep 17 00:00:00 2001 From: Patrick Palka Date: Thu, 21 Apr 2022 12:11:01 -0400 Subject: libstdc++: Avoid ASCII assumptions in floating_from_chars.cc In starts_with_ci and in __floating_from_chars_hex's inf/nan handling, we were assuming that the letters are contiguous and that 'A' + 32 == 'a' which is true for ASCII but not for other character encodings. This patch fixes starts_with_ci by using a constexpr lookup table that maps uppercase letters to lowercase, and fixes __floating_from_chars_hex by using __from_chars_alnum_to_val. libstdc++-v3/ChangeLog: * include/std/charconv (__from_chars_alnum_to_val_table): Simplify initialization of __lower/__upper_letters. (__from_chars_alnum_to_val): Default the template parameter to false. * src/c++17/floating_from_chars.cc (starts_with_ci): Don't assume the uppercase and lowercase letters are contiguous. (__floating_from_chars_hex): Likewise. --- libstdc++-v3/src/c++17/floating_from_chars.cc | 33 ++++++++++++++++++++------- 1 file changed, 25 insertions(+), 8 deletions(-) (limited to 'libstdc++-v3/src/c++17') diff --git a/libstdc++-v3/src/c++17/floating_from_chars.cc b/libstdc++-v3/src/c++17/floating_from_chars.cc index 0f5183aa9b5..13de1e346ab 100644 --- a/libstdc++-v3/src/c++17/floating_from_chars.cc +++ b/libstdc++-v3/src/c++17/floating_from_chars.cc @@ -30,6 +30,7 @@ // Prefer to use std::pmr::string if possible, which requires the cxx11 ABI. #define _GLIBCXX_USE_CXX11_ABI 1 +#include <array> #include <charconv> #include <bit> #include <string> @@ -451,15 +452,33 @@ namespace #if _GLIBCXX_FLOAT_IS_IEEE_BINARY32 && _GLIBCXX_DOUBLE_IS_IEEE_BINARY64 // Return true iff [FIRST,LAST) begins with PREFIX, ignoring case. + // PREFIX is assumed to not contain any uppercase letters. 
bool starts_with_ci(const char* first, const char* last, string_view prefix) { __glibcxx_requires_valid_range(first, last); - for (char ch : prefix) + // A lookup table that maps uppercase letters to lowercase and + // is otherwise the identity mapping. + static constexpr auto upper_to_lower_table = [] { + constexpr unsigned char lower_letters[27] = "abcdefghijklmnopqrstuvwxyz"; + constexpr unsigned char upper_letters[27] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + std::array<unsigned char, (1u << __CHAR_BIT__)> table = {}; + for (unsigned i = 0; i < table.size(); ++i) + table[i] = i; + for (unsigned i = 0; i < 26; ++i) + table[upper_letters[i]] = lower_letters[i]; + return table; + }(); + + if (last - first < static_cast<ptrdiff_t>(prefix.length())) + return false; + + for (const unsigned char pch : prefix) { - __glibcxx_assert(ch >= 'a' && ch <= 'z'); - if (first == last || (*first != ch && *first != ch - 32)) + // __glibcxx_assert(pch == upper_to_lower_table[pch]); + const unsigned char ch = *first; + if (ch != pch && upper_to_lower_table[ch] != pch) return false; ++first; } @@ -535,10 +554,8 @@ namespace ++first; break; } - else if ((ch >= '0' && ch <= '9') - || (ch >= 'a' && ch <= 'z') - || (ch >= 'A' && ch <= 'Z') - || ch == '_') + else if (ch == '_' + || __detail::__from_chars_alnum_to_val(ch) < 127) continue; else { @@ -599,7 +616,7 @@ namespace continue; } - int hexit = __detail::__from_chars_alnum_to_val<false>(ch); + int hexit = __detail::__from_chars_alnum_to_val(ch); if (hexit >= 16) break; seen_hexit = true; -- cgit v1.2.3