diff options
author | Jonathan Wakely <jwakely@redhat.com> | 2019-06-17 15:19:04 +0100 |
---|---|---|
committer | Jonathan Wakely <redi@gcc.gnu.org> | 2019-06-17 15:19:04 +0100 |
commit | 26b1320ee5e2e9e107e092162d1c82b682504534 (patch) | |
tree | 4a56946748990bfb63e818d0c143f2b10a1c2018 /libstdc++-v3/testsuite/27_io/filesystem | |
parent | 41d93b16cac5b348b7883e259f7880b4a0addc23 (diff) |
PR libstdc++/90281 Fix string conversions for filesystem::path
Fix several bugs in the encoding conversions for filesystem::path that
prevent conversion of Unicode characters outside the Basic Multilingual
Plane, and prevent returning basic_string specializations with
alternative allocator types.
The std::codecvt_utf8 class template is not suitable for UTF-16
conversions because it uses UCS-2 instead. For conversions between UTF-8
and UTF-16 either std::codecvt<C, char, mbstate_t> or
codecvt_utf8_utf16<C> must be used.
The __str_codecvt_in and __str_codecvt_out utilities do not
return false on a partial conversion (e.g. for invalid or incomplete
Unicode input). Add new helpers that treat partial conversions as
errors, and use them for all filesystem::path conversions.
PR libstdc++/90281 Fix string conversions for filesystem::path
* include/bits/fs_path.h (u8path) [_GLIBCXX_FILESYSTEM_IS_WINDOWS]:
Use codecvt_utf8_utf16 instead of codecvt_utf8. Use
__str_codecvt_in_all to fail for partial conversions and throw on
error.
[!_GLIBCXX_FILESYSTEM_IS_WINDOWS && _GLIBCXX_USE_CHAR8_T]
(path::_Cvt<char8_t>): Add explicit specialization.
[_GLIBCXX_FILESYSTEM_IS_WINDOWS] (path::_Cvt::_S_wconvert): Remove
overloads.
[_GLIBCXX_FILESYSTEM_IS_WINDOWS] (path::_Cvt::_S_convert): Use
if-constexpr instead of dispatching to _S_wconvert. Use codecvt
instead of codecvt_utf8. Use __str_codecvt_in_all and
__str_codecvt_out_all.
[!_GLIBCXX_FILESYSTEM_IS_WINDOWS] (path::_Cvt::_S_convert): Use
codecvt instead of codecvt_utf8. Use __str_codecvt_out_all.
(path::_S_str_convert) [_GLIBCXX_FILESYSTEM_IS_WINDOWS]: Use
codecvt_utf8_utf16 instead of codecvt_utf8. Construct return values
with allocator. Use __str_codecvt_out_all. Fallthrough to POSIX code
after converting to UTF-8.
(path::_S_str_convert): Use codecvt instead of codecvt_utf8. Use
__str_codecvt_in_all.
(path::string): Fix initialization of string types with different
allocators.
(path::u8string) [_GLIBCXX_FILESYSTEM_IS_WINDOWS]: Use
codecvt_utf8_utf16 instead of codecvt_utf8. Use __str_codecvt_out_all.
* include/bits/locale_conv.h (__do_str_codecvt): Reorder static and
runtime conditions.
(__str_codecvt_out_all, __str_codecvt_in_all): New functions that
return false for partial conversions.
* include/experimental/bits/fs_path.h (u8path)
[_GLIBCXX_FILESYSTEM_IS_WINDOWS]: Implement correctly for mingw.
[_GLIBCXX_FILESYSTEM_IS_WINDOWS] (path::_Cvt::_S_wconvert): Add
missing handling for char8_t. Use codecvt and codecvt_utf8_utf16
instead of codecvt_utf8. Use __str_codecvt_in_all and
__str_codecvt_out_all.
[!_GLIBCXX_FILESYSTEM_IS_WINDOWS] (path::_Cvt::_S_convert): Use
codecvt instead of codecvt_utf8. Use __str_codecvt_out_all.
(path::string) [_GLIBCXX_FILESYSTEM_IS_WINDOWS]: Use
codecvt_utf8_utf16 instead of codecvt_utf8. Construct return values
with allocator. Use __str_codecvt_out_all and __str_codecvt_in_all.
(path::string) [!_GLIBCXX_FILESYSTEM_IS_WINDOWS]: Use
__str_codecvt_in_all.
(path::u8string) [_GLIBCXX_FILESYSTEM_IS_WINDOWS]: Use
codecvt_utf8_utf16 instead of codecvt_utf8. Use __str_codecvt_out_all.
* src/c++17/fs_path.cc (path::_S_convert_loc): Use
__str_codecvt_in_all.
* src/filesystem/path.cc (path::_S_convert_loc): Likewise.
* testsuite/27_io/filesystem/path/construct/90281.cc: New test.
* testsuite/27_io/filesystem/path/factory/u8path.cc: New test.
* testsuite/27_io/filesystem/path/native/string.cc: Test with empty
strings and with Unicode characters outside the basic multilingual
plane.
* testsuite/27_io/filesystem/path/native/alloc.cc: New test.
* testsuite/experimental/filesystem/path/construct/90281.cc: New test.
* testsuite/experimental/filesystem/path/factory/u8path.cc: New test.
* testsuite/experimental/filesystem/path/native/alloc.cc: New test.
* testsuite/experimental/filesystem/path/native/string.cc: Test with
empty strings and with Unicode characters outside the basic
multilingual plane.
From-SVN: r272385
Diffstat (limited to 'libstdc++-v3/testsuite/27_io/filesystem')
4 files changed, 239 insertions, 0 deletions
diff --git a/libstdc++-v3/testsuite/27_io/filesystem/path/construct/90281.cc b/libstdc++-v3/testsuite/27_io/filesystem/path/construct/90281.cc new file mode 100644 index 00000000000..e0d10e56e8c --- /dev/null +++ b/libstdc++-v3/testsuite/27_io/filesystem/path/construct/90281.cc @@ -0,0 +1,53 @@ +// Copyright (C) 2019 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// <http://www.gnu.org/licenses/>. 
+ +// { dg-options "-std=gnu++17" } +// { dg-do run { target c++17 } } + +#include <filesystem> +#include <testsuite_hooks.h> + +namespace fs = std::filesystem; + +template<typename C = fs::path::value_type> +const C* code_units() +{ + if constexpr (std::is_same_v<C, char>) + return "\xf0\x9d\x84\x9e"; + else + return L"\xD834\xDD1E"; +} + +// PR libstdc++/90281 +void +test01() +{ + const fs::path::string_type expected = code_units(); + + fs::path p8 = fs::u8path(u8"\U0001D11E"); + VERIFY( p8.native() == expected ); + fs::path p16(u"\U0001D11E"); + VERIFY( p16.native() == expected ); + fs::path p32(U"\U0001D11E"); + VERIFY( p32.native() == expected ); +} + +int +main() +{ + test01(); +} diff --git a/libstdc++-v3/testsuite/27_io/filesystem/path/factory/u8path.cc b/libstdc++-v3/testsuite/27_io/filesystem/path/factory/u8path.cc new file mode 100644 index 00000000000..aff722b5867 --- /dev/null +++ b/libstdc++-v3/testsuite/27_io/filesystem/path/factory/u8path.cc @@ -0,0 +1,67 @@ +// Copyright (C) 2019 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// <http://www.gnu.org/licenses/>. 
+ +// { dg-options "-std=gnu++17" } +// { dg-do run { target c++17 } } + +#include <filesystem> +#include <testsuite_hooks.h> + +namespace fs = std::filesystem; + +void +test01() +{ + fs::path p = fs::u8path(""); + VERIFY( p.empty() ); + + p = fs::u8path("filename"); + VERIFY( p.u8string() == u8"filename" ); + + p = fs::u8path("\xf0\x9d\x84\x9e"); + VERIFY( p.u8string() == u8"\U0001D11E" ); +} + +void +test02() +{ + // These calls to u8path are undefined, because they fail to meet the + // requirement that the input is valid UTF-8 data. For Windows u8path + // will fail. For POSIX constructing an invalid path appears to work, + // but will fail when converted to a different encoding. + + try { + auto p = fs::u8path("\xf0\x9d"); // incomplete surrogate pair + p.u16string(); + VERIFY( false ); + } catch(const fs::filesystem_error&) { + } + + try { + auto p = fs::u8path("\xf0"); // incomplete multibyte character + p.u16string(); + VERIFY( false ); + } catch(const fs::filesystem_error&) { + } +} + +int +main() +{ + test01(); + test02(); +} diff --git a/libstdc++-v3/testsuite/27_io/filesystem/path/native/alloc.cc b/libstdc++-v3/testsuite/27_io/filesystem/path/native/alloc.cc new file mode 100644 index 00000000000..bdb52a20e14 --- /dev/null +++ b/libstdc++-v3/testsuite/27_io/filesystem/path/native/alloc.cc @@ -0,0 +1,92 @@ +// Copyright (C) 2016-2019 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+ +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// <http://www.gnu.org/licenses/>. + +// { dg-options "-std=gnu++17" } +// { dg-do run { target c++17 } } + +#include <filesystem> +#include <string> +#include <testsuite_hooks.h> +#include <testsuite_allocator.h> + +template<typename C> + using alloc = __gnu_test::uneq_allocator<C>; + +void +test01() +{ + using namespace std::filesystem; + path p; + + auto str = p.string<char>(alloc<char>(1)); + VERIFY( str == "" ); + VERIFY( str.get_allocator() == alloc<char>(1) ); + +#ifdef _GLIBCXX_USE_CHAR8_T + auto str8 = p.string<char8_t>(alloc<char8_t>(1)); + VERIFY( str8 == u8"" ); + VERIFY( str8.get_allocator() == alloc<char8_t>(1) ); +#endif + + auto strw = p.string<wchar_t>(alloc<wchar_t>(2)); + VERIFY( strw == L"" ); + VERIFY( strw.get_allocator() == alloc<wchar_t>(2) ); + + auto str16 = p.string<char16_t>(alloc<char16_t>(3)); + VERIFY( str16 == u"" ); + VERIFY( str16.get_allocator() == alloc<char16_t>(3) ); + + auto str32 = p.string<char32_t>(alloc<char32_t>(4)); + VERIFY( str32 == U"" ); + VERIFY( str32.get_allocator() == alloc<char32_t>(4) ); +} + +void +test02() +{ + using namespace std::filesystem; + path p = "abcdefghijklmnopqrstuvwxyz"; + + auto str = p.string<char>(alloc<char>(1)); + VERIFY( str == "abcdefghijklmnopqrstuvwxyz" ); + VERIFY( str.get_allocator() == alloc<char>(1) ); + +#ifdef _GLIBCXX_USE_CHAR8_T + auto str8 = p.string<char8_t>(alloc<char8_t>(1)); + VERIFY( str8 == u8"abcdefghijklmnopqrstuvwxyz" ); + VERIFY( str8.get_allocator() == alloc<char8_t>(1) ); +#endif + + auto strw = p.string<wchar_t>(alloc<wchar_t>(2)); + VERIFY( strw == L"abcdefghijklmnopqrstuvwxyz" ); + VERIFY( strw.get_allocator() == alloc<wchar_t>(2) ); + + auto str16 = p.string<char16_t>(alloc<char16_t>(3)); + VERIFY( str16 == u"abcdefghijklmnopqrstuvwxyz" ); + VERIFY( str16.get_allocator() == alloc<char16_t>(3) ); + + auto str32 = 
p.string<char32_t>(alloc<char32_t>(4)); + VERIFY( str32 == U"abcdefghijklmnopqrstuvwxyz" ); + VERIFY( str32.get_allocator() == alloc<char32_t>(4) ); +} + +int +main() +{ + test01(); + test02(); +} diff --git a/libstdc++-v3/testsuite/27_io/filesystem/path/native/string.cc b/libstdc++-v3/testsuite/27_io/filesystem/path/native/string.cc index 4d45c7e15df..2ed58e379ef 100644 --- a/libstdc++-v3/testsuite/27_io/filesystem/path/native/string.cc +++ b/libstdc++-v3/testsuite/27_io/filesystem/path/native/string.cc @@ -62,9 +62,36 @@ test02() VERIFY( str32 == p.u32string() ); } +void +test03() +{ + std::filesystem::path p; + auto str8 = p.u8string(); + VERIFY( str8 == u8"" ); + auto str16 = p.u16string(); + VERIFY( str16 == u"" ); + auto str32 = p.u32string(); + VERIFY( str32 == U"" ); +} + +void +test04() +{ + // PR libstdc++/90281 + auto p = std::filesystem::u8path("\xf0\x9d\x84\x9e"); + auto str8 = p.u8string(); + VERIFY( str8 == u8"\U0001D11E" ); + auto str16 = p.u16string(); + VERIFY( str16 == u"\U0001D11E" ); + auto str32 = p.u32string(); + VERIFY( str32 == U"\U0001D11E" ); +} + int main() { test01(); test02(); + test03(); + test04(); } |