From 2d694414ada8e3b58f504c1b175d31088529632e Mon Sep 17 00:00:00 2001 From: Jonathan Wakely Date: Thu, 4 Apr 2024 10:33:33 +0100 Subject: [PATCH] libstdc++: Fix infinite loop in std::istream::ignore(n, delim) [PR93672] A negative delim value passed to std::istream::ignore can never match any character in the stream, because the comparison is done using traits_type::eq_int_type(sb->sgetc(), delim) and sgetc() never returns negative values (except at EOF). The optimized version of ignore for the std::istream specialization uses traits_type::find to locate the delim character in the streambuf, which _can_ match a negative delim on platforms where char is signed, but then we do another comparison using eq_int_type which fails. The code then keeps looping forever, with traits_type::find locating the character and traits_type::eq_int_type saying it's not a match, so traits_type::find is used again and finds the same character again. A possible fix would be to check with eq_int_type after a successful find, to see whether we really have a match. However, that would be suboptimal since we know that a negative delimiter will never match using eq_int_type. So a better fix is to adjust the check at the top of the function that handles delim==eof(), so that every delim value that does not correspond to a char_type value (which includes all negative values) is treated as equivalent to EOF. That way we don't bother using find to search for something that will never match with eq_int_type. The version of ignore in the primary template doesn't need a change, because it doesn't use traits_type::find; instead characters are extracted one-by-one and always matched using eq_int_type. That avoids the inconsistency between find and eq_int_type. The specialization for std::wistream does use traits_type::find, but traits_type::to_int_type is equivalent to an implicit conversion from wchar_t to wint_t, so passing a wchar_t directly to ignore without using to_int_type works. 
libstdc++-v3/ChangeLog: PR libstdc++/93672 * src/c++98/istream.cc (istream::ignore(streamsize, int_type)): Treat all delimiter values that do not correspond to a char_type value (including all negative values) as eof(). * testsuite/27_io/basic_istream/ignore/char/93672.cc: New test. * testsuite/27_io/basic_istream/ignore/wchar_t/93672.cc: New test. --- libstdc++-v3/src/c++98/istream.cc | 13 ++- .../27_io/basic_istream/ignore/char/93672.cc | 101 ++++++++++++++++++ .../basic_istream/ignore/wchar_t/93672.cc | 34 ++++++ 3 files changed, 146 insertions(+), 2 deletions(-) create mode 100644 libstdc++-v3/testsuite/27_io/basic_istream/ignore/char/93672.cc create mode 100644 libstdc++-v3/testsuite/27_io/basic_istream/ignore/wchar_t/93672.cc diff --git a/libstdc++-v3/src/c++98/istream.cc b/libstdc++-v3/src/c++98/istream.cc index 07ac739c26a..d1b4444ff2b 100644 --- a/libstdc++-v3/src/c++98/istream.cc +++ b/libstdc++-v3/src/c++98/istream.cc @@ -112,8 +112,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION basic_istream<char>:: ignore(streamsize __n, int_type __delim) { - if (traits_type::eq_int_type(__delim, traits_type::eof())) - return ignore(__n); + { + // If conversion to int_type changes the value then __delim does not + // correspond to a value of type char_type, and so will never match + // a character extracted from the input sequence. Just use ignore(n). + const int_type chk_delim = traits_type::to_int_type(__delim); + const bool matchable = traits_type::eq_int_type(chk_delim, __delim); + if (__builtin_expect(!matchable, 0)) + return ignore(__n); + // Now we know that __delim is a valid char_type value, so it's safe + // for the code below to use traits_type::find to search for it. 
+ } _M_gcount = 0; sentry __cerb(*this, true); diff --git a/libstdc++-v3/testsuite/27_io/basic_istream/ignore/char/93672.cc b/libstdc++-v3/testsuite/27_io/basic_istream/ignore/char/93672.cc new file mode 100644 index 00000000000..96737485b83 --- /dev/null +++ b/libstdc++-v3/testsuite/27_io/basic_istream/ignore/char/93672.cc @@ -0,0 +1,101 @@ +// { dg-do run } + +#include +#include +#include + +void +test_pr93672() // std::basic_istream::ignore hangs if delim MSB is set +{ + std::istringstream in(".\xfc..\xfd...\xfe."); + + // This should find '\xfc' even on platforms where char is signed, + // because the delimiter is correctly converted to the stream's int_type. + in.ignore(100, std::char_traits<char>::to_int_type('\xfc')); + VERIFY( in.gcount() == 2 ); + VERIFY( ! in.eof() ); + + // This should work equivalently to traits_type::to_int_type + in.ignore(100, (unsigned char)'\xfd'); + VERIFY( in.gcount() == 3 ); + VERIFY( ! in.eof() ); + + // This only works if char is unsigned. + in.ignore(100, '\xfe'); + if (std::numeric_limits<char>::is_signed) + { + // When char is signed, '\xfe' != traits_type::to_int_type('\xfe') + // so the delimiter does not match the character in the input sequence, + // and ignore consumes all input until EOF. + VERIFY( in.gcount() == 5 ); + VERIFY( in.eof() ); + } + else + { + // When char is unsigned, '\xfe' == to_int_type('\xfe') so the delimiter + // matches the character in the input sequence, and doesn't reach EOF. + VERIFY( in.gcount() == 4 ); + VERIFY( ! in.eof() ); + } + + in.clear(); + in.str(".a."); + in.ignore(100, 'a' + 256); // Should not match 'a' + VERIFY( in.gcount() == 3 ); + VERIFY( in.eof() ); +} + +// Custom traits type that inherits all behaviour from std::char_traits<char>. +struct traits : std::char_traits<char> { }; + +void +test_primary_template() +{ + // Check that the primary template for std::basic_istream::ignore + // works the same as the std::istream::ignore specialization. 
+ // The infinite loop bug was never present in the primary template, + // because it doesn't use traits_type::find to search the input sequence. + + std::basic_istringstream<char, traits> in(".\xfc..\xfd...\xfe."); + + // This should find '\xfc' even on platforms where char is signed, + // because the delimiter is correctly converted to the stream's int_type. + in.ignore(100, std::char_traits<char>::to_int_type('\xfc')); + VERIFY( in.gcount() == 2 ); + VERIFY( ! in.eof() ); + + // This should work equivalently to traits_type::to_int_type + in.ignore(100, (unsigned char)'\xfd'); + VERIFY( in.gcount() == 3 ); + VERIFY( ! in.eof() ); + + // This only works if char is unsigned. + in.ignore(100, '\xfe'); + if (std::numeric_limits<char>::is_signed) + { + // When char is signed, '\xfe' != traits_type::to_int_type('\xfe') + // so the delimiter does not match the character in the input sequence, + // and ignore consumes all input until EOF. + VERIFY( in.gcount() == 5 ); + VERIFY( in.eof() ); + } + else + { + // When char is unsigned, '\xfe' == to_int_type('\xfe') so the delimiter + // matches the character in the input sequence, and doesn't reach EOF. + VERIFY( in.gcount() == 4 ); + VERIFY( ! in.eof() ); + } + + in.clear(); + in.str(".a."); + in.ignore(100, 'a' + 256); // Should not match 'a' + VERIFY( in.gcount() == 3 ); + VERIFY( in.eof() ); +} + +int main() +{ + test_pr93672(); + test_primary_template(); +} diff --git a/libstdc++-v3/testsuite/27_io/basic_istream/ignore/wchar_t/93672.cc b/libstdc++-v3/testsuite/27_io/basic_istream/ignore/wchar_t/93672.cc new file mode 100644 index 00000000000..5ce9155e02c --- /dev/null +++ b/libstdc++-v3/testsuite/27_io/basic_istream/ignore/wchar_t/93672.cc @@ -0,0 +1,34 @@ +// { dg-do run } + +#include +#include +#include +#include + +// PR 93672 was a bug in std::istream that never affected std::wistream. +// This test ensures that the bug doesn't get introduced to std::wistream. 
+void +test_pr93672() +{ + std::wstring str = L".x..x."; + str[1] = (wchar_t)-2; + str[4] = (wchar_t)-3; + std::wistringstream in(str); + + // This should find the character even on platforms where wchar_t is signed, + // because the delimiter is correctly converted to the stream's int_type. + in.ignore(100, std::char_traits<wchar_t>::to_int_type((wchar_t)-2)); + VERIFY( in.gcount() == 2 ); + VERIFY( ! in.eof() ); + + // This also works, because std::char_traits<wchar_t>::to_int_type(wc) is + // equivalent to (int_type)wc so using to_int_type isn't needed. + in.ignore(100, (wchar_t)-3); + VERIFY( in.gcount() == 3 ); + VERIFY( ! in.eof() ); +} + +int main() +{ + test_pr93672(); +}