mirror of
https://gcc.gnu.org/git/gcc.git
synced 2025-01-18 10:24:30 +08:00
acinclude.m4: Fix typo in comment.
* acinclude.m4: Fix typo in comment. * configure: Regenerate. * include/bits/codecvt.h (codecvt<char16_t, char, mbstate_t>, codecvt<char32_t, char, mbstate_t>): Declare specializations. * include/bits/locale_facets.h: Reserve space for new specializations. * src/c++11/Makefile.am: Add codecvt.cc. * src/c++11/Makefile.in: Regenerate. * src/c++11/codecvt.cc: New. * src/c++98/Makefile.am: Compile locale_init.cc and localename.cc with -std=gnu++11. * src/c++98/Makefile.in: Regenerate. * src/c++98/locale_init.cc: Initialize new codecvt specializations. * src/c++98/localename.cc: Likewise. * config/abi/pre/gnu.ver: Exports for new codecvt specializations. * testsuite/22_locale/codecvt/utf8.cc: New. * testsuite/22_locale/locale/cons/unicode.cc: Check that new specializations are installed in locale objects. From-SVN: r219747
This commit is contained in:
parent
16ab99ba18
commit
bb93f35da1
@ -1,3 +1,23 @@
|
||||
2015-01-16 Jonathan Wakely <jwakely@redhat.com>
|
||||
|
||||
* acinclude.m4: Fix typo in comment.
|
||||
* configure: Regenerate.
|
||||
* include/bits/codecvt.h (codecvt<char16_t, char, mbstate_t>,
|
||||
codecvt<char32_t, char, mbstate_t>): Declare specializations.
|
||||
* include/bits/locale_facets.h: Reserve space for new specializations.
|
||||
* src/c++11/Makefile.am: Add codecvt.cc.
|
||||
* src/c++11/Makefile.in: Regenerate.
|
||||
* src/c++11/codecvt.cc: New.
|
||||
* src/c++98/Makefile.am: Compile locale_init.cc and localename.cc
|
||||
with -std=gnu++11.
|
||||
* src/c++98/Makefile.in: Regenerate.
|
||||
* src/c++98/locale_init.cc: Initialize new codecvt specializations.
|
||||
* src/c++98/localename.cc: Likewise.
|
||||
* config/abi/pre/gnu.ver: Exports for new codecvt specializations.
|
||||
* testsuite/22_locale/codecvt/utf8.cc: New.
|
||||
* testsuite/22_locale/locale/cons/unicode.cc: Check that new
|
||||
specializations are installed in locale objects.
|
||||
|
||||
2015-01-16 Torvald Riegel <triegel@redhat.com>
|
||||
|
||||
* include/std/shared_mutex (shared_timed_mutex): Add POSIX-based
|
||||
|
@ -1777,7 +1777,7 @@ AC_DEFUN([GLIBCXX_CHECK_C99_TR1], [
|
||||
<tr1/cinttypes> in namespace std::tr1.])
|
||||
fi
|
||||
|
||||
# Check for the existence of whcar_t <inttypes.h> functions (NB: doesn't
|
||||
# Check for the existence of wchar_t <inttypes.h> functions (NB: doesn't
|
||||
# make sense if the glibcxx_cv_c99_stdint_tr1 check fails, per C99, 7.8/1).
|
||||
ac_c99_inttypes_wchar_t_tr1=no;
|
||||
if test x"$glibcxx_cv_c99_stdint_tr1" = x"yes"; then
|
||||
|
@ -1759,6 +1759,11 @@ GLIBCXX_3.4.21 {
|
||||
_ZNKSt8time_getI[cw]St19istreambuf_iteratorI[cw]St11char_traitsI[cw]EEE3getES3_S3_RSt8ios_baseRSt12_Ios_IostateP2tmPK[cw]SC_;
|
||||
_ZNKSt8time_getI[cw]St19istreambuf_iteratorI[cw]St11char_traitsI[cw]EEE6do_getES3_S3_RSt8ios_baseRSt12_Ios_IostateP2tmcc;
|
||||
|
||||
# codecvt<char16_t, char, mbstate_t>, codecvt<char32_t, char, mbstate_t>
|
||||
_ZNKSt7codecvtID[is]c11__mbstate_t*;
|
||||
_ZNSt7codecvtID[is]c11__mbstate_t*;
|
||||
_ZT[ISV]St7codecvtID[is]c11__mbstate_tE;
|
||||
|
||||
} GLIBCXX_3.4.20;
|
||||
|
||||
|
||||
|
2
libstdc++-v3/configure
vendored
2
libstdc++-v3/configure
vendored
@ -19216,7 +19216,7 @@ $as_echo "#define _GLIBCXX_USE_C99_INTTYPES_TR1 1" >>confdefs.h
|
||||
|
||||
fi
|
||||
|
||||
# Check for the existence of whcar_t <inttypes.h> functions (NB: doesn't
|
||||
# Check for the existence of wchar_t <inttypes.h> functions (NB: doesn't
|
||||
# make sense if the glibcxx_cv_c99_stdint_tr1 check fails, per C99, 7.8/1).
|
||||
ac_c99_inttypes_wchar_t_tr1=no;
|
||||
if test x"$glibcxx_cv_c99_stdint_tr1" = x"yes"; then
|
||||
|
@ -393,7 +393,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
};
|
||||
|
||||
#ifdef _GLIBCXX_USE_WCHAR_T
|
||||
/// class codecvt<wchar_t, char, mbstate_t> specialization.
|
||||
/** @brief Class codecvt<wchar_t, char, mbstate_t> specialization.
|
||||
*
|
||||
* Converts between narrow and wide characters in the native character set
|
||||
*/
|
||||
template<>
|
||||
class codecvt<wchar_t, char, mbstate_t>
|
||||
: public __codecvt_abstract_base<wchar_t, char, mbstate_t>
|
||||
@ -455,6 +458,125 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
};
|
||||
#endif //_GLIBCXX_USE_WCHAR_T
|
||||
|
||||
#if __cplusplus >= 201103L
|
||||
#ifdef _GLIBCXX_USE_C99_STDINT_TR1
|
||||
/** @brief Class codecvt<char16_t, char, mbstate_t> specialization.
|
||||
*
|
||||
* Converts between UTF-16 and UTF-8.
|
||||
*/
|
||||
template<>
|
||||
class codecvt<char16_t, char, mbstate_t>
|
||||
: public __codecvt_abstract_base<char16_t, char, mbstate_t>
|
||||
{
|
||||
public:
|
||||
// Types:
|
||||
typedef char16_t intern_type;
|
||||
typedef char extern_type;
|
||||
typedef mbstate_t state_type;
|
||||
|
||||
public:
|
||||
static locale::id id;
|
||||
|
||||
explicit
|
||||
codecvt(size_t __refs = 0)
|
||||
: __codecvt_abstract_base<char16_t, char, mbstate_t>(__refs) { }
|
||||
|
||||
protected:
|
||||
virtual
|
||||
~codecvt();
|
||||
|
||||
virtual result
|
||||
do_out(state_type& __state, const intern_type* __from,
|
||||
const intern_type* __from_end, const intern_type*& __from_next,
|
||||
extern_type* __to, extern_type* __to_end,
|
||||
extern_type*& __to_next) const;
|
||||
|
||||
virtual result
|
||||
do_unshift(state_type& __state,
|
||||
extern_type* __to, extern_type* __to_end,
|
||||
extern_type*& __to_next) const;
|
||||
|
||||
virtual result
|
||||
do_in(state_type& __state,
|
||||
const extern_type* __from, const extern_type* __from_end,
|
||||
const extern_type*& __from_next,
|
||||
intern_type* __to, intern_type* __to_end,
|
||||
intern_type*& __to_next) const;
|
||||
|
||||
virtual
|
||||
int do_encoding() const throw();
|
||||
|
||||
virtual
|
||||
bool do_always_noconv() const throw();
|
||||
|
||||
virtual
|
||||
int do_length(state_type&, const extern_type* __from,
|
||||
const extern_type* __end, size_t __max) const;
|
||||
|
||||
virtual int
|
||||
do_max_length() const throw();
|
||||
};
|
||||
|
||||
/** @brief Class codecvt<char32_t, char, mbstate_t> specialization.
|
||||
*
|
||||
* Converts between UTF-32 and UTF-8.
|
||||
*/
|
||||
template<>
|
||||
class codecvt<char32_t, char, mbstate_t>
|
||||
: public __codecvt_abstract_base<char32_t, char, mbstate_t>
|
||||
{
|
||||
public:
|
||||
// Types:
|
||||
typedef char32_t intern_type;
|
||||
typedef char extern_type;
|
||||
typedef mbstate_t state_type;
|
||||
|
||||
public:
|
||||
static locale::id id;
|
||||
|
||||
explicit
|
||||
codecvt(size_t __refs = 0)
|
||||
: __codecvt_abstract_base<char32_t, char, mbstate_t>(__refs) { }
|
||||
|
||||
protected:
|
||||
virtual
|
||||
~codecvt();
|
||||
|
||||
virtual result
|
||||
do_out(state_type& __state, const intern_type* __from,
|
||||
const intern_type* __from_end, const intern_type*& __from_next,
|
||||
extern_type* __to, extern_type* __to_end,
|
||||
extern_type*& __to_next) const;
|
||||
|
||||
virtual result
|
||||
do_unshift(state_type& __state,
|
||||
extern_type* __to, extern_type* __to_end,
|
||||
extern_type*& __to_next) const;
|
||||
|
||||
virtual result
|
||||
do_in(state_type& __state,
|
||||
const extern_type* __from, const extern_type* __from_end,
|
||||
const extern_type*& __from_next,
|
||||
intern_type* __to, intern_type* __to_end,
|
||||
intern_type*& __to_next) const;
|
||||
|
||||
virtual
|
||||
int do_encoding() const throw();
|
||||
|
||||
virtual
|
||||
bool do_always_noconv() const throw();
|
||||
|
||||
virtual
|
||||
int do_length(state_type&, const extern_type* __from,
|
||||
const extern_type* __end, size_t __max) const;
|
||||
|
||||
virtual int
|
||||
do_max_length() const throw();
|
||||
};
|
||||
|
||||
#endif // _GLIBCXX_USE_C99_STDINT_TR1
|
||||
#endif // C++11
|
||||
|
||||
/// class codecvt_byname [22.2.1.6].
|
||||
template<typename _InternT, typename _ExternT, typename _StateT>
|
||||
class codecvt_byname : public codecvt<_InternT, _ExternT, _StateT>
|
||||
|
@ -58,6 +58,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
#else
|
||||
# define _GLIBCXX_NUM_FACETS 14
|
||||
# define _GLIBCXX_NUM_CXX11_FACETS 8
|
||||
#endif
|
||||
#ifdef _GLIBCXX_USE_C99_STDINT_TR1
|
||||
# define _GLIBCXX_NUM_UNICODE_FACETS 2
|
||||
#else
|
||||
# define _GLIBCXX_NUM_UNICODE_FACETS 0
|
||||
#endif
|
||||
|
||||
// Convert string to numeric value of type _Tp and store results.
|
||||
|
@ -53,6 +53,7 @@ endif
|
||||
|
||||
sources = \
|
||||
chrono.cc \
|
||||
codecvt.cc \
|
||||
condition_variable.cc \
|
||||
cow-stdexcept.cc \
|
||||
ctype.cc \
|
||||
|
@ -72,12 +72,12 @@ libc__11convenience_la_LIBADD =
|
||||
@ENABLE_DUAL_ABI_TRUE@ cxx11-ios_failure.lo \
|
||||
@ENABLE_DUAL_ABI_TRUE@ cxx11-shim_facets.lo cxx11-stdexcept.lo
|
||||
am__objects_2 = ctype_configure_char.lo ctype_members.lo
|
||||
am__objects_3 = chrono.lo condition_variable.lo cow-stdexcept.lo \
|
||||
ctype.lo debug.lo functexcept.lo functional.lo future.lo \
|
||||
hash_c++0x.lo hashtable_c++0x.lo ios.lo limits.lo mutex.lo \
|
||||
placeholders.lo random.lo regex.lo shared_ptr.lo \
|
||||
snprintf_lite.lo system_error.lo thread.lo $(am__objects_1) \
|
||||
$(am__objects_2)
|
||||
am__objects_3 = chrono.lo codecvt.lo condition_variable.lo \
|
||||
cow-stdexcept.lo ctype.lo debug.lo functexcept.lo \
|
||||
functional.lo future.lo hash_c++0x.lo hashtable_c++0x.lo \
|
||||
ios.lo limits.lo mutex.lo placeholders.lo random.lo regex.lo \
|
||||
shared_ptr.lo snprintf_lite.lo system_error.lo thread.lo \
|
||||
$(am__objects_1) $(am__objects_2)
|
||||
@ENABLE_DUAL_ABI_TRUE@am__objects_4 = cow-fstream-inst.lo \
|
||||
@ENABLE_DUAL_ABI_TRUE@ cow-sstream-inst.lo cow-string-inst.lo \
|
||||
@ENABLE_DUAL_ABI_TRUE@ cow-wstring-inst.lo cxx11-locale-inst.lo \
|
||||
@ -344,6 +344,7 @@ host_sources = \
|
||||
|
||||
sources = \
|
||||
chrono.cc \
|
||||
codecvt.cc \
|
||||
condition_variable.cc \
|
||||
cow-stdexcept.cc \
|
||||
ctype.cc \
|
||||
|
461
libstdc++-v3/src/c++11/codecvt.cc
Normal file
461
libstdc++-v3/src/c++11/codecvt.cc
Normal file
@ -0,0 +1,461 @@
|
||||
// Locale support (codecvt) -*- C++ -*-
|
||||
|
||||
// Copyright (C) 2015 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 3, or (at your option)
|
||||
// any later version.
|
||||
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// Under Section 7 of GPL version 3, you are granted additional
|
||||
// permissions described in the GCC Runtime Library Exception, version
|
||||
// 3.1, as published by the Free Software Foundation.
|
||||
|
||||
// You should have received a copy of the GNU General Public License and
|
||||
// a copy of the GCC Runtime Library Exception along with this program;
|
||||
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
#include <bits/locale_classes.h>
|
||||
#include <bits/codecvt.h>
|
||||
#include <bits/stl_algobase.h> // std::max
|
||||
#include <cstring> // std::memcpy, std::memcmp
|
||||
|
||||
#ifdef _GLIBCXX_USE_C99_STDINT_TR1
|
||||
namespace std _GLIBCXX_VISIBILITY(default)
|
||||
{
|
||||
_GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
|
||||
namespace
{
  // Largest code point that fits in a single UTF-16 code unit.
  const char32_t max_single_utf16_unit = 0xFFFF;

  // Largest code point accepted by default by the converters below.
  const char32_t max_code_point = 0x10FFFF;

  // Values returned by read_utf8_code_point in place of a code point.
  // Neither is a valid code point, so callers can test for them directly.
  const char32_t invalid_mb_sequence = char32_t(-1);
  const char32_t incomplete_mb_character = char32_t(-2);

  // True for the first code unit of a UTF-16 surrogate pair.
  inline bool
  is_high_surrogate(char32_t c)
  { return c >= 0xD800 && c <= 0xDBFF; }

  // True for the second code unit of a UTF-16 surrogate pair.
  inline bool
  is_low_surrogate(char32_t c)
  { return c >= 0xDC00 && c <= 0xDFFF; }

  // A half-open range [next, end) that is consumed from the front.
  template<typename Elem>
    struct range
    {
      Elem* next;
      Elem* end;

      Elem operator*() const { return *next; }

      range& operator++() { ++next; return *this; }

      // Number of elements that have not been consumed yet.
      size_t size() const { return end - next; }
    };

  // Decode one UTF-8 encoded code point from the front of FROM.
  //
  // On success the code point is returned and FROM is advanced past it.
  // Otherwise FROM is left untouched and one of the following is returned:
  //   incomplete_mb_character  the bytes seen so far are a valid prefix but
  //                            the range ends before the sequence does
  //                            (also returned for an empty range);
  //   invalid_mb_sequence      the bytes are not well-formed UTF-8 (stray
  //                            continuation byte, overlong form, encoded
  //                            surrogate, lead byte above 0xF4), or a
  //                            multi-byte sequence decodes to a value
  //                            greater than MAXCODE.
  // Each continuation byte is validated before the next one is required, so
  // a malformed prefix is reported as invalid rather than incomplete.
  char32_t
  read_utf8_code_point(range<const char>& from, unsigned long maxcode)
  {
    const size_t avail = from.size();
    if (avail == 0)
      return incomplete_mb_character;

    const unsigned char c1 = from.next[0];
    // https://en.wikipedia.org/wiki/UTF-8#Sample_code
    if (c1 < 0x80)
      {
        ++from.next;
        return c1;
      }
    else if (c1 < 0xC2) // continuation or overlong 2-byte sequence
      return invalid_mb_sequence;
    else if (c1 < 0xE0) // 2-byte sequence
      {
        if (avail < 2)
          return incomplete_mb_character;
        const unsigned char c2 = from.next[1];
        if ((c2 & 0xC0) != 0x80)
          return invalid_mb_sequence;
        // 0x3080 == (0xC0 << 6) + 0x80, i.e. the marker bits of both bytes.
        const char32_t c = (c1 << 6) + c2 - 0x3080;
        if (c > maxcode)
          return invalid_mb_sequence;
        from.next += 2;
        return c;
      }
    else if (c1 < 0xF0) // 3-byte sequence
      {
        if (avail < 2)
          return incomplete_mb_character;
        const unsigned char c2 = from.next[1];
        if ((c2 & 0xC0) != 0x80)
          return invalid_mb_sequence;
        if (c1 == 0xE0 && c2 < 0xA0) // overlong
          return invalid_mb_sequence;
        if (c1 == 0xED && c2 >= 0xA0) // U+D800..U+DFFF, surrogates
          return invalid_mb_sequence;
        if (avail < 3)
          return incomplete_mb_character;
        const unsigned char c3 = from.next[2];
        if ((c3 & 0xC0) != 0x80)
          return invalid_mb_sequence;
        // 0xE2080 == (0xE0 << 12) + (0x80 << 6) + 0x80
        const char32_t c = (c1 << 12) + (c2 << 6) + c3 - 0xE2080;
        if (c > maxcode)
          return invalid_mb_sequence;
        from.next += 3;
        return c;
      }
    else if (c1 < 0xF5) // 4-byte sequence
      {
        if (avail < 2)
          return incomplete_mb_character;
        const unsigned char c2 = from.next[1];
        if ((c2 & 0xC0) != 0x80)
          return invalid_mb_sequence;
        if (c1 == 0xF0 && c2 < 0x90) // overlong
          return invalid_mb_sequence;
        if (c1 == 0xF4 && c2 >= 0x90) // > U+10FFFF
          return invalid_mb_sequence;
        if (avail < 3)
          return incomplete_mb_character;
        const unsigned char c3 = from.next[2];
        if ((c3 & 0xC0) != 0x80)
          return invalid_mb_sequence;
        if (avail < 4)
          return incomplete_mb_character;
        const unsigned char c4 = from.next[3];
        if ((c4 & 0xC0) != 0x80)
          return invalid_mb_sequence;
        // 0x3C82080 == (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80
        const char32_t c
          = (c1 << 18) + (c2 << 12) + (c3 << 6) + c4 - 0x3C82080;
        if (c > maxcode)
          return invalid_mb_sequence;
        from.next += 4;
        return c;
      }
    else // > U+10FFFF
      return invalid_mb_sequence;
  }

  // Append CODE_POINT to TO as UTF-8.  Returns false, writing nothing, if
  // TO has too little room or CODE_POINT is greater than 0x10FFFF.
  bool
  write_utf8_code_point(range<char>& to, char32_t code_point)
  {
    if (code_point < 0x80)
      {
        if (to.size() < 1)
          return false;
        *to.next++ = code_point;
      }
    else if (code_point <= 0x7FF)
      {
        if (to.size() < 2)
          return false;
        *to.next++ = (code_point >> 6) + 0xC0;
        *to.next++ = (code_point & 0x3F) + 0x80;
      }
    else if (code_point <= 0xFFFF)
      {
        if (to.size() < 3)
          return false;
        *to.next++ = (code_point >> 12) + 0xE0;
        *to.next++ = ((code_point >> 6) & 0x3F) + 0x80;
        *to.next++ = (code_point & 0x3F) + 0x80;
      }
    else if (code_point <= 0x10FFFF)
      {
        if (to.size() < 4)
          return false;
        *to.next++ = (code_point >> 18) + 0xF0;
        *to.next++ = ((code_point >> 12) & 0x3F) + 0x80;
        *to.next++ = ((code_point >> 6) & 0x3F) + 0x80;
        *to.next++ = (code_point & 0x3F) + 0x80;
      }
    else
      return false;
    return true;
  }

  // Append CODEPOINT to TO as UTF-16: one code unit up to and including
  // U+FFFF, otherwise a lead/trail surrogate pair.  Returns false, writing
  // nothing, if TO has too little room.
  bool
  write_utf16_code_point(range<char16_t>& to, char32_t codepoint)
  {
    if (codepoint <= max_single_utf16_unit)
      {
        if (to.size() < 1)
          return false;
        *to.next++ = static_cast<char16_t>(codepoint);
        return true;
      }

    if (to.size() < 2)
      return false;

    // Algorithm from http://www.unicode.org/faq/utf_bom.html#utf16-4
    const char32_t lead_offset = 0xD800 - (0x10000 >> 10);
    to.next[0] = static_cast<char16_t>(lead_offset + (codepoint >> 10));
    to.next[1] = static_cast<char16_t>(0xDC00 + (codepoint & 0x3FF));
    to.next += 2;
    return true;
  }

  // utf8 -> ucs4
  // Returns error for malformed input or a code point above MAXCODE, and
  // partial when input remains unconverted (output full, or the input ends
  // in the middle of a multi-byte sequence).
  std::codecvt_base::result
  ucs4_in(range<const char>& from, range<char32_t>& to,
          unsigned long maxcode = max_code_point)
  {
    while (from.size() && to.size())
      {
        const char32_t codepoint = read_utf8_code_point(from, maxcode);
        if (codepoint == incomplete_mb_character)
          return std::codecvt_base::partial;
        if (codepoint == invalid_mb_sequence || codepoint > maxcode)
          return std::codecvt_base::error;
        *to.next++ = codepoint;
      }
    return from.size() ? std::codecvt_base::partial : std::codecvt_base::ok;
  }

  // ucs4 -> utf8
  // Returns error for a value above MAXCODE or in the surrogate range, and
  // partial when the output range cannot hold the next encoded character.
  std::codecvt_base::result
  ucs4_out(range<const char32_t>& from, range<char>& to,
           unsigned long maxcode = max_code_point)
  {
    while (from.size())
      {
        const char32_t c = from.next[0];
        if (c > maxcode || is_high_surrogate(c) || is_low_surrogate(c))
          return std::codecvt_base::error;
        if (!write_utf8_code_point(to, c))
          return std::codecvt_base::partial;
        ++from.next;
      }
    return std::codecvt_base::ok;
  }

  // utf8 -> utf16
  // Same result conventions as ucs4_in.  A character that needs a surrogate
  // pair is only consumed if both code units fit in the output.
  std::codecvt_base::result
  utf16_in(range<const char>& from, range<char16_t>& to,
           unsigned long maxcode = max_code_point)
  {
    while (from.size() && to.size())
      {
        const char* const first = from.next;
        const char32_t codepoint = read_utf8_code_point(from, maxcode);
        if (codepoint == incomplete_mb_character)
          return std::codecvt_base::partial;
        if (codepoint == invalid_mb_sequence || codepoint > maxcode)
          return std::codecvt_base::error;
        if (!write_utf16_code_point(to, codepoint))
          {
            // No room for the surrogate pair: un-read the character.
            from.next = first;
            return std::codecvt_base::partial;
          }
      }
    return from.size() ? std::codecvt_base::partial : std::codecvt_base::ok;
  }

  // utf16 -> utf8
  // Returns error for an unpaired surrogate or a code point above MAXCODE,
  // and partial when the output range cannot hold the next character.
  std::codecvt_base::result
  utf16_out(range<const char16_t>& from, range<char>& to,
            unsigned long maxcode = max_code_point)
  {
    while (from.size())
      {
        char32_t c = from.next[0];
        int inc = 1;
        if (is_high_surrogate(c)) // start of surrogate pair
          {
            // NOTE(review): an input ending on a lone high surrogate has
            // always been reported as ok with from.next left on it; partial
            // may be the better answer -- confirm before changing.
            if (from.size() < 2)
              return std::codecvt_base::ok; // stop converting at this point

            const char32_t c2 = from.next[1];
            if (!is_low_surrogate(c2))
              return std::codecvt_base::error;
            inc = 2;
            // 0x35FDC00 == (0xD800 << 10) + 0xDC00 - 0x10000
            c = (c << 10) + c2 - 0x35FDC00;
          }
        else if (is_low_surrogate(c)) // trail unit without a lead unit
          return std::codecvt_base::error;
        if (c > maxcode)
          return std::codecvt_base::error;
        if (!write_utf8_code_point(to, c))
          return std::codecvt_base::partial;
        from.next += inc;
      }
    return std::codecvt_base::ok;
  }

  // return pos such that [begin,pos) is valid UTF-16 string no longer than max
  // (pos is returned as a byte offset from BEGIN; MAX counts UTF-16 units).
  int
  utf16_len(const char* begin, const char* end, size_t max,
            char32_t maxcode = max_code_point)
  {
    range<const char> from{ begin, end };
    size_t count = 0;
    // While at least two units remain any character fits, pair or not.
    while (count + 1 < max)
      {
        const char32_t c = read_utf8_code_point(from, maxcode);
        if (c == invalid_mb_sequence || c == incomplete_mb_character)
          break;
        else if (c > max_single_utf16_unit)
          ++count; // surrogate pair: counts as two units in total
        ++count;
      }
    if (count + 1 == max) // take one more character if it fits in a single unit
      read_utf8_code_point(from, std::min(max_single_utf16_unit, maxcode));
    return from.next - begin;
  }

  // return pos such that [begin,pos) is valid UCS-4 string no longer than max
  // (pos is returned as a byte offset from BEGIN; MAX counts code points).
  int
  ucs4_len(const char* begin, const char* end, size_t max,
           char32_t maxcode = max_code_point)
  {
    range<const char> from{ begin, end };
    size_t count = 0;
    while (count < max)
      {
        const char32_t c = read_utf8_code_point(from, maxcode);
        if (c == invalid_mb_sequence || c == incomplete_mb_character)
          break;
        ++count;
      }
    return from.next - begin;
  }
}
|
||||
|
||||
// Define members of codecvt<char16_t, char, mbstate_t> specialization.
|
||||
// Converts from UTF-8 to UTF-16.
|
||||
|
||||
locale::id codecvt<char16_t, char, mbstate_t>::id;
|
||||
|
||||
codecvt<char16_t, char, mbstate_t>::~codecvt() { }
|
||||
|
||||
codecvt_base::result
|
||||
codecvt<char16_t, char, mbstate_t>::
|
||||
do_out(state_type&,
|
||||
const intern_type* __from,
|
||||
const intern_type* __from_end, const intern_type*& __from_next,
|
||||
extern_type* __to, extern_type* __to_end,
|
||||
extern_type*& __to_next) const
|
||||
{
|
||||
range<const char16_t> from{ __from, __from_end };
|
||||
range<char> to{ __to, __to_end };
|
||||
auto res = utf16_out(from, to);
|
||||
__from_next = from.next;
|
||||
__to_next = to.next;
|
||||
return res;
|
||||
}
|
||||
|
||||
codecvt_base::result
|
||||
codecvt<char16_t, char, mbstate_t>::
|
||||
do_unshift(state_type&, extern_type* __to, extern_type*,
|
||||
extern_type*& __to_next) const
|
||||
{
|
||||
__to_next = __to;
|
||||
return noconv; // we don't use mbstate_t for the unicode facets
|
||||
}
|
||||
|
||||
codecvt_base::result
|
||||
codecvt<char16_t, char, mbstate_t>::
|
||||
do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
|
||||
const extern_type*& __from_next,
|
||||
intern_type* __to, intern_type* __to_end,
|
||||
intern_type*& __to_next) const
|
||||
{
|
||||
range<const char> from{ __from, __from_end };
|
||||
range<char16_t> to{ __to, __to_end };
|
||||
auto res = utf16_in(from, to);
|
||||
__from_next = from.next;
|
||||
__to_next = to.next;
|
||||
return res;
|
||||
}
|
||||
|
||||
int
|
||||
codecvt<char16_t, char, mbstate_t>::do_encoding() const throw()
|
||||
{ return 0; }
|
||||
|
||||
bool
|
||||
codecvt<char16_t, char, mbstate_t>::do_always_noconv() const throw()
|
||||
{ return false; }
|
||||
|
||||
int
|
||||
codecvt<char16_t, char, mbstate_t>::
|
||||
do_length(state_type&, const extern_type* __from,
|
||||
const extern_type* __end, size_t __max) const
|
||||
{
|
||||
return utf16_len(__from, __end, __max);
|
||||
}
|
||||
|
||||
int
|
||||
codecvt<char16_t, char, mbstate_t>::do_max_length() const throw()
|
||||
{
|
||||
// Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
|
||||
// whereas 4 byte sequences require two 16-bit code units.
|
||||
return 3;
|
||||
}
|
||||
|
||||
// Define members of codecvt<char32_t, char, mbstate_t> specialization.
|
||||
// Converts from UTF-8 to UTF-32 (aka UCS-4).
|
||||
|
||||
locale::id codecvt<char32_t, char, mbstate_t>::id;
|
||||
|
||||
codecvt<char32_t, char, mbstate_t>::~codecvt() { }
|
||||
|
||||
codecvt_base::result
|
||||
codecvt<char32_t, char, mbstate_t>::
|
||||
do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
|
||||
const intern_type*& __from_next,
|
||||
extern_type* __to, extern_type* __to_end,
|
||||
extern_type*& __to_next) const
|
||||
{
|
||||
range<const char32_t> from{ __from, __from_end };
|
||||
range<char> to{ __to, __to_end };
|
||||
auto res = ucs4_out(from, to);
|
||||
__from_next = from.next;
|
||||
__to_next = to.next;
|
||||
return res;
|
||||
}
|
||||
|
||||
codecvt_base::result
|
||||
codecvt<char32_t, char, mbstate_t>::
|
||||
do_unshift(state_type&, extern_type* __to, extern_type*,
|
||||
extern_type*& __to_next) const
|
||||
{
|
||||
__to_next = __to;
|
||||
return noconv;
|
||||
}
|
||||
|
||||
codecvt_base::result
|
||||
codecvt<char32_t, char, mbstate_t>::
|
||||
do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
|
||||
const extern_type*& __from_next,
|
||||
intern_type* __to, intern_type* __to_end,
|
||||
intern_type*& __to_next) const
|
||||
{
|
||||
range<const char> from{ __from, __from_end };
|
||||
range<char32_t> to{ __to, __to_end };
|
||||
auto res = ucs4_in(from, to);
|
||||
__from_next = from.next;
|
||||
__to_next = to.next;
|
||||
return res;
|
||||
}
|
||||
|
||||
int
|
||||
codecvt<char32_t, char, mbstate_t>::do_encoding() const throw()
|
||||
{ return 0; }
|
||||
|
||||
bool
|
||||
codecvt<char32_t, char, mbstate_t>::do_always_noconv() const throw()
|
||||
{ return false; }
|
||||
|
||||
int
|
||||
codecvt<char32_t, char, mbstate_t>::
|
||||
do_length(state_type&, const extern_type* __from,
|
||||
const extern_type* __end, size_t __max) const
|
||||
{
|
||||
return ucs4_len(__from, __end, __max);
|
||||
}
|
||||
|
||||
int
|
||||
codecvt<char32_t, char, mbstate_t>::do_max_length() const throw()
|
||||
{ return 4; }
|
||||
|
||||
// Explicit instantiations of the abstract base class for the two character
// types whose codecvt specializations are defined in this file.
// NOTE(review): "inline template" is presumably the GNU extension that
// emits only the compiler support data for the class -- confirm against the
// GCC manual ("Where's the Template?").
inline template class __codecvt_abstract_base<char16_t, char, mbstate_t>;
|
||||
inline template class __codecvt_abstract_base<char32_t, char, mbstate_t>;
|
||||
|
||||
_GLIBCXX_END_NAMESPACE_VERSION
|
||||
}
|
||||
#endif // _GLIBCXX_USE_C99_STDINT_TR1
|
@ -176,6 +176,16 @@ numeric_members_cow.o: numeric_members_cow.cc
|
||||
$(CXXCOMPILE) $(GLIBCXX_ABI_FLAGS) -fimplicit-templates -c $<
|
||||
endif
|
||||
|
||||
# XXX TODO move locale_init.cc and localename.cc to src/c++11
|
||||
locale_init.lo: locale_init.cc
|
||||
$(LTCXXCOMPILE) -std=gnu++11 -c $<
|
||||
locale_init.o: locale_init.cc
|
||||
$(LTCXXCOMPILE) -std=gnu++11 -c $<
|
||||
localename.lo: localename.cc
|
||||
$(LTCXXCOMPILE) -std=gnu++11 -c $<
|
||||
localename.o: localename.cc
|
||||
$(LTCXXCOMPILE) -std=gnu++11 -c $<
|
||||
|
||||
# Use special rules for the deprecated source files so that they find
|
||||
# deprecated include files.
|
||||
GLIBCXX_INCLUDE_DIR=$(glibcxx_builddir)/include
|
||||
|
@ -764,6 +764,16 @@ vpath % $(top_srcdir)/src/c++98
|
||||
@ENABLE_DUAL_ABI_TRUE@ $(LTCXXCOMPILE) $(GLIBCXX_ABI_FLAGS) -fimplicit-templates -c $<
|
||||
@ENABLE_DUAL_ABI_TRUE@numeric_members_cow.o: numeric_members_cow.cc
|
||||
@ENABLE_DUAL_ABI_TRUE@ $(CXXCOMPILE) $(GLIBCXX_ABI_FLAGS) -fimplicit-templates -c $<
|
||||
|
||||
# XXX TODO move locale_init.cc and localename.cc to src/c++11
|
||||
locale_init.lo: locale_init.cc
|
||||
$(LTCXXCOMPILE) -std=gnu++11 -c $<
|
||||
locale_init.o: locale_init.cc
|
||||
$(LTCXXCOMPILE) -std=gnu++11 -c $<
|
||||
localename.lo: localename.cc
|
||||
$(LTCXXCOMPILE) -std=gnu++11 -c $<
|
||||
localename.o: localename.cc
|
||||
$(LTCXXCOMPILE) -std=gnu++11 -c $<
|
||||
strstream.lo: strstream.cc
|
||||
$(LTCXXCOMPILE) -I$(GLIBCXX_INCLUDE_DIR)/backward -Wno-deprecated -c $<
|
||||
strstream.o: strstream.cc
|
||||
|
@ -57,7 +57,7 @@ _GLIBCXX_LOC_ID(_ZNSt8messagesIwE2idE);
|
||||
|
||||
namespace
|
||||
{
|
||||
const int num_facets = _GLIBCXX_NUM_FACETS
|
||||
const int num_facets = _GLIBCXX_NUM_FACETS + _GLIBCXX_NUM_UNICODE_FACETS
|
||||
+ (_GLIBCXX_USE_DUAL_ABI ? _GLIBCXX_NUM_CXX11_FACETS : 0);
|
||||
|
||||
__gnu_cxx::__mutex&
|
||||
@ -201,6 +201,16 @@ namespace
|
||||
fake_messages_w messages_w;
|
||||
#endif
|
||||
|
||||
#ifdef _GLIBCXX_USE_C99_STDINT_TR1
|
||||
typedef char fake_codecvt_c16[sizeof(codecvt<char16_t, char, mbstate_t>)]
|
||||
__attribute__ ((aligned(__alignof__(codecvt<char16_t, char, mbstate_t>))));
|
||||
fake_codecvt_c16 codecvt_c16;
|
||||
|
||||
typedef char fake_codecvt_c32[sizeof(codecvt<char32_t, char, mbstate_t>)]
|
||||
__attribute__ ((aligned(__alignof__(codecvt<char32_t, char, mbstate_t>))));
|
||||
fake_codecvt_c32 codecvt_c32;
|
||||
#endif
|
||||
|
||||
// Storage for "C" locale caches.
|
||||
typedef char fake_num_cache_c[sizeof(std::__numpunct_cache<char>)]
|
||||
__attribute__ ((aligned(__alignof__(std::__numpunct_cache<char>))));
|
||||
@ -318,6 +328,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
#ifdef _GLIBCXX_USE_WCHAR_T
|
||||
&std::ctype<wchar_t>::id,
|
||||
&codecvt<wchar_t, char, mbstate_t>::id,
|
||||
#endif
|
||||
#ifdef _GLIBCXX_USE_C99_STDINT_TR1
|
||||
&codecvt<char16_t, char, mbstate_t>::id,
|
||||
&codecvt<char32_t, char, mbstate_t>::id,
|
||||
#endif
|
||||
0
|
||||
};
|
||||
@ -522,6 +536,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
_M_init_facet(new (&messages_w) std::messages<wchar_t>(1));
|
||||
#endif
|
||||
|
||||
#ifdef _GLIBCXX_USE_C99_STDINT_TR1
|
||||
_M_init_facet(new (&codecvt_c16) codecvt<char16_t, char, mbstate_t>(1));
|
||||
_M_init_facet(new (&codecvt_c32) codecvt<char32_t, char, mbstate_t>(1));
|
||||
#endif
|
||||
|
||||
#if _GLIBCXX_USE_DUAL_ABI
|
||||
facet* extra[] = { __npc, __mpcf, __mpct
|
||||
# ifdef _GLIBCXX_USE_WCHAR_T
|
||||
|
@ -171,7 +171,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
}
|
||||
}
|
||||
|
||||
const int num_facets = _GLIBCXX_NUM_FACETS
|
||||
const int num_facets = _GLIBCXX_NUM_FACETS + _GLIBCXX_NUM_UNICODE_FACETS
|
||||
+ (_GLIBCXX_USE_DUAL_ABI ? _GLIBCXX_NUM_CXX11_FACETS : 0);
|
||||
|
||||
// Construct named _Impl.
|
||||
@ -267,7 +267,12 @@ const int num_facets = _GLIBCXX_NUM_FACETS
|
||||
_M_init_facet(new time_get<wchar_t>);
|
||||
_M_init_facet(new time_put<wchar_t>);
|
||||
_M_init_facet(new std::messages<wchar_t>(__cloc, __s));
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef _GLIBCXX_USE_C99_STDINT_TR1
|
||||
_M_init_facet(new codecvt<char16_t, char, mbstate_t>);
|
||||
_M_init_facet(new codecvt<char32_t, char, mbstate_t>);
|
||||
#endif
|
||||
|
||||
#if _GLIBCXX_USE_DUAL_ABI
|
||||
_M_init_extra(&__cloc, &__clocm, __s, __smon);
|
||||
|
76
libstdc++-v3/testsuite/22_locale/codecvt/utf8.cc
Normal file
76
libstdc++-v3/testsuite/22_locale/codecvt/utf8.cc
Normal file
@ -0,0 +1,76 @@
|
||||
// Copyright (C) 2015 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 3, or (at your option)
|
||||
// any later version.
|
||||
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License along
|
||||
// with this library; see the file COPYING3. If not see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
// { dg-require-cstdint "" }
|
||||
// { dg-options "-std=gnu++11" }
|
||||
|
||||
#include <locale>
|
||||
#include <iterator>
|
||||
#include <string>
|
||||
#include <testsuite_hooks.h>
|
||||
|
||||
// UTF-8 text that both conversions in test() are checked against, and its
// length in chars (terminating NUL excluded).
const char expected[] = u8"£¥€";
|
||||
const std::size_t expected_len = std::char_traits<char>::length(expected);
|
||||
|
||||
template<typename C>
|
||||
void test(const C* from)
|
||||
{
|
||||
auto len = std::char_traits<C>::length(from);
|
||||
std::mbstate_t state{};
|
||||
char buf[16] = { };
|
||||
using test_type = std::codecvt<C, char, std::mbstate_t>;
|
||||
const test_type& cvt = std::use_facet<test_type>(std::locale::classic());
|
||||
auto from_end = from + len;
|
||||
auto from_next = from;
|
||||
auto buf_end = std::end(buf);
|
||||
auto buf_next = buf;
|
||||
auto res = cvt.out(state, from, from_end, from_next, buf, buf_end, buf_next);
|
||||
VERIFY( res == std::codecvt_base::ok );
|
||||
VERIFY( from_next == from_end );
|
||||
VERIFY( (buf_next - buf) == expected_len );
|
||||
VERIFY( 0 == std::char_traits<char>::compare(buf, expected, expected_len) );
|
||||
|
||||
C buf2[16];
|
||||
auto exp_end = expected + expected_len;
|
||||
auto exp_next = expected;
|
||||
auto buf2_end = std::end(buf2);
|
||||
auto buf2_next = buf2;
|
||||
res = cvt.in(state, expected, exp_end, exp_next, buf2, buf2_end, buf2_next);
|
||||
VERIFY( res == std::codecvt_base::ok );
|
||||
VERIFY( exp_next == exp_end );
|
||||
VERIFY( (buf2_next - buf2) == len );
|
||||
VERIFY( 0 == std::char_traits<C>::compare(buf2, from, len) );
|
||||
}
|
||||
|
||||
// Runs the round-trip check with a char16_t literal, i.e. against
// std::codecvt<char16_t, char, std::mbstate_t>.
void
|
||||
test01()
|
||||
{
|
||||
test(u"£¥€");
|
||||
}
|
||||
|
||||
// Runs the round-trip check with a char32_t literal, i.e. against
// std::codecvt<char32_t, char, std::mbstate_t>.
void
|
||||
test02()
|
||||
{
|
||||
test(U"£¥€");
|
||||
}
|
||||
|
||||
// Test driver: runs the char16_t check, then the char32_t check.
int
|
||||
main()
|
||||
{
|
||||
test01();
|
||||
|
||||
test02();
|
||||
|
||||
}
|
@ -1,4 +1,5 @@
|
||||
// { dg-require-iconv "ISO-8859-1" }
|
||||
// { dg-options "-std=gnu++11" }
|
||||
|
||||
// Copyright (C) 2006-2015 Free Software Foundation, Inc.
|
||||
//
|
||||
@ -32,6 +33,11 @@ typedef std::codecvt<char, char, std::mbstate_t> c_codecvt;
|
||||
typedef std::codecvt<wchar_t, char, std::mbstate_t> w_codecvt;
|
||||
#endif
|
||||
|
||||
#ifdef _GLIBCXX_USE_C99_STDINT_TR1
|
||||
typedef std::codecvt<char16_t, char, std::mbstate_t> u16_codecvt;
|
||||
typedef std::codecvt<char32_t, char, std::mbstate_t> u32_codecvt;
|
||||
#endif
|
||||
|
||||
class gnu_facet: public std::locale::facet
|
||||
{
|
||||
public:
|
||||
@ -60,6 +66,10 @@ void test01()
|
||||
VERIFY( has_facet<c_codecvt>(loc13) );
|
||||
#ifdef _GLIBCXX_USE_WCHAR_T
|
||||
VERIFY( has_facet<w_codecvt>(loc13) );
|
||||
#endif
|
||||
#ifdef _GLIBCXX_USE_C99_STDINT_TR1
|
||||
VERIFY( has_facet<u16_codecvt>(loc13) );
|
||||
VERIFY( has_facet<u32_codecvt>(loc13) );
|
||||
#endif
|
||||
VERIFY( has_facet<unicode_codecvt>(loc13) );
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user