mirror of
https://gcc.gnu.org/git/gcc.git
synced 2024-11-23 19:03:59 +08:00
libcpp: Add -Winvalid-utf8 warning [PR106655]
The following patch introduces a new warning - -Winvalid-utf8 similarly to what clang now has - to diagnose invalid UTF-8 byte sequences in comments, but not just in those, but also in string/character literals and outside of them. The warning is on by default when explicit -finput-charset=UTF-8 is used and C++23 compilation is requested and if -{,W}pedantic or -pedantic-errors it is actually a pedwarn. The reason it is on by default only for -finput-charset=UTF-8 is that the sources often are UTF-8, but sometimes could be some ASCII compatible single byte encoding where non-ASCII characters only appear in comments. So having the warning off by default is IMO desirable. The C++23 pedantic mode for when the source code is UTF-8 is -std=c++23 -pedantic-errors -finput-charset=UTF-8. 2022-09-01 Jakub Jelinek <jakub@redhat.com> PR c++/106655 libcpp/ * include/cpplib.h (struct cpp_options): Implement C++23 P2295R6 - Support for UTF-8 as a portable source file encoding. Add cpp_warn_invalid_utf8 and cpp_input_charset_explicit fields. (enum cpp_warning_reason): Add CPP_W_INVALID_UTF8 enumerator. * init.cc (cpp_create_reader): Initialize cpp_warn_invalid_utf8 and cpp_input_charset_explicit. * charset.cc (_cpp_valid_utf8): Adjust function comment. * lex.cc (UCS_LIMIT): Define. (utf8_continuation): New const variable. (utf8_signifier): Move earlier in the file. (_cpp_warn_invalid_utf8, _cpp_handle_multibyte_utf8): New functions. (_cpp_skip_block_comment): Handle -Winvalid-utf8 warning. (skip_line_comment): Likewise. (lex_raw_string, lex_string): Likewise. (_cpp_lex_direct): Likewise. gcc/ * doc/invoke.texi (-Winvalid-utf8): Document it. gcc/c-family/ * c.opt (-Winvalid-utf8): New warning. * c-opts.cc (c_common_handle_option) <case OPT_finput_charset_>: Set cpp_opts->cpp_input_charset_explicit. (c_common_post_options): If -finput-charset=UTF-8 is explicit in C++23, enable -Winvalid-utf8 by default and if -pedantic or -pedantic-errors, make it a pedwarn. 
gcc/testsuite/ * c-c++-common/cpp/Winvalid-utf8-1.c: New test. * c-c++-common/cpp/Winvalid-utf8-2.c: New test. * c-c++-common/cpp/Winvalid-utf8-3.c: New test. * g++.dg/cpp23/Winvalid-utf8-1.C: New test. * g++.dg/cpp23/Winvalid-utf8-2.C: New test. * g++.dg/cpp23/Winvalid-utf8-3.C: New test. * g++.dg/cpp23/Winvalid-utf8-4.C: New test. * g++.dg/cpp23/Winvalid-utf8-5.C: New test. * g++.dg/cpp23/Winvalid-utf8-6.C: New test. * g++.dg/cpp23/Winvalid-utf8-7.C: New test. * g++.dg/cpp23/Winvalid-utf8-8.C: New test. * g++.dg/cpp23/Winvalid-utf8-9.C: New test. * g++.dg/cpp23/Winvalid-utf8-10.C: New test. * g++.dg/cpp23/Winvalid-utf8-11.C: New test. * g++.dg/cpp23/Winvalid-utf8-12.C: New test.
This commit is contained in:
parent
bdfe0d1ce0
commit
0b8c57ed40
@ -534,6 +534,7 @@ c_common_handle_option (size_t scode, const char *arg, HOST_WIDE_INT value,
|
||||
|
||||
case OPT_finput_charset_:
|
||||
cpp_opts->input_charset = arg;
|
||||
cpp_opts->cpp_input_charset_explicit = 1;
|
||||
break;
|
||||
|
||||
case OPT_ftemplate_depth_:
|
||||
@ -1152,6 +1153,17 @@ c_common_post_options (const char **pfilename)
|
||||
lang_hooks.preprocess_options (parse_in);
|
||||
cpp_post_options (parse_in);
|
||||
init_global_opts_from_cpp (&global_options, cpp_get_options (parse_in));
|
||||
/* For C++23 and explicit -finput-charset=UTF-8, turn on -Winvalid-utf8
|
||||
by default, and make it a pedwarn when pedantic, unless -Wno-invalid-utf8. */
|
||||
if (cxx_dialect >= cxx23
|
||||
&& cpp_opts->cpp_input_charset_explicit
|
||||
&& strcmp (cpp_opts->input_charset, "UTF-8") == 0
|
||||
&& (cpp_opts->cpp_warn_invalid_utf8
|
||||
|| !global_options_set.x_warn_invalid_utf8))
|
||||
{
|
||||
global_options.x_warn_invalid_utf8 = 1;
|
||||
cpp_opts->cpp_warn_invalid_utf8 = cpp_opts->cpp_pedantic ? 2 : 1;
|
||||
}
|
||||
|
||||
/* Let diagnostics infrastructure know how to convert input files the same
|
||||
way libcpp will do it, namely using the configured input charset and
|
||||
|
@ -821,6 +821,10 @@ Winvalid-pch
|
||||
C ObjC C++ ObjC++ CPP(warn_invalid_pch) CppReason(CPP_W_INVALID_PCH) Var(cpp_warn_invalid_pch) Init(0) Warning
|
||||
Warn about PCH files that are found but not used.
|
||||
|
||||
Winvalid-utf8
|
||||
C ObjC C++ ObjC++ CPP(cpp_warn_invalid_utf8) CppReason(CPP_W_INVALID_UTF8) Var(warn_invalid_utf8) Init(0) Warning
|
||||
Warn about invalid UTF-8 characters.
|
||||
|
||||
Wjump-misses-init
|
||||
C ObjC Var(warn_jump_misses_init) Warning LangEnabledby(C ObjC,Wc++-compat)
|
||||
Warn when a jump misses a variable initialization.
|
||||
|
@ -365,9 +365,9 @@ Objective-C and Objective-C++ Dialects}.
|
||||
-Winfinite-recursion @gol
|
||||
-Winit-self -Winline -Wno-int-conversion -Wint-in-bool-context @gol
|
||||
-Wno-int-to-pointer-cast -Wno-invalid-memory-model @gol
|
||||
-Winvalid-pch -Wjump-misses-init -Wlarger-than=@var{byte-size} @gol
|
||||
-Wlogical-not-parentheses -Wlogical-op -Wlong-long @gol
|
||||
-Wno-lto-type-mismatch -Wmain -Wmaybe-uninitialized @gol
|
||||
-Winvalid-pch -Winvalid-utf8 -Wjump-misses-init @gol
|
||||
-Wlarger-than=@var{byte-size} -Wlogical-not-parentheses -Wlogical-op @gol
|
||||
-Wlong-long -Wno-lto-type-mismatch -Wmain -Wmaybe-uninitialized @gol
|
||||
-Wmemset-elt-size -Wmemset-transposed-args @gol
|
||||
-Wmisleading-indentation -Wmissing-attributes -Wmissing-braces @gol
|
||||
-Wmissing-field-initializers -Wmissing-format-attribute @gol
|
||||
@ -9569,6 +9569,13 @@ different size.
|
||||
Warn if a precompiled header (@pxref{Precompiled Headers}) is found in
|
||||
the search path but cannot be used.
|
||||
|
||||
@item -Winvalid-utf8
|
||||
@opindex Winvalid-utf8
|
||||
@opindex Wno-invalid-utf8
|
||||
Warn if an invalid UTF-8 character is found.
|
||||
This warning is on by default for C++23 if @option{-finput-charset=UTF-8}
|
||||
is used and turned into error with @option{-pedantic-errors}.
|
||||
|
||||
@item -Wlong-long
|
||||
@opindex Wlong-long
|
||||
@opindex Wno-long-long
|
||||
|
43
gcc/testsuite/c-c++-common/cpp/Winvalid-utf8-1.c
Normal file
43
gcc/testsuite/c-c++-common/cpp/Winvalid-utf8-1.c
Normal file
@ -0,0 +1,43 @@
|
||||
// P2295R6 - Support for UTF-8 as a portable source file encoding
|
||||
// This test intentionally contains various byte sequences which are not valid UTF-8
|
||||
// { dg-do preprocess }
|
||||
// { dg-options "-finput-charset=UTF-8 -Winvalid-utf8" }
|
||||
|
||||
// aÂ€ß¿à €íŸ¿î€€ð<E282AC>€€ô<E282AC>¿¿a { dg-bogus "invalid UTF-8 character" }
|
||||
// a€a { dg-warning "invalid UTF-8 character <80>" }
|
||||
// a¿a { dg-warning "invalid UTF-8 character <bf>" }
|
||||
// aÀa { dg-warning "invalid UTF-8 character <c0>" }
|
||||
// aÁa { dg-warning "invalid UTF-8 character <c1>" }
|
||||
// aõa { dg-warning "invalid UTF-8 character <f5>" }
|
||||
// aÿa { dg-warning "invalid UTF-8 character <ff>" }
|
||||
// aÂa { dg-warning "invalid UTF-8 character <c2>" }
|
||||
// aàa { dg-warning "invalid UTF-8 character <e0>" }
|
||||
// aà€¿a { dg-warning "invalid UTF-8 character <e0><80><bf>" }
|
||||
// aàŸ€a { dg-warning "invalid UTF-8 character <e0><9f><80>" }
|
||||
// aà¿a { dg-warning "invalid UTF-8 character <e0><bf>" }
|
||||
// aì€a { dg-warning "invalid UTF-8 character <ec><80>" }
|
||||
// aí €a { dg-warning "invalid UTF-8 character <ed><a0><80>" }
|
||||
// að€€€a { dg-warning "invalid UTF-8 character <f0><80><80><80>" }
|
||||
// að<61>¿¿a { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" }
|
||||
// aô<61>€€a { dg-warning "invalid UTF-8 character <f4><90><80><80>" }
|
||||
// aý¿¿¿¿¿a { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" }
|
||||
// { dg-warning "invalid UTF-8 character <bf>" "" { target *-*-* } .-1 }
|
||||
/* aÂ€ß¿à €íŸ¿î€€ð<E282AC>€€ô<E282AC>¿¿a { dg-bogus "invalid UTF-8 character" } */
|
||||
/* a€a { dg-warning "invalid UTF-8 character <80>" } */
|
||||
/* a¿a { dg-warning "invalid UTF-8 character <bf>" } */
|
||||
/* aÀa { dg-warning "invalid UTF-8 character <c0>" } */
|
||||
/* aÁa { dg-warning "invalid UTF-8 character <c1>" } */
|
||||
/* aõa { dg-warning "invalid UTF-8 character <f5>" } */
|
||||
/* aÿa { dg-warning "invalid UTF-8 character <ff>" } */
|
||||
/* aÂa { dg-warning "invalid UTF-8 character <c2>" } */
|
||||
/* aàa { dg-warning "invalid UTF-8 character <e0>" } */
|
||||
/* aà€¿a { dg-warning "invalid UTF-8 character <e0><80><bf>" } */
|
||||
/* aàŸ€a { dg-warning "invalid UTF-8 character <e0><9f><80>" } */
|
||||
/* aà¿a { dg-warning "invalid UTF-8 character <e0><bf>" } */
|
||||
/* aì€a { dg-warning "invalid UTF-8 character <ec><80>" } */
|
||||
/* aí €a { dg-warning "invalid UTF-8 character <ed><a0><80>" } */
|
||||
/* að€€€a { dg-warning "invalid UTF-8 character <f0><80><80><80>" } */
|
||||
/* að<61>¿¿a { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" } */
|
||||
/* aô<61>€€a { dg-warning "invalid UTF-8 character <f4><90><80><80>" } */
|
||||
/* aý¿¿¿¿¿a { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" } */
|
||||
/* { dg-warning "invalid UTF-8 character <bf>" "" { target *-*-* } .-1 } */
|
88
gcc/testsuite/c-c++-common/cpp/Winvalid-utf8-2.c
Normal file
88
gcc/testsuite/c-c++-common/cpp/Winvalid-utf8-2.c
Normal file
@ -0,0 +1,88 @@
|
||||
// P2295R6 - Support for UTF-8 as a portable source file encoding
|
||||
// This test intentionally contains various byte sequences which are not valid UTF-8
|
||||
// { dg-do preprocess { target { c || c++11 } } }
|
||||
// { dg-require-effective-target wchar }
|
||||
// { dg-options "-finput-charset=UTF-8 -Winvalid-utf8" }
|
||||
// { dg-additional-options "-std=gnu99" { target c } }
|
||||
|
||||
#ifndef __cplusplus
|
||||
#include <wchar.h>
|
||||
typedef __CHAR16_TYPE__ char16_t;
|
||||
typedef __CHAR32_TYPE__ char32_t;
|
||||
#endif
|
||||
|
||||
char32_t a = U'€'; // { dg-warning "invalid UTF-8 character <80>" }
|
||||
char32_t b = U'¿'; // { dg-warning "invalid UTF-8 character <bf>" }
|
||||
char32_t c = U'À'; // { dg-warning "invalid UTF-8 character <c0>" }
|
||||
char32_t d = U'Á'; // { dg-warning "invalid UTF-8 character <c1>" }
|
||||
char32_t e = U'õ'; // { dg-warning "invalid UTF-8 character <f5>" }
|
||||
char32_t f = U'ÿ'; // { dg-warning "invalid UTF-8 character <ff>" }
|
||||
char32_t g = U'Â'; // { dg-warning "invalid UTF-8 character <c2>" }
|
||||
char32_t h = U'à'; // { dg-warning "invalid UTF-8 character <e0>" }
|
||||
char32_t i = U'à€¿'; // { dg-warning "invalid UTF-8 character <e0><80><bf>" }
|
||||
char32_t j = U'àŸ€'; // { dg-warning "invalid UTF-8 character <e0><9f><80>" }
|
||||
char32_t k = U'à¿'; // { dg-warning "invalid UTF-8 character <e0><bf>" }
|
||||
char32_t l = U'ì€'; // { dg-warning "invalid UTF-8 character <ec><80>" }
|
||||
char32_t m = U'í €'; // { dg-warning "invalid UTF-8 character <ed><a0><80>" }
|
||||
char32_t n = U'ð€€€'; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" }
|
||||
char32_t o = U'ð<EFBFBD>¿¿'; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" }
|
||||
char32_t p = U'ô<EFBFBD>€€'; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" }
|
||||
char32_t q = U'ý¿¿¿¿¿'; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" }
|
||||
// { dg-warning "invalid UTF-8 character <bf>" "" { target *-*-* } .-1 }
|
||||
const char32_t *A = U"Â€ß¿à €íŸ¿î€€ð<EFBFBD>€€ô<EFBFBD>¿¿"; // { dg-bogus "invalid UTF-8 character" }
|
||||
const char32_t *B = U"€"; // { dg-warning "invalid UTF-8 character <80>" }
|
||||
const char32_t *C = U"¿"; // { dg-warning "invalid UTF-8 character <bf>" }
|
||||
const char32_t *D = U"À"; // { dg-warning "invalid UTF-8 character <c0>" }
|
||||
const char32_t *E = U"Á"; // { dg-warning "invalid UTF-8 character <c1>" }
|
||||
const char32_t *F = U"õ"; // { dg-warning "invalid UTF-8 character <f5>" }
|
||||
const char32_t *G = U"ÿ"; // { dg-warning "invalid UTF-8 character <ff>" }
|
||||
const char32_t *H = U"Â"; // { dg-warning "invalid UTF-8 character <c2>" }
|
||||
const char32_t *I = U"à"; // { dg-warning "invalid UTF-8 character <e0>" }
|
||||
const char32_t *J = U"à€¿"; // { dg-warning "invalid UTF-8 character <e0><80><bf>" }
|
||||
const char32_t *K = U"àŸ€"; // { dg-warning "invalid UTF-8 character <e0><9f><80>" }
|
||||
const char32_t *L = U"à¿"; // { dg-warning "invalid UTF-8 character <e0><bf>" }
|
||||
const char32_t *M = U"ì€"; // { dg-warning "invalid UTF-8 character <ec><80>" }
|
||||
const char32_t *N = U"í €"; // { dg-warning "invalid UTF-8 character <ed><a0><80>" }
|
||||
const char32_t *O = U"ð€€€"; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" }
|
||||
const char32_t *P = U"ð<EFBFBD>¿¿"; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" }
|
||||
const char32_t *Q = U"ô<EFBFBD>€€"; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" }
|
||||
const char32_t *R = U"ý¿¿¿¿¿"; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" }
|
||||
// { dg-warning "invalid UTF-8 character <bf>" "" { target *-*-* } .-1 }
|
||||
const char32_t *A1 = UR"(Â€ß¿à €íŸ¿î€€ð<E282AC>€€ô<E282AC>¿¿)"; // { dg-bogus "invalid UTF-8 character" }
|
||||
const char32_t *B1 = UR"(€)"; // { dg-warning "invalid UTF-8 character <80>" }
|
||||
const char32_t *C1 = UR"(¿)"; // { dg-warning "invalid UTF-8 character <bf>" }
|
||||
const char32_t *D1 = UR"(À)"; // { dg-warning "invalid UTF-8 character <c0>" }
|
||||
const char32_t *E1 = UR"(Á)"; // { dg-warning "invalid UTF-8 character <c1>" }
|
||||
const char32_t *F1 = UR"(õ)"; // { dg-warning "invalid UTF-8 character <f5>" }
|
||||
const char32_t *G1 = UR"(ÿ)"; // { dg-warning "invalid UTF-8 character <ff>" }
|
||||
const char32_t *H1 = UR"(Â)"; // { dg-warning "invalid UTF-8 character <c2>" }
|
||||
const char32_t *I1 = UR"(à)"; // { dg-warning "invalid UTF-8 character <e0>" }
|
||||
const char32_t *J1 = UR"(à€¿)"; // { dg-warning "invalid UTF-8 character <e0><80><bf>" }
|
||||
const char32_t *K1 = UR"(àŸ€)"; // { dg-warning "invalid UTF-8 character <e0><9f><80>" }
|
||||
const char32_t *L1 = UR"(à¿)"; // { dg-warning "invalid UTF-8 character <e0><bf>" }
|
||||
const char32_t *M1 = UR"(ì€)"; // { dg-warning "invalid UTF-8 character <ec><80>" }
|
||||
const char32_t *N1 = UR"(í €)"; // { dg-warning "invalid UTF-8 character <ed><a0><80>" }
|
||||
const char32_t *O1 = UR"(ð€€€)"; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" }
|
||||
const char32_t *P1 = UR"(ð<>¿¿)"; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" }
|
||||
const char32_t *Q1 = UR"(ô<>€€)"; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" }
|
||||
const char32_t *R1 = UR"(ý¿¿¿¿¿)"; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" }
|
||||
// { dg-warning "invalid UTF-8 character <bf>" "" { target *-*-* } .-1 }
|
||||
const char *A2 = u8"Â€ß¿à €íŸ¿î€€ð<EFBFBD>€€ô<EFBFBD>¿¿"; // { dg-bogus "invalid UTF-8 character" }
|
||||
const char *B2 = u8"€"; // { dg-warning "invalid UTF-8 character <80>" }
|
||||
const char *C2 = u8"¿"; // { dg-warning "invalid UTF-8 character <bf>" }
|
||||
const char *D2 = u8"À"; // { dg-warning "invalid UTF-8 character <c0>" }
|
||||
const char *E2 = u8"Á"; // { dg-warning "invalid UTF-8 character <c1>" }
|
||||
const char *F2 = u8"õ"; // { dg-warning "invalid UTF-8 character <f5>" }
|
||||
const char *G2 = u8"ÿ"; // { dg-warning "invalid UTF-8 character <ff>" }
|
||||
const char *H2 = u8"Â"; // { dg-warning "invalid UTF-8 character <c2>" }
|
||||
const char *I2 = u8"à"; // { dg-warning "invalid UTF-8 character <e0>" }
|
||||
const char *J2 = u8"à€¿"; // { dg-warning "invalid UTF-8 character <e0><80><bf>" }
|
||||
const char *K2 = u8"àŸ€"; // { dg-warning "invalid UTF-8 character <e0><9f><80>" }
|
||||
const char *L2 = u8"à¿"; // { dg-warning "invalid UTF-8 character <e0><bf>" }
|
||||
const char *M2 = u8"ì€"; // { dg-warning "invalid UTF-8 character <ec><80>" }
|
||||
const char *N2 = u8"í €"; // { dg-warning "invalid UTF-8 character <ed><a0><80>" }
|
||||
const char *O2 = u8"ð€€€"; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" }
|
||||
const char *P2 = u8"ð<EFBFBD>¿¿"; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" }
|
||||
const char *Q2 = u8"ô<EFBFBD>€€"; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" }
|
||||
const char *R2 = u8"ý¿¿¿¿¿"; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" }
|
||||
// { dg-warning "invalid UTF-8 character <bf>" "" { target *-*-* } .-1 }
|
27
gcc/testsuite/c-c++-common/cpp/Winvalid-utf8-3.c
Normal file
27
gcc/testsuite/c-c++-common/cpp/Winvalid-utf8-3.c
Normal file
@ -0,0 +1,27 @@
|
||||
// P2295R6 - Support for UTF-8 as a portable source file encoding
|
||||
// This test intentionally contains various byte sequences which are not valid UTF-8
|
||||
// { dg-do preprocess }
|
||||
// { dg-options "-finput-charset=UTF-8 -Winvalid-utf8" }
|
||||
|
||||
#define I(x)
|
||||
I(Â€ß¿à €íŸ¿î€€ð<EFBFBD>€€ô<EFBFBD>¿¿) // { dg-bogus "invalid UTF-8 character" }
|
||||
// { dg-error "is not valid in an identifier" "" { target c++ } .-1 }
|
||||
I(€) // { dg-warning "invalid UTF-8 character <80>" }
|
||||
I(¿) // { dg-warning "invalid UTF-8 character <bf>" }
|
||||
I(À) // { dg-warning "invalid UTF-8 character <c0>" }
|
||||
I(Á) // { dg-warning "invalid UTF-8 character <c1>" }
|
||||
I(õ) // { dg-warning "invalid UTF-8 character <f5>" }
|
||||
I(ÿ) // { dg-warning "invalid UTF-8 character <ff>" }
|
||||
I(Â) // { dg-warning "invalid UTF-8 character <c2>" }
|
||||
I(à) // { dg-warning "invalid UTF-8 character <e0>" }
|
||||
I(à€¿) // { dg-warning "invalid UTF-8 character <e0><80><bf>" }
|
||||
I(àŸ€) // { dg-warning "invalid UTF-8 character <e0><9f><80>" }
|
||||
I(à¿) // { dg-warning "invalid UTF-8 character <e0><bf>" }
|
||||
I(ì€) // { dg-warning "invalid UTF-8 character <ec><80>" }
|
||||
I(í €) // { dg-warning "invalid UTF-8 character <ed><a0><80>" }
|
||||
I(ð€€€) // { dg-warning "invalid UTF-8 character <f0><80><80><80>" }
|
||||
I(ð<EFBFBD>¿¿) // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" }
|
||||
I(ô<EFBFBD>€€) // { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c } }
|
||||
// { dg-error "is not valid in an identifier" "" { target c++ } .-1 }
|
||||
I(ý¿¿¿¿¿) // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c } }
|
||||
// { dg-error "is not valid in an identifier" "" { target c++ } .-1 }
|
43
gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-1.C
Normal file
43
gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-1.C
Normal file
@ -0,0 +1,43 @@
|
||||
// P2295R6 - Support for UTF-8 as a portable source file encoding
|
||||
// This test intentionally contains various byte sequences which are not valid UTF-8
|
||||
// { dg-do preprocess }
|
||||
// { dg-options "-finput-charset=UTF-8" }
|
||||
|
||||
// aÂ€ß¿à €íŸ¿î€€ð<E282AC>€€ô<E282AC>¿¿a { dg-bogus "invalid UTF-8 character" }
|
||||
// a€a { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
|
||||
// a¿a { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
|
||||
// aÀa { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
|
||||
// aÁa { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
|
||||
// aõa { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
|
||||
// aÿa { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
|
||||
// aÂa { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
|
||||
// aàa { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
|
||||
// aà€¿a { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
|
||||
// aàŸ€a { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
|
||||
// aà¿a { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
|
||||
// aì€a { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
|
||||
// aí €a { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
|
||||
// að€€€a { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
|
||||
// að<61>¿¿a { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
|
||||
// aô<61>€€a { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
|
||||
// aý¿¿¿¿¿a { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
|
||||
// { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
|
||||
/* aÂ€ß¿à €íŸ¿î€€ð<E282AC>€€ô<E282AC>¿¿a { dg-bogus "invalid UTF-8 character" } */
|
||||
/* a€a { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } } */
|
||||
/* a¿a { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } } */
|
||||
/* aÀa { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } } */
|
||||
/* aÁa { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } } */
|
||||
/* aõa { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } } */
|
||||
/* aÿa { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } } */
|
||||
/* aÂa { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } } */
|
||||
/* aàa { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } } */
|
||||
/* aà€¿a { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } } */
|
||||
/* aàŸ€a { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } } */
|
||||
/* aà¿a { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } } */
|
||||
/* aì€a { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } } */
|
||||
/* aí €a { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } } */
|
||||
/* að€€€a { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } } */
|
||||
/* að<61>¿¿a { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } } */
|
||||
/* aô<61>€€a { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } } */
|
||||
/* aý¿¿¿¿¿a { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } } */
|
||||
/* { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 } */
|
25
gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-10.C
Normal file
25
gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-10.C
Normal file
@ -0,0 +1,25 @@
|
||||
// P2295R6 - Support for UTF-8 as a portable source file encoding
|
||||
// This test intentionally contains various byte sequences which are not valid UTF-8
|
||||
// { dg-do preprocess }
|
||||
// { dg-options "-finput-charset=UTF-8 -pedantic" }
|
||||
|
||||
#define I(x)
|
||||
I(Â€ß¿à €íŸ¿î€€ð<EFBFBD>€€ô<EFBFBD>¿¿) // { dg-bogus "invalid UTF-8 character" }
|
||||
// { dg-error "is not valid in an identifier" "" { target *-*-* } .-1 }
|
||||
I(€) // { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
|
||||
I(¿) // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
|
||||
I(À) // { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
|
||||
I(Á) // { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
|
||||
I(õ) // { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
|
||||
I(ÿ) // { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
|
||||
I(Â) // { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
|
||||
I(à) // { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
|
||||
I(à€¿) // { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
|
||||
I(àŸ€) // { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
|
||||
I(à¿) // { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
|
||||
I(ì€) // { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
|
||||
I(í €) // { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
|
||||
I(ð€€€) // { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
|
||||
I(ð<EFBFBD>¿¿) // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
|
||||
I(ô<EFBFBD>€€) // { dg-error "is not valid in an identifier" }
|
||||
I(ý¿¿¿¿¿) // { dg-error "is not valid in an identifier" }
|
25
gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-11.C
Normal file
25
gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-11.C
Normal file
@ -0,0 +1,25 @@
|
||||
// P2295R6 - Support for UTF-8 as a portable source file encoding
|
||||
// This test intentionally contains various byte sequences which are not valid UTF-8
|
||||
// { dg-do preprocess }
|
||||
// { dg-options "-finput-charset=UTF-8 -pedantic-errors" }
|
||||
|
||||
#define I(x)
|
||||
I(Â€ß¿à €íŸ¿î€€ð<EFBFBD>€€ô<EFBFBD>¿¿) // { dg-bogus "invalid UTF-8 character" }
|
||||
// { dg-error "is not valid in an identifier" "" { target *-*-* } .-1 }
|
||||
I(€) // { dg-error "invalid UTF-8 character <80>" "" { target c++23 } }
|
||||
I(¿) // { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } }
|
||||
I(À) // { dg-error "invalid UTF-8 character <c0>" "" { target c++23 } }
|
||||
I(Á) // { dg-error "invalid UTF-8 character <c1>" "" { target c++23 } }
|
||||
I(õ) // { dg-error "invalid UTF-8 character <f5>" "" { target c++23 } }
|
||||
I(ÿ) // { dg-error "invalid UTF-8 character <ff>" "" { target c++23 } }
|
||||
I(Â) // { dg-error "invalid UTF-8 character <c2>" "" { target c++23 } }
|
||||
I(à) // { dg-error "invalid UTF-8 character <e0>" "" { target c++23 } }
|
||||
I(à€¿) // { dg-error "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
|
||||
I(àŸ€) // { dg-error "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
|
||||
I(à¿) // { dg-error "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
|
||||
I(ì€) // { dg-error "invalid UTF-8 character <ec><80>" "" { target c++23 } }
|
||||
I(í €) // { dg-error "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
|
||||
I(ð€€€) // { dg-error "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
|
||||
I(ð<EFBFBD>¿¿) // { dg-error "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
|
||||
I(ô<EFBFBD>€€) // { dg-error "is not valid in an identifier" }
|
||||
I(ý¿¿¿¿¿) // { dg-error "is not valid in an identifier" }
|
25
gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-12.C
Normal file
25
gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-12.C
Normal file
@ -0,0 +1,25 @@
|
||||
// P2295R6 - Support for UTF-8 as a portable source file encoding
|
||||
// This test intentionally contains various byte sequences which are not valid UTF-8
|
||||
// { dg-do preprocess }
|
||||
// { dg-options "-finput-charset=UTF-8 -pedantic-errors -Wno-invalid-utf8" }
|
||||
|
||||
#define I(x)
|
||||
I(Â€ß¿à €íŸ¿î€€ð<EFBFBD>€€ô<EFBFBD>¿¿) // { dg-bogus "invalid UTF-8 character" }
|
||||
// { dg-error "is not valid in an identifier" "" { target *-*-* } .-1 }
|
||||
I(€) // { dg-bogus "invalid UTF-8 character <80>" }
|
||||
I(¿) // { dg-bogus "invalid UTF-8 character <bf>" }
|
||||
I(À) // { dg-bogus "invalid UTF-8 character <c0>" }
|
||||
I(Á) // { dg-bogus "invalid UTF-8 character <c1>" }
|
||||
I(õ) // { dg-bogus "invalid UTF-8 character <f5>" }
|
||||
I(ÿ) // { dg-bogus "invalid UTF-8 character <ff>" }
|
||||
I(Â) // { dg-bogus "invalid UTF-8 character <c2>" }
|
||||
I(à) // { dg-bogus "invalid UTF-8 character <e0>" }
|
||||
I(à€¿) // { dg-bogus "invalid UTF-8 character <e0><80><bf>" }
|
||||
I(àŸ€) // { dg-bogus "invalid UTF-8 character <e0><9f><80>" }
|
||||
I(à¿) // { dg-bogus "invalid UTF-8 character <e0><bf>" }
|
||||
I(ì€) // { dg-bogus "invalid UTF-8 character <ec><80>" }
|
||||
I(í €) // { dg-bogus "invalid UTF-8 character <ed><a0><80>" }
|
||||
I(ð€€€) // { dg-bogus "invalid UTF-8 character <f0><80><80><80>" }
|
||||
I(ð<EFBFBD>¿¿) // { dg-bogus "invalid UTF-8 character <f0><8f><bf><bf>" }
|
||||
I(ô<EFBFBD>€€) // { dg-error "is not valid in an identifier" }
|
||||
I(ý¿¿¿¿¿) // { dg-error "is not valid in an identifier" }
|
43
gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-2.C
Normal file
43
gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-2.C
Normal file
@ -0,0 +1,43 @@
|
||||
// P2295R6 - Support for UTF-8 as a portable source file encoding
|
||||
// This test intentionally contains various byte sequences which are not valid UTF-8
|
||||
// { dg-do preprocess }
|
||||
// { dg-options "-finput-charset=UTF-8 -pedantic" }
|
||||
|
||||
// aÂ€ß¿à €íŸ¿î€€ð<E282AC>€€ô<E282AC>¿¿a { dg-bogus "invalid UTF-8 character" }
|
||||
// a€a { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
|
||||
// a¿a { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
|
||||
// aÀa { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
|
||||
// aÁa { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
|
||||
// aõa { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
|
||||
// aÿa { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
|
||||
// aÂa { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
|
||||
// aàa { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
|
||||
// aà€¿a { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
|
||||
// aàŸ€a { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
|
||||
// aà¿a { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
|
||||
// aì€a { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
|
||||
// aí €a { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
|
||||
// að€€€a { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
|
||||
// að<61>¿¿a { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
|
||||
// aô<61>€€a { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
|
||||
// aý¿¿¿¿¿a { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
|
||||
// { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
|
||||
/* aÂ€ß¿à €íŸ¿î€€ð<E282AC>€€ô<E282AC>¿¿a { dg-bogus "invalid UTF-8 character" } */
|
||||
/* a€a { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } } */
|
||||
/* a¿a { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } } */
|
||||
/* aÀa { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } } */
|
||||
/* aÁa { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } } */
|
||||
/* aõa { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } } */
|
||||
/* aÿa { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } } */
|
||||
/* aÂa { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } } */
|
||||
/* aàa { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } } */
|
||||
/* aà€¿a { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } } */
|
||||
/* aàŸ€a { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } } */
|
||||
/* aà¿a { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } } */
|
||||
/* aì€a { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } } */
|
||||
/* aí €a { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } } */
|
||||
/* að€€€a { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } } */
|
||||
/* að<61>¿¿a { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } } */
|
||||
/* aô<61>€€a { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } } */
|
||||
/* aý¿¿¿¿¿a { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } } */
|
||||
/* { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 } */
|
43
gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-3.C
Normal file
43
gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-3.C
Normal file
@ -0,0 +1,43 @@
|
||||
// P2295R6 - Support for UTF-8 as a portable source file encoding
|
||||
// This test intentionally contains various byte sequences which are not valid UTF-8
|
||||
// { dg-do preprocess }
|
||||
// { dg-options "-finput-charset=UTF-8 -pedantic-errors" }
|
||||
|
||||
// aÂ€ß¿à €íŸ¿î€€ð<E282AC>€€ô<E282AC>¿¿a { dg-bogus "invalid UTF-8 character" }
|
||||
// a€a { dg-error "invalid UTF-8 character <80>" "" { target c++23 } }
|
||||
// a¿a { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } }
|
||||
// aÀa { dg-error "invalid UTF-8 character <c0>" "" { target c++23 } }
|
||||
// aÁa { dg-error "invalid UTF-8 character <c1>" "" { target c++23 } }
|
||||
// aõa { dg-error "invalid UTF-8 character <f5>" "" { target c++23 } }
|
||||
// aÿa { dg-error "invalid UTF-8 character <ff>" "" { target c++23 } }
|
||||
// aÂa { dg-error "invalid UTF-8 character <c2>" "" { target c++23 } }
|
||||
// aàa { dg-error "invalid UTF-8 character <e0>" "" { target c++23 } }
|
||||
// aà€¿a { dg-error "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
|
||||
// aàŸ€a { dg-error "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
|
||||
// aà¿a { dg-error "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
|
||||
// aì€a { dg-error "invalid UTF-8 character <ec><80>" "" { target c++23 } }
|
||||
// aí €a { dg-error "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
|
||||
// að€€€a { dg-error "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
|
||||
// að<61>¿¿a { dg-error "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
|
||||
// aô<61>€€a { dg-error "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
|
||||
// aý¿¿¿¿¿a { dg-error "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
|
||||
// { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
|
||||
/* aÂ€ß¿à €íŸ¿î€€ð<E282AC>€€ô<E282AC>¿¿a { dg-bogus "invalid UTF-8 character" } */
|
||||
/* a€a { dg-error "invalid UTF-8 character <80>" "" { target c++23 } } */
|
||||
/* a¿a { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } } */
|
||||
/* aÀa { dg-error "invalid UTF-8 character <c0>" "" { target c++23 } } */
|
||||
/* aÁa { dg-error "invalid UTF-8 character <c1>" "" { target c++23 } } */
|
||||
/* aõa { dg-error "invalid UTF-8 character <f5>" "" { target c++23 } } */
|
||||
/* aÿa { dg-error "invalid UTF-8 character <ff>" "" { target c++23 } } */
|
||||
/* aÂa { dg-error "invalid UTF-8 character <c2>" "" { target c++23 } } */
|
||||
/* aàa { dg-error "invalid UTF-8 character <e0>" "" { target c++23 } } */
|
||||
/* aà€¿a { dg-error "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } } */
|
||||
/* aàŸ€a { dg-error "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } } */
|
||||
/* aà¿a { dg-error "invalid UTF-8 character <e0><bf>" "" { target c++23 } } */
|
||||
/* aì€a { dg-error "invalid UTF-8 character <ec><80>" "" { target c++23 } } */
|
||||
/* aí €a { dg-error "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } } */
|
||||
/* að€€€a { dg-error "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } } */
|
||||
/* að<61>¿¿a { dg-error "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } } */
|
||||
/* aô<61>€€a { dg-error "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } } */
|
||||
/* aý¿¿¿¿¿a { dg-error "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } } */
|
||||
/* { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } .-1 } */
|
43
gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-4.C
Normal file
43
gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-4.C
Normal file
@ -0,0 +1,43 @@
|
||||
// P2295R6 - Support for UTF-8 as a portable source file encoding
|
||||
// This test intentionally contains various byte sequences which are not valid UTF-8
|
||||
// { dg-do preprocess }
|
||||
// { dg-options "-finput-charset=UTF-8 -pedantic-errors -Wno-invalid-utf8" }
|
||||
|
||||
// aÂ€ß¿à €íŸ¿î€€ð<E282AC>€€ô<E282AC>¿¿a { dg-bogus "invalid UTF-8 character" }
|
||||
// a€a { dg-bogus "invalid UTF-8 character <80>" }
|
||||
// a¿a { dg-bogus "invalid UTF-8 character <bf>" }
|
||||
// aÀa { dg-bogus "invalid UTF-8 character <c0>" }
|
||||
// aÁa { dg-bogus "invalid UTF-8 character <c1>" }
|
||||
// aõa { dg-bogus "invalid UTF-8 character <f5>" }
|
||||
// aÿa { dg-bogus "invalid UTF-8 character <ff>" }
|
||||
// aÂa { dg-bogus "invalid UTF-8 character <c2>" }
|
||||
// aàa { dg-bogus "invalid UTF-8 character <e0>" }
|
||||
// aà€¿a { dg-bogus "invalid UTF-8 character <e0><80><bf>" }
|
||||
// aàŸ€a { dg-bogus "invalid UTF-8 character <e0><9f><80>" }
|
||||
// aà¿a { dg-bogus "invalid UTF-8 character <e0><bf>" }
|
||||
// aì€a { dg-bogus "invalid UTF-8 character <ec><80>" }
|
||||
// aí €a { dg-bogus "invalid UTF-8 character <ed><a0><80>" }
|
||||
// að€€€a { dg-bogus "invalid UTF-8 character <f0><80><80><80>" }
|
||||
// að<61>¿¿a { dg-bogus "invalid UTF-8 character <f0><8f><bf><bf>" }
|
||||
// aô<61>€€a { dg-bogus "invalid UTF-8 character <f4><90><80><80>" }
|
||||
// aý¿¿¿¿¿a { dg-bogus "invalid UTF-8 character <fd><bf><bf><bf>" }
|
||||
// { dg-bogus "invalid UTF-8 character <bf>" "" { target *-*-* } .-1 }
|
||||
/* aÂ€ß¿à €íŸ¿î€€ð<E282AC>€€ô<E282AC>¿¿a { dg-bogus "invalid UTF-8 character" } */
|
||||
/* a€a { dg-bogus "invalid UTF-8 character <80>" } */
|
||||
/* a¿a { dg-bogus "invalid UTF-8 character <bf>" } */
|
||||
/* aÀa { dg-bogus "invalid UTF-8 character <c0>" } */
|
||||
/* aÁa { dg-bogus "invalid UTF-8 character <c1>" } */
|
||||
/* aõa { dg-bogus "invalid UTF-8 character <f5>" } */
|
||||
/* aÿa { dg-bogus "invalid UTF-8 character <ff>" } */
|
||||
/* aÂa { dg-bogus "invalid UTF-8 character <c2>" } */
|
||||
/* aàa { dg-bogus "invalid UTF-8 character <e0>" } */
|
||||
/* aà€¿a { dg-bogus "invalid UTF-8 character <e0><80><bf>" } */
|
||||
/* aàŸ€a { dg-bogus "invalid UTF-8 character <e0><9f><80>" } */
|
||||
/* aà¿a { dg-bogus "invalid UTF-8 character <e0><bf>" } */
|
||||
/* aì€a { dg-bogus "invalid UTF-8 character <ec><80>" } */
|
||||
/* aí €a { dg-bogus "invalid UTF-8 character <ed><a0><80>" } */
|
||||
/* að€€€a { dg-bogus "invalid UTF-8 character <f0><80><80><80>" } */
|
||||
/* að<61>¿¿a { dg-bogus "invalid UTF-8 character <f0><8f><bf><bf>" } */
|
||||
/* aô<61>€€a { dg-bogus "invalid UTF-8 character <f4><90><80><80>" } */
|
||||
/* aý¿¿¿¿¿a { dg-bogus "invalid UTF-8 character <fd><bf><bf><bf>" } */
|
||||
/* { dg-bogus "invalid UTF-8 character <bf>" "" { target *-*-* } .-1 } */
|
80
gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-5.C
Normal file
80
gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-5.C
Normal file
@ -0,0 +1,80 @@
|
||||
// P2295R6 - Support for UTF-8 as a portable source file encoding
|
||||
// This test intentionally contains various byte sequences which are not valid UTF-8
|
||||
// { dg-do preprocess { target c++11 } }
|
||||
// { dg-options "-finput-charset=UTF-8" }
|
||||
|
||||
char32_t a = U'€'; // { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
|
||||
char32_t b = U'¿'; // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
|
||||
char32_t c = U'À'; // { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
|
||||
char32_t d = U'Á'; // { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
|
||||
char32_t e = U'õ'; // { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
|
||||
char32_t f = U'ÿ'; // { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
|
||||
char32_t g = U'Â'; // { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
|
||||
char32_t h = U'à'; // { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
|
||||
char32_t i = U'à€¿'; // { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
|
||||
char32_t j = U'àŸ€'; // { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
|
||||
char32_t k = U'à¿'; // { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
|
||||
char32_t l = U'ì€'; // { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
|
||||
char32_t m = U'í €'; // { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
|
||||
char32_t n = U'ð€€€'; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
|
||||
char32_t o = U'ð<EFBFBD>¿¿'; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
|
||||
char32_t p = U'ô<EFBFBD>€€'; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
|
||||
char32_t q = U'ý¿¿¿¿¿'; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
|
||||
// { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
|
||||
auto A = U"Â€ß¿à €íŸ¿î€€ð<EFBFBD>€€ô<EFBFBD>¿¿"; // { dg-bogus "invalid UTF-8 character" }
|
||||
auto B = U"€"; // { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
|
||||
auto C = U"¿"; // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
|
||||
auto D = U"À"; // { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
|
||||
auto E = U"Á"; // { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
|
||||
auto F = U"õ"; // { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
|
||||
auto G = U"ÿ"; // { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
|
||||
auto H = U"Â"; // { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
|
||||
auto I = U"à"; // { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
|
||||
auto J = U"à€¿"; // { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
|
||||
auto K = U"àŸ€"; // { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
|
||||
auto L = U"à¿"; // { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
|
||||
auto M = U"ì€"; // { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
|
||||
auto N = U"í €"; // { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
|
||||
auto O = U"ð€€€"; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
|
||||
auto P = U"ð<EFBFBD>¿¿"; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
|
||||
auto Q = U"ô<EFBFBD>€€"; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
|
||||
auto R = U"ý¿¿¿¿¿"; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
|
||||
// { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
|
||||
auto A1 = UR"(Â€ß¿à €íŸ¿î€€ð<E282AC>€€ô<E282AC>¿¿)"; // { dg-bogus "invalid UTF-8 character" }
|
||||
auto B1 = UR"(€)"; // { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
|
||||
auto C1 = UR"(¿)"; // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
|
||||
auto D1 = UR"(À)"; // { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
|
||||
auto E1 = UR"(Á)"; // { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
|
||||
auto F1 = UR"(õ)"; // { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
|
||||
auto G1 = UR"(ÿ)"; // { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
|
||||
auto H1 = UR"(Â)"; // { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
|
||||
auto I1 = UR"(à)"; // { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
|
||||
auto J1 = UR"(à€¿)"; // { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
|
||||
auto K1 = UR"(àŸ€)"; // { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
|
||||
auto L1 = UR"(à¿)"; // { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
|
||||
auto M1 = UR"(ì€)"; // { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
|
||||
auto N1 = UR"(í €)"; // { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
|
||||
auto O1 = UR"(ð€€€)"; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
|
||||
auto P1 = UR"(ð<>¿¿)"; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
|
||||
auto Q1 = UR"(ô<>€€)"; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
|
||||
auto R1 = UR"(ý¿¿¿¿¿)"; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
|
||||
// { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
|
||||
auto A2 = u8"Â€ß¿à €íŸ¿î€€ð<EFBFBD>€€ô<EFBFBD>¿¿"; // { dg-bogus "invalid UTF-8 character" }
|
||||
auto B2 = u8"€"; // { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
|
||||
auto C2 = u8"¿"; // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
|
||||
auto D2 = u8"À"; // { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
|
||||
auto E2 = u8"Á"; // { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
|
||||
auto F2 = u8"õ"; // { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
|
||||
auto G2 = u8"ÿ"; // { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
|
||||
auto H2 = u8"Â"; // { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
|
||||
auto I2 = u8"à"; // { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
|
||||
auto J2 = u8"à€¿"; // { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
|
||||
auto K2 = u8"àŸ€"; // { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
|
||||
auto L2 = u8"à¿"; // { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
|
||||
auto M2 = u8"ì€"; // { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
|
||||
auto N2 = u8"í €"; // { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
|
||||
auto O2 = u8"ð€€€"; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
|
||||
auto P2 = u8"ð<EFBFBD>¿¿"; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
|
||||
auto Q2 = u8"ô<EFBFBD>€€"; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
|
||||
auto R2 = u8"ý¿¿¿¿¿"; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
|
||||
// { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
|
80
gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-6.C
Normal file
80
gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-6.C
Normal file
@ -0,0 +1,80 @@
|
||||
// P2295R6 - Support for UTF-8 as a portable source file encoding
|
||||
// This test intentionally contains various byte sequences which are not valid UTF-8
|
||||
// { dg-do preprocess { target c++11 } }
|
||||
// { dg-options "-finput-charset=UTF-8 -pedantic" }
|
||||
|
||||
char32_t a = U'€'; // { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
|
||||
char32_t b = U'¿'; // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
|
||||
char32_t c = U'À'; // { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
|
||||
char32_t d = U'Á'; // { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
|
||||
char32_t e = U'õ'; // { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
|
||||
char32_t f = U'ÿ'; // { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
|
||||
char32_t g = U'Â'; // { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
|
||||
char32_t h = U'à'; // { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
|
||||
char32_t i = U'à€¿'; // { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
|
||||
char32_t j = U'àŸ€'; // { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
|
||||
char32_t k = U'à¿'; // { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
|
||||
char32_t l = U'ì€'; // { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
|
||||
char32_t m = U'í €'; // { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
|
||||
char32_t n = U'ð€€€'; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
|
||||
char32_t o = U'ð<EFBFBD>¿¿'; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
|
||||
char32_t p = U'ô<EFBFBD>€€'; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
|
||||
char32_t q = U'ý¿¿¿¿¿'; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
|
||||
// { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
|
||||
auto A = U"Â€ß¿à €íŸ¿î€€ð<EFBFBD>€€ô<EFBFBD>¿¿"; // { dg-bogus "invalid UTF-8 character" }
|
||||
auto B = U"€"; // { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
|
||||
auto C = U"¿"; // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
|
||||
auto D = U"À"; // { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
|
||||
auto E = U"Á"; // { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
|
||||
auto F = U"õ"; // { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
|
||||
auto G = U"ÿ"; // { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
|
||||
auto H = U"Â"; // { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
|
||||
auto I = U"à"; // { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
|
||||
auto J = U"à€¿"; // { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
|
||||
auto K = U"àŸ€"; // { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
|
||||
auto L = U"à¿"; // { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
|
||||
auto M = U"ì€"; // { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
|
||||
auto N = U"í €"; // { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
|
||||
auto O = U"ð€€€"; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
|
||||
auto P = U"ð<EFBFBD>¿¿"; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
|
||||
auto Q = U"ô<EFBFBD>€€"; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
|
||||
auto R = U"ý¿¿¿¿¿"; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
|
||||
// { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
|
||||
auto A1 = UR"(Â€ß¿à €íŸ¿î€€ð<E282AC>€€ô<E282AC>¿¿)"; // { dg-bogus "invalid UTF-8 character" }
|
||||
auto B1 = UR"(€)"; // { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
|
||||
auto C1 = UR"(¿)"; // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
|
||||
auto D1 = UR"(À)"; // { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
|
||||
auto E1 = UR"(Á)"; // { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
|
||||
auto F1 = UR"(õ)"; // { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
|
||||
auto G1 = UR"(ÿ)"; // { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
|
||||
auto H1 = UR"(Â)"; // { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
|
||||
auto I1 = UR"(à)"; // { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
|
||||
auto J1 = UR"(à€¿)"; // { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
|
||||
auto K1 = UR"(àŸ€)"; // { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
|
||||
auto L1 = UR"(à¿)"; // { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
|
||||
auto M1 = UR"(ì€)"; // { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
|
||||
auto N1 = UR"(í €)"; // { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
|
||||
auto O1 = UR"(ð€€€)"; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
|
||||
auto P1 = UR"(ð<>¿¿)"; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
|
||||
auto Q1 = UR"(ô<>€€)"; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
|
||||
auto R1 = UR"(ý¿¿¿¿¿)"; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
|
||||
// { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
|
||||
auto A2 = u8"Â€ß¿à €íŸ¿î€€ð<EFBFBD>€€ô<EFBFBD>¿¿"; // { dg-bogus "invalid UTF-8 character" }
|
||||
auto B2 = u8"€"; // { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
|
||||
auto C2 = u8"¿"; // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
|
||||
auto D2 = u8"À"; // { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
|
||||
auto E2 = u8"Á"; // { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
|
||||
auto F2 = u8"õ"; // { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
|
||||
auto G2 = u8"ÿ"; // { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
|
||||
auto H2 = u8"Â"; // { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
|
||||
auto I2 = u8"à"; // { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
|
||||
auto J2 = u8"à€¿"; // { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
|
||||
auto K2 = u8"àŸ€"; // { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
|
||||
auto L2 = u8"à¿"; // { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
|
||||
auto M2 = u8"ì€"; // { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
|
||||
auto N2 = u8"í €"; // { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
|
||||
auto O2 = u8"ð€€€"; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
|
||||
auto P2 = u8"ð<EFBFBD>¿¿"; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
|
||||
auto Q2 = u8"ô<EFBFBD>€€"; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
|
||||
auto R2 = u8"ý¿¿¿¿¿"; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
|
||||
// { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
|
80
gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-7.C
Normal file
80
gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-7.C
Normal file
@ -0,0 +1,80 @@
|
||||
// P2295R6 - Support for UTF-8 as a portable source file encoding
|
||||
// This test intentionally contains various byte sequences which are not valid UTF-8
|
||||
// { dg-do preprocess { target c++11 } }
|
||||
// { dg-options "-finput-charset=UTF-8 -pedantic-errors" }
|
||||
|
||||
char32_t a = U'€'; // { dg-error "invalid UTF-8 character <80>" "" { target c++23 } }
|
||||
char32_t b = U'¿'; // { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } }
|
||||
char32_t c = U'À'; // { dg-error "invalid UTF-8 character <c0>" "" { target c++23 } }
|
||||
char32_t d = U'Á'; // { dg-error "invalid UTF-8 character <c1>" "" { target c++23 } }
|
||||
char32_t e = U'õ'; // { dg-error "invalid UTF-8 character <f5>" "" { target c++23 } }
|
||||
char32_t f = U'ÿ'; // { dg-error "invalid UTF-8 character <ff>" "" { target c++23 } }
|
||||
char32_t g = U'Â'; // { dg-error "invalid UTF-8 character <c2>" "" { target c++23 } }
|
||||
char32_t h = U'à'; // { dg-error "invalid UTF-8 character <e0>" "" { target c++23 } }
|
||||
char32_t i = U'à€¿'; // { dg-error "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
|
||||
char32_t j = U'àŸ€'; // { dg-error "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
|
||||
char32_t k = U'à¿'; // { dg-error "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
|
||||
char32_t l = U'ì€'; // { dg-error "invalid UTF-8 character <ec><80>" "" { target c++23 } }
|
||||
char32_t m = U'í €'; // { dg-error "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
|
||||
char32_t n = U'ð€€€'; // { dg-error "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
|
||||
char32_t o = U'ð<EFBFBD>¿¿'; // { dg-error "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
|
||||
char32_t p = U'ô<EFBFBD>€€'; // { dg-error "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
|
||||
char32_t q = U'ý¿¿¿¿¿'; // { dg-error "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
|
||||
// { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
|
||||
auto A = U"Â€ß¿à €íŸ¿î€€ð<EFBFBD>€€ô<EFBFBD>¿¿"; // { dg-bogus "invalid UTF-8 character" }
|
||||
auto B = U"€"; // { dg-error "invalid UTF-8 character <80>" "" { target c++23 } }
|
||||
auto C = U"¿"; // { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } }
|
||||
auto D = U"À"; // { dg-error "invalid UTF-8 character <c0>" "" { target c++23 } }
|
||||
auto E = U"Á"; // { dg-error "invalid UTF-8 character <c1>" "" { target c++23 } }
|
||||
auto F = U"õ"; // { dg-error "invalid UTF-8 character <f5>" "" { target c++23 } }
|
||||
auto G = U"ÿ"; // { dg-error "invalid UTF-8 character <ff>" "" { target c++23 } }
|
||||
auto H = U"Â"; // { dg-error "invalid UTF-8 character <c2>" "" { target c++23 } }
|
||||
auto I = U"à"; // { dg-error "invalid UTF-8 character <e0>" "" { target c++23 } }
|
||||
auto J = U"à€¿"; // { dg-error "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
|
||||
auto K = U"àŸ€"; // { dg-error "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
|
||||
auto L = U"à¿"; // { dg-error "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
|
||||
auto M = U"ì€"; // { dg-error "invalid UTF-8 character <ec><80>" "" { target c++23 } }
|
||||
auto N = U"í €"; // { dg-error "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
|
||||
auto O = U"ð€€€"; // { dg-error "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
|
||||
auto P = U"ð<EFBFBD>¿¿"; // { dg-error "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
|
||||
auto Q = U"ô<EFBFBD>€€"; // { dg-error "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
|
||||
auto R = U"ý¿¿¿¿¿"; // { dg-error "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
|
||||
// { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
|
||||
auto A1 = UR"(Â€ß¿à €íŸ¿î€€ð<E282AC>€€ô<E282AC>¿¿)"; // { dg-bogus "invalid UTF-8 character" }
|
||||
auto B1 = UR"(€)"; // { dg-error "invalid UTF-8 character <80>" "" { target c++23 } }
|
||||
auto C1 = UR"(¿)"; // { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } }
|
||||
auto D1 = UR"(À)"; // { dg-error "invalid UTF-8 character <c0>" "" { target c++23 } }
|
||||
auto E1 = UR"(Á)"; // { dg-error "invalid UTF-8 character <c1>" "" { target c++23 } }
|
||||
auto F1 = UR"(õ)"; // { dg-error "invalid UTF-8 character <f5>" "" { target c++23 } }
|
||||
auto G1 = UR"(ÿ)"; // { dg-error "invalid UTF-8 character <ff>" "" { target c++23 } }
|
||||
auto H1 = UR"(Â)"; // { dg-error "invalid UTF-8 character <c2>" "" { target c++23 } }
|
||||
auto I1 = UR"(à)"; // { dg-error "invalid UTF-8 character <e0>" "" { target c++23 } }
|
||||
auto J1 = UR"(à€¿)"; // { dg-error "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
|
||||
auto K1 = UR"(àŸ€)"; // { dg-error "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
|
||||
auto L1 = UR"(à¿)"; // { dg-error "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
|
||||
auto M1 = UR"(ì€)"; // { dg-error "invalid UTF-8 character <ec><80>" "" { target c++23 } }
|
||||
auto N1 = UR"(í €)"; // { dg-error "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
|
||||
auto O1 = UR"(ð€€€)"; // { dg-error "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
|
||||
auto P1 = UR"(ð<>¿¿)"; // { dg-error "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
|
||||
auto Q1 = UR"(ô<>€€)"; // { dg-error "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
|
||||
auto R1 = UR"(ý¿¿¿¿¿)"; // { dg-error "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
|
||||
// { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
|
||||
auto A2 = u8"Â€ß¿à €íŸ¿î€€ð<EFBFBD>€€ô<EFBFBD>¿¿"; // { dg-bogus "invalid UTF-8 character" }
|
||||
auto B2 = u8"€"; // { dg-error "invalid UTF-8 character <80>" "" { target c++23 } }
|
||||
auto C2 = u8"¿"; // { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } }
|
||||
auto D2 = u8"À"; // { dg-error "invalid UTF-8 character <c0>" "" { target c++23 } }
|
||||
auto E2 = u8"Á"; // { dg-error "invalid UTF-8 character <c1>" "" { target c++23 } }
|
||||
auto F2 = u8"õ"; // { dg-error "invalid UTF-8 character <f5>" "" { target c++23 } }
|
||||
auto G2 = u8"ÿ"; // { dg-error "invalid UTF-8 character <ff>" "" { target c++23 } }
|
||||
auto H2 = u8"Â"; // { dg-error "invalid UTF-8 character <c2>" "" { target c++23 } }
|
||||
auto I2 = u8"à"; // { dg-error "invalid UTF-8 character <e0>" "" { target c++23 } }
|
||||
auto J2 = u8"à€¿"; // { dg-error "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
|
||||
auto K2 = u8"àŸ€"; // { dg-error "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
|
||||
auto L2 = u8"à¿"; // { dg-error "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
|
||||
auto M2 = u8"ì€"; // { dg-error "invalid UTF-8 character <ec><80>" "" { target c++23 } }
|
||||
auto N2 = u8"í €"; // { dg-error "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
|
||||
auto O2 = u8"ð€€€"; // { dg-error "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
|
||||
auto P2 = u8"ð<EFBFBD>¿¿"; // { dg-error "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
|
||||
auto Q2 = u8"ô<EFBFBD>€€"; // { dg-error "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
|
||||
auto R2 = u8"ý¿¿¿¿¿"; // { dg-error "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
|
||||
// { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
|
80
gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-8.C
Normal file
80
gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-8.C
Normal file
@ -0,0 +1,80 @@
|
||||
// P2295R6 - Support for UTF-8 as a portable source file encoding
|
||||
// This test intentionally contains various byte sequences which are not valid UTF-8
|
||||
// { dg-do preprocess { target c++11 } }
|
||||
// { dg-options "-finput-charset=UTF-8 -pedantic-errors -Wno-invalid-utf8" }
|
||||
|
||||
char32_t a = U'€'; // { dg-bogus "invalid UTF-8 character <80>" "" { target c++23 } }
|
||||
char32_t b = U'¿'; // { dg-bogus "invalid UTF-8 character <bf>" "" { target c++23 } }
|
||||
char32_t c = U'À'; // { dg-bogus "invalid UTF-8 character <c0>" "" { target c++23 } }
|
||||
char32_t d = U'Á'; // { dg-bogus "invalid UTF-8 character <c1>" "" { target c++23 } }
|
||||
char32_t e = U'õ'; // { dg-bogus "invalid UTF-8 character <f5>" "" { target c++23 } }
|
||||
char32_t f = U'ÿ'; // { dg-bogus "invalid UTF-8 character <ff>" "" { target c++23 } }
|
||||
char32_t g = U'Â'; // { dg-bogus "invalid UTF-8 character <c2>" "" { target c++23 } }
|
||||
char32_t h = U'à'; // { dg-bogus "invalid UTF-8 character <e0>" "" { target c++23 } }
|
||||
char32_t i = U'à€¿'; // { dg-bogus "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
|
||||
char32_t j = U'àŸ€'; // { dg-bogus "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
|
||||
char32_t k = U'à¿'; // { dg-bogus "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
|
||||
char32_t l = U'ì€'; // { dg-bogus "invalid UTF-8 character <ec><80>" "" { target c++23 } }
|
||||
char32_t m = U'í €'; // { dg-bogus "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
|
||||
char32_t n = U'ð€€€'; // { dg-bogus "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
|
||||
char32_t o = U'ð<EFBFBD>¿¿'; // { dg-bogus "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
|
||||
char32_t p = U'ô<EFBFBD>€€'; // { dg-bogus "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
|
||||
char32_t q = U'ý¿¿¿¿¿'; // { dg-bogus "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
|
||||
// { dg-bogus "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
|
||||
auto A = U"Â€ß¿à €íŸ¿î€€ð<EFBFBD>€€ô<EFBFBD>¿¿"; // { dg-bogus "invalid UTF-8 character" }
|
||||
auto B = U"€"; // { dg-bogus "invalid UTF-8 character <80>" "" { target c++23 } }
|
||||
auto C = U"¿"; // { dg-bogus "invalid UTF-8 character <bf>" "" { target c++23 } }
|
||||
auto D = U"À"; // { dg-bogus "invalid UTF-8 character <c0>" "" { target c++23 } }
|
||||
auto E = U"Á"; // { dg-bogus "invalid UTF-8 character <c1>" "" { target c++23 } }
|
||||
auto F = U"õ"; // { dg-bogus "invalid UTF-8 character <f5>" "" { target c++23 } }
|
||||
auto G = U"ÿ"; // { dg-bogus "invalid UTF-8 character <ff>" "" { target c++23 } }
|
||||
auto H = U"Â"; // { dg-bogus "invalid UTF-8 character <c2>" "" { target c++23 } }
|
||||
auto I = U"à"; // { dg-bogus "invalid UTF-8 character <e0>" "" { target c++23 } }
|
||||
auto J = U"à€¿"; // { dg-bogus "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
|
||||
auto K = U"àŸ€"; // { dg-bogus "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
|
||||
auto L = U"à¿"; // { dg-bogus "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
|
||||
auto M = U"ì€"; // { dg-bogus "invalid UTF-8 character <ec><80>" "" { target c++23 } }
|
||||
auto N = U"í €"; // { dg-bogus "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
|
||||
auto O = U"ð€€€"; // { dg-bogus "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
|
||||
auto P = U"ð<EFBFBD>¿¿"; // { dg-bogus "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
|
||||
auto Q = U"ô<EFBFBD>€€"; // { dg-bogus "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
|
||||
auto R = U"ý¿¿¿¿¿"; // { dg-bogus "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
|
||||
// { dg-bogus "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
|
||||
auto A1 = UR"(Â€ß¿à €íŸ¿î€€ð<E282AC>€€ô<E282AC>¿¿)"; // { dg-bogus "invalid UTF-8 character" }
|
||||
auto B1 = UR"(€)"; // { dg-bogus "invalid UTF-8 character <80>" "" { target c++23 } }
|
||||
auto C1 = UR"(¿)"; // { dg-bogus "invalid UTF-8 character <bf>" "" { target c++23 } }
|
||||
auto D1 = UR"(À)"; // { dg-bogus "invalid UTF-8 character <c0>" "" { target c++23 } }
|
||||
auto E1 = UR"(Á)"; // { dg-bogus "invalid UTF-8 character <c1>" "" { target c++23 } }
|
||||
auto F1 = UR"(õ)"; // { dg-bogus "invalid UTF-8 character <f5>" "" { target c++23 } }
|
||||
auto G1 = UR"(ÿ)"; // { dg-bogus "invalid UTF-8 character <ff>" "" { target c++23 } }
|
||||
auto H1 = UR"(Â)"; // { dg-bogus "invalid UTF-8 character <c2>" "" { target c++23 } }
|
||||
auto I1 = UR"(à)"; // { dg-bogus "invalid UTF-8 character <e0>" "" { target c++23 } }
|
||||
auto J1 = UR"(à€¿)"; // { dg-bogus "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
|
||||
auto K1 = UR"(àŸ€)"; // { dg-bogus "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
|
||||
auto L1 = UR"(à¿)"; // { dg-bogus "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
|
||||
auto M1 = UR"(ì€)"; // { dg-bogus "invalid UTF-8 character <ec><80>" "" { target c++23 } }
|
||||
auto N1 = UR"(í €)"; // { dg-bogus "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
|
||||
auto O1 = UR"(ð€€€)"; // { dg-bogus "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
|
||||
auto P1 = UR"(ð<>¿¿)"; // { dg-bogus "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
|
||||
auto Q1 = UR"(ô<>€€)"; // { dg-bogus "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
|
||||
auto R1 = UR"(ý¿¿¿¿¿)"; // { dg-bogus "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
|
||||
// { dg-bogus "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
|
||||
auto A2 = u8"Â€ß¿à €íŸ¿î€€ð<EFBFBD>€€ô<EFBFBD>¿¿"; // { dg-bogus "invalid UTF-8 character" }
|
||||
auto B2 = u8"€"; // { dg-bogus "invalid UTF-8 character <80>" "" { target c++23 } }
|
||||
auto C2 = u8"¿"; // { dg-bogus "invalid UTF-8 character <bf>" "" { target c++23 } }
|
||||
auto D2 = u8"À"; // { dg-bogus "invalid UTF-8 character <c0>" "" { target c++23 } }
|
||||
auto E2 = u8"Á"; // { dg-bogus "invalid UTF-8 character <c1>" "" { target c++23 } }
|
||||
auto F2 = u8"õ"; // { dg-bogus "invalid UTF-8 character <f5>" "" { target c++23 } }
|
||||
auto G2 = u8"ÿ"; // { dg-bogus "invalid UTF-8 character <ff>" "" { target c++23 } }
|
||||
auto H2 = u8"Â"; // { dg-bogus "invalid UTF-8 character <c2>" "" { target c++23 } }
|
||||
auto I2 = u8"à"; // { dg-bogus "invalid UTF-8 character <e0>" "" { target c++23 } }
|
||||
auto J2 = u8"à€¿"; // { dg-bogus "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
|
||||
auto K2 = u8"àŸ€"; // { dg-bogus "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
|
||||
auto L2 = u8"à¿"; // { dg-bogus "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
|
||||
auto M2 = u8"ì€"; // { dg-bogus "invalid UTF-8 character <ec><80>" "" { target c++23 } }
|
||||
auto N2 = u8"í €"; // { dg-bogus "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
|
||||
auto O2 = u8"ð€€€"; // { dg-bogus "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
|
||||
auto P2 = u8"ð<EFBFBD>¿¿"; // { dg-bogus "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
|
||||
auto Q2 = u8"ô<EFBFBD>€€"; // { dg-bogus "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
|
||||
auto R2 = u8"ý¿¿¿¿¿"; // { dg-bogus "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
|
||||
// { dg-bogus "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
|
25
gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-9.C
Normal file
25
gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-9.C
Normal file
@ -0,0 +1,25 @@
|
||||
// P2295R6 - Support for UTF-8 as a portable source file encoding
|
||||
// This test intentionally contains various byte sequences which are not valid UTF-8
|
||||
// { dg-do preprocess }
|
||||
// { dg-options "-finput-charset=UTF-8" }
|
||||
|
||||
#define I(x)
|
||||
I(Â€ß¿à €íŸ¿î€€ð<EFBFBD>€€ô<EFBFBD>¿¿) // { dg-bogus "invalid UTF-8 character" }
|
||||
// { dg-error "is not valid in an identifier" "" { target *-*-* } .-1 }
|
||||
I(€) // { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
|
||||
I(¿) // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
|
||||
I(À) // { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
|
||||
I(Á) // { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
|
||||
I(õ) // { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
|
||||
I(ÿ) // { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
|
||||
I(Â) // { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
|
||||
I(à) // { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
|
||||
I(à€¿) // { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
|
||||
I(àŸ€) // { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
|
||||
I(à¿) // { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
|
||||
I(ì€) // { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
|
||||
I(í €) // { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
|
||||
I(ð€€€) // { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
|
||||
I(ð<EFBFBD>¿¿) // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
|
||||
I(ô<EFBFBD>€€) // { dg-error "is not valid in an identifier" }
|
||||
I(ý¿¿¿¿¿) // { dg-error "is not valid in an identifier" }
|
@ -1742,9 +1742,9 @@ convert_ucn (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
||||
case, no diagnostic is emitted, and the return value of FALSE should cause
|
||||
a new token to be formed.
|
||||
|
||||
Unlike _cpp_valid_ucn, this will never be called when lexing a string; only
|
||||
a potential identifier, or a CPP_OTHER token. NST is unused in the latter
|
||||
case.
|
||||
_cpp_valid_utf8 can be called when lexing a potential identifier or a
|
||||
CPP_OTHER token, or for the purposes of the -Winvalid-utf8 warning in
|
||||
comments, string or character literals. NST is unused when not in a potential identifier.
|
||||
|
||||
As in _cpp_valid_ucn, IDENTIFIER_POS is 0 when not in an identifier, 1 for
|
||||
the start of an identifier, or 2 otherwise. */
|
||||
|
@ -560,6 +560,13 @@ struct cpp_options
|
||||
cpp_bidirectional_level. */
|
||||
unsigned char cpp_warn_bidirectional;
|
||||
|
||||
/* Nonzero if libcpp should warn about invalid UTF-8 characters in comments, string and character literals, and other tokens.
|
||||
2 if it should be a pedwarn. */
|
||||
unsigned char cpp_warn_invalid_utf8;
|
||||
|
||||
/* True if -finput-charset= option has been used explicitly. */
|
||||
bool cpp_input_charset_explicit;
|
||||
|
||||
/* Dependency generation. */
|
||||
struct
|
||||
{
|
||||
@ -666,7 +673,8 @@ enum cpp_warning_reason {
|
||||
CPP_W_CXX11_COMPAT,
|
||||
CPP_W_CXX20_COMPAT,
|
||||
CPP_W_EXPANSION_TO_DEFINED,
|
||||
CPP_W_BIDIRECTIONAL
|
||||
CPP_W_BIDIRECTIONAL,
|
||||
CPP_W_INVALID_UTF8
|
||||
};
|
||||
|
||||
/* Callback for header lookup for HEADER, which is the name of a
|
||||
|
@ -227,6 +227,8 @@ cpp_create_reader (enum c_lang lang, cpp_hash_table *table,
|
||||
CPP_OPTION (pfile, ext_numeric_literals) = 1;
|
||||
CPP_OPTION (pfile, warn_date_time) = 0;
|
||||
CPP_OPTION (pfile, cpp_warn_bidirectional) = bidirectional_unpaired;
|
||||
CPP_OPTION (pfile, cpp_warn_invalid_utf8) = 0;
|
||||
CPP_OPTION (pfile, cpp_input_charset_explicit) = 0;
|
||||
|
||||
/* Default CPP arithmetic to something sensible for the host for the
|
||||
benefit of dumb users like fix-header. */
|
||||
|
209
libcpp/lex.cc
209
libcpp/lex.cc
@ -50,6 +50,9 @@ static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
|
||||
#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
|
||||
#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
|
||||
|
||||
/* ISO 10646 defines the UCS codespace as the range 0-0x10FFFF inclusive. */
|
||||
#define UCS_LIMIT 0x10FFFF
|
||||
|
||||
static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
|
||||
static int skip_line_comment (cpp_reader *);
|
||||
static void skip_whitespace (cpp_reader *, cppchar_t);
|
||||
@ -1704,6 +1707,120 @@ maybe_warn_bidi_on_char (cpp_reader *pfile, bidi::kind kind,
|
||||
bidi::on_char (kind, ucn_p, loc);
|
||||
}
|
||||
|
||||
static const cppchar_t utf8_continuation = 0x80;
|
||||
static const cppchar_t utf8_signifier = 0xC0;
|
||||
|
||||
/* Emit -Winvalid-utf8 warning on invalid UTF-8 character starting
|
||||
at PFILE->buffer->cur. Return a pointer after the diagnosed
|
||||
invalid character. */
|
||||
|
||||
static const uchar *
|
||||
_cpp_warn_invalid_utf8 (cpp_reader *pfile)
|
||||
{
|
||||
cpp_buffer *buffer = pfile->buffer;
|
||||
const uchar *cur = buffer->cur;
|
||||
bool pedantic = (CPP_PEDANTIC (pfile)
|
||||
&& CPP_OPTION (pfile, cpp_warn_invalid_utf8) == 2);
|
||||
|
||||
if (cur[0] < utf8_signifier
|
||||
|| cur[1] < utf8_continuation || cur[1] >= utf8_signifier)
|
||||
{
|
||||
if (pedantic)
|
||||
cpp_error_with_line (pfile, CPP_DL_PEDWARN,
|
||||
pfile->line_table->highest_line,
|
||||
CPP_BUF_COL (buffer),
|
||||
"invalid UTF-8 character <%x>",
|
||||
cur[0]);
|
||||
else
|
||||
cpp_warning_with_line (pfile, CPP_W_INVALID_UTF8,
|
||||
pfile->line_table->highest_line,
|
||||
CPP_BUF_COL (buffer),
|
||||
"invalid UTF-8 character <%x>",
|
||||
cur[0]);
|
||||
return cur + 1;
|
||||
}
|
||||
else if (cur[2] < utf8_continuation || cur[2] >= utf8_signifier)
|
||||
{
|
||||
if (pedantic)
|
||||
cpp_error_with_line (pfile, CPP_DL_PEDWARN,
|
||||
pfile->line_table->highest_line,
|
||||
CPP_BUF_COL (buffer),
|
||||
"invalid UTF-8 character <%x><%x>",
|
||||
cur[0], cur[1]);
|
||||
else
|
||||
cpp_warning_with_line (pfile, CPP_W_INVALID_UTF8,
|
||||
pfile->line_table->highest_line,
|
||||
CPP_BUF_COL (buffer),
|
||||
"invalid UTF-8 character <%x><%x>",
|
||||
cur[0], cur[1]);
|
||||
return cur + 2;
|
||||
}
|
||||
else if (cur[3] < utf8_continuation || cur[3] >= utf8_signifier)
|
||||
{
|
||||
if (pedantic)
|
||||
cpp_error_with_line (pfile, CPP_DL_PEDWARN,
|
||||
pfile->line_table->highest_line,
|
||||
CPP_BUF_COL (buffer),
|
||||
"invalid UTF-8 character <%x><%x><%x>",
|
||||
cur[0], cur[1], cur[2]);
|
||||
else
|
||||
cpp_warning_with_line (pfile, CPP_W_INVALID_UTF8,
|
||||
pfile->line_table->highest_line,
|
||||
CPP_BUF_COL (buffer),
|
||||
"invalid UTF-8 character <%x><%x><%x>",
|
||||
cur[0], cur[1], cur[2]);
|
||||
return cur + 3;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (pedantic)
|
||||
cpp_error_with_line (pfile, CPP_DL_PEDWARN,
|
||||
pfile->line_table->highest_line,
|
||||
CPP_BUF_COL (buffer),
|
||||
"invalid UTF-8 character <%x><%x><%x><%x>",
|
||||
cur[0], cur[1], cur[2], cur[3]);
|
||||
else
|
||||
cpp_warning_with_line (pfile, CPP_W_INVALID_UTF8,
|
||||
pfile->line_table->highest_line,
|
||||
CPP_BUF_COL (buffer),
|
||||
"invalid UTF-8 character <%x><%x><%x><%x>",
|
||||
cur[0], cur[1], cur[2], cur[3]);
|
||||
return cur + 4;
|
||||
}
|
||||
}
|
||||
|
||||
/* Helper function of *skip_*_comment and lex*_string. For C,
|
||||
character at CUR[-1] with MSB set handle -Wbidi-chars* and
|
||||
-Winvalid-utf8 diagnostics and return pointer to first character
|
||||
that should be processed next. */
|
||||
|
||||
static inline const uchar *
|
||||
_cpp_handle_multibyte_utf8 (cpp_reader *pfile, uchar c,
|
||||
const uchar *cur, bool warn_bidi_p,
|
||||
bool warn_invalid_utf8_p)
|
||||
{
|
||||
/* If this is a beginning of a UTF-8 encoding, it might be
|
||||
a bidirectional control character. */
|
||||
if (c == bidi::utf8_start && warn_bidi_p)
|
||||
{
|
||||
location_t loc;
|
||||
bidi::kind kind = get_bidi_utf8 (pfile, cur - 1, &loc);
|
||||
maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
|
||||
}
|
||||
if (!warn_invalid_utf8_p)
|
||||
return cur;
|
||||
if (c >= utf8_signifier)
|
||||
{
|
||||
cppchar_t s;
|
||||
const uchar *pstr = cur - 1;
|
||||
if (_cpp_valid_utf8 (pfile, &pstr, pfile->buffer->rlimit, 0, NULL, &s)
|
||||
&& s <= UCS_LIMIT)
|
||||
return pstr;
|
||||
}
|
||||
pfile->buffer->cur = cur - 1;
|
||||
return _cpp_warn_invalid_utf8 (pfile);
|
||||
}
|
||||
|
||||
/* Skip a C-style block comment. We find the end of the comment by
|
||||
seeing if an asterisk is before every '/' we encounter. Returns
|
||||
nonzero if comment terminated by EOF, zero otherwise.
|
||||
@ -1716,6 +1833,8 @@ _cpp_skip_block_comment (cpp_reader *pfile)
|
||||
const uchar *cur = buffer->cur;
|
||||
uchar c;
|
||||
const bool warn_bidi_p = pfile->warn_bidi_p ();
|
||||
const bool warn_invalid_utf8_p = CPP_OPTION (pfile, cpp_warn_invalid_utf8);
|
||||
const bool warn_bidi_or_invalid_utf8_p = warn_bidi_p | warn_invalid_utf8_p;
|
||||
|
||||
cur++;
|
||||
if (*cur == '/')
|
||||
@ -1765,14 +1884,10 @@ _cpp_skip_block_comment (cpp_reader *pfile)
|
||||
|
||||
cur = buffer->cur;
|
||||
}
|
||||
/* If this is a beginning of a UTF-8 encoding, it might be
|
||||
a bidirectional control character. */
|
||||
else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
|
||||
{
|
||||
location_t loc;
|
||||
bidi::kind kind = get_bidi_utf8 (pfile, cur - 1, &loc);
|
||||
maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
|
||||
}
|
||||
else if (__builtin_expect (c >= utf8_continuation, 0)
|
||||
&& warn_bidi_or_invalid_utf8_p)
|
||||
cur = _cpp_handle_multibyte_utf8 (pfile, c, cur, warn_bidi_p,
|
||||
warn_invalid_utf8_p);
|
||||
}
|
||||
|
||||
buffer->cur = cur;
|
||||
@ -1789,11 +1904,13 @@ skip_line_comment (cpp_reader *pfile)
|
||||
cpp_buffer *buffer = pfile->buffer;
|
||||
location_t orig_line = pfile->line_table->highest_line;
|
||||
const bool warn_bidi_p = pfile->warn_bidi_p ();
|
||||
const bool warn_invalid_utf8_p = CPP_OPTION (pfile, cpp_warn_invalid_utf8);
|
||||
const bool warn_bidi_or_invalid_utf8_p = warn_bidi_p | warn_invalid_utf8_p;
|
||||
|
||||
if (!warn_bidi_p)
|
||||
if (!warn_bidi_or_invalid_utf8_p)
|
||||
while (*buffer->cur != '\n')
|
||||
buffer->cur++;
|
||||
else
|
||||
else if (!warn_invalid_utf8_p)
|
||||
{
|
||||
while (*buffer->cur != '\n'
|
||||
&& *buffer->cur != bidi::utf8_start)
|
||||
@ -1813,6 +1930,22 @@ skip_line_comment (cpp_reader *pfile)
|
||||
maybe_warn_bidi_on_close (pfile, buffer->cur);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while (*buffer->cur != '\n')
|
||||
{
|
||||
if (*buffer->cur < utf8_continuation)
|
||||
{
|
||||
buffer->cur++;
|
||||
continue;
|
||||
}
|
||||
buffer->cur
|
||||
= _cpp_handle_multibyte_utf8 (pfile, *buffer->cur, buffer->cur + 1,
|
||||
warn_bidi_p, warn_invalid_utf8_p);
|
||||
}
|
||||
if (warn_bidi_p)
|
||||
maybe_warn_bidi_on_close (pfile, buffer->cur);
|
||||
}
|
||||
|
||||
_cpp_process_line_notes (pfile, true);
|
||||
return orig_line != pfile->line_table->highest_line;
|
||||
@ -1919,8 +2052,6 @@ warn_about_normalization (cpp_reader *pfile,
|
||||
}
|
||||
}
|
||||
|
||||
static const cppchar_t utf8_signifier = 0xC0;
|
||||
|
||||
/* Returns TRUE if the sequence starting at buffer->cur is valid in
|
||||
an identifier. FIRST is TRUE if this starts an identifier. */
|
||||
|
||||
@ -2361,6 +2492,8 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
|
||||
{
|
||||
const uchar *pos = base;
|
||||
const bool warn_bidi_p = pfile->warn_bidi_p ();
|
||||
const bool warn_invalid_utf8_p = CPP_OPTION (pfile, cpp_warn_invalid_utf8);
|
||||
const bool warn_bidi_or_invalid_utf8_p = warn_bidi_p | warn_invalid_utf8_p;
|
||||
|
||||
/* 'tis a pity this information isn't passed down from the lexer's
|
||||
initial categorization of the token. */
|
||||
@ -2597,13 +2730,10 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
|
||||
pos = base = pfile->buffer->cur;
|
||||
note = &pfile->buffer->notes[pfile->buffer->cur_note];
|
||||
}
|
||||
else if (__builtin_expect ((unsigned char) c == bidi::utf8_start, 0)
|
||||
&& warn_bidi_p)
|
||||
{
|
||||
location_t loc;
|
||||
bidi::kind kind = get_bidi_utf8 (pfile, pos - 1, &loc);
|
||||
maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
|
||||
}
|
||||
else if (__builtin_expect ((unsigned char) c >= utf8_continuation, 0)
|
||||
&& warn_bidi_or_invalid_utf8_p)
|
||||
pos = _cpp_handle_multibyte_utf8 (pfile, c, pos, warn_bidi_p,
|
||||
warn_invalid_utf8_p);
|
||||
}
|
||||
|
||||
if (warn_bidi_p)
|
||||
@ -2704,6 +2834,8 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
|
||||
terminator = '>', type = CPP_HEADER_NAME;
|
||||
|
||||
const bool warn_bidi_p = pfile->warn_bidi_p ();
|
||||
const bool warn_invalid_utf8_p = CPP_OPTION (pfile, cpp_warn_invalid_utf8);
|
||||
const bool warn_bidi_or_invalid_utf8_p = warn_bidi_p | warn_invalid_utf8_p;
|
||||
for (;;)
|
||||
{
|
||||
cppchar_t c = *cur++;
|
||||
@ -2745,12 +2877,10 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
|
||||
}
|
||||
else if (c == '\0')
|
||||
saw_NUL = true;
|
||||
else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
|
||||
{
|
||||
location_t loc;
|
||||
bidi::kind kind = get_bidi_utf8 (pfile, cur - 1, &loc);
|
||||
maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
|
||||
}
|
||||
else if (__builtin_expect (c >= utf8_continuation, 0)
|
||||
&& warn_bidi_or_invalid_utf8_p)
|
||||
cur = _cpp_handle_multibyte_utf8 (pfile, c, cur, warn_bidi_p,
|
||||
warn_invalid_utf8_p);
|
||||
}
|
||||
|
||||
if (saw_NUL && !pfile->state.skipping)
|
||||
@ -4052,6 +4182,7 @@ _cpp_lex_direct (cpp_reader *pfile)
|
||||
default:
|
||||
{
|
||||
const uchar *base = --buffer->cur;
|
||||
static int no_warn_cnt;
|
||||
|
||||
/* Check for an extended identifier ($ or UCN or UTF-8). */
|
||||
struct normalize_state nst = INITIAL_NORMALIZE_STATE;
|
||||
@ -4072,7 +4203,33 @@ _cpp_lex_direct (cpp_reader *pfile)
|
||||
const uchar *pstr = base;
|
||||
cppchar_t s;
|
||||
if (_cpp_valid_utf8 (pfile, &pstr, buffer->rlimit, 0, NULL, &s))
|
||||
buffer->cur = pstr;
|
||||
{
|
||||
if (s > UCS_LIMIT && CPP_OPTION (pfile, cpp_warn_invalid_utf8))
|
||||
{
|
||||
buffer->cur = base;
|
||||
_cpp_warn_invalid_utf8 (pfile);
|
||||
}
|
||||
buffer->cur = pstr;
|
||||
}
|
||||
else if (CPP_OPTION (pfile, cpp_warn_invalid_utf8))
|
||||
{
|
||||
buffer->cur = base;
|
||||
const uchar *end = _cpp_warn_invalid_utf8 (pfile);
|
||||
buffer->cur = base + 1;
|
||||
no_warn_cnt = end - buffer->cur;
|
||||
}
|
||||
}
|
||||
else if (c >= utf8_continuation
|
||||
&& CPP_OPTION (pfile, cpp_warn_invalid_utf8))
|
||||
{
|
||||
if (no_warn_cnt)
|
||||
--no_warn_cnt;
|
||||
else
|
||||
{
|
||||
buffer->cur = base;
|
||||
_cpp_warn_invalid_utf8 (pfile);
|
||||
buffer->cur = base + 1;
|
||||
}
|
||||
}
|
||||
create_literal (pfile, result, base, buffer->cur - base, CPP_OTHER);
|
||||
break;
|
||||
|
Loading…
Reference in New Issue
Block a user