diff options
| author | Joseph Myers <josmyers@redhat.com> | 2024-12-03 13:01:58 +0000 |
|---|---|---|
| committer | Joseph Myers <josmyers@redhat.com> | 2024-12-03 13:01:58 +0000 |
| commit | f3b5de944ad6d1f6a10f819b816c2ba234ecd8c0 (patch) | |
| tree | 45c15b02d46a42f4ffc5167fff2fae29ebc36b0e /libcpp | |
| parent | af9a3fe6a52974252516b3eea4c5ab5caae47b4b (diff) | |
preprocessor: Adjust C rules on UCNs for C23 [PR117162]
As noted in bug 117162, C23 changed some rules on UCNs to match C++
(this was a late change agreed in the resolution to CD2 comment
US-032, implementing changes from N3124), which we need to implement.
Allow UCNs below 0xa0 outside identifiers for C, with a
pedwarn-if-pedantic before C23 (and a warning with -Wc11-c23-compat)
except for the always-allowed cases of UCNs for $ @ `. Also as part
of that change, do not allow \u0024 in identifiers as equivalent to $
for C23.
Bootstrapped with no regressions for x86_64-pc-linux-gnu.
PR c/117162
libcpp/
* include/cpplib.h (struct cpp_options): Add low_ucns.
* init.cc (struct lang_flags, lang_defaults): Add low_ucns.
(cpp_set_lang): Set low_ucns
* charset.cc (_cpp_valid_ucn): For C, allow UCNs below 0xa0
outside identifiers, with a pedwarn if pedantic before C23 or a
warning with -Wc11-c23-compat. Do not allow \u0024 in identifiers
for C23.
gcc/testsuite/
* gcc.dg/cpp/c17-ucn-1.c, gcc.dg/cpp/c17-ucn-2.c,
gcc.dg/cpp/c17-ucn-3.c, gcc.dg/cpp/c17-ucn-4.c,
gcc.dg/cpp/c23-ucn-2.c, gcc.dg/cpp/c23-ucnid-2.c: New tests.
* c-c++-common/cpp/delimited-escape-seq-3.c,
c-c++-common/cpp/named-universal-char-escape-3.c,
gcc.dg/cpp/c23-ucn-1.c, gcc.dg/cpp/c2y-delimited-escape-seq-3.c:
Update expected messages
* gcc.dg/cpp/ucs.c: Use -pedantic-errors. Update expected
messages.
Diffstat (limited to 'libcpp')
| -rw-r--r-- | libcpp/charset.cc | 34 | ||||
| -rw-r--r-- | libcpp/include/cpplib.h | 3 | ||||
| -rw-r--r-- | libcpp/init.cc | 70 |
3 files changed, 63 insertions, 44 deletions
diff --git a/libcpp/charset.cc b/libcpp/charset.cc index 9337fbc3a7a..72049e61fa8 100644 --- a/libcpp/charset.cc +++ b/libcpp/charset.cc @@ -1811,14 +1811,7 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr, (int) (str - base), base); result = 1; } - /* The C99 standard permits $, @ and ` to be specified as UCNs. We use - hex escapes so that this also works with EBCDIC hosts. - C++0x permits everything below 0xa0 within literals; - ucn_valid_in_identifier will complain about identifiers. */ - else if ((result < 0xa0 - && !CPP_OPTION (pfile, cplusplus) - && (result != 0x24 && result != 0x40 && result != 0x60)) - || (result & 0x80000000) + else if ((result & 0x80000000) || (result >= 0xD800 && result <= 0xDFFF)) { cpp_error (pfile, CPP_DL_ERROR, @@ -1826,13 +1819,34 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr, (int) (str - base), base); result = 1; } + /* The C99 standard permits $, @ and ` to be specified as UCNs. We use + hex escapes so that this also works with EBCDIC hosts. + C++0x permits everything below 0xa0 within literals, as does C23; + ucn_valid_in_identifier will complain about identifiers. */ + else if (result < 0xa0 + && !identifier_pos + && !CPP_OPTION (pfile, cplusplus) + && (result != 0x24 && result != 0x40 && result != 0x60)) + { + bool warned = false; + if (!CPP_OPTION (pfile, low_ucns) && CPP_OPTION (pfile, cpp_pedantic)) + warned = cpp_pedwarning (pfile, CPP_W_PEDANTIC, + "%.*s is not a valid universal character" + " name before C23", (int) (str - base), base); + if (!warned && CPP_OPTION (pfile, cpp_warn_c11_c23_compat) > 0) + warned = cpp_warning (pfile, CPP_W_C11_C23_COMPAT, + "%.*s is not a valid universal character" + " name before C23", (int) (str - base), base); + } else if (identifier_pos && result == 0x24 && CPP_OPTION (pfile, dollars_in_ident) /* In C++26 when dollars are allowed in identifiers, we should still reject \u0024 as $ is part of the basic - character set. */ + character set. C23 also does not allow \u0024 in + identifiers. */ && !(CPP_OPTION (pfile, cplusplus) - && CPP_OPTION (pfile, lang) > CLK_CXX23)) + ? CPP_OPTION (pfile, lang) > CLK_CXX23 + : CPP_OPTION (pfile, low_ucns))) { if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping) { diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h index e73f77e67d8..298b434698a 100644 --- a/libcpp/include/cpplib.h +++ b/libcpp/include/cpplib.h @@ -557,6 +557,9 @@ struct cpp_options /* Nonzero for C++23 named universal character escape sequences. */ unsigned char named_uc_escape_seqs; + /* Nonzero for C++ and C23 UCNs for characters below 0xa0. */ + unsigned char low_ucns; + /* Nonzero for C2Y 0o prefixed octal integer constants. */ unsigned char octal_constants; diff --git a/libcpp/init.cc b/libcpp/init.cc index 3ab120a36e6..62982355c8f 100644 --- a/libcpp/init.cc +++ b/libcpp/init.cc @@ -113,44 +113,45 @@ struct lang_flags unsigned int true_false : 1; unsigned int embed : 1; unsigned int imaginary_constants : 1; + unsigned int low_ucns : 1; }; static const struct lang_flags lang_defaults[] = { /* u e w n - b d 8 l a a t - x u i i c v s s i r d m o r e - x i d u r d n g t h a c z f n e e c u m i - c c n x c d s i l l l c s r l o o d l d d l d t f b m - 9 + u i 1 i t g i i i s e i i p p f i e i i u a a e a - 9 + m d 1 d d r t t t t p g t t e p t f r m c l l d g */ - /* GNUC89 */ { 0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0 }, - /* GNUC99 */ { 1,0,1,1,0,0,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0 }, - /* GNUC11 */ { 1,0,1,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0 }, - /* GNUC17 */ { 1,0,1,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0 }, - /* GNUC23 */ { 1,0,1,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,1,1,0 }, - /* GNUC2Y */ { 1,0,1,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,1 }, - /* STDC89 */ { 0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0 }, - /* STDC94 */ { 0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0 }, - /* STDC99 */ { 1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0 }, - /* STDC11 */ { 1,0,1,1,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0 }, - /* STDC17 */ { 1,0,1,1,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0 }, - /* STDC23 */ { 1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,1,1,0 }, - /* STDC2Y */ { 1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,1 }, - /* GNUCXX */ { 0,1,1,1,0,1,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0 }, - /* CXX98 */ { 0,1,0,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0 }, - /* GNUCXX11 */ { 1,1,1,1,1,1,0,1,1,1,1,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0 }, - /* CXX11 */ { 1,1,0,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0 }, - /* GNUCXX14 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0 }, - /* CXX14 */ { 1,1,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0 }, - /* GNUCXX17 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,0,1,0,0 }, - /* CXX17 */ { 1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0 }, - /* GNUCXX20 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,0,1,0,0 }, - /* CXX20 */ { 1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,0,1,0,0 }, - /* GNUCXX23 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0 }, - /* CXX23 */ { 1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0 }, - /* GNUCXX26 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0 }, - /* CXX26 */ { 1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0 }, - /* ASM */ { 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } + b d 8 l a a t l + x u i i c v s s i r d m o r e o + x i d u r d n g t h a c z f n e e c u m i w + c c n x c d s i l l l c s r l o o d l d d l d t f b m u + 9 + u i 1 i t g i i i s e i i p p f i e i i u a a e a c + 9 + m d 1 d d r t t t t p g t t e p t f r m c l l d g n */ + /* GNUC89 */ { 0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0 }, + /* GNUC99 */ { 1,0,1,1,0,0,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0 }, + /* GNUC11 */ { 1,0,1,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0 }, + /* GNUC17 */ { 1,0,1,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0 }, + /* GNUC23 */ { 1,0,1,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,1,1,0,1 }, + /* GNUC2Y */ { 1,0,1,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,1,1 }, + /* STDC89 */ { 0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, + /* STDC94 */ { 0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, + /* STDC99 */ { 1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, + /* STDC11 */ { 1,0,1,1,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, + /* STDC17 */ { 1,0,1,1,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, + /* STDC23 */ { 1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,1,1,0,1 }, + /* STDC2Y */ { 1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,1,1 }, + /* GNUCXX */ { 0,1,1,1,0,1,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1 }, + /* CXX98 */ { 0,1,0,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1 }, + /* GNUCXX11 */ { 1,1,1,1,1,1,0,1,1,1,1,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1 }, + /* CXX11 */ { 1,1,0,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1 }, + /* GNUCXX14 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1 }, + /* CXX14 */ { 1,1,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1 }, + /* GNUCXX17 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1 }, + /* CXX17 */ { 1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1 }, + /* GNUCXX20 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1 }, + /* CXX20 */ { 1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1 }, + /* GNUCXX23 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1 }, + /* CXX23 */ { 1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1 }, + /* GNUCXX26 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1 }, + /* CXX26 */ { 1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1 }, + /* ASM */ { 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } }; /* Sets internal flags correctly for a given language. */ @@ -188,6 +189,7 @@ cpp_set_lang (cpp_reader *pfile, enum c_lang lang) CPP_OPTION (pfile, true_false) = l->true_false; CPP_OPTION (pfile, embed) = l->embed; CPP_OPTION (pfile, imaginary_constants) = l->imaginary_constants; + CPP_OPTION (pfile, low_ucns) = l->low_ucns; } /* Initialize library global state. */ |
