summaryrefslogtreecommitdiff
path: root/libcpp
diff options
context:
space:
mode:
authorJoseph Myers <josmyers@redhat.com>2024-12-03 13:01:58 +0000
committerJoseph Myers <josmyers@redhat.com>2024-12-03 13:01:58 +0000
commitf3b5de944ad6d1f6a10f819b816c2ba234ecd8c0 (patch)
tree45c15b02d46a42f4ffc5167fff2fae29ebc36b0e /libcpp
parentaf9a3fe6a52974252516b3eea4c5ab5caae47b4b (diff)
preprocessor: Adjust C rules on UCNs for C23 [PR117162]
As noted in bug 117162, C23 changed some rules on UCNs to match C++ (this was a late change agreed in the resolution to CD2 comment US-032, implementing changes from N3124), which we need to implement. Allow UCNs below 0xa0 outside identifiers for C, with a pedwarn-if-pedantic before C23 (and a warning with -Wc11-c23-compat) except for the always-allowed cases of UCNs for $ @ `. Also as part of that change, do not allow \u0024 in identifiers as equivalent to $ for C23. Bootstrapped with no regressions for x86_64-pc-linux-gnu. PR c/117162 libcpp/ * include/cpplib.h (struct cpp_options): Add low_ucns. * init.cc (struct lang_flags, lang_defaults): Add low_ucns. (cpp_set_lang): Set low_ucns * charset.cc (_cpp_valid_ucn): For C, allow UCNs below 0xa0 outside identifiers, with a pedwarn if pedantic before C23 or a warning with -Wc11-c23-compat. Do not allow \u0024 in identifiers for C23. gcc/testsuite/ * gcc.dg/cpp/c17-ucn-1.c, gcc.dg/cpp/c17-ucn-2.c, gcc.dg/cpp/c17-ucn-3.c, gcc.dg/cpp/c17-ucn-4.c, gcc.dg/cpp/c23-ucn-2.c, gcc.dg/cpp/c23-ucnid-2.c: New tests. * c-c++-common/cpp/delimited-escape-seq-3.c, c-c++-common/cpp/named-universal-char-escape-3.c, gcc.dg/cpp/c23-ucn-1.c, gcc.dg/cpp/c2y-delimited-escape-seq-3.c: Update expected messages * gcc.dg/cpp/ucs.c: Use -pedantic-errors. Update expected messages.
Diffstat (limited to 'libcpp')
-rw-r--r--libcpp/charset.cc34
-rw-r--r--libcpp/include/cpplib.h3
-rw-r--r--libcpp/init.cc70
3 files changed, 63 insertions, 44 deletions
diff --git a/libcpp/charset.cc b/libcpp/charset.cc
index 9337fbc3a7a..72049e61fa8 100644
--- a/libcpp/charset.cc
+++ b/libcpp/charset.cc
@@ -1811,14 +1811,7 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
(int) (str - base), base);
result = 1;
}
- /* The C99 standard permits $, @ and ` to be specified as UCNs. We use
- hex escapes so that this also works with EBCDIC hosts.
- C++0x permits everything below 0xa0 within literals;
- ucn_valid_in_identifier will complain about identifiers. */
- else if ((result < 0xa0
- && !CPP_OPTION (pfile, cplusplus)
- && (result != 0x24 && result != 0x40 && result != 0x60))
- || (result & 0x80000000)
+ else if ((result & 0x80000000)
|| (result >= 0xD800 && result <= 0xDFFF))
{
cpp_error (pfile, CPP_DL_ERROR,
@@ -1826,13 +1819,34 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
(int) (str - base), base);
result = 1;
}
+ /* The C99 standard permits $, @ and ` to be specified as UCNs. We use
+ hex escapes so that this also works with EBCDIC hosts.
+ C++0x permits everything below 0xa0 within literals, as does C23;
+ ucn_valid_in_identifier will complain about identifiers. */
+ else if (result < 0xa0
+ && !identifier_pos
+ && !CPP_OPTION (pfile, cplusplus)
+ && (result != 0x24 && result != 0x40 && result != 0x60))
+ {
+ bool warned = false;
+ if (!CPP_OPTION (pfile, low_ucns) && CPP_OPTION (pfile, cpp_pedantic))
+ warned = cpp_pedwarning (pfile, CPP_W_PEDANTIC,
+ "%.*s is not a valid universal character"
+ " name before C23", (int) (str - base), base);
+ if (!warned && CPP_OPTION (pfile, cpp_warn_c11_c23_compat) > 0)
+ warned = cpp_warning (pfile, CPP_W_C11_C23_COMPAT,
+ "%.*s is not a valid universal character"
+ " name before C23", (int) (str - base), base);
+ }
else if (identifier_pos && result == 0x24
&& CPP_OPTION (pfile, dollars_in_ident)
/* In C++26 when dollars are allowed in identifiers,
we should still reject \u0024 as $ is part of the basic
- character set. */
+ character set. C23 also does not allow \u0024 in
+ identifiers. */
&& !(CPP_OPTION (pfile, cplusplus)
- && CPP_OPTION (pfile, lang) > CLK_CXX23))
+ ? CPP_OPTION (pfile, lang) > CLK_CXX23
+ : CPP_OPTION (pfile, low_ucns)))
{
if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
{
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index e73f77e67d8..298b434698a 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -557,6 +557,9 @@ struct cpp_options
/* Nonzero for C++23 named universal character escape sequences. */
unsigned char named_uc_escape_seqs;
+ /* Nonzero for C++ and C23 UCNs for characters below 0xa0. */
+ unsigned char low_ucns;
+
/* Nonzero for C2Y 0o prefixed octal integer constants. */
unsigned char octal_constants;
diff --git a/libcpp/init.cc b/libcpp/init.cc
index 3ab120a36e6..62982355c8f 100644
--- a/libcpp/init.cc
+++ b/libcpp/init.cc
@@ -113,44 +113,45 @@ struct lang_flags
unsigned int true_false : 1;
unsigned int embed : 1;
unsigned int imaginary_constants : 1;
+ unsigned int low_ucns : 1;
};
static const struct lang_flags lang_defaults[] = {
/* u e w n
- b d 8 l a a t
- x u i i c v s s i r d m o r e
- x i d u r d n g t h a c z f n e e c u m i
- c c n x c d s i l l l c s r l o o d l d d l d t f b m
- 9 + u i 1 i t g i i i s e i i p p f i e i i u a a e a
- 9 + m d 1 d d r t t t t p g t t e p t f r m c l l d g */
- /* GNUC89 */ { 0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0 },
- /* GNUC99 */ { 1,0,1,1,0,0,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0 },
- /* GNUC11 */ { 1,0,1,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0 },
- /* GNUC17 */ { 1,0,1,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0 },
- /* GNUC23 */ { 1,0,1,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,1,1,0 },
- /* GNUC2Y */ { 1,0,1,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,1 },
- /* STDC89 */ { 0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0 },
- /* STDC94 */ { 0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0 },
- /* STDC99 */ { 1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0 },
- /* STDC11 */ { 1,0,1,1,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0 },
- /* STDC17 */ { 1,0,1,1,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0 },
- /* STDC23 */ { 1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,1,1,0 },
- /* STDC2Y */ { 1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,1 },
- /* GNUCXX */ { 0,1,1,1,0,1,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0 },
- /* CXX98 */ { 0,1,0,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0 },
- /* GNUCXX11 */ { 1,1,1,1,1,1,0,1,1,1,1,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0 },
- /* CXX11 */ { 1,1,0,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0 },
- /* GNUCXX14 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0 },
- /* CXX14 */ { 1,1,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0 },
- /* GNUCXX17 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,0,1,0,0 },
- /* CXX17 */ { 1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0 },
- /* GNUCXX20 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,0,1,0,0 },
- /* CXX20 */ { 1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,0,1,0,0 },
- /* GNUCXX23 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0 },
- /* CXX23 */ { 1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0 },
- /* GNUCXX26 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0 },
- /* CXX26 */ { 1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0 },
- /* ASM */ { 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }
+ b d 8 l a a t l
+ x u i i c v s s i r d m o r e o
+ x i d u r d n g t h a c z f n e e c u m i w
+ c c n x c d s i l l l c s r l o o d l d d l d t f b m u
+ 9 + u i 1 i t g i i i s e i i p p f i e i i u a a e a c
+ 9 + m d 1 d d r t t t t p g t t e p t f r m c l l d g n */
+ /* GNUC89 */ { 0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0 },
+ /* GNUC99 */ { 1,0,1,1,0,0,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0 },
+ /* GNUC11 */ { 1,0,1,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0 },
+ /* GNUC17 */ { 1,0,1,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0 },
+ /* GNUC23 */ { 1,0,1,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,1,1,0,1 },
+ /* GNUC2Y */ { 1,0,1,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,1,1 },
+ /* STDC89 */ { 0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0 },
+ /* STDC94 */ { 0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0 },
+ /* STDC99 */ { 1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0 },
+ /* STDC11 */ { 1,0,1,1,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0 },
+ /* STDC17 */ { 1,0,1,1,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0 },
+ /* STDC23 */ { 1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,1,1,0,1 },
+ /* STDC2Y */ { 1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,1,1 },
+ /* GNUCXX */ { 0,1,1,1,0,1,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1 },
+ /* CXX98 */ { 0,1,0,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1 },
+ /* GNUCXX11 */ { 1,1,1,1,1,1,0,1,1,1,1,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1 },
+ /* CXX11 */ { 1,1,0,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1 },
+ /* GNUCXX14 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1 },
+ /* CXX14 */ { 1,1,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1 },
+ /* GNUCXX17 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1 },
+ /* CXX17 */ { 1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1 },
+ /* GNUCXX20 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1 },
+ /* CXX20 */ { 1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1 },
+ /* GNUCXX23 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1 },
+ /* CXX23 */ { 1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1 },
+ /* GNUCXX26 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1 },
+ /* CXX26 */ { 1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1 },
+ /* ASM */ { 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }
};
/* Sets internal flags correctly for a given language. */
@@ -188,6 +189,7 @@ cpp_set_lang (cpp_reader *pfile, enum c_lang lang)
CPP_OPTION (pfile, true_false) = l->true_false;
CPP_OPTION (pfile, embed) = l->embed;
CPP_OPTION (pfile, imaginary_constants) = l->imaginary_constants;
+ CPP_OPTION (pfile, low_ucns) = l->low_ucns;
}
/* Initialize library global state. */