summary | refs | log | tree | commit | diff
path: root/libstdc++-v3/scripts
diff options
context:
space:
mode:
author: Jonathan Wakely <jwakely@redhat.com> 2024-01-23 14:57:15 +0000
committer: Jonathan Wakely <jwakely@redhat.com> 2024-01-31 09:42:38 +0000
commit: 358fd42aabec56e471ed3c8e6f3dccbc305ff6f7 (patch)
tree: 0490ee568f23de8b45d53d3f1ade42746c29e184 /libstdc++-v3/scripts
parent: 00b2d7d17c38bdf3d26786c7030dc47454678c44 (diff)
libstdc++: Add "ASCII" as an alias for std::text_encoding::id::ASCII
As noted in LWG 4043, "ASCII" is not an alias for any known registered
character encoding, so std::text_encoding("ASCII").mib() == id::other.
Add the alias "ASCII" to the implementation-defined superset of aliases
for that encoding.

libstdc++-v3/ChangeLog:

* include/bits/text_encoding-data.h: Regenerate.
* scripts/gen_text_encoding_data.py: Add extra_aliases dict containing "ASCII".
* testsuite/std/text_encoding/cons.cc: Check "ascii" is known.

Co-authored-by: Ewan Higgs <ewan.higgs@gmail.com>
Signed-off-by: Ewan Higgs <ewan.higgs@gmail.com>
Diffstat (limited to 'libstdc++-v3/scripts')
-rwxr-xr-x libstdc++-v3/scripts/gen_text_encoding_data.py 24
1 files changed, 23 insertions, 1 deletions
diff --git a/libstdc++-v3/scripts/gen_text_encoding_data.py b/libstdc++-v3/scripts/gen_text_encoding_data.py
index 2d6f3e4077a..f0ebb42d8c2 100755
--- a/libstdc++-v3/scripts/gen_text_encoding_data.py
+++ b/libstdc++-v3/scripts/gen_text_encoding_data.py
@@ -36,6 +36,18 @@ print("#ifndef _GLIBCXX_GET_ENCODING_DATA")
print('# error "This is not a public header, do not include it directly"')
print("#endif\n")
+# We need to generate a list of initializers of the form { mib, alias }, e.g.,
+# { 3, "US-ASCII" },
+# { 3, "ISO646-US" },
+# { 3, "csASCII" },
+# { 4, "ISO_8859-1:1987" },
+# { 4, "latin1" },
+# The initializers must be sorted by the mib value. The first entry for
+# a given mib must be the primary name for the encoding. Any aliases for
+# the encoding come after the primary name.
+# We also define a macro _GLIBCXX_TEXT_ENCODING_UTF8_OFFSET which is the
+# offset into the list of the mib=106, alias="UTF-8" entry. This is used
+# to optimize the common case, so we don't need to search for "UTF-8".
charsets = {}
with open(sys.argv[1], newline='') as f:
@@ -52,10 +64,15 @@ with open(sys.argv[1], newline='') as f:
aliases.remove(name)
charsets[mib] = [name] + aliases
-# Remove "NATS-DANO" and "NATS-DANO-ADD"
+# Remove "NATS-DANO" and "NATS-DANO-ADD" as specified by the C++ standard.
charsets.pop(33, None)
charsets.pop(34, None)
+# This is not an official IANA alias, but we include it in the
+# implementation-defined superset of aliases for US-ASCII.
+# See also LWG 4043.
+extra_aliases = {3: ["ASCII"]}
+
count = 0
for mib in sorted(charsets.keys()):
names = charsets[mib]
@@ -64,6 +81,11 @@ for mib in sorted(charsets.keys()):
for name in names:
print(' {{ {:4}, "{}" }},'.format(mib, name))
count += len(names)
+ if mib in extra_aliases:
+ names = extra_aliases[mib]
+ for name in names:
+ print(' {{ {:4}, "{}" }}, // libstdc++ extension'.format(mib, name))
+ count += len(names)
# <text_encoding> gives an error if this macro is left defined.
# Do this last, so that the generated output is not usable unless we reach here.