From da61dd28c6dd77901058580e391cb8c88bb506f2 Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Mon, 17 Nov 2025 15:43:42 -0800 Subject: [libc] Move mbtowc, mbstowcs and inverse functions to stdlib.h (#168455) These functions should be declared in `stdlib.h`, not `wchar.h`, as confusing as it is. Move them to the proper header file and matching directories in src/ and test/ trees. This was discovered while testing libc++ build against llvm-libc, which re-declares functions like mbtowc in std-namespace in `<cstdlib>` header, and then uses those functions in its locale implementation. --- libc/config/linux/x86_64/entrypoints.txt | 8 +- libc/include/stdlib.yaml | 32 ++++++ libc/include/wchar.yaml | 31 ------ libc/src/stdlib/CMakeLists.txt | 59 +++++++++++ libc/src/stdlib/mbstowcs.cpp | 40 ++++++++ libc/src/stdlib/mbstowcs.h | 22 ++++ libc/src/stdlib/mbtowc.cpp | 37 +++++++ libc/src/stdlib/mbtowc.h | 22 ++++ libc/src/stdlib/wcstombs.cpp | 38 +++++++ libc/src/stdlib/wcstombs.h | 22 ++++ libc/src/stdlib/wctomb.cpp | 35 +++++++ libc/src/stdlib/wctomb.h | 22 ++++ libc/src/wchar/CMakeLists.txt | 59 ----------- libc/src/wchar/mbstowcs.cpp | 40 -------- libc/src/wchar/mbstowcs.h | 22 ---- libc/src/wchar/mbtowc.cpp | 37 ------- libc/src/wchar/mbtowc.h | 22 ---- libc/src/wchar/wcstombs.cpp | 38 ------- libc/src/wchar/wcstombs.h | 22 ---- libc/src/wchar/wctomb.cpp | 35 ------- libc/src/wchar/wctomb.h | 22 ---- libc/test/src/stdlib/CMakeLists.txt | 50 +++++++++ libc/test/src/stdlib/mbstowcs_test.cpp | 171 +++++++++++++++++++++++++++++++ libc/test/src/stdlib/mbtowc_test.cpp | 154 ++++++++++++++++++++++++++++ libc/test/src/stdlib/wcstombs_test.cpp | 84 +++++++++++++++ libc/test/src/stdlib/wctomb_test.cpp | 73 +++++++++++++ libc/test/src/wchar/CMakeLists.txt | 50 --------- libc/test/src/wchar/mbstowcs_test.cpp | 171 ------------------------------- libc/test/src/wchar/mbtowc_test.cpp | 154 ---------------------------- libc/test/src/wchar/wcstombs_test.cpp | 84 --------------- 
libc/test/src/wchar/wctomb_test.cpp | 73 ------------- 31 files changed, 865 insertions(+), 864 deletions(-) create mode 100644 libc/src/stdlib/mbstowcs.cpp create mode 100644 libc/src/stdlib/mbstowcs.h create mode 100644 libc/src/stdlib/mbtowc.cpp create mode 100644 libc/src/stdlib/mbtowc.h create mode 100644 libc/src/stdlib/wcstombs.cpp create mode 100644 libc/src/stdlib/wcstombs.h create mode 100644 libc/src/stdlib/wctomb.cpp create mode 100644 libc/src/stdlib/wctomb.h delete mode 100644 libc/src/wchar/mbstowcs.cpp delete mode 100644 libc/src/wchar/mbstowcs.h delete mode 100644 libc/src/wchar/mbtowc.cpp delete mode 100644 libc/src/wchar/mbtowc.h delete mode 100644 libc/src/wchar/wcstombs.cpp delete mode 100644 libc/src/wchar/wcstombs.h delete mode 100644 libc/src/wchar/wctomb.cpp delete mode 100644 libc/src/wchar/wctomb.h create mode 100644 libc/test/src/stdlib/mbstowcs_test.cpp create mode 100644 libc/test/src/stdlib/mbtowc_test.cpp create mode 100644 libc/test/src/stdlib/wcstombs_test.cpp create mode 100644 libc/test/src/stdlib/wctomb_test.cpp delete mode 100644 libc/test/src/wchar/mbstowcs_test.cpp delete mode 100644 libc/test/src/wchar/mbtowc_test.cpp delete mode 100644 libc/test/src/wchar/wcstombs_test.cpp delete mode 100644 libc/test/src/wchar/wctomb_test.cpp (limited to 'libc') diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 5036c9438a50..910bdc53cbbc 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -1254,7 +1254,11 @@ if(LLVM_LIBC_FULL_BUILD) libc.src.stdlib.atexit libc.src.stdlib.exit libc.src.stdlib.getenv + libc.src.stdlib.mbstowcs + libc.src.stdlib.mbtowc libc.src.stdlib.quick_exit + libc.src.stdlib.wcstombs + libc.src.stdlib.wctomb # signal.h entrypoints libc.src.signal.kill @@ -1372,13 +1376,9 @@ if(LLVM_LIBC_FULL_BUILD) libc.src.wchar.mbrlen libc.src.wchar.mbsinit libc.src.wchar.mbrtowc - libc.src.wchar.mbtowc - libc.src.wchar.mbstowcs 
libc.src.wchar.mbsrtowcs libc.src.wchar.mbsnrtowcs libc.src.wchar.wcrtomb - libc.src.wchar.wctomb - libc.src.wchar.wcstombs libc.src.wchar.wcsrtombs libc.src.wchar.wcsnrtombs diff --git a/libc/include/stdlib.yaml b/libc/include/stdlib.yaml index 495eb7e1317b..475224427924 100644 --- a/libc/include/stdlib.yaml +++ b/libc/include/stdlib.yaml @@ -17,6 +17,7 @@ types: - type_name: lldiv_t - type_name: locale_t - type_name: size_t + - type_name: wchar_t enums: [] objects: [] functions: @@ -135,6 +136,22 @@ functions: arguments: - type: long long - type: long long + - name: mbstowcs + standards: + - stdc + return_type: size_t + arguments: + - type: wchar_t *__restrict + - type: const char *__restrict + - type: size_t + - name: mbtowc + standards: + - stdc + return_type: int + arguments: + - type: wchar_t *__restrict + - type: const char *__restrict + - type: size_t - name: memalignment standards: - stdc @@ -332,3 +349,18 @@ functions: return_type: int arguments: - type: const char * + - name: wctomb + standards: + - stdc + return_type: int + arguments: + - type: char * + - type: wchar_t + - name: wcstombs + standards: + - stdc + return_type: size_t + arguments: + - type: char *__restrict + - type: const wchar_t *__restrict + - type: size_t diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml index a524c7f56bed..7a94f9b542b7 100644 --- a/libc/include/wchar.yaml +++ b/libc/include/wchar.yaml @@ -50,14 +50,6 @@ functions: - type: const char *__restrict - type: size_t - type: mbstate_t *__restrict - - name: mbtowc - standards: - - stdc - return_type: int - arguments: - - type: wchar_t *__restrict - - type: const char *__restrict - - type: size_t - name: mbsnrtowcs standards: - stdc @@ -77,14 +69,6 @@ functions: - type: const char **__restrict - type: size_t - type: mbstate_t *__restrict - - name: mbstowcs - standards: - - stdc - return_type: size_t - arguments: - - type: wchar_t *__restrict - - type: const char *__restrict - - type: size_t - name: mbsinit 
standards: - stdc @@ -269,13 +253,6 @@ functions: - type: char *__restrict - type: wchar_t - type: mbstate_t *__restrict - - name: wctomb - standards: - - stdc - return_type: int - arguments: - - type: char * - - type: wchar_t - name: wcscpy standards: - stdc @@ -336,14 +313,6 @@ functions: - type: const wchar_t *__restrict - type: wchar_t **__restrict - type: int - - name: wcstombs - standards: - - stdc - return_type: size_t - arguments: - - type: char *__restrict - - type: const wchar_t *__restrict - - type: size_t - name: wcstoul standards: - stdc diff --git a/libc/src/stdlib/CMakeLists.txt b/libc/src/stdlib/CMakeLists.txt index 1ccdcc8bec14..62da469f0eb9 100644 --- a/libc/src/stdlib/CMakeLists.txt +++ b/libc/src/stdlib/CMakeLists.txt @@ -368,6 +368,65 @@ add_entrypoint_object( libc.hdr.types.size_t ) +add_entrypoint_object( + mbtowc + SRCS + mbtowc.cpp + HDRS + mbtowc.h + DEPENDS + libc.hdr.types.size_t + libc.hdr.types.wchar_t + libc.src.__support.common + libc.src.__support.macros.config + libc.src.__support.libc_errno + libc.src.__support.wchar.mbrtowc + libc.src.__support.wchar.mbstate +) + +add_entrypoint_object( + mbstowcs + SRCS + mbstowcs.cpp + HDRS + mbstowcs.h + DEPENDS + libc.hdr.types.size_t + libc.hdr.types.wchar_t + libc.src.__support.common + libc.src.__support.macros.config + libc.src.__support.macros.null_check + libc.src.__support.libc_errno + libc.src.__support.wchar.mbstate + libc.src.__support.wchar.mbsnrtowcs +) + +add_entrypoint_object( + wctomb + SRCS + wctomb.cpp + HDRS + wctomb.h + DEPENDS + libc.hdr.types.wchar_t + libc.src.__support.wchar.wcrtomb + libc.src.__support.wchar.mbstate + libc.src.__support.libc_errno +) + +add_entrypoint_object( + wcstombs + SRCS + wcstombs.cpp + HDRS + wcstombs.h + DEPENDS + libc.hdr.types.wchar_t + libc.src.__support.wchar.mbstate + libc.src.__support.wchar.wcsnrtombs + libc.src.__support.libc_errno +) + if(NOT LIBC_TARGET_OS_IS_BAREMETAL AND NOT LIBC_TARGET_OS_IS_GPU) if(LLVM_LIBC_INCLUDE_SCUDO) 
set(SCUDO_DEPS "") diff --git a/libc/src/stdlib/mbstowcs.cpp b/libc/src/stdlib/mbstowcs.cpp new file mode 100644 index 000000000000..6d283ea46e3b --- /dev/null +++ b/libc/src/stdlib/mbstowcs.cpp @@ -0,0 +1,40 @@ +//===-- Implementation of mbstowcs ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdlib/mbstowcs.h" + +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/common.h" +#include "src/__support/libc_errno.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/null_check.h" +#include "src/__support/wchar/mbsnrtowcs.h" +#include "src/__support/wchar/mbstate.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(size_t, mbstowcs, + (wchar_t *__restrict pwcs, const char *__restrict s, + size_t n)) { + LIBC_CRASH_ON_NULLPTR(s); + // If destination is null, ignore n + n = pwcs == nullptr ? SIZE_MAX : n; + static internal::mbstate internal_mbstate; + const char *temp = s; + auto ret = internal::mbsnrtowcs(pwcs, &temp, SIZE_MAX, n, &internal_mbstate); + + if (!ret.has_value()) { + // Encoding failure + libc_errno = ret.error(); + return -1; + } + return ret.value(); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdlib/mbstowcs.h b/libc/src/stdlib/mbstowcs.h new file mode 100644 index 000000000000..90f8195a39ec --- /dev/null +++ b/libc/src/stdlib/mbstowcs.h @@ -0,0 +1,22 @@ +//===-- Implementation header for mbstowcs --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDLIB_MBSTOWCS_H +#define LLVM_LIBC_SRC_STDLIB_MBSTOWCS_H + +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +size_t mbstowcs(wchar_t *__restrict pwcs, const char *__restrict s, size_t n); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_STDLIB_MBSTOWCS_H diff --git a/libc/src/stdlib/mbtowc.cpp b/libc/src/stdlib/mbtowc.cpp new file mode 100644 index 000000000000..5f482463f471 --- /dev/null +++ b/libc/src/stdlib/mbtowc.cpp @@ -0,0 +1,37 @@ +//===-- Implementation of mbtowc -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdlib/mbtowc.h" + +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/common.h" +#include "src/__support/libc_errno.h" +#include "src/__support/macros/config.h" +#include "src/__support/wchar/mbrtowc.h" +#include "src/__support/wchar/mbstate.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(int, mbtowc, + (wchar_t *__restrict pwc, const char *__restrict s, + size_t n)) { + // returns 0 since UTF-8 encoding is not state-dependent + if (s == nullptr) + return 0; + internal::mbstate internal_mbstate; + auto ret = internal::mbrtowc(pwc, s, n, &internal_mbstate); + if (!ret.has_value() || static_cast(ret.value()) == -2) { + // Encoding failure + libc_errno = EILSEQ; + return -1; + } + return static_cast(ret.value()); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdlib/mbtowc.h b/libc/src/stdlib/mbtowc.h new file 
mode 100644 index 000000000000..acd85cb77ba7 --- /dev/null +++ b/libc/src/stdlib/mbtowc.h @@ -0,0 +1,22 @@ +//===-- Implementation header for mbtowc ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDLIB_MBTOWC_H +#define LLVM_LIBC_SRC_STDLIB_MBTOWC_H + +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +int mbtowc(wchar_t *__restrict pwc, const char *__restrict s, size_t n); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_STDLIB_MBTOWC_H diff --git a/libc/src/stdlib/wcstombs.cpp b/libc/src/stdlib/wcstombs.cpp new file mode 100644 index 000000000000..712af958456d --- /dev/null +++ b/libc/src/stdlib/wcstombs.cpp @@ -0,0 +1,38 @@ +//===-- Implementation of wcstombs ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdlib/wcstombs.h" + +#include "hdr/types/char32_t.h" +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/common.h" +#include "src/__support/libc_errno.h" +#include "src/__support/macros/config.h" +#include "src/__support/wchar/mbstate.h" +#include "src/__support/wchar/wcsnrtombs.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(size_t, wcstombs, + (char *__restrict s, const wchar_t *__restrict wcs, + size_t n)) { + LIBC_CRASH_ON_NULLPTR(wcs); + static internal::mbstate internal_mbstate; + const wchar_t *wcs_ptr_copy = wcs; + auto result = + internal::wcsnrtombs(s, &wcs_ptr_copy, SIZE_MAX, n, &internal_mbstate); + if (!result.has_value()) { + libc_errno = result.error(); + return -1; + } + + return result.value(); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdlib/wcstombs.h b/libc/src/stdlib/wcstombs.h new file mode 100644 index 000000000000..39515431098c --- /dev/null +++ b/libc/src/stdlib/wcstombs.h @@ -0,0 +1,22 @@ +//===-- Implementation header for wcstombs --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDLIB_WCSTOMBS_H +#define LLVM_LIBC_SRC_STDLIB_WCSTOMBS_H + +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +size_t wcstombs(char *__restrict s, const wchar_t *__restrict pwcs, size_t n); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_STDLIB_WCSTOMBS_H diff --git a/libc/src/stdlib/wctomb.cpp b/libc/src/stdlib/wctomb.cpp new file mode 100644 index 000000000000..0ca1a84cd923 --- /dev/null +++ b/libc/src/stdlib/wctomb.cpp @@ -0,0 +1,35 @@ +//===-- Implementation of wctomb ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdlib/wctomb.h" + +#include "hdr/types/wchar_t.h" +#include "src/__support/common.h" +#include "src/__support/libc_errno.h" +#include "src/__support/macros/config.h" +#include "src/__support/wchar/mbstate.h" +#include "src/__support/wchar/wcrtomb.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(int, wctomb, (char *s, wchar_t wc)) { + static internal::mbstate internal_mbstate; + if (s == nullptr) + return 0; + + auto result = internal::wcrtomb(s, wc, &internal_mbstate); + + if (!result.has_value()) { // invalid wide character + libc_errno = EILSEQ; + return -1; + } + + return static_cast(result.value()); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdlib/wctomb.h b/libc/src/stdlib/wctomb.h new file mode 100644 index 000000000000..90afa31d9e70 --- /dev/null +++ b/libc/src/stdlib/wctomb.h @@ -0,0 +1,22 @@ +//===-- Implementation header for 
wctomb ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDLIB_WCTOMB_H +#define LLVM_LIBC_SRC_STDLIB_WCTOMB_H + +#include "hdr/types/mbstate_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +int wctomb(char *s, wchar_t wc); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_STDLIB_WCTOMB_H diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt index e6d9af9eacf7..9ca7295118a1 100644 --- a/libc/src/wchar/CMakeLists.txt +++ b/libc/src/wchar/CMakeLists.txt @@ -156,19 +156,6 @@ add_entrypoint_object( libc.src.__support.wchar.mbstate ) -add_entrypoint_object( - wctomb - SRCS - wctomb.cpp - HDRS - wctomb.h - DEPENDS - libc.hdr.types.wchar_t - libc.src.__support.wchar.wcrtomb - libc.src.__support.wchar.mbstate - libc.src.__support.libc_errno -) - add_entrypoint_object( mbsinit SRCS @@ -201,39 +188,6 @@ add_entrypoint_object( libc.src.__support.wchar.mbstate ) -add_entrypoint_object( - mbtowc - SRCS - mbtowc.cpp - HDRS - mbtowc.h - DEPENDS - libc.hdr.types.size_t - libc.hdr.types.wchar_t - libc.src.__support.common - libc.src.__support.macros.config - libc.src.__support.libc_errno - libc.src.__support.wchar.mbrtowc - libc.src.__support.wchar.mbstate -) - -add_entrypoint_object( - mbstowcs - SRCS - mbstowcs.cpp - HDRS - mbstowcs.h - DEPENDS - libc.hdr.types.size_t - libc.hdr.types.wchar_t - libc.src.__support.common - libc.src.__support.macros.config - libc.src.__support.macros.null_check - libc.src.__support.libc_errno - libc.src.__support.wchar.mbstate - libc.src.__support.wchar.mbsnrtowcs -) - add_entrypoint_object( mbsrtowcs SRCS @@ -266,19 +220,6 @@ 
add_entrypoint_object( libc.src.__support.wchar.mbsnrtowcs ) -add_entrypoint_object( - wcstombs - SRCS - wcstombs.cpp - HDRS - wcstombs.h - DEPENDS - libc.hdr.types.wchar_t - libc.src.__support.wchar.mbstate - libc.src.__support.wchar.wcsnrtombs - libc.src.__support.libc_errno -) - add_entrypoint_object( wcsrtombs SRCS diff --git a/libc/src/wchar/mbstowcs.cpp b/libc/src/wchar/mbstowcs.cpp deleted file mode 100644 index 43e953cdf2ac..000000000000 --- a/libc/src/wchar/mbstowcs.cpp +++ /dev/null @@ -1,40 +0,0 @@ -//===-- Implementation of mbstowcs ----------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "src/wchar/mbstowcs.h" - -#include "hdr/types/size_t.h" -#include "hdr/types/wchar_t.h" -#include "src/__support/common.h" -#include "src/__support/libc_errno.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/null_check.h" -#include "src/__support/wchar/mbsnrtowcs.h" -#include "src/__support/wchar/mbstate.h" - -namespace LIBC_NAMESPACE_DECL { - -LLVM_LIBC_FUNCTION(size_t, mbstowcs, - (wchar_t *__restrict pwcs, const char *__restrict s, - size_t n)) { - LIBC_CRASH_ON_NULLPTR(s); - // If destination is null, ignore n - n = pwcs == nullptr ? 
SIZE_MAX : n; - static internal::mbstate internal_mbstate; - const char *temp = s; - auto ret = internal::mbsnrtowcs(pwcs, &temp, SIZE_MAX, n, &internal_mbstate); - - if (!ret.has_value()) { - // Encoding failure - libc_errno = ret.error(); - return -1; - } - return ret.value(); -} - -} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/wchar/mbstowcs.h b/libc/src/wchar/mbstowcs.h deleted file mode 100644 index 7d08a838b232..000000000000 --- a/libc/src/wchar/mbstowcs.h +++ /dev/null @@ -1,22 +0,0 @@ -//===-- Implementation header for mbstowcs --------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC_WCHAR_MBSTOWCS_H -#define LLVM_LIBC_SRC_WCHAR_MBSTOWCS_H - -#include "hdr/types/size_t.h" -#include "hdr/types/wchar_t.h" -#include "src/__support/macros/config.h" - -namespace LIBC_NAMESPACE_DECL { - -size_t mbstowcs(wchar_t *__restrict pwcs, const char *__restrict s, size_t n); - -} // namespace LIBC_NAMESPACE_DECL - -#endif // LLVM_LIBC_SRC_WCHAR_MBSTOWCS_H diff --git a/libc/src/wchar/mbtowc.cpp b/libc/src/wchar/mbtowc.cpp deleted file mode 100644 index 6d099d43da5f..000000000000 --- a/libc/src/wchar/mbtowc.cpp +++ /dev/null @@ -1,37 +0,0 @@ -//===-- Implementation of mbtowc -----------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "src/wchar/mbtowc.h" - -#include "hdr/types/size_t.h" -#include "hdr/types/wchar_t.h" -#include "src/__support/common.h" -#include "src/__support/libc_errno.h" -#include "src/__support/macros/config.h" -#include "src/__support/wchar/mbrtowc.h" -#include "src/__support/wchar/mbstate.h" - -namespace LIBC_NAMESPACE_DECL { - -LLVM_LIBC_FUNCTION(int, mbtowc, - (wchar_t *__restrict pwc, const char *__restrict s, - size_t n)) { - // returns 0 since UTF-8 encoding is not state-dependent - if (s == nullptr) - return 0; - internal::mbstate internal_mbstate; - auto ret = internal::mbrtowc(pwc, s, n, &internal_mbstate); - if (!ret.has_value() || static_cast(ret.value()) == -2) { - // Encoding failure - libc_errno = EILSEQ; - return -1; - } - return static_cast(ret.value()); -} - -} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/wchar/mbtowc.h b/libc/src/wchar/mbtowc.h deleted file mode 100644 index f974197f81b5..000000000000 --- a/libc/src/wchar/mbtowc.h +++ /dev/null @@ -1,22 +0,0 @@ -//===-- Implementation header for mbtowc ---------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC_WCHAR_MBTOWC_H -#define LLVM_LIBC_SRC_WCHAR_MBTOWC_H - -#include "hdr/types/size_t.h" -#include "hdr/types/wchar_t.h" -#include "src/__support/macros/config.h" - -namespace LIBC_NAMESPACE_DECL { - -int mbtowc(wchar_t *__restrict pwc, const char *__restrict s, size_t n); - -} // namespace LIBC_NAMESPACE_DECL - -#endif // LLVM_LIBC_SRC_WCHAR_MBTOWC_H diff --git a/libc/src/wchar/wcstombs.cpp b/libc/src/wchar/wcstombs.cpp deleted file mode 100644 index c3793cbe912c..000000000000 --- a/libc/src/wchar/wcstombs.cpp +++ /dev/null @@ -1,38 +0,0 @@ -//===-- Implementation of wcstombs ----------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "src/wchar/wcstombs.h" - -#include "hdr/types/char32_t.h" -#include "hdr/types/size_t.h" -#include "hdr/types/wchar_t.h" -#include "src/__support/common.h" -#include "src/__support/libc_errno.h" -#include "src/__support/macros/config.h" -#include "src/__support/wchar/mbstate.h" -#include "src/__support/wchar/wcsnrtombs.h" - -namespace LIBC_NAMESPACE_DECL { - -LLVM_LIBC_FUNCTION(size_t, wcstombs, - (char *__restrict s, const wchar_t *__restrict wcs, - size_t n)) { - LIBC_CRASH_ON_NULLPTR(wcs); - static internal::mbstate internal_mbstate; - const wchar_t *wcs_ptr_copy = wcs; - auto result = - internal::wcsnrtombs(s, &wcs_ptr_copy, SIZE_MAX, n, &internal_mbstate); - if (!result.has_value()) { - libc_errno = result.error(); - return -1; - } - - return result.value(); -} - -} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/wchar/wcstombs.h b/libc/src/wchar/wcstombs.h deleted file 
mode 100644 index cd0008a168d9..000000000000 --- a/libc/src/wchar/wcstombs.h +++ /dev/null @@ -1,22 +0,0 @@ -//===-- Implementation header for wcstombs --------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC_WCHAR_WCSTOMBS_H -#define LLVM_LIBC_SRC_WCHAR_WCSTOMBS_H - -#include "hdr/types/size_t.h" -#include "hdr/types/wchar_t.h" -#include "src/__support/macros/config.h" - -namespace LIBC_NAMESPACE_DECL { - -size_t wcstombs(char *__restrict s, const wchar_t *__restrict pwcs, size_t n); - -} // namespace LIBC_NAMESPACE_DECL - -#endif // LLVM_LIBC_SRC_WCHAR_WCSTOMBS_H diff --git a/libc/src/wchar/wctomb.cpp b/libc/src/wchar/wctomb.cpp deleted file mode 100644 index 142302e6ae09..000000000000 --- a/libc/src/wchar/wctomb.cpp +++ /dev/null @@ -1,35 +0,0 @@ -//===-- Implementation of wctomb ------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "src/wchar/wctomb.h" - -#include "hdr/types/wchar_t.h" -#include "src/__support/common.h" -#include "src/__support/libc_errno.h" -#include "src/__support/macros/config.h" -#include "src/__support/wchar/mbstate.h" -#include "src/__support/wchar/wcrtomb.h" - -namespace LIBC_NAMESPACE_DECL { - -LLVM_LIBC_FUNCTION(int, wctomb, (char *s, wchar_t wc)) { - static internal::mbstate internal_mbstate; - if (s == nullptr) - return 0; - - auto result = internal::wcrtomb(s, wc, &internal_mbstate); - - if (!result.has_value()) { // invalid wide character - libc_errno = EILSEQ; - return -1; - } - - return static_cast(result.value()); -} - -} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/wchar/wctomb.h b/libc/src/wchar/wctomb.h deleted file mode 100644 index 02a34e5ad229..000000000000 --- a/libc/src/wchar/wctomb.h +++ /dev/null @@ -1,22 +0,0 @@ -//===-- Implementation header for wctomb ------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC_WCHAR_WCTOMB_H -#define LLVM_LIBC_SRC_WCHAR_WCTOMB_H - -#include "hdr/types/mbstate_t.h" -#include "hdr/types/wchar_t.h" -#include "src/__support/macros/config.h" - -namespace LIBC_NAMESPACE_DECL { - -int wctomb(char *s, wchar_t wc); - -} // namespace LIBC_NAMESPACE_DECL - -#endif // LLVM_LIBC_SRC_WCHAR_WCTOMB_H diff --git a/libc/test/src/stdlib/CMakeLists.txt b/libc/test/src/stdlib/CMakeLists.txt index 42e8faa3fd69..bcd3d139aa46 100644 --- a/libc/test/src/stdlib/CMakeLists.txt +++ b/libc/test/src/stdlib/CMakeLists.txt @@ -388,6 +388,56 @@ add_libc_test( libc.src.stdlib.memalignment ) +add_libc_test( + mbtowc_test + SUITE + libc-stdlib-tests + SRCS + mbtowc_test.cpp + DEPENDS + libc.hdr.errno_macros + libc.src.stdlib.mbtowc + libc.hdr.types.wchar_t + libc.test.UnitTest.ErrnoCheckingTest +) + +add_libc_test( + mbstowcs_test + SUITE + libc-stdlib-tests + SRCS + mbstowcs_test.cpp + DEPENDS + libc.hdr.errno_macros + libc.src.stdlib.mbstowcs + libc.hdr.types.wchar_t + libc.test.UnitTest.ErrnoCheckingTest +) + +add_libc_test( + wctomb_test + SUITE + libc-stdlib-tests + SRCS + wctomb_test.cpp + DEPENDS + libc.hdr.errno_macros + libc.src.stdlib.wctomb + libc.hdr.types.wchar_t +) + +add_libc_test( + wcstombs_test + SUITE + libc-stdlib-tests + SRCS + wcstombs_test.cpp + DEPENDS + libc.src.stdlib.wcstombs + libc.test.UnitTest.ErrnoCheckingTest + libc.hdr.types.wchar_t +) + if(LLVM_LIBC_FULL_BUILD) add_libc_test( diff --git a/libc/test/src/stdlib/mbstowcs_test.cpp b/libc/test/src/stdlib/mbstowcs_test.cpp new file mode 100644 index 000000000000..125683a3eca9 --- /dev/null +++ b/libc/test/src/stdlib/mbstowcs_test.cpp @@ -0,0 +1,171 @@ +//===-- Unittests for mbstowcs --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "hdr/errno_macros.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/macros/null_check.h" +#include "src/stdlib/mbstowcs.h" +#include "test/UnitTest/ErrnoCheckingTest.h" +#include "test/UnitTest/Test.h" + +using LlvmLibcMBSToWCSTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest; + +TEST_F(LlvmLibcMBSToWCSTest, OneByteOneChar) { + const char *ch = "A"; + const char *original = ch; + wchar_t dest[2]; + size_t n = LIBC_NAMESPACE::mbstowcs(dest, ch, 1); + ASSERT_EQ(static_cast(*dest), 'A'); + ASSERT_EQ(static_cast(n), 1); + // Making sure the pointer is not getting updated + ASSERT_EQ(ch, original); + ASSERT_ERRNO_SUCCESS(); + + n = LIBC_NAMESPACE::mbstowcs(dest + 1, ch + 1, 1); + ASSERT_EQ(static_cast(dest[1]), '\0'); + // Should not include null terminator + ASSERT_EQ(static_cast(n), 0); + // Making sure the pointer is not getting updated + ASSERT_EQ(ch, original); + ASSERT_ERRNO_SUCCESS(); +} + +TEST_F(LlvmLibcMBSToWCSTest, FourByteOneChar) { + const char *src = "\xf0\x9f\x98\xb9"; // laughing cat emoji 😹 + const char *original = src; + wchar_t dest[2]; + size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 2); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(static_cast(dest[0]), 128569); + ASSERT_TRUE(dest[1] == L'\0'); + // Should not count null terminator in number + ASSERT_EQ(static_cast(n), 1); + // Making sure the pointer is not getting updated + ASSERT_EQ(src, original); +} + +TEST_F(LlvmLibcMBSToWCSTest, MultiByteTwoCharacters) { + // Two laughing cat emojis "😹😹" + const char *src = "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; + const char *original = src; + wchar_t dest[3]; + size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 3); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(static_cast(dest[0]), 128569); + ASSERT_EQ(static_cast(dest[1]), 128569); + ASSERT_TRUE(dest[2] 
== L'\0'); + // Should not count null terminator in number + ASSERT_EQ(static_cast(n), 2); + // Making sure the pointer is not getting updated + ASSERT_EQ(src, original); +} + +TEST_F(LlvmLibcMBSToWCSTest, MixedNumberOfBytes) { + // 'A', sigma symbol 'Σ', recycling symbol '♻', laughing cat emoji '😹' + const char *src = "A\xce\xa3\xe2\x99\xbb\xf0\x9f\x98\xb9"; + const char *original = src; + wchar_t dest[5]; + size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 5); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(static_cast(dest[0]), 'A'); + ASSERT_EQ(static_cast(dest[1]), 931); + ASSERT_EQ(static_cast(dest[2]), 9851); + ASSERT_EQ(static_cast(dest[3]), 128569); + ASSERT_TRUE(dest[4] == L'\0'); + // Should not count null terminator in number + ASSERT_EQ(static_cast(n), 4); + // Making sure the pointer is not getting updated + ASSERT_EQ(src, original); +} + +TEST_F(LlvmLibcMBSToWCSTest, ReadLessThanStringLength) { + // Four laughing cat emojis "😹😹😹😹" + const char *src = + "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; + const char *original = src; + wchar_t dest[5] = {L'a', L'b', L'c', L'd', L'e'}; + size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 3); + ASSERT_ERRNO_SUCCESS(); + // Should have read 3 emojis + ASSERT_EQ(static_cast(n), 3); + ASSERT_EQ(static_cast(dest[0]), 128569); + ASSERT_EQ(static_cast(dest[1]), 128569); + ASSERT_EQ(static_cast(dest[2]), 128569); + ASSERT_TRUE(dest[3] == L'd'); + ASSERT_TRUE(dest[4] == L'e'); + // Making sure the pointer is not getting updated + ASSERT_EQ(src, original); +} + +TEST_F(LlvmLibcMBSToWCSTest, InvalidFirstByte) { + // 0x80 is invalid first byte of mb character + const char *src = + "\x80\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; + wchar_t dest[3]; + size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 3); + // Should return error and set errno + ASSERT_EQ(static_cast(n), -1); + ASSERT_ERRNO_EQ(EILSEQ); +} + +TEST_F(LlvmLibcMBSToWCSTest, InvalidMiddleByte) { + // The 7th byte is invalid for a 4 byte 
character + const char *src = + "\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; + const char *original = src; + wchar_t dest[3]; + size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 5); + // Should return error and set errno + ASSERT_EQ(static_cast(n), -1); + ASSERT_ERRNO_EQ(EILSEQ); + // Making sure the pointer is not getting updated + ASSERT_EQ(src, original); +} + +TEST_F(LlvmLibcMBSToWCSTest, NullDestination) { + // Four laughing cat emojis "😹😹😹😹" + const char *src = + "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; + const char *original = src; + size_t n = LIBC_NAMESPACE::mbstowcs(nullptr, src, 2); + ASSERT_ERRNO_SUCCESS(); + // Null destination should ignore len and read till end of string + ASSERT_EQ(static_cast(n), 4); + // Making sure the pointer is not getting updated + ASSERT_EQ(src, original); +} + +TEST_F(LlvmLibcMBSToWCSTest, ErrnoChecks) { + // Two laughing cat emojis and invalid 3rd mb char (3rd byte of it) + const char *src = + "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9"; + const char *original = src; + wchar_t dest[5]; + // First two bytes are valid --> should not set errno + size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 2); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(static_cast(n), 2); + ASSERT_EQ(static_cast(dest[0]), 128569); + ASSERT_EQ(static_cast(dest[1]), 128569); + // Making sure the pointer is not getting updated + ASSERT_EQ(src, original); + // Trying to read the 3rd byte should set errno + n = LIBC_NAMESPACE::mbstowcs(dest, src + 2, 2); + ASSERT_ERRNO_EQ(EILSEQ); + ASSERT_EQ(static_cast(n), -1); + // Making sure the pointer is not getting updated + ASSERT_EQ(src, original); +} + +#if defined(LIBC_ADD_NULL_CHECKS) +TEST(LlvmLibcMBSToWCSTest, NullptrCrash) { + // Passing in a nullptr should crash the program. 
+ EXPECT_DEATH([] { LIBC_NAMESPACE::mbstowcs(nullptr, nullptr, 1); }, + WITH_SIGNAL(-1)); +} +#endif // LIBC_ADD_NULL_CHECKS diff --git a/libc/test/src/stdlib/mbtowc_test.cpp b/libc/test/src/stdlib/mbtowc_test.cpp new file mode 100644 index 000000000000..7946e077b647 --- /dev/null +++ b/libc/test/src/stdlib/mbtowc_test.cpp @@ -0,0 +1,154 @@ +//===-- Unittests for mbtowc ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "hdr/errno_macros.h" +#include "hdr/types/wchar_t.h" +#include "src/stdlib/mbtowc.h" +#include "test/UnitTest/ErrnoCheckingTest.h" +#include "test/UnitTest/Test.h" + +using LlvmLibcMBToWCTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest; + +TEST_F(LlvmLibcMBToWCTest, OneByte) { + const char *ch = "A"; + wchar_t dest[2]; + int n = LIBC_NAMESPACE::mbtowc(dest, ch, 1); + ASSERT_EQ(static_cast(*dest), 'A'); + ASSERT_EQ(n, 1); + + // Should fail since we have not read enough + n = LIBC_NAMESPACE::mbtowc(dest, ch, 0); + ASSERT_EQ(n, -1); + ASSERT_ERRNO_EQ(EILSEQ); +} + +TEST_F(LlvmLibcMBToWCTest, TwoByte) { + const char ch[2] = {static_cast(0xC2), + static_cast(0x8E)}; // Ž car symbol + wchar_t dest[2]; + int n = LIBC_NAMESPACE::mbtowc(dest, ch, 2); + ASSERT_EQ(static_cast(*dest), 142); + ASSERT_EQ(n, 2); + + // Should fail since we have not read enough + n = LIBC_NAMESPACE::mbtowc(dest, ch, 1); + ASSERT_EQ(n, -1); + // Should fail after trying to read next byte too + n = LIBC_NAMESPACE::mbtowc(dest, ch + 1, 1); + ASSERT_EQ(n, -1); + ASSERT_ERRNO_EQ(EILSEQ); +} + +TEST_F(LlvmLibcMBToWCTest, ThreeByte) { + const char ch[3] = {static_cast(0xE2), static_cast(0x88), + static_cast(0x91)}; // ∑ sigma symbol + wchar_t dest[2]; + int n = 
LIBC_NAMESPACE::mbtowc(dest, ch, 3); + ASSERT_EQ(static_cast(*dest), 8721); + ASSERT_EQ(n, 3); + + // Should fail since we have not read enough + n = LIBC_NAMESPACE::mbtowc(dest, ch, 2); + ASSERT_EQ(n, -1); + ASSERT_ERRNO_EQ(EILSEQ); +} + +TEST_F(LlvmLibcMBToWCTest, FourByte) { + const char ch[4] = {static_cast(0xF0), static_cast(0x9F), + static_cast(0xA4), + static_cast(0xA1)}; // 🤡 clown emoji + wchar_t dest[2]; + int n = LIBC_NAMESPACE::mbtowc(dest, ch, 4); + ASSERT_EQ(static_cast(*dest), 129313); + ASSERT_EQ(n, 4); + + // Should fail since we have not read enough + n = LIBC_NAMESPACE::mbtowc(dest, ch, 2); + ASSERT_EQ(n, -1); + ASSERT_ERRNO_EQ(EILSEQ); +} + +TEST_F(LlvmLibcMBToWCTest, InvalidByte) { + const char ch[1] = {static_cast(0x80)}; + wchar_t dest[2]; + int n = LIBC_NAMESPACE::mbtowc(dest, ch, 1); + ASSERT_EQ(n, -1); + ASSERT_ERRNO_EQ(EILSEQ); +} + +TEST_F(LlvmLibcMBToWCTest, InvalidMultiByte) { + const char ch[4] = {static_cast(0x80), static_cast(0x00), + static_cast(0x80), + static_cast(0x00)}; // invalid sequence of bytes + wchar_t dest[2]; + // Trying to push all 4 should error + int n = LIBC_NAMESPACE::mbtowc(dest, ch, 4); + ASSERT_EQ(n, -1); + ASSERT_ERRNO_EQ(EILSEQ); + + // Trying to push the second and third should correspond to null wc + n = LIBC_NAMESPACE::mbtowc(dest, ch + 1, 2); + ASSERT_EQ(n, 0); + ASSERT_TRUE(*dest == L'\0'); +} + +TEST_F(LlvmLibcMBToWCTest, InvalidLastByte) { + // Last byte is invalid since it does not have correct starting sequence. 
+ // 0xC0 --> 11000000 starting sequence should be 10xxxxxx + const char ch[4] = {static_cast(0xF1), static_cast(0x80), + static_cast(0x80), static_cast(0xC0)}; + wchar_t dest[2]; + // Trying to push all 4 should error + int n = LIBC_NAMESPACE::mbtowc(dest, ch, 4); + ASSERT_EQ(n, -1); + ASSERT_ERRNO_EQ(EILSEQ); +} + +TEST_F(LlvmLibcMBToWCTest, ValidTwoByteWithExtraRead) { + const char ch[3] = {static_cast(0xC2), static_cast(0x8E), + static_cast(0x80)}; + wchar_t dest[2]; + // Trying to push all 3 should return valid 2 byte + int n = LIBC_NAMESPACE::mbtowc(dest, ch, 3); + ASSERT_EQ(n, 2); + ASSERT_EQ(static_cast(*dest), 142); +} + +TEST_F(LlvmLibcMBToWCTest, TwoValidTwoBytes) { + const char ch[4] = {static_cast(0xC2), static_cast(0x8E), + static_cast(0xC7), static_cast(0x8C)}; + wchar_t dest[2]; + int n = LIBC_NAMESPACE::mbtowc(dest, ch, 2); + ASSERT_EQ(n, 2); + ASSERT_EQ(static_cast(*dest), 142); + n = LIBC_NAMESPACE::mbtowc(dest + 1, ch + 2, 2); + ASSERT_EQ(n, 2); + ASSERT_EQ(static_cast(*(dest + 1)), 460); +} + +TEST_F(LlvmLibcMBToWCTest, NullString) { + wchar_t dest[2] = {L'O', L'K'}; + // reading on nullptr should return 0 + int n = LIBC_NAMESPACE::mbtowc(dest, nullptr, 2); + ASSERT_EQ(n, 0); + ASSERT_TRUE(dest[0] == L'O'); + // reading a null terminator should return 0 + const char *ch = "\0"; + n = LIBC_NAMESPACE::mbtowc(dest, ch, 1); + ASSERT_EQ(n, 0); +} + +TEST_F(LlvmLibcMBToWCTest, NullWCPtr) { + const char ch[2] = { + static_cast(0xC2), + static_cast(0x8E), + }; + // a null destination should still return the number of read bytes + int n = LIBC_NAMESPACE::mbtowc(nullptr, ch, 2); + ASSERT_EQ(n, 2); +} diff --git a/libc/test/src/stdlib/wcstombs_test.cpp b/libc/test/src/stdlib/wcstombs_test.cpp new file mode 100644 index 000000000000..792a4edb51b3 --- /dev/null +++ b/libc/test/src/stdlib/wcstombs_test.cpp @@ -0,0 +1,84 @@ +//===-- Unittests for wcstombs --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache 
License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdlib/wcstombs.h" +#include "test/UnitTest/ErrnoCheckingTest.h" +#include "test/UnitTest/Test.h" + +using LlvmLibcWcstombs = LIBC_NAMESPACE::testing::ErrnoCheckingTest; + +// these tests are fairly simple as this function just calls into the internal +// wcsnrtombs which is more thoroughly tested + +TEST_F(LlvmLibcWcstombs, AllMultibyteLengths) { + /// clown emoji, sigma symbol, y with diaeresis, letter A + const wchar_t src[] = {static_cast(0x1f921), + static_cast(0x2211), + static_cast(0xff), static_cast(0x41), + static_cast(0x0)}; + char mbs[11]; + + ASSERT_EQ(LIBC_NAMESPACE::wcstombs(mbs, src, 11), static_cast(10)); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(mbs[0], '\xF0'); // clown begin + ASSERT_EQ(mbs[1], '\x9F'); + ASSERT_EQ(mbs[2], '\xA4'); + ASSERT_EQ(mbs[3], '\xA1'); + ASSERT_EQ(mbs[4], '\xE2'); // sigma begin + ASSERT_EQ(mbs[5], '\x88'); + ASSERT_EQ(mbs[6], '\x91'); + ASSERT_EQ(mbs[7], '\xC3'); // y diaeresis begin + ASSERT_EQ(mbs[8], '\xBF'); + ASSERT_EQ(mbs[9], '\x41'); // A begin + ASSERT_EQ(mbs[10], '\0'); // null terminator +} + +TEST_F(LlvmLibcWcstombs, DestLimit) { + /// clown emoji, sigma symbol, y with diaeresis, letter A + const wchar_t src[] = {static_cast(0x1f921), + static_cast(0x2211), + static_cast(0xff), static_cast(0x41), + static_cast(0x0)}; + char mbs[11]; + for (int i = 0; i < 11; ++i) + mbs[i] = '\x01'; // dummy initial values + + ASSERT_EQ(LIBC_NAMESPACE::wcstombs(mbs, src, 4), static_cast(4)); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(mbs[0], '\xF0'); + ASSERT_EQ(mbs[1], '\x9F'); + ASSERT_EQ(mbs[2], '\xA4'); + ASSERT_EQ(mbs[3], '\xA1'); + ASSERT_EQ(mbs[4], '\x01'); // didn't write more than 4 bytes + + for (int i = 0; i < 11; ++i) + mbs[i] = '\x01'; // dummy initial values + + // 
not enough bytes to convert the second character, so only converts one + ASSERT_EQ(LIBC_NAMESPACE::wcstombs(mbs, src, 6), static_cast(4)); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(mbs[0], '\xF0'); + ASSERT_EQ(mbs[1], '\x9F'); + ASSERT_EQ(mbs[2], '\xA4'); + ASSERT_EQ(mbs[3], '\xA1'); + ASSERT_EQ(mbs[4], '\x01'); +} + +TEST_F(LlvmLibcWcstombs, ErrnoTest) { + const wchar_t src[] = {static_cast(0x1f921), + static_cast(0x2211), + static_cast(0x12ffff), // invalid widechar + static_cast(0x0)}; + char mbs[11]; + + // n parameter ignored when dest is null + ASSERT_EQ(LIBC_NAMESPACE::wcstombs(mbs, src, 7), static_cast(7)); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(LIBC_NAMESPACE::wcstombs(mbs, src, 100), static_cast(-1)); + ASSERT_ERRNO_EQ(EILSEQ); +} diff --git a/libc/test/src/stdlib/wctomb_test.cpp b/libc/test/src/stdlib/wctomb_test.cpp new file mode 100644 index 000000000000..56bebf87a279 --- /dev/null +++ b/libc/test/src/stdlib/wctomb_test.cpp @@ -0,0 +1,73 @@ +//===-- Unittests for wctomb ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "hdr/errno_macros.h" +#include "hdr/types/wchar_t.h" +#include "src/stdlib/wctomb.h" +#include "test/UnitTest/ErrnoCheckingTest.h" +#include "test/UnitTest/Test.h" + +using LlvmLibcWCToMBTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest; + +TEST(LlvmLibcWCToMBTest, OneByte) { + wchar_t wc = L'U'; + char mb[4]; + int cnt = LIBC_NAMESPACE::wctomb(mb, wc); + ASSERT_EQ(cnt, 1); + ASSERT_EQ(mb[0], 'U'); +} + +TEST(LlvmLibcWCToMBTest, TwoByte) { + // testing utf32: 0xff -> utf8: 0xc3 0xbf + wchar_t wc = 0xff; + char mb[4]; + int cnt = LIBC_NAMESPACE::wctomb(mb, wc); + ASSERT_EQ(cnt, 2); + ASSERT_EQ(mb[0], static_cast(0xc3)); + ASSERT_EQ(mb[1], static_cast(0xbf)); +} + +TEST(LlvmLibcWCToMBTest, ThreeByte) { + // testing utf32: 0xac15 -> utf8: 0xea 0xb0 0x95 + wchar_t wc = 0xac15; + char mb[4]; + int cnt = LIBC_NAMESPACE::wctomb(mb, wc); + ASSERT_EQ(cnt, 3); + ASSERT_EQ(mb[0], static_cast(0xea)); + ASSERT_EQ(mb[1], static_cast(0xb0)); + ASSERT_EQ(mb[2], static_cast(0x95)); +} + +TEST(LlvmLibcWCToMBTest, FourByte) { + // testing utf32: 0x1f921 -> utf8: 0xf0 0x9f 0xa4 0xa1 + wchar_t wc = 0x1f921; + char mb[4]; + int cnt = LIBC_NAMESPACE::wctomb(mb, wc); + ASSERT_EQ(cnt, 4); + ASSERT_EQ(mb[0], static_cast(0xf0)); + ASSERT_EQ(mb[1], static_cast(0x9f)); + ASSERT_EQ(mb[2], static_cast(0xa4)); + ASSERT_EQ(mb[3], static_cast(0xa1)); +} + +TEST(LlvmLibcWCToMBTest, NullString) { + wchar_t wc = L'A'; + + int cnt = LIBC_NAMESPACE::wctomb(nullptr, wc); + + // no state-dependent encoding + ASSERT_EQ(cnt, 0); +} + +TEST(LlvmLibcWCToMBTest, InvalidWchar) { + wchar_t wc = 0x12ffff; + char mb[4]; + int cnt = LIBC_NAMESPACE::wctomb(mb, wc); + ASSERT_EQ(cnt, -1); + ASSERT_ERRNO_EQ(EILSEQ); +} diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt index a62a30fe0012..7a7cfaee7f36 100644 --- 
a/libc/test/src/wchar/CMakeLists.txt +++ b/libc/test/src/wchar/CMakeLists.txt @@ -62,32 +62,6 @@ add_libc_test( libc.test.UnitTest.ErrnoCheckingTest ) -add_libc_test( - mbtowc_test - SUITE - libc_wchar_unittests - SRCS - mbtowc_test.cpp - DEPENDS - libc.hdr.errno_macros - libc.src.wchar.mbtowc - libc.hdr.types.wchar_t - libc.test.UnitTest.ErrnoCheckingTest -) - -add_libc_test( - mbstowcs_test - SUITE - libc_wchar_unittests - SRCS - mbstowcs_test.cpp - DEPENDS - libc.hdr.errno_macros - libc.src.wchar.mbstowcs - libc.hdr.types.wchar_t - libc.test.UnitTest.ErrnoCheckingTest -) - add_libc_test( mblen_test SUITE @@ -188,30 +162,6 @@ add_libc_test( libc.test.UnitTest.ErrnoCheckingTest ) -add_libc_test( - wctomb_test - SUITE - libc_wchar_unittests - SRCS - wctomb_test.cpp - DEPENDS - libc.hdr.errno_macros - libc.src.wchar.wctomb - libc.hdr.types.wchar_t -) - -add_libc_test( - wcstombs_test - SUITE - libc_wchar_unittests - SRCS - wcstombs_test.cpp - DEPENDS - libc.src.wchar.wcstombs - libc.test.UnitTest.ErrnoCheckingTest - libc.hdr.types.wchar_t -) - add_libc_test( wcsrtombs_test SUITE diff --git a/libc/test/src/wchar/mbstowcs_test.cpp b/libc/test/src/wchar/mbstowcs_test.cpp deleted file mode 100644 index 742f47819c84..000000000000 --- a/libc/test/src/wchar/mbstowcs_test.cpp +++ /dev/null @@ -1,171 +0,0 @@ -//===-- Unittests for mbstowcs --------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "hdr/errno_macros.h" -#include "hdr/types/wchar_t.h" -#include "src/__support/macros/null_check.h" -#include "src/wchar/mbstowcs.h" -#include "test/UnitTest/ErrnoCheckingTest.h" -#include "test/UnitTest/Test.h" - -using LlvmLibcMBSToWCSTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest; - -TEST_F(LlvmLibcMBSToWCSTest, OneByteOneChar) { - const char *ch = "A"; - const char *original = ch; - wchar_t dest[2]; - size_t n = LIBC_NAMESPACE::mbstowcs(dest, ch, 1); - ASSERT_EQ(static_cast(*dest), 'A'); - ASSERT_EQ(static_cast(n), 1); - // Making sure the pointer is not getting updated - ASSERT_EQ(ch, original); - ASSERT_ERRNO_SUCCESS(); - - n = LIBC_NAMESPACE::mbstowcs(dest + 1, ch + 1, 1); - ASSERT_EQ(static_cast(dest[1]), '\0'); - // Should not include null terminator - ASSERT_EQ(static_cast(n), 0); - // Making sure the pointer is not getting updated - ASSERT_EQ(ch, original); - ASSERT_ERRNO_SUCCESS(); -} - -TEST_F(LlvmLibcMBSToWCSTest, FourByteOneChar) { - const char *src = "\xf0\x9f\x98\xb9"; // laughing cat emoji 😹 - const char *original = src; - wchar_t dest[2]; - size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 2); - ASSERT_ERRNO_SUCCESS(); - ASSERT_EQ(static_cast(dest[0]), 128569); - ASSERT_TRUE(dest[1] == L'\0'); - // Should not count null terminator in number - ASSERT_EQ(static_cast(n), 1); - // Making sure the pointer is not getting updated - ASSERT_EQ(src, original); -} - -TEST_F(LlvmLibcMBSToWCSTest, MultiByteTwoCharacters) { - // Two laughing cat emojis "😹😹" - const char *src = "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; - const char *original = src; - wchar_t dest[3]; - size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 3); - ASSERT_ERRNO_SUCCESS(); - ASSERT_EQ(static_cast(dest[0]), 128569); - ASSERT_EQ(static_cast(dest[1]), 128569); - ASSERT_TRUE(dest[2] == L'\0'); - // Should not count null terminator in number - 
ASSERT_EQ(static_cast(n), 2); - // Making sure the pointer is not getting updated - ASSERT_EQ(src, original); -} - -TEST_F(LlvmLibcMBSToWCSTest, MixedNumberOfBytes) { - // 'A', sigma symbol 'Σ', recycling symbol '♻', laughing cat emoji '😹' - const char *src = "A\xce\xa3\xe2\x99\xbb\xf0\x9f\x98\xb9"; - const char *original = src; - wchar_t dest[5]; - size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 5); - ASSERT_ERRNO_SUCCESS(); - ASSERT_EQ(static_cast(dest[0]), 'A'); - ASSERT_EQ(static_cast(dest[1]), 931); - ASSERT_EQ(static_cast(dest[2]), 9851); - ASSERT_EQ(static_cast(dest[3]), 128569); - ASSERT_TRUE(dest[4] == L'\0'); - // Should not count null terminator in number - ASSERT_EQ(static_cast(n), 4); - // Making sure the pointer is not getting updated - ASSERT_EQ(src, original); -} - -TEST_F(LlvmLibcMBSToWCSTest, ReadLessThanStringLength) { - // Four laughing cat emojis "😹😹😹😹" - const char *src = - "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; - const char *original = src; - wchar_t dest[5] = {L'a', L'b', L'c', L'd', L'e'}; - size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 3); - ASSERT_ERRNO_SUCCESS(); - // Should have read 3 emojis - ASSERT_EQ(static_cast(n), 3); - ASSERT_EQ(static_cast(dest[0]), 128569); - ASSERT_EQ(static_cast(dest[1]), 128569); - ASSERT_EQ(static_cast(dest[2]), 128569); - ASSERT_TRUE(dest[3] == L'd'); - ASSERT_TRUE(dest[4] == L'e'); - // Making sure the pointer is not getting updated - ASSERT_EQ(src, original); -} - -TEST_F(LlvmLibcMBSToWCSTest, InvalidFirstByte) { - // 0x80 is invalid first byte of mb character - const char *src = - "\x80\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; - wchar_t dest[3]; - size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 3); - // Should return error and set errno - ASSERT_EQ(static_cast(n), -1); - ASSERT_ERRNO_EQ(EILSEQ); -} - -TEST_F(LlvmLibcMBSToWCSTest, InvalidMiddleByte) { - // The 7th byte is invalid for a 4 byte character - const char *src = - 
"\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; - const char *original = src; - wchar_t dest[3]; - size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 5); - // Should return error and set errno - ASSERT_EQ(static_cast(n), -1); - ASSERT_ERRNO_EQ(EILSEQ); - // Making sure the pointer is not getting updated - ASSERT_EQ(src, original); -} - -TEST_F(LlvmLibcMBSToWCSTest, NullDestination) { - // Four laughing cat emojis "😹😹😹😹" - const char *src = - "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; - const char *original = src; - size_t n = LIBC_NAMESPACE::mbstowcs(nullptr, src, 2); - ASSERT_ERRNO_SUCCESS(); - // Null destination should ignore len and read till end of string - ASSERT_EQ(static_cast(n), 4); - // Making sure the pointer is not getting updated - ASSERT_EQ(src, original); -} - -TEST_F(LlvmLibcMBSToWCSTest, ErrnoChecks) { - // Two laughing cat emojis and invalid 3rd mb char (3rd byte of it) - const char *src = - "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9"; - const char *original = src; - wchar_t dest[5]; - // First two bytes are valid --> should not set errno - size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 2); - ASSERT_ERRNO_SUCCESS(); - ASSERT_EQ(static_cast(n), 2); - ASSERT_EQ(static_cast(dest[0]), 128569); - ASSERT_EQ(static_cast(dest[1]), 128569); - // Making sure the pointer is not getting updated - ASSERT_EQ(src, original); - // Trying to read the 3rd byte should set errno - n = LIBC_NAMESPACE::mbstowcs(dest, src + 2, 2); - ASSERT_ERRNO_EQ(EILSEQ); - ASSERT_EQ(static_cast(n), -1); - // Making sure the pointer is not getting updated - ASSERT_EQ(src, original); -} - -#if defined(LIBC_ADD_NULL_CHECKS) -TEST(LlvmLibcMBSToWCSTest, NullptrCrash) { - // Passing in a nullptr should crash the program. 
- EXPECT_DEATH([] { LIBC_NAMESPACE::mbstowcs(nullptr, nullptr, 1); }, - WITH_SIGNAL(-1)); -} -#endif // LIBC_ADD_NULL_CHECKS diff --git a/libc/test/src/wchar/mbtowc_test.cpp b/libc/test/src/wchar/mbtowc_test.cpp deleted file mode 100644 index 7c86d5583aae..000000000000 --- a/libc/test/src/wchar/mbtowc_test.cpp +++ /dev/null @@ -1,154 +0,0 @@ -//===-- Unittests for mbtowc ---------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "hdr/errno_macros.h" -#include "hdr/types/wchar_t.h" -#include "src/wchar/mbtowc.h" -#include "test/UnitTest/ErrnoCheckingTest.h" -#include "test/UnitTest/Test.h" - -using LlvmLibcMBToWCTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest; - -TEST_F(LlvmLibcMBToWCTest, OneByte) { - const char *ch = "A"; - wchar_t dest[2]; - int n = LIBC_NAMESPACE::mbtowc(dest, ch, 1); - ASSERT_EQ(static_cast(*dest), 'A'); - ASSERT_EQ(n, 1); - - // Should fail since we have not read enough - n = LIBC_NAMESPACE::mbtowc(dest, ch, 0); - ASSERT_EQ(n, -1); - ASSERT_ERRNO_EQ(EILSEQ); -} - -TEST_F(LlvmLibcMBToWCTest, TwoByte) { - const char ch[2] = {static_cast(0xC2), - static_cast(0x8E)}; // Ž car symbol - wchar_t dest[2]; - int n = LIBC_NAMESPACE::mbtowc(dest, ch, 2); - ASSERT_EQ(static_cast(*dest), 142); - ASSERT_EQ(n, 2); - - // Should fail since we have not read enough - n = LIBC_NAMESPACE::mbtowc(dest, ch, 1); - ASSERT_EQ(n, -1); - // Should fail after trying to read next byte too - n = LIBC_NAMESPACE::mbtowc(dest, ch + 1, 1); - ASSERT_EQ(n, -1); - ASSERT_ERRNO_EQ(EILSEQ); -} - -TEST_F(LlvmLibcMBToWCTest, ThreeByte) { - const char ch[3] = {static_cast(0xE2), static_cast(0x88), - static_cast(0x91)}; // ∑ sigma symbol - wchar_t dest[2]; - int n = 
LIBC_NAMESPACE::mbtowc(dest, ch, 3); - ASSERT_EQ(static_cast(*dest), 8721); - ASSERT_EQ(n, 3); - - // Should fail since we have not read enough - n = LIBC_NAMESPACE::mbtowc(dest, ch, 2); - ASSERT_EQ(n, -1); - ASSERT_ERRNO_EQ(EILSEQ); -} - -TEST_F(LlvmLibcMBToWCTest, FourByte) { - const char ch[4] = {static_cast(0xF0), static_cast(0x9F), - static_cast(0xA4), - static_cast(0xA1)}; // 🤡 clown emoji - wchar_t dest[2]; - int n = LIBC_NAMESPACE::mbtowc(dest, ch, 4); - ASSERT_EQ(static_cast(*dest), 129313); - ASSERT_EQ(n, 4); - - // Should fail since we have not read enough - n = LIBC_NAMESPACE::mbtowc(dest, ch, 2); - ASSERT_EQ(n, -1); - ASSERT_ERRNO_EQ(EILSEQ); -} - -TEST_F(LlvmLibcMBToWCTest, InvalidByte) { - const char ch[1] = {static_cast(0x80)}; - wchar_t dest[2]; - int n = LIBC_NAMESPACE::mbtowc(dest, ch, 1); - ASSERT_EQ(n, -1); - ASSERT_ERRNO_EQ(EILSEQ); -} - -TEST_F(LlvmLibcMBToWCTest, InvalidMultiByte) { - const char ch[4] = {static_cast(0x80), static_cast(0x00), - static_cast(0x80), - static_cast(0x00)}; // invalid sequence of bytes - wchar_t dest[2]; - // Trying to push all 4 should error - int n = LIBC_NAMESPACE::mbtowc(dest, ch, 4); - ASSERT_EQ(n, -1); - ASSERT_ERRNO_EQ(EILSEQ); - - // Trying to push the second and third should correspond to null wc - n = LIBC_NAMESPACE::mbtowc(dest, ch + 1, 2); - ASSERT_EQ(n, 0); - ASSERT_TRUE(*dest == L'\0'); -} - -TEST_F(LlvmLibcMBToWCTest, InvalidLastByte) { - // Last byte is invalid since it does not have correct starting sequence. 
- // 0xC0 --> 11000000 starting sequence should be 10xxxxxx - const char ch[4] = {static_cast(0xF1), static_cast(0x80), - static_cast(0x80), static_cast(0xC0)}; - wchar_t dest[2]; - // Trying to push all 4 should error - int n = LIBC_NAMESPACE::mbtowc(dest, ch, 4); - ASSERT_EQ(n, -1); - ASSERT_ERRNO_EQ(EILSEQ); -} - -TEST_F(LlvmLibcMBToWCTest, ValidTwoByteWithExtraRead) { - const char ch[3] = {static_cast(0xC2), static_cast(0x8E), - static_cast(0x80)}; - wchar_t dest[2]; - // Trying to push all 3 should return valid 2 byte - int n = LIBC_NAMESPACE::mbtowc(dest, ch, 3); - ASSERT_EQ(n, 2); - ASSERT_EQ(static_cast(*dest), 142); -} - -TEST_F(LlvmLibcMBToWCTest, TwoValidTwoBytes) { - const char ch[4] = {static_cast(0xC2), static_cast(0x8E), - static_cast(0xC7), static_cast(0x8C)}; - wchar_t dest[2]; - int n = LIBC_NAMESPACE::mbtowc(dest, ch, 2); - ASSERT_EQ(n, 2); - ASSERT_EQ(static_cast(*dest), 142); - n = LIBC_NAMESPACE::mbtowc(dest + 1, ch + 2, 2); - ASSERT_EQ(n, 2); - ASSERT_EQ(static_cast(*(dest + 1)), 460); -} - -TEST_F(LlvmLibcMBToWCTest, NullString) { - wchar_t dest[2] = {L'O', L'K'}; - // reading on nullptr should return 0 - int n = LIBC_NAMESPACE::mbtowc(dest, nullptr, 2); - ASSERT_EQ(n, 0); - ASSERT_TRUE(dest[0] == L'O'); - // reading a null terminator should return 0 - const char *ch = "\0"; - n = LIBC_NAMESPACE::mbtowc(dest, ch, 1); - ASSERT_EQ(n, 0); -} - -TEST_F(LlvmLibcMBToWCTest, NullWCPtr) { - const char ch[2] = { - static_cast(0xC2), - static_cast(0x8E), - }; - // a null destination should still return the number of read bytes - int n = LIBC_NAMESPACE::mbtowc(nullptr, ch, 2); - ASSERT_EQ(n, 2); -} diff --git a/libc/test/src/wchar/wcstombs_test.cpp b/libc/test/src/wchar/wcstombs_test.cpp deleted file mode 100644 index 61e0873dc971..000000000000 --- a/libc/test/src/wchar/wcstombs_test.cpp +++ /dev/null @@ -1,84 +0,0 @@ -//===-- Unittests for wcstombs --------------------------------------------===// -// -// Part of the LLVM Project, under the Apache 
License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "src/wchar/wcstombs.h" -#include "test/UnitTest/ErrnoCheckingTest.h" -#include "test/UnitTest/Test.h" - -using LlvmLibcWcstombs = LIBC_NAMESPACE::testing::ErrnoCheckingTest; - -// these tests are fairly simple as this function just calls into the internal -// wcsnrtombs which is more thoroughly tested - -TEST_F(LlvmLibcWcstombs, AllMultibyteLengths) { - /// clown emoji, sigma symbol, y with diaeresis, letter A - const wchar_t src[] = {static_cast(0x1f921), - static_cast(0x2211), - static_cast(0xff), static_cast(0x41), - static_cast(0x0)}; - char mbs[11]; - - ASSERT_EQ(LIBC_NAMESPACE::wcstombs(mbs, src, 11), static_cast(10)); - ASSERT_ERRNO_SUCCESS(); - ASSERT_EQ(mbs[0], '\xF0'); // clown begin - ASSERT_EQ(mbs[1], '\x9F'); - ASSERT_EQ(mbs[2], '\xA4'); - ASSERT_EQ(mbs[3], '\xA1'); - ASSERT_EQ(mbs[4], '\xE2'); // sigma begin - ASSERT_EQ(mbs[5], '\x88'); - ASSERT_EQ(mbs[6], '\x91'); - ASSERT_EQ(mbs[7], '\xC3'); // y diaeresis begin - ASSERT_EQ(mbs[8], '\xBF'); - ASSERT_EQ(mbs[9], '\x41'); // A begin - ASSERT_EQ(mbs[10], '\0'); // null terminator -} - -TEST_F(LlvmLibcWcstombs, DestLimit) { - /// clown emoji, sigma symbol, y with diaeresis, letter A - const wchar_t src[] = {static_cast(0x1f921), - static_cast(0x2211), - static_cast(0xff), static_cast(0x41), - static_cast(0x0)}; - char mbs[11]; - for (int i = 0; i < 11; ++i) - mbs[i] = '\x01'; // dummy initial values - - ASSERT_EQ(LIBC_NAMESPACE::wcstombs(mbs, src, 4), static_cast(4)); - ASSERT_ERRNO_SUCCESS(); - ASSERT_EQ(mbs[0], '\xF0'); - ASSERT_EQ(mbs[1], '\x9F'); - ASSERT_EQ(mbs[2], '\xA4'); - ASSERT_EQ(mbs[3], '\xA1'); - ASSERT_EQ(mbs[4], '\x01'); // didn't write more than 4 bytes - - for (int i = 0; i < 11; ++i) - mbs[i] = '\x01'; // dummy initial values - - // not 
enough bytes to convert the second character, so only converts one - ASSERT_EQ(LIBC_NAMESPACE::wcstombs(mbs, src, 6), static_cast(4)); - ASSERT_ERRNO_SUCCESS(); - ASSERT_EQ(mbs[0], '\xF0'); - ASSERT_EQ(mbs[1], '\x9F'); - ASSERT_EQ(mbs[2], '\xA4'); - ASSERT_EQ(mbs[3], '\xA1'); - ASSERT_EQ(mbs[4], '\x01'); -} - -TEST_F(LlvmLibcWcstombs, ErrnoTest) { - const wchar_t src[] = {static_cast(0x1f921), - static_cast(0x2211), - static_cast(0x12ffff), // invalid widechar - static_cast(0x0)}; - char mbs[11]; - - // n parameter ignored when dest is null - ASSERT_EQ(LIBC_NAMESPACE::wcstombs(mbs, src, 7), static_cast(7)); - ASSERT_ERRNO_SUCCESS(); - ASSERT_EQ(LIBC_NAMESPACE::wcstombs(mbs, src, 100), static_cast(-1)); - ASSERT_ERRNO_EQ(EILSEQ); -} diff --git a/libc/test/src/wchar/wctomb_test.cpp b/libc/test/src/wchar/wctomb_test.cpp deleted file mode 100644 index 357f36267b68..000000000000 --- a/libc/test/src/wchar/wctomb_test.cpp +++ /dev/null @@ -1,73 +0,0 @@ -//===-- Unittests for wctomb ----------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "hdr/errno_macros.h" -#include "hdr/types/wchar_t.h" -#include "src/wchar/wctomb.h" -#include "test/UnitTest/ErrnoCheckingTest.h" -#include "test/UnitTest/Test.h" - -using LlvmLibcWCToMBTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest; - -TEST(LlvmLibcWCToMBTest, OneByte) { - wchar_t wc = L'U'; - char mb[4]; - int cnt = LIBC_NAMESPACE::wctomb(mb, wc); - ASSERT_EQ(cnt, 1); - ASSERT_EQ(mb[0], 'U'); -} - -TEST(LlvmLibcWCToMBTest, TwoByte) { - // testing utf32: 0xff -> utf8: 0xc3 0xbf - wchar_t wc = 0xff; - char mb[4]; - int cnt = LIBC_NAMESPACE::wctomb(mb, wc); - ASSERT_EQ(cnt, 2); - ASSERT_EQ(mb[0], static_cast(0xc3)); - ASSERT_EQ(mb[1], static_cast(0xbf)); -} - -TEST(LlvmLibcWCToMBTest, ThreeByte) { - // testing utf32: 0xac15 -> utf8: 0xea 0xb0 0x95 - wchar_t wc = 0xac15; - char mb[4]; - int cnt = LIBC_NAMESPACE::wctomb(mb, wc); - ASSERT_EQ(cnt, 3); - ASSERT_EQ(mb[0], static_cast(0xea)); - ASSERT_EQ(mb[1], static_cast(0xb0)); - ASSERT_EQ(mb[2], static_cast(0x95)); -} - -TEST(LlvmLibcWCToMBTest, FourByte) { - // testing utf32: 0x1f921 -> utf8: 0xf0 0x9f 0xa4 0xa1 - wchar_t wc = 0x1f921; - char mb[4]; - int cnt = LIBC_NAMESPACE::wctomb(mb, wc); - ASSERT_EQ(cnt, 4); - ASSERT_EQ(mb[0], static_cast(0xf0)); - ASSERT_EQ(mb[1], static_cast(0x9f)); - ASSERT_EQ(mb[2], static_cast(0xa4)); - ASSERT_EQ(mb[3], static_cast(0xa1)); -} - -TEST(LlvmLibcWCToMBTest, NullString) { - wchar_t wc = L'A'; - - int cnt = LIBC_NAMESPACE::wctomb(nullptr, wc); - - // no state-dependent encoding - ASSERT_EQ(cnt, 0); -} - -TEST(LlvmLibcWCToMBTest, InvalidWchar) { - wchar_t wc = 0x12ffff; - char mb[4]; - int cnt = LIBC_NAMESPACE::wctomb(mb, wc); - ASSERT_EQ(cnt, -1); - ASSERT_ERRNO_EQ(EILSEQ); -} -- cgit v1.2.3