From 098504eef2fe08c512a893a7cd5bf6cd7d76964e Mon Sep 17 00:00:00 2001 From: =?utf8?q?Petr=20P=C3=ADsa=C5=99?= Date: Sun, 12 May 2024 13:17:27 +0200 Subject: [PATCH] tests: Adapt utf8locale to musl MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The utf8locale test failed with musl: Testing unit: utf8locale NULL input: passed Empty string: passed ASCII text: passed non-ASCII text in UTF-8 locale: passed OTP message in UTF-8 locale: passed non-ASCII text in ISO-8859-2 locale: failed reason: output differs: expected=`��len� �lu�ou�k�' is=`Šíleně žluťoučký' non-ASCII text in C locale: failed reason: correct is NULL, output is not NULL and should be Test results: unit = utf8locale, passed = 5, failed = 2, skipped = 0 The cause of the "ISO-8859-2 locale" failure was that musl does not yet fully support locales and handles any character set identifier in a locale specifier as an alias for UTF-8. This patch fixes the test by validating the code set that the selected locale actually sets. "non-ASCII text in C locale" failed because musl performs a lossy conversion (replaces unrepresentable characters with stars) in contrast to the strict conversion done by glibc. POSIX recommends "an implementation-defined conversion" in that case. Because _isds_utf82locale is only used for presenting messages, a lossy output is not a big deal (libisds simply omits an inconvertible part of a message on glibc). This patch relaxes the test to accept any output. We keep the test here to cover all code paths. 
https://www.openwall.com/lists/musl/2024/05/11/6 https://bugs.gentoo.org/show_bug.cgi?id=928107 --- test/offline/utf82locale.c | 41 ++++++++++++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/test/offline/utf82locale.c b/test/offline/utf82locale.c index 909e745..25dc59a 100644 --- a/test/offline/utf82locale.c +++ b/test/offline/utf82locale.c @@ -1,25 +1,37 @@ #include "../test.h" #include "utils.h" #include <locale.h> +#include <langinfo.h> +#include <string.h> + +static int codeset_is(const char *expected_codeset) { + char *current_codeset = nl_langinfo(CODESET); + if (expected_codeset == NULL) + return current_codeset == NULL; + return current_codeset != NULL && !strcmp(expected_codeset, current_codeset); + +} static int prepare_locale(char type) { char *old_locale; if ('U' == type) { old_locale = setlocale(LC_ALL, "C.UTF-8"); - if (old_locale != NULL) return 0; + if (old_locale != NULL && codeset_is("UTF-8")) return 0; old_locale = setlocale(LC_ALL, "en_US.UTF-8"); - if (old_locale != NULL) return 0; + if (old_locale != NULL && codeset_is("UTF-8")) return 0; old_locale = setlocale(LC_ALL, "cs_CZ.UTF-8"); - if (old_locale != NULL) return 0; + if (old_locale != NULL && codeset_is("UTF-8")) return 0; } else if ('2' == type) { old_locale = setlocale(LC_ALL, "cs_CZ.ISO8859-2"); - if (old_locale != NULL) return 0; + if (old_locale != NULL && codeset_is("ISO-8859-2")) return 0; } else { old_locale = setlocale(LC_ALL, "C"); - if (old_locale != NULL) return 0; + if (old_locale != NULL && + (codeset_is("ASCII") || codeset_is("ANSI_X3.4-1968"))) + return 0; } return -1; @@ -36,6 +48,21 @@ static int test_utf82locale(const void *input, const void *correct) { PASS_TEST; } +/* POSIX prescribes that valid, but inconvertible input should be converted in + * an implementation-defined way. Glibc returns an error, musl outputs stars. 
*/ +static int test_utf82locale_any(const void *input) { + void *output = NULL; + + output = _isds_utf82locale(input); + TEST_DESTRUCTOR(free, output); + + if (output != NULL) + /* Check that we do not access unallocated memory. */ + (void)strlen(output); + + PASS_TEST; +} + int main(void) { INIT_TEST("utf8locale"); @@ -60,9 +87,9 @@ int main(void) { } if (prepare_locale('C')) { - SKIP_TESTS(1, "Could not set C locale"); + SKIP_TESTS(1, "Could not set ASCII locale"); } else { - TEST("non-ASCII text in C locale", test_utf82locale, "Šíleně žluťoučký", NULL); + TEST("non-ASCII text in ASCII locale", test_utf82locale_any, "Šíleně žluťoučký"); } SUM_TEST(); -- 2.11.4.GIT