From ecc30ac20f559e47fc4a183942d73913d615ff3f Mon Sep 17 00:00:00 2001
From: Eike Rathke
Date: Wed, 5 Oct 2022 01:29:02 +0200
Subject: [PATCH] Introduce unicode::getScriptClassFromLanguageTag()

Change-Id: Ifb932ff2aabc5767571433627314f0b29c35c471
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/140953
Reviewed-by: Eike Rathke
Tested-by: Jenkins
---
 i18nutil/source/utility/unicode.cxx | 32 ++++++++++++++++++++++++++++++++
 include/i18nutil/unicode.hxx        |  9 +++++++++
 2 files changed, 41 insertions(+)

NOTE(review): the include targets in the first unicode.cxx hunk are missing
from this copy (bare `#include` lines); restore them from the upstream commit
before applying.

diff --git a/i18nutil/source/utility/unicode.cxx b/i18nutil/source/utility/unicode.cxx
index 5e479989eae9..33f1ca2f190e 100644
--- a/i18nutil/source/utility/unicode.cxx
+++ b/i18nutil/source/utility/unicode.cxx
@@ -24,6 +24,7 @@
 #include
 #include
 #include
+#include
 #include "unicode_data.h"
 #include
 #include
@@ -190,6 +191,37 @@ sal_Int16 unicode::getScriptClassFromUScriptCode(UScriptCode eScript)
     return nRet;
 }
 
+sal_Int16 unicode::getScriptClassFromLanguageTag( const LanguageTag& rLanguageTag )
+{
+    static UScriptCode nMaxScript = static_cast<UScriptCode>(u_getIntPropertyMaxValue(UCHAR_SCRIPT));
+    constexpr int32_t nBuf = 42;
+    UScriptCode aBuf[nBuf];
+    if (rLanguageTag.hasScript())
+    {
+        aBuf[0] = static_cast<UScriptCode>(u_getPropertyValueEnum( UCHAR_SCRIPT,
+                    OUStringToOString( rLanguageTag.getScript(), RTL_TEXTENCODING_ASCII_US).getStr()));
+    }
+    else
+    {
+        OUString aName;
+        if (rLanguageTag.getCountry().isEmpty())
+            aName = rLanguageTag.getLanguage();
+        else
+            aName = rLanguageTag.getLanguage() + "-" + rLanguageTag.getCountry();
+        UErrorCode status = U_ZERO_ERROR;
+        const int32_t nScripts = uscript_getCode(
+                OUStringToOString( aName, RTL_TEXTENCODING_ASCII_US).getStr(),
+                aBuf, nBuf, &status);
+        // U_BUFFER_OVERFLOW_ERROR would be set with too many scripts for buffer
+        // and required capacity returned, but really..
+        if (nScripts == 0 || !U_SUCCESS(status))
+            return css::i18n::ScriptType::LATIN;
+    }
+    if (aBuf[0] > nMaxScript)
+        return css::i18n::ScriptType::COMPLEX;
+    return getScriptClassFromUScriptCode( aBuf[0]);
+}
+
 OString unicode::getExemplarLanguageForUScriptCode(UScriptCode eScript)
 {
     OString sRet;
diff --git a/include/i18nutil/unicode.hxx b/include/i18nutil/unicode.hxx
index 0ca14290981e..ebe50ce90384 100644
--- a/include/i18nutil/unicode.hxx
+++ b/include/i18nutil/unicode.hxx
@@ -82,6 +82,15 @@ public:
     //Format a number as a percentage according to the rules of the given
     //language, e.g. 100 -> "100%" for en-US vs "100 %" for de-DE
     static OUString formatPercent(double dNumber, const LanguageTag& rLangTag);
+
+    /** Map a LanguageTag's language ISO 639 code or script ISO 15924 code or
+        language-script or locale to Latin/Asian/Complex/Weak. If more than one
+        script is used with a language(-country) tag then the first (default)
+        script is mapped for that language.
+
+        @return a css::i18n::ScriptType value.
+     */
+    static sal_Int16 getScriptClassFromLanguageTag(const LanguageTag& rLanguageTag);
 };
 
 /*
-- 
2.11.4.GIT