udev: String substitutions can be done in ENV, too
[systemd_ALT.git] / src / basic / locale-util.c
blob84ad7a9dc1874b7f7a57434c416f2b25266d146c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
3 #include <errno.h>
4 #include <fcntl.h>
5 #include <langinfo.h>
6 #include <libintl.h>
7 #include <stddef.h>
8 #include <stdint.h>
9 #include <stdlib.h>
10 #include <sys/mman.h>
11 #include <sys/stat.h>
13 #include "constants.h"
14 #include "dirent-util.h"
15 #include "env-util.h"
16 #include "fd-util.h"
17 #include "fileio.h"
18 #include "hashmap.h"
19 #include "locale-util.h"
20 #include "missing_syscall.h"
21 #include "path-util.h"
22 #include "set.h"
23 #include "string-table.h"
24 #include "string-util.h"
25 #include "strv.h"
26 #include "utf8.h"
28 static char *normalize_locale(const char *name) {
29 const char *e;
31 /* Locale names are weird: glibc has some magic rules when looking for the charset name on disk: it
32 * lowercases everything, and removes most special chars. This means the official .UTF-8 suffix
33 * becomes .utf8 when looking things up on disk. When enumerating locales, let's do the reverse
34 * operation, and go back to ".UTF-8" which appears to be the more commonly accepted name. We only do
35 * that for UTF-8 however, since it's kinda the only charset that matters. */
37 e = endswith(name, ".utf8");
38 if (e) {
39 _cleanup_free_ char *prefix = NULL;
41 prefix = strndup(name, e - name);
42 if (!prefix)
43 return NULL;
45 return strjoin(prefix, ".UTF-8");
48 e = strstr(name, ".utf8@");
49 if (e) {
50 _cleanup_free_ char *prefix = NULL;
52 prefix = strndup(name, e - name);
53 if (!prefix)
54 return NULL;
56 return strjoin(prefix, ".UTF-8@", e + 6);
59 return strdup(name);
62 static int add_locales_from_archive(Set *locales) {
63 /* Stolen from glibc... */
65 struct locarhead {
66 uint32_t magic;
67 /* Serial number. */
68 uint32_t serial;
69 /* Name hash table. */
70 uint32_t namehash_offset;
71 uint32_t namehash_used;
72 uint32_t namehash_size;
73 /* String table. */
74 uint32_t string_offset;
75 uint32_t string_used;
76 uint32_t string_size;
77 /* Table with locale records. */
78 uint32_t locrectab_offset;
79 uint32_t locrectab_used;
80 uint32_t locrectab_size;
81 /* MD5 sum hash table. */
82 uint32_t sumhash_offset;
83 uint32_t sumhash_used;
84 uint32_t sumhash_size;
87 struct namehashent {
88 /* Hash value of the name. */
89 uint32_t hashval;
90 /* Offset of the name in the string table. */
91 uint32_t name_offset;
92 /* Offset of the locale record. */
93 uint32_t locrec_offset;
96 const struct locarhead *h;
97 const struct namehashent *e;
98 const void *p = MAP_FAILED;
99 _cleanup_close_ int fd = -EBADF;
100 size_t sz = 0;
101 struct stat st;
102 int r;
104 fd = open("/usr/lib/locale/locale-archive", O_RDONLY|O_NOCTTY|O_CLOEXEC);
105 if (fd < 0)
106 return errno == ENOENT ? 0 : -errno;
108 if (fstat(fd, &st) < 0)
109 return -errno;
111 if (!S_ISREG(st.st_mode))
112 return -EBADMSG;
114 if (st.st_size < (off_t) sizeof(struct locarhead))
115 return -EBADMSG;
117 if (file_offset_beyond_memory_size(st.st_size))
118 return -EFBIG;
120 p = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
121 if (p == MAP_FAILED)
122 return -errno;
124 h = (const struct locarhead *) p;
125 if (h->magic != 0xde020109 ||
126 h->namehash_offset + h->namehash_size > st.st_size ||
127 h->string_offset + h->string_size > st.st_size ||
128 h->locrectab_offset + h->locrectab_size > st.st_size ||
129 h->sumhash_offset + h->sumhash_size > st.st_size) {
130 r = -EBADMSG;
131 goto finish;
134 e = (const struct namehashent*) ((const uint8_t*) p + h->namehash_offset);
135 for (size_t i = 0; i < h->namehash_size; i++) {
136 char *z;
138 if (e[i].locrec_offset == 0)
139 continue;
141 if (!utf8_is_valid((char*) p + e[i].name_offset))
142 continue;
144 z = normalize_locale((char*) p + e[i].name_offset);
145 if (!z) {
146 r = -ENOMEM;
147 goto finish;
150 r = set_consume(locales, z);
151 if (r < 0)
152 goto finish;
155 r = 0;
157 finish:
158 if (p != MAP_FAILED)
159 munmap((void*) p, sz);
161 return r;
164 static int add_locales_from_libdir(Set *locales) {
165 _cleanup_closedir_ DIR *dir = NULL;
166 int r;
168 dir = opendir("/usr/lib/locale");
169 if (!dir)
170 return errno == ENOENT ? 0 : -errno;
172 FOREACH_DIRENT(de, dir, return -errno) {
173 char *z;
175 if (de->d_type != DT_DIR)
176 continue;
178 z = normalize_locale(de->d_name);
179 if (!z)
180 return -ENOMEM;
182 r = set_consume(locales, z);
183 if (r < 0 && r != -EEXIST)
184 return r;
187 return 0;
190 int get_locales(char ***ret) {
191 _cleanup_set_free_free_ Set *locales = NULL;
192 _cleanup_strv_free_ char **l = NULL;
193 int r;
195 locales = set_new(&string_hash_ops);
196 if (!locales)
197 return -ENOMEM;
199 r = add_locales_from_archive(locales);
200 if (r < 0 && r != -ENOENT)
201 return r;
203 r = add_locales_from_libdir(locales);
204 if (r < 0)
205 return r;
207 char *locale;
208 SET_FOREACH(locale, locales) {
209 r = locale_is_installed(locale);
210 if (r < 0)
211 return r;
212 if (r == 0)
213 free(set_remove(locales, locale));
216 l = set_get_strv(locales);
217 if (!l)
218 return -ENOMEM;
220 /* Now, all elements are owned by strv 'l'. Hence, do not call set_free_free(). */
221 locales = set_free(locales);
223 r = getenv_bool("SYSTEMD_LIST_NON_UTF8_LOCALES");
224 if (r == -ENXIO || r == 0) {
225 char **a, **b;
227 /* Filter out non-UTF-8 locales, because it's 2019, by default */
228 for (a = b = l; *a; a++) {
230 if (endswith(*a, "UTF-8") ||
231 strstr(*a, ".UTF-8@"))
232 *(b++) = *a;
233 else
234 free(*a);
237 *b = NULL;
239 } else if (r < 0)
240 log_debug_errno(r, "Failed to parse $SYSTEMD_LIST_NON_UTF8_LOCALES as boolean");
242 strv_sort(l);
244 *ret = TAKE_PTR(l);
246 return 0;
/* Checks whether 'name' is acceptable as a locale name: it must be non-empty, shorter than 128 bytes,
 * and pass utf8_is_valid(), filename_is_valid() and string_is_safe(). The checks run in this order and
 * stop at the first one that fails. */
bool locale_is_valid(const char *name) {
        return !isempty(name) &&
               strlen(name) < 128 &&
               utf8_is_valid(name) &&
               filename_is_valid(name) &&
               string_is_safe(name);
}
269 int locale_is_installed(const char *name) {
270 if (!locale_is_valid(name))
271 return false;
273 if (STR_IN_SET(name, "C", "POSIX")) /* These ones are always OK */
274 return true;
276 _cleanup_(freelocalep) locale_t loc =
277 newlocale(LC_ALL_MASK, name, 0);
278 if (loc == (locale_t) 0)
279 return errno == ENOMEM ? -ENOMEM : false;
281 return true;
/* Does the actual work for is_locale_utf8(), without any caching. Every situation in which the charset
 * cannot be determined is answered with 'true', since UTF-8 is supported pretty much everywhere. */
static bool detect_locale_utf8(void) {
        const char *s;
        int r;

        /* An explicit $SYSTEMD_UTF8 setting overrides everything else. */
        r = getenv_bool_secure("SYSTEMD_UTF8");
        if (r >= 0)
                return r;
        if (r != -ENXIO)
                log_debug_errno(r, "Failed to parse $SYSTEMD_UTF8, ignoring: %m");

        /* We may be called from libsystemd, and setlocale() is not thread safe. If we are not running
         * in the main thread, do not touch it and simply assume UTF-8. */
        if (gettid() != raw_getpid())
                return true;

        if (!setlocale(LC_ALL, ""))
                return true;

        s = nl_langinfo(CODESET);
        if (!s || streq(s, "UTF-8"))
                return true;

        s = setlocale(LC_CTYPE, NULL);
        if (!s)
                return true;

        /* An LC_CTYPE of "C" or "POSIX" still counts as UTF-8 as long as it was not requested
         * explicitly, i.e. none of the relevant environment variables is set. Anything else is not
         * UTF-8. */
        return STR_IN_SET(s, "C", "POSIX") &&
                !getenv("LC_ALL") &&
                !getenv("LC_CTYPE") &&
                !getenv("LANG");
}

/* Returns whether the current locale uses UTF-8. The answer is computed on the first call and cached
 * in a static variable for all later calls. */
bool is_locale_utf8(void) {
        static int cached_answer = -1;

        if (cached_answer < 0)
                cached_answer = detect_locale_utf8();

        return (bool) cached_answer;
}
344 void locale_variables_free(char *l[_VARIABLE_LC_MAX]) {
345 if (!l)
346 return;
348 for (LocaleVariable i = 0; i < _VARIABLE_LC_MAX; i++)
349 l[i] = mfree(l[i]);
352 void locale_variables_simplify(char *l[_VARIABLE_LC_MAX]) {
353 assert(l);
355 for (LocaleVariable p = 0; p < _VARIABLE_LC_MAX; p++) {
356 if (p == VARIABLE_LANG)
357 continue;
358 if (isempty(l[p]) || streq_ptr(l[VARIABLE_LANG], l[p]))
359 l[p] = mfree(l[p]);
363 static const char * const locale_variable_table[_VARIABLE_LC_MAX] = {
364 [VARIABLE_LANG] = "LANG",
365 [VARIABLE_LANGUAGE] = "LANGUAGE",
366 [VARIABLE_LC_CTYPE] = "LC_CTYPE",
367 [VARIABLE_LC_NUMERIC] = "LC_NUMERIC",
368 [VARIABLE_LC_TIME] = "LC_TIME",
369 [VARIABLE_LC_COLLATE] = "LC_COLLATE",
370 [VARIABLE_LC_MONETARY] = "LC_MONETARY",
371 [VARIABLE_LC_MESSAGES] = "LC_MESSAGES",
372 [VARIABLE_LC_PAPER] = "LC_PAPER",
373 [VARIABLE_LC_NAME] = "LC_NAME",
374 [VARIABLE_LC_ADDRESS] = "LC_ADDRESS",
375 [VARIABLE_LC_TELEPHONE] = "LC_TELEPHONE",
376 [VARIABLE_LC_MEASUREMENT] = "LC_MEASUREMENT",
377 [VARIABLE_LC_IDENTIFICATION] = "LC_IDENTIFICATION"
380 DEFINE_STRING_TABLE_LOOKUP(locale_variable, LocaleVariable);