Further improve string bench timing
[glibc.git] / locale / findlocale.c
blob9af605bd649447a7a7418d5da9569be9c9b59912
1 /* Copyright (C) 1996-2019 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
19 #include <assert.h>
20 #include <errno.h>
21 #include <locale.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <unistd.h>
25 #ifdef _POSIX_MAPPED_FILES
26 # include <sys/mman.h>
27 #endif
29 #include "localeinfo.h"
30 #include "../iconv/gconv_charset.h"
31 #include "../iconv/gconv_int.h"
#ifdef NL_CURRENT_INDIRECT
/* Declare one weak external `_nl_C_<category>' object for every category
   listed in categories.def.  */
# define DEFINE_CATEGORY(category, category_name, items, a) \
extern struct __locale_data _nl_C_##category; \
weak_extern (_nl_C_##category)
# include "categories.def"
# undef DEFINE_CATEGORY

/* Array indexed by category of pointers to _nl_C_CATEGORY slots.
   Elements are zero for categories whose data is never used.  */
struct __locale_data *const _nl_C[] attribute_hidden =
  {
# define DEFINE_CATEGORY(category, category_name, items, a) \
    [category] = &_nl_C_##category,
# include "categories.def"
# undef DEFINE_CATEGORY
  };
#else
/* Without NL_CURRENT_INDIRECT the table is the `__locales' member of
   _nl_C_locobj.  */
# define _nl_C (_nl_C_locobj.__locales)
#endif
55 /* For each category we keep a list of records for the locale files
56 which are somehow addressed. */
57 struct loaded_l10nfile *_nl_locale_file_list[__LC_LAST];
59 const char _nl_default_locale_path[] attribute_hidden = COMPLOCALEDIR;
/* Checks if the name is actually present, that is, not NULL and not
   empty.  Returns nonzero if NAME is usable, zero otherwise.  */
static inline int
name_present (const char *name)
{
  return name != NULL && name[0] != '\0';
}
/* Checks that the locale name is neither extremely long, nor contains a
   ".." path component (to prevent directory traversal).  Returns 1 if
   NAME is acceptable and 0 otherwise.  NAME must not be NULL.  */
static inline int
valid_locale_name (const char *name)
{
  size_t namelen = strlen (name);
  /* Name too long.  The limit is arbitrary and prevents stack overflow
     issues later.  */
  if (__glibc_unlikely (namelen > 255))
    return 0;
  /* Directory traversal attempt: "/../" anywhere in the name.  */
  static const char slashdot[4] = {'/', '.', '.', '/'};
  if (__glibc_unlikely (__memmem (name, namelen,
                                  slashdot, sizeof (slashdot)) != NULL))
    return 0;
  /* The name is exactly "..".  */
  if (namelen == 2 && __glibc_unlikely (name[0] == '.' && name[1] == '.'))
    return 0;
  /* The name starts with "../" or ends with "/..".  */
  if (namelen >= 3
      && __glibc_unlikely (((name[0] == '.'
                             && name[1] == '.'
                             && name[2] == '/')
                            || (name[namelen - 3] == '/'
                                && name[namelen - 2] == '.'
                                && name[namelen - 1] == '.'))))
    return 0;
  /* If there is a slash in the name, it must start with one.  */
  if (__glibc_unlikely (memchr (name, '/', namelen) != NULL) && name[0] != '/')
    return 0;
  return 1;
}
101 struct __locale_data *
102 _nl_find_locale (const char *locale_path, size_t locale_path_len,
103 int category, const char **name)
105 int mask;
106 /* Name of the locale for this category. */
107 const char *cloc_name = *name;
108 const char *language;
109 const char *modifier;
110 const char *territory;
111 const char *codeset;
112 const char *normalized_codeset;
113 struct loaded_l10nfile *locale_file;
115 if (cloc_name[0] == '\0')
117 /* The user decides which locale to use by setting environment
118 variables. */
119 cloc_name = getenv ("LC_ALL");
120 if (!name_present (cloc_name))
121 cloc_name = getenv (_nl_category_names.str
122 + _nl_category_name_idxs[category]);
123 if (!name_present (cloc_name))
124 cloc_name = getenv ("LANG");
125 if (!name_present (cloc_name))
126 cloc_name = _nl_C_name;
129 /* We used to fall back to the C locale if the name contains a slash
130 character '/', but we now check for directory traversal in
131 valid_locale_name, so this is no longer necessary. */
133 if (__builtin_expect (strcmp (cloc_name, _nl_C_name), 1) == 0
134 || __builtin_expect (strcmp (cloc_name, _nl_POSIX_name), 1) == 0)
136 /* We need not load anything. The needed data is contained in
137 the library itself. */
138 *name = _nl_C_name;
139 return _nl_C[category];
141 else if (!valid_locale_name (cloc_name))
143 __set_errno (EINVAL);
144 return NULL;
147 *name = cloc_name;
149 /* We really have to load some data. First we try the archive,
150 but only if there was no LOCPATH environment variable specified. */
151 if (__glibc_likely (locale_path == NULL))
153 struct __locale_data *data
154 = _nl_load_locale_from_archive (category, name);
155 if (__glibc_likely (data != NULL))
156 return data;
158 /* Nothing in the archive with the given name. Expanding it as
159 an alias and retry. */
160 cloc_name = _nl_expand_alias (*name);
161 if (cloc_name != NULL)
163 data = _nl_load_locale_from_archive (category, &cloc_name);
164 if (__builtin_expect (data != NULL, 1))
165 return data;
168 /* Nothing in the archive. Set the default path to search below. */
169 locale_path = _nl_default_locale_path;
170 locale_path_len = sizeof _nl_default_locale_path;
172 else
173 /* We really have to load some data. First see whether the name is
174 an alias. Please note that this makes it impossible to have "C"
175 or "POSIX" as aliases. */
176 cloc_name = _nl_expand_alias (*name);
178 if (cloc_name == NULL)
179 /* It is no alias. */
180 cloc_name = *name;
182 /* Make a writable copy of the locale name. */
183 char *loc_name = strdupa (cloc_name);
185 /* LOCALE can consist of up to four recognized parts for the XPG syntax:
187 language[_territory[.codeset]][@modifier]
189 Beside the first all of them are allowed to be missing. If the
190 full specified locale is not found, the less specific one are
191 looked for. The various part will be stripped off according to
192 the following order:
193 (1) codeset
194 (2) normalized codeset
195 (3) territory
196 (4) modifier
198 mask = _nl_explode_name (loc_name, &language, &modifier, &territory,
199 &codeset, &normalized_codeset);
200 if (mask == -1)
201 /* Memory allocate problem. */
202 return NULL;
204 /* If exactly this locale was already asked for we have an entry with
205 the complete name. */
206 locale_file = _nl_make_l10nflist (&_nl_locale_file_list[category],
207 locale_path, locale_path_len, mask,
208 language, territory, codeset,
209 normalized_codeset, modifier,
210 _nl_category_names.str
211 + _nl_category_name_idxs[category], 0);
213 if (locale_file == NULL)
215 /* Find status record for addressed locale file. We have to search
216 through all directories in the locale path. */
217 locale_file = _nl_make_l10nflist (&_nl_locale_file_list[category],
218 locale_path, locale_path_len, mask,
219 language, territory, codeset,
220 normalized_codeset, modifier,
221 _nl_category_names.str
222 + _nl_category_name_idxs[category], 1);
223 if (locale_file == NULL)
224 /* This means we are out of core. */
225 return NULL;
228 /* The space for normalized_codeset is dynamically allocated. Free it. */
229 if (mask & XPG_NORM_CODESET)
230 free ((void *) normalized_codeset);
232 if (locale_file->decided == 0)
233 _nl_load_locale (locale_file, category);
235 if (locale_file->data == NULL)
237 int cnt;
238 for (cnt = 0; locale_file->successor[cnt] != NULL; ++cnt)
240 if (locale_file->successor[cnt]->decided == 0)
241 _nl_load_locale (locale_file->successor[cnt], category);
242 if (locale_file->successor[cnt]->data != NULL)
243 break;
245 /* Move the entry we found (or NULL) to the first place of
246 successors. */
247 locale_file->successor[0] = locale_file->successor[cnt];
248 locale_file = locale_file->successor[cnt];
250 if (locale_file == NULL)
251 return NULL;
254 /* The LC_CTYPE category allows to check whether a locale is really
255 usable. If the locale name contains a charset name and the
256 charset name used in the locale (present in the LC_CTYPE data) is
257 not the same (after resolving aliases etc) we reject the locale
258 since using it would irritate users expecting the charset named
259 in the locale name. */
260 if (codeset != NULL)
262 /* Get the codeset information from the locale file. */
263 static const int codeset_idx[] =
265 [__LC_CTYPE] = _NL_ITEM_INDEX (CODESET),
266 [__LC_NUMERIC] = _NL_ITEM_INDEX (_NL_NUMERIC_CODESET),
267 [__LC_TIME] = _NL_ITEM_INDEX (_NL_TIME_CODESET),
268 [__LC_COLLATE] = _NL_ITEM_INDEX (_NL_COLLATE_CODESET),
269 [__LC_MONETARY] = _NL_ITEM_INDEX (_NL_MONETARY_CODESET),
270 [__LC_MESSAGES] = _NL_ITEM_INDEX (_NL_MESSAGES_CODESET),
271 [__LC_PAPER] = _NL_ITEM_INDEX (_NL_PAPER_CODESET),
272 [__LC_NAME] = _NL_ITEM_INDEX (_NL_NAME_CODESET),
273 [__LC_ADDRESS] = _NL_ITEM_INDEX (_NL_ADDRESS_CODESET),
274 [__LC_TELEPHONE] = _NL_ITEM_INDEX (_NL_TELEPHONE_CODESET),
275 [__LC_MEASUREMENT] = _NL_ITEM_INDEX (_NL_MEASUREMENT_CODESET),
276 [__LC_IDENTIFICATION] = _NL_ITEM_INDEX (_NL_IDENTIFICATION_CODESET)
278 const struct __locale_data *data;
279 const char *locale_codeset;
280 char *clocale_codeset;
281 char *ccodeset;
283 data = (const struct __locale_data *) locale_file->data;
284 locale_codeset =
285 (const char *) data->values[codeset_idx[category]].string;
286 assert (locale_codeset != NULL);
287 /* Note the length of the allocated memory: +3 for up to two slashes
288 and the NUL byte. */
289 clocale_codeset = (char *) alloca (strlen (locale_codeset) + 3);
290 strip (clocale_codeset, locale_codeset);
292 ccodeset = (char *) alloca (strlen (codeset) + 3);
293 strip (ccodeset, codeset);
295 if (__gconv_compare_alias (upstr (ccodeset, ccodeset),
296 upstr (clocale_codeset,
297 clocale_codeset)) != 0)
298 /* The codesets are not identical, don't use the locale. */
299 return NULL;
302 /* Determine the locale name for which loading succeeded. This
303 information comes from the file name. The form is
304 <path>/<locale>/LC_foo. We must extract the <locale> part. */
305 if (((const struct __locale_data *) locale_file->data)->name == NULL)
307 char *cp, *endp;
309 endp = strrchr (locale_file->filename, '/');
310 cp = endp - 1;
311 while (cp[-1] != '/')
312 --cp;
313 ((struct __locale_data *) locale_file->data)->name
314 = __strndup (cp, endp - cp);
317 /* Determine whether the user wants transliteration or not. */
318 if (modifier != NULL
319 && __strcasecmp_l (modifier, "TRANSLIT", _nl_C_locobj_ptr) == 0)
320 ((struct __locale_data *) locale_file->data)->use_translit = 1;
322 /* Increment the usage count. */
323 if (((const struct __locale_data *) locale_file->data)->usage_count
324 < MAX_USAGE_COUNT)
325 ++((struct __locale_data *) locale_file->data)->usage_count;
327 return (struct __locale_data *) locale_file->data;
331 /* Calling this function assumes the lock for handling global locale data
332 is acquired. */
333 void
334 _nl_remove_locale (int locale, struct __locale_data *data)
336 if (--data->usage_count == 0)
338 if (data->alloc != ld_archive)
340 /* First search the entry in the list of loaded files. */
341 struct loaded_l10nfile *ptr = _nl_locale_file_list[locale];
343 /* Search for the entry. It must be in the list. Otherwise it
344 is a bug and we crash badly. */
345 while ((struct __locale_data *) ptr->data != data)
346 ptr = ptr->next;
348 /* Mark the data as not available anymore. So when the data has
349 to be used again it is reloaded. */
350 ptr->decided = 0;
351 ptr->data = NULL;
354 /* This does the real work. */
355 _nl_unload_locale (data);