x86-64: Optimize strlen/strnlen/wcslen/wcsnlen with AVX2
[glibc.git] / locale / findlocale.c
blob02a97ac654f4d886a7a11806f668e2ecdcb8e5e5
1 /* Copyright (C) 1996-2017 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
19 #include <assert.h>
20 #include <errno.h>
21 #include <locale.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <unistd.h>
25 #ifdef _POSIX_MAPPED_FILES
26 # include <sys/mman.h>
27 #endif
29 #include "localeinfo.h"
30 #include "../iconv/gconv_charset.h"
31 #include "../iconv/gconv_int.h"
34 #ifdef NL_CURRENT_INDIRECT
35 # define DEFINE_CATEGORY(category, category_name, items, a) \
36 extern struct __locale_data _nl_C_##category; \
37 weak_extern (_nl_C_##category)
38 # include "categories.def"
39 # undef DEFINE_CATEGORY
41 /* Array indexed by category of pointers to _nl_C_CATEGORY slots.
42 Elements are zero for categories whose data is never used. */
43 struct __locale_data *const _nl_C[] attribute_hidden =
45 # define DEFINE_CATEGORY(category, category_name, items, a) \
46 [category] = &_nl_C_##category,
47 # include "categories.def"
48 # undef DEFINE_CATEGORY
50 #else
51 # define _nl_C (_nl_C_locobj.__locales)
52 #endif
55 /* For each category we keep a list of records for the locale files
56 which are somehow addressed. */
57 struct loaded_l10nfile *_nl_locale_file_list[__LC_LAST];
59 const char _nl_default_locale_path[] attribute_hidden = COMPLOCALEDIR;
/* Checks if the name is actually present, that is, not NULL and not
   empty.  Returns nonzero if NAME is usable, zero otherwise.  */
static inline int
name_present (const char *name)
{
  return name != NULL && name[0] != '\0';
}
/* Checks that the locale name is neither extremely long nor contains a
   ".." path component (to prevent directory traversal).  NAME must be
   a non-NULL, NUL-terminated string.  Returns 1 if the name is
   acceptable and 0 otherwise.  */
static inline int
valid_locale_name (const char *name)
{
  size_t namelen = strlen (name);

  /* Name too long.  The limit is arbitrary and prevents stack overflow
     issues later.  */
  if (__glibc_unlikely (namelen > 255))
    return 0;

  /* Directory traversal attempt: ".." as an interior component.  NAME
     is NUL-terminated, so a plain substring search covers exactly the
     NAMELEN bytes of the name.  */
  if (__glibc_unlikely (strstr (name, "/../") != NULL))
    return 0;

  /* ".." as the whole name.  */
  if (namelen == 2 && __glibc_unlikely (name[0] == '.' && name[1] == '.'))
    return 0;

  /* ".." as the leading ("../x") or trailing ("x/..") component.  */
  if (namelen >= 3
      && __glibc_unlikely (((name[0] == '.'
                             && name[1] == '.'
                             && name[2] == '/')
                            || (name[namelen - 3] == '/'
                                && name[namelen - 2] == '.'
                                && name[namelen - 1] == '.'))))
    return 0;

  /* If there is a slash in the name, it must start with one.  */
  if (__glibc_unlikely (memchr (name, '/', namelen) != NULL) && name[0] != '/')
    return 0;

  return 1;
}
101 struct __locale_data *
102 internal_function
103 _nl_find_locale (const char *locale_path, size_t locale_path_len,
104 int category, const char **name)
106 int mask;
107 /* Name of the locale for this category. */
108 const char *cloc_name = *name;
109 const char *language;
110 const char *modifier;
111 const char *territory;
112 const char *codeset;
113 const char *normalized_codeset;
114 struct loaded_l10nfile *locale_file;
116 if (cloc_name[0] == '\0')
118 /* The user decides which locale to use by setting environment
119 variables. */
120 cloc_name = getenv ("LC_ALL");
121 if (!name_present (cloc_name))
122 cloc_name = getenv (_nl_category_names.str
123 + _nl_category_name_idxs[category]);
124 if (!name_present (cloc_name))
125 cloc_name = getenv ("LANG");
126 if (!name_present (cloc_name))
127 cloc_name = _nl_C_name;
130 /* We used to fall back to the C locale if the name contains a slash
131 character '/', but we now check for directory traversal in
132 valid_locale_name, so this is no longer necessary. */
134 if (__builtin_expect (strcmp (cloc_name, _nl_C_name), 1) == 0
135 || __builtin_expect (strcmp (cloc_name, _nl_POSIX_name), 1) == 0)
137 /* We need not load anything. The needed data is contained in
138 the library itself. */
139 *name = _nl_C_name;
140 return _nl_C[category];
142 else if (!valid_locale_name (cloc_name))
144 __set_errno (EINVAL);
145 return NULL;
148 *name = cloc_name;
150 /* We really have to load some data. First we try the archive,
151 but only if there was no LOCPATH environment variable specified. */
152 if (__glibc_likely (locale_path == NULL))
154 struct __locale_data *data
155 = _nl_load_locale_from_archive (category, name);
156 if (__glibc_likely (data != NULL))
157 return data;
159 /* Nothing in the archive with the given name. Expanding it as
160 an alias and retry. */
161 cloc_name = _nl_expand_alias (*name);
162 if (cloc_name != NULL)
164 data = _nl_load_locale_from_archive (category, &cloc_name);
165 if (__builtin_expect (data != NULL, 1))
166 return data;
169 /* Nothing in the archive. Set the default path to search below. */
170 locale_path = _nl_default_locale_path;
171 locale_path_len = sizeof _nl_default_locale_path;
173 else
174 /* We really have to load some data. First see whether the name is
175 an alias. Please note that this makes it impossible to have "C"
176 or "POSIX" as aliases. */
177 cloc_name = _nl_expand_alias (*name);
179 if (cloc_name == NULL)
180 /* It is no alias. */
181 cloc_name = *name;
183 /* Make a writable copy of the locale name. */
184 char *loc_name = strdupa (cloc_name);
186 /* LOCALE can consist of up to four recognized parts for the XPG syntax:
188 language[_territory[.codeset]][@modifier]
190 Beside the first all of them are allowed to be missing. If the
191 full specified locale is not found, the less specific one are
192 looked for. The various part will be stripped off according to
193 the following order:
194 (1) codeset
195 (2) normalized codeset
196 (3) territory
197 (4) modifier
199 mask = _nl_explode_name (loc_name, &language, &modifier, &territory,
200 &codeset, &normalized_codeset);
201 if (mask == -1)
202 /* Memory allocate problem. */
203 return NULL;
205 /* If exactly this locale was already asked for we have an entry with
206 the complete name. */
207 locale_file = _nl_make_l10nflist (&_nl_locale_file_list[category],
208 locale_path, locale_path_len, mask,
209 language, territory, codeset,
210 normalized_codeset, modifier,
211 _nl_category_names.str
212 + _nl_category_name_idxs[category], 0);
214 if (locale_file == NULL)
216 /* Find status record for addressed locale file. We have to search
217 through all directories in the locale path. */
218 locale_file = _nl_make_l10nflist (&_nl_locale_file_list[category],
219 locale_path, locale_path_len, mask,
220 language, territory, codeset,
221 normalized_codeset, modifier,
222 _nl_category_names.str
223 + _nl_category_name_idxs[category], 1);
224 if (locale_file == NULL)
225 /* This means we are out of core. */
226 return NULL;
229 /* The space for normalized_codeset is dynamically allocated. Free it. */
230 if (mask & XPG_NORM_CODESET)
231 free ((void *) normalized_codeset);
233 if (locale_file->decided == 0)
234 _nl_load_locale (locale_file, category);
236 if (locale_file->data == NULL)
238 int cnt;
239 for (cnt = 0; locale_file->successor[cnt] != NULL; ++cnt)
241 if (locale_file->successor[cnt]->decided == 0)
242 _nl_load_locale (locale_file->successor[cnt], category);
243 if (locale_file->successor[cnt]->data != NULL)
244 break;
246 /* Move the entry we found (or NULL) to the first place of
247 successors. */
248 locale_file->successor[0] = locale_file->successor[cnt];
249 locale_file = locale_file->successor[cnt];
251 if (locale_file == NULL)
252 return NULL;
255 /* The LC_CTYPE category allows to check whether a locale is really
256 usable. If the locale name contains a charset name and the
257 charset name used in the locale (present in the LC_CTYPE data) is
258 not the same (after resolving aliases etc) we reject the locale
259 since using it would irritate users expecting the charset named
260 in the locale name. */
261 if (codeset != NULL)
263 /* Get the codeset information from the locale file. */
264 static const int codeset_idx[] =
266 [__LC_CTYPE] = _NL_ITEM_INDEX (CODESET),
267 [__LC_NUMERIC] = _NL_ITEM_INDEX (_NL_NUMERIC_CODESET),
268 [__LC_TIME] = _NL_ITEM_INDEX (_NL_TIME_CODESET),
269 [__LC_COLLATE] = _NL_ITEM_INDEX (_NL_COLLATE_CODESET),
270 [__LC_MONETARY] = _NL_ITEM_INDEX (_NL_MONETARY_CODESET),
271 [__LC_MESSAGES] = _NL_ITEM_INDEX (_NL_MESSAGES_CODESET),
272 [__LC_PAPER] = _NL_ITEM_INDEX (_NL_PAPER_CODESET),
273 [__LC_NAME] = _NL_ITEM_INDEX (_NL_NAME_CODESET),
274 [__LC_ADDRESS] = _NL_ITEM_INDEX (_NL_ADDRESS_CODESET),
275 [__LC_TELEPHONE] = _NL_ITEM_INDEX (_NL_TELEPHONE_CODESET),
276 [__LC_MEASUREMENT] = _NL_ITEM_INDEX (_NL_MEASUREMENT_CODESET),
277 [__LC_IDENTIFICATION] = _NL_ITEM_INDEX (_NL_IDENTIFICATION_CODESET)
279 const struct __locale_data *data;
280 const char *locale_codeset;
281 char *clocale_codeset;
282 char *ccodeset;
284 data = (const struct __locale_data *) locale_file->data;
285 locale_codeset =
286 (const char *) data->values[codeset_idx[category]].string;
287 assert (locale_codeset != NULL);
288 /* Note the length of the allocated memory: +3 for up to two slashes
289 and the NUL byte. */
290 clocale_codeset = (char *) alloca (strlen (locale_codeset) + 3);
291 strip (clocale_codeset, locale_codeset);
293 ccodeset = (char *) alloca (strlen (codeset) + 3);
294 strip (ccodeset, codeset);
296 if (__gconv_compare_alias (upstr (ccodeset, ccodeset),
297 upstr (clocale_codeset,
298 clocale_codeset)) != 0)
299 /* The codesets are not identical, don't use the locale. */
300 return NULL;
303 /* Determine the locale name for which loading succeeded. This
304 information comes from the file name. The form is
305 <path>/<locale>/LC_foo. We must extract the <locale> part. */
306 if (((const struct __locale_data *) locale_file->data)->name == NULL)
308 char *cp, *endp;
310 endp = strrchr (locale_file->filename, '/');
311 cp = endp - 1;
312 while (cp[-1] != '/')
313 --cp;
314 ((struct __locale_data *) locale_file->data)->name
315 = __strndup (cp, endp - cp);
318 /* Determine whether the user wants transliteration or not. */
319 if (modifier != NULL
320 && __strcasecmp_l (modifier, "TRANSLIT", _nl_C_locobj_ptr) == 0)
321 ((struct __locale_data *) locale_file->data)->use_translit = 1;
323 /* Increment the usage count. */
324 if (((const struct __locale_data *) locale_file->data)->usage_count
325 < MAX_USAGE_COUNT)
326 ++((struct __locale_data *) locale_file->data)->usage_count;
328 return (struct __locale_data *) locale_file->data;
332 /* Calling this function assumes the lock for handling global locale data
333 is acquired. */
334 void
335 internal_function
336 _nl_remove_locale (int locale, struct __locale_data *data)
338 if (--data->usage_count == 0)
340 if (data->alloc != ld_archive)
342 /* First search the entry in the list of loaded files. */
343 struct loaded_l10nfile *ptr = _nl_locale_file_list[locale];
345 /* Search for the entry. It must be in the list. Otherwise it
346 is a bug and we crash badly. */
347 while ((struct __locale_data *) ptr->data != data)
348 ptr = ptr->next;
350 /* Mark the data as not available anymore. So when the data has
351 to be used again it is reloaded. */
352 ptr->decided = 0;
353 ptr->data = NULL;
356 /* This does the real work. */
357 _nl_unload_locale (data);