Fix OOB read in stdlib thousand grouping parsing [BZ #29727]
[glibc.git] / locale / loadlocale.c
blobd44310b4b51c83b3f775dbf0120be943dcd84ccd
1 /* Functions to read locale data files.
2 Copyright (C) 1996-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
19 #include <assert.h>
20 #include <errno.h>
21 #include <fcntl.h>
22 #include <locale.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <unistd.h>
26 #ifdef _POSIX_MAPPED_FILES
27 # include <sys/mman.h>
28 #endif
29 #include <sys/stat.h>
31 #include <not-cancel.h>
32 #include "localeinfo.h"
35 static const size_t _nl_category_num_items[] =
37 #define DEFINE_CATEGORY(category, category_name, items, a) \
38 [category] = _NL_ITEM_INDEX (_NL_NUM_##category),
39 #include "categories.def"
40 #undef DEFINE_CATEGORY
44 #define NO_PAREN(arg, rest...) arg, ##rest
46 /* The size of the array must be specified explicitly because some of
47 the 'items' may be subarrays, which will cause the compiler to deduce
48 an incorrect size from the initializer. */
49 #define DEFINE_CATEGORY(category, category_name, items, a) \
50 static const enum value_type _nl_value_type_##category \
51 [_NL_ITEM_INDEX (_NL_NUM_##category)] = { NO_PAREN items };
52 #define DEFINE_ELEMENT(element, element_name, optstd, type, rest...) \
53 [_NL_ITEM_INDEX (element)] = type,
54 #include "categories.def"
55 #undef DEFINE_CATEGORY
57 static const enum value_type *const _nl_value_types[] =
59 #define DEFINE_CATEGORY(category, category_name, items, a) \
60 [category] = _nl_value_type_##category,
61 #include "categories.def"
62 #undef DEFINE_CATEGORY
65 /* Fill in LOCDATA->private for the LC_CTYPE category. */
66 static void
67 _nl_intern_locale_data_fill_cache_ctype (struct __locale_data *locdata)
69 struct lc_ctype_data *data = locdata->private;
71 /* Default to no translation. Assumes zero initialization of *data. */
72 memset (data->outdigit_bytes, 1, sizeof (data->outdigit_bytes));
74 for (int i = 0; i <= 9; ++i)
76 const char *digit
77 = locdata->values[_NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB + i)].string;
78 unsigned char len;
79 if (digit[0] != '0' + i || digit[1] != '\0')
81 data->outdigit_translation_needed = true;
82 len = strlen (locdata->values[_NL_ITEM_INDEX
83 (_NL_CTYPE_OUTDIGIT0_MB + i)].string);
85 else
86 len = 1;
87 data->outdigit_bytes[i] = len;
88 if (i == 0)
89 data->outdigit_bytes_all_equal = len;
90 else if (data->outdigit_bytes_all_equal != len)
91 data->outdigit_bytes_all_equal = 0;
/* Updates data in LOCDATA->private for CATEGORY.  Only LC_CTYPE has a
   cache to fill; every other category is left untouched.  */
static void
_nl_intern_locale_data_fill_cache (int category, struct __locale_data *locdata)
{
  switch (category)
    {
    case LC_CTYPE:
      _nl_intern_locale_data_fill_cache_ctype (locdata);
      break;
    }
}
107 /* Returns the number of bytes allocated of struct __locale_data for
108 CATEGORY. */
109 static size_t
110 _nl_intern_locale_data_extra_size (int category)
112 switch (category)
114 case LC_CTYPE:
115 return sizeof (struct lc_ctype_data);
116 default:
117 return 0;
121 struct __locale_data *
122 _nl_intern_locale_data (int category, const void *data, size_t datasize)
124 const struct
126 unsigned int magic;
127 unsigned int nstrings;
128 unsigned int strindex[0];
129 } *const filedata = data;
130 struct __locale_data *newdata;
131 size_t cnt;
133 if (__builtin_expect (datasize < sizeof *filedata, 0)
134 || __builtin_expect (filedata->magic != LIMAGIC (category), 0))
136 /* Bad data file. */
137 __set_errno (EINVAL);
138 return NULL;
141 if (__builtin_expect (filedata->nstrings < _nl_category_num_items[category],
143 || (__builtin_expect (sizeof *filedata
144 + filedata->nstrings * sizeof (unsigned int)
145 >= datasize, 0)))
147 /* Insufficient data. */
148 __set_errno (EINVAL);
149 return NULL;
152 size_t base_size = (sizeof *newdata
153 + filedata->nstrings * sizeof (union locale_data_value));
154 size_t extra_size = _nl_intern_locale_data_extra_size (category);
156 newdata = malloc (base_size + extra_size);
157 if (newdata == NULL)
158 return NULL;
160 newdata->filedata = (void *) filedata;
161 newdata->filesize = datasize;
162 if (extra_size == 0)
163 newdata->private = NULL;
164 else
166 newdata->private = (char *) newdata + base_size;
167 memset (newdata->private, 0, extra_size);
169 newdata->usage_count = 0;
170 newdata->use_translit = 0;
171 newdata->nstrings = filedata->nstrings;
172 for (cnt = 0; cnt < newdata->nstrings; ++cnt)
174 size_t idx = filedata->strindex[cnt];
175 if (__glibc_unlikely (idx > (size_t) newdata->filesize))
177 puntdata:
178 free (newdata);
179 __set_errno (EINVAL);
180 return NULL;
183 /* Determine the type. There is one special case: the LC_CTYPE
184 category can have more elements than there are in the
185 _nl_value_type_LC_XYZ array. There are all pointers. */
186 switch (category)
188 #define CATTEST(cat) \
189 case LC_##cat: \
190 if (cnt >= (sizeof (_nl_value_type_LC_##cat) \
191 / sizeof (_nl_value_type_LC_##cat[0]))) \
192 goto puntdata; \
193 break
194 CATTEST (NUMERIC);
195 CATTEST (TIME);
196 CATTEST (COLLATE);
197 CATTEST (MONETARY);
198 CATTEST (MESSAGES);
199 CATTEST (PAPER);
200 CATTEST (NAME);
201 CATTEST (ADDRESS);
202 CATTEST (TELEPHONE);
203 CATTEST (MEASUREMENT);
204 CATTEST (IDENTIFICATION);
205 default:
206 assert (category == LC_CTYPE);
207 break;
210 if ((category == LC_CTYPE
211 && cnt >= (sizeof (_nl_value_type_LC_CTYPE)
212 / sizeof (_nl_value_type_LC_CTYPE[0])))
213 || __builtin_expect (_nl_value_types[category][cnt] != word, 1))
214 newdata->values[cnt].string = newdata->filedata + idx;
215 else
217 if (!LOCFILE_ALIGNED_P (idx))
218 goto puntdata;
219 newdata->values[cnt].word =
220 *((const uint32_t *) (newdata->filedata + idx));
224 if (extra_size > 0)
225 _nl_intern_locale_data_fill_cache (category, newdata);
227 return newdata;
230 void
231 _nl_load_locale (struct loaded_l10nfile *file, int category)
233 int fd;
234 void *filedata;
235 struct __stat64_t64 st;
236 struct __locale_data *newdata;
237 int save_err;
238 int alloc = ld_mapped;
240 file->decided = 1;
241 file->data = NULL;
243 fd = __open_nocancel (file->filename, O_RDONLY | O_CLOEXEC);
244 if (__builtin_expect (fd, 0) < 0)
245 /* Cannot open the file. */
246 return;
248 if (__glibc_unlikely (__fstat64_time64 (fd, &st) < 0))
250 puntfd:
251 __close_nocancel_nostatus (fd);
252 return;
254 if (__glibc_unlikely (S_ISDIR (st.st_mode)))
256 /* LOCALE/LC_foo is a directory; open LOCALE/LC_foo/SYS_LC_foo
257 instead. */
258 char *newp;
259 size_t filenamelen;
261 __close_nocancel_nostatus (fd);
263 filenamelen = strlen (file->filename);
264 newp = (char *) alloca (filenamelen
265 + 5 + _nl_category_name_sizes[category] + 1);
266 __mempcpy (__mempcpy (__mempcpy (newp, file->filename, filenamelen),
267 "/SYS_", 5), _nl_category_names_get (category),
268 _nl_category_name_sizes[category] + 1);
270 fd = __open_nocancel (newp, O_RDONLY | O_CLOEXEC);
271 if (__builtin_expect (fd, 0) < 0)
272 return;
274 if (__glibc_unlikely (__fstat64_time64 (fd, &st) < 0))
275 goto puntfd;
278 /* Map in the file's data. */
279 save_err = errno;
280 #ifdef _POSIX_MAPPED_FILES
281 # ifndef MAP_COPY
282 /* Linux seems to lack read-only copy-on-write. */
283 # define MAP_COPY MAP_PRIVATE
284 # endif
285 # ifndef MAP_FILE
286 /* Some systems do not have this flag; it is superfluous. */
287 # define MAP_FILE 0
288 # endif
289 filedata = __mmap ((caddr_t) 0, st.st_size,
290 PROT_READ, MAP_FILE|MAP_COPY, fd, 0);
291 if (__glibc_unlikely (filedata == MAP_FAILED))
293 filedata = NULL;
294 if (__builtin_expect (errno, ENOSYS) == ENOSYS)
296 #endif /* _POSIX_MAPPED_FILES */
297 /* No mmap; allocate a buffer and read from the file. */
298 alloc = ld_malloced;
299 filedata = malloc (st.st_size);
300 if (filedata != NULL)
302 off_t to_read = st.st_size;
303 ssize_t nread;
304 char *p = (char *) filedata;
305 while (to_read > 0)
307 nread = __read_nocancel (fd, p, to_read);
308 if (__builtin_expect (nread, 1) <= 0)
310 free (filedata);
311 if (nread == 0)
312 __set_errno (EINVAL); /* Bizarreness going on. */
313 goto puntfd;
315 p += nread;
316 to_read -= nread;
318 __set_errno (save_err);
320 #ifdef _POSIX_MAPPED_FILES
323 #endif /* _POSIX_MAPPED_FILES */
325 /* We have mapped the data, so we no longer need the descriptor. */
326 __close_nocancel_nostatus (fd);
328 if (__glibc_unlikely (filedata == NULL))
329 /* We failed to map or read the data. */
330 return;
332 newdata = _nl_intern_locale_data (category, filedata, st.st_size);
333 if (__glibc_unlikely (newdata == NULL))
334 /* Bad data. */
336 #ifdef _POSIX_MAPPED_FILES
337 if (alloc == ld_mapped)
338 __munmap ((caddr_t) filedata, st.st_size);
339 #endif
340 return;
343 /* _nl_intern_locale_data leaves us these fields to initialize. */
344 newdata->name = NULL; /* This will be filled if necessary in findlocale.c. */
345 newdata->alloc = alloc;
347 file->data = newdata;
350 void
351 _nl_unload_locale (int category, struct __locale_data *locale)
353 /* Deallocate locale->private. */
354 switch (category)
356 case LC_CTYPE:
357 _nl_cleanup_ctype (locale);
358 break;
359 case LC_TIME:
360 _nl_cleanup_time (locale);
361 break;
364 switch (__builtin_expect (locale->alloc, ld_mapped))
366 case ld_malloced:
367 free ((void *) locale->filedata);
368 break;
369 case ld_mapped:
370 #ifdef _POSIX_MAPPED_FILES
371 __munmap ((caddr_t) locale->filedata, locale->filesize);
372 break;
373 #endif
374 case ld_archive: /* Nothing to do. */
375 break;
378 if (__builtin_expect (locale->alloc, ld_mapped) != ld_archive)
379 free ((char *) locale->name);
381 free (locale);