unistr/u{8,16,32}-uctomb: Avoid possible trouble with huge strings.
[gnulib.git] / lib / unictype / 3level.h
blob1da51593255b67d8798273fb7911c2864eb9399f
1 /* Copyright (C) 2000-2001, 2009-2020 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Bruno Haible <haible@clisp.cons.org>, 2000.
6 NOTE: The canonical source of this file is maintained with the GNU C Library.
7 Bugs can be reported to bug-glibc@gnu.org.
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
12 later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, see <https://www.gnu.org/licenses/>. */
22 /* Construction of sparse 3-level tables.
23 See wchar-lookup.h or coll-lookup.h for their structure and the
24 meaning of p and q.
26 Before including this file, set
27 TABLE to the name of the structure to be defined
28 ELEMENT to the type of every entry
29 DEFAULT to the default value for empty entries
30 ITERATE if you want the TABLE_iterate function to be defined
31 NO_FINALIZE if you don't want the TABLE_finalize function to be defined
33 This will define
35 struct TABLE;
36 void TABLE_init (struct TABLE *t);
37 ELEMENT TABLE_get (struct TABLE *t, uint32_t wc);
38 void TABLE_add (struct TABLE *t, uint32_t wc, ELEMENT value);
39 void TABLE_iterate (struct TABLE *t,
40 void (*fn) (uint32_t wc, ELEMENT value));
41 void TABLE_finalize (struct TABLE *t);
44 #define CONCAT(a,b) CONCAT1(a,b)
45 #define CONCAT1(a,b) a##b
47 struct TABLE
49 /* Parameters. */
50 unsigned int p;
51 unsigned int q;
52 /* Working representation. */
53 size_t level1_alloc;
54 size_t level1_size;
55 uint32_t *level1;
56 size_t level2_alloc;
57 size_t level2_size;
58 uint32_t *level2;
59 size_t level3_alloc;
60 size_t level3_size;
61 ELEMENT *level3;
62 /* Compressed representation. */
63 size_t result_size;
64 char *result;
67 /* Initialize. Assumes t->p and t->q have already been set. */
68 static inline void
69 CONCAT(TABLE,_init) (struct TABLE *t)
71 t->level1 = NULL;
72 t->level1_alloc = t->level1_size = 0;
73 t->level2 = NULL;
74 t->level2_alloc = t->level2_size = 0;
75 t->level3 = NULL;
76 t->level3_alloc = t->level3_size = 0;
79 /* Marker for an empty slot. This has the value 0xFFFFFFFF, regardless
80 whether 'int' is 16 bit, 32 bit, or 64 bit. */
81 #define EMPTY ((uint32_t) ~0)
83 /* Retrieve an entry. */
84 static inline ELEMENT
85 CONCAT(TABLE,_get) (struct TABLE *t, uint32_t wc)
87 uint32_t index1 = wc >> (t->q + t->p);
88 if (index1 < t->level1_size)
90 uint32_t lookup1 = t->level1[index1];
91 if (lookup1 != EMPTY)
93 uint32_t index2 = ((wc >> t->p) & ((1 << t->q) - 1))
94 + (lookup1 << t->q);
95 uint32_t lookup2 = t->level2[index2];
96 if (lookup2 != EMPTY)
98 uint32_t index3 = (wc & ((1 << t->p) - 1))
99 + (lookup2 << t->p);
100 ELEMENT lookup3 = t->level3[index3];
102 return lookup3;
106 return DEFAULT;
109 /* Add one entry. */
110 static void
111 CONCAT(TABLE,_add) (struct TABLE *t, uint32_t wc, ELEMENT value)
113 uint32_t index1 = wc >> (t->q + t->p);
114 uint32_t index2 = (wc >> t->p) & ((1 << t->q) - 1);
115 uint32_t index3 = wc & ((1 << t->p) - 1);
116 size_t i, i1, i2;
118 if (value == CONCAT(TABLE,_get) (t, wc))
119 return;
121 if (index1 >= t->level1_size)
123 if (index1 >= t->level1_alloc)
125 size_t alloc = 2 * t->level1_alloc;
126 if (alloc <= index1)
127 alloc = index1 + 1;
128 t->level1 = (uint32_t *) xrealloc ((char *) t->level1,
129 alloc * sizeof (uint32_t));
130 t->level1_alloc = alloc;
132 while (index1 >= t->level1_size)
133 t->level1[t->level1_size++] = EMPTY;
136 if (t->level1[index1] == EMPTY)
138 if (t->level2_size == t->level2_alloc)
140 size_t alloc = 2 * t->level2_alloc + 1;
141 t->level2 = (uint32_t *) xrealloc ((char *) t->level2,
142 (alloc << t->q) * sizeof (uint32_t));
143 t->level2_alloc = alloc;
145 i1 = t->level2_size << t->q;
146 i2 = (t->level2_size + 1) << t->q;
147 for (i = i1; i < i2; i++)
148 t->level2[i] = EMPTY;
149 t->level1[index1] = t->level2_size++;
152 index2 += t->level1[index1] << t->q;
154 if (t->level2[index2] == EMPTY)
156 if (t->level3_size == t->level3_alloc)
158 size_t alloc = 2 * t->level3_alloc + 1;
159 t->level3 = (ELEMENT *) xrealloc ((char *) t->level3,
160 (alloc << t->p) * sizeof (ELEMENT));
161 t->level3_alloc = alloc;
163 i1 = t->level3_size << t->p;
164 i2 = (t->level3_size + 1) << t->p;
165 for (i = i1; i < i2; i++)
166 t->level3[i] = DEFAULT;
167 t->level2[index2] = t->level3_size++;
170 index3 += t->level2[index2] << t->p;
172 t->level3[index3] = value;
175 #ifdef ITERATE
176 /* Apply a function to all entries in the table. */
177 static void
178 CONCAT(TABLE,_iterate) (struct TABLE *t,
179 void (*fn) (uint32_t wc, ELEMENT value))
181 uint32_t index1;
182 for (index1 = 0; index1 < t->level1_size; index1++)
184 uint32_t lookup1 = t->level1[index1];
185 if (lookup1 != EMPTY)
187 uint32_t lookup1_shifted = lookup1 << t->q;
188 uint32_t index2;
189 for (index2 = 0; index2 < (1 << t->q); index2++)
191 uint32_t lookup2 = t->level2[index2 + lookup1_shifted];
192 if (lookup2 != EMPTY)
194 uint32_t lookup2_shifted = lookup2 << t->p;
195 uint32_t index3;
196 for (index3 = 0; index3 < (1 << t->p); index3++)
198 ELEMENT lookup3 = t->level3[index3 + lookup2_shifted];
199 if (lookup3 != DEFAULT)
200 fn ((((index1 << t->q) + index2) << t->p) + index3,
201 lookup3);
208 #endif
210 #ifndef NO_FINALIZE
211 /* Finalize and shrink. */
212 static void
213 CONCAT(TABLE,_finalize) (struct TABLE *t)
215 size_t i, j, k;
216 uint32_t reorder3[t->level3_size];
217 uint32_t reorder2[t->level2_size];
218 uint32_t level1_offset, level2_offset, level3_offset, last_offset;
220 /* Uniquify level3 blocks. */
221 k = 0;
222 for (j = 0; j < t->level3_size; j++)
224 for (i = 0; i < k; i++)
225 if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
226 (1 << t->p) * sizeof (ELEMENT)) == 0)
227 break;
228 /* Relocate block j to block i. */
229 reorder3[j] = i;
230 if (i == k)
232 if (i != j)
233 memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
234 (1 << t->p) * sizeof (ELEMENT));
235 k++;
238 t->level3_size = k;
240 for (i = 0; i < (t->level2_size << t->q); i++)
241 if (t->level2[i] != EMPTY)
242 t->level2[i] = reorder3[t->level2[i]];
244 /* Uniquify level2 blocks. */
245 k = 0;
246 for (j = 0; j < t->level2_size; j++)
248 for (i = 0; i < k; i++)
249 if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
250 (1 << t->q) * sizeof (uint32_t)) == 0)
251 break;
252 /* Relocate block j to block i. */
253 reorder2[j] = i;
254 if (i == k)
256 if (i != j)
257 memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
258 (1 << t->q) * sizeof (uint32_t));
259 k++;
262 t->level2_size = k;
264 for (i = 0; i < t->level1_size; i++)
265 if (t->level1[i] != EMPTY)
266 t->level1[i] = reorder2[t->level1[i]];
268 /* Create and fill the resulting compressed representation. */
269 last_offset =
270 5 * sizeof (uint32_t)
271 + t->level1_size * sizeof (uint32_t)
272 + (t->level2_size << t->q) * sizeof (uint32_t)
273 + (t->level3_size << t->p) * sizeof (ELEMENT);
274 t->result_size = (last_offset + 3) & ~3ul;
275 t->result = (char *) xmalloc (t->result_size);
277 level1_offset =
278 5 * sizeof (uint32_t);
279 level2_offset =
280 5 * sizeof (uint32_t)
281 + t->level1_size * sizeof (uint32_t);
282 level3_offset =
283 5 * sizeof (uint32_t)
284 + t->level1_size * sizeof (uint32_t)
285 + (t->level2_size << t->q) * sizeof (uint32_t);
287 ((uint32_t *) t->result)[0] = t->q + t->p;
288 ((uint32_t *) t->result)[1] = t->level1_size;
289 ((uint32_t *) t->result)[2] = t->p;
290 ((uint32_t *) t->result)[3] = (1 << t->q) - 1;
291 ((uint32_t *) t->result)[4] = (1 << t->p) - 1;
293 for (i = 0; i < t->level1_size; i++)
294 ((uint32_t *) (t->result + level1_offset))[i] =
295 (t->level1[i] == EMPTY
297 : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
299 for (i = 0; i < (t->level2_size << t->q); i++)
300 ((uint32_t *) (t->result + level2_offset))[i] =
301 (t->level2[i] == EMPTY
303 : (t->level2[i] << t->p) * sizeof (ELEMENT) + level3_offset);
305 for (i = 0; i < (t->level3_size << t->p); i++)
306 ((ELEMENT *) (t->result + level3_offset))[i] = t->level3[i];
308 if (last_offset < t->result_size)
309 memset (t->result + last_offset, 0, t->result_size - last_offset);
311 if (t->level1_alloc > 0)
312 free (t->level1);
313 if (t->level2_alloc > 0)
314 free (t->level2);
315 if (t->level3_alloc > 0)
316 free (t->level3);
318 #endif
320 #undef EMPTY
321 #undef TABLE
322 #undef ELEMENT
323 #undef DEFAULT
324 #undef ITERATE
325 #undef NO_FINALIZE