beta-0.89.2
[luatex.git] / source / texk / web2c / luatexdir / tex / stringpool.w
blob5972657f56ef5e8b4f66a7c174cfcd631582991f
1 % stringpool.w
3 % Copyright 2009-2010 Taco Hoekwater <taco@@luatex.org>
5 % This file is part of LuaTeX.
7 % LuaTeX is free software; you can redistribute it and/or modify it under
8 % the terms of the GNU General Public License as published by the Free
9 % Software Foundation; either version 2 of the License, or (at your
10 % option) any later version.
12 % LuaTeX is distributed in the hope that it will be useful, but WITHOUT
13 % ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 % FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 % License for more details.
17 % You should have received a copy of the GNU General Public License along
18 % with LuaTeX; if not, see <http://www.gnu.org/licenses/>.
20 @ @c
23 #include "ptexlib.h"
25 @ Control sequence names and diagnostic messages are variable-length strings
26 of eight-bit characters. Since PASCAL did not have a well-developed string
27 mechanism, \TeX\ did all of its string processing by homegrown methods.
29 Elaborate facilities for dynamic strings are not needed, so all of the
30 necessary operations can be handled with a simple data structure.
31 The array |str_pool| contains all of the (eight-bit) bytes of all
32 of the strings, and the array |str_start| contains indices of the starting
33 points of each string. Strings are referred to by integer numbers, so that
34 string number |s| comprises the characters |str_pool[j]| for
35 |str_start_macro(s)<=j<str_start_macro(s+1)|. Additional integer variables
36 |pool_ptr| and |str_ptr| indicate the number of entries used so far
37 in |str_pool| and |str_start|, respectively; locations
38 |str_pool[pool_ptr]| and |str_start_macro(str_ptr)| are
39 ready for the next string to be allocated.
41 String numbers 0 to |biggest_char| are reserved for strings that correspond to
42 single UNICODE characters. This is in accordance with the conventions of \.{WEB}
43 which converts single-character strings into the ASCII code number of the
44 single character involved.
47 lstring *string_pool; /* the array of strings */
48 lstring *_string_pool; /* this variable lives |STRING_OFFSET| below |string_pool|
49 (handy for debugging:
50 |_string_pool[str_ptr] == str_string(str_ptr)|) */
52 str_number str_ptr = (STRING_OFFSET + 1); /* number of the current string being created */
53 str_number init_str_ptr; /* the starting value of |str_ptr|; assigned at the end of |undump_string_pool| */
55 unsigned char *cur_string; /* current string buffer; |make_string| stores this pointer in the pool */
56 unsigned cur_length; /* current index in that buffer, i.e. number of bytes collected so far */
57 unsigned cur_string_size; /* size of |cur_string|; |reset_cur_string| sets it to 255 while allocating 256 bytes */
58 unsigned pool_size; /* occupied byte count: grows by the string length in |make_string| and
   |undump_string_pool|, shrinks in |flush_str| */
61 @ Once a sequence of characters has been appended to |cur_string|, it
62 officially becomes a string when the function |make_string| is called.
63 This function returns the identification number of the new string as its
64 value.
67 void reset_cur_string(void)
69 cur_length = 0;
70 cur_string_size = 255;
71 cur_string = (unsigned char *) xmalloc(256);
72 memset(cur_string, 0, 256);
75 @ current string enters the pool
77 str_number make_string(void)
79 if (str_ptr == (max_strings + STRING_OFFSET))
80 overflow("number of strings",
81 (unsigned) (max_strings - init_str_ptr + STRING_OFFSET));
82 str_room(1);
83 cur_string[cur_length] = '\0'; /* now |lstring.s| is always a valid C string */
84 str_string(str_ptr) = (unsigned char *) cur_string;
85 str_length(str_ptr) = cur_length;
86 pool_size += cur_length;
87 reset_cur_string();
88 #if 0
89 printf("Made a string: %s (s=%d)\n", (char *)str_string(str_ptr), (int)str_ptr);
90 #endif
91 str_ptr++;
92 return (str_ptr - 1);
95 @ @c
/* Pass the bytes starting at |t| to |str2uni| and return its result
   converted to |int|. */
int pool_to_unichar(unsigned char *t)
{
    int unichar = (int) str2uni(t);

    return unichar;
}
103 @ The following subroutine compares string |s| with another string of the
104 same length that appears in |buffer| starting at position |k|;
105 the result is |true| if and only if the strings are equal.
106 Empirical tests indicate that |str_eq_buf| is used in such a way that
107 it tends to return |true| about 80 percent of the time.
110 boolean str_eq_buf(str_number s, int k)
111 { /* test equality of strings */
112 int a; /* a unicode character */
113 if (s < STRING_OFFSET) {
114 a = buffer_to_unichar(k);
115 if (a != s)
116 return false;
117 } else {
118 unsigned char *j = str_string(s);
119 unsigned char *l = j + str_length(s);
120 while (j < l) {
121 if (*j++ != buffer[k++])
122 return false;
125 return true;
129 @ Here is a similar routine, but it compares two strings in the string pool,
130 and it does not assume that they have the same length.
133 boolean str_eq_str(str_number s, str_number t)
134 { /* test equality of strings */
135 int a = 0; /* a utf char */
136 unsigned char *j, *k, *l; /* running indices */
137 if (s < STRING_OFFSET) {
138 if (t >= STRING_OFFSET) {
139 k = str_string(t);
140 if (s <= 0x7F && (str_length(t) == 1) && *k == s)
141 return true;
142 a = pool_to_unichar(k);
143 if (a != s)
144 return false;
145 } else {
146 if (t != s)
147 return false;
149 } else if (t < STRING_OFFSET) {
150 j = str_string(s);
151 if (t <= 0x7F && (str_length(s) == 1) && *j == t)
152 return true;
153 a = pool_to_unichar(j);
154 if (a != t)
155 return false;
156 } else {
157 if (str_length(s) != str_length(t))
158 return false;
159 k = str_string(t);
160 j = str_string(s);
161 l = j + str_length(s);
162 while (j < l) {
163 if (*j++ != *k++)
164 return false;
167 return true;
170 @ string compare
172 boolean str_eq_cstr(str_number r, const char *s, size_t l)
174 if (l != (size_t) str_length(r))
175 return false;
176 return (strncmp((const char *) (str_string(r)), s, l) == 0);
180 @ The initial values of |str_pool|, |str_start|, |pool_ptr|,
181 and |str_ptr| are computed by the \.{INITEX} program, based in part
182 on the information that \.{WEB} has output while processing \TeX.
184 The first |string_offset| strings are single-character strings matching
185 Unicode. There is no point in generating all of these. But |str_ptr| has
186 to be initialized properly, otherwise |print_char| cannot see the difference
187 between characters and strings.
190 @ initializes the string pool, but returns |false| if something goes wrong
192 boolean get_strings_started(void)
194 reset_cur_string();
195 return true;
198 @ The string recycling routines.
199 \TeX{} uses 2 up to 4 {\it new\/} strings when scanning a filename in an
200 \.{\\input}, \.{\\openin}, or \.{\\openout} operation. These strings are
201 normally lost because the references to them are not saved after finishing
202 the operation. |search_string| searches through the string pool for the
203 given string and returns either 0 or the found string number.
206 str_number search_string(str_number search)
208 str_number s; /* running index */
209 size_t len; /* length of searched string */
210 len = str_length(search);
211 if (len == 0) {
212 return get_nullstr();
213 } else {
214 s = search - 1; /* start search with newest string below |s|; |search>1|! */
215 while (s >= STRING_OFFSET) {
216 /* first |string_offset| strings depend on implementation!! */
217 if (str_length(s) == len)
218 if (str_eq_str(s, search))
219 return s;
220 s--;
223 return 0;
226 @ @c
227 str_number maketexstring(const char *s)
229 if (s == NULL || *s == 0)
230 return get_nullstr();
231 return maketexlstring(s, strlen(s));
234 @ @c
235 str_number maketexlstring(const char *s, size_t l)
237 if (s == NULL || l == 0)
238 return get_nullstr();
239 str_string(str_ptr) = xmalloc((unsigned) (l + 1));
240 memcpy(str_string(str_ptr), s, (l + 1));
241 str_length(str_ptr) = (unsigned) l;
242 str_ptr++;
243 return (str_ptr - 1);
246 @ append a C string to a TeX string
248 void append_string(const unsigned char *s, unsigned l)
250 if (s == NULL || *s == 0)
251 return;
252 l = (unsigned) strlen((const char *) s);
253 str_room(l);
254 memcpy(cur_string + cur_length, s, l);
255 cur_length += l;
256 return;
259 @ @c
/* Same as |makeclstring|, for callers that do not need the length. */
char *makecstring(int s)
{
    size_t unused_len;          /* receives the length, which is dropped */

    return makeclstring(s, &unused_len);
}
266 @ @c
267 char *makeclstring(int s, size_t * len)
269 if (s < STRING_OFFSET) {
270 *len = (size_t) utf8_size(s);
271 return (char *) uni2str((unsigned) s);
272 } else {
273 unsigned l = (unsigned) str_length(s);
274 char *cstrbuf = xmalloc(l + 1);
275 memcpy(cstrbuf, str_string(s), l);
276 cstrbuf[l] = '\0';
277 *len = (size_t) l;
278 return cstrbuf;
282 @ @c
283 int dump_string_pool(void)
285 int j;
286 int l;
287 int k = str_ptr;
288 dump_int(k - STRING_OFFSET);
289 for (j = STRING_OFFSET + 1; j < k; j++) {
290 l = (int) str_length(j);
291 if (str_string(j) == NULL)
292 l = -1;
293 dump_int(l);
294 if (l > 0)
295 dump_things(*str_string(j), str_length(j));
297 return (k - STRING_OFFSET);
300 @ @c
301 int undump_string_pool(void)
303 int j;
304 int x;
305 undump_int(str_ptr);
306 if (max_strings < str_ptr + strings_free)
307 max_strings = str_ptr + strings_free;
308 str_ptr += STRING_OFFSET;
309 if (ini_version)
310 libcfree(string_pool);
311 init_string_pool_array((unsigned) max_strings);
312 for (j = STRING_OFFSET + 1; j < str_ptr; j++) {
313 undump_int(x);
314 if (x >= 0) {
315 str_length(j) = (unsigned) x;
316 pool_size += (unsigned) x;
317 str_string(j) = xmallocarray(unsigned char, (unsigned) (x + 1));
318 undump_things(*str_string(j), (unsigned) x);
319 *(str_string(j) + str_length(j)) = '\0';
320 } else {
321 str_length(j) = 0;
324 init_str_ptr = str_ptr;
325 return str_ptr;
328 @ @c
329 void init_string_pool_array(unsigned s)
331 string_pool = xmallocarray(lstring, s);
332 _string_pool = string_pool - STRING_OFFSET;
333 memset(string_pool, 0, s * sizeof(lstring));
334 /* seed the null string */
335 string_pool[0].s = xmalloc(1);
336 string_pool[0].s[0] = '\0';
339 @ To destroy an already made string, we say |flush_str|.
341 void flush_str(str_number s)
343 #if 0
344 printf("Flushing a string: %s (s=%d,str_ptr=%d)\n", (char *)str_string(s), (int)s, (int)str_ptr);
345 #endif
346 if (s > STRING_OFFSET) { /* don't ever delete the null string */
347 pool_size -= (unsigned) str_length(s);
348 str_length(s) = 0;
349 xfree(str_string(s));
351 while (str_string((str_ptr - 1)) == NULL)
352 str_ptr--;