3 % Copyright
2009-2010 Taco Hoekwater
<taco@@luatex.org
>
5 % This file is part of LuaTeX.
7 % LuaTeX is free software
; you can redistribute it and
/or modify it under
8 % the terms of the GNU General Public License as published by the Free
9 % Software Foundation
; either version
2 of the License
, or
(at your
10 % option
) any later version.
12 % LuaTeX is distributed in the hope that it will be useful
, but WITHOUT
13 % ANY WARRANTY
; without even the implied warranty of MERCHANTABILITY or
14 % FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 % License for more details.
17 % You should have received a copy of the GNU General Public License along
18 % with LuaTeX
; if not
, see
<http
://www.gnu.org
/licenses
/>.
25 @ Control sequence names and diagnostic messages are variable-length strings
26 of eight-bit characters. Since
PASCAL did not have a well-developed string
27 mechanism
, \TeX\ did all of its string processing by homegrown methods.
Elaborate facilities for dynamic strings are not needed, so all of the
necessary operations can be handled with a simple data structure.
The array |str_pool| contains all of the (eight-bit) bytes of all
of the strings, and the array |str_start| contains indices of the starting
points of each string. Strings are referred to by integer numbers, so that
string number |s| comprises the characters |str_pool[j]| for
|str_start_macro(s)<=j<str_start_macro(s+1)|. Additional integer variables
|pool_ptr| and |str_ptr| indicate the number of entries used so far
in |str_pool| and |str_start|, respectively; locations
|str_pool[pool_ptr]| and |str_start_macro(str_ptr)| are
ready for the next string to be allocated.
41 String numbers
0 to |biggest_char| are reserved for strings that correspond to
42 single UNICODE characters. This is in accordance with the conventions of \.
{WEB
}
43 which converts single-character strings into the ASCII code number of the
44 single character involved.
lstring *string_pool;           /* the array of strings; (re)allocated and zeroed by
                                   |init_string_pool_array| */
lstring *_string_pool;          /* this variable lives |STRING_OFFSET| below |string_pool|
                                   (it is set to |string_pool - STRING_OFFSET|), so that
                                   |_string_pool[str_ptr] == str_string(str_ptr)| */

str_number str_ptr = (STRING_OFFSET + 1);       /* number of the current string being created;
                                                   |make_string| stores the finished buffer
                                                   under this number */
str_number init_str_ptr;        /* the starting value of |str_ptr|; recorded after the
                                   pool has been undumped */

unsigned char *cur_string;      /* current string buffer, handed over to the pool by
                                   |make_string| */
unsigned cur_length;            /* current index in that buffer; |make_string| writes the
                                   terminating zero byte at this position */
unsigned cur_string_size;       /* malloced size of |cur_string|; NOTE(review):
                                   |reset_cur_string| sets this to 255 while allocating
                                   256 bytes, so it appears to exclude the terminator */
unsigned pool_size;             /* occupied byte count: grows by the length of every string
                                   that enters the pool, shrinks in |flush_str| */
@ Once a sequence of characters has been appended to |cur_string|, it
officially becomes a string when the function |make_string| is called.
This function returns the identification number of the new string as its
value.
67 void reset_cur_string
(void
)
70 cur_string_size
= 255;
71 cur_string
= (unsigned char
*) xmalloc
(256);
72 memset
(cur_string
, 0, 256);
75 @ current string enters the pool
77 str_number make_string
(void
)
79 if
(str_ptr
== (max_strings
+ STRING_OFFSET
))
80 overflow
("number of strings",
81 (unsigned
) (max_strings
- init_str_ptr
+ STRING_OFFSET
));
83 cur_string
[cur_length
] = '\
0'
; /* now |lstring.s| is always a valid C string
*/
84 str_string
(str_ptr
) = (unsigned char
*) cur_string
;
85 str_length
(str_ptr
) = cur_length
;
86 pool_size
+= cur_length
;
89 printf
("Made a string: %s (s=%d)\n", (char
*)str_string
(str_ptr
), (int
)str_ptr
);
/* Hand the bytes starting at |t| to |str2uni| and give the result back as an
   |int|; callers in this file store it as a Unicode character value. */
int pool_to_unichar(unsigned char *t)
{
    int decoded = (int) str2uni(t);

    return decoded;
}
103 @ The following subroutine compares string |s| with another string of the
104 same length that appears in |buffer| starting at position |k|
;
105 the result is |true| if and only if the strings are equal.
106 Empirical tests indicate that |str_eq_buf| is used in such a way that
107 it tends to return |true| about
80 percent of the time.
110 boolean str_eq_buf
(str_number s
, int k
)
111 { /* test equality of strings
*/
112 int a
; /* a unicode character
*/
113 if
(s
< STRING_OFFSET
) {
114 a
= buffer_to_unichar
(k
);
118 unsigned char
*j
= str_string
(s
);
119 unsigned char
*l
= j
+ str_length
(s
);
121 if
(*j
++ != buffer
[k
++])
129 @ Here is a similar routine
, but it compares two strings in the string pool
,
130 and it does not assume that they have the same length.
133 boolean str_eq_str
(str_number s
, str_number t
)
134 { /* test equality of strings
*/
135 int a
= 0; /* a utf char
*/
136 unsigned char
*j
, *k
, *l
; /* running indices
*/
137 if
(s
< STRING_OFFSET
) {
138 if
(t
>= STRING_OFFSET
) {
140 if
(s
<= 0x7F && (str_length(t) == 1) && *k == s)
142 a
= pool_to_unichar
(k
);
149 } else if
(t
< STRING_OFFSET
) {
151 if
(t
<= 0x7F && (str_length(s) == 1) && *j == t)
153 a
= pool_to_unichar
(j
);
157 if
(str_length
(s
) != str_length
(t
))
161 l
= j
+ str_length
(s
);
172 boolean str_eq_cstr
(str_number r
, const char
*s
, size_t l
)
174 if
(l
!= (size_t
) str_length
(r
))
176 return
(strncmp
((const char
*) (str_string
(r
)), s
, l
) == 0);
180 @ The initial values of |str_pool|
, |str_start|
, |pool_ptr|
,
181 and |str_ptr| are computed by the \.
{INITEX
} program
, based in part
182 on the information that \.
{WEB
} has output while processing \TeX.
The first |STRING_OFFSET| strings are single-character strings matching
Unicode. There is no point in generating all of these. But |str_ptr| has to be
initialized properly, otherwise |print_char| cannot see the difference
between characters and strings.
190 @ initializes the string pool
, but returns |false| if something goes wrong
192 boolean get_strings_started
(void
)
@ The string recycling routines.
\TeX{} uses 2 up to 4 {\it new\/} strings when scanning a filename in an
\.{\\input}, \.{\\openin}, or \.{\\openout} operation. These strings are
normally lost because the references to them are not saved after finishing
the operation. |search_string| searches through the string pool for the
given string and returns either 0 or the found string number.
206 str_number search_string
(str_number search
)
208 str_number s
; /* running index
*/
209 size_t len
; /* length of searched string
*/
210 len
= str_length
(search
);
212 return get_nullstr
();
214 s
= search
- 1; /* start search with newest string below |s|
; |search
>1|
! */
215 while
(s
>= STRING_OFFSET
) {
216 /* first |string_offset| strings depend on implementation
!! */
217 if
(str_length
(s
) == len
)
218 if
(str_eq_str
(s
, search
))
227 str_number maketexstring
(const char
*s
)
229 if
(s
== NULL ||
*s
== 0)
230 return get_nullstr
();
231 return maketexlstring
(s
, strlen
(s
));
235 str_number maketexlstring
(const char
*s
, size_t l
)
237 if
(s
== NULL || l
== 0)
238 return get_nullstr
();
239 str_string
(str_ptr
) = xmalloc
((unsigned
) (l
+ 1));
240 memcpy
(str_string
(str_ptr
), s
, (l
+ 1));
241 str_length
(str_ptr
) = (unsigned
) l
;
243 return
(str_ptr
- 1);
246 @ append a C string to a TeX string
248 void append_string
(const unsigned char
*s
, unsigned l
)
250 if
(s
== NULL ||
*s
== 0)
252 l
= (unsigned
) strlen
((const char
*) s
);
254 memcpy
(cur_string
+ cur_length
, s
, l
);
260 char
*makecstring
(int s
)
263 return makeclstring
(s
, &l);
267 char
*makeclstring
(int s
, size_t
* len
)
269 if
(s
< STRING_OFFSET
) {
270 *len
= (size_t
) utf8_size
(s
);
271 return
(char
*) uni2str
((unsigned
) s
);
273 unsigned l
= (unsigned
) str_length
(s
);
274 char
*cstrbuf
= xmalloc
(l
+ 1);
275 memcpy
(cstrbuf
, str_string
(s
), l
);
283 int dump_string_pool
(void
)
288 dump_int
(k
- STRING_OFFSET
);
289 for
(j
= STRING_OFFSET
+ 1; j
< k
; j
++) {
290 l
= (int
) str_length
(j
);
291 if
(str_string
(j
) == NULL)
295 dump_things
(*str_string
(j
), str_length
(j
));
297 return
(k
- STRING_OFFSET
);
301 int undump_string_pool
(void
)
306 if
(max_strings
< str_ptr
+ strings_free
)
307 max_strings
= str_ptr
+ strings_free
;
308 str_ptr
+= STRING_OFFSET
;
310 libcfree
(string_pool
);
311 init_string_pool_array
((unsigned
) max_strings
);
312 for
(j
= STRING_OFFSET
+ 1; j
< str_ptr
; j
++) {
315 str_length
(j
) = (unsigned
) x
;
316 pool_size
+= (unsigned
) x
;
317 str_string
(j
) = xmallocarray
(unsigned char
, (unsigned
) (x
+ 1));
318 undump_things
(*str_string
(j
), (unsigned
) x
);
319 *(str_string
(j
) + str_length
(j
)) = '\
0'
;
324 init_str_ptr
= str_ptr
;
329 void init_string_pool_array
(unsigned s
)
331 string_pool
= xmallocarray
(lstring
, s
);
332 _string_pool
= string_pool
- STRING_OFFSET
;
333 memset
(string_pool
, 0, s
* sizeof
(lstring
));
334 /* seed the null string
*/
335 string_pool
[0].s
= xmalloc
(1);
336 string_pool
[0].s
[0] = '\
0'
;
339 @ To destroy an already made string
, we say |flush_str|.
341 void flush_str
(str_number s
)
344 printf
("Flushing a string: %s (s=%d,str_ptr=%d)\n", (char
*)str_string
(s
), (int
)s
, (int
)str_ptr
);
346 if
(s
> STRING_OFFSET
) { /* don't ever delete the null string
*/
347 pool_size
-= (unsigned
) str_length
(s
);
349 xfree
(str_string
(s
));
351 while
(str_string
((str_ptr
- 1)) == NULL)