fix getsup (HH)
[luatex.git] / source / texk / web2c / luatexdir / tex / textoken.w
blob196ecf8c39924ec429b7506bcf3bef8cf30f2f34
1 % textoken.w
3 % Copyright 2006-2011 Taco Hoekwater <taco@@luatex.org>
5 % This file is part of LuaTeX.
7 % LuaTeX is free software; you can redistribute it and/or modify it under
8 % the terms of the GNU General Public License as published by the Free
9 % Software Foundation; either version 2 of the License, or (at your
10 % option) any later version.
12 % LuaTeX is distributed in the hope that it will be useful, but WITHOUT
13 % ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 % FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 % License for more details.
17 % You should have received a copy of the GNU General Public License along
18 % with LuaTeX; if not, see <http://www.gnu.org/licenses/>.
20 @ @c
22 #include "ptexlib.h"
24 @ @c
/* True when |line_catcode_table| holds the special value |NO_CAT_TABLE|. */
#define detokenized_line() (line_catcode_table==NO_CAT_TABLE)

/*
    Earlier formulation of |do_get_cat_code|, kept for reference only. It is
    fenced off because the same macro name may not be defined twice with
    different replacement lists. It distinguishes the same three cases as the
    live macro below, merely testing them in a different order.
*/
#if 0
#define do_get_cat_code(a,b) do {                         \
    if (line_catcode_table<=-0xFF)                        \
        a= - line_catcode_table - 0xFF ;                  \
    else if (line_catcode_table!=DEFAULT_CAT_TABLE)       \
        a=get_cat_code(line_catcode_table,b);             \
    else                                                  \
        a=get_cat_code(cat_code_table_par,b);             \
} while (0)
#endif

/*
    Store in |a| the category code to use for character |b| on the current line:
      - |line_catcode_table==DEFAULT_CAT_TABLE|: look |b| up in the table
        selected by |cat_code_table_par|;
      - |line_catcode_table>-0xFF|: look |b| up in |line_catcode_table|;
      - otherwise the value itself encodes the result as |-0xFF - catcode|.
    |a| must be an lvalue; both arguments are pasted unparenthesized.
*/
#define do_get_cat_code(a,b) do {                         \
    if (line_catcode_table==DEFAULT_CAT_TABLE)            \
        a=get_cat_code(cat_code_table_par,b);             \
    else if (line_catcode_table>-0xFF)                    \
        a=get_cat_code(line_catcode_table,b);             \
    else                                                  \
        a= - line_catcode_table - 0xFF ;                  \
} while (0)
48 @ The \TeX\ system does nearly all of its own memory allocation, so that it can
49 readily be transported into environments that do not have automatic facilities
50 for strings, garbage collection, etc., and so that it can be in control of what
51 error messages the user receives. The dynamic storage requirements of \TeX\ are
52 handled by providing two large arrays called |fixmem| and |varmem| in which
53 consecutive blocks of words are used as nodes by the \TeX\ routines.
55 Pointer variables are indices into this array, or into another array called
56 |eqtb| that will be explained later. A pointer variable might also be a special
57 flag that lies outside the bounds of |mem|, so we allow pointers to assume any
58 |halfword| value. The minimum halfword value represents a null pointer. \TeX\
59 does not assume that |mem[null]| exists.
61 @ Locations in |fixmem| are used for storing one-word records; a conventional
62 \.{AVAIL} stack is used for allocation in this array.
65 smemory_word *fixmem; /* the big dynamic storage area */
66 unsigned fix_mem_min; /* the smallest location of one-word memory in use */
67 unsigned fix_mem_max; /* the largest location of one-word memory in use */
69 @ In order to study the memory requirements of particular applications, it is
70 possible to prepare a version of \TeX\ that keeps track of current and maximum
71 memory usage. When code between the delimiters |@!stat| $\ldots$ |tats| is not
72 commented out, \TeX\ will run a bit slower but it will report these statistics
73 when |tracing_stats| is sufficiently large.
76 int var_used, dyn_used; /* how much memory is in use */
78 halfword avail; /* head of the list of available one-word nodes */
79 unsigned fix_mem_end; /* the last one-word node used in |mem| */
81 halfword garbage; /* head of a junk list, write only */
82 halfword temp_token_head; /* head of a temporary list of some kind */
83 halfword hold_token_head; /* head of a temporary list of another kind */
84 halfword omit_template; /* a constant token list */
85 halfword null_list; /* permanently empty list */
86 halfword backup_head; /* head of token list built by |scan_keyword| */
88 @ @c
89 void initialize_tokens(void)
91 halfword p;
92 avail = null;
93 fix_mem_end = 0;
94 p = get_avail();
95 temp_token_head = p;
96 set_token_info(temp_token_head, 0);
97 p = get_avail();
98 hold_token_head = p;
99 set_token_info(hold_token_head, 0);
100 p = get_avail();
101 omit_template = p;
102 set_token_info(omit_template, 0);
103 p = get_avail();
104 null_list = p;
105 set_token_info(null_list, 0);
106 p = get_avail();
107 backup_head = p;
108 set_token_info(backup_head, 0);
109 p = get_avail();
110 garbage = p;
111 set_token_info(garbage, 0);
112 dyn_used = 0; /* initialize statistics */
@ The function |get_avail| returns a pointer to a new one-word node whose |link|
field is null. However, \TeX\ will halt if there is no more room left.
@^inner loop@>

If the available-space list is empty, i.e., if |avail=null|, we try first to
increase |fix_mem_end|. If that cannot be done, i.e., if
|fix_mem_end=fix_mem_max|, we try to reallocate array |fixmem|. If that doesn't
work, we have to quit.
125 halfword get_avail(void)
126 { /* single-word node allocation */
127 unsigned p; /* the new node being got */
128 unsigned t;
129 p = (unsigned) avail; /* get top location in the |avail| stack */
130 if (p != null) {
131 avail = token_link(avail); /* and pop it off */
132 } else if (fix_mem_end < fix_mem_max) { /* or go into virgin territory */
133 incr(fix_mem_end);
134 p = fix_mem_end;
135 } else {
136 smemory_word *new_fixmem; /* the big dynamic storage area */
137 t = (fix_mem_max / 5);
138 new_fixmem =
139 fixmemcast(realloc
140 (fixmem, sizeof(smemory_word) * (fix_mem_max + t + 1)));
141 if (new_fixmem == NULL) {
142 runaway(); /* if memory is exhausted, display possible runaway text */
143 overflow("token memory size", fix_mem_max);
144 } else {
145 fixmem = new_fixmem;
147 memset(voidcast(fixmem + fix_mem_max + 1), 0, t * sizeof(smemory_word));
148 fix_mem_max += t;
149 p = ++fix_mem_end;
151 token_link(p) = null; /* provide an oft-desired initialization of the new node */
152 incr(dyn_used); /* maintain statistics */
153 return (halfword) p;
156 @ The procedure |flush_list(p)| frees an entire linked list of one-word nodes
157 that starts at position |p|.
158 @^inner loop@>
161 void flush_list(halfword p)
162 { /* makes list of single-word nodes available */
163 halfword q, r; /* list traversers */
164 if (p != null) {
165 r = p;
166 do {
167 q = r;
168 r = token_link(r);
169 decr(dyn_used);
170 } while (r != null); /* now |q| is the last node on the list */
171 token_link(q) = avail;
172 avail = p;
176 @ A \TeX\ token is either a character or a control sequence, and it is @^token@>
177 represented internally in one of two ways: (1)~A character whose ASCII code
178 number is |c| and whose command code is |m| is represented as the number
179 $2^{21}m+c$; the command code is in the range |1<=m<=14|. (2)~A control sequence
180 whose |eqtb| address is |p| is represented as the number |cs_token_flag+p|. Here
181 |cs_token_flag=@t$2^{25}-1$@>| is larger than $2^{21}m+c$, yet it is small enough
182 that |cs_token_flag+p< max_halfword|; thus, a token fits comfortably in a
183 halfword.
185 A token |t| represents a |left_brace| command if and only if
186 |t<left_brace_limit|; it represents a |right_brace| command if and only if we
187 have |left_brace_limit<=t<right_brace_limit|; and it represents a |match| or
188 |end_match| command if and only if |match_token<=t<=end_match_token|. The
189 following definitions take care of these token-oriented constants and a few
190 others.
192 @ A token list is a singly linked list of one-word nodes in |mem|, where each
193 word contains a token and a link. Macro definitions, output-routine definitions,
194 marks, \.{\\write} texts, and a few other things are remembered by \TeX\ in the
195 form of token lists, usually preceded by a node with a reference count in its
196 |token_ref_count| field. The token stored in location |p| is called |info(p)|.
198 Three special commands appear in the token lists of macro definitions. When
199 |m=match|, it means that \TeX\ should scan a parameter for the current macro;
200 when |m=end_match|, it means that parameter matching should end and \TeX\ should
201 start reading the macro text; and when |m=out_param|, it means that \TeX\ should
202 insert parameter number |c| into the text at this point.
204 The enclosing \.{\char'173} and \.{\char'175} characters of a macro definition
205 are omitted, but the final right brace of an output routine is included at the
206 end of its token list.
208 Here is an example macro definition that illustrates these conventions. After
209 \TeX\ processes the text
211 $$\.{\\def\\mac a\#1\#2 \\b \{\#1\\-a \#\#1\#2 \#2\}}$$
213 the definition of \.{\\mac} is represented as a token list containing
215 $$\def\,{\hskip2pt}
216 \vbox{\halign{\hfil#\hfil\cr
217 (reference count), |letter|\,\.a, |match|\,\#, |match|\,\#, |spacer|\,\.\ ,
218 \.{\\b}, |end_match|,\cr
219 |out_param|\,1, \.{\\-}, |letter|\,\.a, |spacer|\,\.\ , |mac_param|\,\#,
220 |other_char|\,\.1,\cr
221 |out_param|\,2, |spacer|\,\.\ , |out_param|\,2.\cr}}$$
223 The procedure |scan_toks| builds such token lists, and |macro_call| does the
224 parameter matching. @^reference counts@>
226 Examples such as $$\.{\\def\\m\{\\def\\m\{a\}\ b\}}$$ explain why reference
227 counts would be needed even if \TeX\ had no \.{\\let} operation: When the token
228 list for \.{\\m} is being read, the redefinition of \.{\\m} changes the |eqtb|
229 entry before the token list has been fully consumed, so we dare not simply
230 destroy a token list when its control sequence is being redefined.
232 If the parameter-matching part of a definition ends with `\.{\#\{}', the
233 corresponding token list will have `\.\{' just before the `|end_match|' and also
234 at the very end. The first `\.\{' is used to delimit the parameter; the second
235 one keeps the first from disappearing.
237 The |print_meaning| subroutine displays |cur_cmd| and |cur_chr| in symbolic form,
238 including the expansion of a macro or mark.
/*
    Print |cur_cmd| and |cur_chr| in symbolic form through |print_cmd_chr|.
    When |cur_cmd>=call_cmd| a colon and a new line follow, and then the token
    list |cur_chr| is shown with |token_show|. For a |top_bot_mark_cmd| whose
    |cur_chr| is below |marks_code|, the matching mark of class 0 is shown
    instead.

    NOTE(review): this listing has lost every line that held only a brace or a
    comment delimiter. It therefore cannot be told from here whether the
    \mathchar remap right below is live code or was commented out; confirm
    against the pristine file before touching it.
*/
241 void print_meaning(void)
243 /* remap \mathchar onto \Umathchar */
245 if (cur_cmd == math_given_cmd) {
246 cur_cmd = xmath_given_cmd ;
249 print_cmd_chr((quarterword) cur_cmd, cur_chr);
250 if (cur_cmd >= call_cmd) {
251 print_char(':');
252 print_ln();
253 token_show(cur_chr);
254 } else {
255 /* Show the meaning of a mark node */
256 if ((cur_cmd == top_bot_mark_cmd) && (cur_chr < marks_code)) {
257 print_char(':');
258 print_ln();
259 switch (cur_chr) {
260 case first_mark_code:
261 token_show(first_mark(0));
262 break;
263 case bot_mark_code:
264 token_show(bot_mark(0));
265 break;
266 case split_first_mark_code:
267 token_show(split_first_mark(0));
268 break;
269 case split_bot_mark_code:
270 token_show(split_bot_mark(0));
271 break;
272 default:
273 token_show(top_mark(0));
274 break;
280 @ The procedure |show_token_list|, which prints a symbolic form of the token list
281 that starts at a given node |p|, illustrates these conventions. The token list
282 being displayed should not begin with a reference count. However, the procedure
283 is intended to be robust, so that if the memory links are awry or if |p| is not
284 really a pointer to a token list, nothing catastrophic will happen.
286 An additional parameter |q| is also given; this parameter is either null or it
287 points to a node in the token list where a certain magic computation takes place
288 that will be explained later. (Basically, |q| is non-null when we are printing
289 the two-line context information at the time of an error message; |q| marks the
290 place corresponding to where the second line should begin.)
292 For example, if |p| points to the node containing the first \.a in the token list
293 above, then |show_token_list| will print the string $$\hbox{`\.{a\#1\#2\ \\b\
294 ->\#1\\-a\ \#\#1\#2\ \#2}';}$$ and if |q| points to the node containing the
295 second \.a, the magic computation will be performed just before the second \.a is
296 printed.
298 The generation will stop, and `\.{\\ETC.}' will be printed, if the length of
299 printing exceeds a given limit~|l|. Anomalous entries are printed in the form of
300 control sequences that are not followed by a blank space, e.g., `\.{\\BAD.}';
301 this cannot be confused with actual control sequences because a real control
302 sequence named \.{BAD} would come out `\.{\\BAD\ }'.
/*
    Fallback printer for a token whose command |m| has no display of its own.
    It reads the locals |m| and |c| of the expansion site and prints through
    the printer |out|. Backend-internal integers, dimensions and token lists
    get a bracketed description when |c| lies in the matching backend range
    (and nothing otherwise); every other command prints "BAD".
*/
#define not_so_bad(out)                                                 \
    switch (m) {                                                        \
        case assign_int_cmd:                                            \
            if (c >= (backend_int_base) && c <= (backend_int_last))     \
                out("[internal backend integer]");                      \
            break;                                                      \
        case assign_dimen_cmd:                                          \
            if (c >= (backend_dimen_base) && c <= (backend_dimen_last)) \
                out("[internal backend dimension]");                    \
            break;                                                      \
        case assign_toks_cmd:                                           \
            if (c >= (backend_toks_base) && c <= (backend_toks_last))   \
                out("[internal backend tokenlist]");                    \
            break;                                                      \
        default:                                                        \
            out("BAD");                                                 \
            break;                                                      \
    }
324 void show_token_list(int p, int q, int l)
326 int m, c; /* pieces of a token */
327 ASCII_code match_chr = '#'; /* character used in a `|match|' */
328 ASCII_code n = '0'; /* the highest parameter number, as an ASCII digit */
329 tally = 0;
330 if (l < 0)
331 l = 0x3FFFFFFF;
332 while ((p != null) && (tally < l)) {
333 if (p == q) {
334 /* Do magic computation */
335 set_trick_count();
337 /* Display token |p|, and |return| if there are problems */
338 if ((p < (int) fix_mem_min) || (p > (int) fix_mem_end)) {
339 tprint_esc("CLOBBERED.");
340 return;
342 if (token_info(p) >= cs_token_flag) {
343 if (!((inhibit_par_tokens) && (token_info(p) == par_token)))
344 print_cs(token_info(p) - cs_token_flag);
345 } else {
346 m = token_cmd(token_info(p));
347 c = token_chr(token_info(p));
348 if (token_info(p) < 0) {
349 tprint_esc("BAD");
350 } else {
352 Display the token $(|m|,|c|)$
354 The procedure usually ``learns'' the character code used for macro
355 parameters by seeing one in a |match| command before it runs into any
356 |out_param| commands.
358 switch (m) {
359 case left_brace_cmd:
360 case right_brace_cmd:
361 case math_shift_cmd:
362 case tab_mark_cmd:
363 case sup_mark_cmd:
364 case sub_mark_cmd:
365 case spacer_cmd:
366 case letter_cmd:
367 case other_char_cmd:
368 print(c);
369 break;
370 case mac_param_cmd:
371 if (!in_lua_escape && (is_in_csname==0))
372 print(c);
373 print(c);
374 break;
375 case out_param_cmd:
376 print(match_chr);
377 if (c <= 9) {
378 print_char(c + '0');
379 } else {
380 print_char('!');
381 return;
383 break;
384 case match_cmd:
385 match_chr = c;
386 print(c);
387 incr(n);
388 print_char(n);
389 if (n > '9')
390 return;
391 break;
392 case end_match_cmd:
393 if (c == 0)
394 tprint("->");
395 break;
396 default:
397 not_so_bad(tprint);
398 break;
402 p = token_link(p);
404 if (p != null)
405 tprint_esc("ETC.");
408 @ @c
/*
    Decode the character that starts at |buffer+b| with |str2uni|, store it in
    |a|, and advance |b| by |utf8_size(a)|. Both arguments must be lvalues.
*/
#define do_buffer_to_unichar(a,b) do {          \
    a = (halfword)str2uni(buffer+b);            \
    b += utf8_size(a);                          \
} while (0)
414 @ Here's the way we sometimes want to display a token list, given a pointer to
415 its reference count; the pointer may be null.
418 void token_show(halfword p)
420 if (p != null)
421 show_token_list(token_link(p), null, 10000000);
@ |delete_token_ref| is called when a pointer to a token list's reference count
is being removed. This means that the token list should disappear if the
reference count was zero; otherwise the count should be decreased by one.
@^reference counts@>
429 @ |p| points to the reference count of a token list that is losing one
430 reference.
433 void delete_token_ref(halfword p)
435 if (token_ref_count(p) == 0)
436 flush_list(p);
437 else
438 decr(token_ref_count(p));
441 @ @c
/*
    Function wrapper around the |do_get_cat_code| macro: returns the category
    code that the macro yields for character |curchr|.
*/
int get_char_cat_code(int curchr)
{
    int catcode;
    do_get_cat_code(catcode, curchr);
    return catcode;
}
449 @ @c
450 static void invalid_character_error(void)
452 const char *hlp[] = {
453 "A funny symbol that I can't read has just been input.",
454 "Continue, and I'll forget that it ever happened.",
455 NULL
457 deletions_allowed = false;
458 tex_error("Text line contains an invalid character", hlp);
459 deletions_allowed = true;
462 @ @c
463 static boolean process_sup_mark(void); /* below */
465 static int scan_control_sequence(void); /* below */
467 typedef enum {
468 next_line_ok,
469 next_line_return,
470 next_line_restart
471 } next_line_retval;
473 static next_line_retval next_line(void); /* below */
475 @ In case you are getting bored, here is a slightly less trivial routine: Given a
476 string of lowercase letters, like `\.{pt}' or `\.{plus}' or `\.{width}', the
477 |scan_keyword| routine checks to see whether the next tokens of input match this
478 string. The match must be exact, except that uppercase letters will match their
479 lowercase counterparts; uppercase equivalents are determined by subtracting
480 |"a"-"A"|, rather than using the |uc_code| table, since \TeX\ uses this routine
481 only for its own limited set of keywords.
483 If a match is found, the characters are effectively removed from the input and
484 |true| is returned. Otherwise |false| is returned, and the input is left
485 essentially unchanged (except for the fact that some macros may have been
486 expanded, etc.). @^inner loop@>
489 boolean scan_keyword(const char *s)
490 { /* look for a given string */
491 halfword p; /* tail of the backup list */
492 halfword q; /* new node being added to the token list via |store_new_token| */
493 const char *k; /* index into |str_pool| */
494 halfword save_cur_cs = cur_cs;
495 if (strlen(s) == 0) /* was assert (strlen(s) > 1); */
496 return false ; /* but not with newtokenlib zero keyword simply doesn't match */
497 p = backup_head;
498 token_link(p) = null;
499 k = s;
500 while (*k) {
501 get_x_token(); /* recursion is possible here */
502 if ((cur_cs == 0) && ((cur_chr == *k) || (cur_chr == *k - 'a' + 'A'))) {
503 store_new_token(cur_tok);
504 k++;
505 } else if ((cur_cmd != spacer_cmd) || (p != backup_head)) {
507 crashes on some alignments:
509 if (p != backup_head) {
510 q = get_avail();
511 token_info(q) = cur_tok;
512 token_link(q) = null;
513 token_link(p) = q;
514 begin_token_list(token_link(backup_head), backed_up);
515 } else {
516 back_input();
519 back_input();
520 if (p != backup_head) {
521 begin_token_list(token_link(backup_head), backed_up);
523 /* */
524 cur_cs = save_cur_cs;
525 return false;
528 if (token_link(backup_head) != null)
529 flush_list(token_link(backup_head));
530 cur_cs = save_cur_cs;
531 return true;
@ We cannot return |undefined_control_sequence| under some conditions
(inside |shift_case|, for example). This needs thinking.
540 halfword active_to_cs(int curchr, int force)
542 halfword curcs;
543 char *a, *b;
544 char *utfbytes = xmalloc(8);
545 int nncs = no_new_control_sequence;
546 a = (char *) uni2str(0xFFFF);
547 utfbytes = strcpy(utfbytes, a);
548 if (force)
549 no_new_control_sequence = false;
550 if (curchr > 0) {
551 b = (char *) uni2str((unsigned) curchr);
552 utfbytes = strcat(utfbytes, b);
553 free(b);
554 curcs = string_lookup(utfbytes, strlen(utfbytes));
555 } else {
556 utfbytes[3] = '\0';
557 curcs = string_lookup(utfbytes, 4);
559 no_new_control_sequence = nncs;
560 free(a);
561 free(utfbytes);
562 return curcs;
566 /*static char * FFFF = "\xEF\xBF\xBF";*/ /* 0xFFFF */
568 halfword active_to_cs(int curchr, int force)
570 halfword curcs;
571 int nncs = no_new_control_sequence;
572 if (force) {
573 no_new_control_sequence = false;
575 if (curchr > 0) {
576 char *b = (char *) uni2str((unsigned) curchr);
577 char *utfbytes = xmalloc(8);
578 utfbytes = strcpy(utfbytes, "\xEF\xBF\xBF");
579 utfbytes = strcat(utfbytes, b);
580 free(b);
581 curcs = string_lookup(utfbytes, utf8_size(curchr)+3);
582 free(utfbytes);
583 } else {
584 curcs = string_lookup("\xEF\xBF\xBF", 4); /* 0xFFFF ... why not 3 ? */
586 no_new_control_sequence = nncs;
587 return curcs;
592 static unsigned char *uni2csstr(unsigned unic)
594 unsigned char *buf = xmalloc(8);
595 unsigned char *pt = buf;
596 *pt++ = 239; *pt++ = 191; *pt++ = 191; // 0xFFFF
597 if (unic < 0x80)
598 *pt++ = (unsigned char) unic;
599 else if (unic < 0x800) {
600 *pt++ = (unsigned char) (0xc0 | (unic >> 6));
601 *pt++ = (unsigned char) (0x80 | (unic & 0x3f));
602 } else if (unic >= 0x110000) {
603 *pt++ = (unsigned char) (unic - 0x110000);
604 } else if (unic < 0x10000) {
605 *pt++ = (unsigned char) (0xe0 | (unic >> 12));
606 *pt++ = (unsigned char) (0x80 | ((unic >> 6) & 0x3f));
607 *pt++ = (unsigned char) (0x80 | (unic & 0x3f));
608 } else {
609 int u, z, y, x;
610 unsigned val = unic - 0x10000;
611 u = (int) (((val & 0xf0000) >> 16) + 1);
612 z = (int) ((val & 0x0f000) >> 12);
613 y = (int) ((val & 0x00fc0) >> 6);
614 x = (int) (val & 0x0003f);
615 *pt++ = (unsigned char) (0xf0 | (u >> 2));
616 *pt++ = (unsigned char) (0x80 | ((u & 3) << 4) | z);
617 *pt++ = (unsigned char) (0x80 | y);
618 *pt++ = (unsigned char) (0x80 | x);
620 *pt = '\0';
621 return buf;
624 halfword active_to_cs(int curchr, int force)
626 halfword curcs;
627 int nncs = no_new_control_sequence;
628 if (force) {
629 no_new_control_sequence = false;
631 if (curchr > 0) {
632 char * utfbytes = (char *) uni2csstr((unsigned) curchr);
633 curcs = string_lookup(utfbytes, utf8_size(curchr)+3);
634 free(utfbytes);
635 } else {
636 curcs = string_lookup(FFFF, 4); // 0xFFFF ... why not 3 ?
638 no_new_control_sequence = nncs;
639 return curcs;
644 @ TODO this function should listen to \.{\\escapechar}
646 @ prints a control sequence
649 static char *cs_to_string(halfword p)
651 const char *s;
652 char *sh;
653 int k = 0;
654 static char ret[256] = { 0 };
655 if (p == 0 || p == null_cs) {
656 ret[k++] = '\\';
657 s = "csname";
658 while (*s) {
659 ret[k++] = *s++;
661 ret[k++] = '\\';
662 s = "endcsname";
663 while (*s) {
664 ret[k++] = *s++;
666 ret[k] = 0;
668 } else {
669 str_number txt = cs_text(p);
670 sh = makecstring(txt);
671 s = sh;
672 if (is_active_cs(txt)) {
673 s = s + 3;
674 while (*s) {
675 ret[k++] = *s++;
677 ret[k] = 0;
678 } else {
679 ret[k++] = '\\';
680 while (*s) {
681 ret[k++] = *s++;
683 ret[k] = 0;
685 free(sh);
687 return (char *) ret;
690 @ TODO this is a quick hack, will be solved differently soon
693 static char *cmd_chr_to_string(int cmd, int chr)
695 char *s;
696 str_number str;
697 int sel = selector;
698 selector = new_string;
699 print_cmd_chr((quarterword) cmd, chr);
700 str = make_string();
701 s = makecstring(str);
702 selector = sel;
703 flush_str(str);
704 return s;
707 @ The heart of \TeX's input mechanism is the |get_next| procedure, which we shall
708 develop in the next few sections of the program. Perhaps we shouldn't actually
709 call it the ``heart,'' however, because it really acts as \TeX's eyes and mouth,
710 reading the source files and gobbling them up. And it also helps \TeX\ to
711 regurgitate stored token lists that are to be processed again. @^eyes and mouth@>
713 The main duty of |get_next| is to input one token and to set |cur_cmd| and
714 |cur_chr| to that token's command code and modifier. Furthermore, if the input
715 token is a control sequence, the |eqtb| location of that control sequence is
716 stored in |cur_cs|; otherwise |cur_cs| is set to zero.
718 Underlying this simple description is a certain amount of complexity because of
719 all the cases that need to be handled. However, the inner loop of |get_next| is
720 reasonably short and fast.
722 When |get_next| is asked to get the next token of a \.{\\read} line,
723 it sets |cur_cmd=cur_chr=cur_cs=0| in the case that no more tokens
724 appear on that line. (There might not be any tokens at all, if the
725 |end_line_char| has |ignore| as its catcode.)
727 The value of |par_loc| is the |eqtb| address of `\.{\\par}'. This quantity is
728 needed because a blank line of input is supposed to be exactly equivalent to the
729 appearance of \.{\\par}; we must set |cur_cs:=par_loc| when detecting a blank
730 line.
733 halfword par_loc; /* location of `\.{\\par}' in |eqtb| */
734 halfword par_token; /* token representing `\.{\\par}' */
@ Parts of |get_next| are executed more often than any other instructions of \TeX.
@^mastication@>@^inner loop@>

The global variable |force_eof| is normally |false|; it is set |true| by an
\.{\\endinput} command. |luacstrings| is the number of lua print statements
waiting to be input; it is changed by |luatokencall|.
744 boolean force_eof; /* should the next \.{\\input} be aborted early? */
745 int luacstrings; /* how many lua strings are waiting to be input? */
747 @ If the user has set the |pausing| parameter to some positive value, and if
748 nonstop mode has not been selected, each line of input is displayed on the
749 terminal and the transcript file, followed by `\.{=>}'. \TeX\ waits for a
750 response. If the response is simply |carriage_return|, the line is accepted as it
751 stands, otherwise the line typed is used instead of the line in the file.
754 void firm_up_the_line(void)
756 int k; /* an index into |buffer| */
757 ilimit = last;
758 if (pausing_par > 0) {
759 if (interaction > nonstop_mode) {
760 wake_up_terminal();
761 print_ln();
762 if (istart < ilimit) {
763 for (k = istart; k <= ilimit - 1; k++)
764 print_char(buffer[k]);
766 first = ilimit;
767 prompt_input("=>"); /* wait for user response */
768 if (last > first) {
769 for (k = first; k < +last - 1; k++) /* move line down in buffer */
770 buffer[k + istart - first] = buffer[k];
771 ilimit = istart + last - first;
777 @ Before getting into |get_next|, let's consider the subroutine that is called
778 when an `\.{\\outer}' control sequence has been scanned or when the end of a file
779 has been reached. These two cases are distinguished by |cur_cs|, which is zero at
780 the end of a file.
783 void check_outer_validity(void)
785 halfword p; /* points to inserted token list */
786 halfword q; /* auxiliary pointer */
787 if (suppress_outer_error_par)
788 return;
789 if (scanner_status != normal) {
790 deletions_allowed = false;
791 /* Back up an outer control sequence so that it can be reread; */
792 /* An outer control sequence that occurs in a \.{\\read} will not be reread,
793 since the error recovery for \.{\\read} is not very powerful. */
794 if (cur_cs != 0) {
795 if ((istate == token_list) || (iname < 1) || (iname > 17)) {
796 p = get_avail();
797 token_info(p) = cs_token_flag + cur_cs;
798 begin_token_list(p, backed_up); /* prepare to read the control sequence again */
800 cur_cmd = spacer_cmd;
801 cur_chr = ' '; /* replace it by a space */
803 if (scanner_status > skipping) {
804 const char *errhlp[] = {
805 "I suspect you have forgotten a `}', causing me",
806 "to read past where you wanted me to stop.",
807 "I'll try to recover; but if the error is serious,",
808 "you'd better type `E' or `X' now and fix your file.",
809 NULL
811 char errmsg[256];
812 const char *startmsg;
813 const char *scannermsg;
814 /* Tell the user what has run away and try to recover */
815 runaway(); /* print a definition, argument, or preamble */
816 if (cur_cs == 0) {
817 startmsg = "File ended";
818 } else {
819 cur_cs = 0;
820 startmsg = "Forbidden control sequence found";
822 /* Print either `\.{definition}' or `\.{use}' or `\.{preamble}' or `\.{text}',
823 and insert tokens that should lead to recovery; */
824 /* The recovery procedure can't be fully understood without knowing more
825 about the \TeX\ routines that should be aborted, but we can sketch the
826 ideas here: For a runaway definition we will insert a right brace; for a
827 runaway preamble, we will insert a special \.{\\cr} token and a right
828 brace; and for a runaway argument, we will set |long_state| to
829 |outer_call| and insert \.{\\par}. */
830 p = get_avail();
831 switch (scanner_status) {
832 case defining:
833 scannermsg = "definition";
834 token_info(p) = right_brace_token + '}';
835 break;
836 case matching:
837 scannermsg = "use";
838 token_info(p) = par_token;
839 long_state = outer_call_cmd;
840 break;
841 case aligning:
842 scannermsg = "preamble";
843 token_info(p) = right_brace_token + '}';
844 q = p;
845 p = get_avail();
846 token_link(p) = q;
847 token_info(p) = cs_token_flag + frozen_cr;
848 align_state = -1000000;
849 break;
850 case absorbing:
851 scannermsg = "text";
852 token_info(p) = right_brace_token + '}';
853 break;
854 default: /* can't happen */
855 scannermsg = "unknown";
856 break;
857 } /*there are no other cases */
858 begin_token_list(p, inserted);
859 snprintf(errmsg, 255, "%s while scanning %s of %s",
860 startmsg, scannermsg, cs_to_string(warning_index));
861 tex_error(errmsg, errhlp);
862 } else {
863 char errmsg[256];
864 const char *errhlp_no[] = {
865 "The file ended while I was skipping conditional text.",
866 "This kind of error happens when you say `\\if...' and forget",
867 "the matching `\\fi'. I've inserted a `\\fi'; this might work.",
868 NULL
870 const char *errhlp_cs[] = {
871 "A forbidden control sequence occurred in skipped text.",
872 "This kind of error happens when you say `\\if...' and forget",
873 "the matching `\\fi'. I've inserted a `\\fi'; this might work.",
874 NULL
876 const char **errhlp = (const char **) errhlp_no;
877 char *ss;
878 if (cur_cs != 0) {
879 errhlp = errhlp_cs;
880 cur_cs = 0;
882 ss = cmd_chr_to_string(if_test_cmd, cur_if);
883 snprintf(errmsg, 255, "Incomplete %s; all text was ignored after line %d",
884 ss, (int) skip_line);
885 free(ss);
886 /* Incomplete \\if... */
887 cur_tok = cs_token_flag + frozen_fi;
888 /* back up one inserted token and call |error| */
890 OK_to_interrupt = false;
891 back_input();
892 token_type = inserted;
893 OK_to_interrupt = true;
894 tex_error(errmsg, errhlp);
897 deletions_allowed = true;
901 @ @c
903 #if 0
906 The other variant gives less clutter in tracing cache usage when profiling and for
907 some files (like the manual) also a bit of a speedup.
910 static boolean get_next_file(void)
912 SWITCH:
913 if (iloc <= ilimit) {
914 /* current line not yet finished */
915 do_buffer_to_unichar(cur_chr, iloc);
917 RESWITCH:
918 if (detokenized_line()) {
919 cur_cmd = (cur_chr == ' ' ? 10 : 12);
920 } else {
921 do_get_cat_code(cur_cmd, cur_chr);
924 Change state if necessary, and |goto switch| if the current
925 character should be ignored, or |goto reswitch| if the current
926 character changes to another;
928 The following 48-way switch accomplishes the scanning quickly, assuming
929 that a decent C compiler has translated the code. Note that the numeric
930 values for |mid_line|, |skip_blanks|, and |new_line| are spaced
931 apart from each other by |max_char_code+1|, so we can add a character's
932 command code to the state to get a single number that characterizes both.
934 Remark [ls/hh]: checking performance indicated that this switch was the
935 cause of many branch prediction errors but changing it to:
937 c = istate + cur_cmd;
938 if (c == (mid_line + letter_cmd) || c == (mid_line + other_char_cmd)) {
939 return true;
940 } else if (c >= new_line) {
941 switch (c) {
943 } else if (c >= skip_blanks) {
944 switch (c) {
946 } else if (c >= mid_line) {
947 switch (c) {
949 } else {
950 istate = mid_line;
951 return true;
954 gives as many prediction errors. So, we can indeed assume that the compiler
955 does the right job, or that there is simply no other way.
958 switch (istate + cur_cmd) {
959 case mid_line + ignore_cmd:
960 case skip_blanks + ignore_cmd:
961 case new_line + ignore_cmd:
962 case skip_blanks + spacer_cmd:
963 case new_line + spacer_cmd:
964 /* Cases where character is ignored */
965 goto SWITCH;
966 break;
967 case mid_line + escape_cmd:
968 case new_line + escape_cmd:
969 case skip_blanks + escape_cmd:
970 /* Scan a control sequence ...; */
971 istate = (unsigned char) scan_control_sequence();
972 if (! suppress_outer_error_par && cur_cmd >= outer_call_cmd)
973 check_outer_validity();
974 break;
975 case mid_line + active_char_cmd:
976 case new_line + active_char_cmd:
977 case skip_blanks + active_char_cmd:
978 /* Process an active-character */
979 cur_cs = active_to_cs(cur_chr, false);
980 cur_cmd = eq_type(cur_cs);
981 cur_chr = equiv(cur_cs);
982 istate = mid_line;
983 if (! suppress_outer_error_par && cur_cmd >= outer_call_cmd)
984 check_outer_validity();
985 break;
986 case mid_line + sup_mark_cmd:
987 case new_line + sup_mark_cmd:
988 case skip_blanks + sup_mark_cmd:
989 /* If this |sup_mark| starts */
990 if (process_sup_mark())
991 goto RESWITCH;
992 else
993 istate = mid_line;
994 break;
995 case mid_line + invalid_char_cmd:
996 case new_line + invalid_char_cmd:
997 case skip_blanks + invalid_char_cmd:
998 /* Decry the invalid character and |goto restart|; */
999 invalid_character_error();
1000 return false; /* because state may be |token_list| now */
1001 break;
1002 case mid_line + spacer_cmd:
1003 /* Enter |skip_blanks| state, emit a space; */
1004 istate = skip_blanks;
1005 cur_chr = ' ';
1006 break;
1007 case mid_line + car_ret_cmd:
1009 Finish line, emit a space. When a character of type |spacer| gets through, its
1010 character code is changed to $\.{"\ "}=040$. This means that the ASCII codes
1011 for tab and space, and for the space inserted at the end of a line, will be
1012 treated alike when macro parameters are being matched. We do this since such
1013 characters are indistinguishable on most computer terminal displays.
1015 iloc = ilimit + 1;
1016 cur_cmd = spacer_cmd;
1017 cur_chr = ' ';
1018 break;
1019 case skip_blanks + car_ret_cmd:
1020 case mid_line + comment_cmd:
1021 case new_line + comment_cmd:
1022 case skip_blanks + comment_cmd:
1023 /* Finish line, |goto switch|; */
1024 iloc = ilimit + 1;
1025 goto SWITCH;
1026 break;
1027 case new_line + car_ret_cmd:
1028 /* Finish line, emit a \.{\\par}; */
1029 iloc = ilimit + 1;
1030 cur_cs = par_loc;
1031 cur_cmd = eq_type(cur_cs);
1032 cur_chr = equiv(cur_cs);
1033 if (! suppress_outer_error_par && cur_cmd >= outer_call_cmd)
1034 check_outer_validity();
1035 break;
1036 case skip_blanks + left_brace_cmd:
1037 case new_line + left_brace_cmd:
1038 istate = mid_line;
1039 /* fall through */
1040 case mid_line + left_brace_cmd:
1041 align_state++;
1042 break;
1043 case skip_blanks + right_brace_cmd:
1044 case new_line + right_brace_cmd:
1045 istate = mid_line;
1046 /* fall through */
1047 case mid_line + right_brace_cmd:
1048 align_state--;
1049 break;
1050 case mid_line + math_shift_cmd:
1051 case mid_line + tab_mark_cmd:
1052 case mid_line + mac_param_cmd:
1053 case mid_line + sub_mark_cmd:
1054 case mid_line + letter_cmd:
1055 case mid_line + other_char_cmd:
1056 break;
1058 case skip_blanks + math_shift:
1059 case skip_blanks + tab_mark:
1060 case skip_blanks + mac_param:
1061 case skip_blanks + sub_mark:
1062 case skip_blanks + letter:
1063 case skip_blanks + other_char:
1064 case new_line + math_shift:
1065 case new_line + tab_mark:
1066 case new_line + mac_param:
1067 case new_line + sub_mark:
1068 case new_line + letter:
1069 case new_line + other_char:
1071 default:
1072 istate = mid_line;
1073 break;
1075 } else {
1076 if (iname != 21)
1077 istate = new_line;
1079 Move to next line of file,
1080 or |goto restart| if there is no next line,
1081 or |return| if a \.{\\read} line has finished;
1083 do {
1084 next_line_retval r = next_line();
1085 if (r == next_line_return) {
1086 return true;
1087 } else if (r == next_line_restart) {
1088 return false;
1090 } while (0);
1091 check_interrupt();
1092 goto SWITCH;
1094 return true;
1097 #else
1099 /* 10 times less Bim in callgrind */
1102 escape_cmd left_brace_cmd right_brace_cmd math_shift_cmd
1103 tab_mark_cmd car_ret_cmd mac_param_cmd sup_mark_cmd
1104 sub_mark_cmd ignore_cmd spacer_cmd letter_cmd
1105 other_char_cmd active_char_cmd comment_cmd invalid_char_cmd
1108 static boolean get_next_file(void)
1110 int c = 0;
1111 SWITCH:
1112 if (iloc <= ilimit) {
1113 /* current line not yet finished */
1114 do_buffer_to_unichar(cur_chr, iloc);
1115 RESWITCH:
1116 if (detokenized_line()) {
1117 cur_cmd = (cur_chr == ' ' ? 10 : 12);
1118 } else {
1119 do_get_cat_code(cur_cmd, cur_chr);
1122 Change state if necessary, and |goto switch| if the current
1123 character should be ignored, or |goto reswitch| if the current
1124 character changes to another;
1126 c = istate + cur_cmd;
1127 if (c == (mid_line + letter_cmd) || c == (mid_line + other_char_cmd)) {
1128 return true;
1129 } else if (c >= new_line) {
1130 switch (c-new_line) {
1131 case escape_cmd:
1132 istate = (unsigned char) scan_control_sequence();
1133 if (! suppress_outer_error_par && cur_cmd >= outer_call_cmd)
1134 check_outer_validity();
1135 return true;
1136 case left_brace_cmd:
1137 istate = mid_line;
1138 align_state++;
1139 return true;
1140 case right_brace_cmd:
1141 istate = mid_line;
1142 align_state--;
1143 return true;
1144 case math_shift_cmd:
1145 istate = mid_line;
1146 return true;
1147 case tab_mark_cmd:
1148 istate = mid_line;
1149 return true;
1150 case car_ret_cmd:
1151 /* Finish line, emit a \.{\\par}; */
1152 iloc = ilimit + 1;
1153 cur_cs = par_loc;
1154 cur_cmd = eq_type(cur_cs);
1155 cur_chr = equiv(cur_cs);
1156 if (! suppress_outer_error_par && cur_cmd >= outer_call_cmd)
1157 check_outer_validity();
1158 return true;
1159 case mac_param_cmd:
1160 istate = mid_line;
1161 return true;
1162 case sup_mark_cmd:
1163 if (process_sup_mark())
1164 goto RESWITCH;
1165 else
1166 istate = mid_line;
1167 return true;
1168 case sub_mark_cmd:
1169 istate = mid_line;
1170 return true;
1171 case ignore_cmd:
1172 goto SWITCH;
1173 return true;
1174 case spacer_cmd:
1175 /* Cases where character is ignored */
1176 goto SWITCH;
1177 case letter_cmd:
1178 istate = mid_line;
1179 return true;
1180 case other_char_cmd:
1181 istate = mid_line;
1182 return true;
1183 case active_char_cmd:
1184 cur_cs = active_to_cs(cur_chr, false);
1185 cur_cmd = eq_type(cur_cs);
1186 cur_chr = equiv(cur_cs);
1187 istate = mid_line;
1188 if (! suppress_outer_error_par && cur_cmd >= outer_call_cmd)
1189 check_outer_validity();
1190 return true;
1191 case comment_cmd:
1192 iloc = ilimit + 1;
1193 goto SWITCH;
1194 case invalid_char_cmd:
1195 invalid_character_error();
1196 return false; /* because state may be |token_list| now */
1197 default:
1198 istate = mid_line;
1199 return true;
1201 } else if (c >= skip_blanks) {
1202 switch (c-skip_blanks) {
1203 case escape_cmd:
1204 /* Scan a control sequence ...; */
1205 istate = (unsigned char) scan_control_sequence();
1206 if (! suppress_outer_error_par && cur_cmd >= outer_call_cmd)
1207 check_outer_validity();
1208 return true;
1209 case left_brace_cmd:
1210 istate = mid_line;
1211 align_state++;
1212 return true;
1213 case right_brace_cmd:
1214 istate = mid_line;
1215 align_state--;
1216 return true;
1217 case math_shift_cmd:
1218 istate = mid_line;
1219 return true;
1220 case tab_mark_cmd:
1221 istate = mid_line;
1222 return true;
1223 case car_ret_cmd:
1224 iloc = ilimit + 1;
1225 goto SWITCH;
1226 case mac_param_cmd:
1227 istate = mid_line;
1228 return true;
1229 case sup_mark_cmd:
1230 /* If this |sup_mark| starts */
1231 if (process_sup_mark())
1232 goto RESWITCH;
1233 else
1234 istate = mid_line;
1235 return true;
1236 case sub_mark_cmd:
1237 istate = mid_line;
1238 return true;
1239 case ignore_cmd:
1240 goto SWITCH;
1241 case spacer_cmd:
1242 goto SWITCH;
1243 case letter_cmd:
1244 istate = mid_line;
1245 return true;
1246 case other_char_cmd:
1247 istate = mid_line;
1248 return true;
1249 case active_char_cmd:
1250 cur_cs = active_to_cs(cur_chr, false);
1251 cur_cmd = eq_type(cur_cs);
1252 cur_chr = equiv(cur_cs);
1253 istate = mid_line;
1254 if (! suppress_outer_error_par && cur_cmd >= outer_call_cmd)
1255 check_outer_validity();
1256 return true;
1257 case comment_cmd:
1258 /* Finish line, |goto switch|; */
1259 iloc = ilimit + 1;
1260 goto SWITCH;
1261 case invalid_char_cmd:
1262 /* Decry the invalid character and |goto restart|; */
1263 invalid_character_error();
1264 return false; /* because state may be |token_list| now */
1265 default:
1266 istate = mid_line;
1267 return true;
1269 } else if (c >= mid_line) {
1270 switch (c-mid_line) {
1271 case escape_cmd:
1272 istate = (unsigned char) scan_control_sequence();
1273 if (! suppress_outer_error_par && cur_cmd >= outer_call_cmd)
1274 check_outer_validity();
1275 return true;
1276 case left_brace_cmd:
1277 align_state++;
1278 return true;
1279 case right_brace_cmd:
1280 align_state--;
1281 return true;
1282 case math_shift_cmd:
1283 return true;
1284 case tab_mark_cmd:
1285 return true;
1286 case car_ret_cmd:
1288 Finish line, emit a space. When a character of type |spacer| gets through, its
1289 character code is changed to $\.{"\ "}=040$. This means that the ASCII codes
1290 for tab and space, and for the space inserted at the end of a line, will be
1291 treated alike when macro parameters are being matched. We do this since such
1292 characters are indistinguishable on most computer terminal displays.
1294 iloc = ilimit + 1;
1295 cur_cmd = spacer_cmd;
1296 cur_chr = ' ';
1297 return true;
1298 case mac_param_cmd:
1299 return true;
1300 case sup_mark_cmd:
1301 if (process_sup_mark())
1302 goto RESWITCH;
1303 else
1304 istate = mid_line;
1305 return true;
1306 case sub_mark_cmd:
1307 return true;
1308 case ignore_cmd:
1309 goto SWITCH;
1310 case spacer_cmd:
1311 /* Enter |skip_blanks| state, emit a space; */
1312 istate = skip_blanks;
1313 cur_chr = ' ';
1314 return true;
1315 case letter_cmd:
1316 istate = mid_line;
1317 return true;
1318 case other_char_cmd:
1319 istate = mid_line;
1320 return true;
1321 case active_char_cmd:
1322 cur_cs = active_to_cs(cur_chr, false);
1323 cur_cmd = eq_type(cur_cs);
1324 cur_chr = equiv(cur_cs);
1325 istate = mid_line;
1326 if (! suppress_outer_error_par && cur_cmd >= outer_call_cmd)
1327 check_outer_validity();
1328 return true;
1329 case comment_cmd:
1330 iloc = ilimit + 1;
1331 goto SWITCH;
1332 case invalid_char_cmd:
1333 invalid_character_error();
1334 return false; /* because state may be |token_list| now */
1335 default:
1336 istate = mid_line;
1337 return true;
1339 } else {
1340 istate = mid_line;
1341 return true;
1343 } else {
1344 if (iname != 21) {
1345 istate = new_line;
1348 Move to next line of file, or |goto restart| if there is no next line,
1349 or |return| if a \.{\\read} line has finished;
1351 do {
1352 next_line_retval r = next_line();
1353 if (r == next_line_return) {
1354 return true;
1355 } else if (r == next_line_restart) {
1356 return false;
1358 } while (0);
1359 check_interrupt();
1360 goto SWITCH;
1362 return true;
1365 #endif
1367 @ Notice that a code like \.{\^\^8} becomes \.x if not followed by a hex digit.
1368 We only support a limited set:
1370 ^^^^^^XXXXXX
1371 ^^^^XXXX
1372 ^^XX ^^<char>
/*
    Helpers for the \.{\^\^} notation. Only lowercase hexadecimal digits are
    accepted. All macros evaluate their arguments more than once, so pass plain
    variables or constants. The statement-like macros are wrapped in
    |do ... while (0)| so that they behave as one statement and need the usual
    terminating semicolon at the call site.
*/

/* true when |a| is one of 0-9 or a-f */
#define is_hex(a) ((((a)>='0')&&((a)<='9'))||(((a)>='a')&&((a)<='f')))

/* shift |cur_chr| one nybble to the left and add the value of hex digit |c| */
#define add_nybble(c) do { \
    if ((c)<='9') { \
        cur_chr=(cur_chr<<4)+(c)-'0'; \
    } else { \
        cur_chr=(cur_chr<<4)+(c)-'a'+10; \
    } \
} while (0)

/* set |cur_chr| to the value of hex digit |c| */
#define set_nybble(c) do { \
    if ((c)<='9') { \
        cur_chr=(c)-'0'; \
    } else { \
        cur_chr=(c)-'a'+10; \
    } \
} while (0)

#define one_hex_to_cur_chr(c1) do { \
    set_nybble(c1); \
} while (0)

#define two_hex_to_cur_chr(c1,c2) do { \
    set_nybble(c1); \
    add_nybble(c2); \
} while (0)

#define four_hex_to_cur_chr(c1,c2,c3,c4) do { \
    two_hex_to_cur_chr(c1,c2); \
    add_nybble(c3); \
    add_nybble(c4); \
} while (0)

#define six_hex_to_cur_chr(c1,c2,c3,c4,c5,c6) do { \
    four_hex_to_cur_chr(c1,c2,c3,c4); \
    add_nybble(c5); \
    add_nybble(c6); \
} while (0)
/*
    Try to interpret the |sup_mark| character in |cur_chr| as the start of an
    expanded character. Nothing happens unless the next buffer entry,
    |buffer[iloc]|, repeats |cur_chr|. The forms tried are: six marks plus six
    hex digits, four marks plus four hex digits, two marks plus two hex digits,
    and two marks plus one character below 0200 whose code is then moved by 0100.

    Returns |true| when |cur_chr| has been replaced and |iloc| has been moved
    past the whole construct; the visible caller then jumps to its |RESWITCH|
    label to look at the new character. Returns |false| otherwise; the visible
    caller then sets the state to |mid_line| and passes the mark on.

    A four or six mark prefix that is not followed by enough hex digits is
    reported with |tex_error|.

    NOTE(review): brace-only lines are not visible in this listing, so it cannot
    be told from here whether the final "the rest" part is still guarded by
    |iloc < ilimit| -- confirm against the original file before changing it.
*/
1409 static boolean process_sup_mark(void)
1411 if (cur_chr == buffer[iloc]) {
1412 if (iloc < ilimit) {
/* the tests below peek at up to four more buffer entries to count the marks */
1413 if ((cur_chr == buffer[iloc + 1]) && (cur_chr == buffer[iloc + 2])) {
1414 if ((cur_chr == buffer[iloc + 3]) && (cur_chr == buffer[iloc + 4])) {
1415 /* ^^^^^^XXXXXX */
1416 if ((iloc + 10) <= ilimit) {
1417 int c1 = buffer[iloc + 5];
1418 int c2 = buffer[iloc + 6];
1419 int c3 = buffer[iloc + 7];
1420 int c4 = buffer[iloc + 8];
1421 int c5 = buffer[iloc + 9];
1422 int c6 = buffer[iloc + 10];
1423 if (is_hex(c1) && is_hex(c2) && is_hex(c3) &&
1424 is_hex(c4) && is_hex(c5) && is_hex(c6)) {
/* five more marks and six digits are consumed */
1425 iloc = iloc + 11;
1426 six_hex_to_cur_chr(c1,c2,c3,c4,c5,c6);
1427 return true;
1428 } else {
1429 tex_error("^^^^^^ needs six hex digits", NULL);
1431 } else {
1432 tex_error("^^^^^^ needs six hex digits, end of input", NULL);
1434 } else {
1435 /* ^^^^XXXX */
1436 if ((iloc + 6) <= ilimit) {
1437 int c1 = buffer[iloc + 3];
1438 int c2 = buffer[iloc + 4];
1439 int c3 = buffer[iloc + 5];
1440 int c4 = buffer[iloc + 6];
1441 if (is_hex(c1) && is_hex(c2) && is_hex(c3) && is_hex(c4)) {
/* three more marks and four digits are consumed */
1442 iloc = iloc + 7;
1443 four_hex_to_cur_chr(c1,c2,c3,c4);
1444 return true;
1445 } else {
1446 tex_error("^^^^ needs four hex digits", NULL);
1448 } else {
1449 tex_error("^^^^ needs four hex digits, end of input", NULL);
1452 } else {
1453 /* ^^XX */
1454 if ((iloc + 2) <= ilimit) {
1455 int c1 = buffer[iloc + 1];
1456 int c2 = buffer[iloc + 2];
1457 if (is_hex(c1) && is_hex(c2)) {
/* one more mark and two digits are consumed */
1458 iloc = iloc + 3;
1459 two_hex_to_cur_chr(c1,c2);
1460 return true;
1463 /* go on, no error, good old tex */
1466 /* the rest */
1468 int c1 = buffer[iloc + 1];
1469 if (c1 < 0200) {
/* skip the second mark and the character that follows it */
1470 iloc = iloc + 2;
1471 if (is_hex(c1) && (iloc <= ilimit)) {
1472 int c2 = buffer[iloc];
1473 if (is_hex(c2)) {
1474 incr(iloc);
1475 two_hex_to_cur_chr(c1,c2);
1476 return true;
/* single character form: codes below 0100 move up by 0100, the others move down */
1479 cur_chr = (c1 < 0100 ? c1 + 0100 : c1 - 0100);
1480 return true;
1484 return false;
1487 @ Control sequence names are scanned only when they appear in some line of a
1488 file; once they have been scanned the first time, their |eqtb| location serves as
1489 a unique identification, so \TeX\ doesn't need to refer to the original name any
1490 more except when it prints the equivalent in symbolic form.
1492 The program that scans a control sequence has been written carefully in order to
1493 avoid the blowups that might otherwise occur if a malicious user tried something
1494 like `\.{\\catcode\'15=0}'. The algorithm might look at |buffer[ilimit+1]|, but
1495 it never looks at |buffer[ilimit+2]|.
1497 If expanded characters like `\.{\^\^A}' or `\.{\^\^df}' appear in or just
1498 following a control sequence name, they are converted to single characters in the
1499 buffer and the process is repeated, slowly but surely.
1502 static boolean check_expanded_code(int *kk); /* below */
1504 static int scan_control_sequence(void)
1506 int retval = mid_line;
1507 if (iloc > ilimit) {
1508 cur_cs = null_cs; /* |state| is irrelevant in this case */
1509 } else {
1510 register int cat; /* |cat_code(cur_chr)|, usually */
1511 while (1) {
1512 int k = iloc;
1513 do_buffer_to_unichar(cur_chr, k);
1514 do_get_cat_code(cat, cur_chr);
1515 if (cat != letter_cmd || k > ilimit) {
1516 retval = (cat == spacer_cmd ? skip_blanks : mid_line);
1517 if (cat == sup_mark_cmd && check_expanded_code(&k)) /* If an expanded...; */
1518 continue;
1519 } else {
1520 retval = skip_blanks;
1521 do {
1522 do_buffer_to_unichar(cur_chr, k);
1523 do_get_cat_code(cat, cur_chr);
1524 } while (cat == letter_cmd && k <= ilimit);
1526 if (cat == sup_mark_cmd && check_expanded_code(&k)) /* If an expanded...; */
1527 continue;
1528 if (cat != letter_cmd) {
1529 /* backtrack one character which can be utf */
1531 decr(k);
1532 if (cur_chr > 0xFFFF)
1533 decr(k);
1534 if (cur_chr > 0x7FF)
1535 decr(k);
1536 if (cur_chr > 0x7F)
1537 decr(k);
1539 if (cur_chr <= 0x7F) {
1540 k -= 1; /* in most cases */
1541 } else if (cur_chr > 0xFFFF) {
1542 k -= 4;
1543 } else if (cur_chr > 0x7FF) {
1544 k -= 3;
1545 } else /* if (cur_chr > 0x7F) */ {
1546 k -= 2;
1548 /* now |k| points to first nonletter */
1551 cur_cs = id_lookup(iloc, k - iloc);
1552 iloc = k;
1553 break;
1556 cur_cmd = eq_type(cur_cs);
1557 cur_chr = equiv(cur_cs);
1558 return retval;
1561 @ Whenever we reach the following piece of code, we will have
1562 |cur_chr=buffer[k-1]| and |k<=ilimit+1| and
1563 |cat=get_cat_code(cat_code_table,cur_chr)|. If an expanded code like \.{\^\^A} or
1564 \.{\^\^df} appears in |buffer[(k-1)..(k+1)]| or |buffer[(k-1)..(k+2)]|, we will
1565 store the corresponding code in |buffer[k-1]| and shift the rest of the buffer
1566 left two or three places (and further for the longer \.{\^\^\^\^} and \.{\^\^\^\^\^\^} forms).
/*
    Called by |scan_control_sequence| when the character just read has category
    |sup_mark|. |*kk| is the buffer index right after that character. If
    |buffer[k]| repeats the mark and |k < ilimit|, the construct is decoded into
    |cur_chr|, its utf-8 encoding is written back into the buffer starting at
    |buffer[k-1]|, the remainder of the line is moved left over the bytes that
    are no longer needed, |ilimit| is lowered accordingly, the index just past
    the bytes written is stored in |*kk|, and |true| is returned. In all other
    cases |false| is returned and nothing is changed.

    |d| first records which form was recognised (1: one character, 2: two hex
    digits, 4 or 6: that many hex digits after that many marks) and is then
    turned into the number of buffer positions to remove.

    NOTE(review): brace-only lines are not visible in this listing. It looks as
    if the rewrite at the end also runs after one of the |tex_error| calls, with
    |d| still 1 and |cur_chr| still the mark -- confirm against the original.
*/
1569 static boolean check_expanded_code(int *kk)
1571 int l;
1572 int k = *kk;
1573 int d = 1;
1574 if (buffer[k] == cur_chr && k < ilimit) {
1575 if ((cur_chr == buffer[k + 1]) && (cur_chr == buffer[k + 2])) {
1576 if ((cur_chr == buffer[k + 3]) && (cur_chr == buffer[k + 4])) {
/* six marks: the digits are expected at |k+5| up to |k+10| */
1577 if ((k + 10) <= ilimit) {
1578 int c1 = buffer[k + 6 - 1];
1579 int c2 = buffer[k + 6];
1580 int c3 = buffer[k + 6 + 1];
1581 int c4 = buffer[k + 6 + 2];
1582 int c5 = buffer[k + 6 + 3];
1583 int c6 = buffer[k + 6 + 4];
1584 if (is_hex(c1) && is_hex(c2) && is_hex(c3) && is_hex(c4) && is_hex(c5) && is_hex(c6)) {
1585 d = 6;
1586 six_hex_to_cur_chr(c1,c2,c3,c4,c5,c6);
1587 } else {
1588 tex_error("^^^^^^ needs six hex digits", NULL);
1590 } else {
1591 tex_error("^^^^^^ needs six hex digits, end of input", NULL);
1593 } else {
/* four marks: the digits are expected at |k+3| up to |k+6| */
1594 if ((k + 6) <= ilimit) {
1595 int c1 = buffer[k + 4 - 1];
1596 int c2 = buffer[k + 4];
1597 int c3 = buffer[k + 4 + 1];
1598 int c4 = buffer[k + 4 + 2];
1599 if (is_hex(c1) && is_hex(c2) && is_hex(c3) && is_hex(c4)) {
1600 d = 4;
1601 four_hex_to_cur_chr(c1,c2,c3,c4);
1602 } else {
1603 tex_error("^^^^ needs four hex digits", NULL);
1605 } else {
1606 tex_error("^^^^ needs four hex digits, end of input", NULL);
1609 } else {
/* two marks: either two hex digits or one character below 0200 */
1610 int c1 = buffer[k + 1];
1611 if (c1 < 0200) {
1612 d = 1;
1613 if (is_hex(c1) && (k + 2) <= ilimit) {
1614 int c2 = buffer[k + 2];
1615 if (is_hex(c2)) {
1616 d = 2;
1617 two_hex_to_cur_chr(c1,c2);
1618 } else {
1619 cur_chr = (c1 < 0100 ? c1 + 0100 : c1 - 0100);
1621 } else {
1622 cur_chr = (c1 < 0100 ? c1 + 0100 : c1 - 0100);
/*
    Turn the form into a count of positions to drop when one byte is written:
    11 for six digits, 7 for four digits, 3 for two digits, 2 for one character.
*/
1626 if (d > 2)
1627 d = 2 * d - 1;
1628 else
1629 d++;
/* write |cur_chr| as utf-8; every extra byte written means one position less to drop */
1630 if (cur_chr <= 0x7F) {
1631 buffer[k - 1] = (packed_ASCII_code) cur_chr;
1632 } else if (cur_chr <= 0x7FF) {
1633 buffer[k - 1] = (packed_ASCII_code) (0xC0 + cur_chr / 0x40);
1634 k++;
1635 d--;
1636 buffer[k - 1] = (packed_ASCII_code) (0x80 + cur_chr % 0x40);
1637 } else if (cur_chr <= 0xFFFF) {
1638 buffer[k - 1] = (packed_ASCII_code) (0xE0 + cur_chr / 0x1000);
1639 k++;
1640 d--;
1641 buffer[k - 1] = (packed_ASCII_code) (0x80 + (cur_chr % 0x1000) / 0x40);
1642 k++;
1643 d--;
1644 buffer[k - 1] = (packed_ASCII_code) (0x80 + (cur_chr % 0x1000) % 0x40);
1645 } else {
1646 buffer[k - 1] = (packed_ASCII_code) (0xF0 + cur_chr / 0x40000);
1647 k++;
1648 d--;
1649 buffer[k - 1] = (packed_ASCII_code) (0x80 + (cur_chr % 0x40000) / 0x1000);
1650 k++;
1651 d--;
1652 buffer[k - 1] = (packed_ASCII_code) (0x80 + ((cur_chr % 0x40000) % 0x1000) / 0x40);
1653 k++;
1654 d--;
1655 buffer[k - 1] = (packed_ASCII_code) (0x80 + ((cur_chr % 0x40000) % 0x1000) % 0x40);
/* close the gap: move the rest of the line |d| positions to the left */
1657 l = k;
1658 ilimit = ilimit - d;
1659 while (l <= ilimit) {
1660 buffer[l] = buffer[l + d];
1661 l++;
1663 *kk = k;
1664 return true;
1666 return false;
1669 @ All of the easy branches of |get_next| have now been taken care of. There is
1670 one more branch.
1672 @c static next_line_retval next_line(void)
1674 boolean inhibit_eol = false; /* a way to end a pseudo file without trailing space */
1675 if (iname > 17) {
1676 /* Read next line of file into |buffer|, or |goto restart| if the file has ended */
1677 incr(line);
1678 first = istart;
1679 if (!force_eof) {
1680 if (iname <= 20) {
1681 if (pseudo_input()) { /* not end of file */
1682 firm_up_the_line(); /* this sets |ilimit| */
1683 line_catcode_table = DEFAULT_CAT_TABLE;
1684 if ((iname == 19) && (pseudo_lines(pseudo_files) == null))
1685 inhibit_eol = true;
1686 } else if ((every_eof_par != null) && !eof_seen[iindex]) {
1687 ilimit = first - 1;
1688 eof_seen[iindex] = true; /* fake one empty line */
1689 if (iname != 19)
1690 begin_token_list(every_eof_par, every_eof_text);
1691 return next_line_restart;
1692 } else {
1693 force_eof = true;
1695 } else {
1696 if (iname == 21) {
1697 if (luacstring_input()) { /* not end of strings */
1698 firm_up_the_line();
1699 line_catcode_table = (short) luacstring_cattable();
1700 line_partial = (signed char) luacstring_partial();
1701 if (luacstring_final_line() || line_partial
1702 || line_catcode_table == NO_CAT_TABLE)
1703 inhibit_eol = true;
1704 if (!line_partial)
1705 istate = new_line;
1706 } else {
1707 force_eof = true;
1709 } else {
1710 if (lua_input_ln(cur_file, 0, true)) { /* not end of file */
1711 firm_up_the_line(); /* this sets |ilimit| */
1712 line_catcode_table = DEFAULT_CAT_TABLE;
1713 } else if ((every_eof_par != null) && (!eof_seen[iindex])) {
1714 ilimit = first - 1;
1715 eof_seen[iindex] = true; /* fake one empty line */
1716 begin_token_list(every_eof_par, every_eof_text);
1717 return next_line_restart;
1718 } else {
1719 force_eof = true;
1724 if (force_eof) {
1725 if (tracing_nesting_par > 0)
1726 if ((grp_stack[in_open] != cur_boundary) || (if_stack[in_open] != cond_ptr))
1727 if (!((iname == 19) || (iname == 21))) {
1728 /* give warning for some unfinished groups and/or conditionals */
1729 file_warning();
1731 if ((iname > 21) || (iname == 20)) {
1732 report_stop_file(filetype_tex);
1733 decr(open_parens);
1735 force_eof = false;
1736 /* lua input or \.{\\scantextokens} */
1737 if (iname == 21 || iname == 19) {
1738 end_file_reading();
1739 } else {
1740 end_file_reading();
1741 if (! suppress_outer_error_par)
1742 check_outer_validity();
1744 return next_line_restart;
1746 if (inhibit_eol || end_line_char_inactive)
1747 ilimit--;
1748 else
1749 buffer[ilimit] = (packed_ASCII_code) end_line_char_par;
1750 first = ilimit + 1;
1751 iloc = istart; /* ready to read */
1752 } else {
1753 if (!terminal_input) {
1754 /* \.{\\read} line has ended */
1755 cur_cmd = 0;
1756 cur_chr = 0;
1757 return next_line_return; /* OUTER */
1759 if (input_ptr > 0) {
1760 /* text was inserted during error recovery */
1761 end_file_reading();
1762 return next_line_restart; /* resume previous level */
1764 if (selector < log_only)
1765 open_log_file();
1766 if (interaction > nonstop_mode) {
1767 if (end_line_char_inactive)
1768 ilimit++;
1769 if (ilimit == istart) {
1770 /* previous line was empty */
1771 tprint_nl("(Please type a command or say `\\end')");
1773 print_ln();
1774 first = istart;
1775 prompt_input("*"); /* input on-line into |buffer| */
1776 ilimit = last;
1777 if (end_line_char_inactive)
1778 ilimit--;
1779 else
1780 buffer[ilimit] = (packed_ASCII_code) end_line_char_par;
1781 first = ilimit + 1;
1782 iloc = istart;
1783 } else {
1785 Nonstop mode, which is intended for overnight batch processing,
1786 never waits for on-line input.
1788 fatal_error("*** (job aborted, no legal \\end found)");
1791 return next_line_ok;
1794 @ Let's consider now what happens when |get_next| is looking at a token list.
1797 static boolean get_next_tokenlist(void)
1799 register halfword t = token_info(iloc);
1800 iloc = token_link(iloc); /* move to next */
1801 if (t >= cs_token_flag) {
1802 /* a control sequence token */
1803 cur_cs = t - cs_token_flag;
1804 cur_cmd = eq_type(cur_cs);
1805 if (cur_cmd >= outer_call_cmd) {
1806 if (cur_cmd == dont_expand_cmd) {
1808 Get the next token, suppressing expansion. The present point in the program
1809 is reached only when the |expand| routine has inserted a special marker into
1810 the input. In this special case, |token_info(iloc)| is known to be a control
1811 sequence token, and |token_link(iloc)=null|.
1813 cur_cs = token_info(iloc) - cs_token_flag;
1814 iloc = null;
1815 cur_cmd = eq_type(cur_cs);
1816 if (cur_cmd > max_command_cmd) {
1817 cur_cmd = relax_cmd;
1818 cur_chr = no_expand_flag;
1819 return true;
1821 } else if (! suppress_outer_error_par) {
1822 check_outer_validity();
1825 cur_chr = equiv(cur_cs);
1826 } else {
1827 cur_cmd = token_cmd(t);
1828 cur_chr = token_chr(t);
1829 switch (cur_cmd) {
1830 case left_brace_cmd:
1831 align_state++;
1832 break;
1833 case right_brace_cmd:
1834 align_state--;
1835 break;
1836 case out_param_cmd:
1837 /* Insert macro parameter and |goto restart|; */
1838 begin_token_list(param_stack[param_start + cur_chr - 1], parameter);
1839 return false;
1840 break;
1843 return true;
1846 @ Now we're ready to take the plunge into |get_next| itself. Parts of this
1847 routine are executed more often than any other instructions of \TeX.
1848 @^mastication@>@^inner loop@>
1850 @ sets |cur_cmd|, |cur_chr|, |cur_cs| to next token
1853 void get_next(void)
1855 RESTART:
1856 cur_cs = 0;
1857 if (istate != token_list) {
1858 /* Input from external file, |goto restart| if no input found */
1859 if (!get_next_file())
1860 goto RESTART;
1861 } else {
1862 if (iloc == null) {
1863 end_token_list();
1864 goto RESTART; /* list exhausted, resume previous level */
1865 } else if (!get_next_tokenlist()) {
1866 goto RESTART; /* parameter needs to be expanded */
1869 /* If an alignment entry has just ended, take appropriate action */
1870 if ((cur_cmd == tab_mark_cmd || cur_cmd == car_ret_cmd) && align_state == 0) {
1871 insert_vj_template();
1872 goto RESTART;
1876 @ Since |get_next| is used so frequently in \TeX, it is convenient to define
1877 three related procedures that do a little more:
1879 \yskip\hang|get_token| not only sets |cur_cmd| and |cur_chr|, it also sets
1880 |cur_tok|, a packed halfword version of the current token.
1882 \yskip\hang|get_x_token|, meaning ``get an expanded token,'' is like |get_token|,
1883 but if the current token turns out to be a user-defined control sequence (i.e., a
1884 macro call), or a conditional, or something like \.{\\topmark} or
1885 \.{\\expandafter} or \.{\\csname}, it is eliminated from the input by beginning
1886 the expansion of the macro or the evaluation of the conditional.
1888 \yskip\hang|x_token| is like |get_x_token| except that it assumes that |get_next|
1889 has already been called.
1891 \yskip\noindent In fact, these three procedures account for almost every use of
1892 |get_next|.
1894 No new control sequences will be defined except during a call of |get_token|, or
1895 when \.{\\csname} compresses a token list, because |no_new_control_sequence| is
1896 always |true| at other times.
1898 @ sets |cur_cmd|, |cur_chr|, |cur_tok|
1901 void get_token(void)
1903 no_new_control_sequence = false;
1904 get_next();
1905 no_new_control_sequence = true;
1906 if (cur_cs == 0)
1907 cur_tok = token_val(cur_cmd, cur_chr);
1908 else
1909 cur_tok = cs_token_flag + cur_cs;
1912 @ changes the string |s| to a token list
1915 halfword string_to_toks(const char *ss)
1917 halfword p; /* tail of the token list */
1918 halfword q; /* new node being added to the token list via |store_new_token| */
1919 halfword t; /* token being appended */
1920 const char *s = ss;
1921 const char *se = ss + strlen(s);
1922 p = temp_token_head;
1923 set_token_link(p, null);
1924 while (s < se) {
1925 t = (halfword) str2uni((const unsigned char *) s);
1926 s += utf8_size(t);
1927 if (t == ' ')
1928 t = space_token;
1929 else
1930 t = other_token + t;
1931 fast_store_new_token(t);
1933 return token_link(temp_token_head);
1936 @ The token lists for macros and for other things like \.{\\mark} and
1937 \.{\\output} and \.{\\write} are produced by a procedure called |scan_toks|.
1939 Before we get into the details of |scan_toks|, let's consider a much simpler
1940 task, that of converting the current string into a token list. The |str_toks|
1941 function does this; it classifies spaces as type |spacer| and everything else as
1942 type |other_char|.
1944 The token list created by |str_toks| begins at |link(temp_token_head)| and ends
1945 at the value |p| that is returned. (If |p=temp_token_head|, the list is empty.)
1947 |lua_str_toks| is almost identical, but it also escapes the characters that
1948 |lua| considers special while scanning a literal string: the backslash, both
1948 quote characters, and the newline and carriage return characters.
1950 @ changes the string |b| to a token list
1953 halfword lua_str_toks(lstring b)
1955 halfword p; /* tail of the token list */
1956 halfword q; /* new node being added to the token list via |store_new_token| */
1957 halfword t; /* token being appended */
1958 unsigned char *k; /* index into string */
1959 p = temp_token_head;
1960 set_token_link(p, null);
1961 k = (unsigned char *) b.s;
1962 while (k < (unsigned char *) b.s + b.l) {
1963 t = pool_to_unichar(k);
1964 k += utf8_size(t);
1965 if (t == ' ') {
1966 t = space_token;
1967 } else {
1968 if ((t == '\\') || (t == '"') || (t == '\'') || (t == 10) || (t == 13))
1969 fast_store_new_token(other_token + '\\');
1970 if (t == 10)
1971 t = 'n';
1972 if (t == 13)
1973 t = 'r';
1974 t = other_token + t;
1976 fast_store_new_token(t);
1978 return p;
1981 @ Incidentally, the main reason for wanting |str_toks| is the function
1982 |the_toks|, which has similar input/output characteristics.
1984 @ changes the string |s| to a token list
1987 halfword str_toks(lstring s)
1989 halfword p; /* tail of the token list */
1990 halfword q; /* new node being added to the token list via |store_new_token| */
1991 halfword t; /* token being appended */
1992 unsigned char *k, *l; /* index into string */
1993 p = temp_token_head;
1994 set_token_link(p, null);
1995 k = s.s;
1996 l = k + s.l;
1997 while (k < l) {
1998 t = pool_to_unichar(k);
1999 k += utf8_size(t);
2000 if (t == ' ')
2001 t = space_token;
2002 else
2003 t = other_token + t;
2004 fast_store_new_token(t);
2006 return p;
2010 hh: most of the converter is similar to the one i made for macro so at some point i
2011 can make a helper; also todo: there is no need to go through the pool
2015 halfword str_scan_toks(int ct, lstring s)
2016 { /* changes the string |str_pool[b..pool_ptr]| to a token list */
2017 halfword p; /* tail of the token list */
2018 halfword q; /* new node being added to the token list via |store_new_token| */
2019 halfword t; /* token being appended */
2020 unsigned char *k, *l; /* index into string */
2021 int cc;
2022 p = temp_token_head;
2023 set_token_link(p, null);
2024 k = s.s;
2025 l = k + s.l;
2026 while (k < l) {
2027 t = pool_to_unichar(k);
2028 k += utf8_size(t);
2029 cc = get_cat_code(ct,t);
2030 if (cc == 0) {
2031 /* we have a potential control sequence so we check for it */
2032 int _lname = 0 ;
2033 int _s = 0 ;
2034 int _c = 0 ;
2035 halfword _cs = null ;
2036 unsigned char *_name = k ;
2037 while (k < l) {
2038 t = (halfword) str2uni((const unsigned char *) k);
2039 _s = utf8_size(t);
2040 _c = get_cat_code(ct,t);
2041 if (_c == 11) {
2042 k += _s ;
2043 _lname = _lname + _s ;
2044 } else if (_c == 10) {
2045 /* we ignore a trailing space like normal scanning does */
2046 k += _s ;
2047 break ;
2048 } else {
2049 break ;
2052 if (_s > 0) {
2053 /* we have a potential \cs */
2054 _cs = string_lookup((const char *) _name, _lname);
2055 if (_cs == undefined_control_sequence) {
2056 /* let's play safe and backtrack */
2057 t = cc * (1<<21) + t ;
2058 k = _name ;
2059 } else {
2060 t = cs_token_flag + _cs;
2062 } else {
2063 /* just a character with some meaning, so \unknown becomes effectively */
2064 /* \\unknown assuming that \\ has some useful meaning of course */
2065 t = cc * (1<<21) + t ;
2066 k = _name ;
2069 } else {
2070 /* whatever token, so for instance $x^2$ just works given a tex */
2071 /* catcode regime */
2072 t = cc * (1<<21) + t ;
2074 fast_store_new_token(t);
2077 return p;
2080 @ Here's part of the |expand| subroutine that we are now ready to complete:
2083 void ins_the_toks(void)
2085 (void) the_toks();
2086 ins_list(token_link(temp_token_head));
/*
    Give token register |n| a new token list: a fresh reference node is taken
    with |get_avail|, its reference count is set to zero, it is linked to the
    tokens that follow |t|, and the register is then set through |define|.

    NOTE(review): |a| (4 when |g>0|, else 0) is not used in the visible macro
    text; presumably the |define| macro reads it to pick a global assignment --
    confirm before removing or renaming it.
*/
2089 #define set_toks_register(n,t,g) { \
2090 int a = (g>0) ? 4 : 0; \
2091 halfword ref = get_avail(); \
2092 set_token_ref_count(ref, 0); \
2093 set_token_link(ref, token_link(t)); \
2094 define(n + toks_base, call_cmd, ref); \
2097 void combine_the_toks(int how)
2099 halfword nt;
2100 get_x_token();
2101 /* target */
2102 if (cur_cmd == assign_toks_cmd) {
2103 nt = equiv(cur_cs) - toks_base;
2104 /* check range */
2105 } else {
2106 back_input();
2107 scan_int();
2108 nt = cur_val;
2110 /* source */
2111 do {
2112 get_x_token();
2113 } while (cur_cmd == spacer_cmd);
2114 if (cur_cmd == left_brace_cmd) {
2115 halfword x, source;
2116 back_input();
2117 x = scan_toks(false,how > 1); /* expanded or not */
2118 source = def_ref;
2119 /* action */
2120 if (source != null) {
2121 halfword target = toks(nt);
2122 if (target == null) {
2123 set_toks_register(nt,source,0);
2124 } else {
2125 halfword s = token_link(source);
2126 if (s != null) {
2127 halfword t = token_link(target);
2128 if (t == null) {
2129 /* can this happen ? */
2130 set_token_link(target, s);
2131 } else if (odd(how)) {
2132 /* prepend */
2133 if (cur_level != eq_level_field(eqtb[toks_base+nt])) {
2134 halfword p = temp_token_head;
2135 halfword q;
2136 set_token_link(p, s); /* s = head, x = tail */
2137 p = x;
2138 while (t != null) {
2139 fast_store_new_token(token_info(t));
2140 t = token_link(t);
2142 set_toks_register(nt,temp_token_head,0);
2143 } else {
2144 set_token_link(x,t);
2145 set_token_link(target,s);
2147 } else {
2148 /* append */
2149 if (cur_level != eq_level_field(eqtb[toks_base+nt])) {
2150 halfword p = temp_token_head;
2151 halfword q;
2152 set_token_link(p, null);
2153 while (t != null) {
2154 fast_store_new_token(token_info(t));
2155 t = token_link(t);
2157 set_token_link(p,s);
2158 set_toks_register(nt,temp_token_head,0);
2159 } else {
2160 while (token_link(t) != null) {
2161 t = token_link(t);
2163 set_token_link(t,s);
2169 } else {
2170 halfword source, ns;
2171 if (cur_cmd == assign_toks_cmd) {
2172 ns = equiv(cur_cs) - toks_base;
2173 /* check range */
2174 } else {
2175 back_input();
2176 scan_int();
2177 ns = cur_val;
2179 /* action */
2180 source = toks(ns);
2181 if (source != null) {
2182 halfword target = toks(nt);
2183 if (target == null) {
2184 equiv(toks_base+nt) = source;
2185 equiv(toks_base+ns) = null;
2186 } else {
2187 halfword s = token_link(source);
2188 if (s != null) {
2189 halfword t = token_link(target);
2190 if (t == null) {
2191 set_token_link(target, s);
2192 } else if (odd(how)) {
2193 /* prepend */
2194 halfword x = s;
2195 while (token_link(x) != null) {
2196 x = token_link(x);
2198 set_token_link(x,t);
2199 set_token_link(target,s);
2200 } else {
2201 /* append */
2202 while (token_link(t) != null) {
2203 t = token_link(t);
2205 set_token_link(t,s);
2207 equiv(toks_base+ns) = null;
2214 @ This routine, used in the next one, prints the job name, possibly modified by
2215 the |process_jobname| callback.
2218 static void print_job_name(void)
2220 if (job_name) {
2221 char *s, *ss; /* C strings for jobname before and after processing */
2222 int callback_id, lua_retval;
2223 s = (char*)str_string(job_name);
2224 callback_id = callback_defined(process_jobname_callback);
2225 if (callback_id > 0) {
2226 lua_retval = run_callback(callback_id, "S->S", s, &ss);
2227 if ((lua_retval == true) && (ss != NULL))
2228 s = ss;
2230 tprint(s);
2231 } else {
2232 print(job_name);
@ Here is a routine that prints the result of a convert command, using the
argument |i|. It returns |false| if it does not know how to print the code |c|. The
function exists because lua code and tex code can both call it to convert
something.
2241 @ Parse optional lua state integer, or an instance name to be stored in |sn| and
2242 get the next non-blank non-relax non-call token.
2246 int scan_lua_state(void)
2248 int sn = 0;
2249 do {
2250 get_x_token();
2251 } while ((cur_cmd == spacer_cmd) || (cur_cmd == relax_cmd));
2252 back_input();
2253 if (cur_cmd != left_brace_cmd) {
2254 if (scan_keyword("name")) {
2255 (void) scan_toks(false, true);
2256 sn = def_ref;
2257 } else {
2258 scan_register_num();
2259 if (get_lua_name(cur_val))
2260 sn = (cur_val - 65536);
2263 return sn;
2266 @ The procedure |conv_toks| uses |str_toks| to insert the token list for
2267 |convert| functions into the scanner; `\.{\\outer}' control sequences are allowed
2268 to follow `\.{\\string}' and `\.{\\meaning}'.
2270 The extra temp string |u| is needed because |pdf_scan_ext_toks| incorporates any
2271 pending string in its output. In order to save such a pending string, we have to
2272 create a temporary string that is destroyed immediately after.
/*
    Redirect printing into the string pool and back again. Both macros rely on
    a local |old_setting| variable being declared by the calling function.
*/
#define push_selector { old_setting = selector; selector = new_string; }

#define pop_selector { selector = old_setting; }
2284 static int do_variable_dvi(halfword c)
2286 return 0;
/*
    Each of these macros turns a backend variable index |i| into the matching
    internal assignment command and pushes that token back into the input, so
    that the regular assignment scanner handles it next. They are wrapped in
    |do ... while (0)| so that they behave as a single statement, also in an
    unbraced |if| or |else| branch.
*/
#define do_variable_backend_int(i) do { \
    cur_cmd = assign_int_cmd; \
    cur_val = backend_int_base + (i); \
    cur_tok = token_val(cur_cmd, cur_val); \
    back_input(); \
} while (0)

#define do_variable_backend_dimen(i) do { \
    cur_cmd = assign_dimen_cmd; \
    cur_val = backend_dimen_base + (i); \
    cur_tok = token_val(cur_cmd, cur_val); \
    back_input(); \
} while (0)

#define do_variable_backend_toks(i) do { \
    cur_cmd = assign_toks_cmd; \
    cur_val = backend_toks_base + (i); \
    cur_tok = token_val(cur_cmd, cur_val); \
    back_input(); \
} while (0)
2307 static int do_variable_pdf(halfword c)
2309 if (scan_keyword("compresslevel")) { do_variable_backend_int(c_pdf_compress_level); }
2310 else if (scan_keyword("decimaldigits")) { do_variable_backend_int(c_pdf_decimal_digits); }
2311 else if (scan_keyword("imageresolution")) { do_variable_backend_int(c_pdf_image_resolution); }
2312 else if (scan_keyword("pkresolution")) { do_variable_backend_int(c_pdf_pk_resolution); }
2313 else if (scan_keyword("uniqueresname")) { do_variable_backend_int(c_pdf_unique_resname); }
2314 else if (scan_keyword("minorversion")) { do_variable_backend_int(c_pdf_minor_version); }
2315 else if (scan_keyword("pagebox")) { do_variable_backend_int(c_pdf_pagebox); }
2316 else if (scan_keyword("inclusionerrorlevel")) { do_variable_backend_int(c_pdf_inclusion_errorlevel); }
2317 else if (scan_keyword("ignoreunknownimages")) { do_variable_backend_int(c_pdf_ignore_unknown_images); }
2318 else if (scan_keyword("gamma")) { do_variable_backend_int(c_pdf_gamma); }
2319 else if (scan_keyword("imageapplygamma")) { do_variable_backend_int(c_pdf_image_apply_gamma); }
2320 else if (scan_keyword("imagegamma")) { do_variable_backend_int(c_pdf_image_gamma); }
2321 else if (scan_keyword("imagehicolor")) { do_variable_backend_int(c_pdf_image_hicolor); }
2322 else if (scan_keyword("imageaddfilename")) { do_variable_backend_int(c_pdf_image_addfilename); }
2323 else if (scan_keyword("objcompresslevel")) { do_variable_backend_int(c_pdf_obj_compress_level); }
2324 else if (scan_keyword("inclusioncopyfonts")) { do_variable_backend_int(c_pdf_inclusion_copy_font); }
2325 else if (scan_keyword("gentounicode")) { do_variable_backend_int(c_pdf_gen_tounicode); }
2326 else if (scan_keyword("pkfixeddpi")) { do_variable_backend_int(c_pdf_pk_fixed_dpi); }
2327 else if (scan_keyword("suppressoptionalinfo")) { do_variable_backend_int(c_pdf_suppress_optional_info); }
2328 else if (scan_keyword("omitcidset")) { do_variable_backend_int(c_pdf_omit_cidset); }
2330 else if (scan_keyword("horigin")) { do_variable_backend_dimen(d_pdf_h_origin); }
2331 else if (scan_keyword("vorigin")) { do_variable_backend_dimen(d_pdf_v_origin); }
2332 else if (scan_keyword("threadmargin")) { do_variable_backend_dimen(d_pdf_thread_margin); }
2333 else if (scan_keyword("destmargin")) { do_variable_backend_dimen(d_pdf_dest_margin); }
2334 else if (scan_keyword("linkmargin")) { do_variable_backend_dimen(d_pdf_link_margin); }
2335 else if (scan_keyword("xformmargin")) { do_variable_backend_dimen(d_pdf_xform_margin); }
2337 else if (scan_keyword("pageattr")) { do_variable_backend_toks(t_pdf_page_attr); }
2338 else if (scan_keyword("pageresources")) { do_variable_backend_toks(t_pdf_page_resources); }
2339 else if (scan_keyword("pagesattr")) { do_variable_backend_toks(t_pdf_pages_attr); }
2340 else if (scan_keyword("xformattr")) { do_variable_backend_toks(t_pdf_xform_attr); }
2341 else if (scan_keyword("xformresources")) { do_variable_backend_toks(t_pdf_xform_resources); }
2342 else if (scan_keyword("pkmode")) { do_variable_backend_toks(t_pdf_pk_mode); }
2343 else if (scan_keyword("trailerid")) { do_variable_backend_toks(t_pdf_trailer_id); }
2345 else
2346 return 0;
2347 return 1;
2350 static int do_feedback_dvi(halfword c)
2352 return 0;
2355 /* codes not really needed but cleaner when testing */
2357 #define pdftex_version 140 /* these values will not change any more */
2358 #define pdftex_revision "0" /* these values will not change any more */
2360 static int do_feedback_pdf(halfword c)
2362 int old_setting; /* holds |selector| setting */
2363 int save_scanner_status; /* |scanner_status| upon entry */
2364 halfword save_def_ref; /* |def_ref| upon entry, important if inside `\.{\\message}' */
2365 halfword save_warning_index;
2366 boolean bool; /* temp boolean */
2367 str_number s; /* first temp string */
2368 int ff; /* for use with |set_ff| */
2369 str_number u = 0; /* third temp string, will become non-nil if a string is already being built */
2370 char *str; /* color stack init str */
2372 if (scan_keyword("lastlink")) {
2373 push_selector;
2374 print_int(pdf_last_link);
2375 pop_selector;
2376 } else if (scan_keyword("retval")) {
2377 push_selector;
2378 print_int(pdf_retval);
2379 pop_selector;
2380 } else if (scan_keyword("lastobj")) {
2381 push_selector;
2382 print_int(pdf_last_obj);
2383 pop_selector;
2384 } else if (scan_keyword("lastannot")) {
2385 push_selector;
2386 print_int(pdf_last_annot);
2387 pop_selector;
2388 } else if (scan_keyword("xformname")) {
2389 scan_int();
2390 check_obj_type(static_pdf, obj_type_xform, cur_val);
2391 push_selector;
2392 print_int(obj_info(static_pdf, cur_val));
2393 pop_selector;
2394 } else if (scan_keyword("creationdate")) {
2395 ins_list(string_to_toks(getcreationdate(static_pdf)));
2396 /* no further action */
2397 return 2;
2398 } else if (scan_keyword("fontname")) {
2399 scan_font_ident();
2400 if (cur_val == null_font)
2401 normal_error("pdf backend", "invalid font identifier when asking 'fontname'");
2402 pdf_check_vf(cur_val);
2403 if (!font_used(cur_val))
2404 pdf_init_font(static_pdf, cur_val);
2405 push_selector;
2406 set_ff(cur_val);
2407 print_int(obj_info(static_pdf, pdf_font_num(ff)));
2408 pop_selector;
2409 } else if (scan_keyword("fontobjnum")) {
2410 scan_font_ident();
2411 if (cur_val == null_font)
2412 normal_error("pdf backend", "invalid font identifier when asking 'objnum'");
2413 pdf_check_vf(cur_val);
2414 if (!font_used(cur_val))
2415 pdf_init_font(static_pdf, cur_val);
2416 push_selector;
2417 set_ff(cur_val);
2418 print_int(pdf_font_num(ff));
2419 pop_selector;
2420 } else if (scan_keyword("fontsize")) {
2421 scan_font_ident();
2422 if (cur_val == null_font)
2423 normal_error("pdf backend", "invalid font identifier when asking 'fontsize'");
2424 push_selector;
2425 print_scaled(font_size(cur_val));
2426 tprint("pt");
2427 pop_selector;
2428 } else if (scan_keyword("pageref")) {
2429 scan_int();
2430 if (cur_val <= 0)
2431 normal_error("pdf backend", "invalid page number when asking 'pageref'");
2432 push_selector;
2433 print_int(pdf_get_obj(static_pdf, obj_type_page, cur_val, false));
2434 pop_selector;
2435 } else if (scan_keyword("colorstackinit")) {
2436 bool = scan_keyword("page");
2437 if (scan_keyword("direct"))
2438 cur_val = direct_always;
2439 else if (scan_keyword("page"))
2440 cur_val = direct_page;
2441 else if (scan_keyword("raw"))
2442 cur_val = direct_raw;
2443 else
2444 cur_val = set_origin;
2445 save_scanner_status = scanner_status;
2446 save_warning_index = warning_index;
2447 save_def_ref = def_ref;
2448 u = save_cur_string();
2449 scan_toks(false, true);
2450 s = tokens_to_string(def_ref);
2451 delete_token_ref(def_ref);
2452 def_ref = save_def_ref;
2453 warning_index = save_warning_index;
2454 scanner_status = save_scanner_status;
2455 str = makecstring(s);
2456 cur_val = newcolorstack(str, cur_val, bool);
2457 free(str);
2458 flush_str(s);
2459 cur_val_level = int_val_level;
2460 if (cur_val < 0) {
2461 print_err("Too many color stacks");
2462 help2("The number of color stacks is limited to 32768.",
2463 "I'll use the default color stack 0 here.");
2464 error();
2465 cur_val = 0;
2466 restore_cur_string(u);
2468 push_selector;
2469 print_int(cur_val);
2470 pop_selector;
2471 } else if (scan_keyword("version")) {
2472 push_selector;
2473 print_int(pdftex_version);
2474 pop_selector;
2475 } else if (scan_keyword("revision")) {
2476 ins_list(string_to_toks(pdftex_revision));
2477 return 2;
2478 } else {
2479 return 0;
2481 return 1;
2484 void conv_toks(void)
2486 int old_setting; /* holds |selector| setting */
2487 halfword p, q;
2488 int save_scanner_status; /* |scanner_status| upon entry */
2489 halfword save_def_ref; /* |def_ref| upon entry, important if inside `\.{\\message}' */
2490 halfword save_warning_index;
2491 boolean bool; /* temp boolean */
2492 str_number s; /* first temp string */
2493 int sn; /* lua chunk name */
2494 str_number u = 0; /* third temp string, will become non-nil if a string is already being built */
2495 int c = cur_chr; /* desired type of conversion */
2496 str_number str;
2497 int i = 0;
2498 /* Scan the argument for command |c| */
2499 switch (c) {
2500 case number_code:
2501 scan_int();
2502 push_selector;
2503 print_int(cur_val);
2504 pop_selector;
2505 break;
2506 case lua_function_code:
2507 scan_int();
2508 if (cur_val <= 0) {
2509 normal_error("luafunction", "invalid number");
2510 } else {
2511 u = save_cur_string();
2512 luacstrings = 0;
2513 luafunctioncall(cur_val);
2514 restore_cur_string(u);
2515 if (luacstrings > 0)
2516 lua_string_start();
2518 /* no further action */
2519 return;
2520 break;
2521 case lua_code:
2522 u = save_cur_string();
2523 save_scanner_status = scanner_status;
2524 save_def_ref = def_ref;
2525 save_warning_index = warning_index;
2526 sn = scan_lua_state();
2527 scan_toks(false, true);
2528 s = def_ref;
2529 warning_index = save_warning_index;
2530 def_ref = save_def_ref;
2531 scanner_status = save_scanner_status;
2532 luacstrings = 0;
2533 luatokencall(s, sn);
2534 delete_token_ref(s);
2535 restore_cur_string(u); /* TODO: check this, was different */
2536 if (luacstrings > 0)
2537 lua_string_start();
2538 /* no further action */
2539 return;
2540 break;
2541 case expanded_code:
2542 save_scanner_status = scanner_status;
2543 save_warning_index = warning_index;
2544 save_def_ref = def_ref;
2545 u = save_cur_string();
2546 scan_toks(false, true);
2547 warning_index = save_warning_index;
2548 scanner_status = save_scanner_status;
2549 ins_list(token_link(def_ref));
2550 def_ref = save_def_ref;
2551 restore_cur_string(u);
2552 /* no further action */
2553 return;
2554 break;
2555 case math_style_code:
2556 push_selector;
2557 print_math_style();
2558 pop_selector;
2559 break;
2560 case string_code:
2561 save_scanner_status = scanner_status;
2562 scanner_status = normal;
2563 get_token();
2564 scanner_status = save_scanner_status;
2565 push_selector;
2566 if (cur_cs != 0)
2567 sprint_cs(cur_cs);
2568 else
2569 print(cur_chr);
2570 pop_selector;
2571 break;
2572 case cs_string_code:
2573 save_scanner_status = scanner_status;
2574 scanner_status = normal;
2575 get_token();
2576 scanner_status = save_scanner_status;
2577 push_selector;
2578 if (cur_cs != 0)
2579 sprint_cs_name(cur_cs);
2580 else
2581 print(cur_chr);
2582 pop_selector;
2583 break;
2584 case roman_numeral_code:
2585 scan_int();
2586 push_selector;
2587 print_roman_int(cur_val);
2588 pop_selector;
2589 break;
2590 case meaning_code:
2591 save_scanner_status = scanner_status;
2592 scanner_status = normal;
2593 get_token();
2594 scanner_status = save_scanner_status;
2595 push_selector;
2596 print_meaning();
2597 pop_selector;
2598 break;
2599 case uchar_code:
2600 scan_char_num();
2601 push_selector;
2602 print(cur_val);
2603 pop_selector;
2604 break;
2605 case lua_escape_string_code:
2607 lstring escstr;
2608 int l = 0;
2609 save_scanner_status = scanner_status;
2610 save_def_ref = def_ref;
2611 save_warning_index = warning_index;
2612 scan_toks(false, true);
2613 bool = in_lua_escape;
2614 in_lua_escape = true;
2615 escstr.s = (unsigned char *) tokenlist_to_cstring(def_ref, false, &l);
2616 escstr.l = (unsigned) l;
2617 in_lua_escape = bool;
2618 delete_token_ref(def_ref);
2619 def_ref = save_def_ref;
2620 warning_index = save_warning_index;
2621 scanner_status = save_scanner_status;
2622 (void) lua_str_toks(escstr);
2623 ins_list(token_link(temp_token_head));
2624 free(escstr.s);
2625 return;
2627 /* no further action */
2628 break;
2629 case font_id_code:
2630 scan_font_ident();
2631 push_selector;
2632 print_int(cur_val);
2633 pop_selector;
2634 break;
2635 case font_name_code:
2636 scan_font_ident();
2637 push_selector;
2638 append_string((unsigned char *) font_name(cur_val),(unsigned) strlen(font_name(cur_val)));
2639 if (font_size(cur_val) != font_dsize(cur_val)) {
2640 tprint(" at ");
2641 print_scaled(font_size(cur_val));
2642 tprint("pt");
2644 pop_selector;
2645 break;
2646 case left_margin_kern_code:
2647 scan_int();
2648 if ((box(cur_val) == null) || (type(box(cur_val)) != hlist_node))
2649 normal_error("marginkern", "a non-empty hbox expected");
2650 push_selector;
2651 p = list_ptr(box(cur_val));
2652 while ((p != null) && (type(p) == glue_node)) {
2653 p = vlink(p);
2655 if ((p != null) && (type(p) == margin_kern_node) && (subtype(p) == left_side))
2656 print_scaled(width(p));
2657 else
2658 print_char('0');
2659 tprint("pt");
2660 pop_selector;
2661 break;
2662 case right_margin_kern_code:
2663 scan_int();
2664 if ((box(cur_val) == null) || (type(box(cur_val)) != hlist_node))
2665 normal_error("marginkern", "a non-empty hbox expected");
2666 push_selector;
2667 p = list_ptr(box(cur_val));
2668 if (p != null) {
2669 p = tail_of_list(p);
2671 there can be a leftskip, rightskip, penalty and yes, also a disc node with a nesting
2672 node that points to glue spec ... and we don't want to analyze that messy lot
2674 while ((p != null) && (type(p) == glue_node)) {
2675 p = alink(p);
2677 if ((p != null) && ! ((type(p) == margin_kern_node) && (subtype(p) == right_side))) {
2678 if (type(p) == disc_node) {
2679 q = alink(p);
2680 if ((q != null) && ((type(q) == margin_kern_node) && (subtype(q) == right_side))) {
2681 p = q;
2682 } else {
2684 officially we should look in the replace but currently protrusion doesn't
2685 work anyway with "foo\discretionary{}{}{bar-} " (no following char) so we
2686 don't need it now
2692 if ((p != null) && (type(p) == margin_kern_node) && (subtype(p) == right_side))
2693 print_scaled(width(p));
2694 else
2695 print_char('0');
2696 tprint("pt");
2697 pop_selector;
2698 break;
2699 case uniform_deviate_code:
2700 scan_int();
2701 push_selector;
2702 print_int(unif_rand(cur_val));
2703 pop_selector;
2704 break;
2705 case normal_deviate_code:
2706 push_selector;
2707 print_int(norm_rand());
2708 pop_selector;
2709 break;
2710 case math_char_class_code:
2712 mathcodeval mval;
2713 scan_int();
2714 mval = get_math_code(cur_val);
2715 push_selector;
2716 print_int(mval.class_value);
2717 pop_selector;
2719 break;
2720 case math_char_fam_code:
2722 mathcodeval mval;
2723 scan_int();
2724 mval = get_math_code(cur_val);
2725 push_selector;
2726 print_int(mval.family_value);
2727 pop_selector;
2729 break;
2730 case math_char_slot_code:
2732 mathcodeval mval;
2733 scan_int();
2734 mval = get_math_code(cur_val);
2735 push_selector;
2736 print_int(mval.character_value);
2737 pop_selector;
2739 break;
2740 case insert_ht_code:
2741 scan_register_num();
2742 push_selector;
2743 i = cur_val;
2744 p = page_ins_head;
2745 while (i >= subtype(vlink(p)))
2746 p = vlink(p);
2747 if (subtype(p) == i)
2748 print_scaled(height(p));
2749 else
2750 print_char('0');
2751 tprint("pt");
2752 pop_selector;
2753 break;
2754 case job_name_code:
2755 if (job_name == 0)
2756 open_log_file();
2757 push_selector;
2758 print_job_name();
2759 pop_selector;
2760 break;
2761 case format_name_code:
2762 if (job_name == 0)
2763 open_log_file();
2764 push_selector;
2765 print(format_name);
2766 pop_selector;
2767 break;
2768 case luatex_banner_code:
2769 push_selector;
2770 tprint(luatex_banner);
2771 pop_selector;
2772 break;
2773 case luatex_revision_code:
2774 push_selector;
2775 print(get_luatexrevision());
2776 pop_selector;
2777 break;
2778 case luatex_date_code:
2779 push_selector;
2780 print_int(get_luatex_date_info());
2781 pop_selector;
2782 break;
2783 case etex_code:
2784 push_selector;
2785 tprint(eTeX_version_string);
2786 pop_selector;
2787 break;
2788 case eTeX_revision_code:
2789 push_selector;
2790 tprint(eTeX_revision);
2791 pop_selector;
2792 break;
2793 case font_identifier_code:
2794 confusion("convert");
2795 break;
2796 default:
2797 confusion("convert");
2798 break;
2800 str = make_string();
2801 (void) str_toks(str_lstring(str));
2802 flush_str(str);
2803 ins_list(token_link(temp_token_head));
2806 void do_feedback(void)
2808 int c = cur_chr;
2809 str_number str;
2810 int done = 1;
2811 switch (c) {
2812 case dvi_feedback_code:
2813 if (get_o_mode() == OMODE_DVI) {
2814 done = do_feedback_dvi(c);
2815 } else {
2816 tex_error("unexpected use of \\dvifeedback",null);
2817 return ;
2819 if (done==0) {
2820 /* we recover */
2821 normal_warning("dvi backend","unexpected use of \\dvifeedback");
2822 return;
2823 } else if (done==2) {
2824 return;
2826 break;
2827 case pdf_feedback_code:
2828 if (get_o_mode() == OMODE_PDF) {
2829 done = do_feedback_pdf(c);
2830 } else {
2831 tex_error("unexpected use of \\pdffeedback",null);
2832 return ;
2834 if (done==0) {
2835 /* we recover */
2836 normal_warning("pdf backend","unexpected use of \\pdffeedback");
2837 return;
2838 } else if (done==2) {
2839 return;
2841 break;
2842 default:
2843 confusion("feedback");
2844 break;
2846 str = make_string();
2847 (void) str_toks(str_lstring(str));
2848 flush_str(str);
2849 ins_list(token_link(temp_token_head));
2852 void do_variable(void)
2854 int c = cur_chr;
2855 int done = 1;
2856 switch (c) {
2857 case dvi_variable_code:
2858 done = do_variable_dvi(c);
2859 if (done==0) {
2860 /* we recover */
2861 normal_warning("dvi backend","unexpected use of \\dvivariable");
2863 return;
2864 break;
2865 case pdf_variable_code:
2866 done = do_variable_pdf(c);
2867 if (done==0) {
2868 /* we recover */
2869 normal_warning("pdf backend","unexpected use of \\pdfvariable");
2871 return;
2872 break;
2873 default:
2874 confusion("variable");
2875 break;
2880 The following code is not used as we can only set math options and not query them. If
2881 an option is really important we will provide a proper variable. Most options are not
2882 meant for users anyway but for development.
2887 #define do_mathoption_int(i) \
2888 cur_cmd = assign_int_cmd; \
2889 cur_val = mathoption_int_base + i; \
2890 cur_tok = token_val(cur_cmd, cur_val); \
2891 back_input();
2893 void do_mathoption(void)
2895 if (scan_keyword("old")) { do_mathoption_int(c_mathoption_no_italic_compensation_code); }
2896 if (scan_keyword("noitaliccompensation")) { do_mathoption_int(c_mathoption_no_char_italic_code); }
2897 else if (scan_keyword("nocharitalic")) { do_mathoption_int(c_mathoption_use_old_fraction_scaling_code); }
2898 else if (scan_keyword("useoldfractionscaling")) { do_mathoption_int(c_mathoption_old_code); }
2899 else if (scan_keyword("umathcodemeaning")) { do_mathoption_int(c_mathoption_umathcode_meaning_code); }
2904 @ This boolean is keeping track of the lua string escape state
2906 boolean in_lua_escape;
2908 static int the_convert_string_dvi(halfword c, int i)
2910 return 0 ;
2913 static int the_convert_string_pdf(halfword c, int i)
2915 int ff;
2916 if (get_o_mode() != OMODE_PDF) {
2917 return 0;
2918 } else if (scan_keyword("lastlink")) {
2919 print_int(pdf_last_link);
2920 } else if (scan_keyword("retval")) {
2921 print_int(pdf_retval);
2922 } else if (scan_keyword("lastobj")) {
2923 print_int(pdf_last_obj);
2924 } else if (scan_keyword("lastannot")) {
2925 print_int(pdf_last_annot);
2926 } else if (scan_keyword("xformname")) {
2927 print_int(obj_info(static_pdf, i));
2928 } else if (scan_keyword("creationdate")) {
2929 return 0;
2930 } else if (scan_keyword("fontname")) {
2931 set_ff(i);
2932 print_int(obj_info(static_pdf, pdf_font_num(ff)));
2933 } else if (scan_keyword("fontobjnum")) {
2934 set_ff(i);
2935 print_int(pdf_font_num(ff));
2936 } else if (scan_keyword("fontsize")) {
2937 print_scaled(font_size(i));
2938 tprint("pt");
2939 } else if (scan_keyword("pageref")) {
2940 print_int(pdf_get_obj(static_pdf, obj_type_page, i, false));
2941 } else if (scan_keyword("colorstackinit")) {
2942 return 0;
2943 } else {
2944 return 0;
2946 return 1;
2949 str_number the_convert_string(halfword c, int i)
2951 int old_setting; /* saved |selector| setting */
2952 str_number ret = 0;
2953 boolean done = true ;
2954 old_setting = selector;
2955 selector = new_string;
2956 switch (c) {
2957 case number_code:
2958 print_int(i);
2959 break;
2960 /* case lua_function_code: */
2961 /* case lua_code: */
2962 /* case expanded_code: */
2963 case math_style_code:
2964 print_math_style();
2965 break;
2966 /* case string_code: */
2967 /* case cs_string_code: */
2968 case roman_numeral_code:
2969 print_roman_int(i);
2970 break;
2971 /* case meaning_code: */
2972 case uchar_code:
2973 print(i);
2974 break;
2975 /* lua_escape_string_code: */
2976 case font_id_code:
2977 print_int(i);
2978 break;
2979 case font_name_code:
2980 append_string((unsigned char *) font_name(i),(unsigned) strlen(font_name(i)));
2981 if (font_size(i) != font_dsize(i)) {
2982 tprint(" at ");
2983 print_scaled(font_size(i));
2984 tprint("pt");
2986 break;
2987 /* left_margin_kern_code: */
2988 /* right_margin_kern_code: */
2989 case uniform_deviate_code:
2990 print_int(unif_rand(i));
2991 break;
2992 case normal_deviate_code:
2993 print_int(norm_rand());
2994 break;
2995 /* math_char_class_code: */
2996 /* math_char_fam_code: */
2997 /* math_char_slot_code: */
2998 /* insert_ht_code: */
2999 case job_name_code:
3000 print_job_name();
3001 break;
3002 case format_name_code:
3003 print(format_name);
3004 break;
3005 case luatex_banner_code:
3006 tprint(luatex_banner);
3007 break;
3008 case luatex_revision_code:
3009 print(get_luatexrevision());
3010 break;
3011 case luatex_date_code:
3012 print_int(get_luatex_date_info());
3013 break;
3014 case etex_code:
3015 tprint(eTeX_version_string);
3016 break;
3017 case eTeX_revision_code:
3018 tprint(eTeX_revision);
3019 break;
3020 case font_identifier_code:
3021 print_font_identifier(i);
3022 break;
3023 /* backend: this might become obsolete */
3024 case dvi_feedback_code:
3025 done = the_convert_string_dvi(c,i);
3026 break;
3027 case pdf_feedback_code:
3028 done = the_convert_string_pdf(c,i);
3029 break;
3030 /* done */
3031 default:
3032 done = false;
3033 break;
3035 if (done) {
3036 ret = make_string();
3038 selector = old_setting;
3039 return ret;
3042 @ Another way to create a token list is via the \.{\\read} command. The sixteen
3043 files potentially usable for reading appear in the following global variables.
3044 The value of |read_open[n]| will be |closed| if stream number |n| has not been
3045 opened or if it has been fully read; |just_open| if an \.{\\openin} but not a
3046 \.{\\read} has been done; and |normal| if it is open and ready to read the next
3047 line.
3050 FILE *read_file[16]; /* used for \.{\\read} */
3051 int read_open[17]; /* state of |read_file[n]| */
3053 void initialize_read(void)
3055 int k;
3056 for (k = 0; k <= 16; k++)
3057 read_open[k] = closed;
3060 @ The |read_toks| procedure constructs a token list like that for any macro
3061 definition, and makes |cur_val| point to it. Parameter |r| points to the control
3062 sequence that will receive this token list.
3065 void read_toks(int n, halfword r, halfword j)
3067 halfword p; /* tail of the token list */
3068 halfword q; /* new node being added to the token list via |store_new_token| */
3069 int s; /* saved value of |align_state| */
3070 int m; /* stream number */
3071 scanner_status = defining;
3072 warning_index = r;
3073 p = get_avail();
3074 def_ref = p;
3075 set_token_ref_count(def_ref, 0);
3076 p = def_ref; /* the reference count */
3077 store_new_token(end_match_token);
3078 if ((n < 0) || (n > 15))
3079 m = 16;
3080 else
3081 m = n;
3082 s = align_state;
3083 align_state = 1000000; /* disable tab marks, etc. */
3084 do {
3085 /* Input and store tokens from the next line of the file */
3086 begin_file_reading();
3087 iname = m + 1;
3088 if (read_open[m] == closed) {
3090 Input for \.{\\read} from the terminal
3092 Here we input on-line into the |buffer| array, prompting the user explicitly
3093 if |n>=0|. The value of |n| is set negative so that additional prompts
3094 will not be given in the case of multi-line input.
3096 if (interaction > nonstop_mode) {
3097 if (n < 0) {
3098 prompt_input("");
3099 } else {
3100 wake_up_terminal();
3101 print_ln();
3102 sprint_cs(r);
3103 prompt_input(" =");
3104 n = -1;
3106 } else {
3107 fatal_error
3108 ("*** (cannot \\read from terminal in nonstop modes)");
3111 } else if (read_open[m] == just_open) {
3113 Input the first line of |read_file[m]|
3115 The first line of a file must be treated specially, since |lua_input_ln|
3116 must be told not to start with |get|.
3118 if (lua_input_ln(read_file[m], (m + 1), false)) {
3119 read_open[m] = normal;
3120 } else {
3121 lua_a_close_in(read_file[m], (m + 1));
3122 read_open[m] = closed;
3125 } else {
3127 Input the next line of |read_file[m]|
3129 An empty line is appended at the end of a |read_file|.
3131 if (!lua_input_ln(read_file[m], (m + 1), true)) {
3132 lua_a_close_in(read_file[m], (m + 1));
3133 read_open[m] = closed;
3134 if (align_state != 1000000) {
3135 runaway();
3136 print_err("File ended within \\read");
3137 help1("This \\read has unbalanced braces.");
3138 align_state = 1000000;
3139 error();
3144 ilimit = last;
3145 if (end_line_char_inactive)
3146 decr(ilimit);
3147 else
3148 buffer[ilimit] = (packed_ASCII_code) end_line_char_par;
3149 first = ilimit + 1;
3150 iloc = istart;
3151 istate = new_line;
3152 /* Handle \.{\\readline} and |goto done|; */
3153 if (j == 1) {
3154 while (iloc <= ilimit) {
3155 /* current line not yet finished */
3156 do_buffer_to_unichar(cur_chr, iloc);
3157 if (cur_chr == ' ')
3158 cur_tok = space_token;
3159 else
3160 cur_tok = cur_chr + other_token;
3161 store_new_token(cur_tok);
3163 } else {
3164 while (1) {
3165 get_token();
3166 if (cur_tok == 0) {
3167 /* |cur_cmd=cur_chr=0| will occur at the end of the line */
3168 break;
3170 if (align_state < 1000000) {
3171 /* unmatched `\.\}' aborts the line */
3172 do {
3173 get_token();
3174 } while (cur_tok != 0);
3175 align_state = 1000000;
3176 break;
3178 store_new_token(cur_tok);
3181 end_file_reading();
3183 } while (align_state != 1000000);
3184 cur_val = def_ref;
3185 scanner_status = normal;
3186 align_state = s;
@ Return a string built from a token list.
3192 str_number tokens_to_string(halfword p)
3194 int old_setting;
3195 if (selector == new_string)
3196 normal_error("tokens","tokens_to_string() called while selector = new_string");
3197 old_setting = selector;
3198 selector = new_string;
3199 show_token_list(token_link(p), null, -1);
3200 selector = old_setting;
3201 return make_string();
3204 @ @c
/*
    Helpers for building a C string in a growing buffer. They expect the
    following variables at the point of use: |ret| (the buffer), |i| (the
    next free index) and |alloci| (the allocated size); |Print_esc| also
    needs |e| (the escape character).

    |make_room(a)| makes sure that |a| more bytes plus a terminator fit. The
    buffer grows to what is needed plus 64 spare bytes, so a request of any
    size is honoured (a fixed increment of 64 is not enough for long strings).
*/
#define make_room(a) \
    if ((size_t) i + (size_t) (a) + 1 > (size_t) alloci) { \
        alloci = (unsigned) ((size_t) i + (size_t) (a) + 1 + 64); \
        ret = xrealloc(ret, alloci); \
    }

#define append_i_byte(a) ret[i++] = (char)(a)

#define Print_char(a) make_room(1); append_i_byte(a)

/*
    Append character |s| encoded as UTF-8. Values from 0x110000 upwards are
    appended as the single raw byte |s - 0x110000|.
*/
#define Print_uchar(s) { \
    make_room(4); \
    if ((s)<=0x7F) { \
        append_i_byte(s); \
    } else if ((s)<=0x7FF) { \
        append_i_byte(0xC0 + ((s) / 0x40)); \
        append_i_byte(0x80 + ((s) % 0x40)); \
    } else if ((s)<=0xFFFF) { \
        append_i_byte(0xE0 + ((s) / 0x1000)); \
        append_i_byte(0x80 + (((s) % 0x1000) / 0x40)); \
        append_i_byte(0x80 + (((s) % 0x1000) % 0x40)); \
    } else if ((s)>=0x110000) { \
        append_i_byte((s)-0x110000); \
    } else { \
        append_i_byte(0xF0 + ((s) / 0x40000)); \
        append_i_byte(0x80 + (((s) % 0x40000) / 0x1000)); \
        append_i_byte(0x80 + ((((s) % 0x40000) % 0x1000) / 0x40)); \
        append_i_byte(0x80 + ((((s) % 0x40000) % 0x1000) % 0x40)); \
    } \
}

/* Append the escape character (when it is a valid one) followed by the string |b|. */
#define Print_esc(b) { \
    const char *v = b; \
    if (e>0 && e<STRING_OFFSET) { \
        Print_uchar (e); \
    } \
    make_room(strlen(v)); \
    while (*v) { append_i_byte(*v); v++; } \
}

/* Append the string |b|. */
#define Print_str(b) { \
    const char *v = b; \
    make_room(strlen(v)); \
    while (*v) { append_i_byte(*v); v++; } \
}

/* Does the first character of string |a| have catcode 11 (letter)? */
#define is_cat_letter(a) \
    (get_char_cat_code(pool_to_unichar(str_string((a)))) == 11)
@ The actual token conversion in this function is now functionally equivalent to
|show_token_list|, except that it always prints the whole token list. TODO: check
whether this causes problems in the Lua library.
3258 char *tokenlist_to_cstring(int pp, int inhibit_par, int *siz)
3260 register int p, c, m;
3261 int q;
3262 int infop;
3263 char *s, *sh;
3264 int e = 0;
3265 char *ret;
3266 int match_chr = '#';
3267 int n = '0';
3268 unsigned alloci = 1024;
3269 int i = 0;
3270 p = pp;
3271 if (p == null) {
3272 if (siz != NULL)
3273 *siz = 0;
3274 return NULL;
3276 ret = xmalloc(alloci);
3277 p = token_link(p); /* skip refcount */
3278 if (p != null) {
3279 e = escape_char_par;
3281 while (p != null) {
3282 if (p < (int) fix_mem_min || p > (int) fix_mem_end) {
3283 Print_esc("CLOBBERED.");
3284 break;
3286 infop = token_info(p);
3287 if (infop >= cs_token_flag) {
3288 if (!(inhibit_par && infop == par_token)) {
3289 q = infop - cs_token_flag;
3290 if (q < hash_base) {
3291 if (q == null_cs) {
3292 Print_esc("csname");
3293 Print_esc("endcsname");
3294 } else {
3295 Print_esc("IMPOSSIBLE.");
3297 } else if ((q >= undefined_control_sequence) && ((q <= eqtb_size) || (q > eqtb_size + hash_extra))) {
3298 Print_esc("IMPOSSIBLE.");
3299 } else if ((cs_text(q) < 0) || (cs_text(q) >= str_ptr)) {
3300 Print_esc("NONEXISTENT.");
3301 } else {
3302 str_number txt = cs_text(q);
3303 sh = makecstring(txt);
3304 s = sh;
3305 if (is_active_cs(txt)) {
3306 s = s + 3;
3307 while (*s) {
3308 Print_char(*s);
3309 s++;
3311 } else {
3312 if (e>=0 && e<0x110000) Print_uchar(e);
3313 while (*s) {
3314 Print_char(*s);
3315 s++;
3317 if ((!single_letter(txt)) || is_cat_letter(txt)) {
3318 Print_char(' ');
3321 free(sh);
3324 } else {
3325 if (infop < 0) {
3326 Print_esc("BAD");
3327 } else {
3328 m = token_cmd(infop);
3329 c = token_chr(infop);
3330 switch (m) {
3331 case left_brace_cmd:
3332 case right_brace_cmd:
3333 case math_shift_cmd:
3334 case tab_mark_cmd:
3335 case sup_mark_cmd:
3336 case sub_mark_cmd:
3337 case spacer_cmd:
3338 case letter_cmd:
3339 case other_char_cmd:
3340 Print_uchar(c);
3341 break;
3342 case mac_param_cmd:
3343 if (!in_lua_escape && (is_in_csname==0))
3344 Print_uchar(c);
3345 Print_uchar(c);
3346 break;
3347 case out_param_cmd:
3348 Print_uchar(match_chr);
3349 if (c <= 9) {
3350 Print_char(c + '0');
3351 } else {
3352 Print_char('!');
3353 goto EXIT;
3355 break;
3356 case match_cmd:
3357 match_chr = c;
3358 Print_uchar(c);
3359 n++;
3360 Print_char(n);
3361 if (n > '9')
3362 goto EXIT;
3363 break;
3364 case end_match_cmd:
3365 if (c == 0) {
3366 Print_char('-');
3367 Print_char('>');
3369 break;
3370 default:
3371 not_so_bad(Print_esc);
3372 break;
3376 p = token_link(p);
3378 EXIT:
3379 ret[i] = '\0';
3380 if (siz != NULL)
3381 *siz = i;
3382 return ret;
3385 @ @c
3386 lstring *tokenlist_to_lstring(int pp, int inhibit_par)
3388 int siz;
3389 lstring *ret = xmalloc(sizeof(lstring));
3390 ret->s = (unsigned char *) tokenlist_to_cstring(pp, inhibit_par, &siz);
3391 ret->l = (size_t) siz;
3392 return ret;
3395 @ @c
3396 void free_lstring(lstring * ls)
3398 if (ls == NULL)
3399 return;
3400 if (ls->s != NULL)
3401 free(ls->s);
3402 free(ls);