updated manual & compresslevel cleanup (HH)
[luatex.git] / source / texk / web2c / luatexdir / tex / textoken.w
blob8d147f4e2a8da4a91ba90ee5bdff7c51b679e70e
1 % textoken.w
3 % Copyright 2006-2011 Taco Hoekwater <taco@@luatex.org>
5 % This file is part of LuaTeX.
7 % LuaTeX is free software; you can redistribute it and/or modify it under
8 % the terms of the GNU General Public License as published by the Free
9 % Software Foundation; either version 2 of the License, or (at your
10 % option) any later version.
12 % LuaTeX is distributed in the hope that it will be useful, but WITHOUT
13 % ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 % FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 % License for more details.
17 % You should have received a copy of the GNU General Public License along
18 % with LuaTeX; if not, see <http://www.gnu.org/licenses/>.
20 @ @c
22 #include "ptexlib.h"
24 @ @c
/* Shorthands for integer parameters: each name expands to an |int_par| lookup. */
25 #define pausing int_par(pausing_code)
26 #define cat_code_table int_par(cat_code_table_code)
27 #define tracing_nesting int_par(tracing_nesting_code)
28 #define suppress_outer_error int_par(suppress_outer_error_code)
29 #define suppress_mathpar_error int_par(suppress_mathpar_error_code)
/* Shorthands that expand to |equiv| of a fixed location or of a base plus index |A|. */
32 #define every_eof equiv(every_eof_loc)
33 #define box(A) equiv(box_base+(A))
34 #define toks(A) equiv(toks_base+(A))
/* True when |line_catcode_table| equals |NO_CAT_TABLE|. */
36 #define detokenized_line() (line_catcode_table==NO_CAT_TABLE)
// First form of |do_get_cat_code|: stores a category code for character |b| in |a|.
// It tests |line_catcode_table<=-0xFF| first (then the code is |-line_catcode_table-0xFF|),
// next a table other than |DEFAULT_CAT_TABLE|, and finally falls back to |cat_code_table|.
// NOTE(review): the same macro is defined again right after this one with the tests in a
// different order; presumably this first form is disabled in the original file -- confirm.
39 #define do_get_cat_code(a,b) do { \
40 if (line_catcode_table<=-0xFF) \
41 a= - line_catcode_table - 0xFF ; \
42 else if (line_catcode_table!=DEFAULT_CAT_TABLE) \
43 a=get_cat_code(line_catcode_table,b); \
44 else \
45 a=get_cat_code(cat_code_table,b); \
46 } while (0)
/*
    Store the category code of character |b| in |a|.

    When |line_catcode_table| is |DEFAULT_CAT_TABLE| the code comes from the
    current |cat_code_table|. Otherwise a value of |-0xFF| or below encodes one
    fixed category code for the whole line (|-line_catcode_table-0xFF|), and
    any other value names the table to look |b| up in.
*/
#define do_get_cat_code(a,b) do {                                  \
    if (line_catcode_table == DEFAULT_CAT_TABLE) {                 \
        (a) = get_cat_code(cat_code_table, (b));                   \
    } else if (line_catcode_table <= -0xFF) {                      \
        (a) = -line_catcode_table - 0xFF;                          \
    } else {                                                       \
        (a) = get_cat_code(line_catcode_table, (b));               \
    }                                                              \
} while (0)
59 @ The \TeX\ system does nearly all of its own memory allocation, so that it can
60 readily be transported into environments that do not have automatic facilities
61 for strings, garbage collection, etc., and so that it can be in control of what
62 error messages the user receives. The dynamic storage requirements of \TeX\ are
63 handled by providing two large arrays called |fixmem| and |varmem| in which
64 consecutive blocks of words are used as nodes by the \TeX\ routines.
66 Pointer variables are indices into this array, or into another array called
67 |eqtb| that will be explained later. A pointer variable might also be a special
68 flag that lies outside the bounds of |mem|, so we allow pointers to assume any
69 |halfword| value. The minimum halfword value represents a null pointer. \TeX\
70 does not assume that |mem[null]| exists.
72 @ Locations in |fixmem| are used for storing one-word records; a conventional
73 \.{AVAIL} stack is used for allocation in this array.
76 smemory_word *fixmem; /* the big dynamic storage area for one-word nodes; |get_avail| reallocates it when it is full */
77 unsigned fix_mem_min; /* the smallest location of one-word memory in use; lower bound of the sanity check in |show_token_list| */
78 unsigned fix_mem_max; /* the largest location of one-word memory in use; raised by |get_avail| whenever |fixmem| grows */
80 @ In order to study the memory requirements of particular applications, it is
81 possible to prepare a version of \TeX\ that keeps track of current and maximum
82 memory usage. When code between the delimiters |@!stat| $\ldots$ |tats| is not
83 commented out, \TeX\ will run a bit slower but it will report these statistics
84 when |tracing_stats| is sufficiently large.
87 int var_used, dyn_used; /* how much memory is in use; |dyn_used| is bumped by |get_avail| and lowered by |flush_list| */
89 halfword avail; /* head of the list of available one-word nodes: |get_avail| pops from it, |flush_list| pushes onto it */
90 unsigned fix_mem_end; /* the last one-word node used in |mem|; |get_avail| advances it when the |avail| list is empty */
/* The six list heads below are allocated once, in this file's |initialize_tokens|. */
92 halfword garbage; /* head of a junk list, write only */
93 halfword temp_token_head; /* head of a temporary list of some kind */
94 halfword hold_token_head; /* head of a temporary list of another kind */
95 halfword omit_template; /* a constant token list */
96 halfword null_list; /* permanently empty list */
97 halfword backup_head; /* head of token list built by |scan_keyword| */
99 @ @c
100 void initialize_tokens(void)
102 halfword p;
103 avail = null;
104 fix_mem_end = 0;
105 p = get_avail();
106 temp_token_head = p;
107 set_token_info(temp_token_head, 0);
108 p = get_avail();
109 hold_token_head = p;
110 set_token_info(hold_token_head, 0);
111 p = get_avail();
112 omit_template = p;
113 set_token_info(omit_template, 0);
114 p = get_avail();
115 null_list = p;
116 set_token_info(null_list, 0);
117 p = get_avail();
118 backup_head = p;
119 set_token_info(backup_head, 0);
120 p = get_avail();
121 garbage = p;
122 set_token_info(garbage, 0);
123 dyn_used = 0; /* initialize statistics */
126 @ The function |get_avail| returns a pointer to a new one-word node whose |link|
127 field is null. However, \TeX\ will halt if there is no more room left.
128 @^inner loop@>
130 If the available-space list is empty, i.e., if |avail=null|, we try first to
131 increase |fix_mem_end|. If that cannot be done, i.e., if
132 |fix_mem_end=fix_mem_max|, we try to reallocate array |fixmem|. If that doesn't
133 work, we have to quit.
136 halfword get_avail(void)
137 { /* single-word node allocation */
138 unsigned p; /* the new node being got */
139 unsigned t;
140 p = (unsigned) avail; /* get top location in the |avail| stack */
141 if (p != null) {
142 avail = token_link(avail); /* and pop it off */
143 } else if (fix_mem_end < fix_mem_max) { /* or go into virgin territory */
144 incr(fix_mem_end);
145 p = fix_mem_end;
146 } else {
147 smemory_word *new_fixmem; /* the big dynamic storage area */
148 t = (fix_mem_max / 5);
149 new_fixmem =
150 fixmemcast(realloc
151 (fixmem, sizeof(smemory_word) * (fix_mem_max + t + 1)));
152 if (new_fixmem == NULL) {
153 runaway(); /* if memory is exhausted, display possible runaway text */
154 overflow("token memory size", fix_mem_max);
155 } else {
156 fixmem = new_fixmem;
158 memset(voidcast(fixmem + fix_mem_max + 1), 0, t * sizeof(smemory_word));
159 fix_mem_max += t;
160 p = ++fix_mem_end;
162 token_link(p) = null; /* provide an oft-desired initialization of the new node */
163 incr(dyn_used); /* maintain statistics */
164 return (halfword) p;
167 @ The procedure |flush_list(p)| frees an entire linked list of one-word nodes
168 that starts at position |p|.
169 @^inner loop@>
172 void flush_list(halfword p)
173 { /* makes list of single-word nodes available */
174 halfword q, r; /* list traversers */
175 if (p != null) {
176 r = p;
177 do {
178 q = r;
179 r = token_link(r);
180 decr(dyn_used);
181 } while (r != null); /* now |q| is the last node on the list */
182 token_link(q) = avail;
183 avail = p;
187 @ A \TeX\ token is either a character or a control sequence, and it is @^token@>
188 represented internally in one of two ways: (1)~A character whose ASCII code
189 number is |c| and whose command code is |m| is represented as the number
190 $2^{21}m+c$; the command code is in the range |1<=m<=14|. (2)~A control sequence
191 whose |eqtb| address is |p| is represented as the number |cs_token_flag+p|. Here
192 |cs_token_flag=@t$2^{25}-1$@>| is larger than $2^{21}m+c$, yet it is small enough
193 that |cs_token_flag+p< max_halfword|; thus, a token fits comfortably in a
194 halfword.
196 A token |t| represents a |left_brace| command if and only if
197 |t<left_brace_limit|; it represents a |right_brace| command if and only if we
198 have |left_brace_limit<=t<right_brace_limit|; and it represents a |match| or
199 |end_match| command if and only if |match_token<=t<=end_match_token|. The
200 following definitions take care of these token-oriented constants and a few
201 others.
203 @ A token list is a singly linked list of one-word nodes in |mem|, where each
204 word contains a token and a link. Macro definitions, output-routine definitions,
205 marks, \.{\\write} texts, and a few other things are remembered by \TeX\ in the
206 form of token lists, usually preceded by a node with a reference count in its
207 |token_ref_count| field. The token stored in location |p| is called |info(p)|.
209 Three special commands appear in the token lists of macro definitions. When
210 |m=match|, it means that \TeX\ should scan a parameter for the current macro;
211 when |m=end_match|, it means that parameter matching should end and \TeX\ should
212 start reading the macro text; and when |m=out_param|, it means that \TeX\ should
213 insert parameter number |c| into the text at this point.
215 The enclosing \.{\char'173} and \.{\char'175} characters of a macro definition
216 are omitted, but the final right brace of an output routine is included at the
217 end of its token list.
219 Here is an example macro definition that illustrates these conventions. After
220 \TeX\ processes the text
222 $$\.{\\def\\mac a\#1\#2 \\b \{\#1\\-a \#\#1\#2 \#2\}}$$
224 the definition of \.{\\mac} is represented as a token list containing
226 $$\def\,{\hskip2pt}
227 \vbox{\halign{\hfil#\hfil\cr
228 (reference count), |letter|\,\.a, |match|\,\#, |match|\,\#, |spacer|\,\.\ ,
229 \.{\\b}, |end_match|,\cr
230 |out_param|\,1, \.{\\-}, |letter|\,\.a, |spacer|\,\.\ , |mac_param|\,\#,
231 |other_char|\,\.1,\cr
232 |out_param|\,2, |spacer|\,\.\ , |out_param|\,2.\cr}}$$
234 The procedure |scan_toks| builds such token lists, and |macro_call| does the
235 parameter matching. @^reference counts@>
237 Examples such as $$\.{\\def\\m\{\\def\\m\{a\}\ b\}}$$ explain why reference
238 counts would be needed even if \TeX\ had no \.{\\let} operation: When the token
239 list for \.{\\m} is being read, the redefinition of \.{\\m} changes the |eqtb|
240 entry before the token list has been fully consumed, so we dare not simply
241 destroy a token list when its control sequence is being redefined.
243 If the parameter-matching part of a definition ends with `\.{\#\{}', the
244 corresponding token list will have `\.\{' just before the `|end_match|' and also
245 at the very end. The first `\.\{' is used to delimit the parameter; the second
246 one keeps the first from disappearing.
248 The |print_meaning| subroutine displays |cur_cmd| and |cur_chr| in symbolic form,
249 including the expansion of a macro or mark.
/*
    Print the meaning of the current |cur_cmd|/|cur_chr| pair. The symbolic
    command is printed with |print_cmd_chr|; for commands at or above
    |call_cmd|, and for the mark commands whose |cur_chr| is below
    |marks_code|, a colon and a line break follow and the associated token
    list is shown with |token_show|.
*/
252 void print_meaning(void)
/* remap \mathchar onto \Umathchar */
/* NOTE(review): lines are missing around this statement in the copy at hand; confirm
   whether the remap is active before changing anything here. */
256 if (cur_cmd == math_given_cmd) {
257 cur_cmd = xmath_given_cmd ;
260 print_cmd_chr((quarterword) cur_cmd, cur_chr);
261 if (cur_cmd >= call_cmd) {
/* here |cur_chr| is passed to |token_show| as the token list to display */
262 print_char(':');
263 print_ln();
264 token_show(cur_chr);
265 } else {
266 /* Show the meaning of a mark node */
267 if ((cur_cmd == top_bot_mark_cmd) && (cur_chr < marks_code)) {
268 print_char(':');
269 print_ln();
/* select the mark of class 0 that |cur_chr| names; any other value shows |top_mark(0)| */
270 switch (cur_chr) {
271 case first_mark_code:
272 token_show(first_mark(0));
273 break;
274 case bot_mark_code:
275 token_show(bot_mark(0));
276 break;
277 case split_first_mark_code:
278 token_show(split_first_mark(0));
279 break;
280 case split_bot_mark_code:
281 token_show(split_bot_mark(0));
282 break;
283 default:
284 token_show(top_mark(0));
285 break;
291 @ The procedure |show_token_list|, which prints a symbolic form of the token list
292 that starts at a given node |p|, illustrates these conventions. The token list
293 being displayed should not begin with a reference count. However, the procedure
294 is intended to be robust, so that if the memory links are awry or if |p| is not
295 really a pointer to a token list, nothing catastrophic will happen.
297 An additional parameter |q| is also given; this parameter is either null or it
298 points to a node in the token list where a certain magic computation takes place
299 that will be explained later. (Basically, |q| is non-null when we are printing
300 the two-line context information at the time of an error message; |q| marks the
301 place corresponding to where the second line should begin.)
303 For example, if |p| points to the node containing the first \.a in the token list
304 above, then |show_token_list| will print the string $$\hbox{`\.{a\#1\#2\ \\b\
305 ->\#1\\-a\ \#\#1\#2\ \#2}';}$$ and if |q| points to the node containing the
306 second \.a, the magic computation will be performed just before the second \.a is
307 printed.
309 The generation will stop, and `\.{\\ETC.}' will be printed, if the length of
310 printing exceeds a given limit~|l|. Anomalous entries are printed in the form of
311 control sequences that are not followed by a blank space, e.g., `\.{\\BAD.}';
312 this cannot be confused with actual control sequences because a real control
313 sequence named \.{BAD} would come out `\.{\\BAD\ }'.
/*
    Fallback used by |show_token_list| for a command code that has no display
    rule of its own. It reads the local variables |m| (command) and |c|
    (character) of the calling function and prints through |emit|, which must
    accept a C string. The three assignment commands print a fixed label when
    |c| lies in the corresponding backend range and nothing otherwise; every
    other command prints |BAD|.
*/
#define not_so_bad(emit)                                                  \
    switch (m) {                                                          \
    case assign_int_cmd:                                                  \
        if ((backend_int_base) <= c && c <= (backend_int_last))           \
            emit("[internal backend integer]");                           \
        break;                                                            \
    case assign_dimen_cmd:                                                \
        if ((backend_dimen_base) <= c && c <= (backend_dimen_last))       \
            emit("[internal backend dimension]");                         \
        break;                                                            \
    case assign_toks_cmd:                                                 \
        if ((backend_toks_base) <= c && c <= (backend_toks_last))         \
            emit("[internal backend tokenlist]");                         \
        break;                                                            \
    default:                                                              \
        emit("BAD");                                                      \
        break;                                                            \
    }
335 void show_token_list(int p, int q, int l)
337 int m, c; /* pieces of a token */
338 ASCII_code match_chr = '#'; /* character used in a `|match|' */
339 ASCII_code n = '0'; /* the highest parameter number, as an ASCII digit */
340 tally = 0;
341 if (l < 0)
342 l = 0x3FFFFFFF;
343 while ((p != null) && (tally < l)) {
344 if (p == q) {
345 /* Do magic computation */
346 set_trick_count();
348 /* Display token |p|, and |return| if there are problems */
349 if ((p < (int) fix_mem_min) || (p > (int) fix_mem_end)) {
350 tprint_esc("CLOBBERED.");
351 return;
353 if (token_info(p) >= cs_token_flag) {
354 if (!((inhibit_par_tokens) && (token_info(p) == par_token)))
355 print_cs(token_info(p) - cs_token_flag);
356 } else {
357 m = token_cmd(token_info(p));
358 c = token_chr(token_info(p));
359 if (token_info(p) < 0) {
360 tprint_esc("BAD");
361 } else {
363 Display the token $(|m|,|c|)$
365 The procedure usually ``learns'' the character code used for macro
366 parameters by seeing one in a |match| command before it runs into any
367 |out_param| commands.
369 switch (m) {
370 case left_brace_cmd:
371 case right_brace_cmd:
372 case math_shift_cmd:
373 case tab_mark_cmd:
374 case sup_mark_cmd:
375 case sub_mark_cmd:
376 case spacer_cmd:
377 case letter_cmd:
378 case other_char_cmd:
379 print(c);
380 break;
381 case mac_param_cmd:
382 if (!in_lua_escape && (is_in_csname==0))
383 print(c);
384 print(c);
385 break;
386 case out_param_cmd:
387 print(match_chr);
388 if (c <= 9) {
389 print_char(c + '0');
390 } else {
391 print_char('!');
392 return;
394 break;
395 case match_cmd:
396 match_chr = c;
397 print(c);
398 incr(n);
399 print_char(n);
400 if (n > '9')
401 return;
402 break;
403 case end_match_cmd:
404 if (c == 0)
405 tprint("->");
406 break;
407 default:
408 not_so_bad(tprint);
409 break;
413 p = token_link(p);
415 if (p != null)
416 tprint_esc("ETC.");
419 @ @c
/*
    Decode the character that starts at |buffer[b]| with |str2uni|, store it
    in |a|, and advance the index |b| by |utf8_size(a)|. Both arguments must be
    assignable.
*/
#define do_buffer_to_unichar(a,b) do {                  \
    (a) = (halfword) str2uni(buffer + (b));             \
    (b) += utf8_size(a);                                \
} while (0)
425 @ Here's the way we sometimes want to display a token list, given a pointer to
426 its reference count; the pointer may be null.
429 void token_show(halfword p)
431 if (p != null)
432 show_token_list(token_link(p), null, 10000000);
435 @ |delete_token_ref| is called when a pointer to a token list's reference count
436 is being removed. This means that the token list should disappear if the
437 reference count was |null|; otherwise the count should be decreased by one.
438 @^reference counts@>
440 @ |p| points to the reference count of a token list that is losing one
441 reference.
444 void delete_token_ref(halfword p)
446 if (token_ref_count(p) == 0)
447 flush_list(p);
448 else
449 decr(token_ref_count(p));
452 @ @c
/*
    Function wrapper around the |do_get_cat_code| macro: returns the category
    code that the macro produces for character |curchr|.
*/
int get_char_cat_code(int curchr)
{
    int catcode;
    do_get_cat_code(catcode, curchr);
    return catcode;
}
460 @ @c
461 static void invalid_character_error(void)
463 const char *hlp[] = {
464 "A funny symbol that I can't read has just been input.",
465 "Continue, and I'll forget that it ever happened.",
466 NULL
468 deletions_allowed = false;
469 tex_error("Text line contains an invalid character", hlp);
470 deletions_allowed = true;
473 @ @c
/* Forward declarations of helpers that are defined further down in this file. */
/* |get_next_file| jumps back to its |RESWITCH| label when this returns true. */
474 static boolean process_sup_mark(void); /* below */
/* The result is stored in |istate| by |get_next_file|. */
476 static int scan_control_sequence(void); /* below */
/* Result of |next_line|. NOTE(review): judging by the names, the caller should continue,
   return, or restart -- confirm against the definition of |next_line|. */
478 typedef enum {
479 next_line_ok,
480 next_line_return,
481 next_line_restart
482 } next_line_retval;
484 static next_line_retval next_line(void); /* below */
486 @ In case you are getting bored, here is a slightly less trivial routine: Given a
487 string of lowercase letters, like `\.{pt}' or `\.{plus}' or `\.{width}', the
488 |scan_keyword| routine checks to see whether the next tokens of input match this
489 string. The match must be exact, except that uppercase letters will match their
490 lowercase counterparts; uppercase equivalents are determined by subtracting
491 |"a"-"A"|, rather than using the |uc_code| table, since \TeX\ uses this routine
492 only for its own limited set of keywords.
494 If a match is found, the characters are effectively removed from the input and
495 |true| is returned. Otherwise |false| is returned, and the input is left
496 essentially unchanged (except for the fact that some macros may have been
497 expanded, etc.). @^inner loop@>
500 boolean scan_keyword(const char *s)
501 { /* look for a given string */
502 halfword p; /* tail of the backup list */
503 halfword q; /* new node being added to the token list via |store_new_token| */
504 const char *k; /* index into |str_pool| */
505 halfword save_cur_cs = cur_cs;
506 if (strlen(s) == 0) /* was assert (strlen(s) > 1); */
507 return false ; /* but not with newtokenlib zero keyword simply doesn't match */
508 p = backup_head;
509 token_link(p) = null;
510 k = s;
511 while (*k) {
512 get_x_token(); /* recursion is possible here */
513 if ((cur_cs == 0) && ((cur_chr == *k) || (cur_chr == *k - 'a' + 'A'))) {
514 store_new_token(cur_tok);
515 k++;
516 } else if ((cur_cmd != spacer_cmd) || (p != backup_head)) {
518 crashes on some alignments:
520 if (p != backup_head) {
521 q = get_avail();
522 token_info(q) = cur_tok;
523 token_link(q) = null;
524 token_link(p) = q;
525 begin_token_list(token_link(backup_head), backed_up);
526 } else {
527 back_input();
530 back_input();
531 if (p != backup_head) {
532 begin_token_list(token_link(backup_head), backed_up);
534 /* */
535 cur_cs = save_cur_cs;
536 return false;
539 if (token_link(backup_head) != null)
540 flush_list(token_link(backup_head));
541 cur_cs = save_cur_cs;
542 return true;
545 @ We cannot return |undefined_control_sequence| under some conditions
546 (inside |shift_case|, for example). This needs thinking.
// First variant of |active_to_cs|: looks up the control sequence of an active character.
// The lookup name is the string for 0xFFFF from |uni2str|, followed by the string for
// |curchr| when |curchr>0|. A nonzero |force| clears |no_new_control_sequence| for the
// duration of the lookup; the saved value is restored before returning.
// NOTE(review): two more definitions with this name follow; presumably this variant is
// disabled in the original file -- confirm before editing it.
551 halfword active_to_cs(int curchr, int force)
553 halfword curcs;
554 char *a, *b;
555 char *utfbytes = xmalloc(8);
556 int nncs = no_new_control_sequence;
557 a = (char *) uni2str(0xFFFF);
558 utfbytes = strcpy(utfbytes, a);
559 if (force)
560 no_new_control_sequence = false;
561 if (curchr > 0) {
562 b = (char *) uni2str((unsigned) curchr);
563 utfbytes = strcat(utfbytes, b);
564 free(b);
565 curcs = string_lookup(utfbytes, strlen(utfbytes));
566 } else {
// no character part: cut the name after three bytes; the lookup length is nevertheless 4
567 utfbytes[3] = '\0';
568 curcs = string_lookup(utfbytes, 4);
570 no_new_control_sequence = nncs;
571 free(a);
572 free(utfbytes);
573 return curcs;
577 /*static char * FFFF = "\xEF\xBF\xBF";*/ /* 0xFFFF */
579 halfword active_to_cs(int curchr, int force)
581 halfword curcs;
582 int nncs = no_new_control_sequence;
583 if (force) {
584 no_new_control_sequence = false;
586 if (curchr > 0) {
587 char *b = (char *) uni2str((unsigned) curchr);
588 char *utfbytes = xmalloc(8);
589 utfbytes = strcpy(utfbytes, "\xEF\xBF\xBF");
590 utfbytes = strcat(utfbytes, b);
591 free(b);
592 curcs = string_lookup(utfbytes, utf8_size(curchr)+3);
593 free(utfbytes);
594 } else {
595 curcs = string_lookup("\xEF\xBF\xBF", 4); /* 0xFFFF ... why not 3 ? */
597 no_new_control_sequence = nncs;
598 return curcs;
// Build the lookup name of an active character in one step: an 8-byte buffer from
// |xmalloc| receives the bytes 239, 191, 191 followed by the UTF-8 encoding of |unic|,
// then a terminating zero. The buffer is returned; the only visible caller frees it.
// A value of 0x110000 or more is written as the single byte |unic-0x110000|.
// NOTE(review): the only visible caller is the third |active_to_cs| variant below, which
// itself looks disabled -- confirm whether this helper is compiled at all.
603 static unsigned char *uni2csstr(unsigned unic)
605 unsigned char *buf = xmalloc(8);
606 unsigned char *pt = buf;
607 *pt++ = 239; *pt++ = 191; *pt++ = 191; // 0xFFFF
608 if (unic < 0x80)
609 *pt++ = (unsigned char) unic;
610 else if (unic < 0x800) {
// two-byte form
611 *pt++ = (unsigned char) (0xc0 | (unic >> 6));
612 *pt++ = (unsigned char) (0x80 | (unic & 0x3f));
613 } else if (unic >= 0x110000) {
614 *pt++ = (unsigned char) (unic - 0x110000);
615 } else if (unic < 0x10000) {
// three-byte form
616 *pt++ = (unsigned char) (0xe0 | (unic >> 12));
617 *pt++ = (unsigned char) (0x80 | ((unic >> 6) & 0x3f));
618 *pt++ = (unsigned char) (0x80 | (unic & 0x3f));
619 } else {
// four-byte form for 0x10000 up to 0x10FFFF
620 int u, z, y, x;
621 unsigned val = unic - 0x10000;
622 u = (int) (((val & 0xf0000) >> 16) + 1);
623 z = (int) ((val & 0x0f000) >> 12);
624 y = (int) ((val & 0x00fc0) >> 6);
625 x = (int) (val & 0x0003f);
626 *pt++ = (unsigned char) (0xf0 | (u >> 2));
627 *pt++ = (unsigned char) (0x80 | ((u & 3) << 4) | z);
628 *pt++ = (unsigned char) (0x80 | y);
629 *pt++ = (unsigned char) (0x80 | x);
631 *pt = '\0';
632 return buf;
// Third variant of |active_to_cs|: same contract as the variants above, but the lookup
// name comes from |uni2csstr| and is freed after the lookup.
// NOTE(review): this variant uses |FFFF|, whose only visible definition is commented
// out, and it duplicates an earlier definition; presumably it is disabled -- confirm.
635 halfword active_to_cs(int curchr, int force)
637 halfword curcs;
638 int nncs = no_new_control_sequence;
639 if (force) {
640 no_new_control_sequence = false;
642 if (curchr > 0) {
643 char * utfbytes = (char *) uni2csstr((unsigned) curchr);
644 curcs = string_lookup(utfbytes, utf8_size(curchr)+3);
645 free(utfbytes);
646 } else {
647 curcs = string_lookup(FFFF, 4); // 0xFFFF ... why not 3 ?
649 no_new_control_sequence = nncs;
650 return curcs;
655 @ TODO this function should listen to \.{\\escapechar}
657 @ prints a control sequence
660 static char *cs_to_string(halfword p)
662 const char *s;
663 char *sh;
664 int k = 0;
665 static char ret[256] = { 0 };
666 if (p == 0 || p == null_cs) {
667 ret[k++] = '\\';
668 s = "csname";
669 while (*s) {
670 ret[k++] = *s++;
672 ret[k++] = '\\';
673 s = "endcsname";
674 while (*s) {
675 ret[k++] = *s++;
677 ret[k] = 0;
679 } else {
680 str_number txt = cs_text(p);
681 sh = makecstring(txt);
682 s = sh;
683 if (is_active_cs(txt)) {
684 s = s + 3;
685 while (*s) {
686 ret[k++] = *s++;
688 ret[k] = 0;
689 } else {
690 ret[k++] = '\\';
691 while (*s) {
692 ret[k++] = *s++;
694 ret[k] = 0;
696 free(sh);
698 return (char *) ret;
701 @ TODO this is a quick hack, will be solved differently soon
704 static char *cmd_chr_to_string(int cmd, int chr)
706 char *s;
707 str_number str;
708 int sel = selector;
709 selector = new_string;
710 print_cmd_chr((quarterword) cmd, chr);
711 str = make_string();
712 s = makecstring(str);
713 selector = sel;
714 flush_str(str);
715 return s;
718 @ The heart of \TeX's input mechanism is the |get_next| procedure, which we shall
719 develop in the next few sections of the program. Perhaps we shouldn't actually
720 call it the ``heart,'' however, because it really acts as \TeX's eyes and mouth,
721 reading the source files and gobbling them up. And it also helps \TeX\ to
722 regurgitate stored token lists that are to be processed again. @^eyes and mouth@>
724 The main duty of |get_next| is to input one token and to set |cur_cmd| and
725 |cur_chr| to that token's command code and modifier. Furthermore, if the input
726 token is a control sequence, the |eqtb| location of that control sequence is
727 stored in |cur_cs|; otherwise |cur_cs| is set to zero.
729 Underlying this simple description is a certain amount of complexity because of
730 all the cases that need to be handled. However, the inner loop of |get_next| is
731 reasonably short and fast.
733 When |get_next| is asked to get the next token of a \.{\\read} line,
734 it sets |cur_cmd=cur_chr=cur_cs=0| in the case that no more tokens
735 appear on that line. (There might not be any tokens at all, if the
736 |end_line_char| has |ignore| as its catcode.)
738 The value of |par_loc| is the |eqtb| address of `\.{\\par}'. This quantity is
739 needed because a blank line of input is supposed to be exactly equivalent to the
740 appearance of \.{\\par}; we must set |cur_cs:=par_loc| when detecting a blank
741 line.
744 halfword par_loc; /* location of `\.{\\par}' in |eqtb|; stored in |cur_cs| when a blank line is read */
745 halfword par_token; /* token representing `\.{\\par}'; compared against in |show_token_list|, inserted by |check_outer_validity| */
747 @ Parts of |get_next| are executed more often than any other instructions of \TeX.
748 @^mastication@>@^inner loop@>
750 The global variable |force_eof| is normally |false|; it is set |true| by an
751 \.{\\endinput} command. |luacstrings| is the number of lua print statements
752 waiting to be input, it is changed by |luatokencall|.
755 boolean force_eof; /* should the next \.{\\input} be aborted early? (set by \.{\\endinput}, per the text above) */
756 int luacstrings; /* how many lua strings are waiting to be input? (changed by |luatokencall|, per the text above) */
758 @ If the user has set the |pausing| parameter to some positive value, and if
759 nonstop mode has not been selected, each line of input is displayed on the
760 terminal and the transcript file, followed by `\.{=>}'. \TeX\ waits for a
761 response. If the response is simply |carriage_return|, the line is accepted as it
762 stands, otherwise the line typed is used instead of the line in the file.
765 void firm_up_the_line(void)
767 int k; /* an index into |buffer| */
768 ilimit = last;
769 if (pausing > 0) {
770 if (interaction > nonstop_mode) {
771 wake_up_terminal();
772 print_ln();
773 if (istart < ilimit) {
774 for (k = istart; k <= ilimit - 1; k++)
775 print_char(buffer[k]);
777 first = ilimit;
778 prompt_input("=>"); /* wait for user response */
779 if (last > first) {
780 for (k = first; k < +last - 1; k++) /* move line down in buffer */
781 buffer[k + istart - first] = buffer[k];
782 ilimit = istart + last - first;
788 @ Before getting into |get_next|, let's consider the subroutine that is called
789 when an `\.{\\outer}' control sequence has been scanned or when the end of a file
790 has been reached. These two cases are distinguished by |cur_cs|, which is zero at
791 the end of a file.
794 void check_outer_validity(void)
796 halfword p; /* points to inserted token list */
797 halfword q; /* auxiliary pointer */
798 if (suppress_outer_error)
799 return;
800 if (scanner_status != normal) {
801 deletions_allowed = false;
802 /* Back up an outer control sequence so that it can be reread; */
803 /* An outer control sequence that occurs in a \.{\\read} will not be reread,
804 since the error recovery for \.{\\read} is not very powerful. */
805 if (cur_cs != 0) {
806 if ((istate == token_list) || (iname < 1) || (iname > 17)) {
807 p = get_avail();
808 token_info(p) = cs_token_flag + cur_cs;
809 begin_token_list(p, backed_up); /* prepare to read the control sequence again */
811 cur_cmd = spacer_cmd;
812 cur_chr = ' '; /* replace it by a space */
814 if (scanner_status > skipping) {
815 const char *errhlp[] = {
816 "I suspect you have forgotten a `}', causing me",
817 "to read past where you wanted me to stop.",
818 "I'll try to recover; but if the error is serious,",
819 "you'd better type `E' or `X' now and fix your file.",
820 NULL
822 char errmsg[256];
823 const char *startmsg;
824 const char *scannermsg;
825 /* Tell the user what has run away and try to recover */
826 runaway(); /* print a definition, argument, or preamble */
827 if (cur_cs == 0) {
828 startmsg = "File ended";
829 } else {
830 cur_cs = 0;
831 startmsg = "Forbidden control sequence found";
833 /* Print either `\.{definition}' or `\.{use}' or `\.{preamble}' or `\.{text}',
834 and insert tokens that should lead to recovery; */
835 /* The recovery procedure can't be fully understood without knowing more
836 about the \TeX\ routines that should be aborted, but we can sketch the
837 ideas here: For a runaway definition we will insert a right brace; for a
838 runaway preamble, we will insert a special \.{\\cr} token and a right
839 brace; and for a runaway argument, we will set |long_state| to
840 |outer_call| and insert \.{\\par}. */
841 p = get_avail();
842 switch (scanner_status) {
843 case defining:
844 scannermsg = "definition";
845 token_info(p) = right_brace_token + '}';
846 break;
847 case matching:
848 scannermsg = "use";
849 token_info(p) = par_token;
850 long_state = outer_call_cmd;
851 break;
852 case aligning:
853 scannermsg = "preamble";
854 token_info(p) = right_brace_token + '}';
855 q = p;
856 p = get_avail();
857 token_link(p) = q;
858 token_info(p) = cs_token_flag + frozen_cr;
859 align_state = -1000000;
860 break;
861 case absorbing:
862 scannermsg = "text";
863 token_info(p) = right_brace_token + '}';
864 break;
865 default: /* can't happen */
866 scannermsg = "unknown";
867 break;
868 } /*there are no other cases */
869 begin_token_list(p, inserted);
870 snprintf(errmsg, 255, "%s while scanning %s of %s",
871 startmsg, scannermsg, cs_to_string(warning_index));
872 tex_error(errmsg, errhlp);
873 } else {
874 char errmsg[256];
875 const char *errhlp_no[] = {
876 "The file ended while I was skipping conditional text.",
877 "This kind of error happens when you say `\\if...' and forget",
878 "the matching `\\fi'. I've inserted a `\\fi'; this might work.",
879 NULL
881 const char *errhlp_cs[] = {
882 "A forbidden control sequence occurred in skipped text.",
883 "This kind of error happens when you say `\\if...' and forget",
884 "the matching `\\fi'. I've inserted a `\\fi'; this might work.",
885 NULL
887 const char **errhlp = (const char **) errhlp_no;
888 char *ss;
889 if (cur_cs != 0) {
890 errhlp = errhlp_cs;
891 cur_cs = 0;
893 ss = cmd_chr_to_string(if_test_cmd, cur_if);
894 snprintf(errmsg, 255, "Incomplete %s; all text was ignored after line %d",
895 ss, (int) skip_line);
896 free(ss);
897 /* Incomplete \\if... */
898 cur_tok = cs_token_flag + frozen_fi;
899 /* back up one inserted token and call |error| */
901 OK_to_interrupt = false;
902 back_input();
903 token_type = inserted;
904 OK_to_interrupt = true;
905 tex_error(errmsg, errhlp);
908 deletions_allowed = true;
912 @ @c
914 #if 0
917 The other variant gives less clutter in tracing cache usage when profiling and for
918 some files (like the manual) also a bit of a speedup.
921 static boolean get_next_file(void)
923 SWITCH:
924 if (iloc <= ilimit) {
925 /* current line not yet finished */
926 do_buffer_to_unichar(cur_chr, iloc);
928 RESWITCH:
929 if (detokenized_line()) {
930 cur_cmd = (cur_chr == ' ' ? 10 : 12);
931 } else {
932 do_get_cat_code(cur_cmd, cur_chr);
935 Change state if necessary, and |goto switch| if the current
936 character should be ignored, or |goto reswitch| if the current
937 character changes to another;
939 The following 48-way switch accomplishes the scanning quickly, assuming
940 that a decent C compiler has translated the code. Note that the numeric
941 values for |mid_line|, |skip_blanks|, and |new_line| are spaced
942 apart from each other by |max_char_code+1|, so we can add a character's
943 command code to the state to get a single number that characterizes both.
945 Remark [ls/hh]: checking performance indicated that this switch was the
946 cause of many branch prediction errors but changing it to:
948 c = istate + cur_cmd;
949 if (c == (mid_line + letter_cmd) || c == (mid_line + other_char_cmd)) {
950 return true;
951 } else if (c >= new_line) {
952 switch (c) {
954 } else if (c >= skip_blanks) {
955 switch (c) {
957 } else if (c >= mid_line) {
958 switch (c) {
960 } else {
961 istate = mid_line;
962 return true;
965 gives as many prediction errors. So, we can indeed assume that the compiler
966 does the right job, or that there is simply no other way.
969 switch (istate + cur_cmd) {
970 case mid_line + ignore_cmd:
971 case skip_blanks + ignore_cmd:
972 case new_line + ignore_cmd:
973 case skip_blanks + spacer_cmd:
974 case new_line + spacer_cmd:
975 /* Cases where character is ignored */
976 goto SWITCH;
977 break;
978 case mid_line + escape_cmd:
979 case new_line + escape_cmd:
980 case skip_blanks + escape_cmd:
981 /* Scan a control sequence ...; */
982 istate = (unsigned char) scan_control_sequence();
983 if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
984 check_outer_validity();
985 break;
986 case mid_line + active_char_cmd:
987 case new_line + active_char_cmd:
988 case skip_blanks + active_char_cmd:
989 /* Process an active-character */
990 cur_cs = active_to_cs(cur_chr, false);
991 cur_cmd = eq_type(cur_cs);
992 cur_chr = equiv(cur_cs);
993 istate = mid_line;
994 if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
995 check_outer_validity();
996 break;
997 case mid_line + sup_mark_cmd:
998 case new_line + sup_mark_cmd:
999 case skip_blanks + sup_mark_cmd:
1000 /* If this |sup_mark| starts */
1001 if (process_sup_mark())
1002 goto RESWITCH;
1003 else
1004 istate = mid_line;
1005 break;
1006 case mid_line + invalid_char_cmd:
1007 case new_line + invalid_char_cmd:
1008 case skip_blanks + invalid_char_cmd:
1009 /* Decry the invalid character and |goto restart|; */
1010 invalid_character_error();
1011 return false; /* because state may be |token_list| now */
1012 break;
1013 case mid_line + spacer_cmd:
1014 /* Enter |skip_blanks| state, emit a space; */
1015 istate = skip_blanks;
1016 cur_chr = ' ';
1017 break;
1018 case mid_line + car_ret_cmd:
1020 Finish line, emit a space. When a character of type |spacer| gets through, its
1021 character code is changed to $\.{"\ "}=040$. This means that the ASCII codes
1022 for tab and space, and for the space inserted at the end of a line, will be
1023 treated alike when macro parameters are being matched. We do this since such
1024 characters are indistinguishable on most computer terminal displays.
1026 iloc = ilimit + 1;
1027 cur_cmd = spacer_cmd;
1028 cur_chr = ' ';
1029 break;
1030 case skip_blanks + car_ret_cmd:
1031 case mid_line + comment_cmd:
1032 case new_line + comment_cmd:
1033 case skip_blanks + comment_cmd:
1034 /* Finish line, |goto switch|; */
1035 iloc = ilimit + 1;
1036 goto SWITCH;
1037 break;
1038 case new_line + car_ret_cmd:
1039 /* Finish line, emit a \.{\\par}; */
1040 iloc = ilimit + 1;
1041 cur_cs = par_loc;
1042 cur_cmd = eq_type(cur_cs);
1043 cur_chr = equiv(cur_cs);
1044 if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
1045 check_outer_validity();
1046 break;
1047 case skip_blanks + left_brace_cmd:
1048 case new_line + left_brace_cmd:
1049 istate = mid_line;
1050 /* fall through */
1051 case mid_line + left_brace_cmd:
1052 align_state++;
1053 break;
1054 case skip_blanks + right_brace_cmd:
1055 case new_line + right_brace_cmd:
1056 istate = mid_line;
1057 /* fall through */
1058 case mid_line + right_brace_cmd:
1059 align_state--;
1060 break;
1061 case mid_line + math_shift_cmd:
1062 case mid_line + tab_mark_cmd:
1063 case mid_line + mac_param_cmd:
1064 case mid_line + sub_mark_cmd:
1065 case mid_line + letter_cmd:
1066 case mid_line + other_char_cmd:
1067 break;
1069 case skip_blanks + math_shift:
1070 case skip_blanks + tab_mark:
1071 case skip_blanks + mac_param:
1072 case skip_blanks + sub_mark:
1073 case skip_blanks + letter:
1074 case skip_blanks + other_char:
1075 case new_line + math_shift:
1076 case new_line + tab_mark:
1077 case new_line + mac_param:
1078 case new_line + sub_mark:
1079 case new_line + letter:
1080 case new_line + other_char:
1082 default:
1083 istate = mid_line;
1084 break;
1086 } else {
1087 if (iname != 21)
1088 istate = new_line;
1090 Move to next line of file,
1091 or |goto restart| if there is no next line,
1092 or |return| if a \.{\\read} line has finished;
1094 do {
1095 next_line_retval r = next_line();
1096 if (r == next_line_return) {
1097 return true;
1098 } else if (r == next_line_restart) {
1099 return false;
1101 } while (0);
1102 check_interrupt();
1103 goto SWITCH;
1105 return true;
1108 #else
1110 /* 10 times less Bim in callgrind */
1113 escape_cmd left_brace_cmd right_brace_cmd math_shift_cmd
1114 tab_mark_cmd car_ret_cmd mac_param_cmd sup_mark_cmd
1115 sub_mark_cmd ignore_cmd spacer_cmd letter_cmd
1116 other_char_cmd active_char_cmd comment_cmd invalid_char_cmd
1119 static boolean get_next_file(void)
1121 int c = 0;
1122 SWITCH:
1123 if (iloc <= ilimit) {
1124 /* current line not yet finished */
1125 do_buffer_to_unichar(cur_chr, iloc);
1126 RESWITCH:
1127 if (detokenized_line()) {
1128 cur_cmd = (cur_chr == ' ' ? 10 : 12);
1129 } else {
1130 do_get_cat_code(cur_cmd, cur_chr);
1133 Change state if necessary, and |goto switch| if the current
1134 character should be ignored, or |goto reswitch| if the current
1135 character changes to another;
1137 c = istate + cur_cmd;
1138 if (c == (mid_line + letter_cmd) || c == (mid_line + other_char_cmd)) {
1139 return true;
1140 } else if (c >= new_line) {
1141 switch (c-new_line) {
1142 case escape_cmd:
1143 istate = (unsigned char) scan_control_sequence();
1144 if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
1145 check_outer_validity();
1146 return true;
1147 case left_brace_cmd:
1148 istate = mid_line;
1149 align_state++;
1150 return true;
1151 case right_brace_cmd:
1152 istate = mid_line;
1153 align_state--;
1154 return true;
1155 case math_shift_cmd:
1156 istate = mid_line;
1157 return true;
1158 case tab_mark_cmd:
1159 istate = mid_line;
1160 return true;
1161 case car_ret_cmd:
1162 /* Finish line, emit a \.{\\par}; */
1163 iloc = ilimit + 1;
1164 cur_cs = par_loc;
1165 cur_cmd = eq_type(cur_cs);
1166 cur_chr = equiv(cur_cs);
1167 if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
1168 check_outer_validity();
1169 return true;
1170 case mac_param_cmd:
1171 istate = mid_line;
1172 return true;
1173 case sup_mark_cmd:
1174 if (process_sup_mark())
1175 goto RESWITCH;
1176 else
1177 istate = mid_line;
1178 return true;
1179 case sub_mark_cmd:
1180 istate = mid_line;
1181 return true;
1182 case ignore_cmd:
1183 goto SWITCH;
1184 return true;
1185 case spacer_cmd:
1186 /* Cases where character is ignored */
1187 goto SWITCH;
1188 case letter_cmd:
1189 istate = mid_line;
1190 return true;
1191 case other_char_cmd:
1192 istate = mid_line;
1193 return true;
1194 case active_char_cmd:
1195 cur_cs = active_to_cs(cur_chr, false);
1196 cur_cmd = eq_type(cur_cs);
1197 cur_chr = equiv(cur_cs);
1198 istate = mid_line;
1199 if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
1200 check_outer_validity();
1201 return true;
1202 case comment_cmd:
1203 iloc = ilimit + 1;
1204 goto SWITCH;
1205 case invalid_char_cmd:
1206 invalid_character_error();
1207 return false; /* because state may be |token_list| now */
1208 default:
1209 istate = mid_line;
1210 return true;
1212 } else if (c >= skip_blanks) {
1213 switch (c-skip_blanks) {
1214 case escape_cmd:
1215 /* Scan a control sequence ...; */
1216 istate = (unsigned char) scan_control_sequence();
1217 if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
1218 check_outer_validity();
1219 return true;
1220 case left_brace_cmd:
1221 istate = mid_line;
1222 align_state++;
1223 return true;
1224 case right_brace_cmd:
1225 istate = mid_line;
1226 align_state--;
1227 return true;
1228 case math_shift_cmd:
1229 istate = mid_line;
1230 return true;
1231 case tab_mark_cmd:
1232 istate = mid_line;
1233 return true;
1234 case car_ret_cmd:
1235 iloc = ilimit + 1;
1236 goto SWITCH;
1237 case mac_param_cmd:
1238 istate = mid_line;
1239 return true;
1240 case sup_mark_cmd:
1241 /* If this |sup_mark| starts */
1242 if (process_sup_mark())
1243 goto RESWITCH;
1244 else
1245 istate = mid_line;
1246 return true;
1247 case sub_mark_cmd:
1248 istate = mid_line;
1249 return true;
1250 case ignore_cmd:
1251 goto SWITCH;
1252 case spacer_cmd:
1253 goto SWITCH;
1254 case letter_cmd:
1255 istate = mid_line;
1256 return true;
1257 case other_char_cmd:
1258 istate = mid_line;
1259 return true;
1260 case active_char_cmd:
1261 cur_cs = active_to_cs(cur_chr, false);
1262 cur_cmd = eq_type(cur_cs);
1263 cur_chr = equiv(cur_cs);
1264 istate = mid_line;
1265 if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
1266 check_outer_validity();
1267 return true;
1268 case comment_cmd:
1269 /* Finish line, |goto switch|; */
1270 iloc = ilimit + 1;
1271 goto SWITCH;
1272 case invalid_char_cmd:
1273 /* Decry the invalid character and |goto restart|; */
1274 invalid_character_error();
1275 return false; /* because state may be |token_list| now */
1276 default:
1277 istate = mid_line;
1278 return true;
1280 } else if (c >= mid_line) {
1281 switch (c-mid_line) {
1282 case escape_cmd:
1283 istate = (unsigned char) scan_control_sequence();
1284 if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
1285 check_outer_validity();
1286 return true;
1287 case left_brace_cmd:
1288 align_state++;
1289 return true;
1290 case right_brace_cmd:
1291 align_state--;
1292 return true;
1293 case math_shift_cmd:
1294 return true;
1295 case tab_mark_cmd:
1296 return true;
1297 case car_ret_cmd:
1299 Finish line, emit a space. When a character of type |spacer| gets through, its
1300 character code is changed to $\.{"\ "}=040$. This means that the ASCII codes
1301 for tab and space, and for the space inserted at the end of a line, will be
1302 treated alike when macro parameters are being matched. We do this since such
1303 characters are indistinguishable on most computer terminal displays.
1305 iloc = ilimit + 1;
1306 cur_cmd = spacer_cmd;
1307 cur_chr = ' ';
1308 return true;
1309 case mac_param_cmd:
1310 return true;
1311 case sup_mark_cmd:
1312 if (process_sup_mark())
1313 goto RESWITCH;
1314 else
1315 istate = mid_line;
1316 return true;
1317 case sub_mark_cmd:
1318 return true;
1319 case ignore_cmd:
1320 goto SWITCH;
1321 case spacer_cmd:
1322 /* Enter |skip_blanks| state, emit a space; */
1323 istate = skip_blanks;
1324 cur_chr = ' ';
1325 return true;
1326 case letter_cmd:
1327 istate = mid_line;
1328 return true;
1329 case other_char_cmd:
1330 istate = mid_line;
1331 return true;
1332 case active_char_cmd:
1333 cur_cs = active_to_cs(cur_chr, false);
1334 cur_cmd = eq_type(cur_cs);
1335 cur_chr = equiv(cur_cs);
1336 istate = mid_line;
1337 if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
1338 check_outer_validity();
1339 return true;
1340 case comment_cmd:
1341 iloc = ilimit + 1;
1342 goto SWITCH;
1343 case invalid_char_cmd:
1344 invalid_character_error();
1345 return false; /* because state may be |token_list| now */
1346 default:
1347 istate = mid_line;
1348 return true;
1350 } else {
1351 istate = mid_line;
1352 return true;
1354 } else {
1355 if (iname != 21) {
1356 istate = new_line;
1359 Move to next line of file, or |goto restart| if there is no next line,
1360 or |return| if a \.{\\read} line has finished;
1362 do {
1363 next_line_retval r = next_line();
1364 if (r == next_line_return) {
1365 return true;
1366 } else if (r == next_line_restart) {
1367 return false;
1369 } while (0);
1370 check_interrupt();
1371 goto SWITCH;
1373 return true;
1376 #endif
1378 @ Notice that a code like \.{\^\^8} becomes \.x if not followed by a hex digit.
1379 We only support a limited set:
1381 ^^^^^^XXXXXX
1382 ^^^^XXXX
1383 ^^XX ^^<char>
/*
    Helpers for the hexadecimal forms of the |sup_mark| notation. Only the
    digits and the lowercase letters |a| up to |f| count as hex digits. The
    conversion macros assign to |cur_chr|; each of them expands to a single
    statement, so they can be used safely in an unbraced |if| or |else|.
    The arguments are evaluated more than once and therefore must not have
    side effects.
*/

#define is_hex(a) ((((a)>='0')&&((a)<='9'))||(((a)>='a')&&((a)<='f')))

#define add_nybble(c) do { \
    if ((c)<='9') { \
        cur_chr=(cur_chr<<4)+(c)-'0'; \
    } else { \
        cur_chr=(cur_chr<<4)+(c)-'a'+10; \
    } \
} while (0)

#define set_nybble(c) do { \
    if ((c)<='9') { \
        cur_chr=(c)-'0'; \
    } else { \
        cur_chr=(c)-'a'+10; \
    } \
} while (0)

#define one_hex_to_cur_chr(c1) do { \
    set_nybble(c1); \
} while (0)

#define two_hex_to_cur_chr(c1,c2) do { \
    set_nybble(c1); \
    add_nybble(c2); \
} while (0)

#define four_hex_to_cur_chr(c1,c2,c3,c4) do { \
    two_hex_to_cur_chr(c1,c2); \
    add_nybble(c3); \
    add_nybble(c4); \
} while (0)

#define six_hex_to_cur_chr(c1,c2,c3,c4,c5,c6) do { \
    four_hex_to_cur_chr(c1,c2,c3,c4); \
    add_nybble(c5); \
    add_nybble(c6); \
} while (0)
1420 static boolean process_sup_mark(void)
1422 if (cur_chr == buffer[iloc]) {
1423 if (iloc < ilimit) {
1424 if ((cur_chr == buffer[iloc + 1]) && (cur_chr == buffer[iloc + 2])) {
1425 if ((cur_chr == buffer[iloc + 3]) && (cur_chr == buffer[iloc + 4])) {
1426 /* ^^^^^^XXXXXX */
1427 if ((iloc + 10) <= ilimit) {
1428 int c1 = buffer[iloc + 5];
1429 int c2 = buffer[iloc + 6];
1430 int c3 = buffer[iloc + 7];
1431 int c4 = buffer[iloc + 8];
1432 int c5 = buffer[iloc + 9];
1433 int c6 = buffer[iloc + 10];
1434 if (is_hex(c1) && is_hex(c2) && is_hex(c3) &&
1435 is_hex(c4) && is_hex(c5) && is_hex(c6)) {
1436 iloc = iloc + 11;
1437 six_hex_to_cur_chr(c1,c2,c3,c4,c5,c6);
1438 return true;
1439 } else {
1440 tex_error("^^^^^^ needs six hex digits", NULL);
1442 } else {
1443 tex_error("^^^^^^ needs six hex digits, end of input", NULL);
1445 } else {
1446 /* ^^^^XXXX */
1447 if ((iloc + 6) <= ilimit) {
1448 int c1 = buffer[iloc + 3];
1449 int c2 = buffer[iloc + 4];
1450 int c3 = buffer[iloc + 5];
1451 int c4 = buffer[iloc + 6];
1452 if (is_hex(c1) && is_hex(c2) && is_hex(c3) && is_hex(c4)) {
1453 iloc = iloc + 7;
1454 four_hex_to_cur_chr(c1,c2,c3,c4);
1455 return true;
1456 } else {
1457 tex_error("^^^^ needs four hex digits", NULL);
1459 } else {
1460 tex_error("^^^^ needs four hex digits, end of input", NULL);
1463 } else {
1464 /* ^^XX */
1465 if ((iloc + 2) <= ilimit) {
1466 int c1 = buffer[iloc + 1];
1467 int c2 = buffer[iloc + 2];
1468 if (is_hex(c1) && is_hex(c2)) {
1469 iloc = iloc + 3;
1470 two_hex_to_cur_chr(c1,c2);
1471 return true;
1474 /* go on, no error, good old tex */
1477 /* the rest */
1479 int c1 = buffer[iloc + 1];
1480 if (c1 < 0200) {
1481 iloc = iloc + 2;
1482 if (is_hex(c1) && (iloc <= ilimit)) {
1483 int c2 = buffer[iloc];
1484 if (is_hex(c2)) {
1485 incr(iloc);
1486 two_hex_to_cur_chr(c1,c2);
1487 return true;
1490 cur_chr = (c1 < 0100 ? c1 + 0100 : c1 - 0100);
1491 return true;
1495 return false;
1498 @ Control sequence names are scanned only when they appear in some line of a
1499 file; once they have been scanned the first time, their |eqtb| location serves as
1500 a unique identification, so \TeX\ doesn't need to refer to the original name any
1501 more except when it prints the equivalent in symbolic form.
1503 The program that scans a control sequence has been written carefully in order to
1504 avoid the blowups that might otherwise occur if a malicious user tried something
1505 like `\.{\\catcode\'15=0}'. The algorithm might look at |buffer[ilimit+1]|, but
1506 it never looks at |buffer[ilimit+2]|.
1508 If expanded characters like `\.{\^\^A}' or `\.{\^\^df}' appear in or just
1509 following a control sequence name, they are converted to single characters in the
1510 buffer and the process is repeated, slowly but surely.
1513 static boolean check_expanded_code(int *kk); /* below */
1515 static int scan_control_sequence(void)
1517 int retval = mid_line;
1518 if (iloc > ilimit) {
1519 cur_cs = null_cs; /* |state| is irrelevant in this case */
1520 } else {
1521 register int cat; /* |cat_code(cur_chr)|, usually */
1522 while (1) {
1523 int k = iloc;
1524 do_buffer_to_unichar(cur_chr, k);
1525 do_get_cat_code(cat, cur_chr);
1526 if (cat != letter_cmd || k > ilimit) {
1527 retval = (cat == spacer_cmd ? skip_blanks : mid_line);
1528 if (cat == sup_mark_cmd && check_expanded_code(&k)) /* If an expanded...; */
1529 continue;
1530 } else {
1531 retval = skip_blanks;
1532 do {
1533 do_buffer_to_unichar(cur_chr, k);
1534 do_get_cat_code(cat, cur_chr);
1535 } while (cat == letter_cmd && k <= ilimit);
1537 if (cat == sup_mark_cmd && check_expanded_code(&k)) /* If an expanded...; */
1538 continue;
1539 if (cat != letter_cmd) {
1540 /* backtrack one character which can be utf */
1542 decr(k);
1543 if (cur_chr > 0xFFFF)
1544 decr(k);
1545 if (cur_chr > 0x7FF)
1546 decr(k);
1547 if (cur_chr > 0x7F)
1548 decr(k);
1550 if (cur_chr <= 0x7F) {
1551 k -= 1; /* in most cases */
1552 } else if (cur_chr > 0xFFFF) {
1553 k -= 4;
1554 } else if (cur_chr > 0x7FF) {
1555 k -= 3;
1556 } else /* if (cur_chr > 0x7F) */ {
1557 k -= 2;
1559 /* now |k| points to first nonletter */
1562 cur_cs = id_lookup(iloc, k - iloc);
1563 iloc = k;
1564 break;
1567 cur_cmd = eq_type(cur_cs);
1568 cur_chr = equiv(cur_cs);
1569 return retval;
1572 @ Whenever we reach the following piece of code, we will have
1573 |cur_chr=buffer[k-1]| and |k<=ilimit+1| and
1574 |cat=get_cat_code(cat_code_table,cur_chr)|. If an expanded code like \.{\^\^A} or
1575 \.{\^\^df} appears in |buffer[(k-1)..(k+1)]| or |buffer[(k-1)..(k+2)]|, we will
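1576 store the corresponding code in |buffer[k-1]| and shift the rest of the buffer
1577 left two or three places. The longer forms with four or six hex digits are
1577 handled in the same way: they shift the buffer over more places, and a code
1577 above 127 is stored as a multi-byte UTF-8 sequence.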
1580 static boolean check_expanded_code(int *kk)
1582 int l;
1583 int k = *kk;
1584 int d = 1;
1585 if (buffer[k] == cur_chr && k < ilimit) {
1586 if ((cur_chr == buffer[k + 1]) && (cur_chr == buffer[k + 2])) {
1587 if ((cur_chr == buffer[k + 3]) && (cur_chr == buffer[k + 4])) {
1588 if ((k + 10) <= ilimit) {
1589 int c1 = buffer[k + 6 - 1];
1590 int c2 = buffer[k + 6];
1591 int c3 = buffer[k + 6 + 1];
1592 int c4 = buffer[k + 6 + 2];
1593 int c5 = buffer[k + 6 + 3];
1594 int c6 = buffer[k + 6 + 4];
1595 if (is_hex(c1) && is_hex(c2) && is_hex(c3) && is_hex(c4) && is_hex(c5) && is_hex(c6)) {
1596 d = 6;
1597 six_hex_to_cur_chr(c1,c2,c3,c4,c5,c6);
1598 } else {
1599 tex_error("^^^^^^ needs six hex digits", NULL);
1601 } else {
1602 tex_error("^^^^^^ needs six hex digits, end of input", NULL);
1604 } else {
1605 if ((k + 6) <= ilimit) {
1606 int c1 = buffer[k + 4 - 1];
1607 int c2 = buffer[k + 4];
1608 int c3 = buffer[k + 4 + 1];
1609 int c4 = buffer[k + 4 + 2];
1610 if (is_hex(c1) && is_hex(c2) && is_hex(c3) && is_hex(c4)) {
1611 d = 4;
1612 four_hex_to_cur_chr(c1,c2,c3,c4);
1613 } else {
1614 tex_error("^^^^ needs four hex digits", NULL);
1616 } else {
1617 tex_error("^^^^ needs four hex digits, end of input", NULL);
1620 } else {
1621 int c1 = buffer[k + 1];
1622 if (c1 < 0200) {
1623 d = 1;
1624 if (is_hex(c1) && (k + 2) <= ilimit) {
1625 int c2 = buffer[k + 2];
1626 if (is_hex(c2)) {
1627 d = 2;
1628 two_hex_to_cur_chr(c1,c2);
1629 } else {
1630 cur_chr = (c1 < 0100 ? c1 + 0100 : c1 - 0100);
1632 } else {
1633 cur_chr = (c1 < 0100 ? c1 + 0100 : c1 - 0100);
1637 if (d > 2)
1638 d = 2 * d - 1;
1639 else
1640 d++;
1641 if (cur_chr <= 0x7F) {
1642 buffer[k - 1] = (packed_ASCII_code) cur_chr;
1643 } else if (cur_chr <= 0x7FF) {
1644 buffer[k - 1] = (packed_ASCII_code) (0xC0 + cur_chr / 0x40);
1645 k++;
1646 d--;
1647 buffer[k - 1] = (packed_ASCII_code) (0x80 + cur_chr % 0x40);
1648 } else if (cur_chr <= 0xFFFF) {
1649 buffer[k - 1] = (packed_ASCII_code) (0xE0 + cur_chr / 0x1000);
1650 k++;
1651 d--;
1652 buffer[k - 1] = (packed_ASCII_code) (0x80 + (cur_chr % 0x1000) / 0x40);
1653 k++;
1654 d--;
1655 buffer[k - 1] = (packed_ASCII_code) (0x80 + (cur_chr % 0x1000) % 0x40);
1656 } else {
1657 buffer[k - 1] = (packed_ASCII_code) (0xF0 + cur_chr / 0x40000);
1658 k++;
1659 d--;
1660 buffer[k - 1] = (packed_ASCII_code) (0x80 + (cur_chr % 0x40000) / 0x1000);
1661 k++;
1662 d--;
1663 buffer[k - 1] = (packed_ASCII_code) (0x80 + ((cur_chr % 0x40000) % 0x1000) / 0x40);
1664 k++;
1665 d--;
1666 buffer[k - 1] = (packed_ASCII_code) (0x80 + ((cur_chr % 0x40000) % 0x1000) % 0x40);
1668 l = k;
1669 ilimit = ilimit - d;
1670 while (l <= ilimit) {
1671 buffer[l] = buffer[l + d];
1672 l++;
1674 *kk = k;
1675 return true;
1677 return false;
1680 @ All of the easy branches of |get_next| have now been taken care of. There is
1681 one more branch.
1683 @c static next_line_retval next_line(void)
1685 boolean inhibit_eol = false; /* a way to end a pseudo file without trailing space */
1686 if (iname > 17) {
1687 /* Read next line of file into |buffer|, or |goto restart| if the file has ended */
1688 incr(line);
1689 first = istart;
1690 if (!force_eof) {
1691 if (iname <= 20) {
1692 if (pseudo_input()) { /* not end of file */
1693 firm_up_the_line(); /* this sets |ilimit| */
1694 line_catcode_table = DEFAULT_CAT_TABLE;
1695 if ((iname == 19) && (pseudo_lines(pseudo_files) == null))
1696 inhibit_eol = true;
1697 } else if ((every_eof != null) && !eof_seen[iindex]) {
1698 ilimit = first - 1;
1699 eof_seen[iindex] = true; /* fake one empty line */
1700 if (iname != 19)
1701 begin_token_list(every_eof, every_eof_text);
1702 return next_line_restart;
1703 } else {
1704 force_eof = true;
1706 } else {
1707 if (iname == 21) {
1708 if (luacstring_input()) { /* not end of strings */
1709 firm_up_the_line();
1710 line_catcode_table = (short) luacstring_cattable();
1711 line_partial = (signed char) luacstring_partial();
1712 if (luacstring_final_line() || line_partial
1713 || line_catcode_table == NO_CAT_TABLE)
1714 inhibit_eol = true;
1715 if (!line_partial)
1716 istate = new_line;
1717 } else {
1718 force_eof = true;
1720 } else {
1721 if (lua_input_ln(cur_file, 0, true)) { /* not end of file */
1722 firm_up_the_line(); /* this sets |ilimit| */
1723 line_catcode_table = DEFAULT_CAT_TABLE;
1724 } else if ((every_eof != null) && (!eof_seen[iindex])) {
1725 ilimit = first - 1;
1726 eof_seen[iindex] = true; /* fake one empty line */
1727 begin_token_list(every_eof, every_eof_text);
1728 return next_line_restart;
1729 } else {
1730 force_eof = true;
1735 if (force_eof) {
1736 if (tracing_nesting > 0)
1737 if ((grp_stack[in_open] != cur_boundary) || (if_stack[in_open] != cond_ptr))
1738 if (!((iname == 19) || (iname == 21))) {
1739 /* give warning for some unfinished groups and/or conditionals */
1740 file_warning();
1742 if ((iname > 21) || (iname == 20)) {
1743 report_stop_file(filetype_tex);
1744 decr(open_parens);
1746 force_eof = false;
1747 /* lua input or \.{\\scantextokens} */
1748 if (iname == 21 || iname == 19) {
1749 end_file_reading();
1750 } else {
1751 end_file_reading();
1752 if (! suppress_outer_error)
1753 check_outer_validity();
1755 return next_line_restart;
1757 if (inhibit_eol || end_line_char_inactive)
1758 ilimit--;
1759 else
1760 buffer[ilimit] = (packed_ASCII_code) end_line_char;
1761 first = ilimit + 1;
1762 iloc = istart; /* ready to read */
1763 } else {
1764 if (!terminal_input) {
1765 /* \.{\\read} line has ended */
1766 cur_cmd = 0;
1767 cur_chr = 0;
1768 return next_line_return; /* OUTER */
1770 if (input_ptr > 0) {
1771 /* text was inserted during error recovery */
1772 end_file_reading();
1773 return next_line_restart; /* resume previous level */
1775 if (selector < log_only)
1776 open_log_file();
1777 if (interaction > nonstop_mode) {
1778 if (end_line_char_inactive)
1779 ilimit++;
1780 if (ilimit == istart) {
1781 /* previous line was empty */
1782 tprint_nl("(Please type a command or say `\\end')");
1784 print_ln();
1785 first = istart;
1786 prompt_input("*"); /* input on-line into |buffer| */
1787 ilimit = last;
1788 if (end_line_char_inactive)
1789 ilimit--;
1790 else
1791 buffer[ilimit] = (packed_ASCII_code) end_line_char;
1792 first = ilimit + 1;
1793 iloc = istart;
1794 } else {
1796 Nonstop mode, which is intended for overnight batch processing,
1797 never waits for on-line input.
1799 fatal_error("*** (job aborted, no legal \\end found)");
1802 return next_line_ok;
1805 @ Let's consider now what happens when |get_next| is looking at a token list.
1808 static boolean get_next_tokenlist(void)
1810 register halfword t = token_info(iloc);
1811 iloc = token_link(iloc); /* move to next */
1812 if (t >= cs_token_flag) {
1813 /* a control sequence token */
1814 cur_cs = t - cs_token_flag;
1815 cur_cmd = eq_type(cur_cs);
1816 if (cur_cmd >= outer_call_cmd) {
1817 if (cur_cmd == dont_expand_cmd) {
1819 Get the next token, suppressing expansion. The present point in the program
1820 is reached only when the |expand| routine has inserted a special marker into
1821 the input. In this special case, |token_info(iloc)| is known to be a control
1822 sequence token, and |token_link(iloc)=null|.
1824 cur_cs = token_info(iloc) - cs_token_flag;
1825 iloc = null;
1826 cur_cmd = eq_type(cur_cs);
1827 if (cur_cmd > max_command_cmd) {
1828 cur_cmd = relax_cmd;
1829 cur_chr = no_expand_flag;
1830 return true;
1832 } else if (! suppress_outer_error) {
1833 check_outer_validity();
1836 cur_chr = equiv(cur_cs);
1837 } else {
1838 cur_cmd = token_cmd(t);
1839 cur_chr = token_chr(t);
1840 switch (cur_cmd) {
1841 case left_brace_cmd:
1842 align_state++;
1843 break;
1844 case right_brace_cmd:
1845 align_state--;
1846 break;
1847 case out_param_cmd:
1848 /* Insert macro parameter and |goto restart|; */
1849 begin_token_list(param_stack[param_start + cur_chr - 1], parameter);
1850 return false;
1851 break;
1854 return true;
1857 @ Now we're ready to take the plunge into |get_next| itself. Parts of this
1858 routine are executed more often than any other instructions of \TeX.
1859 @^mastication@>@^inner loop@>
1861 @ sets |cur_cmd|, |cur_chr|, |cur_cs| to next token
1864 void get_next(void)
1866 RESTART:
1867 cur_cs = 0;
1868 if (istate != token_list) {
1869 /* Input from external file, |goto restart| if no input found */
1870 if (!get_next_file())
1871 goto RESTART;
1872 } else {
1873 if (iloc == null) {
1874 end_token_list();
1875 goto RESTART; /* list exhausted, resume previous level */
1876 } else if (!get_next_tokenlist()) {
1877 goto RESTART; /* parameter needs to be expanded */
1880 /* If an alignment entry has just ended, take appropriate action */
1881 if ((cur_cmd == tab_mark_cmd || cur_cmd == car_ret_cmd) && align_state == 0) {
1882 insert_vj_template();
1883 goto RESTART;
1887 @ Since |get_next| is used so frequently in \TeX, it is convenient to define
1888 three related procedures that do a little more:
1890 \yskip\hang|get_token| not only sets |cur_cmd| and |cur_chr|, it also sets
1891 |cur_tok|, a packed halfword version of the current token.
1893 \yskip\hang|get_x_token|, meaning ``get an expanded token,'' is like |get_token|,
1894 but if the current token turns out to be a user-defined control sequence (i.e., a
1895 macro call), or a conditional, or something like \.{\\topmark} or
1896 \.{\\expandafter} or \.{\\csname}, it is eliminated from the input by beginning
1897 the expansion of the macro or the evaluation of the conditional.
1899 \yskip\hang|x_token| is like |get_x_token| except that it assumes that |get_next|
1900 has already been called.
1902 \yskip\noindent In fact, these three procedures account for almost every use of
1903 |get_next|.
1905 No new control sequences will be defined except during a call of |get_token|, or
1906 when \.{\\csname} compresses a token list, because |no_new_control_sequence| is
1907 always |true| at other times.
1909 @ sets |cur_cmd|, |cur_chr|, |cur_tok|
1912 void get_token(void)
1914 no_new_control_sequence = false;
1915 get_next();
1916 no_new_control_sequence = true;
1917 if (cur_cs == 0)
1918 cur_tok = token_val(cur_cmd, cur_chr);
1919 else
1920 cur_tok = cs_token_flag + cur_cs;
1923 @ changes the string |s| to a token list
1926 halfword string_to_toks(const char *ss)
1928 halfword p; /* tail of the token list */
1929 halfword q; /* new node being added to the token list via |store_new_token| */
1930 halfword t; /* token being appended */
1931 const char *s = ss;
1932 const char *se = ss + strlen(s);
1933 p = temp_token_head;
1934 set_token_link(p, null);
1935 while (s < se) {
1936 t = (halfword) str2uni((const unsigned char *) s);
1937 s += utf8_size(t);
1938 if (t == ' ')
1939 t = space_token;
1940 else
1941 t = other_token + t;
1942 fast_store_new_token(t);
1944 return token_link(temp_token_head);
1947 @ The token lists for macros and for other things like \.{\\mark} and
1948 \.{\\output} and \.{\\write} are produced by a procedure called |scan_toks|.
1950 Before we get into the details of |scan_toks|, let's consider a much simpler
1951 task, that of converting the current string into a token list. The |str_toks|
1952 function does this; it classifies spaces as type |spacer| and everything else as
1953 type |other_char|.
1955 The token list created by |str_toks| begins at |link(temp_token_head)| and ends
1956 at the value |p| that is returned. (If |p=temp_token_head|, the list is empty.)
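1958 |lua_str_toks| is almost identical, but it also escapes the characters that
1959 |lua| considers special while scanning a literal string: the backslash, the
1959 single and the double quote, and the newline and carriage return characters
1959 (which become \.{\\n} and \.{\\r}).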
1961 @ changes the string |b| to a token list
1964 halfword lua_str_toks(lstring b)
1966 halfword p; /* tail of the token list */
1967 halfword q; /* new node being added to the token list via |store_new_token| */
1968 halfword t; /* token being appended */
1969 unsigned char *k; /* index into string */
1970 p = temp_token_head;
1971 set_token_link(p, null);
1972 k = (unsigned char *) b.s;
1973 while (k < (unsigned char *) b.s + b.l) {
1974 t = pool_to_unichar(k);
1975 k += utf8_size(t);
1976 if (t == ' ') {
1977 t = space_token;
1978 } else {
1979 if ((t == '\\') || (t == '"') || (t == '\'') || (t == 10) || (t == 13))
1980 fast_store_new_token(other_token + '\\');
1981 if (t == 10)
1982 t = 'n';
1983 if (t == 13)
1984 t = 'r';
1985 t = other_token + t;
1987 fast_store_new_token(t);
1989 return p;
1992 @ Incidentally, the main reason for wanting |str_toks| is the function
1993 |the_toks|, which has similar input/output characteristics.
1995 @ changes the string |s| to a token list
1998 halfword str_toks(lstring s)
2000 halfword p; /* tail of the token list */
2001 halfword q; /* new node being added to the token list via |store_new_token| */
2002 halfword t; /* token being appended */
2003 unsigned char *k, *l; /* index into string */
2004 p = temp_token_head;
2005 set_token_link(p, null);
2006 k = s.s;
2007 l = k + s.l;
2008 while (k < l) {
2009 t = pool_to_unichar(k);
2010 k += utf8_size(t);
2011 if (t == ' ')
2012 t = space_token;
2013 else
2014 t = other_token + t;
2015 fast_store_new_token(t);
2017 return p;
2021 hh: most of the converter is similar to the one i made for macro so at some point i
2022 can make a helper; also todo: there is no need to go through the pool
2026 halfword str_scan_toks(int ct, lstring s)
2027 { /* changes the string |str_pool[b..pool_ptr]| to a token list */
2028 halfword p; /* tail of the token list */
2029 halfword q; /* new node being added to the token list via |store_new_token| */
2030 halfword t; /* token being appended */
2031 unsigned char *k, *l; /* index into string */
2032 int cc;
2033 p = temp_token_head;
2034 set_token_link(p, null);
2035 k = s.s;
2036 l = k + s.l;
2037 while (k < l) {
2038 t = pool_to_unichar(k);
2039 k += utf8_size(t);
2040 cc = get_cat_code(ct,t);
2041 if (cc == 0) {
2042 /* we have a potential control sequence so we check for it */
2043 int _lname = 0 ;
2044 int _s = 0 ;
2045 int _c = 0 ;
2046 halfword _cs = null ;
2047 unsigned char *_name = k ;
2048 while (k < l) {
2049 t = (halfword) str2uni((const unsigned char *) k);
2050 _s = utf8_size(t);
2051 _c = get_cat_code(ct,t);
2052 if (_c == 11) {
2053 k += _s ;
2054 _lname = _lname + _s ;
2055 } else if (_c == 10) {
2056 /* we ignore a trailing space like normal scanning does */
2057 k += _s ;
2058 break ;
2059 } else {
2060 break ;
2063 if (_s > 0) {
2064 /* we have a potential \cs */
2065 _cs = string_lookup((const char *) _name, _lname);
2066 if (_cs == undefined_control_sequence) {
2067 /* let's play safe and backtrack */
2068 t = cc * (1<<21) + t ;
2069 k = _name ;
2070 } else {
2071 t = cs_token_flag + _cs;
2073 } else {
2074 /* just a character with some meaning, so \unknown becomes effectively */
2075 /* \\unknown assuming that \\ has some useful meaning of course */
2076 t = cc * (1<<21) + t ;
2077 k = _name ;
2080 } else {
2081 /* whatever token, so for instance $x^2$ just works given a tex */
2082 /* catcode regime */
2083 t = cc * (1<<21) + t ;
2085 fast_store_new_token(t);
2088 return p;
2091 @ Here's part of the |expand| subroutine that we are now ready to complete:
2094 void ins_the_toks(void)
2096 (void) the_toks();
2097 ins_list(token_link(temp_token_head));
/*
    Assign to token register |n| a fresh reference node that points to the
    tokens that follow |t|. The macro expands to a single statement. The
    local |a| is not used here directly; presumably the |define| macro
    picks it up as the prefix (4 when |g| is positive).
*/

#define set_toks_register(n,t,g) do { \
    int a = ((g)>0) ? 4 : 0; \
    halfword ref = get_avail(); \
    set_token_ref_count(ref, 0); \
    set_token_link(ref, token_link(t)); \
    define((n) + toks_base, call_cmd, ref); \
} while (0)
2108 void combine_the_toks(int how)
2110 halfword nt;
2111 get_x_token();
2112 /* target */
2113 if (cur_cmd == assign_toks_cmd) {
2114 nt = equiv(cur_cs) - toks_base;
2115 /* check range */
2116 } else {
2117 back_input();
2118 scan_int();
2119 nt = cur_val;
2121 /* source */
2122 do {
2123 get_x_token();
2124 } while (cur_cmd == spacer_cmd);
2125 if (cur_cmd == left_brace_cmd) {
2126 halfword x, source;
2127 back_input();
2128 x = scan_toks(false,how > 1); /* expanded or not */
2129 source = def_ref;
2130 /* action */
2131 if (source != null) {
2132 halfword target = toks(nt);
2133 if (target == null) {
2134 set_toks_register(nt,source,0);
2135 } else {
2136 halfword s = token_link(source);
2137 if (s != null) {
2138 halfword t = token_link(target);
2139 if (t == null) {
2140 /* can this happen ? */
2141 set_token_link(target, s);
2142 } else if (odd(how)) {
2143 /* prepend */
2144 if (cur_level != eq_level_field(eqtb[toks_base+nt])) {
2145 halfword p = temp_token_head;
2146 halfword q;
2147 set_token_link(p, s); /* s = head, x = tail */
2148 p = x;
2149 while (t != null) {
2150 fast_store_new_token(token_info(t));
2151 t = token_link(t);
2153 set_toks_register(nt,temp_token_head,0);
2154 } else {
2155 set_token_link(x,t);
2156 set_token_link(target,s);
2158 } else {
2159 /* append */
2160 if (cur_level != eq_level_field(eqtb[toks_base+nt])) {
2161 halfword p = temp_token_head;
2162 halfword q;
2163 set_token_link(p, null);
2164 while (t != null) {
2165 fast_store_new_token(token_info(t));
2166 t = token_link(t);
2168 set_token_link(p,s);
2169 set_toks_register(nt,temp_token_head,0);
2170 } else {
2171 while (token_link(t) != null) {
2172 t = token_link(t);
2174 set_token_link(t,s);
2180 } else {
2181 halfword source, ns;
2182 if (cur_cmd == assign_toks_cmd) {
2183 ns = equiv(cur_cs) - toks_base;
2184 /* check range */
2185 } else {
2186 back_input();
2187 scan_int();
2188 ns = cur_val;
2190 /* action */
2191 source = toks(ns);
2192 if (source != null) {
2193 halfword target = toks(nt);
2194 if (target == null) {
2195 equiv(toks_base+nt) = source;
2196 equiv(toks_base+ns) = null;
2197 } else {
2198 halfword s = token_link(source);
2199 if (s != null) {
2200 halfword t = token_link(target);
2201 if (t == null) {
2202 set_token_link(target, s);
2203 } else if (odd(how)) {
2204 /* prepend */
2205 halfword x = s;
2206 while (token_link(x) != null) {
2207 x = token_link(x);
2209 set_token_link(x,t);
2210 set_token_link(target,s);
2211 } else {
2212 /* append */
2213 while (token_link(t) != null) {
2214 t = token_link(t);
2216 set_token_link(t,s);
2218 equiv(toks_base+ns) = null;
2225 @ This routine, used in the next one, prints the job name, possibly modified by
2226 the |process_jobname| callback.
2229 static void print_job_name(void)
2231 if (job_name) {
2232 char *s, *ss; /* C strings for jobname before and after processing */
2233 int callback_id, lua_retval;
2234 s = (char*)str_string(job_name);
2235 callback_id = callback_defined(process_jobname_callback);
2236 if (callback_id > 0) {
2237 lua_retval = run_callback(callback_id, "S->S", s, &ss);
2238 if ((lua_retval == true) && (ss != NULL))
2239 s = ss;
2241 tprint(s);
2242 } else {
2243 print(job_name);
@ Here is a routine that prints the result of a convert command, using the
argument |i|. It returns |false| if it does not know how to print the code |c|.
The function exists because both Lua code and \TeX\ code can call it to convert
something.
2252 @ Parse optional lua state integer, or an instance name to be stored in |sn| and
2253 get the next non-blank non-relax non-call token.
2257 int scan_lua_state(void)
2259 int sn = 0;
2260 do {
2261 get_x_token();
2262 } while ((cur_cmd == spacer_cmd) || (cur_cmd == relax_cmd));
2263 back_input();
2264 if (cur_cmd != left_brace_cmd) {
2265 if (scan_keyword("name")) {
2266 (void) scan_toks(false, true);
2267 sn = def_ref;
2268 } else {
2269 scan_register_num();
2270 if (get_lua_name(cur_val))
2271 sn = (cur_val - 65536);
2274 return sn;
2277 @ The procedure |conv_toks| uses |str_toks| to insert the token list for
2278 |convert| functions into the scanner; `\.{\\outer}' control sequences are allowed
2279 to follow `\.{\\string}' and `\.{\\meaning}'.
2281 The extra temp string |u| is needed because |pdf_scan_ext_toks| incorporates any
2282 pending string in its output. In order to save such a pending string, we have to
2283 create a temporary string that is destroyed immediately after.
/*
    |push_selector| remembers the current |selector| in the local variable
    |old_setting| (which the using function must declare) and redirects
    printing to |new_string|; |pop_selector| switches back.

    Both are wrapped in |do { } while (0)| so that `push_selector;' is exactly
    one statement, also in an unbraced |if|/|else|.
*/
#define push_selector do { \
    old_setting = selector; \
    selector = new_string; \
} while (0)

#define pop_selector do { \
    selector = old_setting; \
} while (0)
2295 static int do_variable_dvi(halfword c)
2297 return 0;
/*
    These helpers turn backend variable number |i| into the matching internal
    integer, dimension or token-list parameter: |cur_cmd|, |cur_val| and
    |cur_tok| are set accordingly and the token is pushed back with
    |back_input|.

    Each macro is a single |do { } while (0)| statement and parenthesizes its
    argument, so it can be used safely in an unbraced |if|/|else| and with
    expression arguments.
*/
#define do_variable_backend_int(i) do { \
    cur_cmd = assign_int_cmd; \
    cur_val = backend_int_base + (i); \
    cur_tok = token_val(cur_cmd, cur_val); \
    back_input(); \
} while (0)

#define do_variable_backend_dimen(i) do { \
    cur_cmd = assign_dimen_cmd; \
    cur_val = backend_dimen_base + (i); \
    cur_tok = token_val(cur_cmd, cur_val); \
    back_input(); \
} while (0)

#define do_variable_backend_toks(i) do { \
    cur_cmd = assign_toks_cmd; \
    cur_val = backend_toks_base + (i); \
    cur_tok = token_val(cur_cmd, cur_val); \
    back_input(); \
} while (0)
2318 static int do_variable_pdf(halfword c)
2320 if (scan_keyword("compresslevel")) { do_variable_backend_int(c_pdf_compress_level); }
2321 else if (scan_keyword("decimaldigits")) { do_variable_backend_int(c_pdf_decimal_digits); }
2322 else if (scan_keyword("imageresolution")) { do_variable_backend_int(c_pdf_image_resolution); }
2323 else if (scan_keyword("pkresolution")) { do_variable_backend_int(c_pdf_pk_resolution); }
2324 else if (scan_keyword("uniqueresname")) { do_variable_backend_int(c_pdf_unique_resname); }
2325 else if (scan_keyword("minorversion")) { do_variable_backend_int(c_pdf_minor_version); }
2326 else if (scan_keyword("pagebox")) { do_variable_backend_int(c_pdf_pagebox); }
2327 else if (scan_keyword("inclusionerrorlevel")) { do_variable_backend_int(c_pdf_inclusion_errorlevel); }
2328 else if (scan_keyword("ignoreunknownimages")) { do_variable_backend_int(c_pdf_ignore_unknown_images); }
2329 else if (scan_keyword("gamma")) { do_variable_backend_int(c_pdf_gamma); }
2330 else if (scan_keyword("imageapplygamma")) { do_variable_backend_int(c_pdf_image_apply_gamma); }
2331 else if (scan_keyword("imagegamma")) { do_variable_backend_int(c_pdf_image_gamma); }
2332 else if (scan_keyword("imagehicolor")) { do_variable_backend_int(c_pdf_image_hicolor); }
2333 else if (scan_keyword("imageaddfilename")) { do_variable_backend_int(c_pdf_image_addfilename); }
2334 else if (scan_keyword("objcompresslevel")) { do_variable_backend_int(c_pdf_obj_compress_level); }
2335 else if (scan_keyword("inclusioncopyfonts")) { do_variable_backend_int(c_pdf_inclusion_copy_font); }
2336 else if (scan_keyword("gentounicode")) { do_variable_backend_int(c_pdf_gen_tounicode); }
2337 else if (scan_keyword("pkfixeddpi")) { do_variable_backend_int(c_pdf_pk_fixed_dpi); }
2338 else if (scan_keyword("suppressoptionalinfo")) { do_variable_backend_int(c_pdf_suppress_optional_info); }
2340 else if (scan_keyword("horigin")) { do_variable_backend_dimen(d_pdf_h_origin); }
2341 else if (scan_keyword("vorigin")) { do_variable_backend_dimen(d_pdf_v_origin); }
2342 else if (scan_keyword("threadmargin")) { do_variable_backend_dimen(d_pdf_thread_margin); }
2343 else if (scan_keyword("destmargin")) { do_variable_backend_dimen(d_pdf_dest_margin); }
2344 else if (scan_keyword("linkmargin")) { do_variable_backend_dimen(d_pdf_link_margin); }
2345 else if (scan_keyword("xformmargin")) { do_variable_backend_dimen(d_pdf_xform_margin); }
2347 else if (scan_keyword("pageattr")) { do_variable_backend_toks(t_pdf_page_attr); }
2348 else if (scan_keyword("pageresources")) { do_variable_backend_toks(t_pdf_page_resources); }
2349 else if (scan_keyword("pagesattr")) { do_variable_backend_toks(t_pdf_pages_attr); }
2350 else if (scan_keyword("xformattr")) { do_variable_backend_toks(t_pdf_xform_attr); }
2351 else if (scan_keyword("xformresources")) { do_variable_backend_toks(t_pdf_xform_resources); }
2352 else if (scan_keyword("pkmode")) { do_variable_backend_toks(t_pdf_pk_mode); }
2353 else if (scan_keyword("trailerid")) { do_variable_backend_toks(t_pdf_trailer_id); }
2355 else
2356 return 0;
2357 return 1;
2360 static int do_feedback_dvi(halfword c)
2362 return 0;
2365 /* codes not really needed but cleaner when testing */
2367 #define pdftex_version 40 /* these values will not change any more */
2368 #define pdftex_revision "0" /* these values will not change any more */
2370 static int do_feedback_pdf(halfword c)
2372 int old_setting; /* holds |selector| setting */
2373 int save_scanner_status; /* |scanner_status| upon entry */
2374 halfword save_def_ref; /* |def_ref| upon entry, important if inside `\.{\\message}' */
2375 halfword save_warning_index;
2376 boolean bool; /* temp boolean */
2377 str_number s; /* first temp string */
2378 int ff; /* for use with |set_ff| */
2379 str_number u = 0; /* third temp string, will become non-nil if a string is already being built */
2380 char *str; /* color stack init str */
2382 if (scan_keyword("lastlink")) {
2383 push_selector;
2384 print_int(pdf_last_link);
2385 pop_selector;
2386 } else if (scan_keyword("retval")) {
2387 push_selector;
2388 print_int(pdf_retval);
2389 pop_selector;
2390 } else if (scan_keyword("lastobj")) {
2391 push_selector;
2392 print_int(pdf_last_obj);
2393 pop_selector;
2394 } else if (scan_keyword("lastannot")) {
2395 push_selector;
2396 print_int(pdf_last_annot);
2397 pop_selector;
2398 } else if (scan_keyword("xformname")) {
2399 scan_int();
2400 check_obj_type(static_pdf, obj_type_xform, cur_val);
2401 push_selector;
2402 print_int(obj_info(static_pdf, cur_val));
2403 pop_selector;
2404 } else if (scan_keyword("creationdate")) {
2405 ins_list(string_to_toks(getcreationdate(static_pdf)));
2406 /* no further action */
2407 return 2;
2408 } else if (scan_keyword("fontname")) {
2409 scan_font_ident();
2410 if (cur_val == null_font)
2411 normal_error("pdf backend", "invalid font identifier when asking 'fontname'");
2412 pdf_check_vf(cur_val);
2413 if (!font_used(cur_val))
2414 pdf_init_font(static_pdf, cur_val);
2415 push_selector;
2416 set_ff(cur_val);
2417 print_int(obj_info(static_pdf, pdf_font_num(ff)));
2418 pop_selector;
2419 } else if (scan_keyword("fontobjnum")) {
2420 scan_font_ident();
2421 if (cur_val == null_font)
2422 normal_error("pdf backend", "invalid font identifier when asking 'objnum'");
2423 pdf_check_vf(cur_val);
2424 if (!font_used(cur_val))
2425 pdf_init_font(static_pdf, cur_val);
2426 push_selector;
2427 set_ff(cur_val);
2428 print_int(pdf_font_num(ff));
2429 pop_selector;
2430 } else if (scan_keyword("fontsize")) {
2431 scan_font_ident();
2432 if (cur_val == null_font)
2433 normal_error("pdf backend", "invalid font identifier when asking 'fontsize'");
2434 push_selector;
2435 print_scaled(font_size(cur_val));
2436 tprint("pt");
2437 pop_selector;
2438 } else if (scan_keyword("pageref")) {
2439 scan_int();
2440 if (cur_val <= 0)
2441 normal_error("pdf backend", "invalid page number when asking 'pageref'");
2442 push_selector;
2443 print_int(pdf_get_obj(static_pdf, obj_type_page, cur_val, false));
2444 pop_selector;
2445 } else if (scan_keyword("colorstackinit")) {
2446 bool = scan_keyword("page");
2447 if (scan_keyword("direct"))
2448 cur_val = direct_always;
2449 else if (scan_keyword("page"))
2450 cur_val = direct_page;
2451 else
2452 cur_val = set_origin;
2453 save_scanner_status = scanner_status;
2454 save_warning_index = warning_index;
2455 save_def_ref = def_ref;
2456 u = save_cur_string();
2457 scan_toks(false, true);
2458 s = tokens_to_string(def_ref);
2459 delete_token_ref(def_ref);
2460 def_ref = save_def_ref;
2461 warning_index = save_warning_index;
2462 scanner_status = save_scanner_status;
2463 str = makecstring(s);
2464 cur_val = newcolorstack(str, cur_val, bool);
2465 free(str);
2466 flush_str(s);
2467 cur_val_level = int_val_level;
2468 if (cur_val < 0) {
2469 print_err("Too many color stacks");
2470 help2("The number of color stacks is limited to 32768.",
2471 "I'll use the default color stack 0 here.");
2472 error();
2473 cur_val = 0;
2474 restore_cur_string(u);
2476 push_selector;
2477 print_int(cur_val);
2478 pop_selector;
2479 } else if (scan_keyword("version")) {
2480 push_selector;
2481 print_int(pdftex_version);
2482 pop_selector;
2483 } else if (scan_keyword("revision")) {
2484 ins_list(string_to_toks(pdftex_revision));
2485 return 2;
2486 } else {
2487 return 0;
2489 return 1;
2492 void conv_toks(void)
2494 int old_setting; /* holds |selector| setting */
2495 halfword p, q;
2496 int save_scanner_status; /* |scanner_status| upon entry */
2497 halfword save_def_ref; /* |def_ref| upon entry, important if inside `\.{\\message}' */
2498 halfword save_warning_index;
2499 boolean bool; /* temp boolean */
2500 str_number s; /* first temp string */
2501 int sn; /* lua chunk name */
2502 str_number u = 0; /* third temp string, will become non-nil if a string is already being built */
2503 int c = cur_chr; /* desired type of conversion */
2504 str_number str;
2505 int i = 0;
2506 /* Scan the argument for command |c| */
2507 switch (c) {
2508 case number_code:
2509 scan_int();
2510 push_selector;
2511 print_int(cur_val);
2512 pop_selector;
2513 break;
2514 case lua_function_code:
2515 scan_int();
2516 if (cur_val <= 0) {
2517 normal_error("luafunction", "invalid number");
2518 } else {
2519 u = save_cur_string();
2520 luacstrings = 0;
2521 luafunctioncall(cur_val);
2522 restore_cur_string(u);
2523 if (luacstrings > 0)
2524 lua_string_start();
2526 /* no further action */
2527 return;
2528 break;
2529 case lua_code:
2530 u = save_cur_string();
2531 save_scanner_status = scanner_status;
2532 save_def_ref = def_ref;
2533 save_warning_index = warning_index;
2534 sn = scan_lua_state();
2535 scan_toks(false, true);
2536 s = def_ref;
2537 warning_index = save_warning_index;
2538 def_ref = save_def_ref;
2539 scanner_status = save_scanner_status;
2540 luacstrings = 0;
2541 luatokencall(s, sn);
2542 delete_token_ref(s);
2543 restore_cur_string(u); /* TODO: check this, was different */
2544 if (luacstrings > 0)
2545 lua_string_start();
2546 /* no further action */
2547 return;
2548 break;
2549 case expanded_code:
2550 save_scanner_status = scanner_status;
2551 save_warning_index = warning_index;
2552 save_def_ref = def_ref;
2553 u = save_cur_string();
2554 scan_toks(false, true);
2555 warning_index = save_warning_index;
2556 scanner_status = save_scanner_status;
2557 ins_list(token_link(def_ref));
2558 def_ref = save_def_ref;
2559 restore_cur_string(u);
2560 /* no further action */
2561 return;
2562 break;
2563 case math_style_code:
2564 push_selector;
2565 print_math_style();
2566 pop_selector;
2567 break;
2568 case string_code:
2569 save_scanner_status = scanner_status;
2570 scanner_status = normal;
2571 get_token();
2572 scanner_status = save_scanner_status;
2573 push_selector;
2574 if (cur_cs != 0)
2575 sprint_cs(cur_cs);
2576 else
2577 print(cur_chr);
2578 pop_selector;
2579 break;
2580 case cs_string_code:
2581 save_scanner_status = scanner_status;
2582 scanner_status = normal;
2583 get_token();
2584 scanner_status = save_scanner_status;
2585 push_selector;
2586 if (cur_cs != 0)
2587 sprint_cs_name(cur_cs);
2588 else
2589 print(cur_chr);
2590 pop_selector;
2591 break;
2592 case roman_numeral_code:
2593 scan_int();
2594 push_selector;
2595 print_roman_int(cur_val);
2596 pop_selector;
2597 break;
2598 case meaning_code:
2599 save_scanner_status = scanner_status;
2600 scanner_status = normal;
2601 get_token();
2602 scanner_status = save_scanner_status;
2603 push_selector;
2604 print_meaning();
2605 pop_selector;
2606 break;
2607 case uchar_code:
2608 scan_char_num();
2609 push_selector;
2610 print(cur_val);
2611 pop_selector;
2612 break;
2613 case lua_escape_string_code:
2615 lstring escstr;
2616 int l = 0;
2617 save_scanner_status = scanner_status;
2618 save_def_ref = def_ref;
2619 save_warning_index = warning_index;
2620 scan_toks(false, true);
2621 bool = in_lua_escape;
2622 in_lua_escape = true;
2623 escstr.s = (unsigned char *) tokenlist_to_cstring(def_ref, false, &l);
2624 escstr.l = (unsigned) l;
2625 in_lua_escape = bool;
2626 delete_token_ref(def_ref);
2627 def_ref = save_def_ref;
2628 warning_index = save_warning_index;
2629 scanner_status = save_scanner_status;
2630 (void) lua_str_toks(escstr);
2631 ins_list(token_link(temp_token_head));
2632 free(escstr.s);
2633 return;
2635 /* no further action */
2636 break;
2637 case font_id_code:
2638 scan_font_ident();
2639 push_selector;
2640 print_int(cur_val);
2641 pop_selector;
2642 break;
2643 case font_name_code:
2644 scan_font_ident();
2645 push_selector;
2646 append_string((unsigned char *) font_name(cur_val),(unsigned) strlen(font_name(cur_val)));
2647 if (font_size(cur_val) != font_dsize(cur_val)) {
2648 tprint(" at ");
2649 print_scaled(font_size(cur_val));
2650 tprint("pt");
2652 pop_selector;
2653 break;
2654 case left_margin_kern_code:
2655 scan_int();
2656 if ((box(cur_val) == null) || (type(box(cur_val)) != hlist_node))
2657 normal_error("marginkern", "a non-empty hbox expected");
2658 push_selector;
2659 p = list_ptr(box(cur_val));
2660 while ((p != null) && (type(p) == glue_node)) {
2661 p = vlink(p);
2663 if ((p != null) && (type(p) == margin_kern_node) && (subtype(p) == left_side))
2664 print_scaled(width(p));
2665 else
2666 print_char('0');
2667 tprint("pt");
2668 pop_selector;
2669 break;
2670 case right_margin_kern_code:
2671 scan_int();
2672 if ((box(cur_val) == null) || (type(box(cur_val)) != hlist_node))
2673 normal_error("marginkern", "a non-empty hbox expected");
2674 push_selector;
2675 p = list_ptr(box(cur_val));
2676 if (p != null) {
2677 p = tail_of_list(p);
2679 there can be a leftskip, rightskip, penalty and yes, also a disc node with a nesting
2680 node that points to glue spec ... and we don't want to analyze that messy lot
2682 while ((p != null) && (type(p) == glue_node)) {
2683 p = alink(p);
2685 if ((p != null) && ! ((type(p) == margin_kern_node) && (subtype(p) == right_side))) {
2686 if (type(p) == disc_node) {
2687 q = alink(p);
2688 if ((q != null) && ((type(q) == margin_kern_node) && (subtype(q) == right_side))) {
2689 p = q;
2690 } else {
2692 officially we should look in the replace but currently protrusion doesn't
2693 work anyway with "foo\discretionary{}{}{bar-} " (no following char) so we
2694 don't need it now
2700 if ((p != null) && (type(p) == margin_kern_node) && (subtype(p) == right_side))
2701 print_scaled(width(p));
2702 else
2703 print_char('0');
2704 tprint("pt");
2705 pop_selector;
2706 break;
2707 case uniform_deviate_code:
2708 scan_int();
2709 push_selector;
2710 print_int(unif_rand(cur_val));
2711 pop_selector;
2712 break;
2713 case normal_deviate_code:
2714 scan_int();
2715 push_selector;
2716 print_int(norm_rand());
2717 pop_selector;
2718 break;
2719 case math_char_class_code:
2721 mathcodeval mval;
2722 scan_int();
2723 mval = get_math_code(cur_val);
2724 push_selector;
2725 print_int(mval.class_value);
2726 pop_selector;
2728 break;
2729 case math_char_fam_code:
2731 mathcodeval mval;
2732 scan_int();
2733 mval = get_math_code(cur_val);
2734 push_selector;
2735 print_int(mval.family_value);
2736 pop_selector;
2738 break;
2739 case math_char_slot_code:
2741 mathcodeval mval;
2742 scan_int();
2743 mval = get_math_code(cur_val);
2744 push_selector;
2745 print_int(mval.character_value);
2746 pop_selector;
2748 break;
2749 case insert_ht_code:
2750 scan_register_num();
2751 push_selector;
2752 i = cur_val;
2753 p = page_ins_head;
2754 while (i >= subtype(vlink(p)))
2755 p = vlink(p);
2756 if (subtype(p) == i)
2757 print_scaled(height(p));
2758 else
2759 print_char('0');
2760 tprint("pt");
2761 pop_selector;
2762 break;
2763 case job_name_code:
2764 if (job_name == 0)
2765 open_log_file();
2766 push_selector;
2767 print_job_name();
2768 pop_selector;
2769 break;
2770 case format_name_code:
2771 if (job_name == 0)
2772 open_log_file();
2773 push_selector;
2774 print(format_name);
2775 pop_selector;
2776 break;
2777 case luatex_banner_code:
2778 push_selector;
2779 tprint(luatex_banner);
2780 pop_selector;
2781 break;
2782 case luatex_revision_code:
2783 push_selector;
2784 print(get_luatexrevision());
2785 pop_selector;
2786 break;
2787 case luatex_date_code:
2788 push_selector;
2789 print_int(get_luatex_date_info());
2790 pop_selector;
2791 break;
2792 case etex_code:
2793 push_selector;
2794 tprint(eTeX_version_string);
2795 pop_selector;
2796 break;
2797 case eTeX_revision_code:
2798 push_selector;
2799 tprint(eTeX_revision);
2800 pop_selector;
2801 break;
2802 case font_identifier_code:
2803 confusion("convert");
2804 break;
2805 default:
2806 confusion("convert");
2807 break;
2809 str = make_string();
2810 (void) str_toks(str_lstring(str));
2811 flush_str(str);
2812 ins_list(token_link(temp_token_head));
2815 void do_feedback(void)
2817 int c = cur_chr;
2818 str_number str;
2819 int done = 1;
2820 switch (c) {
2821 case dvi_feedback_code:
2822 if (get_o_mode() == OMODE_DVI) {
2823 done = do_feedback_dvi(c);
2824 } else {
2825 tex_error("unexpected use of \\dvifeedback",null);
2826 return ;
2828 if (done==0) {
2829 /* we recover */
2830 normal_warning("dvi backend","unexpected use of \\dvifeedback");
2831 return;
2832 } else if (done==2) {
2833 return;
2835 break;
2836 case pdf_feedback_code:
2837 if (get_o_mode() == OMODE_PDF) {
2838 done = do_feedback_pdf(c);
2839 } else {
2840 tex_error("unexpected use of \\pdffeedback",null);
2841 return ;
2843 if (done==0) {
2844 /* we recover */
2845 normal_warning("pdf backend","unexpected use of \\pdffeedback");
2846 return;
2847 } else if (done==2) {
2848 return;
2850 break;
2851 default:
2852 confusion("feedback");
2853 break;
2855 str = make_string();
2856 (void) str_toks(str_lstring(str));
2857 flush_str(str);
2858 ins_list(token_link(temp_token_head));
2861 void do_variable(void)
2863 int c = cur_chr;
2864 int done = 1;
2865 switch (c) {
2866 case dvi_variable_code:
2867 done = do_variable_dvi(c);
2868 if (done==0) {
2869 /* we recover */
2870 normal_warning("dvi backend","unexpected use of \\dvivariable");
2872 return;
2873 break;
2874 case pdf_variable_code:
2875 done = do_variable_pdf(c);
2876 if (done==0) {
2877 /* we recover */
2878 normal_warning("pdf backend","unexpected use of \\pdfvariable");
2880 return;
2881 break;
2882 default:
2883 confusion("variable");
2884 break;
2889 The following code is not used as we can only set math options and not query them. If
2890 an option is really important we will provide a proper variable. Most options are not
2891 meant for users anyway but for development.
2896 #define do_mathoption_int(i) \
2897 cur_cmd = assign_int_cmd; \
2898 cur_val = mathoption_int_base + i; \
2899 cur_tok = token_val(cur_cmd, cur_val); \
2900 back_input();
2902 void do_mathoption(void)
2904 if (scan_keyword("old")) { do_mathoption_int(c_mathoption_no_italic_compensation_code); }
2905 if (scan_keyword("noitaliccompensation")) { do_mathoption_int(c_mathoption_no_char_italic_code); }
2906 else if (scan_keyword("nocharitalic")) { do_mathoption_int(c_mathoption_use_old_fraction_scaling_code); }
2907 else if (scan_keyword("useoldfractionscaling")) { do_mathoption_int(c_mathoption_old_code); }
2908 else if (scan_keyword("umathcodemeaning")) { do_mathoption_int(c_mathoption_umathcode_meaning_code); }
@ This boolean keeps track of the Lua string escape state.
2915 boolean in_lua_escape;
2917 static int the_convert_string_dvi(halfword c, int i)
2919 return 0 ;
2922 static int the_convert_string_pdf(halfword c, int i)
2924 int ff;
2925 if (get_o_mode() != OMODE_PDF) {
2926 return 0;
2927 } else if (scan_keyword("lastlink")) {
2928 print_int(pdf_last_link);
2929 } else if (scan_keyword("retval")) {
2930 print_int(pdf_retval);
2931 } else if (scan_keyword("lastobj")) {
2932 print_int(pdf_last_obj);
2933 } else if (scan_keyword("lastannot")) {
2934 print_int(pdf_last_annot);
2935 } else if (scan_keyword("xformname")) {
2936 print_int(obj_info(static_pdf, i));
2937 } else if (scan_keyword("creationdate")) {
2938 return 0;
2939 } else if (scan_keyword("fontname")) {
2940 set_ff(i);
2941 print_int(obj_info(static_pdf, pdf_font_num(ff)));
2942 } else if (scan_keyword("fontobjnum")) {
2943 set_ff(i);
2944 print_int(pdf_font_num(ff));
2945 } else if (scan_keyword("fontsize")) {
2946 print_scaled(font_size(i));
2947 tprint("pt");
2948 } else if (scan_keyword("pageref")) {
2949 print_int(pdf_get_obj(static_pdf, obj_type_page, i, false));
2950 } else if (scan_keyword("colorstackinit")) {
2951 return 0;
2952 } else {
2953 return 0;
2955 return 1;
2958 str_number the_convert_string(halfword c, int i)
2960 int old_setting; /* saved |selector| setting */
2961 str_number ret = 0;
2962 boolean done = true ;
2963 old_setting = selector;
2964 selector = new_string;
2965 switch (c) {
2966 case number_code:
2967 print_int(i);
2968 break;
2969 /* case lua_function_code: */
2970 /* case lua_code: */
2971 /* case expanded_code: */
2972 case math_style_code:
2973 print_math_style();
2974 break;
2975 /* case string_code: */
2976 /* case cs_string_code: */
2977 case roman_numeral_code:
2978 print_roman_int(i);
2979 break;
2980 /* case meaning_code: */
2981 case uchar_code:
2982 print(i);
2983 break;
2984 /* lua_escape_string_code: */
2985 case font_id_code:
2986 print_int(i);
2987 break;
2988 case font_name_code:
2989 append_string((unsigned char *) font_name(i),(unsigned) strlen(font_name(i)));
2990 if (font_size(i) != font_dsize(i)) {
2991 tprint(" at ");
2992 print_scaled(font_size(i));
2993 tprint("pt");
2995 break;
2996 /* left_margin_kern_code: */
2997 /* right_margin_kern_code: */
2998 case uniform_deviate_code:
2999 print_int(unif_rand(i));
3000 break;
3001 case normal_deviate_code:
3002 print_int(norm_rand());
3003 break;
3004 /* math_char_class_code: */
3005 /* math_char_fam_code: */
3006 /* math_char_slot_code: */
3007 /* insert_ht_code: */
3008 case job_name_code:
3009 print_job_name();
3010 break;
3011 case format_name_code:
3012 print(format_name);
3013 break;
3014 case luatex_banner_code:
3015 tprint(luatex_banner);
3016 break;
3017 case luatex_revision_code:
3018 print(get_luatexrevision());
3019 break;
3020 case luatex_date_code:
3021 print_int(get_luatex_date_info());
3022 break;
3023 case etex_code:
3024 tprint(eTeX_version_string);
3025 break;
3026 case eTeX_revision_code:
3027 tprint(eTeX_revision);
3028 break;
3029 case font_identifier_code:
3030 print_font_identifier(i);
3031 break;
3032 /* backend: this might become obsolete */
3033 case dvi_feedback_code:
3034 done = the_convert_string_dvi(c,i);
3035 break;
3036 case pdf_feedback_code:
3037 done = the_convert_string_pdf(c,i);
3038 break;
3039 /* done */
3040 default:
3041 done = false;
3042 break;
3044 if (done) {
3045 ret = make_string();
3047 selector = old_setting;
3048 return ret;
3051 @ Another way to create a token list is via the \.{\\read} command. The sixteen
3052 files potentially usable for reading appear in the following global variables.
3053 The value of |read_open[n]| will be |closed| if stream number |n| has not been
3054 opened or if it has been fully read; |just_open| if an \.{\\openin} but not a
3055 \.{\\read} has been done; and |normal| if it is open and ready to read the next
3056 line.
3059 FILE *read_file[16]; /* used for \.{\\read} */
3060 int read_open[17]; /* state of |read_file[n]| */
3062 void initialize_read(void)
3064 int k;
3065 for (k = 0; k <= 16; k++)
3066 read_open[k] = closed;
3069 @ The |read_toks| procedure constructs a token list like that for any macro
3070 definition, and makes |cur_val| point to it. Parameter |r| points to the control
3071 sequence that will receive this token list.
3074 void read_toks(int n, halfword r, halfword j)
3076 halfword p; /* tail of the token list */
3077 halfword q; /* new node being added to the token list via |store_new_token| */
3078 int s; /* saved value of |align_state| */
3079 int m; /* stream number */
3080 scanner_status = defining;
3081 warning_index = r;
3082 p = get_avail();
3083 def_ref = p;
3084 set_token_ref_count(def_ref, 0);
3085 p = def_ref; /* the reference count */
3086 store_new_token(end_match_token);
3087 if ((n < 0) || (n > 15))
3088 m = 16;
3089 else
3090 m = n;
3091 s = align_state;
3092 align_state = 1000000; /* disable tab marks, etc. */
3093 do {
3094 /* Input and store tokens from the next line of the file */
3095 begin_file_reading();
3096 iname = m + 1;
3097 if (read_open[m] == closed) {
3099 Input for \.{\\read} from the terminal
3101 Here we input on-line into the |buffer| array, prompting the user explicitly
3102 if |n>=0|. The value of |n| is set negative so that additional prompts
3103 will not be given in the case of multi-line input.
3105 if (interaction > nonstop_mode) {
3106 if (n < 0) {
3107 prompt_input("");
3108 } else {
3109 wake_up_terminal();
3110 print_ln();
3111 sprint_cs(r);
3112 prompt_input(" =");
3113 n = -1;
3115 } else {
3116 fatal_error
3117 ("*** (cannot \\read from terminal in nonstop modes)");
3120 } else if (read_open[m] == just_open) {
3122 Input the first line of |read_file[m]|
3124 The first line of a file must be treated specially, since |lua_input_ln|
3125 must be told not to start with |get|.
3127 if (lua_input_ln(read_file[m], (m + 1), false)) {
3128 read_open[m] = normal;
3129 } else {
3130 lua_a_close_in(read_file[m], (m + 1));
3131 read_open[m] = closed;
3134 } else {
3136 Input the next line of |read_file[m]|
3138 An empty line is appended at the end of a |read_file|.
3140 if (!lua_input_ln(read_file[m], (m + 1), true)) {
3141 lua_a_close_in(read_file[m], (m + 1));
3142 read_open[m] = closed;
3143 if (align_state != 1000000) {
3144 runaway();
3145 print_err("File ended within \\read");
3146 help1("This \\read has unbalanced braces.");
3147 align_state = 1000000;
3148 error();
3153 ilimit = last;
3154 if (end_line_char_inactive)
3155 decr(ilimit);
3156 else
3157 buffer[ilimit] = (packed_ASCII_code) int_par(end_line_char_code);
3158 first = ilimit + 1;
3159 iloc = istart;
3160 istate = new_line;
3161 /* Handle \.{\\readline} and |goto done|; */
3162 if (j == 1) {
3163 while (iloc <= ilimit) {
3164 /* current line not yet finished */
3165 do_buffer_to_unichar(cur_chr, iloc);
3166 if (cur_chr == ' ')
3167 cur_tok = space_token;
3168 else
3169 cur_tok = cur_chr + other_token;
3170 store_new_token(cur_tok);
3172 } else {
3173 while (1) {
3174 get_token();
3175 if (cur_tok == 0) {
3176 /* |cur_cmd=cur_chr=0| will occur at the end of the line */
3177 break;
3179 if (align_state < 1000000) {
3180 /* unmatched `\.\}' aborts the line */
3181 do {
3182 get_token();
3183 } while (cur_tok != 0);
3184 align_state = 1000000;
3185 break;
3187 store_new_token(cur_tok);
3190 end_file_reading();
3192 } while (align_state != 1000000);
3193 cur_val = def_ref;
3194 scanner_status = normal;
3195 align_state = s;
@ Return a string that is built from a token list.
3201 str_number tokens_to_string(halfword p)
3203 int old_setting;
3204 if (selector == new_string)
3205 normal_error("tokens","tokens_to_string() called while selector = new_string");
3206 old_setting = selector;
3207 selector = new_string;
3208 show_token_list(token_link(p), null, -1);
3209 selector = old_setting;
3210 return make_string();
3213 @ @c
/*
    Helpers for building a C string in the buffer |ret| (allocated size
    |alloci|, next free index |i|). The using function has to provide these
    three variables, and |e| (the escape character) for |Print_esc|.

    |make_room(a)| makes sure that |a| more bytes plus a terminator fit. The
    buffer grows to what is actually needed plus 64 bytes of slack, so a
    request of more than 64 bytes at once (a long name passed to |Print_str|
    or |Print_esc|) can no longer run past the end of the buffer.
*/
#define make_room(a) \
    if ((unsigned) i + (unsigned) (a) + 1 > alloci) { \
        alloci = (unsigned) i + (unsigned) (a) + 1 + 64; \
        ret = xrealloc(ret, alloci); \
    }

#define append_i_byte(a) ret[i++] = (char)(a)

/* one statement, so that it can be used in an unbraced |if| */
#define Print_char(a) do { make_room(1); append_i_byte(a); } while (0)

/*
    Append the character |s| in UTF-8. Values from 0x110000 upwards stand for
    the single raw byte |s-0x110000|.
*/
#define Print_uchar(s) { \
    make_room(4); \
    if ((s) <= 0x7F) { \
        append_i_byte(s); \
    } else if ((s) <= 0x7FF) { \
        append_i_byte(0xC0 + ((s) / 0x40)); \
        append_i_byte(0x80 + ((s) % 0x40)); \
    } else if ((s) <= 0xFFFF) { \
        append_i_byte(0xE0 + ((s) / 0x1000)); \
        append_i_byte(0x80 + (((s) % 0x1000) / 0x40)); \
        append_i_byte(0x80 + (((s) % 0x1000) % 0x40)); \
    } else if ((s) >= 0x110000) { \
        append_i_byte((s) - 0x110000); \
    } else { \
        append_i_byte(0xF0 + ((s) / 0x40000)); \
        append_i_byte(0x80 + (((s) % 0x40000) / 0x1000)); \
        append_i_byte(0x80 + ((((s) % 0x40000) % 0x1000) / 0x40)); \
        append_i_byte(0x80 + ((((s) % 0x40000) % 0x1000) % 0x40)); \
    } \
}

/* Append the escape character |e| (when it is a usable character) and then the C string |b|. */
#define Print_esc(b) { \
    const char *v = b; \
    if (e>0 && e<STRING_OFFSET) { \
        Print_uchar (e); \
    } \
    make_room(strlen(v)); \
    while (*v) { append_i_byte(*v); v++; } \
}

/* Append the C string |b|. */
#define Print_str(b) { \
    const char *v = b; \
    make_room(strlen(v)); \
    while (*v) { append_i_byte(*v); v++; } \
}

/* Does the string with number |a| start with a character of category 11 (letter)? */
#define is_cat_letter(a) \
    (get_char_cat_code(pool_to_unichar(str_string((a)))) == 11)
@ The actual token conversion in this function is now functionally equivalent to
|show_token_list|, except that it always prints the whole token list. TODO: check
whether this causes problems in the Lua library.

@c
3267 char *tokenlist_to_cstring(int pp, int inhibit_par, int *siz)
3269 register int p, c, m;
3270 int q;
3271 int infop;
3272 char *s, *sh;
3273 int e = 0;
3274 char *ret;
3275 int match_chr = '#';
3276 int n = '0';
3277 unsigned alloci = 1024;
3278 int i = 0;
3279 p = pp;
3280 if (p == null) {
3281 if (siz != NULL)
3282 *siz = 0;
3283 return NULL;
3285 ret = xmalloc(alloci);
3286 p = token_link(p); /* skip refcount */
3287 if (p != null) {
3288 e = int_par(escape_char_code);
3290 while (p != null) {
3291 if (p < (int) fix_mem_min || p > (int) fix_mem_end) {
3292 Print_esc("CLOBBERED.");
3293 break;
3295 infop = token_info(p);
3296 if (infop >= cs_token_flag) {
3297 if (!(inhibit_par && infop == par_token)) {
3298 q = infop - cs_token_flag;
3299 if (q < hash_base) {
3300 if (q == null_cs) {
3301 Print_esc("csname");
3302 Print_esc("endcsname");
3303 } else {
3304 Print_esc("IMPOSSIBLE.");
3306 } else if ((q >= undefined_control_sequence) && ((q <= eqtb_size) || (q > eqtb_size + hash_extra))) {
3307 Print_esc("IMPOSSIBLE.");
3308 } else if ((cs_text(q) < 0) || (cs_text(q) >= str_ptr)) {
3309 Print_esc("NONEXISTENT.");
3310 } else {
3311 str_number txt = cs_text(q);
3312 sh = makecstring(txt);
3313 s = sh;
3314 if (is_active_cs(txt)) {
3315 s = s + 3;
3316 while (*s) {
3317 Print_char(*s);
3318 s++;
3320 } else {
3321 if (e>=0 && e<0x110000) Print_uchar(e);
3322 while (*s) {
3323 Print_char(*s);
3324 s++;
3326 if ((!single_letter(txt)) || is_cat_letter(txt)) {
3327 Print_char(' ');
3330 free(sh);
3333 } else {
3334 if (infop < 0) {
3335 Print_esc("BAD");
3336 } else {
3337 m = token_cmd(infop);
3338 c = token_chr(infop);
3339 switch (m) {
3340 case left_brace_cmd:
3341 case right_brace_cmd:
3342 case math_shift_cmd:
3343 case tab_mark_cmd:
3344 case sup_mark_cmd:
3345 case sub_mark_cmd:
3346 case spacer_cmd:
3347 case letter_cmd:
3348 case other_char_cmd:
3349 Print_uchar(c);
3350 break;
3351 case mac_param_cmd:
3352 if (!in_lua_escape && (is_in_csname==0))
3353 Print_uchar(c);
3354 Print_uchar(c);
3355 break;
3356 case out_param_cmd:
3357 Print_uchar(match_chr);
3358 if (c <= 9) {
3359 Print_char(c + '0');
3360 } else {
3361 Print_char('!');
3362 goto EXIT;
3364 break;
3365 case match_cmd:
3366 match_chr = c;
3367 Print_uchar(c);
3368 n++;
3369 Print_char(n);
3370 if (n > '9')
3371 goto EXIT;
3372 break;
3373 case end_match_cmd:
3374 if (c == 0) {
3375 Print_char('-');
3376 Print_char('>');
3378 break;
3379 default:
3380 not_so_bad(Print_esc);
3381 break;
3385 p = token_link(p);
3387 EXIT:
3388 ret[i] = '\0';
3389 if (siz != NULL)
3390 *siz = i;
3391 return ret;
3394 @ @c
3395 lstring *tokenlist_to_lstring(int pp, int inhibit_par)
3397 int siz;
3398 lstring *ret = xmalloc(sizeof(lstring));
3399 ret->s = (unsigned char *) tokenlist_to_cstring(pp, inhibit_par, &siz);
3400 ret->l = (size_t) siz;
3401 return ret;
3404 @ @c
3405 void free_lstring(lstring * ls)
3407 if (ls == NULL)
3408 return;
3409 if (ls->s != NULL)
3410 free(ls->s);
3411 free(ls);