beta-0.89.2
[luatex.git] / source / texk / web2c / luatexdir / tex / textoken.w
blobf8fb8b467a0a3444cddebe2856cc4d712eb3deb3
1 % textoken.w
3 % Copyright 2006-2011 Taco Hoekwater <taco@@luatex.org>
5 % This file is part of LuaTeX.
7 % LuaTeX is free software; you can redistribute it and/or modify it under
8 % the terms of the GNU General Public License as published by the Free
9 % Software Foundation; either version 2 of the License, or (at your
10 % option) any later version.
12 % LuaTeX is distributed in the hope that it will be useful, but WITHOUT
13 % ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 % FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 % License for more details.
17 % You should have received a copy of the GNU General Public License along
18 % with LuaTeX; if not, see <http://www.gnu.org/licenses/>.
20 @ @c
22 #include "ptexlib.h"
24 @ @c
/* Shorthands for integer parameters, all read through |int_par|. */
25 #define pausing int_par(pausing_code)
26 #define cat_code_table int_par(cat_code_table_code)
27 #define tracing_nesting int_par(tracing_nesting_code)
28 #define suppress_outer_error int_par(suppress_outer_error_code)
29 #define suppress_mathpar_error int_par(suppress_mathpar_error_code)
/* Shorthands built on |equiv|; |box| and |toks| take a register number |A|. */
32 #define every_eof equiv(every_eof_loc)
33 #define box(A) equiv(box_base+(A))
34 #define toks(A) equiv(toks_base+(A))
/* True when the current line's catcode table is |NO_CAT_TABLE|. */
36 #define detokenized_line() (line_catcode_table==NO_CAT_TABLE)
/* Store in |a| the category code of character |b|. The table attached to the
   current line is used unless it is |DEFAULT_CAT_TABLE|, in which case the
   |cat_code_table| parameter selects the table. */
38 #define do_get_cat_code(a,b) do { \
39 if (line_catcode_table!=DEFAULT_CAT_TABLE) \
40 a=get_cat_code(line_catcode_table,b); \
41 else \
42 a=get_cat_code(cat_code_table,b); \
43 } while (0)
46 @ The \TeX\ system does nearly all of its own memory allocation, so that it can
47 readily be transported into environments that do not have automatic facilities
48 for strings, garbage collection, etc., and so that it can be in control of what
49 error messages the user receives. The dynamic storage requirements of \TeX\ are
50 handled by providing two large arrays called |fixmem| and |varmem| in which
51 consecutive blocks of words are used as nodes by the \TeX\ routines.
53 Pointer variables are indices into this array, or into another array called
54 |eqtb| that will be explained later. A pointer variable might also be a special
55 flag that lies outside the bounds of |mem|, so we allow pointers to assume any
56 |halfword| value. The minimum halfword value represents a null pointer. \TeX\
57 does not assume that |mem[null]| exists.
59 @ Locations in |fixmem| are used for storing one-word records; a conventional
60 \.{AVAIL} stack is used for allocation in this array.
/* Storage for one-word nodes. |get_avail| enlarges |fixmem| with |realloc|
   (raising |fix_mem_max|) once every slot up to |fix_mem_max| has been used. */
63 smemory_word *fixmem; /* the big dynamic storage area */
64 unsigned fix_mem_min; /* the smallest location of one-word memory in use */
65 unsigned fix_mem_max; /* the largest location of one-word memory in use */
67 @ In order to study the memory requirements of particular applications, it is
68 possible to prepare a version of \TeX\ that keeps track of current and maximum
69 memory usage. When code between the delimiters |@!stat| $\ldots$ |tats| is not
70 commented out, \TeX\ will run a bit slower but it will report these statistics
71 when |tracing_stats| is sufficiently large.
/* |dyn_used| is incremented by |get_avail| and decremented by |flush_list|. */
74 int var_used, dyn_used; /* how much memory is in use */
76 halfword avail; /* head of the list of available one-word nodes */
77 unsigned fix_mem_end; /* the last one-word node used in |fixmem| */
/* The list heads below each receive one node in |initialize_tokens|. */
79 halfword garbage; /* head of a junk list, write only */
80 halfword temp_token_head; /* head of a temporary list of some kind */
81 halfword hold_token_head; /* head of a temporary list of another kind */
82 halfword omit_template; /* a constant token list */
83 halfword null_list; /* permanently empty list */
84 halfword backup_head; /* head of token list built by |scan_keyword| */
86 @ @c
87 void initialize_tokens(void)
89 halfword p;
90 avail = null;
91 fix_mem_end = 0;
92 p = get_avail();
93 temp_token_head = p;
94 set_token_info(temp_token_head, 0);
95 p = get_avail();
96 hold_token_head = p;
97 set_token_info(hold_token_head, 0);
98 p = get_avail();
99 omit_template = p;
100 set_token_info(omit_template, 0);
101 p = get_avail();
102 null_list = p;
103 set_token_info(null_list, 0);
104 p = get_avail();
105 backup_head = p;
106 set_token_info(backup_head, 0);
107 p = get_avail();
108 garbage = p;
109 set_token_info(garbage, 0);
110 dyn_used = 0; /* initialize statistics */
113 @ The function |get_avail| returns a pointer to a new one-word node whose |link|
114 field is null. However, \TeX\ will halt if there is no more room left.
115 @^inner loop@>
117 If the available-space list is empty, i.e., if |avail=null|, we try first to
118 increase |fix_mem_end|. If that cannot be done, i.e., if
119 |fix_mem_end=fix_mem_max|, we try to reallocate array |fixmem|. If that doesn't
120 work, we have to quit.
123 halfword get_avail(void)
124 { /* single-word node allocation */
125 unsigned p; /* the new node being got */
126 unsigned t;
127 p = (unsigned) avail; /* get top location in the |avail| stack */
128 if (p != null) {
129 avail = token_link(avail); /* and pop it off */
130 } else if (fix_mem_end < fix_mem_max) { /* or go into virgin territory */
131 incr(fix_mem_end);
132 p = fix_mem_end;
133 } else {
134 smemory_word *new_fixmem; /* the big dynamic storage area */
135 t = (fix_mem_max / 5);
136 new_fixmem =
137 fixmemcast(realloc
138 (fixmem, sizeof(smemory_word) * (fix_mem_max + t + 1)));
139 if (new_fixmem == NULL) {
140 runaway(); /* if memory is exhausted, display possible runaway text */
141 overflow("token memory size", fix_mem_max);
142 } else {
143 fixmem = new_fixmem;
145 memset(voidcast(fixmem + fix_mem_max + 1), 0, t * sizeof(smemory_word));
146 fix_mem_max += t;
147 p = ++fix_mem_end;
149 token_link(p) = null; /* provide an oft-desired initialization of the new node */
150 incr(dyn_used); /* maintain statistics */
151 return (halfword) p;
154 @ The procedure |flush_list(p)| frees an entire linked list of one-word nodes
155 that starts at position |p|.
156 @^inner loop@>
159 void flush_list(halfword p)
160 { /* makes list of single-word nodes available */
161 halfword q, r; /* list traversers */
162 if (p != null) {
163 r = p;
164 do {
165 q = r;
166 r = token_link(r);
167 decr(dyn_used);
168 } while (r != null); /* now |q| is the last node on the list */
169 token_link(q) = avail;
170 avail = p;
174 @ A \TeX\ token is either a character or a control sequence, and it is @^token@>
175 represented internally in one of two ways: (1)~A character whose ASCII code
176 number is |c| and whose command code is |m| is represented as the number
177 $2^{21}m+c$; the command code is in the range |1<=m<=14|. (2)~A control sequence
178 whose |eqtb| address is |p| is represented as the number |cs_token_flag+p|. Here
179 |cs_token_flag=@t$2^{25}-1$@>| is larger than $2^{21}m+c$, yet it is small enough
180 that |cs_token_flag+p< max_halfword|; thus, a token fits comfortably in a
181 halfword.
183 A token |t| represents a |left_brace| command if and only if
184 |t<left_brace_limit|; it represents a |right_brace| command if and only if we
185 have |left_brace_limit<=t<right_brace_limit|; and it represents a |match| or
186 |end_match| command if and only if |match_token<=t<=end_match_token|. The
187 following definitions take care of these token-oriented constants and a few
188 others.
190 @ A token list is a singly linked list of one-word nodes in |mem|, where each
191 word contains a token and a link. Macro definitions, output-routine definitions,
192 marks, \.{\\write} texts, and a few other things are remembered by \TeX\ in the
193 form of token lists, usually preceded by a node with a reference count in its
194 |token_ref_count| field. The token stored in location |p| is called |token_info(p)|.
196 Three special commands appear in the token lists of macro definitions. When
197 |m=match|, it means that \TeX\ should scan a parameter for the current macro;
198 when |m=end_match|, it means that parameter matching should end and \TeX\ should
199 start reading the macro text; and when |m=out_param|, it means that \TeX\ should
200 insert parameter number |c| into the text at this point.
202 The enclosing \.{\char'173} and \.{\char'175} characters of a macro definition
203 are omitted, but the final right brace of an output routine is included at the
204 end of its token list.
206 Here is an example macro definition that illustrates these conventions. After
207 \TeX\ processes the text
209 $$\.{\\def\\mac a\#1\#2 \\b \{\#1\\-a \#\#1\#2 \#2\}}$$
211 the definition of \.{\\mac} is represented as a token list containing
213 $$\def\,{\hskip2pt}
214 \vbox{\halign{\hfil#\hfil\cr
215 (reference count), |letter|\,\.a, |match|\,\#, |match|\,\#, |spacer|\,\.\ ,
216 \.{\\b}, |end_match|,\cr
217 |out_param|\,1, \.{\\-}, |letter|\,\.a, |spacer|\,\.\ , |mac_param|\,\#,
218 |other_char|\,\.1,\cr
219 |out_param|\,2, |spacer|\,\.\ , |out_param|\,2.\cr}}$$
221 The procedure |scan_toks| builds such token lists, and |macro_call| does the
222 parameter matching. @^reference counts@>
224 Examples such as $$\.{\\def\\m\{\\def\\m\{a\}\ b\}}$$ explain why reference
225 counts would be needed even if \TeX\ had no \.{\\let} operation: When the token
226 list for \.{\\m} is being read, the redefinition of \.{\\m} changes the |eqtb|
227 entry before the token list has been fully consumed, so we dare not simply
228 destroy a token list when its control sequence is being redefined.
230 If the parameter-matching part of a definition ends with `\.{\#\{}', the
231 corresponding token list will have `\.\{' just before the `|end_match|' and also
232 at the very end. The first `\.\{' is used to delimit the parameter; the second
233 one keeps the first from disappearing.
235 The |print_meaning| subroutine displays |cur_cmd| and |cur_chr| in symbolic form,
236 including the expansion of a macro or mark.
239 void print_meaning(void)
241 /* remap \mathchar onto \Umathchar */
242 if (cur_cmd == math_given_cmd) {
243 cur_cmd = xmath_given_cmd ;
244 } /* else if (cur_cmd == math_char_num_cmd) {
245 if (cur_chr == 0) {
246 cur_chr = 1 ;
248 } */
249 print_cmd_chr((quarterword) cur_cmd, cur_chr);
250 if (cur_cmd >= call_cmd) {
251 print_char(':');
252 print_ln();
253 token_show(cur_chr);
254 } else {
255 /* Show the meaning of a mark node */
256 if ((cur_cmd == top_bot_mark_cmd) && (cur_chr < marks_code)) {
257 print_char(':');
258 print_ln();
259 switch (cur_chr) {
260 case first_mark_code:
261 token_show(first_mark(0));
262 break;
263 case bot_mark_code:
264 token_show(bot_mark(0));
265 break;
266 case split_first_mark_code:
267 token_show(split_first_mark(0));
268 break;
269 case split_bot_mark_code:
270 token_show(split_bot_mark(0));
271 break;
272 default:
273 token_show(top_mark(0));
274 break;
280 @ The procedure |show_token_list|, which prints a symbolic form of the token list
281 that starts at a given node |p|, illustrates these conventions. The token list
282 being displayed should not begin with a reference count. However, the procedure
283 is intended to be robust, so that if the memory links are awry or if |p| is not
284 really a pointer to a token list, nothing catastrophic will happen.
286 An additional parameter |q| is also given; this parameter is either null or it
287 points to a node in the token list where a certain magic computation takes place
288 that will be explained later. (Basically, |q| is non-null when we are printing
289 the two-line context information at the time of an error message; |q| marks the
290 place corresponding to where the second line should begin.)
292 For example, if |p| points to the node containing the first \.a in the token list
293 above, then |show_token_list| will print the string $$\hbox{`\.{a\#1\#2\ \\b\
294 ->\#1\\-a\ \#\#1\#2\ \#2}';}$$ and if |q| points to the node containing the
295 second \.a, the magic computation will be performed just before the second \.a is
296 printed.
298 The generation will stop, and `\.{\\ETC.}' will be printed, if the length of
299 printing exceeds a given limit~|l|. Anomalous entries are printed in the form of
300 control sequences that are not followed by a blank space, e.g., `\.{\\BAD.}';
301 this cannot be confused with actual control sequences because a real control
302 sequence named \.{BAD} would come out `\.{\\BAD\ }'.
/* Fallback used by |show_token_list| for commands it has no dedicated case
   for. Expects |m| (command) and |c| (character/modifier) in scope and calls
   the printing routine |p| with a string. For the three assign commands a
   bracketed placeholder is printed only when |c| lies inside the matching
   backend range, otherwise nothing is printed; every other command prints
   "BAD". */
305 #define not_so_bad(p) \
306 switch (m) { \
307 case assign_int_cmd: \
308 if (c >= (backend_int_base) && c <= (backend_int_last)) \
309 p("[internal backend integer]"); \
310 break; \
311 case assign_dimen_cmd: \
312 if (c >= (backend_dimen_base) && c <= (backend_dimen_last)) \
313 p("[internal backend dimension]"); \
314 break; \
315 case assign_toks_cmd: \
316 if (c >= (backend_toks_base) && c <= (backend_toks_last)) \
317 p("[internal backend tokenlist]"); \
318 break; \
319 default: \
320 p("BAD"); \
321 break; \
/* Print a symbolic form of the token list starting at node |p|.
   |p| : first node to print (not a reference-count node).
   |q| : node at which |set_trick_count| is invoked, or null.
   |l| : output limit compared against |tally|; a negative value is replaced
         by 0x3FFFFFFF.
   |tally| is reset to 0 on entry. Printing stops early with "\CLOBBERED."
   when |p| falls outside |fix_mem_min..fix_mem_end|, and "\ETC." is appended
   when the loop ends before the list is exhausted. */
324 void show_token_list(int p, int q, int l)
326 int m, c; /* pieces of a token */
327 ASCII_code match_chr = '#'; /* character used in a `|match|' */
328 ASCII_code n = '0'; /* the highest parameter number, as an ASCII digit */
329 tally = 0;
330 if (l < 0)
331 l = 0x3FFFFFFF;
332 while ((p != null) && (tally < l)) {
333 if (p == q) {
334 /* Do magic computation */
335 set_trick_count();
337 /* Display token |p|, and |return| if there are problems */
338 if ((p < (int) fix_mem_min) || (p > (int) fix_mem_end)) {
339 tprint_esc("CLOBBERED.");
340 return;
/* control sequences are printed by name, except that \par is skipped while
   |inhibit_par_tokens| is set */
342 if (token_info(p) >= cs_token_flag) {
343 if (!((inhibit_par_tokens) && (token_info(p) == par_token)))
344 print_cs(token_info(p) - cs_token_flag);
345 } else {
346 m = token_cmd(token_info(p));
347 c = token_chr(token_info(p));
348 if (token_info(p) < 0) {
349 tprint_esc("BAD");
350 } else {
352 Display the token $(|m|,|c|)$
354 The procedure usually ``learns'' the character code used for macro
355 parameters by seeing one in a |match| command before it runs into any
356 |out_param| commands.
358 switch (m) {
359 case left_brace_cmd:
360 case right_brace_cmd:
361 case math_shift_cmd:
362 case tab_mark_cmd:
363 case sup_mark_cmd:
364 case sub_mark_cmd:
365 case spacer_cmd:
366 case letter_cmd:
367 case other_char_cmd:
368 print(c);
369 break;
/* a macro-parameter character is printed twice, or only once inside a Lua
   escape or while |is_in_csname| is nonzero */
370 case mac_param_cmd:
371 if (!in_lua_escape && (is_in_csname==0))
372 print(c);
373 print(c);
374 break;
/* parameter reference: |match_chr| followed by a digit; numbers above 9 are
   printed as `!' and end the display */
375 case out_param_cmd:
376 print(match_chr);
377 if (c <= 9) {
378 print_char(c + '0');
379 } else {
380 print_char('!');
381 return;
383 break;
/* remember the parameter character and number the parameters from '1';
   the display ends once the number runs past '9' */
384 case match_cmd:
385 match_chr = c;
386 print(c);
387 incr(n);
388 print_char(n);
389 if (n > '9')
390 return;
391 break;
392 case end_match_cmd:
393 if (c == 0)
394 tprint("->");
395 break;
396 default:
397 not_so_bad(tprint);
398 break;
402 p = token_link(p);
/* nodes remain, so the loop stopped on the length limit */
404 if (p != null)
405 tprint_esc("ETC.");
408 @ @c
/* Decode the character starting at |buffer+b| with |str2uni| into |a|, then
   advance the index |b| by |utf8_size(a)|. Both arguments are assigned to and
   so must be lvalues. */
409 #define do_buffer_to_unichar(a,b) do { \
410 a = (halfword)str2uni(buffer+b); \
411 b += utf8_size(a); \
412 } while (0)
414 @ Here's the way we sometimes want to display a token list, given a pointer to
415 its reference count; the pointer may be null.
418 void token_show(halfword p)
420 if (p != null)
421 show_token_list(token_link(p), null, 10000000);
424 @ |delete_token_ref| is called when a pointer to a token list's reference count
425 is being removed. This means that the token list should disappear if the
426 reference count was |null|, otherwise the count should be decreased by one.
427 @^reference counts@>
429 @ |p| points to the reference count of a token list that is losing one
430 reference.
433 void delete_token_ref(halfword p)
435 if (token_ref_count(p) == 0)
436 flush_list(p);
437 else
438 decr(token_ref_count(p));
441 @ @c
442 int get_char_cat_code(int curchr)
444 int a;
445 do_get_cat_code(a,curchr);
446 return a;
449 @ @c
450 static void invalid_character_error(void)
452 const char *hlp[] = {
453 "A funny symbol that I can't read has just been input.",
454 "Continue, and I'll forget that it ever happened.",
455 NULL
457 deletions_allowed = false;
458 tex_error("Text line contains an invalid character", hlp);
459 deletions_allowed = true;
462 @ @c
/* Forward declarations of helpers whose definitions come later in the file. */
463 static boolean process_sup_mark(void); /* below */
465 static int scan_control_sequence(void); /* below */
/* Result codes returned by |next_line|.
   NOTE(review): the meaning of each code is not visible from here; presumably
   "ok" = a line is ready, "return" = the caller should return, "restart" =
   the caller should start over -- confirm against |next_line|. */
467 typedef enum {
468 next_line_ok,
469 next_line_return,
470 next_line_restart
471 } next_line_retval;
473 static next_line_retval next_line(void); /* below */
475 @ In case you are getting bored, here is a slightly less trivial routine: Given a
476 string of lowercase letters, like `\.{pt}' or `\.{plus}' or `\.{width}', the
477 |scan_keyword| routine checks to see whether the next tokens of input match this
478 string. The match must be exact, except that uppercase letters will match their
479 lowercase counterparts; uppercase equivalents are determined by subtracting
480 |"a"-"A"|, rather than using the |uc_code| table, since \TeX\ uses this routine
481 only for its own limited set of keywords.
483 If a match is found, the characters are effectively removed from the input and
484 |true| is returned. Otherwise |false| is returned, and the input is left
485 essentially unchanged (except for the fact that some macros may have been
486 expanded, etc.). @^inner loop@>
/* Check whether the upcoming input spells the keyword |s|.
   Returns |true| and consumes the tokens on a match (a character matches when
   it equals the keyword character or that character shifted by 'A'-'a').
   Returns |false| otherwise, after giving the tokens read so far back to the
   input; an empty |s| never matches. |cur_cs| is restored to its value on
   entry in both cases, and |align_state| is restored unless the current
   command is |endv_cmd|. */
489 boolean scan_keyword(const char *s)
490 { /* look for a given string */
491 halfword p; /* tail of the backup list */
492 halfword q; /* new node being added to the token list via |store_new_token| */
493 const char *k; /* index into |str_pool| */
494 halfword save_cur_cs = cur_cs;
495 int saved_align_state = align_state;
496 if (strlen(s) == 0) /* was assert (strlen(s) > 1); */
497 return false ; /* but not with newtokenlib zero keyword simply doesn't match */
498 p = backup_head;
499 token_link(p) = null;
500 k = s;
501 while (*k) {
502 get_x_token(); /* recursion is possible here */
/* a character token (no control sequence) equal to the next keyword letter:
   remember it on the backup list and advance; presumably |store_new_token|
   appends at |p| using |q| -- confirm against its definition */
503 if ((cur_cs == 0) &&
504 ((cur_chr == *k) || (cur_chr == *k - 'a' + 'A'))) {
505 store_new_token(cur_tok);
506 k++;
/* mismatch; a space is silently skipped only while nothing has been matched
   yet (|p| still equals |backup_head|) */
507 } else if ((cur_cmd != spacer_cmd) || (p != backup_head)) {
508 if (p != backup_head) {
/* append the offending token too, then push the whole backup list back */
509 q = get_avail();
510 token_info(q) = cur_tok;
511 token_link(q) = null;
512 token_link(p) = q;
513 begin_token_list(token_link(backup_head), backed_up);
514 if (cur_cmd != endv_cmd)
515 align_state = saved_align_state;
516 } else {
517 back_input();
519 cur_cs = save_cur_cs;
520 return false;
/* full match: the remembered tokens are no longer needed */
523 if (token_link(backup_head) != null)
524 flush_list(token_link(backup_head));
525 cur_cs = save_cur_cs;
526 if (cur_cmd != endv_cmd)
527 align_state = saved_align_state;
528 return true;
531 @ We cannot return |undefined_control_sequence| under some conditions
532 (inside |shift_case|, for example). This needs thinking.
537 halfword active_to_cs(int curchr, int force)
539 halfword curcs;
540 char *a, *b;
541 char *utfbytes = xmalloc(8);
542 int nncs = no_new_control_sequence;
543 a = (char *) uni2str(0xFFFF);
544 utfbytes = strcpy(utfbytes, a);
545 if (force)
546 no_new_control_sequence = false;
547 if (curchr > 0) {
548 b = (char *) uni2str((unsigned) curchr);
549 utfbytes = strcat(utfbytes, b);
550 free(b);
551 curcs = string_lookup(utfbytes, strlen(utfbytes));
552 } else {
553 utfbytes[3] = '\0';
554 curcs = string_lookup(utfbytes, 4);
556 no_new_control_sequence = nncs;
557 free(a);
558 free(utfbytes);
559 return curcs;
563 /*static char * FFFF = "\xEF\xBF\xBF";*/ /* 0xFFFF */
565 halfword active_to_cs(int curchr, int force)
567 halfword curcs;
568 int nncs = no_new_control_sequence;
569 if (force) {
570 no_new_control_sequence = false;
572 if (curchr > 0) {
573 char *b = (char *) uni2str((unsigned) curchr);
574 char *utfbytes = xmalloc(8);
575 utfbytes = strcpy(utfbytes, "\xEF\xBF\xBF");
576 utfbytes = strcat(utfbytes, b);
577 free(b);
578 curcs = string_lookup(utfbytes, utf8_size(curchr)+3);
579 free(utfbytes);
580 } else {
581 curcs = string_lookup("\xEF\xBF\xBF", 4); /* 0xFFFF ... why not 3 ? */
583 no_new_control_sequence = nncs;
584 return curcs;
/*
 * Build the name string used for an active character: the three bytes
 * EF BF BF (U+FFFF in UTF-8) followed by the encoding of |unic|, terminated
 * by a NUL byte.
 *
 * |unic| below 0x110000 is encoded as ordinary UTF-8 (1 to 4 bytes); values
 * from 0x110000 upwards are stored as the single byte |unic - 0x110000|.
 *
 * Returns an 8-byte buffer obtained from |xmalloc|; the caller frees it.
 */
static unsigned char *uni2csstr(unsigned unic)
{
    unsigned char *buf = xmalloc(8);
    unsigned char *out = buf;

    /* fixed prefix: 0xFFFF */
    out[0] = 0xEF;
    out[1] = 0xBF;
    out[2] = 0xBF;
    out += 3;

    if (unic >= 0x110000) {
        /* out-of-range code: keep only the offset as one raw byte */
        *out++ = (unsigned char) (unic - 0x110000);
    } else if (unic < 0x80) {
        *out++ = (unsigned char) unic;
    } else if (unic < 0x800) {
        *out++ = (unsigned char) (0xc0 | (unic >> 6));
        *out++ = (unsigned char) (0x80 | (unic & 0x3f));
    } else if (unic < 0x10000) {
        *out++ = (unsigned char) (0xe0 | (unic >> 12));
        *out++ = (unsigned char) (0x80 | ((unic >> 6) & 0x3f));
        *out++ = (unsigned char) (0x80 | (unic & 0x3f));
    } else {
        /* 0x10000 .. 0x10FFFF: four bytes */
        *out++ = (unsigned char) (0xf0 | (unic >> 18));
        *out++ = (unsigned char) (0x80 | ((unic >> 12) & 0x3f));
        *out++ = (unsigned char) (0x80 | ((unic >> 6) & 0x3f));
        *out++ = (unsigned char) (0x80 | (unic & 0x3f));
    }
    *out = '\0';
    return buf;
}
621 halfword active_to_cs(int curchr, int force)
623 halfword curcs;
624 int nncs = no_new_control_sequence;
625 if (force) {
626 no_new_control_sequence = false;
628 if (curchr > 0) {
629 char * utfbytes = (char *) uni2csstr((unsigned) curchr);
630 curcs = string_lookup(utfbytes, utf8_size(curchr)+3);
631 free(utfbytes);
632 } else {
633 curcs = string_lookup(FFFF, 4); // 0xFFFF ... why not 3 ?
635 no_new_control_sequence = nncs;
636 return curcs;
641 @ TODO this function should listen to \.{\\escapechar}
643 @ prints a control sequence
646 static char *cs_to_string(halfword p)
648 const char *s;
649 char *sh;
650 int k = 0;
651 static char ret[256] = { 0 };
652 if (p == 0 || p == null_cs) {
653 ret[k++] = '\\';
654 s = "csname";
655 while (*s) {
656 ret[k++] = *s++;
658 ret[k++] = '\\';
659 s = "endcsname";
660 while (*s) {
661 ret[k++] = *s++;
663 ret[k] = 0;
665 } else {
666 str_number txt = cs_text(p);
667 sh = makecstring(txt);
668 s = sh;
669 if (is_active_cs(txt)) {
670 s = s + 3;
671 while (*s) {
672 ret[k++] = *s++;
674 ret[k] = 0;
675 } else {
676 ret[k++] = '\\';
677 while (*s) {
678 ret[k++] = *s++;
680 ret[k] = 0;
682 free(sh);
684 return (char *) ret;
687 @ TODO this is a quick hack, will be solved differently soon
690 static char *cmd_chr_to_string(int cmd, int chr)
692 char *s;
693 str_number str;
694 int sel = selector;
695 selector = new_string;
696 print_cmd_chr((quarterword) cmd, chr);
697 str = make_string();
698 s = makecstring(str);
699 selector = sel;
700 flush_str(str);
701 return s;
704 @ The heart of \TeX's input mechanism is the |get_next| procedure, which we shall
705 develop in the next few sections of the program. Perhaps we shouldn't actually
706 call it the ``heart,'' however, because it really acts as \TeX's eyes and mouth,
707 reading the source files and gobbling them up. And it also helps \TeX\ to
708 regurgitate stored token lists that are to be processed again. @^eyes and mouth@>
710 The main duty of |get_next| is to input one token and to set |cur_cmd| and
711 |cur_chr| to that token's command code and modifier. Furthermore, if the input
712 token is a control sequence, the |eqtb| location of that control sequence is
713 stored in |cur_cs|; otherwise |cur_cs| is set to zero.
715 Underlying this simple description is a certain amount of complexity because of
716 all the cases that need to be handled. However, the inner loop of |get_next| is
717 reasonably short and fast.
719 When |get_next| is asked to get the next token of a \.{\\read} line,
720 it sets |cur_cmd=cur_chr=cur_cs=0| in the case that no more tokens
721 appear on that line. (There might not be any tokens at all, if the
722 |end_line_char| has |ignore| as its catcode.)
724 The value of |par_loc| is the |eqtb| address of `\.{\\par}'. This quantity is
725 needed because a blank line of input is supposed to be exactly equivalent to the
726 appearance of \.{\\par}; we must set |cur_cs:=par_loc| when detecting a blank
727 line.
/* |par_token| is the token |check_outer_validity| inserts to end a runaway
   argument, and the one |show_token_list| leaves out while
   |inhibit_par_tokens| is set. */
730 halfword par_loc; /* location of `\.{\\par}' in |eqtb| */
731 halfword par_token; /* token representing `\.{\\par}' */
733 @ Parts of |get_next| are executed more often than any other instructions of \TeX.
734 @^mastication@>@^inner loop@>
736 The global variable |force_eof| is normally |false|; it is set |true| by an
737 \.{\\endinput} command. |luacstrings| is the number of lua print statements
738 waiting to be input; it is changed by |luatokencall|.
741 boolean force_eof; /* should the next \.{\\input} be aborted early? */
742 int luacstrings; /* how many lua strings are waiting to be input? */
744 @ If the user has set the |pausing| parameter to some positive value, and if
745 nonstop mode has not been selected, each line of input is displayed on the
746 terminal and the transcript file, followed by `\.{=>}'. \TeX\ waits for a
747 response. If the response is simply |carriage_return|, the line is accepted as it
748 stands, otherwise the line typed is used instead of the line in the file.
751 void firm_up_the_line(void)
753 int k; /* an index into |buffer| */
754 ilimit = last;
755 if (pausing > 0) {
756 if (interaction > nonstop_mode) {
757 wake_up_terminal();
758 print_ln();
759 if (istart < ilimit) {
760 for (k = istart; k <= ilimit - 1; k++)
761 print_char(buffer[k]);
763 first = ilimit;
764 prompt_input("=>"); /* wait for user response */
765 if (last > first) {
766 for (k = first; k < +last - 1; k++) /* move line down in buffer */
767 buffer[k + istart - first] = buffer[k];
768 ilimit = istart + last - first;
774 @ Before getting into |get_next|, let's consider the subroutine that is called
775 when an `\.{\\outer}' control sequence has been scanned or when the end of a file
776 has been reached. These two cases are distinguished by |cur_cs|, which is zero at
777 the end of a file.
/* Called when an \outer control sequence has been scanned or a file has
   ended; |cur_cs| is zero in the end-of-file case.
   Does nothing when |suppress_outer_error| is set or |scanner_status| is
   |normal|. Otherwise it reports an error through |tex_error| and inserts
   tokens meant to let the interrupted scan finish. |deletions_allowed| is
   off while this happens, and |cur_cs| is cleared when it was nonzero. */
780 void check_outer_validity(void)
782 halfword p; /* points to inserted token list */
783 halfword q; /* auxiliary pointer */
784 if (suppress_outer_error)
785 return;
786 if (scanner_status != normal) {
787 deletions_allowed = false;
788 /* Back up an outer control sequence so that it can be reread; */
789 /* An outer control sequence that occurs in a \.{\\read} will not be reread,
790 since the error recovery for \.{\\read} is not very powerful. */
791 if (cur_cs != 0) {
792 if ((istate == token_list) || (iname < 1) || (iname > 17)) {
793 p = get_avail();
794 token_info(p) = cs_token_flag + cur_cs;
795 begin_token_list(p, backed_up); /* prepare to read the control sequence again */
797 cur_cmd = spacer_cmd;
798 cur_chr = ' '; /* replace it by a space */
/* statuses above |skipping| are handled by the switch below: |defining|,
   |matching|, |aligning| and |absorbing| */
800 if (scanner_status > skipping) {
801 const char *errhlp[] = {
802 "I suspect you have forgotten a `}', causing me",
803 "to read past where you wanted me to stop.",
804 "I'll try to recover; but if the error is serious,",
805 "you'd better type `E' or `X' now and fix your file.",
806 NULL
808 char errmsg[256];
809 const char *startmsg;
810 const char *scannermsg;
811 /* Tell the user what has run away and try to recover */
812 runaway(); /* print a definition, argument, or preamble */
813 if (cur_cs == 0) {
814 startmsg = "File ended";
815 } else {
816 cur_cs = 0;
817 startmsg = "Forbidden control sequence found";
819 /* Print either `\.{definition}' or `\.{use}' or `\.{preamble}' or `\.{text}',
820 and insert tokens that should lead to recovery; */
821 /* The recovery procedure can't be fully understood without knowing more
822 about the \TeX\ routines that should be aborted, but we can sketch the
823 ideas here: For a runaway definition we will insert a right brace; for a
824 runaway preamble, we will insert a special \.{\\cr} token and a right
825 brace; and for a runaway argument, we will set |long_state| to
826 |outer_call| and insert \.{\\par}. */
827 p = get_avail();
828 switch (scanner_status) {
829 case defining:
830 scannermsg = "definition";
831 token_info(p) = right_brace_token + '}';
832 break;
833 case matching:
834 scannermsg = "use";
835 token_info(p) = par_token;
836 long_state = outer_call_cmd;
837 break;
/* two tokens are inserted here: the frozen \cr node is linked in front of
   the right brace */
838 case aligning:
839 scannermsg = "preamble";
840 token_info(p) = right_brace_token + '}';
841 q = p;
842 p = get_avail();
843 token_link(p) = q;
844 token_info(p) = cs_token_flag + frozen_cr;
845 align_state = -1000000;
846 break;
847 case absorbing:
848 scannermsg = "text";
849 token_info(p) = right_brace_token + '}';
850 break;
/* NOTE(review): in this branch |token_info(p)| is not assigned before |p| is
   inserted below */
851 default: /* can't happen */
852 scannermsg = "unknown";
853 break;
854 } /*there are no other cases */
855 begin_token_list(p, inserted);
856 snprintf(errmsg, 255, "%s while scanning %s of %s",
857 startmsg, scannermsg, cs_to_string(warning_index));
858 tex_error(errmsg, errhlp);
859 } else {
/* remaining case: conditional text was being skipped; complain and insert
   a frozen \fi */
860 char errmsg[256];
861 const char *errhlp_no[] = {
862 "The file ended while I was skipping conditional text.",
863 "This kind of error happens when you say `\\if...' and forget",
864 "the matching `\\fi'. I've inserted a `\\fi'; this might work.",
865 NULL
867 const char *errhlp_cs[] = {
868 "A forbidden control sequence occurred in skipped text.",
869 "This kind of error happens when you say `\\if...' and forget",
870 "the matching `\\fi'. I've inserted a `\\fi'; this might work.",
871 NULL
873 const char **errhlp = (const char **) errhlp_no;
874 char *ss;
875 if (cur_cs != 0) {
876 errhlp = errhlp_cs;
877 cur_cs = 0;
/* |ss| is allocated by |cmd_chr_to_string| and released right after use */
879 ss = cmd_chr_to_string(if_test_cmd, cur_if);
880 snprintf(errmsg, 255, "Incomplete %s; all text was ignored after line %d",
881 ss, (int) skip_line);
882 free(ss);
883 /* Incomplete \\if... */
884 cur_tok = cs_token_flag + frozen_fi;
885 /* back up one inserted token and call |error| */
887 OK_to_interrupt = false;
888 back_input();
889 token_type = inserted;
890 OK_to_interrupt = true;
891 tex_error(errmsg, errhlp);
894 deletions_allowed = true;
898 @ @c
900 #if 0
903 The other variant gives less clutter in tracing cache usage when profiling and for
904 some files (like the manual) also a bit of a speedup.
907 static boolean get_next_file(void)
909 SWITCH:
910 if (iloc <= ilimit) {
911 /* current line not yet finished */
912 do_buffer_to_unichar(cur_chr, iloc);
914 RESWITCH:
915 if (detokenized_line()) {
916 cur_cmd = (cur_chr == ' ' ? 10 : 12);
917 } else {
918 do_get_cat_code(cur_cmd, cur_chr);
921 Change state if necessary, and |goto switch| if the current
922 character should be ignored, or |goto reswitch| if the current
923 character changes to another;
925 The following 48-way switch accomplishes the scanning quickly, assuming
926 that a decent C compiler has translated the code. Note that the numeric
927 values for |mid_line|, |skip_blanks|, and |new_line| are spaced
928 apart from each other by |max_char_code+1|, so we can add a character's
929 command code to the state to get a single number that characterizes both.
931 Remark [ls/hh]: checking performance indicated that this switch was the
932 cause of many branch prediction errors but changing it to:
934 c = istate + cur_cmd;
935 if (c == (mid_line + letter_cmd) || c == (mid_line + other_char_cmd)) {
936 return true;
937 } else if (c >= new_line) {
938 switch (c) {
940 } else if (c >= skip_blanks) {
941 switch (c) {
943 } else if (c >= mid_line) {
944 switch (c) {
946 } else {
947 istate = mid_line;
948 return true;
951 gives as many prediction errors. So, we can indeed assume that the compiler
952 does the right job, or that there is simply no other way.
955 switch (istate + cur_cmd) {
956 case mid_line + ignore_cmd:
957 case skip_blanks + ignore_cmd:
958 case new_line + ignore_cmd:
959 case skip_blanks + spacer_cmd:
960 case new_line + spacer_cmd:
961 /* Cases where character is ignored */
962 goto SWITCH;
963 break;
964 case mid_line + escape_cmd:
965 case new_line + escape_cmd:
966 case skip_blanks + escape_cmd:
967 /* Scan a control sequence ...; */
968 istate = (unsigned char) scan_control_sequence();
969 if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
970 check_outer_validity();
971 break;
972 case mid_line + active_char_cmd:
973 case new_line + active_char_cmd:
974 case skip_blanks + active_char_cmd:
975 /* Process an active-character */
976 cur_cs = active_to_cs(cur_chr, false);
977 cur_cmd = eq_type(cur_cs);
978 cur_chr = equiv(cur_cs);
979 istate = mid_line;
980 if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
981 check_outer_validity();
982 break;
983 case mid_line + sup_mark_cmd:
984 case new_line + sup_mark_cmd:
985 case skip_blanks + sup_mark_cmd:
986 /* If this |sup_mark| starts */
987 if (process_sup_mark())
988 goto RESWITCH;
989 else
990 istate = mid_line;
991 break;
992 case mid_line + invalid_char_cmd:
993 case new_line + invalid_char_cmd:
994 case skip_blanks + invalid_char_cmd:
995 /* Decry the invalid character and |goto restart|; */
996 invalid_character_error();
997 return false; /* because state may be |token_list| now */
998 break;
999 case mid_line + spacer_cmd:
1000 /* Enter |skip_blanks| state, emit a space; */
1001 istate = skip_blanks;
1002 cur_chr = ' ';
1003 break;
1004 case mid_line + car_ret_cmd:
1006 Finish line, emit a space. When a character of type |spacer| gets through, its
1007 character code is changed to $\.{"\ "}=040$. This means that the ASCII codes
1008 for tab and space, and for the space inserted at the end of a line, will be
1009 treated alike when macro parameters are being matched. We do this since such
1010 characters are indistinguishable on most computer terminal displays.
1012 iloc = ilimit + 1;
1013 cur_cmd = spacer_cmd;
1014 cur_chr = ' ';
1015 break;
1016 case skip_blanks + car_ret_cmd:
1017 case mid_line + comment_cmd:
1018 case new_line + comment_cmd:
1019 case skip_blanks + comment_cmd:
1020 /* Finish line, |goto switch|; */
1021 iloc = ilimit + 1;
1022 goto SWITCH;
1023 break;
1024 case new_line + car_ret_cmd:
1025 /* Finish line, emit a \.{\\par}; */
1026 iloc = ilimit + 1;
1027 cur_cs = par_loc;
1028 cur_cmd = eq_type(cur_cs);
1029 cur_chr = equiv(cur_cs);
1030 if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
1031 check_outer_validity();
1032 break;
1033 case skip_blanks + left_brace_cmd:
1034 case new_line + left_brace_cmd:
1035 istate = mid_line;
1036 /* fall through */
1037 case mid_line + left_brace_cmd:
1038 align_state++;
1039 break;
1040 case skip_blanks + right_brace_cmd:
1041 case new_line + right_brace_cmd:
1042 istate = mid_line;
1043 /* fall through */
1044 case mid_line + right_brace_cmd:
1045 align_state--;
1046 break;
1047 case mid_line + math_shift_cmd:
1048 case mid_line + tab_mark_cmd:
1049 case mid_line + mac_param_cmd:
1050 case mid_line + sub_mark_cmd:
1051 case mid_line + letter_cmd:
1052 case mid_line + other_char_cmd:
1053 break;
1055 case skip_blanks + math_shift:
1056 case skip_blanks + tab_mark:
1057 case skip_blanks + mac_param:
1058 case skip_blanks + sub_mark:
1059 case skip_blanks + letter:
1060 case skip_blanks + other_char:
1061 case new_line + math_shift:
1062 case new_line + tab_mark:
1063 case new_line + mac_param:
1064 case new_line + sub_mark:
1065 case new_line + letter:
1066 case new_line + other_char:
1068 default:
1069 istate = mid_line;
1070 break;
1072 } else {
1073 if (iname != 21)
1074 istate = new_line;
1076 Move to next line of file,
1077 or |goto restart| if there is no next line,
1078 or |return| if a \.{\\read} line has finished;
1080 do {
1081 next_line_retval r = next_line();
1082 if (r == next_line_return) {
1083 return true;
1084 } else if (r == next_line_restart) {
1085 return false;
1087 } while (0);
1088 check_interrupt();
1089 goto SWITCH;
1091 return true;
1094 #else
1096 /* 10 times less Bim in callgrind */
1099 escape_cmd left_brace_cmd right_brace_cmd math_shift_cmd
1100 tab_mark_cmd car_ret_cmd mac_param_cmd sup_mark_cmd
1101 sub_mark_cmd ignore_cmd spacer_cmd letter_cmd
1102 other_char_cmd active_char_cmd comment_cmd invalid_char_cmd
1105 static boolean get_next_file(void)
1107 int c = 0;
1108 SWITCH:
1109 if (iloc <= ilimit) {
1110 /* current line not yet finished */
1111 do_buffer_to_unichar(cur_chr, iloc);
1112 RESWITCH:
1113 if (detokenized_line()) {
1114 cur_cmd = (cur_chr == ' ' ? 10 : 12);
1115 } else {
1116 do_get_cat_code(cur_cmd, cur_chr);
1119 Change state if necessary, and |goto switch| if the current
1120 character should be ignored, or |goto reswitch| if the current
1121 character changes to another;
1123 c = istate + cur_cmd;
1124 if (c == (mid_line + letter_cmd) || c == (mid_line + other_char_cmd)) {
1125 return true;
1126 } else if (c >= new_line) {
1127 switch (c-new_line) {
1128 case escape_cmd:
1129 istate = (unsigned char) scan_control_sequence();
1130 if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
1131 check_outer_validity();
1132 return true;
1133 case left_brace_cmd:
1134 istate = mid_line;
1135 align_state++;
1136 return true;
1137 case right_brace_cmd:
1138 istate = mid_line;
1139 align_state--;
1140 return true;
1141 case math_shift_cmd:
1142 istate = mid_line;
1143 return true;
1144 case tab_mark_cmd:
1145 istate = mid_line;
1146 return true;
1147 case car_ret_cmd:
1148 /* Finish line, emit a \.{\\par}; */
1149 iloc = ilimit + 1;
1150 cur_cs = par_loc;
1151 cur_cmd = eq_type(cur_cs);
1152 cur_chr = equiv(cur_cs);
1153 if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
1154 check_outer_validity();
1155 return true;
1156 case mac_param_cmd:
1157 istate = mid_line;
1158 return true;
1159 case sup_mark_cmd:
1160 if (process_sup_mark())
1161 goto RESWITCH;
1162 else
1163 istate = mid_line;
1164 return true;
1165 case sub_mark_cmd:
1166 istate = mid_line;
1167 return true;
1168 case ignore_cmd:
1169 goto SWITCH;
1170 return true;
1171 case spacer_cmd:
1172 /* Cases where character is ignored */
1173 goto SWITCH;
1174 case letter_cmd:
1175 istate = mid_line;
1176 return true;
1177 case other_char_cmd:
1178 istate = mid_line;
1179 return true;
1180 case active_char_cmd:
1181 cur_cs = active_to_cs(cur_chr, false);
1182 cur_cmd = eq_type(cur_cs);
1183 cur_chr = equiv(cur_cs);
1184 istate = mid_line;
1185 if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
1186 check_outer_validity();
1187 return true;
1188 case comment_cmd:
1189 iloc = ilimit + 1;
1190 goto SWITCH;
1191 case invalid_char_cmd:
1192 invalid_character_error();
1193 return false; /* because state may be |token_list| now */
1194 default:
1195 istate = mid_line;
1196 return true;
1198 } else if (c >= skip_blanks) {
1199 switch (c-skip_blanks) {
1200 case escape_cmd:
1201 /* Scan a control sequence ...; */
1202 istate = (unsigned char) scan_control_sequence();
1203 if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
1204 check_outer_validity();
1205 return true;
1206 case left_brace_cmd:
1207 istate = mid_line;
1208 align_state++;
1209 return true;
1210 case right_brace_cmd:
1211 istate = mid_line;
1212 align_state--;
1213 return true;
1214 case math_shift_cmd:
1215 istate = mid_line;
1216 return true;
1217 case tab_mark_cmd:
1218 istate = mid_line;
1219 return true;
1220 case car_ret_cmd:
1221 iloc = ilimit + 1;
1222 goto SWITCH;
1223 case mac_param_cmd:
1224 istate = mid_line;
1225 return true;
1226 case sup_mark_cmd:
1227 /* If this |sup_mark| starts */
1228 if (process_sup_mark())
1229 goto RESWITCH;
1230 else
1231 istate = mid_line;
1232 return true;
1233 case sub_mark_cmd:
1234 istate = mid_line;
1235 return true;
1236 case ignore_cmd:
1237 goto SWITCH;
1238 case spacer_cmd:
1239 goto SWITCH;
1240 case letter_cmd:
1241 istate = mid_line;
1242 return true;
1243 case other_char_cmd:
1244 istate = mid_line;
1245 return true;
1246 case active_char_cmd:
1247 cur_cs = active_to_cs(cur_chr, false);
1248 cur_cmd = eq_type(cur_cs);
1249 cur_chr = equiv(cur_cs);
1250 istate = mid_line;
1251 if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
1252 check_outer_validity();
1253 return true;
1254 case comment_cmd:
1255 /* Finish line, |goto switch|; */
1256 iloc = ilimit + 1;
1257 goto SWITCH;
1258 case invalid_char_cmd:
1259 /* Decry the invalid character and |goto restart|; */
1260 invalid_character_error();
1261 return false; /* because state may be |token_list| now */
1262 default:
1263 istate = mid_line;
1264 return true;
1266 } else if (c >= mid_line) {
1267 switch (c-mid_line) {
1268 case escape_cmd:
1269 istate = (unsigned char) scan_control_sequence();
1270 if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
1271 check_outer_validity();
1272 return true;
1273 case left_brace_cmd:
1274 align_state++;
1275 return true;
1276 case right_brace_cmd:
1277 align_state--;
1278 return true;
1279 case math_shift_cmd:
1280 return true;
1281 case tab_mark_cmd:
1282 return true;
1283 case car_ret_cmd:
1285 Finish line, emit a space. When a character of type |spacer| gets through, its
1286 character code is changed to $\.{"\ "}=040$. This means that the ASCII codes
1287 for tab and space, and for the space inserted at the end of a line, will be
1288 treated alike when macro parameters are being matched. We do this since such
1289 characters are indistinguishable on most computer terminal displays.
1291 iloc = ilimit + 1;
1292 cur_cmd = spacer_cmd;
1293 cur_chr = ' ';
1294 return true;
1295 case mac_param_cmd:
1296 return true;
1297 case sup_mark_cmd:
1298 if (process_sup_mark())
1299 goto RESWITCH;
1300 else
1301 istate = mid_line;
1302 return true;
1303 case sub_mark_cmd:
1304 return true;
1305 case ignore_cmd:
1306 goto SWITCH;
1307 case spacer_cmd:
1308 /* Enter |skip_blanks| state, emit a space; */
1309 istate = skip_blanks;
1310 cur_chr = ' ';
1311 return true;
1312 case letter_cmd:
1313 istate = mid_line;
1314 return true;
1315 case other_char_cmd:
1316 istate = mid_line;
1317 return true;
1318 case active_char_cmd:
1319 cur_cs = active_to_cs(cur_chr, false);
1320 cur_cmd = eq_type(cur_cs);
1321 cur_chr = equiv(cur_cs);
1322 istate = mid_line;
1323 if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
1324 check_outer_validity();
1325 return true;
1326 case comment_cmd:
1327 iloc = ilimit + 1;
1328 goto SWITCH;
1329 case invalid_char_cmd:
1330 invalid_character_error();
1331 return false; /* because state may be |token_list| now */
1332 default:
1333 istate = mid_line;
1334 return true;
1336 } else {
1337 istate = mid_line;
1338 return true;
1340 } else {
1341 if (iname != 21) {
1342 istate = new_line;
1345 Move to next line of file, or |goto restart| if there is no next line,
1346 or |return| if a \.{\\read} line has finished;
1348 do {
1349 next_line_retval r = next_line();
1350 if (r == next_line_return) {
1351 return true;
1352 } else if (r == next_line_restart) {
1353 return false;
1355 } while (0);
1356 check_interrupt();
1357 goto SWITCH;
1359 return true;
1362 #endif
1364 @ Notice that a code like \.{\^\^8} becomes \.x if not followed by a hex digit.
1365 We only support a limited set:
1367 ^^^^^^XXXXXX
1368 ^^^^XXXX
1369 ^^XX ^^<char>
/*
    Helpers for the superscript-mark notations. Only the digits and the
    lowercase letters a..f count as hex digits. The |*_to_cur_chr| macros
    expect arguments that already passed |is_hex| and store the resulting
    number in |cur_chr|.

    Every argument is parenthesized and every multi-statement body is wrapped
    in |do ... while (0)| (like |do_get_cat_code|), so each macro behaves as a
    single statement, also as the body of an |if| without braces. Callers
    supply the terminating semicolon.
*/
#define is_hex(a) ((((a)>='0')&&((a)<='9'))||(((a)>='a')&&((a)<='f')))

/* shift |cur_chr| one hex digit to the left and add the digit |c| */
#define add_nybble(c) do { \
    if ((c)<='9') { \
        cur_chr=(cur_chr<<4)+(c)-'0'; \
    } else { \
        cur_chr=(cur_chr<<4)+(c)-'a'+10; \
    } \
} while (0)

/* start a new number in |cur_chr| from the single hex digit |c| */
#define set_nybble(c) do { \
    if ((c)<='9') { \
        cur_chr=(c)-'0'; \
    } else { \
        cur_chr=(c)-'a'+10; \
    } \
} while (0)

#define one_hex_to_cur_chr(c1) \
    set_nybble(c1)

#define two_hex_to_cur_chr(c1,c2) do { \
    set_nybble(c1); \
    add_nybble(c2); \
} while (0)

#define four_hex_to_cur_chr(c1,c2,c3,c4) do { \
    two_hex_to_cur_chr(c1,c2); \
    add_nybble(c3); \
    add_nybble(c4); \
} while (0)

#define six_hex_to_cur_chr(c1,c2,c3,c4,c5,c6) do { \
    four_hex_to_cur_chr(c1,c2,c3,c4); \
    add_nybble(c5); \
    add_nybble(c6); \
} while (0)
1406 static boolean process_sup_mark(void)
1408 if (cur_chr == buffer[iloc]) {
1409 if (iloc < ilimit) {
1410 if ((cur_chr == buffer[iloc + 1]) && (cur_chr == buffer[iloc + 2])) {
1411 if ((cur_chr == buffer[iloc + 3]) && (cur_chr == buffer[iloc + 4])) {
1412 /* ^^^^^^XXXXXX */
1413 if ((iloc + 10) <= ilimit) {
1414 int c1 = buffer[iloc + 5];
1415 int c2 = buffer[iloc + 6];
1416 int c3 = buffer[iloc + 7];
1417 int c4 = buffer[iloc + 8];
1418 int c5 = buffer[iloc + 9];
1419 int c6 = buffer[iloc + 10];
1420 if (is_hex(c1) && is_hex(c2) && is_hex(c3) &&
1421 is_hex(c4) && is_hex(c5) && is_hex(c6)) {
1422 iloc = iloc + 11;
1423 six_hex_to_cur_chr(c1,c2,c3,c4,c5,c6);
1424 return true;
1425 } else {
1426 tex_error("^^^^^^ needs six hex digits", NULL);
1428 } else {
1429 tex_error("^^^^^^ needs six hex digits, end of input", NULL);
1431 } else {
1432 /* ^^^^XXXX */
1433 if ((iloc + 6) <= ilimit) {
1434 int c1 = buffer[iloc + 3];
1435 int c2 = buffer[iloc + 4];
1436 int c3 = buffer[iloc + 5];
1437 int c4 = buffer[iloc + 6];
1438 if (is_hex(c1) && is_hex(c2) && is_hex(c3) && is_hex(c4)) {
1439 iloc = iloc + 7;
1440 four_hex_to_cur_chr(c1,c2,c3,c4);
1441 return true;
1442 } else {
1443 tex_error("^^^^ needs four hex digits", NULL);
1445 } else {
1446 tex_error("^^^^ needs four hex digits, end of input", NULL);
1449 } else {
1450 /* ^^XX */
1451 if ((iloc + 2) <= ilimit) {
1452 int c1 = buffer[iloc + 1];
1453 int c2 = buffer[iloc + 2];
1454 if (is_hex(c1) && is_hex(c2)) {
1455 iloc = iloc + 3;
1456 two_hex_to_cur_chr(c1,c2);
1457 return true;
1460 /* go on, no error, good old tex */
1463 /* the rest */
1465 int c1 = buffer[iloc + 1];
1466 if (c1 < 0200) {
1467 iloc = iloc + 2;
1468 if (is_hex(c1) && (iloc <= ilimit)) {
1469 int c2 = buffer[iloc];
1470 if (is_hex(c2)) {
1471 incr(iloc);
1472 two_hex_to_cur_chr(c1,c2);
1473 return true;
1476 cur_chr = (c1 < 0100 ? c1 + 0100 : c1 - 0100);
1477 return true;
1481 return false;
1484 @ Control sequence names are scanned only when they appear in some line of a
1485 file; once they have been scanned the first time, their |eqtb| location serves as
1486 a unique identification, so \TeX\ doesn't need to refer to the original name any
1487 more except when it prints the equivalent in symbolic form.
1489 The program that scans a control sequence has been written carefully in order to
1490 avoid the blowups that might otherwise occur if a malicious user tried something
1491 like `\.{\\catcode\'15=0}'. The algorithm might look at |buffer[ilimit+1]|, but
1492 it never looks at |buffer[ilimit+2]|.
1494 If expanded characters like `\.{\^\^A}' or `\.{\^\^df}' appear in or just
1495 following a control sequence name, they are converted to single characters in the
1496 buffer and the process is repeated, slowly but surely.
1499 static boolean check_expanded_code(int *kk); /* below */
1501 static int scan_control_sequence(void)
1503 int retval = mid_line;
1504 if (iloc > ilimit) {
1505 cur_cs = null_cs; /* |state| is irrelevant in this case */
1506 } else {
1507 register int cat; /* |cat_code(cur_chr)|, usually */
1508 while (1) {
1509 int k = iloc;
1510 do_buffer_to_unichar(cur_chr, k);
1511 do_get_cat_code(cat, cur_chr);
1512 if (cat != letter_cmd || k > ilimit) {
1513 retval = (cat == spacer_cmd ? skip_blanks : mid_line);
1514 if (cat == sup_mark_cmd && check_expanded_code(&k)) /* If an expanded...; */
1515 continue;
1516 } else {
1517 retval = skip_blanks;
1518 do {
1519 do_buffer_to_unichar(cur_chr, k);
1520 do_get_cat_code(cat, cur_chr);
1521 } while (cat == letter_cmd && k <= ilimit);
1523 if (cat == sup_mark_cmd && check_expanded_code(&k)) /* If an expanded...; */
1524 continue;
1525 if (cat != letter_cmd) {
1526 /* backtrack one character which can be utf */
1528 decr(k);
1529 if (cur_chr > 0xFFFF)
1530 decr(k);
1531 if (cur_chr > 0x7FF)
1532 decr(k);
1533 if (cur_chr > 0x7F)
1534 decr(k);
1536 if (cur_chr <= 0x7F) {
1537 k -= 1; /* in most cases */
1538 } else if (cur_chr > 0xFFFF) {
1539 k -= 4;
1540 } else if (cur_chr > 0x7FF) {
1541 k -= 3;
1542 } else /* if (cur_chr > 0x7F) */ {
1543 k -= 2;
1545 /* now |k| points to first nonletter */
1548 cur_cs = id_lookup(iloc, k - iloc);
1549 iloc = k;
1550 break;
1553 cur_cmd = eq_type(cur_cs);
1554 cur_chr = equiv(cur_cs);
1555 return retval;
1558 @ Whenever we reach the following piece of code, we will have
1559 |cur_chr=buffer[k-1]| and |k<=ilimit+1| and
1560 |cat=get_cat_code(cat_code_table,cur_chr)|. If an expanded code like \.{\^\^A} or
1561 \.{\^\^df} appears in |buffer[(k-1)..(k+1)]| or |buffer[(k-1)..(k+2)]|, we will
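1562 store the corresponding code in |buffer[k-1]| and shift the rest of the buffer
1563 left two or three places. The longer forms with four or six hex digits are treated the same way: the code is stored in UTF-8 form starting at |buffer[k-1]| and the rest of the buffer is shifted left by the number of positions that are no longer needed.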
1566 static boolean check_expanded_code(int *kk)
1568 int l;
1569 int k = *kk;
1570 int d = 1;
1571 if (buffer[k] == cur_chr && k < ilimit) {
1572 if ((cur_chr == buffer[k + 1]) && (cur_chr == buffer[k + 2])) {
1573 if ((cur_chr == buffer[k + 3]) && (cur_chr == buffer[k + 4])) {
1574 if ((k + 10) <= ilimit) {
1575 int c1 = buffer[k + 6 - 1];
1576 int c2 = buffer[k + 6];
1577 int c3 = buffer[k + 6 + 1];
1578 int c4 = buffer[k + 6 + 2];
1579 int c5 = buffer[k + 6 + 3];
1580 int c6 = buffer[k + 6 + 4];
1581 if (is_hex(c1) && is_hex(c2) && is_hex(c3) && is_hex(c4) && is_hex(c5) && is_hex(c6)) {
1582 d = 6;
1583 six_hex_to_cur_chr(c1,c2,c3,c4,c5,c6);
1584 } else {
1585 tex_error("^^^^^^ needs six hex digits", NULL);
1587 } else {
1588 tex_error("^^^^^^ needs six hex digits, end of input", NULL);
1590 } else {
1591 if ((k + 6) <= ilimit) {
1592 int c1 = buffer[k + 4 - 1];
1593 int c2 = buffer[k + 4];
1594 int c3 = buffer[k + 4 + 1];
1595 int c4 = buffer[k + 4 + 2];
1596 if (is_hex(c1) && is_hex(c2) && is_hex(c3) && is_hex(c4)) {
1597 d = 4;
1598 four_hex_to_cur_chr(c1,c2,c3,c4);
1599 } else {
1600 tex_error("^^^^ needs four hex digits", NULL);
1602 } else {
1603 tex_error("^^^^ needs four hex digits, end of input", NULL);
1606 } else {
1607 int c1 = buffer[k + 1];
1608 if (c1 < 0200) {
1609 d = 1;
1610 if (is_hex(c1) && (k + 2) <= ilimit) {
1611 int c2 = buffer[k + 2];
1612 if (is_hex(c2)) {
1613 d = 2;
1614 two_hex_to_cur_chr(c1,c2);
1615 } else {
1616 cur_chr = (c1 < 0100 ? c1 + 0100 : c1 - 0100);
1618 } else {
1619 cur_chr = (c1 < 0100 ? c1 + 0100 : c1 - 0100);
1623 if (d > 2)
1624 d = 2 * d - 1;
1625 else
1626 d++;
1627 if (cur_chr <= 0x7F) {
1628 buffer[k - 1] = (packed_ASCII_code) cur_chr;
1629 } else if (cur_chr <= 0x7FF) {
1630 buffer[k - 1] = (packed_ASCII_code) (0xC0 + cur_chr / 0x40);
1631 k++;
1632 d--;
1633 buffer[k - 1] = (packed_ASCII_code) (0x80 + cur_chr % 0x40);
1634 } else if (cur_chr <= 0xFFFF) {
1635 buffer[k - 1] = (packed_ASCII_code) (0xE0 + cur_chr / 0x1000);
1636 k++;
1637 d--;
1638 buffer[k - 1] = (packed_ASCII_code) (0x80 + (cur_chr % 0x1000) / 0x40);
1639 k++;
1640 d--;
1641 buffer[k - 1] = (packed_ASCII_code) (0x80 + (cur_chr % 0x1000) % 0x40);
1642 } else {
1643 buffer[k - 1] = (packed_ASCII_code) (0xF0 + cur_chr / 0x40000);
1644 k++;
1645 d--;
1646 buffer[k - 1] = (packed_ASCII_code) (0x80 + (cur_chr % 0x40000) / 0x1000);
1647 k++;
1648 d--;
1649 buffer[k - 1] = (packed_ASCII_code) (0x80 + ((cur_chr % 0x40000) % 0x1000) / 0x40);
1650 k++;
1651 d--;
1652 buffer[k - 1] = (packed_ASCII_code) (0x80 + ((cur_chr % 0x40000) % 0x1000) % 0x40);
1654 l = k;
1655 ilimit = ilimit - d;
1656 while (l <= ilimit) {
1657 buffer[l] = buffer[l + d];
1658 l++;
1660 *kk = k;
1661 return true;
1663 return false;
1666 @ All of the easy branches of |get_next| have now been taken care of. There is
1667 one more branch.
1669 @c static next_line_retval next_line(void)
1671 boolean inhibit_eol = false; /* a way to end a pseudo file without trailing space */
1672 if (iname > 17) {
1673 /* Read next line of file into |buffer|, or |goto restart| if the file has ended */
1674 incr(line);
1675 first = istart;
1676 if (!force_eof) {
1677 if (iname <= 20) {
1678 if (pseudo_input()) { /* not end of file */
1679 firm_up_the_line(); /* this sets |ilimit| */
1680 line_catcode_table = DEFAULT_CAT_TABLE;
1681 if ((iname == 19) && (pseudo_lines(pseudo_files) == null))
1682 inhibit_eol = true;
1683 } else if ((every_eof != null) && !eof_seen[iindex]) {
1684 ilimit = first - 1;
1685 eof_seen[iindex] = true; /* fake one empty line */
1686 if (iname != 19)
1687 begin_token_list(every_eof, every_eof_text);
1688 return next_line_restart;
1689 } else {
1690 force_eof = true;
1692 } else {
1693 if (iname == 21) {
1694 if (luacstring_input()) { /* not end of strings */
1695 firm_up_the_line();
1696 line_catcode_table = (short) luacstring_cattable();
1697 line_partial = (signed char) luacstring_partial();
1698 if (luacstring_final_line() || line_partial
1699 || line_catcode_table == NO_CAT_TABLE)
1700 inhibit_eol = true;
1701 if (!line_partial)
1702 istate = new_line;
1703 } else {
1704 force_eof = true;
1706 } else {
1707 if (lua_input_ln(cur_file, 0, true)) { /* not end of file */
1708 firm_up_the_line(); /* this sets |ilimit| */
1709 line_catcode_table = DEFAULT_CAT_TABLE;
1710 } else if ((every_eof != null) && (!eof_seen[iindex])) {
1711 ilimit = first - 1;
1712 eof_seen[iindex] = true; /* fake one empty line */
1713 begin_token_list(every_eof, every_eof_text);
1714 return next_line_restart;
1715 } else {
1716 force_eof = true;
1721 if (force_eof) {
1722 if (tracing_nesting > 0)
1723 if ((grp_stack[in_open] != cur_boundary) || (if_stack[in_open] != cond_ptr))
1724 if (!((iname == 19) || (iname == 21))) {
1725 /* give warning for some unfinished groups and/or conditionals */
1726 file_warning();
1728 if ((iname > 21) || (iname == 20)) {
1729 report_stop_file(filetype_tex);
1730 decr(open_parens);
1732 force_eof = false;
1733 /* lua input or \.{\\scantextokens} */
1734 if (iname == 21 || iname == 19) {
1735 end_file_reading();
1736 } else {
1737 end_file_reading();
1738 if (! suppress_outer_error)
1739 check_outer_validity();
1741 return next_line_restart;
1743 if (inhibit_eol || end_line_char_inactive)
1744 ilimit--;
1745 else
1746 buffer[ilimit] = (packed_ASCII_code) end_line_char;
1747 first = ilimit + 1;
1748 iloc = istart; /* ready to read */
1749 } else {
1750 if (!terminal_input) {
1751 /* \.{\\read} line has ended */
1752 cur_cmd = 0;
1753 cur_chr = 0;
1754 return next_line_return; /* OUTER */
1756 if (input_ptr > 0) {
1757 /* text was inserted during error recovery */
1758 end_file_reading();
1759 return next_line_restart; /* resume previous level */
1761 if (selector < log_only)
1762 open_log_file();
1763 if (interaction > nonstop_mode) {
1764 if (end_line_char_inactive)
1765 ilimit++;
1766 if (ilimit == istart) {
1767 /* previous line was empty */
1768 tprint_nl("(Please type a command or say `\\end')");
1770 print_ln();
1771 first = istart;
1772 prompt_input("*"); /* input on-line into |buffer| */
1773 ilimit = last;
1774 if (end_line_char_inactive)
1775 ilimit--;
1776 else
1777 buffer[ilimit] = (packed_ASCII_code) end_line_char;
1778 first = ilimit + 1;
1779 iloc = istart;
1780 } else {
1782 Nonstop mode, which is intended for overnight batch processing,
1783 never waits for on-line input.
1785 fatal_error("*** (job aborted, no legal \\end found)");
1788 return next_line_ok;
1791 @ Let's consider now what happens when |get_next| is looking at a token list.
1794 static boolean get_next_tokenlist(void)
1796 register halfword t = token_info(iloc);
1797 iloc = token_link(iloc); /* move to next */
1798 if (t >= cs_token_flag) {
1799 /* a control sequence token */
1800 cur_cs = t - cs_token_flag;
1801 cur_cmd = eq_type(cur_cs);
1802 if (cur_cmd >= outer_call_cmd) {
1803 if (cur_cmd == dont_expand_cmd) {
1805 Get the next token, suppressing expansion. The present point in the program
1806 is reached only when the |expand| routine has inserted a special marker into
1807 the input. In this special case, |token_info(iloc)| is known to be a control
1808 sequence token, and |token_link(iloc)=null|.
1810 cur_cs = token_info(iloc) - cs_token_flag;
1811 iloc = null;
1812 cur_cmd = eq_type(cur_cs);
1813 if (cur_cmd > max_command_cmd) {
1814 cur_cmd = relax_cmd;
1815 cur_chr = no_expand_flag;
1816 return true;
1818 } else if (! suppress_outer_error) {
1819 check_outer_validity();
1822 cur_chr = equiv(cur_cs);
1823 } else {
1824 cur_cmd = token_cmd(t);
1825 cur_chr = token_chr(t);
1826 switch (cur_cmd) {
1827 case left_brace_cmd:
1828 align_state++;
1829 break;
1830 case right_brace_cmd:
1831 align_state--;
1832 break;
1833 case out_param_cmd:
1834 /* Insert macro parameter and |goto restart|; */
1835 begin_token_list(param_stack[param_start + cur_chr - 1], parameter);
1836 return false;
1837 break;
1840 return true;
1843 @ Now we're ready to take the plunge into |get_next| itself. Parts of this
1844 routine are executed more often than any other instructions of \TeX.
1845 @^mastication@>@^inner loop@>
1847 @ sets |cur_cmd|, |cur_chr|, |cur_cs| to next token
1850 void get_next(void)
1852 RESTART:
1853 cur_cs = 0;
1854 if (istate != token_list) {
1855 /* Input from external file, |goto restart| if no input found */
1856 if (!get_next_file())
1857 goto RESTART;
1858 } else {
1859 if (iloc == null) {
1860 end_token_list();
1861 goto RESTART; /* list exhausted, resume previous level */
1862 } else if (!get_next_tokenlist()) {
1863 goto RESTART; /* parameter needs to be expanded */
1866 /* If an alignment entry has just ended, take appropriate action */
1867 if ((cur_cmd == tab_mark_cmd || cur_cmd == car_ret_cmd) && align_state == 0) {
1868 insert_vj_template();
1869 goto RESTART;
1873 @ Since |get_next| is used so frequently in \TeX, it is convenient to define
1874 three related procedures that do a little more:
1876 \yskip\hang|get_token| not only sets |cur_cmd| and |cur_chr|, it also sets
1877 |cur_tok|, a packed halfword version of the current token.
1879 \yskip\hang|get_x_token|, meaning ``get an expanded token,'' is like |get_token|,
1880 but if the current token turns out to be a user-defined control sequence (i.e., a
1881 macro call), or a conditional, or something like \.{\\topmark} or
1882 \.{\\expandafter} or \.{\\csname}, it is eliminated from the input by beginning
1883 the expansion of the macro or the evaluation of the conditional.
1885 \yskip\hang|x_token| is like |get_x_token| except that it assumes that |get_next|
1886 has already been called.
1888 \yskip\noindent In fact, these three procedures account for almost every use of
1889 |get_next|.
1891 No new control sequences will be defined except during a call of |get_token|, or
1892 when \.{\\csname} compresses a token list, because |no_new_control_sequence| is
1893 always |true| at other times.
1895 @ sets |cur_cmd|, |cur_chr|, |cur_tok|
1898 void get_token(void)
1900 no_new_control_sequence = false;
1901 get_next();
1902 no_new_control_sequence = true;
1903 if (cur_cs == 0)
1904 cur_tok = token_val(cur_cmd, cur_chr);
1905 else
1906 cur_tok = cs_token_flag + cur_cs;
1909 @ changes the string |s| to a token list
1912 halfword string_to_toks(const char *ss)
1914 halfword p; /* tail of the token list */
1915 halfword q; /* new node being added to the token list via |store_new_token| */
1916 halfword t; /* token being appended */
1917 const char *s = ss;
1918 const char *se = ss + strlen(s);
1919 p = temp_token_head;
1920 set_token_link(p, null);
1921 while (s < se) {
1922 t = (halfword) str2uni((const unsigned char *) s);
1923 s += utf8_size(t);
1924 if (t == ' ')
1925 t = space_token;
1926 else
1927 t = other_token + t;
1928 fast_store_new_token(t);
1930 return token_link(temp_token_head);
1933 @ The token lists for macros and for other things like \.{\\mark} and
1934 \.{\\output} and \.{\\write} are produced by a procedure called |scan_toks|.
1936 Before we get into the details of |scan_toks|, let's consider a much simpler
1937 task, that of converting the current string into a token list. The |str_toks|
1938 function does this; it classifies spaces as type |spacer| and everything else as
1939 type |other_char|.
1941 The token list created by |str_toks| begins at |link(temp_token_head)| and ends
1942 at the value |p| that is returned. (If |p=temp_token_head|, the list is empty.)
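1944 |lua_str_toks| is almost identical, but it also puts a backslash in front of the three symbols that
1945 |lua| considers special while scanning a literal string (the backslash and both quote characters), and it turns a newline or a carriage return into a backslash followed by \.{n} or \.{r}.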
1947 @ changes the string |str_pool[b..pool_ptr]| to a token list
1950 halfword lua_str_toks(lstring b)
1952 halfword p; /* tail of the token list */
1953 halfword q; /* new node being added to the token list via |store_new_token| */
1954 halfword t; /* token being appended */
1955 unsigned char *k; /* index into string */
1956 p = temp_token_head;
1957 set_token_link(p, null);
1958 k = (unsigned char *) b.s;
1959 while (k < (unsigned char *) b.s + b.l) {
1960 t = pool_to_unichar(k);
1961 k += utf8_size(t);
1962 if (t == ' ') {
1963 t = space_token;
1964 } else {
1965 if ((t == '\\') || (t == '"') || (t == '\'') || (t == 10) || (t == 13))
1966 fast_store_new_token(other_token + '\\');
1967 if (t == 10)
1968 t = 'n';
1969 if (t == 13)
1970 t = 'r';
1971 t = other_token + t;
1973 fast_store_new_token(t);
1975 return p;
1978 @ Incidentally, the main reason for wanting |str_toks| is the function
1979 |the_toks|, which has similar input/output characteristics.
1981 @ changes the string |str_pool[b..pool_ptr]| to a token list
1984 halfword str_toks(lstring s)
1986 halfword p; /* tail of the token list */
1987 halfword q; /* new node being added to the token list via |store_new_token| */
1988 halfword t; /* token being appended */
1989 unsigned char *k, *l; /* index into string */
1990 p = temp_token_head;
1991 set_token_link(p, null);
1992 k = s.s;
1993 l = k + s.l;
1994 while (k < l) {
1995 t = pool_to_unichar(k);
1996 k += utf8_size(t);
1997 if (t == ' ')
1998 t = space_token;
1999 else
2000 t = other_token + t;
2001 fast_store_new_token(t);
2003 return p;
2007 hh: most of the converter is similar to the one i made for macro so at some point i
2008 can make a helper; also todo: there is no need to go through the pool
2012 halfword str_scan_toks(int ct, lstring s)
2013 { /* changes the string |str_pool[b..pool_ptr]| to a token list */
2014 halfword p; /* tail of the token list */
2015 halfword q; /* new node being added to the token list via |store_new_token| */
2016 halfword t; /* token being appended */
2017 unsigned char *k, *l; /* index into string */
2018 int cc;
2019 p = temp_token_head;
2020 set_token_link(p, null);
2021 k = s.s;
2022 l = k + s.l;
2023 while (k < l) {
2024 t = pool_to_unichar(k);
2025 k += utf8_size(t);
2026 cc = get_cat_code(ct,t);
2027 if (cc == 0) {
2028 /* we have a potential control sequence so we check for it */
2029 int _lname = 0 ;
2030 int _s = 0 ;
2031 int _c = 0 ;
2032 halfword _cs = null ;
2033 unsigned char *_name = k ;
2034 while (k < l) {
2035 t = (halfword) str2uni((const unsigned char *) k);
2036 _s = utf8_size(t);
2037 _c = get_cat_code(ct,t);
2038 if (_c == 11) {
2039 k += _s ;
2040 _lname = _lname + _s ;
2041 } else if (_c == 10) {
2042 /* we ignore a trailing space like normal scanning does */
2043 k += _s ;
2044 break ;
2045 } else {
2046 break ;
2049 if (_s > 0) {
2050 /* we have a potential \cs */
2051 _cs = string_lookup((const char *) _name, _lname);
2052 if (_cs == undefined_control_sequence) {
2053 /* let's play safe and backtrack */
2054 t = cc * (1<<21) + t ;
2055 k = _name ;
2056 } else {
2057 t = cs_token_flag + _cs;
2059 } else {
2060 /* just a character with some meaning, so \unknown becomes effectively */
2061 /* \\unknown assuming that \\ has some useful meaning of course */
2062 t = cc * (1<<21) + t ;
2063 k = _name ;
2066 } else {
2067 /* whatever token, so for instance $x^2$ just works given a tex */
2068 /* catcode regime */
2069 t = cc * (1<<21) + t ;
2071 fast_store_new_token(t);
2074 return p;
2077 @ Here's part of the |expand| subroutine that we are now ready to complete:
2080 void ins_the_toks(void)
2082 (void) the_toks();
2083 ins_list(token_link(temp_token_head));
/*
    Give token register |n| a fresh reference node that points to the tokens
    following |t|. The local |a| is presumably what |define| inspects to choose
    between a local and a global assignment -- confirm against its definition;
    a positive |g| sets it to 4.

    The arguments |n| and |g| are parenthesized so that expressions can be
    passed safely. The body stays a plain block, as before, so existing uses
    keep working unchanged.
*/
#define set_toks_register(n,t,g) { \
    int a = ((g)>0) ? 4 : 0; \
    halfword ref = get_avail(); \
    set_token_ref_count(ref, 0); \
    set_token_link(ref, token_link(t)); \
    define((n) + toks_base, call_cmd, ref); \
}
2094 void combine_the_toks(int how)
2096 halfword nt;
2097 get_x_token();
2098 /* target */
2099 if (cur_cmd == assign_toks_cmd) {
2100 nt = equiv(cur_cs) - toks_base;
2101 /* check range */
2102 } else {
2103 back_input();
2104 scan_int();
2105 nt = cur_val;
2107 /* source */
2108 do {
2109 get_x_token();
2110 } while (cur_cmd == spacer_cmd);
2111 if (cur_cmd == left_brace_cmd) {
2112 halfword x, source;
2113 back_input();
2114 x = scan_toks(false,how > 1); /* expanded or not */
2115 source = def_ref;
2116 /* action */
2117 if (source != null) {
2118 halfword target = toks(nt);
2119 if (target == null) {
2120 set_toks_register(nt,source,0);
2121 } else {
2122 halfword s = token_link(source);
2123 if (s != null) {
2124 halfword t = token_link(target);
2125 if (t == null) {
2126 /* can this happen ? */
2127 set_token_link(target, s);
2128 } else if (odd(how)) {
2129 /* prepend */
2130 if (cur_level != eq_level_field(eqtb[toks_base+nt])) {
2131 halfword p = temp_token_head;
2132 halfword q;
2133 set_token_link(p, s); /* s = head, x = tail */
2134 p = x;
2135 while (t != null) {
2136 fast_store_new_token(token_info(t));
2137 t = token_link(t);
2139 set_toks_register(nt,temp_token_head,0);
2140 } else {
2141 set_token_link(x,t);
2142 set_token_link(target,s);
2144 } else {
2145 /* append */
2146 if (cur_level != eq_level_field(eqtb[toks_base+nt])) {
2147 halfword p = temp_token_head;
2148 halfword q;
2149 set_token_link(p, null);
2150 while (t != null) {
2151 fast_store_new_token(token_info(t));
2152 t = token_link(t);
2154 set_token_link(p,s);
2155 set_toks_register(nt,temp_token_head,0);
2156 } else {
2157 while (token_link(t) != null) {
2158 t = token_link(t);
2160 set_token_link(t,s);
2166 } else {
2167 halfword source, ns;
2168 if (cur_cmd == assign_toks_cmd) {
2169 ns = equiv(cur_cs) - toks_base;
2170 /* check range */
2171 } else {
2172 back_input();
2173 scan_int();
2174 ns = cur_val;
2176 /* action */
2177 source = toks(ns);
2178 if (source != null) {
2179 halfword target = toks(nt);
2180 if (target == null) {
2181 equiv(toks_base+nt) = source;
2182 equiv(toks_base+ns) = null;
2183 } else {
2184 halfword s = token_link(source);
2185 if (s != null) {
2186 halfword t = token_link(target);
2187 if (t == null) {
2188 set_token_link(target, s);
2189 } else if (odd(how)) {
2190 /* prepend */
2191 halfword x = s;
2192 while (token_link(x) != null) {
2193 x = token_link(x);
2195 set_token_link(x,t);
2196 set_token_link(target,s);
2197 } else {
2198 /* append */
2199 while (token_link(t) != null) {
2200 t = token_link(t);
2202 set_token_link(t,s);
2204 equiv(toks_base+ns) = null;
2211 @ This routine, used in the next one, prints the job name, possibly modified by
2212 the |process_jobname| callback.
2215 static void print_job_name(void)
2217 if (job_name) {
2218 char *s, *ss; /* C strings for jobname before and after processing */
2219 int callback_id, lua_retval;
2220 s = (char*)str_string(job_name);
2221 callback_id = callback_defined(process_jobname_callback);
2222 if (callback_id > 0) {
2223 lua_retval = run_callback(callback_id, "S->S", s, &ss);
2224 if ((lua_retval == true) && (ss != NULL))
2225 s = ss;
2227 tprint(s);
2228 } else {
2229 print(job_name);
2233 @ Here is a routine that prints the result of a convert command, using the
2234 argument |i|. It returns |false| if it does not know how to print the code |c|. The
2235 function exists because lua code and tex code can both call it to convert
2236 something.
2238 @ Parse optional lua state integer, or an instance name to be stored in |sn| and
2239 get the next non-blank non-relax non-call token.
2243 int scan_lua_state(void)
2245 int sn = 0;
2246 do {
2247 get_x_token();
2248 } while ((cur_cmd == spacer_cmd) || (cur_cmd == relax_cmd));
2249 back_input();
2250 if (cur_cmd != left_brace_cmd) {
2251 if (scan_keyword("name")) {
2252 (void) scan_toks(false, true);
2253 sn = def_ref;
2254 } else {
2255 scan_register_num();
2256 if (get_lua_name(cur_val))
2257 sn = (cur_val - 65536);
2260 return sn;
2263 @ The procedure |conv_toks| uses |str_toks| to insert the token list for
2264 |convert| functions into the scanner; `\.{\\outer}' control sequences are allowed
2265 to follow `\.{\\string}' and `\.{\\meaning}'.
2267 The extra temp string |u| is needed because |pdf_scan_ext_toks| incorporates any
2268 pending string in its output. In order to save such a pending string, we have to
2269 create a temporary string that is destroyed immediately after.
/*
    |push_selector| remembers the current |selector| in the local variable
    |old_setting| and redirects printing to |new_string|; |pop_selector|
    puts the remembered value back. The function that uses them has to
    declare |old_setting|.
*/
#define push_selector do { \
    old_setting = selector; \
    selector = new_string; \
} while (0)

#define pop_selector do { \
    selector = old_setting; \
} while (0)
2281 static int do_variable_dvi(halfword c)
2283 return 0;
/*
    Each of these macros turns the current token into an internal integer,
    dimension or token list assignment for backend variable |i| and pushes
    it back into the input. They are wrapped in |do ... while (0)| so that
    they behave as one statement, also under an |if| without braces.
*/
#define do_variable_backend_int(i) do { \
    cur_cmd = assign_int_cmd; \
    cur_val = backend_int_base + (i); \
    cur_tok = token_val(cur_cmd, cur_val); \
    back_input(); \
} while (0)

#define do_variable_backend_dimen(i) do { \
    cur_cmd = assign_dimen_cmd; \
    cur_val = backend_dimen_base + (i); \
    cur_tok = token_val(cur_cmd, cur_val); \
    back_input(); \
} while (0)

#define do_variable_backend_toks(i) do { \
    cur_cmd = assign_toks_cmd; \
    cur_val = backend_toks_base + (i); \
    cur_tok = token_val(cur_cmd, cur_val); \
    back_input(); \
} while (0)
2304 static int do_variable_pdf(halfword c)
2306 if (scan_keyword("compresslevel")) { do_variable_backend_int(c_pdf_compress_level); }
2307 else if (scan_keyword("decimaldigits")) { do_variable_backend_int(c_pdf_decimal_digits); }
2308 else if (scan_keyword("imageresolution")) { do_variable_backend_int(c_pdf_image_resolution); }
2309 else if (scan_keyword("pkresolution")) { do_variable_backend_int(c_pdf_pk_resolution); }
2310 else if (scan_keyword("uniqueresname")) { do_variable_backend_int(c_pdf_unique_resname); }
2311 else if (scan_keyword("minorversion")) { do_variable_backend_int(c_pdf_minor_version); }
2312 else if (scan_keyword("pagebox")) { do_variable_backend_int(c_pdf_pagebox); }
2313 else if (scan_keyword("inclusionerrorlevel")) { do_variable_backend_int(c_pdf_inclusion_errorlevel); }
2314 else if (scan_keyword("ignoreunknownimages")) { do_variable_backend_int(c_pdf_ignore_unknown_images); }
2315 else if (scan_keyword("gamma")) { do_variable_backend_int(c_pdf_gamma); }
2316 else if (scan_keyword("imageapplygamma")) { do_variable_backend_int(c_pdf_image_apply_gamma); }
2317 else if (scan_keyword("imagegamma")) { do_variable_backend_int(c_pdf_image_gamma); }
2318 else if (scan_keyword("imagehicolor")) { do_variable_backend_int(c_pdf_image_hicolor); }
2319 else if (scan_keyword("imageaddfilename")) { do_variable_backend_int(c_pdf_image_addfilename); }
2320 else if (scan_keyword("objcompresslevel")) { do_variable_backend_int(c_pdf_objcompresslevel); }
2321 else if (scan_keyword("inclusioncopyfonts")) { do_variable_backend_int(c_pdf_inclusion_copy_font); }
2322 else if (scan_keyword("gentounicode")) { do_variable_backend_int(c_pdf_gen_tounicode); }
2323 else if (scan_keyword("pkfixeddpi")) { do_variable_backend_int(c_pdf_pk_fixed_dpi); }
2325 else if (scan_keyword("horigin")) { do_variable_backend_dimen(d_pdf_h_origin); }
2326 else if (scan_keyword("vorigin")) { do_variable_backend_dimen(d_pdf_v_origin); }
2327 else if (scan_keyword("threadmargin")) { do_variable_backend_dimen(d_pdf_thread_margin); }
2328 else if (scan_keyword("destmargin")) { do_variable_backend_dimen(d_pdf_dest_margin); }
2329 else if (scan_keyword("linkmargin")) { do_variable_backend_dimen(d_pdf_link_margin); }
2330 else if (scan_keyword("xformmargin")) { do_variable_backend_dimen(d_pdf_xform_margin); }
2332 else if (scan_keyword("pageattr")) { do_variable_backend_toks(t_pdf_page_attr); }
2333 else if (scan_keyword("pageresources")) { do_variable_backend_toks(t_pdf_page_resources); }
2334 else if (scan_keyword("pagesattr")) { do_variable_backend_toks(t_pdf_pages_attr); }
2335 else if (scan_keyword("xformattr")) { do_variable_backend_toks(t_pdf_xform_attr); }
2336 else if (scan_keyword("xformresources")) { do_variable_backend_toks(t_pdf_xform_resources); }
2337 else if (scan_keyword("pkmode")) { do_variable_backend_toks(t_pdf_pk_mode); }
2339 else
2340 return 0;
2341 return 1;
2344 static int do_feedback_dvi(halfword c)
2346 return 0;
2349 /* codes not really needed but cleaner when testing */
2351 #define pdftex_version 40 /* these values will not change any more */
2352 #define pdftex_revision "0" /* these values will not change any more */
2354 static int do_feedback_pdf(halfword c)
2356 int old_setting; /* holds |selector| setting */
2357 int save_scanner_status; /* |scanner_status| upon entry */
2358 halfword save_def_ref; /* |def_ref| upon entry, important if inside `\.{\\message}' */
2359 halfword save_warning_index;
2360 boolean bool; /* temp boolean */
2361 str_number s; /* first temp string */
2362 int ff; /* for use with |set_ff| */
2363 str_number u = 0; /* third temp string, will become non-nil if a string is already being built */
2364 char *str; /* color stack init str */
2366 if (scan_keyword("lastlink")) {
2367 push_selector;
2368 print_int(pdf_last_link);
2369 pop_selector;
2370 } else if (scan_keyword("retval")) {
2371 push_selector;
2372 print_int(pdf_retval);
2373 pop_selector;
2374 } else if (scan_keyword("lastobj")) {
2375 push_selector;
2376 print_int(pdf_last_obj);
2377 pop_selector;
2378 } else if (scan_keyword("lastannot")) {
2379 push_selector;
2380 print_int(pdf_last_annot);
2381 pop_selector;
2382 } else if (scan_keyword("xformname")) {
2383 scan_int();
2384 check_obj_type(static_pdf, obj_type_xform, cur_val);
2385 push_selector;
2386 print_int(obj_info(static_pdf, cur_val));
2387 pop_selector;
2388 } else if (scan_keyword("creationdate")) {
2389 ins_list(string_to_toks(getcreationdate(static_pdf)));
2390 /* no further action */
2391 return 2;
2392 } else if (scan_keyword("fontname")) {
2393 scan_font_ident();
2394 if (cur_val == null_font)
2395 normal_error("pdf backend", "invalid font identifier when asking 'fontname'");
2396 pdf_check_vf(cur_val);
2397 if (!font_used(cur_val))
2398 pdf_init_font(static_pdf, cur_val);
2399 push_selector;
2400 set_ff(cur_val);
2401 print_int(obj_info(static_pdf, pdf_font_num(ff)));
2402 pop_selector;
2403 } else if (scan_keyword("fontobjnum")) {
2404 scan_font_ident();
2405 if (cur_val == null_font)
2406 normal_error("pdf backend", "invalid font identifier when asking 'objnum'");
2407 pdf_check_vf(cur_val);
2408 if (!font_used(cur_val))
2409 pdf_init_font(static_pdf, cur_val);
2410 push_selector;
2411 set_ff(cur_val);
2412 print_int(pdf_font_num(ff));
2413 pop_selector;
2414 } else if (scan_keyword("fontsize")) {
2415 scan_font_ident();
2416 if (cur_val == null_font)
2417 normal_error("pdf backend", "invalid font identifier when asking 'fontsize'");
2418 push_selector;
2419 print_scaled(font_size(cur_val));
2420 tprint("pt");
2421 pop_selector;
2422 } else if (scan_keyword("pageref")) {
2423 scan_int();
2424 if (cur_val <= 0)
2425 normal_error("pdf backend", "invalid page number when asking 'pageref'");
2426 push_selector;
2427 print_int(pdf_get_obj(static_pdf, obj_type_page, cur_val, false));
2428 pop_selector;
2429 } else if (scan_keyword("colorstackinit")) {
2430 bool = scan_keyword("page");
2431 if (scan_keyword("direct"))
2432 cur_val = direct_always;
2433 else if (scan_keyword("page"))
2434 cur_val = direct_page;
2435 else
2436 cur_val = set_origin;
2437 save_scanner_status = scanner_status;
2438 save_warning_index = warning_index;
2439 save_def_ref = def_ref;
2440 u = save_cur_string();
2441 scan_toks(false, true);
2442 s = tokens_to_string(def_ref);
2443 delete_token_ref(def_ref);
2444 def_ref = save_def_ref;
2445 warning_index = save_warning_index;
2446 scanner_status = save_scanner_status;
2447 str = makecstring(s);
2448 cur_val = newcolorstack(str, cur_val, bool);
2449 free(str);
2450 flush_str(s);
2451 cur_val_level = int_val_level;
2452 if (cur_val < 0) {
2453 print_err("Too many color stacks");
2454 help2("The number of color stacks is limited to 32768.",
2455 "I'll use the default color stack 0 here.");
2456 error();
2457 cur_val = 0;
2458 restore_cur_string(u);
2460 push_selector;
2461 print_int(cur_val);
2462 pop_selector;
2463 } else if (scan_keyword("version")) {
2464 push_selector;
2465 print_int(pdftex_version);
2466 pop_selector;
2467 } else if (scan_keyword("revision")) {
2468 ins_list(string_to_toks(pdftex_revision));
2469 return 2;
2470 } else {
2471 return 0;
2473 return 1;
2476 void conv_toks(void)
2478 int old_setting; /* holds |selector| setting */
2479 halfword p, q;
2480 int save_scanner_status; /* |scanner_status| upon entry */
2481 halfword save_def_ref; /* |def_ref| upon entry, important if inside `\.{\\message}' */
2482 halfword save_warning_index;
2483 boolean bool; /* temp boolean */
2484 str_number s; /* first temp string */
2485 int sn; /* lua chunk name */
2486 str_number u = 0; /* third temp string, will become non-nil if a string is already being built */
2487 int c = cur_chr; /* desired type of conversion */
2488 str_number str;
2489 int i = 0;
2490 /* Scan the argument for command |c| */
2491 switch (c) {
2492 case number_code:
2493 scan_int();
2494 push_selector;
2495 print_int(cur_val);
2496 pop_selector;
2497 break;
2498 case lua_function_code:
2499 scan_int();
2500 if (cur_val <= 0) {
2501 normal_error("luafunction", "invalid number");
2502 } else {
2503 u = save_cur_string();
2504 luacstrings = 0;
2505 luafunctioncall(cur_val);
2506 restore_cur_string(u);
2507 if (luacstrings > 0)
2508 lua_string_start();
2510 /* no further action */
2511 return;
2512 break;
2513 case lua_code:
2514 u = save_cur_string();
2515 save_scanner_status = scanner_status;
2516 save_def_ref = def_ref;
2517 save_warning_index = warning_index;
2518 sn = scan_lua_state();
2519 scan_toks(false, true);
2520 s = def_ref;
2521 warning_index = save_warning_index;
2522 def_ref = save_def_ref;
2523 scanner_status = save_scanner_status;
2524 luacstrings = 0;
2525 luatokencall(s, sn);
2526 delete_token_ref(s);
2527 restore_cur_string(u); /* TODO: check this, was different */
2528 if (luacstrings > 0)
2529 lua_string_start();
2530 /* no further action */
2531 return;
2532 break;
2533 case expanded_code:
2534 save_scanner_status = scanner_status;
2535 save_warning_index = warning_index;
2536 save_def_ref = def_ref;
2537 u = save_cur_string();
2538 scan_toks(false, true);
2539 warning_index = save_warning_index;
2540 scanner_status = save_scanner_status;
2541 ins_list(token_link(def_ref));
2542 def_ref = save_def_ref;
2543 restore_cur_string(u);
2544 /* no further action */
2545 return;
2546 break;
2547 case math_style_code:
2548 push_selector;
2549 print_math_style();
2550 pop_selector;
2551 break;
2552 case string_code:
2553 save_scanner_status = scanner_status;
2554 scanner_status = normal;
2555 get_token();
2556 scanner_status = save_scanner_status;
2557 push_selector;
2558 if (cur_cs != 0)
2559 sprint_cs(cur_cs);
2560 else
2561 print(cur_chr);
2562 pop_selector;
2563 break;
2564 case cs_string_code:
2565 save_scanner_status = scanner_status;
2566 scanner_status = normal;
2567 get_token();
2568 scanner_status = save_scanner_status;
2569 push_selector;
2570 if (cur_cs != 0)
2571 sprint_cs_name(cur_cs);
2572 else
2573 print(cur_chr);
2574 pop_selector;
2575 break;
2576 case roman_numeral_code:
2577 scan_int();
2578 push_selector;
2579 print_roman_int(cur_val);
2580 pop_selector;
2581 break;
2582 case meaning_code:
2583 save_scanner_status = scanner_status;
2584 scanner_status = normal;
2585 get_token();
2586 scanner_status = save_scanner_status;
2587 push_selector;
2588 print_meaning();
2589 pop_selector;
2590 break;
2591 case uchar_code:
2592 scan_char_num();
2593 push_selector;
2594 print(cur_val);
2595 pop_selector;
2596 break;
2597 case lua_escape_string_code:
2599 lstring escstr;
2600 int l = 0;
2601 save_scanner_status = scanner_status;
2602 save_def_ref = def_ref;
2603 save_warning_index = warning_index;
2604 scan_toks(false, true);
2605 bool = in_lua_escape;
2606 in_lua_escape = true;
2607 escstr.s = (unsigned char *) tokenlist_to_cstring(def_ref, false, &l);
2608 escstr.l = (unsigned) l;
2609 in_lua_escape = bool;
2610 delete_token_ref(def_ref);
2611 def_ref = save_def_ref;
2612 warning_index = save_warning_index;
2613 scanner_status = save_scanner_status;
2614 (void) lua_str_toks(escstr);
2615 ins_list(token_link(temp_token_head));
2616 free(escstr.s);
2617 return;
2619 /* no further action */
2620 break;
2621 case font_id_code:
2622 scan_font_ident();
2623 push_selector;
2624 print_int(cur_val);
2625 pop_selector;
2626 break;
2627 case font_name_code:
2628 scan_font_ident();
2629 push_selector;
2630 append_string((unsigned char *) font_name(cur_val),(unsigned) strlen(font_name(cur_val)));
2631 if (font_size(cur_val) != font_dsize(cur_val)) {
2632 tprint(" at ");
2633 print_scaled(font_size(cur_val));
2634 tprint("pt");
2636 pop_selector;
2637 break;
2638 case left_margin_kern_code:
2639 scan_int();
2640 if ((box(cur_val) == null) || (type(box(cur_val)) != hlist_node))
2641 normal_error("marginkern", "a non-empty hbox expected");
2642 push_selector;
2643 p = list_ptr(box(cur_val));
2644 while ((p != null) && (type(p) == glue_node)) {
2645 p = vlink(p);
2647 if ((p != null) && (type(p) == margin_kern_node) && (subtype(p) == left_side))
2648 print_scaled(width(p));
2649 else
2650 print_char('0');
2651 tprint("pt");
2652 pop_selector;
2653 break;
2654 case right_margin_kern_code:
2655 scan_int();
2656 if ((box(cur_val) == null) || (type(box(cur_val)) != hlist_node))
2657 normal_error("marginkern", "a non-empty hbox expected");
2658 push_selector;
2659 p = list_ptr(box(cur_val));
2660 if (p != null) {
2661 p = tail_of_list(p);
2663 there can be a leftskip, rightskip, penalty and yes, also a disc node with a nesting
2664 node that points to glue spec ... and we don't want to analyze that messy lot
2666 while ((p != null) && (type(p) == glue_node)) {
2667 p = alink(p);
2669 if ((p != null) && ! ((type(p) == margin_kern_node) && (subtype(p) == right_side))) {
2670 if (type(p) == disc_node) {
2671 q = alink(p);
2672 if ((q != null) && ((type(q) == margin_kern_node) && (subtype(q) == right_side))) {
2673 p = q;
2674 } else {
2676 officially we should look in the replace but currently protrusion doesn't
2677 work anyway with "foo\discretionary{}{}{bar-} " (no following char) so we
2678 don't need it now
2684 if ((p != null) && (type(p) == margin_kern_node) && (subtype(p) == right_side))
2685 print_scaled(width(p));
2686 else
2687 print_char('0');
2688 tprint("pt");
2689 pop_selector;
2690 break;
2691 case uniform_deviate_code:
2692 scan_int();
2693 push_selector;
2694 print_int(unif_rand(cur_val));
2695 pop_selector;
2696 break;
2697 case normal_deviate_code:
2698 scan_int();
2699 push_selector;
2700 print_int(norm_rand());
2701 pop_selector;
2702 break;
2703 case math_char_class_code:
2705 mathcodeval mval;
2706 scan_int();
2707 mval = get_math_code(cur_val);
2708 push_selector;
2709 print_int(mval.class_value);
2710 pop_selector;
2712 break;
2713 case math_char_fam_code:
2715 mathcodeval mval;
2716 scan_int();
2717 mval = get_math_code(cur_val);
2718 push_selector;
2719 print_int(mval.family_value);
2720 pop_selector;
2722 break;
2723 case math_char_slot_code:
2725 mathcodeval mval;
2726 scan_int();
2727 mval = get_math_code(cur_val);
2728 push_selector;
2729 print_int(mval.character_value);
2730 pop_selector;
2732 break;
2733 case insert_ht_code:
2734 scan_register_num();
2735 push_selector;
2736 i = cur_val;
2737 p = page_ins_head;
2738 while (i >= subtype(vlink(p)))
2739 p = vlink(p);
2740 if (subtype(p) == i)
2741 print_scaled(height(p));
2742 else
2743 print_char('0');
2744 tprint("pt");
2745 pop_selector;
2746 break;
2747 case job_name_code:
2748 if (job_name == 0)
2749 open_log_file();
2750 push_selector;
2751 print_job_name();
2752 pop_selector;
2753 break;
2754 case format_name_code:
2755 if (job_name == 0)
2756 open_log_file();
2757 push_selector;
2758 print(format_name);
2759 pop_selector;
2760 break;
2761 case luatex_banner_code:
2762 push_selector;
2763 tprint(luatex_banner);
2764 pop_selector;
2765 break;
2766 case luatex_revision_code:
2767 push_selector;
2768 print(get_luatexrevision());
2769 pop_selector;
2770 break;
2771 case luatex_date_code:
2772 push_selector;
2773 print_int(get_luatex_date_info());
2774 pop_selector;
2775 break;
2776 case etex_code:
2777 push_selector;
2778 tprint(eTeX_version_string);
2779 pop_selector;
2780 break;
2781 case eTeX_revision_code:
2782 push_selector;
2783 tprint(eTeX_revision);
2784 pop_selector;
2785 break;
2786 case font_identifier_code:
2787 confusion("convert");
2788 break;
2789 default:
2790 confusion("convert");
2791 break;
2793 str = make_string();
2794 (void) str_toks(str_lstring(str));
2795 flush_str(str);
2796 ins_list(token_link(temp_token_head));
2799 void do_feedback(void)
2801 int c = cur_chr;
2802 str_number str;
2803 int done = 1;
2804 switch (c) {
2805 case dvi_feedback_code:
2806 if (get_o_mode() == OMODE_DVI) {
2807 done = do_feedback_dvi(c);
2808 } else {
2809 tex_error("unexpected use of \\dvifeedback",null);
2810 return ;
2812 if (done==0) {
2813 /* we recover */
2814 normal_warning("dvi backend","unexpected use of \\dvifeedback");
2815 return;
2816 } else if (done==2) {
2817 return;
2819 break;
2820 case pdf_feedback_code:
2821 if (get_o_mode() == OMODE_PDF) {
2822 done = do_feedback_pdf(c);
2823 } else {
2824 tex_error("unexpected use of \\pdffeedback",null);
2825 return ;
2827 if (done==0) {
2828 /* we recover */
2829 normal_warning("pdf backend","unexpected use of \\pdffeedback");
2830 return;
2831 } else if (done==2) {
2832 return;
2834 break;
2835 default:
2836 confusion("feedback");
2837 break;
2839 str = make_string();
2840 (void) str_toks(str_lstring(str));
2841 flush_str(str);
2842 ins_list(token_link(temp_token_head));
2845 void do_variable(void)
2847 int c = cur_chr;
2848 int done = 1;
2849 switch (c) {
2850 case dvi_variable_code:
2851 done = do_variable_dvi(c);
2852 if (done==0) {
2853 /* we recover */
2854 normal_warning("dvi backend","unexpected use of \\dvivariable");
2856 return;
2857 break;
2858 case pdf_variable_code:
2859 done = do_variable_pdf(c);
2860 if (done==0) {
2861 /* we recover */
2862 normal_warning("pdf backend","unexpected use of \\pdfvariable");
2864 return;
2865 break;
2866 default:
2867 confusion("variable");
2868 break;
2872 @ This boolean is keeping track of the lua string escape state
2874 boolean in_lua_escape;
2876 static int the_convert_string_dvi(halfword c, int i)
2878 return 0 ;
2881 static int the_convert_string_pdf(halfword c, int i)
2883 int ff;
2884 if (get_o_mode() != OMODE_PDF) {
2885 return 0;
2886 } else if (scan_keyword("lastlink")) {
2887 print_int(pdf_last_link);
2888 } else if (scan_keyword("retval")) {
2889 print_int(pdf_retval);
2890 } else if (scan_keyword("lastobj")) {
2891 print_int(pdf_last_obj);
2892 } else if (scan_keyword("lastannot")) {
2893 print_int(pdf_last_annot);
2894 } else if (scan_keyword("xformname")) {
2895 print_int(obj_info(static_pdf, i));
2896 } else if (scan_keyword("creationdate")) {
2897 return 0;
2898 } else if (scan_keyword("fontname")) {
2899 set_ff(i);
2900 print_int(obj_info(static_pdf, pdf_font_num(ff)));
2901 } else if (scan_keyword("fontobjnum")) {
2902 set_ff(i);
2903 print_int(pdf_font_num(ff));
2904 } else if (scan_keyword("fontsize")) {
2905 print_scaled(font_size(i));
2906 tprint("pt");
2907 } else if (scan_keyword("pageref")) {
2908 print_int(pdf_get_obj(static_pdf, obj_type_page, i, false));
2909 } else if (scan_keyword("colorstackinit")) {
2910 return 0;
2911 } else {
2912 return 0;
2914 return 1;
2917 str_number the_convert_string(halfword c, int i)
2919 int old_setting; /* saved |selector| setting */
2920 str_number ret = 0;
2921 boolean done = true ;
2922 old_setting = selector;
2923 selector = new_string;
2924 switch (c) {
2925 case number_code:
2926 print_int(i);
2927 break;
2928 /* case lua_function_code: */
2929 /* case lua_code: */
2930 /* case expanded_code: */
2931 case math_style_code:
2932 print_math_style();
2933 break;
2934 /* case string_code: */
2935 /* case cs_string_code: */
2936 case roman_numeral_code:
2937 print_roman_int(i);
2938 break;
2939 /* case meaning_code: */
2940 case uchar_code:
2941 print(i);
2942 break;
2943 /* lua_escape_string_code: */
2944 case font_id_code:
2945 print_int(i);
2946 break;
2947 case font_name_code:
2948 append_string((unsigned char *) font_name(i),(unsigned) strlen(font_name(i)));
2949 if (font_size(i) != font_dsize(i)) {
2950 tprint(" at ");
2951 print_scaled(font_size(i));
2952 tprint("pt");
2954 break;
2955 /* left_margin_kern_code: */
2956 /* right_margin_kern_code: */
2957 case uniform_deviate_code:
2958 print_int(unif_rand(i));
2959 break;
2960 case normal_deviate_code:
2961 print_int(norm_rand());
2962 break;
2963 /* math_char_class_code: */
2964 /* math_char_fam_code: */
2965 /* math_char_slot_code: */
2966 /* insert_ht_code: */
2967 case job_name_code:
2968 print_job_name();
2969 break;
2970 case format_name_code:
2971 print(format_name);
2972 break;
2973 case luatex_banner_code:
2974 tprint(luatex_banner);
2975 break;
2976 case luatex_revision_code:
2977 print(get_luatexrevision());
2978 break;
2979 case luatex_date_code:
2980 print_int(get_luatex_date_info());
2981 break;
2982 case etex_code:
2983 tprint(eTeX_version_string);
2984 break;
2985 case eTeX_revision_code:
2986 tprint(eTeX_revision);
2987 break;
2988 case font_identifier_code:
2989 print_font_identifier(i);
2990 break;
2991 /* backend: this might become obsolete */
2992 case dvi_feedback_code:
2993 done = the_convert_string_dvi(c,i);
2994 break;
2995 case pdf_feedback_code:
2996 done = the_convert_string_pdf(c,i);
2997 break;
2998 /* done */
2999 default:
3000 done = false;
3001 break;
3003 if (done) {
3004 ret = make_string();
3006 selector = old_setting;
3007 return ret;
3010 @ Another way to create a token list is via the \.{\\read} command. The sixteen
3011 files potentially usable for reading appear in the following global variables.
3012 The value of |read_open[n]| will be |closed| if stream number |n| has not been
3013 opened or if it has been fully read; |just_open| if an \.{\\openin} but not a
3014 \.{\\read} has been done; and |normal| if it is open and ready to read the next
3015 line.
3018 FILE *read_file[16]; /* used for \.{\\read} */
3019 int read_open[17]; /* state of |read_file[n]| */
3021 void initialize_read(void)
3023 int k;
3024 for (k = 0; k <= 16; k++)
3025 read_open[k] = closed;
3028 @ The |read_toks| procedure constructs a token list like that for any macro
3029 definition, and makes |cur_val| point to it. Parameter |r| points to the control
3030 sequence that will receive this token list.
3033 void read_toks(int n, halfword r, halfword j)
3035 halfword p; /* tail of the token list */
3036 halfword q; /* new node being added to the token list via |store_new_token| */
3037 int s; /* saved value of |align_state| */
3038 int m; /* stream number */
3039 scanner_status = defining;
3040 warning_index = r;
3041 p = get_avail();
3042 def_ref = p;
3043 set_token_ref_count(def_ref, 0);
3044 p = def_ref; /* the reference count */
3045 store_new_token(end_match_token);
3046 if ((n < 0) || (n > 15))
3047 m = 16;
3048 else
3049 m = n;
3050 s = align_state;
3051 align_state = 1000000; /* disable tab marks, etc. */
3052 do {
3053 /* Input and store tokens from the next line of the file */
3054 begin_file_reading();
3055 iname = m + 1;
3056 if (read_open[m] == closed) {
3058 Input for \.{\\read} from the terminal
3060 Here we input on-line into the |buffer| array, prompting the user explicitly
3061 if |n>=0|. The value of |n| is set negative so that additional prompts
3062 will not be given in the case of multi-line input.
3064 if (interaction > nonstop_mode) {
3065 if (n < 0) {
3066 prompt_input("");
3067 } else {
3068 wake_up_terminal();
3069 print_ln();
3070 sprint_cs(r);
3071 prompt_input(" =");
3072 n = -1;
3074 } else {
3075 fatal_error
3076 ("*** (cannot \\read from terminal in nonstop modes)");
3079 } else if (read_open[m] == just_open) {
3081 Input the first line of |read_file[m]|
3083 The first line of a file must be treated specially, since |lua_input_ln|
3084 must be told not to start with |get|.
3086 if (lua_input_ln(read_file[m], (m + 1), false)) {
3087 read_open[m] = normal;
3088 } else {
3089 lua_a_close_in(read_file[m], (m + 1));
3090 read_open[m] = closed;
3093 } else {
3095 Input the next line of |read_file[m]|
3097 An empty line is appended at the end of a |read_file|.
3099 if (!lua_input_ln(read_file[m], (m + 1), true)) {
3100 lua_a_close_in(read_file[m], (m + 1));
3101 read_open[m] = closed;
3102 if (align_state != 1000000) {
3103 runaway();
3104 print_err("File ended within \\read");
3105 help1("This \\read has unbalanced braces.");
3106 align_state = 1000000;
3107 error();
3112 ilimit = last;
3113 if (end_line_char_inactive)
3114 decr(ilimit);
3115 else
3116 buffer[ilimit] = (packed_ASCII_code) int_par(end_line_char_code);
3117 first = ilimit + 1;
3118 iloc = istart;
3119 istate = new_line;
3120 /* Handle \.{\\readline} and |goto done|; */
3121 if (j == 1) {
3122 while (iloc <= ilimit) {
3123 /* current line not yet finished */
3124 do_buffer_to_unichar(cur_chr, iloc);
3125 if (cur_chr == ' ')
3126 cur_tok = space_token;
3127 else
3128 cur_tok = cur_chr + other_token;
3129 store_new_token(cur_tok);
3131 } else {
3132 while (1) {
3133 get_token();
3134 if (cur_tok == 0) {
3135 /* |cur_cmd=cur_chr=0| will occur at the end of the line */
3136 break;
3138 if (align_state < 1000000) {
3139 /* unmatched `\.\}' aborts the line */
3140 do {
3141 get_token();
3142 } while (cur_tok != 0);
3143 align_state = 1000000;
3144 break;
3146 store_new_token(cur_tok);
3149 end_file_reading();
3151 } while (align_state != 1000000);
3152 cur_val = def_ref;
3153 scanner_status = normal;
3154 align_state = s;
3157 @ Return a string built from a token list.
3160 str_number tokens_to_string(halfword p)
3162 int old_setting;
3163 if (selector == new_string)
3164 normal_error("tokens","tokens_to_string() called while selector = new_string");
3165 old_setting = selector;
3166 selector = new_string;
3167 show_token_list(token_link(p), null, -1);
3168 selector = old_setting;
3169 return make_string();
3172 @ @c
/*
    Helpers for building a C string in the buffer |ret| of |alloci| bytes
    with |i| as the write position; the function that uses them declares
    these three variables.

    |make_room(a)| makes sure that |a| more bytes plus a terminating zero
    fit. The buffer grows in steps of 64 bytes until the request fits, so a
    request of more than 64 bytes is honoured as well.
*/
#define make_room(a) do { \
    unsigned room_needed = (unsigned) i + (unsigned) (a) + 1; \
    if (room_needed > alloci) { \
        do { \
            alloci = alloci + 64; \
        } while (room_needed > alloci); \
        ret = xrealloc(ret, alloci); \
    } \
} while (0)

/* store one byte; there has to be room already */
#define append_i_byte(a) ret[i++] = (char)(a)

/* store one byte after making room for it; behaves as a single statement */
#define Print_char(a) do { \
    make_room(1); \
    append_i_byte(a); \
} while (0)
/*
    Append the character |s| to the buffer as UTF-8 (one to four bytes).
    Values of |0x110000| and up are stored as the single byte
    |s - 0x110000|. The old code subtracted |0x11000| there, which only gave
    the right byte because the cast to |char| drops the high bits.
*/
#define Print_uchar(s) do { \
    make_room(4); \
    if ((s) <= 0x7F) { \
        append_i_byte(s); \
    } else if ((s) <= 0x7FF) { \
        append_i_byte(0xC0 + ((s) / 0x40)); \
        append_i_byte(0x80 + ((s) % 0x40)); \
    } else if ((s) <= 0xFFFF) { \
        append_i_byte(0xE0 + ((s) / 0x1000)); \
        append_i_byte(0x80 + (((s) % 0x1000) / 0x40)); \
        append_i_byte(0x80 + (((s) % 0x1000) % 0x40)); \
    } else if ((s) >= 0x110000) { \
        append_i_byte((s) - 0x110000); \
    } else { \
        append_i_byte(0xF0 + ((s) / 0x40000)); \
        append_i_byte(0x80 + (((s) % 0x40000) / 0x1000)); \
        append_i_byte(0x80 + ((((s) % 0x40000) % 0x1000) / 0x40)); \
        append_i_byte(0x80 + ((((s) % 0x40000) % 0x1000) % 0x40)); \
    } \
} while (0)
/*
    |Print_esc(b)| appends the character |e| (when it lies between 0 and
    |STRING_OFFSET|, both excluded) followed by the C string |b|;
    |Print_str(b)| appends just the C string |b|.
*/
#define Print_esc(b) { \
    const char *v = b; \
    if (e > 0 && e < STRING_OFFSET) { \
        Print_uchar(e); \
    } \
    make_room(strlen(v)); \
    for (; *v; v++) { \
        append_i_byte(*v); \
    } \
}

#define Print_str(b) { \
    const char *v = b; \
    make_room(strlen(v)); \
    for (; *v; v++) { \
        append_i_byte(*v); \
    } \
}

/* true when the first character of string |a| has category code 11 */
#define is_cat_letter(a) \
    (get_char_cat_code(pool_to_unichar(str_string((a)))) == 11)
3221 @ The actual token conversion in this function is now functionally equivalent to
3222 |show_token_list|, except that it always prints the whole token list. TODO: check
3223 whether this causes problems in the lua library.
3226 char *tokenlist_to_cstring(int pp, int inhibit_par, int *siz)
3228 register int p, c, m;
3229 int q;
3230 int infop;
3231 char *s, *sh;
3232 int e = 0;
3233 char *ret;
3234 int match_chr = '#';
3235 int n = '0';
3236 unsigned alloci = 1024;
3237 int i = 0;
3238 p = pp;
3239 if (p == null) {
3240 if (siz != NULL)
3241 *siz = 0;
3242 return NULL;
3244 ret = xmalloc(alloci);
3245 p = token_link(p); /* skip refcount */
3246 if (p != null) {
3247 e = int_par(escape_char_code);
3249 while (p != null) {
3250 if (p < (int) fix_mem_min || p > (int) fix_mem_end) {
3251 Print_esc("CLOBBERED.");
3252 break;
3254 infop = token_info(p);
3255 if (infop >= cs_token_flag) {
3256 if (!(inhibit_par && infop == par_token)) {
3257 q = infop - cs_token_flag;
3258 if (q < hash_base) {
3259 if (q == null_cs) {
3260 Print_esc("csname");
3261 Print_esc("endcsname");
3262 } else {
3263 Print_esc("IMPOSSIBLE.");
3265 } else if ((q >= undefined_control_sequence) && ((q <= eqtb_size) || (q > eqtb_size + hash_extra))) {
3266 Print_esc("IMPOSSIBLE.");
3267 } else if ((cs_text(q) < 0) || (cs_text(q) >= str_ptr)) {
3268 Print_esc("NONEXISTENT.");
3269 } else {
3270 str_number txt = cs_text(q);
3271 sh = makecstring(txt);
3272 s = sh;
3273 if (is_active_cs(txt)) {
3274 s = s + 3;
3275 while (*s) {
3276 Print_char(*s);
3277 s++;
3279 } else {
3280 if (e>=0 && e<0x110000) Print_uchar(e);
3281 while (*s) {
3282 Print_char(*s);
3283 s++;
3285 if ((!single_letter(txt)) || is_cat_letter(txt)) {
3286 Print_char(' ');
3289 free(sh);
3292 } else {
3293 if (infop < 0) {
3294 Print_esc("BAD");
3295 } else {
3296 m = token_cmd(infop);
3297 c = token_chr(infop);
3298 switch (m) {
3299 case left_brace_cmd:
3300 case right_brace_cmd:
3301 case math_shift_cmd:
3302 case tab_mark_cmd:
3303 case sup_mark_cmd:
3304 case sub_mark_cmd:
3305 case spacer_cmd:
3306 case letter_cmd:
3307 case other_char_cmd:
3308 Print_uchar(c);
3309 break;
3310 case mac_param_cmd:
3311 if (!in_lua_escape && (is_in_csname==0))
3312 Print_uchar(c);
3313 Print_uchar(c);
3314 break;
3315 case out_param_cmd:
3316 Print_uchar(match_chr);
3317 if (c <= 9) {
3318 Print_char(c + '0');
3319 } else {
3320 Print_char('!');
3321 goto EXIT;
3323 break;
3324 case match_cmd:
3325 match_chr = c;
3326 Print_uchar(c);
3327 n++;
3328 Print_char(n);
3329 if (n > '9')
3330 goto EXIT;
3331 break;
3332 case end_match_cmd:
3333 if (c == 0) {
3334 Print_char('-');
3335 Print_char('>');
3337 break;
3338 default:
3339 not_so_bad(Print_esc);
3340 break;
3344 p = token_link(p);
3346 EXIT:
3347 ret[i] = '\0';
3348 if (siz != NULL)
3349 *siz = i;
3350 return ret;
3353 @ @c
3354 lstring *tokenlist_to_lstring(int pp, int inhibit_par)
3356 int siz;
3357 lstring *ret = xmalloc(sizeof(lstring));
3358 ret->s = (unsigned char *) tokenlist_to_cstring(pp, inhibit_par, &siz);
3359 ret->l = (size_t) siz;
3360 return ret;
3363 @ @c
3364 void free_lstring(lstring * ls)
3366 if (ls == NULL)
3367 return;
3368 if (ls->s != NULL)
3369 free(ls->s);
3370 free(ls);