source/texk/web2c/luatexdir/tex/textoken.w

   1 % textoken.w
   2 %
   3 % Copyright 2006-2011 Taco Hoekwater <taco@@luatex.org>
   4 %
   5 % This file is part of LuaTeX.
   6 %
   7 % LuaTeX is free software; you can redistribute it and/or modify it under
   8 % the terms of the GNU General Public License as published by the Free
   9 % Software Foundation; either version 2 of the License, or (at your
  10 % option) any later version.
  11 %
  12 % LuaTeX is distributed in the hope that it will be useful, but WITHOUT
  13 % ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 % FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  15 % License for more details.
  16 %
  17 % You should have received a copy of the GNU General Public License along
  18 % with LuaTeX; if not, see <http://www.gnu.org/licenses/>.
  19
  20 @ @c
  21
  22 #include "ptexlib.h"
  23
  24 @ @c
  25 #define detokenized_line() (line_catcode_table==NO_CAT_TABLE)
  26
  27 /*
  28 #define do_get_cat_code(a,b) do { \
  29     if (line_catcode_table<=-0xFF) \
  30       a= - line_catcode_table - 0xFF ; \
  31     else if (line_catcode_table!=DEFAULT_CAT_TABLE) \
  32       a=get_cat_code(line_catcode_table,b); \
  33     else \
  34       a=get_cat_code(cat_code_table_par,b); \
  35   } while (0)
  36 */
  37
  38 #define do_get_cat_code(a,b) do { \
  39     if (line_catcode_table==DEFAULT_CAT_TABLE) \
  40       a=get_cat_code(cat_code_table_par,b); \
  41     else if (line_catcode_table>-0xFF) \
  42       a=get_cat_code(line_catcode_table,b); \
  43     else \
  44       a= - line_catcode_table - 0xFF ; \
  45   } while (0)
  46
  47
  48 @ The \TeX\ system does nearly all of its own memory allocation, so that it can
  49 readily be transported into environments that do not have automatic facilities
  50 for strings, garbage collection, etc., and so that it can be in control of what
  51 error messages the user receives. The dynamic storage requirements of \TeX\ are
  52 handled by providing two large arrays called |fixmem| and |varmem| in which
  53 consecutive blocks of words are used as nodes by the \TeX\ routines.
  54
  55 Pointer variables are indices into this array, or into another array called
  56 |eqtb| that will be explained later. A pointer variable might also be a special
  57 flag that lies outside the bounds of |mem|, so we allow pointers to assume any
  58 |halfword| value. The minimum halfword value represents a null pointer. \TeX\
  59 does not assume that |mem[null]| exists.
  60
  61 @ Locations in |fixmem| are used for storing one-word records; a conventional
  62 \.{AVAIL} stack is used for allocation in this array.
  63
  64 @c
  65 smemory_word *fixmem;           /* the big dynamic storage area */
  66 unsigned fix_mem_min;           /* the smallest location of one-word memory in use */
  67 unsigned fix_mem_max;           /* the largest location of one-word memory in use */
  68
  69 @ In order to study the memory requirements of particular applications, it is
  70 possible to prepare a version of \TeX\ that keeps track of current and maximum
  71 memory usage. When code between the delimiters |@!stat| $\ldots$ |tats| is not
  72 commented out, \TeX\ will run a bit slower but it will report these statistics
  73 when |tracing_stats| is sufficiently large.
  74
  75 @c
  76 int var_used, dyn_used;         /* how much memory is in use */
  77
  78 halfword avail;                 /* head of the list of available one-word nodes */
  79 unsigned fix_mem_end;           /* the last one-word node used in |mem| */
  80
  81 halfword garbage;               /* head of a junk list, write only */
  82 halfword temp_token_head;       /* head of a temporary list of some kind */
  83 halfword hold_token_head;       /* head of a temporary list of another kind */
  84 halfword omit_template;         /* a constant token list */
  85 halfword null_list;             /* permanently empty list */
  86 halfword backup_head;           /* head of token list built by |scan_keyword| */
  87
  88 @ @c
  89 void initialize_tokens(void)
  90 {
  91     halfword p;
  92     avail = null;
  93     fix_mem_end = 0;
  94     p = get_avail();
  95     temp_token_head = p;
  96     set_token_info(temp_token_head, 0);
  97     p = get_avail();
  98     hold_token_head = p;
  99     set_token_info(hold_token_head, 0);
 100     p = get_avail();
 101     omit_template = p;
 102     set_token_info(omit_template, 0);
 103     p = get_avail();
 104     null_list = p;
 105     set_token_info(null_list, 0);
 106     p = get_avail();
 107     backup_head = p;
 108     set_token_info(backup_head, 0);
 109     p = get_avail();
 110     garbage = p;
 111     set_token_info(garbage, 0);
 112     dyn_used = 0;               /* initialize statistics */
 113 }
 114
 115 @ The function |get_avail| returns a pointer to a new one-word node whose |link|
 116 field is null. However, \TeX\ will halt if there is no more room left.
 117 @^inner loop@>
 118
 119 If the available-space list is empty, i.e., if |avail=null|, we try first to
 120 increase |fix_mem_end|. If that cannot be done, i.e., if
|fix_mem_end=fix_mem_max|, we try to reallocate array |fixmem|. If that doesn't
work, we have to quit.
 123
 124 @c
 125 halfword get_avail(void)
 126 {                               /* single-word node allocation */
 127     unsigned p;                 /* the new node being got */
 128     unsigned t;
 129     p = (unsigned) avail;       /* get top location in the |avail| stack */
 130     if (p != null) {
 131         avail = token_link(avail);      /* and pop it off */
 132     } else if (fix_mem_end < fix_mem_max) {     /* or go into virgin territory */
 133         incr(fix_mem_end);
 134         p = fix_mem_end;
 135     } else {
 136         smemory_word *new_fixmem;       /* the big dynamic storage area */
 137         t = (fix_mem_max / 5);
 138         new_fixmem =
 139             fixmemcast(realloc
 140                        (fixmem, sizeof(smemory_word) * (fix_mem_max + t + 1)));
 141         if (new_fixmem == NULL) {
 142             runaway();          /* if memory is exhausted, display possible runaway text */
 143             overflow("token memory size", fix_mem_max);
 144         } else {
 145             fixmem = new_fixmem;
 146         }
 147         memset(voidcast(fixmem + fix_mem_max + 1), 0, t * sizeof(smemory_word));
 148         fix_mem_max += t;
 149         p = ++fix_mem_end;
 150     }
 151     token_link(p) = null;       /* provide an oft-desired initialization of the new node */
 152     incr(dyn_used);             /* maintain statistics */
 153     return (halfword) p;
 154 }
 155
 156 @ The procedure |flush_list(p)| frees an entire linked list of one-word nodes
 157 that starts at position |p|.
 158 @^inner loop@>
 159
 160 @c
 161 void flush_list(halfword p)
 162 {                               /* makes list of single-word nodes available */
 163     halfword q, r;              /* list traversers */
 164     if (p != null) {
 165         r = p;
 166         do {
 167             q = r;
 168             r = token_link(r);
 169             decr(dyn_used);
 170         } while (r != null);    /* now |q| is the last node on the list */
 171         token_link(q) = avail;
 172         avail = p;
 173     }
 174 }
 175
 176 @ A \TeX\ token is either a character or a control sequence, and it is @^token@>
 177 represented internally in one of two ways: (1)~A character whose ASCII code
 178 number is |c| and whose command code is |m| is represented as the number
 179 $2^{21}m+c$; the command code is in the range |1<=m<=14|. (2)~A control sequence
 180 whose |eqtb| address is |p| is represented as the number |cs_token_flag+p|. Here
 181 |cs_token_flag=@t$2^{25}-1$@>| is larger than $2^{21}m+c$, yet it is small enough
 182 that |cs_token_flag+p< max_halfword|; thus, a token fits comfortably in a
 183 halfword.
 184
 185 A token |t| represents a |left_brace| command if and only if
 186 |t<left_brace_limit|; it represents a |right_brace| command if and only if we
 187 have |left_brace_limit<=t<right_brace_limit|; and it represents a |match| or
 188 |end_match| command if and only if |match_token<=t<=end_match_token|. The
 189 following definitions take care of these token-oriented constants and a few
 190 others.
 191
 192 @ A token list is a singly linked list of one-word nodes in |mem|, where each
 193 word contains a token and a link. Macro definitions, output-routine definitions,
 194 marks, \.{\\write} texts, and a few other things are remembered by \TeX\ in the
 195 form of token lists, usually preceded by a node with a reference count in its
 196 |token_ref_count| field. The token stored in location |p| is called |info(p)|.
 197
 198 Three special commands appear in the token lists of macro definitions. When
 199 |m=match|, it means that \TeX\ should scan a parameter for the current macro;
 200 when |m=end_match|, it means that parameter matching should end and \TeX\ should
 201 start reading the macro text; and when |m=out_param|, it means that \TeX\ should
 202 insert parameter number |c| into the text at this point.
 203
 204 The enclosing \.{\char'173} and \.{\char'175} characters of a macro definition
 205 are omitted, but the final right brace of an output routine is included at the
 206 end of its token list.
 207
 208 Here is an example macro definition that illustrates these conventions. After
 209 \TeX\ processes the text
 210
 211 $$\.{\\def\\mac a\#1\#2 \\b \{\#1\\-a \#\#1\#2 \#2\}}$$
 212
 213 the definition of \.{\\mac} is represented as a token list containing
 214
 215 $$\def\,{\hskip2pt}
 216 \vbox{\halign{\hfil#\hfil\cr
 217 (reference count), |letter|\,\.a, |match|\,\#, |match|\,\#, |spacer|\,\.\ ,
 218 \.{\\b}, |end_match|,\cr
 219 |out_param|\,1, \.{\\-}, |letter|\,\.a, |spacer|\,\.\ , |mac_param|\,\#,
 220 |other_char|\,\.1,\cr
 221 |out_param|\,2, |spacer|\,\.\ , |out_param|\,2.\cr}}$$
 222
 223 The procedure |scan_toks| builds such token lists, and |macro_call| does the
 224 parameter matching. @^reference counts@>
 225
 226 Examples such as $$\.{\\def\\m\{\\def\\m\{a\}\ b\}}$$ explain why reference
 227 counts would be needed even if \TeX\ had no \.{\\let} operation: When the token
 228 list for \.{\\m} is being read, the redefinition of \.{\\m} changes the |eqtb|
 229 entry before the token list has been fully consumed, so we dare not simply
 230 destroy a token list when its control sequence is being redefined.
 231
 232 If the parameter-matching part of a definition ends with `\.{\#\{}', the
 233 corresponding token list will have `\.\{' just before the `|end_match|' and also
 234 at the very end. The first `\.\{' is used to delimit the parameter; the second
 235 one keeps the first from disappearing.
 236
 237 The |print_meaning| subroutine displays |cur_cmd| and |cur_chr| in symbolic form,
 238 including the expansion of a macro or mark.
 239
 240 @c
 241 void print_meaning(void)
 242 {
 243     /* remap \mathchar onto \Umathchar */
 244 /*
 245     if (cur_cmd == math_given_cmd) {
 246         cur_cmd = xmath_given_cmd ;
 247     }
 248 */
 249     print_cmd_chr((quarterword) cur_cmd, cur_chr);
 250     if (cur_cmd >= call_cmd) {
 251         print_char(':');
 252         print_ln();
 253         token_show(cur_chr);
 254     } else {
 255         /* Show the meaning of a mark node */
 256         if ((cur_cmd == top_bot_mark_cmd) && (cur_chr < marks_code)) {
 257             print_char(':');
 258             print_ln();
 259             switch (cur_chr) {
 260                 case first_mark_code:
 261                     token_show(first_mark(0));
 262                     break;
 263                 case bot_mark_code:
 264                     token_show(bot_mark(0));
 265                     break;
 266                 case split_first_mark_code:
 267                     token_show(split_first_mark(0));
 268                     break;
 269                 case split_bot_mark_code:
 270                     token_show(split_bot_mark(0));
 271                     break;
 272                 default:
 273                     token_show(top_mark(0));
 274                     break;
 275             }
 276         }
 277     }
 278 }
 279
 280 @ The procedure |show_token_list|, which prints a symbolic form of the token list
 281 that starts at a given node |p|, illustrates these conventions. The token list
 282 being displayed should not begin with a reference count. However, the procedure
 283 is intended to be robust, so that if the memory links are awry or if |p| is not
 284 really a pointer to a token list, nothing catastrophic will happen.
 285
 286 An additional parameter |q| is also given; this parameter is either null or it
 287 points to a node in the token list where a certain magic computation takes place
 288 that will be explained later. (Basically, |q| is non-null when we are printing
 289 the two-line context information at the time of an error message; |q| marks the
 290 place corresponding to where the second line should begin.)
 291
 292 For example, if |p| points to the node containing the first \.a in the token list
 293 above, then |show_token_list| will print the string $$\hbox{`\.{a\#1\#2\ \\b\
 294 ->\#1\\-a\ \#\#1\#2\ \#2}';}$$ and if |q| points to the node containing the
 295 second \.a, the magic computation will be performed just before the second \.a is
 296 printed.
 297
 298 The generation will stop, and `\.{\\ETC.}' will be printed, if the length of
 299 printing exceeds a given limit~|l|. Anomalous entries are printed in the form of
 300 control sequences that are not followed by a blank space, e.g., `\.{\\BAD.}';
 301 this cannot be confused with actual control sequences because a real control
 302 sequence named \.{BAD} would come out `\.{\\BAD\ }'.
 303
 304 @c
 305 #define not_so_bad(p) \
 306     switch (m) { \
 307         case assign_int_cmd: \
 308             if (c >= (backend_int_base) && c <= (backend_int_last)) \
 309                 p("[internal backend integer]"); \
 310             break; \
 311         case assign_dimen_cmd: \
 312             if (c >= (backend_dimen_base) && c <= (backend_dimen_last)) \
 313                 p("[internal backend dimension]"); \
 314             break; \
 315         case assign_toks_cmd: \
 316             if (c >= (backend_toks_base) && c <= (backend_toks_last)) \
 317                 p("[internal backend tokenlist]"); \
 318             break; \
 319         default: \
 320             p("BAD"); \
 321             break; \
 322     }
 323
 324 void show_token_list(int p, int q, int l)
 325 {
 326     int m, c;                    /* pieces of a token */
 327     ASCII_code match_chr = '#';  /* character used in a `|match|' */
 328     ASCII_code n = '0';          /* the highest parameter number, as an ASCII digit */
 329     tally = 0;
 330     if (l < 0)
 331         l = 0x3FFFFFFF;
 332     while ((p != null) && (tally < l)) {
 333         if (p == q) {
 334             /* Do magic computation */
 335             set_trick_count();
 336         }
 337         /* Display token |p|, and |return| if there are problems */
 338         if ((p < (int) fix_mem_min) || (p > (int) fix_mem_end)) {
 339             tprint_esc("CLOBBERED.");
 340             return;
 341         }
 342         if (token_info(p) >= cs_token_flag) {
 343             if (!((inhibit_par_tokens) && (token_info(p) == par_token)))
 344                 print_cs(token_info(p) - cs_token_flag);
 345         } else {
 346             m = token_cmd(token_info(p));
 347             c = token_chr(token_info(p));
 348             if (token_info(p) < 0) {
 349                 tprint_esc("BAD");
 350             } else {
 351                 /*
 352                     Display the token $(|m|,|c|)$
 353
 354                     The procedure usually ``learns'' the character code used for macro
 355                     parameters by seeing one in a |match| command before it runs into any
 356                     |out_param| commands.
 357                 */
 358                 switch (m) {
 359                     case left_brace_cmd:
 360                     case right_brace_cmd:
 361                     case math_shift_cmd:
 362                     case tab_mark_cmd:
 363                     case sup_mark_cmd:
 364                     case sub_mark_cmd:
 365                     case spacer_cmd:
 366                     case letter_cmd:
 367                     case other_char_cmd:
 368                         print(c);
 369                         break;
 370                     case mac_param_cmd:
 371                         if (!in_lua_escape && (is_in_csname==0))
 372                             print(c);
 373                         print(c);
 374                         break;
 375                     case out_param_cmd:
 376                         print(match_chr);
 377                         if (c <= 9) {
 378                             print_char(c + '0');
 379                         } else {
 380                             print_char('!');
 381                             return;
 382                         }
 383                         break;
 384                     case match_cmd:
 385                         match_chr = c;
 386                         print(c);
 387                         incr(n);
 388                         print_char(n);
 389                         if (n > '9')
 390                             return;
 391                         break;
 392                     case end_match_cmd:
 393                         if (c == 0)
 394                             tprint("->");
 395                         break;
 396                     default:
 397                         not_so_bad(tprint);
 398                         break;
 399                 }
 400             }
 401         }
 402         p = token_link(p);
 403     }
 404     if (p != null)
 405         tprint_esc("ETC.");
 406 }
 407
 408 @ @c
 409 #define do_buffer_to_unichar(a,b) do { \
 410     a = (halfword)str2uni(buffer+b); \
 411     b += utf8_size(a); \
 412 } while (0)
 413
 414 @ Here's the way we sometimes want to display a token list, given a pointer to
 415 its reference count; the pointer may be null.
 416
 417 @c
 418 void token_show(halfword p)
 419 {
 420     if (p != null)
 421         show_token_list(token_link(p), null, 10000000);
 422 }
 423
@ |delete_token_ref| is called when a pointer to a token list's reference count
 425 is being removed. This means that the token list should disappear if the
 426 reference count was |null|, otherwise the count should be decreased by one.
 427 @^reference counts@>
 428
 429 @ |p| points to the reference count of a token list that is losing one
 430 reference.
 431
 432 @c
 433 void delete_token_ref(halfword p)
 434 {
 435     if (token_ref_count(p) == 0)
 436         flush_list(p);
 437     else
 438         decr(token_ref_count(p));
 439 }
 440
 441 @ @c
 442 int get_char_cat_code(int curchr)
 443 {
 444     int a;
 445     do_get_cat_code(a,curchr);
 446     return a;
 447 }
 448
 449 @ @c
 450 static void invalid_character_error(void)
 451 {
 452     const char *hlp[] = {
 453         "A funny symbol that I can't read has just been input.",
 454         "Continue, and I'll forget that it ever happened.",
 455         NULL
 456     };
 457     deletions_allowed = false;
 458     tex_error("Text line contains an invalid character", hlp);
 459     deletions_allowed = true;
 460 }
 461
 462 @ @c
 463 static boolean process_sup_mark(void);  /* below */
 464
 465 static int scan_control_sequence(void); /* below */
 466
 467 typedef enum {
 468     next_line_ok,
 469     next_line_return,
 470     next_line_restart
 471 } next_line_retval;
 472
 473 static next_line_retval next_line(void); /* below */
 474
 475 @ In case you are getting bored, here is a slightly less trivial routine: Given a
 476 string of lowercase letters, like `\.{pt}' or `\.{plus}' or `\.{width}', the
 477 |scan_keyword| routine checks to see whether the next tokens of input match this
 478 string. The match must be exact, except that uppercase letters will match their
 479 lowercase counterparts; uppercase equivalents are determined by subtracting
 480 |"a"-"A"|, rather than using the |uc_code| table, since \TeX\ uses this routine
 481 only for its own limited set of keywords.
 482
 483 If a match is found, the characters are effectively removed from the input and
 484 |true| is returned. Otherwise |false| is returned, and the input is left
 485 essentially unchanged (except for the fact that some macros may have been
 486 expanded, etc.). @^inner loop@>
 487
 488 @c
 489 boolean scan_keyword(const char *s)
 490 {                               /* look for a given string */
 491     halfword p;                 /* tail of the backup list */
 492     halfword q;                 /* new node being added to the token list via |store_new_token| */
 493     const char *k;              /* index into |str_pool| */
 494     halfword save_cur_cs = cur_cs;
 495     if (strlen(s) == 0)        /* was assert (strlen(s) > 1); */
 496       return false ;           /* but not with newtokenlib  zero keyword simply doesn't match  */
 497     p = backup_head;
 498     token_link(p) = null;
 499     k = s;
 500     while (*k) {
 501         get_x_token();      /* recursion is possible here */
 502         if ((cur_cs == 0) && ((cur_chr == *k) || (cur_chr == *k - 'a' + 'A'))) {
 503             store_new_token(cur_tok);
 504             k++;
 505         } else if ((cur_cmd != spacer_cmd) || (p != backup_head)) {
 506             /*
 507                 crashes on some alignments:
 508
 509                 if (p != backup_head) {
 510                     q = get_avail();
 511                     token_info(q) = cur_tok;
 512                     token_link(q) = null;
 513                     token_link(p) = q;
 514                     begin_token_list(token_link(backup_head), backed_up);
 515                 } else {
 516                     back_input();
 517                 }
 518             */
 519             back_input();
 520             if (p != backup_head) {
 521                 begin_token_list(token_link(backup_head), backed_up);
 522             }
 523             /*  */
 524             cur_cs = save_cur_cs;
 525             return false;
 526         }
 527     }
 528     if (token_link(backup_head) != null)
 529         flush_list(token_link(backup_head));
 530     cur_cs = save_cur_cs;
 531     return true;
 532 }
 533
 534 @ We can not return |undefined_control_sequence| under some conditions
 535  (inside |shift_case|, for example). This needs thinking.
 536
 537 @c
 538
 539 /*
 540     halfword active_to_cs(int curchr, int force)
 541     {
 542         halfword curcs;
 543         char *a, *b;
 544         char *utfbytes = xmalloc(8);
 545         int nncs = no_new_control_sequence;
 546         a = (char *) uni2str(0xFFFF);
 547         utfbytes = strcpy(utfbytes, a);
 548         if (force)
 549             no_new_control_sequence = false;
 550         if (curchr > 0) {
 551             b = (char *) uni2str((unsigned) curchr);
 552             utfbytes = strcat(utfbytes, b);
 553             free(b);
 554             curcs = string_lookup(utfbytes, strlen(utfbytes));
 555         } else {
 556             utfbytes[3] = '\0';
 557             curcs = string_lookup(utfbytes, 4);
 558         }
 559         no_new_control_sequence = nncs;
 560         free(a);
 561         free(utfbytes);
 562         return curcs;
 563     }
 564 */
 565
 566 /*static char * FFFF = "\xEF\xBF\xBF";*/ /* 0xFFFF */
 567
 568 halfword active_to_cs(int curchr, int force)
 569 {
 570     halfword curcs;
 571     int nncs = no_new_control_sequence;
 572     if (force) {
 573         no_new_control_sequence = false;
 574     }
 575     if (curchr > 0) {
 576         char *b = (char *) uni2str((unsigned) curchr);
 577         char *utfbytes = xmalloc(8);
 578         utfbytes = strcpy(utfbytes, "\xEF\xBF\xBF");
 579         utfbytes = strcat(utfbytes, b);
 580         free(b);
 581         curcs = string_lookup(utfbytes, utf8_size(curchr)+3);
 582         free(utfbytes);
 583     } else {
 584         curcs = string_lookup("\xEF\xBF\xBF", 4); /* 0xFFFF ... why not 3 ? */
 585     }
 586     no_new_control_sequence = nncs;
 587     return curcs;
 588 }
 589
 590 /*
 591
 592     static unsigned char *uni2csstr(unsigned unic)
 593     {
 594         unsigned char *buf = xmalloc(8);
 595         unsigned char *pt = buf;
 596         *pt++ = 239; *pt++ = 191; *pt++ = 191; // 0xFFFF
 597         if (unic < 0x80)
 598             *pt++ = (unsigned char) unic;
 599         else if (unic < 0x800) {
 600             *pt++ = (unsigned char) (0xc0 | (unic >> 6));
 601             *pt++ = (unsigned char) (0x80 | (unic & 0x3f));
 602         } else if (unic >= 0x110000) {
 603             *pt++ = (unsigned char) (unic - 0x110000);
 604         } else if (unic < 0x10000) {
 605             *pt++ = (unsigned char) (0xe0 | (unic >> 12));
 606             *pt++ = (unsigned char) (0x80 | ((unic >> 6) & 0x3f));
 607             *pt++ = (unsigned char) (0x80 | (unic & 0x3f));
 608         } else {
 609             int u, z, y, x;
 610             unsigned val = unic - 0x10000;
 611             u = (int) (((val & 0xf0000) >> 16) + 1);
 612             z = (int) ((val & 0x0f000) >> 12);
 613             y = (int) ((val & 0x00fc0) >> 6);
 614             x = (int) (val & 0x0003f);
 615             *pt++ = (unsigned char) (0xf0 | (u >> 2));
 616             *pt++ = (unsigned char) (0x80 | ((u & 3) << 4) | z);
 617             *pt++ = (unsigned char) (0x80 | y);
 618             *pt++ = (unsigned char) (0x80 | x);
 619         }
 620         *pt = '\0';
 621         return buf;
 622     }
 623
 624     halfword active_to_cs(int curchr, int force)
 625     {
 626         halfword curcs;
 627         int nncs = no_new_control_sequence;
 628         if (force) {
 629             no_new_control_sequence = false;
 630         }
 631         if (curchr > 0) {
 632             char * utfbytes = (char *) uni2csstr((unsigned) curchr);
 633             curcs = string_lookup(utfbytes, utf8_size(curchr)+3);
 634             free(utfbytes);
 635         } else {
 636             curcs = string_lookup(FFFF, 4); // 0xFFFF ... why not 3 ?
 637         }
 638         no_new_control_sequence = nncs;
 639         return curcs;
 640     }
 641
 642 */
 643
 644 @ TODO this function should listen to \.{\\escapechar}
 645
 646 @ prints a control sequence
 647
 648 @c
 649 static char *cs_to_string(halfword p)
 650 {
 651     const char *s;
 652     char *sh;
 653     int k = 0;
 654     static char ret[256] = { 0 };
 655     if (p == 0 || p == null_cs) {
 656         ret[k++] = '\\';
 657         s = "csname";
 658         while (*s) {
 659             ret[k++] = *s++;
 660         }
 661         ret[k++] = '\\';
 662         s = "endcsname";
 663         while (*s) {
 664             ret[k++] = *s++;
 665         }
 666         ret[k] = 0;
 667
 668     } else {
 669         str_number txt = cs_text(p);
 670         sh = makecstring(txt);
 671         s = sh;
 672         if (is_active_cs(txt)) {
 673             s = s + 3;
 674             while (*s) {
 675                 ret[k++] = *s++;
 676             }
 677             ret[k] = 0;
 678         } else {
 679             ret[k++] = '\\';
 680             while (*s) {
 681                 ret[k++] = *s++;
 682             }
 683             ret[k] = 0;
 684         }
 685         free(sh);
 686     }
 687     return (char *) ret;
 688 }
 689
 690 @ TODO this is a quick hack, will be solved differently soon
 691
 692 @c
 693 static char *cmd_chr_to_string(int cmd, int chr)
 694 {
 695     char *s;
 696     str_number str;
 697     int sel = selector;
 698     selector = new_string;
 699     print_cmd_chr((quarterword) cmd, chr);
 700     str = make_string();
 701     s = makecstring(str);
 702     selector = sel;
 703     flush_str(str);
 704     return s;
 705 }
 706
 707 @ The heart of \TeX's input mechanism is the |get_next| procedure, which we shall
 708 develop in the next few sections of the program. Perhaps we shouldn't actually
 709 call it the ``heart,'' however, because it really acts as \TeX's eyes and mouth,
 710 reading the source files and gobbling them up. And it also helps \TeX\ to
 711 regurgitate stored token lists that are to be processed again. @^eyes and mouth@>
 712
 713 The main duty of |get_next| is to input one token and to set |cur_cmd| and
 714 |cur_chr| to that token's command code and modifier. Furthermore, if the input
 715 token is a control sequence, the |eqtb| location of that control sequence is
 716 stored in |cur_cs|; otherwise |cur_cs| is set to zero.
 717
 718 Underlying this simple description is a certain amount of complexity because of
 719 all the cases that need to be handled. However, the inner loop of |get_next| is
 720 reasonably short and fast.
 721
 722 When |get_next| is asked to get the next token of a \.{\\read} line,
 723 it sets |cur_cmd=cur_chr=cur_cs=0| in the case that no more tokens
 724 appear on that line. (There might not be any tokens at all, if the
 725 |end_line_char| has |ignore| as its catcode.)
 726
 727 The value of |par_loc| is the |eqtb| address of `\.{\\par}'. This quantity is
 728 needed because a blank line of input is supposed to be exactly equivalent to the
 729 appearance of \.{\\par}; we must set |cur_cs:=par_loc| when detecting a blank
 730 line.
 731
 732 @c
 733 halfword par_loc;   /* location of `\.{\\par}' in |eqtb| */
 734 halfword par_token; /* token representing `\.{\\par}' */
 735
@ Parts of |get_next| are executed more often than any other instructions of \TeX.
 737 @^mastication@>@^inner loop@>
 738
 739 The global variable |force_eof| is normally |false|; it is set |true| by an
 740 \.{\\endinput} command. |luacstrings| is the number of lua print statements
 741 waiting to be input, it is changed by |luatokencall|.
 742
 743 @c
 744 boolean force_eof; /* should the next \.{\\input} be aborted early? */
 745 int luacstrings;   /* how many lua strings are waiting to be input? */
 746
 747 @ If the user has set the |pausing| parameter to some positive value, and if
 748 nonstop mode has not been selected, each line of input is displayed on the
 749 terminal and the transcript file, followed by `\.{=>}'. \TeX\ waits for a
 750 response. If the response is simply |carriage_return|, the line is accepted as it
 751 stands, otherwise the line typed is used instead of the line in the file.
 752
 753 @c
 754 void firm_up_the_line(void)
 755 {
 756     int k;                      /* an index into |buffer| */
 757     ilimit = last;
 758     if (pausing_par > 0) {
 759         if (interaction > nonstop_mode) {
 760             wake_up_terminal();
 761             print_ln();
 762             if (istart < ilimit) {
 763                 for (k = istart; k <= ilimit - 1; k++)
 764                     print_char(buffer[k]);
 765             }
 766             first = ilimit;
 767             prompt_input("=>"); /* wait for user response */
 768             if (last > first) {
 769                 for (k = first; k < +last - 1; k++)     /* move line down in buffer */
 770                     buffer[k + istart - first] = buffer[k];
 771                 ilimit = istart + last - first;
 772             }
 773         }
 774     }
 775 }
 776
 777 @ Before getting into |get_next|, let's consider the subroutine that is called
 778 when an `\.{\\outer}' control sequence has been scanned or when the end of a file
 779 has been reached. These two cases are distinguished by |cur_cs|, which is zero at
 780 the end of a file.
 781
 782 @c
 783 void check_outer_validity(void)
 784 {
 785     halfword p;                 /* points to inserted token list */
 786     halfword q;                 /* auxiliary pointer */
 787     if (suppress_outer_error_par)
 788         return;
 789     if (scanner_status != normal) {
 790         deletions_allowed = false;
 791         /* Back up an outer control sequence so that it can be reread; */
 792         /* An outer control sequence that occurs in a \.{\\read} will not be reread,
 793            since the error recovery for \.{\\read} is not very powerful. */
 794         if (cur_cs != 0) {
 795             if ((istate == token_list) || (iname < 1) || (iname > 17)) {
 796                 p = get_avail();
 797                 token_info(p) = cs_token_flag + cur_cs;
 798                 begin_token_list(p, backed_up); /* prepare to read the control sequence again */
 799             }
 800             cur_cmd = spacer_cmd;
 801             cur_chr = ' ';      /* replace it by a space */
 802         }
 803         if (scanner_status > skipping) {
 804             const char *errhlp[] = {
 805                 "I suspect you have forgotten a `}', causing me",
 806                 "to read past where you wanted me to stop.",
 807                 "I'll try to recover; but if the error is serious,",
 808                 "you'd better type `E' or `X' now and fix your file.",
 809                 NULL
 810             };
 811             char errmsg[256];
 812             const char *startmsg;
 813             const char *scannermsg;
 814             /* Tell the user what has run away and try to recover */
 815             runaway();          /* print a definition, argument, or preamble */
 816             if (cur_cs == 0) {
 817                 startmsg = "File ended";
 818             } else {
 819                 cur_cs = 0;
 820                 startmsg = "Forbidden control sequence found";
 821             }
 822             /* Print either `\.{definition}' or `\.{use}' or `\.{preamble}' or `\.{text}',
 823                and insert tokens that should lead to recovery; */
 824             /* The recovery procedure can't be fully understood without knowing more
 825                about the \TeX\ routines that should be aborted, but we can sketch the
 826                ideas here:  For a runaway definition we will insert a right brace; for a
 827                runaway preamble, we will insert a special \.{\\cr} token and a right
 828                brace; and for a runaway argument, we will set |long_state| to
 829                |outer_call| and insert \.{\\par}. */
 830             p = get_avail();
 831             switch (scanner_status) {
 832             case defining:
 833                 scannermsg = "definition";
 834                 token_info(p) = right_brace_token + '}';
 835                 break;
 836             case matching:
 837                 scannermsg = "use";
 838                 token_info(p) = par_token;
 839                 long_state = outer_call_cmd;
 840                 break;
 841             case aligning:
 842                 scannermsg = "preamble";
 843                 token_info(p) = right_brace_token + '}';
 844                 q = p;
 845                 p = get_avail();
 846                 token_link(p) = q;
 847                 token_info(p) = cs_token_flag + frozen_cr;
 848                 align_state = -1000000;
 849                 break;
 850             case absorbing:
 851                 scannermsg = "text";
 852                 token_info(p) = right_brace_token + '}';
 853                 break;
 854             default:           /* can't happen */
 855                 scannermsg = "unknown";
 856                 break;
 857             }                   /*there are no other cases */
 858             begin_token_list(p, inserted);
 859             snprintf(errmsg, 255, "%s while scanning %s of %s",
 860                      startmsg, scannermsg, cs_to_string(warning_index));
 861             tex_error(errmsg, errhlp);
 862         } else {
 863             char errmsg[256];
 864             const char *errhlp_no[] = {
 865                 "The file ended while I was skipping conditional text.",
 866                 "This kind of error happens when you say `\\if...' and forget",
 867                 "the matching `\\fi'. I've inserted a `\\fi'; this might work.",
 868                 NULL
 869             };
 870             const char *errhlp_cs[] = {
 871                 "A forbidden control sequence occurred in skipped text.",
 872                 "This kind of error happens when you say `\\if...' and forget",
 873                 "the matching `\\fi'. I've inserted a `\\fi'; this might work.",
 874                 NULL
 875             };
 876             const char **errhlp = (const char **) errhlp_no;
 877             char *ss;
 878             if (cur_cs != 0) {
 879                 errhlp = errhlp_cs;
 880                 cur_cs = 0;
 881             }
 882             ss = cmd_chr_to_string(if_test_cmd, cur_if);
 883             snprintf(errmsg, 255, "Incomplete %s; all text was ignored after line %d",
 884                  ss, (int) skip_line);
 885             free(ss);
 886             /* Incomplete \\if... */
 887             cur_tok = cs_token_flag + frozen_fi;
 888             /* back up one inserted token and call |error| */
 889             {
 890                 OK_to_interrupt = false;
 891                 back_input();
 892                 token_type = inserted;
 893                 OK_to_interrupt = true;
 894                 tex_error(errmsg, errhlp);
 895             }
 896         }
 897         deletions_allowed = true;
 898     }
 899 }
 900
 901 @ @c
 902
 903 #if 0
 904
 905 /*
 906     The other variant gives less clutter in tracing cache usage when profiling and for
 907     some files (like the manual) also a bit of a speedup.
 908 */
 909
/*
    Disabled (|#if 0|) single-switch variant of |get_next_file|.

    Reads the next character of the current line, determines its category code
    and dispatches on |istate + cur_cmd|. Returns |true| when |cur_cmd| and
    |cur_chr| (and, for control sequences, |cur_cs|) have been set up, or when
    |next_line| reports |next_line_return|; returns |false| after an invalid
    character has been reported or when |next_line| reports |next_line_restart|.
*/
static boolean get_next_file(void)
{
  SWITCH:
    if (iloc <= ilimit) {
        /* current line not yet finished */
        do_buffer_to_unichar(cur_chr, iloc);

      RESWITCH:
        if (detokenized_line()) {
            /* presumably 10 and 12 are |spacer_cmd| and |other_char_cmd| -- TODO confirm */
            cur_cmd = (cur_chr == ' ' ? 10 : 12);
        } else {
            do_get_cat_code(cur_cmd, cur_chr);
        }
        /*
            Change state if necessary, and |goto switch| if the current
            character should be ignored, or |goto reswitch| if the current
            character changes to another;

            The following 48-way switch accomplishes the scanning quickly, assuming
            that a decent C compiler has translated the code. Note that the numeric
            values for |mid_line|, |skip_blanks|, and |new_line| are spaced
            apart from each other by |max_char_code+1|, so we can add a character's
            command code to the state to get a single number that characterizes both.

            Remark [ls/hh]: checking performance indicated that this switch was the
            cause of many branch prediction errors but changing it to:

                c = istate + cur_cmd;
                if (c == (mid_line + letter_cmd) || c == (mid_line + other_char_cmd)) {
                    return true;
                } else if (c >= new_line) {
                    switch (c) {
                    }
                } else if (c >= skip_blanks) {
                    switch (c) {
                    }
                } else if (c >= mid_line) {
                    switch (c) {
                    }
                } else {
                    istate = mid_line;
                    return true;
                }

            gives as many prediction errors. So, we can indeed assume that the compiler
            does the right job, or that there is simply no other way.
        */

        switch (istate + cur_cmd) {
            case mid_line + ignore_cmd:
            case skip_blanks + ignore_cmd:
            case new_line + ignore_cmd:
            case skip_blanks + spacer_cmd:
            case new_line + spacer_cmd:
                /* Cases where character is ignored */
                goto SWITCH;
                break;
            case mid_line + escape_cmd:
            case new_line + escape_cmd:
            case skip_blanks + escape_cmd:
                /* Scan a control sequence; the scanner also yields the next state */
                istate = (unsigned char) scan_control_sequence();
                if (! suppress_outer_error_par && cur_cmd >= outer_call_cmd)
                    check_outer_validity();
                break;
            case mid_line + active_char_cmd:
            case new_line + active_char_cmd:
            case skip_blanks + active_char_cmd:
                /* Process an active-character  */
                cur_cs = active_to_cs(cur_chr, false);
                cur_cmd = eq_type(cur_cs);
                cur_chr = equiv(cur_cs);
                istate = mid_line;
                if (! suppress_outer_error_par && cur_cmd >= outer_call_cmd)
                    check_outer_validity();
                break;
            case mid_line + sup_mark_cmd:
            case new_line + sup_mark_cmd:
            case skip_blanks + sup_mark_cmd:
                /* If this |sup_mark| starts an expanded character, rescan with the new |cur_chr| */
                if (process_sup_mark())
                    goto RESWITCH;
                else
                    istate = mid_line;
                break;
            case mid_line + invalid_char_cmd:
            case new_line + invalid_char_cmd:
            case skip_blanks + invalid_char_cmd:
                /* Decry the invalid character and |goto restart|; */
                invalid_character_error();
                return false; /* because state may be |token_list| now */
                break;
            case mid_line + spacer_cmd:
                /* Enter |skip_blanks| state, emit a space; */
                istate = skip_blanks;
                cur_chr = ' ';
                break;
            case mid_line + car_ret_cmd:
                /*
                    Finish line, emit a space. When a character of type |spacer| gets through, its
                    character code is changed to $\.{"\ "}=040$. This means that the ASCII codes
                    for tab and space, and for the space inserted at the end of a line, will be
                    treated alike when macro parameters are being matched. We do this since such
                    characters are indistinguishable on most computer terminal displays.
                 */
                iloc = ilimit + 1;
                cur_cmd = spacer_cmd;
                cur_chr = ' ';
                break;
            case skip_blanks + car_ret_cmd:
            case mid_line + comment_cmd:
            case new_line + comment_cmd:
            case skip_blanks + comment_cmd:
                /* Finish line, |goto switch|; */
                iloc = ilimit + 1;
                goto SWITCH;
                break;
            case new_line + car_ret_cmd:
                /* Finish line, emit a \.{\\par}; */
                iloc = ilimit + 1;
                cur_cs = par_loc;
                cur_cmd = eq_type(cur_cs);
                cur_chr = equiv(cur_cs);
                if (! suppress_outer_error_par && cur_cmd >= outer_call_cmd)
                    check_outer_validity();
                break;
            case skip_blanks + left_brace_cmd:
            case new_line + left_brace_cmd:
                istate = mid_line;
                /* fall through */
            case mid_line + left_brace_cmd:
                align_state++;
                break;
            case skip_blanks + right_brace_cmd:
            case new_line + right_brace_cmd:
                istate = mid_line;
                /* fall through */
            case mid_line + right_brace_cmd:
                align_state--;
                break;
            case mid_line + math_shift_cmd:
            case mid_line + tab_mark_cmd:
            case mid_line + mac_param_cmd:
            case mid_line + sub_mark_cmd:
            case mid_line + letter_cmd:
            case mid_line + other_char_cmd:
                /* already in |mid_line|: the character is passed on unchanged */
                break;
            /*
            case skip_blanks + math_shift:
            case skip_blanks + tab_mark:
            case skip_blanks + mac_param:
            case skip_blanks + sub_mark:
            case skip_blanks + letter:
            case skip_blanks + other_char:
            case new_line    + math_shift:
            case new_line    + tab_mark:
            case new_line    + mac_param:
            case new_line    + sub_mark:
            case new_line    + letter:
            case new_line    + other_char:
            */
            default:
                /* the combinations listed in the comment above end up here */
                istate = mid_line;
                break;
        }
    } else {
        /* NOTE(review): the meaning of |iname| value 21 is not visible here -- confirm */
        if (iname != 21)
            istate = new_line;
        /*
           Move to next line of file,
           or |goto restart| if there is no next line,
           or |return| if a \.{\\read} line has finished;
         */
        do {
            next_line_retval r = next_line();
            if (r == next_line_return) {
                return true;
            } else if (r == next_line_restart) {
                return false;
            }
        } while (0);
        check_interrupt();
        goto SWITCH;
    }
    return true;
}
1096
1097 #else
1098
1099 /* 10 times less Bim in callgrind */
1100
1101 /*
1102     escape_cmd left_brace_cmd right_brace_cmd math_shift_cmd
1103     tab_mark_cmd car_ret_cmd mac_param_cmd sup_mark_cmd
1104     sub_mark_cmd ignore_cmd spacer_cmd letter_cmd
1105     other_char_cmd active_char_cmd comment_cmd invalid_char_cmd
1106 */
1107
1108 static boolean get_next_file(void)
1109 {
1110     int c = 0;
1111   SWITCH:
1112     if (iloc <= ilimit) {
1113         /* current line not yet finished */
1114         do_buffer_to_unichar(cur_chr, iloc);
1115       RESWITCH:
1116         if (detokenized_line()) {
1117             cur_cmd = (cur_chr == ' ' ? 10 : 12);
1118         } else {
1119             do_get_cat_code(cur_cmd, cur_chr);
1120         }
1121         /*
1122            Change state if necessary, and |goto switch| if the current
1123            character should be ignored, or |goto reswitch| if the current
1124            character changes to another;
1125         */
1126         c = istate + cur_cmd;
1127         if (c == (mid_line + letter_cmd) || c == (mid_line + other_char_cmd)) {
1128             return true;
1129         } else if (c >= new_line) {
1130             switch (c-new_line) {
1131                 case escape_cmd:
1132                     istate = (unsigned char) scan_control_sequence();
1133                     if (! suppress_outer_error_par && cur_cmd >= outer_call_cmd)
1134                         check_outer_validity();
1135                     return true;
1136                 case left_brace_cmd:
1137                     istate = mid_line;
1138                     align_state++;
1139                     return true;
1140                 case right_brace_cmd:
1141                     istate = mid_line;
1142                     align_state--;
1143                     return true;
1144                 case math_shift_cmd:
1145                     istate = mid_line;
1146                     return true;
1147                 case tab_mark_cmd:
1148                     istate = mid_line;
1149                     return true;
1150                 case car_ret_cmd:
1151                     /* Finish line, emit a \.{\\par}; */
1152                     iloc = ilimit + 1;
1153                     cur_cs = par_loc;
1154                     cur_cmd = eq_type(cur_cs);
1155                     cur_chr = equiv(cur_cs);
1156                     if (! suppress_outer_error_par && cur_cmd >= outer_call_cmd)
1157                         check_outer_validity();
1158                     return true;
1159                 case mac_param_cmd:
1160                     istate = mid_line;
1161                     return true;
1162                 case sup_mark_cmd:
1163                     if (process_sup_mark())
1164                         goto RESWITCH;
1165                     else
1166                         istate = mid_line;
1167                     return true;
1168                 case sub_mark_cmd:
1169                     istate = mid_line;
1170                     return true;
1171                 case ignore_cmd:
1172                     goto SWITCH;
1173                     return true;
1174                 case spacer_cmd:
1175                     /* Cases where character is ignored */
1176                     goto SWITCH;
1177                 case letter_cmd:
1178                     istate = mid_line;
1179                     return true;
1180                 case other_char_cmd:
1181                     istate = mid_line;
1182                     return true;
1183                 case active_char_cmd:
1184                     cur_cs = active_to_cs(cur_chr, false);
1185                     cur_cmd = eq_type(cur_cs);
1186                     cur_chr = equiv(cur_cs);
1187                     istate = mid_line;
1188                     if (! suppress_outer_error_par && cur_cmd >= outer_call_cmd)
1189                         check_outer_validity();
1190                     return true;
1191                 case comment_cmd:
1192                     iloc = ilimit + 1;
1193                     goto SWITCH;
1194                 case invalid_char_cmd:
1195                     invalid_character_error();
1196                     return false; /* because state may be |token_list| now */
1197                 default:
1198                     istate = mid_line;
1199                     return true;
1200             }
1201         } else if (c >= skip_blanks) {
1202             switch (c-skip_blanks) {
1203                 case escape_cmd:
1204                     /* Scan a control sequence ...; */
1205                     istate = (unsigned char) scan_control_sequence();
1206                     if (! suppress_outer_error_par && cur_cmd >= outer_call_cmd)
1207                         check_outer_validity();
1208                     return true;
1209                 case left_brace_cmd:
1210                     istate = mid_line;
1211                     align_state++;
1212                     return true;
1213                 case right_brace_cmd:
1214                     istate = mid_line;
1215                     align_state--;
1216                     return true;
1217                 case math_shift_cmd:
1218                     istate = mid_line;
1219                     return true;
1220                 case tab_mark_cmd:
1221                     istate = mid_line;
1222                     return true;
1223                 case car_ret_cmd:
1224                     iloc = ilimit + 1;
1225                     goto SWITCH;
1226                 case mac_param_cmd:
1227                     istate = mid_line;
1228                     return true;
1229                 case sup_mark_cmd:
1230                     /* If this |sup_mark| starts */
1231                     if (process_sup_mark())
1232                         goto RESWITCH;
1233                     else
1234                         istate = mid_line;
1235                     return true;
1236                 case sub_mark_cmd:
1237                     istate = mid_line;
1238                     return true;
1239                 case ignore_cmd:
1240                     goto SWITCH;
1241                 case spacer_cmd:
1242                     goto SWITCH;
1243                 case letter_cmd:
1244                     istate = mid_line;
1245                     return true;
1246                 case other_char_cmd:
1247                     istate = mid_line;
1248                     return true;
1249                 case active_char_cmd:
1250                     cur_cs = active_to_cs(cur_chr, false);
1251                     cur_cmd = eq_type(cur_cs);
1252                     cur_chr = equiv(cur_cs);
1253                     istate = mid_line;
1254                     if (! suppress_outer_error_par && cur_cmd >= outer_call_cmd)
1255                         check_outer_validity();
1256                     return true;
1257                 case comment_cmd:
1258                     /* Finish line, |goto switch|; */
1259                     iloc = ilimit + 1;
1260                     goto SWITCH;
1261                 case invalid_char_cmd:
1262                     /* Decry the invalid character and |goto restart|; */
1263                     invalid_character_error();
1264                     return false; /* because state may be |token_list| now */
1265                 default:
1266                     istate = mid_line;
1267                     return true;
1268             }
1269         } else if (c >= mid_line) {
1270             switch (c-mid_line) {
1271                 case escape_cmd:
1272                     istate = (unsigned char) scan_control_sequence();
1273                     if (! suppress_outer_error_par && cur_cmd >= outer_call_cmd)
1274                         check_outer_validity();
1275                     return true;
1276                 case left_brace_cmd:
1277                     align_state++;
1278                     return true;
1279                 case right_brace_cmd:
1280                     align_state--;
1281                     return true;
1282                 case math_shift_cmd:
1283                     return true;
1284                 case tab_mark_cmd:
1285                     return true;
1286                 case car_ret_cmd:
1287                     /*
1288                         Finish line, emit a space. When a character of type |spacer| gets through, its
1289                         character code is changed to $\.{"\ "}=040$. This means that the ASCII codes
1290                         for tab and space, and for the space inserted at the end of a line, will be
1291                         treated alike when macro parameters are being matched. We do this since such
1292                         characters are indistinguishable on most computer terminal displays.
1293                      */
1294                     iloc = ilimit + 1;
1295                     cur_cmd = spacer_cmd;
1296                     cur_chr = ' ';
1297                     return true;
1298                 case mac_param_cmd:
1299                     return true;
1300                 case sup_mark_cmd:
1301                     if (process_sup_mark())
1302                         goto RESWITCH;
1303                     else
1304                         istate = mid_line;
1305                     return true;
1306                 case sub_mark_cmd:
1307                     return true;
1308                 case ignore_cmd:
1309                     goto SWITCH;
1310                 case spacer_cmd:
1311                     /* Enter |skip_blanks| state, emit a space; */
1312                     istate = skip_blanks;
1313                     cur_chr = ' ';
1314                     return true;
1315                 case letter_cmd:
1316                     istate = mid_line;
1317                     return true;
1318                 case other_char_cmd:
1319                     istate = mid_line;
1320                     return true;
1321                 case active_char_cmd:
1322                     cur_cs = active_to_cs(cur_chr, false);
1323                     cur_cmd = eq_type(cur_cs);
1324                     cur_chr = equiv(cur_cs);
1325                     istate = mid_line;
1326                     if (! suppress_outer_error_par && cur_cmd >= outer_call_cmd)
1327                         check_outer_validity();
1328                     return true;
1329                 case comment_cmd:
1330                     iloc = ilimit + 1;
1331                     goto SWITCH;
1332                 case invalid_char_cmd:
1333                     invalid_character_error();
1334                     return false; /* because state may be |token_list| now */
1335                 default:
1336                     istate = mid_line;
1337                     return true;
1338             }
1339         } else {
1340             istate = mid_line;
1341             return true;
1342         }
1343     } else {
1344         if (iname != 21) {
1345             istate = new_line;
1346         }
1347         /*
1348            Move to next line of file, or |goto restart| if there is no next line,
1349            or |return| if a \.{\\read} line has finished;
1350         */
1351         do {
1352             next_line_retval r = next_line();
1353             if (r == next_line_return) {
1354                 return true;
1355             } else if (r == next_line_restart) {
1356                 return false;
1357             }
1358         } while (0);
1359         check_interrupt();
1360         goto SWITCH;
1361     }
1362     return true;
1363 }
1364
1365 #endif
1366
1367 @ Notice that a code like \.{\^\^8} becomes \.x if not followed by a hex digit.
1368 We only support a limited set:
1369
1370 ^^^^^^XXXXXX
^^^^XXXX
1372 ^^XX ^^<char>
1373
1374 @c
1375
/*
    Helpers for the \.{\^\^} notations. Only the digits \.{0}--\.{9} and the
    lowercase letters \.{a}--\.{f} count as hexadecimal digits.

    Every argument is parenthesized and every statement-like macro is wrapped in
    |do { ... } while (0)|, so each one behaves as a single statement (also as
    the body of an unbraced |if|/|else|) and has to be followed by a semicolon.
    Arguments are still evaluated more than once: pass side-effect free
    expressions only.
*/

#define is_hex(a) ((((a) >= '0') && ((a) <= '9')) || (((a) >= 'a') && ((a) <= 'f')))

/* shift |cur_chr| one hex digit to the left and add the value of digit |c| */
#define add_nybble(c) do { \
        if ((c) <= '9') { \
            cur_chr = (cur_chr << 4) + (c) - '0'; \
        } else { \
            cur_chr = (cur_chr << 4) + (c) - 'a' + 10; \
        } \
    } while (0)

/* set |cur_chr| to the value of the single hex digit |c| */
#define set_nybble(c) do { \
        if ((c) <= '9') { \
            cur_chr = (c) - '0'; \
        } else { \
            cur_chr = (c) - 'a' + 10; \
        } \
    } while (0)

#define one_hex_to_cur_chr(c1) \
    set_nybble(c1)

#define two_hex_to_cur_chr(c1,c2) do { \
        set_nybble(c1); \
        add_nybble(c2); \
    } while (0)

#define four_hex_to_cur_chr(c1,c2,c3,c4) do { \
        two_hex_to_cur_chr(c1,c2); \
        add_nybble(c3); \
        add_nybble(c4); \
    } while (0)

#define six_hex_to_cur_chr(c1,c2,c3,c4,c5,c6) do { \
        four_hex_to_cur_chr(c1,c2,c3,c4); \
        add_nybble(c5); \
        add_nybble(c6); \
    } while (0)

1408
1409 static boolean process_sup_mark(void)
1410 {
1411     if (cur_chr == buffer[iloc]) {
1412         if (iloc < ilimit) {
1413             if ((cur_chr == buffer[iloc + 1]) && (cur_chr == buffer[iloc + 2])) {
1414                 if ((cur_chr == buffer[iloc + 3]) && (cur_chr == buffer[iloc + 4])) {
1415                     /* ^^^^^^XXXXXX */
1416                     if ((iloc + 10) <= ilimit) {
1417                         int c1 = buffer[iloc +  5];
1418                         int c2 = buffer[iloc +  6];
1419                         int c3 = buffer[iloc +  7];
1420                         int c4 = buffer[iloc +  8];
1421                         int c5 = buffer[iloc +  9];
1422                         int c6 = buffer[iloc + 10];
1423                         if (is_hex(c1) && is_hex(c2) && is_hex(c3) &&
1424                             is_hex(c4) && is_hex(c5) && is_hex(c6)) {
1425                             iloc = iloc + 11;
1426                             six_hex_to_cur_chr(c1,c2,c3,c4,c5,c6);
1427                             return true;
1428                         } else {
1429                             tex_error("^^^^^^ needs six hex digits", NULL);
1430                         }
1431                     } else {
1432                         tex_error("^^^^^^ needs six hex digits, end of input", NULL);
1433                     }
1434                 } else {
1435                     /* ^^^^XXXX */
1436                     if ((iloc + 6) <= ilimit) {
1437                         int c1 = buffer[iloc + 3];
1438                         int c2 = buffer[iloc + 4];
1439                         int c3 = buffer[iloc + 5];
1440                         int c4 = buffer[iloc + 6];
1441                         if (is_hex(c1) && is_hex(c2) && is_hex(c3) && is_hex(c4)) {
1442                             iloc = iloc + 7;
1443                             four_hex_to_cur_chr(c1,c2,c3,c4);
1444                             return true;
1445                         } else {
1446                             tex_error("^^^^ needs four hex digits", NULL);
1447                         }
1448                     } else {
1449                         tex_error("^^^^ needs four hex digits, end of input", NULL);
1450                     }
1451                 }
1452             } else {
1453                 /* ^^XX */
1454                 if ((iloc + 2) <= ilimit) {
1455                     int c1 = buffer[iloc + 1];
1456                     int c2 = buffer[iloc + 2];
1457                     if (is_hex(c1) && is_hex(c2)) {
1458                         iloc = iloc + 3;
1459                         two_hex_to_cur_chr(c1,c2);
1460                         return true;
1461                     }
1462                 }
1463                 /* go on, no error, good old tex */
1464             }
1465         }
1466         /* the rest */
1467         {
1468             int c1 = buffer[iloc + 1];
1469             if (c1 < 0200) {
1470                 iloc = iloc + 2;
1471                 if (is_hex(c1) && (iloc <= ilimit)) {
1472                     int c2 = buffer[iloc];
1473                     if (is_hex(c2)) {
1474                         incr(iloc);
1475                         two_hex_to_cur_chr(c1,c2);
1476                         return true;
1477                     }
1478                 }
1479                 cur_chr = (c1 < 0100 ? c1 + 0100 : c1 - 0100);
1480                 return true;
1481             }
1482         }
1483     }
1484     return false;
1485 }
1486
1487 @ Control sequence names are scanned only when they appear in some line of a
1488 file; once they have been scanned the first time, their |eqtb| location serves as
1489 a unique identification, so \TeX\ doesn't need to refer to the original name any
1490 more except when it prints the equivalent in symbolic form.
1491
1492 The program that scans a control sequence has been written carefully in order to
1493 avoid the blowups that might otherwise occur if a malicious user tried something
1494 like `\.{\\catcode\'15=0}'. The algorithm might look at |buffer[ilimit+1]|, but
1495 it never looks at |buffer[ilimit+2]|.
1496
1497 If expanded characters like `\.{\^\^A}' or `\.{\^\^df}' appear in or just
1498 following a control sequence name, they are converted to single characters in the
1499 buffer and the process is repeated, slowly but surely.
1500
1501 @c
1502 static boolean check_expanded_code(int *kk);    /* below */
1503
1504 static int scan_control_sequence(void)
1505 {
1506     int retval = mid_line;
1507     if (iloc > ilimit) {
1508         cur_cs = null_cs;       /* |state| is irrelevant in this case */
1509     } else {
1510         register int cat;       /* |cat_code(cur_chr)|, usually */
1511         while (1) {
1512             int k = iloc;
1513             do_buffer_to_unichar(cur_chr, k);
1514             do_get_cat_code(cat, cur_chr);
1515             if (cat != letter_cmd || k > ilimit) {
1516                 retval = (cat == spacer_cmd ? skip_blanks : mid_line);
1517                 if (cat == sup_mark_cmd && check_expanded_code(&k))     /* If an expanded...; */
1518                     continue;
1519             } else {
1520                 retval = skip_blanks;
1521                 do {
1522                     do_buffer_to_unichar(cur_chr, k);
1523                     do_get_cat_code(cat, cur_chr);
1524                 } while (cat == letter_cmd && k <= ilimit);
1525
1526                 if (cat == sup_mark_cmd && check_expanded_code(&k))     /* If an expanded...; */
1527                     continue;
1528                 if (cat != letter_cmd) {
1529                     /* backtrack one character which can be utf */
1530                     /*
1531                     decr(k);
1532                     if (cur_chr > 0xFFFF)
1533                         decr(k);
1534                     if (cur_chr > 0x7FF)
1535                         decr(k);
1536                     if (cur_chr > 0x7F)
1537                         decr(k);
1538                     */
1539                     if (cur_chr <= 0x7F) {
1540                         k -= 1; /* in most cases */
1541                     } else if (cur_chr > 0xFFFF) {
1542                         k -= 4;
1543                     } else if (cur_chr > 0x7FF) {
1544                         k -= 3;
1545                     } else /* if (cur_chr > 0x7F) */ {
1546                         k -= 2;
1547                     }
1548                     /* now |k| points to first nonletter */
1549                 }
1550             }
1551             cur_cs = id_lookup(iloc, k - iloc);
1552             iloc = k;
1553             break;
1554         }
1555     }
1556     cur_cmd = eq_type(cur_cs);
1557     cur_chr = equiv(cur_cs);
1558     return retval;
1559 }
1560
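@ Whenever we reach the following piece of code, we will have
|cur_chr=buffer[k-1]| and |k<=ilimit+1| and
|cat=get_cat_code(cat_code_table,cur_chr)|. If an expanded code like \.{\^\^A} or
\.{\^\^df} appears in |buffer[(k-1)..(k+1)]| or |buffer[(k-1)..(k+2)]|, we will
store the corresponding code in |buffer[k-1]| and shift the rest of the buffer
left two or three places. The longer forms with four carets and four hex digits,
or six carets and six hex digits, are handled in the same way. Because the
resulting character is stored as a UTF-8 sequence that can occupy up to four
bytes starting at |buffer[k-1]|, the actual shift is the length of the notation
minus the length of that sequence.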
1567
1568 @c
1569 static boolean check_expanded_code(int *kk)
1570 {
1571     int l;
1572     int k = *kk;
1573     int d = 1;
1574     if (buffer[k] == cur_chr && k < ilimit) {
1575         if ((cur_chr == buffer[k + 1]) && (cur_chr == buffer[k + 2])) {
1576             if ((cur_chr == buffer[k + 3]) && (cur_chr == buffer[k + 4])) {
1577                 if ((k + 10) <= ilimit) {
1578                     int c1 = buffer[k + 6 - 1];
1579                     int c2 = buffer[k + 6];
1580                     int c3 = buffer[k + 6 + 1];
1581                     int c4 = buffer[k + 6 + 2];
1582                     int c5 = buffer[k + 6 + 3];
1583                     int c6 = buffer[k + 6 + 4];
1584                     if (is_hex(c1) && is_hex(c2) && is_hex(c3) && is_hex(c4) && is_hex(c5) && is_hex(c6)) {
1585                         d = 6;
1586                         six_hex_to_cur_chr(c1,c2,c3,c4,c5,c6);
1587                     } else {
1588                         tex_error("^^^^^^ needs six hex digits", NULL);
1589                     }
1590                 } else {
1591                     tex_error("^^^^^^ needs six hex digits, end of input", NULL);
1592                 }
1593             } else {
1594                 if ((k + 6) <= ilimit) {
1595                     int c1 = buffer[k + 4 - 1];
1596                     int c2 = buffer[k + 4];
1597                     int c3 = buffer[k + 4 + 1];
1598                     int c4 = buffer[k + 4 + 2];
1599                     if (is_hex(c1) && is_hex(c2) && is_hex(c3) && is_hex(c4)) {
1600                         d = 4;
1601                         four_hex_to_cur_chr(c1,c2,c3,c4);
1602                     } else {
1603                         tex_error("^^^^ needs four hex digits", NULL);
1604                     }
1605                 } else {
1606                     tex_error("^^^^ needs four hex digits, end of input", NULL);
1607                 }
1608             }
1609         } else {
1610             int c1 = buffer[k + 1];
1611             if (c1 < 0200) {
1612                 d = 1;
1613                 if (is_hex(c1) && (k + 2) <= ilimit) {
1614                     int c2 = buffer[k + 2];
1615                     if (is_hex(c2)) {
1616                         d = 2;
1617                         two_hex_to_cur_chr(c1,c2);
1618                     } else {
1619                         cur_chr = (c1 < 0100 ? c1 + 0100 : c1 - 0100);
1620                     }
1621                 } else {
1622                     cur_chr = (c1 < 0100 ? c1 + 0100 : c1 - 0100);
1623                 }
1624             }
1625         }
1626         if (d > 2)
1627             d = 2 * d - 1;
1628         else
1629             d++;
1630         if (cur_chr <= 0x7F) {
1631             buffer[k - 1] = (packed_ASCII_code) cur_chr;
1632         } else if (cur_chr <= 0x7FF) {
1633             buffer[k - 1] = (packed_ASCII_code) (0xC0 + cur_chr / 0x40);
1634             k++;
1635             d--;
1636             buffer[k - 1] = (packed_ASCII_code) (0x80 + cur_chr % 0x40);
1637         } else if (cur_chr <= 0xFFFF) {
1638             buffer[k - 1] = (packed_ASCII_code) (0xE0 + cur_chr / 0x1000);
1639             k++;
1640             d--;
1641             buffer[k - 1] = (packed_ASCII_code) (0x80 + (cur_chr % 0x1000) / 0x40);
1642             k++;
1643             d--;
1644             buffer[k - 1] = (packed_ASCII_code) (0x80 + (cur_chr % 0x1000) % 0x40);
1645         } else {
1646             buffer[k - 1] = (packed_ASCII_code) (0xF0 + cur_chr / 0x40000);
1647             k++;
1648             d--;
1649             buffer[k - 1] = (packed_ASCII_code) (0x80 + (cur_chr % 0x40000) / 0x1000);
1650             k++;
1651             d--;
1652             buffer[k - 1] = (packed_ASCII_code) (0x80 + ((cur_chr % 0x40000) % 0x1000) / 0x40);
1653             k++;
1654             d--;
1655             buffer[k - 1] = (packed_ASCII_code) (0x80 + ((cur_chr % 0x40000) % 0x1000) % 0x40);
1656         }
1657         l = k;
1658         ilimit = ilimit - d;
1659         while (l <= ilimit) {
1660             buffer[l] = buffer[l + d];
1661             l++;
1662         }
1663         *kk = k;
1664         return true;
1665     }
1666     return false;
1667 }
1668
1669 @ All of the easy branches of |get_next| have now been taken care of. There is
1670 one more branch.
1671
1672 @c static next_line_retval next_line(void)
1673 {
1674     boolean inhibit_eol = false; /* a way to end a pseudo file without trailing space */
1675     if (iname > 17) {
1676         /* Read next line of file into |buffer|, or |goto restart| if the file has ended */
1677         incr(line);
1678         first = istart;
1679         if (!force_eof) {
1680             if (iname <= 20) {
1681                 if (pseudo_input()) {   /* not end of file */
1682                     firm_up_the_line(); /* this sets |ilimit| */
1683                     line_catcode_table = DEFAULT_CAT_TABLE;
1684                     if ((iname == 19) && (pseudo_lines(pseudo_files) == null))
1685                         inhibit_eol = true;
1686                 } else if ((every_eof_par != null) && !eof_seen[iindex]) {
1687                     ilimit = first - 1;
1688                     eof_seen[iindex] = true; /* fake one empty line */
1689                     if (iname != 19)
1690                         begin_token_list(every_eof_par, every_eof_text);
1691                     return next_line_restart;
1692                 } else {
1693                     force_eof = true;
1694                 }
1695             } else {
1696                 if (iname == 21) {
1697                     if (luacstring_input()) { /* not end of strings  */
1698                         firm_up_the_line();
1699                         line_catcode_table = (short) luacstring_cattable();
1700                         line_partial = (signed char) luacstring_partial();
1701                         if (luacstring_final_line() || line_partial
1702                             || line_catcode_table == NO_CAT_TABLE)
1703                             inhibit_eol = true;
1704                         if (!line_partial)
1705                             istate = new_line;
1706                     } else {
1707                         force_eof = true;
1708                     }
1709                 } else {
1710                     if (lua_input_ln(cur_file, 0, true)) { /* not end of file */
1711                         firm_up_the_line(); /* this sets |ilimit| */
1712                         line_catcode_table = DEFAULT_CAT_TABLE;
1713                     } else if ((every_eof_par != null) && (!eof_seen[iindex])) {
1714                         ilimit = first - 1;
1715                         eof_seen[iindex] = true; /* fake one empty line */
1716                         begin_token_list(every_eof_par, every_eof_text);
1717                         return next_line_restart;
1718                     } else {
1719                         force_eof = true;
1720                     }
1721                 }
1722             }
1723         }
1724         if (force_eof) {
1725             if (tracing_nesting_par > 0)
1726                 if ((grp_stack[in_open] != cur_boundary) || (if_stack[in_open] != cond_ptr))
1727                     if (!((iname == 19) || (iname == 21))) {
1728                         /* give warning for some unfinished groups and/or conditionals */
1729                         file_warning();
1730                     }
1731             if ((iname > 21) || (iname == 20)) {
1732                 report_stop_file(filetype_tex);
1733                 decr(open_parens);
1734             }
1735             force_eof = false;
1736             /* lua input or \.{\\scantextokens} */
1737             if (iname == 21 || iname == 19) {
1738                 end_file_reading();
1739             } else {
1740                 end_file_reading();
1741                 if (! suppress_outer_error_par)
1742                     check_outer_validity();
1743             }
1744             return next_line_restart;
1745         }
1746         if (inhibit_eol || end_line_char_inactive)
1747             ilimit--;
1748         else
1749             buffer[ilimit] = (packed_ASCII_code) end_line_char_par;
1750         first = ilimit + 1;
1751         iloc = istart; /* ready to read */
1752     } else {
1753         if (!terminal_input) {
1754             /* \.{\\read} line has ended */
1755             cur_cmd = 0;
1756             cur_chr = 0;
1757             return next_line_return;    /* OUTER */
1758         }
1759         if (input_ptr > 0) {
1760             /* text was inserted during error recovery */
1761             end_file_reading();
1762             return next_line_restart; /* resume previous level */
1763         }
1764         if (selector < log_only)
1765             open_log_file();
1766         if (interaction > nonstop_mode) {
1767             if (end_line_char_inactive)
1768                 ilimit++;
1769             if (ilimit == istart) {
1770                 /* previous line was empty */
1771                 tprint_nl("(Please type a command or say `\\end')");
1772             }
1773             print_ln();
1774             first = istart;
1775             prompt_input("*"); /* input on-line into |buffer| */
1776             ilimit = last;
1777             if (end_line_char_inactive)
1778                 ilimit--;
1779             else
1780                 buffer[ilimit] = (packed_ASCII_code) end_line_char_par;
1781             first = ilimit + 1;
1782             iloc = istart;
1783         } else {
1784             /*
1785                 Nonstop mode, which is intended for overnight batch processing,
1786                 never waits for on-line input.
1787             */
1788             fatal_error("*** (job aborted, no legal \\end found)");
1789         }
1790     }
1791     return next_line_ok;
1792 }
1793
1794 @ Let's consider now what happens when |get_next| is looking at a token list.
1795
1796 @c
1797 static boolean get_next_tokenlist(void)
1798 {
1799     register halfword t = token_info(iloc);
1800     iloc = token_link(iloc); /* move to next */
1801     if (t >= cs_token_flag) {
1802         /* a control sequence token */
1803         cur_cs = t - cs_token_flag;
1804         cur_cmd = eq_type(cur_cs);
1805         if (cur_cmd >= outer_call_cmd) {
1806             if (cur_cmd == dont_expand_cmd) {
1807                 /*
1808                     Get the next token, suppressing expansion. The present point in the program
1809                     is reached only when the |expand| routine has inserted a special marker into
1810                     the input. In this special case, |token_info(iloc)| is known to be a control
1811                     sequence token, and |token_link(iloc)=null|.
1812                 */
1813                 cur_cs = token_info(iloc) - cs_token_flag;
1814                 iloc = null;
1815                 cur_cmd = eq_type(cur_cs);
1816                 if (cur_cmd > max_command_cmd) {
1817                     cur_cmd = relax_cmd;
1818                     cur_chr = no_expand_flag;
1819                     return true;
1820                 }
1821             } else if (! suppress_outer_error_par) {
1822                 check_outer_validity();
1823             }
1824         }
1825         cur_chr = equiv(cur_cs);
1826     } else {
1827         cur_cmd = token_cmd(t);
1828         cur_chr = token_chr(t);
1829         switch (cur_cmd) {
1830             case left_brace_cmd:
1831                 align_state++;
1832                 break;
1833             case right_brace_cmd:
1834                 align_state--;
1835                 break;
1836             case out_param_cmd:
1837                 /* Insert macro parameter and |goto restart|; */
1838                 begin_token_list(param_stack[param_start + cur_chr - 1], parameter);
1839                 return false;
1840                 break;
1841         }
1842     }
1843     return true;
1844 }
1845
1846 @ Now we're ready to take the plunge into |get_next| itself. Parts of this
1847 routine are executed more often than any other instructions of \TeX.
1848 @^mastication@>@^inner loop@>
1849
1850 @ sets |cur_cmd|, |cur_chr|, |cur_cs| to next token
1851
1852 @c
1853 void get_next(void)
1854 {
1855   RESTART:
1856     cur_cs = 0;
1857     if (istate != token_list) {
1858         /* Input from external file, |goto restart| if no input found */
1859         if (!get_next_file())
1860             goto RESTART;
1861     } else {
1862         if (iloc == null) {
1863             end_token_list();
1864             goto RESTART;       /* list exhausted, resume previous level */
1865         } else if (!get_next_tokenlist()) {
1866             goto RESTART;       /* parameter needs to be expanded */
1867         }
1868     }
1869     /* If an alignment entry has just ended, take appropriate action */
1870     if ((cur_cmd == tab_mark_cmd || cur_cmd == car_ret_cmd) && align_state == 0) {
1871         insert_vj_template();
1872         goto RESTART;
1873     }
1874 }
1875
1876 @ Since |get_next| is used so frequently in \TeX, it is convenient to define
1877 three related procedures that do a little more:
1878
1879 \yskip\hang|get_token| not only sets |cur_cmd| and |cur_chr|, it also sets
1880 |cur_tok|, a packed halfword version of the current token.
1881
1882 \yskip\hang|get_x_token|, meaning ``get an expanded token,'' is like |get_token|,
1883 but if the current token turns out to be a user-defined control sequence (i.e., a
1884 macro call), or a conditional, or something like \.{\\topmark} or
1885 \.{\\expandafter} or \.{\\csname}, it is eliminated from the input by beginning
1886 the expansion of the macro or the evaluation of the conditional.
1887
1888 \yskip\hang|x_token| is like |get_x_token| except that it assumes that |get_next|
1889 has already been called.
1890
1891 \yskip\noindent In fact, these three procedures account for almost every use of
1892 |get_next|.
1893
1894 No new control sequences will be defined except during a call of |get_token|, or
1895 when \.{\\csname} compresses a token list, because |no_new_control_sequence| is
1896 always |true| at other times.
1897
1898 @ sets |cur_cmd|, |cur_chr|, |cur_tok|
1899
1900 @c
1901 void get_token(void)
1902 {
1903     no_new_control_sequence = false;
1904     get_next();
1905     no_new_control_sequence = true;
1906     if (cur_cs == 0)
1907         cur_tok = token_val(cur_cmd, cur_chr);
1908     else
1909         cur_tok = cs_token_flag + cur_cs;
1910 }
1911
1912 @ changes the string |s| to a token list
1913
1914 @c
1915 halfword string_to_toks(const char *ss)
1916 {
1917     halfword p; /* tail of the token list */
1918     halfword q; /* new node being added to the token list via |store_new_token| */
1919     halfword t; /* token being appended */
1920     const char *s = ss;
1921     const char *se = ss + strlen(s);
1922     p = temp_token_head;
1923     set_token_link(p, null);
1924     while (s < se) {
1925         t = (halfword) str2uni((const unsigned char *) s);
1926         s += utf8_size(t);
1927         if (t == ' ')
1928             t = space_token;
1929         else
1930             t = other_token + t;
1931         fast_store_new_token(t);
1932     }
1933     return token_link(temp_token_head);
1934 }
1935
1936 @ The token lists for macros and for other things like \.{\\mark} and
1937 \.{\\output} and \.{\\write} are produced by a procedure called |scan_toks|.
1938
1939 Before we get into the details of |scan_toks|, let's consider a much simpler
1940 task, that of converting the current string into a token list. The |str_toks|
1941 function does this; it classifies spaces as type |spacer| and everything else as
1942 type |other_char|.
1943
1944 The token list created by |str_toks| begins at |link(temp_token_head)| and ends
1945 at the value |p| that is returned. (If |p=temp_token_head|, the list is empty.)
1946
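|lua_str_toks| is almost identical, but it also puts a backslash in front of the
three symbols that Lua considers special while scanning a literal string (the
backslash, the double quote and the single quote), and it replaces a line feed or
a carriage return by a backslash followed by the letter `n' or `r'.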
1949
1950 @ changes the string |str_pool[b..pool_ptr]| to a token list
1951
1952 @c
1953 halfword lua_str_toks(lstring b)
1954 {
1955     halfword p;       /* tail of the token list */
1956     halfword q;       /* new node being added to the token list via |store_new_token| */
1957     halfword t;       /* token being appended */
1958     unsigned char *k; /* index into string */
1959     p = temp_token_head;
1960     set_token_link(p, null);
1961     k = (unsigned char *) b.s;
1962     while (k < (unsigned char *) b.s + b.l) {
1963         t = pool_to_unichar(k);
1964         k += utf8_size(t);
1965         if (t == ' ') {
1966             t = space_token;
1967         } else {
1968             if ((t == '\\') || (t == '"') || (t == '\'') || (t == 10) || (t == 13))
1969                 fast_store_new_token(other_token + '\\');
1970             if (t == 10)
1971                 t = 'n';
1972             if (t == 13)
1973                 t = 'r';
1974             t = other_token + t;
1975         }
1976         fast_store_new_token(t);
1977     }
1978     return p;
1979 }
1980
1981 @ Incidentally, the main reason for wanting |str_toks| is the function
1982 |the_toks|, which has similar input/output characteristics.
1983
1984 @ changes the string |str_pool[b..pool_ptr]| to a token list
1985
1986 @c
1987 halfword str_toks(lstring s)
1988 {
1989     halfword p;           /* tail of the token list */
1990     halfword q;           /* new node being added to the token list via |store_new_token| */
1991     halfword t;           /* token being appended */
1992     unsigned char *k, *l; /* index into string */
1993     p = temp_token_head;
1994     set_token_link(p, null);
1995     k = s.s;
1996     l = k + s.l;
1997     while (k < l) {
1998         t = pool_to_unichar(k);
1999         k += utf8_size(t);
2000         if (t == ' ')
2001             t = space_token;
2002         else
2003             t = other_token + t;
2004         fast_store_new_token(t);
2005     }
2006     return p;
2007 }
2008
2009 /*
2010     hh: most of the converter is similar to the one i made for macro so at some point i
2011     can make a helper; also todo: there is no need to go through the pool
2012
2013 */
2014
2015 halfword str_scan_toks(int ct, lstring s)
2016 {                         /* changes the string |str_pool[b..pool_ptr]| to a token list */
2017     halfword p;           /* tail of the token list */
2018     halfword q;           /* new node being added to the token list via |store_new_token| */
2019     halfword t;           /* token being appended */
2020     unsigned char *k, *l; /* index into string */
2021     int cc;
2022     p = temp_token_head;
2023     set_token_link(p, null);
2024     k = s.s;
2025     l = k + s.l;
2026     while (k < l) {
2027         t = pool_to_unichar(k);
2028         k += utf8_size(t);
2029         cc = get_cat_code(ct,t);
2030             if (cc == 0) {
2031                 /* we have a potential control sequence so we check for it */
2032                 int _lname = 0 ;
2033                 int _s = 0 ;
2034                 int _c = 0 ;
2035                 halfword _cs = null ;
2036                 unsigned char *_name  = k ;
2037                 while (k < l) {
2038                     t = (halfword) str2uni((const unsigned char *) k);
2039                     _s = utf8_size(t);
2040                     _c = get_cat_code(ct,t);
2041                     if (_c == 11) {
2042                         k += _s ;
2043                         _lname = _lname + _s ;
2044                     } else if (_c == 10) {
2045                         /* we ignore a trailing space like normal scanning does */
2046                         k += _s ;
2047                         break ;
2048                     } else {
2049                         break ;
2050                     }
2051                 }
2052                 if (_s > 0) {
2053                     /* we have a potential \cs */
2054                     _cs = string_lookup((const char *) _name, _lname);
2055                     if (_cs == undefined_control_sequence) {
2056                         /* let's play safe and backtrack */
2057                         t = cc * (1<<21) + t ;
2058                         k = _name ;
2059                     } else {
2060                         t = cs_token_flag + _cs;
2061                     }
2062                 } else {
2063                     /* just a character with some meaning, so \unknown becomes effectively */
2064                     /* \\unknown assuming that \\ has some useful meaning of course        */
2065                     t = cc * (1<<21) + t ;
2066                     k = _name ;
2067                 }
2068
2069             } else {
2070                 /* whatever token, so for instance $x^2$ just works given a tex */
2071                 /* catcode regime */
2072                 t = cc * (1<<21) + t ;
2073             }
2074             fast_store_new_token(t);
2075
2076     }
2077     return p;
2078 }
2079
2080 @ Here's part of the |expand| subroutine that we are now ready to complete:
2081
2082 @c
2083 void ins_the_toks(void)
2084 {
2085     (void) the_toks();
2086     ins_list(token_link(temp_token_head));
2087 }
2088
2089 #define set_toks_register(n,t,g) { \
2090     int a = (g>0) ? 4 : 0; \
2091     halfword ref = get_avail();  \
2092     set_token_ref_count(ref, 0); \
2093     set_token_link(ref, token_link(t)); \
2094     define(n + toks_base, call_cmd, ref); \
2095 }
2096
2097 void combine_the_toks(int how)
2098 {
2099     halfword nt;
2100     get_x_token();
2101     /* target */
2102     if (cur_cmd == assign_toks_cmd) {
2103         nt = equiv(cur_cs) - toks_base;
2104         /* check range */
2105     } else {
2106         back_input();
2107         scan_int();
2108         nt = cur_val;
2109     }
2110     /* source */
2111     do {
2112         get_x_token();
2113     } while (cur_cmd == spacer_cmd);
2114     if (cur_cmd == left_brace_cmd) {
2115         halfword x, source;
2116         back_input();
2117         x = scan_toks(false,how > 1); /* expanded or not */
2118         source = def_ref;
2119         /* action */
2120         if (source != null) {
2121             halfword target = toks(nt);
2122             if (target == null) {
2123                 set_toks_register(nt,source,0);
2124             } else {
2125                 halfword s = token_link(source);
2126                 if (s != null) {
2127                     halfword t = token_link(target);
2128                     if (t == null) {
2129                         /* can this happen ? */
2130                         set_token_link(target, s);
2131                     } else if (odd(how)) {
2132                         /* prepend */
2133                         if (cur_level != eq_level_field(eqtb[toks_base+nt])) {
2134                             halfword p = temp_token_head;
2135                             halfword q;
2136                             set_token_link(p, s); /* s = head, x = tail */
2137                             p = x;
2138                             while (t != null) {
2139                                 fast_store_new_token(token_info(t));
2140                                 t = token_link(t);
2141                             }
2142                             set_toks_register(nt,temp_token_head,0);
2143                         } else {
2144                             set_token_link(x,t);
2145                             set_token_link(target,s);
2146                         }
2147                     } else {
2148                         /* append */
2149                         if (cur_level != eq_level_field(eqtb[toks_base+nt])) {
2150                             halfword p = temp_token_head;
2151                             halfword q;
2152                             set_token_link(p, null);
2153                             while (t != null) {
2154                                 fast_store_new_token(token_info(t));
2155                                 t = token_link(t);
2156                             }
2157                             set_token_link(p,s);
2158                             set_toks_register(nt,temp_token_head,0);
2159                         } else {
2160                             while (token_link(t) != null) {
2161                                 t = token_link(t);
2162                             }
2163                             set_token_link(t,s);
2164                         }
2165                     }
2166                 }
2167             }
2168         }
2169     } else {
2170         halfword source, ns;
2171         if (cur_cmd == assign_toks_cmd) {
2172             ns = equiv(cur_cs) - toks_base;
2173             /* check range */
2174         } else {
2175             back_input();
2176             scan_int();
2177             ns = cur_val;
2178         }
2179         /* action */
2180         source = toks(ns);
2181         if (source != null) {
2182             halfword target = toks(nt);
2183             if (target == null) {
2184                 equiv(toks_base+nt) = source;
2185                 equiv(toks_base+ns) = null;
2186             } else {
2187                 halfword s = token_link(source);
2188                 if (s != null) {
2189                     halfword t = token_link(target);
2190                     if (t == null) {
2191                         set_token_link(target, s);
2192                     } else if (odd(how)) {
2193                         /* prepend */
2194                         halfword x = s;
2195                         while (token_link(x) != null) {
2196                             x = token_link(x);
2197                         }
2198                         set_token_link(x,t);
2199                         set_token_link(target,s);
2200                     } else {
2201                         /* append */
2202                         while (token_link(t) != null) {
2203                             t = token_link(t);
2204                         }
2205                         set_token_link(t,s);
2206                     }
2207                      equiv(toks_base+ns) = null;
2208                 }
2209             }
2210         }
2211     }
2212 }
2213
2214 @ This routine, used in the next one, prints the job name, possibly modified by
2215 the |process_jobname| callback.
2216
2217 @c
2218 static void print_job_name(void)
2219 {
2220    if (job_name) {
2221       char *s, *ss; /* C strings for jobname before and after processing */
2222       int callback_id, lua_retval;
2223       s = (char*)str_string(job_name);
2224       callback_id = callback_defined(process_jobname_callback);
2225       if (callback_id > 0) {
2226         lua_retval = run_callback(callback_id, "S->S", s, &ss);
2227         if ((lua_retval == true) && (ss != NULL))
2228             s = ss;
2229       }
2230       tprint(s);
2231    } else {
2232       print(job_name);
2233    }
2234 }
2235
@ Here is a routine that prints the result of a convert command, using the
argument |i|. It returns |false| if it does not know how to print the code |c|.
The function exists because Lua code and \TeX\ code can both call it to convert
something.
2240
2241 @ Parse optional lua state integer, or an instance name to be stored in |sn| and
2242 get the next non-blank non-relax non-call token.
2243
2244 @c
2245
2246 int scan_lua_state(void)
2247 {
2248     int sn = 0;
2249     do {
2250         get_x_token();
2251     } while ((cur_cmd == spacer_cmd) || (cur_cmd == relax_cmd));
2252     back_input();
2253     if (cur_cmd != left_brace_cmd) {
2254         if (scan_keyword("name")) {
2255             (void) scan_toks(false, true);
2256             sn = def_ref;
2257         } else {
2258             scan_register_num();
2259             if (get_lua_name(cur_val))
2260                 sn = (cur_val - 65536);
2261         }
2262     }
2263     return sn;
2264 }
2265
2266 @ The procedure |conv_toks| uses |str_toks| to insert the token list for
2267 |convert| functions into the scanner; `\.{\\outer}' control sequences are allowed
2268 to follow `\.{\\string}' and `\.{\\meaning}'.
2269
2270 The extra temp string |u| is needed because |pdf_scan_ext_toks| incorporates any
2271 pending string in its output. In order to save such a pending string, we have to
2272 create a temporary string that is destroyed immediately after.
2273
2274 @c
/*
    |push_selector| remembers the current |selector| in |old_setting| and
    switches to |new_string|; |pop_selector| puts the remembered value back.
    Both expect |old_setting| to be visible where they are used.
*/
#define push_selector { old_setting = selector; selector = new_string; }

#define pop_selector { selector = old_setting; }
2283
2284 static int do_variable_dvi(halfword c)
2285 {
2286     return 0;
2287 }
2288
/*
    Turn backend variable |i| into a token and push it back into the input:
    |cur_cmd| becomes the given assignment command, |cur_val| the given base
    plus |i|, and |cur_tok| the token made from those two. The expansion is a
    plain statement sequence that ends in a semicolon, so it has to be used
    inside braces.
*/
#define do_variable_backend_item(cmd,base,i) \
    cur_cmd = cmd; \
    cur_val = base + i; \
    cur_tok = token_val(cur_cmd, cur_val); \
    back_input();

#define do_variable_backend_int(i) \
    do_variable_backend_item(assign_int_cmd, backend_int_base, i)

#define do_variable_backend_dimen(i) \
    do_variable_backend_item(assign_dimen_cmd, backend_dimen_base, i)

#define do_variable_backend_toks(i) \
    do_variable_backend_item(assign_toks_cmd, backend_toks_base, i)
2306
2307 static int do_variable_pdf(halfword c)
2308 {
2309          if (scan_keyword("compresslevel"))        { do_variable_backend_int(c_pdf_compress_level); }
2310     else if (scan_keyword("decimaldigits"))        { do_variable_backend_int(c_pdf_decimal_digits); }
2311     else if (scan_keyword("imageresolution"))      { do_variable_backend_int(c_pdf_image_resolution); }
2312     else if (scan_keyword("pkresolution"))         { do_variable_backend_int(c_pdf_pk_resolution); }
2313     else if (scan_keyword("uniqueresname"))        { do_variable_backend_int(c_pdf_unique_resname); }
2314     else if (scan_keyword("minorversion"))         { do_variable_backend_int(c_pdf_minor_version); }
2315     else if (scan_keyword("pagebox"))              { do_variable_backend_int(c_pdf_pagebox); }
2316     else if (scan_keyword("inclusionerrorlevel"))  { do_variable_backend_int(c_pdf_inclusion_errorlevel); }
2317     else if (scan_keyword("ignoreunknownimages"))  { do_variable_backend_int(c_pdf_ignore_unknown_images); }
2318     else if (scan_keyword("gamma"))                { do_variable_backend_int(c_pdf_gamma); }
2319     else if (scan_keyword("imageapplygamma"))      { do_variable_backend_int(c_pdf_image_apply_gamma); }
2320     else if (scan_keyword("imagegamma"))           { do_variable_backend_int(c_pdf_image_gamma); }
2321     else if (scan_keyword("imagehicolor"))         { do_variable_backend_int(c_pdf_image_hicolor); }
2322     else if (scan_keyword("imageaddfilename"))     { do_variable_backend_int(c_pdf_image_addfilename); }
2323     else if (scan_keyword("objcompresslevel"))     { do_variable_backend_int(c_pdf_obj_compress_level); }
2324     else if (scan_keyword("inclusioncopyfonts"))   { do_variable_backend_int(c_pdf_inclusion_copy_font); }
2325     else if (scan_keyword("gentounicode"))         { do_variable_backend_int(c_pdf_gen_tounicode); }
2326     else if (scan_keyword("pkfixeddpi"))           { do_variable_backend_int(c_pdf_pk_fixed_dpi); }
2327     else if (scan_keyword("suppressoptionalinfo")) { do_variable_backend_int(c_pdf_suppress_optional_info); }
2328     else if (scan_keyword("omitcidset"))           { do_variable_backend_int(c_pdf_omit_cidset); }
2329
2330     else if (scan_keyword("horigin"))              { do_variable_backend_dimen(d_pdf_h_origin); }
2331     else if (scan_keyword("vorigin"))              { do_variable_backend_dimen(d_pdf_v_origin); }
2332     else if (scan_keyword("threadmargin"))         { do_variable_backend_dimen(d_pdf_thread_margin); }
2333     else if (scan_keyword("destmargin"))           { do_variable_backend_dimen(d_pdf_dest_margin); }
2334     else if (scan_keyword("linkmargin"))           { do_variable_backend_dimen(d_pdf_link_margin); }
2335     else if (scan_keyword("xformmargin"))          { do_variable_backend_dimen(d_pdf_xform_margin); }
2336
2337     else if (scan_keyword("pageattr"))             { do_variable_backend_toks(t_pdf_page_attr); }
2338     else if (scan_keyword("pageresources"))        { do_variable_backend_toks(t_pdf_page_resources); }
2339     else if (scan_keyword("pagesattr"))            { do_variable_backend_toks(t_pdf_pages_attr); }
2340     else if (scan_keyword("xformattr"))            { do_variable_backend_toks(t_pdf_xform_attr); }
2341     else if (scan_keyword("xformresources"))       { do_variable_backend_toks(t_pdf_xform_resources); }
2342     else if (scan_keyword("pkmode"))               { do_variable_backend_toks(t_pdf_pk_mode); }
2343     else if (scan_keyword("trailerid"))            { do_variable_backend_toks(t_pdf_trailer_id); }
2344
2345     else
2346         return 0;
2347     return 1;
2348 }
2349
2350 static int do_feedback_dvi(halfword c)
2351 {
2352     return 0;
2353 }
2354
2355 /* codes not really needed but cleaner when testing */
2356
2357 #define pdftex_version  140 /* these values will not change any more */
2358 #define pdftex_revision "0" /* these values will not change any more */
2359
2360 static int do_feedback_pdf(halfword c)
2361 {
2362     int old_setting;            /* holds |selector| setting */
2363     int save_scanner_status;    /* |scanner_status| upon entry */
2364     halfword save_def_ref;      /* |def_ref| upon entry, important if inside `\.{\\message}' */
2365     halfword save_warning_index;
2366     boolean bool;               /* temp boolean */
2367     str_number s;               /* first temp string */
2368     int ff;                     /* for use with |set_ff| */
2369     str_number u = 0;           /* third temp string, will become non-nil if a string is already being built */
2370     char *str;                  /* color stack init str */
2371
2372     if (scan_keyword("lastlink")) {
2373         push_selector;
2374         print_int(pdf_last_link);
2375         pop_selector;
2376     } else if (scan_keyword("retval")) {
2377         push_selector;
2378         print_int(pdf_retval);
2379         pop_selector;
2380     } else if (scan_keyword("lastobj")) {
2381         push_selector;
2382         print_int(pdf_last_obj);
2383         pop_selector;
2384     } else if (scan_keyword("lastannot")) {
2385         push_selector;
2386         print_int(pdf_last_annot);
2387         pop_selector;
2388     } else if (scan_keyword("xformname")) {
2389         scan_int();
2390         check_obj_type(static_pdf, obj_type_xform, cur_val);
2391         push_selector;
2392         print_int(obj_info(static_pdf, cur_val));
2393         pop_selector;
2394     } else if (scan_keyword("creationdate")) {
2395         ins_list(string_to_toks(getcreationdate(static_pdf)));
2396         /* no further action */
2397         return 2;
2398     } else if (scan_keyword("fontname")) {
2399         scan_font_ident();
2400         if (cur_val == null_font)
2401             normal_error("pdf backend", "invalid font identifier when asking 'fontname'");
2402         pdf_check_vf(cur_val);
2403         if (!font_used(cur_val))
2404             pdf_init_font(static_pdf, cur_val);
2405         push_selector;
2406         set_ff(cur_val);
2407         print_int(obj_info(static_pdf, pdf_font_num(ff)));
2408         pop_selector;
2409     } else if (scan_keyword("fontobjnum")) {
2410         scan_font_ident();
2411         if (cur_val == null_font)
2412             normal_error("pdf backend", "invalid font identifier when asking 'objnum'");
2413         pdf_check_vf(cur_val);
2414         if (!font_used(cur_val))
2415             pdf_init_font(static_pdf, cur_val);
2416         push_selector;
2417         set_ff(cur_val);
2418         print_int(pdf_font_num(ff));
2419         pop_selector;
2420     } else if (scan_keyword("fontsize")) {
2421         scan_font_ident();
2422         if (cur_val == null_font)
2423             normal_error("pdf backend", "invalid font identifier when asking 'fontsize'");
2424         push_selector;
2425         print_scaled(font_size(cur_val));
2426         tprint("pt");
2427         pop_selector;
2428     } else if (scan_keyword("pageref")) {
2429         scan_int();
2430         if (cur_val <= 0)
2431             normal_error("pdf backend", "invalid page number when asking 'pageref'");
2432         push_selector;
2433         print_int(pdf_get_obj(static_pdf, obj_type_page, cur_val, false));
2434         pop_selector;
2435     } else if (scan_keyword("colorstackinit")) {
2436         bool = scan_keyword("page");
2437         if (scan_keyword("direct"))
2438             cur_val = direct_always;
2439         else if (scan_keyword("page"))
2440             cur_val = direct_page;
2441         else if (scan_keyword("raw"))
2442             cur_val = direct_raw;
2443         else
2444             cur_val = set_origin;
2445         save_scanner_status = scanner_status;
2446         save_warning_index = warning_index;
2447         save_def_ref = def_ref;
2448         u = save_cur_string();
2449         scan_toks(false, true);
2450         s = tokens_to_string(def_ref);
2451         delete_token_ref(def_ref);
2452         def_ref = save_def_ref;
2453         warning_index = save_warning_index;
2454         scanner_status = save_scanner_status;
2455         str = makecstring(s);
2456         cur_val = newcolorstack(str, cur_val, bool);
2457         free(str);
2458         flush_str(s);
2459         cur_val_level = int_val_level;
2460         if (cur_val < 0) {
2461             print_err("Too many color stacks");
2462             help2("The number of color stacks is limited to 32768.",
2463                   "I'll use the default color stack 0 here.");
2464             error();
2465             cur_val = 0;
2466             restore_cur_string(u);
2467         }
2468         push_selector;
2469         print_int(cur_val);
2470         pop_selector;
2471     } else if (scan_keyword("version")) {
2472         push_selector;
2473         print_int(pdftex_version);
2474         pop_selector;
2475     } else if (scan_keyword("revision")) {
2476         ins_list(string_to_toks(pdftex_revision));
2477         return 2;
2478     } else {
2479         return 0;
2480     }
2481     return 1;
2482 }
2483
2484 void conv_toks(void)
2485 {
2486     int old_setting;            /* holds |selector| setting */
2487     halfword p, q;
2488     int save_scanner_status;    /* |scanner_status| upon entry */
2489     halfword save_def_ref;      /* |def_ref| upon entry, important if inside `\.{\\message}' */
2490     halfword save_warning_index;
2491     boolean bool;               /* temp boolean */
2492     str_number s;               /* first temp string */
2493     int sn;                     /* lua chunk name */
2494     str_number u = 0;           /* third temp string, will become non-nil if a string is already being built */
2495     int c = cur_chr;            /* desired type of conversion */
2496     str_number str;
2497     int i = 0;
2498     /* Scan the argument for command |c| */
2499     switch (c) {
2500         case number_code:
2501             scan_int();
2502             push_selector;
2503             print_int(cur_val);
2504             pop_selector;
2505             break;
2506         case lua_function_code:
2507             scan_int();
2508             if (cur_val <= 0) {
2509                 normal_error("luafunction", "invalid number");
2510             } else {
2511                 u = save_cur_string();
2512                 luacstrings = 0;
2513                 luafunctioncall(cur_val);
2514                 restore_cur_string(u);
2515                 if (luacstrings > 0)
2516                     lua_string_start();
2517             }
2518             /* no further action */
2519             return;
2520             break;
2521         case lua_code:
2522             u = save_cur_string();
2523             save_scanner_status = scanner_status;
2524             save_def_ref = def_ref;
2525             save_warning_index = warning_index;
2526             sn = scan_lua_state();
2527             scan_toks(false, true);
2528             s = def_ref;
2529             warning_index = save_warning_index;
2530             def_ref = save_def_ref;
2531             scanner_status = save_scanner_status;
2532             luacstrings = 0;
2533             luatokencall(s, sn);
2534             delete_token_ref(s);
2535             restore_cur_string(u);  /* TODO: check this, was different */
2536             if (luacstrings > 0)
2537                 lua_string_start();
2538             /* no further action */
2539             return;
2540             break;
2541         case expanded_code:
2542             save_scanner_status = scanner_status;
2543             save_warning_index = warning_index;
2544             save_def_ref = def_ref;
2545             u = save_cur_string();
2546             scan_toks(false, true);
2547             warning_index = save_warning_index;
2548             scanner_status = save_scanner_status;
2549             ins_list(token_link(def_ref));
2550             def_ref = save_def_ref;
2551             restore_cur_string(u);
2552             /* no further action */
2553             return;
2554             break;
2555         case math_style_code:
2556             push_selector;
2557             print_math_style();
2558             pop_selector;
2559             break;
2560         case string_code:
2561             save_scanner_status = scanner_status;
2562             scanner_status = normal;
2563             get_token();
2564             scanner_status = save_scanner_status;
2565             push_selector;
2566             if (cur_cs != 0)
2567                 sprint_cs(cur_cs);
2568             else
2569                 print(cur_chr);
2570             pop_selector;
2571             break;
2572         case cs_string_code:
2573             save_scanner_status = scanner_status;
2574             scanner_status = normal;
2575             get_token();
2576             scanner_status = save_scanner_status;
2577             push_selector;
2578             if (cur_cs != 0)
2579                 sprint_cs_name(cur_cs);
2580             else
2581                 print(cur_chr);
2582             pop_selector;
2583             break;
2584         case roman_numeral_code:
2585             scan_int();
2586             push_selector;
2587             print_roman_int(cur_val);
2588             pop_selector;
2589             break;
2590         case meaning_code:
2591             save_scanner_status = scanner_status;
2592             scanner_status = normal;
2593             get_token();
2594             scanner_status = save_scanner_status;
2595             push_selector;
2596             print_meaning();
2597             pop_selector;
2598             break;
2599         case uchar_code:
2600             scan_char_num();
2601             push_selector;
2602             print(cur_val);
2603             pop_selector;
2604             break;
2605         case lua_escape_string_code:
2606             {
2607                 lstring escstr;
2608                 int l = 0;
2609                 save_scanner_status = scanner_status;
2610                 save_def_ref = def_ref;
2611                 save_warning_index = warning_index;
2612                 scan_toks(false, true);
2613                 bool = in_lua_escape;
2614                 in_lua_escape = true;
2615                 escstr.s = (unsigned char *) tokenlist_to_cstring(def_ref, false, &l);
2616                 escstr.l = (unsigned) l;
2617                 in_lua_escape = bool;
2618                 delete_token_ref(def_ref);
2619                 def_ref = save_def_ref;
2620                 warning_index = save_warning_index;
2621                 scanner_status = save_scanner_status;
2622                 (void) lua_str_toks(escstr);
2623                 ins_list(token_link(temp_token_head));
2624                 free(escstr.s);
2625                 return;
2626             }
2627             /* no further action */
2628             break;
2629         case font_id_code:
2630             scan_font_ident();
2631             push_selector;
2632             print_int(cur_val);
2633             pop_selector;
2634             break;
2635         case font_name_code:
2636             scan_font_ident();
2637             push_selector;
2638             append_string((unsigned char *) font_name(cur_val),(unsigned) strlen(font_name(cur_val)));
2639             if (font_size(cur_val) != font_dsize(cur_val)) {
2640                 tprint(" at ");
2641                 print_scaled(font_size(cur_val));
2642                 tprint("pt");
2643             }
2644             pop_selector;
2645             break;
2646         case left_margin_kern_code:
2647             scan_int();
2648             if ((box(cur_val) == null) || (type(box(cur_val)) != hlist_node))
2649                 normal_error("marginkern", "a non-empty hbox expected");
2650             push_selector;
2651             p = list_ptr(box(cur_val));
2652             while ((p != null) && (type(p) == glue_node)) {
2653                 p = vlink(p);
2654             }
2655             if ((p != null) && (type(p) == margin_kern_node) && (subtype(p) == left_side))
2656                 print_scaled(width(p));
2657             else
2658                 print_char('0');
2659             tprint("pt");
2660             pop_selector;
2661             break;
2662         case right_margin_kern_code:
2663             scan_int();
2664             if ((box(cur_val) == null) || (type(box(cur_val)) != hlist_node))
2665                 normal_error("marginkern", "a non-empty hbox expected");
2666             push_selector;
2667             p = list_ptr(box(cur_val));
2668             if (p != null) {
2669                 p = tail_of_list(p);
2670                 /*
2671                     there can be a leftskip, rightskip, penalty and yes, also a disc node with a nesting
2672                     node that points to glue spec ... and we don't want to analyze that messy lot
2673                 */
2674                 while ((p != null) && (type(p) == glue_node)) {
2675                     p = alink(p);
2676                 }
2677                 if ((p != null) && ! ((type(p) == margin_kern_node) && (subtype(p) == right_side))) {
2678                     if (type(p) == disc_node) {
2679                         q = alink(p);
2680                         if ((q != null) && ((type(q) == margin_kern_node) && (subtype(q) == right_side))) {
2681                             p = q;
2682                         } else {
2683                             /*
2684                                 officially we should look in the replace but currently protrusion doesn't
2685                                 work anyway with "foo\discretionary{}{}{bar-} " (no following char) so we
2686                                 don't need it now
2687                             */
2688                         }
2689                     }
2690                 }
2691             }
2692             if ((p != null) && (type(p) == margin_kern_node) && (subtype(p) == right_side))
2693                 print_scaled(width(p));
2694             else
2695                 print_char('0');
2696             tprint("pt");
2697             pop_selector;
2698             break;
2699         case uniform_deviate_code:
2700             scan_int();
2701             push_selector;
2702             print_int(unif_rand(cur_val));
2703             pop_selector;
2704             break;
2705         case normal_deviate_code:
2706             push_selector;
2707             print_int(norm_rand());
2708             pop_selector;
2709             break;
2710         case math_char_class_code:
2711             {
2712                 mathcodeval mval;
2713                 scan_int();
2714                 mval = get_math_code(cur_val);
2715                 push_selector;
2716                 print_int(mval.class_value);
2717                 pop_selector;
2718             }
2719             break;
2720         case math_char_fam_code:
2721             {
2722                 mathcodeval mval;
2723                 scan_int();
2724                 mval = get_math_code(cur_val);
2725                 push_selector;
2726                 print_int(mval.family_value);
2727                 pop_selector;
2728             }
2729             break;
2730         case math_char_slot_code:
2731             {
2732                 mathcodeval mval;
2733                 scan_int();
2734                 mval = get_math_code(cur_val);
2735                 push_selector;
2736                 print_int(mval.character_value);
2737                 pop_selector;
2738             }
2739             break;
2740         case insert_ht_code:
2741             scan_register_num();
2742             push_selector;
2743             i = cur_val;
2744             p = page_ins_head;
2745             while (i >= subtype(vlink(p)))
2746                 p = vlink(p);
2747             if (subtype(p) == i)
2748                 print_scaled(height(p));
2749             else
2750                 print_char('0');
2751             tprint("pt");
2752             pop_selector;
2753             break;
2754         case job_name_code:
2755             if (job_name == 0)
2756                 open_log_file();
2757             push_selector;
2758             print_job_name();
2759             pop_selector;
2760             break;
2761         case format_name_code:
2762             if (job_name == 0)
2763                 open_log_file();
2764             push_selector;
2765             print(format_name);
2766             pop_selector;
2767             break;
2768         case luatex_banner_code:
2769             push_selector;
2770             tprint(luatex_banner);
2771             pop_selector;
2772             break;
2773         case luatex_revision_code:
2774             push_selector;
2775             print(get_luatexrevision());
2776             pop_selector;
2777             break;
2778         case luatex_date_code:
2779             push_selector;
2780             print_int(get_luatex_date_info());
2781             pop_selector;
2782             break;
2783         case etex_code:
2784             push_selector;
2785             tprint(eTeX_version_string);
2786             pop_selector;
2787             break;
2788         case eTeX_revision_code:
2789             push_selector;
2790             tprint(eTeX_revision);
2791             pop_selector;
2792             break;
2793         case font_identifier_code:
2794             confusion("convert");
2795             break;
2796         default:
2797             confusion("convert");
2798             break;
2799     }
2800     str = make_string();
2801     (void) str_toks(str_lstring(str));
2802     flush_str(str);
2803     ins_list(token_link(temp_token_head));
2804 }
2805
2806 void do_feedback(void)
2807 {
2808     int c = cur_chr;
2809     str_number str;
2810     int done = 1;
2811     switch (c) {
2812         case dvi_feedback_code:
2813             if (get_o_mode() == OMODE_DVI) {
2814                 done = do_feedback_dvi(c);
2815             } else {
2816                 tex_error("unexpected use of \\dvifeedback",null);
2817                 return ;
2818             }
2819             if (done==0) {
2820                 /* we recover */
2821                 normal_warning("dvi backend","unexpected use of \\dvifeedback");
2822                 return;
2823             } else if (done==2) {
2824                 return;
2825             }
2826             break;
2827         case pdf_feedback_code:
2828             if (get_o_mode() == OMODE_PDF) {
2829                 done = do_feedback_pdf(c);
2830             } else {
2831                 tex_error("unexpected use of \\pdffeedback",null);
2832                 return ;
2833             }
2834             if (done==0) {
2835                 /* we recover */
2836                 normal_warning("pdf backend","unexpected use of \\pdffeedback");
2837                 return;
2838             } else if (done==2) {
2839                 return;
2840             }
2841             break;
2842         default:
2843             confusion("feedback");
2844             break;
2845     }
2846     str = make_string();
2847     (void) str_toks(str_lstring(str));
2848     flush_str(str);
2849     ins_list(token_link(temp_token_head));
2850 }
2851
2852 void do_variable(void)
2853 {
2854     int c = cur_chr;
2855     int done = 1;
2856     switch (c) {
2857         case dvi_variable_code:
2858             done = do_variable_dvi(c);
2859             if (done==0) {
2860                 /* we recover */
2861                 normal_warning("dvi backend","unexpected use of \\dvivariable");
2862             }
2863             return;
2864             break;
2865         case pdf_variable_code:
2866             done = do_variable_pdf(c);
2867             if (done==0) {
2868                 /* we recover */
2869                 normal_warning("pdf backend","unexpected use of \\pdfvariable");
2870             }
2871             return;
2872             break;
2873         default:
2874             confusion("variable");
2875             break;
2876     }
2877 }
2878
2879 /*
2880     The following code is not used as we can only set math options and not query them. If
2881     an option is really important we will provide a proper variable. Most options are not
2882     meant for users anyway but for development.
2883 */
2884
2885 /*
2886
2887 #define do_mathoption_int(i) \
2888     cur_cmd = assign_int_cmd; \
2889     cur_val = mathoption_int_base + i; \
2890     cur_tok = token_val(cur_cmd, cur_val); \
2891     back_input();
2892
2893 void do_mathoption(void)
2894 {
2895          if (scan_keyword("old"))                    { do_mathoption_int(c_mathoption_no_italic_compensation_code); }
2896          if (scan_keyword("noitaliccompensation"))   { do_mathoption_int(c_mathoption_no_char_italic_code); }
2897     else if (scan_keyword("nocharitalic"))           { do_mathoption_int(c_mathoption_use_old_fraction_scaling_code); }
2898     else if (scan_keyword("useoldfractionscaling"))  { do_mathoption_int(c_mathoption_old_code); }
2899     else if (scan_keyword("umathcodemeaning"))       { do_mathoption_int(c_mathoption_umathcode_meaning_code); }
2900 }
2901
2902 */
2903
@ This boolean keeps track of the Lua string escape state.
2905 @c
/*
    |conv_toks| sets this to |true| while the argument of the Lua escape string
    conversion is turned into a C string, and puts the previous value back
    afterwards.
*/
boolean in_lua_escape;
2907
2908 static int the_convert_string_dvi(halfword c, int i)
2909 {
2910     return 0 ;
2911 }
2912
2913 static int the_convert_string_pdf(halfword c, int i)
2914 {
2915     int ff;
2916     if (get_o_mode() != OMODE_PDF) {
2917         return 0;
2918     } else if (scan_keyword("lastlink")) {
2919         print_int(pdf_last_link);
2920     } else if (scan_keyword("retval")) {
2921         print_int(pdf_retval);
2922     } else if (scan_keyword("lastobj")) {
2923         print_int(pdf_last_obj);
2924     } else if (scan_keyword("lastannot")) {
2925         print_int(pdf_last_annot);
2926     } else if (scan_keyword("xformname")) {
2927         print_int(obj_info(static_pdf, i));
2928     } else if (scan_keyword("creationdate")) {
2929         return 0;
2930     } else if (scan_keyword("fontname")) {
2931         set_ff(i);
2932         print_int(obj_info(static_pdf, pdf_font_num(ff)));
2933     } else if (scan_keyword("fontobjnum")) {
2934         set_ff(i);
2935         print_int(pdf_font_num(ff));
2936     } else if (scan_keyword("fontsize")) {
2937         print_scaled(font_size(i));
2938         tprint("pt");
2939     } else if (scan_keyword("pageref")) {
2940         print_int(pdf_get_obj(static_pdf, obj_type_page, i, false));
2941     } else if (scan_keyword("colorstackinit")) {
2942         return 0;
2943     } else {
2944         return 0;
2945     }
2946     return 1;
2947 }
2948
2949 str_number the_convert_string(halfword c, int i)
2950 {
2951     int old_setting;            /* saved |selector| setting */
2952     str_number ret = 0;
2953     boolean done = true ;
2954     old_setting = selector;
2955     selector = new_string;
2956     switch (c) {
2957         case number_code:
2958             print_int(i);
2959             break;
2960      /* case lua_function_code: */
2961      /* case lua_code: */
2962      /* case expanded_code: */
2963         case math_style_code:
2964             print_math_style();
2965             break;
2966      /* case string_code: */
2967      /* case cs_string_code: */
2968         case roman_numeral_code:
2969             print_roman_int(i);
2970             break;
2971      /* case meaning_code: */
2972         case uchar_code:
2973             print(i);
2974             break;
2975      /* lua_escape_string_code: */
2976         case font_id_code:
2977             print_int(i);
2978             break;
2979         case font_name_code:
2980             append_string((unsigned char *) font_name(i),(unsigned) strlen(font_name(i)));
2981             if (font_size(i) != font_dsize(i)) {
2982                 tprint(" at ");
2983                 print_scaled(font_size(i));
2984                 tprint("pt");
2985             }
2986             break;
2987      /* left_margin_kern_code: */
2988      /* right_margin_kern_code: */
2989         case uniform_deviate_code:
2990             print_int(unif_rand(i));
2991             break;
2992         case normal_deviate_code:
2993             print_int(norm_rand());
2994             break;
2995      /* math_char_class_code: */
2996      /* math_char_fam_code: */
2997      /* math_char_slot_code: */
2998      /* insert_ht_code: */
2999         case job_name_code:
3000             print_job_name();
3001             break;
3002         case format_name_code:
3003             print(format_name);
3004             break;
3005         case luatex_banner_code:
3006             tprint(luatex_banner);
3007             break;
3008         case luatex_revision_code:
3009             print(get_luatexrevision());
3010             break;
3011         case luatex_date_code:
3012             print_int(get_luatex_date_info());
3013             break;
3014         case etex_code:
3015             tprint(eTeX_version_string);
3016             break;
3017         case eTeX_revision_code:
3018             tprint(eTeX_revision);
3019             break;
3020         case font_identifier_code:
3021             print_font_identifier(i);
3022             break;
3023         /* backend: this might become obsolete */
3024         case dvi_feedback_code:
3025             done = the_convert_string_dvi(c,i);
3026             break;
3027         case pdf_feedback_code:
3028             done = the_convert_string_pdf(c,i);
3029             break;
3030         /* done */
3031         default:
3032             done = false;
3033             break;
3034     }
3035     if (done) {
3036         ret = make_string();
3037     }
3038     selector = old_setting;
3039     return ret;
3040 }
3041
3042 @ Another way to create a token list is via the \.{\\read} command. The sixteen
3043 files potentially usable for reading appear in the following global variables.
3044 The value of |read_open[n]| will be |closed| if stream number |n| has not been
3045 opened or if it has been fully read; |just_open| if an \.{\\openin} but not a
3046 \.{\\read} has been done; and |normal| if it is open and ready to read the next
3047 line.
3048
3049 @c
/*
    |read_open| has one entry more than |read_file|: |read_toks| maps every
    stream number outside 0..15 to index 16. NOTE(review): entry 16 presumably
    always stays |closed|, which is what makes |read_toks| read from the
    terminal for such stream numbers; confirm that nothing ever opens it.
*/
FILE *read_file[16]; /* used for \.{\\read} */
int read_open[17];   /* state of |read_file[n]| */
3052
3053 void initialize_read(void)
3054 {
3055     int k;
3056     for (k = 0; k <= 16; k++)
3057         read_open[k] = closed;
3058 }
3059
3060 @ The |read_toks| procedure constructs a token list like that for any macro
3061 definition, and makes |cur_val| point to it. Parameter |r| points to the control
3062 sequence that will receive this token list.
3063
3064 @c
3065 void read_toks(int n, halfword r, halfword j)
3066 {
3067     halfword p; /* tail of the token list */
3068     halfword q; /* new node being added to the token list via |store_new_token| */
3069     int s;      /* saved value of |align_state| */
3070     int m;      /* stream number */
3071     scanner_status = defining;
3072     warning_index = r;
3073     p = get_avail();
3074     def_ref = p;
3075     set_token_ref_count(def_ref, 0);
3076     p = def_ref;                /* the reference count */
3077     store_new_token(end_match_token);
3078     if ((n < 0) || (n > 15))
3079         m = 16;
3080     else
3081         m = n;
3082     s = align_state;
3083     align_state = 1000000;      /* disable tab marks, etc. */
3084     do {
3085         /* Input and store tokens from the next line of the file */
3086         begin_file_reading();
3087         iname = m + 1;
3088         if (read_open[m] == closed) {
3089             /*
3090                 Input for \.{\\read} from the terminal
3091
3092                 Here we input on-line into the |buffer| array, prompting the user explicitly
3093                 if |n>=0|.  The value of |n| is set negative so that additional prompts
3094                 will not be given in the case of multi-line input.
3095             */
3096             if (interaction > nonstop_mode) {
3097                 if (n < 0) {
3098                     prompt_input("");
3099                 } else {
3100                     wake_up_terminal();
3101                     print_ln();
3102                     sprint_cs(r);
3103                     prompt_input(" =");
3104                     n = -1;
3105                 }
3106             } else {
3107                 fatal_error
3108                     ("*** (cannot \\read from terminal in nonstop modes)");
3109             }
3110
3111         } else if (read_open[m] == just_open) {
3112             /*
3113                 Input the first line of |read_file[m]|
3114
3115                 The first line of a file must be treated specially, since |lua_input_ln|
3116                 must be told not to start with |get|.
3117             */
3118             if (lua_input_ln(read_file[m], (m + 1), false)) {
3119                 read_open[m] = normal;
3120             } else {
3121                 lua_a_close_in(read_file[m], (m + 1));
3122                 read_open[m] = closed;
3123             }
3124
3125         } else {
3126             /*
3127                 Input the next line of |read_file[m]|
3128
3129                 An empty line is appended at the end of a |read_file|.
3130             */
3131             if (!lua_input_ln(read_file[m], (m + 1), true)) {
3132                 lua_a_close_in(read_file[m], (m + 1));
3133                 read_open[m] = closed;
3134                 if (align_state != 1000000) {
3135                     runaway();
3136                     print_err("File ended within \\read");
3137                     help1("This \\read has unbalanced braces.");
3138                     align_state = 1000000;
3139                     error();
3140                 }
3141             }
3142
3143         }
3144         ilimit = last;
3145         if (end_line_char_inactive)
3146             decr(ilimit);
3147         else
3148             buffer[ilimit] = (packed_ASCII_code) end_line_char_par;
3149         first = ilimit + 1;
3150         iloc = istart;
3151         istate = new_line;
3152         /* Handle \.{\\readline} and |goto done|; */
3153         if (j == 1) {
3154             while (iloc <= ilimit) {
3155                 /* current line not yet finished */
3156                 do_buffer_to_unichar(cur_chr, iloc);
3157                 if (cur_chr == ' ')
3158                     cur_tok = space_token;
3159                 else
3160                     cur_tok = cur_chr + other_token;
3161                 store_new_token(cur_tok);
3162             }
3163         } else {
3164             while (1) {
3165                 get_token();
3166                 if (cur_tok == 0) {
3167                     /* |cur_cmd=cur_chr=0| will occur at the end of the line */
3168                     break;
3169                 }
3170                 if (align_state < 1000000) {
3171                     /* unmatched `\.\}' aborts the line */
3172                     do {
3173                         get_token();
3174                     } while (cur_tok != 0);
3175                     align_state = 1000000;
3176                     break;
3177                 }
3178                 store_new_token(cur_tok);
3179             }
3180         }
3181         end_file_reading();
3182
3183     } while (align_state != 1000000);
3184     cur_val = def_ref;
3185     scanner_status = normal;
3186     align_state = s;
3187 }
3188
@ Return a string made from a token list.
3190
3191 @c
3192 str_number tokens_to_string(halfword p)
3193 {
3194     int old_setting;
3195     if (selector == new_string)
3196         normal_error("tokens","tokens_to_string() called while selector = new_string");
3197     old_setting = selector;
3198     selector = new_string;
3199     show_token_list(token_link(p), null, -1);
3200     selector = old_setting;
3201     return make_string();
3202 }
3203
3204 @ @c
/*
    Output-buffer helpers. They are not self-contained: every one of them
    reads and updates locals of the function that uses it, namely |ret| (the
    buffer), |i| (number of bytes written so far) and |alloci| (number of
    bytes allocated); |Print_esc| also reads |e| (the escape character).
*/

/*
    Make sure that |ret| can take |a| more bytes plus a terminating zero
    byte. The buffer grows in steps of 64 bytes, and the step is repeated
    until the request fits, so a request larger than one step cannot overrun
    the buffer. The argument is evaluated once.
*/
#define make_room(a) do {                                      \
        size_t room_wanted = (size_t) i + (size_t) (a) + 1;    \
        if (room_wanted > alloci) {                            \
            do {                                               \
                alloci = alloci + 64;                          \
            } while (room_wanted > alloci);                    \
            ret = xrealloc(ret, alloci);                       \
        }                                                      \
    } while (0)

/* Store one byte without checking for room; call |make_room| first. */
#define append_i_byte(a) ret[i++] = (char)(a)

/* Append a single byte. This is one statement, so it is safe as an unbraced |if| body. */
#define Print_char(a) do { make_room(1); append_i_byte(a); } while (0)

/*
    Append the character |s| in UTF-8 form (one to four bytes). A value of
    0x110000 or more is stored as the single byte |s-0x110000|. The argument
    is evaluated several times, so it must not have side effects. The
    expansion is a brace block.
*/
#define Print_uchar(s) {                                             \
    make_room(4);                                                    \
    if ((s) <= 0x7F) {                                               \
      append_i_byte(s);                                              \
    } else if ((s) <= 0x7FF) {                                       \
      append_i_byte(0xC0 + ((s) / 0x40));                            \
      append_i_byte(0x80 + ((s) % 0x40));                            \
    } else if ((s) <= 0xFFFF) {                                      \
      append_i_byte(0xE0 + ((s) / 0x1000));                          \
      append_i_byte(0x80 + (((s) % 0x1000) / 0x40));                 \
      append_i_byte(0x80 + (((s) % 0x1000) % 0x40));                 \
    } else if ((s) >= 0x110000) {                                    \
      append_i_byte((s) - 0x110000);                                 \
    } else {                                                         \
      append_i_byte(0xF0 + ((s) / 0x40000));                         \
      append_i_byte(0x80 + (((s) % 0x40000) / 0x1000));              \
      append_i_byte(0x80 + ((((s) % 0x40000) % 0x1000) / 0x40));     \
      append_i_byte(0x80 + ((((s) % 0x40000) % 0x1000) % 0x40));     \
    } }

/*
    Append the escape character |e|, provided that |0<e<STRING_OFFSET|,
    followed by the C string |b|. The expansion is a brace block.
*/
#define Print_esc(b) {                     \
    const char *v = (b);                   \
    if (e>0 && e<STRING_OFFSET) {          \
        Print_uchar (e);                   \
    }                                      \
    make_room(strlen(v));                  \
    while (*v) { append_i_byte(*v); v++; } \
  }

/* Append the C string |b| as it is. The expansion is a brace block. */
#define Print_str(b) {                     \
    const char *v = (b);                   \
    make_room(strlen(v));                  \
    while (*v) { append_i_byte(*v); v++; } \
  }

/* True when the first character of string |a| has category code 11 (letter). */
#define is_cat_letter(a) \
    (get_char_cat_code(pool_to_unichar(str_string((a)))) == 11)
3252
@ The actual token conversion in this function is now functionally equivalent to
|show_token_list|, except that it always prints the whole token list. TODO: check
whether this causes problems in the Lua library.
3256
3257 @c
3258 char *tokenlist_to_cstring(int pp, int inhibit_par, int *siz)
3259 {
3260     register int p, c, m;
3261     int q;
3262     int infop;
3263     char *s, *sh;
3264     int e = 0;
3265     char *ret;
3266     int match_chr = '#';
3267     int n = '0';
3268     unsigned alloci = 1024;
3269     int i = 0;
3270     p = pp;
3271     if (p == null) {
3272         if (siz != NULL)
3273             *siz = 0;
3274         return NULL;
3275     }
3276     ret = xmalloc(alloci);
3277     p = token_link(p);          /* skip refcount */
3278     if (p != null) {
3279         e = escape_char_par;
3280     }
3281     while (p != null) {
3282         if (p < (int) fix_mem_min || p > (int) fix_mem_end) {
3283             Print_esc("CLOBBERED.");
3284             break;
3285         }
3286         infop = token_info(p);
3287         if (infop >= cs_token_flag) {
3288             if (!(inhibit_par && infop == par_token)) {
3289                 q = infop - cs_token_flag;
3290                 if (q < hash_base) {
3291                     if (q == null_cs) {
3292                         Print_esc("csname");
3293                         Print_esc("endcsname");
3294                     } else {
3295                         Print_esc("IMPOSSIBLE.");
3296                     }
3297                 } else if ((q >= undefined_control_sequence) && ((q <= eqtb_size) || (q > eqtb_size + hash_extra))) {
3298                     Print_esc("IMPOSSIBLE.");
3299                 } else if ((cs_text(q) < 0) || (cs_text(q) >= str_ptr)) {
3300                     Print_esc("NONEXISTENT.");
3301                 } else {
3302                     str_number txt = cs_text(q);
3303                     sh = makecstring(txt);
3304                     s = sh;
3305                     if (is_active_cs(txt)) {
3306                         s = s + 3;
3307                         while (*s) {
3308                             Print_char(*s);
3309                             s++;
3310                         }
3311                     } else {
3312                         if (e>=0 && e<0x110000) Print_uchar(e);
3313                         while (*s) {
3314                             Print_char(*s);
3315                             s++;
3316                         }
3317                         if ((!single_letter(txt)) || is_cat_letter(txt)) {
3318                             Print_char(' ');
3319                         }
3320                     }
3321                     free(sh);
3322                 }
3323             }
3324         } else {
3325             if (infop < 0) {
3326                 Print_esc("BAD");
3327             } else {
3328                 m = token_cmd(infop);
3329                 c = token_chr(infop);
3330                 switch (m) {
3331                     case left_brace_cmd:
3332                     case right_brace_cmd:
3333                     case math_shift_cmd:
3334                     case tab_mark_cmd:
3335                     case sup_mark_cmd:
3336                     case sub_mark_cmd:
3337                     case spacer_cmd:
3338                     case letter_cmd:
3339                     case other_char_cmd:
3340                         Print_uchar(c);
3341                         break;
3342                     case mac_param_cmd:
3343                         if (!in_lua_escape && (is_in_csname==0))
3344                             Print_uchar(c);
3345                         Print_uchar(c);
3346                         break;
3347                     case out_param_cmd:
3348                         Print_uchar(match_chr);
3349                         if (c <= 9) {
3350                             Print_char(c + '0');
3351                         } else {
3352                             Print_char('!');
3353                             goto EXIT;
3354                         }
3355                         break;
3356                     case match_cmd:
3357                         match_chr = c;
3358                         Print_uchar(c);
3359                         n++;
3360                         Print_char(n);
3361                         if (n > '9')
3362                             goto EXIT;
3363                         break;
3364                     case end_match_cmd:
3365                         if (c == 0) {
3366                             Print_char('-');
3367                             Print_char('>');
3368                         }
3369                         break;
3370                     default:
3371                         not_so_bad(Print_esc);
3372                         break;
3373                 }
3374             }
3375         }
3376         p = token_link(p);
3377     }
3378   EXIT:
3379     ret[i] = '\0';
3380     if (siz != NULL)
3381         *siz = i;
3382     return ret;
3383 }
3384
3385 @ @c
3386 lstring *tokenlist_to_lstring(int pp, int inhibit_par)
3387 {
3388     int siz;
3389     lstring *ret = xmalloc(sizeof(lstring));
3390     ret->s = (unsigned char *) tokenlist_to_cstring(pp, inhibit_par, &siz);
3391     ret->l = (size_t) siz;
3392     return ret;
3393 }
3394
3395 @ @c
3396 void free_lstring(lstring * ls)
3397 {
3398     if (ls == NULL)
3399         return;
3400     if (ls->s != NULL)
3401         free(ls->s);
3402     free(ls);
3403 }