source/texk/web2c/luatexdir/tex/textoken.w

   1 % textoken.w
   2 %
   3 % Copyright 2006-2011 Taco Hoekwater <taco@@luatex.org>
   4 %
   5 % This file is part of LuaTeX.
   6 %
   7 % LuaTeX is free software; you can redistribute it and/or modify it under
   8 % the terms of the GNU General Public License as published by the Free
   9 % Software Foundation; either version 2 of the License, or (at your
  10 % option) any later version.
  11 %
  12 % LuaTeX is distributed in the hope that it will be useful, but WITHOUT
  13 % ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 % FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  15 % License for more details.
  16 %
  17 % You should have received a copy of the GNU General Public License along
  18 % with LuaTeX; if not, see <http://www.gnu.org/licenses/>.
  19
  20 @ @c
  21
  22 #include "ptexlib.h"
  23
  24 @ @c
/* Shorthands for integer parameters; each one expands to an |int_par| lookup. */
#define pausing int_par(pausing_code)
#define cat_code_table int_par(cat_code_table_code)
#define tracing_nesting int_par(tracing_nesting_code)
#define suppress_outer_error int_par(suppress_outer_error_code)
#define suppress_mathpar_error int_par(suppress_mathpar_error_code)


/* Shorthands for |equiv| entries: a fixed location, or a base plus index |A|. */
#define every_eof equiv(every_eof_loc)
#define box(A) equiv(box_base+(A))
#define toks(A) equiv(toks_base+(A))

/* True when the catcode table of the current line is |NO_CAT_TABLE|. */
#define detokenized_line() (line_catcode_table==NO_CAT_TABLE)

/*
Earlier variant of |do_get_cat_code|, kept for reference; it tests the same
three cases in the opposite order.

#define do_get_cat_code(a,b) do { \
    if (line_catcode_table<=-0xFF) \
      a= - line_catcode_table - 0xFF ; \
    else if (line_catcode_table!=DEFAULT_CAT_TABLE) \
      a=get_cat_code(line_catcode_table,b); \
    else \
      a=get_cat_code(cat_code_table,b); \
  } while (0)
*/

/*
Store in |a| the category code of character |b| for the current line:
  - if |line_catcode_table| is |DEFAULT_CAT_TABLE|, look |b| up in |cat_code_table|;
  - else if |line_catcode_table| is greater than |-0xFF|, look |b| up in that table;
  - otherwise the result is |-line_catcode_table - 0xFF|, independent of |b|.
|a| must be an lvalue; the arguments are not parenthesized in the expansion.
*/
#define do_get_cat_code(a,b) do { \
    if (line_catcode_table==DEFAULT_CAT_TABLE) \
      a=get_cat_code(cat_code_table,b); \
    else if (line_catcode_table>-0xFF) \
      a=get_cat_code(line_catcode_table,b); \
    else \
      a= - line_catcode_table - 0xFF ; \
  } while (0)
  57
  58
  59 @ The \TeX\ system does nearly all of its own memory allocation, so that it can
  60 readily be transported into environments that do not have automatic facilities
  61 for strings, garbage collection, etc., and so that it can be in control of what
  62 error messages the user receives. The dynamic storage requirements of \TeX\ are
  63 handled by providing two large arrays called |fixmem| and |varmem| in which
  64 consecutive blocks of words are used as nodes by the \TeX\ routines.
  65
  66 Pointer variables are indices into this array, or into another array called
  67 |eqtb| that will be explained later. A pointer variable might also be a special
  68 flag that lies outside the bounds of |mem|, so we allow pointers to assume any
  69 |halfword| value. The minimum halfword value represents a null pointer. \TeX\
  70 does not assume that |mem[null]| exists.
  71
  72 @ Locations in |fixmem| are used for storing one-word records; a conventional
  73 \.{AVAIL} stack is used for allocation in this array.
  74
  75 @c
smemory_word *fixmem;           /* the big dynamic storage area; reallocated by |get_avail| when it is full */
unsigned fix_mem_min;           /* the smallest location of one-word memory in use */
unsigned fix_mem_max;           /* the largest location of one-word memory in use; |get_avail| raises it when growing |fixmem| */
  79
  80 @ In order to study the memory requirements of particular applications, it is
  81 possible to prepare a version of \TeX\ that keeps track of current and maximum
  82 memory usage. When code between the delimiters |@!stat| $\ldots$ |tats| is not
  83 commented out, \TeX\ will run a bit slower but it will report these statistics
  84 when |tracing_stats| is sufficiently large.
  85
  86 @c
int var_used, dyn_used;         /* how much memory is in use; |dyn_used| is maintained by |get_avail| and |flush_list| */

halfword avail;                 /* head of the list of available one-word nodes */
unsigned fix_mem_end;           /* the last one-word node used in |mem| (NOTE(review): presumably |fixmem| is meant) */

/* The following six heads are each given one node by |initialize_tokens|. */
halfword garbage;               /* head of a junk list, write only */
halfword temp_token_head;       /* head of a temporary list of some kind */
halfword hold_token_head;       /* head of a temporary list of another kind */
halfword omit_template;         /* a constant token list */
halfword null_list;             /* permanently empty list */
halfword backup_head;           /* head of token list built by |scan_keyword| */
  98
  99 @ @c
 100 void initialize_tokens(void)
 101 {
 102     halfword p;
 103     avail = null;
 104     fix_mem_end = 0;
 105     p = get_avail();
 106     temp_token_head = p;
 107     set_token_info(temp_token_head, 0);
 108     p = get_avail();
 109     hold_token_head = p;
 110     set_token_info(hold_token_head, 0);
 111     p = get_avail();
 112     omit_template = p;
 113     set_token_info(omit_template, 0);
 114     p = get_avail();
 115     null_list = p;
 116     set_token_info(null_list, 0);
 117     p = get_avail();
 118     backup_head = p;
 119     set_token_info(backup_head, 0);
 120     p = get_avail();
 121     garbage = p;
 122     set_token_info(garbage, 0);
 123     dyn_used = 0;               /* initialize statistics */
 124 }
 125
 126 @ The function |get_avail| returns a pointer to a new one-word node whose |link|
 127 field is null. However, \TeX\ will halt if there is no more room left.
 128 @^inner loop@>
 129
 130 If the available-space list is empty, i.e., if |avail=null|, we try first to
 131 increase |fix_mem_end|. If that cannot be done, i.e., if
|fix_mem_end=fix_mem_max|, we try to reallocate array |fixmem|. If that doesn't
work, we have to quit.
 134
 135 @c
 136 halfword get_avail(void)
 137 {                               /* single-word node allocation */
 138     unsigned p;                 /* the new node being got */
 139     unsigned t;
 140     p = (unsigned) avail;       /* get top location in the |avail| stack */
 141     if (p != null) {
 142         avail = token_link(avail);      /* and pop it off */
 143     } else if (fix_mem_end < fix_mem_max) {     /* or go into virgin territory */
 144         incr(fix_mem_end);
 145         p = fix_mem_end;
 146     } else {
 147         smemory_word *new_fixmem;       /* the big dynamic storage area */
 148         t = (fix_mem_max / 5);
 149         new_fixmem =
 150             fixmemcast(realloc
 151                        (fixmem, sizeof(smemory_word) * (fix_mem_max + t + 1)));
 152         if (new_fixmem == NULL) {
 153             runaway();          /* if memory is exhausted, display possible runaway text */
 154             overflow("token memory size", fix_mem_max);
 155         } else {
 156             fixmem = new_fixmem;
 157         }
 158         memset(voidcast(fixmem + fix_mem_max + 1), 0, t * sizeof(smemory_word));
 159         fix_mem_max += t;
 160         p = ++fix_mem_end;
 161     }
 162     token_link(p) = null;       /* provide an oft-desired initialization of the new node */
 163     incr(dyn_used);             /* maintain statistics */
 164     return (halfword) p;
 165 }
 166
 167 @ The procedure |flush_list(p)| frees an entire linked list of one-word nodes
 168 that starts at position |p|.
 169 @^inner loop@>
 170
 171 @c
 172 void flush_list(halfword p)
 173 {                               /* makes list of single-word nodes available */
 174     halfword q, r;              /* list traversers */
 175     if (p != null) {
 176         r = p;
 177         do {
 178             q = r;
 179             r = token_link(r);
 180             decr(dyn_used);
 181         } while (r != null);    /* now |q| is the last node on the list */
 182         token_link(q) = avail;
 183         avail = p;
 184     }
 185 }
 186
 187 @ A \TeX\ token is either a character or a control sequence, and it is @^token@>
 188 represented internally in one of two ways: (1)~A character whose ASCII code
 189 number is |c| and whose command code is |m| is represented as the number
 190 $2^{21}m+c$; the command code is in the range |1<=m<=14|. (2)~A control sequence
 191 whose |eqtb| address is |p| is represented as the number |cs_token_flag+p|. Here
 192 |cs_token_flag=@t$2^{25}-1$@>| is larger than $2^{21}m+c$, yet it is small enough
 193 that |cs_token_flag+p< max_halfword|; thus, a token fits comfortably in a
 194 halfword.
 195
 196 A token |t| represents a |left_brace| command if and only if
 197 |t<left_brace_limit|; it represents a |right_brace| command if and only if we
 198 have |left_brace_limit<=t<right_brace_limit|; and it represents a |match| or
 199 |end_match| command if and only if |match_token<=t<=end_match_token|. The
 200 following definitions take care of these token-oriented constants and a few
 201 others.
 202
 203 @ A token list is a singly linked list of one-word nodes in |mem|, where each
 204 word contains a token and a link. Macro definitions, output-routine definitions,
 205 marks, \.{\\write} texts, and a few other things are remembered by \TeX\ in the
 206 form of token lists, usually preceded by a node with a reference count in its
 207 |token_ref_count| field. The token stored in location |p| is called |info(p)|.
 208
 209 Three special commands appear in the token lists of macro definitions. When
 210 |m=match|, it means that \TeX\ should scan a parameter for the current macro;
 211 when |m=end_match|, it means that parameter matching should end and \TeX\ should
 212 start reading the macro text; and when |m=out_param|, it means that \TeX\ should
 213 insert parameter number |c| into the text at this point.
 214
 215 The enclosing \.{\char'173} and \.{\char'175} characters of a macro definition
 216 are omitted, but the final right brace of an output routine is included at the
 217 end of its token list.
 218
 219 Here is an example macro definition that illustrates these conventions. After
 220 \TeX\ processes the text
 221
 222 $$\.{\\def\\mac a\#1\#2 \\b \{\#1\\-a \#\#1\#2 \#2\}}$$
 223
 224 the definition of \.{\\mac} is represented as a token list containing
 225
 226 $$\def\,{\hskip2pt}
 227 \vbox{\halign{\hfil#\hfil\cr
 228 (reference count), |letter|\,\.a, |match|\,\#, |match|\,\#, |spacer|\,\.\ ,
 229 \.{\\b}, |end_match|,\cr
 230 |out_param|\,1, \.{\\-}, |letter|\,\.a, |spacer|\,\.\ , |mac_param|\,\#,
 231 |other_char|\,\.1,\cr
 232 |out_param|\,2, |spacer|\,\.\ , |out_param|\,2.\cr}}$$
 233
 234 The procedure |scan_toks| builds such token lists, and |macro_call| does the
 235 parameter matching. @^reference counts@>
 236
 237 Examples such as $$\.{\\def\\m\{\\def\\m\{a\}\ b\}}$$ explain why reference
 238 counts would be needed even if \TeX\ had no \.{\\let} operation: When the token
 239 list for \.{\\m} is being read, the redefinition of \.{\\m} changes the |eqtb|
 240 entry before the token list has been fully consumed, so we dare not simply
 241 destroy a token list when its control sequence is being redefined.
 242
 243 If the parameter-matching part of a definition ends with `\.{\#\{}', the
 244 corresponding token list will have `\.\{' just before the `|end_match|' and also
 245 at the very end. The first `\.\{' is used to delimit the parameter; the second
 246 one keeps the first from disappearing.
 247
 248 The |print_meaning| subroutine displays |cur_cmd| and |cur_chr| in symbolic form,
 249 including the expansion of a macro or mark.
 250
 251 @c
 252 void print_meaning(void)
 253 {
 254     /* remap \mathchar onto \Umathchar */
 255 /*
 256     if (cur_cmd == math_given_cmd) {
 257         cur_cmd = xmath_given_cmd ;
 258     }
 259 */
 260     print_cmd_chr((quarterword) cur_cmd, cur_chr);
 261     if (cur_cmd >= call_cmd) {
 262         print_char(':');
 263         print_ln();
 264         token_show(cur_chr);
 265     } else {
 266         /* Show the meaning of a mark node */
 267         if ((cur_cmd == top_bot_mark_cmd) && (cur_chr < marks_code)) {
 268             print_char(':');
 269             print_ln();
 270             switch (cur_chr) {
 271                 case first_mark_code:
 272                     token_show(first_mark(0));
 273                     break;
 274                 case bot_mark_code:
 275                     token_show(bot_mark(0));
 276                     break;
 277                 case split_first_mark_code:
 278                     token_show(split_first_mark(0));
 279                     break;
 280                 case split_bot_mark_code:
 281                     token_show(split_bot_mark(0));
 282                     break;
 283                 default:
 284                     token_show(top_mark(0));
 285                     break;
 286             }
 287         }
 288     }
 289 }
 290
 291 @ The procedure |show_token_list|, which prints a symbolic form of the token list
 292 that starts at a given node |p|, illustrates these conventions. The token list
 293 being displayed should not begin with a reference count. However, the procedure
 294 is intended to be robust, so that if the memory links are awry or if |p| is not
 295 really a pointer to a token list, nothing catastrophic will happen.
 296
 297 An additional parameter |q| is also given; this parameter is either null or it
 298 points to a node in the token list where a certain magic computation takes place
 299 that will be explained later. (Basically, |q| is non-null when we are printing
 300 the two-line context information at the time of an error message; |q| marks the
 301 place corresponding to where the second line should begin.)
 302
 303 For example, if |p| points to the node containing the first \.a in the token list
 304 above, then |show_token_list| will print the string $$\hbox{`\.{a\#1\#2\ \\b\
 305 ->\#1\\-a\ \#\#1\#2\ \#2}';}$$ and if |q| points to the node containing the
 306 second \.a, the magic computation will be performed just before the second \.a is
 307 printed.
 308
 309 The generation will stop, and `\.{\\ETC.}' will be printed, if the length of
 310 printing exceeds a given limit~|l|. Anomalous entries are printed in the form of
 311 control sequences that are not followed by a blank space, e.g., `\.{\\BAD.}';
 312 this cannot be confused with actual control sequences because a real control
 313 sequence named \.{BAD} would come out `\.{\\BAD\ }'.
 314
 315 @c
/*
Helper for |show_token_list|: report a token whose command is not one of the
printable ones. It relies on variables |m| (command) and |c| (character or
modifier) being in scope where it is expanded; |p| is a print routine that is
called with a string.

For |assign_int_cmd|, |assign_dimen_cmd| and |assign_toks_cmd| a placeholder is
printed when |c| lies inside the corresponding backend range, and nothing is
printed when it lies outside. Every other command prints "BAD".
*/
#define not_so_bad(p) \
    switch (m) { \
        case assign_int_cmd: \
            if (c >= (backend_int_base) && c <= (backend_int_last)) \
                p("[internal backend integer]"); \
            break; \
        case assign_dimen_cmd: \
            if (c >= (backend_dimen_base) && c <= (backend_dimen_last)) \
                p("[internal backend dimension]"); \
            break; \
        case assign_toks_cmd: \
            if (c >= (backend_toks_base) && c <= (backend_toks_last)) \
                p("[internal backend tokenlist]"); \
            break; \
        default: \
            p("BAD"); \
            break; \
    }
 334
/*
Print a symbolic form of the token list that starts at node |p|.

|p| is the first node to display (not a reference count); |q| is a node at
which |set_trick_count| is invoked just before that node is displayed, or
|null|; |l| is the limit on |tally| after which printing stops, where a
negative |l| means |0x3FFFFFFF|.

|tally| is reset to zero on entry. The function returns early after printing
an escape sequence or marker when it meets a node outside
|fix_mem_min..fix_mem_end|, an |out_param| above 9, or more than nine |match|
tokens. If nodes remain when the loop ends, `\ETC.' is printed.
*/
void show_token_list(int p, int q, int l)
{
    int m, c;                    /* pieces of a token */
    ASCII_code match_chr = '#';  /* character used in a `|match|' */
    ASCII_code n = '0';          /* the highest parameter number, as an ASCII digit */
    tally = 0;
    if (l < 0)
        l = 0x3FFFFFFF;          /* a negative limit stands for this large value */
    while ((p != null) && (tally < l)) {
        if (p == q) {
            /* Do magic computation */
            set_trick_count();
        }
        /* Display token |p|, and |return| if there are problems */
        if ((p < (int) fix_mem_min) || (p > (int) fix_mem_end)) {
            /* |p| is not inside the used part of the one-word memory */
            tprint_esc("CLOBBERED.");
            return;
        }
        if (token_info(p) >= cs_token_flag) {
            /* a control sequence; it is skipped when it is |par_token| and |inhibit_par_tokens| is set */
            if (!((inhibit_par_tokens) && (token_info(p) == par_token)))
                print_cs(token_info(p) - cs_token_flag);
        } else {
            m = token_cmd(token_info(p));
            c = token_chr(token_info(p));
            if (token_info(p) < 0) {
                tprint_esc("BAD");
            } else {
                /*
                    Display the token $(|m|,|c|)$

                    The procedure usually ``learns'' the character code used for macro
                    parameters by seeing one in a |match| command before it runs into any
                    |out_param| commands.
                */
                switch (m) {
                    case left_brace_cmd:
                    case right_brace_cmd:
                    case math_shift_cmd:
                    case tab_mark_cmd:
                    case sup_mark_cmd:
                    case sub_mark_cmd:
                    case spacer_cmd:
                    case letter_cmd:
                    case other_char_cmd:
                        print(c);
                        break;
                    case mac_param_cmd:
                        /* printed twice, except in a Lua escape or while |is_in_csname| is nonzero */
                        if (!in_lua_escape && (is_in_csname==0))
                            print(c);
                        print(c);
                        break;
                    case out_param_cmd:
                        print(match_chr);
                        if (c <= 9) {
                            print_char(c + '0');
                        } else {
                            /* a parameter number above 9: print `!' and give up */
                            print_char('!');
                            return;
                        }
                        break;
                    case match_cmd:
                        /* remember the parameter character and print it with the next digit */
                        match_chr = c;
                        print(c);
                        incr(n);
                        print_char(n);
                        if (n > '9')
                            return;
                        break;
                    case end_match_cmd:
                        if (c == 0)
                            tprint("->");
                        break;
                    default:
                        not_so_bad(tprint);
                        break;
                }
            }
        }
        p = token_link(p);
    }
    /* the limit was reached before the end of the list */
    if (p != null)
        tprint_esc("ETC.");
}
 418
 419 @ @c
/*
Store in |a| the result of |str2uni| applied to |buffer| at offset |b|, then
advance |b| by |utf8_size(a)|. Both arguments must be lvalues; |b| is evaluated
more than once.
*/
#define do_buffer_to_unichar(a,b) do { \
    a = (halfword)str2uni(buffer+b); \
    b += utf8_size(a); \
} while (0)
 424
 425 @ Here's the way we sometimes want to display a token list, given a pointer to
 426 its reference count; the pointer may be null.
 427
 428 @c
 429 void token_show(halfword p)
 430 {
 431     if (p != null)
 432         show_token_list(token_link(p), null, 10000000);
 433 }
 434
@ |delete_token_ref| is called when a pointer to a token list's reference count
is being removed. This means that the token list should disappear if the
reference count was zero, otherwise the count should be decreased by one.
 438 @^reference counts@>
 439
 440 @ |p| points to the reference count of a token list that is losing one
 441 reference.
 442
 443 @c
 444 void delete_token_ref(halfword p)
 445 {
 446     if (token_ref_count(p) == 0)
 447         flush_list(p);
 448     else
 449         decr(token_ref_count(p));
 450 }
 451
 452 @ @c
 453 int get_char_cat_code(int curchr)
 454 {
 455     int a;
 456     do_get_cat_code(a,curchr);
 457     return a;
 458 }
 459
 460 @ @c
 461 static void invalid_character_error(void)
 462 {
 463     const char *hlp[] = {
 464         "A funny symbol that I can't read has just been input.",
 465         "Continue, and I'll forget that it ever happened.",
 466         NULL
 467     };
 468     deletions_allowed = false;
 469     tex_error("Text line contains an invalid character", hlp);
 470     deletions_allowed = true;
 471 }
 472
 473 @ @c
/* Forward declarations of file-local helpers that are defined further down. */
static boolean process_sup_mark(void);  /* below */

static int scan_control_sequence(void); /* below */

/*
Result of |next_line|. NOTE(review): judging by the names, the caller either
carries on, returns, or restarts its scan; confirm against |next_line| and
its caller, which are not shown here.
*/
typedef enum {
    next_line_ok,
    next_line_return,
    next_line_restart
} next_line_retval;

static next_line_retval next_line(void); /* below */
 485
 486 @ In case you are getting bored, here is a slightly less trivial routine: Given a
 487 string of lowercase letters, like `\.{pt}' or `\.{plus}' or `\.{width}', the
 488 |scan_keyword| routine checks to see whether the next tokens of input match this
 489 string. The match must be exact, except that uppercase letters will match their
 490 lowercase counterparts; uppercase equivalents are determined by subtracting
 491 |"a"-"A"|, rather than using the |uc_code| table, since \TeX\ uses this routine
 492 only for its own limited set of keywords.
 493
 494 If a match is found, the characters are effectively removed from the input and
 495 |true| is returned. Otherwise |false| is returned, and the input is left
 496 essentially unchanged (except for the fact that some macros may have been
 497 expanded, etc.). @^inner loop@>
 498
 499 @c
/*
Check whether the next expanded tokens of the input spell the keyword |s|.

Returns |true| when every character of |s| was matched; the tokens collected
on the backup list are then flushed. Returns |false| when |s| is empty or when
a token does not match; in that case the offending token is pushed back with
|back_input| and any tokens matched so far are re-inserted as a |backed_up|
token list. |cur_cs| is restored to its value on entry before either
non-trivial return.
*/
boolean scan_keyword(const char *s)
{                               /* look for a given string */
    halfword p;                 /* tail of the backup list */
    halfword q;                 /* new node being added to the token list via |store_new_token| */
    const char *k;              /* the next character of |s| that has to be matched */
    halfword save_cur_cs = cur_cs;
    if (strlen(s) == 0)        /* was assert (strlen(s) > 1); */
      return false ;           /* but not with newtokenlib  zero keyword simply doesn't match  */
    p = backup_head;
    token_link(p) = null;
    k = s;
    while (*k) {
        get_x_token();      /* recursion is possible here */
        /* a non-control-sequence token matches |*k| itself or |*k - 'a' + 'A'| */
        if ((cur_cs == 0) && ((cur_chr == *k) || (cur_chr == *k - 'a' + 'A'))) {
            /* NOTE(review): |store_new_token| presumably appends through the locals |p| and |q|; the macro is not visible here */
            store_new_token(cur_tok);
            k++;
        } else if ((cur_cmd != spacer_cmd) || (p != backup_head)) {
            /*
                A mismatch. (A spacer seen while the backup list is still
                empty does not come here; the loop just fetches the next token.)

                crashes on some alignments:

                if (p != backup_head) {
                    q = get_avail();
                    token_info(q) = cur_tok;
                    token_link(q) = null;
                    token_link(p) = q;
                    begin_token_list(token_link(backup_head), backed_up);
                } else {
                    back_input();
                }
            */
            back_input();
            if (p != backup_head) {
                begin_token_list(token_link(backup_head), backed_up);
            }
            /*  */
            cur_cs = save_cur_cs;
            return false;
        }
    }
    /* complete match: the collected tokens are no longer needed */
    if (token_link(backup_head) != null)
        flush_list(token_link(backup_head));
    cur_cs = save_cur_cs;
    return true;
}
 544
 545 @ We can not return |undefined_control_sequence| under some conditions
 546  (inside |shift_case|, for example). This needs thinking.
 547
 548 @c
 549
 550 /*
 551     halfword active_to_cs(int curchr, int force)
 552     {
 553         halfword curcs;
 554         char *a, *b;
 555         char *utfbytes = xmalloc(8);
 556         int nncs = no_new_control_sequence;
 557         a = (char *) uni2str(0xFFFF);
 558         utfbytes = strcpy(utfbytes, a);
 559         if (force)
 560             no_new_control_sequence = false;
 561         if (curchr > 0) {
 562             b = (char *) uni2str((unsigned) curchr);
 563             utfbytes = strcat(utfbytes, b);
 564             free(b);
 565             curcs = string_lookup(utfbytes, strlen(utfbytes));
 566         } else {
 567             utfbytes[3] = '\0';
 568             curcs = string_lookup(utfbytes, 4);
 569         }
 570         no_new_control_sequence = nncs;
 571         free(a);
 572         free(utfbytes);
 573         return curcs;
 574     }
 575 */
 576
 577 /*static char * FFFF = "\xEF\xBF\xBF";*/ /* 0xFFFF */
 578
 579 halfword active_to_cs(int curchr, int force)
 580 {
 581     halfword curcs;
 582     int nncs = no_new_control_sequence;
 583     if (force) {
 584         no_new_control_sequence = false;
 585     }
 586     if (curchr > 0) {
 587         char *b = (char *) uni2str((unsigned) curchr);
 588         char *utfbytes = xmalloc(8);
 589         utfbytes = strcpy(utfbytes, "\xEF\xBF\xBF");
 590         utfbytes = strcat(utfbytes, b);
 591         free(b);
 592         curcs = string_lookup(utfbytes, utf8_size(curchr)+3);
 593         free(utfbytes);
 594     } else {
 595         curcs = string_lookup("\xEF\xBF\xBF", 4); /* 0xFFFF ... why not 3 ? */
 596     }
 597     no_new_control_sequence = nncs;
 598     return curcs;
 599 }
 600
 601 /*
 602
 603     static unsigned char *uni2csstr(unsigned unic)
 604     {
 605         unsigned char *buf = xmalloc(8);
 606         unsigned char *pt = buf;
 607         *pt++ = 239; *pt++ = 191; *pt++ = 191; // 0xFFFF
 608         if (unic < 0x80)
 609             *pt++ = (unsigned char) unic;
 610         else if (unic < 0x800) {
 611             *pt++ = (unsigned char) (0xc0 | (unic >> 6));
 612             *pt++ = (unsigned char) (0x80 | (unic & 0x3f));
 613         } else if (unic >= 0x110000) {
 614             *pt++ = (unsigned char) (unic - 0x110000);
 615         } else if (unic < 0x10000) {
 616             *pt++ = (unsigned char) (0xe0 | (unic >> 12));
 617             *pt++ = (unsigned char) (0x80 | ((unic >> 6) & 0x3f));
 618             *pt++ = (unsigned char) (0x80 | (unic & 0x3f));
 619         } else {
 620             int u, z, y, x;
 621             unsigned val = unic - 0x10000;
 622             u = (int) (((val & 0xf0000) >> 16) + 1);
 623             z = (int) ((val & 0x0f000) >> 12);
 624             y = (int) ((val & 0x00fc0) >> 6);
 625             x = (int) (val & 0x0003f);
 626             *pt++ = (unsigned char) (0xf0 | (u >> 2));
 627             *pt++ = (unsigned char) (0x80 | ((u & 3) << 4) | z);
 628             *pt++ = (unsigned char) (0x80 | y);
 629             *pt++ = (unsigned char) (0x80 | x);
 630         }
 631         *pt = '\0';
 632         return buf;
 633     }
 634
 635     halfword active_to_cs(int curchr, int force)
 636     {
 637         halfword curcs;
 638         int nncs = no_new_control_sequence;
 639         if (force) {
 640             no_new_control_sequence = false;
 641         }
 642         if (curchr > 0) {
 643             char * utfbytes = (char *) uni2csstr((unsigned) curchr);
 644             curcs = string_lookup(utfbytes, utf8_size(curchr)+3);
 645             free(utfbytes);
 646         } else {
 647             curcs = string_lookup(FFFF, 4); // 0xFFFF ... why not 3 ?
 648         }
 649         no_new_control_sequence = nncs;
 650         return curcs;
 651     }
 652
 653 */
 654
 655 @ TODO this function should listen to \.{\\escapechar}
 656
 657 @ prints a control sequence
 658
 659 @c
 660 static char *cs_to_string(halfword p)
 661 {
 662     const char *s;
 663     char *sh;
 664     int k = 0;
 665     static char ret[256] = { 0 };
 666     if (p == 0 || p == null_cs) {
 667         ret[k++] = '\\';
 668         s = "csname";
 669         while (*s) {
 670             ret[k++] = *s++;
 671         }
 672         ret[k++] = '\\';
 673         s = "endcsname";
 674         while (*s) {
 675             ret[k++] = *s++;
 676         }
 677         ret[k] = 0;
 678
 679     } else {
 680         str_number txt = cs_text(p);
 681         sh = makecstring(txt);
 682         s = sh;
 683         if (is_active_cs(txt)) {
 684             s = s + 3;
 685             while (*s) {
 686                 ret[k++] = *s++;
 687             }
 688             ret[k] = 0;
 689         } else {
 690             ret[k++] = '\\';
 691             while (*s) {
 692                 ret[k++] = *s++;
 693             }
 694             ret[k] = 0;
 695         }
 696         free(sh);
 697     }
 698     return (char *) ret;
 699 }
 700
 701 @ TODO this is a quick hack, will be solved differently soon
 702
 703 @c
 704 static char *cmd_chr_to_string(int cmd, int chr)
 705 {
 706     char *s;
 707     str_number str;
 708     int sel = selector;
 709     selector = new_string;
 710     print_cmd_chr((quarterword) cmd, chr);
 711     str = make_string();
 712     s = makecstring(str);
 713     selector = sel;
 714     flush_str(str);
 715     return s;
 716 }
 717
 718 @ The heart of \TeX's input mechanism is the |get_next| procedure, which we shall
 719 develop in the next few sections of the program. Perhaps we shouldn't actually
 720 call it the ``heart,'' however, because it really acts as \TeX's eyes and mouth,
 721 reading the source files and gobbling them up. And it also helps \TeX\ to
 722 regurgitate stored token lists that are to be processed again. @^eyes and mouth@>
 723
 724 The main duty of |get_next| is to input one token and to set |cur_cmd| and
 725 |cur_chr| to that token's command code and modifier. Furthermore, if the input
 726 token is a control sequence, the |eqtb| location of that control sequence is
 727 stored in |cur_cs|; otherwise |cur_cs| is set to zero.
 728
 729 Underlying this simple description is a certain amount of complexity because of
 730 all the cases that need to be handled. However, the inner loop of |get_next| is
 731 reasonably short and fast.
 732
 733 When |get_next| is asked to get the next token of a \.{\\read} line,
 734 it sets |cur_cmd=cur_chr=cur_cs=0| in the case that no more tokens
 735 appear on that line. (There might not be any tokens at all, if the
 736 |end_line_char| has |ignore| as its catcode.)
 737
 738 The value of |par_loc| is the |eqtb| address of `\.{\\par}'. This quantity is
 739 needed because a blank line of input is supposed to be exactly equivalent to the
 740 appearance of \.{\\par}; we must set |cur_cs:=par_loc| when detecting a blank
 741 line.
 742
 743 @c
halfword par_loc;   /* location of `\.{\\par}' in |eqtb| */
halfword par_token; /* token representing `\.{\\par}'; |show_token_list| compares tokens against it */
 746
@ Parts of |get_next| are executed more often than any other instructions of \TeX.
 748 @^mastication@>@^inner loop@>
 749
 750 The global variable |force_eof| is normally |false|; it is set |true| by an
 751 \.{\\endinput} command. |luacstrings| is the number of lua print statements
 752 waiting to be input, it is changed by |luatokencall|.
 753
 754 @c
boolean force_eof; /* should the next \.{\\input} be aborted early? normally |false| */
int luacstrings;   /* how many lua strings are waiting to be input? */
 757
 758 @ If the user has set the |pausing| parameter to some positive value, and if
 759 nonstop mode has not been selected, each line of input is displayed on the
 760 terminal and the transcript file, followed by `\.{=>}'. \TeX\ waits for a
 761 response. If the response is simply |carriage_return|, the line is accepted as it
 762 stands, otherwise the line typed is used instead of the line in the file.
 763
 764 @c
 765 void firm_up_the_line(void)
 766 {
 767     int k;                      /* an index into |buffer| */
 768     ilimit = last;
 769     if (pausing > 0) {
 770         if (interaction > nonstop_mode) {
 771             wake_up_terminal();
 772             print_ln();
 773             if (istart < ilimit) {
 774                 for (k = istart; k <= ilimit - 1; k++)
 775                     print_char(buffer[k]);
 776             }
 777             first = ilimit;
 778             prompt_input("=>"); /* wait for user response */
 779             if (last > first) {
 780                 for (k = first; k < +last - 1; k++)     /* move line down in buffer */
 781                     buffer[k + istart - first] = buffer[k];
 782                 ilimit = istart + last - first;
 783             }
 784         }
 785     }
 786 }
 787
 788 @ Before getting into |get_next|, let's consider the subroutine that is called
 789 when an `\.{\\outer}' control sequence has been scanned or when the end of a file
 790 has been reached. These two cases are distinguished by |cur_cs|, which is zero at
 791 the end of a file.
 792
 793 @c
 794 void check_outer_validity(void)
 795 {
 796     halfword p;                 /* points to inserted token list */
 797     halfword q;                 /* auxiliary pointer */
 798     if (suppress_outer_error)
 799         return;
 800     if (scanner_status != normal) {
 801         deletions_allowed = false;
 802         /* Back up an outer control sequence so that it can be reread; */
 803         /* An outer control sequence that occurs in a \.{\\read} will not be reread,
 804            since the error recovery for \.{\\read} is not very powerful. */
 805         if (cur_cs != 0) {
 806             if ((istate == token_list) || (iname < 1) || (iname > 17)) {
 807                 p = get_avail();
 808                 token_info(p) = cs_token_flag + cur_cs;
 809                 begin_token_list(p, backed_up); /* prepare to read the control sequence again */
 810             }
 811             cur_cmd = spacer_cmd;
 812             cur_chr = ' ';      /* replace it by a space */
 813         }
 814         if (scanner_status > skipping) {
 815             const char *errhlp[] = {
 816                 "I suspect you have forgotten a `}', causing me",
 817                 "to read past where you wanted me to stop.",
 818                 "I'll try to recover; but if the error is serious,",
 819                 "you'd better type `E' or `X' now and fix your file.",
 820                 NULL
 821             };
 822             char errmsg[256];
 823             const char *startmsg;
 824             const char *scannermsg;
 825             /* Tell the user what has run away and try to recover */
 826             runaway();          /* print a definition, argument, or preamble */
 827             if (cur_cs == 0) {
 828                 startmsg = "File ended";
 829             } else {
 830                 cur_cs = 0;
 831                 startmsg = "Forbidden control sequence found";
 832             }
 833             /* Print either `\.{definition}' or `\.{use}' or `\.{preamble}' or `\.{text}',
 834                and insert tokens that should lead to recovery; */
 835             /* The recovery procedure can't be fully understood without knowing more
 836                about the \TeX\ routines that should be aborted, but we can sketch the
 837                ideas here:  For a runaway definition we will insert a right brace; for a
 838                runaway preamble, we will insert a special \.{\\cr} token and a right
 839                brace; and for a runaway argument, we will set |long_state| to
 840                |outer_call| and insert \.{\\par}. */
 841             p = get_avail();
 842             switch (scanner_status) {
 843             case defining:
 844                 scannermsg = "definition";
 845                 token_info(p) = right_brace_token + '}';
 846                 break;
 847             case matching:
 848                 scannermsg = "use";
 849                 token_info(p) = par_token;
 850                 long_state = outer_call_cmd;
 851                 break;
 852             case aligning:
 853                 scannermsg = "preamble";
 854                 token_info(p) = right_brace_token + '}';
 855                 q = p;
 856                 p = get_avail();
 857                 token_link(p) = q;
 858                 token_info(p) = cs_token_flag + frozen_cr;
 859                 align_state = -1000000;
 860                 break;
 861             case absorbing:
 862                 scannermsg = "text";
 863                 token_info(p) = right_brace_token + '}';
 864                 break;
 865             default:           /* can't happen */
 866                 scannermsg = "unknown";
 867                 break;
 868             }                   /*there are no other cases */
 869             begin_token_list(p, inserted);
 870             snprintf(errmsg, 255, "%s while scanning %s of %s",
 871                      startmsg, scannermsg, cs_to_string(warning_index));
 872             tex_error(errmsg, errhlp);
 873         } else {
 874             char errmsg[256];
 875             const char *errhlp_no[] = {
 876                 "The file ended while I was skipping conditional text.",
 877                 "This kind of error happens when you say `\\if...' and forget",
 878                 "the matching `\\fi'. I've inserted a `\\fi'; this might work.",
 879                 NULL
 880             };
 881             const char *errhlp_cs[] = {
 882                 "A forbidden control sequence occurred in skipped text.",
 883                 "This kind of error happens when you say `\\if...' and forget",
 884                 "the matching `\\fi'. I've inserted a `\\fi'; this might work.",
 885                 NULL
 886             };
 887             const char **errhlp = (const char **) errhlp_no;
 888             char *ss;
 889             if (cur_cs != 0) {
 890                 errhlp = errhlp_cs;
 891                 cur_cs = 0;
 892             }
 893             ss = cmd_chr_to_string(if_test_cmd, cur_if);
 894             snprintf(errmsg, 255, "Incomplete %s; all text was ignored after line %d",
 895                  ss, (int) skip_line);
 896             free(ss);
 897             /* Incomplete \\if... */
 898             cur_tok = cs_token_flag + frozen_fi;
 899             /* back up one inserted token and call |error| */
 900             {
 901                 OK_to_interrupt = false;
 902                 back_input();
 903                 token_type = inserted;
 904                 OK_to_interrupt = true;
 905                 tex_error(errmsg, errhlp);
 906             }
 907         }
 908         deletions_allowed = true;
 909     }
 910 }
 911
 912 @ @c
 913
 914 #if 0
 915
 916 /*
 917     The other variant gives less clutter in tracing cache usage when profiling and for
 918     some files (like the manual) also a bit of a speedup.
 919 */
 920
/*
    Reference variant of |get_next_file|; it is compiled out by the enclosing
    |#if 0|. It fetches the next token from the current line of a file using a
    single switch on |istate + cur_cmd|. The function returns |true| once
    |cur_cmd| and |cur_chr| have been set, and |false| after an invalid
    character has been reported or when |next_line| asks for a restart.
*/
static boolean get_next_file(void)
{
  SWITCH:
    if (iloc <= ilimit) {
        /* current line not yet finished */
        do_buffer_to_unichar(cur_chr, iloc);

      RESWITCH:
        if (detokenized_line()) {
            /* NOTE(review): 10 and 12 are presumably |spacer_cmd| and |other_char_cmd| -- confirm */
            cur_cmd = (cur_chr == ' ' ? 10 : 12);
        } else {
            do_get_cat_code(cur_cmd, cur_chr);
        }
        /*
            Change state if necessary, and |goto switch| if the current
            character should be ignored, or |goto reswitch| if the current
            character changes to another;

            The following 48-way switch accomplishes the scanning quickly, assuming
            that a decent C compiler has translated the code. Note that the numeric
            values for |mid_line|, |skip_blanks|, and |new_line| are spaced
            apart from each other by |max_char_code+1|, so we can add a character's
            command code to the state to get a single number that characterizes both.

            Remark [ls/hh]: checking performance indicated that this switch was the
            cause of many branch prediction errors but changing it to:

                c = istate + cur_cmd;
                if (c == (mid_line + letter_cmd) || c == (mid_line + other_char_cmd)) {
                    return true;
                } else if (c >= new_line) {
                    switch (c) {
                    }
                } else if (c >= skip_blanks) {
                    switch (c) {
                    }
                } else if (c >= mid_line) {
                    switch (c) {
                    }
                } else {
                    istate = mid_line;
                    return true;
                }

            gives as many prediction errors. So, we can indeed assume that the compiler
            does the right job, or that there is simply no other way.
        */

        switch (istate + cur_cmd) {
            case mid_line + ignore_cmd:
            case skip_blanks + ignore_cmd:
            case new_line + ignore_cmd:
            case skip_blanks + spacer_cmd:
            case new_line + spacer_cmd:
                /* Cases where character is ignored */
                goto SWITCH;
                break;
            case mid_line + escape_cmd:
            case new_line + escape_cmd:
            case skip_blanks + escape_cmd:
                /* Scan a control sequence; its return value becomes the new state */
                istate = (unsigned char) scan_control_sequence();
                if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
                    check_outer_validity();
                break;
            case mid_line + active_char_cmd:
            case new_line + active_char_cmd:
            case skip_blanks + active_char_cmd:
                /* Process an active-character  */
                cur_cs = active_to_cs(cur_chr, false);
                cur_cmd = eq_type(cur_cs);
                cur_chr = equiv(cur_cs);
                istate = mid_line;
                if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
                    check_outer_validity();
                break;
            case mid_line + sup_mark_cmd:
            case new_line + sup_mark_cmd:
            case skip_blanks + sup_mark_cmd:
                /* If this |sup_mark| starts an expanded character, rescan with the new |cur_chr| */
                if (process_sup_mark())
                    goto RESWITCH;
                else
                    istate = mid_line;
                break;
            case mid_line + invalid_char_cmd:
            case new_line + invalid_char_cmd:
            case skip_blanks + invalid_char_cmd:
                /* Decry the invalid character and |goto restart|; */
                invalid_character_error();
                return false; /* because state may be |token_list| now */
                break;
            case mid_line + spacer_cmd:
                /* Enter |skip_blanks| state, emit a space; */
                istate = skip_blanks;
                cur_chr = ' ';
                break;
            case mid_line + car_ret_cmd:
                /*
                    Finish line, emit a space. When a character of type |spacer| gets through, its
                    character code is changed to $\.{"\ "}=040$. This means that the ASCII codes
                    for tab and space, and for the space inserted at the end of a line, will be
                    treated alike when macro parameters are being matched. We do this since such
                    characters are indistinguishable on most computer terminal displays.
                 */
                iloc = ilimit + 1;
                cur_cmd = spacer_cmd;
                cur_chr = ' ';
                break;
            case skip_blanks + car_ret_cmd:
            case mid_line + comment_cmd:
            case new_line + comment_cmd:
            case skip_blanks + comment_cmd:
                /* Finish line, |goto switch|; */
                iloc = ilimit + 1;
                goto SWITCH;
                break;
            case new_line + car_ret_cmd:
                /* Finish line, emit a \.{\\par}; */
                iloc = ilimit + 1;
                cur_cs = par_loc;
                cur_cmd = eq_type(cur_cs);
                cur_chr = equiv(cur_cs);
                if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
                    check_outer_validity();
                break;
            case skip_blanks + left_brace_cmd:
            case new_line + left_brace_cmd:
                istate = mid_line;
                /* fall through */
            case mid_line + left_brace_cmd:
                align_state++;
                break;
            case skip_blanks + right_brace_cmd:
            case new_line + right_brace_cmd:
                istate = mid_line;
                /* fall through */
            case mid_line + right_brace_cmd:
                align_state--;
                break;
            case mid_line + math_shift_cmd:
            case mid_line + tab_mark_cmd:
            case mid_line + mac_param_cmd:
            case mid_line + sub_mark_cmd:
            case mid_line + letter_cmd:
            case mid_line + other_char_cmd:
                /* nothing to do: the state stays |mid_line| */
                break;
            /*
            case skip_blanks + math_shift:
            case skip_blanks + tab_mark:
            case skip_blanks + mac_param:
            case skip_blanks + sub_mark:
            case skip_blanks + letter:
            case skip_blanks + other_char:
            case new_line    + math_shift:
            case new_line    + tab_mark:
            case new_line    + mac_param:
            case new_line    + sub_mark:
            case new_line    + letter:
            case new_line    + other_char:
            */
            default:
                istate = mid_line;
                break;
        }
    } else {
        if (iname != 21)
            istate = new_line;
        /*
           Move to next line of file,
           or |goto restart| if there is no next line,
           or |return| if a \.{\\read} line has finished;
         */
        do {
            next_line_retval r = next_line();
            if (r == next_line_return) {
                return true;
            } else if (r == next_line_restart) {
                return false;
            }
        } while (0);
        check_interrupt();
        goto SWITCH;
    }
    return true;
}
1107
1108 #else
1109
1110 /* 10 times less Bim in callgrind */
1111
1112 /*
1113     escape_cmd left_brace_cmd right_brace_cmd math_shift_cmd
1114     tab_mark_cmd car_ret_cmd mac_param_cmd sup_mark_cmd
1115     sub_mark_cmd ignore_cmd spacer_cmd letter_cmd
1116     other_char_cmd active_char_cmd comment_cmd invalid_char_cmd
1117 */
1118
1119 static boolean get_next_file(void)
1120 {
1121     int c = 0;
1122   SWITCH:
1123     if (iloc <= ilimit) {
1124         /* current line not yet finished */
1125         do_buffer_to_unichar(cur_chr, iloc);
1126       RESWITCH:
1127         if (detokenized_line()) {
1128             cur_cmd = (cur_chr == ' ' ? 10 : 12);
1129         } else {
1130             do_get_cat_code(cur_cmd, cur_chr);
1131         }
1132         /*
1133            Change state if necessary, and |goto switch| if the current
1134            character should be ignored, or |goto reswitch| if the current
1135            character changes to another;
1136         */
1137         c = istate + cur_cmd;
1138         if (c == (mid_line + letter_cmd) || c == (mid_line + other_char_cmd)) {
1139             return true;
1140         } else if (c >= new_line) {
1141             switch (c-new_line) {
1142                 case escape_cmd:
1143                     istate = (unsigned char) scan_control_sequence();
1144                     if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
1145                         check_outer_validity();
1146                     return true;
1147                 case left_brace_cmd:
1148                     istate = mid_line;
1149                     align_state++;
1150                     return true;
1151                 case right_brace_cmd:
1152                     istate = mid_line;
1153                     align_state--;
1154                     return true;
1155                 case math_shift_cmd:
1156                     istate = mid_line;
1157                     return true;
1158                 case tab_mark_cmd:
1159                     istate = mid_line;
1160                     return true;
1161                 case car_ret_cmd:
1162                     /* Finish line, emit a \.{\\par}; */
1163                     iloc = ilimit + 1;
1164                     cur_cs = par_loc;
1165                     cur_cmd = eq_type(cur_cs);
1166                     cur_chr = equiv(cur_cs);
1167                     if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
1168                         check_outer_validity();
1169                     return true;
1170                 case mac_param_cmd:
1171                     istate = mid_line;
1172                     return true;
1173                 case sup_mark_cmd:
1174                     if (process_sup_mark())
1175                         goto RESWITCH;
1176                     else
1177                         istate = mid_line;
1178                     return true;
1179                 case sub_mark_cmd:
1180                     istate = mid_line;
1181                     return true;
1182                 case ignore_cmd:
1183                     goto SWITCH;
1184                     return true;
1185                 case spacer_cmd:
1186                     /* Cases where character is ignored */
1187                     goto SWITCH;
1188                 case letter_cmd:
1189                     istate = mid_line;
1190                     return true;
1191                 case other_char_cmd:
1192                     istate = mid_line;
1193                     return true;
1194                 case active_char_cmd:
1195                     cur_cs = active_to_cs(cur_chr, false);
1196                     cur_cmd = eq_type(cur_cs);
1197                     cur_chr = equiv(cur_cs);
1198                     istate = mid_line;
1199                     if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
1200                         check_outer_validity();
1201                     return true;
1202                 case comment_cmd:
1203                     iloc = ilimit + 1;
1204                     goto SWITCH;
1205                 case invalid_char_cmd:
1206                     invalid_character_error();
1207                     return false; /* because state may be |token_list| now */
1208                 default:
1209                     istate = mid_line;
1210                     return true;
1211             }
1212         } else if (c >= skip_blanks) {
1213             switch (c-skip_blanks) {
1214                 case escape_cmd:
1215                     /* Scan a control sequence ...; */
1216                     istate = (unsigned char) scan_control_sequence();
1217                     if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
1218                         check_outer_validity();
1219                     return true;
1220                 case left_brace_cmd:
1221                     istate = mid_line;
1222                     align_state++;
1223                     return true;
1224                 case right_brace_cmd:
1225                     istate = mid_line;
1226                     align_state--;
1227                     return true;
1228                 case math_shift_cmd:
1229                     istate = mid_line;
1230                     return true;
1231                 case tab_mark_cmd:
1232                     istate = mid_line;
1233                     return true;
1234                 case car_ret_cmd:
1235                     iloc = ilimit + 1;
1236                     goto SWITCH;
1237                 case mac_param_cmd:
1238                     istate = mid_line;
1239                     return true;
1240                 case sup_mark_cmd:
1241                     /* If this |sup_mark| starts */
1242                     if (process_sup_mark())
1243                         goto RESWITCH;
1244                     else
1245                         istate = mid_line;
1246                     return true;
1247                 case sub_mark_cmd:
1248                     istate = mid_line;
1249                     return true;
1250                 case ignore_cmd:
1251                     goto SWITCH;
1252                 case spacer_cmd:
1253                     goto SWITCH;
1254                 case letter_cmd:
1255                     istate = mid_line;
1256                     return true;
1257                 case other_char_cmd:
1258                     istate = mid_line;
1259                     return true;
1260                 case active_char_cmd:
1261                     cur_cs = active_to_cs(cur_chr, false);
1262                     cur_cmd = eq_type(cur_cs);
1263                     cur_chr = equiv(cur_cs);
1264                     istate = mid_line;
1265                     if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
1266                         check_outer_validity();
1267                     return true;
1268                 case comment_cmd:
1269                     /* Finish line, |goto switch|; */
1270                     iloc = ilimit + 1;
1271                     goto SWITCH;
1272                 case invalid_char_cmd:
1273                     /* Decry the invalid character and |goto restart|; */
1274                     invalid_character_error();
1275                     return false; /* because state may be |token_list| now */
1276                 default:
1277                     istate = mid_line;
1278                     return true;
1279             }
1280         } else if (c >= mid_line) {
1281             switch (c-mid_line) {
1282                 case escape_cmd:
1283                     istate = (unsigned char) scan_control_sequence();
1284                     if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
1285                         check_outer_validity();
1286                     return true;
1287                 case left_brace_cmd:
1288                     align_state++;
1289                     return true;
1290                 case right_brace_cmd:
1291                     align_state--;
1292                     return true;
1293                 case math_shift_cmd:
1294                     return true;
1295                 case tab_mark_cmd:
1296                     return true;
1297                 case car_ret_cmd:
1298                     /*
1299                         Finish line, emit a space. When a character of type |spacer| gets through, its
1300                         character code is changed to $\.{"\ "}=040$. This means that the ASCII codes
1301                         for tab and space, and for the space inserted at the end of a line, will be
1302                         treated alike when macro parameters are being matched. We do this since such
1303                         characters are indistinguishable on most computer terminal displays.
1304                      */
1305                     iloc = ilimit + 1;
1306                     cur_cmd = spacer_cmd;
1307                     cur_chr = ' ';
1308                     return true;
1309                 case mac_param_cmd:
1310                     return true;
1311                 case sup_mark_cmd:
1312                     if (process_sup_mark())
1313                         goto RESWITCH;
1314                     else
1315                         istate = mid_line;
1316                     return true;
1317                 case sub_mark_cmd:
1318                     return true;
1319                 case ignore_cmd:
1320                     goto SWITCH;
1321                 case spacer_cmd:
1322                     /* Enter |skip_blanks| state, emit a space; */
1323                     istate = skip_blanks;
1324                     cur_chr = ' ';
1325                     return true;
1326                 case letter_cmd:
1327                     istate = mid_line;
1328                     return true;
1329                 case other_char_cmd:
1330                     istate = mid_line;
1331                     return true;
1332                 case active_char_cmd:
1333                     cur_cs = active_to_cs(cur_chr, false);
1334                     cur_cmd = eq_type(cur_cs);
1335                     cur_chr = equiv(cur_cs);
1336                     istate = mid_line;
1337                     if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
1338                         check_outer_validity();
1339                     return true;
1340                 case comment_cmd:
1341                     iloc = ilimit + 1;
1342                     goto SWITCH;
1343                 case invalid_char_cmd:
1344                     invalid_character_error();
1345                     return false; /* because state may be |token_list| now */
1346                 default:
1347                     istate = mid_line;
1348                     return true;
1349             }
1350         } else {
1351             istate = mid_line;
1352             return true;
1353         }
1354     } else {
1355         if (iname != 21) {
1356             istate = new_line;
1357         }
1358         /*
1359            Move to next line of file, or |goto restart| if there is no next line,
1360            or |return| if a \.{\\read} line has finished;
1361         */
1362         do {
1363             next_line_retval r = next_line();
1364             if (r == next_line_return) {
1365                 return true;
1366             } else if (r == next_line_restart) {
1367                 return false;
1368             }
1369         } while (0);
1370         check_interrupt();
1371         goto SWITCH;
1372     }
1373     return true;
1374 }
1375
1376 #endif
1377
1378 @ Notice that a code like \.{\^\^8} becomes \.x if not followed by a hex digit.
1379 We only support a limited set:
1380
1381 ^^^^^^XXXXXX
^^^^XXXX
1383 ^^XX ^^<char>
1384
1385 @c
1386
/*
    Helpers for the \.{\^\^} notation. Only the digits `0'..`9' and the
    lowercase letters `a'..`f' count as hex digits. The conversion macros
    store their result in |cur_chr| and expect arguments that passed |is_hex|.
    Arguments are parenthesized and the statement macros are wrapped in
    |do ... while (0)|, so each use is a single statement that needs a
    trailing semicolon and is safe inside an unbraced |if ... else|.
*/
#define is_hex(a) ((((a)>='0')&&((a)<='9'))||(((a)>='a')&&((a)<='f')))

/* shift |cur_chr| one nybble to the left and add the value of hex digit |c| */
#define add_nybble(c) do { \
    if ((c)<='9') { \
        cur_chr=(cur_chr<<4)+(c)-'0'; \
    } else { \
        cur_chr=(cur_chr<<4)+(c)-'a'+10; \
    } \
} while (0)

/* set |cur_chr| to the value of hex digit |c| */
#define set_nybble(c) do { \
    if ((c)<='9') { \
        cur_chr=(c)-'0'; \
    } else { \
        cur_chr=(c)-'a'+10; \
    } \
} while (0)

#define one_hex_to_cur_chr(c1) \
    set_nybble(c1)

#define two_hex_to_cur_chr(c1,c2) do { \
    set_nybble(c1); \
    add_nybble(c2); \
} while (0)

#define four_hex_to_cur_chr(c1,c2,c3,c4) do { \
    two_hex_to_cur_chr(c1,c2); \
    add_nybble(c3); \
    add_nybble(c4); \
} while (0)

#define six_hex_to_cur_chr(c1,c2,c3,c4,c5,c6) do { \
    four_hex_to_cur_chr(c1,c2,c3,c4); \
    add_nybble(c5); \
    add_nybble(c6); \
} while (0)
1419
1420 static boolean process_sup_mark(void)
1421 {
1422     if (cur_chr == buffer[iloc]) {
1423         if (iloc < ilimit) {
1424             if ((cur_chr == buffer[iloc + 1]) && (cur_chr == buffer[iloc + 2])) {
1425                 if ((cur_chr == buffer[iloc + 3]) && (cur_chr == buffer[iloc + 4])) {
1426                     /* ^^^^^^XXXXXX */
1427                     if ((iloc + 10) <= ilimit) {
1428                         int c1 = buffer[iloc +  5];
1429                         int c2 = buffer[iloc +  6];
1430                         int c3 = buffer[iloc +  7];
1431                         int c4 = buffer[iloc +  8];
1432                         int c5 = buffer[iloc +  9];
1433                         int c6 = buffer[iloc + 10];
1434                         if (is_hex(c1) && is_hex(c2) && is_hex(c3) &&
1435                             is_hex(c4) && is_hex(c5) && is_hex(c6)) {
1436                             iloc = iloc + 11;
1437                             six_hex_to_cur_chr(c1,c2,c3,c4,c5,c6);
1438                             return true;
1439                         } else {
1440                             tex_error("^^^^^^ needs six hex digits", NULL);
1441                         }
1442                     } else {
1443                         tex_error("^^^^^^ needs six hex digits, end of input", NULL);
1444                     }
1445                 } else {
1446                     /* ^^^^XXXX */
1447                     if ((iloc + 6) <= ilimit) {
1448                         int c1 = buffer[iloc + 3];
1449                         int c2 = buffer[iloc + 4];
1450                         int c3 = buffer[iloc + 5];
1451                         int c4 = buffer[iloc + 6];
1452                         if (is_hex(c1) && is_hex(c2) && is_hex(c3) && is_hex(c4)) {
1453                             iloc = iloc + 7;
1454                             four_hex_to_cur_chr(c1,c2,c3,c4);
1455                             return true;
1456                         } else {
1457                             tex_error("^^^^ needs four hex digits", NULL);
1458                         }
1459                     } else {
1460                         tex_error("^^^^ needs four hex digits, end of input", NULL);
1461                     }
1462                 }
1463             } else {
1464                 /* ^^XX */
1465                 if ((iloc + 2) <= ilimit) {
1466                     int c1 = buffer[iloc + 1];
1467                     int c2 = buffer[iloc + 2];
1468                     if (is_hex(c1) && is_hex(c2)) {
1469                         iloc = iloc + 3;
1470                         two_hex_to_cur_chr(c1,c2);
1471                         return true;
1472                     }
1473                 }
1474                 /* go on, no error, good old tex */
1475             }
1476         }
1477         /* the rest */
1478         {
1479             int c1 = buffer[iloc + 1];
1480             if (c1 < 0200) {
1481                 iloc = iloc + 2;
1482                 if (is_hex(c1) && (iloc <= ilimit)) {
1483                     int c2 = buffer[iloc];
1484                     if (is_hex(c2)) {
1485                         incr(iloc);
1486                         two_hex_to_cur_chr(c1,c2);
1487                         return true;
1488                     }
1489                 }
1490                 cur_chr = (c1 < 0100 ? c1 + 0100 : c1 - 0100);
1491                 return true;
1492             }
1493         }
1494     }
1495     return false;
1496 }
1497
1498 @ Control sequence names are scanned only when they appear in some line of a
1499 file; once they have been scanned the first time, their |eqtb| location serves as
1500 a unique identification, so \TeX\ doesn't need to refer to the original name any
1501 more except when it prints the equivalent in symbolic form.
1502
1503 The program that scans a control sequence has been written carefully in order to
1504 avoid the blowups that might otherwise occur if a malicious user tried something
1505 like `\.{\\catcode\'15=0}'. The algorithm might look at |buffer[ilimit+1]|, but
1506 it never looks at |buffer[ilimit+2]|.
1507
1508 If expanded characters like `\.{\^\^A}' or `\.{\^\^df}' appear in or just
1509 following a control sequence name, they are converted to single characters in the
1510 buffer and the process is repeated, slowly but surely.
1511
1512 @c
1513 static boolean check_expanded_code(int *kk);    /* below */
1514
1515 static int scan_control_sequence(void)
1516 {
1517     int retval = mid_line;
1518     if (iloc > ilimit) {
1519         cur_cs = null_cs;       /* |state| is irrelevant in this case */
1520     } else {
1521         register int cat;       /* |cat_code(cur_chr)|, usually */
1522         while (1) {
1523             int k = iloc;
1524             do_buffer_to_unichar(cur_chr, k);
1525             do_get_cat_code(cat, cur_chr);
1526             if (cat != letter_cmd || k > ilimit) {
1527                 retval = (cat == spacer_cmd ? skip_blanks : mid_line);
1528                 if (cat == sup_mark_cmd && check_expanded_code(&k))     /* If an expanded...; */
1529                     continue;
1530             } else {
1531                 retval = skip_blanks;
1532                 do {
1533                     do_buffer_to_unichar(cur_chr, k);
1534                     do_get_cat_code(cat, cur_chr);
1535                 } while (cat == letter_cmd && k <= ilimit);
1536
1537                 if (cat == sup_mark_cmd && check_expanded_code(&k))     /* If an expanded...; */
1538                     continue;
1539                 if (cat != letter_cmd) {
1540                     /* backtrack one character which can be utf */
1541                     /*
1542                     decr(k);
1543                     if (cur_chr > 0xFFFF)
1544                         decr(k);
1545                     if (cur_chr > 0x7FF)
1546                         decr(k);
1547                     if (cur_chr > 0x7F)
1548                         decr(k);
1549                     */
1550                     if (cur_chr <= 0x7F) {
1551                         k -= 1; /* in most cases */
1552                     } else if (cur_chr > 0xFFFF) {
1553                         k -= 4;
1554                     } else if (cur_chr > 0x7FF) {
1555                         k -= 3;
1556                     } else /* if (cur_chr > 0x7F) */ {
1557                         k -= 2;
1558                     }
1559                     /* now |k| points to first nonletter */
1560                 }
1561             }
1562             cur_cs = id_lookup(iloc, k - iloc);
1563             iloc = k;
1564             break;
1565         }
1566     }
1567     cur_cmd = eq_type(cur_cs);
1568     cur_chr = equiv(cur_cs);
1569     return retval;
1570 }
1571
1572 @ Whenever we reach the following piece of code, we will have
1573 |cur_chr=buffer[k-1]| and |k<=ilimit+1| and
1574 |cat=get_cat_code(cat_code_table,cur_chr)|. If an expanded code like \.{\^\^A} or
1575 \.{\^\^df} appears in |buffer[(k-1)..(k+1)]| or |buffer[(k-1)..(k+2)]|, we will
1576 store the corresponding code in |buffer[k-1]| and shift the rest of the buffer
1577 left two or three places.
1578
1579 @c
1580 static boolean check_expanded_code(int *kk)
1581 {
1582     int l;
1583     int k = *kk;
1584     int d = 1;
1585     if (buffer[k] == cur_chr && k < ilimit) {
1586         if ((cur_chr == buffer[k + 1]) && (cur_chr == buffer[k + 2])) {
1587             if ((cur_chr == buffer[k + 3]) && (cur_chr == buffer[k + 4])) {
1588                 if ((k + 10) <= ilimit) {
1589                     int c1 = buffer[k + 6 - 1];
1590                     int c2 = buffer[k + 6];
1591                     int c3 = buffer[k + 6 + 1];
1592                     int c4 = buffer[k + 6 + 2];
1593                     int c5 = buffer[k + 6 + 3];
1594                     int c6 = buffer[k + 6 + 4];
1595                     if (is_hex(c1) && is_hex(c2) && is_hex(c3) && is_hex(c4) && is_hex(c5) && is_hex(c6)) {
1596                         d = 6;
1597                         six_hex_to_cur_chr(c1,c2,c3,c4,c5,c6);
1598                     } else {
1599                         tex_error("^^^^^^ needs six hex digits", NULL);
1600                     }
1601                 } else {
1602                     tex_error("^^^^^^ needs six hex digits, end of input", NULL);
1603                 }
1604             } else {
1605                 if ((k + 6) <= ilimit) {
1606                     int c1 = buffer[k + 4 - 1];
1607                     int c2 = buffer[k + 4];
1608                     int c3 = buffer[k + 4 + 1];
1609                     int c4 = buffer[k + 4 + 2];
1610                     if (is_hex(c1) && is_hex(c2) && is_hex(c3) && is_hex(c4)) {
1611                         d = 4;
1612                         four_hex_to_cur_chr(c1,c2,c3,c4);
1613                     } else {
1614                         tex_error("^^^^ needs four hex digits", NULL);
1615                     }
1616                 } else {
1617                     tex_error("^^^^ needs four hex digits, end of input", NULL);
1618                 }
1619             }
1620         } else {
1621             int c1 = buffer[k + 1];
1622             if (c1 < 0200) {
1623                 d = 1;
1624                 if (is_hex(c1) && (k + 2) <= ilimit) {
1625                     int c2 = buffer[k + 2];
1626                     if (is_hex(c2)) {
1627                         d = 2;
1628                         two_hex_to_cur_chr(c1,c2);
1629                     } else {
1630                         cur_chr = (c1 < 0100 ? c1 + 0100 : c1 - 0100);
1631                     }
1632                 } else {
1633                     cur_chr = (c1 < 0100 ? c1 + 0100 : c1 - 0100);
1634                 }
1635             }
1636         }
1637         if (d > 2)
1638             d = 2 * d - 1;
1639         else
1640             d++;
1641         if (cur_chr <= 0x7F) {
1642             buffer[k - 1] = (packed_ASCII_code) cur_chr;
1643         } else if (cur_chr <= 0x7FF) {
1644             buffer[k - 1] = (packed_ASCII_code) (0xC0 + cur_chr / 0x40);
1645             k++;
1646             d--;
1647             buffer[k - 1] = (packed_ASCII_code) (0x80 + cur_chr % 0x40);
1648         } else if (cur_chr <= 0xFFFF) {
1649             buffer[k - 1] = (packed_ASCII_code) (0xE0 + cur_chr / 0x1000);
1650             k++;
1651             d--;
1652             buffer[k - 1] = (packed_ASCII_code) (0x80 + (cur_chr % 0x1000) / 0x40);
1653             k++;
1654             d--;
1655             buffer[k - 1] = (packed_ASCII_code) (0x80 + (cur_chr % 0x1000) % 0x40);
1656         } else {
1657             buffer[k - 1] = (packed_ASCII_code) (0xF0 + cur_chr / 0x40000);
1658             k++;
1659             d--;
1660             buffer[k - 1] = (packed_ASCII_code) (0x80 + (cur_chr % 0x40000) / 0x1000);
1661             k++;
1662             d--;
1663             buffer[k - 1] = (packed_ASCII_code) (0x80 + ((cur_chr % 0x40000) % 0x1000) / 0x40);
1664             k++;
1665             d--;
1666             buffer[k - 1] = (packed_ASCII_code) (0x80 + ((cur_chr % 0x40000) % 0x1000) % 0x40);
1667         }
1668         l = k;
1669         ilimit = ilimit - d;
1670         while (l <= ilimit) {
1671             buffer[l] = buffer[l + d];
1672             l++;
1673         }
1674         *kk = k;
1675         return true;
1676     }
1677     return false;
1678 }
1679
1680 @ All of the easy branches of |get_next| have now been taken care of. There is
1681 one more branch.
1682
1683 @c static next_line_retval next_line(void)
1684 {
1685     boolean inhibit_eol = false; /* a way to end a pseudo file without trailing space */
1686     if (iname > 17) {
1687         /* Read next line of file into |buffer|, or |goto restart| if the file has ended */
1688         incr(line);
1689         first = istart;
1690         if (!force_eof) {
1691             if (iname <= 20) {
1692                 if (pseudo_input()) {   /* not end of file */
1693                     firm_up_the_line(); /* this sets |ilimit| */
1694                     line_catcode_table = DEFAULT_CAT_TABLE;
1695                     if ((iname == 19) && (pseudo_lines(pseudo_files) == null))
1696                         inhibit_eol = true;
1697                 } else if ((every_eof != null) && !eof_seen[iindex]) {
1698                     ilimit = first - 1;
1699                     eof_seen[iindex] = true; /* fake one empty line */
1700                     if (iname != 19)
1701                         begin_token_list(every_eof, every_eof_text);
1702                     return next_line_restart;
1703                 } else {
1704                     force_eof = true;
1705                 }
1706             } else {
1707                 if (iname == 21) {
1708                     if (luacstring_input()) { /* not end of strings  */
1709                         firm_up_the_line();
1710                         line_catcode_table = (short) luacstring_cattable();
1711                         line_partial = (signed char) luacstring_partial();
1712                         if (luacstring_final_line() || line_partial
1713                             || line_catcode_table == NO_CAT_TABLE)
1714                             inhibit_eol = true;
1715                         if (!line_partial)
1716                             istate = new_line;
1717                     } else {
1718                         force_eof = true;
1719                     }
1720                 } else {
1721                     if (lua_input_ln(cur_file, 0, true)) { /* not end of file */
1722                         firm_up_the_line(); /* this sets |ilimit| */
1723                         line_catcode_table = DEFAULT_CAT_TABLE;
1724                     } else if ((every_eof != null) && (!eof_seen[iindex])) {
1725                         ilimit = first - 1;
1726                         eof_seen[iindex] = true; /* fake one empty line */
1727                         begin_token_list(every_eof, every_eof_text);
1728                         return next_line_restart;
1729                     } else {
1730                         force_eof = true;
1731                     }
1732                 }
1733             }
1734         }
1735         if (force_eof) {
1736             if (tracing_nesting > 0)
1737                 if ((grp_stack[in_open] != cur_boundary) || (if_stack[in_open] != cond_ptr))
1738                     if (!((iname == 19) || (iname == 21))) {
1739                         /* give warning for some unfinished groups and/or conditionals */
1740                         file_warning();
1741                     }
1742             if ((iname > 21) || (iname == 20)) {
1743                 report_stop_file(filetype_tex);
1744                 decr(open_parens);
1745             }
1746             force_eof = false;
1747             /* lua input or \.{\\scantextokens} */
1748             if (iname == 21 || iname == 19) {
1749                 end_file_reading();
1750             } else {
1751                 end_file_reading();
1752                 if (! suppress_outer_error)
1753                     check_outer_validity();
1754             }
1755             return next_line_restart;
1756         }
1757         if (inhibit_eol || end_line_char_inactive)
1758             ilimit--;
1759         else
1760             buffer[ilimit] = (packed_ASCII_code) end_line_char;
1761         first = ilimit + 1;
1762         iloc = istart; /* ready to read */
1763     } else {
1764         if (!terminal_input) {
1765             /* \.{\\read} line has ended */
1766             cur_cmd = 0;
1767             cur_chr = 0;
1768             return next_line_return;    /* OUTER */
1769         }
1770         if (input_ptr > 0) {
1771             /* text was inserted during error recovery */
1772             end_file_reading();
1773             return next_line_restart; /* resume previous level */
1774         }
1775         if (selector < log_only)
1776             open_log_file();
1777         if (interaction > nonstop_mode) {
1778             if (end_line_char_inactive)
1779                 ilimit++;
1780             if (ilimit == istart) {
1781                 /* previous line was empty */
1782                 tprint_nl("(Please type a command or say `\\end')");
1783             }
1784             print_ln();
1785             first = istart;
1786             prompt_input("*"); /* input on-line into |buffer| */
1787             ilimit = last;
1788             if (end_line_char_inactive)
1789                 ilimit--;
1790             else
1791                 buffer[ilimit] = (packed_ASCII_code) end_line_char;
1792             first = ilimit + 1;
1793             iloc = istart;
1794         } else {
1795             /*
1796                 Nonstop mode, which is intended for overnight batch processing,
1797                 never waits for on-line input.
1798             */
1799             fatal_error("*** (job aborted, no legal \\end found)");
1800         }
1801     }
1802     return next_line_ok;
1803 }
1804
1805 @ Let's consider now what happens when |get_next| is looking at a token list.
1806
1807 @c
1808 static boolean get_next_tokenlist(void)
1809 {
1810     register halfword t = token_info(iloc);
1811     iloc = token_link(iloc); /* move to next */
1812     if (t >= cs_token_flag) {
1813         /* a control sequence token */
1814         cur_cs = t - cs_token_flag;
1815         cur_cmd = eq_type(cur_cs);
1816         if (cur_cmd >= outer_call_cmd) {
1817             if (cur_cmd == dont_expand_cmd) {
1818                 /*
1819                     Get the next token, suppressing expansion. The present point in the program
1820                     is reached only when the |expand| routine has inserted a special marker into
1821                     the input. In this special case, |token_info(iloc)| is known to be a control
1822                     sequence token, and |token_link(iloc)=null|.
1823                 */
1824                 cur_cs = token_info(iloc) - cs_token_flag;
1825                 iloc = null;
1826                 cur_cmd = eq_type(cur_cs);
1827                 if (cur_cmd > max_command_cmd) {
1828                     cur_cmd = relax_cmd;
1829                     cur_chr = no_expand_flag;
1830                     return true;
1831                 }
1832             } else if (! suppress_outer_error) {
1833                 check_outer_validity();
1834             }
1835         }
1836         cur_chr = equiv(cur_cs);
1837     } else {
1838         cur_cmd = token_cmd(t);
1839         cur_chr = token_chr(t);
1840         switch (cur_cmd) {
1841             case left_brace_cmd:
1842                 align_state++;
1843                 break;
1844             case right_brace_cmd:
1845                 align_state--;
1846                 break;
1847             case out_param_cmd:
1848                 /* Insert macro parameter and |goto restart|; */
1849                 begin_token_list(param_stack[param_start + cur_chr - 1], parameter);
1850                 return false;
1851                 break;
1852         }
1853     }
1854     return true;
1855 }
1856
1857 @ Now we're ready to take the plunge into |get_next| itself. Parts of this
1858 routine are executed more often than any other instructions of \TeX.
1859 @^mastication@>@^inner loop@>
1860
1861 @ sets |cur_cmd|, |cur_chr|, |cur_cs| to next token
1862
1863 @c
1864 void get_next(void)
1865 {
1866   RESTART:
1867     cur_cs = 0;
1868     if (istate != token_list) {
1869         /* Input from external file, |goto restart| if no input found */
1870         if (!get_next_file())
1871             goto RESTART;
1872     } else {
1873         if (iloc == null) {
1874             end_token_list();
1875             goto RESTART;       /* list exhausted, resume previous level */
1876         } else if (!get_next_tokenlist()) {
1877             goto RESTART;       /* parameter needs to be expanded */
1878         }
1879     }
1880     /* If an alignment entry has just ended, take appropriate action */
1881     if ((cur_cmd == tab_mark_cmd || cur_cmd == car_ret_cmd) && align_state == 0) {
1882         insert_vj_template();
1883         goto RESTART;
1884     }
1885 }
1886
1887 @ Since |get_next| is used so frequently in \TeX, it is convenient to define
1888 three related procedures that do a little more:
1889
1890 \yskip\hang|get_token| not only sets |cur_cmd| and |cur_chr|, it also sets
1891 |cur_tok|, a packed halfword version of the current token.
1892
1893 \yskip\hang|get_x_token|, meaning ``get an expanded token,'' is like |get_token|,
1894 but if the current token turns out to be a user-defined control sequence (i.e., a
1895 macro call), or a conditional, or something like \.{\\topmark} or
1896 \.{\\expandafter} or \.{\\csname}, it is eliminated from the input by beginning
1897 the expansion of the macro or the evaluation of the conditional.
1898
1899 \yskip\hang|x_token| is like |get_x_token| except that it assumes that |get_next|
1900 has already been called.
1901
1902 \yskip\noindent In fact, these three procedures account for almost every use of
1903 |get_next|.
1904
1905 No new control sequences will be defined except during a call of |get_token|, or
1906 when \.{\\csname} compresses a token list, because |no_new_control_sequence| is
1907 always |true| at other times.
1908
1909 @ sets |cur_cmd|, |cur_chr|, |cur_tok|
1910
1911 @c
1912 void get_token(void)
1913 {
1914     no_new_control_sequence = false;
1915     get_next();
1916     no_new_control_sequence = true;
1917     if (cur_cs == 0)
1918         cur_tok = token_val(cur_cmd, cur_chr);
1919     else
1920         cur_tok = cs_token_flag + cur_cs;
1921 }
1922
1923 @ changes the string |s| to a token list
1924
1925 @c
1926 halfword string_to_toks(const char *ss)
1927 {
1928     halfword p; /* tail of the token list */
1929     halfword q; /* new node being added to the token list via |store_new_token| */
1930     halfword t; /* token being appended */
1931     const char *s = ss;
1932     const char *se = ss + strlen(s);
1933     p = temp_token_head;
1934     set_token_link(p, null);
1935     while (s < se) {
1936         t = (halfword) str2uni((const unsigned char *) s);
1937         s += utf8_size(t);
1938         if (t == ' ')
1939             t = space_token;
1940         else
1941             t = other_token + t;
1942         fast_store_new_token(t);
1943     }
1944     return token_link(temp_token_head);
1945 }
1946
1947 @ The token lists for macros and for other things like \.{\\mark} and
1948 \.{\\output} and \.{\\write} are produced by a procedure called |scan_toks|.
1949
1950 Before we get into the details of |scan_toks|, let's consider a much simpler
1951 task, that of converting the current string into a token list. The |str_toks|
1952 function does this; it classifies spaces as type |spacer| and everything else as
1953 type |other_char|.
1954
1955 The token list created by |str_toks| begins at |link(temp_token_head)| and ends
1956 at the value |p| that is returned. (If |p=temp_token_head|, the list is empty.)
1957
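|lua_str_toks| is almost identical, but it also escapes the characters that
|lua| considers special while scanning a literal string: the backslash, both
quote characters, and the newline and carriage return characters (which are
stored as \.{\\n} and \.{\\r}).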
1960
1961 @ changes the string |str_pool[b..pool_ptr]| to a token list
1962
1963 @c
1964 halfword lua_str_toks(lstring b)
1965 {
1966     halfword p;       /* tail of the token list */
1967     halfword q;       /* new node being added to the token list via |store_new_token| */
1968     halfword t;       /* token being appended */
1969     unsigned char *k; /* index into string */
1970     p = temp_token_head;
1971     set_token_link(p, null);
1972     k = (unsigned char *) b.s;
1973     while (k < (unsigned char *) b.s + b.l) {
1974         t = pool_to_unichar(k);
1975         k += utf8_size(t);
1976         if (t == ' ') {
1977             t = space_token;
1978         } else {
1979             if ((t == '\\') || (t == '"') || (t == '\'') || (t == 10) || (t == 13))
1980                 fast_store_new_token(other_token + '\\');
1981             if (t == 10)
1982                 t = 'n';
1983             if (t == 13)
1984                 t = 'r';
1985             t = other_token + t;
1986         }
1987         fast_store_new_token(t);
1988     }
1989     return p;
1990 }
1991
1992 @ Incidentally, the main reason for wanting |str_toks| is the function
1993 |the_toks|, which has similar input/output characteristics.
1994
1995 @ changes the string |str_pool[b..pool_ptr]| to a token list
1996
1997 @c
1998 halfword str_toks(lstring s)
1999 {
2000     halfword p;           /* tail of the token list */
2001     halfword q;           /* new node being added to the token list via |store_new_token| */
2002     halfword t;           /* token being appended */
2003     unsigned char *k, *l; /* index into string */
2004     p = temp_token_head;
2005     set_token_link(p, null);
2006     k = s.s;
2007     l = k + s.l;
2008     while (k < l) {
2009         t = pool_to_unichar(k);
2010         k += utf8_size(t);
2011         if (t == ' ')
2012             t = space_token;
2013         else
2014             t = other_token + t;
2015         fast_store_new_token(t);
2016     }
2017     return p;
2018 }
2019
2020 /*
2021     hh: most of the converter is similar to the one i made for macro so at some point i
2022     can make a helper; also todo: there is no need to go through the pool
2023
2024 */
2025
2026 halfword str_scan_toks(int ct, lstring s)
2027 {                         /* changes the string |str_pool[b..pool_ptr]| to a token list */
2028     halfword p;           /* tail of the token list */
2029     halfword q;           /* new node being added to the token list via |store_new_token| */
2030     halfword t;           /* token being appended */
2031     unsigned char *k, *l; /* index into string */
2032     int cc;
2033     p = temp_token_head;
2034     set_token_link(p, null);
2035     k = s.s;
2036     l = k + s.l;
2037     while (k < l) {
2038         t = pool_to_unichar(k);
2039         k += utf8_size(t);
2040         cc = get_cat_code(ct,t);
2041             if (cc == 0) {
2042                 /* we have a potential control sequence so we check for it */
2043                 int _lname = 0 ;
2044                 int _s = 0 ;
2045                 int _c = 0 ;
2046                 halfword _cs = null ;
2047                 unsigned char *_name  = k ;
2048                 while (k < l) {
2049                     t = (halfword) str2uni((const unsigned char *) k);
2050                     _s = utf8_size(t);
2051                     _c = get_cat_code(ct,t);
2052                     if (_c == 11) {
2053                         k += _s ;
2054                         _lname = _lname + _s ;
2055                     } else if (_c == 10) {
2056                         /* we ignore a trailing space like normal scanning does */
2057                         k += _s ;
2058                         break ;
2059                     } else {
2060                         break ;
2061                     }
2062                 }
2063                 if (_s > 0) {
2064                     /* we have a potential \cs */
2065                     _cs = string_lookup((const char *) _name, _lname);
2066                     if (_cs == undefined_control_sequence) {
2067                         /* let's play safe and backtrack */
2068                         t = cc * (1<<21) + t ;
2069                         k = _name ;
2070                     } else {
2071                         t = cs_token_flag + _cs;
2072                     }
2073                 } else {
2074                     /* just a character with some meaning, so \unknown becomes effectively */
2075                     /* \\unknown assuming that \\ has some useful meaning of course        */
2076                     t = cc * (1<<21) + t ;
2077                     k = _name ;
2078                 }
2079
2080             } else {
2081                 /* whatever token, so for instance $x^2$ just works given a tex */
2082                 /* catcode regime */
2083                 t = cc * (1<<21) + t ;
2084             }
2085             fast_store_new_token(t);
2086
2087     }
2088     return p;
2089 }
2090
2091 @ Here's part of the |expand| subroutine that we are now ready to complete:
2092
2093 @c
2094 void ins_the_toks(void)
2095 {
2096     (void) the_toks();
2097     ins_list(token_link(temp_token_head));
2098 }
2099
/*
    Give token register |n| a fresh reference node (reference count 0) whose
    list is the one hanging off |t|. The local |a| is presumably what |define|
    inspects to choose between a local and a global assignment (confirm
    against the definition of |define|). The macro expands to a braced block
    that declares |a| and |ref|, so the arguments must not use those names.
*/
#define set_toks_register(n,t,g) { \
    int a = ((g) > 0) ? 4 : 0; \
    halfword ref = get_avail();  \
    set_token_ref_count(ref, 0); \
    set_token_link(ref, token_link(t)); \
    define((n) + toks_base, call_cmd, ref); \
}
2107
2108 void combine_the_toks(int how)
2109 {
2110     halfword nt;
2111     get_x_token();
2112     /* target */
2113     if (cur_cmd == assign_toks_cmd) {
2114         nt = equiv(cur_cs) - toks_base;
2115         /* check range */
2116     } else {
2117         back_input();
2118         scan_int();
2119         nt = cur_val;
2120     }
2121     /* source */
2122     do {
2123         get_x_token();
2124     } while (cur_cmd == spacer_cmd);
2125     if (cur_cmd == left_brace_cmd) {
2126         halfword x, source;
2127         back_input();
2128         x = scan_toks(false,how > 1); /* expanded or not */
2129         source = def_ref;
2130         /* action */
2131         if (source != null) {
2132             halfword target = toks(nt);
2133             if (target == null) {
2134                 set_toks_register(nt,source,0);
2135             } else {
2136                 halfword s = token_link(source);
2137                 if (s != null) {
2138                     halfword t = token_link(target);
2139                     if (t == null) {
2140                         /* can this happen ? */
2141                         set_token_link(target, s);
2142                     } else if (odd(how)) {
2143                         /* prepend */
2144                         if (cur_level != eq_level_field(eqtb[toks_base+nt])) {
2145                             halfword p = temp_token_head;
2146                             halfword q;
2147                             set_token_link(p, s); /* s = head, x = tail */
2148                             p = x;
2149                             while (t != null) {
2150                                 fast_store_new_token(token_info(t));
2151                                 t = token_link(t);
2152                             }
2153                             set_toks_register(nt,temp_token_head,0);
2154                         } else {
2155                             set_token_link(x,t);
2156                             set_token_link(target,s);
2157                         }
2158                     } else {
2159                         /* append */
2160                         if (cur_level != eq_level_field(eqtb[toks_base+nt])) {
2161                             halfword p = temp_token_head;
2162                             halfword q;
2163                             set_token_link(p, null);
2164                             while (t != null) {
2165                                 fast_store_new_token(token_info(t));
2166                                 t = token_link(t);
2167                             }
2168                             set_token_link(p,s);
2169                             set_toks_register(nt,temp_token_head,0);
2170                         } else {
2171                             while (token_link(t) != null) {
2172                                 t = token_link(t);
2173                             }
2174                             set_token_link(t,s);
2175                         }
2176                     }
2177                 }
2178             }
2179         }
2180     } else {
2181         halfword source, ns;
2182         if (cur_cmd == assign_toks_cmd) {
2183             ns = equiv(cur_cs) - toks_base;
2184             /* check range */
2185         } else {
2186             back_input();
2187             scan_int();
2188             ns = cur_val;
2189         }
2190         /* action */
2191         source = toks(ns);
2192         if (source != null) {
2193             halfword target = toks(nt);
2194             if (target == null) {
2195                 equiv(toks_base+nt) = source;
2196                 equiv(toks_base+ns) = null;
2197             } else {
2198                 halfword s = token_link(source);
2199                 if (s != null) {
2200                     halfword t = token_link(target);
2201                     if (t == null) {
2202                         set_token_link(target, s);
2203                     } else if (odd(how)) {
2204                         /* prepend */
2205                         halfword x = s;
2206                         while (token_link(x) != null) {
2207                             x = token_link(x);
2208                         }
2209                         set_token_link(x,t);
2210                         set_token_link(target,s);
2211                     } else {
2212                         /* append */
2213                         while (token_link(t) != null) {
2214                             t = token_link(t);
2215                         }
2216                         set_token_link(t,s);
2217                     }
2218                      equiv(toks_base+ns) = null;
2219                 }
2220             }
2221         }
2222     }
2223 }
2224
2225 @ This routine, used in the next one, prints the job name, possibly modified by
2226 the |process_jobname| callback.
2227
2228 @c
2229 static void print_job_name(void)
2230 {
2231    if (job_name) {
2232       char *s, *ss; /* C strings for jobname before and after processing */
2233       int callback_id, lua_retval;
2234       s = (char*)str_string(job_name);
2235       callback_id = callback_defined(process_jobname_callback);
2236       if (callback_id > 0) {
2237         lua_retval = run_callback(callback_id, "S->S", s, &ss);
2238         if ((lua_retval == true) && (ss != NULL))
2239             s = ss;
2240       }
2241       tprint(s);
2242    } else {
2243       print(job_name);
2244    }
2245 }
2246
@ Here is a routine that prints the result of a convert command, using the
argument |i|. It returns |false| if it does not know how to print the code |c|.
The function exists because lua code and tex code can both call it to convert
something.
2251
2252 @ Parse optional lua state integer, or an instance name to be stored in |sn| and
2253 get the next non-blank non-relax non-call token.
2254
2255 @c
2256
2257 int scan_lua_state(void)
2258 {
2259     int sn = 0;
2260     do {
2261         get_x_token();
2262     } while ((cur_cmd == spacer_cmd) || (cur_cmd == relax_cmd));
2263     back_input();
2264     if (cur_cmd != left_brace_cmd) {
2265         if (scan_keyword("name")) {
2266             (void) scan_toks(false, true);
2267             sn = def_ref;
2268         } else {
2269             scan_register_num();
2270             if (get_lua_name(cur_val))
2271                 sn = (cur_val - 65536);
2272         }
2273     }
2274     return sn;
2275 }
2276
2277 @ The procedure |conv_toks| uses |str_toks| to insert the token list for
2278 |convert| functions into the scanner; `\.{\\outer}' control sequences are allowed
2279 to follow `\.{\\string}' and `\.{\\meaning}'.
2280
2281 The extra temp string |u| is needed because |pdf_scan_ext_toks| incorporates any
2282 pending string in its output. In order to save such a pending string, we have to
2283 create a temporary string that is destroyed immediately after.
2284
2285 @c
/* remember the current |selector| in |old_setting| and switch to |new_string| */
#define push_selector { old_setting = selector; selector = new_string; }

/* go back to the |selector| that |push_selector| remembered */
#define pop_selector { selector = old_setting; }
2294
2295 static int do_variable_dvi(halfword c)
2296 {
2297     return 0;
2298 }
2299
/*
    Each of these three macros makes up the token of an internal backend
    parameter (integer, dimension or token list) with offset |i| from the
    matching base, and pushes it back onto the input with |back_input|. They
    expand to a sequence of statements, so a call has to sit inside braces
    when it is the body of an |if| or |else|.
*/
#define do_variable_backend_int(i) \
    cur_cmd = assign_int_cmd; \
    cur_val = backend_int_base + (i); \
    cur_tok = token_val(cur_cmd, cur_val); \
    back_input();

#define do_variable_backend_dimen(i) \
    cur_cmd = assign_dimen_cmd; \
    cur_val = backend_dimen_base + (i); \
    cur_tok = token_val(cur_cmd, cur_val); \
    back_input();

#define do_variable_backend_toks(i) \
    cur_cmd = assign_toks_cmd; \
    cur_val = backend_toks_base + (i); \
    cur_tok = token_val(cur_cmd, cur_val); \
    back_input();
2317
2318 static int do_variable_pdf(halfword c)
2319 {
2320          if (scan_keyword("compresslevel"))        { do_variable_backend_int(c_pdf_compress_level); }
2321     else if (scan_keyword("decimaldigits"))        { do_variable_backend_int(c_pdf_decimal_digits); }
2322     else if (scan_keyword("imageresolution"))      { do_variable_backend_int(c_pdf_image_resolution); }
2323     else if (scan_keyword("pkresolution"))         { do_variable_backend_int(c_pdf_pk_resolution); }
2324     else if (scan_keyword("uniqueresname"))        { do_variable_backend_int(c_pdf_unique_resname); }
2325     else if (scan_keyword("minorversion"))         { do_variable_backend_int(c_pdf_minor_version); }
2326     else if (scan_keyword("pagebox"))              { do_variable_backend_int(c_pdf_pagebox); }
2327     else if (scan_keyword("inclusionerrorlevel"))  { do_variable_backend_int(c_pdf_inclusion_errorlevel); }
2328     else if (scan_keyword("ignoreunknownimages"))  { do_variable_backend_int(c_pdf_ignore_unknown_images); }
2329     else if (scan_keyword("gamma"))                { do_variable_backend_int(c_pdf_gamma); }
2330     else if (scan_keyword("imageapplygamma"))      { do_variable_backend_int(c_pdf_image_apply_gamma); }
2331     else if (scan_keyword("imagegamma"))           { do_variable_backend_int(c_pdf_image_gamma); }
2332     else if (scan_keyword("imagehicolor"))         { do_variable_backend_int(c_pdf_image_hicolor); }
2333     else if (scan_keyword("imageaddfilename"))     { do_variable_backend_int(c_pdf_image_addfilename); }
2334     else if (scan_keyword("objcompresslevel"))     { do_variable_backend_int(c_pdf_obj_compress_level); }
2335     else if (scan_keyword("inclusioncopyfonts"))   { do_variable_backend_int(c_pdf_inclusion_copy_font); }
2336     else if (scan_keyword("gentounicode"))         { do_variable_backend_int(c_pdf_gen_tounicode); }
2337     else if (scan_keyword("pkfixeddpi"))           { do_variable_backend_int(c_pdf_pk_fixed_dpi); }
2338     else if (scan_keyword("suppressoptionalinfo")) { do_variable_backend_int(c_pdf_suppress_optional_info); }
2339
2340     else if (scan_keyword("horigin"))              { do_variable_backend_dimen(d_pdf_h_origin); }
2341     else if (scan_keyword("vorigin"))              { do_variable_backend_dimen(d_pdf_v_origin); }
2342     else if (scan_keyword("threadmargin"))         { do_variable_backend_dimen(d_pdf_thread_margin); }
2343     else if (scan_keyword("destmargin"))           { do_variable_backend_dimen(d_pdf_dest_margin); }
2344     else if (scan_keyword("linkmargin"))           { do_variable_backend_dimen(d_pdf_link_margin); }
2345     else if (scan_keyword("xformmargin"))          { do_variable_backend_dimen(d_pdf_xform_margin); }
2346
2347     else if (scan_keyword("pageattr"))             { do_variable_backend_toks(t_pdf_page_attr); }
2348     else if (scan_keyword("pageresources"))        { do_variable_backend_toks(t_pdf_page_resources); }
2349     else if (scan_keyword("pagesattr"))            { do_variable_backend_toks(t_pdf_pages_attr); }
2350     else if (scan_keyword("xformattr"))            { do_variable_backend_toks(t_pdf_xform_attr); }
2351     else if (scan_keyword("xformresources"))       { do_variable_backend_toks(t_pdf_xform_resources); }
2352     else if (scan_keyword("pkmode"))               { do_variable_backend_toks(t_pdf_pk_mode); }
2353     else if (scan_keyword("trailerid"))            { do_variable_backend_toks(t_pdf_trailer_id); }
2354
2355     else
2356         return 0;
2357     return 1;
2358 }
2359
2360 static int do_feedback_dvi(halfword c)
2361 {
2362     return 0;
2363 }
2364
/* codes not really needed but cleaner when testing */

/*
    Version identification reported by the `version' and `revision' keywords
    of |do_feedback_pdf|: the number is printed with |print_int|, the revision
    is inserted as a string. These values will not change any more.
*/
#define pdftex_revision "0"
#define pdftex_version  40
2369
2370 static int do_feedback_pdf(halfword c)
2371 {
2372     int old_setting;            /* holds |selector| setting */
2373     int save_scanner_status;    /* |scanner_status| upon entry */
2374     halfword save_def_ref;      /* |def_ref| upon entry, important if inside `\.{\\message}' */
2375     halfword save_warning_index;
2376     boolean bool;               /* temp boolean */
2377     str_number s;               /* first temp string */
2378     int ff;                     /* for use with |set_ff| */
2379     str_number u = 0;           /* third temp string, will become non-nil if a string is already being built */
2380     char *str;                  /* color stack init str */
2381
2382     if (scan_keyword("lastlink")) {
2383         push_selector;
2384         print_int(pdf_last_link);
2385         pop_selector;
2386     } else if (scan_keyword("retval")) {
2387         push_selector;
2388         print_int(pdf_retval);
2389         pop_selector;
2390     } else if (scan_keyword("lastobj")) {
2391         push_selector;
2392         print_int(pdf_last_obj);
2393         pop_selector;
2394     } else if (scan_keyword("lastannot")) {
2395         push_selector;
2396         print_int(pdf_last_annot);
2397         pop_selector;
2398     } else if (scan_keyword("xformname")) {
2399         scan_int();
2400         check_obj_type(static_pdf, obj_type_xform, cur_val);
2401         push_selector;
2402         print_int(obj_info(static_pdf, cur_val));
2403         pop_selector;
2404     } else if (scan_keyword("creationdate")) {
2405         ins_list(string_to_toks(getcreationdate(static_pdf)));
2406         /* no further action */
2407         return 2;
2408     } else if (scan_keyword("fontname")) {
2409         scan_font_ident();
2410         if (cur_val == null_font)
2411             normal_error("pdf backend", "invalid font identifier when asking 'fontname'");
2412         pdf_check_vf(cur_val);
2413         if (!font_used(cur_val))
2414             pdf_init_font(static_pdf, cur_val);
2415         push_selector;
2416         set_ff(cur_val);
2417         print_int(obj_info(static_pdf, pdf_font_num(ff)));
2418         pop_selector;
2419     } else if (scan_keyword("fontobjnum")) {
2420         scan_font_ident();
2421         if (cur_val == null_font)
2422             normal_error("pdf backend", "invalid font identifier when asking 'objnum'");
2423         pdf_check_vf(cur_val);
2424         if (!font_used(cur_val))
2425             pdf_init_font(static_pdf, cur_val);
2426         push_selector;
2427         set_ff(cur_val);
2428         print_int(pdf_font_num(ff));
2429         pop_selector;
2430     } else if (scan_keyword("fontsize")) {
2431         scan_font_ident();
2432         if (cur_val == null_font)
2433             normal_error("pdf backend", "invalid font identifier when asking 'fontsize'");
2434         push_selector;
2435         print_scaled(font_size(cur_val));
2436         tprint("pt");
2437         pop_selector;
2438     } else if (scan_keyword("pageref")) {
2439         scan_int();
2440         if (cur_val <= 0)
2441             normal_error("pdf backend", "invalid page number when asking 'pageref'");
2442         push_selector;
2443         print_int(pdf_get_obj(static_pdf, obj_type_page, cur_val, false));
2444         pop_selector;
2445     } else if (scan_keyword("colorstackinit")) {
2446         bool = scan_keyword("page");
2447         if (scan_keyword("direct"))
2448             cur_val = direct_always;
2449         else if (scan_keyword("page"))
2450             cur_val = direct_page;
2451         else
2452             cur_val = set_origin;
2453         save_scanner_status = scanner_status;
2454         save_warning_index = warning_index;
2455         save_def_ref = def_ref;
2456         u = save_cur_string();
2457         scan_toks(false, true);
2458         s = tokens_to_string(def_ref);
2459         delete_token_ref(def_ref);
2460         def_ref = save_def_ref;
2461         warning_index = save_warning_index;
2462         scanner_status = save_scanner_status;
2463         str = makecstring(s);
2464         cur_val = newcolorstack(str, cur_val, bool);
2465         free(str);
2466         flush_str(s);
2467         cur_val_level = int_val_level;
2468         if (cur_val < 0) {
2469             print_err("Too many color stacks");
2470             help2("The number of color stacks is limited to 32768.",
2471                   "I'll use the default color stack 0 here.");
2472             error();
2473             cur_val = 0;
2474             restore_cur_string(u);
2475         }
2476         push_selector;
2477         print_int(cur_val);
2478         pop_selector;
2479     } else if (scan_keyword("version")) {
2480         push_selector;
2481         print_int(pdftex_version);
2482         pop_selector;
2483     } else if (scan_keyword("revision")) {
2484         ins_list(string_to_toks(pdftex_revision));
2485         return 2;
2486     } else {
2487         return 0;
2488     }
2489     return 1;
2490 }
2491
2492 void conv_toks(void)
2493 {
2494     int old_setting;            /* holds |selector| setting */
2495     halfword p, q;
2496     int save_scanner_status;    /* |scanner_status| upon entry */
2497     halfword save_def_ref;      /* |def_ref| upon entry, important if inside `\.{\\message}' */
2498     halfword save_warning_index;
2499     boolean bool;               /* temp boolean */
2500     str_number s;               /* first temp string */
2501     int sn;                     /* lua chunk name */
2502     str_number u = 0;           /* third temp string, will become non-nil if a string is already being built */
2503     int c = cur_chr;            /* desired type of conversion */
2504     str_number str;
2505     int i = 0;
2506     /* Scan the argument for command |c| */
2507     switch (c) {
2508         case number_code:
2509             scan_int();
2510             push_selector;
2511             print_int(cur_val);
2512             pop_selector;
2513             break;
2514         case lua_function_code:
2515             scan_int();
2516             if (cur_val <= 0) {
2517                 normal_error("luafunction", "invalid number");
2518             } else {
2519                 u = save_cur_string();
2520                 luacstrings = 0;
2521                 luafunctioncall(cur_val);
2522                 restore_cur_string(u);
2523                 if (luacstrings > 0)
2524                     lua_string_start();
2525             }
2526             /* no further action */
2527             return;
2528             break;
2529         case lua_code:
2530             u = save_cur_string();
2531             save_scanner_status = scanner_status;
2532             save_def_ref = def_ref;
2533             save_warning_index = warning_index;
2534             sn = scan_lua_state();
2535             scan_toks(false, true);
2536             s = def_ref;
2537             warning_index = save_warning_index;
2538             def_ref = save_def_ref;
2539             scanner_status = save_scanner_status;
2540             luacstrings = 0;
2541             luatokencall(s, sn);
2542             delete_token_ref(s);
2543             restore_cur_string(u);  /* TODO: check this, was different */
2544             if (luacstrings > 0)
2545                 lua_string_start();
2546             /* no further action */
2547             return;
2548             break;
2549         case expanded_code:
2550             save_scanner_status = scanner_status;
2551             save_warning_index = warning_index;
2552             save_def_ref = def_ref;
2553             u = save_cur_string();
2554             scan_toks(false, true);
2555             warning_index = save_warning_index;
2556             scanner_status = save_scanner_status;
2557             ins_list(token_link(def_ref));
2558             def_ref = save_def_ref;
2559             restore_cur_string(u);
2560             /* no further action */
2561             return;
2562             break;
2563         case math_style_code:
2564             push_selector;
2565             print_math_style();
2566             pop_selector;
2567             break;
2568         case string_code:
2569             save_scanner_status = scanner_status;
2570             scanner_status = normal;
2571             get_token();
2572             scanner_status = save_scanner_status;
2573             push_selector;
2574             if (cur_cs != 0)
2575                 sprint_cs(cur_cs);
2576             else
2577                 print(cur_chr);
2578             pop_selector;
2579             break;
2580         case cs_string_code:
2581             save_scanner_status = scanner_status;
2582             scanner_status = normal;
2583             get_token();
2584             scanner_status = save_scanner_status;
2585             push_selector;
2586             if (cur_cs != 0)
2587                 sprint_cs_name(cur_cs);
2588             else
2589                 print(cur_chr);
2590             pop_selector;
2591             break;
2592         case roman_numeral_code:
2593             scan_int();
2594             push_selector;
2595             print_roman_int(cur_val);
2596             pop_selector;
2597             break;
2598         case meaning_code:
2599             save_scanner_status = scanner_status;
2600             scanner_status = normal;
2601             get_token();
2602             scanner_status = save_scanner_status;
2603             push_selector;
2604             print_meaning();
2605             pop_selector;
2606             break;
2607         case uchar_code:
2608             scan_char_num();
2609             push_selector;
2610             print(cur_val);
2611             pop_selector;
2612             break;
2613         case lua_escape_string_code:
2614             {
2615                 lstring escstr;
2616                 int l = 0;
2617                 save_scanner_status = scanner_status;
2618                 save_def_ref = def_ref;
2619                 save_warning_index = warning_index;
2620                 scan_toks(false, true);
2621                 bool = in_lua_escape;
2622                 in_lua_escape = true;
2623                 escstr.s = (unsigned char *) tokenlist_to_cstring(def_ref, false, &l);
2624                 escstr.l = (unsigned) l;
2625                 in_lua_escape = bool;
2626                 delete_token_ref(def_ref);
2627                 def_ref = save_def_ref;
2628                 warning_index = save_warning_index;
2629                 scanner_status = save_scanner_status;
2630                 (void) lua_str_toks(escstr);
2631                 ins_list(token_link(temp_token_head));
2632                 free(escstr.s);
2633                 return;
2634             }
2635             /* no further action */
2636             break;
2637         case font_id_code:
2638             scan_font_ident();
2639             push_selector;
2640             print_int(cur_val);
2641             pop_selector;
2642             break;
2643         case font_name_code:
2644             scan_font_ident();
2645             push_selector;
2646             append_string((unsigned char *) font_name(cur_val),(unsigned) strlen(font_name(cur_val)));
2647             if (font_size(cur_val) != font_dsize(cur_val)) {
2648                 tprint(" at ");
2649                 print_scaled(font_size(cur_val));
2650                 tprint("pt");
2651             }
2652             pop_selector;
2653             break;
2654         case left_margin_kern_code:
2655             scan_int();
2656             if ((box(cur_val) == null) || (type(box(cur_val)) != hlist_node))
2657                 normal_error("marginkern", "a non-empty hbox expected");
2658             push_selector;
2659             p = list_ptr(box(cur_val));
2660             while ((p != null) && (type(p) == glue_node)) {
2661                 p = vlink(p);
2662             }
2663             if ((p != null) && (type(p) == margin_kern_node) && (subtype(p) == left_side))
2664                 print_scaled(width(p));
2665             else
2666                 print_char('0');
2667             tprint("pt");
2668             pop_selector;
2669             break;
2670         case right_margin_kern_code:
2671             scan_int();
2672             if ((box(cur_val) == null) || (type(box(cur_val)) != hlist_node))
2673                 normal_error("marginkern", "a non-empty hbox expected");
2674             push_selector;
2675             p = list_ptr(box(cur_val));
2676             if (p != null) {
2677                 p = tail_of_list(p);
2678                 /*
2679                     there can be a leftskip, rightskip, penalty and yes, also a disc node with a nesting
2680                     node that points to glue spec ... and we don't want to analyze that messy lot
2681                 */
2682                 while ((p != null) && (type(p) == glue_node)) {
2683                     p = alink(p);
2684                 }
2685                 if ((p != null) && ! ((type(p) == margin_kern_node) && (subtype(p) == right_side))) {
2686                     if (type(p) == disc_node) {
2687                         q = alink(p);
2688                         if ((q != null) && ((type(q) == margin_kern_node) && (subtype(q) == right_side))) {
2689                             p = q;
2690                         } else {
2691                             /*
2692                                 officially we should look in the replace but currently protrusion doesn't
2693                                 work anyway with "foo\discretionary{}{}{bar-} " (no following char) so we
2694                                 don't need it now
2695                             */
2696                         }
2697                     }
2698                 }
2699             }
2700             if ((p != null) && (type(p) == margin_kern_node) && (subtype(p) == right_side))
2701                 print_scaled(width(p));
2702             else
2703                 print_char('0');
2704             tprint("pt");
2705             pop_selector;
2706             break;
2707         case uniform_deviate_code:
2708             scan_int();
2709             push_selector;
2710             print_int(unif_rand(cur_val));
2711             pop_selector;
2712             break;
2713         case normal_deviate_code:
2714             scan_int();
2715             push_selector;
2716             print_int(norm_rand());
2717             pop_selector;
2718             break;
2719         case math_char_class_code:
2720             {
2721                 mathcodeval mval;
2722                 scan_int();
2723                 mval = get_math_code(cur_val);
2724                 push_selector;
2725                 print_int(mval.class_value);
2726                 pop_selector;
2727             }
2728             break;
2729         case math_char_fam_code:
2730             {
2731                 mathcodeval mval;
2732                 scan_int();
2733                 mval = get_math_code(cur_val);
2734                 push_selector;
2735                 print_int(mval.family_value);
2736                 pop_selector;
2737             }
2738             break;
2739         case math_char_slot_code:
2740             {
2741                 mathcodeval mval;
2742                 scan_int();
2743                 mval = get_math_code(cur_val);
2744                 push_selector;
2745                 print_int(mval.character_value);
2746                 pop_selector;
2747             }
2748             break;
2749         case insert_ht_code:
2750             scan_register_num();
2751             push_selector;
2752             i = cur_val;
2753             p = page_ins_head;
2754             while (i >= subtype(vlink(p)))
2755                 p = vlink(p);
2756             if (subtype(p) == i)
2757                 print_scaled(height(p));
2758             else
2759                 print_char('0');
2760             tprint("pt");
2761             pop_selector;
2762             break;
2763         case job_name_code:
2764             if (job_name == 0)
2765                 open_log_file();
2766             push_selector;
2767             print_job_name();
2768             pop_selector;
2769             break;
2770         case format_name_code:
2771             if (job_name == 0)
2772                 open_log_file();
2773             push_selector;
2774             print(format_name);
2775             pop_selector;
2776             break;
2777         case luatex_banner_code:
2778             push_selector;
2779             tprint(luatex_banner);
2780             pop_selector;
2781             break;
2782         case luatex_revision_code:
2783             push_selector;
2784             print(get_luatexrevision());
2785             pop_selector;
2786             break;
2787         case luatex_date_code:
2788             push_selector;
2789             print_int(get_luatex_date_info());
2790             pop_selector;
2791             break;
2792         case etex_code:
2793             push_selector;
2794             tprint(eTeX_version_string);
2795             pop_selector;
2796             break;
2797         case eTeX_revision_code:
2798             push_selector;
2799             tprint(eTeX_revision);
2800             pop_selector;
2801             break;
2802         case font_identifier_code:
2803             confusion("convert");
2804             break;
2805         default:
2806             confusion("convert");
2807             break;
2808     }
2809     str = make_string();
2810     (void) str_toks(str_lstring(str));
2811     flush_str(str);
2812     ins_list(token_link(temp_token_head));
2813 }
2814
2815 void do_feedback(void)
2816 {
2817     int c = cur_chr;
2818     str_number str;
2819     int done = 1;
2820     switch (c) {
2821         case dvi_feedback_code:
2822             if (get_o_mode() == OMODE_DVI) {
2823                 done = do_feedback_dvi(c);
2824             } else {
2825                 tex_error("unexpected use of \\dvifeedback",null);
2826                 return ;
2827             }
2828             if (done==0) {
2829                 /* we recover */
2830                 normal_warning("dvi backend","unexpected use of \\dvifeedback");
2831                 return;
2832             } else if (done==2) {
2833                 return;
2834             }
2835             break;
2836         case pdf_feedback_code:
2837             if (get_o_mode() == OMODE_PDF) {
2838                 done = do_feedback_pdf(c);
2839             } else {
2840                 tex_error("unexpected use of \\pdffeedback",null);
2841                 return ;
2842             }
2843             if (done==0) {
2844                 /* we recover */
2845                 normal_warning("pdf backend","unexpected use of \\pdffeedback");
2846                 return;
2847             } else if (done==2) {
2848                 return;
2849             }
2850             break;
2851         default:
2852             confusion("feedback");
2853             break;
2854     }
2855     str = make_string();
2856     (void) str_toks(str_lstring(str));
2857     flush_str(str);
2858     ins_list(token_link(temp_token_head));
2859 }
2860
2861 void do_variable(void)
2862 {
2863     int c = cur_chr;
2864     int done = 1;
2865     switch (c) {
2866         case dvi_variable_code:
2867             done = do_variable_dvi(c);
2868             if (done==0) {
2869                 /* we recover */
2870                 normal_warning("dvi backend","unexpected use of \\dvivariable");
2871             }
2872             return;
2873             break;
2874         case pdf_variable_code:
2875             done = do_variable_pdf(c);
2876             if (done==0) {
2877                 /* we recover */
2878                 normal_warning("pdf backend","unexpected use of \\pdfvariable");
2879             }
2880             return;
2881             break;
2882         default:
2883             confusion("variable");
2884             break;
2885     }
2886 }
2887
2888 /*
2889     The following code is not used as we can only set math options and not query them. If
2890     an option is really important we will provide a proper variable. Most options are not
2891     meant for users anyway but for development.
2892 */
2893
2894 /*
2895
2896 #define do_mathoption_int(i) \
2897     cur_cmd = assign_int_cmd; \
2898     cur_val = mathoption_int_base + i; \
2899     cur_tok = token_val(cur_cmd, cur_val); \
2900     back_input();
2901
2902 void do_mathoption(void)
2903 {
2904          if (scan_keyword("old"))                    { do_mathoption_int(c_mathoption_no_italic_compensation_code); }
2905          if (scan_keyword("noitaliccompensation"))   { do_mathoption_int(c_mathoption_no_char_italic_code); }
2906     else if (scan_keyword("nocharitalic"))           { do_mathoption_int(c_mathoption_use_old_fraction_scaling_code); }
2907     else if (scan_keyword("useoldfractionscaling"))  { do_mathoption_int(c_mathoption_old_code); }
2908     else if (scan_keyword("umathcodemeaning"))       { do_mathoption_int(c_mathoption_umathcode_meaning_code); }
2909 }
2910
2911 */
2912
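@ This boolean keeps track of whether a Lua string escape is in progress:
|conv_toks| sets it while the argument of the |lua_escape_string_code| case is
converted to a C string and restores the previous value afterwards.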
2914 @c
2915 boolean in_lua_escape;
2916
2917 static int the_convert_string_dvi(halfword c, int i)
2918 {
2919     return 0 ;
2920 }
2921
2922 static int the_convert_string_pdf(halfword c, int i)
2923 {
2924     int ff;
2925     if (get_o_mode() != OMODE_PDF) {
2926         return 0;
2927     } else if (scan_keyword("lastlink")) {
2928         print_int(pdf_last_link);
2929     } else if (scan_keyword("retval")) {
2930         print_int(pdf_retval);
2931     } else if (scan_keyword("lastobj")) {
2932         print_int(pdf_last_obj);
2933     } else if (scan_keyword("lastannot")) {
2934         print_int(pdf_last_annot);
2935     } else if (scan_keyword("xformname")) {
2936         print_int(obj_info(static_pdf, i));
2937     } else if (scan_keyword("creationdate")) {
2938         return 0;
2939     } else if (scan_keyword("fontname")) {
2940         set_ff(i);
2941         print_int(obj_info(static_pdf, pdf_font_num(ff)));
2942     } else if (scan_keyword("fontobjnum")) {
2943         set_ff(i);
2944         print_int(pdf_font_num(ff));
2945     } else if (scan_keyword("fontsize")) {
2946         print_scaled(font_size(i));
2947         tprint("pt");
2948     } else if (scan_keyword("pageref")) {
2949         print_int(pdf_get_obj(static_pdf, obj_type_page, i, false));
2950     } else if (scan_keyword("colorstackinit")) {
2951         return 0;
2952     } else {
2953         return 0;
2954     }
2955     return 1;
2956 }
2957
2958 str_number the_convert_string(halfword c, int i)
2959 {
2960     int old_setting;            /* saved |selector| setting */
2961     str_number ret = 0;
2962     boolean done = true ;
2963     old_setting = selector;
2964     selector = new_string;
2965     switch (c) {
2966         case number_code:
2967             print_int(i);
2968             break;
2969      /* case lua_function_code: */
2970      /* case lua_code: */
2971      /* case expanded_code: */
2972         case math_style_code:
2973             print_math_style();
2974             break;
2975      /* case string_code: */
2976      /* case cs_string_code: */
2977         case roman_numeral_code:
2978             print_roman_int(i);
2979             break;
2980      /* case meaning_code: */
2981         case uchar_code:
2982             print(i);
2983             break;
2984      /* lua_escape_string_code: */
2985         case font_id_code:
2986             print_int(i);
2987             break;
2988         case font_name_code:
2989             append_string((unsigned char *) font_name(i),(unsigned) strlen(font_name(i)));
2990             if (font_size(i) != font_dsize(i)) {
2991                 tprint(" at ");
2992                 print_scaled(font_size(i));
2993                 tprint("pt");
2994             }
2995             break;
2996      /* left_margin_kern_code: */
2997      /* right_margin_kern_code: */
2998         case uniform_deviate_code:
2999             print_int(unif_rand(i));
3000             break;
3001         case normal_deviate_code:
3002             print_int(norm_rand());
3003             break;
3004      /* math_char_class_code: */
3005      /* math_char_fam_code: */
3006      /* math_char_slot_code: */
3007      /* insert_ht_code: */
3008         case job_name_code:
3009             print_job_name();
3010             break;
3011         case format_name_code:
3012             print(format_name);
3013             break;
3014         case luatex_banner_code:
3015             tprint(luatex_banner);
3016             break;
3017         case luatex_revision_code:
3018             print(get_luatexrevision());
3019             break;
3020         case luatex_date_code:
3021             print_int(get_luatex_date_info());
3022             break;
3023         case etex_code:
3024             tprint(eTeX_version_string);
3025             break;
3026         case eTeX_revision_code:
3027             tprint(eTeX_revision);
3028             break;
3029         case font_identifier_code:
3030             print_font_identifier(i);
3031             break;
3032         /* backend: this might become obsolete */
3033         case dvi_feedback_code:
3034             done = the_convert_string_dvi(c,i);
3035             break;
3036         case pdf_feedback_code:
3037             done = the_convert_string_pdf(c,i);
3038             break;
3039         /* done */
3040         default:
3041             done = false;
3042             break;
3043     }
3044     if (done) {
3045         ret = make_string();
3046     }
3047     selector = old_setting;
3048     return ret;
3049 }
3050
3051 @ Another way to create a token list is via the \.{\\read} command. The sixteen
3052 files potentially usable for reading appear in the following global variables.
3053 The value of |read_open[n]| will be |closed| if stream number |n| has not been
3054 opened or if it has been fully read; |just_open| if an \.{\\openin} but not a
3055 \.{\\read} has been done; and |normal| if it is open and ready to read the next
3056 line.
3057
3058 @c
3059 FILE *read_file[16]; /* used for \.{\\read} */
3060 int read_open[17];   /* state of |read_file[n]| */
3061
3062 void initialize_read(void)
3063 {
3064     int k;
3065     for (k = 0; k <= 16; k++)
3066         read_open[k] = closed;
3067 }
3068
3069 @ The |read_toks| procedure constructs a token list like that for any macro
3070 definition, and makes |cur_val| point to it. Parameter |r| points to the control
3071 sequence that will receive this token list.
3072
3073 @c
3074 void read_toks(int n, halfword r, halfword j)
3075 {
3076     halfword p; /* tail of the token list */
3077     halfword q; /* new node being added to the token list via |store_new_token| */
3078     int s;      /* saved value of |align_state| */
3079     int m;      /* stream number */
3080     scanner_status = defining;
3081     warning_index = r;
3082     p = get_avail();
3083     def_ref = p;
3084     set_token_ref_count(def_ref, 0);
3085     p = def_ref;                /* the reference count */
3086     store_new_token(end_match_token);
3087     if ((n < 0) || (n > 15))
3088         m = 16;
3089     else
3090         m = n;
3091     s = align_state;
3092     align_state = 1000000;      /* disable tab marks, etc. */
3093     do {
3094         /* Input and store tokens from the next line of the file */
3095         begin_file_reading();
3096         iname = m + 1;
3097         if (read_open[m] == closed) {
3098             /*
3099                 Input for \.{\\read} from the terminal
3100
3101                 Here we input on-line into the |buffer| array, prompting the user explicitly
3102                 if |n>=0|.  The value of |n| is set negative so that additional prompts
3103                 will not be given in the case of multi-line input.
3104             */
3105             if (interaction > nonstop_mode) {
3106                 if (n < 0) {
3107                     prompt_input("");
3108                 } else {
3109                     wake_up_terminal();
3110                     print_ln();
3111                     sprint_cs(r);
3112                     prompt_input(" =");
3113                     n = -1;
3114                 }
3115             } else {
3116                 fatal_error
3117                     ("*** (cannot \\read from terminal in nonstop modes)");
3118             }
3119
3120         } else if (read_open[m] == just_open) {
3121             /*
3122                 Input the first line of |read_file[m]|
3123
3124                 The first line of a file must be treated specially, since |lua_input_ln|
3125                 must be told not to start with |get|.
3126             */
3127             if (lua_input_ln(read_file[m], (m + 1), false)) {
3128                 read_open[m] = normal;
3129             } else {
3130                 lua_a_close_in(read_file[m], (m + 1));
3131                 read_open[m] = closed;
3132             }
3133
3134         } else {
3135             /*
3136                 Input the next line of |read_file[m]|
3137
3138                 An empty line is appended at the end of a |read_file|.
3139             */
3140             if (!lua_input_ln(read_file[m], (m + 1), true)) {
3141                 lua_a_close_in(read_file[m], (m + 1));
3142                 read_open[m] = closed;
3143                 if (align_state != 1000000) {
3144                     runaway();
3145                     print_err("File ended within \\read");
3146                     help1("This \\read has unbalanced braces.");
3147                     align_state = 1000000;
3148                     error();
3149                 }
3150             }
3151
3152         }
3153         ilimit = last;
3154         if (end_line_char_inactive)
3155             decr(ilimit);
3156         else
3157             buffer[ilimit] = (packed_ASCII_code) int_par(end_line_char_code);
3158         first = ilimit + 1;
3159         iloc = istart;
3160         istate = new_line;
3161         /* Handle \.{\\readline} and |goto done|; */
3162         if (j == 1) {
3163             while (iloc <= ilimit) {
3164                 /* current line not yet finished */
3165                 do_buffer_to_unichar(cur_chr, iloc);
3166                 if (cur_chr == ' ')
3167                     cur_tok = space_token;
3168                 else
3169                     cur_tok = cur_chr + other_token;
3170                 store_new_token(cur_tok);
3171             }
3172         } else {
3173             while (1) {
3174                 get_token();
3175                 if (cur_tok == 0) {
3176                     /* |cur_cmd=cur_chr=0| will occur at the end of the line */
3177                     break;
3178                 }
3179                 if (align_state < 1000000) {
3180                     /* unmatched `\.\}' aborts the line */
3181                     do {
3182                         get_token();
3183                     } while (cur_tok != 0);
3184                     align_state = 1000000;
3185                     break;
3186                 }
3187                 store_new_token(cur_tok);
3188             }
3189         }
3190         end_file_reading();
3191
3192     } while (align_state != 1000000);
3193     cur_val = def_ref;
3194     scanner_status = normal;
3195     align_state = s;
3196 }
3197
@ Return a string made from a token list.
3199
3200 @c
3201 str_number tokens_to_string(halfword p)
3202 {
3203     int old_setting;
3204     if (selector == new_string)
3205         normal_error("tokens","tokens_to_string() called while selector = new_string");
3206     old_setting = selector;
3207     selector = new_string;
3208     show_token_list(token_link(p), null, -1);
3209     selector = old_setting;
3210     return make_string();
3211 }
3212
3213 @ @c
/*
    Output helpers for |tokenlist_to_cstring|. They expand in a scope that
    provides the locals |char *ret| (the output buffer), |unsigned alloci|
    (its allocated size), |int i| (the number of bytes written so far) and,
    for |Print_esc|, |int e| (the escape character).
*/

/*
    Make sure that |a| more bytes plus a terminating zero fit into |ret|. The
    buffer grows in steps of 64 bytes until the request fits, so a request
    that is larger than a single step is handled as well.
*/
#define make_room(a) do {                                \
        if ((unsigned)i+(a)+1>alloci) {                  \
            do {                                         \
                alloci = alloci + 64;                    \
            } while ((unsigned)i+(a)+1>alloci);          \
            ret = xrealloc(ret,alloci);                  \
        }                                                \
    } while (0)

/* Store one byte at the write position; the caller has made room already. */
#define append_i_byte(a) ret[i++] = (char)(a)

/* Append a single byte, growing the buffer when needed. */
#define Print_char(a) do { make_room(1); append_i_byte(a); } while (0)

/*
    Append the character code |s|. Codes below |0x110000| are written as a
    UTF-8 sequence of one to four bytes; codes from |0x110000| upwards are
    written as the single byte |s-0x110000|.
*/
#define Print_uchar(s) {                                           \
    make_room(4);                                                  \
    if ((s)<=0x7F) {                                               \
      append_i_byte(s);                                            \
    } else if ((s)<=0x7FF) {                                       \
      append_i_byte(0xC0 + ((s) / 0x40));                          \
      append_i_byte(0x80 + ((s) % 0x40));                          \
    } else if ((s)<=0xFFFF) {                                      \
      append_i_byte(0xE0 + ((s) / 0x1000));                        \
      append_i_byte(0x80 + (((s) % 0x1000) / 0x40));               \
      append_i_byte(0x80 + (((s) % 0x1000) % 0x40));               \
    } else if ((s)>=0x110000) {                                    \
      append_i_byte((s)-0x110000);                                 \
    } else {                                                       \
      append_i_byte(0xF0 + ((s) / 0x40000));                       \
      append_i_byte(0x80 + (((s) % 0x40000) / 0x1000));            \
      append_i_byte(0x80 + ((((s) % 0x40000) % 0x1000) / 0x40));   \
      append_i_byte(0x80 + ((((s) % 0x40000) % 0x1000) % 0x40));   \
    } }

/*
    Append the escape character |e| (only when |0<e<STRING_OFFSET|) followed
    by the C string |b|.
*/
#define Print_esc(b) {                     \
    const char *v = (b);                   \
    if (e>0 && e<STRING_OFFSET) {          \
        Print_uchar (e);                   \
    }                                      \
    make_room(strlen(v));                  \
    while (*v) { append_i_byte(*v); v++; } \
  }

/* Append the C string |b| as it is. */
#define Print_str(b) {                     \
    const char *v = (b);                   \
    make_room(strlen(v));                  \
    while (*v) { append_i_byte(*v); v++; } \
  }

/* True when the first character of string |a| has category code 11. */
#define is_cat_letter(a) \
    (get_char_cat_code(pool_to_unichar(str_string((a)))) == 11)
3261
@ The actual token conversion in this function is now functionally equivalent to
|show_token_list|, except that it always prints the whole token list. TODO: check
whether this causes problems in the Lua library.
3265
3266 @c
3267 char *tokenlist_to_cstring(int pp, int inhibit_par, int *siz)
3268 {
3269     register int p, c, m;
3270     int q;
3271     int infop;
3272     char *s, *sh;
3273     int e = 0;
3274     char *ret;
3275     int match_chr = '#';
3276     int n = '0';
3277     unsigned alloci = 1024;
3278     int i = 0;
3279     p = pp;
3280     if (p == null) {
3281         if (siz != NULL)
3282             *siz = 0;
3283         return NULL;
3284     }
3285     ret = xmalloc(alloci);
3286     p = token_link(p);          /* skip refcount */
3287     if (p != null) {
3288         e = int_par(escape_char_code);
3289     }
3290     while (p != null) {
3291         if (p < (int) fix_mem_min || p > (int) fix_mem_end) {
3292             Print_esc("CLOBBERED.");
3293             break;
3294         }
3295         infop = token_info(p);
3296         if (infop >= cs_token_flag) {
3297             if (!(inhibit_par && infop == par_token)) {
3298                 q = infop - cs_token_flag;
3299                 if (q < hash_base) {
3300                     if (q == null_cs) {
3301                         Print_esc("csname");
3302                         Print_esc("endcsname");
3303                     } else {
3304                         Print_esc("IMPOSSIBLE.");
3305                     }
3306                 } else if ((q >= undefined_control_sequence) && ((q <= eqtb_size) || (q > eqtb_size + hash_extra))) {
3307                     Print_esc("IMPOSSIBLE.");
3308                 } else if ((cs_text(q) < 0) || (cs_text(q) >= str_ptr)) {
3309                     Print_esc("NONEXISTENT.");
3310                 } else {
3311                     str_number txt = cs_text(q);
3312                     sh = makecstring(txt);
3313                     s = sh;
3314                     if (is_active_cs(txt)) {
3315                         s = s + 3;
3316                         while (*s) {
3317                             Print_char(*s);
3318                             s++;
3319                         }
3320                     } else {
3321                         if (e>=0 && e<0x110000) Print_uchar(e);
3322                         while (*s) {
3323                             Print_char(*s);
3324                             s++;
3325                         }
3326                         if ((!single_letter(txt)) || is_cat_letter(txt)) {
3327                             Print_char(' ');
3328                         }
3329                     }
3330                     free(sh);
3331                 }
3332             }
3333         } else {
3334             if (infop < 0) {
3335                 Print_esc("BAD");
3336             } else {
3337                 m = token_cmd(infop);
3338                 c = token_chr(infop);
3339                 switch (m) {
3340                     case left_brace_cmd:
3341                     case right_brace_cmd:
3342                     case math_shift_cmd:
3343                     case tab_mark_cmd:
3344                     case sup_mark_cmd:
3345                     case sub_mark_cmd:
3346                     case spacer_cmd:
3347                     case letter_cmd:
3348                     case other_char_cmd:
3349                         Print_uchar(c);
3350                         break;
3351                     case mac_param_cmd:
3352                         if (!in_lua_escape && (is_in_csname==0))
3353                             Print_uchar(c);
3354                         Print_uchar(c);
3355                         break;
3356                     case out_param_cmd:
3357                         Print_uchar(match_chr);
3358                         if (c <= 9) {
3359                             Print_char(c + '0');
3360                         } else {
3361                             Print_char('!');
3362                             goto EXIT;
3363                         }
3364                         break;
3365                     case match_cmd:
3366                         match_chr = c;
3367                         Print_uchar(c);
3368                         n++;
3369                         Print_char(n);
3370                         if (n > '9')
3371                             goto EXIT;
3372                         break;
3373                     case end_match_cmd:
3374                         if (c == 0) {
3375                             Print_char('-');
3376                             Print_char('>');
3377                         }
3378                         break;
3379                     default:
3380                         not_so_bad(Print_esc);
3381                         break;
3382                 }
3383             }
3384         }
3385         p = token_link(p);
3386     }
3387   EXIT:
3388     ret[i] = '\0';
3389     if (siz != NULL)
3390         *siz = i;
3391     return ret;
3392 }
3393
3394 @ @c
3395 lstring *tokenlist_to_lstring(int pp, int inhibit_par)
3396 {
3397     int siz;
3398     lstring *ret = xmalloc(sizeof(lstring));
3399     ret->s = (unsigned char *) tokenlist_to_cstring(pp, inhibit_par, &siz);
3400     ret->l = (size_t) siz;
3401     return ret;
3402 }
3403
3404 @ @c
3405 void free_lstring(lstring * ls)
3406 {
3407     if (ls == NULL)
3408         return;
3409     if (ls->s != NULL)
3410         free(ls->s);
3411     free(ls);
3412 }