source/texk/web2c/luatexdir/tex/textoken.w

   1 % textoken.w
   2 %
   3 % Copyright 2006-2011 Taco Hoekwater <taco@@luatex.org>
   4 %
   5 % This file is part of LuaTeX.
   6 %
   7 % LuaTeX is free software; you can redistribute it and/or modify it under
   8 % the terms of the GNU General Public License as published by the Free
   9 % Software Foundation; either version 2 of the License, or (at your
  10 % option) any later version.
  11 %
  12 % LuaTeX is distributed in the hope that it will be useful, but WITHOUT
  13 % ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 % FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  15 % License for more details.
  16 %
  17 % You should have received a copy of the GNU General Public License along
  18 % with LuaTeX; if not, see <http://www.gnu.org/licenses/>.
  19
  20 @ @c
  21
  22 #include "ptexlib.h"
  23
  24 @ @c
  25 #define pausing int_par(pausing_code)
  26 #define cat_code_table int_par(cat_code_table_code)
  27 #define tracing_nesting int_par(tracing_nesting_code)
  28 #define suppress_outer_error int_par(suppress_outer_error_code)
  29 #define suppress_mathpar_error int_par(suppress_mathpar_error_code)
  30
  31
  32 #define every_eof equiv(every_eof_loc)
  33 #define box(A) equiv(box_base+(A))
  34 #define toks(A) equiv(toks_base+(A))
  35
  36 #define detokenized_line() (line_catcode_table==NO_CAT_TABLE)
  37
  38 #define do_get_cat_code(a,b) do {                                         \
  39     if (line_catcode_table!=DEFAULT_CAT_TABLE)                          \
  40       a=get_cat_code(line_catcode_table,b);                       \
  41     else                                                                \
  42       a=get_cat_code(cat_code_table,b);                           \
  43   } while (0)
  44
  45
  46 @ The \TeX\ system does nearly all of its own memory allocation, so that it can
  47 readily be transported into environments that do not have automatic facilities
  48 for strings, garbage collection, etc., and so that it can be in control of what
  49 error messages the user receives. The dynamic storage requirements of \TeX\ are
  50 handled by providing two large arrays called |fixmem| and |varmem| in which
  51 consecutive blocks of words are used as nodes by the \TeX\ routines.
  52
  53 Pointer variables are indices into this array, or into another array called
  54 |eqtb| that will be explained later. A pointer variable might also be a special
  55 flag that lies outside the bounds of |mem|, so we allow pointers to assume any
  56 |halfword| value. The minimum halfword value represents a null pointer. \TeX\
  57 does not assume that |mem[null]| exists.
  58
  59 @ Locations in |fixmem| are used for storing one-word records; a conventional
  60 \.{AVAIL} stack is used for allocation in this array.
  61
  62 @c
  63 smemory_word *fixmem;           /* the big dynamic storage area */
  64 unsigned fix_mem_min;           /* the smallest location of one-word memory in use */
  65 unsigned fix_mem_max;           /* the largest location of one-word memory in use */
  66
  67 @ In order to study the memory requirements of particular applications, it is
  68 possible to prepare a version of \TeX\ that keeps track of current and maximum
  69 memory usage. When code between the delimiters |@!stat| $\ldots$ |tats| is not
  70 commented out, \TeX\ will run a bit slower but it will report these statistics
  71 when |tracing_stats| is sufficiently large.
  72
  73 @c
  74 int var_used, dyn_used;         /* how much memory is in use */
  75
  76 halfword avail;                 /* head of the list of available one-word nodes */
  77 unsigned fix_mem_end;           /* the last one-word node used in |mem| */
  78
  79 halfword garbage;               /* head of a junk list, write only */
  80 halfword temp_token_head;       /* head of a temporary list of some kind */
  81 halfword hold_token_head;       /* head of a temporary list of another kind */
  82 halfword omit_template;         /* a constant token list */
  83 halfword null_list;             /* permanently empty list */
  84 halfword backup_head;           /* head of token list built by |scan_keyword| */
  85
  86 @ @c
  87 void initialize_tokens(void)
  88 {
  89     halfword p;
  90     avail = null;
  91     fix_mem_end = 0;
  92     p = get_avail();
  93     temp_token_head = p;
  94     set_token_info(temp_token_head, 0);
  95     p = get_avail();
  96     hold_token_head = p;
  97     set_token_info(hold_token_head, 0);
  98     p = get_avail();
  99     omit_template = p;
 100     set_token_info(omit_template, 0);
 101     p = get_avail();
 102     null_list = p;
 103     set_token_info(null_list, 0);
 104     p = get_avail();
 105     backup_head = p;
 106     set_token_info(backup_head, 0);
 107     p = get_avail();
 108     garbage = p;
 109     set_token_info(garbage, 0);
 110     dyn_used = 0;               /* initialize statistics */
 111 }
 112
 113 @ The function |get_avail| returns a pointer to a new one-word node whose |link|
 114 field is null. However, \TeX\ will halt if there is no more room left.
 115 @^inner loop@>
 116
 117 If the available-space list is empty, i.e., if |avail=null|, we try first to
 118 increase |fix_mem_end|. If that cannot be done, i.e., if
  119 |fix_mem_end=fix_mem_max|, we try to reallocate array |fixmem|. If that doesn't
  120 work, we have to quit.
 121
 122 @c
 123 halfword get_avail(void)
 124 {                               /* single-word node allocation */
 125     unsigned p;                 /* the new node being got */
 126     unsigned t;
 127     p = (unsigned) avail;       /* get top location in the |avail| stack */
 128     if (p != null) {
 129         avail = token_link(avail);      /* and pop it off */
 130     } else if (fix_mem_end < fix_mem_max) {     /* or go into virgin territory */
 131         incr(fix_mem_end);
 132         p = fix_mem_end;
 133     } else {
 134         smemory_word *new_fixmem;       /* the big dynamic storage area */
 135         t = (fix_mem_max / 5);
 136         new_fixmem =
 137             fixmemcast(realloc
 138                        (fixmem, sizeof(smemory_word) * (fix_mem_max + t + 1)));
 139         if (new_fixmem == NULL) {
 140             runaway();          /* if memory is exhausted, display possible runaway text */
 141             overflow("token memory size", fix_mem_max);
 142         } else {
 143             fixmem = new_fixmem;
 144         }
 145         memset(voidcast(fixmem + fix_mem_max + 1), 0, t * sizeof(smemory_word));
 146         fix_mem_max += t;
 147         p = ++fix_mem_end;
 148     }
 149     token_link(p) = null;       /* provide an oft-desired initialization of the new node */
 150     incr(dyn_used);             /* maintain statistics */
 151     return (halfword) p;
 152 }
 153
 154 @ The procedure |flush_list(p)| frees an entire linked list of one-word nodes
 155 that starts at position |p|.
 156 @^inner loop@>
 157
 158 @c
 159 void flush_list(halfword p)
 160 {                               /* makes list of single-word nodes available */
 161     halfword q, r;              /* list traversers */
 162     if (p != null) {
 163         r = p;
 164         do {
 165             q = r;
 166             r = token_link(r);
 167             decr(dyn_used);
 168         } while (r != null);    /* now |q| is the last node on the list */
 169         token_link(q) = avail;
 170         avail = p;
 171     }
 172 }
 173
 174 @ A \TeX\ token is either a character or a control sequence, and it is @^token@>
 175 represented internally in one of two ways: (1)~A character whose ASCII code
 176 number is |c| and whose command code is |m| is represented as the number
 177 $2^{21}m+c$; the command code is in the range |1<=m<=14|. (2)~A control sequence
 178 whose |eqtb| address is |p| is represented as the number |cs_token_flag+p|. Here
 179 |cs_token_flag=@t$2^{25}-1$@>| is larger than $2^{21}m+c$, yet it is small enough
 180 that |cs_token_flag+p< max_halfword|; thus, a token fits comfortably in a
 181 halfword.
 182
 183 A token |t| represents a |left_brace| command if and only if
 184 |t<left_brace_limit|; it represents a |right_brace| command if and only if we
 185 have |left_brace_limit<=t<right_brace_limit|; and it represents a |match| or
 186 |end_match| command if and only if |match_token<=t<=end_match_token|. The
 187 following definitions take care of these token-oriented constants and a few
 188 others.
 189
 190 @ A token list is a singly linked list of one-word nodes in |mem|, where each
 191 word contains a token and a link. Macro definitions, output-routine definitions,
 192 marks, \.{\\write} texts, and a few other things are remembered by \TeX\ in the
 193 form of token lists, usually preceded by a node with a reference count in its
 194 |token_ref_count| field. The token stored in location |p| is called |info(p)|.
 195
 196 Three special commands appear in the token lists of macro definitions. When
 197 |m=match|, it means that \TeX\ should scan a parameter for the current macro;
 198 when |m=end_match|, it means that parameter matching should end and \TeX\ should
 199 start reading the macro text; and when |m=out_param|, it means that \TeX\ should
 200 insert parameter number |c| into the text at this point.
 201
 202 The enclosing \.{\char'173} and \.{\char'175} characters of a macro definition
 203 are omitted, but the final right brace of an output routine is included at the
 204 end of its token list.
 205
 206 Here is an example macro definition that illustrates these conventions. After
 207 \TeX\ processes the text
 208
 209 $$\.{\\def\\mac a\#1\#2 \\b \{\#1\\-a \#\#1\#2 \#2\}}$$
 210
 211 the definition of \.{\\mac} is represented as a token list containing
 212
 213 $$\def\,{\hskip2pt}
 214 \vbox{\halign{\hfil#\hfil\cr
 215 (reference count), |letter|\,\.a, |match|\,\#, |match|\,\#, |spacer|\,\.\ ,
 216 \.{\\b}, |end_match|,\cr
 217 |out_param|\,1, \.{\\-}, |letter|\,\.a, |spacer|\,\.\ , |mac_param|\,\#,
 218 |other_char|\,\.1,\cr
 219 |out_param|\,2, |spacer|\,\.\ , |out_param|\,2.\cr}}$$
 220
 221 The procedure |scan_toks| builds such token lists, and |macro_call| does the
 222 parameter matching. @^reference counts@>
 223
 224 Examples such as $$\.{\\def\\m\{\\def\\m\{a\}\ b\}}$$ explain why reference
 225 counts would be needed even if \TeX\ had no \.{\\let} operation: When the token
 226 list for \.{\\m} is being read, the redefinition of \.{\\m} changes the |eqtb|
 227 entry before the token list has been fully consumed, so we dare not simply
 228 destroy a token list when its control sequence is being redefined.
 229
 230 If the parameter-matching part of a definition ends with `\.{\#\{}', the
 231 corresponding token list will have `\.\{' just before the `|end_match|' and also
 232 at the very end. The first `\.\{' is used to delimit the parameter; the second
 233 one keeps the first from disappearing.
 234
 235 The |print_meaning| subroutine displays |cur_cmd| and |cur_chr| in symbolic form,
 236 including the expansion of a macro or mark.
 237
 238 @c
 239 void print_meaning(void)
 240 {
 241     /* remap \mathchar onto \Umathchar */
 242     if (cur_cmd == math_given_cmd) {
 243         cur_cmd = xmath_given_cmd ;
 244     } /* else if (cur_cmd == math_char_num_cmd) {
 245         if (cur_chr == 0) {
 246             cur_chr = 1 ;
 247         }
 248     } */
 249     print_cmd_chr((quarterword) cur_cmd, cur_chr);
 250     if (cur_cmd >= call_cmd) {
 251         print_char(':');
 252         print_ln();
 253         token_show(cur_chr);
 254     } else {
 255         /* Show the meaning of a mark node */
 256         if ((cur_cmd == top_bot_mark_cmd) && (cur_chr < marks_code)) {
 257             print_char(':');
 258             print_ln();
 259             switch (cur_chr) {
 260                 case first_mark_code:
 261                     token_show(first_mark(0));
 262                     break;
 263                 case bot_mark_code:
 264                     token_show(bot_mark(0));
 265                     break;
 266                 case split_first_mark_code:
 267                     token_show(split_first_mark(0));
 268                     break;
 269                 case split_bot_mark_code:
 270                     token_show(split_bot_mark(0));
 271                     break;
 272                 default:
 273                     token_show(top_mark(0));
 274                     break;
 275             }
 276         }
 277     }
 278 }
 279
 280 @ The procedure |show_token_list|, which prints a symbolic form of the token list
 281 that starts at a given node |p|, illustrates these conventions. The token list
 282 being displayed should not begin with a reference count. However, the procedure
 283 is intended to be robust, so that if the memory links are awry or if |p| is not
 284 really a pointer to a token list, nothing catastrophic will happen.
 285
 286 An additional parameter |q| is also given; this parameter is either null or it
 287 points to a node in the token list where a certain magic computation takes place
 288 that will be explained later. (Basically, |q| is non-null when we are printing
 289 the two-line context information at the time of an error message; |q| marks the
 290 place corresponding to where the second line should begin.)
 291
 292 For example, if |p| points to the node containing the first \.a in the token list
 293 above, then |show_token_list| will print the string $$\hbox{`\.{a\#1\#2\ \\b\
 294 ->\#1\\-a\ \#\#1\#2\ \#2}';}$$ and if |q| points to the node containing the
 295 second \.a, the magic computation will be performed just before the second \.a is
 296 printed.
 297
 298 The generation will stop, and `\.{\\ETC.}' will be printed, if the length of
 299 printing exceeds a given limit~|l|. Anomalous entries are printed in the form of
 300 control sequences that are not followed by a blank space, e.g., `\.{\\BAD.}';
 301 this cannot be confused with actual control sequences because a real control
 302 sequence named \.{BAD} would come out `\.{\\BAD\ }'.
 303
 304 @c
 305 #define not_so_bad(p) \
 306     switch (m) { \
 307         case assign_int_cmd: \
 308             if (c >= (backend_int_base) && c <= (backend_int_last)) \
 309                 p("[internal backend integer]"); \
 310             break; \
 311         case assign_dimen_cmd: \
 312             if (c >= (backend_dimen_base) && c <= (backend_dimen_last)) \
 313                 p("[internal backend dimension]"); \
 314             break; \
 315         case assign_toks_cmd: \
 316             if (c >= (backend_toks_base) && c <= (backend_toks_last)) \
 317                 p("[internal backend tokenlist]"); \
 318             break; \
 319         default: \
 320             p("BAD"); \
 321             break; \
 322     }
 323
 324 void show_token_list(int p, int q, int l)
 325 {
 326     int m, c;                    /* pieces of a token */
 327     ASCII_code match_chr = '#';  /* character used in a `|match|' */
 328     ASCII_code n = '0';          /* the highest parameter number, as an ASCII digit */
 329     tally = 0;
 330     if (l < 0)
 331         l = 0x3FFFFFFF;
 332     while ((p != null) && (tally < l)) {
 333         if (p == q) {
 334             /* Do magic computation */
 335             set_trick_count();
 336         }
 337         /* Display token |p|, and |return| if there are problems */
 338         if ((p < (int) fix_mem_min) || (p > (int) fix_mem_end)) {
 339             tprint_esc("CLOBBERED.");
 340             return;
 341         }
 342         if (token_info(p) >= cs_token_flag) {
 343             if (!((inhibit_par_tokens) && (token_info(p) == par_token)))
 344                 print_cs(token_info(p) - cs_token_flag);
 345         } else {
 346             m = token_cmd(token_info(p));
 347             c = token_chr(token_info(p));
 348             if (token_info(p) < 0) {
 349                 tprint_esc("BAD");
 350             } else {
 351                 /*
 352                     Display the token $(|m|,|c|)$
 353
 354                     The procedure usually ``learns'' the character code used for macro
 355                     parameters by seeing one in a |match| command before it runs into any
 356                     |out_param| commands.
 357                 */
 358                 switch (m) {
 359                     case left_brace_cmd:
 360                     case right_brace_cmd:
 361                     case math_shift_cmd:
 362                     case tab_mark_cmd:
 363                     case sup_mark_cmd:
 364                     case sub_mark_cmd:
 365                     case spacer_cmd:
 366                     case letter_cmd:
 367                     case other_char_cmd:
 368                         print(c);
 369                         break;
 370                     case mac_param_cmd:
 371                         if (!in_lua_escape && (is_in_csname==0))
 372                             print(c);
 373                         print(c);
 374                         break;
 375                     case out_param_cmd:
 376                         print(match_chr);
 377                         if (c <= 9) {
 378                             print_char(c + '0');
 379                         } else {
 380                             print_char('!');
 381                             return;
 382                         }
 383                         break;
 384                     case match_cmd:
 385                         match_chr = c;
 386                         print(c);
 387                         incr(n);
 388                         print_char(n);
 389                         if (n > '9')
 390                             return;
 391                         break;
 392                     case end_match_cmd:
 393                         if (c == 0)
 394                             tprint("->");
 395                         break;
 396                     default:
 397                         not_so_bad(tprint);
 398                         break;
 399                 }
 400             }
 401         }
 402         p = token_link(p);
 403     }
 404     if (p != null)
 405         tprint_esc("ETC.");
 406 }
 407
 408 @ @c
 409 #define do_buffer_to_unichar(a,b) do { \
 410     a = (halfword)str2uni(buffer+b); \
 411     b += utf8_size(a); \
 412 } while (0)
 413
 414 @ Here's the way we sometimes want to display a token list, given a pointer to
 415 its reference count; the pointer may be null.
 416
 417 @c
 418 void token_show(halfword p)
 419 {
 420     if (p != null)
 421         show_token_list(token_link(p), null, 10000000);
 422 }
 423
  424 @ |delete_token_ref| is called when a pointer to a token list's reference count
  425 is being removed. This means that the token list should disappear if the
  426 reference count was |null|; otherwise the count should be decreased by one.
 427 @^reference counts@>
 428
 429 @ |p| points to the reference count of a token list that is losing one
 430 reference.
 431
 432 @c
 433 void delete_token_ref(halfword p)
 434 {
 435     if (token_ref_count(p) == 0)
 436         flush_list(p);
 437     else
 438         decr(token_ref_count(p));
 439 }
 440
 441 @ @c
 442 int get_char_cat_code(int curchr)
 443 {
 444     int a;
 445     do_get_cat_code(a,curchr);
 446     return a;
 447 }
 448
 449 @ @c
 450 static void invalid_character_error(void)
 451 {
 452     const char *hlp[] = {
 453         "A funny symbol that I can't read has just been input.",
 454         "Continue, and I'll forget that it ever happened.",
 455         NULL
 456     };
 457     deletions_allowed = false;
 458     tex_error("Text line contains an invalid character", hlp);
 459     deletions_allowed = true;
 460 }
 461
 462 @ @c
 463 static boolean process_sup_mark(void);  /* below */
 464
 465 static int scan_control_sequence(void); /* below */
 466
 467 typedef enum {
 468     next_line_ok,
 469     next_line_return,
 470     next_line_restart
 471 } next_line_retval;
 472
 473 static next_line_retval next_line(void); /* below */
 474
 475 @ In case you are getting bored, here is a slightly less trivial routine: Given a
 476 string of lowercase letters, like `\.{pt}' or `\.{plus}' or `\.{width}', the
 477 |scan_keyword| routine checks to see whether the next tokens of input match this
 478 string. The match must be exact, except that uppercase letters will match their
 479 lowercase counterparts; uppercase equivalents are determined by subtracting
 480 |"a"-"A"|, rather than using the |uc_code| table, since \TeX\ uses this routine
 481 only for its own limited set of keywords.
 482
 483 If a match is found, the characters are effectively removed from the input and
 484 |true| is returned. Otherwise |false| is returned, and the input is left
 485 essentially unchanged (except for the fact that some macros may have been
 486 expanded, etc.). @^inner loop@>
 487
 488 @c
 489 boolean scan_keyword(const char *s)
 490 {                               /* look for a given string */
 491     halfword p;                 /* tail of the backup list */
 492     halfword q;                 /* new node being added to the token list via |store_new_token| */
 493     const char *k;              /* index into |str_pool| */
 494     halfword save_cur_cs = cur_cs;
 495     int saved_align_state = align_state;
 496     if (strlen(s) == 0)        /* was assert (strlen(s) > 1); */
 497       return false ;           /* but not with newtokenlib  zero keyword simply doesn't match  */
 498     p = backup_head;
 499     token_link(p) = null;
 500     k = s;
 501     while (*k) {
 502         get_x_token();      /* recursion is possible here */
 503         if ((cur_cs == 0) &&
 504             ((cur_chr == *k) || (cur_chr == *k - 'a' + 'A'))) {
 505             store_new_token(cur_tok);
 506             k++;
 507         } else if ((cur_cmd != spacer_cmd) || (p != backup_head)) {
 508             if (p != backup_head) {
 509                 q = get_avail();
 510                 token_info(q) = cur_tok;
 511                 token_link(q) = null;
 512                 token_link(p) = q;
 513                 begin_token_list(token_link(backup_head), backed_up);
 514                 if (cur_cmd != endv_cmd)
 515                     align_state = saved_align_state;
 516             } else {
 517                 back_input();
 518             }
 519             cur_cs = save_cur_cs;
 520             return false;
 521         }
 522     }
 523     if (token_link(backup_head) != null)
 524         flush_list(token_link(backup_head));
 525     cur_cs = save_cur_cs;
 526     if (cur_cmd != endv_cmd)
 527         align_state = saved_align_state;
 528     return true;
 529 }
 530
 531 @ We can not return |undefined_control_sequence| under some conditions
 532  (inside |shift_case|, for example). This needs thinking.
 533
 534 @c
 535
 536 /*
 537     halfword active_to_cs(int curchr, int force)
 538     {
 539         halfword curcs;
 540         char *a, *b;
 541         char *utfbytes = xmalloc(8);
 542         int nncs = no_new_control_sequence;
 543         a = (char *) uni2str(0xFFFF);
 544         utfbytes = strcpy(utfbytes, a);
 545         if (force)
 546             no_new_control_sequence = false;
 547         if (curchr > 0) {
 548             b = (char *) uni2str((unsigned) curchr);
 549             utfbytes = strcat(utfbytes, b);
 550             free(b);
 551             curcs = string_lookup(utfbytes, strlen(utfbytes));
 552         } else {
 553             utfbytes[3] = '\0';
 554             curcs = string_lookup(utfbytes, 4);
 555         }
 556         no_new_control_sequence = nncs;
 557         free(a);
 558         free(utfbytes);
 559         return curcs;
 560     }
 561 */
 562
 563 /*static char * FFFF = "\xEF\xBF\xBF";*/ /* 0xFFFF */
 564
 565 halfword active_to_cs(int curchr, int force)
 566 {
 567     halfword curcs;
 568     int nncs = no_new_control_sequence;
 569     if (force) {
 570         no_new_control_sequence = false;
 571     }
 572     if (curchr > 0) {
 573         char *b = (char *) uni2str((unsigned) curchr);
 574         char *utfbytes = xmalloc(8);
 575         utfbytes = strcpy(utfbytes, "\xEF\xBF\xBF");
 576         utfbytes = strcat(utfbytes, b);
 577         free(b);
 578         curcs = string_lookup(utfbytes, utf8_size(curchr)+3);
 579         free(utfbytes);
 580     } else {
 581         curcs = string_lookup("\xEF\xBF\xBF", 4); /* 0xFFFF ... why not 3 ? */
 582     }
 583     no_new_control_sequence = nncs;
 584     return curcs;
 585 }
 586
 587 /*
 588
 589     static unsigned char *uni2csstr(unsigned unic)
 590     {
 591         unsigned char *buf = xmalloc(8);
 592         unsigned char *pt = buf;
 593         *pt++ = 239; *pt++ = 191; *pt++ = 191; // 0xFFFF
 594         if (unic < 0x80)
 595             *pt++ = (unsigned char) unic;
 596         else if (unic < 0x800) {
 597             *pt++ = (unsigned char) (0xc0 | (unic >> 6));
 598             *pt++ = (unsigned char) (0x80 | (unic & 0x3f));
 599         } else if (unic >= 0x110000) {
 600             *pt++ = (unsigned char) (unic - 0x110000);
 601         } else if (unic < 0x10000) {
 602             *pt++ = (unsigned char) (0xe0 | (unic >> 12));
 603             *pt++ = (unsigned char) (0x80 | ((unic >> 6) & 0x3f));
 604             *pt++ = (unsigned char) (0x80 | (unic & 0x3f));
 605         } else {
 606             int u, z, y, x;
 607             unsigned val = unic - 0x10000;
 608             u = (int) (((val & 0xf0000) >> 16) + 1);
 609             z = (int) ((val & 0x0f000) >> 12);
 610             y = (int) ((val & 0x00fc0) >> 6);
 611             x = (int) (val & 0x0003f);
 612             *pt++ = (unsigned char) (0xf0 | (u >> 2));
 613             *pt++ = (unsigned char) (0x80 | ((u & 3) << 4) | z);
 614             *pt++ = (unsigned char) (0x80 | y);
 615             *pt++ = (unsigned char) (0x80 | x);
 616         }
 617         *pt = '\0';
 618         return buf;
 619     }
 620
 621     halfword active_to_cs(int curchr, int force)
 622     {
 623         halfword curcs;
 624         int nncs = no_new_control_sequence;
 625         if (force) {
 626             no_new_control_sequence = false;
 627         }
 628         if (curchr > 0) {
 629             char * utfbytes = (char *) uni2csstr((unsigned) curchr);
 630             curcs = string_lookup(utfbytes, utf8_size(curchr)+3);
 631             free(utfbytes);
 632         } else {
 633             curcs = string_lookup(FFFF, 4); // 0xFFFF ... why not 3 ?
 634         }
 635         no_new_control_sequence = nncs;
 636         return curcs;
 637     }
 638
 639 */
 640
 641 @ TODO this function should listen to \.{\\escapechar}
 642
 643 @ prints a control sequence
 644
 645 @c
 646 static char *cs_to_string(halfword p)
 647 {
 648     const char *s;
 649     char *sh;
 650     int k = 0;
 651     static char ret[256] = { 0 };
 652     if (p == 0 || p == null_cs) {
 653         ret[k++] = '\\';
 654         s = "csname";
 655         while (*s) {
 656             ret[k++] = *s++;
 657         }
 658         ret[k++] = '\\';
 659         s = "endcsname";
 660         while (*s) {
 661             ret[k++] = *s++;
 662         }
 663         ret[k] = 0;
 664
 665     } else {
 666         str_number txt = cs_text(p);
 667         sh = makecstring(txt);
 668         s = sh;
 669         if (is_active_cs(txt)) {
 670             s = s + 3;
 671             while (*s) {
 672                 ret[k++] = *s++;
 673             }
 674             ret[k] = 0;
 675         } else {
 676             ret[k++] = '\\';
 677             while (*s) {
 678                 ret[k++] = *s++;
 679             }
 680             ret[k] = 0;
 681         }
 682         free(sh);
 683     }
 684     return (char *) ret;
 685 }
 686
 687 @ TODO this is a quick hack, will be solved differently soon
 688
 689 @c
 690 static char *cmd_chr_to_string(int cmd, int chr)
 691 {
 692     char *s;
 693     str_number str;
 694     int sel = selector;
 695     selector = new_string;
 696     print_cmd_chr((quarterword) cmd, chr);
 697     str = make_string();
 698     s = makecstring(str);
 699     selector = sel;
 700     flush_str(str);
 701     return s;
 702 }
 703
 704 @ The heart of \TeX's input mechanism is the |get_next| procedure, which we shall
 705 develop in the next few sections of the program. Perhaps we shouldn't actually
 706 call it the ``heart,'' however, because it really acts as \TeX's eyes and mouth,
 707 reading the source files and gobbling them up. And it also helps \TeX\ to
 708 regurgitate stored token lists that are to be processed again. @^eyes and mouth@>
 709
 710 The main duty of |get_next| is to input one token and to set |cur_cmd| and
 711 |cur_chr| to that token's command code and modifier. Furthermore, if the input
 712 token is a control sequence, the |eqtb| location of that control sequence is
 713 stored in |cur_cs|; otherwise |cur_cs| is set to zero.
 714
 715 Underlying this simple description is a certain amount of complexity because of
 716 all the cases that need to be handled. However, the inner loop of |get_next| is
 717 reasonably short and fast.
 718
 719 When |get_next| is asked to get the next token of a \.{\\read} line,
 720 it sets |cur_cmd=cur_chr=cur_cs=0| in the case that no more tokens
 721 appear on that line. (There might not be any tokens at all, if the
 722 |end_line_char| has |ignore| as its catcode.)
 723
 724 The value of |par_loc| is the |eqtb| address of `\.{\\par}'. This quantity is
 725 needed because a blank line of input is supposed to be exactly equivalent to the
 726 appearance of \.{\\par}; we must set |cur_cs:=par_loc| when detecting a blank
 727 line.
 728
 729 @c
  730 halfword par_loc;   /* |eqtb| address of `\.{\\par}'; |cur_cs| is set to it when a blank line is detected */
  731 halfword par_token; /* the token that represents `\.{\\par}' in token lists */
 732
  733 @ Parts of |get_next| are executed more often than any other instructions of \TeX.
 734 @^mastication@>@^inner loop@>
 735
 736 The global variable |force_eof| is normally |false|; it is set |true| by an
 737 \.{\\endinput} command. |luacstrings| is the number of lua print statements
 738 waiting to be input, it is changed by |luatokencall|.
 739
 740 @c
  741 boolean force_eof; /* normally |false|; an \.{\\endinput} command sets it |true| */
  742 int luacstrings;   /* number of lua print statements waiting to be input; changed by |luatokencall| */
 743
 744 @ If the user has set the |pausing| parameter to some positive value, and if
 745 nonstop mode has not been selected, each line of input is displayed on the
 746 terminal and the transcript file, followed by `\.{=>}'. \TeX\ waits for a
 747 response. If the response is simply |carriage_return|, the line is accepted as it
 748 stands, otherwise the line typed is used instead of the line in the file.
 749
 750 @c
 751 void firm_up_the_line(void)
 752 {
 753     int k;                      /* an index into |buffer| */
 754     ilimit = last;
 755     if (pausing > 0) {
 756         if (interaction > nonstop_mode) {
 757             wake_up_terminal();
 758             print_ln();
 759             if (istart < ilimit) {
 760                 for (k = istart; k <= ilimit - 1; k++)
 761                     print_char(buffer[k]);
 762             }
 763             first = ilimit;
 764             prompt_input("=>"); /* wait for user response */
 765             if (last > first) {
 766                 for (k = first; k < +last - 1; k++)     /* move line down in buffer */
 767                     buffer[k + istart - first] = buffer[k];
 768                 ilimit = istart + last - first;
 769             }
 770         }
 771     }
 772 }
 773
 774 @ Before getting into |get_next|, let's consider the subroutine that is called
 775 when an `\.{\\outer}' control sequence has been scanned or when the end of a file
 776 has been reached. These two cases are distinguished by |cur_cs|, which is zero at
 777 the end of a file.
 778
 779 @c
 780 void check_outer_validity(void)
 781 {
 782     halfword p;                 /* points to inserted token list */
 783     halfword q;                 /* auxiliary pointer */
 784     if (suppress_outer_error)
 785         return;
 786     if (scanner_status != normal) {
 787         deletions_allowed = false;
 788         /* Back up an outer control sequence so that it can be reread; */
 789         /* An outer control sequence that occurs in a \.{\\read} will not be reread,
 790            since the error recovery for \.{\\read} is not very powerful. */
 791         if (cur_cs != 0) {
 792             if ((istate == token_list) || (iname < 1) || (iname > 17)) {
 793                 p = get_avail();
 794                 token_info(p) = cs_token_flag + cur_cs;
 795                 begin_token_list(p, backed_up); /* prepare to read the control sequence again */
 796             }
 797             cur_cmd = spacer_cmd;
 798             cur_chr = ' ';      /* replace it by a space */
 799         }
 800         if (scanner_status > skipping) {
 801             const char *errhlp[] = {
 802                 "I suspect you have forgotten a `}', causing me",
 803                 "to read past where you wanted me to stop.",
 804                 "I'll try to recover; but if the error is serious,",
 805                 "you'd better type `E' or `X' now and fix your file.",
 806                 NULL
 807             };
 808             char errmsg[256];
 809             const char *startmsg;
 810             const char *scannermsg;
 811             /* Tell the user what has run away and try to recover */
 812             runaway();          /* print a definition, argument, or preamble */
 813             if (cur_cs == 0) {
 814                 startmsg = "File ended";
 815             } else {
 816                 cur_cs = 0;
 817                 startmsg = "Forbidden control sequence found";
 818             }
 819             /* Print either `\.{definition}' or `\.{use}' or `\.{preamble}' or `\.{text}',
 820                and insert tokens that should lead to recovery; */
 821             /* The recovery procedure can't be fully understood without knowing more
 822                about the \TeX\ routines that should be aborted, but we can sketch the
 823                ideas here:  For a runaway definition we will insert a right brace; for a
 824                runaway preamble, we will insert a special \.{\\cr} token and a right
 825                brace; and for a runaway argument, we will set |long_state| to
 826                |outer_call| and insert \.{\\par}. */
 827             p = get_avail();
 828             switch (scanner_status) {
 829             case defining:
 830                 scannermsg = "definition";
 831                 token_info(p) = right_brace_token + '}';
 832                 break;
 833             case matching:
 834                 scannermsg = "use";
 835                 token_info(p) = par_token;
 836                 long_state = outer_call_cmd;
 837                 break;
 838             case aligning:
 839                 scannermsg = "preamble";
 840                 token_info(p) = right_brace_token + '}';
 841                 q = p;
 842                 p = get_avail();
 843                 token_link(p) = q;
 844                 token_info(p) = cs_token_flag + frozen_cr;
 845                 align_state = -1000000;
 846                 break;
 847             case absorbing:
 848                 scannermsg = "text";
 849                 token_info(p) = right_brace_token + '}';
 850                 break;
 851             default:           /* can't happen */
 852                 scannermsg = "unknown";
 853                 break;
 854             }                   /*there are no other cases */
 855             begin_token_list(p, inserted);
 856             snprintf(errmsg, 255, "%s while scanning %s of %s",
 857                      startmsg, scannermsg, cs_to_string(warning_index));
 858             tex_error(errmsg, errhlp);
 859         } else {
 860             char errmsg[256];
 861             const char *errhlp_no[] = {
 862                 "The file ended while I was skipping conditional text.",
 863                 "This kind of error happens when you say `\\if...' and forget",
 864                 "the matching `\\fi'. I've inserted a `\\fi'; this might work.",
 865                 NULL
 866             };
 867             const char *errhlp_cs[] = {
 868                 "A forbidden control sequence occurred in skipped text.",
 869                 "This kind of error happens when you say `\\if...' and forget",
 870                 "the matching `\\fi'. I've inserted a `\\fi'; this might work.",
 871                 NULL
 872             };
 873             const char **errhlp = (const char **) errhlp_no;
 874             char *ss;
 875             if (cur_cs != 0) {
 876                 errhlp = errhlp_cs;
 877                 cur_cs = 0;
 878             }
 879             ss = cmd_chr_to_string(if_test_cmd, cur_if);
 880             snprintf(errmsg, 255, "Incomplete %s; all text was ignored after line %d",
 881                  ss, (int) skip_line);
 882             free(ss);
 883             /* Incomplete \\if... */
 884             cur_tok = cs_token_flag + frozen_fi;
 885             /* back up one inserted token and call |error| */
 886             {
 887                 OK_to_interrupt = false;
 888                 back_input();
 889                 token_type = inserted;
 890                 OK_to_interrupt = true;
 891                 tex_error(errmsg, errhlp);
 892             }
 893         }
 894         deletions_allowed = true;
 895     }
 896 }
 897
 898 @ @c
 899
 900 #if 0
 901
 902 /*
 903     The other variant gives less clutter in tracing cache usage when profiling and for
 904     some files (like the manual) also a bit of a speedup.
 905 */
 906
 907 static boolean get_next_file(void)
 908 {
    /*
        Disabled (|#if 0|) variant that dispatches on |istate + cur_cmd| with
        one single switch. It returns |false| in the places where the
        commentary below says |goto restart| and |true| on every other exit.
    */
 909   SWITCH:
 910     if (iloc <= ilimit) {
 911         /* current line not yet finished */
 912         do_buffer_to_unichar(cur_chr, iloc);
 913
 914       RESWITCH:
 915         if (detokenized_line()) {
 916             cur_cmd = (cur_chr == ' ' ? 10 : 12);
 917         } else {
 918             do_get_cat_code(cur_cmd, cur_chr);
 919         }
 920         /*
 921             Change state if necessary, and |goto switch| if the current
 922             character should be ignored, or |goto reswitch| if the current
 923             character changes to another;
 924
 925             The following 48-way switch accomplishes the scanning quickly, assuming
 926             that a decent C compiler has translated the code. Note that the numeric
 927             values for |mid_line|, |skip_blanks|, and |new_line| are spaced
 928             apart from each other by |max_char_code+1|, so we can add a character's
 929             command code to the state to get a single number that characterizes both.
 930
 931             Remark [ls/hh]: checking performance indicated that this switch was the
 932             cause of many branch prediction errors but changing it to:
 933
 934                 c = istate + cur_cmd;
 935                 if (c == (mid_line + letter_cmd) || c == (mid_line + other_char_cmd)) {
 936                     return true;
 937                 } else if (c >= new_line) {
 938                     switch (c) {
 939                     }
 940                 } else if (c >= skip_blanks) {
 941                     switch (c) {
 942                     }
 943                 } else if (c >= mid_line) {
 944                     switch (c) {
 945                     }
 946                 } else {
 947                     istate = mid_line;
 948                     return true;
 949                 }
 950
 951             gives as many prediction errors. So, we can indeed assume that the compiler
 952             does the right job, or that there is simply no other way.
 953         */
 954
 955         switch (istate + cur_cmd) {
 956             case mid_line + ignore_cmd:
 957             case skip_blanks + ignore_cmd:
 958             case new_line + ignore_cmd:
 959             case skip_blanks + spacer_cmd:
 960             case new_line + spacer_cmd:
 961                 /* Cases where character is ignored */
 962                 goto SWITCH;
 963                 break;
 964             case mid_line + escape_cmd:
 965             case new_line + escape_cmd:
 966             case skip_blanks + escape_cmd:
 967                 /* Scan a control sequence; the value returned by the scanner becomes the new |istate| */
 968                 istate = (unsigned char) scan_control_sequence();
 969                 if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
 970                     check_outer_validity();
 971                 break;
 972             case mid_line + active_char_cmd:
 973             case new_line + active_char_cmd:
 974             case skip_blanks + active_char_cmd:
 975                 /* Process an active-character  */
 976                 cur_cs = active_to_cs(cur_chr, false);
 977                 cur_cmd = eq_type(cur_cs);
 978                 cur_chr = equiv(cur_cs);
 979                 istate = mid_line;
 980                 if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
 981                     check_outer_validity();
 982                 break;
 983             case mid_line + sup_mark_cmd:
 984             case new_line + sup_mark_cmd:
 985             case skip_blanks + sup_mark_cmd:
 986                 /* If this |sup_mark| starts an expanded character, |goto reswitch| with the converted |cur_chr|; otherwise enter |mid_line| */
 987                 if (process_sup_mark())
 988                     goto RESWITCH;
 989                 else
 990                     istate = mid_line;
 991                 break;
 992             case mid_line + invalid_char_cmd:
 993             case new_line + invalid_char_cmd:
 994             case skip_blanks + invalid_char_cmd:
 995                 /* Decry the invalid character and |goto restart|; */
 996                 invalid_character_error();
 997                 return false; /* because state may be |token_list| now */
 998                 break;
 999             case mid_line + spacer_cmd:
1000                 /* Enter |skip_blanks| state, emit a space; */
1001                 istate = skip_blanks;
1002                 cur_chr = ' ';
1003                 break;
1004             case mid_line + car_ret_cmd:
1005                 /*
1006                     Finish line, emit a space. When a character of type |spacer| gets through, its
1007                     character code is changed to $\.{"\ "}=040$. This means that the ASCII codes
1008                     for tab and space, and for the space inserted at the end of a line, will be
1009                     treated alike when macro parameters are being matched. We do this since such
1010                     characters are indistinguishable on most computer terminal displays.
1011                  */
1012                 iloc = ilimit + 1;
1013                 cur_cmd = spacer_cmd;
1014                 cur_chr = ' ';
1015                 break;
1016             case skip_blanks + car_ret_cmd:
1017             case mid_line + comment_cmd:
1018             case new_line + comment_cmd:
1019             case skip_blanks + comment_cmd:
1020                 /* Finish line, |goto switch|; */
1021                 iloc = ilimit + 1;
1022                 goto SWITCH;
1023                 break;
1024             case new_line + car_ret_cmd:
1025                 /* Finish line, emit a \.{\\par}; */
1026                 iloc = ilimit + 1;
1027                 cur_cs = par_loc;
1028                 cur_cmd = eq_type(cur_cs);
1029                 cur_chr = equiv(cur_cs);
1030                 if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
1031                     check_outer_validity();
1032                 break;
1033             case skip_blanks + left_brace_cmd:
1034             case new_line + left_brace_cmd:
1035                 istate = mid_line;
1036                 /* fall through */
1037             case mid_line + left_brace_cmd:
1038                 align_state++;
1039                 break;
1040             case skip_blanks + right_brace_cmd:
1041             case new_line + right_brace_cmd:
1042                 istate = mid_line;
1043                 /* fall through */
1044             case mid_line + right_brace_cmd:
1045                 align_state--;
1046                 break;
1047             case mid_line + math_shift_cmd:
1048             case mid_line + tab_mark_cmd:
1049             case mid_line + mac_param_cmd:
1050             case mid_line + sub_mark_cmd:
1051             case mid_line + letter_cmd:
1052             case mid_line + other_char_cmd:
1053                 break;
1054             /*
1055             case skip_blanks + math_shift:
1056             case skip_blanks + tab_mark:
1057             case skip_blanks + mac_param:
1058             case skip_blanks + sub_mark:
1059             case skip_blanks + letter:
1060             case skip_blanks + other_char:
1061             case new_line    + math_shift:
1062             case new_line    + tab_mark:
1063             case new_line    + mac_param:
1064             case new_line    + sub_mark:
1065             case new_line    + letter:
1066             case new_line    + other_char:
1067             */
1068             default:
1069                 istate = mid_line;
1070                 break;
1071         }
1072     } else {
1073         if (iname != 21)
1074             istate = new_line;
1075         /*
1076            Move to next line of file,
1077            or |goto restart| if there is no next line,
1078            or |return| if a \.{\\read} line has finished;
1079          */
1080         do {
1081             next_line_retval r = next_line();
1082             if (r == next_line_return) {
1083                 return true;
1084             } else if (r == next_line_restart) {
1085                 return false;
1086             }
1087         } while (0);
1088         check_interrupt();
1089         goto SWITCH;
1090     }
1091     return true;
1092 }
1093
1094 #else
1095
1096 /* 10 times less Bim in callgrind */
1097
1098 /*
1099     escape_cmd left_brace_cmd right_brace_cmd math_shift_cmd
1100     tab_mark_cmd car_ret_cmd mac_param_cmd sup_mark_cmd
1101     sub_mark_cmd ignore_cmd spacer_cmd letter_cmd
1102     other_char_cmd active_char_cmd comment_cmd invalid_char_cmd
1103 */
1104
1105 static boolean get_next_file(void)
1106 {
1107     int c = 0;
1108   SWITCH:
1109     if (iloc <= ilimit) {
1110         /* current line not yet finished */
1111         do_buffer_to_unichar(cur_chr, iloc);
1112       RESWITCH:
1113         if (detokenized_line()) {
1114             cur_cmd = (cur_chr == ' ' ? 10 : 12);
1115         } else {
1116             do_get_cat_code(cur_cmd, cur_chr);
1117         }
1118         /*
1119            Change state if necessary, and |goto switch| if the current
1120            character should be ignored, or |goto reswitch| if the current
1121            character changes to another;
1122         */
1123         c = istate + cur_cmd;
1124         if (c == (mid_line + letter_cmd) || c == (mid_line + other_char_cmd)) {
1125             return true;
1126         } else if (c >= new_line) {
1127             switch (c-new_line) {
1128                 case escape_cmd:
1129                     istate = (unsigned char) scan_control_sequence();
1130                     if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
1131                         check_outer_validity();
1132                     return true;
1133                 case left_brace_cmd:
1134                     istate = mid_line;
1135                     align_state++;
1136                     return true;
1137                 case right_brace_cmd:
1138                     istate = mid_line;
1139                     align_state--;
1140                     return true;
1141                 case math_shift_cmd:
1142                     istate = mid_line;
1143                     return true;
1144                 case tab_mark_cmd:
1145                     istate = mid_line;
1146                     return true;
1147                 case car_ret_cmd:
1148                     /* Finish line, emit a \.{\\par}; */
1149                     iloc = ilimit + 1;
1150                     cur_cs = par_loc;
1151                     cur_cmd = eq_type(cur_cs);
1152                     cur_chr = equiv(cur_cs);
1153                     if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
1154                         check_outer_validity();
1155                     return true;
1156                 case mac_param_cmd:
1157                     istate = mid_line;
1158                     return true;
1159                 case sup_mark_cmd:
1160                     if (process_sup_mark())
1161                         goto RESWITCH;
1162                     else
1163                         istate = mid_line;
1164                     return true;
1165                 case sub_mark_cmd:
1166                     istate = mid_line;
1167                     return true;
1168                 case ignore_cmd:
1169                     goto SWITCH;
1170                     return true;
1171                 case spacer_cmd:
1172                     /* Cases where character is ignored */
1173                     goto SWITCH;
1174                 case letter_cmd:
1175                     istate = mid_line;
1176                     return true;
1177                 case other_char_cmd:
1178                     istate = mid_line;
1179                     return true;
1180                 case active_char_cmd:
1181                     cur_cs = active_to_cs(cur_chr, false);
1182                     cur_cmd = eq_type(cur_cs);
1183                     cur_chr = equiv(cur_cs);
1184                     istate = mid_line;
1185                     if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
1186                         check_outer_validity();
1187                     return true;
1188                 case comment_cmd:
1189                     iloc = ilimit + 1;
1190                     goto SWITCH;
1191                 case invalid_char_cmd:
1192                     invalid_character_error();
1193                     return false; /* because state may be |token_list| now */
1194                 default:
1195                     istate = mid_line;
1196                     return true;
1197             }
1198         } else if (c >= skip_blanks) {
1199             switch (c-skip_blanks) {
1200                 case escape_cmd:
1201                     /* Scan a control sequence ...; */
1202                     istate = (unsigned char) scan_control_sequence();
1203                     if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
1204                         check_outer_validity();
1205                     return true;
1206                 case left_brace_cmd:
1207                     istate = mid_line;
1208                     align_state++;
1209                     return true;
1210                 case right_brace_cmd:
1211                     istate = mid_line;
1212                     align_state--;
1213                     return true;
1214                 case math_shift_cmd:
1215                     istate = mid_line;
1216                     return true;
1217                 case tab_mark_cmd:
1218                     istate = mid_line;
1219                     return true;
1220                 case car_ret_cmd:
1221                     iloc = ilimit + 1;
1222                     goto SWITCH;
1223                 case mac_param_cmd:
1224                     istate = mid_line;
1225                     return true;
1226                 case sup_mark_cmd:
1227                     /* If this |sup_mark| starts */
1228                     if (process_sup_mark())
1229                         goto RESWITCH;
1230                     else
1231                         istate = mid_line;
1232                     return true;
1233                 case sub_mark_cmd:
1234                     istate = mid_line;
1235                     return true;
1236                 case ignore_cmd:
1237                     goto SWITCH;
1238                 case spacer_cmd:
1239                     goto SWITCH;
1240                 case letter_cmd:
1241                     istate = mid_line;
1242                     return true;
1243                 case other_char_cmd:
1244                     istate = mid_line;
1245                     return true;
1246                 case active_char_cmd:
1247                     cur_cs = active_to_cs(cur_chr, false);
1248                     cur_cmd = eq_type(cur_cs);
1249                     cur_chr = equiv(cur_cs);
1250                     istate = mid_line;
1251                     if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
1252                         check_outer_validity();
1253                     return true;
1254                 case comment_cmd:
1255                     /* Finish line, |goto switch|; */
1256                     iloc = ilimit + 1;
1257                     goto SWITCH;
1258                 case invalid_char_cmd:
1259                     /* Decry the invalid character and |goto restart|; */
1260                     invalid_character_error();
1261                     return false; /* because state may be |token_list| now */
1262                 default:
1263                     istate = mid_line;
1264                     return true;
1265             }
1266         } else if (c >= mid_line) {
1267             switch (c-mid_line) {
1268                 case escape_cmd:
1269                     istate = (unsigned char) scan_control_sequence();
1270                     if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
1271                         check_outer_validity();
1272                     return true;
1273                 case left_brace_cmd:
1274                     align_state++;
1275                     return true;
1276                 case right_brace_cmd:
1277                     align_state--;
1278                     return true;
1279                 case math_shift_cmd:
1280                     return true;
1281                 case tab_mark_cmd:
1282                     return true;
1283                 case car_ret_cmd:
1284                     /*
1285                         Finish line, emit a space. When a character of type |spacer| gets through, its
1286                         character code is changed to $\.{"\ "}=040$. This means that the ASCII codes
1287                         for tab and space, and for the space inserted at the end of a line, will be
1288                         treated alike when macro parameters are being matched. We do this since such
1289                         characters are indistinguishable on most computer terminal displays.
1290                      */
1291                     iloc = ilimit + 1;
1292                     cur_cmd = spacer_cmd;
1293                     cur_chr = ' ';
1294                     return true;
1295                 case mac_param_cmd:
1296                     return true;
1297                 case sup_mark_cmd:
1298                     if (process_sup_mark())
1299                         goto RESWITCH;
1300                     else
1301                         istate = mid_line;
1302                     return true;
1303                 case sub_mark_cmd:
1304                     return true;
1305                 case ignore_cmd:
1306                     goto SWITCH;
1307                 case spacer_cmd:
1308                     /* Enter |skip_blanks| state, emit a space; */
1309                     istate = skip_blanks;
1310                     cur_chr = ' ';
1311                     return true;
1312                 case letter_cmd:
1313                     istate = mid_line;
1314                     return true;
1315                 case other_char_cmd:
1316                     istate = mid_line;
1317                     return true;
1318                 case active_char_cmd:
1319                     cur_cs = active_to_cs(cur_chr, false);
1320                     cur_cmd = eq_type(cur_cs);
1321                     cur_chr = equiv(cur_cs);
1322                     istate = mid_line;
1323                     if (! suppress_outer_error && cur_cmd >= outer_call_cmd)
1324                         check_outer_validity();
1325                     return true;
1326                 case comment_cmd:
1327                     iloc = ilimit + 1;
1328                     goto SWITCH;
1329                 case invalid_char_cmd:
1330                     invalid_character_error();
1331                     return false; /* because state may be |token_list| now */
1332                 default:
1333                     istate = mid_line;
1334                     return true;
1335             }
1336         } else {
1337             istate = mid_line;
1338             return true;
1339         }
1340     } else {
1341         if (iname != 21) {
1342             istate = new_line;
1343         }
1344         /*
1345            Move to next line of file, or |goto restart| if there is no next line,
1346            or |return| if a \.{\\read} line has finished;
1347         */
1348         do {
1349             next_line_retval r = next_line();
1350             if (r == next_line_return) {
1351                 return true;
1352             } else if (r == next_line_restart) {
1353                 return false;
1354             }
1355         } while (0);
1356         check_interrupt();
1357         goto SWITCH;
1358     }
1359     return true;
1360 }
1361
1362 #endif
1363
1364 @ Notice that a code like \.{\^\^8} becomes \.x if not followed by a hex digit.
1365 We only support a limited set:
1366
1367 ^^^^^^XXXXXX
1368 ^^^^XXXX
1369 ^^XX ^^<char>
1370
1371 @c
1372
/*
    Helpers for the hexadecimal notations. Only the lowercase digits
    |'0'..'9'| and |'a'..'f'| are accepted. The conversion macros store their
    result in |cur_chr| and expect arguments that passed |is_hex|. Arguments
    are evaluated more than once, so they must not have side effects. The
    statement macros are wrapped in |do ... while (0)|, which makes them behave
    like one statement: write a semicolon after each use.
*/

#define is_hex(a) (((a)>='0'&&(a)<='9')||((a)>='a'&&(a)<='f'))

/* shift |cur_chr| one hex digit to the left and add the value of digit |c| */
#define add_nybble(c) do { \
    if ((c)<='9') { \
        cur_chr=(cur_chr<<4)+(c)-'0'; \
    } else { \
        cur_chr=(cur_chr<<4)+(c)-'a'+10; \
    } \
} while (0)

/* set |cur_chr| to the value of the single hex digit |c| */
#define set_nybble(c) do { \
    if ((c)<='9') { \
        cur_chr=(c)-'0'; \
    } else { \
        cur_chr=(c)-'a'+10; \
    } \
} while (0)

#define one_hex_to_cur_chr(c1) \
    set_nybble(c1)

#define two_hex_to_cur_chr(c1,c2) do { \
    set_nybble(c1); \
    add_nybble(c2); \
} while (0)

#define four_hex_to_cur_chr(c1,c2,c3,c4) do { \
    two_hex_to_cur_chr(c1,c2); \
    add_nybble(c3); \
    add_nybble(c4); \
} while (0)

#define six_hex_to_cur_chr(c1,c2,c3,c4,c5,c6) do { \
    four_hex_to_cur_chr(c1,c2,c3,c4); \
    add_nybble(c5); \
    add_nybble(c6); \
} while (0)
1405
1406 static boolean process_sup_mark(void)
1407 {
1408     if (cur_chr == buffer[iloc]) {
1409         if (iloc < ilimit) {
1410             if ((cur_chr == buffer[iloc + 1]) && (cur_chr == buffer[iloc + 2])) {
1411                 if ((cur_chr == buffer[iloc + 3]) && (cur_chr == buffer[iloc + 4])) {
1412                     /* ^^^^^^XXXXXX */
1413                     if ((iloc + 10) <= ilimit) {
1414                         int c1 = buffer[iloc +  5];
1415                         int c2 = buffer[iloc +  6];
1416                         int c3 = buffer[iloc +  7];
1417                         int c4 = buffer[iloc +  8];
1418                         int c5 = buffer[iloc +  9];
1419                         int c6 = buffer[iloc + 10];
1420                         if (is_hex(c1) && is_hex(c2) && is_hex(c3) &&
1421                             is_hex(c4) && is_hex(c5) && is_hex(c6)) {
1422                             iloc = iloc + 11;
1423                             six_hex_to_cur_chr(c1,c2,c3,c4,c5,c6);
1424                             return true;
1425                         } else {
1426                             tex_error("^^^^^^ needs six hex digits", NULL);
1427                         }
1428                     } else {
1429                         tex_error("^^^^^^ needs six hex digits, end of input", NULL);
1430                     }
1431                 } else {
1432                     /* ^^^^XXXX */
1433                     if ((iloc + 6) <= ilimit) {
1434                         int c1 = buffer[iloc + 3];
1435                         int c2 = buffer[iloc + 4];
1436                         int c3 = buffer[iloc + 5];
1437                         int c4 = buffer[iloc + 6];
1438                         if (is_hex(c1) && is_hex(c2) && is_hex(c3) && is_hex(c4)) {
1439                             iloc = iloc + 7;
1440                             four_hex_to_cur_chr(c1,c2,c3,c4);
1441                             return true;
1442                         } else {
1443                             tex_error("^^^^ needs four hex digits", NULL);
1444                         }
1445                     } else {
1446                         tex_error("^^^^ needs four hex digits, end of input", NULL);
1447                     }
1448                 }
1449             } else {
1450                 /* ^^XX */
1451                 if ((iloc + 2) <= ilimit) {
1452                     int c1 = buffer[iloc + 1];
1453                     int c2 = buffer[iloc + 2];
1454                     if (is_hex(c1) && is_hex(c2)) {
1455                         iloc = iloc + 3;
1456                         two_hex_to_cur_chr(c1,c2);
1457                         return true;
1458                     }
1459                 }
1460                 /* go on, no error, good old tex */
1461             }
1462         }
1463         /* the rest */
1464         {
1465             int c1 = buffer[iloc + 1];
1466             if (c1 < 0200) {
1467                 iloc = iloc + 2;
1468                 if (is_hex(c1) && (iloc <= ilimit)) {
1469                     int c2 = buffer[iloc];
1470                     if (is_hex(c2)) {
1471                         incr(iloc);
1472                         two_hex_to_cur_chr(c1,c2);
1473                         return true;
1474                     }
1475                 }
1476                 cur_chr = (c1 < 0100 ? c1 + 0100 : c1 - 0100);
1477                 return true;
1478             }
1479         }
1480     }
1481     return false;
1482 }
1483
1484 @ Control sequence names are scanned only when they appear in some line of a
1485 file; once they have been scanned the first time, their |eqtb| location serves as
1486 a unique identification, so \TeX\ doesn't need to refer to the original name any
1487 more except when it prints the equivalent in symbolic form.
1488
1489 The program that scans a control sequence has been written carefully in order to
1490 avoid the blowups that might otherwise occur if a malicious user tried something
1491 like `\.{\\catcode\'15=0}'. The algorithm might look at |buffer[ilimit+1]|, but
1492 it never looks at |buffer[ilimit+2]|.
1493
1494 If expanded characters like `\.{\^\^A}' or `\.{\^\^df}' appear in or just
1495 following a control sequence name, they are converted to single characters in the
1496 buffer and the process is repeated, slowly but surely.
1497
1498 @c
static boolean check_expanded_code(int *kk);    /* below */

/*
    Scan the control sequence name that starts at |buffer[iloc]|. On return
    |cur_cs| holds the result of |id_lookup| for the name (or |null_cs| when
    the line is already exhausted), |cur_cmd| and |cur_chr| hold its |eq_type|
    and |equiv|, and |iloc| has been moved past the name.

    The return value is the scanner state to continue with: |skip_blanks| after
    a name that starts with a letter or after a single character of category
    |spacer_cmd|, and |mid_line| in all other cases.
*/
static int scan_control_sequence(void)
{
    int retval = mid_line;
    if (iloc > ilimit) {
        cur_cs = null_cs;       /* |state| is irrelevant in this case */
    } else {
        register int cat;       /* |cat_code(cur_chr)|, usually */
        while (1) {
            /*
                Every pass starts again at |iloc|; a pass is repeated (via
                |continue|) whenever |check_expanded_code| rewrote the buffer.
            */
            int k = iloc;
            /* NOTE(review): presumably decodes one UTF-8 character into |cur_chr| and advances |k| past it -- confirm against the macro */
            do_buffer_to_unichar(cur_chr, k);
            do_get_cat_code(cat, cur_chr);
            if (cat != letter_cmd || k > ilimit) {
                /* the name consists of this single character */
                retval = (cat == spacer_cmd ? skip_blanks : mid_line);
                if (cat == sup_mark_cmd && check_expanded_code(&k))     /* If an expanded...; */
                    continue;
            } else {
                /* the name starts with a letter: collect letters up to the end of the line */
                retval = skip_blanks;
                do {
                    do_buffer_to_unichar(cur_chr, k);
                    do_get_cat_code(cat, cur_chr);
                } while (cat == letter_cmd && k <= ilimit);

                if (cat == sup_mark_cmd && check_expanded_code(&k))     /* If an expanded...; */
                    continue;
                if (cat != letter_cmd) {
                    /* backtrack one character which can be utf */
                    /*
                        The step back equals the number of bytes that |cur_chr|
                        occupies in UTF-8: one byte up to 0x7F, two up to 0x7FF,
                        three up to 0xFFFF and four above that.
                    */
                    if (cur_chr <= 0x7F) {
                        k -= 1; /* in most cases */
                    } else if (cur_chr > 0xFFFF) {
                        k -= 4;
                    } else if (cur_chr > 0x7FF) {
                        k -= 3;
                    } else /* if (cur_chr > 0x7F) */ {
                        k -= 2;
                    }
                    /* now |k| points to first nonletter */
                }
            }
            /* the name occupies |buffer[iloc..(k-1)]| */
            cur_cs = id_lookup(iloc, k - iloc);
            iloc = k;
            break;
        }
    }
    cur_cmd = eq_type(cur_cs);
    cur_chr = equiv(cur_cs);
    return retval;
}
1557
1558 @ Whenever we reach the following piece of code, we will have
1559 |cur_chr=buffer[k-1]| and |k<=ilimit+1| and
1560 |cat=get_cat_code(cat_code_table,cur_chr)|. If an expanded code like \.{\^\^A} or
1561 \.{\^\^df} appears in |buffer[(k-1)..(k+1)]| or |buffer[(k-1)..(k+2)]|, we will
1562 store the corresponding code in |buffer[k-1]| and shift the rest of the buffer
1563 left two or three places.
1564
1565 @c
1566 static boolean check_expanded_code(int *kk)
1567 {
1568     int l;
1569     int k = *kk;
1570     int d = 1;
1571     if (buffer[k] == cur_chr && k < ilimit) {
1572         if ((cur_chr == buffer[k + 1]) && (cur_chr == buffer[k + 2])) {
1573             if ((cur_chr == buffer[k + 3]) && (cur_chr == buffer[k + 4])) {
1574                 if ((k + 10) <= ilimit) {
1575                     int c1 = buffer[k + 6 - 1];
1576                     int c2 = buffer[k + 6];
1577                     int c3 = buffer[k + 6 + 1];
1578                     int c4 = buffer[k + 6 + 2];
1579                     int c5 = buffer[k + 6 + 3];
1580                     int c6 = buffer[k + 6 + 4];
1581                     if (is_hex(c1) && is_hex(c2) && is_hex(c3) && is_hex(c4) && is_hex(c5) && is_hex(c6)) {
1582                         d = 6;
1583                         six_hex_to_cur_chr(c1,c2,c3,c4,c5,c6);
1584                     } else {
1585                         tex_error("^^^^^^ needs six hex digits", NULL);
1586                     }
1587                 } else {
1588                     tex_error("^^^^^^ needs six hex digits, end of input", NULL);
1589                 }
1590             } else {
1591                 if ((k + 6) <= ilimit) {
1592                     int c1 = buffer[k + 4 - 1];
1593                     int c2 = buffer[k + 4];
1594                     int c3 = buffer[k + 4 + 1];
1595                     int c4 = buffer[k + 4 + 2];
1596                     if (is_hex(c1) && is_hex(c2) && is_hex(c3) && is_hex(c4)) {
1597                         d = 4;
1598                         four_hex_to_cur_chr(c1,c2,c3,c4);
1599                     } else {
1600                         tex_error("^^^^ needs four hex digits", NULL);
1601                     }
1602                 } else {
1603                     tex_error("^^^^ needs four hex digits, end of input", NULL);
1604                 }
1605             }
1606         } else {
1607             int c1 = buffer[k + 1];
1608             if (c1 < 0200) {
1609                 d = 1;
1610                 if (is_hex(c1) && (k + 2) <= ilimit) {
1611                     int c2 = buffer[k + 2];
1612                     if (is_hex(c2)) {
1613                         d = 2;
1614                         two_hex_to_cur_chr(c1,c2);
1615                     } else {
1616                         cur_chr = (c1 < 0100 ? c1 + 0100 : c1 - 0100);
1617                     }
1618                 } else {
1619                     cur_chr = (c1 < 0100 ? c1 + 0100 : c1 - 0100);
1620                 }
1621             }
1622         }
1623         if (d > 2)
1624             d = 2 * d - 1;
1625         else
1626             d++;
1627         if (cur_chr <= 0x7F) {
1628             buffer[k - 1] = (packed_ASCII_code) cur_chr;
1629         } else if (cur_chr <= 0x7FF) {
1630             buffer[k - 1] = (packed_ASCII_code) (0xC0 + cur_chr / 0x40);
1631             k++;
1632             d--;
1633             buffer[k - 1] = (packed_ASCII_code) (0x80 + cur_chr % 0x40);
1634         } else if (cur_chr <= 0xFFFF) {
1635             buffer[k - 1] = (packed_ASCII_code) (0xE0 + cur_chr / 0x1000);
1636             k++;
1637             d--;
1638             buffer[k - 1] = (packed_ASCII_code) (0x80 + (cur_chr % 0x1000) / 0x40);
1639             k++;
1640             d--;
1641             buffer[k - 1] = (packed_ASCII_code) (0x80 + (cur_chr % 0x1000) % 0x40);
1642         } else {
1643             buffer[k - 1] = (packed_ASCII_code) (0xF0 + cur_chr / 0x40000);
1644             k++;
1645             d--;
1646             buffer[k - 1] = (packed_ASCII_code) (0x80 + (cur_chr % 0x40000) / 0x1000);
1647             k++;
1648             d--;
1649             buffer[k - 1] = (packed_ASCII_code) (0x80 + ((cur_chr % 0x40000) % 0x1000) / 0x40);
1650             k++;
1651             d--;
1652             buffer[k - 1] = (packed_ASCII_code) (0x80 + ((cur_chr % 0x40000) % 0x1000) % 0x40);
1653         }
1654         l = k;
1655         ilimit = ilimit - d;
1656         while (l <= ilimit) {
1657             buffer[l] = buffer[l + d];
1658             l++;
1659         }
1660         *kk = k;
1661         return true;
1662     }
1663     return false;
1664 }
1665
1666 @ All of the easy branches of |get_next| have now been taken care of. There is
1667 one more branch.
1668
@c static next_line_retval next_line(void)
{
    /*
        Move the scanner to the next line of the current input level. The
        result tells |get_next| what to do: |next_line_ok| when a fresh line is
        ready in |buffer[istart..ilimit]|, |next_line_restart| when the input
        level changed (end of file, inserted |every_eof| tokens, end of an
        error insertion) and scanning has to start over, and |next_line_return|
        when a \.{\\read} line has ended.
    */
    boolean inhibit_eol = false; /* a way to end a pseudo file without trailing space */
    if (iname > 17) {
        /* Read next line of file into |buffer|, or |goto restart| if the file has ended */
        incr(line);
        first = istart;
        if (!force_eof) {
            if (iname <= 20) {
                /* names 18..20 are served by |pseudo_input| */
                if (pseudo_input()) {   /* not end of file */
                    firm_up_the_line(); /* this sets |ilimit| */
                    line_catcode_table = DEFAULT_CAT_TABLE;
                    /* the last line of an |iname=19| pseudo file gets no end-of-line character */
                    if ((iname == 19) && (pseudo_lines(pseudo_files) == null))
                        inhibit_eol = true;
                } else if ((every_eof != null) && !eof_seen[iindex]) {
                    ilimit = first - 1;
                    eof_seen[iindex] = true; /* fake one empty line */
                    if (iname != 19)
                        begin_token_list(every_eof, every_eof_text);
                    return next_line_restart;
                } else {
                    force_eof = true;
                }
            } else {
                if (iname == 21) {
                    /* name 21 is served by |luacstring_input|, which also supplies the catcode table per line */
                    if (luacstring_input()) { /* not end of strings  */
                        firm_up_the_line();
                        line_catcode_table = (short) luacstring_cattable();
                        line_partial = (signed char) luacstring_partial();
                        if (luacstring_final_line() || line_partial
                            || line_catcode_table == NO_CAT_TABLE)
                            inhibit_eol = true;
                        if (!line_partial)
                            istate = new_line;
                    } else {
                        force_eof = true;
                    }
                } else {
                    /* any other name above 21 is read from |cur_file| */
                    if (lua_input_ln(cur_file, 0, true)) { /* not end of file */
                        firm_up_the_line(); /* this sets |ilimit| */
                        line_catcode_table = DEFAULT_CAT_TABLE;
                    } else if ((every_eof != null) && (!eof_seen[iindex])) {
                        ilimit = first - 1;
                        eof_seen[iindex] = true; /* fake one empty line */
                        begin_token_list(every_eof, every_eof_text);
                        return next_line_restart;
                    } else {
                        force_eof = true;
                    }
                }
            }
        }
        if (force_eof) {
            /* the current input level is finished: warn, report and pop it */
            if (tracing_nesting > 0)
                if ((grp_stack[in_open] != cur_boundary) || (if_stack[in_open] != cond_ptr))
                    if (!((iname == 19) || (iname == 21))) {
                        /* give warning for some unfinished groups and/or conditionals */
                        file_warning();
                    }
            if ((iname > 21) || (iname == 20)) {
                report_stop_file(filetype_tex);
                decr(open_parens);
            }
            force_eof = false;
            /* lua input or \.{\\scantextokens} */
            if (iname == 21 || iname == 19) {
                end_file_reading();
            } else {
                /* for all other names the outer validity check runs after the level is popped */
                end_file_reading();
                if (! suppress_outer_error)
                    check_outer_validity();
            }
            return next_line_restart;
        }
        /* either drop the slot reserved for the end-of-line character or fill it */
        if (inhibit_eol || end_line_char_inactive)
            ilimit--;
        else
            buffer[ilimit] = (packed_ASCII_code) end_line_char;
        first = ilimit + 1;
        iloc = istart; /* ready to read */
    } else {
        if (!terminal_input) {
            /* \.{\\read} line has ended */
            cur_cmd = 0;
            cur_chr = 0;
            return next_line_return;    /* OUTER */
        }
        if (input_ptr > 0) {
            /* text was inserted during error recovery */
            end_file_reading();
            return next_line_restart; /* resume previous level */
        }
        if (selector < log_only)
            open_log_file();
        if (interaction > nonstop_mode) {
            /* undo the earlier |ilimit--| so that the emptiness test below sees the same value in both modes */
            if (end_line_char_inactive)
                ilimit++;
            if (ilimit == istart) {
                /* previous line was empty */
                tprint_nl("(Please type a command or say `\\end')");
            }
            print_ln();
            first = istart;
            prompt_input("*"); /* input on-line into |buffer| */
            ilimit = last;
            if (end_line_char_inactive)
                ilimit--;
            else
                buffer[ilimit] = (packed_ASCII_code) end_line_char;
            first = ilimit + 1;
            iloc = istart;
        } else {
            /*
                Nonstop mode, which is intended for overnight batch processing,
                never waits for on-line input.
            */
            fatal_error("*** (job aborted, no legal \\end found)");
        }
    }
    return next_line_ok;
}
1790
1791 @ Let's consider now what happens when |get_next| is looking at a token list.
1792
1793 @c
1794 static boolean get_next_tokenlist(void)
1795 {
1796     register halfword t = token_info(iloc);
1797     iloc = token_link(iloc); /* move to next */
1798     if (t >= cs_token_flag) {
1799         /* a control sequence token */
1800         cur_cs = t - cs_token_flag;
1801         cur_cmd = eq_type(cur_cs);
1802         if (cur_cmd >= outer_call_cmd) {
1803             if (cur_cmd == dont_expand_cmd) {
1804                 /*
1805                     Get the next token, suppressing expansion. The present point in the program
1806                     is reached only when the |expand| routine has inserted a special marker into
1807                     the input. In this special case, |token_info(iloc)| is known to be a control
1808                     sequence token, and |token_link(iloc)=null|.
1809                 */
1810                 cur_cs = token_info(iloc) - cs_token_flag;
1811                 iloc = null;
1812                 cur_cmd = eq_type(cur_cs);
1813                 if (cur_cmd > max_command_cmd) {
1814                     cur_cmd = relax_cmd;
1815                     cur_chr = no_expand_flag;
1816                     return true;
1817                 }
1818             } else if (! suppress_outer_error) {
1819                 check_outer_validity();
1820             }
1821         }
1822         cur_chr = equiv(cur_cs);
1823     } else {
1824         cur_cmd = token_cmd(t);
1825         cur_chr = token_chr(t);
1826         switch (cur_cmd) {
1827             case left_brace_cmd:
1828                 align_state++;
1829                 break;
1830             case right_brace_cmd:
1831                 align_state--;
1832                 break;
1833             case out_param_cmd:
1834                 /* Insert macro parameter and |goto restart|; */
1835                 begin_token_list(param_stack[param_start + cur_chr - 1], parameter);
1836                 return false;
1837                 break;
1838         }
1839     }
1840     return true;
1841 }
1842
1843 @ Now we're ready to take the plunge into |get_next| itself. Parts of this
1844 routine are executed more often than any other instructions of \TeX.
1845 @^mastication@>@^inner loop@>
1846
1847 @ sets |cur_cmd|, |cur_chr|, |cur_cs| to next token
1848
1849 @c
1850 void get_next(void)
1851 {
1852   RESTART:
1853     cur_cs = 0;
1854     if (istate != token_list) {
1855         /* Input from external file, |goto restart| if no input found */
1856         if (!get_next_file())
1857             goto RESTART;
1858     } else {
1859         if (iloc == null) {
1860             end_token_list();
1861             goto RESTART;       /* list exhausted, resume previous level */
1862         } else if (!get_next_tokenlist()) {
1863             goto RESTART;       /* parameter needs to be expanded */
1864         }
1865     }
1866     /* If an alignment entry has just ended, take appropriate action */
1867     if ((cur_cmd == tab_mark_cmd || cur_cmd == car_ret_cmd) && align_state == 0) {
1868         insert_vj_template();
1869         goto RESTART;
1870     }
1871 }
1872
1873 @ Since |get_next| is used so frequently in \TeX, it is convenient to define
1874 three related procedures that do a little more:
1875
1876 \yskip\hang|get_token| not only sets |cur_cmd| and |cur_chr|, it also sets
1877 |cur_tok|, a packed halfword version of the current token.
1878
1879 \yskip\hang|get_x_token|, meaning ``get an expanded token,'' is like |get_token|,
1880 but if the current token turns out to be a user-defined control sequence (i.e., a
1881 macro call), or a conditional, or something like \.{\\topmark} or
1882 \.{\\expandafter} or \.{\\csname}, it is eliminated from the input by beginning
1883 the expansion of the macro or the evaluation of the conditional.
1884
1885 \yskip\hang|x_token| is like |get_x_token| except that it assumes that |get_next|
1886 has already been called.
1887
1888 \yskip\noindent In fact, these three procedures account for almost every use of
1889 |get_next|.
1890
1891 No new control sequences will be defined except during a call of |get_token|, or
1892 when \.{\\csname} compresses a token list, because |no_new_control_sequence| is
1893 always |true| at other times.
1894
1895 @ sets |cur_cmd|, |cur_chr|, |cur_tok|
1896
1897 @c
1898 void get_token(void)
1899 {
1900     no_new_control_sequence = false;
1901     get_next();
1902     no_new_control_sequence = true;
1903     if (cur_cs == 0)
1904         cur_tok = token_val(cur_cmd, cur_chr);
1905     else
1906         cur_tok = cs_token_flag + cur_cs;
1907 }
1908
1909 @ changes the string |s| to a token list
1910
1911 @c
1912 halfword string_to_toks(const char *ss)
1913 {
1914     halfword p; /* tail of the token list */
1915     halfword q; /* new node being added to the token list via |store_new_token| */
1916     halfword t; /* token being appended */
1917     const char *s = ss;
1918     const char *se = ss + strlen(s);
1919     p = temp_token_head;
1920     set_token_link(p, null);
1921     while (s < se) {
1922         t = (halfword) str2uni((const unsigned char *) s);
1923         s += utf8_size(t);
1924         if (t == ' ')
1925             t = space_token;
1926         else
1927             t = other_token + t;
1928         fast_store_new_token(t);
1929     }
1930     return token_link(temp_token_head);
1931 }
1932
1933 @ The token lists for macros and for other things like \.{\\mark} and
1934 \.{\\output} and \.{\\write} are produced by a procedure called |scan_toks|.
1935
1936 Before we get into the details of |scan_toks|, let's consider a much simpler
1937 task, that of converting the current string into a token list. The |str_toks|
1938 function does this; it classifies spaces as type |spacer| and everything else as
1939 type |other_char|.
1940
1941 The token list created by |str_toks| begins at |link(temp_token_head)| and ends
1942 at the value |p| that is returned. (If |p=temp_token_head|, the list is empty.)
1943
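|lua_str_toks| is almost identical, but it also escapes the characters that
|lua| considers special while scanning a literal string: the backslash and both
kinds of quote get a backslash in front, and a newline or carriage return is
written as a backslash followed by `\.{n}' or `\.{r}'.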
1946
@ changes the string |b| to a token list and returns the tail of that list
1948
1949 @c
1950 halfword lua_str_toks(lstring b)
1951 {
1952     halfword p;       /* tail of the token list */
1953     halfword q;       /* new node being added to the token list via |store_new_token| */
1954     halfword t;       /* token being appended */
1955     unsigned char *k; /* index into string */
1956     p = temp_token_head;
1957     set_token_link(p, null);
1958     k = (unsigned char *) b.s;
1959     while (k < (unsigned char *) b.s + b.l) {
1960         t = pool_to_unichar(k);
1961         k += utf8_size(t);
1962         if (t == ' ') {
1963             t = space_token;
1964         } else {
1965             if ((t == '\\') || (t == '"') || (t == '\'') || (t == 10) || (t == 13))
1966                 fast_store_new_token(other_token + '\\');
1967             if (t == 10)
1968                 t = 'n';
1969             if (t == 13)
1970                 t = 'r';
1971             t = other_token + t;
1972         }
1973         fast_store_new_token(t);
1974     }
1975     return p;
1976 }
1977
1978 @ Incidentally, the main reason for wanting |str_toks| is the function
1979 |the_toks|, which has similar input/output characteristics.
1980
@ changes the string |s| to a token list and returns the tail of that list
1982
1983 @c
1984 halfword str_toks(lstring s)
1985 {
1986     halfword p;           /* tail of the token list */
1987     halfword q;           /* new node being added to the token list via |store_new_token| */
1988     halfword t;           /* token being appended */
1989     unsigned char *k, *l; /* index into string */
1990     p = temp_token_head;
1991     set_token_link(p, null);
1992     k = s.s;
1993     l = k + s.l;
1994     while (k < l) {
1995         t = pool_to_unichar(k);
1996         k += utf8_size(t);
1997         if (t == ' ')
1998             t = space_token;
1999         else
2000             t = other_token + t;
2001         fast_store_new_token(t);
2002     }
2003     return p;
2004 }
2005
2006 /*
2007     hh: most of the converter is similar to the one i made for macro so at some point i
2008     can make a helper; also todo: there is no need to go through the pool
2009
2010 */
2011
2012 halfword str_scan_toks(int ct, lstring s)
2013 {                         /* changes the string |str_pool[b..pool_ptr]| to a token list */
2014     halfword p;           /* tail of the token list */
2015     halfword q;           /* new node being added to the token list via |store_new_token| */
2016     halfword t;           /* token being appended */
2017     unsigned char *k, *l; /* index into string */
2018     int cc;
2019     p = temp_token_head;
2020     set_token_link(p, null);
2021     k = s.s;
2022     l = k + s.l;
2023     while (k < l) {
2024         t = pool_to_unichar(k);
2025         k += utf8_size(t);
2026         cc = get_cat_code(ct,t);
2027             if (cc == 0) {
2028                 /* we have a potential control sequence so we check for it */
2029                 int _lname = 0 ;
2030                 int _s = 0 ;
2031                 int _c = 0 ;
2032                 halfword _cs = null ;
2033                 unsigned char *_name  = k ;
2034                 while (k < l) {
2035                     t = (halfword) str2uni((const unsigned char *) k);
2036                     _s = utf8_size(t);
2037                     _c = get_cat_code(ct,t);
2038                     if (_c == 11) {
2039                         k += _s ;
2040                         _lname = _lname + _s ;
2041                     } else if (_c == 10) {
2042                         /* we ignore a trailing space like normal scanning does */
2043                         k += _s ;
2044                         break ;
2045                     } else {
2046                         break ;
2047                     }
2048                 }
2049                 if (_s > 0) {
2050                     /* we have a potential \cs */
2051                     _cs = string_lookup((const char *) _name, _lname);
2052                     if (_cs == undefined_control_sequence) {
2053                         /* let's play safe and backtrack */
2054                         t = cc * (1<<21) + t ;
2055                         k = _name ;
2056                     } else {
2057                         t = cs_token_flag + _cs;
2058                     }
2059                 } else {
2060                     /* just a character with some meaning, so \unknown becomes effectively */
2061                     /* \\unknown assuming that \\ has some useful meaning of course        */
2062                     t = cc * (1<<21) + t ;
2063                     k = _name ;
2064                 }
2065
2066             } else {
2067                 /* whatever token, so for instance $x^2$ just works given a tex */
2068                 /* catcode regime */
2069                 t = cc * (1<<21) + t ;
2070             }
2071             fast_store_new_token(t);
2072
2073     }
2074     return p;
2075 }
2076
2077 @ Here's part of the |expand| subroutine that we are now ready to complete:
2078
2079 @c
/*
    Run |the_toks| and insert the list that now hangs off |temp_token_head|
    into the input via |ins_list|. The value returned by |the_toks| is not
    needed here.
*/
void ins_the_toks(void)
{
    (void) the_toks();
    ins_list(token_link(temp_token_head));
}
2085
/*
    Give token register |n| a fresh reference node whose list is the list
    that follows head node |t|, and assign it through |define|. The parameters
    are parenthesized so that expression arguments keep their meaning.

    NOTE(review): the local |a| (4 when |g>0|, otherwise 0) is not used in this
    text; presumably the |define| macro reads it to choose between a global
    and a local assignment -- confirm against its definition.
*/
#define set_toks_register(n,t,g) { \
    int a = ((g)>0) ? 4 : 0; \
    halfword ref = get_avail();  \
    set_token_ref_count(ref, 0); \
    set_token_link(ref, token_link(t)); \
    define((n) + toks_base, call_cmd, ref); \
}
2093
/*
    Append or prepend tokens to a token register. The target register is
    scanned first, either as a token register command or as a number. The
    source is either a braced token list or another token register.

    |how| selects the operation: an odd value prepends, an even value appends,
    and a value above 1 makes a braced source list get scanned with expansion.

    When the source is a register, its list is moved into the target and the
    source register is cleared.
*/
void combine_the_toks(int how)
{
    halfword nt;
    get_x_token();
    /* target */
    if (cur_cmd == assign_toks_cmd) {
        nt = equiv(cur_cs) - toks_base;
        /* check range */
    } else {
        back_input();
        scan_int();
        nt = cur_val;
        /* NOTE(review): |nt| is used as an index below without any range check -- confirm that callers cannot pass bad numbers */
    }
    /* source */
    do {
        get_x_token();
    } while (cur_cmd == spacer_cmd);
    if (cur_cmd == left_brace_cmd) {
        /* the source is a braced list: |source| is its head node, |x| what |scan_toks| returned (used as its tail) */
        halfword x, source;
        back_input();
        x = scan_toks(false,how > 1); /* expanded or not */
        source = def_ref;
        /* action */
        if (source != null) {
            halfword target = toks(nt);
            if (target == null) {
                set_toks_register(nt,source,0);
            } else {
                halfword s = token_link(source);
                if (s != null) {
                    halfword t = token_link(target);
                    if (t == null) {
                        /* can this happen ? */
                        set_token_link(target, s);
                    } else if (odd(how)) {
                        /* prepend */
                        if (cur_level != eq_level_field(eqtb[toks_base+nt])) {
                            /* the register was set at another level: build new list = source + copy of target */
                            halfword p = temp_token_head;
                            halfword q;
                            set_token_link(p, s); /* s = head, x = tail */
                            p = x;
                            while (t != null) {
                                fast_store_new_token(token_info(t));
                                t = token_link(t);
                            }
                            set_toks_register(nt,temp_token_head,0);
                        } else {
                            /* same level: splice the source in front of the existing list in place */
                            set_token_link(x,t);
                            set_token_link(target,s);
                        }
                    } else {
                        /* append */
                        if (cur_level != eq_level_field(eqtb[toks_base+nt])) {
                            /* the register was set at another level: build new list = copy of target + source */
                            halfword p = temp_token_head;
                            halfword q;
                            set_token_link(p, null);
                            while (t != null) {
                                fast_store_new_token(token_info(t));
                                t = token_link(t);
                            }
                            set_token_link(p,s);
                            set_toks_register(nt,temp_token_head,0);
                        } else {
                            /* same level: walk to the end of the existing list and hook the source on */
                            while (token_link(t) != null) {
                                t = token_link(t);
                            }
                            set_token_link(t,s);
                        }
                    }
                }
            }
        }
    } else {
        /* the source is another token register, given as a command or as a number */
        halfword source, ns;
        if (cur_cmd == assign_toks_cmd) {
            ns = equiv(cur_cs) - toks_base;
            /* check range */
        } else {
            back_input();
            scan_int();
            ns = cur_val;
        }
        /* action */
        source = toks(ns);
        if (source != null) {
            halfword target = toks(nt);
            if (target == null) {
                /* NOTE(review): both registers are written directly here, not through |define| -- confirm this is intended with respect to grouping */
                equiv(toks_base+nt) = source;
                equiv(toks_base+ns) = null;
            } else {
                halfword s = token_link(source);
                if (s != null) {
                    halfword t = token_link(target);
                    if (t == null) {
                        set_token_link(target, s);
                    } else if (odd(how)) {
                        /* prepend */
                        halfword x = s;
                        while (token_link(x) != null) {
                            x = token_link(x);
                        }
                        set_token_link(x,t);
                        set_token_link(target,s);
                    } else {
                        /* append */
                        /* NOTE(review): when |ns| equals |nt| this links the list to itself -- confirm whether that case needs a guard */
                        while (token_link(t) != null) {
                            t = token_link(t);
                        }
                        set_token_link(t,s);
                    }
                     equiv(toks_base+ns) = null;
                }
            }
        }
    }
}
2210
2211 @ This routine, used in the next one, prints the job name, possibly modified by
2212 the |process_jobname| callback.
2213
2214 @c
2215 static void print_job_name(void)
2216 {
2217    if (job_name) {
2218       char *s, *ss; /* C strings for jobname before and after processing */
2219       int callback_id, lua_retval;
2220       s = (char*)str_string(job_name);
2221       callback_id = callback_defined(process_jobname_callback);
2222       if (callback_id > 0) {
2223         lua_retval = run_callback(callback_id, "S->S", s, &ss);
2224         if ((lua_retval == true) && (ss != NULL))
2225             s = ss;
2226       }
2227       tprint(s);
2228    } else {
2229       print(job_name);
2230    }
2231 }
2232
@ Here is a routine that prints the result of a convert command, using the
argument |i|. It returns |false| if it does not know how to print the code |c|.
The function exists because lua code and tex code can both call it to convert
something.
2237
2238 @ Parse optional lua state integer, or an instance name to be stored in |sn| and
2239 get the next non-blank non-relax non-call token.
2240
2241 @c
2242
2243 int scan_lua_state(void)
2244 {
2245     int sn = 0;
2246     do {
2247         get_x_token();
2248     } while ((cur_cmd == spacer_cmd) || (cur_cmd == relax_cmd));
2249     back_input();
2250     if (cur_cmd != left_brace_cmd) {
2251         if (scan_keyword("name")) {
2252             (void) scan_toks(false, true);
2253             sn = def_ref;
2254         } else {
2255             scan_register_num();
2256             if (get_lua_name(cur_val))
2257                 sn = (cur_val - 65536);
2258         }
2259     }
2260     return sn;
2261 }
2262
2263 @ The procedure |conv_toks| uses |str_toks| to insert the token list for
2264 |convert| functions into the scanner; `\.{\\outer}' control sequences are allowed
2265 to follow `\.{\\string}' and `\.{\\meaning}'.
2266
2267 The extra temp string |u| is needed because |pdf_scan_ext_toks| incorporates any
2268 pending string in its output. In order to save such a pending string, we have to
2269 create a temporary string that is destroyed immediately after.
2270
2271 @c
/*
    |push_selector| saves the current |selector| in |old_setting| and switches
    to |new_string|; |pop_selector| restores the saved value. Both expect a
    variable named |old_setting| to be in scope at the place of use.
*/
#define push_selector { \
    old_setting = selector; \
    selector = new_string; \
}

#define pop_selector { \
    selector = old_setting; \
}
2280
2281 static int do_variable_dvi(halfword c)
2282 {
2283     return 0;
2284 }
2285
/*
    Each of these macros turns backend variable |i| into the matching
    assignment command: it sets |cur_cmd|, |cur_val| (the base plus |i|) and
    |cur_tok|, and pushes the token back with |back_input|. The parameter is
    parenthesized so that an expression argument is added to the base as a
    whole.

    These are multi-statement macros that already end in a semicolon, so they
    must be used inside their own braces.
*/
#define do_variable_backend_int(i) \
    cur_cmd = assign_int_cmd; \
    cur_val = backend_int_base + (i); \
    cur_tok = token_val(cur_cmd, cur_val); \
    back_input();

#define do_variable_backend_dimen(i) \
    cur_cmd = assign_dimen_cmd; \
    cur_val = backend_dimen_base + (i); \
    cur_tok = token_val(cur_cmd, cur_val); \
    back_input();

#define do_variable_backend_toks(i) \
    cur_cmd = assign_toks_cmd; \
    cur_val = backend_toks_base + (i); \
    cur_tok = token_val(cur_cmd, cur_val); \
    back_input();
2303
2304 static int do_variable_pdf(halfword c)
2305 {
2306          if (scan_keyword("compresslevel"))       { do_variable_backend_int(c_pdf_compress_level); }
2307     else if (scan_keyword("decimaldigits"))       { do_variable_backend_int(c_pdf_decimal_digits); }
2308     else if (scan_keyword("imageresolution"))     { do_variable_backend_int(c_pdf_image_resolution); }
2309     else if (scan_keyword("pkresolution"))        { do_variable_backend_int(c_pdf_pk_resolution); }
2310     else if (scan_keyword("uniqueresname"))       { do_variable_backend_int(c_pdf_unique_resname); }
2311     else if (scan_keyword("minorversion"))        { do_variable_backend_int(c_pdf_minor_version); }
2312     else if (scan_keyword("pagebox"))             { do_variable_backend_int(c_pdf_pagebox); }
2313     else if (scan_keyword("inclusionerrorlevel")) { do_variable_backend_int(c_pdf_inclusion_errorlevel); }
2314     else if (scan_keyword("ignoreunknownimages")) { do_variable_backend_int(c_pdf_ignore_unknown_images); }
2315     else if (scan_keyword("gamma"))               { do_variable_backend_int(c_pdf_gamma); }
2316     else if (scan_keyword("imageapplygamma"))     { do_variable_backend_int(c_pdf_image_apply_gamma); }
2317     else if (scan_keyword("imagegamma"))          { do_variable_backend_int(c_pdf_image_gamma); }
2318     else if (scan_keyword("imagehicolor"))        { do_variable_backend_int(c_pdf_image_hicolor); }
2319     else if (scan_keyword("imageaddfilename"))    { do_variable_backend_int(c_pdf_image_addfilename); }
2320     else if (scan_keyword("objcompresslevel"))    { do_variable_backend_int(c_pdf_objcompresslevel); }
2321     else if (scan_keyword("inclusioncopyfonts"))  { do_variable_backend_int(c_pdf_inclusion_copy_font); }
2322     else if (scan_keyword("gentounicode"))        { do_variable_backend_int(c_pdf_gen_tounicode); }
2323     else if (scan_keyword("pkfixeddpi"))          { do_variable_backend_int(c_pdf_pk_fixed_dpi); }
2324
2325     else if (scan_keyword("horigin"))             { do_variable_backend_dimen(d_pdf_h_origin); }
2326     else if (scan_keyword("vorigin"))             { do_variable_backend_dimen(d_pdf_v_origin); }
2327     else if (scan_keyword("threadmargin"))        { do_variable_backend_dimen(d_pdf_thread_margin); }
2328     else if (scan_keyword("destmargin"))          { do_variable_backend_dimen(d_pdf_dest_margin); }
2329     else if (scan_keyword("linkmargin"))          { do_variable_backend_dimen(d_pdf_link_margin); }
2330     else if (scan_keyword("xformmargin"))         { do_variable_backend_dimen(d_pdf_xform_margin); }
2331
2332     else if (scan_keyword("pageattr"))            { do_variable_backend_toks(t_pdf_page_attr); }
2333     else if (scan_keyword("pageresources"))       { do_variable_backend_toks(t_pdf_page_resources); }
2334     else if (scan_keyword("pagesattr"))           { do_variable_backend_toks(t_pdf_pages_attr); }
2335     else if (scan_keyword("xformattr"))           { do_variable_backend_toks(t_pdf_xform_attr); }
2336     else if (scan_keyword("xformresources"))      { do_variable_backend_toks(t_pdf_xform_resources); }
2337     else if (scan_keyword("pkmode"))              { do_variable_backend_toks(t_pdf_pk_mode); }
2338
2339     else
2340         return 0;
2341     return 1;
2342 }
2343
2344 static int do_feedback_dvi(halfword c)
2345 {
2346     return 0;
2347 }
2348
2349 /* codes not really needed but cleaner when testing */
2350
2351 #define pdftex_version  40  /* these values will not change any more */
2352 #define pdftex_revision "0" /* these values will not change any more */
2353
2354 static int do_feedback_pdf(halfword c)
2355 {
2356     int old_setting;            /* holds |selector| setting */
2357     int save_scanner_status;    /* |scanner_status| upon entry */
2358     halfword save_def_ref;      /* |def_ref| upon entry, important if inside `\.{\\message}' */
2359     halfword save_warning_index;
2360     boolean bool;               /* temp boolean */
2361     str_number s;               /* first temp string */
2362     int ff;                     /* for use with |set_ff| */
2363     str_number u = 0;           /* third temp string, will become non-nil if a string is already being built */
2364     char *str;                  /* color stack init str */
2365
2366     if (scan_keyword("lastlink")) {
2367         push_selector;
2368         print_int(pdf_last_link);
2369         pop_selector;
2370     } else if (scan_keyword("retval")) {
2371         push_selector;
2372         print_int(pdf_retval);
2373         pop_selector;
2374     } else if (scan_keyword("lastobj")) {
2375         push_selector;
2376         print_int(pdf_last_obj);
2377         pop_selector;
2378     } else if (scan_keyword("lastannot")) {
2379         push_selector;
2380         print_int(pdf_last_annot);
2381         pop_selector;
2382     } else if (scan_keyword("xformname")) {
2383         scan_int();
2384         check_obj_type(static_pdf, obj_type_xform, cur_val);
2385         push_selector;
2386         print_int(obj_info(static_pdf, cur_val));
2387         pop_selector;
2388     } else if (scan_keyword("creationdate")) {
2389         ins_list(string_to_toks(getcreationdate(static_pdf)));
2390         /* no further action */
2391         return 2;
2392     } else if (scan_keyword("fontname")) {
2393         scan_font_ident();
2394         if (cur_val == null_font)
2395             normal_error("pdf backend", "invalid font identifier when asking 'fontname'");
2396         pdf_check_vf(cur_val);
2397         if (!font_used(cur_val))
2398             pdf_init_font(static_pdf, cur_val);
2399         push_selector;
2400         set_ff(cur_val);
2401         print_int(obj_info(static_pdf, pdf_font_num(ff)));
2402         pop_selector;
2403     } else if (scan_keyword("fontobjnum")) {
2404         scan_font_ident();
2405         if (cur_val == null_font)
2406             normal_error("pdf backend", "invalid font identifier when asking 'objnum'");
2407         pdf_check_vf(cur_val);
2408         if (!font_used(cur_val))
2409             pdf_init_font(static_pdf, cur_val);
2410         push_selector;
2411         set_ff(cur_val);
2412         print_int(pdf_font_num(ff));
2413         pop_selector;
2414     } else if (scan_keyword("fontsize")) {
2415         scan_font_ident();
2416         if (cur_val == null_font)
2417             normal_error("pdf backend", "invalid font identifier when asking 'fontsize'");
2418         push_selector;
2419         print_scaled(font_size(cur_val));
2420         tprint("pt");
2421         pop_selector;
2422     } else if (scan_keyword("pageref")) {
2423         scan_int();
2424         if (cur_val <= 0)
2425             normal_error("pdf backend", "invalid page number when asking 'pageref'");
2426         push_selector;
2427         print_int(pdf_get_obj(static_pdf, obj_type_page, cur_val, false));
2428         pop_selector;
2429     } else if (scan_keyword("colorstackinit")) {
2430         bool = scan_keyword("page");
2431         if (scan_keyword("direct"))
2432             cur_val = direct_always;
2433         else if (scan_keyword("page"))
2434             cur_val = direct_page;
2435         else
2436             cur_val = set_origin;
2437         save_scanner_status = scanner_status;
2438         save_warning_index = warning_index;
2439         save_def_ref = def_ref;
2440         u = save_cur_string();
2441         scan_toks(false, true);
2442         s = tokens_to_string(def_ref);
2443         delete_token_ref(def_ref);
2444         def_ref = save_def_ref;
2445         warning_index = save_warning_index;
2446         scanner_status = save_scanner_status;
2447         str = makecstring(s);
2448         cur_val = newcolorstack(str, cur_val, bool);
2449         free(str);
2450         flush_str(s);
2451         cur_val_level = int_val_level;
2452         if (cur_val < 0) {
2453             print_err("Too many color stacks");
2454             help2("The number of color stacks is limited to 32768.",
2455                   "I'll use the default color stack 0 here.");
2456             error();
2457             cur_val = 0;
2458             restore_cur_string(u);
2459         }
2460         push_selector;
2461         print_int(cur_val);
2462         pop_selector;
2463     } else if (scan_keyword("version")) {
2464         push_selector;
2465         print_int(pdftex_version);
2466         pop_selector;
2467     } else if (scan_keyword("revision")) {
2468         ins_list(string_to_toks(pdftex_revision));
2469         return 2;
2470     } else {
2471         return 0;
2472     }
2473     return 1;
2474 }
2475
2476 void conv_toks(void)
2477 {
2478     int old_setting;            /* holds |selector| setting */
2479     halfword p, q;
2480     int save_scanner_status;    /* |scanner_status| upon entry */
2481     halfword save_def_ref;      /* |def_ref| upon entry, important if inside `\.{\\message}' */
2482     halfword save_warning_index;
2483     boolean bool;               /* temp boolean */
2484     str_number s;               /* first temp string */
2485     int sn;                     /* lua chunk name */
2486     str_number u = 0;           /* third temp string, will become non-nil if a string is already being built */
2487     int c = cur_chr;            /* desired type of conversion */
2488     str_number str;
2489     int i = 0;
2490     /* Scan the argument for command |c| */
2491     switch (c) {
2492         case number_code:
2493             scan_int();
2494             push_selector;
2495             print_int(cur_val);
2496             pop_selector;
2497             break;
2498         case lua_function_code:
2499             scan_int();
2500             if (cur_val <= 0) {
2501                 normal_error("luafunction", "invalid number");
2502             } else {
2503                 u = save_cur_string();
2504                 luacstrings = 0;
2505                 luafunctioncall(cur_val);
2506                 restore_cur_string(u);
2507                 if (luacstrings > 0)
2508                     lua_string_start();
2509             }
2510             /* no further action */
2511             return;
2512             break;
2513         case lua_code:
2514             u = save_cur_string();
2515             save_scanner_status = scanner_status;
2516             save_def_ref = def_ref;
2517             save_warning_index = warning_index;
2518             sn = scan_lua_state();
2519             scan_toks(false, true);
2520             s = def_ref;
2521             warning_index = save_warning_index;
2522             def_ref = save_def_ref;
2523             scanner_status = save_scanner_status;
2524             luacstrings = 0;
2525             luatokencall(s, sn);
2526             delete_token_ref(s);
2527             restore_cur_string(u);  /* TODO: check this, was different */
2528             if (luacstrings > 0)
2529                 lua_string_start();
2530             /* no further action */
2531             return;
2532             break;
2533         case expanded_code:
2534             save_scanner_status = scanner_status;
2535             save_warning_index = warning_index;
2536             save_def_ref = def_ref;
2537             u = save_cur_string();
2538             scan_toks(false, true);
2539             warning_index = save_warning_index;
2540             scanner_status = save_scanner_status;
2541             ins_list(token_link(def_ref));
2542             def_ref = save_def_ref;
2543             restore_cur_string(u);
2544             /* no further action */
2545             return;
2546             break;
2547         case math_style_code:
2548             push_selector;
2549             print_math_style();
2550             pop_selector;
2551             break;
2552         case string_code:
2553             save_scanner_status = scanner_status;
2554             scanner_status = normal;
2555             get_token();
2556             scanner_status = save_scanner_status;
2557             push_selector;
2558             if (cur_cs != 0)
2559                 sprint_cs(cur_cs);
2560             else
2561                 print(cur_chr);
2562             pop_selector;
2563             break;
2564         case cs_string_code:
2565             save_scanner_status = scanner_status;
2566             scanner_status = normal;
2567             get_token();
2568             scanner_status = save_scanner_status;
2569             push_selector;
2570             if (cur_cs != 0)
2571                 sprint_cs_name(cur_cs);
2572             else
2573                 print(cur_chr);
2574             pop_selector;
2575             break;
2576         case roman_numeral_code:
2577             scan_int();
2578             push_selector;
2579             print_roman_int(cur_val);
2580             pop_selector;
2581             break;
2582         case meaning_code:
2583             save_scanner_status = scanner_status;
2584             scanner_status = normal;
2585             get_token();
2586             scanner_status = save_scanner_status;
2587             push_selector;
2588             print_meaning();
2589             pop_selector;
2590             break;
2591         case uchar_code:
2592             scan_char_num();
2593             push_selector;
2594             print(cur_val);
2595             pop_selector;
2596             break;
2597         case lua_escape_string_code:
2598             {
2599                 lstring escstr;
2600                 int l = 0;
2601                 save_scanner_status = scanner_status;
2602                 save_def_ref = def_ref;
2603                 save_warning_index = warning_index;
2604                 scan_toks(false, true);
2605                 bool = in_lua_escape;
2606                 in_lua_escape = true;
2607                 escstr.s = (unsigned char *) tokenlist_to_cstring(def_ref, false, &l);
2608                 escstr.l = (unsigned) l;
2609                 in_lua_escape = bool;
2610                 delete_token_ref(def_ref);
2611                 def_ref = save_def_ref;
2612                 warning_index = save_warning_index;
2613                 scanner_status = save_scanner_status;
2614                 (void) lua_str_toks(escstr);
2615                 ins_list(token_link(temp_token_head));
2616                 free(escstr.s);
2617                 return;
2618             }
2619             /* no further action */
2620             break;
2621         case font_id_code:
2622             scan_font_ident();
2623             push_selector;
2624             print_int(cur_val);
2625             pop_selector;
2626             break;
2627         case font_name_code:
2628             scan_font_ident();
2629             push_selector;
2630             append_string((unsigned char *) font_name(cur_val),(unsigned) strlen(font_name(cur_val)));
2631             if (font_size(cur_val) != font_dsize(cur_val)) {
2632                 tprint(" at ");
2633                 print_scaled(font_size(cur_val));
2634                 tprint("pt");
2635             }
2636             pop_selector;
2637             break;
2638         case left_margin_kern_code:
2639             scan_int();
2640             if ((box(cur_val) == null) || (type(box(cur_val)) != hlist_node))
2641                 normal_error("marginkern", "a non-empty hbox expected");
2642             push_selector;
2643             p = list_ptr(box(cur_val));
2644             while ((p != null) && (type(p) == glue_node)) {
2645                 p = vlink(p);
2646             }
2647             if ((p != null) && (type(p) == margin_kern_node) && (subtype(p) == left_side))
2648                 print_scaled(width(p));
2649             else
2650                 print_char('0');
2651             tprint("pt");
2652             pop_selector;
2653             break;
2654         case right_margin_kern_code:
2655             scan_int();
2656             if ((box(cur_val) == null) || (type(box(cur_val)) != hlist_node))
2657                 normal_error("marginkern", "a non-empty hbox expected");
2658             push_selector;
2659             p = list_ptr(box(cur_val));
2660             if (p != null) {
2661                 p = tail_of_list(p);
2662                 /*
2663                     there can be a leftskip, rightskip, penalty and yes, also a disc node with a nesting
2664                     node that points to glue spec ... and we don't want to analyze that messy lot
2665                 */
2666                 while ((p != null) && (type(p) == glue_node)) {
2667                     p = alink(p);
2668                 }
2669                 if ((p != null) && ! ((type(p) == margin_kern_node) && (subtype(p) == right_side))) {
2670                     if (type(p) == disc_node) {
2671                         q = alink(p);
2672                         if ((q != null) && ((type(q) == margin_kern_node) && (subtype(q) == right_side))) {
2673                             p = q;
2674                         } else {
2675                             /*
2676                                 officially we should look in the replace but currently protrusion doesn't
2677                                 work anyway with "foo\discretionary{}{}{bar-} " (no following char) so we
2678                                 don't need it now
2679                             */
2680                         }
2681                     }
2682                 }
2683             }
2684             if ((p != null) && (type(p) == margin_kern_node) && (subtype(p) == right_side))
2685                 print_scaled(width(p));
2686             else
2687                 print_char('0');
2688             tprint("pt");
2689             pop_selector;
2690             break;
2691         case uniform_deviate_code:
2692             scan_int();
2693             push_selector;
2694             print_int(unif_rand(cur_val));
2695             pop_selector;
2696             break;
2697         case normal_deviate_code:
2698             scan_int();
2699             push_selector;
2700             print_int(norm_rand());
2701             pop_selector;
2702             break;
2703         case math_char_class_code:
2704             {
2705                 mathcodeval mval;
2706                 scan_int();
2707                 mval = get_math_code(cur_val);
2708                 push_selector;
2709                 print_int(mval.class_value);
2710                 pop_selector;
2711             }
2712             break;
2713         case math_char_fam_code:
2714             {
2715                 mathcodeval mval;
2716                 scan_int();
2717                 mval = get_math_code(cur_val);
2718                 push_selector;
2719                 print_int(mval.family_value);
2720                 pop_selector;
2721             }
2722             break;
2723         case math_char_slot_code:
2724             {
2725                 mathcodeval mval;
2726                 scan_int();
2727                 mval = get_math_code(cur_val);
2728                 push_selector;
2729                 print_int(mval.character_value);
2730                 pop_selector;
2731             }
2732             break;
2733         case insert_ht_code:
2734             scan_register_num();
2735             push_selector;
2736             i = cur_val;
2737             p = page_ins_head;
2738             while (i >= subtype(vlink(p)))
2739                 p = vlink(p);
2740             if (subtype(p) == i)
2741                 print_scaled(height(p));
2742             else
2743                 print_char('0');
2744             tprint("pt");
2745             pop_selector;
2746             break;
2747         case job_name_code:
2748             if (job_name == 0)
2749                 open_log_file();
2750             push_selector;
2751             print_job_name();
2752             pop_selector;
2753             break;
2754         case format_name_code:
2755             if (job_name == 0)
2756                 open_log_file();
2757             push_selector;
2758             print(format_name);
2759             pop_selector;
2760             break;
2761         case luatex_banner_code:
2762             push_selector;
2763             tprint(luatex_banner);
2764             pop_selector;
2765             break;
2766         case luatex_revision_code:
2767             push_selector;
2768             print(get_luatexrevision());
2769             pop_selector;
2770             break;
2771         case luatex_date_code:
2772             push_selector;
2773             print_int(get_luatex_date_info());
2774             pop_selector;
2775             break;
2776         case etex_code:
2777             push_selector;
2778             tprint(eTeX_version_string);
2779             pop_selector;
2780             break;
2781         case eTeX_revision_code:
2782             push_selector;
2783             tprint(eTeX_revision);
2784             pop_selector;
2785             break;
2786         case font_identifier_code:
2787             confusion("convert");
2788             break;
2789         default:
2790             confusion("convert");
2791             break;
2792     }
2793     str = make_string();
2794     (void) str_toks(str_lstring(str));
2795     flush_str(str);
2796     ins_list(token_link(temp_token_head));
2797 }
2798
2799 void do_feedback(void)
2800 {
2801     int c = cur_chr;
2802     str_number str;
2803     int done = 1;
2804     switch (c) {
2805         case dvi_feedback_code:
2806             if (get_o_mode() == OMODE_DVI) {
2807                 done = do_feedback_dvi(c);
2808             } else {
2809                 tex_error("unexpected use of \\dvifeedback",null);
2810                 return ;
2811             }
2812             if (done==0) {
2813                 /* we recover */
2814                 normal_warning("dvi backend","unexpected use of \\dvifeedback");
2815                 return;
2816             } else if (done==2) {
2817                 return;
2818             }
2819             break;
2820         case pdf_feedback_code:
2821             if (get_o_mode() == OMODE_PDF) {
2822                 done = do_feedback_pdf(c);
2823             } else {
2824                 tex_error("unexpected use of \\pdffeedback",null);
2825                 return ;
2826             }
2827             if (done==0) {
2828                 /* we recover */
2829                 normal_warning("pdf backend","unexpected use of \\pdffeedback");
2830                 return;
2831             } else if (done==2) {
2832                 return;
2833             }
2834             break;
2835         default:
2836             confusion("feedback");
2837             break;
2838     }
2839     str = make_string();
2840     (void) str_toks(str_lstring(str));
2841     flush_str(str);
2842     ins_list(token_link(temp_token_head));
2843 }
2844
2845 void do_variable(void)
2846 {
2847     int c = cur_chr;
2848     int done = 1;
2849     switch (c) {
2850         case dvi_variable_code:
2851             done = do_variable_dvi(c);
2852             if (done==0) {
2853                 /* we recover */
2854                 normal_warning("dvi backend","unexpected use of \\dvivariable");
2855             }
2856             return;
2857             break;
2858         case pdf_variable_code:
2859             done = do_variable_pdf(c);
2860             if (done==0) {
2861                 /* we recover */
2862                 normal_warning("pdf backend","unexpected use of \\pdfvariable");
2863             }
2864             return;
2865             break;
2866         default:
2867             confusion("variable");
2868             break;
2869     }
2870 }
2871
2872 @ This boolean is keeping track of the lua string escape state
2873 @c
boolean in_lua_escape; /* |conv_toks| saves it, sets it to |true| while the argument of |lua_escape_string_code| is converted by |tokenlist_to_cstring|, and then restores it */
2875
2876 static int the_convert_string_dvi(halfword c, int i)
2877 {
2878     return 0 ;
2879 }
2880
2881 static int the_convert_string_pdf(halfword c, int i)
2882 {
2883     int ff;
2884     if (get_o_mode() != OMODE_PDF) {
2885         return 0;
2886     } else if (scan_keyword("lastlink")) {
2887         print_int(pdf_last_link);
2888     } else if (scan_keyword("retval")) {
2889         print_int(pdf_retval);
2890     } else if (scan_keyword("lastobj")) {
2891         print_int(pdf_last_obj);
2892     } else if (scan_keyword("lastannot")) {
2893         print_int(pdf_last_annot);
2894     } else if (scan_keyword("xformname")) {
2895         print_int(obj_info(static_pdf, i));
2896     } else if (scan_keyword("creationdate")) {
2897         return 0;
2898     } else if (scan_keyword("fontname")) {
2899         set_ff(i);
2900         print_int(obj_info(static_pdf, pdf_font_num(ff)));
2901     } else if (scan_keyword("fontobjnum")) {
2902         set_ff(i);
2903         print_int(pdf_font_num(ff));
2904     } else if (scan_keyword("fontsize")) {
2905         print_scaled(font_size(i));
2906         tprint("pt");
2907     } else if (scan_keyword("pageref")) {
2908         print_int(pdf_get_obj(static_pdf, obj_type_page, i, false));
2909     } else if (scan_keyword("colorstackinit")) {
2910         return 0;
2911     } else {
2912         return 0;
2913     }
2914     return 1;
2915 }
2916
2917 str_number the_convert_string(halfword c, int i)
2918 {
2919     int old_setting;            /* saved |selector| setting */
2920     str_number ret = 0;
2921     boolean done = true ;
2922     old_setting = selector;
2923     selector = new_string;
2924     switch (c) {
2925         case number_code:
2926             print_int(i);
2927             break;
2928      /* case lua_function_code: */
2929      /* case lua_code: */
2930      /* case expanded_code: */
2931         case math_style_code:
2932             print_math_style();
2933             break;
2934      /* case string_code: */
2935      /* case cs_string_code: */
2936         case roman_numeral_code:
2937             print_roman_int(i);
2938             break;
2939      /* case meaning_code: */
2940         case uchar_code:
2941             print(i);
2942             break;
2943      /* lua_escape_string_code: */
2944         case font_id_code:
2945             print_int(i);
2946             break;
2947         case font_name_code:
2948             append_string((unsigned char *) font_name(i),(unsigned) strlen(font_name(i)));
2949             if (font_size(i) != font_dsize(i)) {
2950                 tprint(" at ");
2951                 print_scaled(font_size(i));
2952                 tprint("pt");
2953             }
2954             break;
2955      /* left_margin_kern_code: */
2956      /* right_margin_kern_code: */
2957         case uniform_deviate_code:
2958             print_int(unif_rand(i));
2959             break;
2960         case normal_deviate_code:
2961             print_int(norm_rand());
2962             break;
2963      /* math_char_class_code: */
2964      /* math_char_fam_code: */
2965      /* math_char_slot_code: */
2966      /* insert_ht_code: */
2967         case job_name_code:
2968             print_job_name();
2969             break;
2970         case format_name_code:
2971             print(format_name);
2972             break;
2973         case luatex_banner_code:
2974             tprint(luatex_banner);
2975             break;
2976         case luatex_revision_code:
2977             print(get_luatexrevision());
2978             break;
2979         case luatex_date_code:
2980             print_int(get_luatex_date_info());
2981             break;
2982         case etex_code:
2983             tprint(eTeX_version_string);
2984             break;
2985         case eTeX_revision_code:
2986             tprint(eTeX_revision);
2987             break;
2988         case font_identifier_code:
2989             print_font_identifier(i);
2990             break;
2991         /* backend: this might become obsolete */
2992         case dvi_feedback_code:
2993             done = the_convert_string_dvi(c,i);
2994             break;
2995         case pdf_feedback_code:
2996             done = the_convert_string_pdf(c,i);
2997             break;
2998         /* done */
2999         default:
3000             done = false;
3001             break;
3002     }
3003     if (done) {
3004         ret = make_string();
3005     }
3006     selector = old_setting;
3007     return ret;
3008 }
3009
3010 @ Another way to create a token list is via the \.{\\read} command. The sixteen
3011 files potentially usable for reading appear in the following global variables.
3012 The value of |read_open[n]| will be |closed| if stream number |n| has not been
3013 opened or if it has been fully read; |just_open| if an \.{\\openin} but not a
3014 \.{\\read} has been done; and |normal| if it is open and ready to read the next
3015 line.
3016
3017 @c
3018 FILE *read_file[16]; /* used for \.{\\read} */
3019 int read_open[17];   /* state of |read_file[n]| */
3020
3021 void initialize_read(void)
3022 {
3023     int k;
3024     for (k = 0; k <= 16; k++)
3025         read_open[k] = closed;
3026 }
3027
3028 @ The |read_toks| procedure constructs a token list like that for any macro
3029 definition, and makes |cur_val| point to it. Parameter |r| points to the control
3030 sequence that will receive this token list.
3031
3032 @c
3033 void read_toks(int n, halfword r, halfword j)
3034 {
3035     halfword p; /* tail of the token list */
3036     halfword q; /* new node being added to the token list via |store_new_token| */
3037     int s;      /* saved value of |align_state| */
3038     int m;      /* stream number */
3039     scanner_status = defining;
3040     warning_index = r;
3041     p = get_avail();
3042     def_ref = p;
3043     set_token_ref_count(def_ref, 0);
3044     p = def_ref;                /* the reference count */
3045     store_new_token(end_match_token);
3046     if ((n < 0) || (n > 15))
3047         m = 16;
3048     else
3049         m = n;
3050     s = align_state;
3051     align_state = 1000000;      /* disable tab marks, etc. */
3052     do {
3053         /* Input and store tokens from the next line of the file */
3054         begin_file_reading();
3055         iname = m + 1;
3056         if (read_open[m] == closed) {
3057             /*
3058                 Input for \.{\\read} from the terminal
3059
3060                 Here we input on-line into the |buffer| array, prompting the user explicitly
3061                 if |n>=0|.  The value of |n| is set negative so that additional prompts
3062                 will not be given in the case of multi-line input.
3063             */
3064             if (interaction > nonstop_mode) {
3065                 if (n < 0) {
3066                     prompt_input("");
3067                 } else {
3068                     wake_up_terminal();
3069                     print_ln();
3070                     sprint_cs(r);
3071                     prompt_input(" =");
3072                     n = -1;
3073                 }
3074             } else {
3075                 fatal_error
3076                     ("*** (cannot \\read from terminal in nonstop modes)");
3077             }
3078
3079         } else if (read_open[m] == just_open) {
3080             /*
3081                 Input the first line of |read_file[m]|
3082
3083                 The first line of a file must be treated specially, since |lua_input_ln|
3084                 must be told not to start with |get|.
3085             */
3086             if (lua_input_ln(read_file[m], (m + 1), false)) {
3087                 read_open[m] = normal;
3088             } else {
3089                 lua_a_close_in(read_file[m], (m + 1));
3090                 read_open[m] = closed;
3091             }
3092
3093         } else {
3094             /*
3095                 Input the next line of |read_file[m]|
3096
3097                 An empty line is appended at the end of a |read_file|.
3098             */
3099             if (!lua_input_ln(read_file[m], (m + 1), true)) {
3100                 lua_a_close_in(read_file[m], (m + 1));
3101                 read_open[m] = closed;
3102                 if (align_state != 1000000) {
3103                     runaway();
3104                     print_err("File ended within \\read");
3105                     help1("This \\read has unbalanced braces.");
3106                     align_state = 1000000;
3107                     error();
3108                 }
3109             }
3110
3111         }
3112         ilimit = last;
3113         if (end_line_char_inactive)
3114             decr(ilimit);
3115         else
3116             buffer[ilimit] = (packed_ASCII_code) int_par(end_line_char_code);
3117         first = ilimit + 1;
3118         iloc = istart;
3119         istate = new_line;
3120         /* Handle \.{\\readline} and |goto done|; */
3121         if (j == 1) {
3122             while (iloc <= ilimit) {
3123                 /* current line not yet finished */
3124                 do_buffer_to_unichar(cur_chr, iloc);
3125                 if (cur_chr == ' ')
3126                     cur_tok = space_token;
3127                 else
3128                     cur_tok = cur_chr + other_token;
3129                 store_new_token(cur_tok);
3130             }
3131         } else {
3132             while (1) {
3133                 get_token();
3134                 if (cur_tok == 0) {
3135                     /* |cur_cmd=cur_chr=0| will occur at the end of the line */
3136                     break;
3137                 }
3138                 if (align_state < 1000000) {
3139                     /* unmatched `\.\}' aborts the line */
3140                     do {
3141                         get_token();
3142                     } while (cur_tok != 0);
3143                     align_state = 1000000;
3144                     break;
3145                 }
3146                 store_new_token(cur_tok);
3147             }
3148         }
3149         end_file_reading();
3150
3151     } while (align_state != 1000000);
3152     cur_val = def_ref;
3153     scanner_status = normal;
3154     align_state = s;
3155 }
3156
@ Return a string built from a token list.
3158
3159 @c
3160 str_number tokens_to_string(halfword p)
3161 {
3162     int old_setting;
3163     if (selector == new_string)
3164         normal_error("tokens","tokens_to_string() called while selector = new_string");
3165     old_setting = selector;
3166     selector = new_string;
3167     show_token_list(token_link(p), null, -1);
3168     selector = old_setting;
3169     return make_string();
3170 }
3171
3172 @ @c
/*
    Make sure the output buffer |ret| can take |a| more bytes at position |i|
    plus one extra byte (the terminating NUL written at the end).

    The buffer grows in steps of 64 bytes, and as many steps as are needed are
    taken, so a request larger than one step can no longer leave the buffer too
    small. The macro uses the caller's locals |ret|, |i| and |alloci|, and is
    wrapped in |do ... while (0)| so that it behaves as a single statement.
*/
#define make_room(a) do {                                          \
    size_t mr_need = (size_t) i + (size_t) (a) + 1;                \
    if (mr_need > alloci) {                                        \
        while (mr_need > alloci)                                   \
            alloci = alloci + 64;                                  \
        ret = xrealloc(ret, alloci);                               \
    }                                                              \
} while (0)
3178
/* Store one byte at position |i| of |ret| and advance |i|; no bounds check is done here. */
#define append_i_byte(a) ret[i++] = (char)(a)

/*
    Reserve room for one byte and append it. Both steps are wrapped in
    |do ... while (0)| so the macro is a single statement and stays correct
    under an unbraced |if| or |else|.
*/
#define Print_char(a) do { make_room(1); append_i_byte(a); } while (0)
3182
/*
    Append the code point |s| to |ret| as UTF-8 (one to four bytes), after
    reserving room for four bytes.

    Values of 0x110000 and above are not encoded: they are written as the
    single byte |s - 0x110000|. The offset used to be spelled 0x11000, which
    only produced the right byte because the cast to |char| in |append_i_byte|
    drops the high bits.

    |s| is evaluated several times, so pass a side-effect free expression.
*/
#define Print_uchar(s) {                                           \
    make_room(4);                                                  \
    if ((s)<=0x7F) {                                               \
      append_i_byte(s);                                            \
    } else if ((s)<=0x7FF) {                                       \
      append_i_byte(0xC0 + ((s) / 0x40));                          \
      append_i_byte(0x80 + ((s) % 0x40));                          \
    } else if ((s)<=0xFFFF) {                                      \
      append_i_byte(0xE0 + ((s) / 0x1000));                        \
      append_i_byte(0x80 + (((s) % 0x1000) / 0x40));               \
      append_i_byte(0x80 + (((s) % 0x1000) % 0x40));               \
    } else if ((s)>=0x110000) {                                    \
      append_i_byte((s)-0x110000);                                 \
    } else {                                                       \
      append_i_byte(0xF0 + ((s) / 0x40000));                       \
      append_i_byte(0x80 + (((s) % 0x40000) / 0x1000));            \
      append_i_byte(0x80 + ((((s) % 0x40000) % 0x1000) / 0x40));   \
      append_i_byte(0x80 + ((((s) % 0x40000) % 0x1000) % 0x40));   \
    } }
3202
/*
    Append the C string |b| to |ret|, preceded by the escape character |e|
    when |e| is positive and below |STRING_OFFSET|. Room for the text is
    reserved in one go before the bytes are copied.
*/
#define Print_esc(b) {                                 \
    const char *esc_src = b;                           \
    if (0 < e && e < STRING_OFFSET) {                  \
        Print_uchar (e);                               \
    }                                                  \
    make_room(strlen(esc_src));                        \
    for (; *esc_src != '\0'; esc_src++) {              \
        append_i_byte(*esc_src);                       \
    }                                                  \
  }
3211
/*
    Append the C string |b| to |ret| as is (no escape character), reserving
    room for all of its bytes before copying them.
*/
#define Print_str(b) {                                 \
    const char *str_src = b;                           \
    make_room(strlen(str_src));                        \
    for (; *str_src != '\0'; str_src++) {              \
        append_i_byte(*str_src);                       \
    }                                                  \
  }
3217
/*
    True when |get_char_cat_code| reports category 11 for the character that
    |pool_to_unichar| decodes from |str_string(a)|.
*/
#define is_cat_letter(a) \
    (11 == get_char_cat_code(pool_to_unichar(str_string((a)))))
3220
@ The actual token conversion in this function is now functionally equivalent to
|show_token_list|, except that it always prints the whole token list. TODO: check
whether this causes problems in the Lua library.
3224
3225 @c
/*
    Convert the token list |pp| into a newly allocated, NUL-terminated C string.

    The first node of |pp| is skipped as the reference count. When |pp| is
    |null| the function returns |NULL| and, if |siz| is not |NULL|, stores 0 in
    |*siz|. Otherwise it returns the buffer obtained from |xmalloc| (enlarged
    on demand by |make_room|), and |*siz|, when given, receives the number of
    bytes written, not counting the terminating NUL.

    A nonzero |inhibit_par| suppresses tokens equal to |par_token|.

    The |Print_...| macros write into the locals |ret|, |i| and |alloci|, and
    |Print_esc| also reads |e|, so these names have to stay as they are.
*/
char *tokenlist_to_cstring(int pp, int inhibit_par, int *siz)
{
    register int p, c, m;
    int q;
    int infop;
    char *s, *sh;
    int e = 0;                  /* escape character, fetched below for a non-empty list */
    char *ret;
    int match_chr = '#';        /* printed before an |out_param_cmd| number; replaced by each |match_cmd| */
    int n = '0';                /* character of the last |match_cmd| parameter number printed */
    unsigned alloci = 1024;     /* allocated size of |ret| */
    int i = 0;                  /* number of bytes stored in |ret| so far */
    p = pp;
    if (p == null) {
        if (siz != NULL)
            *siz = 0;
        return NULL;
    }
    ret = xmalloc(alloci);
    p = token_link(p);          /* skip refcount */
    if (p != null) {
        e = int_par(escape_char_code);
    }
    while (p != null) {
        /* a link outside |fix_mem_min..fix_mem_end| ends the conversion */
        if (p < (int) fix_mem_min || p > (int) fix_mem_end) {
            Print_esc("CLOBBERED.");
            break;
        }
        infop = token_info(p);
        if (infop >= cs_token_flag) {
            /* control sequence token; skipped when it is |par_token| and |inhibit_par| is set */
            if (!(inhibit_par && infop == par_token)) {
                q = infop - cs_token_flag;
                if (q < hash_base) {
                    /* below |hash_base| only |null_cs| is accepted */
                    if (q == null_cs) {
                        Print_esc("csname");
                        Print_esc("endcsname");
                    } else {
                        Print_esc("IMPOSSIBLE.");
                    }
                } else if ((q >= undefined_control_sequence) && ((q <= eqtb_size) || (q > eqtb_size + hash_extra))) {
                    Print_esc("IMPOSSIBLE.");
                } else if ((cs_text(q) < 0) || (cs_text(q) >= str_ptr)) {
                    /* the name is not a valid string number */
                    Print_esc("NONEXISTENT.");
                } else {
                    str_number txt = cs_text(q);
                    sh = makecstring(txt);      /* released with |free| below */
                    s = sh;
                    if (is_active_cs(txt)) {
                        /*
                            The first three bytes of the name are skipped and
                            neither an escape character nor a trailing space is
                            written. NOTE(review): presumably those bytes are
                            the marker tested by |is_active_cs| -- confirm.
                        */
                        s = s + 3;
                        while (*s) {
                            Print_char(*s);
                            s++;
                        }
                    } else {
                        if (e>=0 && e<0x110000) Print_uchar(e);
                        while (*s) {
                            Print_char(*s);
                            s++;
                        }
                        /* no space after a single character name that is not a letter */
                        if ((!single_letter(txt)) || is_cat_letter(txt)) {
                            Print_char(' ');
                        }
                    }
                    free(sh);
                }
            }
        } else {
            if (infop < 0) {
                Print_esc("BAD");
            } else {
                m = token_cmd(infop);
                c = token_chr(infop);
                switch (m) {
                    /* these commands print their character as is */
                    case left_brace_cmd:
                    case right_brace_cmd:
                    case math_shift_cmd:
                    case tab_mark_cmd:
                    case sup_mark_cmd:
                    case sub_mark_cmd:
                    case spacer_cmd:
                    case letter_cmd:
                    case other_char_cmd:
                        Print_uchar(c);
                        break;
                    case mac_param_cmd:
                        /* written twice, except when |in_lua_escape| is set or |is_in_csname| is nonzero */
                        if (!in_lua_escape && (is_in_csname==0))
                            Print_uchar(c);
                        Print_uchar(c);
                        break;
                    case out_param_cmd:
                        /* |match_chr| followed by the digit for |c|; a value above 9 prints `!' and stops */
                        Print_uchar(match_chr);
                        if (c <= 9) {
                            Print_char(c + '0');
                        } else {
                            Print_char('!');
                            goto EXIT;
                        }
                        break;
                    case match_cmd:
                        /* remember the character and number the parameter; stop once the number passes `9' */
                        match_chr = c;
                        Print_uchar(c);
                        n++;
                        Print_char(n);
                        if (n > '9')
                            goto EXIT;
                        break;
                    case end_match_cmd:
                        if (c == 0) {
                            Print_char('-');
                            Print_char('>');
                        }
                        break;
                    default:
                        /*
                            |not_so_bad| is a macro defined elsewhere.
                            NOTE(review): it presumably inspects the locals |m|
                            and |c| -- confirm before renaming them.
                        */
                        not_so_bad(Print_esc);
                        break;
                }
            }
        }
        p = token_link(p);
    }
  EXIT:
    /* every |make_room| call keeps one spare byte, which is used for the terminator */
    ret[i] = '\0';
    if (siz != NULL)
        *siz = i;
    return ret;
}
3352
3353 @ @c
3354 lstring *tokenlist_to_lstring(int pp, int inhibit_par)
3355 {
3356     int siz;
3357     lstring *ret = xmalloc(sizeof(lstring));
3358     ret->s = (unsigned char *) tokenlist_to_cstring(pp, inhibit_par, &siz);
3359     ret->l = (size_t) siz;
3360     return ret;
3361 }
3362
3363 @ @c
3364 void free_lstring(lstring * ls)
3365 {
3366     if (ls == NULL)
3367         return;
3368     if (ls->s != NULL)
3369         free(ls->s);
3370     free(ls);
3371 }