3 % Copyright
2006-2011 Taco Hoekwater
<taco@@luatex.org
>
5 % This file is part of LuaTeX.
7 % LuaTeX is free software
; you can redistribute it and
/or modify it under
8 % the terms of the GNU General Public License as published by the Free
9 % Software Foundation
; either version
2 of the License
, or
(at your
10 % option
) any later version.
12 % LuaTeX is distributed in the hope that it will be useful
, but WITHOUT
13 % ANY WARRANTY
; without even the implied warranty of MERCHANTABILITY or
14 % FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 % License for more details.
17 % You should have received a copy of the GNU General Public License along
18 % with LuaTeX
; if not
, see
<http
://www.gnu.org
/licenses
/>.
/* Shorthand names for integer parameters: each macro below expands to
   |int_par| applied to the parameter code constant of the same name
   with a |_code| suffix. */
25 #define pausing int_par
(pausing_code
)
26 #define cat_code_table int_par
(cat_code_table_code
)
27 #define tracing_nesting int_par
(tracing_nesting_code
)
28 #define suppress_outer_error int_par
(suppress_outer_error_code
)
29 #define suppress_mathpar_error int_par
(suppress_mathpar_error_code
)
/* Shorthands built on |equiv|: |every_eof| reads the entry at
   |every_eof_loc|, while |box(A)| and |toks(A)| read the entry at offset |A|
   from |box_base| and |toks_base| respectively. */
32 #define every_eof equiv
(every_eof_loc
)
33 #define box
(A
) equiv
(box_base
+(A
))
34 #define toks
(A
) equiv
(toks_base
+(A
))
/* True when |line_catcode_table| equals |NO_CAT_TABLE|. */
36 #define detokenized_line
() (line_catcode_table
==NO_CAT_TABLE
)
38 #define do_get_cat_code
(a
,b
) do
{ \
39 if
(line_catcode_table
!=DEFAULT_CAT_TABLE
) \
40 a
=get_cat_code
(line_catcode_table
,b
); \
42 a
=get_cat_code
(cat_code_table
,b
); \
46 @ The \TeX\ system does nearly all of its own memory allocation
, so that it can
47 readily be transported into environments that do not have automatic facilities
48 for strings
, garbage collection
, etc.
, and so that it can be in control of what
49 error messages the user receives. The dynamic storage requirements of \TeX\ are
50 handled by providing two large arrays called |fixmem| and |varmem| in which
51 consecutive blocks of words are used as nodes by the \TeX\ routines.
53 Pointer variables are indices into this array
, or into another array called
54 |eqtb| that will be explained later. A pointer variable might also be a special
55 flag that lies outside the bounds of |mem|
, so we allow pointers to assume any
56 |halfword| value. The minimum halfword value represents a null pointer. \TeX\
57 does not assume that |mem
[null
]| exists.
59 @ Locations in |fixmem| are used for storing one-word records
; a conventional
60 \.
{AVAIL} stack is used for allocation in this array.
63 smemory_word
*fixmem
; /* the big dynamic storage area
*/
64 unsigned fix_mem_min
; /* the smallest location of one-word memory in use
*/
65 unsigned fix_mem_max
; /* the largest location of one-word memory in use
*/
67 @ In order to study the memory requirements of particular applications
, it is
68 possible to prepare a version of \TeX\ that keeps track of current and maximum
69 memory usage. When code between the delimiters |@
!stat| $\ldots$ |tats| is not
70 commented out
, \TeX\ will run a bit slower but it will report these statistics
71 when |tracing_stats| is sufficiently large.
74 int var_used
, dyn_used
; /* how much memory is in use
*/
76 halfword avail
; /* head of the list of available one-word nodes
*/
77 unsigned fix_mem_end
; /* the last one-word node used in |mem|
*/
79 halfword garbage
; /* head of a junk list
, write only
*/
80 halfword temp_token_head
; /* head of a temporary list of some kind
*/
81 halfword hold_token_head
; /* head of a temporary list of another kind
*/
82 halfword omit_template
; /* a constant token list
*/
83 halfword null_list
; /* permanently empty list
*/
84 halfword backup_head
; /* head of token list built by |scan_keyword|
*/
87 void initialize_tokens
(void
)
94 set_token_info
(temp_token_head
, 0);
97 set_token_info
(hold_token_head
, 0);
100 set_token_info
(omit_template
, 0);
103 set_token_info
(null_list
, 0);
106 set_token_info
(backup_head
, 0);
109 set_token_info
(garbage
, 0);
110 dyn_used
= 0; /* initialize statistics
*/
113 @ The function |get_avail| returns a pointer to a new one-word node whose |link|
114 field is null. However
, \TeX\ will halt if there is no more room left.
117 If the available-space list is empty
, i.e.
, if |avail
=null|
, we try first to
118 increase |fix_mem_end|. If that cannot be done
, i.e.
, if
119 |fix_mem_end
=fix_mem_max|
, we try to reallocate array |fixmem|. If that doesn't
120 work
, we have to quit.
123 halfword get_avail
(void
)
124 { /* single-word node allocation
*/
125 unsigned p
; /* the new node being got
*/
127 p
= (unsigned
) avail
; /* get top location in the |avail| stack
*/
129 avail
= token_link
(avail
); /* and pop it off
*/
130 } else if
(fix_mem_end
< fix_mem_max
) { /* or go into virgin territory
*/
134 smemory_word
*new_fixmem
; /* the big dynamic storage area
*/
135 t
= (fix_mem_max
/ 5);
138 (fixmem
, sizeof
(smemory_word
) * (fix_mem_max
+ t
+ 1)));
139 if
(new_fixmem
== NULL) {
140 runaway
(); /* if memory is exhausted
, display possible runaway text
*/
141 overflow
("token memory size", fix_mem_max
);
145 memset
(voidcast
(fixmem
+ fix_mem_max
+ 1), 0, t
* sizeof
(smemory_word
));
149 token_link
(p
) = null
; /* provide an oft-desired initialization of the new node
*/
150 incr
(dyn_used
); /* maintain statistics
*/
154 @ The procedure |flush_list
(p
)| frees an entire linked list of one-word nodes
155 that starts at position |p|.
159 void flush_list
(halfword p
)
160 { /* makes list of single-word nodes available
*/
161 halfword q
, r
; /* list traversers
*/
168 } while
(r
!= null
); /* now |q| is the last node on the list
*/
169 token_link
(q
) = avail
;
174 @ A \TeX\ token is either a character or a control sequence
, and it is @^token@
>
175 represented internally in one of two ways
: (1)~A character whose ASCII code
176 number is |c| and whose command code is |m| is represented as the number
177 $
2^
{21}m
+c$
; the command code is in the range |
1<=m
<=14|.
(2)~A control sequence
178 whose |eqtb| address is |p| is represented as the number |cs_token_flag
+p|. Here
179 |cs_token_flag
=@t$
2^
{25}-1$@
>| is larger than $
2^
{21}m
+c$
, yet it is small enough
180 that |cs_token_flag
+p
< max_halfword|
; thus
, a token fits comfortably in a
183 A token |t| represents a |left_brace| command if and only if
184 |t
<left_brace_limit|
; it represents a |right_brace| command if and only if we
185 have |left_brace_limit
<=t
<right_brace_limit|
; and it represents a |match| or
186 |end_match| command if and only if |match_token
<=t
<=end_match_token|. The
187 following definitions take care of these token-oriented constants and a few
190 @ A token list is a singly linked list of one-word nodes in |mem|
, where each
191 word contains a token and a link. Macro definitions
, output-routine definitions
,
192 marks
, \.
{\\write
} texts
, and a few other things are remembered by \TeX\ in the
193 form of token lists
, usually preceded by a node with a reference count in its
194 |token_ref_count| field. The token stored in location |p| is called |info
(p
)|.
196 Three special commands appear in the token lists of macro definitions. When
197 |m
=match|
, it means that \TeX\ should scan a parameter for the current macro
;
198 when |m
=end_match|
, it means that parameter matching should end and \TeX\ should
199 start reading the macro text
; and when |m
=out_param|
, it means that \TeX\ should
200 insert parameter number |c| into the text at this point.
202 The enclosing \.
{\char'
173} and \.
{\char'
175} characters of a macro definition
203 are omitted
, but the final right brace of an output routine is included at the
204 end of its token list.
206 Here is an example macro definition that illustrates these conventions. After
207 \TeX\ processes the text
209 $$\.
{\\def\\mac a\#
1\#
2 \\b \
{\#
1\\
-a \#\#
1\#
2 \#
2\
}}$$
211 the definition of \.
{\\mac
} is represented as a token list containing
214 \vbox
{\halign
{\hfil#\hfil\cr
215 (reference count
), |letter|\
,\.a
, |match|\
,\#
, |match|\
,\#
, |spacer|\
,\.\
,
216 \.
{\\b
}, |end_match|
,\cr
217 |out_param|\
,1, \.
{\\
-}, |letter|\
,\.a
, |spacer|\
,\.\
, |mac_param|\
,\#
,
218 |other_char|\
,\
.1,\cr
219 |out_param|\
,2, |spacer|\
,\.\
, |out_param|\
,2.\cr
}}$$
221 The procedure |scan_toks| builds such token lists
, and |macro_call| does the
222 parameter matching. @^reference counts@
>
224 Examples such as $$\.
{\\def\\m\
{\\def\\m\
{a\
}\ b\
}}$$ explain why reference
225 counts would be needed even if \TeX\ had no \.
{\\let
} operation
: When the token
226 list for \.
{\\m
} is being read
, the redefinition of \.
{\\m
} changes the |eqtb|
227 entry before the token list has been fully consumed
, so we dare not simply
228 destroy a token list when its control sequence is being redefined.
230 If the parameter-matching part of a definition ends with `\.
{\#\
{}'
, the
231 corresponding token list will have `\.\
{' just before the `|end_match|' and also
232 at the very end. The first `\.\
{' is used to delimit the parameter
; the second
233 one keeps the first from disappearing.
235 The |print_meaning| subroutine displays |cur_cmd| and |cur_chr| in symbolic form
,
236 including the expansion of a macro or mark.
239 void print_meaning
(void
)
241 /* remap \mathchar onto \Umathchar
*/
242 if
(cur_cmd
== math_given_cmd
) {
243 cur_cmd
= xmath_given_cmd
;
244 } /* else if
(cur_cmd
== math_char_num_cmd
) {
249 print_cmd_chr
((quarterword
) cur_cmd
, cur_chr
);
250 if
(cur_cmd
>= call_cmd
) {
255 /* Show the meaning of a mark node
*/
256 if
((cur_cmd
== top_bot_mark_cmd
) && (cur_chr < marks_code)) {
260 case first_mark_code
:
261 token_show
(first_mark
(0));
264 token_show
(bot_mark
(0));
266 case split_first_mark_code
:
267 token_show
(split_first_mark
(0));
269 case split_bot_mark_code
:
270 token_show
(split_bot_mark
(0));
273 token_show
(top_mark
(0));
280 @ The procedure |show_token_list|
, which prints a symbolic form of the token list
281 that starts at a given node |p|
, illustrates these conventions. The token list
282 being displayed should not begin with a reference count. However
, the procedure
283 is intended to be robust
, so that if the memory links are awry or if |p| is not
284 really a pointer to a token list
, nothing catastrophic will happen.
286 An additional parameter |q| is also given
; this parameter is either null or it
287 points to a node in the token list where a certain magic computation takes place
288 that will be explained later.
(Basically
, |q| is non-null when we are printing
289 the two-line context information at the time of an error message
; |q| marks the
290 place corresponding to where the second line should begin.
)
292 For example
, if |p| points to the node containing the first \.a in the token list
293 above
, then |show_token_list| will print the string $$\hbox
{`\.
{a\#
1\#
2\ \\b\
294 ->\#
1\\
-a\ \#\#
1\#
2\ \#
2}'
;}$$ and if |q| points to the node containing the
295 second \.a
, the magic computation will be performed just before the second \.a is
298 The generation will stop
, and `\.
{\\ETC.
}' will be printed
, if the length of
299 printing exceeds a given limit~|l|. Anomalous entries are printed in the form of
300 control sequences that are not followed by a blank space
, e.g.
, `\.
{\\BAD.
}'
;
301 this cannot be confused with actual control sequences because a real control
302 sequence named \.
{BAD
} would come out `\.
{\\BAD\
}'.
305 #define not_so_bad
(p
) \
307 case assign_int_cmd
: \
308 if
(c
>= (backend_int_base
) && c <= (backend_int_last)) \
309 p
("[internal backend integer]"); \
311 case assign_dimen_cmd
: \
312 if
(c
>= (backend_dimen_base
) && c <= (backend_dimen_last)) \
313 p
("[internal backend dimension]"); \
315 case assign_toks_cmd
: \
316 if
(c
>= (backend_toks_base
) && c <= (backend_toks_last)) \
317 p
("[internal backend tokenlist]"); \
324 void show_token_list
(int p
, int q
, int l
)
326 int m
, c
; /* pieces of a token
*/
327 ASCII_code match_chr
= '#'
; /* character used in a `|match|'
*/
328 ASCII_code n
= '
0'
; /* the highest parameter number
, as an ASCII digit
*/
332 while
((p
!= null
) && (tally < l)) {
334 /* Do magic computation
*/
337 /* Display token |p|
, and |return| if there are problems
*/
338 if
((p
< (int
) fix_mem_min
) ||
(p
> (int
) fix_mem_end
)) {
339 tprint_esc
("CLOBBERED.");
342 if
(token_info
(p
) >= cs_token_flag
) {
343 if
(!((inhibit_par_tokens
) && (token_info(p) == par_token)))
344 print_cs
(token_info
(p
) - cs_token_flag
);
346 m
= token_cmd
(token_info
(p
));
347 c
= token_chr
(token_info
(p
));
348 if
(token_info
(p
) < 0) {
352 Display the token $
(|m|
,|c|
)$
354 The procedure usually ``learns'' the character code used for macro
355 parameters by seeing one in a |match| command before it runs into any
356 |out_param| commands.
360 case right_brace_cmd
:
371 if
(!in_lua_escape
&& (is_in_csname==0))
409 #define do_buffer_to_unichar
(a
,b
) do
{ \
410 a
= (halfword
)str2uni
(buffer
+b
); \
414 @ Here's the way we sometimes want to display a token list
, given a pointer to
415 its reference count
; the pointer may be null.
418 void token_show
(halfword p
)
421 show_token_list
(token_link
(p
), null
, 10000000);
424 @ |delete_token_ref| is called when a pointer to a token list's reference count
425 is being removed. This means that the token list should disappear if the
426 reference count was |null|; otherwise the count should be decreased by one.
429 @ |p| points to the reference count of a token list that is losing one
433 void delete_token_ref
(halfword p
)
435 if
(token_ref_count
(p
) == 0)
438 decr
(token_ref_count
(p
));
442 int get_char_cat_code
(int curchr
)
445 do_get_cat_code
(a
,curchr
);
450 static void invalid_character_error
(void
)
452 const char
*hlp
[] = {
453 "A funny symbol that I can't read has just been input.",
454 "Continue, and I'll forget that it ever happened.",
457 deletions_allowed
= false
;
458 tex_error
("Text line contains an invalid character", hlp
);
459 deletions_allowed
= true
;
463 static boolean process_sup_mark
(void
); /* below
*/
465 static int scan_control_sequence
(void
); /* below
*/
473 static next_line_retval next_line
(void
); /* below
*/
475 @ In case you are getting bored
, here is a slightly less trivial routine
: Given a
476 string of lowercase letters
, like `\.
{pt
}' or `\.
{plus
}' or `\.
{width
}'
, the
477 |scan_keyword| routine checks to see whether the next tokens of input match this
478 string. The match must be exact
, except that uppercase letters will match their
479 lowercase counterparts
; uppercase equivalents are determined by subtracting
480 |
"a"-"A"|
, rather than using the |uc_code| table
, since \TeX\ uses this routine
481 only for its own limited set of keywords.
483 If a match is found
, the characters are effectively removed from the input and
484 |true| is returned. Otherwise |false| is returned
, and the input is left
485 essentially unchanged
(except for the fact that some macros may have been
486 expanded
, etc.
). @^inner loop@
>
489 boolean scan_keyword
(const char
*s
)
490 { /* look for a given string
*/
491 halfword p
; /* tail of the backup list
*/
492 halfword q
; /* new node being added to the token list via |store_new_token|
*/
493 const char
*k
; /* index into |str_pool|
*/
494 halfword save_cur_cs
= cur_cs
;
495 int saved_align_state
= align_state
;
496 if
(strlen
(s
) == 0) /* was assert
(strlen
(s
) > 1); */
497 return false
; /* but not with newtokenlib zero keyword simply doesn't match
*/
499 token_link
(p
) = null
;
502 get_x_token
(); /* recursion is possible here
*/
504 ((cur_chr
== *k
) ||
(cur_chr
== *k
- 'a'
+ 'A'
))) {
505 store_new_token
(cur_tok
);
507 } else if
((cur_cmd
!= spacer_cmd
) ||
(p
!= backup_head
)) {
508 if
(p
!= backup_head
) {
510 token_info
(q
) = cur_tok
;
511 token_link
(q
) = null
;
513 begin_token_list
(token_link
(backup_head
), backed_up
);
514 if
(cur_cmd
!= endv_cmd
)
515 align_state
= saved_align_state
;
519 cur_cs
= save_cur_cs
;
523 if
(token_link
(backup_head
) != null
)
524 flush_list
(token_link
(backup_head
));
525 cur_cs
= save_cur_cs
;
526 if
(cur_cmd
!= endv_cmd
)
527 align_state
= saved_align_state
;
531 @ We cannot return |undefined_control_sequence| under some conditions
532 (inside |shift_case|
, for example
). This needs thinking.
537 halfword active_to_cs
(int curchr
, int force
)
541 char
*utfbytes
= xmalloc
(8);
542 int nncs
= no_new_control_sequence
;
543 a
= (char
*) uni2str
(0xFFFF);
544 utfbytes
= strcpy
(utfbytes
, a
);
546 no_new_control_sequence
= false
;
548 b
= (char
*) uni2str
((unsigned
) curchr
);
549 utfbytes
= strcat
(utfbytes
, b
);
551 curcs
= string_lookup
(utfbytes
, strlen
(utfbytes
));
554 curcs
= string_lookup
(utfbytes
, 4);
556 no_new_control_sequence
= nncs
;
563 /*static char
* FFFF
= "\xEF\xBF\xBF";*/ /* 0xFFFF */
565 halfword active_to_cs
(int curchr
, int force
)
568 int nncs
= no_new_control_sequence
;
570 no_new_control_sequence
= false
;
573 char
*b
= (char
*) uni2str
((unsigned
) curchr
);
574 char
*utfbytes
= xmalloc
(8);
575 utfbytes
= strcpy
(utfbytes
, "\xEF\xBF\xBF");
576 utfbytes
= strcat
(utfbytes
, b
);
578 curcs
= string_lookup
(utfbytes
, utf8_size
(curchr
)+3);
581 curcs
= string_lookup
("\xEF\xBF\xBF", 4); /* 0xFFFF ... why not
3 ?
*/
583 no_new_control_sequence
= nncs
;
589 static unsigned char
*uni2csstr
(unsigned unic
)
591 unsigned char
*buf
= xmalloc
(8);
592 unsigned char
*pt
= buf
;
593 *pt
++ = 239; *pt
++ = 191; *pt
++ = 191; // 0xFFFF
595 *pt
++ = (unsigned char
) unic
;
596 else if
(unic
< 0x800) {
597 *pt
++ = (unsigned char
) (0xc0 |
(unic
>> 6));
598 *pt
++ = (unsigned char
) (0x80 |
(unic
& 0x3f));
599 } else if
(unic
>= 0x110000) {
600 *pt
++ = (unsigned char
) (unic
- 0x110000);
601 } else if
(unic
< 0x10000) {
602 *pt
++ = (unsigned char
) (0xe0 |
(unic
>> 12));
603 *pt
++ = (unsigned char
) (0x80 |
((unic
>> 6) & 0x3f));
604 *pt
++ = (unsigned char
) (0x80 |
(unic
& 0x3f));
607 unsigned val
= unic
- 0x10000;
608 u
= (int
) (((val
& 0xf0000) >> 16) + 1);
609 z
= (int
) ((val
& 0x0f000) >> 12);
610 y
= (int
) ((val
& 0x00fc0) >> 6);
611 x
= (int
) (val
& 0x0003f);
612 *pt
++ = (unsigned char
) (0xf0 |
(u
>> 2));
613 *pt
++ = (unsigned char
) (0x80 |
((u
& 3) << 4) | z);
614 *pt
++ = (unsigned char
) (0x80 | y
);
615 *pt
++ = (unsigned char
) (0x80 | x
);
621 halfword active_to_cs
(int curchr
, int force
)
624 int nncs
= no_new_control_sequence
;
626 no_new_control_sequence
= false
;
629 char
* utfbytes
= (char
*) uni2csstr
((unsigned
) curchr
);
630 curcs
= string_lookup
(utfbytes
, utf8_size
(curchr
)+3);
633 curcs
= string_lookup
(FFFF
, 4); // 0xFFFF ... why not
3 ?
635 no_new_control_sequence
= nncs
;
641 @ TODO this function should listen to \.
{\\escapechar
}
643 @ prints a control sequence
646 static char
*cs_to_string
(halfword p
)
651 static char ret
[256] = { 0 };
652 if
(p
== 0 || p
== null_cs
) {
666 str_number txt
= cs_text
(p
);
667 sh
= makecstring
(txt
);
669 if
(is_active_cs
(txt
)) {
687 @ TODO this is a quick hack
, will be solved differently soon
690 static char
*cmd_chr_to_string
(int cmd
, int chr
)
695 selector
= new_string
;
696 print_cmd_chr
((quarterword
) cmd
, chr
);
698 s
= makecstring
(str
);
704 @ The heart of \TeX's input mechanism is the |get_next| procedure
, which we shall
705 develop in the next few sections of the program. Perhaps we shouldn't actually
706 call it the ``heart
,'' however
, because it really acts as \TeX's eyes and mouth
,
707 reading the source files and gobbling them up. And it also helps \TeX\ to
708 regurgitate stored token lists that are to be processed again. @^eyes and mouth@
>
710 The main duty of |get_next| is to input one token and to set |cur_cmd| and
711 |cur_chr| to that token's command code and modifier. Furthermore
, if the input
712 token is a control sequence
, the |eqtb| location of that control sequence is
713 stored in |cur_cs|
; otherwise |cur_cs| is set to zero.
715 Underlying this simple description is a certain amount of complexity because of
716 all the cases that need to be handled. However
, the inner loop of |get_next| is
717 reasonably short and fast.
719 When |get_next| is asked to get the next token of a \.
{\\read
} line
,
720 it sets |cur_cmd
=cur_chr
=cur_cs
=0| in the case that no more tokens
721 appear on that line.
(There might not be any tokens at all
, if the
722 |end_line_char| has |ignore| as its catcode.
)
724 The value of |par_loc| is the |eqtb| address of `\.
{\\par
}'. This quantity is
725 needed because a blank line of input is supposed to be exactly equivalent to the
726 appearance of \.
{\\par
}; we must set |cur_cs
:=par_loc| when detecting a blank
730 halfword par_loc
; /* location of `\.
{\\par
}' in |eqtb|
*/
731 halfword par_token
; /* token representing `\.
{\\par
}'
*/
733 @ Parts of |get_next| are executed more often than any other instructions of \TeX.
734 @^mastication@
>@^inner loop@
>
736 The global variable |force_eof| is normally |false|
; it is set |true| by an
737 \.
{\\endinput
} command. |luacstrings| is the number of Lua print statements
738 waiting to be input; it is changed by |luatokencall|.
741 boolean force_eof
; /* should the next \.
{\\input
} be aborted early?
*/
742 int luacstrings
; /* how many lua strings are waiting to be input?
*/
744 @ If the user has set the |pausing| parameter to some positive value
, and if
745 nonstop mode has not been selected
, each line of input is displayed on the
746 terminal and the transcript file
, followed by `\.
{=>}'. \TeX\ waits for a
747 response. If the response is simply |carriage_return|
, the line is accepted as it
748 stands
, otherwise the line typed is used instead of the line in the file.
751 void firm_up_the_line
(void
)
753 int k
; /* an index into |buffer|
*/
756 if
(interaction
> nonstop_mode
) {
759 if
(istart
< ilimit
) {
760 for
(k
= istart
; k
<= ilimit
- 1; k
++)
761 print_char
(buffer
[k
]);
764 prompt_input
("=>"); /* wait for user response
*/
766 for
(k
= first
; k
< +last
- 1; k
++) /* move line down in buffer
*/
767 buffer
[k
+ istart
- first
] = buffer
[k
];
768 ilimit
= istart
+ last
- first
;
774 @ Before getting into |get_next|
, let's consider the subroutine that is called
775 when an `\.
{\\outer
}' control sequence has been scanned or when the end of a file
776 has been reached. These two cases are distinguished by |cur_cs|
, which is zero at
780 void check_outer_validity
(void
)
782 halfword p
; /* points to inserted token list
*/
783 halfword q
; /* auxiliary pointer
*/
784 if
(suppress_outer_error
)
786 if
(scanner_status
!= normal
) {
787 deletions_allowed
= false
;
788 /* Back up an outer control sequence so that it can be reread
; */
789 /* An outer control sequence that occurs in a \.
{\\read
} will not be reread
,
790 since the error recovery for \.
{\\read
} is not very powerful.
*/
792 if
((istate
== token_list
) ||
(iname
< 1) ||
(iname
> 17)) {
794 token_info
(p
) = cs_token_flag
+ cur_cs
;
795 begin_token_list
(p
, backed_up
); /* prepare to read the control sequence again
*/
797 cur_cmd
= spacer_cmd
;
798 cur_chr
= ' '
; /* replace it by a space
*/
800 if
(scanner_status
> skipping
) {
801 const char
*errhlp
[] = {
802 "I suspect you have forgotten a `}', causing me",
803 "to read past where you wanted me to stop.",
804 "I'll try to recover; but if the error is serious,",
805 "you'd better type `E' or `X' now and fix your file.",
809 const char
*startmsg
;
810 const char
*scannermsg
;
811 /* Tell the user what has run away and try to recover
*/
812 runaway
(); /* print a definition
, argument
, or preamble
*/
814 startmsg
= "File ended";
817 startmsg
= "Forbidden control sequence found";
819 /* Print either `\.
{definition
}' or `\.
{use
}' or `\.
{preamble
}' or `\.
{text
}'
,
820 and insert tokens that should lead to recovery
; */
821 /* The recovery procedure can't be fully understood without knowing more
822 about the \TeX\ routines that should be aborted
, but we can sketch the
823 ideas here
: For a runaway definition we will insert a right brace
; for a
824 runaway preamble
, we will insert a special \.
{\\cr
} token and a right
825 brace
; and for a runaway argument
, we will set |long_state| to
826 |outer_call| and insert \.
{\\par
}.
*/
828 switch
(scanner_status
) {
830 scannermsg
= "definition";
831 token_info
(p
) = right_brace_token
+ '
}'
;
835 token_info
(p
) = par_token
;
836 long_state
= outer_call_cmd
;
839 scannermsg
= "preamble";
840 token_info
(p
) = right_brace_token
+ '
}'
;
844 token_info
(p
) = cs_token_flag
+ frozen_cr
;
845 align_state
= -1000000;
849 token_info
(p
) = right_brace_token
+ '
}'
;
851 default
: /* can't happen
*/
852 scannermsg
= "unknown";
854 } /*there are no other cases
*/
855 begin_token_list
(p
, inserted
);
856 snprintf
(errmsg
, 255, "%s while scanning %s of %s",
857 startmsg
, scannermsg
, cs_to_string
(warning_index
));
858 tex_error
(errmsg
, errhlp
);
861 const char
*errhlp_no
[] = {
862 "The file ended while I was skipping conditional text.",
863 "This kind of error happens when you say `\\if...' and forget",
864 "the matching `\\fi'. I've inserted a `\\fi'; this might work.",
867 const char
*errhlp_cs
[] = {
868 "A forbidden control sequence occurred in skipped text.",
869 "This kind of error happens when you say `\\if...' and forget",
870 "the matching `\\fi'. I've inserted a `\\fi'; this might work.",
873 const char
**errhlp
= (const char
**) errhlp_no
;
879 ss
= cmd_chr_to_string
(if_test_cmd
, cur_if
);
880 snprintf
(errmsg
, 255, "Incomplete %s; all text was ignored after line %d",
881 ss
, (int
) skip_line
);
883 /* Incomplete \\if...
*/
884 cur_tok
= cs_token_flag
+ frozen_fi
;
885 /* back up one inserted token and call |error|
*/
887 OK_to_interrupt
= false
;
889 token_type
= inserted
;
890 OK_to_interrupt
= true
;
891 tex_error
(errmsg
, errhlp
);
894 deletions_allowed
= true
;
903 The other variant gives less clutter in tracing cache usage when profiling and for
904 some files
(like the manual
) also a bit of a speedup.
907 static boolean get_next_file
(void
)
910 if
(iloc
<= ilimit
) {
911 /* current line not yet finished
*/
912 do_buffer_to_unichar
(cur_chr
, iloc
);
915 if
(detokenized_line
()) {
916 cur_cmd
= (cur_chr
== ' ' ?
10 : 12);
918 do_get_cat_code
(cur_cmd
, cur_chr
);
921 Change state if necessary
, and |goto switch| if the current
922 character should be ignored
, or |goto reswitch| if the current
923 character changes to another
;
925 The following
48-way switch accomplishes the scanning quickly
, assuming
926 that a decent C compiler has translated the code. Note that the numeric
927 values for |mid_line|
, |skip_blanks|
, and |new_line| are spaced
928 apart from each other by |max_char_code
+1|
, so we can add a character's
929 command code to the state to get a single number that characterizes both.
931 Remark
[ls
/hh
]: checking performance indicated that this switch was the
932 cause of many branch prediction errors but changing it to
:
934 c
= istate
+ cur_cmd
;
935 if
(c
== (mid_line
+ letter_cmd
) || c
== (mid_line
+ other_char_cmd
)) {
937 } else if
(c
>= new_line
) {
940 } else if
(c
>= skip_blanks
) {
943 } else if
(c
>= mid_line
) {
951 gives as many prediction errors. So
, we can indeed assume that the compiler
952 does the right job
, or that there is simply no other way.
955 switch
(istate
+ cur_cmd
) {
956 case mid_line
+ ignore_cmd
:
957 case skip_blanks
+ ignore_cmd
:
958 case new_line
+ ignore_cmd
:
959 case skip_blanks
+ spacer_cmd
:
960 case new_line
+ spacer_cmd
:
961 /* Cases where character is ignored
*/
964 case mid_line
+ escape_cmd
:
965 case new_line
+ escape_cmd
:
966 case skip_blanks
+ escape_cmd
:
967 /* Scan a control sequence ...
; */
968 istate
= (unsigned char
) scan_control_sequence
();
969 if
(! suppress_outer_error
&& cur_cmd >= outer_call_cmd)
970 check_outer_validity
();
972 case mid_line
+ active_char_cmd
:
973 case new_line
+ active_char_cmd
:
974 case skip_blanks
+ active_char_cmd
:
975 /* Process an active-character
*/
976 cur_cs
= active_to_cs
(cur_chr
, false
);
977 cur_cmd
= eq_type
(cur_cs
);
978 cur_chr
= equiv
(cur_cs
);
980 if
(! suppress_outer_error
&& cur_cmd >= outer_call_cmd)
981 check_outer_validity
();
983 case mid_line
+ sup_mark_cmd
:
984 case new_line
+ sup_mark_cmd
:
985 case skip_blanks
+ sup_mark_cmd
:
986 /* If this |sup_mark| starts
*/
987 if
(process_sup_mark
())
992 case mid_line
+ invalid_char_cmd
:
993 case new_line
+ invalid_char_cmd
:
994 case skip_blanks
+ invalid_char_cmd
:
995 /* Decry the invalid character and |goto restart|
; */
996 invalid_character_error
();
997 return false
; /* because state may be |token_list| now
*/
999 case mid_line
+ spacer_cmd
:
1000 /* Enter |skip_blanks| state
, emit a space
; */
1001 istate
= skip_blanks
;
1004 case mid_line
+ car_ret_cmd
:
1006 Finish line
, emit a space. When a character of type |spacer| gets through
, its
1007 character code is changed to $\.
{"\ "}=040$. This means that the ASCII codes
1008 for tab and space
, and for the space inserted at the end of a line
, will be
1009 treated alike when macro parameters are being matched. We do this since such
1010 characters are indistinguishable on most computer terminal displays.
1013 cur_cmd
= spacer_cmd
;
1016 case skip_blanks
+ car_ret_cmd
:
1017 case mid_line
+ comment_cmd
:
1018 case new_line
+ comment_cmd
:
1019 case skip_blanks
+ comment_cmd
:
1020 /* Finish line
, |goto switch|
; */
1024 case new_line
+ car_ret_cmd
:
1025 /* Finish line
, emit a \.
{\\par
}; */
1028 cur_cmd
= eq_type
(cur_cs
);
1029 cur_chr
= equiv
(cur_cs
);
1030 if
(! suppress_outer_error
&& cur_cmd >= outer_call_cmd)
1031 check_outer_validity
();
1033 case skip_blanks
+ left_brace_cmd
:
1034 case new_line
+ left_brace_cmd
:
1037 case mid_line
+ left_brace_cmd
:
1040 case skip_blanks
+ right_brace_cmd
:
1041 case new_line
+ right_brace_cmd
:
1044 case mid_line
+ right_brace_cmd
:
1047 case mid_line
+ math_shift_cmd
:
1048 case mid_line
+ tab_mark_cmd
:
1049 case mid_line
+ mac_param_cmd
:
1050 case mid_line
+ sub_mark_cmd
:
1051 case mid_line
+ letter_cmd
:
1052 case mid_line
+ other_char_cmd
:
1055 case skip_blanks
+ math_shift
:
1056 case skip_blanks
+ tab_mark
:
1057 case skip_blanks
+ mac_param
:
1058 case skip_blanks
+ sub_mark
:
1059 case skip_blanks
+ letter
:
1060 case skip_blanks
+ other_char
:
1061 case new_line
+ math_shift
:
1062 case new_line
+ tab_mark
:
1063 case new_line
+ mac_param
:
1064 case new_line
+ sub_mark
:
1065 case new_line
+ letter
:
1066 case new_line
+ other_char
:
1076 Move to next line of file
,
1077 or |goto restart| if there is no next line
,
1078 or |return| if a \.
{\\read
} line has finished
;
1081 next_line_retval r
= next_line
();
1082 if
(r
== next_line_return
) {
1084 } else if
(r
== next_line_restart
) {
1096 /* 10 times less Bim in callgrind
*/
1099 escape_cmd left_brace_cmd right_brace_cmd math_shift_cmd
1100 tab_mark_cmd car_ret_cmd mac_param_cmd sup_mark_cmd
1101 sub_mark_cmd ignore_cmd spacer_cmd letter_cmd
1102 other_char_cmd active_char_cmd comment_cmd invalid_char_cmd
1105 static boolean get_next_file
(void
)
1109 if
(iloc
<= ilimit
) {
1110 /* current line not yet finished
*/
1111 do_buffer_to_unichar
(cur_chr
, iloc
);
1113 if
(detokenized_line
()) {
1114 cur_cmd
= (cur_chr
== ' ' ?
10 : 12);
1116 do_get_cat_code
(cur_cmd
, cur_chr
);
1119 Change state if necessary
, and |goto switch| if the current
1120 character should be ignored
, or |goto reswitch| if the current
1121 character changes to another
;
1123 c
= istate
+ cur_cmd
;
1124 if
(c
== (mid_line
+ letter_cmd
) || c
== (mid_line
+ other_char_cmd
)) {
1126 } else if
(c
>= new_line
) {
1127 switch
(c-new_line
) {
1129 istate
= (unsigned char
) scan_control_sequence
();
1130 if
(! suppress_outer_error
&& cur_cmd >= outer_call_cmd)
1131 check_outer_validity
();
1133 case left_brace_cmd
:
1137 case right_brace_cmd
:
1141 case math_shift_cmd
:
1148 /* Finish line
, emit a \.
{\\par
}; */
1151 cur_cmd
= eq_type
(cur_cs
);
1152 cur_chr
= equiv
(cur_cs
);
1153 if
(! suppress_outer_error
&& cur_cmd >= outer_call_cmd)
1154 check_outer_validity
();
1160 if
(process_sup_mark
())
1172 /* Cases where character is ignored
*/
1177 case other_char_cmd
:
1180 case active_char_cmd
:
1181 cur_cs
= active_to_cs
(cur_chr
, false
);
1182 cur_cmd
= eq_type
(cur_cs
);
1183 cur_chr
= equiv
(cur_cs
);
1185 if
(! suppress_outer_error
&& cur_cmd >= outer_call_cmd)
1186 check_outer_validity
();
1191 case invalid_char_cmd
:
1192 invalid_character_error
();
1193 return false
; /* because state may be |token_list| now
*/
1198 } else if
(c
>= skip_blanks
) {
1199 switch
(c-skip_blanks
) {
1201 /* Scan a control sequence ...
; */
1202 istate
= (unsigned char
) scan_control_sequence
();
1203 if
(! suppress_outer_error
&& cur_cmd >= outer_call_cmd)
1204 check_outer_validity
();
1206 case left_brace_cmd
:
1210 case right_brace_cmd
:
1214 case math_shift_cmd
:
1227 /* If this |sup_mark| starts
*/
1228 if
(process_sup_mark
())
1243 case other_char_cmd
:
1246 case active_char_cmd
:
1247 cur_cs
= active_to_cs
(cur_chr
, false
);
1248 cur_cmd
= eq_type
(cur_cs
);
1249 cur_chr
= equiv
(cur_cs
);
1251 if
(! suppress_outer_error
&& cur_cmd >= outer_call_cmd)
1252 check_outer_validity
();
1255 /* Finish line
, |goto switch|
; */
1258 case invalid_char_cmd
:
1259 /* Decry the invalid character and |goto restart|
; */
1260 invalid_character_error
();
1261 return false
; /* because state may be |token_list| now
*/
1266 } else if
(c
>= mid_line
) {
1267 switch
(c-mid_line
) {
1269 istate
= (unsigned char
) scan_control_sequence
();
1270 if
(! suppress_outer_error
&& cur_cmd >= outer_call_cmd)
1271 check_outer_validity
();
1273 case left_brace_cmd
:
1276 case right_brace_cmd
:
1279 case math_shift_cmd
:
1285 Finish line
, emit a space. When a character of type |spacer| gets through
, its
1286 character code is changed to $\.
{"\ "}=040$. This means that the ASCII codes
1287 for tab and space
, and for the space inserted at the end of a line
, will be
1288 treated alike when macro parameters are being matched. We do this since such
1289 characters are indistinguishable on most computer terminal displays.
1292 cur_cmd
= spacer_cmd
;
1298 if
(process_sup_mark
())
1308 /* Enter |skip_blanks| state
, emit a space
; */
1309 istate
= skip_blanks
;
1315 case other_char_cmd
:
1318 case active_char_cmd
:
1319 cur_cs
= active_to_cs
(cur_chr
, false
);
1320 cur_cmd
= eq_type
(cur_cs
);
1321 cur_chr
= equiv
(cur_cs
);
1323 if
(! suppress_outer_error
&& cur_cmd >= outer_call_cmd)
1324 check_outer_validity
();
1329 case invalid_char_cmd
:
1330 invalid_character_error
();
1331 return false
; /* because state may be |token_list| now
*/
1345 Move to next line of file
, or |goto restart| if there is no next line
,
1346 or |return| if a \.
{\\read
} line has finished
;
1349 next_line_retval r
= next_line
();
1350 if
(r
== next_line_return
) {
1352 } else if
(r
== next_line_restart
) {
1364 @ Notice that a code like \.
{\^\^
8} becomes \.x if not followed by a hex digit.
1365 We only support a limited set
:
/*
 * is_hex(a): nonzero when |a| is one of the characters '0'..'9' or one of the
 * lowercase letters 'a'..'f'; uppercase 'A'..'F' are not accepted.
 *
 * Every use of the argument is parenthesized so that an expression argument
 * (for example |c & 0x7f| or a conditional expression) is compared as a whole
 * instead of being split up by operator precedence. The argument is evaluated
 * more than once, so it must not have side effects.
 */
#define is_hex(a) ((((a)>='0')&&((a)<='9'))||(((a)>='a')&&((a)<='f')))
1375 #define add_nybble
(c
) \
1377 cur_chr
=(cur_chr
<<4)+c-'
0'
; \
1379 cur_chr
=(cur_chr
<<4)+c-'a'
+10; \
1382 #define set_nybble
(c
) \
1389 #define one_hex_to_cur_chr
(c1
) \
1392 #define two_hex_to_cur_chr
(c1
,c2
) \
1396 #define four_hex_to_cur_chr
(c1
,c2
,c3
,c4
) \
1397 two_hex_to_cur_chr
(c1
,c2
); \
1401 #define six_hex_to_cur_chr
(c1
,c2
,c3
,c4
,c5
,c6
) \
1402 four_hex_to_cur_chr
(c1
,c2
,c3
,c4
); \
1406 static boolean process_sup_mark
(void
)
1408 if
(cur_chr
== buffer
[iloc
]) {
1409 if
(iloc
< ilimit
) {
1410 if
((cur_chr
== buffer
[iloc
+ 1]) && (cur_chr == buffer[iloc + 2])) {
1411 if
((cur_chr
== buffer
[iloc
+ 3]) && (cur_chr == buffer[iloc + 4])) {
1413 if
((iloc
+ 10) <= ilimit
) {
1414 int c1
= buffer
[iloc
+ 5];
1415 int c2
= buffer
[iloc
+ 6];
1416 int c3
= buffer
[iloc
+ 7];
1417 int c4
= buffer
[iloc
+ 8];
1418 int c5
= buffer
[iloc
+ 9];
1419 int c6
= buffer
[iloc
+ 10];
1420 if
(is_hex
(c1
) && is_hex(c2) && is_hex(c3) &&
1421 is_hex
(c4
) && is_hex(c5) && is_hex(c6)) {
1423 six_hex_to_cur_chr
(c1
,c2
,c3
,c4
,c5
,c6
);
1426 tex_error
("^^^^^^ needs six hex digits", NULL);
1429 tex_error
("^^^^^^ needs six hex digits, end of input", NULL);
1433 if
((iloc
+ 6) <= ilimit
) {
1434 int c1
= buffer
[iloc
+ 3];
1435 int c2
= buffer
[iloc
+ 4];
1436 int c3
= buffer
[iloc
+ 5];
1437 int c4
= buffer
[iloc
+ 6];
1438 if
(is_hex
(c1
) && is_hex(c2) && is_hex(c3) && is_hex(c4)) {
1440 four_hex_to_cur_chr
(c1
,c2
,c3
,c4
);
1443 tex_error
("^^^^ needs four hex digits", NULL);
1446 tex_error
("^^^^ needs four hex digits, end of input", NULL);
1451 if
((iloc
+ 2) <= ilimit
) {
1452 int c1
= buffer
[iloc
+ 1];
1453 int c2
= buffer
[iloc
+ 2];
1454 if
(is_hex
(c1
) && is_hex(c2)) {
1456 two_hex_to_cur_chr
(c1
,c2
);
1460 /* go on
, no error
, good old tex
*/
1465 int c1
= buffer
[iloc
+ 1];
1468 if
(is_hex
(c1
) && (iloc <= ilimit)) {
1469 int c2
= buffer
[iloc
];
1472 two_hex_to_cur_chr
(c1
,c2
);
1476 cur_chr
= (c1
< 0100 ? c1
+ 0100 : c1
- 0100);
1484 @ Control sequence names are scanned only when they appear in some line of a
1485 file
; once they have been scanned the first time
, their |eqtb| location serves as
1486 a unique identification
, so \TeX\ doesn't need to refer to the original name any
1487 more except when it prints the equivalent in symbolic form.
1489 The program that scans a control sequence has been written carefully in order to
1490 avoid the blowups that might otherwise occur if a malicious user tried something
1491 like `\.
{\\catcode\'
15=0}'. The algorithm might look at |buffer
[ilimit
+1]|
, but
1492 it never looks at |buffer
[ilimit
+2]|.
1494 If expanded characters like `\.
{\^\^A
}' or `\.
{\^\^df
}' appear in or just
1495 following a control sequence name
, they are converted to single characters in the
1496 buffer and the process is repeated
, slowly but surely.
1499 static boolean check_expanded_code
(int
*kk
); /* below
*/
1501 static int scan_control_sequence
(void
)
1503 int retval
= mid_line
;
1504 if
(iloc
> ilimit
) {
1505 cur_cs
= null_cs
; /* |state| is irrelevant in this case
*/
1507 register int cat
; /* |cat_code
(cur_chr
)|
, usually
*/
1510 do_buffer_to_unichar
(cur_chr
, k
);
1511 do_get_cat_code
(cat
, cur_chr
);
1512 if
(cat
!= letter_cmd || k
> ilimit
) {
1513 retval
= (cat
== spacer_cmd ? skip_blanks
: mid_line
);
1514 if
(cat
== sup_mark_cmd
&& check_expanded_code(&k)) /* If an expanded...; */
1517 retval
= skip_blanks
;
1519 do_buffer_to_unichar
(cur_chr
, k
);
1520 do_get_cat_code
(cat
, cur_chr
);
1521 } while
(cat
== letter_cmd
&& k <= ilimit);
1523 if
(cat
== sup_mark_cmd
&& check_expanded_code(&k)) /* If an expanded...; */
1525 if
(cat
!= letter_cmd
) {
1526 /* backtrack one character which can be utf
*/
1529 if
(cur_chr
> 0xFFFF)
1531 if
(cur_chr
> 0x7FF)
1536 if
(cur_chr
<= 0x7F) {
1537 k
-= 1; /* in most cases
*/
1538 } else if
(cur_chr
> 0xFFFF) {
1540 } else if
(cur_chr
> 0x7FF) {
1542 } else
/* if
(cur_chr
> 0x7F) */ {
1545 /* now |k| points to first nonletter
*/
1548 cur_cs
= id_lookup
(iloc
, k
- iloc
);
1553 cur_cmd
= eq_type
(cur_cs
);
1554 cur_chr
= equiv
(cur_cs
);
1558 @ Whenever we reach the following piece of code
, we will have
1559 |cur_chr
=buffer
[k-1
]| and |k
<=ilimit
+1| and
1560 |cat
=get_cat_code
(cat_code_table
,cur_chr
)|. If an expanded code like \.
{\^\^A
} or
1561 \.
{\^\^df
} appears in |buffer
[(k-1
)..
(k
+1)]| or |buffer
[(k-1
)..
(k
+2)]|
, we will
1562 store the corresponding code in |buffer
[k-1
]| and shift the rest of the buffer
1563 left two or three places.
1566 static boolean check_expanded_code
(int
*kk
)
1571 if
(buffer
[k
] == cur_chr
&& k < ilimit) {
1572 if
((cur_chr
== buffer
[k
+ 1]) && (cur_chr == buffer[k + 2])) {
1573 if
((cur_chr
== buffer
[k
+ 3]) && (cur_chr == buffer[k + 4])) {
1574 if
((k
+ 10) <= ilimit
) {
1575 int c1
= buffer
[k
+ 6 - 1];
1576 int c2
= buffer
[k
+ 6];
1577 int c3
= buffer
[k
+ 6 + 1];
1578 int c4
= buffer
[k
+ 6 + 2];
1579 int c5
= buffer
[k
+ 6 + 3];
1580 int c6
= buffer
[k
+ 6 + 4];
1581 if
(is_hex
(c1
) && is_hex(c2) && is_hex(c3) && is_hex(c4) && is_hex(c5) && is_hex(c6)) {
1583 six_hex_to_cur_chr
(c1
,c2
,c3
,c4
,c5
,c6
);
1585 tex_error
("^^^^^^ needs six hex digits", NULL);
1588 tex_error
("^^^^^^ needs six hex digits, end of input", NULL);
1591 if
((k
+ 6) <= ilimit
) {
1592 int c1
= buffer
[k
+ 4 - 1];
1593 int c2
= buffer
[k
+ 4];
1594 int c3
= buffer
[k
+ 4 + 1];
1595 int c4
= buffer
[k
+ 4 + 2];
1596 if
(is_hex
(c1
) && is_hex(c2) && is_hex(c3) && is_hex(c4)) {
1598 four_hex_to_cur_chr
(c1
,c2
,c3
,c4
);
1600 tex_error
("^^^^ needs four hex digits", NULL);
1603 tex_error
("^^^^ needs four hex digits, end of input", NULL);
1607 int c1
= buffer
[k
+ 1];
1610 if
(is_hex
(c1
) && (k + 2) <= ilimit) {
1611 int c2
= buffer
[k
+ 2];
1614 two_hex_to_cur_chr
(c1
,c2
);
1616 cur_chr
= (c1
< 0100 ? c1
+ 0100 : c1
- 0100);
1619 cur_chr
= (c1
< 0100 ? c1
+ 0100 : c1
- 0100);
1627 if
(cur_chr
<= 0x7F) {
1628 buffer
[k
- 1] = (packed_ASCII_code
) cur_chr
;
1629 } else if
(cur_chr
<= 0x7FF) {
1630 buffer
[k
- 1] = (packed_ASCII_code
) (0xC0 + cur_chr
/ 0x40);
1633 buffer
[k
- 1] = (packed_ASCII_code
) (0x80 + cur_chr
% 0x40);
1634 } else if
(cur_chr
<= 0xFFFF) {
1635 buffer
[k
- 1] = (packed_ASCII_code
) (0xE0 + cur_chr
/ 0x1000);
1638 buffer
[k
- 1] = (packed_ASCII_code
) (0x80 + (cur_chr
% 0x1000) / 0x40);
1641 buffer
[k
- 1] = (packed_ASCII_code
) (0x80 + (cur_chr
% 0x1000) % 0x40);
1643 buffer
[k
- 1] = (packed_ASCII_code
) (0xF0 + cur_chr
/ 0x40000);
1646 buffer
[k
- 1] = (packed_ASCII_code
) (0x80 + (cur_chr
% 0x40000) / 0x1000);
1649 buffer
[k
- 1] = (packed_ASCII_code
) (0x80 + ((cur_chr
% 0x40000) % 0x1000) / 0x40);
1652 buffer
[k
- 1] = (packed_ASCII_code
) (0x80 + ((cur_chr
% 0x40000) % 0x1000) % 0x40);
1655 ilimit
= ilimit
- d
;
1656 while
(l
<= ilimit
) {
1657 buffer
[l
] = buffer
[l
+ d
];
1666 @ All of the easy branches of |get_next| have now been taken care of. There is
1669 @c static next_line_retval next_line
(void
)
1671 boolean inhibit_eol
= false
; /* a way to end a pseudo file without trailing space
*/
1673 /* Read next line of file into |buffer|
, or |goto restart| if the file has ended
*/
1678 if
(pseudo_input
()) { /* not end of file
*/
1679 firm_up_the_line
(); /* this sets |ilimit|
*/
1680 line_catcode_table
= DEFAULT_CAT_TABLE
;
1681 if
((iname
== 19) && (pseudo_lines(pseudo_files) == null))
1683 } else if
((every_eof
!= null
) && !eof_seen[iindex]) {
1685 eof_seen
[iindex
] = true
; /* fake one empty line
*/
1687 begin_token_list
(every_eof
, every_eof_text
);
1688 return next_line_restart
;
1694 if
(luacstring_input
()) { /* not end of strings
*/
1696 line_catcode_table
= (short
) luacstring_cattable
();
1697 line_partial
= (signed char
) luacstring_partial
();
1698 if
(luacstring_final_line
() || line_partial
1699 || line_catcode_table
== NO_CAT_TABLE
)
1707 if
(lua_input_ln
(cur_file
, 0, true
)) { /* not end of file
*/
1708 firm_up_the_line
(); /* this sets |ilimit|
*/
1709 line_catcode_table
= DEFAULT_CAT_TABLE
;
1710 } else if
((every_eof
!= null
) && (!eof_seen[iindex])) {
1712 eof_seen
[iindex
] = true
; /* fake one empty line
*/
1713 begin_token_list
(every_eof
, every_eof_text
);
1714 return next_line_restart
;
1722 if
(tracing_nesting
> 0)
1723 if
((grp_stack
[in_open
] != cur_boundary
) ||
(if_stack
[in_open
] != cond_ptr
))
1724 if
(!((iname
== 19) ||
(iname
== 21))) {
1725 /* give warning for some unfinished groups and
/or conditionals
*/
1728 if
((iname
> 21) ||
(iname
== 20)) {
1729 report_stop_file
(filetype_tex
);
1733 /* lua input or \.
{\\scantextokens
} */
1734 if
(iname
== 21 || iname
== 19) {
1738 if
(! suppress_outer_error
)
1739 check_outer_validity
();
1741 return next_line_restart
;
1743 if
(inhibit_eol || end_line_char_inactive
)
1746 buffer
[ilimit
] = (packed_ASCII_code
) end_line_char
;
1748 iloc
= istart
; /* ready to read
*/
1750 if
(!terminal_input
) {
1751 /* \.
{\\read
} line has ended
*/
1754 return next_line_return
; /* OUTER */
1756 if
(input_ptr
> 0) {
1757 /* text was inserted during error recovery
*/
1759 return next_line_restart
; /* resume previous level
*/
1761 if
(selector
< log_only
)
1763 if
(interaction
> nonstop_mode
) {
1764 if
(end_line_char_inactive
)
1766 if
(ilimit
== istart
) {
1767 /* previous line was empty
*/
1768 tprint_nl
("(Please type a command or say `\\end')");
1772 prompt_input
("*"); /* input on-line into |buffer|
*/
1774 if
(end_line_char_inactive
)
1777 buffer
[ilimit
] = (packed_ASCII_code
) end_line_char
;
1782 Nonstop mode
, which is intended for overnight batch processing
,
1783 never waits for on-line input.
1785 fatal_error
("*** (job aborted, no legal \\end found)");
1788 return next_line_ok
;
1791 @ Let's consider now what happens when |get_next| is looking at a token list.
1794 static boolean get_next_tokenlist
(void
)
1796 register halfword t
= token_info
(iloc
);
1797 iloc
= token_link
(iloc
); /* move to next
*/
1798 if
(t
>= cs_token_flag
) {
1799 /* a control sequence token
*/
1800 cur_cs
= t
- cs_token_flag
;
1801 cur_cmd
= eq_type
(cur_cs
);
1802 if
(cur_cmd
>= outer_call_cmd
) {
1803 if
(cur_cmd
== dont_expand_cmd
) {
1805 Get the next token
, suppressing expansion. The present point in the program
1806 is reached only when the |expand| routine has inserted a special marker into
1807 the input. In this special case
, |token_info
(iloc
)| is known to be a control
1808 sequence token
, and |token_link
(iloc
)=null|.
1810 cur_cs
= token_info
(iloc
) - cs_token_flag
;
1812 cur_cmd
= eq_type
(cur_cs
);
1813 if
(cur_cmd
> max_command_cmd
) {
1814 cur_cmd
= relax_cmd
;
1815 cur_chr
= no_expand_flag
;
1818 } else if
(! suppress_outer_error
) {
1819 check_outer_validity
();
1822 cur_chr
= equiv
(cur_cs
);
1824 cur_cmd
= token_cmd
(t
);
1825 cur_chr
= token_chr
(t
);
1827 case left_brace_cmd
:
1830 case right_brace_cmd
:
1834 /* Insert macro parameter and |goto restart|
; */
1835 begin_token_list
(param_stack
[param_start
+ cur_chr
- 1], parameter
);
1843 @ Now we're ready to take the plunge into |get_next| itself. Parts of this
1844 routine are executed more often than any other instructions of \TeX.
1845 @^mastication@
>@^inner loop@
>
1847 @ sets |cur_cmd|
, |cur_chr|
, |cur_cs| to next token
1854 if
(istate
!= token_list
) {
1855 /* Input from external file
, |goto restart| if no input found
*/
1856 if
(!get_next_file
())
1861 goto RESTART
; /* list exhausted
, resume previous level
*/
1862 } else if
(!get_next_tokenlist
()) {
1863 goto RESTART
; /* parameter needs to be expanded
*/
1866 /* If an alignment entry has just ended
, take appropriate action
*/
1867 if
((cur_cmd
== tab_mark_cmd || cur_cmd
== car_ret_cmd
) && align_state == 0) {
1868 insert_vj_template
();
1873 @ Since |get_next| is used so frequently in \TeX
, it is convenient to define
1874 three related procedures that do a little more
:
1876 \yskip\hang|get_token| not only sets |cur_cmd| and |cur_chr|
, it also sets
1877 |cur_tok|
, a packed halfword version of the current token.
1879 \yskip\hang|get_x_token|
, meaning ``get an expanded token
,'' is like |get_token|
,
1880 but if the current token turns out to be a user-defined control sequence
(i.e.
, a
1881 macro call
), or a conditional
, or something like \.
{\\topmark
} or
1882 \.
{\\expandafter
} or \.
{\\csname
}, it is eliminated from the input by beginning
1883 the expansion of the macro or the evaluation of the conditional.
1885 \yskip\hang|x_token| is like |get_x_token| except that it assumes that |get_next|
1886 has already been called.
1888 \yskip\noindent In fact
, these three procedures account for almost every use of
1891 No new control sequences will be defined except during a call of |get_token|
, or
1892 when \.
{\\csname
} compresses a token list
, because |no_new_control_sequence| is
1893 always |true| at other times.
1895 @ sets |cur_cmd|
, |cur_chr|
, |cur_tok|
1898 void get_token
(void
)
1900 no_new_control_sequence
= false
;
1902 no_new_control_sequence
= true
;
1904 cur_tok
= token_val
(cur_cmd
, cur_chr
);
1906 cur_tok
= cs_token_flag
+ cur_cs
;
1909 @ changes the string |s| to a token list
1912 halfword string_to_toks
(const char
*ss
)
1914 halfword p
; /* tail of the token list
*/
1915 halfword q
; /* new node being added to the token list via |store_new_token|
*/
1916 halfword t
; /* token being appended
*/
1918 const char
*se
= ss
+ strlen
(s
);
1919 p
= temp_token_head
;
1920 set_token_link
(p
, null
);
1922 t
= (halfword
) str2uni
((const unsigned char
*) s
);
1927 t
= other_token
+ t
;
1928 fast_store_new_token
(t
);
1930 return token_link
(temp_token_head
);
1933 @ The token lists for macros and for other things like \.
{\\mark
} and
1934 \.
{\\output
} and \.
{\\write
} are produced by a procedure called |scan_toks|.
1936 Before we get into the details of |scan_toks|
, let's consider a much simpler
1937 task
, that of converting the current string into a token list. The |str_toks|
1938 function does this
; it classifies spaces as type |spacer| and everything else as
1941 The token list created by |str_toks| begins at |link
(temp_token_head
)| and ends
1942 at the value |p| that is returned.
(If |p
=temp_token_head|
, the list is empty.
)
1944 |lua_str_toks| is almost identical
, but it also escapes the three symbols that
1945 |lua| considers special while scanning a literal string
1947 @ changes the string |str_pool
[b..pool_ptr
]| to a token list
1950 halfword lua_str_toks
(lstring b
)
1952 halfword p
; /* tail of the token list
*/
1953 halfword q
; /* new node being added to the token list via |store_new_token|
*/
1954 halfword t
; /* token being appended
*/
1955 unsigned char
*k
; /* index into string
*/
1956 p
= temp_token_head
;
1957 set_token_link
(p
, null
);
1958 k
= (unsigned char
*) b.s
;
1959 while
(k
< (unsigned char
*) b.s
+ b.l
) {
1960 t
= pool_to_unichar
(k
);
1965 if
((t
== '\\'
) ||
(t
== '
"') || (t == '\'') || (t == 10) || (t == 13))
1966 fast_store_new_token(other_token + '\\');
1971 t = other_token + t;
1973 fast_store_new_token(t);
1978 @ Incidentally, the main reason for wanting |str_toks| is the function
1979 |the_toks|, which has similar input/output characteristics.
1981 @ changes the string |str_pool[b..pool_ptr]| to a token list
1984 halfword str_toks(lstring s)
1986 halfword p; /* tail of the token list */
1987 halfword q; /* new node being added to the token list via |store_new_token| */
1988 halfword t; /* token being appended */
1989 unsigned char *k, *l; /* index into string */
1990 p = temp_token_head;
1991 set_token_link(p, null);
1995 t = pool_to_unichar(k);
2000 t = other_token + t;
2001 fast_store_new_token(t);
2007 hh: most of the converter is similar to the one i made for macro so at some point i
2008 can make a helper; also todo: there is no need to go through the pool
2012 halfword str_scan_toks(int ct, lstring s)
2013 { /* changes the string |str_pool[b..pool_ptr]| to a token list */
2014 halfword p; /* tail of the token list */
2015 halfword q; /* new node being added to the token list via |store_new_token| */
2016 halfword t; /* token being appended */
2017 unsigned char *k, *l; /* index into string */
2019 p = temp_token_head;
2020 set_token_link(p, null);
2024 t = pool_to_unichar(k);
2026 cc = get_cat_code(ct,t);
2028 /* we have a potential control sequence so we check for it */
2032 halfword _cs = null ;
2033 unsigned char *_name = k ;
2035 t = (halfword) str2uni((const unsigned char *) k);
2037 _c = get_cat_code(ct,t);
2040 _lname = _lname + _s ;
2041 } else if (_c == 10) {
2042 /* we ignore a trailing space like normal scanning does */
2050 /* we have a potential \cs */
2051 _cs = string_lookup((const char *) _name, _lname);
2052 if (_cs == undefined_control_sequence) {
2053 /* let's play safe and backtrack */
2054 t = cc * (1<<21) + t ;
2057 t = cs_token_flag + _cs;
2060 /* just a character with some meaning, so \unknown becomes effectively */
2061 /* \\unknown assuming that \\ has some useful meaning of course */
2062 t = cc * (1<<21) + t ;
2067 /* whatever token, so for instance $x^2$ just works given a tex */
2068 /* catcode regime */
2069 t = cc * (1<<21) + t ;
2071 fast_store_new_token(t);
2077 @ Here's part of the |expand| subroutine that we are now ready to complete:
2080 void ins_the_toks(void)
2083 ins_list(token_link(temp_token_head));
2086 #define set_toks_register(n,t,g) { \
2087 int a = (g>0) ? 4 : 0; \
2088 halfword ref = get_avail(); \
2089 set_token_ref_count(ref, 0); \
2090 set_token_link(ref, token_link(t)); \
2091 define(n + toks_base, call_cmd, ref); \
2094 void combine_the_toks(int how)
2099 if (cur_cmd == assign_toks_cmd) {
2100 nt = equiv(cur_cs) - toks_base;
2110 } while (cur_cmd == spacer_cmd);
2111 if (cur_cmd == left_brace_cmd) {
2114 x = scan_toks(false,how > 1); /* expanded or not */
2117 if (source != null) {
2118 halfword target = toks(nt);
2119 if (target == null) {
2120 set_toks_register(nt,source,0);
2122 halfword s = token_link(source);
2124 halfword t = token_link(target);
2126 /* can this happen ? */
2127 set_token_link(target, s);
2128 } else if (odd(how)) {
2130 if (cur_level != eq_level_field(eqtb[toks_base+nt])) {
2131 halfword p = temp_token_head;
2133 set_token_link(p, s); /* s = head, x = tail */
2136 fast_store_new_token(token_info(t));
2139 set_toks_register(nt,temp_token_head,0);
2141 set_token_link(x,t);
2142 set_token_link(target,s);
2146 if (cur_level != eq_level_field(eqtb[toks_base+nt])) {
2147 halfword p = temp_token_head;
2149 set_token_link(p, null);
2151 fast_store_new_token(token_info(t));
2154 set_token_link(p,s);
2155 set_toks_register(nt,temp_token_head,0);
2157 while (token_link(t) != null) {
2160 set_token_link(t,s);
2167 halfword source, ns;
2168 if (cur_cmd == assign_toks_cmd) {
2169 ns = equiv(cur_cs) - toks_base;
2178 if (source != null) {
2179 halfword target = toks(nt);
2180 if (target == null) {
2181 equiv(toks_base+nt) = source;
2182 equiv(toks_base+ns) = null;
2184 halfword s = token_link(source);
2186 halfword t = token_link(target);
2188 set_token_link(target, s);
2189 } else if (odd(how)) {
2192 while (token_link(x) != null) {
2195 set_token_link(x,t);
2196 set_token_link(target,s);
2199 while (token_link(t) != null) {
2202 set_token_link(t,s);
2204 equiv(toks_base+ns) = null;
2211 @ This routine, used in the next one, prints the job name, possibly modified by
2212 the |process_jobname| callback.
2215 static void print_job_name(void)
2218 char *s, *ss; /* C strings for jobname before and after processing */
2219 int callback_id, lua_retval;
2220 s = (char*)str_string(job_name);
2221 callback_id = callback_defined(process_jobname_callback);
2222 if (callback_id > 0) {
2223 lua_retval = run_callback(callback_id, "S-
>S
", s, &ss);
2224 if ((lua_retval == true) && (ss != NULL))
2233 @ Here is a routine that prints the result of a convert command, using the
2234 argument |i|. It returns |false| if it does not know how to print the code |c|. The
2235 function exists because lua code and tex code can both call it to convert
2238 @ Parse an optional lua state integer, or an instance name to be stored in |sn|, and
2239 get the next non-blank non-relax non-call token.
2243 int scan_lua_state(void)
2248 } while ((cur_cmd == spacer_cmd) || (cur_cmd == relax_cmd));
2250 if (cur_cmd != left_brace_cmd) {
2251 if (scan_keyword("name
")) {
2252 (void) scan_toks(false, true);
2255 scan_register_num();
2256 if (get_lua_name(cur_val))
2257 sn = (cur_val - 65536);
2263 @ The procedure |conv_toks| uses |str_toks| to insert the token list for
2264 |convert| functions into the scanner; `\.{\\outer}' control sequences are allowed
2265 to follow `\.{\\string}' and `\.{\\meaning}'.
2267 The extra temp string |u| is needed because |pdf_scan_ext_toks| incorporates any
2268 pending string in its output. In order to save such a pending string, we have to
2269 create a temporary string that is destroyed immediately after.
2272 #define push_selector { \
2273 old_setting = selector; \
2274 selector = new_string; \
2277 #define pop_selector { \
2278 selector = old_setting; \
2281 static int do_variable_dvi(halfword c)
2286 #define do_variable_backend_int(i) \
2287 cur_cmd = assign_int_cmd; \
2288 cur_val = backend_int_base + i; \
2289 cur_tok = token_val(cur_cmd, cur_val); \
2292 #define do_variable_backend_dimen(i) \
2293 cur_cmd = assign_dimen_cmd; \
2294 cur_val = backend_dimen_base + i; \
2295 cur_tok = token_val(cur_cmd, cur_val); \
2298 #define do_variable_backend_toks(i) \
2299 cur_cmd = assign_toks_cmd; \
2300 cur_val = backend_toks_base + i ; \
2301 cur_tok = token_val(cur_cmd, cur_val); \
2304 static int do_variable_pdf(halfword c)
2306 if (scan_keyword("compresslevel
")) { do_variable_backend_int(c_pdf_compress_level); }
2307 else if (scan_keyword("decimaldigits
")) { do_variable_backend_int(c_pdf_decimal_digits); }
2308 else if (scan_keyword("imageresolution
")) { do_variable_backend_int(c_pdf_image_resolution); }
2309 else if (scan_keyword("pkresolution
")) { do_variable_backend_int(c_pdf_pk_resolution); }
2310 else if (scan_keyword("uniqueresname
")) { do_variable_backend_int(c_pdf_unique_resname); }
2311 else if (scan_keyword("minorversion
")) { do_variable_backend_int(c_pdf_minor_version); }
2312 else if (scan_keyword("pagebox
")) { do_variable_backend_int(c_pdf_pagebox); }
2313 else if (scan_keyword("inclusionerrorlevel
")) { do_variable_backend_int(c_pdf_inclusion_errorlevel); }
2314 else if (scan_keyword("ignoreunknownimages
")) { do_variable_backend_int(c_pdf_ignore_unknown_images); }
2315 else if (scan_keyword("gamma
")) { do_variable_backend_int(c_pdf_gamma); }
2316 else if (scan_keyword("imageapplygamma
")) { do_variable_backend_int(c_pdf_image_apply_gamma); }
2317 else if (scan_keyword("imagegamma
")) { do_variable_backend_int(c_pdf_image_gamma); }
2318 else if (scan_keyword("imagehicolor
")) { do_variable_backend_int(c_pdf_image_hicolor); }
2319 else if (scan_keyword("imageaddfilename
")) { do_variable_backend_int(c_pdf_image_addfilename); }
2320 else if (scan_keyword("objcompresslevel
")) { do_variable_backend_int(c_pdf_objcompresslevel); }
2321 else if (scan_keyword("inclusioncopyfonts
")) { do_variable_backend_int(c_pdf_inclusion_copy_font); }
2322 else if (scan_keyword("gentounicode
")) { do_variable_backend_int(c_pdf_gen_tounicode); }
2323 else if (scan_keyword("pkfixeddpi
")) { do_variable_backend_int(c_pdf_pk_fixed_dpi); }
2325 else if (scan_keyword("horigin
")) { do_variable_backend_dimen(d_pdf_h_origin); }
2326 else if (scan_keyword("vorigin
")) { do_variable_backend_dimen(d_pdf_v_origin); }
2327 else if (scan_keyword("threadmargin
")) { do_variable_backend_dimen(d_pdf_thread_margin); }
2328 else if (scan_keyword("destmargin
")) { do_variable_backend_dimen(d_pdf_dest_margin); }
2329 else if (scan_keyword("linkmargin
")) { do_variable_backend_dimen(d_pdf_link_margin); }
2330 else if (scan_keyword("xformmargin
")) { do_variable_backend_dimen(d_pdf_xform_margin); }
2332 else if (scan_keyword("pageattr
")) { do_variable_backend_toks(t_pdf_page_attr); }
2333 else if (scan_keyword("pageresources
")) { do_variable_backend_toks(t_pdf_page_resources); }
2334 else if (scan_keyword("pagesattr
")) { do_variable_backend_toks(t_pdf_pages_attr); }
2335 else if (scan_keyword("xformattr
")) { do_variable_backend_toks(t_pdf_xform_attr); }
2336 else if (scan_keyword("xformresources
")) { do_variable_backend_toks(t_pdf_xform_resources); }
2337 else if (scan_keyword("pkmode
")) { do_variable_backend_toks(t_pdf_pk_mode); }
2344 static int do_feedback_dvi(halfword c)
2349 /* codes not really needed but cleaner when testing */
2351 #define pdftex_version 40 /* these values will not change any more */
2352 #define pdftex_revision "0" /* these values will not change any more */
2354 static int do_feedback_pdf(halfword c)
2356 int old_setting; /* holds |selector| setting */
2357 int save_scanner_status; /* |scanner_status| upon entry */
2358 halfword save_def_ref; /* |def_ref| upon entry, important if inside `\.{\\message}' */
2359 halfword save_warning_index;
2360 boolean bool; /* temp boolean */
2361 str_number s; /* first temp string */
2362 int ff; /* for use with |set_ff| */
2363 str_number u = 0; /* third temp string, will become non-nil if a string is already being built */
2364 char *str; /* color stack init str */
2366 if (scan_keyword("lastlink
")) {
2368 print_int(pdf_last_link);
2370 } else if (scan_keyword("retval
")) {
2372 print_int(pdf_retval);
2374 } else if (scan_keyword("lastobj
")) {
2376 print_int(pdf_last_obj);
2378 } else if (scan_keyword("lastannot
")) {
2380 print_int(pdf_last_annot);
2382 } else if (scan_keyword("xformname
")) {
2384 check_obj_type(static_pdf, obj_type_xform, cur_val);
2386 print_int(obj_info(static_pdf, cur_val));
2388 } else if (scan_keyword("creationdate
")) {
2389 ins_list(string_to_toks(getcreationdate(static_pdf)));
2390 /* no further action */
2392 } else if (scan_keyword("fontname
")) {
2394 if (cur_val == null_font)
2395 normal_error("pdf backend
", "invalid font identifier when asking 'fontname'
");
2396 pdf_check_vf(cur_val);
2397 if (!font_used(cur_val))
2398 pdf_init_font(static_pdf, cur_val);
2401 print_int(obj_info(static_pdf, pdf_font_num(ff)));
2403 } else if (scan_keyword("fontobjnum
")) {
2405 if (cur_val == null_font)
2406 normal_error("pdf backend
", "invalid font identifier when asking 'objnum'
");
2407 pdf_check_vf(cur_val);
2408 if (!font_used(cur_val))
2409 pdf_init_font(static_pdf, cur_val);
2412 print_int(pdf_font_num(ff));
2414 } else if (scan_keyword("fontsize
")) {
2416 if (cur_val == null_font)
2417 normal_error("pdf backend
", "invalid font identifier when asking 'fontsize'
");
2419 print_scaled(font_size(cur_val));
2422 } else if (scan_keyword("pageref
")) {
2425 normal_error("pdf backend
", "invalid page number when asking 'pageref'
");
2427 print_int(pdf_get_obj(static_pdf, obj_type_page, cur_val, false));
2429 } else if (scan_keyword("colorstackinit
")) {
2430 bool = scan_keyword("page
");
2431 if (scan_keyword("direct
"))
2432 cur_val = direct_always;
2433 else if (scan_keyword("page
"))
2434 cur_val = direct_page;
2436 cur_val = set_origin;
2437 save_scanner_status = scanner_status;
2438 save_warning_index = warning_index;
2439 save_def_ref = def_ref;
2440 u = save_cur_string();
2441 scan_toks(false, true);
2442 s = tokens_to_string(def_ref);
2443 delete_token_ref(def_ref);
2444 def_ref = save_def_ref;
2445 warning_index = save_warning_index;
2446 scanner_status = save_scanner_status;
2447 str = makecstring(s);
2448 cur_val = newcolorstack(str, cur_val, bool);
2451 cur_val_level = int_val_level;
2453 print_err("Too many color stacks
");
2454 help2("The number of color stacks is limited to
32768.
",
2455 "I'll use the default color stack
0 here.
");
2458 restore_cur_string(u);
2463 } else if (scan_keyword("version
")) {
2465 print_int(pdftex_version);
2467 } else if (scan_keyword("revision
")) {
2468 ins_list(string_to_toks(pdftex_revision));
/*
  |conv_toks| carries out the string conversion commands. The kind of
  conversion is taken from |cur_chr| (local |c|) and selects one of the |case|
  labels below.

  Cases that scan a token list with |scan_toks| first save |scanner_status|,
  |def_ref| and |warning_index| and put them back afterwards; calls to
  |save_cur_string| and |restore_cur_string| bracket that work because a string
  may already be under construction (see |u|).

  Some cases hand their result to |ins_list| themselves and are marked
  ``no further action''. The remaining cases print their result; the closing
  statements of the function collect that output with |make_string|, convert
  it with |str_toks| and insert it with |ins_list(token_link(temp_token_head))|.

  NOTE(review): many lines of this function are not visible here, so the exact
  control flow between the cases should be confirmed against the full source.
*/
2476 void conv_toks(void)
2478 int old_setting; /* holds |selector| setting */
2480 int save_scanner_status; /* |scanner_status| upon entry */
2481 halfword save_def_ref; /* |def_ref| upon entry, important if inside `\.{\\message}' */
2482 halfword save_warning_index;
2483 boolean bool; /* temp boolean */
2484 str_number s; /* first temp string */
2485 int sn; /* lua chunk name */
2486 str_number u = 0; /* third temp string, will become non-nil if a string is already being built */
2487 int c = cur_chr; /* desired type of conversion */
2490 /* Scan the argument for command |c| */
2498 case lua_function_code:
2501 normal_error("luafunction
", "invalid number
");
2503 u = save_cur_string();
2505 luafunctioncall(cur_val);
2506 restore_cur_string(u);
2507 if (luacstrings > 0)
2510 /* no further action */
2514 u = save_cur_string();
2515 save_scanner_status = scanner_status;
2516 save_def_ref = def_ref;
2517 save_warning_index = warning_index;
2518 sn = scan_lua_state();
2519 scan_toks(false, true);
2521 warning_index = save_warning_index;
2522 def_ref = save_def_ref;
2523 scanner_status = save_scanner_status;
2525 luatokencall(s, sn);
2526 delete_token_ref(s);
2527 restore_cur_string(u); /* TODO: check this, was different */
2528 if (luacstrings > 0)
2530 /* no further action */
2534 save_scanner_status = scanner_status;
2535 save_warning_index = warning_index;
2536 save_def_ref = def_ref;
2537 u = save_cur_string();
2538 scan_toks(false, true);
2539 warning_index = save_warning_index;
2540 scanner_status = save_scanner_status;
2541 ins_list(token_link(def_ref));
2542 def_ref = save_def_ref;
2543 restore_cur_string(u);
2544 /* no further action */
2547 case math_style_code:
2553 save_scanner_status = scanner_status;
2554 scanner_status = normal;
2556 scanner_status = save_scanner_status;
2564 case cs_string_code:
2565 save_scanner_status = scanner_status;
2566 scanner_status = normal;
2568 scanner_status = save_scanner_status;
2571 sprint_cs_name(cur_cs);
2576 case roman_numeral_code:
2579 print_roman_int(cur_val);
2583 save_scanner_status = scanner_status;
2584 scanner_status = normal;
2586 scanner_status = save_scanner_status;
2597 case lua_escape_string_code:
2601 save_scanner_status = scanner_status;
2602 save_def_ref = def_ref;
2603 save_warning_index = warning_index;
2604 scan_toks(false, true);
2605 bool = in_lua_escape;
2606 in_lua_escape = true;
2607 escstr.s = (unsigned char *) tokenlist_to_cstring(def_ref, false, &l);
2608 escstr.l = (unsigned) l;
2609 in_lua_escape = bool;
2610 delete_token_ref(def_ref);
2611 def_ref = save_def_ref;
2612 warning_index = save_warning_index;
2613 scanner_status = save_scanner_status;
2614 (void) lua_str_toks(escstr);
2615 ins_list(token_link(temp_token_head));
2619 /* no further action */
2627 case font_name_code:
2630 append_string((unsigned char *) font_name(cur_val),(unsigned) strlen(font_name(cur_val)));
2631 if (font_size(cur_val) != font_dsize(cur_val)) {
2633 print_scaled(font_size(cur_val));
2638 case left_margin_kern_code:
2640 if ((box(cur_val) == null) || (type(box(cur_val)) != hlist_node))
2641 normal_error("marginkern
", "a non-empty hbox expected
");
2643 p = list_ptr(box(cur_val));
2644 while ((p != null) && (type(p) == glue_node)) {
2647 if ((p != null) && (type(p) == margin_kern_node) && (subtype(p) == left_side))
2648 print_scaled(width(p));
2654 case right_margin_kern_code:
2656 if ((box(cur_val) == null) || (type(box(cur_val)) != hlist_node))
2657 normal_error("marginkern
", "a non-empty hbox expected
");
2659 p = list_ptr(box(cur_val));
2661 p = tail_of_list(p);
2663 there can be a leftskip, rightskip, penalty and yes, also a disc node with a nesting
2664 node that points to glue spec ... and we don't want to analyze that messy lot
2666 while ((p != null) && (type(p) == glue_node)) {
2669 if ((p != null) && ! ((type(p) == margin_kern_node) && (subtype(p) == right_side))) {
2670 if (type(p) == disc_node) {
2672 if ((q != null) && ((type(q) == margin_kern_node) && (subtype(q) == right_side))) {
2676 officially we should look in the replace but currently protrusion doesn't
2677 work anyway with "foo\discretionary
{}{}{bar-
} " (no following char) so we
2684 if ((p != null) && (type(p) == margin_kern_node) && (subtype(p) == right_side))
2685 print_scaled(width(p));
2691 case uniform_deviate_code:
2694 print_int(unif_rand(cur_val));
2697 case normal_deviate_code:
2700 print_int(norm_rand());
2703 case math_char_class_code:
2707 mval = get_math_code(cur_val);
2709 print_int(mval.class_value);
2713 case math_char_fam_code:
2717 mval = get_math_code(cur_val);
2719 print_int(mval.family_value);
2723 case math_char_slot_code:
2727 mval = get_math_code(cur_val);
2729 print_int(mval.character_value);
2733 case insert_ht_code:
2734 scan_register_num();
2738 while (i >= subtype(vlink(p)))
2740 if (subtype(p) == i)
2741 print_scaled(height(p));
2754 case format_name_code:
2761 case luatex_banner_code:
2763 tprint(luatex_banner);
2766 case luatex_revision_code:
2768 print(get_luatexrevision());
2771 case luatex_date_code:
2773 print_int(get_luatex_date_info());
2778 tprint(eTeX_version_string);
2781 case eTeX_revision_code:
2783 tprint(eTeX_revision);
2786 case font_identifier_code:
2787 confusion("convert
");
2790 confusion("convert
");
2793 str = make_string();
2794 (void) str_toks(str_lstring(str));
2796 ins_list(token_link(temp_token_head));
/*
  |do_feedback| dispatches the backend feedback commands. For
  |dvi_feedback_code| the output mode must be |OMODE_DVI| before
  |do_feedback_dvi| is called, and for |pdf_feedback_code| it must be
  |OMODE_PDF| before |do_feedback_pdf| is called; the helper's result is kept
  in |done|. The visible branches report an ``unexpected use'' error or
  warning, or test |done==2|. Any other code ends in |confusion|.

  The closing statements collect the printed text with |make_string|, convert
  it with |str_toks| and insert it with |ins_list|.

  NOTE(review): which values of |done| reach those closing statements cannot
  be seen in this excerpt; confirm against the full source.
*/
2799 void do_feedback(void)
2805 case dvi_feedback_code:
2806 if (get_o_mode() == OMODE_DVI) {
2807 done = do_feedback_dvi(c);
2809 tex_error("unexpected use of \\dvifeedback
",null);
2814 normal_warning("dvi backend
","unexpected use of \\dvifeedback
");
2816 } else if (done==2) {
2820 case pdf_feedback_code:
2821 if (get_o_mode() == OMODE_PDF) {
2822 done = do_feedback_pdf(c);
2824 tex_error("unexpected use of \\pdffeedback
",null);
2829 normal_warning("pdf backend
","unexpected use of \\pdffeedback
");
2831 } else if (done==2) {
2836 confusion("feedback
");
2839 str = make_string();
2840 (void) str_toks(str_lstring(str));
2842 ins_list(token_link(temp_token_head));
/*
  |do_variable| dispatches the backend variable commands:
  |dvi_variable_code| is passed to |do_variable_dvi| and |pdf_variable_code|
  to |do_variable_pdf|, each storing its result in |done|. The visible
  warnings report an ``unexpected use'' of the command, and any other code
  ends in |confusion|.

  NOTE(review): the condition on |done| that triggers the warnings is not
  visible in this excerpt.
*/
2845 void do_variable(void)
2850 case dvi_variable_code:
2851 done = do_variable_dvi(c);
2854 normal_warning("dvi backend
","unexpected use of \\dvivariable
");
2858 case pdf_variable_code:
2859 done = do_variable_pdf(c);
2862 normal_warning("pdf backend
","unexpected use of \\pdfvariable
");
2867 confusion("variable
");
@ This boolean keeps track of the Lua string escape state.
/*
  Set to |true| by |conv_toks| while the argument of |lua_escape_string_code|
  is converted with |tokenlist_to_cstring| (the previous value is restored
  afterwards), and tested by |tokenlist_to_cstring| itself.
*/
2874 boolean in_lua_escape;
/*
  DVI backend helper of |the_convert_string|, which calls it for
  |dvi_feedback_code| and stores the returned value in its |done| flag.
  NOTE(review): the body is not visible in this excerpt.
*/
2876 static int the_convert_string_dvi(halfword c, int i)
/*
  PDF backend helper of |the_convert_string|, which calls it for
  |pdf_feedback_code| and stores the returned value in its |done| flag.

  The output mode is tested against |OMODE_PDF| first. Otherwise one keyword
  is scanned and the matching value is printed: |lastlink|, |retval|,
  |lastobj|, |lastannot|, |xformname|, |creationdate|, |fontname|,
  |fontobjnum|, |fontsize|, |pageref| or |colorstackinit|. The argument |i|
  supplies the object, font or page number for the keywords that need one.

  NOTE(review): the return statements are not visible in this excerpt.
*/
2881 static int the_convert_string_pdf(halfword c, int i)
2884 if (get_o_mode() != OMODE_PDF) {
2886 } else if (scan_keyword("lastlink
")) {
2887 print_int(pdf_last_link);
2888 } else if (scan_keyword("retval
")) {
2889 print_int(pdf_retval);
2890 } else if (scan_keyword("lastobj
")) {
2891 print_int(pdf_last_obj);
2892 } else if (scan_keyword("lastannot
")) {
2893 print_int(pdf_last_annot);
2894 } else if (scan_keyword("xformname
")) {
2895 print_int(obj_info(static_pdf, i));
2896 } else if (scan_keyword("creationdate
")) {
2898 } else if (scan_keyword("fontname
")) {
2900 print_int(obj_info(static_pdf, pdf_font_num(ff)));
2901 } else if (scan_keyword("fontobjnum
")) {
2903 print_int(pdf_font_num(ff));
2904 } else if (scan_keyword("fontsize
")) {
2905 print_scaled(font_size(i));
2907 } else if (scan_keyword("pageref
")) {
2908 print_int(pdf_get_obj(static_pdf, obj_type_page, i, false));
2909 } else if (scan_keyword("colorstackinit
")) {
/*
  |the_convert_string| performs conversion |c| on the already scanned argument
  |i| and yields the text as a string number. While it runs, |selector| is
  switched to |new_string| so that the print calls below write into the string
  pool; the previous setting is restored from |old_setting| before returning,
  and the result is taken from |make_string|.

  The case labels that are commented out name the conversion codes this
  function does not handle. |dvi_feedback_code| and |pdf_feedback_code| are
  delegated to the backend helpers, whose result is stored in |done|.

  NOTE(review): how |done| influences the returned value is not visible in
  this excerpt.
*/
2917 str_number the_convert_string(halfword c, int i)
2919 int old_setting; /* saved |selector| setting */
2921 boolean done = true ;
2922 old_setting = selector;
2923 selector = new_string;
2928 /* case lua_function_code: */
2929 /* case lua_code: */
2930 /* case expanded_code: */
2931 case math_style_code:
2934 /* case string_code: */
2935 /* case cs_string_code: */
2936 case roman_numeral_code:
2939 /* case meaning_code: */
2943 /* lua_escape_string_code: */
2947 case font_name_code:
2948 append_string((unsigned char *) font_name(i),(unsigned) strlen(font_name(i)));
2949 if (font_size(i) != font_dsize(i)) {
2951 print_scaled(font_size(i));
2955 /* left_margin_kern_code: */
2956 /* right_margin_kern_code: */
2957 case uniform_deviate_code:
2958 print_int(unif_rand(i));
2960 case normal_deviate_code:
2961 print_int(norm_rand());
2963 /* math_char_class_code: */
2964 /* math_char_fam_code: */
2965 /* math_char_slot_code: */
2966 /* insert_ht_code: */
2970 case format_name_code:
2973 case luatex_banner_code:
2974 tprint(luatex_banner);
2976 case luatex_revision_code:
2977 print(get_luatexrevision());
2979 case luatex_date_code:
2980 print_int(get_luatex_date_info());
2983 tprint(eTeX_version_string);
2985 case eTeX_revision_code:
2986 tprint(eTeX_revision);
2988 case font_identifier_code:
2989 print_font_identifier(i);
2991 /* backend: this might become obsolete */
2992 case dvi_feedback_code:
2993 done = the_convert_string_dvi(c,i);
2995 case pdf_feedback_code:
2996 done = the_convert_string_pdf(c,i);
3004 ret = make_string();
3006 selector = old_setting;
@ Another way to create a token list is via the \.{\\read} command. The sixteen
files potentially usable for reading appear in the following global variables.
The value of |read_open[n]| will be |closed| if stream number |n| has not been
opened or if it has been fully read; |just_open| if an \.{\\openin} but not a
\.{\\read} has been done; and |normal| if it is open and ready to read the next
line.
3018 FILE *read_file[16]; /* the sixteen input files used for \.{\\read} */
3019 int read_open[17]; /* state of |read_file[n]|: |closed|, |just_open| or |normal|; it has one more entry than |read_file|, and |initialize_read| resets all seventeen (NOTE(review): entry 16 presumably stands for the terminal; confirm in |read_toks|) */
3021 void initialize_read(void)
3024 for (k = 0; k <= 16; k++)
3025 read_open[k] = closed;
3028 @ The |read_toks| procedure constructs a token list like that for any macro
3029 definition, and makes |cur_val| point to it. Parameter |r| points to the control
3030 sequence that will receive this token list.
/*
  |read_toks| builds a token list from input read on stream |n|, in the manner
  of a macro definition: |scanner_status| is |defining| while it runs and is
  reset to |normal| at the end, the list starts at |def_ref| and begins with
  |end_match_token|. |r| points to the control sequence that will receive the
  list.

  Stream numbers outside 0..15 are treated specially (the assignment of |m|
  is not visible here). Depending on |read_open[m]| the next line comes from
  the terminal (|closed|; refused with an error when |interaction| is not
  above |nonstop_mode|), from the first line of the file (|just_open|) or from
  the following line. When |lua_input_ln| reports that no line is left, the
  file is closed with |lua_a_close_in|, |read_open[m]| becomes |closed|, and
  unbalanced braces are reported as ``File ended within \read''.

  Lines keep being read until |align_state| is back at 1000000, that is,
  until the braces seen so far balance.

  NOTE(review): |j| presumably distinguishes \.{\\readline} from \.{\\read}
  (see the ``Handle \readline'' step); the test itself is not visible here.
*/
3033 void read_toks(int n, halfword r, halfword j)
3035 halfword p; /* tail of the token list */
3036 halfword q; /* new node being added to the token list via |store_new_token| */
3037 int s; /* saved value of |align_state| */
3038 int m; /* stream number */
3039 scanner_status = defining;
3043 set_token_ref_count(def_ref, 0);
3044 p = def_ref; /* the reference count */
3045 store_new_token(end_match_token);
3046 if ((n < 0) || (n > 15))
3051 align_state = 1000000; /* disable tab marks, etc. */
3053 /* Input and store tokens from the next line of the file */
3054 begin_file_reading();
3056 if (read_open[m] == closed) {
3058 Input for \.{\\read} from the terminal
3060 Here we input on-line into the |buffer| array, prompting the user explicitly
3061 if |n>=0|. The value of |n| is set negative so that additional prompts
3062 will not be given in the case of multi-line input.
3064 if (interaction > nonstop_mode) {
3076 ("*** (cannot \\read from terminal in nonstop modes
)");
3079 } else if (read_open[m] == just_open) {
3081 Input the first line of |read_file[m]|
3083 The first line of a file must be treated specially, since |lua_input_ln|
3084 must be told not to start with |get|.
3086 if (lua_input_ln(read_file[m], (m + 1), false)) {
3087 read_open[m] = normal;
3089 lua_a_close_in(read_file[m], (m + 1));
3090 read_open[m] = closed;
3095 Input the next line of |read_file[m]|
3097 An empty line is appended at the end of a |read_file|.
3099 if (!lua_input_ln(read_file[m], (m + 1), true)) {
3100 lua_a_close_in(read_file[m], (m + 1));
3101 read_open[m] = closed;
3102 if (align_state != 1000000) {
3104 print_err("File ended within \\read
");
3105 help1("This \\read has unbalanced braces.
");
3106 align_state = 1000000;
3113 if (end_line_char_inactive)
3116 buffer[ilimit] = (packed_ASCII_code) int_par(end_line_char_code);
3120 /* Handle \.{\\readline} and |goto done|; */
3122 while (iloc <= ilimit) {
3123 /* current line not yet finished */
3124 do_buffer_to_unichar(cur_chr, iloc);
3126 cur_tok = space_token;
3128 cur_tok = cur_chr + other_token;
3129 store_new_token(cur_tok);
3135 /* |cur_cmd=cur_chr=0| will occur at the end of the line */
3138 if (align_state < 1000000) {
3139 /* unmatched `\.\}' aborts the line */
3142 } while (cur_tok != 0);
3143 align_state = 1000000;
3146 store_new_token(cur_tok);
3151 } while (align_state != 1000000);
3153 scanner_status = normal;
@ Return a string made from a token list.
3160 str_number tokens_to_string(halfword p)
3163 if (selector == new_string)
3164 normal_error("tokens
","tokens_to_string
() called while selector
= new_string
");
3165 old_setting = selector;
3166 selector = new_string;
3167 show_token_list(token_link(p), null, -1);
3168 selector = old_setting;
3169 return make_string();
/*
  Make sure the buffer |ret| (capacity |alloci| bytes) can take |a| more bytes
  at write position |i| plus one terminating byte. The buffer is grown to the
  size actually required plus 64 bytes of slack; growing by a fixed 64 bytes
  is not enough when |a| itself exceeds 64, as it can for
  |make_room(strlen(v))|.

  The macro uses the caller's |i|, |alloci| and |ret|. Its argument is
  evaluated twice, so it must be free of side effects.
*/
#define make_room(a)                                   \
    if ((unsigned)i+(a)+1>alloci) {                    \
        alloci = (unsigned)((unsigned)i+(a)+1+64);     \
        ret = xrealloc(ret,alloci);                    \
    }
/*
  |append_i_byte| stores one byte at |ret[i]| and advances |i|; it does not
  check the buffer capacity, so |make_room| must have been called first.
*/
3179 #define append_i_byte(a) ret[i++] = (char)(a)
/*
  |Print_char| reserves room for one byte and appends it. It expands to two
  statements, so it must not be used as the unbraced body of an |if|, |else|
  or loop.
*/
3181 #define Print_char(a) make_room(1); append_i_byte(a)
/*
  Append the UTF-8 encoding of the character code |s| to |ret|: one byte up to
  |0x7F|, two up to |0x7FF|, three up to |0xFFFF| and four otherwise. Codes of
  |0x110000| and above are written as the single byte |s-0x110000|.

  The single-byte branch used to subtract |0x11000|; that only produced the
  right byte because the |(char)| cast in |append_i_byte| drops the high bits.
  The argument is evaluated several times, so it must be free of side effects.
*/
#define Print_uchar(s) { \
    make_room(4); \
    if ((s)<=0x7F) { \
      append_i_byte(s); \
    } else if ((s)<=0x7FF) { \
      append_i_byte(0xC0 + ((s) / 0x40)); \
      append_i_byte(0x80 + ((s) % 0x40)); \
    } else if ((s)<=0xFFFF) { \
      append_i_byte(0xE0 + ((s) / 0x1000)); \
      append_i_byte(0x80 + (((s) % 0x1000) / 0x40)); \
      append_i_byte(0x80 + (((s) % 0x1000) % 0x40)); \
    } else if ((s)>=0x110000) { \
      append_i_byte((s)-0x110000); \
    } else { \
      append_i_byte(0xF0 + ((s) / 0x40000)); \
      append_i_byte(0x80 + (((s) % 0x40000) / 0x1000)); \
      append_i_byte(0x80 + ((((s) % 0x40000) % 0x1000) / 0x40)); \
      append_i_byte(0x80 + ((((s) % 0x40000) % 0x1000) % 0x40)); \
    } }
/*
  |Print_esc| appends the control sequence name |b| to |ret|, after reserving
  |strlen(b)| bytes with |make_room|. The test on |e| (|e>0| and below
  |STRING_OFFSET|) guards a statement that is not visible in this excerpt;
  NOTE(review): presumably it emits the escape character |e| first -- confirm
  against the full source.
*/
3203 #define Print_esc(b) { \
3204 const char *v = b; \
3205 if (e>0 && e<STRING_OFFSET) { \
3208 make_room(strlen(v)); \
3209 while (*v) { append_i_byte(*v); v++; } \
/*
  |Print_str| appends the C string |b| to |ret| without its terminating zero
  byte, after reserving |strlen(b)| bytes with |make_room|.
*/
#define Print_str(b) { \
    const char *v = b; \
    make_room(strlen(v)); \
    while (*v) { append_i_byte(*v); v++; } \
  }
/*
  True when the first character of string number |a|, as decoded by
  |pool_to_unichar|, currently has category code 11.
*/
3218 #define is_cat_letter(a) \
3219 (get_char_cat_code(pool_to_unichar(str_string((a)))) == 11)
@ The actual token conversion in this function is now functionally equivalent to
|show_token_list|, except that it always prints the whole token list. TODO: check
whether this causes problems in the Lua library.
/*
  |tokenlist_to_cstring| converts the token list |pp| into a C string held in
  a buffer obtained from |xmalloc|. The buffer starts at 1024 bytes
  (|alloci|) and is grown by the |Print_...| macros through |make_room|.

  The head node is skipped first. A node outside |fix_mem_min|..|fix_mem_end|
  is shown as ``CLOBBERED.''. Control sequence tokens (|infop >=
  cs_token_flag|) are written by name, with ``IMPOSSIBLE.'' and
  ``NONEXISTENT.'' for entries that fail the range checks; |par_token| is
  left out when |inhibit_par| is set. All other tokens are split with
  |token_cmd| and |token_chr| and handled per command.

  NOTE(review): |siz| presumably receives the length of the result -- callers
  pass the address of an |int| and use it as a length afterwards; the store
  itself is not visible in this excerpt.
*/
3226 char *tokenlist_to_cstring(int pp, int inhibit_par, int *siz)
3228 register int p, c, m;
3234 int match_chr = '#';
3236 unsigned alloci = 1024;
3244 ret = xmalloc(alloci);
3245 p = token_link(p); /* skip refcount */
3247 e = int_par(escape_char_code);
3250 if (p < (int) fix_mem_min || p > (int) fix_mem_end) {
3251 Print_esc("CLOBBERED.
");
3254 infop = token_info(p);
3255 if (infop >= cs_token_flag) {
3256 if (!(inhibit_par && infop == par_token)) {
3257 q = infop - cs_token_flag;
3258 if (q < hash_base) {
3260 Print_esc("csname
");
3261 Print_esc("endcsname
");
3263 Print_esc("IMPOSSIBLE.
");
3265 } else if ((q >= undefined_control_sequence) && ((q <= eqtb_size) || (q > eqtb_size + hash_extra))) {
3266 Print_esc("IMPOSSIBLE.
");
3267 } else if ((cs_text(q) < 0) || (cs_text(q) >= str_ptr)) {
3268 Print_esc("NONEXISTENT.
");
3270 str_number txt = cs_text(q);
3271 sh = makecstring(txt);
3273 if (is_active_cs(txt)) {
3280 if (e>=0 && e<0x110000) Print_uchar(e);
3285 if ((!single_letter(txt)) || is_cat_letter(txt)) {
3296 m = token_cmd(infop);
3297 c = token_chr(infop);
3299 case left_brace_cmd:
3300 case right_brace_cmd:
3301 case math_shift_cmd:
3307 case other_char_cmd:
3311 if (!in_lua_escape && (is_in_csname==0))
3316 Print_uchar(match_chr);
3318 Print_char(c + '0');
3339 not_so_bad(Print_esc);
3354 lstring *tokenlist_to_lstring(int pp, int inhibit_par)
3357 lstring *ret = xmalloc(sizeof(lstring));
3358 ret->s = (unsigned char *) tokenlist_to_cstring(pp, inhibit_par, &siz);
3359 ret->l = (size_t) siz;
3364 void free_lstring(lstring * ls)