3 % Copyright
2006-2011 Taco Hoekwater
<taco@@luatex.org
>
5 % This file is part of LuaTeX.
7 % LuaTeX is free software
; you can redistribute it and
/or modify it under
8 % the terms of the GNU General Public License as published by the Free
9 % Software Foundation
; either version
2 of the License
, or
(at your
10 % option
) any later version.
12 % LuaTeX is distributed in the hope that it will be useful
, but WITHOUT
13 % ANY WARRANTY
; without even the implied warranty of MERCHANTABILITY or
14 % FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 % License for more details.
17 % You should have received a copy of the GNU General Public License along
18 % with LuaTeX
; if not
, see
<http
://www.gnu.org
/licenses
/>.
25 #define pausing int_par
(pausing_code
)
26 #define cat_code_table int_par
(cat_code_table_code
)
27 #define tracing_nesting int_par
(tracing_nesting_code
)
28 #define suppress_outer_error int_par
(suppress_outer_error_code
)
29 #define suppress_mathpar_error int_par
(suppress_mathpar_error_code
)
32 #define every_eof equiv
(every_eof_loc
)
33 #define box
(A
) equiv
(box_base
+(A
))
34 #define toks
(A
) equiv
(toks_base
+(A
))
36 #define detokenized_line
() (line_catcode_table
==NO_CAT_TABLE
)
39 #define do_get_cat_code
(a
,b
) do
{ \
40 if
(line_catcode_table
<=-0xFF) \
41 a
= - line_catcode_table
- 0xFF ; \
42 else if
(line_catcode_table
!=DEFAULT_CAT_TABLE
) \
43 a
=get_cat_code
(line_catcode_table
,b
); \
45 a
=get_cat_code
(cat_code_table
,b
); \
49 #define do_get_cat_code
(a
,b
) do
{ \
50 if
(line_catcode_table
==DEFAULT_CAT_TABLE
) \
51 a
=get_cat_code
(cat_code_table
,b
); \
52 else if
(line_catcode_table
>-0xFF) \
53 a
=get_cat_code
(line_catcode_table
,b
); \
55 a
= - line_catcode_table
- 0xFF ; \
59 @ The \TeX\ system does nearly all of its own memory allocation
, so that it can
60 readily be transported into environments that do not have automatic facilities
61 for strings
, garbage collection
, etc.
, and so that it can be in control of what
62 error messages the user receives. The dynamic storage requirements of \TeX\ are
63 handled by providing two large arrays called |fixmem| and |varmem| in which
64 consecutive blocks of words are used as nodes by the \TeX\ routines.
66 Pointer variables are indices into this array
, or into another array called
67 |eqtb| that will be explained later. A pointer variable might also be a special
68 flag that lies outside the bounds of |mem|
, so we allow pointers to assume any
69 |halfword| value. The minimum halfword value represents a null pointer. \TeX\
70 does not assume that |mem
[null
]| exists.
72 @ Locations in |fixmem| are used for storing one-word records
; a conventional
73 \.
{AVAIL} stack is used for allocation in this array.
76 smemory_word
*fixmem
; /* the big dynamic storage area
*/
77 unsigned fix_mem_min
; /* the smallest location of one-word memory in use
*/
78 unsigned fix_mem_max
; /* the largest location of one-word memory in use
*/
80 @ In order to study the memory requirements of particular applications
, it is
81 possible to prepare a version of \TeX\ that keeps track of current and maximum
82 memory usage. When code between the delimiters |@
!stat| $\ldots$ |tats| is not
83 commented out
, \TeX\ will run a bit slower but it will report these statistics
84 when |tracing_stats| is sufficiently large.
87 int var_used
, dyn_used
; /* how much memory is in use
*/
89 halfword avail
; /* head of the list of available one-word nodes
*/
90 unsigned fix_mem_end
; /* the last one-word node used in |mem|
*/
92 halfword garbage
; /* head of a junk list
, write only
*/
93 halfword temp_token_head
; /* head of a temporary list of some kind
*/
94 halfword hold_token_head
; /* head of a temporary list of another kind
*/
95 halfword omit_template
; /* a constant token list
*/
96 halfword null_list
; /* permanently empty list
*/
97 halfword backup_head
; /* head of token list built by |scan_keyword|
*/
100 void initialize_tokens
(void
)
107 set_token_info
(temp_token_head
, 0);
110 set_token_info
(hold_token_head
, 0);
113 set_token_info
(omit_template
, 0);
116 set_token_info
(null_list
, 0);
119 set_token_info
(backup_head
, 0);
122 set_token_info
(garbage
, 0);
123 dyn_used
= 0; /* initialize statistics
*/
126 @ The function |get_avail| returns a pointer to a new one-word node whose |link|
127 field is null. However
, \TeX\ will halt if there is no more room left.
130 If the available-space list is empty
, i.e.
, if |avail
=null|
, we try first to
131 increase |fix_mem_end|. If that cannot be done
, i.e.
, if
132 |fix_mem_end
=fix_mem_max|
, we try to reallocate array |fixmem|. If that doesn't
133 work
, we have to quit.
136 halfword get_avail
(void
)
137 { /* single-word node allocation
*/
138 unsigned p
; /* the new node being got
*/
140 p
= (unsigned
) avail
; /* get top location in the |avail| stack
*/
142 avail
= token_link
(avail
); /* and pop it off
*/
143 } else if
(fix_mem_end
< fix_mem_max
) { /* or go into virgin territory
*/
147 smemory_word
*new_fixmem
; /* the big dynamic storage area
*/
148 t
= (fix_mem_max
/ 5);
151 (fixmem
, sizeof
(smemory_word
) * (fix_mem_max
+ t
+ 1)));
152 if
(new_fixmem
== NULL) {
153 runaway
(); /* if memory is exhausted
, display possible runaway text
*/
154 overflow
("token memory size", fix_mem_max
);
158 memset
(voidcast
(fixmem
+ fix_mem_max
+ 1), 0, t
* sizeof
(smemory_word
));
162 token_link
(p
) = null
; /* provide an oft-desired initialization of the new node
*/
163 incr
(dyn_used
); /* maintain statistics
*/
167 @ The procedure |flush_list
(p
)| frees an entire linked list of one-word nodes
168 that starts at position |p|.
172 void flush_list
(halfword p
)
173 { /* makes list of single-word nodes available
*/
174 halfword q
, r
; /* list traversers
*/
181 } while
(r
!= null
); /* now |q| is the last node on the list
*/
182 token_link
(q
) = avail
;
187 @ A \TeX\ token is either a character or a control sequence
, and it is @^token@
>
188 represented internally in one of two ways
: (1)~A character whose ASCII code
189 number is |c| and whose command code is |m| is represented as the number
190 $
2^
{21}m
+c$
; the command code is in the range |
1<=m
<=14|.
(2)~A control sequence
191 whose |eqtb| address is |p| is represented as the number |cs_token_flag
+p|. Here
192 |cs_token_flag
=@t$
2^
{25}-1$@
>| is larger than $
2^
{21}m
+c$
, yet it is small enough
193 that |cs_token_flag
+p
< max_halfword|
; thus
, a token fits comfortably in a
196 A token |t| represents a |left_brace| command if and only if
197 |t
<left_brace_limit|
; it represents a |right_brace| command if and only if we
198 have |left_brace_limit
<=t
<right_brace_limit|
; and it represents a |match| or
199 |end_match| command if and only if |match_token
<=t
<=end_match_token|. The
200 following definitions take care of these token-oriented constants and a few
203 @ A token list is a singly linked list of one-word nodes in |mem|
, where each
204 word contains a token and a link. Macro definitions
, output-routine definitions
,
205 marks
, \.
{\\write
} texts
, and a few other things are remembered by \TeX\ in the
206 form of token lists
, usually preceded by a node with a reference count in its
207 |token_ref_count| field. The token stored in location |p| is called |info
(p
)|.
209 Three special commands appear in the token lists of macro definitions. When
210 |m
=match|
, it means that \TeX\ should scan a parameter for the current macro
;
211 when |m
=end_match|
, it means that parameter matching should end and \TeX\ should
212 start reading the macro text
; and when |m
=out_param|
, it means that \TeX\ should
213 insert parameter number |c| into the text at this point.
215 The enclosing \.
{\char'
173} and \.
{\char'
175} characters of a macro definition
216 are omitted
, but the final right brace of an output routine is included at the
217 end of its token list.
219 Here is an example macro definition that illustrates these conventions. After
220 \TeX\ processes the text
222 $$\.
{\\def\\mac a\#
1\#
2 \\b \
{\#
1\\
-a \#\#
1\#
2 \#
2\
}}$$
224 the definition of \.
{\\mac
} is represented as a token list containing
227 \vbox
{\halign
{\hfil#\hfil\cr
228 (reference count
), |letter|\
,\.a
, |match|\
,\#
, |match|\
,\#
, |spacer|\
,\.\
,
229 \.
{\\b
}, |end_match|
,\cr
230 |out_param|\
,1, \.
{\\
-}, |letter|\
,\.a
, |spacer|\
,\.\
, |mac_param|\
,\#
,
231 |other_char|\
,\
.1,\cr
232 |out_param|\
,2, |spacer|\
,\.\
, |out_param|\
,2.\cr
}}$$
234 The procedure |scan_toks| builds such token lists
, and |macro_call| does the
235 parameter matching. @^reference counts@
>
237 Examples such as $$\.
{\\def\\m\
{\\def\\m\
{a\
}\ b\
}}$$ explain why reference
238 counts would be needed even if \TeX\ had no \.
{\\let
} operation
: When the token
239 list for \.
{\\m
} is being read
, the redefinition of \.
{\\m
} changes the |eqtb|
240 entry before the token list has been fully consumed
, so we dare not simply
241 destroy a token list when its control sequence is being redefined.
243 If the parameter-matching part of a definition ends with `\.
{\#\
{}'
, the
244 corresponding token list will have `\.\
{' just before the `|end_match|' and also
245 at the very end. The first `\.\
{' is used to delimit the parameter
; the second
246 one keeps the first from disappearing.
248 The |print_meaning| subroutine displays |cur_cmd| and |cur_chr| in symbolic form
,
249 including the expansion of a macro or mark.
252 void print_meaning
(void
)
254 /* remap \mathchar onto \Umathchar
*/
256 if
(cur_cmd
== math_given_cmd
) {
257 cur_cmd
= xmath_given_cmd
;
260 print_cmd_chr
((quarterword
) cur_cmd
, cur_chr
);
261 if
(cur_cmd
>= call_cmd
) {
266 /* Show the meaning of a mark node
*/
267 if
((cur_cmd
== top_bot_mark_cmd
) && (cur_chr < marks_code)) {
271 case first_mark_code
:
272 token_show
(first_mark
(0));
275 token_show
(bot_mark
(0));
277 case split_first_mark_code
:
278 token_show
(split_first_mark
(0));
280 case split_bot_mark_code
:
281 token_show
(split_bot_mark
(0));
284 token_show
(top_mark
(0));
291 @ The procedure |show_token_list|
, which prints a symbolic form of the token list
292 that starts at a given node |p|
, illustrates these conventions. The token list
293 being displayed should not begin with a reference count. However
, the procedure
294 is intended to be robust
, so that if the memory links are awry or if |p| is not
295 really a pointer to a token list
, nothing catastrophic will happen.
297 An additional parameter |q| is also given
; this parameter is either null or it
298 points to a node in the token list where a certain magic computation takes place
299 that will be explained later.
(Basically
, |q| is non-null when we are printing
300 the two-line context information at the time of an error message
; |q| marks the
301 place corresponding to where the second line should begin.
)
303 For example
, if |p| points to the node containing the first \.a in the token list
304 above
, then |show_token_list| will print the string $$\hbox
{`\.
{a\#
1\#
2\ \\b\
305 ->\#
1\\
-a\ \#\#
1\#
2\ \#
2}'
;}$$ and if |q| points to the node containing the
306 second \.a
, the magic computation will be performed just before the second \.a is
309 The generation will stop
, and `\.
{\\ETC.
}' will be printed
, if the length of
310 printing exceeds a given limit~|l|. Anomalous entries are printed in the form of
311 control sequences that are not followed by a blank space
, e.g.
, `\.
{\\BAD.
}'
;
312 this cannot be confused with actual control sequences because a real control
313 sequence named \.
{BAD
} would come out `\.
{\\BAD\
}'.
316 #define not_so_bad
(p
) \
318 case assign_int_cmd
: \
319 if
(c
>= (backend_int_base
) && c <= (backend_int_last)) \
320 p
("[internal backend integer]"); \
322 case assign_dimen_cmd
: \
323 if
(c
>= (backend_dimen_base
) && c <= (backend_dimen_last)) \
324 p
("[internal backend dimension]"); \
326 case assign_toks_cmd
: \
327 if
(c
>= (backend_toks_base
) && c <= (backend_toks_last)) \
328 p
("[internal backend tokenlist]"); \
335 void show_token_list
(int p
, int q
, int l
)
337 int m
, c
; /* pieces of a token
*/
338 ASCII_code match_chr
= '#'
; /* character used in a `|match|'
*/
339 ASCII_code n
= '
0'
; /* the highest parameter number
, as an ASCII digit
*/
343 while
((p
!= null
) && (tally < l)) {
345 /* Do magic computation
*/
348 /* Display token |p|
, and |return| if there are problems
*/
349 if
((p
< (int
) fix_mem_min
) ||
(p
> (int
) fix_mem_end
)) {
350 tprint_esc
("CLOBBERED.");
353 if
(token_info
(p
) >= cs_token_flag
) {
354 if
(!((inhibit_par_tokens
) && (token_info(p) == par_token)))
355 print_cs
(token_info
(p
) - cs_token_flag
);
357 m
= token_cmd
(token_info
(p
));
358 c
= token_chr
(token_info
(p
));
359 if
(token_info
(p
) < 0) {
363 Display the token $
(|m|
,|c|
)$
365 The procedure usually ``learns'' the character code used for macro
366 parameters by seeing one in a |match| command before it runs into any
367 |out_param| commands.
371 case right_brace_cmd
:
382 if
(!in_lua_escape
&& (is_in_csname==0))
420 #define do_buffer_to_unichar
(a
,b
) do
{ \
421 a
= (halfword
)str2uni
(buffer
+b
); \
425 @ Here's the way we sometimes want to display a token list
, given a pointer to
426 its reference count
; the pointer may be null.
429 void token_show
(halfword p
)
432 show_token_list
(token_link
(p
), null
, 10000000);
435 @ |delete_token_ref| is called when a pointer to a token list's reference count
436 is being removed. This means that the token list should disappear if the
437 reference count was |null|
, otherwise the count should be decreased by one.
440 @ |p| points to the reference count of a token list that is losing one
444 void delete_token_ref
(halfword p
)
446 if
(token_ref_count
(p
) == 0)
449 decr
(token_ref_count
(p
));
453 int get_char_cat_code
(int curchr
)
456 do_get_cat_code
(a
,curchr
);
461 static void invalid_character_error
(void
)
463 const char
*hlp
[] = {
464 "A funny symbol that I can't read has just been input.",
465 "Continue, and I'll forget that it ever happened.",
468 deletions_allowed
= false
;
469 tex_error
("Text line contains an invalid character", hlp
);
470 deletions_allowed
= true
;
474 static boolean process_sup_mark
(void
); /* below
*/
476 static int scan_control_sequence
(void
); /* below
*/
484 static next_line_retval next_line
(void
); /* below
*/
486 @ In case you are getting bored
, here is a slightly less trivial routine
: Given a
487 string of lowercase letters
, like `\.
{pt
}' or `\.
{plus
}' or `\.
{width
}'
, the
488 |scan_keyword| routine checks to see whether the next tokens of input match this
489 string. The match must be exact
, except that uppercase letters will match their
490 lowercase counterparts
; uppercase equivalents are determined by subtracting
491 |
"a"-"A"|
, rather than using the |uc_code| table
, since \TeX\ uses this routine
492 only for its own limited set of keywords.
494 If a match is found
, the characters are effectively removed from the input and
495 |true| is returned. Otherwise |false| is returned
, and the input is left
496 essentially unchanged
(except for the fact that some macros may have been
497 expanded
, etc.
). @^inner loop@
>
500 boolean scan_keyword
(const char
*s
)
501 { /* look for a given string
*/
502 halfword p
; /* tail of the backup list
*/
503 halfword q
; /* new node being added to the token list via |store_new_token|
*/
504 const char
*k
; /* index into |str_pool|
*/
505 halfword save_cur_cs
= cur_cs
;
506 if
(strlen
(s
) == 0) /* was assert
(strlen
(s
) > 1); */
507 return false
; /* but not with newtokenlib zero keyword simply doesn't match
*/
509 token_link
(p
) = null
;
512 get_x_token
(); /* recursion is possible here
*/
513 if
((cur_cs
== 0) && ((cur_chr == *k) || (cur_chr == *k - 'a' + 'A'))) {
514 store_new_token
(cur_tok
);
516 } else if
((cur_cmd
!= spacer_cmd
) ||
(p
!= backup_head
)) {
518 crashes on some alignments
:
520 if
(p
!= backup_head
) {
522 token_info
(q
) = cur_tok
;
523 token_link
(q
) = null
;
525 begin_token_list
(token_link
(backup_head
), backed_up
);
531 if
(p
!= backup_head
) {
532 begin_token_list
(token_link
(backup_head
), backed_up
);
535 cur_cs
= save_cur_cs
;
539 if
(token_link
(backup_head
) != null
)
540 flush_list
(token_link
(backup_head
));
541 cur_cs
= save_cur_cs
;
545 @ We cannot return |undefined_control_sequence| under some conditions
546 (inside |shift_case|
, for example
). This needs thinking.
551 halfword active_to_cs
(int curchr
, int force
)
555 char
*utfbytes
= xmalloc
(8);
556 int nncs
= no_new_control_sequence
;
557 a
= (char
*) uni2str
(0xFFFF);
558 utfbytes
= strcpy
(utfbytes
, a
);
560 no_new_control_sequence
= false
;
562 b
= (char
*) uni2str
((unsigned
) curchr
);
563 utfbytes
= strcat
(utfbytes
, b
);
565 curcs
= string_lookup
(utfbytes
, strlen
(utfbytes
));
568 curcs
= string_lookup
(utfbytes
, 4);
570 no_new_control_sequence
= nncs
;
577 /*static char
* FFFF
= "\xEF\xBF\xBF";*/ /* 0xFFFF */
579 halfword active_to_cs
(int curchr
, int force
)
582 int nncs
= no_new_control_sequence
;
584 no_new_control_sequence
= false
;
587 char
*b
= (char
*) uni2str
((unsigned
) curchr
);
588 char
*utfbytes
= xmalloc
(8);
589 utfbytes
= strcpy
(utfbytes
, "\xEF\xBF\xBF");
590 utfbytes
= strcat
(utfbytes
, b
);
592 curcs
= string_lookup
(utfbytes
, utf8_size
(curchr
)+3);
595 curcs
= string_lookup
("\xEF\xBF\xBF", 4); /* 0xFFFF ... why not
3 ?
*/
597 no_new_control_sequence
= nncs
;
603 static unsigned char
*uni2csstr
(unsigned unic
)
605 unsigned char
*buf
= xmalloc
(8);
606 unsigned char
*pt
= buf
;
607 *pt
++ = 239; *pt
++ = 191; *pt
++ = 191; // 0xFFFF
609 *pt
++ = (unsigned char
) unic
;
610 else if
(unic
< 0x800) {
611 *pt
++ = (unsigned char
) (0xc0 |
(unic
>> 6));
612 *pt
++ = (unsigned char
) (0x80 |
(unic
& 0x3f));
613 } else if
(unic
>= 0x110000) {
614 *pt
++ = (unsigned char
) (unic
- 0x110000);
615 } else if
(unic
< 0x10000) {
616 *pt
++ = (unsigned char
) (0xe0 |
(unic
>> 12));
617 *pt
++ = (unsigned char
) (0x80 |
((unic
>> 6) & 0x3f));
618 *pt
++ = (unsigned char
) (0x80 |
(unic
& 0x3f));
621 unsigned val
= unic
- 0x10000;
622 u
= (int
) (((val
& 0xf0000) >> 16) + 1);
623 z
= (int
) ((val
& 0x0f000) >> 12);
624 y
= (int
) ((val
& 0x00fc0) >> 6);
625 x
= (int
) (val
& 0x0003f);
626 *pt
++ = (unsigned char
) (0xf0 |
(u
>> 2));
627 *pt
++ = (unsigned char
) (0x80 |
((u
& 3) << 4) | z);
628 *pt
++ = (unsigned char
) (0x80 | y
);
629 *pt
++ = (unsigned char
) (0x80 | x
);
635 halfword active_to_cs
(int curchr
, int force
)
638 int nncs
= no_new_control_sequence
;
640 no_new_control_sequence
= false
;
643 char
* utfbytes
= (char
*) uni2csstr
((unsigned
) curchr
);
644 curcs
= string_lookup
(utfbytes
, utf8_size
(curchr
)+3);
647 curcs
= string_lookup
(FFFF
, 4); // 0xFFFF ... why not
3 ?
649 no_new_control_sequence
= nncs
;
655 @ TODO this function should listen to \.
{\\escapechar
}
657 @ prints a control sequence
660 static char
*cs_to_string
(halfword p
)
665 static char ret
[256] = { 0 };
666 if
(p
== 0 || p
== null_cs
) {
680 str_number txt
= cs_text
(p
);
681 sh
= makecstring
(txt
);
683 if
(is_active_cs
(txt
)) {
701 @ TODO this is a quick hack
, will be solved differently soon
704 static char
*cmd_chr_to_string
(int cmd
, int chr
)
709 selector
= new_string
;
710 print_cmd_chr
((quarterword
) cmd
, chr
);
712 s
= makecstring
(str
);
718 @ The heart of \TeX's input mechanism is the |get_next| procedure
, which we shall
719 develop in the next few sections of the program. Perhaps we shouldn't actually
720 call it the ``heart
,'' however
, because it really acts as \TeX's eyes and mouth
,
721 reading the source files and gobbling them up. And it also helps \TeX\ to
722 regurgitate stored token lists that are to be processed again. @^eyes and mouth@
>
724 The main duty of |get_next| is to input one token and to set |cur_cmd| and
725 |cur_chr| to that token's command code and modifier. Furthermore
, if the input
726 token is a control sequence
, the |eqtb| location of that control sequence is
727 stored in |cur_cs|
; otherwise |cur_cs| is set to zero.
729 Underlying this simple description is a certain amount of complexity because of
730 all the cases that need to be handled. However
, the inner loop of |get_next| is
731 reasonably short and fast.
733 When |get_next| is asked to get the next token of a \.
{\\read
} line
,
734 it sets |cur_cmd
=cur_chr
=cur_cs
=0| in the case that no more tokens
735 appear on that line.
(There might not be any tokens at all
, if the
736 |end_line_char| has |ignore| as its catcode.
)
738 The value of |par_loc| is the |eqtb| address of `\.
{\\par
}'. This quantity is
739 needed because a blank line of input is supposed to be exactly equivalent to the
740 appearance of \.
{\\par
}; we must set |cur_cs
:=par_loc| when detecting a blank
744 halfword par_loc
; /* location of `\.
{\\par
}' in |eqtb|
*/
745 halfword par_token
; /* token representing `\.
{\\par
}'
*/
747 @ Parts of |get_next| are executed more often than any other instructions of \TeX.
748 @^mastication@
>@^inner loop@
>
750 The global variable |force_eof| is normally |false|
; it is set |true| by an
751 \.
{\\endinput
} command. |luacstrings| is the number of lua print statements
752 waiting to be input; it is changed by |luatokencall|.
755 boolean force_eof
; /* should the next \.
{\\input
} be aborted early?
*/
756 int luacstrings
; /* how many lua strings are waiting to be input?
*/
758 @ If the user has set the |pausing| parameter to some positive value
, and if
759 nonstop mode has not been selected
, each line of input is displayed on the
760 terminal and the transcript file
, followed by `\.
{=>}'. \TeX\ waits for a
761 response. If the response is simply |carriage_return|
, the line is accepted as it
762 stands
, otherwise the line typed is used instead of the line in the file.
765 void firm_up_the_line
(void
)
767 int k
; /* an index into |buffer|
*/
770 if
(interaction
> nonstop_mode
) {
773 if
(istart
< ilimit
) {
774 for
(k
= istart
; k
<= ilimit
- 1; k
++)
775 print_char
(buffer
[k
]);
778 prompt_input
("=>"); /* wait for user response
*/
780 for
(k
= first
; k
< +last
- 1; k
++) /* move line down in buffer
*/
781 buffer
[k
+ istart
- first
] = buffer
[k
];
782 ilimit
= istart
+ last
- first
;
788 @ Before getting into |get_next|
, let's consider the subroutine that is called
789 when an `\.
{\\outer
}' control sequence has been scanned or when the end of a file
790 has been reached. These two cases are distinguished by |cur_cs|
, which is zero at
794 void check_outer_validity
(void
)
796 halfword p
; /* points to inserted token list
*/
797 halfword q
; /* auxiliary pointer
*/
798 if
(suppress_outer_error
)
800 if
(scanner_status
!= normal
) {
801 deletions_allowed
= false
;
802 /* Back up an outer control sequence so that it can be reread
; */
803 /* An outer control sequence that occurs in a \.
{\\read
} will not be reread
,
804 since the error recovery for \.
{\\read
} is not very powerful.
*/
806 if
((istate
== token_list
) ||
(iname
< 1) ||
(iname
> 17)) {
808 token_info
(p
) = cs_token_flag
+ cur_cs
;
809 begin_token_list
(p
, backed_up
); /* prepare to read the control sequence again
*/
811 cur_cmd
= spacer_cmd
;
812 cur_chr
= ' '
; /* replace it by a space
*/
814 if
(scanner_status
> skipping
) {
815 const char
*errhlp
[] = {
816 "I suspect you have forgotten a `}', causing me",
817 "to read past where you wanted me to stop.",
818 "I'll try to recover; but if the error is serious,",
819 "you'd better type `E' or `X' now and fix your file.",
823 const char
*startmsg
;
824 const char
*scannermsg
;
825 /* Tell the user what has run away and try to recover
*/
826 runaway
(); /* print a definition
, argument
, or preamble
*/
828 startmsg
= "File ended";
831 startmsg
= "Forbidden control sequence found";
833 /* Print either `\.
{definition
}' or `\.
{use
}' or `\.
{preamble
}' or `\.
{text
}'
,
834 and insert tokens that should lead to recovery
; */
835 /* The recovery procedure can't be fully understood without knowing more
836 about the \TeX\ routines that should be aborted
, but we can sketch the
837 ideas here
: For a runaway definition we will insert a right brace
; for a
838 runaway preamble
, we will insert a special \.
{\\cr
} token and a right
839 brace
; and for a runaway argument
, we will set |long_state| to
840 |outer_call| and insert \.
{\\par
}.
*/
842 switch
(scanner_status
) {
844 scannermsg
= "definition";
845 token_info
(p
) = right_brace_token
+ '
}'
;
849 token_info
(p
) = par_token
;
850 long_state
= outer_call_cmd
;
853 scannermsg
= "preamble";
854 token_info
(p
) = right_brace_token
+ '
}'
;
858 token_info
(p
) = cs_token_flag
+ frozen_cr
;
859 align_state
= -1000000;
863 token_info
(p
) = right_brace_token
+ '
}'
;
865 default
: /* can't happen
*/
866 scannermsg
= "unknown";
868 } /*there are no other cases
*/
869 begin_token_list
(p
, inserted
);
870 snprintf
(errmsg
, 255, "%s while scanning %s of %s",
871 startmsg
, scannermsg
, cs_to_string
(warning_index
));
872 tex_error
(errmsg
, errhlp
);
875 const char
*errhlp_no
[] = {
876 "The file ended while I was skipping conditional text.",
877 "This kind of error happens when you say `\\if...' and forget",
878 "the matching `\\fi'. I've inserted a `\\fi'; this might work.",
881 const char
*errhlp_cs
[] = {
882 "A forbidden control sequence occurred in skipped text.",
883 "This kind of error happens when you say `\\if...' and forget",
884 "the matching `\\fi'. I've inserted a `\\fi'; this might work.",
887 const char
**errhlp
= (const char
**) errhlp_no
;
893 ss
= cmd_chr_to_string
(if_test_cmd
, cur_if
);
894 snprintf
(errmsg
, 255, "Incomplete %s; all text was ignored after line %d",
895 ss
, (int
) skip_line
);
897 /* Incomplete \\if...
*/
898 cur_tok
= cs_token_flag
+ frozen_fi
;
899 /* back up one inserted token and call |error|
*/
901 OK_to_interrupt
= false
;
903 token_type
= inserted
;
904 OK_to_interrupt
= true
;
905 tex_error
(errmsg
, errhlp
);
908 deletions_allowed
= true
;
917 The other variant gives less clutter in tracing cache usage when profiling and for
918 some files
(like the manual
) also a bit of a speedup.
921 static boolean get_next_file
(void
)
924 if
(iloc
<= ilimit
) {
925 /* current line not yet finished
*/
926 do_buffer_to_unichar
(cur_chr
, iloc
);
929 if
(detokenized_line
()) {
930 cur_cmd
= (cur_chr
== ' ' ?
10 : 12);
932 do_get_cat_code
(cur_cmd
, cur_chr
);
935 Change state if necessary
, and |goto switch| if the current
936 character should be ignored
, or |goto reswitch| if the current
937 character changes to another
;
939 The following
48-way switch accomplishes the scanning quickly
, assuming
940 that a decent C compiler has translated the code. Note that the numeric
941 values for |mid_line|
, |skip_blanks|
, and |new_line| are spaced
942 apart from each other by |max_char_code
+1|
, so we can add a character's
943 command code to the state to get a single number that characterizes both.
945 Remark
[ls
/hh
]: checking performance indicated that this switch was the
946 cause of many branch prediction errors but changing it to
:
948 c
= istate
+ cur_cmd
;
949 if
(c
== (mid_line
+ letter_cmd
) || c
== (mid_line
+ other_char_cmd
)) {
951 } else if
(c
>= new_line
) {
954 } else if
(c
>= skip_blanks
) {
957 } else if
(c
>= mid_line
) {
965 gives as many prediction errors. So
, we can indeed assume that the compiler
966 does the right job
, or that there is simply no other way.
969 switch
(istate
+ cur_cmd
) {
970 case mid_line
+ ignore_cmd
:
971 case skip_blanks
+ ignore_cmd
:
972 case new_line
+ ignore_cmd
:
973 case skip_blanks
+ spacer_cmd
:
974 case new_line
+ spacer_cmd
:
975 /* Cases where character is ignored
*/
978 case mid_line
+ escape_cmd
:
979 case new_line
+ escape_cmd
:
980 case skip_blanks
+ escape_cmd
:
981 /* Scan a control sequence ...
; */
982 istate
= (unsigned char
) scan_control_sequence
();
983 if
(! suppress_outer_error
&& cur_cmd >= outer_call_cmd)
984 check_outer_validity
();
986 case mid_line
+ active_char_cmd
:
987 case new_line
+ active_char_cmd
:
988 case skip_blanks
+ active_char_cmd
:
989 /* Process an active-character
*/
990 cur_cs
= active_to_cs
(cur_chr
, false
);
991 cur_cmd
= eq_type
(cur_cs
);
992 cur_chr
= equiv
(cur_cs
);
994 if
(! suppress_outer_error
&& cur_cmd >= outer_call_cmd)
995 check_outer_validity
();
997 case mid_line
+ sup_mark_cmd
:
998 case new_line
+ sup_mark_cmd
:
999 case skip_blanks
+ sup_mark_cmd
:
1000 /* If this |sup_mark| starts
*/
1001 if
(process_sup_mark
())
1006 case mid_line
+ invalid_char_cmd
:
1007 case new_line
+ invalid_char_cmd
:
1008 case skip_blanks
+ invalid_char_cmd
:
1009 /* Decry the invalid character and |goto restart|
; */
1010 invalid_character_error
();
1011 return false
; /* because state may be |token_list| now
*/
1013 case mid_line
+ spacer_cmd
:
1014 /* Enter |skip_blanks| state
, emit a space
; */
1015 istate
= skip_blanks
;
1018 case mid_line
+ car_ret_cmd
:
1020 Finish line
, emit a space. When a character of type |spacer| gets through
, its
1021 character code is changed to $\.
{"\ "}=040$. This means that the ASCII codes
1022 for tab and space
, and for the space inserted at the end of a line
, will be
1023 treated alike when macro parameters are being matched. We do this since such
1024 characters are indistinguishable on most computer terminal displays.
1027 cur_cmd
= spacer_cmd
;
1030 case skip_blanks
+ car_ret_cmd
:
1031 case mid_line
+ comment_cmd
:
1032 case new_line
+ comment_cmd
:
1033 case skip_blanks
+ comment_cmd
:
1034 /* Finish line
, |goto switch|
; */
1038 case new_line
+ car_ret_cmd
:
1039 /* Finish line
, emit a \.
{\\par
}; */
1042 cur_cmd
= eq_type
(cur_cs
);
1043 cur_chr
= equiv
(cur_cs
);
1044 if
(! suppress_outer_error
&& cur_cmd >= outer_call_cmd)
1045 check_outer_validity
();
1047 case skip_blanks
+ left_brace_cmd
:
1048 case new_line
+ left_brace_cmd
:
1051 case mid_line
+ left_brace_cmd
:
1054 case skip_blanks
+ right_brace_cmd
:
1055 case new_line
+ right_brace_cmd
:
1058 case mid_line
+ right_brace_cmd
:
1061 case mid_line
+ math_shift_cmd
:
1062 case mid_line
+ tab_mark_cmd
:
1063 case mid_line
+ mac_param_cmd
:
1064 case mid_line
+ sub_mark_cmd
:
1065 case mid_line
+ letter_cmd
:
1066 case mid_line
+ other_char_cmd
:
1069 case skip_blanks
+ math_shift
:
1070 case skip_blanks
+ tab_mark
:
1071 case skip_blanks
+ mac_param
:
1072 case skip_blanks
+ sub_mark
:
1073 case skip_blanks
+ letter
:
1074 case skip_blanks
+ other_char
:
1075 case new_line
+ math_shift
:
1076 case new_line
+ tab_mark
:
1077 case new_line
+ mac_param
:
1078 case new_line
+ sub_mark
:
1079 case new_line
+ letter
:
1080 case new_line
+ other_char
:
1090 Move to next line of file
,
1091 or |goto restart| if there is no next line
,
1092 or |return| if a \.
{\\read
} line has finished
;
1095 next_line_retval r
= next_line
();
1096 if
(r
== next_line_return
) {
1098 } else if
(r
== next_line_restart
) {
1110 /* 10 times less Bim in callgrind
*/
1113 escape_cmd left_brace_cmd right_brace_cmd math_shift_cmd
1114 tab_mark_cmd car_ret_cmd mac_param_cmd sup_mark_cmd
1115 sub_mark_cmd ignore_cmd spacer_cmd letter_cmd
1116 other_char_cmd active_char_cmd comment_cmd invalid_char_cmd
1119 static boolean get_next_file
(void
)
1123 if
(iloc
<= ilimit
) {
1124 /* current line not yet finished
*/
1125 do_buffer_to_unichar
(cur_chr
, iloc
);
1127 if
(detokenized_line
()) {
1128 cur_cmd
= (cur_chr
== ' ' ?
10 : 12);
1130 do_get_cat_code
(cur_cmd
, cur_chr
);
1133 Change state if necessary
, and |goto switch| if the current
1134 character should be ignored
, or |goto reswitch| if the current
1135 character changes to another
;
1137 c
= istate
+ cur_cmd
;
1138 if
(c
== (mid_line
+ letter_cmd
) || c
== (mid_line
+ other_char_cmd
)) {
1140 } else if
(c
>= new_line
) {
1141 switch
(c-new_line
) {
1143 istate
= (unsigned char
) scan_control_sequence
();
1144 if
(! suppress_outer_error
&& cur_cmd >= outer_call_cmd)
1145 check_outer_validity
();
1147 case left_brace_cmd
:
1151 case right_brace_cmd
:
1155 case math_shift_cmd
:
1162 /* Finish line
, emit a \.
{\\par
}; */
1165 cur_cmd
= eq_type
(cur_cs
);
1166 cur_chr
= equiv
(cur_cs
);
1167 if
(! suppress_outer_error
&& cur_cmd >= outer_call_cmd)
1168 check_outer_validity
();
1174 if
(process_sup_mark
())
1186 /* Cases where character is ignored
*/
1191 case other_char_cmd
:
1194 case active_char_cmd
:
1195 cur_cs
= active_to_cs
(cur_chr
, false
);
1196 cur_cmd
= eq_type
(cur_cs
);
1197 cur_chr
= equiv
(cur_cs
);
1199 if
(! suppress_outer_error
&& cur_cmd >= outer_call_cmd)
1200 check_outer_validity
();
1205 case invalid_char_cmd
:
1206 invalid_character_error
();
1207 return false
; /* because state may be |token_list| now
*/
1212 } else if
(c
>= skip_blanks
) {
1213 switch
(c-skip_blanks
) {
1215 /* Scan a control sequence ...
; */
1216 istate
= (unsigned char
) scan_control_sequence
();
1217 if
(! suppress_outer_error
&& cur_cmd >= outer_call_cmd)
1218 check_outer_validity
();
1220 case left_brace_cmd
:
1224 case right_brace_cmd
:
1228 case math_shift_cmd
:
1241 /* If this |sup_mark| starts
*/
1242 if
(process_sup_mark
())
1257 case other_char_cmd
:
1260 case active_char_cmd
:
1261 cur_cs
= active_to_cs
(cur_chr
, false
);
1262 cur_cmd
= eq_type
(cur_cs
);
1263 cur_chr
= equiv
(cur_cs
);
1265 if
(! suppress_outer_error
&& cur_cmd >= outer_call_cmd)
1266 check_outer_validity
();
1269 /* Finish line
, |goto switch|
; */
1272 case invalid_char_cmd
:
1273 /* Decry the invalid character and |goto restart|
; */
1274 invalid_character_error
();
1275 return false
; /* because state may be |token_list| now
*/
1280 } else if
(c
>= mid_line
) {
1281 switch
(c-mid_line
) {
1283 istate
= (unsigned char
) scan_control_sequence
();
1284 if
(! suppress_outer_error
&& cur_cmd >= outer_call_cmd)
1285 check_outer_validity
();
1287 case left_brace_cmd
:
1290 case right_brace_cmd
:
1293 case math_shift_cmd
:
1299 Finish line
, emit a space. When a character of type |spacer| gets through
, its
1300 character code is changed to $\.
{"\ "}=040$. This means that the ASCII codes
1301 for tab and space
, and for the space inserted at the end of a line
, will be
1302 treated alike when macro parameters are being matched. We do this since such
1303 characters are indistinguishable on most computer terminal displays.
1306 cur_cmd
= spacer_cmd
;
1312 if
(process_sup_mark
())
1322 /* Enter |skip_blanks| state
, emit a space
; */
1323 istate
= skip_blanks
;
1329 case other_char_cmd
:
1332 case active_char_cmd
:
1333 cur_cs
= active_to_cs
(cur_chr
, false
);
1334 cur_cmd
= eq_type
(cur_cs
);
1335 cur_chr
= equiv
(cur_cs
);
1337 if
(! suppress_outer_error
&& cur_cmd >= outer_call_cmd)
1338 check_outer_validity
();
1343 case invalid_char_cmd
:
1344 invalid_character_error
();
1345 return false
; /* because state may be |token_list| now
*/
1359 Move to next line of file
, or |goto restart| if there is no next line
,
1360 or |return| if a \.
{\\read
} line has finished
;
1363 next_line_retval r
= next_line
();
1364 if
(r
== next_line_return
) {
1366 } else if
(r
== next_line_restart
) {
1378 @ Notice that a code like \.
{\^\^
8} becomes \.x if not followed by a hex digit.
1379 We only support a limited set
:
/*
    |is_hex(a)| yields nonzero when |a| is one of the characters '0'..'9' or
    'a'..'f'. Uppercase 'A'..'F' are not accepted, matching the limited set of
    hex notations described above. The argument is parenthesized at every use
    so that an expression such as |x + 1| is tested as a whole; note that it is
    still evaluated more than once, so it must not have side effects.
*/
#define is_hex(a) (((a)>='0'&&(a)<='9')||((a)>='a'&&(a)<='f'))
1389 #define add_nybble
(c
) \
1391 cur_chr
=(cur_chr
<<4)+c-'
0'
; \
1393 cur_chr
=(cur_chr
<<4)+c-'a'
+10; \
1396 #define set_nybble
(c
) \
1403 #define one_hex_to_cur_chr
(c1
) \
1406 #define two_hex_to_cur_chr
(c1
,c2
) \
1410 #define four_hex_to_cur_chr
(c1
,c2
,c3
,c4
) \
1411 two_hex_to_cur_chr
(c1
,c2
); \
1415 #define six_hex_to_cur_chr
(c1
,c2
,c3
,c4
,c5
,c6
) \
1416 four_hex_to_cur_chr
(c1
,c2
,c3
,c4
); \
1420 static boolean process_sup_mark
(void
)
1422 if
(cur_chr
== buffer
[iloc
]) {
1423 if
(iloc
< ilimit
) {
1424 if
((cur_chr
== buffer
[iloc
+ 1]) && (cur_chr == buffer[iloc + 2])) {
1425 if
((cur_chr
== buffer
[iloc
+ 3]) && (cur_chr == buffer[iloc + 4])) {
1427 if
((iloc
+ 10) <= ilimit
) {
1428 int c1
= buffer
[iloc
+ 5];
1429 int c2
= buffer
[iloc
+ 6];
1430 int c3
= buffer
[iloc
+ 7];
1431 int c4
= buffer
[iloc
+ 8];
1432 int c5
= buffer
[iloc
+ 9];
1433 int c6
= buffer
[iloc
+ 10];
1434 if
(is_hex
(c1
) && is_hex(c2) && is_hex(c3) &&
1435 is_hex
(c4
) && is_hex(c5) && is_hex(c6)) {
1437 six_hex_to_cur_chr
(c1
,c2
,c3
,c4
,c5
,c6
);
1440 tex_error
("^^^^^^ needs six hex digits", NULL);
1443 tex_error
("^^^^^^ needs six hex digits, end of input", NULL);
1447 if
((iloc
+ 6) <= ilimit
) {
1448 int c1
= buffer
[iloc
+ 3];
1449 int c2
= buffer
[iloc
+ 4];
1450 int c3
= buffer
[iloc
+ 5];
1451 int c4
= buffer
[iloc
+ 6];
1452 if
(is_hex
(c1
) && is_hex(c2) && is_hex(c3) && is_hex(c4)) {
1454 four_hex_to_cur_chr
(c1
,c2
,c3
,c4
);
1457 tex_error
("^^^^ needs four hex digits", NULL);
1460 tex_error
("^^^^ needs four hex digits, end of input", NULL);
1465 if
((iloc
+ 2) <= ilimit
) {
1466 int c1
= buffer
[iloc
+ 1];
1467 int c2
= buffer
[iloc
+ 2];
1468 if
(is_hex
(c1
) && is_hex(c2)) {
1470 two_hex_to_cur_chr
(c1
,c2
);
1474 /* go on
, no error
, good old tex
*/
1479 int c1
= buffer
[iloc
+ 1];
1482 if
(is_hex
(c1
) && (iloc <= ilimit)) {
1483 int c2
= buffer
[iloc
];
1486 two_hex_to_cur_chr
(c1
,c2
);
1490 cur_chr
= (c1
< 0100 ? c1
+ 0100 : c1
- 0100);
1498 @ Control sequence names are scanned only when they appear in some line of a
1499 file
; once they have been scanned the first time
, their |eqtb| location serves as
1500 a unique identification
, so \TeX\ doesn't need to refer to the original name any
1501 more except when it prints the equivalent in symbolic form.
1503 The program that scans a control sequence has been written carefully in order to
1504 avoid the blowups that might otherwise occur if a malicious user tried something
1505 like `\.
{\\catcode\'
15=0}'. The algorithm might look at |buffer
[ilimit
+1]|
, but
1506 it never looks at |buffer
[ilimit
+2]|.
1508 If expanded characters like `\.
{\^\^A
}' or `\.
{\^\^df
}' appear in or just
1509 following a control sequence name
, they are converted to single characters in the
1510 buffer and the process is repeated
, slowly but surely.
1513 static boolean check_expanded_code
(int
*kk
); /* below
*/
1515 static int scan_control_sequence
(void
)
1517 int retval
= mid_line
;
1518 if
(iloc
> ilimit
) {
1519 cur_cs
= null_cs
; /* |state| is irrelevant in this case
*/
1521 register int cat
; /* |cat_code
(cur_chr
)|
, usually
*/
1524 do_buffer_to_unichar
(cur_chr
, k
);
1525 do_get_cat_code
(cat
, cur_chr
);
1526 if
(cat
!= letter_cmd || k
> ilimit
) {
1527 retval
= (cat
== spacer_cmd ? skip_blanks
: mid_line
);
1528 if
(cat
== sup_mark_cmd
&& check_expanded_code(&k)) /* If an expanded...; */
1531 retval
= skip_blanks
;
1533 do_buffer_to_unichar
(cur_chr
, k
);
1534 do_get_cat_code
(cat
, cur_chr
);
1535 } while
(cat
== letter_cmd
&& k <= ilimit);
1537 if
(cat
== sup_mark_cmd
&& check_expanded_code(&k)) /* If an expanded...; */
1539 if
(cat
!= letter_cmd
) {
1540 /* backtrack one character which can be utf
*/
1543 if
(cur_chr
> 0xFFFF)
1545 if
(cur_chr
> 0x7FF)
1550 if
(cur_chr
<= 0x7F) {
1551 k
-= 1; /* in most cases
*/
1552 } else if
(cur_chr
> 0xFFFF) {
1554 } else if
(cur_chr
> 0x7FF) {
1556 } else
/* if
(cur_chr
> 0x7F) */ {
1559 /* now |k| points to first nonletter
*/
1562 cur_cs
= id_lookup
(iloc
, k
- iloc
);
1567 cur_cmd
= eq_type
(cur_cs
);
1568 cur_chr
= equiv
(cur_cs
);
1572 @ Whenever we reach the following piece of code
, we will have
1573 |cur_chr
=buffer
[k-1
]| and |k
<=ilimit
+1| and
1574 |cat
=get_cat_code
(cat_code_table
,cur_chr
)|. If an expanded code like \.
{\^\^A
} or
1575 \.
{\^\^df
} appears in |buffer
[(k-1
)..
(k
+1)]| or |buffer
[(k-1
)..
(k
+2)]|
, we will
1576 store the corresponding code in |buffer
[k-1
]| and shift the rest of the buffer
1577 left two or three places.
1580 static boolean check_expanded_code
(int
*kk
)
1585 if
(buffer
[k
] == cur_chr
&& k < ilimit) {
1586 if
((cur_chr
== buffer
[k
+ 1]) && (cur_chr == buffer[k + 2])) {
1587 if
((cur_chr
== buffer
[k
+ 3]) && (cur_chr == buffer[k + 4])) {
1588 if
((k
+ 10) <= ilimit
) {
1589 int c1
= buffer
[k
+ 6 - 1];
1590 int c2
= buffer
[k
+ 6];
1591 int c3
= buffer
[k
+ 6 + 1];
1592 int c4
= buffer
[k
+ 6 + 2];
1593 int c5
= buffer
[k
+ 6 + 3];
1594 int c6
= buffer
[k
+ 6 + 4];
1595 if
(is_hex
(c1
) && is_hex(c2) && is_hex(c3) && is_hex(c4) && is_hex(c5) && is_hex(c6)) {
1597 six_hex_to_cur_chr
(c1
,c2
,c3
,c4
,c5
,c6
);
1599 tex_error
("^^^^^^ needs six hex digits", NULL);
1602 tex_error
("^^^^^^ needs six hex digits, end of input", NULL);
1605 if
((k
+ 6) <= ilimit
) {
1606 int c1
= buffer
[k
+ 4 - 1];
1607 int c2
= buffer
[k
+ 4];
1608 int c3
= buffer
[k
+ 4 + 1];
1609 int c4
= buffer
[k
+ 4 + 2];
1610 if
(is_hex
(c1
) && is_hex(c2) && is_hex(c3) && is_hex(c4)) {
1612 four_hex_to_cur_chr
(c1
,c2
,c3
,c4
);
1614 tex_error
("^^^^ needs four hex digits", NULL);
1617 tex_error
("^^^^ needs four hex digits, end of input", NULL);
1621 int c1
= buffer
[k
+ 1];
1624 if
(is_hex
(c1
) && (k + 2) <= ilimit) {
1625 int c2
= buffer
[k
+ 2];
1628 two_hex_to_cur_chr
(c1
,c2
);
1630 cur_chr
= (c1
< 0100 ? c1
+ 0100 : c1
- 0100);
1633 cur_chr
= (c1
< 0100 ? c1
+ 0100 : c1
- 0100);
1641 if
(cur_chr
<= 0x7F) {
1642 buffer
[k
- 1] = (packed_ASCII_code
) cur_chr
;
1643 } else if
(cur_chr
<= 0x7FF) {
1644 buffer
[k
- 1] = (packed_ASCII_code
) (0xC0 + cur_chr
/ 0x40);
1647 buffer
[k
- 1] = (packed_ASCII_code
) (0x80 + cur_chr
% 0x40);
1648 } else if
(cur_chr
<= 0xFFFF) {
1649 buffer
[k
- 1] = (packed_ASCII_code
) (0xE0 + cur_chr
/ 0x1000);
1652 buffer
[k
- 1] = (packed_ASCII_code
) (0x80 + (cur_chr
% 0x1000) / 0x40);
1655 buffer
[k
- 1] = (packed_ASCII_code
) (0x80 + (cur_chr
% 0x1000) % 0x40);
1657 buffer
[k
- 1] = (packed_ASCII_code
) (0xF0 + cur_chr
/ 0x40000);
1660 buffer
[k
- 1] = (packed_ASCII_code
) (0x80 + (cur_chr
% 0x40000) / 0x1000);
1663 buffer
[k
- 1] = (packed_ASCII_code
) (0x80 + ((cur_chr
% 0x40000) % 0x1000) / 0x40);
1666 buffer
[k
- 1] = (packed_ASCII_code
) (0x80 + ((cur_chr
% 0x40000) % 0x1000) % 0x40);
1669 ilimit
= ilimit
- d
;
1670 while
(l
<= ilimit
) {
1671 buffer
[l
] = buffer
[l
+ d
];
1680 @ All of the easy branches of |get_next| have now been taken care of. There is
1683 @c static next_line_retval next_line
(void
)
1685 boolean inhibit_eol
= false
; /* a way to end a pseudo file without trailing space
*/
1687 /* Read next line of file into |buffer|
, or |goto restart| if the file has ended
*/
1692 if
(pseudo_input
()) { /* not end of file
*/
1693 firm_up_the_line
(); /* this sets |ilimit|
*/
1694 line_catcode_table
= DEFAULT_CAT_TABLE
;
1695 if
((iname
== 19) && (pseudo_lines(pseudo_files) == null))
1697 } else if
((every_eof
!= null
) && !eof_seen[iindex]) {
1699 eof_seen
[iindex
] = true
; /* fake one empty line
*/
1701 begin_token_list
(every_eof
, every_eof_text
);
1702 return next_line_restart
;
1708 if
(luacstring_input
()) { /* not end of strings
*/
1710 line_catcode_table
= (short
) luacstring_cattable
();
1711 line_partial
= (signed char
) luacstring_partial
();
1712 if
(luacstring_final_line
() || line_partial
1713 || line_catcode_table
== NO_CAT_TABLE
)
1721 if
(lua_input_ln
(cur_file
, 0, true
)) { /* not end of file
*/
1722 firm_up_the_line
(); /* this sets |ilimit|
*/
1723 line_catcode_table
= DEFAULT_CAT_TABLE
;
1724 } else if
((every_eof
!= null
) && (!eof_seen[iindex])) {
1726 eof_seen
[iindex
] = true
; /* fake one empty line
*/
1727 begin_token_list
(every_eof
, every_eof_text
);
1728 return next_line_restart
;
1736 if
(tracing_nesting
> 0)
1737 if
((grp_stack
[in_open
] != cur_boundary
) ||
(if_stack
[in_open
] != cond_ptr
))
1738 if
(!((iname
== 19) ||
(iname
== 21))) {
1739 /* give warning for some unfinished groups and
/or conditionals
*/
1742 if
((iname
> 21) ||
(iname
== 20)) {
1743 report_stop_file
(filetype_tex
);
1747 /* lua input or \.
{\\scantextokens
} */
1748 if
(iname
== 21 || iname
== 19) {
1752 if
(! suppress_outer_error
)
1753 check_outer_validity
();
1755 return next_line_restart
;
1757 if
(inhibit_eol || end_line_char_inactive
)
1760 buffer
[ilimit
] = (packed_ASCII_code
) end_line_char
;
1762 iloc
= istart
; /* ready to read
*/
1764 if
(!terminal_input
) {
1765 /* \.
{\\read
} line has ended
*/
1768 return next_line_return
; /* OUTER */
1770 if
(input_ptr
> 0) {
1771 /* text was inserted during error recovery
*/
1773 return next_line_restart
; /* resume previous level
*/
1775 if
(selector
< log_only
)
1777 if
(interaction
> nonstop_mode
) {
1778 if
(end_line_char_inactive
)
1780 if
(ilimit
== istart
) {
1781 /* previous line was empty
*/
1782 tprint_nl
("(Please type a command or say `\\end')");
1786 prompt_input
("*"); /* input on-line into |buffer|
*/
1788 if
(end_line_char_inactive
)
1791 buffer
[ilimit
] = (packed_ASCII_code
) end_line_char
;
1796 Nonstop mode
, which is intended for overnight batch processing
,
1797 never waits for on-line input.
1799 fatal_error
("*** (job aborted, no legal \\end found)");
1802 return next_line_ok
;
1805 @ Let's consider now what happens when |get_next| is looking at a token list.
1808 static boolean get_next_tokenlist
(void
)
1810 register halfword t
= token_info
(iloc
);
1811 iloc
= token_link
(iloc
); /* move to next
*/
1812 if
(t
>= cs_token_flag
) {
1813 /* a control sequence token
*/
1814 cur_cs
= t
- cs_token_flag
;
1815 cur_cmd
= eq_type
(cur_cs
);
1816 if
(cur_cmd
>= outer_call_cmd
) {
1817 if
(cur_cmd
== dont_expand_cmd
) {
1819 Get the next token
, suppressing expansion. The present point in the program
1820 is reached only when the |expand| routine has inserted a special marker into
1821 the input. In this special case
, |token_info
(iloc
)| is known to be a control
1822 sequence token
, and |token_link
(iloc
)=null|.
1824 cur_cs
= token_info
(iloc
) - cs_token_flag
;
1826 cur_cmd
= eq_type
(cur_cs
);
1827 if
(cur_cmd
> max_command_cmd
) {
1828 cur_cmd
= relax_cmd
;
1829 cur_chr
= no_expand_flag
;
1832 } else if
(! suppress_outer_error
) {
1833 check_outer_validity
();
1836 cur_chr
= equiv
(cur_cs
);
1838 cur_cmd
= token_cmd
(t
);
1839 cur_chr
= token_chr
(t
);
1841 case left_brace_cmd
:
1844 case right_brace_cmd
:
1848 /* Insert macro parameter and |goto restart|
; */
1849 begin_token_list
(param_stack
[param_start
+ cur_chr
- 1], parameter
);
1857 @ Now we're ready to take the plunge into |get_next| itself. Parts of this
1858 routine are executed more often than any other instructions of \TeX.
1859 @^mastication@
>@^inner loop@
>
1861 @ sets |cur_cmd|
, |cur_chr|
, |cur_cs| to next token
1868 if
(istate
!= token_list
) {
1869 /* Input from external file
, |goto restart| if no input found
*/
1870 if
(!get_next_file
())
1875 goto RESTART
; /* list exhausted
, resume previous level
*/
1876 } else if
(!get_next_tokenlist
()) {
1877 goto RESTART
; /* parameter needs to be expanded
*/
1880 /* If an alignment entry has just ended
, take appropriate action
*/
1881 if
((cur_cmd
== tab_mark_cmd || cur_cmd
== car_ret_cmd
) && align_state == 0) {
1882 insert_vj_template
();
1887 @ Since |get_next| is used so frequently in \TeX
, it is convenient to define
1888 three related procedures that do a little more
:
1890 \yskip\hang|get_token| not only sets |cur_cmd| and |cur_chr|
, it also sets
1891 |cur_tok|
, a packed halfword version of the current token.
1893 \yskip\hang|get_x_token|
, meaning ``get an expanded token
,'' is like |get_token|
,
1894 but if the current token turns out to be a user-defined control sequence
(i.e.
, a
1895 macro call
), or a conditional
, or something like \.
{\\topmark
} or
1896 \.
{\\expandafter
} or \.
{\\csname
}, it is eliminated from the input by beginning
1897 the expansion of the macro or the evaluation of the conditional.
1899 \yskip\hang|x_token| is like |get_x_token| except that it assumes that |get_next|
1900 has already been called.
1902 \yskip\noindent In fact
, these three procedures account for almost every use of
1905 No new control sequences will be defined except during a call of |get_token|
, or
1906 when \.
{\\csname
} compresses a token list
, because |no_new_control_sequence| is
1907 always |true| at other times.
1909 @ sets |cur_cmd|
, |cur_chr|
, |cur_tok|
1912 void get_token
(void
)
1914 no_new_control_sequence
= false
;
1916 no_new_control_sequence
= true
;
1918 cur_tok
= token_val
(cur_cmd
, cur_chr
);
1920 cur_tok
= cs_token_flag
+ cur_cs
;
1923 @ changes the string |s| to a token list
1926 halfword string_to_toks
(const char
*ss
)
1928 halfword p
; /* tail of the token list
*/
1929 halfword q
; /* new node being added to the token list via |store_new_token|
*/
1930 halfword t
; /* token being appended
*/
1932 const char
*se
= ss
+ strlen
(s
);
1933 p
= temp_token_head
;
1934 set_token_link
(p
, null
);
1936 t
= (halfword
) str2uni
((const unsigned char
*) s
);
1941 t
= other_token
+ t
;
1942 fast_store_new_token
(t
);
1944 return token_link
(temp_token_head
);
1947 @ The token lists for macros and for other things like \.
{\\mark
} and
1948 \.
{\\output
} and \.
{\\write
} are produced by a procedure called |scan_toks|.
1950 Before we get into the details of |scan_toks|
, let's consider a much simpler
1951 task
, that of converting the current string into a token list. The |str_toks|
1952 function does this
; it classifies spaces as type |spacer| and everything else as
1955 The token list created by |str_toks| begins at |link
(temp_token_head
)| and ends
1956 at the value |p| that is returned.
(If |p
=temp_token_head|
, the list is empty.
)
1958 |lua_str_toks| is almost identical
, but it also escapes the three symbols that
1959 |lua| considers special while scanning a literal string
1961 @ changes the string |b| (the |b.l| bytes starting at |b.s|) to a token list
1964 halfword lua_str_toks
(lstring b
)
1966 halfword p
; /* tail of the token list
*/
1967 halfword q
; /* new node being added to the token list via |store_new_token|
*/
1968 halfword t
; /* token being appended
*/
1969 unsigned char
*k
; /* index into string
*/
1970 p
= temp_token_head
;
1971 set_token_link
(p
, null
);
1972 k
= (unsigned char
*) b.s
;
1973 while
(k
< (unsigned char
*) b.s
+ b.l
) {
1974 t
= pool_to_unichar
(k
);
1979 if
((t
== '\\'
) ||
(t
== '
"') || (t == '\'') || (t == 10) || (t == 13))
1980 fast_store_new_token(other_token + '\\');
1985 t = other_token + t;
1987 fast_store_new_token(t);
1992 @ Incidentally, the main reason for wanting |str_toks| is the function
1993 |the_toks|, which has similar input/output characteristics.
1995 @ changes the string |s| to a token list
1998 halfword str_toks(lstring s)
2000 halfword p; /* tail of the token list */
2001 halfword q; /* new node being added to the token list via |store_new_token| */
2002 halfword t; /* token being appended */
2003 unsigned char *k, *l; /* index into string */
2004 p = temp_token_head;
2005 set_token_link(p, null);
2009 t = pool_to_unichar(k);
2014 t = other_token + t;
2015 fast_store_new_token(t);
2021 hh: most of the converter is similar to the one i made for macro so at some point i
2022 can make a helper; also todo: there is no need to go through the pool
2026 halfword str_scan_toks(int ct, lstring s)
2027 { /* changes the string |str_pool[b..pool_ptr]| to a token list */
2028 halfword p; /* tail of the token list */
2029 halfword q; /* new node being added to the token list via |store_new_token| */
2030 halfword t; /* token being appended */
2031 unsigned char *k, *l; /* index into string */
2033 p = temp_token_head;
2034 set_token_link(p, null);
2038 t = pool_to_unichar(k);
2040 cc = get_cat_code(ct,t);
2042 /* we have a potential control sequence so we check for it */
2046 halfword _cs = null ;
2047 unsigned char *_name = k ;
2049 t = (halfword) str2uni((const unsigned char *) k);
2051 _c = get_cat_code(ct,t);
2054 _lname = _lname + _s ;
2055 } else if (_c == 10) {
2056 /* we ignore a trailing space like normal scanning does */
2064 /* we have a potential \cs */
2065 _cs = string_lookup((const char *) _name, _lname);
2066 if (_cs == undefined_control_sequence) {
2067 /* let's play safe and backtrack */
2068 t = cc * (1<<21) + t ;
2071 t = cs_token_flag + _cs;
2074 /* just a character with some meaning, so \unknown becomes effectively */
2075 /* \\unknown assuming that \\ has some useful meaning of course */
2076 t = cc * (1<<21) + t ;
2081 /* whatever token, so for instance $x^2$ just works given a tex */
2082 /* catcode regime */
2083 t = cc * (1<<21) + t ;
2085 fast_store_new_token(t);
2091 @ Here's part of the |expand| subroutine that we are now ready to complete:
2094 void ins_the_toks(void)
2097 ins_list(token_link(temp_token_head));
2100 #define set_toks_register(n,t,g) { \
2101 int a = (g>0) ? 4 : 0; \
2102 halfword ref = get_avail(); \
2103 set_token_ref_count(ref, 0); \
2104 set_token_link(ref, token_link(t)); \
2105 define(n + toks_base, call_cmd, ref); \
2108 void combine_the_toks(int how)
2113 if (cur_cmd == assign_toks_cmd) {
2114 nt = equiv(cur_cs) - toks_base;
2124 } while (cur_cmd == spacer_cmd);
2125 if (cur_cmd == left_brace_cmd) {
2128 x = scan_toks(false,how > 1); /* expanded or not */
2131 if (source != null) {
2132 halfword target = toks(nt);
2133 if (target == null) {
2134 set_toks_register(nt,source,0);
2136 halfword s = token_link(source);
2138 halfword t = token_link(target);
2140 /* can this happen ? */
2141 set_token_link(target, s);
2142 } else if (odd(how)) {
2144 if (cur_level != eq_level_field(eqtb[toks_base+nt])) {
2145 halfword p = temp_token_head;
2147 set_token_link(p, s); /* s = head, x = tail */
2150 fast_store_new_token(token_info(t));
2153 set_toks_register(nt,temp_token_head,0);
2155 set_token_link(x,t);
2156 set_token_link(target,s);
2160 if (cur_level != eq_level_field(eqtb[toks_base+nt])) {
2161 halfword p = temp_token_head;
2163 set_token_link(p, null);
2165 fast_store_new_token(token_info(t));
2168 set_token_link(p,s);
2169 set_toks_register(nt,temp_token_head,0);
2171 while (token_link(t) != null) {
2174 set_token_link(t,s);
2181 halfword source, ns;
2182 if (cur_cmd == assign_toks_cmd) {
2183 ns = equiv(cur_cs) - toks_base;
2192 if (source != null) {
2193 halfword target = toks(nt);
2194 if (target == null) {
2195 equiv(toks_base+nt) = source;
2196 equiv(toks_base+ns) = null;
2198 halfword s = token_link(source);
2200 halfword t = token_link(target);
2202 set_token_link(target, s);
2203 } else if (odd(how)) {
2206 while (token_link(x) != null) {
2209 set_token_link(x,t);
2210 set_token_link(target,s);
2213 while (token_link(t) != null) {
2216 set_token_link(t,s);
2218 equiv(toks_base+ns) = null;
2225 @ This routine, used in the next one, prints the job name, possibly modified by
2226 the |process_jobname| callback.
2229 static void print_job_name(void)
2232 char *s, *ss; /* C strings for jobname before and after processing */
2233 int callback_id, lua_retval;
2234 s = (char*)str_string(job_name);
2235 callback_id = callback_defined(process_jobname_callback);
2236 if (callback_id > 0) {
2237 lua_retval = run_callback(callback_id, "S-
>S
", s, &ss);
2238 if ((lua_retval == true) && (ss != NULL))
2247 @ Here is a routine that prints the result of a convert command, using the
2248 argument |i|. It returns |false| if it does not know how to print the code |c|. The
2249 function exists because lua code and tex code can both call it to convert
2252 @ Parse optional lua state integer, or an instance name to be stored in |sn| and
2253 get the next non-blank non-relax non-call token.
2257 int scan_lua_state(void)
2262 } while ((cur_cmd == spacer_cmd) || (cur_cmd == relax_cmd));
2264 if (cur_cmd != left_brace_cmd) {
2265 if (scan_keyword("name
")) {
2266 (void) scan_toks(false, true);
2269 scan_register_num();
2270 if (get_lua_name(cur_val))
2271 sn = (cur_val - 65536);
2277 @ The procedure |conv_toks| uses |str_toks| to insert the token list for
2278 |convert| functions into the scanner; `\.{\\outer}' control sequences are allowed
2279 to follow `\.{\\string}' and `\.{\\meaning}'.
2281 The extra temp string |u| is needed because |pdf_scan_ext_toks| incorporates any
2282 pending string in its output. In order to save such a pending string, we have to
2283 create a temporary string that is destroyed immediately after.
2286 #define push_selector { \
2287 old_setting = selector; \
2288 selector = new_string; \
2291 #define pop_selector { \
2292 selector = old_setting; \
2295 static int do_variable_dvi(halfword c)
2300 #define do_variable_backend_int(i) \
2301 cur_cmd = assign_int_cmd; \
2302 cur_val = backend_int_base + i; \
2303 cur_tok = token_val(cur_cmd, cur_val); \
2306 #define do_variable_backend_dimen(i) \
2307 cur_cmd = assign_dimen_cmd; \
2308 cur_val = backend_dimen_base + i; \
2309 cur_tok = token_val(cur_cmd, cur_val); \
2312 #define do_variable_backend_toks(i) \
2313 cur_cmd = assign_toks_cmd; \
2314 cur_val = backend_toks_base + i ; \
2315 cur_tok = token_val(cur_cmd, cur_val); \
2318 static int do_variable_pdf(halfword c)
2320 if (scan_keyword("compresslevel
")) { do_variable_backend_int(c_pdf_compress_level); }
2321 else if (scan_keyword("decimaldigits
")) { do_variable_backend_int(c_pdf_decimal_digits); }
2322 else if (scan_keyword("imageresolution
")) { do_variable_backend_int(c_pdf_image_resolution); }
2323 else if (scan_keyword("pkresolution
")) { do_variable_backend_int(c_pdf_pk_resolution); }
2324 else if (scan_keyword("uniqueresname
")) { do_variable_backend_int(c_pdf_unique_resname); }
2325 else if (scan_keyword("minorversion
")) { do_variable_backend_int(c_pdf_minor_version); }
2326 else if (scan_keyword("pagebox
")) { do_variable_backend_int(c_pdf_pagebox); }
2327 else if (scan_keyword("inclusionerrorlevel
")) { do_variable_backend_int(c_pdf_inclusion_errorlevel); }
2328 else if (scan_keyword("ignoreunknownimages
")) { do_variable_backend_int(c_pdf_ignore_unknown_images); }
2329 else if (scan_keyword("gamma
")) { do_variable_backend_int(c_pdf_gamma); }
2330 else if (scan_keyword("imageapplygamma
")) { do_variable_backend_int(c_pdf_image_apply_gamma); }
2331 else if (scan_keyword("imagegamma
")) { do_variable_backend_int(c_pdf_image_gamma); }
2332 else if (scan_keyword("imagehicolor
")) { do_variable_backend_int(c_pdf_image_hicolor); }
2333 else if (scan_keyword("imageaddfilename
")) { do_variable_backend_int(c_pdf_image_addfilename); }
2334 else if (scan_keyword("objcompresslevel
")) { do_variable_backend_int(c_pdf_obj_compress_level); }
2335 else if (scan_keyword("inclusioncopyfonts
")) { do_variable_backend_int(c_pdf_inclusion_copy_font); }
2336 else if (scan_keyword("gentounicode
")) { do_variable_backend_int(c_pdf_gen_tounicode); }
2337 else if (scan_keyword("pkfixeddpi
")) { do_variable_backend_int(c_pdf_pk_fixed_dpi); }
2338 else if (scan_keyword("suppressoptionalinfo
")) { do_variable_backend_int(c_pdf_suppress_optional_info); }
2340 else if (scan_keyword("horigin
")) { do_variable_backend_dimen(d_pdf_h_origin); }
2341 else if (scan_keyword("vorigin
")) { do_variable_backend_dimen(d_pdf_v_origin); }
2342 else if (scan_keyword("threadmargin
")) { do_variable_backend_dimen(d_pdf_thread_margin); }
2343 else if (scan_keyword("destmargin
")) { do_variable_backend_dimen(d_pdf_dest_margin); }
2344 else if (scan_keyword("linkmargin
")) { do_variable_backend_dimen(d_pdf_link_margin); }
2345 else if (scan_keyword("xformmargin
")) { do_variable_backend_dimen(d_pdf_xform_margin); }
2347 else if (scan_keyword("pageattr
")) { do_variable_backend_toks(t_pdf_page_attr); }
2348 else if (scan_keyword("pageresources
")) { do_variable_backend_toks(t_pdf_page_resources); }
2349 else if (scan_keyword("pagesattr
")) { do_variable_backend_toks(t_pdf_pages_attr); }
2350 else if (scan_keyword("xformattr
")) { do_variable_backend_toks(t_pdf_xform_attr); }
2351 else if (scan_keyword("xformresources
")) { do_variable_backend_toks(t_pdf_xform_resources); }
2352 else if (scan_keyword("pkmode
")) { do_variable_backend_toks(t_pdf_pk_mode); }
2353 else if (scan_keyword("trailerid
")) { do_variable_backend_toks(t_pdf_trailer_id); }
2360 static int do_feedback_dvi(halfword c)
2365 /* codes not really needed but cleaner when testing */
2367 #define pdftex_version 40 /* these values will not change any more */
2368 #define pdftex_revision "0" /* these values will not change any more */
2370 static int do_feedback_pdf(halfword c)
2372 int old_setting; /* holds |selector| setting */
2373 int save_scanner_status; /* |scanner_status| upon entry */
2374 halfword save_def_ref; /* |def_ref| upon entry, important if inside `\.{\\message}' */
2375 halfword save_warning_index;
2376 boolean bool; /* temp boolean */
2377 str_number s; /* first temp string */
2378 int ff; /* for use with |set_ff| */
2379 str_number u = 0; /* third temp string, will become non-nil if a string is already being built */
2380 char *str; /* color stack init str */
2382 if (scan_keyword("lastlink
")) {
2384 print_int(pdf_last_link);
2386 } else if (scan_keyword("retval
")) {
2388 print_int(pdf_retval);
2390 } else if (scan_keyword("lastobj
")) {
2392 print_int(pdf_last_obj);
2394 } else if (scan_keyword("lastannot
")) {
2396 print_int(pdf_last_annot);
2398 } else if (scan_keyword("xformname
")) {
2400 check_obj_type(static_pdf, obj_type_xform, cur_val);
2402 print_int(obj_info(static_pdf, cur_val));
2404 } else if (scan_keyword("creationdate
")) {
2405 ins_list(string_to_toks(getcreationdate(static_pdf)));
2406 /* no further action */
2408 } else if (scan_keyword("fontname
")) {
2410 if (cur_val == null_font)
2411 normal_error("pdf backend
", "invalid font identifier when asking 'fontname'
");
2412 pdf_check_vf(cur_val);
2413 if (!font_used(cur_val))
2414 pdf_init_font(static_pdf, cur_val);
2417 print_int(obj_info(static_pdf, pdf_font_num(ff)));
2419 } else if (scan_keyword("fontobjnum
")) {
2421 if (cur_val == null_font)
2422 normal_error("pdf backend
", "invalid font identifier when asking 'objnum'
");
2423 pdf_check_vf(cur_val);
2424 if (!font_used(cur_val))
2425 pdf_init_font(static_pdf, cur_val);
2428 print_int(pdf_font_num(ff));
2430 } else if (scan_keyword("fontsize
")) {
2432 if (cur_val == null_font)
2433 normal_error("pdf backend
", "invalid font identifier when asking 'fontsize'
");
2435 print_scaled(font_size(cur_val));
2438 } else if (scan_keyword("pageref
")) {
2441 normal_error("pdf backend
", "invalid page number when asking 'pageref'
");
2443 print_int(pdf_get_obj(static_pdf, obj_type_page, cur_val, false));
2445 } else if (scan_keyword("colorstackinit
")) {
2446 bool = scan_keyword("page
");
2447 if (scan_keyword("direct
"))
2448 cur_val = direct_always;
2449 else if (scan_keyword("page
"))
2450 cur_val = direct_page;
2452 cur_val = set_origin;
2453 save_scanner_status = scanner_status;
2454 save_warning_index = warning_index;
2455 save_def_ref = def_ref;
2456 u = save_cur_string();
2457 scan_toks(false, true);
2458 s = tokens_to_string(def_ref);
2459 delete_token_ref(def_ref);
2460 def_ref = save_def_ref;
2461 warning_index = save_warning_index;
2462 scanner_status = save_scanner_status;
2463 str = makecstring(s);
2464 cur_val = newcolorstack(str, cur_val, bool);
2467 cur_val_level = int_val_level;
2469 print_err("Too many color stacks
");
2470 help2("The number of color stacks is limited to
32768.
",
2471 "I'll use the default color stack
0 here.
");
2474 restore_cur_string(u);
2479 } else if (scan_keyword("version
")) {
2481 print_int(pdftex_version);
2483 } else if (scan_keyword("revision
")) {
2484 ins_list(string_to_toks(pdftex_revision));
/*
    Expand a conversion command. The kind of conversion comes from |cur_chr|
    and is kept in |c|. The visible cases either print their result with the
    print routines or insert a token list directly with |ins_list|. The tail
    of the function turns the printed text into a string with |make_string|,
    converts it with |str_toks| and inserts the tokens that start at
    |temp_token_head|.

    NOTE(review): a number of lines of this function (case labels, breaks,
    braces, some declarations) are absent from this copy. The comments below
    describe only what the remaining lines show.
*/
2492 void conv_toks(void)
2494 int old_setting; /* holds |selector| setting */
2496 int save_scanner_status; /* |scanner_status| upon entry */
2497 halfword save_def_ref; /* |def_ref| upon entry, important if inside `\.{\\message}' */
2498 halfword save_warning_index;
2499 boolean bool; /* temp boolean */
2500 str_number s; /* first temp string */
2501 int sn; /* lua chunk name */
2502 str_number u = 0; /* third temp string, will become non-nil if a string is already being built */
2503 int c = cur_chr; /* desired type of conversion */
2506 /* Scan the argument for command |c| */
2514 case lua_function_code:
2517 normal_error("luafunction
", "invalid number
");
2519 u = save_cur_string();
2521 luafunctioncall(cur_val);
2522 restore_cur_string(u);
2523 if (luacstrings > 0)
2526 /* no further action */
/*
    The scanner state is remembered, the result of |scan_lua_state| and a
    token list are read, the state is put back, and the list is handed to
    |luatokencall| before its reference is dropped.
*/
2530 u = save_cur_string();
2531 save_scanner_status = scanner_status;
2532 save_def_ref = def_ref;
2533 save_warning_index = warning_index;
2534 sn = scan_lua_state();
2535 scan_toks(false, true);
2537 warning_index = save_warning_index;
2538 def_ref = save_def_ref;
2539 scanner_status = save_scanner_status;
2541 luatokencall(s, sn);
2542 delete_token_ref(s);
2543 restore_cur_string(u); /* TODO: check this, was different */
2544 if (luacstrings > 0)
2546 /* no further action */
/*
    Here the scanned token list is not printed: its link is inserted directly
    with |ins_list|, and |def_ref| is restored only after that.
*/
2550 save_scanner_status = scanner_status;
2551 save_warning_index = warning_index;
2552 save_def_ref = def_ref;
2553 u = save_cur_string();
2554 scan_toks(false, true);
2555 warning_index = save_warning_index;
2556 scanner_status = save_scanner_status;
2557 ins_list(token_link(def_ref));
2558 def_ref = save_def_ref;
2559 restore_cur_string(u);
2560 /* no further action */
2563 case math_style_code:
2569 save_scanner_status = scanner_status;
2570 scanner_status = normal;
2572 scanner_status = save_scanner_status;
2580 case cs_string_code:
2581 save_scanner_status = scanner_status;
2582 scanner_status = normal;
2584 scanner_status = save_scanner_status;
2587 sprint_cs_name(cur_cs);
2592 case roman_numeral_code:
2595 print_roman_int(cur_val);
2599 save_scanner_status = scanner_status;
2600 scanner_status = normal;
2602 scanner_status = save_scanner_status;
/*
    |in_lua_escape| is set while the scanned list is converted to a C string
    and is put back to its previous value (held in |bool|) afterwards.
*/
2613 case lua_escape_string_code:
2617 save_scanner_status = scanner_status;
2618 save_def_ref = def_ref;
2619 save_warning_index = warning_index;
2620 scan_toks(false, true);
2621 bool = in_lua_escape;
2622 in_lua_escape = true;
2623 escstr.s = (unsigned char *) tokenlist_to_cstring(def_ref, false, &l);
2624 escstr.l = (unsigned) l;
2625 in_lua_escape = bool;
2626 delete_token_ref(def_ref);
2627 def_ref = save_def_ref;
2628 warning_index = save_warning_index;
2629 scanner_status = save_scanner_status;
2630 (void) lua_str_toks(escstr);
2631 ins_list(token_link(temp_token_head));
2635 /* no further action */
2643 case font_name_code:
2646 append_string((unsigned char *) font_name(cur_val),(unsigned) strlen(font_name(cur_val)));
2647 if (font_size(cur_val) != font_dsize(cur_val)) {
2649 print_scaled(font_size(cur_val));
2654 case left_margin_kern_code:
2656 if ((box(cur_val) == null) || (type(box(cur_val)) != hlist_node))
2657 normal_error("marginkern
", "a non-empty hbox expected
");
2659 p = list_ptr(box(cur_val));
2660 while ((p != null) && (type(p) == glue_node)) {
2663 if ((p != null) && (type(p) == margin_kern_node) && (subtype(p) == left_side))
2664 print_scaled(width(p));
2670 case right_margin_kern_code:
2672 if ((box(cur_val) == null) || (type(box(cur_val)) != hlist_node))
2673 normal_error("marginkern
", "a non-empty hbox expected
");
2675 p = list_ptr(box(cur_val));
2677 p = tail_of_list(p);
2679 there can be a leftskip, rightskip, penalty and yes, also a disc node with a nesting
2680 node that points to glue spec ... and we don't want to analyze that messy lot
2682 while ((p != null) && (type(p) == glue_node)) {
2685 if ((p != null) && ! ((type(p) == margin_kern_node) && (subtype(p) == right_side))) {
2686 if (type(p) == disc_node) {
2688 if ((q != null) && ((type(q) == margin_kern_node) && (subtype(q) == right_side))) {
2692 officially we should look in the replace but currently protrusion doesn't
2693 work anyway with "foo\discretionary
{}{}{bar-
} " (no following char) so we
2700 if ((p != null) && (type(p) == margin_kern_node) && (subtype(p) == right_side))
2701 print_scaled(width(p));
2707 case uniform_deviate_code:
2710 print_int(unif_rand(cur_val));
2713 case normal_deviate_code:
2716 print_int(norm_rand());
2719 case math_char_class_code:
2723 mval = get_math_code(cur_val);
2725 print_int(mval.class_value);
2729 case math_char_fam_code:
2733 mval = get_math_code(cur_val);
2735 print_int(mval.family_value);
2739 case math_char_slot_code:
2743 mval = get_math_code(cur_val);
2745 print_int(mval.character_value);
2749 case insert_ht_code:
2750 scan_register_num();
2754 while (i >= subtype(vlink(p)))
2756 if (subtype(p) == i)
2757 print_scaled(height(p));
2770 case format_name_code:
2777 case luatex_banner_code:
2779 tprint(luatex_banner);
2782 case luatex_revision_code:
2784 print(get_luatexrevision());
2787 case luatex_date_code:
2789 print_int(get_luatex_date_info());
2794 tprint(eTeX_version_string);
2797 case eTeX_revision_code:
2799 tprint(eTeX_revision);
2802 case font_identifier_code:
2803 confusion("convert
");
2806 confusion("convert
");
2809 str = make_string();
2810 (void) str_toks(str_lstring(str));
2812 ins_list(token_link(temp_token_head));
/*
    Handle the backend feedback commands. For |dvi_feedback_code| the DVI
    handler |do_feedback_dvi| is called only when |get_o_mode| reports
    |OMODE_DVI|; for |pdf_feedback_code| the PDF handler |do_feedback_pdf| is
    called only when it reports |OMODE_PDF|. The handler result is kept in
    |done|. Error and warning texts for unexpected use exist for both
    backends, and any other code reaches |confusion|. The tail converts the
    collected string to tokens with |str_toks| and inserts them with
    |ins_list|.

    NOTE(review): the branches that test |done| are only partly present in
    this copy, so the exact meaning of its values cannot be read off here.
*/
2815 void do_feedback(void)
2821 case dvi_feedback_code:
2822 if (get_o_mode() == OMODE_DVI) {
2823 done = do_feedback_dvi(c);
2825 tex_error("unexpected use of \\dvifeedback
",null);
2830 normal_warning("dvi backend
","unexpected use of \\dvifeedback
");
2832 } else if (done==2) {
2836 case pdf_feedback_code:
2837 if (get_o_mode() == OMODE_PDF) {
2838 done = do_feedback_pdf(c);
2840 tex_error("unexpected use of \\pdffeedback
",null);
2845 normal_warning("pdf backend
","unexpected use of \\pdffeedback
");
2847 } else if (done==2) {
2852 confusion("feedback
");
2855 str = make_string();
2856 (void) str_toks(str_lstring(str));
2858 ins_list(token_link(temp_token_head));
/*
    Handle the backend variable commands: |dvi_variable_code| is passed to
    |do_variable_dvi| and |pdf_variable_code| to |do_variable_pdf|, with the
    result stored in |done|. A warning about unexpected use exists for each
    backend, and any other code reaches |confusion|.

    NOTE(review): the conditions guarding the warnings are absent from this
    copy; presumably they depend on |done| -- confirm.
*/
2861 void do_variable(void)
2866 case dvi_variable_code:
2867 done = do_variable_dvi(c);
2870 normal_warning("dvi backend
","unexpected use of \\dvivariable
");
2874 case pdf_variable_code:
2875 done = do_variable_pdf(c);
2878 normal_warning("pdf backend
","unexpected use of \\pdfvariable
");
2883 confusion("variable
");
2889 The following code is not used as we can only set math options and not query them. If
2890 an option is really important we will provide a proper variable. Most options are not
2891 meant for users anyway but for development.
2896 #define do_mathoption_int(i) \
2897 cur_cmd = assign_int_cmd; \
2898 cur_val = mathoption_int_base + i; \
2899 cur_tok = token_val(cur_cmd, cur_val); \
2902 void do_mathoption(void)
2904 if (scan_keyword("old
")) { do_mathoption_int(c_mathoption_no_italic_compensation_code); }
2905 if (scan_keyword("noitaliccompensation
")) { do_mathoption_int(c_mathoption_no_char_italic_code); }
2906 else if (scan_keyword("nocharitalic
")) { do_mathoption_int(c_mathoption_use_old_fraction_scaling_code); }
2907 else if (scan_keyword("useoldfractionscaling
")) { do_mathoption_int(c_mathoption_old_code); }
2908 else if (scan_keyword("umathcodemeaning
")) { do_mathoption_int(c_mathoption_umathcode_meaning_code); }
2913 @ This boolean is keeping track of the lua string escape state
/*
    Set to |true| while a token list is converted to a C string for the
    |lua_escape_string_code| conversion and restored afterwards. It is also
    consulted by |tokenlist_to_cstring| when it prints parameter characters.
*/
2915 boolean in_lua_escape;
/*
    DVI counterpart of |the_convert_string_pdf|. |the_convert_string| calls it
    for |dvi_feedback_code| and stores the result in |done|.

    NOTE(review): only the signature is present in this copy; the body is not
    visible here.
*/
2917 static int the_convert_string_dvi(halfword c, int i)
/*
    Print one PDF feedback value, selected by the keyword found with
    |scan_keyword|. Values that need an argument (|xformname|, |fontsize|,
    |pageref|) take it from the parameter |i|. The first test checks whether
    |get_o_mode| reports |OMODE_PDF|.

    NOTE(review): several branch bodies (for instance |creationdate| and
    |colorstackinit|) and the return statements are absent from this copy.
    The int result is stored in |done| by |the_convert_string|.
*/
2922 static int the_convert_string_pdf(halfword c, int i)
2925 if (get_o_mode() != OMODE_PDF) {
2927 } else if (scan_keyword("lastlink
")) {
2928 print_int(pdf_last_link);
2929 } else if (scan_keyword("retval
")) {
2930 print_int(pdf_retval);
2931 } else if (scan_keyword("lastobj
")) {
2932 print_int(pdf_last_obj);
2933 } else if (scan_keyword("lastannot
")) {
2934 print_int(pdf_last_annot);
2935 } else if (scan_keyword("xformname
")) {
2936 print_int(obj_info(static_pdf, i));
2937 } else if (scan_keyword("creationdate
")) {
2939 } else if (scan_keyword("fontname
")) {
2941 print_int(obj_info(static_pdf, pdf_font_num(ff)));
2942 } else if (scan_keyword("fontobjnum
")) {
2944 print_int(pdf_font_num(ff));
2945 } else if (scan_keyword("fontsize
")) {
2946 print_scaled(font_size(i));
2948 } else if (scan_keyword("pageref
")) {
2949 print_int(pdf_get_obj(static_pdf, obj_type_page, i, false));
2950 } else if (scan_keyword("colorstackinit
")) {
/*
    Print the conversion selected by |c| into a new string, taking the
    argument from the parameter |i|. |selector| is switched to |new_string|
    on entry and restored from |old_setting| at the end; the printed text is
    collected with |make_string| into |ret|. The backend feedback codes are
    passed on to |the_convert_string_dvi| and |the_convert_string_pdf|, whose
    result is stored in |done|.

    The conversion codes that appear only inside comments have no case label
    in this function.

    NOTE(review): break statements, some case bodies and the final return are
    absent from this copy; presumably |ret| is returned -- confirm.
*/
2958 str_number the_convert_string(halfword c, int i)
2960 int old_setting; /* saved |selector| setting */
2962 boolean done = true ;
2963 old_setting = selector;
2964 selector = new_string;
2969 /* case lua_function_code: */
2970 /* case lua_code: */
2971 /* case expanded_code: */
2972 case math_style_code:
2975 /* case string_code: */
2976 /* case cs_string_code: */
2977 case roman_numeral_code:
2980 /* case meaning_code: */
2984 /* lua_escape_string_code: */
2988 case font_name_code:
2989 append_string((unsigned char *) font_name(i),(unsigned) strlen(font_name(i)));
2990 if (font_size(i) != font_dsize(i)) {
2992 print_scaled(font_size(i));
2996 /* left_margin_kern_code: */
2997 /* right_margin_kern_code: */
2998 case uniform_deviate_code:
2999 print_int(unif_rand(i));
3001 case normal_deviate_code:
3002 print_int(norm_rand());
3004 /* math_char_class_code: */
3005 /* math_char_fam_code: */
3006 /* math_char_slot_code: */
3007 /* insert_ht_code: */
3011 case format_name_code:
3014 case luatex_banner_code:
3015 tprint(luatex_banner);
3017 case luatex_revision_code:
3018 print(get_luatexrevision());
3020 case luatex_date_code:
3021 print_int(get_luatex_date_info());
3024 tprint(eTeX_version_string);
3026 case eTeX_revision_code:
3027 tprint(eTeX_revision);
3029 case font_identifier_code:
3030 print_font_identifier(i);
3032 /* backend: this might become obsolete */
3033 case dvi_feedback_code:
3034 done = the_convert_string_dvi(c,i);
3036 case pdf_feedback_code:
3037 done = the_convert_string_pdf(c,i);
3045 ret = make_string();
3047 selector = old_setting;
3051 @ Another way to create a token list is via the \.{\\read} command. The sixteen
3052 files potentially usable for reading appear in the following global variables.
3053 The value of |read_open[n]| will be |closed| if stream number |n| has not been
3054 opened or if it has been fully read; |just_open| if an \.{\\openin} but not a
3055 \.{\\read} has been done; and |normal| if it is open and ready to read the next line.
3059 FILE *read_file[16]; /* used for \.{\\read} */
3060 int read_open[17]; /* state of |read_file[n]| */
3062 void initialize_read(void)
3065 for (k = 0; k <= 16; k++)
3066 read_open[k] = closed;
3069 @ The |read_toks| procedure constructs a token list like that for any macro
3070 definition, and makes |cur_val| point to it. Parameter |r| points to the control
3071 sequence that will receive this token list.
/*
    Build a token list from input lines of read stream |n|. On entry
    |scanner_status| becomes |defining|; the reference count of |def_ref| is
    set to 0 and an |end_match_token| is stored first. At the end
    |scanner_status| is put back to |normal|.

    Each line is fetched according to |read_open[m]|: |closed| leads to the
    terminal branch, |just_open| reads the first line of |read_file[m]|, and
    otherwise the next line is read. When |lua_input_ln| fails the file is
    closed with |lua_a_close_in| and |read_open[m]| becomes |closed|; if
    |align_state| is then not 1000000 the error about unbalanced braces is
    raised. Lines are read until |align_state| is back at 1000000.

    |r| is the control sequence that receives the list, as stated in the
    section text. NOTE(review): the use of |j| is not visible in this copy;
    presumably it selects the readline handling -- confirm.

    NOTE(review): a number of lines (loop heads, braces, the assignment of
    |m|) are absent from this copy.
*/
3074 void read_toks(int n, halfword r, halfword j)
3076 halfword p; /* tail of the token list */
3077 halfword q; /* new node being added to the token list via |store_new_token| */
3078 int s; /* saved value of |align_state| */
3079 int m; /* stream number */
3080 scanner_status = defining;
3084 set_token_ref_count(def_ref, 0);
3085 p = def_ref; /* the reference count */
3086 store_new_token(end_match_token);
/* stream numbers outside 0..15 are singled out here; the branch bodies are absent in this copy */
3087 if ((n < 0) || (n > 15))
3092 align_state = 1000000; /* disable tab marks, etc. */
3094 /* Input and store tokens from the next line of the file */
3095 begin_file_reading();
3097 if (read_open[m] == closed) {
3099 Input for \.{\\read} from the terminal
3101 Here we input on-line into the |buffer| array, prompting the user explicitly
3102 if |n>=0|. The value of |n| is set negative so that additional prompts
3103 will not be given in the case of multi-line input.
3105 if (interaction > nonstop_mode) {
3117 ("*** (cannot \\read from terminal in nonstop modes
)");
3120 } else if (read_open[m] == just_open) {
3122 Input the first line of |read_file[m]|
3124 The first line of a file must be treated specially, since |lua_input_ln|
3125 must be told not to start with |get|.
3127 if (lua_input_ln(read_file[m], (m + 1), false)) {
3128 read_open[m] = normal;
3130 lua_a_close_in(read_file[m], (m + 1));
3131 read_open[m] = closed;
3136 Input the next line of |read_file[m]|
3138 An empty line is appended at the end of a |read_file|.
3140 if (!lua_input_ln(read_file[m], (m + 1), true)) {
3141 lua_a_close_in(read_file[m], (m + 1));
3142 read_open[m] = closed;
3143 if (align_state != 1000000) {
3145 print_err("File ended within \\read
");
3146 help1("This \\read has unbalanced braces.
");
3147 align_state = 1000000;
3154 if (end_line_char_inactive)
3157 buffer[ilimit] = (packed_ASCII_code) int_par(end_line_char_code);
3161 /* Handle \.{\\readline} and |goto done|; */
3163 while (iloc <= ilimit) {
3164 /* current line not yet finished */
3165 do_buffer_to_unichar(cur_chr, iloc);
3167 cur_tok = space_token;
3169 cur_tok = cur_chr + other_token;
3170 store_new_token(cur_tok);
3176 /* |cur_cmd=cur_chr=0| will occur at the end of the line */
3179 if (align_state < 1000000) {
3180 /* unmatched `\.\}' aborts the line */
3183 } while (cur_tok != 0);
3184 align_state = 1000000;
3187 store_new_token(cur_tok);
3192 } while (align_state != 1000000);
3194 scanner_status = normal;
3198 @ Return a string built from a token list.
3201 str_number tokens_to_string(halfword p)
3204 if (selector == new_string)
3205 normal_error("tokens
","tokens_to_string
() called while selector
= new_string
");
3206 old_setting = selector;
3207 selector = new_string;
3208 show_token_list(token_link(p), null, -1);
3209 selector = old_setting;
3210 return make_string();
/*
    Output helpers for |tokenlist_to_cstring|. They work on the locals of the
    function that uses them: |ret| is the output buffer, |i| the number of
    bytes written so far, |alloci| the current capacity and |e| the escape
    character.
*/

/*
    Make sure that |a| more bytes plus one spare byte fit into |ret|. The
    buffer grows by 64 bytes, or by more when 64 is not enough for the
    request, so that a long string can never run past the allocation.
*/

#define make_room(a) do { \
    unsigned needed_ = (unsigned) i + (unsigned) (a) + 1; \
    if (needed_ > alloci) { \
        alloci = (needed_ > alloci + 64) ? needed_ : alloci + 64; \
        ret = xrealloc(ret, alloci); \
    } \
} while (0)

/* Store one byte; room must have been made beforehand. */

#define append_i_byte(a) ret[i++] = (char)(a)

/* Append a single byte, growing the buffer when needed. */

#define Print_char(a) do { make_room(1); append_i_byte(a); } while (0)

/*
    Append the character |s| in UTF-8. Values from 0x110000 upwards stand for
    a single raw byte, which is |s| minus 0x110000.
*/

#define Print_uchar(s) { \
    make_room(4); \
    if ((s)<=0x7F) { \
        append_i_byte(s); \
    } else if ((s)<=0x7FF) { \
        append_i_byte(0xC0 + ((s) / 0x40)); \
        append_i_byte(0x80 + ((s) % 0x40)); \
    } else if ((s)<=0xFFFF) { \
        append_i_byte(0xE0 + ((s) / 0x1000)); \
        append_i_byte(0x80 + (((s) % 0x1000) / 0x40)); \
        append_i_byte(0x80 + (((s) % 0x1000) % 0x40)); \
    } else if ((s)>=0x110000) { \
        append_i_byte((s)-0x110000); \
    } else { \
        append_i_byte(0xF0 + ((s) / 0x40000)); \
        append_i_byte(0x80 + (((s) % 0x40000) / 0x1000)); \
        append_i_byte(0x80 + ((((s) % 0x40000) % 0x1000) / 0x40)); \
        append_i_byte(0x80 + ((((s) % 0x40000) % 0x1000) % 0x40)); \
    } \
}

/* Append the escape character (when it is a printable one) followed by |b|. */

#define Print_esc(b) { \
    const char *v = b; \
    if (e>0 && e<STRING_OFFSET) { \
        Print_uchar(e); \
    } \
    make_room(strlen(v)); \
    while (*v) { append_i_byte(*v); v++; } \
}

/* Append the C string |b|. */

#define Print_str(b) { \
    const char *v = b; \
    make_room(strlen(v)); \
    while (*v) { append_i_byte(*v); v++; } \
}

/* True when the first character of string |a| has category code 11. */

#define is_cat_letter(a) \
    (get_char_cat_code(pool_to_unichar(str_string((a)))) == 11)
3262 @ The actual token conversion in this function is now functionally equivalent to
3263 |show_token_list|, except that it always prints the whole token list. TODO: check
3264 whether this causes problems in the Lua library.
/*
    Convert the token list |pp| to a C string held in a buffer obtained from
    |xmalloc| (initial capacity |alloci| = 1024, grown through the output
    macros). The reference count node is skipped with |token_link|, and the
    escape character is taken from |escape_char_code|.

    A pointer outside |fix_mem_min| .. |fix_mem_end| prints the text
    CLOBBERED. Tokens at or above |cs_token_flag| are control sequences; when
    |inhibit_par| is set a |par_token| is not printed. Invalid control
    sequence numbers print IMPOSSIBLE. or NONEXISTENT. instead of a name.

    NOTE(review): |siz| presumably receives the length of the result, since
    |tokenlist_to_lstring| uses it that way; the assignment is not visible in
    this copy. Many other lines (declarations, loop head, case bodies, the
    return) are absent as well.
*/
3267 char *tokenlist_to_cstring(int pp, int inhibit_par, int *siz)
3269 register int p, c, m;
3275 int match_chr = '#';
3277 unsigned alloci = 1024;
3285 ret = xmalloc(alloci);
3286 p = token_link(p); /* skip refcount */
3288 e = int_par(escape_char_code);
3291 if (p < (int) fix_mem_min || p > (int) fix_mem_end) {
3292 Print_esc("CLOBBERED.
");
3295 infop = token_info(p);
3296 if (infop >= cs_token_flag) {
3297 if (!(inhibit_par && infop == par_token)) {
3298 q = infop - cs_token_flag;
3299 if (q < hash_base) {
3301 Print_esc("csname
");
3302 Print_esc("endcsname
");
3304 Print_esc("IMPOSSIBLE.
");
3306 } else if ((q >= undefined_control_sequence) && ((q <= eqtb_size) || (q > eqtb_size + hash_extra))) {
3307 Print_esc("IMPOSSIBLE.
");
3308 } else if ((cs_text(q) < 0) || (cs_text(q) >= str_ptr)) {
3309 Print_esc("NONEXISTENT.
");
3311 str_number txt = cs_text(q);
3312 sh = makecstring(txt);
3314 if (is_active_cs(txt)) {
3321 if (e>=0 && e<0x110000) Print_uchar(e);
3326 if ((!single_letter(txt)) || is_cat_letter(txt)) {
/* not a control sequence: the token is split into its command |m| and character |c| */
3337 m = token_cmd(infop);
3338 c = token_chr(infop);
3340 case left_brace_cmd:
3341 case right_brace_cmd:
3342 case math_shift_cmd:
3348 case other_char_cmd:
3352 if (!in_lua_escape && (is_in_csname==0))
3357 Print_uchar(match_chr);
3359 Print_char(c + '0');
3380 not_so_bad(Print_esc);
3395 lstring *tokenlist_to_lstring(int pp, int inhibit_par)
3398 lstring *ret = xmalloc(sizeof(lstring));
3399 ret->s = (unsigned char *) tokenlist_to_cstring(pp, inhibit_par, &siz);
3400 ret->l = (size_t) siz;
3405 void free_lstring(lstring * ls)