3 % Copyright
2006-2011 Taco Hoekwater
<taco@@luatex.org
>
5 % This file is part of LuaTeX.
7 % LuaTeX is free software
; you can redistribute it and
/or modify it under
8 % the terms of the GNU General Public License as published by the Free
9 % Software Foundation
; either version
2 of the License
, or
(at your
10 % option
) any later version.
12 % LuaTeX is distributed in the hope that it will be useful
, but WITHOUT
13 % ANY WARRANTY
; without even the implied warranty of MERCHANTABILITY or
14 % FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 % License for more details.
17 % You should have received a copy of the GNU General Public License along
18 % with LuaTeX
; if not
, see
<http
://www.gnu.org
/licenses
/>.
/* True when |line_catcode_table| equals |NO_CAT_TABLE|, i.e. the current
   line is not governed by any catcode table. */
25 #define detokenized_line
() (line_catcode_table
==NO_CAT_TABLE
)
28 #define do_get_cat_code
(a
,b
) do
{ \
29 if
(line_catcode_table
<=-0xFF) \
30 a
= - line_catcode_table
- 0xFF ; \
31 else if
(line_catcode_table
!=DEFAULT_CAT_TABLE
) \
32 a
=get_cat_code
(line_catcode_table
,b
); \
34 a
=get_cat_code
(cat_code_table_par
,b
); \
38 #define do_get_cat_code
(a
,b
) do
{ \
39 if
(line_catcode_table
==DEFAULT_CAT_TABLE
) \
40 a
=get_cat_code
(cat_code_table_par
,b
); \
41 else if
(line_catcode_table
>-0xFF) \
42 a
=get_cat_code
(line_catcode_table
,b
); \
44 a
= - line_catcode_table
- 0xFF ; \
48 @ The \TeX\ system does nearly all of its own memory allocation
, so that it can
49 readily be transported into environments that do not have automatic facilities
50 for strings
, garbage collection
, etc.
, and so that it can be in control of what
51 error messages the user receives. The dynamic storage requirements of \TeX\ are
52 handled by providing two large arrays called |fixmem| and |varmem| in which
53 consecutive blocks of words are used as nodes by the \TeX\ routines.
55 Pointer variables are indices into these arrays
, or into another array called
56 |eqtb| that will be explained later. A pointer variable might also be a special
57 flag that lies outside the bounds of |mem|
, so we allow pointers to assume any
58 |halfword| value. The minimum halfword value represents a null pointer. \TeX\
59 does not assume that |mem
[null
]| exists.
61 @ Locations in |fixmem| are used for storing one-word records
; a conventional
62 \.
{AVAIL} stack is used for allocation in this array.
65 smemory_word
*fixmem
; /* the big dynamic storage area
*/
66 unsigned fix_mem_min
; /* the smallest location of one-word memory in use
*/
67 unsigned fix_mem_max
; /* the largest location of one-word memory in use
*/
69 @ In order to study the memory requirements of particular applications
, it is
70 possible to prepare a version of \TeX\ that keeps track of current and maximum
71 memory usage. When code between the delimiters |@
!stat| $\ldots$ |tats| is not
72 commented out
, \TeX\ will run a bit slower but it will report these statistics
73 when |tracing_stats| is sufficiently large.
76 int var_used
, dyn_used
; /* how much memory is in use
*/
78 halfword avail
; /* head of the list of available one-word nodes
*/
79 unsigned fix_mem_end
; /* the last one-word node used in |mem|
*/
81 halfword garbage
; /* head of a junk list
, write only
*/
82 halfword temp_token_head
; /* head of a temporary list of some kind
*/
83 halfword hold_token_head
; /* head of a temporary list of another kind
*/
84 halfword omit_template
; /* a constant token list
*/
85 halfword null_list
; /* permanently empty list
*/
86 halfword backup_head
; /* head of token list built by |scan_keyword|
*/
89 void initialize_tokens
(void
)
96 set_token_info
(temp_token_head
, 0);
99 set_token_info
(hold_token_head
, 0);
102 set_token_info
(omit_template
, 0);
105 set_token_info
(null_list
, 0);
108 set_token_info
(backup_head
, 0);
111 set_token_info
(garbage
, 0);
112 dyn_used
= 0; /* initialize statistics
*/
115 @ The function |get_avail| returns a pointer to a new one-word node whose |link|
116 field is null. However
, \TeX\ will halt if there is no more room left.
119 If the available-space list is empty
, i.e.
, if |avail
=null|
, we try first to
120 increase |fix_mem_end|. If that cannot be done
, i.e.
, if
121 |fix_mem_end
=fix_mem_max|
, we try to reallocate array |fixmem|. If
 that doesn't
122 work
, we have to quit.
125 halfword get_avail
(void
)
126 { /* single-word node allocation
*/
127 unsigned p
; /* the new node being got
*/
129 p
= (unsigned
) avail
; /* get top location in the |avail| stack
*/
131 avail
= token_link
(avail
); /* and pop it off
*/
132 } else if
(fix_mem_end
< fix_mem_max
) { /* or go into virgin territory
*/
136 smemory_word
*new_fixmem
; /* the big dynamic storage area
*/
137 t
= (fix_mem_max
/ 5);
140 (fixmem
, sizeof
(smemory_word
) * (fix_mem_max
+ t
+ 1)));
141 if
(new_fixmem
== NULL) {
142 runaway
(); /* if memory is exhausted
, display possible runaway text
*/
143 overflow
("token memory size", fix_mem_max
);
147 memset
(voidcast
(fixmem
+ fix_mem_max
+ 1), 0, t
* sizeof
(smemory_word
));
151 token_link
(p
) = null
; /* provide an oft-desired initialization of the new node
*/
152 incr
(dyn_used
); /* maintain statistics
*/
156 @ The procedure |flush_list
(p
)| frees an entire linked list of one-word nodes
157 that starts at position |p|.
161 void flush_list
(halfword p
)
162 { /* makes list of single-word nodes available
*/
163 halfword q
, r
; /* list traversers
*/
170 } while
(r
!= null
); /* now |q| is the last node on the list
*/
171 token_link
(q
) = avail
;
176 @ A \TeX\ token is either a character or a control sequence
, and it is @^token@
>
177 represented internally in one of two ways
: (1)~A character whose ASCII code
178 number is |c| and whose command code is |m| is represented as the number
179 $
2^
{21}m
+c$
; the command code is in the range |
1<=m
<=14|.
(2)~A control sequence
180 whose |eqtb| address is |p| is represented as the number |cs_token_flag
+p|. Here
181 |cs_token_flag
=@t$
2^
{25}-1$@
>| is larger than $
2^
{21}m
+c$
, yet it is small enough
182 that |cs_token_flag
+p
< max_halfword|
; thus
, a token fits comfortably in a
185 A token |t| represents a |left_brace| command if and only if
186 |t
<left_brace_limit|
; it represents a |right_brace| command if and only if we
187 have |left_brace_limit
<=t
<right_brace_limit|
; and it represents a |match| or
188 |end_match| command if and only if |match_token
<=t
<=end_match_token|. The
189 following definitions take care of these token-oriented constants and a few
192 @ A token list is a singly linked list of one-word nodes in |mem|
, where each
193 word contains a token and a link. Macro definitions
, output-routine definitions
,
194 marks
, \.
{\\write
} texts
, and a few other things are remembered by \TeX\ in the
195 form of token lists
, usually preceded by a node with a reference count in its
196 |token_ref_count| field. The token stored in location |p| is called |info
(p
)|.
198 Three special commands appear in the token lists of macro definitions. When
199 |m
=match|
, it means that \TeX\ should scan a parameter for the current macro
;
200 when |m
=end_match|
, it means that parameter matching should end and \TeX\ should
201 start reading the macro text
; and when |m
=out_param|
, it means that \TeX\ should
202 insert parameter number |c| into the text at this point.
204 The enclosing \.
{\char'
173} and \.
{\char'
175} characters of a macro definition
205 are omitted
, but the final right brace of an output routine is included at the
206 end of its token list.
208 Here is an example macro definition that illustrates these conventions. After
209 \TeX\ processes the text
211 $$\.
{\\def\\mac a\#
1\#
2 \\b \
{\#
1\\
-a \#\#
1\#
2 \#
2\
}}$$
213 the definition of \.
{\\mac
} is represented as a token list containing
216 \vbox
{\halign
{\hfil#\hfil\cr
217 (reference count
), |letter|\
,\.a
, |match|\
,\#
, |match|\
,\#
, |spacer|\
,\.\
,
218 \.
{\\b
}, |end_match|
,\cr
219 |out_param|\
,1, \.
{\\
-}, |letter|\
,\.a
, |spacer|\
,\.\
, |mac_param|\
,\#
,
220 |other_char|\
,\
.1,\cr
221 |out_param|\
,2, |spacer|\
,\.\
, |out_param|\
,2.\cr
}}$$
223 The procedure |scan_toks| builds such token lists
, and |macro_call| does the
224 parameter matching. @^reference counts@
>
226 Examples such as $$\.
{\\def\\m\
{\\def\\m\
{a\
}\ b\
}}$$ explain why reference
227 counts would be needed even if \TeX\ had no \.
{\\let
} operation
: When the token
228 list for \.
{\\m
} is being read
, the redefinition of \.
{\\m
} changes the |eqtb|
229 entry before the token list has been fully consumed
, so we dare not simply
230 destroy a token list when its control sequence is being redefined.
232 If the parameter-matching part of a definition ends with `\.
{\#\
{}'
, the
233 corresponding token list will have `\.\
{' just before the `|end_match|' and also
234 at the very end. The first `\.\
{' is used to delimit the parameter
; the second
235 one keeps the first from disappearing.
237 The |print_meaning| subroutine displays |cur_cmd| and |cur_chr| in symbolic form
,
238 including the expansion of a macro or mark.
241 void print_meaning
(void
)
243 /* remap \mathchar onto \Umathchar
*/
245 if
(cur_cmd
== math_given_cmd
) {
246 cur_cmd
= xmath_given_cmd
;
249 print_cmd_chr
((quarterword
) cur_cmd
, cur_chr
);
250 if
(cur_cmd
>= call_cmd
) {
255 /* Show the meaning of a mark node
*/
256 if
((cur_cmd
== top_bot_mark_cmd
) && (cur_chr < marks_code)) {
260 case first_mark_code
:
261 token_show
(first_mark
(0));
264 token_show
(bot_mark
(0));
266 case split_first_mark_code
:
267 token_show
(split_first_mark
(0));
269 case split_bot_mark_code
:
270 token_show
(split_bot_mark
(0));
273 token_show
(top_mark
(0));
280 @ The procedure |show_token_list|
, which prints a symbolic form of the token list
281 that starts at a given node |p|
, illustrates these conventions. The token list
282 being displayed should not begin with a reference count. However
, the procedure
283 is intended to be robust
, so that if the memory links are awry or if |p| is not
284 really a pointer to a token list
, nothing catastrophic will happen.
286 An additional parameter |q| is also given
; this parameter is either null or it
287 points to a node in the token list where a certain magic computation takes place
288 that will be explained later.
(Basically
, |q| is non-null when we are printing
289 the two-line context information at the time of an error message
; |q| marks the
290 place corresponding to where the second line should begin.
)
292 For example
, if |p| points to the node containing the first \.a in the token list
293 above
, then |show_token_list| will print the string $$\hbox
{`\.
{a\#
1\#
2\ \\b\
294 ->\#
1\\
-a\ \#\#
1\#
2\ \#
2}'
;}$$ and if |q| points to the node containing the
295 second \.a
, the magic computation will be performed just before the second \.a is
298 The generation will stop
, and `\.
{\\ETC.
}' will be printed
, if the length of
299 printing exceeds a given limit~|l|. Anomalous entries are printed in the form of
300 control sequences that are not followed by a blank space
, e.g.
, `\.
{\\BAD.
}'
;
301 this cannot be confused with actual control sequences because a real control
302 sequence named \.
{BAD
} would come out `\.
{\\BAD\
}'.
305 #define not_so_bad
(p
) \
307 case assign_int_cmd
: \
308 if
(c
>= (backend_int_base
) && c <= (backend_int_last)) \
309 p
("[internal backend integer]"); \
311 case assign_dimen_cmd
: \
312 if
(c
>= (backend_dimen_base
) && c <= (backend_dimen_last)) \
313 p
("[internal backend dimension]"); \
315 case assign_toks_cmd
: \
316 if
(c
>= (backend_toks_base
) && c <= (backend_toks_last)) \
317 p
("[internal backend tokenlist]"); \
324 void show_token_list
(int p
, int q
, int l
)
326 int m
, c
; /* pieces of a token
*/
327 ASCII_code match_chr
= '#'
; /* character used in a `|match|'
*/
328 ASCII_code n
= '
0'
; /* the highest parameter number
, as an ASCII digit
*/
332 while
((p
!= null
) && (tally < l)) {
334 /* Do magic computation
*/
337 /* Display token |p|
, and |return| if there are problems
*/
338 if
((p
< (int
) fix_mem_min
) ||
(p
> (int
) fix_mem_end
)) {
339 tprint_esc
("CLOBBERED.");
342 if
(token_info
(p
) >= cs_token_flag
) {
343 if
(!((inhibit_par_tokens
) && (token_info(p) == par_token)))
344 print_cs
(token_info
(p
) - cs_token_flag
);
346 m
= token_cmd
(token_info
(p
));
347 c
= token_chr
(token_info
(p
));
348 if
(token_info
(p
) < 0) {
352 Display the token $
(|m|
,|c|
)$
354 The procedure usually ``learns'' the character code used for macro
355 parameters by seeing one in a |match| command before it runs into any
356 |out_param| commands.
360 case right_brace_cmd
:
371 if
(!in_lua_escape
&& (is_in_csname==0))
409 #define do_buffer_to_unichar
(a
,b
) do
{ \
410 a
= (halfword
)str2uni
(buffer
+b
); \
414 @ Here's the way we sometimes want to display a token list
, given a pointer to
415 its reference count
; the pointer may be null.
418 void token_show
(halfword p
)
421 show_token_list
(token_link
(p
), null
, 10000000);
424 @ |delete_token_ref|
 is called when a pointer to a token list's reference count
425 is being removed. This means that the token list should disappear if the
426 reference count was |null|
; otherwise the count should be decreased by one.
429 @ |p| points to the reference count of a token list that is losing one
433 void delete_token_ref
(halfword p
)
435 if
(token_ref_count
(p
) == 0)
438 decr
(token_ref_count
(p
));
442 int get_char_cat_code
(int curchr
)
445 do_get_cat_code
(a
,curchr
);
450 static void invalid_character_error
(void
)
452 const char
*hlp
[] = {
453 "A funny symbol that I can't read has just been input.",
454 "Continue, and I'll forget that it ever happened.",
457 deletions_allowed
= false
;
458 tex_error
("Text line contains an invalid character", hlp
);
459 deletions_allowed
= true
;
463 static boolean process_sup_mark
(void
); /* below
*/
465 static int scan_control_sequence
(void
); /* below
*/
473 static next_line_retval next_line
(void
); /* below
*/
475 @ In case you are getting bored
, here is a slightly less trivial routine
: Given a
476 string of lowercase letters
, like `\.
{pt
}' or `\.
{plus
}' or `\.
{width
}'
, the
477 |scan_keyword| routine checks to see whether the next tokens of input match this
478 string. The match must be exact
, except that uppercase letters will match their
479 lowercase counterparts
; uppercase equivalents are determined by subtracting
480 |
"a"-"A"|
, rather than using the |uc_code| table
, since \TeX\ uses this routine
481 only for its own limited set of keywords.
483 If a match is found
, the characters are effectively removed from the input and
484 |true| is returned. Otherwise |false| is returned
, and the input is left
485 essentially unchanged
(except for the fact that some macros may have been
486 expanded
, etc.
). @^inner loop@
>
489 boolean scan_keyword
(const char
*s
)
490 { /* look for a given string
*/
491 halfword p
; /* tail of the backup list
*/
492 halfword q
; /* new node being added to the token list via |store_new_token|
*/
493 const char
*k
; /* index into |str_pool|
*/
494 halfword save_cur_cs
= cur_cs
;
495 if
(strlen
(s
) == 0) /* was assert
(strlen
(s
) > 1); */
496 return false
; /* but not with newtokenlib zero keyword simply doesn't match
*/
498 token_link
(p
) = null
;
501 get_x_token
(); /* recursion is possible here
*/
502 if
((cur_cs
== 0) && ((cur_chr == *k) || (cur_chr == *k - 'a' + 'A'))) {
503 store_new_token
(cur_tok
);
505 } else if
((cur_cmd
!= spacer_cmd
) ||
(p
!= backup_head
)) {
507 crashes on some alignments
:
509 if
(p
!= backup_head
) {
511 token_info
(q
) = cur_tok
;
512 token_link
(q
) = null
;
514 begin_token_list
(token_link
(backup_head
), backed_up
);
520 if
(p
!= backup_head
) {
521 begin_token_list
(token_link
(backup_head
), backed_up
);
524 cur_cs
= save_cur_cs
;
528 if
(token_link
(backup_head
) != null
)
529 flush_list
(token_link
(backup_head
));
530 cur_cs
= save_cur_cs
;
534 @ We cannot return |undefined_control_sequence| under some conditions
535 (inside |shift_case|
, for example
). This needs thinking.
540 halfword active_to_cs
(int curchr
, int force
)
544 char
*utfbytes
= xmalloc
(8);
545 int nncs
= no_new_control_sequence
;
546 a
= (char
*) uni2str
(0xFFFF);
547 utfbytes
= strcpy
(utfbytes
, a
);
549 no_new_control_sequence
= false
;
551 b
= (char
*) uni2str
((unsigned
) curchr
);
552 utfbytes
= strcat
(utfbytes
, b
);
554 curcs
= string_lookup
(utfbytes
, strlen
(utfbytes
));
557 curcs
= string_lookup
(utfbytes
, 4);
559 no_new_control_sequence
= nncs
;
566 /*static char
* FFFF
= "\xEF\xBF\xBF";*/ /* 0xFFFF */
568 halfword active_to_cs
(int curchr
, int force
)
571 int nncs
= no_new_control_sequence
;
573 no_new_control_sequence
= false
;
576 char
*b
= (char
*) uni2str
((unsigned
) curchr
);
577 char
*utfbytes
= xmalloc
(8);
578 utfbytes
= strcpy
(utfbytes
, "\xEF\xBF\xBF");
579 utfbytes
= strcat
(utfbytes
, b
);
581 curcs
= string_lookup
(utfbytes
, utf8_size
(curchr
)+3);
584 curcs
= string_lookup
("\xEF\xBF\xBF", 4); /* 0xFFFF ... why not
3 ?
*/
586 no_new_control_sequence
= nncs
;
592 static unsigned char
*uni2csstr
(unsigned unic
)
594 unsigned char
*buf
= xmalloc
(8);
595 unsigned char
*pt
= buf
;
596 *pt
++ = 239; *pt
++ = 191; *pt
++ = 191; // 0xFFFF
598 *pt
++ = (unsigned char
) unic
;
599 else if
(unic
< 0x800) {
600 *pt
++ = (unsigned char
) (0xc0 |
(unic
>> 6));
601 *pt
++ = (unsigned char
) (0x80 |
(unic
& 0x3f));
602 } else if
(unic
>= 0x110000) {
603 *pt
++ = (unsigned char
) (unic
- 0x110000);
604 } else if
(unic
< 0x10000) {
605 *pt
++ = (unsigned char
) (0xe0 |
(unic
>> 12));
606 *pt
++ = (unsigned char
) (0x80 |
((unic
>> 6) & 0x3f));
607 *pt
++ = (unsigned char
) (0x80 |
(unic
& 0x3f));
610 unsigned val
= unic
- 0x10000;
611 u
= (int
) (((val
& 0xf0000) >> 16) + 1);
612 z
= (int
) ((val
& 0x0f000) >> 12);
613 y
= (int
) ((val
& 0x00fc0) >> 6);
614 x
= (int
) (val
& 0x0003f);
615 *pt
++ = (unsigned char
) (0xf0 |
(u
>> 2));
616 *pt
++ = (unsigned char
) (0x80 |
((u
& 3) << 4) | z);
617 *pt
++ = (unsigned char
) (0x80 | y
);
618 *pt
++ = (unsigned char
) (0x80 | x
);
624 halfword active_to_cs
(int curchr
, int force
)
627 int nncs
= no_new_control_sequence
;
629 no_new_control_sequence
= false
;
632 char
* utfbytes
= (char
*) uni2csstr
((unsigned
) curchr
);
633 curcs
= string_lookup
(utfbytes
, utf8_size
(curchr
)+3);
636 curcs
= string_lookup
(FFFF
, 4); // 0xFFFF ... why not
3 ?
638 no_new_control_sequence
= nncs
;
644 @ TODO this function should listen to \.
{\\escapechar
}
646 @ prints a control sequence
649 static char
*cs_to_string
(halfword p
)
654 static char ret
[256] = { 0 };
655 if
(p
== 0 || p
== null_cs
) {
669 str_number txt
= cs_text
(p
);
670 sh
= makecstring
(txt
);
672 if
(is_active_cs
(txt
)) {
690 @ TODO this is a quick hack
, will be solved differently soon
693 static char
*cmd_chr_to_string
(int cmd
, int chr
)
698 selector
= new_string
;
699 print_cmd_chr
((quarterword
) cmd
, chr
);
701 s
= makecstring
(str
);
707 @ The heart of \TeX's input mechanism is the |get_next| procedure
, which we shall
708 develop in the next few sections of the program. Perhaps we shouldn't actually
709 call it the ``heart
,'' however
, because it really acts as \TeX's eyes and mouth
,
710 reading the source files and gobbling them up. And it also helps \TeX\ to
711 regurgitate stored token lists that are to be processed again. @^eyes and mouth@
>
713 The main duty of |get_next| is to input one token and to set |cur_cmd| and
714 |cur_chr| to that token's command code and modifier. Furthermore
, if the input
715 token is a control sequence
, the |eqtb| location of that control sequence is
716 stored in |cur_cs|
; otherwise |cur_cs| is set to zero.
718 Underlying this simple description is a certain amount of complexity because of
719 all the cases that need to be handled. However
, the inner loop of |get_next| is
720 reasonably short and fast.
722 When |get_next| is asked to get the next token of a \.
{\\read
} line
,
723 it sets |cur_cmd
=cur_chr
=cur_cs
=0| in the case that no more tokens
724 appear on that line.
(There might not be any tokens at all
, if the
725 |end_line_char| has |ignore| as its catcode.
)
727 The value of |par_loc| is the |eqtb| address of `\.
{\\par
}'. This quantity is
728 needed because a blank line of input is supposed to be exactly equivalent to the
729 appearance of \.
{\\par
}; we must set |cur_cs
:=par_loc| when detecting a blank
733 halfword par_loc
; /* location of `\.
{\\par
}' in |eqtb|
*/
734 halfword par_token
; /* token representing `\.
{\\par
}'
*/
736 @ Parts of |get_next| are executed more often than any other instructions of \TeX.
737 @^mastication@
>@^inner loop@
>
739 The global variable |force_eof| is normally |false|
; it is set |true| by an
740 \.
{\\endinput
} command. |luacstrings| is the number of lua print statements
741 waiting to be input
; it is changed by |luatokencall|.
744 boolean force_eof
; /* should the next \.
{\\input
} be aborted early?
*/
745 int luacstrings
; /* how many lua strings are waiting to be input?
*/
747 @ If the user has set the |pausing| parameter to some positive value
, and if
748 nonstop mode has not been selected
, each line of input is displayed on the
749 terminal and the transcript file
, followed by `\.
{=>}'. \TeX\ waits for a
750 response. If the response is simply |carriage_return|
, the line is accepted as it
751 stands
, otherwise the line typed is used instead of the line in the file.
754 void firm_up_the_line
(void
)
756 int k
; /* an index into |buffer|
*/
758 if
(pausing_par
> 0) {
759 if
(interaction
> nonstop_mode
) {
762 if
(istart
< ilimit
) {
763 for
(k
= istart
; k
<= ilimit
- 1; k
++)
764 print_char
(buffer
[k
]);
767 prompt_input
("=>"); /* wait for user response
*/
769 for
(k
= first
; k
< +last
- 1; k
++) /* move line down in buffer
*/
770 buffer
[k
+ istart
- first
] = buffer
[k
];
771 ilimit
= istart
+ last
- first
;
777 @ Before getting into |get_next|
, let's consider the subroutine that is called
778 when an `\.
{\\outer
}' control sequence has been scanned or when the end of a file
779 has been reached. These two cases are distinguished by |cur_cs|
, which is zero at
783 void check_outer_validity
(void
)
785 halfword p
; /* points to inserted token list
*/
786 halfword q
; /* auxiliary pointer
*/
787 if
(suppress_outer_error_par
)
789 if
(scanner_status
!= normal
) {
790 deletions_allowed
= false
;
791 /* Back up an outer control sequence so that it can be reread
; */
792 /* An outer control sequence that occurs in a \.
{\\read
} will not be reread
,
793 since the error recovery for \.
{\\read
} is not very powerful.
*/
795 if
((istate
== token_list
) ||
(iname
< 1) ||
(iname
> 17)) {
797 token_info
(p
) = cs_token_flag
+ cur_cs
;
798 begin_token_list
(p
, backed_up
); /* prepare to read the control sequence again
*/
800 cur_cmd
= spacer_cmd
;
801 cur_chr
= ' '
; /* replace it by a space
*/
803 if
(scanner_status
> skipping
) {
804 const char
*errhlp
[] = {
805 "I suspect you have forgotten a `}', causing me",
806 "to read past where you wanted me to stop.",
807 "I'll try to recover; but if the error is serious,",
808 "you'd better type `E' or `X' now and fix your file.",
812 const char
*startmsg
;
813 const char
*scannermsg
;
814 /* Tell the user what has run away and try to recover
*/
815 runaway
(); /* print a definition
, argument
, or preamble
*/
817 startmsg
= "File ended";
820 startmsg
= "Forbidden control sequence found";
822 /* Print either `\.
{definition
}' or `\.
{use
}' or `\.
{preamble
}' or `\.
{text
}'
,
823 and insert tokens that should lead to recovery
; */
824 /* The recovery procedure can't be fully understood without knowing more
825 about the \TeX\ routines that should be aborted
, but we can sketch the
826 ideas here
: For a runaway definition we will insert a right brace
; for a
827 runaway preamble
, we will insert a special \.
{\\cr
} token and a right
828 brace
; and for a runaway argument
, we will set |long_state| to
829 |outer_call| and insert \.
{\\par
}.
*/
831 switch
(scanner_status
) {
833 scannermsg
= "definition";
834 token_info
(p
) = right_brace_token
+ '
}'
;
838 token_info
(p
) = par_token
;
839 long_state
= outer_call_cmd
;
842 scannermsg
= "preamble";
843 token_info
(p
) = right_brace_token
+ '
}'
;
847 token_info
(p
) = cs_token_flag
+ frozen_cr
;
848 align_state
= -1000000;
852 token_info
(p
) = right_brace_token
+ '
}'
;
854 default
: /* can't happen
*/
855 scannermsg
= "unknown";
857 } /*there are no other cases
*/
858 begin_token_list
(p
, inserted
);
859 snprintf
(errmsg
, 255, "%s while scanning %s of %s",
860 startmsg
, scannermsg
, cs_to_string
(warning_index
));
861 tex_error
(errmsg
, errhlp
);
864 const char
*errhlp_no
[] = {
865 "The file ended while I was skipping conditional text.",
866 "This kind of error happens when you say `\\if...' and forget",
867 "the matching `\\fi'. I've inserted a `\\fi'; this might work.",
870 const char
*errhlp_cs
[] = {
871 "A forbidden control sequence occurred in skipped text.",
872 "This kind of error happens when you say `\\if...' and forget",
873 "the matching `\\fi'. I've inserted a `\\fi'; this might work.",
876 const char
**errhlp
= (const char
**) errhlp_no
;
882 ss
= cmd_chr_to_string
(if_test_cmd
, cur_if
);
883 snprintf
(errmsg
, 255, "Incomplete %s; all text was ignored after line %d",
884 ss
, (int
) skip_line
);
886 /* Incomplete \\if...
*/
887 cur_tok
= cs_token_flag
+ frozen_fi
;
888 /* back up one inserted token and call |error|
*/
890 OK_to_interrupt
= false
;
892 token_type
= inserted
;
893 OK_to_interrupt
= true
;
894 tex_error
(errmsg
, errhlp
);
897 deletions_allowed
= true
;
906 The other variant gives less clutter in tracing cache usage when profiling and for
907 some files
(like the manual
) also a bit of a speedup.
910 static boolean get_next_file
(void
)
913 if
(iloc
<= ilimit
) {
914 /* current line not yet finished
*/
915 do_buffer_to_unichar
(cur_chr
, iloc
);
918 if
(detokenized_line
()) {
919 cur_cmd
= (cur_chr
== ' ' ?
10 : 12);
921 do_get_cat_code
(cur_cmd
, cur_chr
);
924 Change state if necessary
, and |goto switch| if the current
925 character should be ignored
, or |goto reswitch| if the current
926 character changes to another
;
928 The following
48-way switch accomplishes the scanning quickly
, assuming
929 that a decent C compiler has translated the code. Note that the numeric
930 values for |mid_line|
, |skip_blanks|
, and |new_line| are spaced
931 apart from each other by |max_char_code
+1|
, so we can add a character's
932 command code to the state to get a single number that characterizes both.
934 Remark
[ls
/hh
]: checking performance indicated that this switch was the
935 cause of many branch prediction errors but changing it to
:
937 c
= istate
+ cur_cmd
;
938 if
(c
== (mid_line
+ letter_cmd
) || c
== (mid_line
+ other_char_cmd
)) {
940 } else if
(c
>= new_line
) {
943 } else if
(c
>= skip_blanks
) {
946 } else if
(c
>= mid_line
) {
954 gives as many prediction errors. So
, we can indeed assume that the compiler
955 does the right job
, or that there is simply no other way.
958 switch
(istate
+ cur_cmd
) {
959 case mid_line
+ ignore_cmd
:
960 case skip_blanks
+ ignore_cmd
:
961 case new_line
+ ignore_cmd
:
962 case skip_blanks
+ spacer_cmd
:
963 case new_line
+ spacer_cmd
:
964 /* Cases where character is ignored
*/
967 case mid_line
+ escape_cmd
:
968 case new_line
+ escape_cmd
:
969 case skip_blanks
+ escape_cmd
:
970 /* Scan a control sequence ...
; */
971 istate
= (unsigned char
) scan_control_sequence
();
972 if
(! suppress_outer_error_par
&& cur_cmd >= outer_call_cmd)
973 check_outer_validity
();
975 case mid_line
+ active_char_cmd
:
976 case new_line
+ active_char_cmd
:
977 case skip_blanks
+ active_char_cmd
:
978 /* Process an active-character
*/
979 cur_cs
= active_to_cs
(cur_chr
, false
);
980 cur_cmd
= eq_type
(cur_cs
);
981 cur_chr
= equiv
(cur_cs
);
983 if
(! suppress_outer_error_par
&& cur_cmd >= outer_call_cmd)
984 check_outer_validity
();
986 case mid_line
+ sup_mark_cmd
:
987 case new_line
+ sup_mark_cmd
:
988 case skip_blanks
+ sup_mark_cmd
:
989 /* If this |sup_mark| starts
*/
990 if
(process_sup_mark
())
995 case mid_line
+ invalid_char_cmd
:
996 case new_line
+ invalid_char_cmd
:
997 case skip_blanks
+ invalid_char_cmd
:
998 /* Decry the invalid character and |goto restart|
; */
999 invalid_character_error
();
1000 return false
; /* because state may be |token_list| now
*/
1002 case mid_line
+ spacer_cmd
:
1003 /* Enter |skip_blanks| state
, emit a space
; */
1004 istate
= skip_blanks
;
1007 case mid_line
+ car_ret_cmd
:
1009 Finish line
, emit a space. When a character of type |spacer| gets through
, its
1010 character code is changed to $\.
{"\ "}=040$. This means that the ASCII codes
1011 for tab and space
, and for the space inserted at the end of a line
, will be
1012 treated alike when macro parameters are being matched. We do this since such
1013 characters are indistinguishable on most computer terminal displays.
1016 cur_cmd
= spacer_cmd
;
1019 case skip_blanks
+ car_ret_cmd
:
1020 case mid_line
+ comment_cmd
:
1021 case new_line
+ comment_cmd
:
1022 case skip_blanks
+ comment_cmd
:
1023 /* Finish line
, |goto switch|
; */
1027 case new_line
+ car_ret_cmd
:
1028 /* Finish line
, emit a \.
{\\par
}; */
1031 cur_cmd
= eq_type
(cur_cs
);
1032 cur_chr
= equiv
(cur_cs
);
1033 if
(! suppress_outer_error_par
&& cur_cmd >= outer_call_cmd)
1034 check_outer_validity
();
1036 case skip_blanks
+ left_brace_cmd
:
1037 case new_line
+ left_brace_cmd
:
1040 case mid_line
+ left_brace_cmd
:
1043 case skip_blanks
+ right_brace_cmd
:
1044 case new_line
+ right_brace_cmd
:
1047 case mid_line
+ right_brace_cmd
:
1050 case mid_line
+ math_shift_cmd
:
1051 case mid_line
+ tab_mark_cmd
:
1052 case mid_line
+ mac_param_cmd
:
1053 case mid_line
+ sub_mark_cmd
:
1054 case mid_line
+ letter_cmd
:
1055 case mid_line
+ other_char_cmd
:
1058 case skip_blanks
+ math_shift
:
1059 case skip_blanks
+ tab_mark
:
1060 case skip_blanks
+ mac_param
:
1061 case skip_blanks
+ sub_mark
:
1062 case skip_blanks
+ letter
:
1063 case skip_blanks
+ other_char
:
1064 case new_line
+ math_shift
:
1065 case new_line
+ tab_mark
:
1066 case new_line
+ mac_param
:
1067 case new_line
+ sub_mark
:
1068 case new_line
+ letter
:
1069 case new_line
+ other_char
:
1079 Move to next line of file
,
1080 or |goto restart| if there is no next line
,
1081 or |return| if a \.
{\\read
} line has finished
;
1084 next_line_retval r
= next_line
();
1085 if
(r
== next_line_return
) {
1087 } else if
(r
== next_line_restart
) {
1099 /* 10 times less Bim in callgrind
*/
1102 escape_cmd left_brace_cmd right_brace_cmd math_shift_cmd
1103 tab_mark_cmd car_ret_cmd mac_param_cmd sup_mark_cmd
1104 sub_mark_cmd ignore_cmd spacer_cmd letter_cmd
1105 other_char_cmd active_char_cmd comment_cmd invalid_char_cmd
1108 static boolean get_next_file
(void
)
1112 if
(iloc
<= ilimit
) {
1113 /* current line not yet finished
*/
1114 do_buffer_to_unichar
(cur_chr
, iloc
);
1116 if
(detokenized_line
()) {
1117 cur_cmd
= (cur_chr
== ' ' ?
10 : 12);
1119 do_get_cat_code
(cur_cmd
, cur_chr
);
1122 Change state if necessary
, and |goto switch| if the current
1123 character should be ignored
, or |goto reswitch| if the current
1124 character changes to another
;
1126 c
= istate
+ cur_cmd
;
1127 if
(c
== (mid_line
+ letter_cmd
) || c
== (mid_line
+ other_char_cmd
)) {
1129 } else if
(c
>= new_line
) {
1130 switch
(c-new_line
) {
1132 istate
= (unsigned char
) scan_control_sequence
();
1133 if
(! suppress_outer_error_par
&& cur_cmd >= outer_call_cmd)
1134 check_outer_validity
();
1136 case left_brace_cmd
:
1140 case right_brace_cmd
:
1144 case math_shift_cmd
:
1151 /* Finish line
, emit a \.
{\\par
}; */
1154 cur_cmd
= eq_type
(cur_cs
);
1155 cur_chr
= equiv
(cur_cs
);
1156 if
(! suppress_outer_error_par
&& cur_cmd >= outer_call_cmd)
1157 check_outer_validity
();
1163 if
(process_sup_mark
())
1175 /* Cases where character is ignored
*/
1180 case other_char_cmd
:
1183 case active_char_cmd
:
1184 cur_cs
= active_to_cs
(cur_chr
, false
);
1185 cur_cmd
= eq_type
(cur_cs
);
1186 cur_chr
= equiv
(cur_cs
);
1188 if
(! suppress_outer_error_par
&& cur_cmd >= outer_call_cmd)
1189 check_outer_validity
();
1194 case invalid_char_cmd
:
1195 invalid_character_error
();
1196 return false
; /* because state may be |token_list| now
*/
1201 } else if
(c
>= skip_blanks
) {
1202 switch
(c-skip_blanks
) {
1204 /* Scan a control sequence ...
; */
1205 istate
= (unsigned char
) scan_control_sequence
();
1206 if
(! suppress_outer_error_par
&& cur_cmd >= outer_call_cmd)
1207 check_outer_validity
();
1209 case left_brace_cmd
:
1213 case right_brace_cmd
:
1217 case math_shift_cmd
:
1230 /* If this |sup_mark| starts
*/
1231 if
(process_sup_mark
())
1246 case other_char_cmd
:
1249 case active_char_cmd
:
1250 cur_cs
= active_to_cs
(cur_chr
, false
);
1251 cur_cmd
= eq_type
(cur_cs
);
1252 cur_chr
= equiv
(cur_cs
);
1254 if
(! suppress_outer_error_par
&& cur_cmd >= outer_call_cmd)
1255 check_outer_validity
();
1258 /* Finish line
, |goto switch|
; */
1261 case invalid_char_cmd
:
1262 /* Decry the invalid character and |goto restart|
; */
1263 invalid_character_error
();
1264 return false
; /* because state may be |token_list| now
*/
1269 } else if
(c
>= mid_line
) {
1270 switch
(c-mid_line
) {
1272 istate
= (unsigned char
) scan_control_sequence
();
1273 if
(! suppress_outer_error_par
&& cur_cmd >= outer_call_cmd)
1274 check_outer_validity
();
1276 case left_brace_cmd
:
1279 case right_brace_cmd
:
1282 case math_shift_cmd
:
1288 Finish line
, emit a space. When a character of type |spacer| gets through
, its
1289 character code is changed to $\.
{"\ "}=040$. This means that the ASCII codes
1290 for tab and space
, and for the space inserted at the end of a line
, will be
1291 treated alike when macro parameters are being matched. We do this since such
1292 characters are indistinguishable on most computer terminal displays.
1295 cur_cmd
= spacer_cmd
;
1301 if
(process_sup_mark
())
1311 /* Enter |skip_blanks| state
, emit a space
; */
1312 istate
= skip_blanks
;
1318 case other_char_cmd
:
1321 case active_char_cmd
:
1322 cur_cs
= active_to_cs
(cur_chr
, false
);
1323 cur_cmd
= eq_type
(cur_cs
);
1324 cur_chr
= equiv
(cur_cs
);
1326 if
(! suppress_outer_error_par
&& cur_cmd >= outer_call_cmd)
1327 check_outer_validity
();
1332 case invalid_char_cmd
:
1333 invalid_character_error
();
1334 return false
; /* because state may be |token_list| now
*/
1348 Move to next line of file
, or |goto restart| if there is no next line
,
1349 or |return| if a \.
{\\read
} line has finished
;
1352 next_line_retval r
= next_line
();
1353 if
(r
== next_line_return
) {
1355 } else if
(r
== next_line_restart
) {
1367 @ Notice that a code like \.
{\^\^
8} becomes \.x if not followed by a hex digit.
1368 We only support a limited set
:
/* True when |a| is one of '0'..'9' or lowercase 'a'..'f'; uppercase letters are
   not matched. The argument is parenthesized so that an expression may be passed
   safely, but it is still evaluated more than once, so avoid side effects. */
#define is_hex(a) (((a)>='0'&&(a)<='9')||((a)>='a'&&(a)<='f'))
1378 #define add_nybble
(c
) \
1380 cur_chr
=(cur_chr
<<4)+c-'
0'
; \
1382 cur_chr
=(cur_chr
<<4)+c-'a'
+10; \
1385 #define set_nybble
(c
) \
1392 #define one_hex_to_cur_chr
(c1
) \
1395 #define two_hex_to_cur_chr
(c1
,c2
) \
1399 #define four_hex_to_cur_chr
(c1
,c2
,c3
,c4
) \
1400 two_hex_to_cur_chr
(c1
,c2
); \
1404 #define six_hex_to_cur_chr
(c1
,c2
,c3
,c4
,c5
,c6
) \
1405 four_hex_to_cur_chr
(c1
,c2
,c3
,c4
); \
1409 static boolean process_sup_mark
(void
)
1411 if
(cur_chr
== buffer
[iloc
]) {
1412 if
(iloc
< ilimit
) {
1413 if
((cur_chr
== buffer
[iloc
+ 1]) && (cur_chr == buffer[iloc + 2])) {
1414 if
((cur_chr
== buffer
[iloc
+ 3]) && (cur_chr == buffer[iloc + 4])) {
1416 if
((iloc
+ 10) <= ilimit
) {
1417 int c1
= buffer
[iloc
+ 5];
1418 int c2
= buffer
[iloc
+ 6];
1419 int c3
= buffer
[iloc
+ 7];
1420 int c4
= buffer
[iloc
+ 8];
1421 int c5
= buffer
[iloc
+ 9];
1422 int c6
= buffer
[iloc
+ 10];
1423 if
(is_hex
(c1
) && is_hex(c2) && is_hex(c3) &&
1424 is_hex
(c4
) && is_hex(c5) && is_hex(c6)) {
1426 six_hex_to_cur_chr
(c1
,c2
,c3
,c4
,c5
,c6
);
1429 tex_error
("^^^^^^ needs six hex digits", NULL);
1432 tex_error
("^^^^^^ needs six hex digits, end of input", NULL);
1436 if
((iloc
+ 6) <= ilimit
) {
1437 int c1
= buffer
[iloc
+ 3];
1438 int c2
= buffer
[iloc
+ 4];
1439 int c3
= buffer
[iloc
+ 5];
1440 int c4
= buffer
[iloc
+ 6];
1441 if
(is_hex
(c1
) && is_hex(c2) && is_hex(c3) && is_hex(c4)) {
1443 four_hex_to_cur_chr
(c1
,c2
,c3
,c4
);
1446 tex_error
("^^^^ needs four hex digits", NULL);
1449 tex_error
("^^^^ needs four hex digits, end of input", NULL);
1454 if
((iloc
+ 2) <= ilimit
) {
1455 int c1
= buffer
[iloc
+ 1];
1456 int c2
= buffer
[iloc
+ 2];
1457 if
(is_hex
(c1
) && is_hex(c2)) {
1459 two_hex_to_cur_chr
(c1
,c2
);
1463 /* go on
, no error
, good old tex
*/
1468 int c1
= buffer
[iloc
+ 1];
1471 if
(is_hex
(c1
) && (iloc <= ilimit)) {
1472 int c2
= buffer
[iloc
];
1475 two_hex_to_cur_chr
(c1
,c2
);
1479 cur_chr
= (c1
< 0100 ? c1
+ 0100 : c1
- 0100);
1487 @ Control sequence names are scanned only when they appear in some line of a
1488 file
; once they have been scanned the first time
, their |eqtb| location serves as
1489 a unique identification
, so \TeX\ doesn't need to refer to the original name any
1490 more except when it prints the equivalent in symbolic form.
1492 The program that scans a control sequence has been written carefully in order to
1493 avoid the blowups that might otherwise occur if a malicious user tried something
1494 like `\.
{\\catcode\'
15=0}'. The algorithm might look at |buffer
[ilimit
+1]|
, but
1495 it never looks at |buffer
[ilimit
+2]|.
1497 If expanded characters like `\.
{\^\^A
}' or `\.
{\^\^df
}' appear in or just
1498 following a control sequence name
, they are converted to single characters in the
1499 buffer and the process is repeated
, slowly but surely.
1502 static boolean check_expanded_code
(int
*kk
); /* below
*/
1504 static int scan_control_sequence
(void
)
1506 int retval
= mid_line
;
1507 if
(iloc
> ilimit
) {
1508 cur_cs
= null_cs
; /* |state| is irrelevant in this case
*/
1510 register int cat
; /* |cat_code
(cur_chr
)|
, usually
*/
1513 do_buffer_to_unichar
(cur_chr
, k
);
1514 do_get_cat_code
(cat
, cur_chr
);
1515 if
(cat
!= letter_cmd || k
> ilimit
) {
1516 retval
= (cat
== spacer_cmd ? skip_blanks
: mid_line
);
1517 if
(cat
== sup_mark_cmd
&& check_expanded_code(&k)) /* If an expanded...; */
1520 retval
= skip_blanks
;
1522 do_buffer_to_unichar
(cur_chr
, k
);
1523 do_get_cat_code
(cat
, cur_chr
);
1524 } while
(cat
== letter_cmd
&& k <= ilimit);
1526 if
(cat
== sup_mark_cmd
&& check_expanded_code(&k)) /* If an expanded...; */
1528 if
(cat
!= letter_cmd
) {
1529 /* backtrack one character which can be utf
*/
1532 if
(cur_chr
> 0xFFFF)
1534 if
(cur_chr
> 0x7FF)
1539 if
(cur_chr
<= 0x7F) {
1540 k
-= 1; /* in most cases
*/
1541 } else if
(cur_chr
> 0xFFFF) {
1543 } else if
(cur_chr
> 0x7FF) {
1545 } else
/* if
(cur_chr
> 0x7F) */ {
1548 /* now |k| points to first nonletter
*/
1551 cur_cs
= id_lookup
(iloc
, k
- iloc
);
1556 cur_cmd
= eq_type
(cur_cs
);
1557 cur_chr
= equiv
(cur_cs
);
1561 @ Whenever we reach the following piece of code
, we will have
1562 |cur_chr
=buffer
[k-1
]| and |k
<=ilimit
+1| and
1563 |cat
=get_cat_code
(cat_code_table
,cur_chr
)|. If an expanded code like \.
{\^\^A
} or
1564 \.
{\^\^df
} appears in |buffer
[(k-1
)..
(k
+1)]| or |buffer
[(k-1
)..
(k
+2)]|
, we will
1565 store the corresponding code in |buffer
[k-1
]| and shift the rest of the buffer
1566 left two or three places.
1569 static boolean check_expanded_code
(int
*kk
)
1574 if
(buffer
[k
] == cur_chr
&& k < ilimit) {
1575 if
((cur_chr
== buffer
[k
+ 1]) && (cur_chr == buffer[k + 2])) {
1576 if
((cur_chr
== buffer
[k
+ 3]) && (cur_chr == buffer[k + 4])) {
1577 if
((k
+ 10) <= ilimit
) {
1578 int c1
= buffer
[k
+ 6 - 1];
1579 int c2
= buffer
[k
+ 6];
1580 int c3
= buffer
[k
+ 6 + 1];
1581 int c4
= buffer
[k
+ 6 + 2];
1582 int c5
= buffer
[k
+ 6 + 3];
1583 int c6
= buffer
[k
+ 6 + 4];
1584 if
(is_hex
(c1
) && is_hex(c2) && is_hex(c3) && is_hex(c4) && is_hex(c5) && is_hex(c6)) {
1586 six_hex_to_cur_chr
(c1
,c2
,c3
,c4
,c5
,c6
);
1588 tex_error
("^^^^^^ needs six hex digits", NULL);
1591 tex_error
("^^^^^^ needs six hex digits, end of input", NULL);
1594 if
((k
+ 6) <= ilimit
) {
1595 int c1
= buffer
[k
+ 4 - 1];
1596 int c2
= buffer
[k
+ 4];
1597 int c3
= buffer
[k
+ 4 + 1];
1598 int c4
= buffer
[k
+ 4 + 2];
1599 if
(is_hex
(c1
) && is_hex(c2) && is_hex(c3) && is_hex(c4)) {
1601 four_hex_to_cur_chr
(c1
,c2
,c3
,c4
);
1603 tex_error
("^^^^ needs four hex digits", NULL);
1606 tex_error
("^^^^ needs four hex digits, end of input", NULL);
1610 int c1
= buffer
[k
+ 1];
1613 if
(is_hex
(c1
) && (k + 2) <= ilimit) {
1614 int c2
= buffer
[k
+ 2];
1617 two_hex_to_cur_chr
(c1
,c2
);
1619 cur_chr
= (c1
< 0100 ? c1
+ 0100 : c1
- 0100);
1622 cur_chr
= (c1
< 0100 ? c1
+ 0100 : c1
- 0100);
1630 if
(cur_chr
<= 0x7F) {
1631 buffer
[k
- 1] = (packed_ASCII_code
) cur_chr
;
1632 } else if
(cur_chr
<= 0x7FF) {
1633 buffer
[k
- 1] = (packed_ASCII_code
) (0xC0 + cur_chr
/ 0x40);
1636 buffer
[k
- 1] = (packed_ASCII_code
) (0x80 + cur_chr
% 0x40);
1637 } else if
(cur_chr
<= 0xFFFF) {
1638 buffer
[k
- 1] = (packed_ASCII_code
) (0xE0 + cur_chr
/ 0x1000);
1641 buffer
[k
- 1] = (packed_ASCII_code
) (0x80 + (cur_chr
% 0x1000) / 0x40);
1644 buffer
[k
- 1] = (packed_ASCII_code
) (0x80 + (cur_chr
% 0x1000) % 0x40);
1646 buffer
[k
- 1] = (packed_ASCII_code
) (0xF0 + cur_chr
/ 0x40000);
1649 buffer
[k
- 1] = (packed_ASCII_code
) (0x80 + (cur_chr
% 0x40000) / 0x1000);
1652 buffer
[k
- 1] = (packed_ASCII_code
) (0x80 + ((cur_chr
% 0x40000) % 0x1000) / 0x40);
1655 buffer
[k
- 1] = (packed_ASCII_code
) (0x80 + ((cur_chr
% 0x40000) % 0x1000) % 0x40);
1658 ilimit
= ilimit
- d
;
1659 while
(l
<= ilimit
) {
1660 buffer
[l
] = buffer
[l
+ d
];
1669 @ All of the easy branches of |get_next| have now been taken care of. There is
1672 @c static next_line_retval next_line
(void
)
1674 boolean inhibit_eol
= false
; /* a way to end a pseudo file without trailing space
*/
1676 /* Read next line of file into |buffer|
, or |goto restart| if the file has ended
*/
1681 if
(pseudo_input
()) { /* not end of file
*/
1682 firm_up_the_line
(); /* this sets |ilimit|
*/
1683 line_catcode_table
= DEFAULT_CAT_TABLE
;
1684 if
((iname
== 19) && (pseudo_lines(pseudo_files) == null))
1686 } else if
((every_eof_par
!= null
) && !eof_seen[iindex]) {
1688 eof_seen
[iindex
] = true
; /* fake one empty line
*/
1690 begin_token_list
(every_eof_par
, every_eof_text
);
1691 return next_line_restart
;
1697 if
(luacstring_input
()) { /* not end of strings
*/
1699 line_catcode_table
= (short
) luacstring_cattable
();
1700 line_partial
= (signed char
) luacstring_partial
();
1701 if
(luacstring_final_line
() || line_partial
1702 || line_catcode_table
== NO_CAT_TABLE
)
1710 if
(lua_input_ln
(cur_file
, 0, true
)) { /* not end of file
*/
1711 firm_up_the_line
(); /* this sets |ilimit|
*/
1712 line_catcode_table
= DEFAULT_CAT_TABLE
;
1713 } else if
((every_eof_par
!= null
) && (!eof_seen[iindex])) {
1715 eof_seen
[iindex
] = true
; /* fake one empty line
*/
1716 begin_token_list
(every_eof_par
, every_eof_text
);
1717 return next_line_restart
;
1725 if
(tracing_nesting_par
> 0)
1726 if
((grp_stack
[in_open
] != cur_boundary
) ||
(if_stack
[in_open
] != cond_ptr
))
1727 if
(!((iname
== 19) ||
(iname
== 21))) {
1728 /* give warning for some unfinished groups and
/or conditionals
*/
1731 if
((iname
> 21) ||
(iname
== 20)) {
1732 report_stop_file
(filetype_tex
);
1736 /* lua input or \.
{\\scantextokens
} */
1737 if
(iname
== 21 || iname
== 19) {
1741 if
(! suppress_outer_error_par
)
1742 check_outer_validity
();
1744 return next_line_restart
;
1746 if
(inhibit_eol || end_line_char_inactive
)
1749 buffer
[ilimit
] = (packed_ASCII_code
) end_line_char_par
;
1751 iloc
= istart
; /* ready to read
*/
1753 if
(!terminal_input
) {
1754 /* \.
{\\read
} line has ended
*/
1757 return next_line_return
; /* OUTER */
1759 if
(input_ptr
> 0) {
1760 /* text was inserted during error recovery
*/
1762 return next_line_restart
; /* resume previous level
*/
1764 if
(selector
< log_only
)
1766 if
(interaction
> nonstop_mode
) {
1767 if
(end_line_char_inactive
)
1769 if
(ilimit
== istart
) {
1770 /* previous line was empty
*/
1771 tprint_nl
("(Please type a command or say `\\end')");
1775 prompt_input
("*"); /* input on-line into |buffer|
*/
1777 if
(end_line_char_inactive
)
1780 buffer
[ilimit
] = (packed_ASCII_code
) end_line_char_par
;
1785 Nonstop mode
, which is intended for overnight batch processing
,
1786 never waits for on-line input.
1788 fatal_error
("*** (job aborted, no legal \\end found)");
1791 return next_line_ok
;
1794 @ Let's consider now what happens when |get_next| is looking at a token list.
1797 static boolean get_next_tokenlist
(void
)
1799 register halfword t
= token_info
(iloc
);
1800 iloc
= token_link
(iloc
); /* move to next
*/
1801 if
(t
>= cs_token_flag
) {
1802 /* a control sequence token
*/
1803 cur_cs
= t
- cs_token_flag
;
1804 cur_cmd
= eq_type
(cur_cs
);
1805 if
(cur_cmd
>= outer_call_cmd
) {
1806 if
(cur_cmd
== dont_expand_cmd
) {
1808 Get the next token
, suppressing expansion. The present point in the program
1809 is reached only when the |expand| routine has inserted a special marker into
1810 the input. In this special case
, |token_info
(iloc
)| is known to be a control
1811 sequence token
, and |token_link
(iloc
)=null|.
1813 cur_cs
= token_info
(iloc
) - cs_token_flag
;
1815 cur_cmd
= eq_type
(cur_cs
);
1816 if
(cur_cmd
> max_command_cmd
) {
1817 cur_cmd
= relax_cmd
;
1818 cur_chr
= no_expand_flag
;
1821 } else if
(! suppress_outer_error_par
) {
1822 check_outer_validity
();
1825 cur_chr
= equiv
(cur_cs
);
1827 cur_cmd
= token_cmd
(t
);
1828 cur_chr
= token_chr
(t
);
1830 case left_brace_cmd
:
1833 case right_brace_cmd
:
1837 /* Insert macro parameter and |goto restart|
; */
1838 begin_token_list
(param_stack
[param_start
+ cur_chr
- 1], parameter
);
1846 @ Now we're ready to take the plunge into |get_next| itself. Parts of this
1847 routine are executed more often than any other instructions of \TeX.
1848 @^mastication@
>@^inner loop@
>
1850 @ sets |cur_cmd|
, |cur_chr|
, |cur_cs| to next token
1857 if
(istate
!= token_list
) {
1858 /* Input from external file
, |goto restart| if no input found
*/
1859 if
(!get_next_file
())
1864 goto RESTART
; /* list exhausted
, resume previous level
*/
1865 } else if
(!get_next_tokenlist
()) {
1866 goto RESTART
; /* parameter needs to be expanded
*/
1869 /* If an alignment entry has just ended
, take appropriate action
*/
1870 if
((cur_cmd
== tab_mark_cmd || cur_cmd
== car_ret_cmd
) && align_state == 0) {
1871 insert_vj_template
();
1876 @ Since |get_next| is used so frequently in \TeX
, it is convenient to define
1877 three related procedures that do a little more
:
1879 \yskip\hang|get_token| not only sets |cur_cmd| and |cur_chr|
, it also sets
1880 |cur_tok|
, a packed halfword version of the current token.
1882 \yskip\hang|get_x_token|
, meaning ``get an expanded token
,'' is like |get_token|
,
1883 but if the current token turns out to be a user-defined control sequence
(i.e.
, a
1884 macro call
), or a conditional
, or something like \.
{\\topmark
} or
1885 \.
{\\expandafter
} or \.
{\\csname
}, it is eliminated from the input by beginning
1886 the expansion of the macro or the evaluation of the conditional.
1888 \yskip\hang|x_token| is like |get_x_token| except that it assumes that |get_next|
1889 has already been called.
1891 \yskip\noindent In fact
, these three procedures account for almost every use of
1894 No new control sequences will be defined except during a call of |get_token|
, or
1895 when \.
{\\csname
} compresses a token list
, because |no_new_control_sequence| is
1896 always |true| at other times.
1898 @ sets |cur_cmd|
, |cur_chr|
, |cur_tok|
1901 void get_token
(void
)
1903 no_new_control_sequence
= false
;
1905 no_new_control_sequence
= true
;
1907 cur_tok
= token_val
(cur_cmd
, cur_chr
);
1909 cur_tok
= cs_token_flag
+ cur_cs
;
1912 @ changes the string |s| to a token list
1915 halfword string_to_toks
(const char
*ss
)
1917 halfword p
; /* tail of the token list
*/
1918 halfword q
; /* new node being added to the token list via |store_new_token|
*/
1919 halfword t
; /* token being appended
*/
1921 const char
*se
= ss
+ strlen
(s
);
1922 p
= temp_token_head
;
1923 set_token_link
(p
, null
);
1925 t
= (halfword
) str2uni
((const unsigned char
*) s
);
1930 t
= other_token
+ t
;
1931 fast_store_new_token
(t
);
1933 return token_link
(temp_token_head
);
1936 @ The token lists for macros and for other things like \.
{\\mark
} and
1937 \.
{\\output
} and \.
{\\write
} are produced by a procedure called |scan_toks|.
1939 Before we get into the details of |scan_toks|
, let's consider a much simpler
1940 task
, that of converting the current string into a token list. The |str_toks|
1941 function does this
; it classifies spaces as type |spacer| and everything else as
1944 The token list created by |str_toks| begins at |link
(temp_token_head
)| and ends
1945 at the value |p| that is returned.
(If |p
=temp_token_head|
, the list is empty.
)
1947 |lua_str_toks| is almost identical
, but it also escapes the three symbols that
1948 |lua| considers special while scanning a literal string
1950 @ changes the string |str_pool
[b..pool_ptr
]| to a token list
1953 halfword lua_str_toks
(lstring b
)
1955 halfword p
; /* tail of the token list
*/
1956 halfword q
; /* new node being added to the token list via |store_new_token|
*/
1957 halfword t
; /* token being appended
*/
1958 unsigned char
*k
; /* index into string
*/
1959 p
= temp_token_head
;
1960 set_token_link
(p
, null
);
1961 k
= (unsigned char
*) b.s
;
1962 while
(k
< (unsigned char
*) b.s
+ b.l
) {
1963 t
= pool_to_unichar
(k
);
1968 if
((t
== '\\'
) ||
(t
== '
"') || (t == '\'') || (t == 10) || (t == 13))
1969 fast_store_new_token(other_token + '\\');
1974 t = other_token + t;
1976 fast_store_new_token(t);
1981 @ Incidentally, the main reason for wanting |str_toks| is the function
1982 |the_toks|, which has similar input/output characteristics.
1984 @ changes the string |str_pool[b..pool_ptr]| to a token list
1987 halfword str_toks(lstring s)
1989 halfword p; /* tail of the token list */
1990 halfword q; /* new node being added to the token list via |store_new_token| */
1991 halfword t; /* token being appended */
1992 unsigned char *k, *l; /* index into string */
1993 p = temp_token_head;
1994 set_token_link(p, null);
1998 t = pool_to_unichar(k);
2003 t = other_token + t;
2004 fast_store_new_token(t);
2010 hh: most of the converter is similar to the one i made for macro so at some point i
2011 can make a helper; also todo: there is no need to go through the pool
2015 halfword str_scan_toks(int ct, lstring s)
2016 { /* changes the string |str_pool[b..pool_ptr]| to a token list */
2017 halfword p; /* tail of the token list */
2018 halfword q; /* new node being added to the token list via |store_new_token| */
2019 halfword t; /* token being appended */
2020 unsigned char *k, *l; /* index into string */
2022 p = temp_token_head;
2023 set_token_link(p, null);
2027 t = pool_to_unichar(k);
2029 cc = get_cat_code(ct,t);
2031 /* we have a potential control sequence so we check for it */
2035 halfword _cs = null ;
2036 unsigned char *_name = k ;
2038 t = (halfword) str2uni((const unsigned char *) k);
2040 _c = get_cat_code(ct,t);
2043 _lname = _lname + _s ;
2044 } else if (_c == 10) {
2045 /* we ignore a trailing space like normal scanning does */
2053 /* we have a potential \cs */
2054 _cs = string_lookup((const char *) _name, _lname);
2055 if (_cs == undefined_control_sequence) {
2056 /* let's play safe and backtrack */
2057 t = cc * (1<<21) + t ;
2060 t = cs_token_flag + _cs;
2063 /* just a character with some meaning, so \unknown becomes effectively */
2064 /* \\unknown assuming that \\ has some useful meaning of course */
2065 t = cc * (1<<21) + t ;
2070 /* whatever token, so for instance $x^2$ just works given a tex */
2071 /* catcode regime */
2072 t = cc * (1<<21) + t ;
2074 fast_store_new_token(t);
2080 @ Here's part of the |expand| subroutine that we are now ready to complete:
2083 void ins_the_toks(void)
2086 ins_list(token_link(temp_token_head));
2089 #define set_toks_register(n,t,g) { \
2090 int a = (g>0) ? 4 : 0; \
2091 halfword ref = get_avail(); \
2092 set_token_ref_count(ref, 0); \
2093 set_token_link(ref, token_link(t)); \
2094 define(n + toks_base, call_cmd, ref); \
2097 void combine_the_toks(int how)
2102 if (cur_cmd == assign_toks_cmd) {
2103 nt = equiv(cur_cs) - toks_base;
2113 } while (cur_cmd == spacer_cmd);
2114 if (cur_cmd == left_brace_cmd) {
2117 x = scan_toks(false,how > 1); /* expanded or not */
2120 if (source != null) {
2121 halfword target = toks(nt);
2122 if (target == null) {
2123 set_toks_register(nt,source,0);
2125 halfword s = token_link(source);
2127 halfword t = token_link(target);
2129 /* can this happen ? */
2130 set_token_link(target, s);
2131 } else if (odd(how)) {
2133 if (cur_level != eq_level_field(eqtb[toks_base+nt])) {
2134 halfword p = temp_token_head;
2136 set_token_link(p, s); /* s = head, x = tail */
2139 fast_store_new_token(token_info(t));
2142 set_toks_register(nt,temp_token_head,0);
2144 set_token_link(x,t);
2145 set_token_link(target,s);
2149 if (cur_level != eq_level_field(eqtb[toks_base+nt])) {
2150 halfword p = temp_token_head;
2152 set_token_link(p, null);
2154 fast_store_new_token(token_info(t));
2157 set_token_link(p,s);
2158 set_toks_register(nt,temp_token_head,0);
2160 while (token_link(t) != null) {
2163 set_token_link(t,s);
2170 halfword source, ns;
2171 if (cur_cmd == assign_toks_cmd) {
2172 ns = equiv(cur_cs) - toks_base;
2181 if (source != null) {
2182 halfword target = toks(nt);
2183 if (target == null) {
2184 equiv(toks_base+nt) = source;
2185 equiv(toks_base+ns) = null;
2187 halfword s = token_link(source);
2189 halfword t = token_link(target);
2191 set_token_link(target, s);
2192 } else if (odd(how)) {
2195 while (token_link(x) != null) {
2198 set_token_link(x,t);
2199 set_token_link(target,s);
2202 while (token_link(t) != null) {
2205 set_token_link(t,s);
2207 equiv(toks_base+ns) = null;
2214 @ This routine, used in the next one, prints the job name, possibly modified by
2215 the |process_jobname| callback.
2218 static void print_job_name(void)
2221 char *s, *ss; /* C strings for jobname before and after processing */
2222 int callback_id, lua_retval;
2223 s = (char*)str_string(job_name);
2224 callback_id = callback_defined(process_jobname_callback);
2225 if (callback_id > 0) {
2226 lua_retval = run_callback(callback_id, "S-
>S
", s, &ss);
2227 if ((lua_retval == true) && (ss != NULL))
2236 @ Here is a routine that prints the result of a convert command, using the
2237 argument |i|. It returns |false| if it does not know how to print the code |c|. The
2238 function exists because lua code and tex code can both call it to convert
2241 @ Parse optional lua state integer, or an instance name to be stored in |sn| and
2242 get the next non-blank non-relax non-call token.
2246 int scan_lua_state(void)
2251 } while ((cur_cmd == spacer_cmd) || (cur_cmd == relax_cmd));
2253 if (cur_cmd != left_brace_cmd) {
2254 if (scan_keyword("name
")) {
2255 (void) scan_toks(false, true);
2258 scan_register_num();
2259 if (get_lua_name(cur_val))
2260 sn = (cur_val - 65536);
2266 @ The procedure |conv_toks| uses |str_toks| to insert the token list for
2267 |convert| functions into the scanner; `\.{\\outer}' control sequences are allowed
2268 to follow `\.{\\string}' and `\.{\\meaning}'.
2270 The extra temp string |u| is needed because |pdf_scan_ext_toks| incorporates any
2271 pending string in its output. In order to save such a pending string, we have to
2272 create a temporary string that is destroyed immediately after.
2275 #define push_selector { \
2276 old_setting = selector; \
2277 selector = new_string; \
2280 #define pop_selector { \
2281 selector = old_setting; \
2284 static int do_variable_dvi(halfword c)
2289 #define do_variable_backend_int(i) \
2290 cur_cmd = assign_int_cmd; \
2291 cur_val = backend_int_base + i; \
2292 cur_tok = token_val(cur_cmd, cur_val); \
2295 #define do_variable_backend_dimen(i) \
2296 cur_cmd = assign_dimen_cmd; \
2297 cur_val = backend_dimen_base + i; \
2298 cur_tok = token_val(cur_cmd, cur_val); \
2301 #define do_variable_backend_toks(i) \
2302 cur_cmd = assign_toks_cmd; \
2303 cur_val = backend_toks_base + i ; \
2304 cur_tok = token_val(cur_cmd, cur_val); \
2307 static int do_variable_pdf(halfword c)
2309 if (scan_keyword("compresslevel
")) { do_variable_backend_int(c_pdf_compress_level); }
2310 else if (scan_keyword("decimaldigits
")) { do_variable_backend_int(c_pdf_decimal_digits); }
2311 else if (scan_keyword("imageresolution
")) { do_variable_backend_int(c_pdf_image_resolution); }
2312 else if (scan_keyword("pkresolution
")) { do_variable_backend_int(c_pdf_pk_resolution); }
2313 else if (scan_keyword("uniqueresname
")) { do_variable_backend_int(c_pdf_unique_resname); }
2314 else if (scan_keyword("minorversion
")) { do_variable_backend_int(c_pdf_minor_version); }
2315 else if (scan_keyword("pagebox
")) { do_variable_backend_int(c_pdf_pagebox); }
2316 else if (scan_keyword("inclusionerrorlevel
")) { do_variable_backend_int(c_pdf_inclusion_errorlevel); }
2317 else if (scan_keyword("ignoreunknownimages
")) { do_variable_backend_int(c_pdf_ignore_unknown_images); }
2318 else if (scan_keyword("gamma
")) { do_variable_backend_int(c_pdf_gamma); }
2319 else if (scan_keyword("imageapplygamma
")) { do_variable_backend_int(c_pdf_image_apply_gamma); }
2320 else if (scan_keyword("imagegamma
")) { do_variable_backend_int(c_pdf_image_gamma); }
2321 else if (scan_keyword("imagehicolor
")) { do_variable_backend_int(c_pdf_image_hicolor); }
2322 else if (scan_keyword("imageaddfilename
")) { do_variable_backend_int(c_pdf_image_addfilename); }
2323 else if (scan_keyword("objcompresslevel
")) { do_variable_backend_int(c_pdf_obj_compress_level); }
2324 else if (scan_keyword("inclusioncopyfonts
")) { do_variable_backend_int(c_pdf_inclusion_copy_font); }
2325 else if (scan_keyword("gentounicode
")) { do_variable_backend_int(c_pdf_gen_tounicode); }
2326 else if (scan_keyword("pkfixeddpi
")) { do_variable_backend_int(c_pdf_pk_fixed_dpi); }
2327 else if (scan_keyword("suppressoptionalinfo
")) { do_variable_backend_int(c_pdf_suppress_optional_info); }
2328 else if (scan_keyword("omitcidset
")) { do_variable_backend_int(c_pdf_omit_cidset); }
2330 else if (scan_keyword("horigin
")) { do_variable_backend_dimen(d_pdf_h_origin); }
2331 else if (scan_keyword("vorigin
")) { do_variable_backend_dimen(d_pdf_v_origin); }
2332 else if (scan_keyword("threadmargin
")) { do_variable_backend_dimen(d_pdf_thread_margin); }
2333 else if (scan_keyword("destmargin
")) { do_variable_backend_dimen(d_pdf_dest_margin); }
2334 else if (scan_keyword("linkmargin
")) { do_variable_backend_dimen(d_pdf_link_margin); }
2335 else if (scan_keyword("xformmargin
")) { do_variable_backend_dimen(d_pdf_xform_margin); }
2337 else if (scan_keyword("pageattr
")) { do_variable_backend_toks(t_pdf_page_attr); }
2338 else if (scan_keyword("pageresources
")) { do_variable_backend_toks(t_pdf_page_resources); }
2339 else if (scan_keyword("pagesattr
")) { do_variable_backend_toks(t_pdf_pages_attr); }
2340 else if (scan_keyword("xformattr
")) { do_variable_backend_toks(t_pdf_xform_attr); }
2341 else if (scan_keyword("xformresources
")) { do_variable_backend_toks(t_pdf_xform_resources); }
2342 else if (scan_keyword("pkmode
")) { do_variable_backend_toks(t_pdf_pk_mode); }
2343 else if (scan_keyword("trailerid
")) { do_variable_backend_toks(t_pdf_trailer_id); }
2350 static int do_feedback_dvi(halfword c)
2355 /* codes not really needed but cleaner when testing */
2357 #define pdftex_version 140 /* these values will not change any more */
2358 #define pdftex_revision "0" /* these values will not change any more */
2360 static int do_feedback_pdf(halfword c)
2362 int old_setting; /* holds |selector| setting */
2363 int save_scanner_status; /* |scanner_status| upon entry */
2364 halfword save_def_ref; /* |def_ref| upon entry, important if inside `\.{\\message}' */
2365 halfword save_warning_index;
2366 boolean bool; /* temp boolean */
2367 str_number s; /* first temp string */
2368 int ff; /* for use with |set_ff| */
2369 str_number u = 0; /* third temp string, will become non-nil if a string is already being built */
2370 char *str; /* color stack init str */
2372 if (scan_keyword("lastlink
")) {
2374 print_int(pdf_last_link);
2376 } else if (scan_keyword("retval
")) {
2378 print_int(pdf_retval);
2380 } else if (scan_keyword("lastobj
")) {
2382 print_int(pdf_last_obj);
2384 } else if (scan_keyword("lastannot
")) {
2386 print_int(pdf_last_annot);
2388 } else if (scan_keyword("xformname
")) {
2390 check_obj_type(static_pdf, obj_type_xform, cur_val);
2392 print_int(obj_info(static_pdf, cur_val));
2394 } else if (scan_keyword("creationdate
")) {
2395 ins_list(string_to_toks(getcreationdate(static_pdf)));
2396 /* no further action */
2398 } else if (scan_keyword("fontname
")) {
2400 if (cur_val == null_font)
2401 normal_error("pdf backend
", "invalid font identifier when asking 'fontname'
");
2402 pdf_check_vf(cur_val);
2403 if (!font_used(cur_val))
2404 pdf_init_font(static_pdf, cur_val);
2407 print_int(obj_info(static_pdf, pdf_font_num(ff)));
2409 } else if (scan_keyword("fontobjnum
")) {
2411 if (cur_val == null_font)
2412 normal_error("pdf backend
", "invalid font identifier when asking 'objnum'
");
2413 pdf_check_vf(cur_val);
2414 if (!font_used(cur_val))
2415 pdf_init_font(static_pdf, cur_val);
2418 print_int(pdf_font_num(ff));
2420 } else if (scan_keyword("fontsize
")) {
2422 if (cur_val == null_font)
2423 normal_error("pdf backend
", "invalid font identifier when asking 'fontsize'
");
2425 print_scaled(font_size(cur_val));
2428 } else if (scan_keyword("pageref
")) {
2431 normal_error("pdf backend
", "invalid page number when asking 'pageref'
");
2433 print_int(pdf_get_obj(static_pdf, obj_type_page, cur_val, false));
2435 } else if (scan_keyword("colorstackinit
")) {
2436 bool = scan_keyword("page
");
2437 if (scan_keyword("direct
"))
2438 cur_val = direct_always;
2439 else if (scan_keyword("page
"))
2440 cur_val = direct_page;
2441 else if (scan_keyword("raw
"))
2442 cur_val = direct_raw;
2444 cur_val = set_origin;
2445 save_scanner_status = scanner_status;
2446 save_warning_index = warning_index;
2447 save_def_ref = def_ref;
2448 u = save_cur_string();
2449 scan_toks(false, true);
2450 s = tokens_to_string(def_ref);
2451 delete_token_ref(def_ref);
2452 def_ref = save_def_ref;
2453 warning_index = save_warning_index;
2454 scanner_status = save_scanner_status;
2455 str = makecstring(s);
2456 cur_val = newcolorstack(str, cur_val, bool);
2459 cur_val_level = int_val_level;
2461 print_err("Too many color stacks
");
2462 help2("The number of color stacks is limited to
32768.
",
2463 "I'll use the default color stack
0 here.
");
2466 restore_cur_string(u);
2471 } else if (scan_keyword("version
")) {
2473 print_int(pdftex_version);
2475 } else if (scan_keyword("revision
")) {
2476 ins_list(string_to_toks(pdftex_revision));
/*
    Carry out a conversion command. The kind of conversion is taken from
    |cur_chr| on entry (local |c|). Each visible case scans whatever argument
    it needs and then either prints its result (|print_int|, |print_scaled|,
    |tprint|, ...) or inserts a token list directly with |ins_list|.

    The cases that scan a token list with |scan_toks| save |scanner_status|,
    |def_ref| and |warning_index| beforehand and restore them afterwards; the
    string being built is protected with |save_cur_string| and
    |restore_cur_string|.

    The last three statements turn the printed text into a token list
    (|make_string|, |str_toks|) and insert it with |ins_list|.
*/
2484 void conv_toks(void)
2486 int old_setting; /* holds |selector| setting */
2488 int save_scanner_status; /* |scanner_status| upon entry */
2489 halfword save_def_ref; /* |def_ref| upon entry, important if inside `\.{\\message}' */
2490 halfword save_warning_index;
2491 boolean bool; /* temp boolean */
2492 str_number s; /* first temp string */
2493 int sn; /* lua chunk name */
2494 str_number u = 0; /* third temp string, will become non-nil if a string is already being built */
2495 int c = cur_chr; /* desired type of conversion */
2498 /* Scan the argument for command |c| */
2506 case lua_function_code:
2509 normal_error("luafunction
", "invalid number
");
2511 u = save_cur_string();
2513 luafunctioncall(cur_val);
2514 restore_cur_string(u);
2515 if (luacstrings > 0)
2518 /* no further action */
2522 u = save_cur_string();
2523 save_scanner_status = scanner_status;
2524 save_def_ref = def_ref;
2525 save_warning_index = warning_index;
2526 sn = scan_lua_state();
2527 scan_toks(false, true);
2529 warning_index = save_warning_index;
2530 def_ref = save_def_ref;
2531 scanner_status = save_scanner_status;
2533 luatokencall(s, sn);
2534 delete_token_ref(s);
2535 restore_cur_string(u); /* TODO: check this, was different */
2536 if (luacstrings > 0)
2538 /* no further action */
2542 save_scanner_status = scanner_status;
2543 save_warning_index = warning_index;
2544 save_def_ref = def_ref;
2545 u = save_cur_string();
2546 scan_toks(false, true);
2547 warning_index = save_warning_index;
2548 scanner_status = save_scanner_status;
2549 ins_list(token_link(def_ref));
2550 def_ref = save_def_ref;
2551 restore_cur_string(u);
2552 /* no further action */
2555 case math_style_code:
2561 save_scanner_status = scanner_status;
2562 scanner_status = normal;
2564 scanner_status = save_scanner_status;
2572 case cs_string_code:
2573 save_scanner_status = scanner_status;
2574 scanner_status = normal;
2576 scanner_status = save_scanner_status;
2579 sprint_cs_name(cur_cs);
2584 case roman_numeral_code:
2587 print_roman_int(cur_val);
2591 save_scanner_status = scanner_status;
2592 scanner_status = normal;
2594 scanner_status = save_scanner_status;
2605 case lua_escape_string_code:
2609 save_scanner_status = scanner_status;
2610 save_def_ref = def_ref;
2611 save_warning_index = warning_index;
2612 scan_toks(false, true);
2613 bool = in_lua_escape;
2614 in_lua_escape = true;
2615 escstr.s = (unsigned char *) tokenlist_to_cstring(def_ref, false, &l);
2616 escstr.l = (unsigned) l;
2617 in_lua_escape = bool;
2618 delete_token_ref(def_ref);
2619 def_ref = save_def_ref;
2620 warning_index = save_warning_index;
2621 scanner_status = save_scanner_status;
2622 (void) lua_str_toks(escstr);
2623 ins_list(token_link(temp_token_head));
2627 /* no further action */
2635 case font_name_code:
2638 append_string((unsigned char *) font_name(cur_val),(unsigned) strlen(font_name(cur_val)));
2639 if (font_size(cur_val) != font_dsize(cur_val)) {
2641 print_scaled(font_size(cur_val));
2646 case left_margin_kern_code:
2648 if ((box(cur_val) == null) || (type(box(cur_val)) != hlist_node))
2649 normal_error("marginkern
", "a non-empty hbox expected
");
2651 p = list_ptr(box(cur_val));
2652 while ((p != null) && (type(p) == glue_node)) {
2655 if ((p != null) && (type(p) == margin_kern_node) && (subtype(p) == left_side))
2656 print_scaled(width(p));
2662 case right_margin_kern_code:
2664 if ((box(cur_val) == null) || (type(box(cur_val)) != hlist_node))
2665 normal_error("marginkern
", "a non-empty hbox expected
");
2667 p = list_ptr(box(cur_val));
2669 p = tail_of_list(p);
2671 there can be a leftskip, rightskip, penalty and yes, also a disc node with a nesting
2672 node that points to glue spec ... and we don't want to analyze that messy lot
2674 while ((p != null) && (type(p) == glue_node)) {
2677 if ((p != null) && ! ((type(p) == margin_kern_node) && (subtype(p) == right_side))) {
2678 if (type(p) == disc_node) {
2680 if ((q != null) && ((type(q) == margin_kern_node) && (subtype(q) == right_side))) {
2684 officially we should look in the replace but currently protrusion doesn't
2685 work anyway with "foo\discretionary
{}{}{bar-
} " (no following char) so we
2692 if ((p != null) && (type(p) == margin_kern_node) && (subtype(p) == right_side))
2693 print_scaled(width(p));
2699 case uniform_deviate_code:
2702 print_int(unif_rand(cur_val));
2705 case normal_deviate_code:
2707 print_int(norm_rand());
2710 case math_char_class_code:
2714 mval = get_math_code(cur_val);
2716 print_int(mval.class_value);
2720 case math_char_fam_code:
2724 mval = get_math_code(cur_val);
2726 print_int(mval.family_value);
2730 case math_char_slot_code:
2734 mval = get_math_code(cur_val);
2736 print_int(mval.character_value);
2740 case insert_ht_code:
2741 scan_register_num();
2745 while (i >= subtype(vlink(p)))
2747 if (subtype(p) == i)
2748 print_scaled(height(p));
2761 case format_name_code:
2768 case luatex_banner_code:
2770 tprint(luatex_banner);
2773 case luatex_revision_code:
2775 print(get_luatexrevision());
2778 case luatex_date_code:
2780 print_int(get_luatex_date_info());
2785 tprint(eTeX_version_string);
2788 case eTeX_revision_code:
2790 tprint(eTeX_revision);
2793 case font_identifier_code:
2794 confusion("convert
");
2797 confusion("convert
");
2800 str = make_string();
2801 (void) str_toks(str_lstring(str));
2803 ins_list(token_link(temp_token_head));
/*
    Handle a backend feedback request. The visible cases are
    |dvi_feedback_code| and |pdf_feedback_code|: the backend handler
    (|do_feedback_dvi| or |do_feedback_pdf|) is called inside a test that the
    current output mode (|get_o_mode|) is the matching one, and its result is
    kept in |done|. Both cases also contain an "unexpected use" error and
    warning for the paths where the request cannot be served. Any other code
    ends in |confusion|.

    The final statements turn the printed text into a token list
    (|make_string|, |str_toks|) and insert it with |ins_list|.
*/
2806 void do_feedback(void)
2812 case dvi_feedback_code:
2813 if (get_o_mode() == OMODE_DVI) {
2814 done = do_feedback_dvi(c);
2816 tex_error("unexpected use of \\dvifeedback
",null);
2821 normal_warning("dvi backend
","unexpected use of \\dvifeedback
");
2823 } else if (done==2) {
2827 case pdf_feedback_code:
2828 if (get_o_mode() == OMODE_PDF) {
2829 done = do_feedback_pdf(c);
2831 tex_error("unexpected use of \\pdffeedback
",null);
2836 normal_warning("pdf backend
","unexpected use of \\pdffeedback
");
2838 } else if (done==2) {
2843 confusion("feedback
");
2846 str = make_string();
2847 (void) str_toks(str_lstring(str));
2849 ins_list(token_link(temp_token_head));
/*
    Handle a backend variable request. |dvi_variable_code| is passed on to
    |do_variable_dvi| and |pdf_variable_code| to |do_variable_pdf|; the result
    of the handler is kept in |done|. Each case carries an "unexpected use"
    warning for the backend concerned, and any other code ends in |confusion|.
*/
2852 void do_variable(void)
2857 case dvi_variable_code:
2858 done = do_variable_dvi(c);
2861 normal_warning("dvi backend
","unexpected use of \\dvivariable
");
2865 case pdf_variable_code:
2866 done = do_variable_pdf(c);
2869 normal_warning("pdf backend
","unexpected use of \\pdfvariable
");
2874 confusion("variable
");
2880 The following code is not used as we can only set math options and not query them. If
2881 an option is really important we will provide a proper variable. Most options are not
2882 meant for users anyway but for development.
2887 #define do_mathoption_int(i) \
2888 cur_cmd = assign_int_cmd; \
2889 cur_val = mathoption_int_base + i; \
2890 cur_tok = token_val(cur_cmd, cur_val); \
2893 void do_mathoption(void)
2895 if (scan_keyword("old
")) { do_mathoption_int(c_mathoption_no_italic_compensation_code); }
2896 if (scan_keyword("noitaliccompensation
")) { do_mathoption_int(c_mathoption_no_char_italic_code); }
2897 else if (scan_keyword("nocharitalic
")) { do_mathoption_int(c_mathoption_use_old_fraction_scaling_code); }
2898 else if (scan_keyword("useoldfractionscaling
")) { do_mathoption_int(c_mathoption_old_code); }
2899 else if (scan_keyword("umathcodemeaning
")) { do_mathoption_int(c_mathoption_umathcode_meaning_code); }
2904 @ This boolean keeps track of the Lua string escape state.
2906 boolean in_lua_escape;
2908 static int the_convert_string_dvi(halfword c, int i)
/*
    Print one item of pdf backend feedback. The first test checks that the
    output mode is |OMODE_PDF|; after that the item is selected by scanning a
    keyword with |scan_keyword|. Depending on the keyword, |i| is used as an
    object number (|obj_info(static_pdf, i)|), as a font identifier
    (|font_size(i)|) or as a page number (|pdf_get_obj| with |obj_type_page|).
    The value is printed with |print_int| or |print_scaled|.

    NOTE(review): the return statements are not visible in this excerpt; the
    caller stores the result in |done|.
*/
2913 static int the_convert_string_pdf(halfword c, int i)
2916 if (get_o_mode() != OMODE_PDF) {
2918 } else if (scan_keyword("lastlink
")) {
2919 print_int(pdf_last_link);
2920 } else if (scan_keyword("retval
")) {
2921 print_int(pdf_retval);
2922 } else if (scan_keyword("lastobj
")) {
2923 print_int(pdf_last_obj);
2924 } else if (scan_keyword("lastannot
")) {
2925 print_int(pdf_last_annot);
2926 } else if (scan_keyword("xformname
")) {
2927 print_int(obj_info(static_pdf, i));
2928 } else if (scan_keyword("creationdate
")) {
2930 } else if (scan_keyword("fontname
")) {
2932 print_int(obj_info(static_pdf, pdf_font_num(ff)));
2933 } else if (scan_keyword("fontobjnum
")) {
2935 print_int(pdf_font_num(ff));
2936 } else if (scan_keyword("fontsize
")) {
2937 print_scaled(font_size(i));
2939 } else if (scan_keyword("pageref
")) {
2940 print_int(pdf_get_obj(static_pdf, obj_type_page, i, false));
2941 } else if (scan_keyword("colorstackinit
")) {
/*
    Produce the result of conversion |c| for the integer argument |i| as a
    string. Printing is redirected into the string pool by setting |selector|
    to |new_string|; the previous setting is kept in |old_setting| and put
    back at the end. The printed text is collected with |make_string|.

    The conversions that appear only as commented-out |case| labels are not
    handled by this function. The two backend cases delegate to
    |the_convert_string_dvi| and |the_convert_string_pdf| and store their
    result in |done|, which starts out as |true|.
*/
2949 str_number the_convert_string(halfword c, int i)
2951 int old_setting; /* saved |selector| setting */
2953 boolean done = true ;
2954 old_setting = selector;
2955 selector = new_string;
2960 /* case lua_function_code: */
2961 /* case lua_code: */
2962 /* case expanded_code: */
2963 case math_style_code:
2966 /* case string_code: */
2967 /* case cs_string_code: */
2968 case roman_numeral_code:
2971 /* case meaning_code: */
2975 /* lua_escape_string_code: */
2979 case font_name_code:
2980 append_string((unsigned char *) font_name(i),(unsigned) strlen(font_name(i)));
2981 if (font_size(i) != font_dsize(i)) {
2983 print_scaled(font_size(i));
2987 /* left_margin_kern_code: */
2988 /* right_margin_kern_code: */
2989 case uniform_deviate_code:
2990 print_int(unif_rand(i));
2992 case normal_deviate_code:
2993 print_int(norm_rand());
2995 /* math_char_class_code: */
2996 /* math_char_fam_code: */
2997 /* math_char_slot_code: */
2998 /* insert_ht_code: */
3002 case format_name_code:
3005 case luatex_banner_code:
3006 tprint(luatex_banner);
3008 case luatex_revision_code:
3009 print(get_luatexrevision());
3011 case luatex_date_code:
3012 print_int(get_luatex_date_info());
3015 tprint(eTeX_version_string);
3017 case eTeX_revision_code:
3018 tprint(eTeX_revision);
3020 case font_identifier_code:
3021 print_font_identifier(i);
3023 /* backend: this might become obsolete */
3024 case dvi_feedback_code:
3025 done = the_convert_string_dvi(c,i);
3027 case pdf_feedback_code:
3028 done = the_convert_string_pdf(c,i);
3036 ret = make_string();
3038 selector = old_setting;
3042 @ Another way to create a token list is via the \.{\\read} command. The sixteen
3043 files potentially usable for reading appear in the following global variables.
3044 The value of |read_open[n]| will be |closed| if stream number |n| has not been
3045 opened or if it has been fully read; |just_open| if an \.{\\openin} but not a
3046 \.{\\read} has been done; and |normal| if it is open and ready to read the next
3050 FILE *read_file[16]; /* used for \.{\\read} */
3051 int read_open[17]; /* state of |read_file[n]| */
/*
    Mark every read stream as closed: all seventeen entries of |read_open|
    (indices 0 up to and including 16) are set to |closed|.
*/
3053 void initialize_read(void)
3056 for (k = 0; k <= 16; k++)
3057 read_open[k] = closed;
3060 @ The |read_toks| procedure constructs a token list like that for any macro
3061 definition, and makes |cur_val| point to it. Parameter |r| points to the control
3062 sequence that will receive this token list.
/*
    Build a token list from one or more input lines of a read stream.

    |n| is the requested stream number; it is tested against the range 0..15
    and the stream actually used is kept in |m|. |r| is the control sequence
    that will receive the list. NOTE(review): |j| is not used in any visible
    line; presumably it selects the \.{\\readline} variant - confirm.

    |scanner_status| is |defining| while the list is being built and is reset
    to |normal| at the end. |align_state| is set to 1000000 before reading,
    and the outer loop keeps fetching lines until it is back at that value.
    The source of a line depends on |read_open[m]|: |closed| (terminal input),
    |just_open| (first line of the file) or otherwise the next line of the
    file; a file that cannot deliver a line is closed with |lua_a_close_in|.
*/
3065 void read_toks(int n, halfword r, halfword j)
3067 halfword p; /* tail of the token list */
3068 halfword q; /* new node being added to the token list via |store_new_token| */
3069 int s; /* saved value of |align_state| */
3070 int m; /* stream number */
3071 scanner_status = defining;
3075 set_token_ref_count(def_ref, 0);
3076 p = def_ref; /* the reference count */
3077 store_new_token(end_match_token);
3078 if ((n < 0) || (n > 15))
3083 align_state = 1000000; /* disable tab marks, etc. */
3085 /* Input and store tokens from the next line of the file */
3086 begin_file_reading();
3088 if (read_open[m] == closed) {
3090 Input for \.{\\read} from the terminal
3092 Here we input on-line into the |buffer| array, prompting the user explicitly
3093 if |n>=0|. The value of |n| is set negative so that additional prompts
3094 will not be given in the case of multi-line input.
3096 if (interaction > nonstop_mode) {
3108 ("*** (cannot \\read from terminal in nonstop modes
)");
3111 } else if (read_open[m] == just_open) {
3113 Input the first line of |read_file[m]|
3115 The first line of a file must be treated specially, since |lua_input_ln|
3116 must be told not to start with |get|.
3118 if (lua_input_ln(read_file[m], (m + 1), false)) {
3119 read_open[m] = normal;
3121 lua_a_close_in(read_file[m], (m + 1));
3122 read_open[m] = closed;
3127 Input the next line of |read_file[m]|
3129 An empty line is appended at the end of a |read_file|.
3131 if (!lua_input_ln(read_file[m], (m + 1), true)) {
3132 lua_a_close_in(read_file[m], (m + 1));
3133 read_open[m] = closed;
3134 if (align_state != 1000000) {
3136 print_err("File ended within \\read
");
3137 help1("This \\read has unbalanced braces.
");
3138 align_state = 1000000;
3145 if (end_line_char_inactive)
3148 buffer[ilimit] = (packed_ASCII_code) end_line_char_par;
3152 /* Handle \.{\\readline} and |goto done|; */
3154 while (iloc <= ilimit) {
3155 /* current line not yet finished */
3156 do_buffer_to_unichar(cur_chr, iloc);
3158 cur_tok = space_token;
3160 cur_tok = cur_chr + other_token;
3161 store_new_token(cur_tok);
3167 /* |cur_cmd=cur_chr=0| will occur at the end of the line */
3170 if (align_state < 1000000) {
3171 /* unmatched `\.\}' aborts the line */
3174 } while (cur_tok != 0);
3175 align_state = 1000000;
3178 store_new_token(cur_tok);
3183 } while (align_state != 1000000);
3185 scanner_status = normal;
3189 @ Return a string built from a token list.
/*
    Convert the token list whose reference count node is |p| into a new
    string. The list |token_link(p)| is printed with |show_token_list| while
    |selector| is temporarily |new_string|; the previous selector is restored
    before the text is collected with |make_string|.

    Calling this function while |selector| is already |new_string| is reported
    with |normal_error|.
*/
3192 str_number tokens_to_string(halfword p)
3195 if (selector == new_string)
3196 normal_error("tokens
","tokens_to_string
() called while selector
= new_string
");
3197 old_setting = selector;
3198 selector = new_string;
3199 show_token_list(token_link(p), null, -1);
3200 selector = old_setting;
3201 return make_string();
/*
    Output helpers that append to the byte buffer |ret| at index |i|, where
    |alloci| is the allocated size. They expect those three names, and for
    |Print_esc| also |e|, to be in scope at the point of use.

    |make_room(a)| enlarges |ret| with |xrealloc| when |i+a+1| exceeds
    |alloci|. NOTE(review): the buffer grows by a fixed 64 bytes whatever the
    value of |a|, so a single request for more than 64 bytes is not covered.

    |append_i_byte(a)| stores one byte and advances |i|; |Print_char(a)| is
    |make_room(1)| followed by |append_i_byte(a)|.

    |Print_uchar(s)| appends a character as a multi-byte sequence: two bytes
    up to 0x7FF, three bytes up to 0xFFFF and four bytes above that, except
    that a value of 0x110000 or more is appended as one single byte.

    |Print_esc(b)| and |Print_str(b)| copy the C string |b| byte by byte;
    |Print_esc| additionally tests the escape character |e|.

    |is_cat_letter(a)| is true when the first character of string |a| has
    category code 11.
*/
3205 #define make_room(a) \
3206 if ((unsigned)i+a+1>alloci) { \
3207 ret = xrealloc(ret,(alloci+64)); \
3208 alloci = alloci + 64; \
3211 #define append_i_byte(a) ret[i++] = (char)(a)
3213 #define Print_char(a) make_room(1); append_i_byte(a)
3215 #define Print_uchar(s) { \
3219 } else if (s<=0x7FF) { \
3220 append_i_byte(0xC0 + (s / 0x40)); \
3221 append_i_byte(0x80 + (s % 0x40)); \
3222 } else if (s<=0xFFFF) { \
3223 append_i_byte(0xE0 + (s / 0x1000)); \
3224 append_i_byte(0x80 + ((s % 0x1000) / 0x40)); \
3225 append_i_byte(0x80 + ((s % 0x1000) % 0x40)); \
3226 } else if (s>=0x110000) { \
3227 append_i_byte(s-0x11000); \
3229 append_i_byte(0xF0 + (s / 0x40000)); \
3230 append_i_byte(0x80 + ((s % 0x40000) / 0x1000)); \
3231 append_i_byte(0x80 + (((s % 0x40000) % 0x1000) / 0x40)); \
3232 append_i_byte(0x80 + (((s % 0x40000) % 0x1000) % 0x40)); \
3235 #define Print_esc(b) { \
3236 const char *v = b; \
3237 if (e>0 && e<STRING_OFFSET) { \
3240 make_room(strlen(v)); \
3241 while (*v) { append_i_byte(*v); v++; } \
3244 #define Print_str(b) { \
3245 const char *v = b; \
3246 make_room(strlen(v)); \
3247 while (*v) { append_i_byte(*v); v++; } \
3250 #define is_cat_letter(a) \
3251 (get_char_cat_code(pool_to_unichar(str_string((a)))) == 11)
3253 @ The actual token conversion in this function is now functionally equivalent to
3254 |show_token_list|, except that it always prints the whole token list. TODO: check
3255 whether this causes problems in the Lua library.
/*
    Convert a token list into a byte string held in a buffer obtained from
    |xmalloc| (initially 1024 bytes, enlarged by the |Print_...| macros).

    |pp| is the token list; its first node is skipped as the reference count.
    When |inhibit_par| is true a |par_token| is not printed. Callers in this
    file pass the address of an integer as |siz| and afterwards use that
    integer as the length of the result.

    A node outside |fix_mem_min|..|fix_mem_end| is printed as CLOBBERED.
    Control sequence tokens (|infop >= cs_token_flag|) are printed by name,
    with IMPOSSIBLE. or NONEXISTENT. for entries that fail the range checks;
    all other tokens are split into command |m| and character |c| and printed
    according to the command.
*/
3258 char *tokenlist_to_cstring(int pp, int inhibit_par, int *siz)
3260 register int p, c, m;
3266 int match_chr = '#';
3268 unsigned alloci = 1024;
3276 ret = xmalloc(alloci);
3277 p = token_link(p); /* skip refcount */
3279 e = escape_char_par;
3282 if (p < (int) fix_mem_min || p > (int) fix_mem_end) {
3283 Print_esc("CLOBBERED.
");
3286 infop = token_info(p);
3287 if (infop >= cs_token_flag) {
3288 if (!(inhibit_par && infop == par_token)) {
3289 q = infop - cs_token_flag;
3290 if (q < hash_base) {
3292 Print_esc("csname
");
3293 Print_esc("endcsname
");
3295 Print_esc("IMPOSSIBLE.
");
3297 } else if ((q >= undefined_control_sequence) && ((q <= eqtb_size) || (q > eqtb_size + hash_extra))) {
3298 Print_esc("IMPOSSIBLE.
");
3299 } else if ((cs_text(q) < 0) || (cs_text(q) >= str_ptr)) {
3300 Print_esc("NONEXISTENT.
");
3302 str_number txt = cs_text(q);
3303 sh = makecstring(txt);
3305 if (is_active_cs(txt)) {
3312 if (e>=0 && e<0x110000) Print_uchar(e);
3317 if ((!single_letter(txt)) || is_cat_letter(txt)) {
3328 m = token_cmd(infop);
3329 c = token_chr(infop);
3331 case left_brace_cmd:
3332 case right_brace_cmd:
3333 case math_shift_cmd:
3339 case other_char_cmd:
3343 if (!in_lua_escape && (is_in_csname==0))
3348 Print_uchar(match_chr);
3350 Print_char(c + '0');
3371 not_so_bad(Print_esc);
/*
    Wrap |tokenlist_to_cstring| in a freshly allocated |lstring|: the field
    |s| receives the converted bytes and |l| the size reported through the
    third argument of |tokenlist_to_cstring|.
*/
3386 lstring *tokenlist_to_lstring(int pp, int inhibit_par)
3389 lstring *ret = xmalloc(sizeof(lstring));
3390 ret->s = (unsigned char *) tokenlist_to_cstring(pp, inhibit_par, &siz);
3391 ret->l = (size_t) siz;
3396 void free_lstring(lstring * ls)