3 % Copyright
2006-2011 Taco Hoekwater
<taco@@luatex.org
>
5 % This file is part of LuaTeX.
7 % LuaTeX is free software
; you can redistribute it and
/or modify it under
8 % the terms of the GNU General Public License as published by the Free
9 % Software Foundation
; either version
2 of the License
, or
(at your
10 % option
) any later version.
12 % LuaTeX is distributed in the hope that it will be useful
, but WITHOUT
13 % ANY WARRANTY
; without even the implied warranty of MERCHANTABILITY or
14 % FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 % License for more details.
17 % You should have received a copy of the GNU General Public License along
18 % with LuaTeX
; if not
, see
<http
://www.gnu.org
/licenses
/>.
21 static const char _svn_version
[] =
/* Shorthands for integer parameters: each name below expands to |int_par|
   applied to the parameter code of the same name with a |_code| suffix. */
28 #define pausing int_par
(pausing_code
)
29 #define cat_code_table int_par
(cat_code_table_code
)
30 #define tracing_nesting int_par
(tracing_nesting_code
)
31 #define suppress_outer_error int_par
(suppress_outer_error_code
)
/* |every_eof| expands to the |equiv| entry at |every_eof_loc|. */
33 #define every_eof equiv
(every_eof_loc
)
/* |box(A)| expands to the |equiv| entry at offset |A| from |box_base|. */
34 #define box
(A
) equiv
(box_base
+(A
))
/* |detokenized_line()| is nonzero exactly when |line_catcode_table|
   equals |NO_CAT_TABLE|. */
36 #define detokenized_line
() (line_catcode_table
==NO_CAT_TABLE
)
38 #define do_get_cat_code
(a
,b
) do
{ \
39 if
(line_catcode_table
!=DEFAULT_CAT_TABLE
) \
40 a
=get_cat_code
(line_catcode_table
,b
); \
42 a
=get_cat_code
(cat_code_table
,b
); \
46 @ The \TeX\ system does nearly all of its own memory allocation
, so that it
47 can readily be transported into environments that do not have automatic
48 facilities for strings
, garbage collection
, etc.
, and so that it can be in
49 control of what error messages the user receives. The dynamic storage
50 requirements of \TeX\ are handled by providing two large arrays called
51 |fixmem| and |varmem| in which consecutive blocks of words are used as
52 nodes by the \TeX\ routines.
54 Pointer variables are indices into this array
, or into another array
55 called |eqtb| that will be explained later. A pointer variable might
56 also be a special flag that lies outside the bounds of |mem|
, so we
57 allow pointers to assume any |halfword| value. The minimum halfword
58 value represents a null pointer. \TeX\ does not assume that |mem
[null
]| exists.
62 @ Locations in |fixmem| are used for storing one-word records
; a conventional
63 \.
{AVAIL} stack is used for allocation in this array.
66 smemory_word
*fixmem
; /* the big dynamic storage area
*/
67 unsigned fix_mem_min
; /* the smallest location of one-word memory in use
*/
68 unsigned fix_mem_max
; /* the largest location of one-word memory in use
*/
71 @ In order to study the memory requirements of particular applications
, it
72 is possible to prepare a version of \TeX\ that keeps track of current and
73 maximum memory usage. When code between the delimiters |@
!stat| $\ldots$
74 |tats| is not ``commented out
,'' \TeX\ will run a bit slower but it will
75 report these statistics when |tracing_stats| is sufficiently large.
78 int var_used
, dyn_used
; /* how much memory is in use
*/
80 halfword avail
; /* head of the list of available one-word nodes
*/
81 unsigned fix_mem_end
; /* the last one-word node used in |mem|
*/
83 halfword garbage
; /* head of a junk list
, write only
*/
84 halfword temp_token_head
; /* head of a temporary list of some kind
*/
85 halfword hold_token_head
; /* head of a temporary list of another kind
*/
86 halfword omit_template
; /* a constant token list
*/
87 halfword null_list
; /* permanently empty list
*/
88 halfword backup_head
; /* head of token list built by |scan_keyword|
*/
91 void initialize_tokens
(void
)
98 set_token_info
(temp_token_head
, 0);
101 set_token_info
(hold_token_head
, 0);
104 set_token_info
(omit_template
, 0);
107 set_token_info
(null_list
, 0);
110 set_token_info
(backup_head
, 0);
113 set_token_info
(garbage
, 0);
114 dyn_used
= 0; /* initialize statistics
*/
@ The function |get_avail| returns a pointer to a new one-word node whose
|link| field is null. However, \TeX\ will halt if there is no more room left.

If the available-space list is empty, i.e., if |avail=null|,
we try first to increase |fix_mem_end|. If that cannot be done, i.e., if
|fix_mem_end=fix_mem_max|, we try to reallocate array |fixmem|.
If that doesn't work, we have to quit.
127 halfword get_avail
(void
)
128 { /* single-word node allocation
*/
129 unsigned p
; /* the new node being got
*/
131 p
= (unsigned
) avail
; /* get top location in the |avail| stack
*/
133 avail
= token_link
(avail
); /* and pop it off
*/
134 } else if
(fix_mem_end
< fix_mem_max
) { /* or go into virgin territory
*/
138 smemory_word
*new_fixmem
; /* the big dynamic storage area
*/
139 t
= (fix_mem_max
/ 5);
142 (fixmem
, sizeof
(smemory_word
) * (fix_mem_max
+ t
+ 1)));
143 if
(new_fixmem
== NULL) {
144 runaway
(); /* if memory is exhausted
, display possible runaway text
*/
145 overflow
("token memory size", fix_mem_max
);
149 memset
(voidcast
(fixmem
+ fix_mem_max
+ 1), 0, t
* sizeof
(smemory_word
));
153 token_link
(p
) = null
; /* provide an oft-desired initialization of the new node
*/
154 incr
(dyn_used
); /* maintain statistics
*/
159 @ The procedure |flush_list
(p
)| frees an entire linked list of
160 one-word nodes that starts at position |p|.
164 void flush_list
(halfword p
)
165 { /* makes list of single-word nodes available
*/
166 halfword q
, r
; /* list traversers
*/
173 } while
(r
!= null
); /* now |q| is the last node on the list
*/
174 token_link
(q
) = avail
;
179 @ A \TeX\ token is either a character or a control sequence
, and it is
181 represented internally in one of two ways
: (1)~A character whose ASCII
182 code number is |c| and whose command code is |m| is represented as the
183 number $
2^
{21}m
+c$
; the command code is in the range |
1<=m
<=14|.
(2)~A control
184 sequence whose |eqtb| address is |p| is represented as the number
185 |cs_token_flag
+p|. Here |cs_token_flag
=@t$
2^
{25}-1$@
>| is larger than
186 $
2^
{21}m
+c$
, yet it is small enough that |cs_token_flag
+p
< max_halfword|
;
187 thus
, a token fits comfortably in a halfword.
189 A token |t| represents a |left_brace| command if and only if
190 |t
<left_brace_limit|
; it represents a |right_brace| command if and only if
191 we have |left_brace_limit
<=t
<right_brace_limit|
; and it represents a |match| or
192 |end_match| command if and only if |match_token
<=t
<=end_match_token|.
193 The following definitions take care of these token-oriented constants
196 @ A token list is a singly linked list of one-word nodes in |mem|
, where
197 each word contains a token and a link. Macro definitions
, output-routine
198 definitions
, marks
, \.
{\\write
} texts
, and a few other things
199 are remembered by \TeX\ in the form
200 of token lists
, usually preceded by a node with a reference count in its
201 |token_ref_count| field. The token stored in location |p| is called
204 Three special commands appear in the token lists of macro definitions.
205 When |m
=match|
, it means that \TeX\ should scan a parameter
206 for the current macro
; when |m
=end_match|
, it means that parameter
207 matching should end and \TeX\ should start reading the macro text
; and
208 when |m
=out_param|
, it means that \TeX\ should insert parameter
209 number |c| into the text at this point.
211 The enclosing \.
{\char'
173} and \.
{\char'
175} characters of a macro
212 definition are omitted
, but the final right brace of an output routine
213 is included at the end of its token list.
215 Here is an example macro definition that illustrates these conventions.
216 After \TeX\ processes the text
217 $$\.
{\\def\\mac a\#
1\#
2 \\b \
{\#
1\\
-a \#\#
1\#
2 \#
2\
}}$$
218 the definition of \.
{\\mac
} is represented as a token list containing
220 \vbox
{\halign
{\hfil#\hfil\cr
221 (reference count
), |letter|\
,\.a
, |match|\
,\#
, |match|\
,\#
, |spacer|\
,\.\
,
222 \.
{\\b
}, |end_match|
,\cr
223 |out_param|\
,1, \.
{\\
-}, |letter|\
,\.a
, |spacer|\
,\.\
, |mac_param|\
,\#
,
224 |other_char|\
,\
.1,\cr
225 |out_param|\
,2, |spacer|\
,\.\
, |out_param|\
,2.\cr
}}$$
226 The procedure |scan_toks| builds such token lists
, and |macro_call|
227 does the parameter matching.
231 $$\.
{\\def\\m\
{\\def\\m\
{a\
}\ b\
}}$$
232 explain why reference counts would be needed even if \TeX\ had no \.
{\\let
}
233 operation
: When the token list for \.
{\\m
} is being read
, the redefinition of
234 \.
{\\m
} changes the |eqtb| entry before the token list has been fully
235 consumed
, so we dare not simply destroy a token list when its
236 control sequence is being redefined.
238 If the parameter-matching part of a definition ends with `\.
{\#\
{}'
,
239 the corresponding token list will have `\.\
{' just before the `|end_match|'
240 and also at the very end. The first `\.\
{' is used to delimit the parameter
; the
241 second one keeps the first from disappearing.
243 The |print_meaning| subroutine displays |cur_cmd| and |cur_chr| in
244 symbolic form
, including the expansion of a macro or mark.
247 void print_meaning
(void
)
249 print_cmd_chr
((quarterword
) cur_cmd
, cur_chr
);
250 if
(cur_cmd
>= call_cmd
) {
255 /* Show the meaning of a mark node
*/
256 if
((cur_cmd
== top_bot_mark_cmd
) && (cur_chr < marks_code)) {
260 case first_mark_code
:
261 token_show
(first_mark
(0));
264 token_show
(bot_mark
(0));
266 case split_first_mark_code
:
267 token_show
(split_first_mark
(0));
269 case split_bot_mark_code
:
270 token_show
(split_bot_mark
(0));
273 token_show
(top_mark
(0));
281 @ The procedure |show_token_list|
, which prints a symbolic form of
282 the token list that starts at a given node |p|
, illustrates these
283 conventions. The token list being displayed should not begin with a reference
284 count. However
, the procedure is intended to be robust
, so that if the
285 memory links are awry or if |p| is not really a pointer to a token list
,
286 nothing catastrophic will happen.
288 An additional parameter |q| is also given
; this parameter is either null
289 or it points to a node in the token list where a certain magic computation
290 takes place that will be explained later.
(Basically
, |q| is non-null when
291 we are printing the two-line context information at the time of an error
292 message
; |q| marks the place corresponding to where the second line
295 For example
, if |p| points to the node containing the first \.a in the
296 token list above
, then |show_token_list| will print the string
297 $$\hbox
{`\.
{a\#
1\#
2\ \\b\
->\#
1\\
-a\ \#\#
1\#
2\ \#
2}'
;}$$
298 and if |q| points to the node containing the second \.a
,
299 the magic computation will be performed just before the second \.a is printed.
301 The generation will stop
, and `\.
{\\ETC.
}' will be printed
, if the length
302 of printing exceeds a given limit~|l|. Anomalous entries are printed in the
303 form of control sequences that are not followed by a blank space
, e.g.
,
304 `\.
{\\BAD.
}'
; this cannot be confused with actual control sequences because
305 a real control sequence named \.
{BAD
} would come out `\.
{\\BAD\
}'.
308 void show_token_list
(int p
, int q
, int l
)
310 int m
, c
; /* pieces of a token
*/
311 ASCII_code match_chr
; /* character used in a `|match|'
*/
312 ASCII_code n
; /* the highest parameter number
, as an ASCII digit
*/
318 while
((p
!= null
) && (tally < l)) {
320 /* Do magic computation
*/
323 /* Display token |p|
, and |return| if there are problems
*/
324 if
((p
< (int
) fix_mem_min
) ||
(p
> (int
) fix_mem_end
)) {
325 tprint_esc
("CLOBBERED.");
328 if
(token_info
(p
) >= cs_token_flag
) {
329 if
(!((inhibit_par_tokens
) && (token_info(p) == par_token)))
330 print_cs
(token_info
(p
) - cs_token_flag
);
332 m
= token_cmd
(token_info
(p
));
333 c
= token_chr
(token_info
(p
));
334 if
(token_info
(p
) < 0) {
337 /* Display the token $
(|m|
,|c|
)$
*/
338 /* The procedure usually ``learns'' the character code used for macro
339 parameters by seeing one in a |match| command before it runs into any
340 |out_param| commands.
*/
343 case right_brace_cmd
:
392 #define do_buffer_to_unichar
(a
,b
) do
{ \
393 a
= (halfword
)str2uni
(buffer
+b
); \
398 @ Here's the way we sometimes want to display a token list
, given a pointer
399 to its reference count
; the pointer may be null.
402 void token_show
(halfword p
)
405 show_token_list
(token_link
(p
), null
, 10000000);
@ |delete_token_ref| is called when
a pointer to a token list's reference count is being removed. This means
that the token list should disappear if the reference count was |null|,
otherwise the count should be decreased by one.
417 void delete_token_ref
(halfword p
)
418 { /* |p| points to the reference count
419 of a token list that is losing one reference
*/
420 assert
(token_ref_count
(p
) >= 0);
421 if
(token_ref_count
(p
) == 0)
424 decr
(token_ref_count
(p
));
428 int get_char_cat_code
(int curchr
)
431 do_get_cat_code
(a
,curchr
);
436 static void invalid_character_error
(void
)
439 { "A funny symbol that I can't read has just been input.",
440 "Continue, and I'll forget that it ever happened.",
443 deletions_allowed
= false
;
444 tex_error
("Text line contains an invalid character", hlp
);
445 deletions_allowed
= true
;
449 static boolean process_sup_mark
(void
); /* below
*/
451 static int scan_control_sequence
(void
); /* below
*/
453 typedef enum
{ next_line_ok
, next_line_return
,
457 static next_line_retval next_line
(void
); /* below
*/
460 @ In case you are getting bored
, here is a slightly less trivial routine
:
461 Given a string of lowercase letters
, like `\.
{pt
}' or `\.
{plus
}' or
462 `\.
{width
}'
, the |scan_keyword| routine checks to see whether the next
463 tokens of input match this string. The match must be exact
, except that
464 uppercase letters will match their lowercase counterparts
; uppercase
465 equivalents are determined by subtracting |
"a"-"A"|
, rather than using the
466 |uc_code| table
, since \TeX\ uses this routine only for its own limited
469 If a match is found
, the characters are effectively removed from the input
470 and |true| is returned. Otherwise |false| is returned
, and the input
471 is left essentially unchanged
(except for the fact that some macros
472 may have been expanded
, etc.
).
476 boolean scan_keyword
(const char
*s
)
477 { /* look for a given string
*/
478 halfword p
; /* tail of the backup list
*/
479 halfword q
; /* new node being added to the token list via |store_new_token|
*/
480 const char
*k
; /* index into |str_pool|
*/
481 halfword save_cur_cs
= cur_cs
;
482 int saved_align_state
= align_state
;
483 assert
(strlen
(s
) > 1);
485 token_link
(p
) = null
;
488 get_x_token
(); /* recursion is possible here
*/
490 ((cur_chr
== *k
) ||
(cur_chr
== *k
- 'a'
+ 'A'
))) {
491 store_new_token
(cur_tok
);
493 } else if
((cur_cmd
!= spacer_cmd
) ||
(p
!= backup_head
)) {
494 if
(p
!= backup_head
) {
496 token_info
(q
) = cur_tok
;
497 token_link
(q
) = null
;
499 begin_token_list
(token_link
(backup_head
), backed_up
);
500 if
(cur_cmd
!= endv_cmd
)
501 align_state
= saved_align_state
;
505 cur_cs
= save_cur_cs
;
509 flush_list
(token_link
(backup_head
));
510 cur_cs
= save_cur_cs
;
511 if
(cur_cmd
!= endv_cmd
)
512 align_state
= saved_align_state
;
@ We cannot return |undefined_control_sequence| under some conditions
(inside |shift_case|, for example). This needs thinking.
520 halfword active_to_cs
(int curchr
, int force
)
524 char
*utfbytes
= xmalloc
(10);
525 int nncs
= no_new_control_sequence
;
526 a
= (char
*) uni2str
(0xFFFF);
527 utfbytes
= strcpy
(utfbytes
, a
);
529 no_new_control_sequence
= false
;
531 b
= (char
*) uni2str
((unsigned
) curchr
);
532 utfbytes
= strcat
(utfbytes
, b
);
534 curcs
= string_lookup
(utfbytes
, strlen
(utfbytes
));
537 curcs
= string_lookup
(utfbytes
, 4);
539 no_new_control_sequence
= nncs
;
545 @ TODO this function should listen to \.
{\\escapechar
}
548 static char
*cs_to_string
(halfword p
)
549 { /* prints a control sequence
*/
553 static char ret
[256] = { 0 };
554 if
(p
== 0 || p
== null_cs
) {
568 str_number txt
= cs_text
(p
);
569 sh
= makecstring
(txt
);
571 if
(is_active_cs
(txt
)) {
589 @ TODO this is a quick hack
, will be solved differently soon
592 static char
*cmd_chr_to_string
(int cmd
, int chr
)
597 selector
= new_string
;
598 print_cmd_chr
((quarterword
) cmd
, chr
);
600 s
= makecstring
(str
);
606 @ The heart of \TeX's input mechanism is the |get_next| procedure
, which
607 we shall develop in the next few sections of the program. Perhaps we
608 shouldn't actually call it the ``heart
,'' however
, because it really acts
609 as \TeX's eyes and mouth
, reading the source files and gobbling them up.
610 And it also helps \TeX\ to regurgitate stored token lists that are to be
614 The main duty of |get_next| is to input one token and to set |cur_cmd|
615 and |cur_chr| to that token's command code and modifier. Furthermore
, if
616 the input token is a control sequence
, the |eqtb| location of that control
617 sequence is stored in |cur_cs|
; otherwise |cur_cs| is set to zero.
619 Underlying this simple description is a certain amount of complexity
620 because of all the cases that need to be handled.
621 However
, the inner loop of |get_next| is reasonably short and fast.
623 When |get_next| is asked to get the next token of a \.
{\\read
} line
,
624 it sets |cur_cmd
=cur_chr
=cur_cs
=0| in the case that no more tokens
625 appear on that line.
(There might not be any tokens at all
, if the
626 |end_line_char| has |ignore| as its catcode.
)
629 @ The value of |par_loc| is the |eqtb| address of `\.
{\\par
}'. This quantity
630 is needed because a blank line of input is supposed to be exactly equivalent
631 to the appearance of \.
{\\par
}; we must set |cur_cs
:=par_loc|
632 when detecting a blank line.
635 halfword par_loc
; /* location of `\.
{\\par
}' in |eqtb|
*/
636 halfword par_token
; /* token representing `\.
{\\par
}'
*/
@ Parts of |get_next| are executed more often than any other instructions of \TeX.
@^mastication@>@^inner loop@>
@ The global variable |force_eof| is normally |false|; it is set |true|
by an \.{\\endinput} command. |luacstrings| is the number of lua print
statements waiting to be input; it is changed by |luatokencall|.
649 boolean force_eof
; /* should the next \.
{\\input
} be aborted early?
*/
650 int luacstrings
; /* how many lua strings are waiting to be input?
*/
653 @ If the user has set the |pausing| parameter to some positive value
,
654 and if nonstop mode has not been selected
, each line of input is displayed
655 on the terminal and the transcript file
, followed by `\.
{=>}'.
656 \TeX\ waits for a response. If the response is simply |carriage_return|
, the
657 line is accepted as it stands
, otherwise the line typed is
658 used instead of the line in the file.
661 void firm_up_the_line
(void
)
663 int k
; /* an index into |buffer|
*/
666 if
(interaction
> nonstop_mode
) {
669 if
(istart
< ilimit
) {
670 for
(k
= istart
; k
<= ilimit
- 1; k
++)
671 print_char
(buffer
[k
]);
674 prompt_input
("=>"); /* wait for user response
*/
676 for
(k
= first
; k
< +last
- 1; k
++) /* move line down in buffer
*/
677 buffer
[k
+ istart
- first
] = buffer
[k
];
678 ilimit
= istart
+ last
- first
;
686 @ Before getting into |get_next|
, let's consider the subroutine that
687 is called when an `\.
{\\outer
}' control sequence has been scanned or
688 when the end of a file has been reached. These two cases are distinguished
689 by |cur_cs|
, which is zero at the end of a file.
692 void check_outer_validity
(void
)
694 halfword p
; /* points to inserted token list
*/
695 halfword q
; /* auxiliary pointer
*/
696 if
(suppress_outer_error
)
698 if
(scanner_status
!= normal
) {
699 deletions_allowed
= false
;
700 /* Back up an outer control sequence so that it can be reread
; */
701 /* An outer control sequence that occurs in a \.
{\\read
} will not be reread
,
702 since the error recovery for \.
{\\read
} is not very powerful.
*/
704 if
((istate
== token_list
) ||
(iname
< 1) ||
(iname
> 17)) {
706 token_info
(p
) = cs_token_flag
+ cur_cs
;
707 begin_token_list
(p
, backed_up
); /* prepare to read the control sequence again
*/
709 cur_cmd
= spacer_cmd
;
710 cur_chr
= ' '
; /* replace it by a space
*/
712 if
(scanner_status
> skipping
) {
713 const char
*errhlp
[] =
714 { "I suspect you have forgotten a `}', causing me",
715 "to read past where you wanted me to stop.",
716 "I'll try to recover; but if the error is serious,",
717 "you'd better type `E' or `X' now and fix your file.",
721 const char
*startmsg
;
722 const char
*scannermsg
;
723 /* Tell the user what has run away and try to recover
*/
724 runaway
(); /* print a definition
, argument
, or preamble
*/
726 startmsg
= "File ended";
729 startmsg
= "Forbidden control sequence found";
731 /* Print either `\.
{definition
}' or `\.
{use
}' or `\.
{preamble
}' or `\.
{text
}'
,
732 and insert tokens that should lead to recovery
; */
733 /* The recovery procedure can't be fully understood without knowing more
734 about the \TeX\ routines that should be aborted
, but we can sketch the
735 ideas here
: For a runaway definition we will insert a right brace
; for a
736 runaway preamble
, we will insert a special \.
{\\cr
} token and a right
737 brace
; and for a runaway argument
, we will set |long_state| to
738 |outer_call| and insert \.
{\\par
}.
*/
740 switch
(scanner_status
) {
742 scannermsg
= "definition";
743 token_info
(p
) = right_brace_token
+ '
}'
;
747 token_info
(p
) = par_token
;
748 long_state
= outer_call_cmd
;
751 scannermsg
= "preamble";
752 token_info
(p
) = right_brace_token
+ '
}'
;
756 token_info
(p
) = cs_token_flag
+ frozen_cr
;
757 align_state
= -1000000;
761 token_info
(p
) = right_brace_token
+ '
}'
;
763 default
: /* can't happen
*/
764 scannermsg
= "unknown";
766 } /*there are no other cases
*/
767 begin_token_list
(p
, inserted
);
768 snprintf
(errmsg
, 255, "%s while scanning %s of %s",
769 startmsg
, scannermsg
, cs_to_string
(warning_index
));
770 tex_error
(errmsg
, errhlp
);
773 const char
*errhlp_no
[] =
774 { "The file ended while I was skipping conditional text.",
775 "This kind of error happens when you say `\\if...' and forget",
776 "the matching `\\fi'. I've inserted a `\\fi'; this might work.",
779 const char
*errhlp_cs
[] =
780 { "A forbidden control sequence occurred in skipped text.",
781 "This kind of error happens when you say `\\if...' and forget",
782 "the matching `\\fi'. I've inserted a `\\fi'; this might work.",
785 const char
**errhlp
= (const char
**) errhlp_no
;
791 ss
= cmd_chr_to_string
(if_test_cmd
, cur_if
);
792 snprintf
(errmsg
, 255,
793 "Incomplete %s; all text was ignored after line %d",
794 ss
, (int
) skip_line
);
796 /* Incomplete \\if...
*/
797 cur_tok
= cs_token_flag
+ frozen_fi
;
798 /* back up one inserted token and call |error|
*/
800 OK_to_interrupt
= false
;
802 token_type
= inserted
;
803 OK_to_interrupt
= true
;
804 tex_error
(errmsg
, errhlp
);
807 deletions_allowed
= true
;
812 static boolean get_next_file
(void
)
815 if
(iloc
<= ilimit
) { /* current line not yet finished
*/
816 do_buffer_to_unichar
(cur_chr
, iloc
);
819 if
(detokenized_line
()) {
820 cur_cmd
= (cur_chr
== ' ' ?
10 : 12);
822 do_get_cat_code
(cur_cmd
, cur_chr
);
825 Change state if necessary
, and |goto switch| if the current
826 character should be ignored
, or |goto reswitch| if the current
827 character changes to another
;
829 /* The following
48-way switch accomplishes the scanning quickly
, assuming
830 that a decent C compiler has translated the code. Note that the numeric
831 values for |mid_line|
, |skip_blanks|
, and |new_line| are spaced
832 apart from each other by |max_char_code
+1|
, so we can add a character's
833 command code to the state to get a single number that characterizes both.
835 switch
(istate
+ cur_cmd
) {
836 case mid_line
+ ignore_cmd
:
837 case skip_blanks
+ ignore_cmd
:
838 case new_line
+ ignore_cmd
:
839 case skip_blanks
+ spacer_cmd
:
840 case new_line
+ spacer_cmd
: /* Cases where character is ignored
*/
843 case mid_line
+ escape_cmd
:
844 case new_line
+ escape_cmd
:
845 case skip_blanks
+ escape_cmd
: /* Scan a control sequence ...
; */
846 istate
= (unsigned char
) scan_control_sequence
();
847 if
(cur_cmd
>= outer_call_cmd
)
848 check_outer_validity
();
850 case mid_line
+ active_char_cmd
:
851 case new_line
+ active_char_cmd
:
852 case skip_blanks
+ active_char_cmd
: /* Process an active-character
*/
853 cur_cs
= active_to_cs
(cur_chr
, false
);
854 cur_cmd
= eq_type
(cur_cs
);
855 cur_chr
= equiv
(cur_cs
);
857 if
(cur_cmd
>= outer_call_cmd
)
858 check_outer_validity
();
860 case mid_line
+ sup_mark_cmd
:
861 case new_line
+ sup_mark_cmd
:
862 case skip_blanks
+ sup_mark_cmd
: /* If this |sup_mark| starts
*/
863 if
(process_sup_mark
())
868 case mid_line
+ invalid_char_cmd
:
869 case new_line
+ invalid_char_cmd
:
870 case skip_blanks
+ invalid_char_cmd
: /* Decry the invalid character and |goto restart|
; */
871 invalid_character_error
();
872 return false
; /* because state may be |token_list| now
*/
874 case mid_line
+ spacer_cmd
: /* Enter |skip_blanks| state
, emit a space
; */
875 istate
= skip_blanks
;
878 case mid_line
+ car_ret_cmd
: /* Finish line
, emit a space
; */
879 /* When a character of type |spacer| gets through
, its character code is
880 changed to $\.
{"\ "}=040$. This means that the ASCII codes for tab and space
,
881 and for the space inserted at the end of a line
, will
882 be treated alike when macro parameters are being matched. We do this
883 since such characters are indistinguishable on most computer terminal displays.
886 cur_cmd
= spacer_cmd
;
889 case skip_blanks
+ car_ret_cmd
:
890 case mid_line
+ comment_cmd
:
891 case new_line
+ comment_cmd
:
892 case skip_blanks
+ comment_cmd
: /* Finish line
, |goto switch|
; */
896 case new_line
+ car_ret_cmd
: /* Finish line
, emit a \.
{\\par
}; */
899 cur_cmd
= eq_type
(cur_cs
);
900 cur_chr
= equiv
(cur_cs
);
901 if
(cur_cmd
>= outer_call_cmd
)
902 check_outer_validity
();
904 case skip_blanks
+ left_brace_cmd
:
905 case new_line
+ left_brace_cmd
:
906 istate
= mid_line
; /* fall through
*/
907 case mid_line
+ left_brace_cmd
:
910 case skip_blanks
+ right_brace_cmd
:
911 case new_line
+ right_brace_cmd
:
912 istate
= mid_line
; /* fall through
*/
913 case mid_line
+ right_brace_cmd
:
916 case mid_line
+ math_shift_cmd
:
917 case mid_line
+ tab_mark_cmd
:
918 case mid_line
+ mac_param_cmd
:
919 case mid_line
+ sub_mark_cmd
:
920 case mid_line
+ letter_cmd
:
921 case mid_line
+ other_char_cmd
:
924 case skip_blanks
+ math_shift
:
925 case skip_blanks
+ tab_mark
:
926 case skip_blanks
+ mac_param
:
927 case skip_blanks
+ sub_mark
:
928 case skip_blanks
+ letter
:
929 case skip_blanks
+ other_char
:
930 case new_line
+ math_shift
:
931 case new_line
+ tab_mark
:
932 case new_line
+ mac_param
:
933 case new_line
+ sub_mark
:
934 case new_line
+ letter
:
935 case new_line
+ other_char
:
947 Move to next line of file
,
948 or |goto restart| if there is no next line
,
949 or |return| if a \.
{\\read
} line has finished
;
952 next_line_retval r
= next_line
();
953 if
(r
== next_line_return
) {
955 } else if
(r
== next_line_restart
) {
/* Nonzero when |a| is a hexadecimal digit in the form this file accepts:
   '0'..'9' or lowercase 'a'..'f'; uppercase letters are not matched.
   Every use of the argument is parenthesized so that expression arguments
   (for example |c & 0xFF| or |c + 1|) group as the caller intends.
   The argument is still evaluated more than once, so it must not have
   side effects. */
#define is_hex(a) (((a)>='0'&&(a)<='9')||((a)>='a'&&(a)<='f'))
968 #define add_nybble
(a
) do
{ \
969 if
(a
<='
9'
) cur_chr
=(cur_chr
<<4)+a-'
0'
; \
970 else cur_chr
=(cur_chr
<<4)+a-'a'
+10; \
973 #define hex_to_cur_chr do
{ \
974 if
(c
<='
9'
) cur_chr
=c-'
0'
; \
975 else cur_chr
=c-'a'
+10; \
979 #define four_hex_to_cur_chr do
{ \
981 add_nybble
(ccc
); add_nybble
(cccc
); \
984 #define five_hex_to_cur_chr do
{ \
985 four_hex_to_cur_chr
; \
989 #define six_hex_to_cur_chr do
{ \
990 five_hex_to_cur_chr
; \
991 add_nybble
(cccccc
); \
995 @ Notice that a code like \.
{\^\^
8} becomes \.x if not followed by a hex digit.
998 static boolean process_sup_mark
(void
)
1000 if
(cur_chr
== buffer
[iloc
]) {
1002 if
(iloc
< ilimit
) {
1003 if
((cur_chr
== buffer
[iloc
+ 1]) && (cur_chr == buffer[iloc + 2])
1004 && (cur_chr == buffer[iloc + 3])
1005 && (cur_chr == buffer[iloc + 4])
1006 && ((iloc + 10) <= ilimit)) {
1007 int ccc
, cccc
, ccccc
, cccccc
; /* constituents of a possible expanded code
*/
1008 c
= buffer
[iloc
+ 5];
1009 cc
= buffer
[iloc
+ 6];
1010 ccc
= buffer
[iloc
+ 7];
1011 cccc
= buffer
[iloc
+ 8];
1012 ccccc
= buffer
[iloc
+ 9];
1013 cccccc
= buffer
[iloc
+ 10];
1014 if
((is_hex
(c
)) && (is_hex(cc)) && (is_hex(ccc))
1016 && (is_hex(ccccc)) && (is_hex(cccccc))) {
1022 if
((cur_chr
== buffer
[iloc
+ 1]) && (cur_chr == buffer[iloc + 2])
1023 && (cur_chr == buffer[iloc + 3]) && ((iloc + 8) <= ilimit)) {
1024 int ccc
, cccc
, ccccc
; /* constituents of a possible expanded code
*/
1025 c
= buffer
[iloc
+ 4];
1026 cc
= buffer
[iloc
+ 5];
1027 ccc
= buffer
[iloc
+ 6];
1028 cccc
= buffer
[iloc
+ 7];
1029 ccccc
= buffer
[iloc
+ 8];
1030 if
((is_hex
(c
)) && (is_hex(cc)) && (is_hex(ccc))
1031 && (is_hex(cccc)) && (is_hex(ccccc))) {
1033 five_hex_to_cur_chr
;
1037 if
((cur_chr
== buffer
[iloc
+ 1]) && (cur_chr == buffer[iloc + 2])
1038 && ((iloc + 6) <= ilimit)) {
1039 int ccc
, cccc
; /* constituents of a possible expanded code
*/
1040 c
= buffer
[iloc
+ 3];
1041 cc
= buffer
[iloc
+ 4];
1042 ccc
= buffer
[iloc
+ 5];
1043 cccc
= buffer
[iloc
+ 6];
1044 if
((is_hex
(c
)) && (is_hex(cc)) && (is_hex(ccc))
1045 && (is_hex(cccc))) {
1047 four_hex_to_cur_chr
;
1051 c
= buffer
[iloc
+ 1];
1052 if
(c
< 0200) { /* yes we have an expanded char
*/
1054 if
(is_hex
(c
) && iloc <= ilimit) {
1062 cur_chr
= (c
< 0100 ? c
+ 0100 : c
- 0100);
1070 @ Control sequence names are scanned only when they appear in some line of
1071 a file
; once they have been scanned the first time
, their |eqtb| location
1072 serves as a unique identification
, so \TeX\ doesn't need to refer to the
1073 original name any more except when it prints the equivalent in symbolic form.
1075 The program that scans a control sequence has been written carefully
1076 in order to avoid the blowups that might otherwise occur if a malicious
1077 user tried something like `\.
{\\catcode\'
15=0}'. The algorithm might
1078 look at |buffer
[ilimit
+1]|
, but it never looks at |buffer
[ilimit
+2]|.
1080 If expanded characters like `\.
{\^\^A
}' or `\.
{\^\^df
}'
1081 appear in or just following
1082 a control sequence name
, they are converted to single characters in the
1083 buffer and the process is repeated
, slowly but surely.
1086 static boolean check_expanded_code
(int
*kk
); /* below
*/
1088 static int scan_control_sequence
(void
)
1090 int retval
= mid_line
;
1091 if
(iloc
> ilimit
) {
1092 cur_cs
= null_cs
; /* |state| is irrelevant in this case
*/
1094 register int cat
; /* |cat_code
(cur_chr
)|
, usually
*/
1097 do_buffer_to_unichar
(cur_chr
, k
);
1098 do_get_cat_code
(cat
, cur_chr
);
1099 if
(cat
!= letter_cmd || k
> ilimit
) {
1100 retval
= (cat
== spacer_cmd ? skip_blanks
: mid_line
);
1101 if
(cat
== sup_mark_cmd
&& check_expanded_code(&k)) /* If an expanded...; */
1104 retval
= skip_blanks
;
1106 do_buffer_to_unichar
(cur_chr
, k
);
1107 do_get_cat_code
(cat
, cur_chr
);
1108 } while
(cat
== letter_cmd
&& k <= ilimit);
1110 if
(cat
== sup_mark_cmd
&& check_expanded_code(&k)) /* If an expanded...; */
1112 if
(cat
!= letter_cmd
) {
1114 if
(cur_chr
> 0xFFFF)
1116 if
(cur_chr
> 0x7FF)
1120 } /* now |k| points to first nonletter
*/
1122 cur_cs
= id_lookup
(iloc
, k
- iloc
);
1127 cur_cmd
= eq_type
(cur_cs
);
1128 cur_chr
= equiv
(cur_cs
);
1132 @ Whenever we reach the following piece of code
, we will have
1133 |cur_chr
=buffer
[k-1
]| and |k
<=ilimit
+1| and |cat
=get_cat_code
(cat_code_table
,cur_chr
)|. If an
1134 expanded code like \.
{\^\^A
} or \.
{\^\^df
} appears in |buffer
[(k-1
)..
(k
+1)]|
1135 or |buffer
[(k-1
)..
(k
+2)]|
, we
1136 will store the corresponding code in |buffer
[k-1
]| and shift the rest of
1137 the buffer left two or three places.
1140 static boolean check_expanded_code
(int
*kk
)
1144 int d
= 1; /* number of excess characters in an expanded code
*/
1145 int c
, cc
, ccc
, cccc
, ccccc
, cccccc
; /* constituents of a possible expanded code
*/
1146 if
(buffer
[k
] == cur_chr
&& k < ilimit) {
1147 if
((cur_chr
== buffer
[k
+ 1]) && (cur_chr == buffer[k + 2])
1148 && ((k + 6) <= ilimit)) {
1150 if
((cur_chr
== buffer
[k
+ 3]) && ((k + 8) <= ilimit))
1152 if
((cur_chr
== buffer
[k
+ 4]) && ((k + 10) <= ilimit))
1154 c
= buffer
[k
+ d
- 1];
1156 ccc
= buffer
[k
+ d
+ 1];
1157 cccc
= buffer
[k
+ d
+ 2];
1159 ccccc
= buffer
[k
+ d
+ 3];
1160 cccccc
= buffer
[k
+ d
+ 4];
1161 if
(is_hex
(c
) && is_hex(cc) && is_hex(ccc) && is_hex(cccc)
1162 && is_hex(ccccc) && is_hex(cccccc))
1164 } else if
(d
== 5) {
1165 ccccc
= buffer
[k
+ d
+ 3];
1166 if
(is_hex
(c
) && is_hex(cc) && is_hex(ccc) && is_hex(cccc)
1168 five_hex_to_cur_chr
;
1170 if
(is_hex
(c
) && is_hex(cc) && is_hex(ccc) && is_hex(cccc))
1171 four_hex_to_cur_chr
;
1177 if
(is_hex
(c
) && (k + 2) <= ilimit) {
1179 if
(is_hex
(c
) && is_hex(cc)) {
1183 } else if
(c
< 0100) {
1194 if
(cur_chr
<= 0x7F) {
1195 buffer
[k
- 1] = (packed_ASCII_code
) cur_chr
;
1196 } else if
(cur_chr
<= 0x7FF) {
1197 buffer
[k
- 1] = (packed_ASCII_code
) (0xC0 + cur_chr
/ 0x40);
1200 buffer
[k
- 1] = (packed_ASCII_code
) (0x80 + cur_chr
% 0x40);
1201 } else if
(cur_chr
<= 0xFFFF) {
1202 buffer
[k
- 1] = (packed_ASCII_code
) (0xE0 + cur_chr
/ 0x1000);
1206 (packed_ASCII_code
) (0x80 + (cur_chr
% 0x1000) / 0x40);
1210 (packed_ASCII_code
) (0x80 + (cur_chr
% 0x1000) % 0x40);
1212 buffer
[k
- 1] = (packed_ASCII_code
) (0xF0 + cur_chr
/ 0x40000);
1216 (packed_ASCII_code
) (0x80 + (cur_chr
% 0x40000) / 0x1000);
1220 (packed_ASCII_code
) (0x80 +
1221 ((cur_chr
% 0x40000) % 0x1000) / 0x40);
1225 (packed_ASCII_code
) (0x80 +
1226 ((cur_chr
% 0x40000) % 0x1000) % 0x40);
1229 ilimit
= ilimit
- d
;
1230 while
(l
<= ilimit
) {
1231 buffer
[l
] = buffer
[l
+ d
];
1241 @ All of the easy branches of |get_next| have now been taken care of.
1242 There is one more branch.
1245 static next_line_retval next_line
(void
)
1247 boolean inhibit_eol
= false
; /* a way to end a pseudo file without trailing space
*/
1249 /* Read next line of file into |buffer|
, or |goto restart| if the file has ended
*/
1254 if
(pseudo_input
()) { /* not end of file
*/
1255 firm_up_the_line
(); /* this sets |ilimit|
*/
1256 line_catcode_table
= DEFAULT_CAT_TABLE
;
1257 if
((iname
== 19) && (pseudo_lines(pseudo_files) == null))
1259 } else if
((every_eof
!= null
) && !eof_seen[iindex]) {
1261 eof_seen
[iindex
] = true
; /* fake one empty line
*/
1263 begin_token_list
(every_eof
, every_eof_text
);
1264 return next_line_restart
;
1270 if
(luacstring_input
()) { /* not end of strings
*/
1272 line_catcode_table
= (short
) luacstring_cattable
();
1273 line_partial
= (signed char
) luacstring_partial
();
1274 if
(luacstring_final_line
() || line_partial
1275 || line_catcode_table
== NO_CAT_TABLE
)
1283 if
(lua_input_ln
(cur_file
, 0, true
)) { /* not end of file
*/
1284 firm_up_the_line
(); /* this sets |ilimit|
*/
1285 line_catcode_table
= DEFAULT_CAT_TABLE
;
1286 } else if
((every_eof
!= null
) && (!eof_seen[iindex])) {
1288 eof_seen
[iindex
] = true
; /* fake one empty line
*/
1289 begin_token_list
(every_eof
, every_eof_text
);
1290 return next_line_restart
;
1298 if
(tracing_nesting
> 0)
1299 if
((grp_stack
[in_open
] != cur_boundary
)
1300 ||
(if_stack
[in_open
] != cond_ptr
))
1301 if
(!((iname
== 19) ||
(iname
== 21)))
1302 file_warning
(); /* give warning for some unfinished groups and
/or conditionals
*/
1303 if
((iname
> 21) ||
(iname
== 20)) {
1304 report_stop_file
(filetype_tex
);
1307 update_terminal
(); /* show user that file has been read
*/
1311 if
(iname
== 21 ||
/* lua input
*/
1312 iname
== 19) { /* \.
{\\scantextokens
} */
1316 check_outer_validity
();
1318 return next_line_restart
;
1320 if
(inhibit_eol || end_line_char_inactive
)
1323 buffer
[ilimit
] = (packed_ASCII_code
) end_line_char
;
1325 iloc
= istart
; /* ready to read
*/
1327 if
(!terminal_input
) { /* \.
{\\read
} line has ended
*/
1330 return next_line_return
; /* OUTER */
1332 if
(input_ptr
> 0) { /* text was inserted during error recovery
*/
1334 return next_line_restart
; /* resume previous level
*/
1336 if
(selector
< log_only
)
1338 if
(interaction
> nonstop_mode
) {
1339 if
(end_line_char_inactive
)
1341 if
(ilimit
== istart
) { /* previous line was empty
*/
1342 tprint_nl
("(Please type a command or say `\\end')");
1346 prompt_input
("*"); /* input on-line into |buffer|
*/
1348 if
(end_line_char_inactive
)
1351 buffer
[ilimit
] = (packed_ASCII_code
) end_line_char
;
1355 fatal_error
("*** (job aborted, no legal \\end found)");
1356 /* nonstop mode
, which is intended for overnight batch processing
,
1357 never waits for on-line input
*/
1360 return next_line_ok
;
1363 @ Let's consider now what happens when |get_next| is looking at a token list.
1366 static boolean get_next_tokenlist
(void
)
1368 register halfword t
; /* a token
*/
1369 t
= token_info
(iloc
);
1370 iloc
= token_link
(iloc
); /* move to next
*/
1371 if
(t
>= cs_token_flag
) { /* a control sequence token
*/
1372 cur_cs
= t
- cs_token_flag
;
1373 cur_cmd
= eq_type
(cur_cs
);
1374 if
(cur_cmd
>= outer_call_cmd
) {
1375 if
(cur_cmd
== dont_expand_cmd
) { /* Get the next token
, suppressing expansion
*/
1376 /* The present point in the program is reached only when the |expand|
1377 routine has inserted a special marker into the input. In this special
1378 case
, |token_info
(iloc
)| is known to be a control sequence token
, and |token_link
(iloc
)=null|.
1380 cur_cs
= token_info
(iloc
) - cs_token_flag
;
1382 cur_cmd
= eq_type
(cur_cs
);
1383 if
(cur_cmd
> max_command_cmd
) {
1384 cur_cmd
= relax_cmd
;
1385 cur_chr
= no_expand_flag
;
1389 check_outer_validity
();
1392 cur_chr
= equiv
(cur_cs
);
1394 cur_cmd
= token_cmd
(t
);
1395 cur_chr
= token_chr
(t
);
1397 case left_brace_cmd
:
1400 case right_brace_cmd
:
1403 case out_param_cmd
: /* Insert macro parameter and |goto restart|
; */
1404 begin_token_list
(param_stack
[param_start
+ cur_chr
- 1], parameter
);
1412 @ Now we're ready to take the plunge into |get_next| itself. Parts of
1413 this routine are executed more often than any other instructions of \TeX.
1414 @^mastication@>@^inner loop@>
1416 @ |get_next| sets |cur_cmd|, |cur_chr|, and |cur_cs| to the next token.
1423 if
(istate
!= token_list
) {
1424 /* Input from external file
, |goto restart| if no input found
*/
1425 if
(!get_next_file
())
1430 goto RESTART
; /* list exhausted
, resume previous level
*/
1431 } else if
(!get_next_tokenlist
()) {
1432 goto RESTART
; /* parameter needs to be expanded
*/
1435 /* If an alignment entry has just ended
, take appropriate action
*/
1436 if
((cur_cmd
== tab_mark_cmd || cur_cmd
== car_ret_cmd
) && align_state == 0) {
1437 insert_vj_template
();
1443 @ Since |get_next| is used so frequently in \TeX, it is convenient
1444 to define three related procedures that do a little more:
1446 \yskip\hang|get_token| not only sets |cur_cmd| and |cur_chr|, it
1447 also sets |cur_tok|, a packed halfword version of the current token.
1449 \yskip\hang|get_x_token|, meaning ``get an expanded token,'' is like
1450 |get_token|, but if the current token turns out to be a user-defined
1451 control sequence (i.e., a macro call), or a conditional,
1452 or something like \.{\\topmark} or \.{\\expandafter} or \.{\\csname},
1453 it is eliminated from the input by beginning the expansion of the macro
1454 or the evaluation of the conditional.
1456 \yskip\hang|x_token| is like |get_x_token| except that it assumes that
1457 |get_next| has already been called.
1460 In fact, these three procedures account for almost every use of |get_next|.
1462 No new control sequences will be defined except during a call of
1463 |get_token|, or when \.{\\csname} compresses a token list, because
1464 |no_new_control_sequence| is always |true| at other times.
/* |get_token|: the visible statements clear |no_new_control_sequence|, set
   it back to |true|, and assign |cur_tok| in one of two forms: the packed
   value |token_val(cur_cmd, cur_chr)| or |cs_token_flag + cur_cs|.
   NOTE(review): the statement executed between the two flag writes and the
   test that chooses between the two |cur_tok| forms are not visible in this
   excerpt -- confirm against the full source. */
1467 void get_token
(void
)
1468 { /* sets |cur_cmd|
, |cur_chr|
, |cur_tok|
*/
1469 no_new_control_sequence
= false
;
1471 no_new_control_sequence
= true
;
1473 cur_tok
= token_val
(cur_cmd
, cur_chr
);
1475 cur_tok
= cs_token_flag
+ cur_cs
;
1479 void get_token_lua
(void
)
1481 register int callback_id
;
1482 callback_id
= callback_defined
(token_filter_callback
);
1483 if
(callback_id
> 0) {
1484 while
(istate
== token_list
&& iloc == null && iindex != v_template)
1486 /* there is some stuff we don't want to see inside the callback
*/
1487 if
(!(istate
== token_list
&&
1488 ((nofilter
== true
) ||
(iindex
== backed_up
&& iloc != null)))) {
1489 do_get_token_lua
(callback_id
);
1497 @ changes the string |s| to a token list
/* |string_to_toks|: builds a token list from the C string |ss|.
   The end of the input, |se|, is computed with |strlen|, so |ss| must be
   NUL-terminated.  The list is anchored at |temp_token_head|, whose link is
   reset to |null| before anything is appended; the function returns
   |token_link(temp_token_head)|, i.e. the first token of the new list.
   Each code obtained from |str2uni| becomes a token by adding |other_token|.
   NOTE(review): the loop header and whatever happens between the |str2uni|
   call and the |other_token| addition are not visible in this excerpt. */
1499 halfword string_to_toks
(char
*ss
)
1501 halfword p
; /* tail of the token list
*/
1502 halfword q
; /* new node being added to the token list via |store_new_token|
*/
1503 halfword t
; /* token being appended
*/
1504 char
*s
= ss
, *se
= ss
+ strlen
(s
);
1505 p
= temp_token_head
;
1506 set_token_link
(p
, null
);
1508 t
= (halfword
) str2uni
((unsigned char
*) s
);
1513 t
= other_token
+ t
;
1514 fast_store_new_token
(t
);
1516 return token_link
(temp_token_head
);
1519 @ The token lists for macros and for other things like \.{\\mark} and \.{\\output}
1520 and \.{\\write} are produced by a procedure called |scan_toks|.
1522 Before we get into the details of |scan_toks|, let's consider a much
1523 simpler task, that of converting the current string into a token list.
1524 The |str_toks| function does this; it classifies spaces as type |spacer|
1525 and everything else as type |other_char|.
1527 The token list created by |str_toks| begins at |link(temp_token_head)| and ends
1528 at the value |p| that is returned. (If |p=temp_token_head|, the list is empty.)
1530 |lua_str_toks| is almost identical, but it also escapes the
1531 symbols that |lua| considers special while scanning a literal string:
1531 the backslash, the two quote characters, and the newline.
1534 static halfword lua_str_toks
(lstring b
)
1535 { /* changes the string |str_pool
[b..pool_ptr
]| to a token list
*/
1536 halfword p
; /* tail of the token list
*/
1537 halfword q
; /* new node being added to the token list via |store_new_token|
*/
1538 halfword t
; /* token being appended
*/
1539 unsigned char
*k
; /* index into string
*/
1540 p
= temp_token_head
;
1541 set_token_link
(p
, null
);
1542 k
= (unsigned char
*) b.s
;
1543 while
(k
< (unsigned char
*) b.s
+ b.l
) {
1544 t
= pool_to_unichar
(k
);
1549 if
((t
== '\\'
) ||
(t
== '
"') || (t == '\'') || (t == 10)
1551 fast_store_new_token(other_token + '\\');
1556 t = other_token + t;
1558 fast_store_new_token(t);
1564 @ Incidentally, the main reason for wanting |str_toks| is the function |the_toks|,
1565 which has similar input/output characteristics.
1568 halfword str_toks(lstring s)
1569 { /* changes the string |str_pool[b..pool_ptr]| to a token list */
1570 halfword p; /* tail of the token list */
1571 halfword q; /* new node being added to the token list via |store_new_token| */
1572 halfword t; /* token being appended */
1573 unsigned char *k, *l; /* index into string */
1574 p = temp_token_head;
1575 set_token_link(p, null);
1579 t = pool_to_unichar(k);
1584 t = other_token + t;
1585 fast_store_new_token(t);
1590 @ Here's part of the |expand| subroutine that we are now ready to complete:
1592 void ins_the_toks(void)
1595 ins_list(token_link(temp_token_head));
1598 @ This routine, used in the next one, prints the job name, possibly
1599 modified by the |process_jobname| callback.
1602 static void print_job_name(void)
1605 char *s, *ss; /* C strings for jobname before and after processing */
1606 int callback_id, lua_retval;
1607 s = (char*)str_string(job_name);
1608 callback_id = callback_defined(process_jobname_callback);
1609 if (callback_id > 0) {
1610 lua_retval = run_callback(callback_id, "S-
>S
", s, &ss);
1611 if ((lua_retval == true) && (ss != NULL))
1620 @ Here is a routine that prints the result of a convert command, using
1621 the argument |i|. It returns |false| if it does not know how to print
1622 the code |c|. The function exists because lua code and tex code can
1623 both call it to convert something.
1626 static boolean print_convert_string(halfword c, int i)
1628 int ff; /* for use with |set_ff| */
1637 case roman_numeral_code:
1641 tprint(eTeX_version_string);
1643 case pdftex_revision_code:
1644 tprint(pdftex_revision);
1646 case luatex_revision_code:
1647 print(get_luatexrevision());
1649 case luatex_date_code:
1650 print_int(get_luatex_date_info());
1652 case pdftex_banner_code:
1653 tprint(pdftex_banner);
1655 case uniform_deviate_code:
1656 print_int(unif_rand(i));
1658 case normal_deviate_code:
1659 print_int(norm_rand());
1661 case format_name_code:
1667 case font_name_code:
1668 append_string((unsigned char *) font_name(i),
1669 (unsigned) strlen(font_name(i)));
1670 if (font_size(i) != font_dsize(i)) {
1672 print_scaled(font_size(i));
1679 case math_style_code:
1682 case pdf_font_name_code:
1683 case pdf_font_objnum_code:
1685 if (c == pdf_font_name_code)
1686 print_int(obj_info(static_pdf, pdf_font_num(ff)));
1688 print_int(pdf_font_num(ff));
1690 case pdf_font_size_code:
1691 print_scaled(font_size(i));
1694 case pdf_page_ref_code:
1695 print_int(pdf_get_obj(static_pdf, obj_type_page, i, false));
1697 case pdf_xform_name_code:
1698 print_int(obj_info(static_pdf, i));
1700 case eTeX_revision_code:
1701 tprint(eTeX_revision);
1711 int scan_lua_state(void) /* hh-ls: optional name or number (not optional name optional number) */
1713 /* Parse optional lua state integer, or an instance name to be stored in |sn| */
1714 /* Get the next non-blank non-relax non-call token */
1718 } while ((cur_cmd == spacer_cmd) || (cur_cmd == relax_cmd));
1719 back_input(); /* have to push it back, whatever it is */
1720 if (cur_cmd != left_brace_cmd) {
1721 if (scan_keyword("name
")) {
1722 (void) scan_toks(false, true);
1725 scan_register_num();
1726 if (get_lua_name(cur_val))
1727 sn = (cur_val - 65536);
1735 @ The procedure |conv_toks| uses |str_toks| to insert the token list
1736 for |convert| functions into the scanner; `\.{\\outer}' control sequences
1737 are allowed to follow `\.{\\string}' and `\.{\\meaning}'.
1739 The extra temp string |u| is needed because |pdf_scan_ext_toks| incorporates
1740 any pending string in its output. In order to save such a pending string,
1741 we have to create a temporary string that is destroyed immediately after.
1744 void conv_toks(void)
1746 int old_setting; /* holds |selector| setting */
1748 int save_scanner_status; /* |scanner_status| upon entry */
1749 halfword save_def_ref; /* |def_ref| upon entry, important if inside `\.{\\message}' */
1750 halfword save_warning_index;
1751 boolean bool; /* temp boolean */
1752 str_number s; /* first temp string */
1753 int sn; /* lua chunk name */
1754 str_number u = 0; /* third temp string, will become non-nil if a string is already being built */
1755 int i = 0; /* first temp integer */
1756 int j = 0; /* second temp integer */
1757 int c = cur_chr; /* desired type of conversion */
1759 /* Scan the argument for command |c| */
1765 case roman_numeral_code:
1770 save_scanner_status = scanner_status;
1771 scanner_status = normal;
1773 scanner_status = save_scanner_status;
1777 case font_name_code:
1781 case pdftex_revision_code:
1782 case luatex_revision_code:
1783 case luatex_date_code:
1784 case pdftex_banner_code:
1786 case pdf_font_name_code:
1787 case pdf_font_objnum_code:
1788 case pdf_font_size_code:
1790 if (cur_val == null_font)
1791 pdf_error("font
", "invalid font identifier
");
1792 if (c != pdf_font_size_code) {
1793 pdf_check_vf(cur_val);
1794 if (!font_used(cur_val))
1795 pdf_init_font(static_pdf, cur_val);
1798 case pdf_page_ref_code:
1801 pdf_error("pageref
", "invalid page number
");
1803 case left_margin_kern_code:
1804 case right_margin_kern_code:
1806 if ((box(cur_val) == null) || (type(box(cur_val)) != hlist_node))
1807 pdf_error("marginkern
", "a non-empty hbox expected
");
1809 case pdf_xform_name_code:
1811 check_obj_type(static_pdf, obj_type_xform, cur_val);
1813 case pdf_creation_date_code:
1814 ins_list(string_to_toks(getcreationdate(static_pdf)));
1817 case format_name_code:
1822 case pdf_colorstack_init_code:
1823 bool = scan_keyword("page
");
1824 if (scan_keyword("direct
"))
1825 cur_val = direct_always;
1826 else if (scan_keyword("page
"))
1827 cur_val = direct_page;
1829 cur_val = set_origin;
1830 save_scanner_status = scanner_status;
1831 save_warning_index = warning_index;
1832 save_def_ref = def_ref;
1833 u = save_cur_string();
1834 scan_toks(false, true); /*hh-ls was scan_pdf_ext_toks();*/
1835 s = tokens_to_string(def_ref);
1836 delete_token_ref(def_ref);
1837 def_ref = save_def_ref;
1838 warning_index = save_warning_index;
1839 scanner_status = save_scanner_status;
1840 cur_val = newcolorstack(s, cur_val, bool);
1842 cur_val_level = int_val_level;
1844 print_err("Too many color stacks
");
1845 help2("The number of color stacks is limited to
32768.
",
1846 "I'll use the default color stack
0 here.
");
1849 restore_cur_string(u);
1852 case uniform_deviate_code:
1855 case normal_deviate_code:
1857 case lua_escape_string_code:
1861 save_scanner_status = scanner_status;
1862 save_def_ref = def_ref;
1863 save_warning_index = warning_index;
1864 scan_toks(false, true); /*hh-ls was scan_pdf_ext_toks();*/
1865 bool = in_lua_escape;
1866 in_lua_escape = true;
1867 escstr.s = (unsigned char *) tokenlist_to_cstring(def_ref, false, &l);
1868 escstr.l = (unsigned) l;
1869 in_lua_escape = bool;
1870 delete_token_ref(def_ref);
1871 def_ref = save_def_ref;
1872 warning_index = save_warning_index;
1873 scanner_status = save_scanner_status;
1874 (void) lua_str_toks(escstr);
1875 ins_list(token_link(temp_token_head));
1880 case math_style_code:
1883 save_scanner_status = scanner_status;
1884 save_warning_index = warning_index;
1885 save_def_ref = def_ref;
1886 u = save_cur_string();
1887 scan_toks(false, true); /*hh-ls was scan_pdf_ext_toks();*/
1888 warning_index = save_warning_index;
1889 scanner_status = save_scanner_status;
1890 ins_list(token_link(def_ref));
1891 def_ref = save_def_ref;
1892 restore_cur_string(u);
1896 u = save_cur_string();
1897 save_scanner_status = scanner_status;
1898 save_def_ref = def_ref;
1899 save_warning_index = warning_index;
1900 sn = scan_lua_state();
1901 scan_toks(false, true); /*hh-ls was scan_pdf_ext_toks();*/
1903 warning_index = save_warning_index;
1904 def_ref = save_def_ref;
1905 scanner_status = save_scanner_status;
1907 luatokencall(s, sn);
1908 delete_token_ref(s);
1909 restore_cur_string(u); /* TODO: check this, was different */
1910 if (luacstrings > 0)
1914 case lua_function_code:
1917 pdf_error("luafunction
", "invalid number
");
1919 u = save_cur_string();
1921 luafunctioncall(cur_val);
1922 restore_cur_string(u);
1923 if (luacstrings > 0)
1927 case pdf_insert_ht_code:
1928 scan_register_num();
1930 case pdf_ximage_bbox_code:
1932 check_obj_type(static_pdf, obj_type_ximage, cur_val);
1933 i = obj_data_ptr(static_pdf, cur_val);
1936 if ((j < 1) || (j > 4))
1937 pdf_error("pdfximagebbox
", "invalid parameter
");
1939 /* Cases of 'Scan the argument for command |c|' */
1940 case eTeX_revision_code:
1943 confusion("convert
");
1947 old_setting = selector;
1948 selector = new_string;
1950 /* Print the result of command |c| */
1951 if (!print_convert_string(c, cur_val)) {
1962 case left_margin_kern_code:
1963 p = list_ptr(box(cur_val));
1964 if ((p != null) && (!is_char_node(p)) &&
1965 (type(p) == glue_node) && (subtype(p) == left_skip_code + 1))
1967 if ((p != null) && (!is_char_node(p)) &&
1968 (type(p) == margin_kern_node) && (subtype(p) == left_side))
1969 print_scaled(width(p));
1974 case right_margin_kern_code:
1975 q = list_ptr(box(cur_val));
1978 p = prev_rightmost(q, null);
1979 if ((p != null) && (!is_char_node(p)) && (type(p) == glue_node)
1980 && (subtype(p) == right_skip_code + 1))
1981 p = prev_rightmost(q, p);
1983 if ((p != null) && (!is_char_node(p)) &&
1984 (type(p) == margin_kern_node) && (subtype(p) == right_side))
1985 print_scaled(width(p));
1990 case pdf_colorstack_init_code:
1993 case pdf_insert_ht_code:
1996 while (i >= subtype(vlink(p)))
1998 if (subtype(p) == i)
1999 print_scaled(height(p));
2004 case pdf_ximage_bbox_code:
2005 if (is_pdf_image(i)) {
2008 print_scaled(epdf_orig_x(i));
2011 print_scaled(epdf_orig_y(i));
2014 print_scaled(epdf_orig_x(i) + epdf_xsize(i));
2017 print_scaled(epdf_orig_y(i) + epdf_ysize(i));
2025 case pdf_creation_date_code:
2026 case lua_escape_string_code:
2028 case lua_function_code:
2032 confusion("convert
");
2037 selector = old_setting;
2038 str = make_string();
2039 (void) str_toks(str_lstring(str));
2041 ins_list(token_link(temp_token_head));
2044 @ This boolean is keeping track of the lua string escape state
2046 boolean in_lua_escape;
2048 @ probably not needed anymore
/* Predicate on a command code: the result is true exactly when |c| equals
   |convert_cmd|. */
2050 boolean is_convert(halfword c)
2052 return (c == convert_cmd);
/* |the_convert_string|: produces the result of convert code |c| with
   argument |i| as a string.  Output is captured by switching |selector| to
   |new_string|; |print_convert_string(c, i)| is tried first, and
   |font_identifier_code| is handled separately via |print_font_identifier|.
   In both cases the captured text is collected with |make_string|, and
   |selector| is restored afterwards.
   NOTE(review): the declaration and initial value of |ret|, the path taken
   when neither branch applies, and the return statement are not visible in
   this excerpt. */
2055 str_number the_convert_string(halfword c, int i)
2057 int old_setting; /* saved |selector| setting */
2059 old_setting = selector;
2060 selector = new_string;
2061 if (print_convert_string(c, i)) {
2062 ret = make_string();
2063 } else if (c == font_identifier_code) {
2064 print_font_identifier(i);
2065 ret = make_string();
2067 selector = old_setting;
2071 @ Another way to create a token list is via the \.{\\read} command. The
2072 sixteen files potentially usable for reading appear in the following
2073 global variables. The value of |read_open[n]| will be |closed| if
2074 stream number |n| has not been opened or if it has been fully read;
2075 |just_open| if an \.{\\openin} but not a \.{\\read} has been done;
2076 and |normal| if it is open and ready to read the next line.
2079 FILE *read_file[16]; /* used for \.{\\read} */
2080 int read_open[17]; /* state of |read_file[n]| */
/* |initialize_read|: sets every entry of |read_open| with index 0 through
   16 (seventeen entries) to |closed|.
   NOTE(review): the declaration of |k| is not visible in this excerpt. */
2082 void initialize_read(void)
2085 for (k = 0; k <= 16; k++)
2086 read_open[k] = closed;
2089 @ The |read_toks| procedure constructs a token list like that for any
2090 macro definition, and makes |cur_val| point to it. Parameter |r| points
2091 to the control sequence that will receive this token list.
2094 void read_toks(int n, halfword r, halfword j)
2096 halfword p; /* tail of the token list */
2097 halfword q; /* new node being added to the token list via |store_new_token| */
2098 int s; /* saved value of |align_state| */
2099 int m; /* stream number */
2100 scanner_status = defining;
2104 set_token_ref_count(def_ref, 0);
2105 p = def_ref; /* the reference count */
2106 store_new_token(end_match_token);
2107 if ((n < 0) || (n > 15))
2112 align_state = 1000000; /* disable tab marks, etc. */
2114 /* Input and store tokens from the next line of the file */
2115 begin_file_reading();
2117 if (read_open[m] == closed) {
2118 /* Input for \.{\\read} from the terminal */
2119 /* Here we input on-line into the |buffer| array, prompting the user explicitly
2120 if |n>=0|. The value of |n| is set negative so that additional prompts
2121 will not be given in the case of multi-line input. */
2122 if (interaction > nonstop_mode) {
2134 ("*** (cannot \\read from terminal in nonstop modes
)");
2137 } else if (read_open[m] == just_open) {
2138 /* Input the first line of |read_file[m]| */
2139 /* The first line of a file must be treated specially, since |lua_input_ln|
2140 must be told not to start with |get|. */
2141 if (lua_input_ln(read_file[m], (m + 1), false)) {
2142 read_open[m] = normal;
2144 lua_a_close_in(read_file[m], (m + 1));
2145 read_open[m] = closed;
2149 /* Input the next line of |read_file[m]| */
2150 /* An empty line is appended at the end of a |read_file|. */
2151 if (!lua_input_ln(read_file[m], (m + 1), true)) {
2152 lua_a_close_in(read_file[m], (m + 1));
2153 read_open[m] = closed;
2154 if (align_state != 1000000) {
2156 print_err("File ended within \\read
");
2157 help1("This \\read has unbalanced braces.
");
2158 align_state = 1000000;
2165 if (end_line_char_inactive)
2168 buffer[ilimit] = (packed_ASCII_code) int_par(end_line_char_code);
2172 /* Handle \.{\\readline} and |goto done|; */
2174 while (iloc <= ilimit) { /* current line not yet finished */
2175 do_buffer_to_unichar(cur_chr, iloc);
2177 cur_tok = space_token;
2179 cur_tok = cur_chr + other_token;
2180 store_new_token(cur_tok);
2186 break; /* |cur_cmd=cur_chr=0| will occur at the end of the line */
2187 if (align_state < 1000000) { /* unmatched `\.\}' aborts the line */
2190 } while (cur_tok != 0);
2191 align_state = 1000000;
2194 store_new_token(cur_tok);
2199 } while (align_state != 1000000);
2201 scanner_status = normal;
/* |tokens_to_string|: renders a token list into a new string.
   |selector| is switched to |new_string|, |show_token_list| prints the list
   starting at |token_link(p)| (the node after |p|), |selector| is restored,
   and the result of |make_string()| is returned.
   Entering with |selector| already equal to |new_string| is reported with
   the message below.
   NOTE(review): the call that emits the message and the declaration of
   |old_setting| are not visible in this excerpt. */
2206 str_number tokens_to_string(halfword p)
2207 { /* return a string from tokens list */
2209 if (selector == new_string)
2211 "tokens_to_string
() called while selector
= new_string
");
2212 old_setting = selector;
2213 selector = new_string;
2214 show_token_list(token_link(p), null, -1);
2215 selector = old_setting;
2216 return make_string();
2220 #define make_room(a) \
2221 if ((unsigned)i+a+1>alloci) { \
2222 ret = xrealloc(ret,(alloci+64)); \
2223 alloci = alloci + 64; \
2227 #define append_i_byte(a) ret[i++] = (char)(a)
2229 #define Print_char(a) make_room(1); append_i_byte(a)
/* |Print_uchar(s)|: appends the code |s| to the output buffer through
   |append_i_byte|.  The visible branches emit a two-byte form for
   |s<=0x7FF|, a three-byte form for |s<=0xFFFF|, and a four-byte form for
   the remaining codes below |0x110000|.  Codes at or above |0x110000| lie
   beyond the last Unicode code point (0x10FFFF) and are emitted as the
   single byte |s-0x110000|.
   The offset used to be written |0x11000|; that only produced the right
   byte because |append_i_byte| casts to |char| and |0x110000-0x11000| has a
   zero low byte.  The offset now matches the threshold being tested. */
2231 #define Print_uchar(s) { \
2235 } else if (s<=0x7FF) { \
2236 append_i_byte(0xC0 + (s / 0x40)); \
2237 append_i_byte(0x80 + (s % 0x40)); \
2238 } else if (s<=0xFFFF) { \
2239 append_i_byte(0xE0 + (s / 0x1000)); \
2240 append_i_byte(0x80 + ((s % 0x1000) / 0x40)); \
2241 append_i_byte(0x80 + ((s % 0x1000) % 0x40)); \
2242 } else if (s>=0x110000) { \
2243 append_i_byte(s-0x110000); /* same offset as the test above */ \
2245 append_i_byte(0xF0 + (s / 0x40000)); \
2246 append_i_byte(0x80 + ((s % 0x40000) / 0x1000)); \
2247 append_i_byte(0x80 + (((s % 0x40000) % 0x1000) / 0x40)); \
2248 append_i_byte(0x80 + (((s % 0x40000) % 0x1000) % 0x40)); \
2252 #define Print_esc(b) { \
2253 const char *v = b; \
2254 if (e>0 && e<STRING_OFFSET) { \
2257 make_room(strlen(v)); \
2258 while (*v) { append_i_byte(*v); v++; } \
2261 #define is_cat_letter(a) \
2262 (get_char_cat_code(pool_to_unichar(str_string((a)))) == 11)
2264 @ The actual token conversion in this function is now functionally
2265 equivalent to |show_token_list|, except that it always prints the whole token list.
2267 TODO: check whether this causes problems in the lua library.
2270 char *tokenlist_to_cstring(int pp, int inhibit_par, int *siz)
2272 register int p, c, m;
2278 int match_chr = '#';
2280 unsigned alloci = 1024;
2288 ret = xmalloc(alloci);
2289 p = token_link(p); /* skip refcount */
2291 e = int_par(escape_char_code);
2294 if (p < (int) fix_mem_min || p > (int) fix_mem_end) {
2295 Print_esc("CLOBBERED.
");
2298 infop = token_info(p);
2299 if (infop >= cs_token_flag) {
2300 if (!(inhibit_par && infop == par_token)) {
2301 q = infop - cs_token_flag;
2302 if (q < hash_base) {
2304 Print_esc("csname
");
2305 Print_esc("endcsname
");
2307 Print_esc("IMPOSSIBLE.
");
2309 } else if ((q >= undefined_control_sequence)
2310 && ((q <= eqtb_size)
2311 || (q > eqtb_size + hash_extra))) {
2312 Print_esc("IMPOSSIBLE.
");
2313 } else if ((cs_text(q) < 0) || (cs_text(q) >= str_ptr)) {
2314 Print_esc("NONEXISTENT.
");
2316 str_number txt = cs_text(q);
2317 sh = makecstring(txt);
2319 if (is_active_cs(txt)) {
2331 if ((!single_letter(txt)) || is_cat_letter(txt)) {
2342 m = token_cmd(infop);
2343 c = token_chr(infop);
2345 case left_brace_cmd:
2346 case right_brace_cmd:
2347 case math_shift_cmd:
2353 case other_char_cmd:
2362 Print_uchar(match_chr);
2364 Print_char(c + '0');
/* |tokenlist_to_lstring|: wraps |tokenlist_to_cstring| in a freshly
   |xmalloc|'ed |lstring|.  Field |s| receives the C string produced for
   |pp| (with |inhibit_par| passed through unchanged) and field |l| receives
   the size reported through |siz|.
   NOTE(review): the declaration of |siz| and the return statement are not
   visible in this excerpt; the result is presumably released with
   |free_lstring| -- confirm. */
2400 lstring *tokenlist_to_lstring(int pp, int inhibit_par)
2403 lstring *ret = xmalloc(sizeof(lstring));
2404 ret->s = (unsigned char *) tokenlist_to_cstring(pp, inhibit_par, &siz);
2405 ret->l = (size_t) siz;
2410 void free_lstring(lstring * ls)