beta-0.89.2
[luatex.git] / source / texk / web2c / luatexdir / tex / inputstack.h
blob378fabe24b850b1396779aa7b6090bae684eb67f
1 /* inputstack.h
3 Copyright 2009 Taco Hoekwater <taco@luatex.org>
5 This file is part of LuaTeX.
7 LuaTeX is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2 of the License, or (at your
10 option) any later version.
12 LuaTeX is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 License for more details.
17 You should have received a copy of the GNU General Public License along
18 with LuaTeX; if not, see <http://www.gnu.org/licenses/>. */
21 #ifndef INPUTSTACK_H
22 # define INPUTSTACK_H 1
25 The state of \TeX's input mechanism appears in the input stack, whose
26 entries are records with six fields, called |state|, |index|, |start|, |loc|,
27 |limit|, and |name|.
30 typedef struct in_state_record {
31 halfword start_field;
32 halfword loc_field;
33 halfword limit_field;
34 halfword name_field;
35 int synctex_tag_field; /* stack the tag of the current file */
36 signed int cattable_field:16; /* category table used by the current line (see textoken.c) */
37 quarterword state_field:8;
38 quarterword index_field:8;
39 boolean partial_field:8; /* is the current line partial? (see textoken.c) */
40 boolean nofilter_field:8; /* used by token filtering */
41 } in_state_record;
43 extern in_state_record *input_stack;
44 extern int input_ptr;
45 extern int max_in_stack;
46 extern in_state_record cur_input; /* the ``top'' input state */
/*
 * Shorthand for the fields of |cur_input|.  Each name expands to a plain
 * member access, so it can be read and assigned like a variable.
 */
# define iloc cur_input.loc_field       /* location of first unread character in |buffer| */
# define istate cur_input.state_field   /* current scanner state */
# define iindex cur_input.index_field   /* reference for buffer information */
# define istart cur_input.start_field   /* starting position in |buffer| */
# define ilimit cur_input.limit_field   /* end of current line in |buffer| */
# define iname cur_input.name_field     /* name of the current file */
# define nofilter cur_input.nofilter_field      /* is token filtering explicitly disallowed? */
# define synctex_tag cur_input.synctex_tag_field        /* tag of the current file */
# define line_catcode_table cur_input.cattable_field    /* category table used by the current line */
# define line_partial cur_input.partial_field   /* is the current line partial? */
60 Let's look more closely now at the control variables
61 (|state|,~|index|,~|start|,~|loc|,~|limit|,~|name|),
62 assuming that \TeX\ is reading a line of characters that have been input
63 from some file or from the user's terminal. There is an array called
64 |buffer| that acts as a stack of all lines of characters that are
65 currently being read from files, including all lines on subsidiary
66 levels of the input stack that are not yet completed. \TeX\ will return to
67 the other lines when it is finished with the present input file.
69 (Incidentally, on a machine with byte-oriented addressing, it might be
70 appropriate to combine |buffer| with the |str_pool| array,
71 letting the buffer entries grow downward from the top of the string pool
72 and checking that these two tables don't bump into each other.)
74 The line we are currently working on begins in position |start| of the
75 buffer; the next character we are about to read is |buffer[loc]|; and
76 |limit| is the location of the last character present. If |loc>limit|,
77 the line has been completely read. Usually |buffer[limit]| is the
78 |end_line_char|, denoting the end of a line, but this is not
79 true if the current line is an insertion that was entered on the user's
80 terminal in response to an error message.
82 The |name| variable is a string number that designates the name of
83 the current file, if we are reading a text file. It is zero if we
84 are reading from the terminal; it is |n+1| if we are reading from
85 input stream |n|, where |0<=n<=16|. (Input stream 16 stands for
86 an invalid stream number; in such cases the input is actually from
87 the terminal, under control of the procedure |read_toks|.)
88 Finally |18<=name<=20| indicates that we are reading a pseudo file
89 created by the \.{\\scantokens} or \.{\\scantextokens} command.
91 The |state| variable has one of three values, when we are scanning such
92 files:
93 $$\baselineskip 15pt\vbox{\halign{#\hfil\cr
94 1) |state=mid_line| is the normal state.\cr
95 2) |state=skip_blanks| is like |mid_line|, but blanks are ignored.\cr
96 3) |state=new_line| is the state at the beginning of a line.\cr}}$$
97 These state values are assigned numeric codes so that if we add the state
98 code to the next character's command code, we get distinct values. For
99 example, `|mid_line+spacer|' stands for the case that a blank
100 space character occurs in the middle of a line when it is not being
101 ignored; after this case is processed, the next value of |state| will
102 be |skip_blanks|.
# define max_char_code 15       /* largest catcode for individual characters */

/*
 * Values of |state| while a line of characters is being scanned.
 * Successive codes lie |max_char_code+1| apart, so adding a state code to
 * a character's command code yields a distinct value for every
 * (state, command) pair.
 */
typedef enum {
    mid_line = 1,               /* |state| code when scanning a line of characters */
    skip_blanks = 2 + max_char_code,    /* |state| code when ignoring blanks */
    new_line = 3 + max_char_code + max_char_code,       /* |state| code at start of line */
} state_codes;
114 Additional information about the current line is available via the
115 |index| variable, which counts how many lines of characters are present
116 in the buffer below the current level. We have |index=0| when reading
117 from the terminal and prompting the user for each line; then if the user types,
118 e.g., `\.{\\input paper}', we will have |index=1| while reading
119 the file \.{paper.tex}. However, it does not follow that |index| is the
120 same as the input stack pointer, since many of the levels on the input
121 stack may come from token lists. For example, the instruction `\.{\\input
122 paper}' might occur in a token list.
124 The global variable |in_open| is equal to the |index|
125 value of the highest non-token-list level. Thus, the number of partially read
126 lines in the buffer is |in_open+1|, and we have |in_open=index|
127 when we are not reading a token list.
129 If we are not currently reading from the terminal, or from an input
130 stream, we are reading from the file variable |input_file[index]|. We use
131 the notation |terminal_input| as a convenient abbreviation for |name=0|,
132 and |cur_file| as an abbreviation for |input_file[index]|.
134 The global variable |line| contains the line number in the topmost
135 open file, for use in error messages. If we are not reading from
136 the terminal, |line_stack[index]| holds the line number for the
137 enclosing level, so that |line| can be restored when the current
138 file has been read. Line numbers should never be negative, since the
139 negative of the current line number is used to identify the user's output
140 routine in the |mode_line| field of the semantic nest entries.
142 If more information about the input state is needed, it can be
143 included in small arrays like those shown here. For example,
144 the current page or segment number in the input file might be
145 put into a variable |@!page|, maintained for enclosing levels in
146 `\ignorespaces|@!page_stack:array[1..max_in_open] of integer|\unskip'
147 by analogy with |line_stack|.
148 @^system dependencies@>
/* Abbreviations for the current input level; both rely on the |iname| and |iindex| accessors. */
# define terminal_input (iname==0)      /* are we reading from the terminal? */
# define cur_file input_file[iindex]    /* the current |alpha_file| variable */
154 extern int in_open;
155 extern int open_parens;
156 extern alpha_file *input_file;
157 extern int line;
158 extern int *line_stack;
159 extern str_number *source_filename_stack;
160 extern char **full_source_filename_stack;
163 Users of \TeX\ sometimes forget to balance left and right braces properly,
164 and one of the ways \TeX\ tries to spot such errors is by considering an
165 input file as broken into subfiles by control sequences that
166 are declared to be \.{\\outer}.
168 A variable called |scanner_status| tells \TeX\ whether or not to complain
169 when a subfile ends. This variable has six possible values:
171 \yskip\hang|normal|, means that a subfile can safely end here without incident.
173 \yskip\hang|skipping|, means that a subfile can safely end here, but not a file,
174 because we're reading past some conditional text that was not selected.
176 \yskip\hang|defining|, means that a subfile shouldn't end now because a
177 macro is being defined.
179 \yskip\hang|matching|, means that a subfile shouldn't end now because a
180 macro is being used and we are searching for the end of its arguments.
182 \yskip\hang|aligning|, means that a subfile shouldn't end now because we are
183 not finished with the preamble of an \.{\\halign} or \.{\\valign}.
185 \yskip\hang|absorbing|, means that a subfile shouldn't end now because we are
186 reading a balanced token list for \.{\\message}, \.{\\write}, etc.
188 \yskip\noindent
189 If the |scanner_status| is not |normal|, the variable |warning_index| points
190 to the |eqtb| location for the relevant control sequence name to print
191 in an error message.
/*
 * The non-|normal| values of |scanner_status|.  The |normal| value itself
 * is not declared in this enum.
 * NOTE(review): |normal| is presumably 0 and defined elsewhere -- confirm.
 */
typedef enum {
    skipping = 1,               /* |scanner_status| when passing conditional text */
    defining = 2,               /* |scanner_status| when reading a macro definition */
    matching = 3,               /* |scanner_status| when reading macro arguments */
    aligning = 4,               /* |scanner_status| when reading an alignment preamble */
    absorbing = 5,              /* |scanner_status| when reading a balanced text */
} scanner_states;
202 extern int scanner_status;
203 extern pointer warning_index;
204 extern pointer def_ref;
206 extern void runaway(void);
209 However, the discussion about input state really applies only to the
210 case that we are inputting from a file. There is another important case,
211 namely when we are currently getting input from a token list. In this case
212 |state=token_list|, and the conventions about the other state variables
213 are different:
215 \yskip\hang|loc| is a pointer to the current node in the token list, i.e.,
216 the node that will be read next. If |loc=null|, the token list has been
217 fully read.
219 \yskip\hang|start| points to the first node of the token list; this node
220 may or may not contain a reference count, depending on the type of token
221 list involved.
223 \yskip\hang|token_type|, which takes the place of |index| in the
224 discussion above, is a code number that explains what kind of token list
225 is being scanned.
227 \yskip\hang|name| points to the |eqtb| address of the control sequence
228 being expanded, if the current token list is a macro.
230 \yskip\hang|param_start|, which takes the place of |limit|, tells where
231 the parameters of the current macro begin in the |param_stack|, if the
232 current token list is a macro.
234 \yskip\noindent The |token_type| can take several values, depending on
235 where the current token list came from:
237 \yskip\hang|parameter|, if a parameter is being scanned;
239 \hang|u_template|, if the \<u_j> part of an alignment
240 template is being scanned;
242 \hang|v_template|, if the \<v_j> part of an alignment
243 template is being scanned;
245 \hang|backed_up|, if the token list being scanned has been inserted as
246 `to be read again';
248 \hang|inserted|, if the token list being scanned has been inserted as
249 the text expansion of a \.{\\count} or similar variable;
251 \hang|macro|, if a user-defined control sequence is being scanned;
253 \hang|output_text|, if an \.{\\output} routine is being scanned;
255 \hang|every_par_text|, if the text of \.{\\everypar} is being scanned;
257 \hang|every_math_text|, if the text of \.{\\everymath} is being scanned;
259 \hang|every_display_text|, if the text of \.{\\everydisplay} is being scanned;
261 \hang|every_hbox_text|, if the text of \.{\\everyhbox} is being scanned;
263 \hang|every_vbox_text|, if the text of \.{\\everyvbox} is being scanned;
265 \hang|every_job_text|, if the text of \.{\\everyjob} is being scanned;
267 \hang|every_cr_text|, if the text of \.{\\everycr} is being scanned;
269 \hang|mark_text|, if the text of a \.{\\mark} is being scanned;
\hang|every_eof_text|, if the text of \.{\\everyeof} is being scanned;
271 \hang|write_text|, if the text of a \.{\\write} is being scanned.
273 \yskip\noindent
274 The codes for |output_text|, |every_par_text|, etc., are equal to a constant
275 plus the corresponding codes for token list parameters |output_routine_loc|,
276 |every_par_loc|, etc.
278 The token list begins with a reference count if and
279 only if |token_type>=macro|.
280 @^reference counts@>
282 Since \eTeX's additional token list parameters precede |toks_base|, the
283 corresponding token types must precede |write_text|.
/*
 * Names used while a token list is being scanned: |state| is
 * |token_list|, and two fields of the input record are reused under
 * different names.
 */
# define token_list 0           /* |state| code when scanning a token list */
# define token_type iindex      /* type of current token list */
# define param_start ilimit     /* base of macro parameters in |param_stack| */
/*
 * Kinds of token list that can be on the input stack (the values of
 * |token_type|).  A token list begins with a reference count if and only
 * if its type is |macro| or greater.
 */
typedef enum {
    parameter = 0,              /* |token_type| code for parameter */
    u_template = 1,             /* |token_type| code for \<u_j> template */
    v_template = 2,             /* |token_type| code for \<v_j> template */
    backed_up = 3,              /* |token_type| code for text to be reread */
    inserted = 4,               /* |token_type| code for inserted texts */
    macro = 5,                  /* |token_type| code for defined control sequences */
    output_text = 6,            /* |token_type| code for output routines */
    every_par_text = 7,         /* |token_type| code for \.{\\everypar} */
    every_math_text = 8,        /* |token_type| code for \.{\\everymath} */
    every_display_text = 9,     /* |token_type| code for \.{\\everydisplay} */
    every_hbox_text = 10,       /* |token_type| code for \.{\\everyhbox} */
    every_vbox_text = 11,       /* |token_type| code for \.{\\everyvbox} */
    every_job_text = 12,        /* |token_type| code for \.{\\everyjob} */
    every_cr_text = 13,         /* |token_type| code for \.{\\everycr} */
    mark_text = 14,             /* |token_type| code for \.{\\topmark}, etc. */
    every_eof_text = 15,        /* |token_type| code for \.{\\everyeof} */
    write_text = 16,            /* |token_type| code for \.{\\write} */
} token_types;
311 extern pointer *param_stack;
312 extern int param_ptr;
313 extern int max_param_stack;
315 extern int align_state;
317 extern int base_ptr;
319 extern void show_context(void);
320 extern void set_trick_count(void);
322 # define back_list(A) begin_token_list(A,backed_up) /* backs up a simple token list */
323 # define ins_list(A) begin_token_list(A,inserted) /* inserts a simple token list */
325 extern void begin_token_list(halfword p, quarterword t);
326 extern void end_token_list(void);
327 extern void back_input(void);
328 extern void reinsert_token(boolean a, halfword pp);
329 extern void begin_file_reading(void);
330 extern void end_file_reading(void);
331 extern void clear_for_error_prompt(void);
333 extern void initialize_inputstack(void);
335 extern halfword pseudo_files;
336 extern void pseudo_from_string(void);
337 extern void pseudo_start(void);
338 extern void lua_string_start(void);
339 extern boolean pseudo_input(void);
340 extern void pseudo_close(void);
343 #endif