3 % Copyright
2009-2010 Taco Hoekwater
<taco@@luatex.org
>
5 % This file is part of LuaTeX.
7 % LuaTeX is free software
; you can redistribute it and
/or modify it under
8 % the terms of the GNU General Public License as published by the Free
9 % Software Foundation
; either version
2 of the License
, or
(at your
10 % option
) any later version.
12 % LuaTeX is distributed in the hope that it will be useful
, but WITHOUT
13 % ANY WARRANTY
; without even the implied warranty of MERCHANTABILITY or
14 % FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 % License for more details.
17 % You should have received a copy of the GNU General Public License along
18 % with LuaTeX
; if not
, see
<http
://www.gnu.org
/licenses
/>.
/* Shorthands for two integer parameters: each name expands to |int_par|
   applied to the parameter code of the same name. */
25 #define end_line_char int_par
(end_line_char_code
)
26 #define error_context_lines int_par
(error_context_lines_code
)
/* The input stack and its bookkeeping. |show_context| copies |cur_input| into
   |input_stack[base_ptr]|, and |pop_input| reloads |cur_input| from
   |input_stack[--input_ptr]|. The array pointer starts out NULL.
   NOTE(review): presumably allocated during start-up elsewhere -- confirm. */
28 in_state_record
*input_stack
= NULL;
29 int input_ptr
= 0; /* first unused location of |input_stack| */
30 int max_in_stack
= 0; /* largest value of |input_ptr| when pushing */
31 in_state_record cur_input
; /* the ``top'' input state */
/* State for input that is read line by line. |begin_file_reading| sets
   |iindex| from |in_open| and fills the stacks below at that position. */
34 int in_open
= 0; /* the number of lines in the buffer, less one */
35 int open_parens
= 0; /* the number of open text files */
/* NOTE(review): presumably one |alpha_file| per open input level -- confirm
   against the allocation site. */
36 alpha_file
*input_file
= NULL;
37 int line
= 0; /* current line number in the current source file */
/* |begin_file_reading| saves |line| in |line_stack[iindex]| and
   |end_file_reading| restores |line| from the same entry. */
38 int
*line_stack
= NULL;
/* Entry |iindex| is reset to 0 by |begin_file_reading|. */
39 str_number
*source_filename_stack
= NULL;
/* Entry |iindex| is reset to NULL by |begin_file_reading|. */
40 char
**full_source_filename_stack
= NULL;
/* Scanner bookkeeping. |initialize_inputstack| sets |scanner_status| to
   |normal| and |warning_index| to |null|. */
43 int scanner_status
= 0; /* can a subfile end now? */
44 pointer warning_index
= null
; /* identifier relevant to non-|normal| scanner status */
45 pointer def_ref
= null
; /* reference count of token list being defined */
@ Here is a procedure that uses |scanner_status| to print a warning message
when a subfile has ended, and at certain other crucial times:
53 pointer p
= null
; /* head of runaway list
*/
54 if
(scanner_status
> skipping
) {
55 switch
(scanner_status
) {
57 tprint_nl
("Runaway definition");
61 tprint_nl
("Runaway argument");
65 tprint_nl
("Runaway preamble");
69 tprint_nl
("Runaway text");
73 /* there are no other cases
*/
78 show_token_list
(token_link
(p
), null
, error_line
- 10);
@ The |param_stack| is an auxiliary array used to hold pointers to the token
lists for parameters at the current level and subsidiary levels of input.
This stack is maintained with convention~(2), and it grows at a different
rate from the others.
/* The parameter stack. |begin_token_list| records |param_ptr| in
   |param_start|; for a |macro| level, |end_token_list| flushes entries while
   |param_ptr > param_start|. */
88 pointer
*param_stack
= NULL; /* token list pointers for parameters */
89 int param_ptr
= 0; /* first unused entry in |param_stack| */
90 int max_param_stack
= 0; /* largest value of |param_ptr|, will be |<=param_size+9| */
@ The input routines must also interact with the processing of
\.{\\halign} and \.{\\valign}, since the appearance of tab marks and
\.{\\cr} in certain places is supposed to trigger the beginning of special
$v_j$ template text in the scanner. This magic is accomplished by an
|align_state| variable that is increased by~1 when a `\.{\char'173}' is
scanned and decreased by~1 when a `\.{\char'175}' is scanned. The |align_state|
is nonzero during the $u_j$ template, after which it is set to zero; the
$v_j$ template begins when a tab mark or \.{\\cr} occurs at a time that
|align_state=0|.
/* |initialize_inputstack| sets this to 1000000, and |end_token_list| tests it
   against 500000 when a |u_template| level ends. */
103 int align_state
= 0; /* group level with respect to current alignment */
@ Thus, the ``current input state'' can be very complicated indeed; there
can be many levels and each level can arise in a variety of ways. The
|show_context| procedure, which is used by \TeX's error-reporting routine to
print out the current input state on all levels down to the most recent
line of characters from an input file, illustrates most of these conventions.
The global variable |base_ptr| contains the lowest level that was
displayed by this procedure.
/* |show_context| starts by setting this to |input_ptr|. */
114 int base_ptr
= 0; /* shallowest level shown by |show_context| */
@ The status at each level is indicated by printing two lines, where the first
line indicates what was read so far and the second line shows what remains
to be read. The context is cropped, if necessary, so that the first line
contains at most |half_error_line| characters, and the second contains
at most |error_line|. Non-current input levels whose |token_type| is
`|backed_up|' are shown only if they have not been fully read.
/* Print, through |tprint_nl|, the bracketed label that names token-list type
   |t|; for example |every_math_text| yields "<everymath> " and
   |every_vbox_text| yields "<everyvbox> ". Each label ends with a space. */
124 static void print_token_list_type
(int t
)
128 tprint_nl
("<argument> ");
132 tprint_nl
("<template> ");
136 tprint_nl
("<recently read> ");
138 tprint_nl
("<to be read again> ");
141 tprint_nl
("<inserted text> ");
148 tprint_nl
("<output> ");
151 tprint_nl
("<everypar> ");
153 case every_math_text
:
154 tprint_nl
("<everymath> ");
156 case every_display_text
:
157 tprint_nl
("<everydisplay> ");
159 case every_hbox_text
:
160 tprint_nl
("<everyhbox> ");
162 case every_vbox_text
:
163 tprint_nl
("<everyvbox> ");
166 tprint_nl
("<everyjob> ");
169 tprint_nl
("<everycr> ");
172 tprint_nl
("<mark> ");
175 tprint_nl
("<everyeof> ");
178 tprint_nl
("<write> ");
182 /* this should never happen
*/
@ Here it is necessary to explain a little trick. We don't want to store a long
string that corresponds to a token list, because that string might take up
lots of memory; and we are printing during a time when an error message is
being given, so we dare not do anything that might overflow one of \TeX's
tables. So `pseudoprinting' is the answer: We enter a mode of printing
that stores characters into a buffer of length |error_line|, where character
$k+1$ is placed into \hbox{|trick_buf[k mod error_line]|} if
|k<trick_count|, otherwise character |k| is dropped. Initially we set
|tally:=0| and |trick_count:=1000000|; then when we reach the
point where transition from line 1 to line 2 should occur, we
set |first_count:=tally| and |trick_count:=@tmax@>(error_line,
tally+1+error_line-half_error_line)|. At the end of the
pseudoprinting, the values of |first_count|, |tally|, and
|trick_count| give us all the information we need to print the two lines,
and all of the necessary text is in |trick_buf|.

Namely, let |l| be the length of the descriptive information that appears
on the first line. The length of the context information gathered for that
line is |k=first_count|, and the length of the context information
gathered for line~2 is $m=\min(|tally|, |trick_count|)-k$. If |l+k<=h|,
where |h=half_error_line|, we print |trick_buf[0..k-1]| after the
descriptive information on line~1, and set |n:=l+k|; here |n| is the
length of line~1. If $l+k>h$, some cropping is necessary, so we set |n:=h|
and print `\.{...}' followed by
$$\hbox{|trick_buf[(l+k-h+3)..k-1]|,}$$
where subscripts of |trick_buf| are circular modulo |error_line|. The
second line consists of |n|~spaces followed by |trick_buf[k..(k+m-1)]|,
unless |n+m>error_line|; in the latter case, further cropping is done.
This is easier to program than to explain.
@ The following code sets up the print routines so that they will gather
the desired information.
/* Fix |trick_count| for the remainder of a pseudoprint: it becomes
   |tally + 1 + error_line - half_error_line|, raised to |error_line| when
   that sum is smaller. */
221 void set_trick_count
(void
)
224 trick_count
= tally
+ 1 + error_line
- half_error_line
;
225 if
(trick_count
< error_line
)
226 trick_count
= error_line
;
/* Start pseudoprinting. |trick_count| is set to 1000000, the value that
   |show_context| later compares against to decide whether |set_trick_count|
   still has to be performed. */
229 #define begin_pseudoprint
() do
{ \
233 trick_count
=1000000; \
/* Pseudoprint the current buffer line: after |begin_pseudoprint|, the
   characters |buffer[istart..j-1]| are sent through |print_char|. The test
   |buffer[ilimit]==end_line_char| takes part in choosing |j|, the effective
   end of the line. The macro assigns |i| and |j| without declaring them, so
   the caller must provide both. */
236 #define PSEUDO_PRINT_THE_LINE
() do
{ \
237 begin_pseudoprint
(); \
238 if
(buffer
[ilimit
]==end_line_char
) \
241 j
=ilimit
+1; /* determine the effective end of the line
*/ \
243 for
(i
=istart
;i
<=j-1
;i
++) { \
246 print_char
(buffer
[i
]); \
We do not mind ending up somewhat below the maximum |error_line| width, even
when there is more room on the line. If the length really is an issue, then
any limit is; after all, one can simply set the length to a larger value.
/* Print the character that starts at |trick_buf[q % error_line]|, dispatching
   on its first byte |c|: for |c < 224| one following byte is printed, for
   |c < 240| two, and for |c < 245| three, each fetched at
   |(q+k) % error_line| so that indexing wraps around the buffer.
   The macro assigns |c| without declaring it, so the caller must provide it.
   NOTE(review): |q| is expanded unparenthesized in |q % error_line|; pass a
   plain variable, not an expression such as |a+b|. */
257 #define print_valid_utf8
(q
) do
{ \
258 c
= (int
)trick_buf
[q
% error_line
]; \
261 } else if
(c
< 194) { \
263 } else if
(c
< 224) { \
265 print_char
(trick_buf
[(q
+1) % error_line
]); \
266 } else if
(c
< 240) { \
268 print_char
(trick_buf
[(q
+1) % error_line
]); \
269 print_char
(trick_buf
[(q
+2) % error_line
]); \
270 } else if
(c
< 245) { \
272 print_char
(trick_buf
[(q
+1) % error_line
]); \
273 print_char
(trick_buf
[(q
+2) % error_line
]); \
274 print_char
(trick_buf
[(q
+3) % error_line
]); \
281 void show_context
(void
)
282 { /* prints where the scanner is
*/
283 int old_setting
; /* saved |selector| setting
*/
284 int nn
= -1; /* number of contexts shown so far
, less one
*/
285 boolean bottom_line
= false
; /* have we reached the final context to be shown?
*/
286 int i
; /* index into |buffer|
*/
287 int j
; /* end of current line in |buffer|
*/
288 int l
; /* length of descriptive information on line
1 */
289 int m
; /* context information gathered for line
2 */
290 int n
; /* length of line
1 */
291 int p
; /* starting or ending place in |trick_buf|
*/
292 int q
; /* temporary index
*/
293 int c
; /* used in sanitizer
*/
294 base_ptr
= input_ptr
;
295 input_stack
[base_ptr
] = cur_input
;
296 /* store current state
*/
298 cur_input
= input_stack
[base_ptr
]; /* enter into the context
*/
299 if
(istate
!= token_list
) {
300 if
((iname
> 21) ||
(base_ptr
== 0))
303 if
((base_ptr
== input_ptr
) || bottom_line ||
(nn
< error_context_lines
)) {
304 /* Display the current context
*/
305 if
((base_ptr
== input_ptr
) ||
(istate
!= token_list
) ||
(token_type
!= backed_up
) ||
(iloc
!= null
)) {
306 /* we omit backed-up token lists that have already been read
*/
307 tally
= 0; /* get ready to count characters
*/
308 old_setting
= selector
;
309 if
(istate
!= token_list
) {
311 Print location of current line
313 This routine should be changed
, if necessary
, to give the best possible
314 indication of where the current line resides in the input file. For example
,
315 on some systems it is best to print both a page and line number.
318 if
(terminal_input
) {
322 tprint_nl
("<insert> ");
328 print_int
(iname
- 1);
333 if
(iindex
== in_open
) {
335 } else
{ /* input from a pseudo file
*/
336 print_int
(line_stack
[iindex
+ 1]);
340 PSEUDO_PRINT_THE_LINE
();
342 print_token_list_type
(token_type
);
345 if
(token_type
< macro
)
346 show_token_list
(istart
, iloc
, 100000);
348 show_token_list
(token_link
(istart
), iloc
, 100000); /* avoid reference count
*/
350 /* stop pseudoprinting
*/
351 selector
= old_setting
;
352 /* Print two lines using the tricky pseudoprinted information
*/
353 if
(trick_count
== 1000000)
355 /* |set_trick_count| must be performed
*/
356 if
(tally
< trick_count
)
357 m
= tally
- first_count
;
359 m
= trick_count
- first_count
; /* context on line
2 */
360 if
(l
+ first_count
<= half_error_line
) {
365 p
= l
+ first_count
- half_error_line
+ 3;
368 for
(q
= p
; q
<= first_count
- 1; q
++)
371 /* print |n| spaces to begin line~
2 */
372 for
(q
= 1; q
<= n
; q
++)
374 if
(m
+ n
<= error_line
)
377 p
= first_count
+ (error_line
- n
- 3);
378 for
(q
= first_count
; q
<= p
- 1; q
++)
380 if
(m
+ n
> error_line
)
384 } else if
(nn
== error_context_lines
) {
387 /* omitted if |error_context_lines
<0|
*/
393 /* restore original state
*/
394 cur_input
= input_stack
[input_ptr
];
@ The following subroutines change the input status in commonly needed ways.

First comes |push_input|, which stores the current state and creates a
new level (having, initially, the same properties as the old).
404 /* enter a new input level
, save the old
*/
/* Leave the current input level: decrement |input_ptr| and reload
   |cur_input| from the slot it then designates. The expansion carries no
   trailing semicolon, so the caller supplies one. */
406 # define pop_input
() \
407 cur_input
=input_stack
[--input_ptr
]
/* Save |cur_input| in |input_stack[input_ptr]|. When |input_ptr| exceeds
   |max_in_stack| the high-water mark is raised first, and reaching
   |stack_size| at that point reports an "input stack size" overflow.
   NOTE(review): the expansion starts with a bare |if| rather than a
   do/while(0) wrapper, so take care when using it as the body of an
   unbraced if/else. */
409 # define push_input
() \
410 if
(input_ptr
> max_in_stack
) { \
411 max_in_stack
= input_ptr
; \
412 if
(input_ptr
== stack_size
) \
413 overflow
("input stack size", (unsigned
) stack_size
); \
415 input_stack
[input_ptr
] = cur_input
; \
Here is a procedure that starts a new level of token-list input, given
a token list |p| and its type |t|. If |t=macro|, the calling routine should
set |name| and |loc|.
425 void begin_token_list
(halfword p
, quarterword t
)
430 token_type
= (unsigned char
) t
;
432 /* the token list starts with a reference count
*/
435 param_start
= param_ptr
;
437 iloc
= token_link
(p
);
438 if
(int_par
(tracing_macros_code
) > 1) {
443 else if
(t
== write_text
)
446 print_cmd_chr
(assign_toks_cmd
,
447 t
- output_text
+ output_routine_loc
);
450 end_diagnostic
(false
);
@ When a token list has been fully scanned, the following computations
should be done as we leave that level of input. The |token_type| tends
to be equal to either |backed_up| or |inserted| about 2/3 of the time.
464 void end_token_list
(void
)
466 /* leave a token-list input level
*/
467 if
(token_type
>= backed_up
) {
468 /* token list to be deleted
*/
469 if
(token_type
<= inserted
) {
472 /* update reference count
*/
473 delete_token_ref
(istart
);
474 if
(token_type
== macro
) {
475 /* parameters must be flushed
*/
476 while
(param_ptr
> param_start
) {
478 flush_list
(param_stack
[param_ptr
]);
482 } else if
(token_type
== u_template
) {
483 if
(align_state
> 500000)
486 fatal_error
("(interwoven alignment preambles are not allowed)");
@ Sometimes \TeX\ has read too far and wants to ``unscan'' what it has
seen. The |back_input| procedure takes care of this by putting the token
just scanned back into the input stream, ready to be read again. This
procedure can be used only if |cur_tok| represents the token to be
replaced. Some applications of \TeX\ use this procedure a lot,
so it has been slightly optimized for speed.
502 /* undoes one token of input
*/
504 void back_input
(void
)
506 halfword p
; /* a token list of length one
*/
507 while
((istate
== token_list
) && (iloc == null) && (token_type != v_template)) {
508 /* conserve stack space
*/
512 set_token_info
(p
, cur_tok
);
513 if
(cur_tok
< right_brace_limit
) {
514 if
(cur_tok
< left_brace_limit
)
522 token_type
= backed_up
;
523 iloc
= p
; /* that was |back_list
(p
)|
, without procedure overhead
*/
526 @ Insert token |p| into \TeX's input
529 void reinsert_token
(boolean a
, halfword pp
)
537 set_token_info
(p
, cur_tok
);
538 set_token_link
(p
, iloc
);
541 if
(cur_tok
< right_brace_limit
) {
542 if
(cur_tok
< left_brace_limit
)
@ The |begin_file_reading| procedure starts a new level of input for lines
of characters to be read from a file, or as an insertion from the
terminal. It does not take care of opening the file, nor does it set |loc|
or |limit| or |line|.
@^system dependencies@>
/* Open a new level of line-oriented input. An overflow of "text input
   levels" is reported once |in_open| has reached |max_in_open|, and the
   buffer is checked when |first == buf_size|. The per-level stack entries at
   |iindex| are then reset or filled in and |iname| is set to 0. */
560 void begin_file_reading
(void
)
/* refuse to nest deeper than |max_in_open| levels */
562 if
(in_open
== max_in_open
)
563 overflow
("text input levels", (unsigned
) max_in_open
);
564 if
(first
== buf_size
)
565 check_buffer_overflow
(first
);
/* NOTE(review): |in_open| is narrowed to |unsigned char| here; confirm that
   |max_in_open| can never exceed what that type holds. */
568 iindex
= (unsigned char
) in_open
;
/* initialise the entries that belong to the new level */
569 source_filename_stack
[iindex
] = 0;
570 full_source_filename_stack
[iindex
] = NULL;
571 eof_seen
[iindex
] = false
;
572 grp_stack
[iindex
] = cur_boundary
;
573 if_stack
[iindex
] = cond_ptr
;
/* remember the caller's line number; |end_file_reading| restores it */
574 line_stack
[iindex
] = line
;
577 iname
= 0; /* |terminal_input| is now |true|
*/
578 line_catcode_table
= DEFAULT_CAT_TABLE
;
579 line_partial
= false
;
580 /* Prepare terminal input
{\sl Sync\TeX
} information
*/
@ Conversely, the variables must be downdated when such a level of input
is finished:
/* Finish a level of line-oriented input. |line| is restored from
   |line_stack[iindex]|, the value saved by |begin_file_reading|. The way the
   level is closed depends on |iname|: values 18 through 20 are handled
   separately, 21 is closed with |luacstring_close|, and |lua_a_close_in| is
   applied to |cur_file|.
   NOTE(review): presumably |lua_a_close_in| is the fallback for every other
   |iname| -- confirm. */
588 void end_file_reading
(void
)
591 line
= line_stack
[iindex
];
592 if
((iname
>= 18) && (iname <= 20))
594 else if
(iname
== 21)
595 luacstring_close
(iindex
);
597 lua_a_close_in
(cur_file
, 0); /* forget it
*/
@ In order to keep the stack from overflowing during a long sequence of
inserted `\.{\\show}' commands, the following routine removes completed
error-inserted lines from memory.
/* Loop for as long as the current level is not a token list, is terminal
   input, lies above the bottom of the stack (|input_ptr > 0|) and has been
   read completely (|iloc > ilimit|). */
607 void clear_for_error_prompt
(void
)
609 while
((istate
!= token_list
) && terminal_input
610 && (input_ptr > 0) && (iloc > ilimit))
616 @ To get \TeX's whole input mechanism going
, we perform the following actions.
619 void initialize_inputstack
(void
)
623 source_filename_stack
[0] = 0;
625 full_source_filename_stack
[0] = NULL;
638 } while
(first
!= 0);
639 scanner_status
= normal
;
640 warning_index
= null
;
650 line_catcode_table
= DEFAULT_CAT_TABLE
;
651 line_partial
= false
;
652 align_state
= 1000000;
653 if
(!init_terminal
())
654 exit
(EXIT_FAILURE
); /* |goto final_end|
; */
656 first
= last
+ 1; /* |init_terminal| has set |loc| and |last|
*/
@ The global variable |pseudo_files| is used to maintain a stack of
pseudo files. The |pseudo_lines| field of each pseudo file points to
a linked list of variable size nodes representing lines not yet
processed: the |subtype| field contains the size of this node;
all the following words contain ASCII codes.
667 hh
: todo
: if this is really critical code
(which it isn't
) then we can
668 consider a c stack and store a pointer to a line in the line node instead
669 which saves splitting here and reconstructing later.
/* |pseudo_from_string| stores the previous value of this variable in the
   |vlink| of each newly built pseudo file. */
675 halfword pseudo_files
; /* stack of pseudo files */
677 static halfword string_to_pseudo
(str_number str
, int nl
)
679 halfword i
, r
, q
= null
;
683 halfword h
= new_node
(pseudo_file_node
, 0);
684 unsigned char
*s
= str_string
(str
);
685 len
= (unsigned
) str_length
(str
);
688 unsigned m
= l
; /* start of current line
*/
689 while
((l
< len
) && (s[l] != nl))
691 sz
= (int
) (l
- m
+ 7) / 4;
694 r
= new_node
(pseudo_line_node
, sz
);
701 varmem
[++i
].qqqq
= w
;
703 w.b0
= (quarterword
) (l
> m ? s
[m
++] : ' '
);
704 w.b1
= (quarterword
) (l
> m ? s
[m
++] : ' '
);
705 w.b2
= (quarterword
) (l
> m ? s
[m
++] : ' '
);
706 w.b3
= (quarterword
) (l
> m ? s
[m
] : ' '
);
707 varmem
[++i
].qqqq
= w
;
711 vlink
(q
) = r
; /* no prev node here so no couple_nodes
!*/
720 @ The |pseudo_start| procedure initiates reading from a pseudo file.
723 void pseudo_from_string
(void
)
725 str_number s
; /* string to be converted into a pseudo file
*/
726 halfword p
; /* for list construction
*/
728 /* Convert string |s| into a new pseudo file
*/
729 p
= string_to_pseudo
(s
, int_par
(new_line_char_code
));
730 vlink
(p
) = pseudo_files
;
733 /* Initiate input from new pseudo file
*/
734 begin_file_reading
(); /* set up |cur_file| and new level of input
*/
737 iloc
= ilimit
+ 1; /* force line read
*/
738 if
(int_par
(tracing_scan_tokens_code
) > 0) {
739 if
(term_offset
> max_print_line
- 3)
741 else if
((term_offset
> 0) ||
(file_offset
> 0))
750 /* Prepare pseudo file
{\sl Sync\TeX
} information
*/
754 void pseudo_start
(void
)
758 old_setting
= selector
;
759 selector
= new_string
;
760 token_show
(temp_token_head
);
761 selector
= old_setting
;
762 flush_list
(token_link
(temp_token_head
));
764 pseudo_from_string
();
/* Begin a new input level through |begin_file_reading|, set |iloc| past
   |ilimit| so that the next read fetches a fresh line, and hand the level's
   |iindex| to |luacstring_start|. */
768 void lua_string_start
(void
)
770 begin_file_reading
(); /* set up |cur_file| and new level of input
*/
773 iloc
= ilimit
+ 1; /* force line read
*/
775 luacstring_start
(iindex
);
778 @ Here we read a line from the current pseudo file into |buffer|.
781 /* inputs the next line or returns |false|
*/
783 boolean pseudo_input
(void
)
785 halfword p
; /* current line from pseudo file
*/
786 int sz
; /* size of node |p|
*/
787 four_quarters w
; /* four ASCII codes
*/
788 halfword r
; /* loop index
*/
789 last
= first
; /* cf.\ Matthew
19\thinspace
:\thinspace30
*/
790 p
= pseudo_lines
(pseudo_files
);
794 pseudo_lines
(pseudo_files
) = vlink
(p
);
796 if
(4 * sz
- 3 >= buf_size
- last
)
797 check_buffer_overflow
(last
+ 4 * sz
);
799 for
(r
= p
+ 1; r
<= p
+ sz
- 1; r
++) {
801 buffer
[last
] = (packed_ASCII_code
) w.b0
;
802 buffer
[last
+ 1] = (packed_ASCII_code
) w.b1
;
803 buffer
[last
+ 2] = (packed_ASCII_code
) w.b2
;
804 buffer
[last
+ 3] = (packed_ASCII_code
) w.b3
;
807 if
(last
>= max_buf_stack
)
808 max_buf_stack
= last
+ 1;
809 while
((last
> first
) && (buffer[last - 1] == ' '))
816 @ When we are done with a pseudo file we `close' it.
819 /* close the top level pseudo file
*/
821 void pseudo_close
(void
)
824 p
= vlink
(pseudo_files
);
825 flush_node
(pseudo_files
);