3 % Copyright
2009-2010 Taco Hoekwater
<taco@@luatex.org
>
5 % This file is part of LuaTeX.
7 % LuaTeX is free software
; you can redistribute it and
/or modify it under
8 % the terms of the GNU General Public License as published by the Free
9 % Software Foundation
; either version
2 of the License
, or
(at your
10 % option
) any later version.
12 % LuaTeX is distributed in the hope that it will be useful
, but WITHOUT
13 % ANY WARRANTY
; without even the implied warranty of MERCHANTABILITY or
14 % FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 % License for more details.
17 % You should have received a copy of the GNU General Public License along
18 % with LuaTeX
; if not
, see
<http
://www.gnu.org
/licenses
/>.
24 halfword last_cs_name
= null_cs
;
26 /* |eqtb
[p
]| has just been restored or retained
*/
28 static void diagnostic_trace
(halfword p
, const char
*s
)
36 end_diagnostic
(false
);
40 #define par_shape_ptr equiv
(par_shape_loc
)
42 void show_eqtb_meaning
(halfword n
); /* forward
*/
44 @ Now that we have studied the data structures for \TeX's semantic routines
,
45 we ought to consider the data structures used by its syntactic routines. In
46 other words
, our next concern will be
47 the tables that \TeX\ looks at when it is scanning
48 what the user has written.
50 The biggest and most important such table is called |eqtb|. It holds the
51 current ``equivalents'' of things
; i.e.
, it explains what things mean
52 or what their current values are
, for all quantities that are subject to
53 the nesting structure provided by \TeX's grouping mechanism. There are six
56 \yskip\hang
1) |eqtb
[null_cs
]| holds the current equivalent of the
57 zero-length control sequence.
59 \yskip\hang
2) |eqtb
[hash_base..
(glue_base-1
)]| holds the current
60 equivalents of single- and multiletter control sequences.
62 \yskip\hang
3) |eqtb
[glue_base..
(local_base-1
)]| holds the current
63 equivalents of glue parameters like the current baselineskip.
65 \yskip\hang
4) |eqtb
[local_base..
(int_base-1
)]| holds the current
66 equivalents of local halfword quantities like the current box registers
,
67 the current ``catcodes
,'' the current font
, and a pointer to the current
70 \yskip\hang
5) |eqtb
[int_base..
(dimen_base-1
)]| holds the current
71 equivalents of fullword integer parameters like the current hyphenation
74 \yskip\hang
6) |eqtb
[dimen_base..eqtb_size
]| holds the current equivalents
75 of fullword dimension parameters like the current hsize or amount of
78 \yskip\noindent Note that
, for example
, the current amount of
79 baselineskip glue is determined by the setting of a particular location
80 in region~
3 of |eqtb|
, while the current meaning of the control sequence
81 `\.
{\\baselineskip
}'
(which might have been changed by \.
{\\def
} or
82 \.
{\\let
}) appears in region~
2.
84 @ The last two regions of |eqtb| have fullword values instead of the
85 three fields |eq_level|
, |eq_type|
, and |equiv|. An |eq_type| is unnecessary
,
86 but \TeX\ needs to store the |eq_level| information in another array
91 halfword eqtb_top
; /* maximum of the |eqtb|
*/
92 quarterword xeq_level
[(eqtb_size
+ 1)]; /* grouping level of fullword |eqtb| entries, indexed by |eqtb| position; these entries have no |eq_level| field of their own */
95 void initialize_equivalents
(void
)
98 for
(k
= int_base
; k
<= eqtb_size
; k
++)
99 xeq_level
[k
] = level_one
;
102 @ The nested structure provided by `$\.
{\char'
173}\ldots\.
{\char'
175}$' groups
103 in \TeX\ means that |eqtb| entries valid in outer groups should be saved
104 and restored later if they are overridden inside the braces. When a new |eqtb|
105 value is being assigned
, the program therefore checks to see if the previous
106 entry belongs to an outer level. In such a case
, the old value is placed
107 on the |save_stack| just before the new value enters |eqtb|. At the
108 end of a grouping level
, i.e.
, when the right brace is sensed
, the
109 |save_stack| is used to restore the outer values
, and the inner ones are
112 Entries on the |save_stack| are of type |save_record|. The top item on
113 this stack is |save_stack
[p
]|
, where |p
=save_ptr-1|
; it contains three
114 fields called |save_type|
, |save_level|
, and |save_value|
, and it is
115 interpreted in one of four ways
:
117 \yskip\hang
1) If |save_type
(p
)=restore_old_value|
, then
118 |save_value
(p
)| is a location in |eqtb| whose current value should
119 be destroyed at the end of the current group and replaced by |save_word
(p-1
)|
120 (|save_type
(p-1
)==saved_eqtb|
).
121 Furthermore if |save_value
(p
)>=int_base|
, then |save_level
(p
)| should
122 replace the corresponding entry in |xeq_level|
(if |save_value
(p
)<int_base|
,
123 then the level is part of |save_word
(p-1
)|
).
125 \yskip\hang
2) If |save_type
(p
)=restore_zero|
, then |save_value
(p
)|
126 is a location in |eqtb| whose current value should be destroyed at the end
127 of the current group
, when it should be
128 replaced by the current value of |eqtb
[undefined_control_sequence
]|.
130 \yskip\hang
3) If |save_type
(p
)=insert_token|
, then |save_value
(p
)|
131 is a token that should be inserted into \TeX's input when the current
134 \yskip\hang
4) If |save_type
(p
)=level_boundary|
, then |save_level
(p
)|
135 is a code explaining what kind of group we were previously in
, and
136 |save_value
(p
)| points to the level boundary word at the bottom of
137 the entries for that group. Furthermore
, |save_value
(p-1
)| contains the
138 source line number at which the current level of grouping was entered
;
139 this field itself has a type
: |save_type
(p-1
)==saved_line|.
141 Besides this `official' use
, various subroutines push temporary
142 variables on the save stack when it is handy to do so. These all have
143 an explicit |save_type|
, and they are
:
145 |saved_adjust| signifies an adjustment is being scanned
,
146 |saved_insert| an insertion is being scanned
,
147 |saved_disc| the \.
{\\discretionary
} sublist we are working on right now
,
148 |saved_boxtype| whether a \.
{\\localbox
} is \.
{\\left
} or \.
{\\right
},
149 |saved_textdir| a text direction to be restored
,
150 |saved_eqno| differentiates between \.
{\\eqno
} and \.
{\\leqno
},
151 |saved_choices| the \.
{\\mathchoices
} sublist we are working on right now
,
152 |saved_math| an interrupted math list
,
153 |saved_boxcontext| the box context value
,
154 |saved_boxspec| the box \.
{to
} or \.
{spread
} specification
,
155 |saved_boxdir| the box \.
{dir
} specification
,
156 |saved_boxattr| the box \.
{attr
} specification
,
157 |saved_boxpack| the box \.
{pack
} specification.
159 @ The global variable |cur_group| keeps track of what sort of group we are
160 currently in. Another global variable
, |cur_boundary|
, points to the
161 topmost |level_boundary| word. And |cur_level| is the current depth of
162 nesting. The routines are designed to preserve the condition that no entry
163 in the |save_stack| or in |eqtb| ever has a level greater than |cur_level|.
166 save_record
*save_stack
; /* the save stack; its top item is |save_stack[save_ptr-1]| */
167 int save_ptr
; /* first unused entry on |save_stack|
*/
168 int max_save_stack
; /* maximum usage of save stack
*/
169 quarterword cur_level
= level_one
; /* current nesting level for groups
*/
170 group_code cur_group
= bottom_level
; /* current group type
*/
171 int cur_boundary
; /* where the current level begins
*/
173 @ At this time it might be a good idea for the reader to review the introduction
174 to |eqtb| that was given above just before the long lists of parameter names.
175 Recall that the ``outer level'' of the program is |level_one|
, since
176 undefined control sequences are assumed to be ``defined'' at |level_zero|.
180 @ The following macro is used to test if there is room for up to eight more
181 entries on |save_stack|. By making a conservative test like this
, we can
182 get by with testing for overflow in only a few places.
185 #define check_full_save_stack
() do
{ \
186 if
(save_ptr
>max_save_stack
) { \
187 max_save_stack
=save_ptr
; \
188 if
(max_save_stack
>save_size-8
) \
189 overflow
("save size",(unsigned
)save_size
); \
193 @ Procedure |new_save_level| is called when a group begins. The
194 argument is a group identification code like `|hbox_group|'. After
195 calling this routine
, it is safe to put six more entries on |save_stack|.
197 In some cases integer-valued items are placed onto the
198 |save_stack| just below a |level_boundary| word
, because this is a
199 convenient place to keep information that is supposed to ``pop up'' just
200 when the group has finished.
201 For example
, when `\.
{\\hbox to
100pt
}' is being treated
, the
100pt
202 dimension is stored on |save_stack| just before |new_save_level| is
206 void new_save_level
(group_code c
)
207 { /* begin a new level of grouping
*/
208 check_full_save_stack
();
209 set_saved_record
(0, saved_line
, 0, line
);
211 save_type
(save_ptr
) = level_boundary
;
212 save_level
(save_ptr
) = cur_group
;
213 save_value
(save_ptr
) = cur_boundary
;
214 if
(cur_level
== max_quarterword
)
215 overflow
("grouping levels", max_quarterword
- min_quarterword
);
216 /* quit if |
(cur_level
+1)| is too big to be stored in |eqtb|
*/
217 cur_boundary
= save_ptr
;
219 if
(int_par
(tracing_groups_code
) > 0)
226 static const char
*save_stack_type
(int v
)
229 switch
(save_type
(v
)) {
230 case restore_old_value
: s
= "restore_old_value"; break
;
231 case restore_zero
: s
= "restore_zero"; break
;
232 case insert_token
: s
= "insert_token"; break
;
233 case level_boundary
: s
= "level_boundary"; break
;
234 case saved_line
: s
= "saved_line"; break
;
235 case saved_adjust
: s
= "saved_adjust"; break
;
236 case saved_insert
: s
= "saved_insert"; break
;
237 case saved_disc
: s
= "saved_disc"; break
;
238 case saved_boxtype
: s
= "saved_boxtype"; break
;
239 case saved_textdir
: s
= "saved_textdir"; break
;
240 case saved_eqno
: s
= "saved_eqno"; break
;
241 case saved_choices
: s
= "saved_choices"; break
;
242 case saved_math
: s
= "saved_math"; break
;
243 case saved_boxcontext
: s
= "saved_boxcontext"; break
;
244 case saved_boxspec
: s
= "saved_boxspec"; break
;
245 case saved_boxdir
: s
= "saved_boxdir"; break
;
246 case saved_boxattr
: s
= "saved_boxattr"; break
;
247 case saved_boxpack
: s
= "saved_boxpack"; break
;
248 case saved_eqtb
: s
= "saved_eqtb"; break
;
255 void print_save_stack
(void
)
259 selector
= term_and_log
;
261 for
(i
= (save_ptr
- 1); i
>= 0; i--
) {
262 tprint
("save_stack[");
269 tprint
(save_stack_type
(i
));
270 switch
(save_type
(i
)) {
271 case restore_old_value
:
273 show_eqtb_meaning
(save_value
(i
));
275 if
(save_value
(i
) >= int_base
) {
276 print_int
(save_word
(i
- 1).cint
);
278 print_int
(eq_type_field
(save_word
(i
- 1)));
279 print_char
('
,'
); /* |print_int
(eq_level_field
(save_word
(i-1
)));|
*/
280 print_int
(equiv_field
(save_word
(i
- 1)));
286 show_eqtb_meaning
(save_value
(i
));
291 halfword p
= get_avail
();
292 set_token_info
(p
, save_value
(i
));
293 show_token_list
(p
, null
, 1);
298 tprint
(", old group=");
299 print_int
(save_level
(i
));
300 tprint
(", boundary = ");
301 print_int
(save_value
(i
));
303 print_int
(save_value
(i
- 1));
308 print_int
(save_level
(i
)); /* vadjust vs vadjust pre
*/
312 print_int
(save_value
(i
)); /* insert number
*/
314 case saved_boxtype
: /* \.
{\\localleftbox
} vs \.
{\\localrightbox
} */
316 print_int
(save_value
(i
));
318 case saved_eqno
: /* \.
{\\eqno
} vs \.
{\\leqno
} */
320 print_int
(save_value
(i
));
325 print_int
(save_value
(i
));
328 tprint
(", listptr=");
329 print_int
(save_value
(i
));
331 case saved_boxcontext
:
333 print_int
(save_value
(i
));
337 print_int
(save_level
(i
));
339 print_int
(save_value
(i
));
344 print_dir
(dir_dir
(save_value
(i
)));
349 print_int
(save_value
(i
));
359 end_diagnostic
(true
);
362 @ The \.
{\\showgroups
} command displays all currently active grouping
365 @ The modifications of \TeX\ required for the display produced by the
366 |show_save_groups| procedure were first discussed by Donald~E. Knuth in
367 {\sl TUGboat\
/} {\bf
11}, 165--170 and
499--511, 1990.
368 @^Knuth
, Donald Ervin@
>
370 In order to understand a group type we also have to know its mode.
371 Since unrestricted horizontal modes are not associated with grouping
,
372 they are skipped when traversing the semantic nest.
375 void show_save_groups
(void
)
377 int p
= nest_ptr
; /* index into |nest|
*/
379 save_pointer v
= save_ptr
; /* saved value of |save_ptr|
*/
380 quarterword l
= cur_level
; /* saved value of |cur_level|
*/
381 group_code c
= cur_group
; /* saved value of |cur_group|
*/
382 int a
= 1; /* to keep track of alignments
*/
385 const char
*s
= NULL;
386 save_ptr
= cur_boundary
;
393 if
(cur_group
== bottom_level
)
396 m
= nest
[p
].mode_field
;
401 } while
(m
== hmode
);
409 case adjusted_hbox_group
:
428 tprint
("align entry");
440 tprint_esc
("noalign");
444 tprint_esc
("output");
451 tprint_esc
("discretionary");
452 for
(i
= 1; i
< 3; i
++)
453 if
(i
<= saved_value
(-2))
457 case math_choice_group
:
458 tprint_esc
("mathchoice");
459 for
(i
= 1; i
< 4; i
++)
460 if
(i
<= saved_value
(-3)) /* different offset because |
-2==saved_textdir|
*/
465 if
(saved_type
(-1) == saved_adjust
) {
466 tprint_esc
("vadjust");
467 if
(saved_level
(-1) != 0)
470 tprint_esc
("insert");
471 print_int
(saved_value
(-1));
479 case semi_simple_group
:
481 tprint_esc
("begingroup");
484 case math_shift_group
:
487 } else if
(nest
[p
].mode_field
== mmode
) {
488 print_cmd_chr
(eq_no_cmd
, saved_value
(-2));
494 case math_left_group
:
495 if
(subtype
(nest
[p
+ 1].eTeX_aux_field
) == left_noad_side
)
498 tprint_esc
("middle");
502 confusion
("showgroups");
505 /* Show the box context
*/
509 if
(abs
(nest
[p
].mode_field
) == vmode
)
517 print_scaled
(abs
(i
));
519 } else if
(i
< ship_out_flag
) {
520 if
(i
>= global_box_flag
) {
521 tprint_esc
("global");
522 i
= i
- (global_box_flag
- box_flag
);
524 tprint_esc
("setbox");
525 print_int
(i
- box_flag
);
528 print_cmd_chr
(leader_ship_cmd
, i
- (leader_flag
- a_leaders
));
533 /* Show the box packaging info
*/
535 /* offsets may vary
*/
537 while
(saved_type
(ii
) != saved_boxspec
)
539 if
(saved_value
(ii
) != 0) {
541 if
(saved_level
(ii
) == exactly
)
545 print_scaled
(saved_value
(ii
));
554 cur_group
= save_level
(save_ptr
);
555 save_ptr
= save_value
(save_ptr
);
563 @ Just before an entry of |eqtb| is changed
, the following procedure should
564 be called to update the other data structures properly. It is important
565 to keep in mind that reference counts in |mem| include references from
566 within |save_stack|
, so these counts must be handled carefully.
570 /* we don't need to destroy when an assignment has the same node
*/
572 void eq_destroy
(memory_word w
)
573 { /* gets ready to forget |w|
*/
574 halfword q
; /* |equiv| field of |w|
*/
575 switch
(eq_type_field
(w
)) {
579 case long_outer_call_cmd
:
580 delete_token_ref
(equiv_field
(w
));
583 flush_node
(equiv_field
(w
));
586 q
= equiv_field
(w
); /* we need to free a \.
{\\parshape
} block
*/
589 break
; /* such a block is |
2n
+1| words long
, where |n
=vinfo
(q
)|
*/
591 flush_node_list
(equiv_field
(w
));
598 @ To save a value of |eqtb
[p
]| that was established at level |l|
, we
599 can use the following subroutine.
602 void eq_save
(halfword p
, quarterword l
)
603 { /* saves |eqtb
[p
]|
*/
604 check_full_save_stack
();
605 if
(l
== level_zero
) {
606 save_type
(save_ptr
) = restore_zero
;
608 save_word
(save_ptr
) = eqtb
[p
];
609 save_type
(save_ptr
) = saved_eqtb
;
611 save_type
(save_ptr
) = restore_old_value
;
613 save_level
(save_ptr
) = l
;
614 save_value
(save_ptr
) = p
;
618 @ The procedure |eq_define| defines an |eqtb| entry having specified
619 |eq_type| and |equiv| fields
, and saves the former value if appropriate.
620 This procedure is used only for entries in the first four regions of |eqtb|
,
621 i.e.
, only for entries that have |eq_type| and |equiv| fields.
622 After calling this routine
, it is safe to put four more entries on
623 |save_stack|
, provided that there was room for four more entries before
624 the call
, since |eq_save| makes the necessary test.
626 @ new data for |eqtb|
628 void eq_define
(halfword p
, quarterword t
, halfword e
)
630 boolean trace
= int_par
(tracing_assigns_code
) > 0;
631 if
((eq_type
(p
) == t
) && (equiv(p) == e)) {
633 diagnostic_trace
(p
, "reassigning");
638 diagnostic_trace
(p
, "changing");
639 if
(eq_level
(p
) == cur_level
)
641 else if
(cur_level
> level_one
)
642 eq_save
(p
, eq_level
(p
));
643 set_eq_level
(p
, cur_level
);
647 diagnostic_trace
(p
, "into");
650 @ The counterpart of |eq_define| for the remaining
(fullword
) positions in
651 |eqtb| is called |eq_word_define|. Since |xeq_level
[p
]>=level_one| for all
652 |p|
, a `|restore_zero|' will never be used in this case.
655 void eq_word_define
(halfword p
, int w
)
657 boolean trace
= int_par
(tracing_assigns_code
) > 0;
658 if
(eqtb
[p
].cint
== w
) {
660 diagnostic_trace
(p
, "reassigning");
664 diagnostic_trace
(p
, "changing");
665 if
(xeq_level
[p
] != cur_level
) {
666 eq_save
(p
, xeq_level
[p
]);
667 xeq_level
[p
] = cur_level
;
671 diagnostic_trace
(p
, "into");
675 @ The |eq_define| and |eq_word_define| routines take care of local definitions.
676 @^global definitions@
>
677 Global definitions are done in almost the same way
, but there is no need
678 to save old values
, and the new value is associated with |level_one|.
681 void geq_define
(halfword p
, quarterword t
, halfword e
)
682 { /* global |eq_define|
*/
683 boolean trace
= int_par
(tracing_assigns_code
) > 0;
685 diagnostic_trace
(p
, "globally changing");
687 set_eq_level
(p
, level_one
);
691 diagnostic_trace
(p
, "into");
694 void geq_word_define
(halfword p
, int w
)
695 { /* global |eq_word_define|
*/
696 boolean trace
= int_par
(tracing_assigns_code
) > 0;
698 diagnostic_trace
(p
, "globally changing");
700 xeq_level
[p
] = level_one
;
702 diagnostic_trace
(p
, "into");
705 @ Subroutine |save_for_after| puts a token on the stack for save-keeping.
708 void save_for_after
(halfword t
)
710 if
(cur_level
> level_one
) {
711 check_full_save_stack
();
712 save_type
(save_ptr
) = insert_token
;
713 save_level
(save_ptr
) = level_zero
;
714 save_value
(save_ptr
) = t
;
719 @ The |unsave| routine goes the other way
, taking items off of |save_stack|.
720 This routine takes care of restoration when a level ends
; everything
721 belonging to the topmost group is cleared off of the save stack.
725 { /* pops the top level off the save stack
*/
726 halfword p
; /* position to be restored
*/
727 quarterword l
= level_one
; /* saved level
, if in fullword regions of |eqtb|
*/
728 boolean a
= false
; /* have we already processed an \.
{\\aftergroup
} ?
*/
729 unsave_math_codes
(cur_level
);
730 unsave_cat_codes
(int_par
(cat_code_table_code
), cur_level
);
731 unsave_text_codes
(cur_level
);
732 unsave_math_data
(cur_level
);
733 if
(cur_level
> level_one
) {
734 boolean trace
= int_par
(tracing_restores_code
) > 0;
736 /* Clear off top level from |save_stack|
*/
739 if
(save_type
(save_ptr
) == level_boundary
)
741 p
= save_value
(save_ptr
);
742 if
(save_type
(save_ptr
) == insert_token
) {
743 reinsert_token
(a
, p
);
744 a
= true
; /* always ... always etex now
*/
746 if
(save_type
(save_ptr
) == restore_old_value
) {
747 l
= save_level
(save_ptr
);
750 save_word
(save_ptr
) = eqtb
[undefined_control_sequence
];
752 /* Store |save_stack
[save_ptr
]| in |eqtb
[p
]|
, unless
753 |eqtb
[p
]| holds a global value
*/
754 /* A global definition
, which sets the level to |level_one|
,
755 will not be undone by |unsave|. If at least one global definition of
756 |eqtb
[p
]| has been carried out within the group that just ended
, the
757 last such definition will therefore survive.
759 if
(p
< int_base || p
> eqtb_size
) {
760 if
(eq_level
(p
) == level_one
) {
761 eq_destroy
(save_word
(save_ptr
)); /* destroy the saved value
*/
763 diagnostic_trace
(p
, "retaining");
765 eq_destroy
(eqtb
[p
]); /* destroy the current value
*/
766 eqtb
[p
] = save_word
(save_ptr
); /* restore the saved value
*/
768 diagnostic_trace
(p
, "restoring");
770 } else if
(xeq_level
[p
] != level_one
) {
771 eqtb
[p
] = save_word
(save_ptr
);
774 diagnostic_trace
(p
, "restoring");
777 diagnostic_trace
(p
, "retaining");
781 if
(int_par
(tracing_groups_code
) > 0)
783 if
(grp_stack
[in_open
] == cur_boundary
)
784 group_warning
(); /* groups possibly not properly nested with files
*/
785 cur_group
= save_level
(save_ptr
);
786 cur_boundary
= save_value
(save_ptr
);
789 confusion
("curlevel"); /* |unsave| is not used when |cur_group
=bottom_level|
*/
791 attr_list_cache
= cache_disabled
;
794 @ Most of the parameters kept in |eqtb| can be changed freely
, but there's
795 an exception
: The magnification should not be used with two different
796 values during any \TeX\ job
, since a single magnification is applied to an
797 entire run. The global variable |mag_set| is set to the current magnification
798 whenever it becomes necessary to ``freeze'' it at a particular value.
801 int mag_set
; /* if nonzero
, this magnification should be used henceforth
*/
803 @ The |prepare_mag| subroutine is called whenever \TeX\ wants to use |mag|
807 #define mag int_par
(mag_code
)
809 void prepare_mag
(void
)
811 if
((mag_set
> 0) && (mag != mag_set)) {
812 print_err
("Incompatible magnification (");
815 tprint_nl
(" the previous value will be retained");
816 help2
("I can handle only one magnification ratio per job. So I've",
817 "reverted to the magnification you used earlier on this run.");
819 geq_word_define
(int_base
+ mag_code
, mag_set
); /* |mag
:=mag_set|
*/
821 if
((mag
<= 0) ||
(mag
> 32768)) {
822 print_err
("Illegal magnification has been changed to 1000");
823 help1
("The magnification ratio must be between 1 and 32768.");
825 geq_word_define
(int_base
+ mag_code
, 1000);
827 if
((mag_set
== 0) && (mag != mag_set)) {
829 one_true_inch
= xn_over_d
(one_hundred_inch
, 10, mag
);
831 one_true_inch
= one_inch
;
836 @ Let's pause a moment now and try to look at the Big Picture.
837 The \TeX\ program consists of three main parts
: syntactic routines
,
838 semantic routines
, and output routines. The chief purpose of the
839 syntactic routines is to deliver the user's input to the semantic routines
,
840 one token at a time. The semantic routines act as an interpreter
841 responding to these tokens
, which may be regarded as commands. And the
842 output routines are periodically called on to convert box-and-glue
843 lists into a compact set of instructions that will be sent
844 to a typesetter. We have discussed the basic data structures and utility
845 routines of \TeX
, so we are good and ready to plunge into the real activity by
846 considering the syntactic routines.
848 Our current goal is to come to grips with the |get_next| procedure
,
849 which is the keystone of \TeX's input mechanism. Each call of |get_next|
850 sets the value of three variables |cur_cmd|
, |cur_chr|
, and |cur_cs|
,
851 representing the next input token.
852 $$\vbox
{\halign
{#\hfil\cr
853 \hbox
{|cur_cmd| denotes a command code from the long list of codes
855 \hbox
{|cur_chr| denotes a character code or other modifier of the command
857 \hbox
{|cur_cs| is the |eqtb| location of the current control sequence
,}\cr
858 \hbox
{\qquad if the current token was a control sequence
,
859 otherwise it's zero.
}\cr
}}$$
860 Underlying this external behavior of |get_next| is all the machinery
861 necessary to convert from character files to tokens. At a given time we
862 may be only partially finished with the reading of several files
(for
863 which \.
{\\input
} was specified
), and partially finished with the expansion
864 of some user-defined macros and
/or some macro parameters
, and partially
865 finished with the generation of some text in a template for \.
{\\halign
},
866 and so on. When reading a character file
, special characters must be
867 classified as math delimiters
, etc.
; comments and extra blank spaces must
868 be removed
, paragraphs must be recognized
, and control sequences must be
869 found in the hash table. Furthermore there are occasions in which the
870 scanning routines have looked ahead for a word like `\.
{plus
}' but only
871 part of that word was found
, hence a few characters must be put back
872 into the input and scanned again.
874 To handle these situations
, which might all be present simultaneously
,
875 \TeX\ uses various stacks that hold information about the incomplete
876 activities
, and there is a finite state control for each level of the
877 input mechanism. These stacks record the current state of an implicitly
878 recursive process
, but the |get_next| procedure is not recursive.
879 Therefore it will not be difficult to translate these algorithms into
880 low-level languages that do not support recursion.
883 int cur_cmd
; /* current command set by |get_next|
*/
884 halfword cur_chr
; /* operand of current command
*/
885 halfword cur_cs
; /* control sequence found here
, zero if none found
*/
886 halfword cur_tok
; /* packed representative of |cur_cmd| and |cur_chr|
*/
888 @ Here is a procedure that displays the current command.
891 #define mode cur_list.mode_field
893 void show_cur_cmd_chr
(void
)
895 int n
; /* level of \.
{\\if...\\fi
} nesting
*/
896 int l
; /* line where \.
{\\if
} started
*/
900 if
(mode
!= shown_mode
) {
905 print_cmd_chr
((quarterword
) cur_cmd
, cur_chr
);
906 if
(int_par
(tracing_ifs_code
) > 0) {
907 if
(cur_cmd
>= if_test_cmd
) {
908 if
(cur_cmd
<= fi_or_else_cmd
) {
910 if
(cur_cmd
== fi_or_else_cmd
) {
911 print_cmd_chr
(if_test_cmd
, cur_if
);
932 end_diagnostic
(false
);
935 @ Here is a procedure that displays the contents of |eqtb
[n
]| symbolically.
938 void show_eqtb
(halfword n
)
941 /* this can't happen
*/
943 } else if
((n
< glue_base
) ||
((n
> eqtb_size
) && (n <= eqtb_top))) {
945 Show equivalent |n|
, in region
1 or
2
947 Here is a routine that displays the current meaning of an |eqtb| entry
948 in region
1 or~
2.
(Similar routines for the other regions will appear
954 print_cmd_chr
(eq_type
(n
), equiv
(n
));
955 if
(eq_type
(n
) >= call_cmd
) {
957 show_token_list
(token_link
(equiv
(n
)), null
, 32);
959 } else if
(n
< local_base
) {
961 Show equivalent |n|
, in region
3
963 All glue parameters and registers are initially `\.
{0pt plus0pt minus0pt
}'.
966 if
(n
< glue_base
+ thin_mu_skip_code
)
967 print_cmd_chr
(assign_glue_cmd
, n
);
969 print_cmd_chr
(assign_mu_glue_cmd
, n
);
971 if
(n
< glue_base
+ thin_mu_skip_code
)
972 print_spec
(equiv
(n
), "pt");
974 print_spec
(equiv
(n
), "mu");
975 } else if
(n
< mu_skip_base
) {
977 print_int
(n
- skip_base
);
979 print_spec
(equiv
(n
), "pt");
981 tprint_esc
("muskip");
982 print_int
(n
- mu_skip_base
);
984 print_spec
(equiv
(n
), "mu");
987 } else if
(n
< int_base
) {
989 Show equivalent |n|
, in region
4
991 We initialize most things to null or undefined values. An undefined font
992 is represented by the internal code |font_base|.
994 However
, the character code tables are given initial values based on the
995 conventional interpretation of ASCII code. These initial values should
996 not be changed when \TeX\ is adapted for use with non-English languages
;
997 all changes to the initialization conventions should be made in format
998 packages
, not in \TeX\ itself
, so that global interchange of formats is
1001 if
((n
== par_shape_loc
) ||
((n
>= etex_pen_base
) && (n < etex_pens))) {
1002 if
(n
== par_shape_loc
)
1003 print_cmd_chr
(set_tex_shape_cmd
, n
);
1005 print_cmd_chr
(set_etex_shape_cmd
, n
);
1007 if
(equiv
(n
) == null
) {
1009 } else if
(n
> par_shape_loc
) {
1010 print_int
(penalty
(equiv
(n
)));
1012 print_int
(penalty
(equiv
(n
) + 1));
1013 if
(penalty
(equiv
(n
)) > 1)
1016 print_int
(vinfo
(par_shape_ptr
+ 1));
1018 } else if
(n
< toks_base
) {
1019 print_cmd_chr
(assign_toks_cmd
, n
);
1021 if
(equiv
(n
) != null
)
1022 show_token_list
(token_link
(equiv
(n
)), null
, 32);
1023 } else if
(n
< box_base
) {
1025 print_int
(n
- toks_base
);
1027 if
(equiv
(n
) != null
)
1028 show_token_list
(token_link
(equiv
(n
)), null
, 32);
1029 } else if
(n
< cur_font_loc
) {
1031 print_int
(n
- box_base
);
1033 if
(equiv
(n
) == null
) {
1036 depth_threshold
= 0;
1038 show_node_list
(equiv
(n
));
1040 } else if
(n
== cur_font_loc
) {
1041 /* Show the font identifier in |eqtb
[n
]|
*/
1042 tprint
("current font");
1044 print_esc
(hash
[font_id_base
+ equiv
(n
)].rh
); /* that's |font_id_text
(equiv
(n
))|
*/
1046 } else if
(n
< dimen_base
) {
1047 /* Show equivalent |n|
, in region
5 */
1049 print_cmd_chr
(assign_int_cmd
, n
);
1051 print_int
(eqtb
[n
].cint
);
1052 } else if
(n
< count_base
) {
1053 print_cmd_chr
(assign_dir_cmd
, n
);
1055 print_dir
(eqtb
[n
].cint
);
1056 } else if
(n
< attribute_base
) {
1057 tprint_esc
("count");
1058 print_int
(n
- count_base
);
1060 print_int
(eqtb
[n
].cint
);
1061 } else if
(n
< del_code_base
) {
1062 tprint_esc
("attribute");
1063 print_int
(n
- attribute_base
);
1065 print_int
(eqtb
[n
].cint
);
1067 } else if
(n
<= eqtb_size
) {
1068 /* Show equivalent |n|
, in region
6 */
1069 if
(n
< scaled_base
) {
1070 print_cmd_chr
(assign_dimen_cmd
, n
);
1072 tprint_esc
("dimen");
1073 print_int
(n
- scaled_base
);
1076 print_scaled
(eqtb
[n
].cint
);
1079 /* this can't happen either
*/
1085 void show_eqtb_meaning
(halfword n
)
1088 /* this can't happen
*/
1090 } else if
((n
< glue_base
) ||
((n
> eqtb_size
) && (n <= eqtb_top))) {
1092 Show equivalent |n|
, in region
1 or
2
1094 Here is a routine that displays the current meaning of an |eqtb| entry
1095 in region
1 or~
2.
(Similar routines for the other regions will appear
1099 } else if
(n
< local_base
) {
1101 Show equivalent |n|
, in region
3
1103 All glue parameters and registers are initially `\.
{0pt plus0pt minus0pt
}'.
1105 if
(n
< skip_base
) {
1106 if
(n
< glue_base
+ thin_mu_skip_code
)
1107 print_cmd_chr
(assign_glue_cmd
, n
);
1109 print_cmd_chr
(assign_mu_glue_cmd
, n
);
1110 } else if
(n
< mu_skip_base
) {
1112 print_int
(n
- skip_base
);
1114 tprint_esc
("muskip");
1115 print_int
(n
- mu_skip_base
);
1118 } else if
(n
< int_base
) {
1120 Show equivalent |n|
, in region
4
1122 We initialize most things to null or undefined values. An undefined font
1123 is represented by the internal code |font_base|.
1125 However
, the character code tables are given initial values based on the
1126 conventional interpretation of ASCII code. These initial values should
1127 not be changed when \TeX\ is adapted for use with non-English languages
;
1128 all changes to the initialization conventions should be made in format
1129 packages
, not in \TeX\ itself
, so that global interchange of formats is
1132 if
((n
== par_shape_loc
) ||
((n
>= etex_pen_base
) && (n < etex_pens))) {
1133 if
(n
== par_shape_loc
)
1134 print_cmd_chr
(set_tex_shape_cmd
, n
);
1136 print_cmd_chr
(set_etex_shape_cmd
, n
);
1137 } else if
(n
< toks_base
) {
1138 print_cmd_chr
(assign_toks_cmd
, n
);
1139 } else if
(n
< box_base
) {
1141 print_int
(n
- toks_base
);
1142 } else if
(n
< cur_font_loc
) {
1144 print_int
(n
- box_base
);
1145 } else if
(n
== cur_font_loc
) {
1146 /* Show the font identifier in |eqtb
[n
]|
*/
1147 tprint
("current font");
1149 } else if
(n
< dimen_base
) {
1150 /* Show equivalent |n|
, in region
5 */
1152 print_cmd_chr
(assign_int_cmd
, n
);
1153 } else if
(n
< count_base
) {
1154 print_cmd_chr
(assign_dir_cmd
, n
);
1155 } else if
(n
< attribute_base
) {
1156 tprint_esc
("count");
1157 print_int
(n
- count_base
);
1158 } else if
(n
< del_code_base
) {
1159 tprint_esc
("attribute");
1160 print_int
(n
- attribute_base
);
1162 } else if
(n
<= eqtb_size
) {
1163 /* Show equivalent |n|
, in region
6 */
1164 if
(n
< scaled_base
) {
1165 print_cmd_chr
(assign_dimen_cmd
, n
);
1167 tprint_esc
("dimen");
1168 print_int
(n
- scaled_base
);
1171 /* this can't happen either
*/