beta-0.89.2
[luatex.git] / source / texk / web2c / cwebdir / cweave.w
blob91b8785bfe98e692cf8ac3021d5ec2ad98f9ee0c
1 % This file is part of CWEB.
2 % This program by Silvio Levy and Donald E. Knuth
3 % is based on a program by Knuth.
4 % It is distributed WITHOUT ANY WARRANTY, express or implied.
5 % Version 3.64 --- February 2002
6 % (essentially the same as version 3.6, which added
7 % recently introduced features of standard C++ to version 3.4)
9 % Copyright (C) 1987,1990,1993,2000 Silvio Levy and Donald E. Knuth
11 % Permission is granted to make and distribute verbatim copies of this
12 % document provided that the copyright notice and this permission notice
13 % are preserved on all copies.
15 % Permission is granted to copy and distribute modified versions of this
16 % document under the conditions for verbatim copying, provided that the
17 % entire resulting derived work is given a different name and distributed
18 % under the terms of a permission notice identical to this one.
20 % Here is TeX material that gets inserted after \input cwebmac
21 \def\hang{\hangindent 3em\indent\ignorespaces}
22 \def\pb{$\.|\ldots\.|$} % C brackets (|...|)
23 \def\v{\char'174} % vertical (|) in typewriter font
24 \def\dleft{[\![} \def\dright{]\!]} % double brackets
25 \mathchardef\RA="3221 % right arrow
26 \mathchardef\BA="3224 % double arrow
27 \def\({} % ) kludge for alphabetizing certain section names
28 \def\TeXxstring{\\{\TEX/\_string}}
29 \def\skipxTeX{\\{skip\_\TEX/}}
30 \def\copyxTeX{\\{copy\_\TEX/}}
32 \def\title{CWEAVE (Version 3.64)}
33 \def\topofcontents{\null\vfill
34 \centerline{\titlefont The {\ttitlefont CWEAVE} processor}
35 \vskip 15pt
36 \centerline{(Version 3.64)}
37 \vfill}
38 \def\botofcontents{\vfill
39 \noindent
40 Copyright \copyright\ 1987, 1990, 1993, 2000 Silvio Levy and Donald E. Knuth
41 \bigskip\noindent
42 Permission is granted to make and distribute verbatim copies of this
43 document provided that the copyright notice and this permission notice
44 are preserved on all copies.
46 \smallskip\noindent
47 Permission is granted to copy and distribute modified versions of this
48 document under the conditions for verbatim copying, provided that the
49 entire resulting derived work is given a different name and distributed
50 under the terms of a permission notice identical to this one.
52 \pageno=\contentspagenumber \advance\pageno by 1
53 \let\maybe=\iftrue
54 @s not_eq normal @q unreserve a C++ keyword @>
56 @** Introduction.
57 This is the \.{CWEAVE} program by Silvio Levy and Donald E. Knuth,
58 based on \.{WEAVE} by Knuth.
59 We are thankful to Steve Avery,
60 Nelson Beebe, Hans-Hermann Bode (to whom the original \CPLUSPLUS/ adaptation
61 is due), Klaus Guntermann, Norman Ramsey, Tomas Rokicki, Joachim Schnitter,
62 Joachim Schrod, Lee Wittenberg, Saroj Mahapatra, Cesar Augusto Rorato
63 Crusius, and others who have contributed improvements.
65 The ``banner line'' defined here should be changed whenever \.{CWEAVE}
66 is modified.
68 @d banner "This is CWEAVE (Version 3.64)\n"
70 @c @<Include files@>@/
72 @<Common code for \.{CWEAVE} and \.{CTANGLE}@>@/
73 @<Typedef declarations@>@/
74 @<Global variables@>@/
75 @<Predeclaration of procedures@>
77 @ We predeclare several standard system functions here instead of including
78 their system header files, because the names of the header files are not as
79 standard as the names of the functions. (For example, some \CEE/ environments
80 have \.{<string.h>} where others have \.{<strings.h>}.)
82 @<Predecl...@>=
83 extern int strlen(); /* length of string */
84 extern int strcmp(); /* compare strings lexicographically */
85 extern char* strcpy(); /* copy one string to another */
86 extern int strncmp(); /* compare up to $n$ string characters */
87 extern char* strncpy(); /* copy up to $n$ string characters */
89 @ \.{CWEAVE} has a fairly straightforward outline. It operates in
90 three phases: First it inputs the source file and stores cross-reference
91 data; then it inputs the source once again and produces the \TEX/ output
92 file; and finally it sorts and outputs the index.
94 Please read the documentation for \.{common}, the set of routines common
95 to \.{CTANGLE} and \.{CWEAVE}, before proceeding further.
98 int main (ac, av)
99 int ac; /* argument count */
100 char **av; /* argument values */
102 argc=ac; argv=av;
103 program=cweave;
104 make_xrefs=force_lines=make_pb=1; /* controlled by command-line options */
105 common_init();
106 @<Set initial values@>;
107 if (show_banner) printf(banner); /* print a ``banner line'' */
108 @<Store all the reserved words@>;
109 phase_one(); /* read all the user's text and store the cross-references */
110 phase_two(); /* read all the text again and translate it to \TEX/ form */
111 phase_three(); /* output the cross-reference index */
112 return wrap_up(); /* and exit gracefully */
115 @ The following parameters were sufficient in the original \.{WEAVE} to
116 handle \TEX/, so they should be sufficient for most applications of \.{CWEAVE}.
117 If you change |max_bytes|, |max_names|, |hash_size|, or |buf_size|
118 you have to change them also in the file |"common.w"|.
120 @d max_bytes 90000 /* the number of bytes in identifiers,
121 index entries, and section names */
122 @d max_names 4000 /* number of identifiers, strings, section names;
123 must be less than 10240; used in |"common.w"| */
124 @d max_sections 2000 /* greater than the total number of sections */
125 @d hash_size 353 /* should be prime */
126 @d buf_size 100 /* maximum length of input line, plus one */
127 @d longest_name 10000 /* section names and strings shouldn't be longer than this */
128 @d long_buf_size (buf_size+longest_name)
129 @d line_length 80 /* lines of \TEX/ output have at most this many characters;
130 should be less than 256 */
131 @d max_refs 20000 /* number of cross-references; must be less than 65536 */
132 @d max_toks 20000 /* number of symbols in \CEE/ texts being parsed;
133 must be less than 65536 */
134 @d max_texts 4000 /* number of phrases in \CEE/ texts being parsed;
135 must be less than 10240 */
136 @d max_scraps 2000 /* number of tokens in \CEE/ texts being parsed */
137 @d stack_size 400 /* number of simultaneous output levels */
139 @ The next few sections contain stuff from the file |"common.w"| that must
140 be included in both |"ctangle.w"| and |"cweave.w"|. It appears in
141 file |"common.h"|, which needs to be updated when |"common.w"| changes.
143 @i common.h
145 @* Data structures exclusive to {\tt CWEAVE}.
146 As explained in \.{common.w}, the field of a |name_info| structure
147 that contains the |rlink| of a section name is used for a completely
148 different purpose in the case of identifiers. It is then called the
149 |ilk| of the identifier, and it is used to
150 distinguish between various types of identifiers, as follows:
152 \yskip\hang |normal| and |func_template| identifiers are part of the
153 \CEE/ program that will appear in italic type (or in typewriter type
154 if all uppercase).
156 \yskip\hang |custom| identifiers are part of the \CEE/ program that
157 will be typeset in special ways.
159 \yskip\hang |roman| identifiers are index entries that appear after
160 \.{@@\^} in the \.{CWEB} file.
162 \yskip\hang |wildcard| identifiers are index entries that appear after
163 \.{@@:} in the \.{CWEB} file.
165 \yskip\hang |typewriter| identifiers are index entries that appear after
166 \.{@@.} in the \.{CWEB} file.
168 \yskip\hang |alfop|, \dots, |template_like|
169 identifiers are \CEE/ or \CPLUSPLUS/ reserved words whose |ilk|
170 explains how they are to be treated when \CEE/ code is being
171 formatted.
173 @d ilk dummy.Ilk
174 @d normal 0 /* ordinary identifiers have |normal| ilk */
175 @d roman 1 /* normal index entries have |roman| ilk */
176 @d wildcard 2 /* user-formatted index entries have |wildcard| ilk */
177 @d typewriter 3 /* `typewriter type' entries have |typewriter| ilk */
178 @d abnormal(a) ((a)->ilk>typewriter) /* tells if a name is special */
179 @d func_template 4 /* identifiers that can be followed by optional template */
180 @d custom 5 /* identifiers with user-given control sequence */
181 @d alfop 22 /* alphabetic operators like \&{and} or \&{not\_eq} */
182 @d else_like 26 /* \&{else} */
183 @d public_like 40 /* \&{public}, \&{private}, \&{protected} */
184 @d operator_like 41 /* \&{operator} */
185 @d new_like 42 /* \&{new} */
186 @d catch_like 43 /* \&{catch} */
187 @d for_like 45 /* \&{for}, \&{switch}, \&{while} */
188 @d do_like 46 /* \&{do} */
189 @d if_like 47 /* \&{if}, \&{ifdef}, \&{endif}, \&{pragma}, \dots */
190 @d delete_like 48 /* \&{delete} */
191 @d raw_ubin 49 /* `\.\&' or `\.*' when looking for \&{const} following */
192 @d const_like 50 /* \&{const}, \&{volatile} */
193 @d raw_int 51 /* \&{int}, \&{char}, \dots; also structure and class names */
194 @d int_like 52 /* same, when not followed by left parenthesis or \DC\ */
195 @d case_like 53 /* \&{case}, \&{return}, \&{goto}, \&{break}, \&{continue} */
196 @d sizeof_like 54 /* \&{sizeof} */
197 @d struct_like 55 /* \&{struct}, \&{union}, \&{enum}, \&{class} */
198 @d typedef_like 56 /* \&{typedef} */
199 @d define_like 57 /* \&{define} */
200 @d template_like 58 /* \&{template} */
202 @ We keep track of the current section number in |section_count|, which
203 is the total number of sections that have started. Sections which have
204 been altered by a change file entry have their |changed_section| flag
205 turned on during the first phase.
207 @<Global...@>=
208 boolean change_exists; /* has any section changed? */
210 @ The other large memory area in \.{CWEAVE} keeps the cross-reference data.
211 All uses of the name |p| are recorded in a linked list beginning at
212 |p->xref|, which points into the |xmem| array. The elements of |xmem|
213 are structures consisting of an integer, |num|, and a pointer |xlink|
214 to another element of |xmem|. If |x=p->xref| is a pointer into |xmem|,
215 the value of |x->num| is either a section number where |p| is used,
216 or |cite_flag| plus a section number where |p| is mentioned,
217 or |def_flag| plus a section number where |p| is defined;
218 and |x->xlink| points to the next such cross-reference for |p|,
219 if any. This list of cross-references is in decreasing order by
220 section number. The next unused slot in |xmem| is |xref_ptr|.
221 The linked list ends at |&xmem[0]|.
223 The global variable |xref_switch| is set either to |def_flag| or to zero,
224 depending on whether the next cross-reference to an identifier is to be
225 underlined or not in the index. This switch is set to |def_flag| when
226 \.{@@!} or \.{@@d} is scanned, and it is cleared to zero when
227 the next identifier or index entry cross-reference has been made.
228 Similarly, the global variable |section_xref_switch| is either
229 |def_flag| or |cite_flag| or zero, depending
230 on whether a section name is being defined, cited or used in \CEE/ text.
232 @<Type...@>=
233 typedef struct xref_info {
234 sixteen_bits num; /* section number plus zero, |cite_flag|, or |def_flag|; or |file_flag| alone */
235 struct xref_info *xlink; /* pointer to the previous cross-reference */
236 } xref_info;
237 typedef xref_info *xref_pointer; /* a pointer into |xmem| */
239 @ @<Global...@>=
240 xref_info xmem[max_refs]; /* contains cross-reference information */
241 xref_pointer xmem_end = xmem+max_refs-1; /* the last slot of |xmem| */
242 xref_pointer xref_ptr; /* the largest occupied position in |xmem| */
243 sixteen_bits xref_switch,section_xref_switch; /* zero or |def_flag|; |section_xref_switch| may also be |cite_flag| */
245 @ A section that is used for multi-file output (with the \.{@@(} feature)
246 has a special first cross-reference whose |num| field is |file_flag|.
248 @d file_flag (3*cite_flag)
249 @d def_flag (2*cite_flag)
250 @d cite_flag 10240 /* must be strictly larger than |max_sections| */
251 @d xref equiv_or_xref
253 @<Set init...@>=
254 xref_ptr=xmem; name_dir->xref=(char*)xmem; xref_switch=0; section_xref_switch=0;
255 xmem->num=0; /* sentinel value */
257 @ A new cross-reference for an identifier is formed by calling |new_xref|,
258 which discards duplicate entries and ignores non-underlined references
259 to one-letter identifiers or \CEE/'s reserved words.
261 If the user has set the |no_xref| flag (the \.{-x} option of the command line),
262 it is unnecessary to keep track of cross-references for identifiers.
263 If one were careful, one could probably make more changes around section
264 100 to avoid a lot of identifier looking up.
266 @d append_xref(c) if (xref_ptr==xmem_end) overflow("cross-reference");
267 else (++xref_ptr)->num=c;
268 @d no_xref (flags['x']==0)
269 @d make_xrefs flags['x'] /* should cross references be output? */
270 @d is_tiny(p) ((p+1)->byte_start==(p)->byte_start+1)
271 @d unindexed(a) (a<res_wd_end && a->ilk>=custom)
272 /* tells if uses of a name are to be indexed */
275 void
276 new_xref(p)
277 name_pointer p;
279 xref_pointer q; /* pointer to previous cross-reference */
280 sixteen_bits m, n; /* new and previous cross-reference value */
281 if (no_xref) return; /* cross-referencing is switched off */
282 if ((unindexed(p) || is_tiny(p)) && xref_switch==0) return; /* such names are recorded only when underlined */
283 m=section_count+xref_switch; xref_switch=0; q=(xref_pointer)p->xref;
284 if (q != xmem) {
285 n=q->num;
286 if (n==m || n==m+def_flag) return; /* this section is already on record */
287 else if (m==n+def_flag) { /* a plain entry for this section becomes an underlined one */
288 q->num=m; return;
291 append_xref(m); xref_ptr->xlink=q; p->xref=(char*)xref_ptr; /* push the new entry onto the front of |p|'s list */
294 @ The cross-reference lists for section names are slightly different.
295 Suppose that a section name is defined in sections $m_1$, \dots,
296 $m_k$, cited in sections $n_1$, \dots, $n_l$, and used in sections
297 $p_1$, \dots, $p_j$. Then its list will contain $m_1+|def_flag|$,
298 \dots, $m_k+|def_flag|$, $n_1+|cite_flag|$, \dots,
299 $n_l+|cite_flag|$, $p_1$, \dots, $p_j$, in this order.
301 Although this method of storage takes quadratic time with respect to
302 the length of the list, under foreseeable uses of \.{CWEAVE} this inefficiency
303 is insignificant.
306 void
307 new_section_xref(p)
308 name_pointer p;
310 xref_pointer q,r; /* pointers to previous cross-references */
311 q=(xref_pointer)p->xref; r=xmem; /* |q| scans the list, |r| trails one entry behind */
312 if (q>xmem) /* the list is nonempty */
313 while (q->num>section_xref_switch) {r=q; q=q->xlink;} /* pass over entries whose |num| exceeds the switch */
314 if (r->num==section_count+section_xref_switch)
315 return; /* don't duplicate entries */
316 append_xref(section_count+section_xref_switch);
317 xref_ptr->xlink=q; section_xref_switch=0; /* link the new entry in front of |q| and clear the switch */
318 if (r==xmem) p->xref=(char*)xref_ptr; /* nothing was passed over: the new entry heads the list */
319 else r->xlink=xref_ptr; /* otherwise splice it in after |r| */
322 @ The cross-reference list for a section name may also begin with
323 |file_flag|. Here's how that flag gets put~in.
326 void
327 set_file_flag(p)
328 name_pointer p;
330 xref_pointer q; /* current head of |p|'s cross-reference list */
331 q=(xref_pointer)p->xref;
332 if (q->num==file_flag) return; /* the flag is already in place */
333 append_xref(file_flag); /* otherwise make a |file_flag| entry \dots */
334 xref_ptr->xlink = q; /* \dots\ put it in front of the old head \dots */
335 p->xref = (char *)xref_ptr; /* \dots\ and make it the new head */
338 @ A third large area of memory is used for sixteen-bit `tokens', which appear
339 in short lists similar to the strings of characters in |byte_mem|. Token lists
340 are used to contain the result of \CEE/ code translated into \TEX/ form;
341 further details about them will be explained later. A |text_pointer| variable
342 is an index into |tok_start|.
344 @<Typed...@>=
345 typedef sixteen_bits token;
346 typedef token *token_pointer;
347 typedef token_pointer *text_pointer;
349 @ The first position of |tok_mem|
350 that is unoccupied by replacement text is called |tok_ptr|, and the first
351 unused location of |tok_start| is called |text_ptr|.
352 Thus, we usually have |*text_ptr==tok_ptr|.
354 @<Global...@>=
355 token tok_mem[max_toks]; /* tokens */
356 token_pointer tok_mem_end = tok_mem+max_toks-1; /* end of |tok_mem| */
357 token_pointer tok_start[max_texts]; /* directory into |tok_mem| */
358 token_pointer tok_ptr; /* first unused position in |tok_mem| */
359 text_pointer text_ptr; /* first unused position in |tok_start| */
360 text_pointer tok_start_end = tok_start+max_texts-1; /* end of |tok_start| */
361 token_pointer max_tok_ptr; /* largest value of |tok_ptr| */
362 text_pointer max_text_ptr; /* largest value of |text_ptr| */
364 @ @<Set init...@>=
365 tok_ptr=tok_mem+1; text_ptr=tok_start+1; tok_start[0]=tok_mem+1;
366 tok_start[1]=tok_mem+1;
367 max_tok_ptr=tok_mem+1; max_text_ptr=tok_start+1;
369 @ Here are the three procedures needed to complete |id_lookup|:
371 int names_match(p,first,l,t)
372 name_pointer p; /* points to the proposed match */
373 char *first; /* position of first character of string */
374 int l; /* length of identifier */
375 eight_bits t; /* desired ilk */
377 if (length(p)!=l) return 0; /* lengths differ, so the names cannot match */
378 if (p->ilk!=t && !(t==normal && abnormal(p))) return 0; /* ilks must agree, except that a request for |normal| also accepts a name whose ilk exceeds |typewriter| */
379 return !strncmp(first,p->byte_start,l); /* nonzero if the |l| characters coincide */
382 void
383 init_p(p,t)
384 name_pointer p;
385 eight_bits t;
387 p->ilk=t; p->xref=(char*)xmem;
390 void
391 init_node(p)
392 name_pointer p;
394 p->xref=(char*)xmem;
397 @ We have to get \CEE/'s
398 reserved words into the hash table, and the simplest way to do this is
399 to insert them every time \.{CWEAVE} is run. Fortunately there are relatively
400 few reserved words. (Some of these are not strictly ``reserved,'' but
401 are defined in header files of the ISO Standard \CEE/ Library.)
402 @^reserved words@>
404 @<Store all the reserved words@>=
405 id_lookup("and",NULL,alfop);
406 id_lookup("and_eq",NULL,alfop);
407 id_lookup("asm",NULL,sizeof_like);
408 id_lookup("auto",NULL,int_like);
409 id_lookup("bitand",NULL,alfop);
410 id_lookup("bitor",NULL,alfop);
411 id_lookup("bool",NULL,raw_int);
412 id_lookup("break",NULL,case_like);
413 id_lookup("case",NULL,case_like);
414 id_lookup("catch",NULL,catch_like);
415 id_lookup("char",NULL,raw_int);
416 id_lookup("class",NULL,struct_like);
417 id_lookup("clock_t",NULL,raw_int);
418 id_lookup("compl",NULL,alfop);
419 id_lookup("const",NULL,const_like);
420 id_lookup("const_cast",NULL,raw_int);
421 id_lookup("continue",NULL,case_like);
422 id_lookup("default",NULL,case_like);
423 id_lookup("define",NULL,define_like);
424 id_lookup("defined",NULL,sizeof_like);
425 id_lookup("delete",NULL,delete_like);
426 id_lookup("div_t",NULL,raw_int);
427 id_lookup("do",NULL,do_like);
428 id_lookup("double",NULL,raw_int);
429 id_lookup("dynamic_cast",NULL,raw_int);
430 id_lookup("elif",NULL,if_like);
431 id_lookup("else",NULL,else_like);
432 id_lookup("endif",NULL,if_like);
433 id_lookup("enum",NULL,struct_like);
434 id_lookup("error",NULL,if_like);
435 id_lookup("explicit",NULL,int_like);
436 id_lookup("export",NULL,int_like);
437 id_lookup("extern",NULL,int_like);
438 id_lookup("FILE",NULL,raw_int);
439 id_lookup("float",NULL,raw_int);
440 id_lookup("for",NULL,for_like);
441 id_lookup("fpos_t",NULL,raw_int);
442 id_lookup("friend",NULL,int_like);
443 id_lookup("goto",NULL,case_like);
444 id_lookup("if",NULL,if_like);
445 id_lookup("ifdef",NULL,if_like);
446 id_lookup("ifndef",NULL,if_like);
447 id_lookup("include",NULL,if_like);
448 id_lookup("inline",NULL,int_like);
449 id_lookup("int",NULL,raw_int);
450 id_lookup("jmp_buf",NULL,raw_int);
451 id_lookup("ldiv_t",NULL,raw_int);
452 id_lookup("line",NULL,if_like);
453 id_lookup("long",NULL,raw_int);
454 id_lookup("mutable",NULL,int_like);
455 id_lookup("namespace",NULL,struct_like);
456 id_lookup("new",NULL,new_like);
457 id_lookup("not",NULL,alfop);
458 id_lookup("not_eq",NULL,alfop);
459 id_lookup("NULL",NULL,custom);
460 id_lookup("offsetof",NULL,raw_int);
461 id_lookup("operator",NULL,operator_like);
462 id_lookup("or",NULL,alfop);
463 id_lookup("or_eq",NULL,alfop);
464 id_lookup("pragma",NULL,if_like);
465 id_lookup("private",NULL,public_like);
466 id_lookup("protected",NULL,public_like);
467 id_lookup("ptrdiff_t",NULL,raw_int);
468 id_lookup("public",NULL,public_like);
469 id_lookup("register",NULL,int_like);
470 id_lookup("reinterpret_cast",NULL,raw_int);
471 id_lookup("return",NULL,case_like);
472 id_lookup("short",NULL,raw_int);
473 id_lookup("sig_atomic_t",NULL,raw_int);
474 id_lookup("signed",NULL,raw_int);
475 id_lookup("size_t",NULL,raw_int);
476 id_lookup("sizeof",NULL,sizeof_like);
477 id_lookup("static",NULL,int_like);
478 id_lookup("static_cast",NULL,raw_int);
479 id_lookup("struct",NULL,struct_like);
480 id_lookup("switch",NULL,for_like);
481 id_lookup("template",NULL,template_like);
482 id_lookup("this",NULL,custom);
483 id_lookup("throw",NULL,case_like);
484 id_lookup("time_t",NULL,raw_int);
485 id_lookup("try",NULL,else_like);
486 id_lookup("typedef",NULL,typedef_like);
487 id_lookup("typeid",NULL,raw_int);
488 id_lookup("typename",NULL,struct_like);
489 id_lookup("undef",NULL,if_like);
490 id_lookup("union",NULL,struct_like);
491 id_lookup("unsigned",NULL,raw_int);
492 id_lookup("using",NULL,int_like);
493 id_lookup("va_dcl",NULL,decl); /* Berkeley's variable-arg-list convention */
494 id_lookup("va_list",NULL,raw_int); /* ditto */
495 id_lookup("virtual",NULL,int_like);
496 id_lookup("void",NULL,raw_int);
497 id_lookup("volatile",NULL,const_like);
498 id_lookup("wchar_t",NULL,raw_int);
499 id_lookup("while",NULL,for_like);
500 id_lookup("xor",NULL,alfop);
501 id_lookup("xor_eq",NULL,alfop);
502 res_wd_end=name_ptr;
503 id_lookup("TeX",NULL,custom);
504 id_lookup("make_pair",NULL,func_template);
506 @* Lexical scanning.
507 Let us now consider the subroutines that read the \.{CWEB} source file
508 and break it into meaningful units. There are four such procedures:
509 One simply skips to the next `\.{@@\ }' or `\.{@@*}' that begins a
510 section; another passes over the \TEX/ text at the beginning of a
511 section; the third passes over the \TEX/ text in a \CEE/ comment;
512 and the last, which is the most interesting, gets the next token of
513 a \CEE/ text. They all use the pointers |limit| and |loc| into
514 the line of input currently being studied.
516 @ Control codes in \.{CWEB}, which begin with `\.{@@}', are converted
517 into a numeric code designed to simplify \.{CWEAVE}'s logic; for example,
518 larger numbers are given to the control codes that denote more significant
519 milestones, and the code of |new_section| should be the largest of
520 all. Some of these numeric control codes take the place of |char|
521 control codes that will not otherwise appear in the output of the
522 scanning routines.
523 @^ASCII code dependencies@>
525 @d ignore 00 /* control code of no interest to \.{CWEAVE} */
526 @d verbatim 02 /* takes the place of extended ASCII \.{\char2} */
527 @d begin_short_comment 03 /* \CPLUSPLUS/ short comment */
528 @d begin_comment '\t' /* tab marks will not appear */
529 @d underline '\n' /* this code will be intercepted without confusion */
530 @d noop 0177 /* takes the place of ASCII delete */
531 @d xref_roman 0203 /* control code for `\.{@@\^}' */
532 @d xref_wildcard 0204 /* control code for `\.{@@:}' */
533 @d xref_typewriter 0205 /* control code for `\.{@@.}' */
534 @d TeX_string 0206 /* control code for `\.{@@t}' */
535 @f TeX_string TeX
536 @d ord 0207 /* control code for `\.{@@'}' */
537 @d join 0210 /* control code for `\.{@@\&}' */
538 @d thin_space 0211 /* control code for `\.{@@,}' */
539 @d math_break 0212 /* control code for `\.{@@\v}' */
540 @d line_break 0213 /* control code for `\.{@@/}' */
541 @d big_line_break 0214 /* control code for `\.{@@\#}' */
542 @d no_line_break 0215 /* control code for `\.{@@+}' */
543 @d pseudo_semi 0216 /* control code for `\.{@@;}' */
544 @d macro_arg_open 0220 /* control code for `\.{@@[}' */
545 @d macro_arg_close 0221 /* control code for `\.{@@]}' */
546 @d trace 0222 /* control code for `\.{@@0}', `\.{@@1}' and `\.{@@2}' */
547 @d translit_code 0223 /* control code for `\.{@@l}' */
548 @d output_defs_code 0224 /* control code for `\.{@@h}' */
549 @d format_code 0225 /* control code for `\.{@@f}' and `\.{@@s}' */
550 @d definition 0226 /* control code for `\.{@@d}' */
551 @d begin_C 0227 /* control code for `\.{@@c}' */
552 @d section_name 0230 /* control code for `\.{@@<}' */
553 @d new_section 0231 /* control code for `\.{@@\ }' and `\.{@@*}' */
555 @ Control codes are converted to \.{CWEAVE}'s internal
556 representation by means of the table |ccode|.
558 @<Global...@>=
559 eight_bits ccode[256]; /* meaning of a char following \.{@@} */
561 @ @<Set ini...@>=
562 {int c; for (c=0; c<256; c++) ccode[c]=0;}
563 ccode[' ']=ccode['\t']=ccode['\n']=ccode['\v']=ccode['\r']=ccode['\f']
564 =ccode['*']=new_section;
565 ccode['@@']='@@'; /* `quoted' at sign */
566 ccode['=']=verbatim;
567 ccode['d']=ccode['D']=definition;
568 ccode['f']=ccode['F']=ccode['s']=ccode['S']=format_code;
569 ccode['c']=ccode['C']=ccode['p']=ccode['P']=begin_C;
570 ccode['t']=ccode['T']=TeX_string;
571 ccode['l']=ccode['L']=translit_code;
572 ccode['q']=ccode['Q']=noop;
573 ccode['h']=ccode['H']=output_defs_code;
574 ccode['&']=join; ccode['<']=ccode['(']=section_name;
575 ccode['!']=underline; ccode['^']=xref_roman;
576 ccode[':']=xref_wildcard; ccode['.']=xref_typewriter; ccode[',']=thin_space;
577 ccode['|']=math_break; ccode['/']=line_break; ccode['#']=big_line_break;
578 ccode['+']=no_line_break; ccode[';']=pseudo_semi;
579 ccode['[']=macro_arg_open; ccode[']']=macro_arg_close;
580 ccode['\'']=ord;
581 @<Special control codes for debugging@>@;
583 @ Users can write
584 \.{@@2}, \.{@@1}, and \.{@@0} to turn tracing fully on, partly on,
585 and off, respectively.
587 @<Special control codes...@>=
588 ccode['0']=ccode['1']=ccode['2']=trace;
590 @ The |skip_limbo| routine is used on the first pass to skip through
591 portions of the input that are not in any sections, i.e., that precede
592 the first section. After this procedure has been called, the value of
593 |input_has_ended| will tell whether or not a section has actually been found.
595 There's a complication that we will postpone until later: If the \.{@@s}
596 operation appears in limbo, we want to use it to adjust the default
597 interpretation of identifiers.
599 @<Predec...@>=
600 void skip_limbo();
602 @ @c
603 void
604 skip_limbo() {
605 while(1) {
606 if (loc>limit && get_line()==0) return;
607 *(limit+1)='@@';
608 while (*loc!='@@') loc++; /* look for '@@', then skip two chars */
609 if (loc++ <=limit) { int c=ccode[(eight_bits)*loc++];
610 if (c==new_section) return;
611 if (c==noop) skip_restricted();
612 else if (c==format_code) @<Process simple format in limbo@>;
617 @ The |skip_TeX| routine is used on the first pass to skip through
618 the \TEX/ code at the beginning of a section. It returns the next
619 control code or `\.{\v}' found in the input. A |new_section| is
620 assumed to exist at the very end of the file.
622 @f skip_TeX TeX
625 unsigned
626 skip_TeX() /* skip past pure \TEX/ code */
628 while (1) {
629 if (loc>limit && get_line()==0) return(new_section); /* no more input: report a new section */
630 *(limit+1)='@@'; /* sentinel that stops the scan below */
631 while (*loc!='@@' && *loc!='|') loc++; /* advance to the next at sign or vertical bar */
632 if (*loc++ =='|') return('|'); /* found a vertical bar; |loc| now points past it */
633 if (loc<=limit) return(ccode[(eight_bits)*(loc++)]); /* at sign within the line: return the code of the character after it */
637 @*1 Inputting the next token.
638 As stated above, \.{CWEAVE}'s most interesting lexical scanning routine is the
639 |get_next| function that inputs the next token of \CEE/ input. However,
640 |get_next| is not especially complicated.
642 The result of |get_next| is either a |char| code for some special character,
643 or it is a special code representing a pair of characters (e.g., `\.{!=}'),
644 or it is the numeric value computed by the |ccode|
645 table, or it is one of the following special codes:
647 \yskip\hang |identifier|: In this case the global variables |id_first| and
648 |id_loc| will have been set to the beginning and ending-plus-one locations
649 in the buffer, as required by the |id_lookup| routine.
651 \yskip\hang |string|: The string will have been copied into the array
652 |section_text|; |id_first| and |id_loc| are set as above (now they are
653 pointers into |section_text|).
655 \yskip\hang |constant|: The constant is copied into |section_text|, with
656 slight modifications; |id_first| and |id_loc| are set.
658 \yskip\noindent Furthermore, some of the control codes cause
659 |get_next| to take additional actions:
661 \yskip\hang |xref_roman|, |xref_wildcard|, |xref_typewriter|, |TeX_string|,
662 |verbatim|: The values of |id_first| and |id_loc| will have been set to
663 the beginning and ending-plus-one locations in the buffer.
665 \yskip\hang |section_name|: In this case the global variable |cur_section| will
666 point to the |byte_start| entry for the section name that has just been scanned.
667 The value of |cur_section_char| will be |'('| if the section name was
668 preceded by \.{@@(} instead of \.{@@<}.
670 \yskip\noindent If |get_next| sees `\.{@@!}'
671 it sets |xref_switch| to |def_flag| and goes on to the next token.
673 @d constant 0200 /* \CEE/ constant */
674 @d string 0201 /* \CEE/ string */
675 @d identifier 0202 /* \CEE/ identifier or reserved word */
677 @<Global...@>=
678 name_pointer cur_section; /* name of section just scanned */
679 char cur_section_char; /* the character just before that name */
681 @ @<Include...@>=
682 #include <ctype.h> /* definition of |isalpha|, |isdigit| and so on */
683 #include <stdlib.h> /* definition of |exit| */
685 @ As one might expect, |get_next| consists mostly of a big switch
686 that branches to the various special cases that can arise.
687 \CEE/ allows underscores to appear in identifiers, and some \CEE/
688 compilers even allow the dollar sign.
690 @d isxalpha(c) ((c)=='_' || (c)=='$')
691 /* non-alpha characters allowed in identifier */
692 @d ishigh(c) ((eight_bits)(c)>0177)
693 @^high-bit character handling@>
695 @<Predecl...@>=
696 eight_bits get_next();
698 @ @c
699 eight_bits
700 get_next() /* produces the next input token */
701 {@+eight_bits c; /* the current character */
702 while (1) {
703 @<Check if we're at the end of a preprocessor command@>;
704 if (loc>limit && get_line()==0) return(new_section);
705 c=*(loc++);
706 if (xisdigit(c) || c=='.') @<Get a constant@>@;
707 else if (c=='\'' || c=='"' || (c=='L'&&(*loc=='\'' || *loc=='"'))@|
708 || (c=='<' && sharp_include_line==1))
709 @<Get a string@>@;
710 else if (xisalpha(c) || isxalpha(c) || ishigh(c))
711 @<Get an identifier@>@;
712 else if (c=='@@') @<Get control code and possible section name@>@;
713 else if (xisspace(c)) continue; /* ignore spaces and tabs */
714 if (c=='#' && loc==buffer+1) @<Raise preprocessor flag@>;
715 mistake: @<Compress two-symbol operator@>@;
716 return(c);
720 @ Because preprocessor commands do not fit in with the rest of the syntax
721 of \CEE/,
722 we have to deal with them separately. One solution is to enclose such
723 commands between special markers. Thus, when a \.\# is seen as the
724 first character of a line, |get_next| returns a special code
725 |left_preproc| and raises a flag |preprocessing|.
727 We can use the same internal code number for |left_preproc| as we do
728 for |ord|, since |get_next| changes |ord| into a string.
730 @d left_preproc ord /* begins a preprocessor command */
731 @d right_preproc 0217 /* ends a preprocessor command */
733 @<Glob...@>=
734 boolean preprocessing=0; /* are we scanning a preprocessor command? */
736 @ @<Raise prep...@>= {
737 preprocessing=1;
738 @<Check if next token is |include|@>;
739 return (left_preproc);
742 @ An additional complication is the freakish use of \.< and \.> to delimit
743 a file name in lines that start with \.{\#include}. We must treat this file
744 name as a string.
746 @<Glob...@>=
747 boolean sharp_include_line=0; /* are we scanning a |#include| line? */
749 @ @<Check if next token is |include|@>=
750 while (loc<=buffer_end-7 && xisspace(*loc)) loc++;
751 if (loc<=buffer_end-6 && strncmp(loc,"include",7)==0) sharp_include_line=1;
753 @ When we get to the end of a preprocessor line,
754 we lower the flag and send a code |right_preproc|, unless
755 the last character was a \.\\.
757 @<Check if we're at...@>=
758 while (loc==limit-1 && preprocessing && *loc=='\\')
759 if (get_line()==0) return(new_section); /* still in preprocessor mode */
760 if (loc>=limit && preprocessing) {
761 preprocessing=sharp_include_line=0;
762 return(right_preproc);
765 @ The following code assigns values to the combinations \.{++},
766 \.{--}, \.{->}, \.{>=}, \.{<=}, \.{==}, \.{<<}, \.{>>}, \.{!=}, \.{\v\v}, and
767 \.{\&\&}, and to the \CPLUSPLUS/
768 combinations \.{...}, \.{::}, \.{.*} and \.{->*}.
769 The compound assignment operators (e.g., \.{+=}) are
770 treated as separate tokens.
772 @d compress(c) if (loc++<=limit) return(c)
774 @<Compress tw...@>=
775 switch(c) {
776 case '/': if (*loc=='*') {compress(begin_comment);}
777 else if (*loc=='/') compress(begin_short_comment); break;
778 case '+': if (*loc=='+') compress(plus_plus); break;
779 case '-': if (*loc=='-') {compress(minus_minus);}
780 else if (*loc=='>') if (*(loc+1)=='*') {loc++; compress(minus_gt_ast);}
781 else compress(minus_gt); break;
782 case '.': if (*loc=='*') {compress(period_ast);}
783 else if (*loc=='.' && *(loc+1)=='.') {
784 loc++; compress(dot_dot_dot);
786 break;
787 case ':': if (*loc==':') compress(colon_colon); break;
788 case '=': if (*loc=='=') compress(eq_eq); break;
789 case '>': if (*loc=='=') {compress(gt_eq);}
790 else if (*loc=='>') compress(gt_gt); break;
791 case '<': if (*loc=='=') {compress(lt_eq);}
792 else if (*loc=='<') compress(lt_lt); break;
793 case '&': if (*loc=='&') compress(and_and); break;
794 case '|': if (*loc=='|') compress(or_or); break;
795 case '!': if (*loc=='=') compress(not_eq); break;
798 @ @<Get an identifier@>= { /* scans an identifier and returns the |identifier| code */
799 id_first=--loc; /* step back one character; |id_first| points there */
800 while (isalpha((eight_bits)*++loc) || isdigit((eight_bits)*loc) || isxalpha(*loc) || ishigh(*loc)); /* the casts keep negative |char| values away from |isalpha| and |isdigit| */
801 id_loc=loc; return(identifier); /* |id_loc| is the first character that failed the test */
804 @ Different conventions are followed by \TEX/ and \CEE/ to express octal
805 and hexadecimal numbers; it is reasonable to stick to each convention
806 within its realm. Thus the \CEE/ part of a \.{CWEB} file has octals
807 introduced by \.0 and hexadecimals by \.{0x}, but \.{CWEAVE} will print
808 with \TEX/ macros that the user can redefine to fit the context.
809 In order to simplify such macros, we replace some of the characters.
811 Notice that in this section and the next, |id_first| and |id_loc|
812 are pointers into the array |section_text|, not into |buffer|.
814 @<Get a constant@>= {
815 id_first=id_loc=section_text+1;
816 if (*(loc-1)=='0') {
817 if (*loc=='x' || *loc=='X') {*id_loc++='^'; loc++;
818 while (xisxdigit(*loc)) *id_loc++=*loc++;} /* hex constant */
819 else if (xisdigit(*loc)) {*id_loc++='~';
820 while (xisdigit(*loc)) *id_loc++=*loc++;} /* octal constant */
821 else goto dec; /* decimal constant */
823 else { /* decimal constant */
824 if (*(loc-1)=='.' && !xisdigit(*loc)) goto mistake; /* not a constant */
825 dec: *id_loc++=*(loc-1);
826 while (xisdigit(*loc) || *loc=='.') *id_loc++=*loc++;
827 if (*loc=='e' || *loc=='E') { /* float constant */
828 *id_loc++='_'; loc++;
829 if (*loc=='+' || *loc=='-') *id_loc++=*loc++;
830 while (xisdigit(*loc)) *id_loc++=*loc++;
833 while (*loc=='u' || *loc=='U' || *loc=='l' || *loc=='L'
834 || *loc=='f' || *loc=='F') {
835 *id_loc++='$'; *id_loc++=toupper(*loc); loc++;
837 return(constant);
840 @ \CEE/ strings and character constants, delimited by double and single
841 quotes, respectively, can contain newlines or instances of their own
842 delimiters if they are protected by a backslash. We follow this
843 convention, but do not allow the string to be longer than |longest_name|.
845 @<Get a string@>= {
846 char delim = c; /* what started the string */
847 id_first = section_text+1;
848 id_loc = section_text;
849 if (delim=='\'' && *(loc-2)=='@@') {*++id_loc='@@'; *++id_loc='@@';}
850 *++id_loc=delim;
851 if (delim=='L') { /* wide character constant */
852 delim=*loc++; *++id_loc=delim;
854 if (delim=='<') delim='>'; /* for file names in |#include| lines */
855 while (1) {
856 if (loc>=limit) {
857 if(*(limit-1)!='\\') {
858 err_print("! String didn't end"); loc=limit; break;
859 @.String didn't end@>
861 if(get_line()==0) {
862 err_print("! Input ended in middle of string"); loc=buffer; break;
863 @.Input ended in middle of string@>
866 if ((c=*loc++)==delim) {
867 if (++id_loc<=section_text_end) *id_loc=c;
868 break;
870 if (c=='\\') if (loc>=limit) continue;
871 else if (++id_loc<=section_text_end) {
872 *id_loc = '\\'; c=*loc++;
874 if (++id_loc<=section_text_end) *id_loc=c;
876 if (id_loc>=section_text_end) {
877 printf("\n! String too long: ");
878 @.String too long@>
879 term_write(section_text+1,25);
880 printf("..."); mark_error;
882 id_loc++;
883 return(string);
886 @ After an \.{@@} sign has been scanned, the next character tells us
887 whether there is more work to do.
889 @<Get control code and possible section name@>= {
890 c=*loc++;
891 switch(ccode[(eight_bits)c]) {
892 case translit_code: err_print("! Use @@l in limbo only"); continue;
893 @.Use @@l in limbo...@>
894 case underline: xref_switch=def_flag; continue;
895 case trace: tracing=c-'0'; continue;
896 case xref_roman: case xref_wildcard: case xref_typewriter:
897 case noop: case TeX_string: c=ccode[c]; skip_restricted(); return(c);
898 case section_name:
899 @<Scan the section name and make |cur_section| point to it@>;
900 case verbatim: @<Scan a verbatim string@>;
901 case ord: @<Get a string@>;
902 default: return(ccode[(eight_bits)c]);
906 @ The occurrence of a section name sets |xref_switch| to zero,
907 because the section name might (for example) follow \&{int}.
909 @<Scan the section name...@>= {
910 char *k; /* pointer into |section_text| */
911 cur_section_char=*(loc-1);
912 @<Put section name into |section_text|@>;
913 if (k-section_text>3 && strncmp(k-2,"...",3)==0)
914 cur_section=section_lookup(section_text+1,k-3,1); /* 1 indicates a prefix */
915 else cur_section=section_lookup(section_text+1,k,0);
916 xref_switch=0; return(section_name);
919 @ Section names are placed into the |section_text| array with consecutive spaces,
920 tabs, and carriage-returns replaced by single spaces. There will be no
921 spaces at the beginning or the end. (We set |section_text[0]=' '| to facilitate
922 this, since the |section_lookup| routine uses |section_text[1]| as the first
923 character of the name.)
925 @<Set init...@>=section_text[0]=' ';
927 @ @<Put section name...@>=
928 k=section_text;
929 while (1) {
930 if (loc>limit && get_line()==0) {
931 err_print("! Input ended in section name");
932 @.Input ended in section name@>
933 loc=buffer+1; break;
935 c=*loc;
936 @<If end of name or erroneous control code, |break|@>;
937 loc++; if (k<section_text_end) k++;
938 if (xisspace(c)) {
939 c=' '; if (*(k-1)==' ') k--;
941 *k=c;
943 if (k>=section_text_end) {
944 printf("\n! Section name too long: ");
945 @.Section name too long@>
946 term_write(section_text+1,25);
947 printf("..."); mark_harmless;
949 if (*k==' ' && k>section_text) k--;
951 @ @<If end of name...@>=
952 if (c=='@@') {
953 c=*(loc+1);
954 if (c=='>') {
955 loc+=2; break;
957 if (ccode[(eight_bits)c]==new_section) {
958 err_print("! Section name didn't end"); break;
959 @.Section name didn't end@>
961 if (c!='@@') {
962 err_print("! Control codes are forbidden in section name"); break;
963 @.Control codes are forbidden...@>
965 *(++k)='@@'; loc++; /* now |c==*loc| again */
968 @ This function skips over a restricted context at relatively high speed.
970 @<Predecl...@>=
971 void skip_restricted();
973 @ @c
974 void
975 skip_restricted() /* delimits a control text with |id_first| and |id_loc| */
977 id_first=loc; *(limit+1)='@@'; /* sentinel, so that the scan below always stops */
978 false_alarm:
979 while (*loc!='@@') loc++;
980 id_loc=loc; /* the text ends just before this at sign */
981 if (loc++>limit) { /* we stopped at the sentinel, not at a real at sign */
982 err_print("! Control text didn't end"); loc=limit;
983 @.Control text didn't end@>
985 else {
986 if (*loc=='@@'&&loc<=limit) {loc++; goto false_alarm;} /* a doubled at sign inside the line: keep scanning */
987 if (*loc++!='>') /* anything but a closing |'>'| after the at sign is an error */
988 err_print("! Control codes are forbidden in control text");
989 @.Control codes are forbidden...@>
993 @ At the present point in the program we
994 have |*(loc-1)==verbatim|; we set |id_first| to the beginning
995 of the string itself, and |id_loc| to its ending-plus-one location in the
996 buffer. We also set |loc| to the position just after the ending delimiter.
998 @<Scan a verbatim string@>= {
999 id_first=loc++; *(limit+1)='@@'; *(limit+2)='>';
1000 while (*loc!='@@' || *(loc+1)!='>') loc++;
1001 if (loc>=limit) err_print("! Verbatim string didn't end");
1002 @.Verbatim string didn't end@>
1003 id_loc=loc; loc+=2;
1004 return (verbatim);
1007 @** Phase one processing.
1008 We now have accumulated enough subroutines to make it possible to carry out
1009 \.{CWEAVE}'s first pass over the source file. If everything works right,
1010 both phase one and phase two of \.{CWEAVE} will assign the same numbers to
1011 sections, and these numbers will agree with what \.{CTANGLE} does.
1013 The global variable |next_control| often contains the most recent output of
1014 |get_next|; in interesting cases, this will be the control code that
1015 ended a section or part of a section.
1017 @<Global...@>=
1018 eight_bits next_control; /* control code waiting to be acted upon */
1020 @ The overall processing strategy in phase one has the following
1021 straightforward outline.
1023 @<Predecl...@>=
1024 void phase_one();
1026 @ @c
1027 void
1028 phase_one() {
1029 phase=1; reset_input(); section_count=0;
1030 skip_limbo(); change_exists=0;
1031 while (!input_has_ended)
1032 @<Store cross-reference data for the current section@>;
1033 changed_section[section_count]=change_exists;
1034 /* the index changes if anything does */
1035 phase=2; /* prepare for second phase */
1036 @<Print error messages about unused or undefined section names@>;
1039 @ @<Store cross-reference data...@>=
1041 if (++section_count==max_sections) overflow("section number");
1042 changed_section[section_count]=changing;
1043 /* it will become 1 if any line changes */
1044 if (*(loc-1)=='*' && show_progress) {
1045 printf("*%d",section_count);
1046 update_terminal; /* print a progress report */
1048 @<Store cross-references in the \TEX/ part of a section@>;
1049 @<Store cross-references in the definition part of a section@>;
1050 @<Store cross-references in the \CEE/ part of a section@>;
1051 if (changed_section[section_count]) change_exists=1;
1054 @ The |C_xref| subroutine stores references to identifiers in
1055 \CEE/ text material beginning with the current value of |next_control|
1056 and continuing until |next_control| is |begin_comment|, |begin_short_comment|, or `\.{\v}', or until the next
1057 ``milestone'' is passed (i.e., |next_control>=format_code|). If
1058 |next_control>=format_code| when |C_xref| is called, nothing will happen;
1059 but if |next_control=='|'| upon entry, the procedure assumes that this is
1060 the `\.{\v}' preceding \CEE/ text that is to be processed.
1062 The parameter |spec_ctrl| is used to change this behavior. In most cases
1063 |C_xref| is called with |spec_ctrl==ignore|, which triggers the default
1064 processing described above. If |spec_ctrl==section_name|, section names will
1065 be gobbled. This is used when \CEE/ text in the \TEX/ part or inside comments
1066 is parsed: It allows for section names to appear in \pb, but these
1067 strings will not be entered into the cross reference lists since they are not
1068 definitions of section names.
1070 The program uses the fact that our internal code numbers satisfy
1071 the relations |xref_roman==identifier+roman| and |xref_wildcard==identifier
1072 +wildcard| and |xref_typewriter==identifier+typewriter|,
1073 as well as |normal==0|.
1075 @<Predecl...@>=
1076 void C_xref();
1078 @ @c
1079 void
1080 C_xref( spec_ctrl ) /* makes cross-references for \CEE/ identifiers */
1081 eight_bits spec_ctrl;
1083 name_pointer p; /* a referenced name */
1084 while (next_control<format_code || next_control==spec_ctrl) {
1085 if (next_control>=identifier && next_control<=xref_typewriter) {
1086 if (next_control>identifier) @<Replace |"@@@@"| by |"@@"| @>@;
1087 p=id_lookup(id_first, id_loc,next_control-identifier); new_xref(p);
1089 if (next_control==section_name) {
1090 section_xref_switch=cite_flag;
1091 new_section_xref(cur_section);
1093 next_control=get_next();
1094 if (next_control=='|' || next_control==begin_comment ||
1095 next_control==begin_short_comment) return;
1099 @ The |outer_xref| subroutine is like |C_xref| except that it begins
1100 with |next_control!='|'| and ends with |next_control>=format_code|. Thus, it
1101 handles \CEE/ text with embedded comments.
1103 @<Predecl...@>=
1104 void outer_xref();
1106 @ @c
1107 void
1108 outer_xref() /* extension of |C_xref| */
1110 int bal; /* brace level in comment */
1111 while (next_control<format_code)
1112 if (next_control!=begin_comment && next_control!=begin_short_comment)
1113 C_xref(ignore); /* ordinary text outside of comments */
1114 else {
1115 boolean is_long_comment=(next_control==begin_comment); /* otherwise it is a short comment */
1116 bal=copy_comment(is_long_comment,1); next_control='|'; /* |bal| is positive only if |copy_comment| stopped at a vertical bar */
1117 while (bal>0) {
1118 C_xref(section_name); /* do not reference section names in comments */
1119 if (next_control=='|') bal=copy_comment(is_long_comment,bal); /* resume the comment with the same brace level */
1120 else bal=0; /* an error message will occur in phase two */
1125 @ In the \TEX/ part of a section, cross-reference entries are made only for
1126 the identifiers in \CEE/ texts enclosed in \pb, or for control texts
1127 enclosed in \.{@@\^}$\,\ldots\,$\.{@@>} or \.{@@.}$\,\ldots\,$\.{@@>}
1128 or \.{@@:}$\,\ldots\,$\.{@@>}.
1130 @<Store cross-references in the \T...@>=
1131 while (1) {
1132 switch (next_control=skip_TeX()) {
1133 case translit_code: err_print("! Use @@l in limbo only"); continue;
1134 @.Use @@l in limbo...@>
1135 case underline: xref_switch=def_flag; continue;
1136 case trace: tracing=*(loc-1)-'0'; continue;
1137 case '|': C_xref(section_name); break;
1138 case xref_roman: case xref_wildcard: case xref_typewriter:
1139 case noop: case section_name:
1140 loc-=2; next_control=get_next(); /* scan to \.{@@>} */
1141 if (next_control>=xref_roman && next_control<=xref_typewriter) {
1142 @<Replace |"@@@@"| by |"@@"| @>@;
1143 new_xref(id_lookup(id_first, id_loc,next_control-identifier));
1145 break;
1147 if (next_control>=format_code) break;
1150 @ @<Replace |"@@@@"| by |"@@"| @>=
1152 char *src=id_first,*dst=id_first;
1153 while(src<id_loc){
1154 if(*src=='@@') src++;
1155 *dst++=*src++;
1157 id_loc=dst;
1158 while (dst<src) *dst++=' '; /* clean up in case of error message display */
1161 @ During the definition and \CEE/ parts of a section, cross-references
1162 are made for all identifiers except reserved words. However, the right
1163 identifier in a format definition is not referenced, and the left
1164 identifier is referenced only if it has been explicitly
1165 underlined (preceded by \.{@@!}).
1166 The \TEX/ code in comments is, of course, ignored, except for
1167 \CEE/ portions enclosed in \pb; the text of a section name is skipped
1168 entirely, even if it contains \pb\ constructions.
1170 The variables |lhs| and |rhs| point to the respective identifiers involved
1171 in a format definition.
1173 @<Global...@>=
1174 name_pointer lhs, rhs; /* pointers to |byte_start| for format identifiers */
1175 name_pointer res_wd_end; /* pointer to the first nonreserved identifier */
1177 @ When we get to the following code we have |next_control>=format_code|.
1179 @<Store cross-references in the d...@>=
1180 while (next_control<=definition) { /* |format_code| or |definition| */
1181 if (next_control==definition) {
1182 xref_switch=def_flag; /* implied \.{@@!} */
1183 next_control=get_next();
1184 } else @<Process a format definition@>;
1185 outer_xref();
1188 @ Error messages for improper format definitions will be issued in phase
1189 two. Our job in phase one is to define the |ilk| of a properly formatted
1190 identifier, and to remove cross-references to identifiers that we now
1191 discover should be unindexed.
1193 @<Process a form...@>= {
1194 next_control=get_next();
1195 if (next_control==identifier) {
1196 lhs=id_lookup(id_first, id_loc,normal); lhs->ilk=normal;
1197 if (xref_switch) new_xref(lhs);
1198 next_control=get_next();
1199 if (next_control==identifier) {
1200 rhs=id_lookup(id_first, id_loc,normal);
1201 lhs->ilk=rhs->ilk;
1202 if (unindexed(lhs)) { /* retain only underlined entries */
1203 xref_pointer q,r=NULL;
1204 for (q=(xref_pointer)lhs->xref;q>xmem;q=q->xlink)
1205 if (q->num<def_flag)
1206 if (r) r->xlink=q->xlink;
1207 else lhs->xref=(char*)q->xlink;
1208 else r=q;
1210 next_control=get_next();
1215 @ A much simpler processing of format definitions occurs when the
1216 definition is found in limbo.
1218 @<Process simple format in limbo@>=
1220 if (get_next()!=identifier)
1221 err_print("! Missing left identifier of @@s");
1222 @.Missing left identifier...@>
1223 else {
1224 lhs=id_lookup(id_first,id_loc,normal);
1225 if (get_next()!=identifier)
1226 err_print("! Missing right identifier of @@s");
1227 @.Missing right identifier...@>
1228 else {
1229 rhs=id_lookup(id_first,id_loc,normal);
1230 lhs->ilk=rhs->ilk;
1235 @ Finally, when the \TEX/ and definition parts have been treated, we have
1236 |next_control>=begin_C|.
1238 @<Store cross-references in the \CEE/...@>=
1239 if (next_control<=section_name) { /* |begin_C| or |section_name| */
1240 if (next_control==begin_C) section_xref_switch=0;
1241 else {
1242 section_xref_switch=def_flag;
1243 if(cur_section_char=='(' && cur_section!=name_dir)
1244 set_file_flag(cur_section);
1246 do {
1247 if (next_control==section_name && cur_section!=name_dir)
1248 new_section_xref(cur_section);
1249 next_control=get_next(); outer_xref();
1250 } while ( next_control<=section_name);
1253 @ After phase one has looked at everything, we want to check that each
1254 section name was both defined and used. The variable |cur_xref| will point
1255 to cross-references for the current section name of interest.
1257 @<Global...@>=
1258 xref_pointer cur_xref; /* temporary cross-reference pointer */
1259 boolean an_output; /* did |file_flag| precede |cur_xref|? */
1261 @ The following recursive procedure
1262 walks through the tree of section names and prints out anomalies.
1263 @^recursion@>
1265 @<Predecl...@>=
1266 void section_check();
1268 @ @c
1269 void
1270 section_check(p) /* visits the left subtree, then |p| itself, then the right subtree */
1271 name_pointer p; /* print anomalies in subtree |p| */
1273 if (p) {
1274 section_check(p->llink);
1275 cur_xref=(xref_pointer)p->xref;
1276 if (cur_xref->num==file_flag) {an_output=1; cur_xref=cur_xref->xlink;} /* note a leading |file_flag| entry and step past it */
1277 else an_output=0;
1278 if (cur_xref->num <def_flag) { /* the first remaining entry is below |def_flag| */
1279 printf("\n! Never defined: <"); print_section_name(p); putchar('>'); mark_harmless;
1280 @.Never defined: <section name>@>
1282 while (cur_xref->num >=cite_flag) cur_xref=cur_xref->xlink; /* skip every entry that is at least |cite_flag| */
1283 if (cur_xref==xmem && !an_output) { /* nothing was left after those entries */
1284 printf("\n! Never used: <"); print_section_name(p); putchar('>'); mark_harmless;
1285 @.Never used: <section name>@>
1287 section_check(p->rlink);
1291 @ @<Print error messages about un...@>=section_check(root)
1293 @* Low-level output routines.
1294 The \TEX/ output is supposed to appear in lines at most |line_length|
1295 characters long, so we place it into an output buffer. During the output
1296 process, |out_line| will hold the current line number of the line about to
1297 be output.
1299 @<Global...@>=
1300 char out_buf[line_length+1]; /* assembled characters */
1301 char *out_ptr; /* just after last character in |out_buf| */
1302 char *out_buf_end = out_buf+line_length; /* end of |out_buf| */
1303 int out_line; /* number of next line to be output */
1305 @ The |flush_buffer| routine empties the buffer up to a given breakpoint,
1306 and moves any remaining characters to the beginning of the next line.
1307 If the |per_cent| parameter is 1 a |'%'| is appended to the line
1308 that is being output; in this case the breakpoint |b| should be strictly
1309 less than |out_buf_end|. If the |per_cent| parameter is |0|,
1310 trailing blanks are suppressed.
1311 The characters emptied from the buffer form a new line of output;
1312 if the |carryover| parameter is true, a |"%"| in that line will be
1313 carried over to the next line (so that \TEX/ will ignore the completion
1314 of commented-out text).
1316 @d c_line_write(c) fflush(active_file),fwrite(out_buf+1,sizeof(char),c,active_file)
1317 @d tex_putc(c) putc(c,active_file)
1318 @d tex_new_line putc('\n',active_file)
1319 @d tex_printf(c) fprintf(active_file,c)
1322 void
1323 flush_buffer(b,per_cent,carryover) /* writes one output line and keeps the unwritten tail of |out_buf| */
1324 char *b; /* outputs from |out_buf+1| to |b|, where |b<=out_ptr| */
1325 boolean per_cent,carryover; /* append |'%'| to the line? carry a |"%"| over to the next one? */
1327 char *j; j=b; /* pointer into |out_buf| */
1328 if (! per_cent) /* remove trailing blanks */
1329 while (j>out_buf && *j==' ') j--;
1330 c_line_write(j-out_buf);
1331 if (per_cent) tex_putc('%');
1332 tex_new_line; out_line++;
1333 if (carryover) /* look backward for a |'%'| that is not preceded by a backslash */
1334 while (j>out_buf)
1335 if (*j--=='%' && (j==out_buf || *j!='\\')) {
1336 *b--='%'; break; /* the leftover text will now begin with |'%'| */
1338 if (b<out_ptr) memmove(out_buf+1,b+1,(size_t)(out_ptr-b)); /* the two ranges may overlap, which |strncpy| does not permit */
1339 out_ptr-=b-out_buf;
1342 @ When we are copying \TEX/ source material, we retain line breaks
1343 that occur in the input, except that an empty line is not
1344 output when the \TEX/ source line was nonempty. For example, a line
1345 of the \TEX/ file that contains only an index cross-reference entry
1346 will not be copied. The |finish_line| routine is called just before
1347 |get_line| inputs a new line, and just after a line break token has
1348 been emitted during the output of translated \CEE/ text.
1351 void
1352 finish_line() /* do this at the end of a line */
1354 char *k; /* pointer into |buffer| */
1355 if (out_ptr>out_buf) flush_buffer(out_ptr,0,0);
1356 else {
1357 for (k=buffer; k<=limit; k++)
1358 if (!(xisspace(*k))) return;
1359 flush_buffer(out_buf,0,0);
1363 @ In particular, the |finish_line| procedure is called near the very
1364 beginning of phase two. We initialize the output variables in a slightly
1365 tricky way so that the first line of the output file will be
1366 `\.{\\input cwebmac}'.
1368 @<Set init...@>=
1369 out_ptr=out_buf+1; out_line=1; active_file=tex_file;
1370 *out_ptr='c'; tex_printf("\\input cwebma");
1372 @ When we wish to append one character |c| to the output buffer, we write
1373 `|out(c)|'; this will cause the buffer to be emptied if it was already
1374 full. If we want to append more than one character at once, we say
1375 |out_str(s)|, where |s| is a string containing the characters.
1377 A line break will occur at a space or after a single-nonletter
1378 \TEX/ control sequence.
1380 @d out(c) {if (out_ptr>=out_buf_end) break_out(); *(++out_ptr)=c;}
1383 void
1384 out_str(s) /* output characters from |s| to end of string */
1385 char *s;
1387 while (*s) out(*s++);
1390 @ The |break_out| routine is called just before the output buffer is about
1391 to overflow. To make this routine a little faster, we initialize position
1392 0 of the output buffer to `\.\\'; this character isn't really output.
1394 @<Set init...@>=
1395 out_buf[0]='\\';
1397 @ A long line is broken at a blank space or just before a backslash that isn't
1398 preceded by another backslash. In the latter case, a |'%'| is output at
1399 the break.
1401 @<Predecl...@>=
1402 void break_out();
1404 @ @c
1405 void
1406 break_out() /* finds a way to break the output line */
1408 char *k=out_ptr; /* pointer into |out_buf|; the search runs backward from |out_ptr| */
1409 while (1) {
1410 if (k==out_buf) @<Print warning message, break the line, |return|@>; /* no break point was found */
1411 if (*k==' ') { /* break at a blank, without appending |'%'| */
1412 flush_buffer(k,0,1); return;
1414 if (*(k--)=='\\' && *k!='\\') { /* we've decreased |k| */
1415 flush_buffer(k,1,1); return; /* break just before the backslash, appending |'%'| */
1420 @ We get to this section only in the unusual case that the entire output line
1421 consists of a string of backslashes followed by a string of nonblank
1422 non-backslashes. In such cases it is almost always safe to break the
1423 line by putting a |'%'| just before the last character.
1425 @<Print warning message...@>=
1427 printf("\n! Line had to be broken (output l. %d):\n",out_line);
1428 @.Line had to be broken@>
1429 term_write(out_buf+1, out_ptr-out_buf-1);
1430 new_line; mark_harmless;
1431 flush_buffer(out_ptr-1,1,1); return;
1434 @ Here is a function that outputs a section number in decimal notation.
1435 The number to be converted by |out_section| is known to be less than
1436 |def_flag|, so it cannot have more than five decimal digits. If
1437 the section is changed, we output `\.{\\*}' just after the number.
1440 void
1441 out_section(n)
1442 sixteen_bits n;
1444 char s[6];
1445 sprintf(s,"%d",n); out_str(s);
1446 if(changed_section[n]) out_str ("\\*");
1447 @.\\*@>
1450 @ The |out_name| procedure is used to output an identifier or index
1451 entry, enclosing it in braces.
1454 void
1455 out_name(p,quote_xalpha)
1456 name_pointer p;
1457 boolean quote_xalpha;
1459 char *k, *k_end=(p+1)->byte_start; /* pointers into |byte_mem| */
1460 out('{');
1461 for (k=p->byte_start; k<k_end; k++) {
1462 if (isxalpha(*k) && quote_xalpha) out('\\');
1463 @.\\\$@>
1464 @.\\\_@>
1465 out(*k);
1467 out('}');
1470 @* Routines that copy \TEX/ material.
1471 During phase two, we use subroutines |copy_limbo|, |copy_TeX|, and
1472 |copy_comment| in place of the analogous |skip_limbo|, |skip_TeX|, and
1473 |skip_comment| that were used in phase one. (Well, |copy_comment|
1474 was actually written in such a way that it functions as |skip_comment|
1475 in phase one.)
1477 The |copy_limbo| routine, for example, takes \TEX/ material that is not
1478 part of any section and transcribes it almost verbatim to the output file.
1479 The use of `\.{@@}' signs is severely restricted in such material:
1480 `\.{@@@@}' pairs are replaced by singletons; `\.{@@l}' and `\.{@@q}' and
1481 `\.{@@s}' are interpreted.
1484 void
1485 copy_limbo()
1487 char c;
1488 while (1) {
1489 if (loc>limit && (finish_line(), get_line()==0)) return;
1490 *(limit+1)='@@';
1491 while (*loc!='@@') out(*(loc++));
1492 if (loc++<=limit) {
1493 c=*loc++;
1494 if (ccode[(eight_bits)c]==new_section) break;
1495 switch (ccode[(eight_bits)c]) {
1496 case translit_code: out_str("\\ATL"); break;
1497 @.\\ATL@>
1498 case '@@': out('@@'); break;
1499 case noop: skip_restricted(); break;
1500 case format_code: if (get_next()==identifier) get_next();
1501 if (loc>=limit) get_line(); /* avoid blank lines in output */
1502 break; /* the operands of \.{@@s} are ignored on this pass */
1503 default: err_print("! Double @@ should be used in limbo");
1504 @.Double @@ should be used...@>
1505 out('@@');
1511 @ The |copy_TeX| routine processes the \TEX/ code at the beginning of a
1512 section; for example, the words you are now reading were copied in this
1513 way. It returns the next control code or `\.{\v}' found in the input.
1514 We don't copy spaces or tab marks into the beginning of a line. This
1515 makes the test for empty lines in |finish_line| work.
1517 @ @f copy_TeX TeX
1519 eight_bits
1520 copy_TeX() /* copies text to the output until a vertical bar or a control code appears */
1522 char c; /* current character being copied */
1523 while (1) {
1524 if (loc>limit && (finish_line(), get_line()==0)) return(new_section); /* end of input is reported as |new_section| */
1525 *(limit+1)='@@'; /* sentinel that stops the inner loop at the end of the line */
1526 while ((c=*(loc++))!='|' && c!='@@') {
1527 out(c);
1528 if (out_ptr==out_buf+1 && (xisspace(c))) out_ptr--; /* discard white space at the start of an output line */
1530 if (c=='|') return('|');
1531 if (loc<=limit) return(ccode[(eight_bits)*(loc++)]); /* a real at sign, not the sentinel: return the code of the next character */
1535 @ The |copy_comment| function issues a warning if more braces are opened than
1536 closed, and in the case of a more serious error it supplies enough
1537 braces to keep \TEX/ from complaining about unbalanced braces.
1538 Instead of copying the \TEX/ material
1539 into the output buffer, this function copies it into the token memory
1540 (in phase two only).
1541 The abbreviation |app_tok(t)| is used to append token |t| to the current
1542 token list, and it also makes sure that it is possible to append at least
1543 one further token without overflow.
1545 @d app_tok(c) {if (tok_ptr+2>tok_mem_end) overflow("token"); *(tok_ptr++)=c;}
1547 @<Predec...@>=
1548 int copy_comment();
1550 @ @c
1551 int copy_comment(is_long_comment,bal) /* copies \TEX/ code in comments */
1552 boolean is_long_comment; /* is this a traditional \CEE/ comment? */
1553 int bal; /* brace balance */
1555 char c; /* current character being copied */
1556 while (1) {
1557 if (loc>limit) {
1558 if (is_long_comment) {
1559 if (get_line()==0) {
1560 err_print("! Input ended in mid-comment");
1561 @.Input ended in mid-comment@>
1562 loc=buffer+1; goto done;
1565 else {
1566 if (bal>1) err_print("! Missing } in comment");
1567 @.Missing \} in comment@>
1568 goto done;
1571 c=*(loc++);
1572 if (c=='|') return(bal);
1573 if (is_long_comment) @<Check for end of comment@>;
1574 if (phase==2) {
1575 if (ishigh(c)) app_tok(quoted_char);
1576 app_tok(c);
1578 @<Copy special things when |c=='@@', '\\'|@>;
1579 if (c=='{') bal++;
1580 else if (c=='}') {
1581 if(bal>1) bal--;
1582 else {err_print("! Extra } in comment");
1583 @.Extra \} in comment@>
1584 if (phase==2) tok_ptr--;
1588 done:@<Clear |bal| and |return|@>;
1591 @ @<Check for end of comment@>=
1592 if (c=='*' && *loc=='/') {
1593 loc++;
1594 if (bal>1) err_print("! Missing } in comment");
1595 @.Missing \} in comment@>
1596 goto done;
1599 @ @<Copy special things when |c=='@@'...@>=
1600 if (c=='@@') {
1601 if (*(loc++)!='@@') {
1602 err_print("! Illegal use of @@ in comment");
1603 @.Illegal use of @@...@>
1604 loc-=2; if (phase==2) *(tok_ptr-1)=' '; goto done;
1607 else if (c=='\\' && *loc!='@@')
1608 if (phase==2) app_tok(*(loc++)) else loc++;
1610 @ We output
1611 enough right braces to keep \TEX/ happy.
1613 @<Clear |bal|...@>=
1614 if (phase==2) while (bal-- >0) app_tok('}');
1615 return(0);
1617 @** Parsing.
1618 The most intricate part of \.{CWEAVE} is its mechanism for converting
1619 \CEE/-like code into \TEX/ code, and we might as well plunge into this
1620 aspect of the program now. A ``bottom up'' approach is used to parse the
1621 \CEE/-like material, since \.{CWEAVE} must deal with fragmentary
1622 constructions whose overall ``part of speech'' is not known.
1624 At the lowest level, the input is represented as a sequence of entities
1625 that we shall call {\it scraps}, where each scrap of information consists
1626 of two parts, its {\it category} and its {\it translation}. The category
1627 is essentially a syntactic class, and the translation is a token list that
1628 represents \TEX/ code. Rules of syntax and semantics tell us how to
1629 combine adjacent scraps into larger ones, and if we are lucky an entire
1630 \CEE/ text that starts out as hundreds of small scraps will join
1631 together into one gigantic scrap whose translation is the desired \TEX/
1632 code. If we are unlucky, we will be left with several scraps that don't
1633 combine; their translations will simply be output, one by one.
1635 The combination rules are given as context-sensitive productions that are
1636 applied from left to right. Suppose that we are currently working on the
1637 sequence of scraps $s_1\,s_2\ldots s_n$. We try first to find the longest
1638 production that applies to an initial substring $s_1\,s_2\ldots\,$; but if
1639 no such productions exist, we try to find the longest production
1640 applicable to the next substring $s_2\,s_3\ldots\,$; and if that fails, we
1641 try to match $s_3\,s_4\ldots\,$, etc.
1643 A production applies if the category codes have a given pattern. For
1644 example, one of the productions (see rule~3) is
1645 $$\hbox{|exp| }\left\{\matrix{\hbox{|binop|}\cr\hbox{|ubinop|}}\right\}
1646 \hbox{ |exp| }\RA\hbox{ |exp|}$$
1647 and it means that three consecutive scraps whose respective categories are
1648 |exp|, |binop| (or |ubinop|),
1649 and |exp| are converted to one scrap whose category
1650 is |exp|. The translations of the original
1651 scraps are simply concatenated. The case of
1652 $$\hbox{|exp| |comma| |exp| $\RA$ |exp|} \hskip4emE_1C\,\\{opt}9\,E_2$$
1653 (rule 4) is only slightly more complicated:
1654 Here the resulting |exp| translation
1655 consists not only of the three original translations, but also of the
1656 tokens |opt| and 9 between the translations of the
1657 |comma| and the following |exp|.
1658 In the \TEX/ file, this will specify an optional line break after the
1659 comma, with penalty 90.
1661 At each opportunity the longest possible production is applied. For
1662 example, if the current sequence of scraps is |int_like| |cast|
1663 |lbrace|, rule 31 is applied; but if the sequence is |int_like| |cast|
1664 followed by anything other than |lbrace|, rule 32 takes effect.
1666 Translation rules such as `$E_1C\,\\{opt}9\,E_2$' above use subscripts
1667 to distinguish between translations of scraps whose categories have the
1668 same initial letter; these subscripts are assigned from left to right.
1670 @ Here is a list of the category codes that scraps can have.
1671 (A few others, like |int_like|, have already been defined; the
1672 |cat_name| array contains a complete list.)
1674 @d exp 1 /* denotes an expression, including perhaps a single identifier */
1675 @d unop 2 /* denotes a unary operator */
1676 @d binop 3 /* denotes a binary operator */
1677 @d ubinop 4
1678 /* denotes an operator that can be unary or binary, depending on context */
1679 @d cast 5 /* denotes a cast */
1680 @d question 6 /* denotes a question mark and possibly the expressions flanking it */
1681 @d lbrace 7 /* denotes a left brace */
1682 @d rbrace 8 /* denotes a right brace */
1683 @d decl_head 9 /* denotes an incomplete declaration */
1684 @d comma 10 /* denotes a comma */
1685 @d lpar 11 /* denotes a left parenthesis or left bracket */
1686 @d rpar 12 /* denotes a right parenthesis or right bracket */
1687 @d prelangle 13 /* denotes `$<$' before we know what it is */
1688 @d prerangle 14 /* denotes `$>$' before we know what it is */
1689 @d langle 15 /* denotes `$<$' when it's used as angle bracket in a template */
1690 @d colcol 18 /* denotes `::' */
1691 @d base 19 /* denotes a colon that introduces a base specifier */
1692 @d decl 20 /* denotes a complete declaration */
1693 @d struct_head 21 /* denotes the beginning of a structure specifier */
1694 @d stmt 23 /* denotes a complete statement */
1695 @d function 24 /* denotes a complete function */
1696 @d fn_decl 25 /* denotes a function declarator */
1697 @d semi 27 /* denotes a semicolon */
1698 @d colon 28 /* denotes a colon */
1699 @d tag 29 /* denotes a statement label */
1700 @d if_head 30 /* denotes the beginning of a compound conditional */
1701 @d else_head 31 /* denotes a prefix for a compound statement */
1702 @d if_clause 32 /* pending \.{if} together with a condition */
1703 @d lproc 35 /* begins a preprocessor command */
1704 @d rproc 36 /* ends a preprocessor command */
1705 @d insert 37 /* a scrap that gets combined with its neighbor */
1706 @d section_scrap 38 /* section name */
1707 @d dead 39 /* scrap that won't combine */
1708 @d ftemplate 59 /* \\{make\_pair} */
1709 @d new_exp 60 /* \&{new} and a following type identifier */
1710 @d begin_arg 61 /* \.{@@[} */
1711 @d end_arg 62 /* \.{@@]} */
1713 @<Glo...@>=
1714 char cat_name[256][12];
1715 eight_bits cat_index;
1717 @ @<Set in...@>=
1718 for (cat_index=0;cat_index<255;cat_index++) /* entries 0 to 254; 255 is skipped */
1719 strcpy(cat_name[cat_index],"UNKNOWN"); /* default until a real name is stored */
1720 @.UNKNOWN@>
1721 strcpy(cat_name[exp],"exp");
1722 strcpy(cat_name[unop],"unop");
1723 strcpy(cat_name[binop],"binop");
1724 strcpy(cat_name[ubinop],"ubinop");
1725 strcpy(cat_name[cast],"cast");
1726 strcpy(cat_name[question],"?");
1727 strcpy(cat_name[lbrace],"{"@q}@>);
1728 strcpy(cat_name[rbrace],@q{@>"}");
1729 strcpy(cat_name[decl_head],"decl_head");
1730 strcpy(cat_name[comma],",");
1731 strcpy(cat_name[lpar],"(");
1732 strcpy(cat_name[rpar],")");
1733 strcpy(cat_name[prelangle],"<");
1734 strcpy(cat_name[prerangle],">");
1735 strcpy(cat_name[langle],"\\<");
1736 strcpy(cat_name[colcol],"::");
1737 strcpy(cat_name[base],"\\:");
1738 strcpy(cat_name[decl],"decl");
1739 strcpy(cat_name[struct_head],"struct_head");
1740 strcpy(cat_name[alfop],"alfop");
1741 strcpy(cat_name[stmt],"stmt");
1742 strcpy(cat_name[function],"function");
1743 strcpy(cat_name[fn_decl],"fn_decl");
1744 strcpy(cat_name[else_like],"else_like");
1745 strcpy(cat_name[semi],";");
1746 strcpy(cat_name[colon],":");
1747 strcpy(cat_name[tag],"tag");
1748 strcpy(cat_name[if_head],"if_head");
1749 strcpy(cat_name[else_head],"else_head");
1750 strcpy(cat_name[if_clause],"if()");
1751 strcpy(cat_name[lproc],"#{"@q}@>);
1752 strcpy(cat_name[rproc],@q{@>"#}");
1753 strcpy(cat_name[insert],"insert");
1754 strcpy(cat_name[section_scrap],"section");
1755 strcpy(cat_name[dead],"@@d");
1756 strcpy(cat_name[public_like],"public");
1757 strcpy(cat_name[operator_like],"operator");
1758 strcpy(cat_name[new_like],"new");
1759 strcpy(cat_name[catch_like],"catch");
1760 strcpy(cat_name[for_like],"for");
1761 strcpy(cat_name[do_like],"do");
1762 strcpy(cat_name[if_like],"if");
1763 strcpy(cat_name[delete_like],"delete");
1764 strcpy(cat_name[raw_ubin],"ubinop?");
1765 strcpy(cat_name[const_like],"const");
1766 strcpy(cat_name[raw_int],"raw");
1767 strcpy(cat_name[int_like],"int");
1768 strcpy(cat_name[case_like],"case");
1769 strcpy(cat_name[sizeof_like],"sizeof");
1770 strcpy(cat_name[struct_like],"struct");
1771 strcpy(cat_name[typedef_like],"typedef");
1772 strcpy(cat_name[define_like],"define");
1773 strcpy(cat_name[template_like],"template");
1774 strcpy(cat_name[ftemplate],"ftemplate");
1775 strcpy(cat_name[new_exp],"new_exp");
1776 strcpy(cat_name[begin_arg],"@@["@q]@>);
1777 strcpy(cat_name[end_arg],@q[@>"@@]");
1778 strcpy(cat_name[0],"zero");
1780 @ This code allows \.{CWEAVE} to display its parsing steps.
1783 void
1784 print_cat(c) /* symbolic printout of a category */
1785 eight_bits c; /* category code, used as an index into |cat_name| */
1787 fputs(cat_name[c],stdout); /* the name is data, never a |printf| format */
1790 @ The token lists for translated \TEX/ output contain some special control
1791 symbols as well as ordinary characters. These control symbols are
1792 interpreted by \.{CWEAVE} before they are written to the output file.
1794 \yskip\hang |break_space| denotes an optional line break or an en space;
1796 \yskip\hang |force| denotes a line break;
1798 \yskip\hang |big_force| denotes a line break with additional vertical space;
1800 \yskip\hang |preproc_line| denotes that the line will be printed flush left;
1802 \yskip\hang |opt| denotes an optional line break (with the continuation
1803 line indented two ems with respect to the normal starting position)---this
1804 code is followed by an integer |n|, and the break will occur with penalty
1805 $10n$;
1807 \yskip\hang |backup| denotes a backspace of one em;
1809 \yskip\hang |cancel| obliterates any |break_space|, |opt|, |force|, or
1810 |big_force| tokens that immediately precede or follow it and also cancels any
1811 |backup| tokens that follow it;
1813 \yskip\hang |indent| causes future lines to be indented one more em;
1815 \yskip\hang |outdent| causes future lines to be indented one less em.
1817 \yskip\noindent All of these tokens are removed from the \TEX/ output that
1818 comes from \CEE/ text between \pb\ signs; |break_space| and |force| and
1819 |big_force| become single spaces in this mode. The translation of other
1820 \CEE/ texts results in \TEX/ control sequences \.{\\1}, \.{\\2},
1821 \.{\\3}, \.{\\4}, \.{\\5}, \.{\\6}, \.{\\7}, \.{\\8}
1822 corresponding respectively to
1823 |indent|, |outdent|, |opt|, |backup|, |break_space|, |force|,
1824 |big_force| and |preproc_line|.
1825 However, a sequence of consecutive `\.\ ', |break_space|,
1826 |force|, and/or |big_force| tokens is first replaced by a single token
1827 (the maximum of the given ones).
1829 The token |math_rel| will be translated into
1830 \.{\\MRL\{}, and it will get a matching \.\} later.
1831 Other control sequences in the \TEX/ output will be
1832 `\.{\\\\\{}$\,\ldots\,$\.\}'
1833 surrounding identifiers, `\.{\\\&\{}$\,\ldots\,$\.\}' surrounding
1834 reserved words, `\.{\\.\{}$\,\ldots\,$\.\}' surrounding strings,
1835 `\.{\\C\{}$\,\ldots\,$\.\}$\,$|force|' surrounding comments, and
1836 `\.{\\X$n$:}$\,\ldots\,$\.{\\X}' surrounding section names, where
1837 |n| is the section number.
1839 @d math_rel 0206
1840 @d big_cancel 0210 /* like |cancel|, also overrides spaces */
1841 @d cancel 0211 /* overrides |backup|, |break_space|, |force|, |big_force| */
1842 @d indent 0212 /* one more tab (\.{\\1}) */
1843 @d outdent 0213 /* one less tab (\.{\\2}) */
1844 @d opt 0214 /* optional break in mid-statement (\.{\\3}) */
1845 @d backup 0215 /* stick out one unit to the left (\.{\\4}) */
1846 @d break_space 0216 /* optional break between statements (\.{\\5}) */
1847 @d force 0217 /* forced break between statements (\.{\\6}) */
1848 @d big_force 0220 /* forced break with additional space (\.{\\7}) */
1849 @d preproc_line 0221 /* begin line without indentation (\.{\\8}) */
1850 @^high-bit character handling@>
1851 @d quoted_char 0222
1852 /* introduces a character token in the range |0200|--|0377| */
1853 @d end_translation 0223 /* special sentinel token at end of list */
1854 @d inserted 0224 /* sentinel to mark translations of inserts */
1855 @d qualifier 0225 /* introduces an explicit namespace qualifier */
1857 @ The raw input is converted into scraps according to the following table,
1858 which gives category codes followed by the translations.
1859 \def\stars {\.{**}}%
1860 The symbol `\stars' stands for `\.{\\\&\{{\rm identifier}\}}',
1861 i.e., the identifier itself treated as a reserved word.
1862 The right-hand column is the so-called |mathness|, which is explained
1863 further below.
1865 An identifier |c| of length 1 is translated as \.{\\\v c} instead of
1866 as \.{\\\\\{c\}}. An identifier \.{CAPS} in all caps is translated as
1867 \.{\\.\{CAPS\}} instead of as \.{\\\\\{CAPS\}}. An identifier that has
1868 become a reserved word via |typedef| is translated with \.{\\\&} replacing
1869 \.{\\\\} and |raw_int| replacing |exp|.
1871 A string of length greater than 20 is broken into pieces of size at most~20
1872 with discretionary breaks in between.
1874 \yskip\halign{\quad#\hfil&\quad#\hfil&\quad\hfil#\hfil\cr
1875 \.{!=}&|binop|: \.{\\I}&yes\cr
1876 \.{<=}&|binop|: \.{\\Z}&yes\cr
1877 \.{>=}&|binop|: \.{\\G}&yes\cr
1878 \.{==}&|binop|: \.{\\E}&yes\cr
1879 \.{\&\&}&|binop|: \.{\\W}&yes\cr
1880 \.{\v\v}&|binop|: \.{\\V}&yes\cr
1881 \.{++}&|unop|: \.{\\PP}&yes\cr
1882 \.{--}&|unop|: \.{\\MM}&yes\cr
1883 \.{->}&|binop|: \.{\\MG}&yes\cr
1884 \.{>>}&|binop|: \.{\\GG}&yes\cr
1885 \.{<<}&|binop|: \.{\\LL}&yes\cr
1886 \.{::}&|colcol|: \.{\\DC}&maybe\cr
1887 \.{.*}&|binop|: \.{\\PA}&yes\cr
1888 \.{->*}&|binop|: \.{\\MGA}&yes\cr
1889 \.{...}&|raw_int|: \.{\\,\\ldots\\,}&yes\cr
1890 \."string\."&|exp|: \.{\\.\{}string with special characters quoted\.\}&maybe\cr
1891 \.{@@=}string\.{@@>}&|exp|: \.{\\vb\{}string with special characters
1892 quoted\.\}&maybe\cr
1893 \.{@@'7'}&|exp|: \.{\\.\{@@'7'\}}&maybe\cr
1894 \.{077} or \.{\\77}&|exp|: \.{\\T\{\\\~77\}}&maybe\cr
1895 \.{0x7f}&|exp|: \.{\\T\{\\\^7f\}}&maybe\cr
1896 \.{77}&|exp|: \.{\\T\{77\}}&maybe\cr
1897 \.{77L}&|exp|: \.{\\T\{77\\\$L\}}&maybe\cr
1898 \.{0.1E5}&|exp|: \.{\\T\{0.1\\\_5\}}&maybe\cr
1899 \.+&|ubinop|: \.+&yes\cr
1900 \.-&|ubinop|: \.-&yes\cr
1901 \.*&|raw_ubin|: \.*&yes\cr
1902 \./&|binop|: \./&yes\cr
1903 \.<&|prelangle|: \.{\\langle}&yes\cr
1904 \.=&|binop|: \.{\\K}&yes\cr
1905 \.>&|prerangle|: \.{\\rangle}&yes\cr
1906 \..&|binop|: \..&yes\cr
1907 \.{\v}&|binop|: \.{\\OR}&yes\cr
1908 \.\^&|binop|: \.{\\XOR}&yes\cr
1909 \.\%&|binop|: \.{\\MOD}&yes\cr
1910 \.?&|question|: \.{\\?}&yes\cr
1911 \.!&|unop|: \.{\\R}&yes\cr
1912 \.\~&|unop|: \.{\\CM}&yes\cr
1913 \.\&&|raw_ubin|: \.{\\AND}&yes\cr
1914 \.(&|lpar|: \.(&maybe\cr
1915 \.[&|lpar|: \.[&maybe\cr
1916 \.)&|rpar|: \.)&maybe\cr
1917 \.]&|rpar|: \.]&maybe\cr
1918 \.\{&|lbrace|: \.\{&yes\cr
1919 \.\}&|rbrace|: \.\}&yes\cr
1920 \.,&|comma|: \.,&yes\cr
1921 \.;&|semi|: \.;&maybe\cr
1922 \.:&|colon|: \.:&no\cr
1923 \.\# (within line)&|ubinop|: \.{\\\#}&yes\cr
1924 \.\# (at beginning)&|lproc|: |force| |preproc_line| \.{\\\#}&no\cr
1925 end of \.\# line&|rproc|: |force|&no\cr
1926 identifier&|exp|: \.{\\\\\{}identifier with underlines and
1927 dollar signs quoted\.\}&maybe\cr
1928 \.{and}&|alfop|: \stars&yes\cr
1929 \.{and\_eq}&|alfop|: \stars&yes\cr
1930 \.{asm}&|sizeof_like|: \stars&maybe\cr
1931 \.{auto}&|int_like|: \stars&maybe\cr
1932 \.{bitand}&|alfop|: \stars&yes\cr
1933 \.{bitor}&|alfop|: \stars&yes\cr
1934 \.{bool}&|raw_int|: \stars&maybe\cr
1935 \.{break}&|case_like|: \stars&maybe\cr
1936 \.{case}&|case_like|: \stars&maybe\cr
1937 \.{catch}&|catch_like|: \stars&maybe\cr
1938 \.{char}&|raw_int|: \stars&maybe\cr
1939 \.{class}&|struct_like|: \stars&maybe\cr
1940 \.{clock\_t}&|raw_int|: \stars&maybe\cr
1941 \.{compl}&|alfop|: \stars&yes\cr
1942 \.{const}&|const_like|: \stars&maybe\cr
1943 \.{const\_cast}&|raw_int|: \stars&maybe\cr
1944 \.{continue}&|case_like|: \stars&maybe\cr
1945 \.{default}&|case_like|: \stars&maybe\cr
1946 \.{define}&|define_like|: \stars&maybe\cr
1947 \.{defined}&|sizeof_like|: \stars&maybe\cr
1948 \.{delete}&|delete_like|: \stars&maybe\cr
1949 \.{div\_t}&|raw_int|: \stars&maybe\cr
1950 \.{do}&|do_like|: \stars&maybe\cr
1951 \.{double}&|raw_int|: \stars&maybe\cr
1952 \.{dynamic\_cast}&|raw_int|: \stars&maybe\cr
1953 \.{elif}&|if_like|: \stars&maybe\cr
1954 \.{else}&|else_like|: \stars&maybe\cr
1955 \.{endif}&|if_like|: \stars&maybe\cr
1956 \.{enum}&|struct_like|: \stars&maybe\cr
1957 \.{error}&|if_like|: \stars&maybe\cr
1958 \.{explicit}&|int_like|: \stars&maybe\cr
1959 \.{export}&|int_like|: \stars&maybe\cr
1960 \.{extern}&|int_like|: \stars&maybe\cr
1961 \.{FILE}&|raw_int|: \stars&maybe\cr
1962 \.{float}&|raw_int|: \stars&maybe\cr
1963 \.{for}&|for_like|: \stars&maybe\cr
1964 \.{fpos\_t}&|raw_int|: \stars&maybe\cr
1965 \.{friend}&|int_like|: \stars&maybe\cr
1966 \.{goto}&|case_like|: \stars&maybe\cr
1967 \.{if}&|if_like|: \stars&maybe\cr
1968 \.{ifdef}&|if_like|: \stars&maybe\cr
1969 \.{ifndef}&|if_like|: \stars&maybe\cr
1970 \.{include}&|if_like|: \stars&maybe\cr
1971 \.{inline}&|int_like|: \stars&maybe\cr
1972 \.{int}&|raw_int|: \stars&maybe\cr
1973 \.{jmp\_buf}&|raw_int|: \stars&maybe\cr
1974 \.{ldiv\_t}&|raw_int|: \stars&maybe\cr
1975 \.{line}&|if_like|: \stars&maybe\cr
1976 \.{long}&|raw_int|: \stars&maybe\cr
1977 \.{make\_pair}&|ftemplate|: \.{\\\\\{make\\\_pair\}}&maybe\cr
1978 \.{mutable}&|int_like|: \stars&maybe\cr
1979 \.{namespace}&|struct_like|: \stars&maybe\cr
1980 \.{new}&|new_like|: \stars&maybe\cr
1981 \.{not}&|alfop|: \stars&yes\cr
1982 \.{not\_eq}&|alfop|: \stars&yes\cr
1983 \.{NULL}&|exp|: \.{\\NULL}&yes\cr
1984 \.{offsetof}&|raw_int|: \stars&maybe\cr
1985 \.{operator}&|operator_like|: \stars&maybe\cr
1986 \.{or}&|alfop|: \stars&yes\cr
1987 \.{or\_eq}&|alfop|: \stars&yes\cr
1988 \.{pragma}&|if_like|: \stars&maybe\cr
1989 \.{private}&|public_like|: \stars&maybe\cr
1990 \.{protected}&|public_like|: \stars&maybe\cr
1991 \.{ptrdiff\_t}&|raw_int|: \stars&maybe\cr
1992 \.{public}&|public_like|: \stars&maybe\cr
1993 \.{register}&|int_like|: \stars&maybe\cr
1994 \.{reinterpret\_cast}&|raw_int|: \stars&maybe\cr
1995 \.{return}&|case_like|: \stars&maybe\cr
1996 \.{short}&|raw_int|: \stars&maybe\cr
1997 \.{sig\_atomic\_t}&|raw_int|: \stars&maybe\cr
1998 \.{signed}&|raw_int|: \stars&maybe\cr
1999 \.{size\_t}&|raw_int|: \stars&maybe\cr
2000 \.{sizeof}&|sizeof_like|: \stars&maybe\cr
2001 \.{static}&|int_like|: \stars&maybe\cr
2002 \.{static\_cast}&|raw_int|: \stars&maybe\cr
2003 \.{struct}&|struct_like|: \stars&maybe\cr
2004 \.{switch}&|for_like|: \stars&maybe\cr
2005 \.{template}&|template_like|: \stars&maybe\cr
2006 \.{TeX}&|exp|: \.{\\TeX}&yes\cr
2007 \.{this}&|exp|: \.{\\this}&yes\cr
2008 \.{throw}&|case_like|: \stars&maybe\cr
2009 \.{time\_t}&|raw_int|: \stars&maybe\cr
2010 \.{try}&|else_like|: \stars&maybe\cr
2011 \.{typedef}&|typedef_like|: \stars&maybe\cr
2012 \.{typeid}&|raw_int|: \stars&maybe\cr
2013 \.{typename}&|struct_like|: \stars&maybe\cr
2014 \.{undef}&|if_like|: \stars&maybe\cr
2015 \.{union}&|struct_like|: \stars&maybe\cr
2016 \.{unsigned}&|raw_int|: \stars&maybe\cr
2017 \.{using}&|int_like|: \stars&maybe\cr
2018 \.{va\_dcl}&|decl|: \stars&maybe\cr
2019 \.{va\_list}&|raw_int|: \stars&maybe\cr
2020 \.{virtual}&|int_like|: \stars&maybe\cr
2021 \.{void}&|raw_int|: \stars&maybe\cr
2022 \.{volatile}&|const_like|: \stars&maybe\cr
2023 \.{wchar\_t}&|raw_int|: \stars&maybe\cr
2024 \.{while}&|for_like|: \stars&maybe\cr
2025 \.{xor}&|alfop|: \stars&yes\cr
2026 \.{xor\_eq}&|alfop|: \stars&yes\cr
2027 \.{@@,}&|insert|: \.{\\,}&maybe\cr
2028 \.{@@\v}&|insert|: |opt| \.0&maybe\cr
2029 \.{@@/}&|insert|: |force|&no\cr
2030 \.{@@\#}&|insert|: |big_force|&no\cr
2031 \.{@@+}&|insert|: |big_cancel| \.{\{\}} |break_space|
2032 \.{\{\}} |big_cancel|&no\cr
2033 \.{@@;}&|semi|: &maybe\cr
2034 \.{@@[@q]@>}&|begin_arg|: &maybe\cr
2035 \.{@q[@>@@]}&|end_arg|: &maybe\cr
2036 \.{@@\&}&|insert|: \.{\\J}&maybe\cr
2037 \.{@@h}&|insert|: |force| \.{\\ATH} |force|&no\cr
2038 \.{@@<}\thinspace section name\thinspace\.{@@>}&|section_scrap|:
2039 \.{\\X}$n$\.:translated section name\.{\\X}&maybe\cr
2040 \.{@@(@q)@>}\thinspace section name\thinspace\.{@@>}&|section_scrap|:
2041 \.{\\X}$n$\.{:\\.\{}section name with special characters
2042 quoted\.{\ \}\\X}&maybe\cr
2043 \.{/*}comment\.{*/}&|insert|: |cancel|
2044 \.{\\C\{}translated comment\.\} |force|&no\cr
2045 \.{//}comment&|insert|: |cancel|
2046 \.{\\SHC\{}translated comment\.\} |force|&no\cr
2049 \smallskip
2050 The construction \.{@@t}\thinspace stuff\/\thinspace\.{@@>} contributes
2051 \.{\\hbox\{}\thinspace stuff\/\thinspace\.\} to the following scrap.
2053 @i prod.w
2055 @* Implementing the productions.
2056 More specifically, a scrap is a structure consisting of a category
2057 |cat| and a |text_pointer| |trans|, which points to the translation in
2058 |tok_start|. When \CEE/ text is to be processed with the grammar above,
2059 we form an array |scrap_info| containing the initial scraps.
2060 Our production rules have the nice property that the right-hand side is never
2061 longer than the left-hand side. Therefore it is convenient to use sequential
2062 allocation for the current sequence of scraps. Five pointers are used to
2063 manage the parsing:
2065 \yskip\hang |pp| is a pointer into |scrap_info|. We will try to match
2066 the category codes |pp->cat,@,@,(pp+1)->cat|$,\,\,\ldots\,$
2067 to the left-hand sides of productions.
2069 \yskip\hang |scrap_base|, |lo_ptr|, |hi_ptr|, and |scrap_ptr| are such that
2070 the current sequence of scraps appears in positions |scrap_base| through
2071 |lo_ptr| and |hi_ptr| through |scrap_ptr|, inclusive, in the |cat| and
2072 |trans| arrays. Scraps located between |scrap_base| and |lo_ptr| have
2073 been examined, while those in positions |>=hi_ptr| have not yet been
2074 looked at by the parsing process.
2076 \yskip\noindent Initially |scrap_ptr| is set to the position of the final
2077 scrap to be parsed, and it doesn't change its value. The parsing process
2078 makes sure that |lo_ptr>=pp+3|, since productions have as many as four terms,
2079 by moving scraps from |hi_ptr| to |lo_ptr|. If there are
2080 fewer than |pp+3| scraps left, the positions up to |pp+3| are filled with
2081 blanks that will not match in any productions. Parsing stops when
2082 |pp==lo_ptr+1| and |hi_ptr==scrap_ptr+1|.
2084 Since the |scrap| structure will later be used for other purposes, we
2085 declare its second element as a union.
2087 @<Type...@>=
2088 typedef struct {
2089 eight_bits cat; /* category code of this scrap */
2090 eight_bits mathness; /* low two bits: left boundary; next two: right */
2091 union {
2092 text_pointer Trans; /* the translation, reached through |trans| */
2093 @<Rest of |trans_plus| union@>@;
2094 } trans_plus;
2095 } scrap;
2096 typedef scrap *scrap_pointer;
2098 @ @d trans trans_plus.Trans /* translation texts of scraps */
2100 @<Global...@>=
2101 scrap scrap_info[max_scraps]; /* memory array for scraps */
2102 scrap_pointer scrap_info_end=scrap_info+max_scraps -1; /* end of |scrap_info| */
2103 scrap_pointer pp; /* current position for reducing productions */
2104 scrap_pointer scrap_base; /* beginning of the current scrap sequence */
2105 scrap_pointer scrap_ptr; /* ending of the current scrap sequence */
2106 scrap_pointer lo_ptr; /* last scrap that has been examined */
2107 scrap_pointer hi_ptr; /* first scrap that has not been examined */
2108 scrap_pointer max_scr_ptr; /* largest value assumed by |scrap_ptr| */
2110 @ @<Set init...@>=
2111 scrap_base=scrap_info+1;
2112 max_scr_ptr=scrap_ptr=scrap_info;
2114 @ Token lists in |@!tok_mem| are composed of the following kinds of
2115 items for \TEX/ output.
2117 \yskip\item{$\bullet$}Character codes and special codes like |force| and
2118 |math_rel| represent themselves;
2120 \item{$\bullet$}|id_flag+p| represents \.{\\\\\{{\rm identifier $p$}\}};
2122 \item{$\bullet$}|res_flag+p| represents \.{\\\&\{{\rm identifier $p$}\}};
2124 \item{$\bullet$}|section_flag+p| represents section name |p|;
2126 \item{$\bullet$}|tok_flag+p| represents token list number |p|;
2128 \item{$\bullet$}|inner_tok_flag+p| represents token list number |p|, to be
2129 translated without line-break controls.
2131 @d id_flag 10240 /* signifies an identifier */
2132 @d res_flag 2*id_flag /* signifies a reserved word */
2133 @d section_flag 3*id_flag /* signifies a section name */
2134 @d tok_flag 4*id_flag /* signifies a token list */
2135 @d inner_tok_flag 5*id_flag /* signifies a token list in `\pb' */
2138 void
2139 print_text(p) /* prints a token list for debugging; not used in |main| */
2140 text_pointer p; /* the token list to print */
2142 token_pointer j; /* index into |tok_mem| */
2143 sixteen_bits r; /* remainder of token after the flag has been stripped off */
2144 if (p>=text_ptr) printf("BAD"); /* |p| is at or past |text_ptr| */
2145 else for (j=*p; j<*(p+1); j++) { /* tokens from |*p| up to |*(p+1)| */
2146 r=*j%id_flag;
2147 switch (*j/id_flag) { /* which multiple of |id_flag| is present */
2148 case 1: printf("\\\\{"@q}@>); print_id((name_dir+r)); printf(@q{@>"}");
2149 break; /* |id_flag| */
2150 case 2: printf("\\&{"@q}@>); print_id((name_dir+r)); printf(@q{@>"}");
2151 break; /* |res_flag| */
2152 case 3: printf("<"); print_section_name((name_dir+r)); printf(">");
2153 break; /* |section_flag| */
2154 case 4: printf("[[%d]]",r); break; /* |tok_flag| */
2155 case 5: printf("|[[%d]]|",r); break; /* |inner_tok_flag| */
2156 default: @<Print token |r| in symbolic form@>; /* none of the flags above */
2159 fflush(stdout); /* show the output right away */
2162 @ @<Print token |r|...@>=
2163 switch (r) { /* names for the special control codes */
2164 case math_rel: printf("\\mathrel{"@q}@>); break;
2165 case big_cancel: printf("[ccancel]"); break;
2166 case cancel: printf("[cancel]"); break;
2167 case indent: printf("[indent]"); break;
2168 case outdent: printf("[outdent]"); break;
2169 case backup: printf("[backup]"); break;
2170 case opt: printf("[opt]"); break;
2171 case break_space: printf("[break]"); break;
2172 case force: printf("[force]"); break;
2173 case big_force: printf("[fforce]"); break;
2174 case preproc_line: printf("[preproc]"); break;
2175 case quoted_char: j++; printf("[%o]",(unsigned)*j); break; /* next token, in octal */
2176 case end_translation: printf("[quit]"); break;
2177 case inserted: printf("[inserted]"); break;
2178 default: putxchar(r); /* everything else, |qualifier| included */
2181 @ The production rules listed above are embedded directly into \.{CWEAVE},
2182 since it is easier to do this than to write an interpretive system
2183 that would handle production systems in general. Several macros are defined
2184 here so that the program for each production is fairly short.
2186 All of our productions conform to the general notion that some |k|
2187 consecutive scraps starting at some position |j| are to be replaced by a
2188 single scrap of some category |c| whose translation is composed from the
2189 translations of the disappearing scraps. After this production has been
2190 applied, the production pointer |pp| should change by an amount |d|. Such
2191 a production can be represented by the quadruple |(j,k,c,d)|. For example,
2192 the production `|exp@,comma@,exp| $\RA$ |exp|' would be represented by
2193 `|(pp,3,exp,-2)|'; in this case the pointer |pp| should decrease by 2
2194 after the production has been applied, because some productions with
2195 |exp| in their second or third positions might now match,
2196 but no productions have
2197 |exp| in the fourth position of their left-hand sides. Note that
2198 the value of |d| is determined by the whole collection of productions, not
2199 by an individual one.
2200 The determination of |d| has been
2201 done by hand in each case, based on the full set of productions but not on
2202 the grammar of \CEE/ or on the rules for constructing the initial
2203 scraps.
2205 We also attach a serial number to each production, so that additional
2206 information is available when debugging. For example, the program below
2207 contains the statement `|reduce(pp,3,exp,-2,4)|' when it implements
2208 the production just mentioned.
2210 Before calling |reduce|, the program should have appended the tokens of
2211 the new translation to the |tok_mem| array. We commonly want to append
2212 copies of several existing translations, and macros are defined to
2213 simplify these common cases. For example, \\{app2}|(pp)| will append the
2214 translations of two consecutive scraps, |pp->trans| and |(pp+1)->trans|, to
2215 the current token list. If the entire new translation is formed in this
2216 way, we write `|squash(j,k,c,d,n)|' instead of `|reduce(j,k,c,d,n)|'. For
2217 example, `|squash(pp,3,exp,-2,3)|' is an abbreviation for `\\{app3}|(pp);
2218 reduce(pp,3,exp,-2,3)|'.
2220 A couple more words of explanation:
2221 Both |big_app| and |app| append a token (while |big_app1| to |big_app4|
2222 append the specified number of scrap translations) to the current token list.
2223 The difference between |big_app| and |app| is simply that |big_app|
2224 checks whether there can be a conflict between math and non-math
2225 tokens, and intercalates a `\.{\$}' token if necessary. When in
2226 doubt what to use, use |big_app|.
2228 The |mathness| is an attribute of scraps that says whether they are
2229 to be printed in a math mode context or not. It is separate from the
2230 ``part of speech'' (the |cat|) because to make each |cat| have
2231 a fixed |mathness| (as in the original \.{WEAVE}) would multiply the
2232 number of necessary production rules.
2234 The low two bits (i.e. |mathness % 4|) control the left boundary.
2235 (We need two bits because we allow cases |yes_math|, |no_math| and
2236 |maybe_math|, which can go either way.)
2237 The next two bits (i.e. |mathness / 4|) control the right boundary.
2238 If we combine two scraps and the right boundary of the first has
2239 a different mathness from the left boundary of the second, we
2240 insert a \.{\$} in between. Similarly, if at printing time some
2241 irreducible scrap has a |yes_math| boundary the scrap gets preceded
2242 or followed by a \.{\$}. The left boundary is |maybe_math| if and
2243 only if the right boundary is.
2245 The code below is an exact translation of the production rules into
2246 \CEE/, using such macros, and the reader should have no difficulty
2247 understanding the format by comparing the code with the symbolic
2248 productions as they were listed earlier.
2250 @d no_math 2 /* should be in horizontal mode */
2251 @d yes_math 1 /* should be in math mode */
2252 @d maybe_math 0 /* works in either horizontal or math mode */
2253 @d big_app2(a) big_app1(a);big_app1(a+1)
2254 @d big_app3(a) big_app2(a);big_app1(a+2)
2255 @d big_app4(a) big_app3(a);big_app1(a+3)
2256 @d app(a) *(tok_ptr++)=a
2257 @d app1(a) *(tok_ptr++)=tok_flag+(int)((a)->trans-tok_start)
2259 @<Global...@>=
2260 int cur_mathness, init_mathness;
2262 @ @c
2263 void
2264 app_str(s) /* feeds the characters of |s| to |app_tok|, one by one */
2265 char *s; /* null-terminated string */
2267 while (*s) app_tok(*(s++)); /* stop at the terminating null */
2270 void
2271 big_app(a) /* appends token |a|, switching math mode first if needed */
2272 token a;
2274 if (a==' ' || (a>=big_cancel && a<=big_force)) /* non-math token */ {
2275 if (cur_mathness==maybe_math) init_mathness=no_math; /* sets the initial mode */
2276 else if (cur_mathness==yes_math) app_str("{}$"); /* leave math mode first */
2277 cur_mathness=no_math;
2279 else {
2280 if (cur_mathness==maybe_math) init_mathness=yes_math; /* sets the initial mode */
2281 else if (cur_mathness==no_math) app_str("${}"); /* enter math mode first */
2282 cur_mathness=yes_math;
2284 app(a); /* finally the token itself */
2287 void
2288 big_app1(a) /* appends a reference to the translation of scrap |a| */
2289 scrap_pointer a;
2291 switch (a->mathness % 4) { /* left boundary */
2292 case (no_math):
2293 if (cur_mathness==maybe_math) init_mathness=no_math; /* sets the initial mode */
2294 else if (cur_mathness==yes_math) app_str("{}$"); /* leave math mode first */
2295 cur_mathness=a->mathness / 4; /* right boundary */
2296 break;
2297 case (yes_math):
2298 if (cur_mathness==maybe_math) init_mathness=yes_math; /* sets the initial mode */
2299 else if (cur_mathness==no_math) app_str("${}"); /* enter math mode first */
2300 cur_mathness=a->mathness / 4; /* right boundary */
2301 break;
2302 case (maybe_math): /* no changes */ break;
2304 app(tok_flag+(int)((a)->trans-tok_start)); /* token list number of the scrap */
2307 @ Let us consider the big switch for productions now, before looking
2308 at its context. We want to design the program so that this switch
2309 works, so we might as well not keep ourselves in suspense about exactly what
2310 code needs to be provided with a proper environment.
2312 @d cat1 (pp+1)->cat
2313 @d cat2 (pp+2)->cat
2314 @d cat3 (pp+3)->cat
2315 @d lhs_not_simple (pp->cat!=public_like
2316 && pp->cat!=semi
2317 && pp->cat!=prelangle
2318 && pp->cat!=prerangle
2319 && pp->cat!=template_like
2320 && pp->cat!=new_like
2321 && pp->cat!=new_exp
2322 && pp->cat!=ftemplate
2323 && pp->cat!=raw_ubin
2324 && pp->cat!=const_like
2325 && pp->cat!=raw_int
2326 && pp->cat!=operator_like)
2327 /* not a production with left side length 1 */
2329 @<Match a production at |pp|, or increase |pp| if there is no match@>= {
2330 if (cat1==end_arg && lhs_not_simple)
2331 if (pp->cat==begin_arg) squash(pp,2,exp,-2,124);
2332 else squash(pp,2,end_arg,-1,125); /* this |else| pairs with the inner |if| */
2333 else if (cat1==insert) squash(pp,2,pp->cat,-2,0); /* |insert| joins its left neighbor */
2334 else if (cat2==insert) squash(pp+1,2,(pp+1)->cat,-1,0);
2335 else if (cat3==insert) squash(pp+2,2,(pp+2)->cat,0,0);
2336 else
2337 switch (pp->cat) { /* otherwise dispatch on the category at |pp| */
2338 case exp: @<Cases for |exp|@>; @+break;
2339 case lpar: @<Cases for |lpar|@>; @+break;
2340 case unop: @<Cases for |unop|@>; @+break;
2341 case ubinop: @<Cases for |ubinop|@>; @+break;
2342 case binop: @<Cases for |binop|@>; @+break;
2343 case cast: @<Cases for |cast|@>; @+break;
2344 case sizeof_like: @<Cases for |sizeof_like|@>; @+break;
2345 case int_like: @<Cases for |int_like|@>; @+break;
2346 case public_like: @<Cases for |public_like|@>; @+break;
2347 case colcol: @<Cases for |colcol|@>; @+break;
2348 case decl_head: @<Cases for |decl_head|@>; @+break;
2349 case decl: @<Cases for |decl|@>; @+break;
2350 case base: @<Cases for |base|@>; @+break;
2351 case struct_like: @<Cases for |struct_like|@>; @+break;
2352 case struct_head: @<Cases for |struct_head|@>; @+break;
2353 case fn_decl: @<Cases for |fn_decl|@>; @+break;
2354 case function: @<Cases for |function|@>; @+break;
2355 case lbrace: @<Cases for |lbrace|@>; @+break;
2356 case if_like: @<Cases for |if_like|@>; @+break;
2357 case else_like: @<Cases for |else_like|@>; @+break;
2358 case else_head: @<Cases for |else_head|@>; @+break;
2359 case if_clause: @<Cases for |if_clause|@>; @+break;
2360 case if_head: @<Cases for |if_head|@>; @+break;
2361 case do_like: @<Cases for |do_like|@>; @+break;
2362 case case_like: @<Cases for |case_like|@>; @+break;
2363 case catch_like: @<Cases for |catch_like|@>; @+break;
2364 case tag: @<Cases for |tag|@>; @+break;
2365 case stmt: @<Cases for |stmt|@>; @+break;
2366 case semi: @<Cases for |semi|@>; @+break;
2367 case lproc: @<Cases for |lproc|@>; @+break;
2368 case section_scrap: @<Cases for |section_scrap|@>; @+break;
2369 case insert: @<Cases for |insert|@>; @+break;
2370 case prelangle: @<Cases for |prelangle|@>; @+break;
2371 case prerangle: @<Cases for |prerangle|@>; @+break;
2372 case langle: @<Cases for |langle|@>; @+break;
2373 case template_like: @<Cases for |template_like|@>; @+break;
2374 case new_like: @<Cases for |new_like|@>; @+break;
2375 case new_exp: @<Cases for |new_exp|@>; @+break;
2376 case ftemplate: @<Cases for |ftemplate|@>; @+break;
2377 case for_like: @<Cases for |for_like|@>; @+break;
2378 case raw_ubin: @<Cases for |raw_ubin|@>; @+break;
2379 case const_like: @<Cases for |const_like|@>; @+break;
2380 case raw_int: @<Cases for |raw_int|@>; @+break;
2381 case operator_like: @<Cases for |operator_like|@>; @+break;
2382 case typedef_like: @<Cases for |typedef_like|@>; @+break;
2383 case delete_like: @<Cases for |delete_like|@>; @+break;
2384 case question: @<Cases for |question|@>; @+break;
2386 pp++; /* if no match was found, we move to the right */
2389 @ In \CEE/, new specifier names can be defined via |typedef|, and we want
2390 to make the parser recognize future occurrences of the identifier thus
2391 defined as specifiers. This is done by the procedure |make_reserved|,
2392 which changes the |ilk| of the relevant identifier.
2394 We first need a procedure to recursively seek the first
2395 identifier in a token list, because the identifier might
2396 be enclosed in parentheses, as when one defines a function
2397 returning a pointer.
2399 If the first identifier found is a keyword like `\&{case}', we
2400 return the special value |case_found|; this prevents underlining
2401 of identifiers in case labels.
2403 If the first identifier is the keyword `\&{operator}', we give up;
2404 users who want to index definitions of overloaded \CPLUSPLUS/ operators
2405 should say, for example, `\.{@@!@@\^\\\&\{operator\} \$+\{=\}\$@@>}' (or,
2406 more properly alphabetized,
2407 `\.{@@!@@:operator+=\}\{\\\&\{operator\} \$+\{=\}\$@@>}').
2409 @d no_ident_found (token_pointer)0 /* distinct from any identifier token */
2410 @d case_found (token_pointer)1 /* likewise */
2411 @d operator_found (token_pointer)2 /* likewise */
2414 token_pointer /* yields the first identifier token of text |p|, or one of the three special values defined above */
2415 find_first_ident(p)
2416 text_pointer p; /* the token list to be searched */
2418 token_pointer q; /* token to be returned */
2419 token_pointer j; /* token being looked at */
2420 sixteen_bits r; /* remainder of token after the flag has been stripped off */
2421 if (p>=text_ptr) confusion("find_first_ident"); /* |p| must lie below |text_ptr| */
2422 for (j=*p; j<*(p+1); j++) { /* scan the tokens of text |p| from left to right */
2423 r=*j%id_flag;
2424 switch (*j/id_flag) { /* the quotient selects the kind of token */
2425 case 2: /* |res_flag| */
2426 if (name_dir[r].ilk==case_like) return case_found;
2427 if (name_dir[r].ilk==operator_like) return operator_found;
2428 if (name_dir[r].ilk!=raw_int) break; /* other reserved words are passed over; a |raw_int| deliberately falls through and is returned */
2429 case 1: return j; /* quotient 1: this token is the identifier we were looking for */
2430 case 4: case 5: /* |tok_flag| or |inner_tok_flag| */
2431 if ((q=find_first_ident(tok_start+r))!=no_ident_found) /* search the nested token list recursively */
2432 return q;
2433 default: ; /* char, |section_flag|, fall thru: move on to next token */
2434 if (*j==inserted) return no_ident_found; /* ignore inserts */
2435 else if (*j==qualifier) j++; /* bypass namespace qualifier */
2438 return no_ident_found; /* the whole list was scanned without success */
2441 @ The scraps currently being parsed must be inspected for any
2442 occurrence of the identifier that we're making reserved; hence
2443 the |for| loop below.
2446 void
2447 make_reserved(p) /* make the first identifier in |p->trans| like |int| */
2448 scrap_pointer p; /* scrap whose translation contains the identifier */
2450 sixteen_bits tok_value; /* the name of this identifier, plus its flag */
2451 token_pointer tok_loc; /* pointer to |tok_value| */
2452 if ((tok_loc=find_first_ident(p->trans))<=operator_found) /* the special values 0, 1, and 2 all compare as ``nothing usable found'' */
2453 return; /* this should not happen */
2454 tok_value=*tok_loc;
2455 for (;p<=scrap_ptr; p==lo_ptr? p=hi_ptr: p++) { /* visit the scraps from |p| onward, jumping from |lo_ptr| to |hi_ptr| */
2456 if (p->cat==exp) {
2457 if (**(p->trans)==tok_value) { /* the first token of this scrap is the very same identifier */
2458 p->cat=raw_int; /* so the scrap changes category */
2459 **(p->trans)=tok_value%id_flag+res_flag; /* and its token gets |res_flag| in place of the old flag */
2463 (name_dir+(sixteen_bits)(tok_value%id_flag))->ilk=raw_int; /* change the |ilk| of the name itself */
2464 *tok_loc=tok_value%id_flag+res_flag; /* finally reflag the occurrence found in |p->trans| */
2467 @ In the following situations we want to mark the occurrence of
2468 an identifier as a definition: when |make_reserved| is just about to be
2469 used; after a specifier, as in |char **argv|;
2470 before a colon, as in \\{found}:; and in the declaration of a function,
2471 as in \\{main}()$\{\ldots;\}$. This is accomplished by the invocation
2472 of |make_underlined| at appropriate times. Notice that, in the declaration
2473 of a function, we find out that the identifier is being defined only after
2474 it has been swallowed up by an |exp|.
2477 void
2478 make_underlined(p)
2479 /* underline the entry for the first identifier in |p->trans| */
2480 scrap_pointer p; /* scrap whose translation is searched for an identifier */
2482 token_pointer tok_loc; /* where the first identifier appears */
2483 if ((tok_loc=find_first_ident(p->trans))<=operator_found) /* one of the three special values came back */
2484 return; /* this happens, for example, in |case found:| */
2485 xref_switch=def_flag; /* |underline_xref| adds this switch to the section number */
2486 underline_xref(*tok_loc%id_flag+name_dir); /* strip the flag to obtain the name's position in |name_dir| */
2489 @ We cannot use |new_xref| to underline a cross-reference at this point
2490 because this would just make a new cross-reference at the end of the list.
2491 We actually have to search through the list for the existing
2492 cross-reference.
2494 @<Predecl...@>=
2495 void underline_xref();
2497 @ @c
2498 void
2499 underline_xref(p)
2500 name_pointer p; /* the name whose cross-reference list is to be updated */
2502 xref_pointer q=(xref_pointer)p->xref; /* pointer to cross-reference being examined */
2503 xref_pointer r; /* temporary pointer for permuting cross-references */
2504 sixteen_bits m; /* cross-reference value to be installed */
2505 sixteen_bits n; /* cross-reference value being examined */
2506 if (no_xref) return; /* |no_xref| disables this routine entirely */
2507 m=section_count+xref_switch; /* the value that the wanted entry must carry */
2508 while (q != xmem) { /* walk along the list until |q| reaches |xmem| */
2509 n=q->num;
2510 if (n==m) return; /* the wanted entry is already present */
2511 else if (m==n+def_flag) { /* the same number without |def_flag| is present */
2512 q->num=m; return; /* so it is upgraded in place */
2514 else if (n>=def_flag && n<m) break; /* an entry of at least |def_flag| but less than |m|: the new one goes right before it */
2515 q=q->xlink;
2517 @<Insert new cross-reference at |q|, not at beginning of list@>;
2520 @ We get to this section only when the identifier is one letter long,
2521 so it didn't get a non-underlined entry during phase one. But it may
2522 have got some explicitly underlined entries in later sections, so in order
2523 to preserve the numerical order of the entries in the index, we have
2524 to insert the new cross-reference not at the beginning of the list
2525 (namely, at |p->xref|), but rather right before |q|.
2527 @<Insert new cross-reference at |q|...@>=
2528 append_xref(0); /* this number doesn't matter */
2529 xref_ptr->xlink=(xref_pointer)p->xref; r=xref_ptr; /* put the new node in front of the old list and start |r| there */
2530 p->xref=(char*)xref_ptr; /* the list of |p| now begins with the new node */
2531 while (r->xlink!=q) {r->num=r->xlink->num; r=r->xlink;} /* copy each |num| one node toward the front until |r| is the node just before |q| */
2532 r->num=m; /* everything from |q| on is left undisturbed */
2534 @ Now comes the code that tries to match each production starting
2535 with a particular type of scrap. Whenever a match is discovered,
2536 the |squash| or |reduce| macro will cause the appropriate action
2537 to be performed, followed by |goto found|.
2539 @<Cases for |exp|@>=
2540 if (cat1==lbrace || cat1==int_like || cat1==decl) {
2541 make_underlined(pp); big_app1(pp); big_app(indent); app(indent);
2542 reduce(pp,1,fn_decl,0,1);
2544 else if (cat1==unop) squash(pp,2,exp,-2,2);
2545 else if ((cat1==binop || cat1==ubinop) && cat2==exp)
2546 squash(pp,3,exp,-2,3);
2547 else if (cat1==comma && cat2==exp) {
2548 big_app2(pp);
2549 app(opt); app('9'); big_app1(pp+2); reduce(pp,3,exp,-2,4);
2551 else if (cat1==lpar && cat2==rpar && cat3==colon) squash(pp+3,1,base,0,5);
2552 else if (cat1==cast && cat2==colon) squash(pp+2,1,base,0,5);
2553 else if (cat1==semi) squash(pp,2,stmt,-1,6);
2554 else if (cat1==colon) {
2555 make_underlined (pp); squash(pp,2,tag,-1,7);
2557 else if (cat1==rbrace) squash(pp,1,stmt,-1,8);
2558 else if (cat1==lpar && cat2==rpar && (cat3==const_like || cat3==case_like)) {
2559 big_app1(pp+2); big_app(' '); big_app1(pp+3); reduce(pp+2,2,rpar,0,9);
2561 else if (cat1==cast && (cat2==const_like || cat2==case_like)) {
2562 big_app1(pp+1); big_app(' '); big_app1(pp+2); reduce(pp+1,2,cast,0,9);
2564 else if (cat1==exp || cat1==cast) squash(pp,2,exp,-2,10);
2566 @ @<Cases for |lpar|@>=
2567 if ((cat1==exp||cat1==ubinop) && cat2==rpar) squash(pp,3,exp,-2,11);
2568 else if (cat1==rpar) {
2569 big_app1(pp); app('\\'); app(','); big_app1(pp+1);
2570 @.\\,@>
2571 reduce(pp,2,exp,-2,12);
2573 else if ((cat1==decl_head || cat1==int_like || cat1==cast) && cat2==rpar)
2574 squash(pp,3,cast,-2,13);
2575 else if ((cat1==decl_head || cat1==int_like || cat1==exp) && cat2==comma) {
2576 big_app3(pp); app(opt); app('9'); reduce(pp,3,lpar,-1,14);
2578 else if (cat1==stmt || cat1==decl) {
2579 big_app2(pp); big_app(' '); reduce(pp,2,lpar,-1,15);
2582 @ @<Cases for |unop|@>=
2583 if (cat1==exp || cat1==int_like) squash(pp,2,exp,-2,16);
2585 @ @<Cases for |ubinop|@>=
2586 if (cat1==cast && cat2==rpar) {
2587 big_app('{'); big_app1(pp); big_app('}'); big_app1(pp+1);
2588 reduce(pp,2,cast,-2,17);
2590 else if (cat1==exp || cat1==int_like) {
2591 big_app('{'); big_app1(pp); big_app('}'); big_app1(pp+1);
2592 reduce(pp,2,cat1,-2,18);
2594 else if (cat1==binop) {
2595 big_app(math_rel); big_app1(pp); big_app('{'); big_app1(pp+1); big_app('}');
2596 big_app('}'); reduce(pp,2,binop,-1,19);
2599 @ @<Cases for |binop|@>=
2600 if (cat1==binop) {
2601 big_app(math_rel); big_app('{'); big_app1(pp); big_app('}');
2602 big_app('{'); big_app1(pp+1); big_app('}');
2603 big_app('}'); reduce(pp,2,binop,-1,20);
2606 @ @<Cases for |cast|@>=
2607 if (cat1==lpar) squash(pp,2,lpar,-1,21);
2608 else if (cat1==exp) {
2609 big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,exp,-2,21);
2611 else if (cat1==semi) squash(pp,1,exp,-2,22);
2613 @ @<Cases for |sizeof_like|@>=
2614 if (cat1==cast) squash(pp,2,exp,-2,23);
2615 else if (cat1==exp) {
2616 big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,exp,-2,24);
2619 @ @<Cases for |int_like|@>=
2620 if (cat1==int_like|| cat1==struct_like) {
2621 big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,cat1,-2,25);
2623 else if (cat1==exp && (cat2==raw_int||cat2==struct_like))
2624 squash(pp,2,int_like,-2,26);
2625 else if (cat1==exp || cat1==ubinop || cat1==colon) {
2626 big_app1(pp); big_app(' '); reduce(pp,1,decl_head,-1,27);
2628 else if (cat1==semi || cat1==binop) squash(pp,1,decl_head,0,28);
2630 @ @<Cases for |public_like|@>=
2631 if (cat1==colon) squash(pp,2,tag,-1,29);
2632 else squash(pp,1,int_like,-2,30);
2634 @ @<Cases for |colcol|@>=
2635 if (cat1==exp||cat1==int_like) {
2636 app(qualifier); squash(pp,2,cat1,-2,31);
2637 }@+else if (cat1==colcol) squash(pp,2,colcol,-1,32);
2639 @ @<Cases for |decl_head|@>=
2640 if (cat1==comma) {
2641 big_app2(pp); big_app(' '); reduce(pp,2,decl_head,-1,33);
2643 else if (cat1==ubinop) {
2644 big_app1(pp); big_app('{'); big_app1(pp+1); big_app('}');
2645 reduce(pp,2,decl_head,-1,34);
2647 else if (cat1==exp && cat2!=lpar && cat2!=exp && cat2!=cast) {
2648 make_underlined(pp+1); squash(pp,2,decl_head,-1,35);
2650 else if ((cat1==binop||cat1==colon) && cat2==exp && (cat3==comma ||
2651 cat3==semi || cat3==rpar))
2652 squash(pp,3,decl_head,-1,36);
2653 else if (cat1==cast) squash(pp,2,decl_head,-1,37);
2654 else if (cat1==lbrace || cat1==int_like || cat1==decl) {
2655 big_app1(pp); big_app(indent); app(indent); reduce(pp,1,fn_decl,0,38);
2657 else if (cat1==semi) squash(pp,2,decl,-1,39);
2659 @ @<Cases for |decl|@>=
2660 if (cat1==decl) {
2661 big_app1(pp); big_app(force); big_app1(pp+1);
2662 reduce(pp,2,decl,-1,40);
2664 else if (cat1==stmt || cat1==function) {
2665 big_app1(pp); big_app(big_force);
2666 big_app1(pp+1); reduce(pp,2,cat1,-1,41);
2669 @ @<Cases for |base|@>=
2670 if (cat1==int_like || cat1==exp) {
2671 if (cat2==comma) {
2672 big_app1(pp); big_app(' '); big_app2(pp+1);
2673 app(opt); app('9'); reduce(pp,3,base,0,42);
2675 else if (cat2==lbrace) {
2676 big_app1(pp); big_app(' '); big_app1(pp+1); big_app(' '); big_app1(pp+2);
2677 reduce(pp,3,lbrace,-2,43);
2681 @ @<Cases for |struct_like|@>=
2682 if (cat1==lbrace) {
2683 big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,struct_head,0,44);
2685 else if (cat1==exp||cat1==int_like) {
2686 if (cat2==lbrace || cat2==semi) {
2687 make_underlined(pp+1); make_reserved(pp+1);
2688 big_app1(pp); big_app(' '); big_app1(pp+1);
2689 if (cat2==semi) reduce(pp,2,decl_head,0,45);
2690 else {
2691 big_app(' '); big_app1(pp+2);reduce(pp,3,struct_head,0,46);
2694 else if (cat2==colon) squash(pp+2,1,base,2,47);
2695 else if (cat2!=base) {
2696 big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,int_like,-2,48);
2700 @ @<Cases for |struct_head|@>=
2701 if ((cat1==decl || cat1==stmt || cat1==function) && cat2==rbrace) {
2702 big_app1(pp); big_app(indent); big_app(force); big_app1(pp+1);
2703 big_app(outdent); big_app(force); big_app1(pp+2);
2704 reduce(pp,3,int_like,-2,49);
2706 else if (cat1==rbrace) {
2707 big_app1(pp); app_str("\\,"); big_app1(pp+1);
2708 @.\\,@>
2709 reduce(pp,2,int_like,-2,50);
2712 @ @<Cases for |fn_decl|@>=
2713 if (cat1==decl) {
2714 big_app1(pp); big_app(force); big_app1(pp+1); reduce(pp,2,fn_decl,0,51);
2716 else if (cat1==stmt) {
2717 big_app1(pp); app(outdent); app(outdent); big_app(force);
2718 big_app1(pp+1); reduce(pp,2,function,-1,52);
2721 @ @<Cases for |function|@>=
2722 if (cat1==function || cat1==decl || cat1==stmt) {
2723 big_app1(pp); big_app(big_force); big_app1(pp+1); reduce(pp,2,cat1,-1,53);
2726 @ @<Cases for |lbrace|@>=
2727 if (cat1==rbrace) {
2728 big_app1(pp); app('\\'); app(','); big_app1(pp+1);
2729 @.\\,@>
2730 reduce(pp,2,stmt,-1,54);
2732 else if ((cat1==stmt||cat1==decl||cat1==function) && cat2==rbrace) {
2733 big_app(force); big_app1(pp); big_app(indent); big_app(force);
2734 big_app1(pp+1); big_app(force); big_app(backup); big_app1(pp+2);
2735 big_app(outdent); big_app(force); reduce(pp,3,stmt,-1,55);
2737 else if (cat1==exp) {
2738 if (cat2==rbrace) squash(pp,3,exp,-2,56);
2739 else if (cat2==comma && cat3==rbrace) squash(pp,4,exp,-2,56);
2742 @ @<Cases for |if_like|@>=
2743 if (cat1==exp) {
2744 big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,if_clause,0,57);
2747 @ @<Cases for |else_like|@>=
2748 if (cat1==colon) squash(pp+1,1,base,1,58);
2749 else if (cat1==lbrace) squash(pp,1,else_head,0,59);
2750 else if (cat1==stmt) {
2751 big_app(force); big_app1(pp); big_app(indent); big_app(break_space);
2752 big_app1(pp+1); big_app(outdent); big_app(force);
2753 reduce(pp,2,stmt,-1,60);
2756 @ @<Cases for |else_head|@>=
2757 if (cat1==stmt || cat1==exp) {
2758 big_app(force); big_app1(pp); big_app(break_space); app(noop);
2759 big_app(cancel); big_app1(pp+1); big_app(force);
2760 reduce(pp,2,stmt,-1,61);
2763 @ @<Cases for |if_clause|@>=
2764 if (cat1==lbrace) squash(pp,1,if_head,0,62);
2765 else if (cat1==stmt) {
2766 if (cat2==else_like) {
2767 big_app(force); big_app1(pp); big_app(indent); big_app(break_space);
2768 big_app1(pp+1); big_app(outdent); big_app(force); big_app1(pp+2);
2769 if (cat3==if_like) {
2770 big_app(' '); big_app1(pp+3); reduce(pp,4,if_like,0,63);
2771 }@+else reduce(pp,3,else_like,0,64);
2773 else squash(pp,1,else_like,0,65);
2776 @ @<Cases for |if_head|@>=
2777 if (cat1==stmt || cat1==exp) {
2778 if (cat2==else_like) {
2779 big_app(force); big_app1(pp); big_app(break_space); app(noop);
2780 big_app(cancel); big_app1(pp+1); big_app(force); big_app1(pp+2);
2781 if (cat3==if_like) {
2782 big_app(' '); big_app1(pp+3); reduce(pp,4,if_like,0,66);
2783 }@+else reduce(pp,3,else_like,0,67);
2785 else squash(pp,1,else_head,0,68);
2788 @ @<Cases for |do_like|@>=
2789 if (cat1==stmt && cat2==else_like && cat3==semi) {
2790 big_app1(pp); big_app(break_space); app(noop); big_app(cancel);
2791 big_app1(pp+1); big_app(cancel); app(noop); big_app(break_space);
2792 big_app2(pp+2); reduce(pp,4,stmt,-1,69);
2795 @ @<Cases for |case_like|@>=
2796 if (cat1==semi) squash(pp,2,stmt,-1,70);
2797 else if (cat1==colon) squash(pp,2,tag,-1,71);
2798 else if (cat1==exp) {
2799 big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,exp,-2,72);
2802 @ @<Cases for |catch_like|@>=
2803 if (cat1==cast || cat1==exp) {
2804 big_app2(pp); big_app(indent); big_app(indent); reduce(pp,2,fn_decl,0,73);
2807 @ @<Cases for |tag|@>=
2808 if (cat1==tag) {
2809 big_app1(pp); big_app(break_space); big_app1(pp+1); reduce(pp,2,tag,-1,74);
2811 else if (cat1==stmt||cat1==decl||cat1==function) {
2812 big_app(force); big_app(backup); big_app1(pp); big_app(break_space);
2813 big_app1(pp+1); reduce(pp,2,cat1,-1,75);
2816 @ The user can decide at run-time whether short statements should be
2817 grouped together on the same line.
2819 @d force_lines flags['f'] /* should each statement be on its own line? */
2820 @<Cases for |stmt|@>=
2821 if (cat1==stmt||cat1==decl||cat1==function) {
2822 big_app1(pp);
2823 if (cat1==function) big_app(big_force);
2824 else if (cat1==decl) big_app(big_force);
2825 else if (force_lines) big_app(force);
2826 else big_app(break_space);
2827 big_app1(pp+1); reduce(pp,2,cat1,-1,76);
2830 @ @<Cases for |semi|@>=
2831 big_app(' '); big_app1(pp); reduce(pp,1,stmt,-1,77);
2833 @ @<Cases for |lproc|@>=
2834 if (cat1==define_like) make_underlined(pp+2);
2835 if (cat1==else_like || cat1==if_like ||cat1==define_like)
2836 squash(pp,2,lproc,0,78);
2837 else if (cat1==rproc) {
2838 app(inserted); big_app2(pp); reduce(pp,2,insert,-1,79);
2839 } else if (cat1==exp || cat1==function) {
2840 if (cat2==rproc) {
2841 app(inserted); big_app1(pp); big_app(' '); big_app2(pp+1);
2842 reduce(pp,3,insert,-1,80);
2844 else if (cat2==exp && cat3==rproc && cat1==exp) {
2845 app(inserted); big_app1(pp); big_app(' '); big_app1(pp+1); app_str(" \\5");
2846 @.\\5@>
2847 big_app2(pp+2); reduce(pp,4,insert,-1,80);
2851 @ @<Cases for |section_scrap|@>=
2852 if (cat1==semi) {
2853 big_app2(pp); big_app(force); reduce(pp,2,stmt,-2,81);
2855 else squash(pp,1,exp,-2,82);
2857 @ @<Cases for |insert|@>=
2858 if (cat1)
2859 squash(pp,2,cat1,0,83);
2861 @ @<Cases for |prelangle|@>=
2862 init_mathness=cur_mathness=yes_math;
2863 app('<'); reduce(pp,1,binop,-2,84);
2865 @ @<Cases for |prerangle|@>=
2866 init_mathness=cur_mathness=yes_math;
2867 app('>'); reduce(pp,1,binop,-2,85);
2869 @ @<Cases for |langle|@>=
2870 if (cat1==prerangle) {
2871 big_app1(pp); app('\\'); app(','); big_app1(pp+1);
2872 @.\\,@>
2873 reduce(pp,2,cast,-1,86);
2875 else if (cat1==decl_head || cat1==int_like || cat1==exp) {
2876 if (cat2==prerangle) squash(pp,3,cast,-1,87);
2877 else if (cat2==comma) {
2878 big_app3(pp); app(opt); app('9'); reduce(pp,3,langle,0,88);
2882 @ @<Cases for |template_like|@>=
2883 if (cat1==exp && cat2==prelangle) squash(pp+2,1,langle,2,89);
2884 else if (cat1==exp || cat1==raw_int) {
2885 big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,cat1,-2,90);
2886 }@+ else squash(pp,1,raw_int,0,91);
2888 @ @<Cases for |new_like|@>=
2889 if (cat1==lpar && cat2==exp && cat3==rpar) squash(pp,4,new_like,0,92);
2890 else if (cat1==cast) {
2891 big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,exp,-2,93);
2893 else if (cat1!=lpar) squash(pp,1,new_exp,0,94);
2895 @ @<Cases for |new_exp|@>=
2896 if (cat1==int_like || cat1==const_like) {
2897 big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,new_exp,0,95);
2899 else if (cat1==struct_like && (cat2==exp || cat2==int_like)) {
2900 big_app1(pp); big_app(' '); big_app1(pp+1); big_app(' ');
2901 big_app1(pp+2); reduce(pp,3,new_exp,0,96);
2903 else if (cat1==raw_ubin) {
2904 big_app1(pp); big_app('{'); big_app1(pp+1); big_app('}');
2905 reduce(pp,2,new_exp,0,97);
2907 else if (cat1==lpar) squash(pp,1,exp,-2,98);
2908 else if (cat1==exp) {
2909 big_app1(pp); big_app(' '); reduce(pp,1,exp,-2,98);
2911 else if (cat1!=raw_int && cat1!=struct_like && cat1!=colcol)
2912 squash(pp,1,exp,-2,99);
2914 @ @<Cases for |ftemplate|@>=
2915 if (cat1==prelangle) squash(pp+1,1,langle,1,100);
2916 else squash(pp,1,exp,-2,101);
2918 @ @<Cases for |for_like|@>=
2919 if (cat1==exp) {
2920 big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,else_like,-2,102);
2923 @ @<Cases for |raw_ubin|@>=
2924 if (cat1==const_like) {
2925 big_app2(pp); app_str("\\ "); reduce(pp,2,raw_ubin,0,103);
2926 @.\\\ @>
2927 } else squash(pp,1,ubinop,-2,104);
2929 @ @<Cases for |const_like|@>=
2930 squash(pp,1,int_like,-2,105);
2932 @ @<Cases for |raw_int|@>=
2933 if (cat1==prelangle) squash(pp+1,1,langle,1,106);
2934 else if (cat1==colcol) squash(pp,2,colcol,-1,107);
2935 else if (cat1==cast) squash(pp,2,raw_int,0,108);
2936 else if (cat1==lpar) squash(pp,1,exp,-2,109);
2937 else if (cat1!=langle) squash(pp,1,int_like,-3,110);
2939 @ @<Cases for |operator_like|@>=
2940 if (cat1==binop || cat1==unop || cat1==ubinop) {
2941 if (cat2==binop) break;
2942 big_app1(pp); big_app('{'); big_app1(pp+1); big_app('}');
2943 reduce(pp,2,exp,-2,111);
2945 else if (cat1==new_like || cat1==delete_like) {
2946 big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,exp,-2,112);
2948 else if (cat1==comma) squash(pp,2,exp,-2,113);
2949 else if (cat1!=raw_ubin) squash(pp,1,new_exp,0,114);
2951 @ @<Cases for |typedef_like|@>=
2952 if ((cat1==int_like || cat1==cast) && (cat2==comma || cat2==semi))
2953 squash(pp+1,1,exp,-1,115);
2954 else if (cat1==int_like) {
2955 big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,typedef_like,0,116);
2957 else if (cat1==exp && cat2!=lpar && cat2!=exp && cat2!=cast) {
2958 make_underlined(pp+1); make_reserved(pp+1);
2959 big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,typedef_like,0,117);
2961 else if (cat1==comma) {
2962 big_app2(pp); big_app(' '); reduce(pp,2,typedef_like,0,118);
2964 else if (cat1==semi) squash(pp,2,decl,-1,119);
2965 else if (cat1==ubinop && (cat2==ubinop || cat2==cast)) {
2966 big_app('{'); big_app1(pp+1); big_app('}'); big_app1(pp+2);
2967 reduce(pp+1,2,cat2,0,120);
2970 @ @<Cases for |delete_like|@>=
2971 if (cat1==lpar && cat2==rpar) {
2972 big_app2(pp); app('\\'); app(','); big_app1(pp+2);
2973 @.\\,@>
2974 reduce(pp,3,delete_like,0,121);
2976 else if (cat1==exp) {
2977 big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,exp,-2,122);
2980 @ @<Cases for |question|@>=
2981 if (cat1==exp && (cat2==colon || cat2==base)) {
2982 (pp+2)->mathness=5*yes_math; /* this colon should be in math mode */
2983 squash(pp,3,binop,-2,123);
2986 @ Now here's the |reduce| procedure used in our code for productions.
2988 The `|freeze_text|' macro is used to give official status to a token list.
2989 Before saying |freeze_text|, items are appended to the current token list,
2990 and we know that the eventual number of this token list will be the current
2991 value of |text_ptr|. But no list of that number really exists as yet,
2992 because no ending point for the current list has been
2993 stored in the |tok_start| array. After saying |freeze_text|, the
2994 old current token list becomes legitimate, and its number is the current
2995 value of |text_ptr-1| since |text_ptr| has been increased. The new
2996 current token list is empty and ready to be appended to.
2997 Note that |freeze_text| does not check to see that |text_ptr| hasn't gotten
2998 too large, since it is assumed that this test was done beforehand.
3000 @d freeze_text *(++text_ptr)=tok_ptr
3003 void
3004 reduce(j,k,c,d,n) /* replace the |k| scraps starting at |j| by a single scrap of category |c| */
3005 scrap_pointer j; /* first of the scraps being replaced; it receives the result */
3006 eight_bits c; /* category of the resulting scrap */
3007 short k, d, n; /* number of scraps replaced, amount added to |pp|, and the number shown in the debugging snapshot */
3009 scrap_pointer i, i1; /* pointers into scrap memory */
3010 j->cat=c; j->trans=text_ptr; /* the current token list becomes the translation of the result */
3011 j->mathness=4*cur_mathness+init_mathness; /* both mathness values are packed into one field */
3012 freeze_text; /* give that token list official status */
3013 if (k>1) {
3014 for (i=j+k, i1=j+1; i<=lo_ptr; i++, i1++) { /* move the scraps after the replaced ones down next to |j| */
3015 i1->cat=i->cat; i1->trans=i->trans;
3016 i1->mathness=i->mathness;
3018 lo_ptr=lo_ptr-k+1; /* |k| scraps have become one */
3020 pp=(pp+d<scrap_base? scrap_base: pp+d); /* move |pp| by |d|, but never below |scrap_base| */
3021 @<Print a snapshot of the scrap list if debugging @>;
3022 pp--; /* we next say |pp++| */
3025 @ Here's the |squash| procedure, which
3026 takes advantage of the simplification that occurs when |k==1|.
3029 void
3030 squash(j,k,c,d,n) /* like |reduce|, but the translations of the |k| scraps are simply concatenated */
3031 scrap_pointer j; /* first of the scraps being combined */
3032 eight_bits c; /* category of the resulting scrap */
3033 short k, d, n; /* same meaning as the corresponding parameters of |reduce| */
3035 scrap_pointer i; /* pointer into scrap memory */
3036 if (k==1) { /* a single scrap keeps its translation; only its category changes */
3037 j->cat=c; pp=(pp+d<scrap_base? scrap_base: pp+d); /* |pp| is moved by |d| but never below |scrap_base| */
3038 @<Print a snapshot...@>;
3039 pp--; /* we next say |pp++| */
3040 return;
3042 for (i=j; i<j+k; i++) big_app1(i); /* append the translations of all |k| scraps in order */
3043 reduce(j,k,c,d,n); /* then let |reduce| do the rest */
3046 @ And here now is the code that applies productions as long as possible.
3047 Before applying the production mechanism, we must make sure
3048 it has good input (at least four scraps, the length of the lhs of the
3049 longest rules), and that there is enough room in the memory arrays
3050 to hold the appended tokens and texts. Here we use a very
3051 conservative test; it's more important to make sure the program
3052 will still work if we change the production rules (within reason)
3053 than to squeeze the last bit of space from the memory arrays.
3055 @d safe_tok_incr 20
3056 @d safe_text_incr 10
3057 @d safe_scrap_incr 10
3059 @<Reduce the scraps using the productions until no more rules apply@>=
3060 while (1) { /* each iteration tries to match a production at position |pp| */
3061 @<Make sure the entries |pp| through |pp+3| of |cat| are defined@>;
3062 if (tok_ptr+safe_tok_incr>tok_mem_end) { /* fewer than |safe_tok_incr| tokens are left */
3063 if (tok_ptr>max_tok_ptr) max_tok_ptr=tok_ptr; /* record the maximum before reporting the overflow */
3064 overflow("token");
3066 if (text_ptr+safe_text_incr>tok_start_end) { /* fewer than |safe_text_incr| texts are left */
3067 if (text_ptr>max_text_ptr) max_text_ptr=text_ptr; /* record the maximum before reporting the overflow */
3068 overflow("text");
3070 if (pp>lo_ptr) break; /* |pp| has moved past the last scrap at or below |lo_ptr|, so the loop ends */
3071 init_mathness=cur_mathness=maybe_math; /* reset both mathness variables before matching */
3072 @<Match a production...@>;
3075 @ If we get to the end of the scrap list, category codes equal to zero are
3076 stored, since zero does not match anything in a production.
3078 @<Make sure the entries...@>=
3079 if (lo_ptr<pp+3) { /* fewer than four scraps are available from |pp| on */
3080 while (hi_ptr<=scrap_ptr && lo_ptr!=pp+3) { /* copy scraps from |hi_ptr| to just above |lo_ptr| while any remain */
3081 (++lo_ptr)->cat=hi_ptr->cat; lo_ptr->mathness=(hi_ptr)->mathness;
3082 lo_ptr->trans=(hi_ptr++)->trans; /* |hi_ptr| advances only after all three fields are copied */
3084 for (i=lo_ptr+1;i<=pp+3;i++) i->cat=0; /* the remaining positions get category zero, which matches nothing */
3087 @ If \.{CWEAVE} is being run in debugging mode, the production numbers and
3088 current stack categories will be printed out when |tracing| is set to 2;
3089 a sequence of two or more irreducible scraps will be printed out when
3090 |tracing| is set to 1.
3092 @<Global...@>=
3093 int tracing; /* can be used to show parsing details */
3095 @ @<Print a snapsh...@>=
3096 { scrap_pointer k; /* pointer into |scrap_info| */
3097 if (tracing==2) { /* snapshots are printed only at this tracing level */
3098 printf("\n%d:",n); /* |n| is a parameter of the enclosing routine */
3099 for (k=scrap_base; k<=lo_ptr; k++) {
3100 if (k==pp) putxchar('*'); else putxchar(' '); /* an asterisk marks the scrap at |pp| */
3101 if (k->mathness %4 == yes_math) putchar('+'); /* the remainder gives the sign printed before the category */
3102 else if (k->mathness %4 == no_math) putchar('-');
3103 print_cat(k->cat);
3104 if (k->mathness /4 == yes_math) putchar('+'); /* the quotient gives the sign printed after the category */
3105 else if (k->mathness /4 == no_math) putchar('-');
3107 if (hi_ptr<=scrap_ptr) printf("..."); /* indicate that more is coming */
3111 @ The |translate| function assumes that scraps have been stored in
3112 positions |scrap_base| through |scrap_ptr| of |cat| and |trans|. It
3113 applies productions as much as
3114 possible. The result is a token list containing the translation of
3115 the given sequence of scraps.
3117 After calling |translate|, we will have |text_ptr+3<=max_texts| and
3118 |tok_ptr+6<=max_toks|, so it will be possible to create up to three token
3119 lists with up to six tokens without checking for overflow. Before calling
3120 |translate|, we should have |text_ptr<max_texts| and |scrap_ptr<max_scraps|,
3121 since |translate| might add a new text and a new scrap before it checks
3122 for overflow.
3125 text_pointer /* the returned text holds the translation of the whole scrap sequence */
3126 translate() /* converts a sequence of scraps */
3128 scrap_pointer i, /* index into |cat| */
3129 j; /* runs through final scraps */
3130 pp=scrap_base; lo_ptr=pp-1; hi_ptr=pp; /* |lo_ptr| starts just below the first scrap and |hi_ptr| at the first scrap */
3131 @<If tracing, print an indication of where we are@>;
3132 @<Reduce the scraps...@>;
3133 @<Combine the irreducible scraps that remain@>; /* this part contains the |return| */
3136 @ If the initial sequence of scraps does not reduce to a single scrap,
3137 we concatenate the translations of all remaining scraps, separated by
3138 blank spaces, with dollar signs surrounding the translations of scraps
3139 where appropriate.
3141 @<Combine the irreducible...@>= {
3142 @<If semi-tracing, show the irreducible scraps@>;
3143 for (j=scrap_base; j<=lo_ptr; j++) { /* concatenate the translations of all scraps that are left */
3144 if (j!=scrap_base) app(' '); /* a blank space goes between consecutive scraps */
3145 if (j->mathness % 4 == yes_math) app('$'); /* a dollar sign precedes a scrap whose remainder is |yes_math| */
3146 app1(j);
3147 if (j->mathness / 4 == yes_math) app('$'); /* and one follows a scrap whose quotient is |yes_math| */
3148 if (tok_ptr+6>tok_mem_end) overflow("token"); /* keep room for six more tokens */
3150 freeze_text; return(text_ptr-1); /* after freezing, the list just built has number |text_ptr-1| */
3153 @ @<If semi-tracing, show the irreducible scraps@>=
3154 if (lo_ptr>scrap_base && tracing==1) {
3155 printf("\nIrreducible scrap sequence in section %d:",section_count);
3156 @.Irreducible scrap sequence...@>
3157 mark_harmless;
3158 for (j=scrap_base; j<=lo_ptr; j++) {
3159 printf(" "); print_cat(j->cat);
3163 @ @<If tracing,...@>=
3164 if (tracing==2) {
3165 printf("\nTracing after l. %d:\n",cur_line); mark_harmless;
3166 @.Tracing after...@>
3167 if (loc>buffer+50) {
3168 printf("...");
3169 term_write(loc-51,51);
3171 else term_write(buffer,loc-buffer);
3174 @* Initializing the scraps.
3175 If we are going to use the powerful production mechanism just developed, we
3176 must get the scraps set up in the first place, given a \CEE/ text. A table
3177 of the initial scraps corresponding to \CEE/ tokens appeared above in the
3178 section on parsing; our goal now is to implement that table. We shall do this
3179 by implementing a subroutine called |C_parse| that is analogous to the
3180 |C_xref| routine used during phase one.
3182 Like |C_xref|, the |C_parse| procedure starts with the current
3183 value of |next_control| and it uses the operation |next_control=get_next()|
3184 repeatedly to read \CEE/ text until encountering the next `\.{\v}' or
3185 `\.{/*}', or until |next_control>=format_code|. The scraps corresponding to
3186 what it reads are appended into the |cat| and |trans| arrays, and |scrap_ptr|
3187 is advanced.
3190 void
3191 C_parse(spec_ctrl) /* creates scraps from \CEE/ tokens */
3192 eight_bits spec_ctrl; /* a value of |next_control| that keeps the loop going even though it is not below |format_code| */
3194 int count; /* characters remaining before string break */
3195 while (next_control<format_code || next_control==spec_ctrl) { /* one scrap-appending step per token read */
3196 @<Append the scrap appropriate to |next_control|@>;
3197 next_control=get_next(); /* read the next token */
3198 if (next_control=='|' || next_control==begin_comment || /* stop at a vertical bar or at the beginning of either kind of comment */
3199 next_control==begin_short_comment) return;
3203 @ The following macro is used to append a scrap whose tokens have just
3204 been appended:
3206 @d app_scrap(c,b) {
3207 (++scrap_ptr)->cat=(c); scrap_ptr->trans=text_ptr;
3208 scrap_ptr->mathness=5*(b); /* no no, yes yes, or maybe maybe */
3209 freeze_text;
3212 @ @<Append the scr...@>=
3213 @<Make sure that there is room for the new scraps, tokens, and texts@>;
3214 switch (next_control) {
3215 case section_name:
3216 app(section_flag+(int)(cur_section-name_dir));
3217 app_scrap(section_scrap,maybe_math);
3218 app_scrap(exp,yes_math);@+break;
3219 case string: case constant: case verbatim: @<Append a string or constant@>;
3220 @+break;
3221 case identifier: app_cur_id(1);@+break;
3222 case TeX_string: @<Append a \TEX/ string, without forming a scrap@>;@+break;
3223 case '/': case '.':
3224 app(next_control); app_scrap(binop,yes_math);@+break;
3225 case '<': app_str("\\langle");@+app_scrap(prelangle,yes_math);@+break;
3226 @.\\langle@>
3227 case '>': app_str("\\rangle");@+app_scrap(prerangle,yes_math);@+break;
3228 @.\\rangle@>
3229 case '=': app_str("\\K"); app_scrap(binop,yes_math);@+break;
3230 @.\\K@>
3231 case '|': app_str("\\OR"); app_scrap(binop,yes_math);@+break;
3232 @.\\OR@>
3233 case '^': app_str("\\XOR"); app_scrap(binop,yes_math);@+break;
3234 @.\\XOR@>
3235 case '%': app_str("\\MOD"); app_scrap(binop,yes_math);@+break;
3236 @.\\MOD@>
3237 case '!': app_str("\\R"); app_scrap(unop,yes_math);@+break;
3238 @.\\R@>
3239 case '~': app_str("\\CM"); app_scrap(unop,yes_math);@+break;
3240 @.\\CM@>
3241 case '+': case '-': app(next_control); app_scrap(ubinop,yes_math);@+break;
3242 case '*': app(next_control); app_scrap(raw_ubin,yes_math);@+break;
3243 case '&': app_str("\\AND"); app_scrap(raw_ubin,yes_math);@+break;
3244 @.\\AND@>
3245 case '?': app_str("\\?"); app_scrap(question,yes_math);@+break;
3246 @.\\?@>
3247 case '#': app_str("\\#"); app_scrap(ubinop,yes_math);@+break;
3248 @.\\\#@>
3249 case ignore: case xref_roman: case xref_wildcard:
3250 case xref_typewriter: case noop:@+break;
3251 case '(': case '[': app(next_control); app_scrap(lpar,maybe_math);@+break;
3252 case ')': case ']': app(next_control); app_scrap(rpar,maybe_math);@+break;
3253 case '{': app_str("\\{"@q}@>); app_scrap(lbrace,yes_math);@+break;
3254 @.\\\{@>@q}@>
3255 case '}': app_str(@q{@>"\\}"); app_scrap(rbrace,yes_math);@+break;
3256 @q{@>@.\\\}@>
3257 case ',': app(','); app_scrap(comma,yes_math);@+break;
3258 case ';': app(';'); app_scrap(semi,maybe_math);@+break;
3259 case ':': app(':'); app_scrap(colon,no_math);@+break;@/
3260 @t\4@> @<Cases involving nonstandard characters@>@;
3261 case thin_space: app_str("\\,"); app_scrap(insert,maybe_math);@+break;
3262 @.\\,@>
3263 case math_break: app(opt); app_str("0");
3264 app_scrap(insert,maybe_math);@+break;
3265 case line_break: app(force); app_scrap(insert,no_math);@+break;
3266 case left_preproc: app(force); app(preproc_line);
3267 app_str("\\#"); app_scrap(lproc,no_math);@+break;
3268 @.\\\#@>
3269 case right_preproc: app(force); app_scrap(rproc,no_math);@+break;
3270 case big_line_break: app(big_force); app_scrap(insert,no_math);@+break;
3271 case no_line_break: app(big_cancel); app(noop); app(break_space);
3272 app(noop); app(big_cancel);
3273 app_scrap(insert,no_math);@+break;
3274 case pseudo_semi: app_scrap(semi,maybe_math);@+break;
3275 case macro_arg_open: app_scrap(begin_arg,maybe_math);@+break;
3276 case macro_arg_close: app_scrap(end_arg,maybe_math);@+break;
3277 case join: app_str("\\J"); app_scrap(insert,no_math);@+break;
3278 @.\\J@>
3279 case output_defs_code: app(force); app_str("\\ATH"); app(force);
3280 app_scrap(insert,no_math);@+break;
3281 @.\\ATH@>
3282 default: app(inserted); app(next_control);
3283 app_scrap(insert,maybe_math);@+break;
3286 @ @<Make sure that there is room for the new...@>=
/* give up unless a safety margin of free entries remains in all three areas */
3287 if (scrap_ptr+safe_scrap_incr>scrap_info_end ||
3288 tok_ptr+safe_tok_incr>tok_mem_end @| ||
3289 text_ptr+safe_text_incr>tok_start_end) {
3290 if (scrap_ptr>max_scr_ptr) max_scr_ptr=scrap_ptr; /* bring the high-water marks up to date first */
3291 if (tok_ptr>max_tok_ptr) max_tok_ptr=tok_ptr;
3292 if (text_ptr>max_text_ptr) max_text_ptr=text_ptr;
3293 overflow("scrap/token/text"); /* one message covers all three areas */
3296 @ Some nonstandard characters may have entered \.{CWEAVE} by means of
3297 standard ones. They are converted to \TEX/ control sequences so that it is
3298 possible to keep \.{CWEAVE} from outputting unusual |char| codes.
3300 @<Cases involving nonstandard...@>=
/* each of these codes appends one control sequence to the token list and then forms one scrap */
3301 case not_eq: app_str("\\I");@+app_scrap(binop,yes_math);@+break;
3302 @.\\I@>
3303 case lt_eq: app_str("\\Z");@+app_scrap(binop,yes_math);@+break;
3304 @.\\Z@>
3305 case gt_eq: app_str("\\G");@+app_scrap(binop,yes_math);@+break;
3306 @.\\G@>
3307 case eq_eq: app_str("\\E");@+app_scrap(binop,yes_math);@+break;
3308 @.\\E@>
3309 case and_and: app_str("\\W");@+app_scrap(binop,yes_math);@+break;
3310 @.\\W@>
3311 case or_or: app_str("\\V");@+app_scrap(binop,yes_math);@+break;
3312 @.\\V@>
3313 case plus_plus: app_str("\\PP");@+app_scrap(unop,yes_math);@+break; /* this and the next case are the only |unop| scraps in this section */
3314 @.\\PP@>
3315 case minus_minus: app_str("\\MM");@+app_scrap(unop,yes_math);@+break;
3316 @.\\MM@>
3317 case minus_gt: app_str("\\MG");@+app_scrap(binop,yes_math);@+break;
3318 @.\\MG@>
3319 case gt_gt: app_str("\\GG");@+app_scrap(binop,yes_math);@+break;
3320 @.\\GG@>
3321 case lt_lt: app_str("\\LL");@+app_scrap(binop,yes_math);@+break;
3322 @.\\LL@>
3323 case dot_dot_dot: app_str("\\,\\ldots\\,");@+app_scrap(raw_int,yes_math);
3324 @+break; /* the ellipsis is surrounded by thin spaces and becomes a |raw_int| scrap */
3325 @.\\,@>
3326 @.\\ldots@>
3327 case colon_colon: app_str("\\DC");@+app_scrap(colcol,maybe_math);@+break; /* the only case here that is |maybe_math| instead of |yes_math| */
3328 @.\\DC@>
3329 case period_ast: app_str("\\PA");@+app_scrap(binop,yes_math);@+break;
3330 @.\\PA@>
3331 case minus_gt_ast: app_str("\\MGA");@+app_scrap(binop,yes_math);@+break;
3332 @.\\MGA@>
3334 @ The following code must use |app_tok| instead of |app| in order to
3335 protect against overflow. Note that |tok_ptr+1<=max_toks| after |app_tok|
3336 has been used, so another |app| is legitimate before testing again.
3338 Many of the special characters in a string must be prefixed by `\.\\' so that
3339 \TEX/ will print them properly.
3340 @^special string characters@>
3342 @<Append a string or...@>=
/* copy the characters from |id_first| up to |id_loc| into the token list as the argument of a macro chosen by |next_control|, then make an |exp| scrap */
3343 count= -1; /* a negative |count| does not reach zero in the loop below, so breaks are inserted only after |count| has been set to 20 */
3344 if (next_control==constant) app_str("\\T{"@q}@>);
3345 @.\\T@>
3346 else if (next_control==string) {
3347 count=20; app_str("\\.{"@q}@>); /* only strings get a break after every 20 characters */
3349 @.\\.@>
3350 else app_str("\\vb{"@q}@>); /* neither a constant nor a string */
3351 @.\\vb@>
3352 while (id_first<id_loc) {
3353 if (count==0) { /* insert a discretionary break in a long string */
3354 app_str(@q(@>@q{@>"}\\)\\.{"@q}@>); count=20; /* close the group, append the break macro, reopen the group */
3355 @q(@>@.\\)@>
3357 @^high-bit character handling@>
3358 if((eight_bits)(*id_first)>0177) { /* a character above 0177 is preceded by a |quoted_char| token */
3359 app_tok(quoted_char);
3360 app_tok((eight_bits)(*id_first++));
3362 else {
3363 switch (*id_first) {
3364 case ' ':case '\\':case '#':case '%':case '$':case '^':
3365 case '{': case '}': case '~': case '&': case '_': app('\\'); break; /* these characters get an escape prefix; the character itself is appended below */
3366 @.\\\ @>
3367 @.\\\\@>
3368 @.\\\#@>
3369 @.\\\%@>
3370 @.\\\$@>
3371 @.\\\^@>
3372 @.\\\{@>@q}@>
3373 @q{@>@.\\\}@>
3374 @.\\\~@>
3375 @.\\\&@>
3376 @.\\\_@>
3377 case '@@': if (*(id_first+1)=='@@') id_first++; /* of a doubled at-sign only the second is copied */
3378 else err_print("! Double @@ should be used in strings");
3379 @.Double @@ should be used...@>
3381 app_tok(*id_first++);
3383 count--;
3385 app(@q{@>'}'); /* close the group that was opened before the loop */
3386 app_scrap(exp,maybe_math);
3388 @ We do not make the \TEX/ string into a scrap, because there is no
3389 telling what the user will be putting into it; instead we leave it
3390 open, to be picked up by the next scrap. If it comes at the end of a
3391 section, it will be made into a scrap when |finish_C| is called.
3393 There's a known bug here, in cases where an adjacent scrap is
3394 |prelangle| or |prerangle|. Then the \TEX/ string can disappear
3395 when the \.{\\langle} or \.{\\rangle} becomes \.{<} or \.{>}.
3396 For example, if the user writes \.{\v x<@@ty@@>\v}, the \TEX/ string
3397 \.{\\hbox\{y\}} eventually becomes part of an |insert| scrap, which is combined
3398 with a |prelangle| scrap and eventually lost. The best way to work around
3399 this bug is probably to enclose the \.{@@t...@@>} in \.{@@[...@@]} so that
3400 the \TEX/ string is treated as an expression.
3401 @^bug, known@>
3403 @<Append a \TEX/ string, without forming a scrap@>=
/* copy the characters from |id_first| up to |id_loc| into an hbox; no |app_scrap| is done here */
3404 app_str("\\hbox{"@q}@>);
3405 @^high-bit character handling@>
3406 while (id_first<id_loc)
3407 if((eight_bits)(*id_first)>0177) { /* a character above 0177 is preceded by a |quoted_char| token */
3408 app_tok(quoted_char);
3409 app_tok((eight_bits)(*id_first++));
3411 else {
3412 if (*id_first=='@@') id_first++; /* an at-sign is skipped and the character after it is copied */
3413 app_tok(*id_first++);
3415 app(@q{@>'}'); /* close the hbox */
3417 @ The function |app_cur_id| appends the current identifier to the
3418 token list; it also builds a new scrap if |scrapping==1|.
3420 @<Predec...@>=
3421 void app_cur_id();
3423 @ @c
/* look up the identifier that lies between |id_first| and |id_loc|, append a token for it, and form a scrap if |scrapping| is nonzero */
3424 void
3425 app_cur_id(scrapping)
3426 boolean scrapping; /* are we making this into a scrap? */
3428 name_pointer p=id_lookup(id_first,id_loc,normal);
3429 if (p->ilk<=custom) { /* not a reserved word */
3430 app(id_flag+(int)(p-name_dir)); /* the token is |id_flag| plus the position of the name in |name_dir| */
3431 if (scrapping) app_scrap(p->ilk==func_template? ftemplate: exp,
3432 p->ilk==custom? yes_math: maybe_math); /* the category and the math mode both depend on |p->ilk| */
3433 @.\\NULL@>
3434 } else {
3435 app(res_flag+(int)(p-name_dir)); /* same encoding, but with |res_flag| */
3436 if (scrapping) {
3437 if (p->ilk==alfop) app_scrap(ubinop,yes_math)@;
3438 else app_scrap(p->ilk,maybe_math); /* otherwise the |ilk| itself serves as the scrap category */
3443 @ When the `\.{\v}' that introduces \CEE/ text is sensed, a call on
3444 |C_translate| will return a pointer to the \TEX/ translation of
3445 that text. If scraps exist in |scrap_info|, they are
3446 unaffected by this translation process.
/* parse and translate one piece of code, using only scraps above those already present, and return the translation */
3449 text_pointer
3450 C_translate()
3452 text_pointer p; /* points to the translation */
3453 scrap_pointer save_base; /* holds original value of |scrap_base| */
3454 save_base=scrap_base; scrap_base=scrap_ptr+1; /* the new scraps start just above the existing ones */
3455 C_parse(section_name); /* get the scraps together */
3456 if (next_control!='|') err_print("! Missing '|' after C text");
3457 @.Missing '|'...@>
3458 app_tok(cancel); app_scrap(insert,maybe_math);
3459 /* place a |cancel| token as a final ``comment'' */
3460 p=translate(); /* make the translation */
3461 if (scrap_ptr>max_scr_ptr) max_scr_ptr=scrap_ptr; /* note the high-water mark before the scraps are dropped */
3462 scrap_ptr=scrap_base-1; scrap_base=save_base; /* scrap the scraps */
3463 return(p);
3466 @ The |outer_parse| routine is to |C_parse| as |outer_xref|
3467 is to |C_xref|: It constructs a sequence of scraps for \CEE/ text
3468 until |next_control>=format_code|. Thus, it takes care of embedded comments.
3470 The token list created from within `\pb' brackets is output as an argument
3471 to \.{\\PB}, if the user has invoked \.{CWEAVE} with the \.{+e} flag.
3472 Although \.{cwebmac} ignores \.{\\PB}, other macro packages
3473 might use it to localize the special meaning of the macros that mark up
3474 program text.
3476 @d make_pb flags['e']
/* loop until |next_control>=format_code|, handing code to |C_parse| and turning each comment into a single scrap */
3479 void
3480 outer_parse() /* makes scraps from \CEE/ tokens and comments */
3482 int bal; /* brace level in comment */
3483 text_pointer p, q; /* partial comments */
3484 while (next_control<format_code)
3485 if (next_control!=begin_comment && next_control!=begin_short_comment)
3486 C_parse(ignore); /* not at a comment, so |C_parse| makes the scraps */
3487 else {
3488 boolean is_long_comment=(next_control==begin_comment);
3489 @<Make sure that there is room for the new...@>;
3490 app(cancel); app(inserted);
3491 if (is_long_comment) app_str("\\C{"@q}@>);
3492 @.\\C@>
3493 else app_str("\\SHC{"@q}@>);
3494 @.\\SHC@>
3495 bal=copy_comment(is_long_comment,1); next_control=ignore; /* the loop below runs while |copy_comment| returns a positive level */
3496 while (bal>0) {
3497 p=text_ptr; freeze_text; q=C_translate(); /* |p| is the text just frozen; |q| is the translation of the code that follows */
3498 /* at this point we have |tok_ptr+6<=max_toks| */
3499 app(tok_flag+(int)(p-tok_start)); /* refer to the text |p| */
3500 if (make_pb) app_str("\\PB{");
3501 @.\\PB@>
3502 app(inner_tok_flag+(int)(q-tok_start)); /* refer to the translation |q|, flagged with |inner_tok_flag| */
3503 if (make_pb) app_tok('}');
3504 if (next_control=='|') {
3505 bal=copy_comment(is_long_comment,bal); /* continue the comment at the level where it was interrupted */
3506 next_control=ignore;
3508 else bal=0; /* an error has been reported */
3510 app(force); app_scrap(insert,no_math);
3511 /* the full comment becomes a scrap */
3515 @* Output of tokens.
3516 So far our programs have only built up multi-layered token lists in
3517 \.{CWEAVE}'s internal memory; we have to figure out how to get them into
3518 the desired final form. The job of converting token lists to characters in
3519 the \TEX/ output file is not difficult, although it is an implicitly
3520 recursive process. Four main considerations had to be kept in mind when
3521 this part of \.{CWEAVE} was designed. (a) There are two modes of output:
3522 |outer| mode, which translates tokens like |force| into line-breaking
3523 control sequences, and |inner| mode, which ignores them except that blank
3524 spaces take the place of line breaks. (b) The |cancel| instruction applies
3525 to the adjacent token or tokens that are output, and this cuts across levels
3526 of recursion since `|cancel|' occurs at the beginning or end of a token
3527 list on one level. (c) The \TEX/ output file will be semi-readable if line
3528 breaks are inserted after the result of tokens like |break_space| and
3529 |force|. (d) The final line break should be suppressed, and there should
3530 be no |force| token output immediately after `\.{\\Y\\B}'.
3532 @ The output process uses a stack to keep track of what is going on at
3533 different ``levels'' as the token lists are being written out. Entries on
3534 this stack have three parts:
3536 \yskip\hang |end_field| is the |tok_mem| location where the token list of a
3537 particular level will end;
3539 \yskip\hang |tok_field| is the |tok_mem| location from which the next token
3540 on a particular level will be read;
3542 \yskip\hang |mode_field| is the current mode, either |inner| or |outer|.
3544 \yskip\noindent The current values of these quantities are referred to
3545 quite frequently, so they are stored in a separate place instead of in the
3546 |stack| array. We call the current values |cur_end|, |cur_tok|, and
3547 |cur_mode|.
3549 The global variable |stack_ptr| tells how many levels of output are
3550 currently in progress. The end of output occurs when an |end_translation|
3551 token is found, so the stack is never empty except when we first begin the
3552 output process.
3554 @d inner 0 /* value of |mode| for \CEE/ texts within \TEX/ texts */
3555 @d outer 1 /* value of |mode| for \CEE/ texts in sections */
3557 @<Typed...@>= typedef int mode;
3558 typedef struct {
3559 token_pointer end_field; /* ending location of token list */
3560 token_pointer tok_field; /* present location within token list */
3561 mode mode_field; /* interpretation of control tokens, either |inner| or |outer| */
3562 } output_state;
3563 typedef output_state *stack_pointer; /* points to an entry of the output state stack */
3565 @ @d cur_end cur_state.end_field /* current ending location in |tok_mem| */
3566 @d cur_tok cur_state.tok_field /* location of next output token in |tok_mem| */
3567 @d cur_mode cur_state.mode_field /* current mode of interpretation */
3568 @d init_stack stack_ptr=stack;cur_mode=outer /* initialize the stack */
3570 @<Global...@>=
3571 output_state cur_state; /* |cur_end|, |cur_tok|, |cur_mode| */
3572 output_state stack[stack_size]; /* info for non-current levels */
3573 stack_pointer stack_ptr; /* first unused location in the output state stack */
3574 stack_pointer stack_end=stack+stack_size-1; /* end of |stack| */
3575 stack_pointer max_stack_ptr; /* largest value assumed by |stack_ptr| */
3577 @ @<Set init...@>=
3578 max_stack_ptr=stack;
3580 @ To insert token-list |p| into the output, the |push_level| subroutine
3581 is called; it saves the old level of output and gets a new one going.
3582 The value of |cur_mode| is not changed.
/* save the current output state on the stack, except on the first push, and start reading the token list |p| */
3585 void
3586 push_level(p) /* suspends the current level */
3587 text_pointer p; /* the token list to be read next */
3589 if (stack_ptr==stack_end) overflow("stack");
3590 if (stack_ptr>stack) { /* save current state */
3591 stack_ptr->end_field=cur_end;
3592 stack_ptr->tok_field=cur_tok;
3593 stack_ptr->mode_field=cur_mode;
3595 stack_ptr++;
3596 if (stack_ptr>max_stack_ptr) max_stack_ptr=stack_ptr; /* keep track of the deepest nesting */
3597 cur_tok=*p; cur_end=*(p+1); /* the list starts at |*p|; reading stops when |cur_tok| reaches |*(p+1)| */
3600 @ Conversely, the |pop_level| routine restores the conditions that were in
3601 force when the current level was begun. This subroutine will never be
3602 called when |stack_ptr==stack+1|, i.e., when only one level is active.
/* reload |cur_end|, |cur_tok|, and |cur_mode| from the topmost saved stack entry */
3605 void
3606 pop_level() /* resumes the level that was suspended by |push_level| */
3608 cur_end=(--stack_ptr)->end_field; /* step back to the saved entry first */
3609 cur_tok=stack_ptr->tok_field; cur_mode=stack_ptr->mode_field;
3612 @ The |get_output| function returns the next byte of output that is not a
3613 reference to a token list. It returns the values |identifier| or |res_word|
3614 or |section_code| if the next token is to be an identifier (typeset in
3615 italics), a reserved word (typeset in boldface), or a section name (typeset
3616 by a complex routine that might generate additional levels of output).
3617 In these cases |cur_name| points to the identifier or section name in
3618 question.
3620 @<Global...@>=
3621 name_pointer cur_name;
3623 @ @d res_word 0201 /* returned by |get_output| for reserved words */
3624 @d section_code 0200 /* returned by |get_output| for section names */
/* fetch the next token; references to token lists are followed by pushing a level, and names are returned as a code with |cur_name| set */
3627 eight_bits
3628 get_output() /* returns the next token of output */
3630 sixteen_bits a; /* current item read from |tok_mem| */
3631 restart: while (cur_tok==cur_end) pop_level(); /* leave every level that has been read to its end */
3632 a=*(cur_tok++);
3633 if (a>=0400) { /* values of 0400 or more consist of a flag and an index */
3634 cur_name=a % id_flag + name_dir; /* meaningful only for the cases that return below */
3635 switch (a / id_flag) {
3636 case 2: return(res_word); /* |a==res_flag+cur_name| */
3637 case 3: return(section_code); /* |a==section_flag+cur_name| */
3638 case 4: push_level(a % id_flag + tok_start); goto restart;
3639 /* |a==tok_flag+cur_name| */
3640 case 5: push_level(a % id_flag + tok_start); cur_mode=inner; goto restart;
3641 /* |a==inner_tok_flag+cur_name| */
3642 default: return(identifier); /* |a==id_flag+cur_name| */
3645 return(a); /* a value below 0400 is returned unchanged */
3648 @ The real work associated with token output is done by |make_output|.
3649 This procedure appends an |end_translation| token to the current token list,
3650 and then it repeatedly calls |get_output| and feeds characters to the output
3651 buffer until reaching the |end_translation| sentinel. It is possible for
3652 |make_output| to be called recursively, since a section name may include
3653 embedded \CEE/ text; however, the depth of recursion never exceeds one
3654 level, since section names cannot be inside of section names.
3656 A procedure called |output_C| does the scanning, translation, and
3657 output of \CEE/ text within `\pb' brackets, and this procedure uses
3658 |make_output| to output the current token list. Thus, the recursive call
3659 of |make_output| actually occurs when |make_output| calls |output_C|
3660 while outputting the name of a section.
3661 @^recursion@>
/* translate a piece of code with |C_translate|, output the result, and then restore |tok_ptr|, |text_ptr|, and |next_control| */
3664 void
3665 output_C() /* outputs the current token list */
3667 token_pointer save_tok_ptr;
3668 text_pointer save_text_ptr;
3669 sixteen_bits save_next_control; /* values to be restored */
3670 text_pointer p; /* translation of the \CEE/ text */
3671 save_tok_ptr=tok_ptr; save_text_ptr=text_ptr;
3672 save_next_control=next_control; next_control=ignore; p=C_translate();
3673 app(inner_tok_flag+(int)(p-tok_start)); /* a single token that refers to the translation, flagged with |inner_tok_flag| */
3674 if (make_pb) {
3675 out_str("\\PB{"); make_output(); out('}'); /* the output becomes the argument of a macro */
3676 @.\\PB@>
3677 }@+else make_output(); /* output the list */
3678 if (text_ptr>max_text_ptr) max_text_ptr=text_ptr; /* note the high-water marks before backing up */
3679 if (tok_ptr>max_tok_ptr) max_tok_ptr=tok_ptr;
3680 text_ptr=save_text_ptr; tok_ptr=save_tok_ptr; /* forget the tokens */
3681 next_control=save_next_control; /* restore |next_control| to original state */
3684 @ Here is \.{CWEAVE}'s major output handler.
3686 @<Predecl...@>=
3687 void make_output();
3689 @ @c
/* append a sentinel to the current token list, then read tokens with |get_output| and act on each one until the sentinel comes back */
3690 void
3691 make_output() /* outputs the equivalents of tokens */
3693 eight_bits a, /* current output byte */
3694 b; /* next output byte */
3695 int c; /* count of |indent| and |outdent| tokens */
3696 char scratch[longest_name]; /* scratch area for section names */
3697 char *k, *k_limit; /* indices into |scratch| */
3698 char *j; /* index into |buffer| */
3699 char *p; /* index into |byte_mem| */
3700 char delim; /* first and last character of string being copied */
3701 char *save_loc, *save_limit; /* |loc| and |limit| to be restored */
3702 name_pointer cur_section_name; /* name of section being output */
3703 boolean save_mode; /* value of |cur_mode| before a sequence of breaks */
3704 app(end_translation); /* append a sentinel */
3705 freeze_text; push_level(text_ptr-1); /* start reading the text that was just frozen */
3706 while (1) {
3707 a=get_output();
3708 reswitch: switch(a) {
3709 case end_translation: return;
3710 case identifier: case res_word: @<Output an identifier@>; break;
3711 case section_code: @<Output a section name@>; break;
3712 case math_rel: out_str("\\MRL{"@q}@>); /* falls through to the |break| of the next case */
3713 @.\\MRL@>
3714 case noop: case inserted: break; /* these produce no output */
3715 case cancel: case big_cancel: c=0; b=a; /* |b| remembers which kind of cancel this is */
3716 while (1) {
3717 a=get_output();
3718 if (a==inserted) continue;
3719 if ((a<indent && !(b==big_cancel&&a==' ')) || a>big_force) break; /* stop at a token outside the range from |indent| to |big_force|, except that |big_cancel| also passes over a blank */
3720 if (a==indent) c++; else if (a==outdent) c--; /* only the net indentation is kept */
3721 else if (a==opt) a=get_output(); /* pass over the token that follows |opt| */
3723 @<Output saved |indent| or |outdent| tokens@>;
3724 goto reswitch; /* |a| has not been handled yet */
3725 case indent: case outdent: case opt: case backup: case break_space:
3726 case force: case big_force: case preproc_line: @<Output a control,
3727 look ahead in case of line breaks, possibly |goto reswitch|@>; break;
3728 case quoted_char: out(*(cur_tok++)); /* output the following token as it stands, then fall through */
3729 case qualifier: break; /* no output */
3730 default: out(a); /* otherwise |a| is an ordinary character */
3735 @ An identifier of length one does not have to be enclosed in braces, and it
3736 looks slightly better if set in a math-italic font instead of a (slightly
3737 narrower) text-italic font. Thus we output `\.{\\\v}\.{a}' but
3738 `\.{\\\\\{aa\}}'.
3740 @<Output an identifier@>=
3741 out('\\'); /* every name begins with a control sequence */
3742 if (a==identifier) {
3743 if (cur_name->ilk==custom && !doing_format) {
3744 custom_out:
3745 for (p=cur_name->byte_start;p<(cur_name+1)->byte_start;p++)
3746 out(*p=='_'? 'x': *p=='$'? 'X': *p); /* the name itself is output, with two special characters replaced by letters */
3747 break; /* nothing more is output for this name */
3748 } else if (is_tiny(cur_name)) out('|')@;
3749 @.\\|@>
3750 else { delim='.'; /* used if no lowercase letter is found */
3751 for (p=cur_name->byte_start;p<(cur_name+1)->byte_start;p++)
3752 if (xislower(*p)) { /* not entirely uppercase */
3753 delim='\\'; break;
3755 out(delim);
3757 @.\\\\@>
3758 @.\\.@>
3759 }@+else if (cur_name->ilk==alfop) {
3760 out('X'); /* here the letter X precedes the name */
3761 goto custom_out;
3762 }@+else out('&'); /* |a==res_word| */
3763 @.\\\&@>
3764 if (is_tiny(cur_name)) {
3765 if (isxalpha((cur_name->byte_start)[0]))
3766 out('\\'); /* the single character gets an escape prefix when |isxalpha| holds */
3767 out((cur_name->byte_start)[0]);
3769 else out_name(cur_name,1); /* longer names are output by |out_name| */
3771 @ The current mode does not affect the behavior of \.{CWEAVE}'s output routine
3772 except when we are outputting control tokens.
3774 @<Output a control...@>=
3775 if (a<break_space || a==preproc_line) { /* these tokens are not line breaks */
3776 if (cur_mode==outer) { /* they produce output only in |outer| mode */
3777 out('\\'); out(a-cancel+'0'); /* the control sequence is named by the digit |a-cancel| */
3778 @.\\1@>
3779 @.\\2@>
3780 @.\\3@>
3781 @.\\4@>
3782 @.\\8@>
3783 if (a==opt) {
3784 b=get_output(); /* |opt| is followed by a digit */
3785 if (b!='0' || force_lines==0) out(b)@;
3786 else out_str("{-1}"); /* |force_lines| encourages more \.{@@\v} breaks */
3788 } else if (a==opt) b=get_output(); /* ignore digit following |opt| */
3790 else @<Look ahead for strongest line break, |goto reswitch|@>
3792 @ If several of the tokens |break_space|, |force|, |big_force| occur in a
3793 row, possibly mixed with blank spaces (which are ignored),
3794 the largest one is used. A line break also occurs in the output file,
3795 except at the very end of the translation. The very first line break
3796 is suppressed (i.e., a line break that follows `\.{\\Y\\B}').
3798 @<Look ahead for st...@>= {
3799 b=a; save_mode=cur_mode; c=0; /* |b| is the strongest break so far and |c| is the net indentation seen */
3800 while (1) {
3801 a=get_output();
3802 if (a==inserted) continue;
3803 if (a==cancel || a==big_cancel) {
3804 @<Output saved |indent| or |outdent| tokens@>;
3805 goto reswitch; /* |cancel| overrides everything */
3807 if ((a!=' ' && a<indent) || a==backup || a>big_force) { /* the run of breaks has ended */
3808 if (save_mode==outer) {
3809 if (out_ptr>out_buf+3 && strncmp(out_ptr-3,"\\Y\\B",4)==0)
3810 goto reswitch; /* no break is output when these four characters were the last ones output */
3811 @<Output saved |indent| or |outdent| tokens@>;
3812 out('\\'); out(b-cancel+'0'); /* the control sequence is named by the digit |b-cancel| */
3813 @.\\5@>
3814 @.\\6@>
3815 @.\\7@>
3816 if (a!=end_translation) finish_line(); /* no new line at the very end */
3818 else if (a!=end_translation && cur_mode==inner) out(' '); /* a blank stands in for the break */
3819 goto reswitch;
3821 if (a==indent) c++;
3822 else if (a==outdent) c--;
3823 else if (a==opt) a=get_output(); /* pass over the token that follows |opt| */
3824 else if (a>b) b=a; /* if |a==' '| we have |a<b| */
3828 @ @<Output saved...@>=
3829 for (;c>0;c--) out_str("\\1"); /* a positive |c| gives that many copies of the first macro */
3830 @.\\1@>
3831 for (;c<0;c++) out_str("\\2"); /* a negative |c| gives copies of the second; |c| is zero afterwards */
3832 @.\\2@>
3834 @ The remaining part of |make_output| is somewhat more complicated. When we
3835 output a section name, we may need to enter the parsing and translation
3836 routines, since the name may contain \CEE/ code embedded in
3837 \pb\ constructions. This \CEE/ code is placed at the end of the active
3838 input buffer and the translation process uses the end of the active
3839 |tok_mem| area.
3841 @<Output a section name@>= {
3842 out_str("\\X");
3843 @.\\X@>
3844 cur_xref=(xref_pointer)cur_name->xref;
3845 if (cur_xref->num==file_flag) {an_output=1; cur_xref=cur_xref->xlink;} /* a leading |file_flag| entry sets |an_output| and is skipped */
3846 else an_output=0;
3847 if (cur_xref->num>=def_flag) {
3848 out_section(cur_xref->num-def_flag); /* the first section number */
3849 if (phase==3) { /* in phase three the remaining entries at or above |def_flag| are listed too */
3850 cur_xref=cur_xref->xlink;
3851 while (cur_xref->num>=def_flag) {
3852 out_str(", ");
3853 out_section(cur_xref->num-def_flag);
3854 cur_xref=cur_xref->xlink;
3858 else out('0'); /* output the section number, or zero if it was undefined */
3859 out(':');
3860 if (an_output) out_str("\\.{"@q}@>); /* the same macro that is used for strings */
3861 @.\\.@>
3862 @<Output the text of the section name@>;
3863 if (an_output) out_str(@q{@>" }"); /* close that group */
3864 out_str("\\X");
3867 @ @<Output the text...@>=
/* copy the name to the output one character at a time; when |an_output| is set, special characters get an escape prefix, and otherwise the text after a vertical bar is copied to the end of |buffer| and handed to |output_C| */
3868 sprint_section_name(scratch,cur_name); /* the characters are read from |scratch| below */
3869 k=scratch;
3870 k_limit=scratch+strlen(scratch); /* the end of the name */
3871 cur_section_name=cur_name; /* kept for the error messages */
3872 while (k<k_limit) {
3873 b=*(k++);
3874 if (b=='@@') @<Skip next character, give error if not `\.{@@}'@>;
3875 if (an_output)
3876 switch (b) {
3877 case ' ':case '\\':case '#':case '%':case '$':case '^':
3878 case '{': case '}': case '~': case '&': case '_':
3879 out('\\'); /* falls through */
3880 @.\\\ @>
3881 @.\\\\@>
3882 @.\\\#@>
3883 @.\\\%@>
3884 @.\\\$@>
3885 @.\\\^@>
3886 @.\\\{@>@q}@>
3887 @q{@>@.\\\}@>
3888 @.\\\~@>
3889 @.\\\&@>
3890 @.\\\_@>
3891 default: out(b);
3893 else if (b!='|') out(b)
3894 else {
3895 @<Copy the \CEE/ text into the |buffer| array@>;
3896 save_loc=loc; save_limit=limit; loc=limit+2; limit=j+1;
3897 *limit='|'; output_C();
3898 loc=save_loc; limit=save_limit;
3902 @ @<Skip next char...@>=
3903 if (*k++!='@@') { /* the next character is passed over whether or not it matches */
3904 printf("\n! Illegal control code in section name: <");
3905 @.Illegal control code...@>
3906 print_section_name(cur_section_name); printf("> "); mark_error; /* show the offending name and record the error */
3909 @ The \CEE/ text enclosed in \pb\ should not contain `\.{\v}' characters,
3910 except within strings. We put a `\.{\v}' at the front of the buffer, so that an
3911 error message that displays the whole buffer will look a little bit sensible.
3912 The variable |delim| is zero outside of strings, otherwise it
3913 equals the delimiter that began the string being copied.
3915 @<Copy the \CEE/ text into...@>=
/* copy characters from |scratch| to the positions after |limit| in |buffer|, stopping at a vertical bar that lies outside a string or at the end of the name; |delim| is set by an opening quote and cleared by the matching one */
3916 j=limit+1; *j='|'; delim=0; /* the copy begins after this extra bar */
3917 while (1) {
3918 if (k>=k_limit) { /* the name ended before a closing bar was seen */
3919 printf("\n! C text in section name didn't end: <");
3920 @.C text...didn't end@>
3921 print_section_name(cur_section_name); printf("> "); mark_error; break;
3923 b=*(k++);
3924 if (b=='@@' || (b=='\\' && delim!=0))
3925 @<Copy a quoted character into the buffer@>
3926 else {
3927 if (b=='\'' || b=='"')
3928 if (delim==0) delim=b;
3929 else if (delim==b) delim=0;
3930 if (b!='|' || delim!=0) {
3931 if (j>buffer+long_buf_size-3) overflow("buffer");
3932 *(++j)=b;
3934 else break;
3938 @ @<Copy a quoted char...@>= {
3939 if (j>buffer+long_buf_size-4) overflow("buffer"); /* two characters are about to be stored */
3940 *(++j)=b; *(++j)=*(k++); /* the character after |b| is copied without being examined */
3943 @** Phase two processing.
3944 We have assembled enough pieces of the puzzle in order to be ready to specify
3945 the processing in \.{CWEAVE}'s main pass over the source file. Phase two
3946 is analogous to phase one, except that more work is involved because we must
3947 actually output the \TEX/ material instead of merely looking at the
3948 \.{CWEB} specifications.
3950 @<Predecl...@>=
3951 void phase_two();
3953 @ @c
/* restart the input, copy the limbo material, and translate the sections one by one until the input has ended */
3954 void
3955 phase_two() {
3956 reset_input(); if (show_progress) printf("\nWriting the output file...");
3957 @.Writing the output file...@>
3958 section_count=0; format_visible=1; copy_limbo(); /* no sections have been counted yet, and the next format is to be shown */
3959 finish_line(); flush_buffer(out_buf,0,0); /* insert a blank line, it looks nice */
3960 while (!input_has_ended) @<Translate the current section@>; /* one pass of the loop for each section */
3963 @ The output file will contain the control sequence \.{\\Y} between non-null
3964 parts of a section, e.g., between the \TEX/ and definition parts if both
3965 are nonempty. This puts a little white space between the parts when they are
3966 printed. However, we don't want \.{\\Y} to occur between two definitions
3967 within a single section. The variables |out_line| or |out_ptr| will
3968 change if a part is non-null, so the following macros `|save_position|'
3969 and `|emit_space_if_needed|' are able to handle the situation:
3971 @d save_position save_line=out_line; save_place=out_ptr
3972 @d emit_space_if_needed if (save_line!=out_line || save_place!=out_ptr)
3973 out_str("\\Y");
3974 space_checked=1
3975 @.\\Y@>
3977 @<Global...@>=
3978 int save_line; /* former value of |out_line| */
3979 char *save_place; /* former value of |out_ptr| */
3980 int sec_depth; /* the integer, if any, following \.{@@*} */
3981 boolean space_checked; /* have we done |emit_space_if_needed|? */
3982 boolean format_visible; /* should the next format declaration be output? */
3983 boolean doing_format=0; /* are we outputting a format declaration? */
3984 boolean group_found=0; /* has a starred section occurred? */
3986 @ @<Translate the current section@>= {
3987 section_count++;
3988 @<Output the code for the beginning of a new section@>;
3989 save_position;
3990 @<Translate the \TEX/ part of the current section@>;
3991 @<Translate the definition part of the current section@>;
3992 @<Translate the \CEE/ part of the current section@>;
3993 @<Show cross-references to this section@>;
3994 @<Output the code for the end of a section@>;
3997 @ Sections beginning with the \.{CWEB} control sequence `\.{@@\ }' start in the
3998 output with the \TEX/ control sequence `\.{\\M}', followed by the section
3999 number. Similarly, `\.{@@*}' sections lead to the control sequence `\.{\\N}'.
4000 In this case there's an additional parameter, representing one plus the
4001 specified depth, immediately after the \.{\\N}.
4002 If the section has changed, we put \.{\\*} just after the section number.
4004 @<Output the code for the beginning...@>=
4005 if (*(loc-1)!='*') out_str("\\M"); /* the section was not begun with a star */
4006 @.\\M@>
4007 else {
4008 while (*loc == ' ') loc++;
4009 if (*loc=='*') { /* ``top'' level */
4010 sec_depth = -1;
4011 loc++;
4013 else {
4014 for (sec_depth=0; xisdigit(*loc);loc++) /* read a decimal depth; it stays zero if no digit is present */
4015 sec_depth = sec_depth*10 + (*loc) -'0';
4017 while (*loc == ' ') loc++; /* remove spaces before group title */
4018 group_found=1;
4019 out_str("\\N");
4020 @.\\N@>
4021 {@+ char s[32];@+sprintf(s,"{%d}",sec_depth+1);@+out_str(s);@+} /* the parameter is one plus the depth, hence zero for the top level */
4022 if (show_progress)
4023 printf("*%d",section_count); update_terminal; /* print a progress report */
4025 out_str("{");out_section(section_count); out_str("}"); /* the section number, in braces */
4027 @ In the \TEX/ part of a section, we simply copy the source text, except that
4028 index entries are not copied and \CEE/ text within \pb\ is translated.
4030 @<Translate the \T...@>= do {
4031 next_control=copy_TeX(); /* the value returned decides what is done next */
4032 switch (next_control) {
4033 case '|': init_stack; output_C(); break; /* code within bars is translated and output */
4034 case '@@': out('@@'); break; /* an at-sign is output as it stands */
4035 case TeX_string: case noop:
4036 case xref_roman: case xref_wildcard: case xref_typewriter:
4037 case section_name: loc-=2; next_control=get_next(); /* skip to \.{@@>} */
4038 if (next_control==TeX_string)
4039 err_print("! TeX string should be in C text only"); break; /* the |break| is taken in all of these cases */
4040 @.TeX string should be...@>
4041 case thin_space: case math_break: case ord:
4042 case line_break: case big_line_break: case no_line_break: case join:
4043 case pseudo_semi: case macro_arg_open: case macro_arg_close:
4044 case output_defs_code:
4045 err_print("! You can't do that in TeX text"); break;
4046 @.You can't do that...@>
4048 } while (next_control<format_code);
4050 @ When we get to the following code we have |next_control>=format_code|, and
4051 the token memory is in its initial empty state.
4053 @<Translate the d...@>=
4054 space_checked=0; /* |emit_space_if_needed| has not been done yet in this part */
4055 while (next_control<=definition) { /* |format_code| or |definition| */
4056 init_stack;
4057 if (next_control==definition) @<Start a macro definition@>@;
4058 else @<Start a format definition@>;
4059 outer_parse(); finish_C(format_visible); format_visible=1; /* parse the rest, output it if it is visible, and make the next format visible again */
4060 doing_format=0;
4063 @ The |finish_C| procedure outputs the translation of the current
4064 scraps, preceded by the control sequence `\.{\\B}' and followed by the
4065 control sequence `\.{\\par}'. It also restores the token and scrap
4066 memories to their initial empty state.
4068 A |force| token is appended to the current scraps before translation
4069 takes place, so that the translation will normally end with \.{\\6} or
4070 \.{\\7} (the \TEX/ macros for |force| and |big_force|). This \.{\\6} or
4071 \.{\\7} is replaced by the concluding \.{\\par} or by \.{\\Y\\par}.
4073 @<Predecl...@>=
4074 void finish_C();
4076 @ @c
/* if |visible|, translate and output the current scraps; the token, text, and scrap pointers are then set back to their starting values */
4077 void
4078 finish_C(visible) /* finishes a definition or a \CEE/ part */
4079 boolean visible; /* nonzero if we should produce \TEX/ output */
4081 text_pointer p; /* translation of the scraps */
4082 if (visible) {
4083 out_str("\\B"); app_tok(force); app_scrap(insert,no_math); /* a final |force| is added before translating */
4084 p=translate();
4085 @.\\B@>
4086 app(tok_flag+(int)(p-tok_start)); make_output(); /* output the list */
4087 if (out_ptr>out_buf+1) /* at least two characters are available for inspection */
4088 if (*(out_ptr-1)=='\\')
4089 @.\\6@>
4090 @.\\7@>
4091 @.\\Y@>
4092 if (*out_ptr=='6') out_ptr-=2; /* drop the last two characters */
4093 else if (*out_ptr=='7') *out_ptr='Y'; /* change the last character to Y */
4094 out_str("\\par"); finish_line();
4096 if (text_ptr>max_text_ptr) max_text_ptr=text_ptr; /* update the high-water marks */
4097 if (tok_ptr>max_tok_ptr) max_tok_ptr=tok_ptr;
4098 if (scrap_ptr>max_scr_ptr) max_scr_ptr=scrap_ptr;
4099 tok_ptr=tok_mem+1; text_ptr=tok_start+1; scrap_ptr=scrap_info;
4100 /* forget the tokens and the scraps */
4103 @ Keeping in line with the conventions of the \CEE/ preprocessor (and
4104 otherwise contrary to the rules of \.{CWEB}) we distinguish here
4105 between the case that `\.(' immediately follows an identifier and the
4106 case that the two are separated by a space. In the latter case, and
4107 if the identifier is not followed by `\.(' at all, the replacement
4108 text starts immediately after the identifier. In the former case,
4109 it starts after we scan the matching `\.)'.
4111 @<Start a macro...@>= {
4112 if (save_line!=out_line || save_place!=out_ptr || space_checked) app(backup); /* something was output since the saved position, or the space check was already done */
4113 if(!space_checked){emit_space_if_needed;save_position;}
4114 app_str("\\D"); /* this will produce `\&{define }' */
4115 @.\\D@>
4116 if ((next_control=get_next())!=identifier)
4117 err_print("! Improper macro definition");
4118 @.Improper macro definition@>
4119 else {
4120 app('$'); app_cur_id(0); /* the macro name is appended without forming a scrap */
4121 if (*loc=='(') /* a parameter list is scanned only if the parenthesis follows immediately */
4122 reswitch: switch (next_control=get_next()) {
4123 case '(': case ',': app(next_control); goto reswitch;
4124 case identifier: app_cur_id(0); goto reswitch; /* a parameter name, again without a scrap */
4125 case ')': app(next_control); next_control=get_next(); break; /* the list is complete */
4126 default: err_print("! Improper macro definition"); break;
4128 else next_control=get_next();
4129 app_str("$ "); app(break_space);
4130 app_scrap(dead,no_math); /* scrap won't take part in the parsing */
4134 @ @<Start a format...@>= {
/* append \.{\\F} and the two identifiers of the format definition as an |exp| scrap followed by a |semi| scrap */
4135 doing_format=1;
4136 if(*(loc-1)=='s' || *(loc-1)=='S') format_visible=0; /* the character just before |loc| is an `s' in either case; presumably the \.{@@s} control code */
4137 if(!space_checked){emit_space_if_needed;save_position;}
4138 app_str("\\F"); /* this will produce `\&{format }' */
4139 @.\\F@>
4140 next_control=get_next();
4141 if (next_control==identifier) {
4142 app(id_flag+(int)(id_lookup(id_first, id_loc,normal)-name_dir)); /* first identifier */
4143 app(' ');
4144 app(break_space); /* this is syntactically separate from what follows */
4145 next_control=get_next();
4146 if (next_control==identifier) {
4147 app(id_flag+(int)(id_lookup(id_first, id_loc,normal)-name_dir)); /* second identifier */
4148 app_scrap(exp,maybe_math); app_scrap(semi,maybe_math);
4149 next_control=get_next();
4152 if (scrap_ptr!=scrap_info+2) err_print("! Improper format definition"); /* the two scraps appended above must be present */
4153 @.Improper format definition@>
4156 @ Finally, when the \TEX/ and definition parts have been treated, we have
4157 |next_control>=begin_C|. We will make the global variable |this_section|
4158 point to the current section name, if it has a name.
4160 @<Global...@>=
4161 name_pointer this_section; /* the current section name, or zero */
4163 @ @<Translate the \CEE/...@>=
4164 this_section=name_dir; /* assume an unnamed section until a section name is seen */
4165 if (next_control<=section_name) {
4166 emit_space_if_needed; init_stack;
4167 if (next_control==begin_C) next_control=get_next(); /* unnamed \CEE/ part */
4168 else {
4169 this_section=cur_section; /* named section: remember which name is being defined */
4170 @<Check that '=' or '==' follows this section name, and
4171 emit the scraps to start the section definition@>;
4173 while (next_control<=section_name) {
4174 outer_parse();
4175 @<Emit the scrap for a section name if present@>;
4177 finish_C(1); /* translate and output what has been accumulated */
4180 @ The title of the section and an $\E$ or $\mathrel+\E$ are made
4181 into a scrap that should not take part in the parsing.
4183 @<Check that '='...@>=
/* skip any plus signs, insist on an equals sign, then build the |dead| scrap that shows the section heading */
4184 do next_control=get_next();
4185 while (next_control=='+'); /* allow optional `\.{+=}' */
4186 if (next_control!='=' && next_control!=eq_eq)
4187 err_print("! You need an = sign after the section name");
4188 @.You need an = sign...@>
4189 else next_control=get_next();
4190 if (out_ptr>out_buf+1 && *out_ptr=='Y' && *(out_ptr-1)=='\\') app(backup);
4191 /* the section name will be flush left */
4192 @.\\Y@>
4193 app(section_flag+(int)(this_section-name_dir));
4194 cur_xref=(xref_pointer)this_section->xref; /* cross-reference list of this name */
4195 if(cur_xref->num==file_flag) cur_xref=cur_xref->xlink; /* step past a |file_flag| entry */
4196 app_str("${}");
4197 if (cur_xref->num!=section_count+def_flag) { /* the first entry is not the defining reference of the current section */
4198 app_str("\\mathrel+"); /*section name is multiply defined*/
4199 this_section=name_dir; /*so we won't give cross-reference info here*/
4201 app_str("\\E"); /* output an equivalence sign */
4202 @.\\E@>
4203 app_str("{}$");
4204 app(force); app_scrap(dead,no_math);
4205 /* this forces a line break unless `\.{@@+}' follows */
4207 @ @<Emit the scrap...@>=
4208 if (next_control<section_name) { /* a control code below |section_name| is reported and skipped */
4209 err_print("! You can't do that in C text");
4210 @.You can't do that...@>
4211 next_control=get_next();
4213 else if (next_control==section_name) { /* a section name in the \CEE/ text becomes a |section_scrap| */
4214 app(section_flag+(int)(cur_section-name_dir));
4215 app_scrap(section_scrap,maybe_math);
4216 next_control=get_next();
4219 @ Cross references relating to a named section are given
4220 after the section ends.
4222 @<Show cross...@>=
4223 if (this_section>name_dir) { /* |this_section| stays at |name_dir| for an unnamed section and after \.{\\mathrel+} was appended */
4224 cur_xref=(xref_pointer)this_section->xref;
4225 if (cur_xref->num==file_flag){an_output=1;cur_xref=cur_xref->xlink;} /* a |file_flag| entry sets |an_output| and is skipped */
4226 else an_output=0;
4227 if (cur_xref->num>def_flag)
4228 cur_xref=cur_xref->xlink; /* bypass current section number */
4229 footnote(def_flag); footnote(cite_flag); footnote(0); /* each call resumes where the previous one left |cur_xref| */
4232 @ The |footnote| procedure gives cross-reference information about
4233 multiply defined section names (if the |flag| parameter is
4234 |def_flag|), or about references to a section name
4235 (if |flag==cite_flag|), or to its uses (if |flag==0|). It assumes that
4236 |cur_xref| points to the first cross-reference entry of interest, and it
4237 leaves |cur_xref| pointing to the first element not printed. Typical outputs:
4238 `\.{\\A101.}'; `\.{\\Us 370\\ET1009.}';
4239 `\.{\\As 8, 27\\*\\ETs64.}'.
4241 Note that the output of \.{CWEAVE} is not English-specific; users may
4242 supply new definitions for the macros \.{\\A}, \.{\\As}, etc.
4244 @<Predecl...@>=
4245 void footnote();
4247 @ @c
/* Writes one line of cross-references, starting at |cur_xref|, for the
   entries whose |num| exceeds |flag|; nothing is written if there are none.
   On return |cur_xref| points to the first entry that was not printed. */
4248 void
4249 footnote(flag) /* outputs section cross-references */
4250 sixteen_bits flag; /* |def_flag|, |cite_flag|, or 0 */
4252 xref_pointer q; /* cross-reference pointer variable */
4253 if (cur_xref->num<=flag) return; /* no entries of the requested kind */
4254 finish_line(); out('\\');
4255 @.\\A@>
4256 @.\\Q@>
4257 @.\\U@>
4258 out(flag==0? 'U': flag==cite_flag? 'Q': 'A'); /* macro letter: \.U for 0, \.Q for |cite_flag|, \.A otherwise */
4259 @<Output all the section numbers on the reference list |cur_xref|@>;
4260 out('.');
4263 @ The following code distinguishes three cases, according as the number
4264 of cross-references is one, two, or more than two. Variable |q| points
4265 to the first cross-reference, and the last link is a zero.
4267 @<Output all the section numbers...@>=
4268 q=cur_xref; if (q->xlink->num>flag) out('s'); /* plural */
4269 while (1) {
4270 out_section(cur_xref->num-flag); /* subtracting |flag| leaves the plain section number */
4271 cur_xref=cur_xref->xlink; /* point to the next cross-reference to output */
4272 if (cur_xref->num<=flag) break; /* no more entries of this kind */
4273 if (cur_xref->xlink->num>flag) out_str(", "); /* not the last */
4274 else {out_str("\\ET"); /* the last */
4275 @.\\ET@>
4276 if (cur_xref != q->xlink) out('s'); /* the last of more than two */
4280 @ @<Output the code for the end of a section@>=
4281 out_str("\\fi"); finish_line(); /* \.{\\fi} closes the section */
4282 @.\\fi@>
4283 flush_buffer(out_buf,0,0); /* insert a blank line, it looks nice */
4285 @** Phase three processing.
4286 We are nearly finished! \.{CWEAVE}'s only remaining task is to write out the
4287 index, after sorting the identifiers and index entries.
4289 If the user has set the |no_xref| flag (the \.{-x} option on the command line),
4290 just finish off the page, omitting the index, section name list, and table of
4291 contents.
4293 @<Predecl...@>=
4294 void phase_three();
4296 @ @c
/* Phase three: with |no_xref| set only \.{\\end} is written. Otherwise the
   sorted index goes to |idx_file|, the section names go to |scn_file|, and
   the closing macros go to |tex_file|. */
4297 void
4298 phase_three() {
4299 if (no_xref) { /* omit the index, the section names, and the table of contents */
4300 finish_line();
4301 out_str("\\end");
4302 @.\\end@>
4303 finish_line();
4305 else {
4306 phase=3; if (show_progress) printf("\nWriting the index...");
4307 @.Writing the index...@>
4308 finish_line();
4309 if ((idx_file=fopen(idx_file_name,"w"))==NULL) /* open the index file before anything refers to it */
4310 fatal("! Cannot open index file ",idx_file_name);
4311 @.Cannot open index file@>
4312 if (change_exists) {
4313 @<Tell about changed sections@>; finish_line(); finish_line();
4315 out_str("\\inx"); finish_line(); /* written before |active_file| is switched to |idx_file| */
4316 @.\\inx@>
4317 active_file=idx_file; /* change active file to the index file */
4318 @<Do the first pass of sorting@>;
4319 @<Sort and output the index@>;
4320 finish_line(); fclose(active_file); /* finished with |idx_file| */
4321 active_file=tex_file; /* switch back to |tex_file| for a tic */
4322 out_str("\\fin"); finish_line();
4323 @.\\fin@>
4324 if ((scn_file=fopen(scn_file_name,"w"))==NULL)
4325 fatal("! Cannot open section file ",scn_file_name);
4326 @.Cannot open section file@>
4327 active_file=scn_file; /* change active file to section listing file */
4328 @<Output all the section names@>;
4329 finish_line(); fclose(active_file); /* finished with |scn_file| */
4330 active_file=tex_file;
4331 if (group_found) out_str("\\con");@+else out_str("\\end"); /* the closing macro depends on |group_found| */
4332 @.\\con@>
4333 @.\\end@>
4334 finish_line();
4335 fclose(active_file); /* |active_file| is |tex_file| at this point */
4337 if (show_happiness) printf("\nDone.");
4338 check_complete(); /* was all of the change file used? */
4341 @ Just before the index comes a list of all the changed sections, including
4342 the index section itself.
4344 @<Global...@>=
4345 sixteen_bits k_section; /* runs through the sections */
4347 @ @<Tell about changed sections@>= {
4348 /* remember that the index is already marked as changed */
4349 k_section=0;
4350 while (!changed_section[++k_section]); /* advance to the first changed section */
4351 out_str("\\ch ");
4352 @.\\ch@>
4353 out_section(k_section);
4354 while (k_section<section_count) {
4355 while (!changed_section[++k_section]); /* the scan has no bound of its own; presumably it stops because section |section_count| (the index) is marked */
4356 out_str(", "); out_section(k_section);
4358 out('.');
4361 @ A left-to-right radix sorting method is used, since this makes it easy to
4362 adjust the collating sequence and since the running time will be at worst
4363 proportional to the total length of all entries in the index. We put the
4364 identifiers into 102 different lists based on their first characters (230 lists once the 128 high-bit characters are counted).
4365 (Uppercase letters are put into the same list as the corresponding lowercase
4366 letters, since we want to have `$t<\\{TeX}<\&{to}$'.) The
4367 list for character |c| begins at location |bucket[c]| and continues through
4368 the |blink| array.
4370 @<Global...@>=
4371 name_pointer bucket[256];
4372 name_pointer next_name; /* successor of |cur_name| when sorting */
4373 name_pointer blink[max_names]; /* links in the buckets */
4375 @ To begin the sorting, we go through all the hash lists and put each entry
4376 having a nonempty cross-reference list into the proper bucket.
4378 @<Do the first pass...@>= {
4379 int c;
4380 for (c=0; c<=255; c++) bucket[c]=NULL; /* start with every bucket empty */
4381 for (h=hash; h<=hash_end; h++) { /* visit every hash list */
4382 next_name=*h;
4383 while (next_name) {
4384 cur_name=next_name; next_name=cur_name->link; /* step along the hash list */
4385 if (cur_name->xref!=(char*)xmem) { /* only names having a nonempty cross-reference list are indexed */
4386 c=(eight_bits)((cur_name->byte_start)[0]); /* first character of the name */
4387 if (xisupper(c)) c=tolower(c); /* uppercase letters share the bucket of the lowercase letter */
4388 blink[cur_name-name_dir]=bucket[c]; bucket[c]=cur_name; /* push the name onto the front of its bucket */
4394 @ During the sorting phase we shall use the |cat| and |trans| arrays from
4395 \.{CWEAVE}'s parsing algorithm and rename them |depth| and |head|. They now
4396 represent a stack of identifier lists for all the index entries that have
4397 not yet been output. The variable |sort_ptr| tells how many such lists are
4398 present; the lists are output in reverse order (first |sort_ptr|, then
4399 |sort_ptr-1|, etc.). The |j|th list starts at |head[j]|, and if the first
4400 |k| characters of all entries on this list are known to be equal we have
4401 |depth[j]==k|.
4403 @ @<Rest of |trans_plus| union@>=
4404 name_pointer Head;
4406 @ @d depth cat /* reclaims memory that is no longer needed for parsing */
4407 @d head trans_plus.Head /* ditto */
4408 @f sort_pointer int
4409 @d sort_pointer scrap_pointer /* ditto */
4410 @d sort_ptr scrap_ptr /* ditto */
4411 @d max_sorts max_scraps /* ditto */
4413 @<Global...@>=
4414 eight_bits cur_depth; /* depth of current buckets */
4415 char *cur_byte; /* index into |byte_mem| */
4416 sixteen_bits cur_val; /* current cross-reference number */
4417 sort_pointer max_sort_ptr; /* largest value of |sort_ptr| */
4419 @ @<Set init...@>=
4420 max_sort_ptr=scrap_info;
4422 @ The desired alphabetic order is specified by the |collate| array; namely,
4423 $|collate|[0]<|collate|[1]<\cdots<|collate|[228]$; positions 101 through 228 hold the high-bit characters.
4425 @<Global...@>=
4426 eight_bits collate[102+128]; /* collation order */
4427 @^high-bit character handling@>
4429 @ We use the order $\hbox{null}<\.\ <\hbox{other characters}<{}$\.\_${}<
4430 \.A=\.a<\cdots<\.Z=\.z<\.0<\cdots<\.9.$ Warning: The collation mapping
4431 needs to be changed if ASCII code is not being used.
4432 @^ASCII code dependencies@>
4433 @^high-bit character handling@>
4435 We initialize |collate| by copying a few characters at a time, because
4436 some \CEE/ compilers choke on long strings.
4438 @<Set init...@>=
/* positions 1 through 100 receive the seven-bit characters and positions 101 through 228 receive the codes \.{\\200} through \.{\\377} in numerical order */
/* every |strcpy| also stores a null byte just past its run; the next call overwrites it, and the final one lands in |collate[229]|, the last element */
4439 collate[0]=0;
4440 strcpy(collate+1," \1\2\3\4\5\6\7\10\11\12\13\14\15\16\17");
4441 /* 16 characters + 1 = 17 */
4442 strcpy(collate+17,"\20\21\22\23\24\25\26\27\30\31\32\33\34\35\36\37");
4443 /* 16 characters + 17 = 33 */
4444 strcpy(collate+33,"!\42#$%&'()*+,-./:;<=>?@@[\\]^`{|}~_");
4445 /* 32 characters + 33 = 65 */
4446 strcpy(collate+65,"abcdefghijklmnopqrstuvwxyz0123456789");
4447 /* (26 + 10) characters + 65 = 101 */
4448 strcpy(collate+101,"\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217");
4449 /* 16 characters + 101 = 117 */
4450 strcpy(collate+117,"\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237");
4451 /* 16 characters + 117 = 133 */
4452 strcpy(collate+133,"\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257");
4453 /* 16 characters + 133 = 149 */
4454 strcpy(collate+149,"\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277");
4455 /* 16 characters + 149 = 165 */
4456 strcpy(collate+165,"\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317");
4457 /* 16 characters + 165 = 181 */
4458 strcpy(collate+181,"\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337");
4459 /* 16 characters + 181 = 197 */
4460 strcpy(collate+197,"\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357");
4461 /* 16 characters + 197 = 213 */
4462 strcpy(collate+213,"\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377");
4463 /* 16 characters + 213 = 229 */
4465 @ Procedure |unbucket| goes through the buckets and adds nonempty lists
4466 to the stack, using the collating sequence specified in the |collate| array.
4467 The parameter to |unbucket| tells the current depth in the buckets.
4468 Any two sequences that agree in their first 255 character positions are
4469 regarded as identical.
4471 @d infinity 255 /* $\infty$ (approximately) */
4473 @<Predecl...@>=
4474 void unbucket();
4476 @ @c
/* Moves every nonempty bucket onto the sorting stack and leaves the bucket
   empty. The buckets are visited from the end of |collate| to its start, so
   the list for the smallest character is pushed last. */
4477 void
4478 unbucket(d) /* empties buckets having depth |d| */
4479 eight_bits d;
4481 int c; /* index into |bucket|; cannot be a simple |char| because of sign
4482 comparison below*/
4483 for (c=100+128; c>= 0; c--) if (bucket[collate[c]]) {
4484 @^high-bit character handling@>
4485 if (sort_ptr>=scrap_info_end) overflow("sorting"); /* no room for another list */
4486 sort_ptr++;
4487 if (sort_ptr>max_sort_ptr) max_sort_ptr=sort_ptr; /* keep the maximum for the statistics */
4488 if (c==0) sort_ptr->depth=infinity; /* bucket 0 collects names that have ended, so that list is never split again */
4489 else sort_ptr->depth=d;
4490 sort_ptr->head=bucket[collate[c]]; bucket[collate[c]]=NULL; /* hand the list to the stack entry and empty the bucket */
4494 @ @<Sort and output...@>=
4495 sort_ptr=scrap_info; unbucket(1); /* empty stack; the first pass bucketed on the first character, so these lists have depth 1 */
4496 while (sort_ptr>scrap_info) { /* until the stack is empty */
4497 cur_depth=sort_ptr->depth;
4498 if (blink[sort_ptr->head-name_dir]==0 || cur_depth==infinity) /* a one-element list, or a list that is not to be split further */
4499 @<Output index entries for the list at |sort_ptr|@>@;
4500 else @<Split the list at |sort_ptr| into further lists@>;
4503 @ @<Split the list...@>= {
4504 eight_bits c;
4505 next_name=sort_ptr->head;
4506 do {
4507 cur_name=next_name; next_name=blink[cur_name-name_dir]; /* fetch the successor before |blink| is overwritten below */
4508 cur_byte=cur_name->byte_start+cur_depth; /* the character at offset |cur_depth| selects the bucket */
4509 if (cur_byte==(cur_name+1)->byte_start) c=0; /* hit end of the name */
4510 else {
4511 c=(eight_bits) *cur_byte;
4512 if (xisupper(c)) c=tolower(c); /* fold uppercase into lowercase */
4514 blink[cur_name-name_dir]=bucket[c]; bucket[c]=cur_name; /* push the name onto the front of its bucket */
4515 } while (next_name);
4516 --sort_ptr; unbucket(cur_depth+1); /* pop the list just distributed, then push its sublists */
4519 @ @<Output index...@>= {
4520 cur_name=sort_ptr->head;
4521 do {
4522 out_str("\\I"); /* each entry starts with \.{\\I} */
4523 @.\\I@>
4524 @<Output the name at |cur_name|@>;
4525 @<Output the cross-references at |cur_name|@>;
4526 cur_name=blink[cur_name-name_dir]; /* next name on this list */
4527 } while (cur_name);
4528 --sort_ptr; /* pop the finished list */
4531 @ @<Output the name...@>=
/* write the font-selecting control sequence that matches |cur_name->ilk|, then the name itself */
4532 switch (cur_name->ilk) {
4533 case normal: case func_template: if (is_tiny(cur_name)) out_str("\\|");
4534 else {char *j;
4535 for (j=cur_name->byte_start;j<(cur_name+1)->byte_start;j++)
4536 if (xislower(*j)) goto lowcase; /* one lowercase letter is enough */
4537 out_str("\\."); break; /* the name contains no lowercase letter */
4538 lowcase: out_str("\\\\");
4540 break;
4541 @.\\|@>
4542 @.\\.@>
4543 @.\\\\@>
4544 case wildcard: out_str("\\9");@+ goto not_an_identifier;
4545 @.\\9@>
4546 case typewriter: out_str("\\."); /* no |break| here: control continues into the |roman| case */
4547 @.\\.@>
4548 case roman: not_an_identifier: out_name(cur_name,0); goto name_done;
4549 case custom: {char *j; out_str("$\\");
4550 for (j=cur_name->byte_start;j<(cur_name+1)->byte_start;j++)
4551 out(*j=='_'? 'x': *j=='$'? 'X': *j); /* an underscore is written as \.x and a dollar sign as \.X */
4552 out('$');
4553 goto name_done;
4555 default: out_str("\\&"); /* every other |ilk| */
4556 @.\\\&@>
4558 out_name(cur_name,1); /* the cases that |goto name_done| skip this call */
4559 name_done:@;
4561 @ Section numbers that are to be underlined are enclosed in
4562 `\.{\\[}$\,\ldots\,$\.]'.
4564 @<Output the cross-references...@>=
4565 @<Invert the cross-reference list at |cur_name|, making |cur_xref| the head@>;
4566 do {
4567 out_str(", "); cur_val=cur_xref->num;
4568 if (cur_val<def_flag) out_section(cur_val); /* a plain section number */
4569 else {out_str("\\["); out_section(cur_val-def_flag); out(']');} /* remove |def_flag| and mark the number for underlining */
4570 @.\\[@>
4571 cur_xref=cur_xref->xlink;
4572 } while (cur_xref!=xmem); /* |xmem| marks the end of the list */
4573 out('.'); finish_line();
4575 @ List inversion is best thought of as popping elements off one stack and
4576 pushing them onto another. In this case |cur_xref| will be the head of
4577 the stack that we push things onto.
4578 @<Global...@>=
4579 xref_pointer next_xref, this_xref;
4580 /* pointer variables for rearranging a list */
4582 @ @<Invert the cross-reference list at |cur_name|, making |cur_xref| the head@>=
4583 this_xref=(xref_pointer)cur_name->xref; cur_xref=xmem; /* the new list starts out empty, which is represented by |xmem| */
4584 do {
4585 next_xref=this_xref->xlink; this_xref->xlink=cur_xref; /* detach the front entry and link it ahead of the new list */
4586 cur_xref=this_xref; this_xref=next_xref; /* the detached entry is the new head; continue with the rest */
4587 } while (this_xref!=xmem);
4589 @ The following recursive procedure walks through the tree of section names and
4590 prints them.
4591 @^recursion@>
4593 @<Predecl...@>=
4594 void section_print();
4596 @ @c
/* Symmetric-order traversal: the left subtree, then the name at |p| with
   its citations and uses, then the right subtree. */
4597 void
4598 section_print(p) /* print all section names in subtree |p| */
4599 name_pointer p;
4601 if (p) {
4602 section_print(p->llink); out_str("\\I");
4603 @.\\I@>
4604 tok_ptr=tok_mem+1; text_ptr=tok_start+1; scrap_ptr=scrap_info; init_stack; /* reset the token, text, and scrap memories before appending the single token below */
4605 app(p-name_dir+section_flag); make_output();
4606 footnote(cite_flag);
4607 footnote(0); /* |cur_xref| was set by |make_output| */
4608 finish_line();@/
4609 section_print(p->rlink);
4613 @ @<Output all the section names@>=section_print(root)
4615 @ Because on some systems the difference between two pointers is a |long|
4616 rather than an |int|, we use \.{\%ld} to print these quantities.
4619 void
4620 print_stats() {
/* each count is a pointer difference, cast to |long| and printed with \.{\%ld} next to its capacity */
4621 printf("\nMemory usage statistics:\n");
4622 @.Memory usage statistics:@>
4623 printf("%ld names (out of %ld)\n",
4624 (long)(name_ptr-name_dir),(long)max_names);
4625 printf("%ld cross-references (out of %ld)\n",
4626 (long)(xref_ptr-xmem),(long)max_refs);
4627 printf("%ld bytes (out of %ld)\n",
4628 (long)(byte_ptr-byte_mem),(long)max_bytes);
4629 printf("Parsing:\n");
4630 printf("%ld scraps (out of %ld)\n",
4631 (long)(max_scr_ptr-scrap_info),(long)max_scraps);
4632 printf("%ld texts (out of %ld)\n",
4633 (long)(max_text_ptr-tok_start),(long)max_texts);
4634 printf("%ld tokens (out of %ld)\n",
4635 (long)(max_tok_ptr-tok_mem),(long)max_toks);
4636 printf("%ld levels (out of %ld)\n",
4637 (long)(max_stack_ptr-stack),(long)stack_size);
4638 printf("Sorting:\n");
4639 printf("%ld levels (out of %ld)\n",
4640 (long)(max_sort_ptr-scrap_info),(long)max_scraps); /* the sorting stack is measured from |scrap_info| and shares the |max_scraps| limit */
4643 @** Index.
4644 If you have read and understood the code for Phase III above, you know what
4645 is in this index and how it got here. All sections in which an identifier is
4646 used are listed with that identifier, except that reserved words are
4647 indexed only when they appear in format definitions, and the appearances
4648 of identifiers in section names are not indexed. Underlined entries
4649 correspond to where the identifier was declared. Error messages, control
4650 sequences put into the output, and a few
4651 other things like ``recursion'' are indexed here too.