source/texk/web2c/cwebdir/cweave.w

   1 % This file is part of CWEB.
   2 % This program by Silvio Levy and Donald E. Knuth
   3 % is based on a program by Knuth.
   4 % It is distributed WITHOUT ANY WARRANTY, express or implied.
   5 % Version 3.64 --- February 2002
   6 % (essentially the same as version 3.6, which added
   7 %  recently introduced features of standard C++ to version 3.4)
   8
   9 % Copyright (C) 1987,1990,1993,2000 Silvio Levy and Donald E. Knuth
  10
  11 % Permission is granted to make and distribute verbatim copies of this
  12 % document provided that the copyright notice and this permission notice
  13 % are preserved on all copies.
  14
  15 % Permission is granted to copy and distribute modified versions of this
  16 % document under the conditions for verbatim copying, provided that the
  17 % entire resulting derived work is given a different name and distributed
  18 % under the terms of a permission notice identical to this one.
  19
  20 % Here is TeX material that gets inserted after \input cwebmac
  21 \def\hang{\hangindent 3em\indent\ignorespaces}
  22 \def\pb{$\.|\ldots\.|$} % C brackets (|...|)
  23 \def\v{\char'174} % vertical (|) in typewriter font
  24 \def\dleft{[\![} \def\dright{]\!]} % double brackets
  25 \mathchardef\RA="3221 % right arrow
  26 \mathchardef\BA="3224 % double arrow
  27 \def\({} % ) kludge for alphabetizing certain section names
  28 \def\TeXxstring{\\{\TEX/\_string}}
  29 \def\skipxTeX{\\{skip\_\TEX/}}
  30 \def\copyxTeX{\\{copy\_\TEX/}}
  31
  32 \def\title{CWEAVE (Version 3.64)}
  33 \def\topofcontents{\null\vfill
  34   \centerline{\titlefont The {\ttitlefont CWEAVE} processor}
  35   \vskip 15pt
  36   \centerline{(Version 3.64)}
  37   \vfill}
  38 \def\botofcontents{\vfill
  39 \noindent
  40 Copyright \copyright\ 1987, 1990, 1993, 2000 Silvio Levy and Donald E. Knuth
  41 \bigskip\noindent
  42 Permission is granted to make and distribute verbatim copies of this
  43 document provided that the copyright notice and this permission notice
  44 are preserved on all copies.
  45
  46 \smallskip\noindent
  47 Permission is granted to copy and distribute modified versions of this
  48 document under the conditions for verbatim copying, provided that the
  49 entire resulting derived work is given a different name and distributed
  50 under the terms of a permission notice identical to this one.
  51 }
  52 \pageno=\contentspagenumber \advance\pageno by 1
  53 \let\maybe=\iftrue
  54 @s not_eq normal @q unreserve a C++ keyword @>
  55
  56 @** Introduction.
  57 This is the \.{CWEAVE} program by Silvio Levy and Donald E. Knuth,
  58 based on \.{WEAVE} by Knuth.
  59 We are thankful to Steve Avery,
  60 Nelson Beebe, Hans-Hermann Bode (to whom the original \CPLUSPLUS/ adaptation
  61 is due), Klaus Guntermann, Norman Ramsey, Tomas Rokicki, Joachim Schnitter,
  62 Joachim Schrod, Lee Wittenberg, Saroj Mahapatra, Cesar Augusto Rorato
  63 Crusius, and others who have contributed improvements.
  64
  65 The ``banner line'' defined here should be changed whenever \.{CWEAVE}
  66 is modified.
  67
  68 @d banner "This is CWEAVE (Version 3.64)\n"
  69
  70 @c @<Include files@>@/
  71 @h
  72 @<Common code for \.{CWEAVE} and \.{CTANGLE}@>@/
  73 @<Typedef declarations@>@/
  74 @<Global variables@>@/
  75 @<Predeclaration of procedures@>
  76
  77 @ We predeclare several standard system functions here instead of including
  78 their system header files, because the names of the header files are not as
  79 standard as the names of the functions. (For example, some \CEE/ environments
  80 have \.{<string.h>} where others have \.{<strings.h>}.)
  81
  82 @<Predecl...@>=
  83 extern int strlen(); /* length of string */
  84 extern int strcmp(); /* compare strings lexicographically */
  85 extern char* strcpy(); /* copy one string to another */
  86 extern int strncmp(); /* compare up to $n$ string characters */
  87 extern char* strncpy(); /* copy up to $n$ string characters */
     /* NOTE(review): ISO \CEE/ gives |strlen| the return type |size_t|, not
        |int|; these old-style declarations may clash with \.{<string.h>} if
        that header is ever included---confirm before adding it */
  88
  89 @ \.{CWEAVE} has a fairly straightforward outline.  It operates in
  90 three phases: First it inputs the source file and stores cross-reference
  91 data, then it inputs the source once again and produces the \TEX/ output
  92 file, and finally it sorts and outputs the index.
  93
  94 Please read the documentation for \.{common}, the set of routines common
  95 to \.{CTANGLE} and \.{CWEAVE}, before proceeding further.
  96
  97 @c
  98 int main (ac, av)
     /* entry point: saves the command line, initializes, runs the three
        phases in order, and returns whatever |wrap_up| returns */
  99 int ac; /* argument count */
 100 char **av; /* argument values */
 101 {
 102   argc=ac; argv=av; /* copy the arguments to |argc| and |argv| */
 103   program=cweave;
 104   make_xrefs=force_lines=make_pb=1; /* controlled by command-line options */
 105   common_init();
 106   @<Set initial values@>;
 107   if (show_banner) printf(banner); /* print a ``banner line'' */
     /* |banner| is a string literal with no conversion specifications,
        so passing it as the format is harmless */
 108   @<Store all the reserved words@>;
 109   phase_one(); /* read all the user's text and store the cross-references */
 110   phase_two(); /* read all the text again and translate it to \TEX/ form */
 111   phase_three(); /* output the cross-reference index */
 112   return wrap_up(); /* and exit gracefully */
 113 }
 114
 115 @ The following parameters were sufficient in the original \.{WEAVE} to
 116 handle \TEX/, so they should be sufficient for most applications of \.{CWEAVE}.
 117 If you change |max_bytes|, |max_names|, |hash_size|, or |buf_size|
 118 you have to change them also in the file |"common.w"|.
 119
 120 @d max_bytes 90000 /* the number of bytes in identifiers,
 121   index entries, and section names */
 122 @d max_names 4000 /* number of identifiers, strings, section names;
 123   must be less than 10240; used in |"common.w"| */
 124 @d max_sections 2000 /* greater than the total number of sections */
 125 @d hash_size 353 /* should be prime */
 126 @d buf_size 100 /* maximum length of input line, plus one */
 127 @d longest_name 10000 /* section names and strings shouldn't be longer than this */
 128 @d long_buf_size (buf_size+longest_name)
 129 @d line_length 80 /* lines of \TEX/ output have at most this many characters;
 130   should be less than 256 */
 131 @d max_refs 20000 /* number of cross-references; must be less than 65536 */
 132 @d max_toks 20000 /* number of symbols in \CEE/ texts being parsed;
 133   must be less than 65536 */
 134 @d max_texts 4000 /* number of phrases in \CEE/ texts being parsed;
 135   must be less than 10240 */
 136 @d max_scraps 2000 /* number of tokens in \CEE/ texts being parsed */
 137 @d stack_size 400 /* number of simultaneous output levels */
 138
 139 @ The next few sections contain stuff from the file |"common.w"| that must
 140 be included in both |"ctangle.w"| and |"cweave.w"|. It appears in
 141 file |"common.h"|, which needs to be updated when |"common.w"| changes.
 142
 143 @i common.h
 144
 145 @* Data structures exclusive to {\tt CWEAVE}.
 146 As explained in \.{common.w}, the field of a |name_info| structure
 147 that contains the |rlink| of a section name is used for a completely
 148 different purpose in the case of identifiers.  It is then called the
 149 |ilk| of the identifier, and it is used to
 150 distinguish between various types of identifiers, as follows:
 151
 152 \yskip\hang |normal| and |func_template| identifiers are part of the
 153 \CEE/ program that will  appear in italic type (or in typewriter type
 154 if all uppercase).
 155
 156 \yskip\hang |custom| identifiers are part of the \CEE/ program that
 157 will be typeset in special ways.
 158
 159 \yskip\hang |roman| identifiers are index entries that appear after
 160 \.{@@\^} in the \.{CWEB} file.
 161
 162 \yskip\hang |wildcard| identifiers are index entries that appear after
 163 \.{@@:} in the \.{CWEB} file.
 164
 165 \yskip\hang |typewriter| identifiers are index entries that appear after
 166 \.{@@.} in the \.{CWEB} file.
 167
 168 \yskip\hang |alfop|, \dots, |template_like|
 169 identifiers are \CEE/ or \CPLUSPLUS/ reserved words whose |ilk|
 170 explains how they are to be treated when \CEE/ code is being
 171 formatted.
 172
 173 @d ilk dummy.Ilk
 174 @d normal 0 /* ordinary identifiers have |normal| ilk */
 175 @d roman 1 /* normal index entries have |roman| ilk */
 176 @d wildcard 2 /* user-formatted index entries have |wildcard| ilk */
 177 @d typewriter 3 /* `typewriter type' entries have |typewriter| ilk */
 178 @d abnormal(a) (a->ilk>typewriter) /* tells if a name is special */
 179 @d func_template 4 /* identifiers that can be followed by optional template */
 180 @d custom 5 /* identifiers with user-given control sequence */
 181 @d alfop 22 /* alphabetic operators like \&{and} or \&{not\_eq} */
 182 @d else_like 26 /* \&{else} */
 183 @d public_like 40 /* \&{public}, \&{private}, \&{protected} */
 184 @d operator_like 41 /* \&{operator} */
 185 @d new_like 42 /* \&{new} */
 186 @d catch_like 43 /* \&{catch} */
 187 @d for_like 45 /* \&{for}, \&{switch}, \&{while} */
 188 @d do_like 46 /* \&{do} */
 189 @d if_like 47 /* \&{if}, \&{ifdef}, \&{endif}, \&{pragma}, \dots */
 190 @d delete_like 48 /* \&{delete} */
 191 @d raw_ubin 49 /* `\.\&' or `\.*' when looking for \&{const} following */
 192 @d const_like 50 /* \&{const}, \&{volatile} */
 193 @d raw_int 51 /* \&{int}, \&{char}, \dots; also structure and class names  */
 194 @d int_like 52 /* same, when not followed by left parenthesis or \DC\ */
 195 @d case_like 53 /* \&{case}, \&{return}, \&{goto}, \&{break}, \&{continue} */
 196 @d sizeof_like 54 /* \&{sizeof} */
 197 @d struct_like 55 /* \&{struct}, \&{union}, \&{enum}, \&{class} */
 198 @d typedef_like 56 /* \&{typedef} */
 199 @d define_like 57 /* \&{define} */
 200 @d template_like 58 /* \&{template} */
 201
 202 @ We keep track of the current section number in |section_count|, which
 203 is the total number of sections that have started.  Sections which have
 204 been altered by a change file entry have their |changed_section| flag
 205 turned on during the first phase.
 206
 207 @<Global...@>=
 208 boolean change_exists; /* has any section changed? */
 209
 210 @ The other large memory area in \.{CWEAVE} keeps the cross-reference data.
 211 All uses of the name |p| are recorded in a linked list beginning at
 212 |p->xref|, which points into the |xmem| array. The elements of |xmem|
 213 are structures consisting of an integer, |num|, and a pointer |xlink|
 214 to another element of |xmem|.  If |x=p->xref| is a pointer into |xmem|,
 215 the value of |x->num| is either a section number where |p| is used,
 216 or |cite_flag| plus a section number where |p| is mentioned,
 217 or |def_flag| plus a section number where |p| is defined;
 218 and |x->xlink| points to the next such cross-reference for |p|,
 219 if any. This list of cross-references is in decreasing order by
 220 section number. The next unused slot in |xmem| is |xref_ptr|.
 221 The linked list ends at |&xmem[0]|.
 222
 223 The global variable |xref_switch| is set either to |def_flag| or to zero,
 224 depending on whether the next cross-reference to an identifier is to be
 225 underlined or not in the index. This switch is set to |def_flag| when
 226 \.{@@!} or \.{@@d} is scanned, and it is cleared to zero when
 227 the next identifier or index entry cross-reference has been made.
 228 Similarly, the global variable |section_xref_switch| is either
 229 |def_flag| or |cite_flag| or zero, depending
 230 on whether a section name is being defined, cited or used in \CEE/ text.
 231
 232 @<Type...@>=
 233 typedef struct xref_info {
 234   sixteen_bits num; /* section number plus zero, |cite_flag|, or |def_flag|;
     or |file_flag| by itself; zero in the sentinel |xmem[0]| */
 235   struct xref_info *xlink; /* pointer to the previous cross-reference
     of the same name; the list ends at |xmem| */
 236 } xref_info;
 237 typedef xref_info *xref_pointer;
 238
 239 @ @<Global...@>=
 240 xref_info xmem[max_refs]; /* contains cross-reference information */
 241 xref_pointer xmem_end = xmem+max_refs-1; /* last element of |xmem| */
 242 xref_pointer xref_ptr; /* the largest occupied position in |xmem| */
 243 sixteen_bits xref_switch,section_xref_switch; /* |xref_switch| is zero or
     |def_flag|; |section_xref_switch| is zero, |cite_flag|, or |def_flag| */
 244
 245 @ A section that is used for multi-file output (with the \.{@@(} feature)
 246 has a special first cross-reference whose |num| field is |file_flag|.
 247
 248 @d file_flag (3*cite_flag)
 249 @d def_flag (2*cite_flag)
 250 @d cite_flag 10240 /* must be strictly larger than |max_sections| */
 251 @d xref equiv_or_xref
 252
 253 @<Set init...@>=
 254 xref_ptr=xmem; name_dir->xref=(char*)xmem; xref_switch=0; section_xref_switch=0;
     /* |xmem[0]| is reserved as the end-of-list marker, so the first
        |append_xref| will use |xmem[1]| */
 255 xmem->num=0; /* sentinel value */
 256
 257 @ A new cross-reference for an identifier is formed by calling |new_xref|,
 258 which discards duplicate entries and ignores non-underlined references
 259 to one-letter identifiers or \CEE/'s reserved words.
 260
 261 If the user has set the |no_xref| flag (the \.{-x} option of the command line),
 262 it is unnecessary to keep track of cross-references for identifiers.
 263 If one were careful, one could probably make more changes around section
 264 100 to avoid a lot of identifier looking up.
 265
 266 @d append_xref(c) if (xref_ptr==xmem_end) overflow("cross-reference");
 267   else (++xref_ptr)->num=c;
 268 @d no_xref (flags['x']==0)
 269 @d make_xrefs flags['x'] /* should cross references be output? */
 270 @d is_tiny(p) ((p+1)->byte_start==(p)->byte_start+1)
 271 @d unindexed(a) (a<res_wd_end && a->ilk>=custom)
 272       /* tells if uses of a name are to be indexed */
 273
 274 @c
 275 void
     /* record an occurrence of the identifier |p| in the current section,
        flagged with the current |xref_switch|, which is cleared once used */
 276 new_xref(p)
 277 name_pointer p;
 278 {
 279   xref_pointer q; /* pointer to previous cross-reference */
 280   sixteen_bits m, n; /* new and previous cross-reference value */
 281   if (no_xref) return; /* cross-referencing is switched off */
 282   if ((unindexed(p) || is_tiny(p)) && xref_switch==0) return;
     /* plain uses of reserved words and one-letter names are not recorded */
 283   m=section_count+xref_switch; xref_switch=0; q=(xref_pointer)p->xref;
 284   if (q != xmem) { /* |p| already has at least one entry */
 285     n=q->num;
 286     if (n==m || n==m+def_flag) return; /* this section is already listed */
 287     else if (m==n+def_flag) { /* promote a plain use to a definition */
 288         q->num=m; return;
 289     }
 290   }
 291   append_xref(m); xref_ptr->xlink=q; p->xref=(char*)xref_ptr;
     /* the new entry becomes the head of the list of |p| */
 292 }
 293
 294 @ The cross-reference lists for section names are slightly different.
 295 Suppose that a section name is defined in sections $m_1$, \dots,
 296 $m_k$, cited in sections $n_1$, \dots, $n_l$, and used in sections
 297 $p_1$, \dots, $p_j$.  Then its list will contain $m_1+|def_flag|$,
 298 \dots, $m_k+|def_flag|$, $n_1+|cite_flag|$, \dots,
 299 $n_l+|cite_flag|$, $p_1$, \dots, $p_j$, in this order.
 300
 301 Although this method of storage takes quadratic time with respect to
 302 the length of the list, under foreseeable uses of \.{CWEAVE} this inefficiency
 303 is insignificant.
 304
 305 @c
 306 void
     /* record the current section in the list of section name |p|, as a
        definition, citation, or use according to |section_xref_switch| */
 307 new_section_xref(p)
 308 name_pointer p;
 309 {
 310   xref_pointer q,r; /* pointers to previous cross-references */
 311   q=(xref_pointer)p->xref; r=xmem;
 312   if (q>xmem)
 313         while (q->num>section_xref_switch) {r=q; q=q->xlink;}
     /* now |r| is the last entry whose |num| exceeds the switch value
        (or |xmem| if there is none), and |q| is the entry that follows it */
 314   if (r->num==section_count+section_xref_switch)
 315         return; /* don't duplicate entries */
     /* note that |section_xref_switch| is left unchanged on that path */
 316   append_xref(section_count+section_xref_switch);
 317   xref_ptr->xlink=q; section_xref_switch=0;
 318   if (r==xmem) p->xref=(char*)xref_ptr; /* new entry heads the list */
 319   else r->xlink=xref_ptr; /* otherwise it is spliced in after |r| */
 320 }
 321
 322 @ The cross-reference list for a section name may also begin with
 323 |file_flag|. Here's how that flag gets put~in.
 324
 325 @c
 326 void
     /* put a |file_flag| entry at the front of the list of section name |p|,
        unless the list already starts with one */
 327 set_file_flag(p)
 328 name_pointer p;
 329 {
 330   xref_pointer q;
 331   q=(xref_pointer)p->xref; /* current head of the list */
 332   if (q->num==file_flag) return; /* already flagged */
 333   append_xref(file_flag);
 334   xref_ptr->xlink = q; /* old list follows the new entry */
 335   p->xref = (char *)xref_ptr;
 336 }
 337
 338 @ A third large area of memory is used for sixteen-bit `tokens', which appear
 339 in short lists similar to the strings of characters in |byte_mem|. Token lists
 340 are used to contain the result of \CEE/ code translated into \TEX/ form;
 341 further details about them will be explained later. A |text_pointer| variable
 342 is an index into |tok_start|.
 343
 344 @<Typed...@>=
 345 typedef sixteen_bits token;
 346 typedef token *token_pointer; /* address of a token, as in |tok_mem| */
 347 typedef token_pointer *text_pointer; /* address of an entry of |tok_start| */
 348
 349 @ The first position of |tok_mem|
 350 that is unoccupied by replacement text is called |tok_ptr|, and the first
 351 unused location of |tok_start| is called |text_ptr|.
 352 Thus, we usually have |*text_ptr==tok_ptr|.
 353
 354 @<Global...@>=
 355 token tok_mem[max_toks]; /* tokens */
 356 token_pointer tok_mem_end = tok_mem+max_toks-1; /* last element of |tok_mem| */
 357 token_pointer tok_start[max_texts]; /* directory into |tok_mem| */
 358 token_pointer tok_ptr; /* first unused position in |tok_mem| */
 359 text_pointer text_ptr; /* first unused position in |tok_start| */
 360 text_pointer tok_start_end = tok_start+max_texts-1; /* last element of |tok_start| */
 361 token_pointer max_tok_ptr; /* largest value of |tok_ptr| */
 362 text_pointer max_text_ptr; /* largest value of |text_ptr| */
 363
 364 @ @<Set init...@>=
 365 tok_ptr=tok_mem+1; text_ptr=tok_start+1; tok_start[0]=tok_mem+1;
     /* |tok_mem[0]| is skipped; both pointers start at position 1 */
 366 tok_start[1]=tok_mem+1; /* so that |*text_ptr==tok_ptr| holds initially */
 367 max_tok_ptr=tok_mem+1; max_text_ptr=tok_start+1;
 368
 369 @ Here are the three procedures needed to complete |id_lookup|:
 370 @c
 371 int names_match(p,first,l,t) /* nonzero if name |p| matches the given string and ilk */
 372 name_pointer p; /* points to the proposed match */
 373 char *first; /* position of first character of string */
 374 int l; /* length of identifier */
 375 eight_bits t; /* desired ilk */
 376 {
 377   if (length(p)!=l) return 0; /* lengths differ */
 378   if (p->ilk!=t && !(t==normal && abnormal(p))) return 0;
     /* the ilk must agree, except that a request for |normal| also
        accepts any name whose ilk exceeds |typewriter| */
 379   return !strncmp(first,p->byte_start,l); /* finally compare the characters */
 380 }
 381
 382 void
     /* give name |p| the ilk |t| and an empty cross-reference list */
 383 init_p(p,t)
 384 name_pointer p;
 385 eight_bits t;
 386 {
 387   p->ilk=t; p->xref=(char*)xmem; /* |xmem| marks the end of a list */
 388 }
 389
 390 void
     /* give name |p| an empty cross-reference list */
 391 init_node(p)
 392 name_pointer p;
 393 {
 394   p->xref=(char*)xmem; /* |xmem| marks the end of a list */
 395 }
 396
 397 @ We have to get \CEE/'s
 398 reserved words into the hash table, and the simplest way to do this is
 399 to insert them every time \.{CWEAVE} is run.  Fortunately there are relatively
 400 few reserved words. (Some of these are not strictly ``reserved,'' but
 401 are defined in header files of the ISO Standard \CEE/ Library.)
 402 @^reserved words@>
 403
 404 @<Store all the reserved words@>=
 405 id_lookup("and",NULL,alfop);
     /* one call per word, in alphabetical order ignoring case; the third
        argument is the ilk that says how the word is to be formatted */
 406 id_lookup("and_eq",NULL,alfop);
 407 id_lookup("asm",NULL,sizeof_like);
 408 id_lookup("auto",NULL,int_like);
 409 id_lookup("bitand",NULL,alfop);
 410 id_lookup("bitor",NULL,alfop);
 411 id_lookup("bool",NULL,raw_int);
 412 id_lookup("break",NULL,case_like);
 413 id_lookup("case",NULL,case_like);
 414 id_lookup("catch",NULL,catch_like);
 415 id_lookup("char",NULL,raw_int);
 416 id_lookup("class",NULL,struct_like);
 417 id_lookup("clock_t",NULL,raw_int);
 418 id_lookup("compl",NULL,alfop);
 419 id_lookup("const",NULL,const_like);
 420 id_lookup("const_cast",NULL,raw_int);
 421 id_lookup("continue",NULL,case_like);
 422 id_lookup("default",NULL,case_like);
 423 id_lookup("define",NULL,define_like);
 424 id_lookup("defined",NULL,sizeof_like);
 425 id_lookup("delete",NULL,delete_like);
 426 id_lookup("div_t",NULL,raw_int);
 427 id_lookup("do",NULL,do_like);
 428 id_lookup("double",NULL,raw_int);
 429 id_lookup("dynamic_cast",NULL,raw_int);
 430 id_lookup("elif",NULL,if_like);
 431 id_lookup("else",NULL,else_like);
 432 id_lookup("endif",NULL,if_like);
 433 id_lookup("enum",NULL,struct_like);
 434 id_lookup("error",NULL,if_like);
 435 id_lookup("explicit",NULL,int_like);
 436 id_lookup("export",NULL,int_like);
 437 id_lookup("extern",NULL,int_like);
 438 id_lookup("FILE",NULL,raw_int);
 439 id_lookup("float",NULL,raw_int);
 440 id_lookup("for",NULL,for_like);
 441 id_lookup("fpos_t",NULL,raw_int);
 442 id_lookup("friend",NULL,int_like);
 443 id_lookup("goto",NULL,case_like);
 444 id_lookup("if",NULL,if_like);
 445 id_lookup("ifdef",NULL,if_like);
 446 id_lookup("ifndef",NULL,if_like);
 447 id_lookup("include",NULL,if_like);
 448 id_lookup("inline",NULL,int_like);
 449 id_lookup("int",NULL,raw_int);
 450 id_lookup("jmp_buf",NULL,raw_int);
 451 id_lookup("ldiv_t",NULL,raw_int);
 452 id_lookup("line",NULL,if_like);
 453 id_lookup("long",NULL,raw_int);
 454 id_lookup("mutable",NULL,int_like);
 455 id_lookup("namespace",NULL,struct_like);
 456 id_lookup("new",NULL,new_like);
 457 id_lookup("not",NULL,alfop);
 458 id_lookup("not_eq",NULL,alfop);
 459 id_lookup("NULL",NULL,custom);
 460 id_lookup("offsetof",NULL,raw_int);
 461 id_lookup("operator",NULL,operator_like);
 462 id_lookup("or",NULL,alfop);
 463 id_lookup("or_eq",NULL,alfop);
 464 id_lookup("pragma",NULL,if_like);
 465 id_lookup("private",NULL,public_like);
 466 id_lookup("protected",NULL,public_like);
 467 id_lookup("ptrdiff_t",NULL,raw_int);
 468 id_lookup("public",NULL,public_like);
 469 id_lookup("register",NULL,int_like);
 470 id_lookup("reinterpret_cast",NULL,raw_int);
 471 id_lookup("return",NULL,case_like);
 472 id_lookup("short",NULL,raw_int);
 473 id_lookup("sig_atomic_t",NULL,raw_int);
 474 id_lookup("signed",NULL,raw_int);
 475 id_lookup("size_t",NULL,raw_int);
 476 id_lookup("sizeof",NULL,sizeof_like);
 477 id_lookup("static",NULL,int_like);
 478 id_lookup("static_cast",NULL,raw_int);
 479 id_lookup("struct",NULL,struct_like);
 480 id_lookup("switch",NULL,for_like);
 481 id_lookup("template",NULL,template_like);
 482 id_lookup("this",NULL,custom);
 483 id_lookup("throw",NULL,case_like);
 484 id_lookup("time_t",NULL,raw_int);
 485 id_lookup("try",NULL,else_like);
 486 id_lookup("typedef",NULL,typedef_like);
 487 id_lookup("typeid",NULL,raw_int);
 488 id_lookup("typename",NULL,struct_like);
 489 id_lookup("undef",NULL,if_like);
 490 id_lookup("union",NULL,struct_like);
 491 id_lookup("unsigned",NULL,raw_int);
 492 id_lookup("using",NULL,int_like);
 493 id_lookup("va_dcl",NULL,decl); /* Berkeley's variable-arg-list convention */
 494 id_lookup("va_list",NULL,raw_int); /* ditto */
 495 id_lookup("virtual",NULL,int_like);
 496 id_lookup("void",NULL,raw_int);
 497 id_lookup("volatile",NULL,const_like);
 498 id_lookup("wchar_t",NULL,raw_int);
 499 id_lookup("while",NULL,for_like);
 500 id_lookup("xor",NULL,alfop);
 501 id_lookup("xor_eq",NULL,alfop);
 502 res_wd_end=name_ptr; /* |unindexed| tests names against this boundary;
     presumably |name_ptr| is the first name slot not yet used---confirm */
 503 id_lookup("TeX",NULL,custom);
     /* the last two words are entered after |res_wd_end| is set, so
        |unindexed| is false for them if they lie at or beyond it */
 504 id_lookup("make_pair",NULL,func_template);
 505
 506 @* Lexical scanning.
 507 Let us now consider the subroutines that read the \.{CWEB} source file
 508 and break it into meaningful units. There are four such procedures:
 509 One simply skips to the next `\.{@@\ }' or `\.{@@*}' that begins a
 510 section; another passes over the \TEX/ text at the beginning of a
 511 section; the third passes over the \TEX/ text in a \CEE/ comment;
 512 and the last, which is the most interesting, gets the next token of
 513 a \CEE/ text.  They all use the pointers |limit| and |loc| into
 514 the line of input currently being studied.
 515
 516 @ Control codes in \.{CWEB}, which begin with `\.{@@}', are converted
 517 into a numeric code designed to simplify \.{CWEAVE}'s logic; for example,
 518 larger numbers are given to the control codes that denote more significant
 519 milestones, and the code of |new_section| should be the largest of
 520 all. Some of these numeric control codes take the place of |char|
 521 control codes that will not otherwise appear in the output of the
 522 scanning routines.
 523 @^ASCII code dependencies@>
 524
 525 @d ignore 00 /* control code of no interest to \.{CWEAVE} */
 526 @d verbatim 02 /* takes the place of extended ASCII \.{\char2} */
 527 @d begin_short_comment 03 /* \CPLUSPLUS/ short comment */
 528 @d begin_comment '\t' /* tab marks will not appear */
 529 @d underline '\n' /* this code will be intercepted without confusion */
 530 @d noop 0177 /* takes the place of ASCII delete */
 531 @d xref_roman 0203 /* control code for `\.{@@\^}' */
 532 @d xref_wildcard 0204 /* control code for `\.{@@:}' */
 533 @d xref_typewriter 0205 /* control code for `\.{@@.}' */
 534 @d TeX_string 0206 /* control code for `\.{@@t}' */
 535 @f TeX_string TeX
 536 @d ord 0207 /* control code for `\.{@@'}' */
 537 @d join 0210 /* control code for `\.{@@\&}' */
 538 @d thin_space 0211 /* control code for `\.{@@,}' */
 539 @d math_break 0212 /* control code for `\.{@@\v}' */
 540 @d line_break 0213 /* control code for `\.{@@/}' */
 541 @d big_line_break 0214 /* control code for `\.{@@\#}' */
 542 @d no_line_break 0215 /* control code for `\.{@@+}' */
 543 @d pseudo_semi 0216 /* control code for `\.{@@;}' */
 544 @d macro_arg_open 0220 /* control code for `\.{@@[}' */
 545 @d macro_arg_close 0221 /* control code for `\.{@@]}' */
 546 @d trace 0222 /* control code for `\.{@@0}', `\.{@@1}' and `\.{@@2}' */
 547 @d translit_code 0223 /* control code for `\.{@@l}' */
 548 @d output_defs_code 0224 /* control code for `\.{@@h}' */
 549 @d format_code 0225 /* control code for `\.{@@f}' and `\.{@@s}' */
 550 @d definition 0226 /* control code for `\.{@@d}' */
 551 @d begin_C 0227 /* control code for `\.{@@c}' */
 552 @d section_name 0230 /* control code for `\.{@@<}' */
 553 @d new_section 0231 /* control code for `\.{@@\ }' and `\.{@@*}' */
 554
 555 @ Control codes are converted to \.{CWEAVE}'s internal
 556 representation by means of the table |ccode|.
 557
 558 @<Global...@>=
 559 eight_bits ccode[256]; /* meaning of a char following \.{@@} */
 560
 561 @ @<Set ini...@>=
 562 {int c; for (c=0; c<256; c++) ccode[c]=0;} /* the default code is |ignore| */
 563 ccode[' ']=ccode['\t']=ccode['\n']=ccode['\v']=ccode['\r']=ccode['\f']
 564    =ccode['*']=new_section; /* white space or a star begins a section */
 565 ccode['@@']='@@'; /* `quoted' at sign */
 566 ccode['=']=verbatim;
 567 ccode['d']=ccode['D']=definition; /* letters are accepted in either case */
 568 ccode['f']=ccode['F']=ccode['s']=ccode['S']=format_code;
 569 ccode['c']=ccode['C']=ccode['p']=ccode['P']=begin_C;
 570 ccode['t']=ccode['T']=TeX_string;
 571 ccode['l']=ccode['L']=translit_code;
 572 ccode['q']=ccode['Q']=noop;
 573 ccode['h']=ccode['H']=output_defs_code;
 574 ccode['&']=join; ccode['<']=ccode['(']=section_name;
     /* both ways of opening a section name share one code */
 575 ccode['!']=underline; ccode['^']=xref_roman;
 576 ccode[':']=xref_wildcard; ccode['.']=xref_typewriter; ccode[',']=thin_space;
 577 ccode['|']=math_break; ccode['/']=line_break; ccode['#']=big_line_break;
 578 ccode['+']=no_line_break; ccode[';']=pseudo_semi;
 579 ccode['[']=macro_arg_open; ccode[']']=macro_arg_close;
 580 ccode['\'']=ord;
 581 @<Special control codes for debugging@>@;
 582
 583 @ Users can write
 584 \.{@@2}, \.{@@1}, and \.{@@0} to turn tracing fully on, partly on,
 585 and off, respectively.
 586
 587 @<Special control codes...@>=
 588 ccode['0']=ccode['1']=ccode['2']=trace;
 589
 590 @ The |skip_limbo| routine is used on the first pass to skip through
 591 portions of the input that are not in any sections, i.e., that precede
 592 the first section. After this procedure has been called, the value of
 593 |input_has_ended| will tell whether or not a section has actually been found.
 594
 595 There's a complication that we will postpone until later: If the \.{@@s}
 596 operation appears in limbo, we want to use it to adjust the default
 597 interpretation of identifiers.
 598
 599 @<Predec...@>=
 600 void   skip_limbo();
 601
 602 @ @c
 603 void
     /* advance through the input until a control code that begins a
        section has been passed, or until the input ends */
 604 skip_limbo() {
 605   while(1) {
 606     if (loc>limit && get_line()==0) return; /* no more input */
 607     *(limit+1)='@@'; /* sentinel that stops the scan below */
 608     while (*loc!='@@') loc++; /* look for '@@', then skip two chars */
 609     if (loc++ <=limit) { int c=ccode[(eight_bits)*loc++];
     /* we get here only for an at sign inside the line, not for the sentinel */
 610       if (c==new_section) return;
 611       if (c==noop) skip_restricted();
 612       else if (c==format_code) @<Process simple format in limbo@>;
 613     }
 614   }
 615 }
 616
 617 @ The |skip_TeX| routine is used on the first pass to skip through
 618 the \TEX/ code at the beginning of a section. It returns the next
 619 control code or `\.{\v}' found in the input. A |new_section| is
 620 assumed to exist at the very end of the file.
 621
 622 @f skip_TeX TeX
 623
 624 @c
 625 unsigned
     /* returns |new_section| at end of input, a vertical bar if one is met,
        or else the |ccode| entry of the character that follows an at sign */
 626 skip_TeX() /* skip past pure \TEX/ code */
 627 {
 628   while (1) {
 629     if (loc>limit && get_line()==0) return(new_section);
 630     *(limit+1)='@@'; /* sentinel that stops the scan below */
 631     while (*loc!='@@' && *loc!='|') loc++;
 632     if (*loc++ =='|') return('|');
 633     if (loc<=limit) return(ccode[(eight_bits)*(loc++)]);
     /* otherwise the at sign was the sentinel or the last character
        of the line, and we go around again */
 634   }
 635 }
 636
 637 @*1 Inputting the next token.
 638 As stated above, \.{CWEAVE}'s most interesting lexical scanning routine is the
 639 |get_next| function that inputs the next token of \CEE/ input. However,
 640 |get_next| is not especially complicated.
 641
 642 The result of |get_next| is either a |char| code for some special character,
 643 or it is a special code representing a pair of characters (e.g., `\.{!=}'),
 644 or it is the numeric value computed by the |ccode|
 645 table, or it is one of the following special codes:
 646
 647 \yskip\hang |identifier|: In this case the global variables |id_first| and
 648 |id_loc| will have been set to the beginning and ending-plus-one locations
 649 in the buffer, as required by the |id_lookup| routine.
 650
 651 \yskip\hang |string|: The string will have been copied into the array
 652 |section_text|; |id_first| and |id_loc| are set as above (now they are
 653 pointers into |section_text|).
 654
 655 \yskip\hang |constant|: The constant is copied into |section_text|, with
 656 slight modifications; |id_first| and |id_loc| are set.
 657
 658 \yskip\noindent Furthermore, some of the control codes cause
 659 |get_next| to take additional actions:
 660
 661 \yskip\hang |xref_roman|, |xref_wildcard|, |xref_typewriter|, |TeX_string|,
 662 |verbatim|: The values of |id_first| and |id_loc| will have been set to
 663 the beginning and ending-plus-one locations in the buffer.
 664
 665 \yskip\hang |section_name|: In this case the global variable |cur_section| will
 666 point to the |byte_start| entry for the section name that has just been scanned.
 667 The value of |cur_section_char| will be |'('| if the section name was
 668 preceded by \.{@@(} instead of \.{@@<}.
 669
 670 \yskip\noindent If |get_next| sees `\.{@@!}'
 671 it sets |xref_switch| to |def_flag| and goes on to the next token.
 672
 673 @d constant 0200 /* \CEE/ constant */
 674 @d string 0201 /* \CEE/ string */
 675 @d identifier 0202 /* \CEE/ identifier or reserved word */
 676
 677 @<Global...@>=
 678 name_pointer cur_section; /* name of section just scanned */
 679 char cur_section_char; /* the character just before that name */
 680
 681 @ @<Include...@>=
 682 #include <ctype.h> /* definition of |isalpha|, |isdigit| and so on */
 683 #include <stdlib.h> /* definition of |exit| */
 684
 685 @ As one might expect, |get_next| consists mostly of a big switch
 686 that branches to the various special cases that can arise.
 687 \CEE/ allows underscores to appear in identifiers, and some \CEE/
 688 compilers even allow the dollar sign.
 689
 690 @d isxalpha(c) ((c)=='_' || (c)=='$')
 691    /* non-alpha characters allowed in identifier */
 692 @d ishigh(c) ((eight_bits)(c)>0177)
 693 @^high-bit character handling@>
 694
 695 @<Predecl...@>=
 696 eight_bits get_next();
 697
 698 @ @c
 699 eight_bits
 700 get_next() /* produces the next input token */
 701 {@+eight_bits c; /* the current character */
 702   while (1) {
 703     @<Check if we're at the end of a preprocessor command@>;
 704     if (loc>limit && get_line()==0) return(new_section); /* input is exhausted */
 705     c=*(loc++);
 706     if (xisdigit(c) || c=='.') @<Get a constant@>@;
 707     else if (c=='\'' || c=='"' || (c=='L'&&(*loc=='\'' || *loc=='"'))@|
 708            || (c=='<' && sharp_include_line==1))
     /* a quote, an \.L directly before a quote, or a left angle bracket
        on an include line */
 709         @<Get a string@>@;
 710     else if (xisalpha(c) || isxalpha(c) || ishigh(c))
 711       @<Get an identifier@>@;
 712     else if (c=='@@') @<Get control code and possible section name@>@;
 713     else if (xisspace(c)) continue; /* ignore spaces and tabs */
 714     if (c=='#' && loc==buffer+1) @<Raise preprocessor flag@>;
     /* that test succeeds only when |c| came from the first buffer position */
 715     mistake: @<Compress two-symbol operator@>@;
 716     return(c); /* a single-character token */
 717   }
 718 }
 719
 720 @ Because preprocessor commands do not fit in with the rest of the syntax
 721 of \CEE/,
 722 we have to deal with them separately.  One solution is to enclose such
 723 commands between special markers.  Thus, when a \.\# is seen as the
 724 first character of a line, |get_next| returns a special code
 725 |left_preproc| and raises a flag |preprocessing|.
 726
 727 We can use the same internal code number for |left_preproc| as we do
 728 for |ord|, since |get_next| changes |ord| into a string.
 729
 730 @d left_preproc ord /* begins a preprocessor command */
 731 @d right_preproc 0217 /* ends a preprocessor command */
 732
 733 @<Glob...@>=
 734 boolean preprocessing=0; /* are we scanning a preprocessor command? */
 735
 736 @ @<Raise prep...@>= {
 737   preprocessing=1;
 738   @<Check if next token is |include|@>;
 739   return (left_preproc);
 740 }
 741
 742 @ An additional complication is the freakish use of \.< and \.> to delimit
 743 a file name in lines that start with \.{\#include}.  We must treat this file
 744 name as a string.
 745
 746 @<Glob...@>=
 747 boolean sharp_include_line=0; /* are we scanning a |#include| line? */
 748
 749 @ @<Check if next token is |include|@>=
 750 while (loc<=buffer_end-7 && xisspace(*loc)) loc++;
 751 if (loc<=buffer_end-6 && strncmp(loc,"include",7)==0) sharp_include_line=1;
 752
 753 @ When we get to the end of a preprocessor line,
 754 we lower the flag and send a code |right_preproc|, unless
 755 the last character was a \.\\.
 756
 757 @<Check if we're at...@>=
 758   while (loc==limit-1 && preprocessing && *loc=='\\') /* continued line */
 759     if (get_line()==0) return(new_section); /* still in preprocessor mode */
 760   if (loc>=limit && preprocessing) { /* the command's line is used up */
 761     preprocessing=sharp_include_line=0; /* lower both flags */
 762     return(right_preproc);
 763   }
 764
 765 @ The following code assigns values to the combinations \.{++},
 766 \.{--}, \.{->}, \.{>=}, \.{<=}, \.{==}, \.{<<}, \.{>>}, \.{!=}, \.{\v\v}, and
 767 \.{\&\&}, and to the \CPLUSPLUS/
 768 combinations \.{...}, \.{::}, \.{.*} and \.{->*}.
 769 The compound assignment operators (e.g., \.{+=}) are
 770 treated as separate tokens.
 771
 772 @d compress(c) if (loc++<=limit) return(c)
 773
 774 @<Compress tw...@>=
 775 switch(c) { /* |c| was just read; |*loc| is the character after it */
 776   case '/': if (*loc=='*') {compress(begin_comment);}
 777     else if (*loc=='/') compress(begin_short_comment); break;
 778   case '+': if (*loc=='+') compress(plus_plus); break;
 779   case '-': if (*loc=='-') {compress(minus_minus);}
 780     else if (*loc=='>') if (*(loc+1)=='*') {loc++; compress(minus_gt_ast);}
 781                         else compress(minus_gt); break;
 782   case '.': if (*loc=='*') {compress(period_ast);}
 783             else if (*loc=='.' && *(loc+1)=='.') {
 784               loc++; compress(dot_dot_dot); /* three characters in all */
 785             }
 786             break;
 787   case ':': if (*loc==':') compress(colon_colon); break;
 788   case '=': if (*loc=='=') compress(eq_eq); break;
 789   case '>': if (*loc=='=') {compress(gt_eq);}
 790     else if (*loc=='>') compress(gt_gt); break;
 791   case '<': if (*loc=='=') {compress(lt_eq);}
 792     else if (*loc=='<') compress(lt_lt); break;
 793   case '&': if (*loc=='&') compress(and_and); break;
 794   case '|': if (*loc=='|') compress(or_or); break;
 795   case '!': if (*loc=='=') compress(not_eq); break;
 796 }
 797
 798 @ @<Get an identifier@>= {
 799   id_first=--loc; /* the casts below keep the \.{<ctype.h>} arguments nonnegative */
 800   while (isalpha((eight_bits)*++loc) || isdigit((eight_bits)*loc)@|
 801          || isxalpha(*loc) || ishigh(*loc)) ; /* skip to the end of the name */
 802   id_loc=loc; return(identifier); }
 803
 804 @ Different conventions are followed by \TEX/ and \CEE/ to express octal
 805 and hexadecimal numbers; it is reasonable to stick to each convention
 806 within its realm.  Thus the \CEE/ part of a \.{CWEB} file has octals
 807 introduced by \.0 and hexadecimals by \.{0x}, but \.{CWEAVE} will print
 808 with \TEX/ macros that the user can redefine to fit the context.
 809 In order to simplify such macros, we replace some of the characters.
 810
 811 Notice that in this section and the next, |id_first| and |id_loc|
 812 are pointers into the array |section_text|, not into |buffer|.
 813
 814 @<Get a constant@>= {
 815   id_first=id_loc=section_text+1; /* the constant is built in |section_text| */
 816   if (*(loc-1)=='0') {
 817     if (*loc=='x' || *loc=='X') {*id_loc++='^'; loc++;
 818       while (xisxdigit(*loc)) *id_loc++=*loc++;} /* hex constant */
 819     else if (xisdigit(*loc)) {*id_loc++='~';
 820       while (xisdigit(*loc)) *id_loc++=*loc++;} /* octal constant */
 821     else goto dec; /* decimal constant */
 822   }
 823   else { /* decimal constant */
 824     if (*(loc-1)=='.' && !xisdigit(*loc)) goto mistake; /* not a constant */
 825     dec: *id_loc++=*(loc-1); /* copy the character that began the constant */
 826     while (xisdigit(*loc) || *loc=='.') *id_loc++=*loc++;
 827     if (*loc=='e' || *loc=='E') { /* float constant */
 828       *id_loc++='_'; loc++; /* the exponent letter is stored as an underscore */
 829       if (*loc=='+' || *loc=='-') *id_loc++=*loc++;
 830       while (xisdigit(*loc)) *id_loc++=*loc++;
 831     }
 832   }
 833   while (*loc=='u' || *loc=='U' || *loc=='l' || *loc=='L'
 834          || *loc=='f' || *loc=='F') {
 835     *id_loc++='$'; *id_loc++=toupper(*loc); loc++; /* suffix, in upper case */
 836   }
 837   return(constant);
 838 }
 839
 840 @ \CEE/ strings and character constants, delimited by double and single
 841 quotes, respectively, can contain newlines or instances of their own
 842 delimiters if they are protected by a backslash.  We follow this
 843 convention, but do not allow the string to be longer than |longest_name|.
 844
 845 @<Get a string@>= {
 846   char delim = c; /* what started the string */
 847   id_first = section_text+1;
 848   id_loc = section_text; /* |id_loc| trails one behind; stores use |*++id_loc| */
 849   if (delim=='\'' && *(loc-2)=='@@') {*++id_loc='@@'; *++id_loc='@@';}
 850   *++id_loc=delim; /* the opening delimiter is part of the text */
 851   if (delim=='L') { /* wide character constant */
 852     delim=*loc++; *++id_loc=delim;
 853   }
 854   if (delim=='<') delim='>'; /* for file names in |#include| lines */
 855   while (1) {
 856     if (loc>=limit) { /* the current line is used up */
 857       if(*(limit-1)!='\\') { /* and it does not end with a backslash */
 858         err_print("! String didn't end"); loc=limit; break;
 859 @.String didn't end@>
 860       }
 861       if(get_line()==0) {
 862         err_print("! Input ended in middle of string"); loc=buffer; break;
 863 @.Input ended in middle of string@>
 864       }
 865     }
 866     if ((c=*loc++)==delim) { /* closing delimiter */
 867       if (++id_loc<=section_text_end) *id_loc=c;
 868       break;
 869     }
 870     if (c=='\\') if (loc>=limit) continue; /* backslash ends the line: get the next one */
 871       else if (++id_loc<=section_text_end) {
 872         *id_loc = '\\'; c=*loc++; /* keep the backslash; fetch what it protects */
 873       }
 874     if (++id_loc<=section_text_end) *id_loc=c;
 875   }
 876   if (id_loc>=section_text_end) { /* the text did not fit in |section_text| */
 877     printf("\n! String too long: ");
 878 @.String too long@>
 879     term_write(section_text+1,25);
 880     printf("..."); mark_error;
 881   }
 882   id_loc++; /* now |id_loc| is just past the last character stored */
 883   return(string);
 884 }
 885
 886 @ After an \.{@@} sign has been scanned, the next character tells us
 887 whether there is more work to do.
 888
 889 @<Get control code and possible section name@>= {
 890   c=*loc++; /* the character that follows the \.{@@} */
 891   switch(ccode[(eight_bits)c]) {
 892     case translit_code: err_print("! Use @@l in limbo only"); continue;
 893 @.Use @@l in limbo...@>
 894     case underline: xref_switch=def_flag; continue; /* resume the scanning loop */
 895     case trace: tracing=c-'0'; continue;
 896     case xref_roman: case xref_wildcard: case xref_typewriter:
 897     case noop: case TeX_string: c=ccode[c]; skip_restricted(); return(c);
 898     case section_name: /* this case and the next two each end with a |return| */
 899       @<Scan the section name and make |cur_section| point to it@>;
 900     case verbatim: @<Scan a verbatim string@>;
 901     case ord: @<Get a string@>; /* |ord| is turned into a |string| */
 902     default: return(ccode[(eight_bits)c]);
 903   }
 904 }
 905
 906 @ The occurrence of a section name sets |xref_switch| to zero,
 907 because the section name might (for example) follow \&{int}.
 908
 909 @<Scan the section name...@>= {
 910   char *k; /* pointer into |section_text| */
 911   cur_section_char=*(loc-1);
 912   @<Put section name into |section_text|@>;
 913   if (k-section_text>3 && strncmp(k-2,"...",3)==0)
 914         cur_section=section_lookup(section_text+1,k-3,1); /* 1 indicates a prefix */
 915   else cur_section=section_lookup(section_text+1,k,0);
 916   xref_switch=0; return(section_name);
 917 }
 918
 919 @ Section names are placed into the |section_text| array with consecutive spaces,
 920 tabs, and carriage-returns replaced by single spaces. There will be no
 921 spaces at the beginning or the end. (We set |section_text[0]=' '| to facilitate
 922 this, since the |section_lookup| routine uses |section_text[1]| as the first
 923 character of the name.)
 924
 925 @<Set init...@>=section_text[0]=' ';
 926
 927 @ @<Put section name...@>=
 928 k=section_text; /* |k| points to the last character stored so far */
 929 while (1) {
 930   if (loc>limit && get_line()==0) {
 931     err_print("! Input ended in section name");
 932 @.Input ended in section name@>
 933     loc=buffer+1; break;
 934   }
 935   c=*loc;
 936   @<If end of name or erroneous control code, |break|@>;
 937   loc++; if (k<section_text_end) k++; /* |k| stops advancing when the array is full */
 938   if (xisspace(c)) {
 939     c=' '; if (*(k-1)==' ') k--; /* collapse a run of white space */
 940   }
 941 *k=c;
 942 }
 943 if (k>=section_text_end) {
 944   printf("\n! Section name too long: ");
 945 @.Section name too long@>
 946   term_write(section_text+1,25);
 947   printf("..."); mark_harmless;
 948 }
 949 if (*k==' ' && k>section_text) k--; /* drop a trailing space */
 950
 951 @ @<If end of name...@>=
 952 if (c=='@@') {
 953   c=*(loc+1);
 954   if (c=='>') {
 955     loc+=2; break;
 956   }
 957   if (ccode[(eight_bits)c]==new_section) {
 958     err_print("! Section name didn't end"); break;
 959 @.Section name didn't end@>
 960   }
 961   if (c!='@@') {
 962     err_print("! Control codes are forbidden in section name"); break;
 963 @.Control codes are forbidden...@>
 964   }
 965   *(++k)='@@'; loc++; /* now |c==*loc| again */
 966 }
 967
 968 @ This function skips over a restricted context at relatively high speed.
 969
 970 @<Predecl...@>=
 971 void skip_restricted();
 972
 973 @ @c
 974 void
 975 skip_restricted() /* sets |id_first| and |id_loc| around a control text */
 976 {
 977   id_first=loc; *(limit+1)='@@'; /* the sentinel stops the scan below */
 978 false_alarm:
 979   while (*loc!='@@') loc++;
 980   id_loc=loc;
 981   if (loc++>limit) { /* only the sentinel was found */
 982     err_print("! Control text didn't end"); loc=limit;
 983 @.Control text didn't end@>
 984   }
 985   else {
 986     if (*loc=='@@'&&loc<=limit) {loc++; goto false_alarm;} /* doubled \.{@@} */
 987     if (*loc++!='>')
 988       err_print("! Control codes are forbidden in control text");
 989 @.Control codes are forbidden...@>
 990   }
 991 }
 992
 993 @ At the present point in the program we
 994 have |ccode[(eight_bits)*(loc-1)]==verbatim|; we set |id_first| to the beginning
 995 of the string itself, and |id_loc| to its ending-plus-one location in the
 996 buffer.  We also set |loc| to the position just after the ending delimiter.
 997
 998 @<Scan a verbatim string@>= {
 999   id_first=loc++; *(limit+1)='@@'; *(limit+2)='>'; /* plant \.{@@>} as a sentinel */
 1000   while (*loc!='@@' || *(loc+1)!='>') loc++;
 1001   if (loc>=limit) err_print("! Verbatim string didn't end");
 1002 @.Verbatim string didn't end@>
 1003   id_loc=loc; loc+=2; /* step over the \.{@@>} */
 1004   return (verbatim);
 1005 }
1006
1007 @** Phase one processing.
1008 We now have accumulated enough subroutines to make it possible to carry out
1009 \.{CWEAVE}'s first pass over the source file. If everything works right,
1010 both phase one and phase two of \.{CWEAVE} will assign the same numbers to
1011 sections, and these numbers will agree with what \.{CTANGLE} does.
1012
1013 The global variable |next_control| often contains the most recent output of
1014 |get_next|; in interesting cases, this will be the control code that
1015 ended a section or part of a section.
1016
1017 @<Global...@>=
 1018 eight_bits next_control; /* control code waiting to be acted upon */
1019
1020 @ The overall processing strategy in phase one has the following
1021 straightforward outline.
1022
1023 @<Predecl...@>=
1024 void phase_one();
1025
1026 @ @c
 1027 void
 1028 phase_one() { /* first pass over the input: gather cross-reference data */
 1029   phase=1; reset_input(); section_count=0;
 1030   skip_limbo(); change_exists=0;
 1031   while (!input_has_ended) /* one iteration per section */
 1032     @<Store cross-reference data for the current section@>;
 1033   changed_section[section_count]=change_exists;
 1034     /* the index changes if anything does */
 1035   phase=2; /* prepare for second phase */
 1036   @<Print error messages about unused or undefined section names@>;
 1037 }
1038
1039 @ @<Store cross-reference data...@>=
1040 {
1041   if (++section_count==max_sections) overflow("section number");
1042   changed_section[section_count]=changing;
1043      /* it will become 1 if any line changes */
1044   if (*(loc-1)=='*' && show_progress) {
1045     printf("*%d",section_count);
1046     update_terminal; /* print a progress report */
1047   }
1048   @<Store cross-references in the \TEX/ part of a section@>;
1049   @<Store cross-references in the definition part of a section@>;
1050   @<Store cross-references in the \CEE/ part of a section@>;
1051   if (changed_section[section_count]) change_exists=1;
1052 }
1053
1054 @ The |C_xref| subroutine stores references to identifiers in
1055 \CEE/ text material beginning with the current value of |next_control|
 1056 and continuing until |next_control| is a comment opener or `\.{\v}', or until the next
1057 ``milestone'' is passed (i.e., |next_control>=format_code|). If
1058 |next_control>=format_code| when |C_xref| is called, nothing will happen;
1059 but if |next_control=='|'| upon entry, the procedure assumes that this is
1060 the `\.{\v}' preceding \CEE/ text that is to be processed.
1061
1062 The parameter |spec_ctrl| is used to change this behavior. In most cases
1063 |C_xref| is called with |spec_ctrl==ignore|, which triggers the default
1064 processing described above. If |spec_ctrl==section_name|, section names will
1065 be gobbled. This is used when \CEE/ text in the \TEX/ part or inside comments
1066 is parsed: It allows for section names to appear in \pb, but these
1067 strings will not be entered into the cross reference lists since they are not
1068 definitions of section names.
1069
1070 The program uses the fact that our internal code numbers satisfy
1071 the relations |xref_roman==identifier+roman| and |xref_wildcard==identifier
1072 +wildcard| and |xref_typewriter==identifier+typewriter|,
1073 as well as |normal==0|.
1074
1075 @<Predecl...@>=
1076 void C_xref();
1077
1078 @ @c
 1079 void
 1080 C_xref( spec_ctrl ) /* makes cross-references for \CEE/ identifiers */
 1081   eight_bits spec_ctrl; /* an extra code that does not stop the loop */
 1082 {
 1083   name_pointer p; /* a referenced name */
 1084   while (next_control<format_code || next_control==spec_ctrl) {
 1085     if (next_control>=identifier && next_control<=xref_typewriter) {
 1086       if (next_control>identifier) @<Replace |"@@@@"| by |"@@"| @>@;
 1087       p=id_lookup(id_first, id_loc,next_control-identifier); new_xref(p);
 1088     }
 1089     if (next_control==section_name) { /* a section name that is being gobbled */
 1090       section_xref_switch=cite_flag;
 1091       new_section_xref(cur_section);
 1092     }
 1093     next_control=get_next();
 1094     if (next_control=='|' || next_control==begin_comment ||
 1095         next_control==begin_short_comment) return; /* left to the caller */
 1096   }
 1097 }
1098
1099 @ The |outer_xref| subroutine is like |C_xref| except that it begins
1100 with |next_control!='|'| and ends with |next_control>=format_code|. Thus, it
1101 handles \CEE/ text with embedded comments.
1102
1103 @<Predecl...@>=
1104 void outer_xref();
1105
1106 @ @c
 1107 void
 1108 outer_xref() /* extension of |C_xref| */
 1109 {
 1110   int bal; /* brace level in comment */
 1111   while (next_control<format_code)
 1112     if (next_control!=begin_comment && next_control!=begin_short_comment)
 1113       C_xref(ignore); /* \CEE/ text outside comments */
 1114     else {
 1115       boolean is_long_comment=(next_control==begin_comment);
 1116       bal=copy_comment(is_long_comment,1); next_control='|';
 1117       while (bal>0) { /* |copy_comment| stopped at a \.{\v} */
 1118         C_xref(section_name); /* do not reference section names in comments */
 1119         if (next_control=='|') bal=copy_comment(is_long_comment,bal);
 1120         else bal=0; /* an error message will occur in phase two */
 1121       }
 1122     }
 1123 }
1124
1125 @ In the \TEX/ part of a section, cross-reference entries are made only for
1126 the identifiers in \CEE/ texts enclosed in \pb, or for control texts
1127 enclosed in \.{@@\^}$\,\ldots\,$\.{@@>} or \.{@@.}$\,\ldots\,$\.{@@>}
1128 or \.{@@:}$\,\ldots\,$\.{@@>}.
1129
1130 @<Store cross-references in the \T...@>=
 1131 while (1) {
 1132   switch (next_control=skip_TeX()) {
 1133     case translit_code: err_print("! Use @@l in limbo only"); continue;
 1134 @.Use @@l in limbo...@>
 1135     case underline: xref_switch=def_flag; continue;
 1136     case trace: tracing=*(loc-1)-'0'; continue;
 1137     case '|': C_xref(section_name); break; /* \CEE/ text within \pb */
 1138     case xref_roman: case xref_wildcard: case xref_typewriter:
 1139     case noop: case section_name:
 1140       loc-=2; next_control=get_next(); /* back up, then scan to \.{@@>} */
 1141       if (next_control>=xref_roman && next_control<=xref_typewriter) {
 1142         @<Replace |"@@@@"| by |"@@"| @>@;
 1143         new_xref(id_lookup(id_first, id_loc,next_control-identifier));
 1144       }
 1145       break;
 1146   }
 1147   if (next_control>=format_code) break; /* the \TEX/ part is over */
 1148 }
1149
1150 @ @<Replace |"@@@@"| by |"@@"| @>=
 1151 {
 1152   char *src=id_first,*dst=id_first; /* the text is compacted in place */
 1153   while(src<id_loc){
 1154     if(*src=='@@') src++; /* skip the first \.{@@} of a pair */
 1155     *dst++=*src++;
 1156   }
 1157   id_loc=dst; /* new end of the text */
 1158   while (dst<src) *dst++=' '; /* clean up in case of error message display */
 1159 }
1160
1161 @ During the definition and \CEE/ parts of a section, cross-references
1162 are made for all identifiers except reserved words. However, the right
1163 identifier in a format definition is not referenced, and the left
1164 identifier is referenced only if it has been explicitly
1165 underlined (preceded by \.{@@!}).
1166 The \TEX/ code in comments is, of course, ignored, except for
1167 \CEE/ portions enclosed in \pb; the text of a section name is skipped
1168 entirely, even if it contains \pb\ constructions.
1169
1170 The variables |lhs| and |rhs| point to the respective identifiers involved
1171 in a format definition.
1172
1173 @<Global...@>=
1174 name_pointer lhs, rhs; /* pointers to |byte_start| for format identifiers */
1175 name_pointer res_wd_end; /* pointer to the first nonreserved identifier */
1176
1177 @ When we get to the following code we have |next_control>=format_code|.
1178
1179 @<Store cross-references in the d...@>=
1180 while (next_control<=definition) { /* |format_code| or |definition| */
1181   if (next_control==definition) {
1182     xref_switch=def_flag; /* implied \.{@@!} */
1183     next_control=get_next();
1184   } else @<Process a format definition@>;
1185   outer_xref();
1186 }
1187
1188 @ Error messages for improper format definitions will be issued in phase
1189 two. Our job in phase one is to define the |ilk| of a properly formatted
1190 identifier, and to remove cross-references to identifiers that we now
1191 discover should be unindexed.
1192
 1193 @<Process a form...@>= {
 1194   next_control=get_next();
 1195   if (next_control==identifier) { /* the left identifier */
 1196     lhs=id_lookup(id_first, id_loc,normal); lhs->ilk=normal;
 1197     if (xref_switch) new_xref(lhs);
 1198     next_control=get_next();
 1199     if (next_control==identifier) { /* the right identifier */
 1200       rhs=id_lookup(id_first, id_loc,normal);
 1201       lhs->ilk=rhs->ilk; /* the left identifier takes the ilk of the right */
 1202       if (unindexed(lhs)) { /* retain only underlined entries */
 1203         xref_pointer q,r=NULL; /* |r| is the last entry kept so far */
 1204         for (q=(xref_pointer)lhs->xref;q>xmem;q=q->xlink)
 1205           if (q->num<def_flag)
 1206             if (r) r->xlink=q->xlink; /* unlink |q| from the list */
 1207             else lhs->xref=(char*)q->xlink; /* |q| was the first entry */
 1208           else r=q;
 1209       }
 1210       next_control=get_next();
 1211     }
 1212   }
 1213 }
1214
1215 @ A much simpler processing of format definitions occurs when the
1216 definition is found in limbo.
1217
1218 @<Process simple format in limbo@>=
1219 {
1220   if (get_next()!=identifier)
1221     err_print("! Missing left identifier of @@s");
1222 @.Missing left identifier...@>
1223   else {
1224     lhs=id_lookup(id_first,id_loc,normal);
1225     if (get_next()!=identifier)
1226       err_print("! Missing right identifier of @@s");
1227 @.Missing right identifier...@>
1228     else {
1229       rhs=id_lookup(id_first,id_loc,normal);
1230       lhs->ilk=rhs->ilk;
1231     }
1232   }
1233 }
1234
1235 @ Finally, when the \TEX/ and definition parts have been treated, we have
1236 |next_control>=begin_C|.
1237
1238 @<Store cross-references in the \CEE/...@>=
 1239 if (next_control<=section_name) {  /* |begin_C| or |section_name| */
 1240   if (next_control==begin_C) section_xref_switch=0;
 1241   else {
 1242     section_xref_switch=def_flag; /* the first reference is flagged |def_flag| */
 1243     if(cur_section_char=='(' && cur_section!=name_dir)
 1244       set_file_flag(cur_section); /* the name was preceded by |'('| */
 1245   }
 1246   do {
 1247     if (next_control==section_name && cur_section!=name_dir)
 1248       new_section_xref(cur_section);
 1249     next_control=get_next(); outer_xref();
 1250   } while ( next_control<=section_name);
 1251 }
1252
1253 @ After phase one has looked at everything, we want to check that each
1254 section name was both defined and used.  The variable |cur_xref| will point
1255 to cross-references for the current section name of interest.
1256
1257 @<Global...@>=
1258 xref_pointer cur_xref; /* temporary cross-reference pointer */
1259 boolean an_output; /* did |file_flag| precede |cur_xref|? */
1260
1261 @ The following recursive procedure
1262 walks through the tree of section names and prints out anomalies.
1263 @^recursion@>
1264
1265 @<Predecl...@>=
1266 void section_check();
1267
1268 @ @c
 1269 void
 1270 section_check(p) /* reports section names never defined or never used */
 1271 name_pointer p; /* print anomalies in subtree |p| */
 1272 {
 1273   if (p) {
 1274     section_check(p->llink); /* left subtree first */
 1275     cur_xref=(xref_pointer)p->xref;
 1276     if (cur_xref->num==file_flag) {an_output=1; cur_xref=cur_xref->xlink;}
 1277     else an_output=0;
 1278     if (cur_xref->num <def_flag) { /* the first entry is not flagged |def_flag| */
 1279       printf("\n! Never defined: <"); print_section_name(p); putchar('>'); mark_harmless;
 1280 @.Never defined: <section name>@>
 1281     }
 1282     while (cur_xref->num >=cite_flag) cur_xref=cur_xref->xlink;
 1283     if (cur_xref==xmem && !an_output) { /* no entry below |cite_flag| is left */
 1284       printf("\n! Never used: <"); print_section_name(p); putchar('>'); mark_harmless;
 1285 @.Never used: <section name>@>
 1286     }
 1287     section_check(p->rlink); /* then the right subtree */
 1288   }
 1289 }
1290
1291 @ @<Print error messages about un...@>=section_check(root)
1292
1293 @* Low-level output routines.
1294 The \TEX/ output is supposed to appear in lines at most |line_length|
1295 characters long, so we place it into an output buffer. During the output
1296 process, |out_line| will hold the current line number of the line about to
1297 be output.
1298
1299 @<Global...@>=
1300 char out_buf[line_length+1]; /* assembled characters */
1301 char *out_ptr; /* just after last character in |out_buf| */
1302 char *out_buf_end = out_buf+line_length; /* end of |out_buf| */
1303 int out_line; /* number of next line to be output */
1304
1305 @ The |flush_buffer| routine empties the buffer up to a given breakpoint,
1306 and moves any remaining characters to the beginning of the next line.
1307 If the |per_cent| parameter is 1 a |'%'| is appended to the line
1308 that is being output; in this case the breakpoint |b| should be strictly
1309 less than |out_buf_end|. If the |per_cent| parameter is |0|,
1310 trailing blanks are suppressed.
1311 The characters emptied from the buffer form a new line of output;
1312 if the |carryover| parameter is true, a |"%"| in that line will be
1313 carried over to the next line (so that \TEX/ will ignore the completion
1314 of commented-out text).
1315
1316 @d c_line_write(c) fflush(active_file),fwrite(out_buf+1,sizeof(char),c,active_file)
1317 @d tex_putc(c) putc(c,active_file)
1318 @d tex_new_line putc('\n',active_file)
1319 @d tex_printf(c) fprintf(active_file,c)
1320
1321 @c
 1322 void
 1323 flush_buffer(b,per_cent,carryover) /* writes a line and keeps the rest */
 1324 char *b;  /* outputs from |out_buf+1| to |b|, where |b<=out_ptr| */
 1325 boolean per_cent,carryover; /* append |'%'|? carry a |'%'| over? */
 1326 {
 1327   char *j; j=b; /* pointer into |out_buf| */
 1328   if (! per_cent) /* remove trailing blanks */
 1329     while (j>out_buf && *j==' ') j--;
 1330   c_line_write(j-out_buf); /* the characters |out_buf[1]| through |*j| */
 1331   if (per_cent) tex_putc('%');
 1332   tex_new_line; out_line++;
 1333   if (carryover)
 1334     while (j>out_buf)
 1335       if (*j--=='%' && (j==out_buf || *j!='\\')) { /* an unescaped |'%'| */
 1336         *b--='%'; break; /* it will begin the carried-over text */
 1337       }
 1338   if (b<out_ptr) strncpy(out_buf+1,b+1,out_ptr-b);
 1339   out_ptr-=b-out_buf; /* NOTE(review): the copy above stays inside |out_buf|; check overlap */
 1340 }
1341
1342 @ When we are copying \TEX/ source material, we retain line breaks
1343 that occur in the input, except that an empty line is not
1344 output when the \TEX/ source line was nonempty. For example, a line
1345 of the \TEX/ file that contains only an index cross-reference entry
1346 will not be copied. The |finish_line| routine is called just before
1347 |get_line| inputs a new line, and just after a line break token has
1348 been emitted during the output of translated \CEE/ text.
1349
1350 @c
1351 void
1352 finish_line() /* do this at the end of a line */
1353 {
1354   char *k; /* pointer into |buffer| */
1355   if (out_ptr>out_buf) flush_buffer(out_ptr,0,0);
1356   else {
1357     for (k=buffer; k<=limit; k++)
1358       if (!(xisspace(*k))) return;
1359     flush_buffer(out_buf,0,0);
1360   }
1361 }
1362
1363 @ In particular, the |finish_line| procedure is called near the very
1364 beginning of phase two. We initialize the output variables in a slightly
1365 tricky way so that the first line of the output file will be
1366 `\.{\\input cwebmac}'.
1367
1368 @<Set init...@>=
1369 out_ptr=out_buf+1; out_line=1; active_file=tex_file;
1370 *out_ptr='c'; tex_printf("\\input cwebma");
1371
1372 @ When we wish to append one character |c| to the output buffer, we write
1373 `|out(c)|'; this will cause the buffer to be emptied if it was already
1374 full.  If we want to append more than one character at once, we say
1375 |out_str(s)|, where |s| is a string containing the characters.
1376
1377 A line break will occur at a space or after a single-nonletter
1378 \TEX/ control sequence.
1379
1380 @d out(c) {if (out_ptr>=out_buf_end) break_out(); *(++out_ptr)=c;}
1381
1382 @c
1383 void
1384 out_str(s) /* output characters from |s| to end of string */
1385 char *s;
1386 {
1387   while (*s) out(*s++);
1388 }
1389
1390 @ The |break_out| routine is called just before the output buffer is about
1391 to overflow. To make this routine a little faster, we initialize position
1392 0 of the output buffer to `\.\\'; this character isn't really output.
1393
1394 @<Set init...@>=
1395 out_buf[0]='\\';
1396
1397 @ A long line is broken at a blank space or just before a backslash that isn't
1398 preceded by another backslash. In the latter case, a |'%'| is output at
1399 the break.
1400
1401 @<Predecl...@>=
1402 void break_out();
1403
1404 @ @c
 1405 void
 1406 break_out() /* finds a way to break the output line */
 1407 {
 1408   char *k=out_ptr; /* pointer into |out_buf|; it moves backward from the end */
 1409   while (1) {
 1410     if (k==out_buf) @<Print warning message, break the line, |return|@>;
 1411     if (*k==' ') {
 1412       flush_buffer(k,0,1); return; /* break at the blank */
 1413     }
 1414     if (*(k--)=='\\' && *k!='\\') { /* we've decreased |k| */
 1415       flush_buffer(k,1,1); return; /* break before the backslash, adding |'%'| */
 1416     }
 1417   }
 1418 }
1419
1420 @ We get to this section only in the unusual case that the entire output line
1421 consists of a string of backslashes followed by a string of nonblank
1422 non-backslashes. In such cases it is almost always safe to break the
1423 line by putting a |'%'| just before the last character.
1424
1425 @<Print warning message...@>=
1426 {
1427   printf("\n! Line had to be broken (output l. %d):\n",out_line);
1428 @.Line had to be broken@>
1429   term_write(out_buf+1, out_ptr-out_buf-1);
1430   new_line; mark_harmless;
1431   flush_buffer(out_ptr-1,1,1); return;
1432 }
1433
 1434 @ Here is a function that outputs a section number in decimal notation.
1435 The number to be converted by |out_section| is known to be less than
1436 |def_flag|, so it cannot have more than five decimal digits.  If
1437 the section is changed, we output `\.{\\*}' just after the number.
1438
1439 @c
 1440 void
 1441 out_section(n) /* outputs section number |n| in decimal */
 1442 sixteen_bits n;
 1443 {
 1444   char s[6]; /* room for five digits and the terminating null */
 1445   sprintf(s,"%d",n); out_str(s);
 1446   if(changed_section[n]) out_str ("\\*"); /* this section is marked as changed */
 1447 @.\\*@>
 1448 }
1449
1450 @ The |out_name| procedure is used to output an identifier or index
1451 entry, enclosing it in braces.
1452
1453 @c
 1454 void
 1455 out_name(p,quote_xalpha) /* outputs the characters of name |p| within braces */
 1456 name_pointer p;
 1457 boolean quote_xalpha; /* put a backslash before |'_'| and |'$'|? */
 1458 {
 1459   char *k, *k_end=(p+1)->byte_start; /* pointers into |byte_mem| */
 1460   out('{');
 1461   for (k=p->byte_start; k<k_end; k++) {
 1462     if (isxalpha(*k) && quote_xalpha) out('\\');
 1463 @.\\\$@>
 1464 @.\\\_@>
 1465     out(*k);
 1466   }
 1467   out('}');
 1468 }
1469
1470 @* Routines that copy \TEX/ material.
1471 During phase two, we use subroutines |copy_limbo|, |copy_TeX|, and
1472 |copy_comment| in place of the analogous |skip_limbo|, |skip_TeX|, and
1473 |skip_comment| that were used in phase one. (Well, |copy_comment|
1474 was actually written in such a way that it functions as |skip_comment|
1475 in phase one.)
1476
1477 The |copy_limbo| routine, for example, takes \TEX/ material that is not
1478 part of any section and transcribes it almost verbatim to the output file.
1479 The use of `\.{@@}' signs is severely restricted in such material:
1480 `\.{@@@@}' pairs are replaced by singletons; `\.{@@l}' and `\.{@@q}' and
1481 `\.{@@s}' are interpreted.
1482
1483 @c
 1484 void
 1485 copy_limbo() /* copies the material in limbo to the output */
 1486 {
 1487   char c; /* the character after an \.{@@} */
 1488   while (1) {
 1489     if (loc>limit && (finish_line(), get_line()==0)) return;
 1490     *(limit+1)='@@'; /* sentinel for the loop below */
 1491     while (*loc!='@@') out(*(loc++));
 1492     if (loc++<=limit) { /* a real \.{@@}, not the sentinel */
 1493       c=*loc++;
 1494       if (ccode[(eight_bits)c]==new_section) break; /* limbo is over */
 1495       switch (ccode[(eight_bits)c]) {
 1496         case translit_code: out_str("\\ATL"); break;
 1497 @.\\ATL@>
 1498         case '@@': out('@@'); break; /* \.{@@@@} becomes a single \.{@@} */
 1499         case noop: skip_restricted(); break;
 1500         case format_code: if (get_next()==identifier) get_next();
 1501           if (loc>=limit) get_line(); /* avoid blank lines in output */
 1502           break; /* the operands of \.{@@s} are ignored on this pass */
 1503         default: err_print("! Double @@ should be used in limbo");
 1504 @.Double @@ should be used...@>
 1505         out('@@');
 1506       }
 1507     }
 1508   }
 1509 }
1510
1511 @ The |copy_TeX| routine processes the \TEX/ code at the beginning of a
1512 section; for example, the words you are now reading were copied in this
1513 way. It returns the next control code or `\.{\v}' found in the input.
1514 We don't copy spaces or tab marks into the beginning of a line. This
1515 makes the test for empty lines in |finish_line| work.
1516
1517 @ @f copy_TeX TeX
1518 @c
 1519 eight_bits
 1520 copy_TeX() /* copies \TEX/ text; returns the code that stopped it */
 1521 {
 1522   char c; /* current character being copied */
 1523   while (1) {
 1524     if (loc>limit && (finish_line(), get_line()==0)) return(new_section);
 1525     *(limit+1)='@@'; /* sentinel for the loop below */
 1526     while ((c=*(loc++))!='|' && c!='@@') {
 1527       out(c);
 1528       if (out_ptr==out_buf+1 && (xisspace(c))) out_ptr--; /* no leading blanks */
 1529     }
 1530     if (c=='|') return('|');
 1531     if (loc<=limit) return(ccode[(eight_bits)*(loc++)]); /* a real \.{@@} */
 1532   }
 1533 }
1534
1535 @ The |copy_comment| function issues a warning if more braces are opened than
1536 closed, and in the case of a more serious error it supplies enough
1537 braces to keep \TEX/ from complaining about unbalanced braces.
1538 Instead of copying the \TEX/ material
1539 into the output buffer, this function copies it into the token memory
1540 (in phase two only).
1541 The abbreviation |app_tok(t)| is used to append token |t| to the current
1542 token list, and it also makes sure that it is possible to append at least
1543 one further token without overflow.
1544
1545 @d app_tok(c) {if (tok_ptr+2>tok_mem_end) overflow("token"); *(tok_ptr++)=c;}
1546
1547 @<Predec...@>=
1548 int copy_comment();
1549
1550 @ @c
1551 int copy_comment(is_long_comment,bal) /* copies \TEX/ code in comments */
1552 boolean is_long_comment; /* is this a traditional \CEE/ comment? */
1553 int bal; /* brace balance */
1554 { /* returns |bal| when a vertical bar interrupts the comment, and 0 when the comment is finished */
1555   char c; /* current character being copied */
1556   while (1) {
1557     if (loc>limit) { /* the current line is used up */
1558       if (is_long_comment) {
1559         if (get_line()==0) { /* no further line could be read */
1560           err_print("! Input ended in mid-comment");
1561 @.Input ended in mid-comment@>
1562           loc=buffer+1; goto done;
1563         }
1564       }
1565       else { /* a comment that is not long stops where its line stops */
1566         if (bal>1) err_print("! Missing } in comment");
1567 @.Missing \} in comment@>
1568         goto done;
1569       }
1570     }
1571     c=*(loc++);
1572     if (c=='|') return(bal); /* stop at a vertical bar and report the current brace balance */
1573     if (is_long_comment) @<Check for end of comment@>;
1574     if (phase==2) { /* tokens are stored in phase two only */
1575       if (ishigh(c)) app_tok(quoted_char); /* a |quoted_char| token goes in front of a character for which |ishigh| holds */
1576       app_tok(c);
1577     }
1578     @<Copy special things when |c=='@@', '\\'|@>;
1579     if (c=='{') bal++;
1580     else if (c=='}') {
1581       if(bal>1) bal--;
1582       else {err_print("! Extra } in comment");
1583 @.Extra \} in comment@>
1584         if (phase==2) tok_ptr--; /* remove the unmatched brace that was just stored */
1585       }
1586     }
1587   }
1588 done:@<Clear |bal| and |return|@>;
1589 }
1590
1591 @ @<Check for end of comment@>=
1592 if (c=='*' && *loc=='/') { /* a star followed at once by a slash closes the comment */
1593   loc++; /* move past the slash */
1594   if (bal>1) err_print("! Missing } in comment");
1595 @.Missing \} in comment@>
1596   goto done;
1597 }
1598
1599 @ @<Copy special things when |c=='@@'...@>=
1600 if (c=='@@') {
1601   if (*(loc++)!='@@') { /* only a doubled \.{@@} is accepted here */
1602     err_print("! Illegal use of @@ in comment");
1603 @.Illegal use of @@...@>
1604     loc-=2; if (phase==2) *(tok_ptr-1)=' '; goto done; /* go back to the offending \.{@@} and turn its stored copy into a space */
1605   }
1606 }
1607 else if (c=='\\' && *loc!='@@')
1608   if (phase==2) app_tok(*(loc++)) else loc++; /* the character after a backslash is passed over without being examined; |app_tok| supplies its own braces, hence no semicolon before |else| */
1609
1610 @ We output
1611 enough right braces to keep \TEX/ happy.
1612
1613 @<Clear |bal|...@>=
1614 if (phase==2) while (bal-- >0) app_tok('}'); /* one right brace is stored for each unit of |bal| */
1615 return(0); /* zero is what the caller sees when the comment is over */
1616
1617 @** Parsing.
1618 The most intricate part of \.{CWEAVE} is its mechanism for converting
1619 \CEE/-like code into \TEX/ code, and we might as well plunge into this
1620 aspect of the program now. A ``bottom up'' approach is used to parse the
1621 \CEE/-like material, since \.{CWEAVE} must deal with fragmentary
1622 constructions whose overall ``part of speech'' is not known.
1623
1624 At the lowest level, the input is represented as a sequence of entities
1625 that we shall call {\it scraps}, where each scrap of information consists
1626 of two parts, its {\it category} and its {\it translation}. The category
1627 is essentially a syntactic class, and the translation is a token list that
1628 represents \TEX/ code. Rules of syntax and semantics tell us how to
1629 combine adjacent scraps into larger ones, and if we are lucky an entire
1630 \CEE/ text that starts out as hundreds of small scraps will join
1631 together into one gigantic scrap whose translation is the desired \TEX/
1632 code. If we are unlucky, we will be left with several scraps that don't
1633 combine; their translations will simply be output, one by one.
1634
1635 The combination rules are given as context-sensitive productions that are
1636 applied from left to right. Suppose that we are currently working on the
1637 sequence of scraps $s_1\,s_2\ldots s_n$. We try first to find the longest
1638 production that applies to an initial substring $s_1\,s_2\ldots\,$; but if
1639 no such productions exist, we try to find the longest production
1640 applicable to the next substring $s_2\,s_3\ldots\,$; and if that fails, we
1641 try to match $s_3\,s_4\ldots\,$, etc.
1642
1643 A production applies if the category codes have a given pattern. For
1644 example, one of the productions (see rule~3) is
1645 $$\hbox{|exp| }\left\{\matrix{\hbox{|binop|}\cr\hbox{|ubinop|}}\right\}
1646 \hbox{ |exp| }\RA\hbox{ |exp|}$$
1647 and it means that three consecutive scraps whose respective categories are
1648 |exp|, |binop| (or |ubinop|),
1649 and |exp| are converted to one scrap whose category
1650 is |exp|.  The translations of the original
1651 scraps are simply concatenated.  The case of
1652 $$\hbox{|exp| |comma| |exp| $\RA$ |exp|} \hskip4emE_1C\,\\{opt}9\,E_2$$
1653 (rule 4) is only slightly more complicated:
1654 Here the resulting |exp| translation
1655 consists not only of the three original translations, but also of the
1656 tokens |opt| and 9 between the translations of the
1657 |comma| and the following |exp|.
1658 In the \TEX/ file, this will specify an optional line break after the
1659 comma, with penalty 90.
1660
1661 At each opportunity the longest possible production is applied.  For
1662 example, if the current sequence of scraps is |int_like| |cast|
1663 |lbrace|, rule 31 is applied; but if the sequence is |int_like| |cast|
1664 followed by anything other than |lbrace|, rule 32 takes effect.
1665
1666 Translation rules such as `$E_1C\,\\{opt}9\,E_2$' above use subscripts
1667 to distinguish between translations of scraps whose categories have the
1668 same initial letter; these subscripts are assigned from left to right.
1669
1670 @ Here is a list of the category codes that scraps can have.
1671 (A few others, like |int_like|, have already been defined; the
1672 |cat_name| array contains a complete list.)
1673
1674 @d exp 1 /* denotes an expression, including perhaps a single identifier */
1675 @d unop 2 /* denotes a unary operator */
1676 @d binop 3 /* denotes a binary operator */
1677 @d ubinop 4
1678   /* denotes an operator that can be unary or binary, depending on context */
1679 @d cast 5 /* denotes a cast */
1680 @d question 6 /* denotes a question mark and possibly the expressions flanking it */
1681 @d lbrace 7 /* denotes a left brace */
1682 @d rbrace 8 /* denotes a right brace */
1683 @d decl_head 9 /* denotes an incomplete declaration */
1684 @d comma 10 /* denotes a comma */
1685 @d lpar 11 /* denotes a left parenthesis or left bracket */
1686 @d rpar 12 /* denotes a right parenthesis or right bracket */
1687 @d prelangle 13 /* denotes `$<$' before we know what it is */
1688 @d prerangle 14 /* denotes `$>$' before we know what it is */
1689 @d langle 15 /* denotes `$<$' when it's used as angle bracket in a template */
1690 @d colcol 18 /* denotes `::' */
1691 @d base 19 /* denotes a colon that introduces a base specifier */
1692 @d decl 20 /* denotes a complete declaration */
1693 @d struct_head 21 /* denotes the beginning of a structure specifier */
1694 @d stmt 23 /* denotes a complete statement */
1695 @d function 24 /* denotes a complete function */
1696 @d fn_decl 25 /* denotes a function declarator */
1697 @d semi 27 /* denotes a semicolon */
1698 @d colon 28 /* denotes a colon */
1699 @d tag 29 /* denotes a statement label */
1700 @d if_head 30 /* denotes the beginning of a compound conditional */
1701 @d else_head 31 /* denotes a prefix for a compound statement */
1702 @d if_clause 32 /* pending \.{if} together with a condition */
1703 @d lproc 35 /* begins a preprocessor command */
1704 @d rproc 36 /* ends a preprocessor command */
1705 @d insert 37 /* a scrap that gets combined with its neighbor */
1706 @d section_scrap 38 /* section name */
1707 @d dead 39 /* scrap that won't combine */
1708 @d ftemplate 59 /* \\{make\_pair} */
1709 @d new_exp 60 /* \&{new} and a following type identifier */
1710 @d begin_arg 61 /* \.{@@[} */
1711 @d end_arg 62 /* \.{@@]} */
1712
1713 @<Glo...@>=
1714 char cat_name[256][12];
1715 eight_bits cat_index;
1716
1717 @ @<Set in...@>=
1718     for (cat_index=0;cat_index<255;cat_index++) /* default name for entries 0 to 254; entry 255 is not reached by this loop */
1719       strcpy(cat_name[cat_index],"UNKNOWN");
1720 @.UNKNOWN@>
1721     strcpy(cat_name[exp],"exp");
1722     strcpy(cat_name[unop],"unop");
1723     strcpy(cat_name[binop],"binop");
1724     strcpy(cat_name[ubinop],"ubinop");
1725     strcpy(cat_name[cast],"cast");
1726     strcpy(cat_name[question],"?");
1727     strcpy(cat_name[lbrace],"{"@q}@>);
1728     strcpy(cat_name[rbrace],@q{@>"}");
1729     strcpy(cat_name[decl_head],"decl_head");
1730     strcpy(cat_name[comma],",");
1731     strcpy(cat_name[lpar],"(");
1732     strcpy(cat_name[rpar],")");
1733     strcpy(cat_name[prelangle],"<");
1734     strcpy(cat_name[prerangle],">");
1735     strcpy(cat_name[langle],"\\<");
1736     strcpy(cat_name[colcol],"::");
1737     strcpy(cat_name[base],"\\:");
1738     strcpy(cat_name[decl],"decl");
1739     strcpy(cat_name[struct_head],"struct_head"); /* eleven characters, the longest name that fits a row of twelve */
1740     strcpy(cat_name[alfop],"alfop");
1741     strcpy(cat_name[stmt],"stmt");
1742     strcpy(cat_name[function],"function");
1743     strcpy(cat_name[fn_decl],"fn_decl");
1744     strcpy(cat_name[else_like],"else_like");
1745     strcpy(cat_name[semi],";");
1746     strcpy(cat_name[colon],":");
1747     strcpy(cat_name[tag],"tag");
1748     strcpy(cat_name[if_head],"if_head");
1749     strcpy(cat_name[else_head],"else_head");
1750     strcpy(cat_name[if_clause],"if()");
1751     strcpy(cat_name[lproc],"#{"@q}@>);
1752     strcpy(cat_name[rproc],@q{@>"#}");
1753     strcpy(cat_name[insert],"insert");
1754     strcpy(cat_name[section_scrap],"section");
1755     strcpy(cat_name[dead],"@@d");
1756     strcpy(cat_name[public_like],"public");
1757     strcpy(cat_name[operator_like],"operator");
1758     strcpy(cat_name[new_like],"new");
1759     strcpy(cat_name[catch_like],"catch");
1760     strcpy(cat_name[for_like],"for");
1761     strcpy(cat_name[do_like],"do");
1762     strcpy(cat_name[if_like],"if");
1763     strcpy(cat_name[delete_like],"delete");
1764     strcpy(cat_name[raw_ubin],"ubinop?");
1765     strcpy(cat_name[const_like],"const");
1766     strcpy(cat_name[raw_int],"raw");
1767     strcpy(cat_name[int_like],"int");
1768     strcpy(cat_name[case_like],"case");
1769     strcpy(cat_name[sizeof_like],"sizeof");
1770     strcpy(cat_name[struct_like],"struct");
1771     strcpy(cat_name[typedef_like],"typedef");
1772     strcpy(cat_name[define_like],"define");
1773     strcpy(cat_name[template_like],"template");
1774     strcpy(cat_name[ftemplate],"ftemplate");
1775     strcpy(cat_name[new_exp],"new_exp");
1776     strcpy(cat_name[begin_arg],"@@["@q]@>);
1777     strcpy(cat_name[end_arg],@q[@>"@@]");
1778     strcpy(cat_name[0],"zero"); /* replaces the default name given to entry 0 by the loop above */
1779
1780 @ This code allows \.{CWEAVE} to display its parsing steps.
1781
1782 @c
1783 void
1784 print_cat(c) /* symbolic printout of a category */
1785 eight_bits c; /* category code, used as an index into |cat_name| */
1786 {
1787   fputs(cat_name[c],stdout); /* the name is data, not a format, so it is written without being scanned for conversion specifications */
1788 }
1789
1790 @ The token lists for translated \TEX/ output contain some special control
1791 symbols as well as ordinary characters. These control symbols are
1792 interpreted by \.{CWEAVE} before they are written to the output file.
1793
1794 \yskip\hang |break_space| denotes an optional line break or an en space;
1795
1796 \yskip\hang |force| denotes a line break;
1797
1798 \yskip\hang |big_force| denotes a line break with additional vertical space;
1799
1800 \yskip\hang |preproc_line| denotes that the line will be printed flush left;
1801
1802 \yskip\hang |opt| denotes an optional line break (with the continuation
1803 line indented two ems with respect to the normal starting position)---this
1804 code is followed by an integer |n|, and the break will occur with penalty
1805 $10n$;
1806
1807 \yskip\hang |backup| denotes a backspace of one em;
1808
1809 \yskip\hang |cancel| obliterates any |break_space|, |opt|, |force|, or
1810 |big_force| tokens that immediately precede or follow it and also cancels any
1811 |backup| tokens that follow it;
1812
1813 \yskip\hang |indent| causes future lines to be indented one more em;
1814
1815 \yskip\hang |outdent| causes future lines to be indented one less em.
1816
1817 \yskip\noindent All of these tokens are removed from the \TEX/ output that
1818 comes from \CEE/ text between \pb\ signs; |break_space| and |force| and
1819 |big_force| become single spaces in this mode. The translation of other
1820 \CEE/ texts results in \TEX/ control sequences \.{\\1}, \.{\\2},
1821 \.{\\3}, \.{\\4}, \.{\\5}, \.{\\6}, \.{\\7}, \.{\\8}
1822 corresponding respectively to
1823 |indent|, |outdent|, |opt|, |backup|, |break_space|, |force|,
1824 |big_force| and |preproc_line|.
1825 However, a sequence of consecutive `\.\ ', |break_space|,
1826 |force|, and/or |big_force| tokens is first replaced by a single token
1827 (the maximum of the given ones).
1828
1829 The token |math_rel| will be translated into
1830 \.{\\MRL\{}, and it will get a matching \.\} later.
1831 Other control sequences in the \TEX/ output will be
1832 `\.{\\\\\{}$\,\ldots\,$\.\}'
1833 surrounding identifiers, `\.{\\\&\{}$\,\ldots\,$\.\}' surrounding
1834 reserved words, `\.{\\.\{}$\,\ldots\,$\.\}' surrounding strings,
1835 `\.{\\C\{}$\,\ldots\,$\.\}$\,$|force|' surrounding comments, and
1836 `\.{\\X$n$:}$\,\ldots\,$\.{\\X}' surrounding section names, where
1837 |n| is the section number.
1838
1839 @d math_rel 0206
1840 @d big_cancel 0210 /* like |cancel|, also overrides spaces */
1841 @d cancel 0211 /* overrides |backup|, |break_space|, |force|, |big_force| */
1842 @d indent 0212 /* one more tab (\.{\\1}) */
1843 @d outdent 0213 /* one less tab (\.{\\2}) */
1844 @d opt 0214 /* optional break in mid-statement (\.{\\3}) */
1845 @d backup 0215 /* stick out one unit to the left (\.{\\4}) */
1846 @d break_space 0216 /* optional break between statements (\.{\\5}) */
1847 @d force 0217 /* forced break between statements (\.{\\6}) */
1848 @d big_force 0220 /* forced break with additional space (\.{\\7}) */
1849 @d preproc_line 0221 /* begin line without indentation (\.{\\8}) */
1850 @^high-bit character handling@>
1851 @d quoted_char 0222
1852         /* introduces a character token in the range |0200|--|0377| */
1853 @d end_translation 0223 /* special sentinel token at end of list */
1854 @d inserted 0224 /* sentinel to mark translations of inserts */
1855 @d qualifier 0225 /* introduces an explicit namespace qualifier */
1856
1857 @ The raw input is converted into scraps according to the following table,
1858 which gives category codes followed by the translations.
1859 \def\stars {\.{**}}%
1860 The symbol `\stars' stands for `\.{\\\&\{{\rm identifier}\}}',
1861 i.e., the identifier itself treated as a reserved word.
1862 The right-hand column is the so-called |mathness|, which is explained
1863 further below.
1864
1865 An identifier |c| of length 1 is translated as \.{\\\v c} instead of
1866 as \.{\\\\\{c\}}. An identifier \.{CAPS} in all caps is translated as
1867 \.{\\.\{CAPS\}} instead of as \.{\\\\\{CAPS\}}. An identifier that has
1868 become a reserved word via |typedef| is translated with \.{\\\&} replacing
1869 \.{\\\\} and |raw_int| replacing |exp|.
1870
1871 A string of length greater than 20 is broken into pieces of size at most~20
1872 with discretionary breaks in between.
1873
1874 \yskip\halign{\quad#\hfil&\quad#\hfil&\quad\hfil#\hfil\cr
1875 \.{!=}&|binop|: \.{\\I}&yes\cr
1876 \.{<=}&|binop|: \.{\\Z}&yes\cr
1877 \.{>=}&|binop|: \.{\\G}&yes\cr
1878 \.{==}&|binop|: \.{\\E}&yes\cr
1879 \.{\&\&}&|binop|: \.{\\W}&yes\cr
1880 \.{\v\v}&|binop|: \.{\\V}&yes\cr
1881 \.{++}&|unop|: \.{\\PP}&yes\cr
1882 \.{--}&|unop|: \.{\\MM}&yes\cr
1883 \.{->}&|binop|: \.{\\MG}&yes\cr
1884 \.{>>}&|binop|: \.{\\GG}&yes\cr
1885 \.{<<}&|binop|: \.{\\LL}&yes\cr
1886 \.{::}&|colcol|: \.{\\DC}&maybe\cr
1887 \.{.*}&|binop|: \.{\\PA}&yes\cr
1888 \.{->*}&|binop|: \.{\\MGA}&yes\cr
1889 \.{...}&|raw_int|: \.{\\,\\ldots\\,}&yes\cr
1890 \."string\."&|exp|: \.{\\.\{}string with special characters quoted\.\}&maybe\cr
1891 \.{@@=}string\.{@@>}&|exp|: \.{\\vb\{}string with special characters
1892   quoted\.\}&maybe\cr
1893 \.{@@'7'}&|exp|: \.{\\.\{@@'7'\}}&maybe\cr
1894 \.{077} or \.{\\77}&|exp|: \.{\\T\{\\\~77\}}&maybe\cr
1895 \.{0x7f}&|exp|: \.{\\T\{\\\^7f\}}&maybe\cr
1896 \.{77}&|exp|: \.{\\T\{77\}}&maybe\cr
1897 \.{77L}&|exp|: \.{\\T\{77\\\$L\}}&maybe\cr
1898 \.{0.1E5}&|exp|: \.{\\T\{0.1\\\_5\}}&maybe\cr
1899 \.+&|ubinop|: \.+&yes\cr
1900 \.-&|ubinop|: \.-&yes\cr
1901 \.*&|raw_ubin|: \.*&yes\cr
1902 \./&|binop|: \./&yes\cr
1903 \.<&|prelangle|: \.{\\langle}&yes\cr
1904 \.=&|binop|: \.{\\K}&yes\cr
1905 \.>&|prerangle|: \.{\\rangle}&yes\cr
1906 \..&|binop|: \..&yes\cr
1907 \.{\v}&|binop|: \.{\\OR}&yes\cr
1908 \.\^&|binop|: \.{\\XOR}&yes\cr
1909 \.\%&|binop|: \.{\\MOD}&yes\cr
1910 \.?&|question|: \.{\\?}&yes\cr
1911 \.!&|unop|: \.{\\R}&yes\cr
1912 \.\~&|unop|: \.{\\CM}&yes\cr
1913 \.\&&|raw_ubin|: \.{\\AND}&yes\cr
1914 \.(&|lpar|: \.(&maybe\cr
1915 \.[&|lpar|: \.[&maybe\cr
1916 \.)&|rpar|: \.)&maybe\cr
1917 \.]&|rpar|: \.]&maybe\cr
1918 \.\{&|lbrace|: \.\{&yes\cr
1919 \.\}&|rbrace|: \.\}&yes\cr
1920 \.,&|comma|: \.,&yes\cr
1921 \.;&|semi|: \.;&maybe\cr
1922 \.:&|colon|: \.:&no\cr
1923 \.\# (within line)&|ubinop|: \.{\\\#}&yes\cr
1924 \.\# (at beginning)&|lproc|:  |force| |preproc_line| \.{\\\#}&no\cr
1925 end of \.\# line&|rproc|:  |force|&no\cr
1926 identifier&|exp|: \.{\\\\\{}identifier with underlines and
1927              dollar signs quoted\.\}&maybe\cr
1928 \.{and}&|alfop|: \stars&yes\cr
1929 \.{and\_eq}&|alfop|: \stars&yes\cr
1930 \.{asm}&|sizeof_like|: \stars&maybe\cr
1931 \.{auto}&|int_like|: \stars&maybe\cr
1932 \.{bitand}&|alfop|: \stars&yes\cr
1933 \.{bitor}&|alfop|: \stars&yes\cr
1934 \.{bool}&|raw_int|: \stars&maybe\cr
1935 \.{break}&|case_like|: \stars&maybe\cr
1936 \.{case}&|case_like|: \stars&maybe\cr
1937 \.{catch}&|catch_like|: \stars&maybe\cr
1938 \.{char}&|raw_int|: \stars&maybe\cr
1939 \.{class}&|struct_like|: \stars&maybe\cr
1940 \.{clock\_t}&|raw_int|: \stars&maybe\cr
1941 \.{compl}&|alfop|: \stars&yes\cr
1942 \.{const}&|const_like|: \stars&maybe\cr
1943 \.{const\_cast}&|raw_int|: \stars&maybe\cr
1944 \.{continue}&|case_like|: \stars&maybe\cr
1945 \.{default}&|case_like|: \stars&maybe\cr
1946 \.{define}&|define_like|: \stars&maybe\cr
1947 \.{defined}&|sizeof_like|: \stars&maybe\cr
1948 \.{delete}&|delete_like|: \stars&maybe\cr
1949 \.{div\_t}&|raw_int|: \stars&maybe\cr
1950 \.{do}&|do_like|: \stars&maybe\cr
1951 \.{double}&|raw_int|: \stars&maybe\cr
1952 \.{dynamic\_cast}&|raw_int|: \stars&maybe\cr
1953 \.{elif}&|if_like|: \stars&maybe\cr
1954 \.{else}&|else_like|: \stars&maybe\cr
1955 \.{endif}&|if_like|: \stars&maybe\cr
1956 \.{enum}&|struct_like|: \stars&maybe\cr
1957 \.{error}&|if_like|: \stars&maybe\cr
1958 \.{explicit}&|int_like|: \stars&maybe\cr
1959 \.{export}&|int_like|: \stars&maybe\cr
1960 \.{extern}&|int_like|: \stars&maybe\cr
1961 \.{FILE}&|raw_int|: \stars&maybe\cr
1962 \.{float}&|raw_int|: \stars&maybe\cr
1963 \.{for}&|for_like|: \stars&maybe\cr
1964 \.{fpos\_t}&|raw_int|: \stars&maybe\cr
1965 \.{friend}&|int_like|: \stars&maybe\cr
1966 \.{goto}&|case_like|: \stars&maybe\cr
1967 \.{if}&|if_like|: \stars&maybe\cr
1968 \.{ifdef}&|if_like|: \stars&maybe\cr
1969 \.{ifndef}&|if_like|: \stars&maybe\cr
1970 \.{include}&|if_like|: \stars&maybe\cr
1971 \.{inline}&|int_like|: \stars&maybe\cr
1972 \.{int}&|raw_int|: \stars&maybe\cr
1973 \.{jmp\_buf}&|raw_int|: \stars&maybe\cr
1974 \.{ldiv\_t}&|raw_int|: \stars&maybe\cr
1975 \.{line}&|if_like|: \stars&maybe\cr
1976 \.{long}&|raw_int|: \stars&maybe\cr
1977 \.{make\_pair}&|ftemplate|: \.{\\\\\{make\\\_pair\}}&maybe\cr
1978 \.{mutable}&|int_like|: \stars&maybe\cr
1979 \.{namespace}&|struct_like|: \stars&maybe\cr
1980 \.{new}&|new_like|: \stars&maybe\cr
1981 \.{not}&|alfop|: \stars&yes\cr
1982 \.{not\_eq}&|alfop|: \stars&yes\cr
1983 \.{NULL}&|exp|: \.{\\NULL}&yes\cr
1984 \.{offsetof}&|raw_int|: \stars&maybe\cr
1985 \.{operator}&|operator_like|: \stars&maybe\cr
1986 \.{or}&|alfop|: \stars&yes\cr
1987 \.{or\_eq}&|alfop|: \stars&yes\cr
1988 \.{pragma}&|if_like|: \stars&maybe\cr
1989 \.{private}&|public_like|: \stars&maybe\cr
1990 \.{protected}&|public_like|: \stars&maybe\cr
1991 \.{ptrdiff\_t}&|raw_int|: \stars&maybe\cr
1992 \.{public}&|public_like|: \stars&maybe\cr
1993 \.{register}&|int_like|: \stars&maybe\cr
1994 \.{reinterpret\_cast}&|raw_int|: \stars&maybe\cr
1995 \.{return}&|case_like|: \stars&maybe\cr
1996 \.{short}&|raw_int|: \stars&maybe\cr
1997 \.{sig\_atomic\_t}&|raw_int|: \stars&maybe\cr
1998 \.{signed}&|raw_int|: \stars&maybe\cr
1999 \.{size\_t}&|raw_int|: \stars&maybe\cr
2000 \.{sizeof}&|sizeof_like|: \stars&maybe\cr
2001 \.{static}&|int_like|: \stars&maybe\cr
2002 \.{static\_cast}&|raw_int|: \stars&maybe\cr
2003 \.{struct}&|struct_like|: \stars&maybe\cr
2004 \.{switch}&|for_like|: \stars&maybe\cr
2005 \.{template}&|template_like|: \stars&maybe\cr
2006 \.{TeX}&|exp|: \.{\\TeX}&yes\cr
2007 \.{this}&|exp|: \.{\\this}&yes\cr
2008 \.{throw}&|case_like|: \stars&maybe\cr
2009 \.{time\_t}&|raw_int|: \stars&maybe\cr
2010 \.{try}&|else_like|: \stars&maybe\cr
2011 \.{typedef}&|typedef_like|: \stars&maybe\cr
2012 \.{typeid}&|raw_int|: \stars&maybe\cr
2013 \.{typename}&|struct_like|: \stars&maybe\cr
2014 \.{undef}&|if_like|: \stars&maybe\cr
2015 \.{union}&|struct_like|: \stars&maybe\cr
2016 \.{unsigned}&|raw_int|: \stars&maybe\cr
2017 \.{using}&|int_like|: \stars&maybe\cr
2018 \.{va\_dcl}&|decl|: \stars&maybe\cr
2019 \.{va\_list}&|raw_int|: \stars&maybe\cr
2020 \.{virtual}&|int_like|: \stars&maybe\cr
2021 \.{void}&|raw_int|: \stars&maybe\cr
2022 \.{volatile}&|const_like|: \stars&maybe\cr
2023 \.{wchar\_t}&|raw_int|: \stars&maybe\cr
2024 \.{while}&|for_like|: \stars&maybe\cr
2025 \.{xor}&|alfop|: \stars&yes\cr
2026 \.{xor\_eq}&|alfop|: \stars&yes\cr
2027 \.{@@,}&|insert|: \.{\\,}&maybe\cr
2028 \.{@@\v}&|insert|:  |opt| \.0&maybe\cr
2029 \.{@@/}&|insert|:  |force|&no\cr
2030 \.{@@\#}&|insert|:  |big_force|&no\cr
2031 \.{@@+}&|insert|:  |big_cancel| \.{\{\}} |break_space|
2032   \.{\{\}} |big_cancel|&no\cr
2033 \.{@@;}&|semi|: &maybe\cr
2034 \.{@@[@q]@>}&|begin_arg|: &maybe\cr
2035 \.{@q[@>@@]}&|end_arg|: &maybe\cr
2036 \.{@@\&}&|insert|: \.{\\J}&maybe\cr
2037 \.{@@h}&|insert|: |force| \.{\\ATH} |force|&no\cr
2038 \.{@@<}\thinspace section name\thinspace\.{@@>}&|section_scrap|:
2039  \.{\\X}$n$\.:translated section name\.{\\X}&maybe\cr
2040 \.{@@(@q)@>}\thinspace section name\thinspace\.{@@>}&|section_scrap|:
2041  \.{\\X}$n$\.{:\\.\{}section name with special characters
2042       quoted\.{\ \}\\X}&maybe\cr
2043 \.{/*}comment\.{*/}&|insert|: |cancel|
2044       \.{\\C\{}translated comment\.\} |force|&no\cr
2045 \.{//}comment&|insert|: |cancel|
2046       \.{\\SHC\{}translated comment\.\} |force|&no\cr
2047 }
2048
2049 \smallskip
2050 The construction \.{@@t}\thinspace stuff\/\thinspace\.{@@>} contributes
2051 \.{\\hbox\{}\thinspace  stuff\/\thinspace\.\} to the following scrap.
2052
2053 @i prod.w
2054
2055 @* Implementing the productions.
2056 More specifically, a scrap is a structure consisting of a category
2057 |cat| and a |text_pointer| |trans|, which points to the translation in
2058 |tok_start|.  When \CEE/ text is to be processed with the grammar above,
2059 we form an array |scrap_info| containing the initial scraps.
2060 Our production rules have the nice property that the right-hand side is never
2061 longer than the left-hand side. Therefore it is convenient to use sequential
2062 allocation for the current sequence of scraps. Five pointers are used to
2063 manage the parsing:
2064
2065 \yskip\hang |pp| is a pointer into |scrap_info|.  We will try to match
2066 the category codes |pp->cat,@,@,(pp+1)->cat|$,\,\,\ldots\,$
2067 to the left-hand sides of productions.
2068
2069 \yskip\hang |scrap_base|, |lo_ptr|, |hi_ptr|, and |scrap_ptr| are such that
2070 the current sequence of scraps appears in positions |scrap_base| through
2071 |lo_ptr| and |hi_ptr| through |scrap_ptr|, inclusive, in the |scrap_info|
2072 array. Scraps located between |scrap_base| and |lo_ptr| have
2073 been examined, while those in positions |>=hi_ptr| have not yet been
2074 looked at by the parsing process.
2075
2076 \yskip\noindent Initially |scrap_ptr| is set to the position of the final
2077 scrap to be parsed, and it doesn't change its value. The parsing process
2078 makes sure that |lo_ptr>=pp+3|, since productions have as many as four terms,
2079 by moving scraps from |hi_ptr| to |lo_ptr|. If there are
2080 fewer than |pp+3| scraps left, the positions up to |pp+3| are filled with
2081 blanks that will not match in any productions. Parsing stops when
2082 |pp==lo_ptr+1| and |hi_ptr==scrap_ptr+1|.
2083
2084 Since the |scrap| structure will later be used for other purposes, we
2085 declare its last element as a union.
2086
2087 @<Type...@>=
2088 typedef struct {
2089   eight_bits cat; /* category code of the scrap */
2090   eight_bits mathness; /* boundary information: left boundary in the low two bits, right boundary in the next two */
2091   union {
2092     text_pointer Trans; /* the translation; the macro |trans| gives access to it */
2093     @<Rest of |trans_plus| union@>@;
2094   } trans_plus;
2095 } scrap;
2096 typedef scrap *scrap_pointer;
2097
2098 @ @d trans trans_plus.Trans /* translation texts of scraps */
2099
2100 @<Global...@>=
2101 scrap scrap_info[max_scraps]; /* memory array for scraps */
2102 scrap_pointer scrap_info_end=scrap_info+max_scraps -1; /* end of |scrap_info| */
2103 scrap_pointer pp; /* current position for reducing productions */
2104 scrap_pointer scrap_base; /* beginning of the current scrap sequence */
2105 scrap_pointer scrap_ptr; /* ending of the current scrap sequence */
2106 scrap_pointer lo_ptr; /* last scrap that has been examined */
2107 scrap_pointer hi_ptr; /* first scrap that has not been examined */
2108 scrap_pointer max_scr_ptr; /* largest value assumed by |scrap_ptr| */
2109
2110 @ @<Set init...@>=
2111 scrap_base=scrap_info+1;
2112 max_scr_ptr=scrap_ptr=scrap_info;
2113
2114 @ Token lists in |@!tok_mem| are composed of the following kinds of
2115 items for \TEX/ output.
2116
2117 \yskip\item{$\bullet$}Character codes and special codes like |force| and
2118 |math_rel| represent themselves;
2119
2120 \item{$\bullet$}|id_flag+p| represents \.{\\\\\{{\rm identifier $p$}\}};
2121
2122 \item{$\bullet$}|res_flag+p| represents \.{\\\&\{{\rm identifier $p$}\}};
2123
2124 \item{$\bullet$}|section_flag+p| represents section name |p|;
2125
2126 \item{$\bullet$}|tok_flag+p| represents token list number |p|;
2127
2128 \item{$\bullet$}|inner_tok_flag+p| represents token list number |p|, to be
2129 translated without line-break controls.
2130
2131 @d id_flag 10240 /* signifies an identifier */
2132 @d res_flag 2*id_flag /* signifies a reserved word */
2133 @d section_flag 3*id_flag /* signifies a section name */
2134 @d tok_flag 4*id_flag /* signifies a token list */
2135 @d inner_tok_flag 5*id_flag /* signifies a token list in `\pb' */
2136
2137 @c
2138 void
2139 print_text(p) /* prints a token list for debugging; not used in |main| */
2140 text_pointer p;
2141 {
2142   token_pointer j; /* index into |tok_mem| */
2143   sixteen_bits r; /* remainder of token after the flag has been stripped off */
2144   if (p>=text_ptr) printf("BAD"); /* |p| lies at or beyond |text_ptr|, so no tokens are listed */
2145   else for (j=*p; j<*(p+1); j++) { /* the tokens run from |*p| up to, but not including, |*(p+1)| */
2146     r=*j%id_flag;
2147     switch (*j/id_flag) { /* the quotient tells which multiple of |id_flag| was added */
2148       case 1: printf("\\\\{"@q}@>); print_id((name_dir+r)); printf(@q{@>"}");
2149         break; /* |id_flag| */
2150       case 2: printf("\\&{"@q}@>); print_id((name_dir+r)); printf(@q{@>"}");
2151         break; /* |res_flag| */
2152       case 3: printf("<"); print_section_name((name_dir+r)); printf(">");
2153         break; /* |section_flag| */
2154       case 4: printf("[[%d]]",r); break; /* |tok_flag| */
2155       case 5: printf("|[[%d]]|",r); break; /* |inner_tok_flag| */
2156       default: @<Print token |r| in symbolic form@>;
2157     }
2158   }
2159   fflush(stdout); /* push out whatever has been printed so far */
2160 }
2161
2162 @ @<Print token |r|...@>=
2163 switch (r) { /* special codes get a bracketed name */
2164   case math_rel: printf("\\mathrel{"@q}@>); break;
2165   case big_cancel: printf("[ccancel]"); break;
2166   case cancel: printf("[cancel]"); break;
2167   case indent: printf("[indent]"); break;
2168   case outdent: printf("[outdent]"); break;
2169   case backup: printf("[backup]"); break;
2170   case opt: printf("[opt]"); break;
2171   case break_space: printf("[break]"); break;
2172   case force: printf("[force]"); break;
2173   case big_force: printf("[fforce]"); break;
2174   case preproc_line: printf("[preproc]"); break;
2175   case quoted_char: j++; printf("[%o]",(unsigned)*j); break; /* the token that follows |quoted_char| is consumed here and shown in octal */
2176   case end_translation: printf("[quit]"); break;
2177   case inserted: printf("[inserted]"); break;
2178   default: putxchar(r); /* every other token is handed to |putxchar| as it stands */
2179 }
2180
2181 @ The production rules listed above are embedded directly into \.{CWEAVE},
2182 since it is easier to do this than to write an interpretive system
2183 that would handle production systems in general. Several macros are defined
2184 here so that the program for each production is fairly short.
2185
2186 All of our productions conform to the general notion that some |k|
2187 consecutive scraps starting at some position |j| are to be replaced by a
2188 single scrap of some category |c| whose translation is composed from the
2189 translations of the disappearing scraps. After this production has been
2190 applied, the production pointer |pp| should change by an amount |d|. Such
2191 a production can be represented by the quadruple |(j,k,c,d)|. For example,
2192 the production `|exp@,comma@,exp| $\RA$ |exp|' would be represented by
2193 `|(pp,3,exp,-2)|'; in this case the pointer |pp| should decrease by 2
2194 after the production has been applied, because some productions with
2195 |exp| in their second or third positions might now match,
2196 but no productions have
2197 |exp| in the fourth position of their left-hand sides. Note that
2198 the value of |d| is determined by the whole collection of productions, not
2199 by an individual one.
2200 The determination of |d| has been
2201 done by hand in each case, based on the full set of productions but not on
2202 the grammar of \CEE/ or on the rules for constructing the initial
2203 scraps.
2204
2205 We also attach a serial number to each production, so that additional
2206 information is available when debugging. For example, the program below
2207 contains the statement `|reduce(pp,3,exp,-2,4)|' when it implements
2208 the production just mentioned.
2209
2210 Before calling |reduce|, the program should have appended the tokens of
2211 the new translation to the |tok_mem| array. We commonly want to append
2212 copies of several existing translations, and macros are defined to
2213 simplify these common cases. For example, \\{app2}|(pp)| will append the
2214 translations of two consecutive scraps, |pp->trans| and |(pp+1)->trans|, to
2215 the current token list. If the entire new translation is formed in this
2216 way, we write `|squash(j,k,c,d,n)|' instead of `|reduce(j,k,c,d,n)|'. For
2217 example, `|squash(pp,3,exp,-2,3)|' is an abbreviation for `\\{app3}|(pp);
2218 reduce(pp,3,exp,-2,3)|'.
2219
2220 A couple more words of explanation:
2221 Both |big_app| and |app| append a token (while |big_app1| to |big_app4|
2222 append the specified number of scrap translations) to the current token list.
2223 The difference between |big_app| and |app| is simply that |big_app|
2224 checks whether there can be a conflict between math and non-math
2225 tokens, and intercalates a `\.{\$}' token if necessary.  When in
2226 doubt what to use, use |big_app|.
2227
2228 The |mathness| is an attribute of scraps that says whether they are
2229 to be printed in a math mode context or not.  It is separate from the
2230 ``part of speech'' (the |cat|) because to make each |cat| have
2231 a fixed |mathness| (as in the original \.{WEAVE}) would multiply the
2232 number of necessary production rules.
2233
2234 The low two bits (i.e. |mathness % 4|) control the left boundary.
2235 (We need two bits because we allow cases |yes_math|, |no_math| and
2236 |maybe_math|, which can go either way.)
2237 The next two bits (i.e. |mathness / 4|) control the right boundary.
2238 If we combine two scraps and the right boundary of the first has
2239 a different mathness from the left boundary of the second, we
2240 insert a \.{\$} in between.  Similarly, if at printing time some
2241 irreducible scrap has a |yes_math| boundary the scrap gets preceded
2242 or followed by a \.{\$}. The left boundary is |maybe_math| if and
2243 only if the right boundary is.
2244
2245 The code below is an exact translation of the production rules into
2246 \CEE/, using such macros, and the reader should have no difficulty
2247 understanding the format by comparing the code with the symbolic
2248 productions as they were listed earlier.
2249
2250 @d no_math 2 /* should be in horizontal mode */
2251 @d yes_math 1 /* should be in math mode */
2252 @d maybe_math 0 /* works in either horizontal or math mode */
2253 @d big_app2(a) big_app1(a);big_app1(a+1) /* two statements: use only inside braces */
2254 @d big_app3(a) big_app2(a);big_app1(a+2) /* three statements, likewise */
2255 @d big_app4(a) big_app3(a);big_app1(a+3) /* four statements, likewise */
2256 @d app(a) *(tok_ptr++)=a /* store token |a| and advance |tok_ptr|; no mathness check */
2257 @d app1(a) *(tok_ptr++)=tok_flag+(int)((a)->trans-tok_start) /* refer to the translation of scrap |a| */
2258
2259 @<Global...@>=
2260 int cur_mathness, init_mathness;
2261
2262 @ @c
2263 void
2264 app_str(s) /* pass each character of the string |s| to |app_tok| */
2265 char *s; /* scanning stops at the first zero byte */
2266 {
2267   while (*s) app_tok(*(s++)); /* one token per character, in order */
2268 }
2269
2270 void
2271 big_app(a) /* append token |a|, first reconciling it with |cur_mathness| */
2272 token a; /* the token to be appended */
2273 {
2274         if (a==' ' || (a>=big_cancel && a<=big_force)) /* non-math token */ {
2275                 if (cur_mathness==maybe_math) init_mathness=no_math; /* nothing decisive so far: this fixes the left boundary */
2276                 else if (cur_mathness==yes_math) app_str("{}$"); /* math material precedes, so a math shift is intercalated */
2277                 cur_mathness=no_math; /* the right boundary so far */
2278         }
2279         else { /* every other token is treated as math material */
2280                 if (cur_mathness==maybe_math) init_mathness=yes_math; /* nothing decisive so far: this fixes the left boundary */
2281                 else if (cur_mathness==no_math) app_str("${}"); /* non-math material precedes, so a math shift is intercalated */
2282                 cur_mathness=yes_math; /* the right boundary so far */
2283         }
2284         app(a); /* finally the token itself */
2285 }
2286
2287 void
2288 big_app1(a) /* append the translation of scrap |a|, reconciling mathness at the join */
2289 scrap_pointer a; /* the scrap whose translation is appended */
2290 {
2291   switch (a->mathness % 4) { /* left boundary */
2292   case (no_math):
2293     if (cur_mathness==maybe_math) init_mathness=no_math; /* nothing decisive so far: this fixes the left boundary */
2294     else if (cur_mathness==yes_math) app_str("{}$"); /* the boundaries disagree, so a math shift is intercalated */
2295     cur_mathness=a->mathness / 4; /* right boundary */
2296     break;
2297   case (yes_math):
2298     if (cur_mathness==maybe_math) init_mathness=yes_math; /* nothing decisive so far: this fixes the left boundary */
2299     else if (cur_mathness==no_math) app_str("${}"); /* the boundaries disagree, so a math shift is intercalated */
2300     cur_mathness=a->mathness / 4; /* right boundary */
2301     break;
2302   case (maybe_math): /* no changes */ break;
2303   }
2304   app(tok_flag+(int)((a)->trans-tok_start)); /* the same token that |app1(a)| stores */
2305 }
2306
2307 @ Let us consider the big switch for productions now, before looking
2308 at its context. We want to design the program so that this switch
2309 works, so we might as well not keep ourselves in suspense about exactly what
2310 code needs to be provided with a proper environment.
2311
2312 @d cat1 (pp+1)->cat
2313 @d cat2 (pp+2)->cat
2314 @d cat3 (pp+3)->cat
2315 @d lhs_not_simple (pp->cat!=public_like
2316         && pp->cat!=semi
2317         && pp->cat!=prelangle
2318         && pp->cat!=prerangle
2319         && pp->cat!=template_like
2320         && pp->cat!=new_like
2321         && pp->cat!=new_exp
2322         && pp->cat!=ftemplate
2323         && pp->cat!=raw_ubin
2324         && pp->cat!=const_like
2325         && pp->cat!=raw_int
2326         && pp->cat!=operator_like)
2327  /* not a production with left side length 1 */
2328
2329 @<Match a production at |pp|, or increase |pp| if there is no match@>= {
2330   if (cat1==end_arg && lhs_not_simple) /* the |else| two lines below belongs to the inner |if| */
2331     if (pp->cat==begin_arg) squash(pp,2,exp,-2,124);
2332     else squash(pp,2,end_arg,-1,125); /* the |end_arg| absorbs the scrap on its left */
2333   else if (cat1==insert) squash(pp,2,pp->cat,-2,0); /* an |insert| joins the scrap on its left, which keeps its category */
2334   else if (cat2==insert) squash(pp+1,2,(pp+1)->cat,-1,0); /* likewise one position further right */
2335   else if (cat3==insert) squash(pp+2,2,(pp+2)->cat,0,0); /* likewise two positions further right */
2336   else
2337   switch (pp->cat) { /* otherwise the category at |pp| selects the rules to try */
2338     case exp: @<Cases for |exp|@>; @+break;
2339     case lpar: @<Cases for |lpar|@>; @+break;
2340     case unop: @<Cases for |unop|@>; @+break;
2341     case ubinop: @<Cases for |ubinop|@>; @+break;
2342     case binop: @<Cases for |binop|@>; @+break;
2343     case cast: @<Cases for |cast|@>; @+break;
2344     case sizeof_like: @<Cases for |sizeof_like|@>; @+break;
2345     case int_like: @<Cases for |int_like|@>; @+break;
2346     case public_like: @<Cases for |public_like|@>; @+break;
2347     case colcol: @<Cases for |colcol|@>; @+break;
2348     case decl_head: @<Cases for |decl_head|@>; @+break;
2349     case decl: @<Cases for |decl|@>; @+break;
2350     case base: @<Cases for |base|@>; @+break;
2351     case struct_like: @<Cases for |struct_like|@>; @+break;
2352     case struct_head: @<Cases for |struct_head|@>; @+break;
2353     case fn_decl: @<Cases for |fn_decl|@>; @+break;
2354     case function: @<Cases for |function|@>; @+break;
2355     case lbrace: @<Cases for |lbrace|@>; @+break;
2356     case if_like: @<Cases for |if_like|@>; @+break;
2357     case else_like: @<Cases for |else_like|@>; @+break;
2358     case else_head: @<Cases for |else_head|@>; @+break;
2359     case if_clause: @<Cases for |if_clause|@>; @+break;
2360     case if_head: @<Cases for |if_head|@>; @+break;
2361     case do_like: @<Cases for |do_like|@>; @+break;
2362     case case_like: @<Cases for |case_like|@>; @+break;
2363     case catch_like: @<Cases for |catch_like|@>; @+break;
2364     case tag: @<Cases for |tag|@>; @+break;
2365     case stmt: @<Cases for |stmt|@>; @+break;
2366     case semi: @<Cases for |semi|@>; @+break;
2367     case lproc: @<Cases for |lproc|@>; @+break;
2368     case section_scrap: @<Cases for |section_scrap|@>; @+break;
2369     case insert: @<Cases for |insert|@>; @+break;
2370     case prelangle: @<Cases for |prelangle|@>; @+break;
2371     case prerangle: @<Cases for |prerangle|@>; @+break;
2372     case langle: @<Cases for |langle|@>; @+break;
2373     case template_like: @<Cases for |template_like|@>; @+break;
2374     case new_like: @<Cases for |new_like|@>; @+break;
2375     case new_exp: @<Cases for |new_exp|@>; @+break;
2376     case ftemplate: @<Cases for |ftemplate|@>; @+break;
2377     case for_like: @<Cases for |for_like|@>; @+break;
2378     case raw_ubin: @<Cases for |raw_ubin|@>; @+break;
2379     case const_like: @<Cases for |const_like|@>; @+break;
2380     case raw_int: @<Cases for |raw_int|@>; @+break;
2381     case operator_like: @<Cases for |operator_like|@>; @+break;
2382     case typedef_like: @<Cases for |typedef_like|@>; @+break;
2383     case delete_like: @<Cases for |delete_like|@>; @+break;
2384     case question: @<Cases for |question|@>; @+break;
2385   }
2386   pp++; /* if no match was found, we move to the right; after a match, |reduce| or |squash| has already decreased |pp| by one to compensate */
2387 }
2388
2389 @ In \CEE/, new specifier names can be defined via |typedef|, and we want
2390 to make the parser recognize future occurrences of the identifier thus
2391 defined as specifiers.  This is done by the procedure |make_reserved|,
2392 which changes the |ilk| of the relevant identifier.
2393
2394 We first need a procedure to recursively seek the first
2395 identifier in a token list, because the identifier might
2396 be enclosed in parentheses, as when one defines a function
2397 returning a pointer.
2398
2399 If the first identifier found is a keyword like `\&{case}', we
2400 return the special value |case_found|; this prevents underlining
2401 of identifiers in case labels.
2402
2403 If the first identifier is the keyword `\&{operator}', we give up;
2404 users who want to index definitions of overloaded \CPLUSPLUS/ operators
2405 should say, for example, `\.{@@!@@\^\\\&\{operator\} \$+\{=\}\$@@>}' (or,
2406 more properly alphabetized,
2407 `\.{@@!@@:operator+=\}\{\\\&\{operator\} \$+\{=\}\$@@>}').
2408
2409 @d no_ident_found (token_pointer)0 /* distinct from any identifier token */
2410 @d case_found (token_pointer)1 /* likewise */
2411 @d operator_found (token_pointer)2 /* likewise */
2412
2413 @c
2414 token_pointer
2415 find_first_ident(p) /* return the first identifier token of text |p|, or |no_ident_found|, |case_found|, or |operator_found| */
2416 text_pointer p; /* the token list to be searched; nested lists are searched recursively */
2417 {
2418   token_pointer q; /* token to be returned */
2419   token_pointer j; /* token being looked at */
2420   sixteen_bits r; /* remainder of token after the flag has been stripped off */
2421   if (p>=text_ptr) confusion("find_first_ident"); /* |p| must lie below |text_ptr| */
2422   for (j=*p; j<*(p+1); j++) { /* the tokens of |p| run from |*p| up to, but not including, |*(p+1)| */
2423     r=*j%id_flag;
2424     switch (*j/id_flag) {
2425       case 2: /* |res_flag| */
2426         if (name_dir[r].ilk==case_like) return case_found;
2427         if (name_dir[r].ilk==operator_like) return operator_found;
2428         if (name_dir[r].ilk!=raw_int) break; /* other reserved words are passed over; a |raw_int| falls through */
2429       case 1: return j; /* the flag is |id_flag| itself, or we fell through with a |raw_int| */
2430       case 4: case 5: /* |tok_flag| or |inner_tok_flag| */
2431         if ((q=find_first_ident(tok_start+r))!=no_ident_found) /* search the text that this token refers to */
2432           return q;
2433       default: ; /* char, |section_flag|, fall thru: move on to next token */
2434         if (*j==inserted) return no_ident_found; /* ignore inserts */
2435         else if (*j==qualifier) j++; /* bypass namespace qualifier */
2436     }
2437   }
2438   return no_ident_found; /* the whole list was scanned without success */
2439 }
2440
2441 @ The scraps currently being parsed must be inspected for any
2442 occurrence of the identifier that we're making reserved; hence
2443 the |for| loop below.
2444
2445 @c
2446 void
2447 make_reserved(p) /* make the first identifier in |p->trans| like |int| */
2448 scrap_pointer p; /* scrap holding the identifier; the scan of later scraps also starts here */
2449 {
2450   sixteen_bits tok_value; /* the name of this identifier, plus its flag */
2451   token_pointer tok_loc; /* pointer to |tok_value| */
2452   if ((tok_loc=find_first_ident(p->trans))<=operator_found) /* covers |no_ident_found|, |case_found|, and |operator_found| */
2453     return; /* this should not happen */
2454   tok_value=*tok_loc;
2455   for (;p<=scrap_ptr; p==lo_ptr? p=hi_ptr: p++) { /* after |lo_ptr| the scan resumes at |hi_ptr| */
2456     if (p->cat==exp) {
2457       if (**(p->trans)==tok_value) { /* the first token of this translation is the same identifier */
2458         p->cat=raw_int;
2459         **(p->trans)=tok_value%id_flag+res_flag; /* same name, now flagged with |res_flag| */
2460       }
2461     }
2462   }
2463   (name_dir+(sixteen_bits)(tok_value%id_flag))->ilk=raw_int; /* the name itself now has |ilk| equal to |raw_int| */
2464   *tok_loc=tok_value%id_flag+res_flag; /* and the occurrence that was found is reflagged too */
2465 }
2466
2467 @ In the following situations we want to mark the occurrence of
2468 an identifier as a definition: when |make_reserved| is just about to be
2469 used; after a specifier, as in |char **argv|;
2470 before a colon, as in \\{found}:; and in the declaration of a function,
2471 as in \\{main}()$\{\ldots;\}$.  This is accomplished by the invocation
2472 of |make_underlined| at appropriate times.  Notice that, in the declaration
2473 of a function, we find out that the identifier is being defined only after
2474 it has been swallowed up by an |exp|.
2475
2476 @c
2477 void
2478 make_underlined(p)
2479 /* underline the entry for the first identifier in |p->trans| */
2480 scrap_pointer p; /* scrap whose translation is searched for the identifier */
2481 {
2482   token_pointer tok_loc; /* where the first identifier appears */
2483   if ((tok_loc=find_first_ident(p->trans))<=operator_found) /* covers |no_ident_found|, |case_found|, and |operator_found| */
2484     return; /* this happens, for example, in |case found:| */
2485   xref_switch=def_flag; /* |underline_xref| adds this to |section_count| */
2486   underline_xref(*tok_loc%id_flag+name_dir); /* the token without its flag indexes |name_dir| */
2487 }
2488
2489 @ We cannot use |new_xref| to underline a cross-reference at this point
2490 because this would just make a new cross-reference at the end of the list.
2491 We actually have to search through the list for the existing
2492 cross-reference.
2493
2494 @<Predecl...@>=
2495 void  underline_xref();
2496
2497 @ @c
2498 void
2499 underline_xref(p) /* give |p| the cross-reference |section_count+xref_switch|, reusing an existing entry when possible */
2500 name_pointer p; /* the name whose cross-reference list is examined */
2501 {
2502   xref_pointer q=(xref_pointer)p->xref; /* pointer to cross-reference being examined */
2503   xref_pointer r; /* temporary pointer for permuting cross-references */
2504   sixteen_bits m; /* cross-reference value to be installed */
2505   sixteen_bits n; /* cross-reference value being examined */
2506   if (no_xref) return; /* nothing is recorded when |no_xref| is set */
2507   m=section_count+xref_switch;
2508   while (q != xmem) { /* the list ends at |xmem| */
2509     n=q->num;
2510     if (n==m) return; /* the wanted entry is already present */
2511     else if (m==n+def_flag) { /* an entry without |def_flag| for the same number: upgrade it in place */
2512         q->num=m; return;
2513     }
2514     else if (n>=def_flag && n<m) break; /* a flagged entry with a smaller number: the new one goes before it */
2515     q=q->xlink;
2516   }
2517   @<Insert new cross-reference at |q|, not at beginning of list@>;
2518 }
2519
2520 @ We get to this section only when the identifier is one letter long,
2521 so it didn't get a non-underlined entry during phase one.  But it may
2522 have got some explicitly underlined entries in later sections, so in order
2523 to preserve the numerical order of the entries in the index, we have
2524 to insert the new cross-reference not at the beginning of the list
2525 (namely, at |p->xref|), but rather right before |q|.
2526
2527 @<Insert new cross-reference at |q|...@>=
2528   append_xref(0); /* this number doesn't matter */
2529   xref_ptr->xlink=(xref_pointer)p->xref; r=xref_ptr; /* link the new node in front of the old list */
2530   p->xref=(char*)xref_ptr; /* and make it the head of the list for |p| */
2531   while (r->xlink!=q) {r->num=r->xlink->num; r=r->xlink;} /* copy each number one node towards the head, up to |q| */
2532   r->num=m; /* everything from |q| on is left undisturbed */
2533
2534 @ Now comes the code that tries to match each production starting
2535 with a particular type of scrap. Whenever a match is discovered,
2536 the |squash| or |reduce| procedure will cause the appropriate action
2537 to be performed and will adjust |pp| for the next attempt.
2538
2539 @<Cases for |exp|@>=
2540 if (cat1==lbrace || cat1==int_like || cat1==decl) {
2541   make_underlined(pp); big_app1(pp); big_app(indent); app(indent);
2542   reduce(pp,1,fn_decl,0,1);
2543 }
2544 else if (cat1==unop) squash(pp,2,exp,-2,2);
2545 else if ((cat1==binop || cat1==ubinop) && cat2==exp)
2546         squash(pp,3,exp,-2,3);
2547 else if (cat1==comma && cat2==exp) {
2548   big_app2(pp);
2549   app(opt); app('9'); big_app1(pp+2); reduce(pp,3,exp,-2,4);
2550 }
2551 else if (cat1==lpar && cat2==rpar && cat3==colon) squash(pp+3,1,base,0,5);
2552 else if (cat1==cast && cat2==colon) squash(pp+2,1,base,0,5);
2553 else if (cat1==semi) squash(pp,2,stmt,-1,6);
2554 else if (cat1==colon) {
2555   make_underlined (pp);  squash(pp,2,tag,-1,7);
2556 }
2557 else if (cat1==rbrace) squash(pp,1,stmt,-1,8);
2558 else if (cat1==lpar && cat2==rpar && (cat3==const_like || cat3==case_like)) {
2559   big_app1(pp+2); big_app(' '); big_app1(pp+3); reduce(pp+2,2,rpar,0,9);
2560 }
2561 else if (cat1==cast && (cat2==const_like || cat2==case_like)) {
2562   big_app1(pp+1); big_app(' '); big_app1(pp+2); reduce(pp+1,2,cast,0,9);
2563 }
2564 else if (cat1==exp || cat1==cast) squash(pp,2,exp,-2,10);
2565
2566 @ @<Cases for |lpar|@>=
2567 if ((cat1==exp||cat1==ubinop) && cat2==rpar) squash(pp,3,exp,-2,11);
2568 else if (cat1==rpar) {
2569   big_app1(pp); app('\\'); app(','); big_app1(pp+1);
2570 @.\\,@>
2571   reduce(pp,2,exp,-2,12);
2572 }
2573 else if ((cat1==decl_head || cat1==int_like || cat1==cast) && cat2==rpar)
2574  squash(pp,3,cast,-2,13);
2575 else if ((cat1==decl_head || cat1==int_like || cat1==exp) && cat2==comma) {
2576   big_app3(pp); app(opt); app('9'); reduce(pp,3,lpar,-1,14);
2577 }
2578 else if (cat1==stmt || cat1==decl) {
2579   big_app2(pp); big_app(' '); reduce(pp,2,lpar,-1,15);
2580 }
2581
2582 @ @<Cases for |unop|@>=
2583 if (cat1==exp || cat1==int_like) squash(pp,2,exp,-2,16);
2584
2585 @ @<Cases for |ubinop|@>=
2586 if (cat1==cast && cat2==rpar) {
2587   big_app('{'); big_app1(pp); big_app('}'); big_app1(pp+1);
2588   reduce(pp,2,cast,-2,17);
2589 }
2590 else if (cat1==exp || cat1==int_like) {
2591   big_app('{'); big_app1(pp); big_app('}'); big_app1(pp+1);
2592   reduce(pp,2,cat1,-2,18);
2593 }
2594 else if (cat1==binop) {
2595   big_app(math_rel); big_app1(pp); big_app('{'); big_app1(pp+1); big_app('}');
2596   big_app('}'); reduce(pp,2,binop,-1,19);
2597 }
2598
2599 @ @<Cases for |binop|@>=
2600 if (cat1==binop) {
2601   big_app(math_rel); big_app('{'); big_app1(pp); big_app('}');
2602   big_app('{'); big_app1(pp+1); big_app('}');
2603   big_app('}'); reduce(pp,2,binop,-1,20);
2604 }
2605
2606 @ @<Cases for |cast|@>=
2607 if (cat1==lpar) squash(pp,2,lpar,-1,21);
2608 else if (cat1==exp) {
2609   big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,exp,-2,21);
2610 }
2611 else if (cat1==semi) squash(pp,1,exp,-2,22);
2612
2613 @ @<Cases for |sizeof_like|@>=
2614 if (cat1==cast) squash(pp,2,exp,-2,23);
2615 else if (cat1==exp) {
2616   big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,exp,-2,24);
2617 }
2618
2619 @ @<Cases for |int_like|@>=
2620 if (cat1==int_like|| cat1==struct_like) {
2621   big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,cat1,-2,25);
2622 }
2623 else if (cat1==exp && (cat2==raw_int||cat2==struct_like))
2624   squash(pp,2,int_like,-2,26);
2625 else if (cat1==exp || cat1==ubinop || cat1==colon) {
2626   big_app1(pp); big_app(' '); reduce(pp,1,decl_head,-1,27);
2627 }
2628 else if (cat1==semi || cat1==binop) squash(pp,1,decl_head,0,28);
2629
2630 @ @<Cases for |public_like|@>=
2631 if (cat1==colon) squash(pp,2,tag,-1,29);
2632 else squash(pp,1,int_like,-2,30);
2633
2634 @ @<Cases for |colcol|@>=
2635 if (cat1==exp||cat1==int_like) {
2636   app(qualifier); squash(pp,2,cat1,-2,31);
2637 }@+else if (cat1==colcol) squash(pp,2,colcol,-1,32);
2638
2639 @ @<Cases for |decl_head|@>=
2640 if (cat1==comma) {
2641   big_app2(pp); big_app(' '); reduce(pp,2,decl_head,-1,33);
2642 }
2643 else if (cat1==ubinop) {
2644   big_app1(pp); big_app('{'); big_app1(pp+1); big_app('}');
2645   reduce(pp,2,decl_head,-1,34);
2646 }
2647 else if (cat1==exp && cat2!=lpar && cat2!=exp && cat2!=cast) {
2648   make_underlined(pp+1); squash(pp,2,decl_head,-1,35);
2649 }
2650 else if ((cat1==binop||cat1==colon) && cat2==exp && (cat3==comma ||
2651     cat3==semi || cat3==rpar))
2652   squash(pp,3,decl_head,-1,36);
2653 else if (cat1==cast) squash(pp,2,decl_head,-1,37);
2654 else if (cat1==lbrace || cat1==int_like || cat1==decl) {
2655   big_app1(pp); big_app(indent); app(indent); reduce(pp,1,fn_decl,0,38);
2656 }
2657 else if (cat1==semi) squash(pp,2,decl,-1,39);
2658
2659 @ @<Cases for |decl|@>=
2660 if (cat1==decl) {
2661   big_app1(pp); big_app(force); big_app1(pp+1);
2662   reduce(pp,2,decl,-1,40);
2663 }
2664 else if (cat1==stmt || cat1==function) {
2665   big_app1(pp); big_app(big_force);
2666   big_app1(pp+1); reduce(pp,2,cat1,-1,41);
2667 }
2668
2669 @ @<Cases for |base|@>=
2670 if (cat1==int_like || cat1==exp) {
2671   if (cat2==comma) {
2672     big_app1(pp); big_app(' '); big_app2(pp+1);
2673     app(opt); app('9'); reduce(pp,3,base,0,42);
2674   }
2675   else if (cat2==lbrace) {
2676     big_app1(pp); big_app(' '); big_app1(pp+1); big_app(' '); big_app1(pp+2);
2677     reduce(pp,3,lbrace,-2,43);
2678   }
2679 }
2680
2681 @ @<Cases for |struct_like|@>=
2682 if (cat1==lbrace) {
2683   big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,struct_head,0,44);
2684 }
2685 else if (cat1==exp||cat1==int_like) {
2686   if (cat2==lbrace || cat2==semi) {
2687     make_underlined(pp+1); make_reserved(pp+1);
2688     big_app1(pp); big_app(' '); big_app1(pp+1);
2689     if (cat2==semi) reduce(pp,2,decl_head,0,45);
2690     else {
2691       big_app(' '); big_app1(pp+2);reduce(pp,3,struct_head,0,46);
2692     }
2693   }
2694   else if (cat2==colon) squash(pp+2,1,base,2,47);
2695   else if (cat2!=base) {
2696     big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,int_like,-2,48);
2697   }
2698 }
2699
2700 @ @<Cases for |struct_head|@>=
2701 if ((cat1==decl || cat1==stmt || cat1==function) && cat2==rbrace) {
2702   big_app1(pp); big_app(indent); big_app(force); big_app1(pp+1);
2703   big_app(outdent); big_app(force);  big_app1(pp+2);
2704   reduce(pp,3,int_like,-2,49);
2705 }
2706 else if (cat1==rbrace) {
2707   big_app1(pp); app_str("\\,"); big_app1(pp+1);
2708 @.\\,@>
2709   reduce(pp,2,int_like,-2,50);
2710 }
2711
2712 @ @<Cases for |fn_decl|@>=
2713 if (cat1==decl) {
2714   big_app1(pp); big_app(force); big_app1(pp+1); reduce(pp,2,fn_decl,0,51);
2715 }
2716 else if (cat1==stmt) {
2717   big_app1(pp); app(outdent); app(outdent); big_app(force);
2718   big_app1(pp+1); reduce(pp,2,function,-1,52);
2719 }
2720
2721 @ @<Cases for |function|@>=
2722 if (cat1==function || cat1==decl || cat1==stmt) {
2723   big_app1(pp); big_app(big_force); big_app1(pp+1); reduce(pp,2,cat1,-1,53);
2724 }
2725
2726 @ @<Cases for |lbrace|@>=
2727 if (cat1==rbrace) {
2728   big_app1(pp); app('\\'); app(','); big_app1(pp+1);
2729 @.\\,@>
2730   reduce(pp,2,stmt,-1,54);
2731 }
2732 else if ((cat1==stmt||cat1==decl||cat1==function) && cat2==rbrace) {
2733   big_app(force); big_app1(pp);  big_app(indent); big_app(force);
2734   big_app1(pp+1); big_app(force); big_app(backup);  big_app1(pp+2);
2735   big_app(outdent); big_app(force); reduce(pp,3,stmt,-1,55);
2736 }
2737 else if (cat1==exp) {
2738   if (cat2==rbrace) squash(pp,3,exp,-2,56);
2739   else if (cat2==comma && cat3==rbrace) squash(pp,4,exp,-2,56);
2740 }
2741
2742 @ @<Cases for |if_like|@>=
2743 if (cat1==exp) {
2744   big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,if_clause,0,57);
2745 }
2746
2747 @ @<Cases for |else_like|@>=
2748 if (cat1==colon) squash(pp+1,1,base,1,58);
2749 else if (cat1==lbrace) squash(pp,1,else_head,0,59);
2750 else if (cat1==stmt) {
2751   big_app(force); big_app1(pp); big_app(indent); big_app(break_space);
2752   big_app1(pp+1); big_app(outdent); big_app(force);
2753   reduce(pp,2,stmt,-1,60);
2754 }
2755
2756 @ @<Cases for |else_head|@>=
2757 if (cat1==stmt || cat1==exp) {
2758   big_app(force); big_app1(pp); big_app(break_space); app(noop);
2759   big_app(cancel); big_app1(pp+1); big_app(force);
2760   reduce(pp,2,stmt,-1,61);
2761 }
2762
2763 @ @<Cases for |if_clause|@>=
2764 if (cat1==lbrace) squash(pp,1,if_head,0,62);
2765 else if (cat1==stmt) {
2766   if (cat2==else_like) {
2767     big_app(force); big_app1(pp); big_app(indent); big_app(break_space);
2768     big_app1(pp+1); big_app(outdent); big_app(force); big_app1(pp+2);
2769     if (cat3==if_like) {
2770       big_app(' '); big_app1(pp+3); reduce(pp,4,if_like,0,63);
2771     }@+else reduce(pp,3,else_like,0,64);
2772   }
2773   else squash(pp,1,else_like,0,65);
2774 }
2775
2776 @ @<Cases for |if_head|@>=
2777 if (cat1==stmt || cat1==exp) {
2778   if (cat2==else_like) {
2779     big_app(force); big_app1(pp); big_app(break_space); app(noop);
2780     big_app(cancel); big_app1(pp+1); big_app(force); big_app1(pp+2);
2781     if (cat3==if_like) {
2782       big_app(' '); big_app1(pp+3); reduce(pp,4,if_like,0,66);
2783     }@+else reduce(pp,3,else_like,0,67);
2784   }
2785   else squash(pp,1,else_head,0,68);
2786 }
2787
2788 @ @<Cases for |do_like|@>=
2789 if (cat1==stmt && cat2==else_like && cat3==semi) {
2790   big_app1(pp); big_app(break_space); app(noop); big_app(cancel);
2791   big_app1(pp+1); big_app(cancel); app(noop); big_app(break_space);
2792   big_app2(pp+2); reduce(pp,4,stmt,-1,69);
2793 }
2794
2795 @ @<Cases for |case_like|@>=
2796 if (cat1==semi) squash(pp,2,stmt,-1,70);
2797 else if (cat1==colon) squash(pp,2,tag,-1,71);
2798 else if (cat1==exp) {
2799   big_app1(pp); big_app(' ');  big_app1(pp+1);  reduce(pp,2,exp,-2,72);
2800 }
2801
2802 @ @<Cases for |catch_like|@>=
2803 if (cat1==cast || cat1==exp) {
2804   big_app2(pp); big_app(indent); big_app(indent); reduce(pp,2,fn_decl,0,73);
2805 }
2806
2807 @ @<Cases for |tag|@>=
2808 if (cat1==tag) {
2809   big_app1(pp); big_app(break_space); big_app1(pp+1); reduce(pp,2,tag,-1,74);
2810 }
2811 else if (cat1==stmt||cat1==decl||cat1==function) {
2812   big_app(force); big_app(backup); big_app1(pp); big_app(break_space);
2813   big_app1(pp+1); reduce(pp,2,cat1,-1,75);
2814 }
2815
2816 @ The user can decide at run-time whether short statements should be
2817 grouped together on the same line.
2818
2819 @d force_lines flags['f'] /* should each statement be on its own line? */
2820 @<Cases for |stmt|@>=
2821 if (cat1==stmt||cat1==decl||cat1==function) { /* rule 76: the result takes the category of the right scrap */
2822   big_app1(pp);
2823   if (cat1==function) big_app(big_force);
2824   else if (cat1==decl) big_app(big_force); /* same separator as for |function| */
2825   else if (force_lines) big_app(force); /* two statements, and the user asked for separate lines */
2826   else big_app(break_space); /* two statements that may share a line */
2827   big_app1(pp+1); reduce(pp,2,cat1,-1,76);
2828 }
2829
2830 @ @<Cases for |semi|@>=
2831 big_app(' '); big_app1(pp); reduce(pp,1,stmt,-1,77); /* unconditional: a |semi| reaching this case becomes a |stmt| with a space in front */
2832
2833 @ @<Cases for |lproc|@>=
2834 if (cat1==define_like) make_underlined(pp+2); /* |pp+2| is the scrap right after the |define_like| scrap */
2835 if (cat1==else_like || cat1==if_like ||cat1==define_like)
2836   squash(pp,2,lproc,0,78); /* the keyword is absorbed and the result is still an |lproc| */
2837 else if (cat1==rproc) {
2838   app(inserted); big_app2(pp); reduce(pp,2,insert,-1,79); /* |find_first_ident| gives up when it meets |inserted| */
2839 } else if (cat1==exp || cat1==function) {
2840   if (cat2==rproc) {
2841     app(inserted); big_app1(pp); big_app(' '); big_app2(pp+1);
2842     reduce(pp,3,insert,-1,80);
2843   }
2844   else if (cat2==exp && cat3==rproc && cat1==exp) { /* two |exp| scraps between |lproc| and |rproc| */
2845     app(inserted); big_app1(pp); big_app(' '); big_app1(pp+1); app_str(" \\5");
2846 @.\\5@>
2847     big_app2(pp+2); reduce(pp,4,insert,-1,80);
2848   }
2849 }
2850
2851 @ @<Cases for |section_scrap|@>=
2852 if (cat1==semi) {
2853   big_app2(pp); big_app(force); reduce(pp,2,stmt,-2,81);
2854 }
2855 else squash(pp,1,exp,-2,82);
2856
2857 @ @<Cases for |insert|@>=
2858 if (cat1) /* category zero is what gets stored beyond the end of the scrap list */
2859   squash(pp,2,cat1,0,83); /* the |insert| joins the scrap on its right, which keeps its category */
2860
2861 @ @<Cases for |prelangle|@>=
2862 init_mathness=cur_mathness=yes_math; /* |reduce| will record math mode on both boundaries */
2863 app('<'); reduce(pp,1,binop,-2,84); /* the new translation is the single token |'<'|, and the scrap becomes a |binop| */
2864
2865 @ @<Cases for |prerangle|@>=
2866 init_mathness=cur_mathness=yes_math; /* |reduce| will record math mode on both boundaries */
2867 app('>'); reduce(pp,1,binop,-2,85); /* the new translation is the single token |'>'|, and the scrap becomes a |binop| */
2868
2869 @ @<Cases for |langle|@>=
2870 if (cat1==prerangle) {
2871   big_app1(pp); app('\\'); app(','); big_app1(pp+1);
2872 @.\\,@>
2873   reduce(pp,2,cast,-1,86);
2874 }
2875 else if (cat1==decl_head || cat1==int_like || cat1==exp) {
2876   if (cat2==prerangle) squash(pp,3,cast,-1,87);
2877   else if (cat2==comma) {
2878     big_app3(pp); app(opt); app('9'); reduce(pp,3,langle,0,88);
2879   }
2880 }
2881
2882 @ @<Cases for |template_like|@>=
2883 if (cat1==exp && cat2==prelangle) squash(pp+2,1,langle,2,89);
2884 else if (cat1==exp || cat1==raw_int) {
2885   big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,cat1,-2,90);
2886 }@+ else squash(pp,1,raw_int,0,91);
2887
2888 @ @<Cases for |new_like|@>=
2889 if (cat1==lpar && cat2==exp && cat3==rpar) squash(pp,4,new_like,0,92);
2890 else if (cat1==cast) {
2891   big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,exp,-2,93);
2892 }
2893 else if (cat1!=lpar) squash(pp,1,new_exp,0,94);
2894
2895 @ @<Cases for |new_exp|@>=
2896 if (cat1==int_like || cat1==const_like) {
2897   big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,new_exp,0,95);
2898 }
2899 else if (cat1==struct_like && (cat2==exp || cat2==int_like)) {
2900   big_app1(pp); big_app(' '); big_app1(pp+1); big_app(' ');
2901   big_app1(pp+2); reduce(pp,3,new_exp,0,96);
2902 }
2903 else if (cat1==raw_ubin) {
2904   big_app1(pp); big_app('{'); big_app1(pp+1); big_app('}');
2905   reduce(pp,2,new_exp,0,97);
2906 }
2907 else if (cat1==lpar) squash(pp,1,exp,-2,98);
2908 else if (cat1==exp) {
2909   big_app1(pp); big_app(' '); reduce(pp,1,exp,-2,98);
2910 }
2911 else if (cat1!=raw_int && cat1!=struct_like && cat1!=colcol)
2912   squash(pp,1,exp,-2,99);
2913
2914 @ @<Cases for |ftemplate|@>=
2915 if (cat1==prelangle) squash(pp+1,1,langle,1,100);
2916 else squash(pp,1,exp,-2,101);
2917
2918 @ @<Cases for |for_like|@>=
2919 if (cat1==exp) {
2920   big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,else_like,-2,102);
2921 }
2922
2923 @ @<Cases for |raw_ubin|@>=
2924 if (cat1==const_like) {
2925   big_app2(pp); app_str("\\ "); reduce(pp,2,raw_ubin,0,103);
2926 @.\\\ @>
2927 } else squash(pp,1,ubinop,-2,104);
2928
2929 @ @<Cases for |const_like|@>=
2930 squash(pp,1,int_like,-2,105);
2931
2932 @ @<Cases for |raw_int|@>=
2933 if (cat1==prelangle) squash(pp+1,1,langle,1,106);
2934 else if (cat1==colcol) squash(pp,2,colcol,-1,107);
2935 else if (cat1==cast) squash(pp,2,raw_int,0,108);
2936 else if (cat1==lpar) squash(pp,1,exp,-2,109);
2937 else if (cat1!=langle) squash(pp,1,int_like,-3,110);
2938
2939 @ @<Cases for |operator_like|@>=
2940 if (cat1==binop || cat1==unop || cat1==ubinop) {
2941   if (cat2==binop) break; /* no match here: this leaves the enclosing |switch|, so |pp| simply moves right */
2942   big_app1(pp); big_app('{'); big_app1(pp+1); big_app('}'); /* the operator symbol is wrapped in a group */
2943   reduce(pp,2,exp,-2,111);
2944 }
2945 else if (cat1==new_like || cat1==delete_like) {
2946   big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,exp,-2,112);
2947 }
2948 else if (cat1==comma) squash(pp,2,exp,-2,113);
2949 else if (cat1!=raw_ubin) squash(pp,1,new_exp,0,114); /* before a |raw_ubin| nothing happens and |pp| moves right */
2950
2951 @ @<Cases for |typedef_like|@>=
2952 if ((cat1==int_like || cat1==cast) && (cat2==comma || cat2==semi))
2953   squash(pp+1,1,exp,-1,115);
2954 else if (cat1==int_like) {
2955   big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,typedef_like,0,116);
2956 }
2957 else if (cat1==exp && cat2!=lpar && cat2!=exp && cat2!=cast) {
2958   make_underlined(pp+1); make_reserved(pp+1);
2959   big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,typedef_like,0,117);
2960 }
2961 else if (cat1==comma) {
2962   big_app2(pp); big_app(' '); reduce(pp,2,typedef_like,0,118);
2963 }
2964 else if (cat1==semi) squash(pp,2,decl,-1,119);
2965 else if (cat1==ubinop && (cat2==ubinop || cat2==cast)) {
2966   big_app('{'); big_app1(pp+1); big_app('}'); big_app1(pp+2);
2967   reduce(pp+1,2,cat2,0,120);
2968 }
2969
2970 @ @<Cases for |delete_like|@>=
2971 if (cat1==lpar && cat2==rpar) {
2972   big_app2(pp); app('\\'); app(','); big_app1(pp+2);
2973 @.\\,@>
2974   reduce(pp,3,delete_like,0,121);
2975 }
2976 else if (cat1==exp) {
2977   big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,exp,-2,122);
2978 }
2979
2980 @ @<Cases for |question|@>=
2981 if (cat1==exp && (cat2==colon || cat2==base)) {
2982   (pp+2)->mathness=5*yes_math; /* this colon should be in math mode; |5*yes_math| is |4*yes_math+yes_math|, i.e., both boundaries */
2983   squash(pp,3,binop,-2,123); /* all three scraps together act as a |binop| */
2984 }
2985
2986 @ Now here's the |reduce| procedure used in our code for productions.
2987
2988 The `|freeze_text|' macro is used to give official status to a token list.
2989 Before saying |freeze_text|, items are appended to the current token list,
2990 and we know that the eventual number of this token list will be the current
2991 value of |text_ptr|. But no list of that number really exists as yet,
2992 because no ending point for the current list has been
2993 stored in the |tok_start| array. After saying |freeze_text|, the
2994 old current token list becomes legitimate, and its number is the current
2995 value of |text_ptr-1| since |text_ptr| has been increased. The new
2996 current token list is empty and ready to be appended to.
2997 Note that |freeze_text| does not check to see that |text_ptr| hasn't gotten
2998 too large, since it is assumed that this test was done beforehand.
2999
3000 @d freeze_text *(++text_ptr)=tok_ptr
3001
3002 @c
3003 void
3004 reduce(j,k,c,d,n) /* replace |k| scraps starting at |j| by one scrap whose translation is the current token list */
3005 scrap_pointer j; /* first of the scraps being replaced; it receives the result */
3006 eight_bits c; /* category of the resulting scrap */
3007 short k, d, n; /* number of scraps replaced, change to |pp|, and production number */
3008 {
3009   scrap_pointer i, i1; /* pointers into scrap memory */
3010   j->cat=c; j->trans=text_ptr; /* the current token list gets number |text_ptr| */
3011   j->mathness=4*cur_mathness+init_mathness; /* right boundary in the upper bits, left boundary in the lower two */
3012   freeze_text;
3013   if (k>1) {
3014     for (i=j+k, i1=j+1; i<=lo_ptr; i++, i1++) { /* slide the scraps after the match down by |k-1| places */
3015       i1->cat=i->cat; i1->trans=i->trans;
3016       i1->mathness=i->mathness;
3017     }
3018     lo_ptr=lo_ptr-k+1;
3019   }
3020   pp=(pp+d<scrap_base? scrap_base: pp+d); /* move |pp| by |d|, but not below |scrap_base| */
3021   @<Print a snapshot of the scrap list if debugging @>;
3022   pp--; /* we next say |pp++| */
3023 }
3024
3025 @ Here's the |squash| procedure, which
3026 takes advantage of the simplification that occurs when |k==1|.
3027
3028 @c
3029 void
3030 squash(j,k,c,d,n) /* like |reduce|, when the new translation is just the |k| old ones in order */
3031 scrap_pointer j; /* first of the scraps being combined */
3032 eight_bits c; /* category of the resulting scrap */
3033 short k, d, n; /* number of scraps combined, change to |pp|, and production number */
3034 {
3035   scrap_pointer i; /* pointers into scrap memory */
3036   if (k==1) { /* only the category changes; |trans| and |mathness| stay as they are */
3037     j->cat=c; pp=(pp+d<scrap_base? scrap_base: pp+d);
3038     @<Print a snapshot...@>;
3039     pp--; /* we next say |pp++| */
3040     return;
3041   }
3042   for (i=j; i<j+k; i++) big_app1(i); /* append the |k| translations, with mathness checks between them */
3043   reduce(j,k,c,d,n);
3044 }
3045
3046 @ And here now is the code that applies productions as long as possible.
3047 Before applying the production mechanism, we must make sure
3048 it has good input (at least four scraps, the length of the lhs of the
3049 longest rules), and that there is enough room in the memory arrays
3050 to hold the appended tokens and texts.  Here we use a very
3051 conservative test; it's more important to make sure the program
3052 will still work if we change the production rules (within reason)
3053 than to squeeze the last bit of space from the memory arrays.
3054
3055 @d safe_tok_incr 20
3056 @d safe_text_incr 10
3057 @d safe_scrap_incr 10
3058
3059 @<Reduce the scraps using the productions until no more rules apply@>=
3060 while (1) {
3061   @<Make sure the entries |pp| through |pp+3| of |cat| are defined@>;
3062   if (tok_ptr+safe_tok_incr>tok_mem_end) { /* fewer than |safe_tok_incr| token slots remain */
3063     if (tok_ptr>max_tok_ptr) max_tok_ptr=tok_ptr;
3064     overflow("token");
3065   }
3066   if (text_ptr+safe_text_incr>tok_start_end) { /* fewer than |safe_text_incr| text slots remain */
3067     if (text_ptr>max_text_ptr) max_text_ptr=text_ptr;
3068     overflow("text");
3069   }
3070   if (pp>lo_ptr) break; /* |pp| has moved past the last scrap of the low area */
3071   init_mathness=cur_mathness=maybe_math; /* each attempt starts with both boundaries undecided */
3072   @<Match a production...@>;
3073 }
3074
3075 @ If we get to the end of the scrap list, category codes equal to zero are
3076 stored, since zero does not match anything in a production.
3077
3078 @<Make sure the entries...@>=
3079 if (lo_ptr<pp+3) { /* fewer than four scraps are available at |pp| */
3080   while (hi_ptr<=scrap_ptr && lo_ptr!=pp+3) { /* move scraps from the high area */
3081     (++lo_ptr)->cat=hi_ptr->cat; lo_ptr->mathness=(hi_ptr)->mathness;
3082     lo_ptr->trans=(hi_ptr++)->trans; /* the scrap at |hi_ptr| is now consumed */
3083   }
3084   for (i=lo_ptr+1;i<=pp+3;i++) i->cat=0; /* pad with unmatchable categories */
3085 }
3086
3087 @ If \.{CWEAVE} is being run in debugging mode, the production numbers and
3088 current stack categories will be printed out when |tracing| is set to 2;
3089 a sequence of two or more irreducible scraps will be printed out when
3090 |tracing| is set to 1.
3091
3092 @<Global...@>=
3093 int tracing; /* parsing details shown: 1 for irreducible scraps, 2 for productions */
3094
3095 @ @<Print a snapsh...@>=
3096 { scrap_pointer k; /* pointer into |scrap_info|; local to this block */
3097   if (tracing==2) {
3098     printf("\n%d:",n); /* |n| is supplied by the enclosing function */
3099     for (k=scrap_base; k<=lo_ptr; k++) {
3100       if (k==pp) putxchar('*'); else putxchar(' '); /* an asterisk marks |pp| */
3101       if (k->mathness %4 ==  yes_math) putchar('+'); /* mathness at the left end */
3102       else if (k->mathness %4 ==  no_math) putchar('-');
3103       print_cat(k->cat);
3104       if (k->mathness /4 ==  yes_math) putchar('+'); /* mathness at the right end */
3105       else if (k->mathness /4 ==  no_math) putchar('-');
3106     }
3107     if (hi_ptr<=scrap_ptr) printf("..."); /* indicate that more is coming */
3108   }
3109 }
3110
3111 @ The |translate| function assumes that scraps have been stored in
3112 positions |scrap_base| through |scrap_ptr| of |cat| and |trans|. It
3113 applies productions as much as
3114 possible. The result is a token list containing the translation of
3115 the given sequence of scraps.
3116
3117 After calling |translate|, we will have |text_ptr+3<=max_texts| and
3118 |tok_ptr+6<=max_toks|, so it will be possible to create up to three token
3119 lists with up to six tokens without checking for overflow. Before calling
3120 |translate|, we should have |text_ptr<max_texts| and |scrap_ptr<max_scraps|,
3121 since |translate| might add a new text and a new scrap before it checks
3122 for overflow.
3123
3124 @c
3125 text_pointer
3126 translate() /* converts a sequence of scraps */
3127 {
3128   scrap_pointer i, /* index into |cat| */
3129   j; /* runs through final scraps */
3130   pp=scrap_base; lo_ptr=pp-1; hi_ptr=pp; /* the low area starts out empty */
3131   @<If tracing, print an indication of where we are@>;
3132   @<Reduce the scraps...@>;
3133   @<Combine the irreducible scraps that remain@>; /* this section does the |return| */
3134 }
3135
3136 @ If the initial sequence of scraps does not reduce to a single scrap,
3137 we concatenate the translations of all remaining scraps, separated by
3138 blank spaces, with dollar signs surrounding the translations of scraps
3139 where appropriate.
3140
3141 @<Combine the irreducible...@>= {
3142   @<If semi-tracing, show the irreducible scraps@>;
3143   for (j=scrap_base; j<=lo_ptr; j++) {
3144     if (j!=scrap_base) app(' '); /* a blank separates adjacent translations */
3145     if (j->mathness % 4 == yes_math) app('$'); /* the scrap begins in math mode */
3146     app1(j);
3147     if (j->mathness / 4 == yes_math) app('$'); /* the scrap ends in math mode */
3148     if (tok_ptr+6>tok_mem_end) overflow("token"); /* keep six tokens free */
3149   }
3150   freeze_text; return(text_ptr-1); /* the text just frozen is the translation */
3151 }
3152
3153 @ @<If semi-tracing, show the irreducible scraps@>=
3154 if (lo_ptr>scrap_base && tracing==1) { /* more than one scrap remains */
3155   printf("\nIrreducible scrap sequence in section %d:",section_count);
3156 @.Irreducible scrap sequence...@>
3157   mark_harmless;
3158   for (j=scrap_base; j<=lo_ptr; j++) {
3159     printf(" "); print_cat(j->cat); /* list the categories that remain */
3160   }
3161 }
3162
3163 @ @<If tracing,...@>=
3164 if (tracing==2) {
3165   printf("\nTracing after l. %d:\n",cur_line); mark_harmless;
3166 @.Tracing after...@>
3167   if (loc>buffer+50) { /* more than 50 characters precede |loc| */
3168     printf("...");
3169     term_write(loc-51,51); /* show only the 51 characters before |loc| */
3170   }
3171   else term_write(buffer,loc-buffer); /* show everything before |loc| */
3172 }
3173
3174 @* Initializing the scraps.
3175 If we are going to use the powerful production mechanism just developed, we
3176 must get the scraps set up in the first place, given a \CEE/ text. A table
3177 of the initial scraps corresponding to \CEE/ tokens appeared above in the
3178 section on parsing; our goal now is to implement that table. We shall do this
3179 by implementing a subroutine called |C_parse| that is analogous to the
3180 |C_xref| routine used during phase one.
3181
3182 Like |C_xref|, the |C_parse| procedure starts with the current
3183 value of |next_control| and it uses the operation |next_control=get_next()|
3184 repeatedly to read \CEE/ text until encountering the next `\.{\v}',
3185 `\.{/*}', or `\.{//}', or until |next_control>=format_code|. The scraps corresponding to
3186 what it reads are appended into the |cat| and |trans| arrays, and |scrap_ptr|
3187 is advanced.
3188
3189 @c
3190 void
3191 C_parse(spec_ctrl) /* creates scraps from \CEE/ tokens */
3192   eight_bits spec_ctrl; /* one extra control code that does not stop the scan */
3193 {
3194   int count; /* characters remaining before string break */
3195   while (next_control<format_code || next_control==spec_ctrl) {
3196     @<Append the scrap appropriate to |next_control|@>;
3197     next_control=get_next();
3198     if (next_control=='|' || next_control==begin_comment ||
3199         next_control==begin_short_comment) return; /* the caller handles these */
3200   }
3201 }
3202
3203 @ The following macro is used to append a scrap whose tokens have just
3204 been appended:
3205
3206 @d app_scrap(c,b) { /* a braced block: no semicolon may follow it before |else| */
3207   (++scrap_ptr)->cat=(c); scrap_ptr->trans=text_ptr; /* new scrap for the current text */
3208   scrap_ptr->mathness=5*(b); /* no no, yes yes, or maybe maybe, since |5*b==4*b+b| */
3209   freeze_text;
3210 }
3211
3212 @ @<Append the scr...@>=
3213 @<Make sure that there is room for the new scraps, tokens, and texts@>;
3214 switch (next_control) {
3215   case section_name:
3216     app(section_flag+(int)(cur_section-name_dir));
3217     app_scrap(section_scrap,maybe_math);
3218     app_scrap(exp,yes_math);@+break; /* no tokens are appended for this second scrap */
3219   case string: case constant: case verbatim: @<Append a string or constant@>;
3220    @+break;
3221   case identifier: app_cur_id(1);@+break;
3222   case TeX_string: @<Append a \TEX/ string, without forming a scrap@>;@+break;
3223   case '/': case '.':
3224     app(next_control); app_scrap(binop,yes_math);@+break;
3225   case '<': app_str("\\langle");@+app_scrap(prelangle,yes_math);@+break;
3226 @.\\langle@>
3227   case '>': app_str("\\rangle");@+app_scrap(prerangle,yes_math);@+break;
3228 @.\\rangle@>
3229   case '=': app_str("\\K"); app_scrap(binop,yes_math);@+break;
3230 @.\\K@>
3231   case '|': app_str("\\OR"); app_scrap(binop,yes_math);@+break;
3232 @.\\OR@>
3233   case '^': app_str("\\XOR"); app_scrap(binop,yes_math);@+break;
3234 @.\\XOR@>
3235   case '%': app_str("\\MOD"); app_scrap(binop,yes_math);@+break;
3236 @.\\MOD@>
3237   case '!': app_str("\\R"); app_scrap(unop,yes_math);@+break;
3238 @.\\R@>
3239   case '~': app_str("\\CM"); app_scrap(unop,yes_math);@+break;
3240 @.\\CM@>
3241   case '+': case '-': app(next_control); app_scrap(ubinop,yes_math);@+break;
3242   case '*': app(next_control); app_scrap(raw_ubin,yes_math);@+break;
3243   case '&': app_str("\\AND"); app_scrap(raw_ubin,yes_math);@+break;
3244 @.\\AND@>
3245   case '?': app_str("\\?"); app_scrap(question,yes_math);@+break;
3246 @.\\?@>
3247   case '#': app_str("\\#"); app_scrap(ubinop,yes_math);@+break;
3248 @.\\\#@>
3249   case ignore: case xref_roman: case xref_wildcard:
3250   case xref_typewriter: case noop:@+break; /* these codes produce no scrap */
3251   case '(': case '[': app(next_control); app_scrap(lpar,maybe_math);@+break;
3252   case ')': case ']': app(next_control); app_scrap(rpar,maybe_math);@+break;
3253   case '{': app_str("\\{"@q}@>); app_scrap(lbrace,yes_math);@+break;
3254 @.\\\{@>@q}@>
3255   case '}': app_str(@q{@>"\\}"); app_scrap(rbrace,yes_math);@+break;
3256 @q{@>@.\\\}@>
3257   case ',': app(','); app_scrap(comma,yes_math);@+break;
3258   case ';': app(';'); app_scrap(semi,maybe_math);@+break;
3259   case ':': app(':'); app_scrap(colon,no_math);@+break;@/
3260   @t\4@>  @<Cases involving nonstandard characters@>@;
3261   case thin_space: app_str("\\,"); app_scrap(insert,maybe_math);@+break;
3262 @.\\,@>
3263   case math_break: app(opt); app_str("0");
3264     app_scrap(insert,maybe_math);@+break;
3265   case line_break: app(force); app_scrap(insert,no_math);@+break;
3266   case left_preproc: app(force); app(preproc_line);
3267     app_str("\\#"); app_scrap(lproc,no_math);@+break;
3268 @.\\\#@>
3269   case right_preproc: app(force); app_scrap(rproc,no_math);@+break;
3270   case big_line_break: app(big_force); app_scrap(insert,no_math);@+break;
3271   case no_line_break: app(big_cancel); app(noop); app(break_space);
3272     app(noop); app(big_cancel);
3273     app_scrap(insert,no_math);@+break;
3274   case pseudo_semi: app_scrap(semi,maybe_math);@+break; /* no tokens are appended here */
3275   case macro_arg_open: app_scrap(begin_arg,maybe_math);@+break;
3276   case macro_arg_close: app_scrap(end_arg,maybe_math);@+break;
3277   case join: app_str("\\J"); app_scrap(insert,no_math);@+break;
3278 @.\\J@>
3279   case output_defs_code: app(force); app_str("\\ATH"); app(force);
3280     app_scrap(insert,no_math);@+break;
3281 @.\\ATH@>
3282   default: app(inserted); app(next_control); /* any other code is passed along */
3283     app_scrap(insert,maybe_math);@+break;
3284 }
3285
3286 @ @<Make sure that there is room for the new...@>=
3287 if (scrap_ptr+safe_scrap_incr>scrap_info_end ||
3288   tok_ptr+safe_tok_incr>tok_mem_end @| ||
3289   text_ptr+safe_text_incr>tok_start_end) { /* at least one array is nearly full */
3290   if (scrap_ptr>max_scr_ptr) max_scr_ptr=scrap_ptr; /* note the largest values */
3291   if (tok_ptr>max_tok_ptr) max_tok_ptr=tok_ptr;
3292   if (text_ptr>max_text_ptr) max_text_ptr=text_ptr;
3293   overflow("scrap/token/text");
3294 }
3295
3296 @ Some nonstandard characters may have entered \.{CWEAVE} by means of
3297 standard ones. They are converted to \TEX/ control sequences so that it is
3298 possible to keep \.{CWEAVE} from outputting unusual |char| codes.
3299
3300 @<Cases involving nonstandard...@>=
3301 case not_eq: app_str("\\I");@+app_scrap(binop,yes_math);@+break;
3302 @.\\I@>
3303 case lt_eq: app_str("\\Z");@+app_scrap(binop,yes_math);@+break;
3304 @.\\Z@>
3305 case gt_eq: app_str("\\G");@+app_scrap(binop,yes_math);@+break;
3306 @.\\G@>
3307 case eq_eq: app_str("\\E");@+app_scrap(binop,yes_math);@+break;
3308 @.\\E@>
3309 case and_and: app_str("\\W");@+app_scrap(binop,yes_math);@+break;
3310 @.\\W@>
3311 case or_or: app_str("\\V");@+app_scrap(binop,yes_math);@+break;
3312 @.\\V@>
3313 case plus_plus: app_str("\\PP");@+app_scrap(unop,yes_math);@+break;
3314 @.\\PP@>
3315 case minus_minus: app_str("\\MM");@+app_scrap(unop,yes_math);@+break;
3316 @.\\MM@>
3317 case minus_gt: app_str("\\MG");@+app_scrap(binop,yes_math);@+break;
3318 @.\\MG@>
3319 case gt_gt: app_str("\\GG");@+app_scrap(binop,yes_math);@+break;
3320 @.\\GG@>
3321 case lt_lt: app_str("\\LL");@+app_scrap(binop,yes_math);@+break;
3322 @.\\LL@>
3323 case dot_dot_dot: app_str("\\,\\ldots\\,");@+app_scrap(raw_int,yes_math);
3324   @+break; /* thin spaces surround the dots */
3325 @.\\,@>
3326 @.\\ldots@>
3327 case colon_colon: app_str("\\DC");@+app_scrap(colcol,maybe_math);@+break; /* the only |maybe_math| case here */
3328 @.\\DC@>
3329 case period_ast: app_str("\\PA");@+app_scrap(binop,yes_math);@+break;
3330 @.\\PA@>
3331 case minus_gt_ast: app_str("\\MGA");@+app_scrap(binop,yes_math);@+break;
3332 @.\\MGA@>
3333
3334 @ The following code must use |app_tok| instead of |app| in order to
3335 protect against overflow. Note that |tok_ptr+1<=max_toks| after |app_tok|
3336 has been used, so another |app| is legitimate before testing again.
3337
3338 Many of the special characters in a string must be prefixed by `\.\\' so that
3339 \TEX/ will print them properly.
3340 @^special string characters@>
3341
3342 @<Append a string or...@>=
3343 count= -1; /* a negative count never reaches zero, so no breaks are inserted */
3344 if (next_control==constant) app_str("\\T{"@q}@>);
3345 @.\\T@>
3346 else if (next_control==string) {
3347   count=20; app_str("\\.{"@q}@>); /* only strings get discretionary breaks */
3348 }
3349 @.\\.@>
3350 else app_str("\\vb{"@q}@>);
3351 @.\\vb@>
3352 while (id_first<id_loc) {
3353   if (count==0) { /* insert a discretionary break in a long string */
3354      app_str(@q(@>@q{@>"}\\)\\.{"@q}@>); count=20;
3355 @q(@>@.\\)@>
3356   }
3357 @^high-bit character handling@>
3358   if((eight_bits)(*id_first)>0177) { /* a character code above octal 177 */
3359     app_tok(quoted_char);
3360     app_tok((eight_bits)(*id_first++));
3361   }
3362   else {
3363     switch (*id_first) {
3364       case  ' ':case '\\':case '#':case '%':case '$':case '^':
3365       case '{': case '}': case '~': case '&': case '_': app('\\'); break; /* prefix for \TEX/ */
3366 @.\\\ @>
3367 @.\\\\@>
3368 @.\\\#@>
3369 @.\\\%@>
3370 @.\\\$@>
3371 @.\\\^@>
3372 @.\\\{@>@q}@>
3373 @q{@>@.\\\}@>
3374 @.\\\~@>
3375 @.\\\&@>
3376 @.\\\_@>
3377       case '@@': if (*(id_first+1)=='@@') id_first++; /* only the second one is copied */
3378         else err_print("! Double @@ should be used in strings");
3379 @.Double @@ should be used...@>
3380     }
3381     app_tok(*id_first++); /* the character itself */
3382   }
3383   count--;
3384 }
3385 app(@q{@>'}'); /* close the group that was opened above */
3386 app_scrap(exp,maybe_math);
3387
3388 @ We do not make the \TEX/ string into a scrap, because there is no
3389 telling what the user will be putting into it; instead we leave it
3390 open, to be picked up by the next scrap. If it comes at the end of a
3391 section, it will be made into a scrap when |finish_C| is called.
3392
3393 There's a known bug here, in cases where an adjacent scrap is
3394 |prelangle| or |prerangle|. Then the \TEX/ string can disappear
3395 when the \.{\\langle} or \.{\\rangle} becomes \.{<} or \.{>}.
3396 For example, if the user writes \.{\v x<@@ty@@>\v}, the \TEX/ string
3397 \.{\\hbox\{y\}} eventually becomes part of an |insert| scrap, which is combined
3398 with a |prelangle| scrap and eventually lost. The best way to work around
3399 this bug is probably to enclose the \.{@@t...@@>} in \.{@@[...@@]} so that
3400 the \TEX/ string is treated as an expression.
3401 @^bug, known@>
3402
3403 @<Append a \TEX/ string, without forming a scrap@>=
3404 app_str("\\hbox{"@q}@>);
3405 @^high-bit character handling@>
3406 while (id_first<id_loc)
3407   if((eight_bits)(*id_first)>0177) { /* a character code above octal 177 */
3408     app_tok(quoted_char);
3409     app_tok((eight_bits)(*id_first++));
3410   }
3411   else {
3412     if (*id_first=='@@') id_first++; /* drop it; the character after it is copied */
3413     app_tok(*id_first++);
3414   }
3415 app(@q{@>'}'); /* close the box; no scrap is formed here */
3416
3417 @ The function |app_cur_id| appends the current identifier to the
3418 token list; it also builds a new scrap if |scrapping==1|.
3419
3420 @<Predec...@>=
3421 void app_cur_id(); /* appends the current identifier to the token list */
3422
3423 @ @c
3424 void
3425 app_cur_id(scrapping)
3426 boolean scrapping; /* are we making this into a scrap? */
3427 {
3428   name_pointer p=id_lookup(id_first,id_loc,normal); /* the name from |id_first| to |id_loc| */
3429   if (p->ilk<=custom) { /* not a reserved word */
3430     app(id_flag+(int)(p-name_dir)); /* the token is |id_flag| plus the index in |name_dir| */
3431     if (scrapping) app_scrap(p->ilk==func_template? ftemplate: exp,
3432                              p->ilk==custom? yes_math: maybe_math);
3433 @.\\NULL@>
3434   } else {
3435     app(res_flag+(int)(p-name_dir)); /* likewise, but flagged with |res_flag| */
3436     if (scrapping) {
3437       if (p->ilk==alfop) app_scrap(ubinop,yes_math)@;
3438       else app_scrap(p->ilk,maybe_math); /* the |ilk| itself serves as the category */
3439     }
3440   }
3441 }
3442
3443 @ When the `\.{\v}' that introduces \CEE/ text is sensed, a call on
3444 |C_translate| will return a pointer to the \TEX/ translation of
3445 that text. If scraps exist in |scrap_info|, they are
3446 unaffected by this translation process.
3447
3448 @c
3449 text_pointer
3450 C_translate()
3451 {
3452   text_pointer result; /* the translation that will be returned */
3453   scrap_pointer outer_base=scrap_base; /* base of the enclosing scrap list */
3454   scrap_base=scrap_ptr+1; /* new scraps go above the existing ones */
3455   C_parse(section_name); /* gather the scraps of the \CEE/ text */
3456   if (next_control!='|') err_print("! Missing '|' after C text");
3457 @.Missing '|'...@>
3458   app_tok(cancel); /* a |cancel| token serves as a final ``comment'' */
3459   app_scrap(insert,maybe_math);
3460   result=translate(); /* translate the scraps from |scrap_base| on */
3461   if (max_scr_ptr<scrap_ptr) max_scr_ptr=scrap_ptr;
3462   scrap_ptr=scrap_base-1; scrap_base=outer_base; /* scrap the scraps */
3463   return(result);
3464 }
3465
3466 @ The |outer_parse| routine is to |C_parse| as |outer_xref|
3467 is to |C_xref|: It constructs a sequence of scraps for \CEE/ text
3468 until |next_control>=format_code|. Thus, it takes care of embedded comments.
3469
3470 The token list created from within `\pb' brackets is output as an argument
3471 to \.{\\PB}, if the user has invoked \.{CWEAVE} with the \.{+e} flag.
3472 Although \.{cwebmac} ignores \.{\\PB}, other macro packages
3473 might use it to localize the special meaning of the macros that mark up
3474 program text.
3475
3476 @d make_pb flags['e'] /* the \.{+e} flag: wrap `\pb' material in \.{\\PB} */
3477
3478 @c
3479 void
3480 outer_parse() /* makes scraps from \CEE/ tokens and comments */
3481 {
3482   int bal; /* brace level in comment */
3483   text_pointer p, q; /* partial comments */
3484   while (next_control<format_code)
3485     if (next_control!=begin_comment && next_control!=begin_short_comment)
3486       C_parse(ignore); /* ordinary \CEE/ text */
3487     else {
3488       boolean is_long_comment=(next_control==begin_comment);
3489       @<Make sure that there is room for the new...@>;
3490       app(cancel); app(inserted);
3491       if (is_long_comment) app_str("\\C{"@q}@>);
3492 @.\\C@>
3493       else app_str("\\SHC{"@q}@>);
3494 @.\\SHC@>
3495       bal=copy_comment(is_long_comment,1); next_control=ignore;
3496       while (bal>0) { /* the comment is interrupted by \CEE/ text */
3497         p=text_ptr; freeze_text; q=C_translate(); /* |p|: comment so far; |q|: translated \CEE/ text */
3498          /* at this point we have |tok_ptr+6<=max_toks| */
3499         app(tok_flag+(int)(p-tok_start));
3500         if (make_pb) app_str("\\PB{");
3501 @.\\PB@>
3502         app(inner_tok_flag+(int)(q-tok_start));
3503         if (make_pb)  app_tok('}');
3504         if (next_control=='|') { /* the \CEE/ text ended properly; resume copying */
3505           bal=copy_comment(is_long_comment,bal);
3506           next_control=ignore;
3507         }
3508         else bal=0; /* an error has been reported */
3509       }
3510       app(force); app_scrap(insert,no_math);
3511         /* the full comment becomes a scrap */
3512     }
3513 }
3514
3515 @* Output of tokens.
3516 So far our programs have only built up multi-layered token lists in
3517 \.{CWEAVE}'s internal memory; we have to figure out how to get them into
3518 the desired final form. The job of converting token lists to characters in
3519 the \TEX/ output file is not difficult, although it is an implicitly
3520 recursive process. Four main considerations had to be kept in mind when
3521 this part of \.{CWEAVE} was designed.  (a) There are two modes of output:
3522 |outer| mode, which translates tokens like |force| into line-breaking
3523 control sequences, and |inner| mode, which ignores them except that blank
3524 spaces take the place of line breaks. (b) The |cancel| instruction applies
3525 to the adjacent token or tokens that are output, and this cuts across levels
3526 of recursion since `|cancel|' occurs at the beginning or end of a token
3527 list on one level. (c) The \TEX/ output file will be semi-readable if line
3528 breaks are inserted after the result of tokens like |break_space| and
3529 |force|.  (d) The final line break should be suppressed, and there should
3530 be no |force| token output immediately after `\.{\\Y\\B}'.
3531
3532 @ The output process uses a stack to keep track of what is going on at
3533 different ``levels'' as the token lists are being written out. Entries on
3534 this stack have three parts:
3535
3536 \yskip\hang |end_field| is the |tok_mem| location where the token list of a
3537 particular level will end;
3538
3539 \yskip\hang |tok_field| is the |tok_mem| location from which the next token
3540 on a particular level will be read;
3541
3542 \yskip\hang |mode_field| is the current mode, either |inner| or |outer|.
3543
3544 \yskip\noindent The current values of these quantities are referred to
3545 quite frequently, so they are stored in a separate place instead of in the
3546 |stack| array. We call the current values |cur_end|, |cur_tok|, and
3547 |cur_mode|.
3548
3549 The global variable |stack_ptr| tells how many levels of output are
3550 currently in progress. The end of output occurs when an |end_translation|
3551 token is found, so the stack is never empty except when we first begin the
3552 output process.
3553
3554 @d inner 0 /* value of |mode| for \CEE/ texts within \TEX/ texts */
3555 @d outer 1 /* value of |mode| for \CEE/ texts in sections */
3556
3557 @<Typed...@>= typedef int mode;
3558 typedef struct {
3559   token_pointer end_field; /* ending location of token list */
3560   token_pointer tok_field; /* present location within token list */
3561   boolean mode_field; /* interpretation of control tokens: |inner| or |outer| */
3562 } output_state;
3563 typedef output_state *stack_pointer; /* used for pointers into |stack| */
3564
3565 @ @d cur_end cur_state.end_field /* current ending location in |tok_mem| */
3566 @d cur_tok cur_state.tok_field /* location of next output token in |tok_mem| */
3567 @d cur_mode cur_state.mode_field /* current mode of interpretation */
3568 @d init_stack stack_ptr=stack;cur_mode=outer /* initialize the stack; this expands to two statements */
3569
3570 @<Global...@>=
3571 output_state cur_state; /* |cur_end|, |cur_tok|, |cur_mode| */
3572 output_state stack[stack_size]; /* info for non-current levels */
3573 stack_pointer stack_ptr; /* first unused location in the output state stack */
3574 stack_pointer stack_end=stack+stack_size-1; /* end of |stack|: its last element */
3575 stack_pointer max_stack_ptr; /* largest value assumed by |stack_ptr| */
3576
3577 @ @<Set init...@>=
3578 max_stack_ptr=stack; /* no level has been pushed yet */
3579
3580 @ To insert token-list |p| into the output, the |push_level| subroutine
3581 is called; it saves the old level of output and gets a new one going.
3582 The value of |cur_mode| is not changed.
3583
3584 @c
3585 void
3586 push_level(p) /* suspends the current level and starts on list |p| */
3587 text_pointer p;
3588 {
3589   if (stack_ptr==stack_end) overflow("stack");
3590   if (stack_ptr>stack) /* the bottom level has no state worth saving */
3591     *stack_ptr=cur_state; /* saves |cur_end|, |cur_tok|, and |cur_mode| */
3592   ++stack_ptr;
3593   if (max_stack_ptr<stack_ptr)
3594     max_stack_ptr=stack_ptr; /* a new maximum depth */
3595   cur_tok=p[0]; /* the list starts here */
3596   cur_end=p[1]; /* its end is recorded in the following entry */
3597   /* |cur_mode| is deliberately left unchanged */
3598 }
3599
3600 @ Conversely, the |pop_level| routine restores the conditions that were in
3601 force when the current level was begun. This subroutine will never be
3602 called when |stack_ptr==stack+1|, since |push_level| saves nothing at the bottom level.
3603
3604 @c
3605 void
3606 pop_level() /* resumes the level that was suspended most recently */
3607 {
3608   stack_ptr--; /* back up to the latest saved state */
3609   cur_state=*stack_ptr; /* restores |cur_end|, |cur_tok|, and |cur_mode| */
3610 }
3611
3612 @ The |get_output| function returns the next byte of output that is not a
3613 reference to a token list. It returns the values |identifier| or |res_word|
3614 or |section_code| if the next token is to be an identifier (typeset in
3615 italics), a reserved word (typeset in boldface), or a section name (typeset
3616 by a complex routine that might generate additional levels of output).
3617 In these cases |cur_name| points to the identifier or section name in
3618 question.
3619
3620 @<Global...@>=
3621 name_pointer cur_name; /* identifier or section name found by |get_output| */
3622
3623 @ @d res_word 0201 /* returned by |get_output| for reserved words */
3624 @d section_code 0200 /* returned by |get_output| for section names */
3625
3626 @c
3627 eight_bits
3628 get_output() /* returns the next token of output */
3629 {
3630   sixteen_bits a; /* current item read from |tok_mem| */
3631   restart: while (cur_tok==cur_end) pop_level(); /* leave token lists that are exhausted */
3632   a=*(cur_tok++);
3633   if (a>=0400) { /* not a single byte, hence a flagged reference */
3634     cur_name=a % id_flag + name_dir;
3635     switch (a / id_flag) {
3636       case 2: return(res_word); /* |a==res_flag+cur_name| */
3637       case 3: return(section_code); /* |a==section_flag+cur_name| */
3638       case 4: push_level(a % id_flag + tok_start); goto restart;
3639         /* |a==tok_flag+cur_name| */
3640       case 5: push_level(a % id_flag + tok_start); cur_mode=inner; goto restart;
3641         /* |a==inner_tok_flag+cur_name| */
3642       default: return(identifier); /* |a==id_flag+cur_name| */
3643     }
3644   }
3645   return(a); /* an ordinary one-byte token */
3646 }
3647
3648 @ The real work associated with token output is done by |make_output|.
3649 This procedure appends an |end_translation| token to the current token list,
3650 and then it repeatedly calls |get_output| and feeds characters to the output
3651 buffer until reaching the |end_translation| sentinel. It is possible for
3652 |make_output| to be called recursively, since a section name may include
3653 embedded \CEE/ text; however, the depth of recursion never exceeds one
3654 level, since section names cannot be inside of section names.
3655
3656 A procedure called |output_C| does the scanning, translation, and
3657 output of \CEE/ text within `\pb' brackets, and this procedure uses
3658 |make_output| to output the current token list. Thus, the recursive call
3659 of |make_output| actually occurs when |make_output| calls |output_C|
3660 while outputting the name of a section.
3661 @^recursion@>
3662
3663 @c
3664 void
3665 output_C() /* translates \CEE/ text and outputs the resulting token list */
3666 {
3667   token_pointer save_tok_ptr;
3668   text_pointer save_text_ptr;
3669   sixteen_bits save_next_control; /* values to be restored */
3670   text_pointer p; /* translation of the \CEE/ text */
3671   save_tok_ptr=tok_ptr; save_text_ptr=text_ptr;
3672   save_next_control=next_control; next_control=ignore; p=C_translate();
3673   app(inner_tok_flag+(int)(p-tok_start)); /* |get_output| will read it in |inner| mode */
3674   if (make_pb) {
3675     out_str("\\PB{"); make_output(); out('}');
3676 @.\\PB@>
3677   }@+else make_output(); /* output the list */
3678   if (text_ptr>max_text_ptr) max_text_ptr=text_ptr; /* note the largest values first */
3679   if (tok_ptr>max_tok_ptr) max_tok_ptr=tok_ptr;
3680   text_ptr=save_text_ptr; tok_ptr=save_tok_ptr; /* forget the tokens */
3681   next_control=save_next_control; /* restore |next_control| to original state */
3682 }
3683
3684 @ Here is \.{CWEAVE}'s major output handler.
3685
3686 @<Predecl...@>=
3687 void make_output(); /* declared early because |output_C| calls it */
3688
3689 @ @c
3690 void
3691 make_output() /* outputs the equivalents of tokens */
3692 {
3693   eight_bits a, /* current output byte */
3694   b; /* next output byte */
3695   int c; /* count of |indent| and |outdent| tokens */
3696   char scratch[longest_name]; /* scratch area for section names */
3697   char *k, *k_limit; /* indices into |scratch| */
3698   char *j; /* index into |buffer| */
3699   char *p; /* index into |byte_mem| */
3700   char delim; /* first and last character of string being copied */
3701   char *save_loc, *save_limit; /* |loc| and |limit| to be restored */
3702   name_pointer cur_section_name; /* name of section being output */
3703   boolean save_mode; /* value of |cur_mode| before a sequence of breaks */
3704   app(end_translation); /* append a sentinel */
3705   freeze_text; push_level(text_ptr-1); /* start reading the text just frozen */
3706   while (1) {
3707     a=get_output();
3708     reswitch: switch(a) {
3709       case end_translation: return;
3710       case identifier: case res_word: @<Output an identifier@>; break;
3711       case section_code: @<Output a section name@>; break;
3712       case math_rel: out_str("\\MRL{"@q}@>); /* falls through to the |break| below */
3713 @.\\MRL@>
3714       case noop: case inserted: break;
3715       case cancel: case big_cancel: c=0; b=a;
3716         while (1) {
3717           a=get_output();
3718           if (a==inserted) continue;
3719           if ((a<indent && !(b==big_cancel&&a==' ')) || a>big_force) break; /* not absorbed */
3720           if (a==indent) c++; else if (a==outdent) c--;
3721           else if (a==opt) a=get_output(); /* skip what follows |opt| */
3722         }
3723         @<Output saved |indent| or |outdent| tokens@>;
3724         goto reswitch; /* |a| has not been processed yet */
3725       case indent: case outdent: case opt: case backup: case break_space:
3726       case force: case big_force: case preproc_line: @<Output a control,
3727         look ahead in case of line breaks, possibly |goto reswitch|@>; break;
3728       case quoted_char: out(*(cur_tok++)); /* the next token is output as is; falls through */
3729       case qualifier: break;
3730       default: out(a); /* otherwise |a| is an ordinary character */
3731     }
3732   }
3733 }
3734
3735 @ An identifier of length one does not have to be enclosed in braces, and it
3736 looks slightly better if set in a math-italic font instead of a (slightly
3737 narrower) text-italic font. Thus we output `\.{\\\v}\.{a}' but
3738 `\.{\\\\\{aa\}}'.
3739
3740 @<Output an identifier@>=
3741 out('\\');
3742 if (a==identifier) {
3743   if (cur_name->ilk==custom && !doing_format) {
3744  custom_out:
3745     for (p=cur_name->byte_start;p<(cur_name+1)->byte_start;p++)
3746       out(*p=='_'? 'x': *p=='$'? 'X': *p); /* underscores become \.x, dollar signs \.X */
3747     break; /* this leaves the |switch| in |make_output| */
3748   } else if (is_tiny(cur_name)) out('|')@;
3749 @.\\|@>
3750   else { delim='.'; /* |delim| stays |'.'| only if no lowercase letter occurs */
3751     for (p=cur_name->byte_start;p<(cur_name+1)->byte_start;p++)
3752       if (xislower(*p)) { /* not entirely uppercase */
3753          delim='\\'; break;
3754       }
3755   out(delim);
3756   }
3757 @.\\\\@>
3758 @.\\.@>
3759 }@+else if (cur_name->ilk==alfop) {
3760   out('X');
3761   goto custom_out; /* the rest is output like a |custom| identifier */
3762 }@+else out('&'); /* |a==res_word| */
3763 @.\\\&@>
3764 if (is_tiny(cur_name)) {
3765   if (isxalpha((cur_name->byte_start)[0]))
3766     out('\\');
3767   out((cur_name->byte_start)[0]); /* the only character of the name */
3768 }
3769 else out_name(cur_name,1);
3770
3771 @ The current mode does not affect the behavior of \.{CWEAVE}'s output routine
3772 except when we are outputting control tokens.
3773
3774 @<Output a control...@>=
3775 if (a<break_space || a==preproc_line) { /* a control that is not a line break */
3776   if (cur_mode==outer) {
3777     out('\\'); out(a-cancel+'0'); /* a backslash followed by a digit */
3778 @.\\1@>
3779 @.\\2@>
3780 @.\\3@>
3781 @.\\4@>
3782 @.\\8@>
3783     if (a==opt) {
3784       b=get_output(); /* |opt| is followed by a digit */
3785       if (b!='0' || force_lines==0) out(b)@;
3786       else out_str("{-1}"); /* |force_lines| encourages more \.{@@\v} breaks */
3787     }
3788   } else if (a==opt) b=get_output(); /* ignore digit following |opt| */
3789   }
3790 else @<Look ahead for strongest line break, |goto reswitch|@>
3791
3792 @ If several of the tokens |break_space|, |force|, |big_force| occur in a
3793 row, possibly mixed with blank spaces (which are ignored),
3794 the largest one is used. A line break also occurs in the output file,
3795 except at the very end of the translation. The very first line break
3796 is suppressed (i.e., a line break that follows `\.{\\Y\\B}').
3797
3798 @<Look ahead for st...@>= {
3799   b=a; save_mode=cur_mode; c=0; /* |b| is the strongest break seen so far */
3800   while (1) {
3801     a=get_output();
3802     if (a==inserted) continue;
3803     if (a==cancel || a==big_cancel) {
3804       @<Output saved |indent| or |outdent| tokens@>;
3805       goto reswitch; /* |cancel| overrides everything */
3806     }
3807     if ((a!=' ' && a<indent) || a==backup || a>big_force) { /* the sequence of breaks is over */
3808       if (save_mode==outer) {
3809         if (out_ptr>out_buf+3 && strncmp(out_ptr-3,"\\Y\\B",4)==0)
3810           goto reswitch; /* the very first line break is suppressed */
3811         @<Output saved |indent| or |outdent| tokens@>;
3812         out('\\'); out(b-cancel+'0');
3813 @.\\5@>
3814 @.\\6@>
3815 @.\\7@>
3816         if (a!=end_translation) finish_line(); /* no line break at the very end */
3817       }
3818       else if (a!=end_translation && cur_mode==inner) out(' '); /* a blank replaces the break */
3819       goto reswitch;
3820     }
3821     if (a==indent) c++;
3822     else if (a==outdent) c--;
3823     else if (a==opt) a=get_output(); /* skip what follows |opt| */
3824     else if (a>b) b=a; /* if |a==' '| we have |a<b| */
3825   }
3826 }
3827
3828 @ @<Output saved...@>=
3829   while (c>0) {@+out_str("\\1");@+c--;@+} /* more |indent| than |outdent| tokens were seen */
3830 @.\\1@>
3831   while (c<0) {@+out_str("\\2");@+c++;@+} /* more |outdent| than |indent| tokens were seen */
3832 @.\\2@>
3833
3834 @ The remaining part of |make_output| is somewhat more complicated. When we
3835 output a section name, we may need to enter the parsing and translation
3836 routines, since the name may contain \CEE/ code embedded in
3837 \pb\ constructions. This \CEE/ code is placed at the end of the active
3838 input buffer and the translation process uses the end of the active
3839 |tok_mem| area.
3840
3841 @<Output a section name@>= {
3842   out_str("\\X");
3843 @.\\X@>
3844   cur_xref=(xref_pointer)cur_name->xref;
3845   if (cur_xref->num==file_flag) {an_output=1; cur_xref=cur_xref->xlink;} /* skip the |file_flag| entry */
3846   else an_output=0;
3847   if (cur_xref->num>=def_flag) {
3848     out_section(cur_xref->num-def_flag);
3849     if (phase==3) { /* in phase three, the other entries flagged by |def_flag| follow */
3850       cur_xref=cur_xref->xlink;
3851       while (cur_xref->num>=def_flag) {
3852         out_str(", ");
3853         out_section(cur_xref->num-def_flag);
3854       cur_xref=cur_xref->xlink;
3855       }
3856     }
3857   }
3858   else out('0'); /* output the section number, or zero if it was undefined */
3859   out(':');
3860   if (an_output) out_str("\\.{"@q}@>);
3861 @.\\.@>
3862   @<Output the text of the section name@>;
3863   if (an_output) out_str(@q{@>" }"); /* close the group opened before the name */
3864   out_str("\\X");
3865 }
3866
3867 @ @<Output the text...@>=
3868 sprint_section_name(scratch,cur_name);
3869 k=scratch;
3870 k_limit=scratch+strlen(scratch); /* |k| runs through |scratch| until it reaches |k_limit| */
3871 cur_section_name=cur_name; /* used by the error messages for this name */
3872 while (k<k_limit) {
3873   b=*(k++);
3874   if (b=='@@') @<Skip next character, give error if not `\.{@@}'@>;
3875   if (an_output)
3876     switch (b) {
3877  case  ' ':case '\\':case '#':case '%':case '$':case '^':
3878  case '{': case '}': case '~': case '&': case '_':
3879     out('\\'); /* these characters are preceded by a backslash; falls through */
3880 @.\\\ @>
3881 @.\\\\@>
3882 @.\\\#@>
3883 @.\\\%@>
3884 @.\\\$@>
3885 @.\\\^@>
3886 @.\\\{@>@q}@>
3887 @q{@>@.\\\}@>
3888 @.\\\~@>
3889 @.\\\&@>
3890 @.\\\_@>
3891  default: out(b);
3892     }
3893   else if (b!='|') out(b)
3894   else { /* a vertical bar begins \CEE/ text inside the name */
3895     @<Copy the \CEE/ text into the |buffer| array@>;
3896     save_loc=loc; save_limit=limit; loc=limit+2; limit=j+1; /* the copied text starts at |limit+2| and its last character is at |j| */
3897     *limit='|'; output_C();
3898     loc=save_loc; limit=save_limit; /* return to the saved input position */
3899   }
3900 }
3901
3902 @ @<Skip next char...@>=
3903 if (*k++!='@@') { /* |k| is advanced whether or not the test succeeds */
3904   printf("\n! Illegal control code in section name: <");
3905 @.Illegal control code...@>
3906   print_section_name(cur_section_name); printf("> "); mark_error;
3907 }
3908
3909 @ The \CEE/ text enclosed in \pb\ should not contain `\.{\v}' characters,
3910 except within strings. We put a `\.{\v}' at the front of the buffer, so that an
3911 error message that displays the whole buffer will look a little bit sensible.
3912 The variable |delim| is zero outside of strings, otherwise it
3913 equals the delimiter that began the string being copied.
3914
3915 @<Copy the \CEE/ text into...@>=
3916 j=limit+1; *j='|'; delim=0; /* |j| always points to the last character stored */
3917 while (1) {
3918   if (k>=k_limit) {
3919     printf("\n! C text in section name didn't end: <");
3920 @.C text...didn't end@>
3921     print_section_name(cur_section_name); printf("> "); mark_error; break;
3922   }
3923   b=*(k++);
3924   if (b=='@@' || (b=='\\' && delim!=0))
3925      @<Copy a quoted character into the buffer@>
3926   else {
3927     if (b=='\'' || b=='"')
3928       if (delim==0) delim=b;
3929       else if (delim==b) delim=0; /* only the delimiter that opened the string can close it */
3930     if (b!='|' || delim!=0) {
3931       if (j>buffer+long_buf_size-3) overflow("buffer");
3932       *(++j)=b;
3933     }
3934     else break; /* a vertical bar outside of a string ends the copy and is not stored */
3935   }
3936 }
3937
3938 @ @<Copy a quoted char...@>= {
3939   if (j>buffer+long_buf_size-4) overflow("buffer"); /* two characters are stored below */
3940   *(++j)=b; *(++j)=*(k++); /* the character after |b| is copied without being examined */
3941 }
3942
3943 @** Phase two processing.
3944 We have assembled enough pieces of the puzzle in order to be ready to specify
3945 the processing in \.{CWEAVE}'s main pass over the source file. Phase two
3946 is analogous to phase one, except that more work is involved because we must
3947 actually output the \TEX/ material instead of merely looking at the
3948 \.{CWEB} specifications.
3949
3950 @<Predecl...@>=
3951 void phase_two();
3952
3953 @ @c
3954 void
3955 phase_two() { /* rereads the input and translates one section after another */
3956 reset_input(); if (show_progress) printf("\nWriting the output file...");
3957 @.Writing the output file...@>
3958 section_count=0; format_visible=1; copy_limbo(); /* sections are counted from zero again */
3959 finish_line(); flush_buffer(out_buf,0,0); /* insert a blank line, it looks nice */
3960 while (!input_has_ended) @<Translate the current section@>;
3961 }
3962
3963 @ The output file will contain the control sequence \.{\\Y} between non-null
3964 parts of a section, e.g., between the \TEX/ and definition parts if both
3965 are nonempty. This puts a little white space between the parts when they are
3966 printed. However, we don't want \.{\\Y} to occur between two definitions
3967 within a single section. The variables |out_line| or |out_ptr| will
3968 change if a part is non-null, so the following macros `|save_position|'
3969 and `|emit_space_if_needed|' are able to handle the situation:
3970
3971 @d save_position save_line=out_line; save_place=out_ptr
3972 @d emit_space_if_needed if (save_line!=out_line || save_place!=out_ptr)
3973   out_str("\\Y");
3974   space_checked=1 /* this assignment is a second statement, not governed by the |if| above */
3975 @.\\Y@>
3976
3977 @<Global...@>=
3978 int save_line; /* former value of |out_line| */
3979 char *save_place; /* former value of |out_ptr| */
3980 int sec_depth; /* the integer, if any, following \.{@@*} */
3981 boolean space_checked; /* have we done |emit_space_if_needed|? */
3982 boolean format_visible; /* should the next format declaration be output? */
3983 boolean doing_format=0; /* are we outputting a format declaration? */
3984 boolean group_found=0; /* has a starred section occurred? */
3985
3986 @ @<Translate the current section@>= {
3987   section_count++;
3988   @<Output the code for the beginning of a new section@>;
3989   save_position; /* record |out_line| and |out_ptr| so that later output can be detected */
3990   @<Translate the \TEX/ part of the current section@>;
3991   @<Translate the definition part of the current section@>;
3992   @<Translate the \CEE/ part of the current section@>;
3993   @<Show cross-references to this section@>;
3994   @<Output the code for the end of a section@>;
3995 }
3996
3997 @ Sections beginning with the \.{CWEB} control sequence `\.{@@\ }' start in the
3998 output with the \TEX/ control sequence `\.{\\M}', followed by the section
3999 number. Similarly, `\.{@@*}' sections lead to the control sequence `\.{\\N}'.
4000 In this case there's an additional parameter, representing one plus the
4001 specified depth, immediately after the \.{\\N}.
4002 If the section has changed, we put \.{\\*} just after the section number.
4003
4004 @<Output the code for the beginning...@>=
4005 if (*(loc-1)!='*') out_str("\\M");
4006 @.\\M@>
4007 else {
4008   while (*loc == ' ') loc++; /* skip blanks in front of the depth */
4009   if (*loc=='*') { /* ``top'' level */
4010     sec_depth = -1;
4011     loc++;
4012   }
4013   else {
4014     for (sec_depth=0; xisdigit(*loc);loc++)
4015       sec_depth = sec_depth*10 + (*loc) -'0'; /* decimal digits are accumulated; the depth stays 0 if there are none */
4016   }
4017   while (*loc == ' ') loc++; /* remove spaces before group title */
4018   group_found=1;
4019   out_str("\\N");
4020 @.\\N@>
4021   {@+ char s[32];@+sprintf(s,"{%d}",sec_depth+1);@+out_str(s);@+} /* the parameter is |sec_depth+1|, which is 0 for the top level */
4022   if (show_progress)
4023   printf("*%d",section_count); update_terminal; /* print a progress report; only the |printf| depends on |show_progress| */
4024 }
4025 out_str("{");out_section(section_count); out_str("}");
4026
4027 @ In the \TEX/ part of a section, we simply copy the source text, except that
4028 index entries are not copied and \CEE/ text within \pb\ is translated.
4029
4030 @<Translate the \T...@>= do {
4031   next_control=copy_TeX();
4032   switch (next_control) {
4033     case '|': init_stack; output_C(); break; /* translate the \CEE/ text that follows the bar */
4034     case '@@': out('@@'); break;
4035     case TeX_string: case noop:
4036     case xref_roman: case xref_wildcard: case xref_typewriter:
4037     case section_name: loc-=2; next_control=get_next(); /* back up two characters and rescan with |get_next|; skip to \.{@@>} */
4038       if (next_control==TeX_string)
4039         err_print("! TeX string should be in C text only"); break; /* the |break| is outside the |if| and ends this group of cases */
4040 @.TeX string should be...@>
4041     case thin_space: case math_break: case ord:
4042     case line_break: case big_line_break: case no_line_break: case join:
4043     case pseudo_semi: case macro_arg_open: case macro_arg_close:
4044     case output_defs_code:
4045         err_print("! You can't do that in TeX text"); break;
4046 @.You can't do that...@>
4047   }
4048 } while (next_control<format_code); /* codes below |format_code| keep us in the \TEX/ part */
4049
4050 @ When we get to the following code we have |next_control>=format_code|, and
4051 the token memory is in its initial empty state.
4052
4053 @<Translate the d...@>=
4054 space_checked=0;
4055 while (next_control<=definition) { /* |format_code| or |definition| */
4056   init_stack;
4057   if (next_control==definition) @<Start a macro definition@>@;
4058   else @<Start a format definition@>;
4059   outer_parse(); finish_C(format_visible); format_visible=1; /* |format_visible| is set back to 1 after every definition */
4060   doing_format=0;
4061 }
4062
4063 @ The |finish_C| procedure outputs the translation of the current
4064 scraps, preceded by the control sequence `\.{\\B}' and followed by the
4065 control sequence `\.{\\par}'. It also restores the token and scrap
4066 memories to their initial empty state.
4067
4068 A |force| token is appended to the current scraps before translation
4069 takes place, so that the translation will normally end with \.{\\6} or
4070 \.{\\7} (the \TEX/ macros for |force| and |big_force|). This \.{\\6} or
4071 \.{\\7} is replaced by the concluding \.{\\par} or by \.{\\Y\\par}.
4072
4073 @<Predecl...@>=
4074 void finish_C();
4075
4076 @ @c
4077 void
4078 finish_C(visible) /* finishes a definition or a \CEE/ part */
4079   boolean visible; /* nonzero if we should produce \TEX/ output */
4080 {
4081   text_pointer p; /* translation of the scraps */
4082   if (visible) {
4083     out_str("\\B"); app_tok(force); app_scrap(insert,no_math); /* a |force| token is appended before translation */
4084     p=translate();
4085 @.\\B@>
4086     app(tok_flag+(int)(p-tok_start)); make_output(); /* output the list */
4087     if (out_ptr>out_buf+1)
4088       if (*(out_ptr-1)=='\\')
4089 @.\\6@>
4090 @.\\7@>
4091 @.\\Y@>
4092         if (*out_ptr=='6') out_ptr-=2;
4093         else if (*out_ptr=='7') *out_ptr='Y'; /* a final \.{\\6} is dropped and a final \.{\\7} becomes \.{\\Y} */
4094     out_str("\\par"); finish_line();
4095   }
4096   if (text_ptr>max_text_ptr) max_text_ptr=text_ptr; /* the three maxima are updated even when |visible| is zero */
4097   if (tok_ptr>max_tok_ptr) max_tok_ptr=tok_ptr;
4098   if (scrap_ptr>max_scr_ptr) max_scr_ptr=scrap_ptr;
4099   tok_ptr=tok_mem+1; text_ptr=tok_start+1; scrap_ptr=scrap_info;
4100     /* forget the tokens and the scraps */
4101 }
4102
4103 @ Keeping in line with the conventions of the \CEE/ preprocessor (and
4104 otherwise contrary to the rules of \.{CWEB}) we distinguish here
4105 between the case that `\.(' immediately follows an identifier and the
4106 case that the two are separated by a space.  In the latter case, and
4107 if the identifier is not followed by `\.(' at all, the replacement
4108 text starts immediately after the identifier.  In the former case,
4109 it starts after we scan the matching `\.)'.
4110
4111 @<Start a macro...@>= {
4112   if (save_line!=out_line || save_place!=out_ptr || space_checked) app(backup);
4113   if(!space_checked){emit_space_if_needed;save_position;} /* |emit_space_if_needed| sets |space_checked|, so this is skipped from then on */
4114   app_str("\\D"); /* this will produce `\&{define }' */
4115 @.\\D@>
4116   if ((next_control=get_next())!=identifier)
4117     err_print("! Improper macro definition");
4118 @.Improper macro definition@>
4119   else {
4120     app('$'); app_cur_id(0);
4121     if (*loc=='(')
4122   reswitch: switch (next_control=get_next()) {
4123       case '(': case ',': app(next_control); goto reswitch; /* keep scanning the parenthesized list */
4124       case identifier: app_cur_id(0); goto reswitch;
4125       case ')': app(next_control); next_control=get_next(); break; /* the list is complete; fetch the token after it */
4126       default: err_print("! Improper macro definition"); break;
4127     }
4128     else next_control=get_next(); /* no parenthesis directly after the identifier */
4129     app_str("$ "); app(break_space);
4130     app_scrap(dead,no_math); /* scrap won't take part in the parsing */
4131   }
4132 }
4133
4134 @ @<Start a format...@>= {
4135   doing_format=1;
4136   if(*(loc-1)=='s' || *(loc-1)=='S') format_visible=0; /* |format_visible| is cleared when the character before |loc| is the letter s */
4137   if(!space_checked){emit_space_if_needed;save_position;}
4138   app_str("\\F"); /* this will produce `\&{format }' */
4139 @.\\F@>
4140   next_control=get_next();
4141   if (next_control==identifier) {
4142     app(id_flag+(int)(id_lookup(id_first, id_loc,normal)-name_dir));
4143     app(' ');
4144     app(break_space); /* this is syntactically separate from what follows */
4145     next_control=get_next();
4146     if (next_control==identifier) {
4147       app(id_flag+(int)(id_lookup(id_first, id_loc,normal)-name_dir));
4148       app_scrap(exp,maybe_math); app_scrap(semi,maybe_math); /* the only two |app_scrap| calls in this block */
4149       next_control=get_next();
4150     }
4151   }
4152   if (scrap_ptr!=scrap_info+2) err_print("! Improper format definition"); /* reported unless |scrap_ptr| is exactly |scrap_info+2| */
4153 @.Improper format definition@>
4154 }
4155
4156 @ Finally, when the \TEX/ and definition parts have been treated, we have
4157 |next_control>=begin_C|. We will make the global variable |this_section|
4158 point to the current section name, if it has a name.
4159
4160 @<Global...@>=
4161 name_pointer this_section; /* the current section name, or zero */
4162
4163 @ @<Translate the \CEE/...@>=
4164 this_section=name_dir; /* stays this way unless a section name starts the \CEE/ part */
4165 if (next_control<=section_name) {
4166   emit_space_if_needed; init_stack;
4167   if (next_control==begin_C) next_control=get_next();
4168   else {
4169     this_section=cur_section;
4170     @<Check that '=' or '==' follows this section name, and
4171       emit the scraps to start the section definition@>;
4172   }
4173   while  (next_control<=section_name) {
4174     outer_parse();
4175     @<Emit the scrap for a section name if present@>;
4176   }
4177   finish_C(1); /* the argument 1 asks |finish_C| to produce \TEX/ output */
4178 }
4179
4180 @ The title of the section and an $\E$ or $\mathrel+\E$ are made
4181 into a scrap that should not take part in the parsing.
4182
4183 @<Check that '='...@>=
4184 do next_control=get_next();
4185   while (next_control=='+'); /* allow optional `\.{+=}' */
4186 if (next_control!='=' && next_control!=eq_eq)
4187   err_print("! You need an = sign after the section name");
4188 @.You need an = sign...@>
4189   else next_control=get_next(); /* move past the equals sign */
4190 if (out_ptr>out_buf+1 && *out_ptr=='Y' && *(out_ptr-1)=='\\') app(backup);
4191     /* the section name will be flush left */
4192 @.\\Y@>
4193 app(section_flag+(int)(this_section-name_dir));
4194 cur_xref=(xref_pointer)this_section->xref;
4195 if(cur_xref->num==file_flag) cur_xref=cur_xref->xlink; /* step over a leading |file_flag| entry */
4196 app_str("${}");
4197 if (cur_xref->num!=section_count+def_flag) { /* the first remaining entry is not the definition made in this section */
4198   app_str("\\mathrel+"); /*section name is multiply defined*/
4199   this_section=name_dir; /*so we won't give cross-reference info here*/
4200 }
4201 app_str("\\E"); /* output an equivalence sign */
4202 @.\\E@>
4203 app_str("{}$");
4204 app(force); app_scrap(dead,no_math);
4205         /* this forces a line break unless `\.{@@+}' follows */
4206
4207 @ @<Emit the scrap...@>=
4208 if (next_control<section_name) {
4209   err_print("! You can't do that in C text");
4210 @.You can't do that...@>
4211   next_control=get_next(); /* go on with the next token after the error */
4212 }
4213 else if (next_control==section_name) {
4214   app(section_flag+(int)(cur_section-name_dir));
4215   app_scrap(section_scrap,maybe_math); /* the name becomes a scrap of category |section_scrap| */
4216   next_control=get_next();
4217 }
4218
4219 @ Cross references relating to a named section are given
4220 after the section ends.
4221
4222 @<Show cross...@>=
4223 if (this_section>name_dir) { /* nothing is shown while |this_section| equals |name_dir| */
4224   cur_xref=(xref_pointer)this_section->xref;
4225   if (cur_xref->num==file_flag){an_output=1;cur_xref=cur_xref->xlink;}
4226   else an_output=0;
4227   if (cur_xref->num>def_flag)
4228     cur_xref=cur_xref->xlink; /* bypass current section number */
4229   footnote(def_flag); footnote(cite_flag); footnote(0); /* each call leaves |cur_xref| at the first entry it did not print */
4230 }
4231
4232 @ The |footnote| procedure gives cross-reference information about
4233 multiply defined section names (if the |flag| parameter is
4234 |def_flag|), or about references to a section name
4235 (if |flag==cite_flag|), or to its uses (if |flag==0|). It assumes that
4236 |cur_xref| points to the first cross-reference entry of interest, and it
4237 leaves |cur_xref| pointing to the first element not printed.  Typical outputs:
4238 `\.{\\A101.}'; `\.{\\Us 370\\ET1009.}';
4239 `\.{\\As 8, 27\\*\\ETs64.}'.
4240
4241 Note that the output of \.{CWEAVE} is not English-specific; users may
4242 supply new definitions for the macros \.{\\A}, \.{\\As}, etc.
4243
4244 @<Predecl...@>=
4245 void footnote();
4246
4247 @ @c
4248 void
4249 footnote(flag) /* outputs section cross-references */
4250 sixteen_bits flag;
4251 {
4252   xref_pointer q; /* cross-reference pointer variable */
4253   if (cur_xref->num<=flag) return; /* no entry exceeds |flag|, so nothing is printed */
4254   finish_line(); out('\\');
4255 @.\\A@>
4256 @.\\Q@>
4257 @.\\U@>
4258   out(flag==0? 'U': flag==cite_flag? 'Q': 'A'); /* U when |flag| is zero, Q when it is |cite_flag|, otherwise A */
4259   @<Output all the section numbers on the reference list |cur_xref|@>;
4260   out('.');
4261 }
4262
4263 @ The following code distinguishes three cases, according as the number
4264 of cross-references is one, two, or more than two. Variable |q| points
4265 to the first cross-reference, and the last link is a zero.
4266
4267 @<Output all the section numbers...@>=
4268 q=cur_xref; if (q->xlink->num>flag) out('s'); /* plural */
4269 while (1) {
4270   out_section(cur_xref->num-flag); /* |flag| is subtracted before the number is output */
4271   cur_xref=cur_xref->xlink; /* point to the next cross-reference to output */
4272   if (cur_xref->num<=flag) break; /* the entries above |flag| are exhausted */
4273   if (cur_xref->xlink->num>flag) out_str(", "); /* not the last */
4274   else {out_str("\\ET"); /* the last */
4275 @.\\ET@>
4276   if (cur_xref != q->xlink) out('s'); /* the last of more than two */
4277   }
4278 }
4279
4280 @ @<Output the code for the end of a section@>=
4281 out_str("\\fi"); finish_line();
4282 @.\\fi@>
4283 flush_buffer(out_buf,0,0); /* insert a blank line, it looks nice */
4284
4285 @** Phase three processing.
4286 We are nearly finished! \.{CWEAVE}'s only remaining task is to write out the
4287 index, after sorting the identifiers and index entries.
4288
4289 If the user has set the |no_xref| flag (the \.{-x} option on the command line),
4290 just finish off the page, omitting the index, section name list, and table of
4291 contents.
4292
4293 @<Predecl...@>=
4294 void phase_three();
4295
4296 @ @c
4297 void
4298 phase_three() { /* writes the index and the list of section names, or only \.{\\end} when |no_xref| is set */
4299 if (no_xref) {
4300   finish_line();
4301   out_str("\\end");
4302 @.\\end@>
4303   finish_line();
4304 }
4305 else {
4306   phase=3; if (show_progress) printf("\nWriting the index...");
4307 @.Writing the index...@>
4308   finish_line();
4309   if ((idx_file=fopen(idx_file_name,"w"))==NULL)
4310     fatal("! Cannot open index file ",idx_file_name);
4311 @.Cannot open index file@>
4312   if (change_exists) {
4313     @<Tell about changed sections@>; finish_line(); finish_line();
4314   }
4315   out_str("\\inx"); finish_line();
4316 @.\\inx@>
4317   active_file=idx_file; /* change active file to the index file */
4318   @<Do the first pass of sorting@>;
4319   @<Sort and output the index@>;
4320   finish_line(); fclose(active_file); /* finished with |idx_file| */
4321   active_file=tex_file; /* switch back to |tex_file| for a tic */
4322   out_str("\\fin"); finish_line();
4323 @.\\fin@>
4324   if ((scn_file=fopen(scn_file_name,"w"))==NULL)
4325     fatal("! Cannot open section file ",scn_file_name);
4326 @.Cannot open section file@>
4327   active_file=scn_file; /* change active file to section listing file */
4328   @<Output all the section names@>;
4329   finish_line(); fclose(active_file); /* finished with |scn_file| */
4330   active_file=tex_file; /* the closing macro goes to |tex_file| */
4331   if (group_found) out_str("\\con");@+else out_str("\\end"); /* \.{\\con} is used when a starred section has occurred */
4332 @.\\con@>
4333 @.\\end@>
4334   finish_line();
4335   fclose(active_file); /* |active_file| is |tex_file| at this point */
4336 }
4337 if (show_happiness) printf("\nDone.");
4338 check_complete(); /* was all of the change file used? */
4339 }
4340
4341 @ Just before the index comes a list of all the changed sections, including
4342 the index section itself.
4343
4344 @<Global...@>=
4345 sixteen_bits k_section; /* runs through the sections */
4346
4347 @ @<Tell about changed sections@>= {
4348   /* remember that the index is already marked as changed */
4349   k_section=0;
4350   while (!changed_section[++k_section]); /* advance to the first changed section */
4351   out_str("\\ch ");
4352 @.\\ch@>
4353   out_section(k_section);
4354   while (k_section<section_count) {
4355     while (!changed_section[++k_section]); /* this scan has no bound of its own; it stops at the next changed section */
4356     out_str(", "); out_section(k_section);
4357   }
4358   out('.');
4359 }
4360
4361 @ A left-to-right radix sorting method is used, since this makes it easy to
4362 adjust the collating sequence and since the running time will be at worst
4363 proportional to the total length of all entries in the index. We put the
4364 identifiers into 102 different lists based on their first characters.
4365 (Uppercase letters are put into the same list as the corresponding lowercase
4366 letters, since we want to have `$t<\\{TeX}<\&{to}$'.) The
4367 list for character |c| begins at location |bucket[c]| and continues through
4368 the |blink| array.
4369
4370 @<Global...@>=
4371 name_pointer bucket[256];
4372 name_pointer next_name; /* successor of |cur_name| when sorting */
4373 name_pointer blink[max_names]; /* links in the buckets */
4374
4375 @ To begin the sorting, we go through all the hash lists and put each entry
4376 having a nonempty cross-reference list into the proper bucket.
4377
4378 @<Do the first pass...@>= {
4379 int c;
4380 for (c=0; c<=255; c++) bucket[c]=NULL; /* all buckets start out empty */
4381 for (h=hash; h<=hash_end; h++) {
4382   next_name=*h;
4383   while (next_name) {
4384     cur_name=next_name; next_name=cur_name->link;
4385     if (cur_name->xref!=(char*)xmem) { /* names whose |xref| equals |xmem| are left out */
4386       c=(eight_bits)((cur_name->byte_start)[0]);
4387       if (xisupper(c)) c=tolower(c); /* uppercase letters share a bucket with lowercase ones */
4388       blink[cur_name-name_dir]=bucket[c]; bucket[c]=cur_name; /* |cur_name| goes to the front of its bucket */
4389     }
4390   }
4391 }
4392 }
4393
4394 @ During the sorting phase we shall use the |cat| and |trans| arrays from
4395 \.{CWEAVE}'s parsing algorithm and rename them |depth| and |head|. They now
4396 represent a stack of identifier lists for all the index entries that have
4397 not yet been output. The variable |sort_ptr| tells how many such lists are
4398 present; the lists are output in reverse order (first |sort_ptr|, then
4399 |sort_ptr-1|, etc.). The |j|th list starts at |head[j]|, and if the first
4400 |k| characters of all entries on this list are known to be equal we have
4401 |depth[j]==k|.
4402
4403 @ @<Rest of |trans_plus| union@>=
4404 name_pointer Head;
4405
4406 @ @d depth cat /* reclaims memory that is no longer needed for parsing */
4407 @d head trans_plus.Head /* ditto */
4408 @f sort_pointer int
4409 @d sort_pointer scrap_pointer /* ditto */
4410 @d sort_ptr scrap_ptr /* ditto */
4411 @d max_sorts max_scraps /* ditto */
4412
4413 @<Global...@>=
4414 eight_bits cur_depth; /* depth of current buckets */
4415 char *cur_byte; /* index into |byte_mem| */
4416 sixteen_bits cur_val; /* current cross-reference number */
4417 sort_pointer max_sort_ptr; /* largest value of |sort_ptr| */
4418
4419 @ @<Set init...@>=
4420 max_sort_ptr=scrap_info;
4421
4422 @ The desired alphabetic order is specified by the |collate| array; namely,
4423 $|collate|[0]<|collate|[1]<\cdots<|collate|[228]$.
4424
4425 @<Global...@>=
4426 eight_bits collate[102+128]; /* collation order */
4427 @^high-bit character handling@>
4428
4429 @ We use the order $\hbox{null}<\.\ <\hbox{other characters}<{}$\.\_${}<
4430 \.A=\.a<\cdots<\.Z=\.z<\.0<\cdots<\.9.$ Warning: The collation mapping
4431 needs to be changed if ASCII code is not being used.
4432 @^ASCII code dependencies@>
4433 @^high-bit character handling@>
4434
4435 We initialize |collate| by copying a few characters at a time, because
4436 some \CEE/ compilers choke on long strings.
4437
4438 @<Set init...@>=
4439 collate[0]=0; /* the |collate| array is declared with element type |eight_bits|, while |strcpy| takes a |char *| destination, hence the casts below */
4440 strcpy((char *)collate+1," \1\2\3\4\5\6\7\10\11\12\13\14\15\16\17");
4441 /* 16 characters + 1 = 17 */
4442 strcpy((char *)collate+17,"\20\21\22\23\24\25\26\27\30\31\32\33\34\35\36\37");
4443 /* 16 characters + 17 = 33 */
4444 strcpy((char *)collate+33,"!\42#$%&'()*+,-./:;<=>?@@[\\]^`{|}~_");
4445 /* 32 characters + 33 = 65 */
4446 strcpy((char *)collate+65,"abcdefghijklmnopqrstuvwxyz0123456789");
4447 /* (26 + 10) characters + 65 = 101 */
4448 strcpy((char *)collate+101,"\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217");
4449 /* 16 characters + 101 = 117 */
4450 strcpy((char *)collate+117,"\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237");
4451 /* 16 characters + 117 = 133 */
4452 strcpy((char *)collate+133,"\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257");
4453 /* 16 characters + 133 = 149 */
4454 strcpy((char *)collate+149,"\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277");
4455 /* 16 characters + 149 = 165 */
4456 strcpy((char *)collate+165,"\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317");
4457 /* 16 characters + 165 = 181 */
4458 strcpy((char *)collate+181,"\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337");
4459 /* 16 characters + 181 = 197 */
4460 strcpy((char *)collate+197,"\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357");
4461 /* 16 characters + 197 = 213 */
4462 strcpy((char *)collate+213,"\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377");
4463 /* 16 characters + 213 = 229 */
4464
4465 @ Procedure |unbucket| goes through the buckets and adds nonempty lists
4466 to the stack, using the collating sequence specified in the |collate| array.
4467 The parameter to |unbucket| tells the current depth in the buckets.
4468 Any two sequences that agree in their first 255 character positions are
4469 regarded as identical.
4470
4471 @d infinity 255 /* $\infty$ (approximately) */
4472
4473 @<Predecl...@>=
4474 void  unbucket();
4475
4476 @ @c
4477 void
4478 unbucket(d) /* empties buckets having depth |d| */
4479 eight_bits d;
4480 {
4481   int c;  /* index into |bucket|; cannot be a simple |char| because of sign
4482     comparison below*/
4483   for (c=100+128; c>= 0; c--) if (bucket[collate[c]]) { /* |collate| is scanned from its last entry down to entry 0 */
4484 @^high-bit character handling@>
4485     if (sort_ptr>=scrap_info_end) overflow("sorting");
4486     sort_ptr++;
4487     if (sort_ptr>max_sort_ptr) max_sort_ptr=sort_ptr;
4488     if (c==0) sort_ptr->depth=infinity; /* the list found at entry 0 gets depth |infinity| instead of |d| */
4489     else sort_ptr->depth=d;
4490     sort_ptr->head=bucket[collate[c]]; bucket[collate[c]]=NULL; /* the list moves to the stack and its bucket becomes empty */
4491   }
4492 }
4493
4494 @ @<Sort and output...@>=
4495 sort_ptr=scrap_info; unbucket(1); /* the stack starts empty and the initial lists get depth 1 */
4496 while (sort_ptr>scrap_info) {
4497   cur_depth=sort_ptr->depth;
4498   if (blink[sort_ptr->head-name_dir]==0 || cur_depth==infinity) /* a list of one entry, or one whose depth is |infinity| */
4499     @<Output index entries for the list at |sort_ptr|@>@;
4500   else @<Split the list at |sort_ptr| into further lists@>;
4501 }
4502
4503 @ @<Split the list...@>= {
4504   eight_bits c;
4505   next_name=sort_ptr->head;
4506   do {
4507     cur_name=next_name; next_name=blink[cur_name-name_dir];
4508     cur_byte=cur_name->byte_start+cur_depth; /* the character at offset |cur_depth| selects the bucket */
4509     if (cur_byte==(cur_name+1)->byte_start) c=0; /* hit end of the name */
4510     else {
4511       c=(eight_bits) *cur_byte;
4512       if (xisupper(c)) c=tolower(c);
4513     }
4514   blink[cur_name-name_dir]=bucket[c]; bucket[c]=cur_name;
4515   } while (next_name);
4516   --sort_ptr; unbucket(cur_depth+1); /* pop the list just split, then push its nonempty buckets */
4517 }
4518
4519 @ @<Output index...@>= {
4520   cur_name=sort_ptr->head;
4521   do {
4522     out_str("\\I");
4523 @.\\I@>
4524     @<Output the name at |cur_name|@>;
4525     @<Output the cross-references at |cur_name|@>;
4526     cur_name=blink[cur_name-name_dir]; /* follow the |blink| chain to the next entry of this list */
4527   } while (cur_name);
4528   --sort_ptr; /* pop the list that was just output */
4529 }
4530
4531 @ @<Output the name...@>=
4532 switch (cur_name->ilk) {
4533   case normal: case func_template: if (is_tiny(cur_name)) out_str("\\|");
4534     else {char *j;
4535       for (j=cur_name->byte_start;j<(cur_name+1)->byte_start;j++)
4536         if (xislower(*j)) goto lowcase;
4537       out_str("\\."); break; /* no character of the name passed the |xislower| test */
4538 lowcase: out_str("\\\\"); /* some character of the name passed the |xislower| test */
4539     }
4540   break;
4541 @.\\|@>
4542 @.\\.@>
4543 @.\\\\@>
4544   case wildcard: out_str("\\9");@+ goto not_an_identifier;
4545 @.\\9@>
4546   case typewriter: out_str("\\."); /* no |break| here: control continues into the |roman| case */
4547 @.\\.@>
4548   case roman: not_an_identifier: out_name(cur_name,0); goto name_done;
4549   case custom: {char *j; out_str("$\\");
4550     for (j=cur_name->byte_start;j<(cur_name+1)->byte_start;j++)
4551       out(*j=='_'? 'x': *j=='$'? 'X': *j); /* an underscore is output as x and a dollar sign as X */
4552     out('$');
4553     goto name_done;
4554     }
4555   default: out_str("\\&");
4556 @.\\\&@>
4557 }
4558 out_name(cur_name,1); /* reached from the |normal|, |func_template| and |default| cases only */
4559 name_done:@;
4560
4561 @ Section numbers that are to be underlined are enclosed in
4562 `\.{\\[}$\,\ldots\,$\.]'.
4563
4564 @<Output the cross-references...@>=
4565 @<Invert the cross-reference list at |cur_name|, making |cur_xref| the head@>;
4566 do {
4567   out_str(", "); cur_val=cur_xref->num;
4568   if (cur_val<def_flag) out_section(cur_val);
4569   else {out_str("\\["); out_section(cur_val-def_flag); out(']');} /* values of |def_flag| or more are bracketed, with |def_flag| subtracted */
4570 @.\\[@>
4571   cur_xref=cur_xref->xlink;
4572 } while (cur_xref!=xmem); /* |xmem| marks the end of the list */
4573 out('.'); finish_line();
4574
4575 @ List inversion is best thought of as popping elements off one stack and
4576 pushing them onto another. In this case |cur_xref| will be the head of
4577 the stack that we push things onto.
4578 @<Global...@>=
4579 xref_pointer next_xref, this_xref;
4580   /* pointer variables for rearranging a list */
4581
4582 @ @<Invert the cross-reference list at |cur_name|, making |cur_xref| the head@>=
4583 this_xref=(xref_pointer)cur_name->xref; cur_xref=xmem; /* the new list starts out as just the end marker |xmem| */
4584 do {
4585   next_xref=this_xref->xlink; this_xref->xlink=cur_xref; /* save the successor, then relink this entry in front of the new list */
4586   cur_xref=this_xref; this_xref=next_xref;
4587 } while (this_xref!=xmem);
4588
4589 @ The following recursive procedure walks through the tree of section names and
4590 prints them.
4591 @^recursion@>
4592
4593 @<Predecl...@>=
4594 void section_print();
4595
4596 @ @c
4597 void
4598 section_print(p) /* print all section names in subtree |p| */
4599 name_pointer p;
4600 {
4601   if (p) {
4602     section_print(p->llink); out_str("\\I"); /* the |llink| subtree is printed before |p| itself */
4603 @.\\I@>
4604     tok_ptr=tok_mem+1; text_ptr=tok_start+1; scrap_ptr=scrap_info; init_stack; /* reset the token, text and scrap pointers to their starting values */
4605     app(p-name_dir+section_flag); make_output();
4606     footnote(cite_flag);
4607     footnote(0); /* |cur_xref| was set by |make_output| */
4608     finish_line();@/
4609     section_print(p->rlink); /* the |rlink| subtree is printed after |p| */
4610   }
4611 }
4612
4613 @ @<Output all the section names@>=section_print(root)
4614
4615 @ Because on some systems the difference between two pointers is a |long|
4616 rather than an |int|, we use \.{\%ld} to print these quantities.
4617
4618 @c
4619 void
4620 print_stats() { /* prints, for each table, the amount used and the amount available */
4621   printf("\nMemory usage statistics:\n");
4622 @.Memory usage statistics:@>
4623   printf("%ld names (out of %ld)\n",
4624             (long)(name_ptr-name_dir),(long)max_names); /* every quantity is cast to |long| to match \.{\%ld} */
4625   printf("%ld cross-references (out of %ld)\n",
4626             (long)(xref_ptr-xmem),(long)max_refs);
4627   printf("%ld bytes (out of %ld)\n",
4628             (long)(byte_ptr-byte_mem),(long)max_bytes);
4629   printf("Parsing:\n");
4630   printf("%ld scraps (out of %ld)\n",
4631             (long)(max_scr_ptr-scrap_info),(long)max_scraps);
4632   printf("%ld texts (out of %ld)\n",
4633             (long)(max_text_ptr-tok_start),(long)max_texts);
4634   printf("%ld tokens (out of %ld)\n",
4635             (long)(max_tok_ptr-tok_mem),(long)max_toks);
4636   printf("%ld levels (out of %ld)\n",
4637             (long)(max_stack_ptr-stack),(long)stack_size);
4638   printf("Sorting:\n");
4639   printf("%ld levels (out of %ld)\n",
4640             (long)(max_sort_ptr-scrap_info),(long)max_scraps); /* measured from |scrap_info| and limited by |max_scraps|, like the scraps above */
4641 }
4642
4643 @** Index.
4644 If you have read and understood the code for Phase III above, you know what
4645 is in this index and how it got here. All sections in which an identifier is
4646 used are listed with that identifier, except that reserved words are
4647 indexed only when they appear in format definitions, and the appearances
4648 of identifiers in section names are not indexed. Underlined entries
4649 correspond to where the identifier was declared. Error messages, control
4650 sequences put into the output, and a few
4651 other things like ``recursion'' are indexed here too.