source/texk/web2c/cwebdir/ctangle.w

   1 % This file is part of CWEB.
   2 % This program by Silvio Levy and Donald E. Knuth
   3 % is based on a program by Knuth.
   4 % It is distributed WITHOUT ANY WARRANTY, express or implied.
   5 % Version 3.64 --- February 2002
   6 % (same as Version 3.5 except for minor corrections)
   7 % (also quotes backslashes in file names of #line directives)
   8
   9 % Copyright (C) 1987,1990,1993,2000 Silvio Levy and Donald E. Knuth
  10
  11 % Permission is granted to make and distribute verbatim copies of this
  12 % document provided that the copyright notice and this permission notice
  13 % are preserved on all copies.
  14
  15 % Permission is granted to copy and distribute modified versions of this
  16 % document under the conditions for verbatim copying, provided that the
  17 % entire resulting derived work is given a different name and distributed
  18 % under the terms of a permission notice identical to this one.
  19
  20 % Here is TeX material that gets inserted after \input cwebmac
  21 \def\hang{\hangindent 3em\indent\ignorespaces}
  22 \def\pb{$\.|\ldots\.|$} % C brackets (|...|)
  23 \def\v{\char'174} % vertical (|) in typewriter font
  24 \mathchardef\RA="3221 % right arrow
  25 \mathchardef\BA="3224 % double arrow
  26
  27 \def\title{CTANGLE (Version 3.64)}
  28 \def\topofcontents{\null\vfill
  29   \centerline{\titlefont The {\ttitlefont CTANGLE} processor}
  30   \vskip 15pt
  31   \centerline{(Version 3.64)}
  32   \vfill}
  33 \def\botofcontents{\vfill
  34 \noindent
  35 Copyright \copyright\ 1987, 1990, 1993, 2000 Silvio Levy and Donald E. Knuth
  36 \bigskip\noindent
  37 Permission is granted to make and distribute verbatim copies of this
  38 document provided that the copyright notice and this permission notice
  39 are preserved on all copies.
  40
  41 \smallskip\noindent
  42 Permission is granted to copy and distribute modified versions of this
  43 document under the conditions for verbatim copying, provided that the
  44 entire resulting derived work is given a different name and distributed
  45 under the terms of a permission notice identical to this one.
  46 }
  47 \pageno=\contentspagenumber \advance\pageno by 1
  48 \let\maybe=\iftrue
  49 @s not_eq normal @q unreserve a C++ keyword @>
  50
  51 @** Introduction.
  52 This is the \.{CTANGLE} program by Silvio Levy and Donald E. Knuth,
  53 based on \.{TANGLE} by Knuth.
  54 We are thankful to
  55 Nelson Beebe, Hans-Hermann Bode (to whom the \CPLUSPLUS/ adaptation is due),
  56 Klaus Guntermann, Norman Ramsey, Tomas Rokicki, Joachim Schnitter,
  57 Joachim Schrod, Lee Wittenberg, and others who have contributed improvements.
  58
  59 The ``banner line'' defined here should be changed whenever \.{CTANGLE}
  60 is modified.
  61
  62 @d banner "This is CTANGLE (Version 3.64)\n"
  63
  64 @c
  65 @<Include files@>@/
  66 @h
  67 @<Common code for \.{CWEAVE} and \.{CTANGLE}@>@/
  68 @<Typedef declarations@>@/
  69 @<Global variables@>@/
  70 @<Predeclaration of procedures@>@/
  71
  72 @ We predeclare several standard system functions here instead of including
  73 their system header files, because the names of the header files are not as
  74 standard as the names of the functions. (For example, some \CEE/ environments
  75 have \.{<string.h>} where others have \.{<strings.h>}.)
  76
  77 @<Predecl...@>=
  78 extern int strlen(); /* length of string; NOTE(review): standard \CEE/ declares the result as |size_t|, so confirm that this |int| declaration suits your compiler */
  79 extern int strcmp(); /* compare strings lexicographically */
  80 extern char* strcpy(); /* copy one string to another */
  81 extern int strncmp(); /* compare up to $n$ string characters */
  82 extern char* strncpy(); /* copy up to $n$ string characters */
  83
  84 @ \.{CTANGLE} has a fairly straightforward outline.  It operates in
  85 two phases: First it reads the source file, saving the \CEE/ code in
  86 compressed form; then it shuffles and outputs the code.
  87
  88 Please read the documentation for \.{common}, the set of routines common
  89 to \.{CTANGLE} and \.{CWEAVE}, before proceeding further.
  90
  91 @c
  92 int main (ac, av) /* the main program: set up, read everything, then write everything */
  93 int ac; /* number of command-line arguments */
  94 char **av; /* the command-line arguments themselves */
  95 {
  96   argc=ac; argv=av; /* keep the arguments in |argc| and |argv|, which are declared outside |main| */
  97   program=ctangle; /* NOTE(review): presumably tells the common code which processor is running; confirm in |"common.w"| */
  98   @<Set initial values@>;
  99   common_init(); /* NOTE(review): presumably the initialization shared with \.{CWEAVE}; see |"common.w"| */
 100   if (show_banner) printf(banner); /* print a ``banner line'' */
 101   phase_one(); /* read all the user's text and compress it into |tok_mem| */
 102   phase_two(); /* output the contents of the compressed tables */
 103   return wrap_up(); /* and exit gracefully */
 104 }
 105
 106 @ The following parameters were sufficient in the original \.{TANGLE} to
 107 handle \TEX/,
 108 so they should be sufficient for most applications of \.{CTANGLE}.
 109 If you change |max_bytes|, |max_names|, or |hash_size| you should also
 110 change them in the file |"common.w"|.
 111
 112 @d max_bytes 90000 /* the number of bytes in identifiers,
 113   index entries, and section names; used in |"common.w"| */
 114 @d max_toks 270000 /* number of bytes in compressed \CEE/ code */
 115 @d max_names 4000 /* number of identifiers, strings, section names;
 116   must be less than 10240; used in |"common.w"| */
 117 @d max_texts 2500 /* number of replacement texts, must be less than 10240 */
 118 @d hash_size 353 /* should be prime; used in |"common.w"| */
 119 @d longest_name 10000 /* section names shouldn't be longer than this */
 120 @d stack_size 50 /* number of simultaneous levels of macro expansion */
 121 @d buf_size 100 /* for \.{CWEAVE} and \.{CTANGLE} */
 122
 123 @ The next few sections contain stuff from the file |"common.w"| that must
 124 be included in both |"ctangle.w"| and |"cweave.w"|. It appears in
 125 file |"common.h"|, which needs to be updated when |"common.w"| changes.
 126
 127 @i common.h
 128
 129 @* Data structures exclusive to {\tt CTANGLE}.
 130 We've already seen that the |byte_mem| array holds the names of identifiers,
 131 strings, and sections;
 132 the |tok_mem| array holds the replacement texts
 133 for sections. Allocation is sequential, since things are deleted only
 134 during Phase II, and only in a last-in-first-out manner.
 135
 136 A \&{text} variable is a structure containing a pointer into
 137 |tok_mem|, which tells where the corresponding text starts, and an
 138 integer |text_link|, which, as we shall see later, is used to connect
 139 pieces of text that have the same name.  All the \&{text}s are stored in
 140 the array |text_info|, and we use a |text_pointer| variable to refer
 141 to them.
 142
 143 The first position of |tok_mem| that is unoccupied by
 144 replacement text is called |tok_ptr|, and the first unused location of
 145 |text_info| is called |text_ptr|.  Thus we usually have the identity
 146 |text_ptr->tok_start==tok_ptr|.
 147
 148 If your machine does not support |unsigned char| you should change
 149 the definition of \&{eight\_bits} to |unsigned short|.
 150 @^system dependencies@>
 151
 152 @<Typed...@>=
 153 typedef struct {
 154   eight_bits *tok_start; /* pointer into |tok_mem|: where this replacement text starts */
 155   sixteen_bits text_link; /* relates replacement texts: 0 for a macro, otherwise |section_flag| or the |text_info| index of a continuation */
 156 } text;
 157 typedef text *text_pointer; /* refers to an entry of |text_info| */
 158
 159 @ @<Glob...@>=
 160 text text_info[max_texts]; /* all the replacement texts */
 161 text_pointer text_info_end=text_info+max_texts-1; /* last entry of |text_info| */
 162 text_pointer text_ptr; /* first unused position in |text_info| */
 163 eight_bits tok_mem[max_toks]; /* the bytes of all replacement texts */
 164 eight_bits *tok_mem_end=tok_mem+max_toks-1; /* last byte of |tok_mem| */
 165 eight_bits *tok_ptr; /* first unused position in |tok_mem| */
 166
 167 @ @<Set init...@>=
 168 text_info->tok_start=tok_ptr=tok_mem; /* text 0 starts at the beginning of |tok_mem|, and nothing is stored yet */
 169 text_ptr=text_info+1; text_ptr->tok_start=tok_mem; /* entry 1 is the first unused one, and it starts at the same place */
 170   /* this makes replacement text 0 of length zero */
 171
 172 @ If |p| is a pointer to a section name, |p->equiv| is a pointer to its
 173 replacement text, an element of the array |text_info|.
 174
 175 @d equiv equiv_or_xref /* info corresponding to names */
 176
 177 @ @<Set init...@>=
 178 name_dir->equiv=(char *)text_info; /* the undefined section has no replacement text */
 179
 180 @ Here's the procedure that decides whether a name of length |l|
 181 starting at position |first| equals the identifier pointed to by |p|:
 182
 183 @c
 184 int names_match(p,first,l) /* returns 1 if the string equals the name of |p|, otherwise 0 */
 185 name_pointer p; /* points to the proposed match */
 186 char *first; /* position of first character of string */
 187 int l; /* length of identifier */
 188 {
 189   if (length(p)!=l) return 0; /* names of different lengths never match */
 190   return !strncmp(first,p->byte_start,l); /* 1 if all |l| characters agree, otherwise 0 */
 191 }
 192
 193 @ The common lookup routine refers to separate routines |init_node| and
 194 |init_p| when the data structure grows. Actually |init_p| is called only by
 195 \.{CWEAVE}, but we need to declare a dummy version so that
 196 the loader won't complain of its absence.
 197
 198 @c
 199 void
 200 init_node(node) /* gives a new name entry its initial |equiv| */
 201 name_pointer node; /* the name entry being initialized */
 202 {
 203     node->equiv=(char *)text_info; /* the same value that marks the undefined section: no replacement text */
 204 }
 205 void
 206 init_p() {} /* dummy version; only \.{CWEAVE} calls |init_p| */
 207
 208 @* Tokens.
 209 Replacement texts, which represent \CEE/ code in a compressed format,
 210 appear in |tok_mem| as mentioned above. The codes in
 211 these texts are called `tokens'; some tokens occupy two consecutive
 212 eight-bit byte positions, and the others take just one byte.
 213
 214 If $p$ points to a replacement text, |p->tok_start| is the |tok_mem| position
 215 of the first eight-bit code of that text. If |p->text_link==0|,
 216 this is the replacement text for a macro, otherwise it is the replacement
 217 text for a section. In the latter case |p->text_link| is either equal to
 218 |section_flag|, which means that there is no further text for this section, or
 219 |p->text_link| points to a continuation of this replacement text; such
 220 links are created when several sections have \CEE/ texts with the same
 221 name, and they also tie together all the \CEE/ texts of unnamed sections.
 222 The replacement text pointer for the first unnamed section appears in
 223 |text_info->text_link|, and the most recent such pointer is |last_unnamed|.
 224
 225 @d section_flag max_texts /* final |text_link| in section replacement texts */
 226
 227 @<Glob...@>=
 228 text_pointer last_unnamed; /* most recent replacement text of unnamed section */
 229
 230 @ @<Set init...@>= last_unnamed=text_info; text_info->text_link=0;
 231
 232 @ If the first byte of a token is less than |0200|, the token occupies a
 233 single byte. Otherwise we make a sixteen-bit token by combining two consecutive
 234 bytes |a| and |b|. If |0200<=a<0250|, then |(a-0200)@t${}\times2^8$@>+b|
 235 points to an identifier; if |0250<=a<0320|, then
 236 |(a-0250)@t${}\times2^8$@>+b| points to a section name
 237 (or, if it has the special value |output_defs_flag|,
 238 to the area where the preprocessor definitions are stored); and if
 239 |0320<=a<0400|, then |(a-0320)@t${}\times2^8$@>+b| is the number of the section
 240 in which the current replacement text appears.
 241
 242 Codes less than |0200| are 7-bit |char| codes that represent themselves.
 243 Some of the 7-bit codes will not be present, however, so we can
 244 use them for special purposes. The following symbolic names are used:
 245
 246 \yskip \hang |join| denotes the concatenation of adjacent items with no
 247 space or line breaks allowed between them (the \.{@@\&} operation of \.{CWEB}).
 248
 249 \hang |string| denotes the beginning or end of a string, verbatim
 250 construction or numerical constant.
 251 @^ASCII code dependencies@>
 252
 253 @d string 02 /* takes the place of extended ASCII \.{\char2} */
 254 @d join 0177 /* takes the place of ASCII delete */
 255 @d output_defs_flag (2*024000-1)
 256
 257 @ The following procedure is used to enter a two-byte value into
 258 |tok_mem| when a replacement text is being generated.
 259
 260 @c
 261 void
 262 store_two_bytes(x) /* appends |x| to |tok_mem| as two bytes, high byte first */
 263 sixteen_bits x; /* the two-byte value to be stored at |tok_ptr| */
 264 {
 265   if (tok_ptr+2>tok_mem_end) overflow("token"); /* two more bytes would run past |tok_mem_end| */
 266   *tok_ptr++=x>>8; /* store high byte */
 267   *tok_ptr++=x&0377; /* store low byte */
 268 }
 269
 270 @** Stacks for output.  The output process uses a stack to keep track
 271 of what is going on at different ``levels'' as the sections are being
 272 written out.  Entries on this stack have five parts:
 273
 274 \yskip\hang |end_field| is the |tok_mem| location where the replacement
 275 text of a particular level will end;
 276
 277 \hang |byte_field| is the |tok_mem| location from which the next token
 278 on a particular level will be read;
 279
 280 \hang |name_field| points to the name corresponding to a particular level;
 281
 282 \hang |repl_field| points to the replacement text currently being read
 283 at a particular level;
 284
 285 \hang |section_field| is the section number, or zero if this is a macro.
 286
 287 \yskip\noindent The current values of these five quantities are referred to
 288 quite frequently, so they are stored in a separate place instead of in
 289 the |stack| array. We call the current values |cur_end|, |cur_byte|,
 290 |cur_name|, |cur_repl|, and |cur_section|.
 291
 292 The global variable |stack_ptr| tells how many levels of output are
 293 currently in progress. The end of all output occurs when the stack is
 294 empty, i.e., when |stack_ptr==stack|.
 295
 296 @<Typed...@>=
 297 typedef struct {
 298   eight_bits *end_field; /* ending location of replacement text */
 299   eight_bits *byte_field; /* present location within replacement text */
 300   name_pointer name_field; /* |byte_start| index for text being output */
 301   text_pointer repl_field; /* |tok_start| index for text being output */
 302   sixteen_bits section_field; /* section number or zero if not a section */
 303 } output_state;
 304 typedef output_state *stack_pointer; /* refers to an entry of the |stack| array */
 305
 306 @ @d cur_end cur_state.end_field /* current ending location in |tok_mem| */
 307 @d cur_byte cur_state.byte_field /* location of next output byte in |tok_mem|*/
 308 @d cur_name cur_state.name_field /* pointer to current name being expanded */
 309 @d cur_repl cur_state.repl_field /* pointer to current replacement text */
 310 @d cur_section cur_state.section_field /* current section number being expanded */
 311
 312 @<Global...@>=
 313 output_state cur_state; /* |cur_end|, |cur_byte|, |cur_name|, |cur_repl|,
 314   and |cur_section| */
 315 output_state stack[stack_size+1]; /* info for non-current levels */
 316 stack_pointer stack_ptr; /* first unused location in the output state stack */
 317 stack_pointer stack_end=stack+stack_size; /* end of |stack| */
 318
 319 @ To get the output process started, we will perform the following
 320 initialization steps. We may assume that |text_info->text_link| is nonzero,
 321 since it points to the \CEE/ text in the first unnamed section that generates
 322 code; if there are no such sections, there is nothing to output, and an
 323 error message will have been generated before we do any of the initialization.
 324
 325 @<Initialize the output stacks@>=
 326 stack_ptr=stack+1; cur_name=name_dir; cur_repl=text_info->text_link+text_info; /* one level is active, reading the text that |text_info->text_link| points to */
 327 cur_byte=cur_repl->tok_start; cur_end=(cur_repl+1)->tok_start; cur_section=0; /* a text ends where the next entry of |text_info| starts */
 328
 329 @ When the replacement text for name |p| is to be inserted into the output,
 330 the following subroutine is called to save the old level of output and get
 331 the new one going.
 332
 333 We assume that the \CEE/ compiler can copy structures.
 334 @^system dependencies@>
 335
 336 @c
 337 void
 338 push_level(p) /* suspends the current level */
 339 name_pointer p; /* the name whose replacement text is to be read next, or |NULL| */
 340 {
 341   if (stack_ptr==stack_end) overflow("stack"); /* no free entry is left in |stack| */
 342   *stack_ptr=cur_state; /* save the state of the level being suspended */
 343   stack_ptr++;
 344   if (p!=NULL) { /* |p==NULL| means we are in |output_defs| */
 345     cur_name=p; cur_repl=(text_pointer)p->equiv; /* start on the replacement text that |p->equiv| points to */
 346     cur_byte=cur_repl->tok_start; cur_end=(cur_repl+1)->tok_start; /* a text ends where the next entry of |text_info| starts */
 347     cur_section=0; /* no section number has been seen on the new level yet */
 348   }
 349 }
 350
 351 @ When we come to the end of a replacement text, the |pop_level| subroutine
 352 does the right thing: It either moves to the continuation of this replacement
 353 text or returns the state to the most recently stacked level.
 354
 355 @c
 356 void
 357 pop_level(flag) /* do this when |cur_byte| reaches |cur_end| */
 358 int flag; /* |flag==0| means we are in |output_defs| */
 359 {
 360   if (flag && cur_repl->text_link<section_flag) { /* link to a continuation */
 361     cur_repl=cur_repl->text_link+text_info; /* stay on the same level */
 362     cur_byte=cur_repl->tok_start; cur_end=(cur_repl+1)->tok_start; /* read the continuation from its beginning */
 363     return;
 364   }
 365   stack_ptr--; /* go down to the previous level */
 366   if (stack_ptr>stack) cur_state=*stack_ptr; /* restore the saved state, unless the stack is now empty */
 367 }
 368
 369 @ The heart of the output procedure is the function |get_output|,
 370 which produces the next token of output and sends it on to the lower-level
 371 function |out_char|. The main purpose of |get_output| is to handle the
 372 necessary stacking and unstacking. It sends the value |section_number|
 373 if the next output begins or ends the replacement text of some section,
 374 in which case |cur_val| is that section's number (if beginning) or the
 375 negative of that value (if ending). (A section number of 0 indicates
 376 not the beginning or ending of a section, but a \&{\#line} command.)
 377 And it sends the value |identifier|
 378 if the next output is an identifier, in which case
 379 |cur_val| points to that identifier name.
 380
 381 @d section_number 0201 /* code returned by |get_output| for section numbers */
 382 @d identifier 0202 /* code returned by |get_output| for identifiers */
 383
 384 @<Global...@>=
 385 int cur_val; /* additional information corresponding to output token */
 386
 387 @ If |get_output| finds that no more output remains, it returns with
 388 |stack_ptr==stack|.
 389 @^high-bit character handling@>
 390
 391 @c
 392 void
 393 get_output() /* sends next token to |out_char| */
 394 {
 395   sixteen_bits a; /* value of current byte */
 396   restart: if (stack_ptr==stack) return; /* the stack is empty: no more output remains */
 397   if (cur_byte==cur_end) { /* the current replacement text is exhausted */
 398     cur_val=-((int)cur_section); /* cast needed because of sign extension */
 399     pop_level(1); /* move to a continuation or return to the previous level */
 400     if (cur_val==0) goto restart; /* |cur_section| was zero, so there is no section ending to report */
 401     out_char(section_number); return; /* the negative |cur_val| marks the end of a section */
 402   }
 403   a=*cur_byte++;
 404   if (out_state==verbatim && a!=string && a!=constant && a!='\n')
 405     C_putc(a); /* a high-bit character can occur in a string */
 406   else if (a<0200) out_char(a); /* one-byte token */
 407   else {
 408     a=(a-0200)*0400+*cur_byte++; /* combine two bytes into a sixteen-bit token */
 409     switch (a/024000) { /* |024000==(0250-0200)*0400| */
 410       case 0: cur_val=a; out_char(identifier); break; /* |a| points to an identifier */
 411       case 1: if (a==output_defs_flag) output_defs(); /* the place where the preprocessor definitions go */
 412         else @<Expand section |a-024000|, |goto restart|@>;
 413         break;
 414       default: cur_val=a-050000; if (cur_val>0) cur_section=cur_val; /* a section number; zero does not change |cur_section| */
 415         out_char(section_number);
 416     }
 417   }
 418 }
 419
 420 @ The user may have forgotten to give any \CEE/ text for a section name,
 421 or the \CEE/ text may have been associated with a different name by mistake.
 422
 423 @<Expand section |a-...@>=
 424 {
 425   a-=024000; /* now |a+name_dir| is the section name */
 426   if ((a+name_dir)->equiv!=(char *)text_info) push_level(a+name_dir); /* the section has a replacement text: start reading it */
 427   else if (a!=0) { /* a section without replacement text, other than entry 0, is reported */
 428     printf("\n! Not present: <");
 429     print_section_name(a+name_dir); err_print(">");
 430 @.Not present: <section name>@>
 431   }
 432   goto restart;
 433 }
 434
 435 @* Producing the output.
 436 The |get_output| routine above handles most of the complexity of output
 437 generation, but there are two further considerations that have a nontrivial
 438 effect on \.{CTANGLE}'s algorithms.
 439
 440 @ First,
 441 we want to make sure that the output has spaces and line breaks in
 442 the right places (e.g., not in the middle of a string or a constant or an
 443 identifier, not at a `\.{@@\&}' position
 444 where quantities are being joined together, and certainly after an \.=
 445 because the \CEE/ compiler thinks \.{=-} is ambiguous).
 446
 447 The output process can be in one of the following states:
 448
 449 \yskip\hang |num_or_id| means that the last item in the buffer is a number or
 450 identifier, hence a blank space or line break must be inserted if the next
 451 item is also a number or identifier.
 452
 453 \yskip\hang |unbreakable| means that the last item in the buffer was followed
 454 by the \.{@@\&} operation that inhibits spaces between it and the next item.
 455
 456 \yskip\hang |verbatim| means we're copying only character tokens, and
 457 that they are to be output exactly as stored.  This is the case during
 458 strings, verbatim constructions and numerical constants.
 459
 460 \yskip\hang |post_slash| means we've just output a slash.
 461
 462 \yskip\hang |normal| means none of the above.
 463
 464 \yskip\noindent Furthermore, if the variable |protect| is positive, newlines
 465 are preceded by a `\.\\'.
 466
 467 @d normal 0 /* non-unusual state */
 468 @d num_or_id 1 /* state associated with numbers and identifiers */
 469 @d post_slash 2 /* state following a \./ */
 470 @d unbreakable 3 /* state associated with \.{@@\&} */
 471 @d verbatim 4 /* state in the middle of a string */
 472
 473 @<Global...@>=
 474 eight_bits out_state; /* current status of partial output */
 475 boolean protect; /* should newline characters be quoted? */
 476
 477 @ Here is a routine that is invoked when we want to output the current line.
 478 During the output process, |cur_line| equals the number of the next line
 479 to be output.
 480
 481 @c
 482 void
 483 flush_buffer() /* writes one line to output file */
 484 {
 485   C_putc('\n'); /* end the current output line */
 486   if (cur_line % 100 == 0 && show_progress) { /* every 100 lines, if progress reports are wanted */
 487     printf("."); /* show a dot */
 488     if (cur_line % 500 == 0) printf("%d",cur_line); /* and the line number every 500 lines */
 489     update_terminal; /* progress report */
 490   }
 491   cur_line++; /* |cur_line| is the number of the next line to be output */
 492 }
 493
 494 @ Second, we have modified the original \.{TANGLE} so that it will write output
 495 on multiple files.
 496 If a section name is introduced in at least one place by \.{@@(}
 497 instead of \.{@@<}, we treat it as the name of a file.
 498 All these special sections are saved on a stack, |output_files|.
 499 We write them out after we've done the unnamed section.
 500
 501 @d max_files 256
 502 @<Glob...@>=
 503 name_pointer output_files[max_files]; /* stack of the sections that name output files */
 504 name_pointer *cur_out_file, *end_output_files, *an_output_file; /* the stack pointer, the position just beyond the array, and a pointer for loops */
 505 char cur_section_name_char; /* is it |'<'| or |'('| */
 506 char output_file_name[longest_name]; /* name of the file */
 507
 508 @ We make |end_output_files| point just beyond the end of
 509 |output_files|. The stack pointer
 510 |cur_out_file| starts out there. Every time we see a new file, we
 511 decrement |cur_out_file| and then write it in.
 512 @<Set initial...@>=
 513 cur_out_file=end_output_files=output_files+max_files;
 514
 515 @ @<If it's not there, add |cur_section_name| to the output file stack, or
 516 complain we're out of room@>=
 517 {
 518   for (an_output_file=cur_out_file;
 519         an_output_file<end_output_files; an_output_file++)
 520             if (*an_output_file==cur_section_name) break; /* it is already on the stack */
 521   if (an_output_file==end_output_files) { /* the search found nothing */
 522     if (cur_out_file>output_files)
 523         *--cur_out_file=cur_section_name; /* decrement the stack pointer and write the name in */
 524     else {
 525       overflow("output files"); /* every entry of |output_files| is in use */
 526     }
 527   }
 528 }
 529
 530 @* The big output switch.  Here then is the routine that does the
 531 output.
 532
 533 @<Predecl...@>=
 534 void phase_two();
 535
 536 @ @c
 537 void
 538 phase_two () { /* writes the main output file and then the named output files */
 539   web_file_open=0;
 540   cur_line=1; /* the next line to be output is line 1 */
 541   @<Initialize the output stacks@>;
 542   @<Output macro definitions if appropriate@>;
 543   if (text_info->text_link==0 && cur_out_file==end_output_files) { /* no unnamed section and no named output file */
 544     printf("\n! No program text was specified."); mark_harmless;
 545 @.No program text...@>
 546   }
 547   else {
 548     if(cur_out_file==end_output_files) { /* the stack of named output files is empty */
 549       if(show_progress)
 550         printf("\nWriting the output file (%s):",C_file_name);
 551     }
 552     else {
 553       if (show_progress) {
 554         printf("\nWriting the output files:");
 555 @.Writing the output...@>
 556         printf(" (%s)",C_file_name);
 557         update_terminal;
 558       }
 559       if (text_info->text_link==0) goto writeloop; /* there is no unnamed section to output */
 560     }
 561     while (stack_ptr>stack) get_output(); /* output the unnamed sections */
 562     flush_buffer(); /* finish the last line */
 563 writeloop:   @<Write all the named output files@>;
 564     if(show_happiness) printf("\nDone.");
 565   }
 566 }
 567
 568 @ To write the named output files, we proceed as for the unnamed
 569 section.
 570 The only subtlety is that we have to open each one.
 571
 572 @<Write all the named output files@>=
 573 for (an_output_file=end_output_files; an_output_file>cur_out_file;) { /* visit the stacked files, starting with the one that was entered first */
 574     an_output_file--;
 575     sprint_section_name(output_file_name,*an_output_file); /* the section name is the file name */
 576     fclose(C_file); /* finished with the previous output file */
 577     C_file=fopen(output_file_name,"w");
 578     if (C_file ==0) fatal("! Cannot open output file:",output_file_name);
 579 @.Cannot open output file@>
 580     if (show_progress) { printf("\n(%s)",output_file_name); update_terminal; } /* report the file only when progress reports are wanted, like the other messages of |phase_two| */
 581     cur_line=1; /* line numbering starts over in each file */
 582     stack_ptr=stack+1; /* one level is active */
 583     cur_name= (*an_output_file);
 584     cur_repl= (text_pointer)cur_name->equiv; /* the replacement text of this section */
 585     cur_byte=cur_repl->tok_start;
 586     cur_end=(cur_repl+1)->tok_start; /* a text ends where the next entry of |text_info| starts */
 587     while (stack_ptr > stack) get_output(); /* output until the stack is empty */
 588     flush_buffer(); /* finish the last line */
 589 }
 590
 591 @ If a \.{@@h} was not encountered in the input,
 592 we go through the list of replacement texts and copy the ones
 593 that refer to macros, preceded by the \.{\#define} preprocessor command.
 594
 595 @<Output macro definitions if appropriate@>=
 596   if (!output_defs_seen)
 597     output_defs();
 598
 599 @ @<Glob...@>=
 600 boolean output_defs_seen=0;
 601
 602 @ @<Predecl...@>=
 603 void output_defs();
 604
 605 @ @c
 606 void
 607 output_defs() /* outputs all macro replacement texts as preprocessor definitions */
 608 {
 609   sixteen_bits a; /* current byte or two-byte token */
 610   push_level(NULL); /* save the current state; with |NULL| the current values are left alone */
 611   for (cur_text=text_info+1; cur_text<text_ptr; cur_text++) /* scan the replacement texts stored so far */
 612     if (cur_text->text_link==0) { /* |cur_text| is the text for a macro */
 613       cur_byte=cur_text->tok_start;
 614       cur_end=(cur_text+1)->tok_start; /* a text ends where the next entry of |text_info| starts */
 615       C_printf("%s","#define ");
 616       out_state=normal;
 617       protect=1; /* newlines should be preceded by |'\\'| */
 618       while (cur_byte<cur_end) {
 619         a=*cur_byte++;
 620         if (cur_byte==cur_end && a=='\n') break; /* disregard a final newline */
 621         if (out_state==verbatim && a!=string && a!=constant && a!='\n')
 622           C_putc(a); /* a high-bit character can occur in a string */
 623 @^high-bit character handling@>
 624         else if (a<0200) out_char(a); /* one-byte token */
 625         else {
 626           a=(a-0200)*0400+*cur_byte++; /* combine two bytes into a sixteen-bit token */
 627           if (a<024000) { /* |024000==(0250-0200)*0400| */
 628             cur_val=a; out_char(identifier);
 629           }
 630           else if (a<050000) { confusion("macro defs have strange char");} /* a section name token is not expected in a macro */
 631           else {
 632             cur_val=a-050000; cur_section=cur_val; out_char(section_number); /* a section number */
 633           }
 634       /* no other cases */
 635         }
 636       }
 637       protect=0; /* newlines are no longer quoted */
 638       flush_buffer(); /* end the line of this definition */
 639     }
 640   pop_level(0); /* restore the state saved above */
 641 }
 642
 643 @ A many-way switch is used to send the output.  Note that this function
 644 is not called if |out_state==verbatim|, except perhaps with arguments
 645 |'\n'| (protect the newline), |string| (end the string), or |constant|
 646 (end the constant).
 647
 648 @<Predecl...@>=
 649 static void out_char();
 650
 651 @ @c
 652 static void
 653 out_char(cur_char) /* outputs one token and updates |out_state| */
 654 eight_bits cur_char; /* a character code, or a special code like |identifier| or |section_number| */
 655 {
 656   char *j, *k; /* pointer into |byte_mem| */
 657 restart: /* the section number case comes back here to output a newline */
 658     switch (cur_char) {
 659       case '\n': if (protect && out_state!=verbatim) C_putc(' '); /* a protected newline outside verbatim text gets a space first */
 660         if (protect || out_state==verbatim) C_putc('\\'); /* the backslash quotes the newline */
 661         flush_buffer(); if (out_state!=verbatim) out_state=normal; break;
 662       @/@t\4@>@<Case of an identifier@>;
 663       @/@t\4@>@<Case of a section number@>;
 664       @/@t\4@>@<Cases like \.{!=}@>;
 665       case '=': case '>': C_putc(cur_char); C_putc(' '); /* these two are always followed by a space */
 666         out_state=normal; break;
 667       case join: out_state=unbreakable; break; /* nothing is output; only the state changes */
 668       case constant: if (out_state==verbatim) { /* the end of a constant */
 669           out_state=num_or_id; break;
 670         }
 671         if(out_state==num_or_id) C_putc(' '); out_state=verbatim; break; /* the beginning of a constant */
 672       case string: if (out_state==verbatim) out_state=normal; /* the end of a string */
 673         else out_state=verbatim; break; /* the beginning of a string */
 674       case '/': C_putc('/'); out_state=post_slash; break;
 675       case '*': if (out_state==post_slash) C_putc(' '); /* keep a slash and an asterisk apart */
 676         /* fall through */
 677       default: C_putc(cur_char); out_state=normal; break;
 678     }
 679 }
 680
 681 @ @<Cases like \.{!=}@>=
 682 case plus_plus: C_putc('+'); C_putc('+'); out_state=normal; break; /* each of these codes is written out as its two or three characters */
 683 case minus_minus: C_putc('-'); C_putc('-'); out_state=normal; break;
 684 case minus_gt: C_putc('-'); C_putc('>'); out_state=normal; break;
 685 case gt_gt: C_putc('>'); C_putc('>'); out_state=normal; break;
 686 case eq_eq: C_putc('='); C_putc('='); out_state=normal; break;
 687 case lt_lt: C_putc('<'); C_putc('<'); out_state=normal; break;
 688 case gt_eq: C_putc('>'); C_putc('='); out_state=normal; break;
 689 case lt_eq: C_putc('<'); C_putc('='); out_state=normal; break;
 690 case not_eq: C_putc('!'); C_putc('='); out_state=normal; break;
 691 case and_and: C_putc('&'); C_putc('&'); out_state=normal; break;
 692 case or_or: C_putc('|'); C_putc('|'); out_state=normal; break;
 693 case dot_dot_dot: C_putc('.'); C_putc('.'); C_putc('.'); out_state=normal;
 694     break;
 695 case colon_colon: C_putc(':'); C_putc(':'); out_state=normal; break;
 696 case period_ast: C_putc('.'); C_putc('*'); out_state=normal; break;
 697 case minus_gt_ast: C_putc('-'); C_putc('>'); C_putc('*'); out_state=normal;
 698     break;
 699
 700 @ When an identifier is output to the \CEE/ file, characters in the
 701 range 128--255 must be changed into something else, so the \CEE/
 702 compiler won't complain.  By default, \.{CTANGLE} converts the
 703 character with code $16 x+y$ to the three characters `\.X$xy$', but
 704 a different transliteration table can be specified.  Thus a German
 705 might want {\it gr\"un\/} to appear as a still readable \.{gruen}.
 706 This makes debugging a lot less confusing.
 707
 708 @d translit_length 10
 709
 710 @<Glo...@>=
 711 char translit[128][translit_length];
 712
 713 @ @<Set init...@>=
 714 {
 715   int i; /* index into |translit|; entry |i| is for the character with code |128+i| */
 716   for (i=0;i<128;i++) sprintf(translit[i],"X%02X",(unsigned)(128+i)); /* the default: an \.X followed by the two hexadecimal digits of the code */
 717 }
 718
 719 @ @<Case of an identifier@>=
 720 case identifier:
 721   if (out_state==num_or_id) C_putc(' '); /* separate adjacent numbers or identifiers */
 722   j=(cur_val+name_dir)->byte_start; /* first character of the name */
 723   k=(cur_val+name_dir+1)->byte_start; /* the name ends where the next name starts */
 724   while (j<k) {
 725     if ((unsigned char)(*j)<0200) C_putc(*j); /* codes below 128 are copied unchanged */
 726 @^high-bit character handling@>
 727     else C_printf("%s",translit[(unsigned char)(*j)-0200]); /* the others are transliterated */
 728     j++;
 729   }
 730   out_state=num_or_id; break;
 731
 732 @ @<Case of a sec...@>=
 733 case section_number:
 734   if (cur_val>0) C_printf("/*%d:*/",cur_val); /* the beginning of a section */
 735   else if(cur_val<0) C_printf("/*:%d*/",-cur_val); /* the end of a section */
 736   else if (protect) { /* a line directive while newlines are being quoted */
 737     cur_byte +=4; /* skip line number and file name */
 738     cur_char = '\n'; /* output a newline instead */
 739     goto restart;
 740   } else {
 741     sixteen_bits a; /* the line number */
 742     a=0400* *cur_byte++; /* its high byte comes first */
 743     a+=*cur_byte++; /* gets the line number */
 744     C_printf("\n#line %d \"",a);
 745 @:line}{\.{\#line}@>
 746     cur_val=*cur_byte++;
 747     cur_val=0400*(cur_val-0200)+ *cur_byte++; /* points to the file name */
 748     for (j=(cur_val+name_dir)->byte_start, k=(cur_val+name_dir+1)->byte_start;
 749          j<k; j++) {
 750       if (*j=='\\' || *j=='"') C_putc('\\'); /* quote backslashes and double quotes in the file name */
 751       C_putc(*j);
 752     }
 753     C_printf("%s","\"\n");
 754   }
 755   break;
 756
 757 @** Introduction to the input phase.
 758 We have now seen that \.{CTANGLE} will be able to output the full
 759 \CEE/ program, if we can only get that program into the byte memory in
 760 the proper format. The input process is something like the output process
 761 in reverse, since we compress the text as we read it in and we expand it
 762 as we write it out.
 763
 764 There are three main input routines. The most interesting is the one that gets
 765 the next token of a \CEE/ text; the other two are used to scan rapidly past
 766 \TEX/ text in the \.{CWEB} source code. One of the latter routines will jump to
 767 the next token that starts with `\.{@@}', and the other skips to the end
 768 of a \CEE/ comment.
 769
 770 @ Control codes in \.{CWEB} begin with `\.{@@}', and the next character
 771 identifies the code. Some of these are of interest only to \.{CWEAVE},
 772 so \.{CTANGLE} ignores them; the others are converted by \.{CTANGLE} into
 773 internal code numbers by the |ccode| table below. The ordering
 774 of these internal code numbers has been chosen to simplify the program logic;
 775 larger numbers are given to the control codes that denote more significant
 776 milestones.
 777
 778 @d ignore 0 /* control code of no interest to \.{CTANGLE} */
 779 @d ord 0302 /* control code for `\.{@@'}' */
 780 @d control_text 0303 /* control code for `\.{@@t}', `\.{@@\^}', etc. */
 781 @d translit_code 0304 /* control code for `\.{@@l}' */
 782 @d output_defs_code 0305 /* control code for `\.{@@h}' */
 783 @d format_code 0306 /* control code for `\.{@@f}' */
 784 @d definition 0307 /* control code for `\.{@@d}' */
 785 @d begin_C 0310 /* control code for `\.{@@c}' */
 786 @d section_name 0311 /* control code for `\.{@@<}' */
 787 @d new_section 0312 /* control code for `\.{@@\ }' and `\.{@@*}' */
 788
 789 @<Global...@>=
 790 eight_bits ccode[256]; /* meaning of a char following \.{@@} */
 791
 792 @ @<Set ini...@>= {
 793   int c; /* must be |int| so the |for| loop will end */
 794   for (c=0; c<256; c++) ccode[c]=ignore; /* every code is ignored unless it is set below */
 795   ccode[' ']=ccode['\t']=ccode['\n']=ccode['\v']=ccode['\r']=ccode['\f']
 796    =ccode['*']=new_section; /* white space or a star after \.{@@} starts a new section */
 797   ccode['@@']='@@'; ccode['=']=string; /* \.{@@@@} stands for itself; \.{@@=} begins a verbatim string */
 798   ccode['d']=ccode['D']=definition;
 799   ccode['f']=ccode['F']=ccode['s']=ccode['S']=format_code;
 800   ccode['c']=ccode['C']=ccode['p']=ccode['P']=begin_C;
 801   ccode['^']=ccode[':']=ccode['.']=ccode['t']=ccode['T']=
 802    ccode['q']=ccode['Q']=control_text; /* codes whose text runs to the next \.{@@>} */
 803   ccode['h']=ccode['H']=output_defs_code;
 804   ccode['l']=ccode['L']=translit_code;
 805   ccode['&']=join;
 806   ccode['<']=ccode['(']=section_name;
 807   ccode['\'']=ord;
 808 }
 809
 810 @ The |skip_ahead| procedure reads through the input at fairly high speed
 811 until finding the next non-ignorable control code, which it returns.
 812
 813 @c
 814 eight_bits
 815 skip_ahead() /* skip to next control code; returns its internal code */
 816 {
 817   eight_bits c; /* control code found */
 818   while (1) {
 819     if (loc>limit && (get_line()==0)) return(new_section); /* end of input is reported as |new_section| */
 820     *(limit+1)='@@'; /* sentinel that stops the scan below */
 821     while (*loc!='@@') loc++; /* find the next \.{@@} on this line */
 822     if (loc<=limit) { /* a real \.{@@}, not the sentinel */
 823       loc++; c=ccode[(eight_bits)*loc]; loc++; /* look up the character after the \.{@@} and move past it */
 824       if (c!=ignore || *(loc-1)=='>') return(c); /* \.{@@>} is returned too, although its code is |ignore| */
 825     }
 826   }
 827 }
 828
 829 @ The |skip_comment| procedure reads through the input at somewhat high
 830 speed in order to pass over comments, which \.{CTANGLE} does not transmit
 831 to the output. If the comment is introduced by \.{/*}, |skip_comment|
 832 proceeds until finding the end-comment token \.{*/} or a newline; in the
 833 latter case |skip_comment| will be called again by |get_next|, since the
 834 comment is not finished.  This is done so that each newline in the
 835 \CEE/ part of a section is copied to the output; otherwise the \&{\#line}
 836 commands inserted into the \CEE/ file by the output routines become useless.
 837 On the other hand, if the comment is introduced by \.{//} (i.e., if it
 838 is a \CPLUSPLUS/ ``short comment''), it always is simply delimited by the next
 839 newline. The boolean argument |is_long_comment| distinguishes between
 840 the two types of comments.
 841
 842 If |skip_comment| comes to the end of the section, it prints an error message.
 843 No comment, long or short, is allowed to contain `\.{@@\ }' or `\.{@@*}'.
 844
 845 @<Global...@>=
 846 boolean comment_continues=0; /* are we scanning a comment? */
 847
 848 @ @c
 849 int skip_comment(is_long_comment) /* skips over comments; returns the new |comment_continues| */
 850 boolean is_long_comment; /* true for the slash-star kind of comment */
 851 {
 852   char c; /* current character */
 853   while (1) {
 854     if (loc>limit) { /* the current line is used up */
 855       if (is_long_comment) {
 856         if(get_line()) return(comment_continues=1); /* the comment goes on to the next line */
 857         else{
 858           err_print("! Input ended in mid-comment");
 859 @.Input ended in mid-comment@>
 860           return(comment_continues=0);
 861         }
 862       }
 863       else return(comment_continues=0); /* a short comment ends with its line */
 864     }
 865     c=*(loc++);
 866     if (is_long_comment && c=='*' && *loc=='/') {
 867       loc++; return(comment_continues=0); /* found the closing star-slash */
 868     }
 869     if (c=='@@') {
 870       if (ccode[(eight_bits)*loc]==new_section) {
 871         err_print("! Section name ended in mid-comment"); loc--; /* |loc| again points at the \.{@@} */
 872 @.Section name ended in mid-comment@>
 873         return(comment_continues=0);
 874       }
 875       else loc++; /* skip the character after the \.{@@} */
 876     }
 877   }
 878 }
 879
 880 @* Inputting the next token.
 881
 882 @d constant 03
 883
 884 @<Global...@>=
 885 name_pointer cur_section_name; /* name of section just scanned */
 886 int no_where; /* suppress |print_where|? */
 887
 888 @ @<Include...@>=
 889 #include <ctype.h> /* definition of |isalpha|, |isdigit| and so on */
 890 #include <stdlib.h> /* definition of |exit| */
 891
 892 @ As one might expect, |get_next| consists mostly of a big switch
 893 that branches to the various special cases that can arise.
 894
 895 @d isxalpha(c) ((c)=='_' || (c)=='$')
 896   /* non-alpha characters allowed in identifier */
 897 @d ishigh(c) ((unsigned char)(c)>0177)
 898 @^high-bit character handling@>
 899
 900 @c
 901 eight_bits
 902 get_next() /* produces the next input token */
 903 {
 904   static int preprocessing=0; /* are we inside a preprocessor line? */
 905   eight_bits c; /* the current character */
 906   while (1) {
 907     if (loc>limit) { /* end of line; |limit==buffer| means the line is empty */
 908       if (preprocessing && (limit==buffer || *(limit-1)!='\\')) preprocessing=0; /* never read |*(limit-1)| on an empty line */
 909       if (get_line()==0) return(new_section);
 910       else if (print_where && !no_where) {
 911           print_where=0;
 912           @<Insert the line number into |tok_mem|@>;
 913         }
 914         else return ('\n');
 915     }
 916     c=*loc;
 917     if (comment_continues || (c=='/' && (*(loc+1)=='*' || *(loc+1)=='/'))) {
 918       skip_comment(comment_continues||*(loc+1)=='*');
 919           /* scan to end of comment or newline */
 920       if (comment_continues) return('\n'); /* every newline inside a comment is still passed on */
 921       else continue;
 922     }
 923     loc++;
 924     if (xisdigit(c) || c=='.') @<Get a constant@>@;
 925     else if (c=='\'' || c=='"' || (c=='L'&&(*loc=='\'' || *loc=='"')))
 926         @<Get a string@>@;
 927     else if (isalpha(c) || isxalpha(c) || ishigh(c))
 928       @<Get an identifier@>@;
 929     else if (c=='@@') @<Get control code and possible section name@>@;
 930     else if (xisspace(c)) {
 931         if (!preprocessing || loc>limit) continue;
 932           /* we don't want a blank after a final backslash */
 933         else return(' '); /* ignore spaces and tabs, unless preprocessing */
 934     }
 935     else if (c=='#' && loc==buffer+1) preprocessing=1; /* a \.\# in the first column starts a preprocessor line */
 936     mistake: @<Compress two-symbol operator@>@;
 937     return(c); /* any other character is a token by itself */
 938   }
 939 }
 940
 941 @ The following code assigns values to the combinations \.{++},
 942 \.{--}, \.{->}, \.{>=}, \.{<=}, \.{==}, \.{<<}, \.{>>}, \.{!=}, \.{||} and
 943 \.{\&\&}, and to the \CPLUSPLUS/
 944 combinations \.{...}, \.{::}, \.{.*} and \.{->*}.
 945 The compound assignment operators (e.g., \.{+=}) are
 946 treated as separate tokens.
 947
 948 @d compress(c) if (loc++<=limit) return(c) /* accept the second character and return the combined token */
 949
 950 @<Compress tw...@>=
 951 switch(c) {
 952   case '+': if (*loc=='+') compress(plus_plus); break;
 953   case '-': if (*loc=='-') {compress(minus_minus);} /* braces keep the |else| from joining the |if| inside |compress| */
 954     else if (*loc=='>') if (*(loc+1)=='*') {loc++; compress(minus_gt_ast);}
 955                         else compress(minus_gt); break; /* this |else| pairs with the test for the star */
 956   case '.': if (*loc=='*') {compress(period_ast);}
 957             else if (*loc=='.' && *(loc+1)=='.') {
 958               loc++; compress(dot_dot_dot); /* two more characters are consumed here */
 959             }
 960             break;
 961   case ':': if (*loc==':') compress(colon_colon); break;
 962   case '=': if (*loc=='=') compress(eq_eq); break;
 963   case '>': if (*loc=='=') {compress(gt_eq);}
 964     else if (*loc=='>') compress(gt_gt); break;
 965   case '<': if (*loc=='=') {compress(lt_eq);}
 966     else if (*loc=='<') compress(lt_lt); break;
 967   case '&': if (*loc=='&') compress(and_and); break;
 968   case '|': if (*loc=='|') compress(or_or); break;
 969   case '!': if (*loc=='=') compress(not_eq); break;
 970 }
 971
 972 @ @<Get an identifier@>= {
 973   id_first=--loc; /* back up to the first character of the identifier */
 974   while (ishigh(*++loc) || isalpha(*loc) || isdigit(*loc) || isxalpha(*loc)); /* |ishigh| goes first so that |isalpha| and |isdigit| never see a negative |char| */
 975   id_loc=loc; return(identifier); /* |id_loc| is one past the last character */
 976 }
 977
 978 @ @<Get a constant@>= {
 979   id_first=loc-1; /* the digit or period that was just read */
 980   if (*id_first=='.' && !xisdigit(*loc)) goto mistake; /* not a constant */
 981   if (*id_first=='0') {
 982     if (*loc=='x' || *loc=='X') { /* hex constant */
 983       loc++; while (xisxdigit(*loc)) loc++; goto found;
 984     }
 985   }
 986   while (xisdigit(*loc)) loc++; /* remaining digits before any period */
 987   if (*loc=='.') { /* fraction part */
 988   loc++;
 989   while (xisdigit(*loc)) loc++;
 990   }
 991   if (*loc=='e' || *loc=='E') { /* float constant */
 992     if (*++loc=='+' || *loc=='-') loc++; /* optional sign of the exponent */
 993     while (xisdigit(*loc)) loc++;
 994   }
 995  found: while (*loc=='u' || *loc=='U' || *loc=='l' || *loc=='L'
 996              || *loc=='f' || *loc=='F') loc++; /* any run of suffix letters is accepted */
 997   id_loc=loc;
 998   return(constant);
 999 }
1000
1001 @ \CEE/ strings and character constants, delimited by double and single
1002 quotes, respectively, can contain newlines or instances of their own
1003 delimiters if they are protected by a backslash.  We follow this
1004 convention, but do not allow the string to be longer than |longest_name|.
1005
1006 @<Get a string@>= {
1007   char delim = c; /* what started the string */
1008   id_first = section_text+1;
1009   id_loc = section_text; *++id_loc=delim;
1010   if (delim=='L') { /* wide character constant */
1011     delim=*loc++; *++id_loc=delim;
1012   }
1013   while (1) {
1014     if (loc>=limit) { /* at the end of the line; |limit==buffer| means the line is empty */
1015       if(limit==buffer || *(limit-1)!='\\') { /* never read |*(limit-1)| on an empty line */
1016         err_print("! String didn't end"); loc=limit; break;
1017 @.String didn't end@>
1018       }
1019       if(get_line()==0) {
1020         err_print("! Input ended in middle of string"); loc=buffer; break;
1021 @.Input ended in middle of string@>
1022       }
1023       else if (++id_loc<=section_text_end) *id_loc='\n'; /* will print as
1024       \.{"\\\\\\n"} */
1025     }
1026     if ((c=*loc++)==delim) { /* the closing delimiter */
1027       if (++id_loc<=section_text_end) *id_loc=c;
1028       break;
1029     }
1030     if (c=='\\') {
1031       if (loc>=limit) continue; /* a final backslash: go fetch the next line */
1032       if (++id_loc<=section_text_end) *id_loc = '\\';
1033       c=*loc++; /* the escaped character is copied without being examined */
1034     }
1035     if (++id_loc<=section_text_end) *id_loc=c; /* |id_loc| keeps counting even when nothing more is stored */
1036   }
1037   if (id_loc>=section_text_end) {
1038     printf("\n! String too long: ");
1039 @.String too long@>
1040     term_write(section_text+1,25);
1041     err_print("...");
1042   }
1043   id_loc++;
1044   return(string);
1045 }
1046
1047 @ After an \.{@@} sign has been scanned, the next character tells us
1048 whether there is more work to do.
1049
1050 @<Get control code and possible section name@>= {
1051   c=ccode[(eight_bits)*loc++]; /* classify the character after the \.{@@} */
1052   switch(c) {
1053     case ignore: continue; /* of no interest; get the next token */
1054     case translit_code: err_print("! Use @@l in limbo only"); continue;
1055 @.Use @@l in limbo...@>
1056     case control_text: while ((c=skip_ahead())=='@@');
1057       /* only \.{@@@@} and \.{@@>} are expected */
1058       if (*(loc-1)!='>')
1059         err_print("! Double @@ should be used in control text");
1060 @.Double @@ should be used...@>
1061       continue;
1062     case section_name:
1063       cur_section_name_char=*(loc-1); /* remember which of \.{@@<} and \.{@@(} it was */
1064       @<Scan the section name and make |cur_section_name| point to it@>;
1065     case string: @<Scan a verbatim string@>;
1066     case ord: @<Scan an ASCII constant@>;
1067     default: return(c); /* other codes are returned as they are */
1068   }
1069 }
1070
1071 @ After scanning a valid ASCII constant that follows
1072 \.{@@'}, this code plows ahead until it finds the next single quote.
1073 (Special care is taken if the quote is part of the constant.)
1074 Anything after a valid ASCII constant is ignored;
1075 thus, \.{@@'\\nopq'} gives the same result as \.{@@'\\n'}.
1076
1077 @<Scan an ASCII constant@>=
1078   id_first=loc; /* the constant begins right after the opening quote */
1079   if (*loc=='\\') { /* a quote right after a backslash belongs to the constant */
1080     if (*++loc=='\'') loc++;
1081   }
1082   while (*loc!='\'') {
1083     if (*loc=='@@') {
1084       if (*(loc+1)!='@@')
1085         err_print("! Double @@ should be used in ASCII constant");
1086 @.Double @@ should be used...@>
1087       else loc++;
1088     }
1089     loc++;
1090     if (loc>limit) {
1091         err_print("! String didn't end"); loc=limit-1; break;
1092 @.String didn't end@>
1093     }
1094   }
1095   loc++;
1096   return(ord);
1097
1098 @ @<Scan the section name...@>= {
1099   char *k; /* pointer into |section_text| */
1100   @<Put section name into |section_text|@>;
1101   if (k-section_text>3 && strncmp(k-2,"...",3)==0) /* the name ends with three dots */
1102     cur_section_name=section_lookup(section_text+1,k-3,1); /* 1 means is a prefix */
1103   else cur_section_name=section_lookup(section_text+1,k,0); /* 0 means the name is given in full */
1104   if (cur_section_name_char=='(') /* the name came from \.{@@(} */
1105     @<If it's not there, add |cur_section_name| to the output file stack, or
1106           complain we're out of room@>;
1107   return(section_name);
1108 }
1109
1110 @ Section names are placed into the |section_text| array with consecutive spaces,
1111 tabs, and carriage-returns replaced by single spaces. There will be no
1112 spaces at the beginning or the end. (We set |section_text[0]=' '| to facilitate
1113 this, since the |section_lookup| routine uses |section_text[1]| as the first
1114 character of the name.)
1115
1116 @<Set init...@>=section_text[0]=' ';
1117
1118 @ @<Put section name...@>=
1119 k=section_text; /* |k| points at the last character stored so far */
1120 while (1) {
1121   if (loc>limit && get_line()==0) {
1122     err_print("! Input ended in section name");
1123 @.Input ended in section name@>
1124     loc=buffer+1; break;
1125   }
1126   c=*loc;
1127   @<If end of name or erroneous nesting, |break|@>;
1128   loc++; if (k<section_text_end) k++; /* advance |k| unless the array is full */
1129   if (xisspace(c)) {
1130     c=' '; if (*(k-1)==' ') k--; /* a run of white space becomes one blank */
1131   }
1132 *k=c; /* store the character */
1133 }
1134 if (k>=section_text_end) {
1135   printf("\n! Section name too long: ");
1136 @.Section name too long@>
1137   term_write(section_text+1,25);
1138   printf("..."); mark_harmless;
1139 }
1140 if (*k==' ' && k>section_text) k--; /* drop a trailing blank */
1141
1142 @ @<If end of name or erroneous nesting,...@>=
1143 if (c=='@@') {
1144   c=*(loc+1); /* the character after the \.{@@} */
1145   if (c=='>') {
1146     loc+=2; break; /* \.{@@>} ends the name */
1147   }
1148   if (ccode[(eight_bits)c]==new_section) {
1149     err_print("! Section name didn't end"); break;
1150 @.Section name didn't end@>
1151   }
1152   if (ccode[(eight_bits)c]==section_name) {
1153     err_print("! Nesting of section names not allowed"); break;
1154 @.Nesting of section names...@>
1155   }
1156   if (k<section_text_end) k++; *k='@@'; loc++; /* |k| never passes |section_text_end|; now |c==*loc| again */
1157 }
1158
1159 @ At the present point in the program we
1160 have |ccode[(eight_bits)*(loc-1)]==string|; we set |id_first| to the beginning
1161 of the string itself, and |id_loc| to its ending-plus-one location in the
1162 buffer.  We also set |loc| to the position just after the ending delimiter.
1163
1164 @<Scan a verbatim string@>= {
1165   id_first=loc++; *(limit+1)='@@'; *(limit+2)='>'; /* plant a sentinel \.{@@>} after the line */
1166   while (*loc!='@@' || *(loc+1)!='>') loc++; /* stop at the first \.{@@>} */
1167   if (loc>=limit) err_print("! Verbatim string didn't end");
1168 @.Verbatim string didn't end@>
1169   id_loc=loc; loc+=2;
1170   return(string);
1171 }
1172
1173 @* Scanning a macro definition.
1174 The rules for generating the replacement texts corresponding to macros and
1175 \CEE/ texts of a section are almost identical; the only differences are that
1176
1177 \yskip \item{a)}Section names are not allowed in macros;
1178 in fact, the appearance of a section name terminates such macros and denotes
1179 the name of the current section.
1180
1181 \item{b)}The symbols \.{@@d} and \.{@@f} and \.{@@c} are not allowed after
1182 section names, while they terminate macro definitions.
1183
1184 \item{c)}Spaces are inserted after right parentheses in macros, because the
1185 ANSI \CEE/ preprocessor sometimes requires it.
1186
1187 \yskip Therefore there is a single procedure |scan_repl| whose parameter
1188 |t| specifies either |macro| or |section_name|. After |scan_repl| has
1189 acted, |cur_text| will point to the replacement text just generated, and
1190 |next_control| will contain the control code that terminated the activity.
1191
1192 @d macro  0
1193 @d app_repl(c)  {if (tok_ptr==tok_mem_end) overflow("token"); *tok_ptr++=c;}
1194
1195 @<Global...@>=
1196 text_pointer cur_text; /* replacement text formed by |scan_repl| */
1197 eight_bits next_control;
1198
1199 @ @c
1200 void
1201 scan_repl(t) /* creates a replacement text */
1202 eight_bits t; /* either |macro| or |section_name| */
1203 {
1204   sixteen_bits a; /* the current token */
1205   if (t==section_name) {@<Insert the line number into |tok_mem|@>;}
1206   while (1) switch (a=get_next()) {
1207       @<In cases that |a| is a non-|char| token (|identifier|,
1208         |section_name|, etc.), either process it and change |a| to a byte
1209         that should be stored, or |continue| if |a| should be ignored,
1210         or |goto done| if |a| signals the end of this replacement text@>@;
1211       case ')': app_repl(a);
1212         if (t==macro) app_repl(' '); /* macros get a blank after each right parenthesis */
1213         break;
1214       default: app_repl(a); /* store |a| in |tok_mem| */
1215     }
1216   done: next_control=(eight_bits) a; /* the code that ended the replacement text */
1217   if (text_ptr>text_info_end) overflow("text");
1218   cur_text=text_ptr; (++text_ptr)->tok_start=tok_ptr; /* the next text will start where this one ends */
1219 }
1220
1221 @ Here is the code for the line number: first a |sixteen_bits| equal
1222 to |0150000|; then the numeric line number; then a pointer to the
1223 file name.
1224
1225 @<Insert the line...@>=
1226 store_two_bytes(0150000); /* marker that a line number follows */
1227 if (changing) id_first=change_file_name; /* name of the file now being read */
1228 else id_first=cur_file_name;
1229 id_loc=id_first+strlen(id_first);
1230 if (changing) store_two_bytes((sixteen_bits)change_line); /* the line number itself */
1231 else store_two_bytes((sixteen_bits)cur_line);
1232 {int a=id_lookup(id_first,id_loc,0)-name_dir; app_repl((a / 0400)+0200);
1233   app_repl(a % 0400);} /* the file name is stored as an index into |name_dir| */
1234
1235 @ @<In cases that |a| is...@>=
1236 case identifier: a=id_lookup(id_first,id_loc,0)-name_dir; /* index of the name in |name_dir| */
1237   app_repl((a / 0400)+0200); /* high-order byte, offset by 0200 */
1238   app_repl(a % 0400); break;
1239 case section_name: if (t!=section_name) goto done; /* a section name ends a macro */
1240   else {
1241     @<Was an `@@' missed here?@>;
1242     a=cur_section_name-name_dir;
1243     app_repl((a / 0400)+0250);
1244     app_repl(a % 0400);
1245     @<Insert the line number into |tok_mem|@>; break;
1246   }
1247 case output_defs_code: if (t!=section_name) err_print("! Misplaced @@h");
1248 @.Misplaced @@h@>
1249   else {
1250     output_defs_seen=1;
1251     a=output_defs_flag;
1252     app_repl((a / 0400)+0200);
1253     app_repl(a % 0400);
1254     @<Insert the line number into |tok_mem|@>;
1255   }
1256  break;
1257 case constant: case string:
1258   @<Copy a string or verbatim construction or numerical constant@>;
1259 case ord:
1260   @<Copy an ASCII constant@>;
1261 case definition: case format_code: case begin_C: if (t!=section_name) goto done;
1262   else {
1263     err_print("! @@d, @@f and @@c are ignored in C text"); continue;
1264 @.@@d, @@f and @@c are ignored in C text@>
1265   }
1266 case new_section: goto done;
1267
1268 @ @<Was an `@@'...@>= {
1269   char *try_loc=loc;
1270   while (*try_loc==' ' && try_loc<limit) try_loc++;
1271   if (*try_loc=='+' && try_loc<limit) try_loc++;
1272   while (*try_loc==' ' && try_loc<limit) try_loc++;
1273   if (*try_loc=='=') err_print ("! Missing `@@ ' before a named section");
1274 @.Missing `@@ '...@>
1275   /* user who isn't defining a section should put newline after the name,
1276      as explained in the manual */
1277 }
1278
1279 @ @<Copy a string...@>=
1280   app_repl(a); /* |string| or |constant| */
1281   while (id_first < id_loc) { /* simplify \.{@@@@} pairs */
1282     if (*id_first=='@@') {
1283       if (*(id_first+1)=='@@') id_first++; /* keep only the second of the pair */
1284       else err_print("! Double @@ should be used in string");
1285 @.Double @@ should be used...@>
1286     }
1287     app_repl(*id_first++);
1288   }
1289   app_repl(a); break; /* the same code closes the token */
1290
1291 @ This section should be rewritten on machines that don't use ASCII
1292 code internally.
1293 @^ASCII code dependencies@>
1294
1295 @<Copy an ASCII constant@>= {
1296   int c=(eight_bits) *id_first; /* the value of the constant */
1297   if (c=='\\') {
1298     c=*++id_first;
1299     if (c>='0' && c<='7') { /* octal escape of up to three digits */
1300       c-='0';
1301       if (*(id_first+1)>='0' && *(id_first+1)<='7') {
1302         c=8*c+*(++id_first) - '0';
1303         if (*(id_first+1)>='0' && *(id_first+1)<='7' && c<32) /* a third digit is taken only if the value stays below 256 */
1304           c=8*c+*(++id_first)- '0';
1305       }
1306     }
1307     else switch (c) {
1308     case 't':c='\t';@+break;
1309     case 'n':c='\n';@+break;
1310     case 'b':c='\b';@+break;
1311     case 'f':c='\f';@+break;
1312     case 'v':c='\v';@+break;
1313     case 'r':c='\r';@+break;
1314     case 'a':c='\7';@+break;
1315     case '?':c='?';@+break;
1316     case 'x': /* hexadecimal escape of up to two digits */
1317       if (xisdigit(*(id_first+1))) c=*(++id_first)-'0';
1318       else if (xisxdigit(*(id_first+1))) {
1319         ++id_first;
1320         c=toupper(*id_first)-'A'+10;
1321       }
1322       if (xisdigit(*(id_first+1))) c=16*c+*(++id_first)-'0';
1323       else if (xisxdigit(*(id_first+1))) {
1324         ++id_first;
1325         c=16*c+toupper(*id_first)-'A'+10;
1326       }
1327       break;
1328     case '\\':c='\\';@+break;
1329     case '\'':c='\'';@+break;
1330     case '\"':c='\"';@+break;
1331     default: err_print("! Unrecognized escape sequence");
1332 @.Unrecognized escape sequence@>
1333     }
1334   }@/
1335   /* at this point |c| should have been converted to its ASCII code number */
1336   app_repl(constant);
1337   if (c>=100) app_repl('0'+c/100);
1338   if (c>=10) app_repl('0'+(c/10)%10);
1339   app_repl('0'+c%10);
1340   app_repl(constant);
1341 }
1342 break;
1343
1344 @* Scanning a section.
1345 The |scan_section| procedure starts when `\.{@@\ }' or `\.{@@*}' has been
1346 sensed in the input, and it proceeds until the end of that section.  It
1347 uses |section_count| to keep track of the current section number; with luck,
1348 \.{CWEAVE} and \.{CTANGLE} will both assign the same numbers to sections.
1349
1350 @<Global...@>=
1351 extern sixteen_bits section_count; /* the current section number */
1352
1353 @ The body of |scan_section| is a loop where we look for control codes
1354 that are significant to \.{CTANGLE}: those
1355 that delimit a definition, the \CEE/ part of a section, or a new section.
1356
1357 @c
1358 void
1359 scan_section() /* reads one section, storing its macros and its \CEE/ text */
1360 {
1361   name_pointer p; /* section name for the current section */
1362   text_pointer q; /* text for the current section */
1363   sixteen_bits a; /* token for left-hand side of definition */
1364   section_count++; @+ no_where=1;
1365   if (*(loc-1)=='*' && show_progress) { /* starred section */
1366     printf("*%d",section_count); update_terminal;
1367   }
1368   next_control=0; /* zero is below |definition|, so the skip below reads ahead */
1369   while (1) {
1370     @<Skip ahead until |next_control| corresponds to \.{@@d}, \.{@@<},
1371       \.{@@\ } or the like@>;
1372     if (next_control == definition) {  /* \.{@@d} */
1373         @<Scan a definition@>@;
1374         continue;
1375     }
1376     if (next_control == begin_C) {  /* \.{@@c} or \.{@@p} */
1377       p=name_dir; break; /* |name_dir| stands for the unnamed section */
1378     }
1379     if (next_control == section_name) { /* \.{@@<} or \.{@@(} */
1380       p=cur_section_name;
1381       @<If section is not being defined, |continue| @>;
1382       break;
1383     }
1384     return; /* \.{@@\ } or \.{@@*} */
1385   }
1386   no_where=print_where=0; /* reached only when a \CEE/ part follows */
1387   @<Scan the \CEE/ part of the current section@>;
1388 }
1389
1390 @ At the top of this loop, if |next_control==section_name|, the
1391 section name has already been scanned (see |@<Get control code
1392 and...@>|).  Thus, if we encounter |next_control==section_name| in the
1393 skip-ahead process, we should likewise scan the section name, so later
1394 processing will be the same in both cases.
1395
1396 @<Skip ahead until |next_control| ...@>=
1397 while (next_control<definition)
1398       /* |definition| is the lowest of the ``significant'' codes */
1399   if((next_control=skip_ahead())==section_name){
1400     loc-=2; next_control=get_next(); /* back up to the \.{@@} so that |get_next| scans the name */
1401   }
1402
1403 @ @<Scan a definition@>= {
1404   while ((next_control=get_next())=='\n'); /* allow newline before definition */
1405   if (next_control!=identifier) {
1406     err_print("! Definition flushed, must start with identifier");
1407 @.Definition flushed...@>
1408     continue; /* go on to the next control code */
1409   }
1410   app_repl(((a=id_lookup(id_first,id_loc,0)-name_dir) / 0400)+0200);
1411         /* append the lhs */
1412   app_repl(a % 0400);
1413   if (*loc!='(') { /* identifier must be separated from replacement text */
1414     app_repl(string); app_repl(' '); app_repl(string);
1415   }
1416   scan_repl(macro); /* the rest of the definition */
1417   cur_text->text_link=0; /* |text_link==0| characterizes a macro */
1418 }
1419
1420 @ If the section name is not followed by \.{=} or \.{+=}, no \CEE/
1421 code is forthcoming: the section is being cited, not being
1422 defined.  This use is illegal after the definition part of the
1423 current section has started, except inside a comment, but
1424 \.{CTANGLE} does not enforce this rule; it simply ignores the offending
1425 section name and everything following it, up to the next significant
1426 control code.
1427
1428 @<If section is not being defined, |continue| @>=
1429 while ((next_control=get_next())=='+'); /* allow optional \.{+=} */
1430 if (next_control!='=' && next_control!=eq_eq)
1431   continue;
1432
1433 @ @<Scan the \CEE/...@>=
1434 @<Insert the section number into |tok_mem|@>;
1435 scan_repl(section_name); /* now |cur_text| points to the replacement text */
1436 @<Update the data structure so that the replacement text is accessible@>;
1437
1438 @ @<Insert the section number...@>=
1439 store_two_bytes((sixteen_bits)(0150000+section_count));
1440   /* |0150000==0320*0400| */
1441
1442 @ @<Update the data...@>=
1443 if (p==name_dir||p==0) { /* unnamed section, or bad section name */
1444   (last_unnamed)->text_link=cur_text-text_info; last_unnamed=cur_text; /* append to the chain of unnamed texts */
1445 }
1446 else if (p->equiv==(char *)text_info) p->equiv=(char *)cur_text;
1447   /* first section of this name */
1448 else {
1449   q=(text_pointer)p->equiv;
1450   while (q->text_link<section_flag)
1451     q=q->text_link+text_info; /* find end of list */
1452   q->text_link=cur_text-text_info; /* link the new text to the end */
1453 }
1454 cur_text->text_link=section_flag;
1455   /* mark this replacement text as a nonmacro */
1456
1457 @ @<Predec...@>=
1458 void phase_one();
1459
1460 @ @c
1461 void
1462 phase_one() { /* reads all of the input, one section at a time */
1463   phase=1;
1464   section_count=0;
1465   reset_input();
1466   skip_limbo(); /* pass over the material before the first section */
1467   while (!input_has_ended) scan_section(); /* one call per section */
1468   check_complete();
1469   phase=2;
1470 }
1471
1472 @ Only a small subset of the control codes is legal in limbo, so limbo
1473 processing is straightforward.
1474
1475 @<Predecl...@>=
1476 void skip_limbo();
1477
1478 @ @c
1479 void
1480 skip_limbo() /* skips to the first section, handling the codes allowed in limbo */
1481 {
1482   char c; /* the character after an \.{@@} */
1483   while (1) {
1484     if (loc>limit && get_line()==0) return;
1485     *(limit+1)='@@'; /* sentinel that stops the scan below */
1486     while (*loc!='@@') loc++;
1487     if (loc++<=limit) { /* a real \.{@@}, not the sentinel */
1488       c=*loc++;
1489       if (ccode[(eight_bits)c]==new_section) break; /* the first section starts here */
1490       switch (ccode[(eight_bits)c]) {
1491         case translit_code: @<Read in transliteration of a character@>; break;
1492         case format_code: case '@@': break; /* these are simply passed over */
1493         case control_text: if (c=='q' || c=='Q') { /* of the control texts, only \.{@@q} is accepted here */
1494           while ((c=skip_ahead())=='@@');
1495           if (*(loc-1)!='>')
1496             err_print("! Double @@ should be used in control text");
1497 @.Double @@ should be used...@>
1498           break;
1499           } /* otherwise fall through */
1500         default: err_print("! Double @@ should be used in limbo");
1501 @.Double @@ should be used...@>
1502       }
1503     }
1504   }
1505 }
1506
1507 @ @<Read in transliteration of a character@>=
1508   while(xisspace(*loc)&&loc<limit) loc++;
1509   loc+=3; /* two hex digits and the white space after them */
1510   if (loc>limit || !xisxdigit(*(loc-3)) || !xisxdigit(*(loc-2)) @|
1511          || (*(loc-3)>='0' && *(loc-3)<='7') || !xisspace(*(loc-1))) /* the first digit must be 8 or more */
1512     err_print("! Improper hex number following @@l");
1513 @.Improper hex number...@>
1514   else {
1515     unsigned i; /* the character code; the test above keeps it from 0200 to 0377 */
1516     char *beg; /* start of the replacement string */
1517     sscanf(loc-3,"%x",&i);
1518     while(xisspace(*loc)&&loc<limit) loc++;
1519     beg=loc;
1520     while(loc<limit&&(xisalpha(*loc)||xisdigit(*loc)||*loc=='_')) loc++;
1521     if (loc-beg>=translit_length)
1522       err_print("! Replacement string in @@l too long");
1523 @.Replacement string in @@l...@>
1524     else{
1525       strncpy(translit[i-0200],beg,loc-beg); /* copies the characters only; the null is stored below */
1526       translit[i-0200][loc-beg]='\0';
1527     }
1528   }
1529
1530 @ Because on some systems the difference between two pointers is a |long|
1531 but not an |int|, we use \.{\%ld} to print these quantities.
1532
1533 @c
1534 void
1535 print_stats() { /* prints how much of each table is in use */
1536   printf("\nMemory usage statistics:\n");
1537   printf("%ld names (out of %ld)\n",
1538           (long)(name_ptr-name_dir),(long)max_names); /* cast because a pointer difference need not be an |int| */
1539   printf("%ld replacement texts (out of %ld)\n",
1540           (long)(text_ptr-text_info),(long)max_texts);
1541   printf("%ld bytes (out of %ld)\n",
1542           (long)(byte_ptr-byte_mem),(long)max_bytes);
1543   printf("%ld tokens (out of %ld)\n",
1544           (long)(tok_ptr-tok_mem),(long)max_toks);
1545 }
1546
1547 @** Index.
1548 Here is a cross-reference table for \.{CTANGLE}.
1549 All sections in which an identifier is
1550 used are listed with that identifier, except that reserved words are
1551 indexed only when they appear in format definitions, and the appearances
1552 of identifiers in section names are not indexed. Underlined entries
1553 correspond to where the identifier was declared. Error messages and
1554 a few other things like ``ASCII code dependencies'' are indexed here too.