source/texk/web2c/cwebdir/common.w

   1 % This file is part of CWEB.
   2 % This program by Silvio Levy and Donald E. Knuth
   3 % is based on a program by Knuth.
   4 % It is distributed WITHOUT ANY WARRANTY, express or implied.
   5 % Version 3.64 --- January 2002
   6
   7 % Copyright (C) 1987,1990,1993,2000 Silvio Levy and Donald E. Knuth
   8
   9 % Permission is granted to make and distribute verbatim copies of this
  10 % document provided that the copyright notice and this permission notice
  11 % are preserved on all copies.
  12
  13 % Permission is granted to copy and distribute modified versions of this
  14 % document under the conditions for verbatim copying, provided that the
  15 % entire resulting derived work is given a different name and distributed
  16 % under the terms of a permission notice identical to this one.
  17
  18 \def\v{\char'174} % vertical (|) in typewriter font
  19
  20 \def\title{Common code for CTANGLE and CWEAVE (Version 3.64)}
  21 \def\topofcontents{\null\vfill
  22   \centerline{\titlefont Common code for {\ttitlefont CTANGLE} and
  23     {\ttitlefont CWEAVE}}
  24   \vskip 15pt
  25   \centerline{(Version 3.64)}
  26   \vfill}
  27 \def\botofcontents{\vfill
  28 \noindent
  29 Copyright \copyright\ 1987, 1990, 1993, 2000 Silvio Levy and Donald E. Knuth
  30 \bigskip\noindent
  31 Permission is granted to make and distribute verbatim copies of this
  32 document provided that the copyright notice and this permission notice
  33 are preserved on all copies.
  34
  35 \smallskip\noindent
  36 Permission is granted to copy and distribute modified versions of this
  37 document under the conditions for verbatim copying, provided that the
  38 entire resulting derived work is given a different name and distributed
  39 under the terms of a permission notice identical to this one.
  40 }
  41
  42 \pageno=\contentspagenumber \advance\pageno by 1
  43 \let\maybe=\iftrue
  44 @s not_eq normal @q unreserve a C++ keyword @>
  45
  46 @** Introduction.  This file contains code common
  47 to both \.{CTANGLE} and \.{CWEAVE}, which roughly concerns the following
  48 problems: character uniformity, input routines, error handling and
  49 parsing of command line.  We have tried to concentrate in this file
  50 all the system dependencies, so as to maximize portability.
  51
  52 In the texts below we will
  53 sometimes use \.{CWEB} to refer to either of the two component
  54 programs, if no confusion can arise.
  55
  56 The file begins with a few basic definitions.
  57
  58 @c
  59 @<Include files@>@/
  60 @h
  61 @<Definitions that should agree with \.{CTANGLE} and \.{CWEAVE}@>@/
  62 @<Other definitions@>@/
  63 @<Predeclaration of procedures@>@/
  64
  65 @ In certain cases \.{CTANGLE} and \.{CWEAVE} should do almost, but not
  66 quite, the same thing.  In these cases we've written common code for
  67 both, differentiating between the two by means of the global variable
  68 |program|.
  69
  70 @d ctangle 0
  71 @d cweave 1
  72
  73 @<Definitions...@>=
  74 typedef short boolean;
  75 boolean program; /* \.{CWEAVE} or \.{CTANGLE}? */
  76
  77 @ \.{CWEAVE} operates in three phases: First it inputs the source
  78 file and stores cross-reference data, then it inputs the source once again and
  79 produces the \TEX/ output file, and finally it sorts and outputs the index.
  80 Similarly, \.{CTANGLE} operates in two phases.
  81 The global variable |phase| tells which phase we are in.
  82
  83 @<Other...@>= int phase; /* which phase are we in? */
  84
  85 @ There's an initialization procedure that gets both \.{CTANGLE} and
  86 \.{CWEAVE} off to a good start. We will fill in the details of this
  87 procedure later.
  88
  89 @c
   90 void
   91 common_init() /* start-up steps shared by \.{CTANGLE} and \.{CWEAVE} */
   92 {
   93   @<Initialize pointers@>;
   94   @<Set the default options common to \.{CTANGLE} and \.{CWEAVE}@>;
   95   @<Scan arguments and open output files@>;
   96 }
  97
  98 @* The character set.
  99 \.{CWEB} uses the conventions of \CEE/ programs found in the standard
 100 \.{ctype.h} header file.
 101
 102 @<Include files@>=
 103 #include <ctype.h>
 104
 105 @ A few character pairs are encoded internally as single characters,
 106 using the definitions below. These definitions are consistent with
 107 an extension of ASCII code originally developed at MIT and explained in
 108 Appendix~C of {\sl The \TEX/book\/}; thus, users who have such a
 109 character set can type things like \.{\char'32} and \.{\char'4} instead
 110 of \.{!=} and \.{\&\&}. (However, their files will not be too portable
 111 until more people adopt the extended code.)
 112
 113 If the character set is not ASCII, the definitions given here may conflict
 114 with existing characters; in such cases, other arbitrary codes should be
 115 substituted. The indexes to \.{CTANGLE} and \.{CWEAVE} mention every
 116 case where similar codes may have to be changed in order to
 117 avoid character conflicts. Look for the entry ``ASCII code dependencies''
 118 in those indexes.
 119
 120 @^ASCII code dependencies@>
 121 @^system dependencies@>
 122
 123 @d and_and 04 /* `\.{\&\&}'\,; corresponds to MIT's {\tentex\char'4} */
 124 @d lt_lt 020 /* `\.{<<}'\,;  corresponds to MIT's {\tentex\char'20} */
 125 @d gt_gt 021 /* `\.{>>}'\,;  corresponds to MIT's {\tentex\char'21} */
 126 @d plus_plus 013 /* `\.{++}'\,;  corresponds to MIT's {\tentex\char'13} */
 127 @d minus_minus 01 /* `\.{--}'\,;  corresponds to MIT's {\tentex\char'1} */
 128 @d minus_gt 031 /* `\.{->}'\,;  corresponds to MIT's {\tentex\char'31} */
 129 @d not_eq 032 /* `\.{!=}'\,;  corresponds to MIT's {\tentex\char'32} */
 130 @d lt_eq 034 /* `\.{<=}'\,;  corresponds to MIT's {\tentex\char'34} */
 131 @d gt_eq 035 /* `\.{>=}'\,;  corresponds to MIT's {\tentex\char'35} */
 132 @d eq_eq 036 /* `\.{==}'\,;  corresponds to MIT's {\tentex\char'36} */
 133 @d or_or 037 /* `\.{\v\v}'\,;  corresponds to MIT's {\tentex\char'37} */
 134 @d dot_dot_dot 016 /* `\.{...}'\,;  corresponds to MIT's {\tentex\char'16} */
 135 @d colon_colon 06 /* `\.{::}'\,;  corresponds to MIT's {\tentex\char'6} */
 136 @d period_ast 026 /* `\.{.*}'\,;  corresponds to MIT's {\tentex\char'26} */
 137 @d minus_gt_ast 027 /* `\.{->*}'\,;  corresponds to MIT's {\tentex\char'27} */
 138
 139 @** Input routines.  The lowest level of input to the \.{CWEB} programs
 140 is performed by |input_ln|, which must be told which file to read from.
 141 The return value of |input_ln| is 1 if the read is successful and 0 if
 142 not (generally this means the file has ended). The conventions
 143 of \TEX/ are followed; i.e., the characters of the next line of the file
 144 are copied into the |buffer| array,
 145 and the global variable |limit| is set to the first unoccupied position.
 146 Trailing blanks are ignored. The value of |limit| must be strictly less
 147 than |buf_size|, so that |buffer[buf_size-1]| is never filled.
 148
 149 Since |buf_size| is strictly less than |long_buf_size|,
 150 some of \.{CWEB}'s routines use the fact that it is safe to refer to
 151 |*(limit+2)| without overstepping the bounds of the array.
 152
 153 @d buf_size 100 /* for \.{CWEAVE} and \.{CTANGLE} */
 154 @d longest_name 1000
 155 @d long_buf_size (buf_size+longest_name) /* for \.{CWEAVE} */
 156 @d xisspace(c) (isspace(c)&&((unsigned char)c<0200))
 157 @d xisupper(c) (isupper(c)&&((unsigned char)c<0200))
 158
 159 @<Definitions...@>=
 160 char buffer[long_buf_size]; /* where each line of input goes */
 161 char *buffer_end=buffer+buf_size-2; /* end of |buffer| */
 162 char *limit=buffer; /* points to the last character in the buffer */
 163 char *loc=buffer; /* points to the next character to be read from the buffer */
 164
 165 @ @<Include files@>=
 166 #include <stdio.h>
 167
 168 @ In the unlikely event that your standard I/O library does not
 169 support |feof|, |getc|, and |ungetc| you may have to change things here.
 170 @^system dependencies@>
 171
 172 @c
  173 int input_ln(fp) /* copies a line into |buffer| and returns 1, or returns 0 */
  174 FILE *fp; /* what file to read from */
  175 {
  176   register int  c=EOF; /* character read; initialized so some compilers won't complain */
  177   register char *k;  /* where next character goes */
  178   if (feof(fp)) return(0);  /* we have hit end-of-file */
  179   limit = k = buffer;  /* beginning of buffer */
  180   while (k<=buffer_end && (c=getc(fp)) != EOF && c!='\n') /* copy until the line or the room ends */
  181     if ((*(k++) = c) != ' ') limit = k; /* spaces move |limit| only if a nonblank follows */
  182   if (k>buffer_end) /* the buffer filled up first */
  183     if ((c=getc(fp))!=EOF && c!='\n') { /* and the line has still more characters */
  184       ungetc(c,fp); loc=buffer; err_print("! Input line too long"); /* the rest is read as another line */
  185 @.Input line too long@>
  186   }
  187   if (c==EOF && limit==buffer) return(0);  /* there was nothing after
  188     the last newline */
  189   return(1);
  190 }
 191
 192 @ Now comes the problem of deciding which file to read from next.
 193 Recall that the actual text that \.{CWEB} should process comes from two
 194 streams: a |web_file|, which can contain possibly nested include
 195 commands \.{@@i}, and a |change_file|, which might also contain
 196 includes.  The |web_file| together with the currently open include
 197 files form a stack |file|, whose names are stored in a parallel stack
 198 |file_name|.  The boolean |changing| tells whether or not we're reading
 199 from the |change_file|.
 200
 201 The line number of each open file is also kept for error reporting and
 202 for the benefit of \.{CTANGLE}.
 203
 204 @f line x /* make |line| an unreserved word */
 205 @d max_include_depth 10 /* maximum number of source files open
 206   simultaneously, not counting the change file */
 207 @d max_file_name_length 60
 208 @d cur_file file[include_depth] /* current file */
 209 @d cur_file_name file_name[include_depth] /* current file name */
 210 @d cur_line line[include_depth] /* number of current line in current file */
 211 @d web_file file[0] /* main source file */
 212 @d web_file_name file_name[0] /* main source file name */
 213
 214 @<Definitions...@>=
 215 int include_depth; /* current level of nesting */
 216 FILE *file[max_include_depth]; /* stack of non-change files */
 217 FILE *change_file; /* change file */
 218 char file_name[max_include_depth][max_file_name_length];
 219   /* stack of non-change file names */
 220 char change_file_name[max_file_name_length]; /* name of change file */
 221 char alt_web_file_name[max_file_name_length]; /* alternate name to try */
 222 int line[max_include_depth]; /* number of current line in the stacked files */
 223 int change_line; /* number of current line in change file */
 224 int change_depth; /* where \.{@@y} originated during a change */
 225 boolean input_has_ended; /* if there is no more input */
 226 boolean changing; /* if the current line is from |change_file| */
 227 boolean web_file_open=0; /* if the web file is being read */
 228
 229 @ When |changing==0|, the next line of |change_file| is kept in
 230 |change_buffer|, for purposes of comparison with the next
 231 line of |cur_file|. After the change file has been completely input, we
 232 set |change_limit=change_buffer|,
 233 so that no further matches will be made.
 234
 235 Here's a shorthand expression for inequality between the two lines:
 236
 237 @d lines_dont_match (change_limit-change_buffer != limit-buffer ||
 238   strncmp(buffer, change_buffer, limit-buffer))
 239
 240 @<Other...@>=
 241 char change_buffer[buf_size]; /* next line of |change_file| */
 242 char *change_limit; /* points to the last character in |change_buffer| */
 243
 244 @ Procedure |prime_the_change_buffer|
 245 sets |change_buffer| in preparation for the next matching operation.
 246 Since blank lines in the change file are not used for matching, we have
 247 |(change_limit==change_buffer && !changing)| if and only if
 248 the change file is exhausted. This procedure is called only when
 249 |changing| is 1; hence error messages will be reported correctly.
 250
 251 @c
  252 void
  253 prime_the_change_buffer() /* loads the first line of the next change into |change_buffer| */
  254 {
  255   change_limit=change_buffer; /* this value is used if the change file ends */
  256   @<Skip over comment lines in the change file; |return| if end of file@>; /* stops at an \.{@@x} line */
  257   @<Skip to the next nonblank line; |return| if end of file@>; /* that line is now in |buffer| */
  258   @<Move |buffer| and |limit| to |change_buffer| and |change_limit|@>;
  259 }
 260
 261 @ While looking for a line that begins with \.{@@x} in the change file, we
 262 allow lines that begin with \.{@@}, as long as they don't begin with \.{@@y},
 263 \.{@@z}, or \.{@@i} (which would probably mean that the change file is fouled up).
 264
 265 @<Skip over comment lines in the change file...@>=
 266 while(1) {
 267   change_line++;
 268   if (!input_ln(change_file)) return;
 269   if (limit<buffer+2) continue;
 270   if (buffer[0]!='@@') continue;
 271   if (xisupper(buffer[1])) buffer[1]=tolower(buffer[1]);
 272   if (buffer[1]=='x') break;
 273   if (buffer[1]=='y' || buffer[1]=='z' || buffer[1]=='i') {
 274     loc=buffer+2;
 275     err_print("! Missing @@x in change file");
 276 @.Missing @@x...@>
 277   }
 278 }
 279
 280 @ Here we are looking at lines following the \.{@@x}.
 281
 282 @<Skip to the next nonblank line...@>=
 283 do {
 284   change_line++;
 285   if (!input_ln(change_file)) {
 286     err_print("! Change file ended after @@x");
 287 @.Change file ended...@>
 288     return;
 289   }
 290 } while (limit==buffer);
 291
 292 @ @<Move |buffer| and |limit| to |change_buffer| and |change_limit|@>=
 293 {
 294   change_limit=change_buffer+(limit-buffer);
 295   strncpy(change_buffer,buffer,limit-buffer+1);
 296 }
 297
 298 @ The following procedure is used to see if the next change entry should
 299 go into effect; it is called only when |changing| is 0.
 300 The idea is to test whether or not the current
 301 contents of |buffer| matches the current contents of |change_buffer|.
 302 If not, there's nothing more to do; but if so, a change is called for:
 303 All of the text down to the \.{@@y} is supposed to match. An error
 304 message is issued if any discrepancy is found. Then the procedure
 305 prepares to read the next line from |change_file|.
 306
 307 When a match is found, the current section is marked as changed unless
 308 the first line after the \.{@@x} and after the \.{@@y} both start with
 309 either |'@@*'| or |'@@ '| (possibly preceded by whitespace).
 310
 311 This procedure is called only when |buffer<limit|, i.e., when the
 312 current line is nonempty.
 313
 314 @d if_section_start_make_pending(b) {@+*limit='!';
 315   for (loc=buffer;xisspace(*loc);loc++) ;
 316   *limit=' ';
 317   if (*loc=='@@' && (xisspace(*(loc+1)) || *(loc+1)=='*')) change_pending=b;
 318 }
 319
 320 @c
 321 void
 322 check_change() /* switches to |change_file| if the buffers match */
 323 {
 324   int n=0; /* the number of discrepancies found */
 325   if (lines_dont_match) return;
 326   change_pending=0;
 327   if (!changed_section[section_count]) {
 328     if_section_start_make_pending(1);
 329     if (!change_pending) changed_section[section_count]=1;
 330   }
 331   while (1) {
 332     changing=1; print_where=1; change_line++;
 333     if (!input_ln(change_file)) {
 334       err_print("! Change file ended before @@y");
 335 @.Change file ended...@>
 336       change_limit=change_buffer; changing=0;
 337       return;
 338     }
 339     if (limit>buffer+1 && buffer[0]=='@@') {
 340       char xyz_code=xisupper(buffer[1])? tolower(buffer[1]): buffer[1];
 341       @<If the current line starts with \.{@@y},
 342         report any discrepancies and |return|@>;
 343     }
 344     @<Move |buffer| and |limit|...@>;
 345     changing=0; cur_line++;
 346     while (!input_ln(cur_file)) { /* pop the stack or quit */
 347       if (include_depth==0) {
 348         err_print("! CWEB file ended during a change");
 349 @.CWEB file ended...@>
 350         input_has_ended=1; return;
 351       }
 352       include_depth--; cur_line++;
 353     }
 354     if (lines_dont_match) n++;
 355   }
 356 }
 357
 358 @ @<If the current line starts with \.{@@y}...@>=
 359 if (xyz_code=='x' || xyz_code=='z') {
 360   loc=buffer+2; err_print("! Where is the matching @@y?");
 361 @.Where is the match...@>
 362   }
 363 else if (xyz_code=='y') {
 364   if (n>0) {
 365     loc=buffer+2;
 366     printf("\n! Hmm... %d ",n);
 367     err_print("of the preceding lines failed to match");
 368 @.Hmm... n of the preceding...@>
 369   }
 370   change_depth=include_depth;
 371   return;
 372 }
 373
 374 @ The |reset_input| procedure, which gets \.{CWEB} ready to read the
 375 user's \.{CWEB} input, is used at the beginning of phase one of \.{CTANGLE},
 376 phases one and two of \.{CWEAVE}.
 377
 378 @c
  379 void
  380 reset_input() /* opens the input files and gets ready for the first |get_line| */
  381 {
  382   limit=buffer; loc=buffer+1; buffer[0]=' '; /* |loc>limit|, so nothing is left to scan */
  383   @<Open input files@>;
  384   include_depth=0; cur_line=0; change_line=0; /* start at the bottom of the file stack */
  385   change_depth=include_depth;
  386   changing=1; prime_the_change_buffer(); changing=!changing; /* |changing| is 1 during the call so that errors are reported correctly */
  387   limit=buffer; loc=buffer+1; buffer[0]=' '; input_has_ended=0; /* priming used |buffer|, so empty it again */
  388 }
 389
 390 @ The following code opens the input files.
 391 @^system dependencies@>
 392
 393 @<Open input files@>=
 394 if ((web_file=fopen(web_file_name,"r"))==NULL) {
 395   strcpy(web_file_name,alt_web_file_name);
 396   if ((web_file=fopen(web_file_name,"r"))==NULL)
 397        fatal("! Cannot open input file ", web_file_name);
 398 }
 399 @.Cannot open input file@>
 400 @.Cannot open change file@>
 401 web_file_open=1;
 402 if ((change_file=fopen(change_file_name,"r"))==NULL)
 403        fatal("! Cannot open change file ", change_file_name);
 404
 405 @ The |get_line| procedure is called when |loc>limit|; it puts the next
 406 line of merged input into the buffer and updates the other variables
 407 appropriately. A space is placed at the right end of the line.
 408 This procedure returns |!input_has_ended| because we often want to
 409 check the value of that variable after calling the procedure.
 410
 411 If we've just changed from the |cur_file| to the |change_file|, or if
 412 the |cur_file| has changed, we tell \.{CTANGLE} to print this
 413 information in the \CEE/ file by means of the |print_where| flag.
 414
  415 @d max_sections 2000 /* number of sections for which |changed_section| has room;
  416   must be less than 10240 */
 417
 418 @<Defin...@>=
 419 typedef unsigned short sixteen_bits;
 420 sixteen_bits section_count; /* the current section number */
 421 boolean changed_section[max_sections]; /* is the section changed? */
 422 boolean change_pending; /* if the current change is not yet recorded in
 423   |changed_section[section_count]| */
 424 boolean print_where=0; /* should \.{CTANGLE} print line and file info? */
 425
 426 @ @c
  427 int get_line() /* inputs the next line; returns 0 when the input has ended */
  428 {
  429   restart:
  430   if (changing && include_depth==change_depth) /* the replacement text comes from the change file */
  431    @<Read from |change_file| and maybe turn off |changing|@>;
  432   if (! changing || include_depth>change_depth) {
  433     @<Read from |cur_file| and maybe turn on |changing|@>;
  434     if (changing && include_depth==change_depth) goto restart; /* the next line must come from the change file */
  435   }
  436   if (input_has_ended) return 0;
  437   loc=buffer; *limit=' '; /* a space marks the right end of the line */
  438   if (buffer[0]=='@@' && (buffer[1]=='i' || buffer[1]=='I')) { /* an \.{@@i} line names an include file */
  439     loc=buffer+2; *limit='"'; /* a quote at |limit| stops the scan of a quoted name */
  440     while (*loc==' '||*loc=='\t') loc++; /* skip blanks and tabs before the name */
  441     if (loc>=limit) {
  442       err_print("! Include file name not given");
  443 @.Include file name ...@>
  444       goto restart;
  445     }
  446     if (include_depth>=max_include_depth-1) { /* the stack of open files is full */
  447       err_print("! Too many nested includes");
  448 @.Too many nested includes@>
  449       goto restart;
  450     }
  451     include_depth++; /* push input stack */
  452     @<Try to open include file, abort push if unsuccessful, go to |restart|@>;
  453   }
  454   return 1; /* |buffer| holds a line for the caller */
  455 }
 456
 457 @ When an \.{@@i} line is found in the |cur_file|, we must temporarily
 458 stop reading it and start reading from the named include file.  The
 459 \.{@@i} line should give a complete file name with or without
 460 double quotes.
 461 If the environment variable \.{CWEBINPUTS} is set, or if the compiler flag
 462 of the same name was defined at compile time,
 463 \.{CWEB} will look for include files in the directory thus named, if
 464 it cannot find them in the current directory.
 465 (Colon-separated paths are not supported.)
 466 The remainder of the \.{@@i} line after the file name is ignored.
 467
 468 @d too_long() {include_depth--;
 469         err_print("! Include file name too long"); goto restart;}
 470
 471 @<Include...@>=
 472 #include <stdlib.h> /* declaration of |getenv| and |exit| */
 473
 474 @ @<Try to open...@>= {
 475   char temp_file_name[max_file_name_length];
 476   char *cur_file_name_end=cur_file_name+max_file_name_length-1;
 477   char *k=cur_file_name, *kk;
 478   int l; /* length of file name */
 479
 480   if (*loc=='"') {
 481     loc++;
 482     while (*loc!='"' && k<=cur_file_name_end) *k++=*loc++;
 483     if (loc==limit) k=cur_file_name_end+1; /* unmatched quote is `too long' */
 484   } else
 485     while (*loc!=' '&&*loc!='\t'&&*loc!='"'&&k<=cur_file_name_end) *k++=*loc++;
 486   if (k>cur_file_name_end) too_long();
 487 @.Include file name ...@>
 488   *k='\0';
 489   if ((cur_file=fopen(cur_file_name,"r"))!=NULL) {
 490     cur_line=0; print_where=1;
 491     goto restart; /* success */
 492   }
 493   kk=getenv("CWEBINPUTS");
 494   if (kk!=NULL) {
 495     if ((l=strlen(kk))>max_file_name_length-2) too_long();
 496     strcpy(temp_file_name,kk);
 497   }
 498   else {
 499 #ifdef CWEBINPUTS
 500     if ((l=strlen(CWEBINPUTS))>max_file_name_length-2) too_long();
 501     strcpy(temp_file_name,CWEBINPUTS);
 502 #else
 503     l=0;
 504 #endif /* |CWEBINPUTS| */
 505   }
 506   if (l>0) {
 507     if (k+l+2>=cur_file_name_end)  too_long();
 508 @.Include file name ...@>
 509     for (; k>= cur_file_name; k--) *(k+l+1)=*k;
 510     strcpy(cur_file_name,temp_file_name);
 511     cur_file_name[l]='/'; /* \UNIX/ pathname separator */
 512     if ((cur_file=fopen(cur_file_name,"r"))!=NULL) {
 513       cur_line=0; print_where=1;
 514       goto restart; /* success */
 515     }
 516   }
 517   include_depth--; err_print("! Cannot open include file"); goto restart;
 518 }
 519
 520 @ @<Read from |cur_file|...@>= {
 521   cur_line++;
 522   while (!input_ln(cur_file)) { /* pop the stack or quit */
 523     print_where=1;
 524     if (include_depth==0) {input_has_ended=1; break;}
 525     else {
 526       fclose(cur_file); include_depth--;
 527       if (changing && include_depth==change_depth) break;
 528       cur_line++;
 529     }
 530   }
 531   if (!changing && !input_has_ended)
 532    if (limit-buffer==change_limit-change_buffer)
 533     if (buffer[0]==change_buffer[0])
 534       if (change_limit>change_buffer) check_change();
 535 }
 536
 537 @ @<Read from |change_file|...@>= {
 538   change_line++;
 539   if (!input_ln(change_file)) {
 540     err_print("! Change file ended without @@z");
 541 @.Change file ended...@>
 542     buffer[0]='@@'; buffer[1]='z'; limit=buffer+2;
 543   }
 544   if (limit>buffer) { /* check if the change has ended */
 545     if (change_pending) {
 546       if_section_start_make_pending(0);
 547       if (change_pending) {
 548         changed_section[section_count]=1; change_pending=0;
 549       }
 550     }
 551     *limit=' ';
 552     if (buffer[0]=='@@') {
 553       if (xisupper(buffer[1])) buffer[1]=tolower(buffer[1]);
 554       if (buffer[1]=='x' || buffer[1]=='y') {
 555         loc=buffer+2;
 556         err_print("! Where is the matching @@z?");
 557 @.Where is the match...@>
 558       }
 559       else if (buffer[1]=='z') {
 560         prime_the_change_buffer(); changing=!changing; print_where=1;
 561       }
 562     }
 563   }
 564 }
 565
 566 @ At the end of the program, we will tell the user if the change file
 567 had a line that didn't match any relevant line in |web_file|.
 568
 569 @c
 570 void
 571 check_complete(){
 572   if (change_limit!=change_buffer) { /* |changing| is 0 */
 573     strncpy(buffer,change_buffer,change_limit-change_buffer+1);
 574     limit=buffer+(int)(change_limit-change_buffer);
 575     changing=1; change_depth=include_depth; loc=buffer;
 576     err_print("! Change file entry did not match");
 577 @.Change file entry did not match@>
 578   }
 579 }
 580
 581 @** Storage of names and strings.
 582 Both \.{CWEAVE} and \.{CTANGLE} store identifiers, section names and
 583 other strings in a large array of |char|s, called |byte_mem|.
 584 Information about the names is kept in the array |name_dir|, whose
 585 elements are structures of type |name_info|, containing a pointer into
 586 the |byte_mem| array (the address where the name begins) and other data.
 587 A |name_pointer| variable is a pointer into |name_dir|.
 588
 589 @d max_bytes 90000 /* the number of bytes in identifiers,
 590   index entries, and section names; must be less than $2^{24}$ */
 591 @d max_names 4000 /* number of identifiers, strings, section names;
 592   must be less than 10240 */
 593
 594 @<Definitions that...@>=
 595 typedef struct name_info {
 596   char *byte_start; /* beginning of the name in |byte_mem| */
 597   @<More elements of |name_info| structure@>@;
 598 } name_info; /* contains information about an identifier or section name */
 599 typedef name_info *name_pointer; /* pointer into array of |name_info|s */
 600 char byte_mem[max_bytes]; /* characters of names */
 601 char *byte_mem_end = byte_mem+max_bytes-1; /* end of |byte_mem| */
 602 name_info name_dir[max_names]; /* information about names */
 603 name_pointer name_dir_end = name_dir+max_names-1; /* end of |name_dir| */
 604
 605 @ The actual sequence of characters in the name pointed to by a |name_pointer
 606 p| appears in positions |p->byte_start| to |(p+1)->byte_start-1|, inclusive.
 607 The |print_id| macro prints this text on the user's terminal.
 608
 609 @d length(c) (c+1)->byte_start-(c)->byte_start /* the length of a name */
 610 @d print_id(c) term_write((c)->byte_start,length((c))) /* print identifier */
 611
 612 @ The first unused position in |byte_mem| and |name_dir| is
 613 kept in |byte_ptr| and |name_ptr|, respectively.  Thus we
 614 usually have |name_ptr->byte_start==byte_ptr|, and certainly
 615 we want to keep |name_ptr<=name_dir_end| and |byte_ptr<=byte_mem_end|.
 616
 617 @<Defini...@>=
  618 name_pointer name_ptr; /* first unused position in |name_dir| */
  619 char *byte_ptr; /* first unused position in |byte_mem| */
 620
 621 @ @<Init...@>=
 622 name_dir->byte_start=byte_ptr=byte_mem; /* position zero in both arrays */
 623 name_ptr=name_dir+1; /* |name_dir[0]| will be used only for error recovery */
 624 name_ptr->byte_start=byte_mem; /* this makes name 0 of length zero */
 625
 626 @ The names of identifiers are found by computing a hash address |h| and
 627 then looking at strings of bytes signified by the |name_pointer|s
 628 |hash[h]|, |hash[h]->link|, |hash[h]->link->link|, \dots,
 629 until either finding the desired name or encountering the null pointer.
 630
 631 @<More elements of |name...@>=
 632 struct name_info *link;
 633
 634 @ The hash table itself
 635 consists of |hash_size| entries of type |name_pointer|, and is
 636 updated by the |id_lookup| procedure, which finds a given identifier
 637 and returns the appropriate |name_pointer|. The matching is done by the
 638 function |names_match|, which is slightly different in
 639 \.{CWEAVE} and \.{CTANGLE}.  If there is no match for the identifier,
 640 it is inserted into the table.
 641
 642 @d hash_size 353 /* should be prime */
 643
 644 @<Defini...@>=
 645 typedef name_pointer *hash_pointer;
 646 name_pointer hash[hash_size]; /* heads of hash lists */
 647 hash_pointer hash_end = hash+hash_size-1; /* end of |hash| */
 648 hash_pointer h; /* index into hash-head array */
 649
 650 @ @<Predec...@>=
 651 extern int names_match();
 652
 653 @ Initially all the hash lists are empty.
 654
 655 @<Init...@>=
 656 for (h=hash; h<=hash_end; *h++=NULL) ;
 657
 658 @ Here is the main procedure for finding identifiers:
 659
 660 @c
  661 name_pointer
  662 id_lookup(first,last,t) /* looks up a string in the identifier table, entering it if it is new */
  663 char *first; /* first character of string */
  664 char *last; /* last character of string plus one, or |NULL| if a null byte ends the string */
  665 char t; /* the |ilk|; used by \.{CWEAVE} only */
  666 {
  667   char *i=first; /* runs through the string while the hash code is computed */
  668   int h; /* hash code */
  669   int l; /* length of the given identifier */
  670   name_pointer p; /* where the identifier is being sought */
  671   if (last==NULL) for (last=first; *last!='\0'; last++); /* find the end of the string ourselves */
  672   l=last-first; /* compute the length */
  673   @<Compute the hash code |h|@>;
  674   @<Compute the name location |p|@>;
  675   if (p==name_ptr) @<Enter a new name into the table at position |p|@>; /* the name was not found */
  676   return(p); /* the existing or newly made entry */
  677 }
 678
 679 @ A simple hash code is used: If the sequence of
 680 character codes is $c_1c_2\ldots c_n$, its hash value will be
 681 $$(2^{n-1}c_1+2^{n-2}c_2+\cdots+c_n)\,\bmod\,|hash_size|.$$
 682
 683 @<Compute the hash...@>=
 684 h=(unsigned char)*i;
 685 while (++i<last) h=(h+h+(int)((unsigned char)*i)) % hash_size;
 686 @^high-bit character handling@>
 687
 688 @ If the identifier is new, it will be placed in position |p=name_ptr|,
 689 otherwise |p| will point to its existing location.
 690
 691 @<Compute the name location...@>=
 692 p=hash[h];
 693 while (p && !names_match(p,first,l,t)) p=p->link;
 694 if (p==NULL) {
 695   p=name_ptr; /* the current identifier is new */
 696   p->link=hash[h]; hash[h]=p; /* insert |p| at beginning of hash list */
 697 }
 698
 699 @ The information associated with a new identifier must be initialized
 700 in a slightly different way in \.{CWEAVE} than in \.{CTANGLE}; hence the
 701 |init_p| procedure.
 702
 703 @<Pred...@>=
 704 void init_p();
 705
 706 @ @<Enter a new name...@>= {
 707   if (byte_ptr+l>byte_mem_end) overflow("byte memory");
 708   if (name_ptr>=name_dir_end) overflow("name");
 709   strncpy(byte_ptr,first,l);
 710   (++name_ptr)->byte_start=byte_ptr+=l;
 711   if (program==cweave) init_p(p,t);
 712 }
 713
 714 @ The names of sections are stored in |byte_mem| together
 715 with the identifier names, but a hash table is not used for them because
 716 \.{CTANGLE} needs to be able to recognize a section name when given a prefix of
 717 that name. A conventional binary search tree is used to retrieve section names,
 718 with fields called |llink| and |rlink| (where |llink| takes the place
 719 of |link|).  The root of this tree is stored in |name_dir->rlink|;
 720 this will be the only information in |name_dir[0]|.
 721
 722 Since the space used by |rlink| has a different function for
 723 identifiers than for section names, we declare it as a |union|.
 724
 725 @d llink link /* left link in binary search tree for section names */
 726 @d rlink dummy.Rlink /* right link in binary search tree for section names */
 727 @d root name_dir->rlink /* the root of the binary search tree
 728   for section names */
 729
 730 @<More elements of |name...@>=
 731 union {
 732   struct name_info *Rlink; /* right link in binary search tree for section
 733     names */
 734   char Ilk; /* used by identifiers in \.{CWEAVE} only */
 735 } dummy;
 736
 737 @ @<Init...@>=
 738 root=NULL; /* the binary search tree starts out with nothing in it */
 739
 740 @ If |p| is a |name_pointer| variable, as we have seen,
 741 |p->byte_start| is the beginning of the area where the name
 742 corresponding to |p| is stored.  However, if |p| refers to a section
 743 name, the name may need to be stored in chunks, because it may
 744 ``grow'': a prefix of the section name may be encountered before
 745 the full name.  Furthermore we need to know the length of the shortest
 746 prefix of the name that was ever encountered.
 747
 748 We solve this problem by inserting two extra bytes at |p->byte_start|,
 749 representing the length of the shortest prefix, when |p| is a
 750 section name. Furthermore, the last byte of the name will be a blank
 751 space if |p| is a prefix. In the latter case, the name pointer
 752 |p+1| will allow us to access additional chunks of the name:
 753 The second chunk will begin at the name pointer |(p+1)->link|,
 754 and if it too is a prefix (ending with blank) its |link| will point
 755 to additional chunks in the same way. Null links are represented by
 756 |name_dir|.
 757
 758 @d first_chunk(p)  ((p)->byte_start+2)
 759 @d prefix_length(p) (int)((unsigned char)*((p)->byte_start)*256 +
 760                 (unsigned char)*((p)->byte_start+1))
 761 @d set_prefix_length(p,m) (*((p)->byte_start)=(m)/256,
 762                  *((p)->byte_start+1)=(m)%256)
 763
 764 @c
 765 void
 766 print_section_name(p) /* write the known text of section name |p| to the terminal */
 767 name_pointer p;
 768 {
 769   char *ss, *s = first_chunk(p); /* |s| is where the text of the current chunk starts */
 770   name_pointer q = p+1; /* the node after |p| carries the link to the next chunk */
 771   while (p!=name_dir) {
 772     ss = (p+1)->byte_start-1; /* last byte of the current chunk */
 773     if (*ss==' ' && ss>=s) { /* a trailing blank marks a prefix chunk */
 774       term_write(s,ss-s); p=q->link; q=p; /* omit the blank, go on to the next chunk */
 775     } else {
 776       term_write(s,ss+1-s); p=name_dir; q=NULL; /* final chunk: print all of it and stop */
 777     }
 778     s = p->byte_start; /* chunks after the first have no length bytes */
 779   }
 780   if (q) term_write("...",3); /* complete name not yet known */
 781 }
 782
 783 @ @c
 784 void
 785 sprint_section_name(dest,p) /* copy the known text of section name |p| into |dest| */
 786   char*dest; /* receives the name; no size is passed, so the caller must supply enough room */
 787   name_pointer p;
 788 {
 789   char *ss, *s = first_chunk(p); /* |s| is where the text of the current chunk starts */
 790   name_pointer q = p+1; /* the node after |p| carries the link to the next chunk */
 791   while (p!=name_dir) {
 792     ss = (p+1)->byte_start-1; /* last byte of the current chunk */
 793     if (*ss==' ' && ss>=s) { /* a trailing blank marks a prefix chunk */
 794       p=q->link; q=p; /* the blank itself will not be copied */
 795     } else {
 796       ss++; p=name_dir; /* final chunk: its last byte belongs to the name */
 797     }
 798     strncpy(dest,s,ss-s), dest+=ss-s; /* append the text of this chunk */
 799     s = p->byte_start; /* chunks after the first have no length bytes */
 800   }
 801   *dest='\0'; /* the result is an ordinary terminated string */
 802 }
 803
 804 @ @c
 805 void
 806 print_prefix_name(p) /* write the shortest known prefix of section name |p| to the terminal */
 807 name_pointer p;
 808 {
 809   char *s = first_chunk(p); /* the prefix is taken from the first chunk */
 810   int l = prefix_length(p); /* length stored in the two bytes before the text */
 811   term_write(s,l);
 812   if (s+l<(p+1)->byte_start) term_write("...",3); /* the first chunk holds more than the prefix */
 813 }
 814
 815 @ When we compare two section names, we'll need a function analogous to
 816 |strcmp|. But we do not assume the strings
 817 are null-terminated, and we keep an eye open for prefixes and extensions.
 818
 819 @d less 0 /* the first name is lexicographically less than the second */
 820 @d equal 1 /* the first name is equal to the second */
 821 @d greater 2 /* the first name is lexicographically greater than the second */
 822 @d prefix 3 /* the first name is a proper prefix of the second */
 823 @d extension 4 /* the first name is a proper extension of the second */
 824
 825 @c
 826 int web_strcmp(j,j_len,k,k_len) /* fuller comparison than |strcmp| */
 827   char *j, *k; /* beginning of first and second strings */
 828   int j_len, k_len; /* length of strings */
 829 {
 830   char *j1=j+j_len, *k1=k+k_len; /* one beyond the end of each string */
 831   while (k<k1 && j<j1 && *j==*k) k++, j++; /* skip the common initial part */
 832   if (k==k1) if (j==j1) return equal; /* both strings are used up */
 833     else return extension; /* only the second string is used up */
 834   else if (j==j1) return prefix; /* only the first string is used up */
 835   else if (*j<*k) return less; /* otherwise the first differing characters decide */
 836   else return greater;
 837 }
 838
 839 @ Adding a section name to the tree is straightforward if we know its
 840 parent and whether it's the |rlink| or |llink| of the parent.  As a
 841 special case, when the name is the first section being added, we set the
 842 ``parent'' to |NULL|.  When a section name is created, it has only one
 843 chunk, which however may be just a prefix; the full name will
 844 hopefully be unveiled later.  Obviously, |prefix_length| starts
 845 out as the length of the first chunk, though it may decrease later.
 846
 847 The information associated with a new node must be initialized
 848 differently in \.{CWEAVE} and \.{CTANGLE}; hence the
 849 |init_node| procedure, which is defined differently in \.{cweave.w}
 850 and \.{ctangle.w}.
 851
 852 @<Prede...@>=
 853 extern void init_node();
 854
 855 @ @c
 856 name_pointer
 857 add_section_name(par,c,first,last,ispref) /* install a new node in the tree */
 858 name_pointer par; /* parent of new node */
 859 int c; /* |less| to become the |llink| of |par|, anything else to become its |rlink| */
 860 char *first; /* first character of section name */
 861 char *last; /* last character of section name, plus one */
 862 int ispref; /* are we adding a prefix or a full name? */
 863 {
 864   name_pointer p=name_ptr; /* new node */
 865   char *s=first_chunk(p); /* the text goes after the two length bytes */
 866   int name_len=last-first+ispref; /* length of stored text, counting the blank of a prefix */
 867   if (s+name_len>byte_mem_end) overflow("byte memory");
 868   if (name_ptr+1>=name_dir_end) overflow("name");
 869   (++name_ptr)->byte_start=byte_ptr=s+name_len; /* the next entry marks the end of this chunk */
 870   if (ispref) {
 871     *(byte_ptr-1)=' '; /* a final blank marks the chunk as a prefix */
 872     name_len--; /* the blank is not part of the name */
 873     name_ptr->link=name_dir; /* there are no further chunks yet */
 874     (++name_ptr)->byte_start=byte_ptr; /* so |p+1| is an empty entry that only holds the link */
 875   }
 876   set_prefix_length(p,name_len); /* at first the shortest prefix is the whole chunk */
 877   strncpy(s,first,name_len);
 878   p->llink=NULL;
 879   p->rlink=NULL;
 880   init_node(p);
 881   return par==NULL ? (root=p) : c==less ? (par->llink=p) : (par->rlink=p); /* hook |p| into the tree */
 882 }
 883
 884 @ @c
 885 void
 886 extend_section_name(p,first,last,ispref) /* append one more chunk to section name |p| */
 887 name_pointer p; /* name to be extended */
 888 char *first; /* beginning of extension text */
 889 char *last; /* one beyond end of extension text */
 890 int ispref; /* are we adding a prefix or a full name? */
 891 {
 892   char *s; /* where the text of the new chunk will start */
 893   name_pointer q=p+1; /* the chain of chunks starts at the link of |p+1| */
 894   int name_len=last-first+ispref; /* one extra byte for the blank of a prefix */
 895   if (name_ptr>=name_dir_end) overflow("name");
 896   while (q->link!=name_dir) q=q->link; /* find the last chunk in the chain */
 897   q->link=name_ptr; /* the new chunk follows it */
 898   s=name_ptr->byte_start;
 899   name_ptr->link=name_dir; /* and ends the chain */
 900   if (s+name_len>byte_mem_end) overflow("byte memory");
 901   (++name_ptr)->byte_start=byte_ptr=s+name_len; /* the next entry marks the end of this chunk */
 902   strncpy(s,first,name_len); /* for a prefix this copies one byte beyond the text */
 903   if (ispref) *(byte_ptr-1)=' '; /* that extra byte becomes the blank marking a prefix */
 904 }
 905
 906 @ The |section_lookup| procedure is supposed to find a
 907 section name that matches a new name, installing the new name if
 908 it doesn't match an existing one. The new name is the string
 909 between |first| and |last|; a ``match'' means that the new name
 910 exactly equals or is a prefix or extension of a name in the tree.
 911
 912 @c
 913 name_pointer
 914 section_lookup(first,last,ispref) /* find or install section name in tree */
 915 char *first, *last; /* first and last characters of new name */
 916 int ispref; /* is the new name a prefix or a full name? */
 917 {
 918   int c=0; /* comparison between two names; initialized so some compilers won't complain */
 919   name_pointer p=root; /* current node of the search tree */
 920   name_pointer q=NULL; /* another place to look in the tree */
 921   name_pointer r=NULL; /* where a match has been found */
 922   name_pointer par=NULL; /* parent of |p|, if |r| is |NULL|;
 923             otherwise parent of |r| */
 924   int name_len=last-first+1; /* |last| is the final character itself here, hence the 1 */
 925   @<Look for matches for new name among shortest prefixes, complaining
 926         if more than one is found@>;
 927   @<If no match found, add new name to tree@>; /* returns when |r| is still |NULL| */
 928   @<If one match found, check for compatibility and return match@>; /* every case returns */
 929 }
 930
 931 @ A legal new name matches an existing section name if and only if it
 932 matches the shortest prefix of that section name.  Therefore we can
 933 limit our search for matches to shortest prefixes, which eliminates
 934 the need for chunk-chasing at this stage.
 935
 936 @<Look for matches for new name among...@>=
 937 while (p) { /* compare shortest prefix of |p| with new name */
 938   c=web_strcmp(first,name_len,first_chunk(p),prefix_length(p));
 939   if (c==less || c==greater) { /* new name does not match |p| */
 940     if (r==NULL) /* no previous matches have been found */
 941       par=p; /* a new node would have to hang below |p| */
 942     p=(c==less?p->llink:p->rlink); /* descend on the side given by the comparison */
 943   } else { /* new name matches |p| */
 944     if (r!=NULL) {  /* and also |r|: illegal */
 945       printf("\n! Ambiguous prefix: matches <");
 946 @.Ambiguous prefix ... @>
 947       print_prefix_name(p);
 948       printf(">\n and <");
 949       print_prefix_name(r);
 950       err_print(">");
 951       return name_dir; /* the unsection */
 952     }
 953     r=p; /* remember match */
 954     p=p->llink; /* try another */
 955     q=r->rlink; /* we'll get back here if the new |p| doesn't match */
 956   }
 957   if (p==NULL)
 958     p=q, q=NULL; /* |q| held the other branch of |r| */
 959 }
 960
 961 @ @<If no match ...@>=
 962   if (r==NULL) /* no matches were found */
 963     return add_section_name(par,c,first,last+1,ispref); /* |c| tells on which side of |par| to hang the node; |last+1| is one beyond the name */
 964
 965 @ Although error messages are given in anomalous cases, we do return the
 966 unique best match when a discrepancy is found, because users often
 967 change a title in one place while forgetting to change it elsewhere.
 968
 969 @<If one match found, check for compatibility and return match@>=
 970 switch(section_name_cmp(&first,name_len,r)) {
 971               /* compare all of |r| with new name */
 972   case prefix:
 973     if (!ispref) { /* a name given in full is shorter than what is already known */
 974       printf("\n! New name is a prefix of <");
 975 @.New name is a prefix...@>
 976       print_section_name(r);
 977       err_print(">");
 978     }
 979     else if (name_len<prefix_length(r)) set_prefix_length(r,name_len); /* record a new shortest prefix */
 980     /* fall through */
 981   case equal: return r;
 982   case extension: if (!ispref || first<=last) /* |first| now points past the known part; an empty prefix adds nothing */
 983         extend_section_name(r,first,last+1,ispref);
 984       return r;
 985   case bad_extension: /* |r| was already complete */
 986       printf("\n! New name extends <");
 987 @.New name extends...@>
 988       print_section_name(r);
 989       err_print(">");
 990     return r;
 991   default: /* no match: illegal */
 992     printf("\n! Section name incompatible with <");
 993 @.Section name incompatible...@>
 994     print_prefix_name(r);
 995     printf(">,\n which abbreviates <");
 996     print_section_name(r);
 997     err_print(">");
 998     return r; /* the best match is returned in spite of the error */
 999 }
1000
1001 @ The return codes of |section_name_cmp|, which compares a string with
1002 the full name of a section, are those of |web_strcmp| plus
1003 |bad_extension|, used when the string is an extension of a
1004 supposedly already complete section name.  This function has a side
1005 effect when the comparison string is an extension: It advances the
1006 address of the first character of the string by an amount equal to
1007 the length of the known part of the section name.
1008
1009 The name \.{@@<foo...@@>} should be an acceptable ``abbreviation''
1010 for \.{@@<foo@@>}. If such an abbreviation comes after the complete
1011 name, there's no trouble recognizing it. If it comes before the
1012 complete name, we simply append a null chunk. This logic requires
1013 us to regard \.{@@<foo...@@>} as an ``extension'' of itself.
1014
1015 @d bad_extension 5
1016
1017 @<Predec...@>=
1018 int section_name_cmp();
1019
1020 @ @c
1021 int section_name_cmp(pfirst,len,r) /* compare a string with the full name of section |r| */
1022 char **pfirst; /* pointer to beginning of comparison string */
1023 int len; /* length of string */
1024 name_pointer r; /* section name being compared */
1025 {
1026   char *first=*pfirst; /* beginning of comparison string */
1027   name_pointer q=r+1; /* access to subsequent chunks */
1028   char *ss, *s=first_chunk(r); /* |s| is where the text of the current chunk starts */
1029   int c; /* comparison */
1030   int ispref; /* is chunk |r| a prefix? */
1031   while (1) {
1032     ss=(r+1)->byte_start-1; /* last byte of the current chunk */
1033     if (*ss==' ' && ss>=r->byte_start) ispref=1,q=q->link; /* prefix chunk: |q| becomes the next chunk, if any */
1034     else ispref=0,ss++,q=name_dir; /* final chunk: its last byte belongs to the name */
1035     switch(c=web_strcmp(first,len,s,ss-s)) { /* compare with the text of this chunk only */
1036     case equal: if (q==name_dir)
1037         if (ispref) {
1038           *pfirst=first+(ss-s);
1039           return extension; /* null extension */
1040         } else return equal;
1041       else return (q->byte_start==(q+1)->byte_start)? equal: prefix; /* an empty next chunk adds nothing to the name */
1042     case extension:
1043       if (!ispref) return bad_extension; /* the stored name was already complete */
1044       first += ss-s; /* skip the part that matched this chunk */
1045       if (q!=name_dir) {len -= ss-s; s=q->byte_start; r=q; continue;} /* go on with the next chunk */
1046       *pfirst=first; return extension; /* no more chunks: tell the caller where the new text starts */
1047     default: return c;
1048     }
1049   }
1050 }
1051
1052 @ The last component of |name_info| is different for \.{CTANGLE} and
1053 \.{CWEAVE}.  In \.{CTANGLE}, if |p| is a pointer to a section name,
1054 |p->equiv| is a pointer to its replacement text, an element of the
1055 array |text_info|.  In \.{CWEAVE}, on the other hand, if
1056 |p| points to an identifier, |p->xref| is a pointer to its
1057 list of cross-references, an element of the array |xmem|.  The make-up
1058 of |text_info| and |xmem| is discussed in the \.{CTANGLE} and \.{CWEAVE}
1059 source files, respectively; here we just declare a common field
1060 |equiv_or_xref| as a pointer to a |char|.
1061
1062 @<More elements of |name...@>=
1063 char *equiv_or_xref; /* info corresponding to names */
1064
1065 @** Reporting errors to the user.
1066 A global variable called |history| will contain one of four values
1067 at the end of every run: |spotless| means that no unusual messages were
1068 printed; |harmless_message| means that a message of possible interest
1069 was printed but no serious errors were detected; |error_message| means that
1070 at least one error was found; |fatal_message| means that the program
1071 terminated abnormally. The value of |history| does not influence the
1072 behavior of the program; it is simply computed for the convenience
1073 of systems that might want to use such information.
1074
1075 @d spotless 0 /* |history| value for normal jobs */
1076 @d harmless_message 1 /* |history| value when non-serious info was printed */
1077 @d error_message 2 /* |history| value when an error was noted */
1078 @d fatal_message 3 /* |history| value when we had to stop prematurely */
1079 @d mark_harmless {if (history==spotless) history=harmless_message;}
1080 @d mark_error history=error_message
1081
1082 @<Definit...@>=
1083 int history=spotless; /* indicates how bad this run was */
1084
1085 @ The command `|err_print("! Error message")|' will report a syntax error to
1086 the user, by printing the error message at the beginning of a new line and
1087 then giving an indication of where the error was spotted in the source file.
1088 Note that no period follows the error message, since the error routine
1089 will automatically supply a period. A newline is automatically supplied
1090 if the string begins with |"!"|.
1091
1092 @<Predecl...@>=
1093 void  err_print();
1094
1095 @ @c
1096 void
1097 err_print(s) /* prints `\..' and location of error message */
1098 char *s; /* the message; a leading |'!'| asks for a fresh line */
1099 {
1100   char *k,*l; /* pointers into |buffer| */
1101   printf(*s=='!'? "\n%s" : "%s",s); /* the message is passed as data, never as the format */
1102   if(web_file_open) @<Print error location based on input buffer@>; /* no location is shown otherwise */
1103   update_terminal; mark_error; /* flush the output and record that an error occurred */
1104 }
1105
1106 @ The error locations can be indicated by using the global variables
1107 |loc|, |cur_line|, |cur_file_name| and |changing|,
1108 which tell respectively the first
1109 unlooked-at position in |buffer|, the current line number, the current
1110 file, and whether the current line is from |change_file| or |cur_file|.
1111 This routine should be modified on systems whose standard text editor
1112 has special line-numbering conventions.
1113 @^system dependencies@>
1114
1115 @<Print error location based on input buffer@>=
1116 {if (changing && include_depth==change_depth) /* the current line comes from the change file */
1117   printf(". (l. %d of change file)\n", change_line);
1118 else if (include_depth==0) printf(". (l. %d)\n", cur_line); /* it comes from the main file */
1119   else printf(". (l. %d of include file %s)\n", cur_line, cur_file_name);
1120 l= (loc>=limit? limit: loc); /* |l| is |loc|, but never beyond the end of the line */
1121 if (l>buffer) {
1122   for (k=buffer; k<l; k++)
1123     if (*k=='\t') putchar(' '); /* a tab takes one column, like the blank printed for it below */
1124     else putchar(*k); /* print the characters already read */
1125   putchar('\n');
1126   for (k=buffer; k<l; k++) putchar(' '); /* space out the next line */
1127 }
1128 for (k=l; k<limit; k++) putchar(*k); /* print the part not yet read */
1129 if (*limit=='|') putchar('|'); /* end of \CEE/ text in section names */
1130 putchar(' '); /* to separate the message from future asterisks */
1131 }
1132
1133 @ When no recovery from some error has been provided, we have to wrap
1134 up and quit as graciously as possible.  This is done by calling the
1135 function |wrap_up| at the end of the code.
1136
1137 \.{CTANGLE} and \.{CWEAVE} have their own notions about how to
1138 print the job statistics.
1139
1140 @<Prede...@>=
1141 int wrap_up();
1142 extern void print_stats();
1143
1144 @ Some implementations may wish to pass the |history| value to the
1145 operating system so that it can be used to govern whether or not other
1146 programs are started. Here, for instance, we pass the operating system
1147 a status of 0 if and only if nothing worse than a harmless message was printed.
1148 @^system dependencies@>
1149
1150 @c
1151 int wrap_up() { /* finish the run; the result is used by |fatal| as the exit status */
1152   putchar('\n');
1153   if (show_stats)
1154     print_stats(); /* print statistics about memory usage */
1155   @<Print the job |history|@>;
1156   if (history > harmless_message) return(1); /* an error or a fatal error was noted */
1157   else return(0); /* the run was spotless or had only harmless messages */
1158 }
1159
1160 @ @<Print the job |history|@>=
1161 switch (history) {
1162 case spotless: if (show_happiness) printf("(No errors were found.)\n"); break; /* printed only when |show_happiness| is set */
1163 case harmless_message:
1164   printf("(Did you see the warning message above?)\n"); break;
1165 case error_message:
1166   printf("(Pardon me, but I think I spotted something wrong.)\n"); break;
1167 case fatal_message: printf("(That was a fatal error, my friend.)\n"); /* last case, so no |break| is needed */
1168 } /* there are no other cases */
1169
1170 @ When there is no way to recover from an error, the |fatal| subroutine is
1171 invoked. This happens most often when |overflow| occurs.
1172
1173 @<Predec...@>=
1174 void fatal(), overflow();
1175
1176 @ The two parameters to |fatal| are strings that are essentially
1177 concatenated to print the final error message.
1178
1179 @c void
1180 fatal(s,t) /* print |s| and |t| as one final error message, then stop the program */
1181   char *s,*t; /* |s| is printed as it stands; |t| is handed to |err_print| */
1182 {
1183   if (*s) printf("%s",s); /* |s| is data: it must never serve as the format string itself */
1184   err_print(t); /* this also shows the error location and marks an error */
1185   history=fatal_message; exit(wrap_up()); /* the exit status comes from |wrap_up| */
1186 }
1187
1188 @ An overflow stop occurs if \.{CWEB}'s tables aren't large enough.
1189
1190 @c void
1191 overflow(t) /* report that one of the tables is full, and stop */
1192   char *t; /* name of the table, inserted into the message */
1193 {
1194   printf("\n! Sorry, %s capacity exceeded",t); fatal("",""); /* with two empty strings |fatal| adds only the error location before stopping */
1195 }
1196 @.Sorry, capacity exceeded@>
1197
1198 @ Sometimes the program's behavior is far different from what it should be,
1199 and \.{CWEB} prints an error message that is really for the \.{CWEB}
1200 maintenance person, not the user. In such cases the program says
1201 |confusion("indication of where we are")|.
1202
1203 @d confusion(s) fatal("! This can't happen: ",s)
1204 @.This can't happen@>
1205
1206 @** Command line arguments.
1207 The user calls \.{CWEAVE} and \.{CTANGLE} with arguments on the command line.
1208 These are either file names or flags to be turned off (beginning with |"-"|)
1209 or flags to be turned on (beginning with |"+"|).
1210 The following globals are for communicating the user's desires to the rest
1211 of the program. The various file name variables contain strings with
1212 the names of those files. Most of the 128 flags are undefined but available
1213 for future extensions.
1214
1215 @d show_banner flags['b'] /* should the banner line be printed? */
1216 @d show_progress flags['p'] /* should progress reports be printed? */
1217 @d show_stats flags['s'] /* should statistics be printed at end of run? */
1218 @d show_happiness flags['h'] /* should lack of errors be announced? */
1219
1220 @<Defin...@>=
1221 int argc; /* copy of |ac| parameter to |main| */
1222 char **argv; /* copy of |av| parameter to |main| */
1223 char C_file_name[max_file_name_length]; /* name of |C_file| */
1224 char tex_file_name[max_file_name_length]; /* name of |tex_file| */
1225 char idx_file_name[max_file_name_length]; /* name of |idx_file| */
1226 char scn_file_name[max_file_name_length]; /* name of |scn_file| */
1227 boolean flags[128]; /* an option for each 7-bit code */
1228
1229 @ The |flags| will be initially zero. Some of them are set to~1 before
1230 scanning the arguments; if additional flags are 1 by default they
1231 should be set before calling |common_init|.
1232
1233 @<Set the default options common to \.{CTANGLE} and \.{CWEAVE}@>=
1234 show_banner=show_happiness=show_progress=1;
1235
1236 @ We now must look at the command line arguments and set the file names
1237 accordingly.  At least one file name must be present: the \.{CWEB}
1238 file.  It may have an extension, or it may omit the extension to get |".w"| or
1239 |".web"| added.  The \TEX/ output file name is formed by replacing the \.{CWEB}
1240 file name extension by |".tex"|, and the \CEE/ file name by replacing
1241 the extension by |".c"|, after removing the directory name (if any).
1242
1243 If there is a second file name present among the arguments, it is the
1244 change file, again either with an extension or without one to get |".ch"|.
1245 An omitted change file argument, or the argument |"-"|, means that no changes
1246 are desired; |"/dev/null"| is then used as the change file.
1247 @^system dependencies@>
1248
1249 If there's a third file name, it will be the output file.
1250
1251 @<Pred...@>=
1252 void scan_args();
1253
1254 @ @c
1255 void
1256 scan_args() /* set the flags and the file names from the command line */
1257 {
1258   char *dot_pos; /* position of |'.'| in the argument */
1259   char *name_pos; /* file name beginning, sans directory */
1260   register char *s; /* register for scanning strings */
1261   boolean found_web=0,found_change=0,found_out=0;
1262              /* have these names been seen? */
1263   boolean flag_change; /* value to be stored into the flags of the current argument */
1264
1265   while (--argc > 0) {
1266     if ((**(++argv)=='-'||**argv=='+')&&*(*argv+1)) @<Handle flag argument@>@; /* a sign with nothing after it is not a flag */
1267     else {
1268       s=name_pos=*argv;@+dot_pos=NULL;
1269       while (*s) {
1270         if (*s=='.') dot_pos=s++; /* remember the most recent dot */
1271         else if (*s=='/') dot_pos=NULL,name_pos=++s; /* a dot inside a directory name does not count */
1272         else s++;
1273       }
1274       if (!found_web) @<Make
1275        |web_file_name|, |tex_file_name|, and |C_file_name|@>@;
1276       else if (!found_change) @<Make |change_file_name| from |fname|@>@;
1277       else if (!found_out) @<Override |tex_file_name| and |C_file_name|@>@;
1278         else @<Print usage error message and quit@>; /* a fourth file name is an error */
1279     }
1280   }
1281   if (!found_web) @<Print usage error message and quit@>;
1282   if (found_change<=0) strcpy(change_file_name,"/dev/null"); /* no change file, or it was given as |"-"|; assumes |boolean| can hold the $-1$ stored for that case --- TODO confirm */
1283 }
1284
1285 @ We use all of |*argv| for the |web_file_name| if there is a |'.'| in it,
1286 otherwise we add |".w"|. In case this file can't be opened, we also prepare an
1287 |alt_web_file_name| by adding |".web"| to the argument minus its extension.
1288 The other file names come from adding other things
1289 after the dot.  We must check that there is enough room in
1290 |web_file_name| and the other arrays for the argument.
1291
1292 @<Make |web_file_name|...@>=
1293 {
1294   if (s-*argv > max_file_name_length-5) /* leave room for the longest suffix added below and the final null */
1295     @<Complain about argument length@>;
1296   if (dot_pos==NULL)
1297     sprintf(web_file_name,"%s.w",*argv); /* no extension was given */
1298   else {
1299     strcpy(web_file_name,*argv); /* the argument is used as it stands */
1300     *dot_pos=0; /* string now ends where the dot was */
1301   }
1302   sprintf(alt_web_file_name,"%s.web",*argv); /* built from the argument without its extension */
1303   sprintf(tex_file_name,"%s.tex",name_pos); /* strip off directory name */
1304   sprintf(idx_file_name,"%s.idx",name_pos);
1305   sprintf(scn_file_name,"%s.scn",name_pos);
1306   sprintf(C_file_name,"%s.c",name_pos);
1307   found_web=1;
1308 }
1309
1310 @ @<Make |change_file_name|...@>=
1311 {
1312   if (strcmp(*argv,"-")==0) found_change=-1; /* a lone minus sign stands for no change file */
1313   else {
1314     if (s-*argv > max_file_name_length-4) /* leave room for the suffix added below and the final null */
1315       @<Complain about argument length@>;
1316     if (dot_pos==NULL)
1317       sprintf(change_file_name,"%s.ch",*argv); /* no extension was given */
1318     else strcpy(change_file_name,*argv); /* the argument is used as it stands */
1319     found_change=1;
1320   }
1321 }
1322
1323 @ @<Override...@>=
1324 {
1325   if (s-*argv > max_file_name_length-5) /* leave room for the longest suffix added below and the final null */
1326     @<Complain about argument length@>;
1327   if (dot_pos==NULL) { /* no extension: add the standard ones, keeping any directory part */
1328     sprintf(tex_file_name,"%s.tex",*argv);
1329     sprintf(idx_file_name,"%s.idx",*argv);
1330     sprintf(scn_file_name,"%s.scn",*argv);
1331     sprintf(C_file_name,"%s.c",*argv);
1332   } else { /* an extension was given: both output names are the argument as it stands */
1333     strcpy(tex_file_name,*argv);
1334     strcpy(C_file_name,*argv);
1335     if (flags['x']) { /* indexes will be generated */
1336       *dot_pos=0; /* cut the extension off before adding the new ones */
1337       sprintf(idx_file_name,"%s.idx",*argv);
1338       sprintf(scn_file_name,"%s.scn",*argv);
1339     }
1340   }
1341   found_out=1;
1342 }
1343
1344 @ @<Handle flag...@>=
1345 {
1346   if (**argv=='-') flag_change=0; /* a leading minus sign turns flags off */
1347   else flag_change=1; /* a leading plus sign turns them on */
1348   for(dot_pos=*argv+1;*dot_pos>'\0';dot_pos++) /* |dot_pos| merely scans the flag letters here */
1349     if ((unsigned char)*dot_pos<128) flags[(unsigned char)*dot_pos]=flag_change; /* never index outside the 128 entries of |flags|, even where |char| is unsigned */
1350 }
1351
1352 @ @<Print usage error message and quit@>=
1353 {
1354 if (program==ctangle) /* the usage line names the program that is running */
1355   fatal(
1356 "! Usage: ctangle [options] webfile[.w] [{changefile[.ch]|-} [outfile[.c]]]\n"
1357    ,""); /* the whole message is in the first string; |fatal| exits */
1358 @.Usage:@>
1359 else fatal(
1360 "! Usage: cweave [options] webfile[.w] [{changefile[.ch]|-} [outfile[.tex]]]\n"
1361    ,"");
1362 }
1363
1364 @ @<Complain about arg...@>= fatal("! Filename too long\n", *argv);
1365 @.Filename too long@>
1366
1367 @** Output. Here is the code that opens the output file:
1368 @^system dependencies@>
1369
1370 @<Defin...@>=
1371 FILE *C_file; /* where output of \.{CTANGLE} goes */
1372 FILE *tex_file; /* where output of \.{CWEAVE} goes */
1373 FILE *idx_file; /* where index from \.{CWEAVE} goes */
1374 FILE *scn_file; /* where list of sections from \.{CWEAVE} goes */
1375 FILE *active_file; /* currently active file for \.{CWEAVE} output */
1376
1377 @ @<Scan arguments and open output files@>=
1378 scan_args(); /* this sets the file names used below */
1379 if (program==ctangle) { /* open the \CEE/ output file */
1380   if ((C_file=fopen(C_file_name,"w"))==NULL)
1381     fatal("! Cannot open output file ", C_file_name);
1382 @.Cannot open output file@>
1383 }
1384 else { /* otherwise open the \TEX/ output file */
1385   if ((tex_file=fopen(tex_file_name,"w"))==NULL)
1386     fatal("! Cannot open output file ", tex_file_name);
1387 }
1388
1389 @ The |update_terminal| procedure is called when we want
1390 to make sure that everything we have output to the terminal so far has
1391 actually left the computer's internal buffers and been sent.
1392 @^system dependencies@>
1393
1394 @d update_terminal fflush(stdout) /* empty the terminal output buffer */
1395
1396 @ Terminal output uses |putchar| and |putc| when we have to
1397 translate from \.{CWEB}'s code into the external character code,
1398 and |printf| when we just want to print strings.
1399 Several macros make other kinds of output convenient.
1400 @^system dependencies@>
1401 @d new_line putchar('\n') @d putxchar putchar
1402 @d term_write(a,b) fflush(stdout),fwrite(a,sizeof(char),b,stdout)
1403 @d C_printf(c,a) fprintf(C_file,c,a)
1404 @d C_putc(c) putc(c,C_file) /* isn't \CEE/ wonderfully consistent? */
1405
1406 @ We predeclare several standard system functions here instead of including
1407 their system header files, because the names of the header files are not as
1408 standard as the names of the functions. (For example, some \CEE/ environments
1409 have \.{<string.h>} where others have \.{<strings.h>}.)
1410
1411 @<Predecl...@>=
1412 extern int strlen(); /* length of string */
1413 extern int strcmp(); /* compare strings lexicographically */
1414 extern char* strcpy(); /* copy one string to another */
1415 extern int strncmp(); /* compare up to $n$ string characters */
1416 extern char* strncpy(); /* copy up to $n$ string characters */
1417
1418 @** Index.