beta-0.89.2
[luatex.git] / source / texk / web2c / cwebdir / ctangle.w
blob86d0d0a5103ba5fd7f4908a47e89a43572b7e562
1 % This file is part of CWEB.
2 % This program by Silvio Levy and Donald E. Knuth
3 % is based on a program by Knuth.
4 % It is distributed WITHOUT ANY WARRANTY, express or implied.
5 % Version 3.64 --- February 2002
6 % (same as Version 3.5 except for minor corrections)
7 % (also quotes backslashes in file names of #line directives)
9 % Copyright (C) 1987,1990,1993,2000 Silvio Levy and Donald E. Knuth
11 % Permission is granted to make and distribute verbatim copies of this
12 % document provided that the copyright notice and this permission notice
13 % are preserved on all copies.
15 % Permission is granted to copy and distribute modified versions of this
16 % document under the conditions for verbatim copying, provided that the
17 % entire resulting derived work is given a different name and distributed
18 % under the terms of a permission notice identical to this one.
20 % Here is TeX material that gets inserted after \input cwebmac
21 \def\hang{\hangindent 3em\indent\ignorespaces}
22 \def\pb{$\.|\ldots\.|$} % C brackets (|...|)
23 \def\v{\char'174} % vertical (|) in typewriter font
24 \mathchardef\RA="3221 % right arrow
25 \mathchardef\BA="3224 % double arrow
27 \def\title{CTANGLE (Version 3.64)}
28 \def\topofcontents{\null\vfill
29 \centerline{\titlefont The {\ttitlefont CTANGLE} processor}
30 \vskip 15pt
31 \centerline{(Version 3.64)}
32 \vfill}
33 \def\botofcontents{\vfill
34 \noindent
35 Copyright \copyright\ 1987, 1990, 1993, 2000 Silvio Levy and Donald E. Knuth
36 \bigskip\noindent
37 Permission is granted to make and distribute verbatim copies of this
38 document provided that the copyright notice and this permission notice
39 are preserved on all copies.
41 \smallskip\noindent
42 Permission is granted to copy and distribute modified versions of this
43 document under the conditions for verbatim copying, provided that the
44 entire resulting derived work is given a different name and distributed
45 under the terms of a permission notice identical to this one.
47 \pageno=\contentspagenumber \advance\pageno by 1
48 \let\maybe=\iftrue
49 @s not_eq normal @q unreserve a C++ keyword @>
51 @** Introduction.
52 This is the \.{CTANGLE} program by Silvio Levy and Donald E. Knuth,
53 based on \.{TANGLE} by Knuth.
54 We are thankful to
55 Nelson Beebe, Hans-Hermann Bode (to whom the \CPLUSPLUS/ adaptation is due),
56 Klaus Guntermann, Norman Ramsey, Tomas Rokicki, Joachim Schnitter,
57 Joachim Schrod, Lee Wittenberg, and others who have contributed improvements.
59 The ``banner line'' defined here should be changed whenever \.{CTANGLE}
60 is modified.
62 @d banner "This is CTANGLE (Version 3.64)\n"
65 @<Include files@>@/
67 @<Common code for \.{CWEAVE} and \.{CTANGLE}@>@/
68 @<Typedef declarations@>@/
69 @<Global variables@>@/
70 @<Predeclaration of procedures@>@/
72 @ We predeclare several standard system functions here instead of including
73 their system header files, because the names of the header files are not as
74 standard as the names of the functions. (For example, some \CEE/ environments
75 have \.{<string.h>} where others have \.{<strings.h>}.)
77 @<Predecl...@>=
78 extern int strlen(); /* length of string; NOTE(review): ISO \CEE/ declares the result as |size_t|, so this |int| predeclaration may clash with \.{<string.h>} --- confirm on 64-bit targets */
79 extern int strcmp(); /* compare strings lexicographically; zero means equal */
80 extern char* strcpy(); /* copy one string to another */
81 extern int strncmp(); /* compare up to $n$ string characters; zero means equal */
82 extern char* strncpy(); /* copy up to $n$ string characters */
84 @ \.{CTANGLE} has a fairly straightforward outline. It operates in
85 two phases: First it reads the source file, saving the \CEE/ code in
86 compressed form; then it shuffles and outputs the code.
88 Please read the documentation for \.{common}, the set of routines common
89 to \.{CTANGLE} and \.{CWEAVE}, before proceeding further.
92 int main (ac, av) /* the two-phase driver: read and compress, then write */
93 int ac; /* argument count, copied to |argc| */
94 char **av; /* argument vector, copied to |argv| */
96 argc=ac; argv=av; /* save the command line in variables declared outside this function */
97 program=ctangle; /* presumably tells the shared code which program is running --- confirm in |"common.w"| */
98 @<Set initial values@>; /* every ``Set initial values'' section of this file is inserted here */
99 common_init(); /* defined outside this file; presumably initializes the shared tables --- confirm in |"common.w"| */
100 if (show_banner) printf(banner); /* print a ``banner line''; |banner| serves as the format string, so it must contain no conversion specifications */
101 phase_one(); /* read all the user's text and compress it into |tok_mem| */
102 phase_two(); /* output the contents of the compressed tables */
103 return wrap_up(); /* and exit gracefully; the result of |wrap_up| becomes the exit status */
106 @ The following parameters were sufficient in the original \.{TANGLE} to
107 handle \TEX/,
108 so they should be sufficient for most applications of \.{CTANGLE}.
109 If you change |max_bytes|, |max_names|, or |hash_size| you should also
110 change them in the file |"common.w"|.
112 @d max_bytes 90000 /* the number of bytes in identifiers,
113 index entries, and section names; used in |"common.w"| */
114 @d max_toks 270000 /* number of bytes in compressed \CEE/ code; the size of |tok_mem| */
115 @d max_names 4000 /* number of identifiers, strings, section names;
116 must be less than 10240; used in |"common.w"| */
117 @d max_texts 2500 /* number of replacement texts, must be less than 10240; the size of |text_info| and the value of |section_flag| */
118 @d hash_size 353 /* should be prime; used in |"common.w"| */
119 @d longest_name 10000 /* section names shouldn't be longer than this; also the size of |output_file_name| */
120 @d stack_size 50 /* number of simultaneous levels of macro expansion; |stack| has one more entry than this */
121 @d buf_size 100 /* for \.{CWEAVE} and \.{CTANGLE}; NOTE(review): its use is not visible in this part of the file --- confirm in |"common.w"| */
123 @ The next few sections contain stuff from the file |"common.w"| that must
124 be included in both |"ctangle.w"| and |"cweave.w"|. It appears in
125 file |"common.h"|, which needs to be updated when |"common.w"| changes.
127 @i common.h
129 @* Data structures exclusive to {\tt CTANGLE}.
130 We've already seen that the |byte_mem| array holds the names of identifiers,
131 strings, and sections;
132 the |tok_mem| array holds the replacement texts
133 for sections. Allocation is sequential, since things are deleted only
134 during Phase II, and only in a last-in-first-out manner.
136 A \&{text} variable is a structure containing a pointer into
137 |tok_mem|, which tells where the corresponding text starts, and an
138 integer |text_link|, which, as we shall see later, is used to connect
139 pieces of text that have the same name. All the \&{text}s are stored in
140 the array |text_info|, and we use a |text_pointer| variable to refer
141 to them.
143 The first position of |tok_mem| that is unoccupied by
144 replacement text is called |tok_ptr|, and the first unused location of
145 |text_info| is called |text_ptr|. Thus we usually have the identity
146 |text_ptr->tok_start==tok_ptr|.
148 If your machine does not support |unsigned char| you should change
149 the definition of \&{eight\_bits} to |unsigned short|.
150 @^system dependencies@>
152 @<Typed...@>=
153 typedef struct { /* one replacement text */
154 eight_bits *tok_start; /* where this text begins in |tok_mem|; it ends where the next entry's |tok_start| begins */
155 sixteen_bits text_link; /* zero for a macro; otherwise |section_flag| or the |text_info| index of the continuation */
156 } text;
157 typedef text *text_pointer; /* refers to an element of |text_info| */
159 @ @<Glob...@>=
160 text text_info[max_texts]; /* one entry per replacement text */
161 text_pointer text_info_end=text_info+max_texts-1; /* the last element itself, not one past it */
162 text_pointer text_ptr; /* first unused position in |text_info| */
163 eight_bits tok_mem[max_toks]; /* the compressed replacement texts */
164 eight_bits *tok_mem_end=tok_mem+max_toks-1; /* the last byte itself, not one past it */
165 eight_bits *tok_ptr; /* first unused position in |tok_mem| */
167 @ @<Set init...@>=
168 text_info->tok_start=tok_ptr=tok_mem; /* text 0 starts at the beginning, and nothing is stored yet */
169 text_ptr=text_info+1; text_ptr->tok_start=tok_mem; /* text 1 is the first unused entry and starts at the same place */
170 /* this makes replacement text 0 of length zero */
172 @ If |p| is a pointer to a section name, |p->equiv| is a pointer to its
173 replacement text, an element of the array |text_info|.
175 @d equiv equiv_or_xref /* info corresponding to names */
177 @ @<Set init...@>=
178 name_dir->equiv=(char *)text_info; /* the undefined section has no replacement text */
180 @ Here's the procedure that decides whether a name of length |l|
181 starting at position |first| equals the identifier pointed to by |p|:
184 int names_match(p,first,l) /* returns 1 if the two names are equal, 0 otherwise */
185 name_pointer p; /* points to the proposed match */
186 char *first; /* position of first character of string */
187 int l; /* length of identifier */
189 if (length(p)!=l) return 0; /* the lengths differ, so the names cannot match */
190 return !strncmp(first,p->byte_start,l); /* the lengths agree: compare the |l| characters */
193 @ The common lookup routine refers to separate routines |init_node| and
194 |init_p| when the data structure grows. Actually |init_p| is called only by
195 \.{CWEAVE}, but we need to declare a dummy version so that
196 the loader won't complain of its absence.
199 void
200 init_node(node) /* gives a name the replacement text of length zero */
201 name_pointer node; /* the name entry being initialized */
203 node->equiv=(char *)text_info; /* the same value that marks an undefined section in |get_output| */
205 void
206 init_p() {} /* deliberately empty: only \.{CWEAVE} needs a real version */
208 @* Tokens.
209 Replacement texts, which represent \CEE/ code in a compressed format,
210 appear in |tok_mem| as mentioned above. The codes in
211 these texts are called `tokens'; some tokens occupy two consecutive
212 eight-bit byte positions, and the others take just one byte.
214 If $p$ points to a replacement text, |p->tok_start| is the |tok_mem| position
215 of the first eight-bit code of that text. If |p->text_link==0|,
216 this is the replacement text for a macro, otherwise it is the replacement
217 text for a section. In the latter case |p->text_link| is either equal to
218 |section_flag|, which means that there is no further text for this section, or
219 |p->text_link| points to a continuation of this replacement text; such
220 links are created when several sections have \CEE/ texts with the same
221 name, and they also tie together all the \CEE/ texts of unnamed sections.
222 The replacement text pointer for the first unnamed section appears in
223 |text_info->text_link|, and the most recent such pointer is |last_unnamed|.
225 @d section_flag max_texts /* final |text_link| in section replacement texts */
227 @<Glob...@>=
228 text_pointer last_unnamed; /* most recent replacement text of unnamed section */
230 @ @<Set init...@>= last_unnamed=text_info; text_info->text_link=0;
232 @ If the first byte of a token is less than |0200|, the token occupies a
233 single byte. Otherwise we make a sixteen-bit token by combining two consecutive
234 bytes |a| and |b|. If |0200<=a<0250|, then |(a-0200)@t${}\times2^8$@>+b|
235 points to an identifier; if |0250<=a<0320|, then
236 |(a-0250)@t${}\times2^8$@>+b| points to a section name
237 (or, if it has the special value |output_defs_flag|,
238 to the area where the preprocessor definitions are stored); and if
239 |0320<=a<0400|, then |(a-0320)@t${}\times2^8$@>+b| is the number of the section
240 in which the current replacement text appears.
242 Codes less than |0200| are 7-bit |char| codes that represent themselves.
243 Some of the 7-bit codes will not be present, however, so we can
244 use them for special purposes. The following symbolic names are used:
246 \yskip \hang |join| denotes the concatenation of adjacent items with no
247 space or line breaks allowed between them (the \.{@@\&} operation of \.{CWEB}).
249 \hang |string| denotes the beginning or end of a string, verbatim
250 construction or numerical constant.
251 @^ASCII code dependencies@>
253 @d string 02 /* takes the place of extended ASCII \.{\char2} */
254 @d join 0177 /* takes the place of ASCII delete */
255 @d output_defs_flag (2*024000-1)
257 @ The following procedure is used to enter a two-byte value into
258 |tok_mem| when a replacement text is being generated.
261 void
262 store_two_bytes(x) /* appends |x| to |tok_mem|, high byte first */
263 sixteen_bits x; /* the sixteen-bit value to be stored */
265 if (tok_ptr+2>tok_mem_end) overflow("token"); /* both bytes must fit before |tok_mem_end| */
266 *tok_ptr++=x>>8; /* store high byte */
267 *tok_ptr++=x&0377; /* store low byte */
270 @** Stacks for output. The output process uses a stack to keep track
271 of what is going on at different ``levels'' as the sections are being
272 written out. Entries on this stack have five parts:
274 \yskip\hang |end_field| is the |tok_mem| location where the replacement
275 text of a particular level will end;
277 \hang |byte_field| is the |tok_mem| location from which the next token
278 on a particular level will be read;
280 \hang |name_field| points to the name corresponding to a particular level;
282 \hang |repl_field| points to the replacement text currently being read
283 at a particular level;
285 \hang |section_field| is the section number, or zero if this is a macro.
287 \yskip\noindent The current values of these five quantities are referred to
288 quite frequently, so they are stored in a separate place instead of in
289 the |stack| array. We call the current values |cur_end|, |cur_byte|,
290 |cur_name|, |cur_repl|, and |cur_section|.
292 The global variable |stack_ptr| tells how many levels of output are
293 currently in progress. The end of all output occurs when the stack is
294 empty, i.e., when |stack_ptr==stack|.
296 @<Typed...@>=
297 typedef struct { /* one level of the output process */
298 eight_bits *end_field; /* ending location of replacement text */
299 eight_bits *byte_field; /* present location within replacement text */
300 name_pointer name_field; /* the name whose replacement text is being output */
301 text_pointer repl_field; /* the |text_info| entry being output */
302 sixteen_bits section_field; /* section number or zero if not a section */
303 } output_state;
304 typedef output_state *stack_pointer; /* refers to an element of |stack| */
306 @ @d cur_end cur_state.end_field /* current ending location in |tok_mem| */
307 @d cur_byte cur_state.byte_field /* location of next output byte in |tok_mem|*/
308 @d cur_name cur_state.name_field /* pointer to current name being expanded */
309 @d cur_repl cur_state.repl_field /* pointer to current replacement text */
310 @d cur_section cur_state.section_field /* current section number being expanded */
312 @<Global...@>=
313 output_state cur_state; /* |cur_end|, |cur_byte|, |cur_name|, |cur_repl|,
314 and |cur_section| */
315 output_state stack[stack_size+1]; /* info for non-current levels */
316 stack_pointer stack_ptr; /* first unused location in the output state stack */
317 stack_pointer stack_end=stack+stack_size; /* last element of |stack|; |push_level| reports overflow when |stack_ptr| reaches it */
319 @ To get the output process started, we will perform the following
320 initialization steps. We may assume that |text_info->text_link| is nonzero,
321 since it points to the \CEE/ text in the first unnamed section that generates
322 code; if there are no such sections, there is nothing to output, and an
323 error message will have been generated before we do any of the initialization.
325 @<Initialize the output stacks@>=
326 stack_ptr=stack+1; cur_name=name_dir; cur_repl=text_info->text_link+text_info; /* one level is active, reading the first unnamed section's text */
327 cur_byte=cur_repl->tok_start; cur_end=(cur_repl+1)->tok_start; cur_section=0; /* that text runs up to the start of the next |text_info| entry */
329 @ When the replacement text for name |p| is to be inserted into the output,
330 the following subroutine is called to save the old level of output and get
331 the new one going.
333 We assume that the \CEE/ compiler can copy structures.
334 @^system dependencies@>
337 void
338 push_level(p) /* suspends the current level */
339 name_pointer p; /* the name to expand, or |NULL| to save the state without switching */
341 if (stack_ptr==stack_end) overflow("stack"); /* no room for another level */
342 *stack_ptr=cur_state; /* save the interrupted level */
343 stack_ptr++;
344 if (p!=NULL) { /* |p==NULL| means we are in |output_defs| */
345 cur_name=p; cur_repl=(text_pointer)p->equiv; /* switch to the first replacement text of |p| */
346 cur_byte=cur_repl->tok_start; cur_end=(cur_repl+1)->tok_start;
347 cur_section=0; /* |get_output| sets this when it reads a section-number token */
351 @ When we come to the end of a replacement text, the |pop_level| subroutine
352 does the right thing: It either moves to the continuation of this replacement
353 text or returns the state to the most recently stacked level.
356 void
357 pop_level(flag) /* do this when |cur_byte| reaches |cur_end| */
358 int flag; /* |flag==0| means we are in |output_defs| */
360 if (flag && cur_repl->text_link<section_flag) { /* link to a continuation */
361 cur_repl=cur_repl->text_link+text_info; /* stay on the same level */
362 cur_byte=cur_repl->tok_start; cur_end=(cur_repl+1)->tok_start; /* read the continuation from its start to the next entry's start */
363 return;
365 stack_ptr--; /* go down to the previous level */
366 if (stack_ptr>stack) cur_state=*stack_ptr; /* restore the saved state unless the stack is now empty */
369 @ The heart of the output procedure is the function |get_output|,
370 which produces the next token of output and sends it on to the lower-level
371 function |out_char|. The main purpose of |get_output| is to handle the
372 necessary stacking and unstacking. It sends the value |section_number|
373 if the next output begins or ends the replacement text of some section,
374 in which case |cur_val| is that section's number (if beginning) or the
375 negative of that value (if ending). (A section number of 0 indicates
376 not the beginning or ending of a section, but a \&{\#line} command.)
377 And it sends the value |identifier|
378 if the next output is an identifier, in which case
379 |cur_val| points to that identifier name.
381 @d section_number 0201 /* code returned by |get_output| for section numbers */
382 @d identifier 0202 /* code returned by |get_output| for identifiers */
384 @<Global...@>=
385 int cur_val; /* additional information corresponding to output token */
387 @ If |get_output| finds that no more output remains, it returns with
388 |stack_ptr==stack|.
389 @^high-bit character handling@>
392 void
393 get_output() /* sends next token to |out_char| */
395 sixteen_bits a; /* value of current byte */
396 restart: if (stack_ptr==stack) return; /* the stack is empty: nothing is left to output */
397 if (cur_byte==cur_end) { /* the current text is exhausted */
398 cur_val=-((int)cur_section); /* cast needed because of sign extension */
399 pop_level(1);
400 if (cur_val==0) goto restart; /* no section number was recorded, so no end marker is sent */
401 out_char(section_number); return; /* |cur_val| is negative here, marking the end of a section */
403 a=*cur_byte++;
404 if (out_state==verbatim && a!=string && a!=constant && a!='\n')
405 C_putc(a); /* a high-bit character can occur in a string */
406 else if (a<0200) out_char(a); /* one-byte token */
407 else {
408 a=(a-0200)*0400+*cur_byte++; /* combine two bytes into a sixteen-bit token */
409 switch (a/024000) { /* |024000==(0250-0200)*0400| */
410 case 0: cur_val=a; out_char(identifier); break; /* an identifier */
411 case 1: if (a==output_defs_flag) output_defs(); /* a section name, or the place for the macro definitions */
412 else @<Expand section |a-024000|, |goto restart|@>;
413 break;
414 default: cur_val=a-050000; if (cur_val>0) cur_section=cur_val; /* a section number; zero leaves |cur_section| unchanged */
415 out_char(section_number);
420 @ The user may have forgotten to give any \CEE/ text for a section name,
421 or the \CEE/ text may have been associated with a different name by mistake.
423 @<Expand section |a-...@>=
425 a-=024000; /* now |a| is an offset from |name_dir| */
426 if ((a+name_dir)->equiv!=(char *)text_info) push_level(a+name_dir); /* the name has a replacement text: expand it */
427 else if (a!=0) { /* no text was given for this name; offset zero is passed over silently */
428 printf("\n! Not present: <");
429 print_section_name(a+name_dir); err_print(">");
430 @.Not present: <section name>@>
432 goto restart; /* either way, fetch the next token */
435 @* Producing the output.
436 The |get_output| routine above handles most of the complexity of output
437 generation, but there are two further considerations that have a nontrivial
438 effect on \.{CTANGLE}'s algorithms.
440 @ First,
441 we want to make sure that the output has spaces and line breaks in
442 the right places (e.g., not in the middle of a string or a constant or an
443 identifier, not at a `\.{@@\&}' position
444 where quantities are being joined together, and certainly after an \.=
445 because the \CEE/ compiler thinks \.{=-} is ambiguous).
447 The output process can be in one of the following states:
449 \yskip\hang |num_or_id| means that the last item in the buffer is a number or
450 identifier, hence a blank space or line break must be inserted if the next
451 item is also a number or identifier.
453 \yskip\hang |unbreakable| means that the last item in the buffer was followed
454 by the \.{@@\&} operation that inhibits spaces between it and the next item.
456 \yskip\hang |verbatim| means we're copying only character tokens, and
457 that they are to be output exactly as stored. This is the case during
458 strings, verbatim constructions and numerical constants.
460 \yskip\hang |post_slash| means we've just output a slash.
462 \yskip\hang |normal| means none of the above.
464 \yskip\noindent Furthermore, if the variable |protect| is positive, newlines
465 are preceded by a `\.\\'.
467 @d normal 0 /* non-unusual state */
468 @d num_or_id 1 /* state associated with numbers and identifiers */
469 @d post_slash 2 /* state following a \./ */
470 @d unbreakable 3 /* state associated with \.{@@\&} */
471 @d verbatim 4 /* state in the middle of a string */
473 @<Global...@>=
474 eight_bits out_state; /* current status of partial output */
475 boolean protect; /* should newline characters be quoted? */
477 @ Here is a routine that is invoked when we want to output the current line.
478 During the output process, |cur_line| equals the number of the next line
479 to be output.
482 void
483 flush_buffer() /* writes one line to output file */
485 C_putc('\n'); /* end the current output line */
486 if (cur_line % 100 == 0 && show_progress) { /* a dot for every 100 lines */
487 printf(".");
488 if (cur_line % 500 == 0) printf("%d",cur_line); /* and the line number for every 500 */
489 update_terminal; /* progress report */
491 cur_line++; /* the number of the next line to be output */
494 @ Second, we have modified the original \.{TANGLE} so that it will write output
495 on multiple files.
496 If a section name is introduced in at least one place by \.{@@(}
497 instead of \.{@@<}, we treat it as the name of a file.
498 All these special sections are saved on a stack, |output_files|.
499 We write them out after we've done the unnamed section.
501 @d max_files 256 /* capacity of the |output_files| stack */
502 @<Glob...@>=
503 name_pointer output_files[max_files]; /* section names that are file names; filled from the high end downward */
504 name_pointer *cur_out_file, *end_output_files, *an_output_file; /* top of the stack, just beyond its end, and a scanning pointer */
505 char cur_section_name_char; /* is it |'<'| or |'('| */
506 char output_file_name[longest_name]; /* name of the file */
508 @ We make |end_output_files| point just beyond the end of
509 |output_files|. The stack pointer
510 |cur_out_file| starts out there. Every time we see a new file, we
511 decrement |cur_out_file| and then write it in.
512 @<Set initial...@>=
513 cur_out_file=end_output_files=output_files+max_files;
515 @ @<If it's not there, add |cur_section_name| to the output file stack, or
516 complain we're out of room@>=
518 for (an_output_file=cur_out_file; /* search the files recorded so far */
519 an_output_file<end_output_files; an_output_file++)
520 if (*an_output_file==cur_section_name) break; /* already on the stack */
521 if (an_output_file==end_output_files) { /* not found */
522 if (cur_out_file>output_files)
523 *--cur_out_file=cur_section_name; /* push it; the stack grows downward */
524 else {
525 overflow("output files"); /* all |max_files| positions are in use */
530 @* The big output switch. Here then is the routine that does the
531 output.
533 @<Predecl...@>=
534 void phase_two();
536 @ @c
537 void
538 phase_two () { /* writes the main output file, then the named output files */
539 web_file_open=0;
540 cur_line=1; /* output lines are counted from 1 */
541 @<Initialize the output stacks@>;
542 @<Output macro definitions if appropriate@>;
543 if (text_info->text_link==0 && cur_out_file==end_output_files) { /* no unnamed section and no named output file */
544 printf("\n! No program text was specified."); mark_harmless;
545 @.No program text...@>
547 else {
548 if(cur_out_file==end_output_files) { /* only the main file will be written */
549 if(show_progress)
550 printf("\nWriting the output file (%s):",C_file_name);
552 else {
553 if (show_progress) {
554 printf("\nWriting the output files:");
555 @.Writing the output...@>
556 printf(" (%s)",C_file_name);
557 update_terminal;
559 if (text_info->text_link==0) goto writeloop; /* no unnamed section: go straight to the named files */
561 while (stack_ptr>stack) get_output(); /* expand until the output stack is empty */
562 flush_buffer();
563 writeloop: @<Write all the named output files@>;
564 if(show_happiness) printf("\nDone.");
568 @ To write the named output files, we proceed as for the unnamed
569 section.
570 The only subtlety is that we have to open each one.
572 @<Write all the named output files@>=
573 for (an_output_file=end_output_files; an_output_file>cur_out_file;) { /* from the high end down, so the files appear in the order they were pushed */
574 an_output_file--;
575 sprint_section_name(output_file_name,*an_output_file); /* the section name is the file name */
576 fclose(C_file); /* NOTE(review): the result is ignored, so a write error on the previous file goes unreported */
577 C_file=fopen(output_file_name,"w");
578 if (C_file ==0) fatal("! Cannot open output file:",output_file_name);
579 @.Cannot open output file@>
580 printf("\n(%s)",output_file_name); update_terminal;
581 cur_line=1; /* each file counts its lines from 1 */
582 stack_ptr=stack+1; /* one level is active, as for the unnamed section */
583 cur_name= (*an_output_file);
584 cur_repl= (text_pointer)cur_name->equiv;
585 cur_byte=cur_repl->tok_start;
586 cur_end=(cur_repl+1)->tok_start;
587 while (stack_ptr > stack) get_output(); /* expand until the output stack is empty */
588 flush_buffer();
591 @ If a \.{@@h} was not encountered in the input,
592 we go through the list of replacement texts and copy the ones
593 that refer to macros, preceded by the \.{\#define} preprocessor command.
595 @<Output macro definitions if appropriate@>=
596 if (!output_defs_seen)
597 output_defs();
599 @ @<Glob...@>=
600 boolean output_defs_seen=0;
602 @ @<Predecl...@>=
603 void output_defs();
605 @ @c
606 void
607 output_defs() /* writes a \.{\#define} line for every macro replacement text */
609 sixteen_bits a; /* the current byte, or a two-byte token */
610 push_level(NULL); /* save the current state without starting a new text */
611 for (cur_text=text_info+1; cur_text<text_ptr; cur_text++) /* scan every replacement text stored so far */
612 if (cur_text->text_link==0) { /* |cur_text| is the text for a macro */
613 cur_byte=cur_text->tok_start;
614 cur_end=(cur_text+1)->tok_start;
615 C_printf("%s","#define ");
616 out_state=normal;
617 protect=1; /* newlines should be preceded by |'\\'| */
618 while (cur_byte<cur_end) { /* copy the tokens of this macro */
619 a=*cur_byte++;
620 if (cur_byte==cur_end && a=='\n') break; /* disregard a final newline */
621 if (out_state==verbatim && a!=string && a!=constant && a!='\n')
622 C_putc(a); /* a high-bit character can occur in a string */
623 @^high-bit character handling@>
624 else if (a<0200) out_char(a); /* one-byte token */
625 else {
626 a=(a-0200)*0400+*cur_byte++; /* combine two bytes into a sixteen-bit token */
627 if (a<024000) { /* |024000==(0250-0200)*0400| */
628 cur_val=a; out_char(identifier);
630 else if (a<050000) { confusion("macro defs have strange char");} /* the range of section names is not expected in a macro */
631 else {
632 cur_val=a-050000; cur_section=cur_val; out_char(section_number); /* a section number token */
634 /* no other cases */
637 protect=0; /* newlines are no longer quoted */
638 flush_buffer(); /* end the line of this definition */
640 pop_level(0); /* restore the state saved on entry */
643 @ A many-way switch is used to send the output. Note that this function
644 is not called if |out_state==verbatim|, except perhaps with arguments
645 |'\n'| (protect the newline), |string| (end the string), or |constant|
646 (end the constant).
648 @<Predecl...@>=
649 static void out_char();
651 @ @c
652 static void
653 out_char(cur_char) /* writes one token and keeps |out_state| up to date */
654 eight_bits cur_char; /* a character, or a code such as |identifier| or |section_number| */
656 char *j, *k; /* start and end of a name in |byte_mem| */
657 restart:
658 switch (cur_char) {
659 case '\n': if (protect && out_state!=verbatim) C_putc(' '); /* in a protected line, a space goes before the quoting character */
660 if (protect || out_state==verbatim) C_putc('\\'); /* quote the newline */
661 flush_buffer(); if (out_state!=verbatim) out_state=normal; break; /* a verbatim item continues on the next line */
662 @/@t\4@>@<Case of an identifier@>;
663 @/@t\4@>@<Case of a section number@>;
664 @/@t\4@>@<Cases like \.{!=}@>;
665 case '=': case '>': C_putc(cur_char); C_putc(' '); /* a space always follows these two characters */
666 out_state=normal; break;
667 case join: out_state=unbreakable; break; /* nothing is written; only the state changes */
668 case constant: if (out_state==verbatim) { /* the end of a constant */
669 out_state=num_or_id; break;
671 if(out_state==num_or_id) C_putc(' '); out_state=verbatim; break; /* the start of a constant, kept apart from a preceding number or identifier */
672 case string: if (out_state==verbatim) out_state=normal; /* this code toggles verbatim mode */
673 else out_state=verbatim; break;
674 case '/': C_putc('/'); out_state=post_slash; break;
675 case '*': if (out_state==post_slash) C_putc(' '); /* keep a star apart from a preceding slash */
676 /* fall through */
677 default: C_putc(cur_char); out_state=normal; break;
681 @ @<Cases like \.{!=}@>=
682 case plus_plus: C_putc('+'); C_putc('+'); out_state=normal; break;
683 case minus_minus: C_putc('-'); C_putc('-'); out_state=normal; break;
684 case minus_gt: C_putc('-'); C_putc('>'); out_state=normal; break;
685 case gt_gt: C_putc('>'); C_putc('>'); out_state=normal; break;
686 case eq_eq: C_putc('='); C_putc('='); out_state=normal; break;
687 case lt_lt: C_putc('<'); C_putc('<'); out_state=normal; break;
688 case gt_eq: C_putc('>'); C_putc('='); out_state=normal; break;
689 case lt_eq: C_putc('<'); C_putc('='); out_state=normal; break;
690 case not_eq: C_putc('!'); C_putc('='); out_state=normal; break;
691 case and_and: C_putc('&'); C_putc('&'); out_state=normal; break;
692 case or_or: C_putc('|'); C_putc('|'); out_state=normal; break;
693 case dot_dot_dot: C_putc('.'); C_putc('.'); C_putc('.'); out_state=normal;
694 break;
695 case colon_colon: C_putc(':'); C_putc(':'); out_state=normal; break;
696 case period_ast: C_putc('.'); C_putc('*'); out_state=normal; break;
697 case minus_gt_ast: C_putc('-'); C_putc('>'); C_putc('*'); out_state=normal;
698 break;
700 @ When an identifier is output to the \CEE/ file, characters in the
701 range 128--255 must be changed into something else, so the \CEE/
702 compiler won't complain. By default, \.{CTANGLE} converts the
703 character with code $16 x+y$ to the three characters `\.X$xy$', but
704 a different transliteration table can be specified. Thus a German
705 might want {\it gr\"un\/} to appear as a still readable \.{gruen}.
706 This makes debugging a lot less confusing.
708 @d translit_length 10
710 @<Glo...@>=
711 char translit[128][translit_length];
713 @ @<Set init...@>=
715 int i; /* index into |translit|; entry |i| belongs to character code |128+i| */
716 for (i=0;i<128;i++) sprintf(translit[i],"X%02X",(unsigned)(128+i)); /* the default is \.X followed by two hexadecimal digits */
719 @ @<Case of an identifier@>=
720 case identifier:
721 if (out_state==num_or_id) C_putc(' '); /* keep two adjacent numbers or identifiers apart */
722 j=(cur_val+name_dir)->byte_start; /* first character of the name */
723 k=(cur_val+name_dir+1)->byte_start; /* the name ends where the next one starts */
724 while (j<k) {
725 if ((unsigned char)(*j)<0200) C_putc(*j); /* a 7-bit character is copied as it is */
726 @^high-bit character handling@>
727 else C_printf("%s",translit[(unsigned char)(*j)-0200]); /* a high-bit character is replaced by its transliteration */
728 j++;
730 out_state=num_or_id; break;
732 @ @<Case of a sec...@>=
733 case section_number:
734 if (cur_val>0) C_printf("/*%d:*/",cur_val); /* a section begins */
735 else if(cur_val<0) C_printf("/*:%d*/",-cur_val); /* a section ends */
736 else if (protect) { /* a line-number token where newlines are being quoted */
737 cur_byte +=4; /* skip line number and file name */
738 cur_char = '\n'; /* and treat the token as a newline instead */
739 goto restart;
740 } else {
741 sixteen_bits a; /* the line number */
742 a=0400* *cur_byte++; /* high byte first */
743 a+=*cur_byte++; /* gets the line number */
744 C_printf("\n#line %d \"",a);
745 @:line}{\.{\#line}@>
746 cur_val=*cur_byte++;
747 cur_val=0400*(cur_val-0200)+ *cur_byte++; /* points to the file name */
748 for (j=(cur_val+name_dir)->byte_start, k=(cur_val+name_dir+1)->byte_start;
749 j<k; j++) {
750 if (*j=='\\' || *j=='"') C_putc('\\'); /* quote backslashes and double quotes in the file name */
751 C_putc(*j);
753 C_printf("%s","\"\n"); /* close the quoted file name and end the line */
755 break;
757 @** Introduction to the input phase.
758 We have now seen that \.{CTANGLE} will be able to output the full
759 \CEE/ program, if we can only get that program into the byte memory in
760 the proper format. The input process is something like the output process
761 in reverse, since we compress the text as we read it in and we expand it
762 as we write it out.
764 There are three main input routines. The most interesting is the one that gets
765 the next token of a \CEE/ text; the other two are used to scan rapidly past
766 \TEX/ text in the \.{CWEB} source code. One of the latter routines will jump to
767 the next token that starts with `\.{@@}', and the other skips to the end
768 of a \CEE/ comment.
770 @ Control codes in \.{CWEB} begin with `\.{@@}', and the next character
771 identifies the code. Some of these are of interest only to \.{CWEAVE},
772 so \.{CTANGLE} ignores them; the others are converted by \.{CTANGLE} into
773 internal code numbers by the |ccode| table below. The ordering
774 of these internal code numbers has been chosen to simplify the program logic;
775 larger numbers are given to the control codes that denote more significant
776 milestones.
778 @d ignore 0 /* control code of no interest to \.{CTANGLE} */
779 @d ord 0302 /* control code for `\.{@@'}' */
780 @d control_text 0303 /* control code for `\.{@@t}', `\.{@@\^}', etc. */
781 @d translit_code 0304 /* control code for `\.{@@l}' */
782 @d output_defs_code 0305 /* control code for `\.{@@h}' */
783 @d format_code 0306 /* control code for `\.{@@f}' */
784 @d definition 0307 /* control code for `\.{@@d}' */
785 @d begin_C 0310 /* control code for `\.{@@c}' */
786 @d section_name 0311 /* control code for `\.{@@<}' */
787 @d new_section 0312 /* control code for `\.{@@\ }' and `\.{@@*}' */
789 @<Global...@>=
790 eight_bits ccode[256]; /* meaning of a char following \.{@@} */
792 @ @<Set ini...@>= {
793 int c; /* must be |int| so the |for| loop will end */
794 for (c=0; c<256; c++) ccode[c]=ignore; /* every code is |ignore| unless set below */
795 ccode[' ']=ccode['\t']=ccode['\n']=ccode['\v']=ccode['\r']=ccode['\f']
796 =ccode['*']=new_section; /* white space or a star begins a new section */
797 ccode['@@']='@@'; ccode['=']=string; /* a doubled at-sign is coded as the character itself */
798 ccode['d']=ccode['D']=definition;
799 ccode['f']=ccode['F']=ccode['s']=ccode['S']=format_code;
800 ccode['c']=ccode['C']=ccode['p']=ccode['P']=begin_C;
801 ccode['^']=ccode[':']=ccode['.']=ccode['t']=ccode['T']=
802 ccode['q']=ccode['Q']=control_text;
803 ccode['h']=ccode['H']=output_defs_code;
804 ccode['l']=ccode['L']=translit_code;
805 ccode['&']=join;
806 ccode['<']=ccode['(']=section_name; /* both ways of starting a section name share one code */
807 ccode['\'']=ord;
810 @ The |skip_ahead| procedure reads through the input at fairly high speed
811 until finding the next non-ignorable control code, which it returns.
814 eight_bits
815 skip_ahead() /* skip to next control code */
817 eight_bits c; /* control code found */
818 while (1) {
819 if (loc>limit && (get_line()==0)) return(new_section); /* no further line could be read */
820 *(limit+1)='@@'; /* a sentinel that stops the scan below */
821 while (*loc!='@@') loc++;
822 if (loc<=limit) { /* found in the line itself, not the sentinel */
823 loc++; c=ccode[(eight_bits)*loc]; loc++; /* look up the character that follows and move past it */
824 if (c!=ignore || *(loc-1)=='>') return(c); /* a following |'>'| is returned too, even when its code is |ignore| */
829 @ The |skip_comment| procedure reads through the input at somewhat high
830 speed in order to pass over comments, which \.{CTANGLE} does not transmit
831 to the output. If the comment is introduced by \.{/*}, |skip_comment|
832 proceeds until finding the end-comment token \.{*/} or a newline; in the
833 latter case |skip_comment| will be called again by |get_next|, since the
834 comment is not finished. This is done so that each newline in the
835 \CEE/ part of a section is copied to the output; otherwise the \&{\#line}
836 commands inserted into the \CEE/ file by the output routines become useless.
837 On the other hand, if the comment is introduced by \.{//} (i.e., if it
838 is a \CPLUSPLUS/ ``short comment''), it always is simply delimited by the next
839 newline. The boolean argument |is_long_comment| distinguishes between
840 the two types of comments.
842 If |skip_comment| comes to the end of the section, it prints an error message.
843 No comment, long or short, is allowed to contain `\.{@@\ }' or `\.{@@*}'.
845 @<Global...@>=
846 boolean comment_continues=0; /* are we scanning a comment? */
848 @ @c
849 int skip_comment(is_long_comment) /* skips a comment; returns |comment_continues| */
850 boolean is_long_comment; /* is this a long (slash-star) comment? */
852 char c; /* current character */
853 while (1) {
854 if (loc>limit) {
855 if (is_long_comment) {
856 if(get_line()) return(comment_continues=1); /* resumes on the next line */
857 else{
858 err_print("! Input ended in mid-comment");
859 @.Input ended in mid-comment@>
860 return(comment_continues=0);
863 else return(comment_continues=0); /* a short comment ends with its line */
865 c=*(loc++);
866 if (is_long_comment && c=='*' && *loc=='/') {
867 loc++; return(comment_continues=0);
869 if (c=='@@') {
870 if (ccode[(eight_bits)*loc]==new_section) {
871 err_print("! Section name ended in mid-comment"); loc--; /* rescan the at-sign */
872 @.Section name ended in mid-comment@>
873 return(comment_continues=0);
875 else loc++; /* skip the character that follows the at-sign */
880 @* Inputting the next token.
882 @d constant 03 /* token code that |get_next| returns for a numeric constant */
884 @<Global...@>=
885 name_pointer cur_section_name; /* name of section just scanned */
886 int no_where; /* suppress |print_where|? */
888 @ @<Include...@>=
889 #include <ctype.h> /* definition of |isalpha|, |isdigit| and so on */
890 #include <stdlib.h> /* definition of |exit| */
892 @ As one might expect, |get_next| consists mostly of a big switch
893 that branches to the various special cases that can arise.
895 @d isxalpha(c) ((c)=='_' || (c)=='$')
896 /* non-alpha characters allowed in identifier */
897 @d ishigh(c) ((unsigned char)(c)>0177) /* does |c| have its high bit set? */
898 @^high-bit character handling@>
901 eight_bits
902 get_next() /* produces the next input token */
904 static int preprocessing=0; /* are we scanning a preprocessor line? */
905 eight_bits c; /* the current character */
906 while (1) {
907 if (loc>limit) {
908 if (preprocessing && *(limit-1)!='\\') preprocessing=0; /* no continuation */
909 if (get_line()==0) return(new_section); /* end of input */
910 else if (print_where && !no_where) {
911 print_where=0;
912 @<Insert the line number into |tok_mem|@>;
914 else return ('\n');
916 c=*loc;
917 if (comment_continues || (c=='/' && (*(loc+1)=='*' || *(loc+1)=='/'))) {
918 skip_comment(comment_continues||*(loc+1)=='*');
919 /* scan to end of comment or newline */
920 if (comment_continues) return('\n');
921 else continue;
923 loc++;
924 if (xisdigit(c) || c=='.') @<Get a constant@>@;
925 else if (c=='\'' || c=='"' || (c=='L'&&(*loc=='\'' || *loc=='"')))
926 @<Get a string@>@;
927 else if (isalpha(c) || isxalpha(c) || ishigh(c))
928 @<Get an identifier@>@;
929 else if (c=='@@') @<Get control code and possible section name@>@;
930 else if (xisspace(c)) {
931 if (!preprocessing || loc>limit) continue;
932 /* we don't want a blank after a final backslash */
933 else return(' '); /* ignore spaces and tabs, unless preprocessing */
935 else if (c=='#' && loc==buffer+1) preprocessing=1; /* number sign in first column */
936 mistake: @<Compress two-symbol operator@>@;
937 return(c);
941 @ The following code assigns values to the combinations \.{++},
942 \.{--}, \.{->}, \.{>=}, \.{<=}, \.{==}, \.{<<}, \.{>>}, \.{!=}, \.{||} and
943 \.{\&\&}, and to the \CPLUSPLUS/
944 combinations \.{...}, \.{::}, \.{.*} and \.{->*}.
945 The compound assignment operators (e.g., \.{+=}) are
946 treated as separate tokens.
948 @d compress(c) if (loc++<=limit) return(c) /* take one more character of the line */
950 @<Compress tw...@>=
951 switch(c) {
952 case '+': if (*loc=='+') compress(plus_plus); break;
953 case '-': if (*loc=='-') {compress(minus_minus);}
954 else if (*loc=='>') if (*(loc+1)=='*') {loc++; compress(minus_gt_ast);}
955 else compress(minus_gt); break; /* this |else| belongs to the inner |if| */
956 case '.': if (*loc=='*') {compress(period_ast);}
957 else if (*loc=='.' && *(loc+1)=='.') {
958 loc++; compress(dot_dot_dot); /* three characters form this token */
960 break;
961 case ':': if (*loc==':') compress(colon_colon); break;
962 case '=': if (*loc=='=') compress(eq_eq); break;
963 case '>': if (*loc=='=') {compress(gt_eq);}
964 else if (*loc=='>') compress(gt_gt); break;
965 case '<': if (*loc=='=') {compress(lt_eq);}
966 else if (*loc=='<') compress(lt_lt); break;
967 case '&': if (*loc=='&') compress(and_and); break;
968 case '|': if (*loc=='|') compress(or_or); break;
969 case '!': if (*loc=='=') compress(not_eq); break;
972 @ @<Get an identifier@>= {
973 id_first=--loc; /* the casts below keep |char| values nonnegative for the \.{ctype} tests */
974 while (isalpha((eight_bits)*++loc) || isdigit((eight_bits)*loc) @|
 || isxalpha(*loc) || ishigh(*loc));
975 id_loc=loc; return(identifier); /* |id_loc| is one past the last character */
978 @ @<Get a constant@>= {
979 id_first=loc-1; /* the constant starts at the character just read */
980 if (*id_first=='.' && !xisdigit(*loc)) goto mistake; /* not a constant */
981 if (*id_first=='0') {
982 if (*loc=='x' || *loc=='X') { /* hex constant */
983 loc++; while (xisxdigit(*loc)) loc++; goto found;
986 while (xisdigit(*loc)) loc++; /* integer part */
987 if (*loc=='.') { /* fraction part */
988 loc++;
989 while (xisdigit(*loc)) loc++;
991 if (*loc=='e' || *loc=='E') { /* float constant */
992 if (*++loc=='+' || *loc=='-') loc++; /* optional sign of the exponent */
993 while (xisdigit(*loc)) loc++;
995 found: while (*loc=='u' || *loc=='U' || *loc=='l' || *loc=='L'
996 || *loc=='f' || *loc=='F') loc++; /* skip type suffixes */
997 id_loc=loc; /* one past the last character of the constant */
998 return(constant);
1001 @ \CEE/ strings and character constants, delimited by double and single
1002 quotes, respectively, can contain newlines or instances of their own
1003 delimiters if they are protected by a backslash. We follow this
1004 convention, but do not allow the string to be longer than |longest_name|.
1006 @<Get a string@>= {
1007 char delim = c; /* what started the string */
1008 id_first = section_text+1; /* the string is copied into |section_text| */
1009 id_loc = section_text; *++id_loc=delim; /* the delimiter is stored first */
1010 if (delim=='L') { /* wide character constant */
1011 delim=*loc++; *++id_loc=delim;
1013 while (1) {
1014 if (loc>=limit) {
1015 if(*(limit-1)!='\\') { /* no final backslash, so the line may not continue */
1016 err_print("! String didn't end"); loc=limit; break;
1017 @.String didn't end@>
1019 if(get_line()==0) {
1020 err_print("! Input ended in middle of string"); loc=buffer; break;
1021 @.Input ended in middle of string@>
1023 else if (++id_loc<=section_text_end) *id_loc='\n'; /* will print as
1024 \.{"\\\\\\n"} */
1026 if ((c=*loc++)==delim) {
1027 if (++id_loc<=section_text_end) *id_loc=c;
1028 break;
1030 if (c=='\\') {
1031 if (loc>=limit) continue;
1032 if (++id_loc<=section_text_end) *id_loc = '\\';
1033 c=*loc++;
1035 if (++id_loc<=section_text_end) *id_loc=c;
1037 if (id_loc>=section_text_end) {
1038 printf("\n! String too long: ");
1039 @.String too long@>
1040 term_write(section_text+1,25);
1041 err_print("...");
1043 id_loc++;
1044 return(string);
1047 @ After an \.{@@} sign has been scanned, the next character tells us
1048 whether there is more work to do.
1050 @<Get control code and possible section name@>= {
1051 c=ccode[(eight_bits)*loc++]; /* translate the character after the at-sign */
1052 switch(c) {
1053 case ignore: continue; /* resume the main loop of |get_next| */
1054 case translit_code: err_print("! Use @@l in limbo only"); continue;
1055 @.Use @@l in limbo...@>
1056 case control_text: while ((c=skip_ahead())=='@@');
1057 /* only \.{@@@@} and \.{@@>} are expected */
1058 if (*(loc-1)!='>')
1059 err_print("! Double @@ should be used in control text");
1060 @.Double @@ should be used...@>
1061 continue;
1062 case section_name:
1063 cur_section_name_char=*(loc-1); /* remember which character began the name */
1064 @<Scan the section name and make |cur_section_name| point to it@>;
1065 case string: @<Scan a verbatim string@>; /* this code ends with a |return| */
1066 case ord: @<Scan an ASCII constant@>; /* this code ends with a |return| */
1067 default: return(c);
1071 @ After scanning a valid ASCII constant that follows
1072 \.{@@'}, this code plows ahead until it finds the next single quote.
1073 (Special care is taken if the quote is part of the constant.)
1074 Anything after a valid ASCII constant is ignored;
1075 thus, \.{@@'\\nopq'} gives the same result as \.{@@'\\n'}.
1077 @<Scan an ASCII constant@>=
1078 id_first=loc; /* the constant begins right after the opening quote */
1079 if (*loc=='\\') {
1080 if (*++loc=='\'') loc++; /* an escaped quote is part of the constant */
1082 while (*loc!='\'') { /* find the closing quote */
1083 if (*loc=='@@') {
1084 if (*(loc+1)!='@@')
1085 err_print("! Double @@ should be used in ASCII constant");
1086 @.Double @@ should be used...@>
1087 else loc++; /* skip the first of two at-signs */
1089 loc++;
1090 if (loc>limit) {
1091 err_print("! String didn't end"); loc=limit-1; break;
1092 @.String didn't end@>
1095 loc++;
1096 return(ord);
1098 @ @<Scan the section name...@>= {
1099 char *k; /* pointer into |section_text| */
1100 @<Put section name into |section_text|@>;
1101 if (k-section_text>3 && strncmp(k-2,"...",3)==0) /* does the name end with three dots? */
1102 cur_section_name=section_lookup(section_text+1,k-3,1); /* 1 means is a prefix */
1103 else cur_section_name=section_lookup(section_text+1,k,0);
1104 if (cur_section_name_char=='(') /* the name was introduced by \.{@@(} */
1105 @<If it's not there, add |cur_section_name| to the output file stack, or
1106 complain we're out of room@>;
1107 return(section_name);
1110 @ Section names are placed into the |section_text| array with consecutive spaces,
1111 tabs, and carriage-returns replaced by single spaces. There will be no
1112 spaces at the beginning or the end. (We set |section_text[0]=' '| to facilitate
1113 this, since the |section_lookup| routine uses |section_text[1]| as the first
1114 character of the name.)
1116 @<Set init...@>=section_text[0]=' ';
1118 @ @<Put section name...@>=
1119 k=section_text; /* |k| points to the last character stored so far */
1120 while (1) {
1121 if (loc>limit && get_line()==0) {
1122 err_print("! Input ended in section name");
1123 @.Input ended in section name@>
1124 loc=buffer+1; break;
1126 c=*loc;
1127 @<If end of name or erroneous nesting, |break|@>;
1128 loc++; if (k<section_text_end) k++; /* advance |k| unless the buffer is full */
1129 if (xisspace(c)) {
1130 c=' '; if (*(k-1)==' ') k--; /* collapse a run of white space into one blank */
1132 *k=c;
1134 if (k>=section_text_end) {
1135 printf("\n! Section name too long: ");
1136 @.Section name too long@>
1137 term_write(section_text+1,25);
1138 printf("..."); mark_harmless;
1140 if (*k==' ' && k>section_text) k--; /* drop a trailing blank */
1142 @ @<If end of name or erroneous nesting,...@>=
1143 if (c=='@@') {
1144 c=*(loc+1); /* look at the character after the at-sign */
1145 if (c=='>') {
1146 loc+=2; break; /* \.{@@>} ends the name; move past it */
1148 if (ccode[(eight_bits)c]==new_section) {
1149 err_print("! Section name didn't end"); break;
1150 @.Section name didn't end@>
1152 if (ccode[(eight_bits)c]==section_name) {
1153 err_print("! Nesting of section names not allowed"); break;
1154 @.Nesting of section names...@>
1156 *(++k)='@@'; loc++; /* now |c==*loc| again */
1159 @ At the present point in the program we
1160 have |ccode[(eight_bits)*(loc-1)]==string|; we set |id_first| to the beginning
1161 of the string itself, and |id_loc| to its ending-plus-one location in the
1162 buffer. We also set |loc| to the position just after the ending delimiter.
1164 @<Scan a verbatim string@>= {
1165 id_first=loc++; *(limit+1)='@@'; *(limit+2)='>'; /* plant a sentinel \.{@@>} */
1166 while (*loc!='@@' || *(loc+1)!='>') loc++; /* stop at the first \.{@@>} */
1167 if (loc>=limit) err_print("! Verbatim string didn't end");
1168 @.Verbatim string didn't end@>
1169 id_loc=loc; loc+=2;
1170 return(string);
1173 @* Scanning a macro definition.
1174 The rules for generating the replacement texts corresponding to macros and
1175 \CEE/ texts of a section are almost identical; the only differences are that
1177 \yskip \item{a)}Section names are not allowed in macros;
1178 in fact, the appearance of a section name terminates such macros and denotes
1179 the name of the current section.
1181 \item{b)}The symbols \.{@@d} and \.{@@f} and \.{@@c} are not allowed after
1182 section names, while they terminate macro definitions.
1184 \item{c)}Spaces are inserted after right parentheses in macros, because the
1185 ANSI \CEE/ preprocessor sometimes requires it.
1187 \yskip Therefore there is a single procedure |scan_repl| whose parameter
1188 |t| specifies either |macro| or |section_name|. After |scan_repl| has
1189 acted, |cur_text| will point to the replacement text just generated, and
1190 |next_control| will contain the control code that terminated the activity.
1192 @d macro 0 /* value of |t| when |scan_repl| scans a macro definition */
1193 @d app_repl(c) {if (tok_ptr==tok_mem_end) overflow("token"); *tok_ptr++=c;}
1195 @<Global...@>=
1196 text_pointer cur_text; /* replacement text formed by |scan_repl| */
1197 eight_bits next_control; /* control code that ended the latest scan */
1199 @ @c
1200 void
1201 scan_repl(t) /* creates a replacement text */
1202 eight_bits t; /* either |macro| or |section_name| */
1204 sixteen_bits a; /* the current token */
1205 if (t==section_name) {@<Insert the line number into |tok_mem|@>;}
1206 while (1) switch (a=get_next()) {
1207 @<In cases that |a| is a non-|char| token (|identifier|,
1208 |section_name|, etc.), either process it and change |a| to a byte
1209 that should be stored, or |continue| if |a| should be ignored,
1210 or |goto done| if |a| signals the end of this replacement text@>@;
1211 case ')': app_repl(a);
1212 if (t==macro) app_repl(' '); /* a blank follows a right parenthesis in macros */
1213 break;
1214 default: app_repl(a); /* store |a| in |tok_mem| */
1216 done: next_control=(eight_bits) a; /* remember the code that ended the text */
1217 if (text_ptr>text_info_end) overflow("text");
1218 cur_text=text_ptr; (++text_ptr)->tok_start=tok_ptr; /* the next text starts here */
1221 @ Here is the code for the line number: first a |sixteen_bits| equal
1222 to |0150000|; then the numeric line number; then a pointer to the
1223 file name.
1225 @<Insert the line...@>=
1226 store_two_bytes(0150000); /* marker that introduces a line number */
1227 if (changing) id_first=change_file_name; /* name of the file being read */
1228 else id_first=cur_file_name;
1229 id_loc=id_first+strlen(id_first);
1230 if (changing) store_two_bytes((sixteen_bits)change_line); /* the line number itself */
1231 else store_two_bytes((sixteen_bits)cur_line);
1232 {int a=id_lookup(id_first,id_loc,0)-name_dir; app_repl((a / 0400)+0200);
1233 app_repl(a % 0400);} /* the file name is stored like an identifier */
1235 @ @<In cases that |a| is...@>=
1236 case identifier: a=id_lookup(id_first,id_loc,0)-name_dir; /* index in |name_dir| */
1237 app_repl((a / 0400)+0200); /* high byte, flagged with 0200 */
1238 app_repl(a % 0400); break; /* low byte */
1239 case section_name: if (t!=section_name) goto done; /* a section name ends a macro */
1240 else {
1241 @<Was an `@@' missed here?@>;
1242 a=cur_section_name-name_dir;
1243 app_repl((a / 0400)+0250);
1244 app_repl(a % 0400);
1245 @<Insert the line number into |tok_mem|@>; break;
1247 case output_defs_code: if (t!=section_name) err_print("! Misplaced @@h");
1248 @.Misplaced @@h@>
1249 else {
1250 output_defs_seen=1;
1251 a=output_defs_flag;
1252 app_repl((a / 0400)+0200);
1253 app_repl(a % 0400);
1254 @<Insert the line number into |tok_mem|@>;
1256 break;
1257 case constant: case string:
1258 @<Copy a string or verbatim construction or numerical constant@>;
1259 case ord:
1260 @<Copy an ASCII constant@>;
1261 case definition: case format_code: case begin_C: if (t!=section_name) goto done;
1262 else {
1263 err_print("! @@d, @@f and @@c are ignored in C text"); continue;
1264 @.@@d, @@f and @@c are ignored in C text@>
1266 case new_section: goto done;
1268 @ @<Was an `@@'...@>= {
1269 char *try_loc=loc; /* look ahead without moving |loc| */
1270 while (*try_loc==' ' && try_loc<limit) try_loc++; /* skip blanks */
1271 if (*try_loc=='+' && try_loc<limit) try_loc++; /* skip an optional plus sign */
1272 while (*try_loc==' ' && try_loc<limit) try_loc++;
1273 if (*try_loc=='=') err_print ("! Missing `@@ ' before a named section");
1274 @.Missing `@@ '...@>
1275 /* user who isn't defining a section should put newline after the name,
1276 as explained in the manual */
1279 @ @<Copy a string...@>=
1280 app_repl(a); /* |string| or |constant| */
1281 while (id_first < id_loc) { /* simplify \.{@@@@} pairs */
1282 if (*id_first=='@@') {
1283 if (*(id_first+1)=='@@') id_first++; /* keep only the second at-sign */
1284 else err_print("! Double @@ should be used in string");
1285 @.Double @@ should be used...@>
1287 app_repl(*id_first++);
1289 app_repl(a); break; /* the same code also closes the text */
1291 @ This section should be rewritten on machines that don't use ASCII
1292 code internally.
1293 @^ASCII code dependencies@>
1295 @<Copy an ASCII constant@>= {
1296 int c=(eight_bits) *id_first;
1297 if (c=='\\') {
1298 c=*++id_first;
1299 if (c>='0' && c<='7') { /* octal escape of up to three digits */
1300 c-='0';
1301 if (*(id_first+1)>='0' && *(id_first+1)<='7') {
1302 c=8*c+*(++id_first) - '0';
1303 if (*(id_first+1)>='0' && *(id_first+1)<='7' && c<32) /* keeps the value below 256 */
1304 c=8*c+*(++id_first)- '0';
1307 else switch (c) {
1308 case 't':c='\t';@+break;
1309 case 'n':c='\n';@+break;
1310 case 'b':c='\b';@+break;
1311 case 'f':c='\f';@+break;
1312 case 'v':c='\v';@+break;
1313 case 'r':c='\r';@+break;
1314 case 'a':c='\7';@+break;
1315 case '?':c='?';@+break;
1316 case 'x': /* hexadecimal escape of up to two digits */
1317 if (xisdigit(*(id_first+1))) c=*(++id_first)-'0';
1318 else if (xisxdigit(*(id_first+1))) {
1319 ++id_first;
1320 c=toupper(*id_first)-'A'+10;
1322 if (xisdigit(*(id_first+1))) c=16*c+*(++id_first)-'0';
1323 else if (xisxdigit(*(id_first+1))) {
1324 ++id_first;
1325 c=16*c+toupper(*id_first)-'A'+10;
1327 break;
1328 case '\\':c='\\';@+break;
1329 case '\'':c='\'';@+break;
1330 case '\"':c='\"';@+break;
1331 default: err_print("! Unrecognized escape sequence");
1332 @.Unrecognized escape sequence@>
1335 /* at this point |c| should have been converted to its ASCII code number */
1336 app_repl(constant); /* the value is stored as a decimal constant */
1337 if (c>=100) app_repl('0'+c/100); /* hundreds digit */
1338 if (c>=10) app_repl('0'+(c/10)%10); /* tens digit */
1339 app_repl('0'+c%10); /* units digit */
1340 app_repl(constant);
1342 break;
1344 @* Scanning a section.
1345 The |scan_section| procedure starts when `\.{@@\ }' or `\.{@@*}' has been
1346 sensed in the input, and it proceeds until the end of that section. It
1347 uses |section_count| to keep track of the current section number; with luck,
1348 \.{CWEAVE} and \.{CTANGLE} will both assign the same numbers to sections.
1350 @<Global...@>=
1351 extern sixteen_bits section_count; /* the current section number, advanced by |scan_section| */
1353 @ The body of |scan_section| is a loop where we look for control codes
1354 that are significant to \.{CTANGLE}: those
1355 that delimit a definition, the \CEE/ part of a section, or a new section.
1358 void
1359 scan_section() /* scans one section of the input */
1361 name_pointer p; /* section name for the current section */
1362 text_pointer q; /* text for the current section */
1363 sixteen_bits a; /* token for left-hand side of definition */
1364 section_count++; @+ no_where=1;
1365 if (*(loc-1)=='*' && show_progress) { /* starred section */
1366 printf("*%d",section_count); update_terminal;
1368 next_control=0; /* forces the first skip ahead */
1369 while (1) {
1370 @<Skip ahead until |next_control| corresponds to \.{@@d}, \.{@@<},
1371 \.{@@\ } or the like@>;
1372 if (next_control == definition) { /* \.{@@d} */
1373 @<Scan a definition@>@;
1374 continue;
1376 if (next_control == begin_C) { /* \.{@@c} or \.{@@p} */
1377 p=name_dir; break; /* |name_dir| stands for the unnamed section */
1379 if (next_control == section_name) { /* \.{@@<} or \.{@@(} */
1380 p=cur_section_name;
1381 @<If section is not being defined, |continue| @>;
1382 break;
1384 return; /* \.{@@\ } or \.{@@*} */
1386 no_where=print_where=0; /* |get_next| may insert line numbers again */
1387 @<Scan the \CEE/ part of the current section@>;
1390 @ At the top of this loop, if |next_control==section_name|, the
1391 section name has already been scanned (see |@<Get control code
1392 and...@>|). Thus, if we encounter |next_control==section_name| in the
1393 skip-ahead process, we should likewise scan the section name, so later
1394 processing will be the same in both cases.
1396 @<Skip ahead until |next_control| ...@>=
1397 while (next_control<definition)
1398 /* |definition| is the lowest of the ``significant'' codes */
1399 if((next_control=skip_ahead())==section_name){
1400 loc-=2; next_control=get_next(); /* back up so that |get_next| scans the name */
1403 @ @<Scan a definition@>= {
1404 while ((next_control=get_next())=='\n'); /* allow newlines before the definition */
1405 if (next_control!=identifier) {
1406 err_print("! Definition flushed, must start with identifier");
1407 @.Definition flushed...@>
1408 continue; /* resume the loop in |scan_section| */
1410 app_repl(((a=id_lookup(id_first,id_loc,0)-name_dir) / 0400)+0200);
1411 /* append the lhs */
1412 app_repl(a % 0400);
1413 if (*loc!='(') { /* identifier must be separated from replacement text */
1414 app_repl(string); app_repl(' '); app_repl(string); /* a one-blank string */
1416 scan_repl(macro); /* scan the replacement text of the macro */
1417 cur_text->text_link=0; /* |text_link==0| characterizes a macro */
1420 @ If the section name is not followed by \.{=} or \.{+=}, no \CEE/
1421 code is forthcoming: the section is being cited, not being
1422 defined. This use is illegal after the definition part of the
1423 current section has started, except inside a comment, but
1424 \.{CTANGLE} does not enforce this rule; it simply ignores the offending
1425 section name and everything following it, up to the next significant
1426 control code.
1428 @<If section is not being defined, |continue| @>=
1429 while ((next_control=get_next())=='+'); /* allow optional \.{+=} */
1430 if (next_control!='=' && next_control!=eq_eq)
1431 continue; /* the name is only cited here, so keep skipping */
1433 @ @<Scan the \CEE/...@>=
1434 @<Insert the section number into |tok_mem|@>;
1435 scan_repl(section_name); /* now |cur_text| points to the replacement text */
1436 @<Update the data structure so that the replacement text is accessible@>;
1438 @ @<Insert the section number...@>=
1439 store_two_bytes((sixteen_bits)(0150000+section_count));
1440 /* |0150000==0320*0400|; the section number is added to this flag */
1442 @ @<Update the data...@>=
1443 if (p==name_dir||p==0) { /* unnamed section, or bad section name */
1444 (last_unnamed)->text_link=cur_text-text_info; last_unnamed=cur_text;
1446 else if (p->equiv==(char *)text_info) p->equiv=(char *)cur_text;
1447 /* first section of this name */
1448 else {
1449 q=(text_pointer)p->equiv; /* first text of the section named |p| */
1450 while (q->text_link<section_flag)
1451 q=q->text_link+text_info; /* find end of list */
1452 q->text_link=cur_text-text_info; /* append the new text to the list */
1454 cur_text->text_link=section_flag;
1455 /* mark this replacement text as a nonmacro */
1457 @ @<Predec...@>=
1458 void phase_one();
1460 @ @c
1461 void
1462 phase_one() { /* reads the whole input, one section at a time */
1463 phase=1;
1464 section_count=0;
1465 reset_input();
1466 skip_limbo(); /* skip the material before the first section */
1467 while (!input_has_ended) scan_section(); /* process the sections one by one */
1468 check_complete();
1469 phase=2;
1472 @ Only a small subset of the control codes is legal in limbo, so limbo
1473 processing is straightforward.
1475 @<Predecl...@>=
1476 void skip_limbo();
1478 @ @c
1479 void
1480 skip_limbo() /* skips to the first section, handling limbo codes */
1482 char c; /* character that follows an at-sign */
1483 while (1) {
1484 if (loc>limit && get_line()==0) return;
1485 *(limit+1)='@@'; /* sentinel that stops the scan below */
1486 while (*loc!='@@') loc++;
1487 if (loc++<=limit) {
1488 c=*loc++;
1489 if (ccode[(eight_bits)c]==new_section) break; /* the first section begins here */
1490 switch (ccode[(eight_bits)c]) {
1491 case translit_code: @<Read in transliteration of a character@>; break;
1492 case format_code: case '@@': break; /* these codes need no action in limbo */
1493 case control_text: if (c=='q' || c=='Q') {
1494 while ((c=skip_ahead())=='@@');
1495 if (*(loc-1)!='>')
1496 err_print("! Double @@ should be used in control text");
1497 @.Double @@ should be used...@>
1498 break;
1499 } /* otherwise fall through */
1500 default: err_print("! Double @@ should be used in limbo");
1501 @.Double @@ should be used...@>
1507 @ @<Read in transliteration of a character@>=
1508 while(xisspace(*loc)&&loc<limit) loc++;
1509 loc+=3; /* step over two hex digits and one white space */
1510 if (loc>limit || !xisxdigit(*(loc-3)) || !xisxdigit(*(loc-2)) @|
1511 || (*(loc-3)>='0' && *(loc-3)<='7') || !xisspace(*(loc-1)))
1512 err_print("! Improper hex number following @@l");
1513 @.Improper hex number...@>
1514 else {
1515 unsigned i; /* the character code, at least 0200 */
1516 char *beg; /* start of the replacement string */
1517 sscanf(loc-3,"%x",&i);
1518 while(xisspace(*loc)&&loc<limit) loc++;
1519 beg=loc;
1520 while(loc<limit&&(xisalpha(*loc)||xisdigit(*loc)||*loc=='_')) loc++;
1521 if (loc-beg>=translit_length)
1522 err_print("! Replacement string in @@l too long");
1523 @.Replacement string in @@l...@>
1524 else{
1525 strncpy(translit[i-0200],beg,loc-beg);
1526 translit[i-0200][loc-beg]='\0'; /* terminate the copied string */
1530 @ Because on some systems the difference between two pointers is a |long|
1531 but not an |int|, we use \.{\%ld} to print these quantities.
1534 void
1535 print_stats() { /* prints how much of each table is in use */
1536 printf("\nMemory usage statistics:\n");
1537 printf("%ld names (out of %ld)\n",
1538 (long)(name_ptr-name_dir),(long)max_names);
1539 printf("%ld replacement texts (out of %ld)\n",
1540 (long)(text_ptr-text_info),(long)max_texts);
1541 printf("%ld bytes (out of %ld)\n",
1542 (long)(byte_ptr-byte_mem),(long)max_bytes);
1543 printf("%ld tokens (out of %ld)\n",
1544 (long)(tok_ptr-tok_mem),(long)max_toks);
1547 @** Index.
1548 Here is a cross-reference table for \.{CTANGLE}.
1549 All sections in which an identifier is
1550 used are listed with that identifier, except that reserved words are
1551 indexed only when they appear in format definitions, and the appearances
1552 of identifiers in section names are not indexed. Underlined entries
1553 correspond to where the identifier was declared. Error messages and
1554 a few other things like ``ASCII code dependencies'' are indexed here too.