1 % This file is part of CWEB.
2 % This program by Silvio Levy and Donald E. Knuth
3 % is based on a program by Knuth.
4 % It is distributed WITHOUT
ANY WARRANTY
, express or implied.
5 % Version
3.64 --- February
2002
6 % (same as Version
3.5 except for minor corrections
)
7 % (also quotes backslashes in file names of #line directives
)
9 % Copyright
(C
) 1987,1990,1993,2000 Silvio Levy and Donald E. Knuth
11 % Permission is granted to make and distribute verbatim copies of this
12 % document provided that the copyright notice and this permission notice
13 % are preserved on all copies.
15 % Permission is granted to copy and distribute modified versions of this
16 % document under the conditions for verbatim copying
, provided that the
17 % entire resulting derived work is given a different name and distributed
18 % under the terms of a permission notice identical to this one.
20 % Here is TeX material that gets inserted after \input cwebmac
21 \def\hang
{\hangindent
3em\indent\ignorespaces
}
22 \def\pb
{$\.|\ldots\.|$
} % C brackets
(|...|
)
23 \def\v
{\char'
174} % vertical
(|
) in typewriter font
24 \mathchardef\RA
="3221 % right arrow
25 \mathchardef\BA="3224 % double arrow
27 \def\title
{CTANGLE
(Version
3.64)}
28 \def\topofcontents
{\null\vfill
29 \centerline
{\titlefont The
{\ttitlefont CTANGLE
} processor
}
31 \centerline
{(Version
3.64)}
33 \def\botofcontents
{\vfill
35 Copyright \copyright\
1987, 1990, 1993, 2000 Silvio Levy and Donald E. Knuth
37 Permission is granted to make and distribute verbatim copies of this
38 document provided that the copyright notice and this permission notice
39 are preserved on all copies.
42 Permission is granted to copy and distribute modified versions of this
43 document under the conditions for verbatim copying
, provided that the
44 entire resulting derived work is given a different name and distributed
45 under the terms of a permission notice identical to this one.
47 \pageno
=\contentspagenumber \advance\pageno by
1
49 @s not_eq normal @q unreserve a C
++ keyword @
>
52 This is the \.
{CTANGLE
} program by Silvio Levy and Donald E. Knuth
,
53 based on \.
{TANGLE
} by Knuth.
55 Nelson Beebe
, Hans-Hermann Bode
(to whom the \CPLUSPLUS
/ adaptation is due
),
56 Klaus Guntermann
, Norman Ramsey
, Tomas Rokicki
, Joachim Schnitter
,
57 Joachim Schrod
, Lee Wittenberg
, and others who have contributed improvements.
The ``banner line'' defined here should be changed whenever \.{CTANGLE}
is modified.
62 @d banner
"This is CTANGLE (Version 3.64)\n"
67 @
<Common code for \.
{CWEAVE
} and \.
{CTANGLE
}@
>@
/
68 @
<Typedef declarations@
>@
/
69 @
<Global variables@
>@
/
70 @
<Predeclaration of procedures@
>@
/
72 @ We predeclare several standard system functions here instead of including
73 their system header files
, because the names of the header files are not as
74 standard as the names of the functions.
(For example
, some \CEE
/ environments
75 have \.
{<string.h
>} where others have \.
{<strings.h
>}.
)
/* string routines whose result is an integer */
extern int strlen(), /* length of string */
  strcmp(), /* compare strings lexicographically */
  strncmp(); /* compare up to $n$ string characters */
/* string routines whose result is a character pointer */
extern char *strcpy(), /* copy one string to another */
  *strncpy(); /* copy up to $n$ string characters */
84 @ \.
{CTANGLE
} has a fairly straightforward outline. It operates in
85 two phases
: First it reads the source file
, saving the \CEE
/ code in
86 compressed form
; then it shuffles and outputs the code.
88 Please read the documentation for \.
{common
}, the set of routines common
89 to \.
{CTANGLE
} and \.
{CWEAVE
}, before proceeding further.
98 @
<Set initial values@
>;
100 if
(show_banner
) printf
(banner
); /* print a ``banner line''
*/
101 phase_one
(); /* read all the user's text and compress it into |tok_mem|
*/
102 phase_two
(); /* output the contents of the compressed tables
*/
103 return wrap_up
(); /* and exit gracefully
*/
@ The following parameters were sufficient in the original \.{TANGLE} to
handle \TEX/, so they should be sufficient for most applications of \.{CTANGLE}.
109 If you change |max_bytes|
, |max_names|
, or |hash_size| you should also
110 change them in the file |
"common.w"|.
@d max_bytes 90000 /* capacity, in bytes, for the names of identifiers,
  index entries, and sections; also used in |"common.w"| */
@d max_toks 270000 /* capacity, in bytes, for compressed \CEE/ code */
@d max_names 4000 /* how many identifiers, strings, and section names
  can be stored; must be less than 10240; also used in |"common.w"| */
@d max_texts 2500 /* how many replacement texts can be stored;
  must be less than 10240 */
@d hash_size 353 /* should be prime; also used in |"common.w"| */
@d longest_name 10000 /* upper bound on the length of a section name */
@d stack_size 50 /* how many levels of macro expansion can be active at once */
@d buf_size 100 /* for \.{CWEAVE} and \.{CTANGLE} */
123 @ The next few sections contain stuff from the file |
"common.w"| that must
124 be included in both |
"ctangle.w"| and |
"cweave.w"|. It appears in
125 file |
"common.h"|
, which needs to be updated when |
"common.w"| changes.
129 @
* Data structures exclusive to
{\tt CTANGLE
}.
130 We've already seen that the |byte_mem| array holds the names of identifiers
,
131 strings
, and sections
;
132 the |tok_mem| array holds the replacement texts
133 for sections. Allocation is sequential
, since things are deleted only
134 during Phase II
, and only in a last-in-first-out manner.
136 A \
&{text} variable is a structure containing a pointer into
137 |tok_mem|
, which tells where the corresponding text starts
, and an
138 integer |text_link|
, which
, as we shall see later
, is used to connect
139 pieces of text that have the same name. All the \
&{text}s are stored in
the array |text_info|, and we use a |text_pointer| variable to refer
to a particular text.
143 The first position of |tok_mem| that is unoccupied by
144 replacement text is called |tok_ptr|
, and the first unused location of
145 |text_info| is called |text_ptr|. Thus we usually have the identity
146 |text_ptr-
>tok_start
==tok_ptr|.
148 If your machine does not support |unsigned char| you should change
149 the definition of \
&{eight\_bits} to |unsigned short|.
150 @^system dependencies@
>
154 eight_bits
*tok_start
; /* pointer into |tok_mem|
*/
155 sixteen_bits text_link
; /* relates replacement texts
*/
157 typedef text
*text_pointer
;
160 text text_info
[max_texts
];
161 text_pointer text_info_end
=text_info
+max_texts-1
;
162 text_pointer text_ptr
; /* first unused position in |text_info|
*/
163 eight_bits tok_mem
[max_toks
];
164 eight_bits
*tok_mem_end
=tok_mem
+max_toks-1
;
165 eight_bits
*tok_ptr
; /* first unused position in |tok_mem|
*/
168 text_info-
>tok_start
=tok_ptr
=tok_mem
;
169 text_ptr
=text_info
+1; text_ptr-
>tok_start
=tok_mem
;
170 /* this makes replacement text
0 of length zero
*/
172 @ If |p| is a pointer to a section name
, |p-
>equiv| is a pointer to its
173 replacement text
, an element of the array |text_info|.
175 @d equiv equiv_or_xref
/* info corresponding to names
*/
178 name_dir-
>equiv
=(char
*)text_info
; /* the undefined section has no replacement text
*/
180 @ Here's the procedure that decides whether a name of length |l|
181 starting at position |first| equals the identifier pointed to by |p|
:
184 int names_match
(p
,first
,l
)
185 name_pointer p
; /* points to the proposed match
*/
186 char
*first
; /* position of first character of string
*/
187 int l
; /* length of identifier
*/
189 if
(length
(p
)!=l
) return
0;
190 return
!strncmp
(first
,p-
>byte_start
,l
);
193 @ The common lookup routine refers to separate routines |init_node| and
194 |init_p| when the data structure grows. Actually |init_p| is called only by
195 \.
{CWEAVE
}, but we need to declare a dummy version so that
196 the loader won't complain of its absence.
203 node-
>equiv
=(char
*)text_info
;
209 Replacement texts
, which represent \CEE
/ code in a compressed format
,
210 appear in |tok_mem| as mentioned above. The codes in
211 these texts are called `tokens'
; some tokens occupy two consecutive
212 eight-bit byte positions
, and the others take just one byte.
214 If $p$ points to a replacement text
, |p-
>tok_start| is the |tok_mem| position
215 of the first eight-bit code of that text. If |p-
>text_link
==0|
,
216 this is the replacement text for a macro
, otherwise it is the replacement
217 text for a section. In the latter case |p-
>text_link| is either equal to
218 |section_flag|
, which means that there is no further text for this section
, or
219 |p-
>text_link| points to a continuation of this replacement text
; such
220 links are created when several sections have \CEE
/ texts with the same
221 name
, and they also tie together all the \CEE
/ texts of unnamed sections.
222 The replacement text pointer for the first unnamed section appears in
223 |text_info-
>text_link|
, and the most recent such pointer is |last_unnamed|.
225 @d section_flag max_texts
/* final |text_link| in section replacement texts
*/
228 text_pointer last_unnamed
; /* most recent replacement text of unnamed section
*/
230 @ @
<Set init...@
>= last_unnamed
=text_info
; text_info-
>text_link
=0;
232 @ If the first byte of a token is less than |
0200|
, the token occupies a
233 single byte. Otherwise we make a sixteen-bit token by combining two consecutive
234 bytes |a| and |b|. If |
0200<=a
<0250|
, then |
(a-0200
)@t$
{}\times2^
8$@
>+b|
235 points to an identifier
; if |
0250<=a
<0320|
, then
236 |
(a-0250
)@t$
{}\times2^
8$@
>+b| points to a section name
237 (or
, if it has the special value |output_defs_flag|
,
238 to the area where the preprocessor definitions are stored
); and if
239 |
0320<=a
<0400|
, then |
(a-0320
)@t$
{}\times2^
8$@
>+b| is the number of the section
240 in which the current replacement text appears.
242 Codes less than |
0200| are
7-bit |char| codes that represent themselves.
243 Some of the
7-bit codes will not be present
, however
, so we can
244 use them for special purposes. The following symbolic names are used
:
246 \yskip \hang |join| denotes the concatenation of adjacent items with no
247 space or line breaks allowed between them
(the \.
{@@\
&} operation of \.{CWEB}).
249 \hang |string| denotes the beginning or end of a string
, verbatim
250 construction or numerical constant.
251 @^ASCII code dependencies@
>
253 @d string
02 /* takes the place of extended ASCII \.
{\char2
} */
254 @d join
0177 /* takes the place of ASCII delete
*/
255 @d output_defs_flag
(2*024000-1)
257 @ The following procedure is used to enter a two-byte value into
258 |tok_mem| when a replacement text is being generated.
265 if
(tok_ptr
+2>tok_mem_end
) overflow
("token");
266 *tok_ptr
++=x
>>8; /* store high byte
*/
267 *tok_ptr
++=x
&0377; /* store low byte */
270 @
** Stacks for output. The output process uses a stack to keep track
271 of what is going on at different ``levels'' as the sections are being
272 written out. Entries on this stack have five parts
:
274 \yskip\hang |end_field| is the |tok_mem| location where the replacement
275 text of a particular level will end
;
277 \hang |byte_field| is the |tok_mem| location from which the next token
278 on a particular level will be read
;
280 \hang |name_field| points to the name corresponding to a particular level
;
282 \hang |repl_field| points to the replacement text currently being read
283 at a particular level
;
285 \hang |section_field| is the section number
, or zero if this is a macro.
287 \yskip\noindent The current values of these five quantities are referred to
288 quite frequently
, so they are stored in a separate place instead of in
289 the |stack| array. We call the current values |cur_end|
, |cur_byte|
,
290 |cur_name|
, |cur_repl|
, and |cur_section|.
292 The global variable |stack_ptr| tells how many levels of output are
293 currently in progress. The end of all output occurs when the stack is
294 empty
, i.e.
, when |stack_ptr
==stack|.
298 eight_bits
*end_field
; /* ending location of replacement text
*/
299 eight_bits
*byte_field
; /* present location within replacement text
*/
300 name_pointer name_field
; /* |byte_start| index for text being output
*/
301 text_pointer repl_field
; /* |tok_start| index for text being output
*/
302 sixteen_bits section_field
; /* section number or zero if not a section
*/
304 typedef output_state
*stack_pointer
;
306 @ @d cur_end cur_state.end_field
/* current ending location in |tok_mem|
*/
307 @d cur_byte cur_state.byte_field
/* location of next output byte in |tok_mem|
*/
308 @d cur_name cur_state.name_field
/* pointer to current name being expanded
*/
309 @d cur_repl cur_state.repl_field
/* pointer to current replacement text
*/
310 @d cur_section cur_state.section_field
/* current section number being expanded
*/
313 output_state cur_state
; /* |cur_end|
, |cur_byte|
, |cur_name|
, |cur_repl|
,
315 output_state stack
[stack_size
+1]; /* info for non-current levels
*/
316 stack_pointer stack_ptr
; /* first unused location in the output state stack
*/
317 stack_pointer stack_end
=stack
+stack_size
; /* end of |stack|
*/
319 @ To get the output process started
, we will perform the following
320 initialization steps. We may assume that |text_info-
>text_link| is nonzero
,
321 since it points to the \CEE
/ text in the first unnamed section that generates
322 code
; if there are no such sections
, there is nothing to output
, and an
323 error message will have been generated before we do any of the initialization.
325 @
<Initialize the output stacks@
>=
326 stack_ptr
=stack
+1; cur_name
=name_dir
; cur_repl
=text_info-
>text_link
+text_info
;
327 cur_byte
=cur_repl-
>tok_start
; cur_end
=(cur_repl
+1)->tok_start
; cur_section
=0;
329 @ When the replacement text for name |p| is to be inserted into the output
,
the following subroutine is called to save the old level of output and get
the new one going.
333 We assume that the \CEE
/ compiler can copy structures.
334 @^system dependencies@
>
338 push_level
(p
) /* suspends the current level
*/
341 if
(stack_ptr
==stack_end
) overflow
("stack");
342 *stack_ptr
=cur_state
;
344 if
(p
!=NULL) { /* |p
==NULL| means we are in |output_defs|
*/
345 cur_name
=p
; cur_repl
=(text_pointer
)p-
>equiv
;
346 cur_byte
=cur_repl-
>tok_start
; cur_end
=(cur_repl
+1)->tok_start
;
351 @ When we come to the end of a replacement text
, the |pop_level| subroutine
352 does the right thing
: It either moves to the continuation of this replacement
353 text or returns the state to the most recently stacked level.
357 pop_level
(flag
) /* do this when |cur_byte| reaches |cur_end|
*/
358 int flag
; /* |flag
==0| means we are in |output_defs|
*/
360 if
(flag
&& cur_repl->text_link<section_flag) { /* link to a continuation */
361 cur_repl
=cur_repl-
>text_link
+text_info
; /* stay on the same level
*/
362 cur_byte
=cur_repl-
>tok_start
; cur_end
=(cur_repl
+1)->tok_start
;
365 stack_ptr--
; /* go down to the previous level
*/
366 if
(stack_ptr
>stack
) cur_state
=*stack_ptr
;
369 @ The heart of the output procedure is the function |get_output|
,
370 which produces the next token of output and sends it on to the lower-level
371 function |out_char|. The main purpose of |get_output| is to handle the
372 necessary stacking and unstacking. It sends the value |section_number|
373 if the next output begins or ends the replacement text of some section
,
374 in which case |cur_val| is that section's number
(if beginning
) or the
375 negative of that value
(if ending
).
(A section number of
0 indicates
376 not the beginning or ending of a section
, but a \
&{\#line} command.)
377 And it sends the value |identifier|
378 if the next output is an identifier
, in which case
379 |cur_val| points to that identifier name.
381 @d section_number
0201 /* code returned by |get_output| for section numbers
*/
382 @d identifier
0202 /* code returned by |get_output| for identifiers
*/
385 int cur_val
; /* additional information corresponding to output token
*/
@ If |get_output| finds that no more output remains, it returns with
|stack_ptr==stack|.
389 @^high-bit character handling@
>
393 get_output
() /* sends next token to |out_char|
*/
395 sixteen_bits a
; /* value of current byte
*/
396 restart
: if
(stack_ptr
==stack
) return
;
397 if
(cur_byte
==cur_end
) {
398 cur_val
=-((int
)cur_section
); /* cast needed because of sign extension
*/
400 if
(cur_val
==0) goto restart
;
401 out_char
(section_number
); return
;
404 if
(out_state
==verbatim
&& a!=string && a!=constant && a!='\n')
405 C_putc
(a
); /* a high-bit character can occur in a string
*/
406 else if
(a
<0200) out_char
(a
); /* one-byte token
*/
408 a
=(a-0200
)*0400+*cur_byte
++;
409 switch
(a
/024000) { /* |
024000==(0250-0200)*0400|
*/
410 case
0: cur_val
=a
; out_char
(identifier
); break
;
411 case
1: if
(a
==output_defs_flag
) output_defs
();
412 else @
<Expand section |a-024000|
, |goto restart|@
>;
414 default
: cur_val
=a-050000
; if
(cur_val
>0) cur_section
=cur_val
;
415 out_char
(section_number
);
420 @ The user may have forgotten to give any \CEE
/ text for a section name
,
421 or the \CEE
/ text may have been associated with a different name by mistake.
423 @
<Expand section |a-...@
>=
426 if
((a
+name_dir
)->equiv
!=(char
*)text_info
) push_level
(a
+name_dir
);
428 printf
("\n! Not present: <");
429 print_section_name
(a
+name_dir
); err_print
(">");
430 @.Not present
: <section name
>@
>
435 @
* Producing the output.
436 The |get_output| routine above handles most of the complexity of output
437 generation
, but there are two further considerations that have a nontrivial
438 effect on \.
{CTANGLE
}'s algorithms.
@ First,
we want to make sure that the output has spaces and line breaks in
442 the right places
(e.g.
, not in the middle of a string or a constant or an
443 identifier
, not at a `\.
{@@\
&}' position
444 where quantities are being joined together
, and certainly after an \.
=
445 because the \CEE
/ compiler thinks \.
{=-} is ambiguous
).
The output process can be in one of the following states:
449 \yskip\hang |num_or_id| means that the last item in the buffer is a number or
450 identifier
, hence a blank space or line break must be inserted if the next
451 item is also a number or identifier.
453 \yskip\hang |unbreakable| means that the last item in the buffer was followed
454 by the \.
{@@\
&} operation that inhibits spaces between it and the next item.
456 \yskip\hang |verbatim| means we're copying only character tokens
, and
457 that they are to be output exactly as stored. This is the case during
458 strings
, verbatim constructions and numerical constants.
460 \yskip\hang |post_slash| means we've just output a slash.
462 \yskip\hang |normal| means none of the above.
464 \yskip\noindent Furthermore
, if the variable |protect| is positive
, newlines
465 are preceded by a `\.\\'.
@d normal 0 /* none of the special states below */
@d num_or_id 1 /* the last item output was a number or identifier */
@d post_slash 2 /* a \./ has just been output */
@d unbreakable 3 /* the last item was followed by \.{@@\&} */
@d verbatim 4 /* character tokens are being copied exactly as stored */
474 eight_bits out_state
; /* current status of partial output
*/
475 boolean protect
; /* should newline characters be quoted?
*/
477 @ Here is a routine that is invoked when we want to output the current line.
During the output process, |cur_line| equals the number of the next line
to be output.
483 flush_buffer
() /* writes one line to output file
*/
486 if
(cur_line
% 100 == 0 && show_progress) {
488 if
(cur_line
% 500 == 0) printf
("%d",cur_line
);
489 update_terminal
; /* progress report
*/
@ Second, we have modified the original \.{TANGLE} so that it will write output
on multiple files.
496 If a section name is introduced in at least one place by \.
{@@
(}
497 instead of \.
{@@
<}, we treat it as the name of a file.
498 All these special sections are saved on a stack
, |output_files|.
499 We write them out after we've done the unnamed section.
503 name_pointer output_files
[max_files
];
504 name_pointer
*cur_out_file
, *end_output_files
, *an_output_file
;
505 char cur_section_name_char
; /* is it |'
<'| or |'
('|
*/
506 char output_file_name
[longest_name
]; /* name of the file
*/
508 @ We make |end_output_files| point just beyond the end of
509 |output_files|. The stack pointer
510 |cur_out_file| starts out there. Every time we see a new file
, we
511 decrement |cur_out_file| and then write it in.
513 cur_out_file
=end_output_files
=output_files
+max_files
;
515 @ @
<If it's not there
, add |cur_section_name| to the output file stack
, or
516 complain we're out of room@
>=
518 for
(an_output_file
=cur_out_file
;
519 an_output_file
<end_output_files
; an_output_file
++)
520 if
(*an_output_file
==cur_section_name
) break
;
521 if
(an_output_file
==end_output_files
) {
522 if
(cur_out_file
>output_files
)
523 *--cur_out_file
=cur_section_name
;
525 overflow
("output files");
@* The big output switch. Here then is the routine that does the
output.
541 @
<Initialize the output stacks@
>;
542 @
<Output macro definitions if appropriate@
>;
543 if
(text_info-
>text_link
==0 && cur_out_file==end_output_files) {
544 printf
("\n! No program text was specified."); mark_harmless
;
545 @.No program text...@
>
548 if
(cur_out_file
==end_output_files
) {
550 printf
("\nWriting the output file (%s):",C_file_name
);
554 printf
("\nWriting the output files:");
555 @.Writing the output...@
>
556 printf
(" (%s)",C_file_name
);
559 if
(text_info-
>text_link
==0) goto writeloop
;
561 while
(stack_ptr
>stack
) get_output
();
563 writeloop
: @
<Write all the named output files@
>;
564 if
(show_happiness
) printf
("\nDone.");
@ To write the named output files, we proceed as for the unnamed
section.
570 The only subtlety is that we have to open each one.
572 @
<Write all the named output files@
>=
573 for
(an_output_file
=end_output_files
; an_output_file
>cur_out_file
;) {
575 sprint_section_name
(output_file_name
,*an_output_file
);
577 C_file
=fopen
(output_file_name
,"w");
578 if
(C_file
==0) fatal
("! Cannot open output file:",output_file_name
);
579 @.Cannot open output file@
>
580 printf
("\n(%s)",output_file_name
); update_terminal
;
583 cur_name
= (*an_output_file
);
584 cur_repl
= (text_pointer
)cur_name-
>equiv
;
585 cur_byte
=cur_repl-
>tok_start
;
586 cur_end
=(cur_repl
+1)->tok_start
;
587 while
(stack_ptr
> stack
) get_output
();
591 @ If a \.
{@@h
} was not encountered in the input
,
592 we go through the list of replacement texts and copy the ones
593 that refer to macros
, preceded by the \.
{\#define
} preprocessor command.
595 @
<Output macro definitions if appropriate@
>=
596 if
(!output_defs_seen
)
600 boolean output_defs_seen
=0;
611 for
(cur_text
=text_info
+1; cur_text
<text_ptr
; cur_text
++)
612 if
(cur_text-
>text_link
==0) { /* |cur_text| is the text for a macro
*/
613 cur_byte
=cur_text-
>tok_start
;
614 cur_end
=(cur_text
+1)->tok_start
;
615 C_printf
("%s","#define ");
617 protect
=1; /* newlines should be preceded by |'\\'|
*/
618 while
(cur_byte
<cur_end
) {
620 if
(cur_byte
==cur_end
&& a=='\n') break; /* disregard a final newline */
621 if
(out_state
==verbatim
&& a!=string && a!=constant && a!='\n')
622 C_putc
(a
); /* a high-bit character can occur in a string
*/
623 @^high-bit character handling@
>
624 else if
(a
<0200) out_char
(a
); /* one-byte token
*/
626 a
=(a-0200
)*0400+*cur_byte
++;
627 if
(a
<024000) { /* |
024000==(0250-0200)*0400|
*/
628 cur_val
=a
; out_char
(identifier
);
630 else if
(a
<050000) { confusion
("macro defs have strange char");}
632 cur_val
=a-050000
; cur_section
=cur_val
; out_char
(section_number
);
643 @ A many-way switch is used to send the output. Note that this function
is not called if |out_state==verbatim|, except perhaps with arguments
|'\n'| (protect the newline), |string| (end the string), or |constant|
(end the constant).
649 static void out_char
();
656 char
*j
, *k
; /* pointer into |byte_mem|
*/
659 case '\n'
: if
(protect
&& out_state!=verbatim) C_putc(' ');
660 if
(protect || out_state
==verbatim
) C_putc
('\\'
);
661 flush_buffer
(); if
(out_state
!=verbatim
) out_state
=normal
; break
;
662 @
/@t\
4@
>@
<Case of an identifier@
>;
663 @
/@t\
4@
>@
<Case of a section number@
>;
664 @
/@t\
4@
>@
<Cases like \.
{!=}@
>;
665 case '
='
: case '
>'
: C_putc
(cur_char
); C_putc
(' '
);
666 out_state
=normal
; break
;
667 case join
: out_state
=unbreakable
; break
;
668 case constant
: if
(out_state
==verbatim
) {
669 out_state
=num_or_id
; break
;
671 if
(out_state
==num_or_id
) C_putc
(' '
); out_state
=verbatim
; break
;
672 case string
: if
(out_state
==verbatim
) out_state
=normal
;
673 else out_state
=verbatim
; break
;
674 case '
/'
: C_putc
('
/'
); out_state
=post_slash
; break
;
675 case '
*'
: if
(out_state
==post_slash
) C_putc
(' '
);
677 default
: C_putc
(cur_char
); out_state
=normal
; break
;
681 @ @
<Cases like \.
{!=}@
>=
682 case plus_plus
: C_putc
('
+'
); C_putc
('
+'
); out_state
=normal
; break
;
683 case minus_minus
: C_putc
('
-'
); C_putc
('
-'
); out_state
=normal
; break
;
684 case minus_gt
: C_putc
('
-'
); C_putc
('
>'
); out_state
=normal
; break
;
685 case gt_gt
: C_putc
('
>'
); C_putc
('
>'
); out_state
=normal
; break
;
686 case eq_eq
: C_putc
('
='
); C_putc
('
='
); out_state
=normal
; break
;
687 case lt_lt
: C_putc
('
<'
); C_putc
('
<'
); out_state
=normal
; break
;
688 case gt_eq
: C_putc
('
>'
); C_putc
('
='
); out_state
=normal
; break
;
689 case lt_eq
: C_putc
('
<'
); C_putc
('
='
); out_state
=normal
; break
;
690 case not_eq
: C_putc
('
!'
); C_putc
('
='
); out_state
=normal
; break
;
691 case and_and
: C_putc
('
&'); C_putc('&'); out_state=normal; break;
692 case or_or
: C_putc
('|'
); C_putc
('|'
); out_state
=normal
; break
;
693 case dot_dot_dot
: C_putc
('.'
); C_putc
('.'
); C_putc
('.'
); out_state
=normal
;
695 case colon_colon
: C_putc
('
:'
); C_putc
('
:'
); out_state
=normal
; break
;
696 case period_ast
: C_putc
('.'
); C_putc
('
*'
); out_state
=normal
; break
;
697 case minus_gt_ast
: C_putc
('
-'
); C_putc
('
>'
); C_putc
('
*'
); out_state
=normal
;
700 @ When an identifier is output to the \CEE
/ file
, characters in the
701 range
128--255 must be changed into something else
, so the \CEE
/
702 compiler won't complain. By default
, \.
{CTANGLE
} converts the
703 character with code $
16 x
+y$ to the three characters `\.
X$xy$'
, but
704 a different transliteration table can be specified. Thus a German
705 might want
{\it gr\
"un\/} to appear as a still readable \.{gruen}.
706 This makes debugging a lot less confusing.
708 @d translit_length 10
711 char translit[128][translit_length];
716 for (i=0;i<128;i++) sprintf(translit[i],"X%02X",(unsigned)(128+i));
719 @ @<Case of an identifier@>=
721 if (out_state==num_or_id) C_putc(' ');
722 j=(cur_val+name_dir)->byte_start;
723 k=(cur_val+name_dir+1)->byte_start;
725 if ((unsigned char)(*j)<0200) C_putc(*j);
726 @^high-bit character handling@>
727 else C_printf("%s
",translit[(unsigned char)(*j)-0200]);
730 out_state=num_or_id; break;
732 @ @<Case of a sec...@>=
734 if (cur_val>0) C_printf("/*%d
:*/",cur_val);
735 else if(cur_val<0) C_printf("/*:%d
*/",-cur_val);
737 cur_byte +=4; /* skip line number and file name */
743 a+=*cur_byte++; /* gets the line number */
744 C_printf("\n#line
%d \
"",a
);
747 cur_val
=0400*(cur_val-0200
)+ *cur_byte
++; /* points to the file name
*/
748 for
(j
=(cur_val
+name_dir
)->byte_start
, k
=(cur_val
+name_dir
+1)->byte_start
;
750 if
(*j
=='\\' ||
*j
=='
"') C_putc('\\');
753 C_printf("%s
","\
"\n");
757 @
** Introduction to the input phase.
758 We have now seen that \.
{CTANGLE
} will be able to output the full
759 \CEE
/ program
, if we can only get that program into the byte memory in
760 the proper format. The input process is something like the output process
in reverse, since we compress the text as we read it in and we expand it
as we write it out.
764 There are three main input routines. The most interesting is the one that gets
765 the next token of a \CEE
/ text
; the other two are used to scan rapidly past
766 \TEX
/ text in the \.
{CWEB
} source code. One of the latter routines will jump to
the next token that starts with `\.{@@}', and the other skips to the end
of a \CEE/ text.
770 @ Control codes in \.
{CWEB
} begin with `\.
{@@
}'
, and the next character
771 identifies the code. Some of these are of interest only to \.
{CWEAVE
},
772 so \.
{CTANGLE
} ignores them
; the others are converted by \.
{CTANGLE
} into
773 internal code numbers by the |ccode| table below. The ordering
774 of these internal code numbers has been chosen to simplify the program logic
;
larger numbers are given to the control codes that denote more significant
milestones.
@d ignore 0 /* control code of no interest to \.{CTANGLE} */
@d ord 0302 /* control code for `\.{@@'}' */
@d control_text 0303 /* control code for `\.{@@t}', `\.{@@\^}',
  `\.{@@:}', `\.{@@.}', and `\.{@@q}' */
@d translit_code 0304 /* control code for `\.{@@l}' */
@d output_defs_code 0305 /* control code for `\.{@@h}' */
@d format_code 0306 /* control code for `\.{@@f}' and `\.{@@s}' */
@d definition 0307 /* control code for `\.{@@d}' */
@d begin_C 0310 /* control code for `\.{@@c}' and `\.{@@p}' */
@d section_name 0311 /* control code for `\.{@@<}' and `\.{@@(}' */
@d new_section 0312 /* control code for `\.{@@\ }' and `\.{@@*}' */
790 eight_bits ccode
[256]; /* meaning of a char following \.
{@@
} */
793 int c
; /* must be |int| so the |for| loop will end
*/
794 for
(c
=0; c
<256; c
++) ccode
[c
]=ignore
;
795 ccode
[' '
]=ccode
['\t'
]=ccode
['\n'
]=ccode
['\v'
]=ccode
['\r'
]=ccode
['\f'
]
796 =ccode
['
*'
]=new_section
;
797 ccode
['@@'
]='@@'
; ccode
['
='
]=string
;
798 ccode
['d'
]=ccode
['D'
]=definition
;
799 ccode
['f'
]=ccode
['F'
]=ccode
['s'
]=ccode
['S'
]=format_code
;
800 ccode
['c'
]=ccode
['C'
]=ccode
['p'
]=ccode
['P'
]=begin_C
;
801 ccode
['^'
]=ccode
['
:'
]=ccode
['.'
]=ccode
['t'
]=ccode
['T'
]=
802 ccode
['q'
]=ccode
['Q'
]=control_text
;
803 ccode
['h'
]=ccode
['H'
]=output_defs_code
;
804 ccode
['l'
]=ccode
['L'
]=translit_code
;
806 ccode
['
<'
]=ccode
['
('
]=section_name
;
810 @ The |skip_ahead| procedure reads through the input at fairly high speed
811 until finding the next non-ignorable control code
, which it returns.
815 skip_ahead
() /* skip to next control code
*/
817 eight_bits c
; /* control code found
*/
819 if
(loc
>limit
&& (get_line()==0)) return(new_section);
821 while
(*loc
!='@@'
) loc
++;
823 loc
++; c
=ccode
[(eight_bits
)*loc
]; loc
++;
824 if
(c
!=ignore ||
*(loc-1
)=='
>'
) return
(c
);
829 @ The |skip_comment| procedure reads through the input at somewhat high
830 speed in order to pass over comments
, which \.
{CTANGLE
} does not transmit
831 to the output. If the comment is introduced by \.
{/*}, |skip_comment|
832 proceeds until finding the end-comment token \.
{*/} or a newline
; in the
833 latter case |skip_comment| will be called again by |get_next|
, since the
834 comment is not finished. This is done so that each newline in the
835 \CEE
/ part of a section is copied to the output
; otherwise the \
&{\#line}
836 commands inserted into the \CEE
/ file by the output routines become useless.
837 On the other hand
, if the comment is introduced by \.
{//} (i.e.
, if it
838 is a \CPLUSPLUS
/ ``short comment''
), it always is simply delimited by the next
839 newline. The boolean argument |is_long_comment| distinguishes between
840 the two types of comments.
842 If |skip_comment| comes to the end of the section
, it prints an error message.
843 No comment
, long or short
, is allowed to contain `\.
{@@\
}' or `\.
{@@
*}'.
846 boolean comment_continues
=0; /* are we scanning a comment?
*/
849 int skip_comment
(is_long_comment
) /* skips over comments
*/
850 boolean is_long_comment
;
852 char c
; /* current character
*/
855 if
(is_long_comment
) {
856 if
(get_line
()) return
(comment_continues
=1);
858 err_print
("! Input ended in mid-comment");
859 @.Input ended in mid-comment@
>
860 return
(comment_continues
=0);
863 else return
(comment_continues
=0);
866 if
(is_long_comment
&& c=='*' && *loc=='/') {
867 loc
++; return
(comment_continues
=0);
870 if
(ccode
[(eight_bits
)*loc
]==new_section
) {
871 err_print
("! Section name ended in mid-comment"); loc--
;
872 @.Section name ended in mid-comment@
>
873 return
(comment_continues
=0);
880 @
* Inputting the next token.
885 name_pointer cur_section_name
; /* name of section just scanned
*/
886 int no_where
; /* suppress |print_where|?
*/
889 #include
<ctype.h
> /* definition of |isalpha|
, |isdigit| and so on
*/
890 #include
<stdlib.h
> /* definition of |exit|
*/
892 @ As one might expect
, |get_next| consists mostly of a big switch
893 that branches to the various special cases that can arise.
895 @d isxalpha
(c
) ((c
)=='_' ||
(c
)=='$'
)
896 /* non-alpha characters allowed in identifier
*/
897 @d ishigh
(c
) ((unsigned char
)(c
)>0177)
898 @^high-bit character handling@
>
902 get_next
() /* produces the next input token
*/
904 static int preprocessing
=0;
905 eight_bits c
; /* the current character
*/
/* Before the next line is fetched, decide whether a preprocessor line
   continues: it does so only when the current line ends with a backslash.
   An empty line (|limit==buffer|) has no last character, so |*(limit-1)|
   must not be examined in that case. */
if (preprocessing && (limit==buffer || *(limit-1)!='\\')) preprocessing=0;
909 if
(get_line
()==0) return
(new_section
);
910 else if
(print_where
&& !no_where) {
912 @
<Insert the line number into |tok_mem|@
>;
917 if
(comment_continues ||
(c
=='
/'
&& (*(loc+1)=='*' || *(loc+1)=='/'))) {
918 skip_comment
(comment_continues||
*(loc
+1)=='
*'
);
919 /* scan to end of comment or newline
*/
920 if
(comment_continues
) return
('\n'
);
924 if
(xisdigit
(c
) || c
=='.'
) @
<Get a constant@
>@
;
925 else if
(c
=='\'' || c
=='
"' || (c=='L'&&(*loc=='\'' || *loc=='"'
)))
927 else if
(isalpha
(c
) || isxalpha
(c
) || ishigh
(c
))
928 @
<Get an identifier@
>@
;
929 else if
(c
=='@@'
) @
<Get control code and possible section name@
>@
;
930 else if
(xisspace
(c
)) {
931 if
(!preprocessing || loc
>limit
) continue
;
932 /* we don't want a blank after a final backslash
*/
933 else return
(' '
); /* ignore spaces and tabs
, unless preprocessing
*/
935 else if
(c
=='#'
&& loc==buffer+1) preprocessing=1;
936 mistake
: @
<Compress two-symbol operator@
>@
;
941 @ The following code assigns values to the combinations \.
{++},
942 \.
{--}, \.
{->}, \.
{>=}, \.
{<=}, \.
{==}, \.
{<<}, \.
{>>}, \.
{!=}, \.
{||
} and
943 \.
{\
&\&}, and to the \CPLUSPLUS/
944 combinations \.
{...
}, \.
{::}, \.
{.
*} and \.
{->*}.
945 The compound assignment operators
(e.g.
, \.
{+=}) are
946 treated as separate tokens.
@d compress(c) if (loc++<=limit) return(c)
  /* step |loc| over the second character of a two-character operator and,
     if |loc<=limit| held before the step, return the token |c|;
     note that |loc| is advanced even when nothing is returned */
952 case '
+'
: if
(*loc
=='
+'
) compress
(plus_plus
); break
;
953 case '
-'
: if
(*loc
=='
-'
) {compress
(minus_minus
);}
954 else if
(*loc
=='
>'
) if
(*(loc
+1)=='
*'
) {loc
++; compress
(minus_gt_ast
);}
955 else compress
(minus_gt
); break
;
956 case '.'
: if
(*loc
=='
*'
) {compress
(period_ast
);}
957 else if
(*loc
=='.'
&& *(loc+1)=='.') {
958 loc
++; compress
(dot_dot_dot
);
961 case '
:'
: if
(*loc
=='
:'
) compress
(colon_colon
); break
;
962 case '
='
: if
(*loc
=='
='
) compress
(eq_eq
); break
;
963 case '
>'
: if
(*loc
=='
='
) {compress
(gt_eq
);}
964 else if
(*loc
=='
>'
) compress
(gt_gt
); break
;
965 case '
<'
: if
(*loc
=='
='
) {compress
(lt_eq
);}
966 else if
(*loc
=='
<'
) compress
(lt_lt
); break
;
967 case '
&': if (*loc=='&') compress(and_and); break;
968 case '|'
: if
(*loc
=='|'
) compress
(or_or
); break
;
969 case '
!'
: if
(*loc
=='
='
) compress
(not_eq
); break
;
972 @ @
<Get an identifier@
>= {
/* |loc| is a |char| pointer, so a high-bit character may be negative;
   the |eight_bits| casts keep such values out of the \.{<ctype.h>} tests,
   whose argument must be representable as an |unsigned char| */
while (isalpha((eight_bits)*++loc) || isdigit((eight_bits)*loc)
    || isxalpha(*loc) || ishigh(*loc));
975 id_loc
=loc
; return
(identifier
);
978 @ @
<Get a constant@
>= {
980 if
(*id_first
=='.'
&& !xisdigit(*loc)) goto mistake; /* not a constant */
981 if
(*id_first
=='
0'
) {
982 if
(*loc
=='x' ||
*loc
=='
X'
) { /* hex constant
*/
983 loc
++; while
(xisxdigit
(*loc
)) loc
++; goto found
;
986 while
(xisdigit
(*loc
)) loc
++;
989 while
(xisdigit
(*loc
)) loc
++;
991 if
(*loc
=='e' ||
*loc
=='E'
) { /* float constant
*/
992 if
(*++loc
=='
+' ||
*loc
=='
-'
) loc
++;
993 while
(xisdigit
(*loc
)) loc
++;
995 found
: while
(*loc
=='u' ||
*loc
=='U' ||
*loc
=='l' ||
*loc
=='L'
996 ||
*loc
=='f' ||
*loc
=='F'
) loc
++;
1001 @ \CEE
/ strings and character constants
, delimited by double and single
1002 quotes
, respectively
, can contain newlines or instances of their own
1003 delimiters if they are protected by a backslash. We follow this
1004 convention
, but do not allow the string to be longer than |longest_name|.
1007 char delim
= c
; /* what started the string
*/
1008 id_first
= section_text
+1;
1009 id_loc
= section_text
; *++id_loc
=delim
;
1010 if
(delim
=='L'
) { /* wide character constant
*/
1011 delim
=*loc
++; *++id_loc
=delim
;
1015 if
(*(limit-1
)!='\\'
) {
1016 err_print
("! String didn't end"); loc
=limit
; break
;
1017 @.String didn't end@
>
1020 err_print
("! Input ended in middle of string"); loc
=buffer
; break
;
1021 @.Input ended in middle of string@
>
1023 else if
(++id_loc
<=section_text_end
) *id_loc
='\n'
; /* will print as
1026 if
((c
=*loc
++)==delim
) {
1027 if
(++id_loc
<=section_text_end
) *id_loc
=c
;
1031 if
(loc
>=limit
) continue
;
1032 if
(++id_loc
<=section_text_end
) *id_loc
= '\\'
;
1035 if
(++id_loc
<=section_text_end
) *id_loc
=c
;
1037 if
(id_loc
>=section_text_end
) {
1038 printf
("\n! String too long: ");
1040 term_write
(section_text
+1,25);
1047 @ After an \.
{@@
} sign has been scanned
, the next character tells us
1048 whether there is more work to do.
1050 @
<Get control code and possible section name@
>= {
1051 c
=ccode
[(eight_bits
)*loc
++];
1053 case ignore
: continue
;
1054 case translit_code
: err_print
("! Use @@l in limbo only"); continue
;
1055 @.Use @@l in limbo...@
>
1056 case control_text
: while
((c
=skip_ahead
())=='@@'
);
1057 /* only \.
{@@@@
} and \.
{@@
>} are expected
*/
1059 err_print
("! Double @@ should be used in control text");
1060 @.Double @@ should be used...@
>
1063 cur_section_name_char
=*(loc-1
);
1064 @
<Scan the section name and make |cur_section_name| point to it@
>;
1065 case string
: @
<Scan a verbatim string@
>;
1066 case ord
: @
<Scan an ASCII constant@
>;
1071 @ After scanning a valid ASCII constant that follows
1072 \.
{@@'
}, this code plows ahead until it finds the next single quote.
1073 (Special care is taken if the quote is part of the constant.
)
1074 Anything after a valid ASCII constant is ignored
;
1075 thus
, \.
{@@'\\nopq'
} gives the same result as \.
{@@'\\n'
}.
1077 @
<Scan an ASCII constant@
>=
1080 if
(*++loc
=='\''
) loc
++;
1082 while
(*loc
!='\''
) {
1085 err_print
("! Double @@ should be used in ASCII constant");
1086 @.Double @@ should be used...@
>
1091 err_print
("! String didn't end"); loc
=limit-1
; break
;
1092 @.String didn't end@
>
1098 @ @
<Scan the section name...@
>= {
1099 char
*k
; /* pointer into |section_text|
*/
1100 @
<Put section name into |section_text|@
>;
1101 if
(k-section_text
>3 && strncmp(k-2,"...",3)==0)
1102 cur_section_name
=section_lookup
(section_text
+1,k-3
,1); /* 1 means is a prefix
*/
1103 else cur_section_name
=section_lookup
(section_text
+1,k
,0);
1104 if
(cur_section_name_char
=='
('
)
1105 @
<If it's not there
, add |cur_section_name| to the output file stack
, or
1106 complain we're out of room@
>;
1107 return
(section_name
);
1110 @ Section names are placed into the |section_text| array with consecutive spaces
,
1111 tabs
, and carriage-returns replaced by single spaces. There will be no
1112 spaces at the beginning or the end.
(We set |section_text
[0]=' '| to facilitate
1113 this
, since the |section_lookup| routine uses |section_text
[1]| as the first
1114 character of the name.
)
1116 @
<Set init...@
>=section_text
[0]=' '
;
1118 @ @
<Put section name...@
>=
1121 if
(loc
>limit
&& get_line()==0) {
1122 err_print
("! Input ended in section name");
1123 @.Input ended in section name@
>
1124 loc
=buffer
+1; break
;
1127 @
<If end of name or erroneous nesting
, |break|@
>;
1128 loc
++; if
(k
<section_text_end
) k
++;
1130 c
=' '
; if
(*(k-1
)==' '
) k--
;
1134 if
(k
>=section_text_end
) {
1135 printf
("\n! Section name too long: ");
1136 @.Section name too long@
>
1137 term_write
(section_text
+1,25);
1138 printf
("..."); mark_harmless
;
1140 if
(*k
==' '
&& k>section_text) k--;
1142 @ @
<If end of name or erroneous nesting
,...@
>=
1148 if
(ccode
[(eight_bits
)c
]==new_section
) {
1149 err_print
("! Section name didn't end"); break
;
1150 @.Section name didn't end@
>
1152 if
(ccode
[(eight_bits
)c
]==section_name
) {
1153 err_print
("! Nesting of section names not allowed"); break
;
1154 @.Nesting of section names...@
>
1156 *(++k
)='@@'
; loc
++; /* now |c
==*loc| again
*/
@ At the present point in the program we
have |ccode[(eight_bits)*(loc-1)]==string|; we set |id_first| to the beginning
of the string itself, and |id_loc| to its ending-plus-one location in the
buffer.  We also set |loc| to the position just after the ending delimiter.
1164 @
<Scan a verbatim string@
>= {
1165 id_first
=loc
++; *(limit
+1)='@@'
; *(limit
+2)='
>'
;
1166 while
(*loc
!='@@' ||
*(loc
+1)!='
>'
) loc
++;
1167 if
(loc
>=limit
) err_print
("! Verbatim string didn't end");
1168 @.Verbatim string didn't end@
>
1173 @
* Scanning a macro definition.
1174 The rules for generating the replacement texts corresponding to macros and
1175 \CEE
/ texts of a section are almost identical
; the only differences are that
1177 \yskip \item
{a
)}Section names are not allowed in macros
;
1178 in fact
, the appearance of a section name terminates such macros and denotes
1179 the name of the current section.
1181 \item
{b
)}The symbols \.
{@@d
} and \.
{@@f
} and \.
{@@c
} are not allowed after
1182 section names
, while they terminate macro definitions.
1184 \item
{c
)}Spaces are inserted after right parentheses in macros
, because the
1185 ANSI \CEE
/ preprocessor sometimes requires it.
1187 \yskip Therefore there is a single procedure |scan_repl| whose parameter
1188 |t| specifies either |macro| or |section_name|. After |scan_repl| has
1189 acted
, |cur_text| will point to the replacement text just generated
, and
1190 |next_control| will contain the control code that terminated the activity.
@d app_repl(c) {if (tok_ptr==tok_mem_end) overflow("token"); *tok_ptr++=c;}
  /* store |c| at |tok_ptr| and advance the pointer, after calling
     |overflow| if |tok_ptr| has already reached |tok_mem_end| */
1196 text_pointer cur_text
; /* replacement text formed by |scan_repl|
*/
1197 eight_bits next_control
;
1201 scan_repl
(t
) /* creates a replacement text
*/
1204 sixteen_bits a
; /* the current token
*/
1205 if
(t
==section_name
) {@
<Insert the line number into |tok_mem|@
>;}
1206 while
(1) switch
(a
=get_next
()) {
1207 @
<In cases that |a| is a non-|char| token
(|identifier|
,
1208 |section_name|
, etc.
), either process it and change |a| to a byte
1209 that should be stored
, or |continue| if |a| should be ignored
,
1210 or |goto done| if |a| signals the end of this replacement text@
>@
;
1211 case '
)'
: app_repl
(a
);
1212 if
(t
==macro
) app_repl
(' '
);
1214 default
: app_repl
(a
); /* store |a| in |tok_mem|
*/
1216 done
: next_control
=(eight_bits
) a
;
1217 if
(text_ptr
>text_info_end
) overflow
("text");
1218 cur_text
=text_ptr
; (++text_ptr
)->tok_start
=tok_ptr
;
@ Here is the code for the line number: first a |sixteen_bits| equal
to |0150000|; then the numeric line number; then a pointer to the
file name.
@<Insert the line...@>=
store_two_bytes(0150000); /* the |sixteen_bits| value that opens a line-number record */
id_first=changing? change_file_name: cur_file_name; /* name of the file being read */
id_loc=id_first+strlen(id_first);
store_two_bytes((sixteen_bits)(changing? change_line: cur_line));
{
  int file_name_index=id_lookup(id_first,id_loc,0)-name_dir;
    /* position of the file name in |name_dir| */
  app_repl((file_name_index / 0400)+0200);
  app_repl(file_name_index % 0400);
}
1235 @ @
<In cases that |a| is...@
>=
1236 case identifier
: a
=id_lookup
(id_first
,id_loc
,0)-name_dir
;
1237 app_repl
((a
/ 0400)+0200);
1238 app_repl
(a
% 0400); break
;
1239 case section_name
: if
(t
!=section_name
) goto done
;
1241 @
<Was an `@@' missed here?@
>;
1242 a
=cur_section_name-name_dir
;
1243 app_repl
((a
/ 0400)+0250);
1245 @
<Insert the line number into |tok_mem|@
>; break
;
1247 case output_defs_code
: if
(t
!=section_name
) err_print
("! Misplaced @@h");
1252 app_repl
((a
/ 0400)+0200);
1254 @
<Insert the line number into |tok_mem|@
>;
1257 case constant
: case string
:
1258 @
<Copy a string or verbatim construction or numerical constant@
>;
1260 @
<Copy an ASCII constant@
>;
1261 case definition
: case format_code
: case begin_C
: if
(t
!=section_name
) goto done
;
1263 err_print
("! @@d, @@f and @@c are ignored in C text"); continue
;
1264 @.@@d
, @@f and @@c are ignored in C text@
>
1266 case new_section
: goto done
;
1268 @ @
<Was an `@@'...@
>= {
1270 while
(*try_loc
==' '
&& try_loc<limit) try_loc++;
1271 if
(*try_loc
=='
+'
&& try_loc<limit) try_loc++;
1272 while
(*try_loc
==' '
&& try_loc<limit) try_loc++;
1273 if
(*try_loc
=='
='
) err_print
("! Missing `@@ ' before a named section");
1274 @.Missing `@@ '...@
>
1275 /* user who isn't defining a section should put newline after the name
,
1276 as explained in the manual
*/
1279 @ @
<Copy a string...@
>=
1280 app_repl
(a
); /* |string| or |constant|
*/
1281 while
(id_first
< id_loc
) { /* simplify \.
{@@@@
} pairs
*/
1282 if
(*id_first
=='@@'
) {
1283 if
(*(id_first
+1)=='@@'
) id_first
++;
1284 else err_print
("! Double @@ should be used in string");
1285 @.Double @@ should be used...@
>
1287 app_repl
(*id_first
++);
@ This section should be rewritten on machines that don't use ASCII
code internally.
1293 @^ASCII code dependencies@
>
1295 @
<Copy an ASCII constant@
>= {
1296 int c
=(eight_bits
) *id_first
;
1299 if
(c
>='
0'
&& c<='7') {
1301 if
(*(id_first
+1)>='
0'
&& *(id_first+1)<='7') {
1302 c
=8*c
+*(++id_first
) - '
0'
;
1303 if
(*(id_first
+1)>='
0'
&& *(id_first+1)<='7' && c<32)
1304 c
=8*c
+*(++id_first
)- '
0'
;
1308 case 't'
:c
='\t'
;@
+break
;
1309 case 'n'
:c
='\n'
;@
+break
;
1310 case 'b'
:c
='\b'
;@
+break
;
1311 case 'f'
:c
='\f'
;@
+break
;
1312 case 'v'
:c
='\v'
;@
+break
;
1313 case 'r'
:c
='\r'
;@
+break
;
1314 case 'a'
:c
='\
7'
;@
+break
;
1315 case '?'
:c
='?'
;@
+break
;
1317 if
(xisdigit
(*(id_first
+1))) c
=*(++id_first
)-'
0'
;
1318 else if
(xisxdigit
(*(id_first
+1))) {
1320 c
=toupper
(*id_first
)-'A'
+10;
1322 if
(xisdigit
(*(id_first
+1))) c
=16*c
+*(++id_first
)-'
0'
;
1323 else if
(xisxdigit
(*(id_first
+1))) {
1325 c
=16*c
+toupper
(*id_first
)-'A'
+10;
1328 case '\\'
:c
='\\'
;@
+break
;
1329 case '\''
:c
='\''
;@
+break
;
1330 case '\
"':c='\"'
;@
+break
;
1331 default
: err_print
("! Unrecognized escape sequence");
1332 @.Unrecognized escape sequence@
>
1335 /* at this point |c| should have been converted to its ASCII code number
*/
1337 if
(c
>=100) app_repl
('
0'
+c
/100);
1338 if
(c
>=10) app_repl
('
0'
+(c
/10)%10);
1344 @
* Scanning a section.
1345 The |scan_section| procedure starts when `\.
{@@\
}' or `\.
{@@
*}' has been
1346 sensed in the input
, and it proceeds until the end of that section. It
1347 uses |section_count| to keep track of the current section number
; with luck
,
1348 \.
{CWEAVE
} and \.
{CTANGLE
} will both assign the same numbers to sections.
1351 extern sixteen_bits section_count
; /* the current section number
*/
@ The body of |scan_section| is a loop where we look for control codes
that are significant to \.{CTANGLE}: those
that delimit a definition, the \CEE/ part of a section, or a new section.
1361 name_pointer p
; /* section name for the current section
*/
1362 text_pointer q
; /* text for the current section
*/
1363 sixteen_bits a
; /* token for left-hand side of definition
*/
1364 section_count
++; @
+ no_where
=1;
1365 if
(*(loc-1
)=='
*'
&& show_progress) { /* starred section */
1366 printf
("*%d",section_count
); update_terminal
;
1370 @
<Skip ahead until |next_control| corresponds to \.
{@@d
}, \.
{@@
<},
1371 \.
{@@\
} or the like@
>;
1372 if
(next_control
== definition
) { /* \.
{@@d
} */
1373 @
<Scan a definition@
>@
;
1376 if
(next_control
== begin_C
) { /* \.
{@@c
} or \.
{@@p
} */
1379 if
(next_control
== section_name
) { /* \.
{@@
<} or \.
{@@
(} */
1381 @
<If section is not being defined
, |continue| @
>;
1384 return
; /* \.
{@@\
} or \.
{@@
*} */
1386 no_where
=print_where
=0;
1387 @
<Scan the \CEE
/ part of the current section@
>;
1390 @ At the top of this loop
, if |next_control
==section_name|
, the
1391 section name has already been scanned
(see |@
<Get control code
1392 and...@
>|
). Thus
, if we encounter |next_control
==section_name| in the
1393 skip-ahead process
, we should likewise scan the section name
, so later
1394 processing will be the same in both cases.
1396 @
<Skip ahead until |next_control| ...@
>=
1397 while
(next_control
<definition
)
1398 /* |definition| is the lowest of the ``significant'' codes
*/
1399 if
((next_control
=skip_ahead
())==section_name
){
1400 loc-
=2; next_control
=get_next
();
1403 @ @
<Scan a definition@
>= {
1404 while
((next_control
=get_next
())=='\n'
); /*allow newline before definition
*/
1405 if
(next_control
!=identifier
) {
1406 err_print
("! Definition flushed, must start with identifier");
1407 @.Definition flushed...@
>
1410 app_repl
(((a
=id_lookup
(id_first
,id_loc
,0)-name_dir
) / 0400)+0200);
1411 /* append the lhs
*/
1413 if
(*loc
!='
('
) { /* identifier must be separated from replacement text
*/
1414 app_repl
(string
); app_repl
(' '
); app_repl
(string
);
1417 cur_text-
>text_link
=0; /* |text_link
==0| characterizes a macro
*/
@ If the section name is not followed by \.{=} or \.{+=}, no \CEE/
code is forthcoming: the section is being cited, not being
defined.  This use is illegal after the definition part of the
current section has started, except inside a comment, but
\.{CTANGLE} does not enforce this rule; it simply ignores the offending
section name and everything following it, up to the next significant
control code.
1428 @
<If section is not being defined
, |continue| @
>=
1429 while
((next_control
=get_next
())=='
+'
); /* allow optional \.
{+=} */
1430 if
(next_control
!='
='
&& next_control!=eq_eq)
1433 @ @
<Scan the \CEE
/...@
>=
1434 @
<Insert the section number into |tok_mem|@
>;
1435 scan_repl
(section_name
); /* now |cur_text| points to the replacement text
*/
1436 @
<Update the data structure so that the replacement text is accessible@
>;
1438 @ @
<Insert the section number...@
>=
1439 store_two_bytes
((sixteen_bits
)(0150000+section_count
));
1440 /* |
0150000==0320*0400|
*/
1442 @ @
<Update the data...@
>=
1443 if
(p
==name_dir||p
==0) { /* unnamed section
, or bad section name
*/
1444 (last_unnamed
)->text_link
=cur_text-text_info
; last_unnamed
=cur_text
;
1446 else if
(p-
>equiv
==(char
*)text_info
) p-
>equiv
=(char
*)cur_text
;
1447 /* first section of this name
*/
1449 q
=(text_pointer
)p-
>equiv
;
1450 while
(q-
>text_link
<section_flag
)
1451 q
=q-
>text_link
+text_info
; /* find end of list
*/
1452 q-
>text_link
=cur_text-text_info
;
1454 cur_text-
>text_link
=section_flag
;
1455 /* mark this replacement text as a nonmacro
*/
1467 while
(!input_has_ended
) scan_section
();
1472 @ Only a small subset of the control codes is legal in limbo
, so limbo
1473 processing is straightforward.
1484 if
(loc
>limit
&& get_line()==0) return;
1486 while
(*loc
!='@@'
) loc
++;
1489 if
(ccode
[(eight_bits
)c
]==new_section
) break
;
1490 switch
(ccode
[(eight_bits
)c
]) {
1491 case translit_code
: @
<Read in transliteration of a character@
>; break
;
1492 case format_code
: case '@@'
: break
;
1493 case control_text
: if
(c
=='q' || c
=='Q'
) {
1494 while
((c
=skip_ahead
())=='@@'
);
1496 err_print
("! Double @@ should be used in control text");
1497 @.Double @@ should be used...@
>
1499 } /* otherwise fall through
*/
1500 default
: err_print
("! Double @@ should be used in limbo");
1501 @.Double @@ should be used...@
>
1507 @ @
<Read in transliteration of a character@
>=
1508 while
(xisspace
(*loc
)&&loc<limit) loc++;
1510 if
(loc
>limit ||
!xisxdigit
(*(loc-3
)) ||
!xisxdigit
(*(loc-2
)) @|
1511 ||
(*(loc-3
)>='
0'
&& *(loc-3)<='7') || !xisspace(*(loc-1)))
1512 err_print
("! Improper hex number following @@l");
1513 @.Improper hex number...@
>
1517 sscanf
(loc-3
,"%x",&i);
1518 while
(xisspace
(*loc
)&&loc<limit) loc++;
1520 while
(loc
<limit
&&(xisalpha(*loc)||xisdigit(*loc)||*loc=='_')) loc++;
1521 if
(loc-beg
>=translit_length
)
1522 err_print
("! Replacement string in @@l too long");
1523 @.Replacement string in @@l...@
>
1525 strncpy
(translit
[i-0200
],beg
,loc-beg
);
1526 translit
[i-0200
][loc-beg
]='\
0'
;
1530 @ Because on some systems the difference between two pointers is a |long|
1531 but not an |int|
, we use \.
{\
%ld
} to print these quantities.
1536 printf
("\nMemory usage statistics:\n");
1537 printf
("%ld names (out of %ld)\n",
1538 (long
)(name_ptr-name_dir
),(long
)max_names
);
1539 printf
("%ld replacement texts (out of %ld)\n",
1540 (long
)(text_ptr-text_info
),(long
)max_texts
);
1541 printf
("%ld bytes (out of %ld)\n",
1542 (long
)(byte_ptr-byte_mem
),(long
)max_bytes
);
1543 printf
("%ld tokens (out of %ld)\n",
1544 (long
)(tok_ptr-tok_mem
),(long
)max_toks
);
1548 Here is a cross-reference table for \.
{CTANGLE
}.
1549 All sections in which an identifier is
1550 used are listed with that identifier
, except that reserved words are
1551 indexed only when they appear in format definitions
, and the appearances
1552 of identifiers in section names are not indexed. Underlined entries
1553 correspond to where the identifier was declared. Error messages and
1554 a few other things like ``ASCII code dependencies'' are indexed here too.