2 * Copyright (c) 1985 Sun Microsystems, Inc.
3 * Copyright (c) 1980, 1993
4 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * @(#)lexi.c 8.1 (Berkeley) 6/6/93
36 * $FreeBSD: src/usr.bin/indent/lexi.c,v 1.19 2005/11/20 13:48:15 dds Exp $
37 * $DragonFly: src/usr.bin/indent/lexi.c,v 1.3 2005/04/10 20:55:38 drhodus Exp $
41 * Here we have the token scanner for indent. It scans off one token and puts
42 * it in the global variable "token". It returns a code, indicating the type
51 #include "indent_globs.h"
52 #include "indent_codes.h"
63 struct templ specials
[1000] =
99 { /* this is used to facilitate the decision of
100 * what type (alphanumeric, operator) each
102 0, 0, 0, 0, 0, 0, 0, 0,
103 0, 0, 0, 0, 0, 0, 0, 0,
104 0, 0, 0, 0, 0, 0, 0, 0,
105 0, 0, 0, 0, 0, 0, 0, 0,
106 0, 3, 0, 0, 1, 3, 3, 0,
107 0, 0, 3, 3, 0, 3, 0, 3,
108 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 0, 0, 3, 3, 3, 3,
110 0, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 1, 0, 0, 0, 3, 1,
114 0, 1, 1, 1, 1, 1, 1, 1,
115 1, 1, 1, 1, 1, 1, 1, 1,
116 1, 1, 1, 1, 1, 1, 1, 1,
117 1, 1, 1, 0, 3, 0, 3, 0
123 int unary_delim
; /* this is set to 1 if the current token
124 * forces a following operator to be unary */
125 static int last_code
; /* the last token type returned */
126 static int l_struct
; /* set to 1 if the last token was 'struct' */
127 int code
; /* internal code to be returned */
128 char qchar
; /* the delimiter character for a string */
130 e_token
= s_token
; /* point to start of place to save token */
132 ps
.col_1
= ps
.last_nl
; /* tell world that this token started in
133 * column 1 iff the last thing scanned was nl */
136 while (*buf_ptr
== ' ' || *buf_ptr
== '\t') { /* get rid of blanks */
137 ps
.col_1
= false; /* leading blanks imply token is not in column
139 if (++buf_ptr
>= buf_end
)
143 /* Scan an alphanumeric token */
144 if (chartype
[(int)*buf_ptr
] == alphanum
|| (buf_ptr
[0] == '.' && isdigit(buf_ptr
[1]))) {
146 * we have a character or number
148 const char *j
; /* used for searching thru list of
153 if (isdigit(*buf_ptr
) || (buf_ptr
[0] == '.' && isdigit(buf_ptr
[1]))) {
157 if (*buf_ptr
== '0' &&
158 (buf_ptr
[1] == 'x' || buf_ptr
[1] == 'X')) {
159 *e_token
++ = *buf_ptr
++;
160 *e_token
++ = *buf_ptr
++;
161 while (isxdigit(*buf_ptr
)) {
163 *e_token
++ = *buf_ptr
++;
168 if (*buf_ptr
== '.') {
175 *e_token
++ = *buf_ptr
++;
176 if (!isdigit(*buf_ptr
) && *buf_ptr
!= '.') {
177 if ((*buf_ptr
!= 'E' && *buf_ptr
!= 'e') || seenexp
)
183 *e_token
++ = *buf_ptr
++;
184 if (*buf_ptr
== '+' || *buf_ptr
== '-')
185 *e_token
++ = *buf_ptr
++;
190 if (!(seensfx
& 1) &&
191 (*buf_ptr
== 'U' || *buf_ptr
== 'u')) {
193 *e_token
++ = *buf_ptr
++;
197 if (!(seensfx
& 2) &&
198 (*buf_ptr
== 'L' || *buf_ptr
== 'l')) {
200 if (buf_ptr
[1] == buf_ptr
[0])
201 *e_token
++ = *buf_ptr
++;
202 *e_token
++ = *buf_ptr
++;
210 while (chartype
[(int)*buf_ptr
] == alphanum
|| *buf_ptr
== BACKSLASH
) {
211 /* fill_buffer() terminates buffer with newline */
212 if (*buf_ptr
== BACKSLASH
) {
213 if (*(buf_ptr
+ 1) == '\n') {
215 if (buf_ptr
>= buf_end
)
222 *e_token
++ = *buf_ptr
++;
223 if (buf_ptr
>= buf_end
)
227 while (*buf_ptr
== ' ' || *buf_ptr
== '\t') { /* get rid of blanks */
228 if (++buf_ptr
>= buf_end
)
231 ps
.its_a_keyword
= false;
232 ps
.sizeof_keyword
= false;
233 if (l_struct
&& !ps
.p_l_follow
) {
234 /* if last token was 'struct' and we're not
235 * in parentheses, then this token
236 * should be treated as a declaration */
242 ps
.last_u_d
= l_struct
; /* Operator after identifier is binary
243 * unless last token was 'struct' */
245 last_code
= ident
; /* Remember that this is the code we will
249 * This loop will check if the token is a keyword.
251 for (p
= specials
; (j
= p
->rwd
) != 0; p
++) {
252 const char *q
= s_token
; /* point at scanned token */
253 if (*j
++ != *q
++ || *j
++ != *q
++)
254 continue; /* This test depends on the fact that
255 * identifiers are always at least 1 character
256 * long (ie. the first two bytes of the
257 * identifier are always meaningful) */
259 break; /* If it's a one-character identifier */
262 goto found_keyword
; /* I wish that C had a multi-level
265 if (p
->rwd
) { /* we have a keyword */
267 ps
.its_a_keyword
= true;
270 case 1: /* it is a switch */
272 case 2: /* a case or default */
275 case 3: /* a "struct" */
277 * Next time around, we will want to know that we have had a
283 case 4: /* one of the declaration keywords */
285 ps
.cast_mask
|= (1 << ps
.p_l_follow
) & ~ps
.sizeof_mask
;
286 break; /* inside parens: cast, param list or sizeof */
291 case 5: /* if, while, for */
294 case 6: /* do, else */
298 ps
.sizeof_keyword
= true;
299 default: /* all others are treated like any other
302 } /* end of switch */
303 } /* end of if (p->rwd) */
304 if (*buf_ptr
== '(' && ps
.tos
<= 1 && ps
.ind_level
== 0) {
307 if (*tp
++ == ')' && (*tp
== ';' || *tp
== ','))
309 strncpy(ps
.procname
, token
, sizeof ps
.procname
- 1);
310 ps
.in_parameter_declaration
= 1;
315 * The following hack attempts to guess whether or not the current
316 * token is in fact a declaration keyword -- one that has been
319 if (((*buf_ptr
== '*' && buf_ptr
[1] != '=') || isalpha(*buf_ptr
) || *buf_ptr
== '_')
322 && (ps
.last_token
== rparen
|| ps
.last_token
== semicolon
||
323 ps
.last_token
== decl
||
324 ps
.last_token
== lbrace
|| ps
.last_token
== rbrace
)) {
325 ps
.its_a_keyword
= true;
330 if (last_code
== decl
) /* if this is a declared variable, then
331 * following sign is unary */
332 ps
.last_u_d
= true; /* will make "int a -1" work */
334 return (ident
); /* the ident is not in the list */
335 } /* end of processing for alphanumeric character */
337 /* Scan a non-alphanumeric token */
339 *e_token
++ = *buf_ptr
; /* if it is only a one-character token, it is
342 if (++buf_ptr
>= buf_end
)
347 unary_delim
= ps
.last_u_d
;
348 ps
.last_nl
= true; /* remember that we just had a newline */
349 code
= (had_eof
? 0 : newline
);
352 * if data has been exhausted, the newline is a dummy, and we should
353 * return code to stop
357 case '\'': /* start of quoted character */
358 case '"': /* start of string */
364 e_token
= chfont(&bodyf
, &stringf
, e_token
);
366 do { /* copy the string */
367 while (1) { /* move one character or [/<char>]<char> */
368 if (*buf_ptr
== '\n') {
369 diag2(1, "Unterminated literal");
372 CHECK_SIZE_TOKEN
; /* Only have to do this once in this loop,
373 * since CHECK_SIZE guarantees that there
374 * are at least 5 entries left */
375 *e_token
= *buf_ptr
++;
376 if (buf_ptr
>= buf_end
)
378 if (*e_token
== BACKSLASH
) { /* if escape, copy extra char */
379 if (*buf_ptr
== '\n') /* check for escaped newline */
382 *++e_token
= BACKSLASH
;
383 if (*buf_ptr
== BACKSLASH
)
384 *++e_token
= BACKSLASH
;
386 *++e_token
= *buf_ptr
++;
387 ++e_token
; /* we must increment this again because we
388 * copied two chars */
389 if (buf_ptr
>= buf_end
)
393 break; /* we copied one character */
394 } /* end of while (1) */
395 } while (*e_token
++ != qchar
);
397 e_token
= chfont(&stringf
, &bodyf
, e_token
- 1);
417 unary_delim
= ps
.last_u_d
;
440 * if (ps.in_or_st) ps.block_init = 1;
442 /* ? code = ps.block_init ? lparen : lbrace; */
448 /* ? code = ps.block_init ? rparen : rbrace; */
452 case 014: /* a form feed */
453 unary_delim
= ps
.last_u_d
;
454 ps
.last_nl
= true; /* remember this so we can set 'ps.col_1'
470 case '+': /* check for -, +, --, ++ */
471 code
= (ps
.last_u_d
? unary_op
: binary_op
);
474 if (*buf_ptr
== token
[0]) {
475 /* check for doubled character */
476 *e_token
++ = *buf_ptr
++;
477 /* buffer overflow will be checked at end of loop */
478 if (last_code
== ident
|| last_code
== rparen
) {
479 code
= (ps
.last_u_d
? unary_op
: postop
);
480 /* check for following ++ or -- */
484 else if (*buf_ptr
== '=')
485 /* check for operator += */
486 *e_token
++ = *buf_ptr
++;
487 else if (*buf_ptr
== '>') {
488 /* check for operator -> */
489 *e_token
++ = *buf_ptr
++;
490 if (!pointer_as_binop
) {
493 ps
.want_blank
= false;
496 break; /* buffer overflow will be checked at end of
503 if (chartype
[*buf_ptr
] == opchar
) { /* we have two char assignment */
504 e_token
[-1] = *buf_ptr
++;
505 if ((e_token
[-1] == '<' || e_token
[-1] == '>') && e_token
[-1] == *buf_ptr
)
506 *e_token
++ = *buf_ptr
++;
507 *e_token
++ = '='; /* Flip =+ to += */
511 if (*buf_ptr
== '=') {/* == */
512 *e_token
++ = '='; /* append the second '=' of "==" */
520 /* can drop thru!!! */
524 case '!': /* ops like <, <<, <=, !=, etc */
525 if (*buf_ptr
== '>' || *buf_ptr
== '<' || *buf_ptr
== '=') {
526 *e_token
++ = *buf_ptr
;
527 if (++buf_ptr
>= buf_end
)
531 *e_token
++ = *buf_ptr
++;
532 code
= (ps
.last_u_d
? unary_op
: binary_op
);
537 if (token
[0] == '/' && *buf_ptr
== '*') {
538 /* it is start of comment */
541 if (++buf_ptr
>= buf_end
)
545 unary_delim
= ps
.last_u_d
;
548 while (*(e_token
- 1) == *buf_ptr
|| *buf_ptr
== '=') {
550 * handle ||, &&, etc, and also things as in int *****i
552 *e_token
++ = *buf_ptr
;
553 if (++buf_ptr
>= buf_end
)
556 code
= (ps
.last_u_d
? unary_op
: binary_op
);
560 } /* end of switch */
561 if (code
!= newline
) {
565 if (buf_ptr
>= buf_end
) /* check for input buffer empty */
567 ps
.last_u_d
= unary_delim
;
568 *e_token
= '\0'; /* null terminate the token */
573 * Add the given keyword to the keyword table, using val as the keyword type
576 addkey(char *key
, int val
)
578 struct templ
*p
= specials
;
580 if (p
->rwd
[0] == key
[0] && strcmp(p
->rwd
, key
) == 0)
584 if (p
>= specials
+ sizeof specials
/ sizeof specials
[0])
585 return; /* For now, table overflows are silently