2 * Copyright (c) 1985 Sun Microsystems, Inc.
3 * Copyright (c) 1980, 1993
4 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * @(#)lexi.c 8.1 (Berkeley) 6/6/93
32 * $FreeBSD: src/usr.bin/indent/lexi.c,v 1.21 2010/04/15 21:41:07 avg Exp $
36 * Here we have the token scanner for indent. It scans off one token and puts
37 * it in the global variable "token". It returns a code, indicating the type
46 #include "indent_globs.h"
47 #include "indent_codes.h"
58 struct templ specials
[1000] =
94 { /* this is used to facilitate the decision of
95 * what type (alphanumeric, operator) each
97 0, 0, 0, 0, 0, 0, 0, 0,
98 0, 0, 0, 0, 0, 0, 0, 0,
99 0, 0, 0, 0, 0, 0, 0, 0,
100 0, 0, 0, 0, 0, 0, 0, 0,
101 0, 3, 0, 0, 1, 3, 3, 0,
102 0, 0, 3, 3, 0, 3, 0, 3,
103 1, 1, 1, 1, 1, 1, 1, 1,
104 1, 1, 0, 0, 3, 3, 3, 3,
105 0, 1, 1, 1, 1, 1, 1, 1,
106 1, 1, 1, 1, 1, 1, 1, 1,
107 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 0, 0, 0, 3, 1,
109 0, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 0, 3, 0, 3, 0
118 int unary_delim
; /* this is set to 1 if the current token
119 * forces a following operator to be unary */
120 static int last_code
; /* the last token type returned */
121 static int l_struct
; /* set to 1 if the last token was 'struct' */
122 int code
; /* internal code to be returned */
123 char qchar
; /* the delimiter character for a string */
125 e_token
= s_token
; /* point to start of place to save token */
127 ps
.col_1
= ps
.last_nl
; /* tell world that this token started in
128 * column 1 iff the last thing scanned was nl */
131 while (*buf_ptr
== ' ' || *buf_ptr
== '\t') { /* get rid of blanks */
132 ps
.col_1
= false; /* leading blanks imply token is not in column
134 if (++buf_ptr
>= buf_end
)
138 /* Scan an alphanumeric token */
139 if (chartype
[(int)*buf_ptr
] == alphanum
|| (buf_ptr
[0] == '.' && isdigit(buf_ptr
[1]))) {
141 * we have a character or number
143 const char *j
; /* used for searching thru list of
148 if (isdigit(*buf_ptr
) || (buf_ptr
[0] == '.' && isdigit(buf_ptr
[1]))) {
152 if (*buf_ptr
== '0' &&
153 (buf_ptr
[1] == 'x' || buf_ptr
[1] == 'X')) {
154 *e_token
++ = *buf_ptr
++;
155 *e_token
++ = *buf_ptr
++;
156 while (isxdigit(*buf_ptr
)) {
158 *e_token
++ = *buf_ptr
++;
163 if (*buf_ptr
== '.') {
170 *e_token
++ = *buf_ptr
++;
171 if (!isdigit(*buf_ptr
) && *buf_ptr
!= '.') {
172 if ((*buf_ptr
!= 'E' && *buf_ptr
!= 'e') || seenexp
)
178 *e_token
++ = *buf_ptr
++;
179 if (*buf_ptr
== '+' || *buf_ptr
== '-')
180 *e_token
++ = *buf_ptr
++;
185 if (!(seensfx
& 1) &&
186 (*buf_ptr
== 'U' || *buf_ptr
== 'u')) {
188 *e_token
++ = *buf_ptr
++;
192 if (!(seensfx
& 2) &&
193 (*buf_ptr
== 'L' || *buf_ptr
== 'l')) {
195 if (buf_ptr
[1] == buf_ptr
[0])
196 *e_token
++ = *buf_ptr
++;
197 *e_token
++ = *buf_ptr
++;
205 while (chartype
[(int)*buf_ptr
] == alphanum
|| *buf_ptr
== BACKSLASH
) {
206 /* fill_buffer() terminates buffer with newline */
207 if (*buf_ptr
== BACKSLASH
) {
208 if (*(buf_ptr
+ 1) == '\n') {
210 if (buf_ptr
>= buf_end
)
217 *e_token
++ = *buf_ptr
++;
218 if (buf_ptr
>= buf_end
)
222 while (*buf_ptr
== ' ' || *buf_ptr
== '\t') { /* get rid of blanks */
223 if (++buf_ptr
>= buf_end
)
226 ps
.its_a_keyword
= false;
227 ps
.sizeof_keyword
= false;
228 if (l_struct
&& !ps
.p_l_follow
) {
229 /* if last token was 'struct' and we're not
230 * in parentheses, then this token
231 * should be treated as a declaration */
237 ps
.last_u_d
= l_struct
; /* Operator after identifier is binary
238 * unless last token was 'struct' */
240 last_code
= ident
; /* Remember that this is the code we will
244 const char *q
= s_token
;
245 size_t q_len
= strlen(q
);
246 /* Check if we have an "_t" in the end */
248 (strcmp(q
+ q_len
- 2, "_t") == 0)) {
249 ps
.its_a_keyword
= true;
251 goto found_auto_typedef
;
256 * This loop will check if the token is a keyword.
258 for (p
= specials
; (j
= p
->rwd
) != NULL
; p
++) {
259 const char *q
= s_token
; /* point at scanned token */
260 if (*j
++ != *q
++ || *j
++ != *q
++)
261 continue; /* This test depends on the fact that
262 * identifiers are always at least 1 character
263 * long (ie. the first two bytes of the
264 * identifier are always meaningful) */
266 break; /* If it's a one-character identifier */
269 goto found_keyword
; /* I wish that C had a multi-level
272 if (p
->rwd
) { /* we have a keyword */
274 ps
.its_a_keyword
= true;
277 case 1: /* it is a switch */
279 case 2: /* a case or default */
282 case 3: /* a "struct" */
284 * Next time around, we will want to know that we have had a
290 case 4: /* one of the declaration keywords */
293 ps
.cast_mask
|= (1 << ps
.p_l_follow
) & ~ps
.sizeof_mask
;
294 break; /* inside parens: cast, param list or sizeof */
299 case 5: /* if, while, for */
302 case 6: /* do, else */
306 ps
.sizeof_keyword
= true;
308 default: /* all others are treated like any other
311 } /* end of switch */
312 } /* end of if (p->rwd) */
313 if (*buf_ptr
== '(' && ps
.tos
<= 1 && ps
.ind_level
== 0) {
316 if (*tp
++ == ')' && (*tp
== ';' || *tp
== ','))
318 strncpy(ps
.procname
, token
, sizeof ps
.procname
- 1);
319 ps
.in_parameter_declaration
= 1;
324 * The following hack attempts to guess whether or not the current
325 * token is in fact a declaration keyword -- one that has been
328 if (((*buf_ptr
== '*' && buf_ptr
[1] != '=') || isalpha(*buf_ptr
) || *buf_ptr
== '_')
331 && (ps
.last_token
== rparen
|| ps
.last_token
== semicolon
||
332 ps
.last_token
== decl
||
333 ps
.last_token
== lbrace
|| ps
.last_token
== rbrace
)) {
334 ps
.its_a_keyword
= true;
339 if (last_code
== decl
) /* if this is a declared variable, then
340 * following sign is unary */
341 ps
.last_u_d
= true; /* will make "int a -1" work */
343 return (ident
); /* the ident is not in the list */
344 } /* end of processing for alphanumeric character */
346 /* Scan a non-alphanumeric token */
348 *e_token
++ = *buf_ptr
; /* if it is only a one-character token, it is
351 if (++buf_ptr
>= buf_end
)
356 unary_delim
= ps
.last_u_d
;
357 ps
.last_nl
= true; /* remember that we just had a newline */
358 code
= (had_eof
? 0 : newline
);
361 * if data has been exhausted, the newline is a dummy, and we should
362 * return code to stop
366 case '\'': /* start of quoted character */
367 case '"': /* start of string */
373 e_token
= chfont(&bodyf
, &stringf
, e_token
);
375 do { /* copy the string */
376 while (1) { /* move one character or [/<char>]<char> */
377 if (*buf_ptr
== '\n') {
378 diag2(1, "Unterminated literal");
381 CHECK_SIZE_TOKEN
; /* Only have to do this once in this loop,
382 * since CHECK_SIZE guarantees that there
383 * are at least 5 entries left */
384 *e_token
= *buf_ptr
++;
385 if (buf_ptr
>= buf_end
)
387 if (*e_token
== BACKSLASH
) { /* if escape, copy extra char */
388 if (*buf_ptr
== '\n') /* check for escaped newline */
391 *++e_token
= BACKSLASH
;
392 if (*buf_ptr
== BACKSLASH
)
393 *++e_token
= BACKSLASH
;
395 *++e_token
= *buf_ptr
++;
396 ++e_token
; /* we must increment this again because we
397 * copied two chars */
398 if (buf_ptr
>= buf_end
)
402 break; /* we copied one character */
403 } /* end of while (1) */
404 } while (*e_token
++ != qchar
);
406 e_token
= chfont(&stringf
, &bodyf
, e_token
- 1);
426 unary_delim
= ps
.last_u_d
;
449 * if (ps.in_or_st) ps.block_init = 1;
451 /* ? code = ps.block_init ? lparen : lbrace; */
457 /* ? code = ps.block_init ? rparen : rbrace; */
461 case 014: /* a form feed */
462 unary_delim
= ps
.last_u_d
;
463 ps
.last_nl
= true; /* remember this so we can set 'ps.col_1'
479 case '+': /* check for -, +, --, ++ */
480 code
= (ps
.last_u_d
? unary_op
: binary_op
);
483 if (*buf_ptr
== token
[0]) {
484 /* check for doubled character */
485 *e_token
++ = *buf_ptr
++;
486 /* buffer overflow will be checked at end of loop */
487 if (last_code
== ident
|| last_code
== rparen
) {
488 code
= (ps
.last_u_d
? unary_op
: postop
);
489 /* check for following ++ or -- */
493 else if (*buf_ptr
== '=')
494 /* check for operator += */
495 *e_token
++ = *buf_ptr
++;
496 else if (*buf_ptr
== '>') {
497 /* check for operator -> */
498 *e_token
++ = *buf_ptr
++;
499 if (!pointer_as_binop
) {
502 ps
.want_blank
= false;
505 break; /* buffer overflow will be checked at end of
512 if (chartype
[*buf_ptr
] == opchar
) { /* we have two char assignment */
513 e_token
[-1] = *buf_ptr
++;
514 if ((e_token
[-1] == '<' || e_token
[-1] == '>') && e_token
[-1] == *buf_ptr
)
515 *e_token
++ = *buf_ptr
++;
516 *e_token
++ = '='; /* Flip =+ to += */
520 if (*buf_ptr
== '=') {/* == */
521 *e_token
++ = '='; /* store the second '=' of "==" */
529 /* can drop thru!!! */
533 case '!': /* ops like <, <<, <=, !=, etc */
534 if (*buf_ptr
== '>' || *buf_ptr
== '<' || *buf_ptr
== '=') {
535 *e_token
++ = *buf_ptr
;
536 if (++buf_ptr
>= buf_end
)
540 *e_token
++ = *buf_ptr
++;
541 code
= (ps
.last_u_d
? unary_op
: binary_op
);
546 if (token
[0] == '/' && *buf_ptr
== '*') {
547 /* it is start of comment */
550 if (++buf_ptr
>= buf_end
)
554 unary_delim
= ps
.last_u_d
;
557 while (*(e_token
- 1) == *buf_ptr
|| *buf_ptr
== '=') {
559 * handle ||, &&, etc, and also things as in int *****i
561 *e_token
++ = *buf_ptr
;
562 if (++buf_ptr
>= buf_end
)
565 code
= (ps
.last_u_d
? unary_op
: binary_op
);
569 } /* end of switch */
570 if (code
!= newline
) {
574 if (buf_ptr
>= buf_end
) /* check for input buffer empty */
576 ps
.last_u_d
= unary_delim
;
577 *e_token
= '\0'; /* null terminate the token */
582 * Add the given keyword to the keyword table, using val as the keyword type
585 addkey(char *key
, int val
)
587 struct templ
*p
= specials
;
589 if (p
->rwd
[0] == key
[0] && strcmp(p
->rwd
, key
) == 0)
593 if (p
>= specials
+ sizeof specials
/ sizeof specials
[0])
594 return; /* For now, table overflows are silently