2 * SPDX-License-Identifier: BSD-4-Clause
4 * Copyright (c) 1985 Sun Microsystems, Inc.
5 * Copyright (c) 1980, 1993
6 * The Regents of the University of California. All rights reserved.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * @(#)lexi.c 8.1 (Berkeley) 6/6/93
34 * $FreeBSD: head/usr.bin/indent/lexi.c 337862 2018-08-15 18:19:45Z pstef $
38 * Here we have the token scanner for indent. It scans off one token and puts
39 * it in the global variable "token". It returns a code, indicating the type of token scanned.
48 #include <sys/param.h>
50 #include "indent_globs.h"
51 #include "indent_codes.h"
60 * This table has to be sorted alphabetically, because it'll be used in binary
61 * search. For the same reason, string must be the first thing in struct templ.
63 struct templ specials
[] =
/*
 * Array of known type names.  lexi() looks tokens up in it with bsearch()
 * over typename_top + 1 entries, so the entries have to stay sorted.
 */
109 const char **typenames
;
/* Index of the last used slot in typenames; -1 while the array is empty. */
111 int typename_top
= -1;
114 * The transition table below was rewritten by hand from lx's output, given
115 * the following definitions. lx is Katherine Flavel's lexer generator.
117 * O = /[0-7]/; D = /[0-9]/; NZ = /[1-9]/;
118 * H = /[a-f0-9]/i; B = /[0-1]/; HP = /0x/i;
119 * BP = /0b/i; E = /e[+\-]?/i D+; P = /p[+\-]?/i D+;
120 * FS = /[fl]/i; IS = /u/i /(l|L|ll|LL)/? | /(l|L|ll|LL)/ /u/i?;
122 * D+ E FS? -> $float;
123 * D* "." D+ E? FS? -> $float;
124 * D+ "." E? FS? -> $float; HP H+ IS? -> $int;
125 * HP H+ P FS? -> $float; NZ D* IS? -> $int;
126 * HP H* "." H+ P FS? -> $float; "0" O* IS? -> $int;
127 * HP H+ "." P FS -> $float; BP B+ IS? -> $int;
/*
 * How this table is consulted (see the number-scanning loop in lexi()):
 * table[c] is the row for input character c, and column (s - 'A') of that
 * row is the state entered from state s.  If c lies beyond the end of the
 * table, its row is NULL, or the entry is ' ', there is no transition and
 * the scanner takes the verdict for state s from row 0 instead, which holds
 * 'f' (floating), 'i' (integer) or 'u' (unknown) for every state.
 * Rows for further characters are filled in by init_constant_tt().
 */
129 static char const *table
[] = {
135 r 11ee0001101lbuuxx.a.pp
136 t.01.e+008bLuxll0Ll.aa.p+0
137 states: ABCDEFGHIJKLMNOPQRSTUVWXYZ */
138 ['0'] = "CEIDEHHHIJQ U Q VUVVZZZ",
139 ['1'] = "DEIDEHHHIJQ U Q VUVVZZZ",
140 ['7'] = "DEIDEHHHIJ U VUVVZZZ",
141 ['9'] = "DEJDEHHHJJ U VUVVZZZ",
143 ['b'] = " K U VUVV ",
144 ['e'] = " FFF FF U VUVV ",
145 ['f'] = " f f U VUVV f",
146 ['u'] = " MM M i iiM M ",
149 ['L'] = " LLf fL PR Li L f",
150 ['l'] = " OOf fO S P O i O f",
152 ['.'] = "B EE EE T W ",
153 /* ABCDEFGHIJKLMNOPQRSTUVWXYZ */
154 [0] = "uuiifuufiuuiiuiiiiiuiuuuuu",
/*
 * Comparison callback handed to bsearch() when a token is looked up in
 * typenames: e1 is the search key (a C string), e2 points at an array
 * element (a pointer to a C string).  Returns the strcmp() ordering of the
 * key against that element.
 */
158 strcmp_type(const void *e1
, const void *e2
)
160 return (strcmp(e1
, *(const char * const *)e2
));
/*
 * lexi -- scan the next token from the input buffer.
 *
 * Starting at buf_ptr, copies one token into the token buffer (beginning at
 * s_token and advancing e_token), NUL-terminates it and yields a code for
 * its type (for example ident; a line end yields newline, or 0 once had_eof
 * is set).
 *
 * state is both read and updated:
 *   - col_1 is set from last_nl and cleared again when leading blanks are
 *     skipped; last_nl is reset on entry and set on newline / form feed;
 *   - last_u_d records whether a following operator is to be treated as
 *     unary; it is assigned from unary_delim near the end of the function;
 *   - keyword receives the rwcode of a matched keyword entry, or 4 for a
 *     type name (a name found in typenames, or one ending in "_t" when
 *     opt.auto_typedefs is set);
 *   - cast_mask, want_blank, block_init, in_parameter_declaration and
 *     procname are adjusted by individual cases below.
 *
 * Numeric constants are recognised with the transition table `table';
 * other alphanumeric tokens are collected as identifiers, with a check for
 * a backslash followed by a newline inside them.
 */
164 lexi(struct parser_state
*state
)
166 int unary_delim
; /* this is set to 1 if the current token
167 * forces a following operator to be unary */
168 int code
; /* internal code to be returned */
169 char qchar
; /* the delimiter character for a string */
171 e_token
= s_token
; /* point to start of place to save token */
173 state
->col_1
= state
->last_nl
; /* tell world that this token started
174 * in column 1 iff the last thing
175 * scanned was a newline */
176 state
->last_nl
= false;
178 while (*buf_ptr
== ' ' || *buf_ptr
== '\t') { /* get rid of blanks */
179 state
->col_1
= false; /* leading blanks imply token is not in column
181 if (++buf_ptr
>= buf_end
)
185 /* Scan an alphanumeric token */
186 if (isalnum((unsigned char)*buf_ptr
) ||
187 *buf_ptr
== '_' || *buf_ptr
== '$' ||
188 (buf_ptr
[0] == '.' && isdigit((unsigned char)buf_ptr
[1]))) {
190 * we have a character or number
194 if (isdigit((unsigned char)*buf_ptr
) ||
195 (buf_ptr
[0] == '.' && isdigit((unsigned char)buf_ptr
[1]))) {
199 for (s
= 'A'; s
!= 'f' && s
!= 'i' && s
!= 'u'; ) {
200 i
= (unsigned char)*buf_ptr
;
201 if (i
>= nitems(table
) || table
[i
] == NULL
||
202 table
[i
][s
- 'A'] == ' ') {
203 s
= table
[0][s
- 'A'];
206 s
= table
[i
][s
- 'A'];
208 *e_token
++ = *buf_ptr
++;
209 if (buf_ptr
>= buf_end
)
212 /* s now indicates the type: f(loating), i(nteger), u(nknown) */
215 while (isalnum((unsigned char)*buf_ptr
) ||
216 *buf_ptr
== BACKSLASH
||
217 *buf_ptr
== '_' || *buf_ptr
== '$') {
218 /* fill_buffer() terminates buffer with newline */
219 if (*buf_ptr
== BACKSLASH
) {
220 if (*(buf_ptr
+ 1) == '\n') {
222 if (buf_ptr
>= buf_end
)
229 *e_token
++ = *buf_ptr
++;
230 if (buf_ptr
>= buf_end
)
235 if (s_token
[0] == 'L' && s_token
[1] == '\0' &&
236 (*buf_ptr
== '"' || *buf_ptr
== '\''))
239 while (*buf_ptr
== ' ' || *buf_ptr
== '\t') { /* get rid of blanks */
240 if (++buf_ptr
>= buf_end
)
244 if (state
->last_token
== structure
&& !state
->p_l_follow
) {
245 /* if last token was 'struct' and we're not
246 * in parentheses, then this token
247 * should be treated as a declaration */
248 state
->last_u_d
= true;
252 * Operator after identifier is binary unless last token was 'struct'
254 state
->last_u_d
= (state
->last_token
== structure
);
258 sizeof(specials
) / sizeof(specials
[0]),
261 if (p
== NULL
) { /* not a special keyword... */
264 /* ... so maybe a type_t or a typedef */
265 if ((opt
.auto_typedefs
&& ((u
= strrchr(s_token
, '_')) != NULL
) &&
266 strcmp(u
, "_t") == 0) || (typename_top
>= 0 &&
267 bsearch(s_token
, typenames
, typename_top
+ 1,
268 sizeof(typenames
[0]), strcmp_type
))) {
269 state
->keyword
= 4; /* a type name */
270 state
->last_u_d
= true;
273 } else { /* we have a keyword */
274 state
->keyword
= p
->rwcode
;
275 state
->last_u_d
= true;
277 case 7: /* it is a switch */
279 case 8: /* a case or default */
282 case 3: /* a "struct" */
284 case 4: /* one of the declaration keywords */
286 if (state
->p_l_follow
) {
287 /* inside parens: cast, param list, offsetof or sizeof */
288 state
->cast_mask
|= (1 << state
->p_l_follow
) & ~state
->not_cast_mask
;
290 if (state
->last_token
== period
|| state
->last_token
== unary_op
) {
294 if (p
!= NULL
&& p
->rwcode
== 3)
296 if (state
->p_l_follow
)
300 case 5: /* if, while, for */
303 case 6: /* do, else */
306 case 10: /* storage class specifier */
309 case 11: /* typedef */
313 default: /* all others are treated like any other
316 } /* end of switch */
317 } /* end of if (found_it) */
318 if (*buf_ptr
== '(' && state
->tos
<= 1 && state
->ind_level
== 0 &&
319 state
->in_parameter_declaration
== 0 && state
->block_init
== 0) {
322 if (*tp
++ == ')' && (*tp
== ';' || *tp
== ','))
324 strncpy(state
->procname
, token
, sizeof state
->procname
- 1);
326 state
->in_parameter_declaration
= 1;
331 * The following hack attempts to guess whether or not the current
332 * token is in fact a declaration keyword -- one that has been
335 else if (!state
->p_l_follow
&& !state
->block_init
&&
337 ((*buf_ptr
== '*' && buf_ptr
[1] != '=') ||
338 isalpha((unsigned char)*buf_ptr
)) &&
339 (state
->last_token
== semicolon
|| state
->last_token
== lbrace
||
340 state
->last_token
== rbrace
)) {
341 state
->keyword
= 4; /* a type name */
342 state
->last_u_d
= true;
345 if (state
->last_token
== decl
) /* if this is a declared variable,
346 * then following sign is unary */
347 state
->last_u_d
= true; /* will make "int a -1" work */
348 return (ident
); /* the ident is not in the list */
349 } /* end of processing for alphanumeric character */
351 /* Scan a non-alphanumeric token */
353 CHECK_SIZE_TOKEN(3); /* things like "<<=" */
354 *e_token
++ = *buf_ptr
; /* if it is only a one-character token, it is
357 if (++buf_ptr
>= buf_end
)
362 unary_delim
= state
->last_u_d
;
363 state
->last_nl
= true; /* remember that we just had a newline */
364 code
= (had_eof
? 0 : newline
);
367 * if data has been exhausted, the newline is a dummy, and we should
368 * return code to stop
372 case '\'': /* start of quoted character */
373 case '"': /* start of string */
375 do { /* copy the string */
376 while (1) { /* move one character or [/<char>]<char> */
377 if (*buf_ptr
== '\n') {
378 diag2(1, "Unterminated literal");
382 *e_token
= *buf_ptr
++;
383 if (buf_ptr
>= buf_end
)
385 if (*e_token
== BACKSLASH
) { /* if escape, copy extra char */
386 if (*buf_ptr
== '\n') /* check for escaped newline */
388 *++e_token
= *buf_ptr
++;
389 ++e_token
; /* we must increment this again because we
390 * copied two chars */
391 if (buf_ptr
>= buf_end
)
395 break; /* we copied one character */
396 } /* end of while (1) */
397 } while (*e_token
++ != qchar
);
414 unary_delim
= state
->last_u_d
;
437 * if (state->in_or_st) state->block_init = 1;
439 /* ? code = state->block_init ? lparen : lbrace; */
445 /* ? code = state->block_init ? rparen : rbrace; */
449 case 014: /* a form feed */
450 unary_delim
= state
->last_u_d
;
451 state
->last_nl
= true; /* remember this so we can set 'state->col_1'
467 case '+': /* check for -, +, --, ++ */
468 code
= (state
->last_u_d
? unary_op
: binary_op
);
471 if (*buf_ptr
== token
[0]) {
472 /* check for doubled character */
473 *e_token
++ = *buf_ptr
++;
474 /* buffer overflow will be checked at end of loop */
475 if (state
->last_token
== ident
|| state
->last_token
== rparen
) {
476 code
= (state
->last_u_d
? unary_op
: postop
);
477 /* check for following ++ or -- */
481 else if (*buf_ptr
== '=')
482 /* check for operator += */
483 *e_token
++ = *buf_ptr
++;
484 else if (*buf_ptr
== '>') {
485 /* check for operator -> */
486 *e_token
++ = *buf_ptr
++;
489 state
->want_blank
= false;
491 break; /* buffer overflow will be checked at end of
496 state
->block_init
= 1;
497 if (*buf_ptr
== '=') {/* == */
498 *e_token
++ = '='; /* Flip =+ to += */
505 /* can drop thru!!! */
509 case '!': /* ops like <, <<, <=, !=, etc */
510 if (*buf_ptr
== '>' || *buf_ptr
== '<' || *buf_ptr
== '=') {
511 *e_token
++ = *buf_ptr
;
512 if (++buf_ptr
>= buf_end
)
516 *e_token
++ = *buf_ptr
++;
517 code
= (state
->last_u_d
? unary_op
: binary_op
);
523 if (!state
->last_u_d
) {
525 *e_token
++ = *buf_ptr
++;
529 while (*buf_ptr
== '*' || isspace((unsigned char)*buf_ptr
)) {
530 if (*buf_ptr
== '*') {
532 *e_token
++ = *buf_ptr
;
534 if (++buf_ptr
>= buf_end
)
540 while (isalpha((unsigned char)*tp
) ||
541 isspace((unsigned char)*tp
)) {
546 ps
.procname
[0] = ' ';
552 if (token
[0] == '/' && *buf_ptr
== '*') {
553 /* it is start of comment */
556 if (++buf_ptr
>= buf_end
)
560 unary_delim
= state
->last_u_d
;
563 while (*(e_token
- 1) == *buf_ptr
|| *buf_ptr
== '=') {
565 * handle ||, &&, etc, and also things as in int *****i
568 *e_token
++ = *buf_ptr
;
569 if (++buf_ptr
>= buf_end
)
572 code
= (state
->last_u_d
? unary_op
: binary_op
);
576 } /* end of switch */
577 if (buf_ptr
>= buf_end
) /* check for input buffer empty */
579 state
->last_u_d
= unary_delim
;
581 *e_token
= '\0'; /* null terminate the token */
585 /*
 * Initialize constant transition table.
 *
 * Characters that behave alike in a numeric constant share one row of
 * table: this function points the entries for the remaining characters at
 * an existing row (for example '8' uses the row of '9', '2'..'6' use the
 * row of '7', and upper-case letters use the row of their lower-case
 * form).  Only the row pointers are copied, not the strings themselves.
 * NOTE(review): presumably this has to run before lexi() scans any numeric
 * constant -- confirm at the call site.
 */
587 init_constant_tt(void)
589 table
['-'] = table
['+'];
590 table
['8'] = table
['9'];
591 table
['2'] = table
['3'] = table
['4'] = table
['5'] = table
['6'] = table
['7'];
592 table
['A'] = table
['C'] = table
['D'] = table
['c'] = table
['d'] = table
['a'];
593 table
['B'] = table
['b'];
594 table
['E'] = table
['e'];
595 table
['U'] = table
['u'];
596 table
['X'] = table
['x'];
597 table
['P'] = table
['p'];
598 table
['F'] = table
['f'];
/*
 * Allocate the initial typenames array and record its capacity: both the
 * allocation size and typename_count are set to 16 entries.  The result of
 * malloc() is tested for NULL.
 * NOTE(review): the statement run on allocation failure follows the NULL
 * test -- confirm that it aborts rather than continuing with a NULL array.
 */
602 alloc_typenames(void)
605 typenames
= (const char **)malloc(sizeof(typenames
[0]) *
606 (typename_count
= 16));
607 if (typenames
== NULL
)
612 add_typename(const char *key
)
617 if (typename_top
+ 1 >= typename_count
) {
618 typenames
= realloc((void *)typenames
,
619 sizeof(typenames
[0]) * (typename_count
*= 2));
620 if (typenames
== NULL
)
623 if (typename_top
== -1)
624 typenames
[++typename_top
] = copy
= strdup(key
);
625 else if ((comparison
= strcmp(key
, typenames
[typename_top
])) >= 0) {
626 /* take advantage of sorted input */
627 if (comparison
== 0) /* remove duplicates */
629 typenames
[++typename_top
] = copy
= strdup(key
);
634 for (p
= 0; (comparison
= strcmp(key
, typenames
[p
])) > 0; p
++)
635 /* find place for the new key */;
636 if (comparison
== 0) /* remove duplicates */
638 memmove(&typenames
[p
+ 1], &typenames
[p
],
639 sizeof(typenames
[0]) * (++typename_top
- p
));
640 typenames
[p
] = copy
= strdup(key
);