2 * Copyright (c) 1985 Sun Microsystems, Inc.
3 * Copyright (c) 1980, 1993
4 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * @(#)lexi.c 8.1 (Berkeley) 6/6/93
36 * $FreeBSD: src/usr.bin/indent/lexi.c,v 1.3.6.3 2001/12/06 19:28:47 schweikh Exp $
37 * $DragonFly: src/usr.bin/indent/lexi.c,v 1.3 2005/04/10 20:55:38 drhodus Exp $
44 * Here we have the token scanner for indent. It scans off one token and puts
45 * it in the global variable "token". It returns a code, indicating the type
53 #include "indent_globs.h"
54 #include "indent_codes.h"
59 void fill_buffer(void);
66 struct templ specials
[1000] =
102 { /* this is used to facilitate the decision of
103 * what type (alphanumeric, operator) each
105 0, 0, 0, 0, 0, 0, 0, 0,
106 0, 0, 0, 0, 0, 0, 0, 0,
107 0, 0, 0, 0, 0, 0, 0, 0,
108 0, 0, 0, 0, 0, 0, 0, 0,
109 0, 3, 0, 0, 1, 3, 3, 0,
110 0, 0, 3, 3, 0, 3, 0, 3,
111 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 0, 0, 3, 3, 3, 3,
113 0, 1, 1, 1, 1, 1, 1, 1,
114 1, 1, 1, 1, 1, 1, 1, 1,
115 1, 1, 1, 1, 1, 1, 1, 1,
116 1, 1, 1, 0, 0, 0, 3, 1,
117 0, 1, 1, 1, 1, 1, 1, 1,
118 1, 1, 1, 1, 1, 1, 1, 1,
119 1, 1, 1, 1, 1, 1, 1, 1,
120 1, 1, 1, 0, 3, 0, 3, 0
126 int unary_delim
; /* this is set to 1 if the current token
127 * forces a following operator to be unary */
128 static int last_code
; /* the last token type returned */
129 static int l_struct
; /* set to 1 if the last token was 'struct' */
130 int code
; /* internal code to be returned */
131 char qchar
; /* the delimiter character for a string */
133 e_token
= s_token
; /* point to start of place to save token */
135 ps
.col_1
= ps
.last_nl
; /* tell world that this token started in
136 * column 1 iff the last thing scanned was nl */
139 while (*buf_ptr
== ' ' || *buf_ptr
== '\t') { /* get rid of blanks */
140 ps
.col_1
= false; /* leading blanks imply token is not in column
142 if (++buf_ptr
>= buf_end
)
146 /* Scan an alphanumeric token */
147 if (chartype
[(int)*buf_ptr
] == alphanum
|| (buf_ptr
[0] == '.' && isdigit(buf_ptr
[1]))) {
149 * we have a character or number
151 char *j
; /* used for searching thru list of
156 if (isdigit(*buf_ptr
) || (buf_ptr
[0] == '.' && isdigit(buf_ptr
[1]))) {
160 if (*buf_ptr
== '0' &&
161 (buf_ptr
[1] == 'x' || buf_ptr
[1] == 'X')) {
162 *e_token
++ = *buf_ptr
++;
163 *e_token
++ = *buf_ptr
++;
164 while (isxdigit(*buf_ptr
)) {
166 *e_token
++ = *buf_ptr
++;
171 if (*buf_ptr
== '.') {
178 *e_token
++ = *buf_ptr
++;
179 if (!isdigit(*buf_ptr
) && *buf_ptr
!= '.') {
180 if ((*buf_ptr
!= 'E' && *buf_ptr
!= 'e') || seenexp
)
186 *e_token
++ = *buf_ptr
++;
187 if (*buf_ptr
== '+' || *buf_ptr
== '-')
188 *e_token
++ = *buf_ptr
++;
193 if (!(seensfx
& 1) &&
194 (*buf_ptr
== 'U' || *buf_ptr
== 'u')) {
196 *e_token
++ = *buf_ptr
++;
200 if (!(seensfx
& 2) &&
201 (*buf_ptr
== 'L' || *buf_ptr
== 'l')) {
203 if (buf_ptr
[1] == buf_ptr
[0])
204 *e_token
++ = *buf_ptr
++;
205 *e_token
++ = *buf_ptr
++;
213 while (chartype
[(int)*buf_ptr
] == alphanum
|| *buf_ptr
== BACKSLASH
) {
214 /* fill_buffer() terminates buffer with newline */
215 if (*buf_ptr
== BACKSLASH
) {
216 if (*(buf_ptr
+ 1) == '\n') {
218 if (buf_ptr
>= buf_end
)
225 *e_token
++ = *buf_ptr
++;
226 if (buf_ptr
>= buf_end
)
230 while (*buf_ptr
== ' ' || *buf_ptr
== '\t') { /* get rid of blanks */
231 if (++buf_ptr
>= buf_end
)
234 ps
.its_a_keyword
= false;
235 ps
.sizeof_keyword
= false;
236 if (l_struct
) { /* if last token was 'struct', then this token
237 * should be treated as a declaration */
243 ps
.last_u_d
= false; /* Operator after identifier is binary */
244 last_code
= ident
; /* Remember that this is the code we will
248 * This loop will check if the token is a keyword.
250 for (p
= specials
; (j
= p
->rwd
) != 0; p
++) {
251 char *p
= s_token
; /* point at scanned token */
252 if (*j
++ != *p
++ || *j
++ != *p
++)
253 continue; /* This test depends on the fact that
254 * identifiers are always at least 1 character
255 * long (ie. the first two bytes of the
256 * identifier are always meaningful) */
258 break; /* If its a one-character identifier */
261 goto found_keyword
; /* I wish that C had a multi-level
264 if (p
->rwd
) { /* we have a keyword */
266 ps
.its_a_keyword
= true;
269 case 1: /* it is a switch */
271 case 2: /* a case or default */
274 case 3: /* a "struct" */
276 break; /* inside parens: cast */
278 * Next time around, we may want to know that we have had a
284 * Fall through to test for a cast, function prototype or
287 case 4: /* one of the declaration keywords */
289 ps
.cast_mask
|= 1 << ps
.p_l_follow
;
292 * Forget that we saw `struct' if we're in a sizeof().
297 break; /* inside parens: cast, prototype or sizeof() */
302 case 5: /* if, while, for */
305 case 6: /* do, else */
309 ps
.sizeof_keyword
= true;
310 default: /* all others are treated like any other
313 } /* end of switch */
314 } /* end of if (found_it) */
315 if (*buf_ptr
== '(' && ps
.tos
<= 1 && ps
.ind_level
== 0) {
318 if (*tp
++ == ')' && (*tp
== ';' || *tp
== ','))
320 strncpy(ps
.procname
, token
, sizeof ps
.procname
- 1);
321 ps
.in_parameter_declaration
= 1;
326 * The following hack attempts to guess whether or not the current
327 * token is in fact a declaration keyword -- one that has been
330 if (((*buf_ptr
== '*' && buf_ptr
[1] != '=') || isalpha(*buf_ptr
) || *buf_ptr
== '_')
333 && (ps
.last_token
== rparen
|| ps
.last_token
== semicolon
||
334 ps
.last_token
== decl
||
335 ps
.last_token
== lbrace
|| ps
.last_token
== rbrace
)) {
336 ps
.its_a_keyword
= true;
341 if (last_code
== decl
) /* if this is a declared variable, then
342 * following sign is unary */
343 ps
.last_u_d
= true; /* will make "int a -1" work */
345 return (ident
); /* the ident is not in the list */
346 } /* end of processing for alphanumeric character */
348 /* Scan a non-alphanumeric token */
350 *e_token
++ = *buf_ptr
; /* if it is only a one-character token, it is
353 if (++buf_ptr
>= buf_end
)
358 unary_delim
= ps
.last_u_d
;
359 ps
.last_nl
= true; /* remember that we just had a newline */
360 code
= (had_eof
? 0 : newline
);
363 * if data has been exhausted, the newline is a dummy, and we should
364 * return code to stop
368 case '\'': /* start of quoted character */
369 case '"': /* start of string */
375 e_token
= chfont(&bodyf
, &stringf
, e_token
);
377 do { /* copy the string */
378 while (1) { /* move one character or [/<char>]<char> */
379 if (*buf_ptr
== '\n') {
380 printf("%d: Unterminated literal\n", line_no
);
383 CHECK_SIZE_TOKEN
; /* Only have to do this once in this loop,
384 * since CHECK_SIZE guarantees that there
385 * are at least 5 entries left */
386 *e_token
= *buf_ptr
++;
387 if (buf_ptr
>= buf_end
)
389 if (*e_token
== BACKSLASH
) { /* if escape, copy extra char */
390 if (*buf_ptr
== '\n') /* check for escaped newline */
393 *++e_token
= BACKSLASH
;
394 if (*buf_ptr
== BACKSLASH
)
395 *++e_token
= BACKSLASH
;
397 *++e_token
= *buf_ptr
++;
398 ++e_token
; /* we must increment this again because we
399 * copied two chars */
400 if (buf_ptr
>= buf_end
)
404 break; /* we copied one character */
405 } /* end of while (1) */
406 } while (*e_token
++ != qchar
);
408 e_token
= chfont(&stringf
, &bodyf
, e_token
- 1);
428 unary_delim
= ps
.last_u_d
;
451 * if (ps.in_or_st) ps.block_init = 1;
453 /* ? code = ps.block_init ? lparen : lbrace; */
459 /* ? code = ps.block_init ? rparen : rbrace; */
463 case 014: /* a form feed */
464 unary_delim
= ps
.last_u_d
;
465 ps
.last_nl
= true; /* remember this so we can set 'ps.col_1'
481 case '+': /* check for -, +, --, ++ */
482 code
= (ps
.last_u_d
? unary_op
: binary_op
);
485 if (*buf_ptr
== token
[0]) {
486 /* check for doubled character */
487 *e_token
++ = *buf_ptr
++;
488 /* buffer overflow will be checked at end of loop */
489 if (last_code
== ident
|| last_code
== rparen
) {
490 code
= (ps
.last_u_d
? unary_op
: postop
);
491 /* check for following ++ or -- */
495 else if (*buf_ptr
== '=')
496 /* check for operator += */
497 *e_token
++ = *buf_ptr
++;
498 else if (*buf_ptr
== '>') {
499 /* check for operator -> */
500 *e_token
++ = *buf_ptr
++;
501 if (!pointer_as_binop
) {
504 ps
.want_blank
= false;
507 break; /* buffer overflow will be checked at end of
514 if (chartype
[*buf_ptr
] == opchar
) { /* we have two char assignment */
515 e_token
[-1] = *buf_ptr
++;
516 if ((e_token
[-1] == '<' || e_token
[-1] == '>') && e_token
[-1] == *buf_ptr
)
517 *e_token
++ = *buf_ptr
++;
518 *e_token
++ = '='; /* Flip =+ to += */
522 if (*buf_ptr
== '=') {/* == */
523 *e_token
++ = '='; /* Flip =+ to += */
531 /* can drop thru!!! */
535 case '!': /* ops like <, <<, <=, !=, etc */
536 if (*buf_ptr
== '>' || *buf_ptr
== '<' || *buf_ptr
== '=') {
537 *e_token
++ = *buf_ptr
;
538 if (++buf_ptr
>= buf_end
)
542 *e_token
++ = *buf_ptr
++;
543 code
= (ps
.last_u_d
? unary_op
: binary_op
);
548 if (token
[0] == '/' && *buf_ptr
== '*') {
549 /* it is start of comment */
552 if (++buf_ptr
>= buf_end
)
556 unary_delim
= ps
.last_u_d
;
559 while (*(e_token
- 1) == *buf_ptr
|| *buf_ptr
== '=') {
561 * handle ||, &&, etc, and also things as in int *****i
563 *e_token
++ = *buf_ptr
;
564 if (++buf_ptr
>= buf_end
)
567 code
= (ps
.last_u_d
? unary_op
: binary_op
);
571 } /* end of switch */
572 if (code
!= newline
) {
576 if (buf_ptr
>= buf_end
) /* check for input buffer empty */
578 ps
.last_u_d
= unary_delim
;
579 *e_token
= '\0'; /* null terminate the token */
584 * Add the given keyword to the keyword table, using val as the keyword type
587 addkey(char *key
, int val
)
589 struct templ
*p
= specials
;
591 if (p
->rwd
[0] == key
[0] && strcmp(p
->rwd
, key
) == 0)
595 if (p
>= specials
+ sizeof specials
/ sizeof specials
[0])
596 return; /* For now, table overflows are silently