kernel: Fix buildkernel without INVARIANTS.
[dragonfly.git] / usr.bin / indent / lexi.c
bloba6a3205b8ca0df746892307e84f9b5a5c45e7c38
1 /*
2 * Copyright (c) 1985 Sun Microsystems, Inc.
3 * Copyright (c) 1980, 1993
4 * The Regents of the University of California. All rights reserved.
5 * All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
35 * @(#)lexi.c 8.1 (Berkeley) 6/6/93
36 * $FreeBSD: src/usr.bin/indent/lexi.c,v 1.21 2010/04/15 21:41:07 avg Exp $
/*
 * Here we have the token scanner for indent.  It scans off one token and puts
 * it in the global variable "token".  It returns a code, indicating the type
 * of token scanned.
 */
45 #include <err.h>
46 #include <stdio.h>
47 #include <ctype.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include "indent_globs.h"
51 #include "indent_codes.h"
52 #include "indent.h"
54 #define alphanum 1
55 #define opchar 3
/*
 * One entry of the reserved-word table: the spelling of a keyword and the
 * class code that lexi() switches on when a scanned token matches it.
 */
struct templ {
    const char *rwd;            /* keyword text; NULL marks the end of the
                                 * list */
    int         rwcode;         /* keyword class, see below */
};

/*
 * Reserved words known to the scanner.  The list is terminated by the first
 * entry whose rwd is NULL; the array is much larger than the built-in list so
 * that addkey() can append further keywords at run time.
 *
 * Keyword classes as interpreted by lexi():
 *   0  no special treatment (returned as an identifier)
 *   1  switch
 *   2  case, default
 *   3  struct, union, enum
 *   4  declaration keywords
 *   5  if, while, for
 *   6  do, else
 *   7  sizeof
 */
struct templ specials[1000] =
{
    {"switch", 1},
    {"case", 2},
    {"break", 0},
    {"struct", 3},
    {"union", 3},
    {"enum", 3},
    {"default", 2},
    {"int", 4},
    {"char", 4},
    {"float", 4},
    {"double", 4},
    {"long", 4},
    {"short", 4},
    {"typedef", 4},             /* was misspelled "typdef", so the real
                                 * keyword never matched */
    {"unsigned", 4},
    {"register", 4},
    {"static", 4},
    {"global", 4},
    {"extern", 4},
    {"void", 4},
    {"const", 4},
    {"volatile", 4},
    {"goto", 0},
    {"return", 0},
    {"if", 5},
    {"while", 5},
    {"for", 5},
    {"else", 6},
    {"do", 6},
    {"sizeof", 7},
    {0, 0}
};
/*
 * Character class of every 7-bit ASCII code, indexed by the character value.
 * The scanner uses it to decide what kind of token a character starts or
 * continues:
 *   0  neither of the below
 *   1  alphanumeric: letters, digits, underscore and dollar sign
 *   3  operator character
 */
char chartype[128] =
{
    /* 0x00 - 0x0f: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 - 0x1f: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 - 0x2f: space and punctuation up to the slash */
    0, 3, 0, 0, 1, 3, 3, 0, 0, 0, 3, 3, 0, 3, 0, 3,
    /* 0x30 - 0x3f: digits, then colon through question mark */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3,
    /* 0x40 - 0x4f: at sign, A through O */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 - 0x5f: P through Z, brackets, backslash, caret, underscore */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1,
    /* 0x60 - 0x6f: backquote, a through o */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 - 0x7f: p through z, braces, bar, tilde, DEL */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0
};
120 lexi(void)
122 int unary_delim; /* this is set to 1 if the current token
123 * forces a following operator to be unary */
124 static int last_code; /* the last token type returned */
125 static int l_struct; /* set to 1 if the last token was 'struct' */
126 int code; /* internal code to be returned */
127 char qchar; /* the delimiter character for a string */
129 e_token = s_token; /* point to start of place to save token */
130 unary_delim = false;
131 ps.col_1 = ps.last_nl; /* tell world that this token started in
132 * column 1 iff the last thing scanned was nl */
133 ps.last_nl = false;
135 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
136 ps.col_1 = false; /* leading blanks imply token is not in column
137 * 1 */
138 if (++buf_ptr >= buf_end)
139 fill_buffer();
142 /* Scan an alphanumeric token */
143 if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
145 * we have a character or number
147 const char *j; /* used for searching thru list of
149 * reserved words */
150 struct templ *p;
152 if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
153 int seendot = 0,
154 seenexp = 0,
155 seensfx = 0;
156 if (*buf_ptr == '0' &&
157 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
158 *e_token++ = *buf_ptr++;
159 *e_token++ = *buf_ptr++;
160 while (isxdigit(*buf_ptr)) {
161 CHECK_SIZE_TOKEN;
162 *e_token++ = *buf_ptr++;
165 else
166 while (1) {
167 if (*buf_ptr == '.') {
168 if (seendot)
169 break;
170 else
171 seendot++;
173 CHECK_SIZE_TOKEN;
174 *e_token++ = *buf_ptr++;
175 if (!isdigit(*buf_ptr) && *buf_ptr != '.') {
176 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
177 break;
178 else {
179 seenexp++;
180 seendot++;
181 CHECK_SIZE_TOKEN;
182 *e_token++ = *buf_ptr++;
183 if (*buf_ptr == '+' || *buf_ptr == '-')
184 *e_token++ = *buf_ptr++;
188 while (1) {
189 if (!(seensfx & 1) &&
190 (*buf_ptr == 'U' || *buf_ptr == 'u')) {
191 CHECK_SIZE_TOKEN;
192 *e_token++ = *buf_ptr++;
193 seensfx |= 1;
194 continue;
196 if (!(seensfx & 2) &&
197 (*buf_ptr == 'L' || *buf_ptr == 'l')) {
198 CHECK_SIZE_TOKEN;
199 if (buf_ptr[1] == buf_ptr[0])
200 *e_token++ = *buf_ptr++;
201 *e_token++ = *buf_ptr++;
202 seensfx |= 2;
203 continue;
205 break;
208 else
209 while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) {
210 /* fill_buffer() terminates buffer with newline */
211 if (*buf_ptr == BACKSLASH) {
212 if (*(buf_ptr + 1) == '\n') {
213 buf_ptr += 2;
214 if (buf_ptr >= buf_end)
215 fill_buffer();
216 } else
217 break;
219 CHECK_SIZE_TOKEN;
220 /* copy it over */
221 *e_token++ = *buf_ptr++;
222 if (buf_ptr >= buf_end)
223 fill_buffer();
225 *e_token++ = '\0';
226 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
227 if (++buf_ptr >= buf_end)
228 fill_buffer();
230 ps.its_a_keyword = false;
231 ps.sizeof_keyword = false;
232 if (l_struct && !ps.p_l_follow) {
233 /* if last token was 'struct' and we're not
234 * in parentheses, then this token
235 * should be treated as a declaration */
236 l_struct = false;
237 last_code = ident;
238 ps.last_u_d = true;
239 return (decl);
241 ps.last_u_d = l_struct; /* Operator after identifier is binary
242 * unless last token was 'struct' */
243 l_struct = false;
244 last_code = ident; /* Remember that this is the code we will
245 * return */
247 if (auto_typedefs) {
248 const char *q = s_token;
249 size_t q_len = strlen(q);
250 /* Check if we have an "_t" in the end */
251 if (q_len > 2 &&
252 (strcmp(q + q_len - 2, "_t") == 0)) {
253 ps.its_a_keyword = true;
254 ps.last_u_d = true;
255 goto found_auto_typedef;
260 * This loop will check if the token is a keyword.
262 for (p = specials; (j = p->rwd) != NULL; p++) {
263 const char *q = s_token; /* point at scanned token */
264 if (*j++ != *q++ || *j++ != *q++)
265 continue; /* This test depends on the fact that
266 * identifiers are always at least 1 character
267 * long (ie. the first two bytes of the
268 * identifier are always meaningful) */
269 if (q[-1] == 0)
270 break; /* If its a one-character identifier */
271 while (*q++ == *j)
272 if (*j++ == 0)
273 goto found_keyword; /* I wish that C had a multi-level
274 * break... */
276 if (p->rwd) { /* we have a keyword */
277 found_keyword:
278 ps.its_a_keyword = true;
279 ps.last_u_d = true;
280 switch (p->rwcode) {
281 case 1: /* it is a switch */
282 return (swstmt);
283 case 2: /* a case or default */
284 return (casestmt);
286 case 3: /* a "struct" */
288 * Next time around, we will want to know that we have had a
289 * 'struct'
291 l_struct = true;
292 /* FALLTHROUGH */
294 case 4: /* one of the declaration keywords */
295 found_auto_typedef:
296 if (ps.p_l_follow) {
297 ps.cast_mask |= (1 << ps.p_l_follow) & ~ps.sizeof_mask;
298 break; /* inside parens: cast, param list or sizeof */
300 last_code = decl;
301 return (decl);
303 case 5: /* if, while, for */
304 return (sp_paren);
306 case 6: /* do, else */
307 return (sp_nparen);
309 case 7:
310 ps.sizeof_keyword = true;
311 default: /* all others are treated like any other
312 * identifier */
313 return (ident);
314 } /* end of switch */
315 } /* end of if (found_it) */
316 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
317 char *tp = buf_ptr;
318 while (tp < buf_end)
319 if (*tp++ == ')' && (*tp == ';' || *tp == ','))
320 goto not_proc;
321 strncpy(ps.procname, token, sizeof ps.procname - 1);
322 ps.in_parameter_declaration = 1;
323 rparen_count = 1;
324 not_proc:;
327 * The following hack attempts to guess whether or not the current
328 * token is in fact a declaration keyword -- one that has been
329 * typedefd
331 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
332 && !ps.p_l_follow
333 && !ps.block_init
334 && (ps.last_token == rparen || ps.last_token == semicolon ||
335 ps.last_token == decl ||
336 ps.last_token == lbrace || ps.last_token == rbrace)) {
337 ps.its_a_keyword = true;
338 ps.last_u_d = true;
339 last_code = decl;
340 return decl;
342 if (last_code == decl) /* if this is a declared variable, then
343 * following sign is unary */
344 ps.last_u_d = true; /* will make "int a -1" work */
345 last_code = ident;
346 return (ident); /* the ident is not in the list */
347 } /* end of procesing for alpanum character */
349 /* Scan a non-alphanumeric token */
351 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
352 * moved here */
353 *e_token = '\0';
354 if (++buf_ptr >= buf_end)
355 fill_buffer();
357 switch (*token) {
358 case '\n':
359 unary_delim = ps.last_u_d;
360 ps.last_nl = true; /* remember that we just had a newline */
361 code = (had_eof ? 0 : newline);
364 * if data has been exhausted, the newline is a dummy, and we should
365 * return code to stop
367 break;
369 case '\'': /* start of quoted character */
370 case '"': /* start of string */
371 qchar = *token;
372 if (troff) {
373 e_token[-1] = '`';
374 if (qchar == '"')
375 *e_token++ = '`';
376 e_token = chfont(&bodyf, &stringf, e_token);
378 do { /* copy the string */
379 while (1) { /* move one character or [/<char>]<char> */
380 if (*buf_ptr == '\n') {
381 diag2(1, "Unterminated literal");
382 goto stop_lit;
384 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop,
385 * since CHECK_SIZE guarantees that there
386 * are at least 5 entries left */
387 *e_token = *buf_ptr++;
388 if (buf_ptr >= buf_end)
389 fill_buffer();
390 if (*e_token == BACKSLASH) { /* if escape, copy extra char */
391 if (*buf_ptr == '\n') /* check for escaped newline */
392 ++line_no;
393 if (troff) {
394 *++e_token = BACKSLASH;
395 if (*buf_ptr == BACKSLASH)
396 *++e_token = BACKSLASH;
398 *++e_token = *buf_ptr++;
399 ++e_token; /* we must increment this again because we
400 * copied two chars */
401 if (buf_ptr >= buf_end)
402 fill_buffer();
404 else
405 break; /* we copied one character */
406 } /* end of while (1) */
407 } while (*e_token++ != qchar);
408 if (troff) {
409 e_token = chfont(&stringf, &bodyf, e_token - 1);
410 if (qchar == '"')
411 *e_token++ = '\'';
413 stop_lit:
414 code = ident;
415 break;
417 case ('('):
418 case ('['):
419 unary_delim = true;
420 code = lparen;
421 break;
423 case (')'):
424 case (']'):
425 code = rparen;
426 break;
428 case '#':
429 unary_delim = ps.last_u_d;
430 code = preesc;
431 break;
433 case '?':
434 unary_delim = true;
435 code = question;
436 break;
438 case (':'):
439 code = colon;
440 unary_delim = true;
441 break;
443 case (';'):
444 unary_delim = true;
445 code = semicolon;
446 break;
448 case ('{'):
449 unary_delim = true;
452 * if (ps.in_or_st) ps.block_init = 1;
454 /* ? code = ps.block_init ? lparen : lbrace; */
455 code = lbrace;
456 break;
458 case ('}'):
459 unary_delim = true;
460 /* ? code = ps.block_init ? rparen : rbrace; */
461 code = rbrace;
462 break;
464 case 014: /* a form feed */
465 unary_delim = ps.last_u_d;
466 ps.last_nl = true; /* remember this so we can set 'ps.col_1'
467 * right */
468 code = form_feed;
469 break;
471 case (','):
472 unary_delim = true;
473 code = comma;
474 break;
476 case '.':
477 unary_delim = false;
478 code = period;
479 break;
481 case '-':
482 case '+': /* check for -, +, --, ++ */
483 code = (ps.last_u_d ? unary_op : binary_op);
484 unary_delim = true;
486 if (*buf_ptr == token[0]) {
487 /* check for doubled character */
488 *e_token++ = *buf_ptr++;
489 /* buffer overflow will be checked at end of loop */
490 if (last_code == ident || last_code == rparen) {
491 code = (ps.last_u_d ? unary_op : postop);
492 /* check for following ++ or -- */
493 unary_delim = false;
496 else if (*buf_ptr == '=')
497 /* check for operator += */
498 *e_token++ = *buf_ptr++;
499 else if (*buf_ptr == '>') {
500 /* check for operator -> */
501 *e_token++ = *buf_ptr++;
502 if (!pointer_as_binop) {
503 unary_delim = false;
504 code = unary_op;
505 ps.want_blank = false;
508 break; /* buffer overflow will be checked at end of
509 * switch */
511 case '=':
512 if (ps.in_or_st)
513 ps.block_init = 1;
514 #ifdef undef
515 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */
516 e_token[-1] = *buf_ptr++;
517 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
518 *e_token++ = *buf_ptr++;
519 *e_token++ = '='; /* Flip =+ to += */
520 *e_token = 0;
522 #else
523 if (*buf_ptr == '=') {/* == */
524 *e_token++ = '='; /* Flip =+ to += */
525 buf_ptr++;
526 *e_token = 0;
528 #endif
529 code = binary_op;
530 unary_delim = true;
531 break;
532 /* can drop thru!!! */
534 case '>':
535 case '<':
536 case '!': /* ops like <, <<, <=, !=, etc */
537 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
538 *e_token++ = *buf_ptr;
539 if (++buf_ptr >= buf_end)
540 fill_buffer();
542 if (*buf_ptr == '=')
543 *e_token++ = *buf_ptr++;
544 code = (ps.last_u_d ? unary_op : binary_op);
545 unary_delim = true;
546 break;
548 default:
549 if (token[0] == '/' && *buf_ptr == '*') {
550 /* it is start of comment */
551 *e_token++ = '*';
553 if (++buf_ptr >= buf_end)
554 fill_buffer();
556 code = comment;
557 unary_delim = ps.last_u_d;
558 break;
560 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
562 * handle ||, &&, etc, and also things as in int *****i
564 *e_token++ = *buf_ptr;
565 if (++buf_ptr >= buf_end)
566 fill_buffer();
568 code = (ps.last_u_d ? unary_op : binary_op);
569 unary_delim = true;
572 } /* end of switch */
573 if (code != newline) {
574 l_struct = false;
575 last_code = code;
577 if (buf_ptr >= buf_end) /* check for input buffer empty */
578 fill_buffer();
579 ps.last_u_d = unary_delim;
580 *e_token = '\0'; /* null terminate the token */
581 return (code);
585 * Add the given keyword to the keyword table, using val as the keyword type
587 void
588 addkey(char *key, int val)
590 struct templ *p = specials;
591 while (p->rwd)
592 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
593 return;
594 else
595 p++;
596 if (p >= specials + sizeof specials / sizeof specials[0])
597 return; /* For now, table overflows are silently
598 * ignored */
599 p->rwd = key;
600 p->rwcode = val;
601 p[1].rwd = NULL;
602 p[1].rwcode = 0;