MFC:
[dragonfly.git] / usr.bin / indent / lexi.c
blobcd229c2c23a56f0cdc5ff95ffe07559944e19302
1 /*
2 * Copyright (c) 1985 Sun Microsystems, Inc.
3 * Copyright (c) 1980, 1993
4 * The Regents of the University of California. All rights reserved.
5 * All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
35 * @(#)lexi.c 8.1 (Berkeley) 6/6/93
36 * $FreeBSD: src/usr.bin/indent/lexi.c,v 1.3.6.3 2001/12/06 19:28:47 schweikh Exp $
37 * $DragonFly: src/usr.bin/indent/lexi.c,v 1.3 2005/04/10 20:55:38 drhodus Exp $
40 #if 0
41 #endif
44 * Here we have the token scanner for indent. It scans off one token and puts
45 * it in the global variable "token". It returns a code, indicating the type
46 * of token scanned.
49 #include <stdio.h>
50 #include <ctype.h>
51 #include <stdlib.h>
52 #include <string.h>
53 #include "indent_globs.h"
54 #include "indent_codes.h"
/*
 * Character classes stored in chartype[] and tested by the scanner.
 * A table value of 0 means the character belongs to neither class.
 */
#define alphanum 1		/* may be part of an identifier or number */
#define opchar 3		/* may be part of an operator */

/* Called by the scanner whenever buf_ptr reaches buf_end. */
void fill_buffer(void);
/*
 * One entry of the reserved-word table: the spelling of the word and the
 * class code that lexi() switches on when the word is recognised.
 */
struct templ {
    char *rwd;			/* keyword text; 0 terminates the table */
    int rwcode;			/* keyword class, see below */
};

/*
 * Reserved words known to the scanner.  The rwcode values are interpreted
 * by lexi():
 *	0  no special handling (treated like an identifier)
 *	1  switch
 *	2  case / default
 *	3  struct / union / enum
 *	4  declaration keyword
 *	5  if / while / for
 *	6  else / do
 *	7  sizeof
 *	9  type qualifier (handled by lexi()'s default branch)
 *
 * The array is deliberately oversized so that addkey() can append entries
 * at run time; the list is always terminated by an entry whose rwd is 0.
 */
struct templ specials[1000] =
{
    {"switch", 1},
    {"case", 2},
    {"break", 0},
    {"struct", 3},
    {"union", 3},
    {"enum", 3},
    {"default", 2},
    {"int", 4},
    {"char", 4},
    {"float", 4},
    {"double", 4},
    {"long", 4},
    {"short", 4},
    {"typedef", 4},		/* was misspelled "typdef" and never matched */
    {"unsigned", 4},
    {"register", 4},
    {"static", 4},
    {"global", 4},
    {"extern", 4},
    {"void", 4},
    {"goto", 0},
    {"return", 0},
    {"if", 5},
    {"while", 5},
    {"for", 5},
    {"else", 6},
    {"do", 6},
    {"sizeof", 7},
    {"const", 9},
    {"volatile", 9},
    {0, 0}
};
/*
 * Character classification table, indexed by 7-bit ASCII code.
 * Each entry is 1 (alphanumeric: letters, digits, '_' and '$'),
 * 3 (operator character) or 0 (neither).
 */
char chartype[128] =
{
    /* 0x00 - 0x0f: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 - 0x1f: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 - 0x2f: space ! " # $ % & ' ( ) * + , - . / */
    0, 3, 0, 0, 1, 3, 3, 0, 0, 0, 3, 3, 0, 3, 0, 3,
    /* 0x30 - 0x3f: 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3,
    /* 0x40 - 0x4f: @ A B C D E F G H I J K L M N O */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 - 0x5f: P Q R S T U V W X Y Z [ \ ] ^ _ */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1,
    /* 0x60 - 0x6f: ` a b c d e f g h i j k l m n o */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 - 0x7f: p q r s t u v w x y z { | } ~ DEL */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0
};
124 lexi(void)
126 int unary_delim; /* this is set to 1 if the current token
127 * forces a following operator to be unary */
128 static int last_code; /* the last token type returned */
129 static int l_struct; /* set to 1 if the last token was 'struct' */
130 int code; /* internal code to be returned */
131 char qchar; /* the delimiter character for a string */
133 e_token = s_token; /* point to start of place to save token */
134 unary_delim = false;
135 ps.col_1 = ps.last_nl; /* tell world that this token started in
136 * column 1 iff the last thing scanned was nl */
137 ps.last_nl = false;
139 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
140 ps.col_1 = false; /* leading blanks imply token is not in column
141 * 1 */
142 if (++buf_ptr >= buf_end)
143 fill_buffer();
146 /* Scan an alphanumeric token */
147 if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
149 * we have a character or number
151 char *j; /* used for searching thru list of
153 * reserved words */
154 struct templ *p;
156 if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
157 int seendot = 0,
158 seenexp = 0,
159 seensfx = 0;
160 if (*buf_ptr == '0' &&
161 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
162 *e_token++ = *buf_ptr++;
163 *e_token++ = *buf_ptr++;
164 while (isxdigit(*buf_ptr)) {
165 CHECK_SIZE_TOKEN;
166 *e_token++ = *buf_ptr++;
169 else
170 while (1) {
171 if (*buf_ptr == '.') {
172 if (seendot)
173 break;
174 else
175 seendot++;
177 CHECK_SIZE_TOKEN;
178 *e_token++ = *buf_ptr++;
179 if (!isdigit(*buf_ptr) && *buf_ptr != '.') {
180 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
181 break;
182 else {
183 seenexp++;
184 seendot++;
185 CHECK_SIZE_TOKEN;
186 *e_token++ = *buf_ptr++;
187 if (*buf_ptr == '+' || *buf_ptr == '-')
188 *e_token++ = *buf_ptr++;
192 while (1) {
193 if (!(seensfx & 1) &&
194 (*buf_ptr == 'U' || *buf_ptr == 'u')) {
195 CHECK_SIZE_TOKEN;
196 *e_token++ = *buf_ptr++;
197 seensfx |= 1;
198 continue;
200 if (!(seensfx & 2) &&
201 (*buf_ptr == 'L' || *buf_ptr == 'l')) {
202 CHECK_SIZE_TOKEN;
203 if (buf_ptr[1] == buf_ptr[0])
204 *e_token++ = *buf_ptr++;
205 *e_token++ = *buf_ptr++;
206 seensfx |= 2;
207 continue;
209 break;
212 else
213 while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) {
214 /* fill_buffer() terminates buffer with newline */
215 if (*buf_ptr == BACKSLASH) {
216 if (*(buf_ptr + 1) == '\n') {
217 buf_ptr += 2;
218 if (buf_ptr >= buf_end)
219 fill_buffer();
220 } else
221 break;
223 CHECK_SIZE_TOKEN;
224 /* copy it over */
225 *e_token++ = *buf_ptr++;
226 if (buf_ptr >= buf_end)
227 fill_buffer();
229 *e_token++ = '\0';
230 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
231 if (++buf_ptr >= buf_end)
232 fill_buffer();
234 ps.its_a_keyword = false;
235 ps.sizeof_keyword = false;
236 if (l_struct) { /* if last token was 'struct', then this token
237 * should be treated as a declaration */
238 l_struct = false;
239 last_code = ident;
240 ps.last_u_d = true;
241 return (decl);
243 ps.last_u_d = false; /* Operator after indentifier is binary */
244 last_code = ident; /* Remember that this is the code we will
245 * return */
248 * This loop will check if the token is a keyword.
250 for (p = specials; (j = p->rwd) != 0; p++) {
251 char *p = s_token; /* point at scanned token */
252 if (*j++ != *p++ || *j++ != *p++)
253 continue; /* This test depends on the fact that
254 * identifiers are always at least 1 character
255 * long (ie. the first two bytes of the
256 * identifier are always meaningful) */
257 if (p[-1] == 0)
258 break; /* If its a one-character identifier */
259 while (*p++ == *j)
260 if (*j++ == 0)
261 goto found_keyword; /* I wish that C had a multi-level
262 * break... */
264 if (p->rwd) { /* we have a keyword */
265 found_keyword:
266 ps.its_a_keyword = true;
267 ps.last_u_d = true;
268 switch (p->rwcode) {
269 case 1: /* it is a switch */
270 return (swstmt);
271 case 2: /* a case or default */
272 return (casestmt);
274 case 3: /* a "struct" */
275 if (ps.p_l_follow)
276 break; /* inside parens: cast */
278 * Next time around, we may want to know that we have had a
279 * 'struct'
281 l_struct = true;
284 * Fall through to test for a cast, function prototype or
285 * sizeof().
287 case 4: /* one of the declaration keywords */
288 if (ps.p_l_follow) {
289 ps.cast_mask |= 1 << ps.p_l_follow;
292 * Forget that we saw `struct' if we're in a sizeof().
294 if (ps.sizeof_mask)
295 l_struct = false;
297 break; /* inside parens: cast, prototype or sizeof() */
299 last_code = decl;
300 return (decl);
302 case 5: /* if, while, for */
303 return (sp_paren);
305 case 6: /* do, else */
306 return (sp_nparen);
308 case 7:
309 ps.sizeof_keyword = true;
310 default: /* all others are treated like any other
311 * identifier */
312 return (ident);
313 } /* end of switch */
314 } /* end of if (found_it) */
315 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
316 char *tp = buf_ptr;
317 while (tp < buf_end)
318 if (*tp++ == ')' && (*tp == ';' || *tp == ','))
319 goto not_proc;
320 strncpy(ps.procname, token, sizeof ps.procname - 1);
321 ps.in_parameter_declaration = 1;
322 rparen_count = 1;
323 not_proc:;
326 * The following hack attempts to guess whether or not the current
327 * token is in fact a declaration keyword -- one that has been
328 * typedefd
330 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
331 && !ps.p_l_follow
332 && !ps.block_init
333 && (ps.last_token == rparen || ps.last_token == semicolon ||
334 ps.last_token == decl ||
335 ps.last_token == lbrace || ps.last_token == rbrace)) {
336 ps.its_a_keyword = true;
337 ps.last_u_d = true;
338 last_code = decl;
339 return decl;
341 if (last_code == decl) /* if this is a declared variable, then
342 * following sign is unary */
343 ps.last_u_d = true; /* will make "int a -1" work */
344 last_code = ident;
345 return (ident); /* the ident is not in the list */
346 } /* end of procesing for alpanum character */
348 /* Scan a non-alphanumeric token */
350 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
351 * moved here */
352 *e_token = '\0';
353 if (++buf_ptr >= buf_end)
354 fill_buffer();
356 switch (*token) {
357 case '\n':
358 unary_delim = ps.last_u_d;
359 ps.last_nl = true; /* remember that we just had a newline */
360 code = (had_eof ? 0 : newline);
363 * if data has been exausted, the newline is a dummy, and we should
364 * return code to stop
366 break;
368 case '\'': /* start of quoted character */
369 case '"': /* start of string */
370 qchar = *token;
371 if (troff) {
372 e_token[-1] = '`';
373 if (qchar == '"')
374 *e_token++ = '`';
375 e_token = chfont(&bodyf, &stringf, e_token);
377 do { /* copy the string */
378 while (1) { /* move one character or [/<char>]<char> */
379 if (*buf_ptr == '\n') {
380 printf("%d: Unterminated literal\n", line_no);
381 goto stop_lit;
383 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop,
384 * since CHECK_SIZE guarantees that there
385 * are at least 5 entries left */
386 *e_token = *buf_ptr++;
387 if (buf_ptr >= buf_end)
388 fill_buffer();
389 if (*e_token == BACKSLASH) { /* if escape, copy extra char */
390 if (*buf_ptr == '\n') /* check for escaped newline */
391 ++line_no;
392 if (troff) {
393 *++e_token = BACKSLASH;
394 if (*buf_ptr == BACKSLASH)
395 *++e_token = BACKSLASH;
397 *++e_token = *buf_ptr++;
398 ++e_token; /* we must increment this again because we
399 * copied two chars */
400 if (buf_ptr >= buf_end)
401 fill_buffer();
403 else
404 break; /* we copied one character */
405 } /* end of while (1) */
406 } while (*e_token++ != qchar);
407 if (troff) {
408 e_token = chfont(&stringf, &bodyf, e_token - 1);
409 if (qchar == '"')
410 *e_token++ = '\'';
412 stop_lit:
413 code = ident;
414 break;
416 case ('('):
417 case ('['):
418 unary_delim = true;
419 code = lparen;
420 break;
422 case (')'):
423 case (']'):
424 code = rparen;
425 break;
427 case '#':
428 unary_delim = ps.last_u_d;
429 code = preesc;
430 break;
432 case '?':
433 unary_delim = true;
434 code = question;
435 break;
437 case (':'):
438 code = colon;
439 unary_delim = true;
440 break;
442 case (';'):
443 unary_delim = true;
444 code = semicolon;
445 break;
447 case ('{'):
448 unary_delim = true;
451 * if (ps.in_or_st) ps.block_init = 1;
453 /* ? code = ps.block_init ? lparen : lbrace; */
454 code = lbrace;
455 break;
457 case ('}'):
458 unary_delim = true;
459 /* ? code = ps.block_init ? rparen : rbrace; */
460 code = rbrace;
461 break;
463 case 014: /* a form feed */
464 unary_delim = ps.last_u_d;
465 ps.last_nl = true; /* remember this so we can set 'ps.col_1'
466 * right */
467 code = form_feed;
468 break;
470 case (','):
471 unary_delim = true;
472 code = comma;
473 break;
475 case '.':
476 unary_delim = false;
477 code = period;
478 break;
480 case '-':
481 case '+': /* check for -, +, --, ++ */
482 code = (ps.last_u_d ? unary_op : binary_op);
483 unary_delim = true;
485 if (*buf_ptr == token[0]) {
486 /* check for doubled character */
487 *e_token++ = *buf_ptr++;
488 /* buffer overflow will be checked at end of loop */
489 if (last_code == ident || last_code == rparen) {
490 code = (ps.last_u_d ? unary_op : postop);
491 /* check for following ++ or -- */
492 unary_delim = false;
495 else if (*buf_ptr == '=')
496 /* check for operator += */
497 *e_token++ = *buf_ptr++;
498 else if (*buf_ptr == '>') {
499 /* check for operator -> */
500 *e_token++ = *buf_ptr++;
501 if (!pointer_as_binop) {
502 unary_delim = false;
503 code = unary_op;
504 ps.want_blank = false;
507 break; /* buffer overflow will be checked at end of
508 * switch */
510 case '=':
511 if (ps.in_or_st)
512 ps.block_init = 1;
513 #ifdef undef
514 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */
515 e_token[-1] = *buf_ptr++;
516 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
517 *e_token++ = *buf_ptr++;
518 *e_token++ = '='; /* Flip =+ to += */
519 *e_token = 0;
521 #else
522 if (*buf_ptr == '=') {/* == */
523 *e_token++ = '='; /* Flip =+ to += */
524 buf_ptr++;
525 *e_token = 0;
527 #endif
528 code = binary_op;
529 unary_delim = true;
530 break;
531 /* can drop thru!!! */
533 case '>':
534 case '<':
535 case '!': /* ops like <, <<, <=, !=, etc */
536 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
537 *e_token++ = *buf_ptr;
538 if (++buf_ptr >= buf_end)
539 fill_buffer();
541 if (*buf_ptr == '=')
542 *e_token++ = *buf_ptr++;
543 code = (ps.last_u_d ? unary_op : binary_op);
544 unary_delim = true;
545 break;
547 default:
548 if (token[0] == '/' && *buf_ptr == '*') {
549 /* it is start of comment */
550 *e_token++ = '*';
552 if (++buf_ptr >= buf_end)
553 fill_buffer();
555 code = comment;
556 unary_delim = ps.last_u_d;
557 break;
559 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
561 * handle ||, &&, etc, and also things as in int *****i
563 *e_token++ = *buf_ptr;
564 if (++buf_ptr >= buf_end)
565 fill_buffer();
567 code = (ps.last_u_d ? unary_op : binary_op);
568 unary_delim = true;
571 } /* end of switch */
572 if (code != newline) {
573 l_struct = false;
574 last_code = code;
576 if (buf_ptr >= buf_end) /* check for input buffer empty */
577 fill_buffer();
578 ps.last_u_d = unary_delim;
579 *e_token = '\0'; /* null terminate the token */
580 return (code);
584 * Add the given keyword to the keyword table, using val as the keyword type
586 void
587 addkey(char *key, int val)
589 struct templ *p = specials;
590 while (p->rwd)
591 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
592 return;
593 else
594 p++;
595 if (p >= specials + sizeof specials / sizeof specials[0])
596 return; /* For now, table overflows are silently
597 * ignored */
598 p->rwd = key;
599 p->rwcode = val;
600 p[1].rwd = 0;
601 p[1].rwcode = 0;