mountd(8): Replace malloc+strcpy with strdup/strndup
[dragonfly.git] / usr.bin / indent / lexi.c
blob1cd29f27fac757ade0dd3f8bb23e89cc119c8ed7
1 /*
2 * Copyright (c) 1985 Sun Microsystems, Inc.
3 * Copyright (c) 1980, 1993
4 * The Regents of the University of California. All rights reserved.
5 * All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
31 * @(#)lexi.c 8.1 (Berkeley) 6/6/93
32 * $FreeBSD: src/usr.bin/indent/lexi.c,v 1.21 2010/04/15 21:41:07 avg Exp $
36 * Here we have the token scanner for indent. It scans off one token and puts
37 * it in the global variable "token". It returns a code, indicating the type
38 * of token scanned.
41 #include <err.h>
42 #include <stdio.h>
43 #include <ctype.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include "indent_globs.h"
47 #include "indent_codes.h"
48 #include "indent.h"
/*
 * Character classes stored in chartype[].  A table value of 0 means the
 * character belongs to neither class.
 */
#define alphanum 1		/* character of the alphanumeric class */
#define opchar 3		/* character of the operator class */
/*
 * One entry of the keyword table.
 */
struct templ {
    const char *rwd;		/* keyword text; a null pointer ends the table */
    int         rwcode;		/* keyword class, see the list below */
};

/*
 * Table of reserved words searched by lexi() and extended by addkey().
 * The rwcode values are the ones lexi() switches on:
 *   1  switch
 *   2  case, default
 *   3  struct, union, enum
 *   4  declaration keywords (types, storage classes, qualifiers)
 *   5  if, while, for
 *   6  else, do
 *   7  sizeof
 *   anything else (0 here) is handled like an ordinary identifier.
 * The array is sized well beyond the built-in entries so that addkey() has
 * room to append; the entry with a null rwd marks the current end.
 */
struct templ specials[1000] =
{
    {"switch", 1},
    {"case", 2},
    {"break", 0},
    {"struct", 3},
    {"union", 3},
    {"enum", 3},
    {"default", 2},
    {"int", 4},
    {"char", 4},
    {"float", 4},
    {"double", 4},
    {"long", 4},
    {"short", 4},
    {"typedef", 4},		/* was misspelled "typdef" and so never matched */
    {"unsigned", 4},
    {"register", 4},
    {"static", 4},
    {"global", 4},
    {"extern", 4},
    {"void", 4},
    {"const", 4},
    {"volatile", 4},
    {"goto", 0},
    {"return", 0},
    {"if", 5},
    {"while", 5},
    {"for", 5},
    {"else", 6},
    {"do", 6},
    {"sizeof", 7},
    {0, 0}
};
/*
 * Character class of each 7-bit ASCII code; this is used to facilitate the
 * decision of what type (alphanumeric, operator) each character is.
 * Values: 1 = alphanumeric class (letters, digits, '_' and '$'),
 *         3 = operator class, 0 = neither.
 * The table has exactly 128 entries, so it must only be indexed with values
 * in the range 0..127.
 */
char        chartype[128] =
{
    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x00 - 0x07: control characters */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x08 - 0x0f */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x10 - 0x17 */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x18 - 0x1f */
    0, 3, 0, 0, 1, 3, 3, 0,	/* 0x20 - 0x27: space ! " # $ % & ' */
    0, 0, 3, 3, 0, 3, 0, 3,	/* 0x28 - 0x2f: ( ) star + , - . slash */
    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x30 - 0x37: 0 1 2 3 4 5 6 7 */
    1, 1, 0, 0, 3, 3, 3, 3,	/* 0x38 - 0x3f: 8 9 : ; < = > ? */
    0, 1, 1, 1, 1, 1, 1, 1,	/* 0x40 - 0x47: @ A B C D E F G */
    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x48 - 0x4f: H I J K L M N O */
    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x50 - 0x57: P Q R S T U V W */
    1, 1, 1, 0, 0, 0, 3, 1,	/* 0x58 - 0x5f: X Y Z [ backslash ] ^ _ */
    0, 1, 1, 1, 1, 1, 1, 1,	/* 0x60 - 0x67: ` a b c d e f g */
    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x68 - 0x6f: h i j k l m n o */
    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x70 - 0x77: p q r s t u v w */
    1, 1, 1, 0, 3, 0, 3, 0	/* 0x78 - 0x7f: x y z { | } ~ DEL */
};
116 lexi(void)
118 int unary_delim; /* this is set to 1 if the current token
119 * forces a following operator to be unary */
120 static int last_code; /* the last token type returned */
121 static int l_struct; /* set to 1 if the last token was 'struct' */
122 int code; /* internal code to be returned */
123 char qchar; /* the delimiter character for a string */
125 e_token = s_token; /* point to start of place to save token */
126 unary_delim = false;
127 ps.col_1 = ps.last_nl; /* tell world that this token started in
128 * column 1 iff the last thing scanned was nl */
129 ps.last_nl = false;
131 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
132 ps.col_1 = false; /* leading blanks imply token is not in column
133 * 1 */
134 if (++buf_ptr >= buf_end)
135 fill_buffer();
138 /* Scan an alphanumeric token */
139 if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
141 * we have a character or number
143 const char *j; /* used for searching thru list of
145 * reserved words */
146 struct templ *p;
148 if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
149 int seendot = 0,
150 seenexp = 0,
151 seensfx = 0;
152 if (*buf_ptr == '0' &&
153 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
154 *e_token++ = *buf_ptr++;
155 *e_token++ = *buf_ptr++;
156 while (isxdigit(*buf_ptr)) {
157 CHECK_SIZE_TOKEN;
158 *e_token++ = *buf_ptr++;
161 else
162 while (1) {
163 if (*buf_ptr == '.') {
164 if (seendot)
165 break;
166 else
167 seendot++;
169 CHECK_SIZE_TOKEN;
170 *e_token++ = *buf_ptr++;
171 if (!isdigit(*buf_ptr) && *buf_ptr != '.') {
172 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
173 break;
174 else {
175 seenexp++;
176 seendot++;
177 CHECK_SIZE_TOKEN;
178 *e_token++ = *buf_ptr++;
179 if (*buf_ptr == '+' || *buf_ptr == '-')
180 *e_token++ = *buf_ptr++;
184 while (1) {
185 if (!(seensfx & 1) &&
186 (*buf_ptr == 'U' || *buf_ptr == 'u')) {
187 CHECK_SIZE_TOKEN;
188 *e_token++ = *buf_ptr++;
189 seensfx |= 1;
190 continue;
192 if (!(seensfx & 2) &&
193 (*buf_ptr == 'L' || *buf_ptr == 'l')) {
194 CHECK_SIZE_TOKEN;
195 if (buf_ptr[1] == buf_ptr[0])
196 *e_token++ = *buf_ptr++;
197 *e_token++ = *buf_ptr++;
198 seensfx |= 2;
199 continue;
201 break;
204 else
205 while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) {
206 /* fill_buffer() terminates buffer with newline */
207 if (*buf_ptr == BACKSLASH) {
208 if (*(buf_ptr + 1) == '\n') {
209 buf_ptr += 2;
210 if (buf_ptr >= buf_end)
211 fill_buffer();
212 } else
213 break;
215 CHECK_SIZE_TOKEN;
216 /* copy it over */
217 *e_token++ = *buf_ptr++;
218 if (buf_ptr >= buf_end)
219 fill_buffer();
221 *e_token++ = '\0';
222 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
223 if (++buf_ptr >= buf_end)
224 fill_buffer();
226 ps.its_a_keyword = false;
227 ps.sizeof_keyword = false;
228 if (l_struct && !ps.p_l_follow) {
229 /* if last token was 'struct' and we're not
230 * in parentheses, then this token
231 * should be treated as a declaration */
232 l_struct = false;
233 last_code = ident;
234 ps.last_u_d = true;
235 return (decl);
237 ps.last_u_d = l_struct; /* Operator after identifier is binary
238 * unless last token was 'struct' */
239 l_struct = false;
240 last_code = ident; /* Remember that this is the code we will
241 * return */
243 if (auto_typedefs) {
244 const char *q = s_token;
245 size_t q_len = strlen(q);
246 /* Check if we have an "_t" in the end */
247 if (q_len > 2 &&
248 (strcmp(q + q_len - 2, "_t") == 0)) {
249 ps.its_a_keyword = true;
250 ps.last_u_d = true;
251 goto found_auto_typedef;
256 * This loop will check if the token is a keyword.
258 for (p = specials; (j = p->rwd) != NULL; p++) {
259 const char *q = s_token; /* point at scanned token */
260 if (*j++ != *q++ || *j++ != *q++)
261 continue; /* This test depends on the fact that
262 * identifiers are always at least 1 character
263 * long (ie. the first two bytes of the
264 * identifier are always meaningful) */
265 if (q[-1] == 0)
266 break; /* If its a one-character identifier */
267 while (*q++ == *j)
268 if (*j++ == 0)
269 goto found_keyword; /* I wish that C had a multi-level
270 * break... */
272 if (p->rwd) { /* we have a keyword */
273 found_keyword:
274 ps.its_a_keyword = true;
275 ps.last_u_d = true;
276 switch (p->rwcode) {
277 case 1: /* it is a switch */
278 return (swstmt);
279 case 2: /* a case or default */
280 return (casestmt);
282 case 3: /* a "struct" */
284 * Next time around, we will want to know that we have had a
285 * 'struct'
287 l_struct = true;
288 /* FALLTHROUGH */
290 case 4: /* one of the declaration keywords */
291 found_auto_typedef:
292 if (ps.p_l_follow) {
293 ps.cast_mask |= (1 << ps.p_l_follow) & ~ps.sizeof_mask;
294 break; /* inside parens: cast, param list or sizeof */
296 last_code = decl;
297 return (decl);
299 case 5: /* if, while, for */
300 return (sp_paren);
302 case 6: /* do, else */
303 return (sp_nparen);
305 case 7:
306 ps.sizeof_keyword = true;
307 default: /* all others are treated like any other
308 * identifier */
309 return (ident);
310 } /* end of switch */
311 } /* end of if (found_it) */
312 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
313 char *tp = buf_ptr;
314 while (tp < buf_end)
315 if (*tp++ == ')' && (*tp == ';' || *tp == ','))
316 goto not_proc;
317 strncpy(ps.procname, token, sizeof ps.procname - 1);
318 ps.in_parameter_declaration = 1;
319 rparen_count = 1;
320 not_proc:;
323 * The following hack attempts to guess whether or not the current
324 * token is in fact a declaration keyword -- one that has been
325 * typedefd
327 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
328 && !ps.p_l_follow
329 && !ps.block_init
330 && (ps.last_token == rparen || ps.last_token == semicolon ||
331 ps.last_token == decl ||
332 ps.last_token == lbrace || ps.last_token == rbrace)) {
333 ps.its_a_keyword = true;
334 ps.last_u_d = true;
335 last_code = decl;
336 return decl;
338 if (last_code == decl) /* if this is a declared variable, then
339 * following sign is unary */
340 ps.last_u_d = true; /* will make "int a -1" work */
341 last_code = ident;
342 return (ident); /* the ident is not in the list */
343 } /* end of procesing for alpanum character */
345 /* Scan a non-alphanumeric token */
347 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
348 * moved here */
349 *e_token = '\0';
350 if (++buf_ptr >= buf_end)
351 fill_buffer();
353 switch (*token) {
354 case '\n':
355 unary_delim = ps.last_u_d;
356 ps.last_nl = true; /* remember that we just had a newline */
357 code = (had_eof ? 0 : newline);
360 * if data has been exhausted, the newline is a dummy, and we should
361 * return code to stop
363 break;
365 case '\'': /* start of quoted character */
366 case '"': /* start of string */
367 qchar = *token;
368 if (troff) {
369 e_token[-1] = '`';
370 if (qchar == '"')
371 *e_token++ = '`';
372 e_token = chfont(&bodyf, &stringf, e_token);
374 do { /* copy the string */
375 while (1) { /* move one character or [/<char>]<char> */
376 if (*buf_ptr == '\n') {
377 diag2(1, "Unterminated literal");
378 goto stop_lit;
380 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop,
381 * since CHECK_SIZE guarantees that there
382 * are at least 5 entries left */
383 *e_token = *buf_ptr++;
384 if (buf_ptr >= buf_end)
385 fill_buffer();
386 if (*e_token == BACKSLASH) { /* if escape, copy extra char */
387 if (*buf_ptr == '\n') /* check for escaped newline */
388 ++line_no;
389 if (troff) {
390 *++e_token = BACKSLASH;
391 if (*buf_ptr == BACKSLASH)
392 *++e_token = BACKSLASH;
394 *++e_token = *buf_ptr++;
395 ++e_token; /* we must increment this again because we
396 * copied two chars */
397 if (buf_ptr >= buf_end)
398 fill_buffer();
400 else
401 break; /* we copied one character */
402 } /* end of while (1) */
403 } while (*e_token++ != qchar);
404 if (troff) {
405 e_token = chfont(&stringf, &bodyf, e_token - 1);
406 if (qchar == '"')
407 *e_token++ = '\'';
409 stop_lit:
410 code = ident;
411 break;
413 case ('('):
414 case ('['):
415 unary_delim = true;
416 code = lparen;
417 break;
419 case (')'):
420 case (']'):
421 code = rparen;
422 break;
424 case '#':
425 unary_delim = ps.last_u_d;
426 code = preesc;
427 break;
429 case '?':
430 unary_delim = true;
431 code = question;
432 break;
434 case (':'):
435 code = colon;
436 unary_delim = true;
437 break;
439 case (';'):
440 unary_delim = true;
441 code = semicolon;
442 break;
444 case ('{'):
445 unary_delim = true;
448 * if (ps.in_or_st) ps.block_init = 1;
450 /* ? code = ps.block_init ? lparen : lbrace; */
451 code = lbrace;
452 break;
454 case ('}'):
455 unary_delim = true;
456 /* ? code = ps.block_init ? rparen : rbrace; */
457 code = rbrace;
458 break;
460 case 014: /* a form feed */
461 unary_delim = ps.last_u_d;
462 ps.last_nl = true; /* remember this so we can set 'ps.col_1'
463 * right */
464 code = form_feed;
465 break;
467 case (','):
468 unary_delim = true;
469 code = comma;
470 break;
472 case '.':
473 unary_delim = false;
474 code = period;
475 break;
477 case '-':
478 case '+': /* check for -, +, --, ++ */
479 code = (ps.last_u_d ? unary_op : binary_op);
480 unary_delim = true;
482 if (*buf_ptr == token[0]) {
483 /* check for doubled character */
484 *e_token++ = *buf_ptr++;
485 /* buffer overflow will be checked at end of loop */
486 if (last_code == ident || last_code == rparen) {
487 code = (ps.last_u_d ? unary_op : postop);
488 /* check for following ++ or -- */
489 unary_delim = false;
492 else if (*buf_ptr == '=')
493 /* check for operator += */
494 *e_token++ = *buf_ptr++;
495 else if (*buf_ptr == '>') {
496 /* check for operator -> */
497 *e_token++ = *buf_ptr++;
498 if (!pointer_as_binop) {
499 unary_delim = false;
500 code = unary_op;
501 ps.want_blank = false;
504 break; /* buffer overflow will be checked at end of
505 * switch */
507 case '=':
508 if (ps.in_or_st)
509 ps.block_init = 1;
510 #ifdef undef
511 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */
512 e_token[-1] = *buf_ptr++;
513 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
514 *e_token++ = *buf_ptr++;
515 *e_token++ = '='; /* Flip =+ to += */
516 *e_token = 0;
518 #else
519 if (*buf_ptr == '=') {/* == */
520 *e_token++ = '='; /* Flip =+ to += */
521 buf_ptr++;
522 *e_token = 0;
524 #endif
525 code = binary_op;
526 unary_delim = true;
527 break;
528 /* can drop thru!!! */
530 case '>':
531 case '<':
532 case '!': /* ops like <, <<, <=, !=, etc */
533 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
534 *e_token++ = *buf_ptr;
535 if (++buf_ptr >= buf_end)
536 fill_buffer();
538 if (*buf_ptr == '=')
539 *e_token++ = *buf_ptr++;
540 code = (ps.last_u_d ? unary_op : binary_op);
541 unary_delim = true;
542 break;
544 default:
545 if (token[0] == '/' && *buf_ptr == '*') {
546 /* it is start of comment */
547 *e_token++ = '*';
549 if (++buf_ptr >= buf_end)
550 fill_buffer();
552 code = comment;
553 unary_delim = ps.last_u_d;
554 break;
556 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
558 * handle ||, &&, etc, and also things as in int *****i
560 *e_token++ = *buf_ptr;
561 if (++buf_ptr >= buf_end)
562 fill_buffer();
564 code = (ps.last_u_d ? unary_op : binary_op);
565 unary_delim = true;
568 } /* end of switch */
569 if (code != newline) {
570 l_struct = false;
571 last_code = code;
573 if (buf_ptr >= buf_end) /* check for input buffer empty */
574 fill_buffer();
575 ps.last_u_d = unary_delim;
576 *e_token = '\0'; /* null terminate the token */
577 return (code);
581 * Add the given keyword to the keyword table, using val as the keyword type
583 void
584 addkey(char *key, int val)
586 struct templ *p = specials;
587 while (p->rwd)
588 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
589 return;
590 else
591 p++;
592 if (p >= specials + sizeof specials / sizeof specials[0])
593 return; /* For now, table overflows are silently
594 * ignored */
595 p->rwd = key;
596 p->rwcode = val;
597 p[1].rwd = NULL;
598 p[1].rwcode = 0;