amd64 - add kvtop and add back ed(4) to AMD64_GENERIC
[dragonfly.git] / usr.bin / indent / lexi.c
blobecbfa160171b3557bc617f2fe53b240dd96ebfe8
/*
 * Copyright (c) 1985 Sun Microsystems, Inc.
 * Copyright (c) 1980, 1993
 *	The Regents of the University of California.  All rights reserved.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)lexi.c	8.1 (Berkeley) 6/6/93
 * $FreeBSD: src/usr.bin/indent/lexi.c,v 1.19 2005/11/20 13:48:15 dds Exp $
 * $DragonFly: src/usr.bin/indent/lexi.c,v 1.3 2005/04/10 20:55:38 drhodus Exp $
 */
/*
 * Here we have the token scanner for indent.  It scans off one token and puts
 * it in the global variable "token".  It returns a code, indicating the type
 * of token scanned.
 */
46 #include <err.h>
47 #include <stdio.h>
48 #include <ctype.h>
49 #include <stdlib.h>
50 #include <string.h>
51 #include "indent_globs.h"
52 #include "indent_codes.h"
53 #include "indent.h"
/*
 * Character classes stored in chartype[].  A table entry of 0 means the
 * character belongs to neither class.
 */
#define alphanum 1		/* may be part of an identifier or number */
#define opchar 3		/* operator character */
/*
 * One entry of the keyword table: the keyword text and the class code that
 * lexi() switches on when the scanned token matches it.
 */
struct templ {
    const char *rwd;		/* keyword text; a null pointer ends the table */
    int         rwcode;		/* keyword class, see the list below */
};

/*
 * Keyword table.  It is deliberately oversized so that addkey() can append
 * further keywords at run time; the entry with a null rwd marks the end of
 * the part that is in use.
 *
 * rwcode values as handled by lexi():
 *   0  no special handling (returned as a plain identifier)
 *   1  switch
 *   2  case / default
 *   3  struct / union / enum
 *   4  declaration keyword
 *   5  if / while / for
 *   6  do / else
 *   7  sizeof
 */
struct templ specials[1000] =
{
    {"switch", 1},
    {"case", 2},
    {"break", 0},
    {"struct", 3},
    {"union", 3},
    {"enum", 3},
    {"default", 2},
    {"int", 4},
    {"char", 4},
    {"float", 4},
    {"double", 4},
    {"long", 4},
    {"short", 4},
    {"typedef", 4},		/* was misspelled "typdef" and so never matched */
    {"unsigned", 4},
    {"register", 4},
    {"static", 4},
    {"global", 4},
    {"extern", 4},
    {"void", 4},
    {"const", 4},
    {"volatile", 4},
    {"goto", 0},
    {"return", 0},
    {"if", 5},
    {"while", 5},
    {"for", 5},
    {"else", 6},
    {"do", 6},
    {"sizeof", 7},
    {0, 0}
};
/*
 * Character class of each 7-bit ASCII code; this is used to facilitate the
 * decision of what type (alphanumeric, operator) each character is.
 * Entries are 0 (neither), 1 (alphanum) or 3 (opchar).  Note that '$' and
 * '_' are classed as alphanumeric.
 */
char        chartype[128] =
{
    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x00 - 0x07 */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x08 - 0x0f */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x10 - 0x17 */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x18 - 0x1f */
    0, 3, 0, 0, 1, 3, 3, 0,	/* space ! " # $ % & ' */
    0, 0, 3, 3, 0, 3, 0, 3,	/* ( ) * + , - . /     */
    1, 1, 1, 1, 1, 1, 1, 1,	/* 0 - 7               */
    1, 1, 0, 0, 3, 3, 3, 3,	/* 8 9 : ; < = > ?     */
    0, 1, 1, 1, 1, 1, 1, 1,	/* @ A - G             */
    1, 1, 1, 1, 1, 1, 1, 1,	/* H - O               */
    1, 1, 1, 1, 1, 1, 1, 1,	/* P - W               */
    1, 1, 1, 0, 0, 0, 3, 1,	/* X Y Z [ \ ] ^ _     */
    0, 1, 1, 1, 1, 1, 1, 1,	/* ` a - g             */
    1, 1, 1, 1, 1, 1, 1, 1,	/* h - o               */
    1, 1, 1, 1, 1, 1, 1, 1,	/* p - w               */
    1, 1, 1, 0, 3, 0, 3, 0	/* x y z { | } ~ DEL   */
};
121 lexi(void)
123 int unary_delim; /* this is set to 1 if the current token
124 * forces a following operator to be unary */
125 static int last_code; /* the last token type returned */
126 static int l_struct; /* set to 1 if the last token was 'struct' */
127 int code; /* internal code to be returned */
128 char qchar; /* the delimiter character for a string */
130 e_token = s_token; /* point to start of place to save token */
131 unary_delim = false;
132 ps.col_1 = ps.last_nl; /* tell world that this token started in
133 * column 1 iff the last thing scanned was nl */
134 ps.last_nl = false;
136 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
137 ps.col_1 = false; /* leading blanks imply token is not in column
138 * 1 */
139 if (++buf_ptr >= buf_end)
140 fill_buffer();
143 /* Scan an alphanumeric token */
144 if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
146 * we have a character or number
148 const char *j; /* used for searching thru list of
150 * reserved words */
151 struct templ *p;
153 if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
154 int seendot = 0,
155 seenexp = 0,
156 seensfx = 0;
157 if (*buf_ptr == '0' &&
158 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
159 *e_token++ = *buf_ptr++;
160 *e_token++ = *buf_ptr++;
161 while (isxdigit(*buf_ptr)) {
162 CHECK_SIZE_TOKEN;
163 *e_token++ = *buf_ptr++;
166 else
167 while (1) {
168 if (*buf_ptr == '.') {
169 if (seendot)
170 break;
171 else
172 seendot++;
174 CHECK_SIZE_TOKEN;
175 *e_token++ = *buf_ptr++;
176 if (!isdigit(*buf_ptr) && *buf_ptr != '.') {
177 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
178 break;
179 else {
180 seenexp++;
181 seendot++;
182 CHECK_SIZE_TOKEN;
183 *e_token++ = *buf_ptr++;
184 if (*buf_ptr == '+' || *buf_ptr == '-')
185 *e_token++ = *buf_ptr++;
189 while (1) {
190 if (!(seensfx & 1) &&
191 (*buf_ptr == 'U' || *buf_ptr == 'u')) {
192 CHECK_SIZE_TOKEN;
193 *e_token++ = *buf_ptr++;
194 seensfx |= 1;
195 continue;
197 if (!(seensfx & 2) &&
198 (*buf_ptr == 'L' || *buf_ptr == 'l')) {
199 CHECK_SIZE_TOKEN;
200 if (buf_ptr[1] == buf_ptr[0])
201 *e_token++ = *buf_ptr++;
202 *e_token++ = *buf_ptr++;
203 seensfx |= 2;
204 continue;
206 break;
209 else
210 while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) {
211 /* fill_buffer() terminates buffer with newline */
212 if (*buf_ptr == BACKSLASH) {
213 if (*(buf_ptr + 1) == '\n') {
214 buf_ptr += 2;
215 if (buf_ptr >= buf_end)
216 fill_buffer();
217 } else
218 break;
220 CHECK_SIZE_TOKEN;
221 /* copy it over */
222 *e_token++ = *buf_ptr++;
223 if (buf_ptr >= buf_end)
224 fill_buffer();
226 *e_token++ = '\0';
227 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
228 if (++buf_ptr >= buf_end)
229 fill_buffer();
231 ps.its_a_keyword = false;
232 ps.sizeof_keyword = false;
233 if (l_struct && !ps.p_l_follow) {
234 /* if last token was 'struct' and we're not
235 * in parentheses, then this token
236 * should be treated as a declaration */
237 l_struct = false;
238 last_code = ident;
239 ps.last_u_d = true;
240 return (decl);
242 ps.last_u_d = l_struct; /* Operator after identifier is binary
243 * unless last token was 'struct' */
244 l_struct = false;
245 last_code = ident; /* Remember that this is the code we will
246 * return */
249 * This loop will check if the token is a keyword.
251 for (p = specials; (j = p->rwd) != 0; p++) {
252 const char *q = s_token; /* point at scanned token */
253 if (*j++ != *q++ || *j++ != *q++)
254 continue; /* This test depends on the fact that
255 * identifiers are always at least 1 character
256 * long (ie. the first two bytes of the
257 * identifier are always meaningful) */
258 if (q[-1] == 0)
259 break; /* If its a one-character identifier */
260 while (*q++ == *j)
261 if (*j++ == 0)
262 goto found_keyword; /* I wish that C had a multi-level
263 * break... */
265 if (p->rwd) { /* we have a keyword */
266 found_keyword:
267 ps.its_a_keyword = true;
268 ps.last_u_d = true;
269 switch (p->rwcode) {
270 case 1: /* it is a switch */
271 return (swstmt);
272 case 2: /* a case or default */
273 return (casestmt);
275 case 3: /* a "struct" */
277 * Next time around, we will want to know that we have had a
278 * 'struct'
280 l_struct = true;
281 /* FALLTHROUGH */
283 case 4: /* one of the declaration keywords */
284 if (ps.p_l_follow) {
285 ps.cast_mask |= (1 << ps.p_l_follow) & ~ps.sizeof_mask;
286 break; /* inside parens: cast, param list or sizeof */
288 last_code = decl;
289 return (decl);
291 case 5: /* if, while, for */
292 return (sp_paren);
294 case 6: /* do, else */
295 return (sp_nparen);
297 case 7:
298 ps.sizeof_keyword = true;
299 default: /* all others are treated like any other
300 * identifier */
301 return (ident);
302 } /* end of switch */
303 } /* end of if (found_it) */
304 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
305 char *tp = buf_ptr;
306 while (tp < buf_end)
307 if (*tp++ == ')' && (*tp == ';' || *tp == ','))
308 goto not_proc;
309 strncpy(ps.procname, token, sizeof ps.procname - 1);
310 ps.in_parameter_declaration = 1;
311 rparen_count = 1;
312 not_proc:;
315 * The following hack attempts to guess whether or not the current
316 * token is in fact a declaration keyword -- one that has been
317 * typedefd
319 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
320 && !ps.p_l_follow
321 && !ps.block_init
322 && (ps.last_token == rparen || ps.last_token == semicolon ||
323 ps.last_token == decl ||
324 ps.last_token == lbrace || ps.last_token == rbrace)) {
325 ps.its_a_keyword = true;
326 ps.last_u_d = true;
327 last_code = decl;
328 return decl;
330 if (last_code == decl) /* if this is a declared variable, then
331 * following sign is unary */
332 ps.last_u_d = true; /* will make "int a -1" work */
333 last_code = ident;
334 return (ident); /* the ident is not in the list */
335 } /* end of procesing for alpanum character */
337 /* Scan a non-alphanumeric token */
339 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
340 * moved here */
341 *e_token = '\0';
342 if (++buf_ptr >= buf_end)
343 fill_buffer();
345 switch (*token) {
346 case '\n':
347 unary_delim = ps.last_u_d;
348 ps.last_nl = true; /* remember that we just had a newline */
349 code = (had_eof ? 0 : newline);
352 * if data has been exhausted, the newline is a dummy, and we should
353 * return code to stop
355 break;
357 case '\'': /* start of quoted character */
358 case '"': /* start of string */
359 qchar = *token;
360 if (troff) {
361 e_token[-1] = '`';
362 if (qchar == '"')
363 *e_token++ = '`';
364 e_token = chfont(&bodyf, &stringf, e_token);
366 do { /* copy the string */
367 while (1) { /* move one character or [/<char>]<char> */
368 if (*buf_ptr == '\n') {
369 diag2(1, "Unterminated literal");
370 goto stop_lit;
372 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop,
373 * since CHECK_SIZE guarantees that there
374 * are at least 5 entries left */
375 *e_token = *buf_ptr++;
376 if (buf_ptr >= buf_end)
377 fill_buffer();
378 if (*e_token == BACKSLASH) { /* if escape, copy extra char */
379 if (*buf_ptr == '\n') /* check for escaped newline */
380 ++line_no;
381 if (troff) {
382 *++e_token = BACKSLASH;
383 if (*buf_ptr == BACKSLASH)
384 *++e_token = BACKSLASH;
386 *++e_token = *buf_ptr++;
387 ++e_token; /* we must increment this again because we
388 * copied two chars */
389 if (buf_ptr >= buf_end)
390 fill_buffer();
392 else
393 break; /* we copied one character */
394 } /* end of while (1) */
395 } while (*e_token++ != qchar);
396 if (troff) {
397 e_token = chfont(&stringf, &bodyf, e_token - 1);
398 if (qchar == '"')
399 *e_token++ = '\'';
401 stop_lit:
402 code = ident;
403 break;
405 case ('('):
406 case ('['):
407 unary_delim = true;
408 code = lparen;
409 break;
411 case (')'):
412 case (']'):
413 code = rparen;
414 break;
416 case '#':
417 unary_delim = ps.last_u_d;
418 code = preesc;
419 break;
421 case '?':
422 unary_delim = true;
423 code = question;
424 break;
426 case (':'):
427 code = colon;
428 unary_delim = true;
429 break;
431 case (';'):
432 unary_delim = true;
433 code = semicolon;
434 break;
436 case ('{'):
437 unary_delim = true;
440 * if (ps.in_or_st) ps.block_init = 1;
442 /* ? code = ps.block_init ? lparen : lbrace; */
443 code = lbrace;
444 break;
446 case ('}'):
447 unary_delim = true;
448 /* ? code = ps.block_init ? rparen : rbrace; */
449 code = rbrace;
450 break;
452 case 014: /* a form feed */
453 unary_delim = ps.last_u_d;
454 ps.last_nl = true; /* remember this so we can set 'ps.col_1'
455 * right */
456 code = form_feed;
457 break;
459 case (','):
460 unary_delim = true;
461 code = comma;
462 break;
464 case '.':
465 unary_delim = false;
466 code = period;
467 break;
469 case '-':
470 case '+': /* check for -, +, --, ++ */
471 code = (ps.last_u_d ? unary_op : binary_op);
472 unary_delim = true;
474 if (*buf_ptr == token[0]) {
475 /* check for doubled character */
476 *e_token++ = *buf_ptr++;
477 /* buffer overflow will be checked at end of loop */
478 if (last_code == ident || last_code == rparen) {
479 code = (ps.last_u_d ? unary_op : postop);
480 /* check for following ++ or -- */
481 unary_delim = false;
484 else if (*buf_ptr == '=')
485 /* check for operator += */
486 *e_token++ = *buf_ptr++;
487 else if (*buf_ptr == '>') {
488 /* check for operator -> */
489 *e_token++ = *buf_ptr++;
490 if (!pointer_as_binop) {
491 unary_delim = false;
492 code = unary_op;
493 ps.want_blank = false;
496 break; /* buffer overflow will be checked at end of
497 * switch */
499 case '=':
500 if (ps.in_or_st)
501 ps.block_init = 1;
502 #ifdef undef
503 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */
504 e_token[-1] = *buf_ptr++;
505 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
506 *e_token++ = *buf_ptr++;
507 *e_token++ = '='; /* Flip =+ to += */
508 *e_token = 0;
510 #else
511 if (*buf_ptr == '=') {/* == */
512 *e_token++ = '='; /* Flip =+ to += */
513 buf_ptr++;
514 *e_token = 0;
516 #endif
517 code = binary_op;
518 unary_delim = true;
519 break;
520 /* can drop thru!!! */
522 case '>':
523 case '<':
524 case '!': /* ops like <, <<, <=, !=, etc */
525 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
526 *e_token++ = *buf_ptr;
527 if (++buf_ptr >= buf_end)
528 fill_buffer();
530 if (*buf_ptr == '=')
531 *e_token++ = *buf_ptr++;
532 code = (ps.last_u_d ? unary_op : binary_op);
533 unary_delim = true;
534 break;
536 default:
537 if (token[0] == '/' && *buf_ptr == '*') {
538 /* it is start of comment */
539 *e_token++ = '*';
541 if (++buf_ptr >= buf_end)
542 fill_buffer();
544 code = comment;
545 unary_delim = ps.last_u_d;
546 break;
548 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
550 * handle ||, &&, etc, and also things as in int *****i
552 *e_token++ = *buf_ptr;
553 if (++buf_ptr >= buf_end)
554 fill_buffer();
556 code = (ps.last_u_d ? unary_op : binary_op);
557 unary_delim = true;
560 } /* end of switch */
561 if (code != newline) {
562 l_struct = false;
563 last_code = code;
565 if (buf_ptr >= buf_end) /* check for input buffer empty */
566 fill_buffer();
567 ps.last_u_d = unary_delim;
568 *e_token = '\0'; /* null terminate the token */
569 return (code);
573 * Add the given keyword to the keyword table, using val as the keyword type
575 void
576 addkey(char *key, int val)
578 struct templ *p = specials;
579 while (p->rwd)
580 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
581 return;
582 else
583 p++;
584 if (p >= specials + sizeof specials / sizeof specials[0])
585 return; /* For now, table overflows are silently
586 * ignored */
587 p->rwd = key;
588 p->rwcode = val;
589 p[1].rwd = 0;
590 p[1].rwcode = 0;