scripts/genksyms/lex.l

/* Lexical analysis for genksyms.
   Copyright 1996, 1997 Linux International.

   New implementation contributed by Richard Henderson <rth@tamu.edu>
   Based on original work by Bjorn Ekwall <bj0rn@blox.se>

   Taken from Linux modutils 2.4.22.

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2 of the License, or (at your
   option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23
  24 %{
  25
  26 #include <limits.h>
  27 #include <stdlib.h>
  28 #include <string.h>
  29 #include <ctype.h>
  30
  31 #include "genksyms.h"
  32 #include "parse.h"
  33
  34 /* We've got a two-level lexer here.  We let flex do basic tokenization
  35    and then we categorize those basic tokens in the second stage.  */
  36 #define YY_DECL         static int yylex1(void)
  37
  38 %}
  39
  40 IDENT                   [A-Za-z_\$][A-Za-z0-9_\$]*
  41
  42 O_INT                   0[0-7]*
  43 D_INT                   [1-9][0-9]*
  44 X_INT                   0[Xx][0-9A-Fa-f]+
  45 I_SUF                   [Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
  46 INT                     ({O_INT}|{D_INT}|{X_INT}){I_SUF}?
  47
  48 FRAC                    ([0-9]*\.[0-9]+)|([0-9]+\.)
  49 EXP                     [Ee][+-]?[0-9]+
  50 F_SUF                   [FfLl]
  51 REAL                    ({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)
  52
  53 STRING                  L?\"([^\\\"]*\\.)*[^\\\"]*\"
  54 CHAR                    L?\'([^\\\']*\\.)*[^\\\']*\'
  55
  56 MC_TOKEN                ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)
  57
  58 /* Version 2 checksumming does proper tokenization; version 1 wasn't
  59    quite so pedantic.  */
  60 %s V2_TOKENS
  61
  62 /* We don't do multiple input files.  */
  63 %option noyywrap
  64
  65 %%
  66
  67
  68  /* Keep track of our location in the original source files.  */
  69 ^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n     return FILENAME;
  70 ^#.*\n                                  cur_line++;
  71 \n                                      cur_line++;
  72
  73  /* Ignore all other whitespace.  */
  74 [ \t\f\v\r]+                            ;
  75
  76
  77 {STRING}                                return STRING;
  78 {CHAR}                                  return CHAR;
  79 {IDENT}                                 return IDENT;
  80
  81  /* The Pedant requires that the other C multi-character tokens be
  82     recognized as tokens.  We don't actually use them since we don't
  83     parse expressions, but we do want whitespace to be arranged
  84     around them properly.  */
  85 <V2_TOKENS>{MC_TOKEN}                   return OTHER;
  86 <V2_TOKENS>{INT}                        return INT;
  87 <V2_TOKENS>{REAL}                       return REAL;
  88
  89 "..."                                   return DOTS;
  90
  91  /* All other tokens are single characters.  */
  92 .                                       return yytext[0];
  93
  94
  95 %%
  96
  97 /* Bring in the keyword recognizer.  */
  98
  99 #include "keywords.c"
 100
 101
 102 /* Macros to append to our phrase collection list.  */
 103
 104 #define _APP(T,L)       do {                                               \
 105                           cur_node = next_node;                            \
 106                           next_node = xmalloc(sizeof(*next_node));         \
 107                           next_node->next = cur_node;                      \
 108                           cur_node->string = memcpy(xmalloc(L+1), T, L+1); \
 109                           cur_node->tag = SYM_NORMAL;                      \
 110                         } while (0)
 111
 112 #define APP             _APP(yytext, yyleng)
 113
 114
 115 /* The second stage lexer.  Here we incorporate knowledge of the state
 116    of the parser to tailor the tokens that are returned.  */
 117
 118 int
 119 yylex(void)
 120 {
 121   static enum {
 122     ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_BRACKET, ST_BRACE,
 123     ST_EXPRESSION, ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4,
 124     ST_TABLE_5, ST_TABLE_6
 125   } lexstate = ST_NOTSTARTED;
 126
 127   static int suppress_type_lookup, dont_want_brace_phrase;
 128   static struct string_list *next_node;
 129
 130   int token, count = 0;
 131   struct string_list *cur_node;
 132
 133   if (lexstate == ST_NOTSTARTED)
 134     {
 135       BEGIN(V2_TOKENS);
 136       next_node = xmalloc(sizeof(*next_node));
 137       next_node->next = NULL;
 138       lexstate = ST_NORMAL;
 139     }
 140
 141 repeat:
 142   token = yylex1();
 143
 144   if (token == 0)
 145     return 0;
 146   else if (token == FILENAME)
 147     {
 148       char *file, *e;
 149
 150       /* Save the filename and line number for later error messages.  */
 151
 152       if (cur_filename)
 153         free(cur_filename);
 154
 155       file = strchr(yytext, '\"')+1;
 156       e = strchr(file, '\"');
 157       *e = '\0';
 158       cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
 159       cur_line = atoi(yytext+2);
 160
 161       goto repeat;
 162     }
 163
 164   switch (lexstate)
 165     {
 166     case ST_NORMAL:
 167       switch (token)
 168         {
 169         case IDENT:
 170           APP;
 171           {
 172             const struct resword *r = is_reserved_word(yytext, yyleng);
 173             if (r)
 174               {
 175                 switch (token = r->token)
 176                   {
 177                   case ATTRIBUTE_KEYW:
 178                     lexstate = ST_ATTRIBUTE;
 179                     count = 0;
 180                     goto repeat;
 181                   case ASM_KEYW:
 182                     lexstate = ST_ASM;
 183                     count = 0;
 184                     goto repeat;
 185
 186                   case STRUCT_KEYW:
 187                   case UNION_KEYW:
 188                     dont_want_brace_phrase = 3;
 189                   case ENUM_KEYW:
 190                     suppress_type_lookup = 2;
 191                     goto fini;
 192
 193                   case EXPORT_SYMBOL_KEYW:
 194                       goto fini;
 195                   }
 196               }
 197             if (!suppress_type_lookup)
 198               {
 199                 struct symbol *sym = find_symbol(yytext, SYM_TYPEDEF);
 200                 if (sym && sym->type == SYM_TYPEDEF)
 201                   token = TYPE;
 202               }
 203           }
 204           break;
 205
 206         case '[':
 207           APP;
 208           lexstate = ST_BRACKET;
 209           count = 1;
 210           goto repeat;
 211
 212         case '{':
 213           APP;
 214           if (dont_want_brace_phrase)
 215             break;
 216           lexstate = ST_BRACE;
 217           count = 1;
 218           goto repeat;
 219
 220         case '=': case ':':
 221           APP;
 222           lexstate = ST_EXPRESSION;
 223           break;
 224
 225         case DOTS:
 226         default:
 227           APP;
 228           break;
 229         }
 230       break;
 231
 232     case ST_ATTRIBUTE:
 233       APP;
 234       switch (token)
 235         {
 236         case '(':
 237           ++count;
 238           goto repeat;
 239         case ')':
 240           if (--count == 0)
 241             {
 242               lexstate = ST_NORMAL;
 243               token = ATTRIBUTE_PHRASE;
 244               break;
 245             }
 246           goto repeat;
 247         default:
 248           goto repeat;
 249         }
 250       break;
 251
 252     case ST_ASM:
 253       APP;
 254       switch (token)
 255         {
 256         case '(':
 257           ++count;
 258           goto repeat;
 259         case ')':
 260           if (--count == 0)
 261             {
 262               lexstate = ST_NORMAL;
 263               token = ASM_PHRASE;
 264               break;
 265             }
 266           goto repeat;
 267         default:
 268           goto repeat;
 269         }
 270       break;
 271
 272     case ST_BRACKET:
 273       APP;
 274       switch (token)
 275         {
 276         case '[':
 277           ++count;
 278           goto repeat;
 279         case ']':
 280           if (--count == 0)
 281             {
 282               lexstate = ST_NORMAL;
 283               token = BRACKET_PHRASE;
 284               break;
 285             }
 286           goto repeat;
 287         default:
 288           goto repeat;
 289         }
 290       break;
 291
 292     case ST_BRACE:
 293       APP;
 294       switch (token)
 295         {
 296         case '{':
 297           ++count;
 298           goto repeat;
 299         case '}':
 300           if (--count == 0)
 301             {
 302               lexstate = ST_NORMAL;
 303               token = BRACE_PHRASE;
 304               break;
 305             }
 306           goto repeat;
 307         default:
 308           goto repeat;
 309         }
 310       break;
 311
 312     case ST_EXPRESSION:
 313       switch (token)
 314         {
 315         case '(': case '[': case '{':
 316           ++count;
 317           APP;
 318           goto repeat;
 319         case ')': case ']': case '}':
 320           --count;
 321           APP;
 322           goto repeat;
 323         case ',': case ';':
 324           if (count == 0)
 325             {
 326               /* Put back the token we just read so's we can find it again
 327                  after registering the expression.  */
 328               unput(token);
 329
 330               lexstate = ST_NORMAL;
 331               token = EXPRESSION_PHRASE;
 332               break;
 333             }
 334           APP;
 335           goto repeat;
 336         default:
 337           APP;
 338           goto repeat;
 339         }
 340       break;
 341
 342     case ST_TABLE_1:
 343       goto repeat;
 344
 345     case ST_TABLE_2:
 346       if (token == IDENT && yyleng == 1 && yytext[0] == 'X')
 347         {
 348           token = EXPORT_SYMBOL_KEYW;
 349           lexstate = ST_TABLE_5;
 350           APP;
 351           break;
 352         }
 353       lexstate = ST_TABLE_6;
 354       /* FALLTHRU */
 355
 356     case ST_TABLE_6:
 357       switch (token)
 358         {
 359         case '{': case '[': case '(':
 360           ++count;
 361           break;
 362         case '}': case ']': case ')':
 363           --count;
 364           break;
 365         case ',':
 366           if (count == 0)
 367             lexstate = ST_TABLE_2;
 368           break;
 369         };
 370       goto repeat;
 371
 372     case ST_TABLE_3:
 373       goto repeat;
 374
 375     case ST_TABLE_4:
 376       if (token == ';')
 377         lexstate = ST_NORMAL;
 378       goto repeat;
 379
 380     case ST_TABLE_5:
 381       switch (token)
 382         {
 383         case ',':
 384           token = ';';
 385           lexstate = ST_TABLE_2;
 386           APP;
 387           break;
 388         default:
 389           APP;
 390           break;
 391         }
 392       break;
 393
 394     default:
 395       exit(1);
 396     }
 397 fini:
 398
 399   if (suppress_type_lookup > 0)
 400     --suppress_type_lookup;
 401   if (dont_want_brace_phrase > 0)
 402     --dont_want_brace_phrase;
 403
 404   yylval = &next_node->next;
 405
 406   return token;
 407 }