token.h

   1 #ifndef TOKEN_H
   2 #define TOKEN_H
/*
 * Basic tokenization structures. NOTE! Those tokens had better
 * be pretty small, since we're going to keep them all in memory
 * indefinitely.
 *
 * Copyright (C) 2003 Transmeta Corp.
 *               2003 Linus Torvalds
 *
 *  Licensed under the Open Software License version 1.1
 */
  13
#include <stddef.h>
#include <sys/types.h>
#include "lib.h"
  16
/*
 * This describes the pure lexical elements (tokens), with
 * no semantic meaning. In other words, an identifier doesn't
 * have a type or meaning, it is only a specific string in
 * the input stream.
 *
 * Semantic meaning is handled elsewhere.
 */
  25
  26 enum constantfile {
  27   CONSTANT_FILE_MAYBE,    // To be determined, not inside any #ifs in this file
  28   CONSTANT_FILE_IFNDEF,   // To be determined, currently inside #ifndef
  29   CONSTANT_FILE_NOPE,     // No
  30   CONSTANT_FILE_YES       // Yes
  31 };
  32
  33 extern const char *includepath[];
  34
  35 struct stream {
  36         int fd;
  37         const char *name;
  38         const char *path;    // input-file path - see set_stream_include_path()
  39         const char **next_path;
  40
  41         /* Use these to check for "already parsed" */
  42         enum constantfile constant;
  43         int dirty, next_stream, once;
  44         struct ident *protect;
  45         struct token *ifndef;
  46         struct token *top_if;
  47 };
  48
  49 extern int input_stream_nr;
  50 extern struct stream *input_streams;
  51 extern unsigned int tabstop;
  52 extern int *hash_stream(const char *name);
  53
  54 struct ident {
  55         struct ident *next;     /* Hash chain of identifiers */
  56         struct symbol *symbols; /* Pointer to semantic meaning list */
  57         unsigned char len;      /* Length of identifier name */
  58         unsigned char tainted:1,
  59                       reserved:1,
  60                       keyword:1;
  61         char name[];            /* Actual identifier */
  62 };
  63
  64 enum token_type {
  65         TOKEN_EOF,
  66         TOKEN_ERROR,
  67         TOKEN_IDENT,
  68         TOKEN_ZERO_IDENT,
  69         TOKEN_NUMBER,
  70         TOKEN_CHAR,
  71         TOKEN_CHAR_EMBEDDED_0,
  72         TOKEN_CHAR_EMBEDDED_1,
  73         TOKEN_CHAR_EMBEDDED_2,
  74         TOKEN_CHAR_EMBEDDED_3,
  75         TOKEN_WIDE_CHAR,
  76         TOKEN_WIDE_CHAR_EMBEDDED_0,
  77         TOKEN_WIDE_CHAR_EMBEDDED_1,
  78         TOKEN_WIDE_CHAR_EMBEDDED_2,
  79         TOKEN_WIDE_CHAR_EMBEDDED_3,
  80         TOKEN_STRING,
  81         TOKEN_WIDE_STRING,
  82         TOKEN_SPECIAL,
  83         TOKEN_STREAMBEGIN,
  84         TOKEN_STREAMEND,
  85         TOKEN_MACRO_ARGUMENT,
  86         TOKEN_STR_ARGUMENT,
  87         TOKEN_QUOTED_ARGUMENT,
  88         TOKEN_CONCAT,
  89         TOKEN_GNU_KLUDGE,
  90         TOKEN_UNTAINT,
  91         TOKEN_ARG_COUNT,
  92         TOKEN_IF,
  93         TOKEN_SKIP_GROUPS,
  94         TOKEN_ELSE,
  95 };
  96
  97 /* Combination tokens */
  98 #define COMBINATION_STRINGS {   \
  99         "+=", "++",             \
 100         "-=", "--", "->",       \
 101         "*=",                   \
 102         "/=",                   \
 103         "%=",                   \
 104         "<=", ">=",             \
 105         "==", "!=",             \
 106         "&&", "&=",             \
 107         "||", "|=",             \
 108         "^=", "##",             \
 109         "<<", ">>", "..",       \
 110         "<<=", ">>=", "...",    \
 111         "",                     \
 112         "<", ">", "<=", ">="    \
 113 }
 114
 115 extern unsigned char combinations[][4];
 116
 117 enum special_token {
 118         SPECIAL_BASE = 256,
 119         SPECIAL_ADD_ASSIGN = SPECIAL_BASE,
 120         SPECIAL_INCREMENT,
 121         SPECIAL_SUB_ASSIGN,
 122         SPECIAL_DECREMENT,
 123         SPECIAL_DEREFERENCE,
 124         SPECIAL_MUL_ASSIGN,
 125         SPECIAL_DIV_ASSIGN,
 126         SPECIAL_MOD_ASSIGN,
 127         SPECIAL_LTE,
 128         SPECIAL_GTE,
 129         SPECIAL_EQUAL,
 130         SPECIAL_NOTEQUAL,
 131         SPECIAL_LOGICAL_AND,
 132         SPECIAL_AND_ASSIGN,
 133         SPECIAL_LOGICAL_OR,
 134         SPECIAL_OR_ASSIGN,
 135         SPECIAL_XOR_ASSIGN,
 136         SPECIAL_HASHHASH,
 137         SPECIAL_LEFTSHIFT,
 138         SPECIAL_RIGHTSHIFT,
 139         SPECIAL_DOTDOT,
 140         SPECIAL_SHL_ASSIGN,
 141         SPECIAL_SHR_ASSIGN,
 142         SPECIAL_ELLIPSIS,
 143         SPECIAL_ARG_SEPARATOR,
 144         SPECIAL_UNSIGNED_LT,
 145         SPECIAL_UNSIGNED_GT,
 146         SPECIAL_UNSIGNED_LTE,
 147         SPECIAL_UNSIGNED_GTE,
 148 };
 149
 150 struct string {
 151         unsigned int length;
 152         char data[];
 153 };
 154
 155 /* will fit into 32 bits */
 156 struct argcount {
 157         unsigned normal:10;
 158         unsigned quoted:10;
 159         unsigned str:10;
 160         unsigned vararg:1;
 161 };
 162
 163 /*
 164  * This is a very common data structure, it should be kept
 165  * as small as humanly possible. Big (rare) types go as
 166  * pointers.
 167  */
 168 struct token {
 169         struct position pos;
 170         struct token *next;
 171         union {
 172                 const char *number;
 173                 struct ident *ident;
 174                 unsigned int special;
 175                 struct string *string;
 176                 int argnum;
 177                 struct argcount count;
 178                 char embedded[4];
 179         };
 180 };
 181
 182 #define MAX_STRING 8191
 183
 184 static inline struct token *containing_token(struct token **p)
 185 {
 186         void *addr = (char *)p - ((char *)&((struct token *)0)->next - (char *)0);
 187         return addr;
 188 }
 189
 190 #define token_type(x) ((x)->pos.type)
 191
 192 /*
 193  * Last token in the stream - points to itself.
 194  * This allows us to not test for NULL pointers
 195  * when following the token->next chain..
 196  */
 197 extern struct token eof_token_entry;
 198 #define eof_token(x) ((x) == &eof_token_entry)
 199
 200 extern int init_stream(const char *, int fd, const char **next_path);
 201 extern const char *stream_name(int stream);
 202 extern struct ident *hash_ident(struct ident *);
 203 extern struct ident *built_in_ident(const char *);
 204 extern struct token *built_in_token(int, const char *);
 205 extern const char *show_special(int);
 206 extern const char *show_ident(const struct ident *);
 207 extern const char *show_string(const struct string *string);
 208 extern const char *show_token(const struct token *);
 209 extern const char *quote_token(const struct token *);
 210 extern struct token * tokenize(const char *, int, struct token *, const char **next_path);
 211 extern struct token * tokenize_buffer(void *, unsigned long, struct token **);
 212
 213 extern void show_identifier_stats(void);
 214 extern struct token *preprocess(struct token *);
 215
 216 static inline int match_op(struct token *token, int op)
 217 {
 218         return token->pos.type == TOKEN_SPECIAL && token->special == op;
 219 }
 220
 221 static inline int match_ident(struct token *token, struct ident *id)
 222 {
 223         return token->pos.type == TOKEN_IDENT && token->ident == id;
 224 }
 225
 226 #endif