token.h

   1 #ifndef TOKEN_H
   2 #define TOKEN_H
   3 /*
   4  * Basic tokenization structures. NOTE! Those tokens had better
   5  * be pretty small, since we're going to keep them all in memory
   6  * indefinitely.
   7  *
   8  * Copyright (C) 2003 Transmeta Corp.
   9  *               2003 Linus Torvalds
  10  *
  11  *  Licensed under the Open Software License version 1.1
  12  */
  13
     #include <stddef.h>
  14 #include <sys/types.h>
  15 #include "lib.h"
  16
  17 /*
  18  * This describes the pure lexical elements (tokens), with
  19  * no semantic meaning. In other words, an identifier doesn't
  20  * have a type or meaning, it is only a specific string in
  21  * the input stream.
  22  *
  23  * Semantic meaning is handled elsewhere.
  24  */
  25
  26 enum constantfile {
  27   CONSTANT_FILE_MAYBE,    // To be determined, not inside any #ifs in this file
  28   CONSTANT_FILE_IFNDEF,   // To be determined, currently inside #ifndef
  29   CONSTANT_FILE_NOPE,     // No
  30   CONSTANT_FILE_YES       // Yes
  31 };
  32
  33 extern const char *includepath[];  /* defined elsewhere; NOTE(review): presumably the include search list that a stream's next_path points into - confirm */
  34
  35 struct stream {
  36         int fd;
  37         const char *name;
  38         const char *path;    // input-file path - see set_stream_include_path()
  39         const char **next_path;
  40
  41         /* Use these to check for "already parsed" */
  42         enum constantfile constant;
  43         int dirty, next_stream;
  44         struct ident *protect;
  45         struct token *ifndef;
  46         struct token *top_if;
  47 };
  48
  49 extern int input_stream_nr;  /* NOTE(review): presumably the number of entries in input_streams - confirm */
  50 extern struct stream *input_streams;  /* NOTE(review): presumably indexed by stream number (cf. stream_name(int stream)) - confirm */
  51 extern unsigned int tabstop;  /* NOTE(review): presumably the tab width used for column tracking - confirm */
  52 extern int no_lineno;
  53 extern int *hash_stream(const char *name);  /* maps a name to an int slot; the slot's meaning is not visible in this header */
  54
  55 struct ident {
  56         struct ident *next;     /* Hash chain of identifiers */
  57         struct symbol *symbols; /* Pointer to semantic meaning list */
  58         unsigned char len;      /* Length of identifier name */
  59         unsigned char tainted:1,
  60                       reserved:1,
  61                       keyword:1;
  62         char name[];            /* Actual identifier */
  63 };
  64
  65 enum token_type {
  66         TOKEN_EOF,
  67         TOKEN_ERROR,
  68         TOKEN_IDENT,
  69         TOKEN_ZERO_IDENT,
  70         TOKEN_NUMBER,
  71         TOKEN_CHAR,
  72         TOKEN_CHAR_EMBEDDED_0,
  73         TOKEN_CHAR_EMBEDDED_1,
  74         TOKEN_CHAR_EMBEDDED_2,
  75         TOKEN_CHAR_EMBEDDED_3,
  76         TOKEN_WIDE_CHAR,
  77         TOKEN_WIDE_CHAR_EMBEDDED_0,
  78         TOKEN_WIDE_CHAR_EMBEDDED_1,
  79         TOKEN_WIDE_CHAR_EMBEDDED_2,
  80         TOKEN_WIDE_CHAR_EMBEDDED_3,
  81         TOKEN_STRING,
  82         TOKEN_WIDE_STRING,
  83         TOKEN_SPECIAL,
  84         TOKEN_STREAMBEGIN,
  85         TOKEN_STREAMEND,
  86         TOKEN_MACRO_ARGUMENT,
  87         TOKEN_STR_ARGUMENT,
  88         TOKEN_QUOTED_ARGUMENT,
  89         TOKEN_CONCAT,
  90         TOKEN_GNU_KLUDGE,
  91         TOKEN_UNTAINT,
  92         TOKEN_ARG_COUNT,
  93         TOKEN_IF,
  94         TOKEN_SKIP_GROUPS,
  95         TOKEN_ELSE,
  96 };
  97
  98 /* Combination tokens */
  99 #define COMBINATION_STRINGS {   \
 100         "+=", "++",             \
 101         "-=", "--", "->",       \
 102         "*=",                   \
 103         "/=",                   \
 104         "%=",                   \
 105         "<=", ">=",             \
 106         "==", "!=",             \
 107         "&&", "&=",             \
 108         "||", "|=",             \
 109         "^=", "##",             \
 110         "<<", ">>", "..",       \
 111         "<<=", ">>=", "...",    \
 112         "",                     \
 113         "<", ">", "<=", ">="    \
 114 }
 115
 116 extern unsigned char combinations[][4];  /* rows of 4 bytes each; NOTE(review): presumably initialised from COMBINATION_STRINGS - confirm at the definition */
 117
 118 enum special_token {
 119         SPECIAL_BASE = 256,
 120         SPECIAL_ADD_ASSIGN = SPECIAL_BASE,
 121         SPECIAL_INCREMENT,
 122         SPECIAL_SUB_ASSIGN,
 123         SPECIAL_DECREMENT,
 124         SPECIAL_DEREFERENCE,
 125         SPECIAL_MUL_ASSIGN,
 126         SPECIAL_DIV_ASSIGN,
 127         SPECIAL_MOD_ASSIGN,
 128         SPECIAL_LTE,
 129         SPECIAL_GTE,
 130         SPECIAL_EQUAL,
 131         SPECIAL_NOTEQUAL,
 132         SPECIAL_LOGICAL_AND,
 133         SPECIAL_AND_ASSIGN,
 134         SPECIAL_LOGICAL_OR,
 135         SPECIAL_OR_ASSIGN,
 136         SPECIAL_XOR_ASSIGN,
 137         SPECIAL_HASHHASH,
 138         SPECIAL_LEFTSHIFT,
 139         SPECIAL_RIGHTSHIFT,
 140         SPECIAL_DOTDOT,
 141         SPECIAL_SHL_ASSIGN,
 142         SPECIAL_SHR_ASSIGN,
 143         SPECIAL_ELLIPSIS,
 144         SPECIAL_ARG_SEPARATOR,
 145         SPECIAL_UNSIGNED_LT,
 146         SPECIAL_UNSIGNED_GT,
 147         SPECIAL_UNSIGNED_LTE,
 148         SPECIAL_UNSIGNED_GTE,
 149 };
 150
 151 struct string {
 152         unsigned int length;
 153         char data[];
 154 };
 155
 156 /* will fit into 32 bits */
 157 struct argcount {
 158         unsigned normal:10;
 159         unsigned quoted:10;
 160         unsigned str:10;
 161         unsigned vararg:1;
 162 };
 163
 164 /*
 165  * This is a very common data structure, it should be kept
 166  * as small as humanly possible. Big (rare) types go as
 167  * pointers.
 168  */
 169 struct token {
 170         struct position pos;  /* pos.type carries the token kind - see token_type(), match_op(), match_ident() */
 171         struct token *next;  /* next token in the chain; the EOF token points to itself, so this is not tested for NULL */
 172         union {  /* anonymous union: one payload per token; NOTE(review): the valid member is presumably selected by pos.type - confirm */
 173                 const char *number;
 174                 struct ident *ident;  /* compared by match_ident() when pos.type == TOKEN_IDENT */
 175                 unsigned int special;  /* compared by match_op() when pos.type == TOKEN_SPECIAL */
 176                 struct string *string;
 177                 int argnum;
 178                 struct argcount count;  /* NOTE(review): presumably used with TOKEN_ARG_COUNT - confirm */
 179                 char embedded[4];  /* NOTE(review): presumably used with the TOKEN_*CHAR_EMBEDDED_n kinds - confirm */
 180         };
 181 };
 182
 183 #define MAX_STRING 4095  /* NOTE(review): presumably the longest string the tokenizer accepts - confirm where it is enforced */
 184
 185 static inline struct token *containing_token(struct token **p)
 186 {
 187         void *addr = (char *)p - ((char *)&((struct token *)0)->next - (char *)0);
 188         return addr;
 189 }
 190
 191 #define token_type(x) ((x)->pos.type)  /* kind of the token x points to, read from its position field; x is evaluated once */
 192
 193 /*
 194  * Last token in the stream - points to itself.
 195  * This allows us to not test for NULL pointers
 196  * when following the token->next chain.
 197  */
 198 extern struct token eof_token_entry;
 199 #define eof_token(x) ((x) == &eof_token_entry)  /* nonzero when x is the address of eof_token_entry */
 200
 201 extern int init_stream(const char *, int fd, const char **next_path);  /* NOTE(review): unnamed first argument is presumably the stream name - confirm */
 202 extern const char *stream_name(int stream);  /* name string for the given stream number */
 203 extern struct ident *hash_ident(struct ident *);
 204 extern struct ident *built_in_ident(const char *);
 205 extern struct token *built_in_token(int, const char *);
 206 extern const char *show_special(int);  /* show_*() and quote_token() return a string for their argument; ownership/lifetime of the result is not visible here */
 207 extern const char *show_ident(const struct ident *);
 208 extern const char *show_string(const struct string *string);
 209 extern const char *show_token(const struct token *);
 210 extern const char *quote_token(const struct token *);
 211 extern struct token * tokenize(const char *, int, struct token *, const char **next_path);  /* NOTE(review): (const char *, int) presumably mirror init_stream()'s name and fd - confirm */
 212 extern struct token * tokenize_buffer(void *, unsigned long, struct token **);  /* NOTE(review): presumably a buffer and its size - confirm */
 213
 214 extern void show_identifier_stats(void);
 215 extern struct token *preprocess(struct token *);  /* takes a token chain and returns a token chain */
 216
 217 extern void store_all_tokens(struct token *token);
 218 extern struct token *pos_get_token(struct position pos);  /* lookup keyed by a source position */
 219 extern char *pos_ident(struct position pos);  /* lookup keyed by a source position */
 220
 221 extern void store_macro_pos(struct token *);
 222 extern char *get_macro_name(struct position pos);  /* lookup keyed by a source position */
 223
 224 static inline int match_op(struct token *token, int op)
 225 {
 226         return token->pos.type == TOKEN_SPECIAL && token->special == op;
 227 }
 228
 229 static inline int match_ident(struct token *token, struct ident *id)
 230 {
 231         return token->pos.type == TOKEN_IDENT && token->ident == id;
 232 }
 233
 234 #endif