token.h

   1 #ifndef TOKEN_H
   2 #define TOKEN_H
   3 /*
   4  * Basic tokenization structures. NOTE! Those tokens had better
   5  * be pretty small, since we're going to keep them all in memory
   6  * indefinitely.
   7  *
   8  * Copyright (C) 2003 Transmeta Corp.
   9  *               2003 Linus Torvalds
  10  *
  11  *  Licensed under the Open Software License version 1.1
  12  */
  13
#include <stddef.h>
#include <sys/types.h>
#include "lib.h"
  16
  17 /*
  18  * This describes the pure lexical elements (tokens), with
  19  * no semantic meaning. In other words, an identifier doesn't
  20  * have a type or meaning, it is only a specific string in
  21  * the input stream.
  22  *
  23  * Semantic meaning is handled elsewhere.
  24  */
  25
  26 enum constantfile {
  27   CONSTANT_FILE_MAYBE,    // To be determined, not inside any #ifs in this file
  28   CONSTANT_FILE_IFNDEF,   // To be determined, currently inside #ifndef
  29   CONSTANT_FILE_NOPE,     // No
  30   CONSTANT_FILE_YES       // Yes
  31 };
  32
  33 extern const char *includepath[];
  34
  35 struct stream {
  36         int fd;
  37         const char *name;
  38         const char *path;    // input-file path - see set_stream_include_path()
  39         const char **next_path;
  40
  41         /* Use these to check for "already parsed" */
  42         enum constantfile constant;
  43         int dirty, next_stream;
  44         struct ident *protect;
  45         struct token *ifndef;
  46         struct token *top_if;
  47 };
  48
  49 extern int input_stream_nr;
  50 extern struct stream *input_streams;
  51 extern unsigned int tabstop;
  52 extern int no_lineno;
  53 extern int *hash_stream(const char *name);
  54
  55 struct ident {
  56         struct ident *next;     /* Hash chain of identifiers */
  57         struct symbol *symbols; /* Pointer to semantic meaning list */
  58         unsigned char len;      /* Length of identifier name */
  59         unsigned char tainted:1,
  60                       reserved:1,
  61                       keyword:1;
  62         char name[];            /* Actual identifier */
  63 };
  64
  65 enum token_type {
  66         TOKEN_EOF,
  67         TOKEN_ERROR,
  68         TOKEN_IDENT,
  69         TOKEN_ZERO_IDENT,
  70         TOKEN_NUMBER,
  71         TOKEN_CHAR,
  72         TOKEN_WIDE_CHAR,
  73         TOKEN_STRING,
  74         TOKEN_WIDE_STRING,
  75         TOKEN_SPECIAL,
  76         TOKEN_STREAMBEGIN,
  77         TOKEN_STREAMEND,
  78         TOKEN_MACRO_ARGUMENT,
  79         TOKEN_STR_ARGUMENT,
  80         TOKEN_QUOTED_ARGUMENT,
  81         TOKEN_CONCAT,
  82         TOKEN_GNU_KLUDGE,
  83         TOKEN_UNTAINT,
  84         TOKEN_ARG_COUNT,
  85         TOKEN_IF,
  86         TOKEN_SKIP_GROUPS,
  87         TOKEN_ELSE,
  88 };
  89
  90 /* Combination tokens */
  91 #define COMBINATION_STRINGS {   \
  92         "+=", "++",             \
  93         "-=", "--", "->",       \
  94         "*=",                   \
  95         "/=",                   \
  96         "%=",                   \
  97         "<=", ">=",             \
  98         "==", "!=",             \
  99         "&&", "&=",             \
 100         "||", "|=",             \
 101         "^=", "##",             \
 102         "<<", ">>", "..",       \
 103         "<<=", ">>=", "...",    \
 104         "",                     \
 105         "<", ">", "<=", ">="    \
 106 }
 107
 108 extern unsigned char combinations[][4];
 109
 110 enum special_token {
 111         SPECIAL_BASE = 256,
 112         SPECIAL_ADD_ASSIGN = SPECIAL_BASE,
 113         SPECIAL_INCREMENT,
 114         SPECIAL_SUB_ASSIGN,
 115         SPECIAL_DECREMENT,
 116         SPECIAL_DEREFERENCE,
 117         SPECIAL_MUL_ASSIGN,
 118         SPECIAL_DIV_ASSIGN,
 119         SPECIAL_MOD_ASSIGN,
 120         SPECIAL_LTE,
 121         SPECIAL_GTE,
 122         SPECIAL_EQUAL,
 123         SPECIAL_NOTEQUAL,
 124         SPECIAL_LOGICAL_AND,
 125         SPECIAL_AND_ASSIGN,
 126         SPECIAL_LOGICAL_OR,
 127         SPECIAL_OR_ASSIGN,
 128         SPECIAL_XOR_ASSIGN,
 129         SPECIAL_HASHHASH,
 130         SPECIAL_LEFTSHIFT,
 131         SPECIAL_RIGHTSHIFT,
 132         SPECIAL_DOTDOT,
 133         SPECIAL_SHL_ASSIGN,
 134         SPECIAL_SHR_ASSIGN,
 135         SPECIAL_ELLIPSIS,
 136         SPECIAL_ARG_SEPARATOR,
 137         SPECIAL_UNSIGNED_LT,
 138         SPECIAL_UNSIGNED_GT,
 139         SPECIAL_UNSIGNED_LTE,
 140         SPECIAL_UNSIGNED_GTE,
 141 };
 142
 143 struct string {
 144         unsigned int length;
 145         char data[];
 146 };
 147
 148 /* will fit into 32 bits */
 149 struct argcount {
 150         unsigned normal:10;
 151         unsigned quoted:10;
 152         unsigned str:10;
 153         unsigned vararg:1;
 154 };
 155
 156 /*
 157  * This is a very common data structure, it should be kept
 158  * as small as humanly possible. Big (rare) types go as
 159  * pointers.
 160  */
 161 struct token {
 162         struct position pos;
 163         struct token *next;
 164         union {
 165                 const char *number;
 166                 struct ident *ident;
 167                 unsigned int special;
 168                 struct string *string;
 169                 int character;
 170                 int argnum;
 171                 struct argcount count;
 172         };
 173 };
 174
 175 #define MAX_STRING 4095
 176
 177 static inline struct token *containing_token(struct token **p)
 178 {
 179         void *addr = (char *)p - ((char *)&((struct token *)0)->next - (char *)0);
 180         return addr;
 181 }
 182
 183 #define token_type(x) ((x)->pos.type)
 184
 185 /*
 186  * Last token in the stream - points to itself.
 187  * This allows us to not test for NULL pointers
 188  * when following the token->next chain..
 189  */
 190 extern struct token eof_token_entry;
 191 #define eof_token(x) ((x) == &eof_token_entry)
 192
 193 extern int init_stream(const char *, int fd, const char **next_path);
 194 extern const char *stream_name(int stream);
 195 extern struct ident *hash_ident(struct ident *);
 196 extern struct ident *built_in_ident(const char *);
 197 extern struct token *built_in_token(int, const char *);
 198 extern const char *show_special(int);
 199 extern const char *show_ident(const struct ident *);
 200 extern const char *show_string(const struct string *string);
 201 extern const char *show_token(const struct token *);
 202 extern struct token * tokenize(const char *, int, struct token *, const char **next_path);
 203 extern struct token * tokenize_buffer(void *, unsigned long, struct token **);
 204
 205 extern void show_identifier_stats(void);
 206 extern struct token *preprocess(struct token *);
 207
 208 extern void store_all_tokens(struct token *token);
 209 extern struct token *pos_get_token(struct position pos);
 210 extern char *pos_ident(struct position pos);
 211
 212 extern void store_macro_pos(struct token *);
 213 extern char *get_macro_name(struct position pos);
 214
 215 static inline int match_op(struct token *token, int op)
 216 {
 217         return token->pos.type == TOKEN_SPECIAL && token->special == op;
 218 }
 219
 220 static inline int match_ident(struct token *token, struct ident *id)
 221 {
 222         return token->pos.type == TOKEN_IDENT && token->ident == id;
 223 }
 224
 225 #endif