db: improve database debugging
[smatch.git] / token.h
blobfacfc768fbf9be6160e670df955b7dd378d02a08
1 #ifndef TOKEN_H
2 #define TOKEN_H
/*
 * Basic tokenization structures. NOTE! Those tokens had better
 * be pretty small, since we're going to keep them all in memory
 * indefinitely.
 *
 * Copyright (C) 2003 Transmeta Corp.
 *               2003 Linus Torvalds
 *
 * Licensed under the Open Software License version 1.1
 */
#include <stddef.h>
#include <sys/types.h>
#include "lib.h"
/*
 * This describes the pure lexical elements (tokens), with
 * no semantic meaning. In other words, an identifier doesn't
 * have a type or meaning, it is only a specific string in
 * the input stream.
 *
 * Semantic meaning is handled elsewhere.
 */
/*
 * State of the "already parsed" check that struct stream keeps in its
 * `constant` member.
 */
enum constantfile {
	CONSTANT_FILE_MAYBE,	// To be determined, not inside any #ifs in this file
	CONSTANT_FILE_IFNDEF,	// To be determined, currently inside #ifndef
	CONSTANT_FILE_NOPE,	// No
	CONSTANT_FILE_YES	// Yes
};
/*
 * Array of C-string pointers, defined outside this header.
 * NOTE(review): presumably the include search path that the `next_path`
 * arguments and struct stream's `next_path` member walk - confirm
 * against the defining file.
 */
extern const char *includepath[];
35 struct stream {
36 int fd;
37 const char *name;
38 const char *path; // input-file path - see set_stream_include_path()
39 const char **next_path;
41 /* Use these to check for "already parsed" */
42 enum constantfile constant;
43 int dirty, next_stream;
44 struct ident *protect;
45 struct token *ifndef;
46 struct token *top_if;
/*
 * Tokenizer-wide state, defined outside this header.
 * NOTE(review): input_stream_nr is presumably the number of entries in
 * input_streams - confirm against the defining .c file.
 */
extern struct stream *input_streams;
extern int input_stream_nr;
extern unsigned int tabstop;
extern int no_lineno;

/* Returns a pointer to an int associated with the given stream name. */
extern int *hash_stream(const char *name);
/*
 * One identifier. The name is stored inline as a flexible array member,
 * so an ident must be allocated with room for `len` name bytes after the
 * fixed part.
 */
struct ident {
	struct ident *next;	/* Hash chain of identifiers */
	struct symbol *symbols;	/* Pointer to semantic meaning list */
	unsigned char len;	/* Length of identifier name */
	unsigned char tainted:1,
	              reserved:1,
	              keyword:1;
	char name[];		/* Actual identifier */
};
/*
 * Kind of a token. match_op() and match_ident() compare a token's
 * pos.type against these values.
 */
enum token_type {
	TOKEN_EOF,
	TOKEN_ERROR,
	TOKEN_IDENT,
	TOKEN_ZERO_IDENT,
	TOKEN_NUMBER,
	TOKEN_CHAR,
	TOKEN_CHAR_EMBEDDED_0,
	TOKEN_CHAR_EMBEDDED_1,
	TOKEN_CHAR_EMBEDDED_2,
	TOKEN_CHAR_EMBEDDED_3,
	TOKEN_WIDE_CHAR,
	TOKEN_WIDE_CHAR_EMBEDDED_0,
	TOKEN_WIDE_CHAR_EMBEDDED_1,
	TOKEN_WIDE_CHAR_EMBEDDED_2,
	TOKEN_WIDE_CHAR_EMBEDDED_3,
	TOKEN_STRING,
	TOKEN_WIDE_STRING,
	TOKEN_SPECIAL,
	TOKEN_STREAMBEGIN,
	TOKEN_STREAMEND,
	TOKEN_MACRO_ARGUMENT,
	TOKEN_STR_ARGUMENT,
	TOKEN_QUOTED_ARGUMENT,
	TOKEN_CONCAT,
	TOKEN_GNU_KLUDGE,
	TOKEN_UNTAINT,
	TOKEN_ARG_COUNT,
	TOKEN_IF,
	TOKEN_SKIP_GROUPS,
	TOKEN_ELSE,
};
/*
 * Combination tokens.
 *
 * Brace-enclosed initializer listing the spelling of every multi-character
 * operator. The entries appear in the same order as the members of
 * enum special_token; the empty string lines up with SPECIAL_ARG_SEPARATOR
 * and the last four with the SPECIAL_UNSIGNED_* comparisons.
 */
#define COMBINATION_STRINGS {	\
	"+=", "++",		\
	"-=", "--", "->",	\
	"*=",			\
	"/=",			\
	"%=",			\
	"<=", ">=",		\
	"==", "!=",		\
	"&&", "&=",		\
	"||", "|=",		\
	"^=", "##",		\
	"<<", ">>", "..",	\
	"<<=", ">>=", "...",	\
	"",			\
	"<", ">", "<=", ">="	\
}

/*
 * Table of 4-byte entries defined outside this header.
 * NOTE(review): presumably initialised from COMBINATION_STRINGS (each
 * spelling is at most three characters plus a NUL) - confirm.
 */
extern unsigned char combinations[][4];
/*
 * Codes for multi-character operators. Numbering starts at SPECIAL_BASE
 * (256), i.e. above every value an unsigned 8-bit character can take, and
 * the members follow the order of the spellings in COMBINATION_STRINGS.
 */
enum special_token {
	SPECIAL_BASE = 256,
	SPECIAL_ADD_ASSIGN = SPECIAL_BASE,
	SPECIAL_INCREMENT,
	SPECIAL_SUB_ASSIGN,
	SPECIAL_DECREMENT,
	SPECIAL_DEREFERENCE,
	SPECIAL_MUL_ASSIGN,
	SPECIAL_DIV_ASSIGN,
	SPECIAL_MOD_ASSIGN,
	SPECIAL_LTE,
	SPECIAL_GTE,
	SPECIAL_EQUAL,
	SPECIAL_NOTEQUAL,
	SPECIAL_LOGICAL_AND,
	SPECIAL_AND_ASSIGN,
	SPECIAL_LOGICAL_OR,
	SPECIAL_OR_ASSIGN,
	SPECIAL_XOR_ASSIGN,
	SPECIAL_HASHHASH,
	SPECIAL_LEFTSHIFT,
	SPECIAL_RIGHTSHIFT,
	SPECIAL_DOTDOT,
	SPECIAL_SHL_ASSIGN,
	SPECIAL_SHR_ASSIGN,
	SPECIAL_ELLIPSIS,
	SPECIAL_ARG_SEPARATOR,
	SPECIAL_UNSIGNED_LT,
	SPECIAL_UNSIGNED_GT,
	SPECIAL_UNSIGNED_LTE,
	SPECIAL_UNSIGNED_GTE,
};
/*
 * Length-prefixed byte string. The bytes are stored inline in the
 * flexible array member, so the object must be allocated with room
 * for them after the header.
 */
struct string {
	unsigned int length;
	char data[];
};
/*
 * Four counters packed into bit-fields: 10 + 10 + 10 + 1 = 31 bits,
 * so the whole thing will fit into 32 bits.
 */
struct argcount {
	unsigned normal:10;
	unsigned quoted:10;
	unsigned str:10;
	unsigned vararg:1;
};
165 * This is a very common data structure, it should be kept
166 * as small as humanly possible. Big (rare) types go as
167 * pointers.
169 struct token {
170 struct position pos;
171 struct token *next;
172 union {
173 const char *number;
174 struct ident *ident;
175 unsigned int special;
176 struct string *string;
177 int argnum;
178 struct argcount count;
179 char embedded[4];
/*
 * NOTE(review): presumably the longest string the tokenizer accepts -
 * confirm against the users of this constant.
 */
#define MAX_STRING 4095
185 static inline struct token *containing_token(struct token **p)
187 void *addr = (char *)p - ((char *)&((struct token *)0)->next - (char *)0);
188 return addr;
/* Type field of the token pointed to by x, i.e. x->pos.type. */
#define token_type(x) ((x)->pos.type)
/*
 * Last token in the stream - points to itself.
 * This allows us to not test for NULL pointers
 * when following the token->next chain..
 */
extern struct token eof_token_entry;

/* True when x is the address of the shared end-of-stream token. */
#define eof_token(x) ((x) == &eof_token_entry)
/* Stream setup and lookup. */
extern int init_stream(const char *, int fd, const char **next_path);
extern const char *stream_name(int stream);

/* Identifier and built-in token construction. */
extern struct ident *hash_ident(struct ident *);
extern struct ident *built_in_ident(const char *);
extern struct token *built_in_token(int, const char *);

/* String forms of tokens and their parts. */
extern const char *show_special(int);
extern const char *show_ident(const struct ident *);
extern const char *show_string(const struct string *string);
extern const char *show_token(const struct token *);
extern const char *quote_token(const struct token *);

/* Tokenizing and preprocessing entry points. */
extern struct token *tokenize(const char *, int, struct token *, const char **next_path);
extern struct token *tokenize_buffer(void *, unsigned long, struct token **);
extern struct token *preprocess(struct token *);

extern void show_identifier_stats(void);

/* Token storage and lookup by source position. */
extern void store_all_tokens(struct token *token);
extern struct token *pos_get_token(struct position pos);
extern char *pos_ident(struct position pos);

/* Macro-position storage and lookup of a macro name by position. */
extern void store_macro_pos(struct token *);
extern char *get_macro_name(struct position pos);
224 static inline int match_op(struct token *token, int op)
226 return token->pos.type == TOKEN_SPECIAL && token->special == op;
229 static inline int match_ident(struct token *token, struct ident *id)
231 return token->pos.type == TOKEN_IDENT && token->ident == id;
234 #endif