Added support for lexing Dlt files
[delight/core.git] / dmd / lexer.h
blob17939d1913fe6b5e038f8beb127459bdd7555607
2 // Compiler implementation of the D programming language
3 // Copyright (c) 1999-2008 by Digital Mars
4 // All Rights Reserved
5 // written by Walter Bright
6 // http://www.digitalmars.com
7 // License for redistribution is by either the Artistic License
8 // in artistic.txt, or the GNU General Public License in gnu.txt.
9 // See the included readme.txt for details.
11 #ifndef DMD_LEXER_H
12 #define DMD_LEXER_H
14 #ifdef __DMC__
15 #pragma once
16 #endif /* __DMC__ */
18 #include "root.h"
19 #include "mars.h"
21 struct StringTable;
22 struct Identifier;
23 struct Module;
25 /* Tokens:
26 ( )
27 [ ]
28 { }
29 < > <= >= == != === !==
30 << >> <<= >>= >>> >>>=
31 + - += -=
32 * / % *= /= %=
33 & | ^ &= |= ^=
34 = ! ~
35 ++ --
36 . -> : ,
37 ? && ||
38 */
// Kinds of token known to the lexer. Token::value holds one of these and
// Lexer::nextToken() returns one.
//
// No enumerator has an explicit initializer, so values are assigned
// consecutively from TOKreserved. Inserting, removing or reordering an entry
// renumbers everything after it, including TOKMAX, which sizes
// Token::tochars.
//
// NOTE(review): in this view of the file the brace-only lines (the enum's
// opening `{` and closing `};`) are not visible -- confirm against the real
// header before editing.
41 enum TOK
43 TOKreserved,
45 // Other
46 TOKlparen, TOKrparen,
47 TOKlbracket, TOKrbracket,
48 TOKlcurly, TOKrcurly,
49 TOKcolon, TOKneg,
50 TOKsemicolon, TOKdotdotdot,
51 TOKeof, TOKcast,
52 TOKnull, TOKassert,
53 TOKtrue, TOKfalse,
54 TOKarray, TOKcall,
55 TOKaddress, TOKtypedot,
56 TOKtype, TOKthrow,
57 TOKnew, TOKdelete,
58 TOKstar, TOKsymoff,
59 TOKvar, TOKdotvar,
60 TOKdotti, TOKdotexp,
61 TOKdottype, TOKslice,
62 TOKarraylength, TOKversion,
63 TOKmodule, TOKdollar,
64 TOKtemplate, TOKdottd,
65 TOKdeclaration, TOKtypeof,
66 TOKpragma, TOKdsymbol,
67 TOKtypeid, TOKuadd,
68 TOKremove,
69 TOKnewanonclass, TOKcomment,
70 TOKarrayliteral, TOKassocarrayliteral,
71 TOKstructliteral,
73 // Operators
74 TOKlt, TOKgt,
75 TOKle, TOKge,
76 TOKequal, TOKnotequal,
77 TOKidentity, TOKnotidentity,
78 TOKindex, TOKis,
79 TOKtobool,
// NOTE(review): the bare numbers "60" and "104" below look like running
// enumerator counts, but they do not appear to match a count of the
// enumerators visible here -- treat them as stale until verified.
81 // 60
82 // NCEG floating point compares
83 // !<>= <> <>= !> !>= !< !<= !<>
84 TOKunord,TOKlg,TOKleg,TOKule,TOKul,TOKuge,TOKug,TOKue,
86 TOKshl, TOKshr,
87 TOKshlass, TOKshrass,
88 TOKushr, TOKushrass,
89 TOKcat, TOKcatass, // ~ ~=
90 TOKadd, TOKmin, TOKaddass, TOKminass,
91 TOKmul, TOKdiv, TOKmod,
92 TOKmulass, TOKdivass, TOKmodass,
93 TOKand, TOKor, TOKxor,
94 TOKandass, TOKorass, TOKxorass,
95 TOKassign, TOKnot, TOKtilde,
96 TOKplusplus, TOKminusminus, TOKconstruct, TOKblit,
97 TOKdot, TOKarrow, TOKcomma,
98 TOKquestion, TOKandand, TOKoror,
100 // 104
101 // Numeric literals
102 TOKint32v, TOKuns32v,
103 TOKint64v, TOKuns64v,
104 TOKfloat32v, TOKfloat64v, TOKfloat80v,
105 TOKimaginary32v, TOKimaginary64v, TOKimaginary80v,
107 // Char constants
108 TOKcharv, TOKwcharv, TOKdcharv,
110 // Leaf operators
111 TOKidentifier, TOKstring,
112 TOKthis, TOKsuper,
113 TOKhalt, TOKtuple,
// The CASE_BASIC_TYPES macros later in this header cover TOKvoid through
// TOKbool from this group; TOKcent and TOKucent are not included in them.
115 // Basic types
116 TOKvoid,
117 TOKint8, TOKuns8,
118 TOKint16, TOKuns16,
119 TOKint32, TOKuns32,
120 TOKint64, TOKuns64,
121 TOKfloat32, TOKfloat64, TOKfloat80,
122 TOKimaginary32, TOKimaginary64, TOKimaginary80,
123 TOKcomplex32, TOKcomplex64, TOKcomplex80,
124 TOKchar, TOKwchar, TOKdchar, TOKbit, TOKbool,
125 TOKcent, TOKucent,
127 // Aggregates
128 TOKstruct, TOKclass, TOKinterface, TOKunion, TOKenum, TOKimport,
129 TOKtypedef, TOKalias, TOKoverride, TOKdelegate, TOKfunction,
130 TOKmixin,
132 TOKalign, TOKextern, TOKprivate, TOKprotected, TOKpublic, TOKexport,
133 TOKstatic, /*TOKvirtual,*/ TOKfinal, TOKconst, TOKabstract, TOKvolatile,
134 TOKdebug, TOKdeprecated, TOKin, TOKout, TOKinout, TOKlazy,
135 TOKauto, TOKpackage, TOKmanifest,
137 // Statements
138 TOKif, TOKelse, TOKwhile, TOKfor, TOKdo, TOKswitch,
139 TOKcase, TOKdefault, TOKbreak, TOKcontinue, TOKwith,
140 TOKsynchronized, TOKreturn, TOKgoto, TOKtry, TOKcatch, TOKfinally,
141 TOKasm, TOKforeach, TOKforeach_reverse, TOKreversed,
142 TOKscope,
143 TOKon_scope_exit, TOKon_scope_failure, TOKon_scope_success,
144 TOKlog_error, TOKlog_warning, TOKlog_info, TOKlog_trace,
146 // Contracts
147 TOKbody, TOKinvariant,
149 // Testing
150 TOKunittest,
152 // Added after 1.0
153 TOKref,
154 TOKmacro,
// The next three enumerators exist only when V2 is nonzero, so every
// enumerator after them (TOKextends .. TOKMAX) has a different numeric value
// in V2 builds than in non-V2 builds.
155 #if V2
156 TOKtraits,
157 TOKoverloadset,
158 TOKpure,
159 #endif
// NOTE(review): "TAL" is not expanded anywhere in this header; presumably
// these are the Delight-specific additions -- confirm.
161 // TAL
162 TOKextends,
163 TOKendline,
164 TOKat,
165 TOKlogger, // Not a real symbol
// Must stay last: with implicit numbering it equals the number of enumerators
// above, and Token::tochars is declared with this size.
167 TOKMAX
// Expands to a run of `case` labels, one per basic-type token, for use inside
// a `switch` over a TOK value.
// The expansion ends with `case TOKvoid` and no colon, so the use site must
// supply the final colon itself, i.e. write `CASE_BASIC_TYPES:`.
// TOKcent and TOKucent, although listed under "Basic types" in enum TOK, are
// not part of this list.
170 #define CASE_BASIC_TYPES \
171 case TOKwchar: case TOKdchar: \
172 case TOKbit: case TOKbool: case TOKchar: \
173 case TOKint8: case TOKuns8: \
174 case TOKint16: case TOKuns16: \
175 case TOKint32: case TOKuns32: \
176 case TOKint64: case TOKuns64: \
177 case TOKfloat32: case TOKfloat64: case TOKfloat80: \
178 case TOKimaginary32: case TOKimaginary64: case TOKimaginary80: \
179 case TOKcomplex32: case TOKcomplex64: case TOKcomplex80: \
180 case TOKvoid
// Variant of CASE_BASIC_TYPES that also records which type was matched.
// For each basic-type token it expands to a `case` that assigns the matching
// Type::t* object to `t` and then jumps to the label LabelX.
//
// t: an lvalue the Type::t* value can be assigned to (it is used unparenthesised
//    as the left-hand side of `=`).
//
// The expansion ends with the bare identifier `LabelX`, so the use site must
// follow the macro with a colon (`CASE_BASIC_TYPES_X(t):`); that colon is what
// turns the trailing identifier into the label every `goto LabelX` targets.
// Because the label name is fixed, the macro can be used at most once per
// function. It covers the same token set as CASE_BASIC_TYPES.
182 #define CASE_BASIC_TYPES_X(t) \
183 case TOKvoid: t = Type::tvoid; goto LabelX; \
184 case TOKint8: t = Type::tint8; goto LabelX; \
185 case TOKuns8: t = Type::tuns8; goto LabelX; \
186 case TOKint16: t = Type::tint16; goto LabelX; \
187 case TOKuns16: t = Type::tuns16; goto LabelX; \
188 case TOKint32: t = Type::tint32; goto LabelX; \
189 case TOKuns32: t = Type::tuns32; goto LabelX; \
190 case TOKint64: t = Type::tint64; goto LabelX; \
191 case TOKuns64: t = Type::tuns64; goto LabelX; \
192 case TOKfloat32: t = Type::tfloat32; goto LabelX; \
193 case TOKfloat64: t = Type::tfloat64; goto LabelX; \
194 case TOKfloat80: t = Type::tfloat80; goto LabelX; \
195 case TOKimaginary32: t = Type::timaginary32; goto LabelX; \
196 case TOKimaginary64: t = Type::timaginary64; goto LabelX; \
197 case TOKimaginary80: t = Type::timaginary80; goto LabelX; \
198 case TOKcomplex32: t = Type::tcomplex32; goto LabelX; \
199 case TOKcomplex64: t = Type::tcomplex64; goto LabelX; \
200 case TOKcomplex80: t = Type::tcomplex80; goto LabelX; \
201 case TOKbit: t = Type::tbit; goto LabelX; \
202 case TOKbool: t = Type::tbool; goto LabelX; \
203 case TOKchar: t = Type::tchar; goto LabelX; \
204 case TOKwchar: t = Type::twchar; goto LabelX; \
205 case TOKdchar: t = Type::tdchar; goto LabelX; \
206 LabelX
// One lexed token: its kind (`value`), where it starts in the source buffer,
// any documentation comments attached to it, and its payload (literal value,
// string, or identifier).
//
// NOTE(review): in this view of the file the brace-only lines are not visible
// (struct/union openers and the closers of the anonymous struct, the union and
// Token itself) -- confirm the exact nesting against the real header.
208 struct Token
// Link to another Token. Presumably used for look-ahead chaining and for
// Lexer::freelist -- confirm in the implementation.
210 Token *next;
211 unsigned char *ptr; // pointer to first character of this token within buffer
212 enum TOK value;
213 unsigned char *blockComment; // doc comment string prior to this token
214 unsigned char *lineComment; // doc comment for previous token
// Payload storage. The members below overlap; which one is meaningful
// presumably depends on `value` -- verify against the lexer implementation.
215 union
217 // Integers
218 d_int32 int32value;
219 d_uns32 uns32value;
220 d_int64 int64value;
221 d_uns64 uns64value;
223 // Floats
// Under IN_GCC the float payload has type real_t, which per the note below
// cannot be a union member, so it is declared after the union instead.
224 #ifdef IN_GCC
225 // real_t float80value; // can't use this in a union!
226 #else
227 d_float80 float80value;
228 #endif
// String payload: pointer, length and postfix character.
230 struct
231 { unsigned char *ustring; // UTF8 string
232 unsigned len;
233 unsigned char postfix; // 'c', 'w', 'd'
236 Identifier *ident;
// IN_GCC only: the float payload, kept as a separate member rather than
// inside the union (see the note in the union above).
238 #ifdef IN_GCC
239 real_t float80value; // can't use this in a union!
240 #endif
// Table with one char* slot per TOK value (sized by TOKMAX). Presumably the
// spelling of each token, as returned by toChars(enum TOK) -- confirm.
242 static char *tochars[TOKMAX];
// Class-specific allocator for Token objects; its strategy is defined in the
// implementation file, not here.
243 static void *operator new(size_t sz);
245 int isKeyword();
246 void print();
247 char *toChars();
248 static char *toChars(enum TOK);
// Lexer state and interface: holds the bounds of the source buffer, the
// current read position and current token, plus the flags that control
// comment handling and the Dlt syntax mode.
//
// NOTE(review): in this view of the file the brace-only lines (the opening `{`
// and closing `};`) are not visible -- confirm against the real header.
251 struct Lexer
// State shared by all Lexer instances (static members).
253 static StringTable stringtable;
254 static OutBuffer stringbuffer;
255 static Token *freelist;
257 Loc loc; // for error messages
259 unsigned char *base; // pointer to start of buffer
260 unsigned char *end; // past end of buffer
261 unsigned char *p; // current character
262 Token token;
263 Module *mod;
264 int doDocComment; // collect doc comment information
265 int anyToken; // !=0 means seen at least one token
266 int commentToken; // !=0 means comments are TOKcomment's
// Fields for the Dlt syntax mode. Presumably indentation-sensitive lexing in
// which `indent`, `atStartOfLine` and `nesting` track layout -- confirm in the
// implementation.
268 bool dltSyntax;
269 unsigned int indent; // Current indent level
270 int atStartOfLine;
271 int nesting; // Counts { [ (; we ignore indents inside these
// Constructs a lexer for `mod` over the buffer at `base`.
// NOTE(review): begoffset/endoffset presumably delimit the byte range of
// `base` to lex -- confirm. The remaining parameters share their names with
// the member fields above.
273 Lexer(Module *mod,
274 unsigned char *base, unsigned begoffset, unsigned endoffset,
275 int doDocComment, int commentToken, bool dltSyntax);
// Static helpers for keyword setup and identifier creation/lookup.
277 static void initKeywords();
278 static Identifier *idPool(const char *s);
279 static Identifier *uniqueId(const char *s);
280 static Identifier *uniqueId(const char *s, int num);
// Token-level interface.
282 TOK nextToken();
283 void scan(Token *t);
284 Token *peek(Token *t);
285 Token *peekPastParen(Token *t);
// Helpers for individual literal forms; those taking a Token* return the TOK
// kind for the literal.
286 unsigned escapeSequence();
287 TOK wysiwygStringConstant(Token *t, int tc);
288 TOK hexStringConstant(Token *t);
// Declared only when V2 is nonzero.
289 #if V2
290 TOK delimitedStringConstant(Token *t);
291 TOK tokenStringConstant(Token *t);
292 #endif
293 TOK escapeStringConstant(Token *t, int wide);
294 TOK charConstant(Token *t, int wide);
295 void stringPostfix(Token *t);
296 unsigned wchar(unsigned u);
297 TOK number(Token *t);
298 TOK inreal(Token *t);
// printf-style error reporting; the second overload takes an explicit
// location instead of relying on the lexer's own `loc`.
299 void error(const char *format, ...);
300 void error(Loc loc, const char *format, ...);
301 void pragma();
302 unsigned decodeUTF();
303 void getDocComment(Token *t, unsigned lineComment);
305 static int isValidIdentifier(char *p);
306 static unsigned char *combineComments(unsigned char *c1, unsigned char *c2);
309 #endif /* DMD_LEXER_H */