added TCO detection to vm; cut another 10 msecs from ackermann
[gaemu.git] / gmlparser / lexer.d
blobeac7ec984ded0485cc33ef775c1c666793a055a1
1 /* GML parser
2 * coded by Ketmar // Invisible Vector <ketmar@ketmar.no-ip.org>
3 * Understanding is not required. Only obedience.
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 module gmlparser.lexer is aliced;
20 import gmlparser.tokens;
23 // ////////////////////////////////////////////////////////////////////////// //
/// Position inside a source text: file name, line, column and raw offset.
public struct Loc {
  string file; /// source file name (may be empty)
  int line, col; /// line and column; both must be positive for `valid`
  uint tpos; /// offset that accompanies `line`/`col`

  /// Renders the position as `file (line,col)`.
  string toString () const {
    import std.string : format;
    return format("%s (%s,%s)", file, line, col);
  }

  /// Renders the position as `(line,col)`, without the file name.
  string toStringNoFile () const {
    import std.string : format;
    return format("(%s,%s)", line, col);
  }

  /// `true` when both the line and the column are positive.
  @property bool valid () const pure nothrow @safe @nogc {
    pragma(inline, true);
    return !(line <= 0 || col <= 0);
  }
}
36 // ////////////////////////////////////////////////////////////////////////// //
/// Exception that additionally carries a source position (`Loc`).
public class ErrorAt : Exception {
  Loc loc; /// position in the parsed text; stays `Loc.init` when the location-less constructor is used

  /// Creates an error that has no source position attached.
  this (string msg, Throwable next=null, string file=__FILE__, usize line=__LINE__) pure nothrow @safe @nogc {
    super(msg, file, line, next);
  }

  /// Creates an error attached to the source position `aloc`.
  this (in Loc aloc, string msg, Throwable next=null, string file=__FILE__, usize line=__LINE__) pure nothrow @safe @nogc {
    super(msg, file, line, next);
    loc = aloc;
  }
}
45 // ////////////////////////////////////////////////////////////////////////// //
/// One lexeme: its kind, its source span and its payload.
public struct Token {
public:
  /// Token kinds.
  enum Type {
    EOF = -1,
    Kw,
    Id,
    Str,
    Num,
    Spec,
  }

private:
  const(char)[] tkstr; // token text (see `str` and `istr`)

public:
  Loc loc, eloc; // token start, token end (after last char)
  Type type = Type.EOF; // token type
  // payload storage: keyword id and numeric value share the same bytes
  union {
    Keyword kw;
    float num;
  }

@safe:
  /// Throws `ErrorAt` (positioned at `loc`) with `msg` unless the token's type is `tp`.
  void mustbeType (Token.Type tp, string msg="identifier expected", string file=__FILE__, usize line=__LINE__) {
    pragma(inline, true);
    if (type == tp) return;
    throw new ErrorAt(loc, msg, null, file, line);
  }

  /// Throws `ErrorAt` unless this is an identifier token.
  void mustbeId (string msg="identifier expected", string file=__FILE__, usize line=__LINE__) {
    pragma(inline, true);
    mustbeType(Type.Id, msg, file, line);
  }

  /// Throws `ErrorAt` unless this is a string token.
  void mustbeStr (string msg="string expected", string file=__FILE__, usize line=__LINE__) {
    pragma(inline, true);
    mustbeType(Type.Str, msg, file, line);
  }

  /// Throws `ErrorAt` unless this is a number token.
  void mustbeNum (string msg="number expected", string file=__FILE__, usize line=__LINE__) {
    pragma(inline, true);
    mustbeType(Type.Num, msg, file, line);
  }

  /// Debug rendering: `(line,col): ` followed by a kind-specific description.
  string toString () const @trusted {
    import std.string : format;
    final switch (type) with (Type) {
      case EOF: return format("(%s,%d): <EOF>", loc.line, loc.col);
      case Kw: return format("(%s,%d): kw.%s <%s>", loc.line, loc.col, kw, tkstr);
      case Id: return format("(%s,%d): Id:%s", loc.line, loc.col, tkstr);
      case Str: return format("(%s,%d): Str:%s", loc.line, loc.col, Lexer.quote(tkstr));
      case Num: return format("(%s,%d): Num:%s", loc.line, loc.col, num);
      case Spec: return format("(%s,%d): Spec:<%s>", loc.line, loc.col, tkstr);
    }
    assert(0);
  }

nothrow:
  // get immutable string
  // this converts id to `string` via `.idup`, use with caution!
  // `.idup` is used to not anchor the whole source string
  @property string istr () {
    pragma(inline, true);
    if (tkstr.length == 0) return null;
    return tkstr.idup;
  }

const pure nothrow @nogc:
  /// A token equals a keyword when it is a keyword token holding exactly `akw`.
  bool opEquals (Keyword akw) {
    pragma(inline, true);
    return (type == Type.Kw && kw == akw);
  }

  /// `true` when this is a keyword token holding `akw`.
  bool isKw (Keyword akw) {
    pragma(inline, true);
    return (type == Type.Kw && kw == akw);
  }

  /// `true` when this is any keyword token.
  bool isKw () {
    pragma(inline, true);
    return (type == Type.Kw);
  }

@property:
  /// Token text as a slice (no copy is made).
  const(char)[] str () {
    pragma(inline, true);
    return tkstr;
  }

  /// Keyword held by this token, or `Keyword.NoKW` when it is not a keyword token.
  Keyword Kw () {
    pragma(inline, true);
    if (type != Type.Kw) return Keyword.NoKW;
    return kw;
  }

  /// `true` for identifier tokens.
  bool isId () {
    pragma(inline, true);
    return (type == Type.Id);
  }

  /// `true` for string tokens.
  bool isStr () {
    pragma(inline, true);
    return (type == Type.Str);
  }

  /// `true` for number tokens.
  bool isNum () {
    pragma(inline, true);
    return (type == Type.Num);
  }

  /// `true` for `Spec` tokens.
  bool isSpec () {
    pragma(inline, true);
    return (type == Type.Spec);
  }

  /// `true` for the end-of-file token.
  bool isEOF () {
    pragma(inline, true);
    return (type == Type.EOF);
  }
}
112 // ////////////////////////////////////////////////////////////////////////// //
113 public final class Lexer {
114 private:
115 const(char)[] text; // source text being tokenized
116 uint tpos; // index in `text` of the next char `getChar()` will read
117 Loc cpos; // position for last `getChar()`
118 Loc pend; // end of previous token, for better error messages
119 bool eof; // set once the input is exhausted; `nextToken()` does nothing afterwards
120 bool lastWasEOL = true; // last char returned by `getChar()` was '\n'; starts as `true` so the first char begins a new line
121 Token[] lookup; // buffered tokens; `lookup[0]` is the current (front) token
122 Token tokeof; // returned when no token is buffered; will be fixed by `nextToken()`
124 public:
/// Creates a lexer over `atext` and immediately scans the first token.
/// `afname`, when not empty, is recorded as the file name used in locations.
this(T) (const(char)[] atext, T afname=null) if (is(T : const(char)[])) {
  text = atext;
  if (afname.length > 0) {
    // a `string` can be stored as-is; any other char array is copied first
    static if (is(T == string)) {
      cpos.file = afname;
    } else {
      cpos.file = afname.idup;
    }
  }
  tokeof.loc.file = cpos.file;
  nextToken();
  // there is no previous token yet: point `pend` at the very start
  pend.tpos = 0;
  pend.col = 1;
  pend.line = 1;
}
/// Throws `ErrorAt` positioned at the current (front) token.
void error (string msg, string file=__FILE__, usize line=__LINE__) {
  pragma(inline, true);
  if (lookup.length == 0) throw new ErrorAt(loc, msg, null, file, line);
  throw new ErrorAt(lookup[0].loc, msg, null, file, line);
}

/// Throws `ErrorAt` positioned at the start of token `tk`.
static private void error (in ref Token tk, string msg, string file=__FILE__, usize line=__LINE__) {
  pragma(inline, true);
  auto err = new ErrorAt(tk.loc, msg, null, file, line);
  throw err;
}

/// Throws `ErrorAt` positioned at `loc`.
static private void error() (in auto ref Loc loc, string msg, string file=__FILE__, usize line=__LINE__) {
  pragma(inline, true);
  auto err = new ErrorAt(loc, msg, null, file, line);
  throw err;
}
/// Returns the text of source line number `idx` (counting from 1; 0 is
/// treated as 1) with trailing blanks/control chars removed.
/// Returns `null` when that line starts at or beyond the end of the text.
const(char)[] line (uint idx) {
  if (idx == 0) idx = 1;
  uint start = 0;
  // skip `idx-1` complete lines
  foreach (immutable _; 1..idx) {
    while (start < text.length && text.ptr[start] != '\n') ++start;
    ++start; // step over the newline itself
  }
  if (start >= text.length) return null;
  // find the end of the line...
  uint stop = start;
  while (stop < text.length && text.ptr[stop] != '\n') ++stop;
  // ...and trim everything that is a space or below from its tail
  while (stop > start && text.ptr[stop-1] <= ' ') --stop;
  return text[start..stop];
}
/// Drops the current token (if any) and scans one more token from the input.
void popFront () {
  if (lookup.length > 0) {
    // remember where the dropped token ended
    pend = lookup.ptr[0].eloc;
    ++pend.col; // for better error messages
    ++pend.tpos; // to be consistent
    // shift the remaining buffered tokens one slot to the left
    immutable remaining = lookup.length-1;
    foreach (immutable i; 0..remaining) lookup.ptr[i] = lookup.ptr[i+1];
    lookup.length = remaining;
    lookup.assumeSafeAppend;
  }
  nextToken();
}
@property pure nothrow @safe @nogc {
  /// `true` when no token is buffered.
  bool empty () const {
    pragma(inline, true);
    return !lookup.length;
  }

  /// Current token; the EOF token when nothing is buffered.
  ref inout(Token) front () inout {
    pragma(inline, true);
    if (lookup.length) return lookup.ptr[0];
    return tokeof;
  }

  // current token's loc
  auto loc () const {
    pragma(inline, true);
    return front.loc;
  }

  /// Current token's end position.
  auto eloc () const {
    pragma(inline, true);
    return front.eloc;
  }

  /// Recorded end position of the previously consumed token.
  auto peloc () const {
    pragma(inline, true);
    return pend;
  }

  /// `true` when the current token is an identifier.
  bool isId () const {
    pragma(inline, true);
    return front.isId;
  }

  /// `true` when the current token is a string.
  bool isStr () const {
    pragma(inline, true);
    return front.isStr;
  }

  /// `true` when the current token is a number.
  bool isNum () const {
    pragma(inline, true);
    return front.isNum;
  }

  /// `true` when the current token is a `Spec` token.
  bool isSpec () const {
    pragma(inline, true);
    return front.isSpec;
  }
}
/// `true` when the current token is the keyword `kw`.
bool isKw (Keyword kw) const pure nothrow @safe @nogc {
  pragma(inline, true);
  return front.isKw(kw);
}

/// `true` when the current token is any keyword.
bool isKw () const pure nothrow @safe @nogc {
  pragma(inline, true);
  return front.isKw();
}

/// Comparing the lexer with a keyword compares its current token with it.
bool opEquals (Keyword kw) const pure nothrow @safe @nogc {
  pragma(inline, true);
  return front.opEquals(kw);
}
// this eats keyword
// throws `ErrorAt` at the current token when it is not the keyword `kw`
void expect (Keyword kw, string file=__FILE__, usize line=__LINE__) {
  if (front.isKw(kw)) {
    popFront();
  } else {
    error(loc, "`"~keywordtext(kw)~"` expected", file, line);
  }
}
// requires an identifier at the front, consumes it and returns its text
// this converts id to `string` via `.idup`, use with caution!
// `.idup` is used to not anchor the whole source string
string expectId (string msg="identifier expected", string file=__FILE__, usize line=__LINE__) {
  mustbeId(msg, file, line); // throws unless the front token is an identifier
  string name = lookup[0].istr;
  popFront();
  return name;
}
// requires a string token at the front, consumes it and returns its contents
// this converts id to `string` via `.idup`, use with caution!
// `.idup` is used to not anchor the whole source string
string expectStr (string msg="string expected", string file=__FILE__, usize line=__LINE__) {
  mustbeStr(msg, file, line); // throws unless the front token is a string
  string contents = lookup[0].istr;
  popFront();
  return contents;
}
// `mustbe` doesn't eat token
// each of these throws `ErrorAt` when the current token has the wrong type
void mustbeType (Token.Type tp, string msg="identifier expected", string file=__FILE__, usize line=__LINE__) {
  pragma(inline, true);
  front.mustbeType(tp, msg, file, line);
}

void mustbeId (string msg="identifier expected", string file=__FILE__, usize line=__LINE__) {
  pragma(inline, true);
  front.mustbeId(msg, file, line);
}

void mustbeStr (string msg="string expected", string file=__FILE__, usize line=__LINE__) {
  pragma(inline, true);
  front.mustbeStr(msg, file, line);
}

void mustbeNum (string msg="number expected", string file=__FILE__, usize line=__LINE__) {
  pragma(inline, true);
  front.mustbeNum(msg, file, line);
}
/// Consumes the current token when it is the keyword `kw`.
/// Returns `true` if the keyword was consumed, `false` (nothing consumed) otherwise.
bool eatKw (Keyword kw) {
  if (isKw(kw)) {
    popFront();
    return true;
  }
  return false;
}
/// Returns the token `dist` positions ahead of the current one (0 is the
/// current token), scanning more input when needed.
/// Yields the EOF token when the input ends before that position.
ref Token peek (uint dist) {
  while (lookup.length <= dist && !eof) nextToken();
  if (dist < lookup.length) return lookup.ptr[dist];
  return tokeof;
}

/// `lexer[dist]` is the same as `lexer.peek(dist)`.
ref Token opIndex (usize dist) {
  pragma(inline, true);
  return peek(dist);
}
// return loc for next `getChar()`
// (only line and column are advanced; the rest is copied from `cpos`)
Loc nextLoc () nothrow @safe @nogc {
  Loc res = cpos;
  if (!lastWasEOL) {
    ++res.col;
    return res;
  }
  // previous char ended a line: the next one opens a new line
  res.col = 1;
  ++res.line;
  return res;
}
/// Returns the char `dist` positions ahead of the read position without
/// consuming anything. Yields '\0' past the end of the text; a NUL byte
/// inside the text is reported as a space.
char peekChar (uint dist=0) nothrow @trusted @nogc {
  pragma(inline, true);
  immutable where = tpos+dist;
  if (where >= text.length) return '\0';
  immutable char c = text.ptr[where];
  return (c ? c : ' ');
}
// return char or 0
// consumes one char and updates `cpos`; once the text is exhausted it sets
// `eof` and keeps returning 0. a NUL byte inside the text is returned as a space.
char getChar () nothrow @trusted @nogc {
  if (tpos >= text.length) {
    // ran off the end: clamp the position and latch EOF
    tpos = text.length;
    eof = true;
    return '\0';
  }
  if (eof) return '\0';
  cpos.tpos = tpos;
  immutable char raw = text.ptr[tpos];
  ++tpos;
  // a char that follows a newline starts a new line at column 1
  if (lastWasEOL) {
    cpos.col = 1;
    ++cpos.line;
  } else {
    ++cpos.col;
  }
  lastWasEOL = (raw == '\n');
  return (raw == '\0' ? ' ' : raw);
}
// skip blanks and comments
// (chars with code <= 32, `//` line comments and `/* */` block comments;
// an unterminated block comment throws `ErrorAt` at its opening slash)
//TODO: make special "comment" token(s)?
void skipBlanks () @safe {
  for (;;) {
    immutable char cur = peekChar;
    if (cur == '/') {
      immutable char after = peekChar(1);
      if (after == '/') {
        // single-line comment: eat up to and including the newline (or EOF)
        char c;
        do { c = getChar(); } while (c != 0 && c != '\n');
        continue;
      }
      if (after == '*') {
        // multiline comment
        getChar(); // skip slash
        auto lc = cpos; // opening position, for the error message
        getChar(); // skip star
        // `prev` starts as a blank so the opening star cannot close the comment (`/*/`)
        char prev = ' ';
        for (;;) {
          immutable char c = getChar();
          if (c == 0) error(lc, "unterminated comment");
          if (c == '/' && prev == '*') break;
          prev = c;
        }
        continue;
      }
      // a lone slash is not a comment: fall through, it ends the skipping below
    }
    if (cur == 0 || cur > 32) return;
    getChar();
  }
}
// Scans one token from the input and appends it to `lookup`.
//
// Does nothing once `eof` is set. When only blanks/comments remain, no token
// is appended: `eof` is set and `tokeof` receives the final position.
// Throws `ErrorAt` on malformed input (unterminated string or comment,
// bad number).
//
// Token kinds are tried in this order: quoted string, `$` hex number,
// decimal number, identifier/keyword, delimiter (longest keyword match),
// and finally a single-char `Spec` token.
private void nextToken () {
  if (eof) return;

  skipBlanks();
  if (peekChar == '\0') {
    // `peekChar` yields 0 only past the end of the text
    eof = true;
    tokeof.loc = cpos;
    tokeof.eloc = cpos;
    //++tokeof.eloc.col; // for better error messages
    //++tokeof.eloc.tpos; // to be consistent
    return;
  }

  Token tk;
  auto tkspos = tpos; // offset of the token's first char
  char ch = getChar();
  tk.loc = cpos;

  // quoted string
  if (ch == '"' || ch == '\'') {
    char ech = ch; // the closing quote must match the opening one
    tk.type = Token.Type.Str;
    ++tkspos; // skip quote
    for (;;) {
      ch = getChar();
      if (ch == 0) error(tk, "unterminated string");
      if (ch == ech) break;
    }
    tk.tkstr = text[tkspos..tpos-1]; // -1 due to eaten quote
    tk.eloc = cpos;
    //++tk.eloc.col; // for better error messages
    //++tk.eloc.tpos; // to be consistent
    lookup ~= tk;
    return;
  }

  // hex number
  if (ch == '$') {
    float n = 0;
    tk.type = Token.Type.Num;
    // the dollar is already consumed (it is in `ch`), so the digits start at
    // `peekChar`; consuming one more char here would drop the first digit
    int dv = digitValue(peekChar);
    if (dv < 0 || dv > 15) error(tk, "hex number expected");
    for (;;) {
      dv = digitValue(peekChar);
      if (dv < 0 || dv > 15) break;
      n = n*16+dv;
      getChar();
    }
    ch = peekChar;
    if (isIdChar(ch) || ch == '.') error(tk, "hex number expected");
    tk.num = n;
    tk.tkstr = text[tkspos..tpos];
    tk.eloc = cpos;
    //++tk.eloc.col; // for better error messages
    //++tk.eloc.tpos; // to be consistent
    lookup ~= tk;
    return;
  }

  // number
  if (isDigit(ch) || (ch == '.' && isDigit(peekChar))) {
    float n = 0;
    tk.type = Token.Type.Num;
    if (ch != '.') {
      // integral part
      n = ch-'0';
      for (;;) {
        if (!isDigit(peekChar)) break;
        ch = getChar();
        n = n*10+ch-'0';
      }
      if (peekChar == '.') ch = getChar();
    }
    if (ch == '.') {
      // fractional part
      if (!isDigit(peekChar)) error(tk, "real number expected");
      float div = 1;
      for (;;) {
        if (!isDigit(peekChar)) break;
        ch = getChar();
        div /= 10;
        n += div*(ch-'0');
      }
    }
    if (peekChar == 'e' || peekChar == 'E') {
      // exponent
      getChar();
      bool neg = false;
      if (peekChar == '+') getChar(); else if (peekChar == '-') { getChar(); neg = true; }
      if (!isDigit(peekChar)) error(tk, "invalid number");
      int e = 0;
      while (isDigit(peekChar)) {
        ch = getChar();
        immutable int d = ch-'0';
        // check before multiplying: a wrapped `int` may come out positive,
        // so testing the sign afterwards does not catch every overflow
        if (e > (int.max-d)/10) error(tk, "invalid number (exponent overflow)");
        e = e*10+d;
      }
      //{ import std.conv : to; assert(0, to!string(e)); }
      // once `n` is zero or infinite, more scaling cannot change it; stopping
      // early keeps a huge exponent from spinning for billions of iterations
      if (neg) {
        while (e-- > 0 && n != 0 && n != float.infinity) n = n/10;
      } else {
        while (e-- > 0 && n != 0 && n != float.infinity) n = n*10;
      }
    }
    tk.num = n;
    tk.tkstr = text[tkspos..tpos];
    tk.eloc = cpos;
    //++tk.eloc.col; // for better error messages
    //++tk.eloc.tpos; // to be consistent
    ch = peekChar;
    if (isIdChar(ch) || ch == '.') error(tk, "invalid number");
    lookup ~= tk;
    return;
  }

  // identifier
  if (isIdStart(ch)) {
    tk.type = Token.Type.Id;
    while (isIdChar(peekChar)) getChar();
    tk.tkstr = text[tkspos..tpos];
    tk.eloc = cpos;
    //++tk.eloc.col; // for better error messages
    //++tk.eloc.tpos; // to be consistent
    // identifiers found in the keyword table become keyword tokens
    if (auto kw = tk.tkstr in keywords) {
      tk.type = Token.Type.Kw;
      tk.kw = *kw;
    }
    lookup ~= tk;
    return;
  }

  // delimiter
  char[5] dbuf;
  dbuf[0] = ch;
  if (auto xkw = dbuf[0..1] in keywords) {
    tk.type = Token.Type.Kw;
    tk.kw = *xkw;
    // extend the match one char at a time for as long as the longer text is
    // still a known keyword
    foreach (uint dpos; 1..dbuf.length) {
      dbuf[dpos] = peekChar;
      if (auto kw = dbuf[0..dpos+1] in keywords) {
        tk.type = Token.Type.Kw;
        tk.kw = *kw;
        getChar(); // eat token char
      } else {
        break;
      }
    }
  } else {
    // not a keyword at all: a single-char `Spec` token
    tk.type = Token.Type.Spec;
  }
  tk.tkstr = text[tkspos..tpos];
  tk.eloc = cpos;
  //++tk.eloc.col; // for better error messages
  //++tk.eloc.tpos; // to be consistent
  lookup ~= tk;
}
/// Same as `selectN`, applied to the current token (lookahead distance 0).
auto select(RetType, string mode="peek", A...) (scope A args) {
  pragma(inline, true);
  return selectN!(RetType, mode)(0, args);
}
/// Dispatches on the token `n` positions ahead.
///
/// `args` is a list of `selector, handler` pairs, optionally followed by a
/// single default handler. A selector is a `Keyword` or a `Token.Type`; the
/// first selector that matches the token gets its handler called. A handler
/// may take a `Token`, a `Loc`, a `Token.Type`, a `Keyword`, or nothing.
/// The handler's result is cast to `RetType`; a `void` handler yields
/// `RetType.init`.
///
/// `mode`: "peek" never consumes; "pop" calls `popFront()` before any
/// handler (matched or default); "pop-nondefault" calls it only before a
/// matched handler.
///
/// Throws `ErrorAt` ("selectN is out of nodes") when nothing matched and
/// there is no default handler.
auto selectN(RetType, string mode="peek", A...) (usize n, scope A args) {
  import std.traits : ReturnType;

  static assert(mode == "peek" || mode == "pop" || mode == "pop-nondefault", "selectN: invalid mode: '"~mode~"'");

  // is `args[idx]` a callable with exactly one parameter, of type `T`?
  template isGoodDg(usize idx, T) {
    private import std.traits;
    static if (idx >= A.length) enum isGoodDg = false;
    else static if (!isCallable!(A[idx])) enum isGoodDg = false;
    else static if (arity!(args[idx]) != 1) enum isGoodDg = false;
    else enum isGoodDg = is(Parameters!(A[idx])[0] == T);
  }

  // is `args[idx]` a callable without parameters?
  template isGoodArglessDg(usize idx) {
    private import std.traits;
    static if (idx >= A.length) enum isGoodArglessDg = false;
    else static if (!isCallable!(A[idx])) enum isGoodArglessDg = false;
    else enum isGoodArglessDg = (arity!(args[idx]) == 0);
  }

  // sorry, but this has to be string mixin, due to possible empty `arg`
  enum DoCallDg(string arg) =
    "static if (!is(ReturnType!(A[xidx]) == void)) return cast(RetType)(args[xidx]("~arg~")); else { args[xidx]("~arg~"); return RetType.init; }";

  // we can't have inner mixin templates, so... sorry, it's string again
  enum CallDg = q{
         static if (isGoodDg!(xidx, Token)) { mixin(DoCallDg!"tk"); }
    else static if (isGoodDg!(xidx, Loc)) { mixin(DoCallDg!"tk.loc"); }
    else static if (isGoodDg!(xidx, Token.Type)) { mixin(DoCallDg!"tk.type"); }
    else static if (isGoodDg!(xidx, Keyword)) { mixin(DoCallDg!"tk.Kw"); }
    else static if (isGoodArglessDg!(xidx)) { mixin(DoCallDg!""); }
    else static assert(0, "selectN: invalid delegate #"~xidx.stringof);
  };

  auto tk = peek(n);
  bool found = false;
  foreach (immutable aidx, auto arg; args) {
    // only even positions are looked at: odd ones hold the handlers of the pairs
    static if (aidx%2 == 0) {
      alias ArgT = typeof(arg);
      static if (is(ArgT == Keyword) || is(ArgT == Token.Type)) {
        // selector: compare it with the token
        static if (is(ArgT == Keyword)) found = (tk == arg);
        else found = (tk.type == arg);
        if (found) {
          // process `mode`
          static if (mode != "peek") popFront();
          // call delegate
          enum xidx = aidx+1;
          mixin(CallDg);
        }
      } else {
        // default
        // process `mode`
        static if (mode == "pop") popFront();
        // call delegate
        enum xidx = aidx;
        mixin(CallDg);
      }
    }
  }
  error(tk, "selectN is out of nodes");
  assert(0);
}
519 static:
// char code -> digit value: '0'..'9' give 0..9, letters (either case) give
// 10..35, everything else gives -1. built at compile time.
private immutable byte[256] digitValues = {
  byte[256] tbl = -1;
  foreach (immutable int d; 0..10) tbl['0'+d] = cast(byte)d;
  foreach (immutable int l; 0..26) {
    tbl['A'+l] = cast(byte)(10+l);
    tbl['a'+l] = cast(byte)(10+l);
  }
  return tbl;
}();
// char code -> may this char start an identifier? (ASCII letters and '_')
// built at compile time.
private immutable bool[256] idStartChars = {
  bool[256] tbl = false;
  tbl['_'] = true;
  foreach (immutable int l; 0..26) {
    tbl['A'+l] = true;
    tbl['a'+l] = true;
  }
  return tbl;
}();
// char code -> may this char continue an identifier? (ASCII letters, digits
// and '_'). built at compile time.
private immutable bool[256] idChars = {
  bool[256] tbl = false;
  tbl['_'] = true;
  foreach (immutable int d; 0..10) tbl['0'+d] = true;
  foreach (immutable int l; 0..26) {
    tbl['A'+l] = true;
    tbl['a'+l] = true;
  }
  return tbl;
}();
/// `true` for the ASCII decimal digits '0'..'9'.
bool isDigit() (char ch) {
  pragma(inline, true);
  return !(ch < '0' || ch > '9');
}
/// Looks `ch` up in `digitValues`: the digit value of the char, or -1 when it is not a digit.
int digitValue() (char ch) {
  pragma(inline, true);
  immutable ubyte code = cast(ubyte)ch;
  return digitValues.ptr[code];
}

/// Looks `ch` up in `idStartChars`: can `ch` start an identifier?
bool isIdStart() (char ch) {
  pragma(inline, true);
  immutable ubyte code = cast(ubyte)ch;
  return idStartChars.ptr[code];
}

/// Looks `ch` up in `idChars`: can `ch` appear inside an identifier?
bool isIdChar() (char ch) {
  pragma(inline, true);
  immutable ubyte code = cast(ubyte)ch;
  return idChars.ptr[code];
}
/// Builds an expression for the string `s` out of double-quoted literals and
/// `chr(N)` calls joined with `+`: chars below ' ', char 127 and '"' become
/// `chr(N)`, every other run of chars becomes one `"..."` literal.
/// An empty `s` gives `""`. Allocates.
string gmlQuote (const(char)[] s) {
  import std.array : appender;
  import std.conv : to;
  if (s.length == 0) return `""`;
  auto buf = appender!string();
  bool started = false; // was anything emitted yet?
  bool inLiteral = false; // is a `"...` literal currently open?
  foreach (char c; s) {
    immutable bool special = (c < ' ' || c == 127 || c == '"');
    if (special) {
      // close the open literal (if any) and chain the `chr()` call
      if (inLiteral) buf.put(`"+`);
      else if (started) buf.put(`+`);
      buf.put("chr(");
      buf.put(to!string(cast(uint)c));
      buf.put(")");
      inLiteral = false;
    } else {
      // open a new literal unless one is already open
      if (!inLiteral) buf.put(started ? `+"` : `"`);
      buf.put(c);
      inLiteral = true;
    }
    started = true;
  }
  if (inLiteral) buf.put('"');
  return buf.data;
}
/// quote string: append double quotes, screen all special chars;
/// so quoted string forms valid D string literal.
/// allocates.
///
/// Uses the documented compound format specifier `%(%s%)`, which formats
/// string elements as escaped, double-quoted literals, so no undocumented
/// `std.format` helper is needed.
string quote (const(char)[] s) {
  import std.format : format;
  // wrap `s` in a one-element array: elements inside `%( %)` are escaped
  return format("%(%s%)", [s]);
}
// Manual smoke test, compiled only with `-version=gml_lexer_test`: tokenizes
// "scrDrawHUD.gml" from the current directory and prints what `select`
// dispatches for each token. A parse error is reported, not rethrown.
version(gml_lexer_test) unittest {
  import std.file;
  import std.stdio;
  auto s = readText("scrDrawHUD.gml");
  auto lex = new Lexer(s, "scrDrawHUD.gml");
  try {
    while (!lex.empty) {
      //if (lex == Keyword.RCurly) writeln("*******************");
      // "pop" mode: every call consumes the current token
      auto v = lex.select!(int, "pop")(
        Keyword.LCurly, (ref Token tk) => 1,
        Keyword.RCurly, (Keyword kw) => 2,
        Keyword.Semi, () => 6,
        Keyword.Sub, (Loc loc) => 99,
        Token.Type.Num, (ref Token tk) => 3,
        (ref Token tk) => writeln(tk),
      );
      if (v) writeln("*** ", v);
      //writeln(v, ": ", lex.front);
      //lex.popFront();
    }
  } catch (ErrorAt e) {
    // show the message itself too: the throw-site line alone does not say what went wrong
    writeln("PARSE ERROR: ", e.msg, " (thrown at ", e.file, ":", e.line, ")");
    writeln(e.loc);
  }
}