API changes
[gaemu.git] / gmlparser / lexer.d
blobe3ab6e417700dcff8c088c4a30fea6a3f5ce9e15
/* GML parser
 * coded by Ketmar // Invisible Vector <ketmar@ketmar.no-ip.org>
 * Understanding is not required. Only obedience.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
18 module gmlparser.lexer is aliced;
20 import gmlparser.tokens;
23 // ////////////////////////////////////////////////////////////////////////// //
/// Position inside a source text.
public struct Loc {
  string file; // name of the source file (may be empty)
  int line, col; // line and column of the character
  uint tpos; // offset of the character in the source text

  /// Formats the position as `file (line,col)`.
  string toString () const {
    import std.string : format;
    return format("%s (%s,%s)", file, line, col);
  }

  /// Formats the position as `(line,col)`, omitting the file name.
  string toStringNoFile () const {
    import std.string : format;
    return format("(%s,%s)", line, col);
  }
}
34 // ////////////////////////////////////////////////////////////////////////// //
/// Exception that optionally carries the source location it refers to.
public class ErrorAt : Exception {
  Loc loc; // location in the parsed text; stays `Loc.init` for the location-less constructor

  /// Creates an error without a source location.
  this (string msg, Throwable next=null, string file=__FILE__, usize line=__LINE__) pure nothrow @safe @nogc {
    super(msg, file, line, next);
  }

  /// Creates an error that points at `aloc`.
  this (in Loc aloc, string msg, Throwable next=null, string file=__FILE__, usize line=__LINE__) pure nothrow @safe @nogc {
    super(msg, file, line, next);
    loc = aloc;
  }
}
43 // ////////////////////////////////////////////////////////////////////////// //
/// One lexeme produced by `Lexer`.
public struct Token {
public:
  /// Token category.
  enum Type {
    EOF = -1,
    Kw,
    Id,
    Str,
    Num,
    Spec,
  }

private:
  const(char)[] tkstr; // token text; the lexer stores a slice of the source here

public:
  Loc loc, eloc; // token start, token end
  Type type = Type.EOF; // token type
  union {
    Keyword kw; // keyword id, set by the lexer for `Type.Kw`
    float num; // numeric value, set by the lexer for `Type.Num`
  }

@safe:
  /// Throws `ErrorAt` (located at this token) with `msg` unless the token has type `tp`.
  void mustbeType (Token.Type tp, string msg="identifier expected", string file=__FILE__, usize line=__LINE__) {
    pragma(inline, true);
    if (type != tp) throw new ErrorAt(loc, msg, null, file, line);
  }

  /// Throws `ErrorAt` unless this token is an identifier.
  void mustbeId (string msg="identifier expected", string file=__FILE__, usize line=__LINE__) {
    pragma(inline, true);
    mustbeType(Type.Id, msg, file, line);
  }

  /// Throws `ErrorAt` unless this token is a string.
  void mustbeStr (string msg="string expected", string file=__FILE__, usize line=__LINE__) {
    pragma(inline, true);
    mustbeType(Type.Str, msg, file, line);
  }

  /// Throws `ErrorAt` unless this token is a number.
  void mustbeNum (string msg="number expected", string file=__FILE__, usize line=__LINE__) {
    pragma(inline, true);
    mustbeType(Type.Num, msg, file, line);
  }

  /// Debug representation: `(line,col): ` followed by a type-specific description.
  string toString () const @trusted {
    import std.string : format;
    immutable where = "(%s,%d): ".format(loc.line, loc.col);
    final switch (type) with (Type) {
      case EOF: return where~"<EOF>";
      case Kw: return where~"kw.%s <%s>".format(kw, tkstr);
      case Id: return where~"Id:%s".format(tkstr);
      case Str: return where~"Str:%s".format(Lexer.quote(tkstr));
      case Num: return where~"Num:%s".format(num);
      case Spec: return where~"Spec:<%s>".format(tkstr);
    }
    assert(0);
  }

nothrow:
  /// Token text as an immutable string (`null` for empty text).
  /// The text is copied with `.idup` so the result does not anchor the
  /// whole source string; this allocates, so use with caution.
  @property string istr () { pragma(inline, true); return (tkstr.length ? tkstr.idup : null); }

const pure nothrow @nogc:
  /// `true` if this token is the keyword `akw`.
  bool opEquals (Keyword akw) { pragma(inline, true); return (type == Type.Kw && kw == akw); }
  /// ditto
  bool isKw (Keyword akw) { pragma(inline, true); return (type == Type.Kw && kw == akw); }
  /// `true` if this token is any keyword.
  bool isKw () { pragma(inline, true); return (type == Type.Kw); }

@property:
  /// Token text, without copying.
  const(char)[] str () { pragma(inline, true); return tkstr; }
  /// Keyword id, or `Keyword.NoKW` if the token is not a keyword.
  Keyword Kw () { pragma(inline, true); return (type == Type.Kw ? kw : Keyword.NoKW); }
  bool isId () { pragma(inline, true); return (type == Type.Id); }
  bool isStr () { pragma(inline, true); return (type == Type.Str); }
  bool isNum () { pragma(inline, true); return (type == Type.Num); }
  bool isSpec () { pragma(inline, true); return (type == Type.Spec); }
  bool isEOF () { pragma(inline, true); return (type == Type.EOF); }
}
110 // ////////////////////////////////////////////////////////////////////////// //
111 public final class Lexer {
112 private:
113 const(char)[] text;
114 uint tpos;
115 Loc cpos; // position for last `getChar()`
116 Loc pend; // end of previous token, for better error messages
117 bool eof;
118 bool lastWasEOL = true;
119 Token[] lookup;
120 Token tokeof; // will be fixed by `nextToken()`
122 public:
/// Creates a lexer over `atext` and reads the first token.
///
/// Params:
///   atext = source text to tokenize (not copied)
///   afname = optional file name used in locations; copied with `.idup`
///            unless it already is a `string`
///
/// `T` defaults to `string`: a template parameter cannot be deduced from a
/// default function argument, so without the default `new Lexer(text)`
/// (no file name) would not compile.
///
/// Throws: `ErrorAt` if the first token is malformed.
this(T=string) (const(char)[] atext, T afname=null) if (is(T : const(char)[])) {
  text = atext;
  if (afname.length > 0) {
    static if (is(T == string)) cpos.file = afname; else cpos.file = afname.idup;
  }
  tokeof.loc.file = cpos.file;
  nextToken();
  // "end of previous token" starts at the beginning of the text; carry the file
  // name too, as `popFront()` later does by copying a token's `eloc`
  pend = Loc(cpos.file, 1, 1, 0);
}
/// Throws `ErrorAt` with `msg`, located at the current (front) token.
void error (string msg, string file=__FILE__, usize line=__LINE__) {
  pragma(inline, true);
  // `loc` is `front.loc`: the first buffered token, or the EOF token when the buffer is empty
  throw new ErrorAt(loc, msg, null, file, line);
}

/// Throws `ErrorAt` with `msg`, located at the start of `tk`.
static private void error (in ref Token tk, string msg, string file=__FILE__, usize line=__LINE__) {
  pragma(inline, true);
  throw new ErrorAt(tk.loc, msg, null, file, line);
}

/// Throws `ErrorAt` with `msg`, located at `loc`.
static private void error() (in auto ref Loc loc, string msg, string file=__FILE__, usize line=__LINE__) {
  pragma(inline, true);
  throw new ErrorAt(loc, msg, null, file, line);
}
/// Returns the text of source line `idx` (1-based; 0 is treated as 1) with
/// trailing blanks and control characters removed, as a slice of the source.
/// Returns `null` when the line starts at or beyond the end of the text.
const(char)[] line (uint idx) {
  immutable uint toSkip = (idx == 0 ? 0 : idx-1);
  uint start = 0;
  // step over `toSkip` complete lines
  for (uint left = toSkip; left > 0; --left) {
    while (start < text.length && text.ptr[start] != '\n') ++start;
    ++start; // skip the newline itself
  }
  if (start >= text.length) return null;
  // find the end of the line
  uint stop = start;
  while (stop < text.length && text.ptr[stop] != '\n') ++stop;
  // drop trailing chars with codes <= space
  while (stop > start && text.ptr[stop-1] <= ' ') --stop;
  return text[start..stop];
}
/// Drops the front token (if any) and reads one more token from the source.
void popFront () {
  if (lookup.length != 0) {
    // remember where the dropped token ended
    pend = lookup.ptr[0].eloc;
    ++pend.col; // for better error messages
    ++pend.tpos; // to be consistent
    // shift the remaining buffered tokens one slot down
    foreach (immutable dst; 0..lookup.length-1) lookup.ptr[dst] = lookup.ptr[dst+1];
    lookup.length -= 1;
    lookup.assumeSafeAppend;
  }
  nextToken();
}
@property pure nothrow @safe @nogc {
  /// `true` when no tokens are buffered.
  bool empty () const { pragma(inline, true); return !lookup.length; }
  /// Current token; the EOF token when nothing is buffered.
  ref inout(Token) front () inout { pragma(inline, true); return (lookup.length == 0 ? tokeof : lookup.ptr[0]); }
  /// Start location of the current token.
  auto loc () const { pragma(inline, true); return front.loc; }
  /// End location of the current token.
  auto eloc () const { pragma(inline, true); return front.eloc; }
  /// End of the previous token.
  auto peloc () const { pragma(inline, true); return pend; }

  // type checks for the current token
  bool isId () const { pragma(inline, true); return (front.type == Token.Type.Id); }
  bool isStr () const { pragma(inline, true); return (front.type == Token.Type.Str); }
  bool isNum () const { pragma(inline, true); return (front.type == Token.Type.Num); }
  bool isSpec () const { pragma(inline, true); return (front.type == Token.Type.Spec); }
}
/// `true` if the current token is the keyword `kw`.
bool isKw (Keyword kw) const pure nothrow @safe @nogc { pragma(inline, true); return (front == kw); }
/// `true` if the current token is any keyword.
bool isKw () const pure nothrow @safe @nogc { pragma(inline, true); return (front.type == Token.Type.Kw); }

/// `lexer == Keyword.X` is the same as `lexer.isKw(Keyword.X)`.
bool opEquals (Keyword kw) const pure nothrow @safe @nogc { pragma(inline, true); return front.isKw(kw); }
/// Requires the current token to be the keyword `kw` and consumes it.
/// Throws: `ErrorAt` at the current token otherwise.
void expect (Keyword kw, string file=__FILE__, usize line=__LINE__) {
  if (front.isKw(kw)) {
    popFront();
    return;
  }
  error(loc, "`"~keywordtext(kw)~"` expected", file, line);
}
/// Requires the current token to be an identifier, consumes it and returns its text.
/// The text is copied with `.idup` so the result does not anchor the whole
/// source string; this allocates, so use with caution.
/// Throws: `ErrorAt` with `msg` if the current token is not an identifier.
string expectId (string msg="identifier expected", string file=__FILE__, usize line=__LINE__) {
  mustbeId(msg, file, line);
  // the check passed, so the buffer holds at least one token
  string name = lookup[0].istr;
  popFront();
  return name;
}
/// Requires the current token to be a string, consumes it and returns its contents.
/// The text is copied with `.idup` so the result does not anchor the whole
/// source string; this allocates, so use with caution.
/// Throws: `ErrorAt` with `msg` if the current token is not a string.
string expectStr (string msg="string expected", string file=__FILE__, usize line=__LINE__) {
  mustbeStr(msg, file, line);
  // the check passed, so the buffer holds at least one token
  string contents = lookup[0].istr;
  popFront();
  return contents;
}
// the `mustbe*` family checks the current token and throws `ErrorAt` on mismatch;
// none of them consumes the token
void mustbeType (Token.Type tp, string msg="identifier expected", string file=__FILE__, usize line=__LINE__) { pragma(inline, true); front.mustbeType(tp, msg, file, line); }
void mustbeId (string msg="identifier expected", string file=__FILE__, usize line=__LINE__) { pragma(inline, true); front.mustbeId(msg, file, line); }
void mustbeStr (string msg="string expected", string file=__FILE__, usize line=__LINE__) { pragma(inline, true); front.mustbeStr(msg, file, line); }
void mustbeNum (string msg="number expected", string file=__FILE__, usize line=__LINE__) { pragma(inline, true); front.mustbeNum(msg, file, line); }
/// Consumes the current token if it is the keyword `kw`.
/// Returns: `true` if the keyword was consumed, `false` (nothing consumed) otherwise.
bool eatKw (Keyword kw) {
  if (isKw(kw)) {
    popFront();
    return true;
  }
  return false;
}
/// Looks `dist` tokens ahead (0 is the current token), reading more tokens
/// from the source as needed. Returns the EOF token when the source ends first.
ref Token peek (uint dist) {
  while (lookup.length <= dist && !eof) nextToken();
  return (lookup.length > dist ? lookup.ptr[dist] : tokeof);
}

/// `lexer[dist]` is the same as `lexer.peek(dist)`.
ref Token opIndex (usize dist) { pragma(inline, true); return peek(dist); }
/// Location that the next `getChar()` call will report, without reading anything.
Loc nextLoc () nothrow @safe @nogc {
  Loc res = cpos;
  if (!lastWasEOL) {
    ++res.col;
  } else {
    // previous char ended a line: the next one starts a new line
    ++res.line;
    res.col = 1;
  }
  return res;
}
/// Returns the char `dist` positions ahead without consuming anything.
/// Yields '\0' past the end of the text; a NUL byte inside the text is reported as a space.
char peekChar (uint dist=0) nothrow @trusted @nogc {
  pragma(inline, true);
  return (tpos+dist < text.length ? (text.ptr[tpos+dist] ? text.ptr[tpos+dist] : ' ') : '\0');
}
/// Consumes and returns the next char, updating `cpos`.
/// Returns '\0' (and raises the `eof` flag) at the end of the text; a NUL
/// byte inside the text is reported as a space.
char getChar () nothrow @trusted @nogc {
  if (tpos >= text.length) {
    tpos = text.length;
    eof = true;
    return '\0';
  }
  if (eof) return '\0';
  immutable raw = text.ptr[tpos];
  cpos.tpos = tpos++;
  immutable ch = (raw == '\0' ? ' ' : raw);
  // advance line/column; a new line starts right after a consumed '\n'
  if (lastWasEOL) {
    ++cpos.line;
    cpos.col = 1;
  } else {
    ++cpos.col;
  }
  lastWasEOL = (ch == '\n');
  return ch;
}
/// Skips blanks (chars with codes 1..32), `// ...` comments and `/* ... */` comments.
/// Stops at the first other char or at the end of the text.
/// Throws: `ErrorAt` (at the opening slash) for an unterminated multiline comment.
//TODO: make special "comment" token(s)?
void skipBlanks () @safe {
  for (;;) {
    immutable ch = peekChar;
    if (ch == '/') {
      immutable nch = peekChar(1);
      if (nch == '/') {
        // single-line comment: eat everything up to and including the newline
        char cc;
        do { cc = getChar(); } while (cc != 0 && cc != '\n');
        continue;
      }
      if (nch == '*') {
        // multiline comment
        getChar(); // skip slash
        auto lc = cpos; // location of the slash, for the error message
        getChar(); // skip star
        // `prev` starts as a blank so that "/*/" does not close the comment
        char prev = ' ';
        for (;;) {
          immutable cur = getChar();
          if (cur == 0) error(lc, "unterminated comment");
          if (cur == '/' && prev == '*') break;
          prev = cur;
        }
        continue;
      }
      // a lone slash is not blank; the check below returns
    }
    if (ch == 0 || ch > 32) return;
    getChar();
  }
}
/// Reads one token from the source and appends it to `lookup`.
/// At the end of the text nothing is appended: `eof` is raised and the
/// location of the EOF token is updated instead.
///
/// Recognizes, in this order: quoted strings (single or double quotes, no
/// escapes), `$`-prefixed hex numbers, decimal numbers with optional fraction
/// and exponent, identifiers/keywords, and delimiters (longest run of chars
/// whose every prefix is in `keywords`); any other char becomes a `Spec` token.
///
/// Throws: `ErrorAt` (located at the token start) for malformed input.
private void nextToken () {
  if (eof) return;

  skipBlanks();
  if (peekChar == '\0') {
    eof = true;
    tokeof.loc = cpos;
    tokeof.eloc = cpos;
    return;
  }

  Token tk;
  auto tkspos = tpos;
  char ch = getChar();
  tk.loc = cpos;

  // quoted string
  if (ch == '"' || ch == '\'') {
    char ech = ch;
    tk.type = Token.Type.Str;
    ++tkspos; // skip quote
    for (;;) {
      ch = getChar();
      if (ch == 0) error(tk, "unterminated string");
      if (ch == ech) break;
    }
    tk.tkstr = text[tkspos..tpos-1]; // -1 due to eaten quote
    tk.eloc = cpos;
    lookup ~= tk;
    return;
  }

  // hex number
  if (ch == '$') {
    float n = 0;
    tk.type = Token.Type.Num;
    // the dollar was already consumed by the `getChar()` above; consuming one
    // more char here would silently drop the first hex digit
    int dv = digitValue(peekChar);
    if (dv < 0 || dv > 15) error(tk, "hex number expected");
    for (;;) {
      dv = digitValue(peekChar);
      if (dv < 0 || dv > 15) break;
      n = n*16+dv;
      getChar();
    }
    ch = peekChar;
    if (isIdChar(ch) || ch == '.') error(tk, "hex number expected");
    tk.num = n;
    tk.tkstr = text[tkspos..tpos];
    tk.eloc = cpos;
    lookup ~= tk;
    return;
  }

  // number
  if (isDigit(ch) || (ch == '.' && isDigit(peekChar))) {
    float n = 0;
    tk.type = Token.Type.Num;
    if (ch != '.') {
      // integral part
      n = ch-'0';
      for (;;) {
        if (!isDigit(peekChar)) break;
        ch = getChar();
        n = n*10+ch-'0';
      }
      if (peekChar == '.') ch = getChar();
    }
    if (ch == '.') {
      // fractional part
      if (!isDigit(peekChar)) error(tk, "real number expected");
      float div = 1;
      for (;;) {
        if (!isDigit(peekChar)) break;
        ch = getChar();
        div /= 10;
        n += div*(ch-'0');
      }
    }
    if (peekChar == 'e' || peekChar == 'E') {
      // exponent
      getChar();
      bool neg = false;
      if (peekChar == '+') getChar(); else if (peekChar == '-') { getChar(); neg = true; }
      if (!isDigit(peekChar)) error(tk, "invalid number");
      int e = 0;
      while (isDigit(peekChar)) {
        ch = getChar();
        e = e*10+(ch-'0');
        if (e < 0) error(tk, "invalid number (exponent overflow)");
      }
      // scale by powers of ten; once `n` is 0 or infinity further scaling cannot
      // change it, so stop early instead of spinning through a huge exponent
      while (e-- > 0 && n != 0 && n != float.infinity) n = (neg ? n/10 : n*10);
    }
    tk.num = n;
    tk.tkstr = text[tkspos..tpos];
    tk.eloc = cpos;
    ch = peekChar;
    if (isIdChar(ch) || ch == '.') error(tk, "invalid number");
    lookup ~= tk;
    return;
  }

  // identifier
  if (isIdStart(ch)) {
    tk.type = Token.Type.Id;
    while (isIdChar(peekChar)) getChar();
    tk.tkstr = text[tkspos..tpos];
    tk.eloc = cpos;
    // identifiers listed in `keywords` are keywords
    if (auto kw = tk.tkstr in keywords) {
      tk.type = Token.Type.Kw;
      tk.kw = *kw;
    }
    lookup ~= tk;
    return;
  }

  // delimiter
  char[5] dbuf;
  dbuf[0] = ch;
  if (auto xkw = dbuf[0..1] in keywords) {
    tk.type = Token.Type.Kw;
    tk.kw = *xkw;
    // extend the delimiter char by char while the longer text is still a keyword
    foreach (uint dpos; 1..dbuf.length) {
      dbuf[dpos] = peekChar;
      if (auto kw = dbuf[0..dpos+1] in keywords) {
        tk.type = Token.Type.Kw;
        tk.kw = *kw;
        getChar(); // eat token char
      } else {
        break;
      }
    }
  } else {
    tk.type = Token.Type.Spec;
  }
  tk.tkstr = text[tkspos..tpos];
  tk.eloc = cpos;
  lookup ~= tk;
}
/// Shortcut for `selectN` applied to the current token (`n` is 0).
auto select(RetType, string mode="peek", A...) (scope A args) {
  pragma(inline, true);
  return selectN!(RetType, mode)(0, args);
}
/// Dispatches on the token `n` positions ahead.
///
/// `args` is scanned at even positions. A `Keyword` or `Token.Type` value
/// there is a selector, and the argument right after it is its handler; the
/// first selector matching the token wins. Any other value at an even position
/// is a default handler, called unconditionally when the scan reaches it.
///
/// A handler may take one `Token`, `Loc`, `Token.Type` or `Keyword` parameter,
/// or no parameters at all. Its result is cast to `RetType`; a `void` handler
/// yields `RetType.init`.
///
/// `mode` controls token consumption via `popFront()` before the handler runs:
/// "peek" never pops, "pop" pops for matched and default handlers,
/// "pop-nondefault" pops for matched handlers only.
///
/// Throws: `ErrorAt` ("selectN is out of nodes") if nothing matched and there
/// is no default handler.
auto selectN(RetType, string mode="peek", A...) (usize n, scope A args) {
  import std.traits : ReturnType;

  static assert(mode == "peek" || mode == "pop" || mode == "pop-nondefault", "selectN: invalid mode: '"~mode~"'");

  // is `args[idx]` a callable with exactly one parameter of type `T`?
  template isGoodDg(usize idx, T) {
    private import std.traits;
    static if (idx < A.length && isCallable!(A[idx]) && arity!(args[idx]) == 1) {
      enum isGoodDg = is(Parameters!(A[idx])[0] == T);
    } else {
      enum isGoodDg = false;
    }
  }

  // is `args[idx]` a callable without parameters?
  template isGoodArglessDg(usize idx) {
    private import std.traits;
    static if (idx < A.length && isCallable!(A[idx]) && arity!(args[idx]) == 0) {
      enum isGoodArglessDg = true;
    } else {
      enum isGoodArglessDg = false;
    }
  }

  // sorry, but this has to be string mixin, due to possible empty `arg`
  enum DoCallDg(string arg) =
    "static if (!is(ReturnType!(A[xidx]) == void)) return cast(RetType)(args[xidx]("~arg~")); else { args[xidx]("~arg~"); return RetType.init; }";

  // we can't have inner mixin templates, so... sorry, it's string again
  // (expects `xidx` and `tk` to be in scope at the mixin site)
  enum CallDg = q{
         static if (isGoodDg!(xidx, Token)) { mixin(DoCallDg!"tk"); }
    else static if (isGoodDg!(xidx, Loc)) { mixin(DoCallDg!"tk.loc"); }
    else static if (isGoodDg!(xidx, Token.Type)) { mixin(DoCallDg!"tk.type"); }
    else static if (isGoodDg!(xidx, Keyword)) { mixin(DoCallDg!"tk.Kw"); }
    else static if (isGoodArglessDg!(xidx)) { mixin(DoCallDg!""); }
    else static assert(0, "selectN: invalid delegate #"~xidx.stringof);
  };

  auto tk = peek(n);
  bool matched = false;
  foreach (immutable aidx, auto arg; args) {
    static if (aidx%2 == 0) {
      static if (is(typeof(arg) == Keyword) || is(typeof(arg) == Token.Type)) {
        // selector: compare it with the token
             static if (is(typeof(arg) == Keyword)) matched = (tk == arg);
        else static if (is(typeof(arg) == Token.Type)) matched = (tk.type == arg);
        else static assert(0, "wtf?!");
        if (matched) {
          // process `mode`
          static if (mode != "peek") popFront();
          // call the handler that follows the selector
          enum xidx = aidx+1;
          mixin(CallDg);
        }
      } else {
        // default handler
        // process `mode`
        static if (mode == "pop") popFront();
        // call it
        enum xidx = aidx;
        mixin(CallDg);
      }
    }
  }
  error(tk, "selectN is out of nodes");
  assert(0);
}
517 static:
// value of a char as a digit: '0'..'9' are 0..9, letters (either case) are 10..35,
// everything else is -1
private immutable byte[256] digitValues = () {
  byte[256] tbl;
  foreach (immutable code; 0..256) {
    immutable ch = cast(char)code;
         if (ch >= '0' && ch <= '9') tbl[code] = cast(byte)(ch-'0');
    else if (ch >= 'A' && ch <= 'Z') tbl[code] = cast(byte)(ch-'A'+10);
    else if (ch >= 'a' && ch <= 'z') tbl[code] = cast(byte)(ch-'a'+10);
    else tbl[code] = -1;
  }
  return tbl;
}();

// chars that can start an identifier: ASCII letters and underscore
private immutable bool[256] idStartChars = () {
  bool[256] tbl;
  foreach (immutable code; 0..256) {
    immutable ch = cast(char)code;
    tbl[code] = ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || ch == '_');
  }
  return tbl;
}();

// chars that can continue an identifier: ASCII letters, digits and underscore
private immutable bool[256] idChars = () {
  bool[256] tbl;
  foreach (immutable code; 0..256) {
    immutable ch = cast(char)code;
    tbl[code] = ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || ch == '_');
  }
  return tbl;
}();

/// `true` for decimal digits.
bool isDigit() (char ch) { pragma(inline, true); return (ch <= '9' && ch >= '0'); }
/// Digit value of `ch` (0..35), or -1 if `ch` is neither a digit nor an ASCII letter.
int digitValue() (char ch) { pragma(inline, true); return digitValues.ptr[cast(ubyte)ch]; }
/// `true` if `ch` can start an identifier.
bool isIdStart() (char ch) { pragma(inline, true); return idStartChars.ptr[cast(ubyte)ch]; }
/// `true` if `ch` can appear inside an identifier.
bool isIdChar() (char ch) { pragma(inline, true); return idChars.ptr[cast(ubyte)ch]; }
/// Converts `s` to a GML string expression: runs of ordinary chars become
/// double-quoted literals; control chars (codes below 32), char 127 and the
/// double quote become `chr(N)` calls; the pieces are joined with `+`.
/// An empty input yields `""`. Allocates.
string gmlQuote (const(char)[] s) {
  import std.array : appender;
  import std.conv : to;
  // what was emitted last
  enum Prev { Nothing, Char, Spec }
  auto res = appender!string();
  auto prev = Prev.Nothing;
  foreach (char ch; s) {
    immutable special = (ch < ' ' || ch == 127 || ch == '"');
    if (special) {
      // close an open literal, or just chain after a previous `chr()`
      if (prev == Prev.Char) res.put(`"+`);
      else if (prev == Prev.Spec) res.put(`+`);
      res.put("chr(");
      res.put(to!string(cast(uint)ch));
      res.put(")");
      prev = Prev.Spec;
    } else {
      // open a literal if we are not inside one
      if (prev == Prev.Nothing) res.put('"');
      else if (prev == Prev.Spec) res.put(`+"`);
      res.put(ch);
      prev = Prev.Char;
    }
  }
  if (prev == Prev.Nothing) return `""`;
  if (prev == Prev.Char) res.put('"');
  return res.data;
}
/// quote string: append double quotes, screen all special chars;
/// so quoted string forms valid D string literal.
/// allocates.
string quote (const(char)[] s) {
  import std.format : format;
  import std.range : only;
  // inside a compound specifier (`%(...%)`) string elements are written quoted
  // and escaped like D literals; this is the documented way to get that
  // formatting without reaching for the undocumented `formatElement`
  return format("%(%s%)", only(s));
}
594 version(gml_lexer_test) unittest {
595 import std.file;
596 import std.stdio;
597 auto s = readText("scrDrawHUD.gml");
598 auto lex = new Lexer(s, "scrDrawHUD.gml");
599 try {
600 while (!lex.empty) {
601 //if (lex == Keyword.RCurly) writeln("*******************");
602 auto v = lex.select!(int, "pop")(
603 Keyword.LCurly, (ref Token tk) => 1,
604 Keyword.RCurly, (Keyword kw) => 2,
605 Keyword.Semi, () => 6,
606 Keyword.Sub, (Loc loc) => 99,
607 Token.Type.Num, (ref Token tk) => 3,
608 (ref Token tk) => writeln(tk),
610 if (v) writeln("*** ", v);
611 //writeln(v, ": ", lex.front);
612 //lex.popFront();
614 } catch (ErrorAt e) {
615 writeln("PARSE ERROR: ", e.line);
616 writeln(e.loc);