2 * coded by Ketmar // Invisible Vector <ketmar@ketmar.no-ip.org>
3 * Understanding is not required. Only obedience.
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 module gmlparser
.lexer
is aliced
;
20 import gmlparser
.tokens
;
23 // ////////////////////////////////////////////////////////////////////////// //
/// Render the full position as `file (line,col)`.
string toString () const {
  import std.string : format;
  return format("%s (%s,%s)", file, line, col);
}
/// Render the position without the file name, as `(line,col)`.
string toStringNoFile () const {
  import std.string : format;
  return format("(%s,%s)", line, col);
}
34 // ////////////////////////////////////////////////////////////////////////// //
35 public class ErrorAt
: Exception
{
/// Create an error from a message only; this overload does not assign `loc`.
/// `file`/`line` default to the D source position of the caller.
this (string msg, Throwable next=null, string file=__FILE__, usize line=__LINE__) pure nothrow @safe @nogc {
  super(msg, file, line, next);
}
/// Create an error bound to the source location `aloc` (stored in `loc`).
/// `file`/`line` default to the D source position of the caller.
this (in Loc aloc, string msg, Throwable next=null, string file=__FILE__, usize line=__LINE__) pure nothrow @safe @nogc {
  loc = aloc;
  super(msg, file, line, next);
}
43 // ////////////////////////////////////////////////////////////////////////// //
59 Loc loc
, eloc
; // token start, token end (after last char)
60 Type type
= Type
.EOF
; // token type
// Require this token to have type `tp`: when `type` differs, throw `ErrorAt`
// positioned at the token start (`loc`) with message `msg`.
// `file`/`line` are forwarded to the exception and default to the caller's
// `__FILE__`/`__LINE__`.
// NOTE(review): the default message is "identifier expected" for every `tp`,
// not only for `Type.Id` -- callers checking other types should pass `msg`.
67 void mustbeType (Token
.Type tp
, string msg
="identifier expected", string file
=__FILE__
, usize line
=__LINE__
) {
69 if (type
!= tp
) throw new ErrorAt(loc
, msg
, null, file
, line
);
/// Require an identifier token; delegates the check (and the throw) to `mustbeType`.
void mustbeId (string msg="identifier expected", string file=__FILE__, usize line=__LINE__) {
  pragma(inline, true);
  mustbeType(Type.Id, msg, file, line);
}
/// Require a string token; delegates the check (and the throw) to `mustbeType`.
void mustbeStr (string msg="string expected", string file=__FILE__, usize line=__LINE__) {
  pragma(inline, true);
  mustbeType(Type.Str, msg, file, line);
}
/// Require a number token; delegates the check (and the throw) to `mustbeType`.
void mustbeNum (string msg="number expected", string file=__FILE__, usize line=__LINE__) {
  pragma(inline, true);
  mustbeType(Type.Num, msg, file, line);
}
// Debug representation of the token: "(line,col): " taken from the token start
// (`loc`), followed by a per-type payload:
//   EOF  -> "<EOF>"
//   Kw   -> "kw.<keyword> <<token text>>"
//   Id   -> "Id:<token text>"
//   Str  -> "Str:" plus the token text passed through `Lexer.quote`
//   Num  -> "Num:<num>"
//   Spec -> "Spec:<<token text>>"
// `final switch` makes the compiler reject the code if a `Type` member is not handled.
75 string
toString () const @trusted {
76 import std
.string
: format
;
77 final switch (type
) with (Type
) {
78 case EOF
: return "(%s,%d): <EOF>".format(loc
.line
, loc
.col
);
79 case Kw
: return "(%s,%d): kw.%s <%s>".format(loc
.line
, loc
.col
, kw
, tkstr
);
80 case Id
: return "(%s,%d): Id:%s".format(loc
.line
, loc
.col
, tkstr
);
81 case Str
: return "(%s,%d): Str:%s".format(loc
.line
, loc
.col
, Lexer
.quote(tkstr
));
82 case Num
: return "(%s,%d): Num:%s".format(loc
.line
, loc
.col
, num
);
83 case Spec
: return "(%s,%d): Spec:<%s>".format(loc
.line
, loc
.col
, tkstr
);
/// Token text as an immutable `string`.
/// The text is copied with `.idup`, so the result does not anchor the whole
/// source string; every call allocates -- use with caution!
/// Empty token text yields `null`.
@property string istr () {
  pragma(inline, true);
  if (tkstr.length == 0) return null;
  return tkstr.idup;
}
94 const pure nothrow @nogc:
/// `token == keyword`: true only for a keyword token that holds exactly `akw`.
bool opEquals (Keyword akw) {
  pragma(inline, true);
  if (type != Type.Kw) return false;
  return (akw == kw);
}
/// Is this a keyword token holding exactly the keyword `akw`?
bool isKw (Keyword akw) {
  pragma(inline, true);
  if (type != Type.Kw) return false;
  return (kw == akw);
}
/// Is this a keyword token (any keyword)?
bool isKw () {
  pragma(inline, true);
  return (Type.Kw == type);
}
/// Raw token text (`tkstr`), returned as-is without copying.
const(char)[] str () {
  pragma(inline, true);
  return tkstr;
}
/// Keyword carried by this token, or `Keyword.NoKW` when it is not a keyword token.
Keyword Kw () {
  pragma(inline, true);
  if (type != Type.Kw) return Keyword.NoKW;
  return kw;
}
/// Is this an identifier token?
bool isId () {
  pragma(inline, true);
  return (Type.Id == type);
}
/// Is this a string token?
bool isStr () {
  pragma(inline, true);
  return (Type.Str == type);
}
/// Is this a number token?
bool isNum () {
  pragma(inline, true);
  return (Type.Num == type);
}
/// Is this a `Type.Spec` token?
bool isSpec () {
  pragma(inline, true);
  return (Type.Spec == type);
}
/// Is this the end-of-file token?
bool isEOF () {
  pragma(inline, true);
  return (Type.EOF == type);
}
110 // ////////////////////////////////////////////////////////////////////////// //
111 public final class Lexer
{
115 Loc cpos
; // position for last `getChar()`
116 Loc pend
; // end of previous token, for better error messages
118 bool lastWasEOL
= true;
120 Token tokeof
; // will be fixed by `nextToken()`
123 this(T
) (const(char)[] atext
, T afname
=null) if (is(T
: const(char)[])) {
125 if (afname
.length
> 0) { static if (is(T
== string
)) cpos
.file
= afname
; else cpos
.file
= afname
.idup
; }
126 tokeof
.loc
.file
= cpos
.file
;
// Throw `ErrorAt` with message `msg` at the current position: the lexer's `loc`
// when the lookahead buffer is empty, otherwise the start of the first buffered
// token (`lookup[0].loc`).
// `file`/`line` are forwarded to the exception and default to the caller's
// `__FILE__`/`__LINE__`.
133 void error (string msg
, string file
=__FILE__
, usize line
=__LINE__
) {
134 pragma(inline
, true);
135 throw new ErrorAt((lookup
.length
== 0 ? loc
: lookup
[0].loc
), msg
, null, file
, line
);
// Throw `ErrorAt` with message `msg`, positioned at the start of token `tk` (`tk.loc`).
// `file`/`line` are forwarded to the exception and default to the caller's
// `__FILE__`/`__LINE__`.
138 static private void error (in ref Token tk
, string msg
, string file
=__FILE__
, usize line
=__LINE__
) {
139 pragma(inline
, true);
140 throw new ErrorAt(tk
.loc
, msg
, null, file
, line
);
// Throw `ErrorAt` with message `msg`, positioned at the explicit location `loc`.
// Declared as a template (empty `()` parameter list) so that `auto ref` is
// allowed and both lvalue and rvalue `Loc` arguments are accepted.
// `file`/`line` are forwarded to the exception and default to the caller's
// `__FILE__`/`__LINE__`.
143 static private void error() (in auto ref Loc loc
, string msg
, string file
=__FILE__
, usize line
=__LINE__
) {
144 pragma(inline
, true);
145 throw new ErrorAt(loc
, msg
, null, file
, line
);
148 const(char)[] line (uint idx
) {
152 while (pos
< text
.length
&& text
.ptr
[pos
] != '\n') ++pos
;
155 if (pos
>= text
.length
) return null;
157 while (epos
< text
.length
&& text
.ptr
[epos
] != '\n') ++epos
;
158 while (epos
> pos
&& text
.ptr
[epos
-1] <= ' ') --epos
;
159 return text
[pos
..epos
];
163 if (lookup
.length
> 0) {
164 pend
= lookup
.ptr
[0].eloc
;
165 ++pend
.col
; // for better error messages
166 ++pend
.tpos
; // to be consistent
167 foreach (immutable idx
; 1..lookup
.length
) lookup
.ptr
[idx
-1] = lookup
.ptr
[idx
];
169 lookup
.assumeSafeAppend
;
174 @property pure nothrow @safe @nogc {
/// True when the lookahead buffer (`lookup`) holds no tokens.
bool empty () const {
  pragma(inline, true);
  return !lookup.length;
}
/// Current token: the first buffered token, or `tokeof` when the lookahead
/// buffer is empty. Returned by reference, so callers see the stored token.
ref inout(Token) front () inout {
  pragma(inline, true);
  // Plain slice indexing instead of `lookup.ptr[0]`: this method is declared
  // inside a `@safe` attribute block, and pointer indexing is not allowed in
  // `@safe` code. The length guard makes the access in-bounds either way.
  if (lookup.length) return lookup[0];
  return tokeof;
}
/// Start location of the current token (`front.loc`).
auto loc () const {
  pragma(inline, true);
  return front.loc;
}
/// End location of the current token (`front.eloc`).
auto eloc () const {
  pragma(inline, true);
  return front.eloc;
}
/// Recorded end of the previous token (`pend`), kept for better error messages.
auto peloc () const {
  pragma(inline, true);
  return pend;
}
/// Is the current token an identifier?
bool isId () const {
  pragma(inline, true);
  return front.isId;
}
/// Is the current token a string?
bool isStr () const {
  pragma(inline, true);
  return front.isStr;
}
/// Is the current token a number?
bool isNum () const {
  pragma(inline, true);
  return front.isNum;
}
/// Is the current token a `Spec` token?
bool isSpec () const {
  pragma(inline, true);
  return front.isSpec;
}
/// Is the current token exactly the keyword `kw`?
bool isKw (Keyword kw) const pure nothrow @safe @nogc {
  pragma(inline, true);
  return front.isKw(kw);
}
/// Is the current token a keyword (any keyword)?
bool isKw () const pure nothrow @safe @nogc {
  pragma(inline, true);
  return front.isKw();
}
/// `lexer == keyword`: compares the current token against `kw` using the
/// token's own keyword equality.
bool opEquals (Keyword kw) const pure nothrow @safe @nogc {
  pragma(inline, true);
  return front.opEquals(kw);
}
194 void expect (Keyword kw
, string file
=__FILE__
, usize line
=__LINE__
) {
195 if (!front
.isKw(kw
)) error(loc
, "`"~keywordtext(kw
)~"` expected", file
, line
);
199 // this converts id to `string` via `.idup`, use with caution!
200 // `.idup` is used to not anchor the whole source string
201 string
expectId (string msg
="identifier expected", string file
=__FILE__
, usize line
=__LINE__
) {
202 mustbeId(msg
, file
, line
);
203 auto res
= lookup
[0].istr
;
// this converts the string token's text to `string` via `.idup`, use with caution!
// `.idup` is used so that the result does not anchor the whole source string
210 string
expectStr (string msg
="string expected", string file
=__FILE__
, usize line
=__LINE__
) {
211 //pragma(inline, true);
212 mustbeStr(msg
, file
, line
);
213 auto res
= lookup
[0].istr
;
/// Require the current token to be of type `tp`; the check (and the throw on
/// mismatch) is done by the token itself.
/// The `mustbe*` family doesn't eat the token.
void mustbeType (Token.Type tp, string msg="identifier expected", string file=__FILE__, usize line=__LINE__) {
  pragma(inline, true);
  front.mustbeType(tp, msg, file, line);
}
/// Require the current token to be an identifier; doesn't eat the token.
void mustbeId (string msg="identifier expected", string file=__FILE__, usize line=__LINE__) {
  pragma(inline, true);
  front.mustbeId(msg, file, line);
}
/// Require the current token to be a string; doesn't eat the token.
void mustbeStr (string msg="string expected", string file=__FILE__, usize line=__LINE__) {
  pragma(inline, true);
  front.mustbeStr(msg, file, line);
}
/// Require the current token to be a number; doesn't eat the token.
void mustbeNum (string msg="number expected", string file=__FILE__, usize line=__LINE__) {
  pragma(inline, true);
  front.mustbeNum(msg, file, line);
}
224 bool eatKw (Keyword kw
) {
225 if (!isKw(kw
)) return false;
// Look ahead `dist` tokens without consuming anything (`dist` 0 is the first
// buffered token). Calls `nextToken()` until the buffer holds more than `dist`
// tokens or `eof` is set, then returns the token at index `dist` by reference.
// When the buffer is still too short, returns `tokeof` instead.
230 ref Token
peek (uint dist
) {
231 while (!eof
&& lookup
.length
<= dist
) nextToken();
232 return (dist
< lookup
.length ? lookup
.ptr
[dist
] : tokeof
);
/// `lexer[dist]`: look ahead `dist` tokens; same result as `peek(dist)`.
ref Token opIndex (usize dist) {
  pragma(inline, true);
  // `peek` takes `uint`. Where `usize` is wider than `uint`, passing `dist`
  // directly is an implicit narrowing conversion that the compiler rejects,
  // so convert explicitly. Distances beyond `uint.max` are clamped rather than
  // truncated, so an oversized index cannot wrap around to a small one.
  immutable uint d = (dist > uint.max ? uint.max : cast(uint)dist);
  return peek(d);
}
237 // return loc for next `getChar()`
238 Loc
nextLoc () nothrow @safe @nogc {
240 if (lastWasEOL
) { ++res
.line
; res
.col
= 1; } else ++res
.col
;
// Return the char `dist` positions ahead of the read position (`tpos`) without
// consuming it. Returns '\0' when that position is at or past the end of `text`;
// a NUL byte found inside the text is reported as a space, so '\0' is only
// returned for "past the end".
// `.ptr` indexing skips the bounds check; the preceding length test guards it.
244 char peekChar (uint dist
=0) nothrow @trusted @nogc {
245 pragma(inline
, true);
246 return (tpos
+dist
>= text
.length ?
'\0' : (text
.ptr
[tpos
+dist
] ? text
.ptr
[tpos
+dist
] : ' '));
250 char getChar () nothrow @trusted @nogc {
251 if (tpos
>= text
.length
) { tpos
= text
.length
; eof
= true; }
252 if (eof
) return '\0';
254 char ch
= text
.ptr
[tpos
++];
255 if (ch
== '\0') ch
= ' ';
256 if (lastWasEOL
) { ++cpos
.line
; cpos
.col
= 1; } else ++cpos
.col
;
257 lastWasEOL
= (ch
== '\n');
261 // skip blanks and comments
262 //TODO: make special "comment" token(s)?
263 void skipBlanks () @safe {
267 switch (peekChar(1)) {
268 case '/': // single-line comment
269 do { ch
= getChar(); } while (ch
!= 0 && ch
!= '\n');
271 case '*': // multiline comment
272 getChar(); // skip slash
274 getChar(); // skip star
276 ch
= ' '; // we need this
280 if (ch
== 0) error(lc
, "unterminated comment");
281 if (ch
== '/' && pch
== '*') break;
287 if (ch
== 0 || ch
> 32) return;
292 private void nextToken () {
296 if (peekChar
== '\0') {
300 //++tokeof.eloc.col; // for better error messages
301 //++tokeof.eloc.tpos; // to be consistent
311 if (ch
== '"' || ch
== '\'') {
313 tk
.type
= Token
.Type
.Str
;
314 ++tkspos
; // skip quote
317 if (ch
== 0) error(tk
, "unterminated string");
318 if (ch
== ech
) break;
320 tk
.tkstr
= text
[tkspos
..tpos
-1]; // -1 due to eaten quote
322 //++tk.eloc.col; // for better error messages
323 //++tk.eloc.tpos; // to be consistent
331 tk
.type
= Token
.Type
.Num
;
332 getChar(); // skip dollar
333 int dv
= digitValue(peekChar
);
334 if (dv
< 0 || dv
> 15) error(tk
, "hex number expected");
336 dv
= digitValue(peekChar
);
337 if (dv
< 0 || dv
> 15) break;
342 if (isIdChar(ch
) || ch
== '.') error(tk
, "hex number expected");
344 tk
.tkstr
= text
[tkspos
..tpos
];
346 //++tk.eloc.col; // for better error messages
347 //++tk.eloc.tpos; // to be consistent
353 if (isDigit(ch
) ||
(ch
== '.' && isDigit(peekChar
))) {
355 tk
.type
= Token
.Type
.Num
;
356 if (ch
!= '.') n
= ch
-'0';
360 if (!isDigit(peekChar
)) break;
364 if (peekChar
== '.') ch
= getChar();
368 if (!isDigit(peekChar
)) error(tk
, "real number expected");
371 if (!isDigit(peekChar
)) break;
377 if (peekChar
== 'e' || peekChar
== 'E') {
381 if (peekChar
== '+') getChar(); else if (peekChar
== '-') { getChar(); neg = true; }
382 if (!isDigit(peekChar
)) error(tk
, "invalid number");
384 while (isDigit(peekChar
)) {
387 if (e
< 0) error(tk
, "invalid number (exponent overflow)");
389 //{ import std.conv : to; assert(0, to!string(e)); }
391 while (e
-- > 0) n
= n
/10;
393 while (e
-- > 0) n
= n
*10;
397 tk
.tkstr
= text
[tkspos
..tpos
];
399 //++tk.eloc.col; // for better error messages
400 //++tk.eloc.tpos; // to be consistent
402 if (isIdChar(ch
) || ch
== '.') error(tk
, "invalid number");
409 tk
.type
= Token
.Type
.Id
;
410 while (isIdChar(peekChar
)) getChar();
411 tk
.tkstr
= text
[tkspos
..tpos
];
413 //++tk.eloc.col; // for better error messages
414 //++tk.eloc.tpos; // to be consistent
415 if (auto kw
= tk
.tkstr
in keywords
) {
416 tk
.type
= Token
.Type
.Kw
;
426 if (auto xkw
= dbuf
[0..1] in keywords
) {
427 tk
.type
= Token
.Type
.Kw
;
429 foreach (uint dpos
; 1..dbuf
.length
) {
430 dbuf
[dpos
] = peekChar
;
431 if (auto kw
= dbuf
[0..dpos
+1] in keywords
) {
432 tk
.type
= Token
.Type
.Kw
;
434 getChar(); // eat token char
440 tk
.type
= Token
.Type
.Spec
;
442 tk
.tkstr
= text
[tkspos
..tpos
];
444 //++tk.eloc.col; // for better error messages
445 //++tk.eloc.tpos; // to be consistent
/// Convenience wrapper: forwards `args` to `selectN` with `n` fixed to 0,
/// keeping the same `RetType` and `mode`.
auto select(RetType, string mode="peek", A...) (scope A args) {
  pragma(inline, true);
  return selectN!(RetType, mode)(0, args);
}
451 auto selectN(RetType
, string mode
="peek", A
...) (usize n
, scope A args
) {
452 import std
.traits
: ReturnType
;
454 static assert(mode
== "peek" || mode
== "pop" || mode
== "pop-nondefault", "selectN: invalid mode: '"~mode
~"'");
456 template isGoodDg(usize idx
, T
) {
457 private import std
.traits
;
458 static if (idx
< A
.length
&& isCallable
!(A
[idx
]) && arity
!(args
[idx
]) == 1) {
459 enum isGoodDg
= is(Parameters
!(A
[idx
])[0] == T
);
461 enum isGoodDg
= false;
465 template isGoodArglessDg(usize idx
) {
466 private import std
.traits
;
467 static if (idx
< A
.length
&& isCallable
!(A
[idx
]) && arity
!(args
[idx
]) == 0) {
468 enum isGoodArglessDg
= true;
470 enum isGoodArglessDg
= false;
474 // sorry, but this has to be string mixin, due to possible empty `arg`
475 enum DoCallDg(string arg
) =
476 "static if (!is(ReturnType!(A[xidx]) == void)) return cast(RetType)(args[xidx]("~arg
~")); else { args[xidx]("~arg
~"); return RetType.init; }";
478 // we can't have inner mixin templates, so... sorry, it's string again
480 static if (isGoodDg
!(xidx
, Token
)) { mixin(DoCallDg
!"tk"); }
481 else static if (isGoodDg
!(xidx
, Loc
)) { mixin(DoCallDg
!"tk.loc"); }
482 else static if (isGoodDg
!(xidx
, Token
.Type
)) { mixin(DoCallDg
!"tk.type"); }
483 else static if (isGoodDg
!(xidx
, Keyword
)) { mixin(DoCallDg
!"tk.Kw"); }
484 else static if (isGoodArglessDg
!(xidx
)) { mixin(DoCallDg
!""); }
485 else static assert(0, "selectN: invalid delegate #"~xidx
.stringof
);
490 foreach (immutable aidx
, auto arg
; args
) {
491 static if (aidx
%2 == 0) {
492 static if (is(typeof(arg
) == Keyword
) ||
is(typeof(arg
) == Token
.Type
)) {
493 static if (is(typeof(arg
) == Keyword
)) found
= (tk
== arg
);
494 else static if (is(typeof(arg
) == Token
.Type
)) found
= (tk
.type
== arg
);
495 else static assert(0, "wtf?!");
498 static if (mode
!= "peek") popFront();
506 static if (mode
== "pop") popFront();
513 error(tk
, "selectN is out of nodes");
518 private immutable byte[256] digitValues
= {
520 foreach (ubyte idx
; '0'..'9'+1) res
[idx
] = cast(byte)(idx
-'0');
521 foreach (ubyte idx
; 'A'..'Z'+1) res
[idx
] = cast(byte)(idx
-'A'+10);
522 foreach (ubyte idx
; 'a'..'z'+1) res
[idx
] = cast(byte)(idx
-'a'+10);
526 private immutable bool[256] idStartChars
= {
527 bool[256] res
= false;
528 foreach (ubyte idx
; 'A'..'Z'+1) res
[idx
] = true;
529 foreach (ubyte idx
; 'a'..'z'+1) res
[idx
] = true;
534 private immutable bool[256] idChars
= {
535 bool[256] res
= false;
536 foreach (ubyte idx
; '0'..'9'+1) res
[idx
] = true;
537 foreach (ubyte idx
; 'A'..'Z'+1) res
[idx
] = true;
538 foreach (ubyte idx
; 'a'..'z'+1) res
[idx
] = true;
/// Is `ch` an ASCII decimal digit ('0'..'9')?
bool isDigit() (char ch) {
  pragma(inline, true);
  return !(ch < '0' || ch > '9');
}
/// Numeric value of `ch` taken from the `digitValues` table
/// ('0'..'9' map to 0..9, letters map to 10 and up in either case).
/// NOTE(review): the value stored for non-alphanumeric chars is set by the
/// table initializer -- callers test for `< 0`, confirm against the table.
int digitValue() (char ch) {
  pragma(inline, true);
  immutable ubyte code = cast(ubyte)ch;
  return digitValues.ptr[code];
}
/// May `ch` start an identifier? Pure table lookup in `idStartChars`.
bool isIdStart() (char ch) {
  pragma(inline, true);
  immutable ubyte code = cast(ubyte)ch;
  return idStartChars.ptr[code];
}
/// May `ch` appear inside an identifier? Pure table lookup in `idChars`.
bool isIdChar() (char ch) {
  pragma(inline, true);
  immutable ubyte code = cast(ubyte)ch;
  return idChars.ptr[code];
}
548 string
gmlQuote (const(char)[] s
) {
549 import std
.array
: appender
;
550 auto res
= appender
!string();
551 enum Prev
{ Nothing
, Char
, Spec
}
552 Prev prev
= Prev
.Nothing
;
553 foreach (char ch
; s
) {
554 if (ch
< ' ' || ch
== 127 || ch
== '"') {
555 import std
.conv
: to
;
556 final switch (prev
) with (Prev
) {
558 case Char
: res
.put(`"+`); break;
559 case Spec
: res
.put(`+`); break;
563 res
.put(to
!string(cast(uint)ch
));
566 final switch (prev
) with (Prev
) {
567 case Nothing
: res
.put('"'); break;
569 case Spec
: res
.put(`+"`); break;
575 if (prev
== Prev
.Nothing
) return `""`;
576 if (prev
== Prev
.Char
) res
.put('"');
/// Quote a string: wrap it in double quotes and escape all special chars,
/// so that the quoted string forms a valid D string literal.
583 string
quote (const(char)[] s
) {
584 import std
.array
: appender
;
585 import std
.format
: formatElement
, FormatSpec
;
586 auto res
= appender
!string();
587 FormatSpec
!char fspc
; // defaults to 's'
588 formatElement(res
, s
, fspc
);
594 version(gml_lexer_test
) unittest {
597 auto s
= readText("scrDrawHUD.gml");
598 auto lex
= new Lexer(s
, "scrDrawHUD.gml");
601 //if (lex == Keyword.RCurly) writeln("*******************");
602 auto v
= lex
.select
!(int, "pop")(
603 Keyword
.LCurly
, (ref Token tk
) => 1,
604 Keyword
.RCurly
, (Keyword kw
) => 2,
605 Keyword
.Semi
, () => 6,
606 Keyword
.Sub
, (Loc loc
) => 99,
607 Token
.Type
.Num
, (ref Token tk
) => 3,
608 (ref Token tk
) => writeln(tk
),
610 if (v
) writeln("*** ", v
);
611 //writeln(v, ": ", lex.front);
614 } catch (ErrorAt e
) {
615 writeln("PARSE ERROR: ", e
.line
);