2 * coded by Ketmar // Invisible Vector <ketmar@ketmar.no-ip.org>
3 * Understanding is not required. Only obedience.
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 module gmlparser
.lexer
is aliced
;
20 import gmlparser
.tokens
;
23 // ////////////////////////////////////////////////////////////////////////// //
/// Render this location as `file (line,col)`.
string toString () const {
  import std.string : format;
  return format("%s (%s,%s)", file, line, col);
}
/// Render this location as `(line,col)`, omitting the file name.
string toStringNoFile () const {
  import std.string : format;
  return format("(%s,%s)", line, col);
}
/// A location is valid only when both `line` and `col` are greater than zero.
@property bool valid () const pure nothrow @safe @nogc {
  pragma(inline, true);
  if (!(line > 0)) return false;
  return (col > 0);
}
36 // ////////////////////////////////////////////////////////////////////////// //
37 public class ErrorAt
: Exception
{
/// Construct an error without an explicit source location.
/// All arguments are forwarded to the `Exception` constructor.
this (string msg, Throwable next=null, string file=__FILE__, usize line=__LINE__) pure nothrow @safe @nogc {
  super(msg, file, line, next);
}
/// Construct an error bound to source location `aloc`.
/// The location is stored in `loc`; the remaining arguments are forwarded
/// to the `Exception` constructor.
this (in Loc aloc, string msg, Throwable next=null, string file=__FILE__, usize line=__LINE__) pure nothrow @safe @nogc {
  this.loc = aloc;
  super(msg, file, line, next);
}
45 // ////////////////////////////////////////////////////////////////////////// //
61 Loc loc
, eloc
; // token start, token end (after last char)
62 Type type
= Type
.EOF
; // token type
69 void mustbeType (Token
.Type tp
, string msg
="identifier expected", string file
=__FILE__
, usize line
=__LINE__
) {
71 if (type
!= tp
) throw new ErrorAt(loc
, msg
, null, file
, line
);
/// Require this token to be an identifier; delegates to `mustbeType`,
/// which throws `ErrorAt` with `msg` on mismatch.
void mustbeId (string msg="identifier expected", string file=__FILE__, usize line=__LINE__) {
  pragma(inline, true);
  this.mustbeType(Type.Id, msg, file, line);
}
/// Require this token to be a string; delegates to `mustbeType`,
/// which throws `ErrorAt` with `msg` on mismatch.
void mustbeStr (string msg="string expected", string file=__FILE__, usize line=__LINE__) {
  pragma(inline, true);
  this.mustbeType(Type.Str, msg, file, line);
}
/// Require this token to be a number; delegates to `mustbeType`,
/// which throws `ErrorAt` with `msg` on mismatch.
void mustbeNum (string msg="number expected", string file=__FILE__, usize line=__LINE__) {
  pragma(inline, true);
  this.mustbeType(Type.Num, msg, file, line);
}
77 string
toString () const @trusted {
78 import std
.string
: format
;
79 final switch (type
) with (Type
) {
80 case EOF
: return "(%s,%d): <EOF>".format(loc
.line
, loc
.col
);
81 case Kw
: return "(%s,%d): kw.%s <%s>".format(loc
.line
, loc
.col
, kw
, tkstr
);
82 case Id
: return "(%s,%d): Id:%s".format(loc
.line
, loc
.col
, tkstr
);
83 case Str
: return "(%s,%d): Str:%s".format(loc
.line
, loc
.col
, Lexer
.quote(tkstr
));
84 case Num
: return "(%s,%d): Num:%s".format(loc
.line
, loc
.col
, num
);
85 case Spec
: return "(%s,%d): Spec:<%s>".format(loc
.line
, loc
.col
, tkstr
);
// Get the token text as an immutable `string`.
// The text is duplicated with `.idup` on every call, so use with caution;
// duplicating (instead of slicing) avoids anchoring the whole source string.
// Returns `null` for an empty token text.
@property string istr () {
  pragma(inline, true);
  if (tkstr.length == 0) return null;
  return tkstr.idup;
}
96 const pure nothrow @nogc:
/// `tok == Keyword.X`: true when this is a keyword token holding `akw`.
bool opEquals (Keyword akw) {
  pragma(inline, true);
  if (type != Type.Kw) return false;
  return (kw == akw);
}
/// True when this is a keyword token and its keyword is `akw`.
bool isKw (Keyword akw) {
  pragma(inline, true);
  if (type != Type.Kw) return false;
  return (kw == akw);
}
/// True when this token is a keyword of any kind.
bool isKw () {
  pragma(inline, true);
  return (Type.Kw == type);
}
/// Token text as stored in `tkstr` (no copy is made).
const(char)[] str () {
  pragma(inline, true);
  return tkstr;
}
/// Keyword carried by this token, or `Keyword.NoKW` when the token is not a keyword.
Keyword Kw () {
  pragma(inline, true);
  if (type == Type.Kw) return kw;
  return Keyword.NoKW;
}
/// True when the token type is `Type.Id`.
bool isId () {
  pragma(inline, true);
  return (Type.Id == type);
}
/// True when the token type is `Type.Str`.
bool isStr () {
  pragma(inline, true);
  return (Type.Str == type);
}
/// True when the token type is `Type.Num`.
bool isNum () {
  pragma(inline, true);
  return (Type.Num == type);
}
/// True when the token type is `Type.Spec`.
bool isSpec () {
  pragma(inline, true);
  return (Type.Spec == type);
}
/// True when the token type is `Type.EOF`.
bool isEOF () {
  pragma(inline, true);
  return (Type.EOF == type);
}
112 // ////////////////////////////////////////////////////////////////////////// //
113 public final class Lexer
{
117 Loc cpos
; // position for last `getChar()`
118 Loc pend
; // end of previous token, for better error messages
120 bool lastWasEOL
= true;
122 Token tokeof
; // will be fixed by `nextToken()`
125 this(T
) (const(char)[] atext
, T afname
=null) if (is(T
: const(char)[])) {
127 if (afname
.length
> 0) { static if (is(T
== string
)) cpos
.file
= afname
; else cpos
.file
= afname
.idup
; }
128 tokeof
.loc
.file
= cpos
.file
;
135 void error (string msg
, string file
=__FILE__
, usize line
=__LINE__
) {
136 pragma(inline
, true);
137 throw new ErrorAt((lookup
.length
== 0 ? loc
: lookup
[0].loc
), msg
, null, file
, line
);
140 static private void error (in ref Token tk
, string msg
, string file
=__FILE__
, usize line
=__LINE__
) {
141 pragma(inline
, true);
142 throw new ErrorAt(tk
.loc
, msg
, null, file
, line
);
145 static private void error() (in auto ref Loc loc
, string msg
, string file
=__FILE__
, usize line
=__LINE__
) {
146 pragma(inline
, true);
147 throw new ErrorAt(loc
, msg
, null, file
, line
);
150 const(char)[] line (uint idx
) {
154 while (pos
< text
.length
&& text
.ptr
[pos
] != '\n') ++pos
;
157 if (pos
>= text
.length
) return null;
159 while (epos
< text
.length
&& text
.ptr
[epos
] != '\n') ++epos
;
160 while (epos
> pos
&& text
.ptr
[epos
-1] <= ' ') --epos
;
161 return text
[pos
..epos
];
165 if (lookup
.length
> 0) {
166 pend
= lookup
.ptr
[0].eloc
;
167 ++pend
.col
; // for better error messages
168 ++pend
.tpos
; // to be consistent
169 foreach (immutable idx
; 1..lookup
.length
) lookup
.ptr
[idx
-1] = lookup
.ptr
[idx
];
171 lookup
.assumeSafeAppend
;
176 @property pure nothrow @safe @nogc {
/// Range primitive: true when the lookahead buffer holds no tokens.
bool empty () const {
  pragma(inline, true);
  return !(lookup.length != 0);
}
/// Range primitive: the current token, i.e. the first entry of the lookahead
/// buffer, or `tokeof` when the buffer is empty.
///
/// Uses plain slice indexing rather than `lookup.ptr[0]`: this method lives
/// in a `@safe` attribute block, where `.ptr` access / pointer indexing is
/// rejected by the compiler. The length is checked first, so the bounds
/// check can never fire.
ref inout(Token) front () inout {
  pragma(inline, true);
  return (lookup.length ? lookup[0] : tokeof);
}
179 // current token's loc
/// Start location (`loc`) of the current token, as returned by `front`.
auto loc () const {
  pragma(inline, true);
  return front.loc;
}
/// End location (`eloc`) of the current token, as returned by `front`.
auto eloc () const {
  pragma(inline, true);
  return front.eloc;
}
/// Copy of `pend`, the recorded end of the previous token.
auto peloc () const {
  pragma(inline, true);
  return pend;
}
/// True when the current token (`front`) is an identifier.
bool isId () const {
  pragma(inline, true);
  return front.isId;
}
/// True when the current token (`front`) is a string.
bool isStr () const {
  pragma(inline, true);
  return front.isStr;
}
/// True when the current token (`front`) is a number.
bool isNum () const {
  pragma(inline, true);
  return front.isNum;
}
/// True when the current token (`front`) has type `Spec`.
bool isSpec () const {
  pragma(inline, true);
  return front.isSpec;
}
/// True when the current token (`front`) is the keyword `kw`.
bool isKw (Keyword kw) const pure nothrow @safe @nogc {
  pragma(inline, true);
  return front.isKw(kw);
}
/// True when the current token (`front`) is any keyword.
bool isKw () const pure nothrow @safe @nogc {
  pragma(inline, true);
  return front.isKw();
}
/// `lexer == Keyword.X`: compares the current token (`front`) against `kw`
/// using the token's own keyword equality.
bool opEquals (Keyword kw) const pure nothrow @safe @nogc {
  pragma(inline, true);
  return front.opEquals(kw);
}
196 void expect (Keyword kw
, string file
=__FILE__
, usize line
=__LINE__
) {
197 if (!front
.isKw(kw
)) error(loc
, "`"~keywordtext(kw
)~"` expected", file
, line
);
201 // this converts id to `string` via `.idup`, use with caution!
202 // `.idup` is used to not anchor the whole source string
203 string
expectId (string msg
="identifier expected", string file
=__FILE__
, usize line
=__LINE__
) {
204 mustbeId(msg
, file
, line
);
205 auto res
= lookup
[0].istr
;
210 // this converts id to `string` via `.idup`, use with caution!
211 // `.idup` is used to not anchor the whole source string
212 string
expectStr (string msg
="string expected", string file
=__FILE__
, usize line
=__LINE__
) {
213 //pragma(inline, true);
214 mustbeStr(msg
, file
, line
);
215 auto res
= lookup
[0].istr
;
220 // `mustbe` doesn't eat token
/// Check that the current token (`front`) has type `tp`; the check itself is
/// done by the token's `mustbeType`. The token is not consumed.
void mustbeType (Token.Type tp, string msg="identifier expected", string file=__FILE__, usize line=__LINE__) {
  pragma(inline, true);
  front.mustbeType(tp, msg, file, line);
}
/// Check that the current token (`front`) is an identifier via the token's
/// `mustbeId`. The token is not consumed.
void mustbeId (string msg="identifier expected", string file=__FILE__, usize line=__LINE__) {
  pragma(inline, true);
  front.mustbeId(msg, file, line);
}
/// Check that the current token (`front`) is a string via the token's
/// `mustbeStr`. The token is not consumed.
void mustbeStr (string msg="string expected", string file=__FILE__, usize line=__LINE__) {
  pragma(inline, true);
  front.mustbeStr(msg, file, line);
}
/// Check that the current token (`front`) is a number via the token's
/// `mustbeNum`. The token is not consumed.
void mustbeNum (string msg="number expected", string file=__FILE__, usize line=__LINE__) {
  pragma(inline, true);
  front.mustbeNum(msg, file, line);
}
226 bool eatKw (Keyword kw
) {
227 if (!isKw(kw
)) return false;
232 ref Token
peek (uint dist
) {
233 while (!eof
&& lookup
.length
<= dist
) nextToken();
234 return (dist
< lookup
.length ? lookup
.ptr
[dist
] : tokeof
);
/// `lexer[dist]`: lookahead access, forwarded to `peek`.
///
/// `peek` takes a `uint`, while index expressions supply `usize`. Passing
/// `dist` straight through is an implicit narrowing conversion that does not
/// compile where `usize` is wider than `uint`, so the index is saturated at
/// `uint.max` instead. Where `usize` is 32 bits wide the comparison is never
/// true and the value passes through unchanged.
ref Token opIndex (usize dist) {
  pragma(inline, true);
  return peek(dist > uint.max ? uint.max : cast(uint)dist);
}
239 // return loc for next `getChar()`
240 Loc
nextLoc () nothrow @safe @nogc {
242 if (lastWasEOL
) { ++res
.line
; res
.col
= 1; } else ++res
.col
;
246 char peekChar (uint dist
=0) nothrow @trusted @nogc {
247 pragma(inline
, true);
248 return (tpos
+dist
>= text
.length ?
'\0' : (text
.ptr
[tpos
+dist
] ? text
.ptr
[tpos
+dist
] : ' '));
252 char getChar () nothrow @trusted @nogc {
253 if (tpos
>= text
.length
) { tpos
= text
.length
; eof
= true; }
254 if (eof
) return '\0';
256 char ch
= text
.ptr
[tpos
++];
257 if (ch
== '\0') ch
= ' ';
258 if (lastWasEOL
) { ++cpos
.line
; cpos
.col
= 1; } else ++cpos
.col
;
259 lastWasEOL
= (ch
== '\n');
263 // skip blanks and comments
264 //TODO: make special "comment" token(s)?
265 void skipBlanks () @safe {
269 switch (peekChar(1)) {
270 case '/': // single-line comment
271 do { ch
= getChar(); } while (ch
!= 0 && ch
!= '\n');
273 case '*': // multiline comment
274 getChar(); // skip slash
276 getChar(); // skip star
278 ch
= ' '; // we need this
282 if (ch
== 0) error(lc
, "unterminated comment");
283 if (ch
== '/' && pch
== '*') break;
289 if (ch
== 0 || ch
> 32) return;
294 private void nextToken () {
298 if (peekChar
== '\0') {
302 //++tokeof.eloc.col; // for better error messages
303 //++tokeof.eloc.tpos; // to be consistent
313 if (ch
== '"' || ch
== '\'') {
315 tk
.type
= Token
.Type
.Str
;
316 ++tkspos
; // skip quote
319 if (ch
== 0) error(tk
, "unterminated string");
320 if (ch
== ech
) break;
322 tk
.tkstr
= text
[tkspos
..tpos
-1]; // -1 due to eaten quote
324 //++tk.eloc.col; // for better error messages
325 //++tk.eloc.tpos; // to be consistent
333 tk
.type
= Token
.Type
.Num
;
334 getChar(); // skip dollar
335 int dv
= digitValue(peekChar
);
336 if (dv
< 0 || dv
> 15) error(tk
, "hex number expected");
338 dv
= digitValue(peekChar
);
339 if (dv
< 0 || dv
> 15) break;
344 if (isIdChar(ch
) || ch
== '.') error(tk
, "hex number expected");
346 tk
.tkstr
= text
[tkspos
..tpos
];
348 //++tk.eloc.col; // for better error messages
349 //++tk.eloc.tpos; // to be consistent
355 if (isDigit(ch
) ||
(ch
== '.' && isDigit(peekChar
))) {
357 tk
.type
= Token
.Type
.Num
;
358 if (ch
!= '.') n
= ch
-'0';
362 if (!isDigit(peekChar
)) break;
366 if (peekChar
== '.') ch
= getChar();
370 if (!isDigit(peekChar
)) error(tk
, "real number expected");
373 if (!isDigit(peekChar
)) break;
379 if (peekChar
== 'e' || peekChar
== 'E') {
383 if (peekChar
== '+') getChar(); else if (peekChar
== '-') { getChar(); neg = true; }
384 if (!isDigit(peekChar
)) error(tk
, "invalid number");
386 while (isDigit(peekChar
)) {
389 if (e
< 0) error(tk
, "invalid number (exponent overflow)");
391 //{ import std.conv : to; assert(0, to!string(e)); }
393 while (e
-- > 0) n
= n
/10;
395 while (e
-- > 0) n
= n
*10;
399 tk
.tkstr
= text
[tkspos
..tpos
];
401 //++tk.eloc.col; // for better error messages
402 //++tk.eloc.tpos; // to be consistent
404 if (isIdChar(ch
) || ch
== '.') error(tk
, "invalid number");
411 tk
.type
= Token
.Type
.Id
;
412 while (isIdChar(peekChar
)) getChar();
413 tk
.tkstr
= text
[tkspos
..tpos
];
415 //++tk.eloc.col; // for better error messages
416 //++tk.eloc.tpos; // to be consistent
417 if (auto kw
= tk
.tkstr
in keywords
) {
418 tk
.type
= Token
.Type
.Kw
;
428 if (auto xkw
= dbuf
[0..1] in keywords
) {
429 tk
.type
= Token
.Type
.Kw
;
431 foreach (uint dpos
; 1..dbuf
.length
) {
432 dbuf
[dpos
] = peekChar
;
433 if (auto kw
= dbuf
[0..dpos
+1] in keywords
) {
434 tk
.type
= Token
.Type
.Kw
;
436 getChar(); // eat token char
442 tk
.type
= Token
.Type
.Spec
;
444 tk
.tkstr
= text
[tkspos
..tpos
];
446 //++tk.eloc.col; // for better error messages
447 //++tk.eloc.tpos; // to be consistent
/// Convenience wrapper: `selectN` with `0` passed as its leading `n`
/// argument. `RetType`, `mode` and `args` are forwarded unchanged.
auto select(RetType, string mode="peek", A...) (scope A args) {
  pragma(inline, true);
  return selectN!(RetType, mode)(0, args);
}
453 auto selectN(RetType
, string mode
="peek", A
...) (usize n
, scope A args
) {
454 import std
.traits
: ReturnType
;
456 static assert(mode
== "peek" || mode
== "pop" || mode
== "pop-nondefault", "selectN: invalid mode: '"~mode
~"'");
458 template isGoodDg(usize idx
, T
) {
459 private import std
.traits
;
460 static if (idx
< A
.length
&& isCallable
!(A
[idx
]) && arity
!(args
[idx
]) == 1) {
461 enum isGoodDg
= is(Parameters
!(A
[idx
])[0] == T
);
463 enum isGoodDg
= false;
467 template isGoodArglessDg(usize idx
) {
468 private import std
.traits
;
469 static if (idx
< A
.length
&& isCallable
!(A
[idx
]) && arity
!(args
[idx
]) == 0) {
470 enum isGoodArglessDg
= true;
472 enum isGoodArglessDg
= false;
476 // sorry, but this has to be string mixin, due to possible empty `arg`
477 enum DoCallDg(string arg
) =
478 "static if (!is(ReturnType!(A[xidx]) == void)) return cast(RetType)(args[xidx]("~arg
~")); else { args[xidx]("~arg
~"); return RetType.init; }";
480 // we can't have inner mixin templates, so... sorry, it's string again
482 static if (isGoodDg
!(xidx
, Token
)) { mixin(DoCallDg
!"tk"); }
483 else static if (isGoodDg
!(xidx
, Loc
)) { mixin(DoCallDg
!"tk.loc"); }
484 else static if (isGoodDg
!(xidx
, Token
.Type
)) { mixin(DoCallDg
!"tk.type"); }
485 else static if (isGoodDg
!(xidx
, Keyword
)) { mixin(DoCallDg
!"tk.Kw"); }
486 else static if (isGoodArglessDg
!(xidx
)) { mixin(DoCallDg
!""); }
487 else static assert(0, "selectN: invalid delegate #"~xidx
.stringof
);
492 foreach (immutable aidx
, auto arg
; args
) {
493 static if (aidx
%2 == 0) {
494 static if (is(typeof(arg
) == Keyword
) ||
is(typeof(arg
) == Token
.Type
)) {
495 static if (is(typeof(arg
) == Keyword
)) found
= (tk
== arg
);
496 else static if (is(typeof(arg
) == Token
.Type
)) found
= (tk
.type
== arg
);
497 else static assert(0, "wtf?!");
500 static if (mode
!= "peek") popFront();
508 static if (mode
== "pop") popFront();
515 error(tk
, "selectN is out of nodes");
520 private immutable byte[256] digitValues
= {
522 foreach (ubyte idx
; '0'..'9'+1) res
[idx
] = cast(byte)(idx
-'0');
523 foreach (ubyte idx
; 'A'..'Z'+1) res
[idx
] = cast(byte)(idx
-'A'+10);
524 foreach (ubyte idx
; 'a'..'z'+1) res
[idx
] = cast(byte)(idx
-'a'+10);
528 private immutable bool[256] idStartChars
= {
529 bool[256] res
= false;
530 foreach (ubyte idx
; 'A'..'Z'+1) res
[idx
] = true;
531 foreach (ubyte idx
; 'a'..'z'+1) res
[idx
] = true;
536 private immutable bool[256] idChars
= {
537 bool[256] res
= false;
538 foreach (ubyte idx
; '0'..'9'+1) res
[idx
] = true;
539 foreach (ubyte idx
; 'A'..'Z'+1) res
[idx
] = true;
540 foreach (ubyte idx
; 'a'..'z'+1) res
[idx
] = true;
/// True for the ASCII decimal digits '0'..'9'.
bool isDigit() (char ch) {
  pragma(inline, true);
  // single unsigned range check: chars below '0' wrap around to huge values
  return (cast(uint)(ch-'0') < 10u);
}
/// Look up the numeric value of `ch` in the `digitValues` table
/// ('0'..'9' map to 0..9, letters map to 10 and up, case-insensitively).
/// NOTE(review): the value stored for non-alphanumeric chars is set outside
/// this function; callers test for `< 0`, so presumably negative -- confirm.
int digitValue() (char ch) {
  pragma(inline, true);
  immutable ubyte slot = cast(ubyte)ch;
  // unchecked access: `slot` is always within the 256-entry table
  return digitValues.ptr[slot];
}
/// True when `ch` is flagged in the `idStartChars` table, i.e. it may begin
/// an identifier (ASCII letters at least).
bool isIdStart() (char ch) {
  pragma(inline, true);
  immutable ubyte slot = cast(ubyte)ch;
  // unchecked access: `slot` is always within the 256-entry table
  return idStartChars.ptr[slot];
}
/// True when `ch` is flagged in the `idChars` table, i.e. it may appear
/// inside an identifier (ASCII letters and digits at least).
bool isIdChar() (char ch) {
  pragma(inline, true);
  immutable ubyte slot = cast(ubyte)ch;
  // unchecked access: `slot` is always within the 256-entry table
  return idChars.ptr[slot];
}
550 string
gmlQuote (const(char)[] s
) {
551 import std
.array
: appender
;
552 auto res
= appender
!string();
553 enum Prev
{ Nothing
, Char
, Spec
}
554 Prev prev
= Prev
.Nothing
;
555 foreach (char ch
; s
) {
556 if (ch
< ' ' || ch
== 127 || ch
== '"') {
557 import std
.conv
: to
;
558 final switch (prev
) with (Prev
) {
560 case Char
: res
.put(`"+`); break;
561 case Spec
: res
.put(`+`); break;
565 res
.put(to
!string(cast(uint)ch
));
568 final switch (prev
) with (Prev
) {
569 case Nothing
: res
.put('"'); break;
571 case Spec
: res
.put(`+"`); break;
577 if (prev
== Prev
.Nothing
) return `""`;
578 if (prev
== Prev
.Char
) res
.put('"');
582 /// quote string: append double quotes, screen all special chars;
583 /// so quoted string forms valid D string literal.
585 string
quote (const(char)[] s
) {
586 import std
.array
: appender
;
587 import std
.format
: formatElement
, FormatSpec
;
588 auto res
= appender
!string();
589 FormatSpec
!char fspc
; // defaults to 's'
590 formatElement(res
, s
, fspc
);
596 version(gml_lexer_test
) unittest {
599 auto s
= readText("scrDrawHUD.gml");
600 auto lex
= new Lexer(s
, "scrDrawHUD.gml");
603 //if (lex == Keyword.RCurly) writeln("*******************");
604 auto v
= lex
.select
!(int, "pop")(
605 Keyword
.LCurly
, (ref Token tk
) => 1,
606 Keyword
.RCurly
, (Keyword kw
) => 2,
607 Keyword
.Semi
, () => 6,
608 Keyword
.Sub
, (Loc loc
) => 99,
609 Token
.Type
.Num
, (ref Token tk
) => 3,
610 (ref Token tk
) => writeln(tk
),
612 if (v
) writeln("*** ", v
);
613 //writeln(v, ": ", lex.front);
616 } catch (ErrorAt e
) {
617 writeln("PARSE ERROR: ", e
.line
);