2 Copyright (C) 2003 by Walter Schreppers
3 Copyright (C) 2004 by Cies Breijs
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of version 2 of the GNU General Public
7 License as published by the Free Software Foundation.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
// Constructor: stores the address of the given stream in inputStream and
// allocates the Translate object that setTokenType() queries.
// NOTE(review): translate is created with a raw new and no matching delete is
// visible in this excerpt - confirm that the destructor releases it.
// NOTE(review): some original lines of this constructor are not visible here.
30 Lexer::Lexer(QTextIStream
& iStream
)
32 inputStream
= &iStream
;
36 translate
= new Translate();
// Tokenizer body (its header is not visible in this excerpt; the debug text
// below names it Lexer::lex()). It skips leading blanks, resets currentToken,
// records where the token starts and then classifies the token by its first
// character.
42 skipSpaces(); // skips the white space that is quite likely (indentation) in front of the Token
// Reset the token and stamp its start position with the current row/col.
45 currentToken
.type
= tokNotSet
; // not really needed
46 currentToken
.look
= "";
47 currentToken
.value
= 0;
48 currentToken
.start
.row
= row
;
49 currentToken
.start
.col
= col
;
51 QChar currentChar
= getChar();
// End of input: the token becomes tokEOF with the look "EOF".
53 if ( inputStream
->atEnd() )
55 kdDebug(0)<<"Lexer::lex(), got EOF."<<endl
;
56 currentToken
.type
= tokEOF
;
57 currentToken
.look
= "EOF";
58 ungetChar(currentChar
); // unget the currentChar and fix the row/col values
// A hash sign starts a comment: keep reading until a line feed or the end of
// the stream is reached.
62 if (currentChar
== '#')
64 while ( !inputStream
->atEnd() && !(currentChar
== '\x0a' || currentChar
== '\n') )
65 currentChar
= getChar();
68 // if (currentChar.category() == QChar::Separator_Line) somehow doesnt work
// This is a plain if (not an else-if), so the line feed that ended a comment
// is tokenized as tokEOL as well.
69 if (currentChar
== '\x0a' || currentChar
== '\n')
71 currentToken
.type
= tokEOL
;
72 currentToken
.look
= "EOL";
// A letter or a square bracket starts a word. The character is pushed back so
// that getWord() reads the word from its first character.
74 else if (currentChar
.isLetter() || currentChar
== '[' || currentChar
== ']')
76 ungetChar(currentChar
);
77 // sets currentToken.look by reference, and set the currentToken.type to tokUnknown
78 currentToken
.type
= getWord(currentToken
.look
);
79 setTokenType(currentToken
); // gets the actual tokenType
// A digit starts a number. The character is pushed back so that getNumber()
// reads the number from its first digit.
81 else if ( currentChar
.isNumber() )
83 ungetChar(currentChar
);
84 // set currentToken.value/look by reference, and set the currentToken.type to tokNumber
85 currentToken
.type
= getNumber(currentToken
.value
, currentToken
.look
);
// Greater-than: one more character is read; an equals sign gives tokGe (">=").
// The remaining statements push that character back and produce tokGt (">").
// NOTE(review): the else separating the two outcomes is not visible here.
87 else if (currentChar
== '>')
89 currentChar
= getChar();
90 if (currentChar
== '=')
92 currentToken
.type
= tokGe
;
93 currentToken
.look
= ">=";
97 ungetChar(currentChar
);
98 currentToken
.type
= tokGt
;
99 currentToken
.look
= ">";
// Less-than: one more character is read to tell "<=" apart from "<".
102 else if (currentChar
== '<')
104 currentChar
= getChar();
// An equals sign completes the two-character operator "<=".
105 if ( currentChar
== '=' )
107 currentToken
.type
= tokLe
;
108 currentToken
.look
= "<=";
// Otherwise the extra character is pushed back and a plain less-than token is
// produced (the else introducing this part is not visible in this excerpt).
112 ungetChar(currentChar
);
113 currentToken
.type
= tokLt
;
// The look of tokLt has to be "<". It used to be ">" (the look of tokGt), so
// the text stored in the token contradicted the token type.
114 currentToken
.look
= "<";
// Exclamation mark: one more character is read; an equals sign gives tokNe
// ("!="). The remaining statements push that character back and produce
// tokNot ("!").
// NOTE(review): the else separating the two outcomes is not visible here.
117 else if (currentChar
== '!')
119 currentChar
= getChar();
120 if (currentChar
== '=')
122 currentToken
.type
= tokNe
;
123 currentToken
.look
= "!=";
127 ungetChar(currentChar
);
128 currentToken
.type
= tokNot
;
129 currentToken
.look
= "!";
// Equals sign: a second equals sign gives tokEq ("=="); the remaining
// statements push the extra character back and produce tokAssign ("=").
132 else if (currentChar
== '=')
134 currentChar
= getChar();
135 if (currentChar
== '=')
137 currentToken
.type
= tokEq
;
138 currentToken
.look
= "==";
142 ungetChar(currentChar
);
143 currentToken
.type
= tokAssign
;
144 currentToken
.look
= "=";
// Single-character tokens: the type is set and look is the character itself.
147 else if (currentChar
== '(')
149 currentToken
.type
= tokBraceOpen
;
150 currentToken
.look
= "(";
152 else if (currentChar
== ')')
154 currentToken
.type
= tokBraceClose
;
155 currentToken
.look
= ")";
157 else if (currentChar
== '+')
159 currentToken
.type
= tokPlus
;
160 currentToken
.look
= "+";
162 else if (currentChar
== '-')
164 currentToken
.type
= tokMinus
;
165 currentToken
.look
= "-";
167 else if (currentChar
== '*')
169 currentToken
.type
= tokMul
;
170 currentToken
.look
= "*";
// Division sign (the token constant is spelled tokDev).
172 else if (currentChar
== '/')
174 currentToken
.type
= tokDev
;
175 currentToken
.look
= "/";
177 else if (currentChar
== ',')
179 currentToken
.type
= tokComma
;
180 currentToken
.look
= ",";
// A double quote starts a string; getString() fills in type and look.
182 else if (currentChar
== '"')
184 getString(currentToken
);
// Fallback: the token is tokUnknown and its look is the character that was
// read (the else introducing this part is not visible in this excerpt).
188 currentToken
.type
= tokUnknown
;
189 currentToken
.look
= currentChar
;
// Stamp the end position of the token with the current row/col.
192 currentToken
.end
.row
= row
;
193 currentToken
.end
.col
= col
;
// Fetches the next character into c: the pushed-back character when
// putBackChar is set, otherwise a fresh character read from inputStream.
// NOTE(review): the declaration of c, the bodies of the two line-feed checks
// (presumably row/col bookkeeping) and the return statement are not visible
// in this excerpt.
201 QChar
Lexer::getChar()
// A non-null putBackChar means ungetChar() stored a character earlier.
204 if ( !putBackChar
.isNull() )
206 c
= putBackChar
; // use the char that is stored to be put back
207 // kdDebug(0)<<"Lexer::getChar(), restored: '"<<c<<"' @ ("<<row<<", "<<col<<")"<<endl;
208 putBackChar
= QChar(); // and set putBackChar back to NULL
// Line feeds are treated specially for a restored character ...
209 if (c
== '\x0a' || c
== '\n')
222 *inputStream
>> c
; // take a QChar of the inputStream
223 // kdDebug(0)<<"Lexer::getChar(): '"<<c<<"' @ ("<<row<<", "<<col<<")"<<endl;
// ... and for a character freshly read from the stream.
224 if (c
== '\x0a' || c
== '\n')
// Counterpart of getChar(). Only the signature and a line-feed check are
// visible in this excerpt.
// NOTE(review): presumably stores c in putBackChar and steps row/col back, as
// the commented-out debug text below suggests - confirm against the full file.
238 void Lexer::ungetChar(QChar c
)
// Line feeds get separate treatment (the body of this check is not visible).
240 if (c
== '\x0a' || c
== '\n')
250 // kdDebug(0)<<"Lexer::ungetChar(), saved char: '"<<c<<"' and steped back to ("<<row<<", "<<col<<")"<<endl;
// Reads a word from the stream. A word has to start with a letter or a square
// bracket; in that case tokUnknown is returned, otherwise tokError.
// word is an output parameter.
// NOTE(review): the statement that appends characters to word is not visible
// in this excerpt.
253 int Lexer::getWord(QString
& word
)
255 // kdDebug(0)<<"Lexer::getWord()"<<endl;
256 QChar currentChar
= getChar();
257 if ( currentChar
.isLetter() || currentChar
== '[' || currentChar
== ']' ) {
// Keep reading while the character is a letter, a digit, an underscore or a
// square bracket and the stream still has input.
258 while ( ( currentChar
.isLetterOrNumber() || currentChar
== '_' || currentChar
== '[' || currentChar
== ']' ) && !inputStream
->atEnd() )
261 currentChar
= getChar();
263 kdDebug(0)<<"Lexer::getWord(), got NAME: '"<<word
<<"'"<<endl
;
// The loop stops on the first character that is not part of the word, so that
// character is handed back.
264 ungetChar(currentChar
); //read one too much
265 return tokUnknown
; // returns tokUnknown, the actual token is determined later in Lexer::setTokenType
267 else return tokError
;
// Resolves the type of a word token. Only tokens whose type is still
// tokUnknown are examined: the look is lower-cased, replaced by its key when
// alias2key() yields a non-empty result, and then compared with name2key() of
// every known keyword and command. A match sets currentToken.type.
// currentToken is modified in place.
270 void Lexer::setTokenType(Token
& currentToken
)
272 if (currentToken
.type
== tokUnknown
)
274 // make lowercase copy of the word as it was found in the inputStream
275 QString k
= currentToken
.look
.lower();
276 // if the key is an alias translate that alias to a key
// NOTE(review): alias2key(k) is evaluated twice for aliased words; keeping the
// first result in a local would avoid the second lookup.
277 if ( !translate
->alias2key(k
).isEmpty() ) k
= translate
->alias2key(k
);
// Control-flow and logic keywords.
279 if (k
== translate
->name2key("begin") ) currentToken
.type
= tokBegin
;
280 else if (k
== translate
->name2key("end") ) currentToken
.type
= tokEnd
;
281 else if (k
== translate
->name2key("while") ) currentToken
.type
= tokWhile
;
282 else if (k
== translate
->name2key("if") ) currentToken
.type
= tokIf
;
283 else if (k
== translate
->name2key("else") ) currentToken
.type
= tokElse
;
284 else if (k
== translate
->name2key("for") ) currentToken
.type
= tokFor
;
285 else if (k
== translate
->name2key("to") ) currentToken
.type
= tokTo
;
286 else if (k
== translate
->name2key("step") ) currentToken
.type
= tokStep
;
287 else if (k
== translate
->name2key("and") ) currentToken
.type
= tokAnd
;
288 else if (k
== translate
->name2key("or") ) currentToken
.type
= tokOr
;
289 else if (k
== translate
->name2key("not") ) currentToken
.type
= tokNot
;
290 else if (k
== translate
->name2key("return") ) currentToken
.type
= tokReturn
;
291 else if (k
== translate
->name2key("break") ) currentToken
.type
= tokBreak
;
292 else if (k
== translate
->name2key("run") ) currentToken
.type
= tokRun
;
293 else if (k
== translate
->name2key("foreach") ) currentToken
.type
= tokForEach
;
294 else if (k
== translate
->name2key("in") ) currentToken
.type
= tokIn
;
296 else if (k
== translate
->name2key("learn") ) currentToken
.type
= tokLearn
;
// Commands.
298 else if (k
== translate
->name2key("clear") ) currentToken
.type
= tokClear
;
299 else if (k
== translate
->name2key("go") ) currentToken
.type
= tokGo
;
300 else if (k
== translate
->name2key("gox") ) currentToken
.type
= tokGoX
;
301 else if (k
== translate
->name2key("goy") ) currentToken
.type
= tokGoY
;
302 else if (k
== translate
->name2key("forward") ) currentToken
.type
= tokForward
;
303 else if (k
== translate
->name2key("backward") ) currentToken
.type
= tokBackward
;
304 else if (k
== translate
->name2key("direction") ) currentToken
.type
= tokDirection
;
305 else if (k
== translate
->name2key("turnleft") ) currentToken
.type
= tokTurnLeft
;
306 else if (k
== translate
->name2key("turnright") ) currentToken
.type
= tokTurnRight
;
307 else if (k
== translate
->name2key("center") ) currentToken
.type
= tokCenter
;
308 else if (k
== translate
->name2key("setpenwidth") ) currentToken
.type
= tokSetPenWidth
;
309 else if (k
== translate
->name2key("penup") ) currentToken
.type
= tokPenUp
;
310 else if (k
== translate
->name2key("pendown") ) currentToken
.type
= tokPenDown
;
311 else if (k
== translate
->name2key("setfgcolor") ) currentToken
.type
= tokSetFgColor
;
312 else if (k
== translate
->name2key("setbgcolor") ) currentToken
.type
= tokSetBgColor
;
313 else if (k
== translate
->name2key("resizecanvas") ) currentToken
.type
= tokResizeCanvas
;
314 else if (k
== translate
->name2key("spriteshow") ) currentToken
.type
= tokSpriteShow
;
315 else if (k
== translate
->name2key("spritehide") ) currentToken
.type
= tokSpriteHide
;
316 else if (k
== translate
->name2key("spritepress") ) currentToken
.type
= tokSpritePress
;
317 else if (k
== translate
->name2key("spritechange") ) currentToken
.type
= tokSpriteChange
;
319 else if (k
== translate
->name2key("do") ) currentToken
.type
= tokDo
; // dummy commands
321 else if (k
== translate
->name2key("message") ) currentToken
.type
= tokMessage
;
322 else if (k
== translate
->name2key("inputwindow") ) currentToken
.type
= tokInputWindow
;
323 else if (k
== translate
->name2key("print") ) currentToken
.type
= tokPrint
;
324 else if (k
== translate
->name2key("fonttype") ) currentToken
.type
= tokFontType
;
325 else if (k
== translate
->name2key("fontsize") ) currentToken
.type
= tokFontSize
;
326 else if (k
== translate
->name2key("repeat") ) currentToken
.type
= tokRepeat
;
327 else if (k
== translate
->name2key("random") ) currentToken
.type
= tokRandom
;
328 else if (k
== translate
->name2key("wait") ) currentToken
.type
= tokWait
;
329 else if (k
== translate
->name2key("wrapon") ) currentToken
.type
= tokWrapOn
;
330 else if (k
== translate
->name2key("wrapoff") ) currentToken
.type
= tokWrapOff
;
331 else if (k
== translate
->name2key("reset") ) currentToken
.type
= tokReset
;
// Debug output for a word that matched nothing; the type stays tokUnknown.
// NOTE(review): the control flow around the two debug statements below (an
// else and possibly a return) is not visible in this excerpt.
334 kdDebug(0)<<"Lexer::setTokenType, found UNKNOWN word @ ("<<currentToken
.start
.row
<<", "<<currentToken
.start
.col
<<"), can be anything"<<endl
;
335 // t.type = tokUnknown; is already set
// Debug output reporting the token type together with the key and position.
338 kdDebug(0)<<"Lexer::setTokenType, found tok-number: '"<<currentToken
.type
<<"' with the key: '"<<k
<<"' @ ("<<currentToken
.start
.row
<<", "<<currentToken
.start
.col
<<")"<<endl
;
// Consumes white space in front of the next token. Line feeds are excluded on
// purpose: they are not skipped here because lex() turns them into tokEOL.
343 void Lexer::skipSpaces()
345 // kdDebug(0)<<"Lexer::skipSpaces(), skipping SPACES."<<endl;
346 QChar currentChar
= getChar();
347 // when the Separator_* groups can be identified in the QChar thing would be easier
// Continue while input remains and the character is a space other than a
// line feed.
348 while ( !inputStream
->atEnd() && ( currentChar
.isSpace() && !(currentChar
== '\x0a' || currentChar
== '\n') ) )
350 currentChar
= getChar();
// The character that stopped the loop belongs to the next token: hand it back.
352 ungetChar(currentChar
); // unget the tokEOL we likely just found
// Reads a number from the stream. A number has to start with a digit and may
// contain at most one decimal point; otherwise tokError is returned.
// num receives the numeric value (look converted with toDouble()); look is the
// textual form.
// NOTE(review): the statements that set hasPoint, append to look and return
// the success value are not visible in this excerpt.
356 int Lexer::getNumber(Value
& num
, QString
& look
)
358 // by reference the value (Value) and look part are set
359 // kdDebug(0)<<"Lexer::getNumber()"<<endl;
360 bool hasPoint
= false;
361 QChar currentChar
= getChar();
362 if ( currentChar
.isNumber() )
// Accept digits, plus a point as long as none has been seen yet, while the
// stream still has input.
364 while ( ( currentChar
.isNumber() || (currentChar
== '.' && !hasPoint
) ) && !inputStream
->atEnd() )
366 if (currentChar
== '.')
371 currentChar
= getChar();
// The loop stops on the first character that is not part of the number, so
// that character is handed back.
373 ungetChar(currentChar
); //read one too much
374 num
.setNumber( look
.toDouble() );
375 kdDebug(0)<<"Lexer::getNumber(), got NUMBER: '"<<num
.Number()<<"'"<<endl
;
378 else return tokError
;
// Reads a string constant. The opening double quote has already been consumed
// by the caller, so it is put back into the collected text first. On exit
// currentToken.type is tokString and currentToken.look holds the text.
381 void Lexer::getString(Token
& currentToken
)
383 QString str
= "\""; // start with a " cauz it just got lost
384 QChar currentChar
= QChar(); // start empty
// Read until a double quote, a line feed or the end of the stream.
385 while ( currentChar
!= '"' && !(currentChar
== '\x0a' || currentChar
== '\n') && !inputStream
->atEnd() )
387 currentChar
= getChar();
// A backslash introduces an escape: the following character selects what is
// appended (n, t, f or a double quote).
// NOTE(review): after an escaped double quote currentChar is a double quote,
// which also satisfies the exit test of the loop - an escaped quote appears to
// end the string early. The switch header is not visible here; confirm.
388 if (currentChar
== '\\') // escape sequence
390 currentChar
= getChar();
393 case 'n': str
+= '\n'; break;
394 case 't': str
+= '\t'; break;
395 case 'f': str
+= '\f'; break;
396 case '"': str
+= '"'; break;
// An unterminated string stops at the line feed, which is handed back.
399 else if (currentChar
== '\x0a' || currentChar
== '\n') // if the user forgot to delimit the string
401 ungetChar(currentChar
);
// Every other character, including the closing double quote, is appended.
404 else str
+= currentChar
;
406 currentToken
.type
= tokString
;
407 currentToken
.look
= str
;
// NOTE(review): the debug text names Lexer::getStringConstant although this is
// getString(), and it prints a closing quote without an opening one.
409 kdDebug(0)<<"Lexer::getStringConstant, got STRINGCONSTANT: "<<currentToken
.look
<<"'"<<endl
;