moved kdeaccessibility kdeaddons kdeadmin kdeartwork kdebindings kdeedu kdegames...
[kdeedu.git] / kturtle / src / lexer.cpp
blob6e8dec7994d90b2e7c6543c19635aae6f03b27a6
/*
    Copyright (C) 2003 by Walter Schreppers
    Copyright (C) 2004 by Cies Breijs

    This program is free software; you can redistribute it and/or
    modify it under the terms of version 2 of the GNU General Public
    License as published by the Free Software Foundation.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */
19 #include <qdom.h>
20 #include <qfile.h>
22 #include <kdebug.h>
23 #include <klocale.h>
25 #include "settings.h"
27 #include "lexer.h"
30 Lexer::Lexer(QTextIStream& iStream)
32 inputStream = &iStream;
33 row = 1;
34 col = 1;
35 prevCol = 1;
36 translate = new Translate();
40 Token Lexer::lex()
42 skipSpaces(); // skips the white space that it quite likely (indentation) infront of the Token
44 Token currentToken;
45 currentToken.type = tokNotSet; // not really needed
46 currentToken.look = "";
47 currentToken.value = 0;
48 currentToken.start.row = row;
49 currentToken.start.col = col;
51 QChar currentChar = getChar();
53 if ( inputStream->atEnd() )
55 kdDebug(0)<<"Lexer::lex(), got EOF."<<endl;
56 currentToken.type = tokEOF;
57 currentToken.look = "EOF";
58 ungetChar(currentChar); // unget the currentChar and fix the row/col values
59 return currentToken;
62 if (currentChar == '#')
64 while ( !inputStream->atEnd() && !(currentChar == '\x0a' || currentChar == '\n') )
65 currentChar = getChar();
68 // if (currentChar.category() == QChar::Separator_Line) somehow doesnt work
69 if (currentChar == '\x0a' || currentChar == '\n')
71 currentToken.type = tokEOL;
72 currentToken.look = "EOL";
74 else if (currentChar.isLetter() || currentChar == '[' || currentChar == ']')
76 ungetChar(currentChar);
77 // sets currentToken.look by reference, and set the currentToken.type to tokUnknown
78 currentToken.type = getWord(currentToken.look);
79 setTokenType(currentToken); // gets the actual tokenType
81 else if ( currentChar.isNumber() )
83 ungetChar(currentChar);
84 // set currentToken.value/look by reference, and set the currentToken.type to tokNumber
85 currentToken.type = getNumber(currentToken.value, currentToken.look);
87 else if (currentChar == '>')
89 currentChar = getChar();
90 if (currentChar == '=')
92 currentToken.type = tokGe;
93 currentToken.look = ">=";
95 else
97 ungetChar(currentChar);
98 currentToken.type = tokGt;
99 currentToken.look = ">";
102 else if (currentChar == '<')
104 currentChar = getChar();
105 if ( currentChar == '=' )
107 currentToken.type = tokLe;
108 currentToken.look = "<=";
110 else
112 ungetChar(currentChar);
113 currentToken.type = tokLt;
114 currentToken.look = ">";
117 else if (currentChar == '!')
119 currentChar = getChar();
120 if (currentChar == '=')
122 currentToken.type = tokNe;
123 currentToken.look = "!=";
125 else
127 ungetChar(currentChar);
128 currentToken.type = tokNot;
129 currentToken.look = "!";
132 else if (currentChar == '=')
134 currentChar = getChar();
135 if (currentChar == '=')
137 currentToken.type = tokEq;
138 currentToken.look = "==";
140 else
142 ungetChar(currentChar);
143 currentToken.type = tokAssign;
144 currentToken.look = "=";
147 else if (currentChar == '(')
149 currentToken.type = tokBraceOpen;
150 currentToken.look = "(";
152 else if (currentChar == ')')
154 currentToken.type = tokBraceClose;
155 currentToken.look = ")";
157 else if (currentChar == '+')
159 currentToken.type = tokPlus;
160 currentToken.look = "+";
162 else if (currentChar == '-')
164 currentToken.type = tokMinus;
165 currentToken.look = "-";
167 else if (currentChar == '*')
169 currentToken.type = tokMul;
170 currentToken.look = "*";
172 else if (currentChar == '/')
174 currentToken.type = tokDev;
175 currentToken.look = "/";
177 else if (currentChar == ',')
179 currentToken.type = tokComma;
180 currentToken.look = ",";
182 else if (currentChar == '"')
184 getString(currentToken);
186 else
188 currentToken.type = tokUnknown;
189 currentToken.look = currentChar;
192 currentToken.end.row = row;
193 currentToken.end.col = col;
194 return currentToken;
199 // PRIVATEs
201 QChar Lexer::getChar()
203 QChar c;
204 if ( !putBackChar.isNull() )
206 c = putBackChar; // use the char that is stored to be put back
207 // kdDebug(0)<<"Lexer::getChar(), restored: '"<<c<<"' @ ("<<row<<", "<<col<<")"<<endl;
208 putBackChar = QChar(); // and set putBackChar back to NULL
209 if (c == '\x0a' || c == '\n')
211 row++;
212 prevCol = col;
213 col = 1;
215 else
217 col++;
220 else
222 *inputStream >> c; // take a QChar of the inputStream
223 // kdDebug(0)<<"Lexer::getChar(): '"<<c<<"' @ ("<<row<<", "<<col<<")"<<endl;
224 if (c == '\x0a' || c == '\n')
226 row++;
227 prevCol = col;
228 col = 1;
230 else
232 col++;
235 return c;
238 void Lexer::ungetChar(QChar c)
240 if (c == '\x0a' || c == '\n')
242 row--;
243 col = prevCol;
245 else
247 col--;
249 putBackChar = c;
250 // kdDebug(0)<<"Lexer::ungetChar(), saved char: '"<<c<<"' and steped back to ("<<row<<", "<<col<<")"<<endl;
253 int Lexer::getWord(QString& word)
255 // kdDebug(0)<<"Lexer::getWord()"<<endl;
256 QChar currentChar = getChar();
257 if ( currentChar.isLetter() || currentChar == '[' || currentChar == ']' ) {
258 while ( ( currentChar.isLetterOrNumber() || currentChar == '_' || currentChar == '[' || currentChar == ']' ) && !inputStream->atEnd() )
260 word += currentChar;
261 currentChar = getChar();
263 kdDebug(0)<<"Lexer::getWord(), got NAME: '"<<word<<"'"<<endl;
264 ungetChar(currentChar); //read one too much
265 return tokUnknown; // returns tokUnknown, actual token is to be determained later in Lexer::setTokenType
267 else return tokError;
270 void Lexer::setTokenType(Token& currentToken)
272 if (currentToken.type == tokUnknown)
274 // make lowercase copy of the word as it was found in the inputStream
275 QString k = currentToken.look.lower();
276 // if the key is an alias translate that alias to a key
277 if ( !translate->alias2key(k).isEmpty() ) k = translate->alias2key(k);
279 if (k == translate->name2key("begin") ) currentToken.type = tokBegin;
280 else if (k == translate->name2key("end") ) currentToken.type = tokEnd;
281 else if (k == translate->name2key("while") ) currentToken.type = tokWhile;
282 else if (k == translate->name2key("if") ) currentToken.type = tokIf;
283 else if (k == translate->name2key("else") ) currentToken.type = tokElse;
284 else if (k == translate->name2key("for") ) currentToken.type = tokFor;
285 else if (k == translate->name2key("to") ) currentToken.type = tokTo;
286 else if (k == translate->name2key("step") ) currentToken.type = tokStep;
287 else if (k == translate->name2key("and") ) currentToken.type = tokAnd;
288 else if (k == translate->name2key("or") ) currentToken.type = tokOr;
289 else if (k == translate->name2key("not") ) currentToken.type = tokNot;
290 else if (k == translate->name2key("return") ) currentToken.type = tokReturn;
291 else if (k == translate->name2key("break") ) currentToken.type = tokBreak;
292 else if (k == translate->name2key("run") ) currentToken.type = tokRun;
293 else if (k == translate->name2key("foreach") ) currentToken.type = tokForEach;
294 else if (k == translate->name2key("in") ) currentToken.type = tokIn;
296 else if (k == translate->name2key("learn") ) currentToken.type = tokLearn;
298 else if (k == translate->name2key("clear") ) currentToken.type = tokClear;
299 else if (k == translate->name2key("go") ) currentToken.type = tokGo;
300 else if (k == translate->name2key("gox") ) currentToken.type = tokGoX;
301 else if (k == translate->name2key("goy") ) currentToken.type = tokGoY;
302 else if (k == translate->name2key("forward") ) currentToken.type = tokForward;
303 else if (k == translate->name2key("backward") ) currentToken.type = tokBackward;
304 else if (k == translate->name2key("direction") ) currentToken.type = tokDirection;
305 else if (k == translate->name2key("turnleft") ) currentToken.type = tokTurnLeft;
306 else if (k == translate->name2key("turnright") ) currentToken.type = tokTurnRight;
307 else if (k == translate->name2key("center") ) currentToken.type = tokCenter;
308 else if (k == translate->name2key("setpenwidth") ) currentToken.type = tokSetPenWidth;
309 else if (k == translate->name2key("penup") ) currentToken.type = tokPenUp;
310 else if (k == translate->name2key("pendown") ) currentToken.type = tokPenDown;
311 else if (k == translate->name2key("setfgcolor") ) currentToken.type = tokSetFgColor;
312 else if (k == translate->name2key("setbgcolor") ) currentToken.type = tokSetBgColor;
313 else if (k == translate->name2key("resizecanvas") ) currentToken.type = tokResizeCanvas;
314 else if (k == translate->name2key("spriteshow") ) currentToken.type = tokSpriteShow;
315 else if (k == translate->name2key("spritehide") ) currentToken.type = tokSpriteHide;
316 else if (k == translate->name2key("spritepress") ) currentToken.type = tokSpritePress;
317 else if (k == translate->name2key("spritechange") ) currentToken.type = tokSpriteChange;
319 else if (k == translate->name2key("do") ) currentToken.type = tokDo; // dummy commands
321 else if (k == translate->name2key("message") ) currentToken.type = tokMessage;
322 else if (k == translate->name2key("inputwindow") ) currentToken.type = tokInputWindow;
323 else if (k == translate->name2key("print") ) currentToken.type = tokPrint;
324 else if (k == translate->name2key("fonttype") ) currentToken.type = tokFontType;
325 else if (k == translate->name2key("fontsize") ) currentToken.type = tokFontSize;
326 else if (k == translate->name2key("repeat") ) currentToken.type = tokRepeat;
327 else if (k == translate->name2key("random") ) currentToken.type = tokRandom;
328 else if (k == translate->name2key("wait") ) currentToken.type = tokWait;
329 else if (k == translate->name2key("wrapon") ) currentToken.type = tokWrapOn;
330 else if (k == translate->name2key("wrapoff") ) currentToken.type = tokWrapOff;
331 else if (k == translate->name2key("reset") ) currentToken.type = tokReset;
332 else
334 kdDebug(0)<<"Lexer::setTokenType, found UNKNOWN word @ ("<<currentToken.start.row<<", "<<currentToken.start.col<<"), can be anything"<<endl;
335 // t.type = tokUnknown; is allready
338 kdDebug(0)<<"Lexer::setTokenType, found tok-number: '"<<currentToken.type<<"' with the key: '"<<k<<"' @ ("<<currentToken.start.row<<", "<<currentToken.start.col<<")"<<endl;
343 void Lexer::skipSpaces()
345 // kdDebug(0)<<"Lexer::skipSpaces(), skipping SPACES."<<endl;
346 QChar currentChar = getChar();
347 // when the Separator_* groups can be identified in the QChar thing would be easier
348 while ( !inputStream->atEnd() && ( currentChar.isSpace() && !(currentChar == '\x0a' || currentChar == '\n') ) )
350 currentChar = getChar();
352 ungetChar(currentChar); // unget the tokEOL we likely just found
356 int Lexer::getNumber(Value& num, QString& look)
358 // by reference the value (Value) and look part are set
359 // kdDebug(0)<<"Lexer::getNumber()"<<endl;
360 bool hasPoint = false;
361 QChar currentChar = getChar();
362 if ( currentChar.isNumber() )
364 while ( ( currentChar.isNumber() || (currentChar == '.' && !hasPoint) ) && !inputStream->atEnd() )
366 if (currentChar == '.')
368 hasPoint = true;
370 look += currentChar;
371 currentChar = getChar();
373 ungetChar(currentChar); //read one too much
374 num.setNumber( look.toDouble() );
375 kdDebug(0)<<"Lexer::getNumber(), got NUMBER: '"<<num.Number()<<"'"<<endl;
376 return tokNumber;
378 else return tokError;
381 void Lexer::getString(Token& currentToken)
383 QString str = "\""; // start with a " cauz it just got lost
384 QChar currentChar = QChar(); // start empty
385 while ( currentChar != '"' && !(currentChar == '\x0a' || currentChar == '\n') && !inputStream->atEnd() )
387 currentChar = getChar();
388 if (currentChar == '\\') // escape sequence
390 currentChar = getChar();
391 switch (currentChar)
393 case 'n': str += '\n'; break;
394 case 't': str += '\t'; break;
395 case 'f': str += '\f'; break;
396 case '"': str += '"'; break;
399 else if (currentChar == '\x0a' || currentChar == '\n') // if the user forgot to delimit the string
401 ungetChar(currentChar);
402 break;
404 else str += currentChar;
406 currentToken.type = tokString;
407 currentToken.look = str;
409 kdDebug(0)<<"Lexer::getStringConstant, got STRINGCONSTANT: "<<currentToken.look<<"'"<<endl;