moved the rest of the parser code to parser.{h,cpp}, respectively
[aqualang.git] / src / parser.h
blob 84fdc26a1eb658e254bf3c2559cc799df842e5b9
2 namespace Aqua
4 namespace Internal
6 static const std::unordered_map<std::string, TokenType> keywords =
8 {"if", Tok_If},
9 {"else", Tok_Else},
10 {"while", Tok_While},
11 {"for", Tok_For},
12 {"return", Tok_Return},
13 {"break", Tok_Break},
14 {"continue", Tok_Continue},
15 {"func", Tok_Function},
16 {"nil", Tok_Nil},
17 {"expression", Tok_Expression},
18 {"statement", Tok_Statement},
19 {"eval", Tok_Eval},
20 {"parse_statement", Tok_ParseStatement},
21 {"parse_expression", Tok_ParseExpression},
22 {"global", Tok_Global},
23 {"local", Tok_Local},
24 {"root", Tok_Root},
25 {"typeof", Tok_TypeOf},
26 {"sizeof", Tok_SizeOf},
28 // these aren't implemented yet
29 {"switch", Tok_Switch},
30 {"case", Tok_Case},
31 {"import", Tok_Import},
34 template<class Iter>
35 std::shared_ptr<std::vector<Token<Iter>>> tokenize(Iter begin, Iter end)
37 auto tokens_ptr = std::make_shared<std::vector<Token<Iter>>>();
38 std::vector<Token<Iter>> &tokens = *tokens_ptr;
39 Iter i = begin;
40 while(i!=end)
42 Iter start = i;
43 switch(*i)
45 case ' ':
46 case '\t':
47 case '\n': // ignore whitespaces
48 ++i;
49 break;
50 case '(':
51 tokens.emplace_back(Tok_LeftParen, i, i+1);
52 ++i;
53 break;
54 case ')':
55 tokens.emplace_back(Tok_RightParen, i, i+1);
56 ++i;
57 break;
58 case '[':
59 tokens.emplace_back(Tok_LeftBracket, i, i+1);
60 ++i;
61 break;
62 case ']':
63 tokens.emplace_back(Tok_RightBracket, i, i+1);
64 ++i;
65 break;
66 case '{':
67 tokens.emplace_back(Tok_LeftBrace, i, i+1);
68 ++i;
69 break;
70 case '}':
71 tokens.emplace_back(Tok_RightBrace, i, i+1);
72 ++i;
73 break;
74 case ',':
75 tokens.emplace_back(Tok_Comma, i, i+1);
76 ++i;
77 break;
78 case '.':
79 tokens.emplace_back(Tok_Dot, i, i+1);
80 ++i;
81 break;
82 case ';':
83 tokens.emplace_back(Tok_Semicolon, i, i+1);
84 ++i;
85 break;
86 case ':':
87 tokens.emplace_back(Tok_Colon, i, i+1);
88 ++i;
89 break;
90 case '+':
91 if(i+1 != end && *(i+1) == '=')
93 tokens.emplace_back(Tok_PlusEqual, i, i+2);
94 i+=2;
96 else
98 tokens.emplace_back(Tok_Plus, i, i+1);
99 ++i;
101 break;
102 case '-':
103 if(i+1 != end && *(i+1) == '=')
105 tokens.emplace_back(Tok_DashEqual, i, i+2);
106 i+=2;
108 else
110 tokens.emplace_back(Tok_Dash, i, i+1);
111 ++i;
113 break;
114 case '*':
115 if(i+1 != end && *(i+1) == '=')
117 tokens.emplace_back(Tok_StarEqual, i, i+2);
118 i+=2;
120 else
122 tokens.emplace_back(Tok_Star, i, i+1);
123 ++i;
125 break;
126 case '%':
127 if(i+1 != end && *(i+1) == '=')
129 tokens.emplace_back(Tok_PercentEqual, i, i+2);
130 i+=2;
132 else
134 tokens.emplace_back(Tok_Percent, i, i+1);
135 ++i;
137 break;
138 case '/':
139 if(i+1 != end && *(i+1) == '/')
141 ++i;
142 while(i!=end && *i != '\n')
144 ++i;
145 } ++i;
147 else if(i+1 != end && *(i+1) == '=')
149 tokens.emplace_back(Tok_SlashEqual, i, i+2);
150 i+=2;
152 else
154 tokens.emplace_back(Tok_Slash, i, i+1);
155 ++i;
157 break;
158 case '<':
159 if(i+1 != end && *(i+1) == '=')
161 tokens.emplace_back(Tok_LessEqual, i, i+2);
162 i+=2;
164 else
166 tokens.emplace_back(Tok_Less, i, i+1);
167 ++i;
169 break;
170 case '>':
171 if(i+1 != end && *(i+1) == '=')
173 tokens.emplace_back(Tok_GreaterEqual, i, i+2);
174 i+=2;
176 else
178 tokens.emplace_back(Tok_Greater, i, i+1);
179 ++i;
181 break;
182 case '=':
183 if(i+1 != end && *(i+1) == '=')
185 tokens.emplace_back(Tok_EqOp, i, i+2);
186 i+=2;
188 else
190 tokens.emplace_back(Tok_Equal, i, i+1);
191 ++i;
193 break;
194 case '!':
195 if(i+1 != end && *(i+1) == '=')
197 tokens.emplace_back(Tok_NotEqual, i, i+2);
198 i+=2;
200 else
202 tokens.emplace_back(Tok_Not, i, i+1);
203 i+=2;
205 break;
206 case '&':
207 if(i+1 != end && *(i+1) == '&')
209 tokens.emplace_back(Tok_LogicalAnd, i, i+2);
210 i+=2;
212 else
214 auto lc = Utils::LineColumn(begin, i);
215 throw std::runtime_error(
216 std::to_string(lc.first) + ":" +
217 std::to_string(lc.second) + ": unrecognized symbol: '" + *i + "'"
220 break;
221 case '|':
222 if(i+1 != end && *(i+1) == '|')
224 tokens.emplace_back(Tok_LogicalOr, i, i+2);
225 i+=2;
227 else
229 auto lc = Utils::LineColumn(begin, i);
230 throw std::runtime_error(
231 std::to_string(lc.first) + ":" +
232 std::to_string(lc.second) + ": unrecognized symbol: '" + *i + "'"
235 break;
236 case '"':
237 ++i;
238 while(i!=end && !(*i == '"' && *(i-1) != '\\'))
240 ++i;
242 ++i;
243 tokens.emplace_back(Tok_String, start, i);
244 break;
245 default:
246 if(std::isdigit(*i))
248 while(i!=end && std::isdigit(*i))
250 ++i;
252 if(i!=end && (*i == '.'))
254 ++i;
255 while((i != end) && std::isdigit(*i))
257 ++i;
260 if((i != end) && (*i == 'e'))
262 ++i;
263 if((i != end) && (*i == '-'))
265 ++i;
267 while(i!=end && std::isdigit(*i))
269 ++i;
272 tokens.emplace_back(Tok_Number, start, i);
273 break;
275 else if(std::isalpha(*i) || (*i == '_') || (*i == '$'))
277 ++i;
278 while(i!=end && (std::isalnum(*i) || *i == '_'))
280 ++i;
282 auto kw = keywords.find(std::string(start, i));
283 if(kw == keywords.end())
285 tokens.emplace_back(Tok_Identifier, start, i);
287 else
289 tokens.emplace_back(kw->second, start, i);
291 break;
293 else
295 auto lc = Utils::LineColumn(begin, i);
296 throw std::runtime_error(
297 std::to_string(lc.first) + ":" +
298 std::to_string(lc.second) + ": unrecognized symbol: '" + *i + "'"
303 return tokens_ptr;
306 class Parser
308 public:
309 using FuncType = Interpreter;
310 using Value = Interpreter::Value;
311 using Iter = std::vector<Token<std::string::iterator>>::iterator;
312 using return_t = std::shared_ptr<ASTNode<Iter,Value,FuncType>>;
314 template<typename StrIter>
315 return_t operator()(StrIter sbegin, StrIter send)
317 auto source = std::make_shared<std::string>(sbegin, send);
318 auto tokens = tokenize(source->begin(), source->end());
319 this->begin = tokens->begin();
320 this->end = tokens->end();
321 i = this->begin;
322 accepted = this->end;
323 auto node = statement_list();
324 node->source = source;
325 node->tokens = tokens;
326 node->root = node;
327 node->inject_dependencies();
328 return node;
331 template<class StrIter>
332 return_t parse_expression(StrIter sbegin, StrIter send)
334 auto source = std::make_shared<std::string>(sbegin, send);
335 auto tokens = tokenize(source->begin(), source->end());
336 this->begin = tokens->begin();
337 this->end = tokens->end();
338 i = this->begin;
339 accepted = this->end;
340 auto node = expression();
341 node->source = source;
342 node->tokens = tokens;
343 node->root = node;
344 node->inject_dependencies();
345 return node;
348 private:
349 Iter i;
350 Iter accepted;
352 public:
353 Iter begin;
354 Iter end;
356 public:
357 void advance();
359 void rewind(Iter pos);
361 bool peek(TokenType t, int amount = 0);
363 bool accept(TokenType t);
365 bool end_of_input();
367 bool expect(TokenType t);
369 void expect(const std::string &expected);
371 return_t variable();
373 return_t field_name();
375 return_t identifier_list();
377 return_t function();
379 return_t parse_expression();
381 return_t parse_statement();
383 return_t tree();
385 return_t root();
387 return_t eval();
389 return_t type();
391 return_t size();
393 return_t do_import();
395 return_t table_initializer();
397 return_t array_initializer();
399 return_t primary_expression();
401 return_t postfix_expression();
403 return_t unary_expression();
405 return_t multiplicative_expression();
407 return_t additive_expression();
409 return_t relational_expression();
411 return_t equality_expression();
413 return_t logical_and_expression();
415 return_t logical_or_expression();
417 return_t assignment_expression();
419 return_t initializer_assignment_expression();
421 return_t expression();
423 return_t block();
425 return_t if_statement();
427 return_t while_statement();
429 return_t nop();
431 return_t for_statement();
433 return_t break_statement();
435 return_t continue_statement();
437 return_t return_statement();
439 return_t statement();
441 return_t statement_list();