Putting together all the intermediary representation algorithms now
[fridhskrift.git] / intermediary / intermediary.cpp
blob6b8f063d450a16de030e03fd431d1f0272e132c6
1 #include <stack>
2 #include <ail/file.hpp>
3 #include <ail/string.hpp>
4 #include <ail/string.hpp>
5 #include <fridh/intermediary.hpp>
6 #include <fridh/lexer.hpp>
8 namespace fridh
10 intermediary_translator::intermediary_translator():
11 running(false)
15 bool intermediary_translator::load_module(std::string const & path, std::string const & name, std::string & error_message);
17 std::string content;
18 if(!ail::read_file(path, content))
20 error_message = "Unable to read file \"" + path + "\"";
21 return false;
24 module module_output;
25 bool success = translate_data(module_output, content, error_message);
26 if(!success)
27 return false;
29 return true;
32 bool intermediary_translator::name_is_used(std::string const & name)
34 return current_node->exists(name);
37 std::string const & intermediary_translator::get_declaration_name()
39 return *lines[line_offset][1].string;
42 void intermediary_translator::name_collision_check()
44 std::string const & name = get_declaration_name();
45 if(name_is_used(name))
46 error("Name \"" + name + "\" has already been used by another function or class in the current scope");
49 symbol_tree_node & intermediary_translator::add_name(symbol::type symbol_type)
51 std::string const & name = get_declaration_name();
52 symbol_tree_node & new_node = current_node->children[name];
53 new_node = symbol_tree_node(symbol_type);
54 new_node.parent = current_node;
55 current_node = &new_node;
56 return new_node;
59 void intermediary_translator::process_body(executable_units * output)
61 line_offset++;
62 indentation++;
64 bool is_class = (output == 0);
66 if(is_class)
67 nested_class_level++;
69 while(line_offset < line_end)
71 if(process_line(output))
73 if(indentation > 0)
74 indentation--;
75 if(is_class)
76 nested_class_level--;
77 current_node = current_node->parent;
78 break;
83 bool intermediary_translator::process_class()
85 lexeme_container & lexemes = lines[line_offset].lexemes;
86 if(!(lexemes.size() == 2 && lexemes[0].type == lexeme_type::class_operator && lexemes[1].type == lexeme_type::name))
87 return false;
89 name_collision_check();
91 add_name(symbol::class_symbol);
93 process_body(0);
94 return true;
97 bool intermediary_translator::process_function()
99 lexeme_container & lexemes = lines[line_offset].lexemes;
100 if(!(lexemes.size() >= 2 && lexemes[0].type == lexeme_type::function_declaration))
101 return false;
103 for(std::size_t i = 1, end = lexemes.size(); i < end; i++)
104 if(lexemes[i].type != lexeme_type::name)
105 error("Encountered an invalid lexeme type in a function declaration - at this point only names are permitted");
107 name_collision_check();
109 function & current_function = *add_name(symbol::class_symbol).function_pointer;
110 for(std::size_t i = 2, end = lexemes.size(); i < end; i++)
111 current_function.arguments.push_back(*lexemes[i].string);
113 process_body(&current_function.body);
114 return true;
117 void operator_resolution(parse_tree_nodes & input, parse_tree_node & output)
119 void call_check(std::size_t extremum_offset)
121 if(extremum_offset != 1)
122 throw ail::exception("Invalid call offset encountered during operator resolution");
125 if(input.size() != 1)
127 output = input[0];
128 return;
131 bool got_an_operator = false;
132 word extremum;
133 std::size_t extremum_offset;
135 for(std::size_t i = 0, end = input.size(); i < end; i++)
137 word precedence;
138 parse_tree_node & current_node = input[i];
139 if(get_parse_tree_node_precedence(current_node, precedence))
143 !got_an_operator ||
144 precedence > extremum ||
145 (is_right_to_left_operator(current_node) && precedence == extremum)
148 got_an_operator = true;
149 extremum = precedence;
150 extremum_offset = i;
155 if(!got_an_operator)
156 throw ail::exception("Failed to perform operator resolution");
158 parse_tree_node & operator_node = input[extremum_offset];
159 std::size_t next_offset = extremum_offset + 1;
160 switch(operator_node.type)
162 case parse_tree_node_type::unary_operator_node:
163 std::size_t argument_offset = next_offset;
164 parse_tree_unary_operator_node & unary_operator_node = *operator_node.unary_operator_pointer;
165 unary_operator_node.argument = input.at(argument_offset);
166 input.erase(input.begin() + argument_offset);
167 break;
169 case parse_tree_node_type::binary_operator_node:
170 parse_tree_binary_operator_node & binary_operator_node = *operator_node.binary_operator_pointer;
172 parse_tree_nodes
173 left_side,
174 right_side;
176 std::copy(input.begin(), input.begin() + extremum_offset, left_side.begin());
177 std::copy(input.begin() + next_offset, input.end(), right_side.begin());
179 operator_resolution(left_side, binary_operator_node.left_argument);
180 operator_resolution(right_side, binary_operator_node.right_argument);
182 output = operator_node;
183 break;
185 case parse_tree_node_type::call:
186 //this is questionable
187 call_check(extremum_offset);
188 operator_node.call_pointer->function = input[0];
189 input.erase(input.begin());
190 break;
192 case parse_tree_node_type::call_operator:
193 case parse_tree_node_type::spaced_call_operator:
194 call_check(extremum_offset);
195 operator_node.is_call();
196 operator_node.call_pointer->function = input[0];
197 input.erase(input.begin());
198 if(operator_node.type != parse_tree_node_type::spaced_call_operator && next_offset != input.size())
200 //it's an unary call
201 operator_node.call_pointer->arguments.push_back(input[next_offset]);
202 input.erase(input.end() - 1);
204 break;
206 default:
207 throw ail::exception("Invalid operator node type encountered during operator resolution");
211 void intermediary_translator::process_atomic_statement(lexeme_container & lexemes, std::size_t & offset, parse_tree_nodes & output, bool allow_multi_statements, lexeme_type::type terminator)
213 bool got_last_group = false;
214 lexeme_group::type last_group;
216 parse_tree_nodes arguments;
218 void set_last_group(lexeme_group::type new_last_group)
220 last_group = new_last_group;
221 got_last_group = true;
224 void add_unary_node(lexeme & current_lexeme)
226 parse_tree_node unary_operator_node;
227 lexeme_to_unary_operator_node(current_lexeme, unary_operator_node);
228 arguments.push_back(unary_operator_node);
231 void add_negation_lexeme()
233 add_unary_node(lexeme(lexeme_type::negation));
236 void process_node_group()
238 parse_tree_node new_node;
239 operator_resolution(arguments, new_node);
240 output.push_back(new_node);
243 symbol_prefix::type prefix = symbol_prefix::none;
245 for(std::size_t & i = offset, end = lexemes.size(); i < end; i++)
247 lexeme & current_lexeme = lexemes[i];
249 if(current_lexeme.type == terminator)
251 i++;
252 break;
254 else if(prefix != symbol_prefix::none && current_lexeme.type == lexeme_type::name)
255 error("Invalid use of a symbol prefix");
257 switch(current_lexeme.type)
259 case lexeme_type::scope_operator:
260 prefix = symbol_prefix::scope_operator;
261 continue;
263 case lexeme_type::class_operator:
264 prefix = symbol_prefix::class_operator;
265 continue;
267 case lexeme_type::bracket_start:
269 parse_tree_nodes content;
270 if(got_last_group && last_group == lexeme_group::argument)
272 process_atomic_statement(lexemes, offset, content, true, lexeme_type::bracket_end);
273 parse_tree_node call;
274 call.is_call();
275 call.call_pointer->arguments = content;
276 arguments.push_back(call);
278 else
280 process_atomic_statement(lexemes, offset, content, false, lexeme_type::bracket_end);
281 arguments.push_back(content[0]);
283 set_last_group(lexeme_group::argument);
284 break;
287 case lexeme_type::bracket_end:
288 error("Unmatched closing bracket");
290 case lexeme_type::array_start:
292 parse_tree_nodes elements;
293 process_atomic_statement(lexemes, offset, content, true, lexeme_type::array_end);
294 arguments.push_back(parse_tree_node(elements));
295 set_last_group(lexeme_group::argument);
296 break;
299 case lexeme_type::array_end:
300 error("Unmatched curled brace");
302 case lexeme_type::call_operator:
303 arguments.push_back(parse_tree_node(parse_tree_node_type::call_operator));
304 set_last_group(lexeme_group::call_operator);
305 continue;
307 case lexeme_type::spaced_call_operator:
308 arguments.push_back(parse_tree_node(parse_tree_node_type::spaced_call_operator));
309 set_last_group(lexeme_group::call_operator);
310 continue;
312 case lexeme_type::iterator:
313 arguments.push_back(parse_tree_node(parse_tree_node_type::iterator));
314 set_last_group(lexeme_group::argument);
315 continue;
318 lexeme_group::type group;
319 if(!get_lexeme_group(current_lexeme.type, group))
320 error("Invalid lexeme type in statement");
322 switch(group)
324 case lexeme_group::argument:
326 if(!allow_multi_statements && !got_last_group && last_group == lexeme_group::argument)
327 error("Encountered two arguments without an operator between them");
329 parse_tree_node argument_node;
330 lexeme_to_argument_node(current_lexeme, argument_node);
331 if(current_lexeme.type == lexeme_type::name)
333 argument_node.symbol_pointer->type = prefix;
334 if(prefix != symbol_prefix::none)
338 argument_node.type == parse_tree_node_type::binary_operator_node &&
339 argument_node.binary_operator_pointer->type == binary_operator_type::selection
341 error("Encountered a symbol prefix after a selection operator");
342 prefix = symbol_prefix::none;
345 arguments.push_back(argument_node);
347 if(allow_multi_statements)
349 process_node_group();
350 arguments.clear();
351 got_last_group = false;
352 continue;
354 break;
357 case lexeme_group::unary_operator:
358 if(got_last_group && last_group == lexeme_group::argument)
359 error("Encountered an argument followed by an unary operator without a binary operator between them");
360 add_unary_node(current_lexeme);
361 break;
363 case lexeme_group::binary_operator:
364 if(got_last_group)
366 switch(last_group)
368 case lexeme_group::unary_operator:
369 error("Encountered a unary operator followed by a binary operator");
371 case lexeme_group::binary_operator:
372 if(current_lexeme.type == lexeme_type::subtraction)
373 add_negation_lexeme();
374 else
375 error("Encountered two sequential binary operators");
376 break;
379 else
381 if(current_lexeme.type == lexeme_type::subtraction)
382 add_negation_lexeme();
383 else
384 error("Encountered a binary operator in the beginning of a statement");
385 break;
387 parse_tree_node binary_operator_node;
388 lexeme_to_binary_operator_node(current_lexeme, binary_operator_node);
389 arguments.push_back(binary_operator_node);
390 break;
393 set_last_group(group);
396 if(!got_last_group)
397 error("Empty statement");
399 if(last_group != lexeme_group::argument)
400 error("An operator is missing an argument");
402 process_node_group();
405 void intermediary_translator::process_offset_atomic_statement(parse_tree_node & output, std::size_t offset)
407 lexeme_container & lexemes = lines[line_offset].lexemes;
408 parse_tree_nodes nodes;
409 process_atomic_statement(lexemes, offset, nodes);
410 output = nodes[0];
411 line_offset++;
414 void intermediary_translator::process_composite_term(parse_tree_node & output)
416 process_offset_atomic_statement(output, 1);
419 bool intermediary_translator::is_if_statement()
421 return lines[line_offset].lexemes[0].type == lexeme_type::division;
424 bool intermediary_translator::process_if(executable_unit & output)
426 lexeme_container & lexemes = lines[line_offset].lexemes;
427 if(is_if_statement())
428 return false;
430 parse_tree_node conditional;
431 process_composite_term(conditional);
433 executable_units if_body;
434 process_body(&if_body);
436 bool is_if_else = false;
438 if(line_offset < line_end)
440 lexeme_container & lexemes = lines[line_offset].lexemes;
441 if(is_if_statement() && lexemes.size() == 1)
443 is_if_else = true;
445 executable_units else_body;
446 process_body(&else_body);
448 output.type = executable_unit_type::if_else_statement;
449 if_else_statement * & if_else_pointer = output.if_else_pointer;
450 if_else_pointer = new if_else_statement;
451 if_else_pointer->conditional_term = conditional;
452 if_else_pointer->if_body = if_body;
453 if_else_pointer->else_body = else_body;
457 if(!is_if_else)
459 output.type = executable_unit_type::if_statement;
460 if_statement * & if_pointer = output.if_pointer;
461 if_pointer = new if_statement;
462 if_pointer->conditional_term = conditional;
463 if_pointer->body = if_body;
466 return true;
469 bool intermediary_translator::process_while(executable_unit & output)
471 lexeme_container & lexemes = lines[line_offset].lexemes;
472 if(lexemes[0].type != lexeme_type::while_statement)
473 return false;
475 parse_tree_node conditional;
476 process_composite_term(conditional);
478 output.type = executable_unit_type::while_statement;
479 while_statement * & while_pointer = output.while_pointer;
480 while_pointer = new while_statement;
481 while_pointer->conditional_term = conditional;
483 process_body(&while_pointer->body);
485 return true;
488 bool intermediary_translator::process_for(executable_unit & output)
490 lexeme_container & lexemes = lines[line_offset].lexemes;
491 if(lexemes[0].type != lexeme_type::iteration)
492 return false;
494 if(lexemes.size() == 1)
496 //three part for
498 if(lines.size() - line_offset < 4)
499 throw ail::exception("Incomplete for statement");
501 line_offset++;
503 for(std::size_t i = line_offset, end = i + 3; i < end; i++)
505 if(lines[i].indentation_level != indentation_level)
506 throw ail::exception("Invalid indentation level in a for statement");
509 output.type = executable_unit_type::for_statement;
510 for_statement * & for_pointer = output.for_pointer;
511 for_pointer = new for_statement;
512 process_offset_atomic_statement(for_pointer->initialisation);
513 process_offset_atomic_statement(for_pointer->conditional);
514 process_offset_atomic_statement(for_pointer->iteration);
516 else
518 //for each statement
520 output.type = executable_unit_type::for_each_statement;
521 for_each_statement * & for_each_pointer = output.for_each_pointer;
522 for_each_pointer = new for_each_statement;
523 process_composite_term(for_each_pointer->conditional_term);
525 process_body(&for_each_pointer->body);
528 return true;
531 bool intermediary_translator::process_return(executable_unit & output)
533 lexeme_container & lexemes = lines[line_offset].lexemes;
534 if(lexemes[0].type != lexeme_type::selection_operator)
535 return false;
537 output.type = executable_unit_type::return_statement;
538 parse_tree_node * & statement_pointer = output.statement_pointer;
539 statement_pointer = new parse_tree_node;
540 process_composite_term(*statement_pointer);
542 return true;
545 void intermediary_translator::process_statement(executable_unit & output)
551 process_if(output) ||
552 process_while(output) ||
553 process_for(output) ||
554 process_return(output)
557 process_offset_atomic_statement(output);
560 bool intermediary_translator::process_line(executable_unit * output)
562 line_of_code & current_line = lines[line_offset];
563 if(current_line.indentation_level > indentation)
564 error("Unexpected increase in the indentation level");
566 if(!process_class())
568 if(output)
570 if(!process_function())
571 process_statement(*output);
573 else
574 error("Regular statements and assignments need to be placed within functions");
577 line_offset++;
579 if(line_offset == line_end)
581 //end of file -> end of the module entry function block
582 return true;
585 line_of_code & next_line = lines[line_offset];
587 //end of block?
588 return next_line.indentation_level < indentation;
591 bool intermediary_translator::translate_data(module & target_module, std::string const & data, std::string const & module_name, std::string & error_message_output)
593 lines = std::vector<line_of_code>();
594 lexer current_lexer(data, lines, error_message);
595 if(!current_lexer.parse())
596 return false;
598 current_node = &target_module.symbols;
599 indentation_level = 0;
600 nested_class_level = 0;
602 while(line_offset < line_end)
606 return true;
609 void intermediary_translator::error(std::string const & message)
611 throw ail::exception("Line " + ail::number_to_string(lines[line_offset].line) + ": " + message);