2 #include <ail/file.hpp>
3 #include <ail/string.hpp>
4 #include <ail/string.hpp>
5 #include <fridh/intermediary.hpp>
6 #include <fridh/lexer.hpp>
10 intermediary_translator::intermediary_translator():
15 bool intermediary_translator::load_module(std::string
const & path
, std::string
const & name
, std::string
& error_message
);
18 if(!ail::read_file(path
, content
))
20 error_message
= "Unable to read file \"" + path
+ "\"";
25 bool success
= translate_data(module_output
, content
, error_message
);
32 bool intermediary_translator::name_is_used(std::string
const & name
)
34 return current_node
->exists(name
);
37 std::string
const & intermediary_translator::get_declaration_name()
39 return *lines
[line_offset
][1].string
;
42 void intermediary_translator::name_collision_check()
44 std::string
const & name
= get_declaration_name();
45 if(name_is_used(name
))
46 error("Name \"" + name
+ "\" has already been used by another function or class in the current scope");
49 symbol_tree_node
& intermediary_translator::add_name(symbol::type symbol_type
)
51 std::string
const & name
= get_declaration_name();
52 symbol_tree_node
& new_node
= current_node
->children
[name
];
53 new_node
= symbol_tree_node(symbol_type
);
54 new_node
.parent
= current_node
;
55 current_node
= &new_node
;
59 void intermediary_translator::process_body(executable_units
* output
)
64 bool is_class
= (output
== 0);
69 while(line_offset
< line_end
)
71 if(process_line(output
))
77 current_node
= current_node
->parent
;
83 bool intermediary_translator::process_class()
85 lexeme_container
& lexemes
= lines
[line_offset
].lexemes
;
86 if(!(lexemes
.size() == 2 && lexemes
[0].type
== lexeme_type::class_operator
&& lexemes
[1].type
== lexeme_type::name
))
89 name_collision_check();
91 add_name(symbol::class_symbol
);
97 bool intermediary_translator::process_function()
99 lexeme_container
& lexemes
= lines
[line_offset
].lexemes
;
100 if(!(lexemes
.size() >= 2 && lexemes
[0].type
== lexeme_type::function_declaration
))
103 for(std::size_t i
= 1, end
= lexemes
.size(); i
< end
; i
++)
104 if(lexemes
[i
].type
!= lexeme_type::name
)
105 error("Encountered an invalid lexeme type in a function declaration - at this point only names are permitted");
107 name_collision_check();
109 function
& current_function
= *add_name(symbol::class_symbol
).function_pointer
;
110 for(std::size_t i
= 2, end
= lexemes
.size(); i
< end
; i
++)
111 current_function
.arguments
.push_back(*lexemes
[i
].string
);
113 process_body(¤t_function
.body
);
117 void operator_resolution(parse_tree_nodes
& input
, parse_tree_node
& output
)
119 void call_check(std::size_t extremum_offset
)
121 if(extremum_offset
!= 1)
122 throw ail::exception("Invalid call offset encountered during operator resolution");
125 if(input
.size() != 1)
131 bool got_an_operator
= false;
133 std::size_t extremum_offset
;
135 for(std::size_t i
= 0, end
= input
.size(); i
< end
; i
++)
138 parse_tree_node
& current_node
= input
[i
];
139 if(get_parse_tree_node_precedence(current_node
, precedence
))
144 precedence
> extremum
||
145 (is_right_to_left_operator(current_node
) && precedence
== extremum
)
148 got_an_operator
= true;
149 extremum
= precedence
;
156 throw ail::exception("Failed to perform operator resolution");
158 parse_tree_node
& operator_node
= input
[extremum_offset
];
159 std::size_t next_offset
= extremum_offset
+ 1;
160 switch(operator_node
.type
)
162 case parse_tree_node_type::unary_operator_node
:
163 std::size_t argument_offset
= next_offset
;
164 parse_tree_unary_operator_node
& unary_operator_node
= *operator_node
.unary_operator_pointer
;
165 unary_operator_node
.argument
= input
.at(argument_offset
);
166 input
.erase(input
.begin() + argument_offset
);
169 case parse_tree_node_type::binary_operator_node
:
170 parse_tree_binary_operator_node
& binary_operator_node
= *operator_node
.binary_operator_pointer
;
176 std::copy(input
.begin(), input
.begin() + extremum_offset
, left_side
.begin());
177 std::copy(input
.begin() + next_offset
, input
.end(), right_side
.begin());
179 operator_resolution(left_side
, binary_operator_node
.left_argument
);
180 operator_resolution(right_side
, binary_operator_node
.right_argument
);
182 output
= operator_node
;
185 case parse_tree_node_type::call
:
186 //this is questionable
187 call_check(extremum_offset
);
188 operator_node
.call_pointer
->function
= input
[0];
189 input
.erase(input
.begin());
192 case parse_tree_node_type::call_operator
:
193 case parse_tree_node_type::spaced_call_operator
:
194 call_check(extremum_offset
);
195 operator_node
.is_call();
196 operator_node
.call_pointer
->function
= input
[0];
197 input
.erase(input
.begin());
198 if(operator_node
.type
!= parse_tree_node_type::spaced_call_operator
&& next_offset
!= input
.size())
201 operator_node
.call_pointer
->arguments
.push_back(input
[next_offset
]);
202 input
.erase(input
.end() - 1);
207 throw ail::exception("Invalid operator node type encountered during operator resolution");
211 void intermediary_translator::process_atomic_statement(lexeme_container
& lexemes
, std::size_t & offset
, parse_tree_nodes
& output
, bool allow_multi_statements
, lexeme_type::type terminator
)
213 bool got_last_group
= false;
214 lexeme_group::type last_group
;
216 parse_tree_nodes arguments
;
218 void set_last_group(lexeme_group::type new_last_group
)
220 last_group
= new_last_group
;
221 got_last_group
= true;
224 void add_unary_node(lexeme
& current_lexeme
)
226 parse_tree_node unary_operator_node
;
227 lexeme_to_unary_operator_node(current_lexeme
, unary_operator_node
);
228 arguments
.push_back(unary_operator_node
);
231 void add_negation_lexeme()
233 add_unary_node(lexeme(lexeme_type::negation
));
236 void process_node_group()
238 parse_tree_node new_node
;
239 operator_resolution(arguments
, new_node
);
240 output
.push_back(new_node
);
243 symbol_prefix::type prefix
= symbol_prefix::none
;
245 for(std::size_t & i
= offset
, end
= lexemes
.size(); i
< end
; i
++)
247 lexeme
& current_lexeme
= lexemes
[i
];
249 if(current_lexeme
.type
== terminator
)
254 else if(prefix
!= symbol_prefix::none
&& current_lexeme
.type
== lexeme_type::name
)
255 error("Invalid use of a symbol prefix");
257 switch(current_lexeme
.type
)
259 case lexeme_type::scope_operator
:
260 prefix
= symbol_prefix::scope_operator
;
263 case lexeme_type::class_operator
:
264 prefix
= symbol_prefix::class_operator
;
267 case lexeme_type::bracket_start
:
269 parse_tree_nodes content
;
270 if(got_last_group
&& last_group
== lexeme_group::argument
)
272 process_atomic_statement(lexemes
, offset
, content
, true, lexeme_type::bracket_end
);
273 parse_tree_node call
;
275 call
.call_pointer
->arguments
= content
;
276 arguments
.push_back(call
);
280 process_atomic_statement(lexemes
, offset
, content
, false, lexeme_type::bracket_end
);
281 arguments
.push_back(content
[0]);
283 set_last_group(lexeme_group::argument
);
287 case lexeme_type::bracket_end
:
288 error("Unmatched closing bracket");
290 case lexeme_type::array_start
:
292 parse_tree_nodes elements
;
293 process_atomic_statement(lexemes
, offset
, content
, true, lexeme_type::array_end
);
294 arguments
.push_back(parse_tree_node(elements
));
295 set_last_group(lexeme_group::argument
);
299 case lexeme_type::array_end
:
300 error("Unmatched curled brace");
302 case lexeme_type::call_operator
:
303 arguments
.push_back(parse_tree_node(parse_tree_node_type::call_operator
));
304 set_last_group(lexeme_group::call_operator
);
307 case lexeme_type::spaced_call_operator
:
308 arguments
.push_back(parse_tree_node(parse_tree_node_type::spaced_call_operator
));
309 set_last_group(lexeme_group::call_operator
);
312 case lexeme_type::iterator
:
313 arguments
.push_back(parse_tree_node(parse_tree_node_type::iterator
));
314 set_last_group(lexeme_group::argument
);
318 lexeme_group::type group
;
319 if(!get_lexeme_group(current_lexeme
.type
, group
))
320 error("Invalid lexeme type in statement");
324 case lexeme_group::argument
:
326 if(!allow_multi_statements
&& !got_last_group
&& last_group
== lexeme_group::argument
)
327 error("Encountered two arguments without an operator between them");
329 parse_tree_node argument_node
;
330 lexeme_to_argument_node(current_lexeme
, argument_node
);
331 if(current_lexeme
.type
== lexeme_type::name
)
333 argument_node
.symbol_pointer
->type
= prefix
;
334 if(prefix
!= symbol_prefix::none
)
338 argument_node
.type
== parse_tree_node_type::binary_operator_node
&&
339 argument_node
.binary_operator_pointer
->type
== binary_operator_type::selection
341 error("Encountered a symbol prefix after a selection operator");
342 prefix
= symbol_prefix::none
;
345 arguments
.push_back(argument_node
);
347 if(allow_multi_statements
)
349 process_node_group();
351 got_last_group
= false;
357 case lexeme_group::unary_operator
:
358 if(got_last_group
&& last_group
== lexeme_group::argument
)
359 error("Encountered an argument followed by an unary operator without a binary operator between them");
360 add_unary_node(current_lexeme
);
363 case lexeme_group::binary_operator
:
368 case lexeme_group::unary_operator
:
369 error("Encountered a unary operator followed by a binary operator");
371 case lexeme_group::binary_operator
:
372 if(current_lexeme
.type
== lexeme_type::subtraction
)
373 add_negation_lexeme();
375 error("Encountered two sequential binary operators");
381 if(current_lexeme
.type
== lexeme_type::subtraction
)
382 add_negation_lexeme();
384 error("Encountered a binary operator in the beginning of a statement");
387 parse_tree_node binary_operator_node
;
388 lexeme_to_binary_operator_node(current_lexeme
, binary_operator_node
);
389 arguments
.push_back(binary_operator_node
);
393 set_last_group(group
);
397 error("Empty statement");
399 if(last_group
!= lexeme_group::argument
)
400 error("An operator is missing an argument");
402 process_node_group();
405 void intermediary_translator::process_offset_atomic_statement(parse_tree_node
& output
, std::size_t offset
)
407 lexeme_container
& lexemes
= lines
[line_offset
].lexemes
;
408 parse_tree_nodes nodes
;
409 process_atomic_statement(lexemes
, offset
, nodes
);
414 void intermediary_translator::process_composite_term(parse_tree_node
& output
)
416 process_offset_atomic_statement(output
, 1);
419 bool intermediary_translator::is_if_statement()
421 return lines
[line_offset
].lexemes
[0].type
== lexeme_type::division
;
424 bool intermediary_translator::process_if(executable_unit
& output
)
426 lexeme_container
& lexemes
= lines
[line_offset
].lexemes
;
427 if(is_if_statement())
430 parse_tree_node conditional
;
431 process_composite_term(conditional
);
433 executable_units if_body
;
434 process_body(&if_body
);
436 bool is_if_else
= false;
438 if(line_offset
< line_end
)
440 lexeme_container
& lexemes
= lines
[line_offset
].lexemes
;
441 if(is_if_statement() && lexemes
.size() == 1)
445 executable_units else_body
;
446 process_body(&else_body
);
448 output
.type
= executable_unit_type::if_else_statement
;
449 if_else_statement
* & if_else_pointer
= output
.if_else_pointer
;
450 if_else_pointer
= new if_else_statement
;
451 if_else_pointer
->conditional_term
= conditional
;
452 if_else_pointer
->if_body
= if_body
;
453 if_else_pointer
->else_body
= else_body
;
459 output
.type
= executable_unit_type::if_statement
;
460 if_statement
* & if_pointer
= output
.if_pointer
;
461 if_pointer
= new if_statement
;
462 if_pointer
->conditional_term
= conditional
;
463 if_pointer
->body
= if_body
;
469 bool intermediary_translator::process_while(executable_unit
& output
)
471 lexeme_container
& lexemes
= lines
[line_offset
].lexemes
;
472 if(lexemes
[0].type
!= lexeme_type::while_statement
)
475 parse_tree_node conditional
;
476 process_composite_term(conditional
);
478 output
.type
= executable_unit_type::while_statement
;
479 while_statement
* & while_pointer
= output
.while_pointer
;
480 while_pointer
= new while_statement
;
481 while_pointer
->conditional_term
= conditional
;
483 process_body(&while_pointer
->body
);
488 bool intermediary_translator::process_for(executable_unit
& output
)
490 lexeme_container
& lexemes
= lines
[line_offset
].lexemes
;
491 if(lexemes
[0].type
!= lexeme_type::iteration
)
494 if(lexemes
.size() == 1)
498 if(lines
.size() - line_offset
< 4)
499 throw ail::exception("Incomplete for statement");
503 for(std::size_t i
= line_offset
, end
= i
+ 3; i
< end
; i
++)
505 if(lines
[i
].indentation_level
!= indentation_level
)
506 throw ail::exception("Invalid indentation level in a for statement");
509 output
.type
= executable_unit_type::for_statement
;
510 for_statement
* & for_pointer
= output
.for_pointer
;
511 for_pointer
= new for_statement
;
512 process_offset_atomic_statement(for_pointer
->initialisation
);
513 process_offset_atomic_statement(for_pointer
->conditional
);
514 process_offset_atomic_statement(for_pointer
->iteration
);
520 output
.type
= executable_unit_type::for_each_statement
;
521 for_each_statement
* & for_each_pointer
= output
.for_each_pointer
;
522 for_each_pointer
= new for_each_statement
;
523 process_composite_term(for_each_pointer
->conditional_term
);
525 process_body(&for_each_pointer
->body
);
531 bool intermediary_translator::process_return(executable_unit
& output
)
533 lexeme_container
& lexemes
= lines
[line_offset
].lexemes
;
534 if(lexemes
[0].type
!= lexeme_type::selection_operator
)
537 output
.type
= executable_unit_type::return_statement
;
538 parse_tree_node
* & statement_pointer
= output
.statement_pointer
;
539 statement_pointer
= new parse_tree_node
;
540 process_composite_term(*statement_pointer
);
545 void intermediary_translator::process_statement(executable_unit
& output
)
551 process_if(output
) ||
552 process_while(output
) ||
553 process_for(output
) ||
554 process_return(output
)
557 process_offset_atomic_statement(output
);
560 bool intermediary_translator::process_line(executable_unit
* output
)
562 line_of_code
& current_line
= lines
[line_offset
];
563 if(current_line
.indentation_level
> indentation
)
564 error("Unexpected increase in the indentation level");
570 if(!process_function())
571 process_statement(*output
);
574 error("Regular statements and assignments need to be placed within functions");
579 if(line_offset
== line_end
)
581 //end of file -> end of the module entry function block
585 line_of_code
& next_line
= lines
[line_offset
];
588 return next_line
.indentation_level
< indentation
;
591 bool intermediary_translator::translate_data(module
& target_module
, std::string
const & data
, std::string
const & module_name
, std::string
& error_message_output
)
593 lines
= std::vector
<line_of_code
>();
594 lexer
current_lexer(data
, lines
, error_message
);
595 if(!current_lexer
.parse())
598 current_node
= &target_module
.symbols
;
599 indentation_level
= 0;
600 nested_class_level
= 0;
602 while(line_offset
< line_end
)
609 void intermediary_translator::error(std::string
const & message
)
611 throw ail::exception("Line " + ail::number_to_string(lines
[line_offset
].line
) + ": " + message
);