1 #include <fridh/lexer.hpp>
2 #include <ail/array.hpp>
3 #include <ail/string.hpp>
4 #include <boost/foreach.hpp>
8 boost::mutex table_mutex
;
10 line_of_code::line_of_code():
15 operator_lexeme::operator_lexeme(lexeme_type::type lexeme
, std::string
const & string
):
21 bool operator_lexeme::operator<(operator_lexeme
const & other
) const
23 return string
.length() > other
.string
.length();
26 lexer::lexer(std::string
const & input
, lines_of_code
& lines
):
32 bool lexer::parse_operator(line_of_code
& output
)
34 std::size_t remaining_characters
= end
- i
;
36 BOOST_FOREACH(operator_lexeme
& current_lexeme
, operator_lexeme_data
)
38 std::size_t operator_length
= current_lexeme
.string
.size();
39 if(remaining_characters
< operator_length
)
42 std::string substring
= input
.substr(i
, operator_length
);
44 if(substring
== current_lexeme
.string
)
46 output
.lexemes
.push_back(current_lexeme
.lexeme
);
54 void lexer::lexer_error(std::string
const & message
, uword error_line
)
58 throw ail::exception("Lexer error: Line " + ail::number_to_string
<uword
>(error_line
) + ": " + message
);
61 void lexer::number_parsing_error(std::string
const & message
)
66 bool lexer::is_name_char(char input
)
68 return ail::is_alpha(input
) || ail::is_digit(input
) || input
== '_';
71 void lexer::parse_name(line_of_code
& output
)
73 std::size_t start
= i
;
74 for(i
++; i
< end
&& is_name_char(input
[i
]); i
++);
75 std::string name
= input
.substr(start
, i
- start
);
77 lexeme current_lexeme
;
79 current_lexeme
= lexeme(true);
80 else if(name
== "false")
81 current_lexeme
= lexeme(false);
84 current_lexeme
= lexeme(name
);
85 current_lexeme
.type
= lexeme_type::name
;
88 output
.lexemes
.push_back(current_lexeme
);
91 bool lexer::string_match(std::string
const & target
)
93 if(end
- i
< target
.size())
96 return input
.substr(i
, target
.size()) == target
;
99 void lexer::process_newline()
101 if(!current_line
.lexemes
.empty())
103 current_line
.line
= line
;
104 lines
.push_back(current_line
);
106 std::string line_string
= input
.substr(line_offset
, i
- line_offset
);
107 current_line
= line_of_code();
113 void lexer::parse_lexemes()
121 for(i
= 0, end
= input
.size(); i
< end
;)
123 if(parse_operator(current_line
))
126 char const tab
= '\t';
128 char byte
= input
[i
];
133 if(current_line
.indentation_level
> 0)
134 lexer_error("Tabs are only permitted in the beginning of a line (offset " + ail::number_to_string(i
- line_offset
+ 1) + ")");
135 for(i
++, current_line
.indentation_level
= 1; i
< end
&& input
[i
] == tab
; i
++, current_line
.indentation_level
++);
153 parse_string(current_line
);
162 if(parse_number(current_line
))
165 parse_name(current_line
);
168 if(!current_line
.lexemes
.empty())
170 current_line
.line
= line
;
171 lines
.push_back(current_line
);
175 std::string
visualise_lexemes(lines_of_code
& lines
)
179 BOOST_FOREACH(line_of_code
& current_line
, lines
)
181 std::string number_string
= ail::number_to_string(current_line
.line
);
182 for(word i
= 0, end
= 5 - number_string
.size(); i
< end
; i
++)
184 output
+= number_string
;
186 for(uword indentation
= 0; indentation
< current_line
.indentation_level
; indentation
++)
189 BOOST_FOREACH(lexeme
& current_lexeme
, current_line
.lexemes
)
195 output
+= "[" + current_lexeme
.to_string() + "]";
203 bool lexer::parse(std::string
& error
)
210 catch(ail::exception
& exception
)
212 error
= exception
.get_message();