1 #include <fridh/lexer.hpp>
2 #include <ail/array.hpp>
3 #include <ail/string.hpp>
4 #include <boost/foreach.hpp>
// File-scope Boost mutex. Nothing in the visible part of this file locks it;
// NOTE(review): its users are presumably elsewhere — confirm before removing.
8 boost::mutex table_mutex
;
// Constructor of line_of_code taking no arguments. Its member-initialiser
// list and body are not part of this excerpt.
10 line_of_code::line_of_code():
// Constructs an operator_lexeme from a lexeme type and the operator's text.
// NOTE(review): the member initialisers are not visible here; presumably they
// fill the `lexeme` and `string` members that parse_operator reads — confirm.
15 operator_lexeme::operator_lexeme(lexeme_type::type lexeme
, std::string
const & string
):
// Ordering predicate. Note the inverted sense: an operator_lexeme is "less"
// than another when its string is LONGER, so an ascending sort using this
// operator places longer operator strings first.
// NOTE(review): presumably this lets parse_operator try the longest operator
// first — confirm where the operator table is sorted.
21 bool operator_lexeme::operator<(operator_lexeme
const & other
) const
23 return string
.length() > other
.string
.length();
// Constructs a lexer over `input` (taken by const reference) that reports its
// result through `lines` (taken by non-const reference).
// NOTE(review): the initialiser list is not visible; presumably both are kept
// as the `input` and `lines` members used by the other methods — confirm, and
// if they are stored as references the caller must keep both objects alive.
26 lexer::lexer(std::string
const & input
, lines_of_code
& lines
):
// Tries to match one of the entries of operator_lexeme_data against the input
// at the current position `i`. On a match the entry's lexeme value is appended
// to output.lexemes.
// NOTE(review): the statements that advance `i` and produce the boolean
// result are not visible in this excerpt.
32 bool lexer::parse_operator(line_of_code
& output
)
// Number of input characters left from the current position to `end`.
34 std::size_t remaining_characters
= end
- i
;
// Entries are tested in the table's iteration order.
36 BOOST_FOREACH(operator_lexeme
& current_lexeme
, operator_lexeme_data
)
38 std::size_t operator_length
= current_lexeme
.string
.size();
// Guard for operators longer than the remaining input (the branch body is
// not visible; presumably the entry is skipped).
39 if(remaining_characters
< operator_length
)
// Copy out exactly operator_length characters at `i` and compare them with
// the operator text.
42 std::string substring
= input
.substr(i
, operator_length
);
44 if(substring
== current_lexeme
.string
)
46 output
.lexemes
.push_back(current_lexeme
.lexeme
);
// Reports a lexing failure by throwing ail::exception with the text
// "Lexer error: Line <error_line>: <message>".
// NOTE(review): parse_lexemes calls this with a single argument, so the
// declaration presumably supplies a default for error_line — confirm in the
// header. Any handling of that default is not visible in this excerpt.
54 void lexer::lexer_error(std::string
const & message
, uword error_line
)
58 throw ail::exception("Lexer error: Line " + ail::number_to_string
<uword
>(error_line
) + ": " + message
);
// Error helper taking a message by const reference. Only the signature is
// visible in this excerpt.
// NOTE(review): by its name it presumably reports failures from number
// parsing, likely via lexer_error — confirm against the full body.
61 void lexer::number_parsing_error(std::string
const & message
)
// Returns true when `input` may appear inside a name: a character accepted by
// ail::is_alpha or ail::is_digit, or an underscore.
66 bool lexer::is_name_char(char input
)
68 return ail::is_alpha(input
) || ail::is_digit(input
) || input
== '_';
// Consumes a name starting at the current position `i` and appends the
// resulting lexeme to output.lexemes. The words "false" and "nil" are turned
// into a boolean lexeme and a nil-typed lexeme respectively; any other text
// becomes a lexeme of type lexeme_type::name.
// NOTE(review): the condition guarding the lexeme(true) assignment is not
// visible; presumably it tests name == "true".
71 void lexer::parse_name(line_of_code
& output
)
73 std::size_t start
= i
;
// The first character is consumed unconditionally (i is incremented before
// the first is_name_char test); scanning then continues while characters
// satisfy is_name_char and input remains.
74 for(i
++; i
< end
&& is_name_char(input
[i
]); i
++);
75 std::string name
= input
.substr(start
, i
- start
);
77 lexeme current_lexeme
;
79 current_lexeme
= lexeme(true);
80 else if(name
== "false")
81 current_lexeme
= lexeme(false);
82 else if(name
== "nil")
// Only the type tag is set for nil; no lexeme constructor is called.
83 current_lexeme
.type
= lexeme_type::nil
;
// The type is set explicitly after constructing the lexeme from the string.
86 current_lexeme
= lexeme(name
);
87 current_lexeme
.type
= lexeme_type::name
;
90 output
.lexemes
.push_back(current_lexeme
);
// Tests whether the input at the current position `i` starts with `target`.
// Does not modify `i` in the visible code.
93 bool lexer::string_match(std::string
const & target
)
// Guard for fewer than target.size() characters remaining (the branch body is
// not visible; presumably it returns false).
95 if(end
- i
< target
.size())
98 return input
.substr(i
, target
.size()) == target
;
// Finishes the line currently being assembled: if it holds any lexemes it is
// stamped with the current line number and appended to `lines`, and
// current_line is then replaced by a fresh line_of_code.
// NOTE(review): how `next_line` is used, and the statements between the reset
// and the space-skipping loop, are not visible in this excerpt.
101 void lexer::process_newline(bool next_line
)
// Lines without lexemes are not appended to `lines`.
103 if(!current_line
.lexemes
.empty())
105 current_line
.line
= line
;
106 lines
.push_back(current_line
);
// Text of the current line from line_offset up to (not including) `i`; its
// use is not visible here.
108 std::string line_string
= input
.substr(line_offset
, i
- line_offset
);
109 current_line
= line_of_code();
118 //skip initial spaces after ` and (
// Advances `i` past consecutive ' ' characters only (tabs are not skipped).
119 for(; i
< end
&& input
[i
] == ' '; i
++);
// Ends the current line via process_newline(false) and starts the next one at
// the previous indentation level plus `summand`. parse_lexemes calls this
// with +1 and -1.
123 void lexer::process_one_liner(word summand
)
// Saved first because process_newline resets current_line.
125 uword indentation_level
= current_line
.indentation_level
;
126 process_newline(false);
127 current_line
.indentation_level
= indentation_level
+ summand
;
// Main scanning loop: walks `input` from offset 0 to its end, fills
// current_line with lexemes and appends finished lines to `lines`.
// NOTE(review): the conditions that choose between the handlers below
// (string, newline, one-liner, number, name) are not visible in this excerpt.
130 void lexer::parse_lexemes()
// The for-header has no increment; `i` is advanced by the handlers called in
// the body.
138 for(i
= 0, end
= input
.size(); i
< end
;)
// Operators are tried before any other lexeme kind.
140 if(parse_operator(current_line
))
143 char const tab
= '\t';
145 char byte
= input
[i
];
// A non-zero indentation level when this check runs is reported as an error.
// The offset in the message is 1-based relative to line_offset.
150 if(current_line
.indentation_level
> 0)
151 lexer_error("Tabs are only permitted in the beginning of a line (offset " + ail::number_to_string(i
- line_offset
+ 1) + ")");
// Counts a run of consecutive tabs: the level starts at 1 for the tab at `i`
// and grows by one for every further tab consumed.
152 for(i
++, current_line
.indentation_level
= 1; i
< end
&& input
[i
] == tab
; i
++, current_line
.indentation_level
++);
170 parse_string(current_line
);
179 process_newline(false);
// One-liner handling: continue on a new logical line one level deeper (+1)
// or one level shallower (-1).
183 process_one_liner(1);
187 process_one_liner(-1);
191 if(parse_number(current_line
))
194 parse_name(current_line
);
// Same flush as in process_newline: a non-empty current_line is stamped with
// the line number and appended.
197 if(!current_line
.lexemes
.empty())
199 current_line
.line
= line
;
200 lines
.push_back(current_line
);
// Builds a textual dump of `lines` and returns it as a std::string. For each
// line the visible code appends the line number, then something per
// indentation level, then every lexeme as "[<to_string()>]".
// NOTE(review): the bodies of the padding and indentation loops and the
// declaration of `output` are not visible in this excerpt.
204 std::string
visualise_lexemes(lines_of_code
& lines
)
208 BOOST_FOREACH(line_of_code
& current_line
, lines
)
210 std::string number_string
= ail::number_to_string(current_line
.line
);
// Runs (5 - digits) times, presumably to pad the number to width 5.
// NOTE(review): `5 - number_string.size()` is computed in the unsigned type
// of size() and wraps for numbers with more than 5 digits before being
// converted to `word`; whether the loop is then skipped depends on `word`
// being signed — confirm.
211 for(word i
= 0, end
= 5 - number_string
.size(); i
< end
; i
++)
213 output
+= number_string
;
// One iteration per indentation level of the line.
215 for(uword indentation
= 0; indentation
< current_line
.indentation_level
; indentation
++)
218 BOOST_FOREACH(lexeme
& current_lexeme
, current_line
.lexemes
)
224 output
+= "[" + current_lexeme
.to_string() + "]";
// Entry point that reports failure through `error` instead of an exception:
// an ail::exception raised by the guarded code is caught and its message is
// copied into `error`.
// NOTE(review): the try body and the returned values are not visible here;
// presumably it runs parse_lexemes and returns false on error — confirm.
232 bool lexer::parse(std::string
& error
)
239 catch(ail::exception
& exception
)
241 error
= exception
.get_message();