Added parser testing code, debugging commences, fixed one initialisation bug
[fridhskrift.git] / lexer / lexer.cpp
blob8ebf5dcbecb2d79545fa3a576ef56b2feac367e9
1 #include <fridh/lexer.hpp>
2 #include <ail/array.hpp>
3 #include <ail/string.hpp>
4 #include <boost/foreach.hpp>
6 namespace fridh
8 boost::mutex table_mutex;
10 line_of_code::line_of_code():
11 indentation_level(0)
15 operator_lexeme::operator_lexeme(lexeme_type::type lexeme, std::string const & string):
16 lexeme(lexeme),
17 string(string)
21 bool operator_lexeme::operator<(operator_lexeme const & other) const
23 return string.length() > other.string.length();
26 lexer::lexer(std::string const & input, lines_of_code & lines):
27 input(input),
28 lines(lines)
32 bool lexer::parse_operator(line_of_code & output)
34 std::size_t remaining_characters = end - i;
36 BOOST_FOREACH(operator_lexeme & current_lexeme, operator_lexeme_data)
38 std::size_t operator_length = current_lexeme.string.size();
39 if(remaining_characters < operator_length)
40 return false;
42 std::string substring = input.substr(i, operator_length);
44 if(substring == current_lexeme.string)
46 output.lexemes.push_back(current_lexeme.lexeme);
47 i += operator_length;
48 return true;
51 return false;
54 void lexer::lexer_error(std::string const & message, uword error_line)
56 if(error_line == 0)
57 error_line = line;
58 throw ail::exception("Lexer error: Line " + ail::number_to_string<uword>(error_line) + ": " + message);
61 void lexer::number_parsing_error(std::string const & message)
63 lexer_error(message);
66 bool lexer::is_name_char(char input)
68 return ail::is_alpha(input) || ail::is_digit(input) || input == '_';
71 void lexer::parse_name(line_of_code & output)
73 std::size_t start = i;
74 for(i++; i < end && is_name_char(input[i]); i++);
75 std::string name = input.substr(start, i - start);
77 lexeme current_lexeme;
78 if(name == "true")
79 current_lexeme = lexeme(true);
80 else if(name == "false")
81 current_lexeme = lexeme(false);
82 else if(name == "nil")
83 current_lexeme.type = lexeme_type::nil;
84 else
86 current_lexeme = lexeme(name);
87 current_lexeme.type = lexeme_type::name;
90 output.lexemes.push_back(current_lexeme);
93 bool lexer::string_match(std::string const & target)
95 if(end - i < target.size())
96 return false;
98 return input.substr(i, target.size()) == target;
101 void lexer::process_newline()
103 if(!current_line.lexemes.empty())
105 current_line.line = line;
106 lines.push_back(current_line);
108 std::string line_string = input.substr(line_offset, i - line_offset);
109 current_line = line_of_code();
110 line++;
111 i++;
112 line_offset = i;
115 void lexer::parse_lexemes()
117 initialise_tables();
119 line = 1;
121 line_offset = 0;
123 for(i = 0, end = input.size(); i < end;)
125 if(parse_operator(current_line))
126 continue;
128 char const tab = '\t';
130 char byte = input[i];
132 switch(byte)
134 case tab:
135 if(current_line.indentation_level > 0)
136 lexer_error("Tabs are only permitted in the beginning of a line (offset " + ail::number_to_string(i - line_offset + 1) + ")");
137 for(i++, current_line.indentation_level = 1; i < end && input[i] == tab; i++, current_line.indentation_level++);
138 continue;
140 case ' ':
141 case '\r':
142 i++;
143 continue;
145 case '\n':
147 process_newline();
148 continue;
151 case '\'':
152 case '"':
154 std::string string;
155 parse_string(current_line);
156 continue;
159 case ';':
160 parse_comment();
161 continue;
164 if(parse_number(current_line))
165 continue;
167 parse_name(current_line);
170 if(!current_line.lexemes.empty())
172 current_line.line = line;
173 lines.push_back(current_line);
177 std::string visualise_lexemes(lines_of_code & lines)
179 std::string output;
181 BOOST_FOREACH(line_of_code & current_line, lines)
183 std::string number_string = ail::number_to_string(current_line.line);
184 for(word i = 0, end = 5 - number_string.size(); i < end; i++)
185 output += " ";
186 output += number_string;
187 output += ": ";
188 for(uword indentation = 0; indentation < current_line.indentation_level; indentation++)
189 output += " ";
190 bool first = true;
191 BOOST_FOREACH(lexeme & current_lexeme, current_line.lexemes)
193 if(first)
194 first = false;
195 else
196 output += " ";
197 output += "[" + current_lexeme.to_string() + "]";
199 output += "\n";
202 return output;
205 bool lexer::parse(std::string & error)
209 parse_lexemes();
210 return true;
212 catch(ail::exception & exception)
214 error = exception.get_message();
215 return false;