Fixed the string/name mixup. It was caused by a simplification of the string lexeme...
[fridhskrift.git] / lexer / lexer.cpp
blob4dddb0e38a4992a9b30096482f547bb0eafa0657
1 #include <fridh/lexer.hpp>
2 #include <ail/array.hpp>
3 #include <ail/string.hpp>
4 #include <boost/foreach.hpp>
6 namespace fridh
// File-scope mutex. Nothing in the visible part of this file locks it;
// presumably it serialises set-up of the shared lexer tables (see the
// initialise_tables() call in lexer::parse_lexemes) - TODO confirm.
8 boost::mutex table_mutex;
10 line_of_code::line_of_code():
11 indentation_level(0)
15 operator_lexeme::operator_lexeme(lexeme_type::type lexeme, std::string const & string):
16 lexeme(lexeme),
17 string(string)
21 bool operator_lexeme::operator<(operator_lexeme const & other) const
23 return string.length() > other.string.length();
26 lexer::lexer(std::string const & input, lines_of_code & lines):
27 input(input),
28 lines(lines)
32 bool lexer::parse_operator(line_of_code & output)
34 std::size_t remaining_characters = end - i;
36 BOOST_FOREACH(operator_lexeme & current_lexeme, operator_lexeme_data)
38 std::size_t operator_length = current_lexeme.string.size();
39 if(remaining_characters < operator_length)
40 return false;
42 std::string substring = input.substr(i, operator_length);
44 if(substring == current_lexeme.string)
46 output.lexemes.push_back(current_lexeme.lexeme);
47 i += operator_length;
48 return true;
51 return false;
54 void lexer::lexer_error(std::string const & message, uword error_line)
56 if(error_line == 0)
57 error_line = line;
58 throw ail::exception("Lexer error: Line " + ail::number_to_string<uword>(error_line) + ": " + message);
61 void lexer::number_parsing_error(std::string const & message)
63 lexer_error(message);
66 bool lexer::is_name_char(char input)
68 return ail::is_alpha(input) || ail::is_digit(input) || input == '_';
71 void lexer::parse_name(line_of_code & output)
73 std::size_t start = i;
74 for(i++; i < end && is_name_char(input[i]); i++);
75 std::string name = input.substr(start, i - start);
77 lexeme current_lexeme;
78 if(name == "true")
79 current_lexeme = lexeme(true);
80 else if(name == "false")
81 current_lexeme = lexeme(false);
82 else
84 current_lexeme = lexeme(name);
85 current_lexeme.type = lexeme_type::name;
88 output.lexemes.push_back(current_lexeme);
91 bool lexer::string_match(std::string const & target)
93 if(end - i < target.size())
94 return false;
96 return input.substr(i, target.size()) == target;
99 void lexer::process_newline()
101 if(!current_line.lexemes.empty())
103 current_line.line = line;
104 lines.push_back(current_line);
106 std::string line_string = input.substr(line_offset, i - line_offset);
107 current_line = line_of_code();
108 line++;
109 i++;
110 line_offset = i;
113 void lexer::parse_lexemes()
115 initialise_tables();
117 line = 1;
119 line_offset = 0;
121 for(i = 0, end = input.size(); i < end;)
123 if(parse_operator(current_line))
124 continue;
126 char const tab = '\t';
128 char byte = input[i];
130 switch(byte)
132 case tab:
133 if(current_line.indentation_level > 0)
134 lexer_error("Tabs are only permitted in the beginning of a line (offset " + ail::number_to_string(i - line_offset + 1) + ")");
135 for(i++, current_line.indentation_level = 1; i < end && input[i] == tab; i++, current_line.indentation_level++);
136 continue;
138 case ' ':
139 case '\r':
140 i++;
141 continue;
143 case '\n':
145 process_newline();
146 continue;
149 case '\'':
150 case '"':
152 std::string string;
153 parse_string(current_line);
154 continue;
157 case ';':
158 parse_comment();
159 continue;
162 if(parse_number(current_line))
163 continue;
165 parse_name(current_line);
168 if(!current_line.lexemes.empty())
170 current_line.line = line;
171 lines.push_back(current_line);
175 std::string visualise_lexemes(lines_of_code & lines)
177 std::string output;
179 BOOST_FOREACH(line_of_code & current_line, lines)
181 std::string number_string = ail::number_to_string(current_line.line);
182 for(word i = 0, end = 5 - number_string.size(); i < end; i++)
183 output += " ";
184 output += number_string;
185 output += ": ";
186 for(uword indentation = 0; indentation < current_line.indentation_level; indentation++)
187 output += " ";
188 bool first = true;
189 BOOST_FOREACH(lexeme & current_lexeme, current_line.lexemes)
191 if(first)
192 first = false;
193 else
194 output += " ";
195 output += "[" + current_lexeme.to_string() + "]";
197 output += "\n";
200 return output;
203 bool lexer::parse(std::string & error)
207 parse_lexemes();
208 return true;
210 catch(ail::exception & exception)
212 error = exception.get_message();
213 return false;