Implemented the one-liner operators ` ( )
[fridhskrift.git] / lexer / lexer.cpp
blob a4e1bc64d3485b91d72da0f88084cbfa7caef9af
1 #include <fridh/lexer.hpp>
2 #include <ail/array.hpp>
3 #include <ail/string.hpp>
4 #include <boost/foreach.hpp>
6 namespace fridh
8 boost::mutex table_mutex;
10 line_of_code::line_of_code():
11 indentation_level(0)
15 operator_lexeme::operator_lexeme(lexeme_type::type lexeme, std::string const & string):
16 lexeme(lexeme),
17 string(string)
21 bool operator_lexeme::operator<(operator_lexeme const & other) const
23 return string.length() > other.string.length();
26 lexer::lexer(std::string const & input, lines_of_code & lines):
27 input(input),
28 lines(lines)
32 bool lexer::parse_operator(line_of_code & output)
34 std::size_t remaining_characters = end - i;
36 BOOST_FOREACH(operator_lexeme & current_lexeme, operator_lexeme_data)
38 std::size_t operator_length = current_lexeme.string.size();
39 if(remaining_characters < operator_length)
40 return false;
42 std::string substring = input.substr(i, operator_length);
44 if(substring == current_lexeme.string)
46 output.lexemes.push_back(current_lexeme.lexeme);
47 i += operator_length;
48 return true;
51 return false;
54 void lexer::lexer_error(std::string const & message, uword error_line)
56 if(error_line == 0)
57 error_line = line;
58 throw ail::exception("Lexer error: Line " + ail::number_to_string<uword>(error_line) + ": " + message);
61 void lexer::number_parsing_error(std::string const & message)
63 lexer_error(message);
66 bool lexer::is_name_char(char input)
68 return ail::is_alpha(input) || ail::is_digit(input) || input == '_';
71 void lexer::parse_name(line_of_code & output)
73 std::size_t start = i;
74 for(i++; i < end && is_name_char(input[i]); i++);
75 std::string name = input.substr(start, i - start);
77 lexeme current_lexeme;
78 if(name == "true")
79 current_lexeme = lexeme(true);
80 else if(name == "false")
81 current_lexeme = lexeme(false);
82 else if(name == "nil")
83 current_lexeme.type = lexeme_type::nil;
84 else
86 current_lexeme = lexeme(name);
87 current_lexeme.type = lexeme_type::name;
90 output.lexemes.push_back(current_lexeme);
93 bool lexer::string_match(std::string const & target)
95 if(end - i < target.size())
96 return false;
98 return input.substr(i, target.size()) == target;
101 void lexer::process_newline(bool next_line)
103 if(!current_line.lexemes.empty())
105 current_line.line = line;
106 lines.push_back(current_line);
108 std::string line_string = input.substr(line_offset, i - line_offset);
109 current_line = line_of_code();
111 i++;
112 line_offset = i;
114 if(next_line)
115 line++;
116 else
118 //skip initial spaces after ` and (
119 for(; i < end && input[i] == ' '; i++);
123 void lexer::process_one_liner(word summand)
125 uword indentation_level = current_line.indentation_level;
126 process_newline(false);
127 current_line.indentation_level = indentation_level + summand;
130 void lexer::parse_lexemes()
132 initialise_tables();
134 line = 1;
136 line_offset = 0;
138 for(i = 0, end = input.size(); i < end;)
140 if(parse_operator(current_line))
141 continue;
143 char const tab = '\t';
145 char byte = input[i];
147 switch(byte)
149 case tab:
150 if(current_line.indentation_level > 0)
151 lexer_error("Tabs are only permitted in the beginning of a line (offset " + ail::number_to_string(i - line_offset + 1) + ")");
152 for(i++, current_line.indentation_level = 1; i < end && input[i] == tab; i++, current_line.indentation_level++);
153 continue;
155 case ' ':
156 case '\r':
157 i++;
158 continue;
160 case '\n':
162 process_newline();
163 continue;
166 case '\'':
167 case '"':
169 std::string string;
170 parse_string(current_line);
171 continue;
174 case ';':
175 parse_comment();
176 continue;
178 case '`':
179 process_newline(false);
180 continue;
182 case '(':
183 process_one_liner(1);
184 continue;
186 case ')':
187 process_one_liner(-1);
188 continue;
191 if(parse_number(current_line))
192 continue;
194 parse_name(current_line);
197 if(!current_line.lexemes.empty())
199 current_line.line = line;
200 lines.push_back(current_line);
204 std::string visualise_lexemes(lines_of_code & lines)
206 std::string output;
208 BOOST_FOREACH(line_of_code & current_line, lines)
210 std::string number_string = ail::number_to_string(current_line.line);
211 for(word i = 0, end = 5 - number_string.size(); i < end; i++)
212 output += " ";
213 output += number_string;
214 output += ": ";
215 for(uword indentation = 0; indentation < current_line.indentation_level; indentation++)
216 output += " ";
217 bool first = true;
218 BOOST_FOREACH(lexeme & current_lexeme, current_line.lexemes)
220 if(first)
221 first = false;
222 else
223 output += " ";
224 output += "[" + current_lexeme.to_string() + "]";
226 output += "\n";
229 return output;
232 bool lexer::parse(std::string & error)
236 parse_lexemes();
237 return true;
239 catch(ail::exception & exception)
241 error = exception.get_message();
242 return false;