Made a lot of progress on the class and function code
[fridhskrift.git] / lexer / lexer.cpp
blob686eb29a8a999a2c5c82d382b59784cdab9cbe63
1 #include <frith/lexer.hpp>
2 #include <ail/array.hpp>
3 #include <ail/string.hpp>
4 #include <boost/foreach.hpp>
6 namespace frith
// Mutex object defined at namespace scope. No code visible in this section locks it.
// NOTE(review): presumably it serialises the table setup done by initialise_tables() - confirm at that definition.
8 boost::mutex table_mutex;
10 line_of_code::line_of_code():
11 indentation_level(0)
15 operator_lexeme_type::operator_lexeme(lexeme_type::type lexeme, std::string const & string):
16 lexeme(lexeme),
17 string(string)
21 bool operator_lexeme_type::operator<(operator_lexeme const & other) const
23 return string.length() > other.string.length();
26 lexer::lexer(std::string const & input, std::vector<line_of_code> & lines, std::string & error):
27 input(input),
28 lines(lines),
29 error(error)
33 bool lexer::parse_operator(line_of_code & output)
35 std::size_t remaining_characters = end - i;
37 BOOST_FOREACH(operator_lexeme & current_lexeme, operator_lexeme_data)
39 std::size_t operator_length = current_lexeme.string.size();
40 if(remaining_characters < operator_length)
41 return false;
43 std::string substring = input.substr(i, operator_length);
45 if(substring == current_lexeme.string)
47 output.lexemes.push_back(lexeme(current_lexeme.lexeme));
48 i += operator_length;
49 return true;
52 return false;
55 std::string lexer::lexer_error(std::string const & message, uword error_line)
57 if(error_line == 0)
58 error_line = line;
59 return "Line " + ail::number_to_string<uword>(error_line) + ": " + message;
62 std::string lexer::number_parsing_error(std::string const & message, bool & error_occured)
64 error_occured = true;
65 return lexer_error(message);
68 bool lexer::is_name_char(char input)
70 return ail::is_alpha(input) || ail::is_digit(input) || input == '_';
73 void lexer::parse_name(line_of_code & output)
75 std::size_t start = i;
76 for(i++; i < end && is_name_char(input[i]); i++);
77 std::string name = input.substr(start, i - start);
79 lexeme current_lexeme;
80 if(name == "true")
81 current_lexeme = lexeme(true);
82 else if(name == "false")
83 current_lexeme = lexeme(false);
84 else
85 current_lexeme = lexeme(name, name);
87 output.lexemes.push_back(current_lexeme);
90 bool lexer::string_match(std::string const & target)
92 if(end - i < target.size())
93 return false;
95 return input.substr(i, target.size()) == target;
98 void lexer::process_newline()
100 if(!current_line.lexemes.empty())
102 current_line.line = line;
103 lines.push_back(current_line);
105 std::string line_string = input.substr(line_offset, i - line_offset);
106 current_line = line_of_code();
107 line++;
108 i++;
109 line_offset = i;
112 bool lexer::parse()
114 initialise_tables();
116 line = 1;
118 line_offset = 0;
120 for(i = 0, end = input.size(); i < end;)
122 if(parse_operator(current_line))
123 continue;
125 char const tab = '\t';
127 char byte = input[i];
128 switch(byte)
130 case tab:
131 if(current_line.indentation_level > 0)
133 error = lexer_error("Tabs are only permitted at the beginning of a line (offset " + ail::number_to_string(i - line_offset + 1) + ")");
134 return false;
136 for(i++, current_line.indentation_level = 1; i < end && input[i] == tab; i++, current_line.indentation_level++);
137 continue;
139 case ' ':
140 case '\r':
141 i++;
142 continue;
144 case '\n':
146 process_newline();
147 continue;
150 case '\'':
151 case '"':
153 std::string string;
154 if(!parse_string(current_line, error))
155 return false;
156 continue;
159 case ';':
160 if(!parse_comment(error))
161 return false;
162 continue;
165 bool error_occured;
166 if(parse_number(current_line, error_occured))
167 continue;
169 if(error_occured)
170 return false;
172 parse_name(current_line);
175 if(!current_line.lexemes.empty())
177 current_line.line = line;
178 lines.push_back(current_line);
181 return true;
184 std::string visualise_lexemes(std::vector<line_of_code> & lines)
186 std::string output;
188 BOOST_FOREACH(line_of_code & current_line, lines)
190 std::string number_string = ail::number_to_string(current_line.line);
191 for(word i = 0, end = 5 - number_string.size(); i < end; i++)
192 output += " ";
193 output += number_string;
194 output += ": ";
195 for(uword indentation = 0; indentation < current_line.indentation_level; indentation++)
196 output += " ";
197 bool first = true;
198 BOOST_FOREACH(lexeme & current_lexeme, current_line.lexemes)
200 if(first)
201 first = false;
202 else
203 output += " ";
204 output += "[" + current_lexeme.to_string() + "]";
206 output += "\n";
209 return output;