Still working on the parse tree, added lexeme groups
[fridhskrift.git] / frith / lexer.hpp
blob a184e960f8a6c09a496f5875eae4a4ccc14f9594
1 #pragma once
3 #include <string>
4 #include <vector>
5 #include <ail/types.hpp>
6 #include <frith/symbol.hpp>
7 #include <boost/thread/mutex.hpp>
9 namespace frith
// Kinds of lexeme. A lexeme records one of these values in its `type` member.
// The enum is wrapped in its own namespace so values are spelled lexeme_type::name etc.
11 namespace lexeme_type
13 enum type
// A name. NOTE(review): presumably an identifier carried as text — confirm in the lexer implementation.
15 name,
// Literal kinds. boolean, signed_integer, unsigned_integer and floating_point_value have
// same-named members in lexeme's value union; string is held there as a std::string pointer.
17 boolean,
18 signed_integer,
19 unsigned_integer,
20 floating_point_value,
21 string,
// Arithmetic operators.
23 addition,
24 subtraction,
25 multiplication,
26 division,
27 modulo,
28 exponentiation,
// Plain assignment and the compound-assignment counterparts of the arithmetic operators above.
30 assignment,
31 addition_assignment,
32 subtraction_assignment,
33 multiplication_assignment,
34 division_assignment,
35 modulo_assignment,
36 exponentiation_assignment,
// Increment / decrement.
38 increment,
39 decrement,
// Comparison operators.
41 less_than,
42 less_than_or_equal,
43 greater_than,
44 greater_than_or_equal,
45 unequal,
46 equal,
// Logical operators.
48 logical_not,
50 logical_and,
51 logical_or,
// Shift operators.
53 shift_left,
54 shift_right,
// Bitwise operators.
56 binary_and,
57 binary_or,
58 binary_xor,
60 binary_not,
// Brackets. bracket_start_call is a separate kind from bracket_start;
// NOTE(review): presumably the opening bracket of a call (cf. lexer::is_call_bracket) — confirm.
62 bracket_start,
63 bracket_start_call,
64 bracket_end,
// Array delimiters.
66 array_start,
67 array_end,
// Scope delimiters.
69 scope_start,
70 scope_end,
// NOTE(review): the distinction between `iteration` and `iterator` is not evident from this header.
72 iteration,
73 iterator,
// Function declaration markers (named and anonymous).
75 function_declaration,
76 anonymous_function_declaration,
// NOTE(review): the source text each of these four operators corresponds to is defined
// outside this header (see operator_lexeme_data) — confirm before relying on the names.
78 class_operator,
79 selection_operator,
80 call_operator,
81 scope_operator,
// Coarse grouping of lexeme types. get_lexeme_group() writes one of these values
// through its output parameter for a given lexeme_type::type.
85 namespace lexeme_group
87 enum type
89 argument,
90 unary_operator,
91 binary_operator
// Forward declaration so the container typedef below can name lexeme before it is defined.
95 struct lexeme;
// Sequence of lexemes; this is the type of line_of_code::lexemes.
97 typedef std::vector<lexeme> lexeme_container;
// A single lexeme: a type tag plus a value held in an anonymous union.
99 struct lexeme
// Kind of this lexeme. NOTE(review): presumably also selects which union member is meaningful — confirm.
101 lexeme_type::type type;
// Value storage; the members share memory, so only one of them is valid at a time.
102 union
104 bool boolean;
105 types::signed_integer signed_integer;
106 types::unsigned_integer unsigned_integer;
107 types::floating_point_value floating_point_value;
// Held by pointer rather than by value. NOTE(review): no destructor or copy operations are
// declared in the visible lines, so who allocates and frees the pointed-to string must be
// confirmed in the implementation (lexemes are stored by value in lexeme_container).
108 std::string * string;
// Default constructor. NOTE(review): the initial `type` value is set in the implementation, not visible here.
111 lexeme();
// Constructs from a type alone. Not marked explicit, so a lexeme_type::type converts implicitly to a lexeme.
112 lexeme(lexeme_type::type type);
// Value constructors, one per literal representation; all explicit.
113 explicit lexeme(types::boolean boolean);
114 explicit lexeme(types::signed_integer signed_integer);
115 explicit lexeme(types::unsigned_integer unsigned_integer);
116 explicit lexeme(types::floating_point_value floating_point_value);
// Constructs a lexeme of the given type from text.
// NOTE(review): presumably used for both `name` and `string` lexemes — confirm.
117 explicit lexeme(lexeme_type::type type, std::string const & string);
// Returns a std::string for this lexeme without modifying it; the format is defined in the implementation.
118 std::string to_string() const;
// The lexemes of one line, together with a line number and an indentation level.
121 struct line_of_code
// NOTE(review): whether `line` is 0- or 1-based is not visible in this header.
123 uword line;
// NOTE(review): the unit of indentation (tabs, spaces, ...) is decided by the lexer implementation.
124 uword indentation_level;
125 lexeme_container lexemes;
// Default constructor; member initialisation happens in the implementation.
127 line_of_code();
// Pairs a lexeme type with a string; this is the element type of operator_lexeme_data.
// NOTE(review): presumably `string` is the operator's source text — confirm against initialise_tables().
130 struct operator_lexeme
132 lexeme_type::type lexeme;
133 std::string string;
135 operator_lexeme(lexeme_type::type lexeme, std::string const & string);
// Ordering between entries. NOTE(review): the sort key (string, length, type) is defined in the implementation.
136 bool operator<(operator_lexeme const & other) const;
// Lexer over an input string. It is bound at construction to the input text, a vector of
// line_of_code and an error string; parse() is the only public operation.
139 class lexer
141 public:
// All three arguments are stored as reference members (see below), so the referenced
// objects must outlive the lexer.
142 lexer(std::string const & input, std::vector<line_of_code> & lines, std::string & error);
// NOTE(review): presumably returns true on success and describes a failure in `error` — confirm.
144 bool parse();
146 private:
// References to the caller's objects passed to the constructor.
147 std::string const & input;
148 std::vector<line_of_code> & lines;
149 std::string & error;
// Lexer state.
151 uword line;
// One declaration introducing several std::size_t members.
// NOTE(review): the embedded numbering skips 153 here, so a declarator may be missing from this listing.
152 std::size_t
154 end,
155 line_offset;
156 line_of_code current_line;
157 bool is_call_bracket;
// Parsing helpers. Those taking `output` receive the line_of_code to work on.
// NOTE(review): the meaning of each bool return (matched vs. failed) is not visible in this header.
159 bool parse_operator(line_of_code & output);
// error_prefix defaults to the empty string.
160 bool parse_string(line_of_code & output, std::string & error_message, std::string error_prefix = "");
// error_occured is a non-const reference, i.e. an out-parameter for reporting failure.
161 bool parse_number(line_of_code & output, bool & error_occured);
162 void parse_name(line_of_code & output);
163 bool parse_comment(std::string & error_message);
// Error message helpers; both return the message as a std::string.
// NOTE(review): error_line defaults to 0; presumably that selects the current line — confirm.
165 std::string lexer_error(std::string const & message, uword error_line = 0);
166 std::string number_parsing_error(std::string const & message, bool & error_occured);
// Small utilities used while scanning.
168 bool is_name_char(char input);
169 bool string_match(std::string const & target);
170 void process_newline();
// Returns a std::string built from the given lines; the format is defined in the implementation.
// Takes the vector by non-const reference.
173 std::string visualise_lexemes(std::vector<line_of_code> & lines);
// NOTE(review): presumably fills operator_lexeme_data (and any other lookup tables) — confirm.
175 void initialise_tables();
// Writes the group of `input` to `output`.
// NOTE(review): the bool presumably reports whether `input` belongs to any group — confirm.
177 bool get_lexeme_group(lexeme_type::type input, lexeme_group::type & output);
// Declared here, defined elsewhere.
// NOTE(review): table_mutex presumably guards the table(s) below — confirm in the implementation.
179 extern boost::mutex table_mutex;
180 extern std::vector<operator_lexeme> operator_lexeme_data;