The dot lexeme is now properly converted to a binary selection operator
[fridhskrift.git] / fridh / lexer.hpp
blob 6fadb6facf7b90faff681f1d732e53fc45c41838
1 #pragma once
3 #include <string>
4 #include <vector>
5 #include <ail/types.hpp>
6 #include <fridh/symbol.hpp>
7 #include <fridh/construction.hpp>
8 #include <boost/thread/mutex.hpp>
10 namespace fridh
// Tag identifying what a lexeme is (stored in lexeme::type).
//
// No enumerator has an explicit value, so the numeric values follow the
// declaration order, starting with uninitialised == 0. Add new entries at the
// end rather than reordering if anything relies on those values.
namespace lexeme_type
{
    enum type
    {
        uninitialised,

        non_terminating_placeholder,

        name,

        // Literal values.
        nil,
        boolean,
        signed_integer,
        unsigned_integer,
        floating_point_value,
        string,

        // Arithmetic operators.
        addition,
        subtraction,
        multiplication,
        division,
        modulo,
        exponentiation,

        negation,

        // Assignment and compound assignment.
        assignment,
        addition_assignment,
        subtraction_assignment,
        multiplication_assignment,
        division_assignment,
        modulo_assignment,
        exponentiation_assignment,

        increment,
        decrement,

        // Comparisons.
        less_than,
        less_than_or_equal,
        greater_than,
        greater_than_or_equal,
        not_equal,
        equal,

        logical_not,

        logical_and,
        logical_or,

        shift_left,
        shift_right,

        // NOTE(review): "binary" here presumably means bitwise - confirm against the operator table.
        binary_and,
        binary_or,
        binary_xor,

        binary_not,

        // Paired delimiters.
        bracket_start,
        bracket_end,

        array_start,
        array_end,

        scope_start,
        scope_end,

        iteration,
        iterator,

        while_operator,

        function_declaration,
        anonymous_function_declaration,

        scope_operator,
        class_operator,

        dot,

        call_operator,
        spaced_call_operator,
    };
}
// Coarse category of a lexeme; this is the output type of get_lexeme_group().
//
// Values are implicit and follow declaration order (argument == 0).
namespace lexeme_group
{
    enum type
    {
        argument,
        unary_operator,
        binary_operator,

        call_operator,

        post_fix_operator,
    };
}
// Forward declarations so the container aliases can be named before the
// structs themselves are defined.
struct lexeme;
struct line_of_code;

// All lexemes found on one line of input.
typedef std::vector<lexeme> lexeme_container;
// A sequence of lexed lines.
typedef std::vector<line_of_code> lines_of_code;
117 struct lexeme
119 lexeme_type::type type;
120 union
122 bool boolean;
123 types::signed_integer signed_integer;
124 types::unsigned_integer unsigned_integer;
125 types::floating_point_value floating_point_value;
126 std::string * string;
129 lexeme();
130 lexeme(lexeme const & other);
131 lexeme(lexeme_type::type type);
132 explicit lexeme(types::boolean boolean);
133 explicit lexeme(types::signed_integer signed_integer);
134 explicit lexeme(types::unsigned_integer unsigned_integer);
135 explicit lexeme(types::floating_point_value floating_point_value);
136 explicit lexeme(std::string const & string);
138 ~lexeme();
140 std::string to_string() const;
142 lexeme & operator=(lexeme const & other);
144 void copy(lexeme const & other);
145 void destroy();
147 bool is_string() const;
150 struct line_of_code
152 uword line;
153 uword indentation_level;
154 lexeme_container lexemes;
156 line_of_code();
159 struct operator_lexeme
161 lexeme_type::type lexeme;
162 std::string string;
164 operator_lexeme(lexeme_type::type lexeme, std::string const & string);
165 bool operator<(operator_lexeme const & other) const;
// Lexer over an input string that writes its result into a caller-supplied
// lines_of_code container.
//
// Both `input` and `lines` are stored as reference members, so the lexer does
// not own them; the objects passed to the constructor presumably have to
// outlive the lexer (confirm that the constructor binds them directly).
168 class lexer
170 public:
// input: text to lex; lines: container that receives the lexed lines.
171 lexer(std::string const & input, lines_of_code & lines);
// Runs the lexer. NOTE(review): presumably returns false and fills `error`
// on failure - confirm in the implementation.
172 bool parse(std::string & error);
174 private:
175 std::string const & input;
176 lines_of_code & lines;
// Line counter (presumably the current line of `input` - TODO confirm).
178 uword line;
// NOTE(review): original line 180 is absent from this extract, so a declarator
// may be missing between `std::size_t` and `end` - check the real header
// before relying on this member list.
179 std::size_t
181 end,
182 line_offset;
// Line currently being assembled.
183 line_of_code current_line;
// Per-category parsers; each one receives the line it should append to.
185 bool parse_operator(line_of_code & output);
186 void parse_string(line_of_code & output);
187 bool parse_number(line_of_code & output);
188 void parse_name(line_of_code & output);
189 void parse_comment();
// Error reporting helpers. error_line defaults to 0; how 0 is interpreted is
// decided in the implementation.
191 void lexer_error(std::string const & message, uword error_line = 0);
192 void number_parsing_error(std::string const & message);
// Scanning helpers.
194 bool is_name_char(char input);
195 bool string_match(std::string const & target);
196 void process_newline();
198 void parse_lexemes();
201 std::string visualise_lexemes(lines_of_code & lines);
203 void initialise_tables();
205 bool get_lexeme_group(lexeme_type::type input, lexeme_group::type & output);
207 extern boost::mutex table_mutex;
208 extern std::vector<operator_lexeme> operator_lexeme_data;