1 #include <frith/lexer.hpp>
2 #include <ail/array.hpp>
3 #include <ail/string.hpp>
4 #include <boost/foreach.hpp>
// File-scope Boost mutex.
// NOTE(review): nothing in this excerpt locks it; the name suggests it guards a
// shared table (possibly the operator table) — confirm at the lock sites.
8 boost::mutex table_mutex
;
// Default constructor of line_of_code. The member-initialiser list that follows
// the colon is not visible in this excerpt.
10 line_of_code::line_of_code():
// Constructs an operator table entry from a lexeme type and the operator's text.
// NOTE(review): the initialiser list is not visible here; presumably it stores the
// arguments in the `lexeme` and `string` members that parse_operator reads — confirm.
15 operator_lexeme::operator_lexeme(lexeme_type lexeme
, std::string
const & string
):
// Ordering predicate for operator entries: an entry compares "less" when its text is
// LONGER than the other's, so an ascending sort places the longest operators first.
// NOTE(review): presumably this lets parse_operator try longer operators before
// shorter ones that are their prefixes — confirm where the table is sorted.
21 bool operator_lexeme::operator<(operator_lexeme
const & other
) const
23 return string
.length() > other
.string
.length();
// Constructs a lexer over `input`. `lines` and `error` are taken by non-const
// reference; other methods in this file append to `lines` and assign to `error`.
// The member-initialiser list that follows the colon is not visible in this excerpt.
26 lexer::lexer(std::string
const & input
, std::vector
<line_of_code
> & lines
, std::string
& error
):
// Tries to match one of the entries in operator_lexeme_data against the input at the
// current offset `i`. On a match, a lexeme built from the entry's lexeme type is
// appended to output.lexemes.
// NOTE(review): the return statements and any advance of `i` past the matched
// operator are not visible in this excerpt — confirm against the full source.
33 bool lexer::parse_operator(line_of_code
& output
)
// Number of characters left between the current offset and the end of the input.
35 std::size_t remaining_characters
= end
- i
;
37 BOOST_FOREACH(operator_lexeme
& current_lexeme
, operator_lexeme_data
)
39 std::size_t operator_length
= current_lexeme
.string
.size();
// An operator longer than the remaining input cannot match here.
// (The statement guarded by this condition is not visible; presumably it skips the entry.)
40 if(remaining_characters
< operator_length
)
// Compare the next operator_length characters of the input with the operator text.
43 std::string substring
= input
.substr(i
, operator_length
);
45 if(substring
== current_lexeme
.string
)
47 output
.lexemes
.push_back(lexeme(current_lexeme
.lexeme
));
// Formats an error message as "Line <error_line>: <message>".
// Other call sites in this file pass only the message, so the declaration
// (not visible here) must supply a default for error_line.
// NOTE(review): original lines between the signature and the return are missing from
// this excerpt; error_line may be adjusted there — confirm.
55 std::string
lexer::lexer_error(std::string
const & message
, uword error_line
)
59 return "Line " + ail::number_to_string
<uword
>(error_line
) + ": " + message
;
// Returns the formatted lexer error for a number-parsing failure by forwarding
// `message` to lexer_error.
// NOTE(review): error_occured is a non-const reference, presumably an out-flag to be
// set to true here, but no assignment to it is visible in this excerpt — confirm.
62 std::string
lexer::number_parsing_error(std::string
const & message
, bool & error_occured
)
65 return lexer_error(message
);
// Returns true when `input` may appear in a name: it is accepted by ail::is_alpha or
// ail::is_digit, or it is an underscore.
68 bool lexer::is_name_char(char input
)
70 return ail::is_alpha(input
) || ail::is_digit(input
) || input
== '_';
// Reads a name starting at the current offset `i`, advances `i` past it, and appends
// the resulting lexeme to output.lexemes. The text "false" becomes a boolean lexeme;
// any other text falls through to a lexeme_type_name lexeme carrying the name.
73 void lexer::parse_name(line_of_code
& output
)
75 std::size_t start
= i
;
// The first character is consumed unconditionally (i++ in the loop initialiser);
// the empty-bodied loop then skips every following character accepted by is_name_char.
76 for(i
++; i
< end
&& is_name_char(input
[i
]); i
++);
77 std::string name
= input
.substr(start
, i
- start
);
79 lexeme current_lexeme
;
// NOTE(review): the condition guarding this assignment is missing from this excerpt;
// given the "false" branch below it is presumably `name == "true"` — confirm.
81 current_lexeme
= lexeme(true);
82 else if(name
== "false")
83 current_lexeme
= lexeme(false);
// Fallback: an ordinary name lexeme.
85 current_lexeme
= lexeme(lexeme_type_name
, name
);
87 output
.lexemes
.push_back(current_lexeme
);
// Reports whether the input at the current offset `i` begins with `target`.
// Does not modify `i` in the lines visible here.
90 bool lexer::string_match(std::string
const & target
)
// Guard: fewer characters remain than `target` needs.
// (The guarded statement is not visible; presumably it returns false.)
92 if(end
- i
< target
.size())
95 return input
.substr(i
, target
.size()) == target
;
// Handles the end of a source line: a current line that holds at least one lexeme is
// stamped with the current line number and appended to `lines`; current_line is then
// replaced with a fresh line_of_code.
// NOTE(review): updates to `line`, `line_offset` and `i` are not visible in this
// excerpt — confirm against the full source.
98 void lexer::process_newline()
// Lines without lexemes are not recorded.
100 if(!current_line
.lexemes
.empty())
102 current_line
.line
= line
;
103 lines
.push_back(current_line
);
// Text of the line just finished, from line_offset up to (not including) offset i.
// Its use is not visible in this excerpt.
105 std::string line_string
= input
.substr(line_offset
, i
- line_offset
);
106 current_line
= line_of_code();
// Fragment of the main scanning loop. The signature of the enclosing function is not
// visible in this excerpt, and the branch conditions that select between the parse
// helpers below are missing as well.
// The for statement has no increment clause: `i` is advanced inside the body
// (parse_name, for instance, moves `i` itself).
120 for(i
= 0, end
= input
.size(); i
< end
;)
// Operators are tried first at every position.
122 if(parse_operator(current_line
))
125 char const tab
= '\t';
127 char byte
= input
[i
];
// A tab met after the line's indentation has already been counted is an error.
// The reported offset is 1-based and relative to the start of the line.
131 if(current_line
.indentation_level
> 0)
133 error
= lexer_error("Tabs are only permitted at the beginning of a line (offset " + ail::number_to_string(i
- line_offset
+ 1) + ")");
// Count the run of tabs: the first tab sets the level to 1 and each directly
// following tab increments it; the loop body is empty.
136 for(i
++, current_line
.indentation_level
= 1; i
< end
&& input
[i
] == tab
; i
++, current_line
.indentation_level
++);
// String literal; a false result is the failure case handled here.
154 if(!parse_string(current_line
, error
))
// Comment; a false result is the failure case handled here.
160 if(!parse_comment(error
))
// Number; parse_number also receives the error_occured flag.
166 if(parse_number(current_line
, error_occured
))
// Name (the handling when parse_number does not apply is not fully visible).
172 parse_name(current_line
);
// Same flush as in process_newline: a line holding lexemes is stamped and appended.
// NOTE(review): presumably this runs after the loop, for a final line without a
// trailing newline — confirm.
175 if(!current_line
.lexemes
.empty())
177 current_line
.line
= line
;
178 lines
.push_back(current_line
);
// Builds a textual dump of the lexed lines. For every line the line number is
// appended to `output`, followed by each lexeme rendered as "[<to_string()>]".
// NOTE(review): the declaration and return of `output`, and the bodies of the two
// counting loops, are not visible in this excerpt.
184 std::string
visualise_lexemes(std::vector
<line_of_code
> & lines
)
188 BOOST_FOREACH(line_of_code
& current_line
, lines
)
190 std::string number_string
= ail::number_to_string(current_line
.line
);
// Runs (5 - digit count) times; presumably pads the number to width 5 — confirm.
// NOTE(review): `5 - number_string.size()` is evaluated in an unsigned type before
// conversion to `word`; check the behaviour for line numbers with more than 5 digits.
191 for(word i
= 0, end
= 5 - number_string
.size(); i
< end
; i
++)
193 output
+= number_string
;
// Runs once per indentation level; presumably emits the indentation — confirm.
195 for(uword indentation
= 0; indentation
< current_line
.indentation_level
; indentation
++)
198 BOOST_FOREACH(lexeme
& current_lexeme
, current_line
.lexemes
)
204 output
+= "[" + current_lexeme
.to_string() + "]";