1 // lex.h -- Go frontend lexer. -*- C++ -*-
3 // Copyright 2009 The Go Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file.
13 #include "go-linemap.h"
17 // The keywords. These must be in sorted order, other than
18 // KEYWORD_INVALID. They must match the Keywords::mapping_ array in
23 KEYWORD_INVALID
, // Not a keyword.
52 // A token returned from the lexer.
57 // Token classification.
62 // Token indicates end of input.
64 // Token is a keyword.
66 // Token is an identifier.
68 // Token is a string of characters.
70 // Token is an operator.
72 // Token is a character constant.
74 // Token is an integer.
76 // Token is a floating point number.
78 // Token is an imaginary number.
84 Token
& operator=(const Token
&);
86 // Get token classification.  This returns the classification_
// member; the function is const and does not modify the token.
88 classification() const
89 { return this->classification_
; }
91 // Make a token for an invalid value.  The result is a Token
// constructed with classification TOKEN_INVALID at LOCATION.
93 make_invalid_token(Location location
)
94 { return Token(TOKEN_INVALID
, location
); }
96 // Make a token representing end of file.  The result is a Token
// constructed with classification TOKEN_EOF at LOCATION.
98 make_eof_token(Location location
)
99 { return Token(TOKEN_EOF
, location
); }
101 // Make a keyword token.  A Token is constructed with
// classification TOKEN_KEYWORD at LOCATION, and KEYWORD is recorded
// in the keyword member of its u_ field.
103 make_keyword_token(Keyword keyword
, Location location
)
105 Token
tok(TOKEN_KEYWORD
, location
);
106 tok
.u_
.keyword
= keyword
;
110 // Make an identifier token.  A Token is constructed with
// classification TOKEN_IDENTIFIER, and its identifier_value records a
// copy of VALUE together with the IS_EXPORTED flag.
112 make_identifier_token(const std::string
& value
, bool is_exported
,
115 Token
tok(TOKEN_IDENTIFIER
, location
);
// VALUE is copied into a std::string allocated with new; the token
// holds a pointer to that copy rather than to the caller's string.
// NOTE(review): the code that releases this copy is not visible
// here -- confirm where it is deleted.
116 tok
.u_
.identifier_value
.name
= new std::string(value
);
117 tok
.u_
.identifier_value
.is_exported
= is_exported
;
121 // Make a quoted string token.  A Token is constructed with
// classification TOKEN_STRING at LOCATION, and its string_value
// points at a copy of VALUE.
123 make_string_token(const std::string
& value
, Location location
)
125 Token
tok(TOKEN_STRING
, location
);
// VALUE is copied into a std::string allocated with new, so the
// token does not refer to the caller's string.
// NOTE(review): the code that releases this copy is not visible
// here -- confirm where it is deleted.
126 tok
.u_
.string_value
= new std::string(value
);
130 // Make an operator token.  A Token is constructed with
// classification TOKEN_OPERATOR at LOCATION.
132 make_operator_token(Operator op
, Location location
)
134 Token
tok(TOKEN_OPERATOR
, location
);
139 // Make a character constant token.  A Token is constructed with
// classification TOKEN_CHARACTER at LOCATION and takes over the
// value held in VAL.
141 make_character_token(mpz_t val
, Location location
)
143 Token
tok(TOKEN_CHARACTER
, location
);
// Initialize the token's integer_value and then swap it with VAL:
// the token ends up with VAL's contents, and VAL is left holding the
// value produced by mpz_init.  VAL is therefore modified.
144 mpz_init(tok
.u_
.integer_value
);
145 mpz_swap(tok
.u_
.integer_value
, val
);
149 // Make an integer token.  A Token is constructed with
// classification TOKEN_INTEGER at LOCATION and takes over the value
// held in VAL.
151 make_integer_token(mpz_t val
, Location location
)
153 Token
tok(TOKEN_INTEGER
, location
);
// Initialize the token's integer_value and then swap it with VAL:
// the token ends up with VAL's contents, and VAL is left holding the
// value produced by mpz_init.  VAL is therefore modified.
154 mpz_init(tok
.u_
.integer_value
);
155 mpz_swap(tok
.u_
.integer_value
, val
);
159 // Make a float token.  A Token is constructed with classification
// TOKEN_FLOAT at LOCATION and takes over the value held in VAL.
161 make_float_token(mpfr_t val
, Location location
)
163 Token
tok(TOKEN_FLOAT
, location
);
// Initialize the token's float_value and then swap it with VAL: the
// token ends up with VAL's contents, and VAL is left holding the
// value produced by mpfr_init.  VAL is therefore modified.
164 mpfr_init(tok
.u_
.float_value
);
165 mpfr_swap(tok
.u_
.float_value
, val
);
169 // Make a token for an imaginary number.  A Token is constructed
// with classification TOKEN_IMAGINARY at LOCATION and takes over the
// value held in VAL.  The value is kept in the same float_value
// member that TOKEN_FLOAT tokens use.
171 make_imaginary_token(mpfr_t val
, Location location
)
173 Token
tok(TOKEN_IMAGINARY
, location
);
// Initialize the token's float_value and then swap it with VAL: the
// token ends up with VAL's contents, and VAL is left holding the
// value produced by mpfr_init.  VAL is therefore modified.
174 mpfr_init(tok
.u_
.float_value
);
175 mpfr_swap(tok
.u_
.float_value
, val
);
179 // Get the location of the token.
182 { return this->location_
; }
184 // Return whether this is an invalid token.
187 { return this->classification_
== TOKEN_INVALID
; }
189 // Return whether this is the EOF token.
192 { return this->classification_
== TOKEN_EOF
; }
194 // Return the keyword value for a keyword token.
198 go_assert(this->classification_
== TOKEN_KEYWORD
);
199 return this->u_
.keyword
;
202 // Return whether this is an identifier, i.e. whether the
// classification is TOKEN_IDENTIFIER.
204 is_identifier() const
205 { return this->classification_
== TOKEN_IDENTIFIER
; }
207 // Return the identifier.
211 go_assert(this->classification_
== TOKEN_IDENTIFIER
);
212 return *this->u_
.identifier_value
.name
;
215 // Return whether the identifier is exported.  This may only be
// called on an identifier token: it asserts that the classification
// is TOKEN_IDENTIFIER before reading the stored is_exported flag.
217 is_identifier_exported() const
219 go_assert(this->classification_
== TOKEN_IDENTIFIER
);
220 return this->u_
.identifier_value
.is_exported
;
223 // Return whether this is a string.
227 return this->classification_
== TOKEN_STRING
;
230 // Return the value of a string. The returned value is a string of
235 go_assert(this->classification_
== TOKEN_STRING
);
236 return *this->u_
.string_value
;
239 // Return the value of a character constant.  This may only be
// called on a character token: it asserts that the classification is
// TOKEN_CHARACTER.  The result is the address of the token's own
// integer_value member, so it refers to storage inside this token.
241 character_value() const
243 go_assert(this->classification_
== TOKEN_CHARACTER
);
244 return &this->u_
.integer_value
;
247 // Return the value of an integer.  This may only be called on an
// integer token: it asserts that the classification is TOKEN_INTEGER.
// The result is the address of the token's own integer_value member,
// so it refers to storage inside this token.
249 integer_value() const
251 go_assert(this->classification_
== TOKEN_INTEGER
);
252 return &this->u_
.integer_value
;
255 // Return the value of a float.
259 go_assert(this->classification_
== TOKEN_FLOAT
);
260 return &this->u_
.float_value
;
263 // Return the value of an imaginary number.  This may only be
// called on an imaginary token: it asserts that the classification is
// TOKEN_IMAGINARY.  The result is the address of the token's own
// float_value member, so it refers to storage inside this token.
265 imaginary_value() const
267 go_assert(this->classification_
== TOKEN_IMAGINARY
);
268 return &this->u_
.float_value
;
271 // Return the operator value for an operator token.
275 go_assert(this->classification_
== TOKEN_OPERATOR
);
279 // Return whether this token is KEYWORD.  The result is true only
// when the classification is TOKEN_KEYWORD and the stored keyword
// equals KEYWORD; the stored keyword is only read after the
// classification test has succeeded.
281 is_keyword(Keyword keyword
) const
283 return (this->classification_
== TOKEN_KEYWORD
284 && this->u_
.keyword
== keyword
);
287 // Return whether this token is OP.  The result is true only when
// the classification is TOKEN_OPERATOR and the stored operator equals
// OP; the stored operator is only read after the classification test
// has succeeded.
289 is_op(Operator op
) const
290 { return this->classification_
== TOKEN_OPERATOR
&& this->u_
.op
== op
; }
292 // Print the token for debugging.
297 // Private constructor used by make_..._token functions above.
298 Token(Classification
, Location
);
304 // The token classification.
305 Classification classification_
;
308 // The keyword value for TOKEN_KEYWORD.
310 // The token value for TOKEN_IDENTIFIER.
313 // The name of the identifier. This has been mangled to only
314 // include ASCII characters.
316 // Whether this name should be exported. This is true if the
317 // first letter in the name is upper case.
320 // The string value for TOKEN_STRING.
321 std::string
* string_value
;
322 // The token value for TOKEN_CHARACTER or TOKEN_INTEGER.
324 // The token value for TOKEN_FLOAT or TOKEN_IMAGINARY.
326 // The token value for TOKEN_OPERATOR or the keyword value
329 // The source location.
338 Lex(const char* input_file_name
, FILE* input_file
, Linemap
*linemap
);
342 // Return the next token.
346 // Return the contents of any current //extern comment.
349 { return this->extern_
; }
351 // Return whether we have seen a //go:nointerface comment, clearing
354 get_and_clear_nointerface()
// Save the current value of saw_nointerface_ in RET before the
// member is reset to false.
356 bool ret
= this->saw_nointerface_
;
357 this->saw_nointerface_
= false;
361 // Return whether the identifier NAME should be exported. NAME is a
362 // mangled name which includes only ASCII characters.
364 is_exported_name(const std::string
& name
);
366 // Return whether the identifier NAME is invalid. When we see an
367 // invalid character we still build an identifier, but we use a
368 // magic string to indicate that the identifier is invalid. We then
369 // use this to avoid knock-on errors.
371 is_invalid_identifier(const std::string
& name
);
373 // A helper function. Append V to STR. IS_CHARACTER is true if V
374 // is a Unicode character which should be converted into UTF-8,
375 // false if it is a byte value to be appended directly. The
376 // location is used to warn about an out of range character.
378 append_char(unsigned int v
, bool is_charater
, std::string
* str
,
381 // A helper function. Fetch a UTF-8 character from STR and store it
382 // in *VALUE. Return the number of bytes read from STR. Return 0
383 // if STR does not point to a valid UTF-8 character.
385 fetch_char(const char* str
, unsigned int *value
);
387 // Return whether C is a Unicode or "C" locale space character.
389 is_unicode_space(unsigned int c
);
398 // The current location.
402 // A position CHARS column positions before the current location.
404 earlier_location(int chars
) const;
415 { return Token::make_invalid_token(this->location()); }
419 { return Token::make_eof_token(this->location()); }
// Make an operator token for OP.  The token's location is
// this->earlier_location(CHARS), i.e. a position CHARS column
// positions before the current location.
422 make_operator(Operator op
, int chars
)
423 { return Token::make_operator_token(op
, this->earlier_location(chars
)); }
429 could_be_exponent(const char*, const char*);
444 advance_one_utf8_char(const char*, unsigned int*, bool*);
447 advance_one_char(const char*, bool, unsigned int*, bool*);
450 is_unicode_digit(unsigned int c
);
453 is_unicode_letter(unsigned int c
);
456 is_unicode_uppercase(unsigned int c
);
459 is_in_unicode_range(unsigned int C
, const Unicode_range
* ranges
,
463 three_character_operator(char, char, char);
466 two_character_operator(char, char);
469 one_character_operator(char);
477 // The input file name.
478 const char* input_file_name_
;
481 // The object used to keep track of file names and line numbers.
483 // The line buffer. This holds the current line.
485 // The size of the line buffer.
487 // The number of characters in the current line.
489 // The current offset in linebuf_.
491 // The current line number.
493 // Whether to add a semicolon if we see a newline now.
494 bool add_semi_at_eol_
;
495 // Whether we just saw a magic go:nointerface comment.
496 bool saw_nointerface_
;
497 // The external name to use for a function declaration, from a magic
502 #endif // !defined(GO_LEX_H)