1 // lex.h -- Go frontend lexer. -*- C++ -*-
3 // Copyright 2009 The Go Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file.
14 #include "go-linemap.h"
18 // The keywords. These must be in sorted order, other than
19 // KEYWORD_INVALID. They must match the Keywords::mapping_ array in
24 KEYWORD_INVALID
, // Not a keyword.
53 // A token returned from the lexer.
58 // Token classification.
63 // Token indicates end of input.
65 // Token is a keyword.
67 // Token is an identifier.
69 // Token is a string of characters.
71 // Token is an operator.
73 // Token is a character constant.
75 // Token is an integer.
77 // Token is a floating point number.
79 // Token is an imaginary number.
85 Token
& operator=(const Token
&);
87 // Get token classification.
89 classification() const
90 { return this->classification_
; }
92 // Make a token for an invalid value.
94 make_invalid_token(Location location
)
95 { return Token(TOKEN_INVALID
, location
); }
97 // Make a token representing end of file.
99 make_eof_token(Location location
)
100 { return Token(TOKEN_EOF
, location
); }
102 // Make a keyword token.
104 make_keyword_token(Keyword keyword
, Location location
)
106 Token
tok(TOKEN_KEYWORD
, location
);
107 tok
.u_
.keyword
= keyword
;
111 // Make an identifier token.
113 make_identifier_token(const std::string
& value
, bool is_exported
,
116 Token
tok(TOKEN_IDENTIFIER
, location
);
117 tok
.u_
.identifier_value
.name
= new std::string(value
);
118 tok
.u_
.identifier_value
.is_exported
= is_exported
;
122 // Make a quoted string token.
124 make_string_token(const std::string
& value
, Location location
)
126 Token
tok(TOKEN_STRING
, location
);
127 tok
.u_
.string_value
= new std::string(value
);
131 // Make an operator token.
133 make_operator_token(Operator op
, Location location
)
135 Token
tok(TOKEN_OPERATOR
, location
);
140 // Make a character constant token.
142 make_character_token(mpz_t val
, Location location
)
144 Token
tok(TOKEN_CHARACTER
, location
);
145 mpz_init(tok
.u_
.integer_value
);
146 mpz_swap(tok
.u_
.integer_value
, val
);
150 // Make an integer token.
152 make_integer_token(mpz_t val
, Location location
)
154 Token
tok(TOKEN_INTEGER
, location
);
155 mpz_init(tok
.u_
.integer_value
);
156 mpz_swap(tok
.u_
.integer_value
, val
);
160 // Make a float token.
162 make_float_token(mpfr_t val
, Location location
)
164 Token
tok(TOKEN_FLOAT
, location
);
165 mpfr_init(tok
.u_
.float_value
);
166 mpfr_swap(tok
.u_
.float_value
, val
);
170 // Make a token for an imaginary number.
172 make_imaginary_token(mpfr_t val
, Location location
)
174 Token
tok(TOKEN_IMAGINARY
, location
);
175 mpfr_init(tok
.u_
.float_value
);
176 mpfr_swap(tok
.u_
.float_value
, val
);
180 // Get the location of the token.
183 { return this->location_
; }
185 // Return whether this is an invalid token.
188 { return this->classification_
== TOKEN_INVALID
; }
190 // Return whether this is the EOF token.
193 { return this->classification_
== TOKEN_EOF
; }
195 // Return the keyword value for a keyword token.
199 go_assert(this->classification_
== TOKEN_KEYWORD
);
200 return this->u_
.keyword
;
203 // Return whether this is an identifier.
205 is_identifier() const
206 { return this->classification_
== TOKEN_IDENTIFIER
; }
208 // Return the identifier.
212 go_assert(this->classification_
== TOKEN_IDENTIFIER
);
213 return *this->u_
.identifier_value
.name
;
216 // Return whether the identifier is exported.
218 is_identifier_exported() const
220 go_assert(this->classification_
== TOKEN_IDENTIFIER
);
221 return this->u_
.identifier_value
.is_exported
;
224 // Return whether this is a string.
228 return this->classification_
== TOKEN_STRING
;
231 // Return the value of a string. The returned value is a string of
236 go_assert(this->classification_
== TOKEN_STRING
);
237 return *this->u_
.string_value
;
240 // Return the value of a character constant.
242 character_value() const
244 go_assert(this->classification_
== TOKEN_CHARACTER
);
245 return &this->u_
.integer_value
;
248 // Return the value of an integer.
250 integer_value() const
252 go_assert(this->classification_
== TOKEN_INTEGER
);
253 return &this->u_
.integer_value
;
256 // Return the value of a float.
260 go_assert(this->classification_
== TOKEN_FLOAT
);
261 return &this->u_
.float_value
;
264 // Return the value of an imaginary number.
266 imaginary_value() const
268 go_assert(this->classification_
== TOKEN_IMAGINARY
);
269 return &this->u_
.float_value
;
272 // Return the operator value for an operator token.
276 go_assert(this->classification_
== TOKEN_OPERATOR
);
280 // Return whether this token is KEYWORD.
282 is_keyword(Keyword keyword
) const
284 return (this->classification_
== TOKEN_KEYWORD
285 && this->u_
.keyword
== keyword
);
288 // Return whether this token is OP.
290 is_op(Operator op
) const
291 { return this->classification_
== TOKEN_OPERATOR
&& this->u_
.op
== op
; }
293 // Print the token for debugging.
298 // Private constructor used by make_..._token functions above.
299 Token(Classification
, Location
);
305 // The token classification.
306 Classification classification_
;
309 // The keyword value for TOKEN_KEYWORD.
311 // The token value for TOKEN_IDENTIFIER.
314 // The name of the identifier. This has been mangled to only
315 // include ASCII characters.
317 // Whether this name should be exported. This is true if the
318 // first letter in the name is upper case.
321 // The string value for TOKEN_STRING.
322 std::string
* string_value
;
323 // The token value for TOKEN_CHARACTER or TOKEN_INTEGER.
325 // The token value for TOKEN_FLOAT or TOKEN_IMAGINARY.
327 // The token value for TOKEN_OPERATOR or the keyword value
330 // The source location.
339 Lex(const char* input_file_name
, FILE* input_file
, Linemap
*linemap
);
343 // Return the next token.
347 // Return the contents of any current //extern comment.
350 { return this->extern_
; }
352 // Return whether the identifier NAME should be exported. NAME is a
353 // mangled name which includes only ASCII characters.
355 is_exported_name(const std::string
& name
);
357 // Return whether the identifier NAME is invalid. When we see an
358 // invalid character we still build an identifier, but we use a
359 // magic string to indicate that the identifier is invalid. We then
360 // use this to avoid knock-on errors.
362 is_invalid_identifier(const std::string
& name
);
364 // A helper function. Append V to STR. IS_CHARACTER is true if V
365 // is a Unicode character which should be converted into UTF-8,
366 // false if it is a byte value to be appended directly. The
367 // location is used to warn about an out of range character.
369 append_char(unsigned int v
, bool is_charater
, std::string
* str
,
372 // A helper function. Fetch a UTF-8 character from STR and store it
373 // in *VALUE. Return the number of bytes read from STR. Return 0
374 // if STR does not point to a valid UTF-8 character.
376 fetch_char(const char* str
, unsigned int *value
);
385 // The current location.
389 // A position CHARS column positions before the current location.
391 earlier_location(int chars
) const;
402 { return Token::make_invalid_token(this->location()); }
406 { return Token::make_eof_token(this->location()); }
409 make_operator(Operator op
, int chars
)
410 { return Token::make_operator_token(op
, this->earlier_location(chars
)); }
416 could_be_exponent(const char*, const char*);
431 advance_one_utf8_char(const char*, unsigned int*, bool*);
434 advance_one_char(const char*, bool, unsigned int*, bool*);
437 is_unicode_digit(unsigned int c
);
440 is_unicode_letter(unsigned int c
);
443 is_unicode_uppercase(unsigned int c
);
446 is_in_unicode_range(unsigned int C
, const Unicode_range
* ranges
,
450 three_character_operator(char, char, char);
453 two_character_operator(char, char);
456 one_character_operator(char);
464 // The input file name.
465 const char* input_file_name_
;
468 // The object used to keep track of file names and line numbers.
470 // The line buffer. This holds the current line.
472 // The size of the line buffer.
474 // The number of characters in the current line.
476 // The current offset in linebuf_.
478 // The current line number.
480 // Whether to add a semicolon if we see a newline now.
481 bool add_semi_at_eol_
;
482 // The external name to use for a function declaration, from a magic
487 #endif // !defined(GO_LEX_H)