1 // lex.h -- Go frontend lexer. -*- C++ -*-
3 // Copyright 2009 The Go Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file.
13 #include "go-linemap.h"
17 // The keywords. These must be in sorted order, other than
18 // KEYWORD_INVALID. They must match the Keywords::mapping_ array in
23 KEYWORD_INVALID
, // Not a keyword.
52 // Pragmas built from magic comments and recorded for functions.
53 // These are used as bits in a bitmask.
54 // The set of values is intended to match the set used by the gc compiler.
58 GOPRAGMA_NOINTERFACE
= 1 << 0, // Method not in type descriptor.
59 GOPRAGMA_NOESCAPE
= 1 << 1, // Args do not escape.
60 GOPRAGMA_NORACE
= 1 << 2, // No race detector.
61 GOPRAGMA_NOSPLIT
= 1 << 3, // Do not split stack.
62 GOPRAGMA_NOINLINE
= 1 << 4, // Do not inline.
63 GOPRAGMA_SYSTEMSTACK
= 1 << 5, // Must run on system stack.
64 GOPRAGMA_NOWRITEBARRIER
= 1 << 6, // No write barriers.
65 GOPRAGMA_NOWRITEBARRIERREC
= 1 << 7, // No write barriers here or callees.
66 GOPRAGMA_YESWRITEBARRIERREC
= 1 << 8, // Stops nowritebarrierrec.
67 GOPRAGMA_MARK
= 1 << 9, // Marker for nowritebarrierrec.
68 GOPRAGMA_CGOUNSAFEARGS
= 1 << 10, // Pointer to arg is pointer to all.
69 GOPRAGMA_UINTPTRESCAPES
= 1 << 11, // uintptr(p) escapes.
70 GOPRAGMA_NOTINHEAP
= 1 << 12 // type is not in heap.
73 // A token returned from the lexer.
78 // Token classification.
83 // Token indicates end of input.
85 // Token is a keyword.
87 // Token is an identifier.
89 // Token is a string of characters.
91 // Token is an operator.
93 // Token is a character constant.
95 // Token is an integer.
97 // Token is a floating point number.
99 // Token is an imaginary number.
105 Token
& operator=(const Token
&);
107 // Get token classification.
109 classification() const
110 { return this->classification_
; }
112 // Make a token for an invalid value.
114 make_invalid_token(Location location
)
115 { return Token(TOKEN_INVALID
, location
); }
117 // Make a token representing end of file.
119 make_eof_token(Location location
)
120 { return Token(TOKEN_EOF
, location
); }
122 // Make a keyword token.
124 make_keyword_token(Keyword keyword
, Location location
)
126 Token
tok(TOKEN_KEYWORD
, location
);
127 tok
.u_
.keyword
= keyword
;
131 // Make an identifier token.
133 make_identifier_token(const std::string
& value
, bool is_exported
,
136 Token
tok(TOKEN_IDENTIFIER
, location
);
137 tok
.u_
.identifier_value
.name
= new std::string(value
);
138 tok
.u_
.identifier_value
.is_exported
= is_exported
;
142 // Make a quoted string token.
144 make_string_token(const std::string
& value
, Location location
)
146 Token
tok(TOKEN_STRING
, location
);
147 tok
.u_
.string_value
= new std::string(value
);
151 // Make an operator token.
153 make_operator_token(Operator op
, Location location
)
155 Token
tok(TOKEN_OPERATOR
, location
);
160 // Make a character constant token.
162 make_character_token(mpz_t val
, Location location
)
164 Token
tok(TOKEN_CHARACTER
, location
);
165 mpz_init(tok
.u_
.integer_value
);
166 mpz_swap(tok
.u_
.integer_value
, val
);
170 // Make an integer token.
172 make_integer_token(mpz_t val
, Location location
)
174 Token
tok(TOKEN_INTEGER
, location
);
175 mpz_init(tok
.u_
.integer_value
);
176 mpz_swap(tok
.u_
.integer_value
, val
);
180 // Make a float token.
182 make_float_token(mpfr_t val
, Location location
)
184 Token
tok(TOKEN_FLOAT
, location
);
185 mpfr_init(tok
.u_
.float_value
);
186 mpfr_swap(tok
.u_
.float_value
, val
);
190 // Make a token for an imaginary number.
192 make_imaginary_token(mpfr_t val
, Location location
)
194 Token
tok(TOKEN_IMAGINARY
, location
);
195 mpfr_init(tok
.u_
.float_value
);
196 mpfr_swap(tok
.u_
.float_value
, val
);
200 // Get the location of the token.
203 { return this->location_
; }
205 // Return whether this is an invalid token.
208 { return this->classification_
== TOKEN_INVALID
; }
210 // Return whether this is the EOF token.
213 { return this->classification_
== TOKEN_EOF
; }
215 // Return the keyword value for a keyword token.
219 go_assert(this->classification_
== TOKEN_KEYWORD
);
220 return this->u_
.keyword
;
223 // Return whether this is an identifier.
225 is_identifier() const
226 { return this->classification_
== TOKEN_IDENTIFIER
; }
228 // Return the identifier.
232 go_assert(this->classification_
== TOKEN_IDENTIFIER
);
233 return *this->u_
.identifier_value
.name
;
236 // Return whether the identifier is exported.
238 is_identifier_exported() const
240 go_assert(this->classification_
== TOKEN_IDENTIFIER
);
241 return this->u_
.identifier_value
.is_exported
;
244 // Return whether this is a string.
248 return this->classification_
== TOKEN_STRING
;
251 // Return the value of a string. The returned value is a string of
256 go_assert(this->classification_
== TOKEN_STRING
);
257 return *this->u_
.string_value
;
260 // Return the value of a character constant.
262 character_value() const
264 go_assert(this->classification_
== TOKEN_CHARACTER
);
265 return &this->u_
.integer_value
;
268 // Return the value of an integer.
270 integer_value() const
272 go_assert(this->classification_
== TOKEN_INTEGER
);
273 return &this->u_
.integer_value
;
276 // Return the value of a float.
280 go_assert(this->classification_
== TOKEN_FLOAT
);
281 return &this->u_
.float_value
;
284 // Return the value of an imaginary number.
286 imaginary_value() const
288 go_assert(this->classification_
== TOKEN_IMAGINARY
);
289 return &this->u_
.float_value
;
292 // Return the operator value for an operator token.
296 go_assert(this->classification_
== TOKEN_OPERATOR
);
300 // Return whether this token is KEYWORD.
302 is_keyword(Keyword keyword
) const
304 return (this->classification_
== TOKEN_KEYWORD
305 && this->u_
.keyword
== keyword
);
308 // Return whether this token is OP.
310 is_op(Operator op
) const
311 { return this->classification_
== TOKEN_OPERATOR
&& this->u_
.op
== op
; }
313 // Print the token for debugging.
318 // Private constructor used by make_..._token functions above.
319 Token(Classification
, Location
);
325 // The token classification.
326 Classification classification_
;
329 // The keyword value for TOKEN_KEYWORD.
331 // The token value for TOKEN_IDENTIFIER.
334 // The name of the identifier. This has been mangled to only
335 // include ASCII characters.
337 // Whether this name should be exported. This is true if the
338 // first letter in the name is upper case.
341 // The string value for TOKEN_STRING.
342 std::string
* string_value
;
343 // The token value for TOKEN_CHARACTER or TOKEN_INTEGER.
345 // The token value for TOKEN_FLOAT or TOKEN_IMAGINARY.
347 // The token value for TOKEN_OPERATOR or the keyword value
350 // The source location.
359 Lex(const char* input_file_name
, FILE* input_file
, Linemap
*linemap
);
363 // Return the next token.
367 // Return the contents of any current //extern comment.
370 { return this->extern_
; }
372 // Return the current set of pragmas, and clear them.
374 get_and_clear_pragmas()
376 unsigned int ret
= this->pragmas_
;
383 std::string ext_name
; // External name; empty to just export.
384 bool is_exported
; // Whether the internal name is exported.
385 Location loc
; // Location of go:linkname directive.
388 : ext_name(), is_exported(false), loc()
391 Linkname(const std::string
& ext_name_a
, bool is_exported_a
, Location loc_a
)
392 : ext_name(ext_name_a
), is_exported(is_exported_a
), loc(loc_a
)
396 typedef std::map
<std::string
, Linkname
> Linknames
;
398 // Return the linknames seen so far, or NULL if none, and clear the
399 // set. These are from go:linkname compiler directives.
401 get_and_clear_linknames()
403 Linknames
* ret
= this->linknames_
;
404 this->linknames_
= NULL
;
408 // Return whether there are any current go:embed patterns.
411 { return !this->embeds_
.empty(); }
413 // If there are any go:embed patterns seen so far, store them in
414 // *EMBEDS and clear the saved set. *EMBEDS must be an empty
417 get_and_clear_embeds(std::vector
<std::string
>* embeds
)
419 go_assert(embeds
->empty());
420 std::swap(*embeds
, this->embeds_
);
423 // Clear any go:embed patterns seen so far. This is used for
427 { this->embeds_
.clear(); }
429 // Return whether the identifier NAME should be exported. NAME is a
430 // mangled name which includes only ASCII characters.
432 is_exported_mangled_name(const std::string
& name
);
434 // Return whether the identifier NAME should be exported. NAME is
435 // an unmangled utf-8 string and may contain non-ASCII characters.
437 is_exported_name(const std::string
& name
);
439 // Return whether the identifier NAME is invalid. When we see an
440 // invalid character we still build an identifier, but we use a
441 // magic string to indicate that the identifier is invalid. We then
442 // use this to avoid knock-on errors.
444 is_invalid_identifier(const std::string
& name
);
446 // A helper function. Append V to STR. IS_CHARACTER is true if V
447 // is a Unicode character which should be converted into UTF-8,
448 // false if it is a byte value to be appended directly. The
449 // location is used to warn about an out of range character.
451 append_char(unsigned int v
, bool is_charater
, std::string
* str
,
454 // A helper function. Fetch a UTF-8 character from STR and store it
455 // in *VALUE. Return the number of bytes read from STR. Return 0
456 // if STR does not point to a valid UTF-8 character.
458 fetch_char(const char* str
, unsigned int *value
);
460 // Return whether C is a Unicode or "C" locale space character.
462 is_unicode_space(unsigned int c
);
464 // Convert the specified hex char into an unsigned integer value.
475 // The current location.
479 // A position CHARS column positions before the current location.
481 earlier_location(int chars
) const;
487 is_base_digit(int base
, char);
495 { return Token::make_invalid_token(this->location()); }
499 { return Token::make_eof_token(this->location()); }
502 make_operator(Operator op
, int chars
)
503 { return Token::make_operator_token(op
, this->earlier_location(chars
)); }
509 could_be_exponent(int base
, const char*, const char*);
527 advance_one_utf8_char(const char*, unsigned int*, bool*);
530 advance_one_char(const char*, bool, unsigned int*, bool*);
533 is_unicode_digit(unsigned int c
);
536 is_unicode_letter(unsigned int c
);
539 is_unicode_uppercase(unsigned int c
);
542 is_in_unicode_range(unsigned int C
, const Unicode_range
* ranges
,
546 three_character_operator(char, char, char);
549 two_character_operator(char, char);
552 one_character_operator(char);
555 skip_c_comment(bool* found_newline
);
561 gather_embed(const char*, const char*);
563 // The input file name.
564 const char* input_file_name_ ATTRIBUTE_UNUSED
;
567 // The object used to keep track of file names and line numbers.
569 // The line buffer. This holds the current line.
571 // The size of the line buffer.
573 // The number of characters in the current line.
575 // The current offset in linebuf_.
577 // The current line number.
579 // Whether to add a semicolon if we see a newline now.
580 bool add_semi_at_eol_
;
581 // Pragmas for the next function, from magic comments.
582 unsigned int pragmas_
;
583 // The external name to use for a function declaration, from a magic
586 // The list of //go:linkname comments, if any.
587 Linknames
* linknames_
;
588 // The list of //go:embed patterns, if any.
589 std::vector
<std::string
> embeds_
;
592 #endif // !defined(GO_LEX_H)