i386: Allow all register_operand SUBREGs in x86_ternlog_idx.
[official-gcc.git] / gcc / go / gofrontend / lex.h
blob701e5d4b353ba310f847b9d4afb4f5f0a86638ac
1 // lex.h -- Go frontend lexer. -*- C++ -*-
3 // Copyright 2009 The Go Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file.
7 #ifndef GO_LEX_H
8 #define GO_LEX_H
10 #include <mpfr.h>
12 #include "operator.h"
13 #include "go-linemap.h"
15 struct Unicode_range;
// The keywords.  These must be in sorted order, other than
// KEYWORD_INVALID.  They must match the Keywords::mapping_ array in
// lex.cc.
//
// KEYWORD_INVALID is listed first and therefore has the value 0; the
// remaining enumerators take consecutive values in the order shown,
// which is alphabetical by name.

enum Keyword
{
  KEYWORD_INVALID,	// Not a keyword.
  KEYWORD_ASM,
  KEYWORD_BREAK,
  KEYWORD_CASE,
  KEYWORD_CHAN,
  KEYWORD_CONST,
  KEYWORD_CONTINUE,
  KEYWORD_DEFAULT,
  KEYWORD_DEFER,
  KEYWORD_ELSE,
  KEYWORD_FALLTHROUGH,
  KEYWORD_FOR,
  KEYWORD_FUNC,
  KEYWORD_GO,
  KEYWORD_GOTO,
  KEYWORD_IF,
  KEYWORD_IMPORT,
  KEYWORD_INTERFACE,
  KEYWORD_MAP,
  KEYWORD_PACKAGE,
  KEYWORD_RANGE,
  KEYWORD_RETURN,
  KEYWORD_SELECT,
  KEYWORD_STRUCT,
  KEYWORD_SWITCH,
  KEYWORD_TYPE,
  KEYWORD_VAR
};
// Pragmas built from magic comments and recorded for functions.
// These are used as bits in a bitmask.
// The set of values is intended to be the same as the gc compiler.
//
// Each enumerator occupies its own bit (1 << N), so any combination
// of them can be OR'ed together into a single mask and tested
// individually with a bitwise AND.

enum GoPragma
{
  GOPRAGMA_NOINTERFACE = 1 << 0,	// Method not in type descriptor.
  GOPRAGMA_NOESCAPE = 1 << 1,		// Args do not escape.
  GOPRAGMA_NORACE = 1 << 2,		// No race detector.
  GOPRAGMA_NOSPLIT = 1 << 3,		// Do not split stack.
  GOPRAGMA_NOINLINE = 1 << 4,		// Do not inline.
  GOPRAGMA_SYSTEMSTACK = 1 << 5,	// Must run on system stack.
  GOPRAGMA_NOWRITEBARRIER = 1 << 6,	// No write barriers.
  GOPRAGMA_NOWRITEBARRIERREC = 1 << 7,	// No write barriers here or callees.
  GOPRAGMA_YESWRITEBARRIERREC = 1 << 8,	// Stops nowritebarrierrec.
  GOPRAGMA_MARK = 1 << 9,		// Marker for nowritebarrierrec.
  GOPRAGMA_CGOUNSAFEARGS = 1 << 10,	// Pointer to arg is pointer to all.
  GOPRAGMA_UINTPTRESCAPES = 1 << 11,	// uintptr(p) escapes.
  GOPRAGMA_NOTINHEAP = 1 << 12		// type is not in heap.
};
73 // A token returned from the lexer.
//
// Every token records its classification and its source location.
// The value, if any, lives in the union u_; which member is
// meaningful is determined by the classification.  Tokens are built
// with the static make_*_token functions below, because the
// (Classification, Location) constructor is private.
75 class Token
77 public:
78 // Token classification.
79 enum Classification
81 // Token is invalid.
82 TOKEN_INVALID,
83 // Token indicates end of input.
84 TOKEN_EOF,
85 // Token is a keyword.
86 TOKEN_KEYWORD,
87 // Token is an identifier.
88 TOKEN_IDENTIFIER,
89 // Token is a string of characters.
90 TOKEN_STRING,
91 // Token is an operator.
92 TOKEN_OPERATOR,
93 // Token is a character constant.
94 TOKEN_CHARACTER,
95 // Token is an integer.
96 TOKEN_INTEGER,
97 // Token is a floating point number.
98 TOKEN_FLOAT,
99 // Token is an imaginary number.
100 TOKEN_IMAGINARY
// Destructor, copy constructor and copy assignment are user-declared;
// their definitions are not in this header.
103 ~Token();
104 Token(const Token&);
105 Token& operator=(const Token&);
107 // Get token classification.
108 Classification
109 classification() const
110 { return this->classification_; }
112 // Make a token for an invalid value.
113 static Token
114 make_invalid_token(Location location)
115 { return Token(TOKEN_INVALID, location); }
117 // Make a token representing end of file.
118 static Token
119 make_eof_token(Location location)
120 { return Token(TOKEN_EOF, location); }
122 // Make a keyword token.
123 static Token
124 make_keyword_token(Keyword keyword, Location location)
126 Token tok(TOKEN_KEYWORD, location);
127 tok.u_.keyword = keyword;
128 return tok;
131 // Make an identifier token.
// VALUE is copied into a std::string allocated with new.
// NOTE(review): the matching delete is not visible in this header;
// presumably clear() or the destructor releases it -- confirm in
// lex.cc.
132 static Token
133 make_identifier_token(const std::string& value, bool is_exported,
134 Location location)
136 Token tok(TOKEN_IDENTIFIER, location);
137 tok.u_.identifier_value.name = new std::string(value);
138 tok.u_.identifier_value.is_exported = is_exported;
139 return tok;
142 // Make a quoted string token.
// VALUE is copied into a std::string allocated with new, as for
// identifier tokens.
143 static Token
144 make_string_token(const std::string& value, Location location)
146 Token tok(TOKEN_STRING, location);
147 tok.u_.string_value = new std::string(value);
148 return tok;
151 // Make an operator token.
152 static Token
153 make_operator_token(Operator op, Location location)
155 Token tok(TOKEN_OPERATOR, location);
156 tok.u_.op = op;
157 return tok;
160 // Make a character constant token.
// VAL is not copied: the token's value is set up with mpz_init and
// then exchanged with VAL via mpz_swap, so the token takes over
// VAL's contents and VAL is left holding the freshly initialized
// value.
161 static Token
162 make_character_token(mpz_t val, Location location)
164 Token tok(TOKEN_CHARACTER, location);
165 mpz_init(tok.u_.integer_value);
166 mpz_swap(tok.u_.integer_value, val);
167 return tok;
170 // Make an integer token.
// As for character tokens, VAL's contents are swapped into the
// token rather than copied.
171 static Token
172 make_integer_token(mpz_t val, Location location)
174 Token tok(TOKEN_INTEGER, location);
175 mpz_init(tok.u_.integer_value);
176 mpz_swap(tok.u_.integer_value, val);
177 return tok;
180 // Make a float token.
// VAL is not copied: the token's value is set up with mpfr_init and
// then exchanged with VAL via mpfr_swap.
181 static Token
182 make_float_token(mpfr_t val, Location location)
184 Token tok(TOKEN_FLOAT, location);
185 mpfr_init(tok.u_.float_value);
186 mpfr_swap(tok.u_.float_value, val);
187 return tok;
190 // Make a token for an imaginary number.
// The value is stored in the same float_value member as for
// TOKEN_FLOAT, and VAL is swapped in the same way.
191 static Token
192 make_imaginary_token(mpfr_t val, Location location)
194 Token tok(TOKEN_IMAGINARY, location);
195 mpfr_init(tok.u_.float_value);
196 mpfr_swap(tok.u_.float_value, val);
197 return tok;
200 // Get the location of the token.
201 Location
202 location() const
203 { return this->location_; }
205 // Return whether this is an invalid token.
206 bool
207 is_invalid() const
208 { return this->classification_ == TOKEN_INVALID; }
210 // Return whether this is the EOF token.
211 bool
212 is_eof() const
213 { return this->classification_ == TOKEN_EOF; }
215 // Return the keyword value for a keyword token.
// The classification is checked with go_assert; the same holds for
// the other value accessors below.
216 Keyword
217 keyword() const
219 go_assert(this->classification_ == TOKEN_KEYWORD);
220 return this->u_.keyword;
223 // Return whether this is an identifier.
224 bool
225 is_identifier() const
226 { return this->classification_ == TOKEN_IDENTIFIER; }
228 // Return the identifier.
// The result is a reference to the string stored in this token.
229 const std::string&
230 identifier() const
232 go_assert(this->classification_ == TOKEN_IDENTIFIER);
233 return *this->u_.identifier_value.name;
236 // Return whether the identifier is exported.
237 bool
238 is_identifier_exported() const
240 go_assert(this->classification_ == TOKEN_IDENTIFIER);
241 return this->u_.identifier_value.is_exported;
244 // Return whether this is a string.
245 bool
246 is_string() const
248 return this->classification_ == TOKEN_STRING;
251 // Return the value of a string. The returned value is a string of
252 // UTF-8 characters.
// Unlike identifier(), this returns by value, i.e. a copy of the
// stored string.
253 std::string
254 string_value() const
256 go_assert(this->classification_ == TOKEN_STRING);
257 return *this->u_.string_value;
260 // Return the value of a character constant.
// The returned pointer refers to the value stored inside this token.
261 const mpz_t*
262 character_value() const
264 go_assert(this->classification_ == TOKEN_CHARACTER);
265 return &this->u_.integer_value;
268 // Return the value of an integer.
// The returned pointer refers to the value stored inside this token.
269 const mpz_t*
270 integer_value() const
272 go_assert(this->classification_ == TOKEN_INTEGER);
273 return &this->u_.integer_value;
276 // Return the value of a float.
// The returned pointer refers to the value stored inside this token.
277 const mpfr_t*
278 float_value() const
280 go_assert(this->classification_ == TOKEN_FLOAT);
281 return &this->u_.float_value;
284 // Return the value of an imaginary number.
// The returned pointer refers to the value stored inside this token.
285 const mpfr_t*
286 imaginary_value() const
288 go_assert(this->classification_ == TOKEN_IMAGINARY);
289 return &this->u_.float_value;
292 // Return the operator value for an operator token.
293 Operator
294 op() const
296 go_assert(this->classification_ == TOKEN_OPERATOR);
297 return this->u_.op;
300 // Return whether this token is KEYWORD.
// Unlike keyword(), this does not assert: a token that is not a
// keyword simply yields false.
301 bool
302 is_keyword(Keyword keyword) const
304 return (this->classification_ == TOKEN_KEYWORD
305 && this->u_.keyword == keyword);
308 // Return whether this token is OP.
// Unlike op(), this does not assert: a token that is not an operator
// simply yields false.
309 bool
310 is_op(Operator op) const
311 { return this->classification_ == TOKEN_OPERATOR && this->u_.op == op; }
313 // Print the token for debugging.
314 void
315 print(FILE*) const;
317 private:
318 // Private constructor used by make_..._token functions above.
319 Token(Classification, Location);
321 // Clear the token.
322 void
323 clear();
325 // The token classification.
326 Classification classification_;
// The token value.  Only the member that corresponds to
// classification_ is meaningful.
327 union
329 // The keyword value for TOKEN_KEYWORD.
330 Keyword keyword;
331 // The token value for TOKEN_IDENTIFIER.
332 struct
334 // The name of the identifier. This has been mangled to only
335 // include ASCII characters.
336 std::string* name;
337 // Whether this name should be exported. This is true if the
338 // first letter in the name is upper case.
339 bool is_exported;
340 } identifier_value;
341 // The string value for TOKEN_STRING.
342 std::string* string_value;
343 // The token value for TOKEN_CHARACTER or TOKEN_INTEGER.
344 mpz_t integer_value;
345 // The token value for TOKEN_FLOAT or TOKEN_IMAGINARY.
346 mpfr_t float_value;
347 // The operator value for TOKEN_OPERATOR.
348 Operator op;
349 } u_;
350 // The source location.
351 Location location_;
354 // The lexer itself.
//
// A Lex reads its input from a FILE* and hands it out one token at a
// time through next_token.  Besides tokens it accumulates information
// taken from magic comments (pragmas, the //extern name, go:linkname
// directives and go:embed patterns), which callers retrieve with the
// accessors below.
356 class Lex
358 public:
// Create a lexer for INPUT_FILE.  NOTE(review): the constructor body
// is not in this header; presumably the arguments are stored in the
// like-named members input_file_name_, input_file_ and linemap_ --
// confirm in lex.cc.
359 Lex(const char* input_file_name, FILE* input_file, Linemap *linemap);
361 ~Lex();
363 // Return the next token.
364 Token
365 next_token();
367 // Return the contents of any current //extern comment.
368 const std::string&
369 extern_name() const
370 { return this->extern_; }
372 // Return the current set of pragmas, and clear them.
// The result is a bitmask; pragmas_ is reset to 0 before returning.
373 unsigned int
374 get_and_clear_pragmas()
376 unsigned int ret = this->pragmas_;
377 this->pragmas_ = 0;
378 return ret;
// The information recorded for one go:linkname directive.
381 struct Linkname
383 std::string ext_name; // External name; empty to just export.
384 bool is_exported; // Whether the internal name is exported.
385 Location loc; // Location of go:linkname directive.
// An empty entry: no external name, not exported, default location.
387 Linkname()
388 : ext_name(), is_exported(false), loc()
// An entry initialized from the given values.
391 Linkname(const std::string& ext_name_a, bool is_exported_a, Location loc_a)
392 : ext_name(ext_name_a), is_exported(is_exported_a), loc(loc_a)
// Linkname entries keyed by a string.  NOTE(review): presumably the
// key is the internal Go name the directive applies to -- confirm
// against the code that fills linknames_.
396 typedef std::map<std::string, Linkname> Linknames;
398 // Return the linknames seen so far, or NULL if none, and clear the
399 // set. These are from go:linkname compiler directives.
// The lexer drops its own pointer after handing it out.
// NOTE(review): this suggests the caller becomes responsible for the
// returned map -- confirm.
400 Linknames*
401 get_and_clear_linknames()
403 Linknames* ret = this->linknames_;
404 this->linknames_ = NULL;
405 return ret;
408 // Return whether there are any current go:embed patterns.
409 bool
410 has_embeds() const
411 { return !this->embeds_.empty(); }
413 // If there are any go:embed patterns seen so far, store them in
414 // *EMBEDS and clear the saved set. *EMBEDS must be an empty
415 // vector.
// The patterns are transferred with std::swap, so the saved set ends
// up holding the (empty) previous contents of *EMBEDS.
416 void
417 get_and_clear_embeds(std::vector<std::string>* embeds)
419 go_assert(embeds->empty());
420 std::swap(*embeds, this->embeds_);
423 // Clear any go:embed patterns seen so far. This is used for
424 // erroneous cases.
425 void
426 clear_embeds()
427 { this->embeds_.clear(); }
429 // Return whether the identifier NAME should be exported. NAME is a
430 // mangled name which includes only ASCII characters.
431 static bool
432 is_exported_mangled_name(const std::string& name);
434 // Return whether the identifier NAME should be exported. NAME is
435 // an unmangled utf-8 string and may contain non-ASCII characters.
436 static bool
437 is_exported_name(const std::string& name);
439 // Return whether the identifier NAME is invalid. When we see an
440 // invalid character we still build an identifier, but we use a
441 // magic string to indicate that the identifier is invalid. We then
442 // use this to avoid knockon errors.
443 static bool
444 is_invalid_identifier(const std::string& name);
446 // A helper function. Append V to STR. IS_CHARACTER is true if V
447 // is a Unicode character which should be converted into UTF-8,
448 // false if it is a byte value to be appended directly. The
449 // location is used to warn about an out of range character.
// NOTE(review): the parameter documented above as IS_CHARACTER is
// spelled "is_charater" in the declaration below; rename it in a
// separate code change.
450 static void
451 append_char(unsigned int v, bool is_charater, std::string* str,
452 Location);
454 // A helper function. Fetch a UTF-8 character from STR and store it
455 // in *VALUE. Return the number of bytes read from STR. Return 0
456 // if STR does not point to a valid UTF-8 character.
457 static int
458 fetch_char(const char* str, unsigned int *value);
460 // Return whether C is a Unicode or "C" locale space character.
461 static bool
462 is_unicode_space(unsigned int c);
464 // Convert the specified hex char into an unsigned integer value.
465 static unsigned
466 hex_val(char c);
468 private:
// Line reading helpers.  NOTE(review): these are not documented
// here; see lex.cc for their exact contracts.
469 ssize_t
470 get_line();
472 bool
473 require_line();
475 // The current location.
476 Location
477 location() const;
479 // A position CHARS column positions before the current location.
480 Location
481 earlier_location(int chars) const;
483 static bool
484 is_hex_digit(char);
486 static bool
487 is_base_digit(int base, char);
// Return the numeric value of the octal digit C, computed as
// C - '0'.  C is not range-checked here.
489 static unsigned char
490 octal_value(char c)
491 { return c - '0'; }
// Build an invalid token at the current location.
493 Token
494 make_invalid_token()
495 { return Token::make_invalid_token(this->location()); }
// Build an end-of-file token at the current location.
497 Token
498 make_eof_token()
499 { return Token::make_eof_token(this->location()); }
// Build an operator token for OP whose location is CHARS column
// positions before the current location.
501 Token
502 make_operator(Operator op, int chars)
503 { return Token::make_operator_token(op, this->earlier_location(chars)); }
// Token gathering helpers.  NOTE(review): declared only; each one
// presumably scans one lexical element of the kind its name suggests
// -- see lex.cc.
505 Token
506 gather_identifier();
508 static bool
509 could_be_exponent(int base, const char*, const char*);
511 Token
512 gather_number();
514 void
515 skip_exponent();
517 Token
518 gather_character();
520 Token
521 gather_string();
523 Token
524 gather_raw_string();
526 const char*
527 advance_one_utf8_char(const char*, unsigned int*, bool*);
529 const char*
530 advance_one_char(const char*, bool, unsigned int*, bool*);
532 static bool
533 is_unicode_digit(unsigned int c);
535 static bool
536 is_unicode_letter(unsigned int c);
538 static bool
539 is_unicode_uppercase(unsigned int c);
541 static bool
542 is_in_unicode_range(unsigned int C, const Unicode_range* ranges,
543 size_t range_size);
545 Operator
546 three_character_operator(char, char, char);
548 Operator
549 two_character_operator(char, char);
551 Operator
552 one_character_operator(char);
554 bool
555 skip_c_comment(bool* found_newline);
557 void
558 skip_cpp_comment();
560 void
561 gather_embed(const char*, const char*);
563 // The input file name.
564 const char* input_file_name_ ATTRIBUTE_UNUSED;
565 // The input file.
566 FILE* input_file_;
567 // The object used to keep track of file names and line numbers.
568 Linemap* linemap_;
569 // The line buffer. This holds the current line.
570 char* linebuf_;
571 // The size of the line buffer.
572 size_t linebufsize_;
573 // The number of characters in the current line.
574 size_t linesize_;
575 // The current offset in linebuf_.
576 size_t lineoff_;
577 // The current line number.
578 size_t lineno_;
579 // Whether to add a semicolon if we see a newline now.
580 bool add_semi_at_eol_;
581 // Pragmas for the next function, from magic comments.
582 unsigned int pragmas_;
583 // The external name to use for a function declaration, from a magic
584 // //extern comment.
585 std::string extern_;
586 // The list of //go:linkname comments, if any.
587 Linknames* linknames_;
588 // The list of //go:embed patterns, if any.
589 std::vector<std::string> embeds_;
592 #endif // !defined(GO_LEX_H)