* auto-profile.c (afdo_annotate_cfg): Use update_max_bb_count.
[official-gcc.git] / gcc / go / gofrontend / lex.h
bloba8b7091b584b7cf90247cba336cea6eb7c56fd3e
// lex.h -- Go frontend lexer.     -*- C++ -*-

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
7 #ifndef GO_LEX_H
8 #define GO_LEX_H
10 #include <mpfr.h>
12 #include "operator.h"
13 #include "go-linemap.h"
15 struct Unicode_range;
// The keywords.  These must be in sorted order, other than
// KEYWORD_INVALID.  They must match the Keywords::mapping_ array in
// lex.cc.
//
// The enumerators carry no explicit values, so they are numbered
// consecutively from zero in declaration order; inserting or
// reordering entries changes every later value.

enum Keyword
{
  KEYWORD_INVALID,	// Not a keyword.
  KEYWORD_ASM,
  KEYWORD_BREAK,
  KEYWORD_CASE,
  KEYWORD_CHAN,
  KEYWORD_CONST,
  KEYWORD_CONTINUE,
  KEYWORD_DEFAULT,
  KEYWORD_DEFER,
  KEYWORD_ELSE,
  KEYWORD_FALLTHROUGH,
  KEYWORD_FOR,
  KEYWORD_FUNC,
  KEYWORD_GO,
  KEYWORD_GOTO,
  KEYWORD_IF,
  KEYWORD_IMPORT,
  KEYWORD_INTERFACE,
  KEYWORD_MAP,
  KEYWORD_PACKAGE,
  KEYWORD_RANGE,
  KEYWORD_RETURN,
  KEYWORD_SELECT,
  KEYWORD_STRUCT,
  KEYWORD_SWITCH,
  KEYWORD_TYPE,
  KEYWORD_VAR
};
// Pragmas built from magic comments and recorded for functions.
// These are used as bits in a bitmask.
// The set of values is intended to be the same as the gc compiler.
//
// Each enumerator occupies its own bit, so values may be combined
// with bitwise OR and tested with bitwise AND.

enum GoPragma
{
  GOPRAGMA_NOINTERFACE = 1 << 0,	// Method not in type descriptor.
  GOPRAGMA_NOESCAPE = 1 << 1,		// Args do not escape.
  GOPRAGMA_NORACE = 1 << 2,		// No race detector.
  GOPRAGMA_NOSPLIT = 1 << 3,		// Do not split stack.
  GOPRAGMA_NOINLINE = 1 << 4,		// Do not inline.
  GOPRAGMA_SYSTEMSTACK = 1 << 5,	// Must run on system stack.
  GOPRAGMA_NOWRITEBARRIER = 1 << 6,	// No write barriers.
  GOPRAGMA_NOWRITEBARRIERREC = 1 << 7,	// No write barriers here or callees.
  GOPRAGMA_CGOUNSAFEARGS = 1 << 8,	// Pointer to arg is pointer to all.
  GOPRAGMA_UINTPTRESCAPES = 1 << 9,	// uintptr(p) escapes.
  GOPRAGMA_NOTINHEAP = 1 << 10		// type is not in heap.
};
71 // A token returned from the lexer.
73 class Token
75 public:
76 // Token classification.
77 enum Classification
79 // Token is invalid.
80 TOKEN_INVALID,
81 // Token indicates end of input.
82 TOKEN_EOF,
83 // Token is a keyword.
84 TOKEN_KEYWORD,
85 // Token is an identifier.
86 TOKEN_IDENTIFIER,
87 // Token is a string of characters.
88 TOKEN_STRING,
89 // Token is an operator.
90 TOKEN_OPERATOR,
91 // Token is a character constant.
92 TOKEN_CHARACTER,
93 // Token is an integer.
94 TOKEN_INTEGER,
95 // Token is a floating point number.
96 TOKEN_FLOAT,
97 // Token is an imaginary number.
98 TOKEN_IMAGINARY
101 ~Token();
102 Token(const Token&);
103 Token& operator=(const Token&);
105 // Get token classification.
106 Classification
107 classification() const
108 { return this->classification_; }
110 // Make a token for an invalid value.
111 static Token
112 make_invalid_token(Location location)
113 { return Token(TOKEN_INVALID, location); }
115 // Make a token representing end of file.
116 static Token
117 make_eof_token(Location location)
118 { return Token(TOKEN_EOF, location); }
120 // Make a keyword token.
121 static Token
122 make_keyword_token(Keyword keyword, Location location)
124 Token tok(TOKEN_KEYWORD, location);
125 tok.u_.keyword = keyword;
126 return tok;
129 // Make an identifier token.
130 static Token
131 make_identifier_token(const std::string& value, bool is_exported,
132 Location location)
134 Token tok(TOKEN_IDENTIFIER, location);
135 tok.u_.identifier_value.name = new std::string(value);
136 tok.u_.identifier_value.is_exported = is_exported;
137 return tok;
140 // Make a quoted string token.
141 static Token
142 make_string_token(const std::string& value, Location location)
144 Token tok(TOKEN_STRING, location);
145 tok.u_.string_value = new std::string(value);
146 return tok;
149 // Make an operator token.
150 static Token
151 make_operator_token(Operator op, Location location)
153 Token tok(TOKEN_OPERATOR, location);
154 tok.u_.op = op;
155 return tok;
158 // Make a character constant token.
159 static Token
160 make_character_token(mpz_t val, Location location)
162 Token tok(TOKEN_CHARACTER, location);
163 mpz_init(tok.u_.integer_value);
164 mpz_swap(tok.u_.integer_value, val);
165 return tok;
168 // Make an integer token.
169 static Token
170 make_integer_token(mpz_t val, Location location)
172 Token tok(TOKEN_INTEGER, location);
173 mpz_init(tok.u_.integer_value);
174 mpz_swap(tok.u_.integer_value, val);
175 return tok;
178 // Make a float token.
179 static Token
180 make_float_token(mpfr_t val, Location location)
182 Token tok(TOKEN_FLOAT, location);
183 mpfr_init(tok.u_.float_value);
184 mpfr_swap(tok.u_.float_value, val);
185 return tok;
188 // Make a token for an imaginary number.
189 static Token
190 make_imaginary_token(mpfr_t val, Location location)
192 Token tok(TOKEN_IMAGINARY, location);
193 mpfr_init(tok.u_.float_value);
194 mpfr_swap(tok.u_.float_value, val);
195 return tok;
198 // Get the location of the token.
199 Location
200 location() const
201 { return this->location_; }
203 // Return whether this is an invalid token.
204 bool
205 is_invalid() const
206 { return this->classification_ == TOKEN_INVALID; }
208 // Return whether this is the EOF token.
209 bool
210 is_eof() const
211 { return this->classification_ == TOKEN_EOF; }
213 // Return the keyword value for a keyword token.
214 Keyword
215 keyword() const
217 go_assert(this->classification_ == TOKEN_KEYWORD);
218 return this->u_.keyword;
221 // Return whether this is an identifier.
222 bool
223 is_identifier() const
224 { return this->classification_ == TOKEN_IDENTIFIER; }
226 // Return the identifier.
227 const std::string&
228 identifier() const
230 go_assert(this->classification_ == TOKEN_IDENTIFIER);
231 return *this->u_.identifier_value.name;
234 // Return whether the identifier is exported.
235 bool
236 is_identifier_exported() const
238 go_assert(this->classification_ == TOKEN_IDENTIFIER);
239 return this->u_.identifier_value.is_exported;
242 // Return whether this is a string.
243 bool
244 is_string() const
246 return this->classification_ == TOKEN_STRING;
249 // Return the value of a string. The returned value is a string of
250 // UTF-8 characters.
251 std::string
252 string_value() const
254 go_assert(this->classification_ == TOKEN_STRING);
255 return *this->u_.string_value;
258 // Return the value of a character constant.
259 const mpz_t*
260 character_value() const
262 go_assert(this->classification_ == TOKEN_CHARACTER);
263 return &this->u_.integer_value;
266 // Return the value of an integer.
267 const mpz_t*
268 integer_value() const
270 go_assert(this->classification_ == TOKEN_INTEGER);
271 return &this->u_.integer_value;
274 // Return the value of a float.
275 const mpfr_t*
276 float_value() const
278 go_assert(this->classification_ == TOKEN_FLOAT);
279 return &this->u_.float_value;
282 // Return the value of an imaginary number.
283 const mpfr_t*
284 imaginary_value() const
286 go_assert(this->classification_ == TOKEN_IMAGINARY);
287 return &this->u_.float_value;
290 // Return the operator value for an operator token.
291 Operator
292 op() const
294 go_assert(this->classification_ == TOKEN_OPERATOR);
295 return this->u_.op;
298 // Return whether this token is KEYWORD.
299 bool
300 is_keyword(Keyword keyword) const
302 return (this->classification_ == TOKEN_KEYWORD
303 && this->u_.keyword == keyword);
306 // Return whether this token is OP.
307 bool
308 is_op(Operator op) const
309 { return this->classification_ == TOKEN_OPERATOR && this->u_.op == op; }
311 // Print the token for debugging.
312 void
313 print(FILE*) const;
315 private:
316 // Private constructor used by make_..._token functions above.
317 Token(Classification, Location);
319 // Clear the token.
320 void
321 clear();
323 // The token classification.
324 Classification classification_;
325 union
327 // The keyword value for TOKEN_KEYWORD.
328 Keyword keyword;
329 // The token value for TOKEN_IDENTIFIER.
330 struct
332 // The name of the identifier. This has been mangled to only
333 // include ASCII characters.
334 std::string* name;
335 // Whether this name should be exported. This is true if the
336 // first letter in the name is upper case.
337 bool is_exported;
338 } identifier_value;
339 // The string value for TOKEN_STRING.
340 std::string* string_value;
341 // The token value for TOKEN_CHARACTER or TOKEN_INTEGER.
342 mpz_t integer_value;
343 // The token value for TOKEN_FLOAT or TOKEN_IMAGINARY.
344 mpfr_t float_value;
345 // The token value for TOKEN_OPERATOR or the keyword value
346 Operator op;
347 } u_;
348 // The source location.
349 Location location_;
352 // The lexer itself.
354 class Lex
356 public:
357 Lex(const char* input_file_name, FILE* input_file, Linemap *linemap);
359 ~Lex();
361 // Return the next token.
362 Token
363 next_token();
365 // Return the contents of any current //extern comment.
366 const std::string&
367 extern_name() const
368 { return this->extern_; }
370 // Return the current set of pragmas, and clear them.
371 unsigned int
372 get_and_clear_pragmas()
374 unsigned int ret = this->pragmas_;
375 this->pragmas_ = 0;
376 return ret;
379 struct Linkname
381 std::string ext_name; // External name.
382 bool is_exported; // Whether the internal name is exported.
383 Location loc; // Location of go:linkname directive.
385 Linkname()
386 : ext_name(), is_exported(false), loc()
389 Linkname(const std::string& ext_name_a, bool is_exported_a, Location loc_a)
390 : ext_name(ext_name_a), is_exported(is_exported_a), loc(loc_a)
394 typedef std::map<std::string, Linkname> Linknames;
396 // Return the linknames seen so far, or NULL if none, and clear the
397 // set. These are from go:linkname compiler directives.
398 Linknames*
399 get_and_clear_linknames()
401 Linknames* ret = this->linknames_;
402 this->linknames_ = NULL;
403 return ret;
406 // Return whether the identifier NAME should be exported. NAME is a
407 // mangled name which includes only ASCII characters.
408 static bool
409 is_exported_name(const std::string& name);
411 // Return whether the identifier NAME is invalid. When we see an
412 // invalid character we still build an identifier, but we use a
413 // magic string to indicate that the identifier is invalid. We then
414 // use this to avoid knockon errors.
415 static bool
416 is_invalid_identifier(const std::string& name);
418 // A helper function. Append V to STR. IS_CHARACTER is true if V
419 // is a Unicode character which should be converted into UTF-8,
420 // false if it is a byte value to be appended directly. The
421 // location is used to warn about an out of range character.
422 static void
423 append_char(unsigned int v, bool is_charater, std::string* str,
424 Location);
426 // A helper function. Fetch a UTF-8 character from STR and store it
427 // in *VALUE. Return the number of bytes read from STR. Return 0
428 // if STR does not point to a valid UTF-8 character.
429 static int
430 fetch_char(const char* str, unsigned int *value);
432 // Return whether C is a Unicode or "C" locale space character.
433 static bool
434 is_unicode_space(unsigned int c);
436 private:
437 ssize_t
438 get_line();
440 bool
441 require_line();
443 // The current location.
444 Location
445 location() const;
447 // A position CHARS column positions before the current location.
448 Location
449 earlier_location(int chars) const;
451 static bool
452 is_hex_digit(char);
454 static unsigned char
455 octal_value(char c)
456 { return c - '0'; }
458 static unsigned
459 hex_val(char c);
461 Token
462 make_invalid_token()
463 { return Token::make_invalid_token(this->location()); }
465 Token
466 make_eof_token()
467 { return Token::make_eof_token(this->location()); }
469 Token
470 make_operator(Operator op, int chars)
471 { return Token::make_operator_token(op, this->earlier_location(chars)); }
473 Token
474 gather_identifier();
476 static bool
477 could_be_exponent(const char*, const char*);
479 Token
480 gather_number();
482 Token
483 gather_character();
485 Token
486 gather_string();
488 Token
489 gather_raw_string();
491 const char*
492 advance_one_utf8_char(const char*, unsigned int*, bool*);
494 const char*
495 advance_one_char(const char*, bool, unsigned int*, bool*);
497 static bool
498 is_unicode_digit(unsigned int c);
500 static bool
501 is_unicode_letter(unsigned int c);
503 static bool
504 is_unicode_uppercase(unsigned int c);
506 static bool
507 is_in_unicode_range(unsigned int C, const Unicode_range* ranges,
508 size_t range_size);
510 Operator
511 three_character_operator(char, char, char);
513 Operator
514 two_character_operator(char, char);
516 Operator
517 one_character_operator(char);
519 bool
520 skip_c_comment(bool* found_newline);
522 void
523 skip_cpp_comment();
525 // The input file name.
526 const char* input_file_name_;
527 // The input file.
528 FILE* input_file_;
529 // The object used to keep track of file names and line numbers.
530 Linemap* linemap_;
531 // The line buffer. This holds the current line.
532 char* linebuf_;
533 // The size of the line buffer.
534 size_t linebufsize_;
535 // The nmber of characters in the current line.
536 size_t linesize_;
537 // The current offset in linebuf_.
538 size_t lineoff_;
539 // The current line number.
540 size_t lineno_;
541 // Whether to add a semicolon if we see a newline now.
542 bool add_semi_at_eol_;
543 // Pragmas for the next function, from magic comments.
544 unsigned int pragmas_;
545 // The external name to use for a function declaration, from a magic
546 // //extern comment.
547 std::string extern_;
548 // The list of //go:linkname comments, if any.
549 Linknames* linknames_;
552 #endif // !defined(GO_LEX_H)