* gcc.dg/guality/guality.exp: Skip on AIX.
[official-gcc.git] / gcc / go / gofrontend / lex.h
blob383a91787802c563337ea80ddfde83bcca9d155b
1 // lex.h -- Go frontend lexer. -*- C++ -*-
3 // Copyright 2009 The Go Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file.
7 #ifndef GO_LEX_H
8 #define GO_LEX_H
10 #include <mpfr.h>
12 #include "operator.h"
13 #include "go-linemap.h"
15 struct Unicode_range;
// The keywords.  These must be in sorted order, other than
// KEYWORD_INVALID.  They must match the Keywords::mapping_ array in
// lex.cc.
//
// KEYWORD_INVALID is deliberately first, so it has the value 0; the
// remaining enumerators are listed alphabetically by keyword spelling
// and take consecutive values starting at 1.

enum Keyword
{
  KEYWORD_INVALID,	// Not a keyword.
  KEYWORD_ASM,
  KEYWORD_BREAK,
  KEYWORD_CASE,
  KEYWORD_CHAN,
  KEYWORD_CONST,
  KEYWORD_CONTINUE,
  KEYWORD_DEFAULT,
  KEYWORD_DEFER,
  KEYWORD_ELSE,
  KEYWORD_FALLTHROUGH,
  KEYWORD_FOR,
  KEYWORD_FUNC,
  KEYWORD_GO,
  KEYWORD_GOTO,
  KEYWORD_IF,
  KEYWORD_IMPORT,
  KEYWORD_INTERFACE,
  KEYWORD_MAP,
  KEYWORD_PACKAGE,
  KEYWORD_RANGE,
  KEYWORD_RETURN,
  KEYWORD_SELECT,
  KEYWORD_STRUCT,
  KEYWORD_SWITCH,
  KEYWORD_TYPE,
  KEYWORD_VAR
};
52 // A token returned from the lexer.
54 class Token
56 public:
57 // Token classification.
58 enum Classification
60 // Token is invalid.
61 TOKEN_INVALID,
62 // Token indicates end of input.
63 TOKEN_EOF,
64 // Token is a keyword.
65 TOKEN_KEYWORD,
66 // Token is an identifier.
67 TOKEN_IDENTIFIER,
68 // Token is a string of characters.
69 TOKEN_STRING,
70 // Token is an operator.
71 TOKEN_OPERATOR,
72 // Token is a character constant.
73 TOKEN_CHARACTER,
74 // Token is an integer.
75 TOKEN_INTEGER,
76 // Token is a floating point number.
77 TOKEN_FLOAT,
78 // Token is an imaginary number.
79 TOKEN_IMAGINARY
82 ~Token();
83 Token(const Token&);
84 Token& operator=(const Token&);
86 // Get token classification.
87 Classification
88 classification() const
89 { return this->classification_; }
91 // Make a token for an invalid value.
92 static Token
93 make_invalid_token(Location location)
94 { return Token(TOKEN_INVALID, location); }
96 // Make a token representing end of file.
97 static Token
98 make_eof_token(Location location)
99 { return Token(TOKEN_EOF, location); }
101 // Make a keyword token.
102 static Token
103 make_keyword_token(Keyword keyword, Location location)
105 Token tok(TOKEN_KEYWORD, location);
106 tok.u_.keyword = keyword;
107 return tok;
110 // Make an identifier token.
111 static Token
112 make_identifier_token(const std::string& value, bool is_exported,
113 Location location)
115 Token tok(TOKEN_IDENTIFIER, location);
116 tok.u_.identifier_value.name = new std::string(value);
117 tok.u_.identifier_value.is_exported = is_exported;
118 return tok;
121 // Make a quoted string token.
122 static Token
123 make_string_token(const std::string& value, Location location)
125 Token tok(TOKEN_STRING, location);
126 tok.u_.string_value = new std::string(value);
127 return tok;
130 // Make an operator token.
131 static Token
132 make_operator_token(Operator op, Location location)
134 Token tok(TOKEN_OPERATOR, location);
135 tok.u_.op = op;
136 return tok;
139 // Make a character constant token.
140 static Token
141 make_character_token(mpz_t val, Location location)
143 Token tok(TOKEN_CHARACTER, location);
144 mpz_init(tok.u_.integer_value);
145 mpz_swap(tok.u_.integer_value, val);
146 return tok;
149 // Make an integer token.
150 static Token
151 make_integer_token(mpz_t val, Location location)
153 Token tok(TOKEN_INTEGER, location);
154 mpz_init(tok.u_.integer_value);
155 mpz_swap(tok.u_.integer_value, val);
156 return tok;
159 // Make a float token.
160 static Token
161 make_float_token(mpfr_t val, Location location)
163 Token tok(TOKEN_FLOAT, location);
164 mpfr_init(tok.u_.float_value);
165 mpfr_swap(tok.u_.float_value, val);
166 return tok;
169 // Make a token for an imaginary number.
170 static Token
171 make_imaginary_token(mpfr_t val, Location location)
173 Token tok(TOKEN_IMAGINARY, location);
174 mpfr_init(tok.u_.float_value);
175 mpfr_swap(tok.u_.float_value, val);
176 return tok;
179 // Get the location of the token.
180 Location
181 location() const
182 { return this->location_; }
184 // Return whether this is an invalid token.
185 bool
186 is_invalid() const
187 { return this->classification_ == TOKEN_INVALID; }
189 // Return whether this is the EOF token.
190 bool
191 is_eof() const
192 { return this->classification_ == TOKEN_EOF; }
194 // Return the keyword value for a keyword token.
195 Keyword
196 keyword() const
198 go_assert(this->classification_ == TOKEN_KEYWORD);
199 return this->u_.keyword;
202 // Return whether this is an identifier.
203 bool
204 is_identifier() const
205 { return this->classification_ == TOKEN_IDENTIFIER; }
207 // Return the identifier.
208 const std::string&
209 identifier() const
211 go_assert(this->classification_ == TOKEN_IDENTIFIER);
212 return *this->u_.identifier_value.name;
215 // Return whether the identifier is exported.
216 bool
217 is_identifier_exported() const
219 go_assert(this->classification_ == TOKEN_IDENTIFIER);
220 return this->u_.identifier_value.is_exported;
223 // Return whether this is a string.
224 bool
225 is_string() const
227 return this->classification_ == TOKEN_STRING;
230 // Return the value of a string. The returned value is a string of
231 // UTF-8 characters.
232 std::string
233 string_value() const
235 go_assert(this->classification_ == TOKEN_STRING);
236 return *this->u_.string_value;
239 // Return the value of a character constant.
240 const mpz_t*
241 character_value() const
243 go_assert(this->classification_ == TOKEN_CHARACTER);
244 return &this->u_.integer_value;
247 // Return the value of an integer.
248 const mpz_t*
249 integer_value() const
251 go_assert(this->classification_ == TOKEN_INTEGER);
252 return &this->u_.integer_value;
255 // Return the value of a float.
256 const mpfr_t*
257 float_value() const
259 go_assert(this->classification_ == TOKEN_FLOAT);
260 return &this->u_.float_value;
263 // Return the value of an imaginary number.
264 const mpfr_t*
265 imaginary_value() const
267 go_assert(this->classification_ == TOKEN_IMAGINARY);
268 return &this->u_.float_value;
271 // Return the operator value for an operator token.
272 Operator
273 op() const
275 go_assert(this->classification_ == TOKEN_OPERATOR);
276 return this->u_.op;
279 // Return whether this token is KEYWORD.
280 bool
281 is_keyword(Keyword keyword) const
283 return (this->classification_ == TOKEN_KEYWORD
284 && this->u_.keyword == keyword);
287 // Return whether this token is OP.
288 bool
289 is_op(Operator op) const
290 { return this->classification_ == TOKEN_OPERATOR && this->u_.op == op; }
292 // Print the token for debugging.
293 void
294 print(FILE*) const;
296 private:
297 // Private constructor used by make_..._token functions above.
298 Token(Classification, Location);
300 // Clear the token.
301 void
302 clear();
304 // The token classification.
305 Classification classification_;
306 union
308 // The keyword value for TOKEN_KEYWORD.
309 Keyword keyword;
310 // The token value for TOKEN_IDENTIFIER.
311 struct
313 // The name of the identifier. This has been mangled to only
314 // include ASCII characters.
315 std::string* name;
316 // Whether this name should be exported. This is true if the
317 // first letter in the name is upper case.
318 bool is_exported;
319 } identifier_value;
320 // The string value for TOKEN_STRING.
321 std::string* string_value;
322 // The token value for TOKEN_CHARACTER or TOKEN_INTEGER.
323 mpz_t integer_value;
324 // The token value for TOKEN_FLOAT or TOKEN_IMAGINARY.
325 mpfr_t float_value;
326 // The token value for TOKEN_OPERATOR or the keyword value
327 Operator op;
328 } u_;
329 // The source location.
330 Location location_;
333 // The lexer itself.
335 class Lex
337 public:
338 Lex(const char* input_file_name, FILE* input_file, Linemap *linemap);
340 ~Lex();
342 // Return the next token.
343 Token
344 next_token();
346 // Return the contents of any current //extern comment.
347 const std::string&
348 extern_name() const
349 { return this->extern_; }
351 // Return whether we have seen a //go:nointerface comment, clearing
352 // the flag.
353 bool
354 get_and_clear_nointerface()
356 bool ret = this->saw_nointerface_;
357 this->saw_nointerface_ = false;
358 return ret;
361 // Return whether the identifier NAME should be exported. NAME is a
362 // mangled name which includes only ASCII characters.
363 static bool
364 is_exported_name(const std::string& name);
366 // Return whether the identifier NAME is invalid. When we see an
367 // invalid character we still build an identifier, but we use a
368 // magic string to indicate that the identifier is invalid. We then
369 // use this to avoid knockon errors.
370 static bool
371 is_invalid_identifier(const std::string& name);
373 // A helper function. Append V to STR. IS_CHARACTER is true if V
374 // is a Unicode character which should be converted into UTF-8,
375 // false if it is a byte value to be appended directly. The
376 // location is used to warn about an out of range character.
377 static void
378 append_char(unsigned int v, bool is_charater, std::string* str,
379 Location);
381 // A helper function. Fetch a UTF-8 character from STR and store it
382 // in *VALUE. Return the number of bytes read from STR. Return 0
383 // if STR does not point to a valid UTF-8 character.
384 static int
385 fetch_char(const char* str, unsigned int *value);
387 // Return whether C is a Unicode or "C" locale space character.
388 static bool
389 is_unicode_space(unsigned int c);
391 private:
392 ssize_t
393 get_line();
395 bool
396 require_line();
398 // The current location.
399 Location
400 location() const;
402 // A position CHARS column positions before the current location.
403 Location
404 earlier_location(int chars) const;
406 static bool
407 is_hex_digit(char);
409 static unsigned char
410 octal_value(char c)
411 { return c - '0'; }
413 Token
414 make_invalid_token()
415 { return Token::make_invalid_token(this->location()); }
417 Token
418 make_eof_token()
419 { return Token::make_eof_token(this->location()); }
421 Token
422 make_operator(Operator op, int chars)
423 { return Token::make_operator_token(op, this->earlier_location(chars)); }
425 Token
426 gather_identifier();
428 static bool
429 could_be_exponent(const char*, const char*);
431 Token
432 gather_number();
434 Token
435 gather_character();
437 Token
438 gather_string();
440 Token
441 gather_raw_string();
443 const char*
444 advance_one_utf8_char(const char*, unsigned int*, bool*);
446 const char*
447 advance_one_char(const char*, bool, unsigned int*, bool*);
449 static bool
450 is_unicode_digit(unsigned int c);
452 static bool
453 is_unicode_letter(unsigned int c);
455 static bool
456 is_unicode_uppercase(unsigned int c);
458 static bool
459 is_in_unicode_range(unsigned int C, const Unicode_range* ranges,
460 size_t range_size);
462 Operator
463 three_character_operator(char, char, char);
465 Operator
466 two_character_operator(char, char);
468 Operator
469 one_character_operator(char);
471 bool
472 skip_c_comment();
474 void
475 skip_cpp_comment();
477 // The input file name.
478 const char* input_file_name_;
479 // The input file.
480 FILE* input_file_;
481 // The object used to keep track of file names and line numbers.
482 Linemap* linemap_;
483 // The line buffer. This holds the current line.
484 char* linebuf_;
485 // The size of the line buffer.
486 size_t linebufsize_;
487 // The nmber of characters in the current line.
488 size_t linesize_;
489 // The current offset in linebuf_.
490 size_t lineoff_;
491 // The current line number.
492 size_t lineno_;
493 // Whether to add a semicolon if we see a newline now.
494 bool add_semi_at_eol_;
495 // Whether we just saw a magic go:nointerface comment.
496 bool saw_nointerface_;
497 // The external name to use for a function declaration, from a magic
498 // //extern comment.
499 std::string extern_;
502 #endif // !defined(GO_LEX_H)