2012-05-01 François Dumont <fdumont@gcc.gnu.org>
[official-gcc.git] / gcc / go / gofrontend / lex.h
blob8858e73d97a006edba4a5e614db7c8ea044d35cd
1 // lex.h -- Go frontend lexer. -*- C++ -*-
3 // Copyright 2009 The Go Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file.
7 #ifndef GO_LEX_H
8 #define GO_LEX_H
10 #include <gmp.h>
11 #include <mpfr.h>
13 #include "operator.h"
14 #include "go-linemap.h"
16 struct Unicode_range;
// The keywords.  These must be in sorted order, other than
// KEYWORD_INVALID.  They must match the Keywords::mapping_ array in
// lex.cc.
//
// No enumerator has an explicit initializer, so KEYWORD_INVALID is 0
// and the remaining values count up from 1 in the order listed here.
// Reordering or inserting entries therefore changes the numeric
// values.

enum Keyword
{
  KEYWORD_INVALID,	// Not a keyword.
  KEYWORD_ASM,
  KEYWORD_BREAK,
  KEYWORD_CASE,
  KEYWORD_CHAN,
  KEYWORD_CONST,
  KEYWORD_CONTINUE,
  KEYWORD_DEFAULT,
  KEYWORD_DEFER,
  KEYWORD_ELSE,
  KEYWORD_FALLTHROUGH,
  KEYWORD_FOR,
  KEYWORD_FUNC,
  KEYWORD_GO,
  KEYWORD_GOTO,
  KEYWORD_IF,
  KEYWORD_IMPORT,
  KEYWORD_INTERFACE,
  KEYWORD_MAP,
  KEYWORD_PACKAGE,
  KEYWORD_RANGE,
  KEYWORD_RETURN,
  KEYWORD_SELECT,
  KEYWORD_STRUCT,
  KEYWORD_SWITCH,
  KEYWORD_TYPE,
  KEYWORD_VAR
};
53 // A token returned from the lexer.
55 class Token
57 public:
58 // Token classification.
59 enum Classification
61 // Token is invalid.
62 TOKEN_INVALID,
63 // Token indicates end of input.
64 TOKEN_EOF,
65 // Token is a keyword.
66 TOKEN_KEYWORD,
67 // Token is an identifier.
68 TOKEN_IDENTIFIER,
69 // Token is a string of characters.
70 TOKEN_STRING,
71 // Token is an operator.
72 TOKEN_OPERATOR,
73 // Token is a character constant.
74 TOKEN_CHARACTER,
75 // Token is an integer.
76 TOKEN_INTEGER,
77 // Token is a floating point number.
78 TOKEN_FLOAT,
79 // Token is an imaginary number.
80 TOKEN_IMAGINARY
83 ~Token();
84 Token(const Token&);
85 Token& operator=(const Token&);
87 // Get token classification.
88 Classification
89 classification() const
90 { return this->classification_; }
92 // Make a token for an invalid value.
93 static Token
94 make_invalid_token(Location location)
95 { return Token(TOKEN_INVALID, location); }
97 // Make a token representing end of file.
98 static Token
99 make_eof_token(Location location)
100 { return Token(TOKEN_EOF, location); }
102 // Make a keyword token.
103 static Token
104 make_keyword_token(Keyword keyword, Location location)
106 Token tok(TOKEN_KEYWORD, location);
107 tok.u_.keyword = keyword;
108 return tok;
111 // Make an identifier token.
112 static Token
113 make_identifier_token(const std::string& value, bool is_exported,
114 Location location)
116 Token tok(TOKEN_IDENTIFIER, location);
117 tok.u_.identifier_value.name = new std::string(value);
118 tok.u_.identifier_value.is_exported = is_exported;
119 return tok;
122 // Make a quoted string token.
123 static Token
124 make_string_token(const std::string& value, Location location)
126 Token tok(TOKEN_STRING, location);
127 tok.u_.string_value = new std::string(value);
128 return tok;
131 // Make an operator token.
132 static Token
133 make_operator_token(Operator op, Location location)
135 Token tok(TOKEN_OPERATOR, location);
136 tok.u_.op = op;
137 return tok;
140 // Make a character constant token.
141 static Token
142 make_character_token(mpz_t val, Location location)
144 Token tok(TOKEN_CHARACTER, location);
145 mpz_init(tok.u_.integer_value);
146 mpz_swap(tok.u_.integer_value, val);
147 return tok;
150 // Make an integer token.
151 static Token
152 make_integer_token(mpz_t val, Location location)
154 Token tok(TOKEN_INTEGER, location);
155 mpz_init(tok.u_.integer_value);
156 mpz_swap(tok.u_.integer_value, val);
157 return tok;
160 // Make a float token.
161 static Token
162 make_float_token(mpfr_t val, Location location)
164 Token tok(TOKEN_FLOAT, location);
165 mpfr_init(tok.u_.float_value);
166 mpfr_swap(tok.u_.float_value, val);
167 return tok;
170 // Make a token for an imaginary number.
171 static Token
172 make_imaginary_token(mpfr_t val, Location location)
174 Token tok(TOKEN_IMAGINARY, location);
175 mpfr_init(tok.u_.float_value);
176 mpfr_swap(tok.u_.float_value, val);
177 return tok;
180 // Get the location of the token.
181 Location
182 location() const
183 { return this->location_; }
185 // Return whether this is an invalid token.
186 bool
187 is_invalid() const
188 { return this->classification_ == TOKEN_INVALID; }
190 // Return whether this is the EOF token.
191 bool
192 is_eof() const
193 { return this->classification_ == TOKEN_EOF; }
195 // Return the keyword value for a keyword token.
196 Keyword
197 keyword() const
199 go_assert(this->classification_ == TOKEN_KEYWORD);
200 return this->u_.keyword;
203 // Return whether this is an identifier.
204 bool
205 is_identifier() const
206 { return this->classification_ == TOKEN_IDENTIFIER; }
208 // Return the identifier.
209 const std::string&
210 identifier() const
212 go_assert(this->classification_ == TOKEN_IDENTIFIER);
213 return *this->u_.identifier_value.name;
216 // Return whether the identifier is exported.
217 bool
218 is_identifier_exported() const
220 go_assert(this->classification_ == TOKEN_IDENTIFIER);
221 return this->u_.identifier_value.is_exported;
224 // Return whether this is a string.
225 bool
226 is_string() const
228 return this->classification_ == TOKEN_STRING;
231 // Return the value of a string. The returned value is a string of
232 // UTF-8 characters.
233 std::string
234 string_value() const
236 go_assert(this->classification_ == TOKEN_STRING);
237 return *this->u_.string_value;
240 // Return the value of a character constant.
241 const mpz_t*
242 character_value() const
244 go_assert(this->classification_ == TOKEN_CHARACTER);
245 return &this->u_.integer_value;
248 // Return the value of an integer.
249 const mpz_t*
250 integer_value() const
252 go_assert(this->classification_ == TOKEN_INTEGER);
253 return &this->u_.integer_value;
256 // Return the value of a float.
257 const mpfr_t*
258 float_value() const
260 go_assert(this->classification_ == TOKEN_FLOAT);
261 return &this->u_.float_value;
264 // Return the value of an imaginary number.
265 const mpfr_t*
266 imaginary_value() const
268 go_assert(this->classification_ == TOKEN_IMAGINARY);
269 return &this->u_.float_value;
272 // Return the operator value for an operator token.
273 Operator
274 op() const
276 go_assert(this->classification_ == TOKEN_OPERATOR);
277 return this->u_.op;
280 // Return whether this token is KEYWORD.
281 bool
282 is_keyword(Keyword keyword) const
284 return (this->classification_ == TOKEN_KEYWORD
285 && this->u_.keyword == keyword);
288 // Return whether this token is OP.
289 bool
290 is_op(Operator op) const
291 { return this->classification_ == TOKEN_OPERATOR && this->u_.op == op; }
293 // Print the token for debugging.
294 void
295 print(FILE*) const;
297 private:
298 // Private constructor used by make_..._token functions above.
299 Token(Classification, Location);
301 // Clear the token.
302 void
303 clear();
305 // The token classification.
306 Classification classification_;
307 union
309 // The keyword value for TOKEN_KEYWORD.
310 Keyword keyword;
311 // The token value for TOKEN_IDENTIFIER.
312 struct
314 // The name of the identifier. This has been mangled to only
315 // include ASCII characters.
316 std::string* name;
317 // Whether this name should be exported. This is true if the
318 // first letter in the name is upper case.
319 bool is_exported;
320 } identifier_value;
321 // The string value for TOKEN_STRING.
322 std::string* string_value;
323 // The token value for TOKEN_CHARACTER or TOKEN_INTEGER.
324 mpz_t integer_value;
325 // The token value for TOKEN_FLOAT or TOKEN_IMAGINARY.
326 mpfr_t float_value;
327 // The token value for TOKEN_OPERATOR or the keyword value
328 Operator op;
329 } u_;
330 // The source location.
331 Location location_;
334 // The lexer itself.
336 class Lex
338 public:
339 Lex(const char* input_file_name, FILE* input_file, Linemap *linemap);
341 ~Lex();
343 // Return the next token.
344 Token
345 next_token();
347 // Return the contents of any current //extern comment.
348 const std::string&
349 extern_name() const
350 { return this->extern_; }
352 // Return whether the identifier NAME should be exported. NAME is a
353 // mangled name which includes only ASCII characters.
354 static bool
355 is_exported_name(const std::string& name);
357 // Return whether the identifier NAME is invalid. When we see an
358 // invalid character we still build an identifier, but we use a
359 // magic string to indicate that the identifier is invalid. We then
360 // use this to avoid knockon errors.
361 static bool
362 is_invalid_identifier(const std::string& name);
364 // A helper function. Append V to STR. IS_CHARACTER is true if V
365 // is a Unicode character which should be converted into UTF-8,
366 // false if it is a byte value to be appended directly. The
367 // location is used to warn about an out of range character.
368 static void
369 append_char(unsigned int v, bool is_charater, std::string* str,
370 Location);
372 // A helper function. Fetch a UTF-8 character from STR and store it
373 // in *VALUE. Return the number of bytes read from STR. Return 0
374 // if STR does not point to a valid UTF-8 character.
375 static int
376 fetch_char(const char* str, unsigned int *value);
378 private:
379 ssize_t
380 get_line();
382 bool
383 require_line();
385 // The current location.
386 Location
387 location() const;
389 // A position CHARS column positions before the current location.
390 Location
391 earlier_location(int chars) const;
393 static bool
394 is_hex_digit(char);
396 static unsigned char
397 octal_value(char c)
398 { return c - '0'; }
400 Token
401 make_invalid_token()
402 { return Token::make_invalid_token(this->location()); }
404 Token
405 make_eof_token()
406 { return Token::make_eof_token(this->location()); }
408 Token
409 make_operator(Operator op, int chars)
410 { return Token::make_operator_token(op, this->earlier_location(chars)); }
412 Token
413 gather_identifier();
415 static bool
416 could_be_exponent(const char*, const char*);
418 Token
419 gather_number();
421 Token
422 gather_character();
424 Token
425 gather_string();
427 Token
428 gather_raw_string();
430 const char*
431 advance_one_utf8_char(const char*, unsigned int*, bool*);
433 const char*
434 advance_one_char(const char*, bool, unsigned int*, bool*);
436 static bool
437 is_unicode_digit(unsigned int c);
439 static bool
440 is_unicode_letter(unsigned int c);
442 static bool
443 is_unicode_uppercase(unsigned int c);
445 static bool
446 is_in_unicode_range(unsigned int C, const Unicode_range* ranges,
447 size_t range_size);
449 Operator
450 three_character_operator(char, char, char);
452 Operator
453 two_character_operator(char, char);
455 Operator
456 one_character_operator(char);
458 bool
459 skip_c_comment();
461 void
462 skip_cpp_comment();
464 // The input file name.
465 const char* input_file_name_;
466 // The input file.
467 FILE* input_file_;
468 // The object used to keep track of file names and line numbers.
469 Linemap* linemap_;
470 // The line buffer. This holds the current line.
471 char* linebuf_;
472 // The size of the line buffer.
473 size_t linebufsize_;
474 // The nmber of characters in the current line.
475 size_t linesize_;
476 // The current offset in linebuf_.
477 size_t lineoff_;
478 // The current line number.
479 size_t lineno_;
480 // Whether to add a semicolon if we see a newline now.
481 bool add_semi_at_eol_;
482 // The external name to use for a function declaration, from a magic
483 // //extern comment.
484 std::string extern_;
487 #endif // !defined(GO_LEX_H)