vala/valageniescanner.vala

   1 /* valageniescanner.vala
   2  *
   3  * Copyright (C) 2008-2012  Jamie McCracken, Jürg Billeter
   4  * Based on code by Jürg Billeter
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
  19  *
  20  * Author:
  21  *      Jamie McCracken jamiemcc gnome org
  22  */
  23
  24 using GLib;
  25
  26 /**
  27  * Lexical scanner for Genie source files.
  28  */
  29 public class Vala.Genie.Scanner {
        /** Source file being scanned; set by the constructor. */
        public SourceFile source_file { get; private set; }

        /** Indentation width setting; the constructor initialises it to 0. */
        public int indent_spaces { get; set;}

        // Raw pointers into the mapped contents of source_file: `begin` is the
        // first byte, `end` is one past the last byte and `current` is the
        // read position.
        char* begin;
        char* current;
        char* end;

        // Line and column that are paired with `current` when source locations
        // are built; both start at 1.
        int line;
        int column;

        // Indentation bookkeeping, all initialised to 0.
        // NOTE(review): presumably drives indent/dedent token generation; the
        // code using these counters is outside this view, confirm there.
        int current_indent_level;
        int indent_level;
        int pending_dedents;

        /* track open parens and braces for automatic line continuations */
        int open_parens_count;
        int open_brace_count;

        // Initialised to TokenType.NONE.
        // NOTE(review): presumably the type of the most recently returned token.
        TokenType last_token;
        // Initialised to false by the constructor.
        bool parse_started;

        Comment _comment;

        // Reset to null by seek ().
        Conditional[] conditional_stack;
  55
        /**
         * Element type of conditional_stack.
         *
         * NOTE(review): presumably records the progress of one conditional
         * section (whether a branch matched, whether an else was seen and
         * whether the section is skipped); the code that reads and writes
         * these flags is outside this view, so confirm against it.
         */
        struct Conditional {
                public bool matched;
                public bool else_found;
                public bool skip_section;
        }
  61
        // Stack of nested scanner states. in_template (), in_template_part ()
        // and in_regex_literal () look at its last element; seek () resets it
        // to null.
        State[] state_stack;

        /**
         * Kinds of context that can be recorded on state_stack.
         */
        enum State {
                PARENS,
                BRACE,
                BRACKET,
                // in_regex_literal () tests for this value on top of the stack
                REGEX_LITERAL,
                // in_template () tests for this value on top of the stack
                TEMPLATE,
                // in_template_part () tests for this value on top of the stack
                TEMPLATE_PART
        }
  72
  73         public Scanner (SourceFile source_file) {
  74                 this.source_file = source_file;
  75
  76                 begin = source_file.get_mapped_contents ();
  77                 end = begin + source_file.get_mapped_length ();
  78
  79                 current = begin;
  80
  81                 _indent_spaces = 0;
  82                 line = 1;
  83                 column = 1;
  84                 current_indent_level = 0;
  85                 indent_level = 0;
  86                 pending_dedents = 0;
  87
  88                 open_parens_count = 0;
  89                 open_brace_count = 0;
  90
  91                 parse_started = false;
  92                 last_token = TokenType.NONE;
  93
  94         }
  95
  96         bool in_template () {
  97                 return (state_stack.length > 0 && state_stack[state_stack.length - 1] == State.TEMPLATE);
  98         }
  99
 100         bool in_template_part () {
 101                 return (state_stack.length > 0 && state_stack[state_stack.length - 1] == State.TEMPLATE_PART);
 102         }
 103
 104         bool is_ident_char (char c) {
 105                 return (c.isalnum () || c == '_');
 106         }
 107
 108         bool in_regex_literal () {
 109                 return (state_stack.length > 0 && state_stack[state_stack.length - 1] == State.REGEX_LITERAL);
 110         }
 111
 112         SourceReference get_source_reference (int offset, int length = 0) {
 113                 return new SourceReference (source_file, SourceLocation (current, line, column + offset), SourceLocation (current + length, line, column + offset + length));
 114         }
 115
 116         public TokenType read_regex_token (out SourceLocation token_begin, out SourceLocation token_end) {
 117                 TokenType type;
 118                 char* begin = current;
 119                 token_begin = SourceLocation (begin, line, column);
 120
 121                 int token_length_in_chars = -1;
 122
 123                 if (current >= end) {
 124                         type = TokenType.EOF;
 125                 } else {
 126                         switch (current[0]) {
 127                         case '/':
 128                                 type = TokenType.CLOSE_REGEX_LITERAL;
 129                                 current++;
 130                                 state_stack.length--;
 131                                 var fl_i = false;
 132                                 var fl_s = false;
 133                                 var fl_m = false;
 134                                 var fl_x = false;
 135                                 while (current[0] == 'i' || current[0] == 's' || current[0] == 'm' || current[0] == 'x') {
 136                                         switch (current[0]) {
 137                                         case 'i':
 138                                                 if (fl_i) {
 139                                                         Report.error (get_source_reference (token_length_in_chars), "modifier 'i' used more than once");
 140                                                 }
 141                                                 fl_i = true;
 142                                                 break;
 143                                         case 's':
 144                                                 if (fl_s) {
 145                                                         Report.error (get_source_reference (token_length_in_chars), "modifier 's' used more than once");
 146                                                 }
 147                                                 fl_s = true;
 148                                                 break;
 149                                         case 'm':
 150                                                 if (fl_m) {
 151                                                         Report.error (get_source_reference (token_length_in_chars), "modifier 'm' used more than once");
 152                                                 }
 153                                                 fl_m = true;
 154                                                 break;
 155                                         case 'x':
 156                                                 if (fl_x) {
 157                                                         Report.error (get_source_reference (token_length_in_chars), "modifier 'x' used more than once");
 158                                                 }
 159                                                 fl_x = true;
 160                                                 break;
 161                                         }
 162                                         current++;
 163                                         token_length_in_chars++;
 164                                 }
 165                                 break;
 166                         default:
 167                                 type = TokenType.REGEX_LITERAL;
 168                                 token_length_in_chars = 0;
 169                                 while (current < end && current[0] != '/') {
 170                                         if (current[0] == '\\') {
 171                                                 current++;
 172                                                 token_length_in_chars++;
 173                                                 if (current >= end) {
 174                                                         break;
 175                                                 }
 176
 177                                                 switch (current[0]) {
 178                                                 case '\'':
 179                                                 case '"':
 180                                                 case '\\':
 181                                                 case '/':
 182                                                 case '^':
 183                                                 case '$':
 184                                                 case '.':
 185                                                 case '[':
 186                                                 case ']':
 187                                                 case '{':
 188                                                 case '}':
 189                                                 case '(':
 190                                                 case ')':
 191                                                 case '?':
 192                                                 case '*':
 193                                                 case '+':
 194                                                 case '-':
 195                                                 case '#':
 196                                                 case '&':
 197                                                 case '~':
 198                                                 case ':':
 199                                                 case ';':
 200                                                 case '<':
 201                                                 case '>':
 202                                                 case '|':
 203                                                 case '%':
 204                                                 case '=':
 205                                                 case '@':
 206                                                 case '0':
 207                                                 case 'b':
 208                                                 case 'B':
 209                                                 case 'f':
 210                                                 case 'n':
 211                                                 case 'r':
 212                                                 case 't':
 213                                                 case 'a':
 214                                                 case 'A':
 215                                                 case 'p':
 216                                                 case 'P':
 217                                                 case 'e':
 218                                                 case 'd':
 219                                                 case 'D':
 220                                                 case 's':
 221                                                 case 'S':
 222                                                 case 'w':
 223                                                 case 'W':
 224                                                 case 'G':
 225                                                 case 'z':
 226                                                 case 'Z':
 227                                                         current++;
 228                                                         token_length_in_chars++;
 229                                                         break;
 230                                                 case 'x':
 231                                                         // hexadecimal escape character
 232                                                         current++;
 233                                                         token_length_in_chars++;
 234                                                         while (current < end && current[0].isxdigit ()) {
 235                                                                 current++;
 236                                                                 token_length_in_chars++;
 237                                                         }
 238                                                         break;
 239                                                 default:
 240                                                         Report.error (get_source_reference (token_length_in_chars), "invalid escape sequence");
 241                                                         break;
 242                                                 }
 243                                         } else if (current[0] == '\n') {
 244                                                 break;
 245                                         } else {
 246                                                 unichar u = ((string) current).get_char_validated ((long) (end - current));
 247                                                 if (u != (unichar) (-1)) {
 248                                                         current += u.to_utf8 (null);
 249                                                         token_length_in_chars++;
 250                                                 } else {
 251                                                         current++;
 252                                                         Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
 253                                                 }
 254                                         }
 255                                 }
 256                                 if (current >= end || current[0] == '\n') {
 257                                         Report.error (get_source_reference (token_length_in_chars), "syntax error, expected \"");
 258                                         state_stack.length--;
 259                                         return read_token (out token_begin, out token_end);
 260                                 }
 261                                 break;
 262                         }
 263                 }
 264
 265                 if (token_length_in_chars < 0) {
 266                         column += (int) (current - begin);
 267                 } else {
 268                         column += token_length_in_chars;
 269                 }
 270
 271                 token_end = SourceLocation (current, line, column - 1);
 272
 273                 return type;
 274         }
 275
 276
 277         public void seek (SourceLocation location) {
 278                 current = location.pos;
 279                 line = location.line;
 280                 column = location.column;
 281
 282                 conditional_stack = null;
 283                 state_stack = null;
 284         }
 285
 286         public static TokenType get_identifier_or_keyword (char* begin, int len) {
 287                 switch (len) {
 288                 case 2:
 289                         switch (begin[0]) {
 290                         case 'a':
 291                                 if (matches (begin, "as")) return TokenType.AS;
 292                                 break;
 293                         case 'd':
 294                                 if (matches (begin, "do")) return TokenType.DO;
 295                                 break;
 296                         case 'i':
 297                                 switch (begin[1]) {
 298                                 case 'f':
 299                                         return TokenType.IF;
 300                                 case 'n':
 301                                         return TokenType.IN;
 302                                 case 's':
 303                                         return TokenType.IS;
 304                                 }
 305                                 break;
 306                         case 'o':
 307                                 if (matches (begin, "of")) return TokenType.OF;
 308
 309                                 if (matches (begin, "or")) return TokenType.OP_OR;
 310                                 break;
 311                         case 't':
 312                                 if (matches (begin, "to")) return TokenType.TO;
 313                                 break;
 314                         }
 315                         break;
 316                 case 3:
 317                         switch (begin[0]) {
 318                         case 'a':
 319                                 if (matches (begin, "and")) return TokenType.OP_AND;
 320                                 break;
 321                         case 'd':
 322                                 if (matches (begin, "def")) return TokenType.DEF;
 323                                 break;
 324                         case 'f':
 325                                 if (matches (begin, "for")) return TokenType.FOR;
 326                                 break;
 327                         case 'g':
 328                                 if (matches (begin, "get")) return TokenType.GET;
 329                                 break;
 330                         case 'i':
 331                                 if (matches (begin, "isa")) return TokenType.ISA;
 332                                 break;
 333                         case 'n':
 334                                 switch (begin[1]) {
 335                                 case 'e':
 336                                         if (matches (begin, "new")) return TokenType.NEW;
 337                                         break;
 338                                 case 'o':
 339                                         if (matches (begin, "not")) return TokenType.OP_NEG;
 340                                         break;
 341                                 }
 342                                 break;
 343                         case 'o':
 344                                 if (matches (begin, "out")) return TokenType.OUT;
 345                                 break;
 346                         case 'r':
 347                                 if (matches (begin, "ref")) return TokenType.REF;
 348                                 break;
 349                         case 's':
 350                                 if (matches (begin, "set")) return TokenType.SET;
 351                                 break;
 352                         case 't':
 353                                 if (matches (begin, "try")) return TokenType.TRY;
 354                                 break;
 355                         case 'v':
 356                                 if (matches (begin, "var")) return TokenType.VAR;
 357                                 break;
 358                         }
 359                         break;
 360                 case 4:
 361                         switch (begin[0]) {
 362                         case 'c':
 363                                 if (matches (begin, "case")) return TokenType.CASE;
 364                                 break;
 365                         case 'd':
 366                                 if (matches (begin, "dict")) return TokenType.DICT;
 367                                 break;
 368                         case 'e':
 369                                 switch (begin[1]) {
 370                                 case 'l':
 371                                         if (matches (begin, "else")) return TokenType.ELSE;
 372                                         break;
 373                                 case 'n':
 374                                         if (matches (begin, "enum")) return TokenType.ENUM;
 375                                         break;
 376                                 }
 377                                 break;
 378                         case 'i':
 379                                 if (matches (begin, "init")) return TokenType.INIT;
 380                                 break;
 381                         case 'l':
 382                                 switch (begin[1]) {
 383                                 case 'i':
 384                                         if (matches (begin, "list")) return TokenType.LIST;
 385                                         break;
 386                                 case 'o':
 387                                         if (matches (begin, "lock")) return TokenType.LOCK;
 388                                         break;
 389                                 }
 390                                 break;
 391
 392                         case 'n':
 393                                 if (matches (begin, "null")) return TokenType.NULL;
 394                                 break;
 395                         case 'p':
 396                                 switch (begin[1]) {
 397                                 case 'a':
 398                                         if (matches (begin, "pass")) return TokenType.PASS;
 399                                         break;
 400                                 case 'r':
 401                                         if (matches (begin, "prop")) return TokenType.PROP;
 402                                         break;
 403                                 }
 404                                 break;
 405                         case 's':
 406                                 if (matches (begin, "self")) return TokenType.THIS;
 407                                 break;
 408                         case 't':
 409                                 if (matches (begin, "true")) return TokenType.TRUE;
 410                                 break;
 411                         case 'u':
 412                                 if (matches (begin, "uses")) return TokenType.USES;
 413                                 break;
 414                         case 'v':
 415                                 if (matches (begin, "void")) return TokenType.VOID;
 416                                 break;
 417                         case 'w':
 418                                 switch (begin[1]) {
 419                                 case 'e':
 420                                         if (matches (begin, "weak")) return TokenType.WEAK;
 421                                         break;
 422                                 case 'h':
 423                                         if (matches (begin, "when")) return TokenType.WHEN;
 424                                         break;
 425                                 }
 426                                 break;
 427                         }
 428                         break;
 429                 case 5:
 430                         switch (begin[0]) {
 431                         case 'a':
 432                                 switch (begin[1]) {
 433                                 case 'r':
 434                                         if (matches (begin, "array")) return TokenType.ARRAY;
 435                                         break;
 436                                 case 's':
 437                                         if (matches (begin, "async")) return TokenType.ASYNC;
 438                                         break;
 439                                 }
 440                                 break;
 441                         case 'b':
 442                                 if (matches (begin, "break")) return TokenType.BREAK;
 443                                 break;
 444                         case 'c':
 445                                 switch (begin[1]) {
 446                                 case 'l':
 447                                         if (matches (begin, "class")) return TokenType.CLASS;
 448                                         break;
 449                                 case 'o':
 450                                         if (matches (begin, "const")) return TokenType.CONST;
 451                                         break;
 452                                 }
 453                                 break;
 454                         case 'e':
 455                                 if (matches (begin, "event")) return TokenType.EVENT;
 456                                 break;
 457                         case 'f':
 458                                 switch (begin[1]) {
 459                                 case 'a':
 460                                         if (matches (begin, "false")) return TokenType.FALSE;
 461                                         break;
 462                                 case 'i':
 463                                         if (matches (begin, "final")) return TokenType.FINAL;
 464                                         break;
 465                                 }
 466                                 break;
 467                         case 'o':
 468                                 if (matches (begin, "owned")) return TokenType.OWNED;
 469                                 break;
 470                         case 'p':
 471                                 if (matches (begin, "print")) return TokenType.PRINT;
 472                                 break;
 473                         case 's':
 474                                 if (matches (begin, "super")) return TokenType.SUPER;
 475                                 break;
 476                         case 'r':
 477                                 if (matches (begin, "raise")) return TokenType.RAISE;
 478                                 break;
 479                         case 'w':
 480                                 if (matches (begin, "while")) return TokenType.WHILE;
 481                                 break;
 482                         case 'y':
 483                                 if (matches (begin, "yield")) return TokenType.YIELD;
 484                                 break;
 485                         }
 486                         break;
 487                 case 6:
 488                         switch (begin[0]) {
 489                         case 'a':
 490                                 if (matches (begin, "assert")) return TokenType.ASSERT;
 491                                 break;
 492                         case 'd':
 493                                 switch (begin[1]) {
 494                                 case 'e':
 495                                         if (matches (begin, "delete")) return TokenType.DELETE;
 496                                         break;
 497                                 case 'o':
 498                                         if (matches (begin, "downto")) return TokenType.DOWNTO;
 499                                         break;
 500                                 }
 501                                 break;
 502                         case 'e':
 503                                 switch (begin[1]) {
 504                                 case 'x':
 505                                         switch (begin[2]) {
 506                                         case 'c':
 507                                                 if (matches (begin, "except")) return TokenType.EXCEPT;
 508                                                 break;
 509                                         case 't':
 510                                                 if (matches (begin, "extern")) return TokenType.EXTERN;
 511                                                 break;
 512                                         }
 513                                         break;
 514                                 }
 515                                 break;
 516                         case 'i':
 517                                 if (matches (begin, "inline")) return TokenType.INLINE;
 518                                 break;
 519                         case 'p':
 520                                 switch (begin[1]) {
 521                                 case 'a':
 522                                         if (matches (begin, "params")) return TokenType.PARAMS;
 523                                         break;
 524                                 case 'u':
 525                                         if (matches (begin, "public")) return TokenType.PUBLIC;
 526                                         break;
 527                                 }
 528                                 break;
 529                         case 'r':
 530                                 switch (begin[1]) {
 531                                 case 'a':
 532                                         if (matches (begin, "raises")) return TokenType.RAISES;
 533                                         break;
 534                                 case 'e':
 535                                         if (matches (begin, "return")) return TokenType.RETURN;
 536                                         break;
 537                                 }
 538                                 break;
 539                         case 's':
 540                                 switch (begin[1]) {
 541                                 case 'e':
 542                                         if (matches (begin, "sealed")) return TokenType.SEALED;
 543                                         break;
 544                                 case 'i':
 545                                         if (matches (begin, "sizeof")) return TokenType.SIZEOF;
 546                                         break;
 547                                 case 't':
 548                                         switch (begin[2]) {
 549                                         case 'a':
 550                                                 if (matches (begin, "static")) return TokenType.STATIC;
 551                                                 break;
 552                                         case 'r':
 553                                                 if (matches (begin, "struct")) return TokenType.STRUCT;
 554                                                 break;
 555                                         }
 556                                         break;
 557                                 }
 558                                 break;
 559                         case 't':
 560                                 if (matches (begin, "typeof")) return TokenType.TYPEOF;
 561                                 break;
 562                         }
 563                         break;
 564                 case 7:
 565                         switch (begin[0]) {
 566                         case 'd':
 567                                 switch (begin[1]) {
 568                                 case 'e':
 569                                         if (matches (begin, "default")) return TokenType.DEFAULT;
 570                                         break;
 571                                 case 'y':
 572                                         if (matches (begin, "dynamic")) return TokenType.DYNAMIC;
 573                                         break;
 574                                 }
 575                                 break;
 576                         case 'e':
 577                                 if (matches (begin, "ensures")) return TokenType.ENSURES;
 578                                 break;
 579                         case 'f':
 580                                 switch (begin[1]) {
 581                                 case 'i':
 582                                         if (matches (begin, "finally")) return TokenType.FINALLY;
 583                                         break;
 584                                 }
 585                                 break;
 586                         case 'p':
 587                                 if (matches (begin, "private")) return TokenType.PRIVATE;
 588                                 break;
 589                         case 'u':
 590                                 if (matches (begin, "unowned")) return TokenType.UNOWNED;
 591                                 break;
 592                         case 'v':
 593                                 if (matches (begin, "virtual")) return TokenType.VIRTUAL;
 594                                 break;
 595                         }
 596                         break;
 597                 case 8:
 598                         switch (begin[0]) {
 599                         case 'a':
 600                                 if (matches (begin, "abstract")) return TokenType.ABSTRACT;
 601                                 break;
 602                         case 'c':
 603                                 if (matches (begin, "continue")) return TokenType.CONTINUE;
 604                                 break;
 605                         case 'd':
 606                                 if (matches (begin, "delegate")) return TokenType.DELEGATE;
 607                                 break;
 608                         case 'i':
 609                                 if (matches (begin, "internal")) return TokenType.INTERNAL;
 610                                 break;
 611                         case 'o':
 612                                 if (matches (begin, "override")) return TokenType.OVERRIDE;
 613                                 break;
 614                         case 'r':
 615                                 switch (begin[2]) {
 616                                 case 'a':
 617                                         if (matches (begin, "readonly")) return TokenType.READONLY;
 618                                         break;
 619                                 case 'q':
 620                                         if (matches (begin, "requires")) return TokenType.REQUIRES;
 621                                         break;
 622                                 }
 623                                 break;
 624                         case 'v':
 625                                 if (matches (begin, "volatile")) return TokenType.VOLATILE;
 626                                 break;
 627                         }
 628                         break;
 629                 case 9:
 630                         switch (begin[0]) {
 631                         case 'c':
 632                                 if (matches (begin, "construct")) return TokenType.CONSTRUCT;
 633                                 break;
 634                         case 'e':
 635                                 if (matches (begin, "exception")) return TokenType.ERRORDOMAIN;
 636                                 break;
 637                         case 'i':
 638                                 if (matches (begin, "interface")) return TokenType.INTERFACE;
 639                                 break;
 640                         case 'n':
 641                                 if (matches (begin, "namespace")) return TokenType.NAMESPACE;
 642                                 break;
 643                         case 'p':
 644                                 if (matches (begin, "protected")) return TokenType.PROTECTED;
 645                                 break;
 646                         case 'w':
 647                                 if (matches (begin, "writeonly")) return TokenType.WRITEONLY;
 648                                 break;
 649                         }
 650                         break;
 651                 case 10:
 652                         switch (begin[0]) {
 653                         case 'i':
 654                                 if (matches (begin, "implements")) return TokenType.IMPLEMENTS;
 655                                 break;
 656                         }
 657                         break;
 658                 }
 659                 return TokenType.IDENTIFIER;
 660         }
 661
 662
 663         public TokenType read_template_token (out SourceLocation token_begin, out SourceLocation token_end) {
 664                 TokenType type;
 665                 char* begin = current;
 666                 token_begin = SourceLocation (begin, line, column);
 667
 668                 int token_length_in_chars = -1;
 669
 670                 if (current >= end) {
 671                         type = TokenType.EOF;
 672                 } else {
 673                         switch (current[0]) {
 674                         case '"':
 675                                 type = TokenType.CLOSE_TEMPLATE;
 676                                 current++;
 677                                 state_stack.length--;
 678                                 break;
 679                         case '$':
 680                                 token_begin.pos++; // $ is not part of following token
 681                                 current++;
 682                                 if (current[0].isalpha () || current[0] == '_') {
 683                                         int len = 0;
 684                                         while (current < end && is_ident_char (current[0])) {
 685                                                 current++;
 686                                                 len++;
 687                                         }
 688                                         type = TokenType.IDENTIFIER;
 689                                         state_stack += State.TEMPLATE_PART;
 690                                 } else if (current[0] == '(') {
 691                                         current++;
 692                                         column += 2;
 693                                         state_stack += State.PARENS;
 694                                         return read_token (out token_begin, out token_end);
 695                                 } else if (current[0] == '$') {
 696                                         type = TokenType.TEMPLATE_STRING_LITERAL;
 697                                         current++;
 698                                         state_stack += State.TEMPLATE_PART;
 699                                 } else {
 700                                         Report.error (get_source_reference (1), "unexpected character");
 701                                         return read_template_token (out token_begin, out token_end);
 702                                 }
 703                                 break;
 704                         default:
 705                                 type = TokenType.TEMPLATE_STRING_LITERAL;
 706                                 token_length_in_chars = 0;
 707                                 while (current < end && current[0] != '"' && current[0] != '$') {
 708                                         if (current[0] == '\\') {
 709                                                 current++;
 710                                                 token_length_in_chars++;
 711                                                 if (current >= end) {
 712                                                         break;
 713                                                 }
 714
 715                                                 switch (current[0]) {
 716                                                 case '\'':
 717                                                 case '"':
 718                                                 case '\\':
 719                                                 case '0':
 720                                                 case 'b':
 721                                                 case 'f':
 722                                                 case 'n':
 723                                                 case 'r':
 724                                                 case 't':
 725                                                         current++;
 726                                                         token_length_in_chars++;
 727                                                         break;
 728                                                 case 'x':
 729                                                         // hexadecimal escape character
 730                                                         current++;
 731                                                         token_length_in_chars++;
 732                                                         while (current < end && current[0].isxdigit ()) {
 733                                                                 current++;
 734                                                                 token_length_in_chars++;
 735                                                         }
 736                                                         break;
 737                                                 default:
 738                                                         Report.error (get_source_reference (token_length_in_chars), "invalid escape sequence");
 739                                                         break;
 740                                                 }
 741                                         } else if (current[0] == '\n') {
 742                                                 current++;
 743                                                 line++;
 744                                                 column = 1;
 745                                                 token_length_in_chars = 1;
 746                                         } else {
 747                                                 unichar u = ((string) current).get_char_validated ((long) (end - current));
 748                                                 if (u != (unichar) (-1)) {
 749                                                         current += u.to_utf8 (null);
 750                                                         token_length_in_chars++;
 751                                                 } else {
 752                                                         current++;
 753                                                         Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
 754                                                 }
 755                                         }
 756                                 }
 757                                 if (current >= end) {
 758                                         Report.error (get_source_reference (token_length_in_chars), "syntax error, expected \"");
 759                                         state_stack.length--;
 760                                         return read_token (out token_begin, out token_end);
 761                                 }
 762                                 state_stack += State.TEMPLATE_PART;
 763                                 break;
 764                         }
 765                 }
 766
 767                 if (token_length_in_chars < 0) {
 768                         column += (int) (current - begin);
 769                 } else {
 770                         column += token_length_in_chars;
 771                 }
 772
 773                 token_end = SourceLocation (current, line, column - 1);
 774
 775                 return type;
 776         }
 777
 778
 779         public TokenType read_token (out SourceLocation token_begin, out SourceLocation token_end) {
 780                 if (current == null) {
 781                         token_begin = SourceLocation (current, line, column);
 782                         token_end = SourceLocation (current, line, column);
 783                         return TokenType.EOF;
 784                 }
 785
 786                 if (in_template ()) {
 787                         return read_template_token (out token_begin, out token_end);
 788                 } else if (in_template_part ()) {
 789                         state_stack.length--;
 790
 791                         token_begin = SourceLocation (current, line, column);
 792                         token_end = SourceLocation (current, line, column - 1);
 793
 794                         return TokenType.COMMA;
 795                 } else if (in_regex_literal ()) {
 796                         return read_regex_token (out token_begin, out token_end);
 797                 }
 798
 799
 800
 801                 /* emit dedents if outstanding before checking any other chars */
 802
 803                 if (pending_dedents > 0) {
 804                         pending_dedents--;
 805                         indent_level--;
 806
 807                         token_begin = SourceLocation (current, line, column);
 808                         token_end = SourceLocation (current, line, column);
 809
 810                         last_token = TokenType.DEDENT;
 811
 812                         return TokenType.DEDENT;
 813                 }
 814
 815                 if ((_indent_spaces == 0 ) || (last_token != TokenType.EOL)) {
 816                         /* scrub whitespace (excluding newlines) and comments */
 817                         space ();
 818                 }
 819
 820
 821                 /* handle explicit line continuation (lines ending with "\") */
 822                 while (current < end && current[0] == '\\' && current[1] == '\n') {
 823                         current += 2;
 824                         line++;
 825                         skip_space_tabs ();
 826                 }
 827
 828                 /* handle automatic line continuations (when inside parens or braces) */
 829                 while (current < end && current[0] == '\n' && (open_parens_count > 0 || open_brace_count > 0)) {
 830                         current++;
 831                         line++;
 832                         skip_space_tabs ();
 833                 }
 834
 835
 836                 /* handle non-consecutive new line once parsing is underway - EOL */
 837                 if (newline () && parse_started && last_token != TokenType.EOL && last_token != TokenType.SEMICOLON) {
 838                         token_begin = SourceLocation (current, line, column);
 839                         token_end = SourceLocation (current, line, column);
 840
 841                         last_token = TokenType.EOL;
 842
 843                         return TokenType.EOL;
 844                 }
 845
 846
 847                 while (skip_newlines ()) {
 848                         token_begin = SourceLocation (current, line, column);
 849
 850                         current_indent_level = count_tabs ();
 851
 852                         /* if its an empty new line then ignore */
 853                         if (current_indent_level == -1)  {
 854                                 continue;
 855                         }
 856
 857                         if (current_indent_level > indent_level) {
 858                                 indent_level = current_indent_level;
 859
 860                                 token_end = SourceLocation (current, line, column);
 861
 862                                 last_token = TokenType.INDENT;
 863
 864                                 return TokenType.INDENT;
 865                         } else if (current_indent_level < indent_level) {
 866                                 indent_level--;
 867
 868                                 pending_dedents = (indent_level - current_indent_level);
 869                                 token_end = SourceLocation (current, line, column);
 870
 871                                 last_token = TokenType.DEDENT;
 872
 873                                 return TokenType.DEDENT;
 874                         }
 875                 }
 876
 877                 TokenType type;
 878                 char* begin = current;
 879                 token_begin = SourceLocation (begin, line, column);
 880
 881                 int token_length_in_chars = -1;
 882
 883                 parse_started = true;
 884
 885                 if (current >= end) {
 886                         if (indent_level > 0) {
 887                                 indent_level--;
 888
 889                                 pending_dedents = indent_level;
 890
 891                                 type = TokenType.DEDENT;
 892                         } else {
 893                                 type = TokenType.EOF;
 894                         }
 895                 } else if (current[0].isalpha () || current[0] == '_') {
 896                         int len = 0;
 897                         while (current < end && is_ident_char (current[0])) {
 898                                 current++;
 899                                 len++;
 900                         }
 901                         type = get_identifier_or_keyword (begin, len);
 902                 } else if (current[0] == '@') {
 903                         if (current < end - 1 && current[1] == '"') {
 904                                 type = TokenType.OPEN_TEMPLATE;
 905                                 current += 2;
 906                                 state_stack += State.TEMPLATE;
 907                         } else {
 908                                 token_begin.pos++; // @ is not part of the identifier
 909                                 current++;
 910                                 int len = 0;
 911                                 while (current < end && is_ident_char (current[0])) {
 912                                         current++;
 913                                         len++;
 914                                 }
 915                                 type = TokenType.IDENTIFIER;
 916                         }
 917                 } else if (current[0].isdigit ()) {
 918                         while (current < end && current[0].isdigit ()) {
 919                                 current++;
 920                         }
 921                         type = TokenType.INTEGER_LITERAL;
 922                         if (current < end && current[0].tolower () == 'l') {
 923                                 current++;
 924                                 if (current < end && current[0].tolower () == 'l') {
 925                                         current++;
 926                                 }
 927                         } else if (current < end && current[0].tolower () == 'u') {
 928                                 current++;
 929                                 if (current < end && current[0].tolower () == 'l') {
 930                                         current++;
 931                                         if (current < end && current[0].tolower () == 'l') {
 932                                                 current++;
 933                                         }
 934                                 }
 935                         } else if (current < end - 1 && current[0] == '.' && current[1].isdigit ()) {
 936                                 current++;
 937                                 while (current < end && current[0].isdigit ()) {
 938                                         current++;
 939                                 }
 940                                 if (current < end && current[0].tolower () == 'e') {
 941                                         current++;
 942                                         if (current < end && (current[0] == '+' || current[0] == '-')) {
 943                                                 current++;
 944                                         }
 945                                         while (current < end && current[0].isdigit ()) {
 946                                                 current++;
 947                                         }
 948                                 }
 949                                 if (current < end && current[0].tolower () == 'f') {
 950                                         current++;
 951                                 }
 952                                 type = TokenType.REAL_LITERAL;
 953                         } else if (current < end && current == begin + 1
 954                                            && begin[0] == '0' && begin[1] == 'x' && begin[2].isxdigit ()) {
 955                                 // hexadecimal integer literal
 956                                 current++;
 957                                 while (current < end && current[0].isxdigit ()) {
 958                                         current++;
 959                                 }
 960                         } else if (current < end && is_ident_char (current[0])) {
 961                                 // allow identifiers to start with a digit
 962                                 // as long as they contain at least one char
 963                                 while (current < end && is_ident_char (current[0])) {
 964                                         current++;
 965                                 }
 966                                 type = TokenType.IDENTIFIER;
 967                         }
 968                 } else {
 969                         switch (current[0]) {
 970                         case '{':
 971                                 type = TokenType.OPEN_BRACE;
 972                                 open_brace_count++;
 973                                 state_stack += State.BRACE;
 974                                 current++;
 975                                 break;
 976                         case '}':
 977                                 type = TokenType.CLOSE_BRACE;
 978                                 open_brace_count--;
 979                                 if (state_stack.length > 0) {
 980                                         state_stack.length--;
 981                                 }
 982                                 current++;
 983                                 break;
 984                         case '(':
 985                                 type = TokenType.OPEN_PARENS;
 986                                 open_parens_count++;
 987                                 state_stack += State.PARENS;
 988                                 current++;
 989                                 break;
 990                         case ')':
 991                                 type = TokenType.CLOSE_PARENS;
 992                                 open_parens_count--;
 993                                 current++;
 994                                 if (state_stack.length > 0) {
 995                                         state_stack.length--;
 996                                 }
 997                                 if (in_template ()) {
 998                                         type = TokenType.COMMA;
 999                                 }
1000                                 break;
1001                         case '[':
1002                                 type = TokenType.OPEN_BRACKET;
1003                                 state_stack += State.BRACKET;
1004                                 current++;
1005                                 break;
1006                         case ']':
1007                                 type = TokenType.CLOSE_BRACKET;
1008                                 if (state_stack.length > 0) {
1009                                         state_stack.length--;
1010                                 }
1011                                 current++;
1012                                 break;
1013                         case '.':
1014                                 type = TokenType.DOT;
1015                                 current++;
1016                                 if (current < end - 1) {
1017                                         if (current[0] == '.' && current[1] == '.') {
1018                                                 type = TokenType.ELLIPSIS;
1019                                                 current += 2;
1020                                         }
1021                                 }
1022                                 break;
1023                         case ':':
1024                                 type = TokenType.COLON;
1025                                 current++;
1026                                 break;
1027                         case ',':
1028                                 type = TokenType.COMMA;
1029                                 current++;
1030                                 break;
1031                         case ';':
1032                                 type = TokenType.SEMICOLON;
1033                                 current++;
1034                                 break;
1035                         case '#':
1036                                 type = TokenType.HASH;
1037                                 current++;
1038                                 break;
1039                         case '?':
1040                                 type = TokenType.INTERR;
1041                                 current++;
1042                                 break;
1043                         case '|':
1044                                 type = TokenType.BITWISE_OR;
1045                                 current++;
1046                                 if (current < end) {
1047                                         switch (current[0]) {
1048                                         case '=':
1049                                                 type = TokenType.ASSIGN_BITWISE_OR;
1050                                                 current++;
1051                                                 break;
1052                                         case '|':
1053                                                 type = TokenType.OP_OR;
1054                                                 current++;
1055                                                 break;
1056                                         }
1057                                 }
1058                                 break;
1059                         case '&':
1060                                 type = TokenType.BITWISE_AND;
1061                                 current++;
1062                                 if (current < end) {
1063                                         switch (current[0]) {
1064                                         case '=':
1065                                                 type = TokenType.ASSIGN_BITWISE_AND;
1066                                                 current++;
1067                                                 break;
1068                                         case '&':
1069                                                 type = TokenType.OP_AND;
1070                                                 current++;
1071                                                 break;
1072                                         }
1073                                 }
1074                                 break;
1075                         case '^':
1076                                 type = TokenType.CARRET;
1077                                 current++;
1078                                 if (current < end && current[0] == '=') {
1079                                         type = TokenType.ASSIGN_BITWISE_XOR;
1080                                         current++;
1081                                 }
1082                                 break;
1083                         case '~':
1084                                 type = TokenType.TILDE;
1085                                 current++;
1086                                 break;
1087                         case '=':
1088                                 type = TokenType.ASSIGN;
1089                                 current++;
1090                                 if (current < end) {
1091                                         switch (current[0]) {
1092                                         case '=':
1093                                                 type = TokenType.OP_EQ;
1094                                                 current++;
1095                                                 break;
1096                                         case '>':
1097                                                 type = TokenType.LAMBDA;
1098                                                 current++;
1099                                                 break;
1100                                         }
1101                                 }
1102                                 break;
1103                         case '<':
1104                                 type = TokenType.OP_LT;
1105                                 current++;
1106                                 if (current < end) {
1107                                         switch (current[0]) {
1108                                         case '=':
1109                                                 type = TokenType.OP_LE;
1110                                                 current++;
1111                                                 break;
1112                                         case '<':
1113                                                 type = TokenType.OP_SHIFT_LEFT;
1114                                                 current++;
1115                                                 if (current < end && current[0] == '=') {
1116                                                         type = TokenType.ASSIGN_SHIFT_LEFT;
1117                                                         current++;
1118                                                 }
1119                                                 break;
1120                                         }
1121                                 }
1122                                 break;
1123                         case '>':
1124                                 type = TokenType.OP_GT;
1125                                 current++;
1126                                 if (current < end && current[0] == '=') {
1127                                         type = TokenType.OP_GE;
1128                                         current++;
1129                                 }
1130                                 break;
1131                         case '!':
1132                                 type = TokenType.OP_NEG;
1133                                 current++;
1134                                 if (current < end && current[0] == '=') {
1135                                         type = TokenType.OP_NE;
1136                                         current++;
1137                                 }
1138                                 break;
1139                         case '+':
1140                                 type = TokenType.PLUS;
1141                                 current++;
1142                                 if (current < end) {
1143                                         switch (current[0]) {
1144                                         case '=':
1145                                                 type = TokenType.ASSIGN_ADD;
1146                                                 current++;
1147                                                 break;
1148                                         case '+':
1149                                                 type = TokenType.OP_INC;
1150                                                 current++;
1151                                                 break;
1152                                         }
1153                                 }
1154                                 break;
1155                         case '-':
1156                                 type = TokenType.MINUS;
1157                                 current++;
1158                                 if (current < end) {
1159                                         switch (current[0]) {
1160                                         case '=':
1161                                                 type = TokenType.ASSIGN_SUB;
1162                                                 current++;
1163                                                 break;
1164                                         case '-':
1165                                                 type = TokenType.OP_DEC;
1166                                                 current++;
1167                                                 break;
1168                                         case '>':
1169                                                 type = TokenType.OP_PTR;
1170                                                 current++;
1171                                                 break;
1172                                         }
1173                                 }
1174                                 break;
1175                         case '*':
1176                                 type = TokenType.STAR;
1177                                 current++;
1178                                 if (current < end && current[0] == '=') {
1179                                         type = TokenType.ASSIGN_MUL;
1180                                         current++;
1181                                 }
1182                                 break;
1183                         case '/':
1184                                 switch (last_token) {
1185                                 case TokenType.ASSIGN:
1186                                 case TokenType.COMMA:
1187                                 case TokenType.MINUS:
1188                                 case TokenType.OP_AND:
1189                                 case TokenType.OP_EQ:
1190                                 case TokenType.OP_GE:
1191                                 case TokenType.OP_GT:
1192                                 case TokenType.OP_INC:
1193                                 case TokenType.OP_LE:
1194                                 case TokenType.OP_LT:
1195                                 case TokenType.OP_NE:
1196                                 case TokenType.OP_NEG:
1197                                 case TokenType.OP_OR:
1198                                 case TokenType.OPEN_BRACE:
1199                                 case TokenType.OPEN_PARENS:
1200                                 case TokenType.PLUS:
1201                                 case TokenType.RETURN:
1202                                         type = TokenType.OPEN_REGEX_LITERAL;
1203                                         state_stack += State.REGEX_LITERAL;
1204                                         current++;
1205                                         break;
1206                                 default:
1207                                         type = TokenType.DIV;
1208                                         current++;
1209                                         if (current < end && current[0] == '=') {
1210                                                 type = TokenType.ASSIGN_DIV;
1211                                                 current++;
1212                                         }
1213                                         break;
1214                                 }
1215                                 break;
1216
1217                         case '%':
1218                                 type = TokenType.PERCENT;
1219                                 current++;
1220                                 if (current < end && current[0] == '=') {
1221                                         type = TokenType.ASSIGN_PERCENT;
1222                                         current++;
1223                                 }
1224                                 break;
1225                         case '\'':
1226                         case '"':
1227                                 if (begin[0] == '\'') {
1228                                         type = TokenType.CHARACTER_LITERAL;
1229                                 } else if (current < end - 6 && begin[1] == '"' && begin[2] == '"') {
1230                                         type = TokenType.VERBATIM_STRING_LITERAL;
1231                                         token_length_in_chars = 6;
1232                                         current += 3;
1233                                         while (current < end - 4) {
1234                                                 if (current[0] == '"' && current[1] == '"' && current[2] == '"' && current[3] != '"') {
1235                                                         break;
1236                                                 } else if (current[0] == '\n') {
1237                                                         current++;
1238                                                         line++;
1239                                                         column = 1;
1240                                                         token_length_in_chars = 3;
1241                                                 } else {
1242                                                         unichar u = ((string) current).get_char_validated ((long) (end - current));
1243                                                         if (u != (unichar) (-1)) {
1244                                                                 current += u.to_utf8 (null);
1245                                                                 token_length_in_chars++;
1246                                                         } else {
1247                                                                 Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
1248                                                         }
1249                                                 }
1250                                         }
1251                                         if (current[0] == '"' && current[1] == '"' && current[2] == '"') {
1252                                                 current += 3;
1253                                         } else {
1254                                                 Report.error (get_source_reference (token_length_in_chars), "syntax error, expected \"\"\"");
1255                                         }
1256                                         break;
1257                                 } else {
1258                                         type = TokenType.STRING_LITERAL;
1259                                 }
1260                                 token_length_in_chars = 2;
1261                                 current++;
1262                                 while (current < end && current[0] != begin[0]) {
1263                                         if (current[0] == '\\') {
1264                                                 current++;
1265                                                 token_length_in_chars++;
1266                                                 if (current >= end) {
1267                                                         break;
1268                                                 }
1269
1270                                                 switch (current[0]) {
1271                                                 case '\'':
1272                                                 case '"':
1273                                                 case '\\':
1274                                                 case '0':
1275                                                 case 'b':
1276                                                 case 'f':
1277                                                 case 'n':
1278                                                 case 'r':
1279                                                 case 't':
1280                                                         current++;
1281                                                         token_length_in_chars++;
1282                                                         break;
1283                                                 case 'x':
1284                                                         // hexadecimal escape character
1285                                                         current++;
1286                                                         token_length_in_chars++;
1287                                                         while (current < end && current[0].isxdigit ()) {
1288                                                                 current++;
1289                                                                 token_length_in_chars++;
1290                                                         }
1291                                                         break;
1292                                                 default:
1293                                                         Report.error (get_source_reference (token_length_in_chars), "invalid escape sequence");
1294                                                         break;
1295                                                 }
1296                                         } else if (current[0] == '\n') {
1297                                                 current++;
1298                                                 line++;
1299                                                 column = 1;
1300                                                 token_length_in_chars = 1;
1301                                         } else {
1302                                                 unichar u = ((string) current).get_char_validated ((long) (end - current));
1303                                                 if (u != (unichar) (-1)) {
1304                                                         current += u.to_utf8 (null);
1305                                                         token_length_in_chars++;
1306                                                 } else {
1307                                                         current++;
1308                                                         Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
1309                                                 }
1310                                         }
1311                                         if (current < end && begin[0] == '\'' && current[0] != '\'') {
1312                                                 // multiple characters in single character literal
1313                                                 Report.error (get_source_reference (token_length_in_chars), "invalid character literal");
1314                                         }
1315                                 }
1316                                 if (current < end) {
1317                                         current++;
1318                                 } else {
1319                                         Report.error (get_source_reference (token_length_in_chars), "syntax error, expected %c".printf (begin[0]));
1320                                 }
1321                                 break;
1322                         default:
1323                                 unichar u = ((string) current).get_char_validated ((long) (end - current));
1324                                 if (u != (unichar) (-1)) {
1325                                         current += u.to_utf8 (null);
1326                                         Report.error (get_source_reference (0), "syntax error, unexpected character");
1327                                 } else {
1328                                         current++;
1329                                         Report.error (get_source_reference (0), "invalid UTF-8 character");
1330                                 }
1331                                 column++;
1332                                 return read_token (out token_begin, out token_end);
1333                         }
1334                 }
1335
1336                 if (token_length_in_chars < 0) {
1337                         column += (int) (current - begin);
1338                 } else {
1339                         column += token_length_in_chars;
1340                 }
1341
1342                 token_end = SourceLocation (current, line, column - 1);
1343                 last_token = type;
1344
1345                 return type;
1346         }
1347
1348         int count_tabs ()
1349         {
1350
1351                 int tab_count = 0;
1352
1353
1354                 if (_indent_spaces == 0) {
1355                         while (current < end && current[0] == '\t') {
1356                                 current++;
1357                                 column++;
1358                                 tab_count++;
1359                         }
1360                 } else {
1361                         int space_count = 0;
1362                         while (current < end && current[0] == ' ') {
1363                                 current++;
1364                                 column++;
1365                                 space_count++;
1366                         }
1367
1368                         tab_count = space_count / _indent_spaces;
1369
1370                 }
1371
1372                 /* ignore comments and whitspace and other lines that contain no code */
1373
1374                 space ();
1375
1376                 if ((current < end) && (current[0] == '\n')) return -1;
1377
1378                 return tab_count;
1379         }
1380
1381         static bool matches (char* begin, string keyword) {
1382                 char* keyword_array = (char *) keyword;
1383                 long len = keyword.length;
1384                 for (int i = 0; i < len; i++) {
1385                         if (begin[i] != keyword_array[i]) {
1386                                 return false;
1387                         }
1388                 }
1389                 return true;
1390         }
1391
1392         bool whitespace () {
1393                 bool found = false;
1394                 while (current < end && current[0].isspace () && current[0] != '\n' ) {
1395
1396                         found = true;
1397                         current++;
1398                         column++;
1399                 }
1400
1401                 if ((column == 1) && (current < end) && (current[0] == '#')) {
1402                         pp_directive ();
1403                         return true;
1404                 }
1405
1406                 return found;
1407         }
1408
1409         inline bool newline () {
1410                 if (current[0] == '\n') {
1411                         return true;
1412                 }
1413
1414                 return false;
1415         }
1416
1417         bool skip_newlines () {
1418                 bool new_lines = false;
1419
1420                 while (newline ()) {
1421                         current++;
1422
1423                         line++;
1424                         column = 1;
1425                         current_indent_level = 0;
1426
1427                         new_lines = true;
1428                 }
1429
1430                 return new_lines;
1431         }
1432
1433         bool comment (bool file_comment = false) {
1434                 if (current == null
1435                         || current > end - 2
1436                         || current[0] != '/'
1437                         || (current[1] != '/' && current[1] != '*')) {
1438                         return false;
1439                 }
1440
1441
1442                 if (current[1] == '/') {
1443                         // single-line comment
1444
1445                         SourceReference source_reference = null;
1446                         if (file_comment) {
1447                                 source_reference = get_source_reference (0);
1448                         }
1449
1450                         current += 2;
1451
1452                         // skip until end of line or end of file
1453                         while (current < end && current[0] != '\n') {
1454                                 current++;
1455                         }
1456
1457                         /* do not ignore EOL if comment does not exclusively occupy the line */
1458                         if (current[0] == '\n' && last_token == TokenType.EOL) {
1459                                 current++;
1460                                 line++;
1461                                 column = 1;
1462                                 current_indent_level = 0;
1463                         }
1464
1465                         if (source_reference != null) {
1466                                 push_comment (((string) begin).substring (0, (long) (current - begin)), source_reference, file_comment);
1467                         }
1468
1469                 } else {
1470                         // delimited comment
1471                         SourceReference source_reference = null;
1472                         if (file_comment && current[2] == '*') {
1473                                 return false;
1474                         }
1475
1476                         if (current[2] == '*' || file_comment) {
1477                                 source_reference = get_source_reference (0);
1478                         }
1479
1480                         current += 2;
1481                         char* begin = current;
1482
1483                         while (current < end - 1
1484                                    && (current[0] != '*' || current[1] != '/')) {
1485                                 if (current[0] == '\n') {
1486                                         line++;
1487                                         column = 0;
1488                                 }
1489                                 current++;
1490                                 column++;
1491                         }
1492                         if (current == end - 1) {
1493                                 Report.error (get_source_reference (0), "syntax error, expected */");
1494                                 return true;
1495                         }
1496
1497                         if (source_reference != null) {
1498                                 string comment = ((string) begin).substring (0, (long) (current - begin));
1499                                 push_comment (comment, source_reference, file_comment);
1500                         }
1501
1502                         current += 2;
1503                         column += 2;
1504                 }
1505
1506                 return true;
1507         }
1508
1509         bool skip_tabs () {
1510                 bool found = false;
1511                 while (current < end && current[0] == '\t' ) {
1512                         current++;
1513                         column++;
1514                         found = true;
1515                 }
1516
1517                 return found;
1518         }
1519
1520         void skip_space_tabs () {
1521                 while (whitespace () || skip_tabs () || comment () ) {
1522                 }
1523
1524         }
1525
1526         void space () {
1527                 while (whitespace () || comment ()) {
1528                 }
1529         }
1530
1531         public void parse_file_comments () {
1532                 while (whitespace () || comment (true)) {
1533                 }
1534
1535         }
1536
1537         void push_comment (string comment_item, SourceReference source_reference, bool file_comment) {
1538                 if (comment_item[0] == '*') {
1539                         if (_comment != null) {
1540                                 // extra doc comment, add it to source file comments
1541                                 source_file.add_comment (_comment);
1542                         }
1543                         _comment = new Comment (comment_item, source_reference);
1544                 }
1545
1546                 if (file_comment) {
1547                         source_file.add_comment (new Comment (comment_item, source_reference));
1548                         _comment = null;
1549                 }
1550         }
1551
1552         /**
1553          * Clears and returns the content of the comment stack.
1554          *
1555          * @return saved comment
1556          */
1557         public Comment? pop_comment () {
1558                 if (_comment == null) {
1559                         return null;
1560                 }
1561
1562                 var comment = _comment;
1563                 _comment = null;
1564                 return comment;
1565         }
1566
1567         bool pp_whitespace () {
1568                 bool found = false;
1569                 while (current < end && current[0].isspace () && current[0] != '\n') {
1570                         found = true;
1571                         current++;
1572                         column++;
1573                 }
1574                 return found;
1575         }
1576
1577         void pp_space () {
1578                 while (pp_whitespace () || comment ()) {
1579                 }
1580         }
1581
1582         void pp_directive () {
1583                 // hash sign
1584                 current++;
1585                 column++;
1586
1587                 pp_space ();
1588
1589                 char* begin = current;
1590                 int len = 0;
1591                 while (current < end && current[0].isalnum ()) {
1592                         current++;
1593                         column++;
1594                         len++;
1595                 }
1596
1597                 if (len == 2 && matches (begin, "if")) {
1598                         parse_pp_if ();
1599                 } else if (len == 4 && matches (begin, "elif")) {
1600                         parse_pp_elif ();
1601                 } else if (len == 4 && matches (begin, "else")) {
1602                         parse_pp_else ();
1603                 } else if (len == 5 && matches (begin, "endif")) {
1604                         parse_pp_endif ();
1605                 } else {
1606                         Report.error (get_source_reference (-len, len), "syntax error, invalid preprocessing directive");
1607                 }
1608
1609                 if (conditional_stack.length > 0
1610                         && conditional_stack[conditional_stack.length - 1].skip_section) {
1611                         // skip lines until next preprocessing directive
1612                         bool bol = false;
1613                         while (current < end) {
1614                                 if (bol && current < end && current[0] == '#') {
1615                                         // go back to begin of line
1616                                         current -= (column - 1);
1617                                         column = 1;
1618                                         return;
1619                                 }
1620                                 if (current[0] == '\n') {
1621                                         line++;
1622                                         column = 0;
1623                                         bol = true;
1624                                 } else if (!current[0].isspace ()) {
1625                                         bol = false;
1626                                 }
1627                                 current++;
1628                                 column++;
1629                         }
1630                 }
1631         }
1632
1633         void pp_eol () {
1634                 pp_space ();
1635                 if (current >= end || current[0] != '\n') {
1636                         Report.error (get_source_reference (0), "syntax error, expected newline");
1637                 }
1638         }
1639
1640         void parse_pp_if () {
1641                 pp_space ();
1642
1643                 bool condition = parse_pp_expression ();
1644
1645                 pp_eol ();
1646
1647                 conditional_stack += Conditional ();
1648
1649                 if (condition && (conditional_stack.length == 1 || !conditional_stack[conditional_stack.length - 2].skip_section)) {
1650                         // condition true => process code within if
1651                         conditional_stack[conditional_stack.length - 1].matched = true;
1652                 } else {
1653                         // skip lines until next preprocessing directive
1654                         conditional_stack[conditional_stack.length - 1].skip_section = true;
1655                 }
1656         }
1657
1658         void parse_pp_elif () {
1659                 pp_space ();
1660
1661                 bool condition = parse_pp_expression ();
1662
1663                 pp_eol ();
1664
1665                 if (conditional_stack.length == 0 || conditional_stack[conditional_stack.length - 1].else_found) {
1666                         Report.error (get_source_reference (0), "syntax error, unexpected #elif");
1667                         return;
1668                 }
1669
1670                 if (condition && !conditional_stack[conditional_stack.length - 1].matched
1671                         && (conditional_stack.length == 1 || !conditional_stack[conditional_stack.length - 2].skip_section)) {
1672                         // condition true => process code within if
1673                         conditional_stack[conditional_stack.length - 1].matched = true;
1674                         conditional_stack[conditional_stack.length - 1].skip_section = false;
1675                 } else {
1676                         // skip lines until next preprocessing directive
1677                         conditional_stack[conditional_stack.length - 1].skip_section = true;
1678                 }
1679         }
1680
1681         void parse_pp_else () {
1682                 pp_eol ();
1683
1684                 if (conditional_stack.length == 0 || conditional_stack[conditional_stack.length - 1].else_found) {
1685                         Report.error (get_source_reference (0), "syntax error, unexpected #else");
1686                         return;
1687                 }
1688
1689                 if (!conditional_stack[conditional_stack.length - 1].matched
1690                         && (conditional_stack.length == 1 || !conditional_stack[conditional_stack.length - 2].skip_section)) {
1691                         // condition true => process code within if
1692                         conditional_stack[conditional_stack.length - 1].matched = true;
1693                         conditional_stack[conditional_stack.length - 1].skip_section = false;
1694                 } else {
1695                         // skip lines until next preprocessing directive
1696                         conditional_stack[conditional_stack.length - 1].skip_section = true;
1697                 }
1698         }
1699
1700         void parse_pp_endif () {
1701                 pp_eol ();
1702
1703                 if (conditional_stack.length == 0) {
1704                         Report.error (get_source_reference (0), "syntax error, unexpected #endif");
1705                         return;
1706                 }
1707
1708                 conditional_stack.length--;
1709         }
1710
1711         bool parse_pp_symbol () {
1712                 int len = 0;
1713                 while (current < end && is_ident_char (current[0])) {
1714                         current++;
1715                         column++;
1716                         len++;
1717                 }
1718
1719                 if (len == 0) {
1720                         Report.error (get_source_reference (0), "syntax error, expected identifier");
1721                         return false;
1722                 }
1723
1724                 string identifier = ((string) (current - len)).substring (0, len);
1725                 bool defined;
1726                 if (identifier == "true") {
1727                         defined = true;
1728                 } else if (identifier == "false") {
1729                         defined = false;
1730                 } else {
1731                         defined = source_file.context.is_defined (identifier);
1732                 }
1733
1734                 return defined;
1735         }
1736
1737         bool parse_pp_primary_expression () {
1738                 if (current >= end) {
1739                         Report.error (get_source_reference (0), "syntax error, expected identifier");
1740                 } else if (is_ident_char (current[0])) {
1741                         return parse_pp_symbol ();
1742                 } else if (current[0] == '(') {
1743                         current++;
1744                         column++;
1745                         pp_space ();
1746                         bool result = parse_pp_expression ();
1747                         pp_space ();
1748                         if (current < end && current[0] ==  ')') {
1749                                 current++;
1750                                 column++;
1751                         } else {
1752                                 Report.error (get_source_reference (0), "syntax error, expected `)'");
1753                         }
1754                         return result;
1755                 } else {
1756                         Report.error (get_source_reference (0), "syntax error, expected identifier");
1757                 }
1758                 return false;
1759         }
1760
1761         bool parse_pp_unary_expression () {
1762                 if (current < end && current[0] == '!') {
1763                         current++;
1764                         column++;
1765                         pp_space ();
1766                         return !parse_pp_unary_expression ();
1767                 }
1768
1769                 return parse_pp_primary_expression ();
1770         }
1771
1772         bool parse_pp_equality_expression () {
1773                 bool left = parse_pp_unary_expression ();
1774                 pp_space ();
1775                 while (true) {
1776                         if (current < end - 1 && current[0] == '=' && current[1] == '=') {
1777                                 current += 2;
1778                                 column += 2;
1779                                 pp_space ();
1780                                 bool right = parse_pp_unary_expression ();
1781                                 left = (left == right);
1782                         } else if (current < end - 1 && current[0] == '!' && current[1] == '=') {
1783                                 current += 2;
1784                                 column += 2;
1785                                 pp_space ();
1786                                 bool right = parse_pp_unary_expression ();
1787                                 left = (left != right);
1788                         } else {
1789                                 break;
1790                         }
1791                 }
1792                 return left;
1793         }
1794
1795         bool parse_pp_and_expression () {
1796                 bool left = parse_pp_equality_expression ();
1797                 pp_space ();
1798                 while (current < end - 1 && current[0] == '&' && current[1] == '&') {
1799                         current += 2;
1800                         column += 2;
1801                         pp_space ();
1802                         bool right = parse_pp_equality_expression ();
1803                         left = left && right;
1804                 }
1805                 return left;
1806         }
1807
1808         bool parse_pp_or_expression () {
1809                 bool left = parse_pp_and_expression ();
1810                 pp_space ();
1811                 while (current < end - 1 && current[0] == '|' && current[1] == '|') {
1812                         current += 2;
1813                         column += 2;
1814                         pp_space ();
1815                         bool right = parse_pp_and_expression ();
1816                         left = left || right;
1817                 }
1818                 return left;
1819         }
1820
1821         bool parse_pp_expression () {
1822                 return parse_pp_or_expression ();
1823         }
1824 }
1825