scintilla/LexRuby.cxx

   1 // Scintilla source code edit control
   2 /** @file LexRuby.cxx
   3  ** Lexer for Ruby.
   4  **/
   5 // Copyright 2001- by Clemens Wyss <wys@helbling.ch>
   6 // The License.txt file describes the conditions under which this software may be distributed.
   7
   8 #include <stdlib.h>
   9 #include <string.h>
  10 #include <ctype.h>
  11 #include <stdio.h>
  12 #include <stdarg.h>
  13
  14 #include "Platform.h"
  15
  16 #include "PropSet.h"
  17 #include "Accessor.h"
  18 #include "KeyWords.h"
  19 #include "Scintilla.h"
  20 #include "SciLexer.h"
  21
  22 #ifdef SCI_NAMESPACE
  23 using namespace Scintilla;
  24 #endif
  25
  26 //XXX Identical to Perl, put in common area
  27 static inline bool isEOLChar(char ch) {
  28         return (ch == '\r') || (ch == '\n');
  29 }
  30
  31 #define isSafeASCII(ch) ((unsigned int)(ch) <= 127)
  32 // This one's redundant, but makes for more readable code
  33 #define isHighBitChar(ch) ((unsigned int)(ch) > 127)
  34
  35 static inline bool isSafeAlpha(char ch) {
  36     return (isSafeASCII(ch) && isalpha(ch)) || ch == '_';
  37 }
  38
  39 static inline bool isSafeAlnum(char ch) {
  40     return (isSafeASCII(ch) && isalnum(ch)) || ch == '_';
  41 }
  42
  43 static inline bool isSafeAlnumOrHigh(char ch) {
  44     return isHighBitChar(ch) || isalnum(ch) || ch == '_';
  45 }
  46
  47 static inline bool isSafeDigit(char ch) {
  48     return isSafeASCII(ch) && isdigit(ch);
  49 }
  50
  51 static inline bool isSafeWordcharOrHigh(char ch) {
  52     // Error: scintilla's KeyWords.h includes '.' as a word-char
  53     // we want to separate things that can take methods from the
  54     // methods.
  55     return isHighBitChar(ch) || isalnum(ch) || ch == '_';
  56 }
  57
  58 static bool inline iswhitespace(char ch) {
  59         return ch == ' ' || ch == '\t';
  60 }
  61
  62 #define MAX_KEYWORD_LENGTH 200
  63
  64 #define STYLE_MASK 63
  65 #define actual_style(style) (style & STYLE_MASK)
  66
  67 static bool followsDot(unsigned int pos, Accessor &styler) {
  68     styler.Flush();
  69     for (; pos >= 1; --pos) {
  70         int style = actual_style(styler.StyleAt(pos));
  71         char ch;
  72         switch (style) {
  73             case SCE_RB_DEFAULT:
  74                 ch = styler[pos];
  75                 if (ch == ' ' || ch == '\t') {
  76                     //continue
  77                 } else {
  78                     return false;
  79                 }
  80                 break;
  81
  82             case SCE_RB_OPERATOR:
  83                 return styler[pos] == '.';
  84
  85             default:
  86                 return false;
  87         }
  88     }
  89     return false;
  90 }
  91
  92 // Forward declarations
  93 static bool keywordIsAmbiguous(const char *prevWord);
  94 static bool keywordDoStartsLoop(int pos,
  95                                 Accessor &styler);
  96 static bool keywordIsModifier(const char *word,
  97                               int pos,
  98                               Accessor &styler);
  99
 100 static int ClassifyWordRb(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord) {
 101         char s[MAX_KEYWORD_LENGTH];
 102     unsigned int i, j;
 103         unsigned int lim = end - start + 1; // num chars to copy
 104         if (lim >= MAX_KEYWORD_LENGTH) {
 105                 lim = MAX_KEYWORD_LENGTH - 1;
 106         }
 107         for (i = start, j = 0; j < lim; i++, j++) {
 108                 s[j] = styler[i];
 109         }
 110     s[j] = '\0';
 111         int chAttr;
 112         if (0 == strcmp(prevWord, "class"))
 113                 chAttr = SCE_RB_CLASSNAME;
 114         else if (0 == strcmp(prevWord, "module"))
 115                 chAttr = SCE_RB_MODULE_NAME;
 116         else if (0 == strcmp(prevWord, "def"))
 117                 chAttr = SCE_RB_DEFNAME;
 118     else if (keywords.InList(s) && !followsDot(start - 1, styler)) {
 119         if (keywordIsAmbiguous(s)
 120             && keywordIsModifier(s, start, styler)) {
 121
 122             // Demoted keywords are colored as keywords,
 123             // but do not affect changes in indentation.
 124             //
 125             // Consider the word 'if':
 126             // 1. <<if test ...>> : normal
 127             // 2. <<stmt if test>> : demoted
 128             // 3. <<lhs = if ...>> : normal: start a new indent level
 129             // 4. <<obj.if = 10>> : color as identifer, since it follows '.'
 130
 131             chAttr = SCE_RB_WORD_DEMOTED;
 132         } else {
 133             chAttr = SCE_RB_WORD;
 134         }
 135         } else
 136         chAttr = SCE_RB_IDENTIFIER;
 137         styler.ColourTo(end, chAttr);
 138         if (chAttr == SCE_RB_WORD) {
 139                 strcpy(prevWord, s);
 140         } else {
 141                 prevWord[0] = 0;
 142         }
 143     return chAttr;
 144 }
 145
 146
 147 //XXX Identical to Perl, put in common area
 148 static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) {
 149         if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) {
 150                 return false;
 151         }
 152         while (*val) {
 153                 if (*val != styler[pos++]) {
 154                         return false;
 155                 }
 156                 val++;
 157         }
 158         return true;
 159 }
 160
 161 // Do Ruby better -- find the end of the line, work back,
 162 // and then check for leading white space
 163
 164 // Precondition: the here-doc target can be indented
 165 static bool lookingAtHereDocDelim(Accessor         &styler,
 166                                   int                   pos,
 167                                   int                   lengthDoc,
 168                                   const char   *HereDocDelim)
 169 {
 170     if (!isMatch(styler, lengthDoc, pos, HereDocDelim)) {
 171         return false;
 172     }
 173     while (--pos > 0) {
 174         char ch = styler[pos];
 175         if (isEOLChar(ch)) {
 176             return true;
 177         } else if (ch != ' ' && ch != '\t') {
 178             return false;
 179         }
 180     }
 181     return false;
 182 }
 183
 184 //XXX Identical to Perl, put in common area
 185 static char opposite(char ch) {
 186         if (ch == '(')
 187                 return ')';
 188         if (ch == '[')
 189                 return ']';
 190         if (ch == '{')
 191                 return '}';
 192         if (ch == '<')
 193                 return '>';
 194         return ch;
 195 }
 196
 197 // Null transitions when we see we've reached the end
 198 // and need to relex the curr char.
 199
 200 static void redo_char(int &i, char &ch, char &chNext, char &chNext2,
 201                       int &state) {
 202     i--;
 203     chNext2 = chNext;
 204     chNext = ch;
 205     state = SCE_RB_DEFAULT;
 206 }
 207
 208 static void advance_char(int &i, char &ch, char &chNext, char &chNext2) {
 209     i++;
 210     ch = chNext;
 211     chNext = chNext2;
 212 }
 213
 214 // precondition: startPos points to one after the EOL char
 215 static bool currLineContainsHereDelims(int& startPos,
 216                                        Accessor &styler) {
 217     if (startPos <= 1)
 218         return false;
 219
 220     int pos;
 221     for (pos = startPos - 1; pos > 0; pos--) {
 222         char ch = styler.SafeGetCharAt(pos);
 223         if (isEOLChar(ch)) {
 224             // Leave the pointers where they are -- there are no
 225             // here doc delims on the current line, even if
 226             // the EOL isn't default style
 227
 228             return false;
 229         } else {
 230             styler.Flush();
 231             if (actual_style(styler.StyleAt(pos)) == SCE_RB_HERE_DELIM) {
 232                 break;
 233             }
 234         }
 235     }
 236     if (pos == 0) {
 237         return false;
 238     }
 239     // Update the pointers so we don't have to re-analyze the string
 240     startPos = pos;
 241     return true;
 242 }
 243
 244 // This class is used by the enter and exit methods, so it needs
 245 // to be hoisted out of the function.
 246
 247 class QuoteCls {
 248     public:
 249     int  Count;
 250     char Up;
 251     char Down;
 252     QuoteCls() {
 253         this->New();
 254     }
 255     void New() {
 256         Count = 0;
 257         Up    = '\0';
 258         Down  = '\0';
 259     }
 260     void Open(char u) {
 261         Count++;
 262         Up    = u;
 263         Down  = opposite(Up);
 264     }
 265     QuoteCls(const QuoteCls& q) {
 266         // copy constructor -- use this for copying in
 267         Count = q.Count;
 268         Up    = q.Up;
 269         Down  = q.Down;
 270     }
 271     QuoteCls& operator=(const QuoteCls& q) { // assignment constructor
 272         if (this != &q) {
 273             Count = q.Count;
 274             Up    = q.Up;
 275             Down  = q.Down;
 276         }
 277                 return *this;
 278     }
 279
 280 };
 281
 282
 283 static void enterInnerExpression(int  *p_inner_string_types,
 284                                  int  *p_inner_expn_brace_counts,
 285                                  QuoteCls *p_inner_quotes,
 286                                  int&  inner_string_count,
 287                                  int&  state,
 288                                  int&  brace_counts,
 289                                  QuoteCls curr_quote
 290                                  ) {
 291     p_inner_string_types[inner_string_count] = state;
 292     state = SCE_RB_DEFAULT;
 293     p_inner_expn_brace_counts[inner_string_count] = brace_counts;
 294     brace_counts = 0;
 295     p_inner_quotes[inner_string_count] = curr_quote;
 296     ++inner_string_count;
 297 }
 298
 299 static void exitInnerExpression(int *p_inner_string_types,
 300                                  int *p_inner_expn_brace_counts,
 301                                  QuoteCls *p_inner_quotes,
 302                                  int& inner_string_count,
 303                                  int& state,
 304                                  int&  brace_counts,
 305                                  QuoteCls& curr_quote
 306                                 ) {
 307     --inner_string_count;
 308     state = p_inner_string_types[inner_string_count];
 309     brace_counts = p_inner_expn_brace_counts[inner_string_count];
 310     curr_quote = p_inner_quotes[inner_string_count];
 311 }
 312
 313 static bool isEmptyLine(int pos,
 314                         Accessor &styler) {
 315         int spaceFlags = 0;
 316         int lineCurrent = styler.GetLine(pos);
 317         int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
 318     return (indentCurrent & SC_FOLDLEVELWHITEFLAG) != 0;
 319 }
 320
 321 static bool RE_CanFollowKeyword(const char *keyword) {
 322     if (!strcmp(keyword, "and")
 323         || !strcmp(keyword, "begin")
 324         || !strcmp(keyword, "break")
 325         || !strcmp(keyword, "case")
 326         || !strcmp(keyword, "do")
 327         || !strcmp(keyword, "else")
 328         || !strcmp(keyword, "elsif")
 329         || !strcmp(keyword, "if")
 330         || !strcmp(keyword, "next")
 331         || !strcmp(keyword, "return")
 332         || !strcmp(keyword, "when")
 333         || !strcmp(keyword, "unless")
 334         || !strcmp(keyword, "until")
 335         || !strcmp(keyword, "not")
 336         || !strcmp(keyword, "or")) {
 337         return true;
 338     }
 339     return false;
 340 }
 341
 342 // Look at chars up to but not including endPos
 343 // Don't look at styles in case we're looking forward
 344
 345 static int skipWhitespace(int startPos,
 346                            int endPos,
 347                            Accessor &styler) {
 348     for (int i = startPos; i < endPos; i++) {
 349         if (!iswhitespace(styler[i])) {
 350             return i;
 351         }
 352     }
 353     return endPos;
 354 }
 355
 356 // This routine looks for false positives like
 357 // undef foo, <<
 358 // There aren't too many.
 359 //
 360 // iPrev points to the start of <<
 361
 362 static bool sureThisIsHeredoc(int iPrev,
 363                               Accessor &styler,
 364                               char *prevWord) {
 365
 366     // Not so fast, since Ruby's so dynamic.  Check the context
 367     // to make sure we're OK.
 368     int prevStyle;
 369     int lineStart = styler.GetLine(iPrev);
 370     int lineStartPosn = styler.LineStart(lineStart);
 371     styler.Flush();
 372
 373     // Find the first word after some whitespace
 374     int firstWordPosn = skipWhitespace(lineStartPosn, iPrev, styler);
 375     if (firstWordPosn >= iPrev) {
 376         // Have something like {^     <<}
 377                 //XXX Look at the first previous non-comment non-white line
 378                 // to establish the context.  Not too likely though.
 379         return true;
 380     } else {
 381         switch (prevStyle = styler.StyleAt(firstWordPosn)) {
 382         case SCE_RB_WORD:
 383         case SCE_RB_WORD_DEMOTED:
 384         case SCE_RB_IDENTIFIER:
 385             break;
 386         default:
 387             return true;
 388         }
 389     }
 390     int firstWordEndPosn = firstWordPosn;
 391     char *dst = prevWord;
 392     for (;;) {
 393         if (firstWordEndPosn >= iPrev ||
 394             styler.StyleAt(firstWordEndPosn) != prevStyle) {
 395             *dst = 0;
 396             break;
 397         }
 398         *dst++ = styler[firstWordEndPosn];
 399         firstWordEndPosn += 1;
 400     }
 401     //XXX Write a style-aware thing to regex scintilla buffer objects
 402     if (!strcmp(prevWord, "undef")
 403         || !strcmp(prevWord, "def")
 404         || !strcmp(prevWord, "alias")) {
 405         // These keywords are what we were looking for
 406         return false;
 407     }
 408     return true;
 409 }
 410
 411 // Routine that saves us from allocating a buffer for the here-doc target
 412 // targetEndPos points one past the end of the current target
 413 static bool haveTargetMatch(int currPos,
 414                             int lengthDoc,
 415                             int targetStartPos,
 416                             int targetEndPos,
 417                             Accessor &styler) {
 418     if (lengthDoc - currPos < targetEndPos - targetStartPos) {
 419         return false;
 420     }
 421     int i, j;
 422     for (i = targetStartPos, j = currPos;
 423          i < targetEndPos && j < lengthDoc;
 424          i++, j++) {
 425         if (styler[i] != styler[j]) {
 426             return false;
 427         }
 428     }
 429     return true;
 430 }
 431
 432 // We need a check because the form
 433 // [identifier] <<[target]
 434 // is ambiguous.  The Ruby lexer/parser resolves it by
 435 // looking to see if [identifier] names a variable or a
// function.  If it's a function, it's the start of a here-doc.
// If it's a variable, it's an operator.  This lexer doesn't
 438 // maintain a symbol table, so it looks ahead to see what's
 439 // going on, in cases where we have
 440 // ^[white-space]*[identifier([.|::]identifier)*][white-space]*<<[target]
 441 //
 442 // If there's no occurrence of [target] on a line, assume we don't.
 443
 444 // return true == yes, we have no heredocs
 445
 446 static bool sureThisIsNotHeredoc(int lt2StartPos,
 447                                  Accessor &styler) {
 448     int prevStyle;
 449      // Use full document, not just part we're styling
 450     int lengthDoc = styler.Length();
 451     int lineStart = styler.GetLine(lt2StartPos);
 452     int lineStartPosn = styler.LineStart(lineStart);
 453     styler.Flush();
 454     const bool definitely_not_a_here_doc = true;
 455     const bool looks_like_a_here_doc = false;
 456
 457     // Find the first word after some whitespace
 458     int firstWordPosn = skipWhitespace(lineStartPosn, lt2StartPos, styler);
 459     if (firstWordPosn >= lt2StartPos) {
 460         return definitely_not_a_here_doc;
 461     }
 462     prevStyle = styler.StyleAt(firstWordPosn);
 463     // If we have '<<' following a keyword, it's not a heredoc
 464     if (prevStyle != SCE_RB_IDENTIFIER) {
 465         return definitely_not_a_here_doc;
 466     }
 467     int newStyle = prevStyle;
 468     // Some compilers incorrectly warn about uninit newStyle
 469     for (firstWordPosn += 1; firstWordPosn <= lt2StartPos; firstWordPosn += 1) {
 470         // Inner loop looks at the name
 471         for (; firstWordPosn <= lt2StartPos; firstWordPosn += 1) {
 472             newStyle = styler.StyleAt(firstWordPosn);
 473             if (newStyle != prevStyle) {
 474                 break;
 475             }
 476         }
 477         // Do we have '::' or '.'?
 478         if (firstWordPosn < lt2StartPos && newStyle == SCE_RB_OPERATOR) {
 479             char ch = styler[firstWordPosn];
 480             if (ch == '.') {
 481                 // yes
 482             } else if (ch == ':') {
 483                 if (styler.StyleAt(++firstWordPosn) != SCE_RB_OPERATOR) {
 484                     return definitely_not_a_here_doc;
 485                 } else if (styler[firstWordPosn] != ':') {
 486                     return definitely_not_a_here_doc;
 487                 }
 488             } else {
 489                 break;
 490             }
 491         } else {
 492             break;
 493         }
 494     }
 495     // Skip next batch of white-space
 496     firstWordPosn = skipWhitespace(firstWordPosn, lt2StartPos, styler);
 497     if (firstWordPosn != lt2StartPos) {
 498         // Have [[^ws[identifier]ws[*something_else*]ws<<
 499         return definitely_not_a_here_doc;
 500     }
 501     // OK, now 'j' will point to the current spot moving ahead
 502         int j = firstWordPosn + 1;
 503     if (styler.StyleAt(j) != SCE_RB_OPERATOR || styler[j] != '<') {
 504         // This shouldn't happen
 505         return definitely_not_a_here_doc;
 506     }
 507     int nextLineStartPosn = styler.LineStart(lineStart + 1);
 508     if (nextLineStartPosn >= lengthDoc) {
 509         return definitely_not_a_here_doc;
 510     }
 511     j = skipWhitespace(j + 1, nextLineStartPosn, styler);
 512     if (j >= lengthDoc) {
 513         return definitely_not_a_here_doc;
 514     }
 515     bool allow_indent;
 516     int target_start, target_end;
 517     // From this point on no more styling, since we're looking ahead
 518     if (styler[j] == '-') {
 519         allow_indent = true;
 520         j++;
 521     } else {
 522         allow_indent = false;
 523     }
 524
 525     // Allow for quoted targets.
 526     char target_quote = 0;
 527     switch (styler[j]) {
 528     case '\'':
 529     case '"':
 530     case '`':
 531         target_quote = styler[j];
 532         j += 1;
 533     }
 534
 535     if (isSafeAlnum(styler[j])) {
 536         // Init target_end because some compilers think it won't
 537         // be initialized by the time it's used
 538         target_start = target_end = j;
 539         j++;
 540     } else {
 541         return definitely_not_a_here_doc;
 542     }
 543     for (; j < lengthDoc; j++) {
 544         if (!isSafeAlnum(styler[j])) {
 545             if (target_quote && styler[j] != target_quote) {
 546                 // unquoted end
 547                 return definitely_not_a_here_doc;
 548             }
 549
 550             // And for now make sure that it's a newline
 551             // don't handle arbitrary expressions yet
 552
 553             target_end = j;
 554                         if (target_quote) {
 555                                 // Now we can move to the character after the string delimiter.
 556                                 j += 1;
 557                         }
 558             j = skipWhitespace(j, lengthDoc, styler);
 559             if (j >= lengthDoc) {
 560                 return definitely_not_a_here_doc;
 561             } else {
 562                 char ch = styler[j];
 563                 if (ch == '#' || isEOLChar(ch)) {
 564                     // This is OK, so break and continue;
 565                     break;
 566                 } else {
 567                     return definitely_not_a_here_doc;
 568                 }
 569             }
 570         }
 571     }
 572
 573     // Just look at the start of each line
 574     int last_line = styler.GetLine(lengthDoc - 1);
 575     // But don't go too far
 576     if (last_line > lineStart + 50) {
 577         last_line = lineStart + 50;
 578     }
 579     for (int line_num = lineStart + 1; line_num <= last_line; line_num++) {
 580         if (allow_indent) {
 581             j = skipWhitespace(styler.LineStart(line_num), lengthDoc, styler);
 582         } else {
 583             j = styler.LineStart(line_num);
 584         }
 585         // target_end is one past the end
 586         if (haveTargetMatch(j, lengthDoc, target_start, target_end, styler)) {
 587             // We got it
 588             return looks_like_a_here_doc;
 589         }
 590     }
 591     return definitely_not_a_here_doc;
 592 }
 593
 594 //todo: if we aren't looking at a stdio character,
 595 // move to the start of the first line that is not in a
 596 // multi-line construct
 597
 598 static void synchronizeDocStart(unsigned int& startPos,
 599                                 int &length,
 600                                 int &initStyle,
 601                                 Accessor &styler,
 602                                 bool skipWhiteSpace=false) {
 603
 604     styler.Flush();
 605     int style = actual_style(styler.StyleAt(startPos));
 606     switch (style) {
 607         case SCE_RB_STDIN:
 608         case SCE_RB_STDOUT:
 609         case SCE_RB_STDERR:
 610             // Don't do anything else with these.
 611             return;
 612     }
 613
 614     int pos = startPos;
 615     // Quick way to characterize each line
 616     int lineStart;
 617     for (lineStart = styler.GetLine(pos); lineStart > 0; lineStart--) {
 618         // Now look at the style before the previous line's EOL
 619         pos = styler.LineStart(lineStart) - 1;
 620         if (pos <= 10) {
 621             lineStart = 0;
 622             break;
 623         }
 624         char ch = styler.SafeGetCharAt(pos);
 625         char chPrev = styler.SafeGetCharAt(pos - 1);
 626         if (ch == '\n' && chPrev == '\r') {
 627             pos--;
 628         }
 629         if (styler.SafeGetCharAt(pos - 1) == '\\') {
 630             // Continuation line -- keep going
 631         } else if (actual_style(styler.StyleAt(pos)) != SCE_RB_DEFAULT) {
 632             // Part of multi-line construct -- keep going
 633         } else if (currLineContainsHereDelims(pos, styler)) {
 634             // Keep going, with pos and length now pointing
 635             // at the end of the here-doc delimiter
 636         } else if (skipWhiteSpace && isEmptyLine(pos, styler)) {
 637             // Keep going
 638         } else {
 639             break;
 640         }
 641     }
 642     pos = styler.LineStart(lineStart);
 643     length += (startPos - pos);
 644     startPos = pos;
 645     initStyle = SCE_RB_DEFAULT;
 646 }
 647
 648 static void ColouriseRbDoc(unsigned int startPos, int length, int initStyle,
 649                                                    WordList *keywordlists[], Accessor &styler) {
 650
 651         // Lexer for Ruby often has to backtrack to start of current style to determine
 652         // which characters are being used as quotes, how deeply nested is the
 653         // start position and what the termination string is for here documents
 654
 655         WordList &keywords = *keywordlists[0];
 656
 657         class HereDocCls {
 658         public:
 659                 int State;
 660         // States
 661         // 0: '<<' encountered
 662                 // 1: collect the delimiter
 663         // 1b: text between the end of the delimiter and the EOL
 664                 // 2: here doc text (lines after the delimiter)
 665                 char Quote;             // the char after '<<'
 666                 bool Quoted;            // true if Quote in ('\'','"','`')
 667                 int DelimiterLength;    // strlen(Delimiter)
 668                 char Delimiter[256];    // the Delimiter, limit of 256: from Perl
 669         bool CanBeIndented;
 670                 HereDocCls() {
 671                         State = 0;
 672                         DelimiterLength = 0;
 673                         Delimiter[0] = '\0';
 674             CanBeIndented = false;
 675                 }
 676         };
 677         HereDocCls HereDoc;
 678
 679         QuoteCls Quote;
 680
 681     int numDots = 0;  // For numbers --
 682                       // Don't start lexing in the middle of a num
 683
 684     synchronizeDocStart(startPos, length, initStyle, styler, // ref args
 685                         false);
 686
 687         bool preferRE = true;
 688     int state = initStyle;
 689         int lengthDoc = startPos + length;
 690
 691         char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
 692         prevWord[0] = '\0';
 693         if (length == 0)
 694                 return;
 695
 696         char chPrev = styler.SafeGetCharAt(startPos - 1);
 697         char chNext = styler.SafeGetCharAt(startPos);
 698         bool is_real_number = true;   // Differentiate between constants and ?-sequences.
 699         // Ruby uses a different mask because bad indentation is marked by oring with 32
 700         styler.StartAt(startPos, 127);
 701         styler.StartSegment(startPos);
 702
 703     static int q_states[] = {SCE_RB_STRING_Q,
 704                              SCE_RB_STRING_QQ,
 705                              SCE_RB_STRING_QR,
 706                              SCE_RB_STRING_QW,
 707                              SCE_RB_STRING_QW,
 708                              SCE_RB_STRING_QX};
 709     static const char* q_chars = "qQrwWx";
 710
 711     // In most cases a value of 2 should be ample for the code in the
 712     // Ruby library, and the code the user is likely to enter.
 713     // For example,
 714     // fu_output_message "mkdir #{options[:mode] ? ('-m %03o ' % options[:mode]) : ''}#{list.join ' '}"
 715     //     if options[:verbose]
 716     // from fileutils.rb nests to a level of 2
 717     // If the user actually hits a 6th occurrence of '#{' in a double-quoted
 718     // string (including regex'es, %Q, %<sym>, %w, and other strings
 719     // that interpolate), it will stay as a string.  The problem with this
 720     // is that quotes might flip, a 7th '#{' will look like a comment,
 721     // and code-folding might be wrong.
 722
 723     // If anyone runs into this problem, I recommend raising this
 724     // value slightly higher to replacing the fixed array with a linked
 725     // list.  Keep in mind this code will be called everytime the lexer
 726     // is invoked.
 727
 728 #define INNER_STRINGS_MAX_COUNT 5
 729     // These vars track our instances of "...#{,,,%Q<..#{,,,}...>,,,}..."
 730     int inner_string_types[INNER_STRINGS_MAX_COUNT];
 731     // Track # braces when we push a new #{ thing
 732     int inner_expn_brace_counts[INNER_STRINGS_MAX_COUNT];
 733     QuoteCls inner_quotes[INNER_STRINGS_MAX_COUNT];
 734     int inner_string_count = 0;
 735     int brace_counts = 0;   // Number of #{ ... } things within an expression
 736
 737     int i;
 738         for (i = 0; i < INNER_STRINGS_MAX_COUNT; i++) {
 739         inner_string_types[i] = 0;
 740         inner_expn_brace_counts[i] = 0;
 741     }
 742         for (i = startPos; i < lengthDoc; i++) {
 743                 char ch = chNext;
 744                 chNext = styler.SafeGetCharAt(i + 1);
 745                 char chNext2 = styler.SafeGetCharAt(i + 2);
 746
 747         if (styler.IsLeadByte(ch)) {
 748                         chNext = chNext2;
 749                         chPrev = ' ';
 750                         i += 1;
 751                         continue;
 752                 }
 753
 754         // skip on DOS/Windows
 755         //No, don't, because some things will get tagged on,
 756         // so we won't recognize keywords, for example
 757 #if 0
 758                 if (ch == '\r' && chNext == '\n') {
 759                 continue;
 760         }
 761 #endif
 762
 763         if (HereDoc.State == 1 && isEOLChar(ch)) {
 764                         // Begin of here-doc (the line after the here-doc delimiter):
 765                         HereDoc.State = 2;
 766                         styler.ColourTo(i-1, state);
 767             // Don't check for a missing quote, just jump into
 768             // the here-doc state
 769             state = SCE_RB_HERE_Q;
 770         }
 771
 772         // Regular transitions
 773                 if (state == SCE_RB_DEFAULT) {
 774             if (isSafeDigit(ch)) {
 775                 styler.ColourTo(i - 1, state);
 776                                 state = SCE_RB_NUMBER;
 777                 is_real_number = true;
 778                 numDots = 0;
 779             } else if (isHighBitChar(ch) || iswordstart(ch)) {
 780                 styler.ColourTo(i - 1, state);
 781                                 state = SCE_RB_WORD;
 782                         } else if (ch == '#') {
 783                                 styler.ColourTo(i - 1, state);
 784                                 state = SCE_RB_COMMENTLINE;
 785                         } else if (ch == '=') {
 786                                 // =begin indicates the start of a comment (doc) block
 787                 if (i == 0 || (isEOLChar(chPrev)
 788                     && chNext == 'b'
 789                     && styler.SafeGetCharAt(i + 2) == 'e'
 790                     && styler.SafeGetCharAt(i + 3) == 'g'
 791                     && styler.SafeGetCharAt(i + 4) == 'i'
 792                     && styler.SafeGetCharAt(i + 5) == 'n'
 793                     && !isSafeWordcharOrHigh(styler.SafeGetCharAt(i + 6)))) {
 794                     styler.ColourTo(i - 1, state);
 795                     state = SCE_RB_POD;
 796                                 } else {
 797                                         styler.ColourTo(i - 1, state);
 798                                         styler.ColourTo(i, SCE_RB_OPERATOR);
 799                                         preferRE = true;
 800                                 }
 801                         } else if (ch == '"') {
 802                                 styler.ColourTo(i - 1, state);
 803                                 state = SCE_RB_STRING;
 804                                 Quote.New();
 805                                 Quote.Open(ch);
 806                         } else if (ch == '\'') {
 807                 styler.ColourTo(i - 1, state);
 808                 state = SCE_RB_CHARACTER;
 809                 Quote.New();
 810                 Quote.Open(ch);
 811                         } else if (ch == '`') {
 812                                 styler.ColourTo(i - 1, state);
 813                                 state = SCE_RB_BACKTICKS;
 814                                 Quote.New();
 815                                 Quote.Open(ch);
 816                         } else if (ch == '@') {
 817                 // Instance or class var
 818                                 styler.ColourTo(i - 1, state);
 819                 if (chNext == '@') {
 820                     state = SCE_RB_CLASS_VAR;
 821                     advance_char(i, ch, chNext, chNext2); // pass by ref
 822                 } else {
 823                     state = SCE_RB_INSTANCE_VAR;
 824                 }
 825                         } else if (ch == '$') {
 826                 // Check for a builtin global
 827                                 styler.ColourTo(i - 1, state);
 828                 // Recognize it bit by bit
 829                 state = SCE_RB_GLOBAL;
 830             } else if (ch == '/' && preferRE) {
 831                 // Ambigous operator
 832                                 styler.ColourTo(i - 1, state);
 833                                 state = SCE_RB_REGEX;
 834                 Quote.New();
 835                 Quote.Open(ch);
 836                         } else if (ch == '<' && chNext == '<' && chNext2 != '=') {
 837
 838                 // Recognise the '<<' symbol - either a here document or a binary op
 839                                 styler.ColourTo(i - 1, state);
 840                 i++;
 841                 chNext = chNext2;
 842                                 styler.ColourTo(i, SCE_RB_OPERATOR);
 843
 844                 if (! (strchr("\"\'`_-", chNext2) || isSafeAlpha(chNext2))) {
 845                     // It's definitely not a here-doc,
 846                     // based on Ruby's lexer/parser in the
 847                     // heredoc_identifier routine.
 848                     // Nothing else to do.
 849                 } else if (preferRE) {
 850                     if (sureThisIsHeredoc(i - 1, styler, prevWord)) {
 851                         state = SCE_RB_HERE_DELIM;
 852                         HereDoc.State = 0;
 853                     }
 854                     // else leave it in default state
 855                 } else {
 856                     if (sureThisIsNotHeredoc(i - 1, styler)) {
 857                         // leave state as default
 858                         // We don't have all the heuristics Perl has for indications
 859                         // of a here-doc, because '<<' is overloadable and used
 860                         // for so many other classes.
 861                     } else {
 862                         state = SCE_RB_HERE_DELIM;
 863                         HereDoc.State = 0;
 864                     }
 865                 }
 866                 preferRE = (state != SCE_RB_HERE_DELIM);
 867             } else if (ch == ':') {
 868                                 styler.ColourTo(i - 1, state);
 869                 if (chNext == ':') {
 870                     // Mark "::" as an operator, not symbol start
 871                     styler.ColourTo(i + 1, SCE_RB_OPERATOR);
 872                     advance_char(i, ch, chNext, chNext2); // pass by ref
 873                     state = SCE_RB_DEFAULT;
 874                                         preferRE = false;
 875                 } else if (isSafeWordcharOrHigh(chNext)) {
 876                                         state = SCE_RB_SYMBOL;
 877                 } else if (strchr("[*!~+-*/%=<>&^|", chNext)) {
 878                     // Do the operator analysis in-line, looking ahead
 879                     // Based on the table in pickaxe 2nd ed., page 339
 880                     bool doColoring = true;
 881                     switch (chNext) {
 882                     case '[':
 883                         if (chNext2 == ']' ) {
 884                             char ch_tmp = styler.SafeGetCharAt(i + 3);
 885                             if (ch_tmp == '=') {
 886                                 i += 3;
 887                                 ch = ch_tmp;
 888                                 chNext = styler.SafeGetCharAt(i + 1);
 889                             } else {
 890                                 i += 2;
 891                                 ch = chNext2;
 892                                 chNext = ch_tmp;
 893                             }
 894                         } else {
 895                             doColoring = false;
 896                         }
 897                         break;
 898
 899                     case '*':
 900                         if (chNext2 == '*') {
 901                             i += 2;
 902                             ch = chNext2;
 903                             chNext = styler.SafeGetCharAt(i + 1);
 904                         } else {
 905                             advance_char(i, ch, chNext, chNext2);
 906                         }
 907                         break;
 908
 909                     case '!':
 910                         if (chNext2 == '=' || chNext2 == '~') {
 911                             i += 2;
 912                             ch = chNext2;
 913                             chNext = styler.SafeGetCharAt(i + 1);
 914                         } else {
 915                             advance_char(i, ch, chNext, chNext2);
 916                         }
 917                         break;
 918
 919                     case '<':
 920                         if (chNext2 == '<') {
 921                             i += 2;
 922                             ch = chNext2;
 923                             chNext = styler.SafeGetCharAt(i + 1);
 924                         } else if (chNext2 == '=') {
 925                             char ch_tmp = styler.SafeGetCharAt(i + 3);
 926                             if (ch_tmp == '>') {  // <=> operator
 927                                 i += 3;
 928                                 ch = ch_tmp;
 929                                 chNext = styler.SafeGetCharAt(i + 1);
 930                             } else {
 931                                 i += 2;
 932                                 ch = chNext2;
 933                                 chNext = ch_tmp;
 934                             }
 935                         } else {
 936                             advance_char(i, ch, chNext, chNext2);
 937                         }
 938                         break;
 939
 940                     default:
 941                         // Simple one-character operators
 942                         advance_char(i, ch, chNext, chNext2);
 943                         break;
 944                     }
 945                     if (doColoring) {
 946                         styler.ColourTo(i, SCE_RB_SYMBOL);
 947                         state = SCE_RB_DEFAULT;
 948                     }
 949                                 } else if (!preferRE) {
 950                                         // Don't color symbol strings (yet)
 951                                         // Just color the ":" and color rest as string
 952                                         styler.ColourTo(i, SCE_RB_SYMBOL);
 953                                         state = SCE_RB_DEFAULT;
 954                 } else {
 955                     styler.ColourTo(i, SCE_RB_OPERATOR);
 956                     state = SCE_RB_DEFAULT;
 957                     preferRE = true;
 958                 }
 959             } else if (ch == '%') {
 960                 styler.ColourTo(i - 1, state);
 961                 bool have_string = false;
 962                 if (strchr(q_chars, chNext) && !isSafeWordcharOrHigh(chNext2)) {
 963                     Quote.New();
 964                     const char *hit = strchr(q_chars, chNext);
 965                     if (hit != NULL) {
 966                         state = q_states[hit - q_chars];
 967                         Quote.Open(chNext2);
 968                         i += 2;
 969                         ch = chNext2;
 970                                                 chNext = styler.SafeGetCharAt(i + 1);
 971                         have_string = true;
 972                     }
 973                 } else if (preferRE && !isSafeWordcharOrHigh(chNext)) {
 974                     // Ruby doesn't allow high bit chars here,
 975                     // but the editor host might
 976                     state = SCE_RB_STRING_QQ;
 977                     Quote.Open(chNext);
 978                     advance_char(i, ch, chNext, chNext2); // pass by ref
 979                     have_string = true;
 980                 }
 981                 if (!have_string) {
 982                     styler.ColourTo(i, SCE_RB_OPERATOR);
 983                     // stay in default
 984                     preferRE = true;
 985                 }
 986             } else if (ch == '?') {
 987                 styler.ColourTo(i - 1, state);
 988                 if (iswhitespace(chNext) || chNext == '\n' || chNext == '\r') {
 989                     styler.ColourTo(i, SCE_RB_OPERATOR);
 990                 } else {
 991                     // It's the start of a character code escape sequence
 992                     // Color it as a number.
 993                     state = SCE_RB_NUMBER;
 994                     is_real_number = false;
 995                 }
 996             } else if (isoperator(ch) || ch == '.') {
 997                                 styler.ColourTo(i - 1, state);
 998                                 styler.ColourTo(i, SCE_RB_OPERATOR);
 999                 // If we're ending an expression or block,
1000                 // assume it ends an object, and the ambivalent
1001                 // constructs are binary operators
1002                 //
1003                 // So if we don't have one of these chars,
1004                 // we aren't ending an object exp'n, and ops
1005                 // like : << / are unary operators.
1006
1007                 if (ch == '{') {
1008                     ++brace_counts;
1009                     preferRE = true;
1010                 } else if (ch == '}' && --brace_counts < 0
1011                            && inner_string_count > 0) {
1012                     styler.ColourTo(i, SCE_RB_OPERATOR);
1013                     exitInnerExpression(inner_string_types,
1014                                         inner_expn_brace_counts,
1015                                         inner_quotes,
1016                                         inner_string_count,
1017                                         state, brace_counts, Quote);
1018                 } else {
1019                     preferRE = (strchr(")}].", ch) == NULL);
1020                 }
1021                 // Stay in default state
1022             } else if (isEOLChar(ch)) {
1023                 // Make sure it's a true line-end, with no backslash
1024                 if ((ch == '\r' || (ch == '\n' && chPrev != '\r'))
1025                     && chPrev != '\\') {
1026                     // Assume we've hit the end of the statement.
1027                     preferRE = true;
1028                 }
1029             }
1030         } else if (state == SCE_RB_WORD) {
1031             if (ch == '.' || !isSafeWordcharOrHigh(ch)) {
1032                 // Words include x? in all contexts,
1033                 // and <letters>= after either 'def' or a dot
1034                 // Move along until a complete word is on our left
1035
1036                 // Default accessor treats '.' as word-chars,
1037                 // but we don't for now.
1038
1039                 if (ch == '='
1040                     && isSafeWordcharOrHigh(chPrev)
1041                     && (chNext == '('
1042                         || strchr(" \t\n\r", chNext) != NULL)
1043                     && (!strcmp(prevWord, "def")
1044                         || followsDot(styler.GetStartSegment(), styler))) {
1045                     // <name>= is a name only when being def'd -- Get it the next time
1046                     // This means that <name>=<name> is always lexed as
1047                     // <name>, (op, =), <name>
1048                 } else if ((ch == '?' || ch == '!')
1049                            && isSafeWordcharOrHigh(chPrev)
1050                            && !isSafeWordcharOrHigh(chNext)) {
1051                     // <name>? is a name -- Get it the next time
1052                     // But <name>?<name> is always lexed as
1053                     // <name>, (op, ?), <name>
1054                     // Same with <name>! to indicate a method that
1055                     // modifies its target
1056                 } else if (isEOLChar(ch)
1057                            && isMatch(styler, lengthDoc, i - 7, "__END__")) {
1058                     styler.ColourTo(i, SCE_RB_DATASECTION);
1059                     state = SCE_RB_DATASECTION;
1060                     // No need to handle this state -- we'll just move to the end
1061                     preferRE = false;
1062                 } else {
1063                                         int wordStartPos = styler.GetStartSegment();
1064                     int word_style = ClassifyWordRb(wordStartPos, i - 1, keywords, styler, prevWord);
1065                     switch (word_style) {
1066                         case SCE_RB_WORD:
1067                             preferRE = RE_CanFollowKeyword(prevWord);
1068                                                         break;
1069
1070                         case SCE_RB_WORD_DEMOTED:
1071                             preferRE = true;
1072                                                         break;
1073
1074                         case SCE_RB_IDENTIFIER:
1075                             if (isMatch(styler, lengthDoc, wordStartPos, "print")) {
1076                                 preferRE = true;
1077                             } else if (isEOLChar(ch)) {
1078                                 preferRE = true;
1079                             } else {
1080                                 preferRE = false;
1081                             }
1082                                                         break;
1083                         default:
1084                             preferRE = false;
1085                     }
1086                     if (ch == '.') {
1087                         // We might be redefining an operator-method
1088                         preferRE = false;
1089                     }
1090                     // And if it's the first
1091                     redo_char(i, ch, chNext, chNext2, state); // pass by ref
1092                 }
1093             }
1094         } else if (state == SCE_RB_NUMBER) {
1095             if (!is_real_number) {
1096                 if (ch != '\\') {
1097                     styler.ColourTo(i, state);
1098                     state = SCE_RB_DEFAULT;
1099                     preferRE = false;
1100                 } else if (strchr("\\ntrfvaebs", chNext)) {
1101                     // Terminal escape sequence -- handle it next time
1102                     // Nothing more to do this time through the loop
1103                 } else if (chNext == 'C' || chNext == 'M') {
1104                     if (chNext2 != '-') {
1105                         // \C or \M ends the sequence -- handle it next time
1106                     } else {
1107                         // Move from abc?\C-x
1108                         //               ^
1109                         // to
1110                         //                 ^
1111                         i += 2;
1112                         ch = chNext2;
1113                         chNext = styler.SafeGetCharAt(i + 1);
1114                     }
1115                 } else if (chNext == 'c') {
1116                     // Stay here, \c is a combining sequence
1117                     advance_char(i, ch, chNext, chNext2); // pass by ref
1118                 } else {
1119                     // ?\x, including ?\\ is final.
1120                     styler.ColourTo(i + 1, state);
1121                     state = SCE_RB_DEFAULT;
1122                     preferRE = false;
1123                     advance_char(i, ch, chNext, chNext2);
1124                 }
1125             } else if (isSafeAlnumOrHigh(ch) || ch == '_') {
1126                 // Keep going
1127             } else if (ch == '.' && ++numDots == 1) {
1128                 // Keep going
1129             } else {
1130                 styler.ColourTo(i - 1, state);
1131                 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1132                 preferRE = false;
1133             }
1134         } else if (state == SCE_RB_COMMENTLINE) {
1135                         if (isEOLChar(ch)) {
1136                 styler.ColourTo(i - 1, state);
1137                 state = SCE_RB_DEFAULT;
1138                 // Use whatever setting we had going into the comment
1139             }
1140         } else if (state == SCE_RB_HERE_DELIM) {
1141             // See the comment for SCE_RB_HERE_DELIM in LexPerl.cxx
1142             // Slightly different: if we find an immediate '-',
1143             // the target can appear indented.
1144
1145                         if (HereDoc.State == 0) { // '<<' encountered
1146                                 HereDoc.State = 1;
1147                 HereDoc.DelimiterLength = 0;
1148                 if (ch == '-') {
1149                     HereDoc.CanBeIndented = true;
1150                     advance_char(i, ch, chNext, chNext2); // pass by ref
1151                 } else {
1152                     HereDoc.CanBeIndented = false;
1153                 }
1154                 if (isEOLChar(ch)) {
1155                     // Bail out of doing a here doc if there's no target
1156                     state = SCE_RB_DEFAULT;
1157                     preferRE = false;
1158                 } else {
1159                     HereDoc.Quote = ch;
1160
1161                     if (ch == '\'' || ch == '"' || ch == '`') {
1162                         HereDoc.Quoted = true;
1163                         HereDoc.Delimiter[0] = '\0';
1164                     } else {
1165                         HereDoc.Quoted = false;
1166                         HereDoc.Delimiter[0] = ch;
1167                         HereDoc.Delimiter[1] = '\0';
1168                         HereDoc.DelimiterLength = 1;
1169                     }
1170                 }
1171                         } else if (HereDoc.State == 1) { // collect the delimiter
1172                 if (isEOLChar(ch)) {
1173                     // End the quote now, and go back for more
1174                     styler.ColourTo(i - 1, state);
1175                     state = SCE_RB_DEFAULT;
1176                     i--;
1177                     chNext = ch;
1178                     chNext2 = chNext;
1179                     preferRE = false;
1180                 } else if (HereDoc.Quoted) {
1181                                         if (ch == HereDoc.Quote) { // closing quote => end of delimiter
1182                                                 styler.ColourTo(i, state);
1183                                                 state = SCE_RB_DEFAULT;
1184                         preferRE = false;
1185                     } else {
1186                                                 if (ch == '\\' && !isEOLChar(chNext)) {
1187                             advance_char(i, ch, chNext, chNext2);
1188                                                 }
1189                                                 HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
1190                                                 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
1191                     }
1192                 } else { // an unquoted here-doc delimiter
1193                                         if (isSafeAlnumOrHigh(ch) || ch == '_') {
1194                                                 HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
1195                                                 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
1196                                         } else {
1197                                                 styler.ColourTo(i - 1, state);
1198                         redo_char(i, ch, chNext, chNext2, state);
1199                         preferRE = false;
1200                                         }
1201                 }
1202                                 if (HereDoc.DelimiterLength >= static_cast<int>(sizeof(HereDoc.Delimiter)) - 1) {
1203                                         styler.ColourTo(i - 1, state);
1204                                         state = SCE_RB_ERROR;
1205                     preferRE = false;
1206                                 }
1207             }
1208         } else if (state == SCE_RB_HERE_Q) {
1209             // Not needed: HereDoc.State == 2
1210             // Indentable here docs: look backwards
1211             // Non-indentable: look forwards, like in Perl
1212             //
1213             // Why: so we can quickly resolve things like <<-" abc"
1214
1215             if (!HereDoc.CanBeIndented) {
1216                 if (isEOLChar(chPrev)
1217                     && isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) {
1218                     styler.ColourTo(i - 1, state);
1219                     i += HereDoc.DelimiterLength - 1;
1220                     chNext = styler.SafeGetCharAt(i + 1);
1221                     if (isEOLChar(chNext)) {
1222                         styler.ColourTo(i, SCE_RB_HERE_DELIM);
1223                         state = SCE_RB_DEFAULT;
1224                         HereDoc.State = 0;
1225                         preferRE = false;
1226                     }
1227                     // Otherwise we skipped through the here doc faster.
1228                 }
1229             } else if (isEOLChar(chNext)
1230                        && lookingAtHereDocDelim(styler,
1231                                                 i - HereDoc.DelimiterLength + 1,
1232                                                 lengthDoc,
1233                                                 HereDoc.Delimiter)) {
1234                 styler.ColourTo(i - 1 - HereDoc.DelimiterLength, state);
1235                 styler.ColourTo(i, SCE_RB_HERE_DELIM);
1236                 state = SCE_RB_DEFAULT;
1237                 preferRE = false;
1238                 HereDoc.State = 0;
1239             }
1240         } else if (state == SCE_RB_CLASS_VAR
1241                    || state == SCE_RB_INSTANCE_VAR
1242                    || state == SCE_RB_SYMBOL) {
1243             if (!isSafeWordcharOrHigh(ch)) {
1244                 styler.ColourTo(i - 1, state);
1245                 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1246                 preferRE = false;
1247             }
1248         } else if (state == SCE_RB_GLOBAL) {
1249             if (!isSafeWordcharOrHigh(ch)) {
1250                 // handle special globals here as well
1251                 if (chPrev == '$') {
1252                     if (ch == '-') {
1253                         // Include the next char, like $-a
1254                         advance_char(i, ch, chNext, chNext2);
1255                     }
1256                     styler.ColourTo(i, state);
1257                     state = SCE_RB_DEFAULT;
1258                 } else {
1259                     styler.ColourTo(i - 1, state);
1260                     redo_char(i, ch, chNext, chNext2, state); // pass by ref
1261                 }
1262                 preferRE = false;
1263             }
1264         } else if (state == SCE_RB_POD) {
1265             // PODs end with ^=end\s, -- any whitespace can follow =end
1266             if (strchr(" \t\n\r", ch) != NULL
1267                 && i > 5
1268                 && isEOLChar(styler[i - 5])
1269                 && isMatch(styler, lengthDoc, i - 4, "=end")) {
1270                 styler.ColourTo(i - 1, state);
1271                 state = SCE_RB_DEFAULT;
1272                 preferRE = false;
1273             }
1274         } else if (state == SCE_RB_REGEX || state == SCE_RB_STRING_QR) {
1275             if (ch == '\\' && Quote.Up != '\\') {
1276                 // Skip one
1277                 advance_char(i, ch, chNext, chNext2);
1278             } else if (ch == Quote.Down) {
1279                 Quote.Count--;
1280                 if (Quote.Count == 0) {
1281                     // Include the options
1282                     while (isSafeAlpha(chNext)) {
1283                         i++;
1284                                                 ch = chNext;
1285                         chNext = styler.SafeGetCharAt(i + 1);
1286                     }
1287                     styler.ColourTo(i, state);
1288                     state = SCE_RB_DEFAULT;
1289                     preferRE = false;
1290                 }
1291             } else if (ch == Quote.Up) {
1292                 // Only if close quoter != open quoter
1293                 Quote.Count++;
1294
1295             } else if (ch == '#' ) {
1296                 if (chNext == '{'
1297                     && inner_string_count < INNER_STRINGS_MAX_COUNT) {
1298                     // process #{ ... }
1299                     styler.ColourTo(i - 1, state);
1300                     styler.ColourTo(i + 1, SCE_RB_OPERATOR);
1301                     enterInnerExpression(inner_string_types,
1302                                          inner_expn_brace_counts,
1303                                          inner_quotes,
1304                                          inner_string_count,
1305                                          state,
1306                                          brace_counts,
1307                                          Quote);
1308                     preferRE = true;
1309                     // Skip one
1310                     advance_char(i, ch, chNext, chNext2);
1311                 } else {
1312                     //todo: distinguish comments from pound chars
1313                     // for now, handle as comment
1314                     styler.ColourTo(i - 1, state);
1315                     bool inEscape = false;
1316                     while (++i < lengthDoc) {
1317                         ch = styler.SafeGetCharAt(i);
1318                         if (ch == '\\') {
1319                             inEscape = true;
1320                         } else if (isEOLChar(ch)) {
1321                             // Comment inside a regex
1322                             styler.ColourTo(i - 1, SCE_RB_COMMENTLINE);
1323                             break;
1324                         } else if (inEscape) {
1325                             inEscape = false;  // don't look at char
1326                         } else if (ch == Quote.Down) {
1327                             // Have the regular handler deal with this
1328                             // to get trailing modifiers.
1329                             i--;
1330                             ch = styler[i];
1331                             break;
1332                         }
1333                     }
1334                     chNext = styler.SafeGetCharAt(i + 1);
1335                     chNext2 = styler.SafeGetCharAt(i + 2);
1336                 }
1337             }
1338         // Quotes of all kinds...
1339         } else if (state == SCE_RB_STRING_Q || state == SCE_RB_STRING_QQ ||
1340                    state == SCE_RB_STRING_QX || state == SCE_RB_STRING_QW ||
1341                    state == SCE_RB_STRING || state == SCE_RB_CHARACTER ||
1342                    state == SCE_RB_BACKTICKS) {
1343             if (!Quote.Down && !isspacechar(ch)) {
1344                 Quote.Open(ch);
1345             } else if (ch == '\\' && Quote.Up != '\\') {
1346                 //Riddle me this: Is it safe to skip *every* escaped char?
1347                 advance_char(i, ch, chNext, chNext2);
1348             } else if (ch == Quote.Down) {
1349                 Quote.Count--;
1350                 if (Quote.Count == 0) {
1351                     styler.ColourTo(i, state);
1352                     state = SCE_RB_DEFAULT;
1353                     preferRE = false;
1354                 }
1355             } else if (ch == Quote.Up) {
1356                 Quote.Count++;
1357             } else if (ch == '#' && chNext == '{'
1358                        && inner_string_count < INNER_STRINGS_MAX_COUNT
1359                        && state != SCE_RB_CHARACTER
1360                        && state != SCE_RB_STRING_Q) {
1361                 // process #{ ... }
1362                 styler.ColourTo(i - 1, state);
1363                 styler.ColourTo(i + 1, SCE_RB_OPERATOR);
1364                 enterInnerExpression(inner_string_types,
1365                                      inner_expn_brace_counts,
1366                                      inner_quotes,
1367                                      inner_string_count,
1368                                      state,
1369                                      brace_counts,
1370                                      Quote);
1371                 preferRE = true;
1372                 // Skip one
1373                 advance_char(i, ch, chNext, chNext2);
1374             }
1375         }
1376
1377         if (state == SCE_RB_ERROR) {
1378             break;
1379         }
1380         chPrev = ch;
1381     }
1382     if (state == SCE_RB_WORD) {
1383         // We've ended on a word, possibly at EOF, and need to
1384         // classify it.
1385         (void) ClassifyWordRb(styler.GetStartSegment(), lengthDoc - 1, keywords, styler, prevWord);
1386     } else {
1387         styler.ColourTo(lengthDoc - 1, state);
1388     }
1389 }
1390
1391 // Helper functions for folding, disambiguation keywords
1392 // Assert that there are no high-bit chars
1393
1394 static void getPrevWord(int pos,
1395                         char *prevWord,
1396                         Accessor &styler,
1397                         int word_state)
1398 {
1399     int i;
1400     styler.Flush();
1401     for (i = pos - 1; i > 0; i--) {
1402         if (actual_style(styler.StyleAt(i)) != word_state) {
1403             i++;
1404             break;
1405         }
1406     }
1407     if (i < pos - MAX_KEYWORD_LENGTH) // overflow
1408         i = pos - MAX_KEYWORD_LENGTH;
1409     char *dst = prevWord;
1410     for (; i <= pos; i++) {
1411         *dst++ = styler[i];
1412     }
1413         *dst = 0;
1414 }
1415
// Tell whether prevWord is one of the keywords whose role depends on
// context: "if", "do", "while", "unless" and "until".
// The comparison is exact and case-sensitive.
// Returns true for those five words, false for anything else.
static bool keywordIsAmbiguous(const char *prevWord)
{
    // Ordered from most likely used to least likely.
    // Lots of ways to do a loop in Ruby besides 'while/until'.
    static const char *const ambiguousWords[] = {
        "if",
        "do",
        "while",
        "unless",
        "until"
    };
    const unsigned int wordCount =
        sizeof(ambiguousWords) / sizeof(ambiguousWords[0]);

    for (unsigned int k = 0; k < wordCount; k++) {
        if (strcmp(prevWord, ambiguousWords[k]) == 0) {
            return true;
        }
    }
    return false;
}
1430
1431 // Demote keywords in the following conditions:
1432 // if, while, unless, until modify a statement
1433 // do after a while or until, as a noise word (like then after if)
1434
1435 static bool keywordIsModifier(const char *word,
1436                               int pos,
1437                               Accessor &styler)
1438 {
1439     if (word[0] == 'd' && word[1] == 'o' && !word[2]) {
1440         return keywordDoStartsLoop(pos, styler);
1441     }
1442     char ch;
1443     int style = SCE_RB_DEFAULT;
1444         int lineStart = styler.GetLine(pos);
1445     int lineStartPosn = styler.LineStart(lineStart);
1446     styler.Flush();
1447     while (--pos >= lineStartPosn) {
1448         style = actual_style(styler.StyleAt(pos));
1449                 if (style == SCE_RB_DEFAULT) {
1450                         if (iswhitespace(ch = styler[pos])) {
1451                                 //continue
1452                         } else if (ch == '\r' || ch == '\n') {
1453                                 // Scintilla's LineStart() and GetLine() routines aren't
1454                                 // platform-independent, so if we have text prepared with
1455                                 // a different system we can't rely on it.
1456                                 return false;
1457                         }
1458                 } else {
1459             break;
1460                 }
1461     }
1462     if (pos < lineStartPosn) {
1463         return false; //XXX not quite right if the prev line is a continuation
1464     }
1465     // First things where the action is unambiguous
1466     switch (style) {
1467         case SCE_RB_DEFAULT:
1468         case SCE_RB_COMMENTLINE:
1469         case SCE_RB_POD:
1470         case SCE_RB_CLASSNAME:
1471         case SCE_RB_DEFNAME:
1472         case SCE_RB_MODULE_NAME:
1473             return false;
1474         case SCE_RB_OPERATOR:
1475             break;
1476         case SCE_RB_WORD:
1477             // Watch out for uses of 'else if'
1478             //XXX: Make a list of other keywords where 'if' isn't a modifier
1479             //     and can appear legitimately
1480             // Formulate this to avoid warnings from most compilers
1481             if (strcmp(word, "if") == 0) {
1482                 char prevWord[MAX_KEYWORD_LENGTH + 1];
1483                 getPrevWord(pos, prevWord, styler, SCE_RB_WORD);
1484                 return strcmp(prevWord, "else") != 0;
1485             }
1486             return true;
1487         default:
1488             return true;
1489     }
1490     // Assume that if the keyword follows an operator,
1491     // usually it's a block assignment, like
1492     // a << if x then y else z
1493
1494     ch = styler[pos];
1495     switch (ch) {
1496         case ')':
1497         case ']':
1498         case '}':
1499             return true;
1500         default:
1501             return false;
1502     }
1503 }
1504
1505 #define WHILE_BACKWARDS "elihw"
1506 #define UNTIL_BACKWARDS "litnu"
1507
1508 // Nothing fancy -- look to see if we follow a while/until somewhere
1509 // on the current line
1510
1511 static bool keywordDoStartsLoop(int pos,
1512                                 Accessor &styler)
1513 {
1514     char ch;
1515     int style;
1516         int lineStart = styler.GetLine(pos);
1517     int lineStartPosn = styler.LineStart(lineStart);
1518     styler.Flush();
1519     while (--pos >= lineStartPosn) {
1520         style = actual_style(styler.StyleAt(pos));
1521                 if (style == SCE_RB_DEFAULT) {
1522                         if ((ch = styler[pos]) == '\r' || ch == '\n') {
1523                                 // Scintilla's LineStart() and GetLine() routines aren't
1524                                 // platform-independent, so if we have text prepared with
1525                                 // a different system we can't rely on it.
1526                                 return false;
1527                         }
1528                 } else if (style == SCE_RB_WORD) {
1529             // Check for while or until, but write the word in backwards
1530             char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
1531             char *dst = prevWord;
1532             int wordLen = 0;
1533             int start_word;
1534             for (start_word = pos;
1535                  start_word >= lineStartPosn && actual_style(styler.StyleAt(start_word)) == SCE_RB_WORD;
1536                  start_word--) {
1537                 if (++wordLen < MAX_KEYWORD_LENGTH) {
1538                     *dst++ = styler[start_word];
1539                 }
1540             }
1541             *dst = 0;
1542             // Did we see our keyword?
1543             if (!strcmp(prevWord, WHILE_BACKWARDS)
1544                 || !strcmp(prevWord, UNTIL_BACKWARDS)) {
1545                 return true;
1546             }
1547             // We can move pos to the beginning of the keyword, and then
1548             // accept another decrement, as we can never have two contiguous
1549             // keywords:
1550             // word1 word2
1551             //           ^
1552             //        <-  move to start_word
1553             //      ^
1554             //      <- loop decrement
1555             //     ^  # pointing to end of word1 is fine
1556             pos = start_word;
1557         }
1558     }
1559     return false;
1560 }
1561
1562 /*
1563  *  Folding Ruby
1564  *
1565  *  The language is quite complex to analyze without a full parse.
1566  *  For example, this line shouldn't affect fold level:
1567  *
1568  *   print "hello" if feeling_friendly?
1569  *
1570  *  Neither should this:
1571  *
1572  *   print "hello" \
1573  *      if feeling_friendly?
1574  *
1575  *
1576  *  But this should:
1577  *
1578  *   if feeling_friendly?  #++
1579  *     print "hello" \
1580  *     print "goodbye"
1581  *   end                   #--
1582  *
1583  *  So we cheat, by actually looking at the existing indentation
1584  *  levels for each line, and just echoing it back.  Like Python.
1585  *  Then if we get better at it, we'll take braces into consideration,
1586  *  which always affect folding levels.
1587
1588  *  How the keywords should work:
1589  *  No effect:
1590  *  __FILE__ __LINE__ BEGIN END alias and
1591  *  defined? false in nil not or self super then
1592  *  true undef
1593
1594  *  Always increment:
1595  *  begin  class def do for module when {
1596  *
1597  *  Always decrement:
1598  *  end }
1599  *
1600  *  Increment if these start a statement
1601  *  if unless until while -- do nothing if they're modifiers
1602
1603  *  These end a block if there's no modifier, but don't bother
1604  *  break next redo retry return yield
1605  *
1606  *  These temporarily de-indent, but re-indent
1607  *  case else elsif ensure rescue
1608  *
1609  *  This means that the folder reflects indentation rather
1610  *  than setting it.  The language-service updates indentation
 *  when users type return and finish entering de-denters.
1612  *
1613  *  Later offer to fold POD, here-docs, strings, and blocks of comments
1614  */
1615
1616 static void FoldRbDoc(unsigned int startPos, int length, int initStyle,
1617                       WordList *[], Accessor &styler) {
1618         const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
1619         bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
1620
1621     synchronizeDocStart(startPos, length, initStyle, styler, // ref args
1622                         false);
1623         unsigned int endPos = startPos + length;
1624         int visibleChars = 0;
1625         int lineCurrent = styler.GetLine(startPos);
1626         int levelPrev = startPos == 0 ? 0 : (styler.LevelAt(lineCurrent)
1627                                          & SC_FOLDLEVELNUMBERMASK
1628                                          & ~SC_FOLDLEVELBASE);
1629         int levelCurrent = levelPrev;
1630         char chNext = styler[startPos];
1631         int styleNext = styler.StyleAt(startPos);
1632         int stylePrev = startPos <= 1 ? SCE_RB_DEFAULT : styler.StyleAt(startPos - 1);
1633     bool buffer_ends_with_eol = false;
1634         for (unsigned int i = startPos; i < endPos; i++) {
1635                 char ch = chNext;
1636                 chNext = styler.SafeGetCharAt(i + 1);
1637                 int style = styleNext;
1638                 styleNext = styler.StyleAt(i + 1);
1639                 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1640         if (style == SCE_RB_COMMENTLINE) {
1641             if (foldComment && stylePrev != SCE_RB_COMMENTLINE) {
1642                 if (chNext == '{') {
1643                                         levelCurrent++;
1644                                 } else if (chNext == '}' && levelCurrent > 0) {
1645                                         levelCurrent--;
1646                                 }
1647             }
1648         } else if (style == SCE_RB_OPERATOR) {
1649                         if (strchr("[{(", ch)) {
1650                                 levelCurrent++;
1651                         } else if (strchr(")}]", ch)) {
1652                 // Don't decrement below 0
1653                 if (levelCurrent > 0)
1654                     levelCurrent--;
1655                         }
1656         } else if (style == SCE_RB_WORD && styleNext != SCE_RB_WORD) {
1657             // Look at the keyword on the left and decide what to do
1658             char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
1659             prevWord[0] = 0;
1660             getPrevWord(i, prevWord, styler, SCE_RB_WORD);
1661             if (!strcmp(prevWord, "end")) {
1662                 // Don't decrement below 0
1663                 if (levelCurrent > 0)
1664                     levelCurrent--;
1665             } else if (   !strcmp(prevWord, "if")
1666                        || !strcmp(prevWord, "def")
1667                        || !strcmp(prevWord, "class")
1668                        || !strcmp(prevWord, "module")
1669                        || !strcmp(prevWord, "begin")
1670                        || !strcmp(prevWord, "case")
1671                        || !strcmp(prevWord, "do")
1672                        || !strcmp(prevWord, "while")
1673                        || !strcmp(prevWord, "unless")
1674                        || !strcmp(prevWord, "until")
1675                        || !strcmp(prevWord, "for")
1676                           ) {
1677                                 levelCurrent++;
1678             }
1679         }
1680                 if (atEOL) {
1681                         int lev = levelPrev;
1682                         if (visibleChars == 0 && foldCompact)
1683                                 lev |= SC_FOLDLEVELWHITEFLAG;
1684                         if ((levelCurrent > levelPrev) && (visibleChars > 0))
1685                                 lev |= SC_FOLDLEVELHEADERFLAG;
1686             styler.SetLevel(lineCurrent, lev|SC_FOLDLEVELBASE);
1687                         lineCurrent++;
1688                         levelPrev = levelCurrent;
1689                         visibleChars = 0;
1690             buffer_ends_with_eol = true;
1691                 } else if (!isspacechar(ch)) {
1692                         visibleChars++;
1693             buffer_ends_with_eol = false;
1694         }
1695                 stylePrev = style;
1696     }
1697         // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1698     if (!buffer_ends_with_eol) {
1699         lineCurrent++;
1700         int new_lev = levelCurrent;
1701         if (visibleChars == 0 && foldCompact)
1702             new_lev |= SC_FOLDLEVELWHITEFLAG;
1703                         if ((levelCurrent > levelPrev) && (visibleChars > 0))
1704                                 new_lev |= SC_FOLDLEVELHEADERFLAG;
1705             levelCurrent = new_lev;
1706     }
1707         styler.SetLevel(lineCurrent, levelCurrent|SC_FOLDLEVELBASE);
1708 }
1709
1710 static const char * const rubyWordListDesc[] = {
1711         "Keywords",
1712         0
1713 };
1714
1715 LexerModule lmRuby(SCLEX_RUBY, ColouriseRbDoc, "ruby", FoldRbDoc, rubyWordListDesc);