ext/scintilla/lexers/LexRuby.cxx

   1 // Scintilla source code edit control
   2 /** @file LexRuby.cxx
   3  ** Lexer for Ruby.
   4  **/
   5 // Copyright 2001- by Clemens Wyss <wys@helbling.ch>
   6 // The License.txt file describes the conditions under which this software may be distributed.
   7
   8 #include <stdlib.h>
   9 #include <string.h>
  10 #include <stdio.h>
  11 #include <stdarg.h>
  12 #include <assert.h>
  13 #include <ctype.h>
  14
  15 #include "ILexer.h"
  16 #include "Scintilla.h"
  17 #include "SciLexer.h"
  18
  19 #include "WordList.h"
  20 #include "LexAccessor.h"
  21 #include "Accessor.h"
  22 #include "StyleContext.h"
  23 #include "CharacterSet.h"
  24 #include "LexerModule.h"
  25
  26 #ifdef SCI_NAMESPACE
  27 using namespace Scintilla;
  28 #endif
  29
  30 //XXX Identical to Perl, put in common area
  31 static inline bool isEOLChar(char ch) {
  32     return (ch == '\r') || (ch == '\n');
  33 }
  34
  35 #define isSafeASCII(ch) ((unsigned int)(ch) <= 127)
  36 // This one's redundant, but makes for more readable code
  37 #define isHighBitChar(ch) ((unsigned int)(ch) > 127)
  38
  39 static inline bool isSafeAlpha(char ch) {
  40     return (isSafeASCII(ch) && isalpha(ch)) || ch == '_';
  41 }
  42
  43 static inline bool isSafeAlnum(char ch) {
  44     return (isSafeASCII(ch) && isalnum(ch)) || ch == '_';
  45 }
  46
  47 static inline bool isSafeAlnumOrHigh(char ch) {
  48     return isHighBitChar(ch) || isalnum(ch) || ch == '_';
  49 }
  50
  51 static inline bool isSafeDigit(char ch) {
  52     return isSafeASCII(ch) && isdigit(ch);
  53 }
  54
  55 static inline bool isSafeWordcharOrHigh(char ch) {
  56     // Error: scintilla's KeyWords.h includes '.' as a word-char
  57     // we want to separate things that can take methods from the
  58     // methods.
  59     return isHighBitChar(ch) || isalnum(ch) || ch == '_';
  60 }
  61
  62 static bool inline iswhitespace(char ch) {
  63     return ch == ' ' || ch == '\t';
  64 }
  65
  66 #define MAX_KEYWORD_LENGTH 200
  67
  68 #define STYLE_MASK 63
  69 #define actual_style(style) (style & STYLE_MASK)
  70
  71 static bool followsDot(unsigned int pos, Accessor &styler) {
  72     styler.Flush();
  73     for (; pos >= 1; --pos) {
  74         int style = actual_style(styler.StyleAt(pos));
  75         char ch;
  76         switch (style) {
  77         case SCE_RB_DEFAULT:
  78             ch = styler[pos];
  79             if (ch == ' ' || ch == '\t') {
  80                 //continue
  81             } else {
  82                 return false;
  83             }
  84             break;
  85
  86         case SCE_RB_OPERATOR:
  87             return styler[pos] == '.';
  88
  89         default:
  90             return false;
  91         }
  92     }
  93     return false;
  94 }
  95
  96 // Forward declarations
  97 static bool keywordIsAmbiguous(const char *prevWord);
  98 static bool keywordDoStartsLoop(int pos,
  99                                 Accessor &styler);
 100 static bool keywordIsModifier(const char *word,
 101                               int pos,
 102                               Accessor &styler);
 103
 104 static int ClassifyWordRb(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord) {
 105     char s[MAX_KEYWORD_LENGTH];
 106     unsigned int i, j;
 107     unsigned int lim = end - start + 1; // num chars to copy
 108     if (lim >= MAX_KEYWORD_LENGTH) {
 109         lim = MAX_KEYWORD_LENGTH - 1;
 110     }
 111     for (i = start, j = 0; j < lim; i++, j++) {
 112         s[j] = styler[i];
 113     }
 114     s[j] = '\0';
 115     int chAttr;
 116     if (0 == strcmp(prevWord, "class"))
 117         chAttr = SCE_RB_CLASSNAME;
 118     else if (0 == strcmp(prevWord, "module"))
 119         chAttr = SCE_RB_MODULE_NAME;
 120     else if (0 == strcmp(prevWord, "def"))
 121         chAttr = SCE_RB_DEFNAME;
 122     else if (keywords.InList(s) && ((start == 0) || !followsDot(start - 1, styler))) {
 123         if (keywordIsAmbiguous(s)
 124                 && keywordIsModifier(s, start, styler)) {
 125
 126             // Demoted keywords are colored as keywords,
 127             // but do not affect changes in indentation.
 128             //
 129             // Consider the word 'if':
 130             // 1. <<if test ...>> : normal
 131             // 2. <<stmt if test>> : demoted
 132             // 3. <<lhs = if ...>> : normal: start a new indent level
 133             // 4. <<obj.if = 10>> : color as identifer, since it follows '.'
 134
 135             chAttr = SCE_RB_WORD_DEMOTED;
 136         } else {
 137             chAttr = SCE_RB_WORD;
 138         }
 139     } else
 140         chAttr = SCE_RB_IDENTIFIER;
 141     styler.ColourTo(end, chAttr);
 142     if (chAttr == SCE_RB_WORD) {
 143         strcpy(prevWord, s);
 144     } else {
 145         prevWord[0] = 0;
 146     }
 147     return chAttr;
 148 }
 149
 150
 151 //XXX Identical to Perl, put in common area
 152 static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) {
 153     if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) {
 154         return false;
 155     }
 156     while (*val) {
 157         if (*val != styler[pos++]) {
 158             return false;
 159         }
 160         val++;
 161     }
 162     return true;
 163 }
 164
 165 // Do Ruby better -- find the end of the line, work back,
 166 // and then check for leading white space
 167
 168 // Precondition: the here-doc target can be indented
 169 static bool lookingAtHereDocDelim(Accessor      &styler,
 170                                   int                   pos,
 171                                   int                   lengthDoc,
 172                                   const char   *HereDocDelim)
 173 {
 174     if (!isMatch(styler, lengthDoc, pos, HereDocDelim)) {
 175         return false;
 176     }
 177     while (--pos > 0) {
 178         char ch = styler[pos];
 179         if (isEOLChar(ch)) {
 180             return true;
 181         } else if (ch != ' ' && ch != '\t') {
 182             return false;
 183         }
 184     }
 185     return false;
 186 }
 187
 188 //XXX Identical to Perl, put in common area
 189 static char opposite(char ch) {
 190     if (ch == '(')
 191         return ')';
 192     if (ch == '[')
 193         return ']';
 194     if (ch == '{')
 195         return '}';
 196     if (ch == '<')
 197         return '>';
 198     return ch;
 199 }
 200
 201 // Null transitions when we see we've reached the end
 202 // and need to relex the curr char.
 203
 204 static void redo_char(int &i, char &ch, char &chNext, char &chNext2,
 205                       int &state) {
 206     i--;
 207     chNext2 = chNext;
 208     chNext = ch;
 209     state = SCE_RB_DEFAULT;
 210 }
 211
 212 static void advance_char(int &i, char &ch, char &chNext, char &chNext2) {
 213     i++;
 214     ch = chNext;
 215     chNext = chNext2;
 216 }
 217
 218 // precondition: startPos points to one after the EOL char
 219 static bool currLineContainsHereDelims(int &startPos,
 220                                        Accessor &styler) {
 221     if (startPos <= 1)
 222         return false;
 223
 224     int pos;
 225     for (pos = startPos - 1; pos > 0; pos--) {
 226         char ch = styler.SafeGetCharAt(pos);
 227         if (isEOLChar(ch)) {
 228             // Leave the pointers where they are -- there are no
 229             // here doc delims on the current line, even if
 230             // the EOL isn't default style
 231
 232             return false;
 233         } else {
 234             styler.Flush();
 235             if (actual_style(styler.StyleAt(pos)) == SCE_RB_HERE_DELIM) {
 236                 break;
 237             }
 238         }
 239     }
 240     if (pos == 0) {
 241         return false;
 242     }
 243     // Update the pointers so we don't have to re-analyze the string
 244     startPos = pos;
 245     return true;
 246 }
 247
 248 // This class is used by the enter and exit methods, so it needs
 249 // to be hoisted out of the function.
 250
 251 class QuoteCls {
 252 public:
 253     int  Count;
 254     char Up;
 255     char Down;
 256     QuoteCls() {
 257         New();
 258     }
 259     void New() {
 260         Count = 0;
 261         Up    = '\0';
 262         Down  = '\0';
 263     }
 264     void Open(char u) {
 265         Count++;
 266         Up    = u;
 267         Down  = opposite(Up);
 268     }
 269     QuoteCls(const QuoteCls &q) {
 270         // copy constructor -- use this for copying in
 271         Count = q.Count;
 272         Up    = q.Up;
 273         Down  = q.Down;
 274     }
 275     QuoteCls &operator=(const QuoteCls &q) { // assignment constructor
 276         if (this != &q) {
 277             Count = q.Count;
 278             Up    = q.Up;
 279             Down  = q.Down;
 280         }
 281         return *this;
 282     }
 283
 284 };
 285
 286
 287 static void enterInnerExpression(int  *p_inner_string_types,
 288                                  int  *p_inner_expn_brace_counts,
 289                                  QuoteCls *p_inner_quotes,
 290                                  int  &inner_string_count,
 291                                  int  &state,
 292                                  int  &brace_counts,
 293                                  QuoteCls curr_quote
 294                                 ) {
 295     p_inner_string_types[inner_string_count] = state;
 296     state = SCE_RB_DEFAULT;
 297     p_inner_expn_brace_counts[inner_string_count] = brace_counts;
 298     brace_counts = 0;
 299     p_inner_quotes[inner_string_count] = curr_quote;
 300     ++inner_string_count;
 301 }
 302
 303 static void exitInnerExpression(int *p_inner_string_types,
 304                                 int *p_inner_expn_brace_counts,
 305                                 QuoteCls *p_inner_quotes,
 306                                 int &inner_string_count,
 307                                 int &state,
 308                                 int  &brace_counts,
 309                                 QuoteCls &curr_quote
 310                                ) {
 311     --inner_string_count;
 312     state = p_inner_string_types[inner_string_count];
 313     brace_counts = p_inner_expn_brace_counts[inner_string_count];
 314     curr_quote = p_inner_quotes[inner_string_count];
 315 }
 316
 317 static bool isEmptyLine(int pos,
 318                         Accessor &styler) {
 319     int spaceFlags = 0;
 320     int lineCurrent = styler.GetLine(pos);
 321     int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
 322     return (indentCurrent & SC_FOLDLEVELWHITEFLAG) != 0;
 323 }
 324
 325 static bool RE_CanFollowKeyword(const char *keyword) {
 326     if (!strcmp(keyword, "and")
 327             || !strcmp(keyword, "begin")
 328             || !strcmp(keyword, "break")
 329             || !strcmp(keyword, "case")
 330             || !strcmp(keyword, "do")
 331             || !strcmp(keyword, "else")
 332             || !strcmp(keyword, "elsif")
 333             || !strcmp(keyword, "if")
 334             || !strcmp(keyword, "next")
 335             || !strcmp(keyword, "return")
 336             || !strcmp(keyword, "when")
 337             || !strcmp(keyword, "unless")
 338             || !strcmp(keyword, "until")
 339             || !strcmp(keyword, "not")
 340             || !strcmp(keyword, "or")) {
 341         return true;
 342     }
 343     return false;
 344 }
 345
 346 // Look at chars up to but not including endPos
 347 // Don't look at styles in case we're looking forward
 348
 349 static int skipWhitespace(int startPos,
 350                           int endPos,
 351                           Accessor &styler) {
 352     for (int i = startPos; i < endPos; i++) {
 353         if (!iswhitespace(styler[i])) {
 354             return i;
 355         }
 356     }
 357     return endPos;
 358 }
 359
 360 // This routine looks for false positives like
 361 // undef foo, <<
 362 // There aren't too many.
 363 //
 364 // iPrev points to the start of <<
 365
 366 static bool sureThisIsHeredoc(int iPrev,
 367                               Accessor &styler,
 368                               char *prevWord) {
 369
 370     // Not so fast, since Ruby's so dynamic.  Check the context
 371     // to make sure we're OK.
 372     int prevStyle;
 373     int lineStart = styler.GetLine(iPrev);
 374     int lineStartPosn = styler.LineStart(lineStart);
 375     styler.Flush();
 376
 377     // Find the first word after some whitespace
 378     int firstWordPosn = skipWhitespace(lineStartPosn, iPrev, styler);
 379     if (firstWordPosn >= iPrev) {
 380         // Have something like {^     <<}
 381         //XXX Look at the first previous non-comment non-white line
 382         // to establish the context.  Not too likely though.
 383         return true;
 384     } else {
 385         switch (prevStyle = styler.StyleAt(firstWordPosn)) {
 386         case SCE_RB_WORD:
 387         case SCE_RB_WORD_DEMOTED:
 388         case SCE_RB_IDENTIFIER:
 389             break;
 390         default:
 391             return true;
 392         }
 393     }
 394     int firstWordEndPosn = firstWordPosn;
 395     char *dst = prevWord;
 396     for (;;) {
 397         if (firstWordEndPosn >= iPrev ||
 398                 styler.StyleAt(firstWordEndPosn) != prevStyle) {
 399             *dst = 0;
 400             break;
 401         }
 402         *dst++ = styler[firstWordEndPosn];
 403         firstWordEndPosn += 1;
 404     }
 405     //XXX Write a style-aware thing to regex scintilla buffer objects
 406     if (!strcmp(prevWord, "undef")
 407             || !strcmp(prevWord, "def")
 408             || !strcmp(prevWord, "alias")) {
 409         // These keywords are what we were looking for
 410         return false;
 411     }
 412     return true;
 413 }
 414
 415 // Routine that saves us from allocating a buffer for the here-doc target
 416 // targetEndPos points one past the end of the current target
 417 static bool haveTargetMatch(int currPos,
 418                             int lengthDoc,
 419                             int targetStartPos,
 420                             int targetEndPos,
 421                             Accessor &styler) {
 422     if (lengthDoc - currPos < targetEndPos - targetStartPos) {
 423         return false;
 424     }
 425     int i, j;
 426     for (i = targetStartPos, j = currPos;
 427             i < targetEndPos && j < lengthDoc;
 428             i++, j++) {
 429         if (styler[i] != styler[j]) {
 430             return false;
 431         }
 432     }
 433     return true;
 434 }
 435
 436 // We need a check because the form
 437 // [identifier] <<[target]
 438 // is ambiguous.  The Ruby lexer/parser resolves it by
 439 // looking to see if [identifier] names a variable or a
 440 // function.  If it's a function, it's the start of a here-doc.
 441 // If it's a var, it's an operator.  This lexer doesn't
 442 // maintain a symbol table, so it looks ahead to see what's
 443 // going on, in cases where we have
 444 // ^[white-space]*[identifier([.|::]identifier)*][white-space]*<<[target]
 445 //
 446 // If [target] doesn't occur at the start of a following line, assume we don't have a here-doc.
 447
 448 // return true == yes, we have no heredocs
 449
 450 static bool sureThisIsNotHeredoc(int lt2StartPos,
 451                                  Accessor &styler) {
 452     int prevStyle;
 453     // Use full document, not just part we're styling
 454     int lengthDoc = styler.Length();
 455     int lineStart = styler.GetLine(lt2StartPos);
 456     int lineStartPosn = styler.LineStart(lineStart);
 457     styler.Flush();
 458     const bool definitely_not_a_here_doc = true;
 459     const bool looks_like_a_here_doc = false;
 460
 461     // Find the first word after some whitespace
 462     int firstWordPosn = skipWhitespace(lineStartPosn, lt2StartPos, styler);
 463     if (firstWordPosn >= lt2StartPos) {
 464         return definitely_not_a_here_doc;
 465     }
 466     prevStyle = styler.StyleAt(firstWordPosn);
 467     // If we have '<<' following a keyword, it's not a heredoc
 468     if (prevStyle != SCE_RB_IDENTIFIER
 469             && prevStyle != SCE_RB_INSTANCE_VAR
 470             && prevStyle != SCE_RB_CLASS_VAR) {
 471         return definitely_not_a_here_doc;
 472     }
 473     int newStyle = prevStyle;
 474     // Some compilers incorrectly warn about uninit newStyle
 475     for (firstWordPosn += 1; firstWordPosn <= lt2StartPos; firstWordPosn += 1) {
 476         // Inner loop looks at the name
 477         for (; firstWordPosn <= lt2StartPos; firstWordPosn += 1) {
 478             newStyle = styler.StyleAt(firstWordPosn);
 479             if (newStyle != prevStyle) {
 480                 break;
 481             }
 482         }
 483         // Do we have '::' or '.'?
 484         if (firstWordPosn < lt2StartPos && newStyle == SCE_RB_OPERATOR) {
 485             char ch = styler[firstWordPosn];
 486             if (ch == '.') {
 487                 // yes
 488             } else if (ch == ':') {
 489                 if (styler.StyleAt(++firstWordPosn) != SCE_RB_OPERATOR) {
 490                     return definitely_not_a_here_doc;
 491                 } else if (styler[firstWordPosn] != ':') {
 492                     return definitely_not_a_here_doc;
 493                 }
 494             } else {
 495                 break;
 496             }
 497         } else {
 498             break;
 499         }
 500         // on second and next passes, only identifiers may appear since
 501         // class and instance variable are private
 502         prevStyle = SCE_RB_IDENTIFIER;
 503     }
 504     // Skip next batch of white-space
 505     firstWordPosn = skipWhitespace(firstWordPosn, lt2StartPos, styler);
 506     if (firstWordPosn != lt2StartPos) {
 507         // Have [[^ws[identifier]ws[*something_else*]ws<<
 508         return definitely_not_a_here_doc;
 509     }
 510     // OK, now 'j' will point to the current spot moving ahead
 511     int j = firstWordPosn + 1;
 512     if (styler.StyleAt(j) != SCE_RB_OPERATOR || styler[j] != '<') {
 513         // This shouldn't happen
 514         return definitely_not_a_here_doc;
 515     }
 516     int nextLineStartPosn = styler.LineStart(lineStart + 1);
 517     if (nextLineStartPosn >= lengthDoc) {
 518         return definitely_not_a_here_doc;
 519     }
 520     j = skipWhitespace(j + 1, nextLineStartPosn, styler);
 521     if (j >= lengthDoc) {
 522         return definitely_not_a_here_doc;
 523     }
 524     bool allow_indent;
 525     int target_start, target_end;
 526     // From this point on no more styling, since we're looking ahead
 527     if (styler[j] == '-') {
 528         allow_indent = true;
 529         j++;
 530     } else {
 531         allow_indent = false;
 532     }
 533
 534     // Allow for quoted targets.
 535     char target_quote = 0;
 536     switch (styler[j]) {
 537     case '\'':
 538     case '"':
 539     case '`':
 540         target_quote = styler[j];
 541         j += 1;
 542     }
 543
 544     if (isSafeAlnum(styler[j])) {
 545         // Init target_end because some compilers think it won't
 546         // be initialized by the time it's used
 547         target_start = target_end = j;
 548         j++;
 549     } else {
 550         return definitely_not_a_here_doc;
 551     }
 552     for (; j < lengthDoc; j++) {
 553         if (!isSafeAlnum(styler[j])) {
 554             if (target_quote && styler[j] != target_quote) {
 555                 // unquoted end
 556                 return definitely_not_a_here_doc;
 557             }
 558
 559             // And for now make sure that it's a newline
 560             // don't handle arbitrary expressions yet
 561
 562             target_end = j;
 563             if (target_quote) {
 564                 // Now we can move to the character after the string delimiter.
 565                 j += 1;
 566             }
 567             j = skipWhitespace(j, lengthDoc, styler);
 568             if (j >= lengthDoc) {
 569                 return definitely_not_a_here_doc;
 570             } else {
 571                 char ch = styler[j];
 572                 if (ch == '#' || isEOLChar(ch)) {
 573                     // This is OK, so break and continue;
 574                     break;
 575                 } else {
 576                     return definitely_not_a_here_doc;
 577                 }
 578             }
 579         }
 580     }
 581
 582     // Just look at the start of each line
 583     int last_line = styler.GetLine(lengthDoc - 1);
 584     // But don't go too far
 585     if (last_line > lineStart + 50) {
 586         last_line = lineStart + 50;
 587     }
 588     for (int line_num = lineStart + 1; line_num <= last_line; line_num++) {
 589         if (allow_indent) {
 590             j = skipWhitespace(styler.LineStart(line_num), lengthDoc, styler);
 591         } else {
 592             j = styler.LineStart(line_num);
 593         }
 594         // target_end is one past the end
 595         if (haveTargetMatch(j, lengthDoc, target_start, target_end, styler)) {
 596             // We got it
 597             return looks_like_a_here_doc;
 598         }
 599     }
 600     return definitely_not_a_here_doc;
 601 }
 602
 603 //todo: if we aren't looking at a stdio character,
 604 // move to the start of the first line that is not in a
 605 // multi-line construct
 606
 607 static void synchronizeDocStart(unsigned int &startPos,
 608                                 int &length,
 609                                 int &initStyle,
 610                                 Accessor &styler,
 611                                 bool skipWhiteSpace=false) {
 612
 613     styler.Flush();
 614     int style = actual_style(styler.StyleAt(startPos));
 615     switch (style) {
 616     case SCE_RB_STDIN:
 617     case SCE_RB_STDOUT:
 618     case SCE_RB_STDERR:
 619         // Don't do anything else with these.
 620         return;
 621     }
 622
 623     int pos = startPos;
 624     // Quick way to characterize each line
 625     int lineStart;
 626     for (lineStart = styler.GetLine(pos); lineStart > 0; lineStart--) {
 627         // Now look at the style before the previous line's EOL
 628         pos = styler.LineStart(lineStart) - 1;
 629         if (pos <= 10) {
 630             lineStart = 0;
 631             break;
 632         }
 633         char ch = styler.SafeGetCharAt(pos);
 634         char chPrev = styler.SafeGetCharAt(pos - 1);
 635         if (ch == '\n' && chPrev == '\r') {
 636             pos--;
 637         }
 638         if (styler.SafeGetCharAt(pos - 1) == '\\') {
 639             // Continuation line -- keep going
 640         } else if (actual_style(styler.StyleAt(pos)) != SCE_RB_DEFAULT) {
 641             // Part of multi-line construct -- keep going
 642         } else if (currLineContainsHereDelims(pos, styler)) {
 643             // Keep going, with pos and length now pointing
 644             // at the end of the here-doc delimiter
 645         } else if (skipWhiteSpace && isEmptyLine(pos, styler)) {
 646             // Keep going
 647         } else {
 648             break;
 649         }
 650     }
 651     pos = styler.LineStart(lineStart);
 652     length += (startPos - pos);
 653     startPos = pos;
 654     initStyle = SCE_RB_DEFAULT;
 655 }
 656
 657 static void ColouriseRbDoc(unsigned int startPos, int length, int initStyle,
 658                            WordList *keywordlists[], Accessor &styler) {
 659
 660     // Lexer for Ruby often has to backtrack to start of current style to determine
 661     // which characters are being used as quotes, how deeply nested is the
 662     // start position and what the termination string is for here documents
 663
 664     WordList &keywords = *keywordlists[0];
 665
 666     class HereDocCls {
 667     public:
 668         int State;
 669         // States
 670         // 0: '<<' encountered
 671         // 1: collect the delimiter
 672         // 1b: text between the end of the delimiter and the EOL
 673         // 2: here doc text (lines after the delimiter)
 674         char Quote;             // the char after '<<'
 675         bool Quoted;            // true if Quote in ('\'','"','`')
 676         int DelimiterLength;    // strlen(Delimiter)
 677         char Delimiter[256];    // the Delimiter, limit of 256: from Perl
 678         bool CanBeIndented;
 679         HereDocCls() {
 680             State = 0;
 681             DelimiterLength = 0;
 682             Delimiter[0] = '\0';
 683             CanBeIndented = false;
 684         }
 685     };
 686     HereDocCls HereDoc;
 687
 688     QuoteCls Quote;
 689
 690     int numDots = 0;  // For numbers --
 691     // Don't start lexing in the middle of a num
 692
 693     synchronizeDocStart(startPos, length, initStyle, styler, // ref args
 694                         false);
 695
 696     bool preferRE = true;
 697     int state = initStyle;
 698     int lengthDoc = startPos + length;
 699
 700     char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
 701     prevWord[0] = '\0';
 702     if (length == 0)
 703         return;
 704
 705     char chPrev = styler.SafeGetCharAt(startPos - 1);
 706     char chNext = styler.SafeGetCharAt(startPos);
 707     bool is_real_number = true;   // Differentiate between constants and ?-sequences.
 708     styler.StartAt(startPos);
 709     styler.StartSegment(startPos);
 710
 711     static int q_states[] = {SCE_RB_STRING_Q,
 712                              SCE_RB_STRING_QQ,
 713                              SCE_RB_STRING_QR,
 714                              SCE_RB_STRING_QW,
 715                              SCE_RB_STRING_QW,
 716                              SCE_RB_STRING_QX
 717                             };
 718     static const char *q_chars = "qQrwWx";
 719
 720     // In most cases a value of 2 should be ample for the code in the
 721     // Ruby library, and the code the user is likely to enter.
 722     // For example,
 723     // fu_output_message "mkdir #{options[:mode] ? ('-m %03o ' % options[:mode]) : ''}#{list.join ' '}"
 724     //     if options[:verbose]
 725     // from fileutils.rb nests to a level of 2
 726     // If the user actually hits a 6th occurrence of '#{' in a double-quoted
 727     // string (including regex'es, %Q, %<sym>, %w, and other strings
 728     // that interpolate), it will stay as a string.  The problem with this
 729     // is that quotes might flip, a 7th '#{' will look like a comment,
 730     // and code-folding might be wrong.
 731
 732     // If anyone runs into this problem, I recommend raising this
 733     // value slightly higher to replacing the fixed array with a linked
 734     // list.  Keep in mind this code will be called every time the lexer
 735     // is invoked.
 736
 737 #define INNER_STRINGS_MAX_COUNT 5
 738     // These vars track our instances of "...#{,,,%Q<..#{,,,}...>,,,}..."
 739     int inner_string_types[INNER_STRINGS_MAX_COUNT];
 740     // Track # braces when we push a new #{ thing
 741     int inner_expn_brace_counts[INNER_STRINGS_MAX_COUNT];
 742     QuoteCls inner_quotes[INNER_STRINGS_MAX_COUNT];
 743     int inner_string_count = 0;
 744     int brace_counts = 0;   // Number of #{ ... } things within an expression
 745
 746     int i;
 747     for (i = 0; i < INNER_STRINGS_MAX_COUNT; i++) {
 748         inner_string_types[i] = 0;
 749         inner_expn_brace_counts[i] = 0;
 750     }
 751     for (i = startPos; i < lengthDoc; i++) {
 752         char ch = chNext;
 753         chNext = styler.SafeGetCharAt(i + 1);
 754         char chNext2 = styler.SafeGetCharAt(i + 2);
 755
 756         if (styler.IsLeadByte(ch)) {
 757             chNext = chNext2;
 758             chPrev = ' ';
 759             i += 1;
 760             continue;
 761         }
 762
 763         // skip on DOS/Windows
 764         //No, don't, because some things will get tagged on,
 765         // so we won't recognize keywords, for example
 766 #if 0
 767         if (ch == '\r' && chNext == '\n') {
 768             continue;
 769         }
 770 #endif
 771
 772         if (HereDoc.State == 1 && isEOLChar(ch)) {
 773             // Begin of here-doc (the line after the here-doc delimiter):
 774             HereDoc.State = 2;
 775             styler.ColourTo(i-1, state);
 776             // Don't check for a missing quote, just jump into
 777             // the here-doc state
 778             state = SCE_RB_HERE_Q;
 779         }
 780
 781         // Regular transitions
 782         if (state == SCE_RB_DEFAULT) {
 783             if (isSafeDigit(ch)) {
 784                 styler.ColourTo(i - 1, state);
 785                 state = SCE_RB_NUMBER;
 786                 is_real_number = true;
 787                 numDots = 0;
 788             } else if (isHighBitChar(ch) || iswordstart(ch)) {
 789                 styler.ColourTo(i - 1, state);
 790                 state = SCE_RB_WORD;
 791             } else if (ch == '#') {
 792                 styler.ColourTo(i - 1, state);
 793                 state = SCE_RB_COMMENTLINE;
 794             } else if (ch == '=') {
 795                 // =begin indicates the start of a comment (doc) block
 796                 if ((i == 0 || isEOLChar(chPrev))
 797                         && chNext == 'b'
 798                         && styler.SafeGetCharAt(i + 2) == 'e'
 799                         && styler.SafeGetCharAt(i + 3) == 'g'
 800                         && styler.SafeGetCharAt(i + 4) == 'i'
 801                         && styler.SafeGetCharAt(i + 5) == 'n'
 802                         && !isSafeWordcharOrHigh(styler.SafeGetCharAt(i + 6))) {
 803                     styler.ColourTo(i - 1, state);
 804                     state = SCE_RB_POD;
 805                 } else {
 806                     styler.ColourTo(i - 1, state);
 807                     styler.ColourTo(i, SCE_RB_OPERATOR);
 808                     preferRE = true;
 809                 }
 810             } else if (ch == '"') {
 811                 styler.ColourTo(i - 1, state);
 812                 state = SCE_RB_STRING;
 813                 Quote.New();
 814                 Quote.Open(ch);
 815             } else if (ch == '\'') {
 816                 styler.ColourTo(i - 1, state);
 817                 state = SCE_RB_CHARACTER;
 818                 Quote.New();
 819                 Quote.Open(ch);
 820             } else if (ch == '`') {
 821                 styler.ColourTo(i - 1, state);
 822                 state = SCE_RB_BACKTICKS;
 823                 Quote.New();
 824                 Quote.Open(ch);
 825             } else if (ch == '@') {
 826                 // Instance or class var
 827                 styler.ColourTo(i - 1, state);
 828                 if (chNext == '@') {
 829                     state = SCE_RB_CLASS_VAR;
 830                     advance_char(i, ch, chNext, chNext2); // pass by ref
 831                 } else {
 832                     state = SCE_RB_INSTANCE_VAR;
 833                 }
 834             } else if (ch == '$') {
 835                 // Check for a builtin global
 836                 styler.ColourTo(i - 1, state);
 837                 // Recognize it bit by bit
 838                 state = SCE_RB_GLOBAL;
 839             } else if (ch == '/' && preferRE) {
 840                 // Ambigous operator
 841                 styler.ColourTo(i - 1, state);
 842                 state = SCE_RB_REGEX;
 843                 Quote.New();
 844                 Quote.Open(ch);
 845             } else if (ch == '<' && chNext == '<' && chNext2 != '=') {
 846
 847                 // Recognise the '<<' symbol - either a here document or a binary op
 848                 styler.ColourTo(i - 1, state);
 849                 i++;
 850                 chNext = chNext2;
 851                 styler.ColourTo(i, SCE_RB_OPERATOR);
 852
 853                 if (!(strchr("\"\'`_-", chNext2) || isSafeAlpha(chNext2))) {
 854                     // It's definitely not a here-doc,
 855                     // based on Ruby's lexer/parser in the
 856                     // heredoc_identifier routine.
 857                     // Nothing else to do.
 858                 } else if (preferRE) {
 859                     if (sureThisIsHeredoc(i - 1, styler, prevWord)) {
 860                         state = SCE_RB_HERE_DELIM;
 861                         HereDoc.State = 0;
 862                     }
 863                     // else leave it in default state
 864                 } else {
 865                     if (sureThisIsNotHeredoc(i - 1, styler)) {
 866                         // leave state as default
 867                         // We don't have all the heuristics Perl has for indications
 868                         // of a here-doc, because '<<' is overloadable and used
 869                         // for so many other classes.
 870                     } else {
 871                         state = SCE_RB_HERE_DELIM;
 872                         HereDoc.State = 0;
 873                     }
 874                 }
 875                 preferRE = (state != SCE_RB_HERE_DELIM);
 876             } else if (ch == ':') {
 877                 styler.ColourTo(i - 1, state);
 878                 if (chNext == ':') {
 879                     // Mark "::" as an operator, not symbol start
 880                     styler.ColourTo(i + 1, SCE_RB_OPERATOR);
 881                     advance_char(i, ch, chNext, chNext2); // pass by ref
 882                     state = SCE_RB_DEFAULT;
 883                     preferRE = false;
 884                 } else if (isSafeWordcharOrHigh(chNext)) {
 885                     state = SCE_RB_SYMBOL;
 886                 } else if ((chNext == '@' || chNext == '$') &&
 887                            isSafeWordcharOrHigh(chNext2)) {
 888                     // instance and global variable followed by an identifier
 889                     advance_char(i, ch, chNext, chNext2);
 890                     state = SCE_RB_SYMBOL;
 891                 } else if (((chNext == '@' && chNext2 == '@')  ||
 892                             (chNext == '$' && chNext2 == '-')) &&
 893                            isSafeWordcharOrHigh(styler.SafeGetCharAt(i+3))) {
 894                     // class variables and special global variable "$-IDENTCHAR"
 895                     state = SCE_RB_SYMBOL;
 896                     // $-IDENTCHAR doesn't continue past the IDENTCHAR
 897                     if (chNext == '$') {
 898                         styler.ColourTo(i+3, SCE_RB_SYMBOL);
 899                         state = SCE_RB_DEFAULT;
 900                     }
 901                     i += 3;
 902                     ch = styler.SafeGetCharAt(i);
 903                     chNext = styler.SafeGetCharAt(i+1);
 904                 } else if (chNext == '$' && strchr("_~*$?!@/\\;,.=:<>\"&`'+", chNext2)) {
 905                     // single-character special global variables
 906                     i += 2;
 907                     ch = chNext2;
 908                     chNext = styler.SafeGetCharAt(i+1);
 909                     styler.ColourTo(i, SCE_RB_SYMBOL);
 910                     state = SCE_RB_DEFAULT;
 911                 } else if (strchr("[*!~+-*/%=<>&^|", chNext)) {
 912                     // Do the operator analysis in-line, looking ahead
 913                     // Based on the table in pickaxe 2nd ed., page 339
 914                     bool doColoring = true;
 915                     switch (chNext) {
 916                     case '[':
 917                         if (chNext2 == ']') {
 918                             char ch_tmp = styler.SafeGetCharAt(i + 3);
 919                             if (ch_tmp == '=') {
 920                                 i += 3;
 921                                 ch = ch_tmp;
 922                                 chNext = styler.SafeGetCharAt(i + 1);
 923                             } else {
 924                                 i += 2;
 925                                 ch = chNext2;
 926                                 chNext = ch_tmp;
 927                             }
 928                         } else {
 929                             doColoring = false;
 930                         }
 931                         break;
 932
 933                     case '*':
 934                         if (chNext2 == '*') {
 935                             i += 2;
 936                             ch = chNext2;
 937                             chNext = styler.SafeGetCharAt(i + 1);
 938                         } else {
 939                             advance_char(i, ch, chNext, chNext2);
 940                         }
 941                         break;
 942
 943                     case '!':
 944                         if (chNext2 == '=' || chNext2 == '~') {
 945                             i += 2;
 946                             ch = chNext2;
 947                             chNext = styler.SafeGetCharAt(i + 1);
 948                         } else {
 949                             advance_char(i, ch, chNext, chNext2);
 950                         }
 951                         break;
 952
 953                     case '<':
 954                         if (chNext2 == '<') {
 955                             i += 2;
 956                             ch = chNext2;
 957                             chNext = styler.SafeGetCharAt(i + 1);
 958                         } else if (chNext2 == '=') {
 959                             char ch_tmp = styler.SafeGetCharAt(i + 3);
 960                             if (ch_tmp == '>') {  // <=> operator
 961                                 i += 3;
 962                                 ch = ch_tmp;
 963                                 chNext = styler.SafeGetCharAt(i + 1);
 964                             } else {
 965                                 i += 2;
 966                                 ch = chNext2;
 967                                 chNext = ch_tmp;
 968                             }
 969                         } else {
 970                             advance_char(i, ch, chNext, chNext2);
 971                         }
 972                         break;
 973
 974                     default:
 975                         // Simple one-character operators
 976                         advance_char(i, ch, chNext, chNext2);
 977                         break;
 978                     }
 979                     if (doColoring) {
 980                         styler.ColourTo(i, SCE_RB_SYMBOL);
 981                         state = SCE_RB_DEFAULT;
 982                     }
 983                 } else if (!preferRE) {
 984                     // Don't color symbol strings (yet)
 985                     // Just color the ":" and color rest as string
 986                     styler.ColourTo(i, SCE_RB_SYMBOL);
 987                     state = SCE_RB_DEFAULT;
 988                 } else {
 989                     styler.ColourTo(i, SCE_RB_OPERATOR);
 990                     state = SCE_RB_DEFAULT;
 991                     preferRE = true;
 992                 }
 993             } else if (ch == '%') {
 994                 styler.ColourTo(i - 1, state);
 995                 bool have_string = false;
 996                 if (strchr(q_chars, chNext) && !isSafeWordcharOrHigh(chNext2)) {
 997                     Quote.New();
 998                     const char *hit = strchr(q_chars, chNext);
 999                     if (hit != NULL) {
1000                         state = q_states[hit - q_chars];
1001                         Quote.Open(chNext2);
1002                         i += 2;
1003                         ch = chNext2;
1004                         chNext = styler.SafeGetCharAt(i + 1);
1005                         have_string = true;
1006                     }
1007                 } else if (preferRE && !isSafeWordcharOrHigh(chNext)) {
1008                     // Ruby doesn't allow high bit chars here,
1009                     // but the editor host might
1010                     Quote.New();
1011                     state = SCE_RB_STRING_QQ;
1012                     Quote.Open(chNext);
1013                     advance_char(i, ch, chNext, chNext2); // pass by ref
1014                     have_string = true;
1015                 } else if (!isSafeWordcharOrHigh(chNext) && !iswhitespace(chNext) && !isEOLChar(chNext)) {
1016                     // Ruby doesn't allow high bit chars here,
1017                     // but the editor host might
1018                     Quote.New();
1019                     state = SCE_RB_STRING_QQ;
1020                     Quote.Open(chNext);
1021                     advance_char(i, ch, chNext, chNext2); // pass by ref
1022                     have_string = true;
1023                 }
1024                 if (!have_string) {
1025                     styler.ColourTo(i, SCE_RB_OPERATOR);
1026                     // stay in default
1027                     preferRE = true;
1028                 }
1029             } else if (ch == '?') {
1030                 styler.ColourTo(i - 1, state);
1031                 if (iswhitespace(chNext) || chNext == '\n' || chNext == '\r') {
1032                     styler.ColourTo(i, SCE_RB_OPERATOR);
1033                 } else {
1034                     // It's the start of a character code escape sequence
1035                     // Color it as a number.
1036                     state = SCE_RB_NUMBER;
1037                     is_real_number = false;
1038                 }
1039             } else if (isoperator(ch) || ch == '.') {
1040                 styler.ColourTo(i - 1, state);
1041                 styler.ColourTo(i, SCE_RB_OPERATOR);
1042                 // If we're ending an expression or block,
1043                 // assume it ends an object, and the ambivalent
1044                 // constructs are binary operators
1045                 //
1046                 // So if we don't have one of these chars,
1047                 // we aren't ending an object exp'n, and ops
1048                 // like : << / are unary operators.
1049
1050                 if (ch == '{') {
1051                     ++brace_counts;
1052                     preferRE = true;
1053                 } else if (ch == '}' && --brace_counts < 0
1054                            && inner_string_count > 0) {
1055                     styler.ColourTo(i, SCE_RB_OPERATOR);
1056                     exitInnerExpression(inner_string_types,
1057                                         inner_expn_brace_counts,
1058                                         inner_quotes,
1059                                         inner_string_count,
1060                                         state, brace_counts, Quote);
1061                 } else {
1062                     preferRE = (strchr(")}].", ch) == NULL);
1063                 }
1064                 // Stay in default state
1065             } else if (isEOLChar(ch)) {
1066                 // Make sure it's a true line-end, with no backslash
1067                 if ((ch == '\r' || (ch == '\n' && chPrev != '\r'))
1068                         && chPrev != '\\') {
1069                     // Assume we've hit the end of the statement.
1070                     preferRE = true;
1071                 }
1072             }
1073         } else if (state == SCE_RB_WORD) {
1074             if (ch == '.' || !isSafeWordcharOrHigh(ch)) {
1075                 // Words include x? in all contexts,
1076                 // and <letters>= after either 'def' or a dot
1077                 // Move along until a complete word is on our left
1078
1079                 // Default accessor treats '.' as word-chars,
1080                 // but we don't for now.
1081
1082                 if (ch == '='
1083                         && isSafeWordcharOrHigh(chPrev)
1084                         && (chNext == '('
1085                             || strchr(" \t\n\r", chNext) != NULL)
1086                         && (!strcmp(prevWord, "def")
1087                             || followsDot(styler.GetStartSegment(), styler))) {
1088                     // <name>= is a name only when being def'd -- Get it the next time
1089                     // This means that <name>=<name> is always lexed as
1090                     // <name>, (op, =), <name>
1091                 } else if ((ch == '?' || ch == '!')
1092                            && isSafeWordcharOrHigh(chPrev)
1093                            && !isSafeWordcharOrHigh(chNext)) {
1094                     // <name>? is a name -- Get it the next time
1095                     // But <name>?<name> is always lexed as
1096                     // <name>, (op, ?), <name>
1097                     // Same with <name>! to indicate a method that
1098                     // modifies its target
1099                 } else if (isEOLChar(ch)
1100                            && isMatch(styler, lengthDoc, i - 7, "__END__")) {
1101                     styler.ColourTo(i, SCE_RB_DATASECTION);
1102                     state = SCE_RB_DATASECTION;
1103                     // No need to handle this state -- we'll just move to the end
1104                     preferRE = false;
1105                 } else {
1106                     int wordStartPos = styler.GetStartSegment();
1107                     int word_style = ClassifyWordRb(wordStartPos, i - 1, keywords, styler, prevWord);
1108                     switch (word_style) {
1109                     case SCE_RB_WORD:
1110                         preferRE = RE_CanFollowKeyword(prevWord);
1111                         break;
1112
1113                     case SCE_RB_WORD_DEMOTED:
1114                         preferRE = true;
1115                         break;
1116
1117                     case SCE_RB_IDENTIFIER:
1118                         if (isMatch(styler, lengthDoc, wordStartPos, "print")) {
1119                             preferRE = true;
1120                         } else if (isEOLChar(ch)) {
1121                             preferRE = true;
1122                         } else {
1123                             preferRE = false;
1124                         }
1125                         break;
1126                     default:
1127                         preferRE = false;
1128                     }
1129                     if (ch == '.') {
1130                         // We might be redefining an operator-method
1131                         preferRE = false;
1132                     }
1133                     // And if it's the first
1134                     redo_char(i, ch, chNext, chNext2, state); // pass by ref
1135                 }
1136             }
1137         } else if (state == SCE_RB_NUMBER) {
1138             if (!is_real_number) {
1139                 if (ch != '\\') {
1140                     styler.ColourTo(i, state);
1141                     state = SCE_RB_DEFAULT;
1142                     preferRE = false;
1143                 } else if (strchr("\\ntrfvaebs", chNext)) {
1144                     // Terminal escape sequence -- handle it next time
1145                     // Nothing more to do this time through the loop
1146                 } else if (chNext == 'C' || chNext == 'M') {
1147                     if (chNext2 != '-') {
1148                         // \C or \M ends the sequence -- handle it next time
1149                     } else {
1150                         // Move from abc?\C-x
1151                         //               ^
1152                         // to
1153                         //                 ^
1154                         i += 2;
1155                         ch = chNext2;
1156                         chNext = styler.SafeGetCharAt(i + 1);
1157                     }
1158                 } else if (chNext == 'c') {
1159                     // Stay here, \c is a combining sequence
1160                     advance_char(i, ch, chNext, chNext2); // pass by ref
1161                 } else {
1162                     // ?\x, including ?\\ is final.
1163                     styler.ColourTo(i + 1, state);
1164                     state = SCE_RB_DEFAULT;
1165                     preferRE = false;
1166                     advance_char(i, ch, chNext, chNext2);
1167                 }
1168             } else if (isSafeAlnumOrHigh(ch) || ch == '_') {
1169                 // Keep going
1170             } else if (ch == '.' && chNext == '.') {
1171                 ++numDots;
1172                 styler.ColourTo(i - 1, state);
1173                 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1174             } else if (ch == '.' && ++numDots == 1) {
1175                 // Keep going
1176             } else {
1177                 styler.ColourTo(i - 1, state);
1178                 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1179                 preferRE = false;
1180             }
1181         } else if (state == SCE_RB_COMMENTLINE) {
1182             if (isEOLChar(ch)) {
1183                 styler.ColourTo(i - 1, state);
1184                 state = SCE_RB_DEFAULT;
1185                 // Use whatever setting we had going into the comment
1186             }
1187         } else if (state == SCE_RB_HERE_DELIM) {
1188             // See the comment for SCE_RB_HERE_DELIM in LexPerl.cxx
1189             // Slightly different: if we find an immediate '-',
1190             // the target can appear indented.
1191
1192             if (HereDoc.State == 0) { // '<<' encountered
1193                 HereDoc.State = 1;
1194                 HereDoc.DelimiterLength = 0;
1195                 if (ch == '-') {
1196                     HereDoc.CanBeIndented = true;
1197                     advance_char(i, ch, chNext, chNext2); // pass by ref
1198                 } else {
1199                     HereDoc.CanBeIndented = false;
1200                 }
1201                 if (isEOLChar(ch)) {
1202                     // Bail out of doing a here doc if there's no target
1203                     state = SCE_RB_DEFAULT;
1204                     preferRE = false;
1205                 } else {
1206                     HereDoc.Quote = ch;
1207
1208                     if (ch == '\'' || ch == '"' || ch == '`') {
1209                         HereDoc.Quoted = true;
1210                         HereDoc.Delimiter[0] = '\0';
1211                     } else {
1212                         HereDoc.Quoted = false;
1213                         HereDoc.Delimiter[0] = ch;
1214                         HereDoc.Delimiter[1] = '\0';
1215                         HereDoc.DelimiterLength = 1;
1216                     }
1217                 }
1218             } else if (HereDoc.State == 1) { // collect the delimiter
1219                 if (isEOLChar(ch)) {
1220                     // End the quote now, and go back for more
1221                     styler.ColourTo(i - 1, state);
1222                     state = SCE_RB_DEFAULT;
1223                     i--;
1224                     chNext = ch;
1225                     preferRE = false;
1226                 } else if (HereDoc.Quoted) {
1227                     if (ch == HereDoc.Quote) { // closing quote => end of delimiter
1228                         styler.ColourTo(i, state);
1229                         state = SCE_RB_DEFAULT;
1230                         preferRE = false;
1231                     } else {
1232                         if (ch == '\\' && !isEOLChar(chNext)) {
1233                             advance_char(i, ch, chNext, chNext2);
1234                         }
1235                         HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
1236                         HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
1237                     }
1238                 } else { // an unquoted here-doc delimiter
1239                     if (isSafeAlnumOrHigh(ch) || ch == '_') {
1240                         HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
1241                         HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
1242                     } else {
1243                         styler.ColourTo(i - 1, state);
1244                         redo_char(i, ch, chNext, chNext2, state);
1245                         preferRE = false;
1246                     }
1247                 }
1248                 if (HereDoc.DelimiterLength >= static_cast<int>(sizeof(HereDoc.Delimiter)) - 1) {
1249                     styler.ColourTo(i - 1, state);
1250                     state = SCE_RB_ERROR;
1251                     preferRE = false;
1252                 }
1253             }
1254         } else if (state == SCE_RB_HERE_Q) {
1255             // Not needed: HereDoc.State == 2
1256             // Indentable here docs: look backwards
1257             // Non-indentable: look forwards, like in Perl
1258             //
1259             // Why: so we can quickly resolve things like <<-" abc"
1260
1261             if (!HereDoc.CanBeIndented) {
1262                 if (isEOLChar(chPrev)
1263                         && isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) {
1264                     styler.ColourTo(i - 1, state);
1265                     i += HereDoc.DelimiterLength - 1;
1266                     chNext = styler.SafeGetCharAt(i + 1);
1267                     if (isEOLChar(chNext)) {
1268                         styler.ColourTo(i, SCE_RB_HERE_DELIM);
1269                         state = SCE_RB_DEFAULT;
1270                         HereDoc.State = 0;
1271                         preferRE = false;
1272                     }
1273                     // Otherwise we skipped through the here doc faster.
1274                 }
1275             } else if (isEOLChar(chNext)
1276                        && lookingAtHereDocDelim(styler,
1277                                                 i - HereDoc.DelimiterLength + 1,
1278                                                 lengthDoc,
1279                                                 HereDoc.Delimiter)) {
1280                 styler.ColourTo(i - 1 - HereDoc.DelimiterLength, state);
1281                 styler.ColourTo(i, SCE_RB_HERE_DELIM);
1282                 state = SCE_RB_DEFAULT;
1283                 preferRE = false;
1284                 HereDoc.State = 0;
1285             }
1286         } else if (state == SCE_RB_CLASS_VAR
1287                    || state == SCE_RB_INSTANCE_VAR
1288                    || state == SCE_RB_SYMBOL) {
1289             if (state == SCE_RB_SYMBOL &&
1290                     // FIDs suffices '?' and '!'
1291                     (((ch == '!' || ch == '?') && chNext != '=') ||
1292                      // identifier suffix '='
1293                      (ch == '=' && (chNext != '~' && chNext != '>' &&
1294                                     (chNext != '=' || chNext2 == '>'))))) {
1295                 styler.ColourTo(i, state);
1296                 state = SCE_RB_DEFAULT;
1297                 preferRE = false;
1298             } else if (!isSafeWordcharOrHigh(ch)) {
1299                 styler.ColourTo(i - 1, state);
1300                 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1301                 preferRE = false;
1302             }
1303         } else if (state == SCE_RB_GLOBAL) {
1304             if (!isSafeWordcharOrHigh(ch)) {
1305                 // handle special globals here as well
1306                 if (chPrev == '$') {
1307                     if (ch == '-') {
1308                         // Include the next char, like $-a
1309                         advance_char(i, ch, chNext, chNext2);
1310                     }
1311                     styler.ColourTo(i, state);
1312                     state = SCE_RB_DEFAULT;
1313                 } else {
1314                     styler.ColourTo(i - 1, state);
1315                     redo_char(i, ch, chNext, chNext2, state); // pass by ref
1316                 }
1317                 preferRE = false;
1318             }
1319         } else if (state == SCE_RB_POD) {
1320             // PODs end with ^=end\s, -- any whitespace can follow =end
1321             if (strchr(" \t\n\r", ch) != NULL
1322                     && i > 5
1323                     && isEOLChar(styler[i - 5])
1324                     && isMatch(styler, lengthDoc, i - 4, "=end")) {
1325                 styler.ColourTo(i - 1, state);
1326                 state = SCE_RB_DEFAULT;
1327                 preferRE = false;
1328             }
1329         } else if (state == SCE_RB_REGEX || state == SCE_RB_STRING_QR) {
1330             if (ch == '\\' && Quote.Up != '\\') {
1331                 // Skip one
1332                 advance_char(i, ch, chNext, chNext2);
1333             } else if (ch == Quote.Down) {
1334                 Quote.Count--;
1335                 if (Quote.Count == 0) {
1336                     // Include the options
1337                     while (isSafeAlpha(chNext)) {
1338                         i++;
1339                         ch = chNext;
1340                         chNext = styler.SafeGetCharAt(i + 1);
1341                     }
1342                     styler.ColourTo(i, state);
1343                     state = SCE_RB_DEFAULT;
1344                     preferRE = false;
1345                 }
1346             } else if (ch == Quote.Up) {
1347                 // Only if close quoter != open quoter
1348                 Quote.Count++;
1349
1350             } else if (ch == '#') {
1351                 if (chNext == '{'
1352                         && inner_string_count < INNER_STRINGS_MAX_COUNT) {
1353                     // process #{ ... }
1354                     styler.ColourTo(i - 1, state);
1355                     styler.ColourTo(i + 1, SCE_RB_OPERATOR);
1356                     enterInnerExpression(inner_string_types,
1357                                          inner_expn_brace_counts,
1358                                          inner_quotes,
1359                                          inner_string_count,
1360                                          state,
1361                                          brace_counts,
1362                                          Quote);
1363                     preferRE = true;
1364                     // Skip one
1365                     advance_char(i, ch, chNext, chNext2);
1366                 } else {
1367                     //todo: distinguish comments from pound chars
1368                     // for now, handle as comment
1369                     styler.ColourTo(i - 1, state);
1370                     bool inEscape = false;
1371                     while (++i < lengthDoc) {
1372                         ch = styler.SafeGetCharAt(i);
1373                         if (ch == '\\') {
1374                             inEscape = true;
1375                         } else if (isEOLChar(ch)) {
1376                             // Comment inside a regex
1377                             styler.ColourTo(i - 1, SCE_RB_COMMENTLINE);
1378                             break;
1379                         } else if (inEscape) {
1380                             inEscape = false;  // don't look at char
1381                         } else if (ch == Quote.Down) {
1382                             // Have the regular handler deal with this
1383                             // to get trailing modifiers.
1384                             i--;
1385                             ch = styler[i];
1386                             break;
1387                         }
1388                     }
1389                     chNext = styler.SafeGetCharAt(i + 1);
1390                 }
1391             }
1392             // Quotes of all kinds...
1393         } else if (state == SCE_RB_STRING_Q || state == SCE_RB_STRING_QQ ||
1394                    state == SCE_RB_STRING_QX || state == SCE_RB_STRING_QW ||
1395                    state == SCE_RB_STRING || state == SCE_RB_CHARACTER ||
1396                    state == SCE_RB_BACKTICKS) {
1397             if (!Quote.Down && !isspacechar(ch)) {
1398                 Quote.Open(ch);
1399             } else if (ch == '\\' && Quote.Up != '\\') {
1400                 //Riddle me this: Is it safe to skip *every* escaped char?
1401                 advance_char(i, ch, chNext, chNext2);
1402             } else if (ch == Quote.Down) {
1403                 Quote.Count--;
1404                 if (Quote.Count == 0) {
1405                     styler.ColourTo(i, state);
1406                     state = SCE_RB_DEFAULT;
1407                     preferRE = false;
1408                 }
1409             } else if (ch == Quote.Up) {
1410                 Quote.Count++;
1411             } else if (ch == '#' && chNext == '{'
1412                        && inner_string_count < INNER_STRINGS_MAX_COUNT
1413                        && state != SCE_RB_CHARACTER
1414                        && state != SCE_RB_STRING_Q) {
1415                 // process #{ ... }
1416                 styler.ColourTo(i - 1, state);
1417                 styler.ColourTo(i + 1, SCE_RB_OPERATOR);
1418                 enterInnerExpression(inner_string_types,
1419                                      inner_expn_brace_counts,
1420                                      inner_quotes,
1421                                      inner_string_count,
1422                                      state,
1423                                      brace_counts,
1424                                      Quote);
1425                 preferRE = true;
1426                 // Skip one
1427                 advance_char(i, ch, chNext, chNext2);
1428             }
1429         }
1430
1431         if (state == SCE_RB_ERROR) {
1432             break;
1433         }
1434         chPrev = ch;
1435     }
1436     if (state == SCE_RB_WORD) {
1437         // We've ended on a word, possibly at EOF, and need to
1438         // classify it.
1439         (void) ClassifyWordRb(styler.GetStartSegment(), lengthDoc - 1, keywords, styler, prevWord);
1440     } else {
1441         styler.ColourTo(lengthDoc - 1, state);
1442     }
1443 }
1444
// Helper functions for folding and for disambiguating keywords
1446 // Assert that there are no high-bit chars
1447
1448 static void getPrevWord(int pos,
1449                         char *prevWord,
1450                         Accessor &styler,
1451                         int word_state)
1452 {
1453     int i;
1454     styler.Flush();
1455     for (i = pos - 1; i > 0; i--) {
1456         if (actual_style(styler.StyleAt(i)) != word_state) {
1457             i++;
1458             break;
1459         }
1460     }
1461     if (i < pos - MAX_KEYWORD_LENGTH) // overflow
1462         i = pos - MAX_KEYWORD_LENGTH;
1463     char *dst = prevWord;
1464     for (; i <= pos; i++) {
1465         *dst++ = styler[i];
1466     }
1467     *dst = 0;
1468 }
1469
// Report whether prevWord is exactly one of the keywords
// "if", "do", "while", "unless", "until" or "for".
// The comparison is case-sensitive and matches whole words only.
static bool keywordIsAmbiguous(const char *prevWord)
{
    // Ordered from most likely used to least likely.
    // Ruby has plenty of looping constructs besides 'while/until'.
    static const char *const candidates[] = {
        "if",
        "do",
        "while",
        "unless",
        "until",
        "for"
    };
    const size_t candidateCount = sizeof(candidates) / sizeof(candidates[0]);
    for (size_t k = 0; k < candidateCount; k++) {
        if (strcmp(prevWord, candidates[k]) == 0) {
            return true;
        }
    }
    return false;
}
1485
1486 // Demote keywords in the following conditions:
1487 // if, while, unless, until modify a statement
1488 // do after a while or until, as a noise word (like then after if)
1489
1490 static bool keywordIsModifier(const char *word,
1491                               int pos,
1492                               Accessor &styler)
1493 {
1494     if (word[0] == 'd' && word[1] == 'o' && !word[2]) {
1495         return keywordDoStartsLoop(pos, styler);
1496     }
1497     char ch, chPrev, chPrev2;
1498     int style = SCE_RB_DEFAULT;
1499     int lineStart = styler.GetLine(pos);
1500     int lineStartPosn = styler.LineStart(lineStart);
1501     // We want to step backwards until we don't care about the current
1502     // position. But first move lineStartPosn back behind any
1503     // continuations immediately above word.
1504     while (lineStartPosn > 0) {
1505         ch = styler[lineStartPosn-1];
1506         if (ch == '\n' || ch == '\r') {
1507             chPrev  = styler.SafeGetCharAt(lineStartPosn-2);
1508             chPrev2 = styler.SafeGetCharAt(lineStartPosn-3);
1509             lineStart = styler.GetLine(lineStartPosn-1);
1510             // If we find a continuation line, include it in our analysis.
1511             if (chPrev == '\\') {
1512                 lineStartPosn = styler.LineStart(lineStart);
1513             } else if (ch == '\n' && chPrev == '\r' && chPrev2 == '\\') {
1514                 lineStartPosn = styler.LineStart(lineStart);
1515             } else {
1516                 break;
1517             }
1518         } else {
1519             break;
1520         }
1521     }
1522
1523     styler.Flush();
1524     while (--pos >= lineStartPosn) {
1525         style = actual_style(styler.StyleAt(pos));
1526         if (style == SCE_RB_DEFAULT) {
1527             if (iswhitespace(ch = styler[pos])) {
1528                 //continue
1529             } else if (ch == '\r' || ch == '\n') {
1530                 // Scintilla's LineStart() and GetLine() routines aren't
1531                 // platform-independent, so if we have text prepared with
1532                 // a different system we can't rely on it.
1533
1534                 // Also, lineStartPosn may have been moved to more than one
1535                 // line above word's line while pushing past continuations.
1536                 chPrev = styler.SafeGetCharAt(pos - 1);
1537                 chPrev2 = styler.SafeGetCharAt(pos - 2);
1538                 if (chPrev == '\\') {
1539                     pos-=1;  // gloss over the "\\"
1540                     //continue
1541                 } else if (ch == '\n' && chPrev == '\r' && chPrev2 == '\\') {
1542                     pos-=2;  // gloss over the "\\\r"
1543                     //continue
1544                 } else {
1545                     return false;
1546                 }
1547             }
1548         } else {
1549             break;
1550         }
1551     }
1552     if (pos < lineStartPosn) {
1553         return false;
1554     }
1555     // First things where the action is unambiguous
1556     switch (style) {
1557     case SCE_RB_DEFAULT:
1558     case SCE_RB_COMMENTLINE:
1559     case SCE_RB_POD:
1560     case SCE_RB_CLASSNAME:
1561     case SCE_RB_DEFNAME:
1562     case SCE_RB_MODULE_NAME:
1563         return false;
1564     case SCE_RB_OPERATOR:
1565         break;
1566     case SCE_RB_WORD:
1567         // Watch out for uses of 'else if'
1568         //XXX: Make a list of other keywords where 'if' isn't a modifier
1569         //     and can appear legitimately
1570         // Formulate this to avoid warnings from most compilers
1571         if (strcmp(word, "if") == 0) {
1572             char prevWord[MAX_KEYWORD_LENGTH + 1];
1573             getPrevWord(pos, prevWord, styler, SCE_RB_WORD);
1574             return strcmp(prevWord, "else") != 0;
1575         }
1576         return true;
1577     default:
1578         return true;
1579     }
1580     // Assume that if the keyword follows an operator,
1581     // usually it's a block assignment, like
1582     // a << if x then y else z
1583
1584     ch = styler[pos];
1585     switch (ch) {
1586     case ')':
1587     case ']':
1588     case '}':
1589         return true;
1590     default:
1591         return false;
1592     }
1593 }
1594
1595 #define WHILE_BACKWARDS "elihw"
1596 #define UNTIL_BACKWARDS "litnu"
1597 #define FOR_BACKWARDS "rof"
1598
1599 // Nothing fancy -- look to see if we follow a while/until somewhere
1600 // on the current line
1601
1602 static bool keywordDoStartsLoop(int pos,
1603                                 Accessor &styler)
1604 {
1605     char ch;
1606     int style;
1607     int lineStart = styler.GetLine(pos);
1608     int lineStartPosn = styler.LineStart(lineStart);
1609     styler.Flush();
1610     while (--pos >= lineStartPosn) {
1611         style = actual_style(styler.StyleAt(pos));
1612         if (style == SCE_RB_DEFAULT) {
1613             if ((ch = styler[pos]) == '\r' || ch == '\n') {
1614                 // Scintilla's LineStart() and GetLine() routines aren't
1615                 // platform-independent, so if we have text prepared with
1616                 // a different system we can't rely on it.
1617                 return false;
1618             }
1619         } else if (style == SCE_RB_WORD) {
1620             // Check for while or until, but write the word in backwards
1621             char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
1622             char *dst = prevWord;
1623             int wordLen = 0;
1624             int start_word;
1625             for (start_word = pos;
1626                     start_word >= lineStartPosn && actual_style(styler.StyleAt(start_word)) == SCE_RB_WORD;
1627                     start_word--) {
1628                 if (++wordLen < MAX_KEYWORD_LENGTH) {
1629                     *dst++ = styler[start_word];
1630                 }
1631             }
1632             *dst = 0;
1633             // Did we see our keyword?
1634             if (!strcmp(prevWord, WHILE_BACKWARDS)
1635                     || !strcmp(prevWord, UNTIL_BACKWARDS)
1636                     || !strcmp(prevWord, FOR_BACKWARDS)) {
1637                 return true;
1638             }
1639             // We can move pos to the beginning of the keyword, and then
1640             // accept another decrement, as we can never have two contiguous
1641             // keywords:
1642             // word1 word2
1643             //           ^
1644             //        <-  move to start_word
1645             //      ^
1646             //      <- loop decrement
1647             //     ^  # pointing to end of word1 is fine
1648             pos = start_word;
1649         }
1650     }
1651     return false;
1652 }
1653
1654 static bool IsCommentLine(int line, Accessor &styler) {
1655     int pos = styler.LineStart(line);
1656     int eol_pos = styler.LineStart(line + 1) - 1;
1657     for (int i = pos; i < eol_pos; i++) {
1658         char ch = styler[i];
1659         if (ch == '#')
1660             return true;
1661         else if (ch != ' ' && ch != '\t')
1662             return false;
1663     }
1664     return false;
1665 }
1666
1667 /*
1668  *  Folding Ruby
1669  *
1670  *  The language is quite complex to analyze without a full parse.
1671  *  For example, this line shouldn't affect fold level:
1672  *
1673  *   print "hello" if feeling_friendly?
1674  *
1675  *  Neither should this:
1676  *
1677  *   print "hello" \
1678  *      if feeling_friendly?
1679  *
1680  *
1681  *  But this should:
1682  *
1683  *   if feeling_friendly?  #++
1684  *     print "hello" \
1685  *     print "goodbye"
1686  *   end                   #--
1687  *
1688  *  So we cheat, by actually looking at the existing indentation
1689  *  levels for each line, and just echoing it back.  Like Python.
1690  *  Then if we get better at it, we'll take braces into consideration,
1691  *  which always affect folding levels.
1692
1693  *  How the keywords should work:
1694  *  No effect:
1695  *  __FILE__ __LINE__ BEGIN END alias and
1696  *  defined? false in nil not or self super then
1697  *  true undef
1698
1699  *  Always increment:
1700  *  begin  class def do for module when {
1701  *
1702  *  Always decrement:
1703  *  end }
1704  *
1705  *  Increment if these start a statement
1706  *  if unless until while -- do nothing if they're modifiers
1707
1708  *  These end a block if there's no modifier, but don't bother
1709  *  break next redo retry return yield
1710  *
1711  *  These temporarily de-indent, but re-indent
1712  *  case else elsif ensure rescue
1713  *
 *  This means that the folder reflects indentation rather
 *  than setting it.  The language-service updates indentation
 *  when the user types return or finishes entering a de-denter.
1717  *
1718  *  Later offer to fold POD, here-docs, strings, and blocks of comments
1719  */
1720
1721 static void FoldRbDoc(unsigned int startPos, int length, int initStyle,
1722                       WordList *[], Accessor &styler) {
1723     const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
1724     bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
1725
1726     synchronizeDocStart(startPos, length, initStyle, styler, // ref args
1727                         false);
1728     unsigned int endPos = startPos + length;
1729     int visibleChars = 0;
1730     int lineCurrent = styler.GetLine(startPos);
1731     int levelPrev = startPos == 0 ? 0 : (styler.LevelAt(lineCurrent)
1732                                          & SC_FOLDLEVELNUMBERMASK
1733                                          & ~SC_FOLDLEVELBASE);
1734     int levelCurrent = levelPrev;
1735     char chNext = styler[startPos];
1736     int styleNext = styler.StyleAt(startPos);
1737     int stylePrev = startPos <= 1 ? SCE_RB_DEFAULT : styler.StyleAt(startPos - 1);
1738     bool buffer_ends_with_eol = false;
1739     for (unsigned int i = startPos; i < endPos; i++) {
1740         char ch = chNext;
1741         chNext = styler.SafeGetCharAt(i + 1);
1742         int style = styleNext;
1743         styleNext = styler.StyleAt(i + 1);
1744         bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1745
1746         /*Mutiline comment patch*/
1747         if (foldComment && atEOL && IsCommentLine(lineCurrent, styler)) {
1748             if (!IsCommentLine(lineCurrent - 1, styler)
1749                     && IsCommentLine(lineCurrent + 1, styler))
1750                 levelCurrent++;
1751             else if (IsCommentLine(lineCurrent - 1, styler)
1752                      && !IsCommentLine(lineCurrent + 1, styler))
1753                 levelCurrent--;
1754         }
1755
1756         if (style == SCE_RB_COMMENTLINE) {
1757             if (foldComment && stylePrev != SCE_RB_COMMENTLINE) {
1758                 if (chNext == '{') {
1759                     levelCurrent++;
1760                 } else if (chNext == '}' && levelCurrent > 0) {
1761                     levelCurrent--;
1762                 }
1763             }
1764         } else if (style == SCE_RB_OPERATOR) {
1765             if (strchr("[{(", ch)) {
1766                 levelCurrent++;
1767             } else if (strchr(")}]", ch)) {
1768                 // Don't decrement below 0
1769                 if (levelCurrent > 0)
1770                     levelCurrent--;
1771             }
1772         } else if (style == SCE_RB_WORD && styleNext != SCE_RB_WORD) {
1773             // Look at the keyword on the left and decide what to do
1774             char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
1775             prevWord[0] = 0;
1776             getPrevWord(i, prevWord, styler, SCE_RB_WORD);
1777             if (!strcmp(prevWord, "end")) {
1778                 // Don't decrement below 0
1779                 if (levelCurrent > 0)
1780                     levelCurrent--;
1781             } else if (!strcmp(prevWord, "if")
1782                        || !strcmp(prevWord, "def")
1783                        || !strcmp(prevWord, "class")
1784                        || !strcmp(prevWord, "module")
1785                        || !strcmp(prevWord, "begin")
1786                        || !strcmp(prevWord, "case")
1787                        || !strcmp(prevWord, "do")
1788                        || !strcmp(prevWord, "while")
1789                        || !strcmp(prevWord, "unless")
1790                        || !strcmp(prevWord, "until")
1791                        || !strcmp(prevWord, "for")
1792                       ) {
1793                 levelCurrent++;
1794             }
1795         } else if (style == SCE_RB_HERE_DELIM) {
1796             if (styler.SafeGetCharAt(i-2) == '<' && styler.SafeGetCharAt(i-1) == '<') {
1797                 levelCurrent++;
1798             } else if (styleNext == SCE_RB_DEFAULT) {
1799                 levelCurrent--;
1800             }
1801         }
1802         if (atEOL) {
1803             int lev = levelPrev;
1804             if (visibleChars == 0 && foldCompact)
1805                 lev |= SC_FOLDLEVELWHITEFLAG;
1806             if ((levelCurrent > levelPrev) && (visibleChars > 0))
1807                 lev |= SC_FOLDLEVELHEADERFLAG;
1808             styler.SetLevel(lineCurrent, lev|SC_FOLDLEVELBASE);
1809             lineCurrent++;
1810             levelPrev = levelCurrent;
1811             visibleChars = 0;
1812             buffer_ends_with_eol = true;
1813         } else if (!isspacechar(ch)) {
1814             visibleChars++;
1815             buffer_ends_with_eol = false;
1816         }
1817         stylePrev = style;
1818     }
1819     // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1820     if (!buffer_ends_with_eol) {
1821         lineCurrent++;
1822         int new_lev = levelCurrent;
1823         if (visibleChars == 0 && foldCompact)
1824             new_lev |= SC_FOLDLEVELWHITEFLAG;
1825         if ((levelCurrent > levelPrev) && (visibleChars > 0))
1826             new_lev |= SC_FOLDLEVELHEADERFLAG;
1827         levelCurrent = new_lev;
1828     }
1829     styler.SetLevel(lineCurrent, levelCurrent|SC_FOLDLEVELBASE);
1830 }
1831
1832 static const char *const rubyWordListDesc[] = {
1833     "Keywords",
1834     0
1835 };
1836
1837 LexerModule lmRuby(SCLEX_RUBY, ColouriseRbDoc, "ruby", FoldRbDoc, rubyWordListDesc);