Update Scintilla to version 3.5.4
[TortoiseGit.git] / ext / scintilla / lexers / LexRuby.cxx
blob7918d4bcf2e40fa30cec2269deb7e98a03536240
1 // Scintilla source code edit control
2 /** @file LexRuby.cxx
3 ** Lexer for Ruby.
4 **/
5 // Copyright 2001- by Clemens Wyss <wys@helbling.ch>
6 // The License.txt file describes the conditions under which this software may be distributed.
8 #include <stdlib.h>
9 #include <string.h>
10 #include <stdio.h>
11 #include <stdarg.h>
12 #include <assert.h>
13 #include <ctype.h>
15 #include "ILexer.h"
16 #include "Scintilla.h"
17 #include "SciLexer.h"
19 #include "WordList.h"
20 #include "LexAccessor.h"
21 #include "Accessor.h"
22 #include "StyleContext.h"
23 #include "CharacterSet.h"
24 #include "LexerModule.h"
26 #ifdef SCI_NAMESPACE
27 using namespace Scintilla;
28 #endif
30 //XXX Identical to Perl, put in common area
// Returns true when ch is one of the two end-of-line characters,
// carriage return or line feed.
static inline bool isEOLChar(char ch) {
    switch (ch) {
    case '\r':
    case '\n':
        return true;
    default:
        return false;
    }
}
// True when the value, converted to unsigned int, lies in the 7-bit ASCII
// range 0..127.  A negative char converts to a large unsigned value and is
// therefore reported as not safe.
#define isSafeASCII(ch) ((unsigned int)(ch) < 128u)
// This one's redundant (it is the exact complement of isSafeASCII),
// but makes for more readable code
#define isHighBitChar(ch) ((unsigned int)(ch) >= 128u)
39 static inline bool isSafeAlpha(char ch) {
40 return (isSafeASCII(ch) && isalpha(ch)) || ch == '_';
43 static inline bool isSafeAlnum(char ch) {
44 return (isSafeASCII(ch) && isalnum(ch)) || ch == '_';
47 static inline bool isSafeAlnumOrHigh(char ch) {
48 return isHighBitChar(ch) || isalnum(ch) || ch == '_';
51 static inline bool isSafeDigit(char ch) {
52 return isSafeASCII(ch) && isdigit(ch);
55 static inline bool isSafeWordcharOrHigh(char ch) {
56 // Error: scintilla's KeyWords.h includes '.' as a word-char
57 // we want to separate things that can take methods from the
58 // methods.
59 return isHighBitChar(ch) || isalnum(ch) || ch == '_';
// True for a space or a horizontal tab.  End-of-line characters are
// not treated as white space by this helper.
static inline bool iswhitespace(char ch) {
    switch (ch) {
    case ' ':
    case '\t':
        return true;
    default:
        return false;
    }
}
// Size of the scratch buffer used to hold one word while classifying it.
#define MAX_KEYWORD_LENGTH 200

// Low six bits of a style byte; actual_style() strips everything above them.
#define STYLE_MASK 63
// The argument is parenthesized so that an expression such as `a | b`
// is masked as a whole instead of binding as `a | (b & STYLE_MASK)`.
#define actual_style(style) ((style) & STYLE_MASK)
71 static bool followsDot(unsigned int pos, Accessor &styler) {
72 styler.Flush();
73 for (; pos >= 1; --pos) {
74 int style = actual_style(styler.StyleAt(pos));
75 char ch;
76 switch (style) {
77 case SCE_RB_DEFAULT:
78 ch = styler[pos];
79 if (ch == ' ' || ch == '\t') {
80 //continue
81 } else {
82 return false;
84 break;
86 case SCE_RB_OPERATOR:
87 return styler[pos] == '.';
89 default:
90 return false;
93 return false;
96 // Forward declarations
97 static bool keywordIsAmbiguous(const char *prevWord);
98 static bool keywordDoStartsLoop(int pos,
99 Accessor &styler);
100 static bool keywordIsModifier(const char *word,
101 int pos,
102 Accessor &styler);
104 static int ClassifyWordRb(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord) {
105 char s[MAX_KEYWORD_LENGTH];
106 unsigned int i, j;
107 unsigned int lim = end - start + 1; // num chars to copy
108 if (lim >= MAX_KEYWORD_LENGTH) {
109 lim = MAX_KEYWORD_LENGTH - 1;
111 for (i = start, j = 0; j < lim; i++, j++) {
112 s[j] = styler[i];
114 s[j] = '\0';
115 int chAttr;
116 if (0 == strcmp(prevWord, "class"))
117 chAttr = SCE_RB_CLASSNAME;
118 else if (0 == strcmp(prevWord, "module"))
119 chAttr = SCE_RB_MODULE_NAME;
120 else if (0 == strcmp(prevWord, "def"))
121 chAttr = SCE_RB_DEFNAME;
122 else if (keywords.InList(s) && ((start == 0) || !followsDot(start - 1, styler))) {
123 if (keywordIsAmbiguous(s)
124 && keywordIsModifier(s, start, styler)) {
126 // Demoted keywords are colored as keywords,
127 // but do not affect changes in indentation.
129 // Consider the word 'if':
130 // 1. <<if test ...>> : normal
131 // 2. <<stmt if test>> : demoted
132 // 3. <<lhs = if ...>> : normal: start a new indent level
133 // 4. <<obj.if = 10>> : color as identifer, since it follows '.'
135 chAttr = SCE_RB_WORD_DEMOTED;
136 } else {
137 chAttr = SCE_RB_WORD;
139 } else
140 chAttr = SCE_RB_IDENTIFIER;
141 styler.ColourTo(end, chAttr);
142 if (chAttr == SCE_RB_WORD) {
143 strcpy(prevWord, s);
144 } else {
145 prevWord[0] = 0;
147 return chAttr;
151 //XXX Identical to Perl, put in common area
152 static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) {
153 if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) {
154 return false;
156 while (*val) {
157 if (*val != styler[pos++]) {
158 return false;
160 val++;
162 return true;
165 // Do Ruby better -- find the end of the line, work back,
166 // and then check for leading white space
168 // Precondition: the here-doc target can be indented
169 static bool lookingAtHereDocDelim(Accessor &styler,
170 int pos,
171 int lengthDoc,
172 const char *HereDocDelim)
174 if (!isMatch(styler, lengthDoc, pos, HereDocDelim)) {
175 return false;
177 while (--pos > 0) {
178 char ch = styler[pos];
179 if (isEOLChar(ch)) {
180 return true;
181 } else if (ch != ' ' && ch != '\t') {
182 return false;
185 return false;
188 //XXX Identical to Perl, put in common area
// Maps an opening bracket to its closing partner: ( [ { < become ) ] } >.
// Any other character is returned unchanged.
static char opposite(char ch) {
    switch (ch) {
    case '(':
        return ')';
    case '[':
        return ']';
    case '{':
        return '}';
    case '<':
        return '>';
    default:
        return ch;
    }
}
201 // Null transitions when we see we've reached the end
202 // and need to relex the curr char.
204 static void redo_char(int &i, char &ch, char &chNext, char &chNext2,
205 int &state) {
206 i--;
207 chNext2 = chNext;
208 chNext = ch;
209 state = SCE_RB_DEFAULT;
// Moves the scanner forward by one character: i is incremented and the
// look-ahead characters shift one place to the left.  chNext2 is read but
// not refreshed here.
static void advance_char(int &i, char &ch, char &chNext, char &chNext2) {
    ch = chNext;
    chNext = chNext2;
    ++i;
}
218 // precondition: startPos points to one after the EOL char
219 static bool currLineContainsHereDelims(int &startPos,
220 Accessor &styler) {
221 if (startPos <= 1)
222 return false;
224 int pos;
225 for (pos = startPos - 1; pos > 0; pos--) {
226 char ch = styler.SafeGetCharAt(pos);
227 if (isEOLChar(ch)) {
228 // Leave the pointers where they are -- there are no
229 // here doc delims on the current line, even if
230 // the EOL isn't default style
232 return false;
233 } else {
234 styler.Flush();
235 if (actual_style(styler.StyleAt(pos)) == SCE_RB_HERE_DELIM) {
236 break;
240 if (pos == 0) {
241 return false;
243 // Update the pointers so we don't have to re-analyze the string
244 startPos = pos;
245 return true;
248 // This class is used by the enter and exit methods, so it needs
249 // to be hoisted out of the function.
251 class QuoteCls {
252 public:
253 int Count;
254 char Up;
255 char Down;
256 QuoteCls() {
257 New();
259 void New() {
260 Count = 0;
261 Up = '\0';
262 Down = '\0';
264 void Open(char u) {
265 Count++;
266 Up = u;
267 Down = opposite(Up);
269 QuoteCls(const QuoteCls &q) {
270 // copy constructor -- use this for copying in
271 Count = q.Count;
272 Up = q.Up;
273 Down = q.Down;
275 QuoteCls &operator=(const QuoteCls &q) { // assignment constructor
276 if (this != &q) {
277 Count = q.Count;
278 Up = q.Up;
279 Down = q.Down;
281 return *this;
287 static void enterInnerExpression(int *p_inner_string_types,
288 int *p_inner_expn_brace_counts,
289 QuoteCls *p_inner_quotes,
290 int &inner_string_count,
291 int &state,
292 int &brace_counts,
293 QuoteCls curr_quote
295 p_inner_string_types[inner_string_count] = state;
296 state = SCE_RB_DEFAULT;
297 p_inner_expn_brace_counts[inner_string_count] = brace_counts;
298 brace_counts = 0;
299 p_inner_quotes[inner_string_count] = curr_quote;
300 ++inner_string_count;
303 static void exitInnerExpression(int *p_inner_string_types,
304 int *p_inner_expn_brace_counts,
305 QuoteCls *p_inner_quotes,
306 int &inner_string_count,
307 int &state,
308 int &brace_counts,
309 QuoteCls &curr_quote
311 --inner_string_count;
312 state = p_inner_string_types[inner_string_count];
313 brace_counts = p_inner_expn_brace_counts[inner_string_count];
314 curr_quote = p_inner_quotes[inner_string_count];
317 static bool isEmptyLine(int pos,
318 Accessor &styler) {
319 int spaceFlags = 0;
320 int lineCurrent = styler.GetLine(pos);
321 int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
322 return (indentCurrent & SC_FOLDLEVELWHITEFLAG) != 0;
// Reports whether keyword is one of the fixed set of keywords after which
// the lexer prefers to read a following '/' as the start of a regular
// expression.  The comparison is exact and case-sensitive.
static bool RE_CanFollowKeyword(const char *keyword) {
    static const char *const reFriendly[] = {
        "and", "begin", "break", "case", "do",
        "else", "elsif", "if", "next", "return",
        "when", "unless", "until", "not", "or"
    };
    const size_t total = sizeof(reFriendly) / sizeof(reFriendly[0]);
    for (size_t k = 0; k < total; k++) {
        if (strcmp(keyword, reFriendly[k]) == 0) {
            return true;
        }
    }
    return false;
}
346 // Look at chars up to but not including endPos
347 // Don't look at styles in case we're looking forward
349 static int skipWhitespace(int startPos,
350 int endPos,
351 Accessor &styler) {
352 for (int i = startPos; i < endPos; i++) {
353 if (!iswhitespace(styler[i])) {
354 return i;
357 return endPos;
360 // This routine looks for false positives like
361 // undef foo, <<
362 // There aren't too many.
364 // iPrev points to the start of <<
366 static bool sureThisIsHeredoc(int iPrev,
367 Accessor &styler,
368 char *prevWord) {
370 // Not so fast, since Ruby's so dynamic. Check the context
371 // to make sure we're OK.
372 int prevStyle;
373 int lineStart = styler.GetLine(iPrev);
374 int lineStartPosn = styler.LineStart(lineStart);
375 styler.Flush();
377 // Find the first word after some whitespace
378 int firstWordPosn = skipWhitespace(lineStartPosn, iPrev, styler);
379 if (firstWordPosn >= iPrev) {
380 // Have something like {^ <<}
381 //XXX Look at the first previous non-comment non-white line
382 // to establish the context. Not too likely though.
383 return true;
384 } else {
385 switch (prevStyle = styler.StyleAt(firstWordPosn)) {
386 case SCE_RB_WORD:
387 case SCE_RB_WORD_DEMOTED:
388 case SCE_RB_IDENTIFIER:
389 break;
390 default:
391 return true;
394 int firstWordEndPosn = firstWordPosn;
395 char *dst = prevWord;
396 for (;;) {
397 if (firstWordEndPosn >= iPrev ||
398 styler.StyleAt(firstWordEndPosn) != prevStyle) {
399 *dst = 0;
400 break;
402 *dst++ = styler[firstWordEndPosn];
403 firstWordEndPosn += 1;
405 //XXX Write a style-aware thing to regex scintilla buffer objects
406 if (!strcmp(prevWord, "undef")
407 || !strcmp(prevWord, "def")
408 || !strcmp(prevWord, "alias")) {
409 // These keywords are what we were looking for
410 return false;
412 return true;
415 // Routine that saves us from allocating a buffer for the here-doc target
416 // targetEndPos points one past the end of the current target
417 static bool haveTargetMatch(int currPos,
418 int lengthDoc,
419 int targetStartPos,
420 int targetEndPos,
421 Accessor &styler) {
422 if (lengthDoc - currPos < targetEndPos - targetStartPos) {
423 return false;
425 int i, j;
426 for (i = targetStartPos, j = currPos;
427 i < targetEndPos && j < lengthDoc;
428 i++, j++) {
429 if (styler[i] != styler[j]) {
430 return false;
433 return true;
436 // We need a check because the form
437 // [identifier] <<[target]
438 // is ambiguous. The Ruby lexer/parser resolves it by
439 // looking to see if [identifier] names a variable or a
440 // function. If it names a function, it's the start of a here-doc.
441 // If it names a variable, it's an operator. This lexer doesn't
442 // maintain a symbol table, so it looks ahead to see what's
443 // going on, in cases where we have
444 // ^[white-space]*[identifier([.|::]identifier)*][white-space]*<<[target]
446 // If there's no occurrence of [target] on a line, assume we don't.
448 // return true == yes, we have no heredocs
450 static bool sureThisIsNotHeredoc(int lt2StartPos,
451 Accessor &styler) {
452 int prevStyle;
453 // Use full document, not just part we're styling
454 int lengthDoc = styler.Length();
455 int lineStart = styler.GetLine(lt2StartPos);
456 int lineStartPosn = styler.LineStart(lineStart);
457 styler.Flush();
458 const bool definitely_not_a_here_doc = true;
459 const bool looks_like_a_here_doc = false;
461 // Find the first word after some whitespace
462 int firstWordPosn = skipWhitespace(lineStartPosn, lt2StartPos, styler);
463 if (firstWordPosn >= lt2StartPos) {
464 return definitely_not_a_here_doc;
466 prevStyle = styler.StyleAt(firstWordPosn);
467 // If we have '<<' following a keyword, it's not a heredoc
468 if (prevStyle != SCE_RB_IDENTIFIER
469 && prevStyle != SCE_RB_INSTANCE_VAR
470 && prevStyle != SCE_RB_CLASS_VAR) {
471 return definitely_not_a_here_doc;
473 int newStyle = prevStyle;
474 // Some compilers incorrectly warn about uninit newStyle
475 for (firstWordPosn += 1; firstWordPosn <= lt2StartPos; firstWordPosn += 1) {
476 // Inner loop looks at the name
477 for (; firstWordPosn <= lt2StartPos; firstWordPosn += 1) {
478 newStyle = styler.StyleAt(firstWordPosn);
479 if (newStyle != prevStyle) {
480 break;
483 // Do we have '::' or '.'?
484 if (firstWordPosn < lt2StartPos && newStyle == SCE_RB_OPERATOR) {
485 char ch = styler[firstWordPosn];
486 if (ch == '.') {
487 // yes
488 } else if (ch == ':') {
489 if (styler.StyleAt(++firstWordPosn) != SCE_RB_OPERATOR) {
490 return definitely_not_a_here_doc;
491 } else if (styler[firstWordPosn] != ':') {
492 return definitely_not_a_here_doc;
494 } else {
495 break;
497 } else {
498 break;
500 // on second and next passes, only identifiers may appear since
501 // class and instance variable are private
502 prevStyle = SCE_RB_IDENTIFIER;
504 // Skip next batch of white-space
505 firstWordPosn = skipWhitespace(firstWordPosn, lt2StartPos, styler);
506 if (firstWordPosn != lt2StartPos) {
507 // Have [[^ws[identifier]ws[*something_else*]ws<<
508 return definitely_not_a_here_doc;
510 // OK, now 'j' will point to the current spot moving ahead
511 int j = firstWordPosn + 1;
512 if (styler.StyleAt(j) != SCE_RB_OPERATOR || styler[j] != '<') {
513 // This shouldn't happen
514 return definitely_not_a_here_doc;
516 int nextLineStartPosn = styler.LineStart(lineStart + 1);
517 if (nextLineStartPosn >= lengthDoc) {
518 return definitely_not_a_here_doc;
520 j = skipWhitespace(j + 1, nextLineStartPosn, styler);
521 if (j >= lengthDoc) {
522 return definitely_not_a_here_doc;
524 bool allow_indent;
525 int target_start, target_end;
526 // From this point on no more styling, since we're looking ahead
527 if (styler[j] == '-') {
528 allow_indent = true;
529 j++;
530 } else {
531 allow_indent = false;
534 // Allow for quoted targets.
535 char target_quote = 0;
536 switch (styler[j]) {
537 case '\'':
538 case '"':
539 case '`':
540 target_quote = styler[j];
541 j += 1;
544 if (isSafeAlnum(styler[j])) {
545 // Init target_end because some compilers think it won't
546 // be initialized by the time it's used
547 target_start = target_end = j;
548 j++;
549 } else {
550 return definitely_not_a_here_doc;
552 for (; j < lengthDoc; j++) {
553 if (!isSafeAlnum(styler[j])) {
554 if (target_quote && styler[j] != target_quote) {
555 // unquoted end
556 return definitely_not_a_here_doc;
559 // And for now make sure that it's a newline
560 // don't handle arbitrary expressions yet
562 target_end = j;
563 if (target_quote) {
564 // Now we can move to the character after the string delimiter.
565 j += 1;
567 j = skipWhitespace(j, lengthDoc, styler);
568 if (j >= lengthDoc) {
569 return definitely_not_a_here_doc;
570 } else {
571 char ch = styler[j];
572 if (ch == '#' || isEOLChar(ch)) {
573 // This is OK, so break and continue;
574 break;
575 } else {
576 return definitely_not_a_here_doc;
582 // Just look at the start of each line
583 int last_line = styler.GetLine(lengthDoc - 1);
584 // But don't go too far
585 if (last_line > lineStart + 50) {
586 last_line = lineStart + 50;
588 for (int line_num = lineStart + 1; line_num <= last_line; line_num++) {
589 if (allow_indent) {
590 j = skipWhitespace(styler.LineStart(line_num), lengthDoc, styler);
591 } else {
592 j = styler.LineStart(line_num);
594 // target_end is one past the end
595 if (haveTargetMatch(j, lengthDoc, target_start, target_end, styler)) {
596 // We got it
597 return looks_like_a_here_doc;
600 return definitely_not_a_here_doc;
603 //todo: if we aren't looking at a stdio character,
604 // move to the start of the first line that is not in a
605 // multi-line construct
607 static void synchronizeDocStart(unsigned int &startPos,
608 int &length,
609 int &initStyle,
610 Accessor &styler,
611 bool skipWhiteSpace=false) {
613 styler.Flush();
614 int style = actual_style(styler.StyleAt(startPos));
615 switch (style) {
616 case SCE_RB_STDIN:
617 case SCE_RB_STDOUT:
618 case SCE_RB_STDERR:
619 // Don't do anything else with these.
620 return;
623 int pos = startPos;
624 // Quick way to characterize each line
625 int lineStart;
626 for (lineStart = styler.GetLine(pos); lineStart > 0; lineStart--) {
627 // Now look at the style before the previous line's EOL
628 pos = styler.LineStart(lineStart) - 1;
629 if (pos <= 10) {
630 lineStart = 0;
631 break;
633 char ch = styler.SafeGetCharAt(pos);
634 char chPrev = styler.SafeGetCharAt(pos - 1);
635 if (ch == '\n' && chPrev == '\r') {
636 pos--;
638 if (styler.SafeGetCharAt(pos - 1) == '\\') {
639 // Continuation line -- keep going
640 } else if (actual_style(styler.StyleAt(pos)) != SCE_RB_DEFAULT) {
641 // Part of multi-line construct -- keep going
642 } else if (currLineContainsHereDelims(pos, styler)) {
643 // Keep going, with pos and length now pointing
644 // at the end of the here-doc delimiter
645 } else if (skipWhiteSpace && isEmptyLine(pos, styler)) {
646 // Keep going
647 } else {
648 break;
651 pos = styler.LineStart(lineStart);
652 length += (startPos - pos);
653 startPos = pos;
654 initStyle = SCE_RB_DEFAULT;
657 static void ColouriseRbDoc(unsigned int startPos, int length, int initStyle,
658 WordList *keywordlists[], Accessor &styler) {
660 // Lexer for Ruby often has to backtrack to start of current style to determine
661 // which characters are being used as quotes, how deeply nested is the
662 // start position and what the termination string is for here documents
664 WordList &keywords = *keywordlists[0];
666 class HereDocCls {
667 public:
668 int State;
669 // States
670 // 0: '<<' encountered
671 // 1: collect the delimiter
672 // 1b: text between the end of the delimiter and the EOL
673 // 2: here doc text (lines after the delimiter)
674 char Quote; // the char after '<<'
675 bool Quoted; // true if Quote in ('\'','"','`')
676 int DelimiterLength; // strlen(Delimiter)
677 char Delimiter[256]; // the Delimiter, limit of 256: from Perl
678 bool CanBeIndented;
679 HereDocCls() {
680 State = 0;
681 DelimiterLength = 0;
682 Delimiter[0] = '\0';
683 CanBeIndented = false;
686 HereDocCls HereDoc;
688 QuoteCls Quote;
690 int numDots = 0; // For numbers --
691 // Don't start lexing in the middle of a num
693 synchronizeDocStart(startPos, length, initStyle, styler, // ref args
694 false);
696 bool preferRE = true;
697 int state = initStyle;
698 int lengthDoc = startPos + length;
700 char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
701 prevWord[0] = '\0';
702 if (length == 0)
703 return;
705 char chPrev = styler.SafeGetCharAt(startPos - 1);
706 char chNext = styler.SafeGetCharAt(startPos);
707 bool is_real_number = true; // Differentiate between constants and ?-sequences.
708 styler.StartAt(startPos);
709 styler.StartSegment(startPos);
711 static int q_states[] = {SCE_RB_STRING_Q,
712 SCE_RB_STRING_QQ,
713 SCE_RB_STRING_QR,
714 SCE_RB_STRING_QW,
715 SCE_RB_STRING_QW,
716 SCE_RB_STRING_QX
718 static const char *q_chars = "qQrwWx";
720 // In most cases a value of 2 should be ample for the code in the
721 // Ruby library, and the code the user is likely to enter.
722 // For example,
723 // fu_output_message "mkdir #{options[:mode] ? ('-m %03o ' % options[:mode]) : ''}#{list.join ' '}"
724 // if options[:verbose]
725 // from fileutils.rb nests to a level of 2
726 // If the user actually hits a 6th occurrence of '#{' in a double-quoted
727 // string (including regex'es, %Q, %<sym>, %w, and other strings
728 // that interpolate), it will stay as a string. The problem with this
729 // is that quotes might flip, a 7th '#{' will look like a comment,
730 // and code-folding might be wrong.
732 // If anyone runs into this problem, I recommend raising this
733 // value slightly higher to replacing the fixed array with a linked
734 // list. Keep in mind this code will be called every time the lexer
735 // is invoked.
737 #define INNER_STRINGS_MAX_COUNT 5
738 // These vars track our instances of "...#{,,,%Q<..#{,,,}...>,,,}..."
739 int inner_string_types[INNER_STRINGS_MAX_COUNT];
740 // Track # braces when we push a new #{ thing
741 int inner_expn_brace_counts[INNER_STRINGS_MAX_COUNT];
742 QuoteCls inner_quotes[INNER_STRINGS_MAX_COUNT];
743 int inner_string_count = 0;
744 int brace_counts = 0; // Number of #{ ... } things within an expression
746 int i;
747 for (i = 0; i < INNER_STRINGS_MAX_COUNT; i++) {
748 inner_string_types[i] = 0;
749 inner_expn_brace_counts[i] = 0;
751 for (i = startPos; i < lengthDoc; i++) {
752 char ch = chNext;
753 chNext = styler.SafeGetCharAt(i + 1);
754 char chNext2 = styler.SafeGetCharAt(i + 2);
756 if (styler.IsLeadByte(ch)) {
757 chNext = chNext2;
758 chPrev = ' ';
759 i += 1;
760 continue;
763 // skip on DOS/Windows
764 //No, don't, because some things will get tagged on,
765 // so we won't recognize keywords, for example
766 #if 0
767 if (ch == '\r' && chNext == '\n') {
768 continue;
770 #endif
772 if (HereDoc.State == 1 && isEOLChar(ch)) {
773 // Begin of here-doc (the line after the here-doc delimiter):
774 HereDoc.State = 2;
775 styler.ColourTo(i-1, state);
776 // Don't check for a missing quote, just jump into
777 // the here-doc state
778 state = SCE_RB_HERE_Q;
781 // Regular transitions
782 if (state == SCE_RB_DEFAULT) {
783 if (isSafeDigit(ch)) {
784 styler.ColourTo(i - 1, state);
785 state = SCE_RB_NUMBER;
786 is_real_number = true;
787 numDots = 0;
788 } else if (isHighBitChar(ch) || iswordstart(ch)) {
789 styler.ColourTo(i - 1, state);
790 state = SCE_RB_WORD;
791 } else if (ch == '#') {
792 styler.ColourTo(i - 1, state);
793 state = SCE_RB_COMMENTLINE;
794 } else if (ch == '=') {
795 // =begin indicates the start of a comment (doc) block
796 if ((i == 0 || isEOLChar(chPrev))
797 && chNext == 'b'
798 && styler.SafeGetCharAt(i + 2) == 'e'
799 && styler.SafeGetCharAt(i + 3) == 'g'
800 && styler.SafeGetCharAt(i + 4) == 'i'
801 && styler.SafeGetCharAt(i + 5) == 'n'
802 && !isSafeWordcharOrHigh(styler.SafeGetCharAt(i + 6))) {
803 styler.ColourTo(i - 1, state);
804 state = SCE_RB_POD;
805 } else {
806 styler.ColourTo(i - 1, state);
807 styler.ColourTo(i, SCE_RB_OPERATOR);
808 preferRE = true;
810 } else if (ch == '"') {
811 styler.ColourTo(i - 1, state);
812 state = SCE_RB_STRING;
813 Quote.New();
814 Quote.Open(ch);
815 } else if (ch == '\'') {
816 styler.ColourTo(i - 1, state);
817 state = SCE_RB_CHARACTER;
818 Quote.New();
819 Quote.Open(ch);
820 } else if (ch == '`') {
821 styler.ColourTo(i - 1, state);
822 state = SCE_RB_BACKTICKS;
823 Quote.New();
824 Quote.Open(ch);
825 } else if (ch == '@') {
826 // Instance or class var
827 styler.ColourTo(i - 1, state);
828 if (chNext == '@') {
829 state = SCE_RB_CLASS_VAR;
830 advance_char(i, ch, chNext, chNext2); // pass by ref
831 } else {
832 state = SCE_RB_INSTANCE_VAR;
834 } else if (ch == '$') {
835 // Check for a builtin global
836 styler.ColourTo(i - 1, state);
837 // Recognize it bit by bit
838 state = SCE_RB_GLOBAL;
839 } else if (ch == '/' && preferRE) {
840 // Ambigous operator
841 styler.ColourTo(i - 1, state);
842 state = SCE_RB_REGEX;
843 Quote.New();
844 Quote.Open(ch);
845 } else if (ch == '<' && chNext == '<' && chNext2 != '=') {
847 // Recognise the '<<' symbol - either a here document or a binary op
848 styler.ColourTo(i - 1, state);
849 i++;
850 chNext = chNext2;
851 styler.ColourTo(i, SCE_RB_OPERATOR);
853 if (!(strchr("\"\'`_-", chNext2) || isSafeAlpha(chNext2))) {
854 // It's definitely not a here-doc,
855 // based on Ruby's lexer/parser in the
856 // heredoc_identifier routine.
857 // Nothing else to do.
858 } else if (preferRE) {
859 if (sureThisIsHeredoc(i - 1, styler, prevWord)) {
860 state = SCE_RB_HERE_DELIM;
861 HereDoc.State = 0;
863 // else leave it in default state
864 } else {
865 if (sureThisIsNotHeredoc(i - 1, styler)) {
866 // leave state as default
867 // We don't have all the heuristics Perl has for indications
868 // of a here-doc, because '<<' is overloadable and used
869 // for so many other classes.
870 } else {
871 state = SCE_RB_HERE_DELIM;
872 HereDoc.State = 0;
875 preferRE = (state != SCE_RB_HERE_DELIM);
876 } else if (ch == ':') {
877 styler.ColourTo(i - 1, state);
878 if (chNext == ':') {
879 // Mark "::" as an operator, not symbol start
880 styler.ColourTo(i + 1, SCE_RB_OPERATOR);
881 advance_char(i, ch, chNext, chNext2); // pass by ref
882 state = SCE_RB_DEFAULT;
883 preferRE = false;
884 } else if (isSafeWordcharOrHigh(chNext)) {
885 state = SCE_RB_SYMBOL;
886 } else if ((chNext == '@' || chNext == '$') &&
887 isSafeWordcharOrHigh(chNext2)) {
888 // instance and global variable followed by an identifier
889 advance_char(i, ch, chNext, chNext2);
890 state = SCE_RB_SYMBOL;
891 } else if (((chNext == '@' && chNext2 == '@') ||
892 (chNext == '$' && chNext2 == '-')) &&
893 isSafeWordcharOrHigh(styler.SafeGetCharAt(i+3))) {
894 // class variables and special global variable "$-IDENTCHAR"
895 state = SCE_RB_SYMBOL;
896 // $-IDENTCHAR doesn't continue past the IDENTCHAR
897 if (chNext == '$') {
898 styler.ColourTo(i+3, SCE_RB_SYMBOL);
899 state = SCE_RB_DEFAULT;
901 i += 3;
902 ch = styler.SafeGetCharAt(i);
903 chNext = styler.SafeGetCharAt(i+1);
904 } else if (chNext == '$' && strchr("_~*$?!@/\\;,.=:<>\"&`'+", chNext2)) {
905 // single-character special global variables
906 i += 2;
907 ch = chNext2;
908 chNext = styler.SafeGetCharAt(i+1);
909 styler.ColourTo(i, SCE_RB_SYMBOL);
910 state = SCE_RB_DEFAULT;
911 } else if (strchr("[*!~+-*/%=<>&^|", chNext)) {
912 // Do the operator analysis in-line, looking ahead
913 // Based on the table in pickaxe 2nd ed., page 339
914 bool doColoring = true;
915 switch (chNext) {
916 case '[':
917 if (chNext2 == ']') {
918 char ch_tmp = styler.SafeGetCharAt(i + 3);
919 if (ch_tmp == '=') {
920 i += 3;
921 ch = ch_tmp;
922 chNext = styler.SafeGetCharAt(i + 1);
923 } else {
924 i += 2;
925 ch = chNext2;
926 chNext = ch_tmp;
928 } else {
929 doColoring = false;
931 break;
933 case '*':
934 if (chNext2 == '*') {
935 i += 2;
936 ch = chNext2;
937 chNext = styler.SafeGetCharAt(i + 1);
938 } else {
939 advance_char(i, ch, chNext, chNext2);
941 break;
943 case '!':
944 if (chNext2 == '=' || chNext2 == '~') {
945 i += 2;
946 ch = chNext2;
947 chNext = styler.SafeGetCharAt(i + 1);
948 } else {
949 advance_char(i, ch, chNext, chNext2);
951 break;
953 case '<':
954 if (chNext2 == '<') {
955 i += 2;
956 ch = chNext2;
957 chNext = styler.SafeGetCharAt(i + 1);
958 } else if (chNext2 == '=') {
959 char ch_tmp = styler.SafeGetCharAt(i + 3);
960 if (ch_tmp == '>') { // <=> operator
961 i += 3;
962 ch = ch_tmp;
963 chNext = styler.SafeGetCharAt(i + 1);
964 } else {
965 i += 2;
966 ch = chNext2;
967 chNext = ch_tmp;
969 } else {
970 advance_char(i, ch, chNext, chNext2);
972 break;
974 default:
975 // Simple one-character operators
976 advance_char(i, ch, chNext, chNext2);
977 break;
979 if (doColoring) {
980 styler.ColourTo(i, SCE_RB_SYMBOL);
981 state = SCE_RB_DEFAULT;
983 } else if (!preferRE) {
984 // Don't color symbol strings (yet)
985 // Just color the ":" and color rest as string
986 styler.ColourTo(i, SCE_RB_SYMBOL);
987 state = SCE_RB_DEFAULT;
988 } else {
989 styler.ColourTo(i, SCE_RB_OPERATOR);
990 state = SCE_RB_DEFAULT;
991 preferRE = true;
993 } else if (ch == '%') {
994 styler.ColourTo(i - 1, state);
995 bool have_string = false;
996 if (strchr(q_chars, chNext) && !isSafeWordcharOrHigh(chNext2)) {
997 Quote.New();
998 const char *hit = strchr(q_chars, chNext);
999 if (hit != NULL) {
1000 state = q_states[hit - q_chars];
1001 Quote.Open(chNext2);
1002 i += 2;
1003 ch = chNext2;
1004 chNext = styler.SafeGetCharAt(i + 1);
1005 have_string = true;
1007 } else if (preferRE && !isSafeWordcharOrHigh(chNext)) {
1008 // Ruby doesn't allow high bit chars here,
1009 // but the editor host might
1010 Quote.New();
1011 state = SCE_RB_STRING_QQ;
1012 Quote.Open(chNext);
1013 advance_char(i, ch, chNext, chNext2); // pass by ref
1014 have_string = true;
1015 } else if (!isSafeWordcharOrHigh(chNext) && !iswhitespace(chNext) && !isEOLChar(chNext)) {
1016 // Ruby doesn't allow high bit chars here,
1017 // but the editor host might
1018 Quote.New();
1019 state = SCE_RB_STRING_QQ;
1020 Quote.Open(chNext);
1021 advance_char(i, ch, chNext, chNext2); // pass by ref
1022 have_string = true;
1024 if (!have_string) {
1025 styler.ColourTo(i, SCE_RB_OPERATOR);
1026 // stay in default
1027 preferRE = true;
1029 } else if (ch == '?') {
1030 styler.ColourTo(i - 1, state);
1031 if (iswhitespace(chNext) || chNext == '\n' || chNext == '\r') {
1032 styler.ColourTo(i, SCE_RB_OPERATOR);
1033 } else {
1034 // It's the start of a character code escape sequence
1035 // Color it as a number.
1036 state = SCE_RB_NUMBER;
1037 is_real_number = false;
1039 } else if (isoperator(ch) || ch == '.') {
1040 styler.ColourTo(i - 1, state);
1041 styler.ColourTo(i, SCE_RB_OPERATOR);
1042 // If we're ending an expression or block,
1043 // assume it ends an object, and the ambivalent
1044 // constructs are binary operators
1046 // So if we don't have one of these chars,
1047 // we aren't ending an object exp'n, and ops
1048 // like : << / are unary operators.
1050 if (ch == '{') {
1051 ++brace_counts;
1052 preferRE = true;
1053 } else if (ch == '}' && --brace_counts < 0
1054 && inner_string_count > 0) {
1055 styler.ColourTo(i, SCE_RB_OPERATOR);
1056 exitInnerExpression(inner_string_types,
1057 inner_expn_brace_counts,
1058 inner_quotes,
1059 inner_string_count,
1060 state, brace_counts, Quote);
1061 } else {
1062 preferRE = (strchr(")}].", ch) == NULL);
1064 // Stay in default state
1065 } else if (isEOLChar(ch)) {
1066 // Make sure it's a true line-end, with no backslash
1067 if ((ch == '\r' || (ch == '\n' && chPrev != '\r'))
1068 && chPrev != '\\') {
1069 // Assume we've hit the end of the statement.
1070 preferRE = true;
1073 } else if (state == SCE_RB_WORD) {
1074 if (ch == '.' || !isSafeWordcharOrHigh(ch)) {
1075 // Words include x? in all contexts,
1076 // and <letters>= after either 'def' or a dot
1077 // Move along until a complete word is on our left
1079 // Default accessor treats '.' as word-chars,
1080 // but we don't for now.
1082 if (ch == '='
1083 && isSafeWordcharOrHigh(chPrev)
1084 && (chNext == '('
1085 || strchr(" \t\n\r", chNext) != NULL)
1086 && (!strcmp(prevWord, "def")
1087 || followsDot(styler.GetStartSegment(), styler))) {
1088 // <name>= is a name only when being def'd -- Get it the next time
1089 // This means that <name>=<name> is always lexed as
1090 // <name>, (op, =), <name>
1091 } else if ((ch == '?' || ch == '!')
1092 && isSafeWordcharOrHigh(chPrev)
1093 && !isSafeWordcharOrHigh(chNext)) {
1094 // <name>? is a name -- Get it the next time
1095 // But <name>?<name> is always lexed as
1096 // <name>, (op, ?), <name>
1097 // Same with <name>! to indicate a method that
1098 // modifies its target
1099 } else if (isEOLChar(ch)
1100 && isMatch(styler, lengthDoc, i - 7, "__END__")) {
1101 styler.ColourTo(i, SCE_RB_DATASECTION);
1102 state = SCE_RB_DATASECTION;
1103 // No need to handle this state -- we'll just move to the end
1104 preferRE = false;
1105 } else {
1106 int wordStartPos = styler.GetStartSegment();
1107 int word_style = ClassifyWordRb(wordStartPos, i - 1, keywords, styler, prevWord);
1108 switch (word_style) {
1109 case SCE_RB_WORD:
1110 preferRE = RE_CanFollowKeyword(prevWord);
1111 break;
1113 case SCE_RB_WORD_DEMOTED:
1114 preferRE = true;
1115 break;
1117 case SCE_RB_IDENTIFIER:
1118 if (isMatch(styler, lengthDoc, wordStartPos, "print")) {
1119 preferRE = true;
1120 } else if (isEOLChar(ch)) {
1121 preferRE = true;
1122 } else {
1123 preferRE = false;
1125 break;
1126 default:
1127 preferRE = false;
1129 if (ch == '.') {
1130 // We might be redefining an operator-method
1131 preferRE = false;
1133 // And if it's the first
1134 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1137 } else if (state == SCE_RB_NUMBER) {
1138 if (!is_real_number) {
1139 if (ch != '\\') {
1140 styler.ColourTo(i, state);
1141 state = SCE_RB_DEFAULT;
1142 preferRE = false;
1143 } else if (strchr("\\ntrfvaebs", chNext)) {
1144 // Terminal escape sequence -- handle it next time
1145 // Nothing more to do this time through the loop
1146 } else if (chNext == 'C' || chNext == 'M') {
1147 if (chNext2 != '-') {
1148 // \C or \M ends the sequence -- handle it next time
1149 } else {
1150 // Move from abc?\C-x
1151 // ^
1152 // to
1153 // ^
1154 i += 2;
1155 ch = chNext2;
1156 chNext = styler.SafeGetCharAt(i + 1);
1158 } else if (chNext == 'c') {
1159 // Stay here, \c is a combining sequence
1160 advance_char(i, ch, chNext, chNext2); // pass by ref
1161 } else {
1162 // ?\x, including ?\\ is final.
1163 styler.ColourTo(i + 1, state);
1164 state = SCE_RB_DEFAULT;
1165 preferRE = false;
1166 advance_char(i, ch, chNext, chNext2);
1168 } else if (isSafeAlnumOrHigh(ch) || ch == '_') {
1169 // Keep going
1170 } else if (ch == '.' && chNext == '.') {
1171 ++numDots;
1172 styler.ColourTo(i - 1, state);
1173 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1174 } else if (ch == '.' && ++numDots == 1) {
1175 // Keep going
1176 } else {
1177 styler.ColourTo(i - 1, state);
1178 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1179 preferRE = false;
1181 } else if (state == SCE_RB_COMMENTLINE) {
1182 if (isEOLChar(ch)) {
1183 styler.ColourTo(i - 1, state);
1184 state = SCE_RB_DEFAULT;
1185 // Use whatever setting we had going into the comment
1187 } else if (state == SCE_RB_HERE_DELIM) {
1188 // See the comment for SCE_RB_HERE_DELIM in LexPerl.cxx
1189 // Slightly different: if we find an immediate '-',
1190 // the target can appear indented.
1192 if (HereDoc.State == 0) { // '<<' encountered
1193 HereDoc.State = 1;
1194 HereDoc.DelimiterLength = 0;
1195 if (ch == '-') {
1196 HereDoc.CanBeIndented = true;
1197 advance_char(i, ch, chNext, chNext2); // pass by ref
1198 } else {
1199 HereDoc.CanBeIndented = false;
1201 if (isEOLChar(ch)) {
1202 // Bail out of doing a here doc if there's no target
1203 state = SCE_RB_DEFAULT;
1204 preferRE = false;
1205 } else {
1206 HereDoc.Quote = ch;
1208 if (ch == '\'' || ch == '"' || ch == '`') {
1209 HereDoc.Quoted = true;
1210 HereDoc.Delimiter[0] = '\0';
1211 } else {
1212 HereDoc.Quoted = false;
1213 HereDoc.Delimiter[0] = ch;
1214 HereDoc.Delimiter[1] = '\0';
1215 HereDoc.DelimiterLength = 1;
1218 } else if (HereDoc.State == 1) { // collect the delimiter
1219 if (isEOLChar(ch)) {
1220 // End the quote now, and go back for more
1221 styler.ColourTo(i - 1, state);
1222 state = SCE_RB_DEFAULT;
1223 i--;
1224 chNext = ch;
1225 preferRE = false;
1226 } else if (HereDoc.Quoted) {
1227 if (ch == HereDoc.Quote) { // closing quote => end of delimiter
1228 styler.ColourTo(i, state);
1229 state = SCE_RB_DEFAULT;
1230 preferRE = false;
1231 } else {
1232 if (ch == '\\' && !isEOLChar(chNext)) {
1233 advance_char(i, ch, chNext, chNext2);
1235 HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
1236 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
1238 } else { // an unquoted here-doc delimiter
1239 if (isSafeAlnumOrHigh(ch) || ch == '_') {
1240 HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
1241 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
1242 } else {
1243 styler.ColourTo(i - 1, state);
1244 redo_char(i, ch, chNext, chNext2, state);
1245 preferRE = false;
1248 if (HereDoc.DelimiterLength >= static_cast<int>(sizeof(HereDoc.Delimiter)) - 1) {
1249 styler.ColourTo(i - 1, state);
1250 state = SCE_RB_ERROR;
1251 preferRE = false;
1254 } else if (state == SCE_RB_HERE_Q) {
1255 // Not needed: HereDoc.State == 2
1256 // Indentable here docs: look backwards
1257 // Non-indentable: look forwards, like in Perl
1259 // Why: so we can quickly resolve things like <<-" abc"
1261 if (!HereDoc.CanBeIndented) {
1262 if (isEOLChar(chPrev)
1263 && isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) {
1264 styler.ColourTo(i - 1, state);
1265 i += HereDoc.DelimiterLength - 1;
1266 chNext = styler.SafeGetCharAt(i + 1);
1267 if (isEOLChar(chNext)) {
1268 styler.ColourTo(i, SCE_RB_HERE_DELIM);
1269 state = SCE_RB_DEFAULT;
1270 HereDoc.State = 0;
1271 preferRE = false;
1273 // Otherwise we skipped through the here doc faster.
1275 } else if (isEOLChar(chNext)
1276 && lookingAtHereDocDelim(styler,
1277 i - HereDoc.DelimiterLength + 1,
1278 lengthDoc,
1279 HereDoc.Delimiter)) {
1280 styler.ColourTo(i - 1 - HereDoc.DelimiterLength, state);
1281 styler.ColourTo(i, SCE_RB_HERE_DELIM);
1282 state = SCE_RB_DEFAULT;
1283 preferRE = false;
1284 HereDoc.State = 0;
1286 } else if (state == SCE_RB_CLASS_VAR
1287 || state == SCE_RB_INSTANCE_VAR
1288 || state == SCE_RB_SYMBOL) {
1289 if (state == SCE_RB_SYMBOL &&
1290 // FIDs suffices '?' and '!'
1291 (((ch == '!' || ch == '?') && chNext != '=') ||
1292 // identifier suffix '='
1293 (ch == '=' && (chNext != '~' && chNext != '>' &&
1294 (chNext != '=' || chNext2 == '>'))))) {
1295 styler.ColourTo(i, state);
1296 state = SCE_RB_DEFAULT;
1297 preferRE = false;
1298 } else if (!isSafeWordcharOrHigh(ch)) {
1299 styler.ColourTo(i - 1, state);
1300 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1301 preferRE = false;
1303 } else if (state == SCE_RB_GLOBAL) {
1304 if (!isSafeWordcharOrHigh(ch)) {
1305 // handle special globals here as well
1306 if (chPrev == '$') {
1307 if (ch == '-') {
1308 // Include the next char, like $-a
1309 advance_char(i, ch, chNext, chNext2);
1311 styler.ColourTo(i, state);
1312 state = SCE_RB_DEFAULT;
1313 } else {
1314 styler.ColourTo(i - 1, state);
1315 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1317 preferRE = false;
1319 } else if (state == SCE_RB_POD) {
1320 // PODs end with ^=end\s, -- any whitespace can follow =end
1321 if (strchr(" \t\n\r", ch) != NULL
1322 && i > 5
1323 && isEOLChar(styler[i - 5])
1324 && isMatch(styler, lengthDoc, i - 4, "=end")) {
1325 styler.ColourTo(i - 1, state);
1326 state = SCE_RB_DEFAULT;
1327 preferRE = false;
1329 } else if (state == SCE_RB_REGEX || state == SCE_RB_STRING_QR) {
1330 if (ch == '\\' && Quote.Up != '\\') {
1331 // Skip one
1332 advance_char(i, ch, chNext, chNext2);
1333 } else if (ch == Quote.Down) {
1334 Quote.Count--;
1335 if (Quote.Count == 0) {
1336 // Include the options
1337 while (isSafeAlpha(chNext)) {
1338 i++;
1339 ch = chNext;
1340 chNext = styler.SafeGetCharAt(i + 1);
1342 styler.ColourTo(i, state);
1343 state = SCE_RB_DEFAULT;
1344 preferRE = false;
1346 } else if (ch == Quote.Up) {
1347 // Only if close quoter != open quoter
1348 Quote.Count++;
1350 } else if (ch == '#') {
1351 if (chNext == '{'
1352 && inner_string_count < INNER_STRINGS_MAX_COUNT) {
1353 // process #{ ... }
1354 styler.ColourTo(i - 1, state);
1355 styler.ColourTo(i + 1, SCE_RB_OPERATOR);
1356 enterInnerExpression(inner_string_types,
1357 inner_expn_brace_counts,
1358 inner_quotes,
1359 inner_string_count,
1360 state,
1361 brace_counts,
1362 Quote);
1363 preferRE = true;
1364 // Skip one
1365 advance_char(i, ch, chNext, chNext2);
1366 } else {
1367 //todo: distinguish comments from pound chars
1368 // for now, handle as comment
1369 styler.ColourTo(i - 1, state);
1370 bool inEscape = false;
1371 while (++i < lengthDoc) {
1372 ch = styler.SafeGetCharAt(i);
1373 if (ch == '\\') {
1374 inEscape = true;
1375 } else if (isEOLChar(ch)) {
1376 // Comment inside a regex
1377 styler.ColourTo(i - 1, SCE_RB_COMMENTLINE);
1378 break;
1379 } else if (inEscape) {
1380 inEscape = false; // don't look at char
1381 } else if (ch == Quote.Down) {
1382 // Have the regular handler deal with this
1383 // to get trailing modifiers.
1384 i--;
1385 ch = styler[i];
1386 break;
1389 chNext = styler.SafeGetCharAt(i + 1);
1392 // Quotes of all kinds...
1393 } else if (state == SCE_RB_STRING_Q || state == SCE_RB_STRING_QQ ||
1394 state == SCE_RB_STRING_QX || state == SCE_RB_STRING_QW ||
1395 state == SCE_RB_STRING || state == SCE_RB_CHARACTER ||
1396 state == SCE_RB_BACKTICKS) {
1397 if (!Quote.Down && !isspacechar(ch)) {
1398 Quote.Open(ch);
1399 } else if (ch == '\\' && Quote.Up != '\\') {
1400 //Riddle me this: Is it safe to skip *every* escaped char?
1401 advance_char(i, ch, chNext, chNext2);
1402 } else if (ch == Quote.Down) {
1403 Quote.Count--;
1404 if (Quote.Count == 0) {
1405 styler.ColourTo(i, state);
1406 state = SCE_RB_DEFAULT;
1407 preferRE = false;
1409 } else if (ch == Quote.Up) {
1410 Quote.Count++;
1411 } else if (ch == '#' && chNext == '{'
1412 && inner_string_count < INNER_STRINGS_MAX_COUNT
1413 && state != SCE_RB_CHARACTER
1414 && state != SCE_RB_STRING_Q) {
1415 // process #{ ... }
1416 styler.ColourTo(i - 1, state);
1417 styler.ColourTo(i + 1, SCE_RB_OPERATOR);
1418 enterInnerExpression(inner_string_types,
1419 inner_expn_brace_counts,
1420 inner_quotes,
1421 inner_string_count,
1422 state,
1423 brace_counts,
1424 Quote);
1425 preferRE = true;
1426 // Skip one
1427 advance_char(i, ch, chNext, chNext2);
1431 if (state == SCE_RB_ERROR) {
1432 break;
1434 chPrev = ch;
1436 if (state == SCE_RB_WORD) {
1437 // We've ended on a word, possibly at EOF, and need to
1438 // classify it.
1439 (void) ClassifyWordRb(styler.GetStartSegment(), lengthDoc - 1, keywords, styler, prevWord);
1440 } else {
1441 styler.ColourTo(lengthDoc - 1, state);
1445 // Helper functions for folding, disambiguation keywords
1446 // Assert that there are no high-bit chars
1448 static void getPrevWord(int pos,
1449 char *prevWord,
1450 Accessor &styler,
1451 int word_state)
1453 int i;
1454 styler.Flush();
1455 for (i = pos - 1; i > 0; i--) {
1456 if (actual_style(styler.StyleAt(i)) != word_state) {
1457 i++;
1458 break;
1461 if (i < pos - MAX_KEYWORD_LENGTH) // overflow
1462 i = pos - MAX_KEYWORD_LENGTH;
1463 char *dst = prevWord;
1464 for (; i <= pos; i++) {
1465 *dst++ = styler[i];
1467 *dst = 0;
// Report whether prevWord is one of the keywords that can either open a
// block or act as a statement modifier / noise word:
// "if", "do", "while", "unless", "until" or "for".
// The comparison is exact and case-sensitive.
static bool keywordIsAmbiguous(const char *prevWord)
{
    // Ordered from most to least commonly used.  Ruby has many ways to
    // write a loop besides 'while' and 'until'.
    static const char *const ambiguousKeywords[] = {
        "if", "do", "while", "unless", "until", "for"
    };
    const size_t keywordCount =
        sizeof(ambiguousKeywords) / sizeof(ambiguousKeywords[0]);

    for (size_t k = 0; k < keywordCount; k++) {
        if (strcmp(prevWord, ambiguousKeywords[k]) == 0) {
            return true;
        }
    }
    return false;
}
1486 // Demote keywords in the following conditions:
1487 // if, while, unless, until modify a statement
1488 // do after a while or until, as a noise word (like then after if)
1490 static bool keywordIsModifier(const char *word,
1491 int pos,
1492 Accessor &styler)
1494 if (word[0] == 'd' && word[1] == 'o' && !word[2]) {
1495 return keywordDoStartsLoop(pos, styler);
1497 char ch, chPrev, chPrev2;
1498 int style = SCE_RB_DEFAULT;
1499 int lineStart = styler.GetLine(pos);
1500 int lineStartPosn = styler.LineStart(lineStart);
1501 // We want to step backwards until we don't care about the current
1502 // position. But first move lineStartPosn back behind any
1503 // continuations immediately above word.
1504 while (lineStartPosn > 0) {
1505 ch = styler[lineStartPosn-1];
1506 if (ch == '\n' || ch == '\r') {
1507 chPrev = styler.SafeGetCharAt(lineStartPosn-2);
1508 chPrev2 = styler.SafeGetCharAt(lineStartPosn-3);
1509 lineStart = styler.GetLine(lineStartPosn-1);
1510 // If we find a continuation line, include it in our analysis.
1511 if (chPrev == '\\') {
1512 lineStartPosn = styler.LineStart(lineStart);
1513 } else if (ch == '\n' && chPrev == '\r' && chPrev2 == '\\') {
1514 lineStartPosn = styler.LineStart(lineStart);
1515 } else {
1516 break;
1518 } else {
1519 break;
1523 styler.Flush();
1524 while (--pos >= lineStartPosn) {
1525 style = actual_style(styler.StyleAt(pos));
1526 if (style == SCE_RB_DEFAULT) {
1527 if (iswhitespace(ch = styler[pos])) {
1528 //continue
1529 } else if (ch == '\r' || ch == '\n') {
1530 // Scintilla's LineStart() and GetLine() routines aren't
1531 // platform-independent, so if we have text prepared with
1532 // a different system we can't rely on it.
1534 // Also, lineStartPosn may have been moved to more than one
1535 // line above word's line while pushing past continuations.
1536 chPrev = styler.SafeGetCharAt(pos - 1);
1537 chPrev2 = styler.SafeGetCharAt(pos - 2);
1538 if (chPrev == '\\') {
1539 pos-=1; // gloss over the "\\"
1540 //continue
1541 } else if (ch == '\n' && chPrev == '\r' && chPrev2 == '\\') {
1542 pos-=2; // gloss over the "\\\r"
1543 //continue
1544 } else {
1545 return false;
1548 } else {
1549 break;
1552 if (pos < lineStartPosn) {
1553 return false;
1555 // First things where the action is unambiguous
1556 switch (style) {
1557 case SCE_RB_DEFAULT:
1558 case SCE_RB_COMMENTLINE:
1559 case SCE_RB_POD:
1560 case SCE_RB_CLASSNAME:
1561 case SCE_RB_DEFNAME:
1562 case SCE_RB_MODULE_NAME:
1563 return false;
1564 case SCE_RB_OPERATOR:
1565 break;
1566 case SCE_RB_WORD:
1567 // Watch out for uses of 'else if'
1568 //XXX: Make a list of other keywords where 'if' isn't a modifier
1569 // and can appear legitimately
1570 // Formulate this to avoid warnings from most compilers
1571 if (strcmp(word, "if") == 0) {
1572 char prevWord[MAX_KEYWORD_LENGTH + 1];
1573 getPrevWord(pos, prevWord, styler, SCE_RB_WORD);
1574 return strcmp(prevWord, "else") != 0;
1576 return true;
1577 default:
1578 return true;
1580 // Assume that if the keyword follows an operator,
1581 // usually it's a block assignment, like
1582 // a << if x then y else z
1584 ch = styler[pos];
1585 switch (ch) {
1586 case ')':
1587 case ']':
1588 case '}':
1589 return true;
1590 default:
1591 return false;
1595 #define WHILE_BACKWARDS "elihw"
1596 #define UNTIL_BACKWARDS "litnu"
1597 #define FOR_BACKWARDS "rof"
1599 // Nothing fancy -- look to see if we follow a while/until somewhere
1600 // on the current line
1602 static bool keywordDoStartsLoop(int pos,
1603 Accessor &styler)
1605 char ch;
1606 int style;
1607 int lineStart = styler.GetLine(pos);
1608 int lineStartPosn = styler.LineStart(lineStart);
1609 styler.Flush();
1610 while (--pos >= lineStartPosn) {
1611 style = actual_style(styler.StyleAt(pos));
1612 if (style == SCE_RB_DEFAULT) {
1613 if ((ch = styler[pos]) == '\r' || ch == '\n') {
1614 // Scintilla's LineStart() and GetLine() routines aren't
1615 // platform-independent, so if we have text prepared with
1616 // a different system we can't rely on it.
1617 return false;
1619 } else if (style == SCE_RB_WORD) {
1620 // Check for while or until, but write the word in backwards
1621 char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
1622 char *dst = prevWord;
1623 int wordLen = 0;
1624 int start_word;
1625 for (start_word = pos;
1626 start_word >= lineStartPosn && actual_style(styler.StyleAt(start_word)) == SCE_RB_WORD;
1627 start_word--) {
1628 if (++wordLen < MAX_KEYWORD_LENGTH) {
1629 *dst++ = styler[start_word];
1632 *dst = 0;
1633 // Did we see our keyword?
1634 if (!strcmp(prevWord, WHILE_BACKWARDS)
1635 || !strcmp(prevWord, UNTIL_BACKWARDS)
1636 || !strcmp(prevWord, FOR_BACKWARDS)) {
1637 return true;
1639 // We can move pos to the beginning of the keyword, and then
1640 // accept another decrement, as we can never have two contiguous
1641 // keywords:
1642 // word1 word2
1643 // ^
1644 // <- move to start_word
1645 // ^
1646 // <- loop decrement
1647 // ^ # pointing to end of word1 is fine
1648 pos = start_word;
1651 return false;
1654 static bool IsCommentLine(int line, Accessor &styler) {
1655 int pos = styler.LineStart(line);
1656 int eol_pos = styler.LineStart(line + 1) - 1;
1657 for (int i = pos; i < eol_pos; i++) {
1658 char ch = styler[i];
1659 if (ch == '#')
1660 return true;
1661 else if (ch != ' ' && ch != '\t')
1662 return false;
1664 return false;
/*
 * Folding Ruby
 *
 * The language is quite complex to analyze without a full parse.
 * For example, this line shouldn't affect the fold level:
 *
 *   print "hello" if feeling_friendly?
 *
 * Neither should this:
 *
 *   print "hello" \
 *      if feeling_friendly?
 *
 * But this should:
 *
 *   if feeling_friendly?  #++
 *     print "hello" \
 *     print "goodbye"
 *   end                   #--
 *
 * So we cheat, by actually looking at the existing indentation
 * levels for each line, and just echoing them back.  Like Python.
 * Then if we get better at it, we'll take braces into consideration,
 * which always affect folding levels.
 *
 * How the keywords should work:
 *
 * No effect:
 *   __FILE__ __LINE__ BEGIN END alias and
 *   defined? false in nil not or self super then
 *   true undef
 *
 * Always increment:
 *   begin class def do for module when {
 *
 * Always decrement:
 *   end }
 *
 * Increment if these start a statement:
 *   if unless until while -- do nothing if they're modifiers
 *
 * These end a block if there's no modifier, but don't bother:
 *   break next redo retry return yield
 *
 * These temporarily de-indent, but re-indent:
 *   case else elsif ensure rescue
 *
 * This means that the folder reflects indentation rather
 * than setting it.  The language service updates indentation
 * when users press Return and when they finish entering de-denters.
 *
 * Later: offer to fold POD, here-docs, strings, and blocks of comments.
 */
1721 static void FoldRbDoc(unsigned int startPos, int length, int initStyle,
1722 WordList *[], Accessor &styler) {
1723 const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
1724 bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
1726 synchronizeDocStart(startPos, length, initStyle, styler, // ref args
1727 false);
1728 unsigned int endPos = startPos + length;
1729 int visibleChars = 0;
1730 int lineCurrent = styler.GetLine(startPos);
1731 int levelPrev = startPos == 0 ? 0 : (styler.LevelAt(lineCurrent)
1732 & SC_FOLDLEVELNUMBERMASK
1733 & ~SC_FOLDLEVELBASE);
1734 int levelCurrent = levelPrev;
1735 char chNext = styler[startPos];
1736 int styleNext = styler.StyleAt(startPos);
1737 int stylePrev = startPos <= 1 ? SCE_RB_DEFAULT : styler.StyleAt(startPos - 1);
1738 bool buffer_ends_with_eol = false;
1739 for (unsigned int i = startPos; i < endPos; i++) {
1740 char ch = chNext;
1741 chNext = styler.SafeGetCharAt(i + 1);
1742 int style = styleNext;
1743 styleNext = styler.StyleAt(i + 1);
1744 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1746 /*Mutiline comment patch*/
1747 if (foldComment && atEOL && IsCommentLine(lineCurrent, styler)) {
1748 if (!IsCommentLine(lineCurrent - 1, styler)
1749 && IsCommentLine(lineCurrent + 1, styler))
1750 levelCurrent++;
1751 else if (IsCommentLine(lineCurrent - 1, styler)
1752 && !IsCommentLine(lineCurrent + 1, styler))
1753 levelCurrent--;
1756 if (style == SCE_RB_COMMENTLINE) {
1757 if (foldComment && stylePrev != SCE_RB_COMMENTLINE) {
1758 if (chNext == '{') {
1759 levelCurrent++;
1760 } else if (chNext == '}' && levelCurrent > 0) {
1761 levelCurrent--;
1764 } else if (style == SCE_RB_OPERATOR) {
1765 if (strchr("[{(", ch)) {
1766 levelCurrent++;
1767 } else if (strchr(")}]", ch)) {
1768 // Don't decrement below 0
1769 if (levelCurrent > 0)
1770 levelCurrent--;
1772 } else if (style == SCE_RB_WORD && styleNext != SCE_RB_WORD) {
1773 // Look at the keyword on the left and decide what to do
1774 char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
1775 prevWord[0] = 0;
1776 getPrevWord(i, prevWord, styler, SCE_RB_WORD);
1777 if (!strcmp(prevWord, "end")) {
1778 // Don't decrement below 0
1779 if (levelCurrent > 0)
1780 levelCurrent--;
1781 } else if (!strcmp(prevWord, "if")
1782 || !strcmp(prevWord, "def")
1783 || !strcmp(prevWord, "class")
1784 || !strcmp(prevWord, "module")
1785 || !strcmp(prevWord, "begin")
1786 || !strcmp(prevWord, "case")
1787 || !strcmp(prevWord, "do")
1788 || !strcmp(prevWord, "while")
1789 || !strcmp(prevWord, "unless")
1790 || !strcmp(prevWord, "until")
1791 || !strcmp(prevWord, "for")
1793 levelCurrent++;
1795 } else if (style == SCE_RB_HERE_DELIM) {
1796 if (styler.SafeGetCharAt(i-2) == '<' && styler.SafeGetCharAt(i-1) == '<') {
1797 levelCurrent++;
1798 } else if (styleNext == SCE_RB_DEFAULT) {
1799 levelCurrent--;
1802 if (atEOL) {
1803 int lev = levelPrev;
1804 if (visibleChars == 0 && foldCompact)
1805 lev |= SC_FOLDLEVELWHITEFLAG;
1806 if ((levelCurrent > levelPrev) && (visibleChars > 0))
1807 lev |= SC_FOLDLEVELHEADERFLAG;
1808 styler.SetLevel(lineCurrent, lev|SC_FOLDLEVELBASE);
1809 lineCurrent++;
1810 levelPrev = levelCurrent;
1811 visibleChars = 0;
1812 buffer_ends_with_eol = true;
1813 } else if (!isspacechar(ch)) {
1814 visibleChars++;
1815 buffer_ends_with_eol = false;
1817 stylePrev = style;
1819 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1820 if (!buffer_ends_with_eol) {
1821 lineCurrent++;
1822 int new_lev = levelCurrent;
1823 if (visibleChars == 0 && foldCompact)
1824 new_lev |= SC_FOLDLEVELWHITEFLAG;
1825 if ((levelCurrent > levelPrev) && (visibleChars > 0))
1826 new_lev |= SC_FOLDLEVELHEADERFLAG;
1827 levelCurrent = new_lev;
1829 styler.SetLevel(lineCurrent, levelCurrent|SC_FOLDLEVELBASE);
1832 static const char *const rubyWordListDesc[] = {
1833 "Keywords",
1837 LexerModule lmRuby(SCLEX_RUBY, ColouriseRbDoc, "ruby", FoldRbDoc, rubyWordListDesc);