Return GdkColor via out parameter rather than return value
[geany-mirror.git] / scintilla / lexers / LexRuby.cxx
blob42f30fd2a1544932022a00cb932ff1b968544c73
1 // Scintilla source code edit control
2 /** @file LexRuby.cxx
3 ** Lexer for Ruby.
4 **/
5 // Copyright 2001- by Clemens Wyss <wys@helbling.ch>
6 // The License.txt file describes the conditions under which this software may be distributed.
8 #include <stdlib.h>
9 #include <string.h>
10 #include <stdio.h>
11 #include <stdarg.h>
12 #include <assert.h>
13 #include <ctype.h>
15 #include "ILexer.h"
16 #include "Scintilla.h"
17 #include "SciLexer.h"
19 #include "WordList.h"
20 #include "LexAccessor.h"
21 #include "Accessor.h"
22 #include "StyleContext.h"
23 #include "CharacterSet.h"
24 #include "LexerModule.h"
26 #ifdef SCI_NAMESPACE
27 using namespace Scintilla;
28 #endif
//XXX Identical to Perl, put in common area
// True when ch is a carriage return or a line feed.
static inline bool isEOLChar(char ch) {
	return (ch == '\r') || (ch == '\n');
}
// True when the value, converted to unsigned int, is in the range 0..127.
#define isSafeASCII(ch) ((unsigned int)(ch) <= 127)
// This one's redundant, but makes for more readable code
#define isHighBitChar(ch) ((unsigned int)(ch) > 127)
39 static inline bool isSafeAlpha(char ch) {
40 return (isSafeASCII(ch) && isalpha(ch)) || ch == '_';
43 static inline bool isSafeAlnum(char ch) {
44 return (isSafeASCII(ch) && isalnum(ch)) || ch == '_';
47 static inline bool isSafeAlnumOrHigh(char ch) {
48 return isHighBitChar(ch) || isalnum(ch) || ch == '_';
51 static inline bool isSafeDigit(char ch) {
52 return isSafeASCII(ch) && isdigit(ch);
55 static inline bool isSafeWordcharOrHigh(char ch) {
56 // Error: scintilla's KeyWords.h includes '.' as a word-char
57 // we want to separate things that can take methods from the
58 // methods.
59 return isHighBitChar(ch) || isalnum(ch) || ch == '_';
// True for a space or a tab; line-end characters are not included.
static bool inline iswhitespace(char ch) {
	return ch == ' ' || ch == '\t';
}
// Size of the scratch buffer used when copying a word out of the document.
#define MAX_KEYWORD_LENGTH 200

// Mask applied to a raw style value to obtain the lexer style.
#define STYLE_MASK 63
// The argument is parenthesized so that an expression argument such as
// (a | b) is masked as a whole.
#define actual_style(style) ((style) & STYLE_MASK)
71 static bool followsDot(Sci_PositionU pos, Accessor &styler) {
72 styler.Flush();
73 for (; pos >= 1; --pos) {
74 int style = actual_style(styler.StyleAt(pos));
75 char ch;
76 switch (style) {
77 case SCE_RB_DEFAULT:
78 ch = styler[pos];
79 if (ch == ' ' || ch == '\t') {
80 //continue
81 } else {
82 return false;
84 break;
86 case SCE_RB_OPERATOR:
87 return styler[pos] == '.';
89 default:
90 return false;
93 return false;
96 // Forward declarations
97 static bool keywordIsAmbiguous(const char *prevWord);
98 static bool keywordDoStartsLoop(Sci_Position pos,
99 Accessor &styler);
100 static bool keywordIsModifier(const char *word,
101 Sci_Position pos,
102 Accessor &styler);
104 static int ClassifyWordRb(Sci_PositionU start, Sci_PositionU end, WordList &keywords, Accessor &styler, char *prevWord) {
105 char s[MAX_KEYWORD_LENGTH];
106 Sci_PositionU i, j;
107 Sci_PositionU lim = end - start + 1; // num chars to copy
108 if (lim >= MAX_KEYWORD_LENGTH) {
109 lim = MAX_KEYWORD_LENGTH - 1;
111 for (i = start, j = 0; j < lim; i++, j++) {
112 s[j] = styler[i];
114 s[j] = '\0';
115 int chAttr;
116 if (0 == strcmp(prevWord, "class"))
117 chAttr = SCE_RB_CLASSNAME;
118 else if (0 == strcmp(prevWord, "module"))
119 chAttr = SCE_RB_MODULE_NAME;
120 else if (0 == strcmp(prevWord, "def"))
121 chAttr = SCE_RB_DEFNAME;
122 else if (keywords.InList(s) && ((start == 0) || !followsDot(start - 1, styler))) {
123 if (keywordIsAmbiguous(s)
124 && keywordIsModifier(s, start, styler)) {
126 // Demoted keywords are colored as keywords,
127 // but do not affect changes in indentation.
129 // Consider the word 'if':
130 // 1. <<if test ...>> : normal
131 // 2. <<stmt if test>> : demoted
132 // 3. <<lhs = if ...>> : normal: start a new indent level
133 // 4. <<obj.if = 10>> : color as identifer, since it follows '.'
135 chAttr = SCE_RB_WORD_DEMOTED;
136 } else {
137 chAttr = SCE_RB_WORD;
139 } else
140 chAttr = SCE_RB_IDENTIFIER;
141 styler.ColourTo(end, chAttr);
142 if (chAttr == SCE_RB_WORD) {
143 strcpy(prevWord, s);
144 } else {
145 prevWord[0] = 0;
147 return chAttr;
151 //XXX Identical to Perl, put in common area
152 static bool isMatch(Accessor &styler, Sci_Position lengthDoc, Sci_Position pos, const char *val) {
153 if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) {
154 return false;
156 while (*val) {
157 if (*val != styler[pos++]) {
158 return false;
160 val++;
162 return true;
165 // Do Ruby better -- find the end of the line, work back,
166 // and then check for leading white space
168 // Precondition: the here-doc target can be indented
169 static bool lookingAtHereDocDelim(Accessor &styler,
170 Sci_Position pos,
171 Sci_Position lengthDoc,
172 const char *HereDocDelim)
174 if (!isMatch(styler, lengthDoc, pos, HereDocDelim)) {
175 return false;
177 while (--pos > 0) {
178 char ch = styler[pos];
179 if (isEOLChar(ch)) {
180 return true;
181 } else if (ch != ' ' && ch != '\t') {
182 return false;
185 return false;
//XXX Identical to Perl, put in common area
// Maps an opening bracket to its closing bracket.
// Any other character is returned unchanged.
static char opposite(char ch) {
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
201 // Null transitions when we see we've reached the end
202 // and need to relex the curr char.
204 static void redo_char(Sci_Position &i, char &ch, char &chNext, char &chNext2,
205 int &state) {
206 i--;
207 chNext2 = chNext;
208 chNext = ch;
209 state = SCE_RB_DEFAULT;
212 static void advance_char(Sci_Position &i, char &ch, char &chNext, char &chNext2) {
213 i++;
214 ch = chNext;
215 chNext = chNext2;
218 // precondition: startPos points to one after the EOL char
219 static bool currLineContainsHereDelims(Sci_Position &startPos,
220 Accessor &styler) {
221 if (startPos <= 1)
222 return false;
224 Sci_Position pos;
225 for (pos = startPos - 1; pos > 0; pos--) {
226 char ch = styler.SafeGetCharAt(pos);
227 if (isEOLChar(ch)) {
228 // Leave the pointers where they are -- there are no
229 // here doc delims on the current line, even if
230 // the EOL isn't default style
232 return false;
233 } else {
234 styler.Flush();
235 if (actual_style(styler.StyleAt(pos)) == SCE_RB_HERE_DELIM) {
236 break;
240 if (pos == 0) {
241 return false;
243 // Update the pointers so we don't have to re-analyze the string
244 startPos = pos;
245 return true;
248 // This class is used by the enter and exit methods, so it needs
249 // to be hoisted out of the function.
251 class QuoteCls {
252 public:
253 int Count;
254 char Up;
255 char Down;
256 QuoteCls() {
257 New();
259 void New() {
260 Count = 0;
261 Up = '\0';
262 Down = '\0';
264 void Open(char u) {
265 Count++;
266 Up = u;
267 Down = opposite(Up);
269 QuoteCls(const QuoteCls &q) {
270 // copy constructor -- use this for copying in
271 Count = q.Count;
272 Up = q.Up;
273 Down = q.Down;
275 QuoteCls &operator=(const QuoteCls &q) { // assignment constructor
276 if (this != &q) {
277 Count = q.Count;
278 Up = q.Up;
279 Down = q.Down;
281 return *this;
287 static void enterInnerExpression(int *p_inner_string_types,
288 int *p_inner_expn_brace_counts,
289 QuoteCls *p_inner_quotes,
290 int &inner_string_count,
291 int &state,
292 int &brace_counts,
293 QuoteCls curr_quote
295 p_inner_string_types[inner_string_count] = state;
296 state = SCE_RB_DEFAULT;
297 p_inner_expn_brace_counts[inner_string_count] = brace_counts;
298 brace_counts = 0;
299 p_inner_quotes[inner_string_count] = curr_quote;
300 ++inner_string_count;
303 static void exitInnerExpression(int *p_inner_string_types,
304 int *p_inner_expn_brace_counts,
305 QuoteCls *p_inner_quotes,
306 int &inner_string_count,
307 int &state,
308 int &brace_counts,
309 QuoteCls &curr_quote
311 --inner_string_count;
312 state = p_inner_string_types[inner_string_count];
313 brace_counts = p_inner_expn_brace_counts[inner_string_count];
314 curr_quote = p_inner_quotes[inner_string_count];
317 static bool isEmptyLine(Sci_Position pos,
318 Accessor &styler) {
319 int spaceFlags = 0;
320 Sci_Position lineCurrent = styler.GetLine(pos);
321 int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
322 return (indentCurrent & SC_FOLDLEVELWHITEFLAG) != 0;
// Reports whether keyword is one of the words in the list below.
// The caller uses the result to decide whether a following '/' should
// be read as the start of a regular expression.
static bool RE_CanFollowKeyword(const char *keyword) {
	if (!strcmp(keyword, "and")
	        || !strcmp(keyword, "begin")
	        || !strcmp(keyword, "break")
	        || !strcmp(keyword, "case")
	        || !strcmp(keyword, "do")
	        || !strcmp(keyword, "else")
	        || !strcmp(keyword, "elsif")
	        || !strcmp(keyword, "if")
	        || !strcmp(keyword, "next")
	        || !strcmp(keyword, "return")
	        || !strcmp(keyword, "when")
	        || !strcmp(keyword, "unless")
	        || !strcmp(keyword, "until")
	        || !strcmp(keyword, "not")
	        || !strcmp(keyword, "or")) {
		return true;
	}
	return false;
}
346 // Look at chars up to but not including endPos
347 // Don't look at styles in case we're looking forward
349 static int skipWhitespace(Sci_Position startPos,
350 Sci_Position endPos,
351 Accessor &styler) {
352 for (Sci_Position i = startPos; i < endPos; i++) {
353 if (!iswhitespace(styler[i])) {
354 return i;
357 return endPos;
360 // This routine looks for false positives like
361 // undef foo, <<
362 // There aren't too many.
364 // iPrev points to the start of <<
366 static bool sureThisIsHeredoc(Sci_Position iPrev,
367 Accessor &styler,
368 char *prevWord) {
370 // Not so fast, since Ruby's so dynamic. Check the context
371 // to make sure we're OK.
372 int prevStyle;
373 Sci_Position lineStart = styler.GetLine(iPrev);
374 Sci_Position lineStartPosn = styler.LineStart(lineStart);
375 styler.Flush();
377 // Find the first word after some whitespace
378 Sci_Position firstWordPosn = skipWhitespace(lineStartPosn, iPrev, styler);
379 if (firstWordPosn >= iPrev) {
380 // Have something like {^ <<}
381 //XXX Look at the first previous non-comment non-white line
382 // to establish the context. Not too likely though.
383 return true;
384 } else {
385 switch (prevStyle = styler.StyleAt(firstWordPosn)) {
386 case SCE_RB_WORD:
387 case SCE_RB_WORD_DEMOTED:
388 case SCE_RB_IDENTIFIER:
389 break;
390 default:
391 return true;
394 Sci_Position firstWordEndPosn = firstWordPosn;
395 char *dst = prevWord;
396 for (;;) {
397 if (firstWordEndPosn >= iPrev ||
398 styler.StyleAt(firstWordEndPosn) != prevStyle) {
399 *dst = 0;
400 break;
402 *dst++ = styler[firstWordEndPosn];
403 firstWordEndPosn += 1;
405 //XXX Write a style-aware thing to regex scintilla buffer objects
406 if (!strcmp(prevWord, "undef")
407 || !strcmp(prevWord, "def")
408 || !strcmp(prevWord, "alias")) {
409 // These keywords are what we were looking for
410 return false;
412 return true;
415 // Routine that saves us from allocating a buffer for the here-doc target
416 // targetEndPos points one past the end of the current target
417 static bool haveTargetMatch(Sci_Position currPos,
418 Sci_Position lengthDoc,
419 Sci_Position targetStartPos,
420 Sci_Position targetEndPos,
421 Accessor &styler) {
422 if (lengthDoc - currPos < targetEndPos - targetStartPos) {
423 return false;
425 Sci_Position i, j;
426 for (i = targetStartPos, j = currPos;
427 i < targetEndPos && j < lengthDoc;
428 i++, j++) {
429 if (styler[i] != styler[j]) {
430 return false;
433 return true;
436 // Finds the start position of the expression containing @p pos
437 // @p min_pos should be a known expression start, e.g. the start of the line
438 static Sci_Position findExpressionStart(Sci_Position pos,
439 Sci_Position min_pos,
440 Accessor &styler) {
441 int depth = 0;
442 for (; pos > min_pos; pos -= 1) {
443 int style = styler.StyleAt(pos - 1);
444 if (style == SCE_RB_OPERATOR) {
445 int ch = styler[pos - 1];
446 if (ch == '}' || ch == ')' || ch == ']') {
447 depth += 1;
448 } else if (ch == '{' || ch == '(' || ch == '[') {
449 if (depth == 0) {
450 break;
451 } else {
452 depth -= 1;
454 } else if (ch == ';' && depth == 0) {
455 break;
459 return pos;
462 // We need a check because the form
463 // [identifier] <<[target]
464 // is ambiguous. The Ruby lexer/parser resolves it by
465 // looking to see if [identifier] names a variable or a
466 // function. If it's the first, it's the start of a here-doc.
467 // If it's a var, it's an operator. This lexer doesn't
468 // maintain a symbol table, so it looks ahead to see what's
469 // going on, in cases where we have
470 // ^[white-space]*[identifier([.|::]identifier)*][white-space]*<<[target]
472 // If there's no occurrence of [target] on a line, assume we don't.
474 // return true == yes, we have no heredocs
476 static bool sureThisIsNotHeredoc(Sci_Position lt2StartPos,
477 Accessor &styler) {
478 int prevStyle;
479 // Use full document, not just part we're styling
480 Sci_Position lengthDoc = styler.Length();
481 Sci_Position lineStart = styler.GetLine(lt2StartPos);
482 Sci_Position lineStartPosn = styler.LineStart(lineStart);
483 styler.Flush();
484 const bool definitely_not_a_here_doc = true;
485 const bool looks_like_a_here_doc = false;
487 // find the expression start rather than the line start
488 Sci_Position exprStartPosn = findExpressionStart(lt2StartPos, lineStartPosn, styler);
490 // Find the first word after some whitespace
491 Sci_Position firstWordPosn = skipWhitespace(exprStartPosn, lt2StartPos, styler);
492 if (firstWordPosn >= lt2StartPos) {
493 return definitely_not_a_here_doc;
495 prevStyle = styler.StyleAt(firstWordPosn);
496 // If we have '<<' following a keyword, it's not a heredoc
497 if (prevStyle != SCE_RB_IDENTIFIER
498 && prevStyle != SCE_RB_SYMBOL
499 && prevStyle != SCE_RB_INSTANCE_VAR
500 && prevStyle != SCE_RB_CLASS_VAR) {
501 return definitely_not_a_here_doc;
503 int newStyle = prevStyle;
504 // Some compilers incorrectly warn about uninit newStyle
505 for (firstWordPosn += 1; firstWordPosn <= lt2StartPos; firstWordPosn += 1) {
506 // Inner loop looks at the name
507 for (; firstWordPosn <= lt2StartPos; firstWordPosn += 1) {
508 newStyle = styler.StyleAt(firstWordPosn);
509 if (newStyle != prevStyle) {
510 break;
513 // Do we have '::' or '.'?
514 if (firstWordPosn < lt2StartPos && newStyle == SCE_RB_OPERATOR) {
515 char ch = styler[firstWordPosn];
516 if (ch == '.') {
517 // yes
518 } else if (ch == ':') {
519 if (styler.StyleAt(++firstWordPosn) != SCE_RB_OPERATOR) {
520 return definitely_not_a_here_doc;
521 } else if (styler[firstWordPosn] != ':') {
522 return definitely_not_a_here_doc;
524 } else {
525 break;
527 } else {
528 break;
530 // on second and next passes, only identifiers may appear since
531 // class and instance variable are private
532 prevStyle = SCE_RB_IDENTIFIER;
534 // Skip next batch of white-space
535 firstWordPosn = skipWhitespace(firstWordPosn, lt2StartPos, styler);
536 // possible symbol for an implicit hash argument
537 if (firstWordPosn < lt2StartPos && styler.StyleAt(firstWordPosn) == SCE_RB_SYMBOL) {
538 for (; firstWordPosn <= lt2StartPos; firstWordPosn += 1) {
539 if (styler.StyleAt(firstWordPosn) != SCE_RB_SYMBOL) {
540 break;
543 // Skip next batch of white-space
544 firstWordPosn = skipWhitespace(firstWordPosn, lt2StartPos, styler);
546 if (firstWordPosn != lt2StartPos) {
547 // Have [[^ws[identifier]ws[*something_else*]ws<<
548 return definitely_not_a_here_doc;
550 // OK, now 'j' will point to the current spot moving ahead
551 Sci_Position j = firstWordPosn + 1;
552 if (styler.StyleAt(j) != SCE_RB_OPERATOR || styler[j] != '<') {
553 // This shouldn't happen
554 return definitely_not_a_here_doc;
556 Sci_Position nextLineStartPosn = styler.LineStart(lineStart + 1);
557 if (nextLineStartPosn >= lengthDoc) {
558 return definitely_not_a_here_doc;
560 j = skipWhitespace(j + 1, nextLineStartPosn, styler);
561 if (j >= lengthDoc) {
562 return definitely_not_a_here_doc;
564 bool allow_indent;
565 Sci_Position target_start, target_end;
566 // From this point on no more styling, since we're looking ahead
567 if (styler[j] == '-') {
568 allow_indent = true;
569 j++;
570 } else {
571 allow_indent = false;
574 // Allow for quoted targets.
575 char target_quote = 0;
576 switch (styler[j]) {
577 case '\'':
578 case '"':
579 case '`':
580 target_quote = styler[j];
581 j += 1;
584 if (isSafeAlnum(styler[j])) {
585 // Init target_end because some compilers think it won't
586 // be initialized by the time it's used
587 target_start = target_end = j;
588 j++;
589 } else {
590 return definitely_not_a_here_doc;
592 for (; j < lengthDoc; j++) {
593 if (!isSafeAlnum(styler[j])) {
594 if (target_quote && styler[j] != target_quote) {
595 // unquoted end
596 return definitely_not_a_here_doc;
599 // And for now make sure that it's a newline
600 // don't handle arbitrary expressions yet
602 target_end = j;
603 if (target_quote) {
604 // Now we can move to the character after the string delimiter.
605 j += 1;
607 j = skipWhitespace(j, lengthDoc, styler);
608 if (j >= lengthDoc) {
609 return definitely_not_a_here_doc;
610 } else {
611 char ch = styler[j];
612 if (ch == '#' || isEOLChar(ch)) {
613 // This is OK, so break and continue;
614 break;
615 } else {
616 return definitely_not_a_here_doc;
622 // Just look at the start of each line
623 Sci_Position last_line = styler.GetLine(lengthDoc - 1);
624 // But don't go too far
625 if (last_line > lineStart + 50) {
626 last_line = lineStart + 50;
628 for (Sci_Position line_num = lineStart + 1; line_num <= last_line; line_num++) {
629 if (allow_indent) {
630 j = skipWhitespace(styler.LineStart(line_num), lengthDoc, styler);
631 } else {
632 j = styler.LineStart(line_num);
634 // target_end is one past the end
635 if (haveTargetMatch(j, lengthDoc, target_start, target_end, styler)) {
636 // We got it
637 return looks_like_a_here_doc;
640 return definitely_not_a_here_doc;
643 //todo: if we aren't looking at a stdio character,
644 // move to the start of the first line that is not in a
645 // multi-line construct
647 static void synchronizeDocStart(Sci_PositionU &startPos,
648 Sci_Position &length,
649 int &initStyle,
650 Accessor &styler,
651 bool skipWhiteSpace=false) {
653 styler.Flush();
654 int style = actual_style(styler.StyleAt(startPos));
655 switch (style) {
656 case SCE_RB_STDIN:
657 case SCE_RB_STDOUT:
658 case SCE_RB_STDERR:
659 // Don't do anything else with these.
660 return;
663 Sci_Position pos = startPos;
664 // Quick way to characterize each line
665 Sci_Position lineStart;
666 for (lineStart = styler.GetLine(pos); lineStart > 0; lineStart--) {
667 // Now look at the style before the previous line's EOL
668 pos = styler.LineStart(lineStart) - 1;
669 if (pos <= 10) {
670 lineStart = 0;
671 break;
673 char ch = styler.SafeGetCharAt(pos);
674 char chPrev = styler.SafeGetCharAt(pos - 1);
675 if (ch == '\n' && chPrev == '\r') {
676 pos--;
678 if (styler.SafeGetCharAt(pos - 1) == '\\') {
679 // Continuation line -- keep going
680 } else if (actual_style(styler.StyleAt(pos)) != SCE_RB_DEFAULT) {
681 // Part of multi-line construct -- keep going
682 } else if (currLineContainsHereDelims(pos, styler)) {
683 // Keep going, with pos and length now pointing
684 // at the end of the here-doc delimiter
685 } else if (skipWhiteSpace && isEmptyLine(pos, styler)) {
686 // Keep going
687 } else {
688 break;
691 pos = styler.LineStart(lineStart);
692 length += (startPos - pos);
693 startPos = pos;
694 initStyle = SCE_RB_DEFAULT;
697 static void ColouriseRbDoc(Sci_PositionU startPos, Sci_Position length, int initStyle,
698 WordList *keywordlists[], Accessor &styler) {
700 // Lexer for Ruby often has to backtrack to start of current style to determine
701 // which characters are being used as quotes, how deeply nested is the
702 // start position and what the termination string is for here documents
704 WordList &keywords = *keywordlists[0];
706 class HereDocCls {
707 public:
708 int State;
709 // States
710 // 0: '<<' encountered
711 // 1: collect the delimiter
712 // 1b: text between the end of the delimiter and the EOL
713 // 2: here doc text (lines after the delimiter)
714 char Quote; // the char after '<<'
715 bool Quoted; // true if Quote in ('\'','"','`')
716 int DelimiterLength; // strlen(Delimiter)
717 char Delimiter[256]; // the Delimiter, limit of 256: from Perl
718 bool CanBeIndented;
719 HereDocCls() {
720 State = 0;
721 DelimiterLength = 0;
722 Delimiter[0] = '\0';
723 CanBeIndented = false;
726 HereDocCls HereDoc;
728 QuoteCls Quote;
730 int numDots = 0; // For numbers --
731 // Don't start lexing in the middle of a num
733 synchronizeDocStart(startPos, length, initStyle, styler, // ref args
734 false);
736 bool preferRE = true;
737 int state = initStyle;
738 Sci_Position lengthDoc = startPos + length;
740 char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
741 prevWord[0] = '\0';
742 if (length == 0)
743 return;
745 char chPrev = styler.SafeGetCharAt(startPos - 1);
746 char chNext = styler.SafeGetCharAt(startPos);
747 bool is_real_number = true; // Differentiate between constants and ?-sequences.
748 styler.StartAt(startPos);
749 styler.StartSegment(startPos);
751 static int q_states[] = {SCE_RB_STRING_Q,
752 SCE_RB_STRING_QQ,
753 SCE_RB_STRING_QR,
754 SCE_RB_STRING_QW,
755 SCE_RB_STRING_QW,
756 SCE_RB_STRING_QX
758 static const char *q_chars = "qQrwWx";
760 // In most cases a value of 2 should be ample for the code in the
761 // Ruby library, and the code the user is likely to enter.
762 // For example,
763 // fu_output_message "mkdir #{options[:mode] ? ('-m %03o ' % options[:mode]) : ''}#{list.join ' '}"
764 // if options[:verbose]
765 // from fileutils.rb nests to a level of 2
766 // If the user actually hits a 6th occurrence of '#{' in a double-quoted
767 // string (including regex'es, %Q, %<sym>, %w, and other strings
768 // that interpolate), it will stay as a string. The problem with this
769 // is that quotes might flip, a 7th '#{' will look like a comment,
770 // and code-folding might be wrong.
772 // If anyone runs into this problem, I recommend raising this
773 // value slightly higher to replacing the fixed array with a linked
774 // list. Keep in mind this code will be called every time the lexer
775 // is invoked.
777 #define INNER_STRINGS_MAX_COUNT 5
778 // These vars track our instances of "...#{,,,%Q<..#{,,,}...>,,,}..."
779 int inner_string_types[INNER_STRINGS_MAX_COUNT];
780 // Track # braces when we push a new #{ thing
781 int inner_expn_brace_counts[INNER_STRINGS_MAX_COUNT];
782 QuoteCls inner_quotes[INNER_STRINGS_MAX_COUNT];
783 int inner_string_count = 0;
784 int brace_counts = 0; // Number of #{ ... } things within an expression
786 Sci_Position i;
787 for (i = 0; i < INNER_STRINGS_MAX_COUNT; i++) {
788 inner_string_types[i] = 0;
789 inner_expn_brace_counts[i] = 0;
791 for (i = startPos; i < lengthDoc; i++) {
792 char ch = chNext;
793 chNext = styler.SafeGetCharAt(i + 1);
794 char chNext2 = styler.SafeGetCharAt(i + 2);
796 if (styler.IsLeadByte(ch)) {
797 chNext = chNext2;
798 chPrev = ' ';
799 i += 1;
800 continue;
803 // skip on DOS/Windows
804 //No, don't, because some things will get tagged on,
805 // so we won't recognize keywords, for example
806 #if 0
807 if (ch == '\r' && chNext == '\n') {
808 continue;
810 #endif
812 if (HereDoc.State == 1 && isEOLChar(ch)) {
813 // Begin of here-doc (the line after the here-doc delimiter):
814 HereDoc.State = 2;
815 styler.ColourTo(i-1, state);
816 // Don't check for a missing quote, just jump into
817 // the here-doc state
818 state = SCE_RB_HERE_Q;
821 // Regular transitions
822 if (state == SCE_RB_DEFAULT) {
823 if (isSafeDigit(ch)) {
824 styler.ColourTo(i - 1, state);
825 state = SCE_RB_NUMBER;
826 is_real_number = true;
827 numDots = 0;
828 } else if (isHighBitChar(ch) || iswordstart(ch)) {
829 styler.ColourTo(i - 1, state);
830 state = SCE_RB_WORD;
831 } else if (ch == '#') {
832 styler.ColourTo(i - 1, state);
833 state = SCE_RB_COMMENTLINE;
834 } else if (ch == '=') {
835 // =begin indicates the start of a comment (doc) block
836 if ((i == 0 || isEOLChar(chPrev))
837 && chNext == 'b'
838 && styler.SafeGetCharAt(i + 2) == 'e'
839 && styler.SafeGetCharAt(i + 3) == 'g'
840 && styler.SafeGetCharAt(i + 4) == 'i'
841 && styler.SafeGetCharAt(i + 5) == 'n'
842 && !isSafeWordcharOrHigh(styler.SafeGetCharAt(i + 6))) {
843 styler.ColourTo(i - 1, state);
844 state = SCE_RB_POD;
845 } else {
846 styler.ColourTo(i - 1, state);
847 styler.ColourTo(i, SCE_RB_OPERATOR);
848 preferRE = true;
850 } else if (ch == '"') {
851 styler.ColourTo(i - 1, state);
852 state = SCE_RB_STRING;
853 Quote.New();
854 Quote.Open(ch);
855 } else if (ch == '\'') {
856 styler.ColourTo(i - 1, state);
857 state = SCE_RB_CHARACTER;
858 Quote.New();
859 Quote.Open(ch);
860 } else if (ch == '`') {
861 styler.ColourTo(i - 1, state);
862 state = SCE_RB_BACKTICKS;
863 Quote.New();
864 Quote.Open(ch);
865 } else if (ch == '@') {
866 // Instance or class var
867 styler.ColourTo(i - 1, state);
868 if (chNext == '@') {
869 state = SCE_RB_CLASS_VAR;
870 advance_char(i, ch, chNext, chNext2); // pass by ref
871 } else {
872 state = SCE_RB_INSTANCE_VAR;
874 } else if (ch == '$') {
875 // Check for a builtin global
876 styler.ColourTo(i - 1, state);
877 // Recognize it bit by bit
878 state = SCE_RB_GLOBAL;
879 } else if (ch == '/' && preferRE) {
880 // Ambigous operator
881 styler.ColourTo(i - 1, state);
882 state = SCE_RB_REGEX;
883 Quote.New();
884 Quote.Open(ch);
885 } else if (ch == '<' && chNext == '<' && chNext2 != '=') {
887 // Recognise the '<<' symbol - either a here document or a binary op
888 styler.ColourTo(i - 1, state);
889 i++;
890 chNext = chNext2;
891 styler.ColourTo(i, SCE_RB_OPERATOR);
893 if (!(strchr("\"\'`_-", chNext2) || isSafeAlpha(chNext2))) {
894 // It's definitely not a here-doc,
895 // based on Ruby's lexer/parser in the
896 // heredoc_identifier routine.
897 // Nothing else to do.
898 } else if (preferRE) {
899 if (sureThisIsHeredoc(i - 1, styler, prevWord)) {
900 state = SCE_RB_HERE_DELIM;
901 HereDoc.State = 0;
903 // else leave it in default state
904 } else {
905 if (sureThisIsNotHeredoc(i - 1, styler)) {
906 // leave state as default
907 // We don't have all the heuristics Perl has for indications
908 // of a here-doc, because '<<' is overloadable and used
909 // for so many other classes.
910 } else {
911 state = SCE_RB_HERE_DELIM;
912 HereDoc.State = 0;
915 preferRE = (state != SCE_RB_HERE_DELIM);
916 } else if (ch == ':') {
917 styler.ColourTo(i - 1, state);
918 if (chNext == ':') {
919 // Mark "::" as an operator, not symbol start
920 styler.ColourTo(i + 1, SCE_RB_OPERATOR);
921 advance_char(i, ch, chNext, chNext2); // pass by ref
922 state = SCE_RB_DEFAULT;
923 preferRE = false;
924 } else if (isSafeWordcharOrHigh(chNext)) {
925 state = SCE_RB_SYMBOL;
926 } else if ((chNext == '@' || chNext == '$') &&
927 isSafeWordcharOrHigh(chNext2)) {
928 // instance and global variable followed by an identifier
929 advance_char(i, ch, chNext, chNext2);
930 state = SCE_RB_SYMBOL;
931 } else if (((chNext == '@' && chNext2 == '@') ||
932 (chNext == '$' && chNext2 == '-')) &&
933 isSafeWordcharOrHigh(styler.SafeGetCharAt(i+3))) {
934 // class variables and special global variable "$-IDENTCHAR"
935 state = SCE_RB_SYMBOL;
936 // $-IDENTCHAR doesn't continue past the IDENTCHAR
937 if (chNext == '$') {
938 styler.ColourTo(i+3, SCE_RB_SYMBOL);
939 state = SCE_RB_DEFAULT;
941 i += 3;
942 ch = styler.SafeGetCharAt(i);
943 chNext = styler.SafeGetCharAt(i+1);
944 } else if (chNext == '$' && strchr("_~*$?!@/\\;,.=:<>\"&`'+", chNext2)) {
945 // single-character special global variables
946 i += 2;
947 ch = chNext2;
948 chNext = styler.SafeGetCharAt(i+1);
949 styler.ColourTo(i, SCE_RB_SYMBOL);
950 state = SCE_RB_DEFAULT;
951 } else if (strchr("[*!~+-*/%=<>&^|", chNext)) {
952 // Do the operator analysis in-line, looking ahead
953 // Based on the table in pickaxe 2nd ed., page 339
954 bool doColoring = true;
955 switch (chNext) {
956 case '[':
957 if (chNext2 == ']') {
958 char ch_tmp = styler.SafeGetCharAt(i + 3);
959 if (ch_tmp == '=') {
960 i += 3;
961 ch = ch_tmp;
962 chNext = styler.SafeGetCharAt(i + 1);
963 } else {
964 i += 2;
965 ch = chNext2;
966 chNext = ch_tmp;
968 } else {
969 doColoring = false;
971 break;
973 case '*':
974 if (chNext2 == '*') {
975 i += 2;
976 ch = chNext2;
977 chNext = styler.SafeGetCharAt(i + 1);
978 } else {
979 advance_char(i, ch, chNext, chNext2);
981 break;
983 case '!':
984 if (chNext2 == '=' || chNext2 == '~') {
985 i += 2;
986 ch = chNext2;
987 chNext = styler.SafeGetCharAt(i + 1);
988 } else {
989 advance_char(i, ch, chNext, chNext2);
991 break;
993 case '<':
994 if (chNext2 == '<') {
995 i += 2;
996 ch = chNext2;
997 chNext = styler.SafeGetCharAt(i + 1);
998 } else if (chNext2 == '=') {
999 char ch_tmp = styler.SafeGetCharAt(i + 3);
1000 if (ch_tmp == '>') { // <=> operator
1001 i += 3;
1002 ch = ch_tmp;
1003 chNext = styler.SafeGetCharAt(i + 1);
1004 } else {
1005 i += 2;
1006 ch = chNext2;
1007 chNext = ch_tmp;
1009 } else {
1010 advance_char(i, ch, chNext, chNext2);
1012 break;
1014 default:
1015 // Simple one-character operators
1016 advance_char(i, ch, chNext, chNext2);
1017 break;
1019 if (doColoring) {
1020 styler.ColourTo(i, SCE_RB_SYMBOL);
1021 state = SCE_RB_DEFAULT;
1023 } else if (!preferRE) {
1024 // Don't color symbol strings (yet)
1025 // Just color the ":" and color rest as string
1026 styler.ColourTo(i, SCE_RB_SYMBOL);
1027 state = SCE_RB_DEFAULT;
1028 } else {
1029 styler.ColourTo(i, SCE_RB_OPERATOR);
1030 state = SCE_RB_DEFAULT;
1031 preferRE = true;
1033 } else if (ch == '%') {
1034 styler.ColourTo(i - 1, state);
1035 bool have_string = false;
1036 if (strchr(q_chars, chNext) && !isSafeWordcharOrHigh(chNext2)) {
1037 Quote.New();
1038 const char *hit = strchr(q_chars, chNext);
1039 if (hit != NULL) {
1040 state = q_states[hit - q_chars];
1041 Quote.Open(chNext2);
1042 i += 2;
1043 ch = chNext2;
1044 chNext = styler.SafeGetCharAt(i + 1);
1045 have_string = true;
1047 } else if (preferRE && !isSafeWordcharOrHigh(chNext)) {
1048 // Ruby doesn't allow high bit chars here,
1049 // but the editor host might
1050 Quote.New();
1051 state = SCE_RB_STRING_QQ;
1052 Quote.Open(chNext);
1053 advance_char(i, ch, chNext, chNext2); // pass by ref
1054 have_string = true;
1055 } else if (!isSafeWordcharOrHigh(chNext) && !iswhitespace(chNext) && !isEOLChar(chNext)) {
1056 // Ruby doesn't allow high bit chars here,
1057 // but the editor host might
1058 Quote.New();
1059 state = SCE_RB_STRING_QQ;
1060 Quote.Open(chNext);
1061 advance_char(i, ch, chNext, chNext2); // pass by ref
1062 have_string = true;
1064 if (!have_string) {
1065 styler.ColourTo(i, SCE_RB_OPERATOR);
1066 // stay in default
1067 preferRE = true;
1069 } else if (ch == '?') {
1070 styler.ColourTo(i - 1, state);
1071 if (iswhitespace(chNext) || chNext == '\n' || chNext == '\r') {
1072 styler.ColourTo(i, SCE_RB_OPERATOR);
1073 } else {
1074 // It's the start of a character code escape sequence
1075 // Color it as a number.
1076 state = SCE_RB_NUMBER;
1077 is_real_number = false;
1079 } else if (isoperator(ch) || ch == '.') {
1080 styler.ColourTo(i - 1, state);
1081 styler.ColourTo(i, SCE_RB_OPERATOR);
1082 // If we're ending an expression or block,
1083 // assume it ends an object, and the ambivalent
1084 // constructs are binary operators
1086 // So if we don't have one of these chars,
1087 // we aren't ending an object exp'n, and ops
1088 // like : << / are unary operators.
1090 if (ch == '{') {
1091 ++brace_counts;
1092 preferRE = true;
1093 } else if (ch == '}' && --brace_counts < 0
1094 && inner_string_count > 0) {
1095 styler.ColourTo(i, SCE_RB_OPERATOR);
1096 exitInnerExpression(inner_string_types,
1097 inner_expn_brace_counts,
1098 inner_quotes,
1099 inner_string_count,
1100 state, brace_counts, Quote);
1101 } else {
1102 preferRE = (strchr(")}].", ch) == NULL);
1104 // Stay in default state
1105 } else if (isEOLChar(ch)) {
1106 // Make sure it's a true line-end, with no backslash
1107 if ((ch == '\r' || (ch == '\n' && chPrev != '\r'))
1108 && chPrev != '\\') {
1109 // Assume we've hit the end of the statement.
1110 preferRE = true;
1113 } else if (state == SCE_RB_WORD) {
1114 if (ch == '.' || !isSafeWordcharOrHigh(ch)) {
1115 // Words include x? in all contexts,
1116 // and <letters>= after either 'def' or a dot
1117 // Move along until a complete word is on our left
1119 // Default accessor treats '.' as word-chars,
1120 // but we don't for now.
1122 if (ch == '='
1123 && isSafeWordcharOrHigh(chPrev)
1124 && (chNext == '('
1125 || strchr(" \t\n\r", chNext) != NULL)
1126 && (!strcmp(prevWord, "def")
1127 || followsDot(styler.GetStartSegment(), styler))) {
1128 // <name>= is a name only when being def'd -- Get it the next time
1129 // This means that <name>=<name> is always lexed as
1130 // <name>, (op, =), <name>
1131 } else if (ch == ':'
1132 && isSafeWordcharOrHigh(chPrev)
1133 && strchr(" \t\n\r", chNext) != NULL) {
1134 state = SCE_RB_SYMBOL;
1135 } else if ((ch == '?' || ch == '!')
1136 && isSafeWordcharOrHigh(chPrev)
1137 && !isSafeWordcharOrHigh(chNext)) {
1138 // <name>? is a name -- Get it the next time
1139 // But <name>?<name> is always lexed as
1140 // <name>, (op, ?), <name>
1141 // Same with <name>! to indicate a method that
1142 // modifies its target
1143 } else if (isEOLChar(ch)
1144 && isMatch(styler, lengthDoc, i - 7, "__END__")) {
1145 styler.ColourTo(i, SCE_RB_DATASECTION);
1146 state = SCE_RB_DATASECTION;
1147 // No need to handle this state -- we'll just move to the end
1148 preferRE = false;
1149 } else {
1150 Sci_Position wordStartPos = styler.GetStartSegment();
1151 int word_style = ClassifyWordRb(wordStartPos, i - 1, keywords, styler, prevWord);
1152 switch (word_style) {
1153 case SCE_RB_WORD:
1154 preferRE = RE_CanFollowKeyword(prevWord);
1155 break;
1157 case SCE_RB_WORD_DEMOTED:
1158 preferRE = true;
1159 break;
1161 case SCE_RB_IDENTIFIER:
1162 if (isMatch(styler, lengthDoc, wordStartPos, "print")) {
1163 preferRE = true;
1164 } else if (isEOLChar(ch)) {
1165 preferRE = true;
1166 } else {
1167 preferRE = false;
1169 break;
1170 default:
1171 preferRE = false;
1173 if (ch == '.') {
1174 // We might be redefining an operator-method
1175 preferRE = false;
1177 // And if it's the first
1178 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1181 } else if (state == SCE_RB_NUMBER) {
1182 if (!is_real_number) {
1183 if (ch != '\\') {
1184 styler.ColourTo(i, state);
1185 state = SCE_RB_DEFAULT;
1186 preferRE = false;
1187 } else if (strchr("\\ntrfvaebs", chNext)) {
1188 // Terminal escape sequence -- handle it next time
1189 // Nothing more to do this time through the loop
1190 } else if (chNext == 'C' || chNext == 'M') {
1191 if (chNext2 != '-') {
1192 // \C or \M ends the sequence -- handle it next time
1193 } else {
1194 // Move from abc?\C-x
1195 // ^
1196 // to
1197 // ^
1198 i += 2;
1199 ch = chNext2;
1200 chNext = styler.SafeGetCharAt(i + 1);
1202 } else if (chNext == 'c') {
1203 // Stay here, \c is a combining sequence
1204 advance_char(i, ch, chNext, chNext2); // pass by ref
1205 } else {
1206 // ?\x, including ?\\ is final.
1207 styler.ColourTo(i + 1, state);
1208 state = SCE_RB_DEFAULT;
1209 preferRE = false;
1210 advance_char(i, ch, chNext, chNext2);
1212 } else if (isSafeAlnumOrHigh(ch) || ch == '_') {
1213 // Keep going
1214 } else if (ch == '.' && chNext == '.') {
1215 ++numDots;
1216 styler.ColourTo(i - 1, state);
1217 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1218 } else if (ch == '.' && ++numDots == 1) {
1219 // Keep going
1220 } else {
1221 styler.ColourTo(i - 1, state);
1222 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1223 preferRE = false;
1225 } else if (state == SCE_RB_COMMENTLINE) {
1226 if (isEOLChar(ch)) {
1227 styler.ColourTo(i - 1, state);
1228 state = SCE_RB_DEFAULT;
1229 // Use whatever setting we had going into the comment
1231 } else if (state == SCE_RB_HERE_DELIM) {
1232 // See the comment for SCE_RB_HERE_DELIM in LexPerl.cxx
1233 // Slightly different: if we find an immediate '-',
1234 // the target can appear indented.
1236 if (HereDoc.State == 0) { // '<<' encountered
1237 HereDoc.State = 1;
1238 HereDoc.DelimiterLength = 0;
1239 if (ch == '-') {
1240 HereDoc.CanBeIndented = true;
1241 advance_char(i, ch, chNext, chNext2); // pass by ref
1242 } else {
1243 HereDoc.CanBeIndented = false;
1245 if (isEOLChar(ch)) {
1246 // Bail out of doing a here doc if there's no target
1247 state = SCE_RB_DEFAULT;
1248 preferRE = false;
1249 } else {
1250 HereDoc.Quote = ch;
1252 if (ch == '\'' || ch == '"' || ch == '`') {
1253 HereDoc.Quoted = true;
1254 HereDoc.Delimiter[0] = '\0';
1255 } else {
1256 HereDoc.Quoted = false;
1257 HereDoc.Delimiter[0] = ch;
1258 HereDoc.Delimiter[1] = '\0';
1259 HereDoc.DelimiterLength = 1;
1262 } else if (HereDoc.State == 1) { // collect the delimiter
1263 if (isEOLChar(ch)) {
1264 // End the quote now, and go back for more
1265 styler.ColourTo(i - 1, state);
1266 state = SCE_RB_DEFAULT;
1267 i--;
1268 chNext = ch;
1269 preferRE = false;
1270 } else if (HereDoc.Quoted) {
1271 if (ch == HereDoc.Quote) { // closing quote => end of delimiter
1272 styler.ColourTo(i, state);
1273 state = SCE_RB_DEFAULT;
1274 preferRE = false;
1275 } else {
1276 if (ch == '\\' && !isEOLChar(chNext)) {
1277 advance_char(i, ch, chNext, chNext2);
1279 HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
1280 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
1282 } else { // an unquoted here-doc delimiter
1283 if (isSafeAlnumOrHigh(ch) || ch == '_') {
1284 HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
1285 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
1286 } else {
1287 styler.ColourTo(i - 1, state);
1288 redo_char(i, ch, chNext, chNext2, state);
1289 preferRE = false;
1292 if (HereDoc.DelimiterLength >= static_cast<int>(sizeof(HereDoc.Delimiter)) - 1) {
1293 styler.ColourTo(i - 1, state);
1294 state = SCE_RB_ERROR;
1295 preferRE = false;
1298 } else if (state == SCE_RB_HERE_Q) {
1299 // Not needed: HereDoc.State == 2
1300 // Indentable here docs: look backwards
1301 // Non-indentable: look forwards, like in Perl
1303 // Why: so we can quickly resolve things like <<-" abc"
1305 if (!HereDoc.CanBeIndented) {
1306 if (isEOLChar(chPrev)
1307 && isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) {
1308 styler.ColourTo(i - 1, state);
1309 i += HereDoc.DelimiterLength - 1;
1310 chNext = styler.SafeGetCharAt(i + 1);
1311 if (isEOLChar(chNext)) {
1312 styler.ColourTo(i, SCE_RB_HERE_DELIM);
1313 state = SCE_RB_DEFAULT;
1314 HereDoc.State = 0;
1315 preferRE = false;
1317 // Otherwise we skipped through the here doc faster.
1319 } else if (isEOLChar(chNext)
1320 && lookingAtHereDocDelim(styler,
1321 i - HereDoc.DelimiterLength + 1,
1322 lengthDoc,
1323 HereDoc.Delimiter)) {
1324 styler.ColourTo(i - 1 - HereDoc.DelimiterLength, state);
1325 styler.ColourTo(i, SCE_RB_HERE_DELIM);
1326 state = SCE_RB_DEFAULT;
1327 preferRE = false;
1328 HereDoc.State = 0;
1330 } else if (state == SCE_RB_CLASS_VAR
1331 || state == SCE_RB_INSTANCE_VAR
1332 || state == SCE_RB_SYMBOL) {
1333 if (state == SCE_RB_SYMBOL &&
1334 // FIDs suffices '?' and '!'
1335 (((ch == '!' || ch == '?') && chNext != '=') ||
1336 // identifier suffix '='
1337 (ch == '=' && (chNext != '~' && chNext != '>' &&
1338 (chNext != '=' || chNext2 == '>'))))) {
1339 styler.ColourTo(i, state);
1340 state = SCE_RB_DEFAULT;
1341 preferRE = false;
1342 } else if (!isSafeWordcharOrHigh(ch)) {
1343 styler.ColourTo(i - 1, state);
1344 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1345 preferRE = false;
1347 } else if (state == SCE_RB_GLOBAL) {
1348 if (!isSafeWordcharOrHigh(ch)) {
1349 // handle special globals here as well
1350 if (chPrev == '$') {
1351 if (ch == '-') {
1352 // Include the next char, like $-a
1353 advance_char(i, ch, chNext, chNext2);
1355 styler.ColourTo(i, state);
1356 state = SCE_RB_DEFAULT;
1357 } else {
1358 styler.ColourTo(i - 1, state);
1359 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1361 preferRE = false;
1363 } else if (state == SCE_RB_POD) {
1364 // PODs end with ^=end\s, -- any whitespace can follow =end
1365 if (strchr(" \t\n\r", ch) != NULL
1366 && i > 5
1367 && isEOLChar(styler[i - 5])
1368 && isMatch(styler, lengthDoc, i - 4, "=end")) {
1369 styler.ColourTo(i - 1, state);
1370 state = SCE_RB_DEFAULT;
1371 preferRE = false;
1373 } else if (state == SCE_RB_REGEX || state == SCE_RB_STRING_QR) {
1374 if (ch == '\\' && Quote.Up != '\\') {
1375 // Skip one
1376 advance_char(i, ch, chNext, chNext2);
1377 } else if (ch == Quote.Down) {
1378 Quote.Count--;
1379 if (Quote.Count == 0) {
1380 // Include the options
1381 while (isSafeAlpha(chNext)) {
1382 i++;
1383 ch = chNext;
1384 chNext = styler.SafeGetCharAt(i + 1);
1386 styler.ColourTo(i, state);
1387 state = SCE_RB_DEFAULT;
1388 preferRE = false;
1390 } else if (ch == Quote.Up) {
1391 // Only if close quoter != open quoter
1392 Quote.Count++;
1394 } else if (ch == '#') {
1395 if (chNext == '{'
1396 && inner_string_count < INNER_STRINGS_MAX_COUNT) {
1397 // process #{ ... }
1398 styler.ColourTo(i - 1, state);
1399 styler.ColourTo(i + 1, SCE_RB_OPERATOR);
1400 enterInnerExpression(inner_string_types,
1401 inner_expn_brace_counts,
1402 inner_quotes,
1403 inner_string_count,
1404 state,
1405 brace_counts,
1406 Quote);
1407 preferRE = true;
1408 // Skip one
1409 advance_char(i, ch, chNext, chNext2);
1410 } else {
1411 //todo: distinguish comments from pound chars
1412 // for now, handle as comment
1413 styler.ColourTo(i - 1, state);
1414 bool inEscape = false;
1415 while (++i < lengthDoc) {
1416 ch = styler.SafeGetCharAt(i);
1417 if (ch == '\\') {
1418 inEscape = true;
1419 } else if (isEOLChar(ch)) {
1420 // Comment inside a regex
1421 styler.ColourTo(i - 1, SCE_RB_COMMENTLINE);
1422 break;
1423 } else if (inEscape) {
1424 inEscape = false; // don't look at char
1425 } else if (ch == Quote.Down) {
1426 // Have the regular handler deal with this
1427 // to get trailing modifiers.
1428 i--;
1429 ch = styler[i];
1430 break;
1433 chNext = styler.SafeGetCharAt(i + 1);
1436 // Quotes of all kinds...
1437 } else if (state == SCE_RB_STRING_Q || state == SCE_RB_STRING_QQ ||
1438 state == SCE_RB_STRING_QX || state == SCE_RB_STRING_QW ||
1439 state == SCE_RB_STRING || state == SCE_RB_CHARACTER ||
1440 state == SCE_RB_BACKTICKS) {
1441 if (!Quote.Down && !isspacechar(ch)) {
1442 Quote.Open(ch);
1443 } else if (ch == '\\' && Quote.Up != '\\') {
1444 //Riddle me this: Is it safe to skip *every* escaped char?
1445 advance_char(i, ch, chNext, chNext2);
1446 } else if (ch == Quote.Down) {
1447 Quote.Count--;
1448 if (Quote.Count == 0) {
1449 styler.ColourTo(i, state);
1450 state = SCE_RB_DEFAULT;
1451 preferRE = false;
1453 } else if (ch == Quote.Up) {
1454 Quote.Count++;
1455 } else if (ch == '#' && chNext == '{'
1456 && inner_string_count < INNER_STRINGS_MAX_COUNT
1457 && state != SCE_RB_CHARACTER
1458 && state != SCE_RB_STRING_Q) {
1459 // process #{ ... }
1460 styler.ColourTo(i - 1, state);
1461 styler.ColourTo(i + 1, SCE_RB_OPERATOR);
1462 enterInnerExpression(inner_string_types,
1463 inner_expn_brace_counts,
1464 inner_quotes,
1465 inner_string_count,
1466 state,
1467 brace_counts,
1468 Quote);
1469 preferRE = true;
1470 // Skip one
1471 advance_char(i, ch, chNext, chNext2);
1475 if (state == SCE_RB_ERROR) {
1476 break;
1478 chPrev = ch;
1480 if (state == SCE_RB_WORD) {
1481 // We've ended on a word, possibly at EOF, and need to
1482 // classify it.
1483 (void) ClassifyWordRb(styler.GetStartSegment(), lengthDoc - 1, keywords, styler, prevWord);
1484 } else {
1485 styler.ColourTo(lengthDoc - 1, state);
1489 // Helper functions for folding, disambiguation keywords
1490 // Assert that there are no high-bit chars
1492 static void getPrevWord(Sci_Position pos,
1493 char *prevWord,
1494 Accessor &styler,
1495 int word_state)
1497 Sci_Position i;
1498 styler.Flush();
1499 for (i = pos - 1; i > 0; i--) {
1500 if (actual_style(styler.StyleAt(i)) != word_state) {
1501 i++;
1502 break;
1505 if (i < pos - MAX_KEYWORD_LENGTH) // overflow
1506 i = pos - MAX_KEYWORD_LENGTH;
1507 char *dst = prevWord;
1508 for (; i <= pos; i++) {
1509 *dst++ = styler[i];
1511 *dst = 0;
// Report whether prevWord is one of the keywords listed below
// (if, do, while, unless, until, for).
static bool keywordIsAmbiguous(const char *prevWord)
{
    // Ordered from most likely used to least likely.
    // Lots of ways to do a loop in Ruby besides 'while/until'.
    static const char *const ambiguousWords[] = {
        "if", "do", "while", "unless", "until", "for"
    };
    const size_t wordCount = sizeof(ambiguousWords) / sizeof(ambiguousWords[0]);
    for (size_t k = 0; k < wordCount; k++) {
        if (strcmp(prevWord, ambiguousWords[k]) == 0) {
            return true;
        }
    }
    return false;
}
1530 // Demote keywords in the following conditions:
1531 // if, while, unless, until modify a statement
1532 // do after a while or until, as a noise word (like then after if)
1534 static bool keywordIsModifier(const char *word,
1535 Sci_Position pos,
1536 Accessor &styler)
1538 if (word[0] == 'd' && word[1] == 'o' && !word[2]) {
1539 return keywordDoStartsLoop(pos, styler);
1541 char ch, chPrev, chPrev2;
1542 int style = SCE_RB_DEFAULT;
1543 Sci_Position lineStart = styler.GetLine(pos);
1544 Sci_Position lineStartPosn = styler.LineStart(lineStart);
1545 // We want to step backwards until we don't care about the current
1546 // position. But first move lineStartPosn back behind any
1547 // continuations immediately above word.
1548 while (lineStartPosn > 0) {
1549 ch = styler[lineStartPosn-1];
1550 if (ch == '\n' || ch == '\r') {
1551 chPrev = styler.SafeGetCharAt(lineStartPosn-2);
1552 chPrev2 = styler.SafeGetCharAt(lineStartPosn-3);
1553 lineStart = styler.GetLine(lineStartPosn-1);
1554 // If we find a continuation line, include it in our analysis.
1555 if (chPrev == '\\') {
1556 lineStartPosn = styler.LineStart(lineStart);
1557 } else if (ch == '\n' && chPrev == '\r' && chPrev2 == '\\') {
1558 lineStartPosn = styler.LineStart(lineStart);
1559 } else {
1560 break;
1562 } else {
1563 break;
1567 styler.Flush();
1568 while (--pos >= lineStartPosn) {
1569 style = actual_style(styler.StyleAt(pos));
1570 if (style == SCE_RB_DEFAULT) {
1571 if (iswhitespace(ch = styler[pos])) {
1572 //continue
1573 } else if (ch == '\r' || ch == '\n') {
1574 // Scintilla's LineStart() and GetLine() routines aren't
1575 // platform-independent, so if we have text prepared with
1576 // a different system we can't rely on it.
1578 // Also, lineStartPosn may have been moved to more than one
1579 // line above word's line while pushing past continuations.
1580 chPrev = styler.SafeGetCharAt(pos - 1);
1581 chPrev2 = styler.SafeGetCharAt(pos - 2);
1582 if (chPrev == '\\') {
1583 pos-=1; // gloss over the "\\"
1584 //continue
1585 } else if (ch == '\n' && chPrev == '\r' && chPrev2 == '\\') {
1586 pos-=2; // gloss over the "\\\r"
1587 //continue
1588 } else {
1589 return false;
1592 } else {
1593 break;
1596 if (pos < lineStartPosn) {
1597 return false;
1599 // First things where the action is unambiguous
1600 switch (style) {
1601 case SCE_RB_DEFAULT:
1602 case SCE_RB_COMMENTLINE:
1603 case SCE_RB_POD:
1604 case SCE_RB_CLASSNAME:
1605 case SCE_RB_DEFNAME:
1606 case SCE_RB_MODULE_NAME:
1607 return false;
1608 case SCE_RB_OPERATOR:
1609 break;
1610 case SCE_RB_WORD:
1611 // Watch out for uses of 'else if'
1612 //XXX: Make a list of other keywords where 'if' isn't a modifier
1613 // and can appear legitimately
1614 // Formulate this to avoid warnings from most compilers
1615 if (strcmp(word, "if") == 0) {
1616 char prevWord[MAX_KEYWORD_LENGTH + 1];
1617 getPrevWord(pos, prevWord, styler, SCE_RB_WORD);
1618 return strcmp(prevWord, "else") != 0;
1620 return true;
1621 default:
1622 return true;
1624 // Assume that if the keyword follows an operator,
1625 // usually it's a block assignment, like
1626 // a << if x then y else z
1628 ch = styler[pos];
1629 switch (ch) {
1630 case ')':
1631 case ']':
1632 case '}':
1633 return true;
1634 default:
1635 return false;
1639 #define WHILE_BACKWARDS "elihw"
1640 #define UNTIL_BACKWARDS "litnu"
1641 #define FOR_BACKWARDS "rof"
1643 // Nothing fancy -- look to see if we follow a while/until somewhere
1644 // on the current line
1646 static bool keywordDoStartsLoop(Sci_Position pos,
1647 Accessor &styler)
1649 char ch;
1650 int style;
1651 Sci_Position lineStart = styler.GetLine(pos);
1652 Sci_Position lineStartPosn = styler.LineStart(lineStart);
1653 styler.Flush();
1654 while (--pos >= lineStartPosn) {
1655 style = actual_style(styler.StyleAt(pos));
1656 if (style == SCE_RB_DEFAULT) {
1657 if ((ch = styler[pos]) == '\r' || ch == '\n') {
1658 // Scintilla's LineStart() and GetLine() routines aren't
1659 // platform-independent, so if we have text prepared with
1660 // a different system we can't rely on it.
1661 return false;
1663 } else if (style == SCE_RB_WORD) {
1664 // Check for while or until, but write the word in backwards
1665 char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
1666 char *dst = prevWord;
1667 int wordLen = 0;
1668 Sci_Position start_word;
1669 for (start_word = pos;
1670 start_word >= lineStartPosn && actual_style(styler.StyleAt(start_word)) == SCE_RB_WORD;
1671 start_word--) {
1672 if (++wordLen < MAX_KEYWORD_LENGTH) {
1673 *dst++ = styler[start_word];
1676 *dst = 0;
1677 // Did we see our keyword?
1678 if (!strcmp(prevWord, WHILE_BACKWARDS)
1679 || !strcmp(prevWord, UNTIL_BACKWARDS)
1680 || !strcmp(prevWord, FOR_BACKWARDS)) {
1681 return true;
1683 // We can move pos to the beginning of the keyword, and then
1684 // accept another decrement, as we can never have two contiguous
1685 // keywords:
1686 // word1 word2
1687 // ^
1688 // <- move to start_word
1689 // ^
1690 // <- loop decrement
1691 // ^ # pointing to end of word1 is fine
1692 pos = start_word;
1695 return false;
1698 static bool IsCommentLine(Sci_Position line, Accessor &styler) {
1699 Sci_Position pos = styler.LineStart(line);
1700 Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
1701 for (Sci_Position i = pos; i < eol_pos; i++) {
1702 char ch = styler[i];
1703 if (ch == '#')
1704 return true;
1705 else if (ch != ' ' && ch != '\t')
1706 return false;
1708 return false;
1712 * Folding Ruby
1714 * The language is quite complex to analyze without a full parse.
1715 * For example, this line shouldn't affect fold level:
1717 * print "hello" if feeling_friendly?
1719 * Neither should this:
1721 * print "hello" \
1722 * if feeling_friendly?
1725 * But this should:
1727 * if feeling_friendly? #++
1728 * print "hello" \
1729 * print "goodbye"
1730 * end #--
1732 * So we cheat, by actually looking at the existing indentation
1733 * levels for each line, and just echoing it back. Like Python.
1734 * Then if we get better at it, we'll take braces into consideration,
1735 * which always affect folding levels.
1737 * How the keywords should work:
1738 * No effect:
1739 * __FILE__ __LINE__ BEGIN END alias and
1740 * defined? false in nil not or self super then
1741 * true undef
1743 * Always increment:
1744 * begin class def do for module when {
1746 * Always decrement:
1747 * end }
1749 * Increment if these start a statement
1750 * if unless until while -- do nothing if they're modifiers
1752 * These end a block if there's no modifier, but don't bother
1753 * break next redo retry return yield
1755 * These temporarily de-indent, but re-indent
1756 * case else elsif ensure rescue
1758 * This means that the folder reflects indentation rather
1759 * than setting it. The language-service updates indentation
1760 * when users type return and finishes entering de-denters.
1762 * Later offer to fold POD, here-docs, strings, and blocks of comments
1765 static void FoldRbDoc(Sci_PositionU startPos, Sci_Position length, int initStyle,
1766 WordList *[], Accessor &styler) {
1767 const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
1768 bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
1770 synchronizeDocStart(startPos, length, initStyle, styler, // ref args
1771 false);
1772 Sci_PositionU endPos = startPos + length;
1773 int visibleChars = 0;
1774 Sci_Position lineCurrent = styler.GetLine(startPos);
1775 int levelPrev = startPos == 0 ? 0 : (styler.LevelAt(lineCurrent)
1776 & SC_FOLDLEVELNUMBERMASK
1777 & ~SC_FOLDLEVELBASE);
1778 int levelCurrent = levelPrev;
1779 char chNext = styler[startPos];
1780 int styleNext = styler.StyleAt(startPos);
1781 int stylePrev = startPos <= 1 ? SCE_RB_DEFAULT : styler.StyleAt(startPos - 1);
1782 bool buffer_ends_with_eol = false;
1783 for (Sci_PositionU i = startPos; i < endPos; i++) {
1784 char ch = chNext;
1785 chNext = styler.SafeGetCharAt(i + 1);
1786 int style = styleNext;
1787 styleNext = styler.StyleAt(i + 1);
1788 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1790 /*Mutiline comment patch*/
1791 if (foldComment && atEOL && IsCommentLine(lineCurrent, styler)) {
1792 if (!IsCommentLine(lineCurrent - 1, styler)
1793 && IsCommentLine(lineCurrent + 1, styler))
1794 levelCurrent++;
1795 else if (IsCommentLine(lineCurrent - 1, styler)
1796 && !IsCommentLine(lineCurrent + 1, styler))
1797 levelCurrent--;
1800 if (style == SCE_RB_COMMENTLINE) {
1801 if (foldComment && stylePrev != SCE_RB_COMMENTLINE) {
1802 if (chNext == '{') {
1803 levelCurrent++;
1804 } else if (chNext == '}' && levelCurrent > 0) {
1805 levelCurrent--;
1808 } else if (style == SCE_RB_OPERATOR) {
1809 if (strchr("[{(", ch)) {
1810 levelCurrent++;
1811 } else if (strchr(")}]", ch)) {
1812 // Don't decrement below 0
1813 if (levelCurrent > 0)
1814 levelCurrent--;
1816 } else if (style == SCE_RB_WORD && styleNext != SCE_RB_WORD) {
1817 // Look at the keyword on the left and decide what to do
1818 char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
1819 prevWord[0] = 0;
1820 getPrevWord(i, prevWord, styler, SCE_RB_WORD);
1821 if (!strcmp(prevWord, "end")) {
1822 // Don't decrement below 0
1823 if (levelCurrent > 0)
1824 levelCurrent--;
1825 } else if (!strcmp(prevWord, "if")
1826 || !strcmp(prevWord, "def")
1827 || !strcmp(prevWord, "class")
1828 || !strcmp(prevWord, "module")
1829 || !strcmp(prevWord, "begin")
1830 || !strcmp(prevWord, "case")
1831 || !strcmp(prevWord, "do")
1832 || !strcmp(prevWord, "while")
1833 || !strcmp(prevWord, "unless")
1834 || !strcmp(prevWord, "until")
1835 || !strcmp(prevWord, "for")
1837 levelCurrent++;
1839 } else if (style == SCE_RB_HERE_DELIM) {
1840 if (styler.SafeGetCharAt(i-2) == '<' && styler.SafeGetCharAt(i-1) == '<') {
1841 levelCurrent++;
1842 } else if (styleNext == SCE_RB_DEFAULT) {
1843 levelCurrent--;
1846 if (atEOL) {
1847 int lev = levelPrev;
1848 if (visibleChars == 0 && foldCompact)
1849 lev |= SC_FOLDLEVELWHITEFLAG;
1850 if ((levelCurrent > levelPrev) && (visibleChars > 0))
1851 lev |= SC_FOLDLEVELHEADERFLAG;
1852 styler.SetLevel(lineCurrent, lev|SC_FOLDLEVELBASE);
1853 lineCurrent++;
1854 levelPrev = levelCurrent;
1855 visibleChars = 0;
1856 buffer_ends_with_eol = true;
1857 } else if (!isspacechar(ch)) {
1858 visibleChars++;
1859 buffer_ends_with_eol = false;
1861 stylePrev = style;
1863 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1864 if (!buffer_ends_with_eol) {
1865 lineCurrent++;
1866 int new_lev = levelCurrent;
1867 if (visibleChars == 0 && foldCompact)
1868 new_lev |= SC_FOLDLEVELWHITEFLAG;
1869 if ((levelCurrent > levelPrev) && (visibleChars > 0))
1870 new_lev |= SC_FOLDLEVELHEADERFLAG;
1871 levelCurrent = new_lev;
1873 styler.SetLevel(lineCurrent, levelCurrent|SC_FOLDLEVELBASE);
1876 static const char *const rubyWordListDesc[] = {
1877 "Keywords",
1881 LexerModule lmRuby(SCLEX_RUBY, ColouriseRbDoc, "ruby", FoldRbDoc, rubyWordListDesc);