1 // Scintilla source code edit control
5 // Copyright 2001- by Clemens Wyss <wys@helbling.ch>
6 // The License.txt file describes the conditions under which this software may be distributed.
16 #include "Scintilla.h"
20 #include "LexAccessor.h"
22 #include "StyleContext.h"
23 #include "CharacterSet.h"
24 #include "LexerModule.h"
26 using namespace Scintilla
;
28 //XXX Identical to Perl, put in common area
// Returns true when ch is one of the two end-of-line characters,
// carriage return ('\r') or line feed ('\n').
static inline bool isEOLChar(char ch) {
    return ch == '\r' || ch == '\n';
}
33 #define isSafeASCII(ch) ((unsigned int)(ch) <= 127)
34 // This one's redundant, but makes for more readable code
35 #define isHighBitChar(ch) ((unsigned int)(ch) > 127)
// Returns true for 7-bit ASCII letters and for '_'.
// Bytes above 127 are rejected before isalpha() is consulted, so isalpha()
// is only ever called with a value in the range 0..127.
static inline bool isSafeAlpha(char ch) {
    const unsigned char uch = static_cast<unsigned char>(ch);
    return (uch <= 127 && isalpha(uch) != 0) || ch == '_';
}
// Returns true for 7-bit ASCII letters and digits and for '_'.
// Bytes above 127 are rejected before isalnum() is consulted, so isalnum()
// is only ever called with a value in the range 0..127.
static inline bool isSafeAlnum(char ch) {
    const unsigned char uch = static_cast<unsigned char>(ch);
    return (uch <= 127 && isalnum(uch) != 0) || ch == '_';
}
// Returns true for any byte above 127, for ASCII letters and digits,
// and for '_'. The high-bit test comes first, so isalnum() is only
// reached with a value in the range 0..127.
static inline bool isSafeAlnumOrHigh(char ch) {
    const unsigned char uch = static_cast<unsigned char>(ch);
    return uch > 127 || isalnum(uch) != 0 || ch == '_';
}
// Returns true only for the ASCII decimal digits '0'..'9'.
// Bytes above 127 are rejected before isdigit() is consulted.
static inline bool isSafeDigit(char ch) {
    const unsigned char uch = static_cast<unsigned char>(ch);
    return uch <= 127 && isdigit(uch) != 0;
}
// Returns true for the characters this lexer treats as part of a word:
// any byte above 127, ASCII letters and digits, and '_'.
static inline bool isSafeWordcharOrHigh(char ch) {
    // Note: scintilla's KeyWords.h includes '.' as a word-char, but this
    // lexer does not: we want to separate things that can take methods
    // from the methods called on them.
    const unsigned char uch = static_cast<unsigned char>(ch);
    return uch > 127 || isalnum(uch) != 0 || ch == '_';
}
// Returns true for in-line whitespace only: a space or a horizontal tab.
// End-of-line characters are deliberately not counted here.
static inline bool iswhitespace(char ch) {
    return ch == ' ' || ch == '\t';
}
64 #define MAX_KEYWORD_LENGTH 200
67 #define actual_style(style) (style & STYLE_MASK)
69 static bool followsDot(Sci_PositionU pos
, Accessor
&styler
) {
71 for (; pos
>= 1; --pos
) {
72 int style
= actual_style(styler
.StyleAt(pos
));
77 if (ch
== ' ' || ch
== '\t') {
85 return styler
[pos
] == '.';
94 // Forward declarations
95 static bool keywordIsAmbiguous(const char *prevWord
);
96 static bool keywordDoStartsLoop(Sci_Position pos
,
98 static bool keywordIsModifier(const char *word
,
102 static int ClassifyWordRb(Sci_PositionU start
, Sci_PositionU end
, WordList
&keywords
, Accessor
&styler
, char *prevWord
) {
103 char s
[MAX_KEYWORD_LENGTH
];
105 Sci_PositionU lim
= end
- start
+ 1; // num chars to copy
106 if (lim
>= MAX_KEYWORD_LENGTH
) {
107 lim
= MAX_KEYWORD_LENGTH
- 1;
109 for (i
= start
, j
= 0; j
< lim
; i
++, j
++) {
114 if (0 == strcmp(prevWord
, "class"))
115 chAttr
= SCE_RB_CLASSNAME
;
116 else if (0 == strcmp(prevWord
, "module"))
117 chAttr
= SCE_RB_MODULE_NAME
;
118 else if (0 == strcmp(prevWord
, "def"))
119 chAttr
= SCE_RB_DEFNAME
;
120 else if (keywords
.InList(s
) && ((start
== 0) || !followsDot(start
- 1, styler
))) {
121 if (keywordIsAmbiguous(s
)
122 && keywordIsModifier(s
, start
, styler
)) {
124 // Demoted keywords are colored as keywords,
125 // but do not affect changes in indentation.
127 // Consider the word 'if':
128 // 1. <<if test ...>> : normal
129 // 2. <<stmt if test>> : demoted
130 // 3. <<lhs = if ...>> : normal: start a new indent level
131 // 4. <<obj.if = 10>> : color as identifier, since it follows '.'
133 chAttr
= SCE_RB_WORD_DEMOTED
;
135 chAttr
= SCE_RB_WORD
;
138 chAttr
= SCE_RB_IDENTIFIER
;
139 styler
.ColourTo(end
, chAttr
);
140 if (chAttr
== SCE_RB_WORD
) {
149 //XXX Identical to Perl, put in common area
150 static bool isMatch(Accessor
&styler
, Sci_Position lengthDoc
, Sci_Position pos
, const char *val
) {
151 if ((pos
+ static_cast<int>(strlen(val
))) >= lengthDoc
) {
155 if (*val
!= styler
[pos
++]) {
163 // Do Ruby better -- find the end of the line, work back,
164 // and then check for leading white space
166 // Precondition: the here-doc target can be indented
167 static bool lookingAtHereDocDelim(Accessor
&styler
,
169 Sci_Position lengthDoc
,
170 const char *HereDocDelim
)
172 if (!isMatch(styler
, lengthDoc
, pos
, HereDocDelim
)) {
176 char ch
= styler
[pos
];
179 } else if (ch
!= ' ' && ch
!= '\t') {
186 //XXX Identical to Perl, put in common area
187 static char opposite(char ch
) {
199 // Null transitions when we see we've reached the end
200 // and need to relex the curr char.
202 static void redo_char(Sci_Position
&i
, char &ch
, char &chNext
, char &chNext2
,
207 state
= SCE_RB_DEFAULT
;
210 static void advance_char(Sci_Position
&i
, char &ch
, char &chNext
, char &chNext2
) {
216 // precondition: startPos points to one after the EOL char
217 static bool currLineContainsHereDelims(Sci_Position
&startPos
,
223 for (pos
= startPos
- 1; pos
> 0; pos
--) {
224 char ch
= styler
.SafeGetCharAt(pos
);
226 // Leave the pointers where they are -- there are no
227 // here doc delims on the current line, even if
228 // the EOL isn't default style
233 if (actual_style(styler
.StyleAt(pos
)) == SCE_RB_HERE_DELIM
) {
241 // Update the pointers so we don't have to re-analyze the string
246 // This class is used by the enter and exit methods, so it needs
247 // to be hoisted out of the function.
267 QuoteCls(const QuoteCls
&q
) {
268 // copy constructor -- use this for copying in
273 QuoteCls
&operator=(const QuoteCls
&q
) { // assignment constructor
285 static void enterInnerExpression(int *p_inner_string_types
,
286 int *p_inner_expn_brace_counts
,
287 QuoteCls
*p_inner_quotes
,
288 int &inner_string_count
,
293 p_inner_string_types
[inner_string_count
] = state
;
294 state
= SCE_RB_DEFAULT
;
295 p_inner_expn_brace_counts
[inner_string_count
] = brace_counts
;
297 p_inner_quotes
[inner_string_count
] = curr_quote
;
298 ++inner_string_count
;
301 static void exitInnerExpression(int *p_inner_string_types
,
302 int *p_inner_expn_brace_counts
,
303 QuoteCls
*p_inner_quotes
,
304 int &inner_string_count
,
309 --inner_string_count
;
310 state
= p_inner_string_types
[inner_string_count
];
311 brace_counts
= p_inner_expn_brace_counts
[inner_string_count
];
312 curr_quote
= p_inner_quotes
[inner_string_count
];
315 static bool isEmptyLine(Sci_Position pos
,
318 Sci_Position lineCurrent
= styler
.GetLine(pos
);
319 int indentCurrent
= styler
.IndentAmount(lineCurrent
, &spaceFlags
, NULL
);
320 return (indentCurrent
& SC_FOLDLEVELWHITEFLAG
) != 0;
// Reports whether `keyword` is one of the Ruby keywords listed below.
// The colouriser assigns the result to its preferRE flag after lexing a
// keyword, i.e. it decides whether a following '/' may open a regex.
//
// @param keyword  NUL-terminated word to look up (compared case-sensitively).
// @return true when the word is in the list, false otherwise.
static bool RE_CanFollowKeyword(const char *keyword) {
    static const char *const reKeywords[] = {
        "and", "begin", "break", "case", "do",
        "else", "elsif", "if", "next", "return",
        "when", "unless", "until", "not", "or"
    };
    const size_t count = sizeof(reKeywords) / sizeof(reKeywords[0]);
    for (size_t k = 0; k < count; ++k) {
        if (strcmp(keyword, reKeywords[k]) == 0) {
            return true;
        }
    }
    return false;
}
344 // Look at chars up to but not including endPos
345 // Don't look at styles in case we're looking forward
347 static Sci_Position
skipWhitespace(Sci_Position startPos
,
350 for (Sci_Position i
= startPos
; i
< endPos
; i
++) {
351 if (!iswhitespace(styler
[i
])) {
358 // This routine looks for false positives like
360 // There aren't too many.
362 // iPrev points to the start of <<
364 static bool sureThisIsHeredoc(Sci_Position iPrev
,
368 // Not so fast, since Ruby's so dynamic. Check the context
369 // to make sure we're OK.
371 Sci_Position lineStart
= styler
.GetLine(iPrev
);
372 Sci_Position lineStartPosn
= styler
.LineStart(lineStart
);
375 // Find the first word after some whitespace
376 Sci_Position firstWordPosn
= skipWhitespace(lineStartPosn
, iPrev
, styler
);
377 if (firstWordPosn
>= iPrev
) {
378 // Have something like {^ <<}
379 //XXX Look at the first previous non-comment non-white line
380 // to establish the context. Not too likely though.
383 switch (prevStyle
= styler
.StyleAt(firstWordPosn
)) {
385 case SCE_RB_WORD_DEMOTED
:
386 case SCE_RB_IDENTIFIER
:
392 Sci_Position firstWordEndPosn
= firstWordPosn
;
393 char *dst
= prevWord
;
395 if (firstWordEndPosn
>= iPrev
||
396 styler
.StyleAt(firstWordEndPosn
) != prevStyle
) {
400 *dst
++ = styler
[firstWordEndPosn
];
401 firstWordEndPosn
+= 1;
403 //XXX Write a style-aware thing to regex scintilla buffer objects
404 if (!strcmp(prevWord
, "undef")
405 || !strcmp(prevWord
, "def")
406 || !strcmp(prevWord
, "alias")) {
407 // These keywords are what we were looking for
413 // Routine that saves us from allocating a buffer for the here-doc target
414 // targetEndPos points one past the end of the current target
415 static bool haveTargetMatch(Sci_Position currPos
,
416 Sci_Position lengthDoc
,
417 Sci_Position targetStartPos
,
418 Sci_Position targetEndPos
,
420 if (lengthDoc
- currPos
< targetEndPos
- targetStartPos
) {
424 for (i
= targetStartPos
, j
= currPos
;
425 i
< targetEndPos
&& j
< lengthDoc
;
427 if (styler
[i
] != styler
[j
]) {
434 // Finds the start position of the expression containing @p pos
435 // @p min_pos should be a known expression start, e.g. the start of the line
436 static Sci_Position
findExpressionStart(Sci_Position pos
,
437 Sci_Position min_pos
,
440 for (; pos
> min_pos
; pos
-= 1) {
441 int style
= styler
.StyleAt(pos
- 1);
442 if (style
== SCE_RB_OPERATOR
) {
443 int ch
= styler
[pos
- 1];
444 if (ch
== '}' || ch
== ')' || ch
== ']') {
446 } else if (ch
== '{' || ch
== '(' || ch
== '[') {
452 } else if (ch
== ';' && depth
== 0) {
460 // We need a check because the form
461 // [identifier] <<[target]
462 // is ambiguous. The Ruby lexer/parser resolves it by
463 // looking to see if [identifier] names a variable or a
464 // function. If it's the latter, it's the start of a here-doc.
465 // If it's a var, it's an operator. This lexer doesn't
466 // maintain a symbol table, so it looks ahead to see what's
467 // going on, in cases where we have
468 // ^[white-space]*[identifier([.|::]identifier)*][white-space]*<<[target]
470 // If there's no occurrence of [target] on a line, assume we don't.
472 // return true == yes, we have no heredocs
474 static bool sureThisIsNotHeredoc(Sci_Position lt2StartPos
,
477 // Use full document, not just part we're styling
478 Sci_Position lengthDoc
= styler
.Length();
479 Sci_Position lineStart
= styler
.GetLine(lt2StartPos
);
480 Sci_Position lineStartPosn
= styler
.LineStart(lineStart
);
482 const bool definitely_not_a_here_doc
= true;
483 const bool looks_like_a_here_doc
= false;
485 // find the expression start rather than the line start
486 Sci_Position exprStartPosn
= findExpressionStart(lt2StartPos
, lineStartPosn
, styler
);
488 // Find the first word after some whitespace
489 Sci_Position firstWordPosn
= skipWhitespace(exprStartPosn
, lt2StartPos
, styler
);
490 if (firstWordPosn
>= lt2StartPos
) {
491 return definitely_not_a_here_doc
;
493 prevStyle
= styler
.StyleAt(firstWordPosn
);
494 // If we have '<<' following a keyword, it's not a heredoc
495 if (prevStyle
!= SCE_RB_IDENTIFIER
496 && prevStyle
!= SCE_RB_SYMBOL
497 && prevStyle
!= SCE_RB_INSTANCE_VAR
498 && prevStyle
!= SCE_RB_CLASS_VAR
) {
499 return definitely_not_a_here_doc
;
501 int newStyle
= prevStyle
;
502 // Some compilers incorrectly warn about uninit newStyle
503 for (firstWordPosn
+= 1; firstWordPosn
<= lt2StartPos
; firstWordPosn
+= 1) {
504 // Inner loop looks at the name
505 for (; firstWordPosn
<= lt2StartPos
; firstWordPosn
+= 1) {
506 newStyle
= styler
.StyleAt(firstWordPosn
);
507 if (newStyle
!= prevStyle
) {
511 // Do we have '::' or '.'?
512 if (firstWordPosn
< lt2StartPos
&& newStyle
== SCE_RB_OPERATOR
) {
513 char ch
= styler
[firstWordPosn
];
516 } else if (ch
== ':') {
517 if (styler
.StyleAt(++firstWordPosn
) != SCE_RB_OPERATOR
) {
518 return definitely_not_a_here_doc
;
519 } else if (styler
[firstWordPosn
] != ':') {
520 return definitely_not_a_here_doc
;
528 // on second and next passes, only identifiers may appear since
529 // class and instance variable are private
530 prevStyle
= SCE_RB_IDENTIFIER
;
532 // Skip next batch of white-space
533 firstWordPosn
= skipWhitespace(firstWordPosn
, lt2StartPos
, styler
);
534 // possible symbol for an implicit hash argument
535 if (firstWordPosn
< lt2StartPos
&& styler
.StyleAt(firstWordPosn
) == SCE_RB_SYMBOL
) {
536 for (; firstWordPosn
<= lt2StartPos
; firstWordPosn
+= 1) {
537 if (styler
.StyleAt(firstWordPosn
) != SCE_RB_SYMBOL
) {
541 // Skip next batch of white-space
542 firstWordPosn
= skipWhitespace(firstWordPosn
, lt2StartPos
, styler
);
544 if (firstWordPosn
!= lt2StartPos
) {
545 // Have [[^ws[identifier]ws[*something_else*]ws<<
546 return definitely_not_a_here_doc
;
548 // OK, now 'j' will point to the current spot moving ahead
549 Sci_Position j
= firstWordPosn
+ 1;
550 if (styler
.StyleAt(j
) != SCE_RB_OPERATOR
|| styler
[j
] != '<') {
551 // This shouldn't happen
552 return definitely_not_a_here_doc
;
554 Sci_Position nextLineStartPosn
= styler
.LineStart(lineStart
+ 1);
555 if (nextLineStartPosn
>= lengthDoc
) {
556 return definitely_not_a_here_doc
;
558 j
= skipWhitespace(j
+ 1, nextLineStartPosn
, styler
);
559 if (j
>= lengthDoc
) {
560 return definitely_not_a_here_doc
;
563 Sci_Position target_start
, target_end
;
564 // From this point on no more styling, since we're looking ahead
565 if (styler
[j
] == '-') {
569 allow_indent
= false;
572 // Allow for quoted targets.
573 char target_quote
= 0;
578 target_quote
= styler
[j
];
582 if (isSafeAlnum(styler
[j
])) {
583 // Init target_end because some compilers think it won't
584 // be initialized by the time it's used
585 target_start
= target_end
= j
;
588 return definitely_not_a_here_doc
;
590 for (; j
< lengthDoc
; j
++) {
591 if (!isSafeAlnum(styler
[j
])) {
592 if (target_quote
&& styler
[j
] != target_quote
) {
594 return definitely_not_a_here_doc
;
597 // And for now make sure that it's a newline
598 // don't handle arbitrary expressions yet
602 // Now we can move to the character after the string delimiter.
605 j
= skipWhitespace(j
, lengthDoc
, styler
);
606 if (j
>= lengthDoc
) {
607 return definitely_not_a_here_doc
;
610 if (ch
== '#' || isEOLChar(ch
)) {
611 // This is OK, so break and continue;
614 return definitely_not_a_here_doc
;
620 // Just look at the start of each line
621 Sci_Position last_line
= styler
.GetLine(lengthDoc
- 1);
622 // But don't go too far
623 if (last_line
> lineStart
+ 50) {
624 last_line
= lineStart
+ 50;
626 for (Sci_Position line_num
= lineStart
+ 1; line_num
<= last_line
; line_num
++) {
628 j
= skipWhitespace(styler
.LineStart(line_num
), lengthDoc
, styler
);
630 j
= styler
.LineStart(line_num
);
632 // target_end is one past the end
633 if (haveTargetMatch(j
, lengthDoc
, target_start
, target_end
, styler
)) {
635 return looks_like_a_here_doc
;
638 return definitely_not_a_here_doc
;
641 //todo: if we aren't looking at a stdio character,
642 // move to the start of the first line that is not in a
643 // multi-line construct
645 static void synchronizeDocStart(Sci_PositionU
&startPos
,
646 Sci_Position
&length
,
649 bool skipWhiteSpace
=false) {
652 int style
= actual_style(styler
.StyleAt(startPos
));
657 // Don't do anything else with these.
661 Sci_Position pos
= startPos
;
662 // Quick way to characterize each line
663 Sci_Position lineStart
;
664 for (lineStart
= styler
.GetLine(pos
); lineStart
> 0; lineStart
--) {
665 // Now look at the style before the previous line's EOL
666 pos
= styler
.LineStart(lineStart
) - 1;
671 char ch
= styler
.SafeGetCharAt(pos
);
672 char chPrev
= styler
.SafeGetCharAt(pos
- 1);
673 if (ch
== '\n' && chPrev
== '\r') {
676 if (styler
.SafeGetCharAt(pos
- 1) == '\\') {
677 // Continuation line -- keep going
678 } else if (actual_style(styler
.StyleAt(pos
)) != SCE_RB_DEFAULT
) {
679 // Part of multi-line construct -- keep going
680 } else if (currLineContainsHereDelims(pos
, styler
)) {
681 // Keep going, with pos and length now pointing
682 // at the end of the here-doc delimiter
683 } else if (skipWhiteSpace
&& isEmptyLine(pos
, styler
)) {
689 pos
= styler
.LineStart(lineStart
);
690 length
+= (startPos
- pos
);
692 initStyle
= SCE_RB_DEFAULT
;
695 static void ColouriseRbDoc(Sci_PositionU startPos
, Sci_Position length
, int initStyle
,
696 WordList
*keywordlists
[], Accessor
&styler
) {
698 // Lexer for Ruby often has to backtrack to start of current style to determine
699 // which characters are being used as quotes, how deeply nested is the
700 // start position and what the termination string is for here documents
702 WordList
&keywords
= *keywordlists
[0];
708 // 0: '<<' encountered
709 // 1: collect the delimiter
710 // 1b: text between the end of the delimiter and the EOL
711 // 2: here doc text (lines after the delimiter)
712 char Quote
; // the char after '<<'
713 bool Quoted
; // true if Quote in ('\'','"','`')
714 int DelimiterLength
; // strlen(Delimiter)
715 char Delimiter
[256]; // the Delimiter, limit of 256: from Perl
721 CanBeIndented
= false;
728 int numDots
= 0; // For numbers --
729 // Don't start lexing in the middle of a num
731 synchronizeDocStart(startPos
, length
, initStyle
, styler
, // ref args
734 bool preferRE
= true;
735 int state
= initStyle
;
736 Sci_Position lengthDoc
= startPos
+ length
;
738 char prevWord
[MAX_KEYWORD_LENGTH
+ 1]; // 1 byte for zero
743 char chPrev
= styler
.SafeGetCharAt(startPos
- 1);
744 char chNext
= styler
.SafeGetCharAt(startPos
);
745 bool is_real_number
= true; // Differentiate between constants and ?-sequences.
746 styler
.StartAt(startPos
);
747 styler
.StartSegment(startPos
);
749 static int q_states
[] = {SCE_RB_STRING_Q
,
756 static const char *q_chars
= "qQrwWx";
758 // In most cases a value of 2 should be ample for the code in the
759 // Ruby library, and the code the user is likely to enter.
761 // fu_output_message "mkdir #{options[:mode] ? ('-m %03o ' % options[:mode]) : ''}#{list.join ' '}"
762 // if options[:verbose]
763 // from fileutils.rb nests to a level of 2
764 // If the user actually hits a 6th occurrence of '#{' in a double-quoted
765 // string (including regex'es, %Q, %<sym>, %w, and other strings
766 // that interpolate), it will stay as a string. The problem with this
767 // is that quotes might flip, a 7th '#{' will look like a comment,
768 // and code-folding might be wrong.
770 // If anyone runs into this problem, I recommend raising this
771 // value slightly higher or replacing the fixed array with a linked
772 // list. Keep in mind this code will be called every time the lexer
775 #define INNER_STRINGS_MAX_COUNT 5
776 // These vars track our instances of "...#{,,,%Q<..#{,,,}...>,,,}..."
777 int inner_string_types
[INNER_STRINGS_MAX_COUNT
];
778 // Track # braces when we push a new #{ thing
779 int inner_expn_brace_counts
[INNER_STRINGS_MAX_COUNT
];
780 QuoteCls inner_quotes
[INNER_STRINGS_MAX_COUNT
];
781 int inner_string_count
= 0;
782 int brace_counts
= 0; // Number of #{ ... } things within an expression
785 for (i
= 0; i
< INNER_STRINGS_MAX_COUNT
; i
++) {
786 inner_string_types
[i
] = 0;
787 inner_expn_brace_counts
[i
] = 0;
789 for (i
= startPos
; i
< lengthDoc
; i
++) {
791 chNext
= styler
.SafeGetCharAt(i
+ 1);
792 char chNext2
= styler
.SafeGetCharAt(i
+ 2);
794 if (styler
.IsLeadByte(ch
)) {
801 // skip on DOS/Windows
802 //No, don't, because some things will get tagged on,
803 // so we won't recognize keywords, for example
805 if (ch
== '\r' && chNext
== '\n') {
810 if (HereDoc
.State
== 1 && isEOLChar(ch
)) {
811 // Begin of here-doc (the line after the here-doc delimiter):
813 styler
.ColourTo(i
-1, state
);
814 // Don't check for a missing quote, just jump into
815 // the here-doc state
816 state
= SCE_RB_HERE_Q
;
819 // Regular transitions
820 if (state
== SCE_RB_DEFAULT
) {
821 if (isSafeDigit(ch
)) {
822 styler
.ColourTo(i
- 1, state
);
823 state
= SCE_RB_NUMBER
;
824 is_real_number
= true;
826 } else if (isHighBitChar(ch
) || iswordstart(ch
)) {
827 styler
.ColourTo(i
- 1, state
);
829 } else if (ch
== '#') {
830 styler
.ColourTo(i
- 1, state
);
831 state
= SCE_RB_COMMENTLINE
;
832 } else if (ch
== '=') {
833 // =begin indicates the start of a comment (doc) block
834 if ((i
== 0 || isEOLChar(chPrev
))
836 && styler
.SafeGetCharAt(i
+ 2) == 'e'
837 && styler
.SafeGetCharAt(i
+ 3) == 'g'
838 && styler
.SafeGetCharAt(i
+ 4) == 'i'
839 && styler
.SafeGetCharAt(i
+ 5) == 'n'
840 && !isSafeWordcharOrHigh(styler
.SafeGetCharAt(i
+ 6))) {
841 styler
.ColourTo(i
- 1, state
);
844 styler
.ColourTo(i
- 1, state
);
845 styler
.ColourTo(i
, SCE_RB_OPERATOR
);
848 } else if (ch
== '"') {
849 styler
.ColourTo(i
- 1, state
);
850 state
= SCE_RB_STRING
;
853 } else if (ch
== '\'') {
854 styler
.ColourTo(i
- 1, state
);
855 state
= SCE_RB_CHARACTER
;
858 } else if (ch
== '`') {
859 styler
.ColourTo(i
- 1, state
);
860 state
= SCE_RB_BACKTICKS
;
863 } else if (ch
== '@') {
864 // Instance or class var
865 styler
.ColourTo(i
- 1, state
);
867 state
= SCE_RB_CLASS_VAR
;
868 advance_char(i
, ch
, chNext
, chNext2
); // pass by ref
870 state
= SCE_RB_INSTANCE_VAR
;
872 } else if (ch
== '$') {
873 // Check for a builtin global
874 styler
.ColourTo(i
- 1, state
);
875 // Recognize it bit by bit
876 state
= SCE_RB_GLOBAL
;
877 } else if (ch
== '/' && preferRE
) {
879 styler
.ColourTo(i
- 1, state
);
880 state
= SCE_RB_REGEX
;
883 } else if (ch
== '<' && chNext
== '<' && chNext2
!= '=') {
885 // Recognise the '<<' symbol - either a here document or a binary op
886 styler
.ColourTo(i
- 1, state
);
889 styler
.ColourTo(i
, SCE_RB_OPERATOR
);
891 if (!(strchr("\"\'`_-", chNext2
) || isSafeAlpha(chNext2
))) {
892 // It's definitely not a here-doc,
893 // based on Ruby's lexer/parser in the
894 // heredoc_identifier routine.
895 // Nothing else to do.
896 } else if (preferRE
) {
897 if (sureThisIsHeredoc(i
- 1, styler
, prevWord
)) {
898 state
= SCE_RB_HERE_DELIM
;
901 // else leave it in default state
903 if (sureThisIsNotHeredoc(i
- 1, styler
)) {
904 // leave state as default
905 // We don't have all the heuristics Perl has for indications
906 // of a here-doc, because '<<' is overloadable and used
907 // for so many other classes.
909 state
= SCE_RB_HERE_DELIM
;
913 preferRE
= (state
!= SCE_RB_HERE_DELIM
);
914 } else if (ch
== ':') {
915 styler
.ColourTo(i
- 1, state
);
917 // Mark "::" as an operator, not symbol start
918 styler
.ColourTo(i
+ 1, SCE_RB_OPERATOR
);
919 advance_char(i
, ch
, chNext
, chNext2
); // pass by ref
920 state
= SCE_RB_DEFAULT
;
922 } else if (isSafeWordcharOrHigh(chNext
)) {
923 state
= SCE_RB_SYMBOL
;
924 } else if ((chNext
== '@' || chNext
== '$') &&
925 isSafeWordcharOrHigh(chNext2
)) {
926 // instance and global variable followed by an identifier
927 advance_char(i
, ch
, chNext
, chNext2
);
928 state
= SCE_RB_SYMBOL
;
929 } else if (((chNext
== '@' && chNext2
== '@') ||
930 (chNext
== '$' && chNext2
== '-')) &&
931 isSafeWordcharOrHigh(styler
.SafeGetCharAt(i
+3))) {
932 // class variables and special global variable "$-IDENTCHAR"
933 state
= SCE_RB_SYMBOL
;
934 // $-IDENTCHAR doesn't continue past the IDENTCHAR
936 styler
.ColourTo(i
+3, SCE_RB_SYMBOL
);
937 state
= SCE_RB_DEFAULT
;
940 ch
= styler
.SafeGetCharAt(i
);
941 chNext
= styler
.SafeGetCharAt(i
+1);
942 } else if (chNext
== '$' && strchr("_~*$?!@/\\;,.=:<>\"&`'+", chNext2
)) {
943 // single-character special global variables
946 chNext
= styler
.SafeGetCharAt(i
+1);
947 styler
.ColourTo(i
, SCE_RB_SYMBOL
);
948 state
= SCE_RB_DEFAULT
;
949 } else if (strchr("[*!~+-*/%=<>&^|", chNext
)) {
950 // Do the operator analysis in-line, looking ahead
951 // Based on the table in pickaxe 2nd ed., page 339
952 bool doColoring
= true;
955 if (chNext2
== ']') {
956 char ch_tmp
= styler
.SafeGetCharAt(i
+ 3);
960 chNext
= styler
.SafeGetCharAt(i
+ 1);
972 if (chNext2
== '*') {
975 chNext
= styler
.SafeGetCharAt(i
+ 1);
977 advance_char(i
, ch
, chNext
, chNext2
);
982 if (chNext2
== '=' || chNext2
== '~') {
985 chNext
= styler
.SafeGetCharAt(i
+ 1);
987 advance_char(i
, ch
, chNext
, chNext2
);
992 if (chNext2
== '<') {
995 chNext
= styler
.SafeGetCharAt(i
+ 1);
996 } else if (chNext2
== '=') {
997 char ch_tmp
= styler
.SafeGetCharAt(i
+ 3);
998 if (ch_tmp
== '>') { // <=> operator
1001 chNext
= styler
.SafeGetCharAt(i
+ 1);
1008 advance_char(i
, ch
, chNext
, chNext2
);
1013 // Simple one-character operators
1014 advance_char(i
, ch
, chNext
, chNext2
);
1018 styler
.ColourTo(i
, SCE_RB_SYMBOL
);
1019 state
= SCE_RB_DEFAULT
;
1021 } else if (!preferRE
) {
1022 // Don't color symbol strings (yet)
1023 // Just color the ":" and color rest as string
1024 styler
.ColourTo(i
, SCE_RB_SYMBOL
);
1025 state
= SCE_RB_DEFAULT
;
1027 styler
.ColourTo(i
, SCE_RB_OPERATOR
);
1028 state
= SCE_RB_DEFAULT
;
1031 } else if (ch
== '%') {
1032 styler
.ColourTo(i
- 1, state
);
1033 bool have_string
= false;
1034 if (strchr(q_chars
, chNext
) && !isSafeWordcharOrHigh(chNext2
)) {
1036 const char *hit
= strchr(q_chars
, chNext
);
1038 state
= q_states
[hit
- q_chars
];
1039 Quote
.Open(chNext2
);
1042 chNext
= styler
.SafeGetCharAt(i
+ 1);
1045 } else if (preferRE
&& !isSafeWordcharOrHigh(chNext
)) {
1046 // Ruby doesn't allow high bit chars here,
1047 // but the editor host might
1049 state
= SCE_RB_STRING_QQ
;
1051 advance_char(i
, ch
, chNext
, chNext2
); // pass by ref
1053 } else if (!isSafeWordcharOrHigh(chNext
) && !iswhitespace(chNext
) && !isEOLChar(chNext
)) {
1054 // Ruby doesn't allow high bit chars here,
1055 // but the editor host might
1057 state
= SCE_RB_STRING_QQ
;
1059 advance_char(i
, ch
, chNext
, chNext2
); // pass by ref
1063 styler
.ColourTo(i
, SCE_RB_OPERATOR
);
1067 } else if (ch
== '?') {
1068 styler
.ColourTo(i
- 1, state
);
1069 if (iswhitespace(chNext
) || chNext
== '\n' || chNext
== '\r') {
1070 styler
.ColourTo(i
, SCE_RB_OPERATOR
);
1072 // It's the start of a character code escape sequence
1073 // Color it as a number.
1074 state
= SCE_RB_NUMBER
;
1075 is_real_number
= false;
1077 } else if (isoperator(ch
) || ch
== '.') {
1078 styler
.ColourTo(i
- 1, state
);
1079 styler
.ColourTo(i
, SCE_RB_OPERATOR
);
1080 // If we're ending an expression or block,
1081 // assume it ends an object, and the ambivalent
1082 // constructs are binary operators
1084 // So if we don't have one of these chars,
1085 // we aren't ending an object exp'n, and ops
1086 // like : << / are unary operators.
1091 } else if (ch
== '}' && --brace_counts
< 0
1092 && inner_string_count
> 0) {
1093 styler
.ColourTo(i
, SCE_RB_OPERATOR
);
1094 exitInnerExpression(inner_string_types
,
1095 inner_expn_brace_counts
,
1098 state
, brace_counts
, Quote
);
1100 preferRE
= (strchr(")}].", ch
) == NULL
);
1102 // Stay in default state
1103 } else if (isEOLChar(ch
)) {
1104 // Make sure it's a true line-end, with no backslash
1105 if ((ch
== '\r' || (ch
== '\n' && chPrev
!= '\r'))
1106 && chPrev
!= '\\') {
1107 // Assume we've hit the end of the statement.
1111 } else if (state
== SCE_RB_WORD
) {
1112 if (ch
== '.' || !isSafeWordcharOrHigh(ch
)) {
1113 // Words include x? in all contexts,
1114 // and <letters>= after either 'def' or a dot
1115 // Move along until a complete word is on our left
1117 // Default accessor treats '.' as word-chars,
1118 // but we don't for now.
1121 && isSafeWordcharOrHigh(chPrev
)
1123 || strchr(" \t\n\r", chNext
) != NULL
)
1124 && (!strcmp(prevWord
, "def")
1125 || followsDot(styler
.GetStartSegment(), styler
))) {
1126 // <name>= is a name only when being def'd -- Get it the next time
1127 // This means that <name>=<name> is always lexed as
1128 // <name>, (op, =), <name>
1129 } else if (ch
== ':'
1130 && isSafeWordcharOrHigh(chPrev
)
1131 && strchr(" \t\n\r", chNext
) != NULL
) {
1132 state
= SCE_RB_SYMBOL
;
1133 } else if ((ch
== '?' || ch
== '!')
1134 && isSafeWordcharOrHigh(chPrev
)
1135 && !isSafeWordcharOrHigh(chNext
)) {
1136 // <name>? is a name -- Get it the next time
1137 // But <name>?<name> is always lexed as
1138 // <name>, (op, ?), <name>
1139 // Same with <name>! to indicate a method that
1140 // modifies its target
1141 } else if (isEOLChar(ch
)
1142 && isMatch(styler
, lengthDoc
, i
- 7, "__END__")) {
1143 styler
.ColourTo(i
, SCE_RB_DATASECTION
);
1144 state
= SCE_RB_DATASECTION
;
1145 // No need to handle this state -- we'll just move to the end
1148 Sci_Position wordStartPos
= styler
.GetStartSegment();
1149 int word_style
= ClassifyWordRb(wordStartPos
, i
- 1, keywords
, styler
, prevWord
);
1150 switch (word_style
) {
1152 preferRE
= RE_CanFollowKeyword(prevWord
);
1155 case SCE_RB_WORD_DEMOTED
:
1159 case SCE_RB_IDENTIFIER
:
1160 if (isMatch(styler
, lengthDoc
, wordStartPos
, "print")) {
1162 } else if (isEOLChar(ch
)) {
1172 // We might be redefining an operator-method
1175 // And if it's the first
1176 redo_char(i
, ch
, chNext
, chNext2
, state
); // pass by ref
1179 } else if (state
== SCE_RB_NUMBER
) {
1180 if (!is_real_number
) {
1182 styler
.ColourTo(i
, state
);
1183 state
= SCE_RB_DEFAULT
;
1185 } else if (strchr("\\ntrfvaebs", chNext
)) {
1186 // Terminal escape sequence -- handle it next time
1187 // Nothing more to do this time through the loop
1188 } else if (chNext
== 'C' || chNext
== 'M') {
1189 if (chNext2
!= '-') {
1190 // \C or \M ends the sequence -- handle it next time
1192 // Move from abc?\C-x
1198 chNext
= styler
.SafeGetCharAt(i
+ 1);
1200 } else if (chNext
== 'c') {
1201 // Stay here, \c is a combining sequence
1202 advance_char(i
, ch
, chNext
, chNext2
); // pass by ref
1204 // ?\x, including ?\\ is final.
1205 styler
.ColourTo(i
+ 1, state
);
1206 state
= SCE_RB_DEFAULT
;
1208 advance_char(i
, ch
, chNext
, chNext2
);
1210 } else if (isSafeAlnumOrHigh(ch
) || ch
== '_') {
1212 } else if (ch
== '.' && chNext
== '.') {
1214 styler
.ColourTo(i
- 1, state
);
1215 redo_char(i
, ch
, chNext
, chNext2
, state
); // pass by ref
1216 } else if (ch
== '.' && ++numDots
== 1) {
1219 styler
.ColourTo(i
- 1, state
);
1220 redo_char(i
, ch
, chNext
, chNext2
, state
); // pass by ref
1223 } else if (state
== SCE_RB_COMMENTLINE
) {
1224 if (isEOLChar(ch
)) {
1225 styler
.ColourTo(i
- 1, state
);
1226 state
= SCE_RB_DEFAULT
;
1227 // Use whatever setting we had going into the comment
1229 } else if (state
== SCE_RB_HERE_DELIM
) {
1230 // See the comment for SCE_RB_HERE_DELIM in LexPerl.cxx
1231 // Slightly different: if we find an immediate '-',
1232 // the target can appear indented.
1234 if (HereDoc
.State
== 0) { // '<<' encountered
1236 HereDoc
.DelimiterLength
= 0;
1238 HereDoc
.CanBeIndented
= true;
1239 advance_char(i
, ch
, chNext
, chNext2
); // pass by ref
1241 HereDoc
.CanBeIndented
= false;
1243 if (isEOLChar(ch
)) {
1244 // Bail out of doing a here doc if there's no target
1245 state
= SCE_RB_DEFAULT
;
1250 if (ch
== '\'' || ch
== '"' || ch
== '`') {
1251 HereDoc
.Quoted
= true;
1252 HereDoc
.Delimiter
[0] = '\0';
1254 HereDoc
.Quoted
= false;
1255 HereDoc
.Delimiter
[0] = ch
;
1256 HereDoc
.Delimiter
[1] = '\0';
1257 HereDoc
.DelimiterLength
= 1;
1260 } else if (HereDoc
.State
== 1) { // collect the delimiter
1261 if (isEOLChar(ch
)) {
1262 // End the quote now, and go back for more
1263 styler
.ColourTo(i
- 1, state
);
1264 state
= SCE_RB_DEFAULT
;
1268 } else if (HereDoc
.Quoted
) {
1269 if (ch
== HereDoc
.Quote
) { // closing quote => end of delimiter
1270 styler
.ColourTo(i
, state
);
1271 state
= SCE_RB_DEFAULT
;
1274 if (ch
== '\\' && !isEOLChar(chNext
)) {
1275 advance_char(i
, ch
, chNext
, chNext2
);
1277 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
++] = ch
;
1278 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
] = '\0';
1280 } else { // an unquoted here-doc delimiter
1281 if (isSafeAlnumOrHigh(ch
) || ch
== '_') {
1282 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
++] = ch
;
1283 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
] = '\0';
1285 styler
.ColourTo(i
- 1, state
);
1286 redo_char(i
, ch
, chNext
, chNext2
, state
);
1290 if (HereDoc
.DelimiterLength
>= static_cast<int>(sizeof(HereDoc
.Delimiter
)) - 1) {
1291 styler
.ColourTo(i
- 1, state
);
1292 state
= SCE_RB_ERROR
;
1296 } else if (state
== SCE_RB_HERE_Q
) {
1297 // Not needed: HereDoc.State == 2
1298 // Indentable here docs: look backwards
1299 // Non-indentable: look forwards, like in Perl
1301 // Why: so we can quickly resolve things like <<-" abc"
1303 if (!HereDoc
.CanBeIndented
) {
1304 if (isEOLChar(chPrev
)
1305 && isMatch(styler
, lengthDoc
, i
, HereDoc
.Delimiter
)) {
1306 styler
.ColourTo(i
- 1, state
);
1307 i
+= HereDoc
.DelimiterLength
- 1;
1308 chNext
= styler
.SafeGetCharAt(i
+ 1);
1309 if (isEOLChar(chNext
)) {
1310 styler
.ColourTo(i
, SCE_RB_HERE_DELIM
);
1311 state
= SCE_RB_DEFAULT
;
1315 // Otherwise we skipped through the here doc faster.
1317 } else if (isEOLChar(chNext
)
1318 && lookingAtHereDocDelim(styler
,
1319 i
- HereDoc
.DelimiterLength
+ 1,
1321 HereDoc
.Delimiter
)) {
1322 styler
.ColourTo(i
- 1 - HereDoc
.DelimiterLength
, state
);
1323 styler
.ColourTo(i
, SCE_RB_HERE_DELIM
);
1324 state
= SCE_RB_DEFAULT
;
1328 } else if (state
== SCE_RB_CLASS_VAR
1329 || state
== SCE_RB_INSTANCE_VAR
1330 || state
== SCE_RB_SYMBOL
) {
1331 if (state
== SCE_RB_SYMBOL
&&
1332 // FIDs suffices '?' and '!'
1333 (((ch
== '!' || ch
== '?') && chNext
!= '=') ||
1334 // identifier suffix '='
1335 (ch
== '=' && (chNext
!= '~' && chNext
!= '>' &&
1336 (chNext
!= '=' || chNext2
== '>'))))) {
1337 styler
.ColourTo(i
, state
);
1338 state
= SCE_RB_DEFAULT
;
1340 } else if (!isSafeWordcharOrHigh(ch
)) {
1341 styler
.ColourTo(i
- 1, state
);
1342 redo_char(i
, ch
, chNext
, chNext2
, state
); // pass by ref
1345 } else if (state
== SCE_RB_GLOBAL
) {
1346 if (!isSafeWordcharOrHigh(ch
)) {
1347 // handle special globals here as well
1348 if (chPrev
== '$') {
1350 // Include the next char, like $-a
1351 advance_char(i
, ch
, chNext
, chNext2
);
1353 styler
.ColourTo(i
, state
);
1354 state
= SCE_RB_DEFAULT
;
1356 styler
.ColourTo(i
- 1, state
);
1357 redo_char(i
, ch
, chNext
, chNext2
, state
); // pass by ref
1361 } else if (state
== SCE_RB_POD
) {
1362 // PODs end with ^=end\s, -- any whitespace can follow =end
1363 if (strchr(" \t\n\r", ch
) != NULL
1365 && isEOLChar(styler
[i
- 5])
1366 && isMatch(styler
, lengthDoc
, i
- 4, "=end")) {
1367 styler
.ColourTo(i
- 1, state
);
1368 state
= SCE_RB_DEFAULT
;
1371 } else if (state
== SCE_RB_REGEX
|| state
== SCE_RB_STRING_QR
) {
1372 if (ch
== '\\' && Quote
.Up
!= '\\') {
1374 advance_char(i
, ch
, chNext
, chNext2
);
1375 } else if (ch
== Quote
.Down
) {
1377 if (Quote
.Count
== 0) {
1378 // Include the options
1379 while (isSafeAlpha(chNext
)) {
1382 chNext
= styler
.SafeGetCharAt(i
+ 1);
1384 styler
.ColourTo(i
, state
);
1385 state
= SCE_RB_DEFAULT
;
1388 } else if (ch
== Quote
.Up
) {
1389 // Only if close quoter != open quoter
1392 } else if (ch
== '#') {
1394 && inner_string_count
< INNER_STRINGS_MAX_COUNT
) {
1396 styler
.ColourTo(i
- 1, state
);
1397 styler
.ColourTo(i
+ 1, SCE_RB_OPERATOR
);
1398 enterInnerExpression(inner_string_types
,
1399 inner_expn_brace_counts
,
1407 advance_char(i
, ch
, chNext
, chNext2
);
1409 //todo: distinguish comments from pound chars
1410 // for now, handle as comment
1411 styler
.ColourTo(i
- 1, state
);
1412 bool inEscape
= false;
1413 while (++i
< lengthDoc
) {
1414 ch
= styler
.SafeGetCharAt(i
);
1417 } else if (isEOLChar(ch
)) {
1418 // Comment inside a regex
1419 styler
.ColourTo(i
- 1, SCE_RB_COMMENTLINE
);
1421 } else if (inEscape
) {
1422 inEscape
= false; // don't look at char
1423 } else if (ch
== Quote
.Down
) {
1424 // Have the regular handler deal with this
1425 // to get trailing modifiers.
1431 chNext
= styler
.SafeGetCharAt(i
+ 1);
1434 // Quotes of all kinds...
1435 } else if (state
== SCE_RB_STRING_Q
|| state
== SCE_RB_STRING_QQ
||
1436 state
== SCE_RB_STRING_QX
|| state
== SCE_RB_STRING_QW
||
1437 state
== SCE_RB_STRING
|| state
== SCE_RB_CHARACTER
||
1438 state
== SCE_RB_BACKTICKS
) {
1439 if (!Quote
.Down
&& !isspacechar(ch
)) {
1441 } else if (ch
== '\\' && Quote
.Up
!= '\\') {
1442 //Riddle me this: Is it safe to skip *every* escaped char?
1443 advance_char(i
, ch
, chNext
, chNext2
);
1444 } else if (ch
== Quote
.Down
) {
1446 if (Quote
.Count
== 0) {
1447 styler
.ColourTo(i
, state
);
1448 state
= SCE_RB_DEFAULT
;
1451 } else if (ch
== Quote
.Up
) {
1453 } else if (ch
== '#' && chNext
== '{'
1454 && inner_string_count
< INNER_STRINGS_MAX_COUNT
1455 && state
!= SCE_RB_CHARACTER
1456 && state
!= SCE_RB_STRING_Q
) {
1458 styler
.ColourTo(i
- 1, state
);
1459 styler
.ColourTo(i
+ 1, SCE_RB_OPERATOR
);
1460 enterInnerExpression(inner_string_types
,
1461 inner_expn_brace_counts
,
1469 advance_char(i
, ch
, chNext
, chNext2
);
1473 if (state
== SCE_RB_ERROR
) {
1478 if (state
== SCE_RB_WORD
) {
1479 // We've ended on a word, possibly at EOF, and need to
1481 (void) ClassifyWordRb(styler
.GetStartSegment(), lengthDoc
- 1, keywords
, styler
, prevWord
);
1483 styler
.ColourTo(lengthDoc
- 1, state
);
1487 // Helper functions for folding, disambiguation keywords
1488 // Assert that there are no high-bit chars
1490 static void getPrevWord(Sci_Position pos
,
1497 for (i
= pos
- 1; i
> 0; i
--) {
1498 if (actual_style(styler
.StyleAt(i
)) != word_state
) {
1503 if (i
< pos
- MAX_KEYWORD_LENGTH
) // overflow
1504 i
= pos
- MAX_KEYWORD_LENGTH
;
1505 char *dst
= prevWord
;
1506 for (; i
<= pos
; i
++) {
// Return true when prevWord is exactly one of "if", "do", "while",
// "unless", "until" or "for"; false for any other string.
static bool keywordIsAmbiguous(const char *prevWord)
{
    // Listed from most likely used to least likely.
    // Ruby has lots of ways to write a loop besides 'while/until'.
    static const char *const candidates[] = {
        "if", "do", "while", "unless", "until", "for"
    };
    const unsigned int count = sizeof(candidates) / sizeof(candidates[0]);
    for (unsigned int n = 0; n < count; n++) {
        if (strcmp(prevWord, candidates[n]) == 0) {
            return true;
        }
    }
    return false;
}
1528 // Demote keywords in the following conditions:
1529 // if, while, unless, until modify a statement
1530 // do after a while or until, as a noise word (like then after if)
1532 static bool keywordIsModifier(const char *word
,
1536 if (word
[0] == 'd' && word
[1] == 'o' && !word
[2]) {
1537 return keywordDoStartsLoop(pos
, styler
);
1539 char ch
, chPrev
, chPrev2
;
1540 int style
= SCE_RB_DEFAULT
;
1541 Sci_Position lineStart
= styler
.GetLine(pos
);
1542 Sci_Position lineStartPosn
= styler
.LineStart(lineStart
);
1543 // We want to step backwards until we don't care about the current
1544 // position. But first move lineStartPosn back behind any
1545 // continuations immediately above word.
1546 while (lineStartPosn
> 0) {
1547 ch
= styler
[lineStartPosn
-1];
1548 if (ch
== '\n' || ch
== '\r') {
1549 chPrev
= styler
.SafeGetCharAt(lineStartPosn
-2);
1550 chPrev2
= styler
.SafeGetCharAt(lineStartPosn
-3);
1551 lineStart
= styler
.GetLine(lineStartPosn
-1);
1552 // If we find a continuation line, include it in our analysis.
1553 if (chPrev
== '\\') {
1554 lineStartPosn
= styler
.LineStart(lineStart
);
1555 } else if (ch
== '\n' && chPrev
== '\r' && chPrev2
== '\\') {
1556 lineStartPosn
= styler
.LineStart(lineStart
);
1566 while (--pos
>= lineStartPosn
) {
1567 style
= actual_style(styler
.StyleAt(pos
));
1568 if (style
== SCE_RB_DEFAULT
) {
1569 if (iswhitespace(ch
= styler
[pos
])) {
1571 } else if (ch
== '\r' || ch
== '\n') {
1572 // Scintilla's LineStart() and GetLine() routines aren't
1573 // platform-independent, so if we have text prepared with
1574 // a different system we can't rely on it.
1576 // Also, lineStartPosn may have been moved to more than one
1577 // line above word's line while pushing past continuations.
1578 chPrev
= styler
.SafeGetCharAt(pos
- 1);
1579 chPrev2
= styler
.SafeGetCharAt(pos
- 2);
1580 if (chPrev
== '\\') {
1581 pos
-=1; // gloss over the "\\"
1583 } else if (ch
== '\n' && chPrev
== '\r' && chPrev2
== '\\') {
1584 pos
-=2; // gloss over the "\\\r"
1594 if (pos
< lineStartPosn
) {
1597 // First things where the action is unambiguous
1599 case SCE_RB_DEFAULT
:
1600 case SCE_RB_COMMENTLINE
:
1602 case SCE_RB_CLASSNAME
:
1603 case SCE_RB_DEFNAME
:
1604 case SCE_RB_MODULE_NAME
:
1606 case SCE_RB_OPERATOR
:
1609 // Watch out for uses of 'else if'
1610 //XXX: Make a list of other keywords where 'if' isn't a modifier
1611 // and can appear legitimately
1612 // Formulate this to avoid warnings from most compilers
1613 if (strcmp(word
, "if") == 0) {
1614 char prevWord
[MAX_KEYWORD_LENGTH
+ 1];
1615 getPrevWord(pos
, prevWord
, styler
, SCE_RB_WORD
);
1616 return strcmp(prevWord
, "else") != 0;
1622 // Assume that if the keyword follows an operator,
1623 // usually it's a block assignment, like
1624 // a << if x then y else z
1637 #define WHILE_BACKWARDS "elihw"
1638 #define UNTIL_BACKWARDS "litnu"
1639 #define FOR_BACKWARDS "rof"
1641 // Nothing fancy -- look to see if we follow a while/until somewhere
1642 // on the current line
1644 static bool keywordDoStartsLoop(Sci_Position pos
,
1649 Sci_Position lineStart
= styler
.GetLine(pos
);
1650 Sci_Position lineStartPosn
= styler
.LineStart(lineStart
);
1652 while (--pos
>= lineStartPosn
) {
1653 style
= actual_style(styler
.StyleAt(pos
));
1654 if (style
== SCE_RB_DEFAULT
) {
1655 if ((ch
= styler
[pos
]) == '\r' || ch
== '\n') {
1656 // Scintilla's LineStart() and GetLine() routines aren't
1657 // platform-independent, so if we have text prepared with
1658 // a different system we can't rely on it.
1661 } else if (style
== SCE_RB_WORD
) {
1662 // Check for while or until, but write the word in backwards
1663 char prevWord
[MAX_KEYWORD_LENGTH
+ 1]; // 1 byte for zero
1664 char *dst
= prevWord
;
1666 Sci_Position start_word
;
1667 for (start_word
= pos
;
1668 start_word
>= lineStartPosn
&& actual_style(styler
.StyleAt(start_word
)) == SCE_RB_WORD
;
1670 if (++wordLen
< MAX_KEYWORD_LENGTH
) {
1671 *dst
++ = styler
[start_word
];
1675 // Did we see our keyword?
1676 if (!strcmp(prevWord
, WHILE_BACKWARDS
)
1677 || !strcmp(prevWord
, UNTIL_BACKWARDS
)
1678 || !strcmp(prevWord
, FOR_BACKWARDS
)) {
1681 // We can move pos to the beginning of the keyword, and then
1682 // accept another decrement, as we can never have two contiguous
1686 // <- move to start_word
1688 // <- loop decrement
1689 // ^ # pointing to end of word1 is fine
1696 static bool IsCommentLine(Sci_Position line
, Accessor
&styler
) {
1697 Sci_Position pos
= styler
.LineStart(line
);
1698 Sci_Position eol_pos
= styler
.LineStart(line
+ 1) - 1;
1699 for (Sci_Position i
= pos
; i
< eol_pos
; i
++) {
1700 char ch
= styler
[i
];
1703 else if (ch
!= ' ' && ch
!= '\t')
1712 * The language is quite complex to analyze without a full parse.
1713 * For example, this line shouldn't affect fold level:
1715 * print "hello" if feeling_friendly?
1717 * Neither should this:
1720 * if feeling_friendly?
1725 * if feeling_friendly? #++
1730 * So we cheat, by actually looking at the existing indentation
1731 * levels for each line, and just echoing it back. Like Python.
1732 * Then if we get better at it, we'll take braces into consideration,
1733 * which always affect folding levels.
1735 * How the keywords should work:
1737 * __FILE__ __LINE__ BEGIN END alias and
1738 * defined? false in nil not or self super then
1742 * begin class def do for module when {
1747 * Increment if these start a statement
1748 * if unless until while -- do nothing if they're modifiers
1750 * These end a block if there's no modifier, but don't bother
1751 * break next redo retry return yield
1753 * These temporarily de-indent, but re-indent
1754 * case else elsif ensure rescue
 * This means that the folder reflects indentation rather
 * than setting it.  The language service updates indentation
 * when users type return and finish entering de-denters.
1760 * Later offer to fold POD, here-docs, strings, and blocks of comments
1763 static void FoldRbDoc(Sci_PositionU startPos
, Sci_Position length
, int initStyle
,
1764 WordList
*[], Accessor
&styler
) {
1765 const bool foldCompact
= styler
.GetPropertyInt("fold.compact", 1) != 0;
1766 bool foldComment
= styler
.GetPropertyInt("fold.comment") != 0;
1768 synchronizeDocStart(startPos
, length
, initStyle
, styler
, // ref args
1770 Sci_PositionU endPos
= startPos
+ length
;
1771 int visibleChars
= 0;
1772 Sci_Position lineCurrent
= styler
.GetLine(startPos
);
1773 int levelPrev
= startPos
== 0 ? 0 : (styler
.LevelAt(lineCurrent
)
1774 & SC_FOLDLEVELNUMBERMASK
1775 & ~SC_FOLDLEVELBASE
);
1776 int levelCurrent
= levelPrev
;
1777 char chNext
= styler
[startPos
];
1778 int styleNext
= styler
.StyleAt(startPos
);
1779 int stylePrev
= startPos
<= 1 ? SCE_RB_DEFAULT
: styler
.StyleAt(startPos
- 1);
1780 bool buffer_ends_with_eol
= false;
1781 for (Sci_PositionU i
= startPos
; i
< endPos
; i
++) {
1783 chNext
= styler
.SafeGetCharAt(i
+ 1);
1784 int style
= styleNext
;
1785 styleNext
= styler
.StyleAt(i
+ 1);
1786 bool atEOL
= (ch
== '\r' && chNext
!= '\n') || (ch
== '\n');
1788 /*Mutiline comment patch*/
1789 if (foldComment
&& atEOL
&& IsCommentLine(lineCurrent
, styler
)) {
1790 if (!IsCommentLine(lineCurrent
- 1, styler
)
1791 && IsCommentLine(lineCurrent
+ 1, styler
))
1793 else if (IsCommentLine(lineCurrent
- 1, styler
)
1794 && !IsCommentLine(lineCurrent
+ 1, styler
))
1798 if (style
== SCE_RB_COMMENTLINE
) {
1799 if (foldComment
&& stylePrev
!= SCE_RB_COMMENTLINE
) {
1800 if (chNext
== '{') {
1802 } else if (chNext
== '}' && levelCurrent
> 0) {
1806 } else if (style
== SCE_RB_OPERATOR
) {
1807 if (strchr("[{(", ch
)) {
1809 } else if (strchr(")}]", ch
)) {
1810 // Don't decrement below 0
1811 if (levelCurrent
> 0)
1814 } else if (style
== SCE_RB_WORD
&& styleNext
!= SCE_RB_WORD
) {
1815 // Look at the keyword on the left and decide what to do
1816 char prevWord
[MAX_KEYWORD_LENGTH
+ 1]; // 1 byte for zero
1818 getPrevWord(i
, prevWord
, styler
, SCE_RB_WORD
);
1819 if (!strcmp(prevWord
, "end")) {
1820 // Don't decrement below 0
1821 if (levelCurrent
> 0)
1823 } else if (!strcmp(prevWord
, "if")
1824 || !strcmp(prevWord
, "def")
1825 || !strcmp(prevWord
, "class")
1826 || !strcmp(prevWord
, "module")
1827 || !strcmp(prevWord
, "begin")
1828 || !strcmp(prevWord
, "case")
1829 || !strcmp(prevWord
, "do")
1830 || !strcmp(prevWord
, "while")
1831 || !strcmp(prevWord
, "unless")
1832 || !strcmp(prevWord
, "until")
1833 || !strcmp(prevWord
, "for")
1837 } else if (style
== SCE_RB_HERE_DELIM
) {
1838 if (styler
.SafeGetCharAt(i
-2) == '<' && styler
.SafeGetCharAt(i
-1) == '<') {
1840 } else if (styleNext
== SCE_RB_DEFAULT
) {
1845 int lev
= levelPrev
;
1846 if (visibleChars
== 0 && foldCompact
)
1847 lev
|= SC_FOLDLEVELWHITEFLAG
;
1848 if ((levelCurrent
> levelPrev
) && (visibleChars
> 0))
1849 lev
|= SC_FOLDLEVELHEADERFLAG
;
1850 styler
.SetLevel(lineCurrent
, lev
|SC_FOLDLEVELBASE
);
1852 levelPrev
= levelCurrent
;
1854 buffer_ends_with_eol
= true;
1855 } else if (!isspacechar(ch
)) {
1857 buffer_ends_with_eol
= false;
1861 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1862 if (!buffer_ends_with_eol
) {
1864 int new_lev
= levelCurrent
;
1865 if (visibleChars
== 0 && foldCompact
)
1866 new_lev
|= SC_FOLDLEVELWHITEFLAG
;
1867 if ((levelCurrent
> levelPrev
) && (visibleChars
> 0))
1868 new_lev
|= SC_FOLDLEVELHEADERFLAG
;
1869 levelCurrent
= new_lev
;
1871 styler
.SetLevel(lineCurrent
, levelCurrent
|SC_FOLDLEVELBASE
);
1874 static const char *const rubyWordListDesc
[] = {
// Lexer module object for Ruby: associates the SCLEX_RUBY identifier and the
// name "ruby" with this file's colouriser (ColouriseRbDoc), folder
// (FoldRbDoc) and word-list descriptions (rubyWordListDesc).
LexerModule lmRuby(SCLEX_RUBY, ColouriseRbDoc, "ruby", FoldRbDoc, rubyWordListDesc);