Merge pull request #2212 from TwlyY29/bibtex-parser
[geany-mirror.git] / scintilla / lexers / LexRuby.cxx
blob2affffe65fb016f682ace0af8fef1a5eb3b1ea35
1 // Scintilla source code edit control
2 /** @file LexRuby.cxx
3 ** Lexer for Ruby.
4 **/
5 // Copyright 2001- by Clemens Wyss <wys@helbling.ch>
6 // The License.txt file describes the conditions under which this software may be distributed.
8 #include <stdlib.h>
9 #include <string.h>
10 #include <stdio.h>
11 #include <stdarg.h>
12 #include <assert.h>
13 #include <ctype.h>
15 #include "ILexer.h"
16 #include "Scintilla.h"
17 #include "SciLexer.h"
19 #include "WordList.h"
20 #include "LexAccessor.h"
21 #include "Accessor.h"
22 #include "StyleContext.h"
23 #include "CharacterSet.h"
24 #include "LexerModule.h"
26 using namespace Scintilla;
//XXX Identical to Perl, put in common area
// Returns true when ch is a carriage return or a line feed.
static inline bool isEOLChar(char ch) {
    return (ch == '\r') || (ch == '\n');
}
// True when ch, converted to unsigned int, is in the 7-bit ASCII range.
// A negative (signed) char converts to a very large unsigned value and is
// therefore rejected.
#define isSafeASCII(ch) ((unsigned int)(ch) <= 127)
// This one's redundant, but makes for more readable code
#define isHighBitChar(ch) ((unsigned int)(ch) > 127)
// Returns true for 7-bit ASCII letters and for '_'.
// The character is viewed as unsigned char so that the range test and the
// <ctype.h> call are well defined whatever the signedness of plain char;
// the range test is the same one isSafeASCII() performs.
static inline bool isSafeAlpha(char ch) {
    const unsigned char uch = (unsigned char)ch;
    return (uch <= 127 && isalpha(uch)) || ch == '_';
}
// Returns true for 7-bit ASCII letters and digits and for '_'.
// The character is viewed as unsigned char so that the range test and the
// <ctype.h> call are well defined whatever the signedness of plain char;
// the range test is the same one isSafeASCII() performs.
static inline bool isSafeAlnum(char ch) {
    const unsigned char uch = (unsigned char)ch;
    return (uch <= 127 && isalnum(uch)) || ch == '_';
}
// Returns true for any byte above 127, for ASCII letters and digits, and
// for '_'.  The high-bit test comes first (same test as isHighBitChar()),
// so isalnum() only ever sees a 7-bit value.
static inline bool isSafeAlnumOrHigh(char ch) {
    const unsigned char uch = (unsigned char)ch;
    return uch > 127 || isalnum(uch) || ch == '_';
}
// Returns true only for the 7-bit ASCII decimal digits.
// The character is viewed as unsigned char so that the range test and the
// <ctype.h> call are well defined whatever the signedness of plain char.
static inline bool isSafeDigit(char ch) {
    const unsigned char uch = (unsigned char)ch;
    return uch <= 127 && isdigit(uch);
}
// Returns true for characters that may be part of a Ruby word: any byte
// above 127, ASCII letters and digits, and '_'.
static inline bool isSafeWordcharOrHigh(char ch) {
    // Error: scintilla's KeyWords.h includes '.' as a word-char
    // we want to separate things that can take methods from the
    // methods.
    const unsigned char uch = (unsigned char)ch;
    return uch > 127 || isalnum(uch) || ch == '_';
}
// Returns true for a space or a horizontal tab.  End-of-line characters
// are deliberately not treated as whitespace here.
static inline bool iswhitespace(char ch) {
    return ch == ' ' || ch == '\t';
}
// Size of the scratch buffer used when copying a word out of the document.
#define MAX_KEYWORD_LENGTH 200

// Keeps the low six bits of a style byte.
#define STYLE_MASK 63
// The argument is parenthesised so that an expression such as `a | b`
// is masked as a whole rather than only its last operand.
#define actual_style(style) ((style) & STYLE_MASK)
69 static bool followsDot(Sci_PositionU pos, Accessor &styler) {
70 styler.Flush();
71 for (; pos >= 1; --pos) {
72 int style = actual_style(styler.StyleAt(pos));
73 char ch;
74 switch (style) {
75 case SCE_RB_DEFAULT:
76 ch = styler[pos];
77 if (ch == ' ' || ch == '\t') {
78 //continue
79 } else {
80 return false;
82 break;
84 case SCE_RB_OPERATOR:
85 return styler[pos] == '.';
87 default:
88 return false;
91 return false;
94 // Forward declarations
95 static bool keywordIsAmbiguous(const char *prevWord);
96 static bool keywordDoStartsLoop(Sci_Position pos,
97 Accessor &styler);
98 static bool keywordIsModifier(const char *word,
99 Sci_Position pos,
100 Accessor &styler);
102 static int ClassifyWordRb(Sci_PositionU start, Sci_PositionU end, WordList &keywords, Accessor &styler, char *prevWord) {
103 char s[MAX_KEYWORD_LENGTH];
104 Sci_PositionU i, j;
105 Sci_PositionU lim = end - start + 1; // num chars to copy
106 if (lim >= MAX_KEYWORD_LENGTH) {
107 lim = MAX_KEYWORD_LENGTH - 1;
109 for (i = start, j = 0; j < lim; i++, j++) {
110 s[j] = styler[i];
112 s[j] = '\0';
113 int chAttr;
114 if (0 == strcmp(prevWord, "class"))
115 chAttr = SCE_RB_CLASSNAME;
116 else if (0 == strcmp(prevWord, "module"))
117 chAttr = SCE_RB_MODULE_NAME;
118 else if (0 == strcmp(prevWord, "def"))
119 chAttr = SCE_RB_DEFNAME;
120 else if (keywords.InList(s) && ((start == 0) || !followsDot(start - 1, styler))) {
121 if (keywordIsAmbiguous(s)
122 && keywordIsModifier(s, start, styler)) {
124 // Demoted keywords are colored as keywords,
125 // but do not affect changes in indentation.
127 // Consider the word 'if':
128 // 1. <<if test ...>> : normal
129 // 2. <<stmt if test>> : demoted
130 // 3. <<lhs = if ...>> : normal: start a new indent level
131 // 4. <<obj.if = 10>> : color as identifer, since it follows '.'
133 chAttr = SCE_RB_WORD_DEMOTED;
134 } else {
135 chAttr = SCE_RB_WORD;
137 } else
138 chAttr = SCE_RB_IDENTIFIER;
139 styler.ColourTo(end, chAttr);
140 if (chAttr == SCE_RB_WORD) {
141 strcpy(prevWord, s);
142 } else {
143 prevWord[0] = 0;
145 return chAttr;
149 //XXX Identical to Perl, put in common area
150 static bool isMatch(Accessor &styler, Sci_Position lengthDoc, Sci_Position pos, const char *val) {
151 if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) {
152 return false;
154 while (*val) {
155 if (*val != styler[pos++]) {
156 return false;
158 val++;
160 return true;
163 // Do Ruby better -- find the end of the line, work back,
164 // and then check for leading white space
166 // Precondition: the here-doc target can be indented
167 static bool lookingAtHereDocDelim(Accessor &styler,
168 Sci_Position pos,
169 Sci_Position lengthDoc,
170 const char *HereDocDelim)
172 if (!isMatch(styler, lengthDoc, pos, HereDocDelim)) {
173 return false;
175 while (--pos > 0) {
176 char ch = styler[pos];
177 if (isEOLChar(ch)) {
178 return true;
179 } else if (ch != ' ' && ch != '\t') {
180 return false;
183 return false;
//XXX Identical to Perl, put in common area
// Returns the closing delimiter that pairs with an opening bracket
// character; any other character is its own closing delimiter.
static char opposite(char ch) {
    switch (ch) {
    case '(':
        return ')';
    case '[':
        return ']';
    case '{':
        return '}';
    case '<':
        return '>';
    default:
        return ch;
    }
}
199 // Null transitions when we see we've reached the end
200 // and need to relex the curr char.
202 static void redo_char(Sci_Position &i, char &ch, char &chNext, char &chNext2,
203 int &state) {
204 i--;
205 chNext2 = chNext;
206 chNext = ch;
207 state = SCE_RB_DEFAULT;
210 static void advance_char(Sci_Position &i, char &ch, char &chNext, char &chNext2) {
211 i++;
212 ch = chNext;
213 chNext = chNext2;
216 // precondition: startPos points to one after the EOL char
217 static bool currLineContainsHereDelims(Sci_Position &startPos,
218 Accessor &styler) {
219 if (startPos <= 1)
220 return false;
222 Sci_Position pos;
223 for (pos = startPos - 1; pos > 0; pos--) {
224 char ch = styler.SafeGetCharAt(pos);
225 if (isEOLChar(ch)) {
226 // Leave the pointers where they are -- there are no
227 // here doc delims on the current line, even if
228 // the EOL isn't default style
230 return false;
231 } else {
232 styler.Flush();
233 if (actual_style(styler.StyleAt(pos)) == SCE_RB_HERE_DELIM) {
234 break;
238 if (pos == 0) {
239 return false;
241 // Update the pointers so we don't have to re-analyze the string
242 startPos = pos;
243 return true;
246 // This class is used by the enter and exit methods, so it needs
247 // to be hoisted out of the function.
249 class QuoteCls {
250 public:
251 int Count;
252 char Up;
253 char Down;
254 QuoteCls() {
255 New();
257 void New() {
258 Count = 0;
259 Up = '\0';
260 Down = '\0';
262 void Open(char u) {
263 Count++;
264 Up = u;
265 Down = opposite(Up);
267 QuoteCls(const QuoteCls &q) {
268 // copy constructor -- use this for copying in
269 Count = q.Count;
270 Up = q.Up;
271 Down = q.Down;
273 QuoteCls &operator=(const QuoteCls &q) { // assignment constructor
274 if (this != &q) {
275 Count = q.Count;
276 Up = q.Up;
277 Down = q.Down;
279 return *this;
285 static void enterInnerExpression(int *p_inner_string_types,
286 int *p_inner_expn_brace_counts,
287 QuoteCls *p_inner_quotes,
288 int &inner_string_count,
289 int &state,
290 int &brace_counts,
291 QuoteCls curr_quote
293 p_inner_string_types[inner_string_count] = state;
294 state = SCE_RB_DEFAULT;
295 p_inner_expn_brace_counts[inner_string_count] = brace_counts;
296 brace_counts = 0;
297 p_inner_quotes[inner_string_count] = curr_quote;
298 ++inner_string_count;
301 static void exitInnerExpression(int *p_inner_string_types,
302 int *p_inner_expn_brace_counts,
303 QuoteCls *p_inner_quotes,
304 int &inner_string_count,
305 int &state,
306 int &brace_counts,
307 QuoteCls &curr_quote
309 --inner_string_count;
310 state = p_inner_string_types[inner_string_count];
311 brace_counts = p_inner_expn_brace_counts[inner_string_count];
312 curr_quote = p_inner_quotes[inner_string_count];
315 static bool isEmptyLine(Sci_Position pos,
316 Accessor &styler) {
317 int spaceFlags = 0;
318 Sci_Position lineCurrent = styler.GetLine(pos);
319 int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
320 return (indentCurrent & SC_FOLDLEVELWHITEFLAG) != 0;
// Returns true when keyword is one of the Ruby keywords listed below,
// i.e. those after which this lexer allows a regular expression to start.
// The comparison is exact and case-sensitive.
static bool RE_CanFollowKeyword(const char *keyword) {
    static const char *const regexPrecedingKeywords[] = {
        "and",
        "begin",
        "break",
        "case",
        "do",
        "else",
        "elsif",
        "if",
        "next",
        "return",
        "when",
        "unless",
        "until",
        "not",
        "or"
    };
    const size_t count =
        sizeof(regexPrecedingKeywords) / sizeof(regexPrecedingKeywords[0]);
    for (size_t k = 0; k < count; k++) {
        if (strcmp(keyword, regexPrecedingKeywords[k]) == 0) {
            return true;
        }
    }
    return false;
}
344 // Look at chars up to but not including endPos
345 // Don't look at styles in case we're looking forward
347 static Sci_Position skipWhitespace(Sci_Position startPos,
348 Sci_Position endPos,
349 Accessor &styler) {
350 for (Sci_Position i = startPos; i < endPos; i++) {
351 if (!iswhitespace(styler[i])) {
352 return i;
355 return endPos;
358 // This routine looks for false positives like
359 // undef foo, <<
360 // There aren't too many.
362 // iPrev points to the start of <<
364 static bool sureThisIsHeredoc(Sci_Position iPrev,
365 Accessor &styler,
366 char *prevWord) {
368 // Not so fast, since Ruby's so dynamic. Check the context
369 // to make sure we're OK.
370 int prevStyle;
371 Sci_Position lineStart = styler.GetLine(iPrev);
372 Sci_Position lineStartPosn = styler.LineStart(lineStart);
373 styler.Flush();
375 // Find the first word after some whitespace
376 Sci_Position firstWordPosn = skipWhitespace(lineStartPosn, iPrev, styler);
377 if (firstWordPosn >= iPrev) {
378 // Have something like {^ <<}
379 //XXX Look at the first previous non-comment non-white line
380 // to establish the context. Not too likely though.
381 return true;
382 } else {
383 switch (prevStyle = styler.StyleAt(firstWordPosn)) {
384 case SCE_RB_WORD:
385 case SCE_RB_WORD_DEMOTED:
386 case SCE_RB_IDENTIFIER:
387 break;
388 default:
389 return true;
392 Sci_Position firstWordEndPosn = firstWordPosn;
393 char *dst = prevWord;
394 for (;;) {
395 if (firstWordEndPosn >= iPrev ||
396 styler.StyleAt(firstWordEndPosn) != prevStyle) {
397 *dst = 0;
398 break;
400 *dst++ = styler[firstWordEndPosn];
401 firstWordEndPosn += 1;
403 //XXX Write a style-aware thing to regex scintilla buffer objects
404 if (!strcmp(prevWord, "undef")
405 || !strcmp(prevWord, "def")
406 || !strcmp(prevWord, "alias")) {
407 // These keywords are what we were looking for
408 return false;
410 return true;
413 // Routine that saves us from allocating a buffer for the here-doc target
414 // targetEndPos points one past the end of the current target
415 static bool haveTargetMatch(Sci_Position currPos,
416 Sci_Position lengthDoc,
417 Sci_Position targetStartPos,
418 Sci_Position targetEndPos,
419 Accessor &styler) {
420 if (lengthDoc - currPos < targetEndPos - targetStartPos) {
421 return false;
423 Sci_Position i, j;
424 for (i = targetStartPos, j = currPos;
425 i < targetEndPos && j < lengthDoc;
426 i++, j++) {
427 if (styler[i] != styler[j]) {
428 return false;
431 return true;
434 // Finds the start position of the expression containing @p pos
435 // @p min_pos should be a known expression start, e.g. the start of the line
436 static Sci_Position findExpressionStart(Sci_Position pos,
437 Sci_Position min_pos,
438 Accessor &styler) {
439 int depth = 0;
440 for (; pos > min_pos; pos -= 1) {
441 int style = styler.StyleAt(pos - 1);
442 if (style == SCE_RB_OPERATOR) {
443 int ch = styler[pos - 1];
444 if (ch == '}' || ch == ')' || ch == ']') {
445 depth += 1;
446 } else if (ch == '{' || ch == '(' || ch == '[') {
447 if (depth == 0) {
448 break;
449 } else {
450 depth -= 1;
452 } else if (ch == ';' && depth == 0) {
453 break;
457 return pos;
460 // We need a check because the form
461 // [identifier] <<[target]
462 // is ambiguous. The Ruby lexer/parser resolves it by
463 // looking to see if [identifier] names a variable or a
464 // function. If it's the first, it's the start of a here-doc.
465 // If it's a var, it's an operator. This lexer doesn't
466 // maintain a symbol table, so it looks ahead to see what's
467 // going on, in cases where we have
468 // ^[white-space]*[identifier([.|::]identifier)*][white-space]*<<[target]
470 // If there's no occurrence of [target] on a line, assume we don't.
472 // return true == yes, we have no heredocs
474 static bool sureThisIsNotHeredoc(Sci_Position lt2StartPos,
475 Accessor &styler) {
476 int prevStyle;
477 // Use full document, not just part we're styling
478 Sci_Position lengthDoc = styler.Length();
479 Sci_Position lineStart = styler.GetLine(lt2StartPos);
480 Sci_Position lineStartPosn = styler.LineStart(lineStart);
481 styler.Flush();
482 const bool definitely_not_a_here_doc = true;
483 const bool looks_like_a_here_doc = false;
485 // find the expression start rather than the line start
486 Sci_Position exprStartPosn = findExpressionStart(lt2StartPos, lineStartPosn, styler);
488 // Find the first word after some whitespace
489 Sci_Position firstWordPosn = skipWhitespace(exprStartPosn, lt2StartPos, styler);
490 if (firstWordPosn >= lt2StartPos) {
491 return definitely_not_a_here_doc;
493 prevStyle = styler.StyleAt(firstWordPosn);
494 // If we have '<<' following a keyword, it's not a heredoc
495 if (prevStyle != SCE_RB_IDENTIFIER
496 && prevStyle != SCE_RB_SYMBOL
497 && prevStyle != SCE_RB_INSTANCE_VAR
498 && prevStyle != SCE_RB_CLASS_VAR) {
499 return definitely_not_a_here_doc;
501 int newStyle = prevStyle;
502 // Some compilers incorrectly warn about uninit newStyle
503 for (firstWordPosn += 1; firstWordPosn <= lt2StartPos; firstWordPosn += 1) {
504 // Inner loop looks at the name
505 for (; firstWordPosn <= lt2StartPos; firstWordPosn += 1) {
506 newStyle = styler.StyleAt(firstWordPosn);
507 if (newStyle != prevStyle) {
508 break;
511 // Do we have '::' or '.'?
512 if (firstWordPosn < lt2StartPos && newStyle == SCE_RB_OPERATOR) {
513 char ch = styler[firstWordPosn];
514 if (ch == '.') {
515 // yes
516 } else if (ch == ':') {
517 if (styler.StyleAt(++firstWordPosn) != SCE_RB_OPERATOR) {
518 return definitely_not_a_here_doc;
519 } else if (styler[firstWordPosn] != ':') {
520 return definitely_not_a_here_doc;
522 } else {
523 break;
525 } else {
526 break;
528 // on second and next passes, only identifiers may appear since
529 // class and instance variable are private
530 prevStyle = SCE_RB_IDENTIFIER;
532 // Skip next batch of white-space
533 firstWordPosn = skipWhitespace(firstWordPosn, lt2StartPos, styler);
534 // possible symbol for an implicit hash argument
535 if (firstWordPosn < lt2StartPos && styler.StyleAt(firstWordPosn) == SCE_RB_SYMBOL) {
536 for (; firstWordPosn <= lt2StartPos; firstWordPosn += 1) {
537 if (styler.StyleAt(firstWordPosn) != SCE_RB_SYMBOL) {
538 break;
541 // Skip next batch of white-space
542 firstWordPosn = skipWhitespace(firstWordPosn, lt2StartPos, styler);
544 if (firstWordPosn != lt2StartPos) {
545 // Have [[^ws[identifier]ws[*something_else*]ws<<
546 return definitely_not_a_here_doc;
548 // OK, now 'j' will point to the current spot moving ahead
549 Sci_Position j = firstWordPosn + 1;
550 if (styler.StyleAt(j) != SCE_RB_OPERATOR || styler[j] != '<') {
551 // This shouldn't happen
552 return definitely_not_a_here_doc;
554 Sci_Position nextLineStartPosn = styler.LineStart(lineStart + 1);
555 if (nextLineStartPosn >= lengthDoc) {
556 return definitely_not_a_here_doc;
558 j = skipWhitespace(j + 1, nextLineStartPosn, styler);
559 if (j >= lengthDoc) {
560 return definitely_not_a_here_doc;
562 bool allow_indent;
563 Sci_Position target_start, target_end;
564 // From this point on no more styling, since we're looking ahead
565 if (styler[j] == '-') {
566 allow_indent = true;
567 j++;
568 } else {
569 allow_indent = false;
572 // Allow for quoted targets.
573 char target_quote = 0;
574 switch (styler[j]) {
575 case '\'':
576 case '"':
577 case '`':
578 target_quote = styler[j];
579 j += 1;
582 if (isSafeAlnum(styler[j])) {
583 // Init target_end because some compilers think it won't
584 // be initialized by the time it's used
585 target_start = target_end = j;
586 j++;
587 } else {
588 return definitely_not_a_here_doc;
590 for (; j < lengthDoc; j++) {
591 if (!isSafeAlnum(styler[j])) {
592 if (target_quote && styler[j] != target_quote) {
593 // unquoted end
594 return definitely_not_a_here_doc;
597 // And for now make sure that it's a newline
598 // don't handle arbitrary expressions yet
600 target_end = j;
601 if (target_quote) {
602 // Now we can move to the character after the string delimiter.
603 j += 1;
605 j = skipWhitespace(j, lengthDoc, styler);
606 if (j >= lengthDoc) {
607 return definitely_not_a_here_doc;
608 } else {
609 char ch = styler[j];
610 if (ch == '#' || isEOLChar(ch)) {
611 // This is OK, so break and continue;
612 break;
613 } else {
614 return definitely_not_a_here_doc;
620 // Just look at the start of each line
621 Sci_Position last_line = styler.GetLine(lengthDoc - 1);
622 // But don't go too far
623 if (last_line > lineStart + 50) {
624 last_line = lineStart + 50;
626 for (Sci_Position line_num = lineStart + 1; line_num <= last_line; line_num++) {
627 if (allow_indent) {
628 j = skipWhitespace(styler.LineStart(line_num), lengthDoc, styler);
629 } else {
630 j = styler.LineStart(line_num);
632 // target_end is one past the end
633 if (haveTargetMatch(j, lengthDoc, target_start, target_end, styler)) {
634 // We got it
635 return looks_like_a_here_doc;
638 return definitely_not_a_here_doc;
641 //todo: if we aren't looking at a stdio character,
642 // move to the start of the first line that is not in a
643 // multi-line construct
645 static void synchronizeDocStart(Sci_PositionU &startPos,
646 Sci_Position &length,
647 int &initStyle,
648 Accessor &styler,
649 bool skipWhiteSpace=false) {
651 styler.Flush();
652 int style = actual_style(styler.StyleAt(startPos));
653 switch (style) {
654 case SCE_RB_STDIN:
655 case SCE_RB_STDOUT:
656 case SCE_RB_STDERR:
657 // Don't do anything else with these.
658 return;
661 Sci_Position pos = startPos;
662 // Quick way to characterize each line
663 Sci_Position lineStart;
664 for (lineStart = styler.GetLine(pos); lineStart > 0; lineStart--) {
665 // Now look at the style before the previous line's EOL
666 pos = styler.LineStart(lineStart) - 1;
667 if (pos <= 10) {
668 lineStart = 0;
669 break;
671 char ch = styler.SafeGetCharAt(pos);
672 char chPrev = styler.SafeGetCharAt(pos - 1);
673 if (ch == '\n' && chPrev == '\r') {
674 pos--;
676 if (styler.SafeGetCharAt(pos - 1) == '\\') {
677 // Continuation line -- keep going
678 } else if (actual_style(styler.StyleAt(pos)) != SCE_RB_DEFAULT) {
679 // Part of multi-line construct -- keep going
680 } else if (currLineContainsHereDelims(pos, styler)) {
681 // Keep going, with pos and length now pointing
682 // at the end of the here-doc delimiter
683 } else if (skipWhiteSpace && isEmptyLine(pos, styler)) {
684 // Keep going
685 } else {
686 break;
689 pos = styler.LineStart(lineStart);
690 length += (startPos - pos);
691 startPos = pos;
692 initStyle = SCE_RB_DEFAULT;
695 static void ColouriseRbDoc(Sci_PositionU startPos, Sci_Position length, int initStyle,
696 WordList *keywordlists[], Accessor &styler) {
698 // Lexer for Ruby often has to backtrack to start of current style to determine
699 // which characters are being used as quotes, how deeply nested is the
700 // start position and what the termination string is for here documents
702 WordList &keywords = *keywordlists[0];
704 class HereDocCls {
705 public:
706 int State;
707 // States
708 // 0: '<<' encountered
709 // 1: collect the delimiter
710 // 1b: text between the end of the delimiter and the EOL
711 // 2: here doc text (lines after the delimiter)
712 char Quote; // the char after '<<'
713 bool Quoted; // true if Quote in ('\'','"','`')
714 int DelimiterLength; // strlen(Delimiter)
715 char Delimiter[256]; // the Delimiter, limit of 256: from Perl
716 bool CanBeIndented;
717 HereDocCls() {
718 State = 0;
719 DelimiterLength = 0;
720 Delimiter[0] = '\0';
721 CanBeIndented = false;
724 HereDocCls HereDoc;
726 QuoteCls Quote;
728 int numDots = 0; // For numbers --
729 // Don't start lexing in the middle of a num
731 synchronizeDocStart(startPos, length, initStyle, styler, // ref args
732 false);
734 bool preferRE = true;
735 int state = initStyle;
736 Sci_Position lengthDoc = startPos + length;
738 char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
739 prevWord[0] = '\0';
740 if (length == 0)
741 return;
743 char chPrev = styler.SafeGetCharAt(startPos - 1);
744 char chNext = styler.SafeGetCharAt(startPos);
745 bool is_real_number = true; // Differentiate between constants and ?-sequences.
746 styler.StartAt(startPos);
747 styler.StartSegment(startPos);
749 static int q_states[] = {SCE_RB_STRING_Q,
750 SCE_RB_STRING_QQ,
751 SCE_RB_STRING_QR,
752 SCE_RB_STRING_QW,
753 SCE_RB_STRING_QW,
754 SCE_RB_STRING_QX
756 static const char *q_chars = "qQrwWx";
758 // In most cases a value of 2 should be ample for the code in the
759 // Ruby library, and the code the user is likely to enter.
760 // For example,
761 // fu_output_message "mkdir #{options[:mode] ? ('-m %03o ' % options[:mode]) : ''}#{list.join ' '}"
762 // if options[:verbose]
763 // from fileutils.rb nests to a level of 2
764 // If the user actually hits a 6th occurrence of '#{' in a double-quoted
765 // string (including regex'es, %Q, %<sym>, %w, and other strings
766 // that interpolate), it will stay as a string. The problem with this
767 // is that quotes might flip, a 7th '#{' will look like a comment,
768 // and code-folding might be wrong.
770 // If anyone runs into this problem, I recommend raising this
771 // value slightly higher to replacing the fixed array with a linked
772 // list. Keep in mind this code will be called every time the lexer
773 // is invoked.
775 #define INNER_STRINGS_MAX_COUNT 5
776 // These vars track our instances of "...#{,,,%Q<..#{,,,}...>,,,}..."
777 int inner_string_types[INNER_STRINGS_MAX_COUNT];
778 // Track # braces when we push a new #{ thing
779 int inner_expn_brace_counts[INNER_STRINGS_MAX_COUNT];
780 QuoteCls inner_quotes[INNER_STRINGS_MAX_COUNT];
781 int inner_string_count = 0;
782 int brace_counts = 0; // Number of #{ ... } things within an expression
784 Sci_Position i;
785 for (i = 0; i < INNER_STRINGS_MAX_COUNT; i++) {
786 inner_string_types[i] = 0;
787 inner_expn_brace_counts[i] = 0;
789 for (i = startPos; i < lengthDoc; i++) {
790 char ch = chNext;
791 chNext = styler.SafeGetCharAt(i + 1);
792 char chNext2 = styler.SafeGetCharAt(i + 2);
794 if (styler.IsLeadByte(ch)) {
795 chNext = chNext2;
796 chPrev = ' ';
797 i += 1;
798 continue;
801 // skip on DOS/Windows
802 //No, don't, because some things will get tagged on,
803 // so we won't recognize keywords, for example
804 #if 0
805 if (ch == '\r' && chNext == '\n') {
806 continue;
808 #endif
810 if (HereDoc.State == 1 && isEOLChar(ch)) {
811 // Begin of here-doc (the line after the here-doc delimiter):
812 HereDoc.State = 2;
813 styler.ColourTo(i-1, state);
814 // Don't check for a missing quote, just jump into
815 // the here-doc state
816 state = SCE_RB_HERE_Q;
819 // Regular transitions
820 if (state == SCE_RB_DEFAULT) {
821 if (isSafeDigit(ch)) {
822 styler.ColourTo(i - 1, state);
823 state = SCE_RB_NUMBER;
824 is_real_number = true;
825 numDots = 0;
826 } else if (isHighBitChar(ch) || iswordstart(ch)) {
827 styler.ColourTo(i - 1, state);
828 state = SCE_RB_WORD;
829 } else if (ch == '#') {
830 styler.ColourTo(i - 1, state);
831 state = SCE_RB_COMMENTLINE;
832 } else if (ch == '=') {
833 // =begin indicates the start of a comment (doc) block
834 if ((i == 0 || isEOLChar(chPrev))
835 && chNext == 'b'
836 && styler.SafeGetCharAt(i + 2) == 'e'
837 && styler.SafeGetCharAt(i + 3) == 'g'
838 && styler.SafeGetCharAt(i + 4) == 'i'
839 && styler.SafeGetCharAt(i + 5) == 'n'
840 && !isSafeWordcharOrHigh(styler.SafeGetCharAt(i + 6))) {
841 styler.ColourTo(i - 1, state);
842 state = SCE_RB_POD;
843 } else {
844 styler.ColourTo(i - 1, state);
845 styler.ColourTo(i, SCE_RB_OPERATOR);
846 preferRE = true;
848 } else if (ch == '"') {
849 styler.ColourTo(i - 1, state);
850 state = SCE_RB_STRING;
851 Quote.New();
852 Quote.Open(ch);
853 } else if (ch == '\'') {
854 styler.ColourTo(i - 1, state);
855 state = SCE_RB_CHARACTER;
856 Quote.New();
857 Quote.Open(ch);
858 } else if (ch == '`') {
859 styler.ColourTo(i - 1, state);
860 state = SCE_RB_BACKTICKS;
861 Quote.New();
862 Quote.Open(ch);
863 } else if (ch == '@') {
864 // Instance or class var
865 styler.ColourTo(i - 1, state);
866 if (chNext == '@') {
867 state = SCE_RB_CLASS_VAR;
868 advance_char(i, ch, chNext, chNext2); // pass by ref
869 } else {
870 state = SCE_RB_INSTANCE_VAR;
872 } else if (ch == '$') {
873 // Check for a builtin global
874 styler.ColourTo(i - 1, state);
875 // Recognize it bit by bit
876 state = SCE_RB_GLOBAL;
877 } else if (ch == '/' && preferRE) {
878 // Ambigous operator
879 styler.ColourTo(i - 1, state);
880 state = SCE_RB_REGEX;
881 Quote.New();
882 Quote.Open(ch);
883 } else if (ch == '<' && chNext == '<' && chNext2 != '=') {
885 // Recognise the '<<' symbol - either a here document or a binary op
886 styler.ColourTo(i - 1, state);
887 i++;
888 chNext = chNext2;
889 styler.ColourTo(i, SCE_RB_OPERATOR);
891 if (!(strchr("\"\'`_-", chNext2) || isSafeAlpha(chNext2))) {
892 // It's definitely not a here-doc,
893 // based on Ruby's lexer/parser in the
894 // heredoc_identifier routine.
895 // Nothing else to do.
896 } else if (preferRE) {
897 if (sureThisIsHeredoc(i - 1, styler, prevWord)) {
898 state = SCE_RB_HERE_DELIM;
899 HereDoc.State = 0;
901 // else leave it in default state
902 } else {
903 if (sureThisIsNotHeredoc(i - 1, styler)) {
904 // leave state as default
905 // We don't have all the heuristics Perl has for indications
906 // of a here-doc, because '<<' is overloadable and used
907 // for so many other classes.
908 } else {
909 state = SCE_RB_HERE_DELIM;
910 HereDoc.State = 0;
913 preferRE = (state != SCE_RB_HERE_DELIM);
914 } else if (ch == ':') {
915 styler.ColourTo(i - 1, state);
916 if (chNext == ':') {
917 // Mark "::" as an operator, not symbol start
918 styler.ColourTo(i + 1, SCE_RB_OPERATOR);
919 advance_char(i, ch, chNext, chNext2); // pass by ref
920 state = SCE_RB_DEFAULT;
921 preferRE = false;
922 } else if (isSafeWordcharOrHigh(chNext)) {
923 state = SCE_RB_SYMBOL;
924 } else if ((chNext == '@' || chNext == '$') &&
925 isSafeWordcharOrHigh(chNext2)) {
926 // instance and global variable followed by an identifier
927 advance_char(i, ch, chNext, chNext2);
928 state = SCE_RB_SYMBOL;
929 } else if (((chNext == '@' && chNext2 == '@') ||
930 (chNext == '$' && chNext2 == '-')) &&
931 isSafeWordcharOrHigh(styler.SafeGetCharAt(i+3))) {
932 // class variables and special global variable "$-IDENTCHAR"
933 state = SCE_RB_SYMBOL;
934 // $-IDENTCHAR doesn't continue past the IDENTCHAR
935 if (chNext == '$') {
936 styler.ColourTo(i+3, SCE_RB_SYMBOL);
937 state = SCE_RB_DEFAULT;
939 i += 3;
940 ch = styler.SafeGetCharAt(i);
941 chNext = styler.SafeGetCharAt(i+1);
942 } else if (chNext == '$' && strchr("_~*$?!@/\\;,.=:<>\"&`'+", chNext2)) {
943 // single-character special global variables
944 i += 2;
945 ch = chNext2;
946 chNext = styler.SafeGetCharAt(i+1);
947 styler.ColourTo(i, SCE_RB_SYMBOL);
948 state = SCE_RB_DEFAULT;
949 } else if (strchr("[*!~+-*/%=<>&^|", chNext)) {
950 // Do the operator analysis in-line, looking ahead
951 // Based on the table in pickaxe 2nd ed., page 339
952 bool doColoring = true;
953 switch (chNext) {
954 case '[':
955 if (chNext2 == ']') {
956 char ch_tmp = styler.SafeGetCharAt(i + 3);
957 if (ch_tmp == '=') {
958 i += 3;
959 ch = ch_tmp;
960 chNext = styler.SafeGetCharAt(i + 1);
961 } else {
962 i += 2;
963 ch = chNext2;
964 chNext = ch_tmp;
966 } else {
967 doColoring = false;
969 break;
971 case '*':
972 if (chNext2 == '*') {
973 i += 2;
974 ch = chNext2;
975 chNext = styler.SafeGetCharAt(i + 1);
976 } else {
977 advance_char(i, ch, chNext, chNext2);
979 break;
981 case '!':
982 if (chNext2 == '=' || chNext2 == '~') {
983 i += 2;
984 ch = chNext2;
985 chNext = styler.SafeGetCharAt(i + 1);
986 } else {
987 advance_char(i, ch, chNext, chNext2);
989 break;
991 case '<':
992 if (chNext2 == '<') {
993 i += 2;
994 ch = chNext2;
995 chNext = styler.SafeGetCharAt(i + 1);
996 } else if (chNext2 == '=') {
997 char ch_tmp = styler.SafeGetCharAt(i + 3);
998 if (ch_tmp == '>') { // <=> operator
999 i += 3;
1000 ch = ch_tmp;
1001 chNext = styler.SafeGetCharAt(i + 1);
1002 } else {
1003 i += 2;
1004 ch = chNext2;
1005 chNext = ch_tmp;
1007 } else {
1008 advance_char(i, ch, chNext, chNext2);
1010 break;
1012 default:
1013 // Simple one-character operators
1014 advance_char(i, ch, chNext, chNext2);
1015 break;
1017 if (doColoring) {
1018 styler.ColourTo(i, SCE_RB_SYMBOL);
1019 state = SCE_RB_DEFAULT;
1021 } else if (!preferRE) {
1022 // Don't color symbol strings (yet)
1023 // Just color the ":" and color rest as string
1024 styler.ColourTo(i, SCE_RB_SYMBOL);
1025 state = SCE_RB_DEFAULT;
1026 } else {
1027 styler.ColourTo(i, SCE_RB_OPERATOR);
1028 state = SCE_RB_DEFAULT;
1029 preferRE = true;
1031 } else if (ch == '%') {
1032 styler.ColourTo(i - 1, state);
1033 bool have_string = false;
1034 if (strchr(q_chars, chNext) && !isSafeWordcharOrHigh(chNext2)) {
1035 Quote.New();
1036 const char *hit = strchr(q_chars, chNext);
1037 if (hit != NULL) {
1038 state = q_states[hit - q_chars];
1039 Quote.Open(chNext2);
1040 i += 2;
1041 ch = chNext2;
1042 chNext = styler.SafeGetCharAt(i + 1);
1043 have_string = true;
1045 } else if (preferRE && !isSafeWordcharOrHigh(chNext)) {
1046 // Ruby doesn't allow high bit chars here,
1047 // but the editor host might
1048 Quote.New();
1049 state = SCE_RB_STRING_QQ;
1050 Quote.Open(chNext);
1051 advance_char(i, ch, chNext, chNext2); // pass by ref
1052 have_string = true;
1053 } else if (!isSafeWordcharOrHigh(chNext) && !iswhitespace(chNext) && !isEOLChar(chNext)) {
1054 // Ruby doesn't allow high bit chars here,
1055 // but the editor host might
1056 Quote.New();
1057 state = SCE_RB_STRING_QQ;
1058 Quote.Open(chNext);
1059 advance_char(i, ch, chNext, chNext2); // pass by ref
1060 have_string = true;
1062 if (!have_string) {
1063 styler.ColourTo(i, SCE_RB_OPERATOR);
1064 // stay in default
1065 preferRE = true;
1067 } else if (ch == '?') {
1068 styler.ColourTo(i - 1, state);
1069 if (iswhitespace(chNext) || chNext == '\n' || chNext == '\r') {
1070 styler.ColourTo(i, SCE_RB_OPERATOR);
1071 } else {
1072 // It's the start of a character code escape sequence
1073 // Color it as a number.
1074 state = SCE_RB_NUMBER;
1075 is_real_number = false;
1077 } else if (isoperator(ch) || ch == '.') {
1078 styler.ColourTo(i - 1, state);
1079 styler.ColourTo(i, SCE_RB_OPERATOR);
1080 // If we're ending an expression or block,
1081 // assume it ends an object, and the ambivalent
1082 // constructs are binary operators
1084 // So if we don't have one of these chars,
1085 // we aren't ending an object exp'n, and ops
1086 // like : << / are unary operators.
1088 if (ch == '{') {
1089 ++brace_counts;
1090 preferRE = true;
1091 } else if (ch == '}' && --brace_counts < 0
1092 && inner_string_count > 0) {
1093 styler.ColourTo(i, SCE_RB_OPERATOR);
1094 exitInnerExpression(inner_string_types,
1095 inner_expn_brace_counts,
1096 inner_quotes,
1097 inner_string_count,
1098 state, brace_counts, Quote);
1099 } else {
1100 preferRE = (strchr(")}].", ch) == NULL);
1102 // Stay in default state
1103 } else if (isEOLChar(ch)) {
1104 // Make sure it's a true line-end, with no backslash
1105 if ((ch == '\r' || (ch == '\n' && chPrev != '\r'))
1106 && chPrev != '\\') {
1107 // Assume we've hit the end of the statement.
1108 preferRE = true;
1111 } else if (state == SCE_RB_WORD) {
1112 if (ch == '.' || !isSafeWordcharOrHigh(ch)) {
1113 // Words include x? in all contexts,
1114 // and <letters>= after either 'def' or a dot
1115 // Move along until a complete word is on our left
1117 // Default accessor treats '.' as word-chars,
1118 // but we don't for now.
1120 if (ch == '='
1121 && isSafeWordcharOrHigh(chPrev)
1122 && (chNext == '('
1123 || strchr(" \t\n\r", chNext) != NULL)
1124 && (!strcmp(prevWord, "def")
1125 || followsDot(styler.GetStartSegment(), styler))) {
1126 // <name>= is a name only when being def'd -- Get it the next time
1127 // This means that <name>=<name> is always lexed as
1128 // <name>, (op, =), <name>
1129 } else if (ch == ':'
1130 && isSafeWordcharOrHigh(chPrev)
1131 && strchr(" \t\n\r", chNext) != NULL) {
1132 state = SCE_RB_SYMBOL;
1133 } else if ((ch == '?' || ch == '!')
1134 && isSafeWordcharOrHigh(chPrev)
1135 && !isSafeWordcharOrHigh(chNext)) {
1136 // <name>? is a name -- Get it the next time
1137 // But <name>?<name> is always lexed as
1138 // <name>, (op, ?), <name>
1139 // Same with <name>! to indicate a method that
1140 // modifies its target
1141 } else if (isEOLChar(ch)
1142 && isMatch(styler, lengthDoc, i - 7, "__END__")) {
1143 styler.ColourTo(i, SCE_RB_DATASECTION);
1144 state = SCE_RB_DATASECTION;
1145 // No need to handle this state -- we'll just move to the end
1146 preferRE = false;
1147 } else {
1148 Sci_Position wordStartPos = styler.GetStartSegment();
1149 int word_style = ClassifyWordRb(wordStartPos, i - 1, keywords, styler, prevWord);
1150 switch (word_style) {
1151 case SCE_RB_WORD:
1152 preferRE = RE_CanFollowKeyword(prevWord);
1153 break;
1155 case SCE_RB_WORD_DEMOTED:
1156 preferRE = true;
1157 break;
1159 case SCE_RB_IDENTIFIER:
1160 if (isMatch(styler, lengthDoc, wordStartPos, "print")) {
1161 preferRE = true;
1162 } else if (isEOLChar(ch)) {
1163 preferRE = true;
1164 } else {
1165 preferRE = false;
1167 break;
1168 default:
1169 preferRE = false;
1171 if (ch == '.') {
1172 // We might be redefining an operator-method
1173 preferRE = false;
1175 // And if it's the first
1176 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1179 } else if (state == SCE_RB_NUMBER) {
1180 if (!is_real_number) {
1181 if (ch != '\\') {
1182 styler.ColourTo(i, state);
1183 state = SCE_RB_DEFAULT;
1184 preferRE = false;
1185 } else if (strchr("\\ntrfvaebs", chNext)) {
1186 // Terminal escape sequence -- handle it next time
1187 // Nothing more to do this time through the loop
1188 } else if (chNext == 'C' || chNext == 'M') {
1189 if (chNext2 != '-') {
1190 // \C or \M ends the sequence -- handle it next time
1191 } else {
1192 // Move from abc?\C-x
1193 // ^
1194 // to
1195 // ^
1196 i += 2;
1197 ch = chNext2;
1198 chNext = styler.SafeGetCharAt(i + 1);
1200 } else if (chNext == 'c') {
1201 // Stay here, \c is a combining sequence
1202 advance_char(i, ch, chNext, chNext2); // pass by ref
1203 } else {
1204 // ?\x, including ?\\ is final.
1205 styler.ColourTo(i + 1, state);
1206 state = SCE_RB_DEFAULT;
1207 preferRE = false;
1208 advance_char(i, ch, chNext, chNext2);
1210 } else if (isSafeAlnumOrHigh(ch) || ch == '_') {
1211 // Keep going
1212 } else if (ch == '.' && chNext == '.') {
1213 ++numDots;
1214 styler.ColourTo(i - 1, state);
1215 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1216 } else if (ch == '.' && ++numDots == 1) {
1217 // Keep going
1218 } else {
1219 styler.ColourTo(i - 1, state);
1220 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1221 preferRE = false;
1223 } else if (state == SCE_RB_COMMENTLINE) {
1224 if (isEOLChar(ch)) {
1225 styler.ColourTo(i - 1, state);
1226 state = SCE_RB_DEFAULT;
1227 // Use whatever setting we had going into the comment
1229 } else if (state == SCE_RB_HERE_DELIM) {
1230 // See the comment for SCE_RB_HERE_DELIM in LexPerl.cxx
1231 // Slightly different: if we find an immediate '-',
1232 // the target can appear indented.
1234 if (HereDoc.State == 0) { // '<<' encountered
1235 HereDoc.State = 1;
1236 HereDoc.DelimiterLength = 0;
1237 if (ch == '-') {
1238 HereDoc.CanBeIndented = true;
1239 advance_char(i, ch, chNext, chNext2); // pass by ref
1240 } else {
1241 HereDoc.CanBeIndented = false;
1243 if (isEOLChar(ch)) {
1244 // Bail out of doing a here doc if there's no target
1245 state = SCE_RB_DEFAULT;
1246 preferRE = false;
1247 } else {
1248 HereDoc.Quote = ch;
1250 if (ch == '\'' || ch == '"' || ch == '`') {
1251 HereDoc.Quoted = true;
1252 HereDoc.Delimiter[0] = '\0';
1253 } else {
1254 HereDoc.Quoted = false;
1255 HereDoc.Delimiter[0] = ch;
1256 HereDoc.Delimiter[1] = '\0';
1257 HereDoc.DelimiterLength = 1;
1260 } else if (HereDoc.State == 1) { // collect the delimiter
1261 if (isEOLChar(ch)) {
1262 // End the quote now, and go back for more
1263 styler.ColourTo(i - 1, state);
1264 state = SCE_RB_DEFAULT;
1265 i--;
1266 chNext = ch;
1267 preferRE = false;
1268 } else if (HereDoc.Quoted) {
1269 if (ch == HereDoc.Quote) { // closing quote => end of delimiter
1270 styler.ColourTo(i, state);
1271 state = SCE_RB_DEFAULT;
1272 preferRE = false;
1273 } else {
1274 if (ch == '\\' && !isEOLChar(chNext)) {
1275 advance_char(i, ch, chNext, chNext2);
1277 HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
1278 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
1280 } else { // an unquoted here-doc delimiter
1281 if (isSafeAlnumOrHigh(ch) || ch == '_') {
1282 HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
1283 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
1284 } else {
1285 styler.ColourTo(i - 1, state);
1286 redo_char(i, ch, chNext, chNext2, state);
1287 preferRE = false;
1290 if (HereDoc.DelimiterLength >= static_cast<int>(sizeof(HereDoc.Delimiter)) - 1) {
1291 styler.ColourTo(i - 1, state);
1292 state = SCE_RB_ERROR;
1293 preferRE = false;
1296 } else if (state == SCE_RB_HERE_Q) {
1297 // Not needed: HereDoc.State == 2
1298 // Indentable here docs: look backwards
1299 // Non-indentable: look forwards, like in Perl
1301 // Why: so we can quickly resolve things like <<-" abc"
1303 if (!HereDoc.CanBeIndented) {
1304 if (isEOLChar(chPrev)
1305 && isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) {
1306 styler.ColourTo(i - 1, state);
1307 i += HereDoc.DelimiterLength - 1;
1308 chNext = styler.SafeGetCharAt(i + 1);
1309 if (isEOLChar(chNext)) {
1310 styler.ColourTo(i, SCE_RB_HERE_DELIM);
1311 state = SCE_RB_DEFAULT;
1312 HereDoc.State = 0;
1313 preferRE = false;
1315 // Otherwise we skipped through the here doc faster.
1317 } else if (isEOLChar(chNext)
1318 && lookingAtHereDocDelim(styler,
1319 i - HereDoc.DelimiterLength + 1,
1320 lengthDoc,
1321 HereDoc.Delimiter)) {
1322 styler.ColourTo(i - 1 - HereDoc.DelimiterLength, state);
1323 styler.ColourTo(i, SCE_RB_HERE_DELIM);
1324 state = SCE_RB_DEFAULT;
1325 preferRE = false;
1326 HereDoc.State = 0;
1328 } else if (state == SCE_RB_CLASS_VAR
1329 || state == SCE_RB_INSTANCE_VAR
1330 || state == SCE_RB_SYMBOL) {
1331 if (state == SCE_RB_SYMBOL &&
1332 // FIDs suffices '?' and '!'
1333 (((ch == '!' || ch == '?') && chNext != '=') ||
1334 // identifier suffix '='
1335 (ch == '=' && (chNext != '~' && chNext != '>' &&
1336 (chNext != '=' || chNext2 == '>'))))) {
1337 styler.ColourTo(i, state);
1338 state = SCE_RB_DEFAULT;
1339 preferRE = false;
1340 } else if (!isSafeWordcharOrHigh(ch)) {
1341 styler.ColourTo(i - 1, state);
1342 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1343 preferRE = false;
1345 } else if (state == SCE_RB_GLOBAL) {
1346 if (!isSafeWordcharOrHigh(ch)) {
1347 // handle special globals here as well
1348 if (chPrev == '$') {
1349 if (ch == '-') {
1350 // Include the next char, like $-a
1351 advance_char(i, ch, chNext, chNext2);
1353 styler.ColourTo(i, state);
1354 state = SCE_RB_DEFAULT;
1355 } else {
1356 styler.ColourTo(i - 1, state);
1357 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1359 preferRE = false;
1361 } else if (state == SCE_RB_POD) {
1362 // PODs end with ^=end\s, -- any whitespace can follow =end
1363 if (strchr(" \t\n\r", ch) != NULL
1364 && i > 5
1365 && isEOLChar(styler[i - 5])
1366 && isMatch(styler, lengthDoc, i - 4, "=end")) {
1367 styler.ColourTo(i - 1, state);
1368 state = SCE_RB_DEFAULT;
1369 preferRE = false;
1371 } else if (state == SCE_RB_REGEX || state == SCE_RB_STRING_QR) {
1372 if (ch == '\\' && Quote.Up != '\\') {
1373 // Skip one
1374 advance_char(i, ch, chNext, chNext2);
1375 } else if (ch == Quote.Down) {
1376 Quote.Count--;
1377 if (Quote.Count == 0) {
1378 // Include the options
1379 while (isSafeAlpha(chNext)) {
1380 i++;
1381 ch = chNext;
1382 chNext = styler.SafeGetCharAt(i + 1);
1384 styler.ColourTo(i, state);
1385 state = SCE_RB_DEFAULT;
1386 preferRE = false;
1388 } else if (ch == Quote.Up) {
1389 // Only if close quoter != open quoter
1390 Quote.Count++;
1392 } else if (ch == '#') {
1393 if (chNext == '{'
1394 && inner_string_count < INNER_STRINGS_MAX_COUNT) {
1395 // process #{ ... }
1396 styler.ColourTo(i - 1, state);
1397 styler.ColourTo(i + 1, SCE_RB_OPERATOR);
1398 enterInnerExpression(inner_string_types,
1399 inner_expn_brace_counts,
1400 inner_quotes,
1401 inner_string_count,
1402 state,
1403 brace_counts,
1404 Quote);
1405 preferRE = true;
1406 // Skip one
1407 advance_char(i, ch, chNext, chNext2);
1408 } else {
1409 //todo: distinguish comments from pound chars
1410 // for now, handle as comment
1411 styler.ColourTo(i - 1, state);
1412 bool inEscape = false;
1413 while (++i < lengthDoc) {
1414 ch = styler.SafeGetCharAt(i);
1415 if (ch == '\\') {
1416 inEscape = true;
1417 } else if (isEOLChar(ch)) {
1418 // Comment inside a regex
1419 styler.ColourTo(i - 1, SCE_RB_COMMENTLINE);
1420 break;
1421 } else if (inEscape) {
1422 inEscape = false; // don't look at char
1423 } else if (ch == Quote.Down) {
1424 // Have the regular handler deal with this
1425 // to get trailing modifiers.
1426 i--;
1427 ch = styler[i];
1428 break;
1431 chNext = styler.SafeGetCharAt(i + 1);
1434 // Quotes of all kinds...
1435 } else if (state == SCE_RB_STRING_Q || state == SCE_RB_STRING_QQ ||
1436 state == SCE_RB_STRING_QX || state == SCE_RB_STRING_QW ||
1437 state == SCE_RB_STRING || state == SCE_RB_CHARACTER ||
1438 state == SCE_RB_BACKTICKS) {
1439 if (!Quote.Down && !isspacechar(ch)) {
1440 Quote.Open(ch);
1441 } else if (ch == '\\' && Quote.Up != '\\') {
1442 //Riddle me this: Is it safe to skip *every* escaped char?
1443 advance_char(i, ch, chNext, chNext2);
1444 } else if (ch == Quote.Down) {
1445 Quote.Count--;
1446 if (Quote.Count == 0) {
1447 styler.ColourTo(i, state);
1448 state = SCE_RB_DEFAULT;
1449 preferRE = false;
1451 } else if (ch == Quote.Up) {
1452 Quote.Count++;
1453 } else if (ch == '#' && chNext == '{'
1454 && inner_string_count < INNER_STRINGS_MAX_COUNT
1455 && state != SCE_RB_CHARACTER
1456 && state != SCE_RB_STRING_Q) {
1457 // process #{ ... }
1458 styler.ColourTo(i - 1, state);
1459 styler.ColourTo(i + 1, SCE_RB_OPERATOR);
1460 enterInnerExpression(inner_string_types,
1461 inner_expn_brace_counts,
1462 inner_quotes,
1463 inner_string_count,
1464 state,
1465 brace_counts,
1466 Quote);
1467 preferRE = true;
1468 // Skip one
1469 advance_char(i, ch, chNext, chNext2);
1473 if (state == SCE_RB_ERROR) {
1474 break;
1476 chPrev = ch;
1478 if (state == SCE_RB_WORD) {
1479 // We've ended on a word, possibly at EOF, and need to
1480 // classify it.
1481 (void) ClassifyWordRb(styler.GetStartSegment(), lengthDoc - 1, keywords, styler, prevWord);
1482 } else {
1483 styler.ColourTo(lengthDoc - 1, state);
1487 // Helper functions for folding, disambiguation keywords
1488 // Assert that there are no high-bit chars
1490 static void getPrevWord(Sci_Position pos,
1491 char *prevWord,
1492 Accessor &styler,
1493 int word_state)
1495 Sci_Position i;
1496 styler.Flush();
1497 for (i = pos - 1; i > 0; i--) {
1498 if (actual_style(styler.StyleAt(i)) != word_state) {
1499 i++;
1500 break;
1503 if (i < pos - MAX_KEYWORD_LENGTH) // overflow
1504 i = pos - MAX_KEYWORD_LENGTH;
1505 char *dst = prevWord;
1506 for (; i <= pos; i++) {
1507 *dst++ = styler[i];
1509 *dst = 0;
// Report whether `prevWord` is one of the keywords that can either open a
// block or merely modify a statement: if, do, while, unless, until, for.
// The comparison is exact and case-sensitive.
static bool keywordIsAmbiguous(const char *prevWord)
{
    // Ordered from most likely used to least likely; Ruby has many ways
    // to write a loop besides 'while/until'.
    static const char *const ambiguousWords[] = {
        "if", "do", "while", "unless", "until", "for"
    };
    const size_t count = sizeof(ambiguousWords) / sizeof(ambiguousWords[0]);
    for (size_t k = 0; k < count; k++) {
        if (strcmp(prevWord, ambiguousWords[k]) == 0) {
            return true;
        }
    }
    return false;
}
1528 // Demote keywords in the following conditions:
1529 // if, while, unless, until modify a statement
1530 // do after a while or until, as a noise word (like then after if)
1532 static bool keywordIsModifier(const char *word,
1533 Sci_Position pos,
1534 Accessor &styler)
1536 if (word[0] == 'd' && word[1] == 'o' && !word[2]) {
1537 return keywordDoStartsLoop(pos, styler);
1539 char ch, chPrev, chPrev2;
1540 int style = SCE_RB_DEFAULT;
1541 Sci_Position lineStart = styler.GetLine(pos);
1542 Sci_Position lineStartPosn = styler.LineStart(lineStart);
1543 // We want to step backwards until we don't care about the current
1544 // position. But first move lineStartPosn back behind any
1545 // continuations immediately above word.
1546 while (lineStartPosn > 0) {
1547 ch = styler[lineStartPosn-1];
1548 if (ch == '\n' || ch == '\r') {
1549 chPrev = styler.SafeGetCharAt(lineStartPosn-2);
1550 chPrev2 = styler.SafeGetCharAt(lineStartPosn-3);
1551 lineStart = styler.GetLine(lineStartPosn-1);
1552 // If we find a continuation line, include it in our analysis.
1553 if (chPrev == '\\') {
1554 lineStartPosn = styler.LineStart(lineStart);
1555 } else if (ch == '\n' && chPrev == '\r' && chPrev2 == '\\') {
1556 lineStartPosn = styler.LineStart(lineStart);
1557 } else {
1558 break;
1560 } else {
1561 break;
1565 styler.Flush();
1566 while (--pos >= lineStartPosn) {
1567 style = actual_style(styler.StyleAt(pos));
1568 if (style == SCE_RB_DEFAULT) {
1569 if (iswhitespace(ch = styler[pos])) {
1570 //continue
1571 } else if (ch == '\r' || ch == '\n') {
1572 // Scintilla's LineStart() and GetLine() routines aren't
1573 // platform-independent, so if we have text prepared with
1574 // a different system we can't rely on it.
1576 // Also, lineStartPosn may have been moved to more than one
1577 // line above word's line while pushing past continuations.
1578 chPrev = styler.SafeGetCharAt(pos - 1);
1579 chPrev2 = styler.SafeGetCharAt(pos - 2);
1580 if (chPrev == '\\') {
1581 pos-=1; // gloss over the "\\"
1582 //continue
1583 } else if (ch == '\n' && chPrev == '\r' && chPrev2 == '\\') {
1584 pos-=2; // gloss over the "\\\r"
1585 //continue
1586 } else {
1587 return false;
1590 } else {
1591 break;
1594 if (pos < lineStartPosn) {
1595 return false;
1597 // First things where the action is unambiguous
1598 switch (style) {
1599 case SCE_RB_DEFAULT:
1600 case SCE_RB_COMMENTLINE:
1601 case SCE_RB_POD:
1602 case SCE_RB_CLASSNAME:
1603 case SCE_RB_DEFNAME:
1604 case SCE_RB_MODULE_NAME:
1605 return false;
1606 case SCE_RB_OPERATOR:
1607 break;
1608 case SCE_RB_WORD:
1609 // Watch out for uses of 'else if'
1610 //XXX: Make a list of other keywords where 'if' isn't a modifier
1611 // and can appear legitimately
1612 // Formulate this to avoid warnings from most compilers
1613 if (strcmp(word, "if") == 0) {
1614 char prevWord[MAX_KEYWORD_LENGTH + 1];
1615 getPrevWord(pos, prevWord, styler, SCE_RB_WORD);
1616 return strcmp(prevWord, "else") != 0;
1618 return true;
1619 default:
1620 return true;
1622 // Assume that if the keyword follows an operator,
1623 // usually it's a block assignment, like
1624 // a << if x then y else z
1626 ch = styler[pos];
1627 switch (ch) {
1628 case ')':
1629 case ']':
1630 case '}':
1631 return true;
1632 default:
1633 return false;
1637 #define WHILE_BACKWARDS "elihw"
1638 #define UNTIL_BACKWARDS "litnu"
1639 #define FOR_BACKWARDS "rof"
1641 // Nothing fancy -- look to see if we follow a while/until somewhere
1642 // on the current line
1644 static bool keywordDoStartsLoop(Sci_Position pos,
1645 Accessor &styler)
1647 char ch;
1648 int style;
1649 Sci_Position lineStart = styler.GetLine(pos);
1650 Sci_Position lineStartPosn = styler.LineStart(lineStart);
1651 styler.Flush();
1652 while (--pos >= lineStartPosn) {
1653 style = actual_style(styler.StyleAt(pos));
1654 if (style == SCE_RB_DEFAULT) {
1655 if ((ch = styler[pos]) == '\r' || ch == '\n') {
1656 // Scintilla's LineStart() and GetLine() routines aren't
1657 // platform-independent, so if we have text prepared with
1658 // a different system we can't rely on it.
1659 return false;
1661 } else if (style == SCE_RB_WORD) {
1662 // Check for while or until, but write the word in backwards
1663 char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
1664 char *dst = prevWord;
1665 int wordLen = 0;
1666 Sci_Position start_word;
1667 for (start_word = pos;
1668 start_word >= lineStartPosn && actual_style(styler.StyleAt(start_word)) == SCE_RB_WORD;
1669 start_word--) {
1670 if (++wordLen < MAX_KEYWORD_LENGTH) {
1671 *dst++ = styler[start_word];
1674 *dst = 0;
1675 // Did we see our keyword?
1676 if (!strcmp(prevWord, WHILE_BACKWARDS)
1677 || !strcmp(prevWord, UNTIL_BACKWARDS)
1678 || !strcmp(prevWord, FOR_BACKWARDS)) {
1679 return true;
1681 // We can move pos to the beginning of the keyword, and then
1682 // accept another decrement, as we can never have two contiguous
1683 // keywords:
1684 // word1 word2
1685 // ^
1686 // <- move to start_word
1687 // ^
1688 // <- loop decrement
1689 // ^ # pointing to end of word1 is fine
1690 pos = start_word;
1693 return false;
1696 static bool IsCommentLine(Sci_Position line, Accessor &styler) {
1697 Sci_Position pos = styler.LineStart(line);
1698 Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
1699 for (Sci_Position i = pos; i < eol_pos; i++) {
1700 char ch = styler[i];
1701 if (ch == '#')
1702 return true;
1703 else if (ch != ' ' && ch != '\t')
1704 return false;
1706 return false;
1710 * Folding Ruby
1712 * The language is quite complex to analyze without a full parse.
1713 * For example, this line shouldn't affect fold level:
1715 * print "hello" if feeling_friendly?
1717 * Neither should this:
1719 * print "hello" \
1720 * if feeling_friendly?
1723 * But this should:
1725 * if feeling_friendly? #++
1726 * print "hello" \
1727 * print "goodbye"
1728 * end #--
1730 * So we cheat, by actually looking at the existing indentation
1731 * levels for each line, and just echoing it back. Like Python.
1732 * Then if we get better at it, we'll take braces into consideration,
1733 * which always affect folding levels.
1735 * How the keywords should work:
1736 * No effect:
1737 * __FILE__ __LINE__ BEGIN END alias and
1738 * defined? false in nil not or self super then
1739 * true undef
1741 * Always increment:
1742 * begin class def do for module when {
1744 * Always decrement:
1745 * end }
1747 * Increment if these start a statement
1748 * if unless until while -- do nothing if they're modifiers
1750 * These end a block if there's no modifier, but don't bother
1751 * break next redo retry return yield
1753 * These temporarily de-indent, but re-indent
1754 * case else elsif ensure rescue
1756 * This means that the folder reflects indentation rather
1757 * than setting it. The language-service updates indentation
1758 * when the user types return and finishes entering de-denters.
1760 * Later offer to fold POD, here-docs, strings, and blocks of comments
1763 static void FoldRbDoc(Sci_PositionU startPos, Sci_Position length, int initStyle,
1764 WordList *[], Accessor &styler) {
1765 const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
1766 bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
1768 synchronizeDocStart(startPos, length, initStyle, styler, // ref args
1769 false);
1770 Sci_PositionU endPos = startPos + length;
1771 int visibleChars = 0;
1772 Sci_Position lineCurrent = styler.GetLine(startPos);
1773 int levelPrev = startPos == 0 ? 0 : (styler.LevelAt(lineCurrent)
1774 & SC_FOLDLEVELNUMBERMASK
1775 & ~SC_FOLDLEVELBASE);
1776 int levelCurrent = levelPrev;
1777 char chNext = styler[startPos];
1778 int styleNext = styler.StyleAt(startPos);
1779 int stylePrev = startPos <= 1 ? SCE_RB_DEFAULT : styler.StyleAt(startPos - 1);
1780 bool buffer_ends_with_eol = false;
1781 for (Sci_PositionU i = startPos; i < endPos; i++) {
1782 char ch = chNext;
1783 chNext = styler.SafeGetCharAt(i + 1);
1784 int style = styleNext;
1785 styleNext = styler.StyleAt(i + 1);
1786 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1788 /*Mutiline comment patch*/
1789 if (foldComment && atEOL && IsCommentLine(lineCurrent, styler)) {
1790 if (!IsCommentLine(lineCurrent - 1, styler)
1791 && IsCommentLine(lineCurrent + 1, styler))
1792 levelCurrent++;
1793 else if (IsCommentLine(lineCurrent - 1, styler)
1794 && !IsCommentLine(lineCurrent + 1, styler))
1795 levelCurrent--;
1798 if (style == SCE_RB_COMMENTLINE) {
1799 if (foldComment && stylePrev != SCE_RB_COMMENTLINE) {
1800 if (chNext == '{') {
1801 levelCurrent++;
1802 } else if (chNext == '}' && levelCurrent > 0) {
1803 levelCurrent--;
1806 } else if (style == SCE_RB_OPERATOR) {
1807 if (strchr("[{(", ch)) {
1808 levelCurrent++;
1809 } else if (strchr(")}]", ch)) {
1810 // Don't decrement below 0
1811 if (levelCurrent > 0)
1812 levelCurrent--;
1814 } else if (style == SCE_RB_WORD && styleNext != SCE_RB_WORD) {
1815 // Look at the keyword on the left and decide what to do
1816 char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
1817 prevWord[0] = 0;
1818 getPrevWord(i, prevWord, styler, SCE_RB_WORD);
1819 if (!strcmp(prevWord, "end")) {
1820 // Don't decrement below 0
1821 if (levelCurrent > 0)
1822 levelCurrent--;
1823 } else if (!strcmp(prevWord, "if")
1824 || !strcmp(prevWord, "def")
1825 || !strcmp(prevWord, "class")
1826 || !strcmp(prevWord, "module")
1827 || !strcmp(prevWord, "begin")
1828 || !strcmp(prevWord, "case")
1829 || !strcmp(prevWord, "do")
1830 || !strcmp(prevWord, "while")
1831 || !strcmp(prevWord, "unless")
1832 || !strcmp(prevWord, "until")
1833 || !strcmp(prevWord, "for")
1835 levelCurrent++;
1837 } else if (style == SCE_RB_HERE_DELIM) {
1838 if (styler.SafeGetCharAt(i-2) == '<' && styler.SafeGetCharAt(i-1) == '<') {
1839 levelCurrent++;
1840 } else if (styleNext == SCE_RB_DEFAULT) {
1841 levelCurrent--;
1844 if (atEOL) {
1845 int lev = levelPrev;
1846 if (visibleChars == 0 && foldCompact)
1847 lev |= SC_FOLDLEVELWHITEFLAG;
1848 if ((levelCurrent > levelPrev) && (visibleChars > 0))
1849 lev |= SC_FOLDLEVELHEADERFLAG;
1850 styler.SetLevel(lineCurrent, lev|SC_FOLDLEVELBASE);
1851 lineCurrent++;
1852 levelPrev = levelCurrent;
1853 visibleChars = 0;
1854 buffer_ends_with_eol = true;
1855 } else if (!isspacechar(ch)) {
1856 visibleChars++;
1857 buffer_ends_with_eol = false;
1859 stylePrev = style;
1861 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1862 if (!buffer_ends_with_eol) {
1863 lineCurrent++;
1864 int new_lev = levelCurrent;
1865 if (visibleChars == 0 && foldCompact)
1866 new_lev |= SC_FOLDLEVELWHITEFLAG;
1867 if ((levelCurrent > levelPrev) && (visibleChars > 0))
1868 new_lev |= SC_FOLDLEVELHEADERFLAG;
1869 levelCurrent = new_lev;
1871 styler.SetLevel(lineCurrent, levelCurrent|SC_FOLDLEVELBASE);
// Descriptions of the keyword lists used by the Ruby lexer, one entry per
// list, in list order.  The array ends with a null entry, the usual
// convention for word-list description arrays handed to a LexerModule.
static const char *const rubyWordListDesc[] = {
    "Keywords",
    0
};
1879 LexerModule lmRuby(SCLEX_RUBY, ColouriseRbDoc, "ruby", FoldRbDoc, rubyWordListDesc);