Add xml_indent_tags filetype setting for documents using the
[geany-mirror.git] / scintilla / LexRuby.cxx
blob8d6dc90b8f6e67ce0fe2b347116308bccfdeeb07
1 // Scintilla source code edit control
2 /** @file LexRuby.cxx
3 ** Lexer for Ruby.
4 **/
5 // Copyright 2001- by Clemens Wyss <wys@helbling.ch>
6 // The License.txt file describes the conditions under which this software may be distributed.
8 #include <stdlib.h>
9 #include <string.h>
10 #include <ctype.h>
11 #include <stdio.h>
12 #include <stdarg.h>
14 #include "Platform.h"
16 #include "PropSet.h"
17 #include "Accessor.h"
18 #include "KeyWords.h"
19 #include "Scintilla.h"
20 #include "SciLexer.h"
22 #ifdef SCI_NAMESPACE
23 using namespace Scintilla;
24 #endif
26 //XXX Identical to Perl, put in common area
// Returns true when ch is a line-terminator character (CR or LF).
static inline bool isEOLChar(char ch) {
    switch (ch) {
    case '\r':
    case '\n':
        return true;
    default:
        return false;
    }
}
// True when ch, converted to unsigned int, lies in the 7-bit range 0..127.
#define isSafeASCII(ch) ((unsigned int)(ch) < 128)
// Exact complement of isSafeASCII.
// This one's redundant, but makes for more readable code
#define isHighBitChar(ch) (!isSafeASCII(ch))
35 static inline bool isSafeAlpha(char ch) {
36 return (isSafeASCII(ch) && isalpha(ch)) || ch == '_';
39 static inline bool isSafeAlnum(char ch) {
40 return (isSafeASCII(ch) && isalnum(ch)) || ch == '_';
43 static inline bool isSafeAlnumOrHigh(char ch) {
44 return isHighBitChar(ch) || isalnum(ch) || ch == '_';
47 static inline bool isSafeDigit(char ch) {
48 return isSafeASCII(ch) && isdigit(ch);
51 static inline bool isSafeWordcharOrHigh(char ch) {
52 // Error: scintilla's KeyWords.h includes '.' as a word-char
53 // we want to separate things that can take methods from the
54 // methods.
55 return isHighBitChar(ch) || isalnum(ch) || ch == '_';
// True for horizontal whitespace only (space or tab); line terminators
// are deliberately not included.
static inline bool iswhitespace(char ch) {
    switch (ch) {
    case ' ':
    case '\t':
        return true;
    default:
        return false;
    }
}
// Size of the local buffer used to hold a single word while classifying it.
#define MAX_KEYWORD_LENGTH 200

// Mask applied to a style value by actual_style() below.
#define STYLE_MASK 63
// Masks a style value with STYLE_MASK.  The argument is parenthesized
// so that a compound expression such as `a | b` is masked as a whole
// rather than being re-associated by operator precedence.
#define actual_style(style) ((style) & STYLE_MASK)
67 static bool followsDot(unsigned int pos, Accessor &styler) {
68 styler.Flush();
69 for (; pos >= 1; --pos) {
70 int style = actual_style(styler.StyleAt(pos));
71 char ch;
72 switch (style) {
73 case SCE_RB_DEFAULT:
74 ch = styler[pos];
75 if (ch == ' ' || ch == '\t') {
76 //continue
77 } else {
78 return false;
80 break;
82 case SCE_RB_OPERATOR:
83 return styler[pos] == '.';
85 default:
86 return false;
89 return false;
92 // Forward declarations
93 static bool keywordIsAmbiguous(const char *prevWord);
94 static bool keywordDoStartsLoop(int pos,
95 Accessor &styler);
96 static bool keywordIsModifier(const char *word,
97 int pos,
98 Accessor &styler);
100 static int ClassifyWordRb(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord) {
101 char s[MAX_KEYWORD_LENGTH];
102 unsigned int i, j;
103 unsigned int lim = end - start + 1; // num chars to copy
104 if (lim >= MAX_KEYWORD_LENGTH) {
105 lim = MAX_KEYWORD_LENGTH - 1;
107 for (i = start, j = 0; j < lim; i++, j++) {
108 s[j] = styler[i];
110 s[j] = '\0';
111 int chAttr;
112 if (0 == strcmp(prevWord, "class"))
113 chAttr = SCE_RB_CLASSNAME;
114 else if (0 == strcmp(prevWord, "module"))
115 chAttr = SCE_RB_MODULE_NAME;
116 else if (0 == strcmp(prevWord, "def"))
117 chAttr = SCE_RB_DEFNAME;
118 else if (keywords.InList(s) && !followsDot(start - 1, styler)) {
119 if (keywordIsAmbiguous(s)
120 && keywordIsModifier(s, start, styler)) {
122 // Demoted keywords are colored as keywords,
123 // but do not affect changes in indentation.
125 // Consider the word 'if':
126 // 1. <<if test ...>> : normal
127 // 2. <<stmt if test>> : demoted
128 // 3. <<lhs = if ...>> : normal: start a new indent level
129 // 4. <<obj.if = 10>> : color as identifer, since it follows '.'
131 chAttr = SCE_RB_WORD_DEMOTED;
132 } else {
133 chAttr = SCE_RB_WORD;
135 } else
136 chAttr = SCE_RB_IDENTIFIER;
137 styler.ColourTo(end, chAttr);
138 if (chAttr == SCE_RB_WORD) {
139 strcpy(prevWord, s);
140 } else {
141 prevWord[0] = 0;
143 return chAttr;
147 //XXX Identical to Perl, put in common area
148 static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) {
149 if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) {
150 return false;
152 while (*val) {
153 if (*val != styler[pos++]) {
154 return false;
156 val++;
158 return true;
161 // Do Ruby better -- find the end of the line, work back,
162 // and then check for leading white space
164 // Precondition: the here-doc target can be indented
165 static bool lookingAtHereDocDelim(Accessor &styler,
166 int pos,
167 int lengthDoc,
168 const char *HereDocDelim)
170 if (!isMatch(styler, lengthDoc, pos, HereDocDelim)) {
171 return false;
173 while (--pos > 0) {
174 char ch = styler[pos];
175 if (isEOLChar(ch)) {
176 return true;
177 } else if (ch != ' ' && ch != '\t') {
178 return false;
181 return false;
184 //XXX Identical to Perl, put in common area
// Maps an opening bracket to its closing counterpart.  Any other
// character is returned unchanged, i.e. it acts as its own closer.
static char opposite(char ch) {
    switch (ch) {
    case '(':
        return ')';
    case '[':
        return ']';
    case '{':
        return '}';
    case '<':
        return '>';
    default:
        return ch;
    }
}
197 // Null transitions when we see we've reached the end
198 // and need to relex the curr char.
200 static void redo_char(int &i, char &ch, char &chNext, char &chNext2,
201 int &state) {
202 i--;
203 chNext2 = chNext;
204 chNext = ch;
205 state = SCE_RB_DEFAULT;
// Moves the scan forward by one character: the look-ahead characters
// are shifted one place to the left (ch <- chNext, chNext <- chNext2).
// chNext2 is read but not refreshed here.
static void advance_char(int &i, char &ch, char &chNext, char &chNext2) {
    ++i;
    ch = chNext;
    chNext = chNext2;
}
214 // precondition: startPos points to one after the EOL char
215 static bool currLineContainsHereDelims(int& startPos,
216 Accessor &styler) {
217 if (startPos <= 1)
218 return false;
220 int pos;
221 for (pos = startPos - 1; pos > 0; pos--) {
222 char ch = styler.SafeGetCharAt(pos);
223 if (isEOLChar(ch)) {
224 // Leave the pointers where they are -- there are no
225 // here doc delims on the current line, even if
226 // the EOL isn't default style
228 return false;
229 } else {
230 styler.Flush();
231 if (actual_style(styler.StyleAt(pos)) == SCE_RB_HERE_DELIM) {
232 break;
236 if (pos == 0) {
237 return false;
239 // Update the pointers so we don't have to re-analyze the string
240 startPos = pos;
241 return true;
244 // This class is used by the enter and exit methods, so it needs
245 // to be hoisted out of the function.
247 class QuoteCls {
248 public:
249 int Count;
250 char Up;
251 char Down;
252 QuoteCls() {
253 this->New();
255 void New() {
256 Count = 0;
257 Up = '\0';
258 Down = '\0';
260 void Open(char u) {
261 Count++;
262 Up = u;
263 Down = opposite(Up);
265 QuoteCls(const QuoteCls& q) {
266 // copy constructor -- use this for copying in
267 Count = q.Count;
268 Up = q.Up;
269 Down = q.Down;
271 QuoteCls& operator=(const QuoteCls& q) { // assignment constructor
272 if (this != &q) {
273 Count = q.Count;
274 Up = q.Up;
275 Down = q.Down;
277 return *this;
283 static void enterInnerExpression(int *p_inner_string_types,
284 int *p_inner_expn_brace_counts,
285 QuoteCls *p_inner_quotes,
286 int& inner_string_count,
287 int& state,
288 int& brace_counts,
289 QuoteCls curr_quote
291 p_inner_string_types[inner_string_count] = state;
292 state = SCE_RB_DEFAULT;
293 p_inner_expn_brace_counts[inner_string_count] = brace_counts;
294 brace_counts = 0;
295 p_inner_quotes[inner_string_count] = curr_quote;
296 ++inner_string_count;
299 static void exitInnerExpression(int *p_inner_string_types,
300 int *p_inner_expn_brace_counts,
301 QuoteCls *p_inner_quotes,
302 int& inner_string_count,
303 int& state,
304 int& brace_counts,
305 QuoteCls& curr_quote
307 --inner_string_count;
308 state = p_inner_string_types[inner_string_count];
309 brace_counts = p_inner_expn_brace_counts[inner_string_count];
310 curr_quote = p_inner_quotes[inner_string_count];
313 static bool isEmptyLine(int pos,
314 Accessor &styler) {
315 int spaceFlags = 0;
316 int lineCurrent = styler.GetLine(pos);
317 int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
318 return (indentCurrent & SC_FOLDLEVELWHITEFLAG) != 0;
// Reports whether keyword is one of the keywords after which a '/'
// is taken to start a regular expression.  The comparison is exact
// and case-sensitive.
static bool RE_CanFollowKeyword(const char *keyword) {
    static const char *const reFriendly[] = {
        "and", "begin", "break", "case", "do",
        "else", "elsif", "if", "next", "return",
        "when", "unless", "until", "not", "or"
    };
    const size_t count = sizeof(reFriendly) / sizeof(reFriendly[0]);
    for (size_t k = 0; k < count; ++k) {
        if (strcmp(keyword, reFriendly[k]) == 0) {
            return true;
        }
    }
    return false;
}
342 // Look at chars up to but not including endPos
343 // Don't look at styles in case we're looking forward
345 static int skipWhitespace(int startPos,
346 int endPos,
347 Accessor &styler) {
348 for (int i = startPos; i < endPos; i++) {
349 if (!iswhitespace(styler[i])) {
350 return i;
353 return endPos;
356 // This routine looks for false positives like
357 // undef foo, <<
358 // There aren't too many.
360 // iPrev points to the start of <<
362 static bool sureThisIsHeredoc(int iPrev,
363 Accessor &styler,
364 char *prevWord) {
366 // Not so fast, since Ruby's so dynamic. Check the context
367 // to make sure we're OK.
368 int prevStyle;
369 int lineStart = styler.GetLine(iPrev);
370 int lineStartPosn = styler.LineStart(lineStart);
371 styler.Flush();
373 // Find the first word after some whitespace
374 int firstWordPosn = skipWhitespace(lineStartPosn, iPrev, styler);
375 if (firstWordPosn >= iPrev) {
376 // Have something like {^ <<}
377 //XXX Look at the first previous non-comment non-white line
378 // to establish the context. Not too likely though.
379 return true;
380 } else {
381 switch (prevStyle = styler.StyleAt(firstWordPosn)) {
382 case SCE_RB_WORD:
383 case SCE_RB_WORD_DEMOTED:
384 case SCE_RB_IDENTIFIER:
385 break;
386 default:
387 return true;
390 int firstWordEndPosn = firstWordPosn;
391 char *dst = prevWord;
392 for (;;) {
393 if (firstWordEndPosn >= iPrev ||
394 styler.StyleAt(firstWordEndPosn) != prevStyle) {
395 *dst = 0;
396 break;
398 *dst++ = styler[firstWordEndPosn];
399 firstWordEndPosn += 1;
401 //XXX Write a style-aware thing to regex scintilla buffer objects
402 if (!strcmp(prevWord, "undef")
403 || !strcmp(prevWord, "def")
404 || !strcmp(prevWord, "alias")) {
405 // These keywords are what we were looking for
406 return false;
408 return true;
411 // Routine that saves us from allocating a buffer for the here-doc target
412 // targetEndPos points one past the end of the current target
413 static bool haveTargetMatch(int currPos,
414 int lengthDoc,
415 int targetStartPos,
416 int targetEndPos,
417 Accessor &styler) {
418 if (lengthDoc - currPos < targetEndPos - targetStartPos) {
419 return false;
421 int i, j;
422 for (i = targetStartPos, j = currPos;
423 i < targetEndPos && j < lengthDoc;
424 i++, j++) {
425 if (styler[i] != styler[j]) {
426 return false;
429 return true;
432 // We need a check because the form
433 // [identifier] <<[target]
434 // is ambiguous. The Ruby lexer/parser resolves it by
435 // looking to see if [identifier] names a variable or a
436 // function. If it's the first, it's the start of a here-doc.
437 // If it's a var, it's an operator. This lexer doesn't
438 // maintain a symbol table, so it looks ahead to see what's
439 // going on, in cases where we have
440 // ^[white-space]*[identifier([.|::]identifier)*][white-space]*<<[target]
442 // If there's no occurrence of [target] on a line, assume we don't.
444 // return true == yes, we have no heredocs
446 static bool sureThisIsNotHeredoc(int lt2StartPos,
447 Accessor &styler) {
448 int prevStyle;
449 // Use full document, not just part we're styling
450 int lengthDoc = styler.Length();
451 int lineStart = styler.GetLine(lt2StartPos);
452 int lineStartPosn = styler.LineStart(lineStart);
453 styler.Flush();
454 const bool definitely_not_a_here_doc = true;
455 const bool looks_like_a_here_doc = false;
457 // Find the first word after some whitespace
458 int firstWordPosn = skipWhitespace(lineStartPosn, lt2StartPos, styler);
459 if (firstWordPosn >= lt2StartPos) {
460 return definitely_not_a_here_doc;
462 prevStyle = styler.StyleAt(firstWordPosn);
463 // If we have '<<' following a keyword, it's not a heredoc
464 if (prevStyle != SCE_RB_IDENTIFIER) {
465 return definitely_not_a_here_doc;
467 int newStyle = prevStyle;
468 // Some compilers incorrectly warn about uninit newStyle
469 for (firstWordPosn += 1; firstWordPosn <= lt2StartPos; firstWordPosn += 1) {
470 // Inner loop looks at the name
471 for (; firstWordPosn <= lt2StartPos; firstWordPosn += 1) {
472 newStyle = styler.StyleAt(firstWordPosn);
473 if (newStyle != prevStyle) {
474 break;
477 // Do we have '::' or '.'?
478 if (firstWordPosn < lt2StartPos && newStyle == SCE_RB_OPERATOR) {
479 char ch = styler[firstWordPosn];
480 if (ch == '.') {
481 // yes
482 } else if (ch == ':') {
483 if (styler.StyleAt(++firstWordPosn) != SCE_RB_OPERATOR) {
484 return definitely_not_a_here_doc;
485 } else if (styler[firstWordPosn] != ':') {
486 return definitely_not_a_here_doc;
488 } else {
489 break;
491 } else {
492 break;
495 // Skip next batch of white-space
496 firstWordPosn = skipWhitespace(firstWordPosn, lt2StartPos, styler);
497 if (firstWordPosn != lt2StartPos) {
498 // Have [[^ws[identifier]ws[*something_else*]ws<<
499 return definitely_not_a_here_doc;
501 // OK, now 'j' will point to the current spot moving ahead
502 int j = firstWordPosn + 1;
503 if (styler.StyleAt(j) != SCE_RB_OPERATOR || styler[j] != '<') {
504 // This shouldn't happen
505 return definitely_not_a_here_doc;
507 int nextLineStartPosn = styler.LineStart(lineStart + 1);
508 if (nextLineStartPosn >= lengthDoc) {
509 return definitely_not_a_here_doc;
511 j = skipWhitespace(j + 1, nextLineStartPosn, styler);
512 if (j >= lengthDoc) {
513 return definitely_not_a_here_doc;
515 bool allow_indent;
516 int target_start, target_end;
517 // From this point on no more styling, since we're looking ahead
518 if (styler[j] == '-') {
519 allow_indent = true;
520 j++;
521 } else {
522 allow_indent = false;
525 // Allow for quoted targets.
526 char target_quote = 0;
527 switch (styler[j]) {
528 case '\'':
529 case '"':
530 case '`':
531 target_quote = styler[j];
532 j += 1;
535 if (isSafeAlnum(styler[j])) {
536 // Init target_end because some compilers think it won't
537 // be initialized by the time it's used
538 target_start = target_end = j;
539 j++;
540 } else {
541 return definitely_not_a_here_doc;
543 for (; j < lengthDoc; j++) {
544 if (!isSafeAlnum(styler[j])) {
545 if (target_quote && styler[j] != target_quote) {
546 // unquoted end
547 return definitely_not_a_here_doc;
550 // And for now make sure that it's a newline
551 // don't handle arbitrary expressions yet
553 target_end = j;
554 if (target_quote) {
555 // Now we can move to the character after the string delimiter.
556 j += 1;
558 j = skipWhitespace(j, lengthDoc, styler);
559 if (j >= lengthDoc) {
560 return definitely_not_a_here_doc;
561 } else {
562 char ch = styler[j];
563 if (ch == '#' || isEOLChar(ch)) {
564 // This is OK, so break and continue;
565 break;
566 } else {
567 return definitely_not_a_here_doc;
573 // Just look at the start of each line
574 int last_line = styler.GetLine(lengthDoc - 1);
575 // But don't go too far
576 if (last_line > lineStart + 50) {
577 last_line = lineStart + 50;
579 for (int line_num = lineStart + 1; line_num <= last_line; line_num++) {
580 if (allow_indent) {
581 j = skipWhitespace(styler.LineStart(line_num), lengthDoc, styler);
582 } else {
583 j = styler.LineStart(line_num);
585 // target_end is one past the end
586 if (haveTargetMatch(j, lengthDoc, target_start, target_end, styler)) {
587 // We got it
588 return looks_like_a_here_doc;
591 return definitely_not_a_here_doc;
594 //todo: if we aren't looking at a stdio character,
595 // move to the start of the first line that is not in a
596 // multi-line construct
598 static void synchronizeDocStart(unsigned int& startPos,
599 int &length,
600 int &initStyle,
601 Accessor &styler,
602 bool skipWhiteSpace=false) {
604 styler.Flush();
605 int style = actual_style(styler.StyleAt(startPos));
606 switch (style) {
607 case SCE_RB_STDIN:
608 case SCE_RB_STDOUT:
609 case SCE_RB_STDERR:
610 // Don't do anything else with these.
611 return;
614 int pos = startPos;
615 // Quick way to characterize each line
616 int lineStart;
617 for (lineStart = styler.GetLine(pos); lineStart > 0; lineStart--) {
618 // Now look at the style before the previous line's EOL
619 pos = styler.LineStart(lineStart) - 1;
620 if (pos <= 10) {
621 lineStart = 0;
622 break;
624 char ch = styler.SafeGetCharAt(pos);
625 char chPrev = styler.SafeGetCharAt(pos - 1);
626 if (ch == '\n' && chPrev == '\r') {
627 pos--;
629 if (styler.SafeGetCharAt(pos - 1) == '\\') {
630 // Continuation line -- keep going
631 } else if (actual_style(styler.StyleAt(pos)) != SCE_RB_DEFAULT) {
632 // Part of multi-line construct -- keep going
633 } else if (currLineContainsHereDelims(pos, styler)) {
634 // Keep going, with pos and length now pointing
635 // at the end of the here-doc delimiter
636 } else if (skipWhiteSpace && isEmptyLine(pos, styler)) {
637 // Keep going
638 } else {
639 break;
642 pos = styler.LineStart(lineStart);
643 length += (startPos - pos);
644 startPos = pos;
645 initStyle = SCE_RB_DEFAULT;
648 static void ColouriseRbDoc(unsigned int startPos, int length, int initStyle,
649 WordList *keywordlists[], Accessor &styler) {
651 // Lexer for Ruby often has to backtrack to start of current style to determine
652 // which characters are being used as quotes, how deeply nested is the
653 // start position and what the termination string is for here documents
655 WordList &keywords = *keywordlists[0];
657 class HereDocCls {
658 public:
659 int State;
660 // States
661 // 0: '<<' encountered
662 // 1: collect the delimiter
663 // 1b: text between the end of the delimiter and the EOL
664 // 2: here doc text (lines after the delimiter)
665 char Quote; // the char after '<<'
666 bool Quoted; // true if Quote in ('\'','"','`')
667 int DelimiterLength; // strlen(Delimiter)
668 char Delimiter[256]; // the Delimiter, limit of 256: from Perl
669 bool CanBeIndented;
670 HereDocCls() {
671 State = 0;
672 DelimiterLength = 0;
673 Delimiter[0] = '\0';
674 CanBeIndented = false;
677 HereDocCls HereDoc;
679 QuoteCls Quote;
681 int numDots = 0; // For numbers --
682 // Don't start lexing in the middle of a num
684 synchronizeDocStart(startPos, length, initStyle, styler, // ref args
685 false);
687 bool preferRE = true;
688 int state = initStyle;
689 int lengthDoc = startPos + length;
691 char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
692 prevWord[0] = '\0';
693 if (length == 0)
694 return;
696 char chPrev = styler.SafeGetCharAt(startPos - 1);
697 char chNext = styler.SafeGetCharAt(startPos);
698 bool is_real_number = true; // Differentiate between constants and ?-sequences.
699 // Ruby uses a different mask because bad indentation is marked by oring with 32
700 styler.StartAt(startPos, 127);
701 styler.StartSegment(startPos);
703 static int q_states[] = {SCE_RB_STRING_Q,
704 SCE_RB_STRING_QQ,
705 SCE_RB_STRING_QR,
706 SCE_RB_STRING_QW,
707 SCE_RB_STRING_QW,
708 SCE_RB_STRING_QX};
709 static const char* q_chars = "qQrwWx";
711 // In most cases a value of 2 should be ample for the code in the
712 // Ruby library, and the code the user is likely to enter.
713 // For example,
714 // fu_output_message "mkdir #{options[:mode] ? ('-m %03o ' % options[:mode]) : ''}#{list.join ' '}"
715 // if options[:verbose]
716 // from fileutils.rb nests to a level of 2
717 // If the user actually hits a 6th occurrence of '#{' in a double-quoted
718 // string (including regex'es, %Q, %<sym>, %w, and other strings
719 // that interpolate), it will stay as a string. The problem with this
720 // is that quotes might flip, a 7th '#{' will look like a comment,
721 // and code-folding might be wrong.
723 // If anyone runs into this problem, I recommend raising this
724 // value slightly higher to replacing the fixed array with a linked
725 // list. Keep in mind this code will be called everytime the lexer
726 // is invoked.
728 #define INNER_STRINGS_MAX_COUNT 5
729 // These vars track our instances of "...#{,,,%Q<..#{,,,}...>,,,}..."
730 int inner_string_types[INNER_STRINGS_MAX_COUNT];
731 // Track # braces when we push a new #{ thing
732 int inner_expn_brace_counts[INNER_STRINGS_MAX_COUNT];
733 QuoteCls inner_quotes[INNER_STRINGS_MAX_COUNT];
734 int inner_string_count = 0;
735 int brace_counts = 0; // Number of #{ ... } things within an expression
737 int i;
738 for (i = 0; i < INNER_STRINGS_MAX_COUNT; i++) {
739 inner_string_types[i] = 0;
740 inner_expn_brace_counts[i] = 0;
742 for (i = startPos; i < lengthDoc; i++) {
743 char ch = chNext;
744 chNext = styler.SafeGetCharAt(i + 1);
745 char chNext2 = styler.SafeGetCharAt(i + 2);
747 if (styler.IsLeadByte(ch)) {
748 chNext = chNext2;
749 chPrev = ' ';
750 i += 1;
751 continue;
754 // skip on DOS/Windows
755 //No, don't, because some things will get tagged on,
756 // so we won't recognize keywords, for example
757 #if 0
758 if (ch == '\r' && chNext == '\n') {
759 continue;
761 #endif
763 if (HereDoc.State == 1 && isEOLChar(ch)) {
764 // Begin of here-doc (the line after the here-doc delimiter):
765 HereDoc.State = 2;
766 styler.ColourTo(i-1, state);
767 // Don't check for a missing quote, just jump into
768 // the here-doc state
769 state = SCE_RB_HERE_Q;
772 // Regular transitions
773 if (state == SCE_RB_DEFAULT) {
774 if (isSafeDigit(ch)) {
775 styler.ColourTo(i - 1, state);
776 state = SCE_RB_NUMBER;
777 is_real_number = true;
778 numDots = 0;
779 } else if (isHighBitChar(ch) || iswordstart(ch)) {
780 styler.ColourTo(i - 1, state);
781 state = SCE_RB_WORD;
782 } else if (ch == '#') {
783 styler.ColourTo(i - 1, state);
784 state = SCE_RB_COMMENTLINE;
785 } else if (ch == '=') {
786 // =begin indicates the start of a comment (doc) block
787 if (i == 0 || (isEOLChar(chPrev)
788 && chNext == 'b'
789 && styler.SafeGetCharAt(i + 2) == 'e'
790 && styler.SafeGetCharAt(i + 3) == 'g'
791 && styler.SafeGetCharAt(i + 4) == 'i'
792 && styler.SafeGetCharAt(i + 5) == 'n'
793 && !isSafeWordcharOrHigh(styler.SafeGetCharAt(i + 6)))) {
794 styler.ColourTo(i - 1, state);
795 state = SCE_RB_POD;
796 } else {
797 styler.ColourTo(i - 1, state);
798 styler.ColourTo(i, SCE_RB_OPERATOR);
799 preferRE = true;
801 } else if (ch == '"') {
802 styler.ColourTo(i - 1, state);
803 state = SCE_RB_STRING;
804 Quote.New();
805 Quote.Open(ch);
806 } else if (ch == '\'') {
807 styler.ColourTo(i - 1, state);
808 state = SCE_RB_CHARACTER;
809 Quote.New();
810 Quote.Open(ch);
811 } else if (ch == '`') {
812 styler.ColourTo(i - 1, state);
813 state = SCE_RB_BACKTICKS;
814 Quote.New();
815 Quote.Open(ch);
816 } else if (ch == '@') {
817 // Instance or class var
818 styler.ColourTo(i - 1, state);
819 if (chNext == '@') {
820 state = SCE_RB_CLASS_VAR;
821 advance_char(i, ch, chNext, chNext2); // pass by ref
822 } else {
823 state = SCE_RB_INSTANCE_VAR;
825 } else if (ch == '$') {
826 // Check for a builtin global
827 styler.ColourTo(i - 1, state);
828 // Recognize it bit by bit
829 state = SCE_RB_GLOBAL;
830 } else if (ch == '/' && preferRE) {
831 // Ambigous operator
832 styler.ColourTo(i - 1, state);
833 state = SCE_RB_REGEX;
834 Quote.New();
835 Quote.Open(ch);
836 } else if (ch == '<' && chNext == '<' && chNext2 != '=') {
838 // Recognise the '<<' symbol - either a here document or a binary op
839 styler.ColourTo(i - 1, state);
840 i++;
841 chNext = chNext2;
842 styler.ColourTo(i, SCE_RB_OPERATOR);
844 if (! (strchr("\"\'`_-", chNext2) || isSafeAlpha(chNext2))) {
845 // It's definitely not a here-doc,
846 // based on Ruby's lexer/parser in the
847 // heredoc_identifier routine.
848 // Nothing else to do.
849 } else if (preferRE) {
850 if (sureThisIsHeredoc(i - 1, styler, prevWord)) {
851 state = SCE_RB_HERE_DELIM;
852 HereDoc.State = 0;
854 // else leave it in default state
855 } else {
856 if (sureThisIsNotHeredoc(i - 1, styler)) {
857 // leave state as default
858 // We don't have all the heuristics Perl has for indications
859 // of a here-doc, because '<<' is overloadable and used
860 // for so many other classes.
861 } else {
862 state = SCE_RB_HERE_DELIM;
863 HereDoc.State = 0;
866 preferRE = (state != SCE_RB_HERE_DELIM);
867 } else if (ch == ':') {
868 styler.ColourTo(i - 1, state);
869 if (chNext == ':') {
870 // Mark "::" as an operator, not symbol start
871 styler.ColourTo(i + 1, SCE_RB_OPERATOR);
872 advance_char(i, ch, chNext, chNext2); // pass by ref
873 state = SCE_RB_DEFAULT;
874 preferRE = false;
875 } else if (isSafeWordcharOrHigh(chNext)) {
876 state = SCE_RB_SYMBOL;
877 } else if (strchr("[*!~+-*/%=<>&^|", chNext)) {
878 // Do the operator analysis in-line, looking ahead
879 // Based on the table in pickaxe 2nd ed., page 339
880 bool doColoring = true;
881 switch (chNext) {
882 case '[':
883 if (chNext2 == ']' ) {
884 char ch_tmp = styler.SafeGetCharAt(i + 3);
885 if (ch_tmp == '=') {
886 i += 3;
887 ch = ch_tmp;
888 chNext = styler.SafeGetCharAt(i + 1);
889 } else {
890 i += 2;
891 ch = chNext2;
892 chNext = ch_tmp;
894 } else {
895 doColoring = false;
897 break;
899 case '*':
900 if (chNext2 == '*') {
901 i += 2;
902 ch = chNext2;
903 chNext = styler.SafeGetCharAt(i + 1);
904 } else {
905 advance_char(i, ch, chNext, chNext2);
907 break;
909 case '!':
910 if (chNext2 == '=' || chNext2 == '~') {
911 i += 2;
912 ch = chNext2;
913 chNext = styler.SafeGetCharAt(i + 1);
914 } else {
915 advance_char(i, ch, chNext, chNext2);
917 break;
919 case '<':
920 if (chNext2 == '<') {
921 i += 2;
922 ch = chNext2;
923 chNext = styler.SafeGetCharAt(i + 1);
924 } else if (chNext2 == '=') {
925 char ch_tmp = styler.SafeGetCharAt(i + 3);
926 if (ch_tmp == '>') { // <=> operator
927 i += 3;
928 ch = ch_tmp;
929 chNext = styler.SafeGetCharAt(i + 1);
930 } else {
931 i += 2;
932 ch = chNext2;
933 chNext = ch_tmp;
935 } else {
936 advance_char(i, ch, chNext, chNext2);
938 break;
940 default:
941 // Simple one-character operators
942 advance_char(i, ch, chNext, chNext2);
943 break;
945 if (doColoring) {
946 styler.ColourTo(i, SCE_RB_SYMBOL);
947 state = SCE_RB_DEFAULT;
949 } else if (!preferRE) {
950 // Don't color symbol strings (yet)
951 // Just color the ":" and color rest as string
952 styler.ColourTo(i, SCE_RB_SYMBOL);
953 state = SCE_RB_DEFAULT;
954 } else {
955 styler.ColourTo(i, SCE_RB_OPERATOR);
956 state = SCE_RB_DEFAULT;
957 preferRE = true;
959 } else if (ch == '%') {
960 styler.ColourTo(i - 1, state);
961 bool have_string = false;
962 if (strchr(q_chars, chNext) && !isSafeWordcharOrHigh(chNext2)) {
963 Quote.New();
964 const char *hit = strchr(q_chars, chNext);
965 if (hit != NULL) {
966 state = q_states[hit - q_chars];
967 Quote.Open(chNext2);
968 i += 2;
969 ch = chNext2;
970 chNext = styler.SafeGetCharAt(i + 1);
971 have_string = true;
973 } else if (preferRE && !isSafeWordcharOrHigh(chNext)) {
974 // Ruby doesn't allow high bit chars here,
975 // but the editor host might
976 state = SCE_RB_STRING_QQ;
977 Quote.Open(chNext);
978 advance_char(i, ch, chNext, chNext2); // pass by ref
979 have_string = true;
981 if (!have_string) {
982 styler.ColourTo(i, SCE_RB_OPERATOR);
983 // stay in default
984 preferRE = true;
986 } else if (ch == '?') {
987 styler.ColourTo(i - 1, state);
988 if (iswhitespace(chNext) || chNext == '\n' || chNext == '\r') {
989 styler.ColourTo(i, SCE_RB_OPERATOR);
990 } else {
991 // It's the start of a character code escape sequence
992 // Color it as a number.
993 state = SCE_RB_NUMBER;
994 is_real_number = false;
996 } else if (isoperator(ch) || ch == '.') {
997 styler.ColourTo(i - 1, state);
998 styler.ColourTo(i, SCE_RB_OPERATOR);
999 // If we're ending an expression or block,
1000 // assume it ends an object, and the ambivalent
1001 // constructs are binary operators
1003 // So if we don't have one of these chars,
1004 // we aren't ending an object exp'n, and ops
1005 // like : << / are unary operators.
1007 if (ch == '{') {
1008 ++brace_counts;
1009 preferRE = true;
1010 } else if (ch == '}' && --brace_counts < 0
1011 && inner_string_count > 0) {
1012 styler.ColourTo(i, SCE_RB_OPERATOR);
1013 exitInnerExpression(inner_string_types,
1014 inner_expn_brace_counts,
1015 inner_quotes,
1016 inner_string_count,
1017 state, brace_counts, Quote);
1018 } else {
1019 preferRE = (strchr(")}].", ch) == NULL);
1021 // Stay in default state
1022 } else if (isEOLChar(ch)) {
1023 // Make sure it's a true line-end, with no backslash
1024 if ((ch == '\r' || (ch == '\n' && chPrev != '\r'))
1025 && chPrev != '\\') {
1026 // Assume we've hit the end of the statement.
1027 preferRE = true;
1030 } else if (state == SCE_RB_WORD) {
1031 if (ch == '.' || !isSafeWordcharOrHigh(ch)) {
1032 // Words include x? in all contexts,
1033 // and <letters>= after either 'def' or a dot
1034 // Move along until a complete word is on our left
1036 // Default accessor treats '.' as word-chars,
1037 // but we don't for now.
1039 if (ch == '='
1040 && isSafeWordcharOrHigh(chPrev)
1041 && (chNext == '('
1042 || strchr(" \t\n\r", chNext) != NULL)
1043 && (!strcmp(prevWord, "def")
1044 || followsDot(styler.GetStartSegment(), styler))) {
1045 // <name>= is a name only when being def'd -- Get it the next time
1046 // This means that <name>=<name> is always lexed as
1047 // <name>, (op, =), <name>
1048 } else if ((ch == '?' || ch == '!')
1049 && isSafeWordcharOrHigh(chPrev)
1050 && !isSafeWordcharOrHigh(chNext)) {
1051 // <name>? is a name -- Get it the next time
1052 // But <name>?<name> is always lexed as
1053 // <name>, (op, ?), <name>
1054 // Same with <name>! to indicate a method that
1055 // modifies its target
1056 } else if (isEOLChar(ch)
1057 && isMatch(styler, lengthDoc, i - 7, "__END__")) {
1058 styler.ColourTo(i, SCE_RB_DATASECTION);
1059 state = SCE_RB_DATASECTION;
1060 // No need to handle this state -- we'll just move to the end
1061 preferRE = false;
1062 } else {
1063 int wordStartPos = styler.GetStartSegment();
1064 int word_style = ClassifyWordRb(wordStartPos, i - 1, keywords, styler, prevWord);
1065 switch (word_style) {
1066 case SCE_RB_WORD:
1067 preferRE = RE_CanFollowKeyword(prevWord);
1068 break;
1070 case SCE_RB_WORD_DEMOTED:
1071 preferRE = true;
1072 break;
1074 case SCE_RB_IDENTIFIER:
1075 if (isMatch(styler, lengthDoc, wordStartPos, "print")) {
1076 preferRE = true;
1077 } else if (isEOLChar(ch)) {
1078 preferRE = true;
1079 } else {
1080 preferRE = false;
1082 break;
1083 default:
1084 preferRE = false;
1086 if (ch == '.') {
1087 // We might be redefining an operator-method
1088 preferRE = false;
1090 // And if it's the first
1091 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1094 } else if (state == SCE_RB_NUMBER) {
1095 if (!is_real_number) {
1096 if (ch != '\\') {
1097 styler.ColourTo(i, state);
1098 state = SCE_RB_DEFAULT;
1099 preferRE = false;
1100 } else if (strchr("\\ntrfvaebs", chNext)) {
1101 // Terminal escape sequence -- handle it next time
1102 // Nothing more to do this time through the loop
1103 } else if (chNext == 'C' || chNext == 'M') {
1104 if (chNext2 != '-') {
1105 // \C or \M ends the sequence -- handle it next time
1106 } else {
1107 // Move from abc?\C-x
1108 // ^
1109 // to
1110 // ^
1111 i += 2;
1112 ch = chNext2;
1113 chNext = styler.SafeGetCharAt(i + 1);
1115 } else if (chNext == 'c') {
1116 // Stay here, \c is a combining sequence
1117 advance_char(i, ch, chNext, chNext2); // pass by ref
1118 } else {
1119 // ?\x, including ?\\ is final.
1120 styler.ColourTo(i + 1, state);
1121 state = SCE_RB_DEFAULT;
1122 preferRE = false;
1123 advance_char(i, ch, chNext, chNext2);
1125 } else if (isSafeAlnumOrHigh(ch) || ch == '_') {
1126 // Keep going
1127 } else if (ch == '.' && ++numDots == 1) {
1128 // Keep going
1129 } else {
1130 styler.ColourTo(i - 1, state);
1131 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1132 preferRE = false;
1134 } else if (state == SCE_RB_COMMENTLINE) {
1135 if (isEOLChar(ch)) {
1136 styler.ColourTo(i - 1, state);
1137 state = SCE_RB_DEFAULT;
1138 // Use whatever setting we had going into the comment
1140 } else if (state == SCE_RB_HERE_DELIM) {
1141 // See the comment for SCE_RB_HERE_DELIM in LexPerl.cxx
1142 // Slightly different: if we find an immediate '-',
1143 // the target can appear indented.
1145 if (HereDoc.State == 0) { // '<<' encountered
1146 HereDoc.State = 1;
1147 HereDoc.DelimiterLength = 0;
1148 if (ch == '-') {
1149 HereDoc.CanBeIndented = true;
1150 advance_char(i, ch, chNext, chNext2); // pass by ref
1151 } else {
1152 HereDoc.CanBeIndented = false;
1154 if (isEOLChar(ch)) {
1155 // Bail out of doing a here doc if there's no target
1156 state = SCE_RB_DEFAULT;
1157 preferRE = false;
1158 } else {
1159 HereDoc.Quote = ch;
1161 if (ch == '\'' || ch == '"' || ch == '`') {
1162 HereDoc.Quoted = true;
1163 HereDoc.Delimiter[0] = '\0';
1164 } else {
1165 HereDoc.Quoted = false;
1166 HereDoc.Delimiter[0] = ch;
1167 HereDoc.Delimiter[1] = '\0';
1168 HereDoc.DelimiterLength = 1;
1171 } else if (HereDoc.State == 1) { // collect the delimiter
1172 if (isEOLChar(ch)) {
1173 // End the quote now, and go back for more
1174 styler.ColourTo(i - 1, state);
1175 state = SCE_RB_DEFAULT;
1176 i--;
1177 chNext = ch;
1178 chNext2 = chNext;
1179 preferRE = false;
1180 } else if (HereDoc.Quoted) {
1181 if (ch == HereDoc.Quote) { // closing quote => end of delimiter
1182 styler.ColourTo(i, state);
1183 state = SCE_RB_DEFAULT;
1184 preferRE = false;
1185 } else {
1186 if (ch == '\\' && !isEOLChar(chNext)) {
1187 advance_char(i, ch, chNext, chNext2);
1189 HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
1190 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
1192 } else { // an unquoted here-doc delimiter
1193 if (isSafeAlnumOrHigh(ch) || ch == '_') {
1194 HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
1195 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
1196 } else {
1197 styler.ColourTo(i - 1, state);
1198 redo_char(i, ch, chNext, chNext2, state);
1199 preferRE = false;
1202 if (HereDoc.DelimiterLength >= static_cast<int>(sizeof(HereDoc.Delimiter)) - 1) {
1203 styler.ColourTo(i - 1, state);
1204 state = SCE_RB_ERROR;
1205 preferRE = false;
1208 } else if (state == SCE_RB_HERE_Q) {
1209 // Not needed: HereDoc.State == 2
1210 // Indentable here docs: look backwards
1211 // Non-indentable: look forwards, like in Perl
1213 // Why: so we can quickly resolve things like <<-" abc"
1215 if (!HereDoc.CanBeIndented) {
1216 if (isEOLChar(chPrev)
1217 && isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) {
1218 styler.ColourTo(i - 1, state);
1219 i += HereDoc.DelimiterLength - 1;
1220 chNext = styler.SafeGetCharAt(i + 1);
1221 if (isEOLChar(chNext)) {
1222 styler.ColourTo(i, SCE_RB_HERE_DELIM);
1223 state = SCE_RB_DEFAULT;
1224 HereDoc.State = 0;
1225 preferRE = false;
1227 // Otherwise we skipped through the here doc faster.
1229 } else if (isEOLChar(chNext)
1230 && lookingAtHereDocDelim(styler,
1231 i - HereDoc.DelimiterLength + 1,
1232 lengthDoc,
1233 HereDoc.Delimiter)) {
1234 styler.ColourTo(i - 1 - HereDoc.DelimiterLength, state);
1235 styler.ColourTo(i, SCE_RB_HERE_DELIM);
1236 state = SCE_RB_DEFAULT;
1237 preferRE = false;
1238 HereDoc.State = 0;
1240 } else if (state == SCE_RB_CLASS_VAR
1241 || state == SCE_RB_INSTANCE_VAR
1242 || state == SCE_RB_SYMBOL) {
1243 if (!isSafeWordcharOrHigh(ch)) {
1244 styler.ColourTo(i - 1, state);
1245 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1246 preferRE = false;
1248 } else if (state == SCE_RB_GLOBAL) {
1249 if (!isSafeWordcharOrHigh(ch)) {
1250 // handle special globals here as well
1251 if (chPrev == '$') {
1252 if (ch == '-') {
1253 // Include the next char, like $-a
1254 advance_char(i, ch, chNext, chNext2);
1256 styler.ColourTo(i, state);
1257 state = SCE_RB_DEFAULT;
1258 } else {
1259 styler.ColourTo(i - 1, state);
1260 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1262 preferRE = false;
1264 } else if (state == SCE_RB_POD) {
1265 // PODs end with ^=end\s, -- any whitespace can follow =end
1266 if (strchr(" \t\n\r", ch) != NULL
1267 && i > 5
1268 && isEOLChar(styler[i - 5])
1269 && isMatch(styler, lengthDoc, i - 4, "=end")) {
1270 styler.ColourTo(i - 1, state);
1271 state = SCE_RB_DEFAULT;
1272 preferRE = false;
1274 } else if (state == SCE_RB_REGEX || state == SCE_RB_STRING_QR) {
1275 if (ch == '\\' && Quote.Up != '\\') {
1276 // Skip one
1277 advance_char(i, ch, chNext, chNext2);
1278 } else if (ch == Quote.Down) {
1279 Quote.Count--;
1280 if (Quote.Count == 0) {
1281 // Include the options
1282 while (isSafeAlpha(chNext)) {
1283 i++;
1284 ch = chNext;
1285 chNext = styler.SafeGetCharAt(i + 1);
1287 styler.ColourTo(i, state);
1288 state = SCE_RB_DEFAULT;
1289 preferRE = false;
1291 } else if (ch == Quote.Up) {
1292 // Only if close quoter != open quoter
1293 Quote.Count++;
1295 } else if (ch == '#' ) {
1296 if (chNext == '{'
1297 && inner_string_count < INNER_STRINGS_MAX_COUNT) {
1298 // process #{ ... }
1299 styler.ColourTo(i - 1, state);
1300 styler.ColourTo(i + 1, SCE_RB_OPERATOR);
1301 enterInnerExpression(inner_string_types,
1302 inner_expn_brace_counts,
1303 inner_quotes,
1304 inner_string_count,
1305 state,
1306 brace_counts,
1307 Quote);
1308 preferRE = true;
1309 // Skip one
1310 advance_char(i, ch, chNext, chNext2);
1311 } else {
1312 //todo: distinguish comments from pound chars
1313 // for now, handle as comment
1314 styler.ColourTo(i - 1, state);
1315 bool inEscape = false;
1316 while (++i < lengthDoc) {
1317 ch = styler.SafeGetCharAt(i);
1318 if (ch == '\\') {
1319 inEscape = true;
1320 } else if (isEOLChar(ch)) {
1321 // Comment inside a regex
1322 styler.ColourTo(i - 1, SCE_RB_COMMENTLINE);
1323 break;
1324 } else if (inEscape) {
1325 inEscape = false; // don't look at char
1326 } else if (ch == Quote.Down) {
1327 // Have the regular handler deal with this
1328 // to get trailing modifiers.
1329 i--;
1330 ch = styler[i];
1331 break;
1334 chNext = styler.SafeGetCharAt(i + 1);
1335 chNext2 = styler.SafeGetCharAt(i + 2);
1338 // Quotes of all kinds...
1339 } else if (state == SCE_RB_STRING_Q || state == SCE_RB_STRING_QQ ||
1340 state == SCE_RB_STRING_QX || state == SCE_RB_STRING_QW ||
1341 state == SCE_RB_STRING || state == SCE_RB_CHARACTER ||
1342 state == SCE_RB_BACKTICKS) {
1343 if (!Quote.Down && !isspacechar(ch)) {
1344 Quote.Open(ch);
1345 } else if (ch == '\\' && Quote.Up != '\\') {
1346 //Riddle me this: Is it safe to skip *every* escaped char?
1347 advance_char(i, ch, chNext, chNext2);
1348 } else if (ch == Quote.Down) {
1349 Quote.Count--;
1350 if (Quote.Count == 0) {
1351 styler.ColourTo(i, state);
1352 state = SCE_RB_DEFAULT;
1353 preferRE = false;
1355 } else if (ch == Quote.Up) {
1356 Quote.Count++;
1357 } else if (ch == '#' && chNext == '{'
1358 && inner_string_count < INNER_STRINGS_MAX_COUNT
1359 && state != SCE_RB_CHARACTER
1360 && state != SCE_RB_STRING_Q) {
1361 // process #{ ... }
1362 styler.ColourTo(i - 1, state);
1363 styler.ColourTo(i + 1, SCE_RB_OPERATOR);
1364 enterInnerExpression(inner_string_types,
1365 inner_expn_brace_counts,
1366 inner_quotes,
1367 inner_string_count,
1368 state,
1369 brace_counts,
1370 Quote);
1371 preferRE = true;
1372 // Skip one
1373 advance_char(i, ch, chNext, chNext2);
1377 if (state == SCE_RB_ERROR) {
1378 break;
1380 chPrev = ch;
1382 if (state == SCE_RB_WORD) {
1383 // We've ended on a word, possibly at EOF, and need to
1384 // classify it.
1385 (void) ClassifyWordRb(styler.GetStartSegment(), lengthDoc - 1, keywords, styler, prevWord);
1386 } else {
1387 styler.ColourTo(lengthDoc - 1, state);
1391 // Helper functions for folding, disambiguation keywords
1392 // Assert that there are no high-bit chars
1394 static void getPrevWord(int pos,
1395 char *prevWord,
1396 Accessor &styler,
1397 int word_state)
1399 int i;
1400 styler.Flush();
1401 for (i = pos - 1; i > 0; i--) {
1402 if (actual_style(styler.StyleAt(i)) != word_state) {
1403 i++;
1404 break;
1407 if (i < pos - MAX_KEYWORD_LENGTH) // overflow
1408 i = pos - MAX_KEYWORD_LENGTH;
1409 char *dst = prevWord;
1410 for (; i <= pos; i++) {
1411 *dst++ = styler[i];
1413 *dst = 0;
// Report whether `prevWord` is one of the keywords that can either open a
// block or act as a statement modifier / noise word: if, do, while,
// unless, until.  The comparison is exact and case-sensitive.
static bool keywordIsAmbiguous(const char *prevWord)
{
    // Ordered from most likely used to least likely.
    // Lots of ways to do a loop in Ruby besides 'while/until'.
    static const char *const ambiguousWords[] = {
        "if", "do", "while", "unless", "until"
    };
    const size_t wordCount = sizeof(ambiguousWords) / sizeof(ambiguousWords[0]);
    for (size_t k = 0; k < wordCount; k++) {
        if (strcmp(prevWord, ambiguousWords[k]) == 0) {
            return true;
        }
    }
    return false;
}
1431 // Demote keywords in the following conditions:
1432 // if, while, unless, until modify a statement
1433 // do after a while or until, as a noise word (like then after if)
1435 static bool keywordIsModifier(const char *word,
1436 int pos,
1437 Accessor &styler)
1439 if (word[0] == 'd' && word[1] == 'o' && !word[2]) {
1440 return keywordDoStartsLoop(pos, styler);
1442 char ch;
1443 int style = SCE_RB_DEFAULT;
1444 int lineStart = styler.GetLine(pos);
1445 int lineStartPosn = styler.LineStart(lineStart);
1446 styler.Flush();
1447 while (--pos >= lineStartPosn) {
1448 style = actual_style(styler.StyleAt(pos));
1449 if (style == SCE_RB_DEFAULT) {
1450 if (iswhitespace(ch = styler[pos])) {
1451 //continue
1452 } else if (ch == '\r' || ch == '\n') {
1453 // Scintilla's LineStart() and GetLine() routines aren't
1454 // platform-independent, so if we have text prepared with
1455 // a different system we can't rely on it.
1456 return false;
1458 } else {
1459 break;
1462 if (pos < lineStartPosn) {
1463 return false; //XXX not quite right if the prev line is a continuation
1465 // First things where the action is unambiguous
1466 switch (style) {
1467 case SCE_RB_DEFAULT:
1468 case SCE_RB_COMMENTLINE:
1469 case SCE_RB_POD:
1470 case SCE_RB_CLASSNAME:
1471 case SCE_RB_DEFNAME:
1472 case SCE_RB_MODULE_NAME:
1473 return false;
1474 case SCE_RB_OPERATOR:
1475 break;
1476 case SCE_RB_WORD:
1477 // Watch out for uses of 'else if'
1478 //XXX: Make a list of other keywords where 'if' isn't a modifier
1479 // and can appear legitimately
1480 // Formulate this to avoid warnings from most compilers
1481 if (strcmp(word, "if") == 0) {
1482 char prevWord[MAX_KEYWORD_LENGTH + 1];
1483 getPrevWord(pos, prevWord, styler, SCE_RB_WORD);
1484 return strcmp(prevWord, "else") != 0;
1486 return true;
1487 default:
1488 return true;
1490 // Assume that if the keyword follows an operator,
1491 // usually it's a block assignment, like
1492 // a << if x then y else z
1494 ch = styler[pos];
1495 switch (ch) {
1496 case ')':
1497 case ']':
1498 case '}':
1499 return true;
1500 default:
1501 return false;
1505 #define WHILE_BACKWARDS "elihw"
1506 #define UNTIL_BACKWARDS "litnu"
1508 // Nothing fancy -- look to see if we follow a while/until somewhere
1509 // on the current line
1511 static bool keywordDoStartsLoop(int pos,
1512 Accessor &styler)
1514 char ch;
1515 int style;
1516 int lineStart = styler.GetLine(pos);
1517 int lineStartPosn = styler.LineStart(lineStart);
1518 styler.Flush();
1519 while (--pos >= lineStartPosn) {
1520 style = actual_style(styler.StyleAt(pos));
1521 if (style == SCE_RB_DEFAULT) {
1522 if ((ch = styler[pos]) == '\r' || ch == '\n') {
1523 // Scintilla's LineStart() and GetLine() routines aren't
1524 // platform-independent, so if we have text prepared with
1525 // a different system we can't rely on it.
1526 return false;
1528 } else if (style == SCE_RB_WORD) {
1529 // Check for while or until, but write the word in backwards
1530 char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
1531 char *dst = prevWord;
1532 int wordLen = 0;
1533 int start_word;
1534 for (start_word = pos;
1535 start_word >= lineStartPosn && actual_style(styler.StyleAt(start_word)) == SCE_RB_WORD;
1536 start_word--) {
1537 if (++wordLen < MAX_KEYWORD_LENGTH) {
1538 *dst++ = styler[start_word];
1541 *dst = 0;
1542 // Did we see our keyword?
1543 if (!strcmp(prevWord, WHILE_BACKWARDS)
1544 || !strcmp(prevWord, UNTIL_BACKWARDS)) {
1545 return true;
1547 // We can move pos to the beginning of the keyword, and then
1548 // accept another decrement, as we can never have two contiguous
1549 // keywords:
1550 // word1 word2
1551 // ^
1552 // <- move to start_word
1553 // ^
1554 // <- loop decrement
1555 // ^ # pointing to end of word1 is fine
1556 pos = start_word;
1559 return false;
1563 * Folding Ruby
1565 * The language is quite complex to analyze without a full parse.
1566 * For example, this line shouldn't affect fold level:
1568 * print "hello" if feeling_friendly?
1570 * Neither should this:
1572 * print "hello" \
1573 * if feeling_friendly?
1576 * But this should:
1578 * if feeling_friendly? #++
1579 * print "hello" \
1580 * print "goodbye"
1581 * end #--
1583 * So we cheat, by actually looking at the existing indentation
1584 * levels for each line, and just echoing it back. Like Python.
1585 * Then if we get better at it, we'll take braces into consideration,
1586 * which always affect folding levels.
1588 * How the keywords should work:
1589 * No effect:
1590 * __FILE__ __LINE__ BEGIN END alias and
1591 * defined? false in nil not or self super then
1592 * true undef
1594 * Always increment:
1595 * begin class def do for module when {
1597 * Always decrement:
1598 * end }
1600 * Increment if these start a statement
1601 * if unless until while -- do nothing if they're modifiers
1603 * These end a block if there's no modifier, but don't bother
1604 * break next redo retry return yield
1606 * These temporarily de-indent, but re-indent
1607 * case else elsif ensure rescue
1609 * This means that the folder reflects indentation rather
1610 * than setting it. The language-service updates indentation
1611 * when users type return and finishes entering de-denters.
1613 * Later offer to fold POD, here-docs, strings, and blocks of comments
1616 static void FoldRbDoc(unsigned int startPos, int length, int initStyle,
1617 WordList *[], Accessor &styler) {
1618 const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
1619 bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
1621 synchronizeDocStart(startPos, length, initStyle, styler, // ref args
1622 false);
1623 unsigned int endPos = startPos + length;
1624 int visibleChars = 0;
1625 int lineCurrent = styler.GetLine(startPos);
1626 int levelPrev = startPos == 0 ? 0 : (styler.LevelAt(lineCurrent)
1627 & SC_FOLDLEVELNUMBERMASK
1628 & ~SC_FOLDLEVELBASE);
1629 int levelCurrent = levelPrev;
1630 char chNext = styler[startPos];
1631 int styleNext = styler.StyleAt(startPos);
1632 int stylePrev = startPos <= 1 ? SCE_RB_DEFAULT : styler.StyleAt(startPos - 1);
1633 bool buffer_ends_with_eol = false;
1634 for (unsigned int i = startPos; i < endPos; i++) {
1635 char ch = chNext;
1636 chNext = styler.SafeGetCharAt(i + 1);
1637 int style = styleNext;
1638 styleNext = styler.StyleAt(i + 1);
1639 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1640 if (style == SCE_RB_COMMENTLINE) {
1641 if (foldComment && stylePrev != SCE_RB_COMMENTLINE) {
1642 if (chNext == '{') {
1643 levelCurrent++;
1644 } else if (chNext == '}' && levelCurrent > 0) {
1645 levelCurrent--;
1648 } else if (style == SCE_RB_OPERATOR) {
1649 if (strchr("[{(", ch)) {
1650 levelCurrent++;
1651 } else if (strchr(")}]", ch)) {
1652 // Don't decrement below 0
1653 if (levelCurrent > 0)
1654 levelCurrent--;
1656 } else if (style == SCE_RB_WORD && styleNext != SCE_RB_WORD) {
1657 // Look at the keyword on the left and decide what to do
1658 char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
1659 prevWord[0] = 0;
1660 getPrevWord(i, prevWord, styler, SCE_RB_WORD);
1661 if (!strcmp(prevWord, "end")) {
1662 // Don't decrement below 0
1663 if (levelCurrent > 0)
1664 levelCurrent--;
1665 } else if ( !strcmp(prevWord, "if")
1666 || !strcmp(prevWord, "def")
1667 || !strcmp(prevWord, "class")
1668 || !strcmp(prevWord, "module")
1669 || !strcmp(prevWord, "begin")
1670 || !strcmp(prevWord, "case")
1671 || !strcmp(prevWord, "do")
1672 || !strcmp(prevWord, "while")
1673 || !strcmp(prevWord, "unless")
1674 || !strcmp(prevWord, "until")
1675 || !strcmp(prevWord, "for")
1677 levelCurrent++;
1680 if (atEOL) {
1681 int lev = levelPrev;
1682 if (visibleChars == 0 && foldCompact)
1683 lev |= SC_FOLDLEVELWHITEFLAG;
1684 if ((levelCurrent > levelPrev) && (visibleChars > 0))
1685 lev |= SC_FOLDLEVELHEADERFLAG;
1686 styler.SetLevel(lineCurrent, lev|SC_FOLDLEVELBASE);
1687 lineCurrent++;
1688 levelPrev = levelCurrent;
1689 visibleChars = 0;
1690 buffer_ends_with_eol = true;
1691 } else if (!isspacechar(ch)) {
1692 visibleChars++;
1693 buffer_ends_with_eol = false;
1695 stylePrev = style;
1697 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1698 if (!buffer_ends_with_eol) {
1699 lineCurrent++;
1700 int new_lev = levelCurrent;
1701 if (visibleChars == 0 && foldCompact)
1702 new_lev |= SC_FOLDLEVELWHITEFLAG;
1703 if ((levelCurrent > levelPrev) && (visibleChars > 0))
1704 new_lev |= SC_FOLDLEVELHEADERFLAG;
1705 levelCurrent = new_lev;
1707 styler.SetLevel(lineCurrent, levelCurrent|SC_FOLDLEVELBASE);
1710 static const char * const rubyWordListDesc[] = {
1711 "Keywords",
1715 LexerModule lmRuby(SCLEX_RUBY, ColouriseRbDoc, "ruby", FoldRbDoc, rubyWordListDesc);