scintilla/lexers/LexCoffeeScript.cxx

   1 // Scintilla source code edit control
   2 /** @file LexCoffeeScript.cxx
   3  ** Lexer for CoffeeScript.
   4  **/
   5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
   6 // Based on the Scintilla C++ Lexer
   7 // Written by Eric Promislow <ericp@activestate.com> in 2011 for the Komodo IDE
   8 // The License.txt file describes the conditions under which this software may be distributed.
   9
  10 #include <stdlib.h>
  11 #include <string.h>
  12 #include <stdio.h>
  13 #include <stdarg.h>
  14 #include <assert.h>
  15 #include <ctype.h>
  16
  17 #include <algorithm>
  18
  19 #include "ILexer.h"
  20 #include "Scintilla.h"
  21 #include "SciLexer.h"
  22
  23 #include "WordList.h"
  24 #include "LexAccessor.h"
  25 #include "Accessor.h"
  26 #include "StyleContext.h"
  27 #include "CharacterSet.h"
  28 #include "LexerModule.h"
  29
  30 using namespace Scintilla;
  31
  32 static bool IsSpaceEquiv(int state) {
  33         return (state == SCE_COFFEESCRIPT_DEFAULT
  34             || state == SCE_COFFEESCRIPT_COMMENTLINE
  35             || state == SCE_COFFEESCRIPT_COMMENTBLOCK
  36             || state == SCE_COFFEESCRIPT_VERBOSE_REGEX
  37             || state == SCE_COFFEESCRIPT_VERBOSE_REGEX_COMMENT
  38             || state == SCE_COFFEESCRIPT_WORD
  39             || state == SCE_COFFEESCRIPT_REGEX);
  40 }
  41
  42 // Store the current lexer state and brace count prior to starting a new
  43 // `#{}` interpolation level.
  44 // Based on LexRuby.cxx.
  45 static void enterInnerExpression(int  *p_inner_string_types,
  46                                  int  *p_inner_expn_brace_counts,
  47                                  int&  inner_string_count,
  48                                  int   state,
  49                                  int&  brace_counts
  50                                  ) {
  51         p_inner_string_types[inner_string_count] = state;
  52         p_inner_expn_brace_counts[inner_string_count] = brace_counts;
  53         brace_counts = 0;
  54         ++inner_string_count;
  55 }
  56
  57 // Restore the lexer state and brace count for the previous `#{}` interpolation
  58 // level upon returning to it.
  59 // Note the previous lexer state is the return value and needs to be restored
  60 // manually by the StyleContext.
  61 // Based on LexRuby.cxx.
  62 static int exitInnerExpression(int  *p_inner_string_types,
  63                                int  *p_inner_expn_brace_counts,
  64                                int&  inner_string_count,
  65                                int&  brace_counts
  66                               ) {
  67         --inner_string_count;
  68         brace_counts = p_inner_expn_brace_counts[inner_string_count];
  69         return p_inner_string_types[inner_string_count];
  70 }
  71
  72 // Preconditions: sc.currentPos points to a character after '+' or '-'.
  73 // The test for pos reaching 0 should be redundant,
  74 // and is in only for safety measures.
  75 // Limitation: this code will give the incorrect answer for code like
  76 // a = b+++/ptn/...
  77 // Putting a space between the '++' post-inc operator and the '+' binary op
  78 // fixes this, and is highly recommended for readability anyway.
  79 static bool FollowsPostfixOperator(StyleContext &sc, Accessor &styler) {
  80         Sci_Position pos = (Sci_Position) sc.currentPos;
  81         while (--pos > 0) {
  82                 char ch = styler[pos];
  83                 if (ch == '+' || ch == '-') {
  84                         return styler[pos - 1] == ch;
  85                 }
  86         }
  87         return false;
  88 }
  89
  90 static bool followsKeyword(StyleContext &sc, Accessor &styler) {
  91         Sci_Position pos = (Sci_Position) sc.currentPos;
  92         Sci_Position currentLine = styler.GetLine(pos);
  93         Sci_Position lineStartPos = styler.LineStart(currentLine);
  94         while (--pos > lineStartPos) {
  95                 char ch = styler.SafeGetCharAt(pos);
  96                 if (ch != ' ' && ch != '\t') {
  97                         break;
  98                 }
  99         }
 100         styler.Flush();
 101         return styler.StyleAt(pos) == SCE_COFFEESCRIPT_WORD;
 102 }
 103
 104 static void ColouriseCoffeeScriptDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[],
 105                             Accessor &styler) {
 106
 107         WordList &keywords = *keywordlists[0];
 108         WordList &keywords2 = *keywordlists[1];
 109         WordList &keywords4 = *keywordlists[3];
 110
 111         CharacterSet setOKBeforeRE(CharacterSet::setNone, "([{=,:;!%^&*|?~+-");
 112         CharacterSet setCouldBePostOp(CharacterSet::setNone, "+-");
 113
 114         CharacterSet setWordStart(CharacterSet::setAlpha, "_$@", 0x80, true);
 115         CharacterSet setWord(CharacterSet::setAlphaNum, "._$", 0x80, true);
 116
 117         int chPrevNonWhite = ' ';
 118         int visibleChars = 0;
 119
 120         // String/Regex interpolation variables, based on LexRuby.cxx.
 121         // In most cases a value of 2 should be ample for the code the user is
 122         // likely to enter. For example,
 123         //   "Filling the #{container} with #{liquid}..."
 124         // from the CoffeeScript homepage nests to a level of 2
 125         // If the user actually hits a 6th occurrence of '#{' in a double-quoted
 126         // string (including regexes), it will stay as a string.  The problem with
 127         // this is that quotes might flip, a 7th '#{' will look like a comment,
 128         // and code-folding might be wrong.
 129 #define INNER_STRINGS_MAX_COUNT 5
 130         // These vars track our instances of "...#{,,,'..#{,,,}...',,,}..."
 131         int inner_string_types[INNER_STRINGS_MAX_COUNT];
 132         // Track # braces when we push a new #{ thing
 133         int inner_expn_brace_counts[INNER_STRINGS_MAX_COUNT];
 134         int inner_string_count = 0;
 135         int brace_counts = 0;   // Number of #{ ... } things within an expression
 136         for (int i = 0; i < INNER_STRINGS_MAX_COUNT; i++) {
 137                 inner_string_types[i] = 0;
 138                 inner_expn_brace_counts[i] = 0;
 139         }
 140
 141         // look back to set chPrevNonWhite properly for better regex colouring
 142         Sci_Position endPos = startPos + length;
 143         if (startPos > 0 && IsSpaceEquiv(initStyle)) {
 144                 Sci_PositionU back = startPos;
 145                 styler.Flush();
 146                 while (back > 0 && IsSpaceEquiv(styler.StyleAt(--back)))
 147                         ;
 148                 if (styler.StyleAt(back) == SCE_COFFEESCRIPT_OPERATOR) {
 149                         chPrevNonWhite = styler.SafeGetCharAt(back);
 150                 }
 151                 if (startPos != back) {
 152                         initStyle = styler.StyleAt(back);
 153                         if (IsSpaceEquiv(initStyle)) {
 154                                 initStyle = SCE_COFFEESCRIPT_DEFAULT;
 155                         }
 156                 }
 157                 startPos = back;
 158         }
 159
 160         StyleContext sc(startPos, endPos - startPos, initStyle, styler);
 161
 162         for (; sc.More();) {
 163
 164                 if (sc.atLineStart) {
 165                         // Reset states to beginning of colourise so no surprises
 166                         // if different sets of lines lexed.
 167                         visibleChars = 0;
 168                 }
 169
 170                 // Determine if the current state should terminate.
 171                 switch (sc.state) {
 172                         case SCE_COFFEESCRIPT_OPERATOR:
 173                                 sc.SetState(SCE_COFFEESCRIPT_DEFAULT);
 174                                 break;
 175                         case SCE_COFFEESCRIPT_NUMBER:
 176                                 // We accept almost anything because of hex. and number suffixes
 177                                 if (!setWord.Contains(sc.ch) || sc.Match('.', '.')) {
 178                                         sc.SetState(SCE_COFFEESCRIPT_DEFAULT);
 179                                 }
 180                                 break;
 181                         case SCE_COFFEESCRIPT_IDENTIFIER:
 182                                 if (!setWord.Contains(sc.ch) || (sc.ch == '.') || (sc.ch == '$')) {
 183                                         char s[1000];
 184                                         sc.GetCurrent(s, sizeof(s));
 185                                         if (keywords.InList(s)) {
 186                                                 sc.ChangeState(SCE_COFFEESCRIPT_WORD);
 187                                         } else if (keywords2.InList(s)) {
 188                                                 sc.ChangeState(SCE_COFFEESCRIPT_WORD2);
 189                                         } else if (keywords4.InList(s)) {
 190                                                 sc.ChangeState(SCE_COFFEESCRIPT_GLOBALCLASS);
 191                                         } else if (sc.LengthCurrent() > 0 && s[0] == '@') {
 192                                                 sc.ChangeState(SCE_COFFEESCRIPT_INSTANCEPROPERTY);
 193                                         }
 194                                         sc.SetState(SCE_COFFEESCRIPT_DEFAULT);
 195                                 }
 196                                 break;
 197                         case SCE_COFFEESCRIPT_WORD:
 198                         case SCE_COFFEESCRIPT_WORD2:
 199                         case SCE_COFFEESCRIPT_GLOBALCLASS:
 200                         case SCE_COFFEESCRIPT_INSTANCEPROPERTY:
 201                                 if (!setWord.Contains(sc.ch)) {
 202                                         sc.SetState(SCE_COFFEESCRIPT_DEFAULT);
 203                                 }
 204                                 break;
 205                         case SCE_COFFEESCRIPT_COMMENTLINE:
 206                                 if (sc.atLineStart) {
 207                                         sc.SetState(SCE_COFFEESCRIPT_DEFAULT);
 208                                 }
 209                                 break;
 210                         case SCE_COFFEESCRIPT_STRING:
 211                                 if (sc.ch == '\\') {
 212                                         if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
 213                                                 sc.Forward();
 214                                         }
 215                                 } else if (sc.ch == '\"') {
 216                                         sc.ForwardSetState(SCE_COFFEESCRIPT_DEFAULT);
 217                                 } else if (sc.ch == '#' && sc.chNext == '{' && inner_string_count < INNER_STRINGS_MAX_COUNT) {
 218                                         // process interpolated code #{ ... }
 219                                         enterInnerExpression(inner_string_types,
 220                                                              inner_expn_brace_counts,
 221                                                              inner_string_count,
 222                                                              sc.state,
 223                                                              brace_counts);
 224                                         sc.SetState(SCE_COFFEESCRIPT_OPERATOR);
 225                                         sc.ForwardSetState(SCE_COFFEESCRIPT_DEFAULT);
 226                                 }
 227                                 break;
 228                         case SCE_COFFEESCRIPT_CHARACTER:
 229                                 if (sc.ch == '\\') {
 230                                         if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
 231                                                 sc.Forward();
 232                                         }
 233                                 } else if (sc.ch == '\'') {
 234                                         sc.ForwardSetState(SCE_COFFEESCRIPT_DEFAULT);
 235                                 }
 236                                 break;
 237                         case SCE_COFFEESCRIPT_REGEX:
 238                                 if (sc.atLineStart) {
 239                                         sc.SetState(SCE_COFFEESCRIPT_DEFAULT);
 240                                 } else if (sc.ch == '/') {
 241                                         sc.Forward();
 242                                         while ((sc.ch < 0x80) && islower(sc.ch))
 243                                                 sc.Forward();    // gobble regex flags
 244                                         sc.SetState(SCE_COFFEESCRIPT_DEFAULT);
 245                                 } else if (sc.ch == '\\') {
 246                                         // Gobble up the quoted character
 247                                         if (sc.chNext == '\\' || sc.chNext == '/') {
 248                                                 sc.Forward();
 249                                         }
 250                                 }
 251                                 break;
 252                         case SCE_COFFEESCRIPT_STRINGEOL:
 253                                 if (sc.atLineStart) {
 254                                         sc.SetState(SCE_COFFEESCRIPT_DEFAULT);
 255                                 }
 256                                 break;
 257                         case SCE_COFFEESCRIPT_COMMENTBLOCK:
 258                                 if (sc.Match("###")) {
 259                                         sc.Forward();
 260                                         sc.Forward();
 261                                         sc.ForwardSetState(SCE_COFFEESCRIPT_DEFAULT);
 262                                 } else if (sc.ch == '\\') {
 263                                         sc.Forward();
 264                                 }
 265                                 break;
 266                         case SCE_COFFEESCRIPT_VERBOSE_REGEX:
 267                                 if (sc.Match("///")) {
 268                                         sc.Forward();
 269                                         sc.Forward();
 270                                         sc.ForwardSetState(SCE_COFFEESCRIPT_DEFAULT);
 271                                 } else if (sc.Match('#')) {
 272                                         sc.SetState(SCE_COFFEESCRIPT_VERBOSE_REGEX_COMMENT);
 273                                 } else if (sc.ch == '\\') {
 274                                         sc.Forward();
 275                                 }
 276                                 break;
 277                         case SCE_COFFEESCRIPT_VERBOSE_REGEX_COMMENT:
 278                                 if (sc.atLineStart) {
 279                                         sc.SetState(SCE_COFFEESCRIPT_VERBOSE_REGEX);
 280                                 }
 281                                 break;
 282                 }
 283
 284                 // Determine if a new state should be entered.
 285                 if (sc.state == SCE_COFFEESCRIPT_DEFAULT) {
 286                         if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
 287                                 sc.SetState(SCE_COFFEESCRIPT_NUMBER);
 288                         } else if (setWordStart.Contains(sc.ch)) {
 289                                 sc.SetState(SCE_COFFEESCRIPT_IDENTIFIER);
 290                         } else if (sc.Match("///")) {
 291                                 sc.SetState(SCE_COFFEESCRIPT_VERBOSE_REGEX);
 292                                 sc.Forward();
 293                                 sc.Forward();
 294                         } else if (sc.ch == '/'
 295                                    && (setOKBeforeRE.Contains(chPrevNonWhite)
 296                                        || followsKeyword(sc, styler))
 297                                    && (!setCouldBePostOp.Contains(chPrevNonWhite)
 298                                        || !FollowsPostfixOperator(sc, styler))) {
 299                                 sc.SetState(SCE_COFFEESCRIPT_REGEX);    // JavaScript's RegEx
 300                         } else if (sc.ch == '\"') {
 301                                 sc.SetState(SCE_COFFEESCRIPT_STRING);
 302                         } else if (sc.ch == '\'') {
 303                                 sc.SetState(SCE_COFFEESCRIPT_CHARACTER);
 304                         } else if (sc.ch == '#') {
 305                                 if (sc.Match("###")) {
 306                                         sc.SetState(SCE_COFFEESCRIPT_COMMENTBLOCK);
 307                                         sc.Forward();
 308                                         sc.Forward();
 309                                 } else {
 310                                         sc.SetState(SCE_COFFEESCRIPT_COMMENTLINE);
 311                                 }
 312                         } else if (isoperator(static_cast<char>(sc.ch))) {
 313                                 sc.SetState(SCE_COFFEESCRIPT_OPERATOR);
 314                                 // Handle '..' and '...' operators correctly.
 315                                 if (sc.ch == '.') {
 316                                         for (int i = 0; i < 2 && sc.chNext == '.'; i++, sc.Forward()) ;
 317                                 } else if (sc.ch == '{') {
 318                                         ++brace_counts;
 319                                 } else if (sc.ch == '}' && --brace_counts <= 0 && inner_string_count > 0) {
 320                                         // Return to previous state before #{ ... }
 321                                         sc.ForwardSetState(exitInnerExpression(inner_string_types,
 322                                                                                inner_expn_brace_counts,
 323                                                                                inner_string_count,
 324                                                                                brace_counts));
 325                                         continue; // skip sc.Forward() at loop end
 326                                 }
 327                         }
 328                 }
 329
 330                 if (!IsASpace(sc.ch) && !IsSpaceEquiv(sc.state)) {
 331                         chPrevNonWhite = sc.ch;
 332                         visibleChars++;
 333                 }
 334                 sc.Forward();
 335         }
 336         sc.Complete();
 337 }
 338
 339 static bool IsCommentLine(Sci_Position line, Accessor &styler) {
 340         Sci_Position pos = styler.LineStart(line);
 341         Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
 342         for (Sci_Position i = pos; i < eol_pos; i++) {
 343                 char ch = styler[i];
 344                 if (ch == '#')
 345                         return true;
 346                 else if (ch != ' ' && ch != '\t')
 347                         return false;
 348         }
 349         return false;
 350 }
 351
 352 static void FoldCoffeeScriptDoc(Sci_PositionU startPos, Sci_Position length, int,
 353                                 WordList *[], Accessor &styler) {
 354         // A simplified version of FoldPyDoc
 355         const Sci_Position maxPos = startPos + length;
 356         const Sci_Position maxLines = styler.GetLine(maxPos - 1);             // Requested last line
 357         const Sci_Position docLines = styler.GetLine(styler.Length() - 1);  // Available last line
 358
 359         // property fold.coffeescript.comment
 360         const bool foldComment = styler.GetPropertyInt("fold.coffeescript.comment") != 0;
 361
 362         const bool foldCompact = styler.GetPropertyInt("fold.compact") != 0;
 363
 364         // Backtrack to previous non-blank line so we can determine indent level
 365         // for any white space lines
 366         // and so we can fix any preceding fold level (which is why we go back
 367         // at least one line in all cases)
 368         int spaceFlags = 0;
 369         Sci_Position lineCurrent = styler.GetLine(startPos);
 370         int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
 371         while (lineCurrent > 0) {
 372                 lineCurrent--;
 373                 indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
 374                 if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG)
 375                     && !IsCommentLine(lineCurrent, styler))
 376                         break;
 377         }
 378         int indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
 379
 380         // Set up initial loop state
 381         int prevComment = 0;
 382         if (lineCurrent >= 1)
 383                 prevComment = foldComment && IsCommentLine(lineCurrent - 1, styler);
 384
 385         // Process all characters to end of requested range
 386         // or comment that hangs over the end of the range.  Cap processing in all cases
 387         // to end of document (in case of comment at end).
 388         while ((lineCurrent <= docLines) && ((lineCurrent <= maxLines) || prevComment)) {
 389
 390                 // Gather info
 391                 int lev = indentCurrent;
 392                 Sci_Position lineNext = lineCurrent + 1;
 393                 int indentNext = indentCurrent;
 394                 if (lineNext <= docLines) {
 395                         // Information about next line is only available if not at end of document
 396                         indentNext = styler.IndentAmount(lineNext, &spaceFlags, NULL);
 397                 }
 398                 const int comment = foldComment && IsCommentLine(lineCurrent, styler);
 399                 const int comment_start = (comment && !prevComment && (lineNext <= docLines) &&
 400                                            IsCommentLine(lineNext, styler) && (lev > SC_FOLDLEVELBASE));
 401                 const int comment_continue = (comment && prevComment);
 402                 if (!comment)
 403                         indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
 404                 if (indentNext & SC_FOLDLEVELWHITEFLAG)
 405                         indentNext = SC_FOLDLEVELWHITEFLAG | indentCurrentLevel;
 406
 407                 if (comment_start) {
 408                         // Place fold point at start of a block of comments
 409                         lev |= SC_FOLDLEVELHEADERFLAG;
 410                 } else if (comment_continue) {
 411                         // Add level to rest of lines in the block
 412                         lev = lev + 1;
 413                 }
 414
 415                 // Skip past any blank lines for next indent level info; we skip also
 416                 // comments (all comments, not just those starting in column 0)
 417                 // which effectively folds them into surrounding code rather
 418                 // than screwing up folding.
 419
 420                 while ((lineNext < docLines) &&
 421                         ((indentNext & SC_FOLDLEVELWHITEFLAG) ||
 422                          (lineNext <= docLines && IsCommentLine(lineNext, styler)))) {
 423
 424                         lineNext++;
 425                         indentNext = styler.IndentAmount(lineNext, &spaceFlags, NULL);
 426                 }
 427
 428                 const int levelAfterComments = indentNext & SC_FOLDLEVELNUMBERMASK;
 429                 const int levelBeforeComments = std::max(indentCurrentLevel,levelAfterComments);
 430
 431                 // Now set all the indent levels on the lines we skipped
 432                 // Do this from end to start.  Once we encounter one line
 433                 // which is indented more than the line after the end of
 434                 // the comment-block, use the level of the block before
 435
 436                 Sci_Position skipLine = lineNext;
 437                 int skipLevel = levelAfterComments;
 438
 439                 while (--skipLine > lineCurrent) {
 440                         int skipLineIndent = styler.IndentAmount(skipLine, &spaceFlags, NULL);
 441
 442                         if (foldCompact) {
 443                                 if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > levelAfterComments)
 444                                         skipLevel = levelBeforeComments;
 445
 446                                 int whiteFlag = skipLineIndent & SC_FOLDLEVELWHITEFLAG;
 447
 448                                 styler.SetLevel(skipLine, skipLevel | whiteFlag);
 449                         } else {
 450                                 if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > levelAfterComments &&
 451                                         !(skipLineIndent & SC_FOLDLEVELWHITEFLAG) &&
 452                                         !IsCommentLine(skipLine, styler))
 453                                         skipLevel = levelBeforeComments;
 454
 455                                 styler.SetLevel(skipLine, skipLevel);
 456                         }
 457                 }
 458
 459                 // Set fold header on non-comment line
 460                 if (!comment && !(indentCurrent & SC_FOLDLEVELWHITEFLAG)) {
 461                         if ((indentCurrent & SC_FOLDLEVELNUMBERMASK) < (indentNext & SC_FOLDLEVELNUMBERMASK))
 462                                 lev |= SC_FOLDLEVELHEADERFLAG;
 463                 }
 464
 465                 // Keep track of block comment state of previous line
 466                 prevComment = comment_start || comment_continue;
 467
 468                 // Set fold level for this line and move to next line
 469                 styler.SetLevel(lineCurrent, lev);
 470                 indentCurrent = indentNext;
 471                 lineCurrent = lineNext;
 472         }
 473 }
 474
 475 static const char *const csWordLists[] = {
 476             "Keywords",
 477             "Secondary keywords",
 478             "Unused",
 479             "Global classes",
 480             0,
 481 };
 482
// Lexer module object for CoffeeScript: ties the SCLEX_COFFEESCRIPT id and the
// name "coffeescript" to the colouriser, the folder and the word list
// descriptions defined in this file.
LexerModule lmCoffeeScript(SCLEX_COFFEESCRIPT, ColouriseCoffeeScriptDoc, "coffeescript", FoldCoffeeScriptDoc, csWordLists);