scintilla/lexers/LexCoffeeScript.cxx

   1 // Scintilla source code edit control
   2 /** @file LexCoffeeScript.cxx
   3  ** Lexer for CoffeeScript.
   4  **/
   5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
   6 // Based on the Scintilla C++ Lexer
   7 // Written by Eric Promislow <ericp@activestate.com> in 2011 for the Komodo IDE
   8 // The License.txt file describes the conditions under which this software may be distributed.
   9
  10 #include <stdlib.h>
  11 #include <string.h>
  12 #include <stdio.h>
  13 #include <stdarg.h>
  14 #include <assert.h>
  15 #include <ctype.h>
  16
  17 #include "Platform.h"
  18 #include "ILexer.h"
  19 #include "Scintilla.h"
  20 #include "SciLexer.h"
  21
  22 #include "WordList.h"
  23 #include "LexAccessor.h"
  24 #include "Accessor.h"
  25 #include "StyleContext.h"
  26 #include "CharacterSet.h"
  27 #include "LexerModule.h"
  28
  29 #ifdef SCI_NAMESPACE
  30 using namespace Scintilla;
  31 #endif
  32
  33 static bool IsSpaceEquiv(int state) {
  34         return (state == SCE_COFFEESCRIPT_DEFAULT
  35             || state == SCE_COFFEESCRIPT_COMMENTLINE
  36             || state == SCE_COFFEESCRIPT_COMMENTBLOCK
  37             || state == SCE_COFFEESCRIPT_VERBOSE_REGEX
  38             || state == SCE_COFFEESCRIPT_VERBOSE_REGEX_COMMENT
  39             || state == SCE_COFFEESCRIPT_WORD
  40             || state == SCE_COFFEESCRIPT_REGEX);
  41 }
  42
  43 // Store the current lexer state and brace count prior to starting a new
  44 // `#{}` interpolation level.
  45 // Based on LexRuby.cxx.
  46 static void enterInnerExpression(int  *p_inner_string_types,
  47                                  int  *p_inner_expn_brace_counts,
  48                                  int&  inner_string_count,
  49                                  int   state,
  50                                  int&  brace_counts
  51                                  ) {
  52         p_inner_string_types[inner_string_count] = state;
  53         p_inner_expn_brace_counts[inner_string_count] = brace_counts;
  54         brace_counts = 0;
  55         ++inner_string_count;
  56 }
  57
  58 // Restore the lexer state and brace count for the previous `#{}` interpolation
  59 // level upon returning to it.
  60 // Note the previous lexer state is the return value and needs to be restored
  61 // manually by the StyleContext.
  62 // Based on LexRuby.cxx.
  63 static int exitInnerExpression(int  *p_inner_string_types,
  64                                int  *p_inner_expn_brace_counts,
  65                                int&  inner_string_count,
  66                                int&  brace_counts
  67                               ) {
  68         --inner_string_count;
  69         brace_counts = p_inner_expn_brace_counts[inner_string_count];
  70         return p_inner_string_types[inner_string_count];
  71 }
  72
  73 // Preconditions: sc.currentPos points to a character after '+' or '-'.
  74 // The test for pos reaching 0 should be redundant,
  75 // and is in only for safety measures.
  76 // Limitation: this code will give the incorrect answer for code like
  77 // a = b+++/ptn/...
  78 // Putting a space between the '++' post-inc operator and the '+' binary op
  79 // fixes this, and is highly recommended for readability anyway.
  80 static bool FollowsPostfixOperator(StyleContext &sc, Accessor &styler) {
  81         Sci_Position pos = (Sci_Position) sc.currentPos;
  82         while (--pos > 0) {
  83                 char ch = styler[pos];
  84                 if (ch == '+' || ch == '-') {
  85                         return styler[pos - 1] == ch;
  86                 }
  87         }
  88         return false;
  89 }
  90
  91 static bool followsKeyword(StyleContext &sc, Accessor &styler) {
  92         Sci_Position pos = (Sci_Position) sc.currentPos;
  93         Sci_Position currentLine = styler.GetLine(pos);
  94         Sci_Position lineStartPos = styler.LineStart(currentLine);
  95         while (--pos > lineStartPos) {
  96                 char ch = styler.SafeGetCharAt(pos);
  97                 if (ch != ' ' && ch != '\t') {
  98                         break;
  99                 }
 100         }
 101         styler.Flush();
 102         return styler.StyleAt(pos) == SCE_COFFEESCRIPT_WORD;
 103 }
 104
 105 static void ColouriseCoffeeScriptDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[],
 106                             Accessor &styler) {
 107
 108         WordList &keywords = *keywordlists[0];
 109         WordList &keywords2 = *keywordlists[1];
 110         WordList &keywords4 = *keywordlists[3];
 111
 112         CharacterSet setOKBeforeRE(CharacterSet::setNone, "([{=,:;!%^&*|?~+-");
 113         CharacterSet setCouldBePostOp(CharacterSet::setNone, "+-");
 114
 115         CharacterSet setWordStart(CharacterSet::setAlpha, "_$@", 0x80, true);
 116         CharacterSet setWord(CharacterSet::setAlphaNum, "._$", 0x80, true);
 117
 118         int chPrevNonWhite = ' ';
 119         int visibleChars = 0;
 120
 121         // String/Regex interpolation variables, based on LexRuby.cxx.
 122         // In most cases a value of 2 should be ample for the code the user is
 123         // likely to enter. For example,
 124         //   "Filling the #{container} with #{liquid}..."
 125         // from the CoffeeScript homepage nests to a level of 2
 126         // If the user actually hits a 6th occurrence of '#{' in a double-quoted
 127         // string (including regexes), it will stay as a string.  The problem with
 128         // this is that quotes might flip, a 7th '#{' will look like a comment,
 129         // and code-folding might be wrong.
 130 #define INNER_STRINGS_MAX_COUNT 5
 131         // These vars track our instances of "...#{,,,'..#{,,,}...',,,}..."
 132         int inner_string_types[INNER_STRINGS_MAX_COUNT];
 133         // Track # braces when we push a new #{ thing
 134         int inner_expn_brace_counts[INNER_STRINGS_MAX_COUNT];
 135         int inner_string_count = 0;
 136         int brace_counts = 0;   // Number of #{ ... } things within an expression
 137         for (int i = 0; i < INNER_STRINGS_MAX_COUNT; i++) {
 138                 inner_string_types[i] = 0;
 139                 inner_expn_brace_counts[i] = 0;
 140         }
 141
 142         // look back to set chPrevNonWhite properly for better regex colouring
 143         Sci_Position endPos = startPos + length;
 144         if (startPos > 0 && IsSpaceEquiv(initStyle)) {
 145                 Sci_PositionU back = startPos;
 146                 styler.Flush();
 147                 while (back > 0 && IsSpaceEquiv(styler.StyleAt(--back)))
 148                         ;
 149                 if (styler.StyleAt(back) == SCE_COFFEESCRIPT_OPERATOR) {
 150                         chPrevNonWhite = styler.SafeGetCharAt(back);
 151                 }
 152                 if (startPos != back) {
 153                         initStyle = styler.StyleAt(back);
 154                         if (IsSpaceEquiv(initStyle)) {
 155                                 initStyle = SCE_COFFEESCRIPT_DEFAULT;
 156                         }
 157                 }
 158                 startPos = back;
 159         }
 160
 161         StyleContext sc(startPos, endPos - startPos, initStyle, styler);
 162
 163         for (; sc.More();) {
 164
 165                 if (sc.atLineStart) {
 166                         // Reset states to beginning of colourise so no surprises
 167                         // if different sets of lines lexed.
 168                         visibleChars = 0;
 169                 }
 170
 171                 // Determine if the current state should terminate.
 172                 switch (sc.state) {
 173                         case SCE_COFFEESCRIPT_OPERATOR:
 174                                 sc.SetState(SCE_COFFEESCRIPT_DEFAULT);
 175                                 break;
 176                         case SCE_COFFEESCRIPT_NUMBER:
 177                                 // We accept almost anything because of hex. and number suffixes
 178                                 if (!setWord.Contains(sc.ch) || sc.Match('.', '.')) {
 179                                         sc.SetState(SCE_COFFEESCRIPT_DEFAULT);
 180                                 }
 181                                 break;
 182                         case SCE_COFFEESCRIPT_IDENTIFIER:
 183                                 if (!setWord.Contains(sc.ch) || (sc.ch == '.') || (sc.ch == '$')) {
 184                                         char s[1000];
 185                                         sc.GetCurrent(s, sizeof(s));
 186                                         if (keywords.InList(s)) {
 187                                                 sc.ChangeState(SCE_COFFEESCRIPT_WORD);
 188                                         } else if (keywords2.InList(s)) {
 189                                                 sc.ChangeState(SCE_COFFEESCRIPT_WORD2);
 190                                         } else if (keywords4.InList(s)) {
 191                                                 sc.ChangeState(SCE_COFFEESCRIPT_GLOBALCLASS);
 192                                         } else if (sc.LengthCurrent() > 0 && s[0] == '@') {
 193                                                 sc.ChangeState(SCE_COFFEESCRIPT_INSTANCEPROPERTY);
 194                                         }
 195                                         sc.SetState(SCE_COFFEESCRIPT_DEFAULT);
 196                                 }
 197                                 break;
 198                         case SCE_COFFEESCRIPT_WORD:
 199                         case SCE_COFFEESCRIPT_WORD2:
 200                         case SCE_COFFEESCRIPT_GLOBALCLASS:
 201                         case SCE_COFFEESCRIPT_INSTANCEPROPERTY:
 202                                 if (!setWord.Contains(sc.ch)) {
 203                                         sc.SetState(SCE_COFFEESCRIPT_DEFAULT);
 204                                 }
 205                                 break;
 206                         case SCE_COFFEESCRIPT_COMMENTLINE:
 207                                 if (sc.atLineStart) {
 208                                         sc.SetState(SCE_COFFEESCRIPT_DEFAULT);
 209                                 }
 210                                 break;
 211                         case SCE_COFFEESCRIPT_STRING:
 212                                 if (sc.ch == '\\') {
 213                                         if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
 214                                                 sc.Forward();
 215                                         }
 216                                 } else if (sc.ch == '\"') {
 217                                         sc.ForwardSetState(SCE_COFFEESCRIPT_DEFAULT);
 218                                 } else if (sc.ch == '#' && sc.chNext == '{' && inner_string_count < INNER_STRINGS_MAX_COUNT) {
 219                                         // process interpolated code #{ ... }
 220                                         enterInnerExpression(inner_string_types,
 221                                                              inner_expn_brace_counts,
 222                                                              inner_string_count,
 223                                                              sc.state,
 224                                                              brace_counts);
 225                                         sc.SetState(SCE_COFFEESCRIPT_OPERATOR);
 226                                         sc.ForwardSetState(SCE_COFFEESCRIPT_DEFAULT);
 227                                 }
 228                                 break;
 229                         case SCE_COFFEESCRIPT_CHARACTER:
 230                                 if (sc.ch == '\\') {
 231                                         if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
 232                                                 sc.Forward();
 233                                         }
 234                                 } else if (sc.ch == '\'') {
 235                                         sc.ForwardSetState(SCE_COFFEESCRIPT_DEFAULT);
 236                                 }
 237                                 break;
 238                         case SCE_COFFEESCRIPT_REGEX:
 239                                 if (sc.atLineStart) {
 240                                         sc.SetState(SCE_COFFEESCRIPT_DEFAULT);
 241                                 } else if (sc.ch == '/') {
 242                                         sc.Forward();
 243                                         while ((sc.ch < 0x80) && islower(sc.ch))
 244                                                 sc.Forward();    // gobble regex flags
 245                                         sc.SetState(SCE_COFFEESCRIPT_DEFAULT);
 246                                 } else if (sc.ch == '\\') {
 247                                         // Gobble up the quoted character
 248                                         if (sc.chNext == '\\' || sc.chNext == '/') {
 249                                                 sc.Forward();
 250                                         }
 251                                 }
 252                                 break;
 253                         case SCE_COFFEESCRIPT_STRINGEOL:
 254                                 if (sc.atLineStart) {
 255                                         sc.SetState(SCE_COFFEESCRIPT_DEFAULT);
 256                                 }
 257                                 break;
 258                         case SCE_COFFEESCRIPT_COMMENTBLOCK:
 259                                 if (sc.Match("###")) {
 260                                         sc.Forward();
 261                                         sc.Forward();
 262                                         sc.ForwardSetState(SCE_COFFEESCRIPT_DEFAULT);
 263                                 } else if (sc.ch == '\\') {
 264                                         sc.Forward();
 265                                 }
 266                                 break;
 267                         case SCE_COFFEESCRIPT_VERBOSE_REGEX:
 268                                 if (sc.Match("///")) {
 269                                         sc.Forward();
 270                                         sc.Forward();
 271                                         sc.ForwardSetState(SCE_COFFEESCRIPT_DEFAULT);
 272                                 } else if (sc.Match('#')) {
 273                                         sc.SetState(SCE_COFFEESCRIPT_VERBOSE_REGEX_COMMENT);
 274                                 } else if (sc.ch == '\\') {
 275                                         sc.Forward();
 276                                 }
 277                                 break;
 278                         case SCE_COFFEESCRIPT_VERBOSE_REGEX_COMMENT:
 279                                 if (sc.atLineStart) {
 280                                         sc.SetState(SCE_COFFEESCRIPT_VERBOSE_REGEX);
 281                                 }
 282                                 break;
 283                 }
 284
 285                 // Determine if a new state should be entered.
 286                 if (sc.state == SCE_COFFEESCRIPT_DEFAULT) {
 287                         if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
 288                                 sc.SetState(SCE_COFFEESCRIPT_NUMBER);
 289                         } else if (setWordStart.Contains(sc.ch)) {
 290                                 sc.SetState(SCE_COFFEESCRIPT_IDENTIFIER);
 291                         } else if (sc.Match("///")) {
 292                                 sc.SetState(SCE_COFFEESCRIPT_VERBOSE_REGEX);
 293                                 sc.Forward();
 294                                 sc.Forward();
 295                         } else if (sc.ch == '/'
 296                                    && (setOKBeforeRE.Contains(chPrevNonWhite)
 297                                        || followsKeyword(sc, styler))
 298                                    && (!setCouldBePostOp.Contains(chPrevNonWhite)
 299                                        || !FollowsPostfixOperator(sc, styler))) {
 300                                 sc.SetState(SCE_COFFEESCRIPT_REGEX);    // JavaScript's RegEx
 301                         } else if (sc.ch == '\"') {
 302                                 sc.SetState(SCE_COFFEESCRIPT_STRING);
 303                         } else if (sc.ch == '\'') {
 304                                 sc.SetState(SCE_COFFEESCRIPT_CHARACTER);
 305                         } else if (sc.ch == '#') {
 306                                 if (sc.Match("###")) {
 307                                         sc.SetState(SCE_COFFEESCRIPT_COMMENTBLOCK);
 308                                         sc.Forward();
 309                                         sc.Forward();
 310                                 } else {
 311                                         sc.SetState(SCE_COFFEESCRIPT_COMMENTLINE);
 312                                 }
 313                         } else if (isoperator(static_cast<char>(sc.ch))) {
 314                                 sc.SetState(SCE_COFFEESCRIPT_OPERATOR);
 315                                 // Handle '..' and '...' operators correctly.
 316                                 if (sc.ch == '.') {
 317                                         for (int i = 0; i < 2 && sc.chNext == '.'; i++, sc.Forward()) ;
 318                                 } else if (sc.ch == '{') {
 319                                         ++brace_counts;
 320                                 } else if (sc.ch == '}' && --brace_counts <= 0 && inner_string_count > 0) {
 321                                         // Return to previous state before #{ ... }
 322                                         sc.ForwardSetState(exitInnerExpression(inner_string_types,
 323                                                                                inner_expn_brace_counts,
 324                                                                                inner_string_count,
 325                                                                                brace_counts));
 326                                         continue; // skip sc.Forward() at loop end
 327                                 }
 328                         }
 329                 }
 330
 331                 if (!IsASpace(sc.ch) && !IsSpaceEquiv(sc.state)) {
 332                         chPrevNonWhite = sc.ch;
 333                         visibleChars++;
 334                 }
 335                 sc.Forward();
 336         }
 337         sc.Complete();
 338 }
 339
 340 static bool IsCommentLine(Sci_Position line, Accessor &styler) {
 341         Sci_Position pos = styler.LineStart(line);
 342         Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
 343         for (Sci_Position i = pos; i < eol_pos; i++) {
 344                 char ch = styler[i];
 345                 if (ch == '#')
 346                         return true;
 347                 else if (ch != ' ' && ch != '\t')
 348                         return false;
 349         }
 350         return false;
 351 }
 352
 353 static void FoldCoffeeScriptDoc(Sci_PositionU startPos, Sci_Position length, int,
 354                                 WordList *[], Accessor &styler) {
 355         // A simplified version of FoldPyDoc
 356         const Sci_Position maxPos = startPos + length;
 357         const Sci_Position maxLines = styler.GetLine(maxPos - 1);             // Requested last line
 358         const Sci_Position docLines = styler.GetLine(styler.Length() - 1);  // Available last line
 359
 360         // property fold.coffeescript.comment
 361         const bool foldComment = styler.GetPropertyInt("fold.coffeescript.comment") != 0;
 362
 363         const bool foldCompact = styler.GetPropertyInt("fold.compact") != 0;
 364
 365         // Backtrack to previous non-blank line so we can determine indent level
 366         // for any white space lines
 367         // and so we can fix any preceding fold level (which is why we go back
 368         // at least one line in all cases)
 369         int spaceFlags = 0;
 370         Sci_Position lineCurrent = styler.GetLine(startPos);
 371         int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
 372         while (lineCurrent > 0) {
 373                 lineCurrent--;
 374                 indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
 375                 if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG)
 376                     && !IsCommentLine(lineCurrent, styler))
 377                         break;
 378         }
 379         int indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
 380
 381         // Set up initial loop state
 382         int prevComment = 0;
 383         if (lineCurrent >= 1)
 384                 prevComment = foldComment && IsCommentLine(lineCurrent - 1, styler);
 385
 386         // Process all characters to end of requested range
 387         // or comment that hangs over the end of the range.  Cap processing in all cases
 388         // to end of document (in case of comment at end).
 389         while ((lineCurrent <= docLines) && ((lineCurrent <= maxLines) || prevComment)) {
 390
 391                 // Gather info
 392                 int lev = indentCurrent;
 393                 Sci_Position lineNext = lineCurrent + 1;
 394                 int indentNext = indentCurrent;
 395                 if (lineNext <= docLines) {
 396                         // Information about next line is only available if not at end of document
 397                         indentNext = styler.IndentAmount(lineNext, &spaceFlags, NULL);
 398                 }
 399                 const int comment = foldComment && IsCommentLine(lineCurrent, styler);
 400                 const int comment_start = (comment && !prevComment && (lineNext <= docLines) &&
 401                                            IsCommentLine(lineNext, styler) && (lev > SC_FOLDLEVELBASE));
 402                 const int comment_continue = (comment && prevComment);
 403                 if (!comment)
 404                         indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
 405                 if (indentNext & SC_FOLDLEVELWHITEFLAG)
 406                         indentNext = SC_FOLDLEVELWHITEFLAG | indentCurrentLevel;
 407
 408                 if (comment_start) {
 409                         // Place fold point at start of a block of comments
 410                         lev |= SC_FOLDLEVELHEADERFLAG;
 411                 } else if (comment_continue) {
 412                         // Add level to rest of lines in the block
 413                         lev = lev + 1;
 414                 }
 415
 416                 // Skip past any blank lines for next indent level info; we skip also
 417                 // comments (all comments, not just those starting in column 0)
 418                 // which effectively folds them into surrounding code rather
 419                 // than screwing up folding.
 420
 421                 while ((lineNext < docLines) &&
 422                         ((indentNext & SC_FOLDLEVELWHITEFLAG) ||
 423                          (lineNext <= docLines && IsCommentLine(lineNext, styler)))) {
 424
 425                         lineNext++;
 426                         indentNext = styler.IndentAmount(lineNext, &spaceFlags, NULL);
 427                 }
 428
 429                 const int levelAfterComments = indentNext & SC_FOLDLEVELNUMBERMASK;
 430                 const int levelBeforeComments = Platform::Maximum(indentCurrentLevel,levelAfterComments);
 431
 432                 // Now set all the indent levels on the lines we skipped
 433                 // Do this from end to start.  Once we encounter one line
 434                 // which is indented more than the line after the end of
 435                 // the comment-block, use the level of the block before
 436
 437                 Sci_Position skipLine = lineNext;
 438                 int skipLevel = levelAfterComments;
 439
 440                 while (--skipLine > lineCurrent) {
 441                         int skipLineIndent = styler.IndentAmount(skipLine, &spaceFlags, NULL);
 442
 443                         if (foldCompact) {
 444                                 if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > levelAfterComments)
 445                                         skipLevel = levelBeforeComments;
 446
 447                                 int whiteFlag = skipLineIndent & SC_FOLDLEVELWHITEFLAG;
 448
 449                                 styler.SetLevel(skipLine, skipLevel | whiteFlag);
 450                         } else {
 451                                 if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > levelAfterComments &&
 452                                         !(skipLineIndent & SC_FOLDLEVELWHITEFLAG) &&
 453                                         !IsCommentLine(skipLine, styler))
 454                                         skipLevel = levelBeforeComments;
 455
 456                                 styler.SetLevel(skipLine, skipLevel);
 457                         }
 458                 }
 459
 460                 // Set fold header on non-comment line
 461                 if (!comment && !(indentCurrent & SC_FOLDLEVELWHITEFLAG)) {
 462                         if ((indentCurrent & SC_FOLDLEVELNUMBERMASK) < (indentNext & SC_FOLDLEVELNUMBERMASK))
 463                                 lev |= SC_FOLDLEVELHEADERFLAG;
 464                 }
 465
 466                 // Keep track of block comment state of previous line
 467                 prevComment = comment_start || comment_continue;
 468
 469                 // Set fold level for this line and move to next line
 470                 styler.SetLevel(lineCurrent, lev);
 471                 indentCurrent = indentNext;
 472                 lineCurrent = lineNext;
 473         }
 474 }
 475
 476 static const char *const csWordLists[] = {
 477             "Keywords",
 478             "Secondary keywords",
 479             "Unused",
 480             "Global classes",
 481             0,
 482 };
 483
 484 LexerModule lmCoffeeScript(SCLEX_COFFEESCRIPT, ColouriseCoffeeScriptDoc, "coffeescript", FoldCoffeeScriptDoc, csWordLists);