scintilla/lexers/LexD.cxx

   1 /** @file LexD.cxx
   2  ** Lexer for D.
   3  **
   4  ** Copyright (c) 2006 by Waldemar Augustyn <waldemar@wdmsys.com>
   5  ** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net>
   6  **/
   7 // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
   8 // The License.txt file describes the conditions under which this software may be distributed.
   9
  10 #include <stdlib.h>
  11 #include <string.h>
  12 #include <stdio.h>
  13 #include <stdarg.h>
  14 #include <assert.h>
  15 #include <ctype.h>
  16
  17 #include <string>
  18 #include <map>
  19
  20 #include "ILexer.h"
  21 #include "Scintilla.h"
  22 #include "SciLexer.h"
  23
  24 #include "WordList.h"
  25 #include "LexAccessor.h"
  26 #include "StyleContext.h"
  27 #include "CharacterSet.h"
  28 #include "LexerModule.h"
  29 #include "OptionSet.h"
  30
  31 #ifdef SCI_NAMESPACE
  32 using namespace Scintilla;
  33 #endif
  34
  35 /* Nested comments require keeping the value of the nesting level for every
  36    position in the document.  But since scintilla always styles line by line,
  37    we only need to store one value per line. The non-negative number indicates
  38    nesting level at the end of the line.
  39 */
  40
  41 // Underscore, letter, digit and universal alphas from C99 Appendix D.
  42
  43 static bool IsWordStart(int ch) {
  44         return (IsASCII(ch) && (isalpha(ch) || ch == '_')) || !IsASCII(ch);
  45 }
  46
  47 static bool IsWord(int ch) {
  48         return (IsASCII(ch) && (isalnum(ch) || ch == '_')) || !IsASCII(ch);
  49 }
  50
  51 static bool IsDoxygen(int ch) {
  52         if (IsASCII(ch) && islower(ch))
  53                 return true;
  54         if (ch == '$' || ch == '@' || ch == '\\' ||
  55                 ch == '&' || ch == '#' || ch == '<' || ch == '>' ||
  56                 ch == '{' || ch == '}' || ch == '[' || ch == ']')
  57                 return true;
  58         return false;
  59 }
  60
  61 static bool IsStringSuffix(int ch) {
  62         return ch == 'c' || ch == 'w' || ch == 'd';
  63 }
  64
  65 static bool IsStreamCommentStyle(int style) {
  66         return style == SCE_D_COMMENT ||
  67                 style == SCE_D_COMMENTDOC ||
  68                 style == SCE_D_COMMENTDOCKEYWORD ||
  69                 style == SCE_D_COMMENTDOCKEYWORDERROR;
  70 }
  71
  72 // An individual named option for use in an OptionSet
  73
  74 // Options used for LexerD
  75 struct OptionsD {
  76         bool fold;
  77         bool foldSyntaxBased;
  78         bool foldComment;
  79         bool foldCommentMultiline;
  80         bool foldCommentExplicit;
  81         std::string foldExplicitStart;
  82         std::string foldExplicitEnd;
  83         bool foldExplicitAnywhere;
  84         bool foldCompact;
  85         int  foldAtElseInt;
  86         bool foldAtElse;
  87         OptionsD() {
  88                 fold = false;
  89                 foldSyntaxBased = true;
  90                 foldComment = false;
  91                 foldCommentMultiline = true;
  92                 foldCommentExplicit = true;
  93                 foldExplicitStart = "";
  94                 foldExplicitEnd   = "";
  95                 foldExplicitAnywhere = false;
  96                 foldCompact = true;
  97                 foldAtElseInt = -1;
  98                 foldAtElse = false;
  99         }
 100 };
 101
 102 static const char * const dWordLists[] = {
 103                         "Primary keywords and identifiers",
 104                         "Secondary keywords and identifiers",
 105                         "Documentation comment keywords",
 106                         "Type definitions and aliases",
 107                         "Keywords 5",
 108                         "Keywords 6",
 109                         "Keywords 7",
 110                         0,
 111                 };
 112
// Binds each property name understood by the D lexer to the OptionsD member
// that stores its value, and registers the keyword list descriptions.
// Properties registered without a description string are the generic ones
// ("fold", "fold.comment", "fold.compact", "fold.at.else").
struct OptionSetD : public OptionSet<OptionsD> {
        OptionSetD() {
                // Master folding switch; LexerD::Fold returns immediately when it is off.
                DefineProperty("fold", &OptionsD::fold);

                DefineProperty("fold.d.syntax.based", &OptionsD::foldSyntaxBased,
                        "Set this property to 0 to disable syntax based folding.");

                DefineProperty("fold.comment", &OptionsD::foldComment);

                DefineProperty("fold.d.comment.multiline", &OptionsD::foldCommentMultiline,
                        "Set this property to 0 to disable folding multi-line comments when fold.comment=1.");

                DefineProperty("fold.d.comment.explicit", &OptionsD::foldCommentExplicit,
                        "Set this property to 0 to disable folding explicit fold points when fold.comment=1.");

                // Custom markers only take effect when both start and end are non-empty
                // (see the userDefinedFoldMarkers test in LexerD::Fold).
                DefineProperty("fold.d.explicit.start", &OptionsD::foldExplicitStart,
                        "The string to use for explicit fold start points, replacing the standard //{.");

                DefineProperty("fold.d.explicit.end", &OptionsD::foldExplicitEnd,
                        "The string to use for explicit fold end points, replacing the standard //}.");

                DefineProperty("fold.d.explicit.anywhere", &OptionsD::foldExplicitAnywhere,
                        "Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");

                DefineProperty("fold.compact", &OptionsD::foldCompact);

                // Integer-valued: a value >= 0 overrides the generic "fold.at.else" below.
                DefineProperty("lexer.d.fold.at.else", &OptionsD::foldAtElseInt,
                        "This option enables D folding on a \"} else {\" line of an if statement.");

                DefineProperty("fold.at.else", &OptionsD::foldAtElse);

                DefineWordListSets(dWordLists);
        }
};
 147
 148 class LexerD : public ILexer {
 149         bool caseSensitive;
 150         WordList keywords;
 151         WordList keywords2;
 152         WordList keywords3;
 153         WordList keywords4;
 154         WordList keywords5;
 155         WordList keywords6;
 156         WordList keywords7;
 157         OptionsD options;
 158         OptionSetD osD;
 159 public:
 160         LexerD(bool caseSensitive_) :
 161                 caseSensitive(caseSensitive_) {
 162         }
 163         virtual ~LexerD() {
 164         }
 165         void SCI_METHOD Release() {
 166                 delete this;
 167         }
 168         int SCI_METHOD Version() const {
 169                 return lvOriginal;
 170         }
 171         const char * SCI_METHOD PropertyNames() {
 172                 return osD.PropertyNames();
 173         }
 174         int SCI_METHOD PropertyType(const char *name) {
 175                 return osD.PropertyType(name);
 176         }
 177         const char * SCI_METHOD DescribeProperty(const char *name) {
 178                 return osD.DescribeProperty(name);
 179         }
 180         int SCI_METHOD PropertySet(const char *key, const char *val);
 181         const char * SCI_METHOD DescribeWordListSets() {
 182                 return osD.DescribeWordListSets();
 183         }
 184         int SCI_METHOD WordListSet(int n, const char *wl);
 185         void SCI_METHOD Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
 186         void SCI_METHOD Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
 187
 188         void * SCI_METHOD PrivateCall(int, void *) {
 189                 return 0;
 190         }
 191
 192         static ILexer *LexerFactoryD() {
 193                 return new LexerD(true);
 194         }
 195         static ILexer *LexerFactoryDInsensitive() {
 196                 return new LexerD(false);
 197         }
 198 };
 199
 200 int SCI_METHOD LexerD::PropertySet(const char *key, const char *val) {
 201         if (osD.PropertySet(&options, key, val)) {
 202                 return 0;
 203         }
 204         return -1;
 205 }
 206
 207 int SCI_METHOD LexerD::WordListSet(int n, const char *wl) {
 208         WordList *wordListN = 0;
 209         switch (n) {
 210         case 0:
 211                 wordListN = &keywords;
 212                 break;
 213         case 1:
 214                 wordListN = &keywords2;
 215                 break;
 216         case 2:
 217                 wordListN = &keywords3;
 218                 break;
 219         case 3:
 220                 wordListN = &keywords4;
 221                 break;
 222         case 4:
 223                 wordListN = &keywords5;
 224                 break;
 225         case 5:
 226                 wordListN = &keywords6;
 227                 break;
 228         case 6:
 229                 wordListN = &keywords7;
 230                 break;
 231         }
 232         int firstModification = -1;
 233         if (wordListN) {
 234                 WordList wlNew;
 235                 wlNew.Set(wl);
 236                 if (*wordListN != wlNew) {
 237                         wordListN->Set(wl);
 238                         firstModification = 0;
 239                 }
 240         }
 241         return firstModification;
 242 }
 243
// Styles the range of length characters starting at startPos, beginning in
// style initStyle.  Each loop iteration first decides whether the current
// state ends at the current character, then (if the state is now
// SCE_D_DEFAULT) whether a new state starts there.  The nesting depth of
// /+ ... +/ comments is kept in curNcLevel and stored as the line state.
void SCI_METHOD LexerD::Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
        LexAccessor styler(pAccess);

        // Comment style to resume once a doc-comment keyword has been consumed.
        int styleBeforeDCKeyword = SCE_D_DEFAULT;

        StyleContext sc(startPos, length, initStyle, styler);

        int curLine = styler.GetLine(startPos);
        // Resume with the nested-comment depth recorded for the previous line.
        int curNcLevel = curLine > 0? styler.GetLineState(curLine-1): 0;
        bool numFloat = false; // Float literals have '+' and '-' signs
        bool numHex = false;

        for (; sc.More(); sc.Forward()) {

                if (sc.atLineStart) {
                        // Seed the new line's state with the depth so far; it is
                        // overwritten below whenever the depth changes on this line.
                        curLine = styler.GetLine(sc.currentPos);
                        styler.SetLineState(curLine, curNcLevel);
                }

                // Determine if the current state should terminate.
                switch (sc.state) {
                        case SCE_D_OPERATOR:
                                // Operators are styled one character at a time
                                // (two for the ".." range operator, see below).
                                sc.SetState(SCE_D_DEFAULT);
                                break;
                        case SCE_D_NUMBER:
                                // Note: 'continue' here advances the enclosing for loop,
                                // skipping the new-state checks for this character.
                                // We accept almost anything because of hex. and number suffixes
                                if (IsASCII(sc.ch) && (isalnum(sc.ch) || sc.ch == '_')) {
                                        continue;
                                } else if (sc.ch == '.' && sc.chNext != '.' && !numFloat) {
                                        // Don't parse 0..2 as number.
                                        numFloat=true;
                                        continue;
                                } else if ( ( sc.ch == '-' || sc.ch == '+' ) && (               /*sign and*/
                                        ( !numHex && ( sc.chPrev == 'e' || sc.chPrev == 'E' ) ) || /*decimal or*/
                                        ( sc.chPrev == 'p' || sc.chPrev == 'P' ) ) ) {          /*hex*/
                                        // Parse exponent sign in float literals: 2e+10 0x2e+10
                                        continue;
                                } else {
                                        sc.SetState(SCE_D_DEFAULT);
                                }
                                break;
                        case SCE_D_IDENTIFIER:
                                if (!IsWord(sc.ch)) {
                                        // End of the word: fetch it (lower-cased when the lexer
                                        // is case-insensitive) and restyle it if it is a keyword.
                                        char s[1000];
                                        if (caseSensitive) {
                                                sc.GetCurrent(s, sizeof(s));
                                        } else {
                                                sc.GetCurrentLowered(s, sizeof(s));
                                        }
                                        // keywords3 is not consulted here; it holds the
                                        // documentation comment keywords.
                                        if (keywords.InList(s)) {
                                                sc.ChangeState(SCE_D_WORD);
                                        } else if (keywords2.InList(s)) {
                                                sc.ChangeState(SCE_D_WORD2);
                                        } else if (keywords4.InList(s)) {
                                                sc.ChangeState(SCE_D_TYPEDEF);
                                        } else if (keywords5.InList(s)) {
                                                sc.ChangeState(SCE_D_WORD5);
                                        } else if (keywords6.InList(s)) {
                                                sc.ChangeState(SCE_D_WORD6);
                                        } else if (keywords7.InList(s)) {
                                                sc.ChangeState(SCE_D_WORD7);
                                        }
                                        sc.SetState(SCE_D_DEFAULT);
                                }
                                break;
                        case SCE_D_COMMENT:
                                if (sc.Match('*', '/')) {
                                        // Step over both characters of the terminator.
                                        sc.Forward();
                                        sc.ForwardSetState(SCE_D_DEFAULT);
                                }
                                break;
                        case SCE_D_COMMENTDOC:
                                if (sc.Match('*', '/')) {
                                        sc.Forward();
                                        sc.ForwardSetState(SCE_D_DEFAULT);
                                } else if (sc.ch == '@' || sc.ch == '\\') { // JavaDoc and Doxygen support
                                        // Verify that we have the conditions to mark a comment-doc-keyword
                                        if ((IsASpace(sc.chPrev) || sc.chPrev == '*') && (!IsASpace(sc.chNext))) {
                                                styleBeforeDCKeyword = SCE_D_COMMENTDOC;
                                                sc.SetState(SCE_D_COMMENTDOCKEYWORD);
                                        }
                                }
                                break;
                        case SCE_D_COMMENTLINE:
                                // Line comments end when the next line starts.
                                if (sc.atLineStart) {
                                        sc.SetState(SCE_D_DEFAULT);
                                }
                                break;
                        case SCE_D_COMMENTLINEDOC:
                                if (sc.atLineStart) {
                                        sc.SetState(SCE_D_DEFAULT);
                                } else if (sc.ch == '@' || sc.ch == '\\') { // JavaDoc and Doxygen support
                                        // Verify that we have the conditions to mark a comment-doc-keyword
                                        if ((IsASpace(sc.chPrev) || sc.chPrev == '/' || sc.chPrev == '!') && (!IsASpace(sc.chNext))) {
                                                styleBeforeDCKeyword = SCE_D_COMMENTLINEDOC;
                                                sc.SetState(SCE_D_COMMENTDOCKEYWORD);
                                        }
                                }
                                break;
                        case SCE_D_COMMENTDOCKEYWORD:
                                if ((styleBeforeDCKeyword == SCE_D_COMMENTDOC) && sc.Match('*', '/')) {
                                        // The block comment closed in the middle of a keyword:
                                        // flag the keyword as an error and leave the comment.
                                        sc.ChangeState(SCE_D_COMMENTDOCKEYWORDERROR);
                                        sc.Forward();
                                        sc.ForwardSetState(SCE_D_DEFAULT);
                                } else if (!IsDoxygen(sc.ch)) {
                                        char s[100];
                                        if (caseSensitive) {
                                                sc.GetCurrent(s, sizeof(s));
                                        } else {
                                                sc.GetCurrentLowered(s, sizeof(s));
                                        }
                                        // s + 1 skips the leading '@' or '\\'.  The keyword is valid
                                        // only if it is followed by whitespace and is in keywords3.
                                        if (!IsASpace(sc.ch) || !keywords3.InList(s + 1)) {
                                                sc.ChangeState(SCE_D_COMMENTDOCKEYWORDERROR);
                                        }
                                        sc.SetState(styleBeforeDCKeyword);
                                }
                                break;
                        case SCE_D_COMMENTNESTED:
                                if (sc.Match('+', '/')) {
                                        // Closing one nesting level; record the new depth
                                        // for this line before moving on.
                                        if (curNcLevel > 0)
                                                curNcLevel -= 1;
                                        curLine = styler.GetLine(sc.currentPos);
                                        styler.SetLineState(curLine, curNcLevel);
                                        sc.Forward();
                                        // Only the outermost "+/" leaves the comment state.
                                        if (curNcLevel == 0) {
                                                sc.ForwardSetState(SCE_D_DEFAULT);
                                        }
                                } else if (sc.Match('/','+')) {
                                        // A nested opener increases the depth.
                                        curNcLevel += 1;
                                        curLine = styler.GetLine(sc.currentPos);
                                        styler.SetLineState(curLine, curNcLevel);
                                        sc.Forward();
                                }
                                break;
                        case SCE_D_STRING:
                                if (sc.ch == '\\') {
                                        // Skip an escaped quote or backslash so it cannot end the string.
                                        if (sc.chNext == '"' || sc.chNext == '\\') {
                                                sc.Forward();
                                        }
                                } else if (sc.ch == '"') {
                                        // Include an optional c/w/d suffix in the string style.
                                        if(IsStringSuffix(sc.chNext))
                                                sc.Forward();
                                        sc.ForwardSetState(SCE_D_DEFAULT);
                                }
                                break;
                        case SCE_D_CHARACTER:
                                if (sc.atLineEnd) {
                                        // Unterminated character literal at end of line.
                                        sc.ChangeState(SCE_D_STRINGEOL);
                                } else if (sc.ch == '\\') {
                                        if (sc.chNext == '\'' || sc.chNext == '\\') {
                                                sc.Forward();
                                        }
                                } else if (sc.ch == '\'') {
                                        // Char has no suffixes
                                        sc.ForwardSetState(SCE_D_DEFAULT);
                                }
                                break;
                        case SCE_D_STRINGEOL:
                                if (sc.atLineStart) {
                                        sc.SetState(SCE_D_DEFAULT);
                                }
                                break;
                        case SCE_D_STRINGB:
                                // Backquoted string: no escape handling, ends at the next '`'.
                                if (sc.ch == '`') {
                                        if(IsStringSuffix(sc.chNext))
                                                sc.Forward();
                                        sc.ForwardSetState(SCE_D_DEFAULT);
                                }
                                break;
                        case SCE_D_STRINGR:
                                // r"..." style string: no escape handling, ends at the next '"'.
                                if (sc.ch == '"') {
                                        if(IsStringSuffix(sc.chNext))
                                                sc.Forward();
                                        sc.ForwardSetState(SCE_D_DEFAULT);
                                }
                                break;
                }

                // Determine if a new state should be entered.
                if (sc.state == SCE_D_DEFAULT) {
                        if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
                                sc.SetState(SCE_D_NUMBER);
                                // A literal starting with '.' already has its decimal point.
                                numFloat = sc.ch == '.';
                                // Remember hex literal
                                numHex = sc.ch == '0' && ( sc.chNext == 'x' || sc.chNext == 'X' );
                        } else if ( (sc.ch == 'r' || sc.ch == 'x' || sc.ch == 'q')
                                && sc.chNext == '"' ) {
                                // Limited support for hex and delimited strings: parse as r""
                                sc.SetState(SCE_D_STRINGR);
                                // Step onto the opening quote so it is not taken as the terminator.
                                sc.Forward();
                        } else if (IsWordStart(sc.ch) || sc.ch == '$') {
                                sc.SetState(SCE_D_IDENTIFIER);
                        } else if (sc.Match('/','+')) {
                                // Entering a nesting comment: bump the depth and record it.
                                curNcLevel += 1;
                                curLine = styler.GetLine(sc.currentPos);
                                styler.SetLineState(curLine, curNcLevel);
                                sc.SetState(SCE_D_COMMENTNESTED);
                                sc.Forward();
                        } else if (sc.Match('/', '*')) {
                                if (sc.Match("/**") || sc.Match("/*!")) {   // Support of Qt/Doxygen doc. style
                                        sc.SetState(SCE_D_COMMENTDOC);
                                } else {
                                        sc.SetState(SCE_D_COMMENT);
                                }
                                sc.Forward();   // Eat the * so it isn't used for the end of the comment
                        } else if (sc.Match('/', '/')) {
                                // Exactly three slashes or "//!" start a doc line comment;
                                // four or more slashes are an ordinary line comment.
                                if ((sc.Match("///") && !sc.Match("////")) || sc.Match("//!"))
                                        // Support of Qt/Doxygen doc. style
                                        sc.SetState(SCE_D_COMMENTLINEDOC);
                                else
                                        sc.SetState(SCE_D_COMMENTLINE);
                        } else if (sc.ch == '"') {
                                sc.SetState(SCE_D_STRING);
                        } else if (sc.ch == '\'') {
                                sc.SetState(SCE_D_CHARACTER);
                        } else if (sc.ch == '`') {
                                sc.SetState(SCE_D_STRINGB);
                        } else if (isoperator(static_cast<char>(sc.ch))) {
                                sc.SetState(SCE_D_OPERATOR);
                                if (sc.ch == '.' && sc.chNext == '.') sc.Forward(); // Range operator
                        }
                }
        }
        sc.Complete();
}
 469
 470 // Store both the current line's fold level and the next lines in the
 471 // level store to make it easy to pick up with each increment
 472 // and to make it possible to fiddle the current level for "} else {".
 473
// Computes fold levels for the lines covering the range of length characters
// starting at startPos, using the styles already assigned to the text.
// Each stored level packs the level used for the line in the low 16 bits and
// the level of the following line in the high 16 bits.  Does nothing unless
// the "fold" option is enabled.
void SCI_METHOD LexerD::Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {

        if (!options.fold)
                return;

        LexAccessor styler(pAccess);

        unsigned int endPos = startPos + length;
        int visibleChars = 0;
        int lineCurrent = styler.GetLine(startPos);
        int levelCurrent = SC_FOLDLEVELBASE;
        // Pick up the "next line" level that was stored with the previous line.
        if (lineCurrent > 0)
                levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
        // Lowest level seen on the current line before a '{'; used for "} else {".
        int levelMinCurrent = levelCurrent;
        int levelNext = levelCurrent;
        char chNext = styler[startPos];
        int styleNext = styler.StyleAt(startPos);
        int style = initStyle;
        // A non-negative lexer.d.fold.at.else overrides the generic fold.at.else.
        bool foldAtElse = options.foldAtElseInt >= 0 ? options.foldAtElseInt != 0 : options.foldAtElse;
        // Custom explicit fold markers are used only when both strings are set.
        const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
        for (unsigned int i = startPos; i < endPos; i++) {
                char ch = chNext;
                chNext = styler.SafeGetCharAt(i + 1);
                int stylePrev = style;
                style = styleNext;
                styleNext = styler.StyleAt(i + 1);
                // A lone '\r' or any '\n' ends the line; the '\r' of "\r\n" does not.
                bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
                if (options.foldComment && options.foldCommentMultiline && IsStreamCommentStyle(style)) {
                        // Entering a /* */ comment opens a fold, leaving it closes one.
                        if (!IsStreamCommentStyle(stylePrev)) {
                                levelNext++;
                        } else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
                                // Comments don't end at end of line and the next character may be unstyled.
                                levelNext--;
                        }
                }
                if (options.foldComment && options.foldCommentExplicit && ((style == SCE_D_COMMENTLINE) || options.foldExplicitAnywhere)) {
                        if (userDefinedFoldMarkers) {
                                if (styler.Match(i, options.foldExplicitStart.c_str())) {
                                        levelNext++;
                                } else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
                                        levelNext--;
                                }
                        } else {
                                // Default explicit markers are "//{" and "//}".
                                if ((ch == '/') && (chNext == '/')) {
                                        char chNext2 = styler.SafeGetCharAt(i + 2);
                                        if (chNext2 == '{') {
                                                levelNext++;
                                        } else if (chNext2 == '}') {
                                                levelNext--;
                                        }
                                }
                        }
                }
                if (options.foldSyntaxBased && (style == SCE_D_OPERATOR)) {
                        if (ch == '{') {
                                // Measure the minimum before a '{' to allow
                                // folding on "} else {"
                                if (levelMinCurrent > levelNext) {
                                        levelMinCurrent = levelNext;
                                }
                                levelNext++;
                        } else if (ch == '}') {
                                levelNext--;
                        }
                }
                // Commit the level at each end of line and at the last character of the range.
                if (atEOL || (i == endPos-1)) {
                        if (options.foldComment && options.foldCommentMultiline) {  // Handle nested comments
                                // The line state holds the /+ +/ nesting depth at the end of
                                // each line; the change across this line adjusts the fold level.
                                int nc;
                                nc =  styler.GetLineState(lineCurrent);
                                nc -= lineCurrent>0? styler.GetLineState(lineCurrent-1): 0;
                                levelNext += nc;
                        }
                        int levelUse = levelCurrent;
                        if (options.foldSyntaxBased && foldAtElse) {
                                levelUse = levelMinCurrent;
                        }
                        // Low 16 bits: level for this line; high 16 bits: level for the next line.
                        int lev = levelUse | levelNext << 16;
                        if (visibleChars == 0 && options.foldCompact)
                                lev |= SC_FOLDLEVELWHITEFLAG;
                        if (levelUse < levelNext)
                                lev |= SC_FOLDLEVELHEADERFLAG;
                        // Avoid rewriting a level that has not changed.
                        if (lev != styler.LevelAt(lineCurrent)) {
                                styler.SetLevel(lineCurrent, lev);
                        }
                        lineCurrent++;
                        levelCurrent = levelNext;
                        levelMinCurrent = levelCurrent;
                        visibleChars = 0;
                }
                if (!IsASpace(ch))
                        visibleChars++;
        }
}
 567
// Lexer module for D: identifier SCLEX_D, name "d", created through the
// case-sensitive factory and described by the dWordLists table.
// NOTE(review): LexerFactoryDInsensitive is not referenced by any module in
// the visible source - confirm whether that is intentional.
LexerModule lmD(SCLEX_D, LexerD::LexerFactoryD, "d", dWordLists);