scintilla/lexers/LexBasic.cxx

   1 // Scintilla source code edit control
   2 /** @file LexBasic.cxx
   3  ** Lexer for BlitzBasic and PureBasic.
   4  ** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net>
   5  **/
   6 // Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
   7 // The License.txt file describes the conditions under which this software may be distributed.
   8
// This tries to be a unified Lexer/Folder for all the BlitzBasic/BlitzMax/PureBasic basics
// and derivatives. Once they diverge enough, might want to split it into multiple
// lexers for more code clarity.
  12 //
  13 // Mail me (elias <at> users <dot> sf <dot> net) for any bugs.
  14
  15 // Folding only works for simple things like functions or types.
  16
  17 // You may want to have a look at my ctags lexer as well, if you additionally to coloring
  18 // and folding need to extract things like label tags in your editor.
  19
  20 #include <stdlib.h>
  21 #include <string.h>
  22 #include <stdio.h>
  23 #include <stdarg.h>
  24 #include <assert.h>
  25 #include <ctype.h>
  26
  27 #include <string>
  28 #include <map>
  29
  30 #include "ILexer.h"
  31 #include "Scintilla.h"
  32 #include "SciLexer.h"
  33
  34 #include "WordList.h"
  35 #include "LexAccessor.h"
  36 #include "StyleContext.h"
  37 #include "CharacterSet.h"
  38 #include "LexerModule.h"
  39 #include "OptionSet.h"
  40
  41 #ifdef SCI_NAMESPACE
  42 using namespace Scintilla;
  43 #endif
  44
  45 /* Bits:
  46  * 1  - whitespace
  47  * 2  - operator
  48  * 4  - identifier
  49  * 8  - decimal digit
  50  * 16 - hex digit
  51  * 32 - bin digit
  52  * 64 - letter
  53  */
  54 static int character_classification[128] =
  55 {
  56                 0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  0,  0,  1,  0,  0,
  57                 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  58                 1,  2,  0,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  10, 2,
  59          60, 60, 28, 28, 28, 28, 28, 28, 28, 28,  2,  2,  2,  2,  2,  2,
  60                 2, 84, 84, 84, 84, 84, 84, 68, 68, 68, 68, 68, 68, 68, 68, 68,
  61          68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68,  2,  2,  2,  2, 68,
  62                 2, 84, 84, 84, 84, 84, 84, 68, 68, 68, 68, 68, 68, 68, 68, 68,
  63          68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68,  2,  2,  2,  2,  0
  64 };
  65
  66 static bool IsSpace(int c) {
  67         return c < 128 && (character_classification[c] & 1);
  68 }
  69
  70 static bool IsOperator(int c) {
  71         return c < 128 && (character_classification[c] & 2);
  72 }
  73
  74 static bool IsIdentifier(int c) {
  75         return c < 128 && (character_classification[c] & 4);
  76 }
  77
  78 static bool IsDigit(int c) {
  79         return c < 128 && (character_classification[c] & 8);
  80 }
  81
  82 static bool IsHexDigit(int c) {
  83         return c < 128 && (character_classification[c] & 16);
  84 }
  85
  86 static bool IsBinDigit(int c) {
  87         return c < 128 && (character_classification[c] & 32);
  88 }
  89
  90 static bool IsLetter(int c) {
  91         return c < 128 && (character_classification[c] & 64);
  92 }
  93
  94 static int LowerCase(int c)
  95 {
  96         if (c >= 'A' && c <= 'Z')
  97                 return 'a' + c - 'A';
  98         return c;
  99 }
 100
 101 static int CheckBlitzFoldPoint(char const *token, int &level) {
 102         if (!strcmp(token, "function") ||
 103                 !strcmp(token, "type")) {
 104                 level |= SC_FOLDLEVELHEADERFLAG;
 105                 return 1;
 106         }
 107         if (!strcmp(token, "end function") ||
 108                 !strcmp(token, "end type")) {
 109                 return -1;
 110         }
 111         return 0;
 112 }
 113
 114 static int CheckPureFoldPoint(char const *token, int &level) {
 115         if (!strcmp(token, "procedure") ||
 116                 !strcmp(token, "enumeration") ||
 117                 !strcmp(token, "interface") ||
 118                 !strcmp(token, "structure")) {
 119                 level |= SC_FOLDLEVELHEADERFLAG;
 120                 return 1;
 121         }
 122         if (!strcmp(token, "endprocedure") ||
 123                 !strcmp(token, "endenumeration") ||
 124                 !strcmp(token, "endinterface") ||
 125                 !strcmp(token, "endstructure")) {
 126                 return -1;
 127         }
 128         return 0;
 129 }
 130
 131 static int CheckFreeFoldPoint(char const *token, int &level) {
 132         if (!strcmp(token, "function") ||
 133                 !strcmp(token, "sub") ||
 134                 !strcmp(token, "enum") ||
 135                 !strcmp(token, "type") ||
 136                 !strcmp(token, "union") ||
 137                 !strcmp(token, "property") ||
 138                 !strcmp(token, "destructor") ||
 139                 !strcmp(token, "constructor")) {
 140                 level |= SC_FOLDLEVELHEADERFLAG;
 141                 return 1;
 142         }
 143         if (!strcmp(token, "end function") ||
 144                 !strcmp(token, "end sub") ||
 145                 !strcmp(token, "end enum") ||
 146                 !strcmp(token, "end type") ||
 147                 !strcmp(token, "end union") ||
 148                 !strcmp(token, "end property") ||
 149                 !strcmp(token, "end destructor") ||
 150                 !strcmp(token, "end constructor")) {
 151                 return -1;
 152         }
 153         return 0;
 154 }
 155
 156 // An individual named option for use in an OptionSet
 157
 158 // Options used for LexerBasic
 159 struct OptionsBasic {
 160         bool fold;
 161         bool foldSyntaxBased;
 162         bool foldCommentExplicit;
 163         std::string foldExplicitStart;
 164         std::string foldExplicitEnd;
 165         bool foldExplicitAnywhere;
 166         bool foldCompact;
 167         OptionsBasic() {
 168                 fold = false;
 169                 foldSyntaxBased = true;
 170                 foldCommentExplicit = false;
 171                 foldExplicitStart = "";
 172                 foldExplicitEnd   = "";
 173                 foldExplicitAnywhere = false;
 174                 foldCompact = true;
 175         }
 176 };
 177
// Descriptions of the BlitzBasic word lists, one entry per list in list-index
// order. The array is terminated by a null entry.
static const char * const blitzbasicWordListDesc[] = {
        "BlitzBasic Keywords",
        "user1",
        "user2",
        "user3",
        0
};
 185
// Descriptions of the PureBasic word lists, one entry per list in list-index
// order. The array is terminated by a null entry.
static const char * const purebasicWordListDesc[] = {
        "PureBasic Keywords",
        "PureBasic PreProcessor Keywords",
        "user defined 1",
        "user defined 2",
        0
};
 193
// Descriptions of the FreeBasic word lists, one entry per list in list-index
// order. The array is terminated by a null entry.
static const char * const freebasicWordListDesc[] = {
        "FreeBasic Keywords",
        "FreeBasic PreProcessor Keywords",
        "user defined 1",
        "user defined 2",
        0
};
 201
// Binds the property names understood by the Basic lexers to the members of
// OptionsBasic and registers the word list descriptions of one dialect.
struct OptionSetBasic : public OptionSet<OptionsBasic> {
        // wordListDescriptions: null-terminated array of word list
        // descriptions for the dialect (one of the *WordListDesc arrays).
        OptionSetBasic(const char * const wordListDescriptions[]) {
                // Master switch; Fold() returns immediately while this is false.
                DefineProperty("fold", &OptionsBasic::fold);

                DefineProperty("fold.basic.syntax.based", &OptionsBasic::foldSyntaxBased,
                        "Set this property to 0 to disable syntax based folding.");

                DefineProperty("fold.basic.comment.explicit", &OptionsBasic::foldCommentExplicit,
                        "This option enables folding explicit fold points when using the Basic lexer. "
                        "Explicit fold points allows adding extra folding by placing a ;{ (BB/PB) or '{ (FB) comment at the start "
                        "and a ;} (BB/PB) or '} (FB) at the end of a section that should be folded.");

                // Fold() uses the two custom markers only when both are non-empty.
                DefineProperty("fold.basic.explicit.start", &OptionsBasic::foldExplicitStart,
                        "The string to use for explicit fold start points, replacing the standard ;{ (BB/PB) or '{ (FB).");

                DefineProperty("fold.basic.explicit.end", &OptionsBasic::foldExplicitEnd,
                        "The string to use for explicit fold end points, replacing the standard ;} (BB/PB) or '} (FB).");

                DefineProperty("fold.basic.explicit.anywhere", &OptionsBasic::foldExplicitAnywhere,
                        "Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");

                // When set, Fold() flags lines holding only whitespace with
                // SC_FOLDLEVELWHITEFLAG.
                DefineProperty("fold.compact", &OptionsBasic::foldCompact);

                DefineWordListSets(wordListDescriptions);
        }
};
 228
 229 class LexerBasic : public ILexer {
 230         char comment_char;
 231         int (*CheckFoldPoint)(char const *, int &);
 232         WordList keywordlists[4];
 233         OptionsBasic options;
 234         OptionSetBasic osBasic;
 235 public:
 236         LexerBasic(char comment_char_, int (*CheckFoldPoint_)(char const *, int &), const char * const wordListDescriptions[]) :
 237                                                  comment_char(comment_char_),
 238                                                  CheckFoldPoint(CheckFoldPoint_),
 239                                                  osBasic(wordListDescriptions) {
 240         }
 241         virtual ~LexerBasic() {
 242         }
 243         void SCI_METHOD Release() {
 244                 delete this;
 245         }
 246         int SCI_METHOD Version() const {
 247                 return lvOriginal;
 248         }
 249         const char * SCI_METHOD PropertyNames() {
 250                 return osBasic.PropertyNames();
 251         }
 252         int SCI_METHOD PropertyType(const char *name) {
 253                 return osBasic.PropertyType(name);
 254         }
 255         const char * SCI_METHOD DescribeProperty(const char *name) {
 256                 return osBasic.DescribeProperty(name);
 257         }
 258         int SCI_METHOD PropertySet(const char *key, const char *val);
 259         const char * SCI_METHOD DescribeWordListSets() {
 260                 return osBasic.DescribeWordListSets();
 261         }
 262         int SCI_METHOD WordListSet(int n, const char *wl);
 263         void SCI_METHOD Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
 264         void SCI_METHOD Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
 265
 266         void * SCI_METHOD PrivateCall(int, void *) {
 267                 return 0;
 268         }
 269         static ILexer *LexerFactoryBlitzBasic() {
 270                 return new LexerBasic(';', CheckBlitzFoldPoint, blitzbasicWordListDesc);
 271         }
 272         static ILexer *LexerFactoryPureBasic() {
 273                 return new LexerBasic(';', CheckPureFoldPoint, purebasicWordListDesc);
 274         }
 275         static ILexer *LexerFactoryFreeBasic() {
 276                 return new LexerBasic('\'', CheckFreeFoldPoint, freebasicWordListDesc );
 277         }
 278 };
 279
 280 int SCI_METHOD LexerBasic::PropertySet(const char *key, const char *val) {
 281         if (osBasic.PropertySet(&options, key, val)) {
 282                 return 0;
 283         }
 284         return -1;
 285 }
 286
 287 int SCI_METHOD LexerBasic::WordListSet(int n, const char *wl) {
 288         WordList *wordListN = 0;
 289         switch (n) {
 290         case 0:
 291                 wordListN = &keywordlists[0];
 292                 break;
 293         case 1:
 294                 wordListN = &keywordlists[1];
 295                 break;
 296         case 2:
 297                 wordListN = &keywordlists[2];
 298                 break;
 299         case 3:
 300                 wordListN = &keywordlists[3];
 301                 break;
 302         }
 303         int firstModification = -1;
 304         if (wordListN) {
 305                 WordList wlNew;
 306                 wlNew.Set(wl);
 307                 if (*wordListN != wlNew) {
 308                         wordListN->Set(wl);
 309                         firstModification = 0;
 310                 }
 311         }
 312         return firstModification;
 313 }
 314
// Styles length characters of the document starting at startPos, beginning
// in state initStyle.
//
// Every loop iteration has two phases: the first if/else chain decides
// whether the current character ends the state that is currently open; the
// second block, run only in the DEFAULT or ERROR state, decides which state
// the current character opens.
void SCI_METHOD LexerBasic::Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
        LexAccessor styler(pAccess);

        // isfirst: no non-space character has been seen on the current line yet.
        // wasfirst: value of isfirst at the moment the current identifier began.
        bool wasfirst = true, isfirst = true; // true if first token in a line
        styler.StartAt(startPos);
        // State to return to once a doc keyword inside a doc comment is over.
        int styleBeforeKeyword = SCE_B_DEFAULT;

        StyleContext sc(startPos, length, initStyle, styler);

        // Can't use sc.More() here else we miss the last character
        for (; ; sc.Forward()) {
                // ---- Phase 1: does the current character close the open state? ----
                if (sc.state == SCE_B_IDENTIFIER) {
                        if (!IsIdentifier(sc.ch)) {
                                // Labels
                                // An identifier that was the first token of its line and
                                // is followed by ':' becomes a label.
                                if (wasfirst && sc.Match(':')) {
                                        sc.ChangeState(SCE_B_LABEL);
                                        sc.ForwardSetState(SCE_B_DEFAULT);
                                } else {
                                        char s[100];
                                        int kstates[4] = {
                                                SCE_B_KEYWORD,
                                                SCE_B_KEYWORD2,
                                                SCE_B_KEYWORD3,
                                                SCE_B_KEYWORD4,
                                        };
                                        sc.GetCurrentLowered(s, sizeof(s));
                                        // No break: a word present in several lists ends up
                                        // with the state of the highest-numbered list.
                                        for (int i = 0; i < 4; i++) {
                                                if (keywordlists[i].InList(s)) {
                                                        sc.ChangeState(kstates[i]);
                                                }
                                        }
                                        // Types, must set them as operator else they will be
                                        // matched as number/constant
                                        if (sc.Match('.') || sc.Match('$') || sc.Match('%') ||
                                                sc.Match('#')) {
                                                sc.SetState(SCE_B_OPERATOR);
                                        } else {
                                                sc.SetState(SCE_B_DEFAULT);
                                        }
                                }
                        }
                } else if (sc.state == SCE_B_OPERATOR) {
                        // '#' always ends an operator run so that phase 2 can
                        // classify it again.
                        if (!IsOperator(sc.ch) || sc.Match('#'))
                                sc.SetState(SCE_B_DEFAULT);
                } else if (sc.state == SCE_B_LABEL) {
                        if (!IsIdentifier(sc.ch))
                                sc.SetState(SCE_B_DEFAULT);
                } else if (sc.state == SCE_B_CONSTANT) {
                        if (!IsIdentifier(sc.ch))
                                sc.SetState(SCE_B_DEFAULT);
                } else if (sc.state == SCE_B_NUMBER) {
                        // IsDigit() also accepts '.', so a decimal point stays
                        // inside the number.
                        if (!IsDigit(sc.ch))
                                sc.SetState(SCE_B_DEFAULT);
                } else if (sc.state == SCE_B_HEXNUMBER) {
                        if (!IsHexDigit(sc.ch))
                                sc.SetState(SCE_B_DEFAULT);
                } else if (sc.state == SCE_B_BINNUMBER) {
                        if (!IsBinDigit(sc.ch))
                                sc.SetState(SCE_B_DEFAULT);
                } else if (sc.state == SCE_B_STRING) {
                        if (sc.ch == '"') {
                                sc.ForwardSetState(SCE_B_DEFAULT);
                        }
                        // Reaching the end of the line turns the pending text
                        // into the error state.
                        if (sc.atLineEnd) {
                                sc.ChangeState(SCE_B_ERROR);
                                sc.SetState(SCE_B_DEFAULT);
                        }
                } else if (sc.state == SCE_B_COMMENT || sc.state == SCE_B_PREPROCESSOR) {
                        // Both run to the end of the line.
                        if (sc.atLineEnd) {
                                sc.SetState(SCE_B_DEFAULT);
                        }
                } else if (sc.state == SCE_B_DOCLINE) {
                        if (sc.atLineEnd) {
                                sc.SetState(SCE_B_DEFAULT);
                        } else if (sc.ch == '\\' || sc.ch == '@') {
                                // '\' or '@' followed by a letter starts a doc keyword,
                                // unless the previous character is a backslash.
                                if (IsLetter(sc.chNext) && sc.chPrev != '\\') {
                                        styleBeforeKeyword = sc.state;
                                        sc.SetState(SCE_B_DOCKEYWORD);
                                };
                        }
                } else if (sc.state == SCE_B_DOCKEYWORD) {
                        // A doc keyword ends at whitespace and hands control back to
                        // the doc comment state it interrupted.
                        if (IsSpace(sc.ch)) {
                                sc.SetState(styleBeforeKeyword);
                        }       else if (sc.atLineEnd && styleBeforeKeyword == SCE_B_DOCLINE) {
                                sc.SetState(SCE_B_DEFAULT);
                        }
                } else if (sc.state == SCE_B_COMMENTBLOCK) {
                        // Block comments are closed by the two characters '/ .
                        if (sc.Match("\'/")) {
                                sc.Forward();
                                sc.ForwardSetState(SCE_B_DEFAULT);
                        }
                } else if (sc.state == SCE_B_DOCBLOCK) {
                        if (sc.Match("\'/")) {
                                sc.Forward();
                                sc.ForwardSetState(SCE_B_DEFAULT);
                        } else if (sc.ch == '\\' || sc.ch == '@') {
                                // Same doc keyword rule as in SCE_B_DOCLINE.
                                if (IsLetter(sc.chNext) && sc.chPrev != '\\') {
                                        styleBeforeKeyword = sc.state;
                                        sc.SetState(SCE_B_DOCKEYWORD);
                                };
                        }
                }

                if (sc.atLineStart)
                        isfirst = true;

                // ---- Phase 2: which state does the current character open? ----
                // The order of the tests matters; the first match wins.
                if (sc.state == SCE_B_DEFAULT || sc.state == SCE_B_ERROR) {
                        // A leading '.' starts a label, except in the dialect whose
                        // comment character is the apostrophe (FreeBasic).
                        if (isfirst && sc.Match('.') && comment_char != '\'') {
                                        sc.SetState(SCE_B_LABEL);
                        } else if (isfirst && sc.Match('#')) {
                                // A leading '#' is lexed like an identifier, so the word
                                // that follows is looked up in the keyword lists.
                                wasfirst = isfirst;
                                sc.SetState(SCE_B_IDENTIFIER);
                        } else if (sc.Match(comment_char)) {
                                // Hack to make deprecated QBASIC '$Include show
                                // up in freebasic with SCE_B_PREPROCESSOR.
                                if (comment_char == '\'' && sc.Match(comment_char, '$'))
                                        sc.SetState(SCE_B_PREPROCESSOR);
                                else if (sc.Match("\'*") || sc.Match("\'!")) {
                                        sc.SetState(SCE_B_DOCLINE);
                                } else {
                                        sc.SetState(SCE_B_COMMENT);
                                }
                        } else if (sc.Match("/\'")) {
                                if (sc.Match("/\'*") || sc.Match("/\'!")) {     // Support of gtk-doc/Doxygen doc. style
                                        sc.SetState(SCE_B_DOCBLOCK);
                                } else {
                                        sc.SetState(SCE_B_COMMENTBLOCK);
                                }
                                sc.Forward();   // Eat the ' so it isn't used for the end of the comment
                        } else if (sc.Match('"')) {
                                sc.SetState(SCE_B_STRING);
                        } else if (IsDigit(sc.ch)) {
                                sc.SetState(SCE_B_NUMBER);
                        // '$' and the &h/&H prefixes open a hex number; the octal
                        // prefixes &o/&O share the same state.
                        // NOTE(review): only the '&' is consumed here, so the prefix
                        // letter is tested by IsHexDigit()/IsBinDigit() on the next
                        // iteration and appears to end the number state at once --
                        // confirm that this is the intended styling.
                        } else if (sc.Match('$') || sc.Match("&h") || sc.Match("&H") || sc.Match("&o") || sc.Match("&O")) {
                                sc.SetState(SCE_B_HEXNUMBER);
                        } else if (sc.Match('%') || sc.Match("&b") || sc.Match("&B")) {
                                sc.SetState(SCE_B_BINNUMBER);
                        } else if (sc.Match('#')) {
                                sc.SetState(SCE_B_CONSTANT);
                        } else if (IsOperator(sc.ch)) {
                                sc.SetState(SCE_B_OPERATOR);
                        } else if (IsIdentifier(sc.ch)) {
                                // Remember whether this identifier is the first token of
                                // the line; the label test in phase 1 needs it.
                                wasfirst = isfirst;
                                sc.SetState(SCE_B_IDENTIFIER);
                        } else if (!IsSpace(sc.ch)) {
                                sc.SetState(SCE_B_ERROR);
                        }
                }

                if (!IsSpace(sc.ch))
                        isfirst = false;

                // Leave only after the final position has been processed as well.
                if (!sc.More())
                        break;
        }
        sc.Complete();
}
 472
 473
// Computes fold levels for the lines touched by [startPos, startPos + length).
//
// Two sources of fold points are combined per line:
//  - syntax based: the leading token of the line is handed to the dialect's
//    CheckFoldPoint function (options.foldSyntaxBased);
//  - explicit: fold markers found in the text (options.foldCommentExplicit).
// A line's level is stored when its end of line is reached; the opening or
// closing found on the line takes effect from the following line on.
// NOTE(review): a last line that is not terminated by an EOL never reaches
// the atEOL branch, so no level is stored for it.
void SCI_METHOD LexerBasic::Fold(unsigned int startPos, int length, int /* initStyle */, IDocument *pAccess) {

        if (!options.fold)
                return;

        LexAccessor styler(pAccess);

        int line = styler.GetLine(startPos);
        int level = styler.LevelAt(line);
        // go: level change to apply after the current line (+1 opener, -1 closer).
        // done: the leading token of the current line was ruled out as a fold keyword.
        int go = 0, done = 0;
        int endPos = startPos + length;
        // Lower-cased leading token of the current line; wordlen never exceeds 255,
        // so index 255 is the last one written.
        char word[256];
        int wordlen = 0;
        // Custom markers replace the standard ones only when both are configured.
        const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
        // NOTE(review): if styler[] / SafeGetCharAt() yield a signed char, c and
        // cNext are negative for bytes >= 0x80 -- confirm that the character
        // classification helpers cope with negative arguments.
        int cNext = styler[startPos];

        // Scan for tokens at the start of the line (they may include
        // whitespace, for tokens like "End Function").
        for (int i = startPos; i < endPos; i++) {
                int c = cNext;
                cNext = styler.SafeGetCharAt(i + 1);
                // A lone CR, or any LF, ends the line; the CR of a CR LF pair does not.
                bool atEOL = (c == '\r' && cNext != '\n') || (c == '\n');
                if (options.foldSyntaxBased && !done && !go) {
                        if (wordlen) { // are we scanning a token already?
                                word[wordlen] = static_cast<char>(LowerCase(c));
                                if (!IsIdentifier(c)) { // done with token
                                        word[wordlen] = '\0';
                                        go = CheckFoldPoint(word, level);
                                        if (!go) {
                                                // Treat any whitespace as single blank, for
                                                // things like "End   Function".
                                                if (IsSpace(c) && IsIdentifier(word[wordlen - 1])) {
                                                        word[wordlen] = ' ';
                                                        if (wordlen < 255)
                                                                wordlen++;
                                                }
                                                else // done with this line
                                                        done = 1;
                                        }
                                } else if (wordlen < 255) {
                                        wordlen++;
                                }
                        } else { // start scanning at first non-whitespace character
                                if (!IsSpace(c)) {
                                        if (IsIdentifier(c)) {
                                                word[0] = static_cast<char>(LowerCase(c));
                                                wordlen = 1;
                                        } else // done with this line
                                                done = 1;
                                }
                        }
                }
                // Explicit fold points: looked for in comment-styled text only,
                // unless foldExplicitAnywhere lifts that restriction.
                if (options.foldCommentExplicit && ((styler.StyleAt(i) == SCE_B_COMMENT) || options.foldExplicitAnywhere)) {
                        if (userDefinedFoldMarkers) {
                                if (styler.Match(i, options.foldExplicitStart.c_str())) {
                                        level |= SC_FOLDLEVELHEADERFLAG;
                                        go = 1;
                                } else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
                                        go = -1;
                                }
                        } else {
                                // Standard markers: the comment character directly
                                // followed by '{' or '}'.
                                if (c == comment_char) {
                                        if (cNext == '{') {
                                                level |= SC_FOLDLEVELHEADERFLAG;
                                                go = 1;
                                        } else if (cNext == '}') {
                                                go = -1;
                                        }
                                }
                        }
                }
                if (atEOL) { // line end
                        if (!done && wordlen == 0 && options.foldCompact) // line was only space
                                level |= SC_FOLDLEVELWHITEFLAG;
                        // Store the level only when it differs from the stored one.
                        if (level != styler.LevelAt(line))
                                styler.SetLevel(line, level);
                        // The opener/closer found on this line affects the next line.
                        level += go;
                        line++;
                        // reset state
                        wordlen = 0;
                        level &= ~SC_FOLDLEVELHEADERFLAG;
                        level &= ~SC_FOLDLEVELWHITEFLAG;
                        go = 0;
                        done = 0;
                }
        }
}
 561
// Lexer module objects, one per dialect. Each ties a SCLEX_* lexer id and a
// language name to the matching LexerBasic factory and word list descriptions.
LexerModule lmBlitzBasic(SCLEX_BLITZBASIC, LexerBasic::LexerFactoryBlitzBasic, "blitzbasic", blitzbasicWordListDesc);

LexerModule lmPureBasic(SCLEX_PUREBASIC, LexerBasic::LexerFactoryPureBasic, "purebasic", purebasicWordListDesc);

LexerModule lmFreeBasic(SCLEX_FREEBASIC, LexerBasic::LexerFactoryFreeBasic, "freebasic", freebasicWordListDesc);