ext/scintilla/lexers/LexBasic.cxx

   1 // Scintilla source code edit control
   2 /** @file LexBasic.cxx
   3  ** Lexer for BlitzBasic and PureBasic.
   4  ** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net>
   5  **/
   6 // Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
   7 // The License.txt file describes the conditions under which this software may be distributed.
   8
   9 // This tries to be a unified Lexer/Folder for all the BlitzBasic/BlitzMax/PureBasic basics
  10 // and derivatives. Once they diverge enough, might want to split it into multiple
  11 // lexers for more code clarity.
  12 //
  13 // Mail me (elias <at> users <dot> sf <dot> net) for any bugs.
  14
  15 // Folding only works for simple things like functions or types.
  16
  17 // You may want to have a look at my ctags lexer as well, if you additionally to coloring
  18 // and folding need to extract things like label tags in your editor.
  19
  20 #include <stdlib.h>
  21 #include <string.h>
  22 #include <stdio.h>
  23 #include <stdarg.h>
  24 #include <assert.h>
  25 #include <ctype.h>
  26
  27 #ifdef _MSC_VER
  28 #pragma warning(disable: 4786)
  29 #endif
  30
  31 #include <string>
  32 #include <map>
  33
  34 #include "ILexer.h"
  35 #include "Scintilla.h"
  36 #include "SciLexer.h"
  37
  38 #include "WordList.h"
  39 #include "LexAccessor.h"
  40 #include "StyleContext.h"
  41 #include "CharacterSet.h"
  42 #include "LexerModule.h"
  43 #include "OptionSet.h"
  44
  45 #ifdef SCI_NAMESPACE
  46 using namespace Scintilla;
  47 #endif
  48
  49 /* Bits:
  50  * 1  - whitespace
  51  * 2  - operator
  52  * 4  - identifier
  53  * 8  - decimal digit
  54  * 16 - hex digit
  55  * 32 - bin digit
  56  */
  57 static int character_classification[128] =
  58 {
  59     0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  0,  0,  1,  0,  0,
  60     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  61     1,  2,  0,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  10, 2,
  62     60, 60, 28, 28, 28, 28, 28, 28, 28, 28, 2,  2,  2,  2,  2,  2,
  63     2,  20, 20, 20, 20, 20, 20, 4,  4,  4,  4,  4,  4,  4,  4,  4,
  64     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  4,
  65     2,  20, 20, 20, 20, 20, 20, 4,  4,  4,  4,  4,  4,  4,  4,  4,
  66     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  0
  67 };
  68
  69 static bool IsSpace(int c) {
  70         return c < 128 && (character_classification[c] & 1);
  71 }
  72
  73 static bool IsOperator(int c) {
  74         return c < 128 && (character_classification[c] & 2);
  75 }
  76
  77 static bool IsIdentifier(int c) {
  78         return c < 128 && (character_classification[c] & 4);
  79 }
  80
  81 static bool IsDigit(int c) {
  82         return c < 128 && (character_classification[c] & 8);
  83 }
  84
  85 static bool IsHexDigit(int c) {
  86         return c < 128 && (character_classification[c] & 16);
  87 }
  88
  89 static bool IsBinDigit(int c) {
  90         return c < 128 && (character_classification[c] & 32);
  91 }
  92
  93 static int LowerCase(int c)
  94 {
  95         if (c >= 'A' && c <= 'Z')
  96                 return 'a' + c - 'A';
  97         return c;
  98 }
  99
 100 static int CheckBlitzFoldPoint(char const *token, int &level) {
 101         if (!strcmp(token, "function") ||
 102                 !strcmp(token, "type")) {
 103                 level |= SC_FOLDLEVELHEADERFLAG;
 104                 return 1;
 105         }
 106         if (!strcmp(token, "end function") ||
 107                 !strcmp(token, "end type")) {
 108                 return -1;
 109         }
 110         return 0;
 111 }
 112
 113 static int CheckPureFoldPoint(char const *token, int &level) {
 114         if (!strcmp(token, "procedure") ||
 115                 !strcmp(token, "enumeration") ||
 116                 !strcmp(token, "interface") ||
 117                 !strcmp(token, "structure")) {
 118                 level |= SC_FOLDLEVELHEADERFLAG;
 119                 return 1;
 120         }
 121         if (!strcmp(token, "endprocedure") ||
 122                 !strcmp(token, "endenumeration") ||
 123                 !strcmp(token, "endinterface") ||
 124                 !strcmp(token, "endstructure")) {
 125                 return -1;
 126         }
 127         return 0;
 128 }
 129
 130 static int CheckFreeFoldPoint(char const *token, int &level) {
 131         if (!strcmp(token, "function") ||
 132                 !strcmp(token, "sub") ||
 133                 !strcmp(token, "type")) {
 134                 level |= SC_FOLDLEVELHEADERFLAG;
 135                 return 1;
 136         }
 137         if (!strcmp(token, "end function") ||
 138                 !strcmp(token, "end sub") ||
 139                 !strcmp(token, "end type")) {
 140                 return -1;
 141         }
 142         return 0;
 143 }
 144
 145 // An individual named option for use in an OptionSet
 146
 147 // Options used for LexerBasic
 148 struct OptionsBasic {
 149         bool fold;
 150         bool foldSyntaxBased;
 151         bool foldCommentExplicit;
 152         std::string foldExplicitStart;
 153         std::string foldExplicitEnd;
 154         bool foldExplicitAnywhere;
 155         bool foldCompact;
 156         OptionsBasic() {
 157                 fold = false;
 158                 foldSyntaxBased = true;
 159                 foldCommentExplicit = false;
 160                 foldExplicitStart = "";
 161                 foldExplicitEnd   = "";
 162                 foldExplicitAnywhere = false;
 163                 foldCompact = true;
 164         }
 165 };
 166
 167 static const char * const blitzbasicWordListDesc[] = {
 168         "BlitzBasic Keywords",
 169         "user1",
 170         "user2",
 171         "user3",
 172         0
 173 };
 174
 175 static const char * const purebasicWordListDesc[] = {
 176         "PureBasic Keywords",
 177         "PureBasic PreProcessor Keywords",
 178         "user defined 1",
 179         "user defined 2",
 180         0
 181 };
 182
 183 static const char * const freebasicWordListDesc[] = {
 184         "FreeBasic Keywords",
 185         "FreeBasic PreProcessor Keywords",
 186         "user defined 1",
 187         "user defined 2",
 188         0
 189 };
 190
 191 struct OptionSetBasic : public OptionSet<OptionsBasic> {
 192         OptionSetBasic(const char * const wordListDescriptions[]) {
 193                 DefineProperty("fold", &OptionsBasic::fold);
 194
 195                 DefineProperty("fold.basic.syntax.based", &OptionsBasic::foldSyntaxBased,
 196                         "Set this property to 0 to disable syntax based folding.");
 197
 198                 DefineProperty("fold.basic.comment.explicit", &OptionsBasic::foldCommentExplicit,
 199                         "This option enables folding explicit fold points when using the Basic lexer. "
 200                         "Explicit fold points allows adding extra folding by placing a ;{ (BB/PB) or '{ (FB) comment at the start "
 201                         "and a ;} (BB/PB) or '} (FB) at the end of a section that should be folded.");
 202
 203                 DefineProperty("fold.basic.explicit.start", &OptionsBasic::foldExplicitStart,
 204                         "The string to use for explicit fold start points, replacing the standard ;{ (BB/PB) or '{ (FB).");
 205
 206                 DefineProperty("fold.basic.explicit.end", &OptionsBasic::foldExplicitEnd,
 207                         "The string to use for explicit fold end points, replacing the standard ;} (BB/PB) or '} (FB).");
 208
 209                 DefineProperty("fold.basic.explicit.anywhere", &OptionsBasic::foldExplicitAnywhere,
 210                         "Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
 211
 212                 DefineProperty("fold.compact", &OptionsBasic::foldCompact);
 213
 214                 DefineWordListSets(wordListDescriptions);
 215         }
 216 };
 217
 218 class LexerBasic : public ILexer {
 219         char comment_char;
 220         int (*CheckFoldPoint)(char const *, int &);
 221         WordList keywordlists[4];
 222         OptionsBasic options;
 223         OptionSetBasic osBasic;
 224 public:
 225         LexerBasic(char comment_char_, int (*CheckFoldPoint_)(char const *, int &), const char * const wordListDescriptions[]) :
 226                    comment_char(comment_char_),
 227                    CheckFoldPoint(CheckFoldPoint_),
 228                    osBasic(wordListDescriptions) {
 229         }
 230         ~LexerBasic() {
 231         }
 232         void SCI_METHOD Release() {
 233                 delete this;
 234         }
 235         int SCI_METHOD Version() const {
 236                 return lvOriginal;
 237         }
 238         const char * SCI_METHOD PropertyNames() {
 239                 return osBasic.PropertyNames();
 240         }
 241         int SCI_METHOD PropertyType(const char *name) {
 242                 return osBasic.PropertyType(name);
 243         }
 244         const char * SCI_METHOD DescribeProperty(const char *name) {
 245                 return osBasic.DescribeProperty(name);
 246         }
 247         int SCI_METHOD PropertySet(const char *key, const char *val);
 248         const char * SCI_METHOD DescribeWordListSets() {
 249                 return osBasic.DescribeWordListSets();
 250         }
 251         int SCI_METHOD WordListSet(int n, const char *wl);
 252         void SCI_METHOD Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
 253         void SCI_METHOD Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
 254
 255         void * SCI_METHOD PrivateCall(int, void *) {
 256                 return 0;
 257         }
 258         static ILexer *LexerFactoryBlitzBasic() {
 259                 return new LexerBasic(';', CheckBlitzFoldPoint, blitzbasicWordListDesc);
 260         }
 261         static ILexer *LexerFactoryPureBasic() {
 262                 return new LexerBasic(';', CheckPureFoldPoint, purebasicWordListDesc);
 263         }
 264         static ILexer *LexerFactoryFreeBasic() {
 265                 return new LexerBasic('\'', CheckFreeFoldPoint, freebasicWordListDesc );
 266         }
 267 };
 268
 269 int SCI_METHOD LexerBasic::PropertySet(const char *key, const char *val) {
 270         if (osBasic.PropertySet(&options, key, val)) {
 271                 return 0;
 272         }
 273         return -1;
 274 }
 275
 276 int SCI_METHOD LexerBasic::WordListSet(int n, const char *wl) {
 277         WordList *wordListN = 0;
 278         switch (n) {
 279         case 0:
 280                 wordListN = &keywordlists[0];
 281                 break;
 282         case 1:
 283                 wordListN = &keywordlists[1];
 284                 break;
 285         case 2:
 286                 wordListN = &keywordlists[2];
 287                 break;
 288         case 3:
 289                 wordListN = &keywordlists[3];
 290                 break;
 291         }
 292         int firstModification = -1;
 293         if (wordListN) {
 294                 WordList wlNew;
 295                 wlNew.Set(wl);
 296                 if (*wordListN != wlNew) {
 297                         wordListN->Set(wl);
 298                         firstModification = 0;
 299                 }
 300         }
 301         return firstModification;
 302 }
 303
 304 void SCI_METHOD LexerBasic::Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
 305         LexAccessor styler(pAccess);
 306
 307         bool wasfirst = true, isfirst = true; // true if first token in a line
 308         styler.StartAt(startPos);
 309
 310         StyleContext sc(startPos, length, initStyle, styler);
 311
 312         // Can't use sc.More() here else we miss the last character
 313         for (; ; sc.Forward()) {
 314                 if (sc.state == SCE_B_IDENTIFIER) {
 315                         if (!IsIdentifier(sc.ch)) {
 316                                 // Labels
 317                                 if (wasfirst && sc.Match(':')) {
 318                                         sc.ChangeState(SCE_B_LABEL);
 319                                         sc.ForwardSetState(SCE_B_DEFAULT);
 320                                 } else {
 321                                         char s[100];
 322                                         int kstates[4] = {
 323                                                 SCE_B_KEYWORD,
 324                                                 SCE_B_KEYWORD2,
 325                                                 SCE_B_KEYWORD3,
 326                                                 SCE_B_KEYWORD4,
 327                                         };
 328                                         sc.GetCurrentLowered(s, sizeof(s));
 329                                         for (int i = 0; i < 4; i++) {
 330                                                 if (keywordlists[i].InList(s)) {
 331                                                         sc.ChangeState(kstates[i]);
 332                                                 }
 333                                         }
 334                                         // Types, must set them as operator else they will be
 335                                         // matched as number/constant
 336                                         if (sc.Match('.') || sc.Match('$') || sc.Match('%') ||
 337                                                 sc.Match('#')) {
 338                                                 sc.SetState(SCE_B_OPERATOR);
 339                                         } else {
 340                                                 sc.SetState(SCE_B_DEFAULT);
 341                                         }
 342                                 }
 343                         }
 344                 } else if (sc.state == SCE_B_OPERATOR) {
 345                         if (!IsOperator(sc.ch) || sc.Match('#'))
 346                                 sc.SetState(SCE_B_DEFAULT);
 347                 } else if (sc.state == SCE_B_LABEL) {
 348                         if (!IsIdentifier(sc.ch))
 349                                 sc.SetState(SCE_B_DEFAULT);
 350                 } else if (sc.state == SCE_B_CONSTANT) {
 351                         if (!IsIdentifier(sc.ch))
 352                                 sc.SetState(SCE_B_DEFAULT);
 353                 } else if (sc.state == SCE_B_NUMBER) {
 354                         if (!IsDigit(sc.ch))
 355                                 sc.SetState(SCE_B_DEFAULT);
 356                 } else if (sc.state == SCE_B_HEXNUMBER) {
 357                         if (!IsHexDigit(sc.ch))
 358                                 sc.SetState(SCE_B_DEFAULT);
 359                 } else if (sc.state == SCE_B_BINNUMBER) {
 360                         if (!IsBinDigit(sc.ch))
 361                                 sc.SetState(SCE_B_DEFAULT);
 362                 } else if (sc.state == SCE_B_STRING) {
 363                         if (sc.ch == '"') {
 364                                 sc.ForwardSetState(SCE_B_DEFAULT);
 365                         }
 366                         if (sc.atLineEnd) {
 367                                 sc.ChangeState(SCE_B_ERROR);
 368                                 sc.SetState(SCE_B_DEFAULT);
 369                         }
 370                 } else if (sc.state == SCE_B_COMMENT || sc.state == SCE_B_PREPROCESSOR) {
 371                         if (sc.atLineEnd) {
 372                                 sc.SetState(SCE_B_DEFAULT);
 373                         }
 374                 }
 375
 376                 if (sc.atLineStart)
 377                         isfirst = true;
 378
 379                 if (sc.state == SCE_B_DEFAULT || sc.state == SCE_B_ERROR) {
 380                         if (isfirst && sc.Match('.')) {
 381                                 sc.SetState(SCE_B_LABEL);
 382                         } else if (isfirst && sc.Match('#')) {
 383                                 wasfirst = isfirst;
 384                                 sc.SetState(SCE_B_IDENTIFIER);
 385                         } else if (sc.Match(comment_char)) {
 386                                 // Hack to make deprecated QBASIC '$Include show
 387                                 // up in freebasic with SCE_B_PREPROCESSOR.
 388                                 if (comment_char == '\'' && sc.Match(comment_char, '$'))
 389                                         sc.SetState(SCE_B_PREPROCESSOR);
 390                                 else
 391                                         sc.SetState(SCE_B_COMMENT);
 392                         } else if (sc.Match('"')) {
 393                                 sc.SetState(SCE_B_STRING);
 394                         } else if (IsDigit(sc.ch)) {
 395                                 sc.SetState(SCE_B_NUMBER);
 396                         } else if (sc.Match('$')) {
 397                                 sc.SetState(SCE_B_HEXNUMBER);
 398                         } else if (sc.Match('%')) {
 399                                 sc.SetState(SCE_B_BINNUMBER);
 400                         } else if (sc.Match('#')) {
 401                                 sc.SetState(SCE_B_CONSTANT);
 402                         } else if (IsOperator(sc.ch)) {
 403                                 sc.SetState(SCE_B_OPERATOR);
 404                         } else if (IsIdentifier(sc.ch)) {
 405                                 wasfirst = isfirst;
 406                                 sc.SetState(SCE_B_IDENTIFIER);
 407                         } else if (!IsSpace(sc.ch)) {
 408                                 sc.SetState(SCE_B_ERROR);
 409                         }
 410                 }
 411
 412                 if (!IsSpace(sc.ch))
 413                         isfirst = false;
 414
 415                 if (!sc.More())
 416                         break;
 417         }
 418         sc.Complete();
 419 }
 420
 421
 422 void SCI_METHOD LexerBasic::Fold(unsigned int startPos, int length, int /* initStyle */, IDocument *pAccess) {
 423
 424         if (!options.fold)
 425                 return;
 426
 427         LexAccessor styler(pAccess);
 428
 429         int line = styler.GetLine(startPos);
 430         int level = styler.LevelAt(line);
 431         int go = 0, done = 0;
 432         int endPos = startPos + length;
 433         char word[256];
 434         int wordlen = 0;
 435         const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
 436         int cNext = styler[startPos];
 437
 438         // Scan for tokens at the start of the line (they may include
 439         // whitespace, for tokens like "End Function"
 440         for (int i = startPos; i < endPos; i++) {
 441                 int c = cNext;
 442                 cNext = styler.SafeGetCharAt(i + 1);
 443                 bool atEOL = (c == '\r' && cNext != '\n') || (c == '\n');
 444                 if (options.foldSyntaxBased && !done && !go) {
 445                         if (wordlen) { // are we scanning a token already?
 446                                 word[wordlen] = static_cast<char>(LowerCase(c));
 447                                 if (!IsIdentifier(c)) { // done with token
 448                                         word[wordlen] = '\0';
 449                                         go = CheckFoldPoint(word, level);
 450                                         if (!go) {
 451                                                 // Treat any whitespace as single blank, for
 452                                                 // things like "End   Function".
 453                                                 if (IsSpace(c) && IsIdentifier(word[wordlen - 1])) {
 454                                                         word[wordlen] = ' ';
 455                                                         if (wordlen < 255)
 456                                                                 wordlen++;
 457                                                 }
 458                                                 else // done with this line
 459                                                         done = 1;
 460                                         }
 461                                 } else if (wordlen < 255) {
 462                                         wordlen++;
 463                                 }
 464                         } else { // start scanning at first non-whitespace character
 465                                 if (!IsSpace(c)) {
 466                                         if (IsIdentifier(c)) {
 467                                                 word[0] = static_cast<char>(LowerCase(c));
 468                                                 wordlen = 1;
 469                                         } else // done with this line
 470                                                 done = 1;
 471                                 }
 472                         }
 473                 }
 474                 if (options.foldCommentExplicit && ((styler.StyleAt(i) == SCE_B_COMMENT) || options.foldExplicitAnywhere)) {
 475                         if (userDefinedFoldMarkers) {
 476                                 if (styler.Match(i, options.foldExplicitStart.c_str())) {
 477                                         level |= SC_FOLDLEVELHEADERFLAG;
 478                                         go = 1;
 479                                 } else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
 480                                         go = -1;
 481                                 }
 482                         } else {
 483                                 if (c == comment_char) {
 484                                         if (cNext == '{') {
 485                                                 level |= SC_FOLDLEVELHEADERFLAG;
 486                                                 go = 1;
 487                                         } else if (cNext == '}') {
 488                                                 go = -1;
 489                                         }
 490                                 }
 491                         }
 492                 }
 493                 if (atEOL) { // line end
 494                         if (!done && wordlen == 0 && options.foldCompact) // line was only space
 495                                 level |= SC_FOLDLEVELWHITEFLAG;
 496                         if (level != styler.LevelAt(line))
 497                                 styler.SetLevel(line, level);
 498                         level += go;
 499                         line++;
 500                         // reset state
 501                         wordlen = 0;
 502                         level &= ~SC_FOLDLEVELHEADERFLAG;
 503                         level &= ~SC_FOLDLEVELWHITEFLAG;
 504                         go = 0;
 505                         done = 0;
 506                 }
 507         }
 508 }
 509
 510 LexerModule lmBlitzBasic(SCLEX_BLITZBASIC, LexerBasic::LexerFactoryBlitzBasic, "blitzbasic", blitzbasicWordListDesc);
 511
 512 LexerModule lmPureBasic(SCLEX_PUREBASIC, LexerBasic::LexerFactoryPureBasic, "purebasic", purebasicWordListDesc);
 513
 514 LexerModule lmFreeBasic(SCLEX_FREEBASIC, LexerBasic::LexerFactoryFreeBasic, "freebasic", freebasicWordListDesc);