1 // Scintilla source code edit control
3 ** Lexer for BlitzBasic and PureBasic.
4 ** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net>
6 // Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
7 // The License.txt file describes the conditions under which this software may be distributed.
9 // This tries to be a unified Lexer/Folder for all the BlitzBasic/BlitzMax/PureBasic basics
10 // and derivatives. Once they diverge enough, might want to split it into multiple
11 // lexers for more code clarity.
13 // Mail me (elias <at> users <dot> sf <dot> net) for any bugs.
15 // Folding only works for simple things like functions or types.
17 // You may want to have a look at my ctags lexer as well if, in addition to coloring
18 // and folding, you need to extract things like label tags in your editor.
31 #include "Scintilla.h"
35 #include "LexAccessor.h"
36 #include "StyleContext.h"
37 #include "CharacterSet.h"
38 #include "LexerModule.h"
39 #include "OptionSet.h"
42 using namespace Scintilla
;
/* Classification table for the 7-bit ASCII range, indexed by character code.
 * Every entry is a set of flag bits:
 *    1 = whitespace            (tab, LF, CR and space)
 *    2 = operator / punctuation
 *    4 = identifier character  (letters, digits and '_')
 *    8 = decimal digit         (also set for '.')
 *   16 = hexadecimal digit
 *   32 = binary digit          ('0' and '1')
 *   64 = letter                (also set for '_')
 */
static const int character_classification[128] =
{
	0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  0,  0,  1,  0,  0,
	0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	1,  2,  0,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  10, 2,
	60, 60, 28, 28, 28, 28, 28, 28, 28, 28, 2,  2,  2,  2,  2,  2,
	2,  84, 84, 84, 84, 84, 84, 68, 68, 68, 68, 68, 68, 68, 68, 68,
	68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 2,  2,  2,  2,  68,
	2,  84, 84, 84, 84, 84, 84, 68, 68, 68, 68, 68, 68, 68, 68, 68,
	68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 2,  2,  2,  2,  0
};

// Returns true when c is a valid index into character_classification and the
// entry has at least one bit of mask set.
// The lower bound is checked as well as the upper one: callers may pass a
// value that originated from a plain char, which is negative for bytes
// >= 0x80 where char is signed, and indexing the table with it would read
// outside the array.
static bool HasCharacterClass(int c, int mask) {
	return c >= 0 && c < 128 && (character_classification[c] & mask) != 0;
}

// Whitespace: tab, line feed, carriage return or space.
static bool IsSpace(int c) {
	return HasCharacterClass(c, 1);
}

// Operator or punctuation character.
static bool IsOperator(int c) {
	return HasCharacterClass(c, 2);
}

// Character that may appear inside an identifier.
static bool IsIdentifier(int c) {
	return HasCharacterClass(c, 4);
}

// Decimal digit; the table also flags '.' so it is accepted here.
static bool IsDigit(int c) {
	return HasCharacterClass(c, 8);
}

// Hexadecimal digit (0-9, a-f, A-F).
static bool IsHexDigit(int c) {
	return HasCharacterClass(c, 16);
}

// Binary digit ('0' or '1').
static bool IsBinDigit(int c) {
	return HasCharacterClass(c, 32);
}

// ASCII letter; the table also flags '_' so it is accepted here.
static bool IsLetter(int c) {
	return HasCharacterClass(c, 64);
}
// Case-folding helper; LexerBasic::Fold passes every token character through
// it before storing it in the token buffer.
// The visible test singles out the ASCII range 'A'..'Z'.
// NOTE(review): presumably those are mapped to 'a'..'z' and every other value
// is returned unchanged — confirm against the return statements.
94 static int LowerCase(int c
)
96 if (c
>= 'A' && c
<= 'Z')
// Fold-point test for BlitzBasic (installed by LexerFactoryBlitzBasic).
//   token - NUL-terminated token text; it is compared with strcmp against
//           lower-case literals, so it has to be lower-cased already.
//   level - fold level of the current line, updated in place.
// "function" and "type" mark the line as a fold header by OR-ing
// SC_FOLDLEVELHEADERFLAG into level. "end function" and "end type" are
// recognised separately (presumably as the matching block terminators).
// NOTE(review): the meaning of the int result is not evident from this
// block; LexerBasic::Fold stores it in its `go` variable — confirm there.
101 static int CheckBlitzFoldPoint(char const *token
, int &level
) {
102 if (!strcmp(token
, "function") ||
103 !strcmp(token
, "type")) {
104 level
|= SC_FOLDLEVELHEADERFLAG
;
107 if (!strcmp(token
, "end function") ||
108 !strcmp(token
, "end type")) {
// Fold-point test for PureBasic (installed by LexerFactoryPureBasic).
//   token - NUL-terminated token text; it is compared with strcmp against
//           lower-case literals, so it has to be lower-cased already.
//   level - fold level of the current line, updated in place.
// "procedure", "enumeration", "interface" and "structure" mark the line as
// a fold header by OR-ing SC_FOLDLEVELHEADERFLAG into level. The single-word
// forms "endprocedure", "endenumeration", "endinterface" and "endstructure"
// are recognised separately (presumably as the matching block terminators).
// NOTE(review): the meaning of the int result is not evident from this
// block; LexerBasic::Fold stores it in its `go` variable — confirm there.
114 static int CheckPureFoldPoint(char const *token
, int &level
) {
115 if (!strcmp(token
, "procedure") ||
116 !strcmp(token
, "enumeration") ||
117 !strcmp(token
, "interface") ||
118 !strcmp(token
, "structure")) {
119 level
|= SC_FOLDLEVELHEADERFLAG
;
122 if (!strcmp(token
, "endprocedure") ||
123 !strcmp(token
, "endenumeration") ||
124 !strcmp(token
, "endinterface") ||
125 !strcmp(token
, "endstructure")) {
// Fold-point test for FreeBasic (installed by LexerFactoryFreeBasic).
//   token - NUL-terminated token text; it is compared with strcmp against
//           lower-case literals, so it has to be lower-cased already.
//   level - fold level of the current line, updated in place.
// "function", "sub", "enum", "type", "union", "property", "destructor" and
// "constructor" mark the line as a fold header by OR-ing
// SC_FOLDLEVELHEADERFLAG into level. The two-word "end ..." forms of the same
// keywords are recognised separately (presumably as the matching block
// terminators); they are matched with exactly one blank between the words.
// NOTE(review): the meaning of the int result is not evident from this
// block; LexerBasic::Fold stores it in its `go` variable — confirm there.
131 static int CheckFreeFoldPoint(char const *token
, int &level
) {
132 if (!strcmp(token
, "function") ||
133 !strcmp(token
, "sub") ||
134 !strcmp(token
, "enum") ||
135 !strcmp(token
, "type") ||
136 !strcmp(token
, "union") ||
137 !strcmp(token
, "property") ||
138 !strcmp(token
, "destructor") ||
139 !strcmp(token
, "constructor")) {
140 level
|= SC_FOLDLEVELHEADERFLAG
;
143 if (!strcmp(token
, "end function") ||
144 !strcmp(token
, "end sub") ||
145 !strcmp(token
, "end enum") ||
146 !strcmp(token
, "end type") ||
147 !strcmp(token
, "end union") ||
148 !strcmp(token
, "end property") ||
149 !strcmp(token
, "end destructor") ||
150 !strcmp(token
, "end constructor")) {
156 // An individual named option for use in an OptionSet
158 // Options used for LexerBasic
// Folding options of LexerBasic. Each member is bound to a named lexer
// property by OptionSetBasic; the assignments further down give the values
// in effect before any property has been set.
159 struct OptionsBasic
{
// "fold.basic.syntax.based": keyword driven folding; enabled by default.
161 bool foldSyntaxBased
;
// "fold.basic.comment.explicit": explicit fold markers; disabled by default.
162 bool foldCommentExplicit
;
// "fold.basic.explicit.start" / "fold.basic.explicit.end": user defined
// marker strings; both empty by default. Fold only treats them as active
// when both are non-empty.
163 std::string foldExplicitStart
;
164 std::string foldExplicitEnd
;
// "fold.basic.explicit.anywhere": accept explicit markers outside of line
// comments as well; disabled by default.
165 bool foldExplicitAnywhere
;
169 foldSyntaxBased
= true;
170 foldCommentExplicit
= false;
171 foldExplicitStart
= "";
172 foldExplicitEnd
= "";
173 foldExplicitAnywhere
= false;
// Human readable descriptions of the keyword lists of each dialect. Each
// array is handed to the LexerBasic constructor by the matching factory
// function and to the LexerModule registration of that dialect.

// Keyword list descriptions for BlitzBasic.
178 static const char * const blitzbasicWordListDesc
[] = {
179 "BlitzBasic Keywords",
// Keyword list descriptions for PureBasic.
186 static const char * const purebasicWordListDesc
[] = {
187 "PureBasic Keywords",
188 "PureBasic PreProcessor Keywords",
// Keyword list descriptions for FreeBasic.
194 static const char * const freebasicWordListDesc
[] = {
195 "FreeBasic Keywords",
196 "FreeBasic PreProcessor Keywords",
// Binds the property names understood by the Basic lexers to the members of
// OptionsBasic and registers the keyword list descriptions of one dialect.
//   wordListDescriptions - description array of the dialect, forwarded to
//                          DefineWordListSets.
// "fold" and "fold.compact" are defined without a description string; every
// "fold.basic.*" property carries one.
202 struct OptionSetBasic
: public OptionSet
<OptionsBasic
> {
203 OptionSetBasic(const char * const wordListDescriptions
[]) {
204 DefineProperty("fold", &OptionsBasic::fold
);
206 DefineProperty("fold.basic.syntax.based", &OptionsBasic::foldSyntaxBased
,
207 "Set this property to 0 to disable syntax based folding.");
209 DefineProperty("fold.basic.comment.explicit", &OptionsBasic::foldCommentExplicit
,
210 "This option enables folding explicit fold points when using the Basic lexer. "
211 "Explicit fold points allows adding extra folding by placing a ;{ (BB/PB) or '{ (FB) comment at the start "
212 "and a ;} (BB/PB) or '} (FB) at the end of a section that should be folded.");
214 DefineProperty("fold.basic.explicit.start", &OptionsBasic::foldExplicitStart
,
215 "The string to use for explicit fold start points, replacing the standard ;{ (BB/PB) or '{ (FB).");
217 DefineProperty("fold.basic.explicit.end", &OptionsBasic::foldExplicitEnd
,
218 "The string to use for explicit fold end points, replacing the standard ;} (BB/PB) or '} (FB).");
220 DefineProperty("fold.basic.explicit.anywhere", &OptionsBasic::foldExplicitAnywhere
,
221 "Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
223 DefineProperty("fold.compact", &OptionsBasic::foldCompact
);
225 DefineWordListSets(wordListDescriptions
);
// Lexer object shared by the BlitzBasic, PureBasic and FreeBasic dialects.
// A dialect is defined by three constructor arguments: the character that
// starts a line comment, the fold-point test function, and the keyword list
// descriptions. The three static factory functions at the end of the class
// supply these.
229 class LexerBasic
: public ILexer
{
// Dialect specific fold-point test, called from Fold with a lower-cased
// token and the fold level of the current line.
231 int (*CheckFoldPoint
)(char const *, int &);
// Keyword lists; Lex checks identifiers against all four of them.
232 WordList keywordlists
[4];
// Current property values and the property-name bindings that update them.
233 OptionsBasic options
;
234 OptionSetBasic osBasic
;
// comment_char_        - character that starts a line comment
// CheckFoldPoint_      - fold-point test of the dialect
// wordListDescriptions - keyword list descriptions, passed on to osBasic
236 LexerBasic(char comment_char_
, int (*CheckFoldPoint_
)(char const *, int &), const char * const wordListDescriptions
[]) :
237 comment_char(comment_char_
),
238 CheckFoldPoint(CheckFoldPoint_
),
239 osBasic(wordListDescriptions
) {
241 virtual ~LexerBasic() {
243 void SCI_METHOD
Release() {
246 int SCI_METHOD
Version() const {
// The property and word-list queries below simply delegate to osBasic.
249 const char * SCI_METHOD
PropertyNames() {
250 return osBasic
.PropertyNames();
252 int SCI_METHOD
PropertyType(const char *name
) {
253 return osBasic
.PropertyType(name
);
255 const char * SCI_METHOD
DescribeProperty(const char *name
) {
256 return osBasic
.DescribeProperty(name
);
258 Sci_Position SCI_METHOD
PropertySet(const char *key
, const char *val
);
259 const char * SCI_METHOD
DescribeWordListSets() {
260 return osBasic
.DescribeWordListSets();
262 Sci_Position SCI_METHOD
WordListSet(int n
, const char *wl
);
263 void SCI_METHOD
Lex(Sci_PositionU startPos
, Sci_Position length
, int initStyle
, IDocument
*pAccess
);
264 void SCI_METHOD
Fold(Sci_PositionU startPos
, Sci_Position length
, int initStyle
, IDocument
*pAccess
);
266 void * SCI_METHOD
PrivateCall(int, void *) {
// Factory functions referenced by the LexerModule registrations. BlitzBasic
// and PureBasic use ';' as comment character, FreeBasic uses '\''.
269 static ILexer
*LexerFactoryBlitzBasic() {
270 return new LexerBasic(';', CheckBlitzFoldPoint
, blitzbasicWordListDesc
);
272 static ILexer
*LexerFactoryPureBasic() {
273 return new LexerBasic(';', CheckPureFoldPoint
, purebasicWordListDesc
);
275 static ILexer
*LexerFactoryFreeBasic() {
276 return new LexerBasic('\'', CheckFreeFoldPoint
, freebasicWordListDesc
);
// Sets the lexer property `key` to `val` by forwarding to osBasic, which
// writes the value into `options`.
// NOTE(review): the value returned for the changed / unchanged cases is not
// visible in this block — confirm before relying on it.
280 Sci_Position SCI_METHOD
LexerBasic::PropertySet(const char *key
, const char *val
) {
281 if (osBasic
.PropertySet(&options
, key
, val
)) {
// Replaces one of the four keyword lists.
//   n  - selects the list (presumably 0..3 map to keywordlists[0..3] —
//        the selecting statement is not visible here, confirm)
//   wl - new contents of the list
// Returns firstModification: -1 unless the selected list differs from the
// newly built one (wlNew), in which case it is 0.
287 Sci_Position SCI_METHOD
LexerBasic::WordListSet(int n
, const char *wl
) {
288 WordList
*wordListN
= 0;
291 wordListN
= &keywordlists
[0];
294 wordListN
= &keywordlists
[1];
297 wordListN
= &keywordlists
[2];
300 wordListN
= &keywordlists
[3];
303 Sci_Position firstModification
= -1;
// Only report a modification when the list content actually changed.
307 if (*wordListN
!= wlNew
) {
309 firstModification
= 0;
312 return firstModification
;
// Assigns styles to the document range starting at startPos.
//   startPos  - first position to style
//   length    - number of characters to style
//   initStyle - style in effect at startPos
//   pAccess   - document to read from and write styles to
// The loop advances a StyleContext one character at a time. Each iteration
// first checks whether the current state ends at this character, and then,
// when the state is SCE_B_DEFAULT or SCE_B_ERROR, picks the state that
// starts here. wasfirst / isfirst track whether a token is the first one on
// its line; they gate label and leading '#' handling.
315 void SCI_METHOD
LexerBasic::Lex(Sci_PositionU startPos
, Sci_Position length
, int initStyle
, IDocument
*pAccess
) {
316 LexAccessor
styler(pAccess
);
318 bool wasfirst
= true, isfirst
= true; // true if first token in a line
319 styler
.StartAt(startPos
);
// State to resume once a doc keyword inside a doc comment has ended.
320 int styleBeforeKeyword
= SCE_B_DEFAULT
;
322 StyleContext
sc(startPos
, length
, initStyle
, styler
);
324 // Can't use sc.More() here else we miss the last character
325 for (; ; sc
.Forward()) {
// Part 1: decide whether the state entered earlier ends at this character.
326 if (sc
.state
== SCE_B_IDENTIFIER
) {
327 if (!IsIdentifier(sc
.ch
)) {
// An identifier that was first on its line and is followed by ':' is
// restyled as a label.
329 if (wasfirst
&& sc
.Match(':')) {
330 sc
.ChangeState(SCE_B_LABEL
);
331 sc
.ForwardSetState(SCE_B_DEFAULT
);
// Otherwise the lower-cased identifier is looked up in the four keyword
// lists and restyled with the matching entry of kstates.
340 sc
.GetCurrentLowered(s
, sizeof(s
));
341 for (int i
= 0; i
< 4; i
++) {
342 if (keywordlists
[i
].InList(s
)) {
343 sc
.ChangeState(kstates
[i
]);
346 // Types, must set them as operator else they will be
347 // matched as number/constant
348 if (sc
.Match('.') || sc
.Match('$') || sc
.Match('%') ||
350 sc
.SetState(SCE_B_OPERATOR
);
352 sc
.SetState(SCE_B_DEFAULT
);
356 } else if (sc
.state
== SCE_B_OPERATOR
) {
357 if (!IsOperator(sc
.ch
) || sc
.Match('#'))
358 sc
.SetState(SCE_B_DEFAULT
);
359 } else if (sc
.state
== SCE_B_LABEL
) {
360 if (!IsIdentifier(sc
.ch
))
361 sc
.SetState(SCE_B_DEFAULT
);
362 } else if (sc
.state
== SCE_B_CONSTANT
) {
363 if (!IsIdentifier(sc
.ch
))
364 sc
.SetState(SCE_B_DEFAULT
);
365 } else if (sc
.state
== SCE_B_NUMBER
) {
367 sc
.SetState(SCE_B_DEFAULT
);
368 } else if (sc
.state
== SCE_B_HEXNUMBER
) {
369 if (!IsHexDigit(sc
.ch
))
370 sc
.SetState(SCE_B_DEFAULT
);
371 } else if (sc
.state
== SCE_B_BINNUMBER
) {
372 if (!IsBinDigit(sc
.ch
))
373 sc
.SetState(SCE_B_DEFAULT
);
374 } else if (sc
.state
== SCE_B_STRING
) {
376 sc
.ForwardSetState(SCE_B_DEFAULT
);
379 sc
.ChangeState(SCE_B_ERROR
);
380 sc
.SetState(SCE_B_DEFAULT
);
382 } else if (sc
.state
== SCE_B_COMMENT
|| sc
.state
== SCE_B_PREPROCESSOR
) {
384 sc
.SetState(SCE_B_DEFAULT
);
386 } else if (sc
.state
== SCE_B_DOCLINE
) {
388 sc
.SetState(SCE_B_DEFAULT
);
// '\' or '@' followed by a letter starts a doc keyword, unless the
// previous character was a backslash.
389 } else if (sc
.ch
== '\\' || sc
.ch
== '@') {
390 if (IsLetter(sc
.chNext
) && sc
.chPrev
!= '\\') {
391 styleBeforeKeyword
= sc
.state
;
392 sc
.SetState(SCE_B_DOCKEYWORD
);
// A doc keyword ends at whitespace and hands control back to the doc
// comment style it interrupted.
395 } else if (sc
.state
== SCE_B_DOCKEYWORD
) {
396 if (IsSpace(sc
.ch
)) {
397 sc
.SetState(styleBeforeKeyword
);
398 } else if (sc
.atLineEnd
&& styleBeforeKeyword
== SCE_B_DOCLINE
) {
399 sc
.SetState(SCE_B_DEFAULT
);
// Block comments and doc blocks are closed by the sequence '/ .
401 } else if (sc
.state
== SCE_B_COMMENTBLOCK
) {
402 if (sc
.Match("\'/")) {
404 sc
.ForwardSetState(SCE_B_DEFAULT
);
406 } else if (sc
.state
== SCE_B_DOCBLOCK
) {
407 if (sc
.Match("\'/")) {
409 sc
.ForwardSetState(SCE_B_DEFAULT
);
410 } else if (sc
.ch
== '\\' || sc
.ch
== '@') {
411 if (IsLetter(sc
.chNext
) && sc
.chPrev
!= '\\') {
412 styleBeforeKeyword
= sc
.state
;
413 sc
.SetState(SCE_B_DOCKEYWORD
);
// Part 2: no token is open (default or error state), so choose the state
// that starts at the current character. The order of the tests matters,
// e.g. '$' and '%' are tried as number prefixes before the operator test.
421 if (sc
.state
== SCE_B_DEFAULT
|| sc
.state
== SCE_B_ERROR
) {
// A leading '.' starts a label, except in the dialect whose comment
// character is '\''.
422 if (isfirst
&& sc
.Match('.') && comment_char
!= '\'') {
423 sc
.SetState(SCE_B_LABEL
);
424 } else if (isfirst
&& sc
.Match('#')) {
426 sc
.SetState(SCE_B_IDENTIFIER
);
427 } else if (sc
.Match(comment_char
)) {
428 // Hack to make deprecated QBASIC '$Include show
429 // up in freebasic with SCE_B_PREPROCESSOR.
430 if (comment_char
== '\'' && sc
.Match(comment_char
, '$'))
431 sc
.SetState(SCE_B_PREPROCESSOR
);
432 else if (sc
.Match("\'*") || sc
.Match("\'!")) {
433 sc
.SetState(SCE_B_DOCLINE
);
435 sc
.SetState(SCE_B_COMMENT
);
437 } else if (sc
.Match("/\'")) {
438 if (sc
.Match("/\'*") || sc
.Match("/\'!")) { // Support of gtk-doc/Doxygen doc. style
439 sc
.SetState(SCE_B_DOCBLOCK
);
441 sc
.SetState(SCE_B_COMMENTBLOCK
);
443 sc
.Forward(); // Eat the ' so it isn't used for the end of the comment
444 } else if (sc
.Match('"')) {
445 sc
.SetState(SCE_B_STRING
);
446 } else if (IsDigit(sc
.ch
)) {
447 sc
.SetState(SCE_B_NUMBER
);
// Note: the "&o" / "&O" prefixes are styled with the hex-number state too.
448 } else if (sc
.Match('$') || sc
.Match("&h") || sc
.Match("&H") || sc
.Match("&o") || sc
.Match("&O")) {
449 sc
.SetState(SCE_B_HEXNUMBER
);
450 } else if (sc
.Match('%') || sc
.Match("&b") || sc
.Match("&B")) {
451 sc
.SetState(SCE_B_BINNUMBER
);
452 } else if (sc
.Match('#')) {
453 sc
.SetState(SCE_B_CONSTANT
);
454 } else if (IsOperator(sc
.ch
)) {
455 sc
.SetState(SCE_B_OPERATOR
);
456 } else if (IsIdentifier(sc
.ch
)) {
458 sc
.SetState(SCE_B_IDENTIFIER
);
// Anything that is neither a recognised token start nor whitespace is
// flagged as an error.
459 } else if (!IsSpace(sc
.ch
)) {
460 sc
.SetState(SCE_B_ERROR
);
// Computes fold levels for the lines covered by [startPos, startPos + length).
//   startPos - first position to examine
//   length   - number of characters to examine
//   pAccess  - document to read from and store fold levels in
// The initial style argument is unused.
// Two independent mechanisms contribute to the level of a line:
//  * syntax based folding (options.foldSyntaxBased): the first token of a
//    line is collected in lower case and handed to the dialect specific
//    CheckFoldPoint function;
//  * explicit fold markers (options.foldCommentExplicit): either the user
//    defined start / end strings or the default markers that follow the
//    comment character.
// At each line end the level is written back if it changed, and the header
// and white-space flags are cleared for the next line.
474 void SCI_METHOD
LexerBasic::Fold(Sci_PositionU startPos
, Sci_Position length
, int /* initStyle */, IDocument
*pAccess
) {
479 LexAccessor
styler(pAccess
);
481 Sci_Position line
= styler
.GetLine(startPos
);
482 int level
= styler
.LevelAt(line
);
// go holds the result of CheckFoldPoint; syntax based scanning of a line is
// skipped once go or done is non-zero.
483 int go
= 0, done
= 0;
484 Sci_Position endPos
= startPos
+ length
;
// User defined markers only take effect when both strings are non-empty.
487 const bool userDefinedFoldMarkers
= !options
.foldExplicitStart
.empty() && !options
.foldExplicitEnd
.empty();
488 int cNext
= styler
[startPos
];
490 // Scan for tokens at the start of the line (they may include
491 // whitespace, for tokens like "End Function")
492 for (Sci_Position i
= startPos
; i
< endPos
; i
++) {
494 cNext
= styler
.SafeGetCharAt(i
+ 1);
// End of line: a lone '\r', or '\n' (which also covers the "\r\n" pair).
495 bool atEOL
= (c
== '\r' && cNext
!= '\n') || (c
== '\n');
496 if (options
.foldSyntaxBased
&& !done
&& !go
) {
497 if (wordlen
) { // are we scanning a token already?
498 word
[wordlen
] = static_cast<char>(LowerCase(c
));
499 if (!IsIdentifier(c
)) { // done with token
500 word
[wordlen
] = '\0';
501 go
= CheckFoldPoint(word
, level
);
503 // Treat any whitespace as single blank, for
504 // things like "End Function".
505 if (IsSpace(c
) && IsIdentifier(word
[wordlen
- 1])) {
510 else // done with this line
// The token buffer index is capped at 255 here; longer tokens are not
// extended further.
513 } else if (wordlen
< 255) {
516 } else { // start scanning at first non-whitespace character
518 if (IsIdentifier(c
)) {
519 word
[0] = static_cast<char>(LowerCase(c
));
521 } else // done with this line
// Explicit fold markers: only considered inside line comments unless
// options.foldExplicitAnywhere is set.
526 if (options
.foldCommentExplicit
&& ((styler
.StyleAt(i
) == SCE_B_COMMENT
) || options
.foldExplicitAnywhere
)) {
527 if (userDefinedFoldMarkers
) {
528 if (styler
.Match(i
, options
.foldExplicitStart
.c_str())) {
529 level
|= SC_FOLDLEVELHEADERFLAG
;
531 } else if (styler
.Match(i
, options
.foldExplicitEnd
.c_str())) {
// Default markers: the comment character followed by '}' ends a section
// (the start marker is presumably the analogous '{' test — confirm).
535 if (c
== comment_char
) {
537 level
|= SC_FOLDLEVELHEADERFLAG
;
539 } else if (cNext
== '}') {
545 if (atEOL
) { // line end
546 if (!done
&& wordlen
== 0 && options
.foldCompact
) // line was only space
547 level
|= SC_FOLDLEVELWHITEFLAG
;
// Avoid redundant writes: only store the level when it changed.
548 if (level
!= styler
.LevelAt(line
))
549 styler
.SetLevel(line
, level
);
// Header and white-space flags describe a single line only.
554 level
&= ~SC_FOLDLEVELHEADERFLAG
;
555 level
&= ~SC_FOLDLEVELWHITEFLAG
;
// Lexer registrations, one per dialect. Each ties a SCLEX_* identifier and a
// lexer name to the LexerBasic factory function and the keyword list
// descriptions of that dialect.
562 LexerModule
lmBlitzBasic(SCLEX_BLITZBASIC
, LexerBasic::LexerFactoryBlitzBasic
, "blitzbasic", blitzbasicWordListDesc
);
564 LexerModule
lmPureBasic(SCLEX_PUREBASIC
, LexerBasic::LexerFactoryPureBasic
, "purebasic", purebasicWordListDesc
);
566 LexerModule
lmFreeBasic(SCLEX_FREEBASIC
, LexerBasic::LexerFactoryFreeBasic
, "freebasic", freebasicWordListDesc
);