Rewrite PHP tag creation script using up to date upstream tag definitions
[geany-mirror.git] / scintilla / lexers / LexBasic.cxx
blobb73fac8b6e8adde3dff3f281e64edbd4bf96f70c
1 // Scintilla source code edit control
2 /** @file LexBasic.cxx
3 ** Lexer for BlitzBasic and PureBasic.
4 ** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net>
5 **/
6 // Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
7 // The License.txt file describes the conditions under which this software may be distributed.
9 // This tries to be a unified Lexer/Folder for all the BlitzBasic/BlitzMax/PureBasic basics
10 // and derivatives. Once they diverge enough, we might want to split it into multiple
11 // lexers for more code clarity.
13 // Mail me (elias <at> users <dot> sf <dot> net) for any bugs.
15 // Folding only works for simple things like functions or types.
17 // You may also want to have a look at my ctags lexer if, in addition to coloring
18 // and folding, you need to extract things like label tags in your editor.
20 #include <stdlib.h>
21 #include <string.h>
22 #include <stdio.h>
23 #include <stdarg.h>
24 #include <assert.h>
25 #include <ctype.h>
27 #include <string>
28 #include <map>
30 #include "ILexer.h"
31 #include "Scintilla.h"
32 #include "SciLexer.h"
34 #include "WordList.h"
35 #include "LexAccessor.h"
36 #include "StyleContext.h"
37 #include "CharacterSet.h"
38 #include "LexerModule.h"
39 #include "OptionSet.h"
41 #ifdef SCI_NAMESPACE
42 using namespace Scintilla;
43 #endif
/* Character class bits stored in character_classification[]:
 * 1 - whitespace
 * 2 - operator
 * 4 - identifier
 * 8 - decimal digit
 * 16 - hex digit
 * 32 - bin digit
 * 64 - letter
 *
 * The table has one entry per 7-bit ASCII code (0..127).
 */
static const int character_classification[128] =
{
	0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  0,  0,  1,  0,  0,
	0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	1,  2,  0,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  10, 2,
	60, 60, 28, 28, 28, 28, 28, 28, 28, 28, 2,  2,  2,  2,  2,  2,
	2,  84, 84, 84, 84, 84, 84, 68, 68, 68, 68, 68, 68, 68, 68, 68,
	68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 2,  2,  2,  2,  68,
	2,  84, 84, 84, 84, 84, 84, 68, 68, 68, 68, 68, 68, 68, 68, 68,
	68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 2,  2,  2,  2,  0
};

// Returns true when c is inside the table (0..127) and its entry has at
// least one of the bits in classBits set. Anything outside that range never
// matches: values >= 128 were already rejected, and negative values (for
// example a signed char holding a non-ASCII byte that was widened to int)
// are now rejected too instead of being used as a negative array index.
static bool HasCharacterClass(int c, int classBits) {
	return c >= 0 && c < 128 && (character_classification[c] & classBits) != 0;
}

// Whitespace (bit 1).
static bool IsSpace(int c) {
	return HasCharacterClass(c, 1);
}

// Operator character (bit 2).
static bool IsOperator(int c) {
	return HasCharacterClass(c, 2);
}

// Character that may appear in an identifier (bit 4).
static bool IsIdentifier(int c) {
	return HasCharacterClass(c, 4);
}

// Decimal digit (bit 8); the table also gives '.' this bit.
static bool IsDigit(int c) {
	return HasCharacterClass(c, 8);
}

// Hexadecimal digit (bit 16).
static bool IsHexDigit(int c) {
	return HasCharacterClass(c, 16);
}

// Binary digit (bit 32).
static bool IsBinDigit(int c) {
	return HasCharacterClass(c, 32);
}

// Letter (bit 64); the table also gives '_' this bit.
static bool IsLetter(int c) {
	return HasCharacterClass(c, 64);
}
// Maps the ASCII letters 'A'..'Z' to 'a'..'z'; any other value is returned
// unchanged.
static int LowerCase(int c)
{
	const bool isUpperAscii = ('A' <= c) && (c <= 'Z');
	return isUpperAscii ? c + ('a' - 'A') : c;
}
101 static int CheckBlitzFoldPoint(char const *token, int &level) {
102 if (!strcmp(token, "function") ||
103 !strcmp(token, "type")) {
104 level |= SC_FOLDLEVELHEADERFLAG;
105 return 1;
107 if (!strcmp(token, "end function") ||
108 !strcmp(token, "end type")) {
109 return -1;
111 return 0;
114 static int CheckPureFoldPoint(char const *token, int &level) {
115 if (!strcmp(token, "procedure") ||
116 !strcmp(token, "enumeration") ||
117 !strcmp(token, "interface") ||
118 !strcmp(token, "structure")) {
119 level |= SC_FOLDLEVELHEADERFLAG;
120 return 1;
122 if (!strcmp(token, "endprocedure") ||
123 !strcmp(token, "endenumeration") ||
124 !strcmp(token, "endinterface") ||
125 !strcmp(token, "endstructure")) {
126 return -1;
128 return 0;
131 static int CheckFreeFoldPoint(char const *token, int &level) {
132 if (!strcmp(token, "function") ||
133 !strcmp(token, "sub") ||
134 !strcmp(token, "enum") ||
135 !strcmp(token, "type") ||
136 !strcmp(token, "union") ||
137 !strcmp(token, "property") ||
138 !strcmp(token, "destructor") ||
139 !strcmp(token, "constructor")) {
140 level |= SC_FOLDLEVELHEADERFLAG;
141 return 1;
143 if (!strcmp(token, "end function") ||
144 !strcmp(token, "end sub") ||
145 !strcmp(token, "end enum") ||
146 !strcmp(token, "end type") ||
147 !strcmp(token, "end union") ||
148 !strcmp(token, "end property") ||
149 !strcmp(token, "end destructor") ||
150 !strcmp(token, "end constructor")) {
151 return -1;
153 return 0;
156 // An individual named option for use in an OptionSet
// Options used for LexerBasic.
// Each member is bound to a property name by OptionSetBasic.
struct OptionsBasic {
	bool fold;                      // "fold"
	bool foldSyntaxBased;           // "fold.basic.syntax.based"
	bool foldCommentExplicit;       // "fold.basic.comment.explicit"
	std::string foldExplicitStart;  // "fold.basic.explicit.start"
	std::string foldExplicitEnd;    // "fold.basic.explicit.end"
	bool foldExplicitAnywhere;      // "fold.basic.explicit.anywhere"
	bool foldCompact;               // "fold.compact"

	// Defaults: folding off, syntax based and compact folding on,
	// explicit fold markers off with no custom marker strings.
	OptionsBasic() :
		fold(false),
		foldSyntaxBased(true),
		foldCommentExplicit(false),
		foldExplicitStart(""),
		foldExplicitEnd(""),
		foldExplicitAnywhere(false),
		foldCompact(true) {
	}
};
// Descriptions of the BlitzBasic keyword sets, one per keyword list index.
// NOTE(review): the list is closed with a null entry; confirm this matches
// what the consumers of the array (OptionSet / LexerModule) expect.
static const char * const blitzbasicWordListDesc[] = {
	"BlitzBasic Keywords",  // keyword list 0
	"user1",                // keyword list 1
	"user2",                // keyword list 2
	"user3",                // keyword list 3
	0                       // end of list
};
// Descriptions of the PureBasic keyword sets, one per keyword list index.
// NOTE(review): the list is closed with a null entry; confirm this matches
// what the consumers of the array (OptionSet / LexerModule) expect.
static const char * const purebasicWordListDesc[] = {
	"PureBasic Keywords",               // keyword list 0
	"PureBasic PreProcessor Keywords",  // keyword list 1
	"user defined 1",                   // keyword list 2
	"user defined 2",                   // keyword list 3
	0                                   // end of list
};
// Descriptions of the FreeBasic keyword sets, one per keyword list index.
// NOTE(review): the list is closed with a null entry; confirm this matches
// what the consumers of the array (OptionSet / LexerModule) expect.
static const char * const freebasicWordListDesc[] = {
	"FreeBasic Keywords",               // keyword list 0
	"FreeBasic PreProcessor Keywords",  // keyword list 1
	"user defined 1",                   // keyword list 2
	"user defined 2",                   // keyword list 3
	0                                   // end of list
};
202 struct OptionSetBasic : public OptionSet<OptionsBasic> {
203 OptionSetBasic(const char * const wordListDescriptions[]) {
204 DefineProperty("fold", &OptionsBasic::fold);
206 DefineProperty("fold.basic.syntax.based", &OptionsBasic::foldSyntaxBased,
207 "Set this property to 0 to disable syntax based folding.");
209 DefineProperty("fold.basic.comment.explicit", &OptionsBasic::foldCommentExplicit,
210 "This option enables folding explicit fold points when using the Basic lexer. "
211 "Explicit fold points allows adding extra folding by placing a ;{ (BB/PB) or '{ (FB) comment at the start "
212 "and a ;} (BB/PB) or '} (FB) at the end of a section that should be folded.");
214 DefineProperty("fold.basic.explicit.start", &OptionsBasic::foldExplicitStart,
215 "The string to use for explicit fold start points, replacing the standard ;{ (BB/PB) or '{ (FB).");
217 DefineProperty("fold.basic.explicit.end", &OptionsBasic::foldExplicitEnd,
218 "The string to use for explicit fold end points, replacing the standard ;} (BB/PB) or '} (FB).");
220 DefineProperty("fold.basic.explicit.anywhere", &OptionsBasic::foldExplicitAnywhere,
221 "Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
223 DefineProperty("fold.compact", &OptionsBasic::foldCompact);
225 DefineWordListSets(wordListDescriptions);
229 class LexerBasic : public ILexer {
230 char comment_char;
231 int (*CheckFoldPoint)(char const *, int &);
232 WordList keywordlists[4];
233 OptionsBasic options;
234 OptionSetBasic osBasic;
235 public:
236 LexerBasic(char comment_char_, int (*CheckFoldPoint_)(char const *, int &), const char * const wordListDescriptions[]) :
237 comment_char(comment_char_),
238 CheckFoldPoint(CheckFoldPoint_),
239 osBasic(wordListDescriptions) {
241 virtual ~LexerBasic() {
243 void SCI_METHOD Release() {
244 delete this;
246 int SCI_METHOD Version() const {
247 return lvOriginal;
249 const char * SCI_METHOD PropertyNames() {
250 return osBasic.PropertyNames();
252 int SCI_METHOD PropertyType(const char *name) {
253 return osBasic.PropertyType(name);
255 const char * SCI_METHOD DescribeProperty(const char *name) {
256 return osBasic.DescribeProperty(name);
258 int SCI_METHOD PropertySet(const char *key, const char *val);
259 const char * SCI_METHOD DescribeWordListSets() {
260 return osBasic.DescribeWordListSets();
262 int SCI_METHOD WordListSet(int n, const char *wl);
263 void SCI_METHOD Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
264 void SCI_METHOD Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
266 void * SCI_METHOD PrivateCall(int, void *) {
267 return 0;
269 static ILexer *LexerFactoryBlitzBasic() {
270 return new LexerBasic(';', CheckBlitzFoldPoint, blitzbasicWordListDesc);
272 static ILexer *LexerFactoryPureBasic() {
273 return new LexerBasic(';', CheckPureFoldPoint, purebasicWordListDesc);
275 static ILexer *LexerFactoryFreeBasic() {
276 return new LexerBasic('\'', CheckFreeFoldPoint, freebasicWordListDesc );
280 int SCI_METHOD LexerBasic::PropertySet(const char *key, const char *val) {
281 if (osBasic.PropertySet(&options, key, val)) {
282 return 0;
284 return -1;
287 int SCI_METHOD LexerBasic::WordListSet(int n, const char *wl) {
288 WordList *wordListN = 0;
289 switch (n) {
290 case 0:
291 wordListN = &keywordlists[0];
292 break;
293 case 1:
294 wordListN = &keywordlists[1];
295 break;
296 case 2:
297 wordListN = &keywordlists[2];
298 break;
299 case 3:
300 wordListN = &keywordlists[3];
301 break;
303 int firstModification = -1;
304 if (wordListN) {
305 WordList wlNew;
306 wlNew.Set(wl);
307 if (*wordListN != wlNew) {
308 wordListN->Set(wl);
309 firstModification = 0;
312 return firstModification;
315 void SCI_METHOD LexerBasic::Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
316 LexAccessor styler(pAccess);
318 bool wasfirst = true, isfirst = true; // true if first token in a line
319 styler.StartAt(startPos);
320 int styleBeforeKeyword = SCE_B_DEFAULT;
322 StyleContext sc(startPos, length, initStyle, styler);
324 // Can't use sc.More() here else we miss the last character
325 for (; ; sc.Forward()) {
326 if (sc.state == SCE_B_IDENTIFIER) {
327 if (!IsIdentifier(sc.ch)) {
328 // Labels
329 if (wasfirst && sc.Match(':')) {
330 sc.ChangeState(SCE_B_LABEL);
331 sc.ForwardSetState(SCE_B_DEFAULT);
332 } else {
333 char s[100];
334 int kstates[4] = {
335 SCE_B_KEYWORD,
336 SCE_B_KEYWORD2,
337 SCE_B_KEYWORD3,
338 SCE_B_KEYWORD4,
340 sc.GetCurrentLowered(s, sizeof(s));
341 for (int i = 0; i < 4; i++) {
342 if (keywordlists[i].InList(s)) {
343 sc.ChangeState(kstates[i]);
346 // Types, must set them as operator else they will be
347 // matched as number/constant
348 if (sc.Match('.') || sc.Match('$') || sc.Match('%') ||
349 sc.Match('#')) {
350 sc.SetState(SCE_B_OPERATOR);
351 } else {
352 sc.SetState(SCE_B_DEFAULT);
356 } else if (sc.state == SCE_B_OPERATOR) {
357 if (!IsOperator(sc.ch) || sc.Match('#'))
358 sc.SetState(SCE_B_DEFAULT);
359 } else if (sc.state == SCE_B_LABEL) {
360 if (!IsIdentifier(sc.ch))
361 sc.SetState(SCE_B_DEFAULT);
362 } else if (sc.state == SCE_B_CONSTANT) {
363 if (!IsIdentifier(sc.ch))
364 sc.SetState(SCE_B_DEFAULT);
365 } else if (sc.state == SCE_B_NUMBER) {
366 if (!IsDigit(sc.ch))
367 sc.SetState(SCE_B_DEFAULT);
368 } else if (sc.state == SCE_B_HEXNUMBER) {
369 if (!IsHexDigit(sc.ch))
370 sc.SetState(SCE_B_DEFAULT);
371 } else if (sc.state == SCE_B_BINNUMBER) {
372 if (!IsBinDigit(sc.ch))
373 sc.SetState(SCE_B_DEFAULT);
374 } else if (sc.state == SCE_B_STRING) {
375 if (sc.ch == '"') {
376 sc.ForwardSetState(SCE_B_DEFAULT);
378 if (sc.atLineEnd) {
379 sc.ChangeState(SCE_B_ERROR);
380 sc.SetState(SCE_B_DEFAULT);
382 } else if (sc.state == SCE_B_COMMENT || sc.state == SCE_B_PREPROCESSOR) {
383 if (sc.atLineEnd) {
384 sc.SetState(SCE_B_DEFAULT);
386 } else if (sc.state == SCE_B_DOCLINE) {
387 if (sc.atLineEnd) {
388 sc.SetState(SCE_B_DEFAULT);
389 } else if (sc.ch == '\\' || sc.ch == '@') {
390 if (IsLetter(sc.chNext) && sc.chPrev != '\\') {
391 styleBeforeKeyword = sc.state;
392 sc.SetState(SCE_B_DOCKEYWORD);
395 } else if (sc.state == SCE_B_DOCKEYWORD) {
396 if (IsSpace(sc.ch)) {
397 sc.SetState(styleBeforeKeyword);
398 } else if (sc.atLineEnd && styleBeforeKeyword == SCE_B_DOCLINE) {
399 sc.SetState(SCE_B_DEFAULT);
401 } else if (sc.state == SCE_B_COMMENTBLOCK) {
402 if (sc.Match("\'/")) {
403 sc.Forward();
404 sc.ForwardSetState(SCE_B_DEFAULT);
406 } else if (sc.state == SCE_B_DOCBLOCK) {
407 if (sc.Match("\'/")) {
408 sc.Forward();
409 sc.ForwardSetState(SCE_B_DEFAULT);
410 } else if (sc.ch == '\\' || sc.ch == '@') {
411 if (IsLetter(sc.chNext) && sc.chPrev != '\\') {
412 styleBeforeKeyword = sc.state;
413 sc.SetState(SCE_B_DOCKEYWORD);
418 if (sc.atLineStart)
419 isfirst = true;
421 if (sc.state == SCE_B_DEFAULT || sc.state == SCE_B_ERROR) {
422 if (isfirst && sc.Match('.') && comment_char != '\'') {
423 sc.SetState(SCE_B_LABEL);
424 } else if (isfirst && sc.Match('#')) {
425 wasfirst = isfirst;
426 sc.SetState(SCE_B_IDENTIFIER);
427 } else if (sc.Match(comment_char)) {
428 // Hack to make deprecated QBASIC '$Include show
429 // up in freebasic with SCE_B_PREPROCESSOR.
430 if (comment_char == '\'' && sc.Match(comment_char, '$'))
431 sc.SetState(SCE_B_PREPROCESSOR);
432 else if (sc.Match("\'*") || sc.Match("\'!")) {
433 sc.SetState(SCE_B_DOCLINE);
434 } else {
435 sc.SetState(SCE_B_COMMENT);
437 } else if (sc.Match("/\'")) {
438 if (sc.Match("/\'*") || sc.Match("/\'!")) { // Support of gtk-doc/Doxygen doc. style
439 sc.SetState(SCE_B_DOCBLOCK);
440 } else {
441 sc.SetState(SCE_B_COMMENTBLOCK);
443 sc.Forward(); // Eat the ' so it isn't used for the end of the comment
444 } else if (sc.Match('"')) {
445 sc.SetState(SCE_B_STRING);
446 } else if (IsDigit(sc.ch)) {
447 sc.SetState(SCE_B_NUMBER);
448 } else if (sc.Match('$') || sc.Match("&h") || sc.Match("&H") || sc.Match("&o") || sc.Match("&O")) {
449 sc.SetState(SCE_B_HEXNUMBER);
450 } else if (sc.Match('%') || sc.Match("&b") || sc.Match("&B")) {
451 sc.SetState(SCE_B_BINNUMBER);
452 } else if (sc.Match('#')) {
453 sc.SetState(SCE_B_CONSTANT);
454 } else if (IsOperator(sc.ch)) {
455 sc.SetState(SCE_B_OPERATOR);
456 } else if (IsIdentifier(sc.ch)) {
457 wasfirst = isfirst;
458 sc.SetState(SCE_B_IDENTIFIER);
459 } else if (!IsSpace(sc.ch)) {
460 sc.SetState(SCE_B_ERROR);
464 if (!IsSpace(sc.ch))
465 isfirst = false;
467 if (!sc.More())
468 break;
470 sc.Complete();
474 void SCI_METHOD LexerBasic::Fold(unsigned int startPos, int length, int /* initStyle */, IDocument *pAccess) {
476 if (!options.fold)
477 return;
479 LexAccessor styler(pAccess);
481 int line = styler.GetLine(startPos);
482 int level = styler.LevelAt(line);
483 int go = 0, done = 0;
484 int endPos = startPos + length;
485 char word[256];
486 int wordlen = 0;
487 const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
488 int cNext = styler[startPos];
490 // Scan for tokens at the start of the line (they may include
491 // whitespace, for tokens like "End Function"
492 for (int i = startPos; i < endPos; i++) {
493 int c = cNext;
494 cNext = styler.SafeGetCharAt(i + 1);
495 bool atEOL = (c == '\r' && cNext != '\n') || (c == '\n');
496 if (options.foldSyntaxBased && !done && !go) {
497 if (wordlen) { // are we scanning a token already?
498 word[wordlen] = static_cast<char>(LowerCase(c));
499 if (!IsIdentifier(c)) { // done with token
500 word[wordlen] = '\0';
501 go = CheckFoldPoint(word, level);
502 if (!go) {
503 // Treat any whitespace as single blank, for
504 // things like "End Function".
505 if (IsSpace(c) && IsIdentifier(word[wordlen - 1])) {
506 word[wordlen] = ' ';
507 if (wordlen < 255)
508 wordlen++;
510 else // done with this line
511 done = 1;
513 } else if (wordlen < 255) {
514 wordlen++;
516 } else { // start scanning at first non-whitespace character
517 if (!IsSpace(c)) {
518 if (IsIdentifier(c)) {
519 word[0] = static_cast<char>(LowerCase(c));
520 wordlen = 1;
521 } else // done with this line
522 done = 1;
526 if (options.foldCommentExplicit && ((styler.StyleAt(i) == SCE_B_COMMENT) || options.foldExplicitAnywhere)) {
527 if (userDefinedFoldMarkers) {
528 if (styler.Match(i, options.foldExplicitStart.c_str())) {
529 level |= SC_FOLDLEVELHEADERFLAG;
530 go = 1;
531 } else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
532 go = -1;
534 } else {
535 if (c == comment_char) {
536 if (cNext == '{') {
537 level |= SC_FOLDLEVELHEADERFLAG;
538 go = 1;
539 } else if (cNext == '}') {
540 go = -1;
545 if (atEOL) { // line end
546 if (!done && wordlen == 0 && options.foldCompact) // line was only space
547 level |= SC_FOLDLEVELWHITEFLAG;
548 if (level != styler.LevelAt(line))
549 styler.SetLevel(line, level);
550 level += go;
551 line++;
552 // reset state
553 wordlen = 0;
554 level &= ~SC_FOLDLEVELHEADERFLAG;
555 level &= ~SC_FOLDLEVELWHITEFLAG;
556 go = 0;
557 done = 0;
562 LexerModule lmBlitzBasic(SCLEX_BLITZBASIC, LexerBasic::LexerFactoryBlitzBasic, "blitzbasic", blitzbasicWordListDesc);
564 LexerModule lmPureBasic(SCLEX_PUREBASIC, LexerBasic::LexerFactoryPureBasic, "purebasic", purebasicWordListDesc);
566 LexerModule lmFreeBasic(SCLEX_FREEBASIC, LexerBasic::LexerFactoryFreeBasic, "freebasic", freebasicWordListDesc);