1 // Scintilla source code edit control
3 ** Lexer for Lua language.
5 ** Written by Paul Winwood.
6 ** Folder by Alexey Yutkin.
7 ** Modified by Marcos E. Wurzius & Philippe Lhoste
20 #include "Scintilla.h"
23 #include "StringCopy.h"
25 #include "LexAccessor.h"
27 #include "StyleContext.h"
28 #include "CharacterSet.h"
29 #include "LexerModule.h"
31 using namespace Scintilla
;
33 // Test for [=[ ... ]=] delimiters, returns 0 if it's only a [ or ],
34 // returns 1 for [[ or ]], returns >=2 for [=[ or ]=] and so on.
35 // The maximum number of '=' characters allowed is 254.
// sc is the style context whose current character (sc.ch) is the bracket being tested.
36 static int LongDelimCheck(StyleContext
&sc
) {
// Walk over the run of '=' characters that follows the bracket; the count is capped below 0xFF.
38 while (sc
.GetRelative(sep
) == '=' && sep
< 0xFF)
// The delimiter is complete only if the '=' run is followed by the same bracket character as sc.ch.
40 if (sc
.GetRelative(sep
) == sc
.ch
)
// Colourise a range of a Lua document.
// Walks the text with a StyleContext and assigns SCE_LUA_* styles for numbers,
// identifiers and keywords, quoted strings, long strings, comments, operators,
// labels and the obsolete '$' preprocessor lines.
// A per-line state (nesting level, '=' separator count and the \z whitespace flag)
// is stored with SetLineState so that lexing can resume inside a multi-line
// string or block comment.
// keywordlists: eight word lists; list 0 is matched as SCE_LUA_WORD and
// lists 1..7 as SCE_LUA_WORD2..SCE_LUA_WORD8.
45 static void ColouriseLuaDoc(
46 Sci_PositionU startPos
,
49 WordList
*keywordlists
[],
// Named references to the eight keyword lists.
52 const WordList
&keywords
= *keywordlists
[0];
53 const WordList
&keywords2
= *keywordlists
[1];
54 const WordList
&keywords3
= *keywordlists
[2];
55 const WordList
&keywords4
= *keywordlists
[3];
56 const WordList
&keywords5
= *keywordlists
[4];
57 const WordList
&keywords6
= *keywordlists
[5];
58 const WordList
&keywords7
= *keywordlists
[6];
59 const WordList
&keywords8
= *keywordlists
[7];
61 // Accepts accented characters
62 CharacterSet
setWordStart(CharacterSet::setAlpha
, "_", 0x80, true);
63 CharacterSet
setWord(CharacterSet::setAlphaNum
, "_", 0x80, true);
64 // Not exactly following number definition (several dots are seen as OK, etc.)
65 // but probably enough in most cases. [pP] is for hex floats.
66 CharacterSet
setNumber(CharacterSet::setDigits
, ".-+abcdefpABCDEFP");
67 CharacterSet
setExponent(CharacterSet::setNone
, "eEpP");
68 CharacterSet
setLuaOperator(CharacterSet::setNone
, "*/-+()={}~[];<>,.^%:#&|");
69 CharacterSet
setEscapeSkip(CharacterSet::setNone
, "\"'\\");
71 Sci_Position currentLine
= styler
.GetLine(startPos
);
72 // Initialize long string [[ ... ]] or block comment --[[ ... ]] nesting level,
73 // if we are inside such a string. Block comment was introduced in Lua 5.0,
74 // blocks with separators [=[ ... ]=] in Lua 5.1.
75 // Continuation of a string (\z whitespace escaping) is controlled by stringWs.
79 if (initStyle
== SCE_LUA_LITERALSTRING
|| initStyle
== SCE_LUA_COMMENT
||
80 initStyle
== SCE_LUA_STRING
|| initStyle
== SCE_LUA_CHARACTER
) {
// Unpack the state saved for the previous line:
// bits 0-7 = separator count, bit 8 = \z whitespace flag, bits 9 and up = nesting level.
81 const int lineState
= styler
.GetLineState(currentLine
- 1);
82 nestLevel
= lineState
>> 9;
83 sepCount
= lineState
& 0xFF;
84 stringWs
= lineState
& 0x100;
87 // results of identifier/keyword matching
88 Sci_Position idenPos
= 0;
89 Sci_Position idenWordPos
= 0;
90 int idenStyle
= SCE_LUA_IDENTIFIER
;
91 bool foundGoto
= false;
93 // Do not leak onto next line
94 if (initStyle
== SCE_LUA_STRINGEOL
|| initStyle
== SCE_LUA_COMMENTLINE
|| initStyle
== SCE_LUA_PREPROCESSOR
) {
95 initStyle
= SCE_LUA_DEFAULT
;
98 StyleContext
sc(startPos
, length
, initStyle
, styler
);
99 if (startPos
== 0 && sc
.ch
== '#' && sc
.chNext
== '!') {
100 // shbang line: "#!" is a comment only if located at the start of the script
101 sc
.SetState(SCE_LUA_COMMENTLINE
);
103 for (; sc
.More(); sc
.Forward()) {
105 // Update the line state, so it can be seen by next line
106 currentLine
= styler
.GetLine(sc
.currentPos
);
108 case SCE_LUA_LITERALSTRING
:
109 case SCE_LUA_COMMENT
:
111 case SCE_LUA_CHARACTER
:
112 // Inside a literal string, block comment or string, we set the line state
// Same packing as decoded at the start of this function: nestLevel in bits 9 and up,
// stringWs OR-ed in unshifted (bit 8), sepCount in the low bits.
113 styler
.SetLineState(currentLine
, (nestLevel
<< 9) | stringWs
| sepCount
);
116 // Reset the line state
117 styler
.SetLineState(currentLine
, 0);
121 if (sc
.atLineStart
&& (sc
.state
== SCE_LUA_STRING
)) {
122 // Prevent SCE_LUA_STRINGEOL from leaking back to previous line
123 sc
.SetState(SCE_LUA_STRING
);
126 // Handle string line continuation
127 if ((sc
.state
== SCE_LUA_STRING
|| sc
.state
== SCE_LUA_CHARACTER
) &&
129 if (sc
.chNext
== '\n' || sc
.chNext
== '\r') {
131 if (sc
.ch
== '\r' && sc
.chNext
== '\n') {
138 // Determine if the current state should terminate.
139 if (sc
.state
== SCE_LUA_OPERATOR
) {
140 if (sc
.ch
== ':' && sc
.chPrev
== ':') { // :: <label> :: forward scan
143 while (IsASpaceOrTab(sc
.GetRelative(ln
))) // skip over spaces/tabs
145 Sci_Position ws1
= ln
;
146 if (setWordStart
.Contains(sc
.GetRelative(ln
))) {
149 while (setWord
.Contains(c
= sc
.GetRelative(ln
))) { // get potential label
151 s
[i
++] = static_cast<char>(c
);
154 s
[i
] = '\0'; Sci_Position lbl
= ln
;
155 if (!keywords
.InList(s
)) {
156 while (IsASpaceOrTab(sc
.GetRelative(ln
))) // skip over spaces/tabs
158 Sci_Position ws2
= ln
- lbl
;
159 if (sc
.GetRelative(ln
) == ':' && sc
.GetRelative(ln
+ 1) == ':') {
160 // final :: found, complete valid label construct
161 sc
.ChangeState(SCE_LUA_LABEL
);
163 sc
.SetState(SCE_LUA_DEFAULT
);
164 sc
.ForwardBytes(ws1
);
166 sc
.SetState(SCE_LUA_LABEL
);
167 sc
.ForwardBytes(lbl
- ws1
);
169 sc
.SetState(SCE_LUA_DEFAULT
);
170 sc
.ForwardBytes(ws2
);
172 sc
.SetState(SCE_LUA_LABEL
);
178 sc
.SetState(SCE_LUA_DEFAULT
);
179 } else if (sc
.state
== SCE_LUA_NUMBER
) {
180 // We stop the number definition on non-numerical non-dot non-eEpP non-sign non-hexdigit char
181 if (!setNumber
.Contains(sc
.ch
)) {
182 sc
.SetState(SCE_LUA_DEFAULT
);
183 } else if (sc
.ch
== '-' || sc
.ch
== '+') {
// A sign continues the number only directly after an exponent character (eEpP).
184 if (!setExponent
.Contains(sc
.chPrev
))
185 sc
.SetState(SCE_LUA_DEFAULT
);
187 } else if (sc
.state
== SCE_LUA_IDENTIFIER
) {
188 idenPos
--; // commit already-scanned identifier/word parts
189 if (idenWordPos
> 0) {
191 sc
.ChangeState(idenStyle
);
192 sc
.ForwardBytes(idenWordPos
);
193 idenPos
-= idenWordPos
;
195 sc
.SetState(SCE_LUA_IDENTIFIER
);
196 sc
.ForwardBytes(idenPos
);
199 sc
.ForwardBytes(idenPos
);
201 sc
.SetState(SCE_LUA_DEFAULT
);
202 if (foundGoto
) { // goto <label> forward scan
203 while (IsASpaceOrTab(sc
.ch
) && !sc
.atLineEnd
)
205 if (setWordStart
.Contains(sc
.ch
)) {
206 sc
.SetState(SCE_LUA_LABEL
);
208 while (setWord
.Contains(sc
.ch
))
211 sc
.GetCurrent(s
, sizeof(s
));
212 if (keywords
.InList(s
)) // labels cannot be keywords
213 sc
.ChangeState(SCE_LUA_WORD
);
215 sc
.SetState(SCE_LUA_DEFAULT
);
217 } else if (sc
.state
== SCE_LUA_COMMENTLINE
|| sc
.state
== SCE_LUA_PREPROCESSOR
) {
219 sc
.ForwardSetState(SCE_LUA_DEFAULT
);
221 } else if (sc
.state
== SCE_LUA_STRING
) {
223 if (!IsASpace(sc
.ch
))
227 if (setEscapeSkip
.Contains(sc
.chNext
)) {
229 } else if (sc
.chNext
== 'z') {
233 } else if (sc
.ch
== '\"') {
234 sc
.ForwardSetState(SCE_LUA_DEFAULT
);
235 } else if (stringWs
== 0 && sc
.atLineEnd
) {
236 sc
.ChangeState(SCE_LUA_STRINGEOL
);
237 sc
.ForwardSetState(SCE_LUA_DEFAULT
);
239 } else if (sc
.state
== SCE_LUA_CHARACTER
) {
241 if (!IsASpace(sc
.ch
))
245 if (setEscapeSkip
.Contains(sc
.chNext
)) {
247 } else if (sc
.chNext
== 'z') {
251 } else if (sc
.ch
== '\'') {
252 sc
.ForwardSetState(SCE_LUA_DEFAULT
);
253 } else if (stringWs
== 0 && sc
.atLineEnd
) {
254 sc
.ChangeState(SCE_LUA_STRINGEOL
);
255 sc
.ForwardSetState(SCE_LUA_DEFAULT
);
257 } else if (sc
.state
== SCE_LUA_LITERALSTRING
|| sc
.state
== SCE_LUA_COMMENT
) {
259 const int sep
= LongDelimCheck(sc
);
260 if (sep
== 1 && sepCount
== 1) { // [[-only allowed to nest
264 } else if (sc
.ch
== ']') {
265 int sep
= LongDelimCheck(sc
);
266 if (sep
== 1 && sepCount
== 1) { // un-nest with ]]-only
269 if (nestLevel
== 0) {
270 sc
.ForwardSetState(SCE_LUA_DEFAULT
);
272 } else if (sep
> 1 && sep
== sepCount
) { // ]=]-style delim
274 sc
.ForwardSetState(SCE_LUA_DEFAULT
);
279 // Determine if a new state should be entered.
280 if (sc
.state
== SCE_LUA_DEFAULT
) {
281 if (IsADigit(sc
.ch
) || (sc
.ch
== '.' && IsADigit(sc
.chNext
))) {
282 sc
.SetState(SCE_LUA_NUMBER
);
283 if (sc
.ch
== '0' && toupper(sc
.chNext
) == 'X') {
286 } else if (setWordStart
.Contains(sc
.ch
)) {
287 // For matching various identifiers with dots and colons, multiple
288 // matches are done as identifier segments are added. Longest match is
289 // set to a word style. The non-matched part is in identifier style.
293 idenStyle
= SCE_LUA_IDENTIFIER
;
298 const Sci_Position idenPosOld
= idenPos
;
299 std::string identSeg
;
300 identSeg
+= static_cast<char>(sc
.GetRelative(idenPos
++));
301 while (setWord
.Contains(c
= sc
.GetRelative(idenPos
))) {
302 identSeg
+= static_cast<char>(c
);
305 if (keywords
.InList(identSeg
.c_str()) && (idenPosOld
> 0)) {
306 idenPos
= idenPosOld
- 1; // keywords cannot mix
// Look the accumulated identifier up in each word list in turn; the first list containing it decides the style.
311 const char* s
= ident
.c_str();
312 int newStyle
= SCE_LUA_IDENTIFIER
;
313 if (keywords
.InList(s
)) {
314 newStyle
= SCE_LUA_WORD
;
315 } else if (keywords2
.InList(s
)) {
316 newStyle
= SCE_LUA_WORD2
;
317 } else if (keywords3
.InList(s
)) {
318 newStyle
= SCE_LUA_WORD3
;
319 } else if (keywords4
.InList(s
)) {
320 newStyle
= SCE_LUA_WORD4
;
321 } else if (keywords5
.InList(s
)) {
322 newStyle
= SCE_LUA_WORD5
;
323 } else if (keywords6
.InList(s
)) {
324 newStyle
= SCE_LUA_WORD6
;
325 } else if (keywords7
.InList(s
)) {
326 newStyle
= SCE_LUA_WORD7
;
327 } else if (keywords8
.InList(s
)) {
328 newStyle
= SCE_LUA_WORD8
;
// Remember the style and length of the latest prefix that matched a word list.
330 if (newStyle
!= SCE_LUA_IDENTIFIER
) {
331 idenStyle
= newStyle
;
332 idenWordPos
= idenPos
;
334 if (idenStyle
== SCE_LUA_WORD
) // keywords cannot mix
336 cNext
= sc
.GetRelative(idenPos
+ 1);
337 if ((c
== '.' || c
== ':') && setWordStart
.Contains(cNext
)) {
338 ident
+= static_cast<char>(c
);
344 if ((idenStyle
== SCE_LUA_WORD
) && (ident
.compare("goto") == 0)) {
347 sc
.SetState(SCE_LUA_IDENTIFIER
);
348 } else if (sc
.ch
== '\"') {
349 sc
.SetState(SCE_LUA_STRING
);
351 } else if (sc
.ch
== '\'') {
352 sc
.SetState(SCE_LUA_CHARACTER
);
354 } else if (sc
.ch
== '[') {
355 sepCount
= LongDelimCheck(sc
);
357 sc
.SetState(SCE_LUA_OPERATOR
);
360 sc
.SetState(SCE_LUA_LITERALSTRING
);
361 sc
.Forward(sepCount
);
363 } else if (sc
.Match('-', '-')) {
364 sc
.SetState(SCE_LUA_COMMENTLINE
);
// "--[" may open a block comment, in which case the line-comment state is changed to SCE_LUA_COMMENT.
365 if (sc
.Match("--[")) {
367 sepCount
= LongDelimCheck(sc
);
370 sc
.ChangeState(SCE_LUA_COMMENT
);
371 sc
.Forward(sepCount
);
376 } else if (sc
.atLineStart
&& sc
.Match('$')) {
377 sc
.SetState(SCE_LUA_PREPROCESSOR
); // Obsolete since Lua 4.0, but still in old code
378 } else if (setLuaOperator
.Contains(sc
.ch
)) {
379 sc
.SetState(SCE_LUA_OPERATOR
);
// Compute fold levels for the range [startPos, startPos + length) of a Lua document.
// Scans character by character using the styles already assigned by the lexer:
// keyword-styled words (if/do/function/repeat and end/elseif/until), the operator
// characters { ( } ) and long string / block comment styles are examined.
// NOTE(review): presumably the first keyword group and the opening brackets raise the
// level and the second group and closing brackets lower it; confirm against the full source.
// initStyle and the word lists are unused.
387 static void FoldLuaDoc(Sci_PositionU startPos
, Sci_Position length
, int /* initStyle */, WordList
*[],
389 const Sci_PositionU lengthDoc
= startPos
+ length
;
390 int visibleChars
= 0;
391 Sci_Position lineCurrent
= styler
.GetLine(startPos
);
392 int levelPrev
= styler
.LevelAt(lineCurrent
) & SC_FOLDLEVELNUMBERMASK
;
393 int levelCurrent
= levelPrev
;
394 char chNext
= styler
[startPos
];
// The "fold.compact" property defaults to 1 (enabled).
395 const bool foldCompact
= styler
.GetPropertyInt("fold.compact", 1) != 0;
396 int styleNext
= styler
.StyleAt(startPos
);
398 for (Sci_PositionU i
= startPos
; i
< lengthDoc
; i
++) {
399 const char ch
= chNext
;
400 chNext
= styler
.SafeGetCharAt(i
+ 1);
401 const int style
= styleNext
;
402 styleNext
= styler
.StyleAt(i
+ 1);
// End of line is a '\n', or a '\r' that is not followed by '\n'.
403 const bool atEOL
= (ch
== '\r' && chNext
!= '\n') || (ch
== '\n');
404 if (style
== SCE_LUA_WORD
) {
// Cheap first-letter filter for the fold keywords tested below
// (if, do, function, end/elseif, repeat, until) before copying the word.
405 if (ch
== 'i' || ch
== 'd' || ch
== 'f' || ch
== 'e' || ch
== 'r' || ch
== 'u') {
// Copy at most 8 word characters starting at i into s.
407 for (Sci_PositionU j
= 0; j
< 8; j
++) {
408 if (!iswordchar(styler
[i
+ j
])) {
411 s
[j
] = styler
[i
+ j
];
415 if ((strcmp(s
, "if") == 0) || (strcmp(s
, "do") == 0) || (strcmp(s
, "function") == 0) || (strcmp(s
, "repeat") == 0)) {
418 if ((strcmp(s
, "end") == 0) || (strcmp(s
, "elseif") == 0) || (strcmp(s
, "until") == 0)) {
422 } else if (style
== SCE_LUA_OPERATOR
) {
423 if (ch
== '{' || ch
== '(') {
425 } else if (ch
== '}' || ch
== ')') {
428 } else if (style
== SCE_LUA_LITERALSTRING
|| style
== SCE_LUA_COMMENT
) {
431 } else if (ch
== ']') {
// A line without visible characters is flagged as white space when fold.compact is enabled.
438 if (visibleChars
== 0 && foldCompact
) {
439 lev
|= SC_FOLDLEVELWHITEFLAG
;
// A line that raises the level and has visible characters becomes a fold header.
441 if ((levelCurrent
> levelPrev
) && (visibleChars
> 0)) {
442 lev
|= SC_FOLDLEVELHEADERFLAG
;
// Only write the level when it differs from the stored one.
444 if (lev
!= styler
.LevelAt(lineCurrent
)) {
445 styler
.SetLevel(lineCurrent
, lev
);
448 levelPrev
= levelCurrent
;
451 if (!isspacechar(ch
)) {
455 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
457 int flagsNext
= styler
.LevelAt(lineCurrent
) & ~SC_FOLDLEVELNUMBERMASK
;
458 styler
.SetLevel(lineCurrent
, levelPrev
| flagsNext
);
// Descriptions of the keyword lists accepted by the Lua lexer;
// passed to the LexerModule registration (lmLua).
461 static const char * const luaWordListDesc
[] = {
464 "String, (table) & math functions",
465 "(coroutines), I/O & system facilities",
// Table describing every style the Lua lexer can produce.
// Each row is: style number, SCE_LUA_* symbolic name, tag(s), description.
475 LexicalClass lexicalClasses
[] = {
476 // Lexer Lua SCLEX_LUA SCE_LUA_:
477 0, "SCE_LUA_DEFAULT", "default", "White space: Visible only in View Whitespace mode (or if it has a back colour)",
478 1, "SCE_LUA_COMMENT", "comment", "Block comment (Lua 5.0)",
479 2, "SCE_LUA_COMMENTLINE", "comment line", "Line comment",
480 3, "SCE_LUA_COMMENTDOC", "comment documentation", "Doc comment -- Not used in Lua (yet?)",
481 4, "SCE_LUA_NUMBER", "literal numeric", "Number",
482 5, "SCE_LUA_WORD", "keyword", "Keyword",
483 6, "SCE_LUA_STRING", "literal string", "(Double quoted) String",
484 7, "SCE_LUA_CHARACTER", "literal string character", "Character (Single quoted string)",
485 8, "SCE_LUA_LITERALSTRING", "literal string", "Literal string",
486 9, "SCE_LUA_PREPROCESSOR", "preprocessor", "Preprocessor (obsolete in Lua 4.0 and up)",
487 10, "SCE_LUA_OPERATOR", "operator", "Operators",
488 11, "SCE_LUA_IDENTIFIER", "identifier", "Identifier (everything else...)",
489 12, "SCE_LUA_STRINGEOL", "error literal string", "End of line where string is not closed",
490 13, "SCE_LUA_WORD2", "identifier", "Other keywords",
491 14, "SCE_LUA_WORD3", "identifier", "Other keywords",
492 15, "SCE_LUA_WORD4", "identifier", "Other keywords",
493 16, "SCE_LUA_WORD5", "identifier", "Other keywords",
494 17, "SCE_LUA_WORD6", "identifier", "Other keywords",
495 18, "SCE_LUA_WORD7", "identifier", "Other keywords",
496 19, "SCE_LUA_WORD8", "identifier", "Other keywords",
497 20, "SCE_LUA_LABEL", "label", "Labels",
// Lexer module object for Lua: ties the SCLEX_LUA id and the name "lua" to the
// colouriser, the folder, the word list descriptions and the lexical class table.
502 LexerModule
lmLua(SCLEX_LUA
, ColouriseLuaDoc
, "lua", FoldLuaDoc
, luaWordListDesc
, lexicalClasses
, ELEMENTS(lexicalClasses
));