Fixed issue #2175: TortoiseGitBlame fails to search if line has non-ascii chars and...
[TortoiseGit.git] / ext / scintilla / lexers / LexLua.cxx
blob a571320d12ec19fa10f62ba1d3b688b78b2a28f3
1 // Scintilla source code edit control
2 /** @file LexLua.cxx
3 ** Lexer for Lua language.
4 **
5 ** Written by Paul Winwood.
6 ** Folder by Alexey Yutkin.
7 ** Modified by Marcos E. Wurzius & Philippe Lhoste
8 **/
10 #include <stdlib.h>
11 #include <string.h>
12 #include <stdio.h>
13 #include <stdarg.h>
14 #include <assert.h>
15 #include <ctype.h>
17 #include "ILexer.h"
18 #include "Scintilla.h"
19 #include "SciLexer.h"
21 #include "WordList.h"
22 #include "LexAccessor.h"
23 #include "Accessor.h"
24 #include "StyleContext.h"
25 #include "CharacterSet.h"
26 #include "LexerModule.h"
28 #ifdef SCI_NAMESPACE
29 using namespace Scintilla;
30 #endif
32 // Test for [=[ ... ]=] delimiters, returns 0 if it's only a [ or ],
33 // return 1 for [[ or ]], returns >=2 for [=[ or ]=] and so on.
34 // The maximum number of '=' characters allowed is 254.
35 static int LongDelimCheck(StyleContext &sc) {
36 int sep = 1;
37 while (sc.GetRelative(sep) == '=' && sep < 0xFF)
38 sep++;
39 if (sc.GetRelative(sep) == sc.ch)
40 return sep;
41 return 0;
44 static void ColouriseLuaDoc(
45 unsigned int startPos,
46 int length,
47 int initStyle,
48 WordList *keywordlists[],
49 Accessor &styler) {
51 WordList &keywords = *keywordlists[0];
52 WordList &keywords2 = *keywordlists[1];
53 WordList &keywords3 = *keywordlists[2];
54 WordList &keywords4 = *keywordlists[3];
55 WordList &keywords5 = *keywordlists[4];
56 WordList &keywords6 = *keywordlists[5];
57 WordList &keywords7 = *keywordlists[6];
58 WordList &keywords8 = *keywordlists[7];
60 // Accepts accented characters
61 CharacterSet setWordStart(CharacterSet::setAlpha, "_", 0x80, true);
62 CharacterSet setWord(CharacterSet::setAlphaNum, "_", 0x80, true);
63 // Not exactly following number definition (several dots are seen as OK, etc.)
64 // but probably enough in most cases. [pP] is for hex floats.
65 CharacterSet setNumber(CharacterSet::setDigits, ".-+abcdefpABCDEFP");
66 CharacterSet setExponent(CharacterSet::setNone, "eEpP");
67 CharacterSet setLuaOperator(CharacterSet::setNone, "*/-+()={}~[];<>,.^%:#");
68 CharacterSet setEscapeSkip(CharacterSet::setNone, "\"'\\");
70 int currentLine = styler.GetLine(startPos);
71 // Initialize long string [[ ... ]] or block comment --[[ ... ]] nesting level,
72 // if we are inside such a string. Block comment was introduced in Lua 5.0,
73 // blocks with separators [=[ ... ]=] in Lua 5.1.
74 // Continuation of a string (\z whitespace escaping) is controlled by stringWs.
75 int nestLevel = 0;
76 int sepCount = 0;
77 int stringWs = 0;
78 if (initStyle == SCE_LUA_LITERALSTRING || initStyle == SCE_LUA_COMMENT ||
79 initStyle == SCE_LUA_STRING || initStyle == SCE_LUA_CHARACTER) {
80 int lineState = styler.GetLineState(currentLine - 1);
81 nestLevel = lineState >> 9;
82 sepCount = lineState & 0xFF;
83 stringWs = lineState & 0x100;
86 // Do not leak onto next line
87 if (initStyle == SCE_LUA_STRINGEOL || initStyle == SCE_LUA_COMMENTLINE || initStyle == SCE_LUA_PREPROCESSOR) {
88 initStyle = SCE_LUA_DEFAULT;
91 StyleContext sc(startPos, length, initStyle, styler);
92 if (startPos == 0 && sc.ch == '#') {
93 // shbang line: # is a comment only if first char of the script
94 sc.SetState(SCE_LUA_COMMENTLINE);
96 for (; sc.More(); sc.Forward()) {
97 if (sc.atLineEnd) {
98 // Update the line state, so it can be seen by next line
99 currentLine = styler.GetLine(sc.currentPos);
100 switch (sc.state) {
101 case SCE_LUA_LITERALSTRING:
102 case SCE_LUA_COMMENT:
103 case SCE_LUA_STRING:
104 case SCE_LUA_CHARACTER:
105 // Inside a literal string, block comment or string, we set the line state
106 styler.SetLineState(currentLine, (nestLevel << 9) | stringWs | sepCount);
107 break;
108 default:
109 // Reset the line state
110 styler.SetLineState(currentLine, 0);
111 break;
114 if (sc.atLineStart && (sc.state == SCE_LUA_STRING)) {
115 // Prevent SCE_LUA_STRINGEOL from leaking back to previous line
116 sc.SetState(SCE_LUA_STRING);
119 // Handle string line continuation
120 if ((sc.state == SCE_LUA_STRING || sc.state == SCE_LUA_CHARACTER) &&
121 sc.ch == '\\') {
122 if (sc.chNext == '\n' || sc.chNext == '\r') {
123 sc.Forward();
124 if (sc.ch == '\r' && sc.chNext == '\n') {
125 sc.Forward();
127 continue;
131 // Determine if the current state should terminate.
132 if (sc.state == SCE_LUA_OPERATOR) {
133 if (sc.ch == ':' && sc.chPrev == ':') { // :: <label> :: forward scan
134 sc.Forward();
135 int ln = 0;
136 while (IsASpaceOrTab(sc.GetRelative(ln))) // skip over spaces/tabs
137 ln++;
138 int ws1 = ln;
139 if (setWordStart.Contains(sc.GetRelative(ln))) {
140 int c, i = 0;
141 char s[100];
142 while (setWord.Contains(c = sc.GetRelative(ln))) { // get potential label
143 if (i < 90)
144 s[i++] = c;
145 ln++;
147 s[i] = '\0'; int lbl = ln;
148 if (!keywords.InList(s)) {
149 while (IsASpaceOrTab(sc.GetRelative(ln))) // skip over spaces/tabs
150 ln++;
151 int ws2 = ln - lbl;
152 if (sc.GetRelative(ln) == ':' && sc.GetRelative(ln + 1) == ':') {
153 // final :: found, complete valid label construct
154 sc.ChangeState(SCE_LUA_LABEL);
155 if (ws1) {
156 sc.SetState(SCE_LUA_DEFAULT);
157 sc.ForwardBytes(ws1);
159 sc.SetState(SCE_LUA_LABEL);
160 sc.ForwardBytes(lbl - ws1);
161 if (ws2) {
162 sc.SetState(SCE_LUA_DEFAULT);
163 sc.ForwardBytes(ws2);
165 sc.SetState(SCE_LUA_LABEL);
166 sc.ForwardBytes(2);
171 sc.SetState(SCE_LUA_DEFAULT);
172 } else if (sc.state == SCE_LUA_NUMBER) {
173 // We stop the number definition on non-numerical non-dot non-eEpP non-sign non-hexdigit char
174 if (!setNumber.Contains(sc.ch)) {
175 sc.SetState(SCE_LUA_DEFAULT);
176 } else if (sc.ch == '-' || sc.ch == '+') {
177 if (!setExponent.Contains(sc.chPrev))
178 sc.SetState(SCE_LUA_DEFAULT);
180 } else if (sc.state == SCE_LUA_IDENTIFIER) {
181 if (!(setWord.Contains(sc.ch) || sc.ch == '.') || sc.Match('.', '.')) {
182 char s[100];
183 sc.GetCurrent(s, sizeof(s));
184 if (keywords.InList(s)) {
185 sc.ChangeState(SCE_LUA_WORD);
186 if (strcmp(s, "goto") == 0) { // goto <label> forward scan
187 sc.SetState(SCE_LUA_DEFAULT);
188 while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
189 sc.Forward();
190 if (setWordStart.Contains(sc.ch)) {
191 sc.SetState(SCE_LUA_LABEL);
192 sc.Forward();
193 while (setWord.Contains(sc.ch))
194 sc.Forward();
195 sc.GetCurrent(s, sizeof(s));
196 if (keywords.InList(s))
197 sc.ChangeState(SCE_LUA_WORD);
199 sc.SetState(SCE_LUA_DEFAULT);
201 } else if (keywords2.InList(s)) {
202 sc.ChangeState(SCE_LUA_WORD2);
203 } else if (keywords3.InList(s)) {
204 sc.ChangeState(SCE_LUA_WORD3);
205 } else if (keywords4.InList(s)) {
206 sc.ChangeState(SCE_LUA_WORD4);
207 } else if (keywords5.InList(s)) {
208 sc.ChangeState(SCE_LUA_WORD5);
209 } else if (keywords6.InList(s)) {
210 sc.ChangeState(SCE_LUA_WORD6);
211 } else if (keywords7.InList(s)) {
212 sc.ChangeState(SCE_LUA_WORD7);
213 } else if (keywords8.InList(s)) {
214 sc.ChangeState(SCE_LUA_WORD8);
216 sc.SetState(SCE_LUA_DEFAULT);
218 } else if (sc.state == SCE_LUA_COMMENTLINE || sc.state == SCE_LUA_PREPROCESSOR) {
219 if (sc.atLineEnd) {
220 sc.ForwardSetState(SCE_LUA_DEFAULT);
222 } else if (sc.state == SCE_LUA_STRING) {
223 if (stringWs) {
224 if (!IsASpace(sc.ch))
225 stringWs = 0;
227 if (sc.ch == '\\') {
228 if (setEscapeSkip.Contains(sc.chNext)) {
229 sc.Forward();
230 } else if (sc.chNext == 'z') {
231 sc.Forward();
232 stringWs = 0x100;
234 } else if (sc.ch == '\"') {
235 sc.ForwardSetState(SCE_LUA_DEFAULT);
236 } else if (stringWs == 0 && sc.atLineEnd) {
237 sc.ChangeState(SCE_LUA_STRINGEOL);
238 sc.ForwardSetState(SCE_LUA_DEFAULT);
240 } else if (sc.state == SCE_LUA_CHARACTER) {
241 if (stringWs) {
242 if (!IsASpace(sc.ch))
243 stringWs = 0;
245 if (sc.ch == '\\') {
246 if (setEscapeSkip.Contains(sc.chNext)) {
247 sc.Forward();
248 } else if (sc.chNext == 'z') {
249 sc.Forward();
250 stringWs = 0x100;
252 } else if (sc.ch == '\'') {
253 sc.ForwardSetState(SCE_LUA_DEFAULT);
254 } else if (stringWs == 0 && sc.atLineEnd) {
255 sc.ChangeState(SCE_LUA_STRINGEOL);
256 sc.ForwardSetState(SCE_LUA_DEFAULT);
258 } else if (sc.state == SCE_LUA_LITERALSTRING || sc.state == SCE_LUA_COMMENT) {
259 if (sc.ch == '[') {
260 int sep = LongDelimCheck(sc);
261 if (sep == 1 && sepCount == 1) { // [[-only allowed to nest
262 nestLevel++;
263 sc.Forward();
265 } else if (sc.ch == ']') {
266 int sep = LongDelimCheck(sc);
267 if (sep == 1 && sepCount == 1) { // un-nest with ]]-only
268 nestLevel--;
269 sc.Forward();
270 if (nestLevel == 0) {
271 sc.ForwardSetState(SCE_LUA_DEFAULT);
273 } else if (sep > 1 && sep == sepCount) { // ]=]-style delim
274 sc.Forward(sep);
275 sc.ForwardSetState(SCE_LUA_DEFAULT);
280 // Determine if a new state should be entered.
281 if (sc.state == SCE_LUA_DEFAULT) {
282 if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
283 sc.SetState(SCE_LUA_NUMBER);
284 if (sc.ch == '0' && toupper(sc.chNext) == 'X') {
285 sc.Forward();
287 } else if (setWordStart.Contains(sc.ch)) {
288 sc.SetState(SCE_LUA_IDENTIFIER);
289 } else if (sc.ch == '\"') {
290 sc.SetState(SCE_LUA_STRING);
291 stringWs = 0;
292 } else if (sc.ch == '\'') {
293 sc.SetState(SCE_LUA_CHARACTER);
294 stringWs = 0;
295 } else if (sc.ch == '[') {
296 sepCount = LongDelimCheck(sc);
297 if (sepCount == 0) {
298 sc.SetState(SCE_LUA_OPERATOR);
299 } else {
300 nestLevel = 1;
301 sc.SetState(SCE_LUA_LITERALSTRING);
302 sc.Forward(sepCount);
304 } else if (sc.Match('-', '-')) {
305 sc.SetState(SCE_LUA_COMMENTLINE);
306 if (sc.Match("--[")) {
307 sc.Forward(2);
308 sepCount = LongDelimCheck(sc);
309 if (sepCount > 0) {
310 nestLevel = 1;
311 sc.ChangeState(SCE_LUA_COMMENT);
312 sc.Forward(sepCount);
314 } else {
315 sc.Forward();
317 } else if (sc.atLineStart && sc.Match('$')) {
318 sc.SetState(SCE_LUA_PREPROCESSOR); // Obsolete since Lua 4.0, but still in old code
319 } else if (setLuaOperator.Contains(sc.ch)) {
320 sc.SetState(SCE_LUA_OPERATOR);
325 if (setWord.Contains(sc.chPrev) || sc.chPrev == '.') {
326 char s[100];
327 sc.GetCurrent(s, sizeof(s));
328 if (keywords.InList(s)) {
329 sc.ChangeState(SCE_LUA_WORD);
330 } else if (keywords2.InList(s)) {
331 sc.ChangeState(SCE_LUA_WORD2);
332 } else if (keywords3.InList(s)) {
333 sc.ChangeState(SCE_LUA_WORD3);
334 } else if (keywords4.InList(s)) {
335 sc.ChangeState(SCE_LUA_WORD4);
336 } else if (keywords5.InList(s)) {
337 sc.ChangeState(SCE_LUA_WORD5);
338 } else if (keywords6.InList(s)) {
339 sc.ChangeState(SCE_LUA_WORD6);
340 } else if (keywords7.InList(s)) {
341 sc.ChangeState(SCE_LUA_WORD7);
342 } else if (keywords8.InList(s)) {
343 sc.ChangeState(SCE_LUA_WORD8);
347 sc.Complete();
350 static void FoldLuaDoc(unsigned int startPos, int length, int /* initStyle */, WordList *[],
351 Accessor &styler) {
352 unsigned int lengthDoc = startPos + length;
353 int visibleChars = 0;
354 int lineCurrent = styler.GetLine(startPos);
355 int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
356 int levelCurrent = levelPrev;
357 char chNext = styler[startPos];
358 bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
359 int styleNext = styler.StyleAt(startPos);
361 for (unsigned int i = startPos; i < lengthDoc; i++) {
362 char ch = chNext;
363 chNext = styler.SafeGetCharAt(i + 1);
364 int style = styleNext;
365 styleNext = styler.StyleAt(i + 1);
366 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
367 if (style == SCE_LUA_WORD) {
368 if (ch == 'i' || ch == 'd' || ch == 'f' || ch == 'e' || ch == 'r' || ch == 'u') {
369 char s[10] = "";
370 for (unsigned int j = 0; j < 8; j++) {
371 if (!iswordchar(styler[i + j])) {
372 break;
374 s[j] = styler[i + j];
375 s[j + 1] = '\0';
378 if ((strcmp(s, "if") == 0) || (strcmp(s, "do") == 0) || (strcmp(s, "function") == 0) || (strcmp(s, "repeat") == 0)) {
379 levelCurrent++;
381 if ((strcmp(s, "end") == 0) || (strcmp(s, "elseif") == 0) || (strcmp(s, "until") == 0)) {
382 levelCurrent--;
385 } else if (style == SCE_LUA_OPERATOR) {
386 if (ch == '{' || ch == '(') {
387 levelCurrent++;
388 } else if (ch == '}' || ch == ')') {
389 levelCurrent--;
391 } else if (style == SCE_LUA_LITERALSTRING || style == SCE_LUA_COMMENT) {
392 if (ch == '[') {
393 levelCurrent++;
394 } else if (ch == ']') {
395 levelCurrent--;
399 if (atEOL) {
400 int lev = levelPrev;
401 if (visibleChars == 0 && foldCompact) {
402 lev |= SC_FOLDLEVELWHITEFLAG;
404 if ((levelCurrent > levelPrev) && (visibleChars > 0)) {
405 lev |= SC_FOLDLEVELHEADERFLAG;
407 if (lev != styler.LevelAt(lineCurrent)) {
408 styler.SetLevel(lineCurrent, lev);
410 lineCurrent++;
411 levelPrev = levelCurrent;
412 visibleChars = 0;
414 if (!isspacechar(ch)) {
415 visibleChars++;
418 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
420 int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
421 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
// Descriptions of the eight keyword lists, in list order.
// The array is terminated by a null entry.
static const char * const luaWordListDesc[] = {
	"Keywords",
	"Basic functions",
	"String, (table) & math functions",
	"(coroutines), I/O & system facilities",
	"user1",
	"user2",
	"user3",
	"user4",
	0
};
436 LexerModule lmLua(SCLEX_LUA, ColouriseLuaDoc, "lua", FoldLuaDoc, luaWordListDesc);