Add a UI to enable/disable specific overlay handlers.
[TortoiseGit.git] / ext / scintilla / src / LexBash.cxx
blob5418137e16c95f7422d69ccecb703438c32f6dd9
// Scintilla source code edit control
/** @file LexBash.cxx
 ** Lexer for Bash.
 **/
// Copyright 2004-2008 by Neil Hodgson <neilh@scintilla.org>
// Adapted from LexPerl by Kein-Hong Man 2004
// The License.txt file describes the conditions under which this software may be distributed.
9 #include <stdlib.h>
10 #include <string.h>
11 #include <ctype.h>
12 #include <stdio.h>
13 #include <stdarg.h>
15 #include "Platform.h"
17 #include "PropSet.h"
18 #include "Accessor.h"
19 #include "StyleContext.h"
20 #include "KeyWords.h"
21 #include "Scintilla.h"
22 #include "SciLexer.h"
23 #include "CharacterSet.h"
25 #ifdef SCI_NAMESPACE
26 using namespace Scintilla;
27 #endif
// Size of the buffer that holds a here-doc delimiter, including the
// terminating NUL.
#define HERE_DELIM_MAX 256

// define this if you want 'invalid octals' to be marked as errors
// usually, this is not a good idea, permissive lexing is better
#undef PEDANTIC_OCTAL

// Pseudo number bases used as lexer state while a number is being scanned.
// They start at 65 because getBashNumberBase() only ever reports a real base
// of at most 64, so the sentinels cannot collide with a genuine base.
#define BASH_BASE_ERROR 65
#define BASH_BASE_DECIMAL 66
#define BASH_BASE_HEX 67
#ifdef PEDANTIC_OCTAL
#define BASH_BASE_OCTAL 68
#define BASH_BASE_OCTAL_ERROR 69
#endif
43 static inline int translateBashDigit(int ch) {
44 if (ch >= '0' && ch <= '9') {
45 return ch - '0';
46 } else if (ch >= 'a' && ch <= 'z') {
47 return ch - 'a' + 10;
48 } else if (ch >= 'A' && ch <= 'Z') {
49 return ch - 'A' + 36;
50 } else if (ch == '@') {
51 return 62;
52 } else if (ch == '_') {
53 return 63;
55 return BASH_BASE_ERROR;
58 static inline int getBashNumberBase(char *s) {
59 int i = 0;
60 int base = 0;
61 while (*s) {
62 base = base * 10 + (*s++ - '0');
63 i++;
65 if (base > 64 || i > 2) {
66 return BASH_BASE_ERROR;
68 return base;
// Return the closing delimiter that pairs with an opening bracket character.
// Characters that are not one of ( [ { < are their own closing delimiter
// (e.g. quotes), so they are returned unchanged.
static int opposite(int ch) {
    switch (ch) {
    case '(':
        return ')';
    case '[':
        return ']';
    case '{':
        return '}';
    case '<':
        return '>';
    default:
        return ch;
    }
}
79 static void ColouriseBashDoc(unsigned int startPos, int length, int initStyle,
80 WordList *keywordlists[], Accessor &styler) {
82 WordList &keywords = *keywordlists[0];
84 CharacterSet setWordStart(CharacterSet::setAlpha, "_");
85 // note that [+-] are often parts of identifiers in shell scripts
86 CharacterSet setWord(CharacterSet::setAlphaNum, "._+-");
87 CharacterSet setBashOperator(CharacterSet::setNone, "^&\\%()-+=|{}[]:;>,*/<?!.~@");
88 CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMACahGLNn");
89 CharacterSet setParam(CharacterSet::setAlphaNum, "$_");
90 CharacterSet setHereDoc(CharacterSet::setAlpha, "_\\-+!");
91 CharacterSet setHereDoc2(CharacterSet::setAlphaNum, "_-+!");
92 CharacterSet setLeftShift(CharacterSet::setDigits, "=$");
94 class HereDocCls { // Class to manage HERE document elements
95 public:
96 int State; // 0: '<<' encountered
97 // 1: collect the delimiter
98 // 2: here doc text (lines after the delimiter)
99 int Quote; // the char after '<<'
100 bool Quoted; // true if Quote in ('\'','"','`')
101 bool Indent; // indented delimiter (for <<-)
102 int DelimiterLength; // strlen(Delimiter)
103 char *Delimiter; // the Delimiter, 256: sizeof PL_tokenbuf
104 HereDocCls() {
105 State = 0;
106 Quote = 0;
107 Quoted = false;
108 Indent = 0;
109 DelimiterLength = 0;
110 Delimiter = new char[HERE_DELIM_MAX];
111 Delimiter[0] = '\0';
113 void Append(int ch) {
114 Delimiter[DelimiterLength++] = static_cast<char>(ch);
115 Delimiter[DelimiterLength] = '\0';
117 ~HereDocCls() {
118 delete []Delimiter;
121 HereDocCls HereDoc;
123 class QuoteCls { // Class to manage quote pairs (simplified vs LexPerl)
124 public:
125 int Count;
126 int Up, Down;
127 QuoteCls() {
128 Count = 0;
129 Up = '\0';
130 Down = '\0';
132 void Open(int u) {
133 Count++;
134 Up = u;
135 Down = opposite(Up);
137 void Start(int u) {
138 Count = 0;
139 Open(u);
142 QuoteCls Quote;
144 int numBase = 0;
145 int digit;
146 unsigned int endPos = startPos + length;
148 // Backtrack to beginning of style if required...
149 // If in a long distance lexical state, backtrack to find quote characters
150 if (initStyle == SCE_SH_HERE_Q) {
151 while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_SH_HERE_DELIM)) {
152 startPos--;
154 startPos = styler.LineStart(styler.GetLine(startPos));
155 initStyle = styler.StyleAt(startPos - 1);
157 // Bash strings can be multi-line with embedded newlines, so backtrack.
158 // Bash numbers have additional state during lexing, so backtrack too.
159 if (initStyle == SCE_SH_STRING
160 || initStyle == SCE_SH_BACKTICKS
161 || initStyle == SCE_SH_CHARACTER
162 || initStyle == SCE_SH_NUMBER
163 || initStyle == SCE_SH_IDENTIFIER
164 || initStyle == SCE_SH_COMMENTLINE) {
165 while ((startPos > 1) && (styler.StyleAt(startPos - 1) == initStyle)) {
166 startPos--;
168 initStyle = SCE_SH_DEFAULT;
171 StyleContext sc(startPos, endPos - startPos, initStyle, styler);
173 for (; sc.More(); sc.Forward()) {
175 // Determine if the current state should terminate.
176 switch (sc.state) {
177 case SCE_SH_OPERATOR:
178 sc.SetState(SCE_SH_DEFAULT);
179 break;
180 case SCE_SH_WORD:
181 // "." never used in Bash variable names but used in file names
182 if (!setWord.Contains(sc.ch)) {
183 char s[1000];
184 sc.GetCurrent(s, sizeof(s));
185 if (s[0] != '-' && // for file operators
186 !keywords.InList(s)) {
187 sc.ChangeState(SCE_SH_IDENTIFIER);
189 sc.SetState(SCE_SH_DEFAULT);
191 break;
192 case SCE_SH_IDENTIFIER:
193 if (sc.chPrev == '\\') { // for escaped chars
194 sc.ForwardSetState(SCE_SH_DEFAULT);
195 } else if (!setWord.Contains(sc.ch)) {
196 sc.SetState(SCE_SH_DEFAULT);
198 break;
199 case SCE_SH_NUMBER:
200 digit = translateBashDigit(sc.ch);
201 if (numBase == BASH_BASE_DECIMAL) {
202 if (sc.ch == '#') {
203 char s[10];
204 sc.GetCurrent(s, sizeof(s));
205 numBase = getBashNumberBase(s);
206 if (numBase != BASH_BASE_ERROR)
207 break;
208 } else if (IsADigit(sc.ch))
209 break;
210 } else if (numBase == BASH_BASE_HEX) {
211 if (IsADigit(sc.ch, 16))
212 break;
213 #ifdef PEDANTIC_OCTAL
214 } else if (numBase == BASH_BASE_OCTAL ||
215 numBase == BASH_BASE_OCTAL_ERROR) {
216 if (digit <= 7)
217 break;
218 if (digit <= 9) {
219 numBase = BASH_BASE_OCTAL_ERROR;
220 break;
222 #endif
223 } else if (numBase == BASH_BASE_ERROR) {
224 if (digit <= 9)
225 break;
226 } else { // DD#DDDD number style handling
227 if (digit != BASH_BASE_ERROR) {
228 if (numBase <= 36) {
229 // case-insensitive if base<=36
230 if (digit >= 36) digit -= 26;
232 if (digit < numBase)
233 break;
234 if (digit <= 9) {
235 numBase = BASH_BASE_ERROR;
236 break;
240 // fallthrough when number is at an end or error
241 if (numBase == BASH_BASE_ERROR
242 #ifdef PEDANTIC_OCTAL
243 || numBase == BASH_BASE_OCTAL_ERROR
244 #endif
246 sc.ChangeState(SCE_SH_ERROR);
248 sc.SetState(SCE_SH_DEFAULT);
249 break;
250 case SCE_SH_COMMENTLINE:
251 if (sc.ch == '\\' && (sc.chNext == '\r' || sc.chNext == '\n')) {
252 // comment continuation
253 sc.Forward();
254 if (sc.ch == '\r' && sc.chNext == '\n') {
255 sc.Forward();
257 } else if (sc.atLineEnd) {
258 sc.ForwardSetState(SCE_SH_DEFAULT);
260 break;
261 case SCE_SH_HERE_DELIM:
262 // From Bash info:
263 // ---------------
264 // Specifier format is: <<[-]WORD
265 // Optional '-' is for removal of leading tabs from here-doc.
266 // Whitespace acceptable after <<[-] operator
268 if (HereDoc.State == 0) { // '<<' encountered
269 HereDoc.Quote = sc.chNext;
270 HereDoc.Quoted = false;
271 HereDoc.DelimiterLength = 0;
272 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
273 if (sc.chNext == '\'' || sc.chNext == '\"') { // a quoted here-doc delimiter (' or ")
274 sc.Forward();
275 HereDoc.Quoted = true;
276 HereDoc.State = 1;
277 } else if (!HereDoc.Indent && sc.chNext == '-') { // <<- indent case
278 HereDoc.Indent = true;
279 } else if (setHereDoc.Contains(sc.chNext)) {
280 // an unquoted here-doc delimiter, no special handling
281 // TODO check what exactly bash considers part of the delim
282 HereDoc.State = 1;
283 } else if (sc.chNext == '<') { // HERE string <<<
284 sc.Forward();
285 sc.ForwardSetState(SCE_SH_DEFAULT);
286 } else if (IsASpace(sc.chNext)) {
287 // eat whitespace
288 } else if (setLeftShift.Contains(sc.chNext)) {
289 // left shift << or <<= operator cases
290 sc.ChangeState(SCE_SH_OPERATOR);
291 sc.ForwardSetState(SCE_SH_DEFAULT);
292 } else {
293 // symbols terminates; deprecated zero-length delimiter
294 HereDoc.State = 1;
296 } else if (HereDoc.State == 1) { // collect the delimiter
297 if (HereDoc.Quoted) { // a quoted here-doc delimiter
298 if (sc.ch == HereDoc.Quote) { // closing quote => end of delimiter
299 sc.ForwardSetState(SCE_SH_DEFAULT);
300 } else {
301 if (sc.ch == '\\' && sc.chNext == HereDoc.Quote) { // escaped quote
302 sc.Forward();
304 HereDoc.Append(sc.ch);
306 } else { // an unquoted here-doc delimiter
307 if (setHereDoc2.Contains(sc.ch)) {
308 HereDoc.Append(sc.ch);
309 } else if (sc.ch == '\\') {
310 // skip escape prefix
311 } else {
312 sc.SetState(SCE_SH_DEFAULT);
315 if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) { // force blowup
316 sc.SetState(SCE_SH_ERROR);
317 HereDoc.State = 0;
320 break;
321 case SCE_SH_HERE_Q:
322 // HereDoc.State == 2
323 if (sc.atLineStart) {
324 sc.SetState(SCE_SH_HERE_Q);
325 int prefixws = 0;
326 while (IsASpace(sc.ch) && !sc.atLineEnd) { // whitespace prefix
327 sc.Forward();
328 prefixws++;
330 if (prefixws > 0)
331 sc.SetState(SCE_SH_HERE_Q);
332 while (!sc.atLineEnd) {
333 sc.Forward();
335 char s[HERE_DELIM_MAX];
336 sc.GetCurrent(s, sizeof(s));
337 if (strcmp(HereDoc.Delimiter, s) == 0) {
338 if ((prefixws > 0 && HereDoc.Indent) || // indentation rule
339 (prefixws == 0 && !HereDoc.Indent)) {
340 sc.SetState(SCE_SH_DEFAULT);
341 break;
345 break;
346 case SCE_SH_SCALAR: // variable names
347 if (!setParam.Contains(sc.ch)) {
348 if (sc.LengthCurrent() == 1) {
349 // Special variable: $(, $_ etc.
350 sc.ForwardSetState(SCE_SH_DEFAULT);
351 } else {
352 sc.SetState(SCE_SH_DEFAULT);
355 break;
356 case SCE_SH_STRING: // delimited styles
357 case SCE_SH_CHARACTER:
358 case SCE_SH_BACKTICKS:
359 case SCE_SH_PARAM:
360 if (sc.ch == '\\' && Quote.Up != '\\') {
361 sc.Forward();
362 } else if (sc.ch == Quote.Down) {
363 Quote.Count--;
364 if (Quote.Count == 0) {
365 sc.ForwardSetState(SCE_SH_DEFAULT);
367 } else if (sc.ch == Quote.Up) {
368 Quote.Count++;
370 break;
373 // Must check end of HereDoc state 1 before default state is handled
374 if (HereDoc.State == 1 && sc.atLineEnd) {
375 // Begin of here-doc (the line after the here-doc delimiter):
376 // Lexically, the here-doc starts from the next line after the >>, but the
377 // first line of here-doc seem to follow the style of the last EOL sequence
378 HereDoc.State = 2;
379 if (HereDoc.Quoted) {
380 if (sc.state == SCE_SH_HERE_DELIM) {
381 // Missing quote at end of string! We are stricter than bash.
382 // Colour here-doc anyway while marking this bit as an error.
383 sc.ChangeState(SCE_SH_ERROR);
385 // HereDoc.Quote always == '\''
387 sc.SetState(SCE_SH_HERE_Q);
390 // Determine if a new state should be entered.
391 if (sc.state == SCE_SH_DEFAULT) {
392 if (sc.ch == '\\') { // escaped character
393 sc.SetState(SCE_SH_IDENTIFIER);
394 } else if (IsADigit(sc.ch)) {
395 sc.SetState(SCE_SH_NUMBER);
396 numBase = BASH_BASE_DECIMAL;
397 if (sc.ch == '0') { // hex,octal
398 if (sc.chNext == 'x' || sc.chNext == 'X') {
399 numBase = BASH_BASE_HEX;
400 sc.Forward();
401 } else if (IsADigit(sc.chNext)) {
402 #ifdef PEDANTIC_OCTAL
403 numBase = BASH_BASE_OCTAL;
404 #else
405 numBase = BASH_BASE_HEX;
406 #endif
409 } else if (setWordStart.Contains(sc.ch)) {
410 sc.SetState(SCE_SH_WORD);
411 } else if (sc.ch == '#') {
412 sc.SetState(SCE_SH_COMMENTLINE);
413 } else if (sc.ch == '\"') {
414 sc.SetState(SCE_SH_STRING);
415 Quote.Start(sc.ch);
416 } else if (sc.ch == '\'') {
417 sc.SetState(SCE_SH_CHARACTER);
418 Quote.Start(sc.ch);
419 } else if (sc.ch == '`') {
420 sc.SetState(SCE_SH_BACKTICKS);
421 Quote.Start(sc.ch);
422 } else if (sc.ch == '$') {
423 sc.SetState(SCE_SH_SCALAR);
424 sc.Forward();
425 if (sc.ch == '{') {
426 sc.ChangeState(SCE_SH_PARAM);
427 } else if (sc.ch == '\'') {
428 sc.ChangeState(SCE_SH_CHARACTER);
429 } else if (sc.ch == '"') {
430 sc.ChangeState(SCE_SH_STRING);
431 } else if (sc.ch == '(' || sc.ch == '`') {
432 sc.ChangeState(SCE_SH_BACKTICKS);
433 if (sc.chNext == '(') { // $(( is lexed as operator
434 sc.ChangeState(SCE_SH_OPERATOR);
436 } else {
437 continue; // scalar has no delimiter pair
439 // fallthrough, open delim for $[{'"(`]
440 Quote.Start(sc.ch);
441 } else if (sc.Match('<', '<')) {
442 sc.SetState(SCE_SH_HERE_DELIM);
443 HereDoc.State = 0;
444 HereDoc.Indent = false;
445 } else if (sc.ch == '-' && // one-char file test operators
446 setSingleCharOp.Contains(sc.chNext) &&
447 !setWord.Contains(sc.GetRelative(2)) &&
448 IsASpace(sc.chPrev)) {
449 sc.SetState(SCE_SH_WORD);
450 sc.Forward();
451 } else if (setBashOperator.Contains(sc.ch)) {
452 sc.SetState(SCE_SH_OPERATOR);
456 sc.Complete();
459 static bool IsCommentLine(int line, Accessor &styler) {
460 int pos = styler.LineStart(line);
461 int eol_pos = styler.LineStart(line + 1) - 1;
462 for (int i = pos; i < eol_pos; i++) {
463 char ch = styler[i];
464 if (ch == '#')
465 return true;
466 else if (ch != ' ' && ch != '\t')
467 return false;
469 return false;
472 static void FoldBashDoc(unsigned int startPos, int length, int, WordList *[],
473 Accessor &styler) {
474 bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
475 bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
476 unsigned int endPos = startPos + length;
477 int visibleChars = 0;
478 int lineCurrent = styler.GetLine(startPos);
479 int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
480 int levelCurrent = levelPrev;
481 char chNext = styler[startPos];
482 int styleNext = styler.StyleAt(startPos);
483 for (unsigned int i = startPos; i < endPos; i++) {
484 char ch = chNext;
485 chNext = styler.SafeGetCharAt(i + 1);
486 int style = styleNext;
487 styleNext = styler.StyleAt(i + 1);
488 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
489 // Comment folding
490 if (foldComment && atEOL && IsCommentLine(lineCurrent, styler))
492 if (!IsCommentLine(lineCurrent - 1, styler)
493 && IsCommentLine(lineCurrent + 1, styler))
494 levelCurrent++;
495 else if (IsCommentLine(lineCurrent - 1, styler)
496 && !IsCommentLine(lineCurrent + 1, styler))
497 levelCurrent--;
499 if (style == SCE_SH_OPERATOR) {
500 if (ch == '{') {
501 levelCurrent++;
502 } else if (ch == '}') {
503 levelCurrent--;
506 if (atEOL) {
507 int lev = levelPrev;
508 if (visibleChars == 0 && foldCompact)
509 lev |= SC_FOLDLEVELWHITEFLAG;
510 if ((levelCurrent > levelPrev) && (visibleChars > 0))
511 lev |= SC_FOLDLEVELHEADERFLAG;
512 if (lev != styler.LevelAt(lineCurrent)) {
513 styler.SetLevel(lineCurrent, lev);
515 lineCurrent++;
516 levelPrev = levelCurrent;
517 visibleChars = 0;
519 if (!isspacechar(ch))
520 visibleChars++;
522 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
523 int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
524 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
// Descriptions of the keyword lists used by this lexer, in list order.
// The array is terminated by a null entry.
static const char * const bashWordListDesc[] = {
    "Keywords",
    0
};
532 LexerModule lmBash(SCLEX_BASH, ColouriseBashDoc, "bash", FoldBashDoc, bashWordListDesc);