1 // Scintilla source code edit control
5 // Copyright 2004-2010 by Neil Hodgson <neilh@scintilla.org>
6 // Adapted from LexPerl by Kein-Hong Man 2004
7 // The License.txt file describes the conditions under which this software may be distributed.
16 #include "Scintilla.h"
20 #include "LexAccessor.h"
22 #include "StyleContext.h"
23 #include "CharacterSet.h"
24 #include "LexerModule.h"
27 using namespace Scintilla
;
// Capacity, in chars, of the here-doc delimiter buffer allocated in HereDocCls.
30 #define HERE_DELIM_MAX 256
32 // define this if you want 'invalid octals' to be marked as errors
33 // usually, this is not a good idea, permissive lexing is better
// NOTE(review): the macro meant above is presumably PEDANTIC_OCTAL, which is
// tested with #ifdef further down; its #define line is not visible in this view.
// Number-base codes held in numBase while a number token is scanned.
// BASH_BASE_ERROR is also the failure result of translateBashDigit() and
// getBashNumberBase().
36 #define BASH_BASE_ERROR 65
37 #define BASH_BASE_DECIMAL 66
38 #define BASH_BASE_HEX 67
40 #define BASH_BASE_OCTAL 68
41 #define BASH_BASE_OCTAL_ERROR 69
44 // state constants for parts of a bash command segment
// These are the values held in cmdState / cmdStateNew and written per line
// through styler.SetLineState().
45 #define BASH_CMD_BODY 0
46 #define BASH_CMD_START 1
47 #define BASH_CMD_WORD 2
48 #define BASH_CMD_TEST 3
49 #define BASH_CMD_ARITH 4
50 #define BASH_CMD_DELIM 5
// Classifies ch as a digit character of a bash number literal.
// The visible branches test '0'-'9', 'a'-'z', 'A'-'Z', '@' and '_';
// any other character yields BASH_BASE_ERROR.
// NOTE(review): the value returned by each branch is not visible in this view
// (the lines between the tests are missing) -- confirm against the full file.
52 static inline int translateBashDigit(int ch
) {
53 if (ch
>= '0' && ch
<= '9') {
55 } else if (ch
>= 'a' && ch
<= 'z') {
57 } else if (ch
>= 'A' && ch
<= 'Z') {
59 } else if (ch
== '@') {
61 } else if (ch
== '_') {
// No branch matched: ch is not a digit in any supported base.
64 return BASH_BASE_ERROR
;
// Reads a decimal number from the string s and accumulates it in base.
// Returns BASH_BASE_ERROR when base exceeds 64 or i exceeds 2.
// NOTE(review): the loop header, the declarations of i and base, and the
// success return are not visible in this view. Presumably i counts the digits
// consumed and the result on success is base -- TODO confirm.
67 static inline int getBashNumberBase(char *s
) {
// Decimal accumulation; advances s by one character.
71 base
= base
* 10 + (*s
++ - '0');
74 if (base
> 64 || i
> 2) {
75 return BASH_BASE_ERROR
;
// Maps an opening bracket to its closing partner:
// '(' -> ')', '[' -> ']', '{' -> '}', '<' -> '>'.
// NOTE(review): the result for any other character is not visible in this view.
80 static int opposite(int ch
) {
81 if (ch
== '(') return ')';
82 if (ch
== '[') return ']';
83 if (ch
== '{') return '}';
84 if (ch
== '<') return '>';
// Lexer entry point: assigns SCE_SH_* styles to the text starting at startPos
// and running for length characters (endPos = startPos + length).
// keywordlists[0] supplies the keyword list; the command-delimiter and
// construct-keyword lists are built locally. A per-line command state is
// written with styler.SetLineState() and read back with GetLineState() when
// choosing where to start.
// NOTE(review): many lines of this function are missing from this view (the
// embedded line numbers skip), so the comments here describe only the
// statements that are visible.
88 static void ColouriseBashDoc(unsigned int startPos
, int length
, int initStyle
,
89 WordList
*keywordlists
[], Accessor
&styler
) {
91 WordList
&keywords
= *keywordlists
[0];
// Local word lists: command delimiters, construct keywords, and the
// constructs that are followed later by 'in' or 'do'.
92 WordList cmdDelimiter
, bashStruct
, bashStruct_in
;
93 cmdDelimiter
.Set("| || |& & && ; ;; ( ) { }");
94 bashStruct
.Set("if elif fi while until else then do done esac eval");
95 bashStruct_in
.Set("for case select");
// Character classes consulted by the state machine below.
97 CharacterSet
setWordStart(CharacterSet::setAlpha
, "_");
98 // note that [+-] are often parts of identifiers in shell scripts
99 CharacterSet
setWord(CharacterSet::setAlphaNum
, "._+-");
100 CharacterSet
setBashOperator(CharacterSet::setNone
, "^&%()-+=|{}[]:;>,*/<?!.~@");
101 CharacterSet
setSingleCharOp(CharacterSet::setNone
, "rwxoRWXOezsfdlpSbctugkTBMACahGLNn");
102 CharacterSet
setParam(CharacterSet::setAlphaNum
, "$_");
103 CharacterSet
setHereDoc(CharacterSet::setAlpha
, "_\\-+!");
104 CharacterSet
setHereDoc2(CharacterSet::setAlphaNum
, "_-+!");
105 CharacterSet
setLeftShift(CharacterSet::setDigits
, "=$");
107 class HereDocCls
{ // Class to manage HERE document elements
109 int State
; // 0: '<<' encountered
110 // 1: collect the delimiter
111 // 2: here doc text (lines after the delimiter)
112 int Quote
; // the char after '<<'
113 bool Quoted
; // true if Quote in ('\'','"','`')
114 bool Indent
; // indented delimiter (for <<-)
115 int DelimiterLength
; // strlen(Delimiter)
116 char *Delimiter
; // the Delimiter, 256: sizeof PL_tokenbuf
// NOTE(review): the buffer is allocated with new[]; the matching release is
// not visible in this view -- confirm that one exists.
123 Delimiter
= new char[HERE_DELIM_MAX
];
// Appends ch to Delimiter and keeps the buffer NUL-terminated. No bounds
// check is visible here; the delimiter-collecting code below tests
// DelimiterLength against HERE_DELIM_MAX - 1.
126 void Append(int ch
) {
127 Delimiter
[DelimiterLength
++] = static_cast<char>(ch
);
128 Delimiter
[DelimiterLength
] = '\0';
136 class QuoteCls
{ // Class to manage quote pairs (simplified vs LexPerl)
// NOTE(review): the body of QuoteCls is not visible in this view; the code
// below reads its members Up, Down and Count.
159 unsigned int endPos
= startPos
+ length
;
160 int cmdState
= BASH_CMD_START
;
161 int testExprType
= 0;
163 // Always backtracks to the start of a line that is not a continuation
164 // of the previous line (i.e. start of a bash command segment)
165 int ln
= styler
.GetLine(startPos
);
167 startPos
= styler
.LineStart(ln
);
168 if (ln
== 0 || styler
.GetLineState(ln
) == BASH_CMD_START
)
172 initStyle
= SCE_SH_DEFAULT
;
// The StyleContext covers the (possibly moved back) startPos up to endPos.
174 StyleContext
sc(startPos
, endPos
- startPos
, initStyle
, styler
);
176 for (; sc
.More(); sc
.Forward()) {
178 // handle line continuation, updates per-line stored state
179 if (sc
.atLineStart
) {
180 ln
= styler
.GetLine(sc
.currentPos
);
181 if (sc
.state
== SCE_SH_STRING
182 || sc
.state
== SCE_SH_BACKTICKS
183 || sc
.state
== SCE_SH_CHARACTER
184 || sc
.state
== SCE_SH_HERE_Q
185 || sc
.state
== SCE_SH_COMMENTLINE
186 || sc
.state
== SCE_SH_PARAM
) {
187 // force backtrack while retaining cmdState
188 styler
.SetLineState(ln
, BASH_CMD_BODY
);
// The previous line ended in a backslash, before either "\r\n" or a single
// end-of-line character.
191 if ((sc
.GetRelative(-3) == '\\' && sc
.GetRelative(-2) == '\r' && sc
.chPrev
== '\n')
192 || sc
.GetRelative(-2) == '\\') { // handle '\' line continuation
193 // retain last line's state
195 cmdState
= BASH_CMD_START
;
197 styler
.SetLineState(ln
, cmdState
);
201 // controls change of cmdState at the end of a non-whitespace element
202 // states BODY|TEST|ARITH persist until the end of a command segment
203 // state WORD persist, but ends with 'in' or 'do' construct keywords
204 int cmdStateNew
= BASH_CMD_BODY
;
205 if (cmdState
== BASH_CMD_TEST
|| cmdState
== BASH_CMD_ARITH
|| cmdState
== BASH_CMD_WORD
)
206 cmdStateNew
= cmdState
;
// Remembered so that the transition back to SCE_SH_DEFAULT can be detected
// after the termination checks.
207 int stylePrev
= sc
.state
;
209 // Determine if the current state should terminate.
211 case SCE_SH_OPERATOR
:
212 sc
.SetState(SCE_SH_DEFAULT
);
213 if (cmdState
== BASH_CMD_DELIM
) // if command delimiter, start new command
214 cmdStateNew
= BASH_CMD_START
;
215 else if (sc
.chPrev
== '\\') // propagate command state if line continued
216 cmdStateNew
= cmdState
;
219 // "." never used in Bash variable names but used in file names
220 if (!setWord
.Contains(sc
.ch
)) {
// s receives the text of the word that has just ended.
223 sc
.GetCurrent(s
, sizeof(s
));
224 // allow keywords ending in a whitespace or command delimiter
225 s2
[0] = static_cast<char>(sc
.ch
);
227 bool keywordEnds
= IsASpace(sc
.ch
) || cmdDelimiter
.InList(s2
);
228 // 'in' or 'do' may be construct keywords
229 if (cmdState
== BASH_CMD_WORD
) {
230 if (strcmp(s
, "in") == 0 && keywordEnds
)
231 cmdStateNew
= BASH_CMD_BODY
;
232 else if (strcmp(s
, "do") == 0 && keywordEnds
)
233 cmdStateNew
= BASH_CMD_START
;
235 sc
.ChangeState(SCE_SH_IDENTIFIER
);
236 sc
.SetState(SCE_SH_DEFAULT
);
239 // a 'test' keyword starts a test expression
240 if (strcmp(s
, "test") == 0) {
241 if (cmdState
== BASH_CMD_START
&& keywordEnds
) {
242 cmdStateNew
= BASH_CMD_TEST
;
245 sc
.ChangeState(SCE_SH_IDENTIFIER
);
247 // detect bash construct keywords
248 else if (bashStruct
.InList(s
)) {
249 if (cmdState
== BASH_CMD_START
&& keywordEnds
)
250 cmdStateNew
= BASH_CMD_START
;
252 sc
.ChangeState(SCE_SH_IDENTIFIER
);
254 // 'for'|'case'|'select' needs 'in'|'do' to be highlighted later
255 else if (bashStruct_in
.InList(s
)) {
256 if (cmdState
== BASH_CMD_START
&& keywordEnds
)
257 cmdStateNew
= BASH_CMD_WORD
;
259 sc
.ChangeState(SCE_SH_IDENTIFIER
);
261 // disambiguate option items and file test operators
262 else if (s
[0] == '-') {
263 if (cmdState
!= BASH_CMD_TEST
)
264 sc
.ChangeState(SCE_SH_IDENTIFIER
);
266 // disambiguate keywords and identifiers
267 else if (cmdState
!= BASH_CMD_START
268 || !(keywords
.InList(s
) && keywordEnds
)) {
269 sc
.ChangeState(SCE_SH_IDENTIFIER
);
271 sc
.SetState(SCE_SH_DEFAULT
);
274 case SCE_SH_IDENTIFIER
:
275 if (sc
.chPrev
== '\\') { // for escaped chars
276 sc
.ForwardSetState(SCE_SH_DEFAULT
);
277 } else if (!setWord
.Contains(sc
.ch
)) {
278 sc
.SetState(SCE_SH_DEFAULT
);
// Number token: classify the current character, then branch on numBase.
282 digit
= translateBashDigit(sc
.ch
);
283 if (numBase
== BASH_BASE_DECIMAL
) {
// The text collected so far is handed to getBashNumberBase() and the result
// replaces numBase.
286 sc
.GetCurrent(s
, sizeof(s
));
287 numBase
= getBashNumberBase(s
);
288 if (numBase
!= BASH_BASE_ERROR
)
290 } else if (IsADigit(sc
.ch
))
292 } else if (numBase
== BASH_BASE_HEX
) {
293 if (IsADigit(sc
.ch
, 16))
295 #ifdef PEDANTIC_OCTAL
296 } else if (numBase
== BASH_BASE_OCTAL
||
297 numBase
== BASH_BASE_OCTAL_ERROR
) {
301 numBase
= BASH_BASE_OCTAL_ERROR
;
305 } else if (numBase
== BASH_BASE_ERROR
) {
308 } else { // DD#DDDD number style handling
309 if (digit
!= BASH_BASE_ERROR
) {
311 // case-insensitive if base<=36
312 if (digit
>= 36) digit
-= 26;
317 numBase
= BASH_BASE_ERROR
;
322 // fallthrough when number is at an end or error
323 if (numBase
== BASH_BASE_ERROR
324 #ifdef PEDANTIC_OCTAL
325 || numBase
== BASH_BASE_OCTAL_ERROR
328 sc
.ChangeState(SCE_SH_ERROR
);
330 sc
.SetState(SCE_SH_DEFAULT
);
332 case SCE_SH_COMMENTLINE
:
// A comment ends at the line end unless the preceding character is a backslash.
333 if (sc
.atLineEnd
&& sc
.chPrev
!= '\\') {
334 sc
.SetState(SCE_SH_DEFAULT
);
337 case SCE_SH_HERE_DELIM
:
340 // Specifier format is: <<[-]WORD
341 // Optional '-' is for removal of leading tabs from here-doc.
342 // Whitespace acceptable after <<[-] operator
344 if (HereDoc
.State
== 0) { // '<<' encountered
// Reset the delimiter record before looking at the character after '<<'.
345 HereDoc
.Quote
= sc
.chNext
;
346 HereDoc
.Quoted
= false;
347 HereDoc
.DelimiterLength
= 0;
348 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
] = '\0';
349 if (sc
.chNext
== '\'' || sc
.chNext
== '\"') { // a quoted here-doc delimiter (' or ")
351 HereDoc
.Quoted
= true;
353 } else if (!HereDoc
.Indent
&& sc
.chNext
== '-') { // <<- indent case
354 HereDoc
.Indent
= true;
355 } else if (setHereDoc
.Contains(sc
.chNext
)) {
356 // an unquoted here-doc delimiter, no special handling
357 // TODO check what exactly bash considers part of the delim
359 } else if (sc
.chNext
== '<') { // HERE string <<<
361 sc
.ForwardSetState(SCE_SH_DEFAULT
);
362 } else if (IsASpace(sc
.chNext
)) {
364 } else if (setLeftShift
.Contains(sc
.chNext
)) {
365 // left shift << or <<= operator cases
366 sc
.ChangeState(SCE_SH_OPERATOR
);
367 sc
.ForwardSetState(SCE_SH_DEFAULT
);
369 // symbols terminates; deprecated zero-length delimiter
372 } else if (HereDoc
.State
== 1) { // collect the delimiter
// Characters in setHereDoc2, and any character following a backslash, are
// appended to the delimiter.
373 if (setHereDoc2
.Contains(sc
.ch
) || sc
.chPrev
== '\\') {
374 HereDoc
.Append(sc
.ch
);
375 } else if (HereDoc
.Quoted
&& sc
.ch
== HereDoc
.Quote
) { // closing quote => end of delimiter
376 sc
.ForwardSetState(SCE_SH_DEFAULT
);
377 } else if (sc
.ch
== '\\') {
378 // skip escape prefix
380 sc
.SetState(SCE_SH_DEFAULT
);
// Delimiter buffer is full: switch to the error style.
382 if (HereDoc
.DelimiterLength
>= HERE_DELIM_MAX
- 1) { // force blowup
383 sc
.SetState(SCE_SH_ERROR
);
389 // HereDoc.State == 2
// Here-doc body: at a line start the line text (minus a trailing '\r') is
// compared with the stored delimiter.
// NOTE(review): prefixws is presumably the count of leading whitespace
// characters skipped by the first loop -- its update is not visible here.
390 if (sc
.atLineStart
) {
391 sc
.SetState(SCE_SH_HERE_Q
);
393 while (IsASpace(sc
.ch
) && !sc
.atLineEnd
) { // whitespace prefix
398 sc
.SetState(SCE_SH_HERE_Q
);
399 while (!sc
.atLineEnd
) {
402 char s
[HERE_DELIM_MAX
];
403 sc
.GetCurrent(s
, sizeof(s
));
404 if (sc
.LengthCurrent() == 0)
406 if (s
[strlen(s
) - 1] == '\r')
407 s
[strlen(s
) - 1] = '\0';
408 if (strcmp(HereDoc
.Delimiter
, s
) == 0) {
// A delimiter preceded by whitespace ends the here-doc only when Indent is set.
409 if ((prefixws
== 0) || // indentation rule
410 (prefixws
> 0 && HereDoc
.Indent
)) {
411 sc
.SetState(SCE_SH_DEFAULT
);
417 case SCE_SH_SCALAR
: // variable names
418 if (!setParam
.Contains(sc
.ch
)) {
419 if (sc
.LengthCurrent() == 1) {
420 // Special variable: $(, $_ etc.
421 sc
.ForwardSetState(SCE_SH_DEFAULT
);
423 sc
.SetState(SCE_SH_DEFAULT
);
427 case SCE_SH_STRING
: // delimited styles
428 case SCE_SH_CHARACTER
:
429 case SCE_SH_BACKTICKS
:
// The three delimited styles share the Quote record: Down is compared with
// the current character to find the closer, Up to find a nested opener, and
// the style ends once Count is 0.
431 if (sc
.ch
== '\\' && Quote
.Up
!= '\\') {
433 } else if (sc
.ch
== Quote
.Down
) {
435 if (Quote
.Count
== 0) {
436 sc
.ForwardSetState(SCE_SH_DEFAULT
);
438 } else if (sc
.ch
== Quote
.Up
) {
444 // Must check end of HereDoc state 1 before default state is handled
445 if (HereDoc
.State
== 1 && sc
.atLineEnd
) {
446 // Begin of here-doc (the line after the here-doc delimiter):
447 // Lexically, the here-doc starts from the next line after the <<, but the
448 // first line of here-doc seems to follow the style of the last EOL sequence
450 if (HereDoc
.Quoted
) {
451 if (sc
.state
== SCE_SH_HERE_DELIM
) {
452 // Missing quote at end of string! We are stricter than bash.
453 // Colour here-doc anyway while marking this bit as an error.
454 sc
.ChangeState(SCE_SH_ERROR
);
456 // HereDoc.Quote always == '\''
458 sc
.SetState(SCE_SH_HERE_Q
);
461 // update cmdState about the current command segment
462 if (stylePrev
!= SCE_SH_DEFAULT
&& sc
.state
== SCE_SH_DEFAULT
) {
463 cmdState
= cmdStateNew
;
465 // Determine if a new state should be entered.
466 if (sc
.state
== SCE_SH_DEFAULT
) {
468 // Bash can escape any non-newline as a literal
469 sc
.SetState(SCE_SH_IDENTIFIER
);
470 if (sc
.chNext
== '\r' || sc
.chNext
== '\n')
471 sc
.SetState(SCE_SH_OPERATOR
);
472 } else if (IsADigit(sc
.ch
)) {
// A number starts as decimal; a leading '0' may switch the base below.
473 sc
.SetState(SCE_SH_NUMBER
);
474 numBase
= BASH_BASE_DECIMAL
;
475 if (sc
.ch
== '0') { // hex,octal
476 if (sc
.chNext
== 'x' || sc
.chNext
== 'X') {
477 numBase
= BASH_BASE_HEX
;
479 } else if (IsADigit(sc
.chNext
)) {
480 #ifdef PEDANTIC_OCTAL
481 numBase
= BASH_BASE_OCTAL
;
483 numBase
= BASH_BASE_HEX
;
487 } else if (setWordStart
.Contains(sc
.ch
)) {
488 sc
.SetState(SCE_SH_WORD
);
489 } else if (sc
.ch
== '#') {
490 sc
.SetState(SCE_SH_COMMENTLINE
);
491 } else if (sc
.ch
== '\"') {
492 sc
.SetState(SCE_SH_STRING
);
494 } else if (sc
.ch
== '\'') {
495 sc
.SetState(SCE_SH_CHARACTER
);
497 } else if (sc
.ch
== '`') {
498 sc
.SetState(SCE_SH_BACKTICKS
);
500 } else if (sc
.ch
== '$') {
501 if (sc
.Match("$((")) {
502 sc
.SetState(SCE_SH_OPERATOR
); // handle '((' later
// Otherwise '$' starts a scalar; the style is then changed according to the
// character tested in the branches below.
505 sc
.SetState(SCE_SH_SCALAR
);
508 sc
.ChangeState(SCE_SH_PARAM
);
509 } else if (sc
.ch
== '\'') {
510 sc
.ChangeState(SCE_SH_CHARACTER
);
511 } else if (sc
.ch
== '"') {
512 sc
.ChangeState(SCE_SH_STRING
);
513 } else if (sc
.ch
== '(' || sc
.ch
== '`') {
514 sc
.ChangeState(SCE_SH_BACKTICKS
);
516 continue; // scalar has no delimiter pair
518 // fallthrough, open delim for $[{'"(`]
520 } else if (sc
.Match('<', '<')) {
521 sc
.SetState(SCE_SH_HERE_DELIM
);
523 HereDoc
.Indent
= false;
524 } else if (sc
.ch
== '-' && // one-char file test operators
525 setSingleCharOp
.Contains(sc
.chNext
) &&
526 !setWord
.Contains(sc
.GetRelative(2)) &&
527 IsASpace(sc
.chPrev
)) {
528 sc
.SetState(SCE_SH_WORD
);
530 } else if (setBashOperator
.Contains(sc
.ch
)) {
// isCmdDelim receives the result of looking up the operator text in cmdDelimiter.
532 bool isCmdDelim
= false;
533 sc
.SetState(SCE_SH_OPERATOR
);
534 // handle opening delimiters for test/arithmetic expressions - ((,[[,[
535 if (cmdState
== BASH_CMD_START
536 || cmdState
== BASH_CMD_BODY
) {
537 if (sc
.Match('(', '(')) {
538 cmdState
= BASH_CMD_ARITH
;
540 } else if (sc
.Match('[', '[') && IsASpace(sc
.GetRelative(2))) {
541 cmdState
= BASH_CMD_TEST
;
544 } else if (sc
.ch
== '[' && IsASpace(sc
.chNext
)) {
545 cmdState
= BASH_CMD_TEST
;
549 // special state -- for ((x;y;z)) in ... looping
550 if (cmdState
== BASH_CMD_WORD
&& sc
.Match('(', '(')) {
551 cmdState
= BASH_CMD_ARITH
;
555 // handle command delimiters in command START|BODY|WORD state, also TEST if 'test'
556 if (cmdState
== BASH_CMD_START
557 || cmdState
== BASH_CMD_BODY
558 || cmdState
== BASH_CMD_WORD
559 || (cmdState
== BASH_CMD_TEST
&& testExprType
== 0)) {
// s holds the operator text; a second character is stored when the next
// character is also an operator character.
560 s
[0] = static_cast<char>(sc
.ch
);
561 if (setBashOperator
.Contains(sc
.chNext
)) {
562 s
[1] = static_cast<char>(sc
.chNext
);
564 isCmdDelim
= cmdDelimiter
.InList(s
);
570 isCmdDelim
= cmdDelimiter
.InList(s
);
573 cmdState
= BASH_CMD_DELIM
;
577 // handle closing delimiters for test/arithmetic expressions - )),]],]
578 if (cmdState
== BASH_CMD_ARITH
&& sc
.Match(')', ')')) {
579 cmdState
= BASH_CMD_BODY
;
581 } else if (cmdState
== BASH_CMD_TEST
&& IsASpace(sc
.chPrev
)) {
// testExprType 1 is closed by "]]" and 2 by "]"; 0 is the 'test' keyword form
// per the command-delimiter comment above.
582 if (sc
.Match(']', ']') && testExprType
== 1) {
584 cmdState
= BASH_CMD_BODY
;
585 } else if (sc
.ch
== ']' && testExprType
== 2) {
586 cmdState
= BASH_CMD_BODY
;
// Helper used by FoldBashDoc for comment folding: inspects the characters of
// the given line.
// NOTE(review): the statement that reads ch, the test that recognises a
// comment and all return statements are missing from this view; presumably the
// function returns true when the first non-blank character begins a comment --
// TODO confirm.
595 static bool IsCommentLine(int line
, Accessor
&styler
) {
596 int pos
= styler
.LineStart(line
);
// One position before the start of the following line.
597 int eol_pos
= styler
.LineStart(line
+ 1) - 1;
598 for (int i
= pos
; i
< eol_pos
; i
++) {
// Spaces and tabs do not take this branch; any other character does.
602 else if (ch
!= ' ' && ch
!= '\t')
// Folder entry point: walks the characters from startPos for length characters
// and writes a fold level per line with styler.SetLevel().
// Reads the properties "fold.comment" and "fold.compact" (default 1).
// The third and fourth parameters are unnamed and therefore unused.
// NOTE(review): the end of the signature and the statements that change
// levelCurrent are missing from this view, so the comments here describe only
// the visible statements.
608 static void FoldBashDoc(unsigned int startPos
, int length
, int, WordList
*[],
610 bool foldComment
= styler
.GetPropertyInt("fold.comment") != 0;
611 bool foldCompact
= styler
.GetPropertyInt("fold.compact", 1) != 0;
612 unsigned int endPos
= startPos
+ length
;
613 int visibleChars
= 0;
614 int lineCurrent
= styler
.GetLine(startPos
);
// Start from the level number already stored for the first line, without its flag bits.
615 int levelPrev
= styler
.LevelAt(lineCurrent
) & SC_FOLDLEVELNUMBERMASK
;
616 int levelCurrent
= levelPrev
;
617 char chNext
= styler
[startPos
];
618 int styleNext
= styler
.StyleAt(startPos
);
619 for (unsigned int i
= startPos
; i
< endPos
; i
++) {
// One-character and one-style lookahead.
621 chNext
= styler
.SafeGetCharAt(i
+ 1);
622 int style
= styleNext
;
623 styleNext
= styler
.StyleAt(i
+ 1);
// End of line: a '\n', or a '\r' that is not followed by '\n'.
624 bool atEOL
= (ch
== '\r' && chNext
!= '\n') || (ch
== '\n');
// Comment folding looks at whether the lines before and after the current
// comment line are comment lines too.
626 if (foldComment
&& atEOL
&& IsCommentLine(lineCurrent
, styler
))
628 if (!IsCommentLine(lineCurrent
- 1, styler
)
629 && IsCommentLine(lineCurrent
+ 1, styler
))
631 else if (IsCommentLine(lineCurrent
- 1, styler
)
632 && !IsCommentLine(lineCurrent
+ 1, styler
))
635 if (style
== SCE_SH_OPERATOR
) {
638 } else if (ch
== '}') {
642 // Here Document folding
643 if (style
== SCE_SH_HERE_DELIM
) {
644 if (ch
== '<' && chNext
== '<') {
// NOTE(review): SCE_PL_DEFAULT is a Perl-lexer constant name while the rest of
// this file uses SCE_SH_*; presumably SCE_SH_DEFAULT was intended -- confirm
// that the two constants have the same value.
647 } else if (style
== SCE_SH_HERE_Q
&& styler
.StyleAt(i
+1) == SCE_PL_DEFAULT
) {
// Build the level value for the finished line: add the white flag when the
// line had no visible characters and compact folding is on, and the header
// flag when the level rose on a line that had visible characters.
652 if (visibleChars
== 0 && foldCompact
)
653 lev
|= SC_FOLDLEVELWHITEFLAG
;
654 if ((levelCurrent
> levelPrev
) && (visibleChars
> 0))
655 lev
|= SC_FOLDLEVELHEADERFLAG
;
// Write the level only when it differs from the stored one.
656 if (lev
!= styler
.LevelAt(lineCurrent
)) {
657 styler
.SetLevel(lineCurrent
, lev
);
660 levelPrev
= levelCurrent
;
663 if (!isspacechar(ch
))
666 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
667 int flagsNext
= styler
.LevelAt(lineCurrent
) & ~SC_FOLDLEVELNUMBERMASK
;
668 styler
.SetLevel(lineCurrent
, levelPrev
| flagsNext
);
// Table of C strings passed to the LexerModule constructor below.
// NOTE(review): the initializer entries are not visible in this view.
671 static const char * const bashWordListDesc
[] = {
// Defines the LexerModule object lmBash from the identifier SCLEX_BASH, the
// function ColouriseBashDoc, the name "bash", the function FoldBashDoc and the
// bashWordListDesc table.
676 LexerModule
lmBash(SCLEX_BASH
, ColouriseBashDoc
, "bash", FoldBashDoc
, bashWordListDesc
);