1 // Scintilla source code edit control
5 // Copyright 2004-2008 by Neil Hodgson <neilh@scintilla.org>
6 // Adapted from LexPerl by Kein-Hong Man 2004
7 // The License.txt file describes the conditions under which this software may be distributed.
19 #include "StyleContext.h"
21 #include "Scintilla.h"
23 #include "CharacterSet.h"
26 using namespace Scintilla
;
// Size, in chars, of the here-doc delimiter buffer and of the scratch buffer
// the collected delimiter is compared against.
29 #define HERE_DELIM_MAX 256
31 // define this if you want 'invalid octals' to be marked as errors
32 // usually, this is not a good idea, permissive lexing is better
// Values stored in the lexer's number-base variable while a numeric literal
// is scanned. getBashNumberBase rejects any explicit base above 64 and these
// codes start at 65.
// NOTE(review): presumably chosen so they cannot collide with a real base
// kept in the same variable - confirm against the full source.
35 #define BASH_BASE_ERROR 65
36 #define BASH_BASE_DECIMAL 66
37 #define BASH_BASE_HEX 67
39 #define BASH_BASE_OCTAL 68
40 #define BASH_BASE_OCTAL_ERROR 69
// Classifies one character of a Bash numeric literal. The visible tests
// separate decimal digits, lower-case letters, upper-case letters, '@' and
// '_'; BASH_BASE_ERROR is the visible fall-back result.
// NOTE(review): the value produced by each branch is on lines that are not
// visible here; presumably they are the digit values used by Bash's base#n
// notation - confirm against the full source.
43 static inline int translateBashDigit(int ch
) {
44 if (ch
>= '0' && ch
<= '9') {
46 } else if (ch
>= 'a' && ch
<= 'z') {
48 } else if (ch
>= 'A' && ch
<= 'Z') {
50 } else if (ch
== '@') {
52 } else if (ch
== '_') {
55 return BASH_BASE_ERROR
;
// Reads the decimal base prefix held in s by accumulating its digits into
// base, and returns BASH_BASE_ERROR when the result exceeds 64 or when i
// exceeds 2.
// NOTE(review): the declarations of base and i, the loop around the
// accumulation and the successful return are not visible here; i presumably
// counts the digits consumed - confirm against the full source.
58 static inline int getBashNumberBase(char *s
) {
62 base
= base
* 10 + (*s
++ - '0');
65 if (base
> 64 || i
> 2) {
66 return BASH_BASE_ERROR
;
// Maps an opening bracket character to its closing partner:
// '(' -> ')', '[' -> ']', '{' -> '}', '<' -> '>'.
// NOTE(review): the result for any other character is on a line that is not
// visible here - confirm against the full source.
71 static int opposite(int ch
) {
72 if (ch
== '(') return ')';
73 if (ch
== '[') return ']';
74 if (ch
== '{') return '}';
75 if (ch
== '<') return '>';
// Styling function handed to the lmBash LexerModule. Styles the text from
// startPos up to startPos + length through styler, beginning in initStyle.
// startPos may first be moved backwards (see the backtracking section) so
// that lexing restarts at a point where the needed state can be rebuilt.
// NOTE(review): a number of statements of this function (some case labels,
// branch bodies, closing braces) are not visible in this view; the comments
// added below describe only what the visible lines establish.
79 static void ColouriseBashDoc(unsigned int startPos
, int length
, int initStyle
,
80 WordList
*keywordlists
[], Accessor
&styler
) {
// keywordlists[0] is the list a finished word is looked up in.
82 WordList
&keywords
= *keywordlists
[0];
// Character classes driving the state machine below.
// setWordStart: characters that switch the default state to SCE_SH_WORD.
84 CharacterSet
setWordStart(CharacterSet::setAlpha
, "_");
85 // note that [+-] are often parts of identifiers in shell scripts
86 CharacterSet
setWord(CharacterSet::setAlphaNum
, "._+-");
// Characters that switch the default state to SCE_SH_OPERATOR.
87 CharacterSet
setBashOperator(CharacterSet::setNone
, "^&\\%()-+=|{}[]:;>,*/<?!.~@");
// Letters accepted after '-' for the one-character file test operators.
88 CharacterSet
setSingleCharOp(CharacterSet::setNone
, "rwxoRWXOezsfdlpSbctugkTBMACahGLNn");
// Characters that keep a SCE_SH_SCALAR token going.
89 CharacterSet
setParam(CharacterSet::setAlphaNum
, "$_");
// setHereDoc: characters that may follow '<<' to begin an unquoted here-doc
// delimiter; setHereDoc2: characters accepted while it is being collected.
90 CharacterSet
setHereDoc(CharacterSet::setAlpha
, "_\\-+!");
91 CharacterSet
setHereDoc2(CharacterSet::setAlphaNum
, "_-+!");
// Characters after '<<' for which it is styled as an operator instead.
92 CharacterSet
setLeftShift(CharacterSet::setDigits
, "=$");
94 class HereDocCls
{ // Class to manage HERE document elements
96 int State
; // 0: '<<' encountered
97 // 1: collect the delimiter
98 // 2: here doc text (lines after the delimiter)
99 int Quote
; // the char after '<<'
100 bool Quoted
; // true if Quote in ('\'','"','`')
101 bool Indent
; // indented delimiter (for <<-)
102 int DelimiterLength
; // strlen(Delimiter)
103 char *Delimiter
; // the Delimiter, 256: sizeof PL_tokenbuf
// Delimiter buffer is allocated with HERE_DELIM_MAX chars.
110 Delimiter
= new char[HERE_DELIM_MAX
];
// Adds ch to the delimiter and re-terminates the string. There is no bounds
// check here; the delimiter-collecting code below tests DelimiterLength
// against HERE_DELIM_MAX - 1.
113 void Append(int ch
) {
114 Delimiter
[DelimiterLength
++] = static_cast<char>(ch
);
115 Delimiter
[DelimiterLength
] = '\0';
123 class QuoteCls
{ // Class to manage quote pairs (simplified vs LexPerl)
// One past the last position to style, fixed before startPos is adjusted.
146 unsigned int endPos
= startPos
+ length
;
148 // Backtrack to beginning of style if required...
149 // If in a long distance lexical state, backtrack to find quote characters
150 if (initStyle
== SCE_SH_HERE_Q
) {
151 while ((startPos
> 1) && (styler
.StyleAt(startPos
) != SCE_SH_HERE_DELIM
)) {
154 startPos
= styler
.LineStart(styler
.GetLine(startPos
));
155 initStyle
= styler
.StyleAt(startPos
- 1);
157 // Bash strings can be multi-line with embedded newlines, so backtrack.
158 // Bash numbers have additional state during lexing, so backtrack too.
159 if (initStyle
== SCE_SH_STRING
160 || initStyle
== SCE_SH_BACKTICKS
161 || initStyle
== SCE_SH_CHARACTER
162 || initStyle
== SCE_SH_NUMBER
163 || initStyle
== SCE_SH_IDENTIFIER
164 || initStyle
== SCE_SH_COMMENTLINE
) {
165 while ((startPos
> 1) && (styler
.StyleAt(startPos
- 1) == initStyle
)) {
168 initStyle
= SCE_SH_DEFAULT
;
// Lex from the (possibly moved) startPos up to the original end position.
171 StyleContext
sc(startPos
, endPos
- startPos
, initStyle
, styler
);
173 for (; sc
.More(); sc
.Forward()) {
175 // Determine if the current state should terminate.
// An operator token returns to the default state straight away.
177 case SCE_SH_OPERATOR
:
178 sc
.SetState(SCE_SH_DEFAULT
);
// NOTE(review): the case label for this section is not visible here. The
// visible lines end a token at the first character outside setWord: text
// that starts with '-' or is found in keywords keeps its style, anything
// else is restyled as SCE_SH_IDENTIFIER.
181 // "." never used in Bash variable names but used in file names
182 if (!setWord
.Contains(sc
.ch
)) {
184 sc
.GetCurrent(s
, sizeof(s
));
185 if (s
[0] != '-' && // for file operators
186 !keywords
.InList(s
)) {
187 sc
.ChangeState(SCE_SH_IDENTIFIER
);
189 sc
.SetState(SCE_SH_DEFAULT
);
192 case SCE_SH_IDENTIFIER
:
193 if (sc
.chPrev
== '\\') { // for escaped chars
194 sc
.ForwardSetState(SCE_SH_DEFAULT
);
195 } else if (!setWord
.Contains(sc
.ch
)) {
196 sc
.SetState(SCE_SH_DEFAULT
);
// NOTE(review): number handling; its case label is not visible here.
// digit classifies the current character via translateBashDigit and numBase
// records which kind of literal is being scanned (a BASH_BASE_* code or the
// result of getBashNumberBase).
200 digit
= translateBashDigit(sc
.ch
);
201 if (numBase
== BASH_BASE_DECIMAL
) {
204 sc
.GetCurrent(s
, sizeof(s
));
205 numBase
= getBashNumberBase(s
);
206 if (numBase
!= BASH_BASE_ERROR
)
208 } else if (IsADigit(sc
.ch
))
210 } else if (numBase
== BASH_BASE_HEX
) {
211 if (IsADigit(sc
.ch
, 16))
213 #ifdef PEDANTIC_OCTAL
214 } else if (numBase
== BASH_BASE_OCTAL
||
215 numBase
== BASH_BASE_OCTAL_ERROR
) {
219 numBase
= BASH_BASE_OCTAL_ERROR
;
223 } else if (numBase
== BASH_BASE_ERROR
) {
226 } else { // DD#DDDD number style handling
227 if (digit
!= BASH_BASE_ERROR
) {
229 // case-insensitive if base<=36
230 if (digit
>= 36) digit
-= 26;
235 numBase
= BASH_BASE_ERROR
;
240 // fallthrough when number is at an end or error
241 if (numBase
== BASH_BASE_ERROR
242 #ifdef PEDANTIC_OCTAL
243 || numBase
== BASH_BASE_OCTAL_ERROR
246 sc
.ChangeState(SCE_SH_ERROR
);
248 sc
.SetState(SCE_SH_DEFAULT
);
250 case SCE_SH_COMMENTLINE
:
251 if (sc
.ch
== '\\' && (sc
.chNext
== '\r' || sc
.chNext
== '\n')) {
252 // comment continuation
254 if (sc
.ch
== '\r' && sc
.chNext
== '\n') {
257 } else if (sc
.atLineEnd
) {
258 sc
.ForwardSetState(SCE_SH_DEFAULT
);
261 case SCE_SH_HERE_DELIM
:
264 // Specifier format is: <<[-]WORD
265 // Optional '-' is for removal of leading tabs from here-doc.
266 // Whitespace acceptable after <<[-] operator
268 if (HereDoc
.State
== 0) { // '<<' encountered
269 HereDoc
.Quote
= sc
.chNext
;
270 HereDoc
.Quoted
= false;
271 HereDoc
.DelimiterLength
= 0;
272 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
] = '\0';
273 if (sc
.chNext
== '\'' || sc
.chNext
== '\"') { // a quoted here-doc delimiter (' or ")
275 HereDoc
.Quoted
= true;
277 } else if (!HereDoc
.Indent
&& sc
.chNext
== '-') { // <<- indent case
278 HereDoc
.Indent
= true;
279 } else if (setHereDoc
.Contains(sc
.chNext
)) {
280 // an unquoted here-doc delimiter, no special handling
281 // TODO check what exactly bash considers part of the delim
283 } else if (sc
.chNext
== '<') { // HERE string <<<
285 sc
.ForwardSetState(SCE_SH_DEFAULT
);
286 } else if (IsASpace(sc
.chNext
)) {
288 } else if (setLeftShift
.Contains(sc
.chNext
)) {
289 // left shift << or <<= operator cases
290 sc
.ChangeState(SCE_SH_OPERATOR
);
291 sc
.ForwardSetState(SCE_SH_DEFAULT
);
293 // symbols terminates; deprecated zero-length delimiter
296 } else if (HereDoc
.State
== 1) { // collect the delimiter
297 if (HereDoc
.Quoted
) { // a quoted here-doc delimiter
298 if (sc
.ch
== HereDoc
.Quote
) { // closing quote => end of delimiter
299 sc
.ForwardSetState(SCE_SH_DEFAULT
);
301 if (sc
.ch
== '\\' && sc
.chNext
== HereDoc
.Quote
) { // escaped quote
304 HereDoc
.Append(sc
.ch
);
306 } else { // an unquoted here-doc delimiter
307 if (setHereDoc2
.Contains(sc
.ch
)) {
308 HereDoc
.Append(sc
.ch
);
309 } else if (sc
.ch
== '\\') {
310 // skip escape prefix
312 sc
.SetState(SCE_SH_DEFAULT
);
// Guard for the fixed-size delimiter buffer: an over-long delimiter is
// styled as an error.
315 if (HereDoc
.DelimiterLength
>= HERE_DELIM_MAX
- 1) { // force blowup
316 sc
.SetState(SCE_SH_ERROR
);
322 // HereDoc.State == 2
// At the start of a here-doc body line: style it SCE_SH_HERE_Q, step over
// leading whitespace, then read to the end of the line and compare that text
// with the stored delimiter.
323 if (sc
.atLineStart
) {
324 sc
.SetState(SCE_SH_HERE_Q
);
326 while (IsASpace(sc
.ch
) && !sc
.atLineEnd
) { // whitespace prefix
331 sc
.SetState(SCE_SH_HERE_Q
);
332 while (!sc
.atLineEnd
) {
335 char s
[HERE_DELIM_MAX
];
336 sc
.GetCurrent(s
, sizeof(s
));
337 if (strcmp(HereDoc
.Delimiter
, s
) == 0) {
// A matching line only closes the here-doc when its indentation agrees with
// the <<- flag: leading whitespace with Indent set, none without.
// NOTE(review): prefixws is updated on lines not visible here; presumably it
// counts the whitespace skipped above.
338 if ((prefixws
> 0 && HereDoc
.Indent
) || // indentation rule
339 (prefixws
== 0 && !HereDoc
.Indent
)) {
340 sc
.SetState(SCE_SH_DEFAULT
);
346 case SCE_SH_SCALAR
: // variable names
347 if (!setParam
.Contains(sc
.ch
)) {
348 if (sc
.LengthCurrent() == 1) {
349 // Special variable: $(, $_ etc.
350 sc
.ForwardSetState(SCE_SH_DEFAULT
);
352 sc
.SetState(SCE_SH_DEFAULT
);
356 case SCE_SH_STRING
: // delimited styles
357 case SCE_SH_CHARACTER
:
358 case SCE_SH_BACKTICKS
:
// NOTE(review): Quote.Up / Quote.Down look like the opening and closing
// delimiter of the current quoted run and Quote.Count like a nesting
// counter; the lines that update them are not visible here.
360 if (sc
.ch
== '\\' && Quote
.Up
!= '\\') {
362 } else if (sc
.ch
== Quote
.Down
) {
364 if (Quote
.Count
== 0) {
365 sc
.ForwardSetState(SCE_SH_DEFAULT
);
367 } else if (sc
.ch
== Quote
.Up
) {
373 // Must check end of HereDoc state 1 before default state is handled
374 if (HereDoc
.State
== 1 && sc
.atLineEnd
) {
375 // Begin of here-doc (the line after the here-doc delimiter):
376 // Lexically, the here-doc starts from the next line after the >>, but the
377 // first line of here-doc seem to follow the style of the last EOL sequence
379 if (HereDoc
.Quoted
) {
380 if (sc
.state
== SCE_SH_HERE_DELIM
) {
381 // Missing quote at end of string! We are stricter than bash.
382 // Colour here-doc anyway while marking this bit as an error.
383 sc
.ChangeState(SCE_SH_ERROR
);
385 // HereDoc.Quote always == '\''
387 sc
.SetState(SCE_SH_HERE_Q
);
390 // Determine if a new state should be entered.
391 if (sc
.state
== SCE_SH_DEFAULT
) {
392 if (sc
.ch
== '\\') { // escaped character
393 sc
.SetState(SCE_SH_IDENTIFIER
);
394 } else if (IsADigit(sc
.ch
)) {
395 sc
.SetState(SCE_SH_NUMBER
);
396 numBase
= BASH_BASE_DECIMAL
;
397 if (sc
.ch
== '0') { // hex,octal
398 if (sc
.chNext
== 'x' || sc
.chNext
== 'X') {
399 numBase
= BASH_BASE_HEX
;
401 } else if (IsADigit(sc
.chNext
)) {
402 #ifdef PEDANTIC_OCTAL
403 numBase
= BASH_BASE_OCTAL
;
// NOTE(review): presumably the non-pedantic branch (the #else line is not
// visible here): a leading 0 followed by a digit is scanned with the hex
// rules.
405 numBase
= BASH_BASE_HEX
;
409 } else if (setWordStart
.Contains(sc
.ch
)) {
410 sc
.SetState(SCE_SH_WORD
);
411 } else if (sc
.ch
== '#') {
412 sc
.SetState(SCE_SH_COMMENTLINE
);
413 } else if (sc
.ch
== '\"') {
414 sc
.SetState(SCE_SH_STRING
);
416 } else if (sc
.ch
== '\'') {
417 sc
.SetState(SCE_SH_CHARACTER
);
419 } else if (sc
.ch
== '`') {
420 sc
.SetState(SCE_SH_BACKTICKS
);
422 } else if (sc
.ch
== '$') {
423 sc
.SetState(SCE_SH_SCALAR
);
// After '$', the token is restyled according to the delimiter that follows.
426 sc
.ChangeState(SCE_SH_PARAM
);
427 } else if (sc
.ch
== '\'') {
428 sc
.ChangeState(SCE_SH_CHARACTER
);
429 } else if (sc
.ch
== '"') {
430 sc
.ChangeState(SCE_SH_STRING
);
431 } else if (sc
.ch
== '(' || sc
.ch
== '`') {
432 sc
.ChangeState(SCE_SH_BACKTICKS
);
433 if (sc
.chNext
== '(') { // $(( is lexed as operator
434 sc
.ChangeState(SCE_SH_OPERATOR
);
437 continue; // scalar has no delimiter pair
439 // fallthrough, open delim for $[{'"(`]
441 } else if (sc
.Match('<', '<')) {
442 sc
.SetState(SCE_SH_HERE_DELIM
);
444 HereDoc
.Indent
= false;
445 } else if (sc
.ch
== '-' && // one-char file test operators
446 setSingleCharOp
.Contains(sc
.chNext
) &&
447 !setWord
.Contains(sc
.GetRelative(2)) &&
448 IsASpace(sc
.chPrev
)) {
449 sc
.SetState(SCE_SH_WORD
);
451 } else if (setBashOperator
.Contains(sc
.ch
)) {
452 sc
.SetState(SCE_SH_OPERATOR
);
// Helper used by FoldBashDoc for comment folding. Scans the positions of
// `line` from its start up to, but not including, the position just before
// the start of the next line; the visible branch deals with characters other
// than space and tab.
// NOTE(review): the '#' test and the return statements are not visible here;
// presumably the result is true when the first non-blank character starts a
// comment - confirm against the full source.
459 static bool IsCommentLine(int line
, Accessor
&styler
) {
460 int pos
= styler
.LineStart(line
);
461 int eol_pos
= styler
.LineStart(line
+ 1) - 1;
462 for (int i
= pos
; i
< eol_pos
; i
++) {
466 else if (ch
!= ' ' && ch
!= '\t')
// Fold function handed to the lmBash LexerModule. Walks the characters from
// startPos up to startPos + length, tracking the fold level, and writes a
// level for each finished line through styler.SetLevel.
// NOTE(review): the statements that raise and lower levelCurrent, and the
// declarations of ch and lev, are not visible here.
472 static void FoldBashDoc(unsigned int startPos
, int length
, int, WordList
*[],
// Folding options read from the properties "fold.comment" and
// "fold.compact" (the latter with 1 as the fallback value).
474 bool foldComment
= styler
.GetPropertyInt("fold.comment") != 0;
475 bool foldCompact
= styler
.GetPropertyInt("fold.compact", 1) != 0;
476 unsigned int endPos
= startPos
+ length
;
477 int visibleChars
= 0;
478 int lineCurrent
= styler
.GetLine(startPos
);
// Start from the level number already stored for the first line.
479 int levelPrev
= styler
.LevelAt(lineCurrent
) & SC_FOLDLEVELNUMBERMASK
;
480 int levelCurrent
= levelPrev
;
481 char chNext
= styler
[startPos
];
482 int styleNext
= styler
.StyleAt(startPos
);
483 for (unsigned int i
= startPos
; i
< endPos
; i
++) {
485 chNext
= styler
.SafeGetCharAt(i
+ 1);
486 int style
= styleNext
;
487 styleNext
= styler
.StyleAt(i
+ 1);
// A line ends at '\n', or at a '\r' that is not followed by '\n'.
488 bool atEOL
= (ch
== '\r' && chNext
!= '\n') || (ch
== '\n');
// Comment folding: at the end of a comment line, look at its neighbours.
// The first test matches the first line of a comment run (previous line is
// not a comment, next one is), the second matches the last line of a run.
490 if (foldComment
&& atEOL
&& IsCommentLine(lineCurrent
, styler
))
492 if (!IsCommentLine(lineCurrent
- 1, styler
)
493 && IsCommentLine(lineCurrent
+ 1, styler
))
495 else if (IsCommentLine(lineCurrent
- 1, styler
)
496 && !IsCommentLine(lineCurrent
+ 1, styler
))
// Brace folding only considers characters styled as operators.
499 if (style
== SCE_SH_OPERATOR
) {
502 } else if (ch
== '}') {
// Flags for the finished line: whitespace flag when it had no visible
// characters and compact folding is on; header flag when the level rose on a
// line with visible characters. The level is written only if it changed.
508 if (visibleChars
== 0 && foldCompact
)
509 lev
|= SC_FOLDLEVELWHITEFLAG
;
510 if ((levelCurrent
> levelPrev
) && (visibleChars
> 0))
511 lev
|= SC_FOLDLEVELHEADERFLAG
;
512 if (lev
!= styler
.LevelAt(lineCurrent
)) {
513 styler
.SetLevel(lineCurrent
, lev
);
516 levelPrev
= levelCurrent
;
519 if (!isspacechar(ch
))
522 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
523 int flagsNext
= styler
.LevelAt(lineCurrent
) & ~SC_FOLDLEVELNUMBERMASK
;
524 styler
.SetLevel(lineCurrent
, levelPrev
| flagsNext
);
// Array of C strings passed as the last argument of the lmBash registration.
// NOTE(review): the entries are not visible here; presumably one description
// per keyword list - confirm against the full source.
527 static const char * const bashWordListDesc
[] = {
// Lexer module object for Bash: constructed with the SCLEX_BASH id, the
// ColouriseBashDoc function, the name "bash", the FoldBashDoc function and
// the bashWordListDesc array.
532 LexerModule
lmBash(SCLEX_BASH
, ColouriseBashDoc
, "bash", FoldBashDoc
, bashWordListDesc
);