1 // Scintilla source code edit control
5 // Copyright 2004-2008 by Neil Hodgson <neilh@scintilla.org>
6 // Adapted from LexPerl by Kein-Hong Man 2004
7 // The License.txt file describes the conditions under which this software may be distributed.
19 #include "StyleContext.h"
21 #include "Scintilla.h"
23 #include "CharacterSet.h"
26 using namespace Scintilla
;
// Capacity, in chars, of the here-document delimiter buffer allocated by HereDocCls
// and of the scratch buffer used when comparing a here-doc line with the delimiter.
29 #define HERE_DELIM_MAX 256
// The two lines below describe the PEDANTIC_OCTAL switch that is tested with #ifdef
// inside the number-lexing code.
31 // define this if you want 'invalid octals' to be marked as errors
32 // usually, this is not a good idea, permissive lexing is better
// Number-lexing modes kept in numBase. getBashNumberBase() rejects any base above 64,
// so these codes (65..69) cannot collide with a real numeric base stored in the same variable.
35 #define BASH_BASE_ERROR 65
36 #define BASH_BASE_DECIMAL 66
37 #define BASH_BASE_HEX 67
39 #define BASH_BASE_OCTAL 68
40 #define BASH_BASE_OCTAL_ERROR 69
43 static inline int translateBashDigit(int ch
) {
44 if (ch
>= '0' && ch
<= '9') {
46 } else if (ch
>= 'a' && ch
<= 'z') {
48 } else if (ch
>= 'A' && ch
<= 'Z') {
50 } else if (ch
== '@') {
52 } else if (ch
== '_') {
55 return BASH_BASE_ERROR
;
58 static inline int getBashNumberBase(char *s
) {
62 base
= base
* 10 + (*s
++ - '0');
65 if (base
> 64 || i
> 2) {
66 return BASH_BASE_ERROR
;
// Return the closing delimiter that pairs with the opening delimiter ch.
// The four bracket-like openers map to their mirror image; any other character
// (for example a quote) is its own closer and is returned unchanged.
static int opposite(int ch) {
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
// Colouriser registered for the bash lexer (see lmBash at the end of the file).
// Styles the range that starts at startPos and spans length chars; initStyle is the
// style in effect at startPos and may be changed by the backtracking code below.
// keywordlists[0] supplies the keyword list; styler gives access to text and styles.
79 static void ColouriseBashDoc(unsigned int startPos
, int length
, int initStyle
,
80 WordList
*keywordlists
[], Accessor
&styler
) {
82 WordList
&keywords
= *keywordlists
[0];
// Characters that may begin a word.
84 CharacterSet
setWordStart(CharacterSet::setAlpha
, "_");
85 // note that [+-] are often parts of identifiers in shell scripts
// Characters that keep a word or identifier going once started.
86 CharacterSet
setWord(CharacterSet::setAlphaNum
, "._+-");
// Characters that start an operator when nothing more specific matched.
87 CharacterSet
setBashOperator(CharacterSet::setNone
, "^&\\%()-+=|{}[]:;>,*/<?!.~@");
// Letters accepted after '-' for the one-char file test operators.
88 CharacterSet
setSingleCharOp(CharacterSet::setNone
, "rwxoRWXOezsfdlpSbctugkTBMACahGLNn");
// Characters that continue a $scalar name.
89 CharacterSet
setParam(CharacterSet::setAlphaNum
, "$_");
// Characters that may follow '<<' to begin an unquoted here-doc delimiter.
90 CharacterSet
setHereDoc(CharacterSet::setAlpha
, "_\\-+!");
// Characters appended to the delimiter while it is being collected.
91 CharacterSet
setHereDoc2(CharacterSet::setAlphaNum
, "_-+!");
// Characters after '<<' that make it a left-shift operator rather than a here-doc.
92 CharacterSet
setLeftShift(CharacterSet::setDigits
, "=$");
94 class HereDocCls
{ // Class to manage HERE document elements
96 int State
; // 0: '<<' encountered
97 // 1: collect the delimiter
98 // 2: here doc text (lines after the delimiter)
99 int Quote
; // the char after '<<'
100 bool Quoted
; // true if Quote in ('\'','"','`')
101 bool Indent
; // indented delimiter (for <<-)
102 int DelimiterLength
; // strlen(Delimiter)
103 char *Delimiter
; // the Delimiter buffer of HERE_DELIM_MAX chars; 256 is the sizeof PL_tokenbuf (name presumably inherited from LexPerl)
// NOTE(review): the matching delete[] for this allocation is not visible in this excerpt - confirm it exists.
110 Delimiter
= new char[HERE_DELIM_MAX
];
// Append one char and keep the buffer NUL-terminated. No bound check is done here:
// the lexer loop tests DelimiterLength against HERE_DELIM_MAX - 1 after collecting.
113 void Append(int ch
) {
114 Delimiter
[DelimiterLength
++] = static_cast<char>(ch
);
115 Delimiter
[DelimiterLength
] = '\0';
// The lexer loop reads the Up, Down and Count members of this class while it scans
// the delimited styles (string, character, backticks).
123 class QuoteCls
{ // Class to manage quote pairs (simplified vs LexPerl)
// endPos is fixed before startPos is moved backwards, so the styled range always
// ends where the caller asked even when it starts earlier.
146 unsigned int endPos
= startPos
+ length
;
148 // Backtrack to beginning of style if required...
149 // If in a long distance lexical state, backtrack to find quote characters
150 if (initStyle
== SCE_SH_HERE_Q
) {
// Walk back until a position styled as the here-doc delimiter is found.
151 while ((startPos
> 1) && (styler
.StyleAt(startPos
) != SCE_SH_HERE_DELIM
)) {
// Restart from the beginning of that line, using the style just before it.
154 startPos
= styler
.LineStart(styler
.GetLine(startPos
));
155 initStyle
= styler
.StyleAt(startPos
- 1);
157 // Bash strings can be multi-line with embedded newlines, so backtrack.
158 // Bash numbers have additional state during lexing, so backtrack too.
159 if (initStyle
== SCE_SH_STRING
160 || initStyle
== SCE_SH_BACKTICKS
161 || initStyle
== SCE_SH_CHARACTER
162 || initStyle
== SCE_SH_NUMBER
163 || initStyle
== SCE_SH_IDENTIFIER
164 || initStyle
== SCE_SH_COMMENTLINE
) {
// Walk back to the first char of the run that carries initStyle.
165 while ((startPos
> 1) && (styler
.StyleAt(startPos
- 1) == initStyle
)) {
168 initStyle
= SCE_SH_DEFAULT
;
// The length passed on is recomputed from the (possibly moved) start position.
171 StyleContext
sc(startPos
, endPos
- startPos
, initStyle
, styler
);
// Main lexing loop: one iteration per character of the range.
173 for (; sc
.More(); sc
.Forward()) {
175 // Determine if the current state should terminate.
// An operator is a single character: leave the state on the very next char.
177 case SCE_SH_OPERATOR
:
178 sc
.SetState(SCE_SH_DEFAULT
);
181 // "." never used in Bash variable names but used in file names
182 if (!setWord
.Contains(sc
.ch
)) {
// The word has ended: unless it starts with '-' or is in the keyword list,
// restyle it as a plain identifier.
184 sc
.GetCurrent(s
, sizeof(s
));
185 if (s
[0] != '-' && // for file operators
186 !keywords
.InList(s
)) {
187 sc
.ChangeState(SCE_SH_IDENTIFIER
);
189 sc
.SetState(SCE_SH_DEFAULT
);
192 case SCE_SH_IDENTIFIER
:
193 if (sc
.chPrev
== '\\') { // for escaped chars
194 sc
.ForwardSetState(SCE_SH_DEFAULT
);
195 } else if (!setWord
.Contains(sc
.ch
)) {
196 sc
.SetState(SCE_SH_DEFAULT
);
// Number handling: digit is the base-64 style value of the current char and
// numBase holds either a real base (up to 64) or one of the BASH_BASE_* codes.
200 digit
= translateBashDigit(sc
.ch
);
201 if (numBase
== BASH_BASE_DECIMAL
) {
// The text lexed so far is re-read as a base prefix.
204 sc
.GetCurrent(s
, sizeof(s
));
205 numBase
= getBashNumberBase(s
);
206 if (numBase
!= BASH_BASE_ERROR
)
208 } else if (IsADigit(sc
.ch
))
210 } else if (numBase
== BASH_BASE_HEX
) {
211 if (IsADigit(sc
.ch
, 16))
213 #ifdef PEDANTIC_OCTAL
214 } else if (numBase
== BASH_BASE_OCTAL
||
215 numBase
== BASH_BASE_OCTAL_ERROR
) {
219 numBase
= BASH_BASE_OCTAL_ERROR
;
223 } else if (numBase
== BASH_BASE_ERROR
) {
226 } else { // DD#DDDD number style handling
227 if (digit
!= BASH_BASE_ERROR
) {
229 // case-insensitive if base<=36
// Values of 36 and above are lowered by 26 (presumably folding upper-case
// letter digits onto the lower-case ones).
230 if (digit
>= 36) digit
-= 26;
235 numBase
= BASH_BASE_ERROR
;
240 // fallthrough when number is at an end or error
// A number that ended in an error mode is restyled as an error before leaving the state.
241 if (numBase
== BASH_BASE_ERROR
242 #ifdef PEDANTIC_OCTAL
243 || numBase
== BASH_BASE_OCTAL_ERROR
246 sc
.ChangeState(SCE_SH_ERROR
);
248 sc
.SetState(SCE_SH_DEFAULT
);
250 case SCE_SH_COMMENTLINE
:
// The comment ends at the end of the line unless the previous char is a backslash.
251 if (sc
.atLineEnd
&& sc
.chPrev
!= '\\') {
252 sc
.SetState(SCE_SH_DEFAULT
);
255 case SCE_SH_HERE_DELIM
:
258 // Specifier format is: <<[-]WORD
259 // Optional '-' is for removal of leading tabs from here-doc.
260 // Whitespace acceptable after <<[-] operator
262 if (HereDoc
.State
== 0) { // '<<' encountered
// Reset the delimiter bookkeeping, then classify what follows the '<<'.
263 HereDoc
.Quote
= sc
.chNext
;
264 HereDoc
.Quoted
= false;
265 HereDoc
.DelimiterLength
= 0;
266 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
] = '\0';
267 if (sc
.chNext
== '\'' || sc
.chNext
== '\"') { // a quoted here-doc delimiter (' or ")
269 HereDoc
.Quoted
= true;
271 } else if (!HereDoc
.Indent
&& sc
.chNext
== '-') { // <<- indent case
272 HereDoc
.Indent
= true;
273 } else if (setHereDoc
.Contains(sc
.chNext
)) {
274 // an unquoted here-doc delimiter, no special handling
275 // TODO check what exactly bash considers part of the delim
277 } else if (sc
.chNext
== '<') { // HERE string <<<
279 sc
.ForwardSetState(SCE_SH_DEFAULT
);
280 } else if (IsASpace(sc
.chNext
)) {
282 } else if (setLeftShift
.Contains(sc
.chNext
)) {
283 // left shift << or <<= operator cases
284 sc
.ChangeState(SCE_SH_OPERATOR
);
285 sc
.ForwardSetState(SCE_SH_DEFAULT
);
287 // any other symbol terminates; deprecated zero-length delimiter
290 } else if (HereDoc
.State
== 1) { // collect the delimiter
// A char is collected when it is a delimiter char or when it was escaped.
291 if (setHereDoc2
.Contains(sc
.ch
) || sc
.chPrev
== '\\') {
292 HereDoc
.Append(sc
.ch
);
293 } else if (HereDoc
.Quoted
&& sc
.ch
== HereDoc
.Quote
) { // closing quote => end of delimiter
294 sc
.ForwardSetState(SCE_SH_DEFAULT
);
295 } else if (sc
.ch
== '\\') {
296 // skip escape prefix
298 sc
.SetState(SCE_SH_DEFAULT
);
// Append() does not check bounds, so an over-long delimiter is turned into an error here.
300 if (HereDoc
.DelimiterLength
>= HERE_DELIM_MAX
- 1) { // force blowup
301 sc
.SetState(SCE_SH_ERROR
);
307 // HereDoc.State == 2
308 if (sc
.atLineStart
) {
309 sc
.SetState(SCE_SH_HERE_Q
);
311 while (IsASpace(sc
.ch
) && !sc
.atLineEnd
) { // whitespace prefix
// Start a fresh segment after the whitespace prefix so that the text fetched
// below holds only the remainder of the line.
316 sc
.SetState(SCE_SH_HERE_Q
);
317 while (!sc
.atLineEnd
) {
320 char s
[HERE_DELIM_MAX
];
321 sc
.GetCurrent(s
, sizeof(s
));
// The length test comes before the strlen(s) - 1 indexing below.
322 if (sc
.LengthCurrent() == 0)
// Drop a trailing carriage return before comparing with the delimiter.
324 if (s
[strlen(s
) - 1] == '\r')
325 s
[strlen(s
) - 1] = '\0';
326 if (strcmp(HereDoc
.Delimiter
, s
) == 0) {
// NOTE(review): for <<- this requires at least one leading whitespace char
// before the terminator; bash strips leading tabs but does not appear to
// require any - confirm whether an unindented terminator should also match.
327 if ((prefixws
> 0 && HereDoc
.Indent
) || // indentation rule
328 (prefixws
== 0 && !HereDoc
.Indent
)) {
329 sc
.SetState(SCE_SH_DEFAULT
);
335 case SCE_SH_SCALAR
: // variable names
336 if (!setParam
.Contains(sc
.ch
)) {
// Only the '$' has been seen so far: the current char itself belongs to the scalar.
337 if (sc
.LengthCurrent() == 1) {
338 // Special variable: $(, $_ etc.
339 sc
.ForwardSetState(SCE_SH_DEFAULT
);
341 sc
.SetState(SCE_SH_DEFAULT
);
345 case SCE_SH_STRING
: // delimited styles
346 case SCE_SH_CHARACTER
:
347 case SCE_SH_BACKTICKS
:
// Backslash is treated as an escape unless it is itself the opening delimiter.
349 if (sc
.ch
== '\\' && Quote
.Up
!= '\\') {
351 } else if (sc
.ch
== Quote
.Down
) {
// The style ends once the quote counter is back at zero.
353 if (Quote
.Count
== 0) {
354 sc
.ForwardSetState(SCE_SH_DEFAULT
);
356 } else if (sc
.ch
== Quote
.Up
) {
362 // Must check end of HereDoc state 1 before default state is handled
363 if (HereDoc
.State
== 1 && sc
.atLineEnd
) {
364 // Begin of here-doc (the line after the here-doc delimiter):
365 // Lexically, the here-doc starts from the next line after the <<, but the
366 // first line of here-doc seems to follow the style of the last EOL sequence
368 if (HereDoc
.Quoted
) {
369 if (sc
.state
== SCE_SH_HERE_DELIM
) {
370 // Missing quote at end of string! We are stricter than bash.
371 // Colour here-doc anyway while marking this bit as an error.
372 sc
.ChangeState(SCE_SH_ERROR
);
374 // HereDoc.Quote is the opening quote char. NOTE(review): the state-0 code also accepts a double quote, so it is not always '\''
376 sc
.SetState(SCE_SH_HERE_Q
);
379 // Determine if a new state should be entered.
380 if (sc
.state
== SCE_SH_DEFAULT
) {
381 if (sc
.ch
== '\\') { // escaped character
382 sc
.SetState(SCE_SH_IDENTIFIER
);
383 } else if (IsADigit(sc
.ch
)) {
// Every number starts in decimal mode; a leading zero may switch the mode below.
384 sc
.SetState(SCE_SH_NUMBER
);
385 numBase
= BASH_BASE_DECIMAL
;
386 if (sc
.ch
== '0') { // hex,octal
387 if (sc
.chNext
== 'x' || sc
.chNext
== 'X') {
388 numBase
= BASH_BASE_HEX
;
390 } else if (IsADigit(sc
.chNext
)) {
391 #ifdef PEDANTIC_OCTAL
392 numBase
= BASH_BASE_OCTAL
;
// Presumably the non-pedantic alternative: a leading-zero number is lexed with the
// hex rules, so no digit in it is flagged as an invalid octal.
394 numBase
= BASH_BASE_HEX
;
398 } else if (setWordStart
.Contains(sc
.ch
)) {
399 sc
.SetState(SCE_SH_WORD
);
400 } else if (sc
.ch
== '#') {
401 sc
.SetState(SCE_SH_COMMENTLINE
);
402 } else if (sc
.ch
== '\"') {
403 sc
.SetState(SCE_SH_STRING
);
405 } else if (sc
.ch
== '\'') {
406 sc
.SetState(SCE_SH_CHARACTER
);
408 } else if (sc
.ch
== '`') {
409 sc
.SetState(SCE_SH_BACKTICKS
);
411 } else if (sc
.ch
== '$') {
// Start as a scalar, then restyle according to the character that follows the '$'.
412 sc
.SetState(SCE_SH_SCALAR
);
415 sc
.ChangeState(SCE_SH_PARAM
);
416 } else if (sc
.ch
== '\'') {
417 sc
.ChangeState(SCE_SH_CHARACTER
);
418 } else if (sc
.ch
== '"') {
419 sc
.ChangeState(SCE_SH_STRING
);
420 } else if (sc
.ch
== '(' || sc
.ch
== '`') {
421 sc
.ChangeState(SCE_SH_BACKTICKS
);
422 if (sc
.chNext
== '(') { // $(( is lexed as operator
423 sc
.ChangeState(SCE_SH_OPERATOR
);
426 continue; // scalar has no delimiter pair
428 // fallthrough, open delim for $[{'"(`]
430 } else if (sc
.Match('<', '<')) {
// Possible here-doc: the SCE_SH_HERE_DELIM handling decides on the following
// chars whether this is a here-doc, a here-string or a shift operator.
431 sc
.SetState(SCE_SH_HERE_DELIM
);
433 HereDoc
.Indent
= false;
// A file test such as -f is recognised only when '-' follows whitespace, is followed
// by one operator letter, and the char after that letter cannot continue a word.
434 } else if (sc
.ch
== '-' && // one-char file test operators
435 setSingleCharOp
.Contains(sc
.chNext
) &&
436 !setWord
.Contains(sc
.GetRelative(2)) &&
437 IsASpace(sc
.chPrev
)) {
438 sc
.SetState(SCE_SH_WORD
);
440 } else if (setBashOperator
.Contains(sc
.ch
)) {
441 sc
.SetState(SCE_SH_OPERATOR
);
// Helper for the folder: examines the characters of the given line, from the line
// start up to (not including) the char just before the next line's start.
// Spaces and tabs are skipped; the visible branch stops the scan at the first other char.
// NOTE(review): the statements that test for the comment character and return the
// result are not visible in this excerpt.
448 static bool IsCommentLine(int line
, Accessor
&styler
) {
449 int pos
= styler
.LineStart(line
);
// One before the start of the next line, so the last char of the line is not examined.
450 int eol_pos
= styler
.LineStart(line
+ 1) - 1;
451 for (int i
= pos
; i
< eol_pos
; i
++) {
455 else if (ch
!= ' ' && ch
!= '\t')
// Folder registered for the bash lexer (see lmBash at the end of the file).
// Computes a fold level for every line in the range that starts at startPos and
// spans length chars. The initial-style and word-list parameters are unnamed and unused.
461 static void FoldBashDoc(unsigned int startPos
, int length
, int, WordList
*[],
// Folding options read from the properties; fold.compact defaults to 1.
463 bool foldComment
= styler
.GetPropertyInt("fold.comment") != 0;
464 bool foldCompact
= styler
.GetPropertyInt("fold.compact", 1) != 0;
465 unsigned int endPos
= startPos
+ length
;
466 int visibleChars
= 0;
467 int lineCurrent
= styler
.GetLine(startPos
);
// Start from the level already stored for the first line, without its flag bits.
468 int levelPrev
= styler
.LevelAt(lineCurrent
) & SC_FOLDLEVELNUMBERMASK
;
469 int levelCurrent
= levelPrev
;
// chNext and styleNext are primed with the first char so each iteration can shift
// them into the current char and style.
470 char chNext
= styler
[startPos
];
471 int styleNext
= styler
.StyleAt(startPos
);
472 for (unsigned int i
= startPos
; i
< endPos
; i
++) {
474 chNext
= styler
.SafeGetCharAt(i
+ 1);
475 int style
= styleNext
;
476 styleNext
= styler
.StyleAt(i
+ 1);
// A CR that is followed by LF is not a line end by itself; the LF is.
477 bool atEOL
= (ch
== '\r' && chNext
!= '\n') || (ch
== '\n');
// Comment folding looks at the neighbouring lines to find where a run of
// comment lines begins and where it ends.
479 if (foldComment
&& atEOL
&& IsCommentLine(lineCurrent
, styler
))
481 if (!IsCommentLine(lineCurrent
- 1, styler
)
482 && IsCommentLine(lineCurrent
+ 1, styler
))
484 else if (IsCommentLine(lineCurrent
- 1, styler
)
485 && !IsCommentLine(lineCurrent
+ 1, styler
))
// Only characters styled as operators are considered for brace folding.
488 if (style
== SCE_SH_OPERATOR
) {
491 } else if (ch
== '}') {
// A line with no visible chars gets the white flag when compact folding is on.
497 if (visibleChars
== 0 && foldCompact
)
498 lev
|= SC_FOLDLEVELWHITEFLAG
;
// A non-blank line on which the level rose becomes a fold header.
499 if ((levelCurrent
> levelPrev
) && (visibleChars
> 0))
500 lev
|= SC_FOLDLEVELHEADERFLAG
;
// Store the level only when it differs from what is already recorded.
501 if (lev
!= styler
.LevelAt(lineCurrent
)) {
502 styler
.SetLevel(lineCurrent
, lev
);
505 levelPrev
= levelCurrent
;
508 if (!isspacechar(ch
))
511 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
512 int flagsNext
= styler
.LevelAt(lineCurrent
) & ~SC_FOLDLEVELNUMBERMASK
;
513 styler
.SetLevel(lineCurrent
, levelPrev
| flagsNext
);
// Word list descriptions handed to the LexerModule registration below.
// NOTE(review): the entries of this array are not visible in this excerpt.
516 static const char * const bashWordListDesc
[] = {
// Lexer registration: ties SCLEX_BASH and the name "bash" to the colouriser,
// the folder and the word list descriptions defined in this file.
521 LexerModule
lmBash(SCLEX_BASH
, ColouriseBashDoc
, "bash", FoldBashDoc
, bashWordListDesc
);