Merge pull request #2212 from TwlyY29/bibtex-parser
[geany-mirror.git] / scintilla / lexers / LexBash.cxx
blob45832b77d4a7614fa9dcadeb35d99b33fc18d291
1 // Scintilla source code edit control
2 /** @file LexBash.cxx
3 ** Lexer for Bash.
4 **/
5 // Copyright 2004-2012 by Neil Hodgson <neilh@scintilla.org>
6 // Adapted from LexPerl by Kein-Hong Man 2004
7 // The License.txt file describes the conditions under which this software may be distributed.
9 #include <stdlib.h>
10 #include <string.h>
11 #include <stdio.h>
12 #include <stdarg.h>
13 #include <assert.h>
15 #include <string>
16 #include <vector>
17 #include <map>
19 #include "ILexer.h"
20 #include "Scintilla.h"
21 #include "SciLexer.h"
23 #include "StringCopy.h"
24 #include "WordList.h"
25 #include "LexAccessor.h"
26 #include "StyleContext.h"
27 #include "CharacterSet.h"
28 #include "LexerModule.h"
29 #include "OptionSet.h"
30 #include "SubStyles.h"
31 #include "DefaultLexer.h"
33 using namespace Scintilla;
// Size of the HereDocCls::Delimiter buffer declared in Lex(), terminator included.
35 #define HERE_DELIM_MAX 256
37 // define this if you want 'invalid octals' to be marked as errors
38 // usually, this is not a good idea, permissive lexing is better
39 #undef PEDANTIC_OCTAL
// Values held in numBase while a SCE_SH_NUMBER is being lexed.
// getBashNumberBase() only returns explicit bases up to 64, so these
// markers (65 and above) cannot be mistaken for a real base.
41 #define BASH_BASE_ERROR 65
42 #define BASH_BASE_DECIMAL 66
43 #define BASH_BASE_HEX 67
// The octal markers exist only when PEDANTIC_OCTAL is defined.
44 #ifdef PEDANTIC_OCTAL
45 #define BASH_BASE_OCTAL 68
46 #define BASH_BASE_OCTAL_ERROR 69
47 #endif
49 // state constants for parts of a bash command segment
// (kept in cmdState inside Lex() and written to the per-line state)
50 #define BASH_CMD_BODY 0
51 #define BASH_CMD_START 1
52 #define BASH_CMD_WORD 2
53 #define BASH_CMD_TEST 3
54 #define BASH_CMD_ARITH 4
55 #define BASH_CMD_DELIM 5
57 // state constants for nested delimiter pairs, used by
58 // SCE_SH_STRING and SCE_SH_BACKTICKS processing
59 #define BASH_DELIM_LITERAL 0
60 #define BASH_DELIM_STRING 1
61 #define BASH_DELIM_CSTRING 2
62 #define BASH_DELIM_LSTRING 3
63 #define BASH_DELIM_COMMAND 4
64 #define BASH_DELIM_BACKTICK 5
// Number of enclosing delimiter levels QuoteStackCls can save.
66 #define BASH_DELIM_STACK_MAX 7
68 namespace {
70 inline int translateBashDigit(int ch) {
71 if (ch >= '0' && ch <= '9') {
72 return ch - '0';
73 } else if (ch >= 'a' && ch <= 'z') {
74 return ch - 'a' + 10;
75 } else if (ch >= 'A' && ch <= 'Z') {
76 return ch - 'A' + 36;
77 } else if (ch == '@') {
78 return 62;
79 } else if (ch == '_') {
80 return 63;
82 return BASH_BASE_ERROR;
85 inline int getBashNumberBase(char *s) {
86 int i = 0;
87 int base = 0;
88 while (*s) {
89 base = base * 10 + (*s++ - '0');
90 i++;
92 if (base > 64 || i > 2) {
93 return BASH_BASE_ERROR;
95 return base;
// Return the closing delimiter that pairs with an opening bracket character.
// Characters that are not one of ( [ { < are returned unchanged, so quote
// characters close themselves.
int opposite(int ch) {
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
106 int GlobScan(StyleContext &sc) {
107 // forward scan for zsh globs, disambiguate versus bash arrays
108 // complex expressions may still fail, e.g. unbalanced () '' "" etc
109 int c, sLen = 0;
110 int pCount = 0;
111 int hash = 0;
112 while ((c = sc.GetRelativeCharacter(++sLen)) != 0) {
113 if (IsASpace(c)) {
114 return 0;
115 } else if (c == '\'' || c == '\"') {
116 if (hash != 2) return 0;
117 } else if (c == '#' && hash == 0) {
118 hash = (sLen == 1) ? 2:1;
119 } else if (c == '(') {
120 pCount++;
121 } else if (c == ')') {
122 if (pCount == 0) {
123 if (hash) return sLen;
124 return 0;
126 pCount--;
129 return 0;
132 bool IsCommentLine(Sci_Position line, LexAccessor &styler) {
133 Sci_Position pos = styler.LineStart(line);
134 Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
135 for (Sci_Position i = pos; i < eol_pos; i++) {
136 char ch = styler[i];
137 if (ch == '#')
138 return true;
139 else if (ch != ' ' && ch != '\t')
140 return false;
142 return false;
// Options implemented for the Bash lexer.
struct OptionsBash {
	bool fold;		// master switch checked at the top of Fold()
	bool foldComment;	// fold runs of comment lines
	bool foldCompact;	// flag lines without visible characters as white space

	// Defaults: folding and comment folding off, compact folding on.
	OptionsBash() : fold(false), foldComment(false), foldCompact(true) {
	}
};
// Descriptions of the word lists accepted by the lexer; the list is
// terminated by a null entry.
const char * const bashWordListDesc[] = {
	"Keywords",
	0
};
162 struct OptionSetBash : public OptionSet<OptionsBash> {
163 OptionSetBash() {
164 DefineProperty("fold", &OptionsBash::fold);
166 DefineProperty("fold.comment", &OptionsBash::foldComment);
168 DefineProperty("fold.compact", &OptionsBash::foldCompact);
170 DefineWordListSets(bashWordListDesc);
// Zero-terminated list of the styles handed to the SubStyles member of
// LexerBash; also returned by LexerBash::GetSubStyleBases().
174 const char styleSubable[] = { SCE_SH_IDENTIFIER, SCE_SH_SCALAR, 0 };
176 LexicalClass lexicalClasses[] = {
177 // Lexer Bash SCLEX_BASH SCE_SH_:
178 0, "SCE_SH_DEFAULT", "default", "White space",
179 1, "SCE_SH_ERROR", "error", "Error",
180 2, "SCE_SH_COMMENTLINE", "comment line", "Line comment: #",
181 3, "SCE_SH_NUMBER", "literal numeric", "Number",
182 4, "SCE_SH_WORD", "keyword", "Keyword",
183 5, "SCE_SH_STRING", "literal string", "String",
184 6, "SCE_SH_CHARACTER", "literal string", "Single quoted string",
185 7, "SCE_SH_OPERATOR", "operator", "Operators",
186 8, "SCE_SH_IDENTIFIER", "identifier", "Identifiers",
187 9, "SCE_SH_SCALAR", "identifier", "Scalar variable",
188 10, "SCE_SH_PARAM", "identifier", "Parameter",
189 11, "SCE_SH_BACKTICKS", "literal string", "Backtick quoted command",
190 12, "SCE_SH_HERE_DELIM", "operator", "Heredoc delimiter",
191 13, "SCE_SH_HERE_Q", "literal string", "Heredoc quoted string",
// Lexer object for Bash. Holds one keyword list, the fold options and the
// sub-style allocator; most methods forward to osBash or subStyles.
// NOTE(review): in this listing each line starts with the viewer's line
// number and lines holding only a closing brace appear to be missing.
196 class LexerBash : public DefaultLexer {
// word list 0, filled by WordListSet()
197 WordList keywords;
198 OptionsBash options;
199 OptionSetBash osBash;
200 enum { ssIdentifier, ssScalar };
201 SubStyles subStyles;
202 public:
// Passes the style metadata table to the base class and sets up sub-styles
// for the styles listed in styleSubable.
203 LexerBash() :
204 DefaultLexer(lexicalClasses, ELEMENTS(lexicalClasses)),
205 subStyles(styleSubable, 0x80, 0x40, 0) {
207 virtual ~LexerBash() {
// The lexer destroys itself when released.
209 void SCI_METHOD Release() override {
210 delete this;
212 int SCI_METHOD Version() const override {
213 return lvMetaData;
// Property queries are answered by the option set.
215 const char * SCI_METHOD PropertyNames() override {
216 return osBash.PropertyNames();
218 int SCI_METHOD PropertyType(const char* name) override {
219 return osBash.PropertyType(name);
221 const char * SCI_METHOD DescribeProperty(const char *name) override {
222 return osBash.DescribeProperty(name);
224 Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
225 const char * SCI_METHOD DescribeWordListSets() override {
226 return osBash.DescribeWordListSets();
228 Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
229 void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
230 void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
// No private calls are implemented; always returns 0.
232 void * SCI_METHOD PrivateCall(int, void *) override {
233 return 0;
// Sub-style management is delegated to subStyles.
236 int SCI_METHOD AllocateSubStyles(int styleBase, int numberStyles) override {
237 return subStyles.Allocate(styleBase, numberStyles);
239 int SCI_METHOD SubStylesStart(int styleBase) override {
240 return subStyles.Start(styleBase);
242 int SCI_METHOD SubStylesLength(int styleBase) override {
243 return subStyles.Length(styleBase);
245 int SCI_METHOD StyleFromSubStyle(int subStyle) override {
246 const int styleBase = subStyles.BaseStyle(subStyle);
247 return styleBase;
// Returns the style unchanged.
249 int SCI_METHOD PrimaryStyleFromStyle(int style) override {
250 return style;
252 void SCI_METHOD FreeSubStyles() override {
253 subStyles.Free();
255 void SCI_METHOD SetIdentifiers(int style, const char *identifiers) override {
256 subStyles.SetIdentifiers(style, identifiers);
258 int SCI_METHOD DistanceToSecondaryStyles() override {
259 return 0;
261 const char *SCI_METHOD GetSubStyleBases() override {
262 return styleSubable;
// Factory handed to the LexerModule registration; the caller receives a
// newly allocated lexer (see Release()).
265 static ILexer *LexerFactoryBash() {
266 return new LexerBash();
270 Sci_Position SCI_METHOD LexerBash::PropertySet(const char *key, const char *val) {
271 if (osBash.PropertySet(&options, key, val)) {
272 return 0;
274 return -1;
277 Sci_Position SCI_METHOD LexerBash::WordListSet(int n, const char *wl) {
278 WordList *wordListN = 0;
279 switch (n) {
280 case 0:
281 wordListN = &keywords;
282 break;
284 Sci_Position firstModification = -1;
285 if (wordListN) {
286 WordList wlNew;
287 wlNew.Set(wl);
288 if (*wordListN != wlNew) {
289 wordListN->Set(wl);
290 firstModification = 0;
293 return firstModification;
// Lex(): assign styles to the text selected by startPos/length.
// NOTE(review): in this listing each line starts with the viewer's line
// number and lines holding only a closing brace appear to be missing (the
// numbering skips); the code lines below are kept exactly as found.
// Overview:
// - startPos is first moved back to the start of a line whose stored line
//   state is BASH_CMD_START (or to line 0) and initStyle is overwritten with
//   SCE_SH_DEFAULT, so the initStyle passed in is not used.
// - For every character the loop first decides whether the current style
//   ends (switch on sc.state) and then, when in SCE_SH_DEFAULT, whether a
//   new style begins.
// - cmdState records which part of a command segment is being lexed and is
//   written to the per-line state at each line start.
296 void SCI_METHOD LexerBash::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
// Fixed word lists: command delimiters, construct keywords, and the
// constructs that are followed later by 'in' or 'do'.
297 WordList cmdDelimiter, bashStruct, bashStruct_in;
298 cmdDelimiter.Set("| || |& & && ; ;; ( ) { }");
299 bashStruct.Set("if elif fi while until else then do done esac eval");
300 bashStruct_in.Set("for case select");
// Character classes used to recognise the individual token kinds.
302 CharacterSet setWordStart(CharacterSet::setAlpha, "_");
303 // note that [+-] are often parts of identifiers in shell scripts
304 CharacterSet setWord(CharacterSet::setAlphaNum, "._+-");
305 CharacterSet setMetaCharacter(CharacterSet::setNone, "|&;()<> \t\r\n");
306 setMetaCharacter.Add(0);
307 CharacterSet setBashOperator(CharacterSet::setNone, "^&%()-+=|{}[]:;>,*/<?!.~@");
308 CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMACahGLNn");
309 CharacterSet setParam(CharacterSet::setAlphaNum, "$_");
310 CharacterSet setHereDoc(CharacterSet::setAlpha, "_\\-+!%*,./:?@[]^`{}~");
311 CharacterSet setHereDoc2(CharacterSet::setAlphaNum, "_-+!%*,./:=?@[]^`{}~");
312 CharacterSet setLeftShift(CharacterSet::setDigits, "$");
314 class HereDocCls { // Class to manage HERE document elements
315 public:
316 int State; // 0: '<<' encountered
317 // 1: collect the delimiter
318 // 2: here doc text (lines after the delimiter)
319 int Quote; // the char after '<<'
320 bool Quoted; // true if Quote in ('\'','"','`')
321 bool Indent; // indented delimiter (for <<-)
322 int DelimiterLength; // strlen(Delimiter)
323 char Delimiter[HERE_DELIM_MAX]; // the Delimiter
324 HereDocCls() {
325 State = 0;
326 Quote = 0;
327 Quoted = false;
// NOTE(review): Indent is a bool; the 0 assigned here converts to false.
328 Indent = 0;
329 DelimiterLength = 0;
330 Delimiter[0] = '\0';
// Append() stores ch and re-terminates the buffer without a bounds check of
// its own; the SCE_SH_HERE_DELIM case below switches to SCE_SH_ERROR once
// DelimiterLength reaches HERE_DELIM_MAX - 1.
332 void Append(int ch) {
333 Delimiter[DelimiterLength++] = static_cast<char>(ch);
334 Delimiter[DelimiterLength] = '\0';
336 ~HereDocCls() {
339 HereDocCls HereDoc;
341 class QuoteCls { // Class to manage quote pairs (simplified vs LexPerl)
342 public:
// Count is the number of currently open Up delimiters; Down is the
// delimiter that closes Up (see opposite()).
343 int Count;
344 int Up, Down;
345 QuoteCls() {
346 Count = 0;
347 Up = '\0';
348 Down = '\0';
350 void Open(int u) {
351 Count++;
352 Up = u;
353 Down = opposite(Up);
355 void Start(int u) {
356 Count = 0;
357 Open(u);
360 QuoteCls Quote;
362 class QuoteStackCls { // Class to manage quote pairs that nest
363 public:
364 int Count;
365 int Up, Down;
366 int Style;
367 int Depth; // levels pushed
368 int CountStack[BASH_DELIM_STACK_MAX];
369 int UpStack [BASH_DELIM_STACK_MAX];
370 int StyleStack[BASH_DELIM_STACK_MAX];
371 QuoteStackCls() {
372 Count = 0;
373 Up = '\0';
374 Down = '\0';
375 Style = 0;
376 Depth = 0;
// Start() begins a new outermost delimiter pair; Depth is not touched.
378 void Start(int u, int s) {
379 Count = 1;
380 Up = u;
381 Down = opposite(Up);
382 Style = s;
// Push() saves the current pair and makes u/s the current one; the request
// is ignored once BASH_DELIM_STACK_MAX levels are saved.
384 void Push(int u, int s) {
385 if (Depth >= BASH_DELIM_STACK_MAX)
386 return;
387 CountStack[Depth] = Count;
388 UpStack [Depth] = Up;
389 StyleStack[Depth] = Style;
390 Depth++;
391 Count = 1;
392 Up = u;
393 Down = opposite(Up);
394 Style = s;
// Pop() restores the most recently saved pair; Down is recomputed from Up.
396 void Pop(void) {
397 if (Depth <= 0)
398 return;
399 Depth--;
400 Count = CountStack[Depth];
401 Up = UpStack [Depth];
402 Style = StyleStack[Depth];
403 Down = opposite(Up);
405 ~QuoteStackCls() {
408 QuoteStackCls QuoteStack;
// Sub-style classifiers for identifiers and scalars.
410 const WordClassifier &classifierIdentifiers = subStyles.Classifier(SCE_SH_IDENTIFIER);
411 const WordClassifier &classifierScalars = subStyles.Classifier(SCE_SH_SCALAR);
413 int numBase = 0;
414 int digit;
415 Sci_PositionU endPos = startPos + length;
416 int cmdState = BASH_CMD_START;
// testExprType: 0 for the 'test' keyword, 1 for [[ ]], 2 for [ ]
417 int testExprType = 0;
418 LexAccessor styler(pAccess);
420 // Always backtracks to the start of a line that is not a continuation
421 // of the previous line (i.e. start of a bash command segment)
422 Sci_Position ln = styler.GetLine(startPos);
423 if (ln > 0 && startPos == static_cast<Sci_PositionU>(styler.LineStart(ln)))
424 ln--;
425 for (;;) {
426 startPos = styler.LineStart(ln);
427 if (ln == 0 || styler.GetLineState(ln) == BASH_CMD_START)
428 break;
429 ln--;
431 initStyle = SCE_SH_DEFAULT;
// endPos was computed before backtracking, so the styled range grows by the
// distance moved back.
433 StyleContext sc(startPos, endPos - startPos, initStyle, styler);
435 for (; sc.More(); sc.Forward()) {
437 // handle line continuation, updates per-line stored state
438 if (sc.atLineStart) {
439 ln = styler.GetLine(sc.currentPos);
440 if (sc.state == SCE_SH_STRING
441 || sc.state == SCE_SH_BACKTICKS
442 || sc.state == SCE_SH_CHARACTER
443 || sc.state == SCE_SH_HERE_Q
444 || sc.state == SCE_SH_COMMENTLINE
445 || sc.state == SCE_SH_PARAM) {
446 // force backtrack while retaining cmdState
447 styler.SetLineState(ln, BASH_CMD_BODY);
448 } else {
449 if (ln > 0) {
// a backslash directly before the line end (CR LF or a single end
// character) keeps the command state of the previous line
450 if ((sc.GetRelative(-3) == '\\' && sc.GetRelative(-2) == '\r' && sc.chPrev == '\n')
451 || sc.GetRelative(-2) == '\\') { // handle '\' line continuation
452 // retain last line's state
453 } else
454 cmdState = BASH_CMD_START;
456 styler.SetLineState(ln, cmdState);
460 // controls change of cmdState at the end of a non-whitespace element
461 // states BODY|TEST|ARITH persist until the end of a command segment
462 // state WORD persist, but ends with 'in' or 'do' construct keywords
463 int cmdStateNew = BASH_CMD_BODY;
464 if (cmdState == BASH_CMD_TEST || cmdState == BASH_CMD_ARITH || cmdState == BASH_CMD_WORD)
465 cmdStateNew = cmdState;
// stylePrev remembers the style in force before this character is handled
466 int stylePrev = sc.state;
468 // Determine if the current state should terminate.
469 switch (sc.state) {
470 case SCE_SH_OPERATOR:
471 sc.SetState(SCE_SH_DEFAULT);
472 if (cmdState == BASH_CMD_DELIM) // if command delimiter, start new command
473 cmdStateNew = BASH_CMD_START;
474 else if (sc.chPrev == '\\') // propagate command state if line continued
475 cmdStateNew = cmdState;
476 break;
// Words start out as SCE_SH_WORD and are changed to the identifier style
// (or a sub-style) below unless they qualify as a keyword in context.
477 case SCE_SH_WORD:
478 // "." never used in Bash variable names but used in file names
479 if (!setWord.Contains(sc.ch)) {
480 char s[500];
481 char s2[10];
482 sc.GetCurrent(s, sizeof(s));
483 int identifierStyle = SCE_SH_IDENTIFIER;
484 int subStyle = classifierIdentifiers.ValueFor(s);
485 if (subStyle >= 0) {
486 identifierStyle = subStyle;
488 // allow keywords ending in a whitespace or command delimiter
489 s2[0] = static_cast<char>(sc.ch);
490 s2[1] = '\0';
491 bool keywordEnds = IsASpace(sc.ch) || cmdDelimiter.InList(s2);
492 // 'in' or 'do' may be construct keywords
493 if (cmdState == BASH_CMD_WORD) {
494 if (strcmp(s, "in") == 0 && keywordEnds)
495 cmdStateNew = BASH_CMD_BODY;
496 else if (strcmp(s, "do") == 0 && keywordEnds)
497 cmdStateNew = BASH_CMD_START;
498 else
499 sc.ChangeState(identifierStyle);
500 sc.SetState(SCE_SH_DEFAULT);
501 break;
503 // a 'test' keyword starts a test expression
504 if (strcmp(s, "test") == 0) {
505 if (cmdState == BASH_CMD_START && keywordEnds) {
506 cmdStateNew = BASH_CMD_TEST;
507 testExprType = 0;
508 } else
509 sc.ChangeState(identifierStyle);
511 // detect bash construct keywords
512 else if (bashStruct.InList(s)) {
513 if (cmdState == BASH_CMD_START && keywordEnds)
514 cmdStateNew = BASH_CMD_START;
515 else
516 sc.ChangeState(identifierStyle);
518 // 'for'|'case'|'select' needs 'in'|'do' to be highlighted later
519 else if (bashStruct_in.InList(s)) {
520 if (cmdState == BASH_CMD_START && keywordEnds)
521 cmdStateNew = BASH_CMD_WORD;
522 else
523 sc.ChangeState(identifierStyle);
525 // disambiguate option items and file test operators
526 else if (s[0] == '-') {
527 if (cmdState != BASH_CMD_TEST)
528 sc.ChangeState(identifierStyle);
530 // disambiguate keywords and identifiers
531 else if (cmdState != BASH_CMD_START
532 || !(keywords.InList(s) && keywordEnds)) {
533 sc.ChangeState(identifierStyle);
535 sc.SetState(SCE_SH_DEFAULT);
537 break;
538 case SCE_SH_IDENTIFIER:
539 if (sc.chPrev == '\\' || !setWord.Contains(sc.ch) ||
540 (cmdState == BASH_CMD_ARITH && !setWordStart.Contains(sc.ch))) {
541 char s[500];
542 sc.GetCurrent(s, sizeof(s));
543 int subStyle = classifierIdentifiers.ValueFor(s);
544 if (subStyle >= 0) {
545 sc.ChangeState(subStyle);
547 if (sc.chPrev == '\\') { // for escaped chars
548 sc.ForwardSetState(SCE_SH_DEFAULT);
549 } else {
550 sc.SetState(SCE_SH_DEFAULT);
553 break;
// Each branch below breaks out of the switch while sc.ch still belongs to
// the number; falling out of the chain ends the number.
554 case SCE_SH_NUMBER:
555 digit = translateBashDigit(sc.ch);
556 if (numBase == BASH_BASE_DECIMAL) {
557 if (sc.ch == '#') {
// the digits collected so far are the base of a base#digits number
558 char s[10];
559 sc.GetCurrent(s, sizeof(s));
560 numBase = getBashNumberBase(s);
561 if (numBase != BASH_BASE_ERROR)
562 break;
563 } else if (IsADigit(sc.ch))
564 break;
565 } else if (numBase == BASH_BASE_HEX) {
566 if (IsADigit(sc.ch, 16))
567 break;
568 #ifdef PEDANTIC_OCTAL
569 } else if (numBase == BASH_BASE_OCTAL ||
570 numBase == BASH_BASE_OCTAL_ERROR) {
571 if (digit <= 7)
572 break;
573 if (digit <= 9) {
574 numBase = BASH_BASE_OCTAL_ERROR;
575 break;
577 #endif
578 } else if (numBase == BASH_BASE_ERROR) {
579 if (digit <= 9)
580 break;
581 } else { // DD#DDDD number style handling
582 if (digit != BASH_BASE_ERROR) {
583 if (numBase <= 36) {
584 // case-insensitive if base<=36
585 if (digit >= 36) digit -= 26;
587 if (digit < numBase)
588 break;
589 if (digit <= 9) {
590 numBase = BASH_BASE_ERROR;
591 break;
595 // fallthrough when number is at an end or error
596 if (numBase == BASH_BASE_ERROR
597 #ifdef PEDANTIC_OCTAL
598 || numBase == BASH_BASE_OCTAL_ERROR
599 #endif
601 sc.ChangeState(SCE_SH_ERROR);
603 sc.SetState(SCE_SH_DEFAULT);
604 break;
605 case SCE_SH_COMMENTLINE:
// a comment ends at the line end unless the previous character is a backslash
606 if (sc.atLineEnd && sc.chPrev != '\\') {
607 sc.SetState(SCE_SH_DEFAULT);
609 break;
610 case SCE_SH_HERE_DELIM:
611 // From Bash info:
612 // ---------------
613 // Specifier format is: <<[-]WORD
614 // Optional '-' is for removal of leading tabs from here-doc.
615 // Whitespace acceptable after <<[-] operator
617 if (HereDoc.State == 0) { // '<<' encountered
618 HereDoc.Quote = sc.chNext;
619 HereDoc.Quoted = false;
620 HereDoc.DelimiterLength = 0;
621 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
622 if (sc.chNext == '\'' || sc.chNext == '\"') { // a quoted here-doc delimiter (' or ")
623 sc.Forward();
624 HereDoc.Quoted = true;
625 HereDoc.State = 1;
626 } else if (setHereDoc.Contains(sc.chNext) ||
627 (sc.chNext == '=' && cmdState != BASH_CMD_ARITH)) {
628 // an unquoted here-doc delimiter, no special handling
629 HereDoc.State = 1;
630 } else if (sc.chNext == '<') { // HERE string <<<
631 sc.Forward();
632 sc.ForwardSetState(SCE_SH_DEFAULT);
633 } else if (IsASpace(sc.chNext)) {
634 // eat whitespace
635 } else if (setLeftShift.Contains(sc.chNext) ||
636 (sc.chNext == '=' && cmdState == BASH_CMD_ARITH)) {
637 // left shift <<$var or <<= cases
638 sc.ChangeState(SCE_SH_OPERATOR);
639 sc.ForwardSetState(SCE_SH_DEFAULT);
640 } else {
641 // symbols terminates; deprecated zero-length delimiter
642 HereDoc.State = 1;
644 } else if (HereDoc.State == 1) { // collect the delimiter
645 // * if single quoted, there's no escape
646 // * if double quoted, there are \\ and \" escapes
647 if ((HereDoc.Quote == '\'' && sc.ch != HereDoc.Quote) ||
648 (HereDoc.Quoted && sc.ch != HereDoc.Quote && sc.ch != '\\') ||
649 (HereDoc.Quote != '\'' && sc.chPrev == '\\') ||
650 (setHereDoc2.Contains(sc.ch))) {
651 HereDoc.Append(sc.ch);
652 } else if (HereDoc.Quoted && sc.ch == HereDoc.Quote) { // closing quote => end of delimiter
653 sc.ForwardSetState(SCE_SH_DEFAULT);
654 } else if (sc.ch == '\\') {
655 if (HereDoc.Quoted && sc.chNext != HereDoc.Quote && sc.chNext != '\\') {
656 // in quoted prefixes only \ and the quote eat the escape
657 HereDoc.Append(sc.ch);
658 } else {
659 // skip escape prefix
661 } else if (!HereDoc.Quoted) {
662 sc.SetState(SCE_SH_DEFAULT);
// guard for the fixed-size delimiter buffer
664 if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) { // force blowup
665 sc.SetState(SCE_SH_ERROR);
666 HereDoc.State = 0;
669 break;
670 case SCE_SH_HERE_Q:
671 // HereDoc.State == 2
// At each line start the whole line is consumed and compared with the
// stored delimiter; leading tabs are skipped first and only count as a
// match for the <<- form (HereDoc.Indent).
672 if (sc.atLineStart) {
673 sc.SetState(SCE_SH_HERE_Q);
674 int prefixws = 0;
675 while (sc.ch == '\t' && !sc.atLineEnd) { // tabulation prefix
676 sc.Forward();
677 prefixws++;
679 if (prefixws > 0)
680 sc.SetState(SCE_SH_HERE_Q);
681 while (!sc.atLineEnd) {
682 sc.Forward();
684 char s[HERE_DELIM_MAX];
685 sc.GetCurrent(s, sizeof(s));
686 if (sc.LengthCurrent() == 0) { // '' or "" delimiters
687 if ((prefixws == 0 || HereDoc.Indent) &&
688 HereDoc.Quoted && HereDoc.DelimiterLength == 0)
689 sc.SetState(SCE_SH_DEFAULT);
690 break;
// drop a trailing CR before comparing
692 if (s[strlen(s) - 1] == '\r')
693 s[strlen(s) - 1] = '\0';
694 if (strcmp(HereDoc.Delimiter, s) == 0) {
695 if ((prefixws == 0) || // indentation rule
696 (prefixws > 0 && HereDoc.Indent)) {
697 sc.SetState(SCE_SH_DEFAULT);
698 break;
702 break;
703 case SCE_SH_SCALAR: // variable names
704 if (!setParam.Contains(sc.ch)) {
705 char s[500];
706 sc.GetCurrent(s, sizeof(s));
707 int subStyle = classifierScalars.ValueFor(&s[1]); // skip the $
708 if (subStyle >= 0) {
709 sc.ChangeState(subStyle);
711 if (sc.LengthCurrent() == 1) {
712 // Special variable: $(, $_ etc.
713 sc.ForwardSetState(SCE_SH_DEFAULT);
714 } else {
715 sc.SetState(SCE_SH_DEFAULT);
718 break;
719 case SCE_SH_STRING: // delimited styles, can nest
720 case SCE_SH_BACKTICKS:
// a backslash skips the following character except inside a literal
// (single-quoted) level
721 if (sc.ch == '\\' && QuoteStack.Up != '\\') {
722 if (QuoteStack.Style != BASH_DELIM_LITERAL)
723 sc.Forward();
724 } else if (sc.ch == QuoteStack.Down) {
725 QuoteStack.Count--;
726 if (QuoteStack.Count == 0) {
// the current pair is closed: return to the enclosing pair, or leave the
// style when no level is saved
727 if (QuoteStack.Depth > 0) {
728 QuoteStack.Pop();
729 } else
730 sc.ForwardSetState(SCE_SH_DEFAULT);
732 } else if (sc.ch == QuoteStack.Up) {
733 QuoteStack.Count++;
734 } else {
735 if (QuoteStack.Style == BASH_DELIM_STRING ||
736 QuoteStack.Style == BASH_DELIM_LSTRING
737 ) { // do nesting for "string", $"locale-string"
738 if (sc.ch == '`') {
739 QuoteStack.Push(sc.ch, BASH_DELIM_BACKTICK);
740 } else if (sc.ch == '$' && sc.chNext == '(') {
741 sc.Forward();
742 QuoteStack.Push(sc.ch, BASH_DELIM_COMMAND);
744 } else if (QuoteStack.Style == BASH_DELIM_COMMAND ||
745 QuoteStack.Style == BASH_DELIM_BACKTICK
746 ) { // do nesting for $(command), `command`
747 if (sc.ch == '\'') {
748 QuoteStack.Push(sc.ch, BASH_DELIM_LITERAL);
749 } else if (sc.ch == '\"') {
750 QuoteStack.Push(sc.ch, BASH_DELIM_STRING);
751 } else if (sc.ch == '`') {
752 QuoteStack.Push(sc.ch, BASH_DELIM_BACKTICK);
753 } else if (sc.ch == '$') {
754 if (sc.chNext == '\'') {
755 sc.Forward();
756 QuoteStack.Push(sc.ch, BASH_DELIM_CSTRING);
757 } else if (sc.chNext == '\"') {
758 sc.Forward();
759 QuoteStack.Push(sc.ch, BASH_DELIM_LSTRING);
760 } else if (sc.chNext == '(') {
761 sc.Forward();
762 QuoteStack.Push(sc.ch, BASH_DELIM_COMMAND);
767 break;
768 case SCE_SH_PARAM: // ${parameter}
// braces are counted so that the style ends at the matching '}'
769 if (sc.ch == '\\' && Quote.Up != '\\') {
770 sc.Forward();
771 } else if (sc.ch == Quote.Down) {
772 Quote.Count--;
773 if (Quote.Count == 0) {
774 sc.ForwardSetState(SCE_SH_DEFAULT);
776 } else if (sc.ch == Quote.Up) {
777 Quote.Count++;
779 break;
780 case SCE_SH_CHARACTER: // singly-quoted strings
781 if (sc.ch == Quote.Down) {
782 Quote.Count--;
783 if (Quote.Count == 0) {
784 sc.ForwardSetState(SCE_SH_DEFAULT);
787 break;
790 // Must check end of HereDoc state 1 before default state is handled
791 if (HereDoc.State == 1 && sc.atLineEnd) {
792 // Begin of here-doc (the line after the here-doc delimiter):
793 // Lexically, the here-doc starts from the next line after the >>, but the
794 // first line of here-doc seem to follow the style of the last EOL sequence
// NOTE(review): ">>" in the comment above presumably refers to the "<<" operator.
795 HereDoc.State = 2;
796 if (HereDoc.Quoted) {
797 if (sc.state == SCE_SH_HERE_DELIM) {
798 // Missing quote at end of string! Syntax error in bash 4.3
799 // Mark this bit as an error, do not colour any here-doc
800 sc.ChangeState(SCE_SH_ERROR);
801 sc.SetState(SCE_SH_DEFAULT);
802 } else {
803 // HereDoc.Quote always == '\''
// NOTE(review): Quoted is set above for both ' and ", so Quote may also
// be '"' at this point.
804 sc.SetState(SCE_SH_HERE_Q);
806 } else if (HereDoc.DelimiterLength == 0) {
807 // no delimiter, illegal (but '' and "" are legal)
808 sc.ChangeState(SCE_SH_ERROR);
809 sc.SetState(SCE_SH_DEFAULT);
810 } else {
811 sc.SetState(SCE_SH_HERE_Q);
815 // update cmdState about the current command segment
816 if (stylePrev != SCE_SH_DEFAULT && sc.state == SCE_SH_DEFAULT) {
817 cmdState = cmdStateNew;
819 // Determine if a new state should be entered.
820 if (sc.state == SCE_SH_DEFAULT) {
821 if (sc.ch == '\\') {
822 // Bash can escape any non-newline as a literal
823 sc.SetState(SCE_SH_IDENTIFIER);
824 if (sc.chNext == '\r' || sc.chNext == '\n')
825 sc.SetState(SCE_SH_OPERATOR);
826 } else if (IsADigit(sc.ch)) {
827 sc.SetState(SCE_SH_NUMBER);
828 numBase = BASH_BASE_DECIMAL;
829 if (sc.ch == '0') { // hex,octal
830 if (sc.chNext == 'x' || sc.chNext == 'X') {
831 numBase = BASH_BASE_HEX;
832 sc.Forward();
833 } else if (IsADigit(sc.chNext)) {
834 #ifdef PEDANTIC_OCTAL
835 numBase = BASH_BASE_OCTAL;
836 #else
// without PEDANTIC_OCTAL a leading 0 followed by a digit is lexed with the
// hex digit rule, so no digit is flagged as an error
837 numBase = BASH_BASE_HEX;
838 #endif
841 } else if (setWordStart.Contains(sc.ch)) {
842 sc.SetState(SCE_SH_WORD);
843 } else if (sc.ch == '#') {
// '#' starts a comment only at the start of the document or after a
// metacharacter, and not directly after a word or identifier
844 if (stylePrev != SCE_SH_WORD && stylePrev != SCE_SH_IDENTIFIER &&
845 (sc.currentPos == 0 || setMetaCharacter.Contains(sc.chPrev))) {
846 sc.SetState(SCE_SH_COMMENTLINE);
847 } else {
848 sc.SetState(SCE_SH_WORD);
850 // handle some zsh features within arithmetic expressions only
851 if (cmdState == BASH_CMD_ARITH) {
852 if (sc.chPrev == '[') { // [#8] [##8] output digit setting
853 sc.SetState(SCE_SH_WORD);
854 if (sc.chNext == '#') {
855 sc.Forward();
857 } else if (sc.Match("##^") && IsUpperCase(sc.GetRelative(3))) { // ##^A
858 sc.SetState(SCE_SH_IDENTIFIER);
859 sc.Forward(3);
860 } else if (sc.chNext == '#' && !IsASpace(sc.GetRelative(2))) { // ##a
861 sc.SetState(SCE_SH_IDENTIFIER);
862 sc.Forward(2);
863 } else if (setWordStart.Contains(sc.chNext)) { // #name
864 sc.SetState(SCE_SH_IDENTIFIER);
867 } else if (sc.ch == '\"') {
868 sc.SetState(SCE_SH_STRING);
869 QuoteStack.Start(sc.ch, BASH_DELIM_STRING);
870 } else if (sc.ch == '\'') {
871 sc.SetState(SCE_SH_CHARACTER);
872 Quote.Start(sc.ch);
873 } else if (sc.ch == '`') {
874 sc.SetState(SCE_SH_BACKTICKS);
875 QuoteStack.Start(sc.ch, BASH_DELIM_BACKTICK);
876 } else if (sc.ch == '$') {
877 if (sc.Match("$((")) {
878 sc.SetState(SCE_SH_OPERATOR); // handle '((' later
879 continue;
// start as a scalar, then switch style according to the character after '$'
881 sc.SetState(SCE_SH_SCALAR);
882 sc.Forward();
883 if (sc.ch == '{') {
884 sc.ChangeState(SCE_SH_PARAM);
885 Quote.Start(sc.ch);
886 } else if (sc.ch == '\'') {
887 sc.ChangeState(SCE_SH_STRING);
888 QuoteStack.Start(sc.ch, BASH_DELIM_CSTRING);
889 } else if (sc.ch == '"') {
890 sc.ChangeState(SCE_SH_STRING);
891 QuoteStack.Start(sc.ch, BASH_DELIM_LSTRING);
892 } else if (sc.ch == '(') {
893 sc.ChangeState(SCE_SH_BACKTICKS);
894 QuoteStack.Start(sc.ch, BASH_DELIM_COMMAND);
895 } else if (sc.ch == '`') { // $` seen in a configure script, valid?
896 sc.ChangeState(SCE_SH_BACKTICKS);
897 QuoteStack.Start(sc.ch, BASH_DELIM_BACKTICK);
898 } else {
899 continue; // scalar has no delimiter pair
901 } else if (sc.Match('<', '<')) {
902 sc.SetState(SCE_SH_HERE_DELIM);
903 HereDoc.State = 0;
904 if (sc.GetRelative(2) == '-') { // <<- indent case
905 HereDoc.Indent = true;
906 sc.Forward();
907 } else {
908 HereDoc.Indent = false;
910 } else if (sc.ch == '-' && // one-char file test operators
911 setSingleCharOp.Contains(sc.chNext) &&
912 !setWord.Contains(sc.GetRelative(2)) &&
913 IsASpace(sc.chPrev)) {
914 sc.SetState(SCE_SH_WORD);
915 sc.Forward();
916 } else if (setBashOperator.Contains(sc.ch)) {
917 char s[10];
918 bool isCmdDelim = false;
919 sc.SetState(SCE_SH_OPERATOR);
920 // globs have no whitespace, do not appear in arithmetic expressions
921 if (cmdState != BASH_CMD_ARITH && sc.ch == '(' && sc.chNext != '(') {
922 int i = GlobScan(sc);
923 if (i > 1) {
924 sc.SetState(SCE_SH_IDENTIFIER);
925 sc.Forward(i);
926 continue;
929 // handle opening delimiters for test/arithmetic expressions - ((,[[,[
930 if (cmdState == BASH_CMD_START
931 || cmdState == BASH_CMD_BODY) {
932 if (sc.Match('(', '(')) {
933 cmdState = BASH_CMD_ARITH;
934 sc.Forward();
935 } else if (sc.Match('[', '[') && IsASpace(sc.GetRelative(2))) {
936 cmdState = BASH_CMD_TEST;
937 testExprType = 1;
938 sc.Forward();
939 } else if (sc.ch == '[' && IsASpace(sc.chNext)) {
940 cmdState = BASH_CMD_TEST;
941 testExprType = 2;
944 // special state -- for ((x;y;z)) in ... looping
945 if (cmdState == BASH_CMD_WORD && sc.Match('(', '(')) {
946 cmdState = BASH_CMD_ARITH;
947 sc.Forward();
948 continue;
950 // handle command delimiters in command START|BODY|WORD state, also TEST if 'test'
951 if (cmdState == BASH_CMD_START
952 || cmdState == BASH_CMD_BODY
953 || cmdState == BASH_CMD_WORD
954 || (cmdState == BASH_CMD_TEST && testExprType == 0)) {
// try the two-character delimiter first, then the single character
955 s[0] = static_cast<char>(sc.ch);
956 if (setBashOperator.Contains(sc.chNext)) {
957 s[1] = static_cast<char>(sc.chNext);
958 s[2] = '\0';
959 isCmdDelim = cmdDelimiter.InList(s);
960 if (isCmdDelim)
961 sc.Forward();
963 if (!isCmdDelim) {
964 s[1] = '\0';
965 isCmdDelim = cmdDelimiter.InList(s);
967 if (isCmdDelim) {
968 cmdState = BASH_CMD_DELIM;
969 continue;
972 // handle closing delimiters for test/arithmetic expressions - )),]],]
973 if (cmdState == BASH_CMD_ARITH && sc.Match(')', ')')) {
974 cmdState = BASH_CMD_BODY;
975 sc.Forward();
976 } else if (cmdState == BASH_CMD_TEST && IsASpace(sc.chPrev)) {
977 if (sc.Match(']', ']') && testExprType == 1) {
978 sc.Forward();
979 cmdState = BASH_CMD_BODY;
980 } else if (sc.ch == ']' && testExprType == 2) {
981 cmdState = BASH_CMD_BODY;
985 }// sc.state
987 sc.Complete();
// The range ended inside a here-doc.
// NOTE(review): presumably this asks for the text up to the end of the
// document to be lexed again - confirm against LexAccessor::ChangeLexerState.
988 if (sc.state == SCE_SH_HERE_Q) {
989 styler.ChangeLexerState(sc.currentPos, styler.Length());
// NOTE(review): second Complete() call after the one above.
991 sc.Complete();
// Fold(): compute fold levels for the lines in startPos/length from the
// styles already assigned to the text. Does nothing unless options.fold is set.
// The level rises on the keywords if/case/do, on a '{' operator and at the
// start of a here-doc; it drops on fi/esac/done, on a '}' operator and where
// here-doc text is followed by the default style. With options.foldComment,
// runs of comment lines are folded as well.
// NOTE(review): in this listing each line starts with the viewer's line
// number and lines holding only a brace appear to be missing; the code lines
// below are kept exactly as found.
994 void SCI_METHOD LexerBash::Fold(Sci_PositionU startPos, Sci_Position length, int, IDocument *pAccess) {
995 if(!options.fold)
996 return;
998 LexAccessor styler(pAccess);
1000 Sci_PositionU endPos = startPos + length;
// visibleChars counts the non-space characters seen on the current line
1001 int visibleChars = 0;
// skipHereCh is set on the first '<' of a "<<<" so that no fold is opened
// for it (see the here-document section below)
1002 int skipHereCh = 0;
1003 Sci_Position lineCurrent = styler.GetLine(startPos);
// levelPrev is the level the current line starts with; levelCurrent is
// adjusted while the line is scanned
1004 int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
1005 int levelCurrent = levelPrev;
1006 char chNext = styler[startPos];
1007 int styleNext = styler.StyleAt(startPos);
1008 char word[8] = { '\0' }; // we're not interested in long words anyway
1009 unsigned int wordlen = 0;
1010 for (Sci_PositionU i = startPos; i < endPos; i++) {
1011 char ch = chNext;
1012 chNext = styler.SafeGetCharAt(i + 1);
1013 int style = styleNext;
1014 styleNext = styler.StyleAt(i + 1);
// a lone CR, or LF, ends the line; the CR of a CR LF pair does not
1015 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1016 // Comment folding
1017 if (options.foldComment && atEOL && IsCommentLine(lineCurrent, styler))
// first line of a comment run opens a fold, last line closes it
1019 if (!IsCommentLine(lineCurrent - 1, styler)
1020 && IsCommentLine(lineCurrent + 1, styler))
1021 levelCurrent++;
1022 else if (IsCommentLine(lineCurrent - 1, styler)
1023 && !IsCommentLine(lineCurrent + 1, styler))
1024 levelCurrent--;
1026 if (style == SCE_SH_WORD) {
// collect at most sizeof(word) - 1 characters of the keyword; longer words
// are cut short and therefore cannot equal any of the keywords tested below
1027 if ((wordlen + 1) < sizeof(word))
1028 word[wordlen++] = ch;
1029 if (styleNext != style) {
1030 word[wordlen] = '\0';
1031 wordlen = 0;
1032 if (strcmp(word, "if") == 0 || strcmp(word, "case") == 0 || strcmp(word, "do") == 0) {
1033 levelCurrent++;
1034 } else if (strcmp(word, "fi") == 0 || strcmp(word, "esac") == 0 || strcmp(word, "done") == 0) {
1035 levelCurrent--;
1039 if (style == SCE_SH_OPERATOR) {
1040 if (ch == '{') {
1041 levelCurrent++;
1042 } else if (ch == '}') {
1043 levelCurrent--;
1046 // Here Document folding
1047 if (style == SCE_SH_HERE_DELIM) {
1048 if (ch == '<' && chNext == '<') {
// "<<<": remember it, so the "<<" seen one character later opens no fold
1049 if (styler.SafeGetCharAt(i + 2) == '<') {
1050 skipHereCh = 1;
1051 } else {
1052 if (skipHereCh == 0) {
1053 levelCurrent++;
1054 } else {
1055 skipHereCh = 0;
1059 } else if (style == SCE_SH_HERE_Q && styler.StyleAt(i+1) == SCE_SH_DEFAULT) {
1060 levelCurrent--;
1062 if (atEOL) {
// the line gets the level it started with, plus flags
1063 int lev = levelPrev;
1064 if (visibleChars == 0 && options.foldCompact)
1065 lev |= SC_FOLDLEVELWHITEFLAG;
1066 if ((levelCurrent > levelPrev) && (visibleChars > 0))
1067 lev |= SC_FOLDLEVELHEADERFLAG;
1068 if (lev != styler.LevelAt(lineCurrent)) {
1069 styler.SetLevel(lineCurrent, lev);
1071 lineCurrent++;
1072 levelPrev = levelCurrent;
1073 visibleChars = 0;
1075 if (!isspacechar(ch))
1076 visibleChars++;
1078 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1079 int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
1080 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
// Module object for the Bash lexer: ties SCLEX_BASH and the name "bash" to
// the LexerBash factory function and the word list descriptions.
1083 LexerModule lmBash(SCLEX_BASH, LexerBash::LexerFactoryBash, "bash", bashWordListDesc);