Set release date.
[geany-mirror.git] / scintilla / LexBash.cxx
blob1f97e4829644e1f448ef7e46046d2fbce5ad9556
// Scintilla source code edit control
/** @file LexBash.cxx
 ** Lexer for Bash.
 **/
// Copyright 2004-2008 by Neil Hodgson <neilh@scintilla.org>
// Adapted from LexPerl by Kein-Hong Man 2004
// The License.txt file describes the conditions under which this software may be distributed.
9 #include <stdlib.h>
10 #include <string.h>
11 #include <ctype.h>
12 #include <stdio.h>
13 #include <stdarg.h>
15 #include "Platform.h"
17 #include "PropSet.h"
18 #include "Accessor.h"
19 #include "StyleContext.h"
20 #include "KeyWords.h"
21 #include "Scintilla.h"
22 #include "SciLexer.h"
23 #include "CharacterSet.h"
25 #ifdef SCI_NAMESPACE
26 using namespace Scintilla;
27 #endif
// Size, in bytes, of the buffers that hold a here-document delimiter
// (including the terminating NUL).
#define HERE_DELIM_MAX 256

// define this if you want 'invalid octals' to be marked as errors
// usually, this is not a good idea, permissive lexing is better
#undef PEDANTIC_OCTAL

// Sentinel values stored in the lexer's current number base. Real bases
// handed back by getBashNumberBase() never exceed 64, so every sentinel
// lives above that range. BASH_BASE_ERROR doubles as the "not a digit"
// result of translateBashDigit().
#define BASH_BASE_ERROR		65
#define BASH_BASE_DECIMAL	66
#define BASH_BASE_HEX		67
#ifdef PEDANTIC_OCTAL
// Only meaningful when invalid octal digits are to be flagged.
#define BASH_BASE_OCTAL		68
#define BASH_BASE_OCTAL_ERROR	69
#endif
43 static inline int translateBashDigit(int ch) {
44 if (ch >= '0' && ch <= '9') {
45 return ch - '0';
46 } else if (ch >= 'a' && ch <= 'z') {
47 return ch - 'a' + 10;
48 } else if (ch >= 'A' && ch <= 'Z') {
49 return ch - 'A' + 36;
50 } else if (ch == '@') {
51 return 62;
52 } else if (ch == '_') {
53 return 63;
55 return BASH_BASE_ERROR;
58 static inline int getBashNumberBase(char *s) {
59 int i = 0;
60 int base = 0;
61 while (*s) {
62 base = base * 10 + (*s++ - '0');
63 i++;
65 if (base > 64 || i > 2) {
66 return BASH_BASE_ERROR;
68 return base;
// Return the closing delimiter that pairs with an opening bracket
// character: ( [ { < map to ) ] } >. Every other character (quotes, for
// instance) is its own closing delimiter and is returned unchanged.
static int opposite(int ch) {
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
79 static void ColouriseBashDoc(unsigned int startPos, int length, int initStyle,
80 WordList *keywordlists[], Accessor &styler) {
82 WordList &keywords = *keywordlists[0];
84 CharacterSet setWordStart(CharacterSet::setAlpha, "_");
85 // note that [+-] are often parts of identifiers in shell scripts
86 CharacterSet setWord(CharacterSet::setAlphaNum, "._+-");
87 CharacterSet setBashOperator(CharacterSet::setNone, "^&\\%()-+=|{}[]:;>,*/<?!.~@");
88 CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMACahGLNn");
89 CharacterSet setParam(CharacterSet::setAlphaNum, "$_");
90 CharacterSet setHereDoc(CharacterSet::setAlpha, "_\\-+!");
91 CharacterSet setHereDoc2(CharacterSet::setAlphaNum, "_-+!");
92 CharacterSet setLeftShift(CharacterSet::setDigits, "=$");
94 class HereDocCls { // Class to manage HERE document elements
95 public:
96 int State; // 0: '<<' encountered
97 // 1: collect the delimiter
98 // 2: here doc text (lines after the delimiter)
99 int Quote; // the char after '<<'
100 bool Quoted; // true if Quote in ('\'','"','`')
101 bool Indent; // indented delimiter (for <<-)
102 int DelimiterLength; // strlen(Delimiter)
103 char *Delimiter; // the Delimiter, 256: sizeof PL_tokenbuf
104 HereDocCls() {
105 State = 0;
106 Quote = 0;
107 Quoted = false;
108 Indent = 0;
109 DelimiterLength = 0;
110 Delimiter = new char[HERE_DELIM_MAX];
111 Delimiter[0] = '\0';
113 void Append(int ch) {
114 Delimiter[DelimiterLength++] = static_cast<char>(ch);
115 Delimiter[DelimiterLength] = '\0';
117 ~HereDocCls() {
118 delete []Delimiter;
121 HereDocCls HereDoc;
123 class QuoteCls { // Class to manage quote pairs (simplified vs LexPerl)
124 public:
125 int Count;
126 int Up, Down;
127 QuoteCls() {
128 Count = 0;
129 Up = '\0';
130 Down = '\0';
132 void Open(int u) {
133 Count++;
134 Up = u;
135 Down = opposite(Up);
137 void Start(int u) {
138 Count = 0;
139 Open(u);
142 QuoteCls Quote;
144 int numBase = 0;
145 int digit;
146 unsigned int endPos = startPos + length;
148 // Backtrack to beginning of style if required...
149 // If in a long distance lexical state, backtrack to find quote characters
150 if (initStyle == SCE_SH_HERE_Q) {
151 while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_SH_HERE_DELIM)) {
152 startPos--;
154 startPos = styler.LineStart(styler.GetLine(startPos));
155 initStyle = styler.StyleAt(startPos - 1);
157 // Bash strings can be multi-line with embedded newlines, so backtrack.
158 // Bash numbers have additional state during lexing, so backtrack too.
159 if (initStyle == SCE_SH_STRING
160 || initStyle == SCE_SH_BACKTICKS
161 || initStyle == SCE_SH_CHARACTER
162 || initStyle == SCE_SH_NUMBER
163 || initStyle == SCE_SH_IDENTIFIER
164 || initStyle == SCE_SH_COMMENTLINE) {
165 while ((startPos > 1) && (styler.StyleAt(startPos - 1) == initStyle)) {
166 startPos--;
168 initStyle = SCE_SH_DEFAULT;
171 StyleContext sc(startPos, endPos - startPos, initStyle, styler);
173 for (; sc.More(); sc.Forward()) {
175 // Determine if the current state should terminate.
176 switch (sc.state) {
177 case SCE_SH_OPERATOR:
178 sc.SetState(SCE_SH_DEFAULT);
179 break;
180 case SCE_SH_WORD:
181 // "." never used in Bash variable names but used in file names
182 if (!setWord.Contains(sc.ch)) {
183 char s[1000];
184 sc.GetCurrent(s, sizeof(s));
185 if (s[0] != '-' && // for file operators
186 !keywords.InList(s)) {
187 sc.ChangeState(SCE_SH_IDENTIFIER);
189 sc.SetState(SCE_SH_DEFAULT);
191 break;
192 case SCE_SH_IDENTIFIER:
193 if (sc.chPrev == '\\') { // for escaped chars
194 sc.ForwardSetState(SCE_SH_DEFAULT);
195 } else if (!setWord.Contains(sc.ch)) {
196 sc.SetState(SCE_SH_DEFAULT);
198 break;
199 case SCE_SH_NUMBER:
200 digit = translateBashDigit(sc.ch);
201 if (numBase == BASH_BASE_DECIMAL) {
202 if (sc.ch == '#') {
203 char s[10];
204 sc.GetCurrent(s, sizeof(s));
205 numBase = getBashNumberBase(s);
206 if (numBase != BASH_BASE_ERROR)
207 break;
208 } else if (IsADigit(sc.ch))
209 break;
210 } else if (numBase == BASH_BASE_HEX) {
211 if (IsADigit(sc.ch, 16))
212 break;
213 #ifdef PEDANTIC_OCTAL
214 } else if (numBase == BASH_BASE_OCTAL ||
215 numBase == BASH_BASE_OCTAL_ERROR) {
216 if (digit <= 7)
217 break;
218 if (digit <= 9) {
219 numBase = BASH_BASE_OCTAL_ERROR;
220 break;
222 #endif
223 } else if (numBase == BASH_BASE_ERROR) {
224 if (digit <= 9)
225 break;
226 } else { // DD#DDDD number style handling
227 if (digit != BASH_BASE_ERROR) {
228 if (numBase <= 36) {
229 // case-insensitive if base<=36
230 if (digit >= 36) digit -= 26;
232 if (digit < numBase)
233 break;
234 if (digit <= 9) {
235 numBase = BASH_BASE_ERROR;
236 break;
240 // fallthrough when number is at an end or error
241 if (numBase == BASH_BASE_ERROR
242 #ifdef PEDANTIC_OCTAL
243 || numBase == BASH_BASE_OCTAL_ERROR
244 #endif
246 sc.ChangeState(SCE_SH_ERROR);
248 sc.SetState(SCE_SH_DEFAULT);
249 break;
250 case SCE_SH_COMMENTLINE:
251 if (sc.atLineEnd && sc.chPrev != '\\') {
252 sc.SetState(SCE_SH_DEFAULT);
254 break;
255 case SCE_SH_HERE_DELIM:
256 // From Bash info:
257 // ---------------
258 // Specifier format is: <<[-]WORD
259 // Optional '-' is for removal of leading tabs from here-doc.
260 // Whitespace acceptable after <<[-] operator
262 if (HereDoc.State == 0) { // '<<' encountered
263 HereDoc.Quote = sc.chNext;
264 HereDoc.Quoted = false;
265 HereDoc.DelimiterLength = 0;
266 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
267 if (sc.chNext == '\'' || sc.chNext == '\"') { // a quoted here-doc delimiter (' or ")
268 sc.Forward();
269 HereDoc.Quoted = true;
270 HereDoc.State = 1;
271 } else if (!HereDoc.Indent && sc.chNext == '-') { // <<- indent case
272 HereDoc.Indent = true;
273 } else if (setHereDoc.Contains(sc.chNext)) {
274 // an unquoted here-doc delimiter, no special handling
275 // TODO check what exactly bash considers part of the delim
276 HereDoc.State = 1;
277 } else if (sc.chNext == '<') { // HERE string <<<
278 sc.Forward();
279 sc.ForwardSetState(SCE_SH_DEFAULT);
280 } else if (IsASpace(sc.chNext)) {
281 // eat whitespace
282 } else if (setLeftShift.Contains(sc.chNext)) {
283 // left shift << or <<= operator cases
284 sc.ChangeState(SCE_SH_OPERATOR);
285 sc.ForwardSetState(SCE_SH_DEFAULT);
286 } else {
287 // symbols terminates; deprecated zero-length delimiter
288 HereDoc.State = 1;
290 } else if (HereDoc.State == 1) { // collect the delimiter
291 if (setHereDoc2.Contains(sc.ch) || sc.chPrev == '\\') {
292 HereDoc.Append(sc.ch);
293 } else if (HereDoc.Quoted && sc.ch == HereDoc.Quote) { // closing quote => end of delimiter
294 sc.ForwardSetState(SCE_SH_DEFAULT);
295 } else if (sc.ch == '\\') {
296 // skip escape prefix
297 } else {
298 sc.SetState(SCE_SH_DEFAULT);
300 if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) { // force blowup
301 sc.SetState(SCE_SH_ERROR);
302 HereDoc.State = 0;
305 break;
306 case SCE_SH_HERE_Q:
307 // HereDoc.State == 2
308 if (sc.atLineStart) {
309 sc.SetState(SCE_SH_HERE_Q);
310 int prefixws = 0;
311 while (IsASpace(sc.ch) && !sc.atLineEnd) { // whitespace prefix
312 sc.Forward();
313 prefixws++;
315 if (prefixws > 0)
316 sc.SetState(SCE_SH_HERE_Q);
317 while (!sc.atLineEnd) {
318 sc.Forward();
320 char s[HERE_DELIM_MAX];
321 sc.GetCurrent(s, sizeof(s));
322 if (sc.LengthCurrent() == 0)
323 break;
324 if (s[strlen(s) - 1] == '\r')
325 s[strlen(s) - 1] = '\0';
326 if (strcmp(HereDoc.Delimiter, s) == 0) {
327 if ((prefixws > 0 && HereDoc.Indent) || // indentation rule
328 (prefixws == 0 && !HereDoc.Indent)) {
329 sc.SetState(SCE_SH_DEFAULT);
330 break;
334 break;
335 case SCE_SH_SCALAR: // variable names
336 if (!setParam.Contains(sc.ch)) {
337 if (sc.LengthCurrent() == 1) {
338 // Special variable: $(, $_ etc.
339 sc.ForwardSetState(SCE_SH_DEFAULT);
340 } else {
341 sc.SetState(SCE_SH_DEFAULT);
344 break;
345 case SCE_SH_STRING: // delimited styles
346 case SCE_SH_CHARACTER:
347 case SCE_SH_BACKTICKS:
348 case SCE_SH_PARAM:
349 if (sc.ch == '\\' && Quote.Up != '\\') {
350 sc.Forward();
351 } else if (sc.ch == Quote.Down) {
352 Quote.Count--;
353 if (Quote.Count == 0) {
354 sc.ForwardSetState(SCE_SH_DEFAULT);
356 } else if (sc.ch == Quote.Up) {
357 Quote.Count++;
359 break;
362 // Must check end of HereDoc state 1 before default state is handled
363 if (HereDoc.State == 1 && sc.atLineEnd) {
364 // Begin of here-doc (the line after the here-doc delimiter):
365 // Lexically, the here-doc starts from the next line after the >>, but the
366 // first line of here-doc seem to follow the style of the last EOL sequence
367 HereDoc.State = 2;
368 if (HereDoc.Quoted) {
369 if (sc.state == SCE_SH_HERE_DELIM) {
370 // Missing quote at end of string! We are stricter than bash.
371 // Colour here-doc anyway while marking this bit as an error.
372 sc.ChangeState(SCE_SH_ERROR);
374 // HereDoc.Quote always == '\''
376 sc.SetState(SCE_SH_HERE_Q);
379 // Determine if a new state should be entered.
380 if (sc.state == SCE_SH_DEFAULT) {
381 if (sc.ch == '\\') { // escaped character
382 sc.SetState(SCE_SH_IDENTIFIER);
383 } else if (IsADigit(sc.ch)) {
384 sc.SetState(SCE_SH_NUMBER);
385 numBase = BASH_BASE_DECIMAL;
386 if (sc.ch == '0') { // hex,octal
387 if (sc.chNext == 'x' || sc.chNext == 'X') {
388 numBase = BASH_BASE_HEX;
389 sc.Forward();
390 } else if (IsADigit(sc.chNext)) {
391 #ifdef PEDANTIC_OCTAL
392 numBase = BASH_BASE_OCTAL;
393 #else
394 numBase = BASH_BASE_HEX;
395 #endif
398 } else if (setWordStart.Contains(sc.ch)) {
399 sc.SetState(SCE_SH_WORD);
400 } else if (sc.ch == '#') {
401 sc.SetState(SCE_SH_COMMENTLINE);
402 } else if (sc.ch == '\"') {
403 sc.SetState(SCE_SH_STRING);
404 Quote.Start(sc.ch);
405 } else if (sc.ch == '\'') {
406 sc.SetState(SCE_SH_CHARACTER);
407 Quote.Start(sc.ch);
408 } else if (sc.ch == '`') {
409 sc.SetState(SCE_SH_BACKTICKS);
410 Quote.Start(sc.ch);
411 } else if (sc.ch == '$') {
412 sc.SetState(SCE_SH_SCALAR);
413 sc.Forward();
414 if (sc.ch == '{') {
415 sc.ChangeState(SCE_SH_PARAM);
416 } else if (sc.ch == '\'') {
417 sc.ChangeState(SCE_SH_CHARACTER);
418 } else if (sc.ch == '"') {
419 sc.ChangeState(SCE_SH_STRING);
420 } else if (sc.ch == '(' || sc.ch == '`') {
421 sc.ChangeState(SCE_SH_BACKTICKS);
422 if (sc.chNext == '(') { // $(( is lexed as operator
423 sc.ChangeState(SCE_SH_OPERATOR);
425 } else {
426 continue; // scalar has no delimiter pair
428 // fallthrough, open delim for $[{'"(`]
429 Quote.Start(sc.ch);
430 } else if (sc.Match('<', '<')) {
431 sc.SetState(SCE_SH_HERE_DELIM);
432 HereDoc.State = 0;
433 HereDoc.Indent = false;
434 } else if (sc.ch == '-' && // one-char file test operators
435 setSingleCharOp.Contains(sc.chNext) &&
436 !setWord.Contains(sc.GetRelative(2)) &&
437 IsASpace(sc.chPrev)) {
438 sc.SetState(SCE_SH_WORD);
439 sc.Forward();
440 } else if (setBashOperator.Contains(sc.ch)) {
441 sc.SetState(SCE_SH_OPERATOR);
445 sc.Complete();
448 static bool IsCommentLine(int line, Accessor &styler) {
449 int pos = styler.LineStart(line);
450 int eol_pos = styler.LineStart(line + 1) - 1;
451 for (int i = pos; i < eol_pos; i++) {
452 char ch = styler[i];
453 if (ch == '#')
454 return true;
455 else if (ch != ' ' && ch != '\t')
456 return false;
458 return false;
461 static void FoldBashDoc(unsigned int startPos, int length, int, WordList *[],
462 Accessor &styler) {
463 bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
464 bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
465 unsigned int endPos = startPos + length;
466 int visibleChars = 0;
467 int lineCurrent = styler.GetLine(startPos);
468 int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
469 int levelCurrent = levelPrev;
470 char chNext = styler[startPos];
471 int styleNext = styler.StyleAt(startPos);
472 for (unsigned int i = startPos; i < endPos; i++) {
473 char ch = chNext;
474 chNext = styler.SafeGetCharAt(i + 1);
475 int style = styleNext;
476 styleNext = styler.StyleAt(i + 1);
477 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
478 // Comment folding
479 if (foldComment && atEOL && IsCommentLine(lineCurrent, styler))
481 if (!IsCommentLine(lineCurrent - 1, styler)
482 && IsCommentLine(lineCurrent + 1, styler))
483 levelCurrent++;
484 else if (IsCommentLine(lineCurrent - 1, styler)
485 && !IsCommentLine(lineCurrent + 1, styler))
486 levelCurrent--;
488 if (style == SCE_SH_OPERATOR) {
489 if (ch == '{') {
490 levelCurrent++;
491 } else if (ch == '}') {
492 levelCurrent--;
495 if (atEOL) {
496 int lev = levelPrev;
497 if (visibleChars == 0 && foldCompact)
498 lev |= SC_FOLDLEVELWHITEFLAG;
499 if ((levelCurrent > levelPrev) && (visibleChars > 0))
500 lev |= SC_FOLDLEVELHEADERFLAG;
501 if (lev != styler.LevelAt(lineCurrent)) {
502 styler.SetLevel(lineCurrent, lev);
504 lineCurrent++;
505 levelPrev = levelCurrent;
506 visibleChars = 0;
508 if (!isspacechar(ch))
509 visibleChars++;
511 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
512 int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
513 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
// Descriptions of the keyword lists used by this lexer, in list-index order
// (index 0 is the list ColouriseBashDoc reads as keywordlists[0]).
// The array is terminated by a null pointer so that its length can be
// found without a separate count.
static const char * const bashWordListDesc[] = {
	"Keywords",
	0
};
521 LexerModule lmBash(SCLEX_BASH, ColouriseBashDoc, "bash", FoldBashDoc, bashWordListDesc);