Update Scintilla to version 3.4.4
[geany-mirror.git] / scintilla / lexers / LexPerl.cxx
blobb4a2dd12ebeac7ec624d02855b95c5bd3539f976
1 // Scintilla source code edit control
2 /** @file LexPerl.cxx
3 ** Lexer for Perl.
4 ** Converted to lexer object by "Udo Lechner" <dlchnr(at)gmx(dot)net>
5 **/
6 // Copyright 1998-2008 by Neil Hodgson <neilh@scintilla.org>
7 // Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my>
8 // The License.txt file describes the conditions under which this software may be distributed.
10 #include <stdlib.h>
11 #include <string.h>
12 #include <stdio.h>
13 #include <stdarg.h>
14 #include <assert.h>
15 #include <ctype.h>
17 #include <string>
18 #include <map>
20 #include "ILexer.h"
21 #include "Scintilla.h"
22 #include "SciLexer.h"
24 #include "WordList.h"
25 #include "LexAccessor.h"
26 #include "StyleContext.h"
27 #include "CharacterSet.h"
28 #include "LexerModule.h"
29 #include "OptionSet.h"
31 #ifdef SCI_NAMESPACE
32 using namespace Scintilla;
33 #endif
35 // Info for HERE document handling from perldata.pod (reformatted):
36 // ----------------------------------------------------------------
37 // A line-oriented form of quoting is based on the shell ``here-doc'' syntax.
38 // Following a << you specify a string to terminate the quoted material, and
39 // all lines following the current line down to the terminating string are
40 // the value of the item.
41 // * The terminating string may be either an identifier (a word), or some
42 // quoted text.
43 // * If quoted, the type of quotes you use determines the treatment of the
44 // text, just as in regular quoting.
45 // * An unquoted identifier works like double quotes.
46 // * There must be no space between the << and the identifier.
47 // (If you put a space it will be treated as a null identifier,
48 // which is valid, and matches the first empty line.)
49 // (This is deprecated, -w warns of this syntax)
50 // * The terminating string must appear by itself (unquoted and
51 // with no surrounding whitespace) on the terminating line.
// Capacity of the buffer that holds a HERE document delimiter.
#define HERE_DELIM_MAX 256		// maximum length of HERE doc delimiter

// Sub-states used while lexing a numeric literal.
#define PERLNUM_BINARY		1	// order is significant: 1-4 cannot have a dot
#define PERLNUM_HEX			2
#define PERLNUM_OCTAL		3
#define PERLNUM_FLOAT_EXP	4	// exponent part only
#define PERLNUM_DECIMAL		5	// 1-5 are numbers; 6-7 are strings
#define PERLNUM_VECTOR		6
#define PERLNUM_V_VECTOR	7
#define PERLNUM_BAD			8

// Kind of the last significant item seen before the current position.
#define BACK_NONE		0	// lookback state for bareword disambiguation:
#define BACK_OPERATOR	1	// whitespace/comments are insignificant
#define BACK_KEYWORD	2	// operators/keywords are needed for disambiguation

// all interpolated styles are different from their parent styles by a constant difference
// we also assume SCE_PL_STRING_VAR is the interpolated style with the smallest value
#define INTERPOLATE_SHIFT	(SCE_PL_STRING_VAR - SCE_PL_STRING)
72 static bool isPerlKeyword(unsigned int start, unsigned int end, WordList &keywords, LexAccessor &styler) {
73 // old-style keyword matcher; needed because GetCurrent() needs
74 // current segment to be committed, but we may abandon early...
75 char s[100];
76 unsigned int i, len = end - start;
77 if (len > 30) { len = 30; }
78 for (i = 0; i < len; i++, start++) s[i] = styler[start];
79 s[i] = '\0';
80 return keywords.InList(s);
83 static int disambiguateBareword(LexAccessor &styler, unsigned int bk, unsigned int fw,
84 int backFlag, unsigned int backPos, unsigned int endPos) {
85 // identifiers are recognized by Perl as barewords under some
86 // conditions, the following attempts to do the disambiguation
87 // by looking backward and forward; result in 2 LSB
88 int result = 0;
89 bool moreback = false; // true if passed newline/comments
90 bool brace = false; // true if opening brace found
91 // if BACK_NONE, neither operator nor keyword, so skip test
92 if (backFlag == BACK_NONE)
93 return result;
94 // first look backwards past whitespace/comments to set EOL flag
95 // (some disambiguation patterns must be on a single line)
96 if (backPos <= static_cast<unsigned int>(styler.LineStart(styler.GetLine(bk))))
97 moreback = true;
98 // look backwards at last significant lexed item for disambiguation
99 bk = backPos - 1;
100 int ch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
101 if (ch == '{' && !moreback) {
102 // {bareword: possible variable spec
103 brace = true;
104 } else if ((ch == '&' && styler.SafeGetCharAt(bk - 1) != '&')
105 // &bareword: subroutine call
106 || styler.Match(bk - 1, "->")
107 // ->bareword: part of variable spec
108 || styler.Match(bk - 2, "sub")) {
109 // sub bareword: subroutine declaration
110 // (implied BACK_KEYWORD, no keywords end in 'sub'!)
111 result |= 1;
113 // next, scan forward after word past tab/spaces only;
114 // if ch isn't one of '[{(,' we can skip the test
115 if ((ch == '{' || ch == '(' || ch == '['|| ch == ',')
116 && fw < endPos) {
117 while (ch = static_cast<unsigned char>(styler.SafeGetCharAt(fw)),
118 IsASpaceOrTab(ch) && fw < endPos) {
119 fw++;
121 if ((ch == '}' && brace)
122 // {bareword}: variable spec
123 || styler.Match(fw, "=>")) {
124 // [{(, bareword=>: hash literal
125 result |= 2;
128 return result;
131 static void skipWhitespaceComment(LexAccessor &styler, unsigned int &p) {
132 // when backtracking, we need to skip whitespace and comments
133 int style;
134 while ((p > 0) && (style = styler.StyleAt(p),
135 style == SCE_PL_DEFAULT || style == SCE_PL_COMMENTLINE))
136 p--;
139 static int styleBeforeBracePair(LexAccessor &styler, unsigned int bk) {
140 // backtrack to find open '{' corresponding to a '}', balanced
141 // return significant style to be tested for '/' disambiguation
142 int braceCount = 1;
143 if (bk == 0)
144 return SCE_PL_DEFAULT;
145 while (--bk > 0) {
146 if (styler.StyleAt(bk) == SCE_PL_OPERATOR) {
147 int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
148 if (bkch == ';') { // early out
149 break;
150 } else if (bkch == '}') {
151 braceCount++;
152 } else if (bkch == '{') {
153 if (--braceCount == 0) break;
157 if (bk > 0 && braceCount == 0) {
158 // balanced { found, bk > 0, skip more whitespace/comments
159 bk--;
160 skipWhitespaceComment(styler, bk);
161 return styler.StyleAt(bk);
163 return SCE_PL_DEFAULT;
166 static int styleCheckIdentifier(LexAccessor &styler, unsigned int bk) {
167 // backtrack to classify sub-styles of identifier under test
168 // return sub-style to be tested for '/' disambiguation
169 if (styler.SafeGetCharAt(bk) == '>') // inputsymbol, like <foo>
170 return 1;
171 // backtrack to check for possible "->" or "::" before identifier
172 while (bk > 0 && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) {
173 bk--;
175 while (bk > 0) {
176 int bkstyle = styler.StyleAt(bk);
177 if (bkstyle == SCE_PL_DEFAULT
178 || bkstyle == SCE_PL_COMMENTLINE) {
179 // skip whitespace, comments
180 } else if (bkstyle == SCE_PL_OPERATOR) {
181 // test for "->" and "::"
182 if (styler.Match(bk - 1, "->") || styler.Match(bk - 1, "::"))
183 return 2;
184 } else
185 return 3; // bare identifier
186 bk--;
188 return 0;
191 static int podLineScan(LexAccessor &styler, unsigned int &pos, unsigned int endPos) {
192 // forward scan the current line to classify line for POD style
193 int state = -1;
194 while (pos < endPos) {
195 int ch = static_cast<unsigned char>(styler.SafeGetCharAt(pos));
196 if (ch == '\n' || ch == '\r') {
197 if (ch == '\r' && styler.SafeGetCharAt(pos + 1) == '\n') pos++;
198 break;
200 if (IsASpaceOrTab(ch)) { // whitespace, take note
201 if (state == -1)
202 state = SCE_PL_DEFAULT;
203 } else if (state == SCE_PL_DEFAULT) { // verbatim POD line
204 state = SCE_PL_POD_VERB;
205 } else if (state != SCE_PL_POD_VERB) { // regular POD line
206 state = SCE_PL_POD;
208 pos++;
210 if (state == -1)
211 state = SCE_PL_DEFAULT;
212 return state;
215 static bool styleCheckSubPrototype(LexAccessor &styler, unsigned int bk) {
216 // backtrack to identify if we're starting a subroutine prototype
217 // we also need to ignore whitespace/comments:
218 // 'sub' [whitespace|comment] <identifier> [whitespace|comment]
219 styler.Flush();
220 skipWhitespaceComment(styler, bk);
221 if (bk == 0 || styler.StyleAt(bk) != SCE_PL_IDENTIFIER) // check identifier
222 return false;
223 while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_IDENTIFIER)) {
224 bk--;
226 skipWhitespaceComment(styler, bk);
227 if (bk < 2 || styler.StyleAt(bk) != SCE_PL_WORD // check "sub" keyword
228 || !styler.Match(bk - 2, "sub")) // assume suffix is unique!
229 return false;
230 return true;
233 static int actualNumStyle(int numberStyle) {
234 if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) {
235 return SCE_PL_STRING;
236 } else if (numberStyle == PERLNUM_BAD) {
237 return SCE_PL_ERROR;
239 return SCE_PL_NUMBER;
static int opposite(int ch) {
	// Return the closing delimiter that pairs with an opening bracket.
	// Any other character is returned unchanged.
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
250 static bool IsCommentLine(int line, LexAccessor &styler) {
251 int pos = styler.LineStart(line);
252 int eol_pos = styler.LineStart(line + 1) - 1;
253 for (int i = pos; i < eol_pos; i++) {
254 char ch = styler[i];
255 int style = styler.StyleAt(i);
256 if (ch == '#' && style == SCE_PL_COMMENTLINE)
257 return true;
258 else if (!IsASpaceOrTab(ch))
259 return false;
261 return false;
264 static bool IsPackageLine(int line, LexAccessor &styler) {
265 int pos = styler.LineStart(line);
266 int style = styler.StyleAt(pos);
267 if (style == SCE_PL_WORD && styler.Match(pos, "package")) {
268 return true;
270 return false;
273 static int PodHeadingLevel(int pos, LexAccessor &styler) {
274 int lvl = static_cast<unsigned char>(styler.SafeGetCharAt(pos + 5));
275 if (lvl >= '1' && lvl <= '4') {
276 return lvl - '0';
278 return 0;
281 // An individual named option for use in an OptionSet
283 // Options used for LexerPerl
struct OptionsPerl {
	// Each flag is bound to a property name in OptionSetPerl.
	bool fold;					// fold
	bool foldComment;			// fold.comment
	bool foldCompact;			// fold.compact
	// Custom folding of POD and packages
	bool foldPOD;				// fold.perl.pod
	// Enable folding Pod blocks when using the Perl lexer.
	bool foldPackage;			// fold.perl.package
	// Enable folding packages when using the Perl lexer.

	bool foldCommentExplicit;	// fold.perl.comment.explicit

	bool foldAtElse;			// fold.perl.at.else

	// Defaults: folding itself is off; compact, POD, package and
	// explicit-comment folding are enabled once folding is turned on.
	OptionsPerl() {
		fold = false;
		foldComment = false;
		foldCompact = true;
		foldPOD = true;
		foldPackage = true;
		foldCommentExplicit = true;
		foldAtElse = false;
	}
};
// Descriptions of the word lists this lexer accepts; passed to
// DefineWordListSets(). The list ends with a null entry.
// NOTE(review): the terminator and closing brace were missing from this
// copy of the file; restored on the assumption that DefineWordListSets()
// expects a null-terminated array -- confirm against OptionSet.h.
static const char *const perlWordListDesc[] = {
	"Keywords",
	0
};
314 struct OptionSetPerl : public OptionSet<OptionsPerl> {
315 OptionSetPerl() {
316 DefineProperty("fold", &OptionsPerl::fold);
318 DefineProperty("fold.comment", &OptionsPerl::foldComment);
320 DefineProperty("fold.compact", &OptionsPerl::foldCompact);
322 DefineProperty("fold.perl.pod", &OptionsPerl::foldPOD,
323 "Set to 0 to disable folding Pod blocks when using the Perl lexer.");
325 DefineProperty("fold.perl.package", &OptionsPerl::foldPackage,
326 "Set to 0 to disable folding packages when using the Perl lexer.");
328 DefineProperty("fold.perl.comment.explicit", &OptionsPerl::foldCommentExplicit,
329 "Set to 0 to disable explicit folding.");
331 DefineProperty("fold.perl.at.else", &OptionsPerl::foldAtElse,
332 "This option enables Perl folding on a \"} else {\" line of an if statement.");
334 DefineWordListSets(perlWordListDesc);
338 class LexerPerl : public ILexer {
339 CharacterSet setWordStart;
340 CharacterSet setWord;
341 CharacterSet setSpecialVar;
342 CharacterSet setControlVar;
343 WordList keywords;
344 OptionsPerl options;
345 OptionSetPerl osPerl;
346 public:
347 LexerPerl() :
348 setWordStart(CharacterSet::setAlpha, "_", 0x80, true),
349 setWord(CharacterSet::setAlphaNum, "_", 0x80, true),
350 setSpecialVar(CharacterSet::setNone, "\"$;<>&`'+,./\\%:=~!?@[]"),
351 setControlVar(CharacterSet::setNone, "ACDEFHILMNOPRSTVWX") {
353 virtual ~LexerPerl() {
355 void SCI_METHOD Release() {
356 delete this;
358 int SCI_METHOD Version() const {
359 return lvOriginal;
361 const char *SCI_METHOD PropertyNames() {
362 return osPerl.PropertyNames();
364 int SCI_METHOD PropertyType(const char *name) {
365 return osPerl.PropertyType(name);
367 const char *SCI_METHOD DescribeProperty(const char *name) {
368 return osPerl.DescribeProperty(name);
370 int SCI_METHOD PropertySet(const char *key, const char *val);
371 const char *SCI_METHOD DescribeWordListSets() {
372 return osPerl.DescribeWordListSets();
374 int SCI_METHOD WordListSet(int n, const char *wl);
375 void SCI_METHOD Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
376 void SCI_METHOD Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
378 void *SCI_METHOD PrivateCall(int, void *) {
379 return 0;
382 static ILexer *LexerFactoryPerl() {
383 return new LexerPerl();
385 int InputSymbolScan(StyleContext &sc);
386 void InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern=false);
389 int SCI_METHOD LexerPerl::PropertySet(const char *key, const char *val) {
390 if (osPerl.PropertySet(&options, key, val)) {
391 return 0;
393 return -1;
396 int SCI_METHOD LexerPerl::WordListSet(int n, const char *wl) {
397 WordList *wordListN = 0;
398 switch (n) {
399 case 0:
400 wordListN = &keywords;
401 break;
403 int firstModification = -1;
404 if (wordListN) {
405 WordList wlNew;
406 wlNew.Set(wl);
407 if (*wordListN != wlNew) {
408 wordListN->Set(wl);
409 firstModification = 0;
412 return firstModification;
415 int LexerPerl::InputSymbolScan(StyleContext &sc) {
416 // forward scan for matching > on same line; file handles
417 int c, sLen = 0;
418 while ((c = sc.GetRelativeCharacter(++sLen)) != 0) {
419 if (c == '\r' || c == '\n') {
420 return 0;
421 } else if (c == '>') {
422 if (sc.Match("<=>")) // '<=>' case
423 return 0;
424 return sLen;
427 return 0;
430 void LexerPerl::InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern) {
431 // interpolate a segment (with no active backslashes or delimiters within)
432 // switch in or out of an interpolation style or continue current style
433 // commit variable patterns if found, trim segment, repeat until done
434 while (maxSeg > 0) {
435 bool isVar = false;
436 int sLen = 0;
437 if ((maxSeg > 1) && (sc.ch == '$' || sc.ch == '@')) {
438 // $#[$]*word [$@][$]*word (where word or {word} is always present)
439 bool braces = false;
440 sLen = 1;
441 if (sc.ch == '$' && sc.chNext == '#') { // starts with $#
442 sLen++;
444 while ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '$')) // >0 $ dereference within
445 sLen++;
446 if ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '{')) { // { start for {word}
447 sLen++;
448 braces = true;
450 if (maxSeg > sLen) {
451 int c = sc.GetRelativeCharacter(sLen);
452 if (setWordStart.Contains(c)) { // word (various)
453 sLen++;
454 isVar = true;
455 while (maxSeg > sLen) {
456 if (!setWord.Contains(sc.GetRelativeCharacter(sLen)))
457 break;
458 sLen++;
460 } else if (braces && IsADigit(c) && (sLen == 2)) { // digit for ${digit}
461 sLen++;
462 isVar = true;
465 if (braces) {
466 if ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '}')) { // } end for {word}
467 sLen++;
468 } else
469 isVar = false;
472 if (!isVar && (maxSeg > 1)) { // $- or @-specific variable patterns
473 int c = sc.chNext;
474 if (sc.ch == '$') {
475 sLen = 1;
476 if (IsADigit(c)) { // $[0-9] and slurp trailing digits
477 sLen++;
478 isVar = true;
479 while ((maxSeg > sLen) && IsADigit(sc.GetRelativeCharacter(sLen)))
480 sLen++;
481 } else if (setSpecialVar.Contains(c)) { // $ special variables
482 sLen++;
483 isVar = true;
484 } else if (!isPattern && ((c == '(') || (c == ')') || (c == '|'))) { // $ additional
485 sLen++;
486 isVar = true;
487 } else if (c == '^') { // $^A control-char style
488 sLen++;
489 if ((maxSeg > sLen) && setControlVar.Contains(sc.GetRelativeCharacter(sLen))) {
490 sLen++;
491 isVar = true;
494 } else if (sc.ch == '@') {
495 sLen = 1;
496 if (!isPattern && ((c == '+') || (c == '-'))) { // @ specials non-pattern
497 sLen++;
498 isVar = true;
502 if (isVar) { // commit as interpolated variable or normal character
503 if (sc.state < SCE_PL_STRING_VAR)
504 sc.SetState(sc.state + INTERPOLATE_SHIFT);
505 sc.Forward(sLen);
506 maxSeg -= sLen;
507 } else {
508 if (sc.state >= SCE_PL_STRING_VAR)
509 sc.SetState(sc.state - INTERPOLATE_SHIFT);
510 sc.Forward();
511 maxSeg--;
514 if (sc.state >= SCE_PL_STRING_VAR)
515 sc.SetState(sc.state - INTERPOLATE_SHIFT);
518 void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
519 LexAccessor styler(pAccess);
521 // keywords that forces /PATTERN/ at all times; should track vim's behaviour
522 WordList reWords;
523 reWords.Set("elsif if split while");
525 // charset classes
526 CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMAC");
527 // lexing of "%*</" operators is non-trivial; these are missing in the set below
528 CharacterSet setPerlOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;>,?!.~");
529 CharacterSet setQDelim(CharacterSet::setNone, "qrwx");
530 CharacterSet setModifiers(CharacterSet::setAlpha);
531 CharacterSet setPreferRE(CharacterSet::setNone, "*/<%");
532 // setArray and setHash also accepts chars for special vars like $_,
533 // which are then truncated when the next char does not match setVar
534 CharacterSet setVar(CharacterSet::setAlphaNum, "#$_'", 0x80, true);
535 CharacterSet setArray(CharacterSet::setAlpha, "#$_+-", 0x80, true);
536 CharacterSet setHash(CharacterSet::setAlpha, "#$_!^+-", 0x80, true);
537 CharacterSet &setPOD = setModifiers;
538 CharacterSet setNonHereDoc(CharacterSet::setDigits, "=$@");
539 CharacterSet setHereDocDelim(CharacterSet::setAlphaNum, "_");
540 CharacterSet setSubPrototype(CharacterSet::setNone, "\\[$@%&*+];");
541 // for format identifiers
542 CharacterSet setFormatStart(CharacterSet::setAlpha, "_=");
543 CharacterSet &setFormat = setHereDocDelim;
545 // Lexer for perl often has to backtrack to start of current style to determine
546 // which characters are being used as quotes, how deeply nested is the
547 // start position and what the termination string is for HERE documents.
549 class HereDocCls { // Class to manage HERE doc sequence
550 public:
551 int State;
552 // 0: '<<' encountered
553 // 1: collect the delimiter
554 // 2: here doc text (lines after the delimiter)
555 int Quote; // the char after '<<'
556 bool Quoted; // true if Quote in ('\'','"','`')
557 int DelimiterLength; // strlen(Delimiter)
558 char *Delimiter; // the Delimiter, 256: sizeof PL_tokenbuf
559 HereDocCls() {
560 State = 0;
561 Quote = 0;
562 Quoted = false;
563 DelimiterLength = 0;
564 Delimiter = new char[HERE_DELIM_MAX];
565 Delimiter[0] = '\0';
567 void Append(int ch) {
568 Delimiter[DelimiterLength++] = static_cast<char>(ch);
569 Delimiter[DelimiterLength] = '\0';
571 ~HereDocCls() {
572 delete []Delimiter;
575 HereDocCls HereDoc; // TODO: FIFO for stacked here-docs
577 class QuoteCls { // Class to manage quote pairs
578 public:
579 int Rep;
580 int Count;
581 int Up, Down;
582 QuoteCls() {
583 New(1);
585 void New(int r = 1) {
586 Rep = r;
587 Count = 0;
588 Up = '\0';
589 Down = '\0';
591 void Open(int u) {
592 Count++;
593 Up = u;
594 Down = opposite(Up);
597 QuoteCls Quote;
599 // additional state for number lexing
600 int numState = PERLNUM_DECIMAL;
601 int dotCount = 0;
603 unsigned int endPos = startPos + length;
605 // Backtrack to beginning of style if required...
606 // If in a long distance lexical state, backtrack to find quote characters.
607 // Includes strings (may be multi-line), numbers (additional state), format
608 // bodies, as well as POD sections.
609 if (initStyle == SCE_PL_HERE_Q
610 || initStyle == SCE_PL_HERE_QQ
611 || initStyle == SCE_PL_HERE_QX
612 || initStyle == SCE_PL_FORMAT
613 || initStyle == SCE_PL_HERE_QQ_VAR
614 || initStyle == SCE_PL_HERE_QX_VAR
616 // backtrack through multiple styles to reach the delimiter start
617 int delim = (initStyle == SCE_PL_FORMAT) ? SCE_PL_FORMAT_IDENT:SCE_PL_HERE_DELIM;
618 while ((startPos > 1) && (styler.StyleAt(startPos) != delim)) {
619 startPos--;
621 startPos = styler.LineStart(styler.GetLine(startPos));
622 initStyle = styler.StyleAt(startPos - 1);
624 if (initStyle == SCE_PL_STRING
625 || initStyle == SCE_PL_STRING_QQ
626 || initStyle == SCE_PL_BACKTICKS
627 || initStyle == SCE_PL_STRING_QX
628 || initStyle == SCE_PL_REGEX
629 || initStyle == SCE_PL_STRING_QR
630 || initStyle == SCE_PL_REGSUBST
631 || initStyle == SCE_PL_STRING_VAR
632 || initStyle == SCE_PL_STRING_QQ_VAR
633 || initStyle == SCE_PL_BACKTICKS_VAR
634 || initStyle == SCE_PL_STRING_QX_VAR
635 || initStyle == SCE_PL_REGEX_VAR
636 || initStyle == SCE_PL_STRING_QR_VAR
637 || initStyle == SCE_PL_REGSUBST_VAR
639 // for interpolation, must backtrack through a mix of two different styles
640 int otherStyle = (initStyle >= SCE_PL_STRING_VAR) ?
641 initStyle - INTERPOLATE_SHIFT : initStyle + INTERPOLATE_SHIFT;
642 while (startPos > 1) {
643 int st = styler.StyleAt(startPos - 1);
644 if ((st != initStyle) && (st != otherStyle))
645 break;
646 startPos--;
648 initStyle = SCE_PL_DEFAULT;
649 } else if (initStyle == SCE_PL_STRING_Q
650 || initStyle == SCE_PL_STRING_QW
651 || initStyle == SCE_PL_XLAT
652 || initStyle == SCE_PL_CHARACTER
653 || initStyle == SCE_PL_NUMBER
654 || initStyle == SCE_PL_IDENTIFIER
655 || initStyle == SCE_PL_ERROR
656 || initStyle == SCE_PL_SUB_PROTOTYPE
658 while ((startPos > 1) && (styler.StyleAt(startPos - 1) == initStyle)) {
659 startPos--;
661 initStyle = SCE_PL_DEFAULT;
662 } else if (initStyle == SCE_PL_POD
663 || initStyle == SCE_PL_POD_VERB
665 // POD backtracking finds preceding blank lines and goes back past them
666 int ln = styler.GetLine(startPos);
667 if (ln > 0) {
668 initStyle = styler.StyleAt(styler.LineStart(--ln));
669 if (initStyle == SCE_PL_POD || initStyle == SCE_PL_POD_VERB) {
670 while (ln > 0 && styler.GetLineState(ln) == SCE_PL_DEFAULT)
671 ln--;
673 startPos = styler.LineStart(++ln);
674 initStyle = styler.StyleAt(startPos - 1);
675 } else {
676 startPos = 0;
677 initStyle = SCE_PL_DEFAULT;
681 // backFlag, backPos are additional state to aid identifier corner cases.
682 // Look backwards past whitespace and comments in order to detect either
683 // operator or keyword. Later updated as we go along.
684 int backFlag = BACK_NONE;
685 unsigned int backPos = startPos;
686 if (backPos > 0) {
687 backPos--;
688 skipWhitespaceComment(styler, backPos);
689 if (styler.StyleAt(backPos) == SCE_PL_OPERATOR)
690 backFlag = BACK_OPERATOR;
691 else if (styler.StyleAt(backPos) == SCE_PL_WORD)
692 backFlag = BACK_KEYWORD;
693 backPos++;
696 StyleContext sc(startPos, endPos - startPos, initStyle, styler, static_cast<char>(STYLE_MAX));
698 for (; sc.More(); sc.Forward()) {
700 // Determine if the current state should terminate.
701 switch (sc.state) {
702 case SCE_PL_OPERATOR:
703 sc.SetState(SCE_PL_DEFAULT);
704 backFlag = BACK_OPERATOR;
705 backPos = sc.currentPos;
706 break;
707 case SCE_PL_IDENTIFIER: // identifier, bareword, inputsymbol
708 if ((!setWord.Contains(sc.ch) && sc.ch != '\'')
709 || sc.Match('.', '.')
710 || sc.chPrev == '>') { // end of inputsymbol
711 sc.SetState(SCE_PL_DEFAULT);
713 break;
714 case SCE_PL_WORD: // keyword, plus special cases
715 if (!setWord.Contains(sc.ch)) {
716 char s[100];
717 sc.GetCurrent(s, sizeof(s));
718 if ((strcmp(s, "__DATA__") == 0) || (strcmp(s, "__END__") == 0)) {
719 sc.ChangeState(SCE_PL_DATASECTION);
720 } else {
721 if ((strcmp(s, "format") == 0)) {
722 sc.SetState(SCE_PL_FORMAT_IDENT);
723 HereDoc.State = 0;
724 } else {
725 sc.SetState(SCE_PL_DEFAULT);
727 backFlag = BACK_KEYWORD;
728 backPos = sc.currentPos;
731 break;
732 case SCE_PL_SCALAR:
733 case SCE_PL_ARRAY:
734 case SCE_PL_HASH:
735 case SCE_PL_SYMBOLTABLE:
736 if (sc.Match(':', ':')) { // skip ::
737 sc.Forward();
738 } else if (!setVar.Contains(sc.ch)) {
739 if (sc.LengthCurrent() == 1) {
740 // Special variable: $(, $_ etc.
741 sc.Forward();
743 sc.SetState(SCE_PL_DEFAULT);
745 break;
746 case SCE_PL_NUMBER:
747 // if no early break, number style is terminated at "(go through)"
748 if (sc.ch == '.') {
749 if (sc.chNext == '.') {
750 // double dot is always an operator (go through)
751 } else if (numState <= PERLNUM_FLOAT_EXP) {
752 // non-decimal number or float exponent, consume next dot
753 sc.SetState(SCE_PL_OPERATOR);
754 break;
755 } else { // decimal or vectors allows dots
756 dotCount++;
757 if (numState == PERLNUM_DECIMAL) {
758 if (dotCount <= 1) // number with one dot in it
759 break;
760 if (IsADigit(sc.chNext)) { // really a vector
761 numState = PERLNUM_VECTOR;
762 break;
764 // number then dot (go through)
765 } else if (IsADigit(sc.chNext)) // vectors
766 break;
767 // vector then dot (go through)
769 } else if (sc.ch == '_') {
770 // permissive underscoring for number and vector literals
771 break;
772 } else if (numState == PERLNUM_DECIMAL) {
773 if (sc.ch == 'E' || sc.ch == 'e') { // exponent, sign
774 numState = PERLNUM_FLOAT_EXP;
775 if (sc.chNext == '+' || sc.chNext == '-') {
776 sc.Forward();
778 break;
779 } else if (IsADigit(sc.ch))
780 break;
781 // number then word (go through)
782 } else if (numState == PERLNUM_HEX) {
783 if (IsADigit(sc.ch, 16))
784 break;
785 } else if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) {
786 if (IsADigit(sc.ch)) // vector
787 break;
788 if (setWord.Contains(sc.ch) && dotCount == 0) { // change to word
789 sc.ChangeState(SCE_PL_IDENTIFIER);
790 break;
792 // vector then word (go through)
793 } else if (IsADigit(sc.ch)) {
794 if (numState == PERLNUM_FLOAT_EXP) {
795 break;
796 } else if (numState == PERLNUM_OCTAL) {
797 if (sc.ch <= '7') break;
798 } else if (numState == PERLNUM_BINARY) {
799 if (sc.ch <= '1') break;
801 // mark invalid octal, binary numbers (go through)
802 numState = PERLNUM_BAD;
803 break;
805 // complete current number or vector
806 sc.ChangeState(actualNumStyle(numState));
807 sc.SetState(SCE_PL_DEFAULT);
808 break;
809 case SCE_PL_COMMENTLINE:
810 if (sc.atLineEnd) {
811 sc.SetState(SCE_PL_DEFAULT);
813 break;
814 case SCE_PL_HERE_DELIM:
815 if (HereDoc.State == 0) { // '<<' encountered
816 int delim_ch = sc.chNext;
817 int ws_skip = 0;
818 HereDoc.State = 1; // pre-init HERE doc class
819 HereDoc.Quote = sc.chNext;
820 HereDoc.Quoted = false;
821 HereDoc.DelimiterLength = 0;
822 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
823 if (IsASpaceOrTab(delim_ch)) {
824 // skip whitespace; legal only for quoted delimiters
825 unsigned int i = sc.currentPos + 1;
826 while ((i < endPos) && IsASpaceOrTab(delim_ch)) {
827 i++;
828 delim_ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
830 ws_skip = i - sc.currentPos - 1;
832 if (delim_ch == '\'' || delim_ch == '"' || delim_ch == '`') {
833 // a quoted here-doc delimiter; skip any whitespace
834 sc.Forward(ws_skip + 1);
835 HereDoc.Quote = delim_ch;
836 HereDoc.Quoted = true;
837 } else if ((ws_skip == 0 && setNonHereDoc.Contains(sc.chNext))
838 || ws_skip > 0) {
839 // left shift << or <<= operator cases
840 // restore position if operator
841 sc.ChangeState(SCE_PL_OPERATOR);
842 sc.ForwardSetState(SCE_PL_DEFAULT);
843 backFlag = BACK_OPERATOR;
844 backPos = sc.currentPos;
845 HereDoc.State = 0;
846 } else {
847 // specially handle initial '\' for identifier
848 if (ws_skip == 0 && HereDoc.Quote == '\\')
849 sc.Forward();
850 // an unquoted here-doc delimiter, no special handling
851 // (cannot be prefixed by spaces/tabs), or
852 // symbols terminates; deprecated zero-length delimiter
854 } else if (HereDoc.State == 1) { // collect the delimiter
855 backFlag = BACK_NONE;
856 if (HereDoc.Quoted) { // a quoted here-doc delimiter
857 if (sc.ch == HereDoc.Quote) { // closing quote => end of delimiter
858 sc.ForwardSetState(SCE_PL_DEFAULT);
859 } else if (!sc.atLineEnd) {
860 if (sc.Match('\\', static_cast<char>(HereDoc.Quote))) { // escaped quote
861 sc.Forward();
863 if (sc.ch != '\r') { // skip CR if CRLF
864 int i = 0; // else append char, possibly an extended char
865 while (i < sc.width) {
866 HereDoc.Append(static_cast<unsigned char>(styler.SafeGetCharAt(sc.currentPos + i)));
867 i++;
871 } else { // an unquoted here-doc delimiter, no extended charsets
872 if (setHereDocDelim.Contains(sc.ch)) {
873 HereDoc.Append(sc.ch);
874 } else {
875 sc.SetState(SCE_PL_DEFAULT);
878 if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) {
879 sc.SetState(SCE_PL_ERROR);
880 HereDoc.State = 0;
883 break;
884 case SCE_PL_HERE_Q:
885 case SCE_PL_HERE_QQ:
886 case SCE_PL_HERE_QX:
887 // also implies HereDoc.State == 2
888 sc.Complete();
889 if (HereDoc.DelimiterLength == 0 || sc.Match(HereDoc.Delimiter)) {
890 int c = sc.GetRelative(HereDoc.DelimiterLength);
891 if (c == '\r' || c == '\n') { // peek first, do not consume match
892 sc.ForwardBytes(HereDoc.DelimiterLength);
893 sc.SetState(SCE_PL_DEFAULT);
894 backFlag = BACK_NONE;
895 HereDoc.State = 0;
896 if (!sc.atLineEnd)
897 sc.Forward();
898 break;
901 if (sc.state == SCE_PL_HERE_Q) { // \EOF and 'EOF' non-interpolated
902 while (!sc.atLineEnd)
903 sc.Forward();
904 break;
906 while (!sc.atLineEnd) { // "EOF" and `EOF` interpolated
907 int c, sLen = 0, endType = 0;
908 while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
909 // scan to break string into segments
910 if (c == '\\') {
911 endType = 1; break;
912 } else if (c == '\r' || c == '\n') {
913 endType = 2; break;
915 sLen++;
917 if (sLen > 0) // process non-empty segments
918 InterpolateSegment(sc, sLen);
919 if (endType == 1) {
920 sc.Forward();
921 // \ at end-of-line does not appear to have any effect, skip
922 if (sc.ch != '\r' && sc.ch != '\n')
923 sc.Forward();
924 } else if (endType == 2) {
925 if (!sc.atLineEnd)
926 sc.Forward();
929 break;
930 case SCE_PL_POD:
931 case SCE_PL_POD_VERB: {
932 unsigned int fw = sc.currentPos;
933 int ln = styler.GetLine(fw);
934 if (sc.atLineStart && sc.Match("=cut")) { // end of POD
935 sc.SetState(SCE_PL_POD);
936 sc.Forward(4);
937 sc.SetState(SCE_PL_DEFAULT);
938 styler.SetLineState(ln, SCE_PL_POD);
939 break;
941 int pod = podLineScan(styler, fw, endPos); // classify POD line
942 styler.SetLineState(ln, pod);
943 if (pod == SCE_PL_DEFAULT) {
944 if (sc.state == SCE_PL_POD_VERB) {
945 unsigned int fw2 = fw;
946 while (fw2 < (endPos - 1) && pod == SCE_PL_DEFAULT) {
947 fw = fw2++; // penultimate line (last blank line)
948 pod = podLineScan(styler, fw2, endPos);
949 styler.SetLineState(styler.GetLine(fw2), pod);
951 if (pod == SCE_PL_POD) { // truncate verbatim POD early
952 sc.SetState(SCE_PL_POD);
953 } else
954 fw = fw2;
956 } else {
957 if (pod == SCE_PL_POD_VERB // still part of current paragraph
958 && (styler.GetLineState(ln - 1) == SCE_PL_POD)) {
959 pod = SCE_PL_POD;
960 styler.SetLineState(ln, pod);
961 } else if (pod == SCE_PL_POD
962 && (styler.GetLineState(ln - 1) == SCE_PL_POD_VERB)) {
963 pod = SCE_PL_POD_VERB;
964 styler.SetLineState(ln, pod);
966 sc.SetState(pod);
968 sc.ForwardBytes(fw - sc.currentPos); // commit style
970 break;
971 case SCE_PL_REGEX:
972 case SCE_PL_STRING_QR:
973 if (Quote.Rep <= 0) {
974 if (!setModifiers.Contains(sc.ch))
975 sc.SetState(SCE_PL_DEFAULT);
976 } else if (!Quote.Up && !IsASpace(sc.ch)) {
977 Quote.Open(sc.ch);
978 } else {
979 int c, sLen = 0, endType = 0;
980 while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
981 // scan to break string into segments
982 if (IsASpace(c)) {
983 break;
984 } else if (c == '\\' && Quote.Up != '\\') {
985 endType = 1; break;
986 } else if (c == Quote.Down) {
987 Quote.Count--;
988 if (Quote.Count == 0) {
989 Quote.Rep--;
990 break;
992 } else if (c == Quote.Up)
993 Quote.Count++;
994 sLen++;
996 if (sLen > 0) { // process non-empty segments
997 if (Quote.Up != '\'') {
998 InterpolateSegment(sc, sLen, true);
999 } else // non-interpolated path
1000 sc.Forward(sLen);
1002 if (endType == 1)
1003 sc.Forward();
1005 break;
1006 case SCE_PL_REGSUBST:
1007 case SCE_PL_XLAT:
1008 if (Quote.Rep <= 0) {
1009 if (!setModifiers.Contains(sc.ch))
1010 sc.SetState(SCE_PL_DEFAULT);
1011 } else if (!Quote.Up && !IsASpace(sc.ch)) {
1012 Quote.Open(sc.ch);
1013 } else {
1014 int c, sLen = 0, endType = 0;
1015 bool isPattern = (Quote.Rep == 2);
1016 while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
1017 // scan to break string into segments
1018 if (c == '\\' && Quote.Up != '\\') {
1019 endType = 2; break;
1020 } else if (Quote.Count == 0 && Quote.Rep == 1) {
1021 // We matched something like s(...) or tr{...}, Perl 5.10
1022 // appears to allow almost any character for use as the
1023 // next delimiters. Whitespace and comments are accepted in
1024 // between, but we'll limit to whitespace here.
1025 // For '#', if no whitespace in between, it's a delimiter.
1026 if (IsASpace(c)) {
1027 // Keep going
1028 } else if (c == '#' && IsASpaceOrTab(sc.GetRelativeCharacter(sLen - 1))) {
1029 endType = 3;
1030 } else
1031 Quote.Open(c);
1032 break;
1033 } else if (c == Quote.Down) {
1034 Quote.Count--;
1035 if (Quote.Count == 0) {
1036 Quote.Rep--;
1037 endType = 1;
1039 if (Quote.Up == Quote.Down)
1040 Quote.Count++;
1041 if (endType == 1)
1042 break;
1043 } else if (c == Quote.Up) {
1044 Quote.Count++;
1045 } else if (IsASpace(c))
1046 break;
1047 sLen++;
1049 if (sLen > 0) { // process non-empty segments
1050 if (sc.state == SCE_PL_REGSUBST && Quote.Up != '\'') {
1051 InterpolateSegment(sc, sLen, isPattern);
1052 } else // non-interpolated path
1053 sc.Forward(sLen);
1055 if (endType == 2) {
1056 sc.Forward();
1057 } else if (endType == 3)
1058 sc.SetState(SCE_PL_DEFAULT);
1060 break;
1061 case SCE_PL_STRING_Q:
1062 case SCE_PL_STRING_QQ:
1063 case SCE_PL_STRING_QX:
1064 case SCE_PL_STRING_QW:
1065 case SCE_PL_STRING:
1066 case SCE_PL_CHARACTER:
1067 case SCE_PL_BACKTICKS:
1068 if (!Quote.Down && !IsASpace(sc.ch)) {
1069 Quote.Open(sc.ch);
1070 } else {
1071 int c, sLen = 0, endType = 0;
1072 while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
1073 // scan to break string into segments
1074 if (IsASpace(c)) {
1075 break;
1076 } else if (c == '\\' && Quote.Up != '\\') {
1077 endType = 2; break;
1078 } else if (c == Quote.Down) {
1079 Quote.Count--;
1080 if (Quote.Count == 0) {
1081 endType = 3; break;
1083 } else if (c == Quote.Up)
1084 Quote.Count++;
1085 sLen++;
1087 if (sLen > 0) { // process non-empty segments
1088 switch (sc.state) {
1089 case SCE_PL_STRING:
1090 case SCE_PL_STRING_QQ:
1091 case SCE_PL_BACKTICKS:
1092 InterpolateSegment(sc, sLen);
1093 break;
1094 case SCE_PL_STRING_QX:
1095 if (Quote.Up != '\'') {
1096 InterpolateSegment(sc, sLen);
1097 break;
1099 // (continued for ' delim)
1100 default: // non-interpolated path
1101 sc.Forward(sLen);
1104 if (endType == 2) {
1105 sc.Forward();
1106 } else if (endType == 3)
1107 sc.ForwardSetState(SCE_PL_DEFAULT);
1109 break;
1110 case SCE_PL_SUB_PROTOTYPE: {
1111 int i = 0;
1112 // forward scan; must all be valid proto characters
1113 while (setSubPrototype.Contains(sc.GetRelative(i)))
1114 i++;
1115 if (sc.GetRelative(i) == ')') { // valid sub prototype
1116 sc.ForwardBytes(i);
1117 sc.ForwardSetState(SCE_PL_DEFAULT);
1118 } else {
1119 // abandon prototype, restart from '('
1120 sc.ChangeState(SCE_PL_OPERATOR);
1121 sc.SetState(SCE_PL_DEFAULT);
1124 break;
1125 case SCE_PL_FORMAT: {
1126 sc.Complete();
1127 if (sc.Match('.')) {
1128 sc.Forward();
1129 if (sc.atLineEnd || ((sc.ch == '\r' && sc.chNext == '\n')))
1130 sc.SetState(SCE_PL_DEFAULT);
1132 while (!sc.atLineEnd)
1133 sc.Forward();
1135 break;
1136 case SCE_PL_ERROR:
1137 break;
1139 // Needed for specific continuation styles (one follows the other)
1140 switch (sc.state) {
1141 // continued from SCE_PL_WORD
1142 case SCE_PL_FORMAT_IDENT:
1143 // occupies HereDoc state 3 to avoid clashing with HERE docs
1144 if (IsASpaceOrTab(sc.ch)) { // skip whitespace
1145 sc.ChangeState(SCE_PL_DEFAULT);
1146 while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
1147 sc.Forward();
1148 sc.SetState(SCE_PL_FORMAT_IDENT);
1150 if (setFormatStart.Contains(sc.ch)) { // identifier or '='
1151 if (sc.ch != '=') {
1152 do {
1153 sc.Forward();
1154 } while (setFormat.Contains(sc.ch));
1156 while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
1157 sc.Forward();
1158 if (sc.ch == '=') {
1159 sc.ForwardSetState(SCE_PL_DEFAULT);
1160 HereDoc.State = 3;
1161 } else {
1162 // invalid identifier; inexact fallback, but hey
1163 sc.ChangeState(SCE_PL_IDENTIFIER);
1164 sc.SetState(SCE_PL_DEFAULT);
1166 } else {
1167 sc.ChangeState(SCE_PL_DEFAULT); // invalid identifier
1169 backFlag = BACK_NONE;
1170 break;
1173 // Must check end of HereDoc states here before default state is handled
1174 if (HereDoc.State == 1 && sc.atLineEnd) {
1175 // Begin of here-doc (the line after the here-doc delimiter):
1176 // Lexically, the here-doc starts from the next line after the >>, but the
1177 // first line of here-doc seem to follow the style of the last EOL sequence
1178 int st_new = SCE_PL_HERE_QQ;
1179 HereDoc.State = 2;
1180 if (HereDoc.Quoted) {
1181 if (sc.state == SCE_PL_HERE_DELIM) {
1182 // Missing quote at end of string! We are stricter than perl.
1183 // Colour here-doc anyway while marking this bit as an error.
1184 sc.ChangeState(SCE_PL_ERROR);
1186 switch (HereDoc.Quote) {
1187 case '\'':
1188 st_new = SCE_PL_HERE_Q;
1189 break;
1190 case '"' :
1191 st_new = SCE_PL_HERE_QQ;
1192 break;
1193 case '`' :
1194 st_new = SCE_PL_HERE_QX;
1195 break;
1197 } else {
1198 if (HereDoc.Quote == '\\')
1199 st_new = SCE_PL_HERE_Q;
1201 sc.SetState(st_new);
1203 if (HereDoc.State == 3 && sc.atLineEnd) {
1204 // Start of format body.
1205 HereDoc.State = 0;
1206 sc.SetState(SCE_PL_FORMAT);
1209 // Determine if a new state should be entered.
1210 if (sc.state == SCE_PL_DEFAULT) {
1211 if (IsADigit(sc.ch) ||
1212 (IsADigit(sc.chNext) && (sc.ch == '.' || sc.ch == 'v'))) {
1213 sc.SetState(SCE_PL_NUMBER);
1214 backFlag = BACK_NONE;
1215 numState = PERLNUM_DECIMAL;
1216 dotCount = 0;
1217 if (sc.ch == '0') { // hex,bin,octal
1218 if (sc.chNext == 'x' || sc.chNext == 'X') {
1219 numState = PERLNUM_HEX;
1220 } else if (sc.chNext == 'b' || sc.chNext == 'B') {
1221 numState = PERLNUM_BINARY;
1222 } else if (IsADigit(sc.chNext)) {
1223 numState = PERLNUM_OCTAL;
1225 if (numState != PERLNUM_DECIMAL) {
1226 sc.Forward();
1228 } else if (sc.ch == 'v') { // vector
1229 numState = PERLNUM_V_VECTOR;
1231 } else if (setWord.Contains(sc.ch)) {
1232 // if immediately prefixed by '::', always a bareword
1233 sc.SetState(SCE_PL_WORD);
1234 if (sc.chPrev == ':' && sc.GetRelative(-2) == ':') {
1235 sc.ChangeState(SCE_PL_IDENTIFIER);
1237 unsigned int bk = sc.currentPos;
1238 unsigned int fw = sc.currentPos + 1;
1239 // first check for possible quote-like delimiter
1240 if (sc.ch == 's' && !setWord.Contains(sc.chNext)) {
1241 sc.ChangeState(SCE_PL_REGSUBST);
1242 Quote.New(2);
1243 } else if (sc.ch == 'm' && !setWord.Contains(sc.chNext)) {
1244 sc.ChangeState(SCE_PL_REGEX);
1245 Quote.New();
1246 } else if (sc.ch == 'q' && !setWord.Contains(sc.chNext)) {
1247 sc.ChangeState(SCE_PL_STRING_Q);
1248 Quote.New();
1249 } else if (sc.ch == 'y' && !setWord.Contains(sc.chNext)) {
1250 sc.ChangeState(SCE_PL_XLAT);
1251 Quote.New(2);
1252 } else if (sc.Match('t', 'r') && !setWord.Contains(sc.GetRelative(2))) {
1253 sc.ChangeState(SCE_PL_XLAT);
1254 Quote.New(2);
1255 sc.Forward();
1256 fw++;
1257 } else if (sc.ch == 'q' && setQDelim.Contains(sc.chNext)
1258 && !setWord.Contains(sc.GetRelative(2))) {
1259 if (sc.chNext == 'q') sc.ChangeState(SCE_PL_STRING_QQ);
1260 else if (sc.chNext == 'x') sc.ChangeState(SCE_PL_STRING_QX);
1261 else if (sc.chNext == 'r') sc.ChangeState(SCE_PL_STRING_QR);
1262 else sc.ChangeState(SCE_PL_STRING_QW); // sc.chNext == 'w'
1263 Quote.New();
1264 sc.Forward();
1265 fw++;
1266 } else if (sc.ch == 'x' && (sc.chNext == '=' || // repetition
1267 !setWord.Contains(sc.chNext) ||
1268 (IsADigit(sc.chPrev) && IsADigit(sc.chNext)))) {
1269 sc.ChangeState(SCE_PL_OPERATOR);
1271 // if potentially a keyword, scan forward and grab word, then check
1272 // if it's really one; if yes, disambiguation test is performed
1273 // otherwise it is always a bareword and we skip a lot of scanning
1274 if (sc.state == SCE_PL_WORD) {
1275 while (setWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(fw))))
1276 fw++;
1277 if (!isPerlKeyword(styler.GetStartSegment(), fw, keywords, styler)) {
1278 sc.ChangeState(SCE_PL_IDENTIFIER);
1281 // if already SCE_PL_IDENTIFIER, then no ambiguity, skip this
1282 // for quote-like delimiters/keywords, attempt to disambiguate
1283 // to select for bareword, change state -> SCE_PL_IDENTIFIER
1284 if (sc.state != SCE_PL_IDENTIFIER && bk > 0) {
1285 if (disambiguateBareword(styler, bk, fw, backFlag, backPos, endPos))
1286 sc.ChangeState(SCE_PL_IDENTIFIER);
1288 backFlag = BACK_NONE;
1289 } else if (sc.ch == '#') {
1290 sc.SetState(SCE_PL_COMMENTLINE);
1291 } else if (sc.ch == '\"') {
1292 sc.SetState(SCE_PL_STRING);
1293 Quote.New();
1294 Quote.Open(sc.ch);
1295 backFlag = BACK_NONE;
1296 } else if (sc.ch == '\'') {
1297 if (sc.chPrev == '&' && setWordStart.Contains(sc.chNext)) {
1298 // Archaic call
1299 sc.SetState(SCE_PL_IDENTIFIER);
1300 } else {
1301 sc.SetState(SCE_PL_CHARACTER);
1302 Quote.New();
1303 Quote.Open(sc.ch);
1305 backFlag = BACK_NONE;
1306 } else if (sc.ch == '`') {
1307 sc.SetState(SCE_PL_BACKTICKS);
1308 Quote.New();
1309 Quote.Open(sc.ch);
1310 backFlag = BACK_NONE;
1311 } else if (sc.ch == '$') {
1312 sc.SetState(SCE_PL_SCALAR);
1313 if (sc.chNext == '{') {
1314 sc.ForwardSetState(SCE_PL_OPERATOR);
1315 } else if (IsASpace(sc.chNext)) {
1316 sc.ForwardSetState(SCE_PL_DEFAULT);
1317 } else {
1318 sc.Forward();
1319 if (sc.Match('`', '`') || sc.Match(':', ':')) {
1320 sc.Forward();
1323 backFlag = BACK_NONE;
1324 } else if (sc.ch == '@') {
1325 sc.SetState(SCE_PL_ARRAY);
1326 if (setArray.Contains(sc.chNext)) {
1327 // no special treatment
1328 } else if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1329 sc.ForwardBytes(2);
1330 } else if (sc.chNext == '{' || sc.chNext == '[') {
1331 sc.ForwardSetState(SCE_PL_OPERATOR);
1332 } else {
1333 sc.ChangeState(SCE_PL_OPERATOR);
1335 backFlag = BACK_NONE;
1336 } else if (setPreferRE.Contains(sc.ch)) {
1337 // Explicit backward peeking to set a consistent preferRE for
1338 // any slash found, so no longer need to track preferRE state.
1339 // Find first previous significant lexed element and interpret.
1340 // A few symbols shares this code for disambiguation.
1341 bool preferRE = false;
1342 bool isHereDoc = sc.Match('<', '<');
1343 bool hereDocSpace = false; // for: SCALAR [whitespace] '<<'
1344 unsigned int bk = (sc.currentPos > 0) ? sc.currentPos - 1: 0;
1345 sc.Complete();
1346 styler.Flush();
1347 if (styler.StyleAt(bk) == SCE_PL_DEFAULT)
1348 hereDocSpace = true;
1349 skipWhitespaceComment(styler, bk);
1350 if (bk == 0) {
1351 // avoid backward scanning breakage
1352 preferRE = true;
1353 } else {
1354 int bkstyle = styler.StyleAt(bk);
1355 int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
1356 switch (bkstyle) {
1357 case SCE_PL_OPERATOR:
1358 preferRE = true;
1359 if (bkch == ')' || bkch == ']') {
1360 preferRE = false;
1361 } else if (bkch == '}') {
1362 // backtrack by counting balanced brace pairs
1363 // needed to test for variables like ${}, @{} etc.
1364 bkstyle = styleBeforeBracePair(styler, bk);
1365 if (bkstyle == SCE_PL_SCALAR
1366 || bkstyle == SCE_PL_ARRAY
1367 || bkstyle == SCE_PL_HASH
1368 || bkstyle == SCE_PL_SYMBOLTABLE
1369 || bkstyle == SCE_PL_OPERATOR) {
1370 preferRE = false;
1372 } else if (bkch == '+' || bkch == '-') {
1373 if (bkch == static_cast<unsigned char>(styler.SafeGetCharAt(bk - 1))
1374 && bkch != static_cast<unsigned char>(styler.SafeGetCharAt(bk - 2)))
1375 // exceptions for operators: unary suffixes ++, --
1376 preferRE = false;
1378 break;
1379 case SCE_PL_IDENTIFIER:
1380 preferRE = true;
1381 bkstyle = styleCheckIdentifier(styler, bk);
1382 if ((bkstyle == 1) || (bkstyle == 2)) {
1383 // inputsymbol or var with "->" or "::" before identifier
1384 preferRE = false;
1385 } else if (bkstyle == 3) {
1386 // bare identifier, test cases follows:
1387 if (sc.ch == '/') {
1388 // if '/', /PATTERN/ unless digit/space immediately after '/'
1389 // if '//', always expect defined-or operator to follow identifier
1390 if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/')
1391 preferRE = false;
1392 } else if (sc.ch == '*' || sc.ch == '%') {
1393 if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*'))
1394 preferRE = false;
1395 } else if (sc.ch == '<') {
1396 if (IsASpace(sc.chNext) || sc.chNext == '=')
1397 preferRE = false;
1400 break;
1401 case SCE_PL_SCALAR: // for $var<< case:
1402 if (isHereDoc && hereDocSpace) // if SCALAR whitespace '<<', *always* a HERE doc
1403 preferRE = true;
1404 break;
1405 case SCE_PL_WORD:
1406 preferRE = true;
1407 // for HERE docs, always true
1408 if (sc.ch == '/') {
1409 // adopt heuristics similar to vim-style rules:
1410 // keywords always forced as /PATTERN/: split, if, elsif, while
1411 // everything else /PATTERN/ unless digit/space immediately after '/'
1412 // for '//', defined-or favoured unless special keywords
1413 unsigned int bkend = bk + 1;
1414 while (bk > 0 && styler.StyleAt(bk - 1) == SCE_PL_WORD) {
1415 bk--;
1417 if (isPerlKeyword(bk, bkend, reWords, styler))
1418 break;
1419 if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/')
1420 preferRE = false;
1421 } else if (sc.ch == '*' || sc.ch == '%') {
1422 if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*'))
1423 preferRE = false;
1424 } else if (sc.ch == '<') {
1425 if (IsASpace(sc.chNext) || sc.chNext == '=')
1426 preferRE = false;
1428 break;
1430 // other styles uses the default, preferRE=false
1431 case SCE_PL_POD:
1432 case SCE_PL_HERE_Q:
1433 case SCE_PL_HERE_QQ:
1434 case SCE_PL_HERE_QX:
1435 preferRE = true;
1436 break;
1439 backFlag = BACK_NONE;
1440 if (isHereDoc) { // handle '<<', HERE doc
1441 if (preferRE) {
1442 sc.SetState(SCE_PL_HERE_DELIM);
1443 HereDoc.State = 0;
1444 } else { // << operator
1445 sc.SetState(SCE_PL_OPERATOR);
1446 sc.Forward();
1448 } else if (sc.ch == '*') { // handle '*', typeglob
1449 if (preferRE) {
1450 sc.SetState(SCE_PL_SYMBOLTABLE);
1451 if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1452 sc.ForwardBytes(2);
1453 } else if (sc.chNext == '{') {
1454 sc.ForwardSetState(SCE_PL_OPERATOR);
1455 } else {
1456 sc.Forward();
1458 } else {
1459 sc.SetState(SCE_PL_OPERATOR);
1460 if (sc.chNext == '*') // exponentiation
1461 sc.Forward();
1463 } else if (sc.ch == '%') { // handle '%', hash
1464 if (preferRE) {
1465 sc.SetState(SCE_PL_HASH);
1466 if (setHash.Contains(sc.chNext)) {
1467 sc.Forward();
1468 } else if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1469 sc.ForwardBytes(2);
1470 } else if (sc.chNext == '{') {
1471 sc.ForwardSetState(SCE_PL_OPERATOR);
1472 } else {
1473 sc.ChangeState(SCE_PL_OPERATOR);
1475 } else {
1476 sc.SetState(SCE_PL_OPERATOR);
1478 } else if (sc.ch == '<') { // handle '<', inputsymbol
1479 if (preferRE) {
1480 // forward scan
1481 int i = InputSymbolScan(sc);
1482 if (i > 0) {
1483 sc.SetState(SCE_PL_IDENTIFIER);
1484 sc.Forward(i);
1485 } else {
1486 sc.SetState(SCE_PL_OPERATOR);
1488 } else {
1489 sc.SetState(SCE_PL_OPERATOR);
1491 } else { // handle '/', regexp
1492 if (preferRE) {
1493 sc.SetState(SCE_PL_REGEX);
1494 Quote.New();
1495 Quote.Open(sc.ch);
1496 } else { // / and // operators
1497 sc.SetState(SCE_PL_OPERATOR);
1498 if (sc.chNext == '/') {
1499 sc.Forward();
1503 } else if (sc.ch == '=' // POD
1504 && setPOD.Contains(sc.chNext)
1505 && sc.atLineStart) {
1506 sc.SetState(SCE_PL_POD);
1507 backFlag = BACK_NONE;
1508 } else if (sc.ch == '-' && setWordStart.Contains(sc.chNext)) { // extended '-' cases
1509 unsigned int bk = sc.currentPos;
1510 unsigned int fw = 2;
1511 if (setSingleCharOp.Contains(sc.chNext) && // file test operators
1512 !setWord.Contains(sc.GetRelative(2))) {
1513 sc.SetState(SCE_PL_WORD);
1514 } else {
1515 // nominally a minus and bareword; find extent of bareword
1516 while (setWord.Contains(sc.GetRelative(fw)))
1517 fw++;
1518 sc.SetState(SCE_PL_OPERATOR);
1520 // force to bareword for hash key => or {variable literal} cases
1521 if (disambiguateBareword(styler, bk, bk + fw, backFlag, backPos, endPos) & 2) {
1522 sc.ChangeState(SCE_PL_IDENTIFIER);
1524 backFlag = BACK_NONE;
1525 } else if (sc.ch == '(' && sc.currentPos > 0) { // '(' or subroutine prototype
1526 sc.Complete();
1527 if (styleCheckSubPrototype(styler, sc.currentPos - 1)) {
1528 sc.SetState(SCE_PL_SUB_PROTOTYPE);
1529 backFlag = BACK_NONE;
1530 } else {
1531 sc.SetState(SCE_PL_OPERATOR);
1533 } else if (setPerlOperator.Contains(sc.ch)) { // operators
1534 sc.SetState(SCE_PL_OPERATOR);
1535 if (sc.Match('.', '.')) { // .. and ...
1536 sc.Forward();
1537 if (sc.chNext == '.') sc.Forward();
1539 } else if (sc.ch == 4 || sc.ch == 26) { // ^D and ^Z ends valid perl source
1540 sc.SetState(SCE_PL_DATASECTION);
1541 } else {
1542 // keep colouring defaults
1543 sc.Complete();
1547 sc.Complete();
1548 if (sc.state == SCE_PL_HERE_Q
1549 || sc.state == SCE_PL_HERE_QQ
1550 || sc.state == SCE_PL_HERE_QX
1551 || sc.state == SCE_PL_FORMAT) {
1552 styler.ChangeLexerState(sc.currentPos, styler.Length());
1554 sc.Complete();
// Bit field inside a fold level value that carries the POD heading level:
// LexerPerl::Fold stores (podHeading << PERL_HEADFOLD_SHIFT) there and clears
// it with ~PERL_HEADFOLD_MASK, i.e. the heading level lives in bits 7-4.
1557 #define PERL_HEADFOLD_SHIFT 4
1558 #define PERL_HEADFOLD_MASK 0xF0
// Computes fold levels for the document range [startPos, startPos + length)
// reached through pAccess and stores them with styler.SetLevel().
// Returns immediately when options.fold is off; the third parameter
// (initStyle) is unused. The per-character loop combines several sources of
// folding: runs of comment lines (options.foldComment), '{' '}' '[' ']'
// operators, POD sections and "=head" headings (options.foldPOD), package
// lines (options.foldPackage), here-document bodies, and explicit "#{" / "#}"
// markers in line comments (options.foldCommentExplicit).
1560 void SCI_METHOD LexerPerl::Fold(unsigned int startPos, int length, int /* initStyle */, IDocument *pAccess) {
1562 if (!options.fold)
1563 return;
1565 LexAccessor styler(pAccess);
1567 unsigned int endPos = startPos + length;
1568 int visibleChars = 0;
1569 int lineCurrent = styler.GetLine(startPos);
1571 // Backtrack to previous line in case need to fix its fold status
1572 if (startPos > 0) {
1573 if (lineCurrent > 0) {
1574 lineCurrent--;
1575 startPos = styler.LineStart(lineCurrent);
// Seed the running level from the line before the start line. The upper 16
// bits of a stored level hold the level that applies to the following line
// (written below by "lev |= levelCurrent << 16").
1579 int levelPrev = SC_FOLDLEVELBASE;
1580 if (lineCurrent > 0)
1581 levelPrev = styler.LevelAt(lineCurrent - 1) >> 16;
1582 int levelCurrent = levelPrev;
// chNext / styleNext are primed here so each iteration can shift them into
// ch / style and then look one position ahead.
1583 char chNext = styler[startPos];
1584 char chPrev = styler.SafeGetCharAt(startPos - 1);
1585 int styleNext = styler.StyleAt(startPos);
1586 // Used at end of line to determine if the line was a package definition
1587 bool isPackageLine = false;
// Non-zero once a "=head" line has been seen on the current line; consumed
// and reset at end of line.
1588 int podHeading = 0;
1589 for (unsigned int i = startPos; i < endPos; i++) {
1590 char ch = chNext;
1591 chNext = styler.SafeGetCharAt(i + 1);
1592 int style = styleNext;
1593 styleNext = styler.StyleAt(i + 1);
// Style of the preceding character; treated as default at document start.
1594 int stylePrevCh = (i) ? styler.StyleAt(i - 1):SCE_PL_DEFAULT;
// End of line is a '\n' or a '\r' not followed by '\n', so a "\r\n" pair
// counts once (on the '\n').
1595 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1596 bool atLineStart = ((chPrev == '\r') || (chPrev == '\n')) || i == 0;
1597 // Comment folding
// Evaluated once per line, at its end: the first line of a run of comment
// lines raises the level, the last line of the run lowers it.
1598 if (options.foldComment && atEOL && IsCommentLine(lineCurrent, styler)) {
1599 if (!IsCommentLine(lineCurrent - 1, styler)
1600 && IsCommentLine(lineCurrent + 1, styler))
1601 levelCurrent++;
1602 else if (IsCommentLine(lineCurrent - 1, styler)
1603 && !IsCommentLine(lineCurrent + 1, styler))
1604 levelCurrent--;
1606 // {} [] block folding
// Only characters styled as operators count. With foldAtElse, an opener met
// while the line is already net-closing (levelCurrent < levelPrev) lowers
// levelPrev, so the level recorded for this line drops by one as well
// (presumably for lines such as "} else {" -- confirm against the option docs).
1607 if (style == SCE_PL_OPERATOR) {
1608 if (ch == '{') {
1609 if (options.foldAtElse && levelCurrent < levelPrev)
1610 --levelPrev;
1611 levelCurrent++;
1612 } else if (ch == '}') {
1613 levelCurrent--;
1615 if (ch == '[') {
1616 if (options.foldAtElse && levelCurrent < levelPrev)
1617 --levelPrev;
1618 levelCurrent++;
1619 } else if (ch == ']') {
1620 levelCurrent--;
1623 // POD folding
// Checked only on the first character of a line. Entering POD from a non-POD
// style opens a fold; "=cut" clears the heading bits and closes one level;
// "=head" records a heading level that is applied at end of line.
1624 if (options.foldPOD && atLineStart) {
1625 if (style == SCE_PL_POD) {
1626 if (stylePrevCh != SCE_PL_POD && stylePrevCh != SCE_PL_POD_VERB)
1627 levelCurrent++;
1628 else if (styler.Match(i, "=cut"))
1629 levelCurrent = (levelCurrent & ~PERL_HEADFOLD_MASK) - 1;
1630 else if (styler.Match(i, "=head"))
1631 podHeading = PodHeadingLevel(i, styler);
// Same POD commands, but for text styled as data section, where a line
// starting with '=' plus an ASCII letter opens a fold only from base level.
1632 } else if (style == SCE_PL_DATASECTION) {
1633 if (ch == '=' && IsASCII(chNext) && isalpha(chNext) && levelCurrent == SC_FOLDLEVELBASE)
1634 levelCurrent++;
1635 else if (styler.Match(i, "=cut") && levelCurrent > SC_FOLDLEVELBASE)
1636 levelCurrent = (levelCurrent & ~PERL_HEADFOLD_MASK) - 1;
1637 else if (styler.Match(i, "=head"))
1638 podHeading = PodHeadingLevel(i, styler);
1639 // if package used or unclosed brace, level > SC_FOLDLEVELBASE!
1640 // reset needed as level test is vs. SC_FOLDLEVELBASE
1641 else if (stylePrevCh != SCE_PL_DATASECTION)
1642 levelCurrent = SC_FOLDLEVELBASE;
1645 // package folding
// The flag is raised only when this line is a package line (per
// IsPackageLine) and the next line is not; it is acted on at end of line.
1646 if (options.foldPackage && atLineStart) {
1647 if (IsPackageLine(lineCurrent, styler)
1648 && !IsPackageLine(lineCurrent + 1, styler))
1649 isPackageLine = true;
1652 //heredoc folding
// The level rises on the first character carrying a here-doc body style and
// falls on the first character after such a run; this part has no option gate.
1653 switch (style) {
1654 case SCE_PL_HERE_QQ :
1655 case SCE_PL_HERE_Q :
1656 case SCE_PL_HERE_QX :
1657 switch (stylePrevCh) {
1658 case SCE_PL_HERE_QQ :
1659 case SCE_PL_HERE_Q :
1660 case SCE_PL_HERE_QX :
1661 //do nothing;
1662 break;
1663 default :
1664 levelCurrent++;
1665 break;
1667 break;
1668 default:
1669 switch (stylePrevCh) {
1670 case SCE_PL_HERE_QQ :
1671 case SCE_PL_HERE_Q :
1672 case SCE_PL_HERE_QX :
1673 levelCurrent--;
1674 break;
1675 default :
1676 //do nothing;
1677 break;
1679 break;
1682 //explicit folding
// "#{" inside a line comment opens a fold and "#}" closes one; the close is
// ignored when the level is already at SC_FOLDLEVELBASE.
1683 if (options.foldCommentExplicit && style == SCE_PL_COMMENTLINE && ch == '#') {
1684 if (chNext == '{') {
1685 levelCurrent++;
1686 } else if (levelCurrent > SC_FOLDLEVELBASE && chNext == '}') {
1687 levelCurrent--;
// End of line: assemble and store this line's level. The low part starts as
// levelPrev (the level on entry to the line); levelCurrent (the level on
// exit) is placed in the upper 16 bits.
1691 if (atEOL) {
1692 int lev = levelPrev;
1693 // POD headings occupy bits 7-4, leaving some breathing room for
1694 // non-standard practice -- POD sections stuck in blocks, etc.
// The heading line is stored as a header one level below the level given
// to the lines that follow it.
1695 if (podHeading > 0) {
1696 levelCurrent = (lev & ~PERL_HEADFOLD_MASK) | (podHeading << PERL_HEADFOLD_SHIFT);
1697 lev = levelCurrent - 1;
1698 lev |= SC_FOLDLEVELHEADERFLAG;
1699 podHeading = 0;
1701 // Check if line was a package declaration
1702 // because packages need "special" treatment
// The package line is forced to base level as a header and the following
// lines to base + 1, discarding whatever level was accumulated so far.
1703 if (isPackageLine) {
1704 lev = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG;
1705 levelCurrent = SC_FOLDLEVELBASE + 1;
1706 isPackageLine = false;
1708 lev |= levelCurrent << 16;
// A line with no visible characters is flagged as white space when
// foldCompact is on; a line that raised the level and has visible
// characters is flagged as a fold header.
1709 if (visibleChars == 0 && options.foldCompact)
1710 lev |= SC_FOLDLEVELWHITEFLAG;
1711 if ((levelCurrent > levelPrev) && (visibleChars > 0))
1712 lev |= SC_FOLDLEVELHEADERFLAG;
// Write only when the value differs from what is already stored.
1713 if (lev != styler.LevelAt(lineCurrent)) {
1714 styler.SetLevel(lineCurrent, lev);
1716 lineCurrent++;
1717 levelPrev = levelCurrent;
1718 visibleChars = 0;
// Counted after the EOL handling, so the count restarts for the next line.
1720 if (!isspacechar(ch))
1721 visibleChars++;
1722 chPrev = ch;
1724 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1725 int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
1726 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
// Lexer module object for Perl: constructed with the SCLEX_PERL id, the
// LexerPerl factory function, the name "perl" and the perlWordListDesc word
// list descriptions. NOTE(review): presumably this is what makes the lexer
// known to Scintilla; the registration mechanism is outside this view.
1729 LexerModule lmPerl(SCLEX_PERL, LexerPerl::LexerFactoryPerl, "perl", perlWordListDesc);