1 // Scintilla source code edit control
4 ** Converted to lexer object by "Udo Lechner" <dlchnr(at)gmx(dot)net>
6 // Copyright 1998-2008 by Neil Hodgson <neilh@scintilla.org>
7 // Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my>
8 // The License.txt file describes the conditions under which this software may be distributed.
21 #include "Scintilla.h"
25 #include "LexAccessor.h"
26 #include "StyleContext.h"
27 #include "CharacterSet.h"
28 #include "LexerModule.h"
29 #include "OptionSet.h"
32 using namespace Scintilla
;
35 // Info for HERE document handling from perldata.pod (reformatted):
36 // ----------------------------------------------------------------
37 // A line-oriented form of quoting is based on the shell ``here-doc'' syntax.
38 // Following a << you specify a string to terminate the quoted material, and
39 // all lines following the current line down to the terminating string are
40 // the value of the item.
41 // * The terminating string may be either an identifier (a word), or some quoted text.
43 // * If quoted, the type of quotes you use determines the treatment of the
44 // text, just as in regular quoting.
45 // * An unquoted identifier works like double quotes.
46 // * There must be no space between the << and the identifier.
47 // (If you put a space it will be treated as a null identifier,
48 // which is valid, and matches the first empty line.)
49 // (This is deprecated, -w warns of this syntax)
50 // * The terminating string must appear by itself (unquoted and
51 // with no surrounding whitespace) on the terminating line.
#define HERE_DELIM_MAX 256		// maximum length of HERE doc delimiter

// Sub-states used while lexing a numeric literal (see numState in Lex()).
#define PERLNUM_BINARY		1	// order is significant: 1-4 cannot have a dot
#define PERLNUM_HEX			2	// NOTE(review): value restored from the 1..4 ordering above - confirm
#define PERLNUM_OCTAL		3
#define PERLNUM_FLOAT_EXP	4	// exponent part only
#define PERLNUM_DECIMAL		5	// 1-5 are numbers; 6-7 are strings
#define PERLNUM_VECTOR		6
#define PERLNUM_V_VECTOR	7
#define PERLNUM_BAD			8	// NOTE(review): invalid literal marker; value restored, must stay outside 1-7 - confirm

#define BACK_NONE		0	// lookback state for bareword disambiguation:
#define BACK_OPERATOR	1	// whitespace/comments are insignificant
#define BACK_KEYWORD	2	// operators/keywords are needed for disambiguation

// all interpolated styles are different from their parent styles by a constant difference
// we also assume SCE_PL_STRING_VAR is the interpolated style with the smallest value
#define INTERPOLATE_SHIFT	(SCE_PL_STRING_VAR - SCE_PL_STRING)
72 static bool isPerlKeyword(unsigned int start
, unsigned int end
, WordList
&keywords
, LexAccessor
&styler
) {
73 // old-style keyword matcher; needed because GetCurrent() needs
74 // current segment to be committed, but we may abandon early...
76 unsigned int i
, len
= end
- start
;
77 if (len
> 30) { len
= 30; }
78 for (i
= 0; i
< len
; i
++, start
++) s
[i
] = styler
[start
];
80 return keywords
.InList(s
);
83 static int disambiguateBareword(LexAccessor
&styler
, unsigned int bk
, unsigned int fw
,
84 int backFlag
, unsigned int backPos
, unsigned int endPos
) {
85 // identifiers are recognized by Perl as barewords under some
86 // conditions, the following attempts to do the disambiguation
87 // by looking backward and forward; result in 2 LSB
89 bool moreback
= false; // true if passed newline/comments
90 bool brace
= false; // true if opening brace found
91 // if BACK_NONE, neither operator nor keyword, so skip test
92 if (backFlag
== BACK_NONE
)
94 // first look backwards past whitespace/comments to set EOL flag
95 // (some disambiguation patterns must be on a single line)
96 if (backPos
<= static_cast<unsigned int>(styler
.LineStart(styler
.GetLine(bk
))))
98 // look backwards at last significant lexed item for disambiguation
100 int ch
= static_cast<unsigned char>(styler
.SafeGetCharAt(bk
));
101 if (ch
== '{' && !moreback
) {
102 // {bareword: possible variable spec
104 } else if ((ch
== '&' && styler
.SafeGetCharAt(bk
- 1) != '&')
105 // &bareword: subroutine call
106 || styler
.Match(bk
- 1, "->")
107 // ->bareword: part of variable spec
108 || styler
.Match(bk
- 2, "sub")) {
109 // sub bareword: subroutine declaration
110 // (implied BACK_KEYWORD, no keywords end in 'sub'!)
113 // next, scan forward after word past tab/spaces only;
114 // if ch isn't one of '[{(,' we can skip the test
115 if ((ch
== '{' || ch
== '(' || ch
== '['|| ch
== ',')
117 while (ch
= static_cast<unsigned char>(styler
.SafeGetCharAt(fw
)),
118 IsASpaceOrTab(ch
) && fw
< endPos
) {
121 if ((ch
== '}' && brace
)
122 // {bareword}: variable spec
123 || styler
.Match(fw
, "=>")) {
124 // [{(, bareword=>: hash literal
131 static void skipWhitespaceComment(LexAccessor
&styler
, unsigned int &p
) {
132 // when backtracking, we need to skip whitespace and comments
134 while ((p
> 0) && (style
= styler
.StyleAt(p
),
135 style
== SCE_PL_DEFAULT
|| style
== SCE_PL_COMMENTLINE
))
139 static int styleBeforeBracePair(LexAccessor
&styler
, unsigned int bk
) {
140 // backtrack to find open '{' corresponding to a '}', balanced
141 // return significant style to be tested for '/' disambiguation
144 return SCE_PL_DEFAULT
;
146 if (styler
.StyleAt(bk
) == SCE_PL_OPERATOR
) {
147 int bkch
= static_cast<unsigned char>(styler
.SafeGetCharAt(bk
));
148 if (bkch
== ';') { // early out
150 } else if (bkch
== '}') {
152 } else if (bkch
== '{') {
153 if (--braceCount
== 0) break;
157 if (bk
> 0 && braceCount
== 0) {
158 // balanced { found, bk > 0, skip more whitespace/comments
160 skipWhitespaceComment(styler
, bk
);
161 return styler
.StyleAt(bk
);
163 return SCE_PL_DEFAULT
;
166 static int styleCheckIdentifier(LexAccessor
&styler
, unsigned int bk
) {
167 // backtrack to classify sub-styles of identifier under test
168 // return sub-style to be tested for '/' disambiguation
169 if (styler
.SafeGetCharAt(bk
) == '>') // inputsymbol, like <foo>
171 // backtrack to check for possible "->" or "::" before identifier
172 while (bk
> 0 && styler
.StyleAt(bk
) == SCE_PL_IDENTIFIER
) {
176 int bkstyle
= styler
.StyleAt(bk
);
177 if (bkstyle
== SCE_PL_DEFAULT
178 || bkstyle
== SCE_PL_COMMENTLINE
) {
179 // skip whitespace, comments
180 } else if (bkstyle
== SCE_PL_OPERATOR
) {
181 // test for "->" and "::"
182 if (styler
.Match(bk
- 1, "->") || styler
.Match(bk
- 1, "::"))
185 return 3; // bare identifier
191 static int podLineScan(LexAccessor
&styler
, unsigned int &pos
, unsigned int endPos
) {
192 // forward scan the current line to classify line for POD style
194 while (pos
<= endPos
) {
195 int ch
= static_cast<unsigned char>(styler
.SafeGetCharAt(pos
));
196 if (ch
== '\n' || ch
== '\r' || pos
>= endPos
) {
197 if (ch
== '\r' && styler
.SafeGetCharAt(pos
+ 1) == '\n') pos
++;
200 if (IsASpaceOrTab(ch
)) { // whitespace, take note
202 state
= SCE_PL_DEFAULT
;
203 } else if (state
== SCE_PL_DEFAULT
) { // verbatim POD line
204 state
= SCE_PL_POD_VERB
;
205 } else if (state
!= SCE_PL_POD_VERB
) { // regular POD line
211 state
= SCE_PL_DEFAULT
;
215 static bool styleCheckSubPrototype(LexAccessor
&styler
, unsigned int bk
) {
216 // backtrack to identify if we're starting a subroutine prototype
217 // we also need to ignore whitespace/comments:
218 // 'sub' [whitespace|comment] <identifier> [whitespace|comment]
220 skipWhitespaceComment(styler
, bk
);
221 if (bk
== 0 || styler
.StyleAt(bk
) != SCE_PL_IDENTIFIER
) // check identifier
223 while (bk
> 0 && (styler
.StyleAt(bk
) == SCE_PL_IDENTIFIER
)) {
226 skipWhitespaceComment(styler
, bk
);
227 if (bk
< 2 || styler
.StyleAt(bk
) != SCE_PL_WORD
// check "sub" keyword
228 || !styler
.Match(bk
- 2, "sub")) // assume suffix is unique!
233 static int actualNumStyle(int numberStyle
) {
234 if (numberStyle
== PERLNUM_VECTOR
|| numberStyle
== PERLNUM_V_VECTOR
) {
235 return SCE_PL_STRING
;
236 } else if (numberStyle
== PERLNUM_BAD
) {
239 return SCE_PL_NUMBER
;
// Return the closing delimiter matching an opening bracket character:
// '(' -> ')', '[' -> ']', '{' -> '}', '<' -> '>'.
// Any other character is its own closing delimiter and is returned unchanged.
static int opposite(int ch) {
	if (ch == '(') return ')';
	if (ch == '[') return ']';
	if (ch == '{') return '}';
	if (ch == '<') return '>';
	return ch;
}
250 static bool IsCommentLine(int line
, LexAccessor
&styler
) {
251 int pos
= styler
.LineStart(line
);
252 int eol_pos
= styler
.LineStart(line
+ 1) - 1;
253 for (int i
= pos
; i
< eol_pos
; i
++) {
255 int style
= styler
.StyleAt(i
);
256 if (ch
== '#' && style
== SCE_PL_COMMENTLINE
)
258 else if (!IsASpaceOrTab(ch
))
264 static bool IsPackageLine(int line
, LexAccessor
&styler
) {
265 int pos
= styler
.LineStart(line
);
266 int style
= styler
.StyleAt(pos
);
267 if (style
== SCE_PL_WORD
&& styler
.Match(pos
, "package")) {
273 static int PodHeadingLevel(int pos
, LexAccessor
&styler
) {
274 int lvl
= static_cast<unsigned char>(styler
.SafeGetCharAt(pos
+ 5));
275 if (lvl
>= '1' && lvl
<= '4') {
// An individual named option for use in an OptionSet

// Options used for LexerPerl
// Each member is bound to a property name in OptionSetPerl.
// NOTE(review): the struct header, the members fold/foldComment/foldCompact/
// foldAtElse and every default except foldCommentExplicit were restored from
// their uses in OptionSetPerl - confirm the defaults against upstream.
struct OptionsPerl {
	bool fold;               // fold
	bool foldComment;        // fold.comment
	bool foldCompact;        // fold.compact
	// Custom folding of POD and packages
	bool foldPOD;            // fold.perl.pod
	// Enable folding Pod blocks when using the Perl lexer.
	bool foldPackage;        // fold.perl.package
	// Enable folding packages when using the Perl lexer.

	bool foldCommentExplicit;	// fold.perl.comment.explicit

	bool foldAtElse;         // fold.perl.at.else

	OptionsPerl() {
		fold = false;
		foldComment = false;
		foldCompact = true;
		foldPOD = true;
		foldPackage = true;
		foldCommentExplicit = true;
		foldAtElse = false;
	}
};
// Descriptions of the word lists accepted by the lexer, terminated by a null
// entry. Only word list 0 is used (see LexerPerl::WordListSet).
// NOTE(review): the description text was restored - confirm against upstream.
static const char *const perlWordListDesc[] = {
	"Keywords",
	0
};
314 struct OptionSetPerl
: public OptionSet
<OptionsPerl
> {
316 DefineProperty("fold", &OptionsPerl::fold
);
318 DefineProperty("fold.comment", &OptionsPerl::foldComment
);
320 DefineProperty("fold.compact", &OptionsPerl::foldCompact
);
322 DefineProperty("fold.perl.pod", &OptionsPerl::foldPOD
,
323 "Set to 0 to disable folding Pod blocks when using the Perl lexer.");
325 DefineProperty("fold.perl.package", &OptionsPerl::foldPackage
,
326 "Set to 0 to disable folding packages when using the Perl lexer.");
328 DefineProperty("fold.perl.comment.explicit", &OptionsPerl::foldCommentExplicit
,
329 "Set to 0 to disable explicit folding.");
331 DefineProperty("fold.perl.at.else", &OptionsPerl::foldAtElse
,
332 "This option enables Perl folding on a \"} else {\" line of an if statement.");
334 DefineWordListSets(perlWordListDesc
);
338 class LexerPerl
: public ILexer
{
339 CharacterSet setWordStart
;
340 CharacterSet setWord
;
341 CharacterSet setSpecialVar
;
342 CharacterSet setControlVar
;
345 OptionSetPerl osPerl
;
348 setWordStart(CharacterSet::setAlpha
, "_", 0x80, true),
349 setWord(CharacterSet::setAlphaNum
, "_", 0x80, true),
350 setSpecialVar(CharacterSet::setNone
, "\"$;<>&`'+,./\\%:=~!?@[]"),
351 setControlVar(CharacterSet::setNone
, "ACDEFHILMNOPRSTVWX") {
353 virtual ~LexerPerl() {
355 void SCI_METHOD
Release() {
358 int SCI_METHOD
Version() const {
361 const char *SCI_METHOD
PropertyNames() {
362 return osPerl
.PropertyNames();
364 int SCI_METHOD
PropertyType(const char *name
) {
365 return osPerl
.PropertyType(name
);
367 const char *SCI_METHOD
DescribeProperty(const char *name
) {
368 return osPerl
.DescribeProperty(name
);
370 int SCI_METHOD
PropertySet(const char *key
, const char *val
);
371 const char *SCI_METHOD
DescribeWordListSets() {
372 return osPerl
.DescribeWordListSets();
374 int SCI_METHOD
WordListSet(int n
, const char *wl
);
375 void SCI_METHOD
Lex(unsigned int startPos
, int length
, int initStyle
, IDocument
*pAccess
);
376 void SCI_METHOD
Fold(unsigned int startPos
, int length
, int initStyle
, IDocument
*pAccess
);
378 void *SCI_METHOD
PrivateCall(int, void *) {
382 static ILexer
*LexerFactoryPerl() {
383 return new LexerPerl();
385 int InputSymbolScan(StyleContext
&sc
);
386 void InterpolateSegment(StyleContext
&sc
, int maxSeg
, bool isPattern
=false);
389 int SCI_METHOD
LexerPerl::PropertySet(const char *key
, const char *val
) {
390 if (osPerl
.PropertySet(&options
, key
, val
)) {
396 int SCI_METHOD
LexerPerl::WordListSet(int n
, const char *wl
) {
397 WordList
*wordListN
= 0;
400 wordListN
= &keywords
;
403 int firstModification
= -1;
407 if (*wordListN
!= wlNew
) {
409 firstModification
= 0;
412 return firstModification
;
415 int LexerPerl::InputSymbolScan(StyleContext
&sc
) {
416 // forward scan for matching > on same line; file handles
418 while ((c
= sc
.GetRelativeCharacter(++sLen
)) != 0) {
419 if (c
== '\r' || c
== '\n') {
421 } else if (c
== '>') {
422 if (sc
.Match("<=>")) // '<=>' case
430 void LexerPerl::InterpolateSegment(StyleContext
&sc
, int maxSeg
, bool isPattern
) {
431 // interpolate a segment (with no active backslashes or delimiters within)
432 // switch in or out of an interpolation style or continue current style
433 // commit variable patterns if found, trim segment, repeat until done
437 if ((maxSeg
> 1) && (sc
.ch
== '$' || sc
.ch
== '@')) {
438 // $#[$]*word [$@][$]*word (where word or {word} is always present)
441 if (sc
.ch
== '$' && sc
.chNext
== '#') { // starts with $#
444 while ((maxSeg
> sLen
) && (sc
.GetRelativeCharacter(sLen
) == '$')) // >0 $ dereference within
446 if ((maxSeg
> sLen
) && (sc
.GetRelativeCharacter(sLen
) == '{')) { // { start for {word}
451 int c
= sc
.GetRelativeCharacter(sLen
);
452 if (setWordStart
.Contains(c
)) { // word (various)
455 while (maxSeg
> sLen
) {
456 if (!setWord
.Contains(sc
.GetRelativeCharacter(sLen
)))
460 } else if (braces
&& IsADigit(c
) && (sLen
== 2)) { // digit for ${digit}
466 if ((maxSeg
> sLen
) && (sc
.GetRelativeCharacter(sLen
) == '}')) { // } end for {word}
472 if (!isVar
&& (maxSeg
> 1)) { // $- or @-specific variable patterns
476 if (IsADigit(c
)) { // $[0-9] and slurp trailing digits
479 while ((maxSeg
> sLen
) && IsADigit(sc
.GetRelativeCharacter(sLen
)))
481 } else if (setSpecialVar
.Contains(c
)) { // $ special variables
484 } else if (!isPattern
&& ((c
== '(') || (c
== ')') || (c
== '|'))) { // $ additional
487 } else if (c
== '^') { // $^A control-char style
489 if ((maxSeg
> sLen
) && setControlVar
.Contains(sc
.GetRelativeCharacter(sLen
))) {
494 } else if (sc
.ch
== '@') {
496 if (!isPattern
&& ((c
== '+') || (c
== '-'))) { // @ specials non-pattern
502 if (isVar
) { // commit as interpolated variable or normal character
503 if (sc
.state
< SCE_PL_STRING_VAR
)
504 sc
.SetState(sc
.state
+ INTERPOLATE_SHIFT
);
508 if (sc
.state
>= SCE_PL_STRING_VAR
)
509 sc
.SetState(sc
.state
- INTERPOLATE_SHIFT
);
514 if (sc
.state
>= SCE_PL_STRING_VAR
)
515 sc
.SetState(sc
.state
- INTERPOLATE_SHIFT
);
518 void SCI_METHOD
LexerPerl::Lex(unsigned int startPos
, int length
, int initStyle
, IDocument
*pAccess
) {
519 LexAccessor
styler(pAccess
);
521 // keywords that force /PATTERN/ at all times; should track vim's behaviour
523 reWords
.Set("elsif if split while");
526 CharacterSet
setSingleCharOp(CharacterSet::setNone
, "rwxoRWXOezsfdlpSbctugkTBMAC");
527 // lexing of "%*</" operators is non-trivial; these are missing in the set below
528 CharacterSet
setPerlOperator(CharacterSet::setNone
, "^&\\()-+=|{}[]:;>,?!.~");
529 CharacterSet
setQDelim(CharacterSet::setNone
, "qrwx");
530 CharacterSet
setModifiers(CharacterSet::setAlpha
);
531 CharacterSet
setPreferRE(CharacterSet::setNone
, "*/<%");
532 // setArray and setHash also accepts chars for special vars like $_,
533 // which are then truncated when the next char does not match setVar
534 CharacterSet
setVar(CharacterSet::setAlphaNum
, "#$_'", 0x80, true);
535 CharacterSet
setArray(CharacterSet::setAlpha
, "#$_+-", 0x80, true);
536 CharacterSet
setHash(CharacterSet::setAlpha
, "#$_!^+-", 0x80, true);
537 CharacterSet
&setPOD
= setModifiers
;
538 CharacterSet
setNonHereDoc(CharacterSet::setDigits
, "=$@");
539 CharacterSet
setHereDocDelim(CharacterSet::setAlphaNum
, "_");
540 CharacterSet
setSubPrototype(CharacterSet::setNone
, "\\[$@%&*+];");
541 // for format identifiers
542 CharacterSet
setFormatStart(CharacterSet::setAlpha
, "_=");
543 CharacterSet
&setFormat
= setHereDocDelim
;
545 // Lexer for perl often has to backtrack to start of current style to determine
546 // which characters are being used as quotes, how deeply nested is the
547 // start position and what the termination string is for HERE documents.
549 class HereDocCls
{ // Class to manage HERE doc sequence
552 // 0: '<<' encountered
553 // 1: collect the delimiter
554 // 2: here doc text (lines after the delimiter)
555 int Quote
; // the char after '<<'
556 bool Quoted
; // true if Quote in ('\'','"','`')
557 int DelimiterLength
; // strlen(Delimiter)
558 char *Delimiter
; // the Delimiter, 256: sizeof PL_tokenbuf
564 Delimiter
= new char[HERE_DELIM_MAX
];
567 void Append(int ch
) {
568 Delimiter
[DelimiterLength
++] = static_cast<char>(ch
);
569 Delimiter
[DelimiterLength
] = '\0';
575 HereDocCls HereDoc
; // TODO: FIFO for stacked here-docs
577 class QuoteCls
{ // Class to manage quote pairs
585 void New(int r
= 1) {
599 // additional state for number lexing
600 int numState
= PERLNUM_DECIMAL
;
603 unsigned int endPos
= startPos
+ length
;
605 // Backtrack to beginning of style if required...
606 // If in a long distance lexical state, backtrack to find quote characters.
607 // Includes strings (may be multi-line), numbers (additional state), format
608 // bodies, as well as POD sections.
609 if (initStyle
== SCE_PL_HERE_Q
610 || initStyle
== SCE_PL_HERE_QQ
611 || initStyle
== SCE_PL_HERE_QX
612 || initStyle
== SCE_PL_FORMAT
613 || initStyle
== SCE_PL_HERE_QQ_VAR
614 || initStyle
== SCE_PL_HERE_QX_VAR
616 // backtrack through multiple styles to reach the delimiter start
617 int delim
= (initStyle
== SCE_PL_FORMAT
) ? SCE_PL_FORMAT_IDENT
:SCE_PL_HERE_DELIM
;
618 while ((startPos
> 1) && (styler
.StyleAt(startPos
) != delim
)) {
621 startPos
= styler
.LineStart(styler
.GetLine(startPos
));
622 initStyle
= styler
.StyleAt(startPos
- 1);
624 if (initStyle
== SCE_PL_STRING
625 || initStyle
== SCE_PL_STRING_QQ
626 || initStyle
== SCE_PL_BACKTICKS
627 || initStyle
== SCE_PL_STRING_QX
628 || initStyle
== SCE_PL_REGEX
629 || initStyle
== SCE_PL_STRING_QR
630 || initStyle
== SCE_PL_REGSUBST
631 || initStyle
== SCE_PL_STRING_VAR
632 || initStyle
== SCE_PL_STRING_QQ_VAR
633 || initStyle
== SCE_PL_BACKTICKS_VAR
634 || initStyle
== SCE_PL_STRING_QX_VAR
635 || initStyle
== SCE_PL_REGEX_VAR
636 || initStyle
== SCE_PL_STRING_QR_VAR
637 || initStyle
== SCE_PL_REGSUBST_VAR
639 // for interpolation, must backtrack through a mix of two different styles
640 int otherStyle
= (initStyle
>= SCE_PL_STRING_VAR
) ?
641 initStyle
- INTERPOLATE_SHIFT
: initStyle
+ INTERPOLATE_SHIFT
;
642 while (startPos
> 1) {
643 int st
= styler
.StyleAt(startPos
- 1);
644 if ((st
!= initStyle
) && (st
!= otherStyle
))
648 initStyle
= SCE_PL_DEFAULT
;
649 } else if (initStyle
== SCE_PL_STRING_Q
650 || initStyle
== SCE_PL_STRING_QW
651 || initStyle
== SCE_PL_XLAT
652 || initStyle
== SCE_PL_CHARACTER
653 || initStyle
== SCE_PL_NUMBER
654 || initStyle
== SCE_PL_IDENTIFIER
655 || initStyle
== SCE_PL_ERROR
656 || initStyle
== SCE_PL_SUB_PROTOTYPE
658 while ((startPos
> 1) && (styler
.StyleAt(startPos
- 1) == initStyle
)) {
661 initStyle
= SCE_PL_DEFAULT
;
662 } else if (initStyle
== SCE_PL_POD
663 || initStyle
== SCE_PL_POD_VERB
665 // POD backtracking finds preceding blank lines and goes back past them
666 int ln
= styler
.GetLine(startPos
);
668 initStyle
= styler
.StyleAt(styler
.LineStart(--ln
));
669 if (initStyle
== SCE_PL_POD
|| initStyle
== SCE_PL_POD_VERB
) {
670 while (ln
> 0 && styler
.GetLineState(ln
) == SCE_PL_DEFAULT
)
673 startPos
= styler
.LineStart(++ln
);
674 initStyle
= styler
.StyleAt(startPos
- 1);
677 initStyle
= SCE_PL_DEFAULT
;
681 // backFlag, backPos are additional state to aid identifier corner cases.
682 // Look backwards past whitespace and comments in order to detect either
683 // operator or keyword. Later updated as we go along.
684 int backFlag
= BACK_NONE
;
685 unsigned int backPos
= startPos
;
688 skipWhitespaceComment(styler
, backPos
);
689 if (styler
.StyleAt(backPos
) == SCE_PL_OPERATOR
)
690 backFlag
= BACK_OPERATOR
;
691 else if (styler
.StyleAt(backPos
) == SCE_PL_WORD
)
692 backFlag
= BACK_KEYWORD
;
696 StyleContext
sc(startPos
, endPos
- startPos
, initStyle
, styler
, static_cast<char>(STYLE_MAX
));
698 for (; sc
.More(); sc
.Forward()) {
700 // Determine if the current state should terminate.
702 case SCE_PL_OPERATOR
:
703 sc
.SetState(SCE_PL_DEFAULT
);
704 backFlag
= BACK_OPERATOR
;
705 backPos
= sc
.currentPos
;
707 case SCE_PL_IDENTIFIER
: // identifier, bareword, inputsymbol
708 if ((!setWord
.Contains(sc
.ch
) && sc
.ch
!= '\'')
709 || sc
.Match('.', '.')
710 || sc
.chPrev
== '>') { // end of inputsymbol
711 sc
.SetState(SCE_PL_DEFAULT
);
714 case SCE_PL_WORD
: // keyword, plus special cases
715 if (!setWord
.Contains(sc
.ch
)) {
717 sc
.GetCurrent(s
, sizeof(s
));
718 if ((strcmp(s
, "__DATA__") == 0) || (strcmp(s
, "__END__") == 0)) {
719 sc
.ChangeState(SCE_PL_DATASECTION
);
721 if ((strcmp(s
, "format") == 0)) {
722 sc
.SetState(SCE_PL_FORMAT_IDENT
);
725 sc
.SetState(SCE_PL_DEFAULT
);
727 backFlag
= BACK_KEYWORD
;
728 backPos
= sc
.currentPos
;
735 case SCE_PL_SYMBOLTABLE
:
736 if (sc
.Match(':', ':')) { // skip ::
738 } else if (!setVar
.Contains(sc
.ch
)) {
739 if (sc
.LengthCurrent() == 1) {
740 // Special variable: $(, $_ etc.
743 sc
.SetState(SCE_PL_DEFAULT
);
747 // if no early break, number style is terminated at "(go through)"
749 if (sc
.chNext
== '.') {
750 // double dot is always an operator (go through)
751 } else if (numState
<= PERLNUM_FLOAT_EXP
) {
752 // non-decimal number or float exponent, consume next dot
753 sc
.SetState(SCE_PL_OPERATOR
);
755 } else { // decimal or vectors allows dots
757 if (numState
== PERLNUM_DECIMAL
) {
758 if (dotCount
<= 1) // number with one dot in it
760 if (IsADigit(sc
.chNext
)) { // really a vector
761 numState
= PERLNUM_VECTOR
;
764 // number then dot (go through)
765 } else if (IsADigit(sc
.chNext
)) // vectors
767 // vector then dot (go through)
769 } else if (sc
.ch
== '_') {
770 // permissive underscoring for number and vector literals
772 } else if (numState
== PERLNUM_DECIMAL
) {
773 if (sc
.ch
== 'E' || sc
.ch
== 'e') { // exponent, sign
774 numState
= PERLNUM_FLOAT_EXP
;
775 if (sc
.chNext
== '+' || sc
.chNext
== '-') {
779 } else if (IsADigit(sc
.ch
))
781 // number then word (go through)
782 } else if (numState
== PERLNUM_HEX
) {
783 if (IsADigit(sc
.ch
, 16))
785 } else if (numState
== PERLNUM_VECTOR
|| numState
== PERLNUM_V_VECTOR
) {
786 if (IsADigit(sc
.ch
)) // vector
788 if (setWord
.Contains(sc
.ch
) && dotCount
== 0) { // change to word
789 sc
.ChangeState(SCE_PL_IDENTIFIER
);
792 // vector then word (go through)
793 } else if (IsADigit(sc
.ch
)) {
794 if (numState
== PERLNUM_FLOAT_EXP
) {
796 } else if (numState
== PERLNUM_OCTAL
) {
797 if (sc
.ch
<= '7') break;
798 } else if (numState
== PERLNUM_BINARY
) {
799 if (sc
.ch
<= '1') break;
801 // mark invalid octal, binary numbers (go through)
802 numState
= PERLNUM_BAD
;
805 // complete current number or vector
806 sc
.ChangeState(actualNumStyle(numState
));
807 sc
.SetState(SCE_PL_DEFAULT
);
809 case SCE_PL_COMMENTLINE
:
811 sc
.SetState(SCE_PL_DEFAULT
);
814 case SCE_PL_HERE_DELIM
:
815 if (HereDoc
.State
== 0) { // '<<' encountered
816 int delim_ch
= sc
.chNext
;
818 HereDoc
.State
= 1; // pre-init HERE doc class
819 HereDoc
.Quote
= sc
.chNext
;
820 HereDoc
.Quoted
= false;
821 HereDoc
.DelimiterLength
= 0;
822 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
] = '\0';
823 if (IsASpaceOrTab(delim_ch
)) {
824 // skip whitespace; legal only for quoted delimiters
825 unsigned int i
= sc
.currentPos
+ 1;
826 while ((i
< endPos
) && IsASpaceOrTab(delim_ch
)) {
828 delim_ch
= static_cast<unsigned char>(styler
.SafeGetCharAt(i
));
830 ws_skip
= i
- sc
.currentPos
- 1;
832 if (delim_ch
== '\'' || delim_ch
== '"' || delim_ch
== '`') {
833 // a quoted here-doc delimiter; skip any whitespace
834 sc
.Forward(ws_skip
+ 1);
835 HereDoc
.Quote
= delim_ch
;
836 HereDoc
.Quoted
= true;
837 } else if ((ws_skip
== 0 && setNonHereDoc
.Contains(sc
.chNext
))
839 // left shift << or <<= operator cases
840 // restore position if operator
841 sc
.ChangeState(SCE_PL_OPERATOR
);
842 sc
.ForwardSetState(SCE_PL_DEFAULT
);
843 backFlag
= BACK_OPERATOR
;
844 backPos
= sc
.currentPos
;
847 // specially handle initial '\' for identifier
848 if (ws_skip
== 0 && HereDoc
.Quote
== '\\')
850 // an unquoted here-doc delimiter, no special handling
851 // (cannot be prefixed by spaces/tabs), or
852 // symbols terminates; deprecated zero-length delimiter
854 } else if (HereDoc
.State
== 1) { // collect the delimiter
855 backFlag
= BACK_NONE
;
856 if (HereDoc
.Quoted
) { // a quoted here-doc delimiter
857 if (sc
.ch
== HereDoc
.Quote
) { // closing quote => end of delimiter
858 sc
.ForwardSetState(SCE_PL_DEFAULT
);
859 } else if (!sc
.atLineEnd
) {
860 if (sc
.Match('\\', static_cast<char>(HereDoc
.Quote
))) { // escaped quote
863 if (sc
.ch
!= '\r') { // skip CR if CRLF
864 HereDoc
.Append(sc
.ch
);
867 } else { // an unquoted here-doc delimiter
868 if (setHereDocDelim
.Contains(sc
.ch
)) {
869 HereDoc
.Append(sc
.ch
);
871 sc
.SetState(SCE_PL_DEFAULT
);
874 if (HereDoc
.DelimiterLength
>= HERE_DELIM_MAX
- 1) {
875 sc
.SetState(SCE_PL_ERROR
);
883 // also implies HereDoc.State == 2
885 if (HereDoc
.DelimiterLength
== 0 || sc
.Match(HereDoc
.Delimiter
)) {
886 int c
= sc
.GetRelative(HereDoc
.DelimiterLength
);
887 if (c
== '\r' || c
== '\n') { // peek first, do not consume match
888 sc
.Forward(HereDoc
.DelimiterLength
);
889 sc
.SetState(SCE_PL_DEFAULT
);
890 backFlag
= BACK_NONE
;
897 if (sc
.state
== SCE_PL_HERE_Q
) { // \EOF and 'EOF' non-interpolated
898 while (!sc
.atLineEnd
)
902 while (!sc
.atLineEnd
) { // "EOF" and `EOF` interpolated
903 int c
, sLen
= 0, endType
= 0;
904 while ((c
= sc
.GetRelativeCharacter(sLen
)) != 0) {
905 // scan to break string into segments
908 } else if (c
== '\r' || c
== '\n') {
913 if (sLen
> 0) // process non-empty segments
914 InterpolateSegment(sc
, sLen
);
917 // \ at end-of-line does not appear to have any effect, skip
918 if (sc
.ch
!= '\r' && sc
.ch
!= '\n')
920 } else if (endType
== 2) {
927 case SCE_PL_POD_VERB
: {
928 unsigned int fw
= sc
.currentPos
;
929 int ln
= styler
.GetLine(fw
);
930 if (sc
.atLineStart
&& sc
.Match("=cut")) { // end of POD
931 sc
.SetState(SCE_PL_POD
);
933 sc
.SetState(SCE_PL_DEFAULT
);
934 styler
.SetLineState(ln
, SCE_PL_POD
);
937 int pod
= podLineScan(styler
, fw
, endPos
); // classify POD line
938 styler
.SetLineState(ln
, pod
);
939 if (pod
== SCE_PL_DEFAULT
) {
940 if (sc
.state
== SCE_PL_POD_VERB
) {
941 unsigned int fw2
= fw
;
942 while (fw2
<= endPos
&& pod
== SCE_PL_DEFAULT
) {
943 fw
= fw2
++; // penultimate line (last blank line)
944 pod
= podLineScan(styler
, fw2
, endPos
);
945 styler
.SetLineState(styler
.GetLine(fw2
), pod
);
947 if (pod
== SCE_PL_POD
) { // truncate verbatim POD early
948 sc
.SetState(SCE_PL_POD
);
953 if (pod
== SCE_PL_POD_VERB
// still part of current paragraph
954 && (styler
.GetLineState(ln
- 1) == SCE_PL_POD
)) {
956 styler
.SetLineState(ln
, pod
);
957 } else if (pod
== SCE_PL_POD
958 && (styler
.GetLineState(ln
- 1) == SCE_PL_POD_VERB
)) {
959 pod
= SCE_PL_POD_VERB
;
960 styler
.SetLineState(ln
, pod
);
964 sc
.Forward(fw
- sc
.currentPos
); // commit style
968 case SCE_PL_STRING_QR
:
969 if (Quote
.Rep
<= 0) {
970 if (!setModifiers
.Contains(sc
.ch
))
971 sc
.SetState(SCE_PL_DEFAULT
);
972 } else if (!Quote
.Up
&& !IsASpace(sc
.ch
)) {
975 int c
, sLen
= 0, endType
= 0;
976 while ((c
= sc
.GetRelativeCharacter(sLen
)) != 0) {
977 // scan to break string into segments
980 } else if (c
== '\\' && Quote
.Up
!= '\\') {
982 } else if (c
== Quote
.Down
) {
984 if (Quote
.Count
== 0) {
988 } else if (c
== Quote
.Up
)
992 if (sLen
> 0) { // process non-empty segments
993 if (Quote
.Up
!= '\'') {
994 InterpolateSegment(sc
, sLen
, true);
995 } else // non-interpolated path
1002 case SCE_PL_REGSUBST
:
1004 if (Quote
.Rep
<= 0) {
1005 if (!setModifiers
.Contains(sc
.ch
))
1006 sc
.SetState(SCE_PL_DEFAULT
);
1007 } else if (!Quote
.Up
&& !IsASpace(sc
.ch
)) {
1010 int c
, sLen
= 0, endType
= 0;
1011 bool isPattern
= (Quote
.Rep
== 2);
1012 while ((c
= sc
.GetRelativeCharacter(sLen
)) != 0) {
1013 // scan to break string into segments
1014 if (c
== '\\' && Quote
.Up
!= '\\') {
1016 } else if (Quote
.Count
== 0 && Quote
.Rep
== 1) {
1017 // We matched something like s(...) or tr{...}, Perl 5.10
1018 // appears to allow almost any character for use as the
1019 // next delimiters. Whitespace and comments are accepted in
1020 // between, but we'll limit to whitespace here.
1021 // For '#', if no whitespace in between, it's a delimiter.
1024 } else if (c
== '#' && IsASpaceOrTab(sc
.GetRelative(sLen
- 1))) {
1029 } else if (c
== Quote
.Down
) {
1031 if (Quote
.Count
== 0) {
1035 if (Quote
.Up
== Quote
.Down
)
1039 } else if (c
== Quote
.Up
) {
1041 } else if (IsASpace(c
))
1045 if (sLen
> 0) { // process non-empty segments
1046 if (sc
.state
== SCE_PL_REGSUBST
&& Quote
.Up
!= '\'') {
1047 InterpolateSegment(sc
, sLen
, isPattern
);
1048 } else // non-interpolated path
1053 } else if (endType
== 3)
1054 sc
.SetState(SCE_PL_DEFAULT
);
1057 case SCE_PL_STRING_Q
:
1058 case SCE_PL_STRING_QQ
:
1059 case SCE_PL_STRING_QX
:
1060 case SCE_PL_STRING_QW
:
1062 case SCE_PL_CHARACTER
:
1063 case SCE_PL_BACKTICKS
:
1064 if (!Quote
.Down
&& !IsASpace(sc
.ch
)) {
1067 int c
, sLen
= 0, endType
= 0;
1068 while ((c
= sc
.GetRelativeCharacter(sLen
)) != 0) {
1069 // scan to break string into segments
1072 } else if (c
== '\\' && Quote
.Up
!= '\\') {
1074 } else if (c
== Quote
.Down
) {
1076 if (Quote
.Count
== 0) {
1079 } else if (c
== Quote
.Up
)
1083 if (sLen
> 0) { // process non-empty segments
1086 case SCE_PL_STRING_QQ
:
1087 case SCE_PL_BACKTICKS
:
1088 InterpolateSegment(sc
, sLen
);
1090 case SCE_PL_STRING_QX
:
1091 if (Quote
.Up
!= '\'') {
1092 InterpolateSegment(sc
, sLen
);
1095 // (continued for ' delim)
1096 default: // non-interpolated path
1102 } else if (endType
== 3)
1103 sc
.ForwardSetState(SCE_PL_DEFAULT
);
1106 case SCE_PL_SUB_PROTOTYPE
: {
1108 // forward scan; must all be valid proto characters
1109 while (setSubPrototype
.Contains(sc
.GetRelative(i
)))
1111 if (sc
.GetRelative(i
) == ')') { // valid sub prototype
1113 sc
.ForwardSetState(SCE_PL_DEFAULT
);
1115 // abandon prototype, restart from '('
1116 sc
.ChangeState(SCE_PL_OPERATOR
);
1117 sc
.SetState(SCE_PL_DEFAULT
);
1121 case SCE_PL_FORMAT
: {
1123 if (sc
.Match('.')) {
1125 if (sc
.atLineEnd
|| ((sc
.ch
== '\r' && sc
.chNext
== '\n')))
1126 sc
.SetState(SCE_PL_DEFAULT
);
1128 while (!sc
.atLineEnd
)
1135 // Needed for specific continuation styles (one follows the other)
1137 // continued from SCE_PL_WORD
1138 case SCE_PL_FORMAT_IDENT
:
1139 // occupies HereDoc state 3 to avoid clashing with HERE docs
1140 if (IsASpaceOrTab(sc
.ch
)) { // skip whitespace
1141 sc
.ChangeState(SCE_PL_DEFAULT
);
1142 while (IsASpaceOrTab(sc
.ch
) && !sc
.atLineEnd
)
1144 sc
.SetState(SCE_PL_FORMAT_IDENT
);
1146 if (setFormatStart
.Contains(sc
.ch
)) { // identifier or '='
1150 } while (setFormat
.Contains(sc
.ch
));
1152 while (IsASpaceOrTab(sc
.ch
) && !sc
.atLineEnd
)
1155 sc
.ForwardSetState(SCE_PL_DEFAULT
);
1158 // invalid identifier; inexact fallback, but hey
1159 sc
.ChangeState(SCE_PL_IDENTIFIER
);
1160 sc
.SetState(SCE_PL_DEFAULT
);
1163 sc
.ChangeState(SCE_PL_DEFAULT
); // invalid identifier
1165 backFlag
= BACK_NONE
;
1169 // Must check end of HereDoc states here before default state is handled
1170 if (HereDoc
.State
== 1 && sc
.atLineEnd
) {
1171 // Begin of here-doc (the line after the here-doc delimiter):
1172 // Lexically, the here-doc starts from the next line after the <<, but the
1173 // first line of the here-doc seems to follow the style of the last EOL sequence
1174 int st_new
= SCE_PL_HERE_QQ
;
1176 if (HereDoc
.Quoted
) {
1177 if (sc
.state
== SCE_PL_HERE_DELIM
) {
1178 // Missing quote at end of string! We are stricter than perl.
1179 // Colour here-doc anyway while marking this bit as an error.
1180 sc
.ChangeState(SCE_PL_ERROR
);
1182 switch (HereDoc
.Quote
) {
1184 st_new
= SCE_PL_HERE_Q
;
1187 st_new
= SCE_PL_HERE_QQ
;
1190 st_new
= SCE_PL_HERE_QX
;
1194 if (HereDoc
.Quote
== '\\')
1195 st_new
= SCE_PL_HERE_Q
;
1197 sc
.SetState(st_new
);
1199 if (HereDoc
.State
== 3 && sc
.atLineEnd
) {
1200 // Start of format body.
1202 sc
.SetState(SCE_PL_FORMAT
);
1205 // Determine if a new state should be entered.
1206 if (sc
.state
== SCE_PL_DEFAULT
) {
1207 if (IsADigit(sc
.ch
) ||
1208 (IsADigit(sc
.chNext
) && (sc
.ch
== '.' || sc
.ch
== 'v'))) {
1209 sc
.SetState(SCE_PL_NUMBER
);
1210 backFlag
= BACK_NONE
;
1211 numState
= PERLNUM_DECIMAL
;
1213 if (sc
.ch
== '0') { // hex,bin,octal
1214 if (sc
.chNext
== 'x' || sc
.chNext
== 'X') {
1215 numState
= PERLNUM_HEX
;
1216 } else if (sc
.chNext
== 'b' || sc
.chNext
== 'B') {
1217 numState
= PERLNUM_BINARY
;
1218 } else if (IsADigit(sc
.chNext
)) {
1219 numState
= PERLNUM_OCTAL
;
1221 if (numState
!= PERLNUM_DECIMAL
) {
1224 } else if (sc
.ch
== 'v') { // vector
1225 numState
= PERLNUM_V_VECTOR
;
1227 } else if (setWord
.Contains(sc
.ch
)) {
1228 // if immediately prefixed by '::', always a bareword
1229 sc
.SetState(SCE_PL_WORD
);
1230 if (sc
.chPrev
== ':' && sc
.GetRelative(-2) == ':') {
1231 sc
.ChangeState(SCE_PL_IDENTIFIER
);
1233 unsigned int bk
= sc
.currentPos
;
1234 unsigned int fw
= sc
.currentPos
+ 1;
1235 // first check for possible quote-like delimiter
1236 if (sc
.ch
== 's' && !setWord
.Contains(sc
.chNext
)) {
1237 sc
.ChangeState(SCE_PL_REGSUBST
);
1239 } else if (sc
.ch
== 'm' && !setWord
.Contains(sc
.chNext
)) {
1240 sc
.ChangeState(SCE_PL_REGEX
);
1242 } else if (sc
.ch
== 'q' && !setWord
.Contains(sc
.chNext
)) {
1243 sc
.ChangeState(SCE_PL_STRING_Q
);
1245 } else if (sc
.ch
== 'y' && !setWord
.Contains(sc
.chNext
)) {
1246 sc
.ChangeState(SCE_PL_XLAT
);
1248 } else if (sc
.Match('t', 'r') && !setWord
.Contains(sc
.GetRelative(2))) {
1249 sc
.ChangeState(SCE_PL_XLAT
);
1253 } else if (sc
.ch
== 'q' && setQDelim
.Contains(sc
.chNext
)
1254 && !setWord
.Contains(sc
.GetRelative(2))) {
1255 if (sc
.chNext
== 'q') sc
.ChangeState(SCE_PL_STRING_QQ
);
1256 else if (sc
.chNext
== 'x') sc
.ChangeState(SCE_PL_STRING_QX
);
1257 else if (sc
.chNext
== 'r') sc
.ChangeState(SCE_PL_STRING_QR
);
1258 else sc
.ChangeState(SCE_PL_STRING_QW
); // sc.chNext == 'w'
1262 } else if (sc
.ch
== 'x' && (sc
.chNext
== '=' || // repetition
1263 !setWord
.Contains(sc
.chNext
) ||
1264 (IsADigit(sc
.chPrev
) && IsADigit(sc
.chNext
)))) {
1265 sc
.ChangeState(SCE_PL_OPERATOR
);
1267 // if potentially a keyword, scan forward and grab word, then check
1268 // if it's really one; if yes, disambiguation test is performed
1269 // otherwise it is always a bareword and we skip a lot of scanning
1270 if (sc
.state
== SCE_PL_WORD
) {
1271 while (setWord
.Contains(static_cast<unsigned char>(styler
.SafeGetCharAt(fw
))))
1273 if (!isPerlKeyword(styler
.GetStartSegment(), fw
, keywords
, styler
)) {
1274 sc
.ChangeState(SCE_PL_IDENTIFIER
);
1277 // if already SCE_PL_IDENTIFIER, then no ambiguity, skip this
1278 // for quote-like delimiters/keywords, attempt to disambiguate
1279 // to select for bareword, change state -> SCE_PL_IDENTIFIER
1280 if (sc
.state
!= SCE_PL_IDENTIFIER
&& bk
> 0) {
1281 if (disambiguateBareword(styler
, bk
, fw
, backFlag
, backPos
, endPos
))
1282 sc
.ChangeState(SCE_PL_IDENTIFIER
);
1284 backFlag
= BACK_NONE
;
1285 } else if (sc
.ch
== '#') {
1286 sc
.SetState(SCE_PL_COMMENTLINE
);
1287 } else if (sc
.ch
== '\"') {
1288 sc
.SetState(SCE_PL_STRING
);
1291 backFlag
= BACK_NONE
;
1292 } else if (sc
.ch
== '\'') {
1293 if (sc
.chPrev
== '&' && setWordStart
.Contains(sc
.chNext
)) {
1295 sc
.SetState(SCE_PL_IDENTIFIER
);
1297 sc
.SetState(SCE_PL_CHARACTER
);
1301 backFlag
= BACK_NONE
;
1302 } else if (sc
.ch
== '`') {
1303 sc
.SetState(SCE_PL_BACKTICKS
);
1306 backFlag
= BACK_NONE
;
1307 } else if (sc
.ch
== '$') {
1308 sc
.SetState(SCE_PL_SCALAR
);
1309 if (sc
.chNext
== '{') {
1310 sc
.ForwardSetState(SCE_PL_OPERATOR
);
1311 } else if (IsASpace(sc
.chNext
)) {
1312 sc
.ForwardSetState(SCE_PL_DEFAULT
);
1315 if (sc
.Match('`', '`') || sc
.Match(':', ':')) {
1319 backFlag
= BACK_NONE
;
1320 } else if (sc
.ch
== '@') {
1321 sc
.SetState(SCE_PL_ARRAY
);
1322 if (setArray
.Contains(sc
.chNext
)) {
1323 // no special treatment
1324 } else if (sc
.chNext
== ':' && sc
.GetRelative(2) == ':') {
1326 } else if (sc
.chNext
== '{' || sc
.chNext
== '[') {
1327 sc
.ForwardSetState(SCE_PL_OPERATOR
);
1329 sc
.ChangeState(SCE_PL_OPERATOR
);
1331 backFlag
= BACK_NONE
;
1332 } else if (setPreferRE
.Contains(sc
.ch
)) {
1333 // Explicit backward peeking to set a consistent preferRE for
1334 // any slash found, so no longer need to track preferRE state.
1335 // Find first previous significant lexed element and interpret.
1336 // A few symbols share this code for disambiguation.
1337 bool preferRE
= false;
1338 bool isHereDoc
= sc
.Match('<', '<');
1339 bool hereDocSpace
= false; // for: SCALAR [whitespace] '<<'
1340 unsigned int bk
= (sc
.currentPos
> 0) ? sc
.currentPos
- 1: 0;
1343 if (styler
.StyleAt(bk
) == SCE_PL_DEFAULT
)
1344 hereDocSpace
= true;
1345 skipWhitespaceComment(styler
, bk
);
1347 // avoid backward scanning breakage
1350 int bkstyle
= styler
.StyleAt(bk
);
1351 int bkch
= static_cast<unsigned char>(styler
.SafeGetCharAt(bk
));
1353 case SCE_PL_OPERATOR
:
1355 if (bkch
== ')' || bkch
== ']') {
1357 } else if (bkch
== '}') {
1358 // backtrack by counting balanced brace pairs
1359 // needed to test for variables like ${}, @{} etc.
1360 bkstyle
= styleBeforeBracePair(styler
, bk
);
1361 if (bkstyle
== SCE_PL_SCALAR
1362 || bkstyle
== SCE_PL_ARRAY
1363 || bkstyle
== SCE_PL_HASH
1364 || bkstyle
== SCE_PL_SYMBOLTABLE
1365 || bkstyle
== SCE_PL_OPERATOR
) {
1368 } else if (bkch
== '+' || bkch
== '-') {
1369 if (bkch
== static_cast<unsigned char>(styler
.SafeGetCharAt(bk
- 1))
1370 && bkch
!= static_cast<unsigned char>(styler
.SafeGetCharAt(bk
- 2)))
1371 // exceptions for operators: unary suffixes ++, --
1375 case SCE_PL_IDENTIFIER
:
1377 bkstyle
= styleCheckIdentifier(styler
, bk
);
1378 if ((bkstyle
== 1) || (bkstyle
== 2)) {
1379 // inputsymbol or var with "->" or "::" before identifier
1381 } else if (bkstyle
== 3) {
1382 // bare identifier, test cases follow:
1384 // if '/', /PATTERN/ unless digit/space immediately after '/'
1385 // if '//', always expect defined-or operator to follow identifier
1386 if (IsASpace(sc
.chNext
) || IsADigit(sc
.chNext
) || sc
.chNext
== '/')
1388 } else if (sc
.ch
== '*' || sc
.ch
== '%') {
1389 if (IsASpace(sc
.chNext
) || IsADigit(sc
.chNext
) || sc
.Match('*', '*'))
1391 } else if (sc
.ch
== '<') {
1392 if (IsASpace(sc
.chNext
) || sc
.chNext
== '=')
1397 case SCE_PL_SCALAR
: // for $var<< case:
1398 if (isHereDoc
&& hereDocSpace
) // if SCALAR whitespace '<<', *always* a HERE doc
1403 // for HERE docs, always true
1405 // adopt heuristics similar to vim-style rules:
1406 // keywords always forced as /PATTERN/: split, if, elsif, while
1407 // everything else /PATTERN/ unless digit/space immediately after '/'
1408 // for '//', defined-or favoured unless special keywords
1409 unsigned int bkend
= bk
+ 1;
1410 while (bk
> 0 && styler
.StyleAt(bk
- 1) == SCE_PL_WORD
) {
1413 if (isPerlKeyword(bk
, bkend
, reWords
, styler
))
1415 if (IsASpace(sc
.chNext
) || IsADigit(sc
.chNext
) || sc
.chNext
== '/')
1417 } else if (sc
.ch
== '*' || sc
.ch
== '%') {
1418 if (IsASpace(sc
.chNext
) || IsADigit(sc
.chNext
) || sc
.Match('*', '*'))
1420 } else if (sc
.ch
== '<') {
1421 if (IsASpace(sc
.chNext
) || sc
.chNext
== '=')
1426 // other styles use the default, preferRE=false
1429 case SCE_PL_HERE_QQ
:
1430 case SCE_PL_HERE_QX
:
1435 backFlag
= BACK_NONE
;
1436 if (isHereDoc
) { // handle '<<', HERE doc
1438 sc
.SetState(SCE_PL_HERE_DELIM
);
1440 } else { // << operator
1441 sc
.SetState(SCE_PL_OPERATOR
);
1444 } else if (sc
.ch
== '*') { // handle '*', typeglob
1446 sc
.SetState(SCE_PL_SYMBOLTABLE
);
1447 if (sc
.chNext
== ':' && sc
.GetRelative(2) == ':') {
1449 } else if (sc
.chNext
== '{') {
1450 sc
.ForwardSetState(SCE_PL_OPERATOR
);
1455 sc
.SetState(SCE_PL_OPERATOR
);
1456 if (sc
.chNext
== '*') // exponentiation
1459 } else if (sc
.ch
== '%') { // handle '%', hash
1461 sc
.SetState(SCE_PL_HASH
);
1462 if (setHash
.Contains(sc
.chNext
)) {
1464 } else if (sc
.chNext
== ':' && sc
.GetRelative(2) == ':') {
1466 } else if (sc
.chNext
== '{') {
1467 sc
.ForwardSetState(SCE_PL_OPERATOR
);
1469 sc
.ChangeState(SCE_PL_OPERATOR
);
1472 sc
.SetState(SCE_PL_OPERATOR
);
1474 } else if (sc
.ch
== '<') { // handle '<', inputsymbol
1477 int i
= InputSymbolScan(sc
);
1479 sc
.SetState(SCE_PL_IDENTIFIER
);
1482 sc
.SetState(SCE_PL_OPERATOR
);
1485 sc
.SetState(SCE_PL_OPERATOR
);
1487 } else { // handle '/', regexp
1489 sc
.SetState(SCE_PL_REGEX
);
1492 } else { // / and // operators
1493 sc
.SetState(SCE_PL_OPERATOR
);
1494 if (sc
.chNext
== '/') {
1499 } else if (sc
.ch
== '=' // POD
1500 && setPOD
.Contains(sc
.chNext
)
1501 && sc
.atLineStart
) {
1502 sc
.SetState(SCE_PL_POD
);
1503 backFlag
= BACK_NONE
;
1504 } else if (sc
.ch
== '-' && setWordStart
.Contains(sc
.chNext
)) { // extended '-' cases
1505 unsigned int bk
= sc
.currentPos
;
1506 unsigned int fw
= 2;
1507 if (setSingleCharOp
.Contains(sc
.chNext
) && // file test operators
1508 !setWord
.Contains(sc
.GetRelative(2))) {
1509 sc
.SetState(SCE_PL_WORD
);
1511 // nominally a minus and bareword; find extent of bareword
1512 while (setWord
.Contains(sc
.GetRelative(fw
)))
1514 sc
.SetState(SCE_PL_OPERATOR
);
1516 // force to bareword for hash key => or {variable literal} cases
1517 if (disambiguateBareword(styler
, bk
, bk
+ fw
, backFlag
, backPos
, endPos
) & 2) {
1518 sc
.ChangeState(SCE_PL_IDENTIFIER
);
1520 backFlag
= BACK_NONE
;
1521 } else if (sc
.ch
== '(' && sc
.currentPos
> 0) { // '(' or subroutine prototype
1523 if (styleCheckSubPrototype(styler
, sc
.currentPos
- 1)) {
1524 sc
.SetState(SCE_PL_SUB_PROTOTYPE
);
1525 backFlag
= BACK_NONE
;
1527 sc
.SetState(SCE_PL_OPERATOR
);
1529 } else if (setPerlOperator
.Contains(sc
.ch
)) { // operators
1530 sc
.SetState(SCE_PL_OPERATOR
);
1531 if (sc
.Match('.', '.')) { // .. and ...
1533 if (sc
.chNext
== '.') sc
.Forward();
1535 } else if (sc
.ch
== 4 || sc
.ch
== 26) { // ^D and ^Z ends valid perl source
1536 sc
.SetState(SCE_PL_DATASECTION
);
1538 // keep colouring defaults
1544 if (sc
.state
== SCE_PL_HERE_Q
1545 || sc
.state
== SCE_PL_HERE_QQ
1546 || sc
.state
== SCE_PL_HERE_QX
1547 || sc
.state
== SCE_PL_FORMAT
) {
1548 styler
.ChangeLexerState(sc
.currentPos
, styler
.Length());
// Shift and mask of the bit field that carries the POD heading level inside a
// fold level value: 0xF0 selects bits 7-4, and a heading level is placed there
// by shifting it left by PERL_HEADFOLD_SHIFT (see their use in LexerPerl::Fold).
1553 #define PERL_HEADFOLD_SHIFT 4
1554 #define PERL_HEADFOLD_MASK 0xF0
// LexerPerl::Fold: computes fold levels for the lines covering
// [startPos, startPos + length) and stores them through styler.SetLevel().
//   startPos - document position where folding starts (moved back to a line
//              start below when lineCurrent > 0)
//   length   - number of characters to process
//   (unnamed int) - the initial style; its name is commented out, so it is unused
//   pAccess  - document interface wrapped by the LexAccessor used for all reads
// NOTE(review): this listing omits some original lines (the embedded source
// line numbers are not contiguous), so the comments below describe only the
// statements that are visible here.
1556 void SCI_METHOD
LexerPerl::Fold(unsigned int startPos
, int length
, int /* initStyle */, IDocument
*pAccess
) {
1561 LexAccessor
styler(pAccess
);
1563 unsigned int endPos
= startPos
+ length
;
1564 int visibleChars
= 0;
1565 int lineCurrent
= styler
.GetLine(startPos
);
1567 // Backtrack to the previous line in case we need to fix its fold status
1569 if (lineCurrent
> 0) {
1571 startPos
= styler
.LineStart(lineCurrent
);
// Start at the base level; when a previous line exists, its stored value holds
// the level for the line after it in the upper 16 bits (written further down
// with "lev |= levelCurrent << 16"), so that is the starting level here.
1575 int levelPrev
= SC_FOLDLEVELBASE
;
1576 if (lineCurrent
> 0)
1577 levelPrev
= styler
.LevelAt(lineCurrent
- 1) >> 16;
1578 int levelCurrent
= levelPrev
;
1579 char chNext
= styler
[startPos
];
1580 char chPrev
= styler
.SafeGetCharAt(startPos
- 1);
1581 int styleNext
= styler
.StyleAt(startPos
);
1582 // Used at end of line to determine if the line was a package definition
1583 bool isPackageLine
= false;
// Walk the range one character at a time, tracking the style of the current
// character (style), the next one (styleNext) and the previous one (stylePrevCh).
1585 for (unsigned int i
= startPos
; i
< endPos
; i
++) {
1587 chNext
= styler
.SafeGetCharAt(i
+ 1);
1588 int style
= styleNext
;
1589 styleNext
= styler
.StyleAt(i
+ 1);
// At i == 0 there is no previous character; SCE_PL_DEFAULT is used instead.
1590 int stylePrevCh
= (i
) ? styler
.StyleAt(i
- 1):SCE_PL_DEFAULT
;
// True on '\n', or on a '\r' that is not followed by '\n', so a CR LF pair is
// reported as a single line end (at the LF).
1591 bool atEOL
= (ch
== '\r' && chNext
!= '\n') || (ch
== '\n');
// True when the previous character was CR or LF, or at document position 0.
1592 bool atLineStart
= ((chPrev
== '\r') || (chPrev
== '\n')) || i
== 0;
// Comment folding (option foldComment): evaluated at a line end on a comment
// line by comparing with the previous and next lines via IsCommentLine().
1594 if (options
.foldComment
&& atEOL
&& IsCommentLine(lineCurrent
, styler
)) {
1595 if (!IsCommentLine(lineCurrent
- 1, styler
)
1596 && IsCommentLine(lineCurrent
+ 1, styler
))
1598 else if (IsCommentLine(lineCurrent
- 1, styler
)
1599 && !IsCommentLine(lineCurrent
+ 1, styler
))
1602 // {} [] block folding
1603 if (style
== SCE_PL_OPERATOR
) {
1605 if (options
.foldAtElse
&& levelCurrent
< levelPrev
)
1608 } else if (ch
== '}') {
1612 if (options
.foldAtElse
&& levelCurrent
< levelPrev
)
1615 } else if (ch
== ']') {
// POD folding (option foldPOD): only examined on the first character of a line.
1620 if (options
.foldPOD
&& atLineStart
) {
1621 if (style
== SCE_PL_POD
) {
1622 if (stylePrevCh
!= SCE_PL_POD
&& stylePrevCh
!= SCE_PL_POD_VERB
)
// "=cut": clear the POD heading bits from the level and drop one level.
1624 else if (styler
.Match(i
, "=cut"))
1625 levelCurrent
= (levelCurrent
& ~PERL_HEADFOLD_MASK
) - 1;
// "=head": record the heading level; it is applied when the line's level is
// assembled below.
1626 else if (styler
.Match(i
, "=head"))
1627 podHeading
= PodHeadingLevel(i
, styler
);
// The same POD commands are recognised inside text styled as SCE_PL_DATASECTION,
// with the level tested against SC_FOLDLEVELBASE.
1628 } else if (style
== SCE_PL_DATASECTION
) {
1629 if (ch
== '=' && isascii(chNext
) && isalpha(chNext
) && levelCurrent
== SC_FOLDLEVELBASE
)
1631 else if (styler
.Match(i
, "=cut") && levelCurrent
> SC_FOLDLEVELBASE
)
1632 levelCurrent
= (levelCurrent
& ~PERL_HEADFOLD_MASK
) - 1;
1633 else if (styler
.Match(i
, "=head"))
1634 podHeading
= PodHeadingLevel(i
, styler
);
1635 // if package used or unclosed brace, level > SC_FOLDLEVELBASE!
1636 // reset needed as level test is vs. SC_FOLDLEVELBASE
1637 else if (stylePrevCh
!= SCE_PL_DATASECTION
)
1638 levelCurrent
= SC_FOLDLEVELBASE
;
// Package folding (option foldPackage): flag this line when it is a package
// line and the following line is not.
1642 if (options
.foldPackage
&& atLineStart
) {
1643 if (IsPackageLine(lineCurrent
, styler
)
1644 && !IsPackageLine(lineCurrent
+ 1, styler
))
1645 isPackageLine
= true;
// Here-doc styles: the style of the previous character is examined as well.
// NOTE(review): the enclosing switch and the branch bodies are not visible in
// this listing.
1650 case SCE_PL_HERE_QQ
:
1651 case SCE_PL_HERE_Q
:
1652 case SCE_PL_HERE_QX
:
1653 switch (stylePrevCh
) {
1654 case SCE_PL_HERE_QQ
:
1655 case SCE_PL_HERE_Q
:
1656 case SCE_PL_HERE_QX
:
1665 switch (stylePrevCh
) {
1666 case SCE_PL_HERE_QQ
:
1667 case SCE_PL_HERE_Q
:
1668 case SCE_PL_HERE_QX
:
// Explicit fold markers (option foldCommentExplicit): a '#' styled as a line
// comment and immediately followed by '{' or '}'.
1679 if (options
.foldCommentExplicit
&& style
== SCE_PL_COMMENTLINE
&& ch
== '#') {
1680 if (chNext
== '{') {
1682 } else if (levelCurrent
> SC_FOLDLEVELBASE
&& chNext
== '}') {
// Assemble the value stored for the current line, starting from the level the
// line began with.
1688 int lev
= levelPrev
;
1689 // POD headings occupy bits 7-4, leaving some breathing room for
1690 // non-standard practice -- POD sections stuck in blocks, etc.
1691 if (podHeading
> 0) {
1692 levelCurrent
= (lev
& ~PERL_HEADFOLD_MASK
) | (podHeading
<< PERL_HEADFOLD_SHIFT
);
1693 lev
= levelCurrent
- 1;
1694 lev
|= SC_FOLDLEVELHEADERFLAG
;
1697 // Check if line was a package declaration
1698 // because packages need "special" treatment
1699 if (isPackageLine
) {
1700 lev
= SC_FOLDLEVELBASE
| SC_FOLDLEVELHEADERFLAG
;
1701 levelCurrent
= SC_FOLDLEVELBASE
+ 1;
1702 isPackageLine
= false;
// The level for the following line is kept in the upper 16 bits.
1704 lev
|= levelCurrent
<< 16;
1705 if (visibleChars
== 0 && options
.foldCompact
)
1706 lev
|= SC_FOLDLEVELWHITEFLAG
;
1707 if ((levelCurrent
> levelPrev
) && (visibleChars
> 0))
1708 lev
|= SC_FOLDLEVELHEADERFLAG
;
// Only write the level back when it differs from what is already stored.
1709 if (lev
!= styler
.LevelAt(lineCurrent
)) {
1710 styler
.SetLevel(lineCurrent
, lev
);
1713 levelPrev
= levelCurrent
;
1716 if (!isspacechar(ch
))
1720 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1721 int flagsNext
= styler
.LevelAt(lineCurrent
) & ~SC_FOLDLEVELNUMBERMASK
;
1722 styler
.SetLevel(lineCurrent
, levelPrev
| flagsNext
);
// Lexer module object for Perl: ties the SCLEX_PERL identifier and the name
// "perl" to the LexerPerl::LexerFactoryPerl factory and the perlWordListDesc
// word list descriptions.
// NOTE(review): the trailing 8 is presumably the number of style bits -- confirm
// against the LexerModule constructor.
1725 LexerModule
lmPerl(SCLEX_PERL
, LexerPerl::LexerFactoryPerl
, "perl", perlWordListDesc
, 8);