1 // Scintilla source code edit control
4 ** Converted to lexer object by "Udo Lechner" <dlchnr(at)gmx(dot)net>
6 // Copyright 1998-2008 by Neil Hodgson <neilh@scintilla.org>
7 // Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my>
8 // The License.txt file describes the conditions under which this software may be distributed.
21 #include "Scintilla.h"
25 #include "LexAccessor.h"
26 #include "StyleContext.h"
27 #include "CharacterSet.h"
28 #include "LexerModule.h"
29 #include "OptionSet.h"
32 using namespace Scintilla
;
35 // Info for HERE document handling from perldata.pod (reformatted):
36 // ----------------------------------------------------------------
37 // A line-oriented form of quoting is based on the shell ``here-doc'' syntax.
38 // Following a << you specify a string to terminate the quoted material, and
39 // all lines following the current line down to the terminating string are
40 // the value of the item.
41 // * The terminating string may be either an identifier (a word), or some quoted text.
43 // * If quoted, the type of quotes you use determines the treatment of the
44 // text, just as in regular quoting.
45 // * An unquoted identifier works like double quotes.
46 // * There must be no space between the << and the identifier.
47 // (If you put a space it will be treated as a null identifier,
48 // which is valid, and matches the first empty line.)
49 // (This is deprecated, -w warns of this syntax)
50 // * The terminating string must appear by itself (unquoted and
51 // with no surrounding whitespace) on the terminating line.
53 #define HERE_DELIM_MAX 256 // maximum length of HERE doc delimiter
55 #define PERLNUM_BINARY 1 // order is significant: 1-3 cannot have a dot
56 #define PERLNUM_OCTAL 2
57 #define PERLNUM_FLOAT_EXP 3 // exponent part only
58 #define PERLNUM_HEX 4 // may be a hex float
59 #define PERLNUM_DECIMAL 5 // 1-5 are numbers; 6-7 are strings
60 #define PERLNUM_VECTOR 6
61 #define PERLNUM_V_VECTOR 7
64 #define BACK_NONE 0 // lookback state for bareword disambiguation:
65 #define BACK_OPERATOR 1 // whitespace/comments are insignificant
66 #define BACK_KEYWORD 2 // operators/keywords are needed for disambiguation
68 #define SUB_BEGIN 0 // states for subroutine prototype scan:
69 #define SUB_HAS_PROTO 1 // only 'prototype' attribute allows prototypes
70 #define SUB_HAS_ATTRIB 2 // other attributes can exist leftward
71 #define SUB_HAS_MODULE 3 // sub name can have a ::identifier part
72 #define SUB_HAS_SUB 4 // 'sub' keyword
74 // all interpolated styles are different from their parent styles by a constant difference
75 // we also assume SCE_PL_STRING_VAR is the interpolated style with the smallest value
76 #define INTERPOLATE_SHIFT (SCE_PL_STRING_VAR - SCE_PL_STRING)
78 static bool isPerlKeyword(Sci_PositionU start
, Sci_PositionU end
, WordList
&keywords
, LexAccessor
&styler
) {
79 // old-style keyword matcher; needed because GetCurrent() needs
80 // current segment to be committed, but we may abandon early...
82 Sci_PositionU i
, len
= end
- start
;
83 if (len
> 30) { len
= 30; }
84 for (i
= 0; i
< len
; i
++, start
++) s
[i
] = styler
[start
];
86 return keywords
.InList(s
);
// Decide how an identifier should be treated by inspecting the text around it.
//   styler   - document accessor
//   bk       - position inspected when looking backward from the word
//   fw       - position where the forward scan over tabs/spaces begins
//   backFlag - lookback state (BACK_NONE, BACK_OPERATOR or BACK_KEYWORD)
//   backPos  - compared with the start of bk's line to detect a line crossing
//   endPos   - upper bound for the forward scan
// The result is an int carrying its information in the 2 least significant bits.
89 static int disambiguateBareword(LexAccessor
&styler
, Sci_PositionU bk
, Sci_PositionU fw
,
90 int backFlag
, Sci_PositionU backPos
, Sci_PositionU endPos
) {
91 // identifiers are recognized by Perl as barewords under some
92 // conditions, the following attempts to do the disambiguation
93 // by looking backward and forward; result in 2 LSB
95 bool moreback
= false; // true if passed newline/comments
96 bool brace
= false; // true if opening brace found
97 // if BACK_NONE, neither operator nor keyword, so skip test
98 if (backFlag
== BACK_NONE
)
100 // first look backwards past whitespace/comments to set EOL flag
101 // (some disambiguation patterns must be on a single line)
102 if (backPos
<= static_cast<Sci_PositionU
>(styler
.LineStart(styler
.GetLine(bk
))))
104 // look backwards at last significant lexed item for disambiguation
// the character is widened through unsigned char so it is never negative
106 int ch
= static_cast<unsigned char>(styler
.SafeGetCharAt(bk
));
107 if (ch
== '{' && !moreback
) {
108 // {bareword: possible variable spec
110 } else if ((ch
== '&' && styler
.SafeGetCharAt(bk
- 1) != '&')
111 // &bareword: subroutine call
112 || styler
.Match(bk
- 1, "->")
113 // ->bareword: part of variable spec
114 || styler
.Match(bk
- 1, "::")
115 // ::bareword: part of module spec
116 || styler
.Match(bk
- 2, "sub")) {
117 // sub bareword: subroutine declaration
118 // (implied BACK_KEYWORD, no keywords end in 'sub'!)
121 // next, scan forward after word past tab/spaces only;
122 // if ch isn't one of '[{(,' we can skip the test
123 if ((ch
== '{' || ch
== '(' || ch
== '['|| ch
== ',')
// ch is re-read at fw on every iteration (comma operator) before the test
125 while (ch
= static_cast<unsigned char>(styler
.SafeGetCharAt(fw
)),
126 IsASpaceOrTab(ch
) && fw
< endPos
) {
129 if ((ch
== '}' && brace
)
130 // {bareword}: variable spec
131 || styler
.Match(fw
, "=>")) {
132 // [{(, bareword=>: hash literal
// Backtracking helper: loops while p is above 0 and the style at p is either
// SCE_PL_DEFAULT or SCE_PL_COMMENTLINE.
//   styler - document accessor used to read styles
//   p      - position, taken by reference so the caller sees the final value
139 static void skipWhitespaceComment(LexAccessor
&styler
, Sci_PositionU
&p
) {
140 // when backtracking, we need to skip whitespace and comments
// the style at p is fetched and tested inside the loop condition itself
142 while ((p
> 0) && (style
= styler
.StyleAt(p
),
143 style
== SCE_PL_DEFAULT
|| style
== SCE_PL_COMMENTLINE
))
// Locate the lexeme that precedes position bk, ignoring whitespace and comments.
//   styler - document accessor
//   bk     - in/out position; first moved by skipWhitespaceComment()
//   style  - out: set to the style found at bk after the skip
// Returns an int; styleCheckSubPrototype() stores it as the lexeme length and
// treats 0 as "no lexeme found".
147 static int findPrevLexeme(LexAccessor
&styler
, Sci_PositionU
&bk
, int &style
) {
148 // scan backward past whitespace and comments to find a lexeme
149 skipWhitespaceComment(styler
, bk
);
153 style
= styler
.StyleAt(bk
);
154 while (bk
> 0) { // find extent of lexeme
// the lexeme continues while the preceding position carries the same style
155 if (styler
.StyleAt(bk
- 1) == style
) {
// Starting from bk, search backward for the '{' that balances a '}' and report
// the style of the significant item in front of that brace.
//   styler - document accessor
//   bk     - starting position for the backward search (local copy)
// Returns the style at the position reached after skipping whitespace/comments
// when a balanced '{' was found with bk > 0; otherwise SCE_PL_DEFAULT.
163 static int styleBeforeBracePair(LexAccessor
&styler
, Sci_PositionU bk
) {
164 // backtrack to find open '{' corresponding to a '}', balanced
165 // return significant style to be tested for '/' disambiguation
168 return SCE_PL_DEFAULT
;
// only characters styled as operators take part in the brace matching
170 if (styler
.StyleAt(bk
) == SCE_PL_OPERATOR
) {
171 int bkch
= static_cast<unsigned char>(styler
.SafeGetCharAt(bk
));
172 if (bkch
== ';') { // early out
174 } else if (bkch
== '}') {
176 } else if (bkch
== '{') {
// stop as soon as the brace count returns to zero
177 if (--braceCount
== 0) break;
181 if (bk
> 0 && braceCount
== 0) {
182 // balanced { found, bk > 0, skip more whitespace/comments
184 skipWhitespaceComment(styler
, bk
);
185 return styler
.StyleAt(bk
);
// no balanced brace found
187 return SCE_PL_DEFAULT
;
// Classify the identifier that ends at bk into a sub-style code.
//   styler - document accessor
//   bk     - position inside/at the end of the identifier (local copy)
// Returns 3 for a bare identifier.
// NOTE(review): the codes returned for the inputsymbol ('>') case and for an
// identifier preceded by "->" or "::" should be documented once confirmed.
190 static int styleCheckIdentifier(LexAccessor
&styler
, Sci_PositionU bk
) {
191 // backtrack to classify sub-styles of identifier under test
192 // return sub-style to be tested for '/' disambiguation
193 if (styler
.SafeGetCharAt(bk
) == '>') // inputsymbol, like <foo>
195 // backtrack to check for possible "->" or "::" before identifier
196 while (bk
> 0 && styler
.StyleAt(bk
) == SCE_PL_IDENTIFIER
) {
// examine the style of what precedes the identifier
200 int bkstyle
= styler
.StyleAt(bk
);
201 if (bkstyle
== SCE_PL_DEFAULT
202 || bkstyle
== SCE_PL_COMMENTLINE
) {
203 // skip whitespace, comments
204 } else if (bkstyle
== SCE_PL_OPERATOR
) {
205 // test for "->" and "::"
206 if (styler
.Match(bk
- 1, "->") || styler
.Match(bk
- 1, "::"))
209 return 3; // bare identifier
// Scan forward over the current line to classify it for POD styling.
//   styler - document accessor
//   pos    - in/out scan position; advanced as the line is consumed
//   endPos - the scan never reads at or beyond this position
// The classification is tracked in 'state', which takes the values
// SCE_PL_DEFAULT and SCE_PL_POD_VERB below; callers in Lex() compare the result
// against SCE_PL_DEFAULT, SCE_PL_POD and SCE_PL_POD_VERB.
215 static int podLineScan(LexAccessor
&styler
, Sci_PositionU
&pos
, Sci_PositionU endPos
) {
216 // forward scan the current line to classify line for POD style
218 while (pos
< endPos
) {
219 int ch
= static_cast<unsigned char>(styler
.SafeGetCharAt(pos
));
220 if (ch
== '\n' || ch
== '\r') {
// treat CR LF as a single line end by stepping over the CR
221 if (ch
== '\r' && styler
.SafeGetCharAt(pos
+ 1) == '\n') pos
++;
224 if (IsASpaceOrTab(ch
)) { // whitespace, take note
226 state
= SCE_PL_DEFAULT
;
227 } else if (state
== SCE_PL_DEFAULT
) { // verbatim POD line
228 state
= SCE_PL_POD_VERB
;
229 } else if (state
!= SCE_PL_POD_VERB
) { // regular POD line
235 state
= SCE_PL_DEFAULT
;
// Decide whether the text leading up to bk is a subroutine declaration, i.e.
// whether a subroutine prototype may start here.
//   styler - document accessor
//   bk     - position from which lexemes are collected backward (local copy)
// Lexeme pairs are fetched with findPrevLexeme() and fed to a small state
// machine (SUB_BEGIN -> SUB_HAS_PROTO / SUB_HAS_ATTRIB / SUB_HAS_MODULE ->
// SUB_HAS_SUB). Returns true only when the machine ends in SUB_HAS_SUB.
239 static bool styleCheckSubPrototype(LexAccessor
&styler
, Sci_PositionU bk
) {
240 // backtrack to identify if we're starting a subroutine prototype
241 // we also need to ignore whitespace/comments, format is like:
242 // sub abc::pqr :const :prototype(...)
243 // lexemes are tested in pairs, e.g. '::'+'pqr', ':'+'const', etc.
244 // and a state machine generates legal subroutine syntax matches
246 int state
= SUB_BEGIN
;
248 // find two lexemes, lexeme 2 follows lexeme 1
249 int style2
= SCE_PL_DEFAULT
;
250 Sci_PositionU pos2
= bk
;
251 int len2
= findPrevLexeme(styler
, pos2
, style2
);
252 int style1
= SCE_PL_DEFAULT
;
253 Sci_PositionU pos1
= pos2
;
// step in front of lexeme 2 (guarding against position 0) to look for lexeme 1
254 if (pos1
> 0) pos1
--;
255 int len1
= findPrevLexeme(styler
, pos1
, style1
);
256 if (len1
== 0 || len2
== 0) // lexeme pair must exist
259 // match parts of syntax, if invalid subroutine syntax, break off
260 if (style1
== SCE_PL_OPERATOR
&& len1
== 1 &&
261 styler
.SafeGetCharAt(pos1
) == ':') { // ':'
262 if (style2
== SCE_PL_IDENTIFIER
|| style2
== SCE_PL_WORD
) {
// 9 is the length of the word "prototype"
263 if (len2
== 9 && styler
.Match(pos2
, "prototype")) { // ':' 'prototype'
264 if (state
== SUB_BEGIN
) {
265 state
= SUB_HAS_PROTO
;
268 } else { // ':' <attribute>
269 if (state
== SUB_HAS_PROTO
|| state
== SUB_HAS_ATTRIB
) {
270 state
= SUB_HAS_ATTRIB
;
276 } else if (style1
== SCE_PL_OPERATOR
&& len1
== 2 &&
277 styler
.Match(pos1
, "::")) { // '::'
278 if (style2
== SCE_PL_IDENTIFIER
) { // '::' <identifier>
279 state
= SUB_HAS_MODULE
;
282 } else if (style1
== SCE_PL_WORD
&& len1
== 3 &&
283 styler
.Match(pos1
, "sub")) { // 'sub'
284 if (style2
== SCE_PL_IDENTIFIER
) { // 'sub' <identifier>
290 bk
= pos1
; // set position for finding next lexeme pair
292 } while (state
!= SUB_HAS_SUB
);
293 return (state
== SUB_HAS_SUB
);
// Translate the lexer's internal number state (a PERLNUM_* value) into the
// style used to display the literal.
//   numberStyle - one of the PERLNUM_* values
// Vector literals (PERLNUM_VECTOR, PERLNUM_V_VECTOR) are shown as
// SCE_PL_STRING; PERLNUM_BAD has a branch of its own; every other state is
// shown as SCE_PL_NUMBER.
296 static int actualNumStyle(int numberStyle
) {
297 if (numberStyle
== PERLNUM_VECTOR
|| numberStyle
== PERLNUM_V_VECTOR
) {
298 return SCE_PL_STRING
;
// PERLNUM_BAD is assigned by Lex() when it marks invalid octal/binary numbers
299 } else if (numberStyle
== PERLNUM_BAD
) {
302 return SCE_PL_NUMBER
;
// Return the closing delimiter that pairs with the opening delimiter 'ch'.
// The four bracket characters ( [ { < map to their mirrored counterparts;
// any other character is returned unchanged, since a non-bracket delimiter
// is closed by the same character that opened it.
static int opposite(int ch) {
	switch (ch) {
	case '(': return ')';
	case '[': return ']';
	case '{': return '}';
	case '<': return '>';
	default:  return ch;
	}
}
// Inspect the characters of a line to decide whether it is a comment line.
//   line   - line number to inspect
//   styler - document accessor
// The scan looks for a '#' styled as SCE_PL_COMMENTLINE; any other character
// that is not a space or tab takes the second branch.
313 static bool IsCommentLine(Sci_Position line
, LexAccessor
&styler
) {
314 Sci_Position pos
= styler
.LineStart(line
);
// one position before the start of the following line
315 Sci_Position eol_pos
= styler
.LineStart(line
+ 1) - 1;
316 for (Sci_Position i
= pos
; i
< eol_pos
; i
++) {
318 int style
= styler
.StyleAt(i
);
// a '#' only counts when the lexer has styled it as a line comment
319 if (ch
== '#' && style
== SCE_PL_COMMENTLINE
)
321 else if (!IsASpaceOrTab(ch
))
// Check whether a line begins with the keyword "package".
//   line   - line number to inspect
//   styler - document accessor
// Both conditions must hold at the first position of the line: the style is
// SCE_PL_WORD and the text matches "package".
327 static bool IsPackageLine(Sci_Position line
, LexAccessor
&styler
) {
328 Sci_Position pos
= styler
.LineStart(line
);
329 int style
= styler
.StyleAt(pos
);
330 if (style
== SCE_PL_WORD
&& styler
.Match(pos
, "package")) {
// Derive a POD heading level from the character located 5 positions after pos.
//   pos    - document position of the POD command (presumably the '=' of
//            "=headN" -- TODO confirm against the caller)
//   styler - document accessor
// Only the digits '1' through '4' are recognised as heading levels.
336 static int PodHeadingLevel(Sci_Position pos
, LexAccessor
&styler
) {
337 int lvl
= static_cast<unsigned char>(styler
.SafeGetCharAt(pos
+ 5));
338 if (lvl
>= '1' && lvl
<= '4') {
344 // An individual named option for use in an OptionSet
346 // Options used for LexerPerl
// Each member is bound to a property name by OptionSetPerl via DefineProperty().
351 // Custom folding of POD and packages
352 bool foldPOD
; // fold.perl.pod
353 // Enable folding Pod blocks when using the Perl lexer.
354 bool foldPackage
; // fold.perl.package
355 // Enable folding packages when using the Perl lexer.
// bound to the "fold.perl.comment.explicit" property
357 bool foldCommentExplicit
;
// explicit folding is switched on by default
367 foldCommentExplicit
= true;
// Descriptions of the lexer's word lists; handed to DefineWordListSets() by
// OptionSetPerl.
372 static const char *const perlWordListDesc
[] = {
// Option set for the Perl lexer: binds each property name to the OptionsPerl
// member that stores its value and registers the word list descriptions.
377 struct OptionSetPerl
: public OptionSet
<OptionsPerl
> {
// general folding properties (registered without a description string)
379 DefineProperty("fold", &OptionsPerl::fold
);
381 DefineProperty("fold.comment", &OptionsPerl::foldComment
);
383 DefineProperty("fold.compact", &OptionsPerl::foldCompact
);
// Perl-specific folding properties, each with a description string
385 DefineProperty("fold.perl.pod", &OptionsPerl::foldPOD
,
386 "Set to 0 to disable folding Pod blocks when using the Perl lexer.");
388 DefineProperty("fold.perl.package", &OptionsPerl::foldPackage
,
389 "Set to 0 to disable folding packages when using the Perl lexer.");
391 DefineProperty("fold.perl.comment.explicit", &OptionsPerl::foldCommentExplicit
,
392 "Set to 0 to disable explicit folding.");
394 DefineProperty("fold.perl.at.else", &OptionsPerl::foldAtElse
,
395 "This option enables Perl folding on a \"} else {\" line of an if statement.");
397 DefineWordListSets(perlWordListDesc
);
// Perl lexer object implementing the ILexer interface. It owns the character
// sets shared by the lexing helpers and the option set describing the lexer's
// properties and word lists.
401 class LexerPerl
: public ILexer
{
// character classes: first character of a word / any character of a word
402 CharacterSet setWordStart
;
403 CharacterSet setWord
;
// characters accepted after '$' as special variables (see InterpolateSegment)
404 CharacterSet setSpecialVar
;
// letters accepted after "$^" as control-char style variables
405 CharacterSet setControlVar
;
408 OptionSetPerl osPerl
;
// constructor initialiser list for the character sets
411 setWordStart(CharacterSet::setAlpha
, "_", 0x80, true),
412 setWord(CharacterSet::setAlphaNum
, "_", 0x80, true),
413 setSpecialVar(CharacterSet::setNone
, "\"$;<>&`'+,./\\%:=~!?@[]"),
414 setControlVar(CharacterSet::setNone
, "ACDEFHILMNOPRSTVWX") {
416 virtual ~LexerPerl() {
418 void SCI_METHOD
Release() {
421 int SCI_METHOD
Version() const {
// the property and word-list queries below are answered by the option set
424 const char *SCI_METHOD
PropertyNames() {
425 return osPerl
.PropertyNames();
427 int SCI_METHOD
PropertyType(const char *name
) {
428 return osPerl
.PropertyType(name
);
430 const char *SCI_METHOD
DescribeProperty(const char *name
) {
431 return osPerl
.DescribeProperty(name
);
433 Sci_Position SCI_METHOD
PropertySet(const char *key
, const char *val
);
434 const char *SCI_METHOD
DescribeWordListSets() {
435 return osPerl
.DescribeWordListSets();
437 Sci_Position SCI_METHOD
WordListSet(int n
, const char *wl
);
// styling and folding entry points; defined outside the class body
438 void SCI_METHOD
Lex(Sci_PositionU startPos
, Sci_Position length
, int initStyle
, IDocument
*pAccess
);
439 void SCI_METHOD
Fold(Sci_PositionU startPos
, Sci_Position length
, int initStyle
, IDocument
*pAccess
);
441 void *SCI_METHOD
PrivateCall(int, void *) {
// factory: returns a newly allocated LexerPerl
445 static ILexer
*LexerFactoryPerl() {
446 return new LexerPerl();
// lexing helpers; defined outside the class body
448 int InputSymbolScan(StyleContext
&sc
);
449 void InterpolateSegment(StyleContext
&sc
, int maxSeg
, bool isPattern
=false);
// Set a lexer property.
//   key - property name
//   val - new value, as text
// The request is forwarded to the option set, which stores the value in
// 'options'; the branch below is taken when that call returns true.
452 Sci_Position SCI_METHOD
LexerPerl::PropertySet(const char *key
, const char *val
) {
453 if (osPerl
.PropertySet(&options
, key
, val
)) {
// Replace one of the lexer's word lists.
//   n  - index selecting the word list
//   wl - text of the new word list
// Returns firstModification: it starts at -1 and is set to 0 when the selected
// list differs from the newly built one.
459 Sci_Position SCI_METHOD
LexerPerl::WordListSet(int n
, const char *wl
) {
// stays null unless n selects a known word list
460 WordList
*wordListN
= 0;
463 wordListN
= &keywords
;
466 Sci_Position firstModification
= -1;
// only report a modification when the list content actually changes
470 if (*wordListN
!= wlNew
) {
472 firstModification
= 0;
475 return firstModification
;
// Look ahead from the current position of sc for a '>' that closes an input
// symbol (file handle) on the same line.
//   sc - style context positioned at the candidate input symbol
// Characters are read with GetRelativeCharacter() until it yields 0; a line
// end, a '>' and the "<=>" operator are the cases tested along the way.
478 int LexerPerl::InputSymbolScan(StyleContext
&sc
) {
479 // forward scan for matching > on same line; file handles
// sLen is incremented before each read
481 while ((c
= sc
.GetRelativeCharacter(++sLen
)) != 0) {
482 if (c
== '\r' || c
== '\n') {
484 } else if (c
== '>') {
485 if (sc
.Match("<=>")) // '<=>' case
// Style one segment of an interpolating string or pattern, switching between
// the base style and its interpolated-variable style (base + INTERPOLATE_SHIFT).
//   sc        - style context positioned at the start of the segment
//   maxSeg    - segment length; every look-ahead offset is checked against it
//   isPattern - when true the '$' extras '(' ')' '|' and the '@' extras
//               '+' '-' are not accepted as variable names
493 void LexerPerl::InterpolateSegment(StyleContext
&sc
, int maxSeg
, bool isPattern
) {
494 // interpolate a segment (with no active backslashes or delimiters within)
495 // switch in or out of an interpolation style or continue current style
496 // commit variable patterns if found, trim segment, repeat until done
// a variable needs at least a sigil plus one more character
500 if ((maxSeg
> 1) && (sc
.ch
== '$' || sc
.ch
== '@')) {
501 // $#[$]*word [$@][$]*word (where word or {word} is always present)
504 if (sc
.ch
== '$' && sc
.chNext
== '#') { // starts with $#
507 while ((maxSeg
> sLen
) && (sc
.GetRelativeCharacter(sLen
) == '$')) // >0 $ dereference within
509 if ((maxSeg
> sLen
) && (sc
.GetRelativeCharacter(sLen
) == '{')) { // { start for {word}
514 int c
= sc
.GetRelativeCharacter(sLen
);
515 if (setWordStart
.Contains(c
)) { // word (various)
// consume the remaining word characters
518 while (maxSeg
> sLen
) {
519 if (!setWord
.Contains(sc
.GetRelativeCharacter(sLen
)))
523 } else if (braces
&& IsADigit(c
) && (sLen
== 2)) { // digit for ${digit}
529 if ((maxSeg
> sLen
) && (sc
.GetRelativeCharacter(sLen
) == '}')) { // } end for {word}
// second chance: sigil-specific special variables
535 if (!isVar
&& (maxSeg
> 1)) { // $- or @-specific variable patterns
539 if (IsADigit(c
)) { // $[0-9] and slurp trailing digits
542 while ((maxSeg
> sLen
) && IsADigit(sc
.GetRelativeCharacter(sLen
)))
544 } else if (setSpecialVar
.Contains(c
)) { // $ special variables
547 } else if (!isPattern
&& ((c
== '(') || (c
== ')') || (c
== '|'))) { // $ additional
550 } else if (c
== '^') { // $^A control-char style
552 if ((maxSeg
> sLen
) && setControlVar
.Contains(sc
.GetRelativeCharacter(sLen
))) {
557 } else if (sc
.ch
== '@') {
559 if (!isPattern
&& ((c
== '+') || (c
== '-'))) { // @ specials non-pattern
565 if (isVar
) { // commit as interpolated variable or normal character
// styles below SCE_PL_STRING_VAR are base styles: shift up to the _VAR style
566 if (sc
.state
< SCE_PL_STRING_VAR
)
567 sc
.SetState(sc
.state
+ INTERPOLATE_SHIFT
);
// styles at or above SCE_PL_STRING_VAR are _VAR styles: shift back down
571 if (sc
.state
>= SCE_PL_STRING_VAR
)
572 sc
.SetState(sc
.state
- INTERPOLATE_SHIFT
);
577 if (sc
.state
>= SCE_PL_STRING_VAR
)
578 sc
.SetState(sc
.state
- INTERPOLATE_SHIFT
);
581 void SCI_METHOD
LexerPerl::Lex(Sci_PositionU startPos
, Sci_Position length
, int initStyle
, IDocument
*pAccess
) {
582 LexAccessor
styler(pAccess
);
584 // keywords that forces /PATTERN/ at all times; should track vim's behaviour
586 reWords
.Set("elsif if split while");
589 CharacterSet
setSingleCharOp(CharacterSet::setNone
, "rwxoRWXOezsfdlpSbctugkTBMAC");
590 // lexing of "%*</" operators is non-trivial; these are missing in the set below
591 CharacterSet
setPerlOperator(CharacterSet::setNone
, "^&\\()-+=|{}[]:;>,?!.~");
592 CharacterSet
setQDelim(CharacterSet::setNone
, "qrwx");
593 CharacterSet
setModifiers(CharacterSet::setAlpha
);
594 CharacterSet
setPreferRE(CharacterSet::setNone
, "*/<%");
595 // setArray and setHash also accepts chars for special vars like $_,
596 // which are then truncated when the next char does not match setVar
597 CharacterSet
setVar(CharacterSet::setAlphaNum
, "#$_'", 0x80, true);
598 CharacterSet
setArray(CharacterSet::setAlpha
, "#$_+-", 0x80, true);
599 CharacterSet
setHash(CharacterSet::setAlpha
, "#$_!^+-", 0x80, true);
600 CharacterSet
&setPOD
= setModifiers
;
601 CharacterSet
setNonHereDoc(CharacterSet::setDigits
, "=$@");
602 CharacterSet
setHereDocDelim(CharacterSet::setAlphaNum
, "_");
603 CharacterSet
setSubPrototype(CharacterSet::setNone
, "\\[$@%&*+];_ \t");
604 CharacterSet
setRepetition(CharacterSet::setDigits
, ")\"'");
605 // for format identifiers
606 CharacterSet
setFormatStart(CharacterSet::setAlpha
, "_=");
607 CharacterSet
&setFormat
= setHereDocDelim
;
609 // Lexer for perl often has to backtrack to start of current style to determine
610 // which characters are being used as quotes, how deeply nested is the
611 // start position and what the termination string is for HERE documents.
613 class HereDocCls
{ // Class to manage HERE doc sequence
616 // 0: '<<' encountered
617 // 1: collect the delimiter
618 // 2: here doc text (lines after the delimiter)
619 int Quote
; // the char after '<<'
620 bool Quoted
; // true if Quote in ('\'','"','`')
621 int DelimiterLength
; // strlen(Delimiter)
622 char Delimiter
[HERE_DELIM_MAX
]; // the Delimiter
630 void Append(int ch
) {
631 Delimiter
[DelimiterLength
++] = static_cast<char>(ch
);
632 Delimiter
[DelimiterLength
] = '\0';
637 HereDocCls HereDoc
; // TODO: FIFO for stacked here-docs
639 class QuoteCls
{ // Class to manage quote pairs
647 void New(int r
= 1) {
661 // additional state for number lexing
662 int numState
= PERLNUM_DECIMAL
;
665 Sci_PositionU endPos
= startPos
+ length
;
667 // Backtrack to beginning of style if required...
668 // If in a long distance lexical state, backtrack to find quote characters.
669 // Includes strings (may be multi-line), numbers (additional state), format
670 // bodies, as well as POD sections.
671 if (initStyle
== SCE_PL_HERE_Q
672 || initStyle
== SCE_PL_HERE_QQ
673 || initStyle
== SCE_PL_HERE_QX
674 || initStyle
== SCE_PL_FORMAT
675 || initStyle
== SCE_PL_HERE_QQ_VAR
676 || initStyle
== SCE_PL_HERE_QX_VAR
678 // backtrack through multiple styles to reach the delimiter start
679 int delim
= (initStyle
== SCE_PL_FORMAT
) ? SCE_PL_FORMAT_IDENT
:SCE_PL_HERE_DELIM
;
680 while ((startPos
> 1) && (styler
.StyleAt(startPos
) != delim
)) {
683 startPos
= styler
.LineStart(styler
.GetLine(startPos
));
684 initStyle
= styler
.StyleAt(startPos
- 1);
686 if (initStyle
== SCE_PL_STRING
687 || initStyle
== SCE_PL_STRING_QQ
688 || initStyle
== SCE_PL_BACKTICKS
689 || initStyle
== SCE_PL_STRING_QX
690 || initStyle
== SCE_PL_REGEX
691 || initStyle
== SCE_PL_STRING_QR
692 || initStyle
== SCE_PL_REGSUBST
693 || initStyle
== SCE_PL_STRING_VAR
694 || initStyle
== SCE_PL_STRING_QQ_VAR
695 || initStyle
== SCE_PL_BACKTICKS_VAR
696 || initStyle
== SCE_PL_STRING_QX_VAR
697 || initStyle
== SCE_PL_REGEX_VAR
698 || initStyle
== SCE_PL_STRING_QR_VAR
699 || initStyle
== SCE_PL_REGSUBST_VAR
701 // for interpolation, must backtrack through a mix of two different styles
702 int otherStyle
= (initStyle
>= SCE_PL_STRING_VAR
) ?
703 initStyle
- INTERPOLATE_SHIFT
: initStyle
+ INTERPOLATE_SHIFT
;
704 while (startPos
> 1) {
705 int st
= styler
.StyleAt(startPos
- 1);
706 if ((st
!= initStyle
) && (st
!= otherStyle
))
710 initStyle
= SCE_PL_DEFAULT
;
711 } else if (initStyle
== SCE_PL_STRING_Q
712 || initStyle
== SCE_PL_STRING_QW
713 || initStyle
== SCE_PL_XLAT
714 || initStyle
== SCE_PL_CHARACTER
715 || initStyle
== SCE_PL_NUMBER
716 || initStyle
== SCE_PL_IDENTIFIER
717 || initStyle
== SCE_PL_ERROR
718 || initStyle
== SCE_PL_SUB_PROTOTYPE
720 while ((startPos
> 1) && (styler
.StyleAt(startPos
- 1) == initStyle
)) {
723 initStyle
= SCE_PL_DEFAULT
;
724 } else if (initStyle
== SCE_PL_POD
725 || initStyle
== SCE_PL_POD_VERB
727 // POD backtracking finds preceding blank lines and goes back past them
728 Sci_Position ln
= styler
.GetLine(startPos
);
730 initStyle
= styler
.StyleAt(styler
.LineStart(--ln
));
731 if (initStyle
== SCE_PL_POD
|| initStyle
== SCE_PL_POD_VERB
) {
732 while (ln
> 0 && styler
.GetLineState(ln
) == SCE_PL_DEFAULT
)
735 startPos
= styler
.LineStart(++ln
);
736 initStyle
= styler
.StyleAt(startPos
- 1);
739 initStyle
= SCE_PL_DEFAULT
;
743 // backFlag, backPos are additional state to aid identifier corner cases.
744 // Look backwards past whitespace and comments in order to detect either
745 // operator or keyword. Later updated as we go along.
746 int backFlag
= BACK_NONE
;
747 Sci_PositionU backPos
= startPos
;
750 skipWhitespaceComment(styler
, backPos
);
751 if (styler
.StyleAt(backPos
) == SCE_PL_OPERATOR
)
752 backFlag
= BACK_OPERATOR
;
753 else if (styler
.StyleAt(backPos
) == SCE_PL_WORD
)
754 backFlag
= BACK_KEYWORD
;
758 StyleContext
sc(startPos
, endPos
- startPos
, initStyle
, styler
, static_cast<char>(STYLE_MAX
));
760 for (; sc
.More(); sc
.Forward()) {
762 // Determine if the current state should terminate.
764 case SCE_PL_OPERATOR
:
765 sc
.SetState(SCE_PL_DEFAULT
);
766 backFlag
= BACK_OPERATOR
;
767 backPos
= sc
.currentPos
;
769 case SCE_PL_IDENTIFIER
: // identifier, bareword, inputsymbol
770 if ((!setWord
.Contains(sc
.ch
) && sc
.ch
!= '\'')
771 || sc
.Match('.', '.')
772 || sc
.chPrev
== '>') { // end of inputsymbol
773 sc
.SetState(SCE_PL_DEFAULT
);
776 case SCE_PL_WORD
: // keyword, plus special cases
777 if (!setWord
.Contains(sc
.ch
)) {
779 sc
.GetCurrent(s
, sizeof(s
));
780 if ((strcmp(s
, "__DATA__") == 0) || (strcmp(s
, "__END__") == 0)) {
781 sc
.ChangeState(SCE_PL_DATASECTION
);
783 if ((strcmp(s
, "format") == 0)) {
784 sc
.SetState(SCE_PL_FORMAT_IDENT
);
787 sc
.SetState(SCE_PL_DEFAULT
);
789 backFlag
= BACK_KEYWORD
;
790 backPos
= sc
.currentPos
;
797 case SCE_PL_SYMBOLTABLE
:
798 if (sc
.Match(':', ':')) { // skip ::
800 } else if (!setVar
.Contains(sc
.ch
)) {
801 if (sc
.LengthCurrent() == 1) {
802 // Special variable: $(, $_ etc.
805 sc
.SetState(SCE_PL_DEFAULT
);
809 // if no early break, number style is terminated at "(go through)"
811 if (sc
.chNext
== '.') {
812 // double dot is always an operator (go through)
813 } else if (numState
<= PERLNUM_FLOAT_EXP
) {
814 // non-decimal number or float exponent, consume next dot
815 sc
.SetState(SCE_PL_OPERATOR
);
817 } else { // decimal or vectors allows dots
819 if (numState
== PERLNUM_DECIMAL
) {
820 if (dotCount
<= 1) // number with one dot in it
822 if (IsADigit(sc
.chNext
)) { // really a vector
823 numState
= PERLNUM_VECTOR
;
826 // number then dot (go through)
827 } else if (numState
== PERLNUM_HEX
) {
828 if (dotCount
<= 1 && IsADigit(sc
.chNext
, 16)) {
829 break; // hex with one dot is a hex float
831 sc
.SetState(SCE_PL_OPERATOR
);
834 // hex then dot (go through)
835 } else if (IsADigit(sc
.chNext
)) // vectors
837 // vector then dot (go through)
839 } else if (sc
.ch
== '_') {
840 // permissive underscoring for number and vector literals
842 } else if (numState
== PERLNUM_DECIMAL
) {
843 if (sc
.ch
== 'E' || sc
.ch
== 'e') { // exponent, sign
844 numState
= PERLNUM_FLOAT_EXP
;
845 if (sc
.chNext
== '+' || sc
.chNext
== '-') {
849 } else if (IsADigit(sc
.ch
))
851 // number then word (go through)
852 } else if (numState
== PERLNUM_HEX
) {
853 if (sc
.ch
== 'P' || sc
.ch
== 'p') { // hex float exponent, sign
854 numState
= PERLNUM_FLOAT_EXP
;
855 if (sc
.chNext
== '+' || sc
.chNext
== '-') {
859 } else if (IsADigit(sc
.ch
, 16))
861 // hex or hex float then word (go through)
862 } else if (numState
== PERLNUM_VECTOR
|| numState
== PERLNUM_V_VECTOR
) {
863 if (IsADigit(sc
.ch
)) // vector
865 if (setWord
.Contains(sc
.ch
) && dotCount
== 0) { // change to word
866 sc
.ChangeState(SCE_PL_IDENTIFIER
);
869 // vector then word (go through)
870 } else if (IsADigit(sc
.ch
)) {
871 if (numState
== PERLNUM_FLOAT_EXP
) {
873 } else if (numState
== PERLNUM_OCTAL
) {
874 if (sc
.ch
<= '7') break;
875 } else if (numState
== PERLNUM_BINARY
) {
876 if (sc
.ch
<= '1') break;
878 // mark invalid octal, binary numbers (go through)
879 numState
= PERLNUM_BAD
;
882 // complete current number or vector
883 sc
.ChangeState(actualNumStyle(numState
));
884 sc
.SetState(SCE_PL_DEFAULT
);
886 case SCE_PL_COMMENTLINE
:
888 sc
.SetState(SCE_PL_DEFAULT
);
891 case SCE_PL_HERE_DELIM
:
892 if (HereDoc
.State
== 0) { // '<<' encountered
893 int delim_ch
= sc
.chNext
;
894 Sci_Position ws_skip
= 0;
895 HereDoc
.State
= 1; // pre-init HERE doc class
896 HereDoc
.Quote
= sc
.chNext
;
897 HereDoc
.Quoted
= false;
898 HereDoc
.DelimiterLength
= 0;
899 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
] = '\0';
900 if (IsASpaceOrTab(delim_ch
)) {
901 // skip whitespace; legal only for quoted delimiters
902 Sci_PositionU i
= sc
.currentPos
+ 1;
903 while ((i
< endPos
) && IsASpaceOrTab(delim_ch
)) {
905 delim_ch
= static_cast<unsigned char>(styler
.SafeGetCharAt(i
));
907 ws_skip
= i
- sc
.currentPos
- 1;
909 if (delim_ch
== '\'' || delim_ch
== '"' || delim_ch
== '`') {
910 // a quoted here-doc delimiter; skip any whitespace
911 sc
.Forward(ws_skip
+ 1);
912 HereDoc
.Quote
= delim_ch
;
913 HereDoc
.Quoted
= true;
914 } else if ((ws_skip
== 0 && setNonHereDoc
.Contains(sc
.chNext
))
916 // left shift << or <<= operator cases
917 // restore position if operator
918 sc
.ChangeState(SCE_PL_OPERATOR
);
919 sc
.ForwardSetState(SCE_PL_DEFAULT
);
920 backFlag
= BACK_OPERATOR
;
921 backPos
= sc
.currentPos
;
924 // specially handle initial '\' for identifier
925 if (ws_skip
== 0 && HereDoc
.Quote
== '\\')
927 // an unquoted here-doc delimiter, no special handling
928 // (cannot be prefixed by spaces/tabs), or
929 // symbols terminates; deprecated zero-length delimiter
931 } else if (HereDoc
.State
== 1) { // collect the delimiter
932 backFlag
= BACK_NONE
;
933 if (HereDoc
.Quoted
) { // a quoted here-doc delimiter
934 if (sc
.ch
== HereDoc
.Quote
) { // closing quote => end of delimiter
935 sc
.ForwardSetState(SCE_PL_DEFAULT
);
936 } else if (!sc
.atLineEnd
) {
937 if (sc
.Match('\\', static_cast<char>(HereDoc
.Quote
))) { // escaped quote
940 if (sc
.ch
!= '\r') { // skip CR if CRLF
941 int i
= 0; // else append char, possibly an extended char
942 while (i
< sc
.width
) {
943 HereDoc
.Append(static_cast<unsigned char>(styler
.SafeGetCharAt(sc
.currentPos
+ i
)));
948 } else { // an unquoted here-doc delimiter, no extended charsets
949 if (setHereDocDelim
.Contains(sc
.ch
)) {
950 HereDoc
.Append(sc
.ch
);
952 sc
.SetState(SCE_PL_DEFAULT
);
955 if (HereDoc
.DelimiterLength
>= HERE_DELIM_MAX
- 1) {
956 sc
.SetState(SCE_PL_ERROR
);
964 // also implies HereDoc.State == 2
966 if (HereDoc
.DelimiterLength
== 0 || sc
.Match(HereDoc
.Delimiter
)) {
967 int c
= sc
.GetRelative(HereDoc
.DelimiterLength
);
968 if (c
== '\r' || c
== '\n') { // peek first, do not consume match
969 sc
.ForwardBytes(HereDoc
.DelimiterLength
);
970 sc
.SetState(SCE_PL_DEFAULT
);
971 backFlag
= BACK_NONE
;
978 if (sc
.state
== SCE_PL_HERE_Q
) { // \EOF and 'EOF' non-interpolated
979 while (!sc
.atLineEnd
)
983 while (!sc
.atLineEnd
) { // "EOF" and `EOF` interpolated
984 int c
, sLen
= 0, endType
= 0;
985 while ((c
= sc
.GetRelativeCharacter(sLen
)) != 0) {
986 // scan to break string into segments
989 } else if (c
== '\r' || c
== '\n') {
994 if (sLen
> 0) // process non-empty segments
995 InterpolateSegment(sc
, sLen
);
998 // \ at end-of-line does not appear to have any effect, skip
999 if (sc
.ch
!= '\r' && sc
.ch
!= '\n')
1001 } else if (endType
== 2) {
1008 case SCE_PL_POD_VERB
: {
1009 Sci_PositionU fw
= sc
.currentPos
;
1010 Sci_Position ln
= styler
.GetLine(fw
);
1011 if (sc
.atLineStart
&& sc
.Match("=cut")) { // end of POD
1012 sc
.SetState(SCE_PL_POD
);
1014 sc
.SetState(SCE_PL_DEFAULT
);
1015 styler
.SetLineState(ln
, SCE_PL_POD
);
1018 int pod
= podLineScan(styler
, fw
, endPos
); // classify POD line
1019 styler
.SetLineState(ln
, pod
);
1020 if (pod
== SCE_PL_DEFAULT
) {
1021 if (sc
.state
== SCE_PL_POD_VERB
) {
1022 Sci_PositionU fw2
= fw
;
1023 while (fw2
< (endPos
- 1) && pod
== SCE_PL_DEFAULT
) {
1024 fw
= fw2
++; // penultimate line (last blank line)
1025 pod
= podLineScan(styler
, fw2
, endPos
);
1026 styler
.SetLineState(styler
.GetLine(fw2
), pod
);
1028 if (pod
== SCE_PL_POD
) { // truncate verbatim POD early
1029 sc
.SetState(SCE_PL_POD
);
1034 if (pod
== SCE_PL_POD_VERB
// still part of current paragraph
1035 && (styler
.GetLineState(ln
- 1) == SCE_PL_POD
)) {
1037 styler
.SetLineState(ln
, pod
);
1038 } else if (pod
== SCE_PL_POD
1039 && (styler
.GetLineState(ln
- 1) == SCE_PL_POD_VERB
)) {
1040 pod
= SCE_PL_POD_VERB
;
1041 styler
.SetLineState(ln
, pod
);
1045 sc
.ForwardBytes(fw
- sc
.currentPos
); // commit style
1049 case SCE_PL_STRING_QR
:
1050 if (Quote
.Rep
<= 0) {
1051 if (!setModifiers
.Contains(sc
.ch
))
1052 sc
.SetState(SCE_PL_DEFAULT
);
1053 } else if (!Quote
.Up
&& !IsASpace(sc
.ch
)) {
1056 int c
, sLen
= 0, endType
= 0;
1057 while ((c
= sc
.GetRelativeCharacter(sLen
)) != 0) {
1058 // scan to break string into segments
1061 } else if (c
== '\\' && Quote
.Up
!= '\\') {
1063 } else if (c
== Quote
.Down
) {
1065 if (Quote
.Count
== 0) {
1069 } else if (c
== Quote
.Up
)
1073 if (sLen
> 0) { // process non-empty segments
1074 if (Quote
.Up
!= '\'') {
1075 InterpolateSegment(sc
, sLen
, true);
1076 } else // non-interpolated path
1083 case SCE_PL_REGSUBST
:
1085 if (Quote
.Rep
<= 0) {
1086 if (!setModifiers
.Contains(sc
.ch
))
1087 sc
.SetState(SCE_PL_DEFAULT
);
1088 } else if (!Quote
.Up
&& !IsASpace(sc
.ch
)) {
1091 int c
, sLen
= 0, endType
= 0;
1092 bool isPattern
= (Quote
.Rep
== 2);
1093 while ((c
= sc
.GetRelativeCharacter(sLen
)) != 0) {
1094 // scan to break string into segments
1095 if (c
== '\\' && Quote
.Up
!= '\\') {
1097 } else if (Quote
.Count
== 0 && Quote
.Rep
== 1) {
1098 // We matched something like s(...) or tr{...}, Perl 5.10
1099 // appears to allow almost any character for use as the
1100 // next delimiters. Whitespace and comments are accepted in
1101 // between, but we'll limit to whitespace here.
1102 // For '#', if no whitespace in between, it's a delimiter.
1105 } else if (c
== '#' && IsASpaceOrTab(sc
.GetRelativeCharacter(sLen
- 1))) {
1110 } else if (c
== Quote
.Down
) {
1112 if (Quote
.Count
== 0) {
1116 if (Quote
.Up
== Quote
.Down
)
1120 } else if (c
== Quote
.Up
) {
1122 } else if (IsASpace(c
))
1126 if (sLen
> 0) { // process non-empty segments
1127 if (sc
.state
== SCE_PL_REGSUBST
&& Quote
.Up
!= '\'') {
1128 InterpolateSegment(sc
, sLen
, isPattern
);
1129 } else // non-interpolated path
1134 } else if (endType
== 3)
1135 sc
.SetState(SCE_PL_DEFAULT
);
1138 case SCE_PL_STRING_Q
:
1139 case SCE_PL_STRING_QQ
:
1140 case SCE_PL_STRING_QX
:
1141 case SCE_PL_STRING_QW
:
1143 case SCE_PL_CHARACTER
:
1144 case SCE_PL_BACKTICKS
:
1145 if (!Quote
.Down
&& !IsASpace(sc
.ch
)) {
1148 int c
, sLen
= 0, endType
= 0;
1149 while ((c
= sc
.GetRelativeCharacter(sLen
)) != 0) {
1150 // scan to break string into segments
1153 } else if (c
== '\\' && Quote
.Up
!= '\\') {
1155 } else if (c
== Quote
.Down
) {
1157 if (Quote
.Count
== 0) {
1160 } else if (c
== Quote
.Up
)
1164 if (sLen
> 0) { // process non-empty segments
1167 case SCE_PL_STRING_QQ
:
1168 case SCE_PL_BACKTICKS
:
1169 InterpolateSegment(sc
, sLen
);
1171 case SCE_PL_STRING_QX
:
1172 if (Quote
.Up
!= '\'') {
1173 InterpolateSegment(sc
, sLen
);
1176 // (continued for ' delim)
1177 default: // non-interpolated path
1183 } else if (endType
== 3)
1184 sc
.ForwardSetState(SCE_PL_DEFAULT
);
1187 case SCE_PL_SUB_PROTOTYPE
: {
1189 // forward scan; must all be valid proto characters
1190 while (setSubPrototype
.Contains(sc
.GetRelative(i
)))
1192 if (sc
.GetRelative(i
) == ')') { // valid sub prototype
1194 sc
.ForwardSetState(SCE_PL_DEFAULT
);
1196 // abandon prototype, restart from '('
1197 sc
.ChangeState(SCE_PL_OPERATOR
);
1198 sc
.SetState(SCE_PL_DEFAULT
);
1202 case SCE_PL_FORMAT
: {
1204 if (sc
.Match('.')) {
1206 if (sc
.atLineEnd
|| ((sc
.ch
== '\r' && sc
.chNext
== '\n')))
1207 sc
.SetState(SCE_PL_DEFAULT
);
1209 while (!sc
.atLineEnd
)
1216 // Needed for specific continuation styles (one follows the other)
1218 // continued from SCE_PL_WORD
1219 case SCE_PL_FORMAT_IDENT
:
1220 // occupies HereDoc state 3 to avoid clashing with HERE docs
1221 if (IsASpaceOrTab(sc
.ch
)) { // skip whitespace
1222 sc
.ChangeState(SCE_PL_DEFAULT
);
1223 while (IsASpaceOrTab(sc
.ch
) && !sc
.atLineEnd
)
1225 sc
.SetState(SCE_PL_FORMAT_IDENT
);
1227 if (setFormatStart
.Contains(sc
.ch
)) { // identifier or '='
1231 } while (setFormat
.Contains(sc
.ch
));
1233 while (IsASpaceOrTab(sc
.ch
) && !sc
.atLineEnd
)
1236 sc
.ForwardSetState(SCE_PL_DEFAULT
);
1239 // invalid identifier; inexact fallback, but hey
1240 sc
.ChangeState(SCE_PL_IDENTIFIER
);
1241 sc
.SetState(SCE_PL_DEFAULT
);
1244 sc
.ChangeState(SCE_PL_DEFAULT
); // invalid identifier
1246 backFlag
= BACK_NONE
;
1250 // Must check end of HereDoc states here before default state is handled
1251 if (HereDoc
.State
== 1 && sc
.atLineEnd
) {
1252 // Begin of here-doc (the line after the here-doc delimiter):
1253 // Lexically, the here-doc starts from the next line after the <<, but the
1254 // first line of here-doc seems to follow the style of the last EOL sequence
1255 int st_new
= SCE_PL_HERE_QQ
;
1257 if (HereDoc
.Quoted
) {
1258 if (sc
.state
== SCE_PL_HERE_DELIM
) {
1259 // Missing quote at end of string! We are stricter than perl.
1260 // Colour here-doc anyway while marking this bit as an error.
1261 sc
.ChangeState(SCE_PL_ERROR
);
1263 switch (HereDoc
.Quote
) {
1265 st_new
= SCE_PL_HERE_Q
;
1268 st_new
= SCE_PL_HERE_QQ
;
1271 st_new
= SCE_PL_HERE_QX
;
1275 if (HereDoc
.Quote
== '\\')
1276 st_new
= SCE_PL_HERE_Q
;
1278 sc
.SetState(st_new
);
1280 if (HereDoc
.State
== 3 && sc
.atLineEnd
) {
1281 // Start of format body.
1283 sc
.SetState(SCE_PL_FORMAT
);
1286 // Determine if a new state should be entered.
1287 if (sc
.state
== SCE_PL_DEFAULT
) {
1288 if (IsADigit(sc
.ch
) ||
1289 (IsADigit(sc
.chNext
) && (sc
.ch
== '.' || sc
.ch
== 'v'))) {
1290 sc
.SetState(SCE_PL_NUMBER
);
1291 backFlag
= BACK_NONE
;
1292 numState
= PERLNUM_DECIMAL
;
1294 if (sc
.ch
== '0') { // hex,bin,octal
1295 if (sc
.chNext
== 'x' || sc
.chNext
== 'X') {
1296 numState
= PERLNUM_HEX
;
1297 } else if (sc
.chNext
== 'b' || sc
.chNext
== 'B') {
1298 numState
= PERLNUM_BINARY
;
1299 } else if (IsADigit(sc
.chNext
)) {
1300 numState
= PERLNUM_OCTAL
;
1302 if (numState
!= PERLNUM_DECIMAL
) {
1305 } else if (sc
.ch
== 'v') { // vector
1306 numState
= PERLNUM_V_VECTOR
;
1308 } else if (setWord
.Contains(sc
.ch
)) {
1309 // if immediately prefixed by '::', always a bareword
1310 sc
.SetState(SCE_PL_WORD
);
1311 if (sc
.chPrev
== ':' && sc
.GetRelative(-2) == ':') {
1312 sc
.ChangeState(SCE_PL_IDENTIFIER
);
1314 Sci_PositionU bk
= sc
.currentPos
;
1315 Sci_PositionU fw
= sc
.currentPos
+ 1;
1316 // first check for possible quote-like delimiter
1317 if (sc
.ch
== 's' && !setWord
.Contains(sc
.chNext
)) {
1318 sc
.ChangeState(SCE_PL_REGSUBST
);
1320 } else if (sc
.ch
== 'm' && !setWord
.Contains(sc
.chNext
)) {
1321 sc
.ChangeState(SCE_PL_REGEX
);
1323 } else if (sc
.ch
== 'q' && !setWord
.Contains(sc
.chNext
)) {
1324 sc
.ChangeState(SCE_PL_STRING_Q
);
1326 } else if (sc
.ch
== 'y' && !setWord
.Contains(sc
.chNext
)) {
1327 sc
.ChangeState(SCE_PL_XLAT
);
1329 } else if (sc
.Match('t', 'r') && !setWord
.Contains(sc
.GetRelative(2))) {
1330 sc
.ChangeState(SCE_PL_XLAT
);
1334 } else if (sc
.ch
== 'q' && setQDelim
.Contains(sc
.chNext
)
1335 && !setWord
.Contains(sc
.GetRelative(2))) {
1336 if (sc
.chNext
== 'q') sc
.ChangeState(SCE_PL_STRING_QQ
);
1337 else if (sc
.chNext
== 'x') sc
.ChangeState(SCE_PL_STRING_QX
);
1338 else if (sc
.chNext
== 'r') sc
.ChangeState(SCE_PL_STRING_QR
);
1339 else sc
.ChangeState(SCE_PL_STRING_QW
); // sc.chNext == 'w'
1343 } else if (sc
.ch
== 'x' && (sc
.chNext
== '=' || // repetition
1344 !setWord
.Contains(sc
.chNext
) ||
1345 (setRepetition
.Contains(sc
.chPrev
) && IsADigit(sc
.chNext
)))) {
1346 sc
.ChangeState(SCE_PL_OPERATOR
);
1348 // if potentially a keyword, scan forward and grab word, then check
1349 // if it's really one; if yes, disambiguation test is performed
1350 // otherwise it is always a bareword and we skip a lot of scanning
1351 if (sc
.state
== SCE_PL_WORD
) {
1352 while (setWord
.Contains(static_cast<unsigned char>(styler
.SafeGetCharAt(fw
))))
1354 if (!isPerlKeyword(styler
.GetStartSegment(), fw
, keywords
, styler
)) {
1355 sc
.ChangeState(SCE_PL_IDENTIFIER
);
1358 // if already SCE_PL_IDENTIFIER, then no ambiguity, skip this
1359 // for quote-like delimiters/keywords, attempt to disambiguate
1360 // to select for bareword, change state -> SCE_PL_IDENTIFIER
1361 if (sc
.state
!= SCE_PL_IDENTIFIER
&& bk
> 0) {
1362 if (disambiguateBareword(styler
, bk
, fw
, backFlag
, backPos
, endPos
))
1363 sc
.ChangeState(SCE_PL_IDENTIFIER
);
1365 backFlag
= BACK_NONE
;
1366 } else if (sc
.ch
== '#') {
1367 sc
.SetState(SCE_PL_COMMENTLINE
);
1368 } else if (sc
.ch
== '\"') {
1369 sc
.SetState(SCE_PL_STRING
);
1372 backFlag
= BACK_NONE
;
1373 } else if (sc
.ch
== '\'') {
1374 if (sc
.chPrev
== '&' && setWordStart
.Contains(sc
.chNext
)) {
1376 sc
.SetState(SCE_PL_IDENTIFIER
);
1378 sc
.SetState(SCE_PL_CHARACTER
);
1382 backFlag
= BACK_NONE
;
1383 } else if (sc
.ch
== '`') {
1384 sc
.SetState(SCE_PL_BACKTICKS
);
1387 backFlag
= BACK_NONE
;
1388 } else if (sc
.ch
== '$') {
1389 sc
.SetState(SCE_PL_SCALAR
);
1390 if (sc
.chNext
== '{') {
1391 sc
.ForwardSetState(SCE_PL_OPERATOR
);
1392 } else if (IsASpace(sc
.chNext
)) {
1393 sc
.ForwardSetState(SCE_PL_DEFAULT
);
1396 if (sc
.Match('`', '`') || sc
.Match(':', ':')) {
1400 backFlag
= BACK_NONE
;
1401 } else if (sc
.ch
== '@') {
1402 sc
.SetState(SCE_PL_ARRAY
);
1403 if (setArray
.Contains(sc
.chNext
)) {
1404 // no special treatment
1405 } else if (sc
.chNext
== ':' && sc
.GetRelative(2) == ':') {
1407 } else if (sc
.chNext
== '{' || sc
.chNext
== '[') {
1408 sc
.ForwardSetState(SCE_PL_OPERATOR
);
1410 sc
.ChangeState(SCE_PL_OPERATOR
);
1412 backFlag
= BACK_NONE
;
1413 } else if (setPreferRE
.Contains(sc
.ch
)) {
1414 // Explicit backward peeking to set a consistent preferRE for
1415 // any slash found, so no longer need to track preferRE state.
1416 // Find first previous significant lexed element and interpret.
1417 // A few symbols shares this code for disambiguation.
1418 bool preferRE
= false;
1419 bool isHereDoc
= sc
.Match('<', '<');
1420 bool hereDocSpace
= false; // for: SCALAR [whitespace] '<<'
1421 Sci_PositionU bk
= (sc
.currentPos
> 0) ? sc
.currentPos
- 1: 0;
1424 if (styler
.StyleAt(bk
) == SCE_PL_DEFAULT
)
1425 hereDocSpace
= true;
1426 skipWhitespaceComment(styler
, bk
);
1428 // avoid backward scanning breakage
1431 int bkstyle
= styler
.StyleAt(bk
);
1432 int bkch
= static_cast<unsigned char>(styler
.SafeGetCharAt(bk
));
1434 case SCE_PL_OPERATOR
:
1436 if (bkch
== ')' || bkch
== ']') {
1438 } else if (bkch
== '}') {
1439 // backtrack by counting balanced brace pairs
1440 // needed to test for variables like ${}, @{} etc.
1441 bkstyle
= styleBeforeBracePair(styler
, bk
);
1442 if (bkstyle
== SCE_PL_SCALAR
1443 || bkstyle
== SCE_PL_ARRAY
1444 || bkstyle
== SCE_PL_HASH
1445 || bkstyle
== SCE_PL_SYMBOLTABLE
1446 || bkstyle
== SCE_PL_OPERATOR
) {
1449 } else if (bkch
== '+' || bkch
== '-') {
1450 if (bkch
== static_cast<unsigned char>(styler
.SafeGetCharAt(bk
- 1))
1451 && bkch
!= static_cast<unsigned char>(styler
.SafeGetCharAt(bk
- 2)))
1452 // exceptions for operators: unary suffixes ++, --
1456 case SCE_PL_IDENTIFIER
:
1458 bkstyle
= styleCheckIdentifier(styler
, bk
);
1459 if ((bkstyle
== 1) || (bkstyle
== 2)) {
1460 // inputsymbol or var with "->" or "::" before identifier
1462 } else if (bkstyle
== 3) {
1463 // bare identifier, test cases follows:
1465 // if '/', /PATTERN/ unless digit/space immediately after '/'
1466 // if '//', always expect defined-or operator to follow identifier
1467 if (IsASpace(sc
.chNext
) || IsADigit(sc
.chNext
) || sc
.chNext
== '/')
1469 } else if (sc
.ch
== '*' || sc
.ch
== '%') {
1470 if (IsASpace(sc
.chNext
) || IsADigit(sc
.chNext
) || sc
.Match('*', '*'))
1472 } else if (sc
.ch
== '<') {
1473 if (IsASpace(sc
.chNext
) || sc
.chNext
== '=')
1478 case SCE_PL_SCALAR
: // for $var<< case:
1479 if (isHereDoc
&& hereDocSpace
) // if SCALAR whitespace '<<', *always* a HERE doc
1484 // for HERE docs, always true
1486 // adopt heuristics similar to vim-style rules:
1487 // keywords always forced as /PATTERN/: split, if, elsif, while
1488 // everything else /PATTERN/ unless digit/space immediately after '/'
1489 // for '//', defined-or favoured unless special keywords
1490 Sci_PositionU bkend
= bk
+ 1;
1491 while (bk
> 0 && styler
.StyleAt(bk
- 1) == SCE_PL_WORD
) {
1494 if (isPerlKeyword(bk
, bkend
, reWords
, styler
))
1496 if (IsASpace(sc
.chNext
) || IsADigit(sc
.chNext
) || sc
.chNext
== '/')
1498 } else if (sc
.ch
== '*' || sc
.ch
== '%') {
1499 if (IsASpace(sc
.chNext
) || IsADigit(sc
.chNext
) || sc
.Match('*', '*'))
1501 } else if (sc
.ch
== '<') {
1502 if (IsASpace(sc
.chNext
) || sc
.chNext
== '=')
1507 // other styles uses the default, preferRE=false
1510 case SCE_PL_HERE_QQ
:
1511 case SCE_PL_HERE_QX
:
1516 backFlag
= BACK_NONE
;
1517 if (isHereDoc
) { // handle '<<', HERE doc
1518 if (sc
.Match("<<>>")) { // double-diamond operator (5.22)
1519 sc
.SetState(SCE_PL_OPERATOR
);
1521 } else if (preferRE
) {
1522 sc
.SetState(SCE_PL_HERE_DELIM
);
1524 } else { // << operator
1525 sc
.SetState(SCE_PL_OPERATOR
);
1528 } else if (sc
.ch
== '*') { // handle '*', typeglob
1530 sc
.SetState(SCE_PL_SYMBOLTABLE
);
1531 if (sc
.chNext
== ':' && sc
.GetRelative(2) == ':') {
1533 } else if (sc
.chNext
== '{') {
1534 sc
.ForwardSetState(SCE_PL_OPERATOR
);
1539 sc
.SetState(SCE_PL_OPERATOR
);
1540 if (sc
.chNext
== '*') // exponentiation
1543 } else if (sc
.ch
== '%') { // handle '%', hash
1545 sc
.SetState(SCE_PL_HASH
);
1546 if (setHash
.Contains(sc
.chNext
)) {
1548 } else if (sc
.chNext
== ':' && sc
.GetRelative(2) == ':') {
1550 } else if (sc
.chNext
== '{') {
1551 sc
.ForwardSetState(SCE_PL_OPERATOR
);
1553 sc
.ChangeState(SCE_PL_OPERATOR
);
1556 sc
.SetState(SCE_PL_OPERATOR
);
1558 } else if (sc
.ch
== '<') { // handle '<', inputsymbol
1561 int i
= InputSymbolScan(sc
);
1563 sc
.SetState(SCE_PL_IDENTIFIER
);
1566 sc
.SetState(SCE_PL_OPERATOR
);
1569 sc
.SetState(SCE_PL_OPERATOR
);
1571 } else { // handle '/', regexp
1573 sc
.SetState(SCE_PL_REGEX
);
1576 } else { // / and // operators
1577 sc
.SetState(SCE_PL_OPERATOR
);
1578 if (sc
.chNext
== '/') {
1583 } else if (sc
.ch
== '=' // POD
1584 && setPOD
.Contains(sc
.chNext
)
1585 && sc
.atLineStart
) {
1586 sc
.SetState(SCE_PL_POD
);
1587 backFlag
= BACK_NONE
;
1588 } else if (sc
.ch
== '-' && setWordStart
.Contains(sc
.chNext
)) { // extended '-' cases
1589 Sci_PositionU bk
= sc
.currentPos
;
1590 Sci_PositionU fw
= 2;
1591 if (setSingleCharOp
.Contains(sc
.chNext
) && // file test operators
1592 !setWord
.Contains(sc
.GetRelative(2))) {
1593 sc
.SetState(SCE_PL_WORD
);
1595 // nominally a minus and bareword; find extent of bareword
1596 while (setWord
.Contains(sc
.GetRelative(fw
)))
1598 sc
.SetState(SCE_PL_OPERATOR
);
1600 // force to bareword for hash key => or {variable literal} cases
1601 if (disambiguateBareword(styler
, bk
, bk
+ fw
, backFlag
, backPos
, endPos
) & 2) {
1602 sc
.ChangeState(SCE_PL_IDENTIFIER
);
1604 backFlag
= BACK_NONE
;
1605 } else if (sc
.ch
== '(' && sc
.currentPos
> 0) { // '(' or subroutine prototype
1607 if (styleCheckSubPrototype(styler
, sc
.currentPos
- 1)) {
1608 sc
.SetState(SCE_PL_SUB_PROTOTYPE
);
1609 backFlag
= BACK_NONE
;
1611 sc
.SetState(SCE_PL_OPERATOR
);
1613 } else if (setPerlOperator
.Contains(sc
.ch
)) { // operators
1614 sc
.SetState(SCE_PL_OPERATOR
);
1615 if (sc
.Match('.', '.')) { // .. and ...
1617 if (sc
.chNext
== '.') sc
.Forward();
1619 } else if (sc
.ch
== 4 || sc
.ch
== 26) { // ^D and ^Z ends valid perl source
1620 sc
.SetState(SCE_PL_DATASECTION
);
1622 // keep colouring defaults
1628 if (sc
.state
== SCE_PL_HERE_Q
1629 || sc
.state
== SCE_PL_HERE_QQ
1630 || sc
.state
== SCE_PL_HERE_QX
1631 || sc
.state
== SCE_PL_FORMAT
) {
1632 styler
.ChangeLexerState(sc
.currentPos
, styler
.Length());
// Bit-field constants used by LexerPerl::Fold() to carry a POD heading level
// inside a fold level: the heading value is shifted left by
// PERL_HEADFOLD_SHIFT, and PERL_HEADFOLD_MASK (0xF0, i.e. bits 7-4) selects
// those bits so they can be cleared again.
1637 #define PERL_HEADFOLD_SHIFT 4
1638 #define PERL_HEADFOLD_MASK 0xF0
// Fold: computes fold levels for the lines covered by the range
// [startPos, startPos + length) and stores them with styler.SetLevel().
//   startPos  - first position to fold; it can be moved back to the start of
//               a line by the backtrack step below.
//   length    - number of positions to process, counted from the original
//               startPos (endPos is computed before any backtracking).
//   initStyle - unused; the parameter name is commented out.
//   pAccess   - document interface, wrapped in a LexAccessor for all reads
//               and writes.
// A stored level value holds the level in effect before the line in its low
// part, the level for the following line shifted left by 16, and flag bits.
// NOTE(review): the embedded line numbers skip values, so some statements of
// this function are not visible here; the comments below describe only the
// statements that are shown.
1640 void SCI_METHOD
LexerPerl::Fold(Sci_PositionU startPos
, Sci_Position length
, int /* initStyle */, IDocument
*pAccess
) {
1645 LexAccessor
styler(pAccess
);
// End of the range to fold (exclusive).
1647 Sci_PositionU endPos
= startPos
+ length
;
// Used at end of line to tell blank lines from lines with content.
1648 int visibleChars
= 0;
1649 Sci_Position lineCurrent
= styler
.GetLine(startPos
);
1651 // Backtrack to previous line in case need to fix its fold status
1653 if (lineCurrent
> 0) {
// Restart the scan at the first position of lineCurrent.
1655 startPos
= styler
.LineStart(lineCurrent
);
// Level in effect before the current line: the base level, or, when there is
// a preceding line, the value kept in the upper bits of that line's level.
1659 int levelPrev
= SC_FOLDLEVELBASE
;
1660 if (lineCurrent
> 0)
1661 levelPrev
= styler
.LevelAt(lineCurrent
- 1) >> 16;
1662 int levelCurrent
= levelPrev
;
// Look-ahead character, look-behind character and look-ahead style for the
// scan loop, primed from startPos.
1663 char chNext
= styler
[startPos
];
1664 char chPrev
= styler
.SafeGetCharAt(startPos
- 1);
1665 int styleNext
= styler
.StyleAt(startPos
);
1666 // Used at end of line to determine if the line was a package definition
1667 bool isPackageLine
= false;
// Visit every position in [startPos, endPos).
1669 for (Sci_PositionU i
= startPos
; i
< endPos
; i
++) {
// Advance the look-ahead character and style by one position.
1671 chNext
= styler
.SafeGetCharAt(i
+ 1);
1672 int style
= styleNext
;
1673 styleNext
= styler
.StyleAt(i
+ 1);
// Style of the preceding character; SCE_PL_DEFAULT when i is 0.
1674 int stylePrevCh
= (i
) ? styler
.StyleAt(i
- 1):SCE_PL_DEFAULT
;
// End of line: a '\n', or a '\r' that is not followed by '\n', so a CR LF
// pair is treated as one line end (at the LF).
1675 bool atEOL
= (ch
== '\r' && chNext
!= '\n') || (ch
== '\n');
// Start of line: the preceding character is a line-end character, or this is
// position 0.
1676 bool atLineStart
= ((chPrev
== '\r') || (chPrev
== '\n')) || i
== 0;
// Comment folding: evaluated at end of line, for comment lines only, and only
// when options.foldComment is set.
1678 if (options
.foldComment
&& atEOL
&& IsCommentLine(lineCurrent
, styler
)) {
// Case 1: the previous line is not a comment line but the next one is.
1679 if (!IsCommentLine(lineCurrent
- 1, styler
)
1680 && IsCommentLine(lineCurrent
+ 1, styler
))
// Case 2: the previous line is a comment line but the next one is not.
1682 else if (IsCommentLine(lineCurrent
- 1, styler
)
1683 && !IsCommentLine(lineCurrent
+ 1, styler
))
1686 // {} [] block folding
// Only characters styled as operators are considered brackets.
1687 if (style
== SCE_PL_OPERATOR
) {
// Extra test made only with options.foldAtElse: the level has already
// dropped below levelPrev on this line.
1689 if (options
.foldAtElse
&& levelCurrent
< levelPrev
)
1692 } else if (ch
== '}') {
// Same foldAtElse test as above (presumably for the '[' case -- confirm).
1696 if (options
.foldAtElse
&& levelCurrent
< levelPrev
)
1699 } else if (ch
== ']') {
// POD folding: checked on the first character of a line when options.foldPOD
// is set.
1704 if (options
.foldPOD
&& atLineStart
) {
1705 if (style
== SCE_PL_POD
) {
// POD starts here: the preceding character is not styled as POD.
1706 if (stylePrevCh
!= SCE_PL_POD
&& stylePrevCh
!= SCE_PL_POD_VERB
)
// "=cut": clear the heading bits and go down one level.
1708 else if (styler
.Match(i
, "=cut"))
1709 levelCurrent
= (levelCurrent
& ~PERL_HEADFOLD_MASK
) - 1;
// "=head": remember the heading level reported by PodHeadingLevel(); it is
// applied when the level value for the line is composed further down.
1710 else if (styler
.Match(i
, "=head"))
1711 podHeading
= PodHeadingLevel(i
, styler
);
// The same kind of tests for text styled as the data section.
1712 } else if (style
== SCE_PL_DATASECTION
) {
// '=' followed by an ASCII letter, while still at the base level.
1713 if (ch
== '=' && IsASCII(chNext
) && isalpha(chNext
) && levelCurrent
== SC_FOLDLEVELBASE
)
// "=cut" is honoured only while above the base level.
1715 else if (styler
.Match(i
, "=cut") && levelCurrent
> SC_FOLDLEVELBASE
)
1716 levelCurrent
= (levelCurrent
& ~PERL_HEADFOLD_MASK
) - 1;
1717 else if (styler
.Match(i
, "=head"))
1718 podHeading
= PodHeadingLevel(i
, styler
);
1719 // if package used or unclosed brace, level > SC_FOLDLEVELBASE!
1720 // reset needed as level test is vs. SC_FOLDLEVELBASE
// Applies when the preceding character is not styled as data section.
1721 else if (stylePrevCh
!= SCE_PL_DATASECTION
)
1722 levelCurrent
= SC_FOLDLEVELBASE
;
// Package folding: at line start, flag the line when it is a package line and
// the next line is not one.
1726 if (options
.foldPackage
&& atLineStart
) {
1727 if (IsPackageLine(lineCurrent
, styler
)
1728 && !IsPackageLine(lineCurrent
+ 1, styler
))
1729 isPackageLine
= true;
// Here-doc folding: the style of the current character is compared with the
// style of the preceding character (stylePrevCh).
// Labels for a current character inside a here-doc body (switch head not
// shown).
1734 case SCE_PL_HERE_QQ
:
1735 case SCE_PL_HERE_Q
:
1736 case SCE_PL_HERE_QX
:
1737 switch (stylePrevCh
) {
// Preceding character is also inside a here-doc body.
1738 case SCE_PL_HERE_QQ
:
1739 case SCE_PL_HERE_Q
:
1740 case SCE_PL_HERE_QX
:
// Second test on stylePrevCh; presumably the branch for a current character
// that is not in a here-doc -- confirm against the full source.
1749 switch (stylePrevCh
) {
1750 case SCE_PL_HERE_QQ
:
1751 case SCE_PL_HERE_Q
:
1752 case SCE_PL_HERE_QX
:
// Explicit fold markers: a '#' styled as a line comment followed by '{' or
// '}', honoured only when options.foldCommentExplicit is set.
1763 if (options
.foldCommentExplicit
&& style
== SCE_PL_COMMENTLINE
&& ch
== '#') {
1764 if (chNext
== '{') {
// "#}" is considered only while above the base level.
1766 } else if (levelCurrent
> SC_FOLDLEVELBASE
&& chNext
== '}') {
// Compose the value to store for the current line, starting from the level
// that was in effect before the line.
1772 int lev
= levelPrev
;
1773 // POD headings occupy bits 7-4, leaving some breathing room for
1774 // non-standard practice -- POD sections stuck in blocks, etc.
1775 if (podHeading
> 0) {
// New level = lev with its heading bits replaced by podHeading; the heading
// line itself is stored one below that level and flagged as a fold header.
1776 levelCurrent
= (lev
& ~PERL_HEADFOLD_MASK
) | (podHeading
<< PERL_HEADFOLD_SHIFT
);
1777 lev
= levelCurrent
- 1;
1778 lev
|= SC_FOLDLEVELHEADERFLAG
;
1781 // Check if line was a package declaration
1782 // because packages need "special" treatment
1783 if (isPackageLine
) {
// A package line is stored as a header at the base level; the lines after it
// continue one level above the base. The flag is then cleared.
1784 lev
= SC_FOLDLEVELBASE
| SC_FOLDLEVELHEADERFLAG
;
1785 levelCurrent
= SC_FOLDLEVELBASE
+ 1;
1786 isPackageLine
= false;
// The level for the following line goes into the upper bits.
1788 lev
|= levelCurrent
<< 16;
// No visible characters on the line and foldCompact set: mark as white space.
1789 if (visibleChars
== 0 && options
.foldCompact
)
1790 lev
|= SC_FOLDLEVELWHITEFLAG
;
// Level rises across a line that has visible characters: mark it as a header.
1791 if ((levelCurrent
> levelPrev
) && (visibleChars
> 0))
1792 lev
|= SC_FOLDLEVELHEADERFLAG
;
// Write the level only when it differs from the stored one.
1793 if (lev
!= styler
.LevelAt(lineCurrent
)) {
1794 styler
.SetLevel(lineCurrent
, lev
);
// The next line starts from the level reached on this one.
1797 levelPrev
= levelCurrent
;
// Test for a visible (non-space) character.
1800 if (!isspacechar(ch
))
1804 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
// flagsNext keeps every bit of the stored value outside SC_FOLDLEVELNUMBERMASK.
1805 int flagsNext
= styler
.LevelAt(lineCurrent
) & ~SC_FOLDLEVELNUMBERMASK
;
1806 styler
.SetLevel(lineCurrent
, levelPrev
| flagsNext
);
// Defines the LexerModule object lmPerl, constructed from SCLEX_PERL,
// LexerPerl::LexerFactoryPerl, the name string "perl" and perlWordListDesc.
// NOTE(review): presumably this object is what makes the Perl lexer available
// to the rest of Scintilla -- confirm against the LexerModule declaration.
1809 LexerModule
lmPerl(SCLEX_PERL
, LexerPerl::LexerFactoryPerl
, "perl", perlWordListDesc
);