scintilla: Update scintilla with changeset 3662:1d1c06df8a2f using gtk+3
[anjuta-extras.git] / plugins / scintilla / scintilla / LexPerl.cxx
blob7f0cbcf6278482d6b3e5e06674395894d4fa906d
1 // Scintilla source code edit control
2 /** @file LexPerl.cxx
3 ** Lexer for Perl.
4 ** Converted to lexer object by "Udo Lechner" <dlchnr(at)gmx(dot)net>
5 **/
6 // Copyright 1998-2008 by Neil Hodgson <neilh@scintilla.org>
7 // Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my>
8 // The License.txt file describes the conditions under which this software may be distributed.
10 #include <stdlib.h>
11 #include <string.h>
12 #include <stdio.h>
13 #include <stdarg.h>
14 #include <assert.h>
15 #include <ctype.h>
17 #ifdef _MSC_VER
18 #pragma warning(disable: 4786)
19 #endif
21 #include <string>
22 #include <map>
24 #include "ILexer.h"
25 #include "Scintilla.h"
26 #include "SciLexer.h"
28 #include "WordList.h"
29 #include "LexAccessor.h"
30 #include "StyleContext.h"
31 #include "CharacterSet.h"
32 #include "LexerModule.h"
33 #include "OptionSet.h"
35 #ifdef SCI_NAMESPACE
36 using namespace Scintilla;
37 #endif
39 // Info for HERE document handling from perldata.pod (reformatted):
40 // ----------------------------------------------------------------
41 // A line-oriented form of quoting is based on the shell ``here-doc'' syntax.
42 // Following a << you specify a string to terminate the quoted material, and
43 // all lines following the current line down to the terminating string are
44 // the value of the item.
45 // * The terminating string may be either an identifier (a word), or some
46 // quoted text.
47 // * If quoted, the type of quotes you use determines the treatment of the
48 // text, just as in regular quoting.
49 // * An unquoted identifier works like double quotes.
50 // * There must be no space between the << and the identifier.
51 // (If you put a space it will be treated as a null identifier,
52 // which is valid, and matches the first empty line.)
53 // (This is deprecated, -w warns of this syntax)
54 // * The terminating string must appear by itself (unquoted and
55 // with no surrounding whitespace) on the terminating line.
// Size of the buffer used to hold a HERE document delimiter.
#define HERE_DELIM_MAX 256

// Number sub-states tracked while lexing a numeric literal.
// The ordering is significant:
//   1-4 cannot contain a dot,
//   1-5 are numbers,
//   6-7 are styled as strings (vectors).
#define PERLNUM_BINARY 1
#define PERLNUM_HEX 2
#define PERLNUM_OCTAL 3
#define PERLNUM_FLOAT_EXP 4	// exponent part only
#define PERLNUM_DECIMAL 5
#define PERLNUM_VECTOR 6
#define PERLNUM_V_VECTOR 7
#define PERLNUM_BAD 8

// Lookback state used for bareword disambiguation.
// Whitespace and comments are insignificant when looking back;
// operators and keywords are what the disambiguation needs.
#define BACK_NONE 0
#define BACK_OPERATOR 1
#define BACK_KEYWORD 2
72 static bool isPerlKeyword(unsigned int start, unsigned int end, WordList &keywords, LexAccessor &styler) {
73 // old-style keyword matcher; needed because GetCurrent() needs
74 // current segment to be committed, but we may abandon early...
75 char s[100];
76 unsigned int i, len = end - start;
77 if (len > 30) { len = 30; }
78 for (i = 0; i < len; i++, start++) s[i] = styler[start];
79 s[i] = '\0';
80 return keywords.InList(s);
83 static int disambiguateBareword(LexAccessor &styler, unsigned int bk, unsigned int fw,
84 int backFlag, unsigned int backPos, unsigned int endPos) {
85 // identifiers are recognized by Perl as barewords under some
86 // conditions, the following attempts to do the disambiguation
87 // by looking backward and forward; result in 2 LSB
88 int result = 0;
89 bool moreback = false; // true if passed newline/comments
90 bool brace = false; // true if opening brace found
91 // if BACK_NONE, neither operator nor keyword, so skip test
92 if (backFlag == BACK_NONE)
93 return result;
94 // first look backwards past whitespace/comments to set EOL flag
95 // (some disambiguation patterns must be on a single line)
96 if (backPos <= static_cast<unsigned int>(styler.LineStart(styler.GetLine(bk))))
97 moreback = true;
98 // look backwards at last significant lexed item for disambiguation
99 bk = backPos - 1;
100 int ch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
101 if (ch == '{' && !moreback) {
102 // {bareword: possible variable spec
103 brace = true;
104 } else if ((ch == '&' && styler.SafeGetCharAt(bk - 1) != '&')
105 // &bareword: subroutine call
106 || styler.Match(bk - 1, "->")
107 // ->bareword: part of variable spec
108 || styler.Match(bk - 2, "sub")) {
109 // sub bareword: subroutine declaration
110 // (implied BACK_KEYWORD, no keywords end in 'sub'!)
111 result |= 1;
113 // next, scan forward after word past tab/spaces only;
114 // if ch isn't one of '[{(,' we can skip the test
115 if ((ch == '{' || ch == '(' || ch == '['|| ch == ',')
116 && fw < endPos) {
117 while (ch = static_cast<unsigned char>(styler.SafeGetCharAt(fw)),
118 IsASpaceOrTab(ch) && fw < endPos) {
119 fw++;
121 if ((ch == '}' && brace)
122 // {bareword}: variable spec
123 || styler.Match(fw, "=>")) {
124 // [{(, bareword=>: hash literal
125 result |= 2;
128 return result;
131 static void skipWhitespaceComment(LexAccessor &styler, unsigned int &p) {
132 // when backtracking, we need to skip whitespace and comments
133 int style;
134 while ((p > 0) && (style = styler.StyleAt(p),
135 style == SCE_PL_DEFAULT || style == SCE_PL_COMMENTLINE))
136 p--;
139 static int styleBeforeBracePair(LexAccessor &styler, unsigned int bk) {
140 // backtrack to find open '{' corresponding to a '}', balanced
141 // return significant style to be tested for '/' disambiguation
142 int braceCount = 1;
143 if (bk == 0)
144 return SCE_PL_DEFAULT;
145 while (--bk > 0) {
146 if (styler.StyleAt(bk) == SCE_PL_OPERATOR) {
147 int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
148 if (bkch == ';') { // early out
149 break;
150 } else if (bkch == '}') {
151 braceCount++;
152 } else if (bkch == '{') {
153 if (--braceCount == 0) break;
157 if (bk > 0 && braceCount == 0) {
158 // balanced { found, bk > 0, skip more whitespace/comments
159 bk--;
160 skipWhitespaceComment(styler, bk);
161 return styler.StyleAt(bk);
163 return SCE_PL_DEFAULT;
166 static int styleCheckIdentifier(LexAccessor &styler, unsigned int bk) {
167 // backtrack to classify sub-styles of identifier under test
168 // return sub-style to be tested for '/' disambiguation
169 if (styler.SafeGetCharAt(bk) == '>') // inputsymbol, like <foo>
170 return 1;
171 // backtrack to check for possible "->" or "::" before identifier
172 while (bk > 0 && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) {
173 bk--;
175 while (bk > 0) {
176 int bkstyle = styler.StyleAt(bk);
177 if (bkstyle == SCE_PL_DEFAULT
178 || bkstyle == SCE_PL_COMMENTLINE) {
179 // skip whitespace, comments
180 } else if (bkstyle == SCE_PL_OPERATOR) {
181 // test for "->" and "::"
182 if (styler.Match(bk - 1, "->") || styler.Match(bk - 1, "::"))
183 return 2;
184 } else
185 return 3; // bare identifier
186 bk--;
188 return 0;
191 static int inputsymbolScan(LexAccessor &styler, unsigned int pos, unsigned int endPos) {
192 // looks forward for matching > on same line; a bit ugly
193 unsigned int fw = pos;
194 while (++fw < endPos) {
195 int fwch = static_cast<unsigned char>(styler.SafeGetCharAt(fw));
196 if (fwch == '\r' || fwch == '\n') {
197 return 0;
198 } else if (fwch == '>') {
199 if (styler.Match(fw - 2, "<=>")) // '<=>' case
200 return 0;
201 return fw - pos;
204 return 0;
207 static int podLineScan(LexAccessor &styler, unsigned int &pos, unsigned int endPos) {
208 // forward scan the current line to classify line for POD style
209 int state = -1;
210 while (pos <= endPos) {
211 int ch = static_cast<unsigned char>(styler.SafeGetCharAt(pos));
212 if (ch == '\n' || ch == '\r' || pos >= endPos) {
213 if (ch == '\r' && styler.SafeGetCharAt(pos + 1) == '\n') pos++;
214 break;
216 if (IsASpaceOrTab(ch)) { // whitespace, take note
217 if (state == -1)
218 state = SCE_PL_DEFAULT;
219 } else if (state == SCE_PL_DEFAULT) { // verbatim POD line
220 state = SCE_PL_POD_VERB;
221 } else if (state != SCE_PL_POD_VERB) { // regular POD line
222 state = SCE_PL_POD;
224 pos++;
226 if (state == -1)
227 state = SCE_PL_DEFAULT;
228 return state;
231 static bool styleCheckSubPrototype(LexAccessor &styler, unsigned int bk) {
232 // backtrack to identify if we're starting a subroutine prototype
233 // we also need to ignore whitespace/comments:
234 // 'sub' [whitespace|comment] <identifier> [whitespace|comment]
235 styler.Flush();
236 skipWhitespaceComment(styler, bk);
237 if (bk == 0 || styler.StyleAt(bk) != SCE_PL_IDENTIFIER) // check identifier
238 return false;
239 while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_IDENTIFIER)) {
240 bk--;
242 skipWhitespaceComment(styler, bk);
243 if (bk < 2 || styler.StyleAt(bk) != SCE_PL_WORD // check "sub" keyword
244 || !styler.Match(bk - 2, "sub")) // assume suffix is unique!
245 return false;
246 return true;
static bool isMatch(const char *sref, char *s) {
	// Matches a per-line delimiter: returns true when the line text s
	// equals sref. A trailing CR (left over from a CRLF line end) is
	// removed from s in place before comparing, so s is modified.
	// The length is kept as size_t, the type strlen() returns, so it is
	// never narrowed to int.
	size_t len = strlen(s);
	if (len != 0 && s[len - 1] == '\r')
		s[len - 1] = '\0';
	return (strcmp(sref, s) == 0);
}
257 static int actualNumStyle(int numberStyle) {
258 if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) {
259 return SCE_PL_STRING;
260 } else if (numberStyle == PERLNUM_BAD) {
261 return SCE_PL_ERROR;
263 return SCE_PL_NUMBER;
static int opposite(int ch) {
	// Returns the closing delimiter that pairs with an opening bracket
	// character; any other character is its own closing delimiter.
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
274 static bool IsCommentLine(int line, LexAccessor &styler) {
275 int pos = styler.LineStart(line);
276 int eol_pos = styler.LineStart(line + 1) - 1;
277 for (int i = pos; i < eol_pos; i++) {
278 char ch = styler[i];
279 int style = styler.StyleAt(i);
280 if (ch == '#' && style == SCE_PL_COMMENTLINE)
281 return true;
282 else if (!IsASpaceOrTab(ch))
283 return false;
285 return false;
288 static bool IsPackageLine(int line, LexAccessor &styler) {
289 int pos = styler.LineStart(line);
290 int style = styler.StyleAt(pos);
291 if (style == SCE_PL_WORD && styler.Match(pos, "package")) {
292 return true;
294 return false;
297 static int PodHeadingLevel(int pos, LexAccessor &styler) {
298 int lvl = static_cast<unsigned char>(styler.SafeGetCharAt(pos + 5));
299 if (lvl >= '1' && lvl <= '4') {
300 return lvl - '0';
302 return 0;
305 // An individual named option for use in an OptionSet
// Options used for LexerPerl.
// Each member is bound to a property name in OptionSetPerl.
struct OptionsPerl {
	bool fold;			// "fold"
	bool foldComment;		// "fold.comment"
	bool foldCompact;		// "fold.compact"

	// Custom folding of POD and packages
	bool foldPOD;			// "fold.perl.pod": fold Pod blocks
	bool foldPackage;		// "fold.perl.package": fold packages

	bool foldCommentExplicit;	// "fold.perl.comment.explicit"

	bool foldAtElse;		// "fold.perl.at.else"

	// Defaults: folding itself, comment folding and fold-at-else are
	// off; compact, POD, package and explicit-comment folding are on.
	OptionsPerl() :
		fold(false),
		foldComment(false),
		foldCompact(true),
		foldPOD(true),
		foldPackage(true),
		foldCommentExplicit(true),
		foldAtElse(false) {
	}
};
// Descriptions of the word lists accepted by the Perl lexer, in
// WordListSet() index order. The list is terminated by a null pointer.
static const char *const perlWordListDesc[] = {
	"Keywords",	// word list 0
	0
};
338 struct OptionSetPerl : public OptionSet<OptionsPerl> {
339 OptionSetPerl() {
340 DefineProperty("fold", &OptionsPerl::fold);
342 DefineProperty("fold.comment", &OptionsPerl::foldComment);
344 DefineProperty("fold.compact", &OptionsPerl::foldCompact);
346 DefineProperty("fold.perl.pod", &OptionsPerl::foldPOD,
347 "Set to 0 to disable folding Pod blocks when using the Perl lexer.");
349 DefineProperty("fold.perl.package", &OptionsPerl::foldPackage,
350 "Set to 0 to disable folding packages when using the Perl lexer.");
352 DefineProperty("fold.perl.comment.explicit", &OptionsPerl::foldCommentExplicit,
353 "Set to 0 to disable explicit folding.");
355 DefineProperty("fold.perl.at.else", &OptionsPerl::foldAtElse,
356 "This option enables Perl folding on a \"} else {\" line of an if statement.");
358 DefineWordListSets(perlWordListDesc);
362 class LexerPerl : public ILexer {
363 WordList keywords;
364 OptionsPerl options;
365 OptionSetPerl osPerl;
366 public:
367 LexerPerl() {
369 ~LexerPerl() {
371 void SCI_METHOD Release() {
372 delete this;
374 int SCI_METHOD Version() const {
375 return lvOriginal;
377 const char *SCI_METHOD PropertyNames() {
378 return osPerl.PropertyNames();
380 int SCI_METHOD PropertyType(const char *name) {
381 return osPerl.PropertyType(name);
383 const char *SCI_METHOD DescribeProperty(const char *name) {
384 return osPerl.DescribeProperty(name);
386 int SCI_METHOD PropertySet(const char *key, const char *val);
387 const char *SCI_METHOD DescribeWordListSets() {
388 return osPerl.DescribeWordListSets();
390 int SCI_METHOD WordListSet(int n, const char *wl);
391 void SCI_METHOD Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
392 void SCI_METHOD Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
394 void *SCI_METHOD PrivateCall(int, void *) {
395 return 0;
398 static ILexer *LexerFactoryPerl() {
399 return new LexerPerl();
403 int SCI_METHOD LexerPerl::PropertySet(const char *key, const char *val) {
404 if (osPerl.PropertySet(&options, key, val)) {
405 return 0;
407 return -1;
410 int SCI_METHOD LexerPerl::WordListSet(int n, const char *wl) {
411 WordList *wordListN = 0;
412 switch (n) {
413 case 0:
414 wordListN = &keywords;
415 break;
417 int firstModification = -1;
418 if (wordListN) {
419 WordList wlNew;
420 wlNew.Set(wl);
421 if (*wordListN != wlNew) {
422 wordListN->Set(wl);
423 firstModification = 0;
426 return firstModification;
429 void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
430 LexAccessor styler(pAccess);
432 // keywords that forces /PATTERN/ at all times; should track vim's behaviour
433 WordList reWords;
434 reWords.Set("elsif if split while");
436 // charset classes
437 CharacterSet setWordStart(CharacterSet::setAlpha, "_", 0x80, true);
438 CharacterSet setWord(CharacterSet::setAlphaNum, "_", 0x80, true);
439 CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMAC");
440 // lexing of "%*</" operators is non-trivial; these are missing in the set below
441 CharacterSet setPerlOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;>,?!.~");
442 CharacterSet setQDelim(CharacterSet::setNone, "qrwx");
443 CharacterSet setModifiers(CharacterSet::setAlpha);
444 CharacterSet setPreferRE(CharacterSet::setNone, "*/<%");
445 // setArray and setHash also accepts chars for special vars like $_,
446 // which are then truncated when the next char does not match setVar
447 CharacterSet setVar(CharacterSet::setAlphaNum, "#$_'", 0x80, true);
448 CharacterSet setArray(CharacterSet::setAlpha, "#$_+-", 0x80, true);
449 CharacterSet setHash(CharacterSet::setAlpha, "#$_!^+-", 0x80, true);
450 CharacterSet &setPOD = setModifiers;
451 CharacterSet setNonHereDoc(CharacterSet::setDigits, "=$@");
452 CharacterSet setHereDocDelim(CharacterSet::setAlphaNum, "_");
453 CharacterSet setSubPrototype(CharacterSet::setNone, "\\[$@%&*];");
454 // for format identifiers
455 CharacterSet setFormatStart(CharacterSet::setAlpha, "_=");
456 CharacterSet &setFormat = setHereDocDelim;
458 // Lexer for perl often has to backtrack to start of current style to determine
459 // which characters are being used as quotes, how deeply nested is the
460 // start position and what the termination string is for HERE documents.
462 class HereDocCls { // Class to manage HERE doc sequence
463 public:
464 int State;
465 // 0: '<<' encountered
466 // 1: collect the delimiter
467 // 2: here doc text (lines after the delimiter)
468 int Quote; // the char after '<<'
469 bool Quoted; // true if Quote in ('\'','"','`')
470 int DelimiterLength; // strlen(Delimiter)
471 char *Delimiter; // the Delimiter, 256: sizeof PL_tokenbuf
472 HereDocCls() {
473 State = 0;
474 Quote = 0;
475 Quoted = false;
476 DelimiterLength = 0;
477 Delimiter = new char[HERE_DELIM_MAX];
478 Delimiter[0] = '\0';
480 void Append(int ch) {
481 Delimiter[DelimiterLength++] = static_cast<char>(ch);
482 Delimiter[DelimiterLength] = '\0';
484 ~HereDocCls() {
485 delete []Delimiter;
488 HereDocCls HereDoc; // TODO: FIFO for stacked here-docs
490 class QuoteCls { // Class to manage quote pairs
491 public:
492 int Rep;
493 int Count;
494 int Up, Down;
495 QuoteCls() {
496 this->New(1);
498 void New(int r = 1) {
499 Rep = r;
500 Count = 0;
501 Up = '\0';
502 Down = '\0';
504 void Open(int u) {
505 Count++;
506 Up = u;
507 Down = opposite(Up);
510 QuoteCls Quote;
512 // additional state for number lexing
513 int numState = PERLNUM_DECIMAL;
514 int dotCount = 0;
516 unsigned int endPos = startPos + length;
518 // Backtrack to beginning of style if required...
519 // If in a long distance lexical state, backtrack to find quote characters.
520 // Includes strings (may be multi-line), numbers (additional state), format
521 // bodies, as well as POD sections.
522 if (initStyle == SCE_PL_HERE_Q
523 || initStyle == SCE_PL_HERE_QQ
524 || initStyle == SCE_PL_HERE_QX
525 || initStyle == SCE_PL_FORMAT
527 int delim = (initStyle == SCE_PL_FORMAT) ? SCE_PL_FORMAT_IDENT:SCE_PL_HERE_DELIM;
528 while ((startPos > 1) && (styler.StyleAt(startPos) != delim)) {
529 startPos--;
531 startPos = styler.LineStart(styler.GetLine(startPos));
532 initStyle = styler.StyleAt(startPos - 1);
534 if (initStyle == SCE_PL_STRING_Q
535 || initStyle == SCE_PL_STRING_QQ
536 || initStyle == SCE_PL_STRING_QX
537 || initStyle == SCE_PL_STRING_QR
538 || initStyle == SCE_PL_STRING_QW
539 || initStyle == SCE_PL_REGEX
540 || initStyle == SCE_PL_REGSUBST
541 || initStyle == SCE_PL_STRING
542 || initStyle == SCE_PL_BACKTICKS
543 || initStyle == SCE_PL_CHARACTER
544 || initStyle == SCE_PL_NUMBER
545 || initStyle == SCE_PL_IDENTIFIER
546 || initStyle == SCE_PL_ERROR
547 || initStyle == SCE_PL_SUB_PROTOTYPE
549 while ((startPos > 1) && (styler.StyleAt(startPos - 1) == initStyle)) {
550 startPos--;
552 initStyle = SCE_PL_DEFAULT;
553 } else if (initStyle == SCE_PL_POD
554 || initStyle == SCE_PL_POD_VERB
556 // POD backtracking finds preceeding blank lines and goes back past them
557 int ln = styler.GetLine(startPos);
558 if (ln > 0) {
559 initStyle = styler.StyleAt(styler.LineStart(--ln));
560 if (initStyle == SCE_PL_POD || initStyle == SCE_PL_POD_VERB) {
561 while (ln > 0 && styler.GetLineState(ln) == SCE_PL_DEFAULT)
562 ln--;
564 startPos = styler.LineStart(++ln);
565 initStyle = styler.StyleAt(startPos - 1);
566 } else {
567 startPos = 0;
568 initStyle = SCE_PL_DEFAULT;
572 // backFlag, backPos are additional state to aid identifier corner cases.
573 // Look backwards past whitespace and comments in order to detect either
574 // operator or keyword. Later updated as we go along.
575 int backFlag = BACK_NONE;
576 unsigned int backPos = startPos;
577 if (backPos > 0) {
578 backPos--;
579 skipWhitespaceComment(styler, backPos);
580 if (styler.StyleAt(backPos) == SCE_PL_OPERATOR)
581 backFlag = BACK_OPERATOR;
582 else if (styler.StyleAt(backPos) == SCE_PL_WORD)
583 backFlag = BACK_KEYWORD;
584 backPos++;
587 StyleContext sc(startPos, endPos - startPos, initStyle, styler, static_cast<char>(STYLE_MAX));
589 for (; sc.More(); sc.Forward()) {
591 // Determine if the current state should terminate.
592 switch (sc.state) {
593 case SCE_PL_OPERATOR:
594 sc.SetState(SCE_PL_DEFAULT);
595 backFlag = BACK_OPERATOR;
596 backPos = sc.currentPos;
597 break;
598 case SCE_PL_IDENTIFIER: // identifier, bareword, inputsymbol
599 if ((!setWord.Contains(sc.ch) && sc.ch != '\'')
600 || sc.Match('.', '.')
601 || sc.chPrev == '>') { // end of inputsymbol
602 sc.SetState(SCE_PL_DEFAULT);
604 break;
605 case SCE_PL_WORD: // keyword, plus special cases
606 if (!setWord.Contains(sc.ch)) {
607 char s[100];
608 sc.GetCurrent(s, sizeof(s));
609 if ((strcmp(s, "__DATA__") == 0) || (strcmp(s, "__END__") == 0)) {
610 sc.ChangeState(SCE_PL_DATASECTION);
611 } else {
612 if ((strcmp(s, "format") == 0)) {
613 sc.SetState(SCE_PL_FORMAT_IDENT);
614 HereDoc.State = 0;
615 } else {
616 sc.SetState(SCE_PL_DEFAULT);
618 backFlag = BACK_KEYWORD;
619 backPos = sc.currentPos;
622 break;
623 case SCE_PL_SCALAR:
624 case SCE_PL_ARRAY:
625 case SCE_PL_HASH:
626 case SCE_PL_SYMBOLTABLE:
627 if (sc.Match(':', ':')) { // skip ::
628 sc.Forward();
629 } else if (!setVar.Contains(sc.ch)) {
630 if (sc.LengthCurrent() == 1) {
631 // Special variable: $(, $_ etc.
632 sc.Forward();
634 sc.SetState(SCE_PL_DEFAULT);
636 break;
637 case SCE_PL_NUMBER:
638 // if no early break, number style is terminated at "(go through)"
639 if (sc.ch == '.') {
640 if (sc.chNext == '.') {
641 // double dot is always an operator (go through)
642 } else if (numState <= PERLNUM_FLOAT_EXP) {
643 // non-decimal number or float exponent, consume next dot
644 sc.SetState(SCE_PL_OPERATOR);
645 break;
646 } else { // decimal or vectors allows dots
647 dotCount++;
648 if (numState == PERLNUM_DECIMAL) {
649 if (dotCount <= 1) // number with one dot in it
650 break;
651 if (IsADigit(sc.chNext)) { // really a vector
652 numState = PERLNUM_VECTOR;
653 break;
655 // number then dot (go through)
656 } else if (IsADigit(sc.chNext)) // vectors
657 break;
658 // vector then dot (go through)
660 } else if (sc.ch == '_') {
661 // permissive underscoring for number and vector literals
662 break;
663 } else if (numState == PERLNUM_DECIMAL) {
664 if (sc.ch == 'E' || sc.ch == 'e') { // exponent, sign
665 numState = PERLNUM_FLOAT_EXP;
666 if (sc.chNext == '+' || sc.chNext == '-') {
667 sc.Forward();
669 break;
670 } else if (IsADigit(sc.ch))
671 break;
672 // number then word (go through)
673 } else if (numState == PERLNUM_HEX) {
674 if (IsADigit(sc.ch, 16))
675 break;
676 } else if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) {
677 if (IsADigit(sc.ch)) // vector
678 break;
679 if (setWord.Contains(sc.ch) && dotCount == 0) { // change to word
680 sc.ChangeState(SCE_PL_IDENTIFIER);
681 break;
683 // vector then word (go through)
684 } else if (IsADigit(sc.ch)) {
685 if (numState == PERLNUM_FLOAT_EXP) {
686 break;
687 } else if (numState == PERLNUM_OCTAL) {
688 if (sc.ch <= '7') break;
689 } else if (numState == PERLNUM_BINARY) {
690 if (sc.ch <= '1') break;
692 // mark invalid octal, binary numbers (go through)
693 numState = PERLNUM_BAD;
694 break;
696 // complete current number or vector
697 sc.ChangeState(actualNumStyle(numState));
698 sc.SetState(SCE_PL_DEFAULT);
699 break;
700 case SCE_PL_COMMENTLINE:
701 if (sc.atLineEnd) {
702 sc.SetState(SCE_PL_DEFAULT);
704 break;
705 case SCE_PL_HERE_DELIM:
706 if (HereDoc.State == 0) { // '<<' encountered
707 int delim_ch = sc.chNext;
708 int ws_skip = 0;
709 HereDoc.State = 1; // pre-init HERE doc class
710 HereDoc.Quote = sc.chNext;
711 HereDoc.Quoted = false;
712 HereDoc.DelimiterLength = 0;
713 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
714 if (IsASpaceOrTab(delim_ch)) {
715 // skip whitespace; legal only for quoted delimiters
716 unsigned int i = sc.currentPos + 1;
717 while ((i < endPos) && IsASpaceOrTab(delim_ch)) {
718 i++;
719 delim_ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
721 ws_skip = i - sc.currentPos - 1;
723 if (delim_ch == '\'' || delim_ch == '"' || delim_ch == '`') {
724 // a quoted here-doc delimiter; skip any whitespace
725 sc.Forward(ws_skip + 1);
726 HereDoc.Quote = delim_ch;
727 HereDoc.Quoted = true;
728 } else if ((ws_skip == 0 && setNonHereDoc.Contains(sc.chNext))
729 || ws_skip > 0) {
730 // left shift << or <<= operator cases
731 // restore position if operator
732 sc.ChangeState(SCE_PL_OPERATOR);
733 sc.ForwardSetState(SCE_PL_DEFAULT);
734 backFlag = BACK_OPERATOR;
735 backPos = sc.currentPos;
736 HereDoc.State = 0;
737 } else {
738 // specially handle initial '\' for identifier
739 if (ws_skip == 0 && HereDoc.Quote == '\\')
740 sc.Forward();
741 // an unquoted here-doc delimiter, no special handling
742 // (cannot be prefixed by spaces/tabs), or
743 // symbols terminates; deprecated zero-length delimiter
745 } else if (HereDoc.State == 1) { // collect the delimiter
746 backFlag = BACK_NONE;
747 if (HereDoc.Quoted) { // a quoted here-doc delimiter
748 if (sc.ch == HereDoc.Quote) { // closing quote => end of delimiter
749 sc.ForwardSetState(SCE_PL_DEFAULT);
750 } else if (!sc.atLineEnd) {
751 if (sc.Match('\\', static_cast<char>(HereDoc.Quote))) { // escaped quote
752 sc.Forward();
754 if (sc.ch != '\r') { // skip CR if CRLF
755 HereDoc.Append(sc.ch);
758 } else { // an unquoted here-doc delimiter
759 if (setHereDocDelim.Contains(sc.ch)) {
760 HereDoc.Append(sc.ch);
761 } else {
762 sc.SetState(SCE_PL_DEFAULT);
765 if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) {
766 sc.SetState(SCE_PL_ERROR);
767 HereDoc.State = 0;
770 break;
771 case SCE_PL_HERE_Q:
772 case SCE_PL_HERE_QQ:
773 case SCE_PL_HERE_QX: {
774 // also implies HereDoc.State == 2
775 sc.Complete();
776 while (!sc.atLineEnd)
777 sc.Forward();
778 char s[HERE_DELIM_MAX];
779 sc.GetCurrent(s, sizeof(s));
780 if (isMatch(HereDoc.Delimiter, s)) {
781 sc.SetState(SCE_PL_DEFAULT);
782 backFlag = BACK_NONE;
783 HereDoc.State = 0;
786 break;
787 case SCE_PL_POD:
788 case SCE_PL_POD_VERB: {
789 unsigned int fw = sc.currentPos;
790 int ln = styler.GetLine(fw);
791 if (sc.atLineStart && sc.Match("=cut")) { // end of POD
792 sc.SetState(SCE_PL_POD);
793 sc.Forward(4);
794 sc.SetState(SCE_PL_DEFAULT);
795 styler.SetLineState(ln, SCE_PL_POD);
796 break;
798 int pod = podLineScan(styler, fw, endPos); // classify POD line
799 styler.SetLineState(ln, pod);
800 if (pod == SCE_PL_DEFAULT) {
801 if (sc.state == SCE_PL_POD_VERB) {
802 unsigned int fw2 = fw;
803 while (fw2 <= endPos && pod == SCE_PL_DEFAULT) {
804 fw = fw2++; // penultimate line (last blank line)
805 pod = podLineScan(styler, fw2, endPos);
806 styler.SetLineState(styler.GetLine(fw2), pod);
808 if (pod == SCE_PL_POD) { // truncate verbatim POD early
809 sc.SetState(SCE_PL_POD);
810 } else
811 fw = fw2;
812 } else
813 pod = SCE_PL_POD;
814 } else {
815 if (pod == SCE_PL_POD_VERB // still part of current paragraph
816 && (styler.GetLineState(ln - 1) == SCE_PL_POD)) {
817 pod = SCE_PL_POD;
818 styler.SetLineState(ln, pod);
819 } else if (pod == SCE_PL_POD
820 && (styler.GetLineState(ln - 1) == SCE_PL_POD_VERB)) {
821 pod = SCE_PL_POD_VERB;
822 styler.SetLineState(ln, pod);
824 sc.SetState(pod);
826 sc.Forward(fw - sc.currentPos); // commit style
828 break;
829 case SCE_PL_REGEX:
830 case SCE_PL_STRING_QR:
831 if (Quote.Rep <= 0) {
832 if (!setModifiers.Contains(sc.ch))
833 sc.SetState(SCE_PL_DEFAULT);
834 } else if (!Quote.Up && !IsASpace(sc.ch)) {
835 Quote.Open(sc.ch);
836 } else if (sc.ch == '\\' && Quote.Up != '\\') {
837 sc.Forward();
838 } else if (sc.ch == Quote.Down) {
839 Quote.Count--;
840 if (Quote.Count == 0)
841 Quote.Rep--;
842 } else if (sc.ch == Quote.Up) {
843 Quote.Count++;
845 break;
846 case SCE_PL_REGSUBST:
847 if (Quote.Rep <= 0) {
848 if (!setModifiers.Contains(sc.ch))
849 sc.SetState(SCE_PL_DEFAULT);
850 } else if (!Quote.Up && !IsASpace(sc.ch)) {
851 Quote.Open(sc.ch);
852 } else if (sc.ch == '\\' && Quote.Up != '\\') {
853 sc.Forward();
854 } else if (Quote.Count == 0 && Quote.Rep == 1) {
855 // We matched something like s(...) or tr{...}, Perl 5.10
856 // appears to allow almost any character for use as the
857 // next delimiters. Whitespace and comments are accepted in
858 // between, but we'll limit to whitespace here.
859 // For '#', if no whitespace in between, it's a delimiter.
860 if (IsASpace(sc.ch)) {
861 // Keep going
862 } else if (sc.ch == '#' && IsASpaceOrTab(sc.chPrev)) {
863 sc.SetState(SCE_PL_DEFAULT);
864 } else {
865 Quote.Open(sc.ch);
867 } else if (sc.ch == Quote.Down) {
868 Quote.Count--;
869 if (Quote.Count == 0)
870 Quote.Rep--;
871 if (Quote.Up == Quote.Down)
872 Quote.Count++;
873 } else if (sc.ch == Quote.Up) {
874 Quote.Count++;
876 break;
877 case SCE_PL_STRING_Q:
878 case SCE_PL_STRING_QQ:
879 case SCE_PL_STRING_QX:
880 case SCE_PL_STRING_QW:
881 case SCE_PL_STRING:
882 case SCE_PL_CHARACTER:
883 case SCE_PL_BACKTICKS:
884 if (!Quote.Down && !IsASpace(sc.ch)) {
885 Quote.Open(sc.ch);
886 } else if (sc.ch == '\\' && Quote.Up != '\\') {
887 sc.Forward();
888 } else if (sc.ch == Quote.Down) {
889 Quote.Count--;
890 if (Quote.Count == 0)
891 sc.ForwardSetState(SCE_PL_DEFAULT);
892 } else if (sc.ch == Quote.Up) {
893 Quote.Count++;
895 break;
896 case SCE_PL_SUB_PROTOTYPE: {
897 int i = 0;
898 // forward scan; must all be valid proto characters
899 while (setSubPrototype.Contains(sc.GetRelative(i)))
900 i++;
901 if (sc.GetRelative(i) == ')') { // valid sub prototype
902 sc.Forward(i);
903 sc.ForwardSetState(SCE_PL_DEFAULT);
904 } else {
905 // abandon prototype, restart from '('
906 sc.ChangeState(SCE_PL_OPERATOR);
907 sc.SetState(SCE_PL_DEFAULT);
910 break;
911 case SCE_PL_FORMAT: {
912 sc.Complete();
913 while (!sc.atLineEnd)
914 sc.Forward();
915 char s[10];
916 sc.GetCurrent(s, sizeof(s));
917 if (isMatch(".", s))
918 sc.SetState(SCE_PL_DEFAULT);
920 break;
921 case SCE_PL_ERROR:
922 break;
924 // Needed for specific continuation styles (one follows the other)
925 switch (sc.state) {
926 // continued from SCE_PL_WORD
927 case SCE_PL_FORMAT_IDENT:
928 // occupies HereDoc state 3 to avoid clashing with HERE docs
929 if (IsASpaceOrTab(sc.ch)) { // skip whitespace
930 sc.ChangeState(SCE_PL_DEFAULT);
931 while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
932 sc.Forward();
933 sc.SetState(SCE_PL_FORMAT_IDENT);
935 if (setFormatStart.Contains(sc.ch)) { // identifier or '='
936 if (sc.ch != '=') {
937 do {
938 sc.Forward();
939 } while (setFormat.Contains(sc.ch));
941 while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
942 sc.Forward();
943 if (sc.ch == '=') {
944 sc.ForwardSetState(SCE_PL_DEFAULT);
945 HereDoc.State = 3;
946 } else {
947 // invalid indentifier; inexact fallback, but hey
948 sc.ChangeState(SCE_PL_IDENTIFIER);
949 sc.SetState(SCE_PL_DEFAULT);
951 } else {
952 sc.ChangeState(SCE_PL_DEFAULT); // invalid indentifier
954 backFlag = BACK_NONE;
955 break;
958 // Must check end of HereDoc states here before default state is handled
959 if (HereDoc.State == 1 && sc.atLineEnd) {
960 // Begin of here-doc (the line after the here-doc delimiter):
961 // Lexically, the here-doc starts from the next line after the >>, but the
962 // first line of here-doc seem to follow the style of the last EOL sequence
963 int st_new = SCE_PL_HERE_QQ;
964 HereDoc.State = 2;
965 if (HereDoc.Quoted) {
966 if (sc.state == SCE_PL_HERE_DELIM) {
967 // Missing quote at end of string! We are stricter than perl.
968 // Colour here-doc anyway while marking this bit as an error.
969 sc.ChangeState(SCE_PL_ERROR);
971 switch (HereDoc.Quote) {
972 case '\'':
973 st_new = SCE_PL_HERE_Q ;
974 break;
975 case '"' :
976 st_new = SCE_PL_HERE_QQ;
977 break;
978 case '`' :
979 st_new = SCE_PL_HERE_QX;
980 break;
982 } else {
983 if (HereDoc.Quote == '\\')
984 st_new = SCE_PL_HERE_Q;
986 sc.SetState(st_new);
988 if (HereDoc.State == 3 && sc.atLineEnd) {
989 // Start of format body.
990 HereDoc.State = 0;
991 sc.SetState(SCE_PL_FORMAT);
994 // Determine if a new state should be entered.
995 if (sc.state == SCE_PL_DEFAULT) {
996 if (IsADigit(sc.ch) ||
997 (IsADigit(sc.chNext) && (sc.ch == '.' || sc.ch == 'v'))) {
998 sc.SetState(SCE_PL_NUMBER);
999 backFlag = BACK_NONE;
1000 numState = PERLNUM_DECIMAL;
1001 dotCount = 0;
1002 if (sc.ch == '0') { // hex,bin,octal
1003 if (sc.chNext == 'x') {
1004 numState = PERLNUM_HEX;
1005 } else if (sc.chNext == 'b') {
1006 numState = PERLNUM_BINARY;
1007 } else if (IsADigit(sc.chNext)) {
1008 numState = PERLNUM_OCTAL;
1010 if (numState != PERLNUM_DECIMAL) {
1011 sc.Forward();
1013 } else if (sc.ch == 'v') { // vector
1014 numState = PERLNUM_V_VECTOR;
1016 } else if (setWord.Contains(sc.ch)) {
1017 // if immediately prefixed by '::', always a bareword
1018 sc.SetState(SCE_PL_WORD);
1019 if (sc.chPrev == ':' && sc.GetRelative(-2) == ':') {
1020 sc.ChangeState(SCE_PL_IDENTIFIER);
1022 unsigned int bk = sc.currentPos;
1023 unsigned int fw = sc.currentPos + 1;
1024 // first check for possible quote-like delimiter
1025 if (sc.ch == 's' && !setWord.Contains(sc.chNext)) {
1026 sc.ChangeState(SCE_PL_REGSUBST);
1027 Quote.New(2);
1028 } else if (sc.ch == 'm' && !setWord.Contains(sc.chNext)) {
1029 sc.ChangeState(SCE_PL_REGEX);
1030 Quote.New();
1031 } else if (sc.ch == 'q' && !setWord.Contains(sc.chNext)) {
1032 sc.ChangeState(SCE_PL_STRING_Q);
1033 Quote.New();
1034 } else if (sc.ch == 'y' && !setWord.Contains(sc.chNext)) {
1035 sc.ChangeState(SCE_PL_REGSUBST);
1036 Quote.New(2);
1037 } else if (sc.Match('t', 'r') && !setWord.Contains(sc.GetRelative(2))) {
1038 sc.ChangeState(SCE_PL_REGSUBST);
1039 Quote.New(2);
1040 sc.Forward();
1041 fw++;
1042 } else if (sc.ch == 'q' && setQDelim.Contains(sc.chNext)
1043 && !setWord.Contains(sc.GetRelative(2))) {
1044 if (sc.chNext == 'q') sc.ChangeState(SCE_PL_STRING_QQ);
1045 else if (sc.chNext == 'x') sc.ChangeState(SCE_PL_STRING_QX);
1046 else if (sc.chNext == 'r') sc.ChangeState(SCE_PL_STRING_QR);
1047 else sc.ChangeState(SCE_PL_STRING_QW); // sc.chNext == 'w'
1048 Quote.New();
1049 sc.Forward();
1050 fw++;
1051 } else if (sc.ch == 'x' && (sc.chNext == '=' || // repetition
1052 !setWord.Contains(sc.chNext) ||
1053 (IsADigit(sc.chPrev) && IsADigit(sc.chNext)))) {
1054 sc.ChangeState(SCE_PL_OPERATOR);
1056 // if potentially a keyword, scan forward and grab word, then check
1057 // if it's really one; if yes, disambiguation test is performed
1058 // otherwise it is always a bareword and we skip a lot of scanning
1059 if (sc.state == SCE_PL_WORD) {
1060 while (setWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(fw))))
1061 fw++;
1062 if (!isPerlKeyword(styler.GetStartSegment(), fw, keywords, styler)) {
1063 sc.ChangeState(SCE_PL_IDENTIFIER);
1066 // if already SCE_PL_IDENTIFIER, then no ambiguity, skip this
1067 // for quote-like delimiters/keywords, attempt to disambiguate
1068 // to select for bareword, change state -> SCE_PL_IDENTIFIER
1069 if (sc.state != SCE_PL_IDENTIFIER && bk > 0) {
1070 if (disambiguateBareword(styler, bk, fw, backFlag, backPos, endPos))
1071 sc.ChangeState(SCE_PL_IDENTIFIER);
1073 backFlag = BACK_NONE;
1074 } else if (sc.ch == '#') {
1075 sc.SetState(SCE_PL_COMMENTLINE);
1076 } else if (sc.ch == '\"') {
1077 sc.SetState(SCE_PL_STRING);
1078 Quote.New();
1079 Quote.Open(sc.ch);
1080 backFlag = BACK_NONE;
1081 } else if (sc.ch == '\'') {
1082 if (sc.chPrev == '&' && setWordStart.Contains(sc.chNext)) {
1083 // Archaic call
1084 sc.SetState(SCE_PL_IDENTIFIER);
1085 } else {
1086 sc.SetState(SCE_PL_CHARACTER);
1087 Quote.New();
1088 Quote.Open(sc.ch);
1090 backFlag = BACK_NONE;
1091 } else if (sc.ch == '`') {
1092 sc.SetState(SCE_PL_BACKTICKS);
1093 Quote.New();
1094 Quote.Open(sc.ch);
1095 backFlag = BACK_NONE;
1096 } else if (sc.ch == '$') {
1097 sc.SetState(SCE_PL_SCALAR);
1098 if (sc.chNext == '{') {
1099 sc.ForwardSetState(SCE_PL_OPERATOR);
1100 } else if (IsASpace(sc.chNext)) {
1101 sc.ForwardSetState(SCE_PL_DEFAULT);
1102 } else {
1103 sc.Forward();
1104 if (sc.Match('`', '`') || sc.Match(':', ':')) {
1105 sc.Forward();
1108 backFlag = BACK_NONE;
1109 } else if (sc.ch == '@') {
1110 sc.SetState(SCE_PL_ARRAY);
1111 if (setArray.Contains(sc.chNext)) {
1112 // no special treatment
1113 } else if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1114 sc.Forward(2);
1115 } else if (sc.chNext == '{' || sc.chNext == '[') {
1116 sc.ForwardSetState(SCE_PL_OPERATOR);
1117 } else {
1118 sc.ChangeState(SCE_PL_OPERATOR);
1120 backFlag = BACK_NONE;
1121 } else if (setPreferRE.Contains(sc.ch)) {
1122 // Explicit backward peeking to set a consistent preferRE for
1123 // any slash found, so no longer need to track preferRE state.
1124 // Find first previous significant lexed element and interpret.
1125 // A few symbols shares this code for disambiguation.
1126 bool preferRE = false;
1127 bool isHereDoc = sc.Match('<', '<');
1128 bool hereDocSpace = false; // for: SCALAR [whitespace] '<<'
1129 unsigned int bk = (sc.currentPos > 0) ? sc.currentPos - 1: 0;
1130 unsigned int bkend;
1131 sc.Complete();
1132 styler.Flush();
1133 if (styler.StyleAt(bk) == SCE_PL_DEFAULT)
1134 hereDocSpace = true;
1135 skipWhitespaceComment(styler, bk);
1136 if (bk == 0) {
1137 // avoid backward scanning breakage
1138 preferRE = true;
1139 } else {
1140 int bkstyle = styler.StyleAt(bk);
1141 int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
1142 switch (bkstyle) {
1143 case SCE_PL_OPERATOR:
1144 preferRE = true;
1145 if (bkch == ')' || bkch == ']') {
1146 preferRE = false;
1147 } else if (bkch == '}') {
1148 // backtrack by counting balanced brace pairs
1149 // needed to test for variables like ${}, @{} etc.
1150 bkstyle = styleBeforeBracePair(styler, bk);
1151 if (bkstyle == SCE_PL_SCALAR
1152 || bkstyle == SCE_PL_ARRAY
1153 || bkstyle == SCE_PL_HASH
1154 || bkstyle == SCE_PL_SYMBOLTABLE
1155 || bkstyle == SCE_PL_OPERATOR) {
1156 preferRE = false;
1158 } else if (bkch == '+' || bkch == '-') {
1159 if (bkch == static_cast<unsigned char>(styler.SafeGetCharAt(bk - 1))
1160 && bkch != static_cast<unsigned char>(styler.SafeGetCharAt(bk - 2)))
1161 // exceptions for operators: unary suffixes ++, --
1162 preferRE = false;
1164 break;
1165 case SCE_PL_IDENTIFIER:
1166 preferRE = true;
1167 bkstyle = styleCheckIdentifier(styler, bk);
1168 if ((bkstyle == 1) || (bkstyle == 2)) {
1169 // inputsymbol or var with "->" or "::" before identifier
1170 preferRE = false;
1171 } else if (bkstyle == 3) {
1172 // bare identifier, test cases follows:
1173 if (sc.ch == '/') {
1174 // if '/', /PATTERN/ unless digit/space immediately after '/'
1175 // if '//', always expect defined-or operator to follow identifier
1176 if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/')
1177 preferRE = false;
1178 } else if (sc.ch == '*' || sc.ch == '%') {
1179 if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*'))
1180 preferRE = false;
1181 } else if (sc.ch == '<') {
1182 if (IsASpace(sc.chNext) || sc.chNext == '=')
1183 preferRE = false;
1186 break;
1187 case SCE_PL_SCALAR: // for $var<< case:
1188 if (isHereDoc && hereDocSpace) // if SCALAR whitespace '<<', *always* a HERE doc
1189 preferRE = true;
1190 break;
1191 case SCE_PL_WORD:
1192 preferRE = true;
1193 // for HERE docs, always true
1194 if (sc.ch == '/') {
1195 // adopt heuristics similar to vim-style rules:
1196 // keywords always forced as /PATTERN/: split, if, elsif, while
1197 // everything else /PATTERN/ unless digit/space immediately after '/'
1198 // for '//', defined-or favoured unless special keywords
1199 bkend = bk + 1;
1200 while (bk > 0 && styler.StyleAt(bk - 1) == SCE_PL_WORD) {
1201 bk--;
1203 if (isPerlKeyword(bk, bkend, reWords, styler))
1204 break;
1205 if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/')
1206 preferRE = false;
1207 } else if (sc.ch == '*' || sc.ch == '%') {
1208 if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*'))
1209 preferRE = false;
1210 } else if (sc.ch == '<') {
1211 if (IsASpace(sc.chNext) || sc.chNext == '=')
1212 preferRE = false;
1214 break;
1216 // other styles uses the default, preferRE=false
1217 case SCE_PL_POD:
1218 case SCE_PL_HERE_Q:
1219 case SCE_PL_HERE_QQ:
1220 case SCE_PL_HERE_QX:
1221 preferRE = true;
1222 break;
1225 backFlag = BACK_NONE;
1226 if (isHereDoc) { // handle '<<', HERE doc
1227 if (preferRE) {
1228 sc.SetState(SCE_PL_HERE_DELIM);
1229 HereDoc.State = 0;
1230 } else { // << operator
1231 sc.SetState(SCE_PL_OPERATOR);
1232 sc.Forward();
1234 } else if (sc.ch == '*') { // handle '*', typeglob
1235 if (preferRE) {
1236 sc.SetState(SCE_PL_SYMBOLTABLE);
1237 if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1238 sc.Forward(2);
1239 } else if (sc.chNext == '{') {
1240 sc.ForwardSetState(SCE_PL_OPERATOR);
1241 } else {
1242 sc.Forward();
1244 } else {
1245 sc.SetState(SCE_PL_OPERATOR);
1246 if (sc.chNext == '*') // exponentiation
1247 sc.Forward();
1249 } else if (sc.ch == '%') { // handle '%', hash
1250 if (preferRE) {
1251 sc.SetState(SCE_PL_HASH);
1252 if (setHash.Contains(sc.chNext)) {
1253 sc.Forward();
1254 } else if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1255 sc.Forward(2);
1256 } else if (sc.chNext == '{') {
1257 sc.ForwardSetState(SCE_PL_OPERATOR);
1258 } else {
1259 sc.ChangeState(SCE_PL_OPERATOR);
1261 } else {
1262 sc.SetState(SCE_PL_OPERATOR);
1264 } else if (sc.ch == '<') { // handle '<', inputsymbol
1265 if (preferRE) {
1266 // forward scan
1267 int i = inputsymbolScan(styler, sc.currentPos, endPos);
1268 if (i > 0) {
1269 sc.SetState(SCE_PL_IDENTIFIER);
1270 sc.Forward(i);
1271 } else {
1272 sc.SetState(SCE_PL_OPERATOR);
1274 } else {
1275 sc.SetState(SCE_PL_OPERATOR);
1277 } else { // handle '/', regexp
1278 if (preferRE) {
1279 sc.SetState(SCE_PL_REGEX);
1280 Quote.New();
1281 Quote.Open(sc.ch);
1282 } else { // / and // operators
1283 sc.SetState(SCE_PL_OPERATOR);
1284 if (sc.chNext == '/') {
1285 sc.Forward();
1289 } else if (sc.ch == '=' // POD
1290 && setPOD.Contains(sc.chNext)
1291 && sc.atLineStart) {
1292 sc.SetState(SCE_PL_POD);
1293 backFlag = BACK_NONE;
1294 } else if (sc.ch == '-' && setWordStart.Contains(sc.chNext)) { // extended '-' cases
1295 unsigned int bk = sc.currentPos;
1296 unsigned int fw = 2;
1297 if (setSingleCharOp.Contains(sc.chNext) && // file test operators
1298 !setWord.Contains(sc.GetRelative(2))) {
1299 sc.SetState(SCE_PL_WORD);
1300 } else {
1301 // nominally a minus and bareword; find extent of bareword
1302 while (setWord.Contains(sc.GetRelative(fw)))
1303 fw++;
1304 sc.SetState(SCE_PL_OPERATOR);
1306 // force to bareword for hash key => or {variable literal} cases
1307 if (disambiguateBareword(styler, bk, bk + fw, backFlag, backPos, endPos) & 2) {
1308 sc.ChangeState(SCE_PL_IDENTIFIER);
1310 backFlag = BACK_NONE;
1311 } else if (sc.ch == '(' && sc.currentPos > 0) { // '(' or subroutine prototype
1312 sc.Complete();
1313 if (styleCheckSubPrototype(styler, sc.currentPos - 1)) {
1314 sc.SetState(SCE_PL_SUB_PROTOTYPE);
1315 backFlag = BACK_NONE;
1316 } else {
1317 sc.SetState(SCE_PL_OPERATOR);
1319 } else if (setPerlOperator.Contains(sc.ch)) { // operators
1320 sc.SetState(SCE_PL_OPERATOR);
1321 if (sc.Match('.', '.')) { // .. and ...
1322 sc.Forward();
1323 if (sc.chNext == '.') sc.Forward();
1325 } else if (sc.ch == 4 || sc.ch == 26) { // ^D and ^Z ends valid perl source
1326 sc.SetState(SCE_PL_DATASECTION);
1327 } else {
1328 // keep colouring defaults
1329 sc.Complete();
1333 sc.Complete();
1334 if (sc.state == SCE_PL_HERE_Q
1335 || sc.state == SCE_PL_HERE_QQ
1336 || sc.state == SCE_PL_HERE_QX
1337 || sc.state == SCE_PL_FORMAT) {
1338 styler.ChangeLexerState(sc.currentPos, styler.Length());
1340 sc.Complete();
// POD heading levels are packed into bits 7-4 of a fold level value:
// PERL_HEADFOLD_SHIFT is the bit position of that field and
// PERL_HEADFOLD_MASK selects it (0xF0 == 0xF << 4).
#define PERL_HEADFOLD_SHIFT 4
#define PERL_HEADFOLD_MASK 0xF0
1346 void SCI_METHOD LexerPerl::Fold(unsigned int startPos, int length, int /* initStyle */, IDocument *pAccess) {
1348 if (!options.fold)
1349 return;
1351 LexAccessor styler(pAccess);
1353 unsigned int endPos = startPos + length;
1354 int visibleChars = 0;
1355 int lineCurrent = styler.GetLine(startPos);
1357 // Backtrack to previous line in case need to fix its fold status
1358 if (startPos > 0) {
1359 if (lineCurrent > 0) {
1360 lineCurrent--;
1361 startPos = styler.LineStart(lineCurrent);
1365 int levelPrev = SC_FOLDLEVELBASE;
1366 if (lineCurrent > 0)
1367 levelPrev = styler.LevelAt(lineCurrent - 1) >> 16;
1368 int levelCurrent = levelPrev;
1369 char chNext = styler[startPos];
1370 char chPrev = styler.SafeGetCharAt(startPos - 1);
1371 int styleNext = styler.StyleAt(startPos);
1372 // Used at end of line to determine if the line was a package definition
1373 bool isPackageLine = false;
1374 int podHeading = 0;
1375 for (unsigned int i = startPos; i < endPos; i++) {
1376 char ch = chNext;
1377 chNext = styler.SafeGetCharAt(i + 1);
1378 int style = styleNext;
1379 styleNext = styler.StyleAt(i + 1);
1380 int stylePrevCh = (i) ? styler.StyleAt(i - 1):SCE_PL_DEFAULT;
1381 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1382 bool atLineStart = ((chPrev == '\r') || (chPrev == '\n')) || i == 0;
1383 // Comment folding
1384 if (options.foldComment && atEOL && IsCommentLine(lineCurrent, styler)) {
1385 if (!IsCommentLine(lineCurrent - 1, styler)
1386 && IsCommentLine(lineCurrent + 1, styler))
1387 levelCurrent++;
1388 else if (IsCommentLine(lineCurrent - 1, styler)
1389 && !IsCommentLine(lineCurrent + 1, styler))
1390 levelCurrent--;
1392 // {} [] block folding
1393 if (style == SCE_PL_OPERATOR) {
1394 if (ch == '{') {
1395 if (options.foldAtElse && levelCurrent < levelPrev)
1396 --levelPrev;
1397 levelCurrent++;
1398 } else if (ch == '}') {
1399 levelCurrent--;
1401 if (ch == '[') {
1402 if (options.foldAtElse && levelCurrent < levelPrev)
1403 --levelPrev;
1404 levelCurrent++;
1405 } else if (ch == ']') {
1406 levelCurrent--;
1409 // POD folding
1410 if (options.foldPOD && atLineStart) {
1411 if (style == SCE_PL_POD) {
1412 if (stylePrevCh != SCE_PL_POD && stylePrevCh != SCE_PL_POD_VERB)
1413 levelCurrent++;
1414 else if (styler.Match(i, "=cut"))
1415 levelCurrent = (levelCurrent & ~PERL_HEADFOLD_MASK) - 1;
1416 else if (styler.Match(i, "=head"))
1417 podHeading = PodHeadingLevel(i, styler);
1418 } else if (style == SCE_PL_DATASECTION) {
1419 if (ch == '=' && isascii(chNext) && isalpha(chNext) && levelCurrent == SC_FOLDLEVELBASE)
1420 levelCurrent++;
1421 else if (styler.Match(i, "=cut") && levelCurrent > SC_FOLDLEVELBASE)
1422 levelCurrent = (levelCurrent & ~PERL_HEADFOLD_MASK) - 1;
1423 else if (styler.Match(i, "=head"))
1424 podHeading = PodHeadingLevel(i, styler);
1425 // if package used or unclosed brace, level > SC_FOLDLEVELBASE!
1426 // reset needed as level test is vs. SC_FOLDLEVELBASE
1427 else if (stylePrevCh != SCE_PL_DATASECTION)
1428 levelCurrent = SC_FOLDLEVELBASE;
1431 // package folding
1432 if (options.foldPackage && atLineStart) {
1433 if (IsPackageLine(lineCurrent, styler)
1434 && !IsPackageLine(lineCurrent + 1, styler))
1435 isPackageLine = true;
1438 //heredoc folding
1439 switch (style) {
1440 case SCE_PL_HERE_QQ :
1441 case SCE_PL_HERE_Q :
1442 case SCE_PL_HERE_QX :
1443 switch (stylePrevCh) {
1444 case SCE_PL_HERE_QQ :
1445 case SCE_PL_HERE_Q :
1446 case SCE_PL_HERE_QX :
1447 //do nothing;
1448 break;
1449 default :
1450 levelCurrent++;
1451 break;
1453 break;
1454 default:
1455 switch (stylePrevCh) {
1456 case SCE_PL_HERE_QQ :
1457 case SCE_PL_HERE_Q :
1458 case SCE_PL_HERE_QX :
1459 levelCurrent--;
1460 break;
1461 default :
1462 //do nothing;
1463 break;
1465 break;
1468 //explicit folding
1469 if (options.foldCommentExplicit && style == SCE_PL_COMMENTLINE && ch == '#') {
1470 if (chNext == '{') {
1471 levelCurrent++;
1472 } else if (levelCurrent > SC_FOLDLEVELBASE && chNext == '}') {
1473 levelCurrent--;
1477 if (atEOL) {
1478 int lev = levelPrev;
1479 // POD headings occupy bits 7-4, leaving some breathing room for
1480 // non-standard practice -- POD sections stuck in blocks, etc.
1481 if (podHeading > 0) {
1482 levelCurrent = (lev & ~PERL_HEADFOLD_MASK) | (podHeading << PERL_HEADFOLD_SHIFT);
1483 lev = levelCurrent - 1;
1484 lev |= SC_FOLDLEVELHEADERFLAG;
1485 podHeading = 0;
1487 // Check if line was a package declaration
1488 // because packages need "special" treatment
1489 if (isPackageLine) {
1490 lev = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG;
1491 levelCurrent = SC_FOLDLEVELBASE + 1;
1492 isPackageLine = false;
1494 lev |= levelCurrent << 16;
1495 if (visibleChars == 0 && options.foldCompact)
1496 lev |= SC_FOLDLEVELWHITEFLAG;
1497 if ((levelCurrent > levelPrev) && (visibleChars > 0))
1498 lev |= SC_FOLDLEVELHEADERFLAG;
1499 if (lev != styler.LevelAt(lineCurrent)) {
1500 styler.SetLevel(lineCurrent, lev);
1502 lineCurrent++;
1503 levelPrev = levelCurrent;
1504 visibleChars = 0;
1506 if (!isspacechar(ch))
1507 visibleChars++;
1508 chPrev = ch;
1510 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1511 int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
1512 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
1515 LexerModule lmPerl(SCLEX_PERL, LexerPerl::LexerFactoryPerl, "perl", perlWordListDesc, 8);