scintilla/lexers/LexPerl.cxx

   1 // Scintilla source code edit control
   2 /** @file LexPerl.cxx
   3  ** Lexer for Perl.
   4  ** Converted to lexer object by "Udo Lechner" <dlchnr(at)gmx(dot)net>
   5  **/
   6 // Copyright 1998-2008 by Neil Hodgson <neilh@scintilla.org>
   7 // Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my>
   8 // The License.txt file describes the conditions under which this software may be distributed.
   9
  10 #include <stdlib.h>
  11 #include <string.h>
  12 #include <stdio.h>
  13 #include <stdarg.h>
  14 #include <assert.h>
  15 #include <ctype.h>
  16
  17 #include <string>
  18 #include <map>
  19
  20 #include "ILexer.h"
  21 #include "Scintilla.h"
  22 #include "SciLexer.h"
  23
  24 #include "WordList.h"
  25 #include "LexAccessor.h"
  26 #include "StyleContext.h"
  27 #include "CharacterSet.h"
  28 #include "LexerModule.h"
  29 #include "OptionSet.h"
  30
  31 #ifdef SCI_NAMESPACE
  32 using namespace Scintilla;
  33 #endif
  34
  35 // Info for HERE document handling from perldata.pod (reformatted):
  36 // ----------------------------------------------------------------
  37 // A line-oriented form of quoting is based on the shell ``here-doc'' syntax.
  38 // Following a << you specify a string to terminate the quoted material, and
  39 // all lines following the current line down to the terminating string are
  40 // the value of the item.
  41 // * The terminating string may be either an identifier (a word), or some
  42 //   quoted text.
  43 // * If quoted, the type of quotes you use determines the treatment of the
  44 //   text, just as in regular quoting.
  45 // * An unquoted identifier works like double quotes.
  46 // * There must be no space between the << and the identifier.
  47 //   (If you put a space it will be treated as a null identifier,
  48 //    which is valid, and matches the first empty line.)
  49 //   (This is deprecated, -w warns of this syntax)
  50 // * The terminating string must appear by itself (unquoted and
  51 //   with no surrounding whitespace) on the terminating line.
  52
  53 #define HERE_DELIM_MAX 256              // maximum length of HERE doc delimiter
  54
  55 #define PERLNUM_BINARY          1       // order is significant: 1-3 cannot have a dot
  56 #define PERLNUM_OCTAL           2
  57 #define PERLNUM_FLOAT_EXP       3       // exponent part only
  58 #define PERLNUM_HEX                     4       // may be a hex float
  59 #define PERLNUM_DECIMAL         5       // 1-5 are numbers; 6-7 are strings
  60 #define PERLNUM_VECTOR          6
  61 #define PERLNUM_V_VECTOR        7
  62 #define PERLNUM_BAD                     8
  63
  64 #define BACK_NONE               0       // lookback state for bareword disambiguation:
  65 #define BACK_OPERATOR   1       // whitespace/comments are insignificant
  66 #define BACK_KEYWORD    2       // operators/keywords are needed for disambiguation
  67
  68 #define SUB_BEGIN               0       // states for subroutine prototype scan:
  69 #define SUB_HAS_PROTO   1       // only 'prototype' attribute allows prototypes
  70 #define SUB_HAS_ATTRIB  2       // other attributes can exist leftward
  71 #define SUB_HAS_MODULE  3       // sub name can have a ::identifier part
  72 #define SUB_HAS_SUB             4       // 'sub' keyword
  73
  74 // all interpolated styles are different from their parent styles by a constant difference
  75 // we also assume SCE_PL_STRING_VAR is the interpolated style with the smallest value
  76 #define INTERPOLATE_SHIFT       (SCE_PL_STRING_VAR - SCE_PL_STRING)
  77
  78 static bool isPerlKeyword(Sci_PositionU start, Sci_PositionU end, WordList &keywords, LexAccessor &styler) {
  79         // old-style keyword matcher; needed because GetCurrent() needs
  80         // current segment to be committed, but we may abandon early...
  81         char s[100];
  82         Sci_PositionU i, len = end - start;
  83         if (len > 30) { len = 30; }
  84         for (i = 0; i < len; i++, start++) s[i] = styler[start];
  85         s[i] = '\0';
  86         return keywords.InList(s);
  87 }
  88
  89 static int disambiguateBareword(LexAccessor &styler, Sci_PositionU bk, Sci_PositionU fw,
  90         int backFlag, Sci_PositionU backPos, Sci_PositionU endPos) {
  91         // identifiers are recognized by Perl as barewords under some
  92         // conditions, the following attempts to do the disambiguation
  93         // by looking backward and forward; result in 2 LSB
  94         int result = 0;
  95         bool moreback = false;          // true if passed newline/comments
  96         bool brace = false;                     // true if opening brace found
  97         // if BACK_NONE, neither operator nor keyword, so skip test
  98         if (backFlag == BACK_NONE)
  99                 return result;
 100         // first look backwards past whitespace/comments to set EOL flag
 101         // (some disambiguation patterns must be on a single line)
 102         if (backPos <= static_cast<Sci_PositionU>(styler.LineStart(styler.GetLine(bk))))
 103                 moreback = true;
 104         // look backwards at last significant lexed item for disambiguation
 105         bk = backPos - 1;
 106         int ch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
 107         if (ch == '{' && !moreback) {
 108                 // {bareword: possible variable spec
 109                 brace = true;
 110         } else if ((ch == '&' && styler.SafeGetCharAt(bk - 1) != '&')
 111                 // &bareword: subroutine call
 112                 || styler.Match(bk - 1, "->")
 113                 // ->bareword: part of variable spec
 114                 || styler.Match(bk - 1, "::")
 115                 // ::bareword: part of module spec
 116                 || styler.Match(bk - 2, "sub")) {
 117                 // sub bareword: subroutine declaration
 118                 // (implied BACK_KEYWORD, no keywords end in 'sub'!)
 119                 result |= 1;
 120         }
 121         // next, scan forward after word past tab/spaces only;
 122         // if ch isn't one of '[{(,' we can skip the test
 123         if ((ch == '{' || ch == '(' || ch == '['|| ch == ',')
 124                 && fw < endPos) {
 125                 while (ch = static_cast<unsigned char>(styler.SafeGetCharAt(fw)),
 126                         IsASpaceOrTab(ch) && fw < endPos) {
 127                         fw++;
 128                 }
 129                 if ((ch == '}' && brace)
 130                         // {bareword}: variable spec
 131                         || styler.Match(fw, "=>")) {
 132                         // [{(, bareword=>: hash literal
 133                         result |= 2;
 134                 }
 135         }
 136         return result;
 137 }
 138
 139 static void skipWhitespaceComment(LexAccessor &styler, Sci_PositionU &p) {
 140         // when backtracking, we need to skip whitespace and comments
 141         int style;
 142         while ((p > 0) && (style = styler.StyleAt(p),
 143                 style == SCE_PL_DEFAULT || style == SCE_PL_COMMENTLINE))
 144                 p--;
 145 }
 146
 147 static int findPrevLexeme(LexAccessor &styler, Sci_PositionU &bk, int &style) {
 148         // scan backward past whitespace and comments to find a lexeme
 149         skipWhitespaceComment(styler, bk);
 150         if (bk == 0)
 151                 return 0;
 152         int sz = 1;
 153         style = styler.StyleAt(bk);
 154         while (bk > 0) {        // find extent of lexeme
 155                 if (styler.StyleAt(bk - 1) == style) {
 156                         bk--; sz++;
 157                 } else
 158                         break;
 159         }
 160         return sz;
 161 }
 162
 163 static int styleBeforeBracePair(LexAccessor &styler, Sci_PositionU bk) {
 164         // backtrack to find open '{' corresponding to a '}', balanced
 165         // return significant style to be tested for '/' disambiguation
 166         int braceCount = 1;
 167         if (bk == 0)
 168                 return SCE_PL_DEFAULT;
 169         while (--bk > 0) {
 170                 if (styler.StyleAt(bk) == SCE_PL_OPERATOR) {
 171                         int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
 172                         if (bkch == ';') {      // early out
 173                                 break;
 174                         } else if (bkch == '}') {
 175                                 braceCount++;
 176                         } else if (bkch == '{') {
 177                                 if (--braceCount == 0) break;
 178                         }
 179                 }
 180         }
 181         if (bk > 0 && braceCount == 0) {
 182                 // balanced { found, bk > 0, skip more whitespace/comments
 183                 bk--;
 184                 skipWhitespaceComment(styler, bk);
 185                 return styler.StyleAt(bk);
 186         }
 187         return SCE_PL_DEFAULT;
 188 }
 189
 190 static int styleCheckIdentifier(LexAccessor &styler, Sci_PositionU bk) {
 191         // backtrack to classify sub-styles of identifier under test
 192         // return sub-style to be tested for '/' disambiguation
 193         if (styler.SafeGetCharAt(bk) == '>')    // inputsymbol, like <foo>
 194                 return 1;
 195         // backtrack to check for possible "->" or "::" before identifier
 196         while (bk > 0 && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) {
 197                 bk--;
 198         }
 199         while (bk > 0) {
 200                 int bkstyle = styler.StyleAt(bk);
 201                 if (bkstyle == SCE_PL_DEFAULT
 202                         || bkstyle == SCE_PL_COMMENTLINE) {
 203                         // skip whitespace, comments
 204                 } else if (bkstyle == SCE_PL_OPERATOR) {
 205                         // test for "->" and "::"
 206                         if (styler.Match(bk - 1, "->") || styler.Match(bk - 1, "::"))
 207                                 return 2;
 208                 } else
 209                         return 3;       // bare identifier
 210                 bk--;
 211         }
 212         return 0;
 213 }
 214
 215 static int podLineScan(LexAccessor &styler, Sci_PositionU &pos, Sci_PositionU endPos) {
 216         // forward scan the current line to classify line for POD style
 217         int state = -1;
 218         while (pos < endPos) {
 219                 int ch = static_cast<unsigned char>(styler.SafeGetCharAt(pos));
 220                 if (ch == '\n' || ch == '\r') {
 221                         if (ch == '\r' && styler.SafeGetCharAt(pos + 1) == '\n') pos++;
 222                         break;
 223                 }
 224                 if (IsASpaceOrTab(ch)) {        // whitespace, take note
 225                         if (state == -1)
 226                                 state = SCE_PL_DEFAULT;
 227                 } else if (state == SCE_PL_DEFAULT) {   // verbatim POD line
 228                         state = SCE_PL_POD_VERB;
 229                 } else if (state != SCE_PL_POD_VERB) {  // regular POD line
 230                         state = SCE_PL_POD;
 231                 }
 232                 pos++;
 233         }
 234         if (state == -1)
 235                 state = SCE_PL_DEFAULT;
 236         return state;
 237 }
 238
 239 static bool styleCheckSubPrototype(LexAccessor &styler, Sci_PositionU bk) {
 240         // backtrack to identify if we're starting a subroutine prototype
 241         // we also need to ignore whitespace/comments, format is like:
 242         //     sub abc::pqr :const :prototype(...)
 243         // lexemes are tested in pairs, e.g. '::'+'pqr', ':'+'const', etc.
 244         // and a state machine generates legal subroutine syntax matches
 245         styler.Flush();
 246         int state = SUB_BEGIN;
 247         do {
 248                 // find two lexemes, lexeme 2 follows lexeme 1
 249                 int style2 = SCE_PL_DEFAULT;
 250                 Sci_PositionU pos2 = bk;
 251                 int len2 = findPrevLexeme(styler, pos2, style2);
 252                 int style1 = SCE_PL_DEFAULT;
 253                 Sci_PositionU pos1 = pos2;
 254                 if (pos1 > 0) pos1--;
 255                 int len1 = findPrevLexeme(styler, pos1, style1);
 256                 if (len1 == 0 || len2 == 0)             // lexeme pair must exist
 257                         break;
 258
 259                 // match parts of syntax, if invalid subroutine syntax, break off
 260                 if (style1 == SCE_PL_OPERATOR && len1 == 1 &&
 261                     styler.SafeGetCharAt(pos1) == ':') {        // ':'
 262                         if (style2 == SCE_PL_IDENTIFIER || style2 == SCE_PL_WORD) {
 263                                 if (len2 == 9 && styler.Match(pos2, "prototype")) {     // ':' 'prototype'
 264                                         if (state == SUB_BEGIN) {
 265                                                 state = SUB_HAS_PROTO;
 266                                         } else
 267                                                 break;
 268                                 } else {        // ':' <attribute>
 269                                         if (state == SUB_HAS_PROTO || state == SUB_HAS_ATTRIB) {
 270                                                 state = SUB_HAS_ATTRIB;
 271                                         } else
 272                                                 break;
 273                                 }
 274                         } else
 275                                 break;
 276                 } else if (style1 == SCE_PL_OPERATOR && len1 == 2 &&
 277                            styler.Match(pos1, "::")) {  // '::'
 278                         if (style2 == SCE_PL_IDENTIFIER) {      // '::' <identifier>
 279                                 state = SUB_HAS_MODULE;
 280                         } else
 281                                 break;
 282                 } else if (style1 == SCE_PL_WORD && len1 == 3 &&
 283                            styler.Match(pos1, "sub")) { // 'sub'
 284                         if (style2 == SCE_PL_IDENTIFIER) {      // 'sub' <identifier>
 285                                 state = SUB_HAS_SUB;
 286                         } else
 287                                 break;
 288                 } else
 289                         break;
 290                 bk = pos1;                      // set position for finding next lexeme pair
 291                 if (bk > 0) bk--;
 292         } while (state != SUB_HAS_SUB);
 293         return (state == SUB_HAS_SUB);
 294 }
 295
 296 static int actualNumStyle(int numberStyle) {
 297         if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) {
 298                 return SCE_PL_STRING;
 299         } else if (numberStyle == PERLNUM_BAD) {
 300                 return SCE_PL_ERROR;
 301         }
 302         return SCE_PL_NUMBER;
 303 }
 304
 305 static int opposite(int ch) {
 306         if (ch == '(') return ')';
 307         if (ch == '[') return ']';
 308         if (ch == '{') return '}';
 309         if (ch == '<') return '>';
 310         return ch;
 311 }
 312
 313 static bool IsCommentLine(Sci_Position line, LexAccessor &styler) {
 314         Sci_Position pos = styler.LineStart(line);
 315         Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
 316         for (Sci_Position i = pos; i < eol_pos; i++) {
 317                 char ch = styler[i];
 318                 int style = styler.StyleAt(i);
 319                 if (ch == '#' && style == SCE_PL_COMMENTLINE)
 320                         return true;
 321                 else if (!IsASpaceOrTab(ch))
 322                         return false;
 323         }
 324         return false;
 325 }
 326
 327 static bool IsPackageLine(Sci_Position line, LexAccessor &styler) {
 328         Sci_Position pos = styler.LineStart(line);
 329         int style = styler.StyleAt(pos);
 330         if (style == SCE_PL_WORD && styler.Match(pos, "package")) {
 331                 return true;
 332         }
 333         return false;
 334 }
 335
 336 static int PodHeadingLevel(Sci_Position pos, LexAccessor &styler) {
 337         int lvl = static_cast<unsigned char>(styler.SafeGetCharAt(pos + 5));
 338         if (lvl >= '1' && lvl <= '4') {
 339                 return lvl - '0';
 340         }
 341         return 0;
 342 }
 343
 344 // An individual named option for use in an OptionSet
 345
 346 // Options used for LexerPerl
 347 struct OptionsPerl {
 348         bool fold;
 349         bool foldComment;
 350         bool foldCompact;
 351         // Custom folding of POD and packages
 352         bool foldPOD;            // fold.perl.pod
 353         // Enable folding Pod blocks when using the Perl lexer.
 354         bool foldPackage;        // fold.perl.package
 355         // Enable folding packages when using the Perl lexer.
 356
 357         bool foldCommentExplicit;
 358
 359         bool foldAtElse;
 360
 361         OptionsPerl() {
 362                 fold = false;
 363                 foldComment = false;
 364                 foldCompact = true;
 365                 foldPOD = true;
 366                 foldPackage = true;
 367                 foldCommentExplicit = true;
 368                 foldAtElse = false;
 369         }
 370 };
 371
 372 static const char *const perlWordListDesc[] = {
 373         "Keywords",
 374         0
 375 };
 376
 377 struct OptionSetPerl : public OptionSet<OptionsPerl> {
 378         OptionSetPerl() {
 379                 DefineProperty("fold", &OptionsPerl::fold);
 380
 381                 DefineProperty("fold.comment", &OptionsPerl::foldComment);
 382
 383                 DefineProperty("fold.compact", &OptionsPerl::foldCompact);
 384
 385                 DefineProperty("fold.perl.pod", &OptionsPerl::foldPOD,
 386                         "Set to 0 to disable folding Pod blocks when using the Perl lexer.");
 387
 388                 DefineProperty("fold.perl.package", &OptionsPerl::foldPackage,
 389                         "Set to 0 to disable folding packages when using the Perl lexer.");
 390
 391                 DefineProperty("fold.perl.comment.explicit", &OptionsPerl::foldCommentExplicit,
 392                         "Set to 0 to disable explicit folding.");
 393
 394                 DefineProperty("fold.perl.at.else", &OptionsPerl::foldAtElse,
 395                                "This option enables Perl folding on a \"} else {\" line of an if statement.");
 396
 397                 DefineWordListSets(perlWordListDesc);
 398         }
 399 };
 400
 401 class LexerPerl : public ILexer {
 402         CharacterSet setWordStart;
 403         CharacterSet setWord;
 404         CharacterSet setSpecialVar;
 405         CharacterSet setControlVar;
 406         WordList keywords;
 407         OptionsPerl options;
 408         OptionSetPerl osPerl;
 409 public:
 410         LexerPerl() :
 411                 setWordStart(CharacterSet::setAlpha, "_", 0x80, true),
 412                 setWord(CharacterSet::setAlphaNum, "_", 0x80, true),
 413                 setSpecialVar(CharacterSet::setNone, "\"$;<>&`'+,./\\%:=~!?@[]"),
 414                 setControlVar(CharacterSet::setNone, "ACDEFHILMNOPRSTVWX") {
 415         }
 416         virtual ~LexerPerl() {
 417         }
 418         void SCI_METHOD Release() {
 419                 delete this;
 420         }
 421         int SCI_METHOD Version() const {
 422                 return lvOriginal;
 423         }
 424         const char *SCI_METHOD PropertyNames() {
 425                 return osPerl.PropertyNames();
 426         }
 427         int SCI_METHOD PropertyType(const char *name) {
 428                 return osPerl.PropertyType(name);
 429         }
 430         const char *SCI_METHOD DescribeProperty(const char *name) {
 431                 return osPerl.DescribeProperty(name);
 432         }
 433         Sci_Position SCI_METHOD PropertySet(const char *key, const char *val);
 434         const char *SCI_METHOD DescribeWordListSets() {
 435                 return osPerl.DescribeWordListSets();
 436         }
 437         Sci_Position SCI_METHOD WordListSet(int n, const char *wl);
 438         void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess);
 439         void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess);
 440
 441         void *SCI_METHOD PrivateCall(int, void *) {
 442                 return 0;
 443         }
 444
 445         static ILexer *LexerFactoryPerl() {
 446                 return new LexerPerl();
 447         }
 448         int InputSymbolScan(StyleContext &sc);
 449         void InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern=false);
 450 };
 451
 452 Sci_Position SCI_METHOD LexerPerl::PropertySet(const char *key, const char *val) {
 453         if (osPerl.PropertySet(&options, key, val)) {
 454                 return 0;
 455         }
 456         return -1;
 457 }
 458
 459 Sci_Position SCI_METHOD LexerPerl::WordListSet(int n, const char *wl) {
 460         WordList *wordListN = 0;
 461         switch (n) {
 462         case 0:
 463                 wordListN = &keywords;
 464                 break;
 465         }
 466         Sci_Position firstModification = -1;
 467         if (wordListN) {
 468                 WordList wlNew;
 469                 wlNew.Set(wl);
 470                 if (*wordListN != wlNew) {
 471                         wordListN->Set(wl);
 472                         firstModification = 0;
 473                 }
 474         }
 475         return firstModification;
 476 }
 477
 478 int LexerPerl::InputSymbolScan(StyleContext &sc) {
 479         // forward scan for matching > on same line; file handles
 480         int c, sLen = 0;
 481         while ((c = sc.GetRelativeCharacter(++sLen)) != 0) {
 482                 if (c == '\r' || c == '\n') {
 483                         return 0;
 484                 } else if (c == '>') {
 485                         if (sc.Match("<=>"))    // '<=>' case
 486                                 return 0;
 487                         return sLen;
 488                 }
 489         }
 490         return 0;
 491 }
 492
 493 void LexerPerl::InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern) {
 494         // interpolate a segment (with no active backslashes or delimiters within)
 495         // switch in or out of an interpolation style or continue current style
 496         // commit variable patterns if found, trim segment, repeat until done
 497         while (maxSeg > 0) {
 498                 bool isVar = false;
 499                 int sLen = 0;
 500                 if ((maxSeg > 1) && (sc.ch == '$' || sc.ch == '@')) {
 501                         // $#[$]*word [$@][$]*word (where word or {word} is always present)
 502                         bool braces = false;
 503                         sLen = 1;
 504                         if (sc.ch == '$' && sc.chNext == '#') { // starts with $#
 505                                 sLen++;
 506                         }
 507                         while ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '$'))       // >0 $ dereference within
 508                                 sLen++;
 509                         if ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '{')) {        // { start for {word}
 510                                 sLen++;
 511                                 braces = true;
 512                         }
 513                         if (maxSeg > sLen) {
 514                                 int c = sc.GetRelativeCharacter(sLen);
 515                                 if (setWordStart.Contains(c)) { // word (various)
 516                                         sLen++;
 517                                         isVar = true;
 518                                         while (maxSeg > sLen) {
 519                                                 if (!setWord.Contains(sc.GetRelativeCharacter(sLen)))
 520                                                         break;
 521                                                 sLen++;
 522                                         }
 523                                 } else if (braces && IsADigit(c) && (sLen == 2)) {      // digit for ${digit}
 524                                         sLen++;
 525                                         isVar = true;
 526                                 }
 527                         }
 528                         if (braces) {
 529                                 if ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '}')) {        // } end for {word}
 530                                         sLen++;
 531                                 } else
 532                                         isVar = false;
 533                         }
 534                 }
 535                 if (!isVar && (maxSeg > 1)) {   // $- or @-specific variable patterns
 536                         int c = sc.chNext;
 537                         if (sc.ch == '$') {
 538                                 sLen = 1;
 539                                 if (IsADigit(c)) {      // $[0-9] and slurp trailing digits
 540                                         sLen++;
 541                                         isVar = true;
 542                                         while ((maxSeg > sLen) && IsADigit(sc.GetRelativeCharacter(sLen)))
 543                                                 sLen++;
 544                                 } else if (setSpecialVar.Contains(c)) { // $ special variables
 545                                         sLen++;
 546                                         isVar = true;
 547                                 } else if (!isPattern && ((c == '(') || (c == ')') || (c == '|'))) {    // $ additional
 548                                         sLen++;
 549                                         isVar = true;
 550                                 } else if (c == '^') {  // $^A control-char style
 551                                         sLen++;
 552                                         if ((maxSeg > sLen) && setControlVar.Contains(sc.GetRelativeCharacter(sLen))) {
 553                                                 sLen++;
 554                                                 isVar = true;
 555                                         }
 556                                 }
 557                         } else if (sc.ch == '@') {
 558                                 sLen = 1;
 559                                 if (!isPattern && ((c == '+') || (c == '-'))) { // @ specials non-pattern
 560                                         sLen++;
 561                                         isVar = true;
 562                                 }
 563                         }
 564                 }
 565                 if (isVar) {    // commit as interpolated variable or normal character
 566                         if (sc.state < SCE_PL_STRING_VAR)
 567                                 sc.SetState(sc.state + INTERPOLATE_SHIFT);
 568                         sc.Forward(sLen);
 569                         maxSeg -= sLen;
 570                 } else {
 571                         if (sc.state >= SCE_PL_STRING_VAR)
 572                                 sc.SetState(sc.state - INTERPOLATE_SHIFT);
 573                         sc.Forward();
 574                         maxSeg--;
 575                 }
 576         }
 577         if (sc.state >= SCE_PL_STRING_VAR)
 578                 sc.SetState(sc.state - INTERPOLATE_SHIFT);
 579 }
 580
 581 void SCI_METHOD LexerPerl::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
 582         LexAccessor styler(pAccess);
 583
 584         // keywords that forces /PATTERN/ at all times; should track vim's behaviour
 585         WordList reWords;
 586         reWords.Set("elsif if split while");
 587
 588         // charset classes
 589         CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMAC");
 590         // lexing of "%*</" operators is non-trivial; these are missing in the set below
 591         CharacterSet setPerlOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;>,?!.~");
 592         CharacterSet setQDelim(CharacterSet::setNone, "qrwx");
 593         CharacterSet setModifiers(CharacterSet::setAlpha);
 594         CharacterSet setPreferRE(CharacterSet::setNone, "*/<%");
 595         // setArray and setHash also accepts chars for special vars like $_,
 596         // which are then truncated when the next char does not match setVar
 597         CharacterSet setVar(CharacterSet::setAlphaNum, "#$_'", 0x80, true);
 598         CharacterSet setArray(CharacterSet::setAlpha, "#$_+-", 0x80, true);
 599         CharacterSet setHash(CharacterSet::setAlpha, "#$_!^+-", 0x80, true);
 600         CharacterSet &setPOD = setModifiers;
 601         CharacterSet setNonHereDoc(CharacterSet::setDigits, "=$@");
 602         CharacterSet setHereDocDelim(CharacterSet::setAlphaNum, "_");
 603         CharacterSet setSubPrototype(CharacterSet::setNone, "\\[$@%&*+];_ \t");
 604         CharacterSet setRepetition(CharacterSet::setDigits, ")\"'");
 605         // for format identifiers
 606         CharacterSet setFormatStart(CharacterSet::setAlpha, "_=");
 607         CharacterSet &setFormat = setHereDocDelim;
 608
 609         // Lexer for perl often has to backtrack to start of current style to determine
 610         // which characters are being used as quotes, how deeply nested is the
 611         // start position and what the termination string is for HERE documents.
 612
 613         class HereDocCls {      // Class to manage HERE doc sequence
 614         public:
 615                 int State;
 616                 // 0: '<<' encountered
 617                 // 1: collect the delimiter
 618                 // 2: here doc text (lines after the delimiter)
 619                 int Quote;              // the char after '<<'
 620                 bool Quoted;            // true if Quote in ('\'','"','`')
 621                 int DelimiterLength;    // strlen(Delimiter)
 622                 char Delimiter[HERE_DELIM_MAX]; // the Delimiter
 623                 HereDocCls() {
 624                         State = 0;
 625                         Quote = 0;
 626                         Quoted = false;
 627                         DelimiterLength = 0;
 628                         Delimiter[0] = '\0';
 629                 }
 630                 void Append(int ch) {
 631                         Delimiter[DelimiterLength++] = static_cast<char>(ch);
 632                         Delimiter[DelimiterLength] = '\0';
 633                 }
 634                 ~HereDocCls() {
 635                 }
 636         };
 637         HereDocCls HereDoc;             // TODO: FIFO for stacked here-docs
 638
 639         class QuoteCls {        // Class to manage quote pairs
 640         public:
 641                 int Rep;
 642                 int Count;
 643                 int Up, Down;
 644                 QuoteCls() {
 645                         New(1);
 646                 }
 647                 void New(int r = 1) {
 648                         Rep   = r;
 649                         Count = 0;
 650                         Up    = '\0';
 651                         Down  = '\0';
 652                 }
 653                 void Open(int u) {
 654                         Count++;
 655                         Up    = u;
 656                         Down  = opposite(Up);
 657                 }
 658         };
 659         QuoteCls Quote;
 660
 661         // additional state for number lexing
 662         int numState = PERLNUM_DECIMAL;
 663         int dotCount = 0;
 664
 665         Sci_PositionU endPos = startPos + length;
 666
 667         // Backtrack to beginning of style if required...
 668         // If in a long distance lexical state, backtrack to find quote characters.
 669         // Includes strings (may be multi-line), numbers (additional state), format
 670         // bodies, as well as POD sections.
 671         if (initStyle == SCE_PL_HERE_Q
 672             || initStyle == SCE_PL_HERE_QQ
 673             || initStyle == SCE_PL_HERE_QX
 674             || initStyle == SCE_PL_FORMAT
 675             || initStyle == SCE_PL_HERE_QQ_VAR
 676             || initStyle == SCE_PL_HERE_QX_VAR
 677            ) {
 678                 // backtrack through multiple styles to reach the delimiter start
 679                 int delim = (initStyle == SCE_PL_FORMAT) ? SCE_PL_FORMAT_IDENT:SCE_PL_HERE_DELIM;
 680                 while ((startPos > 1) && (styler.StyleAt(startPos) != delim)) {
 681                         startPos--;
 682                 }
 683                 startPos = styler.LineStart(styler.GetLine(startPos));
 684                 initStyle = styler.StyleAt(startPos - 1);
 685         }
 686         if (initStyle == SCE_PL_STRING
 687             || initStyle == SCE_PL_STRING_QQ
 688             || initStyle == SCE_PL_BACKTICKS
 689             || initStyle == SCE_PL_STRING_QX
 690             || initStyle == SCE_PL_REGEX
 691             || initStyle == SCE_PL_STRING_QR
 692             || initStyle == SCE_PL_REGSUBST
 693             || initStyle == SCE_PL_STRING_VAR
 694             || initStyle == SCE_PL_STRING_QQ_VAR
 695             || initStyle == SCE_PL_BACKTICKS_VAR
 696             || initStyle == SCE_PL_STRING_QX_VAR
 697             || initStyle == SCE_PL_REGEX_VAR
 698             || initStyle == SCE_PL_STRING_QR_VAR
 699             || initStyle == SCE_PL_REGSUBST_VAR
 700            ) {
 701                 // for interpolation, must backtrack through a mix of two different styles
 702                 int otherStyle = (initStyle >= SCE_PL_STRING_VAR) ?
 703                         initStyle - INTERPOLATE_SHIFT : initStyle + INTERPOLATE_SHIFT;
 704                 while (startPos > 1) {
 705                         int st = styler.StyleAt(startPos - 1);
 706                         if ((st != initStyle) && (st != otherStyle))
 707                                 break;
 708                         startPos--;
 709                 }
 710                 initStyle = SCE_PL_DEFAULT;
 711         } else if (initStyle == SCE_PL_STRING_Q
 712                 || initStyle == SCE_PL_STRING_QW
 713                 || initStyle == SCE_PL_XLAT
 714                 || initStyle == SCE_PL_CHARACTER
 715                 || initStyle == SCE_PL_NUMBER
 716                 || initStyle == SCE_PL_IDENTIFIER
 717                 || initStyle == SCE_PL_ERROR
 718                 || initStyle == SCE_PL_SUB_PROTOTYPE
 719            ) {
 720                 while ((startPos > 1) && (styler.StyleAt(startPos - 1) == initStyle)) {
 721                         startPos--;
 722                 }
 723                 initStyle = SCE_PL_DEFAULT;
 724         } else if (initStyle == SCE_PL_POD
 725                 || initStyle == SCE_PL_POD_VERB
 726                   ) {
 727                 // POD backtracking finds preceding blank lines and goes back past them
 728                 Sci_Position ln = styler.GetLine(startPos);
 729                 if (ln > 0) {
 730                         initStyle = styler.StyleAt(styler.LineStart(--ln));
 731                         if (initStyle == SCE_PL_POD || initStyle == SCE_PL_POD_VERB) {
 732                                 while (ln > 0 && styler.GetLineState(ln) == SCE_PL_DEFAULT)
 733                                         ln--;
 734                         }
 735                         startPos = styler.LineStart(++ln);
 736                         initStyle = styler.StyleAt(startPos - 1);
 737                 } else {
 738                         startPos = 0;
 739                         initStyle = SCE_PL_DEFAULT;
 740                 }
 741         }
 742
 743         // backFlag, backPos are additional state to aid identifier corner cases.
 744         // Look backwards past whitespace and comments in order to detect either
 745         // operator or keyword. Later updated as we go along.
 746         int backFlag = BACK_NONE;
 747         Sci_PositionU backPos = startPos;
 748         if (backPos > 0) {
 749                 backPos--;
 750                 skipWhitespaceComment(styler, backPos);
 751                 if (styler.StyleAt(backPos) == SCE_PL_OPERATOR)
 752                         backFlag = BACK_OPERATOR;
 753                 else if (styler.StyleAt(backPos) == SCE_PL_WORD)
 754                         backFlag = BACK_KEYWORD;
 755                 backPos++;
 756         }
 757
 758         StyleContext sc(startPos, endPos - startPos, initStyle, styler, static_cast<char>(STYLE_MAX));
 759
 760         for (; sc.More(); sc.Forward()) {
 761
 762                 // Determine if the current state should terminate.
 763                 switch (sc.state) {
 764                 case SCE_PL_OPERATOR:
 765                         sc.SetState(SCE_PL_DEFAULT);
 766                         backFlag = BACK_OPERATOR;
 767                         backPos = sc.currentPos;
 768                         break;
 769                 case SCE_PL_IDENTIFIER:         // identifier, bareword, inputsymbol
 770                         if ((!setWord.Contains(sc.ch) && sc.ch != '\'')
 771                                 || sc.Match('.', '.')
 772                                 || sc.chPrev == '>') {  // end of inputsymbol
 773                                 sc.SetState(SCE_PL_DEFAULT);
 774                         }
 775                         break;
 776                 case SCE_PL_WORD:               // keyword, plus special cases
 777                         if (!setWord.Contains(sc.ch)) {
 778                                 char s[100];
 779                                 sc.GetCurrent(s, sizeof(s));
 780                                 if ((strcmp(s, "__DATA__") == 0) || (strcmp(s, "__END__") == 0)) {
 781                                         sc.ChangeState(SCE_PL_DATASECTION);
 782                                 } else {
 783                                         if ((strcmp(s, "format") == 0)) {
 784                                                 sc.SetState(SCE_PL_FORMAT_IDENT);
 785                                                 HereDoc.State = 0;
 786                                         } else {
 787                                                 sc.SetState(SCE_PL_DEFAULT);
 788                                         }
 789                                         backFlag = BACK_KEYWORD;
 790                                         backPos = sc.currentPos;
 791                                 }
 792                         }
 793                         break;
 794                 case SCE_PL_SCALAR:
 795                 case SCE_PL_ARRAY:
 796                 case SCE_PL_HASH:
 797                 case SCE_PL_SYMBOLTABLE:
 798                         if (sc.Match(':', ':')) {       // skip ::
 799                                 sc.Forward();
 800                         } else if (!setVar.Contains(sc.ch)) {
 801                                 if (sc.LengthCurrent() == 1) {
 802                                         // Special variable: $(, $_ etc.
 803                                         sc.Forward();
 804                                 }
 805                                 sc.SetState(SCE_PL_DEFAULT);
 806                         }
 807                         break;
 808                 case SCE_PL_NUMBER:
 809                         // if no early break, number style is terminated at "(go through)"
 810                         if (sc.ch == '.') {
 811                                 if (sc.chNext == '.') {
 812                                         // double dot is always an operator (go through)
 813                                 } else if (numState <= PERLNUM_FLOAT_EXP) {
 814                                         // non-decimal number or float exponent, consume next dot
 815                                         sc.SetState(SCE_PL_OPERATOR);
 816                                         break;
 817                                 } else {        // decimal or vectors allows dots
 818                                         dotCount++;
 819                                         if (numState == PERLNUM_DECIMAL) {
 820                                                 if (dotCount <= 1)      // number with one dot in it
 821                                                         break;
 822                                                 if (IsADigit(sc.chNext)) {      // really a vector
 823                                                         numState = PERLNUM_VECTOR;
 824                                                         break;
 825                                                 }
 826                                                 // number then dot (go through)
 827                                         } else if (numState == PERLNUM_HEX) {
 828                                                 if (dotCount <= 1 && IsADigit(sc.chNext, 16)) {
 829                                                         break;  // hex with one dot is a hex float
 830                                                 } else {
 831                                                         sc.SetState(SCE_PL_OPERATOR);
 832                                                         break;
 833                                                 }
 834                                                 // hex then dot (go through)
 835                                         } else if (IsADigit(sc.chNext)) // vectors
 836                                                 break;
 837                                         // vector then dot (go through)
 838                                 }
 839                         } else if (sc.ch == '_') {
 840                                 // permissive underscoring for number and vector literals
 841                                 break;
 842                         } else if (numState == PERLNUM_DECIMAL) {
 843                                 if (sc.ch == 'E' || sc.ch == 'e') {     // exponent, sign
 844                                         numState = PERLNUM_FLOAT_EXP;
 845                                         if (sc.chNext == '+' || sc.chNext == '-') {
 846                                                 sc.Forward();
 847                                         }
 848                                         break;
 849                                 } else if (IsADigit(sc.ch))
 850                                         break;
 851                                 // number then word (go through)
 852                         } else if (numState == PERLNUM_HEX) {
 853                                 if (sc.ch == 'P' || sc.ch == 'p') {     // hex float exponent, sign
 854                                         numState = PERLNUM_FLOAT_EXP;
 855                                         if (sc.chNext == '+' || sc.chNext == '-') {
 856                                                 sc.Forward();
 857                                         }
 858                                         break;
 859                                 } else if (IsADigit(sc.ch, 16))
 860                                         break;
 861                                 // hex or hex float then word (go through)
 862                         } else if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) {
 863                                 if (IsADigit(sc.ch))    // vector
 864                                         break;
 865                                 if (setWord.Contains(sc.ch) && dotCount == 0) { // change to word
 866                                         sc.ChangeState(SCE_PL_IDENTIFIER);
 867                                         break;
 868                                 }
 869                                 // vector then word (go through)
 870                         } else if (IsADigit(sc.ch)) {
 871                                 if (numState == PERLNUM_FLOAT_EXP) {
 872                                         break;
 873                                 } else if (numState == PERLNUM_OCTAL) {
 874                                         if (sc.ch <= '7') break;
 875                                 } else if (numState == PERLNUM_BINARY) {
 876                                         if (sc.ch <= '1') break;
 877                                 }
 878                                 // mark invalid octal, binary numbers (go through)
 879                                 numState = PERLNUM_BAD;
 880                                 break;
 881                         }
 882                         // complete current number or vector
 883                         sc.ChangeState(actualNumStyle(numState));
 884                         sc.SetState(SCE_PL_DEFAULT);
 885                         break;
 886                 case SCE_PL_COMMENTLINE:
 887                         if (sc.atLineEnd) {
 888                                 sc.SetState(SCE_PL_DEFAULT);
 889                         }
 890                         break;
 891                 case SCE_PL_HERE_DELIM:
 892                         if (HereDoc.State == 0) { // '<<' encountered
 893                                 int delim_ch = sc.chNext;
 894                                 Sci_Position ws_skip = 0;
 895                                 HereDoc.State = 1;      // pre-init HERE doc class
 896                                 HereDoc.Quote = sc.chNext;
 897                                 HereDoc.Quoted = false;
 898                                 HereDoc.DelimiterLength = 0;
 899                                 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
 900                                 if (IsASpaceOrTab(delim_ch)) {
 901                                         // skip whitespace; legal only for quoted delimiters
 902                                         Sci_PositionU i = sc.currentPos + 1;
 903                                         while ((i < endPos) && IsASpaceOrTab(delim_ch)) {
 904                                                 i++;
 905                                                 delim_ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
 906                                         }
 907                                         ws_skip = i - sc.currentPos - 1;
 908                                 }
 909                                 if (delim_ch == '\'' || delim_ch == '"' || delim_ch == '`') {
 910                                         // a quoted here-doc delimiter; skip any whitespace
 911                                         sc.Forward(ws_skip + 1);
 912                                         HereDoc.Quote = delim_ch;
 913                                         HereDoc.Quoted = true;
 914                                 } else if ((ws_skip == 0 && setNonHereDoc.Contains(sc.chNext))
 915                                         || ws_skip > 0) {
 916                                         // left shift << or <<= operator cases
 917                                         // restore position if operator
 918                                         sc.ChangeState(SCE_PL_OPERATOR);
 919                                         sc.ForwardSetState(SCE_PL_DEFAULT);
 920                                         backFlag = BACK_OPERATOR;
 921                                         backPos = sc.currentPos;
 922                                         HereDoc.State = 0;
 923                                 } else {
 924                                         // specially handle initial '\' for identifier
 925                                         if (ws_skip == 0 && HereDoc.Quote == '\\')
 926                                                 sc.Forward();
 927                                         // an unquoted here-doc delimiter, no special handling
 928                                         // (cannot be prefixed by spaces/tabs), or
 929                                         // symbols terminates; deprecated zero-length delimiter
 930                                 }
 931                         } else if (HereDoc.State == 1) { // collect the delimiter
 932                                 backFlag = BACK_NONE;
 933                                 if (HereDoc.Quoted) { // a quoted here-doc delimiter
 934                                         if (sc.ch == HereDoc.Quote) { // closing quote => end of delimiter
 935                                                 sc.ForwardSetState(SCE_PL_DEFAULT);
 936                                         } else if (!sc.atLineEnd) {
 937                                                 if (sc.Match('\\', static_cast<char>(HereDoc.Quote))) { // escaped quote
 938                                                         sc.Forward();
 939                                                 }
 940                                                 if (sc.ch != '\r') {    // skip CR if CRLF
 941                                                         int i = 0;                      // else append char, possibly an extended char
 942                                                         while (i < sc.width) {
 943                                                                 HereDoc.Append(static_cast<unsigned char>(styler.SafeGetCharAt(sc.currentPos + i)));
 944                                                                 i++;
 945                                                         }
 946                                                 }
 947                                         }
 948                                 } else { // an unquoted here-doc delimiter, no extended charsets
 949                                         if (setHereDocDelim.Contains(sc.ch)) {
 950                                                 HereDoc.Append(sc.ch);
 951                                         } else {
 952                                                 sc.SetState(SCE_PL_DEFAULT);
 953                                         }
 954                                 }
 955                                 if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) {
 956                                         sc.SetState(SCE_PL_ERROR);
 957                                         HereDoc.State = 0;
 958                                 }
 959                         }
 960                         break;
 961                 case SCE_PL_HERE_Q:
 962                 case SCE_PL_HERE_QQ:
 963                 case SCE_PL_HERE_QX:
 964                         // also implies HereDoc.State == 2
 965                         sc.Complete();
 966                         if (HereDoc.DelimiterLength == 0 || sc.Match(HereDoc.Delimiter)) {
 967                                 int c = sc.GetRelative(HereDoc.DelimiterLength);
 968                                 if (c == '\r' || c == '\n') {   // peek first, do not consume match
 969                                         sc.ForwardBytes(HereDoc.DelimiterLength);
 970                                         sc.SetState(SCE_PL_DEFAULT);
 971                                         backFlag = BACK_NONE;
 972                                         HereDoc.State = 0;
 973                                         if (!sc.atLineEnd)
 974                                                 sc.Forward();
 975                                         break;
 976                                 }
 977                         }
 978                         if (sc.state == SCE_PL_HERE_Q) {        // \EOF and 'EOF' non-interpolated
 979                                 while (!sc.atLineEnd)
 980                                         sc.Forward();
 981                                 break;
 982                         }
 983                         while (!sc.atLineEnd) {         // "EOF" and `EOF` interpolated
 984                                 int c, sLen = 0, endType = 0;
 985                                 while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
 986                                         // scan to break string into segments
 987                                         if (c == '\\') {
 988                                                 endType = 1; break;
 989                                         } else if (c == '\r' || c == '\n') {
 990                                                 endType = 2; break;
 991                                         }
 992                                         sLen++;
 993                                 }
 994                                 if (sLen > 0)   // process non-empty segments
 995                                         InterpolateSegment(sc, sLen);
 996                                 if (endType == 1) {
 997                                         sc.Forward();
 998                                         // \ at end-of-line does not appear to have any effect, skip
 999                                         if (sc.ch != '\r' && sc.ch != '\n')
1000                                                 sc.Forward();
1001                                 } else if (endType == 2) {
1002                                         if (!sc.atLineEnd)
1003                                                 sc.Forward();
1004                                 }
1005                         }
1006                         break;
1007                 case SCE_PL_POD:
1008                 case SCE_PL_POD_VERB: {
1009                                 Sci_PositionU fw = sc.currentPos;
1010                                 Sci_Position ln = styler.GetLine(fw);
1011                                 if (sc.atLineStart && sc.Match("=cut")) {       // end of POD
1012                                         sc.SetState(SCE_PL_POD);
1013                                         sc.Forward(4);
1014                                         sc.SetState(SCE_PL_DEFAULT);
1015                                         styler.SetLineState(ln, SCE_PL_POD);
1016                                         break;
1017                                 }
1018                                 int pod = podLineScan(styler, fw, endPos);      // classify POD line
1019                                 styler.SetLineState(ln, pod);
1020                                 if (pod == SCE_PL_DEFAULT) {
1021                                         if (sc.state == SCE_PL_POD_VERB) {
1022                                                 Sci_PositionU fw2 = fw;
1023                                                 while (fw2 < (endPos - 1) && pod == SCE_PL_DEFAULT) {
1024                                                         fw = fw2++;     // penultimate line (last blank line)
1025                                                         pod = podLineScan(styler, fw2, endPos);
1026                                                         styler.SetLineState(styler.GetLine(fw2), pod);
1027                                                 }
1028                                                 if (pod == SCE_PL_POD) {        // truncate verbatim POD early
1029                                                         sc.SetState(SCE_PL_POD);
1030                                                 } else
1031                                                         fw = fw2;
1032                                         }
1033                                 } else {
1034                                         if (pod == SCE_PL_POD_VERB      // still part of current paragraph
1035                                                 && (styler.GetLineState(ln - 1) == SCE_PL_POD)) {
1036                                                 pod = SCE_PL_POD;
1037                                                 styler.SetLineState(ln, pod);
1038                                         } else if (pod == SCE_PL_POD
1039                                                 && (styler.GetLineState(ln - 1) == SCE_PL_POD_VERB)) {
1040                                                 pod = SCE_PL_POD_VERB;
1041                                                 styler.SetLineState(ln, pod);
1042                                         }
1043                                         sc.SetState(pod);
1044                                 }
1045                                 sc.ForwardBytes(fw - sc.currentPos);    // commit style
1046                         }
1047                         break;
1048                 case SCE_PL_REGEX:
1049                 case SCE_PL_STRING_QR:
1050                         if (Quote.Rep <= 0) {
1051                                 if (!setModifiers.Contains(sc.ch))
1052                                         sc.SetState(SCE_PL_DEFAULT);
1053                         } else if (!Quote.Up && !IsASpace(sc.ch)) {
1054                                 Quote.Open(sc.ch);
1055                         } else {
1056                                 int c, sLen = 0, endType = 0;
1057                                 while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
1058                                         // scan to break string into segments
1059                                         if (IsASpace(c)) {
1060                                                 break;
1061                                         } else if (c == '\\' && Quote.Up != '\\') {
1062                                                 endType = 1; break;
1063                                         } else if (c == Quote.Down) {
1064                                                 Quote.Count--;
1065                                                 if (Quote.Count == 0) {
1066                                                         Quote.Rep--;
1067                                                         break;
1068                                                 }
1069                                         } else if (c == Quote.Up)
1070                                                 Quote.Count++;
1071                                         sLen++;
1072                                 }
1073                                 if (sLen > 0) { // process non-empty segments
1074                                         if (Quote.Up != '\'') {
1075                                                 InterpolateSegment(sc, sLen, true);
1076                                         } else          // non-interpolated path
1077                                                 sc.Forward(sLen);
1078                                 }
1079                                 if (endType == 1)
1080                                         sc.Forward();
1081                         }
1082                         break;
1083                 case SCE_PL_REGSUBST:
1084                 case SCE_PL_XLAT:
1085                         if (Quote.Rep <= 0) {
1086                                 if (!setModifiers.Contains(sc.ch))
1087                                         sc.SetState(SCE_PL_DEFAULT);
1088                         } else if (!Quote.Up && !IsASpace(sc.ch)) {
1089                                 Quote.Open(sc.ch);
1090                         } else {
1091                                 int c, sLen = 0, endType = 0;
1092                                 bool isPattern = (Quote.Rep == 2);
1093                                 while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
1094                                         // scan to break string into segments
1095                                         if (c == '\\' && Quote.Up != '\\') {
1096                                                 endType = 2; break;
1097                                         } else if (Quote.Count == 0 && Quote.Rep == 1) {
1098                                                 // We matched something like s(...) or tr{...}, Perl 5.10
1099                                                 // appears to allow almost any character for use as the
1100                                                 // next delimiters. Whitespace and comments are accepted in
1101                                                 // between, but we'll limit to whitespace here.
1102                                                 // For '#', if no whitespace in between, it's a delimiter.
1103                                                 if (IsASpace(c)) {
1104                                                         // Keep going
1105                                                 } else if (c == '#' && IsASpaceOrTab(sc.GetRelativeCharacter(sLen - 1))) {
1106                                                         endType = 3;
1107                                                 } else
1108                                                         Quote.Open(c);
1109                                                 break;
1110                                         } else if (c == Quote.Down) {
1111                                                 Quote.Count--;
1112                                                 if (Quote.Count == 0) {
1113                                                         Quote.Rep--;
1114                                                         endType = 1;
1115                                                 }
1116                                                 if (Quote.Up == Quote.Down)
1117                                                         Quote.Count++;
1118                                                 if (endType == 1)
1119                                                         break;
1120                                         } else if (c == Quote.Up) {
1121                                                 Quote.Count++;
1122                                         } else if (IsASpace(c))
1123                                                 break;
1124                                         sLen++;
1125                                 }
1126                                 if (sLen > 0) { // process non-empty segments
1127                                         if (sc.state == SCE_PL_REGSUBST && Quote.Up != '\'') {
1128                                                 InterpolateSegment(sc, sLen, isPattern);
1129                                         } else          // non-interpolated path
1130                                                 sc.Forward(sLen);
1131                                 }
1132                                 if (endType == 2) {
1133                                         sc.Forward();
1134                                 } else if (endType == 3)
1135                                         sc.SetState(SCE_PL_DEFAULT);
1136                         }
1137                         break;
1138                 case SCE_PL_STRING_Q:
1139                 case SCE_PL_STRING_QQ:
1140                 case SCE_PL_STRING_QX:
1141                 case SCE_PL_STRING_QW:
1142                 case SCE_PL_STRING:
1143                 case SCE_PL_CHARACTER:
1144                 case SCE_PL_BACKTICKS:
1145                         if (!Quote.Down && !IsASpace(sc.ch)) {
1146                                 Quote.Open(sc.ch);
1147                         } else {
1148                                 int c, sLen = 0, endType = 0;
1149                                 while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
1150                                         // scan to break string into segments
1151                                         if (IsASpace(c)) {
1152                                                 break;
1153                                         } else if (c == '\\' && Quote.Up != '\\') {
1154                                                 endType = 2; break;
1155                                         } else if (c == Quote.Down) {
1156                                                 Quote.Count--;
1157                                                 if (Quote.Count == 0) {
1158                                                         endType = 3; break;
1159                                                 }
1160                                         } else if (c == Quote.Up)
1161                                                 Quote.Count++;
1162                                         sLen++;
1163                                 }
1164                                 if (sLen > 0) { // process non-empty segments
1165                                         switch (sc.state) {
1166                                         case SCE_PL_STRING:
1167                                         case SCE_PL_STRING_QQ:
1168                                         case SCE_PL_BACKTICKS:
1169                                                 InterpolateSegment(sc, sLen);
1170                                                 break;
1171                                         case SCE_PL_STRING_QX:
1172                                                 if (Quote.Up != '\'') {
1173                                                         InterpolateSegment(sc, sLen);
1174                                                         break;
1175                                                 }
1176                                                 // (continued for ' delim)
1177                                         default:        // non-interpolated path
1178                                                 sc.Forward(sLen);
1179                                         }
1180                                 }
1181                                 if (endType == 2) {
1182                                         sc.Forward();
1183                                 } else if (endType == 3)
1184                                         sc.ForwardSetState(SCE_PL_DEFAULT);
1185                         }
1186                         break;
1187                 case SCE_PL_SUB_PROTOTYPE: {
1188                                 int i = 0;
1189                                 // forward scan; must all be valid proto characters
1190                                 while (setSubPrototype.Contains(sc.GetRelative(i)))
1191                                         i++;
1192                                 if (sc.GetRelative(i) == ')') { // valid sub prototype
1193                                         sc.ForwardBytes(i);
1194                                         sc.ForwardSetState(SCE_PL_DEFAULT);
1195                                 } else {
1196                                         // abandon prototype, restart from '('
1197                                         sc.ChangeState(SCE_PL_OPERATOR);
1198                                         sc.SetState(SCE_PL_DEFAULT);
1199                                 }
1200                         }
1201                         break;
1202                 case SCE_PL_FORMAT: {
1203                                 sc.Complete();
1204                                 if (sc.Match('.')) {
1205                                         sc.Forward();
1206                                         if (sc.atLineEnd || ((sc.ch == '\r' && sc.chNext == '\n')))
1207                                                 sc.SetState(SCE_PL_DEFAULT);
1208                                 }
1209                                 while (!sc.atLineEnd)
1210                                         sc.Forward();
1211                         }
1212                         break;
1213                 case SCE_PL_ERROR:
1214                         break;
1215                 }
1216                 // Needed for specific continuation styles (one follows the other)
1217                 switch (sc.state) {
1218                         // continued from SCE_PL_WORD
1219                 case SCE_PL_FORMAT_IDENT:
1220                         // occupies HereDoc state 3 to avoid clashing with HERE docs
1221                         if (IsASpaceOrTab(sc.ch)) {             // skip whitespace
1222                                 sc.ChangeState(SCE_PL_DEFAULT);
1223                                 while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
1224                                         sc.Forward();
1225                                 sc.SetState(SCE_PL_FORMAT_IDENT);
1226                         }
1227                         if (setFormatStart.Contains(sc.ch)) {   // identifier or '='
1228                                 if (sc.ch != '=') {
1229                                         do {
1230                                                 sc.Forward();
1231                                         } while (setFormat.Contains(sc.ch));
1232                                 }
1233                                 while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
1234                                         sc.Forward();
1235                                 if (sc.ch == '=') {
1236                                         sc.ForwardSetState(SCE_PL_DEFAULT);
1237                                         HereDoc.State = 3;
1238                                 } else {
1239                                         // invalid identifier; inexact fallback, but hey
1240                                         sc.ChangeState(SCE_PL_IDENTIFIER);
1241                                         sc.SetState(SCE_PL_DEFAULT);
1242                                 }
1243                         } else {
1244                                 sc.ChangeState(SCE_PL_DEFAULT); // invalid identifier
1245                         }
1246                         backFlag = BACK_NONE;
1247                         break;
1248                 }
1249
1250                 // Must check end of HereDoc states here before default state is handled
1251                 if (HereDoc.State == 1 && sc.atLineEnd) {
1252                         // Begin of here-doc (the line after the here-doc delimiter):
1253                         // Lexically, the here-doc starts from the next line after the <<, but the
1254                         // first line of the here-doc seems to follow the style of the last EOL sequence
1255                         int st_new = SCE_PL_HERE_QQ;
1256                         HereDoc.State = 2;
1257                         if (HereDoc.Quoted) {
1258                                 if (sc.state == SCE_PL_HERE_DELIM) {
1259                                         // Missing quote at end of string! We are stricter than perl.
1260                                         // Colour here-doc anyway while marking this bit as an error.
1261                                         sc.ChangeState(SCE_PL_ERROR);
1262                                 }
1263                                 switch (HereDoc.Quote) {
1264                                 case '\'':
1265                                         st_new = SCE_PL_HERE_Q;
1266                                         break;
1267                                 case '"' :
1268                                         st_new = SCE_PL_HERE_QQ;
1269                                         break;
1270                                 case '`' :
1271                                         st_new = SCE_PL_HERE_QX;
1272                                         break;
1273                                 }
1274                         } else {
1275                                 if (HereDoc.Quote == '\\')
1276                                         st_new = SCE_PL_HERE_Q;
1277                         }
1278                         sc.SetState(st_new);
1279                 }
1280                 if (HereDoc.State == 3 && sc.atLineEnd) {
1281                         // Start of format body.
1282                         HereDoc.State = 0;
1283                         sc.SetState(SCE_PL_FORMAT);
1284                 }
1285
1286                 // Determine if a new state should be entered.
1287                 if (sc.state == SCE_PL_DEFAULT) {
1288                         if (IsADigit(sc.ch) ||
1289                                 (IsADigit(sc.chNext) && (sc.ch == '.' || sc.ch == 'v'))) {
1290                                 sc.SetState(SCE_PL_NUMBER);
1291                                 backFlag = BACK_NONE;
1292                                 numState = PERLNUM_DECIMAL;
1293                                 dotCount = 0;
1294                                 if (sc.ch == '0') {             // hex,bin,octal
1295                                         if (sc.chNext == 'x' || sc.chNext == 'X') {
1296                                                 numState = PERLNUM_HEX;
1297                                         } else if (sc.chNext == 'b' || sc.chNext == 'B') {
1298                                                 numState = PERLNUM_BINARY;
1299                                         } else if (IsADigit(sc.chNext)) {
1300                                                 numState = PERLNUM_OCTAL;
1301                                         }
1302                                         if (numState != PERLNUM_DECIMAL) {
1303                                                 sc.Forward();
1304                                         }
1305                                 } else if (sc.ch == 'v') {              // vector
1306                                         numState = PERLNUM_V_VECTOR;
1307                                 }
1308                         } else if (setWord.Contains(sc.ch)) {
1309                                 // if immediately prefixed by '::', always a bareword
1310                                 sc.SetState(SCE_PL_WORD);
1311                                 if (sc.chPrev == ':' && sc.GetRelative(-2) == ':') {
1312                                         sc.ChangeState(SCE_PL_IDENTIFIER);
1313                                 }
1314                                 Sci_PositionU bk = sc.currentPos;
1315                                 Sci_PositionU fw = sc.currentPos + 1;
1316                                 // first check for possible quote-like delimiter
1317                                 if (sc.ch == 's' && !setWord.Contains(sc.chNext)) {
1318                                         sc.ChangeState(SCE_PL_REGSUBST);
1319                                         Quote.New(2);
1320                                 } else if (sc.ch == 'm' && !setWord.Contains(sc.chNext)) {
1321                                         sc.ChangeState(SCE_PL_REGEX);
1322                                         Quote.New();
1323                                 } else if (sc.ch == 'q' && !setWord.Contains(sc.chNext)) {
1324                                         sc.ChangeState(SCE_PL_STRING_Q);
1325                                         Quote.New();
1326                                 } else if (sc.ch == 'y' && !setWord.Contains(sc.chNext)) {
1327                                         sc.ChangeState(SCE_PL_XLAT);
1328                                         Quote.New(2);
1329                                 } else if (sc.Match('t', 'r') && !setWord.Contains(sc.GetRelative(2))) {
1330                                         sc.ChangeState(SCE_PL_XLAT);
1331                                         Quote.New(2);
1332                                         sc.Forward();
1333                                         fw++;
1334                                 } else if (sc.ch == 'q' && setQDelim.Contains(sc.chNext)
1335                                         && !setWord.Contains(sc.GetRelative(2))) {
1336                                         if (sc.chNext == 'q') sc.ChangeState(SCE_PL_STRING_QQ);
1337                                         else if (sc.chNext == 'x') sc.ChangeState(SCE_PL_STRING_QX);
1338                                         else if (sc.chNext == 'r') sc.ChangeState(SCE_PL_STRING_QR);
1339                                         else sc.ChangeState(SCE_PL_STRING_QW);  // sc.chNext == 'w'
1340                                         Quote.New();
1341                                         sc.Forward();
1342                                         fw++;
1343                                 } else if (sc.ch == 'x' && (sc.chNext == '=' || // repetition
1344                                         !setWord.Contains(sc.chNext) ||
1345                                         (setRepetition.Contains(sc.chPrev) && IsADigit(sc.chNext)))) {
1346                                         sc.ChangeState(SCE_PL_OPERATOR);
1347                                 }
1348                                 // if potentially a keyword, scan forward and grab word, then check
1349                                 // if it's really one; if yes, disambiguation test is performed
1350                                 // otherwise it is always a bareword and we skip a lot of scanning
1351                                 if (sc.state == SCE_PL_WORD) {
1352                                         while (setWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(fw))))
1353                                                 fw++;
1354                                         if (!isPerlKeyword(styler.GetStartSegment(), fw, keywords, styler)) {
1355                                                 sc.ChangeState(SCE_PL_IDENTIFIER);
1356                                         }
1357                                 }
1358                                 // if already SCE_PL_IDENTIFIER, then no ambiguity, skip this
1359                                 // for quote-like delimiters/keywords, attempt to disambiguate
1360                                 // to select for bareword, change state -> SCE_PL_IDENTIFIER
1361                                 if (sc.state != SCE_PL_IDENTIFIER && bk > 0) {
1362                                         if (disambiguateBareword(styler, bk, fw, backFlag, backPos, endPos))
1363                                                 sc.ChangeState(SCE_PL_IDENTIFIER);
1364                                 }
1365                                 backFlag = BACK_NONE;
1366                         } else if (sc.ch == '#') {
1367                                 sc.SetState(SCE_PL_COMMENTLINE);
1368                         } else if (sc.ch == '\"') {
1369                                 sc.SetState(SCE_PL_STRING);
1370                                 Quote.New();
1371                                 Quote.Open(sc.ch);
1372                                 backFlag = BACK_NONE;
1373                         } else if (sc.ch == '\'') {
1374                                 if (sc.chPrev == '&' && setWordStart.Contains(sc.chNext)) {
1375                                         // Archaic call
1376                                         sc.SetState(SCE_PL_IDENTIFIER);
1377                                 } else {
1378                                         sc.SetState(SCE_PL_CHARACTER);
1379                                         Quote.New();
1380                                         Quote.Open(sc.ch);
1381                                 }
1382                                 backFlag = BACK_NONE;
1383                         } else if (sc.ch == '`') {
1384                                 sc.SetState(SCE_PL_BACKTICKS);
1385                                 Quote.New();
1386                                 Quote.Open(sc.ch);
1387                                 backFlag = BACK_NONE;
1388                         } else if (sc.ch == '$') {
1389                                 sc.SetState(SCE_PL_SCALAR);
1390                                 if (sc.chNext == '{') {
1391                                         sc.ForwardSetState(SCE_PL_OPERATOR);
1392                                 } else if (IsASpace(sc.chNext)) {
1393                                         sc.ForwardSetState(SCE_PL_DEFAULT);
1394                                 } else {
1395                                         sc.Forward();
1396                                         if (sc.Match('`', '`') || sc.Match(':', ':')) {
1397                                                 sc.Forward();
1398                                         }
1399                                 }
1400                                 backFlag = BACK_NONE;
1401                         } else if (sc.ch == '@') {
1402                                 sc.SetState(SCE_PL_ARRAY);
1403                                 if (setArray.Contains(sc.chNext)) {
1404                                         // no special treatment
1405                                 } else if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1406                                         sc.ForwardBytes(2);
1407                                 } else if (sc.chNext == '{' || sc.chNext == '[') {
1408                                         sc.ForwardSetState(SCE_PL_OPERATOR);
1409                                 } else {
1410                                         sc.ChangeState(SCE_PL_OPERATOR);
1411                                 }
1412                                 backFlag = BACK_NONE;
1413                         } else if (setPreferRE.Contains(sc.ch)) {
1414                                 // Explicit backward peeking to set a consistent preferRE for
1415                                 // any slash found, so no longer need to track preferRE state.
1416                                 // Find first previous significant lexed element and interpret.
1417                                 // A few symbols share this code for disambiguation.
1418                                 bool preferRE = false;
1419                                 bool isHereDoc = sc.Match('<', '<');
1420                                 bool hereDocSpace = false;              // for: SCALAR [whitespace] '<<'
1421                                 Sci_PositionU bk = (sc.currentPos > 0) ? sc.currentPos - 1: 0;
1422                                 sc.Complete();
1423                                 styler.Flush();
1424                                 if (styler.StyleAt(bk) == SCE_PL_DEFAULT)
1425                                         hereDocSpace = true;
1426                                 skipWhitespaceComment(styler, bk);
1427                                 if (bk == 0) {
1428                                         // avoid backward scanning breakage
1429                                         preferRE = true;
1430                                 } else {
1431                                         int bkstyle = styler.StyleAt(bk);
1432                                         int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
1433                                         switch (bkstyle) {
1434                                         case SCE_PL_OPERATOR:
1435                                                 preferRE = true;
1436                                                 if (bkch == ')' || bkch == ']') {
1437                                                         preferRE = false;
1438                                                 } else if (bkch == '}') {
1439                                                         // backtrack by counting balanced brace pairs
1440                                                         // needed to test for variables like ${}, @{} etc.
1441                                                         bkstyle = styleBeforeBracePair(styler, bk);
1442                                                         if (bkstyle == SCE_PL_SCALAR
1443                                                                 || bkstyle == SCE_PL_ARRAY
1444                                                                 || bkstyle == SCE_PL_HASH
1445                                                                 || bkstyle == SCE_PL_SYMBOLTABLE
1446                                                                 || bkstyle == SCE_PL_OPERATOR) {
1447                                                                 preferRE = false;
1448                                                         }
1449                                                 } else if (bkch == '+' || bkch == '-') {
1450                                                         if (bkch == static_cast<unsigned char>(styler.SafeGetCharAt(bk - 1))
1451                                                                 && bkch != static_cast<unsigned char>(styler.SafeGetCharAt(bk - 2)))
1452                                                                 // exceptions for operators: unary suffixes ++, --
1453                                                                 preferRE = false;
1454                                                 }
1455                                                 break;
1456                                         case SCE_PL_IDENTIFIER:
1457                                                 preferRE = true;
1458                                                 bkstyle = styleCheckIdentifier(styler, bk);
1459                                                 if ((bkstyle == 1) || (bkstyle == 2)) {
1460                                                         // inputsymbol or var with "->" or "::" before identifier
1461                                                         preferRE = false;
1462                                                 } else if (bkstyle == 3) {
1463                                                         // bare identifier, test cases follow:
1464                                                         if (sc.ch == '/') {
1465                                                                 // if '/', /PATTERN/ unless digit/space immediately after '/'
1466                                                                 // if '//', always expect defined-or operator to follow identifier
1467                                                                 if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/')
1468                                                                         preferRE = false;
1469                                                         } else if (sc.ch == '*' || sc.ch == '%') {
1470                                                                 if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*'))
1471                                                                         preferRE = false;
1472                                                         } else if (sc.ch == '<') {
1473                                                                 if (IsASpace(sc.chNext) || sc.chNext == '=')
1474                                                                         preferRE = false;
1475                                                         }
1476                                                 }
1477                                                 break;
1478                                         case SCE_PL_SCALAR:             // for $var<< case:
1479                                                 if (isHereDoc && hereDocSpace)  // if SCALAR whitespace '<<', *always* a HERE doc
1480                                                         preferRE = true;
1481                                                 break;
1482                                         case SCE_PL_WORD:
1483                                                 preferRE = true;
1484                                                 // for HERE docs, always true
1485                                                 if (sc.ch == '/') {
1486                                                         // adopt heuristics similar to vim-style rules:
1487                                                         // keywords always forced as /PATTERN/: split, if, elsif, while
1488                                                         // everything else /PATTERN/ unless digit/space immediately after '/'
1489                                                         // for '//', defined-or favoured unless special keywords
1490                                                         Sci_PositionU bkend = bk + 1;
1491                                                         while (bk > 0 && styler.StyleAt(bk - 1) == SCE_PL_WORD) {
1492                                                                 bk--;
1493                                                         }
1494                                                         if (isPerlKeyword(bk, bkend, reWords, styler))
1495                                                                 break;
1496                                                         if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/')
1497                                                                 preferRE = false;
1498                                                 } else if (sc.ch == '*' || sc.ch == '%') {
1499                                                         if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*'))
1500                                                                 preferRE = false;
1501                                                 } else if (sc.ch == '<') {
1502                                                         if (IsASpace(sc.chNext) || sc.chNext == '=')
1503                                                                 preferRE = false;
1504                                                 }
1505                                                 break;
1506
1507                                                 // other styles use the default, preferRE=false
1508                                         case SCE_PL_POD:
1509                                         case SCE_PL_HERE_Q:
1510                                         case SCE_PL_HERE_QQ:
1511                                         case SCE_PL_HERE_QX:
1512                                                 preferRE = true;
1513                                                 break;
1514                                         }
1515                                 }
1516                                 backFlag = BACK_NONE;
1517                                 if (isHereDoc) {        // handle '<<', HERE doc
1518                                         if (sc.Match("<<>>")) {         // double-diamond operator (5.22)
1519                                                 sc.SetState(SCE_PL_OPERATOR);
1520                                                 sc.Forward(3);
1521                                         } else if (preferRE) {
1522                                                 sc.SetState(SCE_PL_HERE_DELIM);
1523                                                 HereDoc.State = 0;
1524                                         } else {                // << operator
1525                                                 sc.SetState(SCE_PL_OPERATOR);
1526                                                 sc.Forward();
1527                                         }
1528                                 } else if (sc.ch == '*') {      // handle '*', typeglob
1529                                         if (preferRE) {
1530                                                 sc.SetState(SCE_PL_SYMBOLTABLE);
1531                                                 if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1532                                                         sc.ForwardBytes(2);
1533                                                 } else if (sc.chNext == '{') {
1534                                                         sc.ForwardSetState(SCE_PL_OPERATOR);
1535                                                 } else {
1536                                                         sc.Forward();
1537                                                 }
1538                                         } else {
1539                                                 sc.SetState(SCE_PL_OPERATOR);
1540                                                 if (sc.chNext == '*')   // exponentiation
1541                                                         sc.Forward();
1542                                         }
1543                                 } else if (sc.ch == '%') {      // handle '%', hash
1544                                         if (preferRE) {
1545                                                 sc.SetState(SCE_PL_HASH);
1546                                                 if (setHash.Contains(sc.chNext)) {
1547                                                         sc.Forward();
1548                                                 } else if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1549                                                         sc.ForwardBytes(2);
1550                                                 } else if (sc.chNext == '{') {
1551                                                         sc.ForwardSetState(SCE_PL_OPERATOR);
1552                                                 } else {
1553                                                         sc.ChangeState(SCE_PL_OPERATOR);
1554                                                 }
1555                                         } else {
1556                                                 sc.SetState(SCE_PL_OPERATOR);
1557                                         }
1558                                 } else if (sc.ch == '<') {      // handle '<', inputsymbol
1559                                         if (preferRE) {
1560                                                 // forward scan
1561                                                 int i = InputSymbolScan(sc);
1562                                                 if (i > 0) {
1563                                                         sc.SetState(SCE_PL_IDENTIFIER);
1564                                                         sc.Forward(i);
1565                                                 } else {
1566                                                         sc.SetState(SCE_PL_OPERATOR);
1567                                                 }
1568                                         } else {
1569                                                 sc.SetState(SCE_PL_OPERATOR);
1570                                         }
1571                                 } else {                        // handle '/', regexp
1572                                         if (preferRE) {
1573                                                 sc.SetState(SCE_PL_REGEX);
1574                                                 Quote.New();
1575                                                 Quote.Open(sc.ch);
1576                                         } else {                // / and // operators
1577                                                 sc.SetState(SCE_PL_OPERATOR);
1578                                                 if (sc.chNext == '/') {
1579                                                         sc.Forward();
1580                                                 }
1581                                         }
1582                                 }
1583                         } else if (sc.ch == '='         // POD
1584                                 && setPOD.Contains(sc.chNext)
1585                                 && sc.atLineStart) {
1586                                 sc.SetState(SCE_PL_POD);
1587                                 backFlag = BACK_NONE;
1588                         } else if (sc.ch == '-' && setWordStart.Contains(sc.chNext)) {  // extended '-' cases
1589                                 Sci_PositionU bk = sc.currentPos;
1590                                 Sci_PositionU fw = 2;
1591                                 if (setSingleCharOp.Contains(sc.chNext) &&      // file test operators
1592                                         !setWord.Contains(sc.GetRelative(2))) {
1593                                         sc.SetState(SCE_PL_WORD);
1594                                 } else {
1595                                         // nominally a minus and bareword; find extent of bareword
1596                                         while (setWord.Contains(sc.GetRelative(fw)))
1597                                                 fw++;
1598                                         sc.SetState(SCE_PL_OPERATOR);
1599                                 }
1600                                 // force to bareword for hash key => or {variable literal} cases
1601                                 if (disambiguateBareword(styler, bk, bk + fw, backFlag, backPos, endPos) & 2) {
1602                                         sc.ChangeState(SCE_PL_IDENTIFIER);
1603                                 }
1604                                 backFlag = BACK_NONE;
1605                         } else if (sc.ch == '(' && sc.currentPos > 0) { // '(' or subroutine prototype
1606                                 sc.Complete();
1607                                 if (styleCheckSubPrototype(styler, sc.currentPos - 1)) {
1608                                         sc.SetState(SCE_PL_SUB_PROTOTYPE);
1609                                         backFlag = BACK_NONE;
1610                                 } else {
1611                                         sc.SetState(SCE_PL_OPERATOR);
1612                                 }
1613                         } else if (setPerlOperator.Contains(sc.ch)) {   // operators
1614                                 sc.SetState(SCE_PL_OPERATOR);
1615                                 if (sc.Match('.', '.')) {       // .. and ...
1616                                         sc.Forward();
1617                                         if (sc.chNext == '.') sc.Forward();
1618                                 }
1619                         } else if (sc.ch == 4 || sc.ch == 26) {         // ^D and ^Z ends valid perl source
1620                                 sc.SetState(SCE_PL_DATASECTION);
1621                         } else {
1622                                 // keep colouring defaults
1623                                 sc.Complete();
1624                         }
1625                 }
1626         }
1627         sc.Complete();
1628         if (sc.state == SCE_PL_HERE_Q
1629                 || sc.state == SCE_PL_HERE_QQ
1630                 || sc.state == SCE_PL_HERE_QX
1631                 || sc.state == SCE_PL_FORMAT) {
1632                 styler.ChangeLexerState(sc.currentPos, styler.Length());
1633         }
1634         sc.Complete();
1635 }
1636
1637 #define PERL_HEADFOLD_SHIFT             4       // shift applied to a POD heading level before it is merged into a fold level
1638 #define PERL_HEADFOLD_MASK              0xF0    // bits 7-4 of a fold level: the field in which Fold() keeps the POD heading level
1639
1640 void SCI_METHOD LexerPerl::Fold(Sci_PositionU startPos, Sci_Position length, int /* initStyle */, IDocument *pAccess) {
1641
             // Computes fold levels from the styles already stored in the document and
             // writes them with styler.SetLevel(), one line at a time.
             //   startPos - first position to examine; it is moved back to the start of the
             //              previous line (when there is one) so that line can be redone
             //   length   - number of characters to examine, counted from the original startPos
             //   pAccess  - document to fold, accessed through a LexAccessor
             // The third parameter (initStyle) is unused.
             // Does nothing unless options.fold is set; the individual kinds of folding are
             // switched by the other options.fold* members tested below.
1642         if (!options.fold)
1643                 return;
1644
1645         LexAccessor styler(pAccess);
1646
             // endPos is fixed before startPos is (possibly) moved back below
1647         Sci_PositionU endPos = startPos + length;
             // number of non-space characters seen so far on the current line
1648         int visibleChars = 0;
1649         Sci_Position lineCurrent = styler.GetLine(startPos);
1650
1651         // Backtrack to previous line in case need to fix its fold status
1652         if (startPos > 0) {
1653                 if (lineCurrent > 0) {
1654                         lineCurrent--;
1655                         startPos = styler.LineStart(lineCurrent);
1656                 }
1657         }
1658
             // Each stored level carries the level for the following line in bits 16 and up
             // (see "lev |= levelCurrent << 16" at end of line), so the level to start
             // from is recovered from the line before lineCurrent.
1659         int levelPrev = SC_FOLDLEVELBASE;
1660         if (lineCurrent > 0)
1661                 levelPrev = styler.LevelAt(lineCurrent - 1) >> 16;
1662         int levelCurrent = levelPrev;
1663         char chNext = styler[startPos];
1664         char chPrev = styler.SafeGetCharAt(startPos - 1);
1665         int styleNext = styler.StyleAt(startPos);
1666         // Used at end of line to determine if the line was a package definition
1667         bool isPackageLine = false;
             // POD heading level found on the current line (0 = none); applied at end of line
1668         int podHeading = 0;
1669         for (Sci_PositionU i = startPos; i < endPos; i++) {
1670                 char ch = chNext;
1671                 chNext = styler.SafeGetCharAt(i + 1);
1672                 int style = styleNext;
1673                 styleNext = styler.StyleAt(i + 1);
                     // style of the preceding character; SCE_PL_DEFAULT at position 0
1674                 int stylePrevCh = (i) ? styler.StyleAt(i - 1):SCE_PL_DEFAULT;
                     // a '\n', or a '\r' not followed by '\n', ends the line
1675                 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1676                 bool atLineStart = ((chPrev == '\r') || (chPrev == '\n')) || i == 0;
1677                 // Comment folding
                     // Evaluated once per line, at its end: in a run of lines for which
                     // IsCommentLine() holds, the first line opens a fold and the last line
                     // closes it; an isolated or interior comment line changes nothing.
1678                 if (options.foldComment && atEOL && IsCommentLine(lineCurrent, styler)) {
1679                         if (!IsCommentLine(lineCurrent - 1, styler)
1680                                 && IsCommentLine(lineCurrent + 1, styler))
1681                                 levelCurrent++;
1682                         else if (IsCommentLine(lineCurrent - 1, styler)
1683                                 && !IsCommentLine(lineCurrent + 1, styler))
1684                                 levelCurrent--;
1685                 }
1686                 // {} [] block folding
                     // Only characters styled SCE_PL_OPERATOR count. With foldAtElse, an
                     // opener on a line whose level has already dropped below levelPrev
                     // lowers levelPrev too, so the line itself is recorded one level lower.
1687                 if (style == SCE_PL_OPERATOR) {
1688                         if (ch == '{') {
1689                                 if (options.foldAtElse && levelCurrent < levelPrev)
1690                                         --levelPrev;
1691                                 levelCurrent++;
1692                         } else if (ch == '}') {
1693                                 levelCurrent--;
1694                         }
1695                         if (ch == '[') {
1696                                 if (options.foldAtElse && levelCurrent < levelPrev)
1697                                         --levelPrev;
1698                                 levelCurrent++;
1699                         } else if (ch == ']') {
1700                                 levelCurrent--;
1701                         }
1702                 }
1703                 // POD folding
                     // Checked only on the first character of a line.
1704                 if (options.foldPOD && atLineStart) {
1705                         if (style == SCE_PL_POD) {
                                     // previous character was not POD: a POD section starts here
1706                                 if (stylePrevCh != SCE_PL_POD && stylePrevCh != SCE_PL_POD_VERB)
1707                                         levelCurrent++;
                                     // "=cut": clear the heading bits and step down one level
1708                                 else if (styler.Match(i, "=cut"))
1709                                         levelCurrent = (levelCurrent & ~PERL_HEADFOLD_MASK) - 1;
                                     // "=head": remember the heading level for end-of-line handling
1710                                 else if (styler.Match(i, "=head"))
1711                                         podHeading = PodHeadingLevel(i, styler);
1712                         } else if (style == SCE_PL_DATASECTION) {
                                     // Same handling for text styled SCE_PL_DATASECTION: '=' followed
                                     // by an ASCII letter opens a fold, but only from the base level;
                                     // "=cut" closes one only when above the base level.
1713                                 if (ch == '=' && IsASCII(chNext) && isalpha(chNext) && levelCurrent == SC_FOLDLEVELBASE)
1714                                         levelCurrent++;
1715                                 else if (styler.Match(i, "=cut") && levelCurrent > SC_FOLDLEVELBASE)
1716                                         levelCurrent = (levelCurrent & ~PERL_HEADFOLD_MASK) - 1;
1717                                 else if (styler.Match(i, "=head"))
1718                                         podHeading = PodHeadingLevel(i, styler);
1719                                 // if package used or unclosed brace, level > SC_FOLDLEVELBASE!
1720                                 // reset needed as level test is vs. SC_FOLDLEVELBASE
1721                                 else if (stylePrevCh != SCE_PL_DATASECTION)
1722                                         levelCurrent = SC_FOLDLEVELBASE;
1723                         }
1724                 }
1725                 // package folding
                     // Flag the line when IsPackageLine() holds for it but not for the next
                     // line; the flag is acted on at end of line.
1726                 if (options.foldPackage && atLineStart) {
1727                         if (IsPackageLine(lineCurrent, styler)
1728                                 && !IsPackageLine(lineCurrent + 1, styler))
1729                                 isPackageLine = true;
1730                 }
1731
1732                 //heredoc folding
                     // A change of style into one of the SCE_PL_HERE_* styles opens a fold and
                     // a change out of them closes it; this is not controlled by an option.
1733                 switch (style) {
1734                 case SCE_PL_HERE_QQ :
1735                 case SCE_PL_HERE_Q :
1736                 case SCE_PL_HERE_QX :
1737                         switch (stylePrevCh) {
1738                         case SCE_PL_HERE_QQ :
1739                         case SCE_PL_HERE_Q :
1740                         case SCE_PL_HERE_QX :
1741                                 //do nothing;
1742                                 break;
1743                         default :
1744                                 levelCurrent++;
1745                                 break;
1746                         }
1747                         break;
1748                 default:
1749                         switch (stylePrevCh) {
1750                         case SCE_PL_HERE_QQ :
1751                         case SCE_PL_HERE_Q :
1752                         case SCE_PL_HERE_QX :
1753                                 levelCurrent--;
1754                                 break;
1755                         default :
1756                                 //do nothing;
1757                                 break;
1758                         }
1759                         break;
1760                 }
1761
1762                 //explicit folding
                     // Within SCE_PL_COMMENTLINE text, "#{" opens a fold and "#}" closes one;
                     // the close is ignored when already at the base level.
1763                 if (options.foldCommentExplicit && style == SCE_PL_COMMENTLINE && ch == '#') {
1764                         if (chNext == '{') {
1765                                 levelCurrent++;
1766                         } else if (levelCurrent > SC_FOLDLEVELBASE  && chNext == '}') {
1767                                 levelCurrent--;
1768                         }
1769                 }
1770
                     // End of line: build the value stored for this line. The low part holds
                     // the level this line started at plus flags; the level reached at the end
                     // of the line goes into bits 16 and up for the next line to pick up.
1771                 if (atEOL) {
1772                         int lev = levelPrev;
1773                         // POD headings occupy bits 7-4, leaving some breathing room for
1774                         // non-standard practice -- POD sections stuck in blocks, etc.
1775                         if (podHeading > 0) {
1776                                 levelCurrent = (lev & ~PERL_HEADFOLD_MASK) | (podHeading << PERL_HEADFOLD_SHIFT);
1777                                 lev = levelCurrent - 1;
1778                                 lev |= SC_FOLDLEVELHEADERFLAG;
1779                                 podHeading = 0;
1780                         }
1781                         // Check if line was a package declaration
1782                         // because packages need "special" treatment
                             // The package line is forced to a base-level header and the lines
                             // after it start one level above the base.
1783                         if (isPackageLine) {
1784                                 lev = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG;
1785                                 levelCurrent = SC_FOLDLEVELBASE + 1;
1786                                 isPackageLine = false;
1787                         }
1788                         lev |= levelCurrent << 16;
                             // a line without visible characters is marked as white space when foldCompact is on
1789                         if (visibleChars == 0 && options.foldCompact)
1790                                 lev |= SC_FOLDLEVELWHITEFLAG;
                             // the level rose on a non-blank line: it is a fold header
1791                         if ((levelCurrent > levelPrev) && (visibleChars > 0))
1792                                 lev |= SC_FOLDLEVELHEADERFLAG;
                             // store only when the value actually changed
1793                         if (lev != styler.LevelAt(lineCurrent)) {
1794                                 styler.SetLevel(lineCurrent, lev);
1795                         }
1796                         lineCurrent++;
1797                         levelPrev = levelCurrent;
1798                         visibleChars = 0;
1799                 }
1800                 if (!isspacechar(ch))
1801                         visibleChars++;
1802                 chPrev = ch;
1803         }
1804         // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1805         int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
1806         styler.SetLevel(lineCurrent, levelPrev | flagsNext);
1807 }
1808
1809 LexerModule lmPerl(SCLEX_PERL, LexerPerl::LexerFactoryPerl, "perl", perlWordListDesc);     // Perl lexer module object: id SCLEX_PERL, factory LexerPerl::LexerFactoryPerl, name "perl", word list descriptions perlWordListDesc