scintilla/lexers/LexPerl.cxx

   1 // Scintilla source code edit control
   2 /** @file LexPerl.cxx
   3  ** Lexer for Perl.
   4  ** Converted to lexer object by "Udo Lechner" <dlchnr(at)gmx(dot)net>
   5  **/
   6 // Copyright 1998-2008 by Neil Hodgson <neilh@scintilla.org>
   7 // Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my>
   8 // The License.txt file describes the conditions under which this software may be distributed.
   9
  10 #include <stdlib.h>
  11 #include <string.h>
  12 #include <stdio.h>
  13 #include <stdarg.h>
  14 #include <assert.h>
  15 #include <ctype.h>
  16
  17 #include <string>
  18 #include <map>
  19
  20 #include "ILexer.h"
  21 #include "Scintilla.h"
  22 #include "SciLexer.h"
  23
  24 #include "WordList.h"
  25 #include "LexAccessor.h"
  26 #include "StyleContext.h"
  27 #include "CharacterSet.h"
  28 #include "LexerModule.h"
  29 #include "OptionSet.h"
  30
  31 #ifdef SCI_NAMESPACE
  32 using namespace Scintilla;
  33 #endif
  34
  35 // Info for HERE document handling from perldata.pod (reformatted):
  36 // ----------------------------------------------------------------
  37 // A line-oriented form of quoting is based on the shell ``here-doc'' syntax.
  38 // Following a << you specify a string to terminate the quoted material, and
  39 // all lines following the current line down to the terminating string are
  40 // the value of the item.
  41 // * The terminating string may be either an identifier (a word), or some
  42 //   quoted text.
  43 // * If quoted, the type of quotes you use determines the treatment of the
  44 //   text, just as in regular quoting.
  45 // * An unquoted identifier works like double quotes.
  46 // * There must be no space between the << and the identifier.
  47 //   (If you put a space it will be treated as a null identifier,
  48 //    which is valid, and matches the first empty line.)
  49 //   (This is deprecated, -w warns of this syntax)
  50 // * The terminating string must appear by itself (unquoted and
  51 //   with no surrounding whitespace) on the terminating line.
  52
  53 #define HERE_DELIM_MAX 256              // maximum length of HERE doc delimiter
  54
  55 #define PERLNUM_BINARY          1       // order is significant: 1-4 cannot have a dot
  56 #define PERLNUM_HEX                     2
  57 #define PERLNUM_OCTAL           3
  58 #define PERLNUM_FLOAT_EXP       4       // exponent part only
  59 #define PERLNUM_DECIMAL         5       // 1-5 are numbers; 6-7 are strings
  60 #define PERLNUM_VECTOR          6
  61 #define PERLNUM_V_VECTOR        7
  62 #define PERLNUM_BAD                     8
  63
  64 #define BACK_NONE               0       // lookback state for bareword disambiguation:
  65 #define BACK_OPERATOR   1       // whitespace/comments are insignificant
  66 #define BACK_KEYWORD    2       // operators/keywords are needed for disambiguation
  67
  68 // all interpolated styles are different from their parent styles by a constant difference
  69 // we also assume SCE_PL_STRING_VAR is the interpolated style with the smallest value
  70 #define INTERPOLATE_SHIFT       (SCE_PL_STRING_VAR - SCE_PL_STRING)
  71
  72 static bool isPerlKeyword(unsigned int start, unsigned int end, WordList &keywords, LexAccessor &styler) {
  73         // old-style keyword matcher; needed because GetCurrent() needs
  74         // current segment to be committed, but we may abandon early...
  75         char s[100];
  76         unsigned int i, len = end - start;
  77         if (len > 30) { len = 30; }
  78         for (i = 0; i < len; i++, start++) s[i] = styler[start];
  79         s[i] = '\0';
  80         return keywords.InList(s);
  81 }
  82
  83 static int disambiguateBareword(LexAccessor &styler, unsigned int bk, unsigned int fw,
  84         int backFlag, unsigned int backPos, unsigned int endPos) {
  85         // identifiers are recognized by Perl as barewords under some
  86         // conditions, the following attempts to do the disambiguation
  87         // by looking backward and forward; result in 2 LSB
  88         int result = 0;
  89         bool moreback = false;          // true if passed newline/comments
  90         bool brace = false;                     // true if opening brace found
  91         // if BACK_NONE, neither operator nor keyword, so skip test
  92         if (backFlag == BACK_NONE)
  93                 return result;
  94         // first look backwards past whitespace/comments to set EOL flag
  95         // (some disambiguation patterns must be on a single line)
  96         if (backPos <= static_cast<unsigned int>(styler.LineStart(styler.GetLine(bk))))
  97                 moreback = true;
  98         // look backwards at last significant lexed item for disambiguation
  99         bk = backPos - 1;
 100         int ch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
 101         if (ch == '{' && !moreback) {
 102                 // {bareword: possible variable spec
 103                 brace = true;
 104         } else if ((ch == '&' && styler.SafeGetCharAt(bk - 1) != '&')
 105                 // &bareword: subroutine call
 106                 || styler.Match(bk - 1, "->")
 107                 // ->bareword: part of variable spec
 108                 || styler.Match(bk - 2, "sub")) {
 109                 // sub bareword: subroutine declaration
 110                 // (implied BACK_KEYWORD, no keywords end in 'sub'!)
 111                 result |= 1;
 112         }
 113         // next, scan forward after word past tab/spaces only;
 114         // if ch isn't one of '[{(,' we can skip the test
 115         if ((ch == '{' || ch == '(' || ch == '['|| ch == ',')
 116                 && fw < endPos) {
 117                 while (ch = static_cast<unsigned char>(styler.SafeGetCharAt(fw)),
 118                         IsASpaceOrTab(ch) && fw < endPos) {
 119                         fw++;
 120                 }
 121                 if ((ch == '}' && brace)
 122                         // {bareword}: variable spec
 123                         || styler.Match(fw, "=>")) {
 124                         // [{(, bareword=>: hash literal
 125                         result |= 2;
 126                 }
 127         }
 128         return result;
 129 }
 130
 131 static void skipWhitespaceComment(LexAccessor &styler, unsigned int &p) {
 132         // when backtracking, we need to skip whitespace and comments
 133         int style;
 134         while ((p > 0) && (style = styler.StyleAt(p),
 135                 style == SCE_PL_DEFAULT || style == SCE_PL_COMMENTLINE))
 136                 p--;
 137 }
 138
 139 static int styleBeforeBracePair(LexAccessor &styler, unsigned int bk) {
 140         // backtrack to find open '{' corresponding to a '}', balanced
 141         // return significant style to be tested for '/' disambiguation
 142         int braceCount = 1;
 143         if (bk == 0)
 144                 return SCE_PL_DEFAULT;
 145         while (--bk > 0) {
 146                 if (styler.StyleAt(bk) == SCE_PL_OPERATOR) {
 147                         int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
 148                         if (bkch == ';') {      // early out
 149                                 break;
 150                         } else if (bkch == '}') {
 151                                 braceCount++;
 152                         } else if (bkch == '{') {
 153                                 if (--braceCount == 0) break;
 154                         }
 155                 }
 156         }
 157         if (bk > 0 && braceCount == 0) {
 158                 // balanced { found, bk > 0, skip more whitespace/comments
 159                 bk--;
 160                 skipWhitespaceComment(styler, bk);
 161                 return styler.StyleAt(bk);
 162         }
 163         return SCE_PL_DEFAULT;
 164 }
 165
 166 static int styleCheckIdentifier(LexAccessor &styler, unsigned int bk) {
 167         // backtrack to classify sub-styles of identifier under test
 168         // return sub-style to be tested for '/' disambiguation
 169         if (styler.SafeGetCharAt(bk) == '>')    // inputsymbol, like <foo>
 170                 return 1;
 171         // backtrack to check for possible "->" or "::" before identifier
 172         while (bk > 0 && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) {
 173                 bk--;
 174         }
 175         while (bk > 0) {
 176                 int bkstyle = styler.StyleAt(bk);
 177                 if (bkstyle == SCE_PL_DEFAULT
 178                         || bkstyle == SCE_PL_COMMENTLINE) {
 179                         // skip whitespace, comments
 180                 } else if (bkstyle == SCE_PL_OPERATOR) {
 181                         // test for "->" and "::"
 182                         if (styler.Match(bk - 1, "->") || styler.Match(bk - 1, "::"))
 183                                 return 2;
 184                 } else
 185                         return 3;       // bare identifier
 186                 bk--;
 187         }
 188         return 0;
 189 }
 190
 191 static int podLineScan(LexAccessor &styler, unsigned int &pos, unsigned int endPos) {
 192         // forward scan the current line to classify line for POD style
 193         int state = -1;
 194         while (pos < endPos) {
 195                 int ch = static_cast<unsigned char>(styler.SafeGetCharAt(pos));
 196                 if (ch == '\n' || ch == '\r') {
 197                         if (ch == '\r' && styler.SafeGetCharAt(pos + 1) == '\n') pos++;
 198                         break;
 199                 }
 200                 if (IsASpaceOrTab(ch)) {        // whitespace, take note
 201                         if (state == -1)
 202                                 state = SCE_PL_DEFAULT;
 203                 } else if (state == SCE_PL_DEFAULT) {   // verbatim POD line
 204                         state = SCE_PL_POD_VERB;
 205                 } else if (state != SCE_PL_POD_VERB) {  // regular POD line
 206                         state = SCE_PL_POD;
 207                 }
 208                 pos++;
 209         }
 210         if (state == -1)
 211                 state = SCE_PL_DEFAULT;
 212         return state;
 213 }
 214
 215 static bool styleCheckSubPrototype(LexAccessor &styler, unsigned int bk) {
 216         // backtrack to identify if we're starting a subroutine prototype
 217         // we also need to ignore whitespace/comments:
 218         // 'sub' [whitespace|comment] <identifier> [whitespace|comment]
 219         styler.Flush();
 220         skipWhitespaceComment(styler, bk);
 221         if (bk == 0 || styler.StyleAt(bk) != SCE_PL_IDENTIFIER) // check identifier
 222                 return false;
 223         while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_IDENTIFIER)) {
 224                 bk--;
 225         }
 226         skipWhitespaceComment(styler, bk);
 227         if (bk < 2 || styler.StyleAt(bk) != SCE_PL_WORD // check "sub" keyword
 228                 || !styler.Match(bk - 2, "sub"))        // assume suffix is unique!
 229                 return false;
 230         return true;
 231 }
 232
 233 static int actualNumStyle(int numberStyle) {
 234         if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) {
 235                 return SCE_PL_STRING;
 236         } else if (numberStyle == PERLNUM_BAD) {
 237                 return SCE_PL_ERROR;
 238         }
 239         return SCE_PL_NUMBER;
 240 }
 241
 242 static int opposite(int ch) {
 243         if (ch == '(') return ')';
 244         if (ch == '[') return ']';
 245         if (ch == '{') return '}';
 246         if (ch == '<') return '>';
 247         return ch;
 248 }
 249
 250 static bool IsCommentLine(int line, LexAccessor &styler) {
 251         int pos = styler.LineStart(line);
 252         int eol_pos = styler.LineStart(line + 1) - 1;
 253         for (int i = pos; i < eol_pos; i++) {
 254                 char ch = styler[i];
 255                 int style = styler.StyleAt(i);
 256                 if (ch == '#' && style == SCE_PL_COMMENTLINE)
 257                         return true;
 258                 else if (!IsASpaceOrTab(ch))
 259                         return false;
 260         }
 261         return false;
 262 }
 263
 264 static bool IsPackageLine(int line, LexAccessor &styler) {
 265         int pos = styler.LineStart(line);
 266         int style = styler.StyleAt(pos);
 267         if (style == SCE_PL_WORD && styler.Match(pos, "package")) {
 268                 return true;
 269         }
 270         return false;
 271 }
 272
 273 static int PodHeadingLevel(int pos, LexAccessor &styler) {
 274         int lvl = static_cast<unsigned char>(styler.SafeGetCharAt(pos + 5));
 275         if (lvl >= '1' && lvl <= '4') {
 276                 return lvl - '0';
 277         }
 278         return 0;
 279 }
 280
 281 // An individual named option for use in an OptionSet
 282
 283 // Options used for LexerPerl
 284 struct OptionsPerl {
 285         bool fold;
 286         bool foldComment;
 287         bool foldCompact;
 288         // Custom folding of POD and packages
 289         bool foldPOD;            // fold.perl.pod
 290         // Enable folding Pod blocks when using the Perl lexer.
 291         bool foldPackage;        // fold.perl.package
 292         // Enable folding packages when using the Perl lexer.
 293
 294         bool foldCommentExplicit;
 295
 296         bool foldAtElse;
 297
 298         OptionsPerl() {
 299                 fold = false;
 300                 foldComment = false;
 301                 foldCompact = true;
 302                 foldPOD = true;
 303                 foldPackage = true;
 304                 foldCommentExplicit = true;
 305                 foldAtElse = false;
 306         }
 307 };
 308
 309 static const char *const perlWordListDesc[] = {
 310         "Keywords",
 311         0
 312 };
 313
 314 struct OptionSetPerl : public OptionSet<OptionsPerl> {
 315         OptionSetPerl() {
 316                 DefineProperty("fold", &OptionsPerl::fold);
 317
 318                 DefineProperty("fold.comment", &OptionsPerl::foldComment);
 319
 320                 DefineProperty("fold.compact", &OptionsPerl::foldCompact);
 321
 322                 DefineProperty("fold.perl.pod", &OptionsPerl::foldPOD,
 323                         "Set to 0 to disable folding Pod blocks when using the Perl lexer.");
 324
 325                 DefineProperty("fold.perl.package", &OptionsPerl::foldPackage,
 326                         "Set to 0 to disable folding packages when using the Perl lexer.");
 327
 328                 DefineProperty("fold.perl.comment.explicit", &OptionsPerl::foldCommentExplicit,
 329                         "Set to 0 to disable explicit folding.");
 330
 331                 DefineProperty("fold.perl.at.else", &OptionsPerl::foldAtElse,
 332                                "This option enables Perl folding on a \"} else {\" line of an if statement.");
 333
 334                 DefineWordListSets(perlWordListDesc);
 335         }
 336 };
 337
 338 class LexerPerl : public ILexer {
 339         CharacterSet setWordStart;
 340         CharacterSet setWord;
 341         CharacterSet setSpecialVar;
 342         CharacterSet setControlVar;
 343         WordList keywords;
 344         OptionsPerl options;
 345         OptionSetPerl osPerl;
 346 public:
 347         LexerPerl() :
 348                 setWordStart(CharacterSet::setAlpha, "_", 0x80, true),
 349                 setWord(CharacterSet::setAlphaNum, "_", 0x80, true),
 350                 setSpecialVar(CharacterSet::setNone, "\"$;<>&`'+,./\\%:=~!?@[]"),
 351                 setControlVar(CharacterSet::setNone, "ACDEFHILMNOPRSTVWX") {
 352         }
 353         virtual ~LexerPerl() {
 354         }
 355         void SCI_METHOD Release() {
 356                 delete this;
 357         }
 358         int SCI_METHOD Version() const {
 359                 return lvOriginal;
 360         }
 361         const char *SCI_METHOD PropertyNames() {
 362                 return osPerl.PropertyNames();
 363         }
 364         int SCI_METHOD PropertyType(const char *name) {
 365                 return osPerl.PropertyType(name);
 366         }
 367         const char *SCI_METHOD DescribeProperty(const char *name) {
 368                 return osPerl.DescribeProperty(name);
 369         }
 370         int SCI_METHOD PropertySet(const char *key, const char *val);
 371         const char *SCI_METHOD DescribeWordListSets() {
 372                 return osPerl.DescribeWordListSets();
 373         }
 374         int SCI_METHOD WordListSet(int n, const char *wl);
 375         void SCI_METHOD Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
 376         void SCI_METHOD Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
 377
 378         void *SCI_METHOD PrivateCall(int, void *) {
 379                 return 0;
 380         }
 381
 382         static ILexer *LexerFactoryPerl() {
 383                 return new LexerPerl();
 384         }
 385         int InputSymbolScan(StyleContext &sc);
 386         void InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern=false);
 387 };
 388
 389 int SCI_METHOD LexerPerl::PropertySet(const char *key, const char *val) {
 390         if (osPerl.PropertySet(&options, key, val)) {
 391                 return 0;
 392         }
 393         return -1;
 394 }
 395
 396 int SCI_METHOD LexerPerl::WordListSet(int n, const char *wl) {
 397         WordList *wordListN = 0;
 398         switch (n) {
 399         case 0:
 400                 wordListN = &keywords;
 401                 break;
 402         }
 403         int firstModification = -1;
 404         if (wordListN) {
 405                 WordList wlNew;
 406                 wlNew.Set(wl);
 407                 if (*wordListN != wlNew) {
 408                         wordListN->Set(wl);
 409                         firstModification = 0;
 410                 }
 411         }
 412         return firstModification;
 413 }
 414
 415 int LexerPerl::InputSymbolScan(StyleContext &sc) {
 416         // forward scan for matching > on same line; file handles
 417         int c, sLen = 0;
 418         while ((c = sc.GetRelativeCharacter(++sLen)) != 0) {
 419                 if (c == '\r' || c == '\n') {
 420                         return 0;
 421                 } else if (c == '>') {
 422                         if (sc.Match("<=>"))    // '<=>' case
 423                                 return 0;
 424                         return sLen;
 425                 }
 426         }
 427         return 0;
 428 }
 429
 430 void LexerPerl::InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern) {
 431         // interpolate a segment (with no active backslashes or delimiters within)
 432         // switch in or out of an interpolation style or continue current style
 433         // commit variable patterns if found, trim segment, repeat until done
 434         while (maxSeg > 0) {
 435                 bool isVar = false;
 436                 int sLen = 0;
 437                 if ((maxSeg > 1) && (sc.ch == '$' || sc.ch == '@')) {
 438                         // $#[$]*word [$@][$]*word (where word or {word} is always present)
 439                         bool braces = false;
 440                         sLen = 1;
 441                         if (sc.ch == '$' && sc.chNext == '#') { // starts with $#
 442                                 sLen++;
 443                         }
 444                         while ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '$'))       // >0 $ dereference within
 445                                 sLen++;
 446                         if ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '{')) {        // { start for {word}
 447                                 sLen++;
 448                                 braces = true;
 449                         }
 450                         if (maxSeg > sLen) {
 451                                 int c = sc.GetRelativeCharacter(sLen);
 452                                 if (setWordStart.Contains(c)) { // word (various)
 453                                         sLen++;
 454                                         isVar = true;
 455                                         while (maxSeg > sLen) {
 456                                                 if (!setWord.Contains(sc.GetRelativeCharacter(sLen)))
 457                                                         break;
 458                                                 sLen++;
 459                                         }
 460                                 } else if (braces && IsADigit(c) && (sLen == 2)) {      // digit for ${digit}
 461                                         sLen++;
 462                                         isVar = true;
 463                                 }
 464                         }
 465                         if (braces) {
 466                                 if ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '}')) {        // } end for {word}
 467                                         sLen++;
 468                                 } else
 469                                         isVar = false;
 470                         }
 471                 }
 472                 if (!isVar && (maxSeg > 1)) {   // $- or @-specific variable patterns
 473                         int c = sc.chNext;
 474                         if (sc.ch == '$') {
 475                                 sLen = 1;
 476                                 if (IsADigit(c)) {      // $[0-9] and slurp trailing digits
 477                                         sLen++;
 478                                         isVar = true;
 479                                         while ((maxSeg > sLen) && IsADigit(sc.GetRelativeCharacter(sLen)))
 480                                                 sLen++;
 481                                 } else if (setSpecialVar.Contains(c)) { // $ special variables
 482                                         sLen++;
 483                                         isVar = true;
 484                                 } else if (!isPattern && ((c == '(') || (c == ')') || (c == '|'))) {    // $ additional
 485                                         sLen++;
 486                                         isVar = true;
 487                                 } else if (c == '^') {  // $^A control-char style
 488                                         sLen++;
 489                                         if ((maxSeg > sLen) && setControlVar.Contains(sc.GetRelativeCharacter(sLen))) {
 490                                                 sLen++;
 491                                                 isVar = true;
 492                                         }
 493                                 }
 494                         } else if (sc.ch == '@') {
 495                                 sLen = 1;
 496                                 if (!isPattern && ((c == '+') || (c == '-'))) { // @ specials non-pattern
 497                                         sLen++;
 498                                         isVar = true;
 499                                 }
 500                         }
 501                 }
 502                 if (isVar) {    // commit as interpolated variable or normal character
 503                         if (sc.state < SCE_PL_STRING_VAR)
 504                                 sc.SetState(sc.state + INTERPOLATE_SHIFT);
 505                         sc.Forward(sLen);
 506                         maxSeg -= sLen;
 507                 } else {
 508                         if (sc.state >= SCE_PL_STRING_VAR)
 509                                 sc.SetState(sc.state - INTERPOLATE_SHIFT);
 510                         sc.Forward();
 511                         maxSeg--;
 512                 }
 513         }
 514         if (sc.state >= SCE_PL_STRING_VAR)
 515                 sc.SetState(sc.state - INTERPOLATE_SHIFT);
 516 }
 517
 518 void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
 519         LexAccessor styler(pAccess);
 520
 521         // keywords that forces /PATTERN/ at all times; should track vim's behaviour
 522         WordList reWords;
 523         reWords.Set("elsif if split while");
 524
 525         // charset classes
 526         CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMAC");
 527         // lexing of "%*</" operators is non-trivial; these are missing in the set below
 528         CharacterSet setPerlOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;>,?!.~");
 529         CharacterSet setQDelim(CharacterSet::setNone, "qrwx");
 530         CharacterSet setModifiers(CharacterSet::setAlpha);
 531         CharacterSet setPreferRE(CharacterSet::setNone, "*/<%");
 532         // setArray and setHash also accepts chars for special vars like $_,
 533         // which are then truncated when the next char does not match setVar
 534         CharacterSet setVar(CharacterSet::setAlphaNum, "#$_'", 0x80, true);
 535         CharacterSet setArray(CharacterSet::setAlpha, "#$_+-", 0x80, true);
 536         CharacterSet setHash(CharacterSet::setAlpha, "#$_!^+-", 0x80, true);
 537         CharacterSet &setPOD = setModifiers;
 538         CharacterSet setNonHereDoc(CharacterSet::setDigits, "=$@");
 539         CharacterSet setHereDocDelim(CharacterSet::setAlphaNum, "_");
 540         CharacterSet setSubPrototype(CharacterSet::setNone, "\\[$@%&*+];");
 541         // for format identifiers
 542         CharacterSet setFormatStart(CharacterSet::setAlpha, "_=");
 543         CharacterSet &setFormat = setHereDocDelim;
 544
 545         // Lexer for perl often has to backtrack to start of current style to determine
 546         // which characters are being used as quotes, how deeply nested is the
 547         // start position and what the termination string is for HERE documents.
 548
 549         class HereDocCls {      // Class to manage HERE doc sequence
 550         public:
 551                 int State;
 552                 // 0: '<<' encountered
 553                 // 1: collect the delimiter
 554                 // 2: here doc text (lines after the delimiter)
 555                 int Quote;              // the char after '<<'
 556                 bool Quoted;            // true if Quote in ('\'','"','`')
 557                 int DelimiterLength;    // strlen(Delimiter)
 558                 char *Delimiter;        // the Delimiter, 256: sizeof PL_tokenbuf
 559                 HereDocCls() {
 560                         State = 0;
 561                         Quote = 0;
 562                         Quoted = false;
 563                         DelimiterLength = 0;
 564                         Delimiter = new char[HERE_DELIM_MAX];
 565                         Delimiter[0] = '\0';
 566                 }
 567                 void Append(int ch) {
 568                         Delimiter[DelimiterLength++] = static_cast<char>(ch);
 569                         Delimiter[DelimiterLength] = '\0';
 570                 }
 571                 ~HereDocCls() {
 572                         delete []Delimiter;
 573                 }
 574         };
 575         HereDocCls HereDoc;             // TODO: FIFO for stacked here-docs
 576
 577         class QuoteCls {        // Class to manage quote pairs
 578         public:
 579                 int Rep;
 580                 int Count;
 581                 int Up, Down;
 582                 QuoteCls() {
 583                         New(1);
 584                 }
 585                 void New(int r = 1) {
 586                         Rep   = r;
 587                         Count = 0;
 588                         Up    = '\0';
 589                         Down  = '\0';
 590                 }
 591                 void Open(int u) {
 592                         Count++;
 593                         Up    = u;
 594                         Down  = opposite(Up);
 595                 }
 596         };
 597         QuoteCls Quote;
 598
 599         // additional state for number lexing
 600         int numState = PERLNUM_DECIMAL;
 601         int dotCount = 0;
 602
 603         unsigned int endPos = startPos + length;
 604
 605         // Backtrack to beginning of style if required...
 606         // If in a long distance lexical state, backtrack to find quote characters.
 607         // Includes strings (may be multi-line), numbers (additional state), format
 608         // bodies, as well as POD sections.
 609         if (initStyle == SCE_PL_HERE_Q
 610             || initStyle == SCE_PL_HERE_QQ
 611             || initStyle == SCE_PL_HERE_QX
 612             || initStyle == SCE_PL_FORMAT
 613             || initStyle == SCE_PL_HERE_QQ_VAR
 614             || initStyle == SCE_PL_HERE_QX_VAR
 615            ) {
 616                 // backtrack through multiple styles to reach the delimiter start
 617                 int delim = (initStyle == SCE_PL_FORMAT) ? SCE_PL_FORMAT_IDENT:SCE_PL_HERE_DELIM;
 618                 while ((startPos > 1) && (styler.StyleAt(startPos) != delim)) {
 619                         startPos--;
 620                 }
 621                 startPos = styler.LineStart(styler.GetLine(startPos));
 622                 initStyle = styler.StyleAt(startPos - 1);
 623         }
 624         if (initStyle == SCE_PL_STRING
 625             || initStyle == SCE_PL_STRING_QQ
 626             || initStyle == SCE_PL_BACKTICKS
 627             || initStyle == SCE_PL_STRING_QX
 628             || initStyle == SCE_PL_REGEX
 629             || initStyle == SCE_PL_STRING_QR
 630             || initStyle == SCE_PL_REGSUBST
 631             || initStyle == SCE_PL_STRING_VAR
 632             || initStyle == SCE_PL_STRING_QQ_VAR
 633             || initStyle == SCE_PL_BACKTICKS_VAR
 634             || initStyle == SCE_PL_STRING_QX_VAR
 635             || initStyle == SCE_PL_REGEX_VAR
 636             || initStyle == SCE_PL_STRING_QR_VAR
 637             || initStyle == SCE_PL_REGSUBST_VAR
 638            ) {
 639                 // for interpolation, must backtrack through a mix of two different styles
 640                 int otherStyle = (initStyle >= SCE_PL_STRING_VAR) ?
 641                         initStyle - INTERPOLATE_SHIFT : initStyle + INTERPOLATE_SHIFT;
 642                 while (startPos > 1) {
 643                         int st = styler.StyleAt(startPos - 1);
 644                         if ((st != initStyle) && (st != otherStyle))
 645                                 break;
 646                         startPos--;
 647                 }
 648                 initStyle = SCE_PL_DEFAULT;
 649         } else if (initStyle == SCE_PL_STRING_Q
 650                 || initStyle == SCE_PL_STRING_QW
 651                 || initStyle == SCE_PL_XLAT
 652                 || initStyle == SCE_PL_CHARACTER
 653                 || initStyle == SCE_PL_NUMBER
 654                 || initStyle == SCE_PL_IDENTIFIER
 655                 || initStyle == SCE_PL_ERROR
 656                 || initStyle == SCE_PL_SUB_PROTOTYPE
 657            ) {
 658                 while ((startPos > 1) && (styler.StyleAt(startPos - 1) == initStyle)) {
 659                         startPos--;
 660                 }
 661                 initStyle = SCE_PL_DEFAULT;
 662         } else if (initStyle == SCE_PL_POD
 663                 || initStyle == SCE_PL_POD_VERB
 664                   ) {
 665                 // POD backtracking finds preceding blank lines and goes back past them
 666                 int ln = styler.GetLine(startPos);
 667                 if (ln > 0) {
 668                         initStyle = styler.StyleAt(styler.LineStart(--ln));
 669                         if (initStyle == SCE_PL_POD || initStyle == SCE_PL_POD_VERB) {
 670                                 while (ln > 0 && styler.GetLineState(ln) == SCE_PL_DEFAULT)
 671                                         ln--;
 672                         }
 673                         startPos = styler.LineStart(++ln);
 674                         initStyle = styler.StyleAt(startPos - 1);
 675                 } else {
 676                         startPos = 0;
 677                         initStyle = SCE_PL_DEFAULT;
 678                 }
 679         }
 680
 681         // backFlag, backPos are additional state to aid identifier corner cases.
 682         // Look backwards past whitespace and comments in order to detect either
 683         // operator or keyword. Later updated as we go along.
 684         int backFlag = BACK_NONE;
 685         unsigned int backPos = startPos;
 686         if (backPos > 0) {
 687                 backPos--;
 688                 skipWhitespaceComment(styler, backPos);
 689                 if (styler.StyleAt(backPos) == SCE_PL_OPERATOR)
 690                         backFlag = BACK_OPERATOR;
 691                 else if (styler.StyleAt(backPos) == SCE_PL_WORD)
 692                         backFlag = BACK_KEYWORD;
 693                 backPos++;
 694         }
 695
 696         StyleContext sc(startPos, endPos - startPos, initStyle, styler, static_cast<char>(STYLE_MAX));
 697
 698         for (; sc.More(); sc.Forward()) {
 699
 700                 // Determine if the current state should terminate.
 701                 switch (sc.state) {
 702                 case SCE_PL_OPERATOR:
 703                         sc.SetState(SCE_PL_DEFAULT);
 704                         backFlag = BACK_OPERATOR;
 705                         backPos = sc.currentPos;
 706                         break;
 707                 case SCE_PL_IDENTIFIER:         // identifier, bareword, inputsymbol
 708                         if ((!setWord.Contains(sc.ch) && sc.ch != '\'')
 709                                 || sc.Match('.', '.')
 710                                 || sc.chPrev == '>') {  // end of inputsymbol
 711                                 sc.SetState(SCE_PL_DEFAULT);
 712                         }
 713                         break;
 714                 case SCE_PL_WORD:               // keyword, plus special cases
 715                         if (!setWord.Contains(sc.ch)) {
 716                                 char s[100];
 717                                 sc.GetCurrent(s, sizeof(s));
 718                                 if ((strcmp(s, "__DATA__") == 0) || (strcmp(s, "__END__") == 0)) {
 719                                         sc.ChangeState(SCE_PL_DATASECTION);
 720                                 } else {
 721                                         if ((strcmp(s, "format") == 0)) {
 722                                                 sc.SetState(SCE_PL_FORMAT_IDENT);
 723                                                 HereDoc.State = 0;
 724                                         } else {
 725                                                 sc.SetState(SCE_PL_DEFAULT);
 726                                         }
 727                                         backFlag = BACK_KEYWORD;
 728                                         backPos = sc.currentPos;
 729                                 }
 730                         }
 731                         break;
 732                 case SCE_PL_SCALAR:
 733                 case SCE_PL_ARRAY:
 734                 case SCE_PL_HASH:
 735                 case SCE_PL_SYMBOLTABLE:
 736                         if (sc.Match(':', ':')) {       // skip ::
 737                                 sc.Forward();
 738                         } else if (!setVar.Contains(sc.ch)) {
 739                                 if (sc.LengthCurrent() == 1) {
 740                                         // Special variable: $(, $_ etc.
 741                                         sc.Forward();
 742                                 }
 743                                 sc.SetState(SCE_PL_DEFAULT);
 744                         }
 745                         break;
 746                 case SCE_PL_NUMBER:
 747                         // if no early break, number style is terminated at "(go through)"
 748                         if (sc.ch == '.') {
 749                                 if (sc.chNext == '.') {
 750                                         // double dot is always an operator (go through)
 751                                 } else if (numState <= PERLNUM_FLOAT_EXP) {
 752                                         // non-decimal number or float exponent, consume next dot
 753                                         sc.SetState(SCE_PL_OPERATOR);
 754                                         break;
 755                                 } else {        // decimal or vectors allows dots
 756                                         dotCount++;
 757                                         if (numState == PERLNUM_DECIMAL) {
 758                                                 if (dotCount <= 1)      // number with one dot in it
 759                                                         break;
 760                                                 if (IsADigit(sc.chNext)) {      // really a vector
 761                                                         numState = PERLNUM_VECTOR;
 762                                                         break;
 763                                                 }
 764                                                 // number then dot (go through)
 765                                         } else if (IsADigit(sc.chNext)) // vectors
 766                                                 break;
 767                                         // vector then dot (go through)
 768                                 }
 769                         } else if (sc.ch == '_') {
 770                                 // permissive underscoring for number and vector literals
 771                                 break;
 772                         } else if (numState == PERLNUM_DECIMAL) {
 773                                 if (sc.ch == 'E' || sc.ch == 'e') {     // exponent, sign
 774                                         numState = PERLNUM_FLOAT_EXP;
 775                                         if (sc.chNext == '+' || sc.chNext == '-') {
 776                                                 sc.Forward();
 777                                         }
 778                                         break;
 779                                 } else if (IsADigit(sc.ch))
 780                                         break;
 781                                 // number then word (go through)
 782                         } else if (numState == PERLNUM_HEX) {
 783                                 if (IsADigit(sc.ch, 16))
 784                                         break;
 785                         } else if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) {
 786                                 if (IsADigit(sc.ch))    // vector
 787                                         break;
 788                                 if (setWord.Contains(sc.ch) && dotCount == 0) { // change to word
 789                                         sc.ChangeState(SCE_PL_IDENTIFIER);
 790                                         break;
 791                                 }
 792                                 // vector then word (go through)
 793                         } else if (IsADigit(sc.ch)) {
 794                                 if (numState == PERLNUM_FLOAT_EXP) {
 795                                         break;
 796                                 } else if (numState == PERLNUM_OCTAL) {
 797                                         if (sc.ch <= '7') break;
 798                                 } else if (numState == PERLNUM_BINARY) {
 799                                         if (sc.ch <= '1') break;
 800                                 }
 801                                 // mark invalid octal, binary numbers (go through)
 802                                 numState = PERLNUM_BAD;
 803                                 break;
 804                         }
 805                         // complete current number or vector
 806                         sc.ChangeState(actualNumStyle(numState));
 807                         sc.SetState(SCE_PL_DEFAULT);
 808                         break;
 809                 case SCE_PL_COMMENTLINE:
 810                         if (sc.atLineEnd) {
 811                                 sc.SetState(SCE_PL_DEFAULT);
 812                         }
 813                         break;
 814                 case SCE_PL_HERE_DELIM:
 815                         if (HereDoc.State == 0) { // '<<' encountered
 816                                 int delim_ch = sc.chNext;
 817                                 int ws_skip = 0;
 818                                 HereDoc.State = 1;      // pre-init HERE doc class
 819                                 HereDoc.Quote = sc.chNext;
 820                                 HereDoc.Quoted = false;
 821                                 HereDoc.DelimiterLength = 0;
 822                                 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
 823                                 if (IsASpaceOrTab(delim_ch)) {
 824                                         // skip whitespace; legal only for quoted delimiters
 825                                         unsigned int i = sc.currentPos + 1;
 826                                         while ((i < endPos) && IsASpaceOrTab(delim_ch)) {
 827                                                 i++;
 828                                                 delim_ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
 829                                         }
 830                                         ws_skip = i - sc.currentPos - 1;
 831                                 }
 832                                 if (delim_ch == '\'' || delim_ch == '"' || delim_ch == '`') {
 833                                         // a quoted here-doc delimiter; skip any whitespace
 834                                         sc.Forward(ws_skip + 1);
 835                                         HereDoc.Quote = delim_ch;
 836                                         HereDoc.Quoted = true;
 837                                 } else if ((ws_skip == 0 && setNonHereDoc.Contains(sc.chNext))
 838                                         || ws_skip > 0) {
 839                                         // left shift << or <<= operator cases
 840                                         // restore position if operator
 841                                         sc.ChangeState(SCE_PL_OPERATOR);
 842                                         sc.ForwardSetState(SCE_PL_DEFAULT);
 843                                         backFlag = BACK_OPERATOR;
 844                                         backPos = sc.currentPos;
 845                                         HereDoc.State = 0;
 846                                 } else {
 847                                         // specially handle initial '\' for identifier
 848                                         if (ws_skip == 0 && HereDoc.Quote == '\\')
 849                                                 sc.Forward();
 850                                         // an unquoted here-doc delimiter, no special handling
 851                                         // (cannot be prefixed by spaces/tabs), or
 852                                         // symbols terminates; deprecated zero-length delimiter
 853                                 }
 854                         } else if (HereDoc.State == 1) { // collect the delimiter
 855                                 backFlag = BACK_NONE;
 856                                 if (HereDoc.Quoted) { // a quoted here-doc delimiter
 857                                         if (sc.ch == HereDoc.Quote) { // closing quote => end of delimiter
 858                                                 sc.ForwardSetState(SCE_PL_DEFAULT);
 859                                         } else if (!sc.atLineEnd) {
 860                                                 if (sc.Match('\\', static_cast<char>(HereDoc.Quote))) { // escaped quote
 861                                                         sc.Forward();
 862                                                 }
 863                                                 if (sc.ch != '\r') {    // skip CR if CRLF
 864                                                         int i = 0;                      // else append char, possibly an extended char
 865                                                         while (i < sc.width) {
 866                                                                 HereDoc.Append(static_cast<unsigned char>(styler.SafeGetCharAt(sc.currentPos + i)));
 867                                                                 i++;
 868                                                         }
 869                                                 }
 870                                         }
 871                                 } else { // an unquoted here-doc delimiter, no extended charsets
 872                                         if (setHereDocDelim.Contains(sc.ch)) {
 873                                                 HereDoc.Append(sc.ch);
 874                                         } else {
 875                                                 sc.SetState(SCE_PL_DEFAULT);
 876                                         }
 877                                 }
 878                                 if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) {
 879                                         sc.SetState(SCE_PL_ERROR);
 880                                         HereDoc.State = 0;
 881                                 }
 882                         }
 883                         break;
 884                 case SCE_PL_HERE_Q:
 885                 case SCE_PL_HERE_QQ:
 886                 case SCE_PL_HERE_QX:
 887                         // also implies HereDoc.State == 2
 888                         sc.Complete();
 889                         if (HereDoc.DelimiterLength == 0 || sc.Match(HereDoc.Delimiter)) {
 890                                 int c = sc.GetRelative(HereDoc.DelimiterLength);
 891                                 if (c == '\r' || c == '\n') {   // peek first, do not consume match
 892                                         sc.ForwardBytes(HereDoc.DelimiterLength);
 893                                         sc.SetState(SCE_PL_DEFAULT);
 894                                         backFlag = BACK_NONE;
 895                                         HereDoc.State = 0;
 896                                         if (!sc.atLineEnd)
 897                                                 sc.Forward();
 898                                         break;
 899                                 }
 900                         }
 901                         if (sc.state == SCE_PL_HERE_Q) {        // \EOF and 'EOF' non-interpolated
 902                                 while (!sc.atLineEnd)
 903                                         sc.Forward();
 904                                 break;
 905                         }
 906                         while (!sc.atLineEnd) {         // "EOF" and `EOF` interpolated
 907                                 int c, sLen = 0, endType = 0;
 908                                 while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
 909                                         // scan to break string into segments
 910                                         if (c == '\\') {
 911                                                 endType = 1; break;
 912                                         } else if (c == '\r' || c == '\n') {
 913                                                 endType = 2; break;
 914                                         }
 915                                         sLen++;
 916                                 }
 917                                 if (sLen > 0)   // process non-empty segments
 918                                         InterpolateSegment(sc, sLen);
 919                                 if (endType == 1) {
 920                                         sc.Forward();
 921                                         // \ at end-of-line does not appear to have any effect, skip
 922                                         if (sc.ch != '\r' && sc.ch != '\n')
 923                                                 sc.Forward();
 924                                 } else if (endType == 2) {
 925                                         if (!sc.atLineEnd)
 926                                                 sc.Forward();
 927                                 }
 928                         }
 929                         break;
 930                 case SCE_PL_POD:
 931                 case SCE_PL_POD_VERB: {
 932                                 unsigned int fw = sc.currentPos;
 933                                 int ln = styler.GetLine(fw);
 934                                 if (sc.atLineStart && sc.Match("=cut")) {       // end of POD
 935                                         sc.SetState(SCE_PL_POD);
 936                                         sc.Forward(4);
 937                                         sc.SetState(SCE_PL_DEFAULT);
 938                                         styler.SetLineState(ln, SCE_PL_POD);
 939                                         break;
 940                                 }
 941                                 int pod = podLineScan(styler, fw, endPos);      // classify POD line
 942                                 styler.SetLineState(ln, pod);
 943                                 if (pod == SCE_PL_DEFAULT) {
 944                                         if (sc.state == SCE_PL_POD_VERB) {
 945                                                 unsigned int fw2 = fw;
 946                                                 while (fw2 < (endPos - 1) && pod == SCE_PL_DEFAULT) {
 947                                                         fw = fw2++;     // penultimate line (last blank line)
 948                                                         pod = podLineScan(styler, fw2, endPos);
 949                                                         styler.SetLineState(styler.GetLine(fw2), pod);
 950                                                 }
 951                                                 if (pod == SCE_PL_POD) {        // truncate verbatim POD early
 952                                                         sc.SetState(SCE_PL_POD);
 953                                                 } else
 954                                                         fw = fw2;
 955                                         }
 956                                 } else {
 957                                         if (pod == SCE_PL_POD_VERB      // still part of current paragraph
 958                                                 && (styler.GetLineState(ln - 1) == SCE_PL_POD)) {
 959                                                 pod = SCE_PL_POD;
 960                                                 styler.SetLineState(ln, pod);
 961                                         } else if (pod == SCE_PL_POD
 962                                                 && (styler.GetLineState(ln - 1) == SCE_PL_POD_VERB)) {
 963                                                 pod = SCE_PL_POD_VERB;
 964                                                 styler.SetLineState(ln, pod);
 965                                         }
 966                                         sc.SetState(pod);
 967                                 }
 968                                 sc.ForwardBytes(fw - sc.currentPos);    // commit style
 969                         }
 970                         break;
 971                 case SCE_PL_REGEX:
 972                 case SCE_PL_STRING_QR:
 973                         if (Quote.Rep <= 0) {
 974                                 if (!setModifiers.Contains(sc.ch))
 975                                         sc.SetState(SCE_PL_DEFAULT);
 976                         } else if (!Quote.Up && !IsASpace(sc.ch)) {
 977                                 Quote.Open(sc.ch);
 978                         } else {
 979                                 int c, sLen = 0, endType = 0;
 980                                 while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
 981                                         // scan to break string into segments
 982                                         if (IsASpace(c)) {
 983                                                 break;
 984                                         } else if (c == '\\' && Quote.Up != '\\') {
 985                                                 endType = 1; break;
 986                                         } else if (c == Quote.Down) {
 987                                                 Quote.Count--;
 988                                                 if (Quote.Count == 0) {
 989                                                         Quote.Rep--;
 990                                                         break;
 991                                                 }
 992                                         } else if (c == Quote.Up)
 993                                                 Quote.Count++;
 994                                         sLen++;
 995                                 }
 996                                 if (sLen > 0) { // process non-empty segments
 997                                         if (Quote.Up != '\'') {
 998                                                 InterpolateSegment(sc, sLen, true);
 999                                         } else          // non-interpolated path
1000                                                 sc.Forward(sLen);
1001                                 }
1002                                 if (endType == 1)
1003                                         sc.Forward();
1004                         }
1005                         break;
1006                 case SCE_PL_REGSUBST:
1007                 case SCE_PL_XLAT:
1008                         if (Quote.Rep <= 0) {
1009                                 if (!setModifiers.Contains(sc.ch))
1010                                         sc.SetState(SCE_PL_DEFAULT);
1011                         } else if (!Quote.Up && !IsASpace(sc.ch)) {
1012                                 Quote.Open(sc.ch);
1013                         } else {
1014                                 int c, sLen = 0, endType = 0;
1015                                 bool isPattern = (Quote.Rep == 2);
1016                                 while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
1017                                         // scan to break string into segments
1018                                         if (c == '\\' && Quote.Up != '\\') {
1019                                                 endType = 2; break;
1020                                         } else if (Quote.Count == 0 && Quote.Rep == 1) {
1021                                                 // We matched something like s(...) or tr{...}, Perl 5.10
1022                                                 // appears to allow almost any character for use as the
1023                                                 // next delimiters. Whitespace and comments are accepted in
1024                                                 // between, but we'll limit to whitespace here.
1025                                                 // For '#', if no whitespace in between, it's a delimiter.
1026                                                 if (IsASpace(c)) {
1027                                                         // Keep going
1028                                                 } else if (c == '#' && IsASpaceOrTab(sc.GetRelativeCharacter(sLen - 1))) {
1029                                                         endType = 3;
1030                                                 } else
1031                                                         Quote.Open(c);
1032                                                 break;
1033                                         } else if (c == Quote.Down) {
1034                                                 Quote.Count--;
1035                                                 if (Quote.Count == 0) {
1036                                                         Quote.Rep--;
1037                                                         endType = 1;
1038                                                 }
1039                                                 if (Quote.Up == Quote.Down)
1040                                                         Quote.Count++;
1041                                                 if (endType == 1)
1042                                                         break;
1043                                         } else if (c == Quote.Up) {
1044                                                 Quote.Count++;
1045                                         } else if (IsASpace(c))
1046                                                 break;
1047                                         sLen++;
1048                                 }
1049                                 if (sLen > 0) { // process non-empty segments
1050                                         if (sc.state == SCE_PL_REGSUBST && Quote.Up != '\'') {
1051                                                 InterpolateSegment(sc, sLen, isPattern);
1052                                         } else          // non-interpolated path
1053                                                 sc.Forward(sLen);
1054                                 }
1055                                 if (endType == 2) {
1056                                         sc.Forward();
1057                                 } else if (endType == 3)
1058                                         sc.SetState(SCE_PL_DEFAULT);
1059                         }
1060                         break;
1061                 case SCE_PL_STRING_Q:
1062                 case SCE_PL_STRING_QQ:
1063                 case SCE_PL_STRING_QX:
1064                 case SCE_PL_STRING_QW:
1065                 case SCE_PL_STRING:
1066                 case SCE_PL_CHARACTER:
1067                 case SCE_PL_BACKTICKS:
1068                         if (!Quote.Down && !IsASpace(sc.ch)) {
1069                                 Quote.Open(sc.ch);
1070                         } else {
1071                                 int c, sLen = 0, endType = 0;
1072                                 while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
1073                                         // scan to break string into segments
1074                                         if (IsASpace(c)) {
1075                                                 break;
1076                                         } else if (c == '\\' && Quote.Up != '\\') {
1077                                                 endType = 2; break;
1078                                         } else if (c == Quote.Down) {
1079                                                 Quote.Count--;
1080                                                 if (Quote.Count == 0) {
1081                                                         endType = 3; break;
1082                                                 }
1083                                         } else if (c == Quote.Up)
1084                                                 Quote.Count++;
1085                                         sLen++;
1086                                 }
1087                                 if (sLen > 0) { // process non-empty segments
1088                                         switch (sc.state) {
1089                                         case SCE_PL_STRING:
1090                                         case SCE_PL_STRING_QQ:
1091                                         case SCE_PL_BACKTICKS:
1092                                                 InterpolateSegment(sc, sLen);
1093                                                 break;
1094                                         case SCE_PL_STRING_QX:
1095                                                 if (Quote.Up != '\'') {
1096                                                         InterpolateSegment(sc, sLen);
1097                                                         break;
1098                                                 }
1099                                                 // (continued for ' delim)
1100                                         default:        // non-interpolated path
1101                                                 sc.Forward(sLen);
1102                                         }
1103                                 }
1104                                 if (endType == 2) {
1105                                         sc.Forward();
1106                                 } else if (endType == 3)
1107                                         sc.ForwardSetState(SCE_PL_DEFAULT);
1108                         }
1109                         break;
1110                 case SCE_PL_SUB_PROTOTYPE: {
1111                                 int i = 0;
1112                                 // forward scan; must all be valid proto characters
1113                                 while (setSubPrototype.Contains(sc.GetRelative(i)))
1114                                         i++;
1115                                 if (sc.GetRelative(i) == ')') { // valid sub prototype
1116                                         sc.ForwardBytes(i);
1117                                         sc.ForwardSetState(SCE_PL_DEFAULT);
1118                                 } else {
1119                                         // abandon prototype, restart from '('
1120                                         sc.ChangeState(SCE_PL_OPERATOR);
1121                                         sc.SetState(SCE_PL_DEFAULT);
1122                                 }
1123                         }
1124                         break;
1125                 case SCE_PL_FORMAT: {
1126                                 sc.Complete();
1127                                 if (sc.Match('.')) {
1128                                         sc.Forward();
1129                                         if (sc.atLineEnd || ((sc.ch == '\r' && sc.chNext == '\n')))
1130                                                 sc.SetState(SCE_PL_DEFAULT);
1131                                 }
1132                                 while (!sc.atLineEnd)
1133                                         sc.Forward();
1134                         }
1135                         break;
1136                 case SCE_PL_ERROR:
1137                         break;
1138                 }
1139                 // Needed for specific continuation styles (one follows the other)
1140                 switch (sc.state) {
1141                         // continued from SCE_PL_WORD
1142                 case SCE_PL_FORMAT_IDENT:
1143                         // occupies HereDoc state 3 to avoid clashing with HERE docs
1144                         if (IsASpaceOrTab(sc.ch)) {             // skip whitespace
1145                                 sc.ChangeState(SCE_PL_DEFAULT);
1146                                 while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
1147                                         sc.Forward();
1148                                 sc.SetState(SCE_PL_FORMAT_IDENT);
1149                         }
1150                         if (setFormatStart.Contains(sc.ch)) {   // identifier or '='
1151                                 if (sc.ch != '=') {
1152                                         do {
1153                                                 sc.Forward();
1154                                         } while (setFormat.Contains(sc.ch));
1155                                 }
1156                                 while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
1157                                         sc.Forward();
1158                                 if (sc.ch == '=') {
1159                                         sc.ForwardSetState(SCE_PL_DEFAULT);
1160                                         HereDoc.State = 3;
1161                                 } else {
1162                                         // invalid identifier; inexact fallback, but hey
1163                                         sc.ChangeState(SCE_PL_IDENTIFIER);
1164                                         sc.SetState(SCE_PL_DEFAULT);
1165                                 }
1166                         } else {
1167                                 sc.ChangeState(SCE_PL_DEFAULT); // invalid identifier
1168                         }
1169                         backFlag = BACK_NONE;
1170                         break;
1171                 }
1172
1173                 // Must check end of HereDoc states here before default state is handled
1174                 if (HereDoc.State == 1 && sc.atLineEnd) {
1175                         // Begin of here-doc (the line after the here-doc delimiter):
1176                         // Lexically, the here-doc starts from the next line after the <<, but the
1177                         // first line of the here-doc seems to follow the style of the last EOL sequence
1178                         int st_new = SCE_PL_HERE_QQ;
1179                         HereDoc.State = 2;
1180                         if (HereDoc.Quoted) {
1181                                 if (sc.state == SCE_PL_HERE_DELIM) {
1182                                         // Missing quote at end of string! We are stricter than perl.
1183                                         // Colour here-doc anyway while marking this bit as an error.
1184                                         sc.ChangeState(SCE_PL_ERROR);
1185                                 }
1186                                 switch (HereDoc.Quote) {
1187                                 case '\'':
1188                                         st_new = SCE_PL_HERE_Q;
1189                                         break;
1190                                 case '"' :
1191                                         st_new = SCE_PL_HERE_QQ;
1192                                         break;
1193                                 case '`' :
1194                                         st_new = SCE_PL_HERE_QX;
1195                                         break;
1196                                 }
1197                         } else {
1198                                 if (HereDoc.Quote == '\\')
1199                                         st_new = SCE_PL_HERE_Q;
1200                         }
1201                         sc.SetState(st_new);
1202                 }
1203                 if (HereDoc.State == 3 && sc.atLineEnd) {
1204                         // Start of format body.
1205                         HereDoc.State = 0;
1206                         sc.SetState(SCE_PL_FORMAT);
1207                 }
1208
1209                 // Determine if a new state should be entered.
1210                 if (sc.state == SCE_PL_DEFAULT) {
1211                         if (IsADigit(sc.ch) ||
1212                                 (IsADigit(sc.chNext) && (sc.ch == '.' || sc.ch == 'v'))) {
1213                                 sc.SetState(SCE_PL_NUMBER);
1214                                 backFlag = BACK_NONE;
1215                                 numState = PERLNUM_DECIMAL;
1216                                 dotCount = 0;
1217                                 if (sc.ch == '0') {             // hex,bin,octal
1218                                         if (sc.chNext == 'x' || sc.chNext == 'X') {
1219                                                 numState = PERLNUM_HEX;
1220                                         } else if (sc.chNext == 'b' || sc.chNext == 'B') {
1221                                                 numState = PERLNUM_BINARY;
1222                                         } else if (IsADigit(sc.chNext)) {
1223                                                 numState = PERLNUM_OCTAL;
1224                                         }
1225                                         if (numState != PERLNUM_DECIMAL) {
1226                                                 sc.Forward();
1227                                         }
1228                                 } else if (sc.ch == 'v') {              // vector
1229                                         numState = PERLNUM_V_VECTOR;
1230                                 }
1231                         } else if (setWord.Contains(sc.ch)) {
1232                                 // if immediately prefixed by '::', always a bareword
1233                                 sc.SetState(SCE_PL_WORD);
1234                                 if (sc.chPrev == ':' && sc.GetRelative(-2) == ':') {
1235                                         sc.ChangeState(SCE_PL_IDENTIFIER);
1236                                 }
1237                                 unsigned int bk = sc.currentPos;
1238                                 unsigned int fw = sc.currentPos + 1;
1239                                 // first check for possible quote-like delimiter
1240                                 if (sc.ch == 's' && !setWord.Contains(sc.chNext)) {
1241                                         sc.ChangeState(SCE_PL_REGSUBST);
1242                                         Quote.New(2);
1243                                 } else if (sc.ch == 'm' && !setWord.Contains(sc.chNext)) {
1244                                         sc.ChangeState(SCE_PL_REGEX);
1245                                         Quote.New();
1246                                 } else if (sc.ch == 'q' && !setWord.Contains(sc.chNext)) {
1247                                         sc.ChangeState(SCE_PL_STRING_Q);
1248                                         Quote.New();
1249                                 } else if (sc.ch == 'y' && !setWord.Contains(sc.chNext)) {
1250                                         sc.ChangeState(SCE_PL_XLAT);
1251                                         Quote.New(2);
1252                                 } else if (sc.Match('t', 'r') && !setWord.Contains(sc.GetRelative(2))) {
1253                                         sc.ChangeState(SCE_PL_XLAT);
1254                                         Quote.New(2);
1255                                         sc.Forward();
1256                                         fw++;
1257                                 } else if (sc.ch == 'q' && setQDelim.Contains(sc.chNext)
1258                                         && !setWord.Contains(sc.GetRelative(2))) {
1259                                         if (sc.chNext == 'q') sc.ChangeState(SCE_PL_STRING_QQ);
1260                                         else if (sc.chNext == 'x') sc.ChangeState(SCE_PL_STRING_QX);
1261                                         else if (sc.chNext == 'r') sc.ChangeState(SCE_PL_STRING_QR);
1262                                         else sc.ChangeState(SCE_PL_STRING_QW);  // sc.chNext == 'w'
1263                                         Quote.New();
1264                                         sc.Forward();
1265                                         fw++;
1266                                 } else if (sc.ch == 'x' && (sc.chNext == '=' || // repetition
1267                                         !setWord.Contains(sc.chNext) ||
1268                                         (IsADigit(sc.chPrev) && IsADigit(sc.chNext)))) {
1269                                         sc.ChangeState(SCE_PL_OPERATOR);
1270                                 }
1271                                 // if potentially a keyword, scan forward and grab word, then check
1272                                 // if it's really one; if yes, disambiguation test is performed
1273                                 // otherwise it is always a bareword and we skip a lot of scanning
1274                                 if (sc.state == SCE_PL_WORD) {
1275                                         while (setWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(fw))))
1276                                                 fw++;
1277                                         if (!isPerlKeyword(styler.GetStartSegment(), fw, keywords, styler)) {
1278                                                 sc.ChangeState(SCE_PL_IDENTIFIER);
1279                                         }
1280                                 }
1281                                 // if already SCE_PL_IDENTIFIER, then no ambiguity, skip this
1282                                 // for quote-like delimiters/keywords, attempt to disambiguate
1283                                 // to select for bareword, change state -> SCE_PL_IDENTIFIER
1284                                 if (sc.state != SCE_PL_IDENTIFIER && bk > 0) {
1285                                         if (disambiguateBareword(styler, bk, fw, backFlag, backPos, endPos))
1286                                                 sc.ChangeState(SCE_PL_IDENTIFIER);
1287                                 }
1288                                 backFlag = BACK_NONE;
1289                         } else if (sc.ch == '#') {
1290                                 sc.SetState(SCE_PL_COMMENTLINE);
1291                         } else if (sc.ch == '\"') {
1292                                 sc.SetState(SCE_PL_STRING);
1293                                 Quote.New();
1294                                 Quote.Open(sc.ch);
1295                                 backFlag = BACK_NONE;
1296                         } else if (sc.ch == '\'') {
1297                                 if (sc.chPrev == '&' && setWordStart.Contains(sc.chNext)) {
1298                                         // Archaic call
1299                                         sc.SetState(SCE_PL_IDENTIFIER);
1300                                 } else {
1301                                         sc.SetState(SCE_PL_CHARACTER);
1302                                         Quote.New();
1303                                         Quote.Open(sc.ch);
1304                                 }
1305                                 backFlag = BACK_NONE;
1306                         } else if (sc.ch == '`') {
1307                                 sc.SetState(SCE_PL_BACKTICKS);
1308                                 Quote.New();
1309                                 Quote.Open(sc.ch);
1310                                 backFlag = BACK_NONE;
1311                         } else if (sc.ch == '$') {
1312                                 sc.SetState(SCE_PL_SCALAR);
1313                                 if (sc.chNext == '{') {
1314                                         sc.ForwardSetState(SCE_PL_OPERATOR);
1315                                 } else if (IsASpace(sc.chNext)) {
1316                                         sc.ForwardSetState(SCE_PL_DEFAULT);
1317                                 } else {
1318                                         sc.Forward();
1319                                         if (sc.Match('`', '`') || sc.Match(':', ':')) {
1320                                                 sc.Forward();
1321                                         }
1322                                 }
1323                                 backFlag = BACK_NONE;
1324                         } else if (sc.ch == '@') {
1325                                 sc.SetState(SCE_PL_ARRAY);
1326                                 if (setArray.Contains(sc.chNext)) {
1327                                         // no special treatment
1328                                 } else if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1329                                         sc.ForwardBytes(2);
1330                                 } else if (sc.chNext == '{' || sc.chNext == '[') {
1331                                         sc.ForwardSetState(SCE_PL_OPERATOR);
1332                                 } else {
1333                                         sc.ChangeState(SCE_PL_OPERATOR);
1334                                 }
1335                                 backFlag = BACK_NONE;
1336                         } else if (setPreferRE.Contains(sc.ch)) {
1337                                 // Explicit backward peeking to set a consistent preferRE for
1338                                 // any slash found, so no longer need to track preferRE state.
1339                                 // Find first previous significant lexed element and interpret.
1340                                 // A few symbols share this code for disambiguation.
1341                                 bool preferRE = false;
1342                                 bool isHereDoc = sc.Match('<', '<');
1343                                 bool hereDocSpace = false;              // for: SCALAR [whitespace] '<<'
1344                                 unsigned int bk = (sc.currentPos > 0) ? sc.currentPos - 1: 0;
1345                                 sc.Complete();
1346                                 styler.Flush();
1347                                 if (styler.StyleAt(bk) == SCE_PL_DEFAULT)
1348                                         hereDocSpace = true;
1349                                 skipWhitespaceComment(styler, bk);
1350                                 if (bk == 0) {
1351                                         // avoid backward scanning breakage
1352                                         preferRE = true;
1353                                 } else {
1354                                         int bkstyle = styler.StyleAt(bk);
1355                                         int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
1356                                         switch (bkstyle) {
1357                                         case SCE_PL_OPERATOR:
1358                                                 preferRE = true;
1359                                                 if (bkch == ')' || bkch == ']') {
1360                                                         preferRE = false;
1361                                                 } else if (bkch == '}') {
1362                                                         // backtrack by counting balanced brace pairs
1363                                                         // needed to test for variables like ${}, @{} etc.
1364                                                         bkstyle = styleBeforeBracePair(styler, bk);
1365                                                         if (bkstyle == SCE_PL_SCALAR
1366                                                                 || bkstyle == SCE_PL_ARRAY
1367                                                                 || bkstyle == SCE_PL_HASH
1368                                                                 || bkstyle == SCE_PL_SYMBOLTABLE
1369                                                                 || bkstyle == SCE_PL_OPERATOR) {
1370                                                                 preferRE = false;
1371                                                         }
1372                                                 } else if (bkch == '+' || bkch == '-') {
1373                                                         if (bkch == static_cast<unsigned char>(styler.SafeGetCharAt(bk - 1))
1374                                                                 && bkch != static_cast<unsigned char>(styler.SafeGetCharAt(bk - 2)))
1375                                                                 // exceptions for operators: unary suffixes ++, --
1376                                                                 preferRE = false;
1377                                                 }
1378                                                 break;
1379                                         case SCE_PL_IDENTIFIER:
1380                                                 preferRE = true;
1381                                                 bkstyle = styleCheckIdentifier(styler, bk);
1382                                                 if ((bkstyle == 1) || (bkstyle == 2)) {
1383                                                         // inputsymbol or var with "->" or "::" before identifier
1384                                                         preferRE = false;
1385                                                 } else if (bkstyle == 3) {
1386                                                         // bare identifier, test cases follow:
1387                                                         if (sc.ch == '/') {
1388                                                                 // if '/', /PATTERN/ unless digit/space immediately after '/'
1389                                                                 // if '//', always expect defined-or operator to follow identifier
1390                                                                 if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/')
1391                                                                         preferRE = false;
1392                                                         } else if (sc.ch == '*' || sc.ch == '%') {
1393                                                                 if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*'))
1394                                                                         preferRE = false;
1395                                                         } else if (sc.ch == '<') {
1396                                                                 if (IsASpace(sc.chNext) || sc.chNext == '=')
1397                                                                         preferRE = false;
1398                                                         }
1399                                                 }
1400                                                 break;
1401                                         case SCE_PL_SCALAR:             // for $var<< case:
1402                                                 if (isHereDoc && hereDocSpace)  // if SCALAR whitespace '<<', *always* a HERE doc
1403                                                         preferRE = true;
1404                                                 break;
1405                                         case SCE_PL_WORD:
1406                                                 preferRE = true;
1407                                                 // for HERE docs, always true
1408                                                 if (sc.ch == '/') {
1409                                                         // adopt heuristics similar to vim-style rules:
1410                                                         // keywords always forced as /PATTERN/: split, if, elsif, while
1411                                                         // everything else /PATTERN/ unless digit/space immediately after '/'
1412                                                         // for '//', defined-or favoured unless special keywords
1413                                                         unsigned int bkend = bk + 1;
1414                                                         while (bk > 0 && styler.StyleAt(bk - 1) == SCE_PL_WORD) {
1415                                                                 bk--;
1416                                                         }
1417                                                         if (isPerlKeyword(bk, bkend, reWords, styler))
1418                                                                 break;
1419                                                         if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/')
1420                                                                 preferRE = false;
1421                                                 } else if (sc.ch == '*' || sc.ch == '%') {
1422                                                         if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*'))
1423                                                                 preferRE = false;
1424                                                 } else if (sc.ch == '<') {
1425                                                         if (IsASpace(sc.chNext) || sc.chNext == '=')
1426                                                                 preferRE = false;
1427                                                 }
1428                                                 break;
1429
1430                                                 // other styles use the default, preferRE=false
1431                                         case SCE_PL_POD:
1432                                         case SCE_PL_HERE_Q:
1433                                         case SCE_PL_HERE_QQ:
1434                                         case SCE_PL_HERE_QX:
1435                                                 preferRE = true;
1436                                                 break;
1437                                         }
1438                                 }
1439                                 backFlag = BACK_NONE;
1440                                 if (isHereDoc) {        // handle '<<', HERE doc
1441                                         if (preferRE) {
1442                                                 sc.SetState(SCE_PL_HERE_DELIM);
1443                                                 HereDoc.State = 0;
1444                                         } else {                // << operator
1445                                                 sc.SetState(SCE_PL_OPERATOR);
1446                                                 sc.Forward();
1447                                         }
1448                                 } else if (sc.ch == '*') {      // handle '*', typeglob
1449                                         if (preferRE) {
1450                                                 sc.SetState(SCE_PL_SYMBOLTABLE);
1451                                                 if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1452                                                         sc.ForwardBytes(2);
1453                                                 } else if (sc.chNext == '{') {
1454                                                         sc.ForwardSetState(SCE_PL_OPERATOR);
1455                                                 } else {
1456                                                         sc.Forward();
1457                                                 }
1458                                         } else {
1459                                                 sc.SetState(SCE_PL_OPERATOR);
1460                                                 if (sc.chNext == '*')   // exponentiation
1461                                                         sc.Forward();
1462                                         }
1463                                 } else if (sc.ch == '%') {      // handle '%', hash
1464                                         if (preferRE) {
1465                                                 sc.SetState(SCE_PL_HASH);
1466                                                 if (setHash.Contains(sc.chNext)) {
1467                                                         sc.Forward();
1468                                                 } else if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1469                                                         sc.ForwardBytes(2);
1470                                                 } else if (sc.chNext == '{') {
1471                                                         sc.ForwardSetState(SCE_PL_OPERATOR);
1472                                                 } else {
1473                                                         sc.ChangeState(SCE_PL_OPERATOR);
1474                                                 }
1475                                         } else {
1476                                                 sc.SetState(SCE_PL_OPERATOR);
1477                                         }
1478                                 } else if (sc.ch == '<') {      // handle '<', inputsymbol
1479                                         if (preferRE) {
1480                                                 // forward scan
1481                                                 int i = InputSymbolScan(sc);
1482                                                 if (i > 0) {
1483                                                         sc.SetState(SCE_PL_IDENTIFIER);
1484                                                         sc.Forward(i);
1485                                                 } else {
1486                                                         sc.SetState(SCE_PL_OPERATOR);
1487                                                 }
1488                                         } else {
1489                                                 sc.SetState(SCE_PL_OPERATOR);
1490                                         }
1491                                 } else {                        // handle '/', regexp
1492                                         if (preferRE) {
1493                                                 sc.SetState(SCE_PL_REGEX);
1494                                                 Quote.New();
1495                                                 Quote.Open(sc.ch);
1496                                         } else {                // / and // operators
1497                                                 sc.SetState(SCE_PL_OPERATOR);
1498                                                 if (sc.chNext == '/') {
1499                                                         sc.Forward();
1500                                                 }
1501                                         }
1502                                 }
1503                         } else if (sc.ch == '='         // POD
1504                                 && setPOD.Contains(sc.chNext)
1505                                 && sc.atLineStart) {
1506                                 sc.SetState(SCE_PL_POD);
1507                                 backFlag = BACK_NONE;
1508                         } else if (sc.ch == '-' && setWordStart.Contains(sc.chNext)) {  // extended '-' cases
1509                                 unsigned int bk = sc.currentPos;
1510                                 unsigned int fw = 2;
1511                                 if (setSingleCharOp.Contains(sc.chNext) &&      // file test operators
1512                                         !setWord.Contains(sc.GetRelative(2))) {
1513                                         sc.SetState(SCE_PL_WORD);
1514                                 } else {
1515                                         // nominally a minus and bareword; find extent of bareword
1516                                         while (setWord.Contains(sc.GetRelative(fw)))
1517                                                 fw++;
1518                                         sc.SetState(SCE_PL_OPERATOR);
1519                                 }
1520                                 // force to bareword for hash key => or {variable literal} cases
1521                                 if (disambiguateBareword(styler, bk, bk + fw, backFlag, backPos, endPos) & 2) {
1522                                         sc.ChangeState(SCE_PL_IDENTIFIER);
1523                                 }
1524                                 backFlag = BACK_NONE;
1525                         } else if (sc.ch == '(' && sc.currentPos > 0) { // '(' or subroutine prototype
1526                                 sc.Complete();
1527                                 if (styleCheckSubPrototype(styler, sc.currentPos - 1)) {
1528                                         sc.SetState(SCE_PL_SUB_PROTOTYPE);
1529                                         backFlag = BACK_NONE;
1530                                 } else {
1531                                         sc.SetState(SCE_PL_OPERATOR);
1532                                 }
1533                         } else if (setPerlOperator.Contains(sc.ch)) {   // operators
1534                                 sc.SetState(SCE_PL_OPERATOR);
1535                                 if (sc.Match('.', '.')) {       // .. and ...
1536                                         sc.Forward();
1537                                         if (sc.chNext == '.') sc.Forward();
1538                                 }
1539                         } else if (sc.ch == 4 || sc.ch == 26) {         // ^D and ^Z ends valid perl source
1540                                 sc.SetState(SCE_PL_DATASECTION);
1541                         } else {
1542                                 // keep colouring defaults
1543                                 sc.Complete();
1544                         }
1545                 }
1546         }
1547         sc.Complete();
1548         if (sc.state == SCE_PL_HERE_Q
1549                 || sc.state == SCE_PL_HERE_QQ
1550                 || sc.state == SCE_PL_HERE_QX
1551                 || sc.state == SCE_PL_FORMAT) {
1552                 styler.ChangeLexerState(sc.currentPos, styler.Length());
1553         }
1554         sc.Complete();
1555 }
1556
1557 #define PERL_HEADFOLD_SHIFT             4       // equals the position of the lowest set bit of PERL_HEADFOLD_MASK; NOTE(review): presumably the shift for POD heading bits in a fold level -- confirm at use site
1558 #define PERL_HEADFOLD_MASK              0xF0    // bits 4-7; Fold() clears these from levelCurrent when it reaches a POD "=cut" line
1559
1560 void SCI_METHOD LexerPerl::Fold(unsigned int startPos, int length, int /* initStyle */, IDocument *pAccess) {
1561
1562         if (!options.fold)
1563                 return;
1564
1565         LexAccessor styler(pAccess);
1566
1567         unsigned int endPos = startPos + length;
1568         int visibleChars = 0;
1569         int lineCurrent = styler.GetLine(startPos);
1570
1571         // Backtrack to the previous line in case we need to fix its fold status
1572         if (startPos > 0) {
1573                 if (lineCurrent > 0) {
1574                         lineCurrent--;
1575                         startPos = styler.LineStart(lineCurrent);
1576                 }
1577         }
1578
1579         int levelPrev = SC_FOLDLEVELBASE;
1580         if (lineCurrent > 0)
1581                 levelPrev = styler.LevelAt(lineCurrent - 1) >> 16;
1582         int levelCurrent = levelPrev;
1583         char chNext = styler[startPos];
1584         char chPrev = styler.SafeGetCharAt(startPos - 1);
1585         int styleNext = styler.StyleAt(startPos);
1586         // Used at end of line to determine if the line was a package definition
1587         bool isPackageLine = false;
1588         int podHeading = 0;
1589         for (unsigned int i = startPos; i < endPos; i++) {
1590                 char ch = chNext;
1591                 chNext = styler.SafeGetCharAt(i + 1);
1592                 int style = styleNext;
1593                 styleNext = styler.StyleAt(i + 1);
1594                 int stylePrevCh = (i) ? styler.StyleAt(i - 1):SCE_PL_DEFAULT;
1595                 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1596                 bool atLineStart = ((chPrev == '\r') || (chPrev == '\n')) || i == 0;
1597                 // Comment folding
1598                 if (options.foldComment && atEOL && IsCommentLine(lineCurrent, styler)) {
1599                         if (!IsCommentLine(lineCurrent - 1, styler)
1600                                 && IsCommentLine(lineCurrent + 1, styler))
1601                                 levelCurrent++;
1602                         else if (IsCommentLine(lineCurrent - 1, styler)
1603                                 && !IsCommentLine(lineCurrent + 1, styler))
1604                                 levelCurrent--;
1605                 }
1606                 // {} [] block folding
1607                 if (style == SCE_PL_OPERATOR) {
1608                         if (ch == '{') {
1609                                 if (options.foldAtElse && levelCurrent < levelPrev)
1610                                         --levelPrev;
1611                                 levelCurrent++;
1612                         } else if (ch == '}') {
1613                                 levelCurrent--;
1614                         }
1615                         if (ch == '[') {
1616                                 if (options.foldAtElse && levelCurrent < levelPrev)
1617                                         --levelPrev;
1618                                 levelCurrent++;
1619                         } else if (ch == ']') {
1620                                 levelCurrent--;
1621                         }
1622                 }
1623                 // POD folding
1624                 if (options.foldPOD && atLineStart) {
1625                         if (style == SCE_PL_POD) {
1626                                 if (stylePrevCh != SCE_PL_POD && stylePrevCh != SCE_PL_POD_VERB)
1627                                         levelCurrent++;
1628                                 else if (styler.Match(i, "=cut"))
1629                                         levelCurrent = (levelCurrent & ~PERL_HEADFOLD_MASK) - 1;
1630                                 else if (styler.Match(i, "=head"))
1631                                         podHeading = PodHeadingLevel(i, styler);
1632                         } else if (style == SCE_PL_DATASECTION) {
1633                                 if (ch == '=' && IsASCII(chNext) && isalpha(chNext) && levelCurrent == SC_FOLDLEVELBASE)
1634                                         levelCurrent++;
1635                                 else if (styler.Match(i, "=cut") && levelCurrent > SC_FOLDLEVELBASE)
1636                                         levelCurrent = (levelCurrent & ~PERL_HEADFOLD_MASK) - 1;
1637                                 else if (styler.Match(i, "=head"))
1638                                         podHeading = PodHeadingLevel(i, styler);
1639                                 // if package used or unclosed brace, level > SC_FOLDLEVELBASE!
1640                                 // reset needed as level test is vs. SC_FOLDLEVELBASE
1641                                 else if (stylePrevCh != SCE_PL_DATASECTION)
1642                                         levelCurrent = SC_FOLDLEVELBASE;
1643                         }
1644                 }
1645                 // package folding
1646                 if (options.foldPackage && atLineStart) {
1647                         if (IsPackageLine(lineCurrent, styler)
1648                                 && !IsPackageLine(lineCurrent + 1, styler))
1649                                 isPackageLine = true;
1650                 }
1651
1652                 //heredoc folding
1653                 switch (style) {
1654                 case SCE_PL_HERE_QQ :
1655                 case SCE_PL_HERE_Q :
1656                 case SCE_PL_HERE_QX :
1657                         switch (stylePrevCh) {
1658                         case SCE_PL_HERE_QQ :
1659                         case SCE_PL_HERE_Q :
1660                         case SCE_PL_HERE_QX :
1661                                 //do nothing;
1662                                 break;
1663                         default :
1664                                 levelCurrent++;
1665                                 break;
1666                         }
1667                         break;
1668                 default:
1669                         switch (stylePrevCh) {
1670                         case SCE_PL_HERE_QQ :
1671                         case SCE_PL_HERE_Q :
1672                         case SCE_PL_HERE_QX :
1673                                 levelCurrent--;
1674                                 break;
1675                         default :
1676                                 //do nothing;
1677                                 break;
1678                         }
1679                         break;
1680                 }
1681
1682                 //explicit folding
1683                 if (options.foldCommentExplicit && style == SCE_PL_COMMENTLINE && ch == '#') {
1684                         if (chNext == '{') {
1685                                 levelCurrent++;
1686                         } else if (levelCurrent > SC_FOLDLEVELBASE  && chNext == '}') {
1687                                 levelCurrent--;
1688                         }
1689                 }
1690
1691                 if (atEOL) {
1692                         int lev = levelPrev;
1693                         // POD headings occupy bits 7-4, leaving some breathing room for
1694                         // non-standard practice -- POD sections stuck in blocks, etc.
1695                         if (podHeading > 0) {
1696                                 levelCurrent = (lev & ~PERL_HEADFOLD_MASK) | (podHeading << PERL_HEADFOLD_SHIFT);
1697                                 lev = levelCurrent - 1;
1698                                 lev |= SC_FOLDLEVELHEADERFLAG;
1699                                 podHeading = 0;
1700                         }
1701                         // Check if line was a package declaration
1702                         // because packages need "special" treatment
1703                         if (isPackageLine) {
1704                                 lev = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG;
1705                                 levelCurrent = SC_FOLDLEVELBASE + 1;
1706                                 isPackageLine = false;
1707                         }
1708                         lev |= levelCurrent << 16;
1709                         if (visibleChars == 0 && options.foldCompact)
1710                                 lev |= SC_FOLDLEVELWHITEFLAG;
1711                         if ((levelCurrent > levelPrev) && (visibleChars > 0))
1712                                 lev |= SC_FOLDLEVELHEADERFLAG;
1713                         if (lev != styler.LevelAt(lineCurrent)) {
1714                                 styler.SetLevel(lineCurrent, lev);
1715                         }
1716                         lineCurrent++;
1717                         levelPrev = levelCurrent;
1718                         visibleChars = 0;
1719                 }
1720                 if (!isspacechar(ch))
1721                         visibleChars++;
1722                 chPrev = ch;
1723         }
1724         // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1725         int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
1726         styler.SetLevel(lineCurrent, levelPrev | flagsNext);
1727 }
1728
// Definition of the LexerModule object for Perl: it is constructed with the
// SCLEX_PERL language identifier, the LexerPerl::LexerFactoryPerl factory
// function, the language name "perl" and the perlWordListDesc table.
// NOTE(review): perlWordListDesc is presumably the list of keyword-set
// descriptions declared earlier in this file -- confirm against its definition.
1729 LexerModule lmPerl(SCLEX_PERL, LexerPerl::LexerFactoryPerl, "perl", perlWordListDesc);