scintilla/lexers/LexPython.cxx

   1 // Scintilla source code edit control
   2 /** @file LexPython.cxx
   3  ** Lexer for Python.
   4  **/
   5 // Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
   6 // The License.txt file describes the conditions under which this software may be distributed.
   7
   8 #include <stdlib.h>
   9 #include <string.h>
  10 #include <stdio.h>
  11 #include <stdarg.h>
  12 #include <assert.h>
  13 #include <ctype.h>
  14
  15 #include <string>
  16 #include <vector>
  17 #include <map>
  18
  19 #include "ILexer.h"
  20 #include "Scintilla.h"
  21 #include "SciLexer.h"
  22
  23 #include "StringCopy.h"
  24 #include "WordList.h"
  25 #include "LexAccessor.h"
  26 #include "Accessor.h"
  27 #include "StyleContext.h"
  28 #include "CharacterSet.h"
  29 #include "CharacterCategory.h"
  30 #include "LexerModule.h"
  31 #include "OptionSet.h"
  32 #include "SubStyles.h"
  33
  34 #ifdef SCI_NAMESPACE
  35 using namespace Scintilla;
  36 #endif
  37
  38 namespace {
  39 // Use an unnamed namespace to protect the functions and classes from name conflicts
  40
  41 /* Notes on f-strings: f-strings are strings prefixed with f (e.g. f'') that may
  42    have arbitrary expressions in {}.  The tokens in the expressions are lexed as if
  43    they were outside of any string.  Expressions may contain { and } characters as
  44    long as there is a closing } for every {, may be 2+ lines in a triple quoted
  45    string, and may have a formatting specifier following a ! or :, but both !
  46    and : are valid inside of a bracketed expression and != is a valid
  47    expression token even outside of a bracketed expression.
  48
  49    When in an f-string expression, the lexer keeps track of the state value of
  50    the f-string and the nesting count for the expression (# of [, (, { seen - # of
  51    }, ), ] seen).  f-strings may be nested (e.g. f'{ a + f"{1+2}"') so a stack of
   states and nesting counts is kept.  If an f-string expression continues beyond
  53    the end of a line, this stack is saved in a std::map that maps a line number to
  54    the stack at the end of that line.  std::vector is used for the stack.
  55
  56    The PEP for f-strings is at https://www.python.org/dev/peps/pep-0498/
  57 */
  58 struct SingleFStringExpState {
  59         int state;
  60         int nestingCount;
  61 };
  62
  63 /* kwCDef, kwCTypeName only used for Cython */
  64 enum kwType { kwOther, kwClass, kwDef, kwImport, kwCDef, kwCTypeName, kwCPDef };
  65
  66 enum literalsAllowed { litNone = 0, litU = 1, litB = 2, litF = 4 };
  67
  68 const int indicatorWhitespace = 1;
  69
  70 bool IsPyComment(Accessor &styler, Sci_Position pos, Sci_Position len) {
  71         return len > 0 && styler[pos] == '#';
  72 }
  73
  74 bool IsPyStringTypeChar(int ch, literalsAllowed allowed) {
  75         return
  76                 ((allowed & litB) && (ch == 'b' || ch == 'B')) ||
  77                 ((allowed & litU) && (ch == 'u' || ch == 'U')) ||
  78                 ((allowed & litF) && (ch == 'f' || ch == 'F'));
  79 }
  80
  81 bool IsPyStringStart(int ch, int chNext, int chNext2, literalsAllowed allowed) {
  82         if (ch == '\'' || ch == '"')
  83                 return true;
  84         if (IsPyStringTypeChar(ch, allowed)) {
  85                 if (chNext == '"' || chNext == '\'')
  86                         return true;
  87                 if ((chNext == 'r' || chNext == 'R') && (chNext2 == '"' || chNext2 == '\''))
  88                         return true;
  89         }
  90         if ((ch == 'r' || ch == 'R') && (chNext == '"' || chNext == '\''))
  91                 return true;
  92
  93         return false;
  94 }
  95
  96 bool IsPyFStringState(int st) {
  97         return ((st == SCE_P_FCHARACTER) || (st == SCE_P_FSTRING) ||
  98                 (st == SCE_P_FTRIPLE) || (st == SCE_P_FTRIPLEDOUBLE));
  99 }
 100
 101 bool IsPySingleQuoteStringState(int st) {
 102         return ((st == SCE_P_CHARACTER) || (st == SCE_P_STRING) ||
 103                 (st == SCE_P_FCHARACTER) || (st == SCE_P_FSTRING));
 104 }
 105
 106 bool IsPyTripleQuoteStringState(int st) {
 107         return ((st == SCE_P_TRIPLE) || (st == SCE_P_TRIPLEDOUBLE) ||
 108                 (st == SCE_P_FTRIPLE) || (st == SCE_P_FTRIPLEDOUBLE));
 109 }
 110
 111 char GetPyStringQuoteChar(int st) {
 112         if ((st == SCE_P_CHARACTER) || (st == SCE_P_FCHARACTER) ||
 113                         (st == SCE_P_TRIPLE) || (st == SCE_P_FTRIPLE))
 114                 return '\'';
 115         if ((st == SCE_P_STRING) || (st == SCE_P_FSTRING) ||
 116                         (st == SCE_P_TRIPLEDOUBLE) || (st == SCE_P_FTRIPLEDOUBLE))
 117                 return '"';
 118
 119         return '\0';
 120 }
 121
 122 void PushStateToStack(int state, std::vector<SingleFStringExpState> &stack, SingleFStringExpState *&currentFStringExp) {
 123         SingleFStringExpState single = {state, 0};
 124         stack.push_back(single);
 125
 126         currentFStringExp = &stack.back();
 127 }
 128
 129 int PopFromStateStack(std::vector<SingleFStringExpState> &stack, SingleFStringExpState *&currentFStringExp) {
 130         int state = 0;
 131
 132         if (!stack.empty()) {
 133                 state = stack.back().state;
 134                 stack.pop_back();
 135         }
 136
 137         if (stack.empty()) {
 138                 currentFStringExp = NULL;
 139         } else {
 140                 currentFStringExp = &stack.back();
 141         }
 142
 143         return state;
 144 }
 145
 146 /* Return the state to use for the string starting at i; *nextIndex will be set to the first index following the quote(s) */
 147 int GetPyStringState(Accessor &styler, Sci_Position i, Sci_PositionU *nextIndex, literalsAllowed allowed) {
 148         char ch = styler.SafeGetCharAt(i);
 149         char chNext = styler.SafeGetCharAt(i + 1);
 150         const int firstIsF = (ch == 'f' || ch == 'F');
 151
 152         // Advance beyond r, u, or ur prefix (or r, b, or br in Python 2.7+ and r, f, or fr in Python 3.6+), but bail if there are any unexpected chars
 153         if (ch == 'r' || ch == 'R') {
 154                 i++;
 155                 ch = styler.SafeGetCharAt(i);
 156                 chNext = styler.SafeGetCharAt(i + 1);
 157         } else if (IsPyStringTypeChar(ch, allowed)) {
 158                 if (chNext == 'r' || chNext == 'R')
 159                         i += 2;
 160                 else
 161                         i += 1;
 162                 ch = styler.SafeGetCharAt(i);
 163                 chNext = styler.SafeGetCharAt(i + 1);
 164         }
 165
 166         if (ch != '"' && ch != '\'') {
 167                 *nextIndex = i + 1;
 168                 return SCE_P_DEFAULT;
 169         }
 170
 171         if (ch == chNext && ch == styler.SafeGetCharAt(i + 2)) {
 172                 *nextIndex = i + 3;
 173
 174                 if (ch == '"')
 175                         return (firstIsF ? SCE_P_FTRIPLEDOUBLE : SCE_P_TRIPLEDOUBLE);
 176                 else
 177                         return (firstIsF ? SCE_P_FTRIPLE : SCE_P_TRIPLE);
 178         } else {
 179                 *nextIndex = i + 1;
 180
 181                 if (ch == '"')
 182                         return (firstIsF ? SCE_P_FSTRING : SCE_P_STRING);
 183                 else
 184                         return (firstIsF ? SCE_P_FCHARACTER : SCE_P_CHARACTER);
 185         }
 186 }
 187
 188 inline bool IsAWordChar(int ch, bool unicodeIdentifiers) {
 189         if (ch < 0x80)
 190                 return (isalnum(ch) || ch == '.' || ch == '_');
 191
 192         if (!unicodeIdentifiers)
 193                 return false;
 194
 195         // Python uses the XID_Continue set from unicode data
 196         return IsXidContinue(ch);
 197 }
 198
 199 inline bool IsAWordStart(int ch, bool unicodeIdentifiers) {
 200         if (ch < 0x80)
 201                 return (isalpha(ch) || ch == '_');
 202
 203         if (!unicodeIdentifiers)
 204                 return false;
 205
 206         // Python uses the XID_Start set from unicode data
 207         return IsXidStart(ch);
 208 }
 209
 210 static bool IsFirstNonWhitespace(Sci_Position pos, Accessor &styler) {
 211         Sci_Position line = styler.GetLine(pos);
 212         Sci_Position start_pos = styler.LineStart(line);
 213         for (Sci_Position i = start_pos; i < pos; i++) {
 214                 const char ch = styler[i];
 215                 if (!(ch == ' ' || ch == '\t'))
 216                         return false;
 217         }
 218         return true;
 219 }
 220
 221 // Options used for LexerPython
 222 struct OptionsPython {
 223         int whingeLevel;
 224         bool base2or8Literals;
 225         bool stringsU;
 226         bool stringsB;
 227         bool stringsF;
 228         bool stringsOverNewline;
 229         bool keywords2NoSubIdentifiers;
 230         bool fold;
 231         bool foldQuotes;
 232         bool foldCompact;
 233         bool unicodeIdentifiers;
 234
 235         OptionsPython() {
 236                 whingeLevel = 0;
 237                 base2or8Literals = true;
 238                 stringsU = true;
 239                 stringsB = true;
 240                 stringsF = true;
 241                 stringsOverNewline = false;
 242                 keywords2NoSubIdentifiers = false;
 243                 fold = false;
 244                 foldQuotes = false;
 245                 foldCompact = false;
 246                 unicodeIdentifiers = true;
 247         }
 248
 249         literalsAllowed AllowedLiterals() const {
 250                 literalsAllowed allowedLiterals = stringsU ? litU : litNone;
 251                 if (stringsB)
 252                         allowedLiterals = static_cast<literalsAllowed>(allowedLiterals | litB);
 253                 if (stringsF)
 254                         allowedLiterals = static_cast<literalsAllowed>(allowedLiterals | litF);
 255                 return allowedLiterals;
 256         }
 257 };
 258
 259 static const char *const pythonWordListDesc[] = {
 260         "Keywords",
 261         "Highlighted identifiers",
 262         0
 263 };
 264
 265 struct OptionSetPython : public OptionSet<OptionsPython> {
 266         OptionSetPython() {
 267                 DefineProperty("tab.timmy.whinge.level", &OptionsPython::whingeLevel,
 268                                "For Python code, checks whether indenting is consistent. "
 269                                "The default, 0 turns off indentation checking, "
 270                                "1 checks whether each line is potentially inconsistent with the previous line, "
 271                                "2 checks whether any space characters occur before a tab character in the indentation, "
 272                                "3 checks whether any spaces are in the indentation, and "
 273                                "4 checks for any tab characters in the indentation. "
 274                                "1 is a good level to use.");
 275
 276                 DefineProperty("lexer.python.literals.binary", &OptionsPython::base2or8Literals,
 277                                "Set to 0 to not recognise Python 3 binary and octal literals: 0b1011 0o712.");
 278
 279                 DefineProperty("lexer.python.strings.u", &OptionsPython::stringsU,
 280                                "Set to 0 to not recognise Python Unicode literals u\"x\" as used before Python 3.");
 281
 282                 DefineProperty("lexer.python.strings.b", &OptionsPython::stringsB,
 283                                "Set to 0 to not recognise Python 3 bytes literals b\"x\".");
 284
 285                 DefineProperty("lexer.python.strings.f", &OptionsPython::stringsF,
 286                                "Set to 0 to not recognise Python 3.6 f-string literals f\"var={var}\".");
 287
 288                 DefineProperty("lexer.python.strings.over.newline", &OptionsPython::stringsOverNewline,
 289                                "Set to 1 to allow strings to span newline characters.");
 290
 291                 DefineProperty("lexer.python.keywords2.no.sub.identifiers", &OptionsPython::keywords2NoSubIdentifiers,
 292                                "When enabled, it will not style keywords2 items that are used as a sub-identifier. "
 293                                "Example: when set, will not highlight \"foo.open\" when \"open\" is a keywords2 item.");
 294
 295                 DefineProperty("fold", &OptionsPython::fold);
 296
 297                 DefineProperty("fold.quotes.python", &OptionsPython::foldQuotes,
 298                                "This option enables folding multi-line quoted strings when using the Python lexer.");
 299
 300                 DefineProperty("fold.compact", &OptionsPython::foldCompact);
 301
 302                 DefineProperty("lexer.python.unicode.identifiers", &OptionsPython::unicodeIdentifiers,
 303                                "Set to 0 to not recognise Python 3 unicode identifiers.");
 304
 305                 DefineWordListSets(pythonWordListDesc);
 306         }
 307 };
 308
 309 const char styleSubable[] = { SCE_P_IDENTIFIER, 0 };
 310
 311 }
 312
 313 class LexerPython : public ILexerWithSubStyles {
 314         WordList keywords;
 315         WordList keywords2;
 316         OptionsPython options;
 317         OptionSetPython osPython;
 318         enum { ssIdentifier };
 319         SubStyles subStyles;
 320         std::map<int, std::vector<SingleFStringExpState> > ftripleStateAtEol;
 321 public:
 322         explicit LexerPython() :
 323                 subStyles(styleSubable, 0x80, 0x40, 0) {
 324         }
 325         virtual ~LexerPython() {
 326         }
 327         void SCI_METHOD Release() override {
 328                 delete this;
 329         }
 330         int SCI_METHOD Version() const override {
 331                 return lvSubStyles;
 332         }
 333         const char *SCI_METHOD PropertyNames() override {
 334                 return osPython.PropertyNames();
 335         }
 336         int SCI_METHOD PropertyType(const char *name) override {
 337                 return osPython.PropertyType(name);
 338         }
 339         const char *SCI_METHOD DescribeProperty(const char *name) override {
 340                 return osPython.DescribeProperty(name);
 341         }
 342         Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
 343         const char *SCI_METHOD DescribeWordListSets() override {
 344                 return osPython.DescribeWordListSets();
 345         }
 346         Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
 347         void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
 348         void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
 349
 350         void *SCI_METHOD PrivateCall(int, void *) override {
 351                 return 0;
 352         }
 353
 354         int SCI_METHOD LineEndTypesSupported() override {
 355                 return SC_LINE_END_TYPE_UNICODE;
 356         }
 357
 358         int SCI_METHOD AllocateSubStyles(int styleBase, int numberStyles) override {
 359                 return subStyles.Allocate(styleBase, numberStyles);
 360         }
 361         int SCI_METHOD SubStylesStart(int styleBase) override {
 362                 return subStyles.Start(styleBase);
 363         }
 364         int SCI_METHOD SubStylesLength(int styleBase) override {
 365                 return subStyles.Length(styleBase);
 366         }
 367         int SCI_METHOD StyleFromSubStyle(int subStyle) override {
 368                 const int styleBase = subStyles.BaseStyle(subStyle);
 369                 return styleBase;
 370         }
 371         int SCI_METHOD PrimaryStyleFromStyle(int style) override {
 372                 return style;
 373         }
 374         void SCI_METHOD FreeSubStyles() override {
 375                 subStyles.Free();
 376         }
 377         void SCI_METHOD SetIdentifiers(int style, const char *identifiers) override {
 378                 subStyles.SetIdentifiers(style, identifiers);
 379         }
 380         int SCI_METHOD DistanceToSecondaryStyles() override {
 381                 return 0;
 382         }
 383         const char *SCI_METHOD GetSubStyleBases() override {
 384                 return styleSubable;
 385         }
 386
 387         static ILexer *LexerFactoryPython() {
 388                 return new LexerPython();
 389         }
 390
 391 private:
 392         void ProcessLineEnd(StyleContext &sc, std::vector<SingleFStringExpState> &fstringStateStack, SingleFStringExpState *&currentFStringExp, bool &inContinuedString);
 393 };
 394
 395 Sci_Position SCI_METHOD LexerPython::PropertySet(const char *key, const char *val) {
 396         if (osPython.PropertySet(&options, key, val)) {
 397                 return 0;
 398         }
 399         return -1;
 400 }
 401
 402 Sci_Position SCI_METHOD LexerPython::WordListSet(int n, const char *wl) {
 403         WordList *wordListN = 0;
 404         switch (n) {
 405         case 0:
 406                 wordListN = &keywords;
 407                 break;
 408         case 1:
 409                 wordListN = &keywords2;
 410                 break;
 411         }
 412         Sci_Position firstModification = -1;
 413         if (wordListN) {
 414                 WordList wlNew;
 415                 wlNew.Set(wl);
 416                 if (*wordListN != wlNew) {
 417                         wordListN->Set(wl);
 418                         firstModification = 0;
 419                 }
 420         }
 421         return firstModification;
 422 }
 423
 424 void LexerPython::ProcessLineEnd(StyleContext &sc, std::vector<SingleFStringExpState> &fstringStateStack, SingleFStringExpState *&currentFStringExp, bool &inContinuedString) {
 425         long deepestSingleStateIndex = -1;
 426         unsigned long i;
 427
 428         // Find the deepest single quote state because that string will end; no \ continuation in f-string
 429         for (i = 0; i < fstringStateStack.size(); i++) {
 430                 if (IsPySingleQuoteStringState(fstringStateStack[i].state)) {
 431                         deepestSingleStateIndex = i;
 432                         break;
 433                 }
 434         }
 435
 436         if (deepestSingleStateIndex != -1) {
 437                 sc.SetState(fstringStateStack[deepestSingleStateIndex].state);
 438                 while (fstringStateStack.size() > static_cast<unsigned long>(deepestSingleStateIndex)) {
 439                         PopFromStateStack(fstringStateStack, currentFStringExp);
 440                 }
 441         }
 442         if (!fstringStateStack.empty()) {
 443                 std::pair<int, std::vector<SingleFStringExpState> > val;
 444                 val.first = sc.currentLine;
 445                 val.second = fstringStateStack;
 446
 447                 ftripleStateAtEol.insert(val);
 448         }
 449
 450         if ((sc.state == SCE_P_DEFAULT)
 451                         || IsPyTripleQuoteStringState(sc.state)) {
 452                 // Perform colourisation of white space and triple quoted strings at end of each line to allow
 453                 // tab marking to work inside white space and triple quoted strings
 454                 sc.SetState(sc.state);
 455         }
 456         if (IsPySingleQuoteStringState(sc.state)) {
 457                 if (inContinuedString || options.stringsOverNewline) {
 458                         inContinuedString = false;
 459                 } else {
 460                         sc.ChangeState(SCE_P_STRINGEOL);
 461                         sc.ForwardSetState(SCE_P_DEFAULT);
 462                 }
 463         }
 464 }
 465
 466 void SCI_METHOD LexerPython::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
 467         Accessor styler(pAccess, NULL);
 468
 469         // Track whether in f-string expression; vector is used for a stack to
 470         // handle nested f-strings such as f"""{f'''{f"{f'{1}'}"}'''}"""
 471         std::vector<SingleFStringExpState> fstringStateStack;
 472         SingleFStringExpState *currentFStringExp = NULL;
 473
 474         const Sci_Position endPos = startPos + length;
 475
 476         // Backtrack to previous line in case need to fix its tab whinging
 477         Sci_Position lineCurrent = styler.GetLine(startPos);
 478         if (startPos > 0) {
 479                 if (lineCurrent > 0) {
 480                         lineCurrent--;
 481                         // Look for backslash-continued lines
 482                         while (lineCurrent > 0) {
 483                                 Sci_Position eolPos = styler.LineStart(lineCurrent) - 1;
 484                                 const int eolStyle = styler.StyleAt(eolPos);
 485                                 if (eolStyle == SCE_P_STRING
 486                                                 || eolStyle == SCE_P_CHARACTER
 487                                                 || eolStyle == SCE_P_STRINGEOL) {
 488                                         lineCurrent -= 1;
 489                                 } else {
 490                                         break;
 491                                 }
 492                         }
 493                         startPos = styler.LineStart(lineCurrent);
 494                 }
 495                 initStyle = startPos == 0 ? SCE_P_DEFAULT : styler.StyleAt(startPos - 1);
 496         }
 497
 498         const literalsAllowed allowedLiterals = options.AllowedLiterals();
 499
 500         initStyle = initStyle & 31;
 501         if (initStyle == SCE_P_STRINGEOL) {
 502                 initStyle = SCE_P_DEFAULT;
 503         }
 504
 505         // Set up fstate stack from last line and remove any subsequent ftriple at eol states
 506         std::map<int, std::vector<SingleFStringExpState> >::iterator it;
 507         it = ftripleStateAtEol.find(lineCurrent - 1);
 508         if (it != ftripleStateAtEol.end() && !it->second.empty()) {
 509                 fstringStateStack = it->second;
 510                 currentFStringExp = &fstringStateStack.back();
 511         }
 512         it = ftripleStateAtEol.lower_bound(lineCurrent);
 513         if (it != ftripleStateAtEol.end()) {
 514                 ftripleStateAtEol.erase(it, ftripleStateAtEol.end());
 515         }
 516
 517         kwType kwLast = kwOther;
 518         int spaceFlags = 0;
 519         styler.IndentAmount(lineCurrent, &spaceFlags, IsPyComment);
 520         bool base_n_number = false;
 521
 522         const WordClassifier &classifierIdentifiers = subStyles.Classifier(SCE_P_IDENTIFIER);
 523
 524         StyleContext sc(startPos, endPos - startPos, initStyle, styler);
 525
 526         bool indentGood = true;
 527         Sci_Position startIndicator = sc.currentPos;
 528         bool inContinuedString = false;
 529
 530         for (; sc.More(); sc.Forward()) {
 531
 532                 if (sc.atLineStart) {
 533                         styler.IndentAmount(lineCurrent, &spaceFlags, IsPyComment);
 534                         indentGood = true;
 535                         if (options.whingeLevel == 1) {
 536                                 indentGood = (spaceFlags & wsInconsistent) == 0;
 537                         } else if (options.whingeLevel == 2) {
 538                                 indentGood = (spaceFlags & wsSpaceTab) == 0;
 539                         } else if (options.whingeLevel == 3) {
 540                                 indentGood = (spaceFlags & wsSpace) == 0;
 541                         } else if (options.whingeLevel == 4) {
 542                                 indentGood = (spaceFlags & wsTab) == 0;
 543                         }
 544                         if (!indentGood) {
 545                                 styler.IndicatorFill(startIndicator, sc.currentPos, indicatorWhitespace, 0);
 546                                 startIndicator = sc.currentPos;
 547                         }
 548                 }
 549
 550                 if (sc.atLineEnd) {
 551                         ProcessLineEnd(sc, fstringStateStack, currentFStringExp, inContinuedString);
 552                         lineCurrent++;
 553                         if (!sc.More())
 554                                 break;
 555                 }
 556
 557                 bool needEOLCheck = false;
 558
 559
 560                 if (sc.state == SCE_P_OPERATOR) {
 561                         kwLast = kwOther;
 562                         sc.SetState(SCE_P_DEFAULT);
 563                 } else if (sc.state == SCE_P_NUMBER) {
 564                         if (!IsAWordChar(sc.ch, false) &&
 565                                         !(!base_n_number && ((sc.ch == '+' || sc.ch == '-') && (sc.chPrev == 'e' || sc.chPrev == 'E')))) {
 566                                 sc.SetState(SCE_P_DEFAULT);
 567                         }
 568                 } else if (sc.state == SCE_P_IDENTIFIER) {
 569                         if ((sc.ch == '.') || (!IsAWordChar(sc.ch, options.unicodeIdentifiers))) {
 570                                 char s[100];
 571                                 sc.GetCurrent(s, sizeof(s));
 572                                 int style = SCE_P_IDENTIFIER;
 573                                 if ((kwLast == kwImport) && (strcmp(s, "as") == 0)) {
 574                                         style = SCE_P_WORD;
 575                                 } else if (keywords.InList(s)) {
 576                                         style = SCE_P_WORD;
 577                                 } else if (kwLast == kwClass) {
 578                                         style = SCE_P_CLASSNAME;
 579                                 } else if (kwLast == kwDef) {
 580                                         style = SCE_P_DEFNAME;
 581                                 } else if (kwLast == kwCDef || kwLast == kwCPDef) {
 582                                         Sci_Position pos = sc.currentPos;
 583                                         unsigned char ch = styler.SafeGetCharAt(pos, '\0');
 584                                         while (ch != '\0') {
 585                                                 if (ch == '(') {
 586                                                         style = SCE_P_DEFNAME;
 587                                                         break;
 588                                                 } else if (ch == ':') {
 589                                                         style = SCE_P_CLASSNAME;
 590                                                         break;
 591                                                 } else if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') {
 592                                                         pos++;
 593                                                         ch = styler.SafeGetCharAt(pos, '\0');
 594                                                 } else {
 595                                                         break;
 596                                                 }
 597                                         }
 598                                 } else if (keywords2.InList(s)) {
 599                                         if (options.keywords2NoSubIdentifiers) {
 600                                                 // We don't want to highlight keywords2
 601                                                 // that are used as a sub-identifier,
 602                                                 // i.e. not open in "foo.open".
 603                                                 Sci_Position pos = styler.GetStartSegment() - 1;
 604                                                 if (pos < 0 || (styler.SafeGetCharAt(pos, '\0') != '.'))
 605                                                         style = SCE_P_WORD2;
 606                                         } else {
 607                                                 style = SCE_P_WORD2;
 608                                         }
 609                                 } else {
 610                                         int subStyle = classifierIdentifiers.ValueFor(s);
 611                                         if (subStyle >= 0) {
 612                                                 style = subStyle;
 613                                         }
 614                                 }
 615                                 sc.ChangeState(style);
 616                                 sc.SetState(SCE_P_DEFAULT);
 617                                 if (style == SCE_P_WORD) {
 618                                         if (0 == strcmp(s, "class"))
 619                                                 kwLast = kwClass;
 620                                         else if (0 == strcmp(s, "def"))
 621                                                 kwLast = kwDef;
 622                                         else if (0 == strcmp(s, "import"))
 623                                                 kwLast = kwImport;
 624                                         else if (0 == strcmp(s, "cdef"))
 625                                                 kwLast = kwCDef;
 626                                         else if (0 == strcmp(s, "cpdef"))
 627                                                 kwLast = kwCPDef;
 628                                         else if (0 == strcmp(s, "cimport"))
 629                                                 kwLast = kwImport;
 630                                         else if (kwLast != kwCDef && kwLast != kwCPDef)
 631                                                 kwLast = kwOther;
 632                                 } else if (kwLast != kwCDef && kwLast != kwCPDef) {
 633                                         kwLast = kwOther;
 634                                 }
 635                         }
 636                 } else if ((sc.state == SCE_P_COMMENTLINE) || (sc.state == SCE_P_COMMENTBLOCK)) {
 637                         if (sc.ch == '\r' || sc.ch == '\n') {
 638                                 sc.SetState(SCE_P_DEFAULT);
 639                         }
 640                 } else if (sc.state == SCE_P_DECORATOR) {
 641                         if (!IsAWordStart(sc.ch, options.unicodeIdentifiers)) {
 642                                 sc.SetState(SCE_P_DEFAULT);
 643                         }
 644                 } else if (IsPySingleQuoteStringState(sc.state)) {
 645                         if (sc.ch == '\\') {
 646                                 if ((sc.chNext == '\r') && (sc.GetRelative(2) == '\n')) {
 647                                         sc.Forward();
 648                                 }
 649                                 if (sc.chNext == '\n' || sc.chNext == '\r') {
 650                                         inContinuedString = true;
 651                                 } else {
 652                                         // Don't roll over the newline.
 653                                         sc.Forward();
 654                                 }
 655                         } else if (sc.ch == GetPyStringQuoteChar(sc.state)) {
 656                                 sc.ForwardSetState(SCE_P_DEFAULT);
 657                                 needEOLCheck = true;
 658                         }
 659                 } else if ((sc.state == SCE_P_TRIPLE) || (sc.state == SCE_P_FTRIPLE)) {
 660                         if (sc.ch == '\\') {
 661                                 sc.Forward();
 662                         } else if (sc.Match("\'\'\'")) {
 663                                 sc.Forward();
 664                                 sc.Forward();
 665                                 sc.ForwardSetState(SCE_P_DEFAULT);
 666                                 needEOLCheck = true;
 667                         }
 668                 } else if ((sc.state == SCE_P_TRIPLEDOUBLE) || (sc.state == SCE_P_FTRIPLEDOUBLE)) {
 669                         if (sc.ch == '\\') {
 670                                 sc.Forward();
 671                         } else if (sc.Match("\"\"\"")) {
 672                                 sc.Forward();
 673                                 sc.Forward();
 674                                 sc.ForwardSetState(SCE_P_DEFAULT);
 675                                 needEOLCheck = true;
 676                         }
 677                 }
 678
 679                 // Note if used and not if else because string states also match
 680                 // some of the above clauses
 681                 if (IsPyFStringState(sc.state) && sc.ch == '{') {
 682                         if (sc.chNext == '{') {
 683                                 sc.Forward();
 684                         } else {
 685                                 PushStateToStack(sc.state, fstringStateStack, currentFStringExp);
 686                                 sc.ForwardSetState(SCE_P_DEFAULT);
 687                         }
 688                         needEOLCheck = true;
 689                 }
 690
 691                 // If in an f-string expression, check for the ending quote(s)
 692                 // and end f-string to handle syntactically incorrect cases like
 693                 // f'{' and f"""{"""
 694                 if (!fstringStateStack.empty() && (sc.ch == '\'' || sc.ch == '"')) {
 695                         long matching_stack_i = -1;
 696                         for (unsigned long stack_i = 0; stack_i < fstringStateStack.size() && matching_stack_i == -1; stack_i++) {
 697                                 const int stack_state = fstringStateStack[stack_i].state;
 698                                 const char quote = GetPyStringQuoteChar(stack_state);
 699                                 if (sc.ch == quote) {
 700                                         if (IsPySingleQuoteStringState(stack_state)) {
 701                                                 matching_stack_i = stack_i;
 702                                         } else if (quote == '"' ? sc.Match("\"\"\"") : sc.Match("'''")) {
 703                                                 matching_stack_i = stack_i;
 704                                         }
 705                                 }
 706                         }
 707
 708                         if (matching_stack_i != -1) {
 709                                 sc.SetState(fstringStateStack[matching_stack_i].state);
 710                                 if (IsPyTripleQuoteStringState(fstringStateStack[matching_stack_i].state)) {
 711                                         sc.Forward();
 712                                         sc.Forward();
 713                                 }
 714                                 sc.ForwardSetState(SCE_P_DEFAULT);
 715                                 needEOLCheck = true;
 716
 717                                 while (fstringStateStack.size() > static_cast<unsigned long>(matching_stack_i)) {
 718                                         PopFromStateStack(fstringStateStack, currentFStringExp);
 719                                 }
 720                         }
 721                 }
 722                 // End of code to find the end of a state
 723
 724                 if (!indentGood && !IsASpaceOrTab(sc.ch)) {
 725                         styler.IndicatorFill(startIndicator, sc.currentPos, indicatorWhitespace, 1);
 726                         startIndicator = sc.currentPos;
 727                         indentGood = true;
 728                 }
 729
 730                 // One cdef or cpdef line, clear kwLast only at end of line
 731                 if ((kwLast == kwCDef || kwLast == kwCPDef) && sc.atLineEnd) {
 732                         kwLast = kwOther;
 733                 }
 734
 735                 // State exit code may have moved on to end of line
 736                 if (needEOLCheck && sc.atLineEnd) {
 737                         ProcessLineEnd(sc, fstringStateStack, currentFStringExp, inContinuedString);
 738                         lineCurrent++;
 739                         styler.IndentAmount(lineCurrent, &spaceFlags, IsPyComment);
 740                         if (!sc.More())
 741                                 break;
 742                 }
 743
 744                 // If in f-string expression, check for }, :, ! to resume f-string state or update nesting count
 745                 if (currentFStringExp != NULL && !IsPySingleQuoteStringState(sc.state) && !IsPyTripleQuoteStringState(sc.state)) {
 746                         if (currentFStringExp->nestingCount == 0 && (sc.ch == '}' || sc.ch == ':' || (sc.ch == '!' && sc.chNext != '='))) {
 747                                 sc.SetState(PopFromStateStack(fstringStateStack, currentFStringExp));
 748                         } else {
 749                                 if (sc.ch == '{' || sc.ch == '[' || sc.ch == '(') {
 750                                         currentFStringExp->nestingCount++;
 751                                 } else if (sc.ch == '}' || sc.ch == ']' || sc.ch == ')') {
 752                                         currentFStringExp->nestingCount--;
 753                                 }
 754                         }
 755                 }
 756
 757                 // Check for a new state starting character
 758                 if (sc.state == SCE_P_DEFAULT) {
 759                         if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
 760                                 if (sc.ch == '0' && (sc.chNext == 'x' || sc.chNext == 'X')) {
 761                                         base_n_number = true;
 762                                         sc.SetState(SCE_P_NUMBER);
 763                                 } else if (sc.ch == '0' &&
 764                                                 (sc.chNext == 'o' || sc.chNext == 'O' || sc.chNext == 'b' || sc.chNext == 'B')) {
 765                                         if (options.base2or8Literals) {
 766                                                 base_n_number = true;
 767                                                 sc.SetState(SCE_P_NUMBER);
 768                                         } else {
 769                                                 sc.SetState(SCE_P_NUMBER);
 770                                                 sc.ForwardSetState(SCE_P_IDENTIFIER);
 771                                         }
 772                                 } else {
 773                                         base_n_number = false;
 774                                         sc.SetState(SCE_P_NUMBER);
 775                                 }
 776                         } else if ((IsASCII(sc.ch) && isoperator(static_cast<char>(sc.ch))) || sc.ch == '`') {
 777                                 sc.SetState(SCE_P_OPERATOR);
 778                         } else if (sc.ch == '#') {
 779                                 sc.SetState(sc.chNext == '#' ? SCE_P_COMMENTBLOCK : SCE_P_COMMENTLINE);
 780                         } else if (sc.ch == '@') {
 781                                 if (IsFirstNonWhitespace(sc.currentPos, styler))
 782                                         sc.SetState(SCE_P_DECORATOR);
 783                                 else
 784                                         sc.SetState(SCE_P_OPERATOR);
 785                         } else if (IsPyStringStart(sc.ch, sc.chNext, sc.GetRelative(2), allowedLiterals)) {
 786                                 Sci_PositionU nextIndex = 0;
 787                                 sc.SetState(GetPyStringState(styler, sc.currentPos, &nextIndex, allowedLiterals));
 788                                 while (nextIndex > (sc.currentPos + 1) && sc.More()) {
 789                                         sc.Forward();
 790                                 }
 791                         } else if (IsAWordStart(sc.ch, options.unicodeIdentifiers)) {
 792                                 sc.SetState(SCE_P_IDENTIFIER);
 793                         }
 794                 }
 795         }
 796         styler.IndicatorFill(startIndicator, sc.currentPos, indicatorWhitespace, 0);
 797         sc.Complete();
 798 }
 799
 800 static bool IsCommentLine(Sci_Position line, Accessor &styler) {
             // Reports whether the first character on the line that is neither a
             // space nor a tab is '#'.  The scan stops one position short of the
             // start of the following line; a line with nothing but blanks in the
             // scanned range is not a comment line.
 801         const Sci_Position lineBegin = styler.LineStart(line);
 802         const Sci_Position lastPos = styler.LineStart(line + 1) - 1;
 803         Sci_Position scan = lineBegin;
 804         while (scan < lastPos) {
 805                 const char current = styler[scan++];
 806                 const bool isBlank = (current == ' ') || (current == '\t');
 807                 if (!isBlank)
 808                         return current == '#';
 809         }
 810         return false;
 811 }
 812
 813 static bool IsQuoteLine(Sci_Position line, const Accessor &styler) {
             // Reports whether the first character of the line is styled as part
             // of a triple-quoted string.  The f-string triple-quote states are
             // accepted as well as the plain ones, so that multi-line f-strings are
             // treated the same way as ordinary triple-quoted strings when folding.
 814         const int style = styler.StyleAt(styler.LineStart(line)) & 31;
 815         return IsPyTripleQuoteStringState(style);
 816 }
 817
 818
     // Sets the fold level of the lines in the requested range from their
     // indentation.  Does nothing unless options.fold is set.
     //  - When options.foldQuotes is set, a triple-quoted string (plain or
     //    f-string) gets a fold point where it starts and the lines it continues
     //    onto are placed one level deeper.
     //  - Blank lines and comment lines that follow a line take their level from
     //    the surrounding code instead of from their own indentation.
     //  - options.foldCompact decides whether the white-space flag is kept on the
     //    levels that are stored.
     // startPos/length give the range to fold; pAccess is the document, which is
     // read and updated through an Accessor.
 819 void SCI_METHOD LexerPython::Fold(Sci_PositionU startPos, Sci_Position length, int /*initStyle - unused*/, IDocument *pAccess) {
 820         if (!options.fold)
 821                 return;
 822
 823         Accessor styler(pAccess, NULL);
 824
 825         const Sci_Position maxPos = startPos + length;
 826         const Sci_Position maxLines = (maxPos == styler.Length()) ? styler.GetLine(maxPos) : styler.GetLine(maxPos - 1);        // Requested last line
 827         const Sci_Position docLines = styler.GetLine(styler.Length());  // Available last line
 828
 829         // Backtrack to previous non-blank line so we can determine indent level
 830         // for any white space lines (needed esp. within triple quoted strings)
 831         // and so we can fix any preceding fold level (which is why we go back
 832         // at least one line in all cases)
 833         int spaceFlags = 0;
 834         Sci_Position lineCurrent = styler.GetLine(startPos);
 835         int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
 836         while (lineCurrent > 0) {
 837                 lineCurrent--;
 838                 indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
 839                 if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG) &&
 840                                 (!IsCommentLine(lineCurrent, styler)) &&
 841                                 (!IsQuoteLine(lineCurrent, styler)))
 842                         break;
 843         }
 844         int indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
 845
 846         // Set up initial loop state
 847         startPos = styler.LineStart(lineCurrent);
 848         int prev_state = SCE_P_DEFAULT & 31;
 849         if (lineCurrent >= 1)
 850                 prev_state = styler.StyleAt(startPos - 1) & 31;
             // Any triple-quoted state counts here, including the f-string ones,
             // so a multi-line f-string is folded like a plain triple-quoted string.
 851         int prevQuote = options.foldQuotes && IsPyTripleQuoteStringState(prev_state);
 852
 853         // Process all characters to end of requested range or end of any triple quote
 854         // that hangs over the end of the range.  Cap processing in all cases
 855         // to end of document (in case of unclosed quote at end).
 856         while ((lineCurrent <= docLines) && ((lineCurrent <= maxLines) || prevQuote)) {
 857
 858                 // Gather info
 859                 int lev = indentCurrent;
 860                 Sci_Position lineNext = lineCurrent + 1;
 861                 int indentNext = indentCurrent;
 862                 int quote = false;
 863                 if (lineNext <= docLines) {
 864                         // Information about next line is only available if not at end of document
 865                         indentNext = styler.IndentAmount(lineNext, &spaceFlags, NULL);
                             // When the next line starts at the very end of the document
                             // there is no character there, so look at the last one instead.
 866                         Sci_Position lookAtPos = (styler.LineStart(lineNext) == styler.Length()) ? styler.Length() - 1 : styler.LineStart(lineNext);
 867                         const int style = styler.StyleAt(lookAtPos) & 31;
 868                         quote = options.foldQuotes && IsPyTripleQuoteStringState(style);
 869                 }
 870                 const int quote_start = (quote && !prevQuote);
 871                 const int quote_continue = (quote && prevQuote);
                     // While inside a string the level recorded at its start is kept.
 872                 if (!quote || !prevQuote)
 873                         indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
 874                 if (quote)
 875                         indentNext = indentCurrentLevel;
 876                 if (indentNext & SC_FOLDLEVELWHITEFLAG)
 877                         indentNext = SC_FOLDLEVELWHITEFLAG | indentCurrentLevel;
 878
 879                 if (quote_start) {
 880                         // Place fold point at start of triple quoted string
 881                         lev |= SC_FOLDLEVELHEADERFLAG;
 882                 } else if (quote_continue || prevQuote) {
 883                         // Add level to rest of lines in the string
 884                         lev = lev + 1;
 885                 }
 886
 887                 // Skip past any blank lines for next indent level info; we skip also
 888                 // comments (all comments, not just those starting in column 0)
 889                 // which effectively folds them into surrounding code rather
 890                 // than screwing up folding.  If comments end file, use the min
 891                 // comment indent as the level after
 892
 893                 int minCommentLevel = indentCurrentLevel;
 894                 while (!quote &&
 895                                 (lineNext < docLines) &&
 896                                 ((indentNext & SC_FOLDLEVELWHITEFLAG) ||
 897                                  (lineNext <= docLines && IsCommentLine(lineNext, styler)))) {
 898
 899                         if (IsCommentLine(lineNext, styler) && indentNext < minCommentLevel) {
 900                                 minCommentLevel = indentNext;
 901                         }
 902
 903                         lineNext++;
 904                         indentNext = styler.IndentAmount(lineNext, &spaceFlags, NULL);
 905                 }
 906
 907                 const int levelAfterComments = ((lineNext < docLines) ? indentNext & SC_FOLDLEVELNUMBERMASK : minCommentLevel);
 908                 const int levelBeforeComments = Maximum(indentCurrentLevel, levelAfterComments);
 909
 910                 // Now set all the indent levels on the lines we skipped
 911                 // Do this from end to start.  Once we encounter one line
 912                 // which is indented more than the line after the end of
 913                 // the comment-block, use the level of the block before
 914
 915                 Sci_Position skipLine = lineNext;
 916                 int skipLevel = levelAfterComments;
 917
 918                 while (--skipLine > lineCurrent) {
 919                         const int skipLineIndent = styler.IndentAmount(skipLine, &spaceFlags, NULL);
 920
 921                         if (options.foldCompact) {
 922                                 if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > levelAfterComments)
 923                                         skipLevel = levelBeforeComments;
 924
 925                                 int whiteFlag = skipLineIndent & SC_FOLDLEVELWHITEFLAG;
 926
 927                                 styler.SetLevel(skipLine, skipLevel | whiteFlag);
 928                         } else {
 929                                 if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > levelAfterComments &&
 930                                                 !(skipLineIndent & SC_FOLDLEVELWHITEFLAG) &&
 931                                                 !IsCommentLine(skipLine, styler))
 932                                         skipLevel = levelBeforeComments;
 933
 934                                 styler.SetLevel(skipLine, skipLevel);
 935                         }
 936                 }
 937
 938                 // Set fold header on non-quote line
 939                 if (!quote && !(indentCurrent & SC_FOLDLEVELWHITEFLAG)) {
 940                         if ((indentCurrent & SC_FOLDLEVELNUMBERMASK) < (indentNext & SC_FOLDLEVELNUMBERMASK))
 941                                 lev |= SC_FOLDLEVELHEADERFLAG;
 942                 }
 943
 944                 // Keep track of triple quote state of previous line
 945                 prevQuote = quote;
 946
 947                 // Set fold level for this line and move to next line
 948                 styler.SetLevel(lineCurrent, options.foldCompact ? lev : lev & ~SC_FOLDLEVELWHITEFLAG);
 949                 indentCurrent = indentNext;
 950                 lineCurrent = lineNext;
 951         }
 952
 953         // NOTE: Cannot set level of last line here because indentCurrent doesn't have
 954         // header flag set; the loop above is crafted to take care of this case!
 955         //styler.SetLevel(lineCurrent, indentCurrent);
 956 }
 957
 958 LexerModule lmPython(SCLEX_PYTHON, LexerPython::LexerFactoryPython, "python",
 959                      pythonWordListDesc);  // Module object tying SCLEX_PYTHON and the name "python" to LexerPython's factory and word list descriptions