Theme improvements (#1382)
[geany-mirror.git] / scintilla / lexers / LexPython.cxx
blob97943ca2405a895c834be4b2a0f1f2e038ec7acb
1 // Scintilla source code edit control
2 /** @file LexPython.cxx
3 ** Lexer for Python.
4 **/
5 // Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
8 #include <stdlib.h>
9 #include <string.h>
10 #include <stdio.h>
11 #include <stdarg.h>
12 #include <assert.h>
13 #include <ctype.h>
15 #include <string>
16 #include <vector>
17 #include <map>
19 #include "ILexer.h"
20 #include "Scintilla.h"
21 #include "SciLexer.h"
23 #include "StringCopy.h"
24 #include "WordList.h"
25 #include "LexAccessor.h"
26 #include "Accessor.h"
27 #include "StyleContext.h"
28 #include "CharacterSet.h"
29 #include "CharacterCategory.h"
30 #include "LexerModule.h"
31 #include "OptionSet.h"
32 #include "SubStyles.h"
34 #ifdef SCI_NAMESPACE
35 using namespace Scintilla;
36 #endif
38 namespace {
39 // Use an unnamed namespace to protect the functions and classes from name conflicts
/* Notes on f-strings: f-strings are strings prefixed with f (e.g. f'') that may
   have arbitrary expressions in {}. The tokens in the expressions are lexed as if
   they were outside of any string. Expressions may contain { and } characters as
   long as there is a closing } for every {, may be 2+ lines in a triple quoted
   string, and may have a formatting specifier following a ! or :, but both !
   and : are valid inside of a bracketed expression and != is a valid
   expression token even outside of a bracketed expression.

   When in an f-string expression, the lexer keeps track of the state value of
   the f-string and the nesting count for the expression (# of [, (, { seen - # of
   }, ), ] seen). f-strings may be nested (e.g. f'{ a + f"{1+2}"') so a stack of
   states and nesting counts is kept. If an f-string expression continues beyond
   the end of a line, this stack is saved in a std::map that maps a line number to
   the stack at the end of that line. std::vector is used for the stack.

   The PEP for f-strings is at https://www.python.org/dev/peps/pep-0498/
*/
// One entry of the f-string expression stack.
struct SingleFStringExpState {
	// Style of the f-string that contains the expression; restored when the expression ends.
	int state;
	// Count of opening brackets seen minus closing brackets seen inside the expression.
	int nestingCount;
};
// Kind of the most recently seen keyword; used to style the identifier that follows it.
// kwCDef, kwCTypeName and kwCPDef are only used for Cython.
enum kwType {
	kwOther,
	kwClass,
	kwDef,
	kwImport,
	kwCDef,
	kwCTypeName,
	kwCPDef
};
// Bit flags selecting which string prefix letters are recognised.
enum literalsAllowed {
	litNone = 0,
	litU = 1,	// u"..." / U"..."
	litB = 2,	// b"..." / B"..."
	litF = 4	// f"..." / F"..."
};
// Indicator number passed to IndicatorFill() when marking indentation.
const int indicatorWhitespace = 1;
70 bool IsPyComment(Accessor &styler, Sci_Position pos, Sci_Position len) {
71 return len > 0 && styler[pos] == '#';
74 bool IsPyStringTypeChar(int ch, literalsAllowed allowed) {
75 return
76 ((allowed & litB) && (ch == 'b' || ch == 'B')) ||
77 ((allowed & litU) && (ch == 'u' || ch == 'U')) ||
78 ((allowed & litF) && (ch == 'f' || ch == 'F'));
81 bool IsPyStringStart(int ch, int chNext, int chNext2, literalsAllowed allowed) {
82 if (ch == '\'' || ch == '"')
83 return true;
84 if (IsPyStringTypeChar(ch, allowed)) {
85 if (chNext == '"' || chNext == '\'')
86 return true;
87 if ((chNext == 'r' || chNext == 'R') && (chNext2 == '"' || chNext2 == '\''))
88 return true;
90 if ((ch == 'r' || ch == 'R') && (chNext == '"' || chNext == '\''))
91 return true;
93 return false;
96 bool IsPyFStringState(int st) {
97 return ((st == SCE_P_FCHARACTER) || (st == SCE_P_FSTRING) ||
98 (st == SCE_P_FTRIPLE) || (st == SCE_P_FTRIPLEDOUBLE));
101 bool IsPySingleQuoteStringState(int st) {
102 return ((st == SCE_P_CHARACTER) || (st == SCE_P_STRING) ||
103 (st == SCE_P_FCHARACTER) || (st == SCE_P_FSTRING));
106 bool IsPyTripleQuoteStringState(int st) {
107 return ((st == SCE_P_TRIPLE) || (st == SCE_P_TRIPLEDOUBLE) ||
108 (st == SCE_P_FTRIPLE) || (st == SCE_P_FTRIPLEDOUBLE));
111 char GetPyStringQuoteChar(int st) {
112 if ((st == SCE_P_CHARACTER) || (st == SCE_P_FCHARACTER) ||
113 (st == SCE_P_TRIPLE) || (st == SCE_P_FTRIPLE))
114 return '\'';
115 if ((st == SCE_P_STRING) || (st == SCE_P_FSTRING) ||
116 (st == SCE_P_TRIPLEDOUBLE) || (st == SCE_P_FTRIPLEDOUBLE))
117 return '"';
119 return '\0';
122 void PushStateToStack(int state, std::vector<SingleFStringExpState> &stack, SingleFStringExpState *&currentFStringExp) {
123 SingleFStringExpState single = {state, 0};
124 stack.push_back(single);
126 currentFStringExp = &stack.back();
129 int PopFromStateStack(std::vector<SingleFStringExpState> &stack, SingleFStringExpState *&currentFStringExp) {
130 int state = 0;
132 if (!stack.empty()) {
133 state = stack.back().state;
134 stack.pop_back();
137 if (stack.empty()) {
138 currentFStringExp = NULL;
139 } else {
140 currentFStringExp = &stack.back();
143 return state;
146 /* Return the state to use for the string starting at i; *nextIndex will be set to the first index following the quote(s) */
147 int GetPyStringState(Accessor &styler, Sci_Position i, Sci_PositionU *nextIndex, literalsAllowed allowed) {
148 char ch = styler.SafeGetCharAt(i);
149 char chNext = styler.SafeGetCharAt(i + 1);
150 const int firstIsF = (ch == 'f' || ch == 'F');
152 // Advance beyond r, u, or ur prefix (or r, b, or br in Python 2.7+ and r, f, or fr in Python 3.6+), but bail if there are any unexpected chars
153 if (ch == 'r' || ch == 'R') {
154 i++;
155 ch = styler.SafeGetCharAt(i);
156 chNext = styler.SafeGetCharAt(i + 1);
157 } else if (IsPyStringTypeChar(ch, allowed)) {
158 if (chNext == 'r' || chNext == 'R')
159 i += 2;
160 else
161 i += 1;
162 ch = styler.SafeGetCharAt(i);
163 chNext = styler.SafeGetCharAt(i + 1);
166 if (ch != '"' && ch != '\'') {
167 *nextIndex = i + 1;
168 return SCE_P_DEFAULT;
171 if (ch == chNext && ch == styler.SafeGetCharAt(i + 2)) {
172 *nextIndex = i + 3;
174 if (ch == '"')
175 return (firstIsF ? SCE_P_FTRIPLEDOUBLE : SCE_P_TRIPLEDOUBLE);
176 else
177 return (firstIsF ? SCE_P_FTRIPLE : SCE_P_TRIPLE);
178 } else {
179 *nextIndex = i + 1;
181 if (ch == '"')
182 return (firstIsF ? SCE_P_FSTRING : SCE_P_STRING);
183 else
184 return (firstIsF ? SCE_P_FCHARACTER : SCE_P_CHARACTER);
188 inline bool IsAWordChar(int ch, bool unicodeIdentifiers) {
189 if (ch < 0x80)
190 return (isalnum(ch) || ch == '.' || ch == '_');
192 if (!unicodeIdentifiers)
193 return false;
195 // Python uses the XID_Continue set from unicode data
196 return IsXidContinue(ch);
199 inline bool IsAWordStart(int ch, bool unicodeIdentifiers) {
200 if (ch < 0x80)
201 return (isalpha(ch) || ch == '_');
203 if (!unicodeIdentifiers)
204 return false;
206 // Python uses the XID_Start set from unicode data
207 return IsXidStart(ch);
210 static bool IsFirstNonWhitespace(Sci_Position pos, Accessor &styler) {
211 Sci_Position line = styler.GetLine(pos);
212 Sci_Position start_pos = styler.LineStart(line);
213 for (Sci_Position i = start_pos; i < pos; i++) {
214 const char ch = styler[i];
215 if (!(ch == ' ' || ch == '\t'))
216 return false;
218 return true;
221 // Options used for LexerPython
222 struct OptionsPython {
223 int whingeLevel;
224 bool base2or8Literals;
225 bool stringsU;
226 bool stringsB;
227 bool stringsF;
228 bool stringsOverNewline;
229 bool keywords2NoSubIdentifiers;
230 bool fold;
231 bool foldQuotes;
232 bool foldCompact;
233 bool unicodeIdentifiers;
235 OptionsPython() {
236 whingeLevel = 0;
237 base2or8Literals = true;
238 stringsU = true;
239 stringsB = true;
240 stringsF = true;
241 stringsOverNewline = false;
242 keywords2NoSubIdentifiers = false;
243 fold = false;
244 foldQuotes = false;
245 foldCompact = false;
246 unicodeIdentifiers = true;
249 literalsAllowed AllowedLiterals() const {
250 literalsAllowed allowedLiterals = stringsU ? litU : litNone;
251 if (stringsB)
252 allowedLiterals = static_cast<literalsAllowed>(allowedLiterals | litB);
253 if (stringsF)
254 allowedLiterals = static_cast<literalsAllowed>(allowedLiterals | litF);
255 return allowedLiterals;
// Descriptions of the word lists, in WordListSet() index order
// (0 = keywords, 1 = keywords2). The list is terminated by a null entry.
static const char *const pythonWordListDesc[] = {
	"Keywords",
	"Highlighted identifiers",
	0
};
265 struct OptionSetPython : public OptionSet<OptionsPython> {
266 OptionSetPython() {
267 DefineProperty("tab.timmy.whinge.level", &OptionsPython::whingeLevel,
268 "For Python code, checks whether indenting is consistent. "
269 "The default, 0 turns off indentation checking, "
270 "1 checks whether each line is potentially inconsistent with the previous line, "
271 "2 checks whether any space characters occur before a tab character in the indentation, "
272 "3 checks whether any spaces are in the indentation, and "
273 "4 checks for any tab characters in the indentation. "
274 "1 is a good level to use.");
276 DefineProperty("lexer.python.literals.binary", &OptionsPython::base2or8Literals,
277 "Set to 0 to not recognise Python 3 binary and octal literals: 0b1011 0o712.");
279 DefineProperty("lexer.python.strings.u", &OptionsPython::stringsU,
280 "Set to 0 to not recognise Python Unicode literals u\"x\" as used before Python 3.");
282 DefineProperty("lexer.python.strings.b", &OptionsPython::stringsB,
283 "Set to 0 to not recognise Python 3 bytes literals b\"x\".");
285 DefineProperty("lexer.python.strings.f", &OptionsPython::stringsF,
286 "Set to 0 to not recognise Python 3.6 f-string literals f\"var={var}\".");
288 DefineProperty("lexer.python.strings.over.newline", &OptionsPython::stringsOverNewline,
289 "Set to 1 to allow strings to span newline characters.");
291 DefineProperty("lexer.python.keywords2.no.sub.identifiers", &OptionsPython::keywords2NoSubIdentifiers,
292 "When enabled, it will not style keywords2 items that are used as a sub-identifier. "
293 "Example: when set, will not highlight \"foo.open\" when \"open\" is a keywords2 item.");
295 DefineProperty("fold", &OptionsPython::fold);
297 DefineProperty("fold.quotes.python", &OptionsPython::foldQuotes,
298 "This option enables folding multi-line quoted strings when using the Python lexer.");
300 DefineProperty("fold.compact", &OptionsPython::foldCompact);
302 DefineProperty("lexer.python.unicode.identifiers", &OptionsPython::unicodeIdentifiers,
303 "Set to 0 to not recognise Python 3 unicode identifiers.");
305 DefineWordListSets(pythonWordListDesc);
309 const char styleSubable[] = { SCE_P_IDENTIFIER, 0 };
313 class LexerPython : public ILexerWithSubStyles {
314 WordList keywords;
315 WordList keywords2;
316 OptionsPython options;
317 OptionSetPython osPython;
318 enum { ssIdentifier };
319 SubStyles subStyles;
320 std::map<int, std::vector<SingleFStringExpState> > ftripleStateAtEol;
321 public:
322 explicit LexerPython() :
323 subStyles(styleSubable, 0x80, 0x40, 0) {
325 virtual ~LexerPython() {
327 void SCI_METHOD Release() override {
328 delete this;
330 int SCI_METHOD Version() const override {
331 return lvSubStyles;
333 const char *SCI_METHOD PropertyNames() override {
334 return osPython.PropertyNames();
336 int SCI_METHOD PropertyType(const char *name) override {
337 return osPython.PropertyType(name);
339 const char *SCI_METHOD DescribeProperty(const char *name) override {
340 return osPython.DescribeProperty(name);
342 Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
343 const char *SCI_METHOD DescribeWordListSets() override {
344 return osPython.DescribeWordListSets();
346 Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
347 void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
348 void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
350 void *SCI_METHOD PrivateCall(int, void *) override {
351 return 0;
354 int SCI_METHOD LineEndTypesSupported() override {
355 return SC_LINE_END_TYPE_UNICODE;
358 int SCI_METHOD AllocateSubStyles(int styleBase, int numberStyles) override {
359 return subStyles.Allocate(styleBase, numberStyles);
361 int SCI_METHOD SubStylesStart(int styleBase) override {
362 return subStyles.Start(styleBase);
364 int SCI_METHOD SubStylesLength(int styleBase) override {
365 return subStyles.Length(styleBase);
367 int SCI_METHOD StyleFromSubStyle(int subStyle) override {
368 const int styleBase = subStyles.BaseStyle(subStyle);
369 return styleBase;
371 int SCI_METHOD PrimaryStyleFromStyle(int style) override {
372 return style;
374 void SCI_METHOD FreeSubStyles() override {
375 subStyles.Free();
377 void SCI_METHOD SetIdentifiers(int style, const char *identifiers) override {
378 subStyles.SetIdentifiers(style, identifiers);
380 int SCI_METHOD DistanceToSecondaryStyles() override {
381 return 0;
383 const char *SCI_METHOD GetSubStyleBases() override {
384 return styleSubable;
387 static ILexer *LexerFactoryPython() {
388 return new LexerPython();
391 private:
392 void ProcessLineEnd(StyleContext &sc, std::vector<SingleFStringExpState> &fstringStateStack, SingleFStringExpState *&currentFStringExp, bool &inContinuedString);
395 Sci_Position SCI_METHOD LexerPython::PropertySet(const char *key, const char *val) {
396 if (osPython.PropertySet(&options, key, val)) {
397 return 0;
399 return -1;
402 Sci_Position SCI_METHOD LexerPython::WordListSet(int n, const char *wl) {
403 WordList *wordListN = 0;
404 switch (n) {
405 case 0:
406 wordListN = &keywords;
407 break;
408 case 1:
409 wordListN = &keywords2;
410 break;
412 Sci_Position firstModification = -1;
413 if (wordListN) {
414 WordList wlNew;
415 wlNew.Set(wl);
416 if (*wordListN != wlNew) {
417 wordListN->Set(wl);
418 firstModification = 0;
421 return firstModification;
424 void LexerPython::ProcessLineEnd(StyleContext &sc, std::vector<SingleFStringExpState> &fstringStateStack, SingleFStringExpState *&currentFStringExp, bool &inContinuedString) {
425 long deepestSingleStateIndex = -1;
426 unsigned long i;
428 // Find the deepest single quote state because that string will end; no \ continuation in f-string
429 for (i = 0; i < fstringStateStack.size(); i++) {
430 if (IsPySingleQuoteStringState(fstringStateStack[i].state)) {
431 deepestSingleStateIndex = i;
432 break;
436 if (deepestSingleStateIndex != -1) {
437 sc.SetState(fstringStateStack[deepestSingleStateIndex].state);
438 while (fstringStateStack.size() > static_cast<unsigned long>(deepestSingleStateIndex)) {
439 PopFromStateStack(fstringStateStack, currentFStringExp);
442 if (!fstringStateStack.empty()) {
443 std::pair<int, std::vector<SingleFStringExpState> > val;
444 val.first = sc.currentLine;
445 val.second = fstringStateStack;
447 ftripleStateAtEol.insert(val);
450 if ((sc.state == SCE_P_DEFAULT)
451 || IsPyTripleQuoteStringState(sc.state)) {
452 // Perform colourisation of white space and triple quoted strings at end of each line to allow
453 // tab marking to work inside white space and triple quoted strings
454 sc.SetState(sc.state);
456 if (IsPySingleQuoteStringState(sc.state)) {
457 if (inContinuedString || options.stringsOverNewline) {
458 inContinuedString = false;
459 } else {
460 sc.ChangeState(SCE_P_STRINGEOL);
461 sc.ForwardSetState(SCE_P_DEFAULT);
// Style the document range that starts at startPos and is length characters long.
// When startPos is not at the document start, lexing restarts from the previous line,
// moving further back over lines whose end-of-line style is SCE_P_STRING,
// SCE_P_CHARACTER or SCE_P_STRINGEOL. The f-string expression stack saved for the
// line before the restart line is restored, and saved stacks from the restart line
// onwards are discarded. Each loop iteration then (1) handles line start / line end,
// (2) checks whether the current state has ended, (3) does the f-string expression
// bookkeeping and (4) looks for the start of a new state when in SCE_P_DEFAULT.
466 void SCI_METHOD LexerPython::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
467 Accessor styler(pAccess, NULL);
469 // Track whether in f-string expression; vector is used for a stack to
470 // handle nested f-strings such as f"""{f'''{f"{f'{1}'}"}'''}"""
471 std::vector<SingleFStringExpState> fstringStateStack;
472 SingleFStringExpState *currentFStringExp = NULL;
474 const Sci_Position endPos = startPos + length;
476 // Backtrack to previous line in case need to fix its tab whinging
477 Sci_Position lineCurrent = styler.GetLine(startPos);
478 if (startPos > 0) {
479 if (lineCurrent > 0) {
480 lineCurrent--;
481 // Look for backslash-continued lines
482 while (lineCurrent > 0) {
483 Sci_Position eolPos = styler.LineStart(lineCurrent) - 1;
484 const int eolStyle = styler.StyleAt(eolPos);
485 if (eolStyle == SCE_P_STRING
486 || eolStyle == SCE_P_CHARACTER
487 || eolStyle == SCE_P_STRINGEOL) {
488 lineCurrent -= 1;
489 } else {
490 break;
493 startPos = styler.LineStart(lineCurrent);
495 initStyle = startPos == 0 ? SCE_P_DEFAULT : styler.StyleAt(startPos - 1);
498 const literalsAllowed allowedLiterals = options.AllowedLiterals();
// Only the low five bits of the stored style are used as the lexer state.
500 initStyle = initStyle & 31;
501 if (initStyle == SCE_P_STRINGEOL) {
502 initStyle = SCE_P_DEFAULT;
505 // Set up fstate stack from last line and remove any subsequent ftriple at eol states
506 std::map<int, std::vector<SingleFStringExpState> >::iterator it;
507 it = ftripleStateAtEol.find(lineCurrent - 1);
508 if (it != ftripleStateAtEol.end() && !it->second.empty()) {
509 fstringStateStack = it->second;
510 currentFStringExp = &fstringStateStack.back();
512 it = ftripleStateAtEol.lower_bound(lineCurrent);
513 if (it != ftripleStateAtEol.end()) {
514 ftripleStateAtEol.erase(it, ftripleStateAtEol.end());
517 kwType kwLast = kwOther;
518 int spaceFlags = 0;
519 styler.IndentAmount(lineCurrent, &spaceFlags, IsPyComment);
520 bool base_n_number = false;
522 const WordClassifier &classifierIdentifiers = subStyles.Classifier(SCE_P_IDENTIFIER);
524 StyleContext sc(startPos, endPos - startPos, initStyle, styler);
526 bool indentGood = true;
527 Sci_Position startIndicator = sc.currentPos;
528 bool inContinuedString = false;
530 for (; sc.More(); sc.Forward()) {
// At each line start, re-evaluate the indentation against the configured whinge level.
532 if (sc.atLineStart) {
533 styler.IndentAmount(lineCurrent, &spaceFlags, IsPyComment);
534 indentGood = true;
535 if (options.whingeLevel == 1) {
536 indentGood = (spaceFlags & wsInconsistent) == 0;
537 } else if (options.whingeLevel == 2) {
538 indentGood = (spaceFlags & wsSpaceTab) == 0;
539 } else if (options.whingeLevel == 3) {
540 indentGood = (spaceFlags & wsSpace) == 0;
541 } else if (options.whingeLevel == 4) {
542 indentGood = (spaceFlags & wsTab) == 0;
544 if (!indentGood) {
545 styler.IndicatorFill(startIndicator, sc.currentPos, indicatorWhitespace, 0);
546 startIndicator = sc.currentPos;
550 if (sc.atLineEnd) {
551 ProcessLineEnd(sc, fstringStateStack, currentFStringExp, inContinuedString);
552 lineCurrent++;
553 if (!sc.More())
554 break;
// Set whenever the state-exit code below moves forward, because that may reach a line end.
557 bool needEOLCheck = false;
560 if (sc.state == SCE_P_OPERATOR) {
561 kwLast = kwOther;
562 sc.SetState(SCE_P_DEFAULT);
563 } else if (sc.state == SCE_P_NUMBER) {
564 if (!IsAWordChar(sc.ch, false) &&
565 !(!base_n_number && ((sc.ch == '+' || sc.ch == '-') && (sc.chPrev == 'e' || sc.chPrev == 'E')))) {
566 sc.SetState(SCE_P_DEFAULT);
568 } else if (sc.state == SCE_P_IDENTIFIER) {
569 if ((sc.ch == '.') || (!IsAWordChar(sc.ch, options.unicodeIdentifiers))) {
// The identifier has ended: pick its final style from the keyword lists, the
// preceding keyword (kwLast) and the sub-style classifier.
570 char s[100];
571 sc.GetCurrent(s, sizeof(s));
572 int style = SCE_P_IDENTIFIER;
573 if ((kwLast == kwImport) && (strcmp(s, "as") == 0)) {
574 style = SCE_P_WORD;
575 } else if (keywords.InList(s)) {
576 style = SCE_P_WORD;
577 } else if (kwLast == kwClass) {
578 style = SCE_P_CLASSNAME;
579 } else if (kwLast == kwDef) {
580 style = SCE_P_DEFNAME;
581 } else if (kwLast == kwCDef || kwLast == kwCPDef) {
// After cdef/cpdef, skip white space and look at the next character:
// '(' marks a function name, ':' marks a class name.
582 Sci_Position pos = sc.currentPos;
583 unsigned char ch = styler.SafeGetCharAt(pos, '\0');
584 while (ch != '\0') {
585 if (ch == '(') {
586 style = SCE_P_DEFNAME;
587 break;
588 } else if (ch == ':') {
589 style = SCE_P_CLASSNAME;
590 break;
591 } else if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') {
592 pos++;
593 ch = styler.SafeGetCharAt(pos, '\0');
594 } else {
595 break;
598 } else if (keywords2.InList(s)) {
599 if (options.keywords2NoSubIdentifiers) {
600 // We don't want to highlight keywords2
601 // that are used as a sub-identifier,
602 // i.e. not open in "foo.open".
603 Sci_Position pos = styler.GetStartSegment() - 1;
604 if (pos < 0 || (styler.SafeGetCharAt(pos, '\0') != '.'))
605 style = SCE_P_WORD2;
606 } else {
607 style = SCE_P_WORD2;
609 } else {
610 int subStyle = classifierIdentifiers.ValueFor(s);
611 if (subStyle >= 0) {
612 style = subStyle;
615 sc.ChangeState(style);
616 sc.SetState(SCE_P_DEFAULT);
617 if (style == SCE_P_WORD) {
618 if (0 == strcmp(s, "class"))
619 kwLast = kwClass;
620 else if (0 == strcmp(s, "def"))
621 kwLast = kwDef;
622 else if (0 == strcmp(s, "import"))
623 kwLast = kwImport;
624 else if (0 == strcmp(s, "cdef"))
625 kwLast = kwCDef;
626 else if (0 == strcmp(s, "cpdef"))
627 kwLast = kwCPDef;
628 else if (0 == strcmp(s, "cimport"))
629 kwLast = kwImport;
630 else if (kwLast != kwCDef && kwLast != kwCPDef)
631 kwLast = kwOther;
632 } else if (kwLast != kwCDef && kwLast != kwCPDef) {
633 kwLast = kwOther;
636 } else if ((sc.state == SCE_P_COMMENTLINE) || (sc.state == SCE_P_COMMENTBLOCK)) {
637 if (sc.ch == '\r' || sc.ch == '\n') {
638 sc.SetState(SCE_P_DEFAULT);
640 } else if (sc.state == SCE_P_DECORATOR) {
641 if (!IsAWordStart(sc.ch, options.unicodeIdentifiers)) {
642 sc.SetState(SCE_P_DEFAULT);
644 } else if (IsPySingleQuoteStringState(sc.state)) {
645 if (sc.ch == '\\') {
646 if ((sc.chNext == '\r') && (sc.GetRelative(2) == '\n')) {
647 sc.Forward();
649 if (sc.chNext == '\n' || sc.chNext == '\r') {
650 inContinuedString = true;
651 } else {
652 // Don't roll over the newline.
653 sc.Forward();
655 } else if (sc.ch == GetPyStringQuoteChar(sc.state)) {
656 sc.ForwardSetState(SCE_P_DEFAULT);
657 needEOLCheck = true;
659 } else if ((sc.state == SCE_P_TRIPLE) || (sc.state == SCE_P_FTRIPLE)) {
660 if (sc.ch == '\\') {
661 sc.Forward();
662 } else if (sc.Match("\'\'\'")) {
663 sc.Forward();
664 sc.Forward();
665 sc.ForwardSetState(SCE_P_DEFAULT);
666 needEOLCheck = true;
668 } else if ((sc.state == SCE_P_TRIPLEDOUBLE) || (sc.state == SCE_P_FTRIPLEDOUBLE)) {
669 if (sc.ch == '\\') {
670 sc.Forward();
671 } else if (sc.Match("\"\"\"")) {
672 sc.Forward();
673 sc.Forward();
674 sc.ForwardSetState(SCE_P_DEFAULT);
675 needEOLCheck = true;
679 // Note if used and not if else because string states also match
680 // some of the above clauses
681 if (IsPyFStringState(sc.state) && sc.ch == '{') {
682 if (sc.chNext == '{') {
683 sc.Forward();
684 } else {
685 PushStateToStack(sc.state, fstringStateStack, currentFStringExp);
686 sc.ForwardSetState(SCE_P_DEFAULT);
688 needEOLCheck = true;
691 // If in an f-string expression, check for the ending quote(s)
692 // and end f-string to handle syntactically incorrect cases like
693 // f'{' and f"""{"""
694 if (!fstringStateStack.empty() && (sc.ch == '\'' || sc.ch == '"')) {
695 long matching_stack_i = -1;
696 for (unsigned long stack_i = 0; stack_i < fstringStateStack.size() && matching_stack_i == -1; stack_i++) {
697 const int stack_state = fstringStateStack[stack_i].state;
698 const char quote = GetPyStringQuoteChar(stack_state);
699 if (sc.ch == quote) {
700 if (IsPySingleQuoteStringState(stack_state)) {
701 matching_stack_i = stack_i;
702 } else if (quote == '"' ? sc.Match("\"\"\"") : sc.Match("'''")) {
703 matching_stack_i = stack_i;
708 if (matching_stack_i != -1) {
709 sc.SetState(fstringStateStack[matching_stack_i].state);
710 if (IsPyTripleQuoteStringState(fstringStateStack[matching_stack_i].state)) {
711 sc.Forward();
712 sc.Forward();
714 sc.ForwardSetState(SCE_P_DEFAULT);
715 needEOLCheck = true;
717 while (fstringStateStack.size() > static_cast<unsigned long>(matching_stack_i)) {
718 PopFromStateStack(fstringStateStack, currentFStringExp);
722 // End of code to find the end of a state
724 if (!indentGood && !IsASpaceOrTab(sc.ch)) {
725 styler.IndicatorFill(startIndicator, sc.currentPos, indicatorWhitespace, 1);
726 startIndicator = sc.currentPos;
727 indentGood = true;
730 // One cdef or cpdef line, clear kwLast only at end of line
731 if ((kwLast == kwCDef || kwLast == kwCPDef) && sc.atLineEnd) {
732 kwLast = kwOther;
735 // State exit code may have moved on to end of line
736 if (needEOLCheck && sc.atLineEnd) {
737 ProcessLineEnd(sc, fstringStateStack, currentFStringExp, inContinuedString);
738 lineCurrent++;
739 styler.IndentAmount(lineCurrent, &spaceFlags, IsPyComment);
740 if (!sc.More())
741 break;
744 // If in f-string expression, check for }, :, ! to resume f-string state or update nesting count
745 if (currentFStringExp != NULL && !IsPySingleQuoteStringState(sc.state) && !IsPyTripleQuoteStringState(sc.state)) {
746 if (currentFStringExp->nestingCount == 0 && (sc.ch == '}' || sc.ch == ':' || (sc.ch == '!' && sc.chNext != '='))) {
747 sc.SetState(PopFromStateStack(fstringStateStack, currentFStringExp));
748 } else {
749 if (sc.ch == '{' || sc.ch == '[' || sc.ch == '(') {
750 currentFStringExp->nestingCount++;
751 } else if (sc.ch == '}' || sc.ch == ']' || sc.ch == ')') {
752 currentFStringExp->nestingCount--;
757 // Check for a new state starting character
758 if (sc.state == SCE_P_DEFAULT) {
759 if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
760 if (sc.ch == '0' && (sc.chNext == 'x' || sc.chNext == 'X')) {
761 base_n_number = true;
762 sc.SetState(SCE_P_NUMBER);
763 } else if (sc.ch == '0' &&
764 (sc.chNext == 'o' || sc.chNext == 'O' || sc.chNext == 'b' || sc.chNext == 'B')) {
765 if (options.base2or8Literals) {
766 base_n_number = true;
767 sc.SetState(SCE_P_NUMBER);
768 } else {
769 sc.SetState(SCE_P_NUMBER);
770 sc.ForwardSetState(SCE_P_IDENTIFIER);
772 } else {
773 base_n_number = false;
774 sc.SetState(SCE_P_NUMBER);
776 } else if ((IsASCII(sc.ch) && isoperator(static_cast<char>(sc.ch))) || sc.ch == '`') {
777 sc.SetState(SCE_P_OPERATOR);
778 } else if (sc.ch == '#') {
779 sc.SetState(sc.chNext == '#' ? SCE_P_COMMENTBLOCK : SCE_P_COMMENTLINE);
780 } else if (sc.ch == '@') {
781 if (IsFirstNonWhitespace(sc.currentPos, styler))
782 sc.SetState(SCE_P_DECORATOR);
783 else
784 sc.SetState(SCE_P_OPERATOR);
785 } else if (IsPyStringStart(sc.ch, sc.chNext, sc.GetRelative(2), allowedLiterals)) {
786 Sci_PositionU nextIndex = 0;
787 sc.SetState(GetPyStringState(styler, sc.currentPos, &nextIndex, allowedLiterals));
// Step over the prefix letters and opening quote(s) so the loop resumes inside the string.
788 while (nextIndex > (sc.currentPos + 1) && sc.More()) {
789 sc.Forward();
791 } else if (IsAWordStart(sc.ch, options.unicodeIdentifiers)) {
792 sc.SetState(SCE_P_IDENTIFIER);
796 styler.IndicatorFill(startIndicator, sc.currentPos, indicatorWhitespace, 0);
797 sc.Complete();
800 static bool IsCommentLine(Sci_Position line, Accessor &styler) {
801 Sci_Position pos = styler.LineStart(line);
802 const Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
803 for (Sci_Position i = pos; i < eol_pos; i++) {
804 const char ch = styler[i];
805 if (ch == '#')
806 return true;
807 else if (ch != ' ' && ch != '\t')
808 return false;
810 return false;
813 static bool IsQuoteLine(Sci_Position line, const Accessor &styler) {
814 const int style = styler.StyleAt(styler.LineStart(line)) & 31;
815 return ((style == SCE_P_TRIPLE) || (style == SCE_P_TRIPLEDOUBLE));
819 void SCI_METHOD LexerPython::Fold(Sci_PositionU startPos, Sci_Position length, int /*initStyle - unused*/, IDocument *pAccess) {
820 if (!options.fold)
821 return;
823 Accessor styler(pAccess, NULL);
825 const Sci_Position maxPos = startPos + length;
826 const Sci_Position maxLines = (maxPos == styler.Length()) ? styler.GetLine(maxPos) : styler.GetLine(maxPos - 1); // Requested last line
827 const Sci_Position docLines = styler.GetLine(styler.Length()); // Available last line
829 // Backtrack to previous non-blank line so we can determine indent level
830 // for any white space lines (needed esp. within triple quoted strings)
831 // and so we can fix any preceding fold level (which is why we go back
832 // at least one line in all cases)
833 int spaceFlags = 0;
834 Sci_Position lineCurrent = styler.GetLine(startPos);
835 int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
836 while (lineCurrent > 0) {
837 lineCurrent--;
838 indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
839 if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG) &&
840 (!IsCommentLine(lineCurrent, styler)) &&
841 (!IsQuoteLine(lineCurrent, styler)))
842 break;
844 int indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
846 // Set up initial loop state
847 startPos = styler.LineStart(lineCurrent);
848 int prev_state = SCE_P_DEFAULT & 31;
849 if (lineCurrent >= 1)
850 prev_state = styler.StyleAt(startPos - 1) & 31;
851 int prevQuote = options.foldQuotes && ((prev_state == SCE_P_TRIPLE) || (prev_state == SCE_P_TRIPLEDOUBLE));
853 // Process all characters to end of requested range or end of any triple quote
854 //that hangs over the end of the range. Cap processing in all cases
855 // to end of document (in case of unclosed quote at end).
856 while ((lineCurrent <= docLines) && ((lineCurrent <= maxLines) || prevQuote)) {
858 // Gather info
859 int lev = indentCurrent;
860 Sci_Position lineNext = lineCurrent + 1;
861 int indentNext = indentCurrent;
862 int quote = false;
863 if (lineNext <= docLines) {
864 // Information about next line is only available if not at end of document
865 indentNext = styler.IndentAmount(lineNext, &spaceFlags, NULL);
866 Sci_Position lookAtPos = (styler.LineStart(lineNext) == styler.Length()) ? styler.Length() - 1 : styler.LineStart(lineNext);
867 const int style = styler.StyleAt(lookAtPos) & 31;
868 quote = options.foldQuotes && ((style == SCE_P_TRIPLE) || (style == SCE_P_TRIPLEDOUBLE));
870 const int quote_start = (quote && !prevQuote);
871 const int quote_continue = (quote && prevQuote);
872 if (!quote || !prevQuote)
873 indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
874 if (quote)
875 indentNext = indentCurrentLevel;
876 if (indentNext & SC_FOLDLEVELWHITEFLAG)
877 indentNext = SC_FOLDLEVELWHITEFLAG | indentCurrentLevel;
879 if (quote_start) {
880 // Place fold point at start of triple quoted string
881 lev |= SC_FOLDLEVELHEADERFLAG;
882 } else if (quote_continue || prevQuote) {
883 // Add level to rest of lines in the string
884 lev = lev + 1;
887 // Skip past any blank lines for next indent level info; we skip also
888 // comments (all comments, not just those starting in column 0)
889 // which effectively folds them into surrounding code rather
890 // than screwing up folding. If comments end file, use the min
891 // comment indent as the level after
893 int minCommentLevel = indentCurrentLevel;
894 while (!quote &&
895 (lineNext < docLines) &&
896 ((indentNext & SC_FOLDLEVELWHITEFLAG) ||
897 (lineNext <= docLines && IsCommentLine(lineNext, styler)))) {
899 if (IsCommentLine(lineNext, styler) && indentNext < minCommentLevel) {
900 minCommentLevel = indentNext;
903 lineNext++;
904 indentNext = styler.IndentAmount(lineNext, &spaceFlags, NULL);
907 const int levelAfterComments = ((lineNext < docLines) ? indentNext & SC_FOLDLEVELNUMBERMASK : minCommentLevel);
908 const int levelBeforeComments = Maximum(indentCurrentLevel, levelAfterComments);
910 // Now set all the indent levels on the lines we skipped
911 // Do this from end to start. Once we encounter one line
912 // which is indented more than the line after the end of
913 // the comment-block, use the level of the block before
915 Sci_Position skipLine = lineNext;
916 int skipLevel = levelAfterComments;
918 while (--skipLine > lineCurrent) {
919 const int skipLineIndent = styler.IndentAmount(skipLine, &spaceFlags, NULL);
921 if (options.foldCompact) {
922 if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > levelAfterComments)
923 skipLevel = levelBeforeComments;
925 int whiteFlag = skipLineIndent & SC_FOLDLEVELWHITEFLAG;
927 styler.SetLevel(skipLine, skipLevel | whiteFlag);
928 } else {
929 if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > levelAfterComments &&
930 !(skipLineIndent & SC_FOLDLEVELWHITEFLAG) &&
931 !IsCommentLine(skipLine, styler))
932 skipLevel = levelBeforeComments;
934 styler.SetLevel(skipLine, skipLevel);
938 // Set fold header on non-quote line
939 if (!quote && !(indentCurrent & SC_FOLDLEVELWHITEFLAG)) {
940 if ((indentCurrent & SC_FOLDLEVELNUMBERMASK) < (indentNext & SC_FOLDLEVELNUMBERMASK))
941 lev |= SC_FOLDLEVELHEADERFLAG;
944 // Keep track of triple quote state of previous line
945 prevQuote = quote;
947 // Set fold level for this line and move to next line
948 styler.SetLevel(lineCurrent, options.foldCompact ? lev : lev & ~SC_FOLDLEVELWHITEFLAG);
949 indentCurrent = indentNext;
950 lineCurrent = lineNext;
953 // NOTE: Cannot set level of last line here because indentCurrent doesn't have
954 // header flag set; the loop above is crafted to take care of this case!
955 //styler.SetLevel(lineCurrent, indentCurrent);
958 LexerModule lmPython(SCLEX_PYTHON, LexerPython::LexerFactoryPython, "python",
959 pythonWordListDesc);