1 // Scintilla source code edit control
/** @file LexPython.cxx
 ** Lexer for Python.
 **/
5 // Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
20 #include "Scintilla.h"
23 #include "StringCopy.h"
25 #include "LexAccessor.h"
27 #include "StyleContext.h"
28 #include "CharacterSet.h"
29 #include "CharacterCategory.h"
30 #include "LexerModule.h"
31 #include "OptionSet.h"
32 #include "SubStyles.h"
35 using namespace Scintilla
;
39 // Use an unnamed namespace to protect the functions and classes from name conflicts
/* Notes on f-strings: f-strings are strings prefixed with f (e.g. f'') that may
   have arbitrary expressions in {}. The tokens in the expressions are lexed as if
   they were outside of any string. Expressions may contain { and } characters as
   long as there is a closing } for every {, may be 2+ lines in a triple quoted
   string, and may have a formatting specifier following a ! or :, but both !
   and : are valid inside of a bracketed expression and != is a valid
   expression token even outside of a bracketed expression.

   When in an f-string expression, the lexer keeps track of the state value of
   the f-string and the nesting count for the expression (# of [, (, { seen - # of
   }, ), ] seen). f-strings may be nested (e.g. f'{ a + f"{1+2}"') so a stack of
   states and nesting counts is kept. If a f-string expression continues beyond
   the end of a line, this stack is saved in a std::map that maps a line number to
   the stack at the end of that line. std::vector is used for the stack.

   The PEP for f-strings is at https://www.python.org/dev/peps/pep-0498/
*/

/**
 * One entry of the f-string expression stack described above.
 * Kept as a plain aggregate so it can be brace-initialised as {state, 0}.
 */
struct SingleFStringExpState {
	/// Style state of the f-string that contains the expression; restored when the expression ends.
	int state;
	/// Number of '[', '(' and '{' seen minus the number of ']', ')' and '}' seen inside the expression.
	int nestingCount;
};
/**
 * Kind of the most recently seen keyword; the lexer consults it to decide how
 * the identifier that follows should be styled.
 * kwCDef, kwCTypeName only used for Cython.
 */
enum kwType {
	kwOther,      // no special handling for the next identifier
	kwClass,      // next identifier is styled as a class name
	kwDef,        // next identifier is styled as a function/method name
	kwImport,     // a following "as" is treated specially
	kwCDef,
	kwCTypeName,
	kwCPDef
};
/**
 * Bit flags naming the string-literal prefixes the lexer should recognise.
 * Values are combined with | and tested with &.
 */
enum literalsAllowed {
	litNone = 0,
	litU = 1 << 0,	// u"..." / U"..."
	litB = 1 << 1,	// b"..." / B"..."
	litF = 1 << 2	// f"..." / F"..."
};
/// Indicator number handed to IndicatorFill() when marking indentation ranges.
const int indicatorWhitespace = 1;
70 bool IsPyComment(Accessor
&styler
, Sci_Position pos
, Sci_Position len
) {
71 return len
> 0 && styler
[pos
] == '#';
74 bool IsPyStringTypeChar(int ch
, literalsAllowed allowed
) {
76 ((allowed
& litB
) && (ch
== 'b' || ch
== 'B')) ||
77 ((allowed
& litU
) && (ch
== 'u' || ch
== 'U')) ||
78 ((allowed
& litF
) && (ch
== 'f' || ch
== 'F'));
81 bool IsPyStringStart(int ch
, int chNext
, int chNext2
, literalsAllowed allowed
) {
82 if (ch
== '\'' || ch
== '"')
84 if (IsPyStringTypeChar(ch
, allowed
)) {
85 if (chNext
== '"' || chNext
== '\'')
87 if ((chNext
== 'r' || chNext
== 'R') && (chNext2
== '"' || chNext2
== '\''))
90 if ((ch
== 'r' || ch
== 'R') && (chNext
== '"' || chNext
== '\''))
96 bool IsPyFStringState(int st
) {
97 return ((st
== SCE_P_FCHARACTER
) || (st
== SCE_P_FSTRING
) ||
98 (st
== SCE_P_FTRIPLE
) || (st
== SCE_P_FTRIPLEDOUBLE
));
101 bool IsPySingleQuoteStringState(int st
) {
102 return ((st
== SCE_P_CHARACTER
) || (st
== SCE_P_STRING
) ||
103 (st
== SCE_P_FCHARACTER
) || (st
== SCE_P_FSTRING
));
106 bool IsPyTripleQuoteStringState(int st
) {
107 return ((st
== SCE_P_TRIPLE
) || (st
== SCE_P_TRIPLEDOUBLE
) ||
108 (st
== SCE_P_FTRIPLE
) || (st
== SCE_P_FTRIPLEDOUBLE
));
111 char GetPyStringQuoteChar(int st
) {
112 if ((st
== SCE_P_CHARACTER
) || (st
== SCE_P_FCHARACTER
) ||
113 (st
== SCE_P_TRIPLE
) || (st
== SCE_P_FTRIPLE
))
115 if ((st
== SCE_P_STRING
) || (st
== SCE_P_FSTRING
) ||
116 (st
== SCE_P_TRIPLEDOUBLE
) || (st
== SCE_P_FTRIPLEDOUBLE
))
122 void PushStateToStack(int state
, std::vector
<SingleFStringExpState
> &stack
, SingleFStringExpState
*¤tFStringExp
) {
123 SingleFStringExpState single
= {state
, 0};
124 stack
.push_back(single
);
126 currentFStringExp
= &stack
.back();
129 int PopFromStateStack(std::vector
<SingleFStringExpState
> &stack
, SingleFStringExpState
*¤tFStringExp
) {
132 if (!stack
.empty()) {
133 state
= stack
.back().state
;
138 currentFStringExp
= NULL
;
140 currentFStringExp
= &stack
.back();
146 /* Return the state to use for the string starting at i; *nextIndex will be set to the first index following the quote(s) */
147 int GetPyStringState(Accessor
&styler
, Sci_Position i
, Sci_PositionU
*nextIndex
, literalsAllowed allowed
) {
148 char ch
= styler
.SafeGetCharAt(i
);
149 char chNext
= styler
.SafeGetCharAt(i
+ 1);
150 const int firstIsF
= (ch
== 'f' || ch
== 'F');
152 // Advance beyond r, u, or ur prefix (or r, b, or br in Python 2.7+ and r, f, or fr in Python 3.6+), but bail if there are any unexpected chars
153 if (ch
== 'r' || ch
== 'R') {
155 ch
= styler
.SafeGetCharAt(i
);
156 chNext
= styler
.SafeGetCharAt(i
+ 1);
157 } else if (IsPyStringTypeChar(ch
, allowed
)) {
158 if (chNext
== 'r' || chNext
== 'R')
162 ch
= styler
.SafeGetCharAt(i
);
163 chNext
= styler
.SafeGetCharAt(i
+ 1);
166 if (ch
!= '"' && ch
!= '\'') {
168 return SCE_P_DEFAULT
;
171 if (ch
== chNext
&& ch
== styler
.SafeGetCharAt(i
+ 2)) {
175 return (firstIsF
? SCE_P_FTRIPLEDOUBLE
: SCE_P_TRIPLEDOUBLE
);
177 return (firstIsF
? SCE_P_FTRIPLE
: SCE_P_TRIPLE
);
182 return (firstIsF
? SCE_P_FSTRING
: SCE_P_STRING
);
184 return (firstIsF
? SCE_P_FCHARACTER
: SCE_P_CHARACTER
);
188 inline bool IsAWordChar(int ch
, bool unicodeIdentifiers
) {
190 return (isalnum(ch
) || ch
== '.' || ch
== '_');
192 if (!unicodeIdentifiers
)
195 // Python uses the XID_Continue set from unicode data
196 return IsXidContinue(ch
);
199 inline bool IsAWordStart(int ch
, bool unicodeIdentifiers
) {
201 return (isalpha(ch
) || ch
== '_');
203 if (!unicodeIdentifiers
)
206 // Python uses the XID_Start set from unicode data
207 return IsXidStart(ch
);
210 static bool IsFirstNonWhitespace(Sci_Position pos
, Accessor
&styler
) {
211 Sci_Position line
= styler
.GetLine(pos
);
212 Sci_Position start_pos
= styler
.LineStart(line
);
213 for (Sci_Position i
= start_pos
; i
< pos
; i
++) {
214 const char ch
= styler
[i
];
215 if (!(ch
== ' ' || ch
== '\t'))
221 // Options used for LexerPython
222 struct OptionsPython
{
224 bool base2or8Literals
;
228 bool stringsOverNewline
;
229 bool keywords2NoSubIdentifiers
;
233 bool unicodeIdentifiers
;
237 base2or8Literals
= true;
241 stringsOverNewline
= false;
242 keywords2NoSubIdentifiers
= false;
246 unicodeIdentifiers
= true;
249 literalsAllowed
AllowedLiterals() const {
250 literalsAllowed allowedLiterals
= stringsU
? litU
: litNone
;
252 allowedLiterals
= static_cast<literalsAllowed
>(allowedLiterals
| litB
);
254 allowedLiterals
= static_cast<literalsAllowed
>(allowedLiterals
| litF
);
255 return allowedLiterals
;
259 static const char *const pythonWordListDesc
[] = {
261 "Highlighted identifiers",
265 struct OptionSetPython
: public OptionSet
<OptionsPython
> {
267 DefineProperty("tab.timmy.whinge.level", &OptionsPython::whingeLevel
,
268 "For Python code, checks whether indenting is consistent. "
269 "The default, 0 turns off indentation checking, "
270 "1 checks whether each line is potentially inconsistent with the previous line, "
271 "2 checks whether any space characters occur before a tab character in the indentation, "
272 "3 checks whether any spaces are in the indentation, and "
273 "4 checks for any tab characters in the indentation. "
274 "1 is a good level to use.");
276 DefineProperty("lexer.python.literals.binary", &OptionsPython::base2or8Literals
,
277 "Set to 0 to not recognise Python 3 binary and octal literals: 0b1011 0o712.");
279 DefineProperty("lexer.python.strings.u", &OptionsPython::stringsU
,
280 "Set to 0 to not recognise Python Unicode literals u\"x\" as used before Python 3.");
282 DefineProperty("lexer.python.strings.b", &OptionsPython::stringsB
,
283 "Set to 0 to not recognise Python 3 bytes literals b\"x\".");
285 DefineProperty("lexer.python.strings.f", &OptionsPython::stringsF
,
286 "Set to 0 to not recognise Python 3.6 f-string literals f\"var={var}\".");
288 DefineProperty("lexer.python.strings.over.newline", &OptionsPython::stringsOverNewline
,
289 "Set to 1 to allow strings to span newline characters.");
291 DefineProperty("lexer.python.keywords2.no.sub.identifiers", &OptionsPython::keywords2NoSubIdentifiers
,
292 "When enabled, it will not style keywords2 items that are used as a sub-identifier. "
293 "Example: when set, will not highlight \"foo.open\" when \"open\" is a keywords2 item.");
295 DefineProperty("fold", &OptionsPython::fold
);
297 DefineProperty("fold.quotes.python", &OptionsPython::foldQuotes
,
298 "This option enables folding multi-line quoted strings when using the Python lexer.");
300 DefineProperty("fold.compact", &OptionsPython::foldCompact
);
302 DefineProperty("lexer.python.unicode.identifiers", &OptionsPython::unicodeIdentifiers
,
303 "Set to 0 to not recognise Python 3 unicode identifiers.");
305 DefineWordListSets(pythonWordListDesc
);
309 const char styleSubable
[] = { SCE_P_IDENTIFIER
, 0 };
313 class LexerPython
: public ILexerWithSubStyles
{
316 OptionsPython options
;
317 OptionSetPython osPython
;
318 enum { ssIdentifier
};
320 std::map
<int, std::vector
<SingleFStringExpState
> > ftripleStateAtEol
;
322 explicit LexerPython() :
323 subStyles(styleSubable
, 0x80, 0x40, 0) {
325 virtual ~LexerPython() {
327 void SCI_METHOD
Release() override
{
330 int SCI_METHOD
Version() const override
{
333 const char *SCI_METHOD
PropertyNames() override
{
334 return osPython
.PropertyNames();
336 int SCI_METHOD
PropertyType(const char *name
) override
{
337 return osPython
.PropertyType(name
);
339 const char *SCI_METHOD
DescribeProperty(const char *name
) override
{
340 return osPython
.DescribeProperty(name
);
342 Sci_Position SCI_METHOD
PropertySet(const char *key
, const char *val
) override
;
343 const char *SCI_METHOD
DescribeWordListSets() override
{
344 return osPython
.DescribeWordListSets();
346 Sci_Position SCI_METHOD
WordListSet(int n
, const char *wl
) override
;
347 void SCI_METHOD
Lex(Sci_PositionU startPos
, Sci_Position length
, int initStyle
, IDocument
*pAccess
) override
;
348 void SCI_METHOD
Fold(Sci_PositionU startPos
, Sci_Position length
, int initStyle
, IDocument
*pAccess
) override
;
350 void *SCI_METHOD
PrivateCall(int, void *) override
{
354 int SCI_METHOD
LineEndTypesSupported() override
{
355 return SC_LINE_END_TYPE_UNICODE
;
358 int SCI_METHOD
AllocateSubStyles(int styleBase
, int numberStyles
) override
{
359 return subStyles
.Allocate(styleBase
, numberStyles
);
361 int SCI_METHOD
SubStylesStart(int styleBase
) override
{
362 return subStyles
.Start(styleBase
);
364 int SCI_METHOD
SubStylesLength(int styleBase
) override
{
365 return subStyles
.Length(styleBase
);
367 int SCI_METHOD
StyleFromSubStyle(int subStyle
) override
{
368 const int styleBase
= subStyles
.BaseStyle(subStyle
);
371 int SCI_METHOD
PrimaryStyleFromStyle(int style
) override
{
374 void SCI_METHOD
FreeSubStyles() override
{
377 void SCI_METHOD
SetIdentifiers(int style
, const char *identifiers
) override
{
378 subStyles
.SetIdentifiers(style
, identifiers
);
380 int SCI_METHOD
DistanceToSecondaryStyles() override
{
383 const char *SCI_METHOD
GetSubStyleBases() override
{
387 static ILexer
*LexerFactoryPython() {
388 return new LexerPython();
392 void ProcessLineEnd(StyleContext
&sc
, std::vector
<SingleFStringExpState
> &fstringStateStack
, SingleFStringExpState
*¤tFStringExp
, bool &inContinuedString
);
395 Sci_Position SCI_METHOD
LexerPython::PropertySet(const char *key
, const char *val
) {
396 if (osPython
.PropertySet(&options
, key
, val
)) {
402 Sci_Position SCI_METHOD
LexerPython::WordListSet(int n
, const char *wl
) {
403 WordList
*wordListN
= 0;
406 wordListN
= &keywords
;
409 wordListN
= &keywords2
;
412 Sci_Position firstModification
= -1;
416 if (*wordListN
!= wlNew
) {
418 firstModification
= 0;
421 return firstModification
;
424 void LexerPython::ProcessLineEnd(StyleContext
&sc
, std::vector
<SingleFStringExpState
> &fstringStateStack
, SingleFStringExpState
*¤tFStringExp
, bool &inContinuedString
) {
425 long deepestSingleStateIndex
= -1;
428 // Find the deepest single quote state because that string will end; no \ continuation in f-string
429 for (i
= 0; i
< fstringStateStack
.size(); i
++) {
430 if (IsPySingleQuoteStringState(fstringStateStack
[i
].state
)) {
431 deepestSingleStateIndex
= i
;
436 if (deepestSingleStateIndex
!= -1) {
437 sc
.SetState(fstringStateStack
[deepestSingleStateIndex
].state
);
438 while (fstringStateStack
.size() > static_cast<unsigned long>(deepestSingleStateIndex
)) {
439 PopFromStateStack(fstringStateStack
, currentFStringExp
);
442 if (!fstringStateStack
.empty()) {
443 std::pair
<int, std::vector
<SingleFStringExpState
> > val
;
444 val
.first
= sc
.currentLine
;
445 val
.second
= fstringStateStack
;
447 ftripleStateAtEol
.insert(val
);
450 if ((sc
.state
== SCE_P_DEFAULT
)
451 || IsPyTripleQuoteStringState(sc
.state
)) {
452 // Perform colourisation of white space and triple quoted strings at end of each line to allow
453 // tab marking to work inside white space and triple quoted strings
454 sc
.SetState(sc
.state
);
456 if (IsPySingleQuoteStringState(sc
.state
)) {
457 if (inContinuedString
|| options
.stringsOverNewline
) {
458 inContinuedString
= false;
460 sc
.ChangeState(SCE_P_STRINGEOL
);
461 sc
.ForwardSetState(SCE_P_DEFAULT
);
466 void SCI_METHOD
LexerPython::Lex(Sci_PositionU startPos
, Sci_Position length
, int initStyle
, IDocument
*pAccess
) {
467 Accessor
styler(pAccess
, NULL
);
469 // Track whether in f-string expression; vector is used for a stack to
470 // handle nested f-strings such as f"""{f'''{f"{f'{1}'}"}'''}"""
471 std::vector
<SingleFStringExpState
> fstringStateStack
;
472 SingleFStringExpState
*currentFStringExp
= NULL
;
474 const Sci_Position endPos
= startPos
+ length
;
476 // Backtrack to previous line in case need to fix its tab whinging
477 Sci_Position lineCurrent
= styler
.GetLine(startPos
);
479 if (lineCurrent
> 0) {
481 // Look for backslash-continued lines
482 while (lineCurrent
> 0) {
483 Sci_Position eolPos
= styler
.LineStart(lineCurrent
) - 1;
484 const int eolStyle
= styler
.StyleAt(eolPos
);
485 if (eolStyle
== SCE_P_STRING
486 || eolStyle
== SCE_P_CHARACTER
487 || eolStyle
== SCE_P_STRINGEOL
) {
493 startPos
= styler
.LineStart(lineCurrent
);
495 initStyle
= startPos
== 0 ? SCE_P_DEFAULT
: styler
.StyleAt(startPos
- 1);
498 const literalsAllowed allowedLiterals
= options
.AllowedLiterals();
500 initStyle
= initStyle
& 31;
501 if (initStyle
== SCE_P_STRINGEOL
) {
502 initStyle
= SCE_P_DEFAULT
;
505 // Set up fstate stack from last line and remove any subsequent ftriple at eol states
506 std::map
<int, std::vector
<SingleFStringExpState
> >::iterator it
;
507 it
= ftripleStateAtEol
.find(lineCurrent
- 1);
508 if (it
!= ftripleStateAtEol
.end() && !it
->second
.empty()) {
509 fstringStateStack
= it
->second
;
510 currentFStringExp
= &fstringStateStack
.back();
512 it
= ftripleStateAtEol
.lower_bound(lineCurrent
);
513 if (it
!= ftripleStateAtEol
.end()) {
514 ftripleStateAtEol
.erase(it
, ftripleStateAtEol
.end());
517 kwType kwLast
= kwOther
;
519 styler
.IndentAmount(lineCurrent
, &spaceFlags
, IsPyComment
);
520 bool base_n_number
= false;
522 const WordClassifier
&classifierIdentifiers
= subStyles
.Classifier(SCE_P_IDENTIFIER
);
524 StyleContext
sc(startPos
, endPos
- startPos
, initStyle
, styler
);
526 bool indentGood
= true;
527 Sci_Position startIndicator
= sc
.currentPos
;
528 bool inContinuedString
= false;
530 for (; sc
.More(); sc
.Forward()) {
532 if (sc
.atLineStart
) {
533 styler
.IndentAmount(lineCurrent
, &spaceFlags
, IsPyComment
);
535 if (options
.whingeLevel
== 1) {
536 indentGood
= (spaceFlags
& wsInconsistent
) == 0;
537 } else if (options
.whingeLevel
== 2) {
538 indentGood
= (spaceFlags
& wsSpaceTab
) == 0;
539 } else if (options
.whingeLevel
== 3) {
540 indentGood
= (spaceFlags
& wsSpace
) == 0;
541 } else if (options
.whingeLevel
== 4) {
542 indentGood
= (spaceFlags
& wsTab
) == 0;
545 styler
.IndicatorFill(startIndicator
, sc
.currentPos
, indicatorWhitespace
, 0);
546 startIndicator
= sc
.currentPos
;
551 ProcessLineEnd(sc
, fstringStateStack
, currentFStringExp
, inContinuedString
);
557 bool needEOLCheck
= false;
560 if (sc
.state
== SCE_P_OPERATOR
) {
562 sc
.SetState(SCE_P_DEFAULT
);
563 } else if (sc
.state
== SCE_P_NUMBER
) {
564 if (!IsAWordChar(sc
.ch
, false) &&
565 !(!base_n_number
&& ((sc
.ch
== '+' || sc
.ch
== '-') && (sc
.chPrev
== 'e' || sc
.chPrev
== 'E')))) {
566 sc
.SetState(SCE_P_DEFAULT
);
568 } else if (sc
.state
== SCE_P_IDENTIFIER
) {
569 if ((sc
.ch
== '.') || (!IsAWordChar(sc
.ch
, options
.unicodeIdentifiers
))) {
571 sc
.GetCurrent(s
, sizeof(s
));
572 int style
= SCE_P_IDENTIFIER
;
573 if ((kwLast
== kwImport
) && (strcmp(s
, "as") == 0)) {
575 } else if (keywords
.InList(s
)) {
577 } else if (kwLast
== kwClass
) {
578 style
= SCE_P_CLASSNAME
;
579 } else if (kwLast
== kwDef
) {
580 style
= SCE_P_DEFNAME
;
581 } else if (kwLast
== kwCDef
|| kwLast
== kwCPDef
) {
582 Sci_Position pos
= sc
.currentPos
;
583 unsigned char ch
= styler
.SafeGetCharAt(pos
, '\0');
586 style
= SCE_P_DEFNAME
;
588 } else if (ch
== ':') {
589 style
= SCE_P_CLASSNAME
;
591 } else if (ch
== ' ' || ch
== '\t' || ch
== '\n' || ch
== '\r') {
593 ch
= styler
.SafeGetCharAt(pos
, '\0');
598 } else if (keywords2
.InList(s
)) {
599 if (options
.keywords2NoSubIdentifiers
) {
600 // We don't want to highlight keywords2
601 // that are used as a sub-identifier,
602 // i.e. not open in "foo.open".
603 Sci_Position pos
= styler
.GetStartSegment() - 1;
604 if (pos
< 0 || (styler
.SafeGetCharAt(pos
, '\0') != '.'))
610 int subStyle
= classifierIdentifiers
.ValueFor(s
);
615 sc
.ChangeState(style
);
616 sc
.SetState(SCE_P_DEFAULT
);
617 if (style
== SCE_P_WORD
) {
618 if (0 == strcmp(s
, "class"))
620 else if (0 == strcmp(s
, "def"))
622 else if (0 == strcmp(s
, "import"))
624 else if (0 == strcmp(s
, "cdef"))
626 else if (0 == strcmp(s
, "cpdef"))
628 else if (0 == strcmp(s
, "cimport"))
630 else if (kwLast
!= kwCDef
&& kwLast
!= kwCPDef
)
632 } else if (kwLast
!= kwCDef
&& kwLast
!= kwCPDef
) {
636 } else if ((sc
.state
== SCE_P_COMMENTLINE
) || (sc
.state
== SCE_P_COMMENTBLOCK
)) {
637 if (sc
.ch
== '\r' || sc
.ch
== '\n') {
638 sc
.SetState(SCE_P_DEFAULT
);
640 } else if (sc
.state
== SCE_P_DECORATOR
) {
641 if (!IsAWordStart(sc
.ch
, options
.unicodeIdentifiers
)) {
642 sc
.SetState(SCE_P_DEFAULT
);
644 } else if (IsPySingleQuoteStringState(sc
.state
)) {
646 if ((sc
.chNext
== '\r') && (sc
.GetRelative(2) == '\n')) {
649 if (sc
.chNext
== '\n' || sc
.chNext
== '\r') {
650 inContinuedString
= true;
652 // Don't roll over the newline.
655 } else if (sc
.ch
== GetPyStringQuoteChar(sc
.state
)) {
656 sc
.ForwardSetState(SCE_P_DEFAULT
);
659 } else if ((sc
.state
== SCE_P_TRIPLE
) || (sc
.state
== SCE_P_FTRIPLE
)) {
662 } else if (sc
.Match("\'\'\'")) {
665 sc
.ForwardSetState(SCE_P_DEFAULT
);
668 } else if ((sc
.state
== SCE_P_TRIPLEDOUBLE
) || (sc
.state
== SCE_P_FTRIPLEDOUBLE
)) {
671 } else if (sc
.Match("\"\"\"")) {
674 sc
.ForwardSetState(SCE_P_DEFAULT
);
679 // Note if used and not if else because string states also match
680 // some of the above clauses
681 if (IsPyFStringState(sc
.state
) && sc
.ch
== '{') {
682 if (sc
.chNext
== '{') {
685 PushStateToStack(sc
.state
, fstringStateStack
, currentFStringExp
);
686 sc
.ForwardSetState(SCE_P_DEFAULT
);
// If in an f-string expression, check for the ending quote(s)
// and end f-string to handle syntactically incorrect cases like
// f'{' and f"""{"""
694 if (!fstringStateStack
.empty() && (sc
.ch
== '\'' || sc
.ch
== '"')) {
695 long matching_stack_i
= -1;
696 for (unsigned long stack_i
= 0; stack_i
< fstringStateStack
.size() && matching_stack_i
== -1; stack_i
++) {
697 const int stack_state
= fstringStateStack
[stack_i
].state
;
698 const char quote
= GetPyStringQuoteChar(stack_state
);
699 if (sc
.ch
== quote
) {
700 if (IsPySingleQuoteStringState(stack_state
)) {
701 matching_stack_i
= stack_i
;
702 } else if (quote
== '"' ? sc
.Match("\"\"\"") : sc
.Match("'''")) {
703 matching_stack_i
= stack_i
;
708 if (matching_stack_i
!= -1) {
709 sc
.SetState(fstringStateStack
[matching_stack_i
].state
);
710 if (IsPyTripleQuoteStringState(fstringStateStack
[matching_stack_i
].state
)) {
714 sc
.ForwardSetState(SCE_P_DEFAULT
);
717 while (fstringStateStack
.size() > static_cast<unsigned long>(matching_stack_i
)) {
718 PopFromStateStack(fstringStateStack
, currentFStringExp
);
722 // End of code to find the end of a state
724 if (!indentGood
&& !IsASpaceOrTab(sc
.ch
)) {
725 styler
.IndicatorFill(startIndicator
, sc
.currentPos
, indicatorWhitespace
, 1);
726 startIndicator
= sc
.currentPos
;
730 // One cdef or cpdef line, clear kwLast only at end of line
731 if ((kwLast
== kwCDef
|| kwLast
== kwCPDef
) && sc
.atLineEnd
) {
735 // State exit code may have moved on to end of line
736 if (needEOLCheck
&& sc
.atLineEnd
) {
737 ProcessLineEnd(sc
, fstringStateStack
, currentFStringExp
, inContinuedString
);
739 styler
.IndentAmount(lineCurrent
, &spaceFlags
, IsPyComment
);
744 // If in f-string expression, check for }, :, ! to resume f-string state or update nesting count
745 if (currentFStringExp
!= NULL
&& !IsPySingleQuoteStringState(sc
.state
) && !IsPyTripleQuoteStringState(sc
.state
)) {
746 if (currentFStringExp
->nestingCount
== 0 && (sc
.ch
== '}' || sc
.ch
== ':' || (sc
.ch
== '!' && sc
.chNext
!= '='))) {
747 sc
.SetState(PopFromStateStack(fstringStateStack
, currentFStringExp
));
749 if (sc
.ch
== '{' || sc
.ch
== '[' || sc
.ch
== '(') {
750 currentFStringExp
->nestingCount
++;
751 } else if (sc
.ch
== '}' || sc
.ch
== ']' || sc
.ch
== ')') {
752 currentFStringExp
->nestingCount
--;
757 // Check for a new state starting character
758 if (sc
.state
== SCE_P_DEFAULT
) {
759 if (IsADigit(sc
.ch
) || (sc
.ch
== '.' && IsADigit(sc
.chNext
))) {
760 if (sc
.ch
== '0' && (sc
.chNext
== 'x' || sc
.chNext
== 'X')) {
761 base_n_number
= true;
762 sc
.SetState(SCE_P_NUMBER
);
763 } else if (sc
.ch
== '0' &&
764 (sc
.chNext
== 'o' || sc
.chNext
== 'O' || sc
.chNext
== 'b' || sc
.chNext
== 'B')) {
765 if (options
.base2or8Literals
) {
766 base_n_number
= true;
767 sc
.SetState(SCE_P_NUMBER
);
769 sc
.SetState(SCE_P_NUMBER
);
770 sc
.ForwardSetState(SCE_P_IDENTIFIER
);
773 base_n_number
= false;
774 sc
.SetState(SCE_P_NUMBER
);
776 } else if ((IsASCII(sc
.ch
) && isoperator(static_cast<char>(sc
.ch
))) || sc
.ch
== '`') {
777 sc
.SetState(SCE_P_OPERATOR
);
778 } else if (sc
.ch
== '#') {
779 sc
.SetState(sc
.chNext
== '#' ? SCE_P_COMMENTBLOCK
: SCE_P_COMMENTLINE
);
780 } else if (sc
.ch
== '@') {
781 if (IsFirstNonWhitespace(sc
.currentPos
, styler
))
782 sc
.SetState(SCE_P_DECORATOR
);
784 sc
.SetState(SCE_P_OPERATOR
);
785 } else if (IsPyStringStart(sc
.ch
, sc
.chNext
, sc
.GetRelative(2), allowedLiterals
)) {
786 Sci_PositionU nextIndex
= 0;
787 sc
.SetState(GetPyStringState(styler
, sc
.currentPos
, &nextIndex
, allowedLiterals
));
788 while (nextIndex
> (sc
.currentPos
+ 1) && sc
.More()) {
791 } else if (IsAWordStart(sc
.ch
, options
.unicodeIdentifiers
)) {
792 sc
.SetState(SCE_P_IDENTIFIER
);
796 styler
.IndicatorFill(startIndicator
, sc
.currentPos
, indicatorWhitespace
, 0);
800 static bool IsCommentLine(Sci_Position line
, Accessor
&styler
) {
801 Sci_Position pos
= styler
.LineStart(line
);
802 const Sci_Position eol_pos
= styler
.LineStart(line
+ 1) - 1;
803 for (Sci_Position i
= pos
; i
< eol_pos
; i
++) {
804 const char ch
= styler
[i
];
807 else if (ch
!= ' ' && ch
!= '\t')
813 static bool IsQuoteLine(Sci_Position line
, const Accessor
&styler
) {
814 const int style
= styler
.StyleAt(styler
.LineStart(line
)) & 31;
815 return ((style
== SCE_P_TRIPLE
) || (style
== SCE_P_TRIPLEDOUBLE
));
819 void SCI_METHOD
LexerPython::Fold(Sci_PositionU startPos
, Sci_Position length
, int /*initStyle - unused*/, IDocument
*pAccess
) {
823 Accessor
styler(pAccess
, NULL
);
825 const Sci_Position maxPos
= startPos
+ length
;
826 const Sci_Position maxLines
= (maxPos
== styler
.Length()) ? styler
.GetLine(maxPos
) : styler
.GetLine(maxPos
- 1); // Requested last line
827 const Sci_Position docLines
= styler
.GetLine(styler
.Length()); // Available last line
829 // Backtrack to previous non-blank line so we can determine indent level
830 // for any white space lines (needed esp. within triple quoted strings)
831 // and so we can fix any preceding fold level (which is why we go back
832 // at least one line in all cases)
834 Sci_Position lineCurrent
= styler
.GetLine(startPos
);
835 int indentCurrent
= styler
.IndentAmount(lineCurrent
, &spaceFlags
, NULL
);
836 while (lineCurrent
> 0) {
838 indentCurrent
= styler
.IndentAmount(lineCurrent
, &spaceFlags
, NULL
);
839 if (!(indentCurrent
& SC_FOLDLEVELWHITEFLAG
) &&
840 (!IsCommentLine(lineCurrent
, styler
)) &&
841 (!IsQuoteLine(lineCurrent
, styler
)))
844 int indentCurrentLevel
= indentCurrent
& SC_FOLDLEVELNUMBERMASK
;
846 // Set up initial loop state
847 startPos
= styler
.LineStart(lineCurrent
);
848 int prev_state
= SCE_P_DEFAULT
& 31;
849 if (lineCurrent
>= 1)
850 prev_state
= styler
.StyleAt(startPos
- 1) & 31;
851 int prevQuote
= options
.foldQuotes
&& ((prev_state
== SCE_P_TRIPLE
) || (prev_state
== SCE_P_TRIPLEDOUBLE
));
853 // Process all characters to end of requested range or end of any triple quote
854 //that hangs over the end of the range. Cap processing in all cases
855 // to end of document (in case of unclosed quote at end).
856 while ((lineCurrent
<= docLines
) && ((lineCurrent
<= maxLines
) || prevQuote
)) {
859 int lev
= indentCurrent
;
860 Sci_Position lineNext
= lineCurrent
+ 1;
861 int indentNext
= indentCurrent
;
863 if (lineNext
<= docLines
) {
864 // Information about next line is only available if not at end of document
865 indentNext
= styler
.IndentAmount(lineNext
, &spaceFlags
, NULL
);
866 Sci_Position lookAtPos
= (styler
.LineStart(lineNext
) == styler
.Length()) ? styler
.Length() - 1 : styler
.LineStart(lineNext
);
867 const int style
= styler
.StyleAt(lookAtPos
) & 31;
868 quote
= options
.foldQuotes
&& ((style
== SCE_P_TRIPLE
) || (style
== SCE_P_TRIPLEDOUBLE
));
870 const int quote_start
= (quote
&& !prevQuote
);
871 const int quote_continue
= (quote
&& prevQuote
);
872 if (!quote
|| !prevQuote
)
873 indentCurrentLevel
= indentCurrent
& SC_FOLDLEVELNUMBERMASK
;
875 indentNext
= indentCurrentLevel
;
876 if (indentNext
& SC_FOLDLEVELWHITEFLAG
)
877 indentNext
= SC_FOLDLEVELWHITEFLAG
| indentCurrentLevel
;
880 // Place fold point at start of triple quoted string
881 lev
|= SC_FOLDLEVELHEADERFLAG
;
882 } else if (quote_continue
|| prevQuote
) {
883 // Add level to rest of lines in the string
887 // Skip past any blank lines for next indent level info; we skip also
888 // comments (all comments, not just those starting in column 0)
889 // which effectively folds them into surrounding code rather
890 // than screwing up folding. If comments end file, use the min
891 // comment indent as the level after
893 int minCommentLevel
= indentCurrentLevel
;
895 (lineNext
< docLines
) &&
896 ((indentNext
& SC_FOLDLEVELWHITEFLAG
) ||
897 (lineNext
<= docLines
&& IsCommentLine(lineNext
, styler
)))) {
899 if (IsCommentLine(lineNext
, styler
) && indentNext
< minCommentLevel
) {
900 minCommentLevel
= indentNext
;
904 indentNext
= styler
.IndentAmount(lineNext
, &spaceFlags
, NULL
);
907 const int levelAfterComments
= ((lineNext
< docLines
) ? indentNext
& SC_FOLDLEVELNUMBERMASK
: minCommentLevel
);
908 const int levelBeforeComments
= Maximum(indentCurrentLevel
, levelAfterComments
);
910 // Now set all the indent levels on the lines we skipped
911 // Do this from end to start. Once we encounter one line
912 // which is indented more than the line after the end of
913 // the comment-block, use the level of the block before
915 Sci_Position skipLine
= lineNext
;
916 int skipLevel
= levelAfterComments
;
918 while (--skipLine
> lineCurrent
) {
919 const int skipLineIndent
= styler
.IndentAmount(skipLine
, &spaceFlags
, NULL
);
921 if (options
.foldCompact
) {
922 if ((skipLineIndent
& SC_FOLDLEVELNUMBERMASK
) > levelAfterComments
)
923 skipLevel
= levelBeforeComments
;
925 int whiteFlag
= skipLineIndent
& SC_FOLDLEVELWHITEFLAG
;
927 styler
.SetLevel(skipLine
, skipLevel
| whiteFlag
);
929 if ((skipLineIndent
& SC_FOLDLEVELNUMBERMASK
) > levelAfterComments
&&
930 !(skipLineIndent
& SC_FOLDLEVELWHITEFLAG
) &&
931 !IsCommentLine(skipLine
, styler
))
932 skipLevel
= levelBeforeComments
;
934 styler
.SetLevel(skipLine
, skipLevel
);
938 // Set fold header on non-quote line
939 if (!quote
&& !(indentCurrent
& SC_FOLDLEVELWHITEFLAG
)) {
940 if ((indentCurrent
& SC_FOLDLEVELNUMBERMASK
) < (indentNext
& SC_FOLDLEVELNUMBERMASK
))
941 lev
|= SC_FOLDLEVELHEADERFLAG
;
944 // Keep track of triple quote state of previous line
947 // Set fold level for this line and move to next line
948 styler
.SetLevel(lineCurrent
, options
.foldCompact
? lev
: lev
& ~SC_FOLDLEVELWHITEFLAG
);
949 indentCurrent
= indentNext
;
950 lineCurrent
= lineNext
;
953 // NOTE: Cannot set level of last line here because indentCurrent doesn't have
954 // header flag set; the loop above is crafted to take care of this case!
955 //styler.SetLevel(lineCurrent, indentCurrent);
958 LexerModule
lmPython(SCLEX_PYTHON
, LexerPython::LexerFactoryPython
, "python",