1 // Scintilla source code edit control
2 /** @file LexPython.cxx
5 // Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
16 #include "Scintilla.h"
20 #include "LexAccessor.h"
22 #include "StyleContext.h"
23 #include "CharacterSet.h"
24 #include "LexerModule.h"
27 using namespace Scintilla
;
30 /* kwCDef, kwCTypeName only used for Cython */
// Kind of the keyword most recently styled by ColourisePyDoc (held in its
// local kwLast). It steers how the identifier that follows is styled: after
// kwClass the identifier gets SCE_P_CLASSNAME, after kwDef it gets
// SCE_P_DEFNAME. kwCPDef is always tested together with kwCDef there.
31 enum kwType
{ kwOther
, kwClass
, kwDef
, kwImport
, kwCDef
, kwCTypeName
, kwCPDef
};
// Indicator number passed to styler.IndicatorFill() by ColourisePyDoc: filled
// with value 1 over leading whitespace when the line's indentation fails the
// configured check, and with value 0 otherwise.
33 static const int indicatorWhitespace
= 1;
35 static bool IsPyComment(Accessor
&styler
, int pos
, int len
) {
36 return len
> 0 && styler
[pos
] == '#';
// Bit flags selecting which string-literal prefix letters are recognised:
// litU enables u/U and litB enables b/B (both are tested with '&' in
// IsPyStringTypeChar). ColourisePyDoc ORs litU and litB together, so a
// variable of this type can hold the combined value 3 as well.
39 enum literalsAllowed
{ litNone
=0, litU
=1, litB
=2};
41 static bool IsPyStringTypeChar(int ch
, literalsAllowed allowed
) {
43 ((allowed
& litB
) && (ch
== 'b' || ch
== 'B')) ||
44 ((allowed
& litU
) && (ch
== 'u' || ch
== 'U'));
// Tests whether ch, with lookahead characters chNext and chNext2, can open a
// string literal. The visible conditions cover four shapes:
//   - a quote character on its own;
//   - a prefix letter accepted by IsPyStringTypeChar followed by a quote;
//   - such a prefix letter followed by r/R and then a quote;
//   - r/R followed directly by a quote.
// NOTE(review): the statement executed under each condition is not present in
// this copy of the file - presumably each returns true, with false as the
// fall-through result. Confirm against a complete copy.
47 static bool IsPyStringStart(int ch
, int chNext
, int chNext2
, literalsAllowed allowed
) {
// Unprefixed literal: ch itself is ' or ".
48 if (ch
== '\'' || ch
== '"')
// u/U or b/B prefix, subject to the `allowed` flags.
50 if (IsPyStringTypeChar(ch
, allowed
)) {
51 if (chNext
== '"' || chNext
== '\'')
53 if ((chNext
== 'r' || chNext
== 'R') && (chNext2
== '"' || chNext2
== '\''))
// Raw prefix on its own; not gated by `allowed`.
56 if ((ch
== 'r' || ch
== 'R') && (chNext
== '"' || chNext
== '\''))
62 /* Return the state to use for the string starting at i; *nextIndex will be set to the first index following the quote(s) */
// Parameters:
//   styler    - source of the characters (read through SafeGetCharAt).
//   i         - index of the first character of the literal, prefix included.
//   nextIndex - out-parameter described above.
//   allowed   - which u/U and b/B prefixes count, as in IsPyStringTypeChar.
// NOTE(review): the statements that advance i, write *nextIndex and return
// for the non-quote and single-quote cases are not present in this copy of
// the file; only SCE_P_TRIPLEDOUBLE and SCE_P_CHARACTER returns are visible.
63 static int GetPyStringState(Accessor
&styler
, int i
, unsigned int *nextIndex
, literalsAllowed allowed
) {
// Look at the character at i and the one after it.
64 char ch
= styler
.SafeGetCharAt(i
);
65 char chNext
= styler
.SafeGetCharAt(i
+ 1);
67 // Advance beyond r, u, or ur prefix (or r, b, or br in Python 3.0), but bail if there are any unexpected chars
68 if (ch
== 'r' || ch
== 'R') {
// ch and chNext are re-read at i; presumably i has been stepped past the
// prefix just before this point (that statement is not visible here).
70 ch
= styler
.SafeGetCharAt(i
);
71 chNext
= styler
.SafeGetCharAt(i
+ 1);
72 } else if (IsPyStringTypeChar(ch
, allowed
)) {
73 if (chNext
== 'r' || chNext
== 'R')
77 ch
= styler
.SafeGetCharAt(i
);
78 chNext
= styler
.SafeGetCharAt(i
+ 1);
// Not positioned on a quote character: the "unexpected chars" case from the
// comment above.
81 if (ch
!= '"' && ch
!= '\'') {
// Three identical quote characters in a row.
86 if (ch
== chNext
&& ch
== styler
.SafeGetCharAt(i
+ 2)) {
90 return SCE_P_TRIPLEDOUBLE
;
99 return SCE_P_CHARACTER
;
// Returns true when ch is an ASCII letter, digit, '.' or '_'. ColourisePyDoc
// uses this to find where a number, identifier or decorator token stops.
// Anything outside 0..0x7F is rejected before isalnum() is consulted: the
// <ctype.h> functions are undefined for negative arguments other than EOF,
// and an upper-bound check alone would let negative values through.
static inline bool IsAWordChar(int ch) {
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '.' || ch == '_';
}
// Returns true when ch is an ASCII letter, digit or '_'. ColourisePyDoc uses
// this to decide whether an identifier begins at the current character.
// Anything outside 0..0x7F is rejected before isalnum() is consulted: the
// <ctype.h> functions are undefined for negative arguments other than EOF,
// and an upper-bound check alone would let negative values through.
static inline bool IsAWordStart(int ch) {
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '_';
}
// Lexer entry point for Python: assigns SCE_P_* styles to a range of the
// document by driving a StyleContext over it, and fills the whitespace
// indicator according to the tab.timmy.whinge.level indentation check.
//   startPos, length - range to style; startPos is reset to the start of the
//                      line reached by the backtracking below.
//   initStyle        - style in effect at startPos; recomputed after the
//                      backtracking and masked to its low five bits.
//   keywordlists     - entries [0] and [1] are the two word lists consulted
//                      when an identifier ends.
//   styler           - document accessor: supplies characters, properties and
//                      indentation flags, and receives styles and indicators.
// NOTE(review): this copy of the function has gaps (a number of statements
// and closing braces are not present), so the added comments describe only
// what is visible.
111 static void ColourisePyDoc(unsigned int startPos
, int length
, int initStyle
,
112 WordList
*keywordlists
[], Accessor
&styler
) {
114 int endPos
= startPos
+ length
;
116 // Backtrack to previous line in case need to fix its tab whinging
117 int lineCurrent
= styler
.GetLine(startPos
);
119 if (lineCurrent
> 0) {
121 // Look for backslash-continued lines
122 while (lineCurrent
> 0) {
// Style of the last position of the preceding line.
123 int eolPos
= styler
.LineStart(lineCurrent
) - 1;
124 int eolStyle
= styler
.StyleAt(eolPos
);
125 if (eolStyle
== SCE_P_STRING
126 || eolStyle
== SCE_P_CHARACTER
127 || eolStyle
== SCE_P_STRINGEOL
) {
133 startPos
= styler
.LineStart(lineCurrent
);
// Resume from the style just before the new start (default at document start).
135 initStyle
= startPos
== 0 ? SCE_P_DEFAULT
: styler
.StyleAt(startPos
- 1);
// keywords is consulted first when an identifier ends; keywords2 is a later
// fallback in the same else-if chain.
138 WordList
&keywords
= *keywordlists
[0];
139 WordList
&keywords2
= *keywordlists
[1];
141 // property tab.timmy.whinge.level
142 // For Python code, checks whether indenting is consistent.
143 // The default, 0 turns off indentation checking,
144 // 1 checks whether each line is potentially inconsistent with the previous line,
145 // 2 checks whether any space characters occur before a tab character in the indentation,
146 // 3 checks whether any spaces are in the indentation, and
147 // 4 checks for any tab characters in the indentation.
148 // 1 is a good level to use.
149 const int whingeLevel
= styler
.GetPropertyInt("tab.timmy.whinge.level");
151 // property lexer.python.literals.binary
152 // Set to 0 to not recognise Python 3 binary and octal literals: 0b1011 0o712.
153 bool base2or8Literals
= styler
.GetPropertyInt("lexer.python.literals.binary", 1) != 0;
155 // property lexer.python.strings.u
156 // Set to 0 to not recognise Python Unicode literals u"x" as used before Python 3.
157 literalsAllowed allowedLiterals
= (styler
.GetPropertyInt("lexer.python.strings.u", 1)) ? litU
: litNone
;
159 // property lexer.python.strings.b
160 // Set to 0 to not recognise Python 3 bytes literals b"x".
161 if (styler
.GetPropertyInt("lexer.python.strings.b", 1))
162 allowedLiterals
= static_cast<literalsAllowed
>(allowedLiterals
| litB
);
164 // property lexer.python.strings.over.newline
165 // Set to 1 to allow strings to span newline characters.
166 bool stringsOverNewline
= styler
.GetPropertyInt("lexer.python.strings.over.newline") != 0;
// Only the low five bits of the incoming style are used.
168 initStyle
= initStyle
& 31;
// The end-of-line error style is not carried into this run; start from default.
169 if (initStyle
== SCE_P_STRINGEOL
) {
170 initStyle
= SCE_P_DEFAULT
;
173 kwType kwLast
= kwOther
;
175 styler
.IndentAmount(lineCurrent
, &spaceFlags
, IsPyComment
);
// Set when the number being styled started with 0x, or with 0o/0b while
// lexer.python.literals.binary is enabled.
176 bool base_n_number
= false;
178 StyleContext
sc(startPos
, endPos
- startPos
, initStyle
, styler
);
180 bool indentGood
= true;
181 int startIndicator
= sc
.currentPos
;
182 bool inContinuedString
= false;
// Main styling loop.
184 for (; sc
.More(); sc
.Forward()) {
// At each line start, refresh the indentation flags and evaluate the check
// selected by whingeLevel.
186 if (sc
.atLineStart
) {
187 styler
.IndentAmount(lineCurrent
, &spaceFlags
, IsPyComment
);
189 if (whingeLevel
== 1) {
190 indentGood
= (spaceFlags
& wsInconsistent
) == 0;
191 } else if (whingeLevel
== 2) {
192 indentGood
= (spaceFlags
& wsSpaceTab
) == 0;
193 } else if (whingeLevel
== 3) {
194 indentGood
= (spaceFlags
& wsSpace
) == 0;
195 } else if (whingeLevel
== 4) {
196 indentGood
= (spaceFlags
& wsTab
) == 0;
// Fill the indicator with value 0 from the last marker up to here, then move
// the marker.
199 styler
.IndicatorFill(startIndicator
, sc
.currentPos
, indicatorWhitespace
, 0);
200 startIndicator
= sc
.currentPos
;
205 if ((sc
.state
== SCE_P_DEFAULT
) ||
206 (sc
.state
== SCE_P_TRIPLE
) ||
207 (sc
.state
== SCE_P_TRIPLEDOUBLE
)) {
208 // Perform colourisation of white space and triple quoted strings at end of each line to allow
209 // tab marking to work inside white space and triple quoted strings
210 sc
.SetState(sc
.state
);
// Single-line string styles: either the string is allowed to continue
// (continuation seen earlier, or lexer.python.strings.over.newline), or it is
// restyled as SCE_P_STRINGEOL and the state returns to default.
213 if ((sc
.state
== SCE_P_STRING
) || (sc
.state
== SCE_P_CHARACTER
)) {
214 if (inContinuedString
|| stringsOverNewline
) {
215 inContinuedString
= false;
217 sc
.ChangeState(SCE_P_STRINGEOL
);
218 sc
.ForwardSetState(SCE_P_DEFAULT
);
225 bool needEOLCheck
= false;
227 // Check for a state end
228 if (sc
.state
== SCE_P_OPERATOR
) {
230 sc
.SetState(SCE_P_DEFAULT
);
231 } else if (sc
.state
== SCE_P_NUMBER
) {
// A number ends at the first non-word character, except that '+' or '-'
// directly after e/E is kept when the number is not a 0x/0o/0b literal.
232 if (!IsAWordChar(sc
.ch
) &&
233 !(!base_n_number
&& ((sc
.ch
== '+' || sc
.ch
== '-') && (sc
.chPrev
== 'e' || sc
.chPrev
== 'E')))) {
234 sc
.SetState(SCE_P_DEFAULT
);
236 } else if (sc
.state
== SCE_P_IDENTIFIER
) {
237 if ((sc
.ch
== '.') || (!IsAWordChar(sc
.ch
))) {
239 sc
.GetCurrent(s
, sizeof(s
));
// Choose the final style for the identifier text in s; plain identifier
// unless one of the branches below overrides it.
240 int style
= SCE_P_IDENTIFIER
;
241 if ((kwLast
== kwImport
) && (strcmp(s
, "as") == 0)) {
243 } else if (keywords
.InList(s
)) {
245 } else if (kwLast
== kwClass
) {
246 style
= SCE_P_CLASSNAME
;
247 } else if (kwLast
== kwDef
) {
248 style
= SCE_P_DEFNAME
;
249 } else if (kwLast
== kwCDef
|| kwLast
== kwCPDef
) {
// After cdef/cpdef the character(s) following the identifier are inspected
// to pick between SCE_P_DEFNAME and SCE_P_CLASSNAME.
250 int pos
= sc
.currentPos
;
251 unsigned char ch
= styler
.SafeGetCharAt(pos
, '\0');
254 style
= SCE_P_DEFNAME
;
256 } else if (ch
== ':') {
257 style
= SCE_P_CLASSNAME
;
259 } else if (ch
== ' ' || ch
== '\t' || ch
== '\n' || ch
== '\r') {
261 ch
= styler
.SafeGetCharAt(pos
, '\0');
266 } else if (keywords2
.InList(s
)) {
269 sc
.ChangeState(style
);
270 sc
.SetState(SCE_P_DEFAULT
);
// NOTE(review): the statements under the comparisons below are not visible in
// this copy - presumably they update kwLast; confirm.
271 if (style
== SCE_P_WORD
) {
272 if (0 == strcmp(s
, "class"))
274 else if (0 == strcmp(s
, "def"))
276 else if (0 == strcmp(s
, "import"))
278 else if (0 == strcmp(s
, "cdef"))
280 else if (0 == strcmp(s
, "cpdef"))
282 else if (0 == strcmp(s
, "cimport"))
284 else if (kwLast
!= kwCDef
&& kwLast
!= kwCPDef
)
286 } else if (kwLast
!= kwCDef
&& kwLast
!= kwCPDef
) {
290 } else if ((sc
.state
== SCE_P_COMMENTLINE
) || (sc
.state
== SCE_P_COMMENTBLOCK
)) {
291 if (sc
.ch
== '\r' || sc
.ch
== '\n') {
292 sc
.SetState(SCE_P_DEFAULT
);
294 } else if (sc
.state
== SCE_P_DECORATOR
) {
295 if (!IsAWordChar(sc
.ch
)) {
296 sc
.SetState(SCE_P_DEFAULT
);
298 } else if ((sc
.state
== SCE_P_STRING
) || (sc
.state
== SCE_P_CHARACTER
)) {
300 if ((sc
.chNext
== '\r') && (sc
.GetRelative(2) == '\n')) {
// A line end follows: remember that the string continues on the next line.
303 if (sc
.chNext
== '\n' || sc
.chNext
== '\r') {
304 inContinuedString
= true;
306 // Don't roll over the newline.
309 } else if ((sc
.state
== SCE_P_STRING
) && (sc
.ch
== '\"')) {
310 sc
.ForwardSetState(SCE_P_DEFAULT
);
312 } else if ((sc
.state
== SCE_P_CHARACTER
) && (sc
.ch
== '\'')) {
313 sc
.ForwardSetState(SCE_P_DEFAULT
);
316 } else if (sc
.state
== SCE_P_TRIPLE
) {
319 } else if (sc
.Match("\'\'\'")) {
322 sc
.ForwardSetState(SCE_P_DEFAULT
);
325 } else if (sc
.state
== SCE_P_TRIPLEDOUBLE
) {
328 } else if (sc
.Match("\"\"\"")) {
331 sc
.ForwardSetState(SCE_P_DEFAULT
);
// Indentation failed the check and the current character is not a space or
// tab: fill the indicator with value 1 from the last marker up to here.
336 if (!indentGood
&& !IsASpaceOrTab(sc
.ch
)) {
337 styler
.IndicatorFill(startIndicator
, sc
.currentPos
, indicatorWhitespace
, 1);
338 startIndicator
= sc
.currentPos
;
342 // One cdef or cpdef line, clear kwLast only at end of line
343 if ((kwLast
== kwCDef
|| kwLast
== kwCPDef
) && sc
.atLineEnd
) {
347 // State exit code may have moved on to end of line
348 if (needEOLCheck
&& sc
.atLineEnd
) {
350 styler
.IndentAmount(lineCurrent
, &spaceFlags
, IsPyComment
);
355 // Check for a new state starting character
356 if (sc
.state
== SCE_P_DEFAULT
) {
// Number start: a digit, or '.' immediately followed by a digit.
357 if (IsADigit(sc
.ch
) || (sc
.ch
== '.' && IsADigit(sc
.chNext
))) {
358 if (sc
.ch
== '0' && (sc
.chNext
== 'x' || sc
.chNext
== 'X')) {
359 base_n_number
= true;
360 sc
.SetState(SCE_P_NUMBER
);
361 } else if (sc
.ch
== '0' &&
362 (sc
.chNext
== 'o' || sc
.chNext
== 'O' || sc
.chNext
== 'b' || sc
.chNext
== 'B')) {
// 0o/0b is one number only when lexer.python.literals.binary is enabled; the
// other visible path styles the '0' as a number and continues as identifier.
363 if (base2or8Literals
) {
364 base_n_number
= true;
365 sc
.SetState(SCE_P_NUMBER
);
367 sc
.SetState(SCE_P_NUMBER
);
368 sc
.ForwardSetState(SCE_P_IDENTIFIER
);
371 base_n_number
= false;
372 sc
.SetState(SCE_P_NUMBER
);
374 } else if ((isascii(sc
.ch
) && isoperator(static_cast<char>(sc
.ch
))) || sc
.ch
== '`') {
375 sc
.SetState(SCE_P_OPERATOR
);
376 } else if (sc
.ch
== '#') {
// "##" starts a comment block style, a single '#' a comment line style.
377 sc
.SetState(sc
.chNext
== '#' ? SCE_P_COMMENTBLOCK
: SCE_P_COMMENTLINE
);
378 } else if (sc
.ch
== '@') {
379 sc
.SetState(SCE_P_DECORATOR
);
380 } else if (IsPyStringStart(sc
.ch
, sc
.chNext
, sc
.GetRelative(2), allowedLiterals
)) {
381 unsigned int nextIndex
= 0;
382 sc
.SetState(GetPyStringState(styler
, sc
.currentPos
, &nextIndex
, allowedLiterals
));
// Loops while nextIndex is still more than one position ahead of currentPos.
// NOTE(review): the loop body is not visible in this copy.
383 while (nextIndex
> (sc
.currentPos
+ 1) && sc
.More()) {
386 } else if (IsAWordStart(sc
.ch
)) {
387 sc
.SetState(SCE_P_IDENTIFIER
);
// Final indicator fill with value 0 from the last marker to the current position.
391 styler
.IndicatorFill(startIndicator
, sc
.currentPos
, indicatorWhitespace
, 0);
// Scans the characters of `line`, from the line's start position up to (but
// not including) the position just before the next line's start, treating
// space and tab specially.
// NOTE(review): the character read, the branch that precedes the visible
// `else if`, and the return statements are not present in this copy -
// presumably the result is true when the first character that is not a space
// or tab is '#'. Confirm against a complete copy.
395 static bool IsCommentLine(int line
, Accessor
&styler
) {
396 int pos
= styler
.LineStart(line
);
// One before the start of the following line.
397 int eol_pos
= styler
.LineStart(line
+ 1) - 1;
398 for (int i
= pos
; i
< eol_pos
; i
++) {
402 else if (ch
!= ' ' && ch
!= '\t')
408 static bool IsQuoteLine(int line
, Accessor
&styler
) {
409 int style
= styler
.StyleAt(styler
.LineStart(line
)) & 31;
410 return ((style
== SCE_P_TRIPLE
) || (style
== SCE_P_TRIPLEDOUBLE
));
// Folder entry point for Python: assigns a fold level to each line through
// styler.SetLevel(), derived from indentation, with optional folding of
// triple-quoted strings (fold.quotes.python) and of runs of comment lines
// (fold.comment.python).
//   startPos, length - requested range; startPos is reset to the start of the
//                      line reached by the backtracking below, and processing
//                      may continue past the requested end while a quote or
//                      comment block is still open (never past the document).
//   third parameter  - initial style, unused.
//   WordList *[]     - unnamed and unused.
//   styler           - supplies line, indentation and style information and
//                      receives the computed levels.
// NOTE(review): this copy of the function has gaps (a number of statements
// and closing braces are not present), so the added comments describe only
// what is visible.
414 static void FoldPyDoc(unsigned int startPos
, int length
, int /*initStyle - unused*/,
415 WordList
*[], Accessor
&styler
) {
416 const int maxPos
= startPos
+ length
;
417 const int maxLines
= styler
.GetLine(maxPos
- 1); // Requested last line
418 const int docLines
= styler
.GetLine(styler
.Length() - 1); // Available last line
420 // property fold.comment.python
421 // This option enables folding multi-line comments when using the Python lexer.
422 const bool foldComment
= styler
.GetPropertyInt("fold.comment.python") != 0;
424 // property fold.quotes.python
425 // This option enables folding multi-line quoted strings when using the Python lexer.
426 const bool foldQuotes
= styler
.GetPropertyInt("fold.quotes.python") != 0;
428 const bool foldCompact
= styler
.GetPropertyInt("fold.compact") != 0;
430 // Backtrack to previous non-blank line so we can determine indent level
431 // for any white space lines (needed esp. within triple quoted strings)
432 // and so we can fix any preceding fold level (which is why we go back
433 // at least one line in all cases)
435 int lineCurrent
= styler
.GetLine(startPos
);
436 int indentCurrent
= styler
.IndentAmount(lineCurrent
, &spaceFlags
, NULL
);
437 while (lineCurrent
> 0) {
439 indentCurrent
= styler
.IndentAmount(lineCurrent
, &spaceFlags
, NULL
);
// Condition met by a line that is not whitespace-only, not a comment line and
// does not start in a triple-quote style.
440 if (!(indentCurrent
& SC_FOLDLEVELWHITEFLAG
) &&
441 (!IsCommentLine(lineCurrent
, styler
)) &&
442 (!IsQuoteLine(lineCurrent
, styler
)))
445 int indentCurrentLevel
= indentCurrent
& SC_FOLDLEVELNUMBERMASK
;
447 // Set up initial loop state
448 startPos
= styler
.LineStart(lineCurrent
);
449 int prev_state
= SCE_P_DEFAULT
& 31;
450 if (lineCurrent
>= 1)
451 prev_state
= styler
.StyleAt(startPos
- 1) & 31;
// True when quote folding is on and the character just before the starting
// line carries a triple-quote style.
452 int prevQuote
= foldQuotes
&& ((prev_state
== SCE_P_TRIPLE
) || (prev_state
== SCE_P_TRIPLEDOUBLE
));
454 if (lineCurrent
>= 1)
455 prevComment
= foldComment
&& IsCommentLine(lineCurrent
- 1, styler
);
457 // Process all characters to end of requested range or end of any triple quote
458 // or comment that hangs over the end of the range. Cap processing in all cases
459 // to end of document (in case of unclosed quote or comment at end).
460 while ((lineCurrent
<= docLines
) && ((lineCurrent
<= maxLines
) || prevQuote
|| prevComment
)) {
463 int lev
= indentCurrent
;
464 int lineNext
= lineCurrent
+ 1;
465 int indentNext
= indentCurrent
;
467 if (lineNext
<= docLines
) {
468 // Information about next line is only available if not at end of document
469 indentNext
= styler
.IndentAmount(lineNext
, &spaceFlags
, NULL
);
470 int style
= styler
.StyleAt(styler
.LineStart(lineNext
)) & 31;
471 quote
= foldQuotes
&& ((style
== SCE_P_TRIPLE
) || (style
== SCE_P_TRIPLEDOUBLE
));
// Classify this line relative to the previous one: start or continuation of a
// quote, and start or continuation of a comment block.
473 const int quote_start
= (quote
&& !prevQuote
);
474 const int quote_continue
= (quote
&& prevQuote
);
475 const int comment
= foldComment
&& IsCommentLine(lineCurrent
, styler
);
// A comment block starts only when the next line is also a comment line and
// this line's level is above SC_FOLDLEVELBASE.
476 const int comment_start
= (comment
&& !prevComment
&& (lineNext
<= docLines
) &&
477 IsCommentLine(lineNext
, styler
) && (lev
> SC_FOLDLEVELBASE
));
478 const int comment_continue
= (comment
&& prevComment
);
// Outside a continuing quote and outside comments, the reference indent level
// follows the current line.
479 if ((!quote
|| !prevQuote
) && !comment
)
480 indentCurrentLevel
= indentCurrent
& SC_FOLDLEVELNUMBERMASK
;
482 indentNext
= indentCurrentLevel
;
483 if (indentNext
& SC_FOLDLEVELWHITEFLAG
)
484 indentNext
= SC_FOLDLEVELWHITEFLAG
| indentCurrentLevel
;
487 // Place fold point at start of triple quoted string
488 lev
|= SC_FOLDLEVELHEADERFLAG
;
489 } else if (quote_continue
|| prevQuote
) {
490 // Add level to rest of lines in the string
492 } else if (comment_start
) {
493 // Place fold point at start of a block of comments
494 lev
|= SC_FOLDLEVELHEADERFLAG
;
495 } else if (comment_continue
) {
496 // Add level to rest of lines in the block
500 // Skip past any blank lines for next indent level info; we skip also
501 // comments (all comments, not just those starting in column 0)
502 // which effectively folds them into surrounding code rather
503 // than screwing up folding.
506 (lineNext
< docLines
) &&
507 ((indentNext
& SC_FOLDLEVELWHITEFLAG
) ||
508 (lineNext
<= docLines
&& IsCommentLine(lineNext
, styler
)))) {
511 indentNext
= styler
.IndentAmount(lineNext
, &spaceFlags
, NULL
);
// levelBeforeComments is the larger of the current reference level and the
// level of the line found after the skipped lines.
514 const int levelAfterComments
= indentNext
& SC_FOLDLEVELNUMBERMASK
;
515 const int levelBeforeComments
= Maximum(indentCurrentLevel
,levelAfterComments
);
517 // Now set all the indent levels on the lines we skipped
518 // Do this from end to start. Once we encounter one line
519 // which is indented more than the line after the end of
520 // the comment-block, use the level of the block before
522 int skipLine
= lineNext
;
523 int skipLevel
= levelAfterComments
;
// Walks backwards from the line before lineNext down to the line after
// lineCurrent.
525 while (--skipLine
> lineCurrent
) {
526 int skipLineIndent
= styler
.IndentAmount(skipLine
, &spaceFlags
, NULL
);
529 if ((skipLineIndent
& SC_FOLDLEVELNUMBERMASK
) > levelAfterComments
)
530 skipLevel
= levelBeforeComments
;
532 int whiteFlag
= skipLineIndent
& SC_FOLDLEVELWHITEFLAG
;
534 styler
.SetLevel(skipLine
, skipLevel
| whiteFlag
);
536 if ((skipLineIndent
& SC_FOLDLEVELNUMBERMASK
) > levelAfterComments
&&
537 !(skipLineIndent
& SC_FOLDLEVELWHITEFLAG
) &&
538 !IsCommentLine(skipLine
, styler
))
539 skipLevel
= levelBeforeComments
;
541 styler
.SetLevel(skipLine
, skipLevel
);
545 // Set fold header on non-quote/non-comment line
546 if (!quote
&& !comment
&& !(indentCurrent
& SC_FOLDLEVELWHITEFLAG
)) {
// Header when the following (non-skipped) line is indented deeper.
547 if ((indentCurrent
& SC_FOLDLEVELNUMBERMASK
) < (indentNext
& SC_FOLDLEVELNUMBERMASK
))
548 lev
|= SC_FOLDLEVELHEADERFLAG
;
551 // Keep track of triple quote and block comment state of previous line
553 prevComment
= comment_start
|| comment_continue
;
555 // Set fold level for this line and move to next line
556 styler
.SetLevel(lineCurrent
, lev
);
557 indentCurrent
= indentNext
;
558 lineCurrent
= lineNext
;
561 // NOTE: Cannot set level of last line here because indentCurrent doesn't have
562 // header flag set; the loop above is crafted to take care of this case!
563 //styler.SetLevel(lineCurrent, indentCurrent);
// Table of human-readable word-list descriptions for this lexer.
// NOTE(review): only the "Highlighted identifiers" entry is visible in this
// copy; the remaining entries and the end of the initializer are not present.
566 static const char *const pythonWordListDesc
[] = {
568 "Highlighted identifiers",
572 LexerModule
lmPython(SCLEX_PYTHON
, ColourisePyDoc
, "python", FoldPyDoc
,