// updated Scintilla to 2.29
// [TortoiseGit.git] / ext / scintilla / lexers / LexPython.cxx
// blob ecd00f9adefb7ce095c3165c80d3f489160c9e5b
// Scintilla source code edit control
/** @file LexPython.cxx
 ** Lexer for Python.
 **/
// Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.
8 #include <stdlib.h>
9 #include <string.h>
10 #include <stdio.h>
11 #include <stdarg.h>
12 #include <assert.h>
13 #include <ctype.h>
15 #include "ILexer.h"
16 #include "Scintilla.h"
17 #include "SciLexer.h"
19 #include "WordList.h"
20 #include "LexAccessor.h"
21 #include "Accessor.h"
22 #include "StyleContext.h"
23 #include "CharacterSet.h"
24 #include "LexerModule.h"
26 #ifdef SCI_NAMESPACE
27 using namespace Scintilla;
28 #endif
/* Kind of the most recently seen keyword; the lexer uses it to decide how the
 * following identifier is styled (class name, def name, "as" after import).
 * kwCDef, kwCTypeName and kwCPDef are only used for Cython.
 * NOTE(review): kwCTypeName is not referenced by the code visible in this file. */
enum kwType { kwOther, kwClass, kwDef, kwImport, kwCDef, kwCTypeName, kwCPDef };
// Indicator number passed to IndicatorFill() when marking bad indentation.
static const int indicatorWhitespace = 1;
35 static bool IsPyComment(Accessor &styler, int pos, int len) {
36 return len > 0 && styler[pos] == '#';
/* Bit flags selecting which single-letter string prefixes are recognised:
 * litU enables u/U, litB enables b/B. Values may be OR-ed together. */
enum literalsAllowed { litNone=0, litU=1, litB=2};

/* Return true if ch is a string type prefix letter (b/B or u/U) that is
 * enabled in the 'allowed' flag set. */
static bool IsPyStringTypeChar(int ch, literalsAllowed allowed) {
	return
		((allowed & litB) && (ch == 'b' || ch == 'B')) ||
		((allowed & litU) && (ch == 'u' || ch == 'U'));
}

/* Return true if the three characters ch, chNext, chNext2 begin a string
 * literal: a bare quote, an allowed type prefix followed by a quote or by
 * r/R and a quote, or r/R followed by a quote. */
static bool IsPyStringStart(int ch, int chNext, int chNext2, literalsAllowed allowed) {
	if (ch == '\'' || ch == '"')
		return true;
	if (IsPyStringTypeChar(ch, allowed)) {
		if (chNext == '"' || chNext == '\'')
			return true;
		if ((chNext == 'r' || chNext == 'R') && (chNext2 == '"' || chNext2 == '\''))
			return true;
	}
	if ((ch == 'r' || ch == 'R') && (chNext == '"' || chNext == '\''))
		return true;

	return false;
}
62 /* Return the state to use for the string starting at i; *nextIndex will be set to the first index following the quote(s) */
63 static int GetPyStringState(Accessor &styler, int i, unsigned int *nextIndex, literalsAllowed allowed) {
64 char ch = styler.SafeGetCharAt(i);
65 char chNext = styler.SafeGetCharAt(i + 1);
67 // Advance beyond r, u, or ur prefix (or r, b, or br in Python 3.0), but bail if there are any unexpected chars
68 if (ch == 'r' || ch == 'R') {
69 i++;
70 ch = styler.SafeGetCharAt(i);
71 chNext = styler.SafeGetCharAt(i + 1);
72 } else if (IsPyStringTypeChar(ch, allowed)) {
73 if (chNext == 'r' || chNext == 'R')
74 i += 2;
75 else
76 i += 1;
77 ch = styler.SafeGetCharAt(i);
78 chNext = styler.SafeGetCharAt(i + 1);
81 if (ch != '"' && ch != '\'') {
82 *nextIndex = i + 1;
83 return SCE_P_DEFAULT;
86 if (ch == chNext && ch == styler.SafeGetCharAt(i + 2)) {
87 *nextIndex = i + 3;
89 if (ch == '"')
90 return SCE_P_TRIPLEDOUBLE;
91 else
92 return SCE_P_TRIPLE;
93 } else {
94 *nextIndex = i + 1;
96 if (ch == '"')
97 return SCE_P_STRING;
98 else
99 return SCE_P_CHARACTER;
/* Return true if ch can appear inside a word: an ASCII letter or digit,
 * '.' or '_'. Values outside 0..0x7F are never word characters; the lower
 * bound keeps negative values away from isalnum(), whose argument must be
 * representable as unsigned char or equal EOF. */
static inline bool IsAWordChar(int ch) {
	return (ch >= 0) && (ch < 0x80) && (isalnum(ch) || ch == '.' || ch == '_');
}
/* Return true if ch can begin a word: an ASCII letter or digit, or '_'.
 * Values outside 0..0x7F are rejected; the lower bound keeps negative values
 * away from isalnum(), whose argument must be representable as unsigned char
 * or equal EOF. */
static inline bool IsAWordStart(int ch) {
	return (ch >= 0) && (ch < 0x80) && (isalnum(ch) || ch == '_');
}
111 static void ColourisePyDoc(unsigned int startPos, int length, int initStyle,
112 WordList *keywordlists[], Accessor &styler) {
114 int endPos = startPos + length;
116 // Backtrack to previous line in case need to fix its tab whinging
117 int lineCurrent = styler.GetLine(startPos);
118 if (startPos > 0) {
119 if (lineCurrent > 0) {
120 lineCurrent--;
121 // Look for backslash-continued lines
122 while (lineCurrent > 0) {
123 int eolPos = styler.LineStart(lineCurrent) - 1;
124 int eolStyle = styler.StyleAt(eolPos);
125 if (eolStyle == SCE_P_STRING
126 || eolStyle == SCE_P_CHARACTER
127 || eolStyle == SCE_P_STRINGEOL) {
128 lineCurrent -= 1;
129 } else {
130 break;
133 startPos = styler.LineStart(lineCurrent);
135 initStyle = startPos == 0 ? SCE_P_DEFAULT : styler.StyleAt(startPos - 1);
138 WordList &keywords = *keywordlists[0];
139 WordList &keywords2 = *keywordlists[1];
141 // property tab.timmy.whinge.level
142 // For Python code, checks whether indenting is consistent.
143 // The default, 0 turns off indentation checking,
144 // 1 checks whether each line is potentially inconsistent with the previous line,
145 // 2 checks whether any space characters occur before a tab character in the indentation,
146 // 3 checks whether any spaces are in the indentation, and
147 // 4 checks for any tab characters in the indentation.
148 // 1 is a good level to use.
149 const int whingeLevel = styler.GetPropertyInt("tab.timmy.whinge.level");
151 // property lexer.python.literals.binary
152 // Set to 0 to not recognise Python 3 binary and octal literals: 0b1011 0o712.
153 bool base2or8Literals = styler.GetPropertyInt("lexer.python.literals.binary", 1) != 0;
155 // property lexer.python.strings.u
156 // Set to 0 to not recognise Python Unicode literals u"x" as used before Python 3.
157 literalsAllowed allowedLiterals = (styler.GetPropertyInt("lexer.python.strings.u", 1)) ? litU : litNone;
159 // property lexer.python.strings.b
160 // Set to 0 to not recognise Python 3 bytes literals b"x".
161 if (styler.GetPropertyInt("lexer.python.strings.b", 1))
162 allowedLiterals = static_cast<literalsAllowed>(allowedLiterals | litB);
164 // property lexer.python.strings.over.newline
165 // Set to 1 to allow strings to span newline characters.
166 bool stringsOverNewline = styler.GetPropertyInt("lexer.python.strings.over.newline") != 0;
168 // property lexer.python.keywords2.no.sub.identifiers
169 // When enabled, it will not style keywords2 items that are used as a sub-identifier.
170 // Example: when set, will not highlight "foo.open" when "open" is a keywords2 item.
171 const bool keywords2NoSubIdentifiers = styler.GetPropertyInt("lexer.python.keywords2.no.sub.identifiers") != 0;
173 initStyle = initStyle & 31;
174 if (initStyle == SCE_P_STRINGEOL) {
175 initStyle = SCE_P_DEFAULT;
178 kwType kwLast = kwOther;
179 int spaceFlags = 0;
180 styler.IndentAmount(lineCurrent, &spaceFlags, IsPyComment);
181 bool base_n_number = false;
183 StyleContext sc(startPos, endPos - startPos, initStyle, styler);
185 bool indentGood = true;
186 int startIndicator = sc.currentPos;
187 bool inContinuedString = false;
189 for (; sc.More(); sc.Forward()) {
191 if (sc.atLineStart) {
192 styler.IndentAmount(lineCurrent, &spaceFlags, IsPyComment);
193 indentGood = true;
194 if (whingeLevel == 1) {
195 indentGood = (spaceFlags & wsInconsistent) == 0;
196 } else if (whingeLevel == 2) {
197 indentGood = (spaceFlags & wsSpaceTab) == 0;
198 } else if (whingeLevel == 3) {
199 indentGood = (spaceFlags & wsSpace) == 0;
200 } else if (whingeLevel == 4) {
201 indentGood = (spaceFlags & wsTab) == 0;
203 if (!indentGood) {
204 styler.IndicatorFill(startIndicator, sc.currentPos, indicatorWhitespace, 0);
205 startIndicator = sc.currentPos;
209 if (sc.atLineEnd) {
210 if ((sc.state == SCE_P_DEFAULT) ||
211 (sc.state == SCE_P_TRIPLE) ||
212 (sc.state == SCE_P_TRIPLEDOUBLE)) {
213 // Perform colourisation of white space and triple quoted strings at end of each line to allow
214 // tab marking to work inside white space and triple quoted strings
215 sc.SetState(sc.state);
217 lineCurrent++;
218 if ((sc.state == SCE_P_STRING) || (sc.state == SCE_P_CHARACTER)) {
219 if (inContinuedString || stringsOverNewline) {
220 inContinuedString = false;
221 } else {
222 sc.ChangeState(SCE_P_STRINGEOL);
223 sc.ForwardSetState(SCE_P_DEFAULT);
226 if (!sc.More())
227 break;
230 bool needEOLCheck = false;
232 // Check for a state end
233 if (sc.state == SCE_P_OPERATOR) {
234 kwLast = kwOther;
235 sc.SetState(SCE_P_DEFAULT);
236 } else if (sc.state == SCE_P_NUMBER) {
237 if (!IsAWordChar(sc.ch) &&
238 !(!base_n_number && ((sc.ch == '+' || sc.ch == '-') && (sc.chPrev == 'e' || sc.chPrev == 'E')))) {
239 sc.SetState(SCE_P_DEFAULT);
241 } else if (sc.state == SCE_P_IDENTIFIER) {
242 if ((sc.ch == '.') || (!IsAWordChar(sc.ch))) {
243 char s[100];
244 sc.GetCurrent(s, sizeof(s));
245 int style = SCE_P_IDENTIFIER;
246 if ((kwLast == kwImport) && (strcmp(s, "as") == 0)) {
247 style = SCE_P_WORD;
248 } else if (keywords.InList(s)) {
249 style = SCE_P_WORD;
250 } else if (kwLast == kwClass) {
251 style = SCE_P_CLASSNAME;
252 } else if (kwLast == kwDef) {
253 style = SCE_P_DEFNAME;
254 } else if (kwLast == kwCDef || kwLast == kwCPDef) {
255 int pos = sc.currentPos;
256 unsigned char ch = styler.SafeGetCharAt(pos, '\0');
257 while (ch != '\0') {
258 if (ch == '(') {
259 style = SCE_P_DEFNAME;
260 break;
261 } else if (ch == ':') {
262 style = SCE_P_CLASSNAME;
263 break;
264 } else if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') {
265 pos++;
266 ch = styler.SafeGetCharAt(pos, '\0');
267 } else {
268 break;
271 } else if (keywords2.InList(s)) {
272 if (keywords2NoSubIdentifiers) {
273 // We don't want to highlight keywords2
274 // that are used as a sub-identifier,
275 // i.e. not open in "foo.open".
276 int pos = styler.GetStartSegment() - 1;
277 if (pos < 0 || (styler.SafeGetCharAt(pos, '\0') != '.'))
278 style = SCE_P_WORD2;
279 } else {
280 style = SCE_P_WORD2;
283 sc.ChangeState(style);
284 sc.SetState(SCE_P_DEFAULT);
285 if (style == SCE_P_WORD) {
286 if (0 == strcmp(s, "class"))
287 kwLast = kwClass;
288 else if (0 == strcmp(s, "def"))
289 kwLast = kwDef;
290 else if (0 == strcmp(s, "import"))
291 kwLast = kwImport;
292 else if (0 == strcmp(s, "cdef"))
293 kwLast = kwCDef;
294 else if (0 == strcmp(s, "cpdef"))
295 kwLast = kwCPDef;
296 else if (0 == strcmp(s, "cimport"))
297 kwLast = kwImport;
298 else if (kwLast != kwCDef && kwLast != kwCPDef)
299 kwLast = kwOther;
300 } else if (kwLast != kwCDef && kwLast != kwCPDef) {
301 kwLast = kwOther;
304 } else if ((sc.state == SCE_P_COMMENTLINE) || (sc.state == SCE_P_COMMENTBLOCK)) {
305 if (sc.ch == '\r' || sc.ch == '\n') {
306 sc.SetState(SCE_P_DEFAULT);
308 } else if (sc.state == SCE_P_DECORATOR) {
309 if (!IsAWordChar(sc.ch)) {
310 sc.SetState(SCE_P_DEFAULT);
312 } else if ((sc.state == SCE_P_STRING) || (sc.state == SCE_P_CHARACTER)) {
313 if (sc.ch == '\\') {
314 if ((sc.chNext == '\r') && (sc.GetRelative(2) == '\n')) {
315 sc.Forward();
317 if (sc.chNext == '\n' || sc.chNext == '\r') {
318 inContinuedString = true;
319 } else {
320 // Don't roll over the newline.
321 sc.Forward();
323 } else if ((sc.state == SCE_P_STRING) && (sc.ch == '\"')) {
324 sc.ForwardSetState(SCE_P_DEFAULT);
325 needEOLCheck = true;
326 } else if ((sc.state == SCE_P_CHARACTER) && (sc.ch == '\'')) {
327 sc.ForwardSetState(SCE_P_DEFAULT);
328 needEOLCheck = true;
330 } else if (sc.state == SCE_P_TRIPLE) {
331 if (sc.ch == '\\') {
332 sc.Forward();
333 } else if (sc.Match("\'\'\'")) {
334 sc.Forward();
335 sc.Forward();
336 sc.ForwardSetState(SCE_P_DEFAULT);
337 needEOLCheck = true;
339 } else if (sc.state == SCE_P_TRIPLEDOUBLE) {
340 if (sc.ch == '\\') {
341 sc.Forward();
342 } else if (sc.Match("\"\"\"")) {
343 sc.Forward();
344 sc.Forward();
345 sc.ForwardSetState(SCE_P_DEFAULT);
346 needEOLCheck = true;
350 if (!indentGood && !IsASpaceOrTab(sc.ch)) {
351 styler.IndicatorFill(startIndicator, sc.currentPos, indicatorWhitespace, 1);
352 startIndicator = sc.currentPos;
353 indentGood = true;
356 // One cdef or cpdef line, clear kwLast only at end of line
357 if ((kwLast == kwCDef || kwLast == kwCPDef) && sc.atLineEnd) {
358 kwLast = kwOther;
361 // State exit code may have moved on to end of line
362 if (needEOLCheck && sc.atLineEnd) {
363 lineCurrent++;
364 styler.IndentAmount(lineCurrent, &spaceFlags, IsPyComment);
365 if (!sc.More())
366 break;
369 // Check for a new state starting character
370 if (sc.state == SCE_P_DEFAULT) {
371 if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
372 if (sc.ch == '0' && (sc.chNext == 'x' || sc.chNext == 'X')) {
373 base_n_number = true;
374 sc.SetState(SCE_P_NUMBER);
375 } else if (sc.ch == '0' &&
376 (sc.chNext == 'o' || sc.chNext == 'O' || sc.chNext == 'b' || sc.chNext == 'B')) {
377 if (base2or8Literals) {
378 base_n_number = true;
379 sc.SetState(SCE_P_NUMBER);
380 } else {
381 sc.SetState(SCE_P_NUMBER);
382 sc.ForwardSetState(SCE_P_IDENTIFIER);
384 } else {
385 base_n_number = false;
386 sc.SetState(SCE_P_NUMBER);
388 } else if ((isascii(sc.ch) && isoperator(static_cast<char>(sc.ch))) || sc.ch == '`') {
389 sc.SetState(SCE_P_OPERATOR);
390 } else if (sc.ch == '#') {
391 sc.SetState(sc.chNext == '#' ? SCE_P_COMMENTBLOCK : SCE_P_COMMENTLINE);
392 } else if (sc.ch == '@') {
393 sc.SetState(SCE_P_DECORATOR);
394 } else if (IsPyStringStart(sc.ch, sc.chNext, sc.GetRelative(2), allowedLiterals)) {
395 unsigned int nextIndex = 0;
396 sc.SetState(GetPyStringState(styler, sc.currentPos, &nextIndex, allowedLiterals));
397 while (nextIndex > (sc.currentPos + 1) && sc.More()) {
398 sc.Forward();
400 } else if (IsAWordStart(sc.ch)) {
401 sc.SetState(SCE_P_IDENTIFIER);
405 styler.IndicatorFill(startIndicator, sc.currentPos, indicatorWhitespace, 0);
406 sc.Complete();
409 static bool IsCommentLine(int line, Accessor &styler) {
410 int pos = styler.LineStart(line);
411 int eol_pos = styler.LineStart(line + 1) - 1;
412 for (int i = pos; i < eol_pos; i++) {
413 char ch = styler[i];
414 if (ch == '#')
415 return true;
416 else if (ch != ' ' && ch != '\t')
417 return false;
419 return false;
422 static bool IsQuoteLine(int line, Accessor &styler) {
423 int style = styler.StyleAt(styler.LineStart(line)) & 31;
424 return ((style == SCE_P_TRIPLE) || (style == SCE_P_TRIPLEDOUBLE));
428 static void FoldPyDoc(unsigned int startPos, int length, int /*initStyle - unused*/,
429 WordList *[], Accessor &styler) {
430 const int maxPos = startPos + length;
431 const int maxLines = (maxPos == styler.Length()) ? styler.GetLine(maxPos) : styler.GetLine(maxPos - 1); // Requested last line
432 const int docLines = styler.GetLine(styler.Length()); // Available last line
434 // property fold.quotes.python
435 // This option enables folding multi-line quoted strings when using the Python lexer.
436 const bool foldQuotes = styler.GetPropertyInt("fold.quotes.python") != 0;
438 const bool foldCompact = styler.GetPropertyInt("fold.compact") != 0;
440 // Backtrack to previous non-blank line so we can determine indent level
441 // for any white space lines (needed esp. within triple quoted strings)
442 // and so we can fix any preceding fold level (which is why we go back
443 // at least one line in all cases)
444 int spaceFlags = 0;
445 int lineCurrent = styler.GetLine(startPos);
446 int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
447 while (lineCurrent > 0) {
448 lineCurrent--;
449 indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
450 if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG) &&
451 (!IsCommentLine(lineCurrent, styler)) &&
452 (!IsQuoteLine(lineCurrent, styler)))
453 break;
455 int indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
457 // Set up initial loop state
458 startPos = styler.LineStart(lineCurrent);
459 int prev_state = SCE_P_DEFAULT & 31;
460 if (lineCurrent >= 1)
461 prev_state = styler.StyleAt(startPos - 1) & 31;
462 int prevQuote = foldQuotes && ((prev_state == SCE_P_TRIPLE) || (prev_state == SCE_P_TRIPLEDOUBLE));
464 // Process all characters to end of requested range or end of any triple quote
465 //that hangs over the end of the range. Cap processing in all cases
466 // to end of document (in case of unclosed quote at end).
467 while ((lineCurrent <= docLines) && ((lineCurrent <= maxLines) || prevQuote)) {
469 // Gather info
470 int lev = indentCurrent;
471 int lineNext = lineCurrent + 1;
472 int indentNext = indentCurrent;
473 int quote = false;
474 if (lineNext <= docLines) {
475 // Information about next line is only available if not at end of document
476 indentNext = styler.IndentAmount(lineNext, &spaceFlags, NULL);
477 int lookAtPos = (styler.LineStart(lineNext) == styler.Length()) ? styler.Length() - 1 : styler.LineStart(lineNext);
478 int style = styler.StyleAt(lookAtPos) & 31;
479 quote = foldQuotes && ((style == SCE_P_TRIPLE) || (style == SCE_P_TRIPLEDOUBLE));
481 const int quote_start = (quote && !prevQuote);
482 const int quote_continue = (quote && prevQuote);
483 if (!quote || !prevQuote)
484 indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
485 if (quote)
486 indentNext = indentCurrentLevel;
487 if (indentNext & SC_FOLDLEVELWHITEFLAG)
488 indentNext = SC_FOLDLEVELWHITEFLAG | indentCurrentLevel;
490 if (quote_start) {
491 // Place fold point at start of triple quoted string
492 lev |= SC_FOLDLEVELHEADERFLAG;
493 } else if (quote_continue || prevQuote) {
494 // Add level to rest of lines in the string
495 lev = lev + 1;
498 // Skip past any blank lines for next indent level info; we skip also
499 // comments (all comments, not just those starting in column 0)
500 // which effectively folds them into surrounding code rather
501 // than screwing up folding.
503 while (!quote &&
504 (lineNext < docLines) &&
505 ((indentNext & SC_FOLDLEVELWHITEFLAG) ||
506 (lineNext <= docLines && IsCommentLine(lineNext, styler)))) {
508 lineNext++;
509 indentNext = styler.IndentAmount(lineNext, &spaceFlags, NULL);
512 const int levelAfterComments = indentNext & SC_FOLDLEVELNUMBERMASK;
513 const int levelBeforeComments = Maximum(indentCurrentLevel,levelAfterComments);
515 // Now set all the indent levels on the lines we skipped
516 // Do this from end to start. Once we encounter one line
517 // which is indented more than the line after the end of
518 // the comment-block, use the level of the block before
520 int skipLine = lineNext;
521 int skipLevel = levelAfterComments;
523 while (--skipLine > lineCurrent) {
524 int skipLineIndent = styler.IndentAmount(skipLine, &spaceFlags, NULL);
526 if (foldCompact) {
527 if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > levelAfterComments)
528 skipLevel = levelBeforeComments;
530 int whiteFlag = skipLineIndent & SC_FOLDLEVELWHITEFLAG;
532 styler.SetLevel(skipLine, skipLevel | whiteFlag);
533 } else {
534 if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > levelAfterComments &&
535 !(skipLineIndent & SC_FOLDLEVELWHITEFLAG) &&
536 !IsCommentLine(skipLine, styler))
537 skipLevel = levelBeforeComments;
539 styler.SetLevel(skipLine, skipLevel);
543 // Set fold header on non-quote line
544 if (!quote && !(indentCurrent & SC_FOLDLEVELWHITEFLAG)) {
545 if ((indentCurrent & SC_FOLDLEVELNUMBERMASK) < (indentNext & SC_FOLDLEVELNUMBERMASK))
546 lev |= SC_FOLDLEVELHEADERFLAG;
549 // Keep track of triple quote state of previous line
550 prevQuote = quote;
552 // Set fold level for this line and move to next line
553 styler.SetLevel(lineCurrent, foldCompact ? lev : lev & ~SC_FOLDLEVELWHITEFLAG);
554 indentCurrent = indentNext;
555 lineCurrent = lineNext;
558 // NOTE: Cannot set level of last line here because indentCurrent doesn't have
559 // header flag set; the loop above is crafted to take care of this case!
560 //styler.SetLevel(lineCurrent, indentCurrent);
// Descriptions of the keyword lists, in keyword-list index order:
// [0] is read as "keywords" and [1] as "keywords2" by ColourisePyDoc.
// The list ends with a 0 entry.
static const char *const pythonWordListDesc[] = {
	"Keywords",
	"Highlighted identifiers",
	0
};
569 LexerModule lmPython(SCLEX_PYTHON, ColourisePyDoc, "python", FoldPyDoc,
570 pythonWordListDesc);