// Source: geany-mirror.git / scintilla / LexCPP.cxx (r5079, blob 8a9395e179e941e88d1a90316f5e24751e673fa2)
// Scintilla source code edit control
/** @file LexCPP.cxx
 ** Lexer for C++, C, Java, and JavaScript.
 **/
// Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.
8 #include <stdlib.h>
9 #include <string.h>
10 #include <ctype.h>
11 #include <stdio.h>
12 #include <stdarg.h>
14 #include "Platform.h"
16 #include "PropSet.h"
17 #include "Accessor.h"
18 #include "StyleContext.h"
19 #include "KeyWords.h"
20 #include "Scintilla.h"
21 #include "SciLexer.h"
22 #include "CharacterSet.h"
24 #ifdef SCI_NAMESPACE
25 using namespace Scintilla;
26 #endif
28 static bool IsSpaceEquiv(int state) {
29 return (state <= SCE_C_COMMENTDOC) ||
30 // including SCE_C_DEFAULT, SCE_C_COMMENT, SCE_C_COMMENTLINE
31 (state == SCE_C_COMMENTLINEDOC) || (state == SCE_C_COMMENTDOCKEYWORD) ||
32 (state == SCE_C_COMMENTDOCKEYWORDERROR);
35 // Preconditions: sc.currentPos points to a character after '+' or '-'.
36 // The test for pos reaching 0 should be redundant,
37 // and is in only for safety measures.
38 // Limitation: this code will give the incorrect answer for code like
39 // a = b+++/ptn/...
40 // Putting a space between the '++' post-inc operator and the '+' binary op
41 // fixes this, and is highly recommended for readability anyway.
42 static bool FollowsPostfixOperator(StyleContext &sc, Accessor &styler) {
43 int pos = (int) sc.currentPos;
44 while (--pos > 0) {
45 char ch = styler[pos];
46 if (ch == '+' || ch == '-') {
47 return styler[pos - 1] == ch;
50 return false;
53 static void ColouriseCppDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
54 Accessor &styler, bool caseSensitive) {
56 WordList &keywords = *keywordlists[0];
57 WordList &keywords2 = *keywordlists[1];
58 WordList &keywords3 = *keywordlists[2];
59 WordList &keywords4 = *keywordlists[3];
61 // property styling.within.preprocessor
62 // For C++ code, determines whether all preprocessor code is styled in the preprocessor style (0, the default)
63 // or only from the initial # to the end of the command word(1).
64 bool stylingWithinPreprocessor = styler.GetPropertyInt("styling.within.preprocessor") != 0;
66 CharacterSet setOKBeforeRE(CharacterSet::setNone, "([{=,:;!%^&*|?~+-");
67 CharacterSet setCouldBePostOp(CharacterSet::setNone, "+-");
69 CharacterSet setDoxygen(CharacterSet::setAlpha, "$@\\&<>#{}[]");
71 CharacterSet setWordStart(CharacterSet::setAlpha, "_", 0x80, true);
72 CharacterSet setWord(CharacterSet::setAlphaNum, "._", 0x80, true);
74 // property lexer.cpp.allow.dollars
75 // Set to 0 to disallow the '$' character in identifiers with the cpp lexer.
76 if (styler.GetPropertyInt("lexer.cpp.allow.dollars", 1) != 0) {
77 setWordStart.Add('$');
78 setWord.Add('$');
81 int chPrevNonWhite = ' ';
82 int visibleChars = 0;
83 bool lastWordWasUUID = false;
84 int styleBeforeDCKeyword = SCE_C_DEFAULT;
85 bool continuationLine = false;
86 bool isIncludePreprocessor = false;
88 if (initStyle == SCE_C_PREPROCESSOR) {
89 // Set continuationLine if last character of previous line is '\'
90 int lineCurrent = styler.GetLine(startPos);
91 if (lineCurrent > 0) {
92 int chBack = styler.SafeGetCharAt(startPos-1, 0);
93 int chBack2 = styler.SafeGetCharAt(startPos-2, 0);
94 int lineEndChar = '!';
95 if (chBack2 == '\r' && chBack == '\n') {
96 lineEndChar = styler.SafeGetCharAt(startPos-3, 0);
97 } else if (chBack == '\n' || chBack == '\r') {
98 lineEndChar = chBack2;
100 continuationLine = lineEndChar == '\\';
104 // look back to set chPrevNonWhite properly for better regex colouring
105 if (startPos > 0) {
106 int back = startPos;
107 while (--back && IsSpaceEquiv(styler.StyleAt(back)))
109 if (styler.StyleAt(back) == SCE_C_OPERATOR) {
110 chPrevNonWhite = styler.SafeGetCharAt(back);
114 StyleContext sc(startPos, length, initStyle, styler);
116 for (; sc.More(); sc.Forward()) {
118 if (sc.atLineStart) {
119 if (sc.state == SCE_C_STRING) {
120 // Prevent SCE_C_STRINGEOL from leaking back to previous line which
121 // ends with a line continuation by locking in the state upto this position.
122 sc.SetState(SCE_C_STRING);
124 // Reset states to begining of colourise so no surprises
125 // if different sets of lines lexed.
126 visibleChars = 0;
127 lastWordWasUUID = false;
128 isIncludePreprocessor = false;
131 // Handle line continuation generically.
132 if (sc.ch == '\\') {
133 if (sc.chNext == '\n' || sc.chNext == '\r') {
134 sc.Forward();
135 if (sc.ch == '\r' && sc.chNext == '\n') {
136 sc.Forward();
138 continuationLine = true;
139 continue;
143 // Determine if the current state should terminate.
144 switch (sc.state) {
145 case SCE_C_OPERATOR:
146 sc.SetState(SCE_C_DEFAULT);
147 break;
148 case SCE_C_NUMBER:
149 // We accept almost anything because of hex. and number suffixes
150 if (!setWord.Contains(sc.ch)) {
151 sc.SetState(SCE_C_DEFAULT);
153 break;
154 case SCE_C_IDENTIFIER:
155 if (!setWord.Contains(sc.ch) || (sc.ch == '.')) {
156 char s[1000];
157 if (caseSensitive) {
158 sc.GetCurrent(s, sizeof(s));
159 } else {
160 sc.GetCurrentLowered(s, sizeof(s));
162 if (keywords.InList(s)) {
163 lastWordWasUUID = strcmp(s, "uuid") == 0;
164 sc.ChangeState(SCE_C_WORD);
165 } else if (keywords2.InList(s)) {
166 sc.ChangeState(SCE_C_WORD2);
167 } else if (keywords4.InList(s)) {
168 sc.ChangeState(SCE_C_GLOBALCLASS);
170 sc.SetState(SCE_C_DEFAULT);
172 break;
173 case SCE_C_PREPROCESSOR:
174 if (sc.atLineStart && !continuationLine) {
175 sc.SetState(SCE_C_DEFAULT);
176 } else if (stylingWithinPreprocessor) {
177 if (IsASpace(sc.ch)) {
178 sc.SetState(SCE_C_DEFAULT);
180 } else {
181 if (sc.Match('/', '*') || sc.Match('/', '/')) {
182 sc.SetState(SCE_C_DEFAULT);
185 break;
186 case SCE_C_COMMENT:
187 if (sc.Match('*', '/')) {
188 sc.Forward();
189 sc.ForwardSetState(SCE_C_DEFAULT);
191 break;
192 case SCE_C_COMMENTDOC:
193 if (sc.Match('*', '/')) {
194 sc.Forward();
195 sc.ForwardSetState(SCE_C_DEFAULT);
196 } else if (sc.ch == '@' || sc.ch == '\\') { // JavaDoc and Doxygen support
197 // Verify that we have the conditions to mark a comment-doc-keyword
198 if ((IsASpace(sc.chPrev) || sc.chPrev == '*') && (!IsASpace(sc.chNext))) {
199 styleBeforeDCKeyword = SCE_C_COMMENTDOC;
200 sc.SetState(SCE_C_COMMENTDOCKEYWORD);
203 break;
204 case SCE_C_COMMENTLINE:
205 if (sc.atLineStart) {
206 sc.SetState(SCE_C_DEFAULT);
208 break;
209 case SCE_C_COMMENTLINEDOC:
210 if (sc.atLineStart) {
211 sc.SetState(SCE_C_DEFAULT);
212 } else if (sc.ch == '@' || sc.ch == '\\') { // JavaDoc and Doxygen support
213 // Verify that we have the conditions to mark a comment-doc-keyword
214 if ((IsASpace(sc.chPrev) || sc.chPrev == '/' || sc.chPrev == '!') && (!IsASpace(sc.chNext))) {
215 styleBeforeDCKeyword = SCE_C_COMMENTLINEDOC;
216 sc.SetState(SCE_C_COMMENTDOCKEYWORD);
219 break;
220 case SCE_C_COMMENTDOCKEYWORD:
221 if ((styleBeforeDCKeyword == SCE_C_COMMENTDOC) && sc.Match('*', '/')) {
222 sc.ChangeState(SCE_C_COMMENTDOCKEYWORDERROR);
223 sc.Forward();
224 sc.ForwardSetState(SCE_C_DEFAULT);
225 } else if (!setDoxygen.Contains(sc.ch)) {
226 char s[100];
227 if (caseSensitive) {
228 sc.GetCurrent(s, sizeof(s));
229 } else {
230 sc.GetCurrentLowered(s, sizeof(s));
232 if (!IsASpace(sc.ch) || !keywords3.InList(s + 1)) {
233 sc.ChangeState(SCE_C_COMMENTDOCKEYWORDERROR);
235 sc.SetState(styleBeforeDCKeyword);
237 break;
238 case SCE_C_STRING:
239 if (sc.atLineEnd) {
240 sc.ChangeState(SCE_C_STRINGEOL);
241 } else if (isIncludePreprocessor) {
242 if (sc.ch == '>') {
243 sc.ForwardSetState(SCE_C_DEFAULT);
244 isIncludePreprocessor = false;
246 } else if (sc.ch == '\\') {
247 if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
248 sc.Forward();
250 } else if (sc.ch == '\"') {
251 sc.ForwardSetState(SCE_C_DEFAULT);
253 break;
254 case SCE_C_CHARACTER:
255 if (sc.atLineEnd) {
256 sc.ChangeState(SCE_C_STRINGEOL);
257 } else if (sc.ch == '\\') {
258 if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
259 sc.Forward();
261 } else if (sc.ch == '\'') {
262 sc.ForwardSetState(SCE_C_DEFAULT);
264 break;
265 case SCE_C_REGEX:
266 if (sc.atLineStart) {
267 sc.SetState(SCE_C_DEFAULT);
268 } else if (sc.ch == '/') {
269 sc.Forward();
270 while ((sc.ch < 0x80) && islower(sc.ch))
271 sc.Forward(); // gobble regex flags
272 sc.SetState(SCE_C_DEFAULT);
273 } else if (sc.ch == '\\') {
274 // Gobble up the quoted character
275 if (sc.chNext == '\\' || sc.chNext == '/') {
276 sc.Forward();
279 break;
280 case SCE_C_STRINGEOL:
281 if (sc.atLineStart) {
282 sc.SetState(SCE_C_DEFAULT);
284 break;
285 case SCE_C_VERBATIM:
286 if (sc.ch == '\"') {
287 if (sc.chNext == '\"') {
288 sc.Forward();
289 } else {
290 sc.ForwardSetState(SCE_C_DEFAULT);
293 break;
294 case SCE_C_UUID:
295 if (sc.ch == '\r' || sc.ch == '\n' || sc.ch == ')') {
296 sc.SetState(SCE_C_DEFAULT);
300 // Determine if a new state should be entered.
301 if (sc.state == SCE_C_DEFAULT) {
302 if (sc.Match('@', '\"')) {
303 sc.SetState(SCE_C_VERBATIM);
304 sc.Forward();
305 } else if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
306 if (lastWordWasUUID) {
307 sc.SetState(SCE_C_UUID);
308 lastWordWasUUID = false;
309 } else {
310 sc.SetState(SCE_C_NUMBER);
312 } else if (setWordStart.Contains(sc.ch) || (sc.ch == '@')) {
313 if (lastWordWasUUID) {
314 sc.SetState(SCE_C_UUID);
315 lastWordWasUUID = false;
316 } else {
317 sc.SetState(SCE_C_IDENTIFIER);
319 } else if (sc.Match('/', '*')) {
320 if (sc.Match("/**") || sc.Match("/*!")) { // Support of Qt/Doxygen doc. style
321 sc.SetState(SCE_C_COMMENTDOC);
322 } else {
323 sc.SetState(SCE_C_COMMENT);
325 sc.Forward(); // Eat the * so it isn't used for the end of the comment
326 } else if (sc.Match('/', '/')) {
327 if ((sc.Match("///") && !sc.Match("////")) || sc.Match("//!"))
328 // Support of Qt/Doxygen doc. style
329 sc.SetState(SCE_C_COMMENTLINEDOC);
330 else
331 sc.SetState(SCE_C_COMMENTLINE);
332 } else if (sc.ch == '/' && setOKBeforeRE.Contains(chPrevNonWhite) &&
333 (!setCouldBePostOp.Contains(chPrevNonWhite) || !FollowsPostfixOperator(sc, styler))) {
334 sc.SetState(SCE_C_REGEX); // JavaScript's RegEx
335 } else if (sc.ch == '\"') {
336 sc.SetState(SCE_C_STRING);
337 isIncludePreprocessor = false; // ensure that '>' won't end the string
338 } else if (isIncludePreprocessor && sc.ch == '<') {
339 sc.SetState(SCE_C_STRING);
340 } else if (sc.ch == '\'') {
341 sc.SetState(SCE_C_CHARACTER);
342 } else if (sc.ch == '#' && visibleChars == 0) {
343 // Preprocessor commands are alone on their line
344 sc.SetState(SCE_C_PREPROCESSOR);
345 // Skip whitespace between # and preprocessor word
346 do {
347 sc.Forward();
348 } while ((sc.ch == ' ' || sc.ch == '\t') && sc.More());
349 if (sc.atLineEnd) {
350 sc.SetState(SCE_C_DEFAULT);
351 } else if (sc.Match("include")) {
352 isIncludePreprocessor = true;
354 } else if (isoperator(static_cast<char>(sc.ch))) {
355 sc.SetState(SCE_C_OPERATOR);
359 if (!IsASpace(sc.ch) && !IsSpaceEquiv(sc.state)) {
360 chPrevNonWhite = sc.ch;
361 visibleChars++;
363 continuationLine = false;
365 sc.Complete();
368 static bool IsStreamCommentStyle(int style) {
369 return style == SCE_C_COMMENT ||
370 style == SCE_C_COMMENTDOC ||
371 style == SCE_C_COMMENTDOCKEYWORD ||
372 style == SCE_C_COMMENTDOCKEYWORDERROR;
375 // Store both the current line's fold level and the next lines in the
376 // level store to make it easy to pick up with each increment
377 // and to make it possible to fiddle the current level for "} else {".
378 static void FoldCppDoc(unsigned int startPos, int length, int initStyle,
379 WordList *[], Accessor &styler) {
381 // property fold.comment
382 // This option enables folding multi-line comments and explicit fold points when using the C++ lexer.
383 // Explicit fold points allows adding extra folding by placing a //{ comment at the start and a //}
384 // at the end of a section that should fold.
385 bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
387 // property fold.preprocessor
388 // This option enables folding preprocessor directives when using the C++ lexer.
389 // Includes C#'s explicit #region and #endregion folding directives.
390 bool foldPreprocessor = styler.GetPropertyInt("fold.preprocessor") != 0;
392 bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
394 // property fold.at.else
395 // This option enables C++ folding on a "} else {" line of an if statement.
396 bool foldAtElse = styler.GetPropertyInt("fold.at.else", 0) != 0;
398 unsigned int endPos = startPos + length;
399 int visibleChars = 0;
400 int lineCurrent = styler.GetLine(startPos);
401 int levelCurrent = SC_FOLDLEVELBASE;
402 if (lineCurrent > 0)
403 levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
404 int levelMinCurrent = levelCurrent;
405 int levelNext = levelCurrent;
406 char chNext = styler[startPos];
407 int styleNext = styler.StyleAt(startPos);
408 int style = initStyle;
409 for (unsigned int i = startPos; i < endPos; i++) {
410 char ch = chNext;
411 chNext = styler.SafeGetCharAt(i + 1);
412 int stylePrev = style;
413 style = styleNext;
414 styleNext = styler.StyleAt(i + 1);
415 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
416 if (foldComment && IsStreamCommentStyle(style)) {
417 if (!IsStreamCommentStyle(stylePrev) && (stylePrev != SCE_C_COMMENTLINEDOC)) {
418 levelNext++;
419 } else if (!IsStreamCommentStyle(styleNext) && (styleNext != SCE_C_COMMENTLINEDOC) && !atEOL) {
420 // Comments don't end at end of line and the next character may be unstyled.
421 levelNext--;
424 if (foldComment && (style == SCE_C_COMMENTLINE)) {
425 if ((ch == '/') && (chNext == '/')) {
426 char chNext2 = styler.SafeGetCharAt(i + 2);
427 if (chNext2 == '{') {
428 levelNext++;
429 } else if (chNext2 == '}') {
430 levelNext--;
434 if (foldPreprocessor && (style == SCE_C_PREPROCESSOR)) {
435 if (ch == '#') {
436 unsigned int j = i + 1;
437 while ((j < endPos) && IsASpaceOrTab(styler.SafeGetCharAt(j))) {
438 j++;
440 if (styler.Match(j, "region") || styler.Match(j, "if")) {
441 levelNext++;
442 } else if (styler.Match(j, "end")) {
443 levelNext--;
447 if (style == SCE_C_OPERATOR) {
448 if (ch == '{') {
449 // Measure the minimum before a '{' to allow
450 // folding on "} else {"
451 if (levelMinCurrent > levelNext) {
452 levelMinCurrent = levelNext;
454 levelNext++;
455 } else if (ch == '}') {
456 levelNext--;
459 if (!IsASpace(ch))
460 visibleChars++;
461 if (atEOL || (i == endPos-1)) {
462 int levelUse = levelCurrent;
463 if (foldAtElse) {
464 levelUse = levelMinCurrent;
466 int lev = levelUse | levelNext << 16;
467 if (visibleChars == 0 && foldCompact)
468 lev |= SC_FOLDLEVELWHITEFLAG;
469 if (levelUse < levelNext)
470 lev |= SC_FOLDLEVELHEADERFLAG;
471 if (lev != styler.LevelAt(lineCurrent)) {
472 styler.SetLevel(lineCurrent, lev);
474 lineCurrent++;
475 levelCurrent = levelNext;
476 levelMinCurrent = levelCurrent;
477 if (atEOL && (i == static_cast<unsigned int>(styler.Length()-1))) {
478 // There is an empty line at end of file so give it same level and empty
479 styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
481 visibleChars = 0;
// Human-readable descriptions of the keyword lists, in list-index order.
// This table is passed to both LexerModule definitions in this file.
// NOTE(review): the terminating 0 entry is restored from the gap in the
// listing's line numbering (491 -> 495), where a punctuation-only line appears
// to have been dropped - confirm against the upstream file.
static const char *const cppWordLists[] = {
	"Primary keywords and identifiers",
	"Secondary keywords and identifiers",
	"Documentation comment keywords",
	"Unused",
	"Global classes and typedefs",
	0,
};
495 static void ColouriseCppDocSensitive(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
496 Accessor &styler) {
497 ColouriseCppDoc(startPos, length, initStyle, keywordlists, styler, true);
500 static void ColouriseCppDocInsensitive(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
501 Accessor &styler) {
502 ColouriseCppDoc(startPos, length, initStyle, keywordlists, styler, false);
505 LexerModule lmCPP(SCLEX_CPP, ColouriseCppDocSensitive, "cpp", FoldCppDoc, cppWordLists);
506 LexerModule lmCPPNoCase(SCLEX_CPPNOCASE, ColouriseCppDocInsensitive, "cppnocase", FoldCppDoc, cppWordLists);