Upgraded to scintilla 3.2.3
[TortoiseGit.git] / ext / scintilla / lexers / LexCPP.cxx
blob0d7ea455357db4ebf513f3ff4bf328c768d34da7
1 // Scintilla source code edit control
2 /** @file LexCPP.cxx
3 ** Lexer for C++, C, Java, and JavaScript.
4 ** Further folding features and configuration properties added by "Udo Lechner" <dlchnr(at)gmx(dot)net>
5 **/
6 // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
7 // The License.txt file describes the conditions under which this software may be distributed.
9 #include <stdlib.h>
10 #include <string.h>
11 #include <ctype.h>
12 #include <stdio.h>
13 #include <stdarg.h>
14 #include <assert.h>
16 #include <string>
17 #include <vector>
18 #include <map>
19 #include <algorithm>
21 #include "ILexer.h"
22 #include "Scintilla.h"
23 #include "SciLexer.h"
25 #include "WordList.h"
26 #include "LexAccessor.h"
27 #include "Accessor.h"
28 #include "StyleContext.h"
29 #include "CharacterSet.h"
30 #include "LexerModule.h"
31 #include "OptionSet.h"
32 #include "SparseState.h"
34 #ifdef SCI_NAMESPACE
35 using namespace Scintilla;
36 #endif
38 static bool IsSpaceEquiv(int state) {
39 return (state <= SCE_C_COMMENTDOC) ||
40 // including SCE_C_DEFAULT, SCE_C_COMMENT, SCE_C_COMMENTLINE
41 (state == SCE_C_COMMENTLINEDOC) || (state == SCE_C_COMMENTDOCKEYWORD) ||
42 (state == SCE_C_COMMENTDOCKEYWORDERROR);
45 // Preconditions: sc.currentPos points to a character after '+' or '-'.
46 // The test for pos reaching 0 should be redundant,
47 // and is in only for safety measures.
48 // Limitation: this code will give the incorrect answer for code like
49 // a = b+++/ptn/...
50 // Putting a space between the '++' post-inc operator and the '+' binary op
51 // fixes this, and is highly recommended for readability anyway.
52 static bool FollowsPostfixOperator(StyleContext &sc, LexAccessor &styler) {
53 int pos = (int) sc.currentPos;
54 while (--pos > 0) {
55 char ch = styler[pos];
56 if (ch == '+' || ch == '-') {
57 return styler[pos - 1] == ch;
60 return false;
63 static bool followsReturnKeyword(StyleContext &sc, LexAccessor &styler) {
64 // Don't look at styles, so no need to flush.
65 int pos = (int) sc.currentPos;
66 int currentLine = styler.GetLine(pos);
67 int lineStartPos = styler.LineStart(currentLine);
68 char ch;
69 while (--pos > lineStartPos) {
70 ch = styler.SafeGetCharAt(pos);
71 if (ch != ' ' && ch != '\t') {
72 break;
75 const char *retBack = "nruter";
76 const char *s = retBack;
77 while (*s
78 && pos >= lineStartPos
79 && styler.SafeGetCharAt(pos) == *s) {
80 s++;
81 pos--;
83 return !*s;
86 static std::string GetRestOfLine(LexAccessor &styler, int start, bool allowSpace) {
87 std::string restOfLine;
88 int i =0;
89 char ch = styler.SafeGetCharAt(start, '\n');
90 while ((ch != '\r') && (ch != '\n')) {
91 if (allowSpace || (ch != ' '))
92 restOfLine += ch;
93 i++;
94 ch = styler.SafeGetCharAt(start + i, '\n');
96 return restOfLine;
99 static bool IsStreamCommentStyle(int style) {
100 return style == SCE_C_COMMENT ||
101 style == SCE_C_COMMENTDOC ||
102 style == SCE_C_COMMENTDOCKEYWORD ||
103 style == SCE_C_COMMENTDOCKEYWORDERROR;
// Splits 's' into tokens separated by runs of spaces and/or tabs.
// Empty tokens are never produced; scanning stops at the first NUL character.
static std::vector<std::string> Tokenize(const std::string &s) {
	std::vector<std::string> tokens;
	std::string current;
	const char *cursor = s.c_str();
	while (*cursor != '\0') {
		const char c = *cursor++;
		const bool isSeparator = (c == ' ') || (c == '\t');
		if (!isSeparator) {
			current += c;
			continue;
		}
		// A separator ends the token being built, if there is one.
		if (!current.empty()) {
			tokens.push_back(current);
			current.clear();
		}
	}
	if (!current.empty()) {
		tokens.push_back(current);
	}
	return tokens;
}
// One preprocessor definition: the key and value strings together with the
// line number it was recorded for.
struct PPDefinition {
	int line;
	std::string key;
	std::string value;

	PPDefinition(int line_, const std::string &key_, const std::string &value_)
		: line(line_),
		  key(key_),
		  value(value_) {
	}
};
// Preprocessor conditional state for one line.
// Each nesting level of #if owns one bit in two bit sets:
//   state   - bit set when that level is inactive
//   ifTaken - bit set when a branch of that level has already been taken
// 'level' is the index of the innermost open section, -1 when none is open.
// Only levels 0..31 are tracked; deeper (or unbalanced, negative) levels are
// counted but leave the bit sets untouched.
class LinePPState {
	int state;
	int ifTaken;
	int level;
	bool ValidLevel() const {
		return level >= 0 && level < 32;
	}
	// Bit mask for the current level. Returns 0 for an untracked level so
	// that callers which forget to check can never trigger an undefined
	// shift (negative or >= 32 shift count).
	int maskLevel() const {
		if (!ValidLevel()) {
			return 0;
		}
		// Shift an unsigned value so that level 31 does not overflow a
		// signed int.
		return static_cast<int>(1u << level);
	}
public:
	LinePPState() : state(0), ifTaken(0), level(-1) {
	}
	// True when any tracked level is inactive.
	bool IsInactive() const {
		return state != 0;
	}
	// True when a branch of the current level has already been taken.
	// Always false when the current level is not tracked, e.g. for a stray
	// #else with no open #if.
	bool CurrentIfTaken() const {
		return ValidLevel() && ((ifTaken & maskLevel()) != 0);
	}
	// Opens a new nesting level; 'on' says whether its first branch is active.
	void StartSection(bool on) {
		level++;
		if (ValidLevel()) {
			if (on) {
				state &= ~maskLevel();
				ifTaken |= maskLevel();
			} else {
				state |= maskLevel();
				ifTaken &= ~maskLevel();
			}
		}
	}
	// Closes the current nesting level, clearing its bits.
	void EndSection() {
		if (ValidLevel()) {
			state &= ~maskLevel();
			ifTaken &= ~maskLevel();
		}
		level--;
	}
	// Flips the active/inactive bit of the current level and records that a
	// branch of this level has been taken.
	void InvertCurrentLevel() {
		if (ValidLevel()) {
			state ^= maskLevel();
			ifTaken |= maskLevel();
		}
	}
};
181 // Hold the preprocessor state for each line seen.
182 // Currently one entry per line but could become sparse with just one entry per preprocessor line.
183 class PPStates {
184 std::vector<LinePPState> vlls;
185 public:
186 LinePPState ForLine(int line) {
187 if ((line > 0) && (vlls.size() > static_cast<size_t>(line))) {
188 return vlls[line];
189 } else {
190 return LinePPState();
193 void Add(int line, LinePPState lls) {
194 vlls.resize(line+1);
195 vlls[line] = lls;
199 // An individual named option for use in an OptionSet
// Options used for LexerCPP
// Each field is bound to a named property by OptionSetCPP; the constructor
// supplies the value in effect until that property is set.
struct OptionsCPP {
	bool stylingWithinPreprocessor;
	bool identifiersAllowDollars;
	bool trackPreprocessor;
	bool updatePreprocessor;
	bool triplequotedStrings;
	bool hashquotedStrings;
	bool fold;
	bool foldSyntaxBased;
	bool foldComment;
	bool foldCommentMultiline;
	bool foldCommentExplicit;
	std::string foldExplicitStart;
	std::string foldExplicitEnd;
	bool foldExplicitAnywhere;
	bool foldPreprocessor;
	bool foldCompact;
	bool foldAtElse;

	OptionsCPP() :
		stylingWithinPreprocessor(false),
		identifiersAllowDollars(true),
		trackPreprocessor(true),
		updatePreprocessor(true),
		triplequotedStrings(false),
		hashquotedStrings(false),
		fold(false),
		foldSyntaxBased(true),
		foldComment(false),
		foldCommentMultiline(true),
		foldCommentExplicit(true),
		foldExplicitStart(),
		foldExplicitEnd(),
		foldExplicitAnywhere(false),
		foldPreprocessor(false),
		foldCompact(false),
		foldAtElse(false) {
	}
};
// Descriptions of the keyword lists accepted by the lexer, in the order of
// the list index handled by LexerCPP::WordListSet (0..4).
// The array is passed to DefineWordListSets without a length, so it ends with
// a null pointer as a sentinel (NOTE(review): presumably what
// DefineWordListSets uses to find the end - confirm against OptionSet.h).
static const char *const cppWordLists[] = {
	"Primary keywords and identifiers",
	"Secondary keywords and identifiers",
	"Documentation comment keywords",
	"Global classes and typedefs",
	"Preprocessor definitions",
	0,
};
250 struct OptionSetCPP : public OptionSet<OptionsCPP> {
251 OptionSetCPP() {
252 DefineProperty("styling.within.preprocessor", &OptionsCPP::stylingWithinPreprocessor,
253 "For C++ code, determines whether all preprocessor code is styled in the "
254 "preprocessor style (0, the default) or only from the initial # to the end "
255 "of the command word(1).");
257 DefineProperty("lexer.cpp.allow.dollars", &OptionsCPP::identifiersAllowDollars,
258 "Set to 0 to disallow the '$' character in identifiers with the cpp lexer.");
260 DefineProperty("lexer.cpp.track.preprocessor", &OptionsCPP::trackPreprocessor,
261 "Set to 1 to interpret #if/#else/#endif to grey out code that is not active.");
263 DefineProperty("lexer.cpp.update.preprocessor", &OptionsCPP::updatePreprocessor,
264 "Set to 1 to update preprocessor definitions when #define found.");
266 DefineProperty("lexer.cpp.triplequoted.strings", &OptionsCPP::triplequotedStrings,
267 "Set to 1 to enable highlighting of triple-quoted strings.");
269 DefineProperty("lexer.cpp.hashquoted.strings", &OptionsCPP::hashquotedStrings,
270 "Set to 1 to enable highlighting of hash-quoted strings.");
272 DefineProperty("fold", &OptionsCPP::fold);
274 DefineProperty("fold.cpp.syntax.based", &OptionsCPP::foldSyntaxBased,
275 "Set this property to 0 to disable syntax based folding.");
277 DefineProperty("fold.comment", &OptionsCPP::foldComment,
278 "This option enables folding multi-line comments and explicit fold points when using the C++ lexer. "
279 "Explicit fold points allows adding extra folding by placing a //{ comment at the start and a //} "
280 "at the end of a section that should fold.");
282 DefineProperty("fold.cpp.comment.multiline", &OptionsCPP::foldCommentMultiline,
283 "Set this property to 0 to disable folding multi-line comments when fold.comment=1.");
285 DefineProperty("fold.cpp.comment.explicit", &OptionsCPP::foldCommentExplicit,
286 "Set this property to 0 to disable folding explicit fold points when fold.comment=1.");
288 DefineProperty("fold.cpp.explicit.start", &OptionsCPP::foldExplicitStart,
289 "The string to use for explicit fold start points, replacing the standard //{.");
291 DefineProperty("fold.cpp.explicit.end", &OptionsCPP::foldExplicitEnd,
292 "The string to use for explicit fold end points, replacing the standard //}.");
294 DefineProperty("fold.cpp.explicit.anywhere", &OptionsCPP::foldExplicitAnywhere,
295 "Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
297 DefineProperty("fold.preprocessor", &OptionsCPP::foldPreprocessor,
298 "This option enables folding preprocessor directives when using the C++ lexer. "
299 "Includes C#'s explicit #region and #endregion folding directives.");
301 DefineProperty("fold.compact", &OptionsCPP::foldCompact);
303 DefineProperty("fold.at.else", &OptionsCPP::foldAtElse,
304 "This option enables C++ folding on a \"} else {\" line of an if statement.");
306 DefineWordListSets(cppWordLists);
310 class LexerCPP : public ILexer {
311 bool caseSensitive;
312 CharacterSet setWord;
313 CharacterSet setNegationOp;
314 CharacterSet setArithmethicOp;
315 CharacterSet setRelOp;
316 CharacterSet setLogicalOp;
317 PPStates vlls;
318 std::vector<PPDefinition> ppDefineHistory;
319 WordList keywords;
320 WordList keywords2;
321 WordList keywords3;
322 WordList keywords4;
323 WordList ppDefinitions;
324 std::map<std::string, std::string> preprocessorDefinitionsStart;
325 OptionsCPP options;
326 OptionSetCPP osCPP;
327 SparseState<std::string> rawStringTerminators;
328 enum { activeFlag = 0x40 };
329 public:
330 LexerCPP(bool caseSensitive_) :
331 caseSensitive(caseSensitive_),
332 setWord(CharacterSet::setAlphaNum, "._", 0x80, true),
333 setNegationOp(CharacterSet::setNone, "!"),
334 setArithmethicOp(CharacterSet::setNone, "+-/*%"),
335 setRelOp(CharacterSet::setNone, "=!<>"),
336 setLogicalOp(CharacterSet::setNone, "|&") {
338 virtual ~LexerCPP() {
340 void SCI_METHOD Release() {
341 delete this;
343 int SCI_METHOD Version() const {
344 return lvOriginal;
346 const char * SCI_METHOD PropertyNames() {
347 return osCPP.PropertyNames();
349 int SCI_METHOD PropertyType(const char *name) {
350 return osCPP.PropertyType(name);
352 const char * SCI_METHOD DescribeProperty(const char *name) {
353 return osCPP.DescribeProperty(name);
355 int SCI_METHOD PropertySet(const char *key, const char *val);
356 const char * SCI_METHOD DescribeWordListSets() {
357 return osCPP.DescribeWordListSets();
359 int SCI_METHOD WordListSet(int n, const char *wl);
360 void SCI_METHOD Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
361 void SCI_METHOD Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
363 void * SCI_METHOD PrivateCall(int, void *) {
364 return 0;
367 static ILexer *LexerFactoryCPP() {
368 return new LexerCPP(true);
370 static ILexer *LexerFactoryCPPInsensitive() {
371 return new LexerCPP(false);
373 static int MaskActive(int style) {
374 return style & ~activeFlag;
376 void EvaluateTokens(std::vector<std::string> &tokens);
377 bool EvaluateExpression(const std::string &expr, const std::map<std::string, std::string> &preprocessorDefinitions);
380 int SCI_METHOD LexerCPP::PropertySet(const char *key, const char *val) {
381 if (osCPP.PropertySet(&options, key, val)) {
382 if (strcmp(key, "lexer.cpp.allow.dollars") == 0) {
383 setWord = CharacterSet(CharacterSet::setAlphaNum, "._", 0x80, true);
384 if (options.identifiersAllowDollars) {
385 setWord.Add('$');
388 return 0;
390 return -1;
393 int SCI_METHOD LexerCPP::WordListSet(int n, const char *wl) {
394 WordList *wordListN = 0;
395 switch (n) {
396 case 0:
397 wordListN = &keywords;
398 break;
399 case 1:
400 wordListN = &keywords2;
401 break;
402 case 2:
403 wordListN = &keywords3;
404 break;
405 case 3:
406 wordListN = &keywords4;
407 break;
408 case 4:
409 wordListN = &ppDefinitions;
410 break;
412 int firstModification = -1;
413 if (wordListN) {
414 WordList wlNew;
415 wlNew.Set(wl);
416 if (*wordListN != wlNew) {
417 wordListN->Set(wl);
418 firstModification = 0;
419 if (n == 4) {
420 // Rebuild preprocessorDefinitions
421 preprocessorDefinitionsStart.clear();
422 for (int nDefinition = 0; nDefinition < ppDefinitions.len; nDefinition++) {
423 char *cpDefinition = ppDefinitions.words[nDefinition];
424 char *cpEquals = strchr(cpDefinition, '=');
425 if (cpEquals) {
426 std::string name(cpDefinition, cpEquals - cpDefinition);
427 std::string val(cpEquals+1);
428 preprocessorDefinitionsStart[name] = val;
429 } else {
430 std::string name(cpDefinition);
431 std::string val("1");
432 preprocessorDefinitionsStart[name] = val;
438 return firstModification;
441 // Functor used to truncate history
442 struct After {
443 int line;
444 After(int line_) : line(line_) {}
445 bool operator()(PPDefinition &p) const {
446 return p.line > line;
450 void SCI_METHOD LexerCPP::Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
451 LexAccessor styler(pAccess);
453 CharacterSet setOKBeforeRE(CharacterSet::setNone, "([{=,:;!%^&*|?~+-");
454 CharacterSet setCouldBePostOp(CharacterSet::setNone, "+-");
456 CharacterSet setDoxygen(CharacterSet::setAlpha, "$@\\&<>#{}[]");
458 CharacterSet setWordStart(CharacterSet::setAlpha, "_", 0x80, true);
460 if (options.identifiersAllowDollars) {
461 setWordStart.Add('$');
464 int chPrevNonWhite = ' ';
465 int visibleChars = 0;
466 bool lastWordWasUUID = false;
467 int styleBeforeDCKeyword = SCE_C_DEFAULT;
468 bool continuationLine = false;
469 bool isIncludePreprocessor = false;
470 bool isStringInPreprocessor = false;
472 int lineCurrent = styler.GetLine(startPos);
473 if ((MaskActive(initStyle) == SCE_C_PREPROCESSOR) ||
474 (MaskActive(initStyle) == SCE_C_COMMENTLINE) ||
475 (MaskActive(initStyle) == SCE_C_COMMENTLINEDOC)) {
476 // Set continuationLine if last character of previous line is '\'
477 if (lineCurrent > 0) {
478 int chBack = styler.SafeGetCharAt(startPos-1, 0);
479 int chBack2 = styler.SafeGetCharAt(startPos-2, 0);
480 int lineEndChar = '!';
481 if (chBack2 == '\r' && chBack == '\n') {
482 lineEndChar = styler.SafeGetCharAt(startPos-3, 0);
483 } else if (chBack == '\n' || chBack == '\r') {
484 lineEndChar = chBack2;
486 continuationLine = lineEndChar == '\\';
490 // look back to set chPrevNonWhite properly for better regex colouring
491 if (startPos > 0) {
492 int back = startPos;
493 while (--back && IsSpaceEquiv(MaskActive(styler.StyleAt(back))))
495 if (MaskActive(styler.StyleAt(back)) == SCE_C_OPERATOR) {
496 chPrevNonWhite = styler.SafeGetCharAt(back);
500 StyleContext sc(startPos, length, initStyle, styler, 0x7f);
501 LinePPState preproc = vlls.ForLine(lineCurrent);
503 bool definitionsChanged = false;
505 // Truncate ppDefineHistory before current line
507 if (!options.updatePreprocessor)
508 ppDefineHistory.clear();
510 std::vector<PPDefinition>::iterator itInvalid = std::find_if(ppDefineHistory.begin(), ppDefineHistory.end(), After(lineCurrent-1));
511 if (itInvalid != ppDefineHistory.end()) {
512 ppDefineHistory.erase(itInvalid, ppDefineHistory.end());
513 definitionsChanged = true;
516 std::map<std::string, std::string> preprocessorDefinitions = preprocessorDefinitionsStart;
517 for (std::vector<PPDefinition>::iterator itDef = ppDefineHistory.begin(); itDef != ppDefineHistory.end(); ++itDef) {
518 preprocessorDefinitions[itDef->key] = itDef->value;
521 std::string rawStringTerminator = rawStringTerminators.ValueAt(lineCurrent-1);
522 SparseState<std::string> rawSTNew(lineCurrent);
524 int activitySet = preproc.IsInactive() ? activeFlag : 0;
526 for (; sc.More();) {
528 if (sc.atLineStart) {
529 // Using MaskActive() is not needed in the following statement.
530 // Inside inactive preprocessor declaration, state will be reset anyway at the end of this block.
531 if ((sc.state == SCE_C_STRING) || (sc.state == SCE_C_CHARACTER)) {
532 // Prevent SCE_C_STRINGEOL from leaking back to previous line which
533 // ends with a line continuation by locking in the state upto this position.
534 sc.SetState(sc.state);
536 if ((MaskActive(sc.state) == SCE_C_PREPROCESSOR) && (!continuationLine)) {
537 sc.SetState(SCE_C_DEFAULT|activitySet);
539 // Reset states to begining of colourise so no surprises
540 // if different sets of lines lexed.
541 visibleChars = 0;
542 lastWordWasUUID = false;
543 isIncludePreprocessor = false;
544 if (preproc.IsInactive()) {
545 activitySet = activeFlag;
546 sc.SetState(sc.state | activitySet);
550 if (sc.atLineEnd) {
551 lineCurrent++;
552 vlls.Add(lineCurrent, preproc);
553 if (rawStringTerminator != "") {
554 rawSTNew.Set(lineCurrent-1, rawStringTerminator);
558 // Handle line continuation generically.
559 if (sc.ch == '\\') {
560 if (sc.chNext == '\n' || sc.chNext == '\r') {
561 lineCurrent++;
562 vlls.Add(lineCurrent, preproc);
563 sc.Forward();
564 if (sc.ch == '\r' && sc.chNext == '\n') {
565 sc.Forward();
567 continuationLine = true;
568 sc.Forward();
569 continue;
573 const bool atLineEndBeforeSwitch = sc.atLineEnd;
575 // Determine if the current state should terminate.
576 switch (MaskActive(sc.state)) {
577 case SCE_C_OPERATOR:
578 sc.SetState(SCE_C_DEFAULT|activitySet);
579 break;
580 case SCE_C_NUMBER:
581 // We accept almost anything because of hex. and number suffixes
582 if (!(setWord.Contains(sc.ch)
583 || ((sc.ch == '+' || sc.ch == '-') && (sc.chPrev == 'e' || sc.chPrev == 'E' ||
584 sc.chPrev == 'p' || sc.chPrev == 'P')))) {
585 sc.SetState(SCE_C_DEFAULT|activitySet);
587 break;
588 case SCE_C_IDENTIFIER:
589 if (!setWord.Contains(sc.ch) || (sc.ch == '.')) {
590 char s[1000];
591 if (caseSensitive) {
592 sc.GetCurrent(s, sizeof(s));
593 } else {
594 sc.GetCurrentLowered(s, sizeof(s));
596 if (keywords.InList(s)) {
597 lastWordWasUUID = strcmp(s, "uuid") == 0;
598 sc.ChangeState(SCE_C_WORD|activitySet);
599 } else if (keywords2.InList(s)) {
600 sc.ChangeState(SCE_C_WORD2|activitySet);
601 } else if (keywords4.InList(s)) {
602 sc.ChangeState(SCE_C_GLOBALCLASS|activitySet);
604 const bool literalString = sc.ch == '\"';
605 if (literalString || sc.ch == '\'') {
606 size_t lenS = strlen(s);
607 const bool raw = literalString && sc.chPrev == 'R';
608 if (raw)
609 s[lenS--] = '\0';
610 bool valid =
611 (lenS == 0) ||
612 ((lenS == 1) && ((s[0] == 'L') || (s[0] == 'u') || (s[0] == 'U'))) ||
613 ((lenS == 2) && literalString && (s[0] == 'u') && (s[1] == '8'));
614 if (valid) {
615 if (literalString)
616 sc.ChangeState((raw ? SCE_C_STRINGRAW : SCE_C_STRING)|activitySet);
617 else
618 sc.ChangeState(SCE_C_CHARACTER|activitySet);
621 sc.SetState(SCE_C_DEFAULT|activitySet);
623 break;
624 case SCE_C_PREPROCESSOR:
625 if (options.stylingWithinPreprocessor) {
626 if (IsASpace(sc.ch)) {
627 sc.SetState(SCE_C_DEFAULT|activitySet);
629 } else if (isStringInPreprocessor && (sc.Match('>') || sc.Match('\"'))) {
630 isStringInPreprocessor = false;
631 } else if (!isStringInPreprocessor) {
632 if ((isIncludePreprocessor && sc.Match('<')) || sc.Match('\"')) {
633 isStringInPreprocessor = true;
634 } else if (sc.Match('/', '*')) {
635 sc.SetState(SCE_C_PREPROCESSORCOMMENT|activitySet);
636 sc.Forward(); // Eat the *
637 } else if (sc.Match('/', '/')) {
638 sc.SetState(SCE_C_DEFAULT|activitySet);
641 break;
642 case SCE_C_PREPROCESSORCOMMENT:
643 if (sc.Match('*', '/')) {
644 sc.Forward();
645 sc.ForwardSetState(SCE_C_PREPROCESSOR|activitySet);
646 continue; // Without advancing in case of '\'.
648 break;
649 case SCE_C_COMMENT:
650 if (sc.Match('*', '/')) {
651 sc.Forward();
652 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
654 break;
655 case SCE_C_COMMENTDOC:
656 if (sc.Match('*', '/')) {
657 sc.Forward();
658 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
659 } else if (sc.ch == '@' || sc.ch == '\\') { // JavaDoc and Doxygen support
660 // Verify that we have the conditions to mark a comment-doc-keyword
661 if ((IsASpace(sc.chPrev) || sc.chPrev == '*') && (!IsASpace(sc.chNext))) {
662 styleBeforeDCKeyword = SCE_C_COMMENTDOC;
663 sc.SetState(SCE_C_COMMENTDOCKEYWORD|activitySet);
666 break;
667 case SCE_C_COMMENTLINE:
668 if (sc.atLineStart && !continuationLine) {
669 sc.SetState(SCE_C_DEFAULT|activitySet);
671 break;
672 case SCE_C_COMMENTLINEDOC:
673 if (sc.atLineStart && !continuationLine) {
674 sc.SetState(SCE_C_DEFAULT|activitySet);
675 } else if (sc.ch == '@' || sc.ch == '\\') { // JavaDoc and Doxygen support
676 // Verify that we have the conditions to mark a comment-doc-keyword
677 if ((IsASpace(sc.chPrev) || sc.chPrev == '/' || sc.chPrev == '!') && (!IsASpace(sc.chNext))) {
678 styleBeforeDCKeyword = SCE_C_COMMENTLINEDOC;
679 sc.SetState(SCE_C_COMMENTDOCKEYWORD|activitySet);
682 break;
683 case SCE_C_COMMENTDOCKEYWORD:
684 if ((styleBeforeDCKeyword == SCE_C_COMMENTDOC) && sc.Match('*', '/')) {
685 sc.ChangeState(SCE_C_COMMENTDOCKEYWORDERROR);
686 sc.Forward();
687 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
688 } else if (!setDoxygen.Contains(sc.ch)) {
689 char s[100];
690 if (caseSensitive) {
691 sc.GetCurrent(s, sizeof(s));
692 } else {
693 sc.GetCurrentLowered(s, sizeof(s));
695 if (!IsASpace(sc.ch) || !keywords3.InList(s + 1)) {
696 sc.ChangeState(SCE_C_COMMENTDOCKEYWORDERROR|activitySet);
698 sc.SetState(styleBeforeDCKeyword|activitySet);
700 break;
701 case SCE_C_STRING:
702 if (sc.atLineEnd) {
703 sc.ChangeState(SCE_C_STRINGEOL|activitySet);
704 } else if (isIncludePreprocessor) {
705 if (sc.ch == '>') {
706 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
707 isIncludePreprocessor = false;
709 } else if (sc.ch == '\\') {
710 if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
711 sc.Forward();
713 } else if (sc.ch == '\"') {
714 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
716 break;
717 case SCE_C_HASHQUOTEDSTRING:
718 if (sc.ch == '\\') {
719 if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
720 sc.Forward();
722 } else if (sc.ch == '\"') {
723 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
725 break;
726 case SCE_C_STRINGRAW:
727 if (sc.Match(rawStringTerminator.c_str())) {
728 for (size_t termPos=rawStringTerminator.size(); termPos; termPos--)
729 sc.Forward();
730 sc.SetState(SCE_C_DEFAULT|activitySet);
731 rawStringTerminator = "";
733 break;
734 case SCE_C_CHARACTER:
735 if (sc.atLineEnd) {
736 sc.ChangeState(SCE_C_STRINGEOL|activitySet);
737 } else if (sc.ch == '\\') {
738 if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
739 sc.Forward();
741 } else if (sc.ch == '\'') {
742 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
744 break;
745 case SCE_C_REGEX:
746 if (sc.atLineStart) {
747 sc.SetState(SCE_C_DEFAULT|activitySet);
748 } else if (sc.ch == '/') {
749 sc.Forward();
750 while ((sc.ch < 0x80) && islower(sc.ch))
751 sc.Forward(); // gobble regex flags
752 sc.SetState(SCE_C_DEFAULT|activitySet);
753 } else if (sc.ch == '\\') {
754 // Gobble up the quoted character
755 if (sc.chNext == '\\' || sc.chNext == '/') {
756 sc.Forward();
759 break;
760 case SCE_C_STRINGEOL:
761 if (sc.atLineStart) {
762 sc.SetState(SCE_C_DEFAULT|activitySet);
764 break;
765 case SCE_C_VERBATIM:
766 if (sc.ch == '\"') {
767 if (sc.chNext == '\"') {
768 sc.Forward();
769 } else {
770 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
773 break;
774 case SCE_C_TRIPLEVERBATIM:
775 if (sc.Match("\"\"\"")) {
776 while (sc.Match('"')) {
777 sc.Forward();
779 sc.SetState(SCE_C_DEFAULT|activitySet);
781 break;
782 case SCE_C_UUID:
783 if (sc.ch == '\r' || sc.ch == '\n' || sc.ch == ')') {
784 sc.SetState(SCE_C_DEFAULT|activitySet);
788 if (sc.atLineEnd && !atLineEndBeforeSwitch) {
789 // State exit processing consumed characters up to end of line.
790 lineCurrent++;
791 vlls.Add(lineCurrent, preproc);
794 // Determine if a new state should be entered.
795 if (MaskActive(sc.state) == SCE_C_DEFAULT) {
796 if (sc.Match('@', '\"')) {
797 sc.SetState(SCE_C_VERBATIM|activitySet);
798 sc.Forward();
799 } else if (options.triplequotedStrings && sc.Match("\"\"\"")) {
800 sc.SetState(SCE_C_TRIPLEVERBATIM|activitySet);
801 sc.Forward(2);
802 } else if (options.hashquotedStrings && sc.Match('#', '\"')) {
803 sc.SetState(SCE_C_HASHQUOTEDSTRING|activitySet);
804 sc.Forward();
805 } else if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
806 if (lastWordWasUUID) {
807 sc.SetState(SCE_C_UUID|activitySet);
808 lastWordWasUUID = false;
809 } else {
810 sc.SetState(SCE_C_NUMBER|activitySet);
812 } else if (setWordStart.Contains(sc.ch) || (sc.ch == '@')) {
813 if (lastWordWasUUID) {
814 sc.SetState(SCE_C_UUID|activitySet);
815 lastWordWasUUID = false;
816 } else {
817 sc.SetState(SCE_C_IDENTIFIER|activitySet);
819 } else if (sc.Match('/', '*')) {
820 if (sc.Match("/**") || sc.Match("/*!")) { // Support of Qt/Doxygen doc. style
821 sc.SetState(SCE_C_COMMENTDOC|activitySet);
822 } else {
823 sc.SetState(SCE_C_COMMENT|activitySet);
825 sc.Forward(); // Eat the * so it isn't used for the end of the comment
826 } else if (sc.Match('/', '/')) {
827 if ((sc.Match("///") && !sc.Match("////")) || sc.Match("//!"))
828 // Support of Qt/Doxygen doc. style
829 sc.SetState(SCE_C_COMMENTLINEDOC|activitySet);
830 else
831 sc.SetState(SCE_C_COMMENTLINE|activitySet);
832 } else if (sc.ch == '/'
833 && (setOKBeforeRE.Contains(chPrevNonWhite)
834 || followsReturnKeyword(sc, styler))
835 && (!setCouldBePostOp.Contains(chPrevNonWhite)
836 || !FollowsPostfixOperator(sc, styler))) {
837 sc.SetState(SCE_C_REGEX|activitySet); // JavaScript's RegEx
838 } else if (sc.ch == '\"') {
839 if (sc.chPrev == 'R') {
840 styler.Flush();
841 if (MaskActive(styler.StyleAt(sc.currentPos - 1)) == SCE_C_STRINGRAW) {
842 sc.SetState(SCE_C_STRINGRAW|activitySet);
843 rawStringTerminator = ")";
844 for (int termPos = sc.currentPos + 1;; termPos++) {
845 char chTerminator = styler.SafeGetCharAt(termPos, '(');
846 if (chTerminator == '(')
847 break;
848 rawStringTerminator += chTerminator;
850 rawStringTerminator += '\"';
851 } else {
852 sc.SetState(SCE_C_STRING|activitySet);
854 } else {
855 sc.SetState(SCE_C_STRING|activitySet);
857 isIncludePreprocessor = false; // ensure that '>' won't end the string
858 } else if (isIncludePreprocessor && sc.ch == '<') {
859 sc.SetState(SCE_C_STRING|activitySet);
860 } else if (sc.ch == '\'') {
861 sc.SetState(SCE_C_CHARACTER|activitySet);
862 } else if (sc.ch == '#' && visibleChars == 0) {
863 // Preprocessor commands are alone on their line
864 sc.SetState(SCE_C_PREPROCESSOR|activitySet);
865 // Skip whitespace between # and preprocessor word
866 do {
867 sc.Forward();
868 } while ((sc.ch == ' ' || sc.ch == '\t') && sc.More());
869 if (sc.atLineEnd) {
870 sc.SetState(SCE_C_DEFAULT|activitySet);
871 } else if (sc.Match("include")) {
872 isIncludePreprocessor = true;
873 } else {
874 if (options.trackPreprocessor) {
875 if (sc.Match("ifdef") || sc.Match("ifndef")) {
876 bool isIfDef = sc.Match("ifdef");
877 int i = isIfDef ? 5 : 6;
878 std::string restOfLine = GetRestOfLine(styler, sc.currentPos + i + 1, false);
879 bool foundDef = preprocessorDefinitions.find(restOfLine) != preprocessorDefinitions.end();
880 preproc.StartSection(isIfDef == foundDef);
881 } else if (sc.Match("if")) {
882 std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 2, true);
883 bool ifGood = EvaluateExpression(restOfLine, preprocessorDefinitions);
884 preproc.StartSection(ifGood);
885 } else if (sc.Match("else")) {
886 if (!preproc.CurrentIfTaken()) {
887 preproc.InvertCurrentLevel();
888 activitySet = preproc.IsInactive() ? activeFlag : 0;
889 if (!activitySet)
890 sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
891 } else if (!preproc.IsInactive()) {
892 preproc.InvertCurrentLevel();
893 activitySet = preproc.IsInactive() ? activeFlag : 0;
894 if (!activitySet)
895 sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
897 } else if (sc.Match("elif")) {
898 // Ensure only one chosen out of #if .. #elif .. #elif .. #else .. #endif
899 if (!preproc.CurrentIfTaken()) {
900 // Similar to #if
901 std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 2, true);
902 bool ifGood = EvaluateExpression(restOfLine, preprocessorDefinitions);
903 if (ifGood) {
904 preproc.InvertCurrentLevel();
905 activitySet = preproc.IsInactive() ? activeFlag : 0;
906 if (!activitySet)
907 sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
909 } else if (!preproc.IsInactive()) {
910 preproc.InvertCurrentLevel();
911 activitySet = preproc.IsInactive() ? activeFlag : 0;
912 if (!activitySet)
913 sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
915 } else if (sc.Match("endif")) {
916 preproc.EndSection();
917 activitySet = preproc.IsInactive() ? activeFlag : 0;
918 sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
919 } else if (sc.Match("define")) {
920 if (options.updatePreprocessor && !preproc.IsInactive()) {
921 std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 6, true);
922 if (restOfLine.find(")") == std::string::npos) { // Don't handle macros with arguments
923 std::vector<std::string> tokens = Tokenize(restOfLine);
924 std::string key;
925 std::string value("1");
926 if (tokens.size() >= 1) {
927 key = tokens[0];
928 if (tokens.size() >= 2) {
929 value = tokens[1];
931 preprocessorDefinitions[key] = value;
932 ppDefineHistory.push_back(PPDefinition(lineCurrent, key, value));
933 definitionsChanged = true;
940 } else if (isoperator(static_cast<char>(sc.ch))) {
941 sc.SetState(SCE_C_OPERATOR|activitySet);
945 if (!IsASpace(sc.ch) && !IsSpaceEquiv(MaskActive(sc.state))) {
946 chPrevNonWhite = sc.ch;
947 visibleChars++;
949 continuationLine = false;
950 sc.Forward();
952 const bool rawStringsChanged = rawStringTerminators.Merge(rawSTNew, lineCurrent);
953 if (definitionsChanged || rawStringsChanged)
954 styler.ChangeLexerState(startPos, startPos + length);
955 sc.Complete();
958 // Store both the current line's fold level and the next lines in the
959 // level store to make it easy to pick up with each increment
960 // and to make it possible to fiddle the current level for "} else {".
962 void SCI_METHOD LexerCPP::Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
964 if (!options.fold)
965 return;
967 LexAccessor styler(pAccess);
969 unsigned int endPos = startPos + length;
970 int visibleChars = 0;
971 int lineCurrent = styler.GetLine(startPos);
972 int levelCurrent = SC_FOLDLEVELBASE;
973 if (lineCurrent > 0)
974 levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
975 int levelMinCurrent = levelCurrent;
976 int levelNext = levelCurrent;
977 char chNext = styler[startPos];
978 int styleNext = MaskActive(styler.StyleAt(startPos));
979 int style = MaskActive(initStyle);
980 const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
981 for (unsigned int i = startPos; i < endPos; i++) {
982 char ch = chNext;
983 chNext = styler.SafeGetCharAt(i + 1);
984 int stylePrev = style;
985 style = styleNext;
986 styleNext = MaskActive(styler.StyleAt(i + 1));
987 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
988 if (options.foldComment && options.foldCommentMultiline && IsStreamCommentStyle(style)) {
989 if (!IsStreamCommentStyle(stylePrev) && (stylePrev != SCE_C_COMMENTLINEDOC)) {
990 levelNext++;
991 } else if (!IsStreamCommentStyle(styleNext) && (styleNext != SCE_C_COMMENTLINEDOC) && !atEOL) {
992 // Comments don't end at end of line and the next character may be unstyled.
993 levelNext--;
996 if (options.foldComment && options.foldCommentExplicit && ((style == SCE_C_COMMENTLINE) || options.foldExplicitAnywhere)) {
997 if (userDefinedFoldMarkers) {
998 if (styler.Match(i, options.foldExplicitStart.c_str())) {
999 levelNext++;
1000 } else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
1001 levelNext--;
1003 } else {
1004 if ((ch == '/') && (chNext == '/')) {
1005 char chNext2 = styler.SafeGetCharAt(i + 2);
1006 if (chNext2 == '{') {
1007 levelNext++;
1008 } else if (chNext2 == '}') {
1009 levelNext--;
1014 if (options.foldPreprocessor && (style == SCE_C_PREPROCESSOR)) {
1015 if (ch == '#') {
1016 unsigned int j = i + 1;
1017 while ((j < endPos) && IsASpaceOrTab(styler.SafeGetCharAt(j))) {
1018 j++;
1020 if (styler.Match(j, "region") || styler.Match(j, "if")) {
1021 levelNext++;
1022 } else if (styler.Match(j, "end")) {
1023 levelNext--;
1027 if (options.foldSyntaxBased && (style == SCE_C_OPERATOR)) {
1028 if (ch == '{') {
1029 // Measure the minimum before a '{' to allow
1030 // folding on "} else {"
1031 if (levelMinCurrent > levelNext) {
1032 levelMinCurrent = levelNext;
1034 levelNext++;
1035 } else if (ch == '}') {
1036 levelNext--;
1039 if (!IsASpace(ch))
1040 visibleChars++;
1041 if (atEOL || (i == endPos-1)) {
1042 int levelUse = levelCurrent;
1043 if (options.foldSyntaxBased && options.foldAtElse) {
1044 levelUse = levelMinCurrent;
1046 int lev = levelUse | levelNext << 16;
1047 if (visibleChars == 0 && options.foldCompact)
1048 lev |= SC_FOLDLEVELWHITEFLAG;
1049 if (levelUse < levelNext)
1050 lev |= SC_FOLDLEVELHEADERFLAG;
1051 if (lev != styler.LevelAt(lineCurrent)) {
1052 styler.SetLevel(lineCurrent, lev);
1054 lineCurrent++;
1055 levelCurrent = levelNext;
1056 levelMinCurrent = levelCurrent;
1057 if (atEOL && (i == static_cast<unsigned int>(styler.Length()-1))) {
1058 // There is an empty line at end of file so give it same level and empty
1059 styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
1061 visibleChars = 0;
1066 void LexerCPP::EvaluateTokens(std::vector<std::string> &tokens) {
1068 // Evaluate defined() statements to either 0 or 1
1069 for (size_t i=0; (i+2)<tokens.size();) {
1070 if ((tokens[i] == "defined") && (tokens[i+1] == "(")) {
1071 const char *val = "0";
1072 if (tokens[i+2] == ")") {
1073 // defined()
1074 tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 3);
1075 } else if (((i+3)<tokens.size()) && (tokens[i+3] == ")")) {
1076 // defined(<int>)
1077 tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 4);
1078 val = "1";
1080 tokens[i] = val;
1081 } else {
1082 i++;
1086 // Find bracketed subexpressions and recurse on them
1087 std::vector<std::string>::iterator itBracket = std::find(tokens.begin(), tokens.end(), "(");
1088 std::vector<std::string>::iterator itEndBracket = std::find(tokens.begin(), tokens.end(), ")");
1089 while ((itBracket != tokens.end()) && (itEndBracket != tokens.end()) && (itEndBracket > itBracket)) {
1090 std::vector<std::string> inBracket(itBracket + 1, itEndBracket);
1091 EvaluateTokens(inBracket);
1093 // The insertion is done before the removal because there were failures with the opposite approach
1094 tokens.insert(itBracket, inBracket.begin(), inBracket.end());
1095 itBracket = std::find(tokens.begin(), tokens.end(), "(");
1096 itEndBracket = std::find(tokens.begin(), tokens.end(), ")");
1097 tokens.erase(itBracket, itEndBracket + 1);
1099 itBracket = std::find(tokens.begin(), tokens.end(), "(");
1100 itEndBracket = std::find(tokens.begin(), tokens.end(), ")");
1103 // Evaluate logical negations
1104 for (size_t j=0; (j+1)<tokens.size();) {
1105 if (setNegationOp.Contains(tokens[j][0])) {
1106 int isTrue = atoi(tokens[j+1].c_str());
1107 if (tokens[j] == "!")
1108 isTrue = !isTrue;
1109 std::vector<std::string>::iterator itInsert =
1110 tokens.erase(tokens.begin() + j, tokens.begin() + j + 2);
1111 tokens.insert(itInsert, isTrue ? "1" : "0");
1112 } else {
1113 j++;
1117 // Evaluate expressions in precedence order
1118 enum precedence { precArithmetic, precRelative, precLogical };
1119 for (int prec=precArithmetic; prec <= precLogical; prec++) {
1120 // Looking at 3 tokens at a time so end at 2 before end
1121 for (size_t k=0; (k+2)<tokens.size();) {
1122 char chOp = tokens[k+1][0];
1123 if (
1124 ((prec==precArithmetic) && setArithmethicOp.Contains(chOp)) ||
1125 ((prec==precRelative) && setRelOp.Contains(chOp)) ||
1126 ((prec==precLogical) && setLogicalOp.Contains(chOp))
1128 int valA = atoi(tokens[k].c_str());
1129 int valB = atoi(tokens[k+2].c_str());
1130 int result = 0;
1131 if (tokens[k+1] == "+")
1132 result = valA + valB;
1133 else if (tokens[k+1] == "-")
1134 result = valA - valB;
1135 else if (tokens[k+1] == "*")
1136 result = valA * valB;
1137 else if (tokens[k+1] == "/")
1138 result = valA / (valB ? valB : 1);
1139 else if (tokens[k+1] == "%")
1140 result = valA % (valB ? valB : 1);
1141 else if (tokens[k+1] == "<")
1142 result = valA < valB;
1143 else if (tokens[k+1] == "<=")
1144 result = valA <= valB;
1145 else if (tokens[k+1] == ">")
1146 result = valA > valB;
1147 else if (tokens[k+1] == ">=")
1148 result = valA >= valB;
1149 else if (tokens[k+1] == "==")
1150 result = valA == valB;
1151 else if (tokens[k+1] == "!=")
1152 result = valA != valB;
1153 else if (tokens[k+1] == "||")
1154 result = valA || valB;
1155 else if (tokens[k+1] == "&&")
1156 result = valA && valB;
1157 char sResult[30];
1158 sprintf(sResult, "%d", result);
1159 std::vector<std::string>::iterator itInsert =
1160 tokens.erase(tokens.begin() + k, tokens.begin() + k + 3);
1161 tokens.insert(itInsert, sResult);
1162 } else {
1163 k++;
1169 bool LexerCPP::EvaluateExpression(const std::string &expr, const std::map<std::string, std::string> &preprocessorDefinitions) {
1170 // Break into tokens, replacing with definitions
1171 std::string word;
1172 std::vector<std::string> tokens;
1173 const char *cp = expr.c_str();
1174 for (;;) {
1175 if (setWord.Contains(*cp)) {
1176 word += *cp;
1177 } else {
1178 std::map<std::string, std::string>::const_iterator it = preprocessorDefinitions.find(word);
1179 if (it != preprocessorDefinitions.end()) {
1180 tokens.push_back(it->second);
1181 } else if (!word.empty() && ((word[0] >= '0' && word[0] <= '9') || (word == "defined"))) {
1182 tokens.push_back(word);
1184 word = "";
1185 if (!*cp) {
1186 break;
1188 if ((*cp != ' ') && (*cp != '\t')) {
1189 std::string op(cp, 1);
1190 if (setRelOp.Contains(*cp)) {
1191 if (setRelOp.Contains(cp[1])) {
1192 op += cp[1];
1193 cp++;
1195 } else if (setLogicalOp.Contains(*cp)) {
1196 if (setLogicalOp.Contains(cp[1])) {
1197 op += cp[1];
1198 cp++;
1201 tokens.push_back(op);
1204 cp++;
1207 EvaluateTokens(tokens);
1209 // "0" or "" -> false else true
1210 bool isFalse = tokens.empty() ||
1211 ((tokens.size() == 1) && ((tokens[0] == "") || tokens[0] == "0"));
1212 return !isFalse;
// Lexer module objects for this file's lexer. Both are built from cppWordLists:
// one is named "cpp" (id SCLEX_CPP, factory LexerFactoryCPP) and the other
// "cppnocase" (id SCLEX_CPPNOCASE, factory LexerFactoryCPPInsensitive -
// presumably the case-insensitive variant; confirm against the factory).
1215 LexerModule lmCPP(SCLEX_CPP, LexerCPP::LexerFactoryCPP, "cpp", cppWordLists);
1216 LexerModule lmCPPNoCase(SCLEX_CPPNOCASE, LexerCPP::LexerFactoryCPPInsensitive, "cppnocase", cppWordLists);