Update Scintilla to version 3.5.4
[TortoiseGit.git] / ext / scintilla / lexers / LexCPP.cxx
blobc6f0f369d33b999d146ef9b857f632002838dc90
1 // Scintilla source code edit control
2 /** @file LexCPP.cxx
3 ** Lexer for C++, C, Java, and JavaScript.
4 ** Further folding features and configuration properties added by "Udo Lechner" <dlchnr(at)gmx(dot)net>
5 **/
6 // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
7 // The License.txt file describes the conditions under which this software may be distributed.
9 #include <stdlib.h>
10 #include <string.h>
11 #include <stdio.h>
12 #include <stdarg.h>
13 #include <assert.h>
14 #include <ctype.h>
16 #include <string>
17 #include <vector>
18 #include <map>
19 #include <algorithm>
21 #include "ILexer.h"
22 #include "Scintilla.h"
23 #include "SciLexer.h"
25 #include "WordList.h"
26 #include "LexAccessor.h"
27 #include "Accessor.h"
28 #include "StyleContext.h"
29 #include "CharacterSet.h"
30 #include "LexerModule.h"
31 #include "OptionSet.h"
32 #include "SparseState.h"
33 #include "SubStyles.h"
35 #ifdef SCI_NAMESPACE
36 using namespace Scintilla;
37 #endif
39 namespace {
40 // Use an unnamed namespace to protect the functions and classes from name conflicts
42 bool IsSpaceEquiv(int state) {
43 return (state <= SCE_C_COMMENTDOC) ||
44 // including SCE_C_DEFAULT, SCE_C_COMMENT, SCE_C_COMMENTLINE
45 (state == SCE_C_COMMENTLINEDOC) || (state == SCE_C_COMMENTDOCKEYWORD) ||
46 (state == SCE_C_COMMENTDOCKEYWORDERROR);
49 // Preconditions: sc.currentPos points to a character after '+' or '-'.
50 // The test for pos reaching 0 should be redundant,
51 // and is in only for safety measures.
52 // Limitation: this code will give the incorrect answer for code like
53 // a = b+++/ptn/...
54 // Putting a space between the '++' post-inc operator and the '+' binary op
55 // fixes this, and is highly recommended for readability anyway.
56 bool FollowsPostfixOperator(StyleContext &sc, LexAccessor &styler) {
57 int pos = (int) sc.currentPos;
58 while (--pos > 0) {
59 char ch = styler[pos];
60 if (ch == '+' || ch == '-') {
61 return styler[pos - 1] == ch;
64 return false;
67 bool followsReturnKeyword(StyleContext &sc, LexAccessor &styler) {
68 // Don't look at styles, so no need to flush.
69 int pos = (int) sc.currentPos;
70 int currentLine = styler.GetLine(pos);
71 int lineStartPos = styler.LineStart(currentLine);
72 while (--pos > lineStartPos) {
73 char ch = styler.SafeGetCharAt(pos);
74 if (ch != ' ' && ch != '\t') {
75 break;
78 const char *retBack = "nruter";
79 const char *s = retBack;
80 while (*s
81 && pos >= lineStartPos
82 && styler.SafeGetCharAt(pos) == *s) {
83 s++;
84 pos--;
86 return !*s;
// True only for the space and horizontal tab characters.
bool IsSpaceOrTab(int ch) {
	switch (ch) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
93 bool OnlySpaceOrTab(const std::string &s) {
94 for (std::string::const_iterator it = s.begin(); it != s.end(); ++it) {
95 if (!IsSpaceOrTab(*it))
96 return false;
98 return true;
// Split text at every occurrence of separator.
// Empty input yields no elements; otherwise there is one element more than
// there are separators, so adjacent, leading or trailing separators
// produce empty strings.
std::vector<std::string> StringSplit(const std::string &text, int separator) {
	std::vector<std::string> pieces;
	if (text.empty()) {
		return pieces;
	}
	pieces.push_back(std::string());
	for (std::string::size_type i = 0; i < text.size(); ++i) {
		const char ch = text[i];
		if (ch == separator) {
			// Begin a new, initially empty, piece.
			pieces.push_back(std::string());
		} else {
			pieces.back().push_back(ch);
		}
	}
	return pieces;
}
// Positions of a matching "(" / ")" pair inside a token list.
// Both iterators equal the list's end() when no balanced pair was found.
struct BracketPair {
	std::vector<std::string>::iterator itBracket;
	std::vector<std::string>::iterator itEndBracket;
};

// Locate the first "(" token and the ")" token that balances it, taking
// nested brackets into account.
// Returns end()/end() when there is no "(" or it is never closed.
BracketPair FindBracketPair(std::vector<std::string> &tokens) {
	typedef std::vector<std::string>::iterator TokenIterator;

	BracketPair result;
	result.itBracket = tokens.end();
	result.itEndBracket = tokens.end();

	const TokenIterator opening = std::find(tokens.begin(), tokens.end(), "(");
	if (opening == tokens.end()) {
		return result;
	}

	// The scan starts on the opening bracket itself, so depth is at least 1
	// before any ")" can decrement it.
	size_t depth = 0;
	for (TokenIterator cursor = opening; cursor != tokens.end(); ++cursor) {
		if (*cursor == "(") {
			++depth;
		} else if (*cursor == ")") {
			--depth;
			if (depth == 0) {
				result.itBracket = opening;
				result.itEndBracket = cursor;
				return result;
			}
		}
	}
	// Unbalanced: leave both iterators at end().
	return result;
}
143 void highlightTaskMarker(StyleContext &sc, LexAccessor &styler,
144 int activity, WordList &markerList, bool caseSensitive){
145 if ((isoperator(sc.chPrev) || IsASpace(sc.chPrev)) && markerList.Length()) {
146 const int lengthMarker = 50;
147 char marker[lengthMarker+1];
148 int currPos = (int) sc.currentPos;
149 int i = 0;
150 while (i < lengthMarker) {
151 char ch = styler.SafeGetCharAt(currPos + i);
152 if (IsASpace(ch) || isoperator(ch)) {
153 break;
155 if (caseSensitive)
156 marker[i] = ch;
157 else
158 marker[i] = static_cast<char>(tolower(ch));
159 i++;
161 marker[i] = '\0';
162 if (markerList.InList(marker)) {
163 sc.SetState(SCE_C_TASKMARKER|activity);
168 struct EscapeSequence {
169 int digitsLeft;
170 CharacterSet setHexDigits;
171 CharacterSet setOctDigits;
172 CharacterSet setNoneNumeric;
173 CharacterSet *escapeSetValid;
174 EscapeSequence() {
175 digitsLeft = 0;
176 escapeSetValid = 0;
177 setHexDigits = CharacterSet(CharacterSet::setDigits, "ABCDEFabcdef");
178 setOctDigits = CharacterSet(CharacterSet::setNone, "01234567");
180 void resetEscapeState(int nextChar) {
181 digitsLeft = 0;
182 escapeSetValid = &setNoneNumeric;
183 if (nextChar == 'U') {
184 digitsLeft = 9;
185 escapeSetValid = &setHexDigits;
186 } else if (nextChar == 'u') {
187 digitsLeft = 5;
188 escapeSetValid = &setHexDigits;
189 } else if (nextChar == 'x') {
190 digitsLeft = 5;
191 escapeSetValid = &setHexDigits;
192 } else if (setOctDigits.Contains(nextChar)) {
193 digitsLeft = 3;
194 escapeSetValid = &setOctDigits;
197 bool atEscapeEnd(int currChar) const {
198 return (digitsLeft <= 0) || !escapeSetValid->Contains(currChar);
202 std::string GetRestOfLine(LexAccessor &styler, int start, bool allowSpace) {
203 std::string restOfLine;
204 int i =0;
205 char ch = styler.SafeGetCharAt(start, '\n');
206 int endLine = styler.LineEnd(styler.GetLine(start));
207 while (((start+i) < endLine) && (ch != '\r')) {
208 char chNext = styler.SafeGetCharAt(start + i + 1, '\n');
209 if (ch == '/' && (chNext == '/' || chNext == '*'))
210 break;
211 if (allowSpace || (ch != ' '))
212 restOfLine += ch;
213 i++;
214 ch = chNext;
216 return restOfLine;
219 bool IsStreamCommentStyle(int style) {
220 return style == SCE_C_COMMENT ||
221 style == SCE_C_COMMENTDOC ||
222 style == SCE_C_COMMENTDOCKEYWORD ||
223 style == SCE_C_COMMENTDOCKEYWORDERROR;
// One preprocessor definition event recorded while lexing.
struct PPDefinition {
	int line;		// line on which the definition was seen
	std::string key;	// symbol name
	std::string value;	// replacement text
	bool isUndef;		// true when the symbol is being removed rather than defined
	std::string arguments;	// macro parameter text; empty when there is none

	PPDefinition(int line_, const std::string &key_, const std::string &value_, bool isUndef_ = false, std::string arguments_="") :
		line(line_),
		key(key_),
		value(value_),
		isUndef(isUndef_),
		arguments(arguments_) {
	}
};
// Preprocessor conditional state in effect on one line.
// Each nesting level of #if owns one bit in 'state' (set = that level is
// inactive) and one bit in 'ifTaken' (set = some branch at that level has
// already been taken). Only levels 0..31 are tracked; deeper nesting is
// counted in 'level' but leaves the bit sets untouched.
class LinePPState {
	int state;
	int ifTaken;
	int level;	// current nesting depth, -1 when outside any conditional
	bool ValidLevel() const {
		return level >= 0 && level < 32;
	}
	// Bit mask for the current level, or 0 when the level is not tracked.
	// Shifting by a negative count or by 32 and more is undefined, and
	// CurrentIfTaken() can be reached with level == -1, so guard here.
	// The shift is done unsigned so that level 31 is well defined.
	int maskLevel() const {
		if (!ValidLevel()) {
			return 0;
		}
		return static_cast<int>(1u << level);
	}
public:
	LinePPState() : state(0), ifTaken(0), level(-1) {
	}
	// True when any enclosing tracked level is inactive.
	bool IsInactive() const {
		return state != 0;
	}
	// True when a branch at the current level has already been taken;
	// always false when the current level is not tracked.
	bool CurrentIfTaken() const {
		return (ifTaken & maskLevel()) != 0;
	}
	// Enter a new conditional level; 'on' says whether its first branch is active.
	void StartSection(bool on) {
		level++;
		if (ValidLevel()) {
			if (on) {
				state &= ~maskLevel();
				ifTaken |= maskLevel();
			} else {
				state |= maskLevel();
				ifTaken &= ~maskLevel();
			}
		}
	}
	// Leave the current conditional level, clearing its bits.
	void EndSection() {
		if (ValidLevel()) {
			state &= ~maskLevel();
			ifTaken &= ~maskLevel();
		}
		level--;
	}
	// Flip the activity of the current level and record that a branch was taken.
	void InvertCurrentLevel() {
		if (ValidLevel()) {
			state ^= maskLevel();
			ifTaken |= maskLevel();
		}
	}
};
283 // Hold the preprocessor state for each line seen.
284 // Currently one entry per line but could become sparse with just one entry per preprocessor line.
285 class PPStates {
286 std::vector<LinePPState> vlls;
287 public:
288 LinePPState ForLine(int line) const {
289 if ((line > 0) && (vlls.size() > static_cast<size_t>(line))) {
290 return vlls[line];
291 } else {
292 return LinePPState();
295 void Add(int line, LinePPState lls) {
296 vlls.resize(line+1);
297 vlls[line] = lls;
301 // An individual named option for use in an OptionSet
// Options used for LexerCPP.
// Every field is bound to a named property in OptionSetCPP; the
// constructor supplies the default values.
struct OptionsCPP {
	bool stylingWithinPreprocessor;
	bool identifiersAllowDollars;
	bool trackPreprocessor;
	bool updatePreprocessor;
	bool verbatimStringsAllowEscapes;
	bool triplequotedStrings;
	bool hashquotedStrings;
	bool backQuotedStrings;
	bool escapeSequence;
	bool fold;
	bool foldSyntaxBased;
	bool foldComment;
	bool foldCommentMultiline;
	bool foldCommentExplicit;
	std::string foldExplicitStart;
	std::string foldExplicitEnd;
	bool foldExplicitAnywhere;
	bool foldPreprocessor;
	bool foldCompact;
	bool foldAtElse;

	OptionsCPP() :
		stylingWithinPreprocessor(false),
		identifiersAllowDollars(true),
		trackPreprocessor(true),
		updatePreprocessor(true),
		verbatimStringsAllowEscapes(false),
		triplequotedStrings(false),
		hashquotedStrings(false),
		backQuotedStrings(false),
		escapeSequence(false),
		fold(false),
		foldSyntaxBased(true),
		foldComment(false),
		foldCommentMultiline(true),
		foldCommentExplicit(true),
		foldExplicitStart(),	// empty string
		foldExplicitEnd(),	// empty string
		foldExplicitAnywhere(false),
		foldPreprocessor(false),
		foldCompact(false),
		foldAtElse(false) {
	}
};
// Descriptions of the word lists, in the index order used by
// LexerCPP::WordListSet (0 = primary keywords ... 5 = task markers).
// The list is handed to DefineWordListSets without a length, so it is
// closed by a null entry.
const char *const cppWordLists[] = {
	"Primary keywords and identifiers",
	"Secondary keywords and identifiers",
	"Documentation comment keywords",
	"Global classes and typedefs",
	"Preprocessor definitions",
	"Task marker and error marker keywords",
	0,
};
359 struct OptionSetCPP : public OptionSet<OptionsCPP> {
360 OptionSetCPP() {
361 DefineProperty("styling.within.preprocessor", &OptionsCPP::stylingWithinPreprocessor,
362 "For C++ code, determines whether all preprocessor code is styled in the "
363 "preprocessor style (0, the default) or only from the initial # to the end "
364 "of the command word(1).");
366 DefineProperty("lexer.cpp.allow.dollars", &OptionsCPP::identifiersAllowDollars,
367 "Set to 0 to disallow the '$' character in identifiers with the cpp lexer.");
369 DefineProperty("lexer.cpp.track.preprocessor", &OptionsCPP::trackPreprocessor,
370 "Set to 1 to interpret #if/#else/#endif to grey out code that is not active.");
372 DefineProperty("lexer.cpp.update.preprocessor", &OptionsCPP::updatePreprocessor,
373 "Set to 1 to update preprocessor definitions when #define found.");
375 DefineProperty("lexer.cpp.verbatim.strings.allow.escapes", &OptionsCPP::verbatimStringsAllowEscapes,
376 "Set to 1 to allow verbatim strings to contain escape sequences.");
378 DefineProperty("lexer.cpp.triplequoted.strings", &OptionsCPP::triplequotedStrings,
379 "Set to 1 to enable highlighting of triple-quoted strings.");
381 DefineProperty("lexer.cpp.hashquoted.strings", &OptionsCPP::hashquotedStrings,
382 "Set to 1 to enable highlighting of hash-quoted strings.");
384 DefineProperty("lexer.cpp.backquoted.strings", &OptionsCPP::backQuotedStrings,
385 "Set to 1 to enable highlighting of back-quoted raw strings .");
387 DefineProperty("lexer.cpp.escape.sequence", &OptionsCPP::escapeSequence,
388 "Set to 1 to enable highlighting of escape sequences in strings");
390 DefineProperty("fold", &OptionsCPP::fold);
392 DefineProperty("fold.cpp.syntax.based", &OptionsCPP::foldSyntaxBased,
393 "Set this property to 0 to disable syntax based folding.");
395 DefineProperty("fold.comment", &OptionsCPP::foldComment,
396 "This option enables folding multi-line comments and explicit fold points when using the C++ lexer. "
397 "Explicit fold points allows adding extra folding by placing a //{ comment at the start and a //} "
398 "at the end of a section that should fold.");
400 DefineProperty("fold.cpp.comment.multiline", &OptionsCPP::foldCommentMultiline,
401 "Set this property to 0 to disable folding multi-line comments when fold.comment=1.");
403 DefineProperty("fold.cpp.comment.explicit", &OptionsCPP::foldCommentExplicit,
404 "Set this property to 0 to disable folding explicit fold points when fold.comment=1.");
406 DefineProperty("fold.cpp.explicit.start", &OptionsCPP::foldExplicitStart,
407 "The string to use for explicit fold start points, replacing the standard //{.");
409 DefineProperty("fold.cpp.explicit.end", &OptionsCPP::foldExplicitEnd,
410 "The string to use for explicit fold end points, replacing the standard //}.");
412 DefineProperty("fold.cpp.explicit.anywhere", &OptionsCPP::foldExplicitAnywhere,
413 "Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
415 DefineProperty("fold.preprocessor", &OptionsCPP::foldPreprocessor,
416 "This option enables folding preprocessor directives when using the C++ lexer. "
417 "Includes C#'s explicit #region and #endregion folding directives.");
419 DefineProperty("fold.compact", &OptionsCPP::foldCompact);
421 DefineProperty("fold.at.else", &OptionsCPP::foldAtElse,
422 "This option enables C++ folding on a \"} else {\" line of an if statement.");
424 DefineWordListSets(cppWordLists);
428 const char styleSubable[] = {SCE_C_IDENTIFIER, SCE_C_COMMENTDOCKEYWORD, 0};
432 class LexerCPP : public ILexerWithSubStyles {
433 bool caseSensitive;
434 CharacterSet setWord;
435 CharacterSet setNegationOp;
436 CharacterSet setArithmethicOp;
437 CharacterSet setRelOp;
438 CharacterSet setLogicalOp;
439 CharacterSet setWordStart;
440 PPStates vlls;
441 std::vector<PPDefinition> ppDefineHistory;
442 WordList keywords;
443 WordList keywords2;
444 WordList keywords3;
445 WordList keywords4;
446 WordList ppDefinitions;
447 WordList markerList;
448 struct SymbolValue {
449 std::string value;
450 std::string arguments;
451 SymbolValue(const std::string &value_="", const std::string &arguments_="") : value(value_), arguments(arguments_) {
453 SymbolValue &operator = (const std::string &value_) {
454 value = value_;
455 arguments.clear();
456 return *this;
458 bool IsMacro() const {
459 return !arguments.empty();
462 typedef std::map<std::string, SymbolValue> SymbolTable;
463 SymbolTable preprocessorDefinitionsStart;
464 OptionsCPP options;
465 OptionSetCPP osCPP;
466 EscapeSequence escapeSeq;
467 SparseState<std::string> rawStringTerminators;
468 enum { activeFlag = 0x40 };
469 enum { ssIdentifier, ssDocKeyword };
470 SubStyles subStyles;
471 public:
472 explicit LexerCPP(bool caseSensitive_) :
473 caseSensitive(caseSensitive_),
474 setWord(CharacterSet::setAlphaNum, "._", 0x80, true),
475 setNegationOp(CharacterSet::setNone, "!"),
476 setArithmethicOp(CharacterSet::setNone, "+-/*%"),
477 setRelOp(CharacterSet::setNone, "=!<>"),
478 setLogicalOp(CharacterSet::setNone, "|&"),
479 subStyles(styleSubable, 0x80, 0x40, activeFlag) {
481 virtual ~LexerCPP() {
483 void SCI_METHOD Release() {
484 delete this;
486 int SCI_METHOD Version() const {
487 return lvSubStyles;
489 const char * SCI_METHOD PropertyNames() {
490 return osCPP.PropertyNames();
492 int SCI_METHOD PropertyType(const char *name) {
493 return osCPP.PropertyType(name);
495 const char * SCI_METHOD DescribeProperty(const char *name) {
496 return osCPP.DescribeProperty(name);
498 int SCI_METHOD PropertySet(const char *key, const char *val);
499 const char * SCI_METHOD DescribeWordListSets() {
500 return osCPP.DescribeWordListSets();
502 int SCI_METHOD WordListSet(int n, const char *wl);
503 void SCI_METHOD Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
504 void SCI_METHOD Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
506 void * SCI_METHOD PrivateCall(int, void *) {
507 return 0;
510 int SCI_METHOD LineEndTypesSupported() {
511 return SC_LINE_END_TYPE_UNICODE;
514 int SCI_METHOD AllocateSubStyles(int styleBase, int numberStyles) {
515 return subStyles.Allocate(styleBase, numberStyles);
517 int SCI_METHOD SubStylesStart(int styleBase) {
518 return subStyles.Start(styleBase);
520 int SCI_METHOD SubStylesLength(int styleBase) {
521 return subStyles.Length(styleBase);
523 int SCI_METHOD StyleFromSubStyle(int subStyle) {
524 int styleBase = subStyles.BaseStyle(MaskActive(subStyle));
525 int active = subStyle & activeFlag;
526 return styleBase | active;
528 int SCI_METHOD PrimaryStyleFromStyle(int style) {
529 return MaskActive(style);
531 void SCI_METHOD FreeSubStyles() {
532 subStyles.Free();
534 void SCI_METHOD SetIdentifiers(int style, const char *identifiers) {
535 subStyles.SetIdentifiers(style, identifiers);
537 int SCI_METHOD DistanceToSecondaryStyles() {
538 return activeFlag;
540 const char * SCI_METHOD GetSubStyleBases() {
541 return styleSubable;
544 static ILexer *LexerFactoryCPP() {
545 return new LexerCPP(true);
547 static ILexer *LexerFactoryCPPInsensitive() {
548 return new LexerCPP(false);
550 static int MaskActive(int style) {
551 return style & ~activeFlag;
553 void EvaluateTokens(std::vector<std::string> &tokens, const SymbolTable &preprocessorDefinitions);
554 std::vector<std::string> Tokenize(const std::string &expr) const;
555 bool EvaluateExpression(const std::string &expr, const SymbolTable &preprocessorDefinitions);
558 int SCI_METHOD LexerCPP::PropertySet(const char *key, const char *val) {
559 if (osCPP.PropertySet(&options, key, val)) {
560 if (strcmp(key, "lexer.cpp.allow.dollars") == 0) {
561 setWord = CharacterSet(CharacterSet::setAlphaNum, "._", 0x80, true);
562 if (options.identifiersAllowDollars) {
563 setWord.Add('$');
566 return 0;
568 return -1;
571 int SCI_METHOD LexerCPP::WordListSet(int n, const char *wl) {
572 WordList *wordListN = 0;
573 switch (n) {
574 case 0:
575 wordListN = &keywords;
576 break;
577 case 1:
578 wordListN = &keywords2;
579 break;
580 case 2:
581 wordListN = &keywords3;
582 break;
583 case 3:
584 wordListN = &keywords4;
585 break;
586 case 4:
587 wordListN = &ppDefinitions;
588 break;
589 case 5:
590 wordListN = &markerList;
591 break;
593 int firstModification = -1;
594 if (wordListN) {
595 WordList wlNew;
596 wlNew.Set(wl);
597 if (*wordListN != wlNew) {
598 wordListN->Set(wl);
599 firstModification = 0;
600 if (n == 4) {
601 // Rebuild preprocessorDefinitions
602 preprocessorDefinitionsStart.clear();
603 for (int nDefinition = 0; nDefinition < ppDefinitions.Length(); nDefinition++) {
604 const char *cpDefinition = ppDefinitions.WordAt(nDefinition);
605 const char *cpEquals = strchr(cpDefinition, '=');
606 if (cpEquals) {
607 std::string name(cpDefinition, cpEquals - cpDefinition);
608 std::string val(cpEquals+1);
609 size_t bracket = name.find('(');
610 size_t bracketEnd = name.find(')');
611 if ((bracket != std::string::npos) && (bracketEnd != std::string::npos)) {
612 // Macro
613 std::string args = name.substr(bracket + 1, bracketEnd - bracket - 1);
614 name = name.substr(0, bracket);
615 preprocessorDefinitionsStart[name] = SymbolValue(val, args);
616 } else {
617 preprocessorDefinitionsStart[name] = val;
619 } else {
620 std::string name(cpDefinition);
621 std::string val("1");
622 preprocessorDefinitionsStart[name] = val;
628 return firstModification;
631 // Functor used to truncate history
632 struct After {
633 int line;
634 explicit After(int line_) : line(line_) {}
635 bool operator()(PPDefinition &p) const {
636 return p.line > line;
640 void SCI_METHOD LexerCPP::Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
641 LexAccessor styler(pAccess);
643 CharacterSet setOKBeforeRE(CharacterSet::setNone, "([{=,:;!%^&*|?~+-");
644 CharacterSet setCouldBePostOp(CharacterSet::setNone, "+-");
646 CharacterSet setDoxygen(CharacterSet::setAlpha, "$@\\&<>#{}[]");
648 setWordStart = CharacterSet(CharacterSet::setAlpha, "_", 0x80, true);
650 CharacterSet setInvalidRawFirst(CharacterSet::setNone, " )\\\t\v\f\n");
652 if (options.identifiersAllowDollars) {
653 setWordStart.Add('$');
656 int chPrevNonWhite = ' ';
657 int visibleChars = 0;
658 bool lastWordWasUUID = false;
659 int styleBeforeDCKeyword = SCE_C_DEFAULT;
660 int styleBeforeTaskMarker = SCE_C_DEFAULT;
661 bool continuationLine = false;
662 bool isIncludePreprocessor = false;
663 bool isStringInPreprocessor = false;
664 bool inRERange = false;
665 bool seenDocKeyBrace = false;
667 int lineCurrent = styler.GetLine(startPos);
668 if ((MaskActive(initStyle) == SCE_C_PREPROCESSOR) ||
669 (MaskActive(initStyle) == SCE_C_COMMENTLINE) ||
670 (MaskActive(initStyle) == SCE_C_COMMENTLINEDOC)) {
671 // Set continuationLine if last character of previous line is '\'
672 if (lineCurrent > 0) {
673 int endLinePrevious = styler.LineEnd(lineCurrent - 1);
674 if (endLinePrevious > 0) {
675 continuationLine = styler.SafeGetCharAt(endLinePrevious-1) == '\\';
680 // look back to set chPrevNonWhite properly for better regex colouring
681 if (startPos > 0) {
682 int back = startPos;
683 while (--back && IsSpaceEquiv(MaskActive(styler.StyleAt(back))))
685 if (MaskActive(styler.StyleAt(back)) == SCE_C_OPERATOR) {
686 chPrevNonWhite = styler.SafeGetCharAt(back);
690 StyleContext sc(startPos, length, initStyle, styler, static_cast<unsigned char>(0xff));
691 LinePPState preproc = vlls.ForLine(lineCurrent);
693 bool definitionsChanged = false;
695 // Truncate ppDefineHistory before current line
697 if (!options.updatePreprocessor)
698 ppDefineHistory.clear();
700 std::vector<PPDefinition>::iterator itInvalid = std::find_if(ppDefineHistory.begin(), ppDefineHistory.end(), After(lineCurrent-1));
701 if (itInvalid != ppDefineHistory.end()) {
702 ppDefineHistory.erase(itInvalid, ppDefineHistory.end());
703 definitionsChanged = true;
706 SymbolTable preprocessorDefinitions = preprocessorDefinitionsStart;
707 for (std::vector<PPDefinition>::iterator itDef = ppDefineHistory.begin(); itDef != ppDefineHistory.end(); ++itDef) {
708 if (itDef->isUndef)
709 preprocessorDefinitions.erase(itDef->key);
710 else
711 preprocessorDefinitions[itDef->key] = SymbolValue(itDef->value, itDef->arguments);
714 std::string rawStringTerminator = rawStringTerminators.ValueAt(lineCurrent-1);
715 SparseState<std::string> rawSTNew(lineCurrent);
717 int activitySet = preproc.IsInactive() ? activeFlag : 0;
719 const WordClassifier &classifierIdentifiers = subStyles.Classifier(SCE_C_IDENTIFIER);
720 const WordClassifier &classifierDocKeyWords = subStyles.Classifier(SCE_C_COMMENTDOCKEYWORD);
722 int lineEndNext = styler.LineEnd(lineCurrent);
724 for (; sc.More();) {
726 if (sc.atLineStart) {
727 // Using MaskActive() is not needed in the following statement.
728 // Inside inactive preprocessor declaration, state will be reset anyway at the end of this block.
729 if ((sc.state == SCE_C_STRING) || (sc.state == SCE_C_CHARACTER)) {
730 // Prevent SCE_C_STRINGEOL from leaking back to previous line which
731 // ends with a line continuation by locking in the state up to this position.
732 sc.SetState(sc.state);
734 if ((MaskActive(sc.state) == SCE_C_PREPROCESSOR) && (!continuationLine)) {
735 sc.SetState(SCE_C_DEFAULT|activitySet);
737 // Reset states to beginning of colourise so no surprises
738 // if different sets of lines lexed.
739 visibleChars = 0;
740 lastWordWasUUID = false;
741 isIncludePreprocessor = false;
742 inRERange = false;
743 if (preproc.IsInactive()) {
744 activitySet = activeFlag;
745 sc.SetState(sc.state | activitySet);
749 if (sc.atLineEnd) {
750 lineCurrent++;
751 lineEndNext = styler.LineEnd(lineCurrent);
752 vlls.Add(lineCurrent, preproc);
753 if (rawStringTerminator != "") {
754 rawSTNew.Set(lineCurrent-1, rawStringTerminator);
758 // Handle line continuation generically.
759 if (sc.ch == '\\') {
760 if (static_cast<int>((sc.currentPos+1)) >= lineEndNext) {
761 lineCurrent++;
762 lineEndNext = styler.LineEnd(lineCurrent);
763 vlls.Add(lineCurrent, preproc);
764 sc.Forward();
765 if (sc.ch == '\r' && sc.chNext == '\n') {
766 // Even in UTF-8, \r and \n are separate
767 sc.Forward();
769 continuationLine = true;
770 sc.Forward();
771 continue;
775 const bool atLineEndBeforeSwitch = sc.atLineEnd;
777 // Determine if the current state should terminate.
778 switch (MaskActive(sc.state)) {
779 case SCE_C_OPERATOR:
780 sc.SetState(SCE_C_DEFAULT|activitySet);
781 break;
782 case SCE_C_NUMBER:
783 // We accept almost anything because of hex. and number suffixes
784 if (sc.ch == '_') {
785 sc.ChangeState(SCE_C_USERLITERAL|activitySet);
786 } else if (!(setWord.Contains(sc.ch)
787 || (sc.ch == '\'')
788 || ((sc.ch == '+' || sc.ch == '-') && (sc.chPrev == 'e' || sc.chPrev == 'E' ||
789 sc.chPrev == 'p' || sc.chPrev == 'P')))) {
790 sc.SetState(SCE_C_DEFAULT|activitySet);
792 break;
793 case SCE_C_USERLITERAL:
794 if (!(setWord.Contains(sc.ch)))
795 sc.SetState(SCE_C_DEFAULT|activitySet);
796 break;
797 case SCE_C_IDENTIFIER:
798 if (sc.atLineStart || sc.atLineEnd || !setWord.Contains(sc.ch) || (sc.ch == '.')) {
799 char s[1000];
800 if (caseSensitive) {
801 sc.GetCurrent(s, sizeof(s));
802 } else {
803 sc.GetCurrentLowered(s, sizeof(s));
805 if (keywords.InList(s)) {
806 lastWordWasUUID = strcmp(s, "uuid") == 0;
807 sc.ChangeState(SCE_C_WORD|activitySet);
808 } else if (keywords2.InList(s)) {
809 sc.ChangeState(SCE_C_WORD2|activitySet);
810 } else if (keywords4.InList(s)) {
811 sc.ChangeState(SCE_C_GLOBALCLASS|activitySet);
812 } else {
813 int subStyle = classifierIdentifiers.ValueFor(s);
814 if (subStyle >= 0) {
815 sc.ChangeState(subStyle|activitySet);
818 const bool literalString = sc.ch == '\"';
819 if (literalString || sc.ch == '\'') {
820 size_t lenS = strlen(s);
821 const bool raw = literalString && sc.chPrev == 'R' && !setInvalidRawFirst.Contains(sc.chNext);
822 if (raw)
823 s[lenS--] = '\0';
824 bool valid =
825 (lenS == 0) ||
826 ((lenS == 1) && ((s[0] == 'L') || (s[0] == 'u') || (s[0] == 'U'))) ||
827 ((lenS == 2) && literalString && (s[0] == 'u') && (s[1] == '8'));
828 if (valid) {
829 if (literalString) {
830 if (raw) {
831 // Set the style of the string prefix to SCE_C_STRINGRAW but then change to
832 // SCE_C_DEFAULT as that allows the raw string start code to run.
833 sc.ChangeState(SCE_C_STRINGRAW|activitySet);
834 sc.SetState(SCE_C_DEFAULT|activitySet);
835 } else {
836 sc.ChangeState(SCE_C_STRING|activitySet);
838 } else {
839 sc.ChangeState(SCE_C_CHARACTER|activitySet);
841 } else {
842 sc.SetState(SCE_C_DEFAULT | activitySet);
844 } else {
845 sc.SetState(SCE_C_DEFAULT|activitySet);
848 break;
849 case SCE_C_PREPROCESSOR:
850 if (options.stylingWithinPreprocessor) {
851 if (IsASpace(sc.ch)) {
852 sc.SetState(SCE_C_DEFAULT|activitySet);
854 } else if (isStringInPreprocessor && (sc.Match('>') || sc.Match('\"') || sc.atLineEnd)) {
855 isStringInPreprocessor = false;
856 } else if (!isStringInPreprocessor) {
857 if ((isIncludePreprocessor && sc.Match('<')) || sc.Match('\"')) {
858 isStringInPreprocessor = true;
859 } else if (sc.Match('/', '*')) {
860 if (sc.Match("/**") || sc.Match("/*!")) {
861 sc.SetState(SCE_C_PREPROCESSORCOMMENTDOC|activitySet);
862 } else {
863 sc.SetState(SCE_C_PREPROCESSORCOMMENT|activitySet);
865 sc.Forward(); // Eat the *
866 } else if (sc.Match('/', '/')) {
867 sc.SetState(SCE_C_DEFAULT|activitySet);
870 break;
871 case SCE_C_PREPROCESSORCOMMENT:
872 case SCE_C_PREPROCESSORCOMMENTDOC:
873 if (sc.Match('*', '/')) {
874 sc.Forward();
875 sc.ForwardSetState(SCE_C_PREPROCESSOR|activitySet);
876 continue; // Without advancing in case of '\'.
878 break;
879 case SCE_C_COMMENT:
880 if (sc.Match('*', '/')) {
881 sc.Forward();
882 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
883 } else {
884 styleBeforeTaskMarker = SCE_C_COMMENT;
885 highlightTaskMarker(sc, styler, activitySet, markerList, caseSensitive);
887 break;
888 case SCE_C_COMMENTDOC:
889 if (sc.Match('*', '/')) {
890 sc.Forward();
891 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
892 } else if (sc.ch == '@' || sc.ch == '\\') { // JavaDoc and Doxygen support
893 // Verify that we have the conditions to mark a comment-doc-keyword
894 if ((IsASpace(sc.chPrev) || sc.chPrev == '*') && (!IsASpace(sc.chNext))) {
895 styleBeforeDCKeyword = SCE_C_COMMENTDOC;
896 sc.SetState(SCE_C_COMMENTDOCKEYWORD|activitySet);
899 break;
900 case SCE_C_COMMENTLINE:
901 if (sc.atLineStart && !continuationLine) {
902 sc.SetState(SCE_C_DEFAULT|activitySet);
903 } else {
904 styleBeforeTaskMarker = SCE_C_COMMENTLINE;
905 highlightTaskMarker(sc, styler, activitySet, markerList, caseSensitive);
907 break;
908 case SCE_C_COMMENTLINEDOC:
909 if (sc.atLineStart && !continuationLine) {
910 sc.SetState(SCE_C_DEFAULT|activitySet);
911 } else if (sc.ch == '@' || sc.ch == '\\') { // JavaDoc and Doxygen support
912 // Verify that we have the conditions to mark a comment-doc-keyword
913 if ((IsASpace(sc.chPrev) || sc.chPrev == '/' || sc.chPrev == '!') && (!IsASpace(sc.chNext))) {
914 styleBeforeDCKeyword = SCE_C_COMMENTLINEDOC;
915 sc.SetState(SCE_C_COMMENTDOCKEYWORD|activitySet);
918 break;
919 case SCE_C_COMMENTDOCKEYWORD:
920 if ((styleBeforeDCKeyword == SCE_C_COMMENTDOC) && sc.Match('*', '/')) {
921 sc.ChangeState(SCE_C_COMMENTDOCKEYWORDERROR);
922 sc.Forward();
923 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
924 seenDocKeyBrace = false;
925 } else if (sc.ch == '[' || sc.ch == '{') {
926 seenDocKeyBrace = true;
927 } else if (!setDoxygen.Contains(sc.ch)
928 && !(seenDocKeyBrace && (sc.ch == ',' || sc.ch == '.'))) {
929 char s[100];
930 if (caseSensitive) {
931 sc.GetCurrent(s, sizeof(s));
932 } else {
933 sc.GetCurrentLowered(s, sizeof(s));
935 if (!(IsASpace(sc.ch) || (sc.ch == 0))) {
936 sc.ChangeState(SCE_C_COMMENTDOCKEYWORDERROR|activitySet);
937 } else if (!keywords3.InList(s + 1)) {
938 int subStyleCDKW = classifierDocKeyWords.ValueFor(s+1);
939 if (subStyleCDKW >= 0) {
940 sc.ChangeState(subStyleCDKW|activitySet);
941 } else {
942 sc.ChangeState(SCE_C_COMMENTDOCKEYWORDERROR|activitySet);
945 sc.SetState(styleBeforeDCKeyword|activitySet);
946 seenDocKeyBrace = false;
948 break;
949 case SCE_C_STRING:
950 if (sc.atLineEnd) {
951 sc.ChangeState(SCE_C_STRINGEOL|activitySet);
952 } else if (isIncludePreprocessor) {
953 if (sc.ch == '>') {
954 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
955 isIncludePreprocessor = false;
957 } else if (sc.ch == '\\') {
958 if (options.escapeSequence) {
959 sc.SetState(SCE_C_ESCAPESEQUENCE|activitySet);
960 escapeSeq.resetEscapeState(sc.chNext);
962 sc.Forward(); // Skip all characters after the backslash
963 } else if (sc.ch == '\"') {
964 if (sc.chNext == '_') {
965 sc.ChangeState(SCE_C_USERLITERAL|activitySet);
966 } else {
967 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
970 break;
971 case SCE_C_ESCAPESEQUENCE:
972 escapeSeq.digitsLeft--;
973 if (!escapeSeq.atEscapeEnd(sc.ch)) {
974 break;
976 if (sc.ch == '"') {
977 sc.SetState(SCE_C_STRING|activitySet);
978 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
979 } else if (sc.ch == '\\') {
980 escapeSeq.resetEscapeState(sc.chNext);
981 sc.Forward();
982 } else {
983 sc.SetState(SCE_C_STRING|activitySet);
984 if (sc.atLineEnd) {
985 sc.ChangeState(SCE_C_STRINGEOL|activitySet);
988 break;
989 case SCE_C_HASHQUOTEDSTRING:
990 if (sc.ch == '\\') {
991 if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
992 sc.Forward();
994 } else if (sc.ch == '\"') {
995 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
997 break;
998 case SCE_C_STRINGRAW:
999 if (sc.Match(rawStringTerminator.c_str())) {
1000 for (size_t termPos=rawStringTerminator.size(); termPos; termPos--)
1001 sc.Forward();
1002 sc.SetState(SCE_C_DEFAULT|activitySet);
1003 rawStringTerminator = "";
1005 break;
1006 case SCE_C_CHARACTER:
1007 if (sc.atLineEnd) {
1008 sc.ChangeState(SCE_C_STRINGEOL|activitySet);
1009 } else if (sc.ch == '\\') {
1010 if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
1011 sc.Forward();
1013 } else if (sc.ch == '\'') {
1014 if (sc.chNext == '_') {
1015 sc.ChangeState(SCE_C_USERLITERAL|activitySet);
1016 } else {
1017 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
1020 break;
1021 case SCE_C_REGEX:
1022 if (sc.atLineStart) {
1023 sc.SetState(SCE_C_DEFAULT|activitySet);
1024 } else if (! inRERange && sc.ch == '/') {
1025 sc.Forward();
1026 while ((sc.ch < 0x80) && islower(sc.ch))
1027 sc.Forward(); // gobble regex flags
1028 sc.SetState(SCE_C_DEFAULT|activitySet);
1029 } else if (sc.ch == '\\' && (static_cast<int>(sc.currentPos+1) < lineEndNext)) {
1030 // Gobble up the escaped character
1031 sc.Forward();
1032 } else if (sc.ch == '[') {
1033 inRERange = true;
1034 } else if (sc.ch == ']') {
1035 inRERange = false;
1037 break;
1038 case SCE_C_STRINGEOL:
1039 if (sc.atLineStart) {
1040 sc.SetState(SCE_C_DEFAULT|activitySet);
1042 break;
1043 case SCE_C_VERBATIM:
1044 if (options.verbatimStringsAllowEscapes && (sc.ch == '\\')) {
1045 sc.Forward(); // Skip all characters after the backslash
1046 } else if (sc.ch == '\"') {
1047 if (sc.chNext == '\"') {
1048 sc.Forward();
1049 } else {
1050 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
1053 break;
1054 case SCE_C_TRIPLEVERBATIM:
1055 if (sc.Match("\"\"\"")) {
1056 while (sc.Match('"')) {
1057 sc.Forward();
1059 sc.SetState(SCE_C_DEFAULT|activitySet);
1061 break;
1062 case SCE_C_UUID:
1063 if (sc.atLineEnd || sc.ch == ')') {
1064 sc.SetState(SCE_C_DEFAULT|activitySet);
1066 break;
1067 case SCE_C_TASKMARKER:
1068 if (isoperator(sc.ch) || IsASpace(sc.ch)) {
1069 sc.SetState(styleBeforeTaskMarker|activitySet);
1070 styleBeforeTaskMarker = SCE_C_DEFAULT;
1074 if (sc.atLineEnd && !atLineEndBeforeSwitch) {
1075 // State exit processing consumed characters up to end of line.
1076 lineCurrent++;
1077 lineEndNext = styler.LineEnd(lineCurrent);
1078 vlls.Add(lineCurrent, preproc);
1081 // Determine if a new state should be entered.
1082 if (MaskActive(sc.state) == SCE_C_DEFAULT) {
1083 if (sc.Match('@', '\"')) {
1084 sc.SetState(SCE_C_VERBATIM|activitySet);
1085 sc.Forward();
1086 } else if (options.triplequotedStrings && sc.Match("\"\"\"")) {
1087 sc.SetState(SCE_C_TRIPLEVERBATIM|activitySet);
1088 sc.Forward(2);
1089 } else if (options.hashquotedStrings && sc.Match('#', '\"')) {
1090 sc.SetState(SCE_C_HASHQUOTEDSTRING|activitySet);
1091 sc.Forward();
1092 } else if (options.backQuotedStrings && sc.Match('`')) {
1093 sc.SetState(SCE_C_STRINGRAW|activitySet);
1094 rawStringTerminator = "`";
1095 sc.Forward();
1096 } else if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
1097 if (lastWordWasUUID) {
1098 sc.SetState(SCE_C_UUID|activitySet);
1099 lastWordWasUUID = false;
1100 } else {
1101 sc.SetState(SCE_C_NUMBER|activitySet);
1103 } else if (!sc.atLineEnd && (setWordStart.Contains(sc.ch) || (sc.ch == '@'))) {
1104 if (lastWordWasUUID) {
1105 sc.SetState(SCE_C_UUID|activitySet);
1106 lastWordWasUUID = false;
1107 } else {
1108 sc.SetState(SCE_C_IDENTIFIER|activitySet);
1110 } else if (sc.Match('/', '*')) {
1111 if (sc.Match("/**") || sc.Match("/*!")) { // Support of Qt/Doxygen doc. style
1112 sc.SetState(SCE_C_COMMENTDOC|activitySet);
1113 } else {
1114 sc.SetState(SCE_C_COMMENT|activitySet);
1116 sc.Forward(); // Eat the * so it isn't used for the end of the comment
1117 } else if (sc.Match('/', '/')) {
1118 if ((sc.Match("///") && !sc.Match("////")) || sc.Match("//!"))
1119 // Support of Qt/Doxygen doc. style
1120 sc.SetState(SCE_C_COMMENTLINEDOC|activitySet);
1121 else
1122 sc.SetState(SCE_C_COMMENTLINE|activitySet);
1123 } else if (sc.ch == '/'
1124 && (setOKBeforeRE.Contains(chPrevNonWhite)
1125 || followsReturnKeyword(sc, styler))
1126 && (!setCouldBePostOp.Contains(chPrevNonWhite)
1127 || !FollowsPostfixOperator(sc, styler))) {
1128 sc.SetState(SCE_C_REGEX|activitySet); // JavaScript's RegEx
1129 inRERange = false;
1130 } else if (sc.ch == '\"') {
1131 if (sc.chPrev == 'R') {
1132 styler.Flush();
1133 if (MaskActive(styler.StyleAt(sc.currentPos - 1)) == SCE_C_STRINGRAW) {
1134 sc.SetState(SCE_C_STRINGRAW|activitySet);
1135 rawStringTerminator = ")";
1136 for (int termPos = sc.currentPos + 1;; termPos++) {
1137 char chTerminator = styler.SafeGetCharAt(termPos, '(');
1138 if (chTerminator == '(')
1139 break;
1140 rawStringTerminator += chTerminator;
1142 rawStringTerminator += '\"';
1143 } else {
1144 sc.SetState(SCE_C_STRING|activitySet);
1146 } else {
1147 sc.SetState(SCE_C_STRING|activitySet);
1149 isIncludePreprocessor = false; // ensure that '>' won't end the string
1150 } else if (isIncludePreprocessor && sc.ch == '<') {
1151 sc.SetState(SCE_C_STRING|activitySet);
1152 } else if (sc.ch == '\'') {
1153 sc.SetState(SCE_C_CHARACTER|activitySet);
1154 } else if (sc.ch == '#' && visibleChars == 0) {
1155 // Preprocessor commands are alone on their line
1156 sc.SetState(SCE_C_PREPROCESSOR|activitySet);
1157 // Skip whitespace between # and preprocessor word
1158 do {
1159 sc.Forward();
1160 } while ((sc.ch == ' ' || sc.ch == '\t') && sc.More());
1161 if (sc.atLineEnd) {
1162 sc.SetState(SCE_C_DEFAULT|activitySet);
1163 } else if (sc.Match("include")) {
1164 isIncludePreprocessor = true;
1165 } else {
1166 if (options.trackPreprocessor) {
1167 if (sc.Match("ifdef") || sc.Match("ifndef")) {
1168 bool isIfDef = sc.Match("ifdef");
1169 int i = isIfDef ? 5 : 6;
1170 std::string restOfLine = GetRestOfLine(styler, sc.currentPos + i + 1, false);
1171 bool foundDef = preprocessorDefinitions.find(restOfLine) != preprocessorDefinitions.end();
1172 preproc.StartSection(isIfDef == foundDef);
1173 } else if (sc.Match("if")) {
1174 std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 2, true);
1175 bool ifGood = EvaluateExpression(restOfLine, preprocessorDefinitions);
1176 preproc.StartSection(ifGood);
1177 } else if (sc.Match("else")) {
1178 if (!preproc.CurrentIfTaken()) {
1179 preproc.InvertCurrentLevel();
1180 activitySet = preproc.IsInactive() ? activeFlag : 0;
1181 if (!activitySet)
1182 sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
1183 } else if (!preproc.IsInactive()) {
1184 preproc.InvertCurrentLevel();
1185 activitySet = preproc.IsInactive() ? activeFlag : 0;
1186 if (!activitySet)
1187 sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
1189 } else if (sc.Match("elif")) {
1190 // Ensure only one chosen out of #if .. #elif .. #elif .. #else .. #endif
1191 if (!preproc.CurrentIfTaken()) {
1192 // Similar to #if
1193 std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 2, true);
1194 bool ifGood = EvaluateExpression(restOfLine, preprocessorDefinitions);
1195 if (ifGood) {
1196 preproc.InvertCurrentLevel();
1197 activitySet = preproc.IsInactive() ? activeFlag : 0;
1198 if (!activitySet)
1199 sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
1201 } else if (!preproc.IsInactive()) {
1202 preproc.InvertCurrentLevel();
1203 activitySet = preproc.IsInactive() ? activeFlag : 0;
1204 if (!activitySet)
1205 sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
1207 } else if (sc.Match("endif")) {
1208 preproc.EndSection();
1209 activitySet = preproc.IsInactive() ? activeFlag : 0;
1210 sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
1211 } else if (sc.Match("define")) {
1212 if (options.updatePreprocessor && !preproc.IsInactive()) {
1213 std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 6, true);
1214 size_t startName = 0;
1215 while ((startName < restOfLine.length()) && IsSpaceOrTab(restOfLine[startName]))
1216 startName++;
1217 size_t endName = startName;
1218 while ((endName < restOfLine.length()) && setWord.Contains(static_cast<unsigned char>(restOfLine[endName])))
1219 endName++;
1220 std::string key = restOfLine.substr(startName, endName-startName);
1221 if ((endName < restOfLine.length()) && (restOfLine.at(endName) == '(')) {
1222 // Macro
1223 size_t endArgs = endName;
1224 while ((endArgs < restOfLine.length()) && (restOfLine[endArgs] != ')'))
1225 endArgs++;
1226 std::string args = restOfLine.substr(endName + 1, endArgs - endName - 1);
1227 size_t startValue = endArgs+1;
1228 while ((startValue < restOfLine.length()) && IsSpaceOrTab(restOfLine[startValue]))
1229 startValue++;
1230 std::string value;
1231 if (startValue < restOfLine.length())
1232 value = restOfLine.substr(startValue);
1233 preprocessorDefinitions[key] = SymbolValue(value, args);
1234 ppDefineHistory.push_back(PPDefinition(lineCurrent, key, value, false, args));
1235 definitionsChanged = true;
1236 } else {
1237 // Value
1238 size_t startValue = endName;
1239 while ((startValue < restOfLine.length()) && IsSpaceOrTab(restOfLine[startValue]))
1240 startValue++;
1241 std::string value = restOfLine.substr(startValue);
1242 preprocessorDefinitions[key] = value;
1243 ppDefineHistory.push_back(PPDefinition(lineCurrent, key, value));
1244 definitionsChanged = true;
1247 } else if (sc.Match("undef")) {
1248 if (options.updatePreprocessor && !preproc.IsInactive()) {
1249 std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 5, true);
1250 std::vector<std::string> tokens = Tokenize(restOfLine);
1251 std::string key;
1252 if (tokens.size() >= 1) {
1253 key = tokens[0];
1254 preprocessorDefinitions.erase(key);
1255 ppDefineHistory.push_back(PPDefinition(lineCurrent, key, "", true));
1256 definitionsChanged = true;
1262 } else if (isoperator(sc.ch)) {
1263 sc.SetState(SCE_C_OPERATOR|activitySet);
1267 if (!IsASpace(sc.ch) && !IsSpaceEquiv(MaskActive(sc.state))) {
1268 chPrevNonWhite = sc.ch;
1269 visibleChars++;
1271 continuationLine = false;
1272 sc.Forward();
1274 const bool rawStringsChanged = rawStringTerminators.Merge(rawSTNew, lineCurrent);
1275 if (definitionsChanged || rawStringsChanged)
1276 styler.ChangeLexerState(startPos, startPos + length);
1277 sc.Complete();
1280 // Store both the current line's fold level and the next lines in the
1281 // level store to make it easy to pick up with each increment
1282 // and to make it possible to fiddle the current level for "} else {".
1284 void SCI_METHOD LexerCPP::Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
1286 if (!options.fold)
1287 return;
1289 LexAccessor styler(pAccess);
1291 unsigned int endPos = startPos + length;
1292 int visibleChars = 0;
1293 bool inLineComment = false;
1294 int lineCurrent = styler.GetLine(startPos);
1295 int levelCurrent = SC_FOLDLEVELBASE;
1296 if (lineCurrent > 0)
1297 levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
1298 unsigned int lineStartNext = styler.LineStart(lineCurrent+1);
1299 int levelMinCurrent = levelCurrent;
1300 int levelNext = levelCurrent;
1301 char chNext = styler[startPos];
1302 int styleNext = MaskActive(styler.StyleAt(startPos));
1303 int style = MaskActive(initStyle);
1304 const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
1305 for (unsigned int i = startPos; i < endPos; i++) {
1306 char ch = chNext;
1307 chNext = styler.SafeGetCharAt(i + 1);
1308 int stylePrev = style;
1309 style = styleNext;
1310 styleNext = MaskActive(styler.StyleAt(i + 1));
1311 bool atEOL = i == (lineStartNext-1);
1312 if ((style == SCE_C_COMMENTLINE) || (style == SCE_C_COMMENTLINEDOC))
1313 inLineComment = true;
1314 if (options.foldComment && options.foldCommentMultiline && IsStreamCommentStyle(style) && !inLineComment) {
1315 if (!IsStreamCommentStyle(stylePrev)) {
1316 levelNext++;
1317 } else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
1318 // Comments don't end at end of line and the next character may be unstyled.
1319 levelNext--;
1322 if (options.foldComment && options.foldCommentExplicit && ((style == SCE_C_COMMENTLINE) || options.foldExplicitAnywhere)) {
1323 if (userDefinedFoldMarkers) {
1324 if (styler.Match(i, options.foldExplicitStart.c_str())) {
1325 levelNext++;
1326 } else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
1327 levelNext--;
1329 } else {
1330 if ((ch == '/') && (chNext == '/')) {
1331 char chNext2 = styler.SafeGetCharAt(i + 2);
1332 if (chNext2 == '{') {
1333 levelNext++;
1334 } else if (chNext2 == '}') {
1335 levelNext--;
1340 if (options.foldPreprocessor && (style == SCE_C_PREPROCESSOR)) {
1341 if (ch == '#') {
1342 unsigned int j = i + 1;
1343 while ((j < endPos) && IsASpaceOrTab(styler.SafeGetCharAt(j))) {
1344 j++;
1346 if (styler.Match(j, "region") || styler.Match(j, "if")) {
1347 levelNext++;
1348 } else if (styler.Match(j, "end")) {
1349 levelNext--;
1353 if (options.foldSyntaxBased && (style == SCE_C_OPERATOR)) {
1354 if (ch == '{' || ch == '[') {
1355 // Measure the minimum before a '{' to allow
1356 // folding on "} else {"
1357 if (levelMinCurrent > levelNext) {
1358 levelMinCurrent = levelNext;
1360 levelNext++;
1361 } else if (ch == '}' || ch == ']') {
1362 levelNext--;
1365 if (!IsASpace(ch))
1366 visibleChars++;
1367 if (atEOL || (i == endPos-1)) {
1368 int levelUse = levelCurrent;
1369 if (options.foldSyntaxBased && options.foldAtElse) {
1370 levelUse = levelMinCurrent;
1372 int lev = levelUse | levelNext << 16;
1373 if (visibleChars == 0 && options.foldCompact)
1374 lev |= SC_FOLDLEVELWHITEFLAG;
1375 if (levelUse < levelNext)
1376 lev |= SC_FOLDLEVELHEADERFLAG;
1377 if (lev != styler.LevelAt(lineCurrent)) {
1378 styler.SetLevel(lineCurrent, lev);
1380 lineCurrent++;
1381 lineStartNext = styler.LineStart(lineCurrent+1);
1382 levelCurrent = levelNext;
1383 levelMinCurrent = levelCurrent;
1384 if (atEOL && (i == static_cast<unsigned int>(styler.Length()-1))) {
1385 // There is an empty line at end of file so give it same level and empty
1386 styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
1388 visibleChars = 0;
1389 inLineComment = false;
1394 void LexerCPP::EvaluateTokens(std::vector<std::string> &tokens, const SymbolTable &preprocessorDefinitions) {
1396 // Remove whitespace tokens
1397 tokens.erase(std::remove_if(tokens.begin(), tokens.end(), OnlySpaceOrTab), tokens.end());
1399 // Evaluate defined statements to either 0 or 1
1400 for (size_t i=0; (i+1)<tokens.size();) {
1401 if (tokens[i] == "defined") {
1402 const char *val = "0";
1403 if (tokens[i+1] == "(") {
1404 if (((i + 2)<tokens.size()) && (tokens[i + 2] == ")")) {
1405 // defined()
1406 tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 3);
1407 } else if (((i+3)<tokens.size()) && (tokens[i+3] == ")")) {
1408 // defined(<identifier>)
1409 SymbolTable::const_iterator it = preprocessorDefinitions.find(tokens[i+2]);
1410 if (it != preprocessorDefinitions.end()) {
1411 val = "1";
1413 tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 4);
1414 } else {
1415 // Spurious '(' so erase as more likely to result in false
1416 tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 2);
1418 } else {
1419 // defined <identifier>
1420 SymbolTable::const_iterator it = preprocessorDefinitions.find(tokens[i+1]);
1421 if (it != preprocessorDefinitions.end()) {
1422 val = "1";
1425 tokens[i] = val;
1426 } else {
1427 i++;
1431 // Evaluate identifiers
1432 const size_t maxIterations = 100;
1433 size_t iterations = 0; // Limit number of iterations in case there is a recursive macro.
1434 for (size_t i = 0; (i<tokens.size()) && (iterations < maxIterations);) {
1435 iterations++;
1436 if (setWordStart.Contains(static_cast<unsigned char>(tokens[i][0]))) {
1437 SymbolTable::const_iterator it = preprocessorDefinitions.find(tokens[i]);
1438 if (it != preprocessorDefinitions.end()) {
1439 // Tokenize value
1440 std::vector<std::string> macroTokens = Tokenize(it->second.value);
1441 if (it->second.IsMacro()) {
1442 if ((i + 1 < tokens.size()) && (tokens.at(i + 1) == "(")) {
1443 // Create map of argument name to value
1444 std::vector<std::string> argumentNames = StringSplit(it->second.arguments, ',');
1445 std::map<std::string, std::string> arguments;
1446 size_t arg = 0;
1447 size_t tok = i+2;
1448 while ((tok < tokens.size()) && (arg < argumentNames.size()) && (tokens.at(tok) != ")")) {
1449 if (tokens.at(tok) != ",") {
1450 arguments[argumentNames.at(arg)] = tokens.at(tok);
1451 arg++;
1453 tok++;
1456 // Remove invocation
1457 tokens.erase(tokens.begin() + i, tokens.begin() + tok + 1);
1459 // Substitute values into macro
1460 macroTokens.erase(std::remove_if(macroTokens.begin(), macroTokens.end(), OnlySpaceOrTab), macroTokens.end());
1462 for (size_t iMacro = 0; iMacro < macroTokens.size();) {
1463 if (setWordStart.Contains(static_cast<unsigned char>(macroTokens[iMacro][0]))) {
1464 std::map<std::string, std::string>::const_iterator itFind = arguments.find(macroTokens[iMacro]);
1465 if (itFind != arguments.end()) {
1466 // TODO: Possible that value will be expression so should insert tokenized form
1467 macroTokens[iMacro] = itFind->second;
1470 iMacro++;
1473 // Insert results back into tokens
1474 tokens.insert(tokens.begin() + i, macroTokens.begin(), macroTokens.end());
1476 } else {
1477 i++;
1479 } else {
1480 // Remove invocation
1481 tokens.erase(tokens.begin() + i);
1482 // Insert results back into tokens
1483 tokens.insert(tokens.begin() + i, macroTokens.begin(), macroTokens.end());
1485 } else {
1486 // Identifier not found
1487 tokens.erase(tokens.begin() + i);
1489 } else {
1490 i++;
1494 // Find bracketed subexpressions and recurse on them
1495 BracketPair bracketPair = FindBracketPair(tokens);
1496 while (bracketPair.itBracket != tokens.end()) {
1497 std::vector<std::string> inBracket(bracketPair.itBracket + 1, bracketPair.itEndBracket);
1498 EvaluateTokens(inBracket, preprocessorDefinitions);
1500 // The insertion is done before the removal because there were failures with the opposite approach
1501 tokens.insert(bracketPair.itBracket, inBracket.begin(), inBracket.end());
1503 bracketPair = FindBracketPair(tokens);
1504 tokens.erase(bracketPair.itBracket, bracketPair.itEndBracket + 1);
1506 bracketPair = FindBracketPair(tokens);
1509 // Evaluate logical negations
1510 for (size_t j=0; (j+1)<tokens.size();) {
1511 if (setNegationOp.Contains(tokens[j][0])) {
1512 int isTrue = atoi(tokens[j+1].c_str());
1513 if (tokens[j] == "!")
1514 isTrue = !isTrue;
1515 std::vector<std::string>::iterator itInsert =
1516 tokens.erase(tokens.begin() + j, tokens.begin() + j + 2);
1517 tokens.insert(itInsert, isTrue ? "1" : "0");
1518 } else {
1519 j++;
1523 // Evaluate expressions in precedence order
1524 enum precedence { precArithmetic, precRelative, precLogical };
1525 for (int prec=precArithmetic; prec <= precLogical; prec++) {
1526 // Looking at 3 tokens at a time so end at 2 before end
1527 for (size_t k=0; (k+2)<tokens.size();) {
1528 char chOp = tokens[k+1][0];
1529 if (
1530 ((prec==precArithmetic) && setArithmethicOp.Contains(chOp)) ||
1531 ((prec==precRelative) && setRelOp.Contains(chOp)) ||
1532 ((prec==precLogical) && setLogicalOp.Contains(chOp))
1534 int valA = atoi(tokens[k].c_str());
1535 int valB = atoi(tokens[k+2].c_str());
1536 int result = 0;
1537 if (tokens[k+1] == "+")
1538 result = valA + valB;
1539 else if (tokens[k+1] == "-")
1540 result = valA - valB;
1541 else if (tokens[k+1] == "*")
1542 result = valA * valB;
1543 else if (tokens[k+1] == "/")
1544 result = valA / (valB ? valB : 1);
1545 else if (tokens[k+1] == "%")
1546 result = valA % (valB ? valB : 1);
1547 else if (tokens[k+1] == "<")
1548 result = valA < valB;
1549 else if (tokens[k+1] == "<=")
1550 result = valA <= valB;
1551 else if (tokens[k+1] == ">")
1552 result = valA > valB;
1553 else if (tokens[k+1] == ">=")
1554 result = valA >= valB;
1555 else if (tokens[k+1] == "==")
1556 result = valA == valB;
1557 else if (tokens[k+1] == "!=")
1558 result = valA != valB;
1559 else if (tokens[k+1] == "||")
1560 result = valA || valB;
1561 else if (tokens[k+1] == "&&")
1562 result = valA && valB;
1563 char sResult[30];
1564 sprintf(sResult, "%d", result);
1565 std::vector<std::string>::iterator itInsert =
1566 tokens.erase(tokens.begin() + k, tokens.begin() + k + 3);
1567 tokens.insert(itInsert, sResult);
1568 } else {
1569 k++;
1575 std::vector<std::string> LexerCPP::Tokenize(const std::string &expr) const {
1576 // Break into tokens
1577 std::vector<std::string> tokens;
1578 const char *cp = expr.c_str();
1579 while (*cp) {
1580 std::string word;
1581 if (setWord.Contains(static_cast<unsigned char>(*cp))) {
1582 // Identifiers and numbers
1583 while (setWord.Contains(static_cast<unsigned char>(*cp))) {
1584 word += *cp;
1585 cp++;
1587 } else if (IsSpaceOrTab(*cp)) {
1588 while (IsSpaceOrTab(*cp)) {
1589 word += *cp;
1590 cp++;
1592 } else if (setRelOp.Contains(static_cast<unsigned char>(*cp))) {
1593 word += *cp;
1594 cp++;
1595 if (setRelOp.Contains(static_cast<unsigned char>(*cp))) {
1596 word += *cp;
1597 cp++;
1599 } else if (setLogicalOp.Contains(static_cast<unsigned char>(*cp))) {
1600 word += *cp;
1601 cp++;
1602 if (setLogicalOp.Contains(static_cast<unsigned char>(*cp))) {
1603 word += *cp;
1604 cp++;
1606 } else {
1607 // Should handle strings, characters, and comments here
1608 word += *cp;
1609 cp++;
1611 tokens.push_back(word);
1613 return tokens;
1616 bool LexerCPP::EvaluateExpression(const std::string &expr, const SymbolTable &preprocessorDefinitions) {
1617 std::vector<std::string> tokens = Tokenize(expr);
1619 EvaluateTokens(tokens, preprocessorDefinitions);
1621 // "0" or "" -> false else true
1622 bool isFalse = tokens.empty() ||
1623 ((tokens.size() == 1) && ((tokens[0] == "") || tokens[0] == "0"));
1624 return !isFalse;
// Lexer module objects for the two variants of this lexer. Each one pairs a
// SCLEX_* identifier with a LexerCPP factory function, a language name
// ("cpp" / "cppnocase") and the shared cppWordLists descriptions; the second
// uses the case-insensitive factory.
LexerModule lmCPP(SCLEX_CPP, LexerCPP::LexerFactoryCPP, "cpp", cppWordLists);
LexerModule lmCPPNoCase(SCLEX_CPPNOCASE, LexerCPP::LexerFactoryCPPInsensitive, "cppnocase", cppWordLists);