Update Scintilla to version 3.6.5
[geany-mirror.git] / scintilla / lexers / LexCPP.cxx
blob4261084d47b3b86c4dfe1d1c9213579991803afe
1 // Scintilla source code edit control
2 /** @file LexCPP.cxx
3 ** Lexer for C++, C, Java, and JavaScript.
4 ** Further folding features and configuration properties added by "Udo Lechner" <dlchnr(at)gmx(dot)net>
5 **/
6 // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
7 // The License.txt file describes the conditions under which this software may be distributed.
9 #include <stdlib.h>
10 #include <string.h>
11 #include <stdio.h>
12 #include <stdarg.h>
13 #include <assert.h>
14 #include <ctype.h>
16 #include <string>
17 #include <vector>
18 #include <map>
19 #include <algorithm>
21 #include "ILexer.h"
22 #include "Scintilla.h"
23 #include "SciLexer.h"
25 #include "WordList.h"
26 #include "LexAccessor.h"
27 #include "Accessor.h"
28 #include "StyleContext.h"
29 #include "CharacterSet.h"
30 #include "LexerModule.h"
31 #include "OptionSet.h"
32 #include "SparseState.h"
33 #include "SubStyles.h"
35 #ifdef SCI_NAMESPACE
36 using namespace Scintilla;
37 #endif
39 namespace {
40 // Use an unnamed namespace to protect the functions and classes from name conflicts
42 bool IsSpaceEquiv(int state) {
43 return (state <= SCE_C_COMMENTDOC) ||
44 // including SCE_C_DEFAULT, SCE_C_COMMENT, SCE_C_COMMENTLINE
45 (state == SCE_C_COMMENTLINEDOC) || (state == SCE_C_COMMENTDOCKEYWORD) ||
46 (state == SCE_C_COMMENTDOCKEYWORDERROR);
49 // Preconditions: sc.currentPos points to a character after '+' or '-'.
50 // The test for pos reaching 0 should be redundant,
51 // and is in only for safety measures.
52 // Limitation: this code will give the incorrect answer for code like
53 // a = b+++/ptn/...
54 // Putting a space between the '++' post-inc operator and the '+' binary op
55 // fixes this, and is highly recommended for readability anyway.
56 bool FollowsPostfixOperator(StyleContext &sc, LexAccessor &styler) {
57 Sci_Position pos = (Sci_Position) sc.currentPos;
58 while (--pos > 0) {
59 char ch = styler[pos];
60 if (ch == '+' || ch == '-') {
61 return styler[pos - 1] == ch;
64 return false;
67 bool followsReturnKeyword(StyleContext &sc, LexAccessor &styler) {
68 // Don't look at styles, so no need to flush.
69 Sci_Position pos = (Sci_Position) sc.currentPos;
70 Sci_Position currentLine = styler.GetLine(pos);
71 Sci_Position lineStartPos = styler.LineStart(currentLine);
72 while (--pos > lineStartPos) {
73 char ch = styler.SafeGetCharAt(pos);
74 if (ch != ' ' && ch != '\t') {
75 break;
78 const char *retBack = "nruter";
79 const char *s = retBack;
80 while (*s
81 && pos >= lineStartPos
82 && styler.SafeGetCharAt(pos) == *s) {
83 s++;
84 pos--;
86 return !*s;
// True only for the two horizontal blank characters: space and tab.
bool IsSpaceOrTab(int ch) {
	switch (ch) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
// True when the string holds nothing except spaces and tabs.
// An empty string therefore also yields true.
bool OnlySpaceOrTab(const std::string &s) {
	return s.find_first_not_of(" \t") == std::string::npos;
}
// Split text into the pieces found between occurrences of separator.
// Empty text gives an empty vector; otherwise the result always has one more
// element than there are separators, so adjacent separators produce empty pieces.
std::vector<std::string> StringSplit(const std::string &text, int separator) {
	std::vector<std::string> pieces;
	if (text.empty())
		return pieces;
	pieces.push_back(std::string());
	for (std::string::size_type i = 0; i < text.size(); ++i) {
		const char c = text[i];
		if (c == separator)
			pieces.push_back(std::string());	// begin the next piece
		else
			pieces.back().push_back(c);
	}
	return pieces;
}
// Locations of an opening "(" token and its matching ")" token in a token list.
struct BracketPair {
	std::vector<std::string>::iterator itBracket;
	std::vector<std::string>::iterator itEndBracket;
};

// Find the first "(" token and the ")" token that balances it.
// When there is no "(" or it is never closed, both iterators in the result
// equal tokens.end().
BracketPair FindBracketPair(std::vector<std::string> &tokens) {
	typedef std::vector<std::string>::iterator TokenIt;
	const TokenIt last = tokens.end();

	BracketPair notFound;
	notFound.itBracket = last;
	notFound.itEndBracket = last;

	const TokenIt opening = std::find(tokens.begin(), last, "(");
	int depth = 0;
	for (TokenIt cur = opening; cur != last; ++cur) {
		if (*cur == "(") {
			++depth;
		} else if (*cur == ")" && --depth == 0) {
			// Nesting is back to the outermost level: this closes the first bracket.
			BracketPair matched;
			matched.itBracket = opening;
			matched.itEndBracket = cur;
			return matched;
		}
	}
	return notFound;
}
143 void highlightTaskMarker(StyleContext &sc, LexAccessor &styler,
144 int activity, WordList &markerList, bool caseSensitive){
145 if ((isoperator(sc.chPrev) || IsASpace(sc.chPrev)) && markerList.Length()) {
146 const int lengthMarker = 50;
147 char marker[lengthMarker+1];
148 Sci_Position currPos = (Sci_Position) sc.currentPos;
149 int i = 0;
150 while (i < lengthMarker) {
151 char ch = styler.SafeGetCharAt(currPos + i);
152 if (IsASpace(ch) || isoperator(ch)) {
153 break;
155 if (caseSensitive)
156 marker[i] = ch;
157 else
158 marker[i] = static_cast<char>(tolower(ch));
159 i++;
161 marker[i] = '\0';
162 if (markerList.InList(marker)) {
163 sc.SetState(SCE_C_TASKMARKER|activity);
168 struct EscapeSequence {
169 int digitsLeft;
170 CharacterSet setHexDigits;
171 CharacterSet setOctDigits;
172 CharacterSet setNoneNumeric;
173 CharacterSet *escapeSetValid;
174 EscapeSequence() {
175 digitsLeft = 0;
176 escapeSetValid = 0;
177 setHexDigits = CharacterSet(CharacterSet::setDigits, "ABCDEFabcdef");
178 setOctDigits = CharacterSet(CharacterSet::setNone, "01234567");
180 void resetEscapeState(int nextChar) {
181 digitsLeft = 0;
182 escapeSetValid = &setNoneNumeric;
183 if (nextChar == 'U') {
184 digitsLeft = 9;
185 escapeSetValid = &setHexDigits;
186 } else if (nextChar == 'u') {
187 digitsLeft = 5;
188 escapeSetValid = &setHexDigits;
189 } else if (nextChar == 'x') {
190 digitsLeft = 5;
191 escapeSetValid = &setHexDigits;
192 } else if (setOctDigits.Contains(nextChar)) {
193 digitsLeft = 3;
194 escapeSetValid = &setOctDigits;
197 bool atEscapeEnd(int currChar) const {
198 return (digitsLeft <= 0) || !escapeSetValid->Contains(currChar);
202 std::string GetRestOfLine(LexAccessor &styler, Sci_Position start, bool allowSpace) {
203 std::string restOfLine;
204 Sci_Position i =0;
205 char ch = styler.SafeGetCharAt(start, '\n');
206 Sci_Position endLine = styler.LineEnd(styler.GetLine(start));
207 while (((start+i) < endLine) && (ch != '\r')) {
208 char chNext = styler.SafeGetCharAt(start + i + 1, '\n');
209 if (ch == '/' && (chNext == '/' || chNext == '*'))
210 break;
211 if (allowSpace || (ch != ' '))
212 restOfLine += ch;
213 i++;
214 ch = chNext;
216 return restOfLine;
219 bool IsStreamCommentStyle(int style) {
220 return style == SCE_C_COMMENT ||
221 style == SCE_C_COMMENTDOC ||
222 style == SCE_C_COMMENTDOCKEYWORD ||
223 style == SCE_C_COMMENTDOCKEYWORDERROR;
226 struct PPDefinition {
227 Sci_Position line;
228 std::string key;
229 std::string value;
230 bool isUndef;
231 std::string arguments;
232 PPDefinition(Sci_Position line_, const std::string &key_, const std::string &value_, bool isUndef_ = false, std::string arguments_="") :
233 line(line_), key(key_), value(value_), isUndef(isUndef_), arguments(arguments_) {
// Preprocessor conditional state for one line, stored as bit sets indexed by
// nesting level. Only levels 0..31 are tracked; sections nested deeper are
// counted by level but never change the bit sets.
class LinePPState {
	// Bit n set: the section at nesting level n is switched off.
	int state;
	// Bit n set: a branch at nesting level n has been switched on at some point.
	int ifTaken;
	// Current nesting level; -1 while outside every section.
	int level;
	bool ValidLevel() const {
		return level >= 0 && level < 32;
	}
	// Bit mask for the current level, or 0 when the level is not tracked.
	// Shifting by a negative count or by the full width of int is undefined,
	// so the shift is only performed for valid levels and on an unsigned value.
	int maskLevel() const {
		if (!ValidLevel())
			return 0;
		return static_cast<int>(1u << level);
	}
public:
	LinePPState() : state(0), ifTaken(0), level(-1) {
	}
	// True when any tracked level is switched off.
	bool IsInactive() const {
		return state != 0;
	}
	// True when a branch at the current level has already been switched on.
	// Always false when the current level is not tracked.
	bool CurrentIfTaken() const {
		return (ifTaken & maskLevel()) != 0;
	}
	// Enter a new nesting level whose first branch is switched on or off.
	void StartSection(bool on) {
		level++;
		if (ValidLevel()) {
			if (on) {
				state &= ~maskLevel();
				ifTaken |= maskLevel();
			} else {
				state |= maskLevel();
				ifTaken &= ~maskLevel();
			}
		}
	}
	// Leave the current nesting level, clearing its bits.
	void EndSection() {
		if (ValidLevel()) {
			state &= ~maskLevel();
			ifTaken &= ~maskLevel();
		}
		level--;
	}
	// Flip the on/off bit of the current level and record that a branch was taken.
	void InvertCurrentLevel() {
		if (ValidLevel()) {
			state ^= maskLevel();
			ifTaken |= maskLevel();
		}
	}
};
283 // Hold the preprocessor state for each line seen.
284 // Currently one entry per line but could become sparse with just one entry per preprocessor line.
285 class PPStates {
286 std::vector<LinePPState> vlls;
287 public:
288 LinePPState ForLine(Sci_Position line) const {
289 if ((line > 0) && (vlls.size() > static_cast<size_t>(line))) {
290 return vlls[line];
291 } else {
292 return LinePPState();
295 void Add(Sci_Position line, LinePPState lls) {
296 vlls.resize(line+1);
297 vlls[line] = lls;
301 // An individual named option for use in an OptionSet
// Settings that control LexerCPP. Each field is tied to a named property by OptionSetCPP.
struct OptionsCPP {
	// Lexing
	bool stylingWithinPreprocessor;
	bool identifiersAllowDollars;
	bool trackPreprocessor;
	bool updatePreprocessor;
	bool verbatimStringsAllowEscapes;
	bool triplequotedStrings;
	bool hashquotedStrings;
	bool backQuotedStrings;
	bool escapeSequence;
	// Folding
	bool fold;
	bool foldSyntaxBased;
	bool foldComment;
	bool foldCommentMultiline;
	bool foldCommentExplicit;
	std::string foldExplicitStart;
	std::string foldExplicitEnd;
	bool foldExplicitAnywhere;
	bool foldPreprocessor;
	bool foldCompact;
	bool foldAtElse;
	// Defaults: dollars in identifiers, preprocessor tracking and updating,
	// syntax based folding and both comment folding refinements are on;
	// everything else is off and the explicit fold strings are empty.
	OptionsCPP() :
		stylingWithinPreprocessor(false),
		identifiersAllowDollars(true),
		trackPreprocessor(true),
		updatePreprocessor(true),
		verbatimStringsAllowEscapes(false),
		triplequotedStrings(false),
		hashquotedStrings(false),
		backQuotedStrings(false),
		escapeSequence(false),
		fold(false),
		foldSyntaxBased(true),
		foldComment(false),
		foldCommentMultiline(true),
		foldCommentExplicit(true),
		foldExplicitStart(),
		foldExplicitEnd(),
		foldExplicitAnywhere(false),
		foldPreprocessor(false),
		foldCompact(false),
		foldAtElse(false) {
	}
};
// Descriptions of the word lists accepted by the lexer, handed to
// OptionSet::DefineWordListSets. The order matches the list numbers
// used by LexerCPP::WordListSet (0 to 5).
const char *const cppWordLists[] = {
	"Primary keywords and identifiers",		// 0
	"Secondary keywords and identifiers",		// 1
	"Documentation comment keywords",		// 2
	"Global classes and typedefs",			// 3
	"Preprocessor definitions",			// 4
	"Task marker and error marker keywords",	// 5
	0,	// null entry marks the end of the list
};
359 struct OptionSetCPP : public OptionSet<OptionsCPP> {
360 OptionSetCPP() {
361 DefineProperty("styling.within.preprocessor", &OptionsCPP::stylingWithinPreprocessor,
362 "For C++ code, determines whether all preprocessor code is styled in the "
363 "preprocessor style (0, the default) or only from the initial # to the end "
364 "of the command word(1).");
366 DefineProperty("lexer.cpp.allow.dollars", &OptionsCPP::identifiersAllowDollars,
367 "Set to 0 to disallow the '$' character in identifiers with the cpp lexer.");
369 DefineProperty("lexer.cpp.track.preprocessor", &OptionsCPP::trackPreprocessor,
370 "Set to 1 to interpret #if/#else/#endif to grey out code that is not active.");
372 DefineProperty("lexer.cpp.update.preprocessor", &OptionsCPP::updatePreprocessor,
373 "Set to 1 to update preprocessor definitions when #define found.");
375 DefineProperty("lexer.cpp.verbatim.strings.allow.escapes", &OptionsCPP::verbatimStringsAllowEscapes,
376 "Set to 1 to allow verbatim strings to contain escape sequences.");
378 DefineProperty("lexer.cpp.triplequoted.strings", &OptionsCPP::triplequotedStrings,
379 "Set to 1 to enable highlighting of triple-quoted strings.");
381 DefineProperty("lexer.cpp.hashquoted.strings", &OptionsCPP::hashquotedStrings,
382 "Set to 1 to enable highlighting of hash-quoted strings.");
384 DefineProperty("lexer.cpp.backquoted.strings", &OptionsCPP::backQuotedStrings,
385 "Set to 1 to enable highlighting of back-quoted raw strings .");
387 DefineProperty("lexer.cpp.escape.sequence", &OptionsCPP::escapeSequence,
388 "Set to 1 to enable highlighting of escape sequences in strings");
390 DefineProperty("fold", &OptionsCPP::fold);
392 DefineProperty("fold.cpp.syntax.based", &OptionsCPP::foldSyntaxBased,
393 "Set this property to 0 to disable syntax based folding.");
395 DefineProperty("fold.comment", &OptionsCPP::foldComment,
396 "This option enables folding multi-line comments and explicit fold points when using the C++ lexer. "
397 "Explicit fold points allows adding extra folding by placing a //{ comment at the start and a //} "
398 "at the end of a section that should fold.");
400 DefineProperty("fold.cpp.comment.multiline", &OptionsCPP::foldCommentMultiline,
401 "Set this property to 0 to disable folding multi-line comments when fold.comment=1.");
403 DefineProperty("fold.cpp.comment.explicit", &OptionsCPP::foldCommentExplicit,
404 "Set this property to 0 to disable folding explicit fold points when fold.comment=1.");
406 DefineProperty("fold.cpp.explicit.start", &OptionsCPP::foldExplicitStart,
407 "The string to use for explicit fold start points, replacing the standard //{.");
409 DefineProperty("fold.cpp.explicit.end", &OptionsCPP::foldExplicitEnd,
410 "The string to use for explicit fold end points, replacing the standard //}.");
412 DefineProperty("fold.cpp.explicit.anywhere", &OptionsCPP::foldExplicitAnywhere,
413 "Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
415 DefineProperty("fold.preprocessor", &OptionsCPP::foldPreprocessor,
416 "This option enables folding preprocessor directives when using the C++ lexer. "
417 "Includes C#'s explicit #region and #endregion folding directives.");
419 DefineProperty("fold.compact", &OptionsCPP::foldCompact);
421 DefineProperty("fold.at.else", &OptionsCPP::foldAtElse,
422 "This option enables C++ folding on a \"} else {\" line of an if statement.");
424 DefineWordListSets(cppWordLists);
428 const char styleSubable[] = {SCE_C_IDENTIFIER, SCE_C_COMMENTDOCKEYWORD, 0};
432 class LexerCPP : public ILexerWithSubStyles {
433 bool caseSensitive;
434 CharacterSet setWord;
435 CharacterSet setNegationOp;
436 CharacterSet setArithmethicOp;
437 CharacterSet setRelOp;
438 CharacterSet setLogicalOp;
439 CharacterSet setWordStart;
440 PPStates vlls;
441 std::vector<PPDefinition> ppDefineHistory;
442 WordList keywords;
443 WordList keywords2;
444 WordList keywords3;
445 WordList keywords4;
446 WordList ppDefinitions;
447 WordList markerList;
448 struct SymbolValue {
449 std::string value;
450 std::string arguments;
451 SymbolValue(const std::string &value_="", const std::string &arguments_="") : value(value_), arguments(arguments_) {
453 SymbolValue &operator = (const std::string &value_) {
454 value = value_;
455 arguments.clear();
456 return *this;
458 bool IsMacro() const {
459 return !arguments.empty();
462 typedef std::map<std::string, SymbolValue> SymbolTable;
463 SymbolTable preprocessorDefinitionsStart;
464 OptionsCPP options;
465 OptionSetCPP osCPP;
466 EscapeSequence escapeSeq;
467 SparseState<std::string> rawStringTerminators;
468 enum { activeFlag = 0x40 };
469 enum { ssIdentifier, ssDocKeyword };
470 SubStyles subStyles;
471 public:
472 explicit LexerCPP(bool caseSensitive_) :
473 caseSensitive(caseSensitive_),
474 setWord(CharacterSet::setAlphaNum, "._", 0x80, true),
475 setNegationOp(CharacterSet::setNone, "!"),
476 setArithmethicOp(CharacterSet::setNone, "+-/*%"),
477 setRelOp(CharacterSet::setNone, "=!<>"),
478 setLogicalOp(CharacterSet::setNone, "|&"),
479 subStyles(styleSubable, 0x80, 0x40, activeFlag) {
481 virtual ~LexerCPP() {
483 void SCI_METHOD Release() {
484 delete this;
486 int SCI_METHOD Version() const {
487 return lvSubStyles;
489 const char * SCI_METHOD PropertyNames() {
490 return osCPP.PropertyNames();
492 int SCI_METHOD PropertyType(const char *name) {
493 return osCPP.PropertyType(name);
495 const char * SCI_METHOD DescribeProperty(const char *name) {
496 return osCPP.DescribeProperty(name);
498 Sci_Position SCI_METHOD PropertySet(const char *key, const char *val);
499 const char * SCI_METHOD DescribeWordListSets() {
500 return osCPP.DescribeWordListSets();
502 Sci_Position SCI_METHOD WordListSet(int n, const char *wl);
503 void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess);
504 void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess);
506 void * SCI_METHOD PrivateCall(int, void *) {
507 return 0;
510 int SCI_METHOD LineEndTypesSupported() {
511 return SC_LINE_END_TYPE_UNICODE;
514 int SCI_METHOD AllocateSubStyles(int styleBase, int numberStyles) {
515 return subStyles.Allocate(styleBase, numberStyles);
517 int SCI_METHOD SubStylesStart(int styleBase) {
518 return subStyles.Start(styleBase);
520 int SCI_METHOD SubStylesLength(int styleBase) {
521 return subStyles.Length(styleBase);
523 int SCI_METHOD StyleFromSubStyle(int subStyle) {
524 int styleBase = subStyles.BaseStyle(MaskActive(subStyle));
525 int active = subStyle & activeFlag;
526 return styleBase | active;
528 int SCI_METHOD PrimaryStyleFromStyle(int style) {
529 return MaskActive(style);
531 void SCI_METHOD FreeSubStyles() {
532 subStyles.Free();
534 void SCI_METHOD SetIdentifiers(int style, const char *identifiers) {
535 subStyles.SetIdentifiers(style, identifiers);
537 int SCI_METHOD DistanceToSecondaryStyles() {
538 return activeFlag;
540 const char * SCI_METHOD GetSubStyleBases() {
541 return styleSubable;
544 static ILexer *LexerFactoryCPP() {
545 return new LexerCPP(true);
547 static ILexer *LexerFactoryCPPInsensitive() {
548 return new LexerCPP(false);
550 static int MaskActive(int style) {
551 return style & ~activeFlag;
553 void EvaluateTokens(std::vector<std::string> &tokens, const SymbolTable &preprocessorDefinitions);
554 std::vector<std::string> Tokenize(const std::string &expr) const;
555 bool EvaluateExpression(const std::string &expr, const SymbolTable &preprocessorDefinitions);
558 Sci_Position SCI_METHOD LexerCPP::PropertySet(const char *key, const char *val) {
559 if (osCPP.PropertySet(&options, key, val)) {
560 if (strcmp(key, "lexer.cpp.allow.dollars") == 0) {
561 setWord = CharacterSet(CharacterSet::setAlphaNum, "._", 0x80, true);
562 if (options.identifiersAllowDollars) {
563 setWord.Add('$');
566 return 0;
568 return -1;
571 Sci_Position SCI_METHOD LexerCPP::WordListSet(int n, const char *wl) {
572 WordList *wordListN = 0;
573 switch (n) {
574 case 0:
575 wordListN = &keywords;
576 break;
577 case 1:
578 wordListN = &keywords2;
579 break;
580 case 2:
581 wordListN = &keywords3;
582 break;
583 case 3:
584 wordListN = &keywords4;
585 break;
586 case 4:
587 wordListN = &ppDefinitions;
588 break;
589 case 5:
590 wordListN = &markerList;
591 break;
593 Sci_Position firstModification = -1;
594 if (wordListN) {
595 WordList wlNew;
596 wlNew.Set(wl);
597 if (*wordListN != wlNew) {
598 wordListN->Set(wl);
599 firstModification = 0;
600 if (n == 4) {
601 // Rebuild preprocessorDefinitions
602 preprocessorDefinitionsStart.clear();
603 for (int nDefinition = 0; nDefinition < ppDefinitions.Length(); nDefinition++) {
604 const char *cpDefinition = ppDefinitions.WordAt(nDefinition);
605 const char *cpEquals = strchr(cpDefinition, '=');
606 if (cpEquals) {
607 std::string name(cpDefinition, cpEquals - cpDefinition);
608 std::string val(cpEquals+1);
609 size_t bracket = name.find('(');
610 size_t bracketEnd = name.find(')');
611 if ((bracket != std::string::npos) && (bracketEnd != std::string::npos)) {
612 // Macro
613 std::string args = name.substr(bracket + 1, bracketEnd - bracket - 1);
614 name = name.substr(0, bracket);
615 preprocessorDefinitionsStart[name] = SymbolValue(val, args);
616 } else {
617 preprocessorDefinitionsStart[name] = val;
619 } else {
620 std::string name(cpDefinition);
621 std::string val("1");
622 preprocessorDefinitionsStart[name] = val;
628 return firstModification;
631 // Functor used to truncate history
632 struct After {
633 Sci_Position line;
634 explicit After(Sci_Position line_) : line(line_) {}
635 bool operator()(PPDefinition &p) const {
636 return p.line > line;
640 void SCI_METHOD LexerCPP::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
641 LexAccessor styler(pAccess);
643 CharacterSet setOKBeforeRE(CharacterSet::setNone, "([{=,:;!%^&*|?~+-");
644 CharacterSet setCouldBePostOp(CharacterSet::setNone, "+-");
646 CharacterSet setDoxygen(CharacterSet::setAlpha, "$@\\&<>#{}[]");
648 setWordStart = CharacterSet(CharacterSet::setAlpha, "_", 0x80, true);
650 CharacterSet setInvalidRawFirst(CharacterSet::setNone, " )\\\t\v\f\n");
652 if (options.identifiersAllowDollars) {
653 setWordStart.Add('$');
656 int chPrevNonWhite = ' ';
657 int visibleChars = 0;
658 bool lastWordWasUUID = false;
659 int styleBeforeDCKeyword = SCE_C_DEFAULT;
660 int styleBeforeTaskMarker = SCE_C_DEFAULT;
661 bool continuationLine = false;
662 bool isIncludePreprocessor = false;
663 bool isStringInPreprocessor = false;
664 bool inRERange = false;
665 bool seenDocKeyBrace = false;
667 Sci_Position lineCurrent = styler.GetLine(startPos);
668 if ((MaskActive(initStyle) == SCE_C_PREPROCESSOR) ||
669 (MaskActive(initStyle) == SCE_C_COMMENTLINE) ||
670 (MaskActive(initStyle) == SCE_C_COMMENTLINEDOC)) {
671 // Set continuationLine if last character of previous line is '\'
672 if (lineCurrent > 0) {
673 Sci_Position endLinePrevious = styler.LineEnd(lineCurrent - 1);
674 if (endLinePrevious > 0) {
675 continuationLine = styler.SafeGetCharAt(endLinePrevious-1) == '\\';
680 // look back to set chPrevNonWhite properly for better regex colouring
681 if (startPos > 0) {
682 Sci_Position back = startPos;
683 while (--back && IsSpaceEquiv(MaskActive(styler.StyleAt(back))))
685 if (MaskActive(styler.StyleAt(back)) == SCE_C_OPERATOR) {
686 chPrevNonWhite = styler.SafeGetCharAt(back);
690 StyleContext sc(startPos, length, initStyle, styler, static_cast<unsigned char>(0xff));
691 LinePPState preproc = vlls.ForLine(lineCurrent);
693 bool definitionsChanged = false;
695 // Truncate ppDefineHistory before current line
697 if (!options.updatePreprocessor)
698 ppDefineHistory.clear();
700 std::vector<PPDefinition>::iterator itInvalid = std::find_if(ppDefineHistory.begin(), ppDefineHistory.end(), After(lineCurrent-1));
701 if (itInvalid != ppDefineHistory.end()) {
702 ppDefineHistory.erase(itInvalid, ppDefineHistory.end());
703 definitionsChanged = true;
706 SymbolTable preprocessorDefinitions = preprocessorDefinitionsStart;
707 for (std::vector<PPDefinition>::iterator itDef = ppDefineHistory.begin(); itDef != ppDefineHistory.end(); ++itDef) {
708 if (itDef->isUndef)
709 preprocessorDefinitions.erase(itDef->key);
710 else
711 preprocessorDefinitions[itDef->key] = SymbolValue(itDef->value, itDef->arguments);
714 std::string rawStringTerminator = rawStringTerminators.ValueAt(lineCurrent-1);
715 SparseState<std::string> rawSTNew(lineCurrent);
717 int activitySet = preproc.IsInactive() ? activeFlag : 0;
719 const WordClassifier &classifierIdentifiers = subStyles.Classifier(SCE_C_IDENTIFIER);
720 const WordClassifier &classifierDocKeyWords = subStyles.Classifier(SCE_C_COMMENTDOCKEYWORD);
722 Sci_Position lineEndNext = styler.LineEnd(lineCurrent);
724 for (; sc.More();) {
726 if (sc.atLineStart) {
727 // Using MaskActive() is not needed in the following statement.
728 // Inside inactive preprocessor declaration, state will be reset anyway at the end of this block.
729 if ((sc.state == SCE_C_STRING) || (sc.state == SCE_C_CHARACTER)) {
730 // Prevent SCE_C_STRINGEOL from leaking back to previous line which
731 // ends with a line continuation by locking in the state up to this position.
732 sc.SetState(sc.state);
734 if ((MaskActive(sc.state) == SCE_C_PREPROCESSOR) && (!continuationLine)) {
735 sc.SetState(SCE_C_DEFAULT|activitySet);
737 // Reset states to beginning of colourise so no surprises
738 // if different sets of lines lexed.
739 visibleChars = 0;
740 lastWordWasUUID = false;
741 isIncludePreprocessor = false;
742 inRERange = false;
743 if (preproc.IsInactive()) {
744 activitySet = activeFlag;
745 sc.SetState(sc.state | activitySet);
749 if (sc.atLineEnd) {
750 lineCurrent++;
751 lineEndNext = styler.LineEnd(lineCurrent);
752 vlls.Add(lineCurrent, preproc);
753 if (rawStringTerminator != "") {
754 rawSTNew.Set(lineCurrent-1, rawStringTerminator);
758 // Handle line continuation generically.
759 if (sc.ch == '\\') {
760 if (static_cast<Sci_Position>((sc.currentPos+1)) >= lineEndNext) {
761 lineCurrent++;
762 lineEndNext = styler.LineEnd(lineCurrent);
763 vlls.Add(lineCurrent, preproc);
764 if (rawStringTerminator != "") {
765 rawSTNew.Set(lineCurrent-1, rawStringTerminator);
767 sc.Forward();
768 if (sc.ch == '\r' && sc.chNext == '\n') {
769 // Even in UTF-8, \r and \n are separate
770 sc.Forward();
772 continuationLine = true;
773 sc.Forward();
774 continue;
778 const bool atLineEndBeforeSwitch = sc.atLineEnd;
780 // Determine if the current state should terminate.
781 switch (MaskActive(sc.state)) {
782 case SCE_C_OPERATOR:
783 sc.SetState(SCE_C_DEFAULT|activitySet);
784 break;
785 case SCE_C_NUMBER:
786 // We accept almost anything because of hex. and number suffixes
787 if (sc.ch == '_') {
788 sc.ChangeState(SCE_C_USERLITERAL|activitySet);
789 } else if (!(setWord.Contains(sc.ch)
790 || (sc.ch == '\'')
791 || ((sc.ch == '+' || sc.ch == '-') && (sc.chPrev == 'e' || sc.chPrev == 'E' ||
792 sc.chPrev == 'p' || sc.chPrev == 'P')))) {
793 sc.SetState(SCE_C_DEFAULT|activitySet);
795 break;
796 case SCE_C_USERLITERAL:
797 if (!(setWord.Contains(sc.ch)))
798 sc.SetState(SCE_C_DEFAULT|activitySet);
799 break;
800 case SCE_C_IDENTIFIER:
801 if (sc.atLineStart || sc.atLineEnd || !setWord.Contains(sc.ch) || (sc.ch == '.')) {
802 char s[1000];
803 if (caseSensitive) {
804 sc.GetCurrent(s, sizeof(s));
805 } else {
806 sc.GetCurrentLowered(s, sizeof(s));
808 if (keywords.InList(s)) {
809 lastWordWasUUID = strcmp(s, "uuid") == 0;
810 sc.ChangeState(SCE_C_WORD|activitySet);
811 } else if (keywords2.InList(s)) {
812 sc.ChangeState(SCE_C_WORD2|activitySet);
813 } else if (keywords4.InList(s)) {
814 sc.ChangeState(SCE_C_GLOBALCLASS|activitySet);
815 } else {
816 int subStyle = classifierIdentifiers.ValueFor(s);
817 if (subStyle >= 0) {
818 sc.ChangeState(subStyle|activitySet);
821 const bool literalString = sc.ch == '\"';
822 if (literalString || sc.ch == '\'') {
823 size_t lenS = strlen(s);
824 const bool raw = literalString && sc.chPrev == 'R' && !setInvalidRawFirst.Contains(sc.chNext);
825 if (raw)
826 s[lenS--] = '\0';
827 bool valid =
828 (lenS == 0) ||
829 ((lenS == 1) && ((s[0] == 'L') || (s[0] == 'u') || (s[0] == 'U'))) ||
830 ((lenS == 2) && literalString && (s[0] == 'u') && (s[1] == '8'));
831 if (valid) {
832 if (literalString) {
833 if (raw) {
834 // Set the style of the string prefix to SCE_C_STRINGRAW but then change to
835 // SCE_C_DEFAULT as that allows the raw string start code to run.
836 sc.ChangeState(SCE_C_STRINGRAW|activitySet);
837 sc.SetState(SCE_C_DEFAULT|activitySet);
838 } else {
839 sc.ChangeState(SCE_C_STRING|activitySet);
841 } else {
842 sc.ChangeState(SCE_C_CHARACTER|activitySet);
844 } else {
845 sc.SetState(SCE_C_DEFAULT | activitySet);
847 } else {
848 sc.SetState(SCE_C_DEFAULT|activitySet);
851 break;
852 case SCE_C_PREPROCESSOR:
853 if (options.stylingWithinPreprocessor) {
854 if (IsASpace(sc.ch)) {
855 sc.SetState(SCE_C_DEFAULT|activitySet);
857 } else if (isStringInPreprocessor && (sc.Match('>') || sc.Match('\"') || sc.atLineEnd)) {
858 isStringInPreprocessor = false;
859 } else if (!isStringInPreprocessor) {
860 if ((isIncludePreprocessor && sc.Match('<')) || sc.Match('\"')) {
861 isStringInPreprocessor = true;
862 } else if (sc.Match('/', '*')) {
863 if (sc.Match("/**") || sc.Match("/*!")) {
864 sc.SetState(SCE_C_PREPROCESSORCOMMENTDOC|activitySet);
865 } else {
866 sc.SetState(SCE_C_PREPROCESSORCOMMENT|activitySet);
868 sc.Forward(); // Eat the *
869 } else if (sc.Match('/', '/')) {
870 sc.SetState(SCE_C_DEFAULT|activitySet);
873 break;
874 case SCE_C_PREPROCESSORCOMMENT:
875 case SCE_C_PREPROCESSORCOMMENTDOC:
876 if (sc.Match('*', '/')) {
877 sc.Forward();
878 sc.ForwardSetState(SCE_C_PREPROCESSOR|activitySet);
879 continue; // Without advancing in case of '\'.
881 break;
882 case SCE_C_COMMENT:
883 if (sc.Match('*', '/')) {
884 sc.Forward();
885 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
886 } else {
887 styleBeforeTaskMarker = SCE_C_COMMENT;
888 highlightTaskMarker(sc, styler, activitySet, markerList, caseSensitive);
890 break;
891 case SCE_C_COMMENTDOC:
892 if (sc.Match('*', '/')) {
893 sc.Forward();
894 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
895 } else if (sc.ch == '@' || sc.ch == '\\') { // JavaDoc and Doxygen support
896 // Verify that we have the conditions to mark a comment-doc-keyword
897 if ((IsASpace(sc.chPrev) || sc.chPrev == '*') && (!IsASpace(sc.chNext))) {
898 styleBeforeDCKeyword = SCE_C_COMMENTDOC;
899 sc.SetState(SCE_C_COMMENTDOCKEYWORD|activitySet);
902 break;
903 case SCE_C_COMMENTLINE:
904 if (sc.atLineStart && !continuationLine) {
905 sc.SetState(SCE_C_DEFAULT|activitySet);
906 } else {
907 styleBeforeTaskMarker = SCE_C_COMMENTLINE;
908 highlightTaskMarker(sc, styler, activitySet, markerList, caseSensitive);
910 break;
911 case SCE_C_COMMENTLINEDOC:
912 if (sc.atLineStart && !continuationLine) {
913 sc.SetState(SCE_C_DEFAULT|activitySet);
914 } else if (sc.ch == '@' || sc.ch == '\\') { // JavaDoc and Doxygen support
915 // Verify that we have the conditions to mark a comment-doc-keyword
916 if ((IsASpace(sc.chPrev) || sc.chPrev == '/' || sc.chPrev == '!') && (!IsASpace(sc.chNext))) {
917 styleBeforeDCKeyword = SCE_C_COMMENTLINEDOC;
918 sc.SetState(SCE_C_COMMENTDOCKEYWORD|activitySet);
921 break;
922 case SCE_C_COMMENTDOCKEYWORD:
923 if ((styleBeforeDCKeyword == SCE_C_COMMENTDOC) && sc.Match('*', '/')) {
924 sc.ChangeState(SCE_C_COMMENTDOCKEYWORDERROR);
925 sc.Forward();
926 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
927 seenDocKeyBrace = false;
928 } else if (sc.ch == '[' || sc.ch == '{') {
929 seenDocKeyBrace = true;
930 } else if (!setDoxygen.Contains(sc.ch)
931 && !(seenDocKeyBrace && (sc.ch == ',' || sc.ch == '.'))) {
932 char s[100];
933 if (caseSensitive) {
934 sc.GetCurrent(s, sizeof(s));
935 } else {
936 sc.GetCurrentLowered(s, sizeof(s));
938 if (!(IsASpace(sc.ch) || (sc.ch == 0))) {
939 sc.ChangeState(SCE_C_COMMENTDOCKEYWORDERROR|activitySet);
940 } else if (!keywords3.InList(s + 1)) {
941 int subStyleCDKW = classifierDocKeyWords.ValueFor(s+1);
942 if (subStyleCDKW >= 0) {
943 sc.ChangeState(subStyleCDKW|activitySet);
944 } else {
945 sc.ChangeState(SCE_C_COMMENTDOCKEYWORDERROR|activitySet);
948 sc.SetState(styleBeforeDCKeyword|activitySet);
949 seenDocKeyBrace = false;
951 break;
952 case SCE_C_STRING:
953 if (sc.atLineEnd) {
954 sc.ChangeState(SCE_C_STRINGEOL|activitySet);
955 } else if (isIncludePreprocessor) {
956 if (sc.ch == '>') {
957 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
958 isIncludePreprocessor = false;
960 } else if (sc.ch == '\\') {
961 if (options.escapeSequence) {
962 sc.SetState(SCE_C_ESCAPESEQUENCE|activitySet);
963 escapeSeq.resetEscapeState(sc.chNext);
965 sc.Forward(); // Skip all characters after the backslash
966 } else if (sc.ch == '\"') {
967 if (sc.chNext == '_') {
968 sc.ChangeState(SCE_C_USERLITERAL|activitySet);
969 } else {
970 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
973 break;
974 case SCE_C_ESCAPESEQUENCE:
975 escapeSeq.digitsLeft--;
976 if (!escapeSeq.atEscapeEnd(sc.ch)) {
977 break;
979 if (sc.ch == '"') {
980 sc.SetState(SCE_C_STRING|activitySet);
981 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
982 } else if (sc.ch == '\\') {
983 escapeSeq.resetEscapeState(sc.chNext);
984 sc.Forward();
985 } else {
986 sc.SetState(SCE_C_STRING|activitySet);
987 if (sc.atLineEnd) {
988 sc.ChangeState(SCE_C_STRINGEOL|activitySet);
991 break;
992 case SCE_C_HASHQUOTEDSTRING:
993 if (sc.ch == '\\') {
994 if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
995 sc.Forward();
997 } else if (sc.ch == '\"') {
998 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
1000 break;
1001 case SCE_C_STRINGRAW:
1002 if (sc.Match(rawStringTerminator.c_str())) {
1003 for (size_t termPos=rawStringTerminator.size(); termPos; termPos--)
1004 sc.Forward();
1005 sc.SetState(SCE_C_DEFAULT|activitySet);
1006 rawStringTerminator = "";
1008 break;
1009 case SCE_C_CHARACTER:
1010 if (sc.atLineEnd) {
1011 sc.ChangeState(SCE_C_STRINGEOL|activitySet);
1012 } else if (sc.ch == '\\') {
1013 if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
1014 sc.Forward();
1016 } else if (sc.ch == '\'') {
1017 if (sc.chNext == '_') {
1018 sc.ChangeState(SCE_C_USERLITERAL|activitySet);
1019 } else {
1020 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
1023 break;
1024 case SCE_C_REGEX:
1025 if (sc.atLineStart) {
1026 sc.SetState(SCE_C_DEFAULT|activitySet);
1027 } else if (! inRERange && sc.ch == '/') {
1028 sc.Forward();
1029 while ((sc.ch < 0x80) && islower(sc.ch))
1030 sc.Forward(); // gobble regex flags
1031 sc.SetState(SCE_C_DEFAULT|activitySet);
1032 } else if (sc.ch == '\\' && (static_cast<Sci_Position>(sc.currentPos+1) < lineEndNext)) {
1033 // Gobble up the escaped character
1034 sc.Forward();
1035 } else if (sc.ch == '[') {
1036 inRERange = true;
1037 } else if (sc.ch == ']') {
1038 inRERange = false;
1040 break;
1041 case SCE_C_STRINGEOL:
1042 if (sc.atLineStart) {
1043 sc.SetState(SCE_C_DEFAULT|activitySet);
1045 break;
1046 case SCE_C_VERBATIM:
1047 if (options.verbatimStringsAllowEscapes && (sc.ch == '\\')) {
1048 sc.Forward(); // Skip all characters after the backslash
1049 } else if (sc.ch == '\"') {
1050 if (sc.chNext == '\"') {
1051 sc.Forward();
1052 } else {
1053 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
1056 break;
1057 case SCE_C_TRIPLEVERBATIM:
1058 if (sc.Match("\"\"\"")) {
1059 while (sc.Match('"')) {
1060 sc.Forward();
1062 sc.SetState(SCE_C_DEFAULT|activitySet);
1064 break;
1065 case SCE_C_UUID:
1066 if (sc.atLineEnd || sc.ch == ')') {
1067 sc.SetState(SCE_C_DEFAULT|activitySet);
1069 break;
1070 case SCE_C_TASKMARKER:
1071 if (isoperator(sc.ch) || IsASpace(sc.ch)) {
1072 sc.SetState(styleBeforeTaskMarker|activitySet);
1073 styleBeforeTaskMarker = SCE_C_DEFAULT;
1077 if (sc.atLineEnd && !atLineEndBeforeSwitch) {
1078 // State exit processing consumed characters up to end of line.
1079 lineCurrent++;
1080 lineEndNext = styler.LineEnd(lineCurrent);
1081 vlls.Add(lineCurrent, preproc);
1084 // Determine if a new state should be entered.
1085 if (MaskActive(sc.state) == SCE_C_DEFAULT) {
1086 if (sc.Match('@', '\"')) {
1087 sc.SetState(SCE_C_VERBATIM|activitySet);
1088 sc.Forward();
1089 } else if (options.triplequotedStrings && sc.Match("\"\"\"")) {
1090 sc.SetState(SCE_C_TRIPLEVERBATIM|activitySet);
1091 sc.Forward(2);
1092 } else if (options.hashquotedStrings && sc.Match('#', '\"')) {
1093 sc.SetState(SCE_C_HASHQUOTEDSTRING|activitySet);
1094 sc.Forward();
1095 } else if (options.backQuotedStrings && sc.Match('`')) {
1096 sc.SetState(SCE_C_STRINGRAW|activitySet);
1097 rawStringTerminator = "`";
1098 } else if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
1099 if (lastWordWasUUID) {
1100 sc.SetState(SCE_C_UUID|activitySet);
1101 lastWordWasUUID = false;
1102 } else {
1103 sc.SetState(SCE_C_NUMBER|activitySet);
1105 } else if (!sc.atLineEnd && (setWordStart.Contains(sc.ch) || (sc.ch == '@'))) {
1106 if (lastWordWasUUID) {
1107 sc.SetState(SCE_C_UUID|activitySet);
1108 lastWordWasUUID = false;
1109 } else {
1110 sc.SetState(SCE_C_IDENTIFIER|activitySet);
1112 } else if (sc.Match('/', '*')) {
1113 if (sc.Match("/**") || sc.Match("/*!")) { // Support of Qt/Doxygen doc. style
1114 sc.SetState(SCE_C_COMMENTDOC|activitySet);
1115 } else {
1116 sc.SetState(SCE_C_COMMENT|activitySet);
1118 sc.Forward(); // Eat the * so it isn't used for the end of the comment
1119 } else if (sc.Match('/', '/')) {
1120 if ((sc.Match("///") && !sc.Match("////")) || sc.Match("//!"))
1121 // Support of Qt/Doxygen doc. style
1122 sc.SetState(SCE_C_COMMENTLINEDOC|activitySet);
1123 else
1124 sc.SetState(SCE_C_COMMENTLINE|activitySet);
1125 } else if (sc.ch == '/'
1126 && (setOKBeforeRE.Contains(chPrevNonWhite)
1127 || followsReturnKeyword(sc, styler))
1128 && (!setCouldBePostOp.Contains(chPrevNonWhite)
1129 || !FollowsPostfixOperator(sc, styler))) {
1130 sc.SetState(SCE_C_REGEX|activitySet); // JavaScript's RegEx
1131 inRERange = false;
1132 } else if (sc.ch == '\"') {
1133 if (sc.chPrev == 'R') {
1134 styler.Flush();
1135 if (MaskActive(styler.StyleAt(sc.currentPos - 1)) == SCE_C_STRINGRAW) {
1136 sc.SetState(SCE_C_STRINGRAW|activitySet);
1137 rawStringTerminator = ")";
1138 for (Sci_Position termPos = sc.currentPos + 1;; termPos++) {
1139 char chTerminator = styler.SafeGetCharAt(termPos, '(');
1140 if (chTerminator == '(')
1141 break;
1142 rawStringTerminator += chTerminator;
1144 rawStringTerminator += '\"';
1145 } else {
1146 sc.SetState(SCE_C_STRING|activitySet);
1148 } else {
1149 sc.SetState(SCE_C_STRING|activitySet);
1151 isIncludePreprocessor = false; // ensure that '>' won't end the string
1152 } else if (isIncludePreprocessor && sc.ch == '<') {
1153 sc.SetState(SCE_C_STRING|activitySet);
1154 } else if (sc.ch == '\'') {
1155 sc.SetState(SCE_C_CHARACTER|activitySet);
1156 } else if (sc.ch == '#' && visibleChars == 0) {
1157 // Preprocessor commands are alone on their line
1158 sc.SetState(SCE_C_PREPROCESSOR|activitySet);
1159 // Skip whitespace between # and preprocessor word
1160 do {
1161 sc.Forward();
1162 } while ((sc.ch == ' ' || sc.ch == '\t') && sc.More());
1163 if (sc.atLineEnd) {
1164 sc.SetState(SCE_C_DEFAULT|activitySet);
1165 } else if (sc.Match("include")) {
1166 isIncludePreprocessor = true;
1167 } else {
1168 if (options.trackPreprocessor) {
1169 if (sc.Match("ifdef") || sc.Match("ifndef")) {
1170 bool isIfDef = sc.Match("ifdef");
1171 int i = isIfDef ? 5 : 6;
1172 std::string restOfLine = GetRestOfLine(styler, sc.currentPos + i + 1, false);
1173 bool foundDef = preprocessorDefinitions.find(restOfLine) != preprocessorDefinitions.end();
1174 preproc.StartSection(isIfDef == foundDef);
1175 } else if (sc.Match("if")) {
1176 std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 2, true);
1177 bool ifGood = EvaluateExpression(restOfLine, preprocessorDefinitions);
1178 preproc.StartSection(ifGood);
1179 } else if (sc.Match("else")) {
1180 if (!preproc.CurrentIfTaken()) {
1181 preproc.InvertCurrentLevel();
1182 activitySet = preproc.IsInactive() ? activeFlag : 0;
1183 if (!activitySet)
1184 sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
1185 } else if (!preproc.IsInactive()) {
1186 preproc.InvertCurrentLevel();
1187 activitySet = preproc.IsInactive() ? activeFlag : 0;
1188 if (!activitySet)
1189 sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
1191 } else if (sc.Match("elif")) {
1192 // Ensure only one chosen out of #if .. #elif .. #elif .. #else .. #endif
1193 if (!preproc.CurrentIfTaken()) {
1194 // Similar to #if
1195 std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 2, true);
1196 bool ifGood = EvaluateExpression(restOfLine, preprocessorDefinitions);
1197 if (ifGood) {
1198 preproc.InvertCurrentLevel();
1199 activitySet = preproc.IsInactive() ? activeFlag : 0;
1200 if (!activitySet)
1201 sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
1203 } else if (!preproc.IsInactive()) {
1204 preproc.InvertCurrentLevel();
1205 activitySet = preproc.IsInactive() ? activeFlag : 0;
1206 if (!activitySet)
1207 sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
1209 } else if (sc.Match("endif")) {
1210 preproc.EndSection();
1211 activitySet = preproc.IsInactive() ? activeFlag : 0;
1212 sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
1213 } else if (sc.Match("define")) {
1214 if (options.updatePreprocessor && !preproc.IsInactive()) {
1215 std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 6, true);
1216 size_t startName = 0;
1217 while ((startName < restOfLine.length()) && IsSpaceOrTab(restOfLine[startName]))
1218 startName++;
1219 size_t endName = startName;
1220 while ((endName < restOfLine.length()) && setWord.Contains(static_cast<unsigned char>(restOfLine[endName])))
1221 endName++;
1222 std::string key = restOfLine.substr(startName, endName-startName);
1223 if ((endName < restOfLine.length()) && (restOfLine.at(endName) == '(')) {
1224 // Macro
1225 size_t endArgs = endName;
1226 while ((endArgs < restOfLine.length()) && (restOfLine[endArgs] != ')'))
1227 endArgs++;
1228 std::string args = restOfLine.substr(endName + 1, endArgs - endName - 1);
1229 size_t startValue = endArgs+1;
1230 while ((startValue < restOfLine.length()) && IsSpaceOrTab(restOfLine[startValue]))
1231 startValue++;
1232 std::string value;
1233 if (startValue < restOfLine.length())
1234 value = restOfLine.substr(startValue);
1235 preprocessorDefinitions[key] = SymbolValue(value, args);
1236 ppDefineHistory.push_back(PPDefinition(lineCurrent, key, value, false, args));
1237 definitionsChanged = true;
1238 } else {
1239 // Value
1240 size_t startValue = endName;
1241 while ((startValue < restOfLine.length()) && IsSpaceOrTab(restOfLine[startValue]))
1242 startValue++;
1243 std::string value = restOfLine.substr(startValue);
1244 preprocessorDefinitions[key] = value;
1245 ppDefineHistory.push_back(PPDefinition(lineCurrent, key, value));
1246 definitionsChanged = true;
1249 } else if (sc.Match("undef")) {
1250 if (options.updatePreprocessor && !preproc.IsInactive()) {
1251 const std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 5, false);
1252 std::vector<std::string> tokens = Tokenize(restOfLine);
1253 if (tokens.size() >= 1) {
1254 const std::string key = tokens[0];
1255 preprocessorDefinitions.erase(key);
1256 ppDefineHistory.push_back(PPDefinition(lineCurrent, key, "", true));
1257 definitionsChanged = true;
1263 } else if (isoperator(sc.ch)) {
1264 sc.SetState(SCE_C_OPERATOR|activitySet);
1268 if (!IsASpace(sc.ch) && !IsSpaceEquiv(MaskActive(sc.state))) {
1269 chPrevNonWhite = sc.ch;
1270 visibleChars++;
1272 continuationLine = false;
1273 sc.Forward();
1275 const bool rawStringsChanged = rawStringTerminators.Merge(rawSTNew, lineCurrent);
1276 if (definitionsChanged || rawStringsChanged)
1277 styler.ChangeLexerState(startPos, startPos + length);
1278 sc.Complete();
1281 // Store both the current line's fold level and the next lines in the
1282 // level store to make it easy to pick up with each increment
1283 // and to make it possible to fiddle the current level for "} else {".
1285 void SCI_METHOD LexerCPP::Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
1287 if (!options.fold)
1288 return;
1290 LexAccessor styler(pAccess);
1292 Sci_PositionU endPos = startPos + length;
1293 int visibleChars = 0;
1294 bool inLineComment = false;
1295 Sci_Position lineCurrent = styler.GetLine(startPos);
1296 int levelCurrent = SC_FOLDLEVELBASE;
1297 if (lineCurrent > 0)
1298 levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
1299 Sci_PositionU lineStartNext = styler.LineStart(lineCurrent+1);
1300 int levelMinCurrent = levelCurrent;
1301 int levelNext = levelCurrent;
1302 char chNext = styler[startPos];
1303 int styleNext = MaskActive(styler.StyleAt(startPos));
1304 int style = MaskActive(initStyle);
1305 const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
1306 for (Sci_PositionU i = startPos; i < endPos; i++) {
1307 char ch = chNext;
1308 chNext = styler.SafeGetCharAt(i + 1);
1309 int stylePrev = style;
1310 style = styleNext;
1311 styleNext = MaskActive(styler.StyleAt(i + 1));
1312 bool atEOL = i == (lineStartNext-1);
1313 if ((style == SCE_C_COMMENTLINE) || (style == SCE_C_COMMENTLINEDOC))
1314 inLineComment = true;
1315 if (options.foldComment && options.foldCommentMultiline && IsStreamCommentStyle(style) && !inLineComment) {
1316 if (!IsStreamCommentStyle(stylePrev)) {
1317 levelNext++;
1318 } else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
1319 // Comments don't end at end of line and the next character may be unstyled.
1320 levelNext--;
1323 if (options.foldComment && options.foldCommentExplicit && ((style == SCE_C_COMMENTLINE) || options.foldExplicitAnywhere)) {
1324 if (userDefinedFoldMarkers) {
1325 if (styler.Match(i, options.foldExplicitStart.c_str())) {
1326 levelNext++;
1327 } else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
1328 levelNext--;
1330 } else {
1331 if ((ch == '/') && (chNext == '/')) {
1332 char chNext2 = styler.SafeGetCharAt(i + 2);
1333 if (chNext2 == '{') {
1334 levelNext++;
1335 } else if (chNext2 == '}') {
1336 levelNext--;
1341 if (options.foldPreprocessor && (style == SCE_C_PREPROCESSOR)) {
1342 if (ch == '#') {
1343 Sci_PositionU j = i + 1;
1344 while ((j < endPos) && IsASpaceOrTab(styler.SafeGetCharAt(j))) {
1345 j++;
1347 if (styler.Match(j, "region") || styler.Match(j, "if")) {
1348 levelNext++;
1349 } else if (styler.Match(j, "end")) {
1350 levelNext--;
1354 if (options.foldSyntaxBased && (style == SCE_C_OPERATOR)) {
1355 if (ch == '{' || ch == '[' || ch == '(') {
1356 // Measure the minimum before a '{' to allow
1357 // folding on "} else {"
1358 if (levelMinCurrent > levelNext) {
1359 levelMinCurrent = levelNext;
1361 levelNext++;
1362 } else if (ch == '}' || ch == ']' || ch == ')') {
1363 levelNext--;
1366 if (!IsASpace(ch))
1367 visibleChars++;
1368 if (atEOL || (i == endPos-1)) {
1369 int levelUse = levelCurrent;
1370 if (options.foldSyntaxBased && options.foldAtElse) {
1371 levelUse = levelMinCurrent;
1373 int lev = levelUse | levelNext << 16;
1374 if (visibleChars == 0 && options.foldCompact)
1375 lev |= SC_FOLDLEVELWHITEFLAG;
1376 if (levelUse < levelNext)
1377 lev |= SC_FOLDLEVELHEADERFLAG;
1378 if (lev != styler.LevelAt(lineCurrent)) {
1379 styler.SetLevel(lineCurrent, lev);
1381 lineCurrent++;
1382 lineStartNext = styler.LineStart(lineCurrent+1);
1383 levelCurrent = levelNext;
1384 levelMinCurrent = levelCurrent;
1385 if (atEOL && (i == static_cast<Sci_PositionU>(styler.Length()-1))) {
1386 // There is an empty line at end of file so give it same level and empty
1387 styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
1389 visibleChars = 0;
1390 inLineComment = false;
1395 void LexerCPP::EvaluateTokens(std::vector<std::string> &tokens, const SymbolTable &preprocessorDefinitions) {
1397 // Remove whitespace tokens
1398 tokens.erase(std::remove_if(tokens.begin(), tokens.end(), OnlySpaceOrTab), tokens.end());
1400 // Evaluate defined statements to either 0 or 1
1401 for (size_t i=0; (i+1)<tokens.size();) {
1402 if (tokens[i] == "defined") {
1403 const char *val = "0";
1404 if (tokens[i+1] == "(") {
1405 if (((i + 2)<tokens.size()) && (tokens[i + 2] == ")")) {
1406 // defined()
1407 tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 3);
1408 } else if (((i+3)<tokens.size()) && (tokens[i+3] == ")")) {
1409 // defined(<identifier>)
1410 SymbolTable::const_iterator it = preprocessorDefinitions.find(tokens[i+2]);
1411 if (it != preprocessorDefinitions.end()) {
1412 val = "1";
1414 tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 4);
1415 } else {
1416 // Spurious '(' so erase as more likely to result in false
1417 tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 2);
1419 } else {
1420 // defined <identifier>
1421 SymbolTable::const_iterator it = preprocessorDefinitions.find(tokens[i+1]);
1422 if (it != preprocessorDefinitions.end()) {
1423 val = "1";
1426 tokens[i] = val;
1427 } else {
1428 i++;
1432 // Evaluate identifiers
1433 const size_t maxIterations = 100;
1434 size_t iterations = 0; // Limit number of iterations in case there is a recursive macro.
1435 for (size_t i = 0; (i<tokens.size()) && (iterations < maxIterations);) {
1436 iterations++;
1437 if (setWordStart.Contains(static_cast<unsigned char>(tokens[i][0]))) {
1438 SymbolTable::const_iterator it = preprocessorDefinitions.find(tokens[i]);
1439 if (it != preprocessorDefinitions.end()) {
1440 // Tokenize value
1441 std::vector<std::string> macroTokens = Tokenize(it->second.value);
1442 if (it->second.IsMacro()) {
1443 if ((i + 1 < tokens.size()) && (tokens.at(i + 1) == "(")) {
1444 // Create map of argument name to value
1445 std::vector<std::string> argumentNames = StringSplit(it->second.arguments, ',');
1446 std::map<std::string, std::string> arguments;
1447 size_t arg = 0;
1448 size_t tok = i+2;
1449 while ((tok < tokens.size()) && (arg < argumentNames.size()) && (tokens.at(tok) != ")")) {
1450 if (tokens.at(tok) != ",") {
1451 arguments[argumentNames.at(arg)] = tokens.at(tok);
1452 arg++;
1454 tok++;
1457 // Remove invocation
1458 tokens.erase(tokens.begin() + i, tokens.begin() + tok + 1);
1460 // Substitute values into macro
1461 macroTokens.erase(std::remove_if(macroTokens.begin(), macroTokens.end(), OnlySpaceOrTab), macroTokens.end());
1463 for (size_t iMacro = 0; iMacro < macroTokens.size();) {
1464 if (setWordStart.Contains(static_cast<unsigned char>(macroTokens[iMacro][0]))) {
1465 std::map<std::string, std::string>::const_iterator itFind = arguments.find(macroTokens[iMacro]);
1466 if (itFind != arguments.end()) {
1467 // TODO: Possible that value will be expression so should insert tokenized form
1468 macroTokens[iMacro] = itFind->second;
1471 iMacro++;
1474 // Insert results back into tokens
1475 tokens.insert(tokens.begin() + i, macroTokens.begin(), macroTokens.end());
1477 } else {
1478 i++;
1480 } else {
1481 // Remove invocation
1482 tokens.erase(tokens.begin() + i);
1483 // Insert results back into tokens
1484 tokens.insert(tokens.begin() + i, macroTokens.begin(), macroTokens.end());
1486 } else {
1487 // Identifier not found
1488 tokens.erase(tokens.begin() + i);
1490 } else {
1491 i++;
1495 // Find bracketed subexpressions and recurse on them
1496 BracketPair bracketPair = FindBracketPair(tokens);
1497 while (bracketPair.itBracket != tokens.end()) {
1498 std::vector<std::string> inBracket(bracketPair.itBracket + 1, bracketPair.itEndBracket);
1499 EvaluateTokens(inBracket, preprocessorDefinitions);
1501 // The insertion is done before the removal because there were failures with the opposite approach
1502 tokens.insert(bracketPair.itBracket, inBracket.begin(), inBracket.end());
1504 bracketPair = FindBracketPair(tokens);
1505 tokens.erase(bracketPair.itBracket, bracketPair.itEndBracket + 1);
1507 bracketPair = FindBracketPair(tokens);
1510 // Evaluate logical negations
1511 for (size_t j=0; (j+1)<tokens.size();) {
1512 if (setNegationOp.Contains(tokens[j][0])) {
1513 int isTrue = atoi(tokens[j+1].c_str());
1514 if (tokens[j] == "!")
1515 isTrue = !isTrue;
1516 std::vector<std::string>::iterator itInsert =
1517 tokens.erase(tokens.begin() + j, tokens.begin() + j + 2);
1518 tokens.insert(itInsert, isTrue ? "1" : "0");
1519 } else {
1520 j++;
1524 // Evaluate expressions in precedence order
1525 enum precedence { precArithmetic, precRelative, precLogical };
1526 for (int prec=precArithmetic; prec <= precLogical; prec++) {
1527 // Looking at 3 tokens at a time so end at 2 before end
1528 for (size_t k=0; (k+2)<tokens.size();) {
1529 char chOp = tokens[k+1][0];
1530 if (
1531 ((prec==precArithmetic) && setArithmethicOp.Contains(chOp)) ||
1532 ((prec==precRelative) && setRelOp.Contains(chOp)) ||
1533 ((prec==precLogical) && setLogicalOp.Contains(chOp))
1535 int valA = atoi(tokens[k].c_str());
1536 int valB = atoi(tokens[k+2].c_str());
1537 int result = 0;
1538 if (tokens[k+1] == "+")
1539 result = valA + valB;
1540 else if (tokens[k+1] == "-")
1541 result = valA - valB;
1542 else if (tokens[k+1] == "*")
1543 result = valA * valB;
1544 else if (tokens[k+1] == "/")
1545 result = valA / (valB ? valB : 1);
1546 else if (tokens[k+1] == "%")
1547 result = valA % (valB ? valB : 1);
1548 else if (tokens[k+1] == "<")
1549 result = valA < valB;
1550 else if (tokens[k+1] == "<=")
1551 result = valA <= valB;
1552 else if (tokens[k+1] == ">")
1553 result = valA > valB;
1554 else if (tokens[k+1] == ">=")
1555 result = valA >= valB;
1556 else if (tokens[k+1] == "==")
1557 result = valA == valB;
1558 else if (tokens[k+1] == "!=")
1559 result = valA != valB;
1560 else if (tokens[k+1] == "||")
1561 result = valA || valB;
1562 else if (tokens[k+1] == "&&")
1563 result = valA && valB;
1564 char sResult[30];
1565 sprintf(sResult, "%d", result);
1566 std::vector<std::string>::iterator itInsert =
1567 tokens.erase(tokens.begin() + k, tokens.begin() + k + 3);
1568 tokens.insert(itInsert, sResult);
1569 } else {
1570 k++;
1576 std::vector<std::string> LexerCPP::Tokenize(const std::string &expr) const {
1577 // Break into tokens
1578 std::vector<std::string> tokens;
1579 const char *cp = expr.c_str();
1580 while (*cp) {
1581 std::string word;
1582 if (setWord.Contains(static_cast<unsigned char>(*cp))) {
1583 // Identifiers and numbers
1584 while (setWord.Contains(static_cast<unsigned char>(*cp))) {
1585 word += *cp;
1586 cp++;
1588 } else if (IsSpaceOrTab(*cp)) {
1589 while (IsSpaceOrTab(*cp)) {
1590 word += *cp;
1591 cp++;
1593 } else if (setRelOp.Contains(static_cast<unsigned char>(*cp))) {
1594 word += *cp;
1595 cp++;
1596 if (setRelOp.Contains(static_cast<unsigned char>(*cp))) {
1597 word += *cp;
1598 cp++;
1600 } else if (setLogicalOp.Contains(static_cast<unsigned char>(*cp))) {
1601 word += *cp;
1602 cp++;
1603 if (setLogicalOp.Contains(static_cast<unsigned char>(*cp))) {
1604 word += *cp;
1605 cp++;
1607 } else {
1608 // Should handle strings, characters, and comments here
1609 word += *cp;
1610 cp++;
1612 tokens.push_back(word);
1614 return tokens;
1617 bool LexerCPP::EvaluateExpression(const std::string &expr, const SymbolTable &preprocessorDefinitions) {
1618 std::vector<std::string> tokens = Tokenize(expr);
1620 EvaluateTokens(tokens, preprocessorDefinitions);
1622 // "0" or "" -> false else true
1623 bool isFalse = tokens.empty() ||
1624 ((tokens.size() == 1) && ((tokens[0] == "") || tokens[0] == "0"));
1625 return !isFalse;
// Lexer module objects defined by this file. Each is constructed from a lexer id
// (SCLEX_CPP / SCLEX_CPPNOCASE), a LexerCPP factory function, a language name string
// ("cpp" / "cppnocase") and cppWordLists.
// NOTE(review): presumably lmCPPNoCase is the case-insensitive variant, going by the
// LexerFactoryCPPInsensitive name - confirm against the factory definition.
1628 LexerModule lmCPP(SCLEX_CPP, LexerCPP::LexerFactoryCPP, "cpp", cppWordLists);
1629 LexerModule lmCPPNoCase(SCLEX_CPPNOCASE, LexerCPP::LexerFactoryCPPInsensitive, "cppnocase", cppWordLists);