Scintilla 4.0.3
[TortoiseGit.git] / ext / scintilla / lexers / LexJSON.cxx
blob0dc166069eb7d91278f5bca38849de2d2a52a36f
// Scintilla source code edit control
/**
 * @file LexJSON.cxx
 * @date February 19, 2016
 * @brief Lexer for JSON and JSON-LD formats
 * @author nkmathew
 *
 * The License.txt file describes the conditions under which this software may
 * be distributed.
 */
13 #include <cstdlib>
14 #include <cassert>
15 #include <cctype>
16 #include <cstdio>
17 #include <string>
18 #include <vector>
19 #include <map>
21 #include "ILexer.h"
22 #include "Scintilla.h"
23 #include "SciLexer.h"
24 #include "WordList.h"
25 #include "LexAccessor.h"
26 #include "StyleContext.h"
27 #include "CharacterSet.h"
28 #include "LexerModule.h"
29 #include "OptionSet.h"
30 #include "DefaultLexer.h"
32 using namespace Scintilla;
/**
 * Human readable descriptions of the word lists accepted by the JSON lexer.
 * Entry 0 describes the plain JSON keywords and entry 1 the JSON-LD keywords.
 *
 * The array is handed around as a bare pointer without a length, so it must
 * end with a null entry that marks the end of the list.
 */
static const char *const JSONWordListDesc[] = {
	"JSON Keywords",
	"JSON-LD Keywords",
	nullptr
};
40 /**
41 * Used to detect compact IRI/URLs in JSON-LD without first looking ahead for the
42 * colon separating the prefix and suffix
44 * https://www.w3.org/TR/json-ld/#dfn-compact-iri
46 struct CompactIRI {
47 int colonCount;
48 bool foundInvalidChar;
49 CharacterSet setCompactIRI;
50 CompactIRI() {
51 colonCount = 0;
52 foundInvalidChar = false;
53 setCompactIRI = CharacterSet(CharacterSet::setAlpha, "$_-");
55 void resetState() {
56 colonCount = 0;
57 foundInvalidChar = false;
59 void checkChar(int ch) {
60 if (ch == ':') {
61 colonCount++;
62 } else {
63 foundInvalidChar |= !setCompactIRI.Contains(ch);
66 bool shouldHighlight() const {
67 return !foundInvalidChar && colonCount == 1;
71 /**
72 * Keeps track of escaped characters in strings as per:
74 * https://tools.ietf.org/html/rfc7159#section-7
76 struct EscapeSequence {
77 int digitsLeft;
78 CharacterSet setHexDigits;
79 CharacterSet setEscapeChars;
80 EscapeSequence() {
81 digitsLeft = 0;
82 setHexDigits = CharacterSet(CharacterSet::setDigits, "ABCDEFabcdef");
83 setEscapeChars = CharacterSet(CharacterSet::setNone, "\\\"tnbfru/");
85 // Returns true if the following character is a valid escaped character
86 bool newSequence(int nextChar) {
87 digitsLeft = 0;
88 if (nextChar == 'u') {
89 digitsLeft = 5;
90 } else if (!setEscapeChars.Contains(nextChar)) {
91 return false;
93 return true;
95 bool atEscapeEnd() const {
96 return digitsLeft <= 0;
98 bool isInvalidChar(int currChar) const {
99 return !setHexDigits.Contains(currChar);
/**
 * User configurable switches of the JSON lexer. Every switch starts out
 * disabled.
 */
struct OptionsJSON {
	bool foldCompact;      // "fold.compact"
	bool fold;             // "fold"
	bool allowComments;    // "lexer.json.allow.comments"
	bool escapeSequence;   // "lexer.json.escape.sequence"

	OptionsJSON() :
		foldCompact(false),
		fold(false),
		allowComments(false),
		escapeSequence(false) {
	}
};
116 struct OptionSetJSON : public OptionSet<OptionsJSON> {
117 OptionSetJSON() {
118 DefineProperty("lexer.json.escape.sequence", &OptionsJSON::escapeSequence,
119 "Set to 1 to enable highlighting of escape sequences in strings");
121 DefineProperty("lexer.json.allow.comments", &OptionsJSON::allowComments,
122 "Set to 1 to enable highlighting of line/block comments in JSON");
124 DefineProperty("fold.compact", &OptionsJSON::foldCompact);
125 DefineProperty("fold", &OptionsJSON::fold);
126 DefineWordListSets(JSONWordListDesc);
130 class LexerJSON : public DefaultLexer {
131 OptionsJSON options;
132 OptionSetJSON optSetJSON;
133 EscapeSequence escapeSeq;
134 WordList keywordsJSON;
135 WordList keywordsJSONLD;
136 CharacterSet setOperators;
137 CharacterSet setURL;
138 CharacterSet setKeywordJSONLD;
139 CharacterSet setKeywordJSON;
140 CompactIRI compactIRI;
142 static bool IsNextNonWhitespace(LexAccessor &styler, Sci_Position start, char ch) {
143 Sci_Position i = 0;
144 while (i < 50) {
145 i++;
146 char curr = styler.SafeGetCharAt(start+i, '\0');
147 char next = styler.SafeGetCharAt(start+i+1, '\0');
148 bool atEOL = (curr == '\r' && next != '\n') || (curr == '\n');
149 if (curr == ch) {
150 return true;
151 } else if (!isspacechar(curr) || atEOL) {
152 return false;
155 return false;
159 * Looks for the colon following the end quote
161 * Assumes property names of lengths no longer than a 100 characters.
162 * The colon is also expected to be less than 50 spaces after the end
163 * quote for the string to be considered a property name
165 static bool AtPropertyName(LexAccessor &styler, Sci_Position start) {
166 Sci_Position i = 0;
167 bool escaped = false;
168 while (i < 100) {
169 i++;
170 char curr = styler.SafeGetCharAt(start+i, '\0');
171 if (escaped) {
172 escaped = false;
173 continue;
175 escaped = curr == '\\';
176 if (curr == '"') {
177 return IsNextNonWhitespace(styler, start+i, ':');
178 } else if (!curr) {
179 return false;
182 return false;
185 static bool IsNextWordInList(WordList &keywordList, CharacterSet wordSet,
186 StyleContext &context, LexAccessor &styler) {
187 char word[51];
188 Sci_Position currPos = (Sci_Position) context.currentPos;
189 int i = 0;
190 while (i < 50) {
191 char ch = styler.SafeGetCharAt(currPos + i);
192 if (!wordSet.Contains(ch)) {
193 break;
195 word[i] = ch;
196 i++;
198 word[i] = '\0';
199 return keywordList.InList(word);
202 public:
203 LexerJSON() :
204 setOperators(CharacterSet::setNone, "[{}]:,"),
205 setURL(CharacterSet::setAlphaNum, "-._~:/?#[]@!$&'()*+,),="),
206 setKeywordJSONLD(CharacterSet::setAlpha, ":@"),
207 setKeywordJSON(CharacterSet::setAlpha, "$_") {
209 virtual ~LexerJSON() {}
210 int SCI_METHOD Version() const override {
211 return lvRelease4;
213 void SCI_METHOD Release() override {
214 delete this;
216 const char *SCI_METHOD PropertyNames() override {
217 return optSetJSON.PropertyNames();
219 int SCI_METHOD PropertyType(const char *name) override {
220 return optSetJSON.PropertyType(name);
222 const char *SCI_METHOD DescribeProperty(const char *name) override {
223 return optSetJSON.DescribeProperty(name);
225 Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override {
226 if (optSetJSON.PropertySet(&options, key, val)) {
227 return 0;
229 return -1;
231 Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override {
232 WordList *wordListN = 0;
233 switch (n) {
234 case 0:
235 wordListN = &keywordsJSON;
236 break;
237 case 1:
238 wordListN = &keywordsJSONLD;
239 break;
241 Sci_Position firstModification = -1;
242 if (wordListN) {
243 WordList wlNew;
244 wlNew.Set(wl);
245 if (*wordListN != wlNew) {
246 wordListN->Set(wl);
247 firstModification = 0;
250 return firstModification;
252 void *SCI_METHOD PrivateCall(int, void *) override {
253 return 0;
255 static ILexer4 *LexerFactoryJSON() {
256 return new LexerJSON;
258 const char *SCI_METHOD DescribeWordListSets() override {
259 return optSetJSON.DescribeWordListSets();
261 void SCI_METHOD Lex(Sci_PositionU startPos,
262 Sci_Position length,
263 int initStyle,
264 IDocument *pAccess) override;
265 void SCI_METHOD Fold(Sci_PositionU startPos,
266 Sci_Position length,
267 int initStyle,
268 IDocument *pAccess) override;
271 void SCI_METHOD LexerJSON::Lex(Sci_PositionU startPos,
272 Sci_Position length,
273 int initStyle,
274 IDocument *pAccess) {
275 LexAccessor styler(pAccess);
276 StyleContext context(startPos, length, initStyle, styler);
277 int stringStyleBefore = SCE_JSON_STRING;
278 while (context.More()) {
279 switch (context.state) {
280 case SCE_JSON_BLOCKCOMMENT:
281 if (context.Match("*/")) {
282 context.Forward();
283 context.ForwardSetState(SCE_JSON_DEFAULT);
285 break;
286 case SCE_JSON_LINECOMMENT:
287 if (context.atLineEnd) {
288 context.SetState(SCE_JSON_DEFAULT);
290 break;
291 case SCE_JSON_STRINGEOL:
292 if (context.atLineStart) {
293 context.SetState(SCE_JSON_DEFAULT);
295 break;
296 case SCE_JSON_ESCAPESEQUENCE:
297 escapeSeq.digitsLeft--;
298 if (!escapeSeq.atEscapeEnd()) {
299 if (escapeSeq.isInvalidChar(context.ch)) {
300 context.SetState(SCE_JSON_ERROR);
302 break;
304 if (context.ch == '"') {
305 context.SetState(stringStyleBefore);
306 context.ForwardSetState(SCE_C_DEFAULT);
307 } else if (context.ch == '\\') {
308 if (!escapeSeq.newSequence(context.chNext)) {
309 context.SetState(SCE_JSON_ERROR);
311 context.Forward();
312 } else {
313 context.SetState(stringStyleBefore);
314 if (context.atLineEnd) {
315 context.ChangeState(SCE_JSON_STRINGEOL);
318 break;
319 case SCE_JSON_PROPERTYNAME:
320 case SCE_JSON_STRING:
321 if (context.ch == '"') {
322 if (compactIRI.shouldHighlight()) {
323 context.ChangeState(SCE_JSON_COMPACTIRI);
324 context.ForwardSetState(SCE_JSON_DEFAULT);
325 compactIRI.resetState();
326 } else {
327 context.ForwardSetState(SCE_JSON_DEFAULT);
329 } else if (context.atLineEnd) {
330 context.ChangeState(SCE_JSON_STRINGEOL);
331 } else if (context.ch == '\\') {
332 stringStyleBefore = context.state;
333 if (options.escapeSequence) {
334 context.SetState(SCE_JSON_ESCAPESEQUENCE);
335 if (!escapeSeq.newSequence(context.chNext)) {
336 context.SetState(SCE_JSON_ERROR);
339 context.Forward();
340 } else if (context.Match("https://") ||
341 context.Match("http://") ||
342 context.Match("ssh://") ||
343 context.Match("git://") ||
344 context.Match("svn://") ||
345 context.Match("ftp://") ||
346 context.Match("mailto:")) {
347 // Handle most common URI schemes only
348 stringStyleBefore = context.state;
349 context.SetState(SCE_JSON_URI);
350 } else if (context.ch == '@') {
351 // https://www.w3.org/TR/json-ld/#dfn-keyword
352 if (IsNextWordInList(keywordsJSONLD, setKeywordJSONLD, context, styler)) {
353 stringStyleBefore = context.state;
354 context.SetState(SCE_JSON_LDKEYWORD);
356 } else {
357 compactIRI.checkChar(context.ch);
359 break;
360 case SCE_JSON_LDKEYWORD:
361 case SCE_JSON_URI:
362 if ((!setKeywordJSONLD.Contains(context.ch) &&
363 (context.state == SCE_JSON_LDKEYWORD)) ||
364 (!setURL.Contains(context.ch))) {
365 context.SetState(stringStyleBefore);
367 if (context.ch == '"') {
368 context.ForwardSetState(SCE_JSON_DEFAULT);
369 } else if (context.atLineEnd) {
370 context.ChangeState(SCE_JSON_STRINGEOL);
372 break;
373 case SCE_JSON_OPERATOR:
374 case SCE_JSON_NUMBER:
375 context.SetState(SCE_JSON_DEFAULT);
376 break;
377 case SCE_JSON_ERROR:
378 if (context.atLineEnd) {
379 context.SetState(SCE_JSON_DEFAULT);
381 break;
382 case SCE_JSON_KEYWORD:
383 if (!setKeywordJSON.Contains(context.ch)) {
384 context.SetState(SCE_JSON_DEFAULT);
386 break;
388 if (context.state == SCE_JSON_DEFAULT) {
389 if (context.ch == '"') {
390 compactIRI.resetState();
391 context.SetState(SCE_JSON_STRING);
392 Sci_Position currPos = static_cast<Sci_Position>(context.currentPos);
393 if (AtPropertyName(styler, currPos)) {
394 context.SetState(SCE_JSON_PROPERTYNAME);
396 } else if (setOperators.Contains(context.ch)) {
397 context.SetState(SCE_JSON_OPERATOR);
398 } else if (options.allowComments && context.Match("/*")) {
399 context.SetState(SCE_JSON_BLOCKCOMMENT);
400 context.Forward();
401 } else if (options.allowComments && context.Match("//")) {
402 context.SetState(SCE_JSON_LINECOMMENT);
403 } else if (setKeywordJSON.Contains(context.ch)) {
404 if (IsNextWordInList(keywordsJSON, setKeywordJSON, context, styler)) {
405 context.SetState(SCE_JSON_KEYWORD);
408 bool numberStart =
409 IsADigit(context.ch) && (context.chPrev == '+'||
410 context.chPrev == '-' ||
411 context.atLineStart ||
412 IsASpace(context.chPrev) ||
413 setOperators.Contains(context.chPrev));
414 bool exponentPart =
415 tolower(context.ch) == 'e' &&
416 IsADigit(context.chPrev) &&
417 (IsADigit(context.chNext) ||
418 context.chNext == '+' ||
419 context.chNext == '-');
420 bool signPart =
421 (context.ch == '-' || context.ch == '+') &&
422 ((tolower(context.chPrev) == 'e' && IsADigit(context.chNext)) ||
423 ((IsASpace(context.chPrev) || setOperators.Contains(context.chPrev))
424 && IsADigit(context.chNext)));
425 bool adjacentDigit =
426 IsADigit(context.ch) && IsADigit(context.chPrev);
427 bool afterExponent = IsADigit(context.ch) && tolower(context.chPrev) == 'e';
428 bool dotPart = context.ch == '.' &&
429 IsADigit(context.chPrev) &&
430 IsADigit(context.chNext);
431 bool afterDot = IsADigit(context.ch) && context.chPrev == '.';
432 if (numberStart ||
433 exponentPart ||
434 signPart ||
435 adjacentDigit ||
436 dotPart ||
437 afterExponent ||
438 afterDot) {
439 context.SetState(SCE_JSON_NUMBER);
440 } else if (context.state == SCE_JSON_DEFAULT && !IsASpace(context.ch)) {
441 context.SetState(SCE_JSON_ERROR);
444 context.Forward();
446 context.Complete();
449 void SCI_METHOD LexerJSON::Fold(Sci_PositionU startPos,
450 Sci_Position length,
451 int,
452 IDocument *pAccess) {
453 if (!options.fold) {
454 return;
456 LexAccessor styler(pAccess);
457 Sci_PositionU currLine = styler.GetLine(startPos);
458 Sci_PositionU endPos = startPos + length;
459 int currLevel = SC_FOLDLEVELBASE;
460 if (currLine > 0)
461 currLevel = styler.LevelAt(currLine - 1) >> 16;
462 int nextLevel = currLevel;
463 int visibleChars = 0;
464 for (Sci_PositionU i = startPos; i < endPos; i++) {
465 char curr = styler.SafeGetCharAt(i);
466 char next = styler.SafeGetCharAt(i+1);
467 bool atEOL = (curr == '\r' && next != '\n') || (curr == '\n');
468 if (styler.StyleAt(i) == SCE_JSON_OPERATOR) {
469 if (curr == '{' || curr == '[') {
470 nextLevel++;
471 } else if (curr == '}' || curr == ']') {
472 nextLevel--;
475 if (atEOL || i == (endPos-1)) {
476 int level = currLevel | nextLevel << 16;
477 if (!visibleChars && options.foldCompact) {
478 level |= SC_FOLDLEVELWHITEFLAG;
479 } else if (nextLevel > currLevel) {
480 level |= SC_FOLDLEVELHEADERFLAG;
482 if (level != styler.LevelAt(currLine)) {
483 styler.SetLevel(currLine, level);
485 currLine++;
486 currLevel = nextLevel;
487 visibleChars = 0;
489 if (!isspacechar(curr)) {
490 visibleChars++;
495 LexerModule lmJSON(SCLEX_JSON,
496 LexerJSON::LexerFactoryJSON,
497 "json",
498 JSONWordListDesc);