Update Scintilla to version 3.7.1
[TortoiseGit.git] / ext / scintilla / lexers / LexJSON.cxx
blob361cd92535e19d3f6a9ddb41222c5223af400c53
// Scintilla source code edit control
/**
 * @file LexJSON.cxx
 * @date February 19, 2016
 * @brief Lexer for JSON and JSON-LD formats
 * @author nkmathew
 *
 * The License.txt file describes the conditions under which this software may
 * be distributed.
 */
13 #include <cstdlib>
14 #include <cassert>
15 #include <cctype>
16 #include <cstdio>
17 #include <string>
18 #include <vector>
19 #include <map>
21 #include "ILexer.h"
22 #include "Scintilla.h"
23 #include "SciLexer.h"
24 #include "WordList.h"
25 #include "LexAccessor.h"
26 #include "StyleContext.h"
27 #include "CharacterSet.h"
28 #include "LexerModule.h"
29 #include "OptionSet.h"
31 #ifdef SCI_NAMESPACE
32 using namespace Scintilla;
33 #endif
/**
 * Descriptions of the keyword lists this lexer accepts, in the same order as
 * the indices handled by LexerJSON::WordListSet (0 = JSON, 1 = JSON-LD).
 *
 * The array ends with a null entry so that consumers can find its end without
 * a separate length (presumably how DefineWordListSets and LexerModule walk
 * it -- confirm against OptionSet.h / LexerModule.h).
 */
static const char *const JSONWordListDesc[] = {
	"JSON Keywords",
	"JSON-LD Keywords",
	0
};
41 /**
42 * Used to detect compact IRI/URLs in JSON-LD without first looking ahead for the
43 * colon separating the prefix and suffix
45 * https://www.w3.org/TR/json-ld/#dfn-compact-iri
47 struct CompactIRI {
48 int colonCount;
49 bool foundInvalidChar;
50 CharacterSet setCompactIRI;
51 CompactIRI() {
52 colonCount = 0;
53 foundInvalidChar = false;
54 setCompactIRI = CharacterSet(CharacterSet::setAlpha, "$_-");
56 void resetState() {
57 colonCount = 0;
58 foundInvalidChar = false;
60 void checkChar(int ch) {
61 if (ch == ':') {
62 colonCount++;
63 } else {
64 foundInvalidChar |= !setCompactIRI.Contains(ch);
67 bool shouldHighlight() const {
68 return !foundInvalidChar && colonCount == 1;
72 /**
73 * Keeps track of escaped characters in strings as per:
75 * https://tools.ietf.org/html/rfc7159#section-7
77 struct EscapeSequence {
78 int digitsLeft;
79 CharacterSet setHexDigits;
80 CharacterSet setEscapeChars;
81 EscapeSequence() {
82 digitsLeft = 0;
83 setHexDigits = CharacterSet(CharacterSet::setDigits, "ABCDEFabcdef");
84 setEscapeChars = CharacterSet(CharacterSet::setNone, "\\\"tnbfru/");
86 // Returns true if the following character is a valid escaped character
87 bool newSequence(int nextChar) {
88 digitsLeft = 0;
89 if (nextChar == 'u') {
90 digitsLeft = 5;
91 } else if (!setEscapeChars.Contains(nextChar)) {
92 return false;
94 return true;
96 bool atEscapeEnd() const {
97 return digitsLeft <= 0;
99 bool isInvalidChar(int currChar) const {
100 return !setHexDigits.Contains(currChar);
/**
 * User-settable lexer options. Each field is bound to a property name by
 * OptionSetJSON; all of them start out disabled.
 */
struct OptionsJSON {
	bool foldCompact;    // "fold.compact"
	bool fold;           // "fold"
	bool allowComments;  // "lexer.json.allow.comments"
	bool escapeSequence; // "lexer.json.escape.sequence"

	OptionsJSON() :
		foldCompact(false),
		fold(false),
		allowComments(false),
		escapeSequence(false) {
	}
};
117 struct OptionSetJSON : public OptionSet<OptionsJSON> {
118 OptionSetJSON() {
119 DefineProperty("lexer.json.escape.sequence", &OptionsJSON::escapeSequence,
120 "Set to 1 to enable highlighting of escape sequences in strings");
122 DefineProperty("lexer.json.allow.comments", &OptionsJSON::allowComments,
123 "Set to 1 to enable highlighting of line/block comments in JSON");
125 DefineProperty("fold.compact", &OptionsJSON::foldCompact);
126 DefineProperty("fold", &OptionsJSON::fold);
127 DefineWordListSets(JSONWordListDesc);
131 class LexerJSON : public ILexer {
132 OptionsJSON options;
133 OptionSetJSON optSetJSON;
134 EscapeSequence escapeSeq;
135 WordList keywordsJSON;
136 WordList keywordsJSONLD;
137 CharacterSet setOperators;
138 CharacterSet setURL;
139 CharacterSet setKeywordJSONLD;
140 CharacterSet setKeywordJSON;
141 CompactIRI compactIRI;
143 static bool IsNextNonWhitespace(LexAccessor &styler, Sci_Position start, char ch) {
144 Sci_Position i = 0;
145 while (i < 50) {
146 i++;
147 char curr = styler.SafeGetCharAt(start+i, '\0');
148 char next = styler.SafeGetCharAt(start+i+1, '\0');
149 bool atEOL = (curr == '\r' && next != '\n') || (curr == '\n');
150 if (curr == ch) {
151 return true;
152 } else if (!isspacechar(curr) || atEOL) {
153 return false;
156 return false;
160 * Looks for the colon following the end quote
162 * Assumes property names of lengths no longer than a 100 characters.
163 * The colon is also expected to be less than 50 spaces after the end
164 * quote for the string to be considered a property name
166 static bool AtPropertyName(LexAccessor &styler, Sci_Position start) {
167 Sci_Position i = 0;
168 bool escaped = false;
169 while (i < 100) {
170 i++;
171 char curr = styler.SafeGetCharAt(start+i, '\0');
172 if (escaped) {
173 escaped = false;
174 continue;
176 escaped = curr == '\\';
177 if (curr == '"') {
178 return IsNextNonWhitespace(styler, start+i, ':');
179 } else if (!curr) {
180 return false;
183 return false;
186 static bool IsNextWordInList(WordList &keywordList, CharacterSet wordSet,
187 StyleContext &context, LexAccessor &styler) {
188 char word[51];
189 Sci_Position currPos = (Sci_Position) context.currentPos;
190 int i = 0;
191 while (i < 50) {
192 char ch = styler.SafeGetCharAt(currPos + i);
193 if (!wordSet.Contains(ch)) {
194 break;
196 word[i] = ch;
197 i++;
199 word[i] = '\0';
200 return keywordList.InList(word);
203 public:
204 LexerJSON() :
205 setOperators(CharacterSet::setNone, "[{}]:,"),
206 setURL(CharacterSet::setAlphaNum, "-._~:/?#[]@!$&'()*+,),="),
207 setKeywordJSONLD(CharacterSet::setAlpha, ":@"),
208 setKeywordJSON(CharacterSet::setAlpha, "$_") {
210 virtual ~LexerJSON() {}
211 virtual int SCI_METHOD Version() const {
212 return lvOriginal;
214 virtual void SCI_METHOD Release() {
215 delete this;
217 virtual const char *SCI_METHOD PropertyNames() {
218 return optSetJSON.PropertyNames();
220 virtual int SCI_METHOD PropertyType(const char *name) {
221 return optSetJSON.PropertyType(name);
223 virtual const char *SCI_METHOD DescribeProperty(const char *name) {
224 return optSetJSON.DescribeProperty(name);
226 virtual Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) {
227 if (optSetJSON.PropertySet(&options, key, val)) {
228 return 0;
230 return -1;
232 virtual Sci_Position SCI_METHOD WordListSet(int n, const char *wl) {
233 WordList *wordListN = 0;
234 switch (n) {
235 case 0:
236 wordListN = &keywordsJSON;
237 break;
238 case 1:
239 wordListN = &keywordsJSONLD;
240 break;
242 Sci_Position firstModification = -1;
243 if (wordListN) {
244 WordList wlNew;
245 wlNew.Set(wl);
246 if (*wordListN != wlNew) {
247 wordListN->Set(wl);
248 firstModification = 0;
251 return firstModification;
253 virtual void *SCI_METHOD PrivateCall(int, void *) {
254 return 0;
256 static ILexer *LexerFactoryJSON() {
257 return new LexerJSON;
259 virtual const char *SCI_METHOD DescribeWordListSets() {
260 return optSetJSON.DescribeWordListSets();
262 virtual void SCI_METHOD Lex(Sci_PositionU startPos,
263 Sci_Position length,
264 int initStyle,
265 IDocument *pAccess);
266 virtual void SCI_METHOD Fold(Sci_PositionU startPos,
267 Sci_Position length,
268 int initStyle,
269 IDocument *pAccess);
272 void SCI_METHOD LexerJSON::Lex(Sci_PositionU startPos,
273 Sci_Position length,
274 int initStyle,
275 IDocument *pAccess) {
276 LexAccessor styler(pAccess);
277 StyleContext context(startPos, length, initStyle, styler);
278 int stringStyleBefore = SCE_JSON_STRING;
279 while (context.More()) {
280 switch (context.state) {
281 case SCE_JSON_BLOCKCOMMENT:
282 if (context.Match("*/")) {
283 context.Forward();
284 context.ForwardSetState(SCE_JSON_DEFAULT);
286 break;
287 case SCE_JSON_LINECOMMENT:
288 if (context.atLineEnd) {
289 context.SetState(SCE_JSON_DEFAULT);
291 break;
292 case SCE_JSON_STRINGEOL:
293 if (context.atLineStart) {
294 context.SetState(SCE_JSON_DEFAULT);
296 break;
297 case SCE_JSON_ESCAPESEQUENCE:
298 escapeSeq.digitsLeft--;
299 if (!escapeSeq.atEscapeEnd()) {
300 if (escapeSeq.isInvalidChar(context.ch)) {
301 context.SetState(SCE_JSON_ERROR);
303 break;
305 if (context.ch == '"') {
306 context.SetState(stringStyleBefore);
307 context.ForwardSetState(SCE_C_DEFAULT);
308 } else if (context.ch == '\\') {
309 if (!escapeSeq.newSequence(context.chNext)) {
310 context.SetState(SCE_JSON_ERROR);
312 context.Forward();
313 } else {
314 context.SetState(stringStyleBefore);
315 if (context.atLineEnd) {
316 context.ChangeState(SCE_JSON_STRINGEOL);
319 break;
320 case SCE_JSON_PROPERTYNAME:
321 case SCE_JSON_STRING:
322 if (context.ch == '"') {
323 if (compactIRI.shouldHighlight()) {
324 context.ChangeState(SCE_JSON_COMPACTIRI);
325 context.ForwardSetState(SCE_JSON_DEFAULT);
326 compactIRI.resetState();
327 } else {
328 context.ForwardSetState(SCE_JSON_DEFAULT);
330 } else if (context.atLineEnd) {
331 context.ChangeState(SCE_JSON_STRINGEOL);
332 } else if (context.ch == '\\') {
333 stringStyleBefore = context.state;
334 if (options.escapeSequence) {
335 context.SetState(SCE_JSON_ESCAPESEQUENCE);
336 if (!escapeSeq.newSequence(context.chNext)) {
337 context.SetState(SCE_JSON_ERROR);
340 context.Forward();
341 } else if (context.Match("https://") ||
342 context.Match("http://") ||
343 context.Match("ssh://") ||
344 context.Match("git://") ||
345 context.Match("svn://") ||
346 context.Match("ftp://") ||
347 context.Match("mailto:")) {
348 // Handle most common URI schemes only
349 stringStyleBefore = context.state;
350 context.SetState(SCE_JSON_URI);
351 } else if (context.ch == '@') {
352 // https://www.w3.org/TR/json-ld/#dfn-keyword
353 if (IsNextWordInList(keywordsJSONLD, setKeywordJSONLD, context, styler)) {
354 stringStyleBefore = context.state;
355 context.SetState(SCE_JSON_LDKEYWORD);
357 } else {
358 compactIRI.checkChar(context.ch);
360 break;
361 case SCE_JSON_LDKEYWORD:
362 case SCE_JSON_URI:
363 if ((!setKeywordJSONLD.Contains(context.ch) &&
364 (context.state == SCE_JSON_LDKEYWORD)) ||
365 (!setURL.Contains(context.ch))) {
366 context.SetState(stringStyleBefore);
368 if (context.ch == '"') {
369 context.ForwardSetState(SCE_JSON_DEFAULT);
370 } else if (context.atLineEnd) {
371 context.ChangeState(SCE_JSON_STRINGEOL);
373 break;
374 case SCE_JSON_OPERATOR:
375 case SCE_JSON_NUMBER:
376 context.SetState(SCE_JSON_DEFAULT);
377 break;
378 case SCE_JSON_ERROR:
379 if (context.atLineEnd) {
380 context.SetState(SCE_JSON_DEFAULT);
382 break;
383 case SCE_JSON_KEYWORD:
384 if (!setKeywordJSON.Contains(context.ch)) {
385 context.SetState(SCE_JSON_DEFAULT);
387 break;
389 if (context.state == SCE_JSON_DEFAULT) {
390 if (context.ch == '"') {
391 compactIRI.resetState();
392 context.SetState(SCE_JSON_STRING);
393 Sci_Position currPos = static_cast<Sci_Position>(context.currentPos);
394 if (AtPropertyName(styler, currPos)) {
395 context.SetState(SCE_JSON_PROPERTYNAME);
397 } else if (setOperators.Contains(context.ch)) {
398 context.SetState(SCE_JSON_OPERATOR);
399 } else if (options.allowComments && context.Match("/*")) {
400 context.SetState(SCE_JSON_BLOCKCOMMENT);
401 context.Forward();
402 } else if (options.allowComments && context.Match("//")) {
403 context.SetState(SCE_JSON_LINECOMMENT);
404 } else if (setKeywordJSON.Contains(context.ch)) {
405 if (IsNextWordInList(keywordsJSON, setKeywordJSON, context, styler)) {
406 context.SetState(SCE_JSON_KEYWORD);
409 bool numberStart =
410 IsADigit(context.ch) && (context.chPrev == '+'||
411 context.chPrev == '-' ||
412 context.atLineStart ||
413 IsASpace(context.chPrev) ||
414 setOperators.Contains(context.chPrev));
415 bool exponentPart =
416 tolower(context.ch) == 'e' &&
417 IsADigit(context.chPrev) &&
418 (IsADigit(context.chNext) ||
419 context.chNext == '+' ||
420 context.chNext == '-');
421 bool signPart =
422 (context.ch == '-' || context.ch == '+') &&
423 ((tolower(context.chPrev) == 'e' && IsADigit(context.chNext)) ||
424 ((IsASpace(context.chPrev) || setOperators.Contains(context.chPrev))
425 && IsADigit(context.chNext)));
426 bool adjacentDigit =
427 IsADigit(context.ch) && IsADigit(context.chPrev);
428 bool afterExponent = IsADigit(context.ch) && tolower(context.chPrev) == 'e';
429 bool dotPart = context.ch == '.' &&
430 IsADigit(context.chPrev) &&
431 IsADigit(context.chNext);
432 bool afterDot = IsADigit(context.ch) && context.chPrev == '.';
433 if (numberStart ||
434 exponentPart ||
435 signPart ||
436 adjacentDigit ||
437 dotPart ||
438 afterExponent ||
439 afterDot) {
440 context.SetState(SCE_JSON_NUMBER);
441 } else if (context.state == SCE_JSON_DEFAULT && !IsASpace(context.ch)) {
442 context.SetState(SCE_JSON_ERROR);
445 context.Forward();
447 context.Complete();
450 void SCI_METHOD LexerJSON::Fold(Sci_PositionU startPos,
451 Sci_Position length,
452 int,
453 IDocument *pAccess) {
454 if (!options.fold) {
455 return;
457 LexAccessor styler(pAccess);
458 Sci_PositionU currLine = styler.GetLine(startPos);
459 Sci_PositionU endPos = startPos + length;
460 int currLevel = SC_FOLDLEVELBASE;
461 if (currLine > 0)
462 currLevel = styler.LevelAt(currLine - 1) >> 16;
463 int nextLevel = currLevel;
464 int visibleChars = 0;
465 for (Sci_PositionU i = startPos; i < endPos; i++) {
466 char curr = styler.SafeGetCharAt(i);
467 char next = styler.SafeGetCharAt(i+1);
468 bool atEOL = (curr == '\r' && next != '\n') || (curr == '\n');
469 if (styler.StyleAt(i) == SCE_JSON_OPERATOR) {
470 if (curr == '{' || curr == '[') {
471 nextLevel++;
472 } else if (curr == '}' || curr == ']') {
473 nextLevel--;
476 if (atEOL || i == (endPos-1)) {
477 int level = currLevel | nextLevel << 16;
478 if (!visibleChars && options.foldCompact) {
479 level |= SC_FOLDLEVELWHITEFLAG;
480 } else if (nextLevel > currLevel) {
481 level |= SC_FOLDLEVELHEADERFLAG;
483 if (level != styler.LevelAt(currLine)) {
484 styler.SetLevel(currLine, level);
486 currLine++;
487 currLevel = nextLevel;
488 visibleChars = 0;
490 if (!isspacechar(curr)) {
491 visibleChars++;
496 LexerModule lmJSON(SCLEX_JSON,
497 LexerJSON::LexerFactoryJSON,
498 "json",
499 JSONWordListDesc);