1 // Scintilla source code edit control
4 * @date February 19, 2016
5 * @brief Lexer for JSON and JSON-LD formats
8 * The License.txt file describes the conditions under which this software may
22 #include "Scintilla.h"
25 #include "LexAccessor.h"
26 #include "StyleContext.h"
27 #include "CharacterSet.h"
28 #include "LexerModule.h"
29 #include "OptionSet.h"
32 using namespace Scintilla
;
// Table of word-list descriptions for this lexer; it is handed to
// DefineWordListSets() by OptionSetJSON. The initializer entries are not
// visible in this excerpt.
35 static const char *const JSONWordListDesc
[] = {
42 * Used to detect compact IRI/URLs in JSON-LD without first looking ahead for the
43 * colon separating the prefix and suffix
45 * https://www.w3.org/TR/json-ld/#dfn-compact-iri
// State used for compact-IRI detection (the enclosing struct header and some
// statements are not visible in this excerpt).
// Latched to true by checkChar() once a character outside setCompactIRI is seen.
49 bool foundInvalidChar
;
// Characters accepted by checkChar(): the CharacterSet::setAlpha base set
// extended with '$', '_' and '-' (see the assignment below).
50 CharacterSet setCompactIRI
;
// Initial values: nothing invalid seen yet, and the accepted character set.
53 foundInvalidChar
= false;
54 setCompactIRI
= CharacterSet(CharacterSet::setAlpha
, "$_-");
// Second clearing of the flag; LexerJSON::Lex calls compactIRI.resetState()
// when a string starts, so this presumably belongs to resetState() - confirm.
58 foundInvalidChar
= false;
// Feeds one scanned character into the compact-IRI detector.
// ch: the character to examine.
// Only the final statement is visible in this excerpt; any branching that
// precedes it (original lines 61-63) is missing here.
60 void checkChar(int ch
) {
// "|=" makes the flag sticky: once a character outside setCompactIRI has been
// seen, later valid characters cannot clear it.
64 foundInvalidChar
|= !setCompactIRI
.Contains(ch
);
// Returns true only when no invalid character has been flagged and
// colonCount is exactly 1.
67 bool shouldHighlight() const {
68 return !foundInvalidChar
&& colonCount
== 1;
73 * Keeps track of escaped characters in strings as per:
75 * https://tools.ietf.org/html/rfc7159#section-7
// Helper that validates backslash escapes inside strings.
// NOTE: this listing is an excerpt; the digitsLeft declaration, the
// constructor header and several statements are not visible.
77 struct EscapeSequence
{
// Characters accepted by isInvalidChar() as hexadecimal digits.
79 CharacterSet setHexDigits
;
// Characters accepted directly after a backslash.
80 CharacterSet setEscapeChars
;
// setDigits base set plus the letters A-F in both cases.
83 setHexDigits
= CharacterSet(CharacterSet::setDigits
, "ABCDEFabcdef");
// After C++ unescaping the literal holds: \ " t n b f r u /
84 setEscapeChars
= CharacterSet(CharacterSet::setNone
, "\\\"tnbfru/");
86 // Returns true if the following character is a valid escaped character
87 bool newSequence(int nextChar
) {
// 'u' is handled separately from the other escape characters; the body of
// this branch is not visible here.
89 if (nextChar
== 'u') {
// Any other character must be a member of setEscapeChars.
91 } else if (!setEscapeChars
.Contains(nextChar
)) {
// True once digitsLeft has dropped to zero or below.
96 bool atEscapeEnd() const {
97 return digitsLeft
<= 0;
// True when currChar is not a member of setHexDigits.
99 bool isInvalidChar(int currChar
) const {
100 return !setHexDigits
.Contains(currChar
);
// Both options start disabled. They are the fields that OptionSetJSON binds
// to "lexer.json.allow.comments" and "lexer.json.escape.sequence".
// (The enclosing struct and constructor header are not visible here.)
112 allowComments
= false;
113 escapeSequence
= false;
// Binds property names to OptionsJSON fields and registers the word-list
// descriptions. (The constructor header is not visible in this excerpt.)
117 struct OptionSetJSON
: public OptionSet
<OptionsJSON
> {
// "lexer.json.escape.sequence" -> OptionsJSON::escapeSequence
119 DefineProperty("lexer.json.escape.sequence", &OptionsJSON::escapeSequence
,
120 "Set to 1 to enable highlighting of escape sequences in strings");
// "lexer.json.allow.comments" -> OptionsJSON::allowComments
122 DefineProperty("lexer.json.allow.comments", &OptionsJSON::allowComments
,
123 "Set to 1 to enable highlighting of line/block comments in JSON");
// The two folding properties are registered without a description string.
125 DefineProperty("fold.compact", &OptionsJSON::foldCompact
);
126 DefineProperty("fold", &OptionsJSON::fold
);
127 DefineWordListSets(JSONWordListDesc
);
// Lexer object for JSON / JSON-LD; implements the ILexer interface.
// (Some member declarations, e.g. the ones for `options` and `setURL` that
// the methods use, are not visible in this excerpt.)
131 class LexerJSON
: public ILexer
{
// Property-name to option-field bindings.
133 OptionSetJSON optSetJSON
;
// Escape-sequence validation state used while styling strings.
134 EscapeSequence escapeSeq
;
// Word lists that WordListSet() can replace.
135 WordList keywordsJSON
;
136 WordList keywordsJSONLD
;
// Characters styled as operators.
137 CharacterSet setOperators
;
// Characters that may appear in a JSON-LD keyword / a JSON keyword.
139 CharacterSet setKeywordJSONLD
;
140 CharacterSet setKeywordJSON
;
// Compact-IRI detection state for the string currently being styled.
141 CompactIRI compactIRI
;
// Scans forward from `start`, reading characters through `styler`.
// styler: document accessor; start: first position to inspect; ch: the
// character being looked for.
// NOTE(review): the loop header, the comparison against `ch` and the return
// statements are not visible in this excerpt. Judging by the name and by the
// call in AtPropertyName, it presumably reports whether `ch` is the first
// non-whitespace character found - confirm against the full source.
143 static bool IsNextNonWhitespace(LexAccessor
&styler
, Sci_Position start
, char ch
) {
// SafeGetCharAt is given '\0' as its second argument for both reads.
147 char curr
= styler
.SafeGetCharAt(start
+i
, '\0');
148 char next
= styler
.SafeGetCharAt(start
+i
+1, '\0');
// A lone '\r' (not followed by '\n') or any '\n' counts as end of line.
149 bool atEOL
= (curr
== '\r' && next
!= '\n') || (curr
== '\n');
// Taken on the first non-space character, or on reaching the end of the line.
152 } else if (!isspacechar(curr
) || atEOL
) {
160 * Looks for the colon following the end quote
162 * Assumes property names are no longer than 100 characters.
163 * The colon is also expected to be less than 50 spaces after the end
164 * quote for the string to be considered a property name
// Decides whether the string beginning at `start` is a property name.
// styler: document accessor; start: position from which the scan begins.
// The final answer is delegated to IsNextNonWhitespace(..., ':').
// NOTE: the loop header and the closing-quote test are not visible in this
// excerpt.
166 static bool AtPropertyName(LexAccessor
&styler
, Sci_Position start
) {
// Tracks whether the character just read was a backslash.
168 bool escaped
= false;
171 char curr
= styler
.SafeGetCharAt(start
+i
, '\0');
176 escaped
= curr
== '\\';
// Look for ':' starting at offset i from `start`.
178 return IsNextNonWhitespace(styler
, start
+i
, ':');
// Collects a word starting at the context's current position and looks it up
// in keywordList.
// keywordList: list to search; wordSet: characters allowed in the word
// (passed by value); context: supplies the start position; styler: supplies
// the characters.
// NOTE: the buffer declaration and the loop header are not visible in this
// excerpt.
186 static bool IsNextWordInList(WordList
&keywordList
, CharacterSet wordSet
,
187 StyleContext
&context
, LexAccessor
&styler
) {
189 Sci_Position currPos
= (Sci_Position
) context
.currentPos
;
192 char ch
= styler
.SafeGetCharAt(currPos
+ i
);
// The first character outside wordSet is tested for here.
193 if (!wordSet
.Contains(ch
)) {
200 return keywordList
.InList(word
);
// Constructor member-initialiser list (the constructor header is not visible
// in this excerpt). Each character set is a base set plus extra characters.
// Operators: brackets, braces, colon and comma.
205 setOperators(CharacterSet::setNone
, "[{}]:,"),
// NOTE(review): ')' and ',' each appear twice in this literal ("+,),="),
// which is redundant. RFC 3986 lists the sub-delimiters as "!$&'()*+,;=",
// so the second ')' may have been meant to be ';' - confirm before changing.
206 setURL(CharacterSet::setAlphaNum
, "-._~:/?#[]@!$&'()*+,),="),
// JSON-LD keywords: setAlpha base plus ':' and '@'.
207 setKeywordJSONLD(CharacterSet::setAlpha
, ":@"),
// JSON keywords: setAlpha base plus '$' and '_'.
208 setKeywordJSON(CharacterSet::setAlpha
, "$_") {
// Virtual destructor with an empty body.
210 virtual ~LexerJSON() {}
211 virtual int SCI_METHOD
Version() const {
214 virtual void SCI_METHOD
Release() {
// Forwards to optSetJSON.PropertyNames() and returns its result unchanged.
217 virtual const char *SCI_METHOD
PropertyNames() {
218 return optSetJSON
.PropertyNames();
// Forwards to optSetJSON.PropertyType(name) and returns its result unchanged.
// name: property name to query.
220 virtual int SCI_METHOD
PropertyType(const char *name
) {
221 return optSetJSON
.PropertyType(name
);
// Forwards to optSetJSON.DescribeProperty(name) and returns its result
// unchanged.
// name: property name to describe.
223 virtual const char *SCI_METHOD
DescribeProperty(const char *name
) {
224 return optSetJSON
.DescribeProperty(name
);
// Applies the property `key` = `val` to this lexer's options through
// optSetJSON.PropertySet().
// key: property name; val: new value as text.
// NOTE: the return statements are not visible in this excerpt.
226 virtual Sci_Position SCI_METHOD
PropertySet(const char *key
, const char *val
) {
// Branches on the boolean result of the option-set call.
227 if (optSetJSON
.PropertySet(&options
, key
, val
)) {
// Replaces one of the lexer's word lists.
// n: selects the target list; wl: new list contents as text.
// Returns firstModification, which starts at -1 and becomes 0 when the new
// list differs from the stored one.
// NOTE: the switch on `n`, the null check and the construction of `wlNew`
// are not visible in this excerpt.
232 virtual Sci_Position SCI_METHOD
WordListSet(int n
, const char *wl
) {
// Target list; stays null unless one of the assignments below runs.
233 WordList
*wordListN
= 0;
236 wordListN
= &keywordsJSON
;
239 wordListN
= &keywordsJSONLD
;
242 Sci_Position firstModification
= -1;
// Only report a modification when the contents actually changed.
246 if (*wordListN
!= wlNew
) {
248 firstModification
= 0;
251 return firstModification
;
253 virtual void *SCI_METHOD
PrivateCall(int, void *) {
// Factory referenced by the lmJSON module definition: returns a newly
// allocated LexerJSON as an ILexer pointer. The pointer comes from a raw
// `new`; how it is released is not visible in this excerpt.
256 static ILexer
*LexerFactoryJSON() {
257 return new LexerJSON
;
// Forwards to optSetJSON.DescribeWordListSets() and returns its result
// unchanged.
259 virtual const char *SCI_METHOD
DescribeWordListSets() {
260 return optSetJSON
.DescribeWordListSets();
262 virtual void SCI_METHOD
Lex(Sci_PositionU startPos
,
266 virtual void SCI_METHOD
Fold(Sci_PositionU startPos
,
// Styling pass. Wraps pAccess in a LexAccessor, builds a StyleContext over
// the range described by startPos/length seeded with initStyle, then loops
// while context.More() holds: first a switch on the current state decides
// whether that state ends, then (when the state is SCE_JSON_DEFAULT) the
// current character decides which state starts.
// NOTE: this listing is an excerpt; `break` statements, closing braces, some
// case labels and several declarations are not visible.
272 void SCI_METHOD
LexerJSON::Lex(Sci_PositionU startPos
,
275 IDocument
*pAccess
) {
276 LexAccessor
styler(pAccess
);
277 StyleContext
context(startPos
, length
, initStyle
, styler
);
// Style to return to when a nested style (escape sequence, URI, JSON-LD
// keyword) ends; updated just before each of those styles is entered.
278 int stringStyleBefore
= SCE_JSON_STRING
;
279 while (context
.More()) {
280 switch (context
.state
) {
// Block comment: ends at the two-character terminator.
281 case SCE_JSON_BLOCKCOMMENT
:
282 if (context
.Match("*/")) {
284 context
.ForwardSetState(SCE_JSON_DEFAULT
);
// Line comment: ends at the end of the line.
287 case SCE_JSON_LINECOMMENT
:
288 if (context
.atLineEnd
) {
289 context
.SetState(SCE_JSON_DEFAULT
);
// Unterminated-string style: cleared at the start of the next line.
292 case SCE_JSON_STRINGEOL
:
293 if (context
.atLineStart
) {
294 context
.SetState(SCE_JSON_DEFAULT
);
// Escape sequence: every character visited in this state consumes one unit
// of escapeSeq.digitsLeft.
297 case SCE_JSON_ESCAPESEQUENCE
:
298 escapeSeq
.digitsLeft
--;
// Escape not finished yet: a character rejected by isInvalidChar() is an error.
299 if (!escapeSeq
.atEscapeEnd()) {
300 if (escapeSeq
.isInvalidChar(context
.ch
)) {
301 context
.SetState(SCE_JSON_ERROR
);
// A quote here restores the surrounding string style for the quote itself
// and then leaves the string.
305 if (context
.ch
== '"') {
306 context
.SetState(stringStyleBefore
);
// NOTE(review): SCE_C_DEFAULT is used here while every other transition in
// this function uses SCE_JSON_DEFAULT - looks like a typo; confirm the two
// constants have the same value and switch to SCE_JSON_DEFAULT.
307 context
.ForwardSetState(SCE_C_DEFAULT
);
// A backslash starts another escape; an unsupported one is an error.
308 } else if (context
.ch
== '\\') {
309 if (!escapeSeq
.newSequence(context
.chNext
)) {
310 context
.SetState(SCE_JSON_ERROR
);
// Otherwise fall back to the surrounding string style, or to the
// unterminated-string style when the line ends first.
314 context
.SetState(stringStyleBefore
);
315 if (context
.atLineEnd
) {
316 context
.ChangeState(SCE_JSON_STRINGEOL
);
// Property names and plain strings share the same handling.
320 case SCE_JSON_PROPERTYNAME
:
321 case SCE_JSON_STRING
:
322 if (context
.ch
== '"') {
// Closing quote: if the detector accepts the content, ChangeState restyles
// the pending text as a compact IRI before leaving the string.
323 if (compactIRI
.shouldHighlight()) {
324 context
.ChangeState(SCE_JSON_COMPACTIRI
);
325 context
.ForwardSetState(SCE_JSON_DEFAULT
);
326 compactIRI
.resetState();
328 context
.ForwardSetState(SCE_JSON_DEFAULT
);
// Line ended before the closing quote.
330 } else if (context
.atLineEnd
) {
331 context
.ChangeState(SCE_JSON_STRINGEOL
);
// Backslash: remember the current style; the escape is styled separately
// only when the escapeSequence option is on.
332 } else if (context
.ch
== '\\') {
333 stringStyleBefore
= context
.state
;
334 if (options
.escapeSequence
) {
335 context
.SetState(SCE_JSON_ESCAPESEQUENCE
);
336 if (!escapeSeq
.newSequence(context
.chNext
)) {
337 context
.SetState(SCE_JSON_ERROR
);
341 } else if (context
.Match("https://") ||
342 context
.Match("http://") ||
343 context
.Match("ssh://") ||
344 context
.Match("git://") ||
345 context
.Match("svn://") ||
346 context
.Match("ftp://") ||
347 context
.Match("mailto:")) {
348 // Handle most common URI schemes only
349 stringStyleBefore
= context
.state
;
350 context
.SetState(SCE_JSON_URI
);
351 } else if (context
.ch
== '@') {
352 // https://www.w3.org/TR/json-ld/#dfn-keyword
// Only switch style when the word at this position is in keywordsJSONLD.
353 if (IsNextWordInList(keywordsJSONLD
, setKeywordJSONLD
, context
, styler
)) {
354 stringStyleBefore
= context
.state
;
355 context
.SetState(SCE_JSON_LDKEYWORD
);
// Feed the current character into the compact-IRI detector.
358 compactIRI
.checkChar(context
.ch
);
// JSON-LD keyword. The explicit test on context.state below suggests this
// code is shared with another case label that is not visible in this
// excerpt (presumably the URI style) - confirm.
361 case SCE_JSON_LDKEYWORD
:
// The style ends when an LD keyword meets a character outside
// setKeywordJSONLD, or when any character falls outside setURL.
363 if ((!setKeywordJSONLD
.Contains(context
.ch
) &&
364 (context
.state
== SCE_JSON_LDKEYWORD
)) ||
365 (!setURL
.Contains(context
.ch
))) {
366 context
.SetState(stringStyleBefore
);
368 if (context
.ch
== '"') {
369 context
.ForwardSetState(SCE_JSON_DEFAULT
);
370 } else if (context
.atLineEnd
) {
371 context
.ChangeState(SCE_JSON_STRINGEOL
);
// Operators and numbers return to the default state unconditionally.
374 case SCE_JSON_OPERATOR
:
375 case SCE_JSON_NUMBER
:
376 context
.SetState(SCE_JSON_DEFAULT
);
// The case label for this branch is not visible in this excerpt; the state
// it handles is reset at the end of the line.
379 if (context
.atLineEnd
) {
380 context
.SetState(SCE_JSON_DEFAULT
);
// Keyword: ends at the first character outside setKeywordJSON.
383 case SCE_JSON_KEYWORD
:
384 if (!setKeywordJSON
.Contains(context
.ch
)) {
385 context
.SetState(SCE_JSON_DEFAULT
);
// Second phase: in the default state, decide which style starts here.
389 if (context
.state
== SCE_JSON_DEFAULT
) {
390 if (context
.ch
== '"') {
// A new string begins: clear the compact-IRI detector, assume a plain
// string, then upgrade to a property name when AtPropertyName() says so.
391 compactIRI
.resetState();
392 context
.SetState(SCE_JSON_STRING
);
393 Sci_Position currPos
= static_cast<Sci_Position
>(context
.currentPos
);
394 if (AtPropertyName(styler
, currPos
)) {
395 context
.SetState(SCE_JSON_PROPERTYNAME
);
397 } else if (setOperators
.Contains(context
.ch
)) {
398 context
.SetState(SCE_JSON_OPERATOR
);
// Comments are recognised only when the allowComments option is on.
399 } else if (options
.allowComments
&& context
.Match("/*")) {
400 context
.SetState(SCE_JSON_BLOCKCOMMENT
);
402 } else if (options
.allowComments
&& context
.Match("//")) {
403 context
.SetState(SCE_JSON_LINECOMMENT
);
// A keyword character starts a keyword only if the whole word is in
// keywordsJSON.
404 } else if (setKeywordJSON
.Contains(context
.ch
)) {
405 if (IsNextWordInList(keywordsJSON
, setKeywordJSON
, context
, styler
)) {
406 context
.SetState(SCE_JSON_KEYWORD
);
// Number detection. The declarations that receive the next four expressions
// are not visible in this excerpt.
// (1) A digit preceded by a sign, the line start, whitespace or an operator.
410 IsADigit(context
.ch
) && (context
.chPrev
== '+'||
411 context
.chPrev
== '-' ||
412 context
.atLineStart
||
413 IsASpace(context
.chPrev
) ||
414 setOperators
.Contains(context
.chPrev
));
// (2) An 'e'/'E' directly after a digit and followed by a digit or a sign.
416 tolower(context
.ch
) == 'e' &&
417 IsADigit(context
.chPrev
) &&
418 (IsADigit(context
.chNext
) ||
419 context
.chNext
== '+' ||
420 context
.chNext
== '-');
// (3) A sign followed by a digit, and preceded by 'e'/'E', whitespace or an
// operator.
422 (context
.ch
== '-' || context
.ch
== '+') &&
423 ((tolower(context
.chPrev
) == 'e' && IsADigit(context
.chNext
)) ||
424 ((IsASpace(context
.chPrev
) || setOperators
.Contains(context
.chPrev
))
425 && IsADigit(context
.chNext
)));
// (4) A digit directly after another digit.
427 IsADigit(context
.ch
) && IsADigit(context
.chPrev
);
// A digit directly after 'e'/'E'.
428 bool afterExponent
= IsADigit(context
.ch
) && tolower(context
.chPrev
) == 'e';
// A '.' with a digit on both sides.
429 bool dotPart
= context
.ch
== '.' &&
430 IsADigit(context
.chPrev
) &&
431 IsADigit(context
.chNext
);
// A digit directly after '.'.
432 bool afterDot
= IsADigit(context
.ch
) && context
.chPrev
== '.';
// The condition combining the flags above is not visible in this excerpt.
440 context
.SetState(SCE_JSON_NUMBER
);
// Any other non-space character in the default state is styled as an error.
441 } else if (context
.state
== SCE_JSON_DEFAULT
&& !IsASpace(context
.ch
)) {
442 context
.SetState(SCE_JSON_ERROR
);
// Folding pass. Walks the characters from startPos up to (not including)
// startPos + length and stores one fold level per line via styler.SetLevel().
// Only characters styled SCE_JSON_OPERATOR are examined for brackets.
// NOTE: this listing is an excerpt; the early-exit check, the level
// increments/decrements and the per-line resets are not visible.
450 void SCI_METHOD
LexerJSON::Fold(Sci_PositionU startPos
,
453 IDocument
*pAccess
) {
457 LexAccessor
styler(pAccess
);
458 Sci_PositionU currLine
= styler
.GetLine(startPos
);
459 Sci_PositionU endPos
= startPos
+ length
;
460 int currLevel
= SC_FOLDLEVELBASE
;
// Resume from the previous line: the upper 16 bits of its stored value are
// where this function packs nextLevel (see the `level` computation below).
// The guard around this statement is not visible in this excerpt.
462 currLevel
= styler
.LevelAt(currLine
- 1) >> 16;
463 int nextLevel
= currLevel
;
464 int visibleChars
= 0;
465 for (Sci_PositionU i
= startPos
; i
< endPos
; i
++) {
466 char curr
= styler
.SafeGetCharAt(i
);
467 char next
= styler
.SafeGetCharAt(i
+1);
// A lone '\r' (not followed by '\n') or any '\n' counts as end of line.
468 bool atEOL
= (curr
== '\r' && next
!= '\n') || (curr
== '\n');
// Brackets count only when styled as operators, so brackets inside other
// styles are ignored.
469 if (styler
.StyleAt(i
) == SCE_JSON_OPERATOR
) {
470 if (curr
== '{' || curr
== '[') {
472 } else if (curr
== '}' || curr
== ']') {
// Flush the line's level at each line end and at the last character of the
// range.
476 if (atEOL
|| i
== (endPos
-1)) {
// Low bits: level of this line; bits 16 and up: level carried to the next.
477 int level
= currLevel
| nextLevel
<< 16;
// A line with no visible characters gets the white flag when foldCompact is
// on; otherwise a line that raises the level becomes a fold header.
478 if (!visibleChars
&& options
.foldCompact
) {
479 level
|= SC_FOLDLEVELWHITEFLAG
;
480 } else if (nextLevel
> currLevel
) {
481 level
|= SC_FOLDLEVELHEADERFLAG
;
// Write only when the stored value differs.
483 if (level
!= styler
.LevelAt(currLine
)) {
484 styler
.SetLevel(currLine
, level
);
487 currLevel
= nextLevel
;
// Test for a non-whitespace character; the statement it guards is not
// visible in this excerpt.
490 if (!isspacechar(curr
)) {
// Module object for this lexer: constructed with the SCLEX_JSON identifier
// and the LexerJSON::LexerFactoryJSON factory. The remaining constructor
// arguments are not visible in this excerpt.
496 LexerModule
lmJSON(SCLEX_JSON
,
497 LexerJSON::LexerFactoryJSON
,