1 // Scintilla source code edit control
4 * @date February 19, 2016
5 * @brief Lexer for JSON and JSON-LD formats
8 * The License.txt file describes the conditions under which this software may
22 #include "Scintilla.h"
25 #include "LexAccessor.h"
26 #include "StyleContext.h"
27 #include "CharacterSet.h"
28 #include "LexerModule.h"
29 #include "OptionSet.h"
30 #include "DefaultLexer.h"
32 using namespace Scintilla
;
34 static const char *const JSONWordListDesc
[] = {
41 * Used to detect compact IRIs/URLs in JSON-LD without first looking ahead for the
42 * colon separating the prefix and suffix.
44 * https://www.w3.org/TR/json-ld/#dfn-compact-iri
48 bool foundInvalidChar
;
49 CharacterSet setCompactIRI
;
52 foundInvalidChar
= false;
53 setCompactIRI
= CharacterSet(CharacterSet::setAlpha
, "$_-");
57 foundInvalidChar
= false;
59 void checkChar(int ch
) {
63 foundInvalidChar
|= !setCompactIRI
.Contains(ch
);
66 bool shouldHighlight() const {
67 return !foundInvalidChar
&& colonCount
== 1;
72 * Keeps track of escaped characters in strings as per:
74 * https://tools.ietf.org/html/rfc7159#section-7
76 struct EscapeSequence
{
78 CharacterSet setHexDigits
;
79 CharacterSet setEscapeChars
;
82 setHexDigits
= CharacterSet(CharacterSet::setDigits
, "ABCDEFabcdef");
83 setEscapeChars
= CharacterSet(CharacterSet::setNone
, "\\\"tnbfru/");
85 // Returns true if the following character is a valid escaped character
86 bool newSequence(int nextChar
) {
88 if (nextChar
== 'u') {
90 } else if (!setEscapeChars
.Contains(nextChar
)) {
95 bool atEscapeEnd() const {
96 return digitsLeft
<= 0;
98 bool isInvalidChar(int currChar
) const {
99 return !setHexDigits
.Contains(currChar
);
111 allowComments
= false;
112 escapeSequence
= false;
116 struct OptionSetJSON
: public OptionSet
<OptionsJSON
> {
118 DefineProperty("lexer.json.escape.sequence", &OptionsJSON::escapeSequence
,
119 "Set to 1 to enable highlighting of escape sequences in strings");
121 DefineProperty("lexer.json.allow.comments", &OptionsJSON::allowComments
,
122 "Set to 1 to enable highlighting of line/block comments in JSON");
124 DefineProperty("fold.compact", &OptionsJSON::foldCompact
);
125 DefineProperty("fold", &OptionsJSON::fold
);
126 DefineWordListSets(JSONWordListDesc
);
130 class LexerJSON
: public DefaultLexer
{
132 OptionSetJSON optSetJSON
;
133 EscapeSequence escapeSeq
;
134 WordList keywordsJSON
;
135 WordList keywordsJSONLD
;
136 CharacterSet setOperators
;
138 CharacterSet setKeywordJSONLD
;
139 CharacterSet setKeywordJSON
;
140 CompactIRI compactIRI
;
142 static bool IsNextNonWhitespace(LexAccessor
&styler
, Sci_Position start
, char ch
) {
146 char curr
= styler
.SafeGetCharAt(start
+i
, '\0');
147 char next
= styler
.SafeGetCharAt(start
+i
+1, '\0');
148 bool atEOL
= (curr
== '\r' && next
!= '\n') || (curr
== '\n');
151 } else if (!isspacechar(curr
) || atEOL
) {
159 * Looks for the colon following the end quote
161 * Assumes property names are no longer than 100 characters.
162 * The colon is also expected to be fewer than 50 spaces after the end
163 * quote for the string to be considered a property name.
165 static bool AtPropertyName(LexAccessor
&styler
, Sci_Position start
) {
167 bool escaped
= false;
170 char curr
= styler
.SafeGetCharAt(start
+i
, '\0');
175 escaped
= curr
== '\\';
177 return IsNextNonWhitespace(styler
, start
+i
, ':');
185 static bool IsNextWordInList(WordList
&keywordList
, CharacterSet wordSet
,
186 StyleContext
&context
, LexAccessor
&styler
) {
188 Sci_Position currPos
= (Sci_Position
) context
.currentPos
;
191 char ch
= styler
.SafeGetCharAt(currPos
+ i
);
192 if (!wordSet
.Contains(ch
)) {
199 return keywordList
.InList(word
);
204 setOperators(CharacterSet::setNone
, "[{}]:,"),
205 setURL(CharacterSet::setAlphaNum
, "-._~:/?#[]@!$&'()*+,),="),
206 setKeywordJSONLD(CharacterSet::setAlpha
, ":@"),
207 setKeywordJSON(CharacterSet::setAlpha
, "$_") {
209 virtual ~LexerJSON() {}
210 int SCI_METHOD
Version() const override
{
213 void SCI_METHOD
Release() override
{
216 const char *SCI_METHOD
PropertyNames() override
{
217 return optSetJSON
.PropertyNames();
219 int SCI_METHOD
PropertyType(const char *name
) override
{
220 return optSetJSON
.PropertyType(name
);
222 const char *SCI_METHOD
DescribeProperty(const char *name
) override
{
223 return optSetJSON
.DescribeProperty(name
);
225 Sci_Position SCI_METHOD
PropertySet(const char *key
, const char *val
) override
{
226 if (optSetJSON
.PropertySet(&options
, key
, val
)) {
231 Sci_Position SCI_METHOD
WordListSet(int n
, const char *wl
) override
{
232 WordList
*wordListN
= 0;
235 wordListN
= &keywordsJSON
;
238 wordListN
= &keywordsJSONLD
;
241 Sci_Position firstModification
= -1;
245 if (*wordListN
!= wlNew
) {
247 firstModification
= 0;
250 return firstModification
;
252 void *SCI_METHOD
PrivateCall(int, void *) override
{
255 static ILexer4
*LexerFactoryJSON() {
256 return new LexerJSON
;
258 const char *SCI_METHOD
DescribeWordListSets() override
{
259 return optSetJSON
.DescribeWordListSets();
261 void SCI_METHOD
Lex(Sci_PositionU startPos
,
264 IDocument
*pAccess
) override
;
265 void SCI_METHOD
Fold(Sci_PositionU startPos
,
268 IDocument
*pAccess
) override
;
271 void SCI_METHOD
LexerJSON::Lex(Sci_PositionU startPos
,
274 IDocument
*pAccess
) {
275 LexAccessor
styler(pAccess
);
276 StyleContext
context(startPos
, length
, initStyle
, styler
);
277 int stringStyleBefore
= SCE_JSON_STRING
;
278 while (context
.More()) {
279 switch (context
.state
) {
280 case SCE_JSON_BLOCKCOMMENT
:
281 if (context
.Match("*/")) {
283 context
.ForwardSetState(SCE_JSON_DEFAULT
);
286 case SCE_JSON_LINECOMMENT
:
287 if (context
.atLineEnd
) {
288 context
.SetState(SCE_JSON_DEFAULT
);
291 case SCE_JSON_STRINGEOL
:
292 if (context
.atLineStart
) {
293 context
.SetState(SCE_JSON_DEFAULT
);
296 case SCE_JSON_ESCAPESEQUENCE
:
297 escapeSeq
.digitsLeft
--;
298 if (!escapeSeq
.atEscapeEnd()) {
299 if (escapeSeq
.isInvalidChar(context
.ch
)) {
300 context
.SetState(SCE_JSON_ERROR
);
304 if (context
.ch
== '"') {
305 context
.SetState(stringStyleBefore
);
306 context
.ForwardSetState(SCE_C_DEFAULT
);
307 } else if (context
.ch
== '\\') {
308 if (!escapeSeq
.newSequence(context
.chNext
)) {
309 context
.SetState(SCE_JSON_ERROR
);
313 context
.SetState(stringStyleBefore
);
314 if (context
.atLineEnd
) {
315 context
.ChangeState(SCE_JSON_STRINGEOL
);
319 case SCE_JSON_PROPERTYNAME
:
320 case SCE_JSON_STRING
:
321 if (context
.ch
== '"') {
322 if (compactIRI
.shouldHighlight()) {
323 context
.ChangeState(SCE_JSON_COMPACTIRI
);
324 context
.ForwardSetState(SCE_JSON_DEFAULT
);
325 compactIRI
.resetState();
327 context
.ForwardSetState(SCE_JSON_DEFAULT
);
329 } else if (context
.atLineEnd
) {
330 context
.ChangeState(SCE_JSON_STRINGEOL
);
331 } else if (context
.ch
== '\\') {
332 stringStyleBefore
= context
.state
;
333 if (options
.escapeSequence
) {
334 context
.SetState(SCE_JSON_ESCAPESEQUENCE
);
335 if (!escapeSeq
.newSequence(context
.chNext
)) {
336 context
.SetState(SCE_JSON_ERROR
);
340 } else if (context
.Match("https://") ||
341 context
.Match("http://") ||
342 context
.Match("ssh://") ||
343 context
.Match("git://") ||
344 context
.Match("svn://") ||
345 context
.Match("ftp://") ||
346 context
.Match("mailto:")) {
347 // Handle most common URI schemes only
348 stringStyleBefore
= context
.state
;
349 context
.SetState(SCE_JSON_URI
);
350 } else if (context
.ch
== '@') {
351 // https://www.w3.org/TR/json-ld/#dfn-keyword
352 if (IsNextWordInList(keywordsJSONLD
, setKeywordJSONLD
, context
, styler
)) {
353 stringStyleBefore
= context
.state
;
354 context
.SetState(SCE_JSON_LDKEYWORD
);
357 compactIRI
.checkChar(context
.ch
);
360 case SCE_JSON_LDKEYWORD
:
362 if ((!setKeywordJSONLD
.Contains(context
.ch
) &&
363 (context
.state
== SCE_JSON_LDKEYWORD
)) ||
364 (!setURL
.Contains(context
.ch
))) {
365 context
.SetState(stringStyleBefore
);
367 if (context
.ch
== '"') {
368 context
.ForwardSetState(SCE_JSON_DEFAULT
);
369 } else if (context
.atLineEnd
) {
370 context
.ChangeState(SCE_JSON_STRINGEOL
);
373 case SCE_JSON_OPERATOR
:
374 case SCE_JSON_NUMBER
:
375 context
.SetState(SCE_JSON_DEFAULT
);
378 if (context
.atLineEnd
) {
379 context
.SetState(SCE_JSON_DEFAULT
);
382 case SCE_JSON_KEYWORD
:
383 if (!setKeywordJSON
.Contains(context
.ch
)) {
384 context
.SetState(SCE_JSON_DEFAULT
);
388 if (context
.state
== SCE_JSON_DEFAULT
) {
389 if (context
.ch
== '"') {
390 compactIRI
.resetState();
391 context
.SetState(SCE_JSON_STRING
);
392 Sci_Position currPos
= static_cast<Sci_Position
>(context
.currentPos
);
393 if (AtPropertyName(styler
, currPos
)) {
394 context
.SetState(SCE_JSON_PROPERTYNAME
);
396 } else if (setOperators
.Contains(context
.ch
)) {
397 context
.SetState(SCE_JSON_OPERATOR
);
398 } else if (options
.allowComments
&& context
.Match("/*")) {
399 context
.SetState(SCE_JSON_BLOCKCOMMENT
);
401 } else if (options
.allowComments
&& context
.Match("//")) {
402 context
.SetState(SCE_JSON_LINECOMMENT
);
403 } else if (setKeywordJSON
.Contains(context
.ch
)) {
404 if (IsNextWordInList(keywordsJSON
, setKeywordJSON
, context
, styler
)) {
405 context
.SetState(SCE_JSON_KEYWORD
);
409 IsADigit(context
.ch
) && (context
.chPrev
== '+'||
410 context
.chPrev
== '-' ||
411 context
.atLineStart
||
412 IsASpace(context
.chPrev
) ||
413 setOperators
.Contains(context
.chPrev
));
415 tolower(context
.ch
) == 'e' &&
416 IsADigit(context
.chPrev
) &&
417 (IsADigit(context
.chNext
) ||
418 context
.chNext
== '+' ||
419 context
.chNext
== '-');
421 (context
.ch
== '-' || context
.ch
== '+') &&
422 ((tolower(context
.chPrev
) == 'e' && IsADigit(context
.chNext
)) ||
423 ((IsASpace(context
.chPrev
) || setOperators
.Contains(context
.chPrev
))
424 && IsADigit(context
.chNext
)));
426 IsADigit(context
.ch
) && IsADigit(context
.chPrev
);
427 bool afterExponent
= IsADigit(context
.ch
) && tolower(context
.chPrev
) == 'e';
428 bool dotPart
= context
.ch
== '.' &&
429 IsADigit(context
.chPrev
) &&
430 IsADigit(context
.chNext
);
431 bool afterDot
= IsADigit(context
.ch
) && context
.chPrev
== '.';
439 context
.SetState(SCE_JSON_NUMBER
);
440 } else if (context
.state
== SCE_JSON_DEFAULT
&& !IsASpace(context
.ch
)) {
441 context
.SetState(SCE_JSON_ERROR
);
449 void SCI_METHOD
LexerJSON::Fold(Sci_PositionU startPos
,
452 IDocument
*pAccess
) {
456 LexAccessor
styler(pAccess
);
457 Sci_PositionU currLine
= styler
.GetLine(startPos
);
458 Sci_PositionU endPos
= startPos
+ length
;
459 int currLevel
= SC_FOLDLEVELBASE
;
461 currLevel
= styler
.LevelAt(currLine
- 1) >> 16;
462 int nextLevel
= currLevel
;
463 int visibleChars
= 0;
464 for (Sci_PositionU i
= startPos
; i
< endPos
; i
++) {
465 char curr
= styler
.SafeGetCharAt(i
);
466 char next
= styler
.SafeGetCharAt(i
+1);
467 bool atEOL
= (curr
== '\r' && next
!= '\n') || (curr
== '\n');
468 if (styler
.StyleAt(i
) == SCE_JSON_OPERATOR
) {
469 if (curr
== '{' || curr
== '[') {
471 } else if (curr
== '}' || curr
== ']') {
475 if (atEOL
|| i
== (endPos
-1)) {
476 int level
= currLevel
| nextLevel
<< 16;
477 if (!visibleChars
&& options
.foldCompact
) {
478 level
|= SC_FOLDLEVELWHITEFLAG
;
479 } else if (nextLevel
> currLevel
) {
480 level
|= SC_FOLDLEVELHEADERFLAG
;
482 if (level
!= styler
.LevelAt(currLine
)) {
483 styler
.SetLevel(currLine
, level
);
486 currLevel
= nextLevel
;
489 if (!isspacechar(curr
)) {
495 LexerModule
lmJSON(SCLEX_JSON
,
496 LexerJSON::LexerFactoryJSON
,