2 * Copyright (c) 2014, Colomban Wendling <colomban@geany.org>
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License.
8 * This module contains functions for generating tags for JSON files.
10 * http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf
12 * This implementation is forgiving and allows many constructs that are not
13 * actually valid but that don't conflict with the format. This is intended to
14 * better support partly broken or unfinished files.
59 unsigned long lineNumber
;
/* Language identifier of this parser: assigned in initialize () and handed
 * to lookupKeyword () when a bare word is classified. */
69 static langType Lang_json
;
71 static kindOption JsonKinds
[] = {
72 { TRUE
, 'o', "member", "objects" },
73 { TRUE
, 'a', "member", "arrays" },
74 { TRUE
, 'n', "member", "numbers" },
75 { TRUE
, 's', "member", "strings" },
76 { TRUE
, 'b', "member", "booleans" },
77 { TRUE
, 'z', "member", "nulls" }
80 static tokenInfo
*newToken (void)
82 tokenInfo
*const token
= xMalloc (1, tokenInfo
);
84 token
->type
= TOKEN_UNDEFINED
;
85 token
->scopeKind
= TAG_NONE
;
86 token
->string
= vStringNew ();
87 token
->scope
= vStringNew ();
88 token
->lineNumber
= getSourceLineNumber ();
89 token
->filePosition
= getInputFilePosition ();
/* Releases the two string buffers (name and scope) held by `token`.
 * NOTE(review): at least one line after the second vStringDelete () call is
 * not visible in this view -- confirm that the token structure itself, which
 * newToken () obtains from xMalloc (), is released there. */
94 static void deleteToken (tokenInfo
*const token
)
96 vStringDelete (token
->string
);
97 vStringDelete (token
->scope
);
101 static void copyToken (tokenInfo
*const dest
, tokenInfo
*const src
)
103 dest
->type
= src
->type
;
104 dest
->scopeKind
= src
->scopeKind
;
105 vStringCopy (dest
->string
, src
->string
);
106 vStringCopy (dest
->scope
, src
->scope
);
107 dest
->lineNumber
= src
->lineNumber
;
108 dest
->filePosition
= src
->filePosition
;
/* Fills in a tag entry for `token`, using `kind` to select the row of
 * JsonKinds that provides the kind name and letter.
 *
 * NOTE(review): several lines are not visible in this view -- the
 * declaration of `e`, the body of the "kind disabled" test (presumably an
 * early return; confirm) and the call that finally emits the entry. */
111 static void makeJsonTag (tokenInfo
*const token
, const jsonKind kind
)
/* test whether this kind is switched off in the kind table */
115 if (! JsonKinds
[kind
].enabled
)
/* the tag is named after the token's string */
118 initTagEntry (&e
, vStringValue (token
->string
));
/* locate the tag where the token was read, not at the current position */
120 e
.lineNumber
= token
->lineNumber
;
121 e
.filePosition
= token
->filePosition
;
122 e
.kindName
= JsonKinds
[kind
].name
;
123 e
.kind
= JsonKinds
[kind
].letter
;
/* scope information is only attached when the token has a non-empty scope */
125 if (vStringLength (token
->scope
) > 0)
/* scopeKind indexes JsonKinds below, so it must be a real kind */
127 Assert (token
->scopeKind
> TAG_NONE
&& token
->scopeKind
< TAG_COUNT
);
/* scope [0] is the kind name of the enclosing element, scope [1] its path */
129 e
.extensionFields
.scope
[0] = JsonKinds
[token
->scopeKind
].name
;
130 e
.extensionFields
.scope
[1] = vStringValue (token
->scope
);
136 static boolean
isIdentChar (int c
)
138 return (isalnum (c
) || c
== '+' || c
== '-' || c
== '.');
/* Reads the next token from the input into `token`.
 *
 * The token is first reset (undefined type, empty string).  After leading
 * whitespace is skipped, the token's line number and file position are
 * recorded and the token is classified:
 *  - EOF and the punctuation characters [ ] { } : , each get their own type;
 *  - a string token keeps its characters only if `includeStringRepr` is set;
 *  - a run of isIdentChar () characters is looked up as a keyword and
 *    becomes TOKEN_TRUE, TOKEN_FALSE or TOKEN_NULL, or else TOKEN_NUMBER;
 *  - anything else leaves the type as TOKEN_UNDEFINED.
 *
 * NOTE(review): the statements that read characters into `c`, the loop
 * heads and some case labels are not visible in this view. */
141 static void readTokenFull (tokenInfo
*const token
,
142 boolean includeStringRepr
)
/* start from a clean token */
146 token
->type
= TOKEN_UNDEFINED
;
147 vStringClear (token
->string
);
/* tail of the whitespace-skipping loop: tab, space, CR and LF are ignored */
151 while (c
== '\t' || c
== ' ' || c
== '\r' || c
== '\n');
/* remember where the token starts */
153 token
->lineNumber
= getSourceLineNumber ();
154 token
->filePosition
= getInputFilePosition ();
/* end of input and single-character tokens */
158 case EOF
: token
->type
= TOKEN_EOF
; break;
159 case '[': token
->type
= TOKEN_OPEN_SQUARE
; break;
160 case ']': token
->type
= TOKEN_CLOSE_SQUARE
; break;
161 case '{': token
->type
= TOKEN_OPEN_CURLY
; break;
162 case '}': token
->type
= TOKEN_CLOSE_CURLY
; break;
163 case ':': token
->type
= TOKEN_COLON
; break;
164 case ',': token
->type
= TOKEN_COMMA
; break;
/* string token; `escaped` is the state flag for escape handling (the
 * statements that update it are not visible in this view) */
168 boolean escaped
= FALSE
;
169 token
->type
= TOKEN_STRING
;
173 /* we don't handle unicode escapes but they are safe */
178 else if (c
>= 0x00 && c
<= 0x1F)
179 break; /* break on invalid, unescaped, control characters */
/* a double quote or the end of input is also tested for here */
180 else if (c
== '"' || c
== EOF
)
/* the characters of the string are stored only on request */
182 if (includeStringRepr
)
183 vStringPut (token
->string
, c
);
185 vStringTerminate (token
->string
);
/* a character that cannot start a bare word gives an undefined token */
190 if (! isIdentChar (c
))
191 token
->type
= TOKEN_UNDEFINED
;
/* otherwise accumulate the whole run of bare-word characters */
196 vStringPut (token
->string
, c
);
199 while (c
!= EOF
&& isIdentChar (c
));
200 vStringTerminate (token
->string
);
/* keywords get their own type; any other bare word counts as a number */
202 switch (lookupKeyword (vStringValue (token
->string
), Lang_json
))
204 case KEYWORD_true
: token
->type
= TOKEN_TRUE
; break;
205 case KEYWORD_false
: token
->type
= TOKEN_FALSE
; break;
206 case KEYWORD_null
: token
->type
= TOKEN_NULL
; break;
207 default: token
->type
= TOKEN_NUMBER
; break;
/* Reads the next token without requesting the text of string tokens. */
#define readToken(tok) (readTokenFull ((tok), FALSE))
216 static void pushScope (tokenInfo
*const token
,
217 const tokenInfo
*const parent
,
218 const jsonKind parentKind
)
220 if (vStringLength (token
->scope
) > 0)
221 vStringPut (token
->scope
, '.');
222 vStringCat (token
->scope
, parent
->string
);
223 vStringTerminate (token
->scope
);
224 token
->scopeKind
= parentKind
;
/* Undoes a pushScope (): shortens the scope path of `token` and takes the
 * scope kind back from `parent`.
 *
 * NOTE(review): the branch structure is not visible in this view.  The
 * visible statements either clear the scope or cut it at the last '.';
 * presumably the scope is cleared when no '.' is found -- confirm. */
227 static void popScope (tokenInfo
*const token
,
228 const tokenInfo
*const parent
)
/* find the separator in front of the innermost path component */
230 char *dot
= strrchr (token
->scope
->buffer
, '.');
233 vStringClear (token
->scope
);
/* the new length is the offset of the dot within the buffer */
237 token
->scope
->length
= dot
- token
->scope
->buffer
;
239 token
->scopeKind
= parent
->scopeKind
;
/* Shorthands for skipToOneOf3 () with fewer stop types.  The unused slots
 * are filled with TOKEN_EOF, on which skipToOneOf3 () stops in any case. */
#define skipToOneOf2(tok, stopA, stopB) \
	(skipToOneOf3 ((tok), (stopA), (stopB), TOKEN_EOF /* dummy */))

#define skipTo(tok, stop) \
	(skipToOneOf3 ((tok), (stop), /* dummies */ TOKEN_EOF, TOKEN_EOF))
/* Skips tokens until the type of `token` is `type1`, `type2`, `type3` or
 * TOKEN_EOF.  The test is made before anything is skipped, so a token that
 * already matches is left untouched.  When an opening curly or square
 * bracket is met, skipTo () is used to reach the corresponding closing
 * token type.
 *
 * NOTE(review): the calls that actually read tokens are not visible in
 * this view. */
248 static void skipToOneOf3 (tokenInfo
*const token
,
249 const tokenType type1
,
250 const tokenType type2
,
251 const tokenType type3
)
/* keep going until the end of input or one of the requested types */
253 while (token
->type
!= TOKEN_EOF
&&
254 token
->type
!= type1
&&
255 token
->type
!= type2
&&
256 token
->type
!= type3
)
/* nested object: skip to its closing brace */
259 if (token
->type
== TOKEN_OPEN_CURLY
)
261 skipTo (token
, TOKEN_CLOSE_CURLY
);
/* nested array: skip to its closing bracket */
264 else if (token
->type
== TOKEN_OPEN_SQUARE
)
266 skipTo (token
, TOKEN_CLOSE_SQUARE
);
272 static jsonKind
tokenToKind (const tokenType type
)
276 case TOKEN_OPEN_CURLY
: return TAG_OBJECT
;
277 case TOKEN_OPEN_SQUARE
: return TAG_ARRAY
;
278 case TOKEN_STRING
: return TAG_STRING
;
280 case TOKEN_FALSE
: return TAG_BOOLEAN
;
281 case TOKEN_NUMBER
: return TAG_NUMBER
;
282 default: return TAG_NULL
;
/* Parses the value that starts at the current token and emits tags for
 * what it contains.
 *
 * For an object, each member whose name is a string gets a tag named after
 * the member; the tag kind comes from the token that follows the colon, or
 * is TAG_NULL if no valid value is found.  For an array, each element gets
 * a tag named after its zero-based index.  In both cases the name is pushed
 * onto the scope of `token` and popped again afterwards.
 *
 * NOTE(review): the token reads, the loop heads and whatever happens
 * between pushScope () and popScope () (presumably the parsing of the
 * nested value; confirm) are not visible in this view, nor is the release
 * of `name`. */
286 static void parseValue (tokenInfo
*const token
)
/* ---- object ---- */
288 if (token
->type
== TOKEN_OPEN_CURLY
)
/* scratch token that holds the name of the member being handled */
290 tokenInfo
*name
= newToken ();
/* the text of the string is requested here because it names the tag */
294 readTokenFull (token
, TRUE
);
295 if (token
->type
== TOKEN_STRING
)
297 jsonKind tagKind
= TAG_NULL
; /* default in case of invalid value */
/* keep the member name (and its position) before reading further */
299 copyToken (name
, token
);
301 /* skip any possible garbage before the value */
302 skipToOneOf3 (token
, TOKEN_CLOSE_CURLY
, TOKEN_COLON
, TOKEN_COMMA
);
/* a value is only expected after a colon */
304 if (token
->type
== TOKEN_COLON
)
307 tagKind
= tokenToKind (token
->type
);
/* the member name is part of the scope only between push and pop */
309 pushScope (token
, name
, tagKind
);
311 popScope (token
, name
);
314 makeJsonTag (name
, tagKind
);
316 /* skip to the end of the construct */
317 skipToOneOf2 (token
, TOKEN_CLOSE_CURLY
, TOKEN_COMMA
);
/* tail of the member loop: stop at the end of input or of the object */
319 while (token
->type
!= TOKEN_EOF
&&
320 token
->type
!= TOKEN_CLOSE_CURLY
);
322 if (token
->type
== TOKEN_CLOSE_CURLY
)
/* ---- array ---- */
327 else if (token
->type
== TOKEN_OPEN_SQUARE
)
329 tokenInfo
*name
= newToken ();
/* index of the next element, used as its tag name */
331 unsigned int nth
= 0;
334 while (token
->type
!= TOKEN_EOF
&&
335 token
->type
!= TOKEN_CLOSE_SQUARE
)
339 tagKind
= tokenToKind (token
->type
);
/* take position and scope from the element, then name it by its index */
341 copyToken (name
, token
);
342 snprintf (buf
, sizeof buf
, "%u", nth
++);
343 vStringCopyS (name
->string
, buf
);
345 makeJsonTag (name
, tagKind
);
346 pushScope (token
, name
, tagKind
);
348 popScope (token
, name
);
350 /* skip to the end of the construct */
351 skipToOneOf2 (token
, TOKEN_CLOSE_SQUARE
, TOKEN_COMMA
);
352 if (token
->type
!= TOKEN_CLOSE_SQUARE
)
356 if (token
->type
== TOKEN_CLOSE_SQUARE
)
/* Parser entry point (installed as the `parser` callback by JsonParser ()).
 * Creates a token and loops until that token reaches TOKEN_EOF.
 * NOTE(review): the body of the loop and anything after it are not visible
 * in this view. */
363 static void findJsonTags (void)
365 tokenInfo
*const token
= newToken ();
367 /* We allow multiple top-level elements, although it's not actually valid
368 * JSON. An interesting side effect of this is that we allow a leading
369 * Unicode BOM mark -- even though ok, many JSON parsers will choke on it */
/* tail of the top-level loop: repeat until the end of the input */
375 while (token
->type
!= TOKEN_EOF
);
380 static void initialize (const langType language
)
382 Lang_json
= language
;
383 addKeyword ("true", language
, KEYWORD_true
);
384 addKeyword ("false", language
, KEYWORD_false
);
385 addKeyword ("null", language
, KEYWORD_null
);
388 /* Create parser definition stucture */
389 extern parserDefinition
* JsonParser (void)
391 static const char *const extensions
[] = { "json", NULL
};
392 parserDefinition
*const def
= parserNew ("JSON");
393 def
->extensions
= extensions
;
394 def
->kinds
= JsonKinds
;
395 def
->kindCount
= KIND_COUNT (JsonKinds
);
396 def
->parser
= findJsonTags
;
397 def
->initialize
= initialize
;