2 * Copyright (c) 2014, Colomban Wendling <colomban@geany.org>
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License version 2 or (at your option) any later version.
8 * This module contains functions for generating tags for JSON files.
10 * http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf
12 * This implementation is forgiving and allows many constructs that are not
13 * actually valid but that don't conflict with the format. This is intended to
14 * better support partly broken or unfinished files.
61 unsigned long lineNumber
;
/* Language identifier of the JSON parser.  It is assigned in initialize ()
 * and handed to lookupKeyword () when identifier-like tokens are classified
 * in readTokenFull (). */
71 static langType Lang_json
;
/* Table describing the tag kinds of this parser.  makeJsonTag () indexes it
 * with a jsonKind value and reads the enabled, letter and name members of the
 * selected entry, so the order of the rows has to match the jsonKind values.
 * Every kind listed here starts out with TRUE in its first column. */
73 static kindOption JsonKinds
[] = {
74 { TRUE
, 'o', "object", "objects" },
75 { TRUE
, 'a', "array", "arrays" },
76 { TRUE
, 'n', "number", "numbers" },
77 { TRUE
, 's', "string", "strings" },
78 { TRUE
, 'b', "boolean", "booleans" },
79 { TRUE
, 'z', "null", "nulls" }
/* Allocates a tokenInfo with xMalloc and initializes the members assigned
 * below: the type starts as TOKEN_UNDEFINED, the scope kind as TAG_NONE,
 * string and scope each receive their own vString from vStringNew (), and the
 * position is taken from the current source line number and input file
 * position.  The two vStrings created here are the ones that deleteToken ()
 * passes to vStringDelete (). */
82 static tokenInfo
*newToken (void)
84 tokenInfo
*const token
= xMalloc (1, tokenInfo
);
86 token
->type
= TOKEN_UNDEFINED
;
87 token
->scopeKind
= TAG_NONE
;
88 token
->string
= vStringNew ();
89 token
->scope
= vStringNew ();
90 token
->lineNumber
= getSourceLineNumber ();
91 token
->filePosition
= getInputFilePosition ();
/* Releases the resources held by a token created with newToken ().  The
 * visible statements hand the string and scope vStrings of the token to
 * vStringDelete (). */
96 static void deleteToken (tokenInfo
*const token
)
98 vStringDelete (token
->string
);
99 vStringDelete (token
->scope
);
/* Copies every member of src into dest.  The type, scope kind, line number
 * and file position are assigned directly, while string and scope are
 * transferred with vStringCopy () into the vStrings that dest already holds,
 * so dest keeps its own string objects. */
103 static void copyToken (tokenInfo
*const dest
, tokenInfo
*const src
)
105 dest
->type
= src
->type
;
106 dest
->scopeKind
= src
->scopeKind
;
107 vStringCopy (dest
->string
, src
->string
);
108 vStringCopy (dest
->scope
, src
->scope
);
109 dest
->lineNumber
= src
->lineNumber
;
110 dest
->filePosition
= src
->filePosition
;
/* Prepares a tag entry for token using the given kind.
 *
 * The enabled flag of the kind is tested first.  The entry is then
 * initialized with the token string as its name, and receives the line
 * number and file position of the token together with the name and letter of
 * the kind taken from JsonKinds.
 *
 * token: supplies the tag name, its position and its scope.
 * kind:  index into JsonKinds selecting the kind of the tag. */
113 static void makeJsonTag (tokenInfo
*const token
, const jsonKind kind
)
117 if (! JsonKinds
[kind
].enabled
)
120 initTagEntry (&e
, vStringValue (token
->string
));
122 e
.lineNumber
= token
->lineNumber
;
123 e
.filePosition
= token
->filePosition
;
124 e
.kindName
= JsonKinds
[kind
].name
;
125 e
.kind
= JsonKinds
[kind
].letter
;
/* scope information is only attached when the token carries a scope */
127 if (vStringLength (token
->scope
) > 0)
/* the scope kind indexes JsonKinds below, so it must be a real kind */
129 Assert (token
->scopeKind
> TAG_NONE
&& token
->scopeKind
< TAG_COUNT
);
131 e
.extensionFields
.scope
[0] = JsonKinds
[token
->scopeKind
].name
;
132 e
.extensionFields
.scope
[1] = vStringValue (token
->scope
);
/* Tells whether c may be part of an identifier-like token: any character
 * accepted by isalnum (), or one of plus, minus and dot.  readTokenFull ()
 * uses this test to collect keywords and numbers. */
138 static boolean
isIdentChar (int c
)
140 return (isalnum (c
) || c
== '+' || c
== '-' || c
== '.');
/* Reads the next token from the input into token.
 *
 * The token type is reset to TOKEN_UNDEFINED and its string is cleared
 * before anything is read.  Tabs, spaces, carriage returns and newlines are
 * skipped, then the current line number and file position are stored in the
 * token.  Punctuation and end of file map directly to a token type.  A
 * double-quoted string becomes TOKEN_STRING, and a run of identifier
 * characters is looked up as a keyword; any other character leaves the type
 * at TOKEN_UNDEFINED.
 *
 * token:             token to fill in.
 * includeStringRepr: when true, the characters of a string token are
 *                    appended to the token string; otherwise they are read
 *                    but not stored. */
143 static void readTokenFull (tokenInfo
*const token
,
144 boolean includeStringRepr
)
148 token
->type
= TOKEN_UNDEFINED
;
149 vStringClear (token
->string
);
/* leading whitespace is skipped before the position is recorded */
153 while (c
== '\t' || c
== ' ' || c
== '\r' || c
== '\n');
155 token
->lineNumber
= getSourceLineNumber ();
156 token
->filePosition
= getInputFilePosition ();
160 case EOF
: token
->type
= TOKEN_EOF
; break;
161 case '[': token
->type
= TOKEN_OPEN_SQUARE
; break;
162 case ']': token
->type
= TOKEN_CLOSE_SQUARE
; break;
163 case '{': token
->type
= TOKEN_OPEN_CURLY
; break;
164 case '}': token
->type
= TOKEN_CLOSE_CURLY
; break;
165 case ':': token
->type
= TOKEN_COLON
; break;
166 case ',': token
->type
= TOKEN_COMMA
; break;
170 boolean escaped
= FALSE
;
171 token
->type
= TOKEN_STRING
;
175 /* we don't handle unicode escapes but they are safe */
180 else if (c
>= 0x00 && c
<= 0x1F)
181 break; /* break on invalid, unescaped, control characters */
/* the closing quote and end of file both finish the string */
182 else if (c
== '"' || c
== EOF
)
/* string contents are only stored when the caller asked for them */
184 if (includeStringRepr
)
185 vStringPut (token
->string
, c
);
187 vStringTerminate (token
->string
);
192 if (! isIdentChar (c
))
193 token
->type
= TOKEN_UNDEFINED
;
198 vStringPut (token
->string
, c
);
201 while (c
!= EOF
&& isIdentChar (c
));
202 vStringTerminate (token
->string
);
/* anything that is not true, false or null is reported as a number */
204 switch (lookupKeyword (vStringValue (token
->string
), Lang_json
))
206 case KEYWORD_true
: token
->type
= TOKEN_TRUE
; break;
207 case KEYWORD_false
: token
->type
= TOKEN_FALSE
; break;
208 case KEYWORD_null
: token
->type
= TOKEN_NULL
; break;
209 default: token
->type
= TOKEN_NUMBER
; break;
/* Shorthand for readTokenFull () with includeStringRepr set to FALSE, that
 * is, string tokens are recognized but their contents are not stored. */
216 #define readToken(t) (readTokenFull ((t), FALSE))
/* Extends the scope of token by one level named after parent.
 *
 * When the scope is already non-empty a dot is appended first, then the
 * string of parent is concatenated and the scope is terminated.  The scope
 * kind of token becomes parentKind.
 *
 * token:      token whose scope is extended.
 * parent:     token whose string names the new innermost scope level.
 * parentKind: kind stored as the new scope kind of token. */
218 static void pushScope (tokenInfo
*const token
,
219 const tokenInfo
*const parent
,
220 const jsonKind parentKind
)
222 if (vStringLength (token
->scope
) > 0)
223 vStringPut (token
->scope
, '.');
224 vStringCat (token
->scope
, parent
->string
);
225 vStringTerminate (token
->scope
);
226 token
->scopeKind
= parentKind
;
/* Restores the scope of token to the one recorded in parent: the scope
 * string is truncated to the length of the scope of parent, and the scope
 * kind is copied back from parent.  In parseValue () the parent passed here
 * is the copy of token taken before the matching pushScope () call. */
229 static void popScope (tokenInfo
*const token
,
230 const tokenInfo
*const parent
)
232 vStringTruncate (token
->scope
, vStringLength (parent
->scope
));
233 token
->scopeKind
= parent
->scopeKind
;
/* Convenience wrappers around skipToOneOf3 () for two target types and for a
 * single target type.  The unused slots are filled with TOKEN_EOF, which
 * adds no stop condition because skipToOneOf3 () always stops at TOKEN_EOF
 * anyway. */
236 #define skipToOneOf2(token, type1, type2) \
237 (skipToOneOf3 (token, type1, type2, TOKEN_EOF /* dummy */))
239 #define skipTo(token, type) \
240 (skipToOneOf3 (token, type, /* dummies */ TOKEN_EOF, TOKEN_EOF))
/* Advances token until its type is TOKEN_EOF or one of type1, type2 and
 * type3.
 *
 * While skipping, an opening curly brace triggers a nested skipTo () up to a
 * closing curly brace, and an opening square bracket triggers a nested
 * skipTo () up to a closing square bracket, so the function recurses once per
 * nesting level of the skipped input.
 *
 * token: token that is advanced in place.
 * type1, type2, type3: token types at which skipping stops. */
242 static void skipToOneOf3 (tokenInfo
*const token
,
243 const tokenType type1
,
244 const tokenType type2
,
245 const tokenType type3
)
247 while (token
->type
!= TOKEN_EOF
&&
248 token
->type
!= type1
&&
249 token
->type
!= type2
&&
250 token
->type
!= type3
)
253 if (token
->type
== TOKEN_OPEN_CURLY
)
255 skipTo (token
, TOKEN_CLOSE_CURLY
);
258 else if (token
->type
== TOKEN_OPEN_SQUARE
)
260 skipTo (token
, TOKEN_CLOSE_SQUARE
);
/* Maps the type of the token that starts a value to the tag kind used for
 * that value.  Opening curly braces yield TAG_OBJECT, opening square brackets
 * TAG_ARRAY, strings TAG_STRING, the false keyword TAG_BOOLEAN and numbers
 * TAG_NUMBER.  Every type without a case of its own yields TAG_NULL.
 *
 * NOTE(review): the embedded line numbering jumps from 272 to 274, so a case
 * label may be missing from this view -- confirm against the full file. */
266 static jsonKind
tokenToKind (const tokenType type
)
270 case TOKEN_OPEN_CURLY
: return TAG_OBJECT
;
271 case TOKEN_OPEN_SQUARE
: return TAG_ARRAY
;
272 case TOKEN_STRING
: return TAG_STRING
;
274 case TOKEN_FALSE
: return TAG_BOOLEAN
;
275 case TOKEN_NUMBER
: return TAG_NUMBER
;
276 default: return TAG_NULL
;
/* Generates tags for the value that starts at token.
 *
 * When token opens an object, each member name is read with its string
 * contents kept, copied into a scratch token, and tagged with makeJsonTag ().
 * The tag kind defaults to TAG_NULL and is replaced by the result of
 * tokenToKind () once a colon has been found.  pushScope () and popScope ()
 * bracket the handling of the member value, and the loop stops at the closing
 * curly brace or at end of file.
 *
 * When token opens an array, the same scheme is used, except that every
 * element is tagged under its zero-based index rendered in decimal, and the
 * tag is made before the scope is pushed.
 *
 * NOTE(review): presumably the value between pushScope () and popScope () is
 * handled by a recursive call; that statement is not visible here -- confirm. */
280 static void parseValue (tokenInfo
*const token
)
282 if (token
->type
== TOKEN_OPEN_CURLY
)
284 tokenInfo
*name
= newToken ();
/* read the member name, keeping the string contents this time */
288 readTokenFull (token
, TRUE
);
289 if (token
->type
== TOKEN_STRING
)
291 jsonKind tagKind
= TAG_NULL
; /* default in case of invalid value */
/* remember the member name and the scope in effect before the value */
293 copyToken (name
, token
);
295 /* skip any possible garbage before the value */
296 skipToOneOf3 (token
, TOKEN_CLOSE_CURLY
, TOKEN_COLON
, TOKEN_COMMA
);
298 if (token
->type
== TOKEN_COLON
)
301 tagKind
= tokenToKind (token
->type
);
303 pushScope (token
, name
, tagKind
);
305 popScope (token
, name
);
308 makeJsonTag (name
, tagKind
);
310 /* skip to the end of the construct */
311 skipToOneOf2 (token
, TOKEN_CLOSE_CURLY
, TOKEN_COMMA
);
313 while (token
->type
!= TOKEN_EOF
&&
314 token
->type
!= TOKEN_CLOSE_CURLY
);
316 if (token
->type
== TOKEN_CLOSE_CURLY
)
321 else if (token
->type
== TOKEN_OPEN_SQUARE
)
323 tokenInfo
*name
= newToken ();
/* running index of the array element being handled */
325 unsigned int nth
= 0;
328 while (token
->type
!= TOKEN_EOF
&&
329 token
->type
!= TOKEN_CLOSE_SQUARE
)
333 tagKind
= tokenToKind (token
->type
);
335 copyToken (name
, token
);
/* array elements are named after their zero-based index */
336 snprintf (buf
, sizeof buf
, "%u", nth
++);
337 vStringCopyS (name
->string
, buf
);
339 makeJsonTag (name
, tagKind
);
340 pushScope (token
, name
, tagKind
);
342 popScope (token
, name
);
344 /* skip to the end of the construct */
345 skipToOneOf2 (token
, TOKEN_CLOSE_SQUARE
, TOKEN_COMMA
);
346 if (token
->type
!= TOKEN_CLOSE_SQUARE
)
350 if (token
->type
== TOKEN_CLOSE_SQUARE
)
/* Parser entry point registered in JsonParser ().  It creates one token with
 * newToken () and keeps processing input until that token reaches
 * TOKEN_EOF. */
357 static void findJsonTags (void)
359 tokenInfo
*const token
= newToken ();
361 /* We allow multiple top-level elements, although it's not actually valid
362 * JSON. An interesting side effect of this is that we allow a leading
363 * Unicode BOM mark -- even though ok, many JSON parsers will choke on it */
369 while (token
->type
!= TOKEN_EOF
);
/* Initialization callback registered in JsonParser ().  It stores the
 * language identifier in Lang_json and registers the three JSON literals
 * true, false and null as keywords of that language, so that
 * readTokenFull () can recognize them through lookupKeyword ().
 *
 * language: identifier assigned to the JSON parser. */
374 static void initialize (const langType language
)
376 Lang_json
= language
;
377 addKeyword ("true", language
, KEYWORD_true
);
378 addKeyword ("false", language
, KEYWORD_false
);
379 addKeyword ("null", language
, KEYWORD_null
);
382 /* Create parser definition structure */
/* Builds the definition of the parser named JSON with parserNew () and fills
 * in the file extension list (json only), the kind table JsonKinds with its
 * element count, the parsing callback findJsonTags and the initialization
 * callback initialize.  The extension list is a static array, so the pointer
 * stored in the definition stays valid after this function has finished. */
383 extern parserDefinition
* JsonParser (void)
385 static const char *const extensions
[] = { "json", NULL
};
386 parserDefinition
*const def
= parserNew ("JSON");
387 def
->extensions
= extensions
;
388 def
->kinds
= JsonKinds
;
389 def
->kindCount
= KIND_COUNT (JsonKinds
);
390 def
->parser
= findJsonTags
;
391 def
->initialize
= initialize
;