2 * Copyright (c) 2014, Colomban Wendling <colomban@geany.org>
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License.
8 * This module contains functions for generating tags for JSON files.
10 * http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf
12 * This implementation is forgiving and allows many constructs that are not
13 * actually valid but that don't conflict with the format. This is intended to
14 * better support partly broken or unfinished files.
59 unsigned long lineNumber
;
69 static langType Lang_json
;
71 static kindOption JsonKinds
[] = {
72 { TRUE
, 'o', "member", "objects" },
73 { TRUE
, 'a', "member", "arrays" },
74 { TRUE
, 'n', "member", "numbers" },
75 { TRUE
, 's', "member", "strings" },
76 { TRUE
, 'b', "member", "booleans" },
77 { TRUE
, 'z', "member", "nulls" }
/* Allocates a tokenInfo with xMalloc() and gives every field a start value:
 * type is TOKEN_UNDEFINED, scopeKind is TAG_NONE, string and scope each get a
 * vString from vStringNew(), and lineNumber/filePosition are taken from
 * getSourceLineNumber() and getInputFilePosition().
 * NOTE(review): no return statement is visible in this extract (the embedded
 * numbering jumps from 89 to 94) -- confirm the function hands `token` back
 * to the caller. */
80 static tokenInfo
*newToken (void)
82 tokenInfo
*const token
= xMalloc (1, tokenInfo
);
84 token
->type
= TOKEN_UNDEFINED
;
85 token
->scopeKind
= TAG_NONE
;
86 token
->string
= vStringNew ();
87 token
->scope
= vStringNew ();
88 token
->lineNumber
= getSourceLineNumber ();
89 token
->filePosition
= getInputFilePosition ();
/* Releases the two vStrings owned by TOKEN (string and scope).
 * NOTE(review): embedded line 98 is absent from this extract; confirm that
 * the tokenInfo itself, which newToken() obtains from xMalloc(), is freed
 * there. */
94 static void deleteToken (tokenInfo
*const token
)
96 vStringDelete (token
->string
);
97 vStringDelete (token
->scope
);
101 static void copyToken (tokenInfo
*const dest
, tokenInfo
*const src
)
103 dest
->type
= src
->type
;
104 dest
->scopeKind
= src
->scopeKind
;
105 vStringCopy (dest
->string
, src
->string
);
106 vStringCopy (dest
->scope
, src
->scope
);
107 dest
->lineNumber
= src
->lineNumber
;
108 dest
->filePosition
= src
->filePosition
;
/* Builds the tag entry for TOKEN using the JsonKinds row selected by KIND.
 * The kind's enabled flag is tested first.  The entry is initialised from the
 * token's string, then its line number, file position, kind name and kind
 * letter are filled in.  If the token has a non-empty scope, the scope pair
 * becomes (name of the scope's kind, scope string); the Assert requires
 * scopeKind to lie strictly between TAG_NONE and TAG_COUNT so that it is a
 * valid JsonKinds index.
 * NOTE(review): the declaration of `e`, the statement guarded by the enabled
 * test (presumably an early return) and the call that submits the entry are
 * not visible in this extract (embedded lines 113, 116 and 133 are absent)
 * -- confirm against the full file. */
111 static void makeJsonTag (tokenInfo
*const token
, const jsonKind kind
)
115 if (! JsonKinds
[kind
].enabled
)
118 initTagEntry (&e
, vStringValue (token
->string
));
120 e
.lineNumber
= token
->lineNumber
;
121 e
.filePosition
= token
->filePosition
;
122 e
.kindName
= JsonKinds
[kind
].name
;
123 e
.kind
= JsonKinds
[kind
].letter
;
125 if (vStringLength (token
->scope
) > 0)
127 Assert (token
->scopeKind
> TAG_NONE
&& token
->scopeKind
< TAG_COUNT
);
129 e
.extensionFields
.scope
[0] = JsonKinds
[token
->scopeKind
].name
;
130 e
.extensionFields
.scope
[1] = vStringValue (token
->scope
);
136 static boolean
isIdentChar (int c
)
138 return (isalnum (c
) || c
== '+' || c
== '-' || c
== '.');
/* Reads the next token from the input into TOKEN.
 *
 * The token is first reset (type TOKEN_UNDEFINED, string cleared).  Tabs,
 * spaces, CR and LF are skipped, then the current line number and file
 * position are recorded.  The first significant character selects the type:
 *   - EOF and the punctuation characters [ ] { } : , map directly to their
 *     token types;
 *   - a string yields TOKEN_STRING; scanning stops at an unescaped control
 *     character (0x00-0x1F), at a double quote or at EOF, and the characters
 *     read are appended to token->string only when includeStringRepr is set;
 *   - any other character that is not an identifier character (see
 *     isIdentChar()) leaves the type at TOKEN_UNDEFINED;
 *   - otherwise the run of identifier characters is collected and looked up
 *     as a keyword: true, false and null get their own types, and anything
 *     else is reported as TOKEN_NUMBER.
 *
 * NOTE(review): this extract omits several lines of the function -- the
 * declaration of `c`, the calls that read (and push back) characters, the
 * switch head, the string case label and the escape handling -- so the
 * description above covers only what is visible; confirm against the full
 * file. */
141 static void readTokenFull (tokenInfo
*const token
,
142 boolean includeStringRepr
)
146 token
->type
= TOKEN_UNDEFINED
;
147 vStringClear (token
->string
);
151 while (c
== '\t' || c
== ' ' || c
== '\r' || c
== '\n');
153 token
->lineNumber
= getSourceLineNumber ();
154 token
->filePosition
= getInputFilePosition ();
158 case EOF
: token
->type
= TOKEN_EOF
; break;
159 case '[': token
->type
= TOKEN_OPEN_SQUARE
; break;
160 case ']': token
->type
= TOKEN_CLOSE_SQUARE
; break;
161 case '{': token
->type
= TOKEN_OPEN_CURLY
; break;
162 case '}': token
->type
= TOKEN_CLOSE_CURLY
; break;
163 case ':': token
->type
= TOKEN_COLON
; break;
164 case ',': token
->type
= TOKEN_COMMA
; break;
168 boolean escaped
= FALSE
;
169 token
->type
= TOKEN_STRING
;
173 /* we don't handle unicode escapes but they are safe */
178 else if (c
>= 0x00 && c
<= 0x1F)
179 break; /* break on invalid, unescaped, control characters */
180 else if (c
== '"' || c
== EOF
)
182 if (includeStringRepr
)
183 vStringPut (token
->string
, c
);
185 vStringTerminate (token
->string
);
190 if (! isIdentChar (c
))
191 token
->type
= TOKEN_UNDEFINED
;
196 vStringPut (token
->string
, c
);
199 while (c
!= EOF
&& isIdentChar (c
));
200 vStringTerminate (token
->string
);
202 switch (lookupKeyword (vStringValue (token
->string
), Lang_json
))
204 case KEYWORD_true
: token
->type
= TOKEN_TRUE
; break;
205 case KEYWORD_false
: token
->type
= TOKEN_FALSE
; break;
206 case KEYWORD_null
: token
->type
= TOKEN_NULL
; break;
207 default: token
->type
= TOKEN_NUMBER
; break;
/* Shorthand for readTokenFull() with includeStringRepr set to FALSE, i.e.
 * string contents are not collected into the token. */
#define readToken(t) \
	(readTokenFull ((t), FALSE))
216 static void pushScope (tokenInfo
*const token
,
217 const tokenInfo
*const parent
,
218 const jsonKind parentKind
)
220 if (vStringLength (token
->scope
) > 0)
221 vStringPut (token
->scope
, '.');
222 vStringCat (token
->scope
, parent
->string
);
223 vStringTerminate (token
->scope
);
224 token
->scopeKind
= parentKind
;
227 static void popScope (tokenInfo
*const token
,
228 const tokenInfo
*const parent
)
230 vStringTruncate (token
->scope
, vStringLength (parent
->scope
));
231 token
->scopeKind
= parent
->scopeKind
;
/* Two-target and single-target shorthands for skipToOneOf3().
 * Unused target slots are filled with TOKEN_EOF.  skipToOneOf3() stops on
 * TOKEN_EOF in any case, so the fillers add no further stop condition. */
#define skipToOneOf2(token, type1, type2) \
	(skipToOneOf3 ((token), (type1), (type2), /* filler */ TOKEN_EOF))

#define skipTo(token, type) \
	(skipToOneOf3 ((token), (type), /* fillers */ TOKEN_EOF, TOKEN_EOF))
/* Moves TOKEN forward until its type is TOKEN_EOF or one of TYPE1, TYPE2 and
 * TYPE3.  When an opening curly or square bracket is met on the way, the
 * function skips to the corresponding closing token type through skipTo()
 * (which expands to a recursive call of this function), so a nested
 * container is stepped over rather than scanned for the targets.
 * NOTE(review): the statements that actually read the next token are not
 * visible in this extract (embedded lines 250, 254 and 259 are absent) --
 * confirm against the full file. */
240 static void skipToOneOf3 (tokenInfo
*const token
,
241 const tokenType type1
,
242 const tokenType type2
,
243 const tokenType type3
)
245 while (token
->type
!= TOKEN_EOF
&&
246 token
->type
!= type1
&&
247 token
->type
!= type2
&&
248 token
->type
!= type3
)
251 if (token
->type
== TOKEN_OPEN_CURLY
)
253 skipTo (token
, TOKEN_CLOSE_CURLY
);
256 else if (token
->type
== TOKEN_OPEN_SQUARE
)
258 skipTo (token
, TOKEN_CLOSE_SQUARE
);
/* Maps the token type that starts a value to the tag kind of that value:
 * opening curly -> TAG_OBJECT, opening square -> TAG_ARRAY, string ->
 * TAG_STRING, false -> TAG_BOOLEAN, number -> TAG_NUMBER.  Every other type
 * falls back to TAG_NULL.
 * NOTE(review): the switch head and embedded line 271 are absent from this
 * extract; a TOKEN_TRUE label sharing the TAG_BOOLEAN return would be
 * expected at 271 -- confirm against the full file. */
264 static jsonKind
tokenToKind (const tokenType type
)
268 case TOKEN_OPEN_CURLY
: return TAG_OBJECT
;
269 case TOKEN_OPEN_SQUARE
: return TAG_ARRAY
;
270 case TOKEN_STRING
: return TAG_STRING
;
272 case TOKEN_FALSE
: return TAG_BOOLEAN
;
273 case TOKEN_NUMBER
: return TAG_NUMBER
;
274 default: return TAG_NULL
;
/* Parses the value that starts at TOKEN and emits tags for its members.
 *
 * Object (TOKEN_OPEN_CURLY): each key is read with readTokenFull(token, TRUE)
 * so that its text is kept.  A string key is copied into `name`, then
 * anything up to the next '}', ':' or ',' is skipped.  After a ':' the kind
 * of the value is derived from the following token with tokenToKind() and
 * the key is pushed as scope around the value; without a ':' the kind stays
 * TAG_NULL.  A tag named after the key is then emitted, and the loop moves on
 * to the next ',' or '}' until '}' or EOF.
 *
 * Array (TOKEN_OPEN_SQUARE): elements are processed until ']' or EOF.  Each
 * element is tagged under its zero-based index, formatted with "%u", using
 * the position of the element's first token; the index is pushed as scope
 * around the element.
 *
 * Any other token type is left untouched by the visible code.
 *
 * NOTE(review): several statements are absent from this extract -- the loop
 * head of the object branch, the token reads, what happens between
 * pushScope() and popScope() (presumably the recursive descent into the
 * value), the declaration of `buf` and of the array branch's `tagKind`, and
 * the disposal of `name` -- confirm against the full file. */
278 static void parseValue (tokenInfo
*const token
)
280 if (token
->type
== TOKEN_OPEN_CURLY
)
282 tokenInfo
*name
= newToken ();
286 readTokenFull (token
, TRUE
);
287 if (token
->type
== TOKEN_STRING
)
289 jsonKind tagKind
= TAG_NULL
; /* default in case of invalid value */
291 copyToken (name
, token
);
293 /* skip any possible garbage before the value */
294 skipToOneOf3 (token
, TOKEN_CLOSE_CURLY
, TOKEN_COLON
, TOKEN_COMMA
);
296 if (token
->type
== TOKEN_COLON
)
299 tagKind
= tokenToKind (token
->type
);
301 pushScope (token
, name
, tagKind
);
303 popScope (token
, name
);
306 makeJsonTag (name
, tagKind
);
308 /* skip to the end of the construct */
309 skipToOneOf2 (token
, TOKEN_CLOSE_CURLY
, TOKEN_COMMA
);
311 while (token
->type
!= TOKEN_EOF
&&
312 token
->type
!= TOKEN_CLOSE_CURLY
);
314 if (token
->type
== TOKEN_CLOSE_CURLY
)
319 else if (token
->type
== TOKEN_OPEN_SQUARE
)
321 tokenInfo
*name
= newToken ();
323 unsigned int nth
= 0;
326 while (token
->type
!= TOKEN_EOF
&&
327 token
->type
!= TOKEN_CLOSE_SQUARE
)
331 tagKind
= tokenToKind (token
->type
);
333 copyToken (name
, token
);
334 snprintf (buf
, sizeof buf
, "%u", nth
++);
335 vStringCopyS (name
->string
, buf
);
337 makeJsonTag (name
, tagKind
);
338 pushScope (token
, name
, tagKind
);
340 popScope (token
, name
);
342 /* skip to the end of the construct */
343 skipToOneOf2 (token
, TOKEN_CLOSE_SQUARE
, TOKEN_COMMA
);
344 if (token
->type
!= TOKEN_CLOSE_SQUARE
)
348 if (token
->type
== TOKEN_CLOSE_SQUARE
)
/* Parser entry point; JsonParser() installs it as the definition's `parser`
 * callback.  It creates a working token with newToken() and keeps going
 * until the token type is TOKEN_EOF.
 * NOTE(review): the loop body and the disposal of `token` are not visible in
 * this extract (only the loop condition at embedded line 367 is) -- confirm
 * against the full file. */
355 static void findJsonTags (void)
357 tokenInfo
*const token
= newToken ();
359 /* We allow multiple top-level elements, although it's not actually valid
360 * JSON. An interesting side effect of this is that we allow a leading
361 * Unicode BOM mark -- even though ok, many JSON parsers will choke on it */
367 while (token
->type
!= TOKEN_EOF
);
372 static void initialize (const langType language
)
374 Lang_json
= language
;
375 addKeyword ("true", language
, KEYWORD_true
);
376 addKeyword ("false", language
, KEYWORD_false
);
377 addKeyword ("null", language
, KEYWORD_null
);
380 /* Create parser definition structure */
/* Creates the parser definition named "JSON" with parserNew() and fills it
 * in: files with the "json" extension, the JsonKinds table with its entry
 * count, findJsonTags() as the parsing routine and initialize() as the
 * set-up hook.
 * NOTE(review): no return statement is visible in this extract (embedded
 * line 390 is absent) -- confirm the function hands `def` back to the
 * caller. */
381 extern parserDefinition
* JsonParser (void)
383 static const char *const extensions
[] = { "json", NULL
};
384 parserDefinition
*const def
= parserNew ("JSON");
385 def
->extensions
= extensions
;
386 def
->kinds
= JsonKinds
;
387 def
->kindCount
= KIND_COUNT (JsonKinds
);
388 def
->parser
= findJsonTags
;
389 def
->initialize
= initialize
;