2 * Copyright (c) 2014, Colomban Wendling <colomban@geany.org>
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License version 2 or (at your option) any later version.
8 * This module contains functions for generating tags for JSON files.
10 * http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf
12 * This implementation is forgiving and allows many constructs that are not
13 * actually valid but that don't conflict with the format. This is intended to
14 * better support partly broken or unfinished files.
60 unsigned long lineNumber
;
70 static langType Lang_json
;
72 static kindDefinition JsonKinds
[] = {
73 { true, 'o', "object", "objects" },
74 { true, 'a', "array", "arrays" },
75 { true, 'n', "number", "numbers" },
76 { true, 's', "string", "strings" },
77 { true, 'b', "boolean", "booleans" },
78 { true, 'z', "null", "nulls" }
81 static tokenInfo
*newToken (void)
83 tokenInfo
*const token
= xMalloc (1, tokenInfo
);
85 token
->type
= TOKEN_UNDEFINED
;
86 token
->scopeKind
= TAG_NONE
;
87 token
->string
= vStringNew ();
88 token
->scope
= vStringNew ();
89 token
->lineNumber
= getInputLineNumber ();
90 token
->filePosition
= getInputFilePosition ();
95 static void deleteToken (tokenInfo
*const token
)
97 vStringDelete (token
->string
);
98 vStringDelete (token
->scope
);
102 static void copyToken (tokenInfo
*const dest
, tokenInfo
*const src
)
104 dest
->type
= src
->type
;
105 dest
->scopeKind
= src
->scopeKind
;
106 vStringCopy (dest
->string
, src
->string
);
107 vStringCopy (dest
->scope
, src
->scope
);
108 dest
->lineNumber
= src
->lineNumber
;
109 dest
->filePosition
= src
->filePosition
;
112 static void makeJsonTag (tokenInfo
*const token
, const jsonKind kind
)
116 if (! JsonKinds
[kind
].enabled
)
119 initTagEntry (&e
, vStringValue (token
->string
), kind
);
121 e
.lineNumber
= token
->lineNumber
;
122 e
.filePosition
= token
->filePosition
;
124 if (vStringLength (token
->scope
) > 0)
126 Assert (token
->scopeKind
> TAG_NONE
&& token
->scopeKind
< TAG_COUNT
);
128 e
.extensionFields
.scopeKindIndex
= token
->scopeKind
;
129 e
.extensionFields
.scopeName
= vStringValue (token
->scope
);
/*
 * Tells whether c can be part of a bare word (a number or a keyword such as
 * true, false or null): any alphanumeric character, '+', '-' or '.'.
 *
 * c is an int as returned by a getc-style reader; EOF is not an identifier
 * character.
 */
static bool isIdentChar (int c)
{
	return (isalnum (c) || c == '+' || c == '-' || c == '.');
}
/*
 * Reads the next token from the input file into `token`.
 *
 * The token's type is reset to TOKEN_UNDEFINED and its string is cleared
 * before anything is read.  `includeStringRepr` controls whether the
 * characters of a string token are appended to token->string.
 *
 * NOTE(review): several statements of this function (loop and switch
 * headers, the handling of the `escaped` flag) are not visible in this
 * view; the comments below describe only the statements that are shown.
 */
140 static void readTokenFull (tokenInfo
*const token
,
141 bool includeStringRepr
)
145 token
->type
= TOKEN_UNDEFINED
;
146 vStringClear (token
->string
);
/* read characters for as long as they are tab, space, CR or LF */
149 c
= getcFromInputFile ();
150 while (c
== '\t' || c
== ' ' || c
== '\r' || c
== '\n');
/* record the input line number and file position once whitespace is consumed */
152 token
->lineNumber
= getInputLineNumber ();
153 token
->filePosition
= getInputFilePosition ();
/* end of input and single-character punctuation map directly to a type */
157 case EOF
: token
->type
= TOKEN_EOF
; break;
158 case '[': token
->type
= TOKEN_OPEN_SQUARE
; break;
159 case ']': token
->type
= TOKEN_CLOSE_SQUARE
; break;
160 case '{': token
->type
= TOKEN_OPEN_CURLY
; break;
161 case '}': token
->type
= TOKEN_CLOSE_CURLY
; break;
162 case ':': token
->type
= TOKEN_COLON
; break;
163 case ',': token
->type
= TOKEN_COMMA
; break;
/* string token: characters are read one at a time until a terminator */
167 bool escaped
= false;
168 token
->type
= TOKEN_STRING
;
171 c
= getcFromInputFile ();
172 /* we don't handle unicode escapes but they are safe */
177 else if (c
>= 0x00 && c
<= 0x1F)
178 break; /* break on invalid, unescaped, control characters */
/* a double quote or the end of input is tested next */
179 else if (c
== '"' || c
== EOF
)
/* the character is stored only when the caller asked for the string text */
181 if (includeStringRepr
)
182 vStringPut (token
->string
, c
);
/* a character rejected by isIdentChar () leaves the token undefined */
188 if (! isIdentChar (c
))
189 token
->type
= TOKEN_UNDEFINED
;
/* collect a run of identifier characters into token->string */
194 vStringPut (token
->string
, c
);
195 c
= getcFromInputFile ();
197 while (c
!= EOF
&& isIdentChar (c
));
/* hand the last character read back to the input */
198 ungetcToInputFile (c
);
/* the keywords true, false and null get their own types; any other word is
 * classified as a number */
199 switch (lookupKeyword (vStringValue (token
->string
), Lang_json
))
201 case KEYWORD_true
: token
->type
= TOKEN_TRUE
; break;
202 case KEYWORD_false
: token
->type
= TOKEN_FALSE
; break;
203 case KEYWORD_null
: token
->type
= TOKEN_NULL
; break;
204 default: token
->type
= TOKEN_NUMBER
; break;
/* Reads the next token without storing the text of string tokens. */
#define readToken(t) (readTokenFull ((t), false))
213 static void pushScope (tokenInfo
*const token
,
214 const tokenInfo
*const parent
,
215 const jsonKind parentKind
)
217 if (vStringLength (token
->scope
) > 0)
218 vStringPut (token
->scope
, '.');
219 vStringCat (token
->scope
, parent
->string
);
220 token
->scopeKind
= parentKind
;
223 static void popScope (tokenInfo
*const token
,
224 const tokenInfo
*const parent
)
226 vStringTruncate (token
->scope
, vStringLength (parent
->scope
));
227 token
->scopeKind
= parent
->scopeKind
;
/* Two-type variant of skipToOneOf3 (); TOKEN_EOF fills the unused slot,
 * which is harmless because skipToOneOf3 () always stops on TOKEN_EOF. */
#define skipToOneOf2(token, type1, type2) \
	(skipToOneOf3 (token, type1, type2, TOKEN_EOF /* dummy */))
/* Single-type variant of skipToOneOf3 (); TOKEN_EOF fills the two unused
 * slots, which is harmless because skipToOneOf3 () always stops on
 * TOKEN_EOF. */
#define skipTo(token, type) \
	(skipToOneOf3 (token, type, /* dummies */ TOKEN_EOF, TOKEN_EOF))
/*
 * Skips tokens until `token` has one of the types type1, type2 or type3,
 * or TOKEN_EOF.
 *
 * When an opening curly or square bracket is met on the way, skipTo () is
 * used to reach the matching closing token.
 *
 * NOTE(review): the statements that advance `token` are not visible in this
 * view, so where exactly the next token is read cannot be confirmed here.
 */
236 static void skipToOneOf3 (tokenInfo
*const token
,
237 const tokenType type1
,
238 const tokenType type2
,
239 const tokenType type3
)
/* loop while the current token is none of the stop types */
241 while (token
->type
!= TOKEN_EOF
&&
242 token
->type
!= type1
&&
243 token
->type
!= type2
&&
244 token
->type
!= type3
)
/* an opening curly: skip to its closing curly */
247 if (token
->type
== TOKEN_OPEN_CURLY
)
249 skipTo (token
, TOKEN_CLOSE_CURLY
);
/* an opening square bracket: skip to its closing bracket */
252 else if (token
->type
== TOKEN_OPEN_SQUARE
)
254 skipTo (token
, TOKEN_CLOSE_SQUARE
);
260 static jsonKind
tokenToKind (const tokenType type
)
264 case TOKEN_OPEN_CURLY
: return TAG_OBJECT
;
265 case TOKEN_OPEN_SQUARE
: return TAG_ARRAY
;
266 case TOKEN_STRING
: return TAG_STRING
;
268 case TOKEN_FALSE
: return TAG_BOOLEAN
;
269 case TOKEN_NUMBER
: return TAG_NUMBER
;
270 default: return TAG_NULL
;
/*
 * Parses the value that starts at `token` and emits tags for its members.
 *
 * Two cases are handled: an object (token is TOKEN_OPEN_CURLY), whose
 * members are tagged under their key, and an array (token is
 * TOKEN_OPEN_SQUARE), whose elements are tagged under their decimal index.
 *
 * NOTE(review): a number of statements (token reads, what happens between
 * pushScope () and popScope (), release of `name`) are not visible in this
 * view; the comments below describe only the statements that are shown.
 */
274 static void parseValue (tokenInfo
*const token
)
/* object case */
276 if (token
->type
== TOKEN_OPEN_CURLY
)
/* scratch token holding the member currently being tagged */
278 tokenInfo
*name
= newToken ();
/* read with the string text kept, since a key's text becomes the tag name */
282 readTokenFull (token
, true);
283 if (token
->type
== TOKEN_STRING
)
285 jsonKind tagKind
= TAG_NULL
; /* default in case of invalid value */
/* remember the key (text, scope and position) before moving on */
287 copyToken (name
, token
);
289 /* skip any possible garbage before the value */
290 skipToOneOf3 (token
, TOKEN_CLOSE_CURLY
, TOKEN_COLON
, TOKEN_COMMA
);
/* a colon was found: derive the tag kind from the token type */
292 if (token
->type
== TOKEN_COLON
)
295 tagKind
= tokenToKind (token
->type
);
/* the key becomes the enclosing scope, then the scope is restored */
297 pushScope (token
, name
, tagKind
);
299 popScope (token
, name
);
/* emit the tag for the key with the kind determined above */
302 makeJsonTag (name
, tagKind
);
304 /* skip to the end of the construct */
305 skipToOneOf2 (token
, TOKEN_CLOSE_CURLY
, TOKEN_COMMA
);
/* loop condition: continue until the closing curly or the end of input */
307 while (token
->type
!= TOKEN_EOF
&&
308 token
->type
!= TOKEN_CLOSE_CURLY
);
310 if (token
->type
== TOKEN_CLOSE_CURLY
)
/* array case */
315 else if (token
->type
== TOKEN_OPEN_SQUARE
)
317 tokenInfo
*name
= newToken ();
/* zero-based index of the element being processed */
319 unsigned int nth
= 0;
/* iterate until the closing bracket or the end of input */
322 while (token
->type
!= TOKEN_EOF
&&
323 token
->type
!= TOKEN_CLOSE_SQUARE
)
327 tagKind
= tokenToKind (token
->type
);
/* the tag reuses the element's scope and position, but is named after its
 * decimal index */
329 copyToken (name
, token
);
330 snprintf (buf
, sizeof buf
, "%u", nth
++);
331 vStringCopyS (name
->string
, buf
);
/* emit the element tag; its index then becomes the enclosing scope until
 * the scope is restored */
333 makeJsonTag (name
, tagKind
);
334 pushScope (token
, name
, tagKind
);
336 popScope (token
, name
);
338 /* skip to the end of the construct */
339 skipToOneOf2 (token
, TOKEN_CLOSE_SQUARE
, TOKEN_COMMA
);
340 if (token
->type
!= TOKEN_CLOSE_SQUARE
)
344 if (token
->type
== TOKEN_CLOSE_SQUARE
)
/*
 * Parser entry point, installed as the `parser` callback of the parser
 * definition built by JsonParser ().
 *
 * Allocates a working token and loops until that token reaches TOKEN_EOF.
 *
 * NOTE(review): the body of the loop and any cleanup of the token are not
 * visible in this view.
 */
351 static void findJsonTags (void)
353 tokenInfo
*const token
= newToken ();
355 /* We allow multiple top-level elements, although it's not actually valid
356 * JSON. An interesting side effect of this is that we allow a leading
357 * Unicode BOM mark -- even though ok, many JSON parsers will choke on it */
/* loop condition: stop once the end of input has been reached */
363 while (token
->type
!= TOKEN_EOF
);
368 static void initialize (const langType language
)
370 Lang_json
= language
;
371 addKeyword ("true", language
, KEYWORD_true
);
372 addKeyword ("false", language
, KEYWORD_false
);
373 addKeyword ("null", language
, KEYWORD_null
);
376 /* Create parser definition structure */
377 extern parserDefinition
* JsonParser (void)
379 static const char *const extensions
[] = { "json", NULL
};
380 parserDefinition
*const def
= parserNew ("JSON");
381 def
->extensions
= extensions
;
382 def
->kindTable
= JsonKinds
;
383 def
->kindCount
= ARRAY_SIZE (JsonKinds
);
384 def
->parser
= findJsonTags
;
385 def
->initialize
= initialize
;