Make parser includes closer to uctags and sync parser license header
[geany-mirror.git] / ctags / parsers / json.c
blob6471ea774ebbbea33662d4fe0bc60fb530565081
1 /*
2 * Copyright (c) 2014, Colomban Wendling <colomban@geany.org>
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License version 2 or (at your option) any later version.
6 */
7 /*
8 * This module contains functions for generating tags for JSON files.
10 * http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf
12 * This implementation is forgiving and allows many constructs that are not
13 * actually valid but that don't conflict with the format. This is intended to
14 * better support partly broken or unfinished files.
17 #include "general.h"
19 #include <string.h>
20 #include "debug.h"
21 #include "main.h"
22 #include "entry.h"
23 #include "keyword.h"
24 #include "parse.h"
25 #include "read.h"
26 #include "routines.h"
27 #include "vstring.h"
/* Lexical token types produced by readTokenFull() */
typedef enum {
	TOKEN_EOF = 0,		/* end of input */
	TOKEN_UNDEFINED,	/* anything that is not recognized */
	TOKEN_OPEN_SQUARE,	/* [ */
	TOKEN_CLOSE_SQUARE,	/* ] */
	TOKEN_OPEN_CURLY,	/* { */
	TOKEN_CLOSE_CURLY,	/* } */
	TOKEN_COLON,		/* : */
	TOKEN_COMMA,		/* , */
	TOKEN_TRUE,			/* keyword "true" */
	TOKEN_FALSE,		/* keyword "false" */
	TOKEN_NULL,			/* keyword "null" */
	TOKEN_NUMBER,		/* any other identifier-like word */
	TOKEN_STRING		/* double-quoted string */
} tokenType;
/* Tag kinds.  The non-negative values are used as indices into JsonKinds[] */
typedef enum {
	TAG_NONE = -1,		/* no kind (a token that has no scope yet) */
	TAG_OBJECT = 0,
	TAG_ARRAY,
	TAG_NUMBER,
	TAG_STRING,
	TAG_BOOLEAN,
	TAG_NULL,
	TAG_COUNT			/* number of real kinds, not a kind itself */
} jsonKind;
56 typedef struct {
57 tokenType type;
58 jsonKind scopeKind;
59 vString *string;
60 vString *scope;
61 unsigned long lineNumber;
62 MIOPos filePosition;
63 } tokenInfo;
/* Identifiers of the keywords registered by initialize() */
typedef enum {
	KEYWORD_true = 0,
	KEYWORD_false,
	KEYWORD_null
} keywordId;
71 static langType Lang_json;
73 static kindOption JsonKinds [] = {
74 { TRUE, 'o', "object", "objects" },
75 { TRUE, 'a', "array", "arrays" },
76 { TRUE, 'n', "number", "numbers" },
77 { TRUE, 's', "string", "strings" },
78 { TRUE, 'b', "boolean", "booleans" },
79 { TRUE, 'z', "null", "nulls" }
82 static tokenInfo *newToken (void)
84 tokenInfo *const token = xMalloc (1, tokenInfo);
86 token->type = TOKEN_UNDEFINED;
87 token->scopeKind = TAG_NONE;
88 token->string = vStringNew ();
89 token->scope = vStringNew ();
90 token->lineNumber = getSourceLineNumber ();
91 token->filePosition = getInputFilePosition ();
93 return token;
96 static void deleteToken (tokenInfo *const token)
98 vStringDelete (token->string);
99 vStringDelete (token->scope);
100 eFree (token);
103 static void copyToken (tokenInfo *const dest, tokenInfo *const src)
105 dest->type = src->type;
106 dest->scopeKind = src->scopeKind;
107 vStringCopy (dest->string, src->string);
108 vStringCopy (dest->scope, src->scope);
109 dest->lineNumber = src->lineNumber;
110 dest->filePosition = src->filePosition;
113 static void makeJsonTag (tokenInfo *const token, const jsonKind kind)
115 tagEntryInfo e;
117 if (! JsonKinds[kind].enabled)
118 return;
120 initTagEntry (&e, vStringValue (token->string));
122 e.lineNumber = token->lineNumber;
123 e.filePosition = token->filePosition;
124 e.kindName = JsonKinds[kind].name;
125 e.kind = JsonKinds[kind].letter;
127 if (vStringLength (token->scope) > 0)
129 Assert (token->scopeKind > TAG_NONE && token->scopeKind < TAG_COUNT);
131 e.extensionFields.scope[0] = JsonKinds[token->scopeKind].name;
132 e.extensionFields.scope[1] = vStringValue (token->scope);
135 makeTagEntry (&e);
138 static boolean isIdentChar (int c)
140 return (isalnum (c) || c == '+' || c == '-' || c == '.');
143 static void readTokenFull (tokenInfo *const token,
144 boolean includeStringRepr)
146 int c;
148 token->type = TOKEN_UNDEFINED;
149 vStringClear (token->string);
152 c = fileGetc ();
153 while (c == '\t' || c == ' ' || c == '\r' || c == '\n');
155 token->lineNumber = getSourceLineNumber ();
156 token->filePosition = getInputFilePosition ();
158 switch (c)
160 case EOF: token->type = TOKEN_EOF; break;
161 case '[': token->type = TOKEN_OPEN_SQUARE; break;
162 case ']': token->type = TOKEN_CLOSE_SQUARE; break;
163 case '{': token->type = TOKEN_OPEN_CURLY; break;
164 case '}': token->type = TOKEN_CLOSE_CURLY; break;
165 case ':': token->type = TOKEN_COLON; break;
166 case ',': token->type = TOKEN_COMMA; break;
168 case '"':
170 boolean escaped = FALSE;
171 token->type = TOKEN_STRING;
172 while (TRUE)
174 c = fileGetc ();
175 /* we don't handle unicode escapes but they are safe */
176 if (escaped)
177 escaped = FALSE;
178 else if (c == '\\')
179 escaped = TRUE;
180 else if (c >= 0x00 && c <= 0x1F)
181 break; /* break on invalid, unescaped, control characters */
182 else if (c == '"' || c == EOF)
183 break;
184 if (includeStringRepr)
185 vStringPut (token->string, c);
187 vStringTerminate (token->string);
188 break;
191 default:
192 if (! isIdentChar (c))
193 token->type = TOKEN_UNDEFINED;
194 else
198 vStringPut (token->string, c);
199 c = fileGetc ();
201 while (c != EOF && isIdentChar (c));
202 vStringTerminate (token->string);
203 fileUngetc (c);
204 switch (lookupKeyword (vStringValue (token->string), Lang_json))
206 case KEYWORD_true: token->type = TOKEN_TRUE; break;
207 case KEYWORD_false: token->type = TOKEN_FALSE; break;
208 case KEYWORD_null: token->type = TOKEN_NULL; break;
209 default: token->type = TOKEN_NUMBER; break;
212 break;
216 #define readToken(t) (readTokenFull ((t), FALSE))
218 static void pushScope (tokenInfo *const token,
219 const tokenInfo *const parent,
220 const jsonKind parentKind)
222 if (vStringLength (token->scope) > 0)
223 vStringPut (token->scope, '.');
224 vStringCat (token->scope, parent->string);
225 vStringTerminate (token->scope);
226 token->scopeKind = parentKind;
229 static void popScope (tokenInfo *const token,
230 const tokenInfo *const parent)
232 vStringTruncate (token->scope, vStringLength (parent->scope));
233 token->scopeKind = parent->scopeKind;
236 #define skipToOneOf2(token, type1, type2) \
237 (skipToOneOf3 (token, type1, type2, TOKEN_EOF /* dummy */))
239 #define skipTo(token, type) \
240 (skipToOneOf3 (token, type, /* dummies */ TOKEN_EOF, TOKEN_EOF))
242 static void skipToOneOf3 (tokenInfo *const token,
243 const tokenType type1,
244 const tokenType type2,
245 const tokenType type3)
247 while (token->type != TOKEN_EOF &&
248 token->type != type1 &&
249 token->type != type2 &&
250 token->type != type3)
252 readToken (token);
253 if (token->type == TOKEN_OPEN_CURLY)
255 skipTo (token, TOKEN_CLOSE_CURLY);
256 readToken (token);
258 else if (token->type == TOKEN_OPEN_SQUARE)
260 skipTo (token, TOKEN_CLOSE_SQUARE);
261 readToken (token);
266 static jsonKind tokenToKind (const tokenType type)
268 switch (type)
270 case TOKEN_OPEN_CURLY: return TAG_OBJECT;
271 case TOKEN_OPEN_SQUARE: return TAG_ARRAY;
272 case TOKEN_STRING: return TAG_STRING;
273 case TOKEN_TRUE:
274 case TOKEN_FALSE: return TAG_BOOLEAN;
275 case TOKEN_NUMBER: return TAG_NUMBER;
276 default: return TAG_NULL;
280 static void parseValue (tokenInfo *const token)
282 if (token->type == TOKEN_OPEN_CURLY)
284 tokenInfo *name = newToken ();
288 readTokenFull (token, TRUE);
289 if (token->type == TOKEN_STRING)
291 jsonKind tagKind = TAG_NULL; /* default in case of invalid value */
293 copyToken (name, token);
295 /* skip any possible garbage before the value */
296 skipToOneOf3 (token, TOKEN_CLOSE_CURLY, TOKEN_COLON, TOKEN_COMMA);
298 if (token->type == TOKEN_COLON)
300 readToken (token);
301 tagKind = tokenToKind (token->type);
303 pushScope (token, name, tagKind);
304 parseValue (token);
305 popScope (token, name);
308 makeJsonTag (name, tagKind);
310 /* skip to the end of the construct */
311 skipToOneOf2 (token, TOKEN_CLOSE_CURLY, TOKEN_COMMA);
313 while (token->type != TOKEN_EOF &&
314 token->type != TOKEN_CLOSE_CURLY);
316 if (token->type == TOKEN_CLOSE_CURLY)
317 readToken (token);
319 deleteToken (name);
321 else if (token->type == TOKEN_OPEN_SQUARE)
323 tokenInfo *name = newToken ();
324 char buf[32];
325 unsigned int nth = 0;
327 readToken (token);
328 while (token->type != TOKEN_EOF &&
329 token->type != TOKEN_CLOSE_SQUARE)
331 jsonKind tagKind;
333 tagKind = tokenToKind (token->type);
335 copyToken (name, token);
336 snprintf (buf, sizeof buf, "%u", nth++);
337 vStringCopyS (name->string, buf);
339 makeJsonTag (name, tagKind);
340 pushScope (token, name, tagKind);
341 parseValue (token);
342 popScope (token, name);
344 /* skip to the end of the construct */
345 skipToOneOf2 (token, TOKEN_CLOSE_SQUARE, TOKEN_COMMA);
346 if (token->type != TOKEN_CLOSE_SQUARE)
347 readToken (token);
350 if (token->type == TOKEN_CLOSE_SQUARE)
351 readToken (token);
353 deleteToken (name);
357 static void findJsonTags (void)
359 tokenInfo *const token = newToken ();
361 /* We allow multiple top-level elements, although it's not actually valid
362 * JSON. An interesting side effect of this is that we allow a leading
363 * Unicode BOM mark -- even though ok, many JSON parsers will choke on it */
366 readToken (token);
367 parseValue (token);
369 while (token->type != TOKEN_EOF);
371 deleteToken (token);
374 static void initialize (const langType language)
376 Lang_json = language;
377 addKeyword ("true", language, KEYWORD_true);
378 addKeyword ("false", language, KEYWORD_false);
379 addKeyword ("null", language, KEYWORD_null);
382 /* Create parser definition structure */
383 extern parserDefinition* JsonParser (void)
385 static const char *const extensions [] = { "json", NULL };
386 parserDefinition *const def = parserNew ("JSON");
387 def->extensions = extensions;
388 def->kinds = JsonKinds;
389 def->kindCount = KIND_COUNT (JsonKinds);
390 def->parser = findJsonTags;
391 def->initialize = initialize;
393 return def;