Aggressively cache line height for computing margin sizes
[geany-mirror.git] / ctags / parsers / json.c
blob4020f2e5b59d58622d4cf4aa952ab46b92181dad
1 /*
2 * Copyright (c) 2014, Colomban Wendling <colomban@geany.org>
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License version 2 or (at your option) any later version.
6 */
7 /*
8 * This module contains functions for generating tags for JSON files.
10 * http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf
12 * This implementation is forgiving and allows many constructs that are not
13 * actually valid but that don't conflict with the format. This is intended to
14 * better support partly broken or unfinished files.
17 #include "general.h"
19 #include <string.h>
20 #include "debug.h"
21 #include "entry.h"
22 #include "keyword.h"
23 #include "parse.h"
24 #include "read.h"
25 #include "routines.h"
26 #include "vstring.h"
/*
 * Lexical token classes assigned by readTokenFull().
 *
 * TOKEN_UNDEFINED is both the initial state of a token and the class of any
 * character that starts no known token.  TOKEN_NUMBER is the class of every
 * identifier-like run that is not one of the keywords true/false/null.
 */
typedef enum {
	TOKEN_EOF,
	TOKEN_UNDEFINED,
	TOKEN_OPEN_SQUARE,
	TOKEN_CLOSE_SQUARE,
	TOKEN_OPEN_CURLY,
	TOKEN_CLOSE_CURLY,
	TOKEN_COLON,
	TOKEN_COMMA,
	TOKEN_TRUE,
	TOKEN_FALSE,
	TOKEN_NULL,
	TOKEN_NUMBER,
	TOKEN_STRING
} tokenType;
/*
 * Kinds of tags this parser emits.
 *
 * The non-negative values are used directly as indices into JsonKinds[], so
 * their order must match that table.  TAG_NONE marks "no scope kind" and
 * TAG_COUNT is the number of real kinds (used as an exclusive upper bound).
 */
typedef enum {
	TAG_NONE = -1,
	TAG_OBJECT,
	TAG_ARRAY,
	TAG_NUMBER,
	TAG_STRING,
	TAG_BOOLEAN,
	TAG_NULL,
	TAG_COUNT
} jsonKind;
55 typedef struct {
56 tokenType type;
57 jsonKind scopeKind;
58 vString *string;
59 vString *scope;
60 unsigned long lineNumber;
61 MIOPos filePosition;
62 } tokenInfo;
/* Identifiers of the keywords registered by initialize(). */
typedef enum {
	KEYWORD_true,
	KEYWORD_false,
	KEYWORD_null
} keywordId;
70 static langType Lang_json;
72 static kindDefinition JsonKinds [] = {
73 { true, 'o', "object", "objects" },
74 { true, 'a', "array", "arrays" },
75 { true, 'n', "number", "numbers" },
76 { true, 's', "string", "strings" },
77 { true, 'b', "boolean", "booleans" },
78 { true, 'z', "null", "nulls" }
81 static tokenInfo *newToken (void)
83 tokenInfo *const token = xMalloc (1, tokenInfo);
85 token->type = TOKEN_UNDEFINED;
86 token->scopeKind = TAG_NONE;
87 token->string = vStringNew ();
88 token->scope = vStringNew ();
89 token->lineNumber = getInputLineNumber ();
90 token->filePosition = getInputFilePosition ();
92 return token;
95 static void deleteToken (tokenInfo *const token)
97 vStringDelete (token->string);
98 vStringDelete (token->scope);
99 eFree (token);
102 static void copyToken (tokenInfo *const dest, tokenInfo *const src)
104 dest->type = src->type;
105 dest->scopeKind = src->scopeKind;
106 vStringCopy (dest->string, src->string);
107 vStringCopy (dest->scope, src->scope);
108 dest->lineNumber = src->lineNumber;
109 dest->filePosition = src->filePosition;
112 static void makeJsonTag (tokenInfo *const token, const jsonKind kind)
114 tagEntryInfo e;
116 if (! JsonKinds[kind].enabled)
117 return;
119 initTagEntry (&e, vStringValue (token->string), kind);
121 e.lineNumber = token->lineNumber;
122 e.filePosition = token->filePosition;
124 if (vStringLength (token->scope) > 0)
126 Assert (token->scopeKind > TAG_NONE && token->scopeKind < TAG_COUNT);
128 e.extensionFields.scopeKindIndex = token->scopeKind;
129 e.extensionFields.scopeName = vStringValue (token->scope);
132 makeTagEntry (&e);
/*
 * Tells whether `c` may be part of an identifier-like token, i.e. a keyword
 * or a number: alphanumerics plus '+', '-' and '.'.
 *
 * This is deliberately loose; it does not validate JSON number syntax.
 */
static bool isIdentChar (int c)
{
	return (isalnum (c) || c == '+' || c == '-' || c == '.');
}
140 static void readTokenFull (tokenInfo *const token,
141 bool includeStringRepr)
143 int c;
145 token->type = TOKEN_UNDEFINED;
146 vStringClear (token->string);
149 c = getcFromInputFile ();
150 while (c == '\t' || c == ' ' || c == '\r' || c == '\n');
152 token->lineNumber = getInputLineNumber ();
153 token->filePosition = getInputFilePosition ();
155 switch (c)
157 case EOF: token->type = TOKEN_EOF; break;
158 case '[': token->type = TOKEN_OPEN_SQUARE; break;
159 case ']': token->type = TOKEN_CLOSE_SQUARE; break;
160 case '{': token->type = TOKEN_OPEN_CURLY; break;
161 case '}': token->type = TOKEN_CLOSE_CURLY; break;
162 case ':': token->type = TOKEN_COLON; break;
163 case ',': token->type = TOKEN_COMMA; break;
165 case '"':
167 bool escaped = false;
168 token->type = TOKEN_STRING;
169 while (true)
171 c = getcFromInputFile ();
172 /* we don't handle unicode escapes but they are safe */
173 if (escaped)
174 escaped = false;
175 else if (c == '\\')
176 escaped = true;
177 else if (c >= 0x00 && c <= 0x1F)
178 break; /* break on invalid, unescaped, control characters */
179 else if (c == '"' || c == EOF)
180 break;
181 if (includeStringRepr)
182 vStringPut (token->string, c);
184 break;
187 default:
188 if (! isIdentChar (c))
189 token->type = TOKEN_UNDEFINED;
190 else
194 vStringPut (token->string, c);
195 c = getcFromInputFile ();
197 while (c != EOF && isIdentChar (c));
198 ungetcToInputFile (c);
199 switch (lookupKeyword (vStringValue (token->string), Lang_json))
201 case KEYWORD_true: token->type = TOKEN_TRUE; break;
202 case KEYWORD_false: token->type = TOKEN_FALSE; break;
203 case KEYWORD_null: token->type = TOKEN_NULL; break;
204 default: token->type = TOKEN_NUMBER; break;
207 break;
/* Reads the next token without collecting the text of string tokens. */
#define readToken(t) (readTokenFull ((t), false))
213 static void pushScope (tokenInfo *const token,
214 const tokenInfo *const parent,
215 const jsonKind parentKind)
217 if (vStringLength (token->scope) > 0)
218 vStringPut (token->scope, '.');
219 vStringCat (token->scope, parent->string);
220 token->scopeKind = parentKind;
223 static void popScope (tokenInfo *const token,
224 const tokenInfo *const parent)
226 vStringTruncate (token->scope, vStringLength (parent->scope));
227 token->scopeKind = parent->scopeKind;
/*
 * Shorthands for skipToOneOf3() with fewer stop types.  The unused slots are
 * filled with TOKEN_EOF, which skipToOneOf3() stops on in any case.
 */
#define skipToOneOf2(token, type1, type2) \
	(skipToOneOf3 (token, type1, type2, TOKEN_EOF /* dummy */))

#define skipTo(token, type) \
	(skipToOneOf3 (token, type, /* dummies */ TOKEN_EOF, TOKEN_EOF))
236 static void skipToOneOf3 (tokenInfo *const token,
237 const tokenType type1,
238 const tokenType type2,
239 const tokenType type3)
241 while (token->type != TOKEN_EOF &&
242 token->type != type1 &&
243 token->type != type2 &&
244 token->type != type3)
246 readToken (token);
247 if (token->type == TOKEN_OPEN_CURLY)
249 skipTo (token, TOKEN_CLOSE_CURLY);
250 readToken (token);
252 else if (token->type == TOKEN_OPEN_SQUARE)
254 skipTo (token, TOKEN_CLOSE_SQUARE);
255 readToken (token);
260 static jsonKind tokenToKind (const tokenType type)
262 switch (type)
264 case TOKEN_OPEN_CURLY: return TAG_OBJECT;
265 case TOKEN_OPEN_SQUARE: return TAG_ARRAY;
266 case TOKEN_STRING: return TAG_STRING;
267 case TOKEN_TRUE:
268 case TOKEN_FALSE: return TAG_BOOLEAN;
269 case TOKEN_NUMBER: return TAG_NUMBER;
270 default: return TAG_NULL;
274 static void parseValue (tokenInfo *const token)
276 if (token->type == TOKEN_OPEN_CURLY)
278 tokenInfo *name = newToken ();
282 readTokenFull (token, true);
283 if (token->type == TOKEN_STRING)
285 jsonKind tagKind = TAG_NULL; /* default in case of invalid value */
287 copyToken (name, token);
289 /* skip any possible garbage before the value */
290 skipToOneOf3 (token, TOKEN_CLOSE_CURLY, TOKEN_COLON, TOKEN_COMMA);
292 if (token->type == TOKEN_COLON)
294 readToken (token);
295 tagKind = tokenToKind (token->type);
297 pushScope (token, name, tagKind);
298 parseValue (token);
299 popScope (token, name);
302 makeJsonTag (name, tagKind);
304 /* skip to the end of the construct */
305 skipToOneOf2 (token, TOKEN_CLOSE_CURLY, TOKEN_COMMA);
307 while (token->type != TOKEN_EOF &&
308 token->type != TOKEN_CLOSE_CURLY);
310 if (token->type == TOKEN_CLOSE_CURLY)
311 readToken (token);
313 deleteToken (name);
315 else if (token->type == TOKEN_OPEN_SQUARE)
317 tokenInfo *name = newToken ();
318 char buf[32];
319 unsigned int nth = 0;
321 readToken (token);
322 while (token->type != TOKEN_EOF &&
323 token->type != TOKEN_CLOSE_SQUARE)
325 jsonKind tagKind;
327 tagKind = tokenToKind (token->type);
329 copyToken (name, token);
330 snprintf (buf, sizeof buf, "%u", nth++);
331 vStringCopyS (name->string, buf);
333 makeJsonTag (name, tagKind);
334 pushScope (token, name, tagKind);
335 parseValue (token);
336 popScope (token, name);
338 /* skip to the end of the construct */
339 skipToOneOf2 (token, TOKEN_CLOSE_SQUARE, TOKEN_COMMA);
340 if (token->type != TOKEN_CLOSE_SQUARE)
341 readToken (token);
344 if (token->type == TOKEN_CLOSE_SQUARE)
345 readToken (token);
347 deleteToken (name);
351 static void findJsonTags (void)
353 tokenInfo *const token = newToken ();
355 /* We allow multiple top-level elements, although it's not actually valid
356 * JSON. An interesting side effect of this is that we allow a leading
357 * Unicode BOM mark -- even though ok, many JSON parsers will choke on it */
360 readToken (token);
361 parseValue (token);
363 while (token->type != TOKEN_EOF);
365 deleteToken (token);
368 static void initialize (const langType language)
370 Lang_json = language;
371 addKeyword ("true", language, KEYWORD_true);
372 addKeyword ("false", language, KEYWORD_false);
373 addKeyword ("null", language, KEYWORD_null);
376 /* Create parser definition structure */
377 extern parserDefinition* JsonParser (void)
379 static const char *const extensions [] = { "json", NULL };
380 parserDefinition *const def = parserNew ("JSON");
381 def->extensions = extensions;
382 def->kindTable = JsonKinds;
383 def->kindCount = ARRAY_SIZE (JsonKinds);
384 def->parser = findJsonTags;
385 def->initialize = initialize;
387 return def;