Merge pull request #482 from philippwiesemann/fix-typos-po-de
[geany-mirror.git] / tagmanager / ctags / json.c
blob8c87476d38a1b30fe641359f28e7ebb1e0b60799
1 /*
2 * Copyright (c) 2014, Colomban Wendling <colomban@geany.org>
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License.
6 */
7 /*
8 * This module contains functions for generating tags for JSON files.
10 * http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf
12 * This implementation is forgiving and allows many constructs that are not
13 * actually valid but that don't conflict with the format. This is intended to
14 * better support partly broken or unfinished files.
17 #include "general.h"
19 #include <string.h>
20 #include "main.h"
21 #include "entry.h"
22 #include "keyword.h"
23 #include "parse.h"
24 #include "read.h"
25 #include "vstring.h"
/* Lexical token classes assigned by readTokenFull(). */
typedef enum {
	TOKEN_EOF,			/* end of input reached */
	TOKEN_UNDEFINED,	/* initial state, or a character that starts no token */
	TOKEN_OPEN_SQUARE,	/* '[' */
	TOKEN_CLOSE_SQUARE,	/* ']' */
	TOKEN_OPEN_CURLY,	/* '{' */
	TOKEN_CLOSE_CURLY,	/* '}' */
	TOKEN_COLON,		/* ':' */
	TOKEN_COMMA,		/* ',' */
	TOKEN_TRUE,			/* keyword "true" */
	TOKEN_FALSE,		/* keyword "false" */
	TOKEN_NULL,			/* keyword "null" */
	TOKEN_NUMBER,		/* any other identifier-like run of characters */
	TOKEN_STRING		/* double-quoted string */
} tokenType;
/* Tag kinds; the non-negative values are used as indices into JsonKinds[]. */
typedef enum {
	TAG_NONE = -1,	/* no kind (e.g. a token without an enclosing scope) */
	TAG_OBJECT,
	TAG_ARRAY,
	TAG_NUMBER,
	TAG_STRING,
	TAG_BOOLEAN,
	TAG_NULL,
	TAG_COUNT		/* number of real kinds; not a kind itself */
} jsonKind;
54 typedef struct {
55 tokenType type;
56 jsonKind scopeKind;
57 vString *string;
58 vString *scope;
59 unsigned long lineNumber;
60 MIOPos filePosition;
61 } tokenInfo;
/* Identifiers of the keywords registered by initialize(). */
typedef enum {
	KEYWORD_true,	/* "true" */
	KEYWORD_false,	/* "false" */
	KEYWORD_null	/* "null" */
} keywordId;
69 static langType Lang_json;
71 static kindOption JsonKinds [] = {
72 { TRUE, 'o', "member", "objects" },
73 { TRUE, 'a', "member", "arrays" },
74 { TRUE, 'n', "member", "numbers" },
75 { TRUE, 's', "member", "strings" },
76 { TRUE, 'b', "member", "booleans" },
77 { TRUE, 'z', "member", "nulls" }
80 static tokenInfo *newToken (void)
82 tokenInfo *const token = xMalloc (1, tokenInfo);
84 token->type = TOKEN_UNDEFINED;
85 token->scopeKind = TAG_NONE;
86 token->string = vStringNew ();
87 token->scope = vStringNew ();
88 token->lineNumber = getSourceLineNumber ();
89 token->filePosition = getInputFilePosition ();
91 return token;
94 static void deleteToken (tokenInfo *const token)
96 vStringDelete (token->string);
97 vStringDelete (token->scope);
98 eFree (token);
101 static void copyToken (tokenInfo *const dest, tokenInfo *const src)
103 dest->type = src->type;
104 dest->scopeKind = src->scopeKind;
105 vStringCopy (dest->string, src->string);
106 vStringCopy (dest->scope, src->scope);
107 dest->lineNumber = src->lineNumber;
108 dest->filePosition = src->filePosition;
111 static void makeJsonTag (tokenInfo *const token, const jsonKind kind)
113 tagEntryInfo e;
115 if (! JsonKinds[kind].enabled)
116 return;
118 initTagEntry (&e, vStringValue (token->string));
120 e.lineNumber = token->lineNumber;
121 e.filePosition = token->filePosition;
122 e.kindName = JsonKinds[kind].name;
123 e.kind = JsonKinds[kind].letter;
125 if (vStringLength (token->scope) > 0)
127 Assert (token->scopeKind > TAG_NONE && token->scopeKind < TAG_COUNT);
129 e.extensionFields.scope[0] = JsonKinds[token->scopeKind].name;
130 e.extensionFields.scope[1] = vStringValue (token->scope);
133 makeTagEntry (&e);
136 static boolean isIdentChar (int c)
138 return (isalnum (c) || c == '+' || c == '-' || c == '.');
141 static void readTokenFull (tokenInfo *const token,
142 boolean includeStringRepr)
144 int c;
146 token->type = TOKEN_UNDEFINED;
147 vStringClear (token->string);
150 c = fileGetc ();
151 while (c == '\t' || c == ' ' || c == '\r' || c == '\n');
153 token->lineNumber = getSourceLineNumber ();
154 token->filePosition = getInputFilePosition ();
156 switch (c)
158 case EOF: token->type = TOKEN_EOF; break;
159 case '[': token->type = TOKEN_OPEN_SQUARE; break;
160 case ']': token->type = TOKEN_CLOSE_SQUARE; break;
161 case '{': token->type = TOKEN_OPEN_CURLY; break;
162 case '}': token->type = TOKEN_CLOSE_CURLY; break;
163 case ':': token->type = TOKEN_COLON; break;
164 case ',': token->type = TOKEN_COMMA; break;
166 case '"':
168 boolean escaped = FALSE;
169 token->type = TOKEN_STRING;
170 while (TRUE)
172 c = fileGetc ();
173 /* we don't handle unicode escapes but they are safe */
174 if (escaped)
175 escaped = FALSE;
176 else if (c == '\\')
177 escaped = TRUE;
178 else if (c >= 0x00 && c <= 0x1F)
179 break; /* break on invalid, unescaped, control characters */
180 else if (c == '"' || c == EOF)
181 break;
182 if (includeStringRepr)
183 vStringPut (token->string, c);
185 vStringTerminate (token->string);
186 break;
189 default:
190 if (! isIdentChar (c))
191 token->type = TOKEN_UNDEFINED;
192 else
196 vStringPut (token->string, c);
197 c = fileGetc ();
199 while (c != EOF && isIdentChar (c));
200 vStringTerminate (token->string);
201 fileUngetc (c);
202 switch (lookupKeyword (vStringValue (token->string), Lang_json))
204 case KEYWORD_true: token->type = TOKEN_TRUE; break;
205 case KEYWORD_false: token->type = TOKEN_FALSE; break;
206 case KEYWORD_null: token->type = TOKEN_NULL; break;
207 default: token->type = TOKEN_NUMBER; break;
210 break;
214 #define readToken(t) (readTokenFull ((t), FALSE))
216 static void pushScope (tokenInfo *const token,
217 const tokenInfo *const parent,
218 const jsonKind parentKind)
220 if (vStringLength (token->scope) > 0)
221 vStringPut (token->scope, '.');
222 vStringCat (token->scope, parent->string);
223 vStringTerminate (token->scope);
224 token->scopeKind = parentKind;
227 static void popScope (tokenInfo *const token,
228 const tokenInfo *const parent)
230 char *dot = strrchr (token->scope->buffer, '.');
232 if (! dot)
233 vStringClear (token->scope);
234 else
236 *dot = 0;
237 token->scope->length = dot - token->scope->buffer;
239 token->scopeKind = parent->scopeKind;
242 #define skipToOneOf2(token, type1, type2) \
243 (skipToOneOf3 (token, type1, type2, TOKEN_EOF /* dummy */))
245 #define skipTo(token, type) \
246 (skipToOneOf3 (token, type, /* dummies */ TOKEN_EOF, TOKEN_EOF))
248 static void skipToOneOf3 (tokenInfo *const token,
249 const tokenType type1,
250 const tokenType type2,
251 const tokenType type3)
253 while (token->type != TOKEN_EOF &&
254 token->type != type1 &&
255 token->type != type2 &&
256 token->type != type3)
258 readToken (token);
259 if (token->type == TOKEN_OPEN_CURLY)
261 skipTo (token, TOKEN_CLOSE_CURLY);
262 readToken (token);
264 else if (token->type == TOKEN_OPEN_SQUARE)
266 skipTo (token, TOKEN_CLOSE_SQUARE);
267 readToken (token);
272 static jsonKind tokenToKind (const tokenType type)
274 switch (type)
276 case TOKEN_OPEN_CURLY: return TAG_OBJECT;
277 case TOKEN_OPEN_SQUARE: return TAG_ARRAY;
278 case TOKEN_STRING: return TAG_STRING;
279 case TOKEN_TRUE:
280 case TOKEN_FALSE: return TAG_BOOLEAN;
281 case TOKEN_NUMBER: return TAG_NUMBER;
282 default: return TAG_NULL;
286 static void parseValue (tokenInfo *const token)
288 if (token->type == TOKEN_OPEN_CURLY)
290 tokenInfo *name = newToken ();
294 readTokenFull (token, TRUE);
295 if (token->type == TOKEN_STRING)
297 jsonKind tagKind = TAG_NULL; /* default in case of invalid value */
299 copyToken (name, token);
301 /* skip any possible garbage before the value */
302 skipToOneOf3 (token, TOKEN_CLOSE_CURLY, TOKEN_COLON, TOKEN_COMMA);
304 if (token->type == TOKEN_COLON)
306 readToken (token);
307 tagKind = tokenToKind (token->type);
309 pushScope (token, name, tagKind);
310 parseValue (token);
311 popScope (token, name);
314 makeJsonTag (name, tagKind);
316 /* skip to the end of the construct */
317 skipToOneOf2 (token, TOKEN_CLOSE_CURLY, TOKEN_COMMA);
319 while (token->type != TOKEN_EOF &&
320 token->type != TOKEN_CLOSE_CURLY);
322 if (token->type == TOKEN_CLOSE_CURLY)
323 readToken (token);
325 deleteToken (name);
327 else if (token->type == TOKEN_OPEN_SQUARE)
329 tokenInfo *name = newToken ();
330 char buf[32];
331 unsigned int nth = 0;
333 readToken (token);
334 while (token->type != TOKEN_EOF &&
335 token->type != TOKEN_CLOSE_SQUARE)
337 jsonKind tagKind;
339 tagKind = tokenToKind (token->type);
341 copyToken (name, token);
342 snprintf (buf, sizeof buf, "%u", nth++);
343 vStringCopyS (name->string, buf);
345 makeJsonTag (name, tagKind);
346 pushScope (token, name, tagKind);
347 parseValue (token);
348 popScope (token, name);
350 /* skip to the end of the construct */
351 skipToOneOf2 (token, TOKEN_CLOSE_SQUARE, TOKEN_COMMA);
352 if (token->type != TOKEN_CLOSE_SQUARE)
353 readToken (token);
356 if (token->type == TOKEN_CLOSE_SQUARE)
357 readToken (token);
359 deleteToken (name);
363 static void findJsonTags (void)
365 tokenInfo *const token = newToken ();
367 /* We allow multiple top-level elements, although it's not actually valid
368 * JSON. An interesting side effect of this is that we allow a leading
369 * Unicode BOM mark -- even though ok, many JSON parsers will choke on it */
372 readToken (token);
373 parseValue (token);
375 while (token->type != TOKEN_EOF);
377 deleteToken (token);
380 static void initialize (const langType language)
382 Lang_json = language;
383 addKeyword ("true", language, KEYWORD_true);
384 addKeyword ("false", language, KEYWORD_false);
385 addKeyword ("null", language, KEYWORD_null);
388 /* Create parser definition stucture */
389 extern parserDefinition* JsonParser (void)
391 static const char *const extensions [] = { "json", NULL };
392 parserDefinition *const def = parserNew ("JSON");
393 def->extensions = extensions;
394 def->kinds = JsonKinds;
395 def->kindCount = KIND_COUNT (JsonKinds);
396 def->parser = findJsonTags;
397 def->initialize = initialize;
399 return def;