ctags/parsers/json.c

   1 /*
   2  * Copyright (c) 2014, Colomban Wendling <colomban@geany.org>
   3  *
   4  * This source code is released for free distribution under the terms of the
   5  * GNU General Public License version 2 or (at your option) any later version.
   6  */
   7 /*
   8  * This module contains functions for generating tags for JSON files.
   9  *
  10  * http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf
  11  *
  12  * This implementation is forgiving and allows many constructs that are not
  13  * actually valid but that don't conflict with the format.  This is intended to
  14  * better support partly broken or unfinished files.
  15  */
  16
  17 #include "general.h"
  18
  19 #include <string.h>
  20 #include "debug.h"
  21 #include "entry.h"
  22 #include "keyword.h"
  23 #include "parse.h"
  24 #include "read.h"
  25 #include "routines.h"
  26 #include "vstring.h"
  27
/* Lexical token classes produced by readTokenFull(). */
typedef enum {
	TOKEN_EOF,			/* end of input */
	TOKEN_UNDEFINED,	/* initial state, or a character that starts no known token */
	TOKEN_OPEN_SQUARE,	/* '[' */
	TOKEN_CLOSE_SQUARE,	/* ']' */
	TOKEN_OPEN_CURLY,	/* '{' */
	TOKEN_CLOSE_CURLY,	/* '}' */
	TOKEN_COLON,		/* ':' */
	TOKEN_COMMA,		/* ',' */
	TOKEN_TRUE,			/* keyword "true" */
	TOKEN_FALSE,		/* keyword "false" */
	TOKEN_NULL,			/* keyword "null" */
	TOKEN_NUMBER,		/* any other identifier-like run (see isIdentChar()) */
	TOKEN_STRING		/* text introduced by a double quote */
} tokenType;
  43
/*
 * Tag kinds.  The non-negative values are used as indices into JsonKinds[]
 * (see makeJsonTag()), so both must be kept in the same order.
 */
typedef enum {
	TAG_NONE = -1,	/* no kind; initial scopeKind of a fresh token */
	TAG_OBJECT,
	TAG_ARRAY,
	TAG_NUMBER,
	TAG_STRING,
	TAG_BOOLEAN,
	TAG_NULL,
	TAG_COUNT		/* number of real kinds; upper bound checked in makeJsonTag() */
} jsonKind;
  54
/* A lexical token together with the scope it was found in. */
typedef struct {
	tokenType		type;			/* class of the token */
	jsonKind		scopeKind;		/* kind of the innermost enclosing scope, or TAG_NONE */
	vString			*string;		/* token text (string contents are only stored on request) */
	vString			*scope;			/* '.'-separated names of the enclosing scopes */
	unsigned long	lineNumber;		/* input line number recorded when the token was read */
	MIOPos			filePosition;	/* input file position recorded when the token was read */
} tokenInfo;
  63
/* Keyword ids registered in initialize() and looked up in readTokenFull(). */
typedef enum {
	KEYWORD_true,
	KEYWORD_false,
	KEYWORD_null
} keywordId;
  69
/* Language id of this parser; set in initialize() and used for keyword lookup. */
static langType Lang_json;
  71
/*
 * Kind table handed to the parser definition in JsonParser().  Entries are
 * indexed with jsonKind values (see makeJsonTag()), so the order here has to
 * match that enum.
 * NOTE(review): the columns are presumably "enabled by default", kind letter,
 * kind name and description -- confirm against the kindDefinition declaration.
 */
static kindDefinition JsonKinds [] = {
	{ true,  'o', "object",		"objects"	},
	{ true,  'a', "array",		"arrays"	},
	{ true,  'n', "number",		"numbers"	},
	{ true,  's', "string",		"strings"	},
	{ true,  'b', "boolean",	"booleans"	},
	{ true,  'z', "null",		"nulls"		}
};
  80
  81 static tokenInfo *newToken (void)
  82 {
  83         tokenInfo *const token = xMalloc (1, tokenInfo);
  84
  85         token->type                     = TOKEN_UNDEFINED;
  86         token->scopeKind        = TAG_NONE;
  87         token->string           = vStringNew ();
  88         token->scope            = vStringNew ();
  89         token->lineNumber       = getInputLineNumber ();
  90         token->filePosition     = getInputFilePosition ();
  91
  92         return token;
  93 }
  94
  95 static void deleteToken (tokenInfo *const token)
  96 {
  97         vStringDelete (token->string);
  98         vStringDelete (token->scope);
  99         eFree (token);
 100 }
 101
 102 static void copyToken (tokenInfo *const dest, tokenInfo *const src)
 103 {
 104         dest->type = src->type;
 105         dest->scopeKind = src->scopeKind;
 106         vStringCopy (dest->string, src->string);
 107         vStringCopy (dest->scope, src->scope);
 108         dest->lineNumber = src->lineNumber;
 109         dest->filePosition = src->filePosition;
 110 }
 111
 112 static void makeJsonTag (tokenInfo *const token, const jsonKind kind)
 113 {
 114         tagEntryInfo e;
 115
 116         if (! JsonKinds[kind].enabled)
 117                 return;
 118
 119         initTagEntry (&e, vStringValue (token->string), kind);
 120
 121         e.lineNumber    = token->lineNumber;
 122         e.filePosition  = token->filePosition;
 123
 124         if (vStringLength (token->scope) > 0)
 125         {
 126                 Assert (token->scopeKind > TAG_NONE && token->scopeKind < TAG_COUNT);
 127
 128                 e.extensionFields.scopeKindIndex = token->scopeKind;
 129                 e.extensionFields.scopeName = vStringValue (token->scope);
 130         }
 131
 132         makeTagEntry (&e);
 133 }
 134
 135 static bool isIdentChar (int c)
 136 {
 137         return (isalnum (c) || c == '+' || c == '-' || c == '.');
 138 }
 139
 140 static void readTokenFull (tokenInfo *const token,
 141                                                    bool includeStringRepr)
 142 {
 143         int c;
 144
 145         token->type = TOKEN_UNDEFINED;
 146         vStringClear (token->string);
 147
 148         do
 149                 c = getcFromInputFile ();
 150         while (c == '\t' || c == ' ' || c == '\r' || c == '\n');
 151
 152         token->lineNumber   = getInputLineNumber ();
 153         token->filePosition = getInputFilePosition ();
 154
 155         switch (c)
 156         {
 157                 case EOF: token->type = TOKEN_EOF;                      break;
 158                 case '[': token->type = TOKEN_OPEN_SQUARE;      break;
 159                 case ']': token->type = TOKEN_CLOSE_SQUARE;     break;
 160                 case '{': token->type = TOKEN_OPEN_CURLY;       break;
 161                 case '}': token->type = TOKEN_CLOSE_CURLY;      break;
 162                 case ':': token->type = TOKEN_COLON;            break;
 163                 case ',': token->type = TOKEN_COMMA;            break;
 164
 165                 case '"':
 166                 {
 167                         bool escaped = false;
 168                         token->type = TOKEN_STRING;
 169                         while (true)
 170                         {
 171                                 c = getcFromInputFile ();
 172                                 /* we don't handle unicode escapes but they are safe */
 173                                 if (escaped)
 174                                         escaped = false;
 175                                 else if (c == '\\')
 176                                         escaped = true;
 177                                 else if (c >= 0x00 && c <= 0x1F)
 178                                         break; /* break on invalid, unescaped, control characters */
 179                                 else if (c == '"' || c == EOF)
 180                                         break;
 181                                 if (includeStringRepr)
 182                                         vStringPut (token->string, c);
 183                         }
 184                         break;
 185                 }
 186
 187                 default:
 188                         if (! isIdentChar (c))
 189                                 token->type = TOKEN_UNDEFINED;
 190                         else
 191                         {
 192                                 do
 193                                 {
 194                                         vStringPut (token->string, c);
 195                                         c = getcFromInputFile ();
 196                                 }
 197                                 while (c != EOF && isIdentChar (c));
 198                                 ungetcToInputFile (c);
 199                                 switch (lookupKeyword (vStringValue (token->string), Lang_json))
 200                                 {
 201                                         case KEYWORD_true:      token->type = TOKEN_TRUE;       break;
 202                                         case KEYWORD_false:     token->type = TOKEN_FALSE;      break;
 203                                         case KEYWORD_null:      token->type = TOKEN_NULL;       break;
 204                                         default:                        token->type = TOKEN_NUMBER;     break;
 205                                 }
 206                         }
 207                         break;
 208         }
 209 }
 210
/* Reads the next token without storing the contents of string tokens. */
#define readToken(t) (readTokenFull ((t), false))
 212
 213 static void pushScope (tokenInfo *const token,
 214                                            const tokenInfo *const parent,
 215                                            const jsonKind parentKind)
 216 {
 217         if (vStringLength (token->scope) > 0)
 218                 vStringPut (token->scope, '.');
 219         vStringCat (token->scope, parent->string);
 220         token->scopeKind = parentKind;
 221 }
 222
 223 static void popScope (tokenInfo *const token,
 224                                           const tokenInfo *const parent)
 225 {
 226         vStringTruncate (token->scope, vStringLength (parent->scope));
 227         token->scopeKind = parent->scopeKind;
 228 }
 229
/* Two-type variant of skipToOneOf3().  The third slot is filled with
 * TOKEN_EOF, on which skipToOneOf3() stops anyway. */
#define skipToOneOf2(token, type1, type2) \
	(skipToOneOf3 (token, type1, type2, TOKEN_EOF /* dummy */))
 232
/* Single-type variant of skipToOneOf3().  The unused slots are filled with
 * TOKEN_EOF, on which skipToOneOf3() stops anyway. */
#define skipTo(token, type) \
	(skipToOneOf3 (token, type, /* dummies */ TOKEN_EOF, TOKEN_EOF))
 235
 236 static void skipToOneOf3 (tokenInfo *const token,
 237                                                   const tokenType type1,
 238                                                   const tokenType type2,
 239                                                   const tokenType type3)
 240 {
 241         while (token->type != TOKEN_EOF &&
 242                    token->type != type1 &&
 243                    token->type != type2 &&
 244                    token->type != type3)
 245         {
 246                 readToken (token);
 247                 if (token->type == TOKEN_OPEN_CURLY)
 248                 {
 249                         skipTo (token, TOKEN_CLOSE_CURLY);
 250                         readToken (token);
 251                 }
 252                 else if (token->type == TOKEN_OPEN_SQUARE)
 253                 {
 254                         skipTo (token, TOKEN_CLOSE_SQUARE);
 255                         readToken (token);
 256                 }
 257         }
 258 }
 259
 260 static jsonKind tokenToKind (const tokenType type)
 261 {
 262         switch (type)
 263         {
 264                 case TOKEN_OPEN_CURLY:  return TAG_OBJECT;
 265                 case TOKEN_OPEN_SQUARE: return TAG_ARRAY;
 266                 case TOKEN_STRING:              return TAG_STRING;
 267                 case TOKEN_TRUE:
 268                 case TOKEN_FALSE:               return TAG_BOOLEAN;
 269                 case TOKEN_NUMBER:              return TAG_NUMBER;
 270                 default:                                return TAG_NULL;
 271         }
 272 }
 273
 274 static void parseValue (tokenInfo *const token)
 275 {
 276         if (token->type == TOKEN_OPEN_CURLY)
 277         {
 278                 tokenInfo *name = newToken ();
 279
 280                 do
 281                 {
 282                         readTokenFull (token, true);
 283                         if (token->type == TOKEN_STRING)
 284                         {
 285                                 jsonKind tagKind = TAG_NULL; /* default in case of invalid value */
 286
 287                                 copyToken (name, token);
 288
 289                                 /* skip any possible garbage before the value */
 290                                 skipToOneOf3 (token, TOKEN_CLOSE_CURLY, TOKEN_COLON, TOKEN_COMMA);
 291
 292                                 if (token->type == TOKEN_COLON)
 293                                 {
 294                                         readToken (token);
 295                                         tagKind = tokenToKind (token->type);
 296
 297                                         pushScope (token, name, tagKind);
 298                                         parseValue (token);
 299                                         popScope (token, name);
 300                                 }
 301
 302                                 makeJsonTag (name, tagKind);
 303                         }
 304                         /* skip to the end of the construct */
 305                         skipToOneOf2 (token, TOKEN_CLOSE_CURLY, TOKEN_COMMA);
 306                 }
 307                 while (token->type != TOKEN_EOF &&
 308                            token->type != TOKEN_CLOSE_CURLY);
 309
 310                 if (token->type == TOKEN_CLOSE_CURLY)
 311                         readToken (token);
 312
 313                 deleteToken (name);
 314         }
 315         else if (token->type == TOKEN_OPEN_SQUARE)
 316         {
 317                 tokenInfo *name = newToken ();
 318                 char buf[32];
 319                 unsigned int nth = 0;
 320
 321                 readToken (token);
 322                 while (token->type != TOKEN_EOF &&
 323                            token->type != TOKEN_CLOSE_SQUARE)
 324                 {
 325                         jsonKind tagKind;
 326
 327                         tagKind = tokenToKind (token->type);
 328
 329                         copyToken (name, token);
 330                         snprintf (buf, sizeof buf, "%u", nth++);
 331                         vStringCopyS (name->string, buf);
 332
 333                         makeJsonTag (name, tagKind);
 334                         pushScope (token, name, tagKind);
 335                         parseValue (token);
 336                         popScope (token, name);
 337
 338                         /* skip to the end of the construct */
 339                         skipToOneOf2 (token, TOKEN_CLOSE_SQUARE, TOKEN_COMMA);
 340                         if (token->type != TOKEN_CLOSE_SQUARE)
 341                                 readToken (token);
 342                 }
 343
 344                 if (token->type == TOKEN_CLOSE_SQUARE)
 345                         readToken (token);
 346
 347                 deleteToken (name);
 348         }
 349 }
 350
 351 static void findJsonTags (void)
 352 {
 353         tokenInfo *const token = newToken ();
 354
 355         /* We allow multiple top-level elements, although it's not actually valid
 356          * JSON.  An interesting side effect of this is that we allow a leading
 357          * Unicode BOM mark -- even though ok, many JSON parsers will choke on it */
 358         do
 359         {
 360                 readToken (token);
 361                 parseValue (token);
 362         }
 363         while (token->type != TOKEN_EOF);
 364
 365         deleteToken (token);
 366 }
 367
 368 static void initialize (const langType language)
 369 {
 370         Lang_json = language;
 371         addKeyword ("true", language, KEYWORD_true);
 372         addKeyword ("false", language, KEYWORD_false);
 373         addKeyword ("null", language, KEYWORD_null);
 374 }
 375
 376 /* Create parser definition structure */
 377 extern parserDefinition* JsonParser (void)
 378 {
 379         static const char *const extensions [] = { "json", NULL };
 380         parserDefinition *const def = parserNew ("JSON");
 381         def->extensions = extensions;
 382         def->kindTable  = JsonKinds;
 383         def->kindCount  = ARRAY_SIZE (JsonKinds);
 384         def->parser             = findJsonTags;
 385         def->initialize = initialize;
 386
 387         return def;
 388 }