ctags/parsers/json.c

   1 /*
   2  * Copyright (c) 2014, Colomban Wendling <colomban@geany.org>
   3  *
   4  * This source code is released for free distribution under the terms of the
   5  * GNU General Public License version 2 or (at your option) any later version.
   6  */
   7 /*
   8  * This module contains functions for generating tags for JSON files.
   9  *
  10  * http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf
  11  *
  12  * This implementation is forgiving and allows many constructs that are not
  13  * actually valid but that don't conflict with the format.  This is intended to
  14  * better support partly broken or unfinished files.
  15  */
  16
  17 #include "general.h"
  18
  19 #include <string.h>
  20 #include "debug.h"
  21 #include "main.h"
  22 #include "entry.h"
  23 #include "keyword.h"
  24 #include "parse.h"
  25 #include "read.h"
  26 #include "routines.h"
  27 #include "vstring.h"
  28
  29 typedef enum {
  30         TOKEN_EOF,
  31         TOKEN_UNDEFINED,
  32         TOKEN_OPEN_SQUARE,
  33         TOKEN_CLOSE_SQUARE,
  34         TOKEN_OPEN_CURLY,
  35         TOKEN_CLOSE_CURLY,
  36         TOKEN_COLON,
  37         TOKEN_COMMA,
  38         TOKEN_TRUE,
  39         TOKEN_FALSE,
  40         TOKEN_NULL,
  41         TOKEN_NUMBER,
  42         TOKEN_STRING
  43 } tokenType;
  44
  45 typedef enum {
  46         TAG_NONE = -1,
  47         TAG_OBJECT,
  48         TAG_ARRAY,
  49         TAG_NUMBER,
  50         TAG_STRING,
  51         TAG_BOOLEAN,
  52         TAG_NULL,
  53         TAG_COUNT
  54 } jsonKind;
  55
  56 typedef struct {
  57         tokenType               type;
  58         jsonKind                scopeKind;
  59         vString                 *string;
  60         vString                 *scope;
  61         unsigned long   lineNumber;
  62         MIOPos                  filePosition;
  63 } tokenInfo;
  64
  65 typedef enum {
  66         KEYWORD_true,
  67         KEYWORD_false,
  68         KEYWORD_null
  69 } keywordId;
  70
  71 static langType Lang_json;
  72
  73 static kindOption JsonKinds [] = {
  74         { TRUE,  'o', "object",         "objects"       },
  75         { TRUE,  'a', "array",          "arrays"        },
  76         { TRUE,  'n', "number",         "numbers"       },
  77         { TRUE,  's', "string",         "strings"       },
  78         { TRUE,  'b', "boolean",        "booleans"      },
  79         { TRUE,  'z', "null",           "nulls"         }
  80 };
  81
  82 static tokenInfo *newToken (void)
  83 {
  84         tokenInfo *const token = xMalloc (1, tokenInfo);
  85
  86         token->type                     = TOKEN_UNDEFINED;
  87         token->scopeKind        = TAG_NONE;
  88         token->string           = vStringNew ();
  89         token->scope            = vStringNew ();
  90         token->lineNumber       = getSourceLineNumber ();
  91         token->filePosition     = getInputFilePosition ();
  92
  93         return token;
  94 }
  95
  96 static void deleteToken (tokenInfo *const token)
  97 {
  98         vStringDelete (token->string);
  99         vStringDelete (token->scope);
 100         eFree (token);
 101 }
 102
 103 static void copyToken (tokenInfo *const dest, tokenInfo *const src)
 104 {
 105         dest->type = src->type;
 106         dest->scopeKind = src->scopeKind;
 107         vStringCopy (dest->string, src->string);
 108         vStringCopy (dest->scope, src->scope);
 109         dest->lineNumber = src->lineNumber;
 110         dest->filePosition = src->filePosition;
 111 }
 112
 113 static void makeJsonTag (tokenInfo *const token, const jsonKind kind)
 114 {
 115         tagEntryInfo e;
 116
 117         if (! JsonKinds[kind].enabled)
 118                 return;
 119
 120         initTagEntry (&e, vStringValue (token->string));
 121
 122         e.lineNumber    = token->lineNumber;
 123         e.filePosition  = token->filePosition;
 124         e.kindName              = JsonKinds[kind].name;
 125         e.kind                  = JsonKinds[kind].letter;
 126
 127         if (vStringLength (token->scope) > 0)
 128         {
 129                 Assert (token->scopeKind > TAG_NONE && token->scopeKind < TAG_COUNT);
 130
 131                 e.extensionFields.scope[0] = JsonKinds[token->scopeKind].name;
 132                 e.extensionFields.scope[1] = vStringValue (token->scope);
 133         }
 134
 135         makeTagEntry (&e);
 136 }
 137
 138 static boolean isIdentChar (int c)
 139 {
 140         return (isalnum (c) || c == '+' || c == '-' || c == '.');
 141 }
 142
 143 static void readTokenFull (tokenInfo *const token,
 144                                                    boolean includeStringRepr)
 145 {
 146         int c;
 147
 148         token->type = TOKEN_UNDEFINED;
 149         vStringClear (token->string);
 150
 151         do
 152                 c = fileGetc ();
 153         while (c == '\t' || c == ' ' || c == '\r' || c == '\n');
 154
 155         token->lineNumber   = getSourceLineNumber ();
 156         token->filePosition = getInputFilePosition ();
 157
 158         switch (c)
 159         {
 160                 case EOF: token->type = TOKEN_EOF;                      break;
 161                 case '[': token->type = TOKEN_OPEN_SQUARE;      break;
 162                 case ']': token->type = TOKEN_CLOSE_SQUARE;     break;
 163                 case '{': token->type = TOKEN_OPEN_CURLY;       break;
 164                 case '}': token->type = TOKEN_CLOSE_CURLY;      break;
 165                 case ':': token->type = TOKEN_COLON;            break;
 166                 case ',': token->type = TOKEN_COMMA;            break;
 167
 168                 case '"':
 169                 {
 170                         boolean escaped = FALSE;
 171                         token->type = TOKEN_STRING;
 172                         while (TRUE)
 173                         {
 174                                 c = fileGetc ();
 175                                 /* we don't handle unicode escapes but they are safe */
 176                                 if (escaped)
 177                                         escaped = FALSE;
 178                                 else if (c == '\\')
 179                                         escaped = TRUE;
 180                                 else if (c >= 0x00 && c <= 0x1F)
 181                                         break; /* break on invalid, unescaped, control characters */
 182                                 else if (c == '"' || c == EOF)
 183                                         break;
 184                                 if (includeStringRepr)
 185                                         vStringPut (token->string, c);
 186                         }
 187                         vStringTerminate (token->string);
 188                         break;
 189                 }
 190
 191                 default:
 192                         if (! isIdentChar (c))
 193                                 token->type = TOKEN_UNDEFINED;
 194                         else
 195                         {
 196                                 do
 197                                 {
 198                                         vStringPut (token->string, c);
 199                                         c = fileGetc ();
 200                                 }
 201                                 while (c != EOF && isIdentChar (c));
 202                                 vStringTerminate (token->string);
 203                                 fileUngetc (c);
 204                                 switch (lookupKeyword (vStringValue (token->string), Lang_json))
 205                                 {
 206                                         case KEYWORD_true:      token->type = TOKEN_TRUE;       break;
 207                                         case KEYWORD_false:     token->type = TOKEN_FALSE;      break;
 208                                         case KEYWORD_null:      token->type = TOKEN_NULL;       break;
 209                                         default:                        token->type = TOKEN_NUMBER;     break;
 210                                 }
 211                         }
 212                         break;
 213         }
 214 }
 215
 216 #define readToken(t) (readTokenFull ((t), FALSE))
 217
 218 static void pushScope (tokenInfo *const token,
 219                                            const tokenInfo *const parent,
 220                                            const jsonKind parentKind)
 221 {
 222         if (vStringLength (token->scope) > 0)
 223                 vStringPut (token->scope, '.');
 224         vStringCat (token->scope, parent->string);
 225         vStringTerminate (token->scope);
 226         token->scopeKind = parentKind;
 227 }
 228
 229 static void popScope (tokenInfo *const token,
 230                                           const tokenInfo *const parent)
 231 {
 232         vStringTruncate (token->scope, vStringLength (parent->scope));
 233         token->scopeKind = parent->scopeKind;
 234 }
 235
 236 #define skipToOneOf2(token, type1, type2) \
 237         (skipToOneOf3 (token, type1, type2, TOKEN_EOF /* dummy */))
 238
 239 #define skipTo(token, type) \
 240         (skipToOneOf3 (token, type, /* dummies */ TOKEN_EOF, TOKEN_EOF))
 241
 242 static void skipToOneOf3 (tokenInfo *const token,
 243                                                   const tokenType type1,
 244                                                   const tokenType type2,
 245                                                   const tokenType type3)
 246 {
 247         while (token->type != TOKEN_EOF &&
 248                    token->type != type1 &&
 249                    token->type != type2 &&
 250                    token->type != type3)
 251         {
 252                 readToken (token);
 253                 if (token->type == TOKEN_OPEN_CURLY)
 254                 {
 255                         skipTo (token, TOKEN_CLOSE_CURLY);
 256                         readToken (token);
 257                 }
 258                 else if (token->type == TOKEN_OPEN_SQUARE)
 259                 {
 260                         skipTo (token, TOKEN_CLOSE_SQUARE);
 261                         readToken (token);
 262                 }
 263         }
 264 }
 265
 266 static jsonKind tokenToKind (const tokenType type)
 267 {
 268         switch (type)
 269         {
 270                 case TOKEN_OPEN_CURLY:  return TAG_OBJECT;
 271                 case TOKEN_OPEN_SQUARE: return TAG_ARRAY;
 272                 case TOKEN_STRING:              return TAG_STRING;
 273                 case TOKEN_TRUE:
 274                 case TOKEN_FALSE:               return TAG_BOOLEAN;
 275                 case TOKEN_NUMBER:              return TAG_NUMBER;
 276                 default:                                return TAG_NULL;
 277         }
 278 }
 279
 280 static void parseValue (tokenInfo *const token)
 281 {
 282         if (token->type == TOKEN_OPEN_CURLY)
 283         {
 284                 tokenInfo *name = newToken ();
 285
 286                 do
 287                 {
 288                         readTokenFull (token, TRUE);
 289                         if (token->type == TOKEN_STRING)
 290                         {
 291                                 jsonKind tagKind = TAG_NULL; /* default in case of invalid value */
 292
 293                                 copyToken (name, token);
 294
 295                                 /* skip any possible garbage before the value */
 296                                 skipToOneOf3 (token, TOKEN_CLOSE_CURLY, TOKEN_COLON, TOKEN_COMMA);
 297
 298                                 if (token->type == TOKEN_COLON)
 299                                 {
 300                                         readToken (token);
 301                                         tagKind = tokenToKind (token->type);
 302
 303                                         pushScope (token, name, tagKind);
 304                                         parseValue (token);
 305                                         popScope (token, name);
 306                                 }
 307
 308                                 makeJsonTag (name, tagKind);
 309                         }
 310                         /* skip to the end of the construct */
 311                         skipToOneOf2 (token, TOKEN_CLOSE_CURLY, TOKEN_COMMA);
 312                 }
 313                 while (token->type != TOKEN_EOF &&
 314                            token->type != TOKEN_CLOSE_CURLY);
 315
 316                 if (token->type == TOKEN_CLOSE_CURLY)
 317                         readToken (token);
 318
 319                 deleteToken (name);
 320         }
 321         else if (token->type == TOKEN_OPEN_SQUARE)
 322         {
 323                 tokenInfo *name = newToken ();
 324                 char buf[32];
 325                 unsigned int nth = 0;
 326
 327                 readToken (token);
 328                 while (token->type != TOKEN_EOF &&
 329                            token->type != TOKEN_CLOSE_SQUARE)
 330                 {
 331                         jsonKind tagKind;
 332
 333                         tagKind = tokenToKind (token->type);
 334
 335                         copyToken (name, token);
 336                         snprintf (buf, sizeof buf, "%u", nth++);
 337                         vStringCopyS (name->string, buf);
 338
 339                         makeJsonTag (name, tagKind);
 340                         pushScope (token, name, tagKind);
 341                         parseValue (token);
 342                         popScope (token, name);
 343
 344                         /* skip to the end of the construct */
 345                         skipToOneOf2 (token, TOKEN_CLOSE_SQUARE, TOKEN_COMMA);
 346                         if (token->type != TOKEN_CLOSE_SQUARE)
 347                                 readToken (token);
 348                 }
 349
 350                 if (token->type == TOKEN_CLOSE_SQUARE)
 351                         readToken (token);
 352
 353                 deleteToken (name);
 354         }
 355 }
 356
 357 static void findJsonTags (void)
 358 {
 359         tokenInfo *const token = newToken ();
 360
 361         /* We allow multiple top-level elements, although it's not actually valid
 362          * JSON.  An interesting side effect of this is that we allow a leading
 363          * Unicode BOM mark -- even though ok, many JSON parsers will choke on it */
 364         do
 365         {
 366                 readToken (token);
 367                 parseValue (token);
 368         }
 369         while (token->type != TOKEN_EOF);
 370
 371         deleteToken (token);
 372 }
 373
 374 static void initialize (const langType language)
 375 {
 376         Lang_json = language;
 377         addKeyword ("true", language, KEYWORD_true);
 378         addKeyword ("false", language, KEYWORD_false);
 379         addKeyword ("null", language, KEYWORD_null);
 380 }
 381
 382 /* Create parser definition structure */
 383 extern parserDefinition* JsonParser (void)
 384 {
 385         static const char *const extensions [] = { "json", NULL };
 386         parserDefinition *const def = parserNew ("JSON");
 387         def->extensions = extensions;
 388         def->kinds              = JsonKinds;
 389         def->kindCount  = KIND_COUNT (JsonKinds);
 390         def->parser             = findJsonTags;
 391         def->initialize = initialize;
 392
 393         return def;
 394 }