tagmanager/ctags/json.c

   1 /*
   2  * Copyright (c) 2014, Colomban Wendling <colomban@geany.org>
   3  *
   4  * This source code is released for free distribution under the terms of the
   5  * GNU General Public License.
   6  */
   7 /*
   8  * This module contains functions for generating tags for JSON files.
   9  *
  10  * http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf
  11  *
  12  * This implementation is forgiving and allows many constructs that are not
 * actually valid but that don't conflict with the format.  This is intended to
  14  * better support partly broken or unfinished files.
  15  */
  16
  17 #include "general.h"
  18
  19 #include <string.h>
  20 #include "main.h"
  21 #include "entry.h"
  22 #include "keyword.h"
  23 #include "parse.h"
  24 #include "read.h"
  25 #include "vstring.h"
  26
  27 typedef enum {
  28         TOKEN_EOF,
  29         TOKEN_UNDEFINED,
  30         TOKEN_OPEN_SQUARE,
  31         TOKEN_CLOSE_SQUARE,
  32         TOKEN_OPEN_CURLY,
  33         TOKEN_CLOSE_CURLY,
  34         TOKEN_COLON,
  35         TOKEN_COMMA,
  36         TOKEN_TRUE,
  37         TOKEN_FALSE,
  38         TOKEN_NULL,
  39         TOKEN_NUMBER,
  40         TOKEN_STRING
  41 } tokenType;
  42
  43 typedef enum {
  44         TAG_NONE = -1,
  45         TAG_OBJECT,
  46         TAG_ARRAY,
  47         TAG_NUMBER,
  48         TAG_STRING,
  49         TAG_BOOLEAN,
  50         TAG_NULL,
  51         TAG_COUNT
  52 } jsonKind;
  53
  54 typedef struct {
  55         tokenType               type;
  56         jsonKind                scopeKind;
  57         vString                 *string;
  58         vString                 *scope;
  59         unsigned long   lineNumber;
  60         MIOPos                  filePosition;
  61 } tokenInfo;
  62
  63 typedef enum {
  64         KEYWORD_true,
  65         KEYWORD_false,
  66         KEYWORD_null
  67 } keywordId;
  68
  69 static langType Lang_json;
  70
  71 static kindOption JsonKinds [] = {
  72         { TRUE,  'o', "member",         "objects"       },
  73         { TRUE,  'a', "member",         "arrays"        },
  74         { TRUE,  'n', "member",         "numbers"       },
  75         { TRUE,  's', "member",         "strings"       },
  76         { TRUE,  'b', "member",         "booleans"      },
  77         { TRUE,  'z', "member",         "nulls"         }
  78 };
  79
  80 static tokenInfo *newToken (void)
  81 {
  82         tokenInfo *const token = xMalloc (1, tokenInfo);
  83
  84         token->type                     = TOKEN_UNDEFINED;
  85         token->scopeKind        = TAG_NONE;
  86         token->string           = vStringNew ();
  87         token->scope            = vStringNew ();
  88         token->lineNumber       = getSourceLineNumber ();
  89         token->filePosition     = getInputFilePosition ();
  90
  91         return token;
  92 }
  93
  94 static void deleteToken (tokenInfo *const token)
  95 {
  96         vStringDelete (token->string);
  97         vStringDelete (token->scope);
  98         eFree (token);
  99 }
 100
 101 static void copyToken (tokenInfo *const dest, tokenInfo *const src)
 102 {
 103         dest->type = src->type;
 104         dest->scopeKind = src->scopeKind;
 105         vStringCopy (dest->string, src->string);
 106         vStringCopy (dest->scope, src->scope);
 107         dest->lineNumber = src->lineNumber;
 108         dest->filePosition = src->filePosition;
 109 }
 110
 111 static void makeJsonTag (tokenInfo *const token, const jsonKind kind)
 112 {
 113         tagEntryInfo e;
 114
 115         if (! JsonKinds[kind].enabled)
 116                 return;
 117
 118         initTagEntry (&e, vStringValue (token->string));
 119
 120         e.lineNumber    = token->lineNumber;
 121         e.filePosition  = token->filePosition;
 122         e.kindName              = JsonKinds[kind].name;
 123         e.kind                  = JsonKinds[kind].letter;
 124
 125         if (vStringLength (token->scope) > 0)
 126         {
 127                 Assert (token->scopeKind > TAG_NONE && token->scopeKind < TAG_COUNT);
 128
 129                 e.extensionFields.scope[0] = JsonKinds[token->scopeKind].name;
 130                 e.extensionFields.scope[1] = vStringValue (token->scope);
 131         }
 132
 133         makeTagEntry (&e);
 134 }
 135
 136 static boolean isIdentChar (int c)
 137 {
 138         return (isalnum (c) || c == '+' || c == '-' || c == '.');
 139 }
 140
 141 static void readTokenFull (tokenInfo *const token,
 142                                                    boolean includeStringRepr)
 143 {
 144         int c;
 145
 146         token->type = TOKEN_UNDEFINED;
 147         vStringClear (token->string);
 148
 149         do
 150                 c = fileGetc ();
 151         while (c == '\t' || c == ' ' || c == '\r' || c == '\n');
 152
 153         token->lineNumber   = getSourceLineNumber ();
 154         token->filePosition = getInputFilePosition ();
 155
 156         switch (c)
 157         {
 158                 case EOF: token->type = TOKEN_EOF;                      break;
 159                 case '[': token->type = TOKEN_OPEN_SQUARE;      break;
 160                 case ']': token->type = TOKEN_CLOSE_SQUARE;     break;
 161                 case '{': token->type = TOKEN_OPEN_CURLY;       break;
 162                 case '}': token->type = TOKEN_CLOSE_CURLY;      break;
 163                 case ':': token->type = TOKEN_COLON;            break;
 164                 case ',': token->type = TOKEN_COMMA;            break;
 165
 166                 case '"':
 167                 {
 168                         boolean escaped = FALSE;
 169                         token->type = TOKEN_STRING;
 170                         while (TRUE)
 171                         {
 172                                 c = fileGetc ();
 173                                 /* we don't handle unicode escapes but they are safe */
 174                                 if (escaped)
 175                                         escaped = FALSE;
 176                                 else if (c == '\\')
 177                                         escaped = TRUE;
 178                                 else if (c >= 0x00 && c <= 0x1F)
 179                                         break; /* break on invalid, unescaped, control characters */
 180                                 else if (c == '"' || c == EOF)
 181                                         break;
 182                                 if (includeStringRepr)
 183                                         vStringPut (token->string, c);
 184                         }
 185                         vStringTerminate (token->string);
 186                         break;
 187                 }
 188
 189                 default:
 190                         if (! isIdentChar (c))
 191                                 token->type = TOKEN_UNDEFINED;
 192                         else
 193                         {
 194                                 do
 195                                 {
 196                                         vStringPut (token->string, c);
 197                                         c = fileGetc ();
 198                                 }
 199                                 while (c != EOF && isIdentChar (c));
 200                                 vStringTerminate (token->string);
 201                                 fileUngetc (c);
 202                                 switch (lookupKeyword (vStringValue (token->string), Lang_json))
 203                                 {
 204                                         case KEYWORD_true:      token->type = TOKEN_TRUE;       break;
 205                                         case KEYWORD_false:     token->type = TOKEN_FALSE;      break;
 206                                         case KEYWORD_null:      token->type = TOKEN_NULL;       break;
 207                                         default:                        token->type = TOKEN_NUMBER;     break;
 208                                 }
 209                         }
 210                         break;
 211         }
 212 }
 213
 214 #define readToken(t) (readTokenFull ((t), FALSE))
 215
 216 static void pushScope (tokenInfo *const token,
 217                                            const tokenInfo *const parent,
 218                                            const jsonKind parentKind)
 219 {
 220         if (vStringLength (token->scope) > 0)
 221                 vStringPut (token->scope, '.');
 222         vStringCat (token->scope, parent->string);
 223         vStringTerminate (token->scope);
 224         token->scopeKind = parentKind;
 225 }
 226
 227 static void popScope (tokenInfo *const token,
 228                                           const tokenInfo *const parent)
 229 {
 230         char *dot = strrchr (token->scope->buffer, '.');
 231
 232         if (! dot)
 233                 vStringClear (token->scope);
 234         else
 235         {
 236                 *dot = 0;
 237                 token->scope->length = dot - token->scope->buffer;
 238         }
 239         token->scopeKind = parent->scopeKind;
 240 }
 241
 242 #define skipToOneOf2(token, type1, type2) \
 243         (skipToOneOf3 (token, type1, type2, TOKEN_EOF /* dummy */))
 244
 245 #define skipTo(token, type) \
 246         (skipToOneOf3 (token, type, /* dummies */ TOKEN_EOF, TOKEN_EOF))
 247
 248 static void skipToOneOf3 (tokenInfo *const token,
 249                                                   const tokenType type1,
 250                                                   const tokenType type2,
 251                                                   const tokenType type3)
 252 {
 253         while (token->type != TOKEN_EOF &&
 254                    token->type != type1 &&
 255                    token->type != type2 &&
 256                    token->type != type3)
 257         {
 258                 readToken (token);
 259                 if (token->type == TOKEN_OPEN_CURLY)
 260                 {
 261                         skipTo (token, TOKEN_CLOSE_CURLY);
 262                         readToken (token);
 263                 }
 264                 else if (token->type == TOKEN_OPEN_SQUARE)
 265                 {
 266                         skipTo (token, TOKEN_CLOSE_SQUARE);
 267                         readToken (token);
 268                 }
 269         }
 270 }
 271
 272 static jsonKind tokenToKind (const tokenType type)
 273 {
 274         switch (type)
 275         {
 276                 case TOKEN_OPEN_CURLY:  return TAG_OBJECT;
 277                 case TOKEN_OPEN_SQUARE: return TAG_ARRAY;
 278                 case TOKEN_STRING:              return TAG_STRING;
 279                 case TOKEN_TRUE:
 280                 case TOKEN_FALSE:               return TAG_BOOLEAN;
 281                 case TOKEN_NUMBER:              return TAG_NUMBER;
 282                 default:                                return TAG_NULL;
 283         }
 284 }
 285
 286 static void parseValue (tokenInfo *const token)
 287 {
 288         if (token->type == TOKEN_OPEN_CURLY)
 289         {
 290                 tokenInfo *name = newToken ();
 291
 292                 do
 293                 {
 294                         readTokenFull (token, TRUE);
 295                         if (token->type == TOKEN_STRING)
 296                         {
 297                                 jsonKind tagKind = TAG_NULL; /* default in case of invalid value */
 298
 299                                 copyToken (name, token);
 300
 301                                 /* skip any possible garbage before the value */
 302                                 skipToOneOf3 (token, TOKEN_CLOSE_CURLY, TOKEN_COLON, TOKEN_COMMA);
 303
 304                                 if (token->type == TOKEN_COLON)
 305                                 {
 306                                         readToken (token);
 307                                         tagKind = tokenToKind (token->type);
 308
 309                                         pushScope (token, name, tagKind);
 310                                         parseValue (token);
 311                                         popScope (token, name);
 312                                 }
 313
 314                                 makeJsonTag (name, tagKind);
 315                         }
 316                         /* skip to the end of the construct */
 317                         skipToOneOf2 (token, TOKEN_CLOSE_CURLY, TOKEN_COMMA);
 318                 }
 319                 while (token->type != TOKEN_EOF &&
 320                            token->type != TOKEN_CLOSE_CURLY);
 321
 322                 if (token->type == TOKEN_CLOSE_CURLY)
 323                         readToken (token);
 324
 325                 deleteToken (name);
 326         }
 327         else if (token->type == TOKEN_OPEN_SQUARE)
 328         {
 329                 tokenInfo *name = newToken ();
 330                 char buf[32];
 331                 unsigned int nth = 0;
 332
 333                 readToken (token);
 334                 while (token->type != TOKEN_EOF &&
 335                            token->type != TOKEN_CLOSE_SQUARE)
 336                 {
 337                         jsonKind tagKind;
 338
 339                         tagKind = tokenToKind (token->type);
 340
 341                         copyToken (name, token);
 342                         snprintf (buf, sizeof buf, "%u", nth++);
 343                         vStringCopyS (name->string, buf);
 344
 345                         makeJsonTag (name, tagKind);
 346                         pushScope (token, name, tagKind);
 347                         parseValue (token);
 348                         popScope (token, name);
 349
 350                         /* skip to the end of the construct */
 351                         skipToOneOf2 (token, TOKEN_CLOSE_SQUARE, TOKEN_COMMA);
 352                         if (token->type != TOKEN_CLOSE_SQUARE)
 353                                 readToken (token);
 354                 }
 355
 356                 if (token->type == TOKEN_CLOSE_SQUARE)
 357                         readToken (token);
 358
 359                 deleteToken (name);
 360         }
 361 }
 362
 363 static void findJsonTags (void)
 364 {
 365         tokenInfo *const token = newToken ();
 366
 367         /* We allow multiple top-level elements, although it's not actually valid
 368          * JSON.  An interesting side effect of this is that we allow a leading
 369          * Unicode BOM mark -- even though ok, many JSON parsers will choke on it */
 370         do
 371         {
 372                 readToken (token);
 373                 parseValue (token);
 374         }
 375         while (token->type != TOKEN_EOF);
 376
 377         deleteToken (token);
 378 }
 379
 380 static void initialize (const langType language)
 381 {
 382         Lang_json = language;
 383         addKeyword ("true", language, KEYWORD_true);
 384         addKeyword ("false", language, KEYWORD_false);
 385         addKeyword ("null", language, KEYWORD_null);
 386 }
 387
 388 /* Create parser definition stucture */
 389 extern parserDefinition* JsonParser (void)
 390 {
 391         static const char *const extensions [] = { "json", NULL };
 392         parserDefinition *const def = parserNew ("JSON");
 393         def->extensions = extensions;
 394         def->kinds              = JsonKinds;
 395         def->kindCount  = KIND_COUNT (JsonKinds);
 396         def->parser             = findJsonTags;
 397         def->initialize = initialize;
 398
 399         return def;
 400 }