tagmanager/ctags/json.c

   1 /*
   2  * Copyright (c) 2014, Colomban Wendling <colomban@geany.org>
   3  *
   4  * This source code is released for free distribution under the terms of the
   5  * GNU General Public License.
   6  */
   7 /*
   8  * This module contains functions for generating tags for JSON files.
   9  *
  10  * http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf
  11  *
  12  * This implementation is forgiving and allows many constructs that are not
  13  * actually valid but that don't conflict with the format.  This is intended to
  14  * better support partly broken or unfinished files.
  15  */
  16
  17 #include "general.h"
  18
  19 #include <string.h>
  20 #include "main.h"
  21 #include "entry.h"
  22 #include "keyword.h"
  23 #include "parse.h"
  24 #include "read.h"
  25 #include "vstring.h"
  26
  27 typedef enum {
  28         TOKEN_EOF,
  29         TOKEN_UNDEFINED,
  30         TOKEN_OPEN_SQUARE,
  31         TOKEN_CLOSE_SQUARE,
  32         TOKEN_OPEN_CURLY,
  33         TOKEN_CLOSE_CURLY,
  34         TOKEN_COLON,
  35         TOKEN_COMMA,
  36         TOKEN_TRUE,
  37         TOKEN_FALSE,
  38         TOKEN_NULL,
  39         TOKEN_NUMBER,
  40         TOKEN_STRING
  41 } tokenType;
  42
  43 typedef enum {
  44         TAG_NONE = -1,
  45         TAG_OBJECT,
  46         TAG_ARRAY,
  47         TAG_NUMBER,
  48         TAG_STRING,
  49         TAG_BOOLEAN,
  50         TAG_NULL,
  51         TAG_COUNT
  52 } jsonKind;
  53
  54 typedef struct {
  55         tokenType               type;
  56         jsonKind                scopeKind;
  57         vString                 *string;
  58         vString                 *scope;
  59         unsigned long   lineNumber;
  60         MIOPos                  filePosition;
  61 } tokenInfo;
  62
  63 typedef enum {
  64         KEYWORD_true,
  65         KEYWORD_false,
  66         KEYWORD_null
  67 } keywordId;
  68
  69 static langType Lang_json;
  70
  71 static kindOption JsonKinds [] = {
  72         { TRUE,  'o', "member",         "objects"       },
  73         { TRUE,  'a', "member",         "arrays"        },
  74         { TRUE,  'n', "member",         "numbers"       },
  75         { TRUE,  's', "member",         "strings"       },
  76         { TRUE,  'b', "member",         "booleans"      },
  77         { TRUE,  'z', "member",         "nulls"         }
  78 };
  79
  80 static tokenInfo *newToken (void)
  81 {
  82         tokenInfo *const token = xMalloc (1, tokenInfo);
  83
  84         token->type                     = TOKEN_UNDEFINED;
  85         token->scopeKind        = TAG_NONE;
  86         token->string           = vStringNew ();
  87         token->scope            = vStringNew ();
  88         token->lineNumber       = getSourceLineNumber ();
  89         token->filePosition     = getInputFilePosition ();
  90
  91         return token;
  92 }
  93
  94 static void deleteToken (tokenInfo *const token)
  95 {
  96         vStringDelete (token->string);
  97         vStringDelete (token->scope);
  98         eFree (token);
  99 }
 100
 101 static void copyToken (tokenInfo *const dest, tokenInfo *const src)
 102 {
 103         dest->type = src->type;
 104         dest->scopeKind = src->scopeKind;
 105         vStringCopy (dest->string, src->string);
 106         vStringCopy (dest->scope, src->scope);
 107         dest->lineNumber = src->lineNumber;
 108         dest->filePosition = src->filePosition;
 109 }
 110
 111 static void makeJsonTag (tokenInfo *const token, const jsonKind kind)
 112 {
 113         tagEntryInfo e;
 114
 115         if (! JsonKinds[kind].enabled)
 116                 return;
 117
 118         initTagEntry (&e, vStringValue (token->string));
 119
 120         e.lineNumber    = token->lineNumber;
 121         e.filePosition  = token->filePosition;
 122         e.kindName              = JsonKinds[kind].name;
 123         e.kind                  = JsonKinds[kind].letter;
 124
 125         if (vStringLength (token->scope) > 0)
 126         {
 127                 Assert (token->scopeKind > TAG_NONE && token->scopeKind < TAG_COUNT);
 128
 129                 e.extensionFields.scope[0] = JsonKinds[token->scopeKind].name;
 130                 e.extensionFields.scope[1] = vStringValue (token->scope);
 131         }
 132
 133         makeTagEntry (&e);
 134 }
 135
 136 static boolean isIdentChar (int c)
 137 {
 138         return (isalnum (c) || c == '+' || c == '-' || c == '.');
 139 }
 140
 141 static void readTokenFull (tokenInfo *const token,
 142                                                    boolean includeStringRepr)
 143 {
 144         int c;
 145
 146         token->type = TOKEN_UNDEFINED;
 147         vStringClear (token->string);
 148
 149         do
 150                 c = fileGetc ();
 151         while (c == '\t' || c == ' ' || c == '\r' || c == '\n');
 152
 153         token->lineNumber   = getSourceLineNumber ();
 154         token->filePosition = getInputFilePosition ();
 155
 156         switch (c)
 157         {
 158                 case EOF: token->type = TOKEN_EOF;                      break;
 159                 case '[': token->type = TOKEN_OPEN_SQUARE;      break;
 160                 case ']': token->type = TOKEN_CLOSE_SQUARE;     break;
 161                 case '{': token->type = TOKEN_OPEN_CURLY;       break;
 162                 case '}': token->type = TOKEN_CLOSE_CURLY;      break;
 163                 case ':': token->type = TOKEN_COLON;            break;
 164                 case ',': token->type = TOKEN_COMMA;            break;
 165
 166                 case '"':
 167                 {
 168                         boolean escaped = FALSE;
 169                         token->type = TOKEN_STRING;
 170                         while (TRUE)
 171                         {
 172                                 c = fileGetc ();
 173                                 /* we don't handle unicode escapes but they are safe */
 174                                 if (escaped)
 175                                         escaped = FALSE;
 176                                 else if (c == '\\')
 177                                         escaped = TRUE;
 178                                 else if (c >= 0x00 && c <= 0x1F)
 179                                         break; /* break on invalid, unescaped, control characters */
 180                                 else if (c == '"' || c == EOF)
 181                                         break;
 182                                 if (includeStringRepr)
 183                                         vStringPut (token->string, c);
 184                         }
 185                         vStringTerminate (token->string);
 186                         break;
 187                 }
 188
 189                 default:
 190                         if (! isIdentChar (c))
 191                                 token->type = TOKEN_UNDEFINED;
 192                         else
 193                         {
 194                                 do
 195                                 {
 196                                         vStringPut (token->string, c);
 197                                         c = fileGetc ();
 198                                 }
 199                                 while (c != EOF && isIdentChar (c));
 200                                 vStringTerminate (token->string);
 201                                 fileUngetc (c);
 202                                 switch (lookupKeyword (vStringValue (token->string), Lang_json))
 203                                 {
 204                                         case KEYWORD_true:      token->type = TOKEN_TRUE;       break;
 205                                         case KEYWORD_false:     token->type = TOKEN_FALSE;      break;
 206                                         case KEYWORD_null:      token->type = TOKEN_NULL;       break;
 207                                         default:                        token->type = TOKEN_NUMBER;     break;
 208                                 }
 209                         }
 210                         break;
 211         }
 212 }
 213
 214 #define readToken(t) (readTokenFull ((t), FALSE))
 215
 216 static void pushScope (tokenInfo *const token,
 217                                            const tokenInfo *const parent,
 218                                            const jsonKind parentKind)
 219 {
 220         if (vStringLength (token->scope) > 0)
 221                 vStringPut (token->scope, '.');
 222         vStringCat (token->scope, parent->string);
 223         vStringTerminate (token->scope);
 224         token->scopeKind = parentKind;
 225 }
 226
 227 static void popScope (tokenInfo *const token,
 228                                           const tokenInfo *const parent)
 229 {
 230         vStringTruncate (token->scope, vStringLength (parent->scope));
 231         token->scopeKind = parent->scopeKind;
 232 }
 233
 234 #define skipToOneOf2(token, type1, type2) \
 235         (skipToOneOf3 (token, type1, type2, TOKEN_EOF /* dummy */))
 236
 237 #define skipTo(token, type) \
 238         (skipToOneOf3 (token, type, /* dummies */ TOKEN_EOF, TOKEN_EOF))
 239
 240 static void skipToOneOf3 (tokenInfo *const token,
 241                                                   const tokenType type1,
 242                                                   const tokenType type2,
 243                                                   const tokenType type3)
 244 {
 245         while (token->type != TOKEN_EOF &&
 246                    token->type != type1 &&
 247                    token->type != type2 &&
 248                    token->type != type3)
 249         {
 250                 readToken (token);
 251                 if (token->type == TOKEN_OPEN_CURLY)
 252                 {
 253                         skipTo (token, TOKEN_CLOSE_CURLY);
 254                         readToken (token);
 255                 }
 256                 else if (token->type == TOKEN_OPEN_SQUARE)
 257                 {
 258                         skipTo (token, TOKEN_CLOSE_SQUARE);
 259                         readToken (token);
 260                 }
 261         }
 262 }
 263
 264 static jsonKind tokenToKind (const tokenType type)
 265 {
 266         switch (type)
 267         {
 268                 case TOKEN_OPEN_CURLY:  return TAG_OBJECT;
 269                 case TOKEN_OPEN_SQUARE: return TAG_ARRAY;
 270                 case TOKEN_STRING:              return TAG_STRING;
 271                 case TOKEN_TRUE:
 272                 case TOKEN_FALSE:               return TAG_BOOLEAN;
 273                 case TOKEN_NUMBER:              return TAG_NUMBER;
 274                 default:                                return TAG_NULL;
 275         }
 276 }
 277
 278 static void parseValue (tokenInfo *const token)
 279 {
 280         if (token->type == TOKEN_OPEN_CURLY)
 281         {
 282                 tokenInfo *name = newToken ();
 283
 284                 do
 285                 {
 286                         readTokenFull (token, TRUE);
 287                         if (token->type == TOKEN_STRING)
 288                         {
 289                                 jsonKind tagKind = TAG_NULL; /* default in case of invalid value */
 290
 291                                 copyToken (name, token);
 292
 293                                 /* skip any possible garbage before the value */
 294                                 skipToOneOf3 (token, TOKEN_CLOSE_CURLY, TOKEN_COLON, TOKEN_COMMA);
 295
 296                                 if (token->type == TOKEN_COLON)
 297                                 {
 298                                         readToken (token);
 299                                         tagKind = tokenToKind (token->type);
 300
 301                                         pushScope (token, name, tagKind);
 302                                         parseValue (token);
 303                                         popScope (token, name);
 304                                 }
 305
 306                                 makeJsonTag (name, tagKind);
 307                         }
 308                         /* skip to the end of the construct */
 309                         skipToOneOf2 (token, TOKEN_CLOSE_CURLY, TOKEN_COMMA);
 310                 }
 311                 while (token->type != TOKEN_EOF &&
 312                            token->type != TOKEN_CLOSE_CURLY);
 313
 314                 if (token->type == TOKEN_CLOSE_CURLY)
 315                         readToken (token);
 316
 317                 deleteToken (name);
 318         }
 319         else if (token->type == TOKEN_OPEN_SQUARE)
 320         {
 321                 tokenInfo *name = newToken ();
 322                 char buf[32];
 323                 unsigned int nth = 0;
 324
 325                 readToken (token);
 326                 while (token->type != TOKEN_EOF &&
 327                            token->type != TOKEN_CLOSE_SQUARE)
 328                 {
 329                         jsonKind tagKind;
 330
 331                         tagKind = tokenToKind (token->type);
 332
 333                         copyToken (name, token);
 334                         snprintf (buf, sizeof buf, "%u", nth++);
 335                         vStringCopyS (name->string, buf);
 336
 337                         makeJsonTag (name, tagKind);
 338                         pushScope (token, name, tagKind);
 339                         parseValue (token);
 340                         popScope (token, name);
 341
 342                         /* skip to the end of the construct */
 343                         skipToOneOf2 (token, TOKEN_CLOSE_SQUARE, TOKEN_COMMA);
 344                         if (token->type != TOKEN_CLOSE_SQUARE)
 345                                 readToken (token);
 346                 }
 347
 348                 if (token->type == TOKEN_CLOSE_SQUARE)
 349                         readToken (token);
 350
 351                 deleteToken (name);
 352         }
 353 }
 354
 355 static void findJsonTags (void)
 356 {
 357         tokenInfo *const token = newToken ();
 358
 359         /* We allow multiple top-level elements, although it's not actually valid
 360          * JSON.  An interesting side effect of this is that we allow a leading
 361          * Unicode BOM mark -- even though ok, many JSON parsers will choke on it */
 362         do
 363         {
 364                 readToken (token);
 365                 parseValue (token);
 366         }
 367         while (token->type != TOKEN_EOF);
 368
 369         deleteToken (token);
 370 }
 371
 372 static void initialize (const langType language)
 373 {
 374         Lang_json = language;
 375         addKeyword ("true", language, KEYWORD_true);
 376         addKeyword ("false", language, KEYWORD_false);
 377         addKeyword ("null", language, KEYWORD_null);
 378 }
 379
 380 /* Create parser definition structure */
 381 extern parserDefinition* JsonParser (void)
 382 {
 383         static const char *const extensions [] = { "json", NULL };
 384         parserDefinition *const def = parserNew ("JSON");
 385         def->extensions = extensions;
 386         def->kinds              = JsonKinds;
 387         def->kindCount  = KIND_COUNT (JsonKinds);
 388         def->parser             = findJsonTags;
 389         def->initialize = initialize;
 390
 391         return def;
 392 }