ctags/parsers/ruby.c

   1 /*
   2 *   Copyright (c) 2000-2001, Thaddeus Covert <sahuagin@mediaone.net>
   3 *   Copyright (c) 2002 Matthias Veit <matthias_veit@yahoo.de>
   4 *   Copyright (c) 2004 Elliott Hughes <enh@acm.org>
   5 *
   6 *   This source code is released for free distribution under the terms of the
   7 *   GNU General Public License version 2 or (at your option) any later version.
   8 *
   9 *   This module contains functions for generating tags for Ruby language
  10 *   files.
  11 */
  12
  13 /*
  14 *   INCLUDE FILES
  15 */
  16 #include "general.h"  /* must always come first */
  17
  18 #include <string.h>
  19
  20 #include "entry.h"
  21 #include "parse.h"
  22 #include "nestlevel.h"
  23 #include "read.h"
  24 #include "vstring.h"
  25
  26 /*
  27 *   DATA DECLARATIONS
  28 */
  29 typedef enum {
  30         K_UNDEFINED = -1, K_CLASS, K_METHOD, K_MODULE, K_SINGLETON,
  31 } rubyKind;
  32
  33 /*
  34 *   DATA DEFINITIONS
  35 */
  36 static kindOption RubyKinds [] = {
  37         { TRUE, 'c', "class",  "classes" },
  38         { TRUE, 'f', "method", "methods" },
  39         { TRUE, 'm', "module", "modules" },
  40         { TRUE, 'F', "singletonMethod", "singleton methods" },
  41 #if 0
  42         /* Following two kinds are reserved. */
  43         { TRUE, 'd', "describe", "describes and contexts for Rspec" },
  44         { TRUE, 'C', "constant", "constants" },
  45 #endif
  46 };
  47
  48 static NestingLevels* nesting = NULL;
  49
  50 #define SCOPE_SEPARATOR '.'
  51
  52 /*
  53 *   FUNCTION DEFINITIONS
  54 */
  55
  56 static void enterUnnamedScope (void);
  57
  58 /*
  59 * Returns a string describing the scope in 'nls'.
  60 * We record the current scope as a list of entered scopes.
  61 * Scopes corresponding to 'if' statements and the like are
  62 * represented by empty strings. Scopes corresponding to
  63 * modules and classes are represented by the name of the
  64 * module or class.
  65 */
  66 static vString* nestingLevelsToScope (const NestingLevels* nls)
  67 {
  68         int i;
  69         unsigned int chunks_output = 0;
  70         vString* result = vStringNew ();
  71         for (i = 0; i < nls->n; ++i)
  72         {
  73             const vString* chunk = nls->levels[i].name;
  74             if (vStringLength (chunk) > 0)
  75             {
  76                 if (chunks_output++ > 0)
  77                     vStringPut (result, SCOPE_SEPARATOR);
  78                 vStringCatS (result, vStringValue (chunk));
  79             }
  80         }
  81         return result;
  82 }
  83
  84 /*
  85 * Attempts to advance 's' past 'literal'.
  86 * Returns TRUE if it did, FALSE (and leaves 's' where
  87 * it was) otherwise.
  88 */
  89 static boolean canMatch (const unsigned char** s, const char* literal,
  90                          boolean (*end_check) (int))
  91 {
  92         const int literal_length = strlen (literal);
  93         const int s_length = strlen ((const char *)*s);
  94
  95         if (s_length < literal_length)
  96                 return FALSE;
  97
  98         const unsigned char next_char = *(*s + literal_length);
  99         if (strncmp ((const char*) *s, literal, literal_length) != 0)
 100         {
 101             return FALSE;
 102         }
 103         /* Additionally check that we're at the end of a token. */
 104         if (! end_check (next_char))
 105         {
 106             return FALSE;
 107         }
 108         *s += literal_length;
 109         return TRUE;
 110 }
 111
 112 static boolean isIdentChar (int c)
 113 {
 114         return (isalnum (c) || c == '_');
 115 }
 116
 117 static boolean notIdentChar (int c)
 118 {
 119         return ! isIdentChar (c);
 120 }
 121
 122 static boolean notOperatorChar (int c)
 123 {
 124         return ! (c == '[' || c == ']' ||
 125                   c == '=' || c == '!' || c == '~' ||
 126                   c == '+' || c == '-' ||
 127                   c == '@' || c == '*' || c == '/' || c == '%' ||
 128                   c == '<' || c == '>' ||
 129                   c == '&' || c == '^' || c == '|');
 130 }
 131
 132 static boolean isWhitespace (int c)
 133 {
 134         return c == 0 || isspace (c);
 135 }
 136
 137 static boolean canMatchKeyword (const unsigned char** s, const char* literal)
 138 {
 139         return canMatch (s, literal, notIdentChar);
 140 }
 141
 142 /*
 143 * Attempts to advance 'cp' past a Ruby operator method name. Returns
 144 * TRUE if successful (and copies the name into 'name'), FALSE otherwise.
 145 */
 146 static boolean parseRubyOperator (vString* name, const unsigned char** cp)
 147 {
 148         static const char* RUBY_OPERATORS[] = {
 149             "[]", "[]=",
 150             "**",
 151             "!", "~", "+@", "-@",
 152             "*", "/", "%",
 153             "+", "-",
 154             ">>", "<<",
 155             "&",
 156             "^", "|",
 157             "<=", "<", ">", ">=",
 158             "<=>", "==", "===", "!=", "=~", "!~",
 159             "`",
 160             NULL
 161         };
 162         int i;
 163         for (i = 0; RUBY_OPERATORS[i] != NULL; ++i)
 164         {
 165             if (canMatch (cp, RUBY_OPERATORS[i], notOperatorChar))
 166             {
 167                 vStringCatS (name, RUBY_OPERATORS[i]);
 168                 return TRUE;
 169             }
 170         }
 171         return FALSE;
 172 }
 173
 174 /*
 175 * Emits a tag for the given 'name' of kind 'kind' at the current nesting.
 176 */
 177 static void emitRubyTag (vString* name, rubyKind kind)
 178 {
 179         tagEntryInfo tag;
 180         vString* scope;
 181         rubyKind parent_kind = K_UNDEFINED;
 182         NestingLevel *lvl;
 183         const char *unqualified_name;
 184         const char *qualified_name;
 185
 186         if (!RubyKinds[kind].enabled) {
 187                 return;
 188         }
 189
 190         vStringTerminate (name);
 191         scope = nestingLevelsToScope (nesting);
 192         lvl = nestingLevelsGetCurrent (nesting);
 193         if (lvl)
 194                 parent_kind = lvl->type;
 195
 196         qualified_name = vStringValue (name);
 197         unqualified_name = strrchr (qualified_name, SCOPE_SEPARATOR);
 198         if (unqualified_name && unqualified_name[1])
 199         {
 200                 if (unqualified_name > qualified_name)
 201                 {
 202                         if (vStringLength (scope) > 0)
 203                                 vStringPut (scope, SCOPE_SEPARATOR);
 204                         vStringNCatS (scope, qualified_name,
 205                                       unqualified_name - qualified_name);
 206                         /* assume module parent type for a lack of a better option */
 207                         parent_kind = K_MODULE;
 208                 }
 209                 unqualified_name++;
 210         }
 211         else
 212                 unqualified_name = qualified_name;
 213
 214         initTagEntry (&tag, unqualified_name);
 215         if (vStringLength (scope) > 0) {
 216                 Assert (0 <= parent_kind &&
 217                         (size_t) parent_kind < (sizeof RubyKinds / sizeof RubyKinds[0]));
 218
 219             tag.extensionFields.scope [0] = RubyKinds [parent_kind].name;
 220             tag.extensionFields.scope [1] = vStringValue (scope);
 221         }
 222         tag.kindName = RubyKinds [kind].name;
 223         tag.kind = RubyKinds [kind].letter;
 224         makeTagEntry (&tag);
 225
 226         nestingLevelsPush (nesting, name, kind);
 227
 228         vStringClear (name);
 229         vStringDelete (scope);
 230 }
 231
 232 /* Tests whether 'ch' is a character in 'list'. */
 233 static boolean charIsIn (char ch, const char* list)
 234 {
 235         return (strchr (list, ch) != NULL);
 236 }
 237
 238 /* Advances 'cp' over leading whitespace. */
 239 static void skipWhitespace (const unsigned char** cp)
 240 {
 241         while (isspace (**cp))
 242         {
 243             ++*cp;
 244         }
 245 }
 246
 247 /*
 248 * Copies the characters forming an identifier from *cp into
 249 * name, leaving *cp pointing to the character after the identifier.
 250 */
 251 static rubyKind parseIdentifier (
 252                 const unsigned char** cp, vString* name, rubyKind kind)
 253 {
 254         /* Method names are slightly different to class and variable names.
 255          * A method name may optionally end with a question mark, exclamation
 256          * point or equals sign. These are all part of the name.
 257          * A method name may also contain a period if it's a singleton method.
 258          */
 259         boolean had_sep = FALSE;
 260         const char* also_ok;
 261         if (kind == K_METHOD)
 262         {
 263                 also_ok = ".?!=";
 264         }
 265         else if (kind == K_SINGLETON)
 266         {
 267                 also_ok = "?!=";
 268         }
 269         else
 270         {
 271                 also_ok = "";
 272         }
 273
 274         skipWhitespace (cp);
 275
 276         /* Check for an anonymous (singleton) class such as "class << HTTP". */
 277         if (kind == K_CLASS && **cp == '<' && *(*cp + 1) == '<')
 278         {
 279                 return K_UNDEFINED;
 280         }
 281
 282         /* Check for operators such as "def []=(key, val)". */
 283         if (kind == K_METHOD || kind == K_SINGLETON)
 284         {
 285                 if (parseRubyOperator (name, cp))
 286                 {
 287                         return kind;
 288                 }
 289         }
 290
 291         /* Copy the identifier into 'name'. */
 292         while (**cp != 0 && (**cp == ':' || isIdentChar (**cp) || charIsIn (**cp, also_ok)))
 293         {
 294                 char last_char = **cp;
 295
 296                 if (last_char == ':')
 297                         had_sep = TRUE;
 298                 else
 299                 {
 300                         if (had_sep)
 301                         {
 302                                 vStringPut (name, SCOPE_SEPARATOR);
 303                                 had_sep = FALSE;
 304                         }
 305                         vStringPut (name, last_char);
 306                 }
 307                 ++*cp;
 308
 309                 if (kind == K_METHOD)
 310                 {
 311                         /* Recognize singleton methods. */
 312                         if (last_char == '.')
 313                         {
 314                                 vStringTerminate (name);
 315                                 vStringClear (name);
 316                                 return parseIdentifier (cp, name, K_SINGLETON);
 317                         }
 318                 }
 319
 320                 if (kind == K_METHOD || kind == K_SINGLETON)
 321                 {
 322                         /* Recognize characters which mark the end of a method name. */
 323                         if (charIsIn (last_char, "?!="))
 324                         {
 325                                 break;
 326                         }
 327                 }
 328         }
 329         return kind;
 330 }
 331
 332 static void readAndEmitTag (const unsigned char** cp, rubyKind expected_kind)
 333 {
 334         if (isspace (**cp))
 335         {
 336                 vString *name = vStringNew ();
 337                 rubyKind actual_kind = parseIdentifier (cp, name, expected_kind);
 338
 339                 if (actual_kind == K_UNDEFINED || vStringLength (name) == 0)
 340                 {
 341                         /*
 342                         * What kind of tags should we create for code like this?
 343                         *
 344                         *    %w(self.clfloor clfloor).each do |name|
 345                         *        module_eval <<-"end;"
 346                         *            def #{name}(x, y=1)
 347                         *                q, r = x.divmod(y)
 348                         *                q = q.to_i
 349                         *                return q, r
 350                         *            end
 351                         *        end;
 352                         *    end
 353                         *
 354                         * Or this?
 355                         *
 356                         *    class << HTTP
 357                         *
 358                         * For now, we don't create any.
 359                         */
 360                         enterUnnamedScope ();
 361                 }
 362                 else
 363                 {
 364                         emitRubyTag (name, actual_kind);
 365                 }
 366                 vStringDelete (name);
 367         }
 368 }
 369
 370 static void enterUnnamedScope (void)
 371 {
 372         vString *name = vStringNewInit ("");
 373         NestingLevel *parent = nestingLevelsGetCurrent (nesting);
 374         nestingLevelsPush (nesting, name, parent ? parent->type : K_UNDEFINED);
 375         vStringDelete (name);
 376 }
 377
 378 static void findRubyTags (void)
 379 {
 380         const unsigned char *line;
 381         boolean inMultiLineComment = FALSE;
 382
 383         nesting = nestingLevelsNew ();
 384
 385         /* FIXME: this whole scheme is wrong, because Ruby isn't line-based.
 386         * You could perfectly well write:
 387         *
 388         *  def
 389         *  method
 390         *   puts("hello")
 391         *  end
 392         *
 393         * if you wished, and this function would fail to recognize anything.
 394         */
 395         while ((line = fileReadLine ()) != NULL)
 396         {
 397                 const unsigned char *cp = line;
 398                 /* if we expect a separator after a while, for, or until statement
 399                  * separators are "do", ";" or newline */
 400                 boolean expect_separator = FALSE;
 401
 402                 if (canMatch (&cp, "=begin", isWhitespace))
 403                 {
 404                         inMultiLineComment = TRUE;
 405                         continue;
 406                 }
 407                 if (canMatch (&cp, "=end", isWhitespace))
 408                 {
 409                         inMultiLineComment = FALSE;
 410                         continue;
 411                 }
 412                 if (inMultiLineComment)
 413                         continue;
 414
 415                 skipWhitespace (&cp);
 416
 417                 /* Avoid mistakenly starting a scope for modifiers such as
 418                 *
 419                 *   return if <exp>
 420                 *
 421                 * FIXME: this is fooled by code such as
 422                 *
 423                 *   result = if <exp>
 424                 *               <a>
 425                 *            else
 426                 *               <b>
 427                 *            end
 428                 *
 429                 * FIXME: we're also fooled if someone does something heinous such as
 430                 *
 431                 *   puts("hello") \
 432                 *       unless <exp>
 433                 */
 434                 if (canMatchKeyword (&cp, "for") ||
 435                     canMatchKeyword (&cp, "until") ||
 436                     canMatchKeyword (&cp, "while"))
 437                 {
 438                         expect_separator = TRUE;
 439                         enterUnnamedScope ();
 440                 }
 441                 else if (canMatchKeyword (&cp, "case") ||
 442                          canMatchKeyword (&cp, "if") ||
 443                          canMatchKeyword (&cp, "unless"))
 444                 {
 445                         enterUnnamedScope ();
 446                 }
 447
 448                 /*
 449                 * "module M", "class C" and "def m" should only be at the beginning
 450                 * of a line.
 451                 */
 452                 if (canMatchKeyword (&cp, "module"))
 453                 {
 454                         readAndEmitTag (&cp, K_MODULE);
 455                 }
 456                 else if (canMatchKeyword (&cp, "class"))
 457                 {
 458                         readAndEmitTag (&cp, K_CLASS);
 459                 }
 460                 else if (canMatchKeyword (&cp, "def"))
 461                 {
 462                         rubyKind kind = K_METHOD;
 463                         NestingLevel *nl = nestingLevelsGetCurrent (nesting);
 464
 465                         /* if the def is inside an unnamed scope at the class level, assume
 466                          * it's from a singleton from a construct like this:
 467                          *
 468                          * class C
 469                          *   class << self
 470                          *     def singleton
 471                          *       ...
 472                          *     end
 473                          *   end
 474                          * end
 475                          */
 476                         if (nl && nl->type == K_CLASS && vStringLength (nl->name) == 0)
 477                                 kind = K_SINGLETON;
 478
 479                         readAndEmitTag (&cp, kind);
 480                 }
 481
 482                 while (*cp != '\0')
 483                 {
 484                         /* FIXME: we don't cope with here documents,
 485                         * or regular expression literals, or ... you get the idea.
 486                         * Hopefully, the restriction above that insists on seeing
 487                         * definitions at the starts of lines should keep us out of
 488                         * mischief.
 489                         */
 490                         if (inMultiLineComment || isspace (*cp))
 491                         {
 492                                 ++cp;
 493                         }
 494                         else if (*cp == '#')
 495                         {
 496                                 /* FIXME: this is wrong, but there *probably* won't be a
 497                                 * definition after an interpolated string (where # doesn't
 498                                 * mean 'comment').
 499                                 */
 500                                 break;
 501                         }
 502                         else if (canMatchKeyword (&cp, "begin"))
 503                         {
 504                                 enterUnnamedScope ();
 505                         }
 506                         else if (canMatchKeyword (&cp, "do"))
 507                         {
 508                                 if (! expect_separator)
 509                                         enterUnnamedScope ();
 510                                 else
 511                                         expect_separator = FALSE;
 512                         }
 513                         else if (canMatchKeyword (&cp, "end") && nesting->n > 0)
 514                         {
 515                                 /* Leave the most recent scope. */
 516                                 nestingLevelsPop (nesting);
 517                         }
 518                         else if (*cp == '"')
 519                         {
 520                                 /* Skip string literals.
 521                                  * FIXME: should cope with escapes and interpolation.
 522                                  */
 523                                 do {
 524                                         ++cp;
 525                                 } while (*cp != 0 && *cp != '"');
 526                                 if (*cp == '"')
 527                                     cp++; /* skip the last found '"' */
 528                         }
 529                         else if (*cp == ';')
 530                         {
 531                                 ++cp;
 532                                 expect_separator = FALSE;
 533                         }
 534                         else if (*cp != '\0')
 535                         {
 536                                 do
 537                                         ++cp;
 538                                 while (isIdentChar (*cp));
 539                         }
 540                 }
 541         }
 542         nestingLevelsFree (nesting);
 543 }
 544
 545 extern parserDefinition* RubyParser (void)
 546 {
 547         static const char *const extensions [] = { "rb", "ruby", NULL };
 548         parserDefinition* def = parserNew ("Ruby");
 549         def->kinds      = RubyKinds;
 550         def->kindCount  = KIND_COUNT (RubyKinds);
 551         def->extensions = extensions;
 552         def->parser     = findRubyTags;
 553         return def;
 554 }
 555
 556 /* vi:set tabstop=4 shiftwidth=4: */