tagmanager/ctags/ruby.c

   1 /*
   2 *   Copyright (c) 2000-2001, Thaddeus Covert <sahuagin@mediaone.net>
   3 *   Copyright (c) 2002 Matthias Veit <matthias_veit@yahoo.de>
   4 *   Copyright (c) 2004 Elliott Hughes <enh@acm.org>
   5 *
   6 *   This source code is released for free distribution under the terms of the
   7 *   GNU General Public License.
   8 *
   9 *   This module contains functions for generating tags for Ruby language
  10 *   files.
  11 */
  12
  13 /*
  14 *   INCLUDE FILES
  15 */
  16 #include "general.h"  /* must always come first */
  17
  18 #include <string.h>
  19
  20 #include "entry.h"
  21 #include "parse.h"
  22 #include "nestlevel.h"
  23 #include "read.h"
  24 #include "vstring.h"
  25
  26 /*
  27 *   DATA DECLARATIONS
  28 */
  29 typedef enum {
  30         K_UNDEFINED = -1, K_CLASS, K_METHOD, K_MODULE, K_SINGLETON, K_DESCRIBE, K_CONTEXT
  31 } rubyKind;
  32
  33 /*
  34 *   DATA DEFINITIONS
  35 */
  36 static kindOption RubyKinds [] = {
  37         { TRUE, 'c', "class",  "classes" },
  38         { TRUE, 'f', "method", "methods" },
  39         { TRUE, 'm', "namespace", "modules" },
  40         { TRUE, 'F', "member", "singleton methods" },
  41         { TRUE, 'd', "describe", "describes" },
  42         { TRUE, 'C', "context", "contexts" }
  43 };
  44
/*
* Stack of the scopes entered so far in the file being parsed. It is
* created at the start of findRubyTags() and freed at its end.
*/
static NestingLevels* nesting = NULL;

/* Character placed between the components of a scope or qualified name. */
#define SCOPE_SEPARATOR '.'

/*
*   FUNCTION DEFINITIONS
*/

/* Forward declaration: readAndEmitTag() calls this before its definition. */
static void enterUnnamedScope (void);
  54
  55 /*
  56 * Returns a string describing the scope in 'nls'.
  57 * We record the current scope as a list of entered scopes.
  58 * Scopes corresponding to 'if' statements and the like are
  59 * represented by empty strings. Scopes corresponding to
  60 * modules and classes are represented by the name of the
  61 * module or class.
  62 */
  63 static vString* nestingLevelsToScope (const NestingLevels* nls)
  64 {
  65         int i;
  66         unsigned int chunks_output = 0;
  67         vString* result = vStringNew ();
  68         for (i = 0; i < nls->n; ++i)
  69         {
  70             const vString* chunk = nls->levels[i].name;
  71             if (vStringLength (chunk) > 0)
  72             {
  73                 if (chunks_output++ > 0)
  74                     vStringPut (result, SCOPE_SEPARATOR);
  75                 vStringCatS (result, vStringValue (chunk));
  76             }
  77         }
  78         return result;
  79 }
  80
  81 /*
  82 * Attempts to advance 's' past 'literal'.
  83 * Returns TRUE if it did, FALSE (and leaves 's' where
  84 * it was) otherwise.
  85 */
  86 static boolean canMatch (const unsigned char** s, const char* literal,
  87                          boolean (*end_check) (int))
  88 {
  89         const int literal_length = strlen (literal);
  90         const int s_length = strlen ((const char *)*s);
  91
  92         if (s_length < literal_length)
  93                 return FALSE;
  94
  95         const unsigned char next_char = *(*s + literal_length);
  96         if (strncmp ((const char*) *s, literal, literal_length) != 0)
  97         {
  98             return FALSE;
  99         }
 100         /* Additionally check that we're at the end of a token. */
 101         if (! end_check (next_char))
 102         {
 103             return FALSE;
 104         }
 105         *s += literal_length;
 106         return TRUE;
 107 }
 108
 109 static boolean notIdentChar (int c)
 110 {
 111         return ! (isalnum (c) || c == '_');
 112 }
 113
 114 static boolean notOperatorChar (int c)
 115 {
 116         return ! (c == '[' || c == ']' ||
 117                   c == '=' || c == '!' || c == '~' ||
 118                   c == '+' || c == '-' ||
 119                   c == '@' || c == '*' || c == '/' || c == '%' ||
 120                   c == '<' || c == '>' ||
 121                   c == '&' || c == '^' || c == '|');
 122 }
 123
 124 static boolean isWhitespace (int c)
 125 {
 126         return c == 0 || isspace (c);
 127 }
 128
 129 static boolean canMatchKeyword (const unsigned char** s, const char* literal)
 130 {
 131         return canMatch (s, literal, notIdentChar);
 132 }
 133
 134 /*
 135 * Attempts to advance 'cp' past a Ruby operator method name. Returns
 136 * TRUE if successful (and copies the name into 'name'), FALSE otherwise.
 137 */
 138 static boolean parseRubyOperator (vString* name, const unsigned char** cp)
 139 {
 140         static const char* RUBY_OPERATORS[] = {
 141             "[]", "[]=",
 142             "**",
 143             "!", "~", "+@", "-@",
 144             "*", "/", "%",
 145             "+", "-",
 146             ">>", "<<",
 147             "&",
 148             "^", "|",
 149             "<=", "<", ">", ">=",
 150             "<=>", "==", "===", "!=", "=~", "!~",
 151             "`",
 152             NULL
 153         };
 154         int i;
 155         for (i = 0; RUBY_OPERATORS[i] != NULL; ++i)
 156         {
 157             if (canMatch (cp, RUBY_OPERATORS[i], notOperatorChar))
 158             {
 159                 vStringCatS (name, RUBY_OPERATORS[i]);
 160                 return TRUE;
 161             }
 162         }
 163         return FALSE;
 164 }
 165
 166 /*
 167 * Emits a tag for the given 'name' of kind 'kind' at the current nesting.
 168 */
 169 static void emitRubyTag (vString* name, rubyKind kind)
 170 {
 171         tagEntryInfo tag;
 172         vString* scope;
 173         const char *unqualified_name;
 174         const char *qualified_name;
 175
 176         if (!RubyKinds[kind].enabled) {
 177                 return;
 178         }
 179
 180         vStringTerminate (name);
 181         scope = nestingLevelsToScope (nesting);
 182
 183         qualified_name = vStringValue (name);
 184         unqualified_name = strrchr (qualified_name, SCOPE_SEPARATOR);
 185         if (unqualified_name && unqualified_name[1])
 186         {
 187                 if (unqualified_name > qualified_name)
 188                 {
 189                         if (vStringLength (scope) > 0)
 190                                 vStringPut (scope, SCOPE_SEPARATOR);
 191                         vStringNCatS (scope, qualified_name,
 192                                       unqualified_name - qualified_name);
 193                 }
 194                 unqualified_name++;
 195         }
 196         else
 197                 unqualified_name = qualified_name;
 198
 199         initTagEntry (&tag, unqualified_name);
 200         if (vStringLength (scope) > 0) {
 201             tag.extensionFields.scope [0] = "class";
 202             tag.extensionFields.scope [1] = vStringValue (scope);
 203         }
 204         tag.kindName = RubyKinds [kind].name;
 205         tag.kind = RubyKinds [kind].letter;
 206         makeTagEntry (&tag);
 207
 208         nestingLevelsPush (nesting, name, kind);
 209
 210         vStringClear (name);
 211         vStringDelete (scope);
 212 }
 213
 214 /* Tests whether 'ch' is a character in 'list'. */
 215 static boolean charIsIn (char ch, const char* list)
 216 {
 217         return (strchr (list, ch) != NULL);
 218 }
 219
 220 /* Advances 'cp' over leading whitespace. */
 221 static void skipWhitespace (const unsigned char** cp)
 222 {
 223         while (isspace (**cp))
 224         {
 225             ++*cp;
 226         }
 227 }
 228
 229 /*
 230 * Copies the characters forming an identifier from *cp into
 231 * name, leaving *cp pointing to the character after the identifier.
 232 */
 233 static rubyKind parseIdentifier (
 234                 const unsigned char** cp, vString* name, rubyKind kind)
 235 {
 236         /* Method names are slightly different to class and variable names.
 237          * A method name may optionally end with a question mark, exclamation
 238          * point or equals sign. These are all part of the name.
 239          * A method name may also contain a period if it's a singleton method.
 240          */
 241         boolean had_sep = FALSE;
 242         const char* also_ok;
 243         if (kind == K_METHOD)
 244         {
 245                 also_ok = "_.?!=";
 246         }
 247         else if (kind == K_SINGLETON)
 248         {
 249                 also_ok = "_?!=";
 250         }
 251         else if (kind == K_DESCRIBE || kind == K_CONTEXT)
 252         {
 253                 also_ok = " ,\".#_?!='/-";
 254         }
 255         else
 256         {
 257                 also_ok = "_";
 258         }
 259
 260         skipWhitespace (cp);
 261
 262         /* Check for an anonymous (singleton) class such as "class << HTTP". */
 263         if (kind == K_CLASS && **cp == '<' && *(*cp + 1) == '<')
 264         {
 265                 return K_UNDEFINED;
 266         }
 267
 268         /* Check for operators such as "def []=(key, val)". */
 269         if (kind == K_METHOD || kind == K_SINGLETON)
 270         {
 271                 if (parseRubyOperator (name, cp))
 272                 {
 273                         return kind;
 274                 }
 275         }
 276
 277         /* Copy the identifier into 'name'. */
 278         while (**cp != 0 && (**cp == ':' || isalnum (**cp) || charIsIn (**cp, also_ok)))
 279         {
 280                 char last_char = **cp;
 281
 282                 if (last_char == ':')
 283                         had_sep = TRUE;
 284                 else
 285                 {
 286                         if (had_sep)
 287                         {
 288                                 vStringPut (name, SCOPE_SEPARATOR);
 289                                 had_sep = FALSE;
 290                         }
 291                         vStringPut (name, last_char);
 292                 }
 293                 ++*cp;
 294
 295                 if (kind == K_METHOD)
 296                 {
 297                         /* Recognize singleton methods. */
 298                         if (last_char == '.')
 299                         {
 300                                 vStringTerminate (name);
 301                                 vStringClear (name);
 302                                 return parseIdentifier (cp, name, K_SINGLETON);
 303                         }
 304                 }
 305
 306                 if (kind == K_METHOD || kind == K_SINGLETON)
 307                 {
 308                         /* Recognize characters which mark the end of a method name. */
 309                         if (charIsIn (last_char, "?!="))
 310                         {
 311                                 break;
 312                         }
 313                 }
 314         }
 315         return kind;
 316 }
 317
 318 static void readAndEmitTag (const unsigned char** cp, rubyKind expected_kind)
 319 {
 320         if (isspace (**cp))
 321         {
 322                 vString *name = vStringNew ();
 323                 rubyKind actual_kind = parseIdentifier (cp, name, expected_kind);
 324
 325                 if (actual_kind == K_UNDEFINED || vStringLength (name) == 0)
 326                 {
 327                         /*
 328                         * What kind of tags should we create for code like this?
 329                         *
 330                         *    %w(self.clfloor clfloor).each do |name|
 331                         *        module_eval <<-"end;"
 332                         *            def #{name}(x, y=1)
 333                         *                q, r = x.divmod(y)
 334                         *                q = q.to_i
 335                         *                return q, r
 336                         *            end
 337                         *        end;
 338                         *    end
 339                         *
 340                         * Or this?
 341                         *
 342                         *    class << HTTP
 343                         *
 344                         * For now, we don't create any.
 345                         */
 346                         enterUnnamedScope ();
 347                 }
 348                 else
 349                 {
 350                         emitRubyTag (name, actual_kind);
 351                 }
 352                 vStringDelete (name);
 353         }
 354 }
 355
 356 static void enterUnnamedScope (void)
 357 {
 358         vString *name = vStringNewInit ("");
 359         NestingLevel *parent = nestingLevelsGetCurrent (nesting);
 360         nestingLevelsPush (nesting, name, parent ? parent->type : K_UNDEFINED);
 361         vStringDelete (name);
 362 }
 363
 364 static void findRubyTags (void)
 365 {
 366         const unsigned char *line;
 367         boolean inMultiLineComment = FALSE;
 368
 369         nesting = nestingLevelsNew ();
 370
 371         /* FIXME: this whole scheme is wrong, because Ruby isn't line-based.
 372         * You could perfectly well write:
 373         *
 374         *  def
 375         *  method
 376         *   puts("hello")
 377         *  end
 378         *
 379         * if you wished, and this function would fail to recognize anything.
 380         */
 381         while ((line = fileReadLine ()) != NULL)
 382         {
 383                 const unsigned char *cp = line;
 384                 /* if we expect a separator after a while, for, or until statement
 385                  * separators are "do", ";" or newline */
 386                 boolean expect_separator = FALSE;
 387
 388                 if (canMatch (&cp, "=begin", isWhitespace))
 389                 {
 390                         inMultiLineComment = TRUE;
 391                         continue;
 392                 }
 393                 if (canMatch (&cp, "=end", isWhitespace))
 394                 {
 395                         inMultiLineComment = FALSE;
 396                         continue;
 397                 }
 398                 if (inMultiLineComment)
 399                         continue;
 400
 401                 skipWhitespace (&cp);
 402
 403                 /* Avoid mistakenly starting a scope for modifiers such as
 404                 *
 405                 *   return if <exp>
 406                 *
 407                 * FIXME: this is fooled by code such as
 408                 *
 409                 *   result = if <exp>
 410                 *               <a>
 411                 *            else
 412                 *               <b>
 413                 *            end
 414                 *
 415                 * FIXME: we're also fooled if someone does something heinous such as
 416                 *
 417                 *   puts("hello") \
 418                 *       unless <exp>
 419                 */
 420                 if (canMatchKeyword (&cp, "for") ||
 421                     canMatchKeyword (&cp, "until") ||
 422                     canMatchKeyword (&cp, "while"))
 423                 {
 424                         expect_separator = TRUE;
 425                         enterUnnamedScope ();
 426                 }
 427                 else if (canMatchKeyword (&cp, "case") ||
 428                          canMatchKeyword (&cp, "if") ||
 429                          canMatchKeyword (&cp, "unless"))
 430                 {
 431                         enterUnnamedScope ();
 432                 }
 433
 434                 /*
 435                 * "module M", "class C" and "def m" should only be at the beginning
 436                 * of a line.
 437                 */
 438                 if (canMatchKeyword (&cp, "module"))
 439                 {
 440                         readAndEmitTag (&cp, K_MODULE);
 441                 }
 442                 else if (canMatchKeyword (&cp, "class"))
 443                 {
 444                         readAndEmitTag (&cp, K_CLASS);
 445                 }
 446                 else if (canMatchKeyword (&cp, "def"))
 447                 {
 448                         readAndEmitTag (&cp, K_METHOD);
 449                 }
 450                 else if (canMatchKeyword (&cp, "describe"))
 451                 {
 452                         readAndEmitTag (&cp, K_DESCRIBE);
 453                 }
 454                 else if (canMatchKeyword (&cp, "context"))
 455                 {
 456                         readAndEmitTag (&cp, K_CONTEXT);
 457                 }
 458
 459                 while (*cp != '\0')
 460                 {
 461                         /* FIXME: we don't cope with here documents,
 462                         * or regular expression literals, or ... you get the idea.
 463                         * Hopefully, the restriction above that insists on seeing
 464                         * definitions at the starts of lines should keep us out of
 465                         * mischief.
 466                         */
 467                         if (inMultiLineComment || isspace (*cp))
 468                         {
 469                                 ++cp;
 470                         }
 471                         else if (*cp == '#')
 472                         {
 473                                 /* FIXME: this is wrong, but there *probably* won't be a
 474                                 * definition after an interpolated string (where # doesn't
 475                                 * mean 'comment').
 476                                 */
 477                                 break;
 478                         }
 479                         else if (canMatchKeyword (&cp, "begin"))
 480                         {
 481                                 enterUnnamedScope ();
 482                         }
 483                         else if (canMatchKeyword (&cp, "do"))
 484                         {
 485                                 if (! expect_separator)
 486                                         enterUnnamedScope ();
 487                                 else
 488                                         expect_separator = FALSE;
 489                         }
 490                         else if (canMatchKeyword (&cp, "end") && nesting->n > 0)
 491                         {
 492                                 /* Leave the most recent scope. */
 493                                 nestingLevelsPop (nesting);
 494                         }
 495                         else if (*cp == '"')
 496                         {
 497                                 /* Skip string literals.
 498                                  * FIXME: should cope with escapes and interpolation.
 499                                  */
 500                                 do {
 501                                         ++cp;
 502                                 } while (*cp != 0 && *cp != '"');
 503                                 if (*cp == '"')
 504                                     cp++; /* skip the last found '"' */
 505                         }
 506                         else if (*cp == ';')
 507                         {
 508                                 ++cp;
 509                                 expect_separator = FALSE;
 510                         }
 511                         else if (*cp != '\0')
 512                         {
 513                                 do
 514                                         ++cp;
 515                                 while (isalnum (*cp) || *cp == '_');
 516                         }
 517                 }
 518         }
 519         nestingLevelsFree (nesting);
 520 }
 521
 522 extern parserDefinition* RubyParser (void)
 523 {
 524         static const char *const extensions [] = { "rb", "ruby", NULL };
 525         parserDefinition* def = parserNew ("Ruby");
 526         def->kinds      = RubyKinds;
 527         def->kindCount  = KIND_COUNT (RubyKinds);
 528         def->extensions = extensions;
 529         def->parser     = findRubyTags;
 530         return def;
 531 }
 532
 533 /* vi:set tabstop=4 shiftwidth=4: */