tagmanager/ruby.c

   1 /*
   2 *   Copyright (c) 2000-2001, Thaddeus Covert <sahuagin@mediaone.net>
   3 *   Copyright (c) 2002 Matthias Veit <matthias_veit@yahoo.de>
   4 *   Copyright (c) 2004 Elliott Hughes <enh@acm.org>
   5 *
   6 *   This source code is released for free distribution under the terms of the
   7 *   GNU General Public License.
   8 *
   9 *   This module contains functions for generating tags for Ruby language
  10 *   files.
  11 */
  12
  13 /*
  14 *   INCLUDE FILES
  15 */
  16 #include "general.h"  /* must always come first */
  17
  18 #include <string.h>
  19
  20 #include "entry.h"
  21 #include "parse.h"
  22 #include "read.h"
  23 #include "vstring.h"
  24
  25 /*
  26 *   DATA DECLARATIONS
  27 */
  28 typedef enum {
  29         K_UNDEFINED = -1, K_CLASS, K_METHOD, K_MODULE, K_SINGLETON
  30 } rubyKind;
  31
  32 /*
  33 *   DATA DEFINITIONS
  34 */
/*
* Kind table handed to the parser definition. emitRubyTag() indexes it with
* a rubyKind value, so the entries must stay in the same order as the enum.
*/
static kindOption RubyKinds [] = {
        { TRUE, 'c', "class",  "classes" },            /* K_CLASS */
        { TRUE, 'f', "method", "methods" },            /* K_METHOD */
        { TRUE, 'm', "namespace", "modules" },         /* K_MODULE */
        { TRUE, 'F', "member", "singleton methods" }   /* K_SINGLETON */
};
  41
  42 static stringList* nesting = 0;
  43
  44 /*
  45 *   FUNCTION DEFINITIONS
  46 */
  47
  48 /*
  49 * Returns a string describing the scope in 'list'.
  50 * We record the current scope as a list of entered scopes.
  51 * Scopes corresponding to 'if' statements and the like are
  52 * represented by empty strings. Scopes corresponding to
  53 * modules and classes are represented by the name of the
  54 * module or class.
  55 */
  56 static vString* stringListToScope (const stringList* list)
  57 {
  58         unsigned int i;
  59         unsigned int chunks_output = 0;
  60         vString* result = vStringNew ();
  61         const unsigned int max = stringListCount (list);
  62         for (i = 0; i < max; ++i)
  63         {
  64             vString* chunk = stringListItem (list, i);
  65             if (vStringLength (chunk) > 0)
  66             {
  67                 vStringCatS (result, (chunks_output++ > 0) ? "." : "");
  68                 vStringCatS (result, vStringValue (chunk));
  69             }
  70         }
  71         return result;
  72 }
  73
  74 /*
  75 * Attempts to advance 's' past 'literal'.
  76 * Returns TRUE if it did, FALSE (and leaves 's' where
  77 * it was) otherwise.
  78 */
  79 static boolean canMatch (const unsigned char** s, const char* literal)
  80 {
  81         const int literal_length = strlen (literal);
  82         const unsigned char next_char = *(*s + literal_length);
  83         if (strncmp ((const char*) *s, literal, literal_length) != 0)
  84         {
  85             return FALSE;
  86         }
  87         /* Additionally check that we're at the end of a token. */
  88         if ( ! (next_char == 0 || isspace (next_char) || next_char == '('))
  89         {
  90             return FALSE;
  91         }
  92         *s += literal_length;
  93         return TRUE;
  94 }
  95
  96 /*
  97 * Attempts to advance 'cp' past a Ruby operator method name. Returns
  98 * TRUE if successful (and copies the name into 'name'), FALSE otherwise.
  99 */
 100 static boolean parseRubyOperator (vString* name, const unsigned char** cp)
 101 {
 102         static const char* RUBY_OPERATORS[] = {
 103             "[]", "[]=",
 104             "**",
 105             "!", "~", "+@", "-@",
 106             "*", "/", "%",
 107             "+", "-",
 108             ">>", "<<",
 109             "&",
 110             "^", "|",
 111             "<=", "<", ">", ">=",
 112             "<=>", "==", "===", "!=", "=~", "!~",
 113             "`",
 114             0
 115         };
 116         int i;
 117         for (i = 0; RUBY_OPERATORS[i] != 0; ++i)
 118         {
 119             if (canMatch (cp, RUBY_OPERATORS[i]))
 120             {
 121                 vStringCatS (name, RUBY_OPERATORS[i]);
 122                 return TRUE;
 123             }
 124         }
 125         return FALSE;
 126 }
 127
 128 /*
 129 * Emits a tag for the given 'name' of kind 'kind' at the current nesting.
 130 */
 131 static void emitRubyTag (vString* name, rubyKind kind)
 132 {
 133         tagEntryInfo tag;
 134         vString* scope;
 135
 136         vStringTerminate (name);
 137         scope = stringListToScope (nesting);
 138
 139         initTagEntry (&tag, vStringValue (name));
 140         if (vStringLength (scope) > 0) {
 141             tag.extensionFields.scope [0] = "class";
 142             tag.extensionFields.scope [1] = vStringValue (scope);
 143         }
 144         tag.kindName = RubyKinds [kind].name;
 145         tag.kind = RubyKinds [kind].letter;
 146         makeTagEntry (&tag);
 147
 148         stringListAdd (nesting, vStringNewCopy (name));
 149
 150         vStringClear (name);
 151         vStringDelete (scope);
 152 }
 153
 154 /* Tests whether 'ch' is a character in 'list'. */
 155 static boolean charIsIn (char ch, const char* list)
 156 {
 157         return (strchr (list, ch) != 0);
 158 }
 159
 160 /* Advances 'cp' over leading whitespace. */
 161 static void skipWhitespace (const unsigned char** cp)
 162 {
 163         while (isspace (**cp))
 164         {
 165             ++*cp;
 166         }
 167 }
 168
 169 /*
 170 * Copies the characters forming an identifier from *cp into
 171 * name, leaving *cp pointing to the character after the identifier.
 172 */
 173 static rubyKind parseIdentifier (
 174                 const unsigned char** cp, vString* name, rubyKind kind)
 175 {
 176         /* Method names are slightly different to class and variable names.
 177          * A method name may optionally end with a question mark, exclamation
 178          * point or equals sign. These are all part of the name.
 179          * A method name may also contain a period if it's a singleton method.
 180          */
 181         const char* also_ok = (kind == K_METHOD) ? "_.?!=" : "_";
 182
 183         skipWhitespace (cp);
 184
 185         /* Check for an anonymous (singleton) class such as "class << HTTP". */
 186         if (kind == K_CLASS && **cp == '<' && *(*cp + 1) == '<')
 187         {
 188                 return K_UNDEFINED;
 189         }
 190
 191         /* Check for operators such as "def []=(key, val)". */
 192         if (kind == K_METHOD || kind == K_SINGLETON)
 193         {
 194                 if (parseRubyOperator (name, cp))
 195                 {
 196                         return kind;
 197                 }
 198         }
 199
 200         /* Copy the identifier into 'name'. */
 201         while (**cp != 0 && (isalnum (**cp) || charIsIn (**cp, also_ok)))
 202         {
 203                 char last_char = **cp;
 204
 205                 vStringPut (name, last_char);
 206                 ++*cp;
 207
 208                 if (kind == K_METHOD)
 209                 {
 210                         /* Recognize singleton methods. */
 211                         if (last_char == '.')
 212                         {
 213                                 vStringTerminate (name);
 214                                 vStringClear (name);
 215                                 return parseIdentifier (cp, name, K_SINGLETON);
 216                         }
 217
 218                         /* Recognize characters which mark the end of a method name. */
 219                         if (charIsIn (last_char, "?!="))
 220                         {
 221                                 break;
 222                         }
 223                 }
 224         }
 225         return kind;
 226 }
 227
 228 static void readAndEmitTag (const unsigned char** cp, rubyKind expected_kind)
 229 {
 230         if (isspace (**cp))
 231         {
 232                 vString *name = vStringNew ();
 233                 rubyKind actual_kind = parseIdentifier (cp, name, expected_kind);
 234
 235                 if (actual_kind == K_UNDEFINED || vStringLength (name) == 0)
 236                 {
 237                         /*
 238                         * What kind of tags should we create for code like this?
 239                         *
 240                         *    %w(self.clfloor clfloor).each do |name|
 241                         *        module_eval <<-"end;"
 242                         *            def #{name}(x, y=1)
 243                         *                q, r = x.divmod(y)
 244                         *                q = q.to_i
 245                         *                return q, r
 246                         *            end
 247                         *        end;
 248                         *    end
 249                         *
 250                         * Or this?
 251                         *
 252                         *    class << HTTP
 253                         *
 254                         * For now, we don't create any.
 255                         */
 256                 }
 257                 else
 258                 {
 259                         emitRubyTag (name, actual_kind);
 260                 }
 261                 vStringDelete (name);
 262         }
 263 }
 264
 265 static void enterUnnamedScope (void)
 266 {
 267         stringListAdd (nesting, vStringNewInit (""));
 268 }
 269
 270 static void findRubyTags (void)
 271 {
 272         const unsigned char *line;
 273         boolean inMultiLineComment = FALSE;
 274
 275         nesting = stringListNew ();
 276
 277         /* FIXME: this whole scheme is wrong, because Ruby isn't line-based.
 278         * You could perfectly well write:
 279         *
 280         *  def
 281         *  method
 282         *   puts("hello")
 283         *  end
 284         *
 285         * if you wished, and this function would fail to recognize anything.
 286         */
 287         while ((line = fileReadLine ()) != NULL)
 288         {
 289                 const unsigned char *cp = line;
 290
 291                 if (canMatch (&cp, "=begin"))
 292                 {
 293                         inMultiLineComment = TRUE;
 294                         continue;
 295                 }
 296                 if (canMatch (&cp, "=end"))
 297                 {
 298                         inMultiLineComment = FALSE;
 299                         continue;
 300                 }
 301
 302                 skipWhitespace (&cp);
 303
 304                 /* Avoid mistakenly starting a scope for modifiers such as
 305                 *
 306                 *   return if <exp>
 307                 *
 308                 * FIXME: this is fooled by code such as
 309                 *
 310                 *   result = if <exp>
 311                 *               <a>
 312                 *            else
 313                 *               <b>
 314                 *            end
 315                 *
 316                 * FIXME: we're also fooled if someone does something heinous such as
 317                 *
 318                 *   puts("hello") \
 319                 *       unless <exp>
 320                 */
 321                 if (canMatch (&cp, "case") || canMatch (&cp, "for") ||
 322                         canMatch (&cp, "if") || canMatch (&cp, "unless") ||
 323                         canMatch (&cp, "while"))
 324                 {
 325                         enterUnnamedScope ();
 326                 }
 327
 328                 /*
 329                 * "module M", "class C" and "def m" should only be at the beginning
 330                 * of a line.
 331                 */
 332                 if (canMatch (&cp, "module"))
 333                 {
 334                         readAndEmitTag (&cp, K_MODULE);
 335                 }
 336                 else if (canMatch (&cp, "class"))
 337                 {
 338                         readAndEmitTag (&cp, K_CLASS);
 339                 }
 340                 else if (canMatch (&cp, "def"))
 341                 {
 342                         readAndEmitTag (&cp, K_METHOD);
 343                 }
 344
 345                 while (*cp != '\0')
 346                 {
 347                         /* FIXME: we don't cope with here documents,
 348                         * or regular expression literals, or ... you get the idea.
 349                         * Hopefully, the restriction above that insists on seeing
 350                         * definitions at the starts of lines should keep us out of
 351                         * mischief.
 352                         */
 353                         if (inMultiLineComment || isspace (*cp))
 354                         {
 355                                 ++cp;
 356                         }
 357                         else if (*cp == '#')
 358                         {
 359                                 /* FIXME: this is wrong, but there *probably* won't be a
 360                                 * definition after an interpolated string (where # doesn't
 361                                 * mean 'comment').
 362                                 */
 363                                 break;
 364                         }
 365                         else if (canMatch (&cp, "begin") || canMatch (&cp, "do"))
 366                         {
 367                                 enterUnnamedScope ();
 368                         }
 369                         else if (canMatch (&cp, "end") && stringListCount (nesting) > 0)
 370                         {
 371                                 /* Leave the most recent scope. */
 372                                 vStringDelete (stringListLast (nesting));
 373                                 stringListRemoveLast (nesting);
 374                         }
 375                         else if (*cp == '"')
 376                         {
 377                                 /* Skip string literals.
 378                                  * FIXME: should cope with escapes and interpolation.
 379                                  */
 380                                 do {
 381                                         ++cp;
 382                                 } while (*cp != 0 && *cp != '"');
 383                                 if (*cp == '"')
 384                                     cp++; /* skip the last found '"' */
 385                         }
 386                         else if (*cp != '\0')
 387                         {
 388                                 do
 389                                         ++cp;
 390                                 while (isalnum (*cp) || *cp == '_');
 391                         }
 392                 }
 393         }
 394         stringListDelete (nesting);
 395 }
 396
 397 extern parserDefinition* RubyParser (void)
 398 {
 399         static const char *const extensions [] = { "rb", "ruby", NULL };
 400         parserDefinition* def = parserNew ("Ruby");
 401         def->kinds      = RubyKinds;
 402         def->kindCount  = KIND_COUNT (RubyKinds);
 403         def->extensions = extensions;
 404         def->parser     = findRubyTags;
 405         return def;
 406 }
 407
 408 /* vi:set tabstop=4 shiftwidth=4: */