Use ARRAY_SIZE() in parsers
[geany-mirror.git] / ctags / parsers / ruby.c
blob7acabb82ce94e88233cc087977394c147556bbbe
1 /*
2 * Copyright (c) 2000-2001, Thaddeus Covert <sahuagin@mediaone.net>
3 * Copyright (c) 2002 Matthias Veit <matthias_veit@yahoo.de>
4 * Copyright (c) 2004 Elliott Hughes <enh@acm.org>
6 * This source code is released for free distribution under the terms of the
7 * GNU General Public License version 2 or (at your option) any later version.
9 * This module contains functions for generating tags for Ruby language
10 * files.
14 * INCLUDE FILES
16 #include "general.h" /* must always come first */
18 #include <string.h>
20 #include "debug.h"
21 #include "entry.h"
22 #include "parse.h"
23 #include "nestlevel.h"
24 #include "read.h"
25 #include "routines.h"
26 #include "vstring.h"
29 * DATA DECLARATIONS
31 typedef enum {
32 K_UNDEFINED = -1, K_CLASS, K_METHOD, K_MODULE, K_SINGLETON,
33 } rubyKind;
36 * DATA DEFINITIONS
38 static kindOption RubyKinds [] = {
39 { TRUE, 'c', "class", "classes" },
40 { TRUE, 'f', "method", "methods" },
41 { TRUE, 'm', "module", "modules" },
42 { TRUE, 'F', "singletonMethod", "singleton methods" },
43 #if 0
44 /* Following two kinds are reserved. */
45 { TRUE, 'd', "describe", "describes and contexts for Rspec" },
46 { TRUE, 'C', "constant", "constants" },
47 #endif
50 static NestingLevels* nesting = NULL;
52 #define SCOPE_SEPARATOR '.'
55 * FUNCTION DEFINITIONS
58 static void enterUnnamedScope (void);
61 * Returns a string describing the scope in 'nls'.
62 * We record the current scope as a list of entered scopes.
63 * Scopes corresponding to 'if' statements and the like are
64 * represented by empty strings. Scopes corresponding to
65 * modules and classes are represented by the name of the
66 * module or class.
68 static vString* nestingLevelsToScope (const NestingLevels* nls)
70 int i;
71 unsigned int chunks_output = 0;
72 vString* result = vStringNew ();
73 for (i = 0; i < nls->n; ++i)
75 const vString* chunk = nls->levels[i].name;
76 if (vStringLength (chunk) > 0)
78 if (chunks_output++ > 0)
79 vStringPut (result, SCOPE_SEPARATOR);
80 vStringCatS (result, vStringValue (chunk));
83 return result;
87 * Attempts to advance 's' past 'literal'.
88 * Returns TRUE if it did, FALSE (and leaves 's' where
89 * it was) otherwise.
91 static boolean canMatch (const unsigned char** s, const char* literal,
92 boolean (*end_check) (int))
94 const int literal_length = strlen (literal);
95 const int s_length = strlen ((const char *)*s);
97 if (s_length < literal_length)
98 return FALSE;
100 const unsigned char next_char = *(*s + literal_length);
101 if (strncmp ((const char*) *s, literal, literal_length) != 0)
103 return FALSE;
105 /* Additionally check that we're at the end of a token. */
106 if (! end_check (next_char))
108 return FALSE;
110 *s += literal_length;
111 return TRUE;
114 static boolean isIdentChar (int c)
116 return (isalnum (c) || c == '_');
119 static boolean notIdentChar (int c)
121 return ! isIdentChar (c);
124 static boolean notOperatorChar (int c)
126 return ! (c == '[' || c == ']' ||
127 c == '=' || c == '!' || c == '~' ||
128 c == '+' || c == '-' ||
129 c == '@' || c == '*' || c == '/' || c == '%' ||
130 c == '<' || c == '>' ||
131 c == '&' || c == '^' || c == '|');
134 static boolean isWhitespace (int c)
136 return c == 0 || isspace (c);
139 static boolean canMatchKeyword (const unsigned char** s, const char* literal)
141 return canMatch (s, literal, notIdentChar);
145 * Attempts to advance 'cp' past a Ruby operator method name. Returns
146 * TRUE if successful (and copies the name into 'name'), FALSE otherwise.
148 static boolean parseRubyOperator (vString* name, const unsigned char** cp)
150 static const char* RUBY_OPERATORS[] = {
151 "[]", "[]=",
152 "**",
153 "!", "~", "+@", "-@",
154 "*", "/", "%",
155 "+", "-",
156 ">>", "<<",
157 "&",
158 "^", "|",
159 "<=", "<", ">", ">=",
160 "<=>", "==", "===", "!=", "=~", "!~",
161 "`",
162 NULL
164 int i;
165 for (i = 0; RUBY_OPERATORS[i] != NULL; ++i)
167 if (canMatch (cp, RUBY_OPERATORS[i], notOperatorChar))
169 vStringCatS (name, RUBY_OPERATORS[i]);
170 return TRUE;
173 return FALSE;
177 * Emits a tag for the given 'name' of kind 'kind' at the current nesting.
179 static void emitRubyTag (vString* name, rubyKind kind)
181 tagEntryInfo tag;
182 vString* scope;
183 rubyKind parent_kind = K_UNDEFINED;
184 NestingLevel *lvl;
185 const char *unqualified_name;
186 const char *qualified_name;
188 if (!RubyKinds[kind].enabled) {
189 return;
192 vStringTerminate (name);
193 scope = nestingLevelsToScope (nesting);
194 lvl = nestingLevelsGetCurrent (nesting);
195 if (lvl)
196 parent_kind = lvl->type;
198 qualified_name = vStringValue (name);
199 unqualified_name = strrchr (qualified_name, SCOPE_SEPARATOR);
200 if (unqualified_name && unqualified_name[1])
202 if (unqualified_name > qualified_name)
204 if (vStringLength (scope) > 0)
205 vStringPut (scope, SCOPE_SEPARATOR);
206 vStringNCatS (scope, qualified_name,
207 unqualified_name - qualified_name);
208 /* assume module parent type for a lack of a better option */
209 parent_kind = K_MODULE;
211 unqualified_name++;
213 else
214 unqualified_name = qualified_name;
216 initTagEntry (&tag, unqualified_name, &(RubyKinds [kind]));
217 if (vStringLength (scope) > 0) {
218 Assert (0 <= parent_kind &&
219 (size_t) parent_kind < (ARRAY_SIZE (RubyKinds)));
221 tag.extensionFields.scopeKind = &(RubyKinds [parent_kind]);
222 tag.extensionFields.scopeName = vStringValue (scope);
224 makeTagEntry (&tag);
226 nestingLevelsPush (nesting, name, kind);
228 vStringClear (name);
229 vStringDelete (scope);
232 /* Tests whether 'ch' is a character in 'list'. */
233 static boolean charIsIn (char ch, const char* list)
235 return (strchr (list, ch) != NULL);
/* Advances 'cp' over leading whitespace, stopping at the first
 * non-whitespace character (which may be the terminating NUL). */
static void skipWhitespace (const unsigned char** cp)
{
	const unsigned char* p = *cp;

	while (isspace (*p))
		++p;
	*cp = p;
}
248 * Copies the characters forming an identifier from *cp into
249 * name, leaving *cp pointing to the character after the identifier.
251 static rubyKind parseIdentifier (
252 const unsigned char** cp, vString* name, rubyKind kind)
254 /* Method names are slightly different to class and variable names.
255 * A method name may optionally end with a question mark, exclamation
256 * point or equals sign. These are all part of the name.
257 * A method name may also contain a period if it's a singleton method.
259 boolean had_sep = FALSE;
260 const char* also_ok;
261 if (kind == K_METHOD)
263 also_ok = ".?!=";
265 else if (kind == K_SINGLETON)
267 also_ok = "?!=";
269 else
271 also_ok = "";
274 skipWhitespace (cp);
276 /* Check for an anonymous (singleton) class such as "class << HTTP". */
277 if (kind == K_CLASS && **cp == '<' && *(*cp + 1) == '<')
279 return K_UNDEFINED;
282 /* Check for operators such as "def []=(key, val)". */
283 if (kind == K_METHOD || kind == K_SINGLETON)
285 if (parseRubyOperator (name, cp))
287 return kind;
291 /* Copy the identifier into 'name'. */
292 while (**cp != 0 && (**cp == ':' || isIdentChar (**cp) || charIsIn (**cp, also_ok)))
294 char last_char = **cp;
296 if (last_char == ':')
297 had_sep = TRUE;
298 else
300 if (had_sep)
302 vStringPut (name, SCOPE_SEPARATOR);
303 had_sep = FALSE;
305 vStringPut (name, last_char);
307 ++*cp;
309 if (kind == K_METHOD)
311 /* Recognize singleton methods. */
312 if (last_char == '.')
314 vStringTerminate (name);
315 vStringClear (name);
316 return parseIdentifier (cp, name, K_SINGLETON);
320 if (kind == K_METHOD || kind == K_SINGLETON)
322 /* Recognize characters which mark the end of a method name. */
323 if (charIsIn (last_char, "?!="))
325 break;
329 return kind;
332 static void readAndEmitTag (const unsigned char** cp, rubyKind expected_kind)
334 if (isspace (**cp))
336 vString *name = vStringNew ();
337 rubyKind actual_kind = parseIdentifier (cp, name, expected_kind);
339 if (actual_kind == K_UNDEFINED || vStringLength (name) == 0)
342 * What kind of tags should we create for code like this?
344 * %w(self.clfloor clfloor).each do |name|
345 * module_eval <<-"end;"
346 * def #{name}(x, y=1)
347 * q, r = x.divmod(y)
348 * q = q.to_i
349 * return q, r
350 * end
351 * end;
352 * end
354 * Or this?
356 * class << HTTP
358 * For now, we don't create any.
360 enterUnnamedScope ();
362 else
364 emitRubyTag (name, actual_kind);
366 vStringDelete (name);
370 static void enterUnnamedScope (void)
372 vString *name = vStringNewInit ("");
373 NestingLevel *parent = nestingLevelsGetCurrent (nesting);
374 nestingLevelsPush (nesting, name, parent ? parent->type : K_UNDEFINED);
375 vStringDelete (name);
378 static void findRubyTags (void)
380 const unsigned char *line;
381 boolean inMultiLineComment = FALSE;
383 nesting = nestingLevelsNew ();
385 /* FIXME: this whole scheme is wrong, because Ruby isn't line-based.
386 * You could perfectly well write:
388 * def
389 * method
390 * puts("hello")
391 * end
393 * if you wished, and this function would fail to recognize anything.
395 while ((line = readLineFromInputFile ()) != NULL)
397 const unsigned char *cp = line;
398 /* if we expect a separator after a while, for, or until statement
399 * separators are "do", ";" or newline */
400 boolean expect_separator = FALSE;
402 if (canMatch (&cp, "=begin", isWhitespace))
404 inMultiLineComment = TRUE;
405 continue;
407 if (canMatch (&cp, "=end", isWhitespace))
409 inMultiLineComment = FALSE;
410 continue;
412 if (inMultiLineComment)
413 continue;
415 skipWhitespace (&cp);
417 /* Avoid mistakenly starting a scope for modifiers such as
419 * return if <exp>
421 * FIXME: this is fooled by code such as
423 * result = if <exp>
424 * <a>
425 * else
426 * <b>
427 * end
429 * FIXME: we're also fooled if someone does something heinous such as
431 * puts("hello") \
432 * unless <exp>
434 if (canMatchKeyword (&cp, "for") ||
435 canMatchKeyword (&cp, "until") ||
436 canMatchKeyword (&cp, "while"))
438 expect_separator = TRUE;
439 enterUnnamedScope ();
441 else if (canMatchKeyword (&cp, "case") ||
442 canMatchKeyword (&cp, "if") ||
443 canMatchKeyword (&cp, "unless"))
445 enterUnnamedScope ();
449 * "module M", "class C" and "def m" should only be at the beginning
450 * of a line.
452 if (canMatchKeyword (&cp, "module"))
454 readAndEmitTag (&cp, K_MODULE);
456 else if (canMatchKeyword (&cp, "class"))
458 readAndEmitTag (&cp, K_CLASS);
460 else if (canMatchKeyword (&cp, "def"))
462 rubyKind kind = K_METHOD;
463 NestingLevel *nl = nestingLevelsGetCurrent (nesting);
465 /* if the def is inside an unnamed scope at the class level, assume
466 * it's from a singleton from a construct like this:
468 * class C
469 * class << self
470 * def singleton
471 * ...
472 * end
473 * end
474 * end
476 if (nl && nl->type == K_CLASS && vStringLength (nl->name) == 0)
477 kind = K_SINGLETON;
478 readAndEmitTag (&cp, kind);
480 while (*cp != '\0')
482 /* FIXME: we don't cope with here documents,
483 * or regular expression literals, or ... you get the idea.
484 * Hopefully, the restriction above that insists on seeing
485 * definitions at the starts of lines should keep us out of
486 * mischief.
488 if (inMultiLineComment || isspace (*cp))
490 ++cp;
492 else if (*cp == '#')
494 /* FIXME: this is wrong, but there *probably* won't be a
495 * definition after an interpolated string (where # doesn't
496 * mean 'comment').
498 break;
500 else if (canMatchKeyword (&cp, "begin"))
502 enterUnnamedScope ();
504 else if (canMatchKeyword (&cp, "do"))
506 if (! expect_separator)
507 enterUnnamedScope ();
508 else
509 expect_separator = FALSE;
511 else if (canMatchKeyword (&cp, "end") && nesting->n > 0)
513 /* Leave the most recent scope. */
514 nestingLevelsPop (nesting);
516 else if (*cp == '"')
518 /* Skip string literals.
519 * FIXME: should cope with escapes and interpolation.
521 do {
522 ++cp;
523 } while (*cp != 0 && *cp != '"');
524 if (*cp == '"')
525 cp++; /* skip the last found '"' */
527 else if (*cp == ';')
529 ++cp;
530 expect_separator = FALSE;
532 else if (*cp != '\0')
535 ++cp;
536 while (isIdentChar (*cp));
540 nestingLevelsFree (nesting);
543 extern parserDefinition* RubyParser (void)
545 static const char *const extensions [] = { "rb", "ruby", NULL };
546 parserDefinition* def = parserNewFull ("Ruby", KIND_FILE_ALT);
547 def->kinds = RubyKinds;
548 def->kindCount = ARRAY_SIZE (RubyKinds);
549 def->extensions = extensions;
550 def->parser = findRubyTags;
551 return def;
554 /* vi:set tabstop=4 shiftwidth=4: */