ruby: Use nestlevel instead of string lists
[geany-mirror.git] / tagmanager / ctags / ruby.c
blob782b146bdbe1f03017a23085727f11d07dd90e02
/*
*   Copyright (c) 2000-2001, Thaddeus Covert <sahuagin@mediaone.net>
*   Copyright (c) 2002 Matthias Veit <matthias_veit@yahoo.de>
*   Copyright (c) 2004 Elliott Hughes <enh@acm.org>
*
*   This source code is released for free distribution under the terms of the
*   GNU General Public License.
*
*   This module contains functions for generating tags for Ruby language
*   files.
*/
14 * INCLUDE FILES
16 #include "general.h" /* must always come first */
18 #include <string.h>
20 #include "entry.h"
21 #include "parse.h"
22 #include "nestlevel.h"
23 #include "read.h"
24 #include "vstring.h"
27 * DATA DECLARATIONS
/*
 * Kinds of tag this parser can produce.  Every non-negative value is used
 * as an index into RubyKinds[], so the order of the enumerators has to
 * match the order of the rows in that table.
 */
typedef enum {
	K_UNDEFINED = -1,	/* not a tag kind; never used as a table index */
	K_CLASS,
	K_METHOD,
	K_MODULE,
	K_SINGLETON,
	K_DESCRIBE,
	K_CONTEXT
} rubyKind;
34 * DATA DEFINITIONS
36 static kindOption RubyKinds [] = {
37 { TRUE, 'c', "class", "classes" },
38 { TRUE, 'f', "method", "methods" },
39 { TRUE, 'm', "namespace", "modules" },
40 { TRUE, 'F', "member", "singleton methods" },
41 { TRUE, 'd', "describe", "describes" },
42 { TRUE, 'C', "context", "contexts" }
45 static NestingLevels* nesting = NULL;
47 #define SCOPE_SEPARATOR '.'
50 * FUNCTION DEFINITIONS
53 static void enterUnnamedScope (void);
56 * Returns a string describing the scope in 'nls'.
57 * We record the current scope as a list of entered scopes.
58 * Scopes corresponding to 'if' statements and the like are
59 * represented by empty strings. Scopes corresponding to
60 * modules and classes are represented by the name of the
61 * module or class.
63 static vString* nestingLevelsToScope (const NestingLevels* nls)
65 int i;
66 unsigned int chunks_output = 0;
67 vString* result = vStringNew ();
68 for (i = 0; i < nls->n; ++i)
70 const vString* chunk = nls->levels[i].name;
71 if (vStringLength (chunk) > 0)
73 if (chunks_output++ > 0)
74 vStringPut (result, SCOPE_SEPARATOR);
75 vStringCatS (result, vStringValue (chunk));
78 return result;
82 * Attempts to advance 's' past 'literal'.
83 * Returns TRUE if it did, FALSE (and leaves 's' where
84 * it was) otherwise.
86 static boolean canMatch (const unsigned char** s, const char* literal,
87 boolean (*end_check) (int))
89 const int literal_length = strlen (literal);
90 const int s_length = strlen ((const char *)*s);
92 if (s_length < literal_length)
93 return FALSE;
95 const unsigned char next_char = *(*s + literal_length);
96 if (strncmp ((const char*) *s, literal, literal_length) != 0)
98 return FALSE;
100 /* Additionally check that we're at the end of a token. */
101 if (! end_check (next_char))
103 return FALSE;
105 *s += literal_length;
106 return TRUE;
109 static boolean notIdentChar (int c)
111 return ! (isalnum (c) || c == '_');
114 static boolean notOperatorChar (int c)
116 return ! (c == '[' || c == ']' ||
117 c == '=' || c == '!' || c == '~' ||
118 c == '+' || c == '-' ||
119 c == '@' || c == '*' || c == '/' || c == '%' ||
120 c == '<' || c == '>' ||
121 c == '&' || c == '^' || c == '|');
124 static boolean isWhitespace (int c)
126 return c == 0 || isspace (c);
129 static boolean canMatchKeyword (const unsigned char** s, const char* literal)
131 return canMatch (s, literal, notIdentChar);
135 * Attempts to advance 'cp' past a Ruby operator method name. Returns
136 * TRUE if successful (and copies the name into 'name'), FALSE otherwise.
138 static boolean parseRubyOperator (vString* name, const unsigned char** cp)
140 static const char* RUBY_OPERATORS[] = {
141 "[]", "[]=",
142 "**",
143 "!", "~", "+@", "-@",
144 "*", "/", "%",
145 "+", "-",
146 ">>", "<<",
147 "&",
148 "^", "|",
149 "<=", "<", ">", ">=",
150 "<=>", "==", "===", "!=", "=~", "!~",
151 "`",
152 NULL
154 int i;
155 for (i = 0; RUBY_OPERATORS[i] != NULL; ++i)
157 if (canMatch (cp, RUBY_OPERATORS[i], notOperatorChar))
159 vStringCatS (name, RUBY_OPERATORS[i]);
160 return TRUE;
163 return FALSE;
167 * Emits a tag for the given 'name' of kind 'kind' at the current nesting.
169 static void emitRubyTag (vString* name, rubyKind kind)
171 tagEntryInfo tag;
172 vString* scope;
173 const char *unqualified_name;
174 const char *qualified_name;
176 if (!RubyKinds[kind].enabled) {
177 return;
180 vStringTerminate (name);
181 scope = nestingLevelsToScope (nesting);
183 qualified_name = vStringValue (name);
184 unqualified_name = strrchr (qualified_name, SCOPE_SEPARATOR);
185 if (unqualified_name && unqualified_name[1])
187 if (unqualified_name > qualified_name)
189 if (vStringLength (scope) > 0)
190 vStringPut (scope, SCOPE_SEPARATOR);
191 vStringNCatS (scope, qualified_name,
192 unqualified_name - qualified_name);
194 unqualified_name++;
196 else
197 unqualified_name = qualified_name;
199 initTagEntry (&tag, unqualified_name);
200 if (vStringLength (scope) > 0) {
201 tag.extensionFields.scope [0] = "class";
202 tag.extensionFields.scope [1] = vStringValue (scope);
204 tag.kindName = RubyKinds [kind].name;
205 tag.kind = RubyKinds [kind].letter;
206 makeTagEntry (&tag);
208 nestingLevelsPush (nesting, name, kind);
210 vStringClear (name);
211 vStringDelete (scope);
214 /* Tests whether 'ch' is a character in 'list'. */
215 static boolean charIsIn (char ch, const char* list)
217 return (strchr (list, ch) != NULL);
/* Advances 'cp' over leading whitespace, stopping at the first character
 * that is not whitespace (which may be the terminating NUL). */
static void skipWhitespace (const unsigned char** cp)
{
	const unsigned char* cursor = *cp;

	while (isspace (*cursor))
		++cursor;

	*cp = cursor;
}
230 * Copies the characters forming an identifier from *cp into
231 * name, leaving *cp pointing to the character after the identifier.
233 static rubyKind parseIdentifier (
234 const unsigned char** cp, vString* name, rubyKind kind)
236 /* Method names are slightly different to class and variable names.
237 * A method name may optionally end with a question mark, exclamation
238 * point or equals sign. These are all part of the name.
239 * A method name may also contain a period if it's a singleton method.
241 boolean had_sep = FALSE;
242 const char* also_ok;
243 if (kind == K_METHOD)
245 also_ok = "_.?!=";
247 else if (kind == K_SINGLETON)
249 also_ok = "_?!=";
251 else if (kind == K_DESCRIBE || kind == K_CONTEXT)
253 also_ok = " ,\".#_?!='/-";
255 else
257 also_ok = "_";
260 skipWhitespace (cp);
262 /* Check for an anonymous (singleton) class such as "class << HTTP". */
263 if (kind == K_CLASS && **cp == '<' && *(*cp + 1) == '<')
265 return K_UNDEFINED;
268 /* Check for operators such as "def []=(key, val)". */
269 if (kind == K_METHOD || kind == K_SINGLETON)
271 if (parseRubyOperator (name, cp))
273 return kind;
277 /* Copy the identifier into 'name'. */
278 while (**cp != 0 && (**cp == ':' || isalnum (**cp) || charIsIn (**cp, also_ok)))
280 char last_char = **cp;
282 if (last_char == ':')
283 had_sep = TRUE;
284 else
286 if (had_sep)
288 vStringPut (name, SCOPE_SEPARATOR);
289 had_sep = FALSE;
291 vStringPut (name, last_char);
293 ++*cp;
295 if (kind == K_METHOD)
297 /* Recognize singleton methods. */
298 if (last_char == '.')
300 vStringTerminate (name);
301 vStringClear (name);
302 return parseIdentifier (cp, name, K_SINGLETON);
306 if (kind == K_METHOD || kind == K_SINGLETON)
308 /* Recognize characters which mark the end of a method name. */
309 if (charIsIn (last_char, "?!="))
311 break;
315 return kind;
318 static void readAndEmitTag (const unsigned char** cp, rubyKind expected_kind)
320 if (isspace (**cp))
322 vString *name = vStringNew ();
323 rubyKind actual_kind = parseIdentifier (cp, name, expected_kind);
325 if (actual_kind == K_UNDEFINED || vStringLength (name) == 0)
328 * What kind of tags should we create for code like this?
330 * %w(self.clfloor clfloor).each do |name|
331 * module_eval <<-"end;"
332 * def #{name}(x, y=1)
333 * q, r = x.divmod(y)
334 * q = q.to_i
335 * return q, r
336 * end
337 * end;
338 * end
340 * Or this?
342 * class << HTTP
344 * For now, we don't create any.
346 enterUnnamedScope ();
348 else
350 emitRubyTag (name, actual_kind);
352 vStringDelete (name);
356 static void enterUnnamedScope (void)
358 vString *name = vStringNewInit ("");
359 NestingLevel *parent = nestingLevelsGetCurrent (nesting);
360 nestingLevelsPush (nesting, name, parent ? parent->type : K_UNDEFINED);
361 vStringDelete (name);
364 static void findRubyTags (void)
366 const unsigned char *line;
367 boolean inMultiLineComment = FALSE;
369 nesting = nestingLevelsNew ();
371 /* FIXME: this whole scheme is wrong, because Ruby isn't line-based.
372 * You could perfectly well write:
374 * def
375 * method
376 * puts("hello")
377 * end
379 * if you wished, and this function would fail to recognize anything.
381 while ((line = fileReadLine ()) != NULL)
383 const unsigned char *cp = line;
384 /* if we expect a separator after a while, for, or until statement
385 * separators are "do", ";" or newline */
386 boolean expect_separator = FALSE;
388 if (canMatch (&cp, "=begin", isWhitespace))
390 inMultiLineComment = TRUE;
391 continue;
393 if (canMatch (&cp, "=end", isWhitespace))
395 inMultiLineComment = FALSE;
396 continue;
398 if (inMultiLineComment)
399 continue;
401 skipWhitespace (&cp);
403 /* Avoid mistakenly starting a scope for modifiers such as
405 * return if <exp>
407 * FIXME: this is fooled by code such as
409 * result = if <exp>
410 * <a>
411 * else
412 * <b>
413 * end
415 * FIXME: we're also fooled if someone does something heinous such as
417 * puts("hello") \
418 * unless <exp>
420 if (canMatchKeyword (&cp, "for") ||
421 canMatchKeyword (&cp, "until") ||
422 canMatchKeyword (&cp, "while"))
424 expect_separator = TRUE;
425 enterUnnamedScope ();
427 else if (canMatchKeyword (&cp, "case") ||
428 canMatchKeyword (&cp, "if") ||
429 canMatchKeyword (&cp, "unless"))
431 enterUnnamedScope ();
435 * "module M", "class C" and "def m" should only be at the beginning
436 * of a line.
438 if (canMatchKeyword (&cp, "module"))
440 readAndEmitTag (&cp, K_MODULE);
442 else if (canMatchKeyword (&cp, "class"))
444 readAndEmitTag (&cp, K_CLASS);
446 else if (canMatchKeyword (&cp, "def"))
448 readAndEmitTag (&cp, K_METHOD);
450 else if (canMatchKeyword (&cp, "describe"))
452 readAndEmitTag (&cp, K_DESCRIBE);
454 else if (canMatchKeyword (&cp, "context"))
456 readAndEmitTag (&cp, K_CONTEXT);
459 while (*cp != '\0')
461 /* FIXME: we don't cope with here documents,
462 * or regular expression literals, or ... you get the idea.
463 * Hopefully, the restriction above that insists on seeing
464 * definitions at the starts of lines should keep us out of
465 * mischief.
467 if (inMultiLineComment || isspace (*cp))
469 ++cp;
471 else if (*cp == '#')
473 /* FIXME: this is wrong, but there *probably* won't be a
474 * definition after an interpolated string (where # doesn't
475 * mean 'comment').
477 break;
479 else if (canMatchKeyword (&cp, "begin"))
481 enterUnnamedScope ();
483 else if (canMatchKeyword (&cp, "do"))
485 if (! expect_separator)
486 enterUnnamedScope ();
487 else
488 expect_separator = FALSE;
490 else if (canMatchKeyword (&cp, "end") && nesting->n > 0)
492 /* Leave the most recent scope. */
493 nestingLevelsPop (nesting);
495 else if (*cp == '"')
497 /* Skip string literals.
498 * FIXME: should cope with escapes and interpolation.
500 do {
501 ++cp;
502 } while (*cp != 0 && *cp != '"');
503 if (*cp == '"')
504 cp++; /* skip the last found '"' */
506 else if (*cp == ';')
508 ++cp;
509 expect_separator = FALSE;
511 else if (*cp != '\0')
514 ++cp;
515 while (isalnum (*cp) || *cp == '_');
519 nestingLevelsFree (nesting);
522 extern parserDefinition* RubyParser (void)
524 static const char *const extensions [] = { "rb", "ruby", NULL };
525 parserDefinition* def = parserNew ("Ruby");
526 def->kinds = RubyKinds;
527 def->kindCount = KIND_COUNT (RubyKinds);
528 def->extensions = extensions;
529 def->parser = findRubyTags;
530 return def;
533 /* vi:set tabstop=4 shiftwidth=4: */