ruby: handle singleton method including ?!= in its name(sf.bug:364)
[geany-mirror.git] / tagmanager / ctags / ruby.c
blobfdb5335f89d5bded18a7aa505ce1a3b864cc1ee5
1 /*
2 * Copyright (c) 2000-2001, Thaddeus Covert <sahuagin@mediaone.net>
3 * Copyright (c) 2002 Matthias Veit <matthias_veit@yahoo.de>
4 * Copyright (c) 2004 Elliott Hughes <enh@acm.org>
6 * This source code is released for free distribution under the terms of the
7 * GNU General Public License.
9 * This module contains functions for generating tags for Ruby language
10 * files.
14 * INCLUDE FILES
16 #include "general.h" /* must always come first */
18 #include <string.h>
20 #include "entry.h"
21 #include "parse.h"
22 #include "read.h"
23 #include "vstring.h"
26 * DATA DECLARATIONS
/*
 * Kinds of tag this parser can produce.  Every non-negative value is used
 * as an index into the RubyKinds table, so the order of the enumerators
 * must match the order of the entries in that table.
 */
typedef enum {
	K_UNDEFINED = -1,	/* no tag is generated for this construct */
	K_CLASS,
	K_METHOD,
	K_MODULE,
	K_SINGLETON,
	K_DESCRIBE,
	K_CONTEXT
} rubyKind;
33 * DATA DEFINITIONS
35 static kindOption RubyKinds [] = {
36 { TRUE, 'c', "class", "classes" },
37 { TRUE, 'f', "method", "methods" },
38 { TRUE, 'm', "namespace", "modules" },
39 { TRUE, 'F', "member", "singleton methods" },
40 { TRUE, 'd', "describe", "describes" },
41 { TRUE, 'C', "context", "contexts" }
44 static stringList* nesting = NULL;
47 * FUNCTION DEFINITIONS
51 * Returns a string describing the scope in 'list'.
52 * We record the current scope as a list of entered scopes.
53 * Scopes corresponding to 'if' statements and the like are
54 * represented by empty strings. Scopes corresponding to
55 * modules and classes are represented by the name of the
56 * module or class.
58 static vString* stringListToScope (const stringList* list)
60 unsigned int i;
61 unsigned int chunks_output = 0;
62 vString* result = vStringNew ();
63 const unsigned int max = stringListCount (list);
64 for (i = 0; i < max; ++i)
66 vString* chunk = stringListItem (list, i);
67 if (vStringLength (chunk) > 0)
69 vStringCatS (result, (chunks_output++ > 0) ? "." : "");
70 vStringCatS (result, vStringValue (chunk));
73 return result;
77 * Attempts to advance 's' past 'literal'.
78 * Returns TRUE if it did, FALSE (and leaves 's' where
79 * it was) otherwise.
81 static boolean canMatch (const unsigned char** s, const char* literal)
83 const int literal_length = strlen (literal);
84 const int s_length = strlen ((const char *)*s);
86 if (s_length < literal_length)
87 return FALSE;
89 const unsigned char next_char = *(*s + literal_length);
90 if (strncmp ((const char*) *s, literal, literal_length) != 0)
92 return FALSE;
94 /* Additionally check that we're at the end of a token. */
95 if ( ! (next_char == 0 || isspace (next_char) || next_char == '(' || next_char == ';'))
97 return FALSE;
99 *s += literal_length;
100 return TRUE;
104 * Attempts to advance 'cp' past a Ruby operator method name. Returns
105 * TRUE if successful (and copies the name into 'name'), FALSE otherwise.
107 static boolean parseRubyOperator (vString* name, const unsigned char** cp)
109 static const char* RUBY_OPERATORS[] = {
110 "[]", "[]=",
111 "**",
112 "!", "~", "+@", "-@",
113 "*", "/", "%",
114 "+", "-",
115 ">>", "<<",
116 "&",
117 "^", "|",
118 "<=", "<", ">", ">=",
119 "<=>", "==", "===", "!=", "=~", "!~",
120 "`",
121 NULL
123 int i;
124 for (i = 0; RUBY_OPERATORS[i] != NULL; ++i)
126 if (canMatch (cp, RUBY_OPERATORS[i]))
128 vStringCatS (name, RUBY_OPERATORS[i]);
129 return TRUE;
132 return FALSE;
136 * Emits a tag for the given 'name' of kind 'kind' at the current nesting.
138 static void emitRubyTag (vString* name, rubyKind kind)
140 tagEntryInfo tag;
141 vString* scope;
143 if (!RubyKinds[kind].enabled) {
144 return;
147 vStringTerminate (name);
148 scope = stringListToScope (nesting);
150 initTagEntry (&tag, vStringValue (name));
151 if (vStringLength (scope) > 0) {
152 tag.extensionFields.scope [0] = "class";
153 tag.extensionFields.scope [1] = vStringValue (scope);
155 tag.kindName = RubyKinds [kind].name;
156 tag.kind = RubyKinds [kind].letter;
157 makeTagEntry (&tag);
159 stringListAdd (nesting, vStringNewCopy (name));
161 vStringClear (name);
162 vStringDelete (scope);
165 /* Tests whether 'ch' is a character in 'list'. */
166 static boolean charIsIn (char ch, const char* list)
168 return (strchr (list, ch) != NULL);
/* Moves '*cp' forward to the first character that is not whitespace. */
static void skipWhitespace (const unsigned char** cp)
{
	const unsigned char* cursor = *cp;

	while (isspace (*cursor))
		++cursor;

	*cp = cursor;
}
181 * Copies the characters forming an identifier from *cp into
182 * name, leaving *cp pointing to the character after the identifier.
184 static rubyKind parseIdentifier (
185 const unsigned char** cp, vString* name, rubyKind kind)
187 /* Method names are slightly different to class and variable names.
188 * A method name may optionally end with a question mark, exclamation
189 * point or equals sign. These are all part of the name.
190 * A method name may also contain a period if it's a singleton method.
192 const char* also_ok;
193 if (kind == K_METHOD)
195 also_ok = "_.?!=";
197 else if (kind == K_SINGLETON)
199 also_ok = "_?!=";
201 else if (kind == K_DESCRIBE || kind == K_CONTEXT)
203 also_ok = " ,\".#_?!='/-";
205 else
207 also_ok = "_";
210 skipWhitespace (cp);
212 /* Check for an anonymous (singleton) class such as "class << HTTP". */
213 if (kind == K_CLASS && **cp == '<' && *(*cp + 1) == '<')
215 return K_UNDEFINED;
218 /* Check for operators such as "def []=(key, val)". */
219 if (kind == K_METHOD || kind == K_SINGLETON)
221 if (parseRubyOperator (name, cp))
223 return kind;
227 /* Copy the identifier into 'name'. */
228 while (**cp != 0 && (isalnum (**cp) || charIsIn (**cp, also_ok)))
230 char last_char = **cp;
232 vStringPut (name, last_char);
233 ++*cp;
235 if (kind == K_METHOD)
237 /* Recognize singleton methods. */
238 if (last_char == '.')
240 vStringTerminate (name);
241 vStringClear (name);
242 return parseIdentifier (cp, name, K_SINGLETON);
246 if (kind == K_METHOD || kind == K_SINGLETON)
248 /* Recognize characters which mark the end of a method name. */
249 if (charIsIn (last_char, "?!="))
251 break;
255 return kind;
258 static void readAndEmitTag (const unsigned char** cp, rubyKind expected_kind)
260 if (isspace (**cp))
262 vString *name = vStringNew ();
263 rubyKind actual_kind = parseIdentifier (cp, name, expected_kind);
265 if (actual_kind == K_UNDEFINED || vStringLength (name) == 0)
268 * What kind of tags should we create for code like this?
270 * %w(self.clfloor clfloor).each do |name|
271 * module_eval <<-"end;"
272 * def #{name}(x, y=1)
273 * q, r = x.divmod(y)
274 * q = q.to_i
275 * return q, r
276 * end
277 * end;
278 * end
280 * Or this?
282 * class << HTTP
284 * For now, we don't create any.
287 else
289 emitRubyTag (name, actual_kind);
291 vStringDelete (name);
295 static void enterUnnamedScope (void)
297 stringListAdd (nesting, vStringNewInit (""));
300 static void findRubyTags (void)
302 const unsigned char *line;
303 boolean inMultiLineComment = FALSE;
305 nesting = stringListNew ();
307 /* FIXME: this whole scheme is wrong, because Ruby isn't line-based.
308 * You could perfectly well write:
310 * def
311 * method
312 * puts("hello")
313 * end
315 * if you wished, and this function would fail to recognize anything.
317 while ((line = fileReadLine ()) != NULL)
319 const unsigned char *cp = line;
320 /* if we expect a separator after a while, for, or until statement
321 * separators are "do", ";" or newline */
322 boolean expect_separator = FALSE;
324 if (canMatch (&cp, "=begin"))
326 inMultiLineComment = TRUE;
327 continue;
329 if (canMatch (&cp, "=end"))
331 inMultiLineComment = FALSE;
332 continue;
335 skipWhitespace (&cp);
337 /* Avoid mistakenly starting a scope for modifiers such as
339 * return if <exp>
341 * FIXME: this is fooled by code such as
343 * result = if <exp>
344 * <a>
345 * else
346 * <b>
347 * end
349 * FIXME: we're also fooled if someone does something heinous such as
351 * puts("hello") \
352 * unless <exp>
354 if (canMatch (&cp, "for") || canMatch (&cp, "until") ||
355 canMatch (&cp, "while"))
357 expect_separator = TRUE;
358 enterUnnamedScope ();
360 else if (canMatch (&cp, "case") || canMatch (&cp, "if") ||
361 canMatch (&cp, "unless"))
363 enterUnnamedScope ();
367 * "module M", "class C" and "def m" should only be at the beginning
368 * of a line.
370 if (canMatch (&cp, "module"))
372 readAndEmitTag (&cp, K_MODULE);
374 else if (canMatch (&cp, "class"))
376 readAndEmitTag (&cp, K_CLASS);
378 else if (canMatch (&cp, "def"))
380 readAndEmitTag (&cp, K_METHOD);
382 else if (canMatch (&cp, "describe"))
384 readAndEmitTag (&cp, K_DESCRIBE);
386 else if (canMatch (&cp, "context"))
388 readAndEmitTag (&cp, K_CONTEXT);
391 while (*cp != '\0')
393 /* FIXME: we don't cope with here documents,
394 * or regular expression literals, or ... you get the idea.
395 * Hopefully, the restriction above that insists on seeing
396 * definitions at the starts of lines should keep us out of
397 * mischief.
399 if (inMultiLineComment || isspace (*cp))
401 ++cp;
403 else if (*cp == '#')
405 /* FIXME: this is wrong, but there *probably* won't be a
406 * definition after an interpolated string (where # doesn't
407 * mean 'comment').
409 break;
411 else if (canMatch (&cp, "begin"))
413 enterUnnamedScope ();
415 else if (canMatch (&cp, "do"))
417 if (! expect_separator)
418 enterUnnamedScope ();
419 else
420 expect_separator = FALSE;
422 else if (canMatch (&cp, "end") && stringListCount (nesting) > 0)
424 /* Leave the most recent scope. */
425 vStringDelete (stringListLast (nesting));
426 stringListRemoveLast (nesting);
428 else if (*cp == '"')
430 /* Skip string literals.
431 * FIXME: should cope with escapes and interpolation.
433 do {
434 ++cp;
435 } while (*cp != 0 && *cp != '"');
436 if (*cp == '"')
437 cp++; /* skip the last found '"' */
439 else if (*cp == ';')
441 ++cp;
442 expect_separator = FALSE;
444 else if (*cp != '\0')
447 ++cp;
448 while (isalnum (*cp) || *cp == '_');
452 stringListDelete (nesting);
455 extern parserDefinition* RubyParser (void)
457 static const char *const extensions [] = { "rb", "ruby", NULL };
458 parserDefinition* def = parserNew ("Ruby");
459 def->kinds = RubyKinds;
460 def->kindCount = KIND_COUNT (RubyKinds);
461 def->extensions = extensions;
462 def->parser = findRubyTags;
463 return def;
466 /* vi:set tabstop=4 shiftwidth=4: */