Merge pull request #651 from b4n/vte-in-various-prefs
[geany-mirror.git] / ctags / parsers / ruby.c
blob75f97fe0ab6d35761a54f60f23199b928e4c90f0
/*
*   Copyright (c) 2000-2001, Thaddeus Covert <sahuagin@mediaone.net>
*   Copyright (c) 2002 Matthias Veit <matthias_veit@yahoo.de>
*   Copyright (c) 2004 Elliott Hughes <enh@acm.org>
*
*   This source code is released for free distribution under the terms of the
*   GNU General Public License version 2 or (at your option) any later version.
*
*   This module contains functions for generating tags for Ruby language
*   files.
*/

/*
*   INCLUDE FILES
*/
16 #include "general.h" /* must always come first */
18 #include <string.h>
20 #include "entry.h"
21 #include "parse.h"
22 #include "nestlevel.h"
23 #include "read.h"
24 #include "vstring.h"
/*
*   DATA DECLARATIONS
*/

/* Kinds of tag this parser emits.  Every non-negative value is used as an
 * index into RubyKinds; K_UNDEFINED marks "no kind". */
typedef enum {
	K_UNDEFINED = -1,
	K_CLASS,
	K_METHOD,
	K_MODULE,
	K_SINGLETON
} rubyKind;
34 * DATA DEFINITIONS
36 static kindOption RubyKinds [] = {
37 { TRUE, 'c', "class", "classes" },
38 { TRUE, 'f', "method", "methods" },
39 { TRUE, 'm', "module", "modules" },
40 { TRUE, 'F', "singletonMethod", "singleton methods" },
41 #if 0
42 /* Following two kinds are reserved. */
43 { TRUE, 'd', "describe", "describes and contexts for Rspec" },
44 { TRUE, 'C', "constant", "constants" },
45 #endif
48 static NestingLevels* nesting = NULL;
50 #define SCOPE_SEPARATOR '.'
53 * FUNCTION DEFINITIONS
56 static void enterUnnamedScope (void);
59 * Returns a string describing the scope in 'nls'.
60 * We record the current scope as a list of entered scopes.
61 * Scopes corresponding to 'if' statements and the like are
62 * represented by empty strings. Scopes corresponding to
63 * modules and classes are represented by the name of the
64 * module or class.
66 static vString* nestingLevelsToScope (const NestingLevels* nls)
68 int i;
69 unsigned int chunks_output = 0;
70 vString* result = vStringNew ();
71 for (i = 0; i < nls->n; ++i)
73 const vString* chunk = nls->levels[i].name;
74 if (vStringLength (chunk) > 0)
76 if (chunks_output++ > 0)
77 vStringPut (result, SCOPE_SEPARATOR);
78 vStringCatS (result, vStringValue (chunk));
81 return result;
85 * Attempts to advance 's' past 'literal'.
86 * Returns TRUE if it did, FALSE (and leaves 's' where
87 * it was) otherwise.
89 static boolean canMatch (const unsigned char** s, const char* literal,
90 boolean (*end_check) (int))
92 const int literal_length = strlen (literal);
93 const int s_length = strlen ((const char *)*s);
95 if (s_length < literal_length)
96 return FALSE;
98 const unsigned char next_char = *(*s + literal_length);
99 if (strncmp ((const char*) *s, literal, literal_length) != 0)
101 return FALSE;
103 /* Additionally check that we're at the end of a token. */
104 if (! end_check (next_char))
106 return FALSE;
108 *s += literal_length;
109 return TRUE;
112 static boolean isIdentChar (int c)
114 return (isalnum (c) || c == '_');
117 static boolean notIdentChar (int c)
119 return ! isIdentChar (c);
122 static boolean notOperatorChar (int c)
124 return ! (c == '[' || c == ']' ||
125 c == '=' || c == '!' || c == '~' ||
126 c == '+' || c == '-' ||
127 c == '@' || c == '*' || c == '/' || c == '%' ||
128 c == '<' || c == '>' ||
129 c == '&' || c == '^' || c == '|');
132 static boolean isWhitespace (int c)
134 return c == 0 || isspace (c);
137 static boolean canMatchKeyword (const unsigned char** s, const char* literal)
139 return canMatch (s, literal, notIdentChar);
143 * Attempts to advance 'cp' past a Ruby operator method name. Returns
144 * TRUE if successful (and copies the name into 'name'), FALSE otherwise.
146 static boolean parseRubyOperator (vString* name, const unsigned char** cp)
148 static const char* RUBY_OPERATORS[] = {
149 "[]", "[]=",
150 "**",
151 "!", "~", "+@", "-@",
152 "*", "/", "%",
153 "+", "-",
154 ">>", "<<",
155 "&",
156 "^", "|",
157 "<=", "<", ">", ">=",
158 "<=>", "==", "===", "!=", "=~", "!~",
159 "`",
160 NULL
162 int i;
163 for (i = 0; RUBY_OPERATORS[i] != NULL; ++i)
165 if (canMatch (cp, RUBY_OPERATORS[i], notOperatorChar))
167 vStringCatS (name, RUBY_OPERATORS[i]);
168 return TRUE;
171 return FALSE;
175 * Emits a tag for the given 'name' of kind 'kind' at the current nesting.
177 static void emitRubyTag (vString* name, rubyKind kind)
179 tagEntryInfo tag;
180 vString* scope;
181 rubyKind parent_kind = K_UNDEFINED;
182 NestingLevel *lvl;
183 const char *unqualified_name;
184 const char *qualified_name;
186 if (!RubyKinds[kind].enabled) {
187 return;
190 vStringTerminate (name);
191 scope = nestingLevelsToScope (nesting);
192 lvl = nestingLevelsGetCurrent (nesting);
193 if (lvl)
194 parent_kind = lvl->type;
196 qualified_name = vStringValue (name);
197 unqualified_name = strrchr (qualified_name, SCOPE_SEPARATOR);
198 if (unqualified_name && unqualified_name[1])
200 if (unqualified_name > qualified_name)
202 if (vStringLength (scope) > 0)
203 vStringPut (scope, SCOPE_SEPARATOR);
204 vStringNCatS (scope, qualified_name,
205 unqualified_name - qualified_name);
206 /* assume module parent type for a lack of a better option */
207 parent_kind = K_MODULE;
209 unqualified_name++;
211 else
212 unqualified_name = qualified_name;
214 initTagEntry (&tag, unqualified_name);
215 if (vStringLength (scope) > 0) {
216 Assert (0 <= parent_kind &&
217 (size_t) parent_kind < (sizeof RubyKinds / sizeof RubyKinds[0]));
219 tag.extensionFields.scope [0] = RubyKinds [parent_kind].name;
220 tag.extensionFields.scope [1] = vStringValue (scope);
222 tag.kindName = RubyKinds [kind].name;
223 tag.kind = RubyKinds [kind].letter;
224 makeTagEntry (&tag);
226 nestingLevelsPush (nesting, name, kind);
228 vStringClear (name);
229 vStringDelete (scope);
232 /* Tests whether 'ch' is a character in 'list'. */
233 static boolean charIsIn (char ch, const char* list)
235 return (strchr (list, ch) != NULL);
/* Moves '*cp' forward to the first character that is not whitespace
 * (possibly the string terminator). */
static void skipWhitespace (const unsigned char** cp)
{
	const unsigned char* cursor = *cp;

	while (isspace (*cursor))
		cursor++;
	*cp = cursor;
}
248 * Copies the characters forming an identifier from *cp into
249 * name, leaving *cp pointing to the character after the identifier.
251 static rubyKind parseIdentifier (
252 const unsigned char** cp, vString* name, rubyKind kind)
254 /* Method names are slightly different to class and variable names.
255 * A method name may optionally end with a question mark, exclamation
256 * point or equals sign. These are all part of the name.
257 * A method name may also contain a period if it's a singleton method.
259 boolean had_sep = FALSE;
260 const char* also_ok;
261 if (kind == K_METHOD)
263 also_ok = ".?!=";
265 else if (kind == K_SINGLETON)
267 also_ok = "?!=";
269 else
271 also_ok = "";
274 skipWhitespace (cp);
276 /* Check for an anonymous (singleton) class such as "class << HTTP". */
277 if (kind == K_CLASS && **cp == '<' && *(*cp + 1) == '<')
279 return K_UNDEFINED;
282 /* Check for operators such as "def []=(key, val)". */
283 if (kind == K_METHOD || kind == K_SINGLETON)
285 if (parseRubyOperator (name, cp))
287 return kind;
291 /* Copy the identifier into 'name'. */
292 while (**cp != 0 && (**cp == ':' || isIdentChar (**cp) || charIsIn (**cp, also_ok)))
294 char last_char = **cp;
296 if (last_char == ':')
297 had_sep = TRUE;
298 else
300 if (had_sep)
302 vStringPut (name, SCOPE_SEPARATOR);
303 had_sep = FALSE;
305 vStringPut (name, last_char);
307 ++*cp;
309 if (kind == K_METHOD)
311 /* Recognize singleton methods. */
312 if (last_char == '.')
314 vStringTerminate (name);
315 vStringClear (name);
316 return parseIdentifier (cp, name, K_SINGLETON);
320 if (kind == K_METHOD || kind == K_SINGLETON)
322 /* Recognize characters which mark the end of a method name. */
323 if (charIsIn (last_char, "?!="))
325 break;
329 return kind;
332 static void readAndEmitTag (const unsigned char** cp, rubyKind expected_kind)
334 if (isspace (**cp))
336 vString *name = vStringNew ();
337 rubyKind actual_kind = parseIdentifier (cp, name, expected_kind);
339 if (actual_kind == K_UNDEFINED || vStringLength (name) == 0)
342 * What kind of tags should we create for code like this?
344 * %w(self.clfloor clfloor).each do |name|
345 * module_eval <<-"end;"
346 * def #{name}(x, y=1)
347 * q, r = x.divmod(y)
348 * q = q.to_i
349 * return q, r
350 * end
351 * end;
352 * end
354 * Or this?
356 * class << HTTP
358 * For now, we don't create any.
360 enterUnnamedScope ();
362 else
364 emitRubyTag (name, actual_kind);
366 vStringDelete (name);
370 static void enterUnnamedScope (void)
372 vString *name = vStringNewInit ("");
373 NestingLevel *parent = nestingLevelsGetCurrent (nesting);
374 nestingLevelsPush (nesting, name, parent ? parent->type : K_UNDEFINED);
375 vStringDelete (name);
378 static void findRubyTags (void)
380 const unsigned char *line;
381 boolean inMultiLineComment = FALSE;
383 nesting = nestingLevelsNew ();
385 /* FIXME: this whole scheme is wrong, because Ruby isn't line-based.
386 * You could perfectly well write:
388 * def
389 * method
390 * puts("hello")
391 * end
393 * if you wished, and this function would fail to recognize anything.
395 while ((line = fileReadLine ()) != NULL)
397 const unsigned char *cp = line;
398 /* if we expect a separator after a while, for, or until statement
399 * separators are "do", ";" or newline */
400 boolean expect_separator = FALSE;
402 if (canMatch (&cp, "=begin", isWhitespace))
404 inMultiLineComment = TRUE;
405 continue;
407 if (canMatch (&cp, "=end", isWhitespace))
409 inMultiLineComment = FALSE;
410 continue;
412 if (inMultiLineComment)
413 continue;
415 skipWhitespace (&cp);
417 /* Avoid mistakenly starting a scope for modifiers such as
419 * return if <exp>
421 * FIXME: this is fooled by code such as
423 * result = if <exp>
424 * <a>
425 * else
426 * <b>
427 * end
429 * FIXME: we're also fooled if someone does something heinous such as
431 * puts("hello") \
432 * unless <exp>
434 if (canMatchKeyword (&cp, "for") ||
435 canMatchKeyword (&cp, "until") ||
436 canMatchKeyword (&cp, "while"))
438 expect_separator = TRUE;
439 enterUnnamedScope ();
441 else if (canMatchKeyword (&cp, "case") ||
442 canMatchKeyword (&cp, "if") ||
443 canMatchKeyword (&cp, "unless"))
445 enterUnnamedScope ();
449 * "module M", "class C" and "def m" should only be at the beginning
450 * of a line.
452 if (canMatchKeyword (&cp, "module"))
454 readAndEmitTag (&cp, K_MODULE);
456 else if (canMatchKeyword (&cp, "class"))
458 readAndEmitTag (&cp, K_CLASS);
460 else if (canMatchKeyword (&cp, "def"))
462 rubyKind kind = K_METHOD;
463 NestingLevel *nl = nestingLevelsGetCurrent (nesting);
465 /* if the def is inside an unnamed scope at the class level, assume
466 * it's from a singleton from a construct like this:
468 * class C
469 * class << self
470 * def singleton
471 * ...
472 * end
473 * end
474 * end
476 if (nl && nl->type == K_CLASS && vStringLength (nl->name) == 0)
477 kind = K_SINGLETON;
479 readAndEmitTag (&cp, kind);
482 while (*cp != '\0')
484 /* FIXME: we don't cope with here documents,
485 * or regular expression literals, or ... you get the idea.
486 * Hopefully, the restriction above that insists on seeing
487 * definitions at the starts of lines should keep us out of
488 * mischief.
490 if (inMultiLineComment || isspace (*cp))
492 ++cp;
494 else if (*cp == '#')
496 /* FIXME: this is wrong, but there *probably* won't be a
497 * definition after an interpolated string (where # doesn't
498 * mean 'comment').
500 break;
502 else if (canMatchKeyword (&cp, "begin"))
504 enterUnnamedScope ();
506 else if (canMatchKeyword (&cp, "do"))
508 if (! expect_separator)
509 enterUnnamedScope ();
510 else
511 expect_separator = FALSE;
513 else if (canMatchKeyword (&cp, "end") && nesting->n > 0)
515 /* Leave the most recent scope. */
516 nestingLevelsPop (nesting);
518 else if (*cp == '"')
520 /* Skip string literals.
521 * FIXME: should cope with escapes and interpolation.
523 do {
524 ++cp;
525 } while (*cp != 0 && *cp != '"');
526 if (*cp == '"')
527 cp++; /* skip the last found '"' */
529 else if (*cp == ';')
531 ++cp;
532 expect_separator = FALSE;
534 else if (*cp != '\0')
537 ++cp;
538 while (isIdentChar (*cp));
542 nestingLevelsFree (nesting);
545 extern parserDefinition* RubyParser (void)
547 static const char *const extensions [] = { "rb", "ruby", NULL };
548 parserDefinition* def = parserNew ("Ruby");
549 def->kinds = RubyKinds;
550 def->kindCount = KIND_COUNT (RubyKinds);
551 def->extensions = extensions;
552 def->parser = findRubyTags;
553 return def;
/* vi:set tabstop=4 shiftwidth=4: */