2 * Copyright (c) 2000-2001, Thaddeus Covert <sahuagin@mediaone.net>
3 * Copyright (c) 2002 Matthias Veit <matthias_veit@yahoo.de>
4 * Copyright (c) 2004 Elliott Hughes <enh@acm.org>
6 * This source code is released for free distribution under the terms of the
7 * GNU General Public License version 2 or (at your option) any later version.
9 * This module contains functions for generating tags for Ruby language
16 #include "general.h" /* must always come first */
23 #include "nestlevel.h"
/* Tag kinds recognised by this parser. K_UNDEFINED is -1; the remaining
 * values count up from 0 and are used elsewhere in this file as indices
 * into the RubyKinds table, so their order has to match the order of the
 * entries in that table. */
32 K_UNDEFINED
= -1, K_CLASS
, K_METHOD
, K_MODULE
, K_SINGLETON
,
/* Table of tag kinds, looked up by rubyKind value. The first member of
 * each entry is the flag that emitRubyTag reads as .enabled.
 * NOTE(review): the remaining members look like a one-letter kind code, a
 * kind name and a description; confirm against the kindOption definition. */
38 static kindOption RubyKinds
[] = {
39 { TRUE
, 'c', "class", "classes" },
40 { TRUE
, 'f', "method", "methods" },
41 { TRUE
, 'm', "module", "modules" },
42 { TRUE
, 'F', "singletonMethod", "singleton methods" },
44 /* Following two kinds are reserved. */
45 { TRUE
, 'd', "describe", "describes and contexts for Rspec" },
46 { TRUE
, 'C', "constant", "constants" },
/* Scopes that are currently open. findRubyTags allocates this with
 * nestingLevelsNew at its start and releases it with nestingLevelsFree at
 * its end; the other functions in this file push, pop and query it. */
50 static NestingLevels
* nesting
= NULL
;
/* Character placed between the parts of a scope string, both when joining
 * nesting level names and when splitting a qualified tag name. */
52 #define SCOPE_SEPARATOR '.'
/*
 *   FUNCTION DEFINITIONS
 */
/* Forward declaration: readAndEmitTag calls this function before its
 * definition appears further down in the file. */
58 static void enterUnnamedScope (void);
/*
 * Returns a string describing the scope in 'nls'.
 * We record the current scope as a list of entered scopes.
 * Scopes corresponding to 'if' statements and the like are
 * represented by empty strings. Scopes corresponding to
 * modules and classes are represented by the name of the
 * module or class.
 */
/* Joins the names of the levels in nls into one scope string. Levels are
 * visited from index 0 up to nls->n - 1; a level whose name has length 0
 * is skipped, and SCOPE_SEPARATOR is written before every non-empty name
 * except the first one that is output. The string is created with
 * vStringNew, and emitRubyTag, which calls this function, disposes of it
 * with vStringDelete.
 * NOTE(review): the declaration of i and the return statement are not
 * visible in this excerpt; presumably result is what gets returned. */
68 static vString
* nestingLevelsToScope (const NestingLevels
* nls
)
71 unsigned int chunks_output
= 0;
72 vString
* result
= vStringNew ();
73 for (i
= 0; i
< nls
->n
; ++i
)
75 const vString
* chunk
= nls
->levels
[i
].name
;
76 if (vStringLength (chunk
) > 0)
78 if (chunks_output
++ > 0)
79 vStringPut (result
, SCOPE_SEPARATOR
);
80 vStringCatS (result
, vStringValue (chunk
));
/*
 * Attempts to advance 's' past 'literal'.
 * Returns TRUE if it did, FALSE (and leaves 's' where
 * it was) otherwise.
 */
/* Matching proceeds in three checks, and the pointer is only moved after
 * all of them: (1) the remaining text must be at least as long as the
 * literal, (2) strncmp must find the literal at the current position, and
 * (3) end_check is called with the character that follows the literal, so
 * that callers can require a token boundary there. Because of check (1)
 * the character read for check (3) is at worst the terminating NUL.
 * NOTE(review): the statements executed when a check fails, and the final
 * return, are not visible in this excerpt. */
91 static boolean
canMatch (const unsigned char** s
, const char* literal
,
92 boolean (*end_check
) (int))
94 const int literal_length
= strlen (literal
);
95 const int s_length
= strlen ((const char *)*s
);
97 if (s_length
< literal_length
)
100 const unsigned char next_char
= *(*s
+ literal_length
);
101 if (strncmp ((const char*) *s
, literal
, literal_length
) != 0)
105 /* Additionally check that we're at the end of a token. */
106 if (! end_check (next_char
))
110 *s
+= literal_length
;
114 static boolean
isIdentChar (int c
)
116 return (isalnum (c
) || c
== '_');
119 static boolean
notIdentChar (int c
)
121 return ! isIdentChar (c
);
124 static boolean
notOperatorChar (int c
)
126 return ! (c
== '[' || c
== ']' ||
127 c
== '=' || c
== '!' || c
== '~' ||
128 c
== '+' || c
== '-' ||
129 c
== '@' || c
== '*' || c
== '/' || c
== '%' ||
130 c
== '<' || c
== '>' ||
131 c
== '&' || c
== '^' || c
== '|');
134 static boolean
isWhitespace (int c
)
136 return c
== 0 || isspace (c
);
139 static boolean
canMatchKeyword (const unsigned char** s
, const char* literal
)
141 return canMatch (s
, literal
, notIdentChar
);
/*
 * Attempts to advance 'cp' past a Ruby operator method name. Returns
 * TRUE if successful (and copies the name into 'name'), FALSE otherwise.
 */
/* Walks the RUBY_OPERATORS table until its NULL sentinel and tries each
 * entry with canMatch, passing notOperatorChar as the end check so that an
 * entry is rejected when another operator character follows it. A matching
 * entry is appended to name with vStringCatS; canMatch has by then moved
 * the pointer behind cp past the operator.
 * NOTE(review): only part of the table is visible in this excerpt, and
 * neither the declaration of i nor any return statement can be seen. */
148 static boolean
parseRubyOperator (vString
* name
, const unsigned char** cp
)
150 static const char* RUBY_OPERATORS
[] = {
153 "!", "~", "+@", "-@",
159 "<=", "<", ">", ">=",
160 "<=>", "==", "===", "!=", "=~", "!~",
165 for (i
= 0; RUBY_OPERATORS
[i
] != NULL
; ++i
)
167 if (canMatch (cp
, RUBY_OPERATORS
[i
], notOperatorChar
))
169 vStringCatS (name
, RUBY_OPERATORS
[i
]);
/*
 * Emits a tag for the given 'name' of kind 'kind' at the current nesting.
 */
/* Outline of the visible steps:
 *   - the enabled flag of RubyKinds[kind] is tested first;
 *   - scope is built from the current nesting with nestingLevelsToScope,
 *     and parent_kind is taken from the type of the current level;
 *   - name is searched for its last SCOPE_SEPARATOR. When one is found
 *     and is not the final character, the text before it is appended to
 *     scope (after a separator if scope is not empty) and parent_kind
 *     becomes K_MODULE; the visible fallback assignment uses the whole
 *     name as the unqualified name;
 *   - the tag entry is initialised with the unqualified name and, when
 *     scope is not empty, receives scopeKind and scopeName;
 *   - name is pushed onto nesting with this kind, and scope is deleted.
 * NOTE(review): the declarations of tag, scope and lvl, the body of the
 * enabled test, any guard around lvl->type, several branch bodies and the
 * call that actually writes the tag are not visible in this excerpt. */
179 static void emitRubyTag (vString
* name
, rubyKind kind
)
183 rubyKind parent_kind
= K_UNDEFINED
;
185 const char *unqualified_name
;
186 const char *qualified_name
;
188 if (!RubyKinds
[kind
].enabled
) {
192 vStringTerminate (name
);
193 scope
= nestingLevelsToScope (nesting
);
194 lvl
= nestingLevelsGetCurrent (nesting
);
196 parent_kind
= lvl
->type
;
198 qualified_name
= vStringValue (name
);
199 unqualified_name
= strrchr (qualified_name
, SCOPE_SEPARATOR
);
200 if (unqualified_name
&& unqualified_name
[1])
202 if (unqualified_name
> qualified_name
)
204 if (vStringLength (scope
) > 0)
205 vStringPut (scope
, SCOPE_SEPARATOR
);
206 vStringNCatS (scope
, qualified_name
,
207 unqualified_name
- qualified_name
);
208 /* assume module parent type for a lack of a better option */
209 parent_kind
= K_MODULE
;
214 unqualified_name
= qualified_name
;
216 initTagEntry (&tag
, unqualified_name
, &(RubyKinds
[kind
]));
217 if (vStringLength (scope
) > 0) {
218 Assert (0 <= parent_kind
&&
219 (size_t) parent_kind
< (sizeof RubyKinds
/ sizeof RubyKinds
[0]));
221 tag
.extensionFields
.scopeKind
= &(RubyKinds
[parent_kind
]);
222 tag
.extensionFields
.scopeName
= vStringValue (scope
);
226 nestingLevelsPush (nesting
, name
, kind
);
229 vStringDelete (scope
);
232 /* Tests whether 'ch' is a character in 'list'. */
233 static boolean
charIsIn (char ch
, const char* list
)
235 return (strchr (list
, ch
) != NULL
);
238 /* Advances 'cp' over leading whitespace, as classified by isspace().
 * NOTE(review): the loop body that steps the pointer is not visible in
 * this excerpt. */
239 static void skipWhitespace (const unsigned char** cp
)
241 while (isspace (**cp
))
/*
 * Copies the characters forming an identifier from *cp into
 * name, leaving *cp pointing to the character after the identifier.
 */
/* Outline of the visible steps:
 *   - for K_CLASS, two consecutive less-than signs at the current
 *     position are tested for (the anonymous class form);
 *   - for K_METHOD and K_SINGLETON, parseRubyOperator is tried before
 *     ordinary identifier parsing;
 *   - the copy loop runs while the current character is not NUL and is a
 *     colon, an identifier character, or a member of also_ok. A colon is
 *     tested for separately, and SCOPE_SEPARATOR is written to name in
 *     connection with it;
 *   - for K_METHOD, a period makes the function call itself again with
 *     K_SINGLETON and return that result;
 *   - for K_METHOD and K_SINGLETON, a question mark, exclamation mark or
 *     equals sign is tested for as the end of the method name.
 * NOTE(review): the declaration and assignments of also_ok, all uses of
 * had_sep after its initialisation, the pointer increment, the bodies of
 * most branches and every return other than the recursive one are not
 * visible in this excerpt. */
251 static rubyKind
parseIdentifier (
252 const unsigned char** cp
, vString
* name
, rubyKind kind
)
254 /* Method names are slightly different to class and variable names.
255 * A method name may optionally end with a question mark, exclamation
256 * point or equals sign. These are all part of the name.
257 * A method name may also contain a period if it's a singleton method.
259 boolean had_sep
= FALSE
;
261 if (kind
== K_METHOD
)
265 else if (kind
== K_SINGLETON
)
276 /* Check for an anonymous (singleton) class such as "class << HTTP". */
277 if (kind
== K_CLASS
&& **cp
== '<' && *(*cp
+ 1) == '<')
282 /* Check for operators such as "def []=(key, val)". */
283 if (kind
== K_METHOD
|| kind
== K_SINGLETON
)
285 if (parseRubyOperator (name
, cp
))
291 /* Copy the identifier into 'name'. */
292 while (**cp
!= 0 && (**cp
== ':' || isIdentChar (**cp
) || charIsIn (**cp
, also_ok
)))
294 char last_char
= **cp
;
296 if (last_char
== ':')
302 vStringPut (name
, SCOPE_SEPARATOR
);
305 vStringPut (name
, last_char
);
309 if (kind
== K_METHOD
)
311 /* Recognize singleton methods. */
312 if (last_char
== '.')
314 vStringTerminate (name
);
316 return parseIdentifier (cp
, name
, K_SINGLETON
);
320 if (kind
== K_METHOD
|| kind
== K_SINGLETON
)
322 /* Recognize characters which mark the end of a method name. */
323 if (charIsIn (last_char
, "?!="))
/* Parses an identifier of the expected kind at cp into a freshly
 * allocated name and acts on the outcome. The visible test singles out
 * the case where parseIdentifier reports K_UNDEFINED or leaves the name
 * empty; enterUnnamedScope is the call that follows that test, and
 * emitRubyTag is called with the kind that parseIdentifier returned. The
 * name is deleted before the function ends.
 * NOTE(review): the braces and the else that separate the two outcomes
 * are not visible in this excerpt; presumably emitRubyTag is the
 * alternative to enterUnnamedScope rather than an additional step. */
332 static void readAndEmitTag (const unsigned char** cp
, rubyKind expected_kind
)
336 vString
*name
= vStringNew ();
337 rubyKind actual_kind
= parseIdentifier (cp
, name
, expected_kind
);
339 if (actual_kind
== K_UNDEFINED
|| vStringLength (name
) == 0)
342 * What kind of tags should we create for code like this?
344 * %w(self.clfloor clfloor).each do |name|
345 * module_eval <<-"end;"
346 * def #{name}(x, y=1)
358 * For now, we don't create any.
360 enterUnnamedScope ();
364 emitRubyTag (name
, actual_kind
);
366 vStringDelete (name
);
370 static void enterUnnamedScope (void)
372 vString
*name
= vStringNewInit ("");
373 NestingLevel
*parent
= nestingLevelsGetCurrent (nesting
);
374 nestingLevelsPush (nesting
, name
, parent
? parent
->type
: K_UNDEFINED
);
375 vStringDelete (name
);
/* Parser entry point (RubyParser installs it as the parser callback).
 * Outline of the visible steps:
 *   - nesting is allocated with nestingLevelsNew before the first line
 *     is read and released with nestingLevelsFree at the end;
 *   - lines are fetched with readLineFromInputFile until it returns NULL,
 *     and expect_separator starts out FALSE for every line;
 *   - a line starting with =begin or =end (followed by whitespace or the
 *     end of the line, per isWhitespace) sets or clears the
 *     inMultiLineComment flag, which is tested right afterwards;
 *   - after skipWhitespace, the keywords for, until and while set
 *     expect_separator and enter an unnamed scope, while case, if and
 *     unless only enter an unnamed scope;
 *   - module, class and def hand over to readAndEmitTag with K_MODULE,
 *     K_CLASS and a kind that starts as K_METHOD; for def the current
 *     nesting level is first tested for being a K_CLASS level with an
 *     empty name;
 *   - further along the line, begin enters an unnamed scope, do enters
 *     one unless expect_separator is set and then clears that flag, and
 *     end pops the innermost level when nesting is not empty.
 * NOTE(review): large parts of the body are not visible in this excerpt,
 * including the loop over the rest of the line, what happens while inside
 * a multi-line comment, what the unnamed class-level test changes about
 * kind, and the handling of comments and string literals. */
378 static void findRubyTags (void)
380 const unsigned char *line
;
381 boolean inMultiLineComment
= FALSE
;
383 nesting
= nestingLevelsNew ();
385 /* FIXME: this whole scheme is wrong, because Ruby isn't line-based.
386 * You could perfectly well write:
393 * if you wished, and this function would fail to recognize anything.
395 while ((line
= readLineFromInputFile ()) != NULL
)
397 const unsigned char *cp
= line
;
398 /* if we expect a separator after a while, for, or until statement
399 * separators are "do", ";" or newline */
400 boolean expect_separator
= FALSE
;
402 if (canMatch (&cp
, "=begin", isWhitespace
))
404 inMultiLineComment
= TRUE
;
407 if (canMatch (&cp
, "=end", isWhitespace
))
409 inMultiLineComment
= FALSE
;
412 if (inMultiLineComment
)
415 skipWhitespace (&cp
);
417 /* Avoid mistakenly starting a scope for modifiers such as
421 * FIXME: this is fooled by code such as
429 * FIXME: we're also fooled if someone does something heinous such as
434 if (canMatchKeyword (&cp
, "for") ||
435 canMatchKeyword (&cp
, "until") ||
436 canMatchKeyword (&cp
, "while"))
438 expect_separator
= TRUE
;
439 enterUnnamedScope ();
441 else if (canMatchKeyword (&cp
, "case") ||
442 canMatchKeyword (&cp
, "if") ||
443 canMatchKeyword (&cp
, "unless"))
445 enterUnnamedScope ();
449 * "module M", "class C" and "def m" should only be at the beginning
452 if (canMatchKeyword (&cp
, "module"))
454 readAndEmitTag (&cp
, K_MODULE
);
456 else if (canMatchKeyword (&cp
, "class"))
458 readAndEmitTag (&cp
, K_CLASS
);
460 else if (canMatchKeyword (&cp
, "def"))
462 rubyKind kind
= K_METHOD
;
463 NestingLevel
*nl
= nestingLevelsGetCurrent (nesting
);
465 /* if the def is inside an unnamed scope at the class level, assume
466 * it's from a singleton from a construct like this:
476 if (nl
&& nl
->type
== K_CLASS
&& vStringLength (nl
->name
) == 0)
478 readAndEmitTag (&cp
, kind
);
482 /* FIXME: we don't cope with here documents,
483 * or regular expression literals, or ... you get the idea.
484 * Hopefully, the restriction above that insists on seeing
485 * definitions at the starts of lines should keep us out of
488 if (inMultiLineComment
|| isspace (*cp
))
494 /* FIXME: this is wrong, but there *probably* won't be a
495 * definition after an interpolated string (where # doesn't
500 else if (canMatchKeyword (&cp
, "begin"))
502 enterUnnamedScope ();
504 else if (canMatchKeyword (&cp
, "do"))
506 if (! expect_separator
)
507 enterUnnamedScope ();
509 expect_separator
= FALSE
;
511 else if (canMatchKeyword (&cp
, "end") && nesting
->n
> 0)
513 /* Leave the most recent scope. */
514 nestingLevelsPop (nesting
);
518 /* Skip string literals.
519 * FIXME: should cope with escapes and interpolation.
523 } while (*cp
!= 0 && *cp
!= '"');
525 cp
++; /* skip the last found '"' */
530 expect_separator
= FALSE
;
532 else if (*cp
!= '\0')
536 while (isIdentChar (*cp
));
540 nestingLevelsFree (nesting
);
/* Builds the parser definition for Ruby: a definition named Ruby is
 * created with parserNewFull and KIND_FILE_ALT, then given the RubyKinds
 * table together with its element count, the file extensions rb and ruby
 * (a NULL-terminated list), and findRubyTags as its parser callback.
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably def is returned. */
543 extern parserDefinition
* RubyParser (void)
545 static const char *const extensions
[] = { "rb", "ruby", NULL
};
546 parserDefinition
* def
= parserNewFull ("Ruby", KIND_FILE_ALT
);
547 def
->kinds
= RubyKinds
;
548 def
->kindCount
= ARRAY_SIZE (RubyKinds
);
549 def
->extensions
= extensions
;
550 def
->parser
= findRubyTags
;
554 /* vi:set tabstop=4 shiftwidth=4: */