ctags: Rename Geany-specific tagEntryInfo::arglist to upstream's ::signature
[geany-mirror.git] / ctags / parsers / python.c
blob5d164de2e15285720aa9381f03f4b314161800f5
1 /*
2 * Copyright (c) 2000-2003, Darren Hiebert
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License.
7 * This module contains functions for generating tags for Python language
8 * files.
9 */
11 * INCLUDE FILES
13 #include "general.h" /* must always come first */
15 #include <string.h>
17 #include "entry.h"
18 #include "options.h"
19 #include "read.h"
20 #include "main.h"
21 #include "vstring.h"
22 #include "nestlevel.h"
25 * DATA DEFINITIONS
/* Tag kinds emitted by this parser.  The enumerators are used as indices
 * into PythonKinds[], so both must list the kinds in the same order. */
27 typedef enum {
28 K_CLASS, K_FUNCTION, K_METHOD, K_VARIABLE, K_IMPORT
29 } pythonKind;
/* One entry per pythonKind value; the code below reads the `name' and
 * `letter' members of these entries when filling in a tag. */
31 static kindOption PythonKinds[] = {
32 {TRUE, 'c', "class", "classes"},
33 {TRUE, 'f', "function", "functions"},
34 {TRUE, 'm', "member", "class members"},
35 {TRUE, 'v', "variable", "variables"},
36 {TRUE, 'x', "unknown", "name referring a classe/variable/function/module defined in other module"}
/* Access levels computed by accessFromIdentifier(); used as indices into
 * PythonAccesses[]. */
39 typedef enum {
40 A_PUBLIC, A_PRIVATE, A_PROTECTED
41 } pythonAccess;
/* Access names stored in a tag's extensionFields.access, indexed by
 * pythonAccess. */
43 static const char *const PythonAccesses[] = {
44 "public", "private", "protected"
/* The two triple-quote delimiters.  find_triple_start() stores one of these
 * two pointers in its "which" output to record which delimiter was found. */
47 static char const * const singletriple = "'''";
48 static char const * const doubletriple = "\"\"\"";
51 * FUNCTION DEFINITIONS
54 static boolean isIdentifierFirstCharacter (int c)
56 return (boolean) (isalpha (c) || c == '_');
59 static boolean isIdentifierCharacter (int c)
61 return (boolean) (isalnum (c) || c == '_');
64 /* follows PEP-8, and always reports single-underscores as protected
65 * See:
66 * - http://www.python.org/dev/peps/pep-0008/#method-names-and-instance-variables
67 * - http://www.python.org/dev/peps/pep-0008/#designing-for-inheritance
69 static pythonAccess accessFromIdentifier (const vString *const ident,
70 pythonKind kind, boolean has_parent, boolean parent_is_class)
72 const char *const p = vStringValue (ident);
73 const size_t len = vStringLength (ident);
75 /* inside a function/method, private */
76 if (has_parent && !parent_is_class)
77 return A_PRIVATE;
78 /* not starting with "_", public */
79 else if (len < 1 || p[0] != '_')
80 return A_PUBLIC;
81 /* "__...__": magic methods */
82 else if (kind == K_METHOD && parent_is_class &&
83 len > 3 && p[1] == '_' && p[len - 2] == '_' && p[len - 1] == '_')
84 return A_PUBLIC;
85 /* "__...": name mangling */
86 else if (parent_is_class && len > 1 && p[1] == '_')
87 return A_PRIVATE;
88 /* "_...": suggested as non-public, but easily accessible */
89 else
90 return A_PROTECTED;
93 static void addAccessFields (tagEntryInfo *const entry,
94 const vString *const ident, pythonKind kind,
95 boolean has_parent, boolean parent_is_class)
97 pythonAccess access;
99 access = accessFromIdentifier (ident, kind, has_parent, parent_is_class);
100 entry->extensionFields.access = PythonAccesses [access];
101 /* FIXME: should we really set isFileScope in addition to access? */
102 if (access == A_PRIVATE)
103 entry->isFileScope = TRUE;
106 /* Given a string with the contents of a line directly after the "def" keyword,
107 * extract all relevant information and create a tag.
109 static void makeFunctionTag (vString *const function,
110 vString *const parent, int is_class_parent, const char *arglist)
112 tagEntryInfo tag;
113 initTagEntry (&tag, vStringValue (function));
115 tag.kindName = PythonKinds[K_FUNCTION].name;
116 tag.kind = PythonKinds[K_FUNCTION].letter;
117 tag.extensionFields.signature = arglist;
119 if (vStringLength (parent) > 0)
121 if (is_class_parent)
123 tag.kindName = PythonKinds[K_METHOD].name;
124 tag.kind = PythonKinds[K_METHOD].letter;
125 tag.extensionFields.scope [0] = PythonKinds[K_CLASS].name;
126 tag.extensionFields.scope [1] = vStringValue (parent);
128 else
130 tag.extensionFields.scope [0] = PythonKinds[K_FUNCTION].name;
131 tag.extensionFields.scope [1] = vStringValue (parent);
135 addAccessFields (&tag, function, is_class_parent ? K_METHOD : K_FUNCTION,
136 vStringLength (parent) > 0, is_class_parent);
138 makeTagEntry (&tag);
141 /* Given a string with the contents of the line directly after the "class"
142 * keyword, extract all necessary information and create a tag.
144 static void makeClassTag (vString *const class, vString *const inheritance,
145 vString *const parent, int is_class_parent)
147 tagEntryInfo tag;
148 initTagEntry (&tag, vStringValue (class));
149 tag.kindName = PythonKinds[K_CLASS].name;
150 tag.kind = PythonKinds[K_CLASS].letter;
151 if (vStringLength (parent) > 0)
153 if (is_class_parent)
155 tag.extensionFields.scope [0] = PythonKinds[K_CLASS].name;
156 tag.extensionFields.scope [1] = vStringValue (parent);
158 else
160 tag.extensionFields.scope [0] = PythonKinds[K_FUNCTION].name;
161 tag.extensionFields.scope [1] = vStringValue (parent);
164 tag.extensionFields.inheritance = vStringValue (inheritance);
165 addAccessFields (&tag, class, K_CLASS, vStringLength (parent) > 0,
166 is_class_parent);
167 makeTagEntry (&tag);
170 static void makeVariableTag (vString *const var, vString *const parent,
171 boolean is_class_parent)
173 tagEntryInfo tag;
174 initTagEntry (&tag, vStringValue (var));
175 tag.kindName = PythonKinds[K_VARIABLE].name;
176 tag.kind = PythonKinds[K_VARIABLE].letter;
177 if (vStringLength (parent) > 0)
179 tag.extensionFields.scope [0] = PythonKinds[K_CLASS].name;
180 tag.extensionFields.scope [1] = vStringValue (parent);
182 addAccessFields (&tag, var, K_VARIABLE, vStringLength (parent) > 0,
183 is_class_parent);
184 makeTagEntry (&tag);
/* Skip a single or double quoted string.
 *
 * cp must point at the opening quote.  Returns a pointer just past the
 * matching closing quote, or a pointer to the terminating NUL if the string
 * is not closed on this line.  A backslash protects the character that
 * follows it, so an escaped quote does not end the string.
 */
static const char *skipString (const char *cp)
{
	const char quote = *cp;
	const char *p = cp + 1;

	while (*p != '\0')
	{
		if (*p == '\\')
		{
			/* the escaped character is consumed blindly, unless the
			 * backslash is the very last character of the input */
			if (p[1] == '\0')
				return p + 1;
			p += 2;
		}
		else if (*p == quote)
			return p + 1;
		else
			p++;
	}
	return p;
}
/* Tell whether a string literal starts at cp.
 * Returns the number of prefix characters preceding the opening quote
 * (0 for a bare quote, 1 or 2 for unicode, binary (Python 3) and raw
 * strings), or -1 if no string literal starts here.
 *
 * Accepted prefixes:  "r" | "R" | "u" | "U" | "b" | "B"
 *                     "rb" | "rB" | "Rb" | "RB"
 *                     "ur" | "UR" | "Ur" | "uR" | "br" | "Br" | "bR" | "BR"
 */
static int stringPrefixLength (const char *cp)
{
	int n;

	if (*cp == '"' || *cp == '\'')
		return 0;

	switch (*cp)
	{
		case 'r': case 'R':
			n = (cp[1] == 'b' || cp[1] == 'B') ? 2 : 1;
			break;
		case 'u': case 'U':
		case 'b': case 'B':
			n = (cp[1] == 'r' || cp[1] == 'R') ? 2 : 1;
			break;
		default:
			return -1;
	}
	return (cp[n] == '\'' || cp[n] == '"') ? n : -1;
}

/* Skip everything up to an identifier start.
 *
 * String literals are skipped as a whole, and a '#' ends the scan: the
 * returned pointer then designates the terminating NUL.  Otherwise returns
 * a pointer to the first character that can start an identifier, or to the
 * terminating NUL if there is none.
 */
static const char *skipEverything (const char *cp)
{
	while (*cp != '\0')
	{
		int prefix;

		if (*cp == '#')
			return strchr(cp, '\0');

		prefix = stringPrefixLength (cp);
		if (prefix >= 0)
		{
			cp = skipString (cp + prefix);
			if (*cp == '\0')
				break;
			if (isIdentifierFirstCharacter ((int) *cp))
				return cp;
			/* look at the character right after the string again rather
			 * than jumping over it */
			continue;
		}

		if (isIdentifierFirstCharacter ((int) *cp))
			return cp;
		cp++;
	}
	return cp;
}
/* Skip an identifier: return a pointer to the first character at or after
 * cp that cannot be part of an identifier. */
static const char *skipIdentifier (const char *cp)
{
	for (; isIdentifierCharacter ((int) *cp); cp++)
		;
	return cp;
}
/* Look for the first identifier on the line that begins with one of the
 * definition keywords "def", "class", "cdef" or "cpdef".
 *
 * Only the prefix is compared, so the returned position is a candidate: the
 * caller still has to check that the keyword is a whole word.  Returns NULL
 * if the line contains no candidate.
 */
static const char *findDefinitionOrClass (const char *cp)
{
	static const char *const keywords[] = { "def", "class", "cdef", "cpdef" };
	const size_t keyword_count = sizeof keywords / sizeof keywords[0];

	while (*cp != '\0')
	{
		size_t k;

		cp = skipEverything (cp);
		for (k = 0; k < keyword_count; k++)
		{
			if (strncmp (cp, keywords[k], strlen (keywords[k])) == 0)
				return cp;
		}
		cp = skipIdentifier (cp);
	}
	return NULL;
}
/* Skip leading whitespace: return a pointer to the first character at or
 * after cp for which isspace() is false (possibly the terminating NUL).
 *
 * The character is converted to unsigned char before being classified:
 * where `char' is signed, bytes >= 0x80 (e.g. UTF-8 sequences in the parsed
 * source) would otherwise reach isspace() as negative values, for which the
 * <ctype.h> functions are not defined.
 */
static const char *skipSpace (const char *cp)
{
	while (isspace ((unsigned char) *cp))
		++cp;
	return cp;
}
283 /* Starting at ''cp'', parse an identifier into ''identifier''. */
284 static const char *parseIdentifier (const char *cp, vString *const identifier)
286 vStringClear (identifier);
287 while (isIdentifierCharacter ((int) *cp))
289 vStringPut (identifier, (int) *cp);
290 ++cp;
292 vStringTerminate (identifier);
293 return cp;
296 static void parseClass (const char *cp, vString *const class,
297 vString *const parent, int is_class_parent)
299 vString *const inheritance = vStringNew ();
300 vStringClear (inheritance);
301 cp = parseIdentifier (cp, class);
302 cp = skipSpace (cp);
303 if (*cp == '(')
305 ++cp;
306 while (*cp != ')')
308 if (*cp == '\0')
310 /* Closing parenthesis can be in follow up line. */
311 cp = (const char *) fileReadLine ();
312 if (!cp) break;
313 vStringPut (inheritance, ' ');
314 continue;
316 vStringPut (inheritance, *cp);
317 ++cp;
319 vStringTerminate (inheritance);
321 makeClassTag (class, inheritance, parent, is_class_parent);
322 vStringDelete (inheritance);
325 static void parseImports (const char *cp)
327 const char *pos;
328 vString *name, *name_next;
330 cp = skipEverything (cp);
332 if ((pos = strstr (cp, "import")) == NULL)
333 return;
335 cp = pos + 6;
337 /* continue only if there is some space between the keyword and the identifier */
338 if (! isspace (*cp))
339 return;
341 cp++;
342 cp = skipSpace (cp);
344 name = vStringNew ();
345 name_next = vStringNew ();
347 cp = skipEverything (cp);
348 while (*cp)
350 cp = parseIdentifier (cp, name);
352 cp = skipEverything (cp);
353 /* we parse the next possible import statement as well to be able to ignore 'foo' in
354 * 'import foo as bar' */
355 parseIdentifier (cp, name_next);
357 /* take the current tag only if the next one is not "as" */
358 if (strcmp (vStringValue (name_next), "as") != 0 &&
359 strcmp (vStringValue (name), "as") != 0)
361 makeSimpleTag (name, PythonKinds, K_IMPORT);
364 vStringDelete (name);
365 vStringDelete (name_next);
/* Extract the parenthesized argument list found in buf.
 * Modified from get.c getArglistFromStr().
 *
 * Returns a newly allocated copy of the text from the first '(' up to and
 * including its matching ')' (or up to the end of buf if the parentheses are
 * unbalanced), or NULL if buf is NULL or contains no '('.
 *
 * warning: terminates rest of string past arglist!  A NUL is written into
 *          buf right after the argument list, despite the const qualifier.
 * note: does not ignore brackets inside strings!
 */
static char *parseArglist(const char *buf)
{
	char *open, *p;
	int depth = 1;

	if (buf == NULL)
		return NULL;

	open = strchr(buf, '(');
	if (open == NULL)
		return NULL;

	/* walk until the opening parenthesis is balanced or the text ends */
	p = open + 1;
	while (depth > 0 && *p != '\0')
	{
		if (*p == '(')
			depth++;
		else if (*p == ')')
			depth--;
		p++;
	}

	*p = '\0';
	return strdup(open);
}
392 static void parseFunction (const char *cp, vString *const def,
393 vString *const parent, int is_class_parent)
395 char *arglist;
397 cp = parseIdentifier (cp, def);
398 arglist = parseArglist (cp);
399 makeFunctionTag (def, parent, is_class_parent, arglist);
400 if (arglist != NULL)
401 eFree (arglist);
404 /* Get the combined name of a nested symbol. Classes are separated with ".",
405 * functions with "/". For example this code:
406 * class MyClass:
407 * def myFunction:
408 * def SubFunction:
409 * class SubClass:
410 * def Method:
411 * pass
412 * Would produce this string:
413 * MyClass.MyFunction/SubFunction/SubClass.Method
415 static boolean constructParentString(NestingLevels *nls, int indent,
416 vString *result)
418 int i;
419 NestingLevel *prev = NULL;
420 int is_class = FALSE;
421 vStringClear (result);
422 for (i = 0; i < nls->n; i++)
424 NestingLevel *nl = nls->levels + i;
425 if (indent <= nl->indentation)
426 break;
427 if (prev)
429 vStringCatS(result, "."); /* make Geany symbol list grouping work properly */
431 if (prev->type == K_CLASS)
432 vStringCatS(result, ".");
433 else
434 vStringCatS(result, "/");
437 vStringCat(result, nl->name);
438 is_class = (nl->type == K_CLASS);
439 prev = nl;
441 return is_class;
444 /* Check indentation level and truncate nesting levels accordingly */
445 static void checkIndent(NestingLevels *nls, int indent)
447 int i;
448 NestingLevel *n;
450 for (i = 0; i < nls->n; i++)
452 n = nls->levels + i;
453 if (n && indent <= n->indentation)
455 /* truncate levels */
456 nls->n = i;
457 break;
462 static void addNestingLevel(NestingLevels *nls, int indentation,
463 const vString *name, boolean is_class)
465 int i;
466 NestingLevel *nl = NULL;
468 for (i = 0; i < nls->n; i++)
470 nl = nls->levels + i;
471 if (indentation <= nl->indentation) break;
473 if (i == nls->n)
475 nestingLevelsPush(nls, name, 0);
476 nl = nls->levels + i;
478 else
479 { /* reuse existing slot */
480 nls->n = i + 1;
481 vStringCopy(nl->name, name);
483 nl->indentation = indentation;
484 nl->type = is_class ? K_CLASS : !K_CLASS;
487 /* Return a pointer to the start of the next triple string, or NULL. Store
488 * the kind of triple string in "which" if the return is not NULL.
490 static char const *find_triple_start(char const *string, char const **which)
492 char const *cp = string;
494 for (; *cp; cp++)
496 if (*cp == '#')
497 break;
499 if (*cp == '"' || *cp == '\'')
501 if (strncmp(cp, doubletriple, 3) == 0)
503 *which = doubletriple;
504 return cp;
506 if (strncmp(cp, singletriple, 3) == 0)
508 *which = singletriple;
509 return cp;
511 cp = skipString(cp);
512 if (!*cp) break;
513 cp--; /* avoid jumping over the character after a skipped string */
516 return NULL;
/* Find the end of a triple string as pointed to by "which", and update "which"
 * with any other triple strings following in the given string.
 *
 * On return "which" is NULL if no triple string is left open at the end of
 * the given string, and names the delimiter of the open one otherwise.
 */
static void find_triple_end(char const *string, char const **which)
{
	char const *s = string;

	do
	{
		/* Check if the string ends in the same line. */
		s = strstr (s, *which);
		if (s == NULL)
			return;
		*which = NULL;

		/* If yes, check if another one starts in the same line. */
		s = find_triple_start(s + 3, which);
		if (s != NULL)
			s += 3;
	}
	while (s != NULL);
}
/* Parse global and class variable names (C.x) from assignment statements.
 * Object attributes (obj.x) are ignored.
 * Assignment to a tuple 'x, y = 2, 3' not supported.
 * TODO: ignore duplicate tags from reassignment statements.
 *
 * line: the whole line, leading indentation included
 *
 * Returns a pointer to the start of the variable name inside `line' if the
 * line has the shape '<indentation><identifier><spaces>=...', NULL otherwise.
 *
 * The left-hand side is scanned backwards with one-past-the-end cursors so
 * that no pointer before the start of `line' is ever formed.  Characters are
 * converted to unsigned char before being classified, which keeps the
 * <ctype.h> functions defined for bytes >= 0x80.
 */
static const char *findVariable(const char *line)
{
	const char *eq, *rest, *name_end, *name_start, *lead;

	eq = strchr(line, '=');
	if (eq == NULL)
		return NULL;

	for (rest = eq + 1; *rest != '\0'; rest++)
	{
		if (*rest == '=')
			return NULL;	/* ignore '==' operator and 'x=5,y=6)' function lines */
		if (*rest == '(' || *rest == '#')
			break;	/* allow 'x = func(b=2,y=2,' lines and comments at the end of line */
	}

	/* go backwards to the start of the line, checking we have valid chars */
	name_end = eq;
	while (name_end > line && isspace ((unsigned char) name_end[-1]))
		--name_end;

	name_start = name_end;
	while (name_start > line &&
		   isIdentifierCharacter ((unsigned char) name_start[-1]))
		--name_start;

	/* rejects both a missing name and a name starting with a digit */
	if (!isIdentifierFirstCharacter ((unsigned char) *name_start))
		return NULL;

	lead = name_start;
	while (lead > line && isspace ((unsigned char) lead[-1]))
		--lead;
	if (lead != line)	/* the line isn't a simple variable assignment */
		return NULL;

	/* the line is valid, hand back the variable name */
	return name_start;
}
578 /* Skip type declaration that optionally follows a cdef/cpdef */
579 static const char *skipTypeDecl (const char *cp, boolean *is_class)
581 const char *lastStart = cp, *ptr = cp;
582 int loopCount = 0;
583 ptr = skipSpace(cp);
584 if (!strncmp("extern", ptr, 6)) {
585 ptr += 6;
586 ptr = skipSpace(ptr);
587 if (!strncmp("from", ptr, 4)) { return NULL; }
589 if (!strncmp("class", ptr, 5)) {
590 ptr += 5 ;
591 *is_class = TRUE;
592 ptr = skipSpace(ptr);
593 return ptr;
595 /* limit so that we don't pick off "int item=obj()" */
596 while (*ptr && loopCount++ < 2) {
597 while (*ptr && *ptr != '=' && *ptr != '(' && !isspace(*ptr)) {
598 /* skip over e.g. 'cpdef numpy.ndarray[dtype=double, ndim=1]' */
599 if(*ptr == '[') {
600 while (*ptr && *ptr != ']') ptr++;
601 if (*ptr) ptr++;
602 } else {
603 ptr++;
606 if (!*ptr || *ptr == '=') return NULL;
607 if (*ptr == '(') {
608 return lastStart; /* if we stopped on a '(' we are done */
610 ptr = skipSpace(ptr);
611 lastStart = ptr;
612 while (*lastStart == '*') lastStart++; /* cdef int *identifier */
614 return NULL;
617 /* checks if there is a lambda at position of cp, and return its argument list
618 * if so.
619 * We don't return the lambda name since it is useless for now since we already
620 * know it when we call this function, and it would be a little slower. */
621 static boolean varIsLambda (const char *cp, char **arglist)
623 boolean is_lambda = FALSE;
625 cp = skipSpace (cp);
626 cp = skipIdentifier (cp); /* skip the lambda's name */
627 cp = skipSpace (cp);
628 if (*cp == '=')
630 cp++;
631 cp = skipSpace (cp);
632 if (strncmp (cp, "lambda", 6) == 0)
634 const char *tmp;
636 cp += 6; /* skip the lambda */
637 tmp = skipSpace (cp);
638 /* check if there is a space after lambda to detect assignations
639 * starting with 'lambdaXXX' */
640 if (tmp != cp)
642 vString *args = vStringNew ();
644 cp = tmp;
645 vStringPut (args, '(');
646 for (; *cp != 0 && *cp != ':'; cp++)
647 vStringPut (args, *cp);
648 vStringPut (args, ')');
649 vStringTerminate (args);
650 if (arglist)
651 *arglist = strdup (vStringValue (args));
652 vStringDelete (args);
653 is_lambda = TRUE;
657 return is_lambda;
660 /* checks if @p cp has keyword @p keyword at the start, and fills @p cp_n with
661 * the position of the next non-whitespace after the keyword */
662 static boolean matchKeyword (const char *keyword, const char *cp, const char **cp_n)
664 size_t kw_len = strlen (keyword);
665 if (strncmp (cp, keyword, kw_len) == 0 && isspace (cp[kw_len]))
667 *cp_n = skipSpace (&cp[kw_len + 1]);
668 return TRUE;
670 return FALSE;
/* Parser entry point: reads the input line by line with fileReadLine() and
 * creates tags for variables, lambdas, functions, classes and imports.
 * Backslash-continued lines are first joined into a single logical line, and
 * lines located inside a triple-quoted string are not parsed for tags. */
673 static void findPythonTags (void)
/* logical line being assembled from backslash-continued physical lines */
675 vString *const continuation = vStringNew ();
676 vString *const name = vStringNew ();
677 vString *const parent = vStringNew();
679 NestingLevels *const nesting_levels = nestingLevelsNew();
681 const char *line;
/* non-zero while `continuation' holds an unfinished, backslash-continued line */
682 int line_skip = 0;
/* delimiter of the triple-quoted string we are currently inside, or NULL */
683 char const *longStringLiteral = NULL;
685 while ((line = (const char *) fileReadLine ()) != NULL)
687 const char *cp = line, *candidate;
688 char const *longstring;
689 char const *keyword, *variable;
690 int indent;
692 cp = skipSpace (cp);
694 if (*cp == '\0') /* skip blank line */
695 continue;
697 /* Skip comment if we are not inside a multi-line string. */
698 if (*cp == '#' && !longStringLiteral)
699 continue;
701 /* Deal with line continuation. */
702 if (!line_skip) vStringClear(continuation);
703 vStringCatS(continuation, line);
704 vStringStripTrailing(continuation);
705 if (vStringLast(continuation) == '\\')
/* replace the trailing backslash with a space and wait for the next line */
707 vStringChop(continuation);
708 vStringCatS(continuation, " ");
709 line_skip = 1;
710 continue;
/* from here on `line' designates the joined logical line */
712 cp = line = vStringValue(continuation);
713 cp = skipSpace (cp);
/* the indentation is the number of leading whitespace characters */
714 indent = cp - line;
715 line_skip = 0;
717 /* Deal with multiline string ending. */
718 if (longStringLiteral)
720 find_triple_end(cp, &longStringLiteral);
721 continue;
/* forget the nesting levels this line is no longer part of */
724 checkIndent(nesting_levels, indent);
726 /* Find global and class variables */
727 variable = findVariable(line);
728 if (variable)
730 const char *start = variable;
/* only assigned by varIsLambda() when it returns TRUE */
731 char *arglist;
732 boolean parent_is_class;
734 vStringClear (name);
735 while (isIdentifierCharacter ((int) *start))
737 vStringPut (name, (int) *start);
738 ++start;
740 vStringTerminate (name);
742 parent_is_class = constructParentString(nesting_levels, indent, parent);
743 if (varIsLambda (variable, &arglist))
745 /* show class members or top-level script lambdas only */
746 if (parent_is_class || vStringLength(parent) == 0)
747 makeFunctionTag (name, parent, parent_is_class, arglist);
748 eFree (arglist);
750 else
752 /* skip variables in methods */
753 if (parent_is_class || vStringLength(parent) == 0)
754 makeVariableTag (name, parent, parent_is_class);
758 /* Deal with multiline string start. */
759 longstring = find_triple_start(cp, &longStringLiteral);
760 if (longstring)
/* step over the opening delimiter before looking for the closing one */
762 longstring += 3;
763 find_triple_end(longstring, &longStringLiteral);
764 /* We don't parse for any tags in the rest of the line. */
765 continue;
768 /* Deal with def and class keywords. */
769 keyword = findDefinitionOrClass (cp);
770 if (keyword)
772 boolean found = FALSE;
773 boolean is_class = FALSE;
/* on a match, matchKeyword() moves cp past the keyword */
774 if (matchKeyword ("def", keyword, &cp))
776 found = TRUE;
778 else if (matchKeyword ("class", keyword, &cp))
780 found = TRUE;
781 is_class = TRUE;
783 else if (matchKeyword ("cdef", keyword, &cp))
/* skipTypeDecl() may set is_class, and returns NULL if nothing is declared */
785 candidate = skipTypeDecl (cp, &is_class);
786 if (candidate)
788 found = TRUE;
789 cp = candidate;
793 else if (matchKeyword ("cpdef", keyword, &cp))
795 candidate = skipTypeDecl (cp, &is_class);
796 if (candidate)
798 found = TRUE;
799 cp = candidate;
803 if (found)
805 boolean is_parent_class;
807 is_parent_class =
808 constructParentString(nesting_levels, indent, parent);
810 if (is_class)
811 parseClass (cp, name, parent, is_parent_class);
812 else
813 parseFunction(cp, name, parent, is_parent_class);
/* the new symbol becomes the innermost nesting level */
815 addNestingLevel(nesting_levels, indent, name, is_class);
818 /* Find and parse imports */
/* NOTE(review): parseFunction() calls parseArglist(), which writes a NUL
 * right after the argument list into the buffer `line' points to, so imports
 * following a def on the same line are presumably not seen here -- confirm
 * this is intended. */
819 parseImports(line);
821 /* Clean up all memory we allocated. */
822 vStringDelete (parent);
823 vStringDelete (name);
824 vStringDelete (continuation);
825 nestingLevelsFree (nesting_levels);
828 extern parserDefinition *PythonParser (void)
830 static const char *const extensions[] = { "py", "pyx", "pxd", "pxi" ,"scons", NULL };
831 parserDefinition *def = parserNew ("Python");
832 def->kinds = PythonKinds;
833 def->kindCount = KIND_COUNT (PythonKinds);
834 def->extensions = extensions;
835 def->parser = findPythonTags;
836 return def;
839 /* vi:set tabstop=4 shiftwidth=4: */