Create branch for 0.19.1 release (copied from 0.19).
[geany-mirror.git] / tagmanager / python.c
blob7eba5f8751f574954152bf91b9b4ec40e825eaf2
/*
*   $Id$
*
*   Copyright (c) 2000-2003, Darren Hiebert
*
*   This source code is released for free distribution under the terms of the
*   GNU General Public License.
*
*   This module contains functions for generating tags for Python language
*   files.
*/
/*
*   INCLUDE FILES
*/
#include "general.h"	/* must always come first */

#include <stdlib.h>
#include <string.h>

#include "entry.h"
#include "options.h"
#include "read.h"
#include "main.h"
#include "vstring.h"
#include "nestlevel.h"
/*
*   DATA DEFINITIONS
*/
/* Tag kinds emitted by this parser, listed in the same order as the entries
 * of PythonKinds[]. */
typedef enum {
	K_CLASS,
	K_FUNCTION,
	K_MEMBER,
	K_VARIABLE,
	K_IMPORT
} pythonKind;
33 static kindOption PythonKinds[] = {
34 {TRUE, 'c', "class", "classes"},
35 {TRUE, 'f', "function", "functions"},
36 {TRUE, 'm', "member", "class members"},
37 {TRUE, 'v', "variable", "variables"},
38 {TRUE, 'i', "namespace", "imports"}
/* Delimiters of Python triple-quoted strings.  The pointers themselves are
 * compared/stored to remember which kind of long string is currently open. */
static const char *const singletriple = "'''";
static const char *const doubletriple = "\"\"\"";
/*
*   FUNCTION DEFINITIONS
*/
/* Last character held by a vString.  The index is length - 1, so the result
 * is only meaningful for a non-empty string. */
#define vStringLast(vs)	((vs)->buffer[(vs)->length - 1])
50 static boolean isIdentifierFirstCharacter (int c)
52 return (boolean) (isalpha (c) || c == '_');
55 static boolean isIdentifierCharacter (int c)
57 return (boolean) (isalnum (c) || c == '_');
/* Return the last component of a nested parent path.
 * The text after the last '.' is taken first (the whole string if there is no
 * '.'), then the text after the last '/' within that remainder.  The result
 * points into `parent`; NULL is returned only for a NULL argument. */
static const char *get_class_name_from_parent (const char *parent)
{
	const char *dot;
	const char *slash;

	if (parent == NULL)
		return NULL;

	dot = strrchr (parent, '.');
	if (dot != NULL)
		parent = dot + 1;

	slash = strrchr (parent, '/');
	return (slash != NULL) ? slash + 1 : parent;
}
84 /* Given a string with the contents of a line directly after the "def" keyword,
85 * extract all relevant information and create a tag.
87 static void makeFunctionTag (vString *const function,
88 vString *const parent, int is_class_parent, const char *arglist)
90 tagEntryInfo tag;
91 initTagEntry (&tag, vStringValue (function));
93 tag.kindName = "function";
94 tag.kind = 'f';
95 tag.extensionFields.arglist = arglist;
96 /* add argument list of __init__() methods to the class tag */
97 if (strcmp (vStringValue (function), "__init__") == 0 && parent != NULL)
99 const char *parent_tag_name = get_class_name_from_parent (vStringValue (parent));
100 if (parent_tag_name != NULL)
101 setTagArglistByName (parent_tag_name, arglist);
104 if (vStringLength (parent) > 0)
106 if (is_class_parent)
108 tag.kindName = "member";
109 tag.kind = 'm';
110 tag.extensionFields.scope [0] = "class";
111 tag.extensionFields.scope [1] = vStringValue (parent);
113 else
115 tag.extensionFields.scope [0] = "function";
116 tag.extensionFields.scope [1] = vStringValue (parent);
120 /* If a function starts with __, we mark it as file scope.
121 * FIXME: What is the proper way to signal such attributes?
122 * TODO: What does functions/classes starting with _ and __ mean in python?
124 if (strncmp (vStringValue (function), "__", 2) == 0 &&
125 strcmp (vStringValue (function), "__init__") != 0)
127 tag.extensionFields.access = "private";
128 tag.isFileScope = TRUE;
130 else
132 tag.extensionFields.access = "public";
134 makeTagEntry (&tag);
137 /* Given a string with the contents of the line directly after the "class"
138 * keyword, extract all necessary information and create a tag.
140 static void makeClassTag (vString *const class, vString *const inheritance,
141 vString *const parent, int is_class_parent)
143 tagEntryInfo tag;
144 initTagEntry (&tag, vStringValue (class));
145 tag.kindName = "class";
146 tag.kind = 'c';
147 if (vStringLength (parent) > 0)
149 if (is_class_parent)
151 tag.extensionFields.scope [0] = "class";
152 tag.extensionFields.scope [1] = vStringValue (parent);
154 else
156 tag.extensionFields.scope [0] = "function";
157 tag.extensionFields.scope [1] = vStringValue (parent);
160 tag.extensionFields.inheritance = vStringValue (inheritance);
161 makeTagEntry (&tag);
164 static void makeVariableTag (vString *const var, vString *const parent)
166 tagEntryInfo tag;
167 initTagEntry (&tag, vStringValue (var));
168 tag.kindName = "variable";
169 tag.kind = 'v';
170 if (vStringLength (parent) > 0)
172 tag.extensionFields.scope [0] = "class";
173 tag.extensionFields.scope [1] = vStringValue (parent);
175 makeTagEntry (&tag);
/* Skip a delimited run of text.
 * cp must point at the opening delimiter (normally a single or double quote).
 * Returns a pointer just past the matching, unescaped closing delimiter, or a
 * pointer to the terminating NUL if the run is not closed on this line.  A
 * backslash protects the character that follows it. */
static const char *skipString (const char *cp)
{
	const char delimiter = *cp;
	const char *p = cp + 1;
	int skip_next = 0;

	while (*p != '\0')
	{
		if (skip_next)
			skip_next = 0;			/* this character was escaped */
		else if (*p == '\\')
			skip_next = 1;
		else if (*p == delimiter)
			return p + 1;
		++p;
	}
	return p;
}
/* Skip everything up to an identifier start.
 * Quoted strings are stepped over as a whole, and a '#' outside a string
 * starts a comment that runs to the end of the line.  Returns a pointer to
 * the first identifier-start character found, or to the terminating NUL when
 * there is none. */
static const char *skipEverything (const char *cp)
{
	while (*cp != '\0')
	{
		if (*cp == '#')
		{
			/* a comment never ends at a second '#'; ignore the rest */
			return cp + strlen (cp);
		}
		if (*cp == '"' || *cp == '\'')
		{
			/* do not advance afterwards: the character following the
			 * literal may itself open another string or a comment */
			cp = skipString (cp);
			continue;
		}
		if (isIdentifierFirstCharacter ((int) *cp))
			return cp;
		++cp;
	}
	return cp;
}
/* Skip an identifier: return a pointer to the first character at or after cp
 * that is not an identifier character. */
static const char *skipIdentifier (const char *cp)
{
	const char *p;

	for (p = cp; isIdentifierCharacter ((int) *p); ++p)
		;
	return p;
}
/* Find the first "def", "class", "cdef" or "cpdef" keyword in cp, looking
 * only at identifiers outside strings and comments.
 * The keyword must be a complete identifier: words that merely start with a
 * keyword ("default", "classic", ...) are stepped over so that a real keyword
 * further along the line is still found.
 * Returns a pointer to the keyword, or NULL if the line contains none. */
static const char *findDefinitionOrClass (const char *cp)
{
	static const char *const keywords[] = { "def", "class", "cdef", "cpdef" };
	const size_t keyword_count = sizeof keywords / sizeof keywords[0];

	while (*cp != '\0')
	{
		const char *word_end;
		size_t word_len, i;

		cp = skipEverything (cp);
		word_end = skipIdentifier (cp);
		word_len = (size_t) (word_end - cp);

		for (i = 0; i < keyword_count; i++)
		{
			if (strlen (keywords[i]) == word_len &&
				strncmp (cp, keywords[i], word_len) == 0)
			{
				return cp;
			}
		}
		cp = word_end;
	}
	return NULL;
}
/* Return a pointer to the first non-whitespace character at or after cp (the
 * terminating NUL if only whitespace remains).
 * Each character is converted to unsigned char before isspace(): plain char
 * may be signed, and a negative argument is undefined behaviour. */
static const char *skipSpace (const char *cp)
{
	while (isspace ((unsigned char) *cp))
		++cp;
	return cp;
}
241 /* Starting at ''cp'', parse an identifier into ''identifier''. */
242 static const char *parseIdentifier (const char *cp, vString *const identifier)
244 vStringClear (identifier);
245 while (isIdentifierCharacter ((int) *cp))
247 vStringPut (identifier, (int) *cp);
248 ++cp;
250 vStringTerminate (identifier);
251 return cp;
254 static void parseClass (const char *cp, vString *const class,
255 vString *const parent, int is_class_parent)
257 vString *const inheritance = vStringNew ();
258 vStringClear (inheritance);
259 cp = parseIdentifier (cp, class);
260 cp = skipSpace (cp);
261 if (*cp == '(')
263 ++cp;
264 while (*cp != ')')
266 if (*cp == '\0')
268 /* Closing parenthesis can be in follow up line. */
269 cp = (const char *) fileReadLine ();
270 if (!cp) break;
271 vStringPut (inheritance, ' ');
272 continue;
274 vStringPut (inheritance, *cp);
275 ++cp;
277 vStringTerminate (inheritance);
279 makeClassTag (class, inheritance, parent, is_class_parent);
280 vStringDelete (inheritance);
283 static void parseImports (const char *cp)
285 const char *pos;
286 vString *name, *name_next;
288 cp = skipEverything (cp);
290 if ((pos = strstr (cp, "import")) == NULL)
291 return;
293 cp = pos + 6;
295 /* continue only if there is some space between the keyword and the identifier */
296 if (! isspace (*cp))
297 return;
299 cp++;
300 cp = skipSpace (cp);
302 name = vStringNew ();
303 name_next = vStringNew ();
305 cp = skipEverything (cp);
306 while (*cp)
308 cp = parseIdentifier (cp, name);
310 cp = skipEverything (cp);
311 /* we parse the next possible import statement as well to be able to ignore 'foo' in
312 * 'import foo as bar' */
313 parseIdentifier (cp, name_next);
315 /* take the current tag only if the next one is not "as" */
316 if (strcmp (vStringValue (name_next), "as") != 0 &&
317 strcmp (vStringValue (name), "as") != 0)
319 makeSimpleTag (name, PythonKinds, K_IMPORT);
322 vStringDelete (name);
323 vStringDelete (name_next);
/* modified from get.c getArglistFromStr().
 *
 * Return a newly allocated copy of the argument list found in buf: the text
 * from the first '(' up to and including its matching ')', or up to the end
 * of the string if the parentheses are not balanced.  Returns NULL when buf is
 * NULL, contains no '(', or memory cannot be allocated.  The caller owns the
 * result and must free it.
 *
 * buf itself is not modified.
 * note: does not ignore brackets inside strings! */
static char *parseArglist(const char *buf)
{
	const char *start, *end;
	char *arglist;
	size_t len;
	int level;

	if (NULL == buf)
		return NULL;
	start = strchr (buf, '(');
	if (NULL == start)
		return NULL;

	/* leave the loop with end one past the matching ')' or on the NUL */
	for (level = 1, end = start + 1; level > 0 && *end != '\0'; ++end)
	{
		if ('(' == *end)
			++level;
		else if (')' == *end)
			--level;
	}

	len = (size_t) (end - start);
	arglist = malloc (len + 1);
	if (NULL == arglist)
		return NULL;
	memcpy (arglist, start, len);
	arglist[len] = '\0';
	return arglist;
}
350 static void parseFunction (const char *cp, vString *const def,
351 vString *const parent, int is_class_parent)
353 char *arglist;
355 cp = parseIdentifier (cp, def);
356 arglist = parseArglist (cp);
357 makeFunctionTag (def, parent, is_class_parent, arglist);
358 if (arglist != NULL)
359 eFree (arglist);
362 /* Get the combined name of a nested symbol. Classes are separated with ".",
363 * functions with "/". For example this code:
364 * class MyClass:
365 * def myFunction:
366 * def SubFunction:
367 * class SubClass:
368 * def Method:
369 * pass
370 * Would produce this string:
371 * MyClass.MyFunction/SubFunction/SubClass.Method
373 static boolean constructParentString(NestingLevels *nls, int indent,
374 vString *result)
376 int i;
377 NestingLevel *prev = NULL;
378 int is_class = FALSE;
379 vStringClear (result);
380 for (i = 0; i < nls->n; i++)
382 NestingLevel *nl = nls->levels + i;
383 if (indent <= nl->indentation)
384 break;
385 if (prev)
387 vStringCatS(result, "."); /* make Geany symbol list grouping work properly */
389 if (prev->type == K_CLASS)
390 vStringCatS(result, ".");
391 else
392 vStringCatS(result, "/");
395 vStringCat(result, nl->name);
396 is_class = (nl->type == K_CLASS);
397 prev = nl;
399 return is_class;
402 /* Check whether parent's indentation level is higher than the current level and
403 * if so, remove it.
405 static void checkParent(NestingLevels *nls, int indent, vString *parent)
407 int i;
408 NestingLevel *n;
410 for (i = 0; i < nls->n; i++)
412 n = nls->levels + i;
413 /* is there a better way to compare two vStrings? */
414 if (strcmp(vStringValue(parent), vStringValue(n->name)) == 0)
416 if (n && indent <= n->indentation)
418 /* remove this level by clearing its name */
419 vStringClear(n->name);
421 break;
426 static void addNestingLevel(NestingLevels *nls, int indentation,
427 const vString *name, boolean is_class)
429 int i;
430 NestingLevel *nl = NULL;
432 for (i = 0; i < nls->n; i++)
434 nl = nls->levels + i;
435 if (indentation <= nl->indentation) break;
437 if (i == nls->n)
439 nestingLevelsPush(nls, name, 0);
440 nl = nls->levels + i;
442 else
443 { /* reuse existing slot */
444 nls->n = i + 1;
445 vStringCopy(nl->name, name);
447 nl->indentation = indentation;
448 nl->type = is_class ? K_CLASS : !K_CLASS;
451 /* Return a pointer to the start of the next triple string, or NULL. Store
452 * the kind of triple string in "which" if the return is not NULL.
454 static char const *find_triple_start(char const *string, char const **which)
456 char const *cp = string;
458 for (; *cp; cp++)
460 if (*cp == '"' || *cp == '\'')
462 if (strncmp(cp, doubletriple, 3) == 0)
464 *which = doubletriple;
465 return cp;
467 if (strncmp(cp, singletriple, 3) == 0)
469 *which = singletriple;
470 return cp;
472 cp = skipString(cp);
473 if (!*cp) break;
476 return NULL;
/* Find the end of a triple string as pointed to by "which", and update "which"
 * with any other triple strings following in the given string.
 *
 * On return *which is NULL if every triple string on the line was closed;
 * otherwise it names the delimiter of the string that is still open.
 */
static void find_triple_end(char const *string, char const **which)
{
	char const *pos = string;

	for (;;)
	{
		/* Check if the string ends in the same line. */
		char const *closing = strstr (pos, *which);
		if (closing == NULL)
			return;

		*which = NULL;

		/* If yes, check if another one starts in the same line. */
		pos = find_triple_start (closing + 3, which);
		if (pos == NULL)
			return;
		pos += 3;
	}
}
/* Parse global and class variable names (C.x) from assignment statements.
 * Object attributes (obj.x) are ignored.
 * Assignment to a tuple 'x, y = 2, 3' not supported.
 * TODO: ignore duplicate tags from reassignment statements.
 *
 * Returns a pointer to the variable name inside ''line'' when the line has the
 * shape "<spaces>name<spaces>= ...", or NULL otherwise.  The scan works with
 * offsets from the start of the line so that no pointer before the beginning
 * of the buffer is ever formed.
 */
static const char *findVariable(const char *line)
{
	const char *eq, *rest;
	size_t name_end, name_start, lead;

	eq = strchr (line, '=');
	if (eq == NULL)
		return NULL;

	for (rest = eq + 1; *rest != '\0'; rest++)
	{
		if (*rest == '=')
			return NULL;	/* ignore '==' operator and 'x=5,y=6)' function lines */
		if (*rest == '(' || *rest == '#')
			break;	/* allow 'x = func(b=2,y=2,' lines and comments at the end of line */
	}

	/* go backwards to the start of the line, checking we have valid chars */
	name_end = (size_t) (eq - line);
	while (name_end > 0 && isspace ((unsigned char) line[name_end - 1]))
		--name_end;

	name_start = name_end;
	while (name_start > 0 && isIdentifierCharacter ((int) line[name_start - 1]))
		--name_start;
	if (! isIdentifierFirstCharacter ((int) line[name_start]))
		return NULL;

	lead = name_start;
	while (lead > 0 && isspace ((unsigned char) line[lead - 1]))
		--lead;
	if (lead != 0)	/* the line isn't a simple variable assignment */
		return NULL;

	/* the line is valid, return the variable name */
	return line + name_start;
}
538 /* Skip type declaration that optionally follows a cdef/cpdef */
539 static const char *skipTypeDecl (const char *cp, boolean *is_class)
541 const char *lastStart = cp, *ptr = cp;
542 int loopCount = 0;
543 ptr = skipSpace(cp);
544 if (!strncmp("extern", ptr, 6)) {
545 ptr += 6;
546 ptr = skipSpace(ptr);
547 if (!strncmp("from", ptr, 4)) { return NULL; }
549 if (!strncmp("class", ptr, 5)) {
550 ptr += 5 ;
551 *is_class = TRUE;
552 ptr = skipSpace(ptr);
553 return ptr;
555 /* limit so that we don't pick off "int item=obj()" */
556 while (*ptr && loopCount++ < 2) {
557 while (*ptr && *ptr != '=' && *ptr != '(' && !isspace(*ptr)) ptr++;
558 if (!*ptr || *ptr == '=') return NULL;
559 if (*ptr == '(') {
560 return lastStart; /* if we stopped on a '(' we are done */
562 ptr = skipSpace(ptr);
563 lastStart = ptr;
564 while (*lastStart == '*') lastStart++; /* cdef int *identifier */
566 return NULL;
569 /* checks if there is a lambda at position of cp, and return its argument list
570 * if so.
571 * We don't return the lambda name since it is useless for now since we already
572 * know it when we call this function, and it would be a little slower. */
573 static boolean varIsLambda (const char *cp, char **arglist)
575 boolean is_lambda = FALSE;
577 cp = skipSpace (cp);
578 cp = skipIdentifier (cp); /* skip the lambda's name */
579 cp = skipSpace (cp);
580 if (*cp == '=')
582 cp++;
583 cp = skipSpace (cp);
584 if (strncmp (cp, "lambda", 6) == 0)
586 const char *tmp;
588 cp += 6; /* skip the lambda */
589 tmp = skipSpace (cp);
590 /* check if there is a space after lambda to detect assignations
591 * starting with 'lambdaXXX' */
592 if (tmp != cp)
594 vString *args = vStringNew ();
596 cp = tmp;
597 vStringPut (args, '(');
598 for (; *cp != 0 && *cp != ':'; cp++)
599 vStringPut (args, *cp);
600 vStringPut (args, ')');
601 vStringTerminate (args);
602 if (arglist)
603 *arglist = strdup (vStringValue (args));
604 vStringDelete (args);
605 is_lambda = TRUE;
609 return is_lambda;
612 static void findPythonTags (void)
614 vString *const continuation = vStringNew ();
615 vString *const name = vStringNew ();
616 vString *const parent = vStringNew();
618 NestingLevels *const nesting_levels = nestingLevelsNew();
620 const char *line;
621 int line_skip = 0;
622 char const *longStringLiteral = NULL;
624 while ((line = (const char *) fileReadLine ()) != NULL)
626 const char *cp = line, *candidate;
627 char const *longstring;
628 char const *keyword, *variable;
629 int indent;
631 cp = skipSpace (cp);
633 if (*cp == '\0') /* skip blank line */
634 continue;
636 /* Skip comment if we are not inside a multi-line string. */
637 if (*cp == '#' && !longStringLiteral)
638 continue;
640 /* Deal with line continuation. */
641 if (!line_skip) vStringClear(continuation);
642 vStringCatS(continuation, line);
643 vStringStripTrailing(continuation);
644 if (vStringLast(continuation) == '\\')
646 vStringChop(continuation);
647 vStringCatS(continuation, " ");
648 line_skip = 1;
649 continue;
651 cp = line = vStringValue(continuation);
652 cp = skipSpace (cp);
653 indent = cp - line;
654 line_skip = 0;
656 checkParent(nesting_levels, indent, parent);
658 /* Deal with multiline string ending. */
659 if (longStringLiteral)
661 find_triple_end(cp, &longStringLiteral);
662 continue;
665 /* Deal with multiline string start. */
666 longstring = find_triple_start(cp, &longStringLiteral);
667 if (longstring)
669 longstring += 3;
670 find_triple_end(longstring, &longStringLiteral);
671 /* We don't parse for any tags in the rest of the line. */
672 continue;
675 /* Deal with def and class keywords. */
676 keyword = findDefinitionOrClass (cp);
677 if (keyword)
679 boolean found = FALSE;
680 boolean is_class = FALSE;
681 if (!strncmp (keyword, "def ", 4))
683 cp = skipSpace (keyword + 3);
684 found = TRUE;
686 else if (!strncmp (keyword, "class ", 6))
688 cp = skipSpace (keyword + 5);
689 found = TRUE;
690 is_class = TRUE;
692 else if (!strncmp (keyword, "cdef ", 5))
694 cp = skipSpace(keyword + 4);
695 candidate = skipTypeDecl (cp, &is_class);
696 if (candidate)
698 found = TRUE;
699 cp = candidate;
703 else if (!strncmp (keyword, "cpdef ", 6))
705 cp = skipSpace(keyword + 5);
706 candidate = skipTypeDecl (cp, &is_class);
707 if (candidate)
709 found = TRUE;
710 cp = candidate;
714 if (found)
716 boolean is_parent_class;
718 is_parent_class =
719 constructParentString(nesting_levels, indent, parent);
721 if (is_class)
722 parseClass (cp, name, parent, is_parent_class);
723 else
724 parseFunction(cp, name, parent, is_parent_class);
726 addNestingLevel(nesting_levels, indent, name, is_class);
729 /* Find global and class variables */
730 variable = findVariable(line);
731 if (variable)
733 const char *start = variable;
734 char *arglist;
735 boolean parent_is_class;
737 vStringClear (name);
738 while (isIdentifierCharacter ((int) *start))
740 vStringPut (name, (int) *start);
741 ++start;
743 vStringTerminate (name);
745 parent_is_class = constructParentString(nesting_levels, indent, parent);
746 if (varIsLambda (variable, &arglist))
748 /* show class members or top-level script lambdas only */
749 if (parent_is_class || vStringLength(parent) == 0)
750 makeFunctionTag (name, parent, parent_is_class, arglist);
751 if (arglist != NULL)
752 eFree (arglist);
754 else
756 /* skip variables in methods */
757 if (! parent_is_class && vStringLength(parent) > 0)
758 continue;
760 makeVariableTag (name, parent);
763 /* Find and parse imports */
764 parseImports(line);
766 /* Clean up all memory we allocated. */
767 vStringDelete (parent);
768 vStringDelete (name);
769 vStringDelete (continuation);
770 nestingLevelsFree (nesting_levels);
773 extern parserDefinition *PythonParser (void)
775 static const char *const extensions[] = { "py", "pyx", "pxd", "pxi" ,"scons", NULL };
776 parserDefinition *def = parserNew ("Python");
777 def->kinds = PythonKinds;
778 def->kindCount = KIND_COUNT (PythonKinds);
779 def->extensions = extensions;
780 def->parser = findPythonTags;
781 return def;
784 /* vi:set tabstop=4 shiftwidth=4: */