Update HACKING for changed doc generation instructions
[geany-mirror.git] / tagmanager / ctags / python.c
blob3010258f3693b49bf933be63963d9fd8eb731177
/*
 *   Copyright (c) 2000-2003, Darren Hiebert
 *
 *   This source code is released for free distribution under the terms of the
 *   GNU General Public License.
 *
 *   This module contains functions for generating tags for Python language
 *   files.
 */
/*
 *   INCLUDE FILES
 */
#include "general.h"	/* must always come first */

#include <stdlib.h>
#include <string.h>

#include "entry.h"
#include "options.h"
#include "read.h"
#include "main.h"
#include "vstring.h"
#include "nestlevel.h"
/*
 *   DATA DEFINITIONS
 */
27 typedef enum {
28 K_CLASS, K_FUNCTION, K_METHOD, K_VARIABLE, K_IMPORT
29 } pythonKind;
31 static kindOption PythonKinds[] = {
32 {TRUE, 'c', "class", "classes"},
33 {TRUE, 'f', "function", "functions"},
34 {TRUE, 'm', "method", "class methods"},
35 {TRUE, 'v', "variable", "variables"},
36 /* defined as externvar to get those excluded as forward type in symbols.c:goto_tag()
37 * so we can jump to the real implementation (if known) instead of to the import statement */
38 {TRUE, 'x', "externvar", "imports"}
41 typedef enum {
42 A_PUBLIC, A_PRIVATE, A_PROTECTED
43 } pythonAccess;
45 static const char *const PythonAccesses[] = {
46 "public", "private", "protected"
49 static char const * const singletriple = "'''";
50 static char const * const doubletriple = "\"\"\"";
/*
 *   FUNCTION DEFINITIONS
 */
56 static boolean isIdentifierFirstCharacter (int c)
58 return (boolean) (isalpha (c) || c == '_');
61 static boolean isIdentifierCharacter (int c)
63 return (boolean) (isalnum (c) || c == '_');
/* Return the last component of a parent scope string.
 * Everything up to and including the last '.' is dropped first; from what
 * remains, everything up to and including the last '/' is dropped.
 * The result points into `parent`; NULL is returned for a NULL parent. */
static const char *get_class_name_from_parent (const char *parent)
{
	const char *tail;
	const char *sep;

	if (parent == NULL)
		return NULL;

	tail = parent;

	sep = strrchr (tail, '.');
	if (sep != NULL)
		tail = sep + 1;

	sep = strrchr (tail, '/');
	return (sep != NULL) ? sep + 1 : tail;
}
89 /* follows PEP-8, and always reports single-underscores as protected
90 * See:
91 * - http://www.python.org/dev/peps/pep-0008/#method-names-and-instance-variables
92 * - http://www.python.org/dev/peps/pep-0008/#designing-for-inheritance
94 static pythonAccess accessFromIdentifier (const vString *const ident)
96 const char *const p = vStringValue (ident);
97 const size_t len = vStringLength (ident);
99 /* not starting with "_", public */
100 if (len < 1 || p[0] != '_')
101 return A_PUBLIC;
102 /* "__...__": magic methods */
103 else if (len > 3 && p[1] == '_' && p[len - 2] == '_' && p[len - 1] == '_')
104 return A_PUBLIC;
105 /* "__...": name mangling */
106 else if (len > 1 && p[1] == '_')
107 return A_PRIVATE;
108 /* "_...": suggested as non-public, but easily accessible */
109 else
110 return A_PROTECTED;
113 /* Given a string with the contents of a line directly after the "def" keyword,
114 * extract all relevant information and create a tag.
116 static void makeFunctionTag (vString *const function,
117 vString *const parent, int is_class_parent, const char *arglist)
119 pythonAccess access;
120 tagEntryInfo tag;
121 initTagEntry (&tag, vStringValue (function));
123 tag.kindName = PythonKinds[K_FUNCTION].name;
124 tag.kind = PythonKinds[K_FUNCTION].letter;
125 tag.extensionFields.arglist = arglist;
126 /* add argument list of __init__() methods to the class tag */
127 if (strcmp (vStringValue (function), "__init__") == 0 && parent != NULL)
129 const char *parent_tag_name = get_class_name_from_parent (vStringValue (parent));
130 if (parent_tag_name != NULL)
131 setTagArglistByName (parent_tag_name, arglist);
134 if (vStringLength (parent) > 0)
136 if (is_class_parent)
138 tag.kindName = PythonKinds[K_METHOD].name;
139 tag.kind = PythonKinds[K_METHOD].letter;
140 tag.extensionFields.scope [0] = PythonKinds[K_CLASS].name;
141 tag.extensionFields.scope [1] = vStringValue (parent);
143 else
145 tag.extensionFields.scope [0] = PythonKinds[K_FUNCTION].name;
146 tag.extensionFields.scope [1] = vStringValue (parent);
150 access = accessFromIdentifier (function);
151 tag.extensionFields.access = PythonAccesses [access];
152 /* FIXME: should we really set isFileScope in addition to access? */
153 if (access == A_PRIVATE)
154 tag.isFileScope = TRUE;
156 makeTagEntry (&tag);
159 /* Given a string with the contents of the line directly after the "class"
160 * keyword, extract all necessary information and create a tag.
162 static void makeClassTag (vString *const class, vString *const inheritance,
163 vString *const parent, int is_class_parent)
165 tagEntryInfo tag;
166 initTagEntry (&tag, vStringValue (class));
167 tag.kindName = PythonKinds[K_CLASS].name;
168 tag.kind = PythonKinds[K_CLASS].letter;
169 if (vStringLength (parent) > 0)
171 if (is_class_parent)
173 tag.extensionFields.scope [0] = PythonKinds[K_CLASS].name;
174 tag.extensionFields.scope [1] = vStringValue (parent);
176 else
178 tag.extensionFields.scope [0] = PythonKinds[K_FUNCTION].name;
179 tag.extensionFields.scope [1] = vStringValue (parent);
182 tag.extensionFields.inheritance = vStringValue (inheritance);
183 makeTagEntry (&tag);
186 static void makeVariableTag (vString *const var, vString *const parent)
188 tagEntryInfo tag;
189 initTagEntry (&tag, vStringValue (var));
190 tag.kindName = PythonKinds[K_VARIABLE].name;
191 tag.kind = PythonKinds[K_VARIABLE].letter;
192 if (vStringLength (parent) > 0)
194 tag.extensionFields.scope [0] = PythonKinds[K_CLASS].name;
195 tag.extensionFields.scope [1] = vStringValue (parent);
197 makeTagEntry (&tag);
/* Skip a single or double quoted string.
 * `cp` points at the opening delimiter.  Returns a pointer just past the
 * matching, unescaped closing delimiter, or a pointer to the terminating NUL
 * if the string is not closed on this line. */
static const char *skipString (const char *cp)
{
	const char delimiter = *cp;
	const char *p = cp + 1;

	while (*p != '\0')
	{
		if (*p == '\\')
		{
			/* a backslash protects the next character, whatever it is */
			p++;
			if (*p == '\0')
				break;
		}
		else if (*p == delimiter)
			return p + 1;
		p++;
	}
	return p;
}
/* Skip everything up to an identifier start.
 * String literals (including those with u/r/b/ur/br prefixes, in any case)
 * are skipped as a whole.  A '#' starts a comment, so the end of the line is
 * returned as soon as one is met outside a string.
 * Returns a pointer to the first identifier start found, or to the
 * terminating NUL when there is none. */
static const char *skipEverything (const char *cp)
{
	for (; *cp; cp++)
	{
		int match = 0;

		/* the rest of the line is a comment: nothing left to find */
		if (*cp == '#')
			return strchr (cp, '\0');

		if (*cp == '"' || *cp == '\'')
			match = 1;

		/* these checks find unicode, binary (Python 3) and raw strings */
		if (!match && (
			!strncasecmp (cp, "u'", 2) || !strncasecmp (cp, "u\"", 2) ||
			!strncasecmp (cp, "r'", 2) || !strncasecmp (cp, "r\"", 2) ||
			!strncasecmp (cp, "b'", 2) || !strncasecmp (cp, "b\"", 2)))
		{
			match = 1;
			cp += 1;
		}
		if (!match && (
			!strncasecmp (cp, "ur'", 3) || !strncasecmp (cp, "ur\"", 3) ||
			!strncasecmp (cp, "br'", 3) || !strncasecmp (cp, "br\"", 3)))
		{
			match = 1;
			cp += 2;
		}
		if (match)
		{
			cp = skipString (cp);
			if (!*cp)
				break;
		}
		if (isIdentifierFirstCharacter ((int) *cp))
			return cp;
		/* skipString() already left cp on the first unread character; step
		 * back so the loop increment does not jump over it */
		if (match)
			cp--;
	}
	return cp;
}
/* Skip an identifier: return a pointer to the first character at or after
 * `cp` that is not an identifier character. */
static const char *skipIdentifier (const char *cp)
{
	for (; isIdentifierCharacter ((int) *cp); cp++)
		;
	return cp;
}
/* Walk the identifiers of a line and return a pointer to the first one that
 * begins with "def", "class", "cdef" or "cpdef".  Only the prefix is compared
 * here.  Returns NULL when the end of the line is reached first. */
static const char *findDefinitionOrClass (const char *cp)
{
	static const char *const prefixes[] = { "def", "class", "cdef", "cpdef" };
	const size_t count = sizeof prefixes / sizeof prefixes[0];

	while (*cp != '\0')
	{
		size_t i;

		cp = skipEverything (cp);
		for (i = 0; i < count; i++)
		{
			if (strncmp (cp, prefixes[i], strlen (prefixes[i])) == 0)
				return cp;
		}
		cp = skipIdentifier (cp);
	}
	return NULL;
}
/* Return a pointer to the first character at or after `cp` that isspace()
 * does not accept (possibly the terminating NUL).
 * The character is converted to unsigned char first: plain char may be
 * negative for non-ASCII bytes, and isspace() is undefined for negative
 * arguments other than EOF. */
static const char *skipSpace (const char *cp)
{
	while (isspace ((unsigned char) *cp))
		++cp;
	return cp;
}
284 /* Starting at ''cp'', parse an identifier into ''identifier''. */
285 static const char *parseIdentifier (const char *cp, vString *const identifier)
287 vStringClear (identifier);
288 while (isIdentifierCharacter ((int) *cp))
290 vStringPut (identifier, (int) *cp);
291 ++cp;
293 vStringTerminate (identifier);
294 return cp;
297 static void parseClass (const char *cp, vString *const class,
298 vString *const parent, int is_class_parent)
300 vString *const inheritance = vStringNew ();
301 vStringClear (inheritance);
302 cp = parseIdentifier (cp, class);
303 cp = skipSpace (cp);
304 if (*cp == '(')
306 ++cp;
307 while (*cp != ')')
309 if (*cp == '\0')
311 /* Closing parenthesis can be in follow up line. */
312 cp = (const char *) fileReadLine ();
313 if (!cp) break;
314 vStringPut (inheritance, ' ');
315 continue;
317 vStringPut (inheritance, *cp);
318 ++cp;
320 vStringTerminate (inheritance);
322 makeClassTag (class, inheritance, parent, is_class_parent);
323 vStringDelete (inheritance);
326 static void parseImports (const char *cp)
328 const char *pos;
329 vString *name, *name_next;
331 cp = skipEverything (cp);
333 if ((pos = strstr (cp, "import")) == NULL)
334 return;
336 cp = pos + 6;
338 /* continue only if there is some space between the keyword and the identifier */
339 if (! isspace (*cp))
340 return;
342 cp++;
343 cp = skipSpace (cp);
345 name = vStringNew ();
346 name_next = vStringNew ();
348 cp = skipEverything (cp);
349 while (*cp)
351 cp = parseIdentifier (cp, name);
353 cp = skipEverything (cp);
354 /* we parse the next possible import statement as well to be able to ignore 'foo' in
355 * 'import foo as bar' */
356 parseIdentifier (cp, name_next);
358 /* take the current tag only if the next one is not "as" */
359 if (strcmp (vStringValue (name_next), "as") != 0 &&
360 strcmp (vStringValue (name), "as") != 0)
362 makeSimpleTag (name, PythonKinds, K_IMPORT);
365 vStringDelete (name);
366 vStringDelete (name_next);
/* modified from get.c getArglistFromStr().
 *
 * Return a newly allocated copy of the argument list found in `buf`: the text
 * from the first '(' up to and including its matching ')', or up to the end
 * of the string when the parentheses are not balanced.
 * Returns NULL when `buf` is NULL, contains no '(' or memory is exhausted.
 * The input is left unmodified; the caller owns the returned string.
 * note: does not ignore brackets inside strings! */
static char *parseArglist(const char *buf)
{
	const char *open, *end;
	char *copy;
	size_t length;
	int level;

	if (NULL == buf)
		return NULL;
	open = strchr(buf, '(');
	if (NULL == open)
		return NULL;

	/* leave `end` one past the matching ')' or on the terminating NUL */
	for (level = 1, end = open + 1; level > 0 && '\0' != *end; ++end)
	{
		if ('(' == *end)
			++ level;
		else if (')' == *end)
			-- level;
	}

	length = (size_t) (end - open);
	copy = malloc(length + 1);
	if (NULL == copy)
		return NULL;
	memcpy(copy, open, length);
	copy[length] = '\0';
	return copy;
}
393 static void parseFunction (const char *cp, vString *const def,
394 vString *const parent, int is_class_parent)
396 char *arglist;
398 cp = parseIdentifier (cp, def);
399 arglist = parseArglist (cp);
400 makeFunctionTag (def, parent, is_class_parent, arglist);
401 if (arglist != NULL)
402 eFree (arglist);
405 /* Get the combined name of a nested symbol. Classes are separated with ".",
406 * functions with "/". For example this code:
407 * class MyClass:
408 * def myFunction:
409 * def SubFunction:
410 * class SubClass:
411 * def Method:
412 * pass
413 * Would produce this string:
414 * MyClass.MyFunction/SubFunction/SubClass.Method
416 static boolean constructParentString(NestingLevels *nls, int indent,
417 vString *result)
419 int i;
420 NestingLevel *prev = NULL;
421 int is_class = FALSE;
422 vStringClear (result);
423 for (i = 0; i < nls->n; i++)
425 NestingLevel *nl = nls->levels + i;
426 if (indent <= nl->indentation)
427 break;
428 if (prev)
430 vStringCatS(result, "."); /* make Geany symbol list grouping work properly */
432 if (prev->type == K_CLASS)
433 vStringCatS(result, ".");
434 else
435 vStringCatS(result, "/");
438 vStringCat(result, nl->name);
439 is_class = (nl->type == K_CLASS);
440 prev = nl;
442 return is_class;
445 /* Check whether parent's indentation level is higher than the current level and
446 * if so, remove it.
448 static void checkParent(NestingLevels *nls, int indent, vString *parent)
450 int i;
451 NestingLevel *n;
453 for (i = 0; i < nls->n; i++)
455 n = nls->levels + i;
456 /* is there a better way to compare two vStrings? */
457 if (n && strcmp(vStringValue(parent), vStringValue(n->name)) == 0)
459 if (indent <= n->indentation)
461 /* remove this level by clearing its name */
462 vStringClear(n->name);
464 break;
469 static void addNestingLevel(NestingLevels *nls, int indentation,
470 const vString *name, boolean is_class)
472 int i;
473 NestingLevel *nl = NULL;
475 for (i = 0; i < nls->n; i++)
477 nl = nls->levels + i;
478 if (indentation <= nl->indentation) break;
480 if (i == nls->n)
482 nestingLevelsPush(nls, name, 0);
483 nl = nls->levels + i;
485 else
486 { /* reuse existing slot */
487 nls->n = i + 1;
488 vStringCopy(nl->name, name);
490 nl->indentation = indentation;
491 nl->type = is_class ? K_CLASS : !K_CLASS;
494 /* Return a pointer to the start of the next triple string, or NULL. Store
495 * the kind of triple string in "which" if the return is not NULL.
497 static char const *find_triple_start(char const *string, char const **which)
499 char const *cp = string;
501 for (; *cp; cp++)
503 if (*cp == '#')
504 break;
506 if (*cp == '"' || *cp == '\'')
508 if (strncmp(cp, doubletriple, 3) == 0)
510 *which = doubletriple;
511 return cp;
513 if (strncmp(cp, singletriple, 3) == 0)
515 *which = singletriple;
516 return cp;
518 cp = skipString(cp);
519 if (!*cp) break;
522 return NULL;
/* Find the end of a triple string as pointed to by "which", and update "which"
 * with any other triple strings following in the given string.
 * On return "which" is NULL when every triple string met on the line is
 * closed on it, otherwise it names the delimiter that is still open.
 */
static void find_triple_end(char const *string, char const **which)
{
	char const *pos = string;

	for (;;)
	{
		/* Check if the string ends in the same line. */
		char const *closing = strstr (pos, *which);
		char const *opening;

		if (closing == NULL)
			return;
		*which = NULL;

		/* If yes, check if another one starts in the same line. */
		opening = find_triple_start(closing + 3, which);
		if (opening == NULL)
			return;
		pos = opening + 3;
	}
}
/* Parse global and class variable names (C.x) from assignment statements.
 * Object attributes (obj.x) are ignored.
 * Assignment to a tuple 'x, y = 2, 3' not supported.
 * TODO: ignore duplicate tags from reassignment statements.
 *
 * Returns a pointer to the first character of the variable name inside
 * `line`, or NULL when the line is not a simple "name = value" assignment.
 * The backward scan works on indexes so that no pointer before the start of
 * `line` is ever formed. */
static const char *findVariable(const char *line)
{
	const char *assign, *scan;
	size_t end, begin, lead;

	assign = strchr(line, '=');
	if (!assign)
		return NULL;

	for (scan = assign + 1; *scan; scan++)
	{
		if (*scan == '=')
			return NULL;	/* ignore '==' operator and 'x=5,y=6)' function lines */
		if (*scan == '(' || *scan == '#')
			break;	/* allow 'x = func(b=2,y=2,' lines and comments at the end of line */
	}

	/* go backwards to the start of the line, checking we have valid chars;
	 * `end` is one past the last character of the candidate name */
	end = (size_t) (assign - line);
	while (end > 0 && isspace ((unsigned char) line[end - 1]))
		end--;

	begin = end;
	while (begin > 0 && isIdentifierCharacter ((unsigned char) line[begin - 1]))
		begin--;
	if (!isIdentifierFirstCharacter ((unsigned char) line[begin]))
		return NULL;

	/* only white space may precede the name */
	lead = begin;
	while (lead > 0 && isspace ((unsigned char) line[lead - 1]))
		lead--;
	if (lead != 0)	/* the line isn't a simple variable assignment */
		return NULL;

	/* the line is valid, return the variable name */
	return line + begin;
}
584 /* Skip type declaration that optionally follows a cdef/cpdef */
585 static const char *skipTypeDecl (const char *cp, boolean *is_class)
587 const char *lastStart = cp, *ptr = cp;
588 int loopCount = 0;
589 ptr = skipSpace(cp);
590 if (!strncmp("extern", ptr, 6)) {
591 ptr += 6;
592 ptr = skipSpace(ptr);
593 if (!strncmp("from", ptr, 4)) { return NULL; }
595 if (!strncmp("class", ptr, 5)) {
596 ptr += 5 ;
597 *is_class = TRUE;
598 ptr = skipSpace(ptr);
599 return ptr;
601 /* limit so that we don't pick off "int item=obj()" */
602 while (*ptr && loopCount++ < 2) {
603 while (*ptr && *ptr != '=' && *ptr != '(' && !isspace(*ptr)) {
604 /* skip over e.g. 'cpdef numpy.ndarray[dtype=double, ndim=1]' */
605 if(*ptr == '[') {
606 while(*ptr && *ptr != ']') ptr++;
608 ptr++;
610 if (!*ptr || *ptr == '=') return NULL;
611 if (*ptr == '(') {
612 return lastStart; /* if we stopped on a '(' we are done */
614 ptr = skipSpace(ptr);
615 lastStart = ptr;
616 while (*lastStart == '*') lastStart++; /* cdef int *identifier */
618 return NULL;
621 /* checks if there is a lambda at position of cp, and return its argument list
622 * if so.
623 * We don't return the lambda name since it is useless for now since we already
624 * know it when we call this function, and it would be a little slower. */
625 static boolean varIsLambda (const char *cp, char **arglist)
627 boolean is_lambda = FALSE;
629 cp = skipSpace (cp);
630 cp = skipIdentifier (cp); /* skip the lambda's name */
631 cp = skipSpace (cp);
632 if (*cp == '=')
634 cp++;
635 cp = skipSpace (cp);
636 if (strncmp (cp, "lambda", 6) == 0)
638 const char *tmp;
640 cp += 6; /* skip the lambda */
641 tmp = skipSpace (cp);
642 /* check if there is a space after lambda to detect assignations
643 * starting with 'lambdaXXX' */
644 if (tmp != cp)
646 vString *args = vStringNew ();
648 cp = tmp;
649 vStringPut (args, '(');
650 for (; *cp != 0 && *cp != ':'; cp++)
651 vStringPut (args, *cp);
652 vStringPut (args, ')');
653 vStringTerminate (args);
654 if (arglist)
655 *arglist = strdup (vStringValue (args));
656 vStringDelete (args);
657 is_lambda = TRUE;
661 return is_lambda;
664 /* checks if @p cp has keyword @p keyword at the start, and fills @p cp_n with
665 * the position of the next non-whitespace after the keyword */
666 static boolean matchKeyword (const char *keyword, const char *cp, const char **cp_n)
668 size_t kw_len = strlen (keyword);
669 if (strncmp (cp, keyword, kw_len) == 0 && isspace (cp[kw_len]))
671 *cp_n = skipSpace (&cp[kw_len + 1]);
672 return TRUE;
674 return FALSE;
677 static void findPythonTags (void)
679 vString *const continuation = vStringNew ();
680 vString *const name = vStringNew ();
681 vString *const parent = vStringNew();
683 NestingLevels *const nesting_levels = nestingLevelsNew();
685 const char *line;
686 int line_skip = 0;
687 char const *longStringLiteral = NULL;
689 while ((line = (const char *) fileReadLine ()) != NULL)
691 const char *cp = line, *candidate;
692 char const *longstring;
693 char const *keyword, *variable;
694 int indent;
696 cp = skipSpace (cp);
698 if (*cp == '\0') /* skip blank line */
699 continue;
701 /* Skip comment if we are not inside a multi-line string. */
702 if (*cp == '#' && !longStringLiteral)
703 continue;
705 /* Deal with line continuation. */
706 if (!line_skip) vStringClear(continuation);
707 vStringCatS(continuation, line);
708 vStringStripTrailing(continuation);
709 if (vStringLast(continuation) == '\\')
711 vStringChop(continuation);
712 vStringCatS(continuation, " ");
713 line_skip = 1;
714 continue;
716 cp = line = vStringValue(continuation);
717 cp = skipSpace (cp);
718 indent = cp - line;
719 line_skip = 0;
721 /* Deal with multiline string ending. */
722 if (longStringLiteral)
724 find_triple_end(cp, &longStringLiteral);
725 continue;
728 checkParent(nesting_levels, indent, parent);
730 /* Find global and class variables */
731 variable = findVariable(line);
732 if (variable)
734 const char *start = variable;
735 char *arglist;
736 boolean parent_is_class;
738 vStringClear (name);
739 while (isIdentifierCharacter ((int) *start))
741 vStringPut (name, (int) *start);
742 ++start;
744 vStringTerminate (name);
746 parent_is_class = constructParentString(nesting_levels, indent, parent);
747 if (varIsLambda (variable, &arglist))
749 /* show class members or top-level script lambdas only */
750 if (parent_is_class || vStringLength(parent) == 0)
751 makeFunctionTag (name, parent, parent_is_class, arglist);
752 eFree (arglist);
754 else
756 /* skip variables in methods */
757 if (parent_is_class || vStringLength(parent) == 0)
758 makeVariableTag (name, parent);
762 /* Deal with multiline string start. */
763 longstring = find_triple_start(cp, &longStringLiteral);
764 if (longstring)
766 longstring += 3;
767 find_triple_end(longstring, &longStringLiteral);
768 /* We don't parse for any tags in the rest of the line. */
769 continue;
772 /* Deal with def and class keywords. */
773 keyword = findDefinitionOrClass (cp);
774 if (keyword)
776 boolean found = FALSE;
777 boolean is_class = FALSE;
778 if (matchKeyword ("def", keyword, &cp))
780 found = TRUE;
782 else if (matchKeyword ("class", keyword, &cp))
784 found = TRUE;
785 is_class = TRUE;
787 else if (matchKeyword ("cdef", keyword, &cp))
789 candidate = skipTypeDecl (cp, &is_class);
790 if (candidate)
792 found = TRUE;
793 cp = candidate;
797 else if (matchKeyword ("cpdef", keyword, &cp))
799 candidate = skipTypeDecl (cp, &is_class);
800 if (candidate)
802 found = TRUE;
803 cp = candidate;
807 if (found)
809 boolean is_parent_class;
811 is_parent_class =
812 constructParentString(nesting_levels, indent, parent);
814 if (is_class)
815 parseClass (cp, name, parent, is_parent_class);
816 else
817 parseFunction(cp, name, parent, is_parent_class);
819 addNestingLevel(nesting_levels, indent, name, is_class);
822 /* Find and parse imports */
823 parseImports(line);
825 /* Clean up all memory we allocated. */
826 vStringDelete (parent);
827 vStringDelete (name);
828 vStringDelete (continuation);
829 nestingLevelsFree (nesting_levels);
832 extern parserDefinition *PythonParser (void)
834 static const char *const extensions[] = { "py", "pyx", "pxd", "pxi" ,"scons", NULL };
835 parserDefinition *def = parserNew ("Python");
836 def->kinds = PythonKinds;
837 def->kindCount = KIND_COUNT (PythonKinds);
838 def->extensions = extensions;
839 def->parser = findPythonTags;
840 return def;
/* vi:set tabstop=4 shiftwidth=4: */