python: do not ignore the character after a skipped string
[geany-mirror.git] / tagmanager / ctags / python.c
blob16191e6457e9df3036ec7dc4290f9ada5823bc7f
1 /*
2 * Copyright (c) 2000-2003, Darren Hiebert
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License.
7 * This module contains functions for generating tags for Python language
8 * files.
9 */
/*
 *   INCLUDE FILES
 */
13 #include "general.h" /* must always come first */
15 #include <string.h>
17 #include "entry.h"
18 #include "options.h"
19 #include "read.h"
20 #include "main.h"
21 #include "vstring.h"
22 #include "nestlevel.h"
/*
 *   DATA DEFINITIONS
 */
/* Kinds of tags produced by this parser.  The values are used as indices
 * into PythonKinds[], so the order here must match that table. */
typedef enum {
	K_CLASS,
	K_FUNCTION,
	K_METHOD,
	K_VARIABLE,
	K_IMPORT
} pythonKind;
31 static kindOption PythonKinds[] = {
32 {TRUE, 'c', "class", "classes"},
33 {TRUE, 'f', "function", "functions"},
34 {TRUE, 'm', "method", "class methods"},
35 {TRUE, 'v', "variable", "variables"},
36 /* defined as externvar to get those excluded as forward type in symbols.c:goto_tag()
37 * so we can jump to the real implementation (if known) instead of to the import statement */
38 {TRUE, 'x', "externvar", "imports"}
/* Access levels reported for a tag.  The values are used as indices into
 * PythonAccesses[], so the order here must match that table. */
typedef enum {
	A_PUBLIC,
	A_PRIVATE,
	A_PROTECTED
} pythonAccess;
/* Access names written to the tag's "access" field, indexed by pythonAccess. */
static const char *const PythonAccesses[] = {
	"public",
	"private",
	"protected"
};
/* The two delimiters of Python triple-quoted (multi-line) strings. */
static const char *const singletriple = "'''";
static const char *const doubletriple = "\"\"\"";
/*
 *   FUNCTION DEFINITIONS
 */
56 static boolean isIdentifierFirstCharacter (int c)
58 return (boolean) (isalpha (c) || c == '_');
61 static boolean isIdentifierCharacter (int c)
63 return (boolean) (isalnum (c) || c == '_');
/* Return the last component of a combined parent name.
 *
 * Everything up to and including the last '.' is dropped first; within what
 * remains, everything up to and including the last '/' is dropped too.
 * The result points into `parent` (no copy is made).  Returns NULL only when
 * `parent` is NULL. */
static const char *get_class_name_from_parent (const char *parent)
{
	const char *sep;

	if (parent == NULL)
		return NULL;

	/* keep only what follows the last '.' */
	sep = strrchr (parent, '.');
	if (sep != NULL)
		parent = sep + 1;

	/* ...and, within that, only what follows the last '/' */
	sep = strrchr (parent, '/');
	return (sep != NULL) ? sep + 1 : parent;
}
89 /* follows PEP-8, and always reports single-underscores as protected
90 * See:
91 * - http://www.python.org/dev/peps/pep-0008/#method-names-and-instance-variables
92 * - http://www.python.org/dev/peps/pep-0008/#designing-for-inheritance
94 static pythonAccess accessFromIdentifier (const vString *const ident,
95 pythonKind kind, boolean has_parent, boolean parent_is_class)
97 const char *const p = vStringValue (ident);
98 const size_t len = vStringLength (ident);
100 /* inside a function/method, private */
101 if (has_parent && !parent_is_class)
102 return A_PRIVATE;
103 /* not starting with "_", public */
104 else if (len < 1 || p[0] != '_')
105 return A_PUBLIC;
106 /* "__...__": magic methods */
107 else if (kind == K_METHOD && parent_is_class &&
108 len > 3 && p[1] == '_' && p[len - 2] == '_' && p[len - 1] == '_')
109 return A_PUBLIC;
110 /* "__...": name mangling */
111 else if (parent_is_class && len > 1 && p[1] == '_')
112 return A_PRIVATE;
113 /* "_...": suggested as non-public, but easily accessible */
114 else
115 return A_PROTECTED;
118 static void addAccessFields (tagEntryInfo *const entry,
119 const vString *const ident, pythonKind kind,
120 boolean has_parent, boolean parent_is_class)
122 pythonAccess access;
124 access = accessFromIdentifier (ident, kind, has_parent, parent_is_class);
125 entry->extensionFields.access = PythonAccesses [access];
126 /* FIXME: should we really set isFileScope in addition to access? */
127 if (access == A_PRIVATE)
128 entry->isFileScope = TRUE;
131 /* Given a string with the contents of a line directly after the "def" keyword,
132 * extract all relevant information and create a tag.
134 static void makeFunctionTag (vString *const function,
135 vString *const parent, int is_class_parent, const char *arglist)
137 tagEntryInfo tag;
138 initTagEntry (&tag, vStringValue (function));
140 tag.kindName = PythonKinds[K_FUNCTION].name;
141 tag.kind = PythonKinds[K_FUNCTION].letter;
142 tag.extensionFields.arglist = arglist;
143 /* add argument list of __init__() methods to the class tag */
144 if (strcmp (vStringValue (function), "__init__") == 0 && parent != NULL)
146 const char *parent_tag_name = get_class_name_from_parent (vStringValue (parent));
147 if (parent_tag_name != NULL)
148 setTagArglistByName (parent_tag_name, arglist);
151 if (vStringLength (parent) > 0)
153 if (is_class_parent)
155 tag.kindName = PythonKinds[K_METHOD].name;
156 tag.kind = PythonKinds[K_METHOD].letter;
157 tag.extensionFields.scope [0] = PythonKinds[K_CLASS].name;
158 tag.extensionFields.scope [1] = vStringValue (parent);
160 else
162 tag.extensionFields.scope [0] = PythonKinds[K_FUNCTION].name;
163 tag.extensionFields.scope [1] = vStringValue (parent);
167 addAccessFields (&tag, function, is_class_parent ? K_METHOD : K_FUNCTION,
168 vStringLength (parent) > 0, is_class_parent);
170 makeTagEntry (&tag);
173 /* Given a string with the contents of the line directly after the "class"
174 * keyword, extract all necessary information and create a tag.
176 static void makeClassTag (vString *const class, vString *const inheritance,
177 vString *const parent, int is_class_parent)
179 tagEntryInfo tag;
180 initTagEntry (&tag, vStringValue (class));
181 tag.kindName = PythonKinds[K_CLASS].name;
182 tag.kind = PythonKinds[K_CLASS].letter;
183 if (vStringLength (parent) > 0)
185 if (is_class_parent)
187 tag.extensionFields.scope [0] = PythonKinds[K_CLASS].name;
188 tag.extensionFields.scope [1] = vStringValue (parent);
190 else
192 tag.extensionFields.scope [0] = PythonKinds[K_FUNCTION].name;
193 tag.extensionFields.scope [1] = vStringValue (parent);
196 tag.extensionFields.inheritance = vStringValue (inheritance);
197 addAccessFields (&tag, class, K_CLASS, vStringLength (parent) > 0,
198 is_class_parent);
199 makeTagEntry (&tag);
202 static void makeVariableTag (vString *const var, vString *const parent,
203 boolean is_class_parent)
205 tagEntryInfo tag;
206 initTagEntry (&tag, vStringValue (var));
207 tag.kindName = PythonKinds[K_VARIABLE].name;
208 tag.kind = PythonKinds[K_VARIABLE].letter;
209 if (vStringLength (parent) > 0)
211 tag.extensionFields.scope [0] = PythonKinds[K_CLASS].name;
212 tag.extensionFields.scope [1] = vStringValue (parent);
214 addAccessFields (&tag, var, K_VARIABLE, vStringLength (parent) > 0,
215 is_class_parent);
216 makeTagEntry (&tag);
/* Skip a delimited string.
 *
 * `cp` points at the opening delimiter; whatever character is there is also
 * taken as the closing one.  A backslash escapes the character following it.
 * Returns a pointer just past the closing delimiter, or to the terminating
 * NUL if the string is not closed on this line. */
static const char *skipString (const char *cp)
{
	const char delimiter = *cp;
	const char *p = cp + 1;

	while (*p != '\0')
	{
		if (*p == '\\')
		{
			/* step onto the escaped character; it is consumed below */
			p++;
			if (*p == '\0')
				break;
		}
		else if (*p == delimiter)
			return p + 1;
		p++;
	}
	return p;
}
/* Skip everything up to an identifier start.
 *
 * Quoted strings (optionally prefixed with u, r, b, ur or br in any case)
 * and '#'-introduced text are passed over with skipString().
 * Returns a pointer to the first identifier start found, or to the
 * terminating NUL if there is none. */
static const char *skipEverything (const char *cp)
{
	for (; *cp; cp++)
	{
		/* number of prefix characters in front of the opening delimiter,
		 * or -1 when nothing skippable starts here */
		int prefix = -1;

		if (*cp == '"' || *cp == '\'' || *cp == '#')
			prefix = 0;
		/* these checks find unicode, binary (Python 3) and raw strings */
		else if (!strncasecmp(cp, "u'", 2) || !strncasecmp(cp, "u\"", 2) ||
				 !strncasecmp(cp, "r'", 2) || !strncasecmp(cp, "r\"", 2) ||
				 !strncasecmp(cp, "b'", 2) || !strncasecmp(cp, "b\"", 2))
			prefix = 1;
		else if (!strncasecmp(cp, "ur'", 3) || !strncasecmp(cp, "ur\"", 3) ||
				 !strncasecmp(cp, "br'", 3) || !strncasecmp(cp, "br\"", 3))
			prefix = 2;

		if (prefix >= 0)
		{
			cp = skipString(cp + prefix);
			if (!*cp)
				break;
		}
		if (isIdentifierFirstCharacter ((int) *cp))
			return cp;
		if (prefix >= 0)
			cp--; /* avoid jumping over the character after a skipped string */
	}
	return cp;
}
/* Skip an identifier: return a pointer to the first character at or after
 * `cp` that is not an identifier character. */
static const char *skipIdentifier (const char *cp)
{
	for (; isIdentifierCharacter ((int) *cp); cp++)
		;
	return cp;
}
/* Find the first identifier on the line that begins with one of the
 * definition keywords "def", "class", "cdef" or "cpdef".
 *
 * Only the prefix is compared here; the caller still has to check that a
 * complete keyword was found.  Returns a pointer to that identifier, or NULL
 * if the line holds none. */
static const char *findDefinitionOrClass (const char *cp)
{
	static const char *const keywords[] = { "def", "class", "cdef", "cpdef" };
	const size_t count = sizeof keywords / sizeof keywords[0];
	size_t k;

	while (*cp)
	{
		cp = skipEverything (cp);
		for (k = 0; k < count; k++)
		{
			if (strncmp (cp, keywords[k], strlen (keywords[k])) == 0)
				return cp;
		}
		cp = skipIdentifier (cp);
	}
	return NULL;
}
/* Skip whitespace: return a pointer to the first character at or after `cp`
 * that isspace() does not accept (possibly the terminating NUL).
 *
 * The character is narrowed to unsigned char first: on platforms where plain
 * `char` is signed, bytes >= 0x80 would otherwise reach isspace() as negative
 * values, for which its behaviour is undefined. */
static const char *skipSpace (const char *cp)
{
	while (isspace ((unsigned char) *cp))
		++cp;
	return cp;
}
305 /* Starting at ''cp'', parse an identifier into ''identifier''. */
306 static const char *parseIdentifier (const char *cp, vString *const identifier)
308 vStringClear (identifier);
309 while (isIdentifierCharacter ((int) *cp))
311 vStringPut (identifier, (int) *cp);
312 ++cp;
314 vStringTerminate (identifier);
315 return cp;
318 static void parseClass (const char *cp, vString *const class,
319 vString *const parent, int is_class_parent)
321 vString *const inheritance = vStringNew ();
322 vStringClear (inheritance);
323 cp = parseIdentifier (cp, class);
324 cp = skipSpace (cp);
325 if (*cp == '(')
327 ++cp;
328 while (*cp != ')')
330 if (*cp == '\0')
332 /* Closing parenthesis can be in follow up line. */
333 cp = (const char *) fileReadLine ();
334 if (!cp) break;
335 vStringPut (inheritance, ' ');
336 continue;
338 vStringPut (inheritance, *cp);
339 ++cp;
341 vStringTerminate (inheritance);
343 makeClassTag (class, inheritance, parent, is_class_parent);
344 vStringDelete (inheritance);
347 static void parseImports (const char *cp)
349 const char *pos;
350 vString *name, *name_next;
352 cp = skipEverything (cp);
354 if ((pos = strstr (cp, "import")) == NULL)
355 return;
357 cp = pos + 6;
359 /* continue only if there is some space between the keyword and the identifier */
360 if (! isspace (*cp))
361 return;
363 cp++;
364 cp = skipSpace (cp);
366 name = vStringNew ();
367 name_next = vStringNew ();
369 cp = skipEverything (cp);
370 while (*cp)
372 cp = parseIdentifier (cp, name);
374 cp = skipEverything (cp);
375 /* we parse the next possible import statement as well to be able to ignore 'foo' in
376 * 'import foo as bar' */
377 parseIdentifier (cp, name_next);
379 /* take the current tag only if the next one is not "as" */
380 if (strcmp (vStringValue (name_next), "as") != 0 &&
381 strcmp (vStringValue (name), "as") != 0)
383 makeSimpleTag (name, PythonKinds, K_IMPORT);
386 vStringDelete (name);
387 vStringDelete (name_next);
/* Extract the parenthesised argument list from `buf`.
 * Modified from get.c getArglistFromStr().
 *
 * Returns a newly allocated copy of the text from the first '(' up to and
 * including its matching ')' (or up to the end of the string if it is never
 * closed), or NULL if `buf` is NULL or contains no '('.
 *
 * warning: terminates rest of string past arglist, i.e. `buf` is modified!
 * note: does not ignore brackets inside strings! */
static char *parseArglist(const char *buf)
{
	char *open_paren, *p;
	int depth = 1;

	if (buf == NULL)
		return NULL;

	open_paren = strchr(buf, '(');
	if (open_paren == NULL)
		return NULL;

	p = open_paren + 1;
	while (depth > 0 && *p != '\0')
	{
		if (*p == '(')
			depth++;
		else if (*p == ')')
			depth--;
		p++;
	}
	/* p is now just past the matching ')', or on the terminating NUL */
	*p = '\0';
	return strdup(open_paren);
}
414 static void parseFunction (const char *cp, vString *const def,
415 vString *const parent, int is_class_parent)
417 char *arglist;
419 cp = parseIdentifier (cp, def);
420 arglist = parseArglist (cp);
421 makeFunctionTag (def, parent, is_class_parent, arglist);
422 if (arglist != NULL)
423 eFree (arglist);
426 /* Get the combined name of a nested symbol. Classes are separated with ".",
427 * functions with "/". For example this code:
428 * class MyClass:
429 * def myFunction:
430 * def SubFunction:
431 * class SubClass:
432 * def Method:
433 * pass
434 * Would produce this string:
435 * MyClass.MyFunction/SubFunction/SubClass.Method
437 static boolean constructParentString(NestingLevels *nls, int indent,
438 vString *result)
440 int i;
441 NestingLevel *prev = NULL;
442 int is_class = FALSE;
443 vStringClear (result);
444 for (i = 0; i < nls->n; i++)
446 NestingLevel *nl = nls->levels + i;
447 if (indent <= nl->indentation)
448 break;
449 if (prev)
451 vStringCatS(result, "."); /* make Geany symbol list grouping work properly */
453 if (prev->type == K_CLASS)
454 vStringCatS(result, ".");
455 else
456 vStringCatS(result, "/");
459 vStringCat(result, nl->name);
460 is_class = (nl->type == K_CLASS);
461 prev = nl;
463 return is_class;
466 /* Check whether parent's indentation level is higher than the current level and
467 * if so, remove it.
469 static void checkParent(NestingLevels *nls, int indent, vString *parent)
471 int i;
472 NestingLevel *n;
474 for (i = 0; i < nls->n; i++)
476 n = nls->levels + i;
477 /* is there a better way to compare two vStrings? */
478 if (n && strcmp(vStringValue(parent), vStringValue(n->name)) == 0)
480 if (indent <= n->indentation)
482 /* remove this level by clearing its name */
483 vStringClear(n->name);
485 break;
490 static void addNestingLevel(NestingLevels *nls, int indentation,
491 const vString *name, boolean is_class)
493 int i;
494 NestingLevel *nl = NULL;
496 for (i = 0; i < nls->n; i++)
498 nl = nls->levels + i;
499 if (indentation <= nl->indentation) break;
501 if (i == nls->n)
503 nestingLevelsPush(nls, name, 0);
504 nl = nls->levels + i;
506 else
507 { /* reuse existing slot */
508 nls->n = i + 1;
509 vStringCopy(nl->name, name);
511 nl->indentation = indentation;
512 nl->type = is_class ? K_CLASS : !K_CLASS;
515 /* Return a pointer to the start of the next triple string, or NULL. Store
516 * the kind of triple string in "which" if the return is not NULL.
518 static char const *find_triple_start(char const *string, char const **which)
520 char const *cp = string;
522 for (; *cp; cp++)
524 if (*cp == '#')
525 break;
527 if (*cp == '"' || *cp == '\'')
529 if (strncmp(cp, doubletriple, 3) == 0)
531 *which = doubletriple;
532 return cp;
534 if (strncmp(cp, singletriple, 3) == 0)
536 *which = singletriple;
537 return cp;
539 cp = skipString(cp);
540 if (!*cp) break;
541 cp--; /* avoid jumping over the character after a skipped string */
544 return NULL;
/* Find the end of a triple string as pointed to by "which", and update "which"
 * with any other triple strings following in the given string.
 * On return "which" is NULL if no triple string is left open at the end of
 * the given string, otherwise it is the delimiter of the one still open.
 */
static void find_triple_end(char const *string, char const **which)
{
	char const *s = string;

	for (;;)
	{
		/* Check if the string ends in the same line. */
		s = strstr (s, *which);
		if (s == NULL)
			return;
		*which = NULL;

		/* If yes, check if another one starts in the same line. */
		s = find_triple_start(s + 3, which);
		if (s == NULL)
			return;
		s += 3;
	}
}
/* Parse global and class variable names (C.x) from assignment statements.
 * Object attributes (obj.x) are ignored.
 * Assignment to a tuple 'x, y = 2, 3' not supported.
 * TODO: ignore duplicate tags from reassignment statements.
 *
 * Returns a pointer to the start of the variable name inside `line` if the
 * line has the form '<spaces>name<spaces>=...', or NULL otherwise.
 */
static const char *findVariable(const char *line)
{
	const char *assign, *scan, *name, *lead;

	assign = strchr(line, '=');
	if (assign == NULL)
		return NULL;

	for (scan = assign + 1; *scan != '\0'; scan++)
	{
		if (*scan == '=')
			return NULL;	/* ignore '==' operator and 'x=5,y=6)' function lines */
		if (*scan == '(' || *scan == '#')
			break;	/* allow 'x = func(b=2,y=2,' lines and comments at the end of line */
	}

	/* Go backwards from the '=' to the start of the line, checking we have
	 * valid chars.  Each pointer always designates the character *after* the
	 * one being examined, so it never moves before `line`. */
	name = assign;
	while (name > line && isspace ((unsigned char) name[-1]))
		--name;
	while (name > line && isIdentifierCharacter ((int) name[-1]))
		--name;
	if (!isIdentifierFirstCharacter((int) *name))
		return NULL;

	lead = name;
	while (lead > line && isspace ((unsigned char) lead[-1]))
		--lead;
	if (lead != line)	/* the line isn't a simple variable assignment */
		return NULL;

	/* the line is valid, return the variable name */
	return name;
}
606 /* Skip type declaration that optionally follows a cdef/cpdef */
607 static const char *skipTypeDecl (const char *cp, boolean *is_class)
609 const char *lastStart = cp, *ptr = cp;
610 int loopCount = 0;
611 ptr = skipSpace(cp);
612 if (!strncmp("extern", ptr, 6)) {
613 ptr += 6;
614 ptr = skipSpace(ptr);
615 if (!strncmp("from", ptr, 4)) { return NULL; }
617 if (!strncmp("class", ptr, 5)) {
618 ptr += 5 ;
619 *is_class = TRUE;
620 ptr = skipSpace(ptr);
621 return ptr;
623 /* limit so that we don't pick off "int item=obj()" */
624 while (*ptr && loopCount++ < 2) {
625 while (*ptr && *ptr != '=' && *ptr != '(' && !isspace(*ptr)) {
626 /* skip over e.g. 'cpdef numpy.ndarray[dtype=double, ndim=1]' */
627 if(*ptr == '[') {
628 while (*ptr && *ptr != ']') ptr++;
629 if (*ptr) ptr++;
630 } else {
631 ptr++;
634 if (!*ptr || *ptr == '=') return NULL;
635 if (*ptr == '(') {
636 return lastStart; /* if we stopped on a '(' we are done */
638 ptr = skipSpace(ptr);
639 lastStart = ptr;
640 while (*lastStart == '*') lastStart++; /* cdef int *identifier */
642 return NULL;
645 /* checks if there is a lambda at position of cp, and return its argument list
646 * if so.
647 * We don't return the lambda name since it is useless for now since we already
648 * know it when we call this function, and it would be a little slower. */
649 static boolean varIsLambda (const char *cp, char **arglist)
651 boolean is_lambda = FALSE;
653 cp = skipSpace (cp);
654 cp = skipIdentifier (cp); /* skip the lambda's name */
655 cp = skipSpace (cp);
656 if (*cp == '=')
658 cp++;
659 cp = skipSpace (cp);
660 if (strncmp (cp, "lambda", 6) == 0)
662 const char *tmp;
664 cp += 6; /* skip the lambda */
665 tmp = skipSpace (cp);
666 /* check if there is a space after lambda to detect assignations
667 * starting with 'lambdaXXX' */
668 if (tmp != cp)
670 vString *args = vStringNew ();
672 cp = tmp;
673 vStringPut (args, '(');
674 for (; *cp != 0 && *cp != ':'; cp++)
675 vStringPut (args, *cp);
676 vStringPut (args, ')');
677 vStringTerminate (args);
678 if (arglist)
679 *arglist = strdup (vStringValue (args));
680 vStringDelete (args);
681 is_lambda = TRUE;
685 return is_lambda;
688 /* checks if @p cp has keyword @p keyword at the start, and fills @p cp_n with
689 * the position of the next non-whitespace after the keyword */
690 static boolean matchKeyword (const char *keyword, const char *cp, const char **cp_n)
692 size_t kw_len = strlen (keyword);
693 if (strncmp (cp, keyword, kw_len) == 0 && isspace (cp[kw_len]))
695 *cp_n = skipSpace (&cp[kw_len + 1]);
696 return TRUE;
698 return FALSE;
701 static void findPythonTags (void)
703 vString *const continuation = vStringNew ();
704 vString *const name = vStringNew ();
705 vString *const parent = vStringNew();
707 NestingLevels *const nesting_levels = nestingLevelsNew();
709 const char *line;
710 int line_skip = 0;
711 char const *longStringLiteral = NULL;
713 while ((line = (const char *) fileReadLine ()) != NULL)
715 const char *cp = line, *candidate;
716 char const *longstring;
717 char const *keyword, *variable;
718 int indent;
720 cp = skipSpace (cp);
722 if (*cp == '\0') /* skip blank line */
723 continue;
725 /* Skip comment if we are not inside a multi-line string. */
726 if (*cp == '#' && !longStringLiteral)
727 continue;
729 /* Deal with line continuation. */
730 if (!line_skip) vStringClear(continuation);
731 vStringCatS(continuation, line);
732 vStringStripTrailing(continuation);
733 if (vStringLast(continuation) == '\\')
735 vStringChop(continuation);
736 vStringCatS(continuation, " ");
737 line_skip = 1;
738 continue;
740 cp = line = vStringValue(continuation);
741 cp = skipSpace (cp);
742 indent = cp - line;
743 line_skip = 0;
745 /* Deal with multiline string ending. */
746 if (longStringLiteral)
748 find_triple_end(cp, &longStringLiteral);
749 continue;
752 checkParent(nesting_levels, indent, parent);
754 /* Find global and class variables */
755 variable = findVariable(line);
756 if (variable)
758 const char *start = variable;
759 char *arglist;
760 boolean parent_is_class;
762 vStringClear (name);
763 while (isIdentifierCharacter ((int) *start))
765 vStringPut (name, (int) *start);
766 ++start;
768 vStringTerminate (name);
770 parent_is_class = constructParentString(nesting_levels, indent, parent);
771 if (varIsLambda (variable, &arglist))
773 /* show class members or top-level script lambdas only */
774 if (parent_is_class || vStringLength(parent) == 0)
775 makeFunctionTag (name, parent, parent_is_class, arglist);
776 eFree (arglist);
778 else
780 /* skip variables in methods */
781 if (parent_is_class || vStringLength(parent) == 0)
782 makeVariableTag (name, parent, parent_is_class);
786 /* Deal with multiline string start. */
787 longstring = find_triple_start(cp, &longStringLiteral);
788 if (longstring)
790 longstring += 3;
791 find_triple_end(longstring, &longStringLiteral);
792 /* We don't parse for any tags in the rest of the line. */
793 continue;
796 /* Deal with def and class keywords. */
797 keyword = findDefinitionOrClass (cp);
798 if (keyword)
800 boolean found = FALSE;
801 boolean is_class = FALSE;
802 if (matchKeyword ("def", keyword, &cp))
804 found = TRUE;
806 else if (matchKeyword ("class", keyword, &cp))
808 found = TRUE;
809 is_class = TRUE;
811 else if (matchKeyword ("cdef", keyword, &cp))
813 candidate = skipTypeDecl (cp, &is_class);
814 if (candidate)
816 found = TRUE;
817 cp = candidate;
821 else if (matchKeyword ("cpdef", keyword, &cp))
823 candidate = skipTypeDecl (cp, &is_class);
824 if (candidate)
826 found = TRUE;
827 cp = candidate;
831 if (found)
833 boolean is_parent_class;
835 is_parent_class =
836 constructParentString(nesting_levels, indent, parent);
838 if (is_class)
839 parseClass (cp, name, parent, is_parent_class);
840 else
841 parseFunction(cp, name, parent, is_parent_class);
843 addNestingLevel(nesting_levels, indent, name, is_class);
846 /* Find and parse imports */
847 parseImports(line);
849 /* Clean up all memory we allocated. */
850 vStringDelete (parent);
851 vStringDelete (name);
852 vStringDelete (continuation);
853 nestingLevelsFree (nesting_levels);
856 extern parserDefinition *PythonParser (void)
858 static const char *const extensions[] = { "py", "pyx", "pxd", "pxi" ,"scons", NULL };
859 parserDefinition *def = parserNew ("Python");
860 def->kinds = PythonKinds;
861 def->kindCount = KIND_COUNT (PythonKinds);
862 def->extensions = extensions;
863 def->parser = findPythonTags;
864 return def;
867 /* vi:set tabstop=4 shiftwidth=4: */