Update Scintilla keywords and highlighting when changing document tabs
[geany-mirror.git] / tagmanager / python.c
blobbaba588cd727004d95c396db6e81c1e1ec239db5
1 /*
2 * Copyright (c) 2000-2003, Darren Hiebert
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License.
7 * This module contains functions for generating tags for Python language
8 * files.
9 */
11 * INCLUDE FILES
13 #include "general.h" /* must always come first */
15 #include <string.h>
17 #include "entry.h"
18 #include "options.h"
19 #include "read.h"
20 #include "main.h"
21 #include "vstring.h"
22 #include "nestlevel.h"
25 * DATA DEFINITIONS
/* Tag kinds produced by this parser. The enumerator order matches the row
 * order of the PythonKinds table. */
typedef enum {
	K_CLASS,
	K_FUNCTION,
	K_MEMBER,
	K_VARIABLE,
	K_IMPORT
} pythonKind;
31 static kindOption PythonKinds[] = {
32 {TRUE, 'c', "class", "classes"},
33 {TRUE, 'f', "function", "functions"},
34 {TRUE, 'm', "member", "class members"},
35 {TRUE, 'v', "variable", "variables"},
36 {TRUE, 'i', "namespace", "imports"}
/* The two delimiters that open or close a Python triple-quoted string. */
static const char *const singletriple = "'''";
static const char *const doubletriple = "\"\"\"";
43 * FUNCTION DEFINITIONS
/* Last character stored in a vString, or '\0' when the string is empty
 * (an unguarded buffer[length - 1] would read outside the buffer).
 * NOTE: the argument is evaluated more than once. */
#define vStringLast(vs) \
	((vs)->length > 0 ? (vs)->buffer[(vs)->length - 1] : '\0')
48 static boolean isIdentifierFirstCharacter (int c)
50 return (boolean) (isalpha (c) || c == '_');
53 static boolean isIdentifierCharacter (int c)
55 return (boolean) (isalnum (c) || c == '_');
/* Return the innermost component of a nested scope string: the text after the
 * last '.', and within that the text after the last '/'.
 * Returns NULL when "parent" is NULL; otherwise the result points into
 * "parent" (nothing is allocated). */
static const char *get_class_name_from_parent (const char *parent)
{
	const char *dot;
	const char *slash;

	if (parent == NULL)
		return NULL;

	dot = strrchr (parent, '.');
	if (dot != NULL)
		parent = dot + 1;

	slash = strrchr (parent, '/');
	return (slash != NULL) ? slash + 1 : parent;
}
82 /* Given a string with the contents of a line directly after the "def" keyword,
83 * extract all relevant information and create a tag.
85 static void makeFunctionTag (vString *const function,
86 vString *const parent, int is_class_parent, const char *arglist)
88 tagEntryInfo tag;
89 initTagEntry (&tag, vStringValue (function));
91 tag.kindName = "function";
92 tag.kind = 'f';
93 tag.extensionFields.arglist = arglist;
94 /* add argument list of __init__() methods to the class tag */
95 if (strcmp (vStringValue (function), "__init__") == 0 && parent != NULL)
97 const char *parent_tag_name = get_class_name_from_parent (vStringValue (parent));
98 if (parent_tag_name != NULL)
99 setTagArglistByName (parent_tag_name, arglist);
102 if (vStringLength (parent) > 0)
104 if (is_class_parent)
106 tag.kindName = "member";
107 tag.kind = 'm';
108 tag.extensionFields.scope [0] = "class";
109 tag.extensionFields.scope [1] = vStringValue (parent);
111 else
113 tag.extensionFields.scope [0] = "function";
114 tag.extensionFields.scope [1] = vStringValue (parent);
118 /* If a function starts with __, we mark it as file scope.
119 * FIXME: What is the proper way to signal such attributes?
120 * TODO: What does functions/classes starting with _ and __ mean in python?
122 if (strncmp (vStringValue (function), "__", 2) == 0 &&
123 strcmp (vStringValue (function), "__init__") != 0)
125 tag.extensionFields.access = "private";
126 tag.isFileScope = TRUE;
128 else
130 tag.extensionFields.access = "public";
132 makeTagEntry (&tag);
135 /* Given a string with the contents of the line directly after the "class"
136 * keyword, extract all necessary information and create a tag.
138 static void makeClassTag (vString *const class, vString *const inheritance,
139 vString *const parent, int is_class_parent)
141 tagEntryInfo tag;
142 initTagEntry (&tag, vStringValue (class));
143 tag.kindName = "class";
144 tag.kind = 'c';
145 if (vStringLength (parent) > 0)
147 if (is_class_parent)
149 tag.extensionFields.scope [0] = "class";
150 tag.extensionFields.scope [1] = vStringValue (parent);
152 else
154 tag.extensionFields.scope [0] = "function";
155 tag.extensionFields.scope [1] = vStringValue (parent);
158 tag.extensionFields.inheritance = vStringValue (inheritance);
159 makeTagEntry (&tag);
162 static void makeVariableTag (vString *const var, vString *const parent)
164 tagEntryInfo tag;
165 initTagEntry (&tag, vStringValue (var));
166 tag.kindName = "variable";
167 tag.kind = 'v';
168 if (vStringLength (parent) > 0)
170 tag.extensionFields.scope [0] = "class";
171 tag.extensionFields.scope [1] = vStringValue (parent);
173 makeTagEntry (&tag);
/* Skip a delimited run of text. "cp" must point at the opening delimiter;
 * the same character closes the run. A backslash makes the following
 * character literal. Returns a pointer just past the closing delimiter, or
 * to the terminating NUL when the run is not closed on this line. */
static const char *skipString (const char *cp)
{
	const char delimiter = *cp;
	int skip_next = 0;

	for (cp++; *cp != '\0'; cp++)
	{
		if (skip_next)
			skip_next = 0;		/* this character was escaped */
		else if (*cp == '\\')
			skip_next = 1;
		else if (*cp == delimiter)
			return cp + 1;
	}
	return cp;
}
/* Classify the text at "cp" as the start of a run that skipString() should
 * consume. Returns the number of prefix characters in front of the
 * delimiter: 0 when "cp" is already at a quote or '#', 1 for a u/r/b prefix,
 * 2 for a ur/br prefix (prefixes are matched case-insensitively). Returns -1
 * when no such run starts here. */
static int stringPrefixLength (const char *cp)
{
	if (*cp == '"' || *cp == '\'' || *cp == '#')
		return 0;

	/* these checks find unicode, binary (Python 3) and raw strings */
	if (strncasecmp (cp, "u'", 2) == 0 || strncasecmp (cp, "u\"", 2) == 0 ||
		strncasecmp (cp, "r'", 2) == 0 || strncasecmp (cp, "r\"", 2) == 0 ||
		strncasecmp (cp, "b'", 2) == 0 || strncasecmp (cp, "b\"", 2) == 0)
		return 1;

	if (strncasecmp (cp, "ur'", 3) == 0 || strncasecmp (cp, "ur\"", 3) == 0 ||
		strncasecmp (cp, "br'", 3) == 0 || strncasecmp (cp, "br\"", 3) == 0)
		return 2;

	return -1;
}

/* Skip everything up to an identifier start.
 * Quoted strings (with optional prefix) are stepped over with skipString();
 * a '#' is handed to skipString() as well, so scanning resumes after the next
 * '#' or stops at the end of the line. Returns a pointer to the first
 * identifier-start character found, or to the terminating NUL. */
static const char *skipEverything (const char *cp)
{
	for (; *cp; cp++)
	{
		const int prefix = stringPrefixLength (cp);

		if (prefix >= 0)
		{
			cp = skipString (cp + prefix);
			if (*cp == '\0')
				break;
		}
		if (isIdentifierFirstCharacter ((int) *cp))
			return cp;
	}
	return cp;
}
/* Advance past a run of identifier characters and return the first position
 * that is not one (possibly "cp" itself). */
static const char *skipIdentifier (const char *cp)
{
	for (; isIdentifierCharacter ((int) *cp); cp++)
		;
	return cp;
}
/* Scan "cp" word by word and return a pointer to the first word that begins
 * with "def", "class", "cdef" or "cpdef", or NULL when the line holds none.
 * Only the prefix is compared here; the caller verifies that a full keyword
 * followed by a space is present. */
static const char *findDefinitionOrClass (const char *cp)
{
	for (; *cp != '\0'; cp = skipIdentifier (cp))
	{
		cp = skipEverything (cp);
		if (strncmp (cp, "def", 3) == 0 || strncmp (cp, "class", 5) == 0 ||
			strncmp (cp, "cdef", 4) == 0 || strncmp (cp, "cpdef", 5) == 0)
		{
			return cp;
		}
	}
	return NULL;
}
/* Return a pointer to the first character at or after "cp" that is not
 * whitespace (per isspace); this may be the terminating NUL.
 * The character is converted to unsigned char before the isspace() call:
 * plain char may be negative for non-ASCII bytes, and passing a negative
 * value other than EOF to a <ctype.h> function is undefined behaviour. */
static const char *skipSpace (const char *cp)
{
	while (isspace ((unsigned char) *cp))
		++cp;
	return cp;
}
260 /* Starting at ''cp'', parse an identifier into ''identifier''. */
261 static const char *parseIdentifier (const char *cp, vString *const identifier)
263 vStringClear (identifier);
264 while (isIdentifierCharacter ((int) *cp))
266 vStringPut (identifier, (int) *cp);
267 ++cp;
269 vStringTerminate (identifier);
270 return cp;
273 static void parseClass (const char *cp, vString *const class,
274 vString *const parent, int is_class_parent)
276 vString *const inheritance = vStringNew ();
277 vStringClear (inheritance);
278 cp = parseIdentifier (cp, class);
279 cp = skipSpace (cp);
280 if (*cp == '(')
282 ++cp;
283 while (*cp != ')')
285 if (*cp == '\0')
287 /* Closing parenthesis can be in follow up line. */
288 cp = (const char *) fileReadLine ();
289 if (!cp) break;
290 vStringPut (inheritance, ' ');
291 continue;
293 vStringPut (inheritance, *cp);
294 ++cp;
296 vStringTerminate (inheritance);
298 makeClassTag (class, inheritance, parent, is_class_parent);
299 vStringDelete (inheritance);
302 static void parseImports (const char *cp)
304 const char *pos;
305 vString *name, *name_next;
307 cp = skipEverything (cp);
309 if ((pos = strstr (cp, "import")) == NULL)
310 return;
312 cp = pos + 6;
314 /* continue only if there is some space between the keyword and the identifier */
315 if (! isspace (*cp))
316 return;
318 cp++;
319 cp = skipSpace (cp);
321 name = vStringNew ();
322 name_next = vStringNew ();
324 cp = skipEverything (cp);
325 while (*cp)
327 cp = parseIdentifier (cp, name);
329 cp = skipEverything (cp);
330 /* we parse the next possible import statement as well to be able to ignore 'foo' in
331 * 'import foo as bar' */
332 parseIdentifier (cp, name_next);
334 /* take the current tag only if the next one is not "as" */
335 if (strcmp (vStringValue (name_next), "as") != 0 &&
336 strcmp (vStringValue (name), "as") != 0)
338 makeSimpleTag (name, PythonKinds, K_IMPORT);
341 vStringDelete (name);
342 vStringDelete (name_next);
/* Extract the parenthesised argument list from "buf"
 * (modified from get.c getArglistFromStr()).
 *
 * Returns a strdup()ed copy of the text from the first '(' up to and
 * including its matching ')' (or up to the end of the string when the
 * parentheses are unbalanced). Returns NULL when "buf" is NULL or holds no
 * '('. The caller owns the returned string.
 *
 * warning: terminates rest of string past arglist! "buf" is written to even
 *          though it is declared const, so it must point at writable memory.
 * note: does not ignore brackets inside strings!
 */
static char *parseArglist(const char *buf)
{
	char *open;
	char *scan;
	int depth = 1;

	if (buf == NULL)
		return NULL;

	open = strchr(buf, '(');
	if (open == NULL)
		return NULL;

	/* Walk forward until the opening parenthesis is balanced or the string
	 * ends; "scan" stops one past the matching ')'. */
	scan = open + 1;
	while (depth > 0 && *scan != '\0')
	{
		if (*scan == '(')
			++depth;
		else if (*scan == ')')
			--depth;
		++scan;
	}

	*scan = '\0';
	return strdup(open);
}
369 static void parseFunction (const char *cp, vString *const def,
370 vString *const parent, int is_class_parent)
372 char *arglist;
374 cp = parseIdentifier (cp, def);
375 arglist = parseArglist (cp);
376 makeFunctionTag (def, parent, is_class_parent, arglist);
377 if (arglist != NULL)
378 eFree (arglist);
381 /* Get the combined name of a nested symbol. Classes are separated with ".",
382 * functions with "/". For example this code:
383 * class MyClass:
384 * def myFunction:
385 * def SubFunction:
386 * class SubClass:
387 * def Method:
388 * pass
389 * Would produce this string:
390 * MyClass.MyFunction/SubFunction/SubClass.Method
392 static boolean constructParentString(NestingLevels *nls, int indent,
393 vString *result)
395 int i;
396 NestingLevel *prev = NULL;
397 int is_class = FALSE;
398 vStringClear (result);
399 for (i = 0; i < nls->n; i++)
401 NestingLevel *nl = nls->levels + i;
402 if (indent <= nl->indentation)
403 break;
404 if (prev)
406 vStringCatS(result, "."); /* make Geany symbol list grouping work properly */
408 if (prev->type == K_CLASS)
409 vStringCatS(result, ".");
410 else
411 vStringCatS(result, "/");
414 vStringCat(result, nl->name);
415 is_class = (nl->type == K_CLASS);
416 prev = nl;
418 return is_class;
421 /* Check whether parent's indentation level is higher than the current level and
422 * if so, remove it.
424 static void checkParent(NestingLevels *nls, int indent, vString *parent)
426 int i;
427 NestingLevel *n;
429 for (i = 0; i < nls->n; i++)
431 n = nls->levels + i;
432 /* is there a better way to compare two vStrings? */
433 if (n && strcmp(vStringValue(parent), vStringValue(n->name)) == 0)
435 if (indent <= n->indentation)
437 /* remove this level by clearing its name */
438 vStringClear(n->name);
440 break;
445 static void addNestingLevel(NestingLevels *nls, int indentation,
446 const vString *name, boolean is_class)
448 int i;
449 NestingLevel *nl = NULL;
451 for (i = 0; i < nls->n; i++)
453 nl = nls->levels + i;
454 if (indentation <= nl->indentation) break;
456 if (i == nls->n)
458 nestingLevelsPush(nls, name, 0);
459 nl = nls->levels + i;
461 else
462 { /* reuse existing slot */
463 nls->n = i + 1;
464 vStringCopy(nl->name, name);
466 nl->indentation = indentation;
467 nl->type = is_class ? K_CLASS : !K_CLASS;
470 /* Return a pointer to the start of the next triple string, or NULL. Store
471 * the kind of triple string in "which" if the return is not NULL.
473 static char const *find_triple_start(char const *string, char const **which)
475 char const *cp = string;
477 for (; *cp; cp++)
479 if (*cp == '#')
480 break;
482 if (*cp == '"' || *cp == '\'')
484 if (strncmp(cp, doubletriple, 3) == 0)
486 *which = doubletriple;
487 return cp;
489 if (strncmp(cp, singletriple, 3) == 0)
491 *which = singletriple;
492 return cp;
494 cp = skipString(cp);
495 if (!*cp) break;
498 return NULL;
/* Track triple-quoted strings across "string".
 * On entry "*which" is the delimiter of the currently open triple string.
 * If that delimiter occurs in "string" the literal is closed and "*which"
 * becomes NULL; the remainder is then searched for a new triple string, and
 * the process repeats. On return "*which" is NULL when no triple string is
 * open at the end of "string", or the delimiter of the one that still is. */
static void find_triple_end(char const *string, char const **which)
{
	char const *pos = string;

	for (;;)
	{
		/* Check if the string ends in the same line. */
		char const *closing = strstr (pos, *which);

		if (closing == NULL)
			return;
		*which = NULL;

		/* If yes, check if another one starts in the same line. */
		pos = find_triple_start(closing + 3, which);
		if (pos == NULL)
			return;
		pos += 3;
	}
}
/* Parse global and class variable names (C.x) from assignment statements.
 * Object attributes (obj.x) are ignored.
 * Assignment to a tuple 'x, y = 2, 3' not supported.
 * TODO: ignore duplicate tags from reassignment statements.
 *
 * Returns a pointer (into "line") to the first character of the assigned
 * name when "line" has the form '<spaces>name<spaces>= ...', or NULL
 * otherwise.
 *
 * The backward scans use indices that never drop below zero, instead of
 * decrementing a pointer past the start of "line", and characters are
 * converted to unsigned char before being handed to isspace().
 */
static const char *findVariable(const char *line)
{
	const char *assign, *rest;
	size_t pos;

	assign = strchr(line, '=');
	if (assign == NULL)
		return NULL;

	for (rest = assign + 1; *rest != '\0'; rest++)
	{
		if (*rest == '=')
			return NULL;	/* ignore '==' operator and 'x=5,y=6)' function lines */
		if (*rest == '(' || *rest == '#')
			break;	/* allow 'x = func(b=2,y=2,' lines and comments at the end of line */
	}

	/* go backwards to the start of the line, checking we have valid chars */
	pos = (size_t) (assign - line);
	while (pos > 0 && isspace ((unsigned char) line[pos - 1]))
		--pos;
	while (pos > 0 && isIdentifierCharacter ((int) line[pos - 1]))
		--pos;

	/* "pos" is now the index where the name would begin; at worst it is the
	 * index of the '=' itself, which is rejected here. */
	if (!isIdentifierFirstCharacter ((int) line[pos]))
		return NULL;

	/* only whitespace may precede the name */
	{
		size_t lead = pos;

		while (lead > 0 && isspace ((unsigned char) line[lead - 1]))
			--lead;
		if (lead != 0)	/* the line isn't a simple variable assignment */
			return NULL;
	}

	/* the line is valid, return the start of the variable name */
	return line + pos;
}
560 /* Skip type declaration that optionally follows a cdef/cpdef */
561 static const char *skipTypeDecl (const char *cp, boolean *is_class)
563 const char *lastStart = cp, *ptr = cp;
564 int loopCount = 0;
565 ptr = skipSpace(cp);
566 if (!strncmp("extern", ptr, 6)) {
567 ptr += 6;
568 ptr = skipSpace(ptr);
569 if (!strncmp("from", ptr, 4)) { return NULL; }
571 if (!strncmp("class", ptr, 5)) {
572 ptr += 5 ;
573 *is_class = TRUE;
574 ptr = skipSpace(ptr);
575 return ptr;
577 /* limit so that we don't pick off "int item=obj()" */
578 while (*ptr && loopCount++ < 2) {
579 while (*ptr && *ptr != '=' && *ptr != '(' && !isspace(*ptr)) ptr++;
580 if (!*ptr || *ptr == '=') return NULL;
581 if (*ptr == '(') {
582 return lastStart; /* if we stopped on a '(' we are done */
584 ptr = skipSpace(ptr);
585 lastStart = ptr;
586 while (*lastStart == '*') lastStart++; /* cdef int *identifier */
588 return NULL;
591 /* checks if there is a lambda at position of cp, and return its argument list
592 * if so.
593 * We don't return the lambda name since it is useless for now since we already
594 * know it when we call this function, and it would be a little slower. */
595 static boolean varIsLambda (const char *cp, char **arglist)
597 boolean is_lambda = FALSE;
599 cp = skipSpace (cp);
600 cp = skipIdentifier (cp); /* skip the lambda's name */
601 cp = skipSpace (cp);
602 if (*cp == '=')
604 cp++;
605 cp = skipSpace (cp);
606 if (strncmp (cp, "lambda", 6) == 0)
608 const char *tmp;
610 cp += 6; /* skip the lambda */
611 tmp = skipSpace (cp);
612 /* check if there is a space after lambda to detect assignations
613 * starting with 'lambdaXXX' */
614 if (tmp != cp)
616 vString *args = vStringNew ();
618 cp = tmp;
619 vStringPut (args, '(');
620 for (; *cp != 0 && *cp != ':'; cp++)
621 vStringPut (args, *cp);
622 vStringPut (args, ')');
623 vStringTerminate (args);
624 if (arglist)
625 *arglist = strdup (vStringValue (args));
626 vStringDelete (args);
627 is_lambda = TRUE;
631 return is_lambda;
634 static void findPythonTags (void)
636 vString *const continuation = vStringNew ();
637 vString *const name = vStringNew ();
638 vString *const parent = vStringNew();
640 NestingLevels *const nesting_levels = nestingLevelsNew();
642 const char *line;
643 int line_skip = 0;
644 char const *longStringLiteral = NULL;
646 while ((line = (const char *) fileReadLine ()) != NULL)
648 const char *cp = line, *candidate;
649 char const *longstring;
650 char const *keyword, *variable;
651 int indent;
653 cp = skipSpace (cp);
655 if (*cp == '\0') /* skip blank line */
656 continue;
658 /* Skip comment if we are not inside a multi-line string. */
659 if (*cp == '#' && !longStringLiteral)
660 continue;
662 /* Deal with line continuation. */
663 if (!line_skip) vStringClear(continuation);
664 vStringCatS(continuation, line);
665 vStringStripTrailing(continuation);
666 if (vStringLast(continuation) == '\\')
668 vStringChop(continuation);
669 vStringCatS(continuation, " ");
670 line_skip = 1;
671 continue;
673 cp = line = vStringValue(continuation);
674 cp = skipSpace (cp);
675 indent = cp - line;
676 line_skip = 0;
678 /* Deal with multiline string ending. */
679 if (longStringLiteral)
681 find_triple_end(cp, &longStringLiteral);
682 continue;
685 checkParent(nesting_levels, indent, parent);
687 /* Deal with multiline string start. */
688 longstring = find_triple_start(cp, &longStringLiteral);
689 if (longstring)
691 longstring += 3;
692 find_triple_end(longstring, &longStringLiteral);
693 /* We don't parse for any tags in the rest of the line. */
694 continue;
697 /* Deal with def and class keywords. */
698 keyword = findDefinitionOrClass (cp);
699 if (keyword)
701 boolean found = FALSE;
702 boolean is_class = FALSE;
703 if (!strncmp (keyword, "def ", 4))
705 cp = skipSpace (keyword + 3);
706 found = TRUE;
708 else if (!strncmp (keyword, "class ", 6))
710 cp = skipSpace (keyword + 5);
711 found = TRUE;
712 is_class = TRUE;
714 else if (!strncmp (keyword, "cdef ", 5))
716 cp = skipSpace(keyword + 4);
717 candidate = skipTypeDecl (cp, &is_class);
718 if (candidate)
720 found = TRUE;
721 cp = candidate;
725 else if (!strncmp (keyword, "cpdef ", 6))
727 cp = skipSpace(keyword + 5);
728 candidate = skipTypeDecl (cp, &is_class);
729 if (candidate)
731 found = TRUE;
732 cp = candidate;
736 if (found)
738 boolean is_parent_class;
740 is_parent_class =
741 constructParentString(nesting_levels, indent, parent);
743 if (is_class)
744 parseClass (cp, name, parent, is_parent_class);
745 else
746 parseFunction(cp, name, parent, is_parent_class);
748 addNestingLevel(nesting_levels, indent, name, is_class);
751 /* Find global and class variables */
752 variable = findVariable(line);
753 if (variable)
755 const char *start = variable;
756 char *arglist;
757 boolean parent_is_class;
759 vStringClear (name);
760 while (isIdentifierCharacter ((int) *start))
762 vStringPut (name, (int) *start);
763 ++start;
765 vStringTerminate (name);
767 parent_is_class = constructParentString(nesting_levels, indent, parent);
768 if (varIsLambda (variable, &arglist))
770 /* show class members or top-level script lambdas only */
771 if (parent_is_class || vStringLength(parent) == 0)
772 makeFunctionTag (name, parent, parent_is_class, arglist);
773 if (arglist != NULL)
774 eFree (arglist);
776 else
778 /* skip variables in methods */
779 if (! parent_is_class && vStringLength(parent) > 0)
780 continue;
782 makeVariableTag (name, parent);
785 /* Find and parse imports */
786 parseImports(line);
788 /* Clean up all memory we allocated. */
789 vStringDelete (parent);
790 vStringDelete (name);
791 vStringDelete (continuation);
792 nestingLevelsFree (nesting_levels);
795 extern parserDefinition *PythonParser (void)
797 static const char *const extensions[] = { "py", "pyx", "pxd", "pxi" ,"scons", NULL };
798 parserDefinition *def = parserNew ("Python");
799 def->kinds = PythonKinds;
800 def->kindCount = KIND_COUNT (PythonKinds);
801 def->extensions = extensions;
802 def->parser = findPythonTags;
803 return def;
806 /* vi:set tabstop=4 shiftwidth=4: */