2 * Copyright (c) 2000-2003, Darren Hiebert
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License.
7 * This module contains functions for generating tags for Python language
#include "general.h"	/* must always come first */

#include <stdlib.h>

#include "nestlevel.h"
28 K_CLASS
, K_FUNCTION
, K_METHOD
, K_VARIABLE
, K_IMPORT
31 static kindOption PythonKinds
[] = {
32 {TRUE
, 'c', "class", "classes"},
33 {TRUE
, 'f', "function", "functions"},
34 {TRUE
, 'm', "member", "class members"},
35 {TRUE
, 'v', "variable", "variables"},
36 {TRUE
, 'x', "unknown", "name referring a classe/variable/function/module defined in other module"}
40 A_PUBLIC
, A_PRIVATE
, A_PROTECTED
43 static const char *const PythonAccesses
[] = {
44 "public", "private", "protected"
47 static char const * const singletriple
= "'''";
48 static char const * const doubletriple
= "\"\"\"";
51 * FUNCTION DEFINITIONS
54 static boolean
isIdentifierFirstCharacter (int c
)
56 return (boolean
) (isalpha (c
) || c
== '_');
59 static boolean
isIdentifierCharacter (int c
)
61 return (boolean
) (isalnum (c
) || c
== '_');
64 /* follows PEP-8, and always reports single-underscores as protected
66 * - http://www.python.org/dev/peps/pep-0008/#method-names-and-instance-variables
67 * - http://www.python.org/dev/peps/pep-0008/#designing-for-inheritance
69 static pythonAccess
accessFromIdentifier (const vString
*const ident
,
70 pythonKind kind
, boolean has_parent
, boolean parent_is_class
)
72 const char *const p
= vStringValue (ident
);
73 const size_t len
= vStringLength (ident
);
75 /* inside a function/method, private */
76 if (has_parent
&& !parent_is_class
)
78 /* not starting with "_", public */
79 else if (len
< 1 || p
[0] != '_')
81 /* "__...__": magic methods */
82 else if (kind
== K_METHOD
&& parent_is_class
&&
83 len
> 3 && p
[1] == '_' && p
[len
- 2] == '_' && p
[len
- 1] == '_')
85 /* "__...": name mangling */
86 else if (parent_is_class
&& len
> 1 && p
[1] == '_')
88 /* "_...": suggested as non-public, but easily accessible */
93 static void addAccessFields (tagEntryInfo
*const entry
,
94 const vString
*const ident
, pythonKind kind
,
95 boolean has_parent
, boolean parent_is_class
)
99 access
= accessFromIdentifier (ident
, kind
, has_parent
, parent_is_class
);
100 entry
->extensionFields
.access
= PythonAccesses
[access
];
101 /* FIXME: should we really set isFileScope in addition to access? */
102 if (access
== A_PRIVATE
)
103 entry
->isFileScope
= TRUE
;
106 /* Given a string with the contents of a line directly after the "def" keyword,
107 * extract all relevant information and create a tag.
109 static void makeFunctionTag (vString
*const function
,
110 vString
*const parent
, int is_class_parent
, const char *arglist
)
113 initTagEntry (&tag
, vStringValue (function
));
115 tag
.kindName
= PythonKinds
[K_FUNCTION
].name
;
116 tag
.kind
= PythonKinds
[K_FUNCTION
].letter
;
117 tag
.extensionFields
.signature
= arglist
;
119 if (vStringLength (parent
) > 0)
123 tag
.kindName
= PythonKinds
[K_METHOD
].name
;
124 tag
.kind
= PythonKinds
[K_METHOD
].letter
;
125 tag
.extensionFields
.scope
[0] = PythonKinds
[K_CLASS
].name
;
126 tag
.extensionFields
.scope
[1] = vStringValue (parent
);
130 tag
.extensionFields
.scope
[0] = PythonKinds
[K_FUNCTION
].name
;
131 tag
.extensionFields
.scope
[1] = vStringValue (parent
);
135 addAccessFields (&tag
, function
, is_class_parent
? K_METHOD
: K_FUNCTION
,
136 vStringLength (parent
) > 0, is_class_parent
);
141 /* Given a string with the contents of the line directly after the "class"
142 * keyword, extract all necessary information and create a tag.
144 static void makeClassTag (vString
*const class, vString
*const inheritance
,
145 vString
*const parent
, int is_class_parent
)
148 initTagEntry (&tag
, vStringValue (class));
149 tag
.kindName
= PythonKinds
[K_CLASS
].name
;
150 tag
.kind
= PythonKinds
[K_CLASS
].letter
;
151 if (vStringLength (parent
) > 0)
155 tag
.extensionFields
.scope
[0] = PythonKinds
[K_CLASS
].name
;
156 tag
.extensionFields
.scope
[1] = vStringValue (parent
);
160 tag
.extensionFields
.scope
[0] = PythonKinds
[K_FUNCTION
].name
;
161 tag
.extensionFields
.scope
[1] = vStringValue (parent
);
164 tag
.extensionFields
.inheritance
= vStringValue (inheritance
);
165 addAccessFields (&tag
, class, K_CLASS
, vStringLength (parent
) > 0,
170 static void makeVariableTag (vString
*const var
, vString
*const parent
,
171 boolean is_class_parent
)
174 initTagEntry (&tag
, vStringValue (var
));
175 tag
.kindName
= PythonKinds
[K_VARIABLE
].name
;
176 tag
.kind
= PythonKinds
[K_VARIABLE
].letter
;
177 if (vStringLength (parent
) > 0)
179 tag
.extensionFields
.scope
[0] = PythonKinds
[K_CLASS
].name
;
180 tag
.extensionFields
.scope
[1] = vStringValue (parent
);
182 addAccessFields (&tag
, var
, K_VARIABLE
, vStringLength (parent
) > 0,
/* Skip a single or double quoted string.
 *
 * `cp` points at the opening quote.  Returns the position just past the
 * matching closing quote, or the terminating NUL when the string is not closed
 * on this line.  A backslash hides the character that follows it. */
static const char *skipString (const char *cp)
{
	const char quote = *cp;
	const char *p = cp + 1;

	while (*p != '\0')
	{
		if (*p == '\\')
		{
			/* a trailing backslash has nothing left to escape */
			if (p[1] == '\0')
				return p + 1;
			p += 2;
			continue;
		}
		if (*p == quote)
			return p + 1;
		p++;
	}
	return p;
}
204 /* Skip everything up to an identifier start. */
205 static const char *skipEverything (const char *cp
)
211 return strchr(cp
, '\0');
214 if (*cp
== '"' || *cp
== '\'')
217 /* these checks find unicode, binary (Python 3) and raw strings */
220 boolean r_first
= (*cp
== 'r' || *cp
== 'R');
222 /* "r" | "R" | "u" | "U" | "b" | "B" */
223 if (r_first
|| *cp
== 'u' || *cp
== 'U' || *cp
== 'b' || *cp
== 'B')
227 /* r_first -> "rb" | "rB" | "Rb" | "RB"
228 !r_first -> "ur" | "UR" | "Ur" | "uR" | "br" | "Br" | "bR" | "BR" */
229 if (( r_first
&& (cp
[i
] == 'b' || cp
[i
] == 'B')) ||
230 (!r_first
&& (cp
[i
] == 'r' || cp
[i
] == 'R')))
233 if (cp
[i
] == '\'' || cp
[i
] == '"')
245 if (isIdentifierFirstCharacter ((int) *cp
))
248 cp
--; /* avoid jumping over the character after a skipped string */
/* Skip an identifier: returns the first position at or after `cp` that does
 * not hold an identifier character. */
static const char *skipIdentifier (const char *cp)
{
	for (; isIdentifierCharacter ((int) *cp); cp++)
		;
	return cp;
}
/* Walks `cp` identifier by identifier and returns the position of the first
 * one that starts with "def", "class", "cdef" or "cpdef", or NULL when the
 * line holds none. */
static const char *findDefinitionOrClass (const char *cp)
{
	static const char *const keywords[] = { "def", "class", "cdef", "cpdef" };
	const size_t count = sizeof keywords / sizeof keywords[0];

	for (; *cp != '\0'; cp = skipIdentifier (cp))
	{
		size_t k;

		cp = skipEverything (cp);
		for (k = 0; k < count; k++)
		{
			if (strncmp (cp, keywords[k], strlen (keywords[k])) == 0)
				return cp;
		}
	}
	return NULL;
}
/* Returns the first position at or after `cp` that is not whitespace.
 *
 * Each byte is converted to unsigned char before isspace(): passing a negative
 * `char` (any byte >= 0x80 where char is signed, e.g. UTF-8 text) to a
 * <ctype.h> function is undefined behaviour. */
static const char *skipSpace (const char *cp)
{
	while (isspace ((unsigned char) *cp))
		cp++;
	return cp;
}
283 /* Starting at ''cp'', parse an identifier into ''identifier''. */
284 static const char *parseIdentifier (const char *cp
, vString
*const identifier
)
286 vStringClear (identifier
);
287 while (isIdentifierCharacter ((int) *cp
))
289 vStringPut (identifier
, (int) *cp
);
292 vStringTerminate (identifier
);
296 static void parseClass (const char *cp
, vString
*const class,
297 vString
*const parent
, int is_class_parent
)
299 vString
*const inheritance
= vStringNew ();
300 vStringClear (inheritance
);
301 cp
= parseIdentifier (cp
, class);
310 /* Closing parenthesis can be in follow up line. */
311 cp
= (const char *) fileReadLine ();
313 vStringPut (inheritance
, ' ');
316 vStringPut (inheritance
, *cp
);
319 vStringTerminate (inheritance
);
321 makeClassTag (class, inheritance
, parent
, is_class_parent
);
322 vStringDelete (inheritance
);
325 static void parseImports (const char *cp
)
328 vString
*name
, *name_next
;
330 cp
= skipEverything (cp
);
332 if ((pos
= strstr (cp
, "import")) == NULL
)
337 /* continue only if there is some space between the keyword and the identifier */
344 name
= vStringNew ();
345 name_next
= vStringNew ();
347 cp
= skipEverything (cp
);
350 cp
= parseIdentifier (cp
, name
);
352 cp
= skipEverything (cp
);
353 /* we parse the next possible import statement as well to be able to ignore 'foo' in
354 * 'import foo as bar' */
355 parseIdentifier (cp
, name_next
);
357 /* take the current tag only if the next one is not "as" */
358 if (strcmp (vStringValue (name_next
), "as") != 0 &&
359 strcmp (vStringValue (name
), "as") != 0)
361 makeSimpleTag (name
, PythonKinds
, K_IMPORT
);
364 vStringDelete (name
);
365 vStringDelete (name_next
);
/* modified from get.c getArglistFromStr().
 *
 * Returns a newly allocated copy of the argument list found in `buf`: the text
 * from the first '(' through its matching ')'.  When the parentheses are not
 * balanced the copy runs to the end of `buf`.  Returns NULL when `buf` is NULL,
 * holds no '(' or the allocation fails.  The caller releases the result.
 *
 * `buf` is left untouched: the list is copied out by length instead of being
 * cut in place by writing a NUL into the caller's const string.
 * note: does not ignore brackets inside strings! */
static char *parseArglist(const char *buf)
{
	const char *start, *end;
	char *copy;
	size_t len;
	int level;

	if (NULL == buf)
		return NULL;
	if (NULL == (start = strchr(buf, '(')))
		return NULL;

	/* stop one past the ')' that closes the first '(', or at the NUL */
	for (level = 1, end = start + 1; level > 0 && '\0' != *end; ++end)
	{
		if ('(' == *end)
			++level;
		else if (')' == *end)
			--level;
	}

	len = (size_t) (end - start);
	copy = malloc(len + 1);
	if (NULL == copy)
		return NULL;
	memcpy(copy, start, len);
	copy[len] = '\0';
	return copy;
}
392 static void parseFunction (const char *cp
, vString
*const def
,
393 vString
*const parent
, int is_class_parent
)
397 cp
= parseIdentifier (cp
, def
);
398 arglist
= parseArglist (cp
);
399 makeFunctionTag (def
, parent
, is_class_parent
, arglist
);
404 /* Get the combined name of a nested symbol. Classes are separated with ".",
405 * functions with "/". For example this code:
412 * Would produce this string:
413 * MyClass.MyFunction/SubFunction/SubClass.Method
415 static boolean
constructParentString(NestingLevels
*nls
, int indent
,
419 NestingLevel
*prev
= NULL
;
420 int is_class
= FALSE
;
421 vStringClear (result
);
422 for (i
= 0; i
< nls
->n
; i
++)
424 NestingLevel
*nl
= nls
->levels
+ i
;
425 if (indent
<= nl
->indentation
)
429 vStringCatS(result
, "."); /* make Geany symbol list grouping work properly */
431 if (prev->type == K_CLASS)
432 vStringCatS(result, ".");
434 vStringCatS(result, "/");
437 vStringCat(result
, nl
->name
);
438 is_class
= (nl
->type
== K_CLASS
);
444 /* Check indentation level and truncate nesting levels accordingly */
445 static void checkIndent(NestingLevels
*nls
, int indent
)
450 for (i
= 0; i
< nls
->n
; i
++)
453 if (n
&& indent
<= n
->indentation
)
455 /* truncate levels */
462 static void addNestingLevel(NestingLevels
*nls
, int indentation
,
463 const vString
*name
, boolean is_class
)
466 NestingLevel
*nl
= NULL
;
468 for (i
= 0; i
< nls
->n
; i
++)
470 nl
= nls
->levels
+ i
;
471 if (indentation
<= nl
->indentation
) break;
475 nestingLevelsPush(nls
, name
, 0);
476 nl
= nls
->levels
+ i
;
479 { /* reuse existing slot */
481 vStringCopy(nl
->name
, name
);
483 nl
->indentation
= indentation
;
484 nl
->type
= is_class
? K_CLASS
: !K_CLASS
;
487 /* Return a pointer to the start of the next triple string, or NULL. Store
488 * the kind of triple string in "which" if the return is not NULL.
490 static char const *find_triple_start(char const *string
, char const **which
)
492 char const *cp
= string
;
499 if (*cp
== '"' || *cp
== '\'')
501 if (strncmp(cp
, doubletriple
, 3) == 0)
503 *which
= doubletriple
;
506 if (strncmp(cp
, singletriple
, 3) == 0)
508 *which
= singletriple
;
513 cp
--; /* avoid jumping over the character after a skipped string */
519 /* Find the end of a triple string as pointed to by "which", and update "which"
520 * with any other triple strings following in the given string.
522 static void find_triple_end(char const *string
, char const **which
)
524 char const *s
= string
;
527 /* Check if the string ends in the same line. */
528 s
= strstr (s
, *which
);
532 /* If yes, check if another one starts in the same line. */
533 s
= find_triple_start(s
, which
);
539 static const char *findVariable(const char *line
)
541 /* Parse global and class variable names (C.x) from assignment statements.
542 * Object attributes (obj.x) are ignored.
543 * Assignment to a tuple 'x, y = 2, 3' not supported.
544 * TODO: ignore duplicate tags from reassignment statements. */
545 const char *cp
, *sp
, *eq
, *start
;
547 cp
= strstr(line
, "=");
554 return NULL
; /* ignore '==' operator and 'x=5,y=6)' function lines */
555 if (*eq
== '(' || *eq
== '#')
556 break; /* allow 'x = func(b=2,y=2,' lines and comments at the end of line */
560 /* go backwards to the start of the line, checking we have valid chars */
562 while (start
>= line
&& isspace ((int) *start
))
564 while (start
>= line
&& isIdentifierCharacter ((int) *start
))
566 if (!isIdentifierFirstCharacter(*(start
+ 1)))
569 while (sp
>= line
&& isspace ((int) *sp
))
571 if ((sp
+ 1) != line
) /* the line isn't a simple variable assignment */
573 /* the line is valid, parse the variable name */
578 /* Skip type declaration that optionally follows a cdef/cpdef */
579 static const char *skipTypeDecl (const char *cp
, boolean
*is_class
)
581 const char *lastStart
= cp
, *ptr
= cp
;
584 if (!strncmp("extern", ptr
, 6)) {
586 ptr
= skipSpace(ptr
);
587 if (!strncmp("from", ptr
, 4)) { return NULL
; }
589 if (!strncmp("class", ptr
, 5)) {
592 ptr
= skipSpace(ptr
);
595 /* limit so that we don't pick off "int item=obj()" */
596 while (*ptr
&& loopCount
++ < 2) {
597 while (*ptr
&& *ptr
!= '=' && *ptr
!= '(' && !isspace(*ptr
)) {
598 /* skip over e.g. 'cpdef numpy.ndarray[dtype=double, ndim=1]' */
600 while (*ptr
&& *ptr
!= ']') ptr
++;
606 if (!*ptr
|| *ptr
== '=') return NULL
;
608 return lastStart
; /* if we stopped on a '(' we are done */
610 ptr
= skipSpace(ptr
);
612 while (*lastStart
== '*') lastStart
++; /* cdef int *identifier */
617 /* checks if there is a lambda at position of cp, and return its argument list
619 * We don't return the lambda name since it is useless for now since we already
620 * know it when we call this function, and it would be a little slower. */
621 static boolean
varIsLambda (const char *cp
, char **arglist
)
623 boolean is_lambda
= FALSE
;
626 cp
= skipIdentifier (cp
); /* skip the lambda's name */
632 if (strncmp (cp
, "lambda", 6) == 0)
636 cp
+= 6; /* skip the lambda */
637 tmp
= skipSpace (cp
);
638 /* check if there is a space after lambda to detect assignations
639 * starting with 'lambdaXXX' */
642 vString
*args
= vStringNew ();
645 vStringPut (args
, '(');
646 for (; *cp
!= 0 && *cp
!= ':'; cp
++)
647 vStringPut (args
, *cp
);
648 vStringPut (args
, ')');
649 vStringTerminate (args
);
651 *arglist
= strdup (vStringValue (args
));
652 vStringDelete (args
);
660 /* checks if @p cp has keyword @p keyword at the start, and fills @p cp_n with
661 * the position of the next non-whitespace after the keyword */
662 static boolean
matchKeyword (const char *keyword
, const char *cp
, const char **cp_n
)
664 size_t kw_len
= strlen (keyword
);
665 if (strncmp (cp
, keyword
, kw_len
) == 0 && isspace (cp
[kw_len
]))
667 *cp_n
= skipSpace (&cp
[kw_len
+ 1]);
673 static void findPythonTags (void)
675 vString
*const continuation
= vStringNew ();
676 vString
*const name
= vStringNew ();
677 vString
*const parent
= vStringNew();
679 NestingLevels
*const nesting_levels
= nestingLevelsNew();
683 char const *longStringLiteral
= NULL
;
685 while ((line
= (const char *) fileReadLine ()) != NULL
)
687 const char *cp
= line
, *candidate
;
688 char const *longstring
;
689 char const *keyword
, *variable
;
694 if (*cp
== '\0') /* skip blank line */
697 /* Skip comment if we are not inside a multi-line string. */
698 if (*cp
== '#' && !longStringLiteral
)
701 /* Deal with line continuation. */
702 if (!line_skip
) vStringClear(continuation
);
703 vStringCatS(continuation
, line
);
704 vStringStripTrailing(continuation
);
705 if (vStringLast(continuation
) == '\\')
707 vStringChop(continuation
);
708 vStringCatS(continuation
, " ");
712 cp
= line
= vStringValue(continuation
);
717 /* Deal with multiline string ending. */
718 if (longStringLiteral
)
720 find_triple_end(cp
, &longStringLiteral
);
724 checkIndent(nesting_levels
, indent
);
726 /* Find global and class variables */
727 variable
= findVariable(line
);
730 const char *start
= variable
;
732 boolean parent_is_class
;
735 while (isIdentifierCharacter ((int) *start
))
737 vStringPut (name
, (int) *start
);
740 vStringTerminate (name
);
742 parent_is_class
= constructParentString(nesting_levels
, indent
, parent
);
743 if (varIsLambda (variable
, &arglist
))
745 /* show class members or top-level script lambdas only */
746 if (parent_is_class
|| vStringLength(parent
) == 0)
747 makeFunctionTag (name
, parent
, parent_is_class
, arglist
);
752 /* skip variables in methods */
753 if (parent_is_class
|| vStringLength(parent
) == 0)
754 makeVariableTag (name
, parent
, parent_is_class
);
758 /* Deal with multiline string start. */
759 longstring
= find_triple_start(cp
, &longStringLiteral
);
763 find_triple_end(longstring
, &longStringLiteral
);
764 /* We don't parse for any tags in the rest of the line. */
768 /* Deal with def and class keywords. */
769 keyword
= findDefinitionOrClass (cp
);
772 boolean found
= FALSE
;
773 boolean is_class
= FALSE
;
774 if (matchKeyword ("def", keyword
, &cp
))
778 else if (matchKeyword ("class", keyword
, &cp
))
783 else if (matchKeyword ("cdef", keyword
, &cp
))
785 candidate
= skipTypeDecl (cp
, &is_class
);
793 else if (matchKeyword ("cpdef", keyword
, &cp
))
795 candidate
= skipTypeDecl (cp
, &is_class
);
805 boolean is_parent_class
;
808 constructParentString(nesting_levels
, indent
, parent
);
811 parseClass (cp
, name
, parent
, is_parent_class
);
813 parseFunction(cp
, name
, parent
, is_parent_class
);
815 addNestingLevel(nesting_levels
, indent
, name
, is_class
);
818 /* Find and parse imports */
821 /* Clean up all memory we allocated. */
822 vStringDelete (parent
);
823 vStringDelete (name
);
824 vStringDelete (continuation
);
825 nestingLevelsFree (nesting_levels
);
828 extern parserDefinition
*PythonParser (void)
830 static const char *const extensions
[] = { "py", "pyx", "pxd", "pxi" ,"scons", NULL
};
831 parserDefinition
*def
= parserNew ("Python");
832 def
->kinds
= PythonKinds
;
833 def
->kindCount
= KIND_COUNT (PythonKinds
);
834 def
->extensions
= extensions
;
835 def
->parser
= findPythonTags
;
839 /* vi:set tabstop=4 shiftwidth=4: */