/*
 *   Copyright (c) 2000-2003, Darren Hiebert
 *
 *   This source code is released for free distribution under the terms of the
 *   GNU General Public License.
 *
 *   This module contains functions for generating tags for Python language
 *   files.
 */
13 #include "general.h" /* must always come first */
22 #include "nestlevel.h"
28 K_CLASS
, K_FUNCTION
, K_METHOD
, K_VARIABLE
, K_IMPORT
31 static kindOption PythonKinds
[] = {
32 {TRUE
, 'c', "class", "classes"},
33 {TRUE
, 'f', "function", "functions"},
34 {TRUE
, 'm', "method", "class methods"},
35 {TRUE
, 'v', "variable", "variables"},
36 /* defined as externvar to get those excluded as forward type in symbols.c:goto_tag()
37 * so we can jump to the real implementation (if known) instead of to the import statement */
38 {TRUE
, 'x', "externvar", "imports"}
42 A_PUBLIC
, A_PRIVATE
, A_PROTECTED
45 static const char *const PythonAccesses
[] = {
46 "public", "private", "protected"
49 static char const * const singletriple
= "'''";
50 static char const * const doubletriple
= "\"\"\"";
/*
 *   FUNCTION DEFINITIONS
 */
56 static boolean
isIdentifierFirstCharacter (int c
)
58 return (boolean
) (isalpha (c
) || c
== '_');
61 static boolean
isIdentifierCharacter (int c
)
63 return (boolean
) (isalnum (c
) || c
== '_');
/* Return the last component of a combined parent name, i.e. the text after
 * the last '.' and, within that, after the last '/'.
 * Returns NULL when parent is NULL; otherwise the result points into parent
 * and must not be freed. */
static const char *get_class_name_from_parent (const char *parent)
{
	const char *dot;
	const char *slash;

	if (parent == NULL)
		return NULL;

	/* keep only what follows the last '.', if there is one */
	dot = strrchr (parent, '.');
	if (dot != NULL)
		parent = dot + 1;

	/* then keep only what follows the last '/', if there is one */
	slash = strrchr (parent, '/');
	return (slash != NULL) ? slash + 1 : parent;
}
89 /* follows PEP-8, and always reports single-underscores as protected
91 * - http://www.python.org/dev/peps/pep-0008/#method-names-and-instance-variables
92 * - http://www.python.org/dev/peps/pep-0008/#designing-for-inheritance
94 static pythonAccess
accessFromIdentifier (const vString
*const ident
)
96 const char *const p
= vStringValue (ident
);
97 const size_t len
= vStringLength (ident
);
99 /* not starting with "_", public */
100 if (len
< 1 || p
[0] != '_')
102 /* "__...__": magic methods */
103 else if (len
> 3 && p
[1] == '_' && p
[len
- 2] == '_' && p
[len
- 1] == '_')
105 /* "__...": name mangling */
106 else if (len
> 1 && p
[1] == '_')
108 /* "_...": suggested as non-public, but easily accessible */
113 /* Given a string with the contents of a line directly after the "def" keyword,
114 * extract all relevant information and create a tag.
116 static void makeFunctionTag (vString
*const function
,
117 vString
*const parent
, int is_class_parent
, const char *arglist
)
121 initTagEntry (&tag
, vStringValue (function
));
123 tag
.kindName
= PythonKinds
[K_FUNCTION
].name
;
124 tag
.kind
= PythonKinds
[K_FUNCTION
].letter
;
125 tag
.extensionFields
.arglist
= arglist
;
126 /* add argument list of __init__() methods to the class tag */
127 if (strcmp (vStringValue (function
), "__init__") == 0 && parent
!= NULL
)
129 const char *parent_tag_name
= get_class_name_from_parent (vStringValue (parent
));
130 if (parent_tag_name
!= NULL
)
131 setTagArglistByName (parent_tag_name
, arglist
);
134 if (vStringLength (parent
) > 0)
138 tag
.kindName
= PythonKinds
[K_METHOD
].name
;
139 tag
.kind
= PythonKinds
[K_METHOD
].letter
;
140 tag
.extensionFields
.scope
[0] = PythonKinds
[K_CLASS
].name
;
141 tag
.extensionFields
.scope
[1] = vStringValue (parent
);
145 tag
.extensionFields
.scope
[0] = PythonKinds
[K_FUNCTION
].name
;
146 tag
.extensionFields
.scope
[1] = vStringValue (parent
);
150 access
= accessFromIdentifier (function
);
151 tag
.extensionFields
.access
= PythonAccesses
[access
];
152 /* FIXME: should we really set isFileScope in addition to access? */
153 if (access
== A_PRIVATE
)
154 tag
.isFileScope
= TRUE
;
159 /* Given a string with the contents of the line directly after the "class"
160 * keyword, extract all necessary information and create a tag.
162 static void makeClassTag (vString
*const class, vString
*const inheritance
,
163 vString
*const parent
, int is_class_parent
)
166 initTagEntry (&tag
, vStringValue (class));
167 tag
.kindName
= PythonKinds
[K_CLASS
].name
;
168 tag
.kind
= PythonKinds
[K_CLASS
].letter
;
169 if (vStringLength (parent
) > 0)
173 tag
.extensionFields
.scope
[0] = PythonKinds
[K_CLASS
].name
;
174 tag
.extensionFields
.scope
[1] = vStringValue (parent
);
178 tag
.extensionFields
.scope
[0] = PythonKinds
[K_FUNCTION
].name
;
179 tag
.extensionFields
.scope
[1] = vStringValue (parent
);
182 tag
.extensionFields
.inheritance
= vStringValue (inheritance
);
186 static void makeVariableTag (vString
*const var
, vString
*const parent
)
189 initTagEntry (&tag
, vStringValue (var
));
190 tag
.kindName
= PythonKinds
[K_VARIABLE
].name
;
191 tag
.kind
= PythonKinds
[K_VARIABLE
].letter
;
192 if (vStringLength (parent
) > 0)
194 tag
.extensionFields
.scope
[0] = PythonKinds
[K_CLASS
].name
;
195 tag
.extensionFields
.scope
[1] = vStringValue (parent
);
/* Skip a single or double quoted string.
 * cp points at the opening delimiter; the same character closes the string
 * unless it is preceded by a backslash.  Returns a pointer just past the
 * closing delimiter, or to the terminating NUL when the string is not
 * closed. */
static const char *skipString (const char *cp)
{
	const char delimiter = *cp;
	int skip_next = 0;

	for (cp++; *cp != '\0'; cp++)
	{
		if (skip_next)
		{
			/* this character was escaped by the preceding backslash */
			skip_next = 0;
			continue;
		}
		if (*cp == '\\')
			skip_next = 1;
		else if (*cp == delimiter)
			return cp + 1;
	}
	return cp;
}
/* Skip everything up to an identifier start.
 * Quoted strings, including those carrying a u/r/b/ur/br prefix in either
 * case, are stepped over with skipString(); a '#' is handled the same way as
 * a quote character.  Returns a pointer to the first identifier-start
 * character found, or to the terminating NUL. */
static const char *skipEverything (const char *cp)
{
	for (; *cp; cp++)
	{
		int at_string = (*cp == '"' || *cp == '\'' || *cp == '#');

		/* these checks find unicode, binary (Python 3) and raw strings */
		if (!at_string && (
			!strncasecmp (cp, "u'", 2) || !strncasecmp (cp, "u\"", 2) ||
			!strncasecmp (cp, "r'", 2) || !strncasecmp (cp, "r\"", 2) ||
			!strncasecmp (cp, "b'", 2) || !strncasecmp (cp, "b\"", 2)))
		{
			at_string = 1;
			cp += 1;	/* step onto the quote character */
		}
		if (!at_string && (
			!strncasecmp (cp, "ur'", 3) || !strncasecmp (cp, "ur\"", 3) ||
			!strncasecmp (cp, "br'", 3) || !strncasecmp (cp, "br\"", 3)))
		{
			at_string = 1;
			cp += 2;	/* step onto the quote character */
		}

		if (at_string)
		{
			cp = skipString (cp);
			if (*cp == '\0')
				break;
		}
		if (isIdentifierFirstCharacter ((int) *cp))
			break;
	}
	return cp;
}
/* Skip an identifier: return a pointer to the first character at or after
 * cp that is not an identifier character. */
static const char *skipIdentifier (const char *cp)
{
	for (; isIdentifierCharacter ((int) *cp); cp++)
		;
	return cp;
}
/* Look for a word starting with "def", "class", "cdef" or "cpdef".
 * Words are visited one at a time: skipEverything() moves to the next
 * identifier start and skipIdentifier() steps over a word that did not
 * match.  Returns a pointer to the matching word, or NULL when the end of
 * the string is reached without a match. */
static const char *findDefinitionOrClass (const char *cp)
{
	while (*cp != '\0')
	{
		cp = skipEverything (cp);
		if (strncmp (cp, "def", 3) == 0 || strncmp (cp, "class", 5) == 0 ||
			strncmp (cp, "cdef", 4) == 0 || strncmp (cp, "cpdef", 5) == 0)
		{
			return cp;
		}
		cp = skipIdentifier (cp);
	}
	return NULL;
}
/* Return a pointer to the first character at or after cp that is not
 * whitespace (as classified by isspace()); this may be the terminating NUL.
 *
 * The character is converted to unsigned char before classification: plain
 * char may be signed, and <ctype.h> functions are undefined for negative
 * values other than EOF. */
static const char *skipSpace (const char *cp)
{
	while (isspace ((unsigned char) *cp))
		cp++;
	return cp;
}
284 /* Starting at ''cp'', parse an identifier into ''identifier''. */
285 static const char *parseIdentifier (const char *cp
, vString
*const identifier
)
287 vStringClear (identifier
);
288 while (isIdentifierCharacter ((int) *cp
))
290 vStringPut (identifier
, (int) *cp
);
293 vStringTerminate (identifier
);
297 static void parseClass (const char *cp
, vString
*const class,
298 vString
*const parent
, int is_class_parent
)
300 vString
*const inheritance
= vStringNew ();
301 vStringClear (inheritance
);
302 cp
= parseIdentifier (cp
, class);
311 /* Closing parenthesis can be in follow up line. */
312 cp
= (const char *) fileReadLine ();
314 vStringPut (inheritance
, ' ');
317 vStringPut (inheritance
, *cp
);
320 vStringTerminate (inheritance
);
322 makeClassTag (class, inheritance
, parent
, is_class_parent
);
323 vStringDelete (inheritance
);
326 static void parseImports (const char *cp
)
329 vString
*name
, *name_next
;
331 cp
= skipEverything (cp
);
333 if ((pos
= strstr (cp
, "import")) == NULL
)
338 /* continue only if there is some space between the keyword and the identifier */
345 name
= vStringNew ();
346 name_next
= vStringNew ();
348 cp
= skipEverything (cp
);
351 cp
= parseIdentifier (cp
, name
);
353 cp
= skipEverything (cp
);
354 /* we parse the next possible import statement as well to be able to ignore 'foo' in
355 * 'import foo as bar' */
356 parseIdentifier (cp
, name_next
);
358 /* take the current tag only if the next one is not "as" */
359 if (strcmp (vStringValue (name_next
), "as") != 0 &&
360 strcmp (vStringValue (name
), "as") != 0)
362 makeSimpleTag (name
, PythonKinds
, K_IMPORT
);
365 vStringDelete (name
);
366 vStringDelete (name_next
);
/* modified from get.c getArglistFromStr().
 *
 * Return a newly allocated copy of the argument list found in buf: the text
 * from the first '(' up to and including its matching ')', or up to the end
 * of buf when the parentheses are not balanced.  Returns NULL when buf is
 * NULL, contains no '(', or the copy cannot be allocated.
 *
 * The terminator is written into the copy only; buf itself is never
 * modified, as its const qualifier promises.
 * note: does not ignore brackets inside strings! */
static char *parseArglist(const char *buf)
{
	const char *open, *end;
	int level;
	char *copy;

	if (NULL == buf)
		return NULL;
	open = strchr (buf, '(');
	if (NULL == open)
		return NULL;

	/* leave end one past the matching ')', or on the terminating NUL */
	for (level = 1, end = open + 1; level > 0 && '\0' != *end; ++end)
	{
		if ('(' == *end)
			++level;
		else if (')' == *end)
			--level;
	}

	copy = strdup (open);
	if (copy != NULL)
		copy[end - open] = '\0';
	return copy;
}
393 static void parseFunction (const char *cp
, vString
*const def
,
394 vString
*const parent
, int is_class_parent
)
398 cp
= parseIdentifier (cp
, def
);
399 arglist
= parseArglist (cp
);
400 makeFunctionTag (def
, parent
, is_class_parent
, arglist
);
405 /* Get the combined name of a nested symbol. Classes are separated with ".",
406 * functions with "/". For example this code:
413 * Would produce this string:
414 * MyClass.MyFunction/SubFunction/SubClass.Method
416 static boolean
constructParentString(NestingLevels
*nls
, int indent
,
420 NestingLevel
*prev
= NULL
;
421 int is_class
= FALSE
;
422 vStringClear (result
);
423 for (i
= 0; i
< nls
->n
; i
++)
425 NestingLevel
*nl
= nls
->levels
+ i
;
426 if (indent
<= nl
->indentation
)
430 vStringCatS(result
, "."); /* make Geany symbol list grouping work properly */
432 if (prev->type == K_CLASS)
433 vStringCatS(result, ".");
435 vStringCatS(result, "/");
438 vStringCat(result
, nl
->name
);
439 is_class
= (nl
->type
== K_CLASS
);
445 /* Check whether parent's indentation level is higher than the current level and
448 static void checkParent(NestingLevels
*nls
, int indent
, vString
*parent
)
453 for (i
= 0; i
< nls
->n
; i
++)
456 /* is there a better way to compare two vStrings? */
457 if (n
&& strcmp(vStringValue(parent
), vStringValue(n
->name
)) == 0)
459 if (indent
<= n
->indentation
)
461 /* remove this level by clearing its name */
462 vStringClear(n
->name
);
469 static void addNestingLevel(NestingLevels
*nls
, int indentation
,
470 const vString
*name
, boolean is_class
)
473 NestingLevel
*nl
= NULL
;
475 for (i
= 0; i
< nls
->n
; i
++)
477 nl
= nls
->levels
+ i
;
478 if (indentation
<= nl
->indentation
) break;
482 nestingLevelsPush(nls
, name
, 0);
483 nl
= nls
->levels
+ i
;
486 { /* reuse existing slot */
488 vStringCopy(nl
->name
, name
);
490 nl
->indentation
= indentation
;
491 nl
->type
= is_class
? K_CLASS
: !K_CLASS
;
/* Return a pointer to the start of the next triple string, or NULL. Store
 * the kind of triple string in "which" if the return is not NULL.
 */
/* string: text to scan.
 * which:  set to doubletriple or singletriple, depending on which of the two
 *         strncmp() comparisons below matches at a quote character.
 * NOTE(review): several lines of this function are missing from this view;
 * confirm the loop and return statements against the complete file. */
497 static char const *find_triple_start(char const *string
, char const **which
)
499 char const *cp
= string
;
506 if (*cp
== '"' || *cp
== '\'')
508 if (strncmp(cp
, doubletriple
, 3) == 0)
510 *which
= doubletriple
;
513 if (strncmp(cp
, singletriple
, 3) == 0)
515 *which
= singletriple
;
/* Find the end of a triple string as pointed to by "which", and update "which"
 * with any other triple strings following in the given string.
 *
 * On return *which is NULL when every triple string opened on this line was
 * also closed on it; otherwise it names the delimiter that is still open.
 */
static void find_triple_end(char const *string, char const **which)
{
	char const *pos = string;

	for (;;)
	{
		/* Check if the string ends in the same line. */
		pos = strstr (pos, *which);
		if (pos == NULL)
			return;
		pos += 3;
		*which = NULL;

		/* If yes, check if another one starts in the same line. */
		pos = find_triple_start (pos, which);
		if (pos == NULL)
			return;
		pos += 3;
	}
}
/* Parse global and class variable names (C.x) from assignment statements.
 * Object attributes (obj.x) are ignored.
 * Assignment to a tuple 'x, y = 2, 3' not supported.
 * TODO: ignore duplicate tags from reassignment statements.
 *
 * Returns a pointer to the start of the variable name inside line, or NULL
 * when the line is not a simple "name = value" assignment.
 *
 * The backward scan works on offsets into line rather than on a pointer that
 * is decremented past the start of the buffer, and characters are converted
 * to unsigned char before being passed to isspace(). */
static const char *findVariable(const char *line)
{
	const char *assign, *scan;
	size_t end;		/* one past the last character still to examine */
	size_t name_begin;

	assign = strstr (line, "=");
	if (assign == NULL)
		return NULL;

	for (scan = assign + 1; *scan != '\0'; scan++)
	{
		if (*scan == '=')
			return NULL;	/* ignore '==' operator and 'x=5,y=6)' function lines */
		if (*scan == '(' || *scan == '#')
			break;	/* allow 'x = func(b=2,y=2,' lines and comments at the end of line */
	}

	/* go backwards to the start of the line, checking we have valid chars */
	end = (size_t) (assign - line);
	while (end > 0 && isspace ((unsigned char) line[end - 1]))
		end--;
	while (end > 0 && isIdentifierCharacter ((int) line[end - 1]))
		end--;
	name_begin = end;
	if (!isIdentifierFirstCharacter (line[name_begin]))
		return NULL;

	while (end > 0 && isspace ((unsigned char) line[end - 1]))
		end--;
	if (end != 0)	/* the line isn't a simple variable assignment */
		return NULL;

	/* the line is valid, parse the variable name */
	return line + name_begin;
}
584 /* Skip type declaration that optionally follows a cdef/cpdef */
585 static const char *skipTypeDecl (const char *cp
, boolean
*is_class
)
587 const char *lastStart
= cp
, *ptr
= cp
;
590 if (!strncmp("extern", ptr
, 6)) {
592 ptr
= skipSpace(ptr
);
593 if (!strncmp("from", ptr
, 4)) { return NULL
; }
595 if (!strncmp("class", ptr
, 5)) {
598 ptr
= skipSpace(ptr
);
601 /* limit so that we don't pick off "int item=obj()" */
602 while (*ptr
&& loopCount
++ < 2) {
603 while (*ptr
&& *ptr
!= '=' && *ptr
!= '(' && !isspace(*ptr
)) {
604 /* skip over e.g. 'cpdef numpy.ndarray[dtype=double, ndim=1]' */
606 while(*ptr
&& *ptr
!= ']') ptr
++;
610 if (!*ptr
|| *ptr
== '=') return NULL
;
612 return lastStart
; /* if we stopped on a '(' we are done */
614 ptr
= skipSpace(ptr
);
616 while (*lastStart
== '*') lastStart
++; /* cdef int *identifier */
621 /* checks if there is a lambda at position of cp, and return its argument list
623 * We don't return the lambda name since it is useless for now since we already
624 * know it when we call this function, and it would be a little slower. */
625 static boolean
varIsLambda (const char *cp
, char **arglist
)
627 boolean is_lambda
= FALSE
;
630 cp
= skipIdentifier (cp
); /* skip the lambda's name */
636 if (strncmp (cp
, "lambda", 6) == 0)
640 cp
+= 6; /* skip the lambda */
641 tmp
= skipSpace (cp
);
642 /* check if there is a space after lambda to detect assignations
643 * starting with 'lambdaXXX' */
646 vString
*args
= vStringNew ();
649 vStringPut (args
, '(');
650 for (; *cp
!= 0 && *cp
!= ':'; cp
++)
651 vStringPut (args
, *cp
);
652 vStringPut (args
, ')');
653 vStringTerminate (args
);
655 *arglist
= strdup (vStringValue (args
));
656 vStringDelete (args
);
664 /* checks if @p cp has keyword @p keyword at the start, and fills @p cp_n with
665 * the position of the next non-whitespace after the keyword */
666 static boolean
matchKeyword (const char *keyword
, const char *cp
, const char **cp_n
)
668 size_t kw_len
= strlen (keyword
);
669 if (strncmp (cp
, keyword
, kw_len
) == 0 && isspace (cp
[kw_len
]))
671 *cp_n
= skipSpace (&cp
[kw_len
+ 1]);
/* Parser entry point (installed as def->parser by PythonParser()).
 * Reads the input with fileReadLine() until it returns NULL and, for each
 * line, works through the stages marked by the inline comments below:
 * blank lines and comments, backslash line continuation (accumulated in
 * "continuation"), multi-line string tracking ("longStringLiteral"),
 * variable and lambda assignments, def/class/cdef/cpdef definitions, and
 * imports.  Scopes are tracked in "nesting_levels"; "parent" holds the
 * combined name built by constructParentString().
 * NOTE(review): many lines of this function are missing from this view;
 * confirm the control flow against the complete file before changing it. */
677 static void findPythonTags (void)
679 vString
*const continuation
= vStringNew ();
680 vString
*const name
= vStringNew ();
681 vString
*const parent
= vStringNew();
683 NestingLevels
*const nesting_levels
= nestingLevelsNew();
687 char const *longStringLiteral
= NULL
;
689 while ((line
= (const char *) fileReadLine ()) != NULL
)
691 const char *cp
= line
, *candidate
;
692 char const *longstring
;
693 char const *keyword
, *variable
;
698 if (*cp
== '\0') /* skip blank line */
701 /* Skip comment if we are not inside a multi-line string. */
702 if (*cp
== '#' && !longStringLiteral
)
705 /* Deal with line continuation. */
706 if (!line_skip
) vStringClear(continuation
);
707 vStringCatS(continuation
, line
);
708 vStringStripTrailing(continuation
);
709 if (vStringLast(continuation
) == '\\')
711 vStringChop(continuation
);
712 vStringCatS(continuation
, " ");
716 cp
= line
= vStringValue(continuation
);
721 /* Deal with multiline string ending. */
722 if (longStringLiteral
)
724 find_triple_end(cp
, &longStringLiteral
);
728 checkParent(nesting_levels
, indent
, parent
);
730 /* Find global and class variables */
731 variable
= findVariable(line
);
734 const char *start
= variable
;
736 boolean parent_is_class
;
739 while (isIdentifierCharacter ((int) *start
))
741 vStringPut (name
, (int) *start
);
744 vStringTerminate (name
);
746 parent_is_class
= constructParentString(nesting_levels
, indent
, parent
);
747 if (varIsLambda (variable
, &arglist
))
749 /* show class members or top-level script lambdas only */
750 if (parent_is_class
|| vStringLength(parent
) == 0)
751 makeFunctionTag (name
, parent
, parent_is_class
, arglist
);
756 /* skip variables in methods */
757 if (parent_is_class
|| vStringLength(parent
) == 0)
758 makeVariableTag (name
, parent
);
762 /* Deal with multiline string start. */
763 longstring
= find_triple_start(cp
, &longStringLiteral
);
767 find_triple_end(longstring
, &longStringLiteral
);
768 /* We don't parse for any tags in the rest of the line. */
772 /* Deal with def and class keywords. */
773 keyword
= findDefinitionOrClass (cp
);
776 boolean found
= FALSE
;
777 boolean is_class
= FALSE
;
778 if (matchKeyword ("def", keyword
, &cp
))
782 else if (matchKeyword ("class", keyword
, &cp
))
787 else if (matchKeyword ("cdef", keyword
, &cp
))
789 candidate
= skipTypeDecl (cp
, &is_class
);
797 else if (matchKeyword ("cpdef", keyword
, &cp
))
799 candidate
= skipTypeDecl (cp
, &is_class
);
809 boolean is_parent_class
;
812 constructParentString(nesting_levels
, indent
, parent
);
815 parseClass (cp
, name
, parent
, is_parent_class
);
817 parseFunction(cp
, name
, parent
, is_parent_class
);
819 addNestingLevel(nesting_levels
, indent
, name
, is_class
);
822 /* Find and parse imports */
825 /* Clean up all memory we allocated. */
826 vStringDelete (parent
);
827 vStringDelete (name
);
828 vStringDelete (continuation
);
829 nestingLevelsFree (nesting_levels
);
832 extern parserDefinition
*PythonParser (void)
834 static const char *const extensions
[] = { "py", "pyx", "pxd", "pxi" ,"scons", NULL
};
835 parserDefinition
*def
= parserNew ("Python");
836 def
->kinds
= PythonKinds
;
837 def
->kindCount
= KIND_COUNT (PythonKinds
);
838 def
->extensions
= extensions
;
839 def
->parser
= findPythonTags
;
843 /* vi:set tabstop=4 shiftwidth=4: */