2 * Copyright (c) 2000-2003, Darren Hiebert
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License.
7 * This module contains functions for generating tags for Python language
13 #include "general.h" /* must always come first */
22 #include "nestlevel.h"
28 K_CLASS
, K_FUNCTION
, K_METHOD
, K_VARIABLE
, K_IMPORT
31 static kindOption PythonKinds
[] = {
32 {TRUE
, 'c', "class", "classes"},
33 {TRUE
, 'f', "function", "functions"},
34 {TRUE
, 'm', "method", "class methods"},
35 {TRUE
, 'v', "variable", "variables"},
36 /* defined as externvar to get those excluded as forward type in symbols.c:goto_tag()
37 * so we can jump to the real implementation (if known) instead of to the import statement */
38 {TRUE
, 'x', "externvar", "imports"}
42 A_PUBLIC
, A_PRIVATE
, A_PROTECTED
45 static const char *const PythonAccesses
[] = {
46 "public", "private", "protected"
49 static char const * const singletriple
= "'''";
50 static char const * const doubletriple
= "\"\"\"";
53 * FUNCTION DEFINITIONS
56 static boolean
isIdentifierFirstCharacter (int c
)
58 return (boolean
) (isalpha (c
) || c
== '_');
61 static boolean
isIdentifierCharacter (int c
)
63 return (boolean
) (isalnum (c
) || c
== '_');
66 static const char *get_class_name_from_parent (const char *parent
)
73 result
= strrchr (parent
, '.');
80 result
= strrchr (parent
, '/');
89 /* follows PEP-8, and always reports single-underscores as protected
91 * - http://www.python.org/dev/peps/pep-0008/#method-names-and-instance-variables
92 * - http://www.python.org/dev/peps/pep-0008/#designing-for-inheritance
94 static pythonAccess
accessFromIdentifier (const vString
*const ident
,
95 pythonKind kind
, boolean has_parent
, boolean parent_is_class
)
97 const char *const p
= vStringValue (ident
);
98 const size_t len
= vStringLength (ident
);
100 /* inside a function/method, private */
101 if (has_parent
&& !parent_is_class
)
103 /* not starting with "_", public */
104 else if (len
< 1 || p
[0] != '_')
106 /* "__...__": magic methods */
107 else if (kind
== K_METHOD
&& parent_is_class
&&
108 len
> 3 && p
[1] == '_' && p
[len
- 2] == '_' && p
[len
- 1] == '_')
110 /* "__...": name mangling */
111 else if (parent_is_class
&& len
> 1 && p
[1] == '_')
113 /* "_...": suggested as non-public, but easily accessible */
118 static void addAccessFields (tagEntryInfo
*const entry
,
119 const vString
*const ident
, pythonKind kind
,
120 boolean has_parent
, boolean parent_is_class
)
124 access
= accessFromIdentifier (ident
, kind
, has_parent
, parent_is_class
);
125 entry
->extensionFields
.access
= PythonAccesses
[access
];
126 /* FIXME: should we really set isFileScope in addition to access? */
127 if (access
== A_PRIVATE
)
128 entry
->isFileScope
= TRUE
;
131 /* Given a string with the contents of a line directly after the "def" keyword,
132 * extract all relevant information and create a tag.
134 static void makeFunctionTag (vString
*const function
,
135 vString
*const parent
, int is_class_parent
, const char *arglist
)
138 initTagEntry (&tag
, vStringValue (function
));
140 tag
.kindName
= PythonKinds
[K_FUNCTION
].name
;
141 tag
.kind
= PythonKinds
[K_FUNCTION
].letter
;
142 tag
.extensionFields
.arglist
= arglist
;
143 /* add argument list of __init__() methods to the class tag */
144 if (strcmp (vStringValue (function
), "__init__") == 0 && parent
!= NULL
)
146 const char *parent_tag_name
= get_class_name_from_parent (vStringValue (parent
));
147 if (parent_tag_name
!= NULL
)
148 setTagArglistByName (parent_tag_name
, arglist
);
151 if (vStringLength (parent
) > 0)
155 tag
.kindName
= PythonKinds
[K_METHOD
].name
;
156 tag
.kind
= PythonKinds
[K_METHOD
].letter
;
157 tag
.extensionFields
.scope
[0] = PythonKinds
[K_CLASS
].name
;
158 tag
.extensionFields
.scope
[1] = vStringValue (parent
);
162 tag
.extensionFields
.scope
[0] = PythonKinds
[K_FUNCTION
].name
;
163 tag
.extensionFields
.scope
[1] = vStringValue (parent
);
167 addAccessFields (&tag
, function
, is_class_parent
? K_METHOD
: K_FUNCTION
,
168 vStringLength (parent
) > 0, is_class_parent
);
173 /* Given a string with the contents of the line directly after the "class"
174 * keyword, extract all necessary information and create a tag.
176 static void makeClassTag (vString
*const class, vString
*const inheritance
,
177 vString
*const parent
, int is_class_parent
)
180 initTagEntry (&tag
, vStringValue (class));
181 tag
.kindName
= PythonKinds
[K_CLASS
].name
;
182 tag
.kind
= PythonKinds
[K_CLASS
].letter
;
183 if (vStringLength (parent
) > 0)
187 tag
.extensionFields
.scope
[0] = PythonKinds
[K_CLASS
].name
;
188 tag
.extensionFields
.scope
[1] = vStringValue (parent
);
192 tag
.extensionFields
.scope
[0] = PythonKinds
[K_FUNCTION
].name
;
193 tag
.extensionFields
.scope
[1] = vStringValue (parent
);
196 tag
.extensionFields
.inheritance
= vStringValue (inheritance
);
197 addAccessFields (&tag
, class, K_CLASS
, vStringLength (parent
) > 0,
202 static void makeVariableTag (vString
*const var
, vString
*const parent
,
203 boolean is_class_parent
)
206 initTagEntry (&tag
, vStringValue (var
));
207 tag
.kindName
= PythonKinds
[K_VARIABLE
].name
;
208 tag
.kind
= PythonKinds
[K_VARIABLE
].letter
;
209 if (vStringLength (parent
) > 0)
211 tag
.extensionFields
.scope
[0] = PythonKinds
[K_CLASS
].name
;
212 tag
.extensionFields
.scope
[1] = vStringValue (parent
);
214 addAccessFields (&tag
, var
, K_VARIABLE
, vStringLength (parent
) > 0,
219 /* Skip a single or double quoted string. */
220 static const char *skipString (const char *cp
)
222 const char *start
= cp
;
224 for (cp
++; *cp
; cp
++)
228 else if (*cp
== '\\')
230 else if (*cp
== *start
)
236 /* Skip everything up to an identifier start. */
237 static const char *skipEverything (const char *cp
)
243 if (*cp
== '"' || *cp
== '\'' || *cp
== '#')
246 /* these checks find unicode, binary (Python 3) and raw strings */
248 !strncasecmp(cp
, "u'", 2) || !strncasecmp(cp
, "u\"", 2) ||
249 !strncasecmp(cp
, "r'", 2) || !strncasecmp(cp
, "r\"", 2) ||
250 !strncasecmp(cp
, "b'", 2) || !strncasecmp(cp
, "b\"", 2)))
256 !strncasecmp(cp
, "ur'", 3) || !strncasecmp(cp
, "ur\"", 3) ||
257 !strncasecmp(cp
, "br'", 3) || !strncasecmp(cp
, "br\"", 3)))
267 if (isIdentifierFirstCharacter ((int) *cp
))
270 cp
--; /* avoid jumping over the character after a skipped string */
/* Skip an identifier: return a pointer to the first character at or after cp
 * that isIdentifierCharacter() rejects (possibly cp itself, or the
 * terminating NUL). */
static const char *skipIdentifier (const char *cp)
{
	const char *end;

	for (end = cp; isIdentifierCharacter ((int) *end); end++)
		;	/* nothing to do, just advance */

	return end;
}
283 static const char *findDefinitionOrClass (const char *cp
)
287 cp
= skipEverything (cp
);
288 if (!strncmp(cp
, "def", 3) || !strncmp(cp
, "class", 5) ||
289 !strncmp(cp
, "cdef", 4) || !strncmp(cp
, "cpdef", 5))
293 cp
= skipIdentifier (cp
);
/* Advance past any leading whitespace (as classified by isspace()) and return
 * a pointer to the first non-whitespace character, which is the terminating
 * NUL when the rest of the string is blank.
 *
 * The character is converted to unsigned char before being classified: with a
 * signed plain char, bytes >= 0x80 would otherwise reach isspace() as
 * negative values, which is undefined behaviour. */
static const char *skipSpace (const char *cp)
{
	while (isspace ((unsigned char) *cp))
		cp++;
	return cp;
}
305 /* Starting at ''cp'', parse an identifier into ''identifier''. */
306 static const char *parseIdentifier (const char *cp
, vString
*const identifier
)
308 vStringClear (identifier
);
309 while (isIdentifierCharacter ((int) *cp
))
311 vStringPut (identifier
, (int) *cp
);
314 vStringTerminate (identifier
);
318 static void parseClass (const char *cp
, vString
*const class,
319 vString
*const parent
, int is_class_parent
)
321 vString
*const inheritance
= vStringNew ();
322 vStringClear (inheritance
);
323 cp
= parseIdentifier (cp
, class);
332 /* Closing parenthesis can be in follow up line. */
333 cp
= (const char *) fileReadLine ();
335 vStringPut (inheritance
, ' ');
338 vStringPut (inheritance
, *cp
);
341 vStringTerminate (inheritance
);
343 makeClassTag (class, inheritance
, parent
, is_class_parent
);
344 vStringDelete (inheritance
);
347 static void parseImports (const char *cp
)
350 vString
*name
, *name_next
;
352 cp
= skipEverything (cp
);
354 if ((pos
= strstr (cp
, "import")) == NULL
)
359 /* continue only if there is some space between the keyword and the identifier */
366 name
= vStringNew ();
367 name_next
= vStringNew ();
369 cp
= skipEverything (cp
);
372 cp
= parseIdentifier (cp
, name
);
374 cp
= skipEverything (cp
);
375 /* we parse the next possible import statement as well to be able to ignore 'foo' in
376 * 'import foo as bar' */
377 parseIdentifier (cp
, name_next
);
379 /* take the current tag only if the next one is not "as" */
380 if (strcmp (vStringValue (name_next
), "as") != 0 &&
381 strcmp (vStringValue (name
), "as") != 0)
383 makeSimpleTag (name
, PythonKinds
, K_IMPORT
);
386 vStringDelete (name
);
387 vStringDelete (name_next
);
390 /* modified from get.c getArglistFromStr().
391 * warning: terminates rest of string past arglist!
392 * note: does not ignore brackets inside strings! */
393 static char *parseArglist(const char *buf
)
399 if (NULL
== (start
= strchr(buf
, '(')))
401 for (level
= 1, end
= start
+ 1; level
> 0; ++end
)
405 else if ('(' == *end
)
407 else if (')' == *end
)
411 return strdup(start
);
414 static void parseFunction (const char *cp
, vString
*const def
,
415 vString
*const parent
, int is_class_parent
)
419 cp
= parseIdentifier (cp
, def
);
420 arglist
= parseArglist (cp
);
421 makeFunctionTag (def
, parent
, is_class_parent
, arglist
);
426 /* Get the combined name of a nested symbol. Classes are separated with ".",
427 * functions with "/". For example this code:
434 * Would produce this string:
435 * MyClass.MyFunction/SubFunction/SubClass.Method
437 static boolean
constructParentString(NestingLevels
*nls
, int indent
,
441 NestingLevel
*prev
= NULL
;
442 int is_class
= FALSE
;
443 vStringClear (result
);
444 for (i
= 0; i
< nls
->n
; i
++)
446 NestingLevel
*nl
= nls
->levels
+ i
;
447 if (indent
<= nl
->indentation
)
451 vStringCatS(result
, "."); /* make Geany symbol list grouping work properly */
453 if (prev->type == K_CLASS)
454 vStringCatS(result, ".");
456 vStringCatS(result, "/");
459 vStringCat(result
, nl
->name
);
460 is_class
= (nl
->type
== K_CLASS
);
466 /* Check whether parent's indentation level is higher than the current level and
469 static void checkParent(NestingLevels
*nls
, int indent
, vString
*parent
)
474 for (i
= 0; i
< nls
->n
; i
++)
477 /* is there a better way to compare two vStrings? */
478 if (n
&& strcmp(vStringValue(parent
), vStringValue(n
->name
)) == 0)
480 if (indent
<= n
->indentation
)
482 /* remove this level by clearing its name */
483 vStringClear(n
->name
);
490 static void addNestingLevel(NestingLevels
*nls
, int indentation
,
491 const vString
*name
, boolean is_class
)
494 NestingLevel
*nl
= NULL
;
496 for (i
= 0; i
< nls
->n
; i
++)
498 nl
= nls
->levels
+ i
;
499 if (indentation
<= nl
->indentation
) break;
503 nestingLevelsPush(nls
, name
, 0);
504 nl
= nls
->levels
+ i
;
507 { /* reuse existing slot */
509 vStringCopy(nl
->name
, name
);
511 nl
->indentation
= indentation
;
512 nl
->type
= is_class
? K_CLASS
: !K_CLASS
;
515 /* Return a pointer to the start of the next triple string, or NULL. Store
516 * the kind of triple string in "which" if the return is not NULL.
518 static char const *find_triple_start(char const *string
, char const **which
)
520 char const *cp
= string
;
527 if (*cp
== '"' || *cp
== '\'')
529 if (strncmp(cp
, doubletriple
, 3) == 0)
531 *which
= doubletriple
;
534 if (strncmp(cp
, singletriple
, 3) == 0)
536 *which
= singletriple
;
541 cp
--; /* avoid jumping over the character after a skipped string */
547 /* Find the end of a triple string as pointed to by "which", and update "which"
548 * with any other triple strings following in the given string.
550 static void find_triple_end(char const *string
, char const **which
)
552 char const *s
= string
;
555 /* Check if the string ends in the same line. */
556 s
= strstr (s
, *which
);
560 /* If yes, check if another one starts in the same line. */
561 s
= find_triple_start(s
, which
);
567 static const char *findVariable(const char *line
)
569 /* Parse global and class variable names (C.x) from assignment statements.
570 * Object attributes (obj.x) are ignored.
571 * Assignment to a tuple 'x, y = 2, 3' not supported.
572 * TODO: ignore duplicate tags from reassignment statements. */
573 const char *cp
, *sp
, *eq
, *start
;
575 cp
= strstr(line
, "=");
582 return NULL
; /* ignore '==' operator and 'x=5,y=6)' function lines */
583 if (*eq
== '(' || *eq
== '#')
584 break; /* allow 'x = func(b=2,y=2,' lines and comments at the end of line */
588 /* go backwards to the start of the line, checking we have valid chars */
590 while (start
>= line
&& isspace ((int) *start
))
592 while (start
>= line
&& isIdentifierCharacter ((int) *start
))
594 if (!isIdentifierFirstCharacter(*(start
+ 1)))
597 while (sp
>= line
&& isspace ((int) *sp
))
599 if ((sp
+ 1) != line
) /* the line isn't a simple variable assignment */
601 /* the line is valid, parse the variable name */
606 /* Skip type declaration that optionally follows a cdef/cpdef */
607 static const char *skipTypeDecl (const char *cp
, boolean
*is_class
)
609 const char *lastStart
= cp
, *ptr
= cp
;
612 if (!strncmp("extern", ptr
, 6)) {
614 ptr
= skipSpace(ptr
);
615 if (!strncmp("from", ptr
, 4)) { return NULL
; }
617 if (!strncmp("class", ptr
, 5)) {
620 ptr
= skipSpace(ptr
);
623 /* limit so that we don't pick off "int item=obj()" */
624 while (*ptr
&& loopCount
++ < 2) {
625 while (*ptr
&& *ptr
!= '=' && *ptr
!= '(' && !isspace(*ptr
)) {
626 /* skip over e.g. 'cpdef numpy.ndarray[dtype=double, ndim=1]' */
628 while (*ptr
&& *ptr
!= ']') ptr
++;
634 if (!*ptr
|| *ptr
== '=') return NULL
;
636 return lastStart
; /* if we stopped on a '(' we are done */
638 ptr
= skipSpace(ptr
);
640 while (*lastStart
== '*') lastStart
++; /* cdef int *identifier */
645 /* checks if there is a lambda at position of cp, and return its argument list
647 * We don't return the lambda name since it is useless for now since we already
648 * know it when we call this function, and it would be a little slower. */
649 static boolean
varIsLambda (const char *cp
, char **arglist
)
651 boolean is_lambda
= FALSE
;
654 cp
= skipIdentifier (cp
); /* skip the lambda's name */
660 if (strncmp (cp
, "lambda", 6) == 0)
664 cp
+= 6; /* skip the lambda */
665 tmp
= skipSpace (cp
);
666 /* check if there is a space after lambda to detect assignments
667 * starting with 'lambdaXXX' */
670 vString
*args
= vStringNew ();
673 vStringPut (args
, '(');
674 for (; *cp
!= 0 && *cp
!= ':'; cp
++)
675 vStringPut (args
, *cp
);
676 vStringPut (args
, ')');
677 vStringTerminate (args
);
679 *arglist
= strdup (vStringValue (args
));
680 vStringDelete (args
);
688 /* checks if @p cp has keyword @p keyword at the start, and fills @p cp_n with
689 * the position of the next non-whitespace after the keyword */
690 static boolean
matchKeyword (const char *keyword
, const char *cp
, const char **cp_n
)
692 size_t kw_len
= strlen (keyword
);
693 if (strncmp (cp
, keyword
, kw_len
) == 0 && isspace (cp
[kw_len
]))
695 *cp_n
= skipSpace (&cp
[kw_len
+ 1]);
701 static void findPythonTags (void)
703 vString
*const continuation
= vStringNew ();
704 vString
*const name
= vStringNew ();
705 vString
*const parent
= vStringNew();
707 NestingLevels
*const nesting_levels
= nestingLevelsNew();
711 char const *longStringLiteral
= NULL
;
713 while ((line
= (const char *) fileReadLine ()) != NULL
)
715 const char *cp
= line
, *candidate
;
716 char const *longstring
;
717 char const *keyword
, *variable
;
722 if (*cp
== '\0') /* skip blank line */
725 /* Skip comment if we are not inside a multi-line string. */
726 if (*cp
== '#' && !longStringLiteral
)
729 /* Deal with line continuation. */
730 if (!line_skip
) vStringClear(continuation
);
731 vStringCatS(continuation
, line
);
732 vStringStripTrailing(continuation
);
733 if (vStringLast(continuation
) == '\\')
735 vStringChop(continuation
);
736 vStringCatS(continuation
, " ");
740 cp
= line
= vStringValue(continuation
);
745 /* Deal with multiline string ending. */
746 if (longStringLiteral
)
748 find_triple_end(cp
, &longStringLiteral
);
752 checkParent(nesting_levels
, indent
, parent
);
754 /* Find global and class variables */
755 variable
= findVariable(line
);
758 const char *start
= variable
;
760 boolean parent_is_class
;
763 while (isIdentifierCharacter ((int) *start
))
765 vStringPut (name
, (int) *start
);
768 vStringTerminate (name
);
770 parent_is_class
= constructParentString(nesting_levels
, indent
, parent
);
771 if (varIsLambda (variable
, &arglist
))
773 /* show class members or top-level script lambdas only */
774 if (parent_is_class
|| vStringLength(parent
) == 0)
775 makeFunctionTag (name
, parent
, parent_is_class
, arglist
);
780 /* skip variables in methods */
781 if (parent_is_class
|| vStringLength(parent
) == 0)
782 makeVariableTag (name
, parent
, parent_is_class
);
786 /* Deal with multiline string start. */
787 longstring
= find_triple_start(cp
, &longStringLiteral
);
791 find_triple_end(longstring
, &longStringLiteral
);
792 /* We don't parse for any tags in the rest of the line. */
796 /* Deal with def and class keywords. */
797 keyword
= findDefinitionOrClass (cp
);
800 boolean found
= FALSE
;
801 boolean is_class
= FALSE
;
802 if (matchKeyword ("def", keyword
, &cp
))
806 else if (matchKeyword ("class", keyword
, &cp
))
811 else if (matchKeyword ("cdef", keyword
, &cp
))
813 candidate
= skipTypeDecl (cp
, &is_class
);
821 else if (matchKeyword ("cpdef", keyword
, &cp
))
823 candidate
= skipTypeDecl (cp
, &is_class
);
833 boolean is_parent_class
;
836 constructParentString(nesting_levels
, indent
, parent
);
839 parseClass (cp
, name
, parent
, is_parent_class
);
841 parseFunction(cp
, name
, parent
, is_parent_class
);
843 addNestingLevel(nesting_levels
, indent
, name
, is_class
);
846 /* Find and parse imports */
849 /* Clean up all memory we allocated. */
850 vStringDelete (parent
);
851 vStringDelete (name
);
852 vStringDelete (continuation
);
853 nestingLevelsFree (nesting_levels
);
856 extern parserDefinition
*PythonParser (void)
858 static const char *const extensions
[] = { "py", "pyx", "pxd", "pxi" ,"scons", NULL
};
859 parserDefinition
*def
= parserNew ("Python");
860 def
->kinds
= PythonKinds
;
861 def
->kindCount
= KIND_COUNT (PythonKinds
);
862 def
->extensions
= extensions
;
863 def
->parser
= findPythonTags
;
867 /* vi:set tabstop=4 shiftwidth=4: */