4 * Copyright (c) 2000-2003, Darren Hiebert
6 * This source code is released for free distribution under the terms of the
7 * GNU General Public License.
9 * This module contains functions for generating tags for Python language
15 #include "general.h" /* must always come first */
24 #include "nestlevel.h"
30 K_CLASS
, K_FUNCTION
, K_MEMBER
, K_VARIABLE
, K_IMPORT
33 static kindOption PythonKinds
[] = {
34 {TRUE
, 'c', "class", "classes"},
35 {TRUE
, 'f', "function", "functions"},
36 {TRUE
, 'm', "member", "class members"},
37 {TRUE
, 'v', "variable", "variables"},
38 {TRUE
, 'i', "namespace", "imports"}
/* The two delimiters that open and close a triple-quoted string literal;
 * find_triple_start()/find_triple_end() hand these around by pointer to record
 * which kind of triple string is currently open. */
static const char *const singletriple = "'''";
static const char *const doubletriple = "\"\"\"";
45 * FUNCTION DEFINITIONS
/* Last character stored in the string `vs`, or '\0' when `vs` is empty.
 * The length guard prevents indexing buffer[length - 1] with length == 0,
 * which would read outside the buffer.
 * The result is a value, not an lvalue.  `vs` is evaluated more than once, so
 * pass an expression without side effects. */
#define vStringLast(vs) \
	((vs)->length > 0 ? (vs)->buffer[(vs)->length - 1] : '\0')
50 static boolean
isIdentifierFirstCharacter (int c
)
52 return (boolean
) (isalpha (c
) || c
== '_');
55 static boolean
isIdentifierCharacter (int c
)
57 return (boolean
) (isalnum (c
) || c
== '_');
60 static const char *get_class_name_from_parent (const char *parent
)
67 result
= strrchr (parent
, '.');
74 result
= strrchr (parent
, '/');
84 /* Given a string with the contents of a line directly after the "def" keyword,
85 * extract all relevant information and create a tag.
87 static void makeFunctionTag (vString
*const function
,
88 vString
*const parent
, int is_class_parent
, const char *arglist
)
91 initTagEntry (&tag
, vStringValue (function
));
93 tag
.kindName
= "function";
95 tag
.extensionFields
.arglist
= arglist
;
96 /* add argument list of __init__() methods to the class tag */
97 if (strcmp (vStringValue (function
), "__init__") == 0 && parent
!= NULL
)
99 const char *parent_tag_name
= get_class_name_from_parent (vStringValue (parent
));
100 if (parent_tag_name
!= NULL
)
101 setTagArglistByName (parent_tag_name
, arglist
);
104 if (vStringLength (parent
) > 0)
108 tag
.kindName
= "member";
110 tag
.extensionFields
.scope
[0] = "class";
111 tag
.extensionFields
.scope
[1] = vStringValue (parent
);
115 tag
.extensionFields
.scope
[0] = "function";
116 tag
.extensionFields
.scope
[1] = vStringValue (parent
);
120 /* If a function starts with __, we mark it as file scope.
121 * FIXME: What is the proper way to signal such attributes?
122 * TODO: What does functions/classes starting with _ and __ mean in python?
124 if (strncmp (vStringValue (function
), "__", 2) == 0 &&
125 strcmp (vStringValue (function
), "__init__") != 0)
127 tag
.extensionFields
.access
= "private";
128 tag
.isFileScope
= TRUE
;
132 tag
.extensionFields
.access
= "public";
137 /* Given a string with the contents of the line directly after the "class"
138 * keyword, extract all necessary information and create a tag.
140 static void makeClassTag (vString
*const class, vString
*const inheritance
,
141 vString
*const parent
, int is_class_parent
)
144 initTagEntry (&tag
, vStringValue (class));
145 tag
.kindName
= "class";
147 if (vStringLength (parent
) > 0)
151 tag
.extensionFields
.scope
[0] = "class";
152 tag
.extensionFields
.scope
[1] = vStringValue (parent
);
156 tag
.extensionFields
.scope
[0] = "function";
157 tag
.extensionFields
.scope
[1] = vStringValue (parent
);
160 tag
.extensionFields
.inheritance
= vStringValue (inheritance
);
164 static void makeVariableTag (vString
*const var
, vString
*const parent
)
167 initTagEntry (&tag
, vStringValue (var
));
168 tag
.kindName
= "variable";
170 if (vStringLength (parent
) > 0)
172 tag
.extensionFields
.scope
[0] = "class";
173 tag
.extensionFields
.scope
[1] = vStringValue (parent
);
178 /* Skip a single or double quoted string. */
179 static const char *skipString (const char *cp
)
181 const char *start
= cp
;
183 for (cp
++; *cp
; cp
++)
187 else if (*cp
== '\\')
189 else if (*cp
== *start
)
195 /* Skip everything up to an identifier start. */
196 static const char *skipEverything (const char *cp
)
200 if (*cp
== '"' || *cp
== '\'' || *cp
== '#')
205 if (isIdentifierFirstCharacter ((int) *cp
))
/* Skip an identifier: advance over the run of identifier characters starting
 * at `cp` and return a pointer to the first character that is not one (which
 * is `cp` itself when it does not point at an identifier character). */
static const char *skipIdentifier (const char *cp)
{
	/* Go through unsigned char so that bytes >= 0x80 reach the classifier as
	 * 128..255 instead of as negative values, which the <ctype.h> routines
	 * it relies on do not accept. */
	while (isIdentifierCharacter ((unsigned char) *cp))
		cp++;
	return cp;
}
219 static const char *findDefinitionOrClass (const char *cp
)
223 cp
= skipEverything (cp
);
224 if (!strncmp(cp
, "def", 3) || !strncmp(cp
, "class", 5) ||
225 !strncmp(cp
, "cdef", 4) || !strncmp(cp
, "cpdef", 5))
229 cp
= skipIdentifier (cp
);
/* Advance past leading whitespace (as classified by isspace()) and return a
 * pointer to the first non-whitespace character, or to the terminating NUL if
 * the rest of the string is blank. */
static const char *skipSpace (const char *cp)
{
	/* Cast through unsigned char: passing a negative plain char (any byte
	 * >= 0x80 where char is signed) to isspace() is undefined behaviour. */
	while (isspace ((unsigned char) *cp))
		cp++;
	return cp;
}
241 /* Starting at ''cp'', parse an identifier into ''identifier''. */
242 static const char *parseIdentifier (const char *cp
, vString
*const identifier
)
244 vStringClear (identifier
);
245 while (isIdentifierCharacter ((int) *cp
))
247 vStringPut (identifier
, (int) *cp
);
250 vStringTerminate (identifier
);
254 static void parseClass (const char *cp
, vString
*const class,
255 vString
*const parent
, int is_class_parent
)
257 vString
*const inheritance
= vStringNew ();
258 vStringClear (inheritance
);
259 cp
= parseIdentifier (cp
, class);
268 /* Closing parenthesis can be in follow up line. */
269 cp
= (const char *) fileReadLine ();
271 vStringPut (inheritance
, ' ');
274 vStringPut (inheritance
, *cp
);
277 vStringTerminate (inheritance
);
279 makeClassTag (class, inheritance
, parent
, is_class_parent
);
280 vStringDelete (inheritance
);
283 static void parseImports (const char *cp
)
286 vString
*name
, *name_next
;
288 cp
= skipEverything (cp
);
290 if ((pos
= strstr (cp
, "import")) == NULL
)
295 /* continue only if there is some space between the keyword and the identifier */
302 name
= vStringNew ();
303 name_next
= vStringNew ();
305 cp
= skipEverything (cp
);
308 cp
= parseIdentifier (cp
, name
);
310 cp
= skipEverything (cp
);
311 /* we parse the next possible import statement as well to be able to ignore 'foo' in
312 * 'import foo as bar' */
313 parseIdentifier (cp
, name_next
);
315 /* take the current tag only if the next one is not "as" */
316 if (strcmp (vStringValue (name_next
), "as") != 0 &&
317 strcmp (vStringValue (name
), "as") != 0)
319 makeSimpleTag (name
, PythonKinds
, K_IMPORT
);
322 vStringDelete (name
);
323 vStringDelete (name_next
);
326 /* modified from get.c getArglistFromStr().
327 * warning: terminates rest of string past arglist!
328 * note: does not ignore brackets inside strings! */
329 static char *parseArglist(const char *buf
)
335 if (NULL
== (start
= strchr(buf
, '(')))
337 for (level
= 1, end
= start
+ 1; level
> 0; ++end
)
341 else if ('(' == *end
)
343 else if (')' == *end
)
347 return strdup(start
);
350 static void parseFunction (const char *cp
, vString
*const def
,
351 vString
*const parent
, int is_class_parent
)
355 cp
= parseIdentifier (cp
, def
);
356 arglist
= parseArglist (cp
);
357 makeFunctionTag (def
, parent
, is_class_parent
, arglist
);
362 /* Get the combined name of a nested symbol. Classes are separated with ".",
363 * functions with "/". For example this code:
370 * Would produce this string:
371 * MyClass.MyFunction/SubFunction/SubClass.Method
373 static boolean
constructParentString(NestingLevels
*nls
, int indent
,
377 NestingLevel
*prev
= NULL
;
378 int is_class
= FALSE
;
379 vStringClear (result
);
380 for (i
= 0; i
< nls
->n
; i
++)
382 NestingLevel
*nl
= nls
->levels
+ i
;
383 if (indent
<= nl
->indentation
)
387 vStringCatS(result
, "."); /* make Geany symbol list grouping work properly */
389 if (prev->type == K_CLASS)
390 vStringCatS(result, ".");
392 vStringCatS(result, "/");
395 vStringCat(result
, nl
->name
);
396 is_class
= (nl
->type
== K_CLASS
);
402 /* Check whether parent's indentation level is higher than the current level and
405 static void checkParent(NestingLevels
*nls
, int indent
, vString
*parent
)
410 for (i
= 0; i
< nls
->n
; i
++)
413 /* is there a better way to compare two vStrings? */
414 if (strcmp(vStringValue(parent
), vStringValue(n
->name
)) == 0)
416 if (n
&& indent
<= n
->indentation
)
418 /* remove this level by clearing its name */
419 vStringClear(n
->name
);
426 static void addNestingLevel(NestingLevels
*nls
, int indentation
,
427 const vString
*name
, boolean is_class
)
430 NestingLevel
*nl
= NULL
;
432 for (i
= 0; i
< nls
->n
; i
++)
434 nl
= nls
->levels
+ i
;
435 if (indentation
<= nl
->indentation
) break;
439 nestingLevelsPush(nls
, name
, 0);
440 nl
= nls
->levels
+ i
;
443 { /* reuse existing slot */
445 vStringCopy(nl
->name
, name
);
447 nl
->indentation
= indentation
;
448 nl
->type
= is_class
? K_CLASS
: !K_CLASS
;
451 /* Return a pointer to the start of the next triple string, or NULL. Store
452 * the kind of triple string in "which" if the return is not NULL.
454 static char const *find_triple_start(char const *string
, char const **which
)
456 char const *cp
= string
;
460 if (*cp
== '"' || *cp
== '\'')
462 if (strncmp(cp
, doubletriple
, 3) == 0)
464 *which
= doubletriple
;
467 if (strncmp(cp
, singletriple
, 3) == 0)
469 *which
= singletriple
;
479 /* Find the end of a triple string as pointed to by "which", and update "which"
480 * with any other triple strings following in the given string.
482 static void find_triple_end(char const *string
, char const **which
)
484 char const *s
= string
;
487 /* Check if the string ends in the same line. */
488 s
= strstr (s
, *which
);
492 /* If yes, check if another one starts in the same line. */
493 s
= find_triple_start(s
, which
);
499 static const char *findVariable(const char *line
)
501 /* Parse global and class variable names (C.x) from assignment statements.
502 * Object attributes (obj.x) are ignored.
503 * Assignment to a tuple 'x, y = 2, 3' not supported.
504 * TODO: ignore duplicate tags from reassignment statements. */
505 const char *cp
, *sp
, *eq
, *start
;
507 cp
= strstr(line
, "=");
514 return NULL
; /* ignore '==' operator and 'x=5,y=6)' function lines */
515 if (*eq
== '(' || *eq
== '#')
516 break; /* allow 'x = func(b=2,y=2,' lines and comments at the end of line */
520 /* go backwards to the start of the line, checking we have valid chars */
522 while (start
>= line
&& isspace ((int) *start
))
524 while (start
>= line
&& isIdentifierCharacter ((int) *start
))
526 if (!isIdentifierFirstCharacter(*(start
+ 1)))
529 while (sp
>= line
&& isspace ((int) *sp
))
531 if ((sp
+ 1) != line
) /* the line isn't a simple variable assignment */
533 /* the line is valid, parse the variable name */
538 /* Skip type declaration that optionally follows a cdef/cpdef */
539 static const char *skipTypeDecl (const char *cp
, boolean
*is_class
)
541 const char *lastStart
= cp
, *ptr
= cp
;
544 if (!strncmp("extern", ptr
, 6)) {
546 ptr
= skipSpace(ptr
);
547 if (!strncmp("from", ptr
, 4)) { return NULL
; }
549 if (!strncmp("class", ptr
, 5)) {
552 ptr
= skipSpace(ptr
);
555 /* limit so that we don't pick off "int item=obj()" */
556 while (*ptr
&& loopCount
++ < 2) {
557 while (*ptr
&& *ptr
!= '=' && *ptr
!= '(' && !isspace(*ptr
)) ptr
++;
558 if (!*ptr
|| *ptr
== '=') return NULL
;
560 return lastStart
; /* if we stopped on a '(' we are done */
562 ptr
= skipSpace(ptr
);
564 while (*lastStart
== '*') lastStart
++; /* cdef int *identifier */
569 /* checks if there is a lambda at position of cp, and return its argument list
571 * We don't return the lambda name since it is useless for now since we already
572 * know it when we call this function, and it would be a little slower. */
573 static boolean
varIsLambda (const char *cp
, char **arglist
)
575 boolean is_lambda
= FALSE
;
578 cp
= skipIdentifier (cp
); /* skip the lambda's name */
584 if (strncmp (cp
, "lambda", 6) == 0)
588 cp
+= 6; /* skip the lambda */
589 tmp
= skipSpace (cp
);
590 /* check if there is a space after lambda to detect assignations
591 * starting with 'lambdaXXX' */
594 vString
*args
= vStringNew ();
597 vStringPut (args
, '(');
598 for (; *cp
!= 0 && *cp
!= ':'; cp
++)
599 vStringPut (args
, *cp
);
600 vStringPut (args
, ')');
601 vStringTerminate (args
);
603 *arglist
= strdup (vStringValue (args
));
604 vStringDelete (args
);
612 static void findPythonTags (void)
614 vString
*const continuation
= vStringNew ();
615 vString
*const name
= vStringNew ();
616 vString
*const parent
= vStringNew();
618 NestingLevels
*const nesting_levels
= nestingLevelsNew();
622 char const *longStringLiteral
= NULL
;
624 while ((line
= (const char *) fileReadLine ()) != NULL
)
626 const char *cp
= line
, *candidate
;
627 char const *longstring
;
628 char const *keyword
, *variable
;
633 if (*cp
== '\0') /* skip blank line */
636 /* Skip comment if we are not inside a multi-line string. */
637 if (*cp
== '#' && !longStringLiteral
)
640 /* Deal with line continuation. */
641 if (!line_skip
) vStringClear(continuation
);
642 vStringCatS(continuation
, line
);
643 vStringStripTrailing(continuation
);
644 if (vStringLast(continuation
) == '\\')
646 vStringChop(continuation
);
647 vStringCatS(continuation
, " ");
651 cp
= line
= vStringValue(continuation
);
656 checkParent(nesting_levels
, indent
, parent
);
658 /* Deal with multiline string ending. */
659 if (longStringLiteral
)
661 find_triple_end(cp
, &longStringLiteral
);
665 /* Deal with multiline string start. */
666 longstring
= find_triple_start(cp
, &longStringLiteral
);
670 find_triple_end(longstring
, &longStringLiteral
);
671 /* We don't parse for any tags in the rest of the line. */
675 /* Deal with def and class keywords. */
676 keyword
= findDefinitionOrClass (cp
);
679 boolean found
= FALSE
;
680 boolean is_class
= FALSE
;
681 if (!strncmp (keyword
, "def ", 4))
683 cp
= skipSpace (keyword
+ 3);
686 else if (!strncmp (keyword
, "class ", 6))
688 cp
= skipSpace (keyword
+ 5);
692 else if (!strncmp (keyword
, "cdef ", 5))
694 cp
= skipSpace(keyword
+ 4);
695 candidate
= skipTypeDecl (cp
, &is_class
);
703 else if (!strncmp (keyword
, "cpdef ", 6))
705 cp
= skipSpace(keyword
+ 5);
706 candidate
= skipTypeDecl (cp
, &is_class
);
716 boolean is_parent_class
;
719 constructParentString(nesting_levels
, indent
, parent
);
722 parseClass (cp
, name
, parent
, is_parent_class
);
724 parseFunction(cp
, name
, parent
, is_parent_class
);
726 addNestingLevel(nesting_levels
, indent
, name
, is_class
);
729 /* Find global and class variables */
730 variable
= findVariable(line
);
733 const char *start
= variable
;
735 boolean parent_is_class
;
738 while (isIdentifierCharacter ((int) *start
))
740 vStringPut (name
, (int) *start
);
743 vStringTerminate (name
);
745 parent_is_class
= constructParentString(nesting_levels
, indent
, parent
);
746 if (varIsLambda (variable
, &arglist
))
748 /* show class members or top-level script lambdas only */
749 if (parent_is_class
|| vStringLength(parent
) == 0)
750 makeFunctionTag (name
, parent
, parent_is_class
, arglist
);
756 /* skip variables in methods */
757 if (! parent_is_class
&& vStringLength(parent
) > 0)
760 makeVariableTag (name
, parent
);
763 /* Find and parse imports */
766 /* Clean up all memory we allocated. */
767 vStringDelete (parent
);
768 vStringDelete (name
);
769 vStringDelete (continuation
);
770 nestingLevelsFree (nesting_levels
);
773 extern parserDefinition
*PythonParser (void)
775 static const char *const extensions
[] = { "py", "pyx", "pxd", "pxi" ,"scons", NULL
};
776 parserDefinition
*def
= parserNew ("Python");
777 def
->kinds
= PythonKinds
;
778 def
->kindCount
= KIND_COUNT (PythonKinds
);
779 def
->extensions
= extensions
;
780 def
->parser
= findPythonTags
;
784 /* vi:set tabstop=4 shiftwidth=4: */