2 * Copyright (c) 2000-2003, Darren Hiebert
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License.
7 * This module contains functions for generating tags for Python language
#include "general.h" /* must always come first */

#include <stdlib.h>

#include "nestlevel.h"
28 K_CLASS
, K_FUNCTION
, K_MEMBER
, K_VARIABLE
, K_IMPORT
31 static kindOption PythonKinds
[] = {
32 {TRUE
, 'c', "class", "classes"},
33 {TRUE
, 'f', "function", "functions"},
34 {TRUE
, 'm', "member", "class members"},
35 {TRUE
, 'v', "variable", "variables"},
36 {TRUE
, 'i', "namespace", "imports"}
39 static char const * const singletriple
= "'''";
40 static char const * const doubletriple
= "\"\"\"";
43 * FUNCTION DEFINITIONS
46 #define vStringLast(vs) ((vs)->buffer[(vs)->length - 1])
48 static boolean
isIdentifierFirstCharacter (int c
)
50 return (boolean
) (isalpha (c
) || c
== '_');
53 static boolean
isIdentifierCharacter (int c
)
55 return (boolean
) (isalnum (c
) || c
== '_');
58 static const char *get_class_name_from_parent (const char *parent
)
65 result
= strrchr (parent
, '.');
72 result
= strrchr (parent
, '/');
82 /* Given a string with the contents of a line directly after the "def" keyword,
83 * extract all relevant information and create a tag.
85 static void makeFunctionTag (vString
*const function
,
86 vString
*const parent
, int is_class_parent
, const char *arglist
)
89 initTagEntry (&tag
, vStringValue (function
));
91 tag
.kindName
= "function";
93 tag
.extensionFields
.arglist
= arglist
;
94 /* add argument list of __init__() methods to the class tag */
95 if (strcmp (vStringValue (function
), "__init__") == 0 && parent
!= NULL
)
97 const char *parent_tag_name
= get_class_name_from_parent (vStringValue (parent
));
98 if (parent_tag_name
!= NULL
)
99 setTagArglistByName (parent_tag_name
, arglist
);
102 if (vStringLength (parent
) > 0)
106 tag
.kindName
= "member";
108 tag
.extensionFields
.scope
[0] = "class";
109 tag
.extensionFields
.scope
[1] = vStringValue (parent
);
113 tag
.extensionFields
.scope
[0] = "function";
114 tag
.extensionFields
.scope
[1] = vStringValue (parent
);
118 /* If a function starts with __, we mark it as file scope.
119 * FIXME: What is the proper way to signal such attributes?
120 * TODO: What does functions/classes starting with _ and __ mean in python?
122 if (strncmp (vStringValue (function
), "__", 2) == 0 &&
123 strcmp (vStringValue (function
), "__init__") != 0)
125 tag
.extensionFields
.access
= "private";
126 tag
.isFileScope
= TRUE
;
130 tag
.extensionFields
.access
= "public";
135 /* Given a string with the contents of the line directly after the "class"
136 * keyword, extract all necessary information and create a tag.
138 static void makeClassTag (vString
*const class, vString
*const inheritance
,
139 vString
*const parent
, int is_class_parent
)
142 initTagEntry (&tag
, vStringValue (class));
143 tag
.kindName
= "class";
145 if (vStringLength (parent
) > 0)
149 tag
.extensionFields
.scope
[0] = "class";
150 tag
.extensionFields
.scope
[1] = vStringValue (parent
);
154 tag
.extensionFields
.scope
[0] = "function";
155 tag
.extensionFields
.scope
[1] = vStringValue (parent
);
158 tag
.extensionFields
.inheritance
= vStringValue (inheritance
);
162 static void makeVariableTag (vString
*const var
, vString
*const parent
)
165 initTagEntry (&tag
, vStringValue (var
));
166 tag
.kindName
= "variable";
168 if (vStringLength (parent
) > 0)
170 tag
.extensionFields
.scope
[0] = "class";
171 tag
.extensionFields
.scope
[1] = vStringValue (parent
);
176 /* Skip a single or double quoted string. */
177 static const char *skipString (const char *cp
)
179 const char *start
= cp
;
181 for (cp
++; *cp
; cp
++)
185 else if (*cp
== '\\')
187 else if (*cp
== *start
)
193 /* Skip everything up to an identifier start. */
194 static const char *skipEverything (const char *cp
)
200 if (*cp
== '"' || *cp
== '\'' || *cp
== '#')
203 /* these checks find unicode, binary (Python 3) and raw strings */
205 !strncasecmp(cp
, "u'", 2) || !strncasecmp(cp
, "u\"", 2) ||
206 !strncasecmp(cp
, "r'", 2) || !strncasecmp(cp
, "r\"", 2) ||
207 !strncasecmp(cp
, "b'", 2) || !strncasecmp(cp
, "b\"", 2)))
213 !strncasecmp(cp
, "ur'", 3) || !strncasecmp(cp
, "ur\"", 3) ||
214 !strncasecmp(cp
, "br'", 3) || !strncasecmp(cp
, "br\"", 3)))
224 if (isIdentifierFirstCharacter ((int) *cp
))
/* Advances past a run of identifier characters.  Returns a pointer to the
 * first character at or after cp that is not an identifier character; this is
 * cp itself when cp does not point at an identifier character. */
static const char *skipIdentifier (const char *cp)
{
	for (; isIdentifierCharacter ((int) *cp); ++cp)
		;	/* the loop header does all the work */
	return cp;
}
238 static const char *findDefinitionOrClass (const char *cp
)
242 cp
= skipEverything (cp
);
243 if (!strncmp(cp
, "def", 3) || !strncmp(cp
, "class", 5) ||
244 !strncmp(cp
, "cdef", 4) || !strncmp(cp
, "cpdef", 5))
248 cp
= skipIdentifier (cp
);
/* Returns a pointer to the first non-whitespace character at or after cp.
 * The result may point at the terminating NUL when only whitespace remains.
 *
 * The byte is converted to unsigned char before being classified: *cp is a
 * plain `char`, which is negative for bytes >= 0x80 wherever `char` is signed,
 * and passing a negative value to isspace() is undefined behaviour.
 */
static const char *skipSpace (const char *cp)
{
	while (isspace ((unsigned char) *cp))
		cp++;
	return cp;
}
260 /* Starting at ''cp'', parse an identifier into ''identifier''. */
261 static const char *parseIdentifier (const char *cp
, vString
*const identifier
)
263 vStringClear (identifier
);
264 while (isIdentifierCharacter ((int) *cp
))
266 vStringPut (identifier
, (int) *cp
);
269 vStringTerminate (identifier
);
273 static void parseClass (const char *cp
, vString
*const class,
274 vString
*const parent
, int is_class_parent
)
276 vString
*const inheritance
= vStringNew ();
277 vStringClear (inheritance
);
278 cp
= parseIdentifier (cp
, class);
287 /* Closing parenthesis can be in follow up line. */
288 cp
= (const char *) fileReadLine ();
290 vStringPut (inheritance
, ' ');
293 vStringPut (inheritance
, *cp
);
296 vStringTerminate (inheritance
);
298 makeClassTag (class, inheritance
, parent
, is_class_parent
);
299 vStringDelete (inheritance
);
302 static void parseImports (const char *cp
)
305 vString
*name
, *name_next
;
307 cp
= skipEverything (cp
);
309 if ((pos
= strstr (cp
, "import")) == NULL
)
314 /* continue only if there is some space between the keyword and the identifier */
321 name
= vStringNew ();
322 name_next
= vStringNew ();
324 cp
= skipEverything (cp
);
327 cp
= parseIdentifier (cp
, name
);
329 cp
= skipEverything (cp
);
330 /* we parse the next possible import statement as well to be able to ignore 'foo' in
331 * 'import foo as bar' */
332 parseIdentifier (cp
, name_next
);
334 /* take the current tag only if the next one is not "as" */
335 if (strcmp (vStringValue (name_next
), "as") != 0 &&
336 strcmp (vStringValue (name
), "as") != 0)
338 makeSimpleTag (name
, PythonKinds
, K_IMPORT
);
341 vStringDelete (name
);
342 vStringDelete (name_next
);
/* Extracts the parenthesised argument list found in buf
 * (modified from get.c getArglistFromStr()).
 *
 * The list starts at the first '(' in buf and runs up to and including the
 * matching ')'; nested parentheses are balanced.  If the list is not closed
 * within buf, everything from the '(' to the end of the string is taken.
 *
 * Returns a newly allocated, NUL-terminated copy of the list which the caller
 * must free, or NULL if buf is NULL, contains no '(', or memory is exhausted.
 *
 * buf is never written to: the list is copied out rather than terminated in
 * place, so read-only strings are safe to pass and callers that scan the same
 * line again afterwards still see all of it.
 *
 * note: does not ignore brackets inside strings! */
static char *parseArglist(const char *buf)
{
	const char *start, *end;
	char *arglist;
	size_t length;
	int level;

	if (NULL == buf)
		return NULL;
	if (NULL == (start = strchr(buf, '(')))
		return NULL;

	/* end finishes one past the matching ')' or on the terminating NUL */
	for (level = 1, end = start + 1; level > 0 && '\0' != *end; ++end)
	{
		if ('(' == *end)
			++level;
		else if (')' == *end)
			--level;
	}

	length = (size_t) (end - start);
	arglist = malloc(length + 1);
	if (NULL != arglist)
	{
		memcpy(arglist, start, length);
		arglist[length] = '\0';
	}
	return arglist;
}
369 static void parseFunction (const char *cp
, vString
*const def
,
370 vString
*const parent
, int is_class_parent
)
374 cp
= parseIdentifier (cp
, def
);
375 arglist
= parseArglist (cp
);
376 makeFunctionTag (def
, parent
, is_class_parent
, arglist
);
381 /* Get the combined name of a nested symbol. Classes are separated with ".",
382 * functions with "/". For example this code:
389 * Would produce this string:
390 * MyClass.MyFunction/SubFunction/SubClass.Method
392 static boolean
constructParentString(NestingLevels
*nls
, int indent
,
396 NestingLevel
*prev
= NULL
;
397 int is_class
= FALSE
;
398 vStringClear (result
);
399 for (i
= 0; i
< nls
->n
; i
++)
401 NestingLevel
*nl
= nls
->levels
+ i
;
402 if (indent
<= nl
->indentation
)
406 vStringCatS(result
, "."); /* make Geany symbol list grouping work properly */
408 if (prev->type == K_CLASS)
409 vStringCatS(result, ".");
411 vStringCatS(result, "/");
414 vStringCat(result
, nl
->name
);
415 is_class
= (nl
->type
== K_CLASS
);
/* Check whether the nesting level named by parent is indented at least as far
 * as the current line and, if so, remove that level by clearing its name.
 */
424 static void checkParent(NestingLevels
*nls
, int indent
, vString
*parent
)
429 for (i
= 0; i
< nls
->n
; i
++)
432 /* is there a better way to compare two vStrings? */
433 if (n
&& strcmp(vStringValue(parent
), vStringValue(n
->name
)) == 0)
435 if (indent
<= n
->indentation
)
437 /* remove this level by clearing its name */
438 vStringClear(n
->name
);
445 static void addNestingLevel(NestingLevels
*nls
, int indentation
,
446 const vString
*name
, boolean is_class
)
449 NestingLevel
*nl
= NULL
;
451 for (i
= 0; i
< nls
->n
; i
++)
453 nl
= nls
->levels
+ i
;
454 if (indentation
<= nl
->indentation
) break;
458 nestingLevelsPush(nls
, name
, 0);
459 nl
= nls
->levels
+ i
;
462 { /* reuse existing slot */
464 vStringCopy(nl
->name
, name
);
466 nl
->indentation
= indentation
;
467 nl
->type
= is_class
? K_CLASS
: !K_CLASS
;
470 /* Return a pointer to the start of the next triple string, or NULL. Store
471 * the kind of triple string in "which" if the return is not NULL.
473 static char const *find_triple_start(char const *string
, char const **which
)
475 char const *cp
= string
;
482 if (*cp
== '"' || *cp
== '\'')
484 if (strncmp(cp
, doubletriple
, 3) == 0)
486 *which
= doubletriple
;
489 if (strncmp(cp
, singletriple
, 3) == 0)
491 *which
= singletriple
;
501 /* Find the end of a triple string as pointed to by "which", and update "which"
502 * with any other triple strings following in the given string.
504 static void find_triple_end(char const *string
, char const **which
)
506 char const *s
= string
;
509 /* Check if the string ends in the same line. */
510 s
= strstr (s
, *which
);
514 /* If yes, check if another one starts in the same line. */
515 s
= find_triple_start(s
, which
);
521 static const char *findVariable(const char *line
)
523 /* Parse global and class variable names (C.x) from assignment statements.
524 * Object attributes (obj.x) are ignored.
525 * Assignment to a tuple 'x, y = 2, 3' not supported.
526 * TODO: ignore duplicate tags from reassignment statements. */
527 const char *cp
, *sp
, *eq
, *start
;
529 cp
= strstr(line
, "=");
536 return NULL
; /* ignore '==' operator and 'x=5,y=6)' function lines */
537 if (*eq
== '(' || *eq
== '#')
538 break; /* allow 'x = func(b=2,y=2,' lines and comments at the end of line */
542 /* go backwards to the start of the line, checking we have valid chars */
544 while (start
>= line
&& isspace ((int) *start
))
546 while (start
>= line
&& isIdentifierCharacter ((int) *start
))
548 if (!isIdentifierFirstCharacter(*(start
+ 1)))
551 while (sp
>= line
&& isspace ((int) *sp
))
553 if ((sp
+ 1) != line
) /* the line isn't a simple variable assignment */
555 /* the line is valid, parse the variable name */
560 /* Skip type declaration that optionally follows a cdef/cpdef */
561 static const char *skipTypeDecl (const char *cp
, boolean
*is_class
)
563 const char *lastStart
= cp
, *ptr
= cp
;
566 if (!strncmp("extern", ptr
, 6)) {
568 ptr
= skipSpace(ptr
);
569 if (!strncmp("from", ptr
, 4)) { return NULL
; }
571 if (!strncmp("class", ptr
, 5)) {
574 ptr
= skipSpace(ptr
);
577 /* limit so that we don't pick off "int item=obj()" */
578 while (*ptr
&& loopCount
++ < 2) {
579 while (*ptr
&& *ptr
!= '=' && *ptr
!= '(' && !isspace(*ptr
)) ptr
++;
580 if (!*ptr
|| *ptr
== '=') return NULL
;
582 return lastStart
; /* if we stopped on a '(' we are done */
584 ptr
= skipSpace(ptr
);
586 while (*lastStart
== '*') lastStart
++; /* cdef int *identifier */
/* Checks whether there is a lambda at the position of cp and, if so, returns
 * its argument list through arglist.
 * We don't return the lambda name since it is useless for now: we already
 * know it when we call this function, and it would be a little slower. */
595 static boolean
varIsLambda (const char *cp
, char **arglist
)
597 boolean is_lambda
= FALSE
;
600 cp
= skipIdentifier (cp
); /* skip the lambda's name */
606 if (strncmp (cp
, "lambda", 6) == 0)
610 cp
+= 6; /* skip the lambda */
611 tmp
= skipSpace (cp
);
612 /* check if there is a space after lambda to detect assignations
613 * starting with 'lambdaXXX' */
616 vString
*args
= vStringNew ();
619 vStringPut (args
, '(');
620 for (; *cp
!= 0 && *cp
!= ':'; cp
++)
621 vStringPut (args
, *cp
);
622 vStringPut (args
, ')');
623 vStringTerminate (args
);
625 *arglist
= strdup (vStringValue (args
));
626 vStringDelete (args
);
634 static void findPythonTags (void)
636 vString
*const continuation
= vStringNew ();
637 vString
*const name
= vStringNew ();
638 vString
*const parent
= vStringNew();
640 NestingLevels
*const nesting_levels
= nestingLevelsNew();
644 char const *longStringLiteral
= NULL
;
646 while ((line
= (const char *) fileReadLine ()) != NULL
)
648 const char *cp
= line
, *candidate
;
649 char const *longstring
;
650 char const *keyword
, *variable
;
655 if (*cp
== '\0') /* skip blank line */
658 /* Skip comment if we are not inside a multi-line string. */
659 if (*cp
== '#' && !longStringLiteral
)
662 /* Deal with line continuation. */
663 if (!line_skip
) vStringClear(continuation
);
664 vStringCatS(continuation
, line
);
665 vStringStripTrailing(continuation
);
666 if (vStringLast(continuation
) == '\\')
668 vStringChop(continuation
);
669 vStringCatS(continuation
, " ");
673 cp
= line
= vStringValue(continuation
);
678 /* Deal with multiline string ending. */
679 if (longStringLiteral
)
681 find_triple_end(cp
, &longStringLiteral
);
685 checkParent(nesting_levels
, indent
, parent
);
687 /* Deal with multiline string start. */
688 longstring
= find_triple_start(cp
, &longStringLiteral
);
692 find_triple_end(longstring
, &longStringLiteral
);
693 /* We don't parse for any tags in the rest of the line. */
697 /* Deal with def and class keywords. */
698 keyword
= findDefinitionOrClass (cp
);
701 boolean found
= FALSE
;
702 boolean is_class
= FALSE
;
703 if (!strncmp (keyword
, "def ", 4))
705 cp
= skipSpace (keyword
+ 3);
708 else if (!strncmp (keyword
, "class ", 6))
710 cp
= skipSpace (keyword
+ 5);
714 else if (!strncmp (keyword
, "cdef ", 5))
716 cp
= skipSpace(keyword
+ 4);
717 candidate
= skipTypeDecl (cp
, &is_class
);
725 else if (!strncmp (keyword
, "cpdef ", 6))
727 cp
= skipSpace(keyword
+ 5);
728 candidate
= skipTypeDecl (cp
, &is_class
);
738 boolean is_parent_class
;
741 constructParentString(nesting_levels
, indent
, parent
);
744 parseClass (cp
, name
, parent
, is_parent_class
);
746 parseFunction(cp
, name
, parent
, is_parent_class
);
748 addNestingLevel(nesting_levels
, indent
, name
, is_class
);
751 /* Find global and class variables */
752 variable
= findVariable(line
);
755 const char *start
= variable
;
757 boolean parent_is_class
;
760 while (isIdentifierCharacter ((int) *start
))
762 vStringPut (name
, (int) *start
);
765 vStringTerminate (name
);
767 parent_is_class
= constructParentString(nesting_levels
, indent
, parent
);
768 if (varIsLambda (variable
, &arglist
))
770 /* show class members or top-level script lambdas only */
771 if (parent_is_class
|| vStringLength(parent
) == 0)
772 makeFunctionTag (name
, parent
, parent_is_class
, arglist
);
778 /* skip variables in methods */
779 if (! parent_is_class
&& vStringLength(parent
) > 0)
782 makeVariableTag (name
, parent
);
785 /* Find and parse imports */
788 /* Clean up all memory we allocated. */
789 vStringDelete (parent
);
790 vStringDelete (name
);
791 vStringDelete (continuation
);
792 nestingLevelsFree (nesting_levels
);
795 extern parserDefinition
*PythonParser (void)
797 static const char *const extensions
[] = { "py", "pyx", "pxd", "pxi" ,"scons", NULL
};
798 parserDefinition
*def
= parserNew ("Python");
799 def
->kinds
= PythonKinds
;
800 def
->kindCount
= KIND_COUNT (PythonKinds
);
801 def
->extensions
= extensions
;
802 def
->parser
= findPythonTags
;
806 /* vi:set tabstop=4 shiftwidth=4: */