Handle template expressions that may use the << or >> operators
[arduino-ctags.git] / python.c
bloba90d072b3acf217512c737be0e037cf5c919b1bb
/*
*   $Id: python.c 752 2010-02-27 17:52:46Z elliotth $
*
*   Copyright (c) 2000-2003, Darren Hiebert
*
*   This source code is released for free distribution under the terms of the
*   GNU General Public License.
*
*   This module contains functions for generating tags for Python language
*   files.
*/
/*
*   INCLUDE FILES
*/
15 #include "general.h" /* must always come first */
17 #include <string.h>
19 #include "entry.h"
20 #include "options.h"
21 #include "read.h"
22 #include "main.h"
23 #include "vstring.h"
24 #include "routines.h"
25 #include "debug.h"
/*
*   DATA DECLARATIONS
*/
30 typedef struct NestingLevel NestingLevel;
31 typedef struct NestingLevels NestingLevels;
33 struct NestingLevel
35 int indentation;
36 vString *name;
37 int type;
40 struct NestingLevels
42 NestingLevel *levels;
43 int n; /* number of levels in use */
44 int allocated;
/* Tag kinds, listed in the same order as the entries of PythonKinds[]. */
typedef enum {
	K_CLASS,
	K_FUNCTION,
	K_MEMBER,
	K_VARIABLE,
	K_IMPORT
} pythonKind;
/*
*   DATA DEFINITIONS
*/
54 static kindOption PythonKinds[] = {
55 {TRUE, 'c', "class", "classes"},
56 {TRUE, 'f', "function", "functions"},
57 {TRUE, 'm', "member", "class members"},
58 {TRUE, 'v', "variable", "variables"},
59 {TRUE, 'i', "namespace", "imports"}
/* The two delimiters that open and close a multi-line (triple-quoted) string. */
static const char *const singletriple = "'''";
static const char *const doubletriple = "\"\"\"";
/*
*   FUNCTION DEFINITIONS
*/
69 static NestingLevels *nestingLevelsNew (void)
71 NestingLevels *nls = xCalloc (1, NestingLevels);
72 return nls;
75 static void nestingLevelsFree (NestingLevels *nls)
77 int i;
78 for (i = 0; i < nls->allocated; i++)
79 vStringDelete(nls->levels[i].name);
80 if (nls->levels) eFree(nls->levels);
81 eFree(nls);
84 static void nestingLevelsPush (NestingLevels *nls,
85 const vString *name, int type)
87 NestingLevel *nl = NULL;
89 if (nls->n >= nls->allocated)
91 nls->allocated++;
92 nls->levels = xRealloc(nls->levels,
93 nls->allocated, NestingLevel);
94 nls->levels[nls->n].name = vStringNew();
96 nl = &nls->levels[nls->n];
97 nls->n++;
99 vStringCopy(nl->name, name);
100 nl->type = type;
#if 0
/* Currently compiled out. Returns the innermost level, or NULL when the
 * stack is empty.
 */
static NestingLevel *nestingLevelsGetCurrent (NestingLevels *nls)
{
	Assert (nls != NULL);

	return (nls->n >= 1) ? &nls->levels[nls->n - 1] : NULL;
}

/* Currently compiled out. Drops the innermost level, clearing its name but
 * keeping the slot for reuse.
 */
static void nestingLevelsPop (NestingLevels *nls)
{
	const NestingLevel *top = nestingLevelsGetCurrent (nls);

	Assert (top != NULL);
	vStringClear (top->name);
	nls->n--;
}
#endif
124 static boolean isIdentifierFirstCharacter (int c)
126 return (boolean) (isalpha (c) || c == '_');
129 static boolean isIdentifierCharacter (int c)
131 return (boolean) (isalnum (c) || c == '_');
134 /* Given a string with the contents of a line directly after the "def" keyword,
135 * extract all relevant information and create a tag.
137 static void makeFunctionTag (vString *const function,
138 vString *const parent, int is_class_parent, const char *arglist __unused__)
140 tagEntryInfo tag;
141 initTagEntry (&tag, vStringValue (function));
143 tag.kindName = "function";
144 tag.kind = 'f';
145 /* tag.extensionFields.arglist = arglist; */
147 if (vStringLength (parent) > 0)
149 if (is_class_parent)
151 tag.kindName = "member";
152 tag.kind = 'm';
153 tag.extensionFields.scope [0] = "class";
154 tag.extensionFields.scope [1] = vStringValue (parent);
156 else
158 tag.extensionFields.scope [0] = "function";
159 tag.extensionFields.scope [1] = vStringValue (parent);
163 /* If a function starts with __, we mark it as file scope.
164 * FIXME: What is the proper way to signal such attributes?
165 * TODO: What does functions/classes starting with _ and __ mean in python?
167 if (strncmp (vStringValue (function), "__", 2) == 0 &&
168 strcmp (vStringValue (function), "__init__") != 0)
170 tag.extensionFields.access = "private";
171 tag.isFileScope = TRUE;
173 else
175 tag.extensionFields.access = "public";
177 makeTagEntry (&tag);
180 /* Given a string with the contents of the line directly after the "class"
181 * keyword, extract all necessary information and create a tag.
183 static void makeClassTag (vString *const class, vString *const inheritance,
184 vString *const parent, int is_class_parent)
186 tagEntryInfo tag;
187 initTagEntry (&tag, vStringValue (class));
188 tag.kindName = "class";
189 tag.kind = 'c';
190 if (vStringLength (parent) > 0)
192 if (is_class_parent)
194 tag.extensionFields.scope [0] = "class";
195 tag.extensionFields.scope [1] = vStringValue (parent);
197 else
199 tag.extensionFields.scope [0] = "function";
200 tag.extensionFields.scope [1] = vStringValue (parent);
203 tag.extensionFields.inheritance = vStringValue (inheritance);
204 makeTagEntry (&tag);
207 static void makeVariableTag (vString *const var, vString *const parent)
209 tagEntryInfo tag;
210 initTagEntry (&tag, vStringValue (var));
211 tag.kindName = "variable";
212 tag.kind = 'v';
213 if (vStringLength (parent) > 0)
215 tag.extensionFields.scope [0] = "class";
216 tag.extensionFields.scope [1] = vStringValue (parent);
218 makeTagEntry (&tag);
/* Skip a quoted string. "cp" points at the opening delimiter; whatever
 * character is found there is the delimiter searched for. A backslash hides
 * the character that follows it. Returns a pointer just past the closing
 * delimiter, or to the terminating NUL if the string is not closed.
 */
static const char *skipString (const char *cp)
{
	const char delimiter = *cp;
	int skipNext = 0;

	while (*++cp != '\0')
	{
		if (skipNext)
			skipNext = 0;
		else if (*cp == '\\')
			skipNext = 1;
		else if (*cp == delimiter)
			return cp + 1;
	}
	return cp;
}
/* Skip everything up to an identifier start.
 *
 * Quoted strings are skipped as a whole. A '#' starts a comment that runs to
 * the end of the text, so nothing after it is considered. Returns a pointer
 * to the first character that can begin an identifier, or to the terminating
 * NUL when there is none.
 */
static const char *skipEverything (const char *cp)
{
	while (*cp != '\0')
	{
		if (*cp == '#')
			return cp + strlen (cp);
		if (*cp == '"' || *cp == '\'')
		{
			/* Loop again without advancing so the character directly after
			 * the literal (possibly another quote) is examined too. */
			cp = skipString (cp);
			continue;
		}
		if (isIdentifierFirstCharacter ((int) *cp))
			return cp;
		cp++;
	}
	return cp;
}
/* Skip an identifier: returns the first position at or after "cp" that does
 * not hold an identifier character.
 */
static const char *skipIdentifier (const char *cp)
{
	for (; isIdentifierCharacter ((int) *cp); ++cp)
		;
	return cp;
}
/* Scan "cp" identifier by identifier and return the first one that begins
 * with "def", "class", "cdef" or "cpdef". Only the prefix is compared, so the
 * caller has to check what follows the keyword. Returns NULL if none is found.
 */
static const char *findDefinitionOrClass (const char *cp)
{
	static const char *const keywords[] = { "def", "class", "cdef", "cpdef" };
	const size_t keywordCount = sizeof keywords / sizeof keywords[0];

	while (*cp != '\0')
	{
		size_t k;

		cp = skipEverything (cp);
		for (k = 0; k < keywordCount; ++k)
		{
			if (strncmp (cp, keywords[k], strlen (keywords[k])) == 0)
				return cp;
		}
		cp = skipIdentifier (cp);
	}
	return NULL;
}
/* Return the first position at or after "cp" that is not white space (as
 * classified by isspace).
 */
static const char *skipSpace (const char *cp)
{
	for (; isspace ((int) *cp); cp++)
		;
	return cp;
}
284 /* Starting at ''cp'', parse an identifier into ''identifier''. */
285 static const char *parseIdentifier (const char *cp, vString *const identifier)
287 vStringClear (identifier);
288 while (isIdentifierCharacter ((int) *cp))
290 vStringPut (identifier, (int) *cp);
291 ++cp;
293 vStringTerminate (identifier);
294 return cp;
297 static void parseClass (const char *cp, vString *const class,
298 vString *const parent, int is_class_parent)
300 vString *const inheritance = vStringNew ();
301 vStringClear (inheritance);
302 cp = parseIdentifier (cp, class);
303 cp = skipSpace (cp);
304 if (*cp == '(')
306 ++cp;
307 while (*cp != ')')
309 if (*cp == '\0')
311 /* Closing parenthesis can be in follow up line. */
312 cp = (const char *) fileReadLine ();
313 if (!cp) break;
314 vStringPut (inheritance, ' ');
315 continue;
317 vStringPut (inheritance, *cp);
318 ++cp;
320 vStringTerminate (inheritance);
322 makeClassTag (class, inheritance, parent, is_class_parent);
323 vStringDelete (inheritance);
326 static void parseImports (const char *cp)
328 const char *pos;
329 vString *name, *name_next;
331 cp = skipEverything (cp);
333 if ((pos = strstr (cp, "import")) == NULL)
334 return;
336 cp = pos + 6;
338 /* continue only if there is some space between the keyword and the identifier */
339 if (! isspace (*cp))
340 return;
342 cp++;
343 cp = skipSpace (cp);
345 name = vStringNew ();
346 name_next = vStringNew ();
348 cp = skipEverything (cp);
349 while (*cp)
351 cp = parseIdentifier (cp, name);
353 cp = skipEverything (cp);
354 /* we parse the next possible import statement as well to be able to ignore 'foo' in
355 * 'import foo as bar' */
356 parseIdentifier (cp, name_next);
358 /* take the current tag only if the next one is not "as" */
359 if (strcmp (vStringValue (name_next), "as") != 0 &&
360 strcmp (vStringValue (name), "as") != 0)
362 makeSimpleTag (name, PythonKinds, K_IMPORT);
365 vStringDelete (name);
366 vStringDelete (name_next);
/* modified from get.c getArglistFromStr().
 *
 * Returns a strdup()ed copy of the parenthesised argument list found in
 * "buf", from the first '(' through its matching ')' (or through the end of
 * the text if it is never closed). Returns NULL when "buf" is NULL or has
 * no '('.
 *
 * warning: terminates rest of string past arglist! "buf" itself is written
 * to, so it must refer to writable storage.
 * note: does not ignore brackets inside strings!
 */
static char *parseArglist(const char *buf)
{
	char *open, *cursor;
	int depth = 1;

	if (buf == NULL)
		return NULL;
	open = strchr (buf, '(');
	if (open == NULL)
		return NULL;

	cursor = open + 1;
	while (depth > 0 && *cursor != '\0')
	{
		if (*cursor == '(')
			depth++;
		else if (*cursor == ')')
			depth--;
		cursor++;
	}
	*cursor = '\0';
	return strdup (open);
}
393 static void parseFunction (const char *cp, vString *const def,
394 vString *const parent, int is_class_parent)
396 char *arglist;
398 cp = parseIdentifier (cp, def);
399 arglist = parseArglist (cp);
400 makeFunctionTag (def, parent, is_class_parent, arglist);
401 if (arglist != NULL) {
402 eFree (arglist);
406 /* Get the combined name of a nested symbol. Classes are separated with ".",
407 * functions with "/". For example this code:
408 * class MyClass:
409 * def myFunction:
410 * def SubFunction:
411 * class SubClass:
412 * def Method:
413 * pass
414 * Would produce this string:
415 * MyClass.MyFunction/SubFunction/SubClass.Method
417 static boolean constructParentString(NestingLevels *nls, int indent,
418 vString *result)
420 int i;
421 NestingLevel *prev = NULL;
422 int is_class = FALSE;
423 vStringClear (result);
424 for (i = 0; i < nls->n; i++)
426 NestingLevel *nl = nls->levels + i;
427 if (indent <= nl->indentation)
428 break;
429 if (prev)
431 vStringCatS(result, "."); /* make Geany symbol list grouping work properly */
433 if (prev->type == K_CLASS)
434 vStringCatS(result, ".");
435 else
436 vStringCatS(result, "/");
439 vStringCat(result, nl->name);
440 is_class = (nl->type == K_CLASS);
441 prev = nl;
443 return is_class;
446 /* Check whether parent's indentation level is higher than the current level and
447 * if so, remove it.
449 static void checkParent(NestingLevels *nls, int indent, vString *parent)
451 int i;
452 NestingLevel *n;
454 for (i = 0; i < nls->n; i++)
456 n = nls->levels + i;
457 /* is there a better way to compare two vStrings? */
458 if (strcmp(vStringValue(parent), vStringValue(n->name)) == 0)
460 if (n && indent <= n->indentation)
462 /* remove this level by clearing its name */
463 vStringClear(n->name);
465 break;
470 static void addNestingLevel(NestingLevels *nls, int indentation,
471 const vString *name, boolean is_class)
473 int i;
474 NestingLevel *nl = NULL;
476 for (i = 0; i < nls->n; i++)
478 nl = nls->levels + i;
479 if (indentation <= nl->indentation) break;
481 if (i == nls->n)
483 nestingLevelsPush(nls, name, 0);
484 nl = nls->levels + i;
486 else
487 { /* reuse existing slot */
488 nls->n = i + 1;
489 vStringCopy(nl->name, name);
491 nl->indentation = indentation;
492 nl->type = is_class ? K_CLASS : !K_CLASS;
495 /* Return a pointer to the start of the next triple string, or NULL. Store
496 * the kind of triple string in "which" if the return is not NULL.
498 static char const *find_triple_start(char const *string, char const **which)
500 char const *cp = string;
502 for (; *cp; cp++)
504 if (*cp == '"' || *cp == '\'')
506 if (strncmp(cp, doubletriple, 3) == 0)
508 *which = doubletriple;
509 return cp;
511 if (strncmp(cp, singletriple, 3) == 0)
513 *which = singletriple;
514 return cp;
516 cp = skipString(cp);
517 if (!*cp) break;
520 return NULL;
/* Find the end of a triple string as pointed to by "which", and update "which"
 * with any other triple strings following in the given string.
 *
 * On return "*which" is NULL if every triple string opened so far is closed
 * within "string"; otherwise it names the delimiter that is still open.
 */
static void find_triple_end(char const *string, char const **which)
{
	char const *s = string;

	/* Check if the string ends in the same line. */
	while ((s = strstr (s, *which)) != NULL)
	{
		*which = NULL;
		/* If yes, check if another one starts in the same line. */
		s = find_triple_start (s + 3, which);
		if (s == NULL)
			break;
		s += 3;
	}
}
/* Parse global and class variable names (C.x) from assignment statements.
 * Object attributes (obj.x) are ignored.
 * Assignment to a tuple 'x, y = 2, 3' not supported.
 * TODO: ignore duplicate tags from reassignment statements.
 *
 * Returns a pointer to the start of the variable name inside "line", or NULL
 * when the line is not a simple 'name = value' assignment. The backward scan
 * works on offsets so that no pointer before the start of "line" is formed.
 */
static const char *findVariable(const char *line)
{
	const char *assign, *scan;
	size_t nameEnd, nameStart, lead;

	assign = strchr (line, '=');
	if (assign == NULL)
		return NULL;
	for (scan = assign + 1; *scan != '\0'; ++scan)
	{
		if (*scan == '=')
			return NULL;	/* ignore '==' operator and 'x=5,y=6)' function lines */
		if (*scan == '(' || *scan == '#')
			break;	/* allow 'x = func(b=2,y=2,' lines and comments at the end of line */
	}

	/* go backwards to the start of the line, checking we have valid chars:
	 * [nameStart, nameEnd) is the identifier directly left of the '=' */
	nameEnd = (size_t) (assign - line);
	while (nameEnd > 0 && isspace ((unsigned char) line[nameEnd - 1]))
		--nameEnd;
	nameStart = nameEnd;
	while (nameStart > 0 && isIdentifierCharacter ((int) line[nameStart - 1]))
		--nameStart;
	if (!isIdentifierFirstCharacter ((int) line[nameStart]))
		return NULL;

	/* only white space may precede the name */
	lead = nameStart;
	while (lead > 0 && isspace ((unsigned char) line[lead - 1]))
		--lead;
	if (lead != 0)	/* the line isn't a simple variable assignment */
		return NULL;

	/* the line is valid, parse the variable name */
	return line + nameStart;
}
582 /* Skip type declaration that optionally follows a cdef/cpdef */
583 static const char *skipTypeDecl (const char *cp, boolean *is_class)
585 const char *lastStart = cp, *ptr = cp;
586 int loopCount = 0;
587 ptr = skipSpace(cp);
588 if (!strncmp("extern", ptr, 6)) {
589 ptr += 6;
590 ptr = skipSpace(ptr);
591 if (!strncmp("from", ptr, 4)) { return NULL; }
593 if (!strncmp("class", ptr, 5)) {
594 ptr += 5 ;
595 *is_class = TRUE;
596 ptr = skipSpace(ptr);
597 return ptr;
599 /* limit so that we don't pick off "int item=obj()" */
600 while (*ptr && loopCount++ < 2) {
601 while (*ptr && *ptr != '=' && *ptr != '(' && !isspace(*ptr)) ptr++;
602 if (!*ptr || *ptr == '=') return NULL;
603 if (*ptr == '(') {
604 return lastStart; /* if we stopped on a '(' we are done */
606 ptr = skipSpace(ptr);
607 lastStart = ptr;
608 while (*lastStart == '*') lastStart++; /* cdef int *identifier */
610 return NULL;
613 static void findPythonTags (void)
615 vString *const continuation = vStringNew ();
616 vString *const name = vStringNew ();
617 vString *const parent = vStringNew();
619 NestingLevels *const nesting_levels = nestingLevelsNew();
621 const char *line;
622 int line_skip = 0;
623 char const *longStringLiteral = NULL;
625 while ((line = (const char *) fileReadLine ()) != NULL)
627 const char *cp = line, *candidate;
628 char const *longstring;
629 char const *keyword, *variable;
630 int indent;
632 cp = skipSpace (cp);
634 if (*cp == '\0') /* skip blank line */
635 continue;
637 /* Skip comment if we are not inside a multi-line string. */
638 if (*cp == '#' && !longStringLiteral)
639 continue;
641 /* Deal with line continuation. */
642 if (!line_skip) vStringClear(continuation);
643 vStringCatS(continuation, line);
644 vStringStripTrailing(continuation);
645 if (vStringLast(continuation) == '\\')
647 vStringChop(continuation);
648 vStringCatS(continuation, " ");
649 line_skip = 1;
650 continue;
652 cp = line = vStringValue(continuation);
653 cp = skipSpace (cp);
654 indent = cp - line;
655 line_skip = 0;
657 checkParent(nesting_levels, indent, parent);
659 /* Deal with multiline string ending. */
660 if (longStringLiteral)
662 find_triple_end(cp, &longStringLiteral);
663 continue;
666 /* Deal with multiline string start. */
667 longstring = find_triple_start(cp, &longStringLiteral);
668 if (longstring)
670 longstring += 3;
671 find_triple_end(longstring, &longStringLiteral);
672 /* We don't parse for any tags in the rest of the line. */
673 continue;
676 /* Deal with def and class keywords. */
677 keyword = findDefinitionOrClass (cp);
678 if (keyword)
680 boolean found = FALSE;
681 boolean is_class = FALSE;
682 if (!strncmp (keyword, "def ", 4))
684 cp = skipSpace (keyword + 3);
685 found = TRUE;
687 else if (!strncmp (keyword, "class ", 6))
689 cp = skipSpace (keyword + 5);
690 found = TRUE;
691 is_class = TRUE;
693 else if (!strncmp (keyword, "cdef ", 5))
695 cp = skipSpace(keyword + 4);
696 candidate = skipTypeDecl (cp, &is_class);
697 if (candidate)
699 found = TRUE;
700 cp = candidate;
704 else if (!strncmp (keyword, "cpdef ", 6))
706 cp = skipSpace(keyword + 5);
707 candidate = skipTypeDecl (cp, &is_class);
708 if (candidate)
710 found = TRUE;
711 cp = candidate;
715 if (found)
717 boolean is_parent_class;
719 is_parent_class =
720 constructParentString(nesting_levels, indent, parent);
722 if (is_class)
723 parseClass (cp, name, parent, is_parent_class);
724 else
725 parseFunction(cp, name, parent, is_parent_class);
727 addNestingLevel(nesting_levels, indent, name, is_class);
730 /* Find global and class variables */
731 variable = findVariable(line);
732 if (variable)
734 const char *start = variable;
735 boolean parent_is_class;
737 vStringClear (name);
738 while (isIdentifierCharacter ((int) *start))
740 vStringPut (name, (int) *start);
741 ++start;
743 vStringTerminate (name);
745 parent_is_class = constructParentString(nesting_levels, indent, parent);
746 /* skip variables in methods */
747 if (! parent_is_class && vStringLength(parent) > 0)
748 continue;
750 makeVariableTag (name, parent);
752 /* Find and parse imports */
753 parseImports(line);
755 /* Clean up all memory we allocated. */
756 vStringDelete (parent);
757 vStringDelete (name);
758 vStringDelete (continuation);
759 nestingLevelsFree (nesting_levels);
762 extern parserDefinition *PythonParser (void)
764 static const char *const extensions[] = { "py", "pyx", "pxd", "pxi" ,"scons", NULL };
765 parserDefinition *def = parserNew ("Python");
766 def->kinds = PythonKinds;
767 def->kindCount = KIND_COUNT (PythonKinds);
768 def->extensions = extensions;
769 def->parser = findPythonTags;
770 return def;
773 /* vi:set tabstop=4 shiftwidth=4: */