ctags: Rename Geany-specific tagEntryInfo::arglist to upstream's ::signature
[geany-mirror.git] / ctags / parsers / powershell.c
blobc7752ae9b37c289020d69f7530cbbdf6ce7b8e8c
/*
*   Copyright (c) 2015, Enrico Tröger <enrico.troeger@uvena.de>
*
*   Loosely based on the PHP tags parser since the syntax is somewhat similar
*   regarding variable and function definitions.
*
*   This source code is released for free distribution under the terms of the
*   GNU General Public License.
*
*   This module contains code for generating tags for Windows PowerShell scripts.
*/
/*
*   INCLUDE FILES
*/
16 #include "general.h" /* must always come first */
17 #include "main.h"
18 #include "parse.h"
19 #include "read.h"
20 #include "vstring.h"
21 #include "keyword.h"
22 #include "entry.h"
23 #include <string.h>
/* Separator inserted between nested scope names when a scope string is built. */
#define SCOPE_SEPARATOR "::"

/*
 * Number of elements in a true array (not a pointer or array parameter).
 * The argument is parenthesized so that expression arguments expand safely.
 */
#define ARRAY_LENGTH(array) (sizeof (array) / sizeof ((array)[0]))
/* Sentinel meaning "no access type"; also stored in slot 0 of accessTypes. */
#define ACCESS_UNDEFINED NULL

/*
 * Access strings that may be attached to a tag.  Slot 0 holds the
 * ACCESS_UNDEFINED sentinel; the remaining slots are the scope names that
 * findValidAccessType() compares against.
 */
static const char *const accessTypes[] = {
	ACCESS_UNDEFINED,
	"global", "local", "script", "private"
};
/* Tag kinds produced by this parser; the values are used as indices into PowerShellKinds. */
typedef enum {
	K_FUNCTION,		/* index of the "function" kind */
	K_VARIABLE,		/* index of the "variable" kind */
	COUNT_KIND		/* number of kinds; sizes the PowerShellKinds array */
} powerShellKind;
45 static kindOption PowerShellKinds[COUNT_KIND] = {
46 { TRUE, 'f', "function", "functions" },
47 { TRUE, 'v', "variable", "variables" }
/* Token classes produced by readToken(). */
typedef enum eTokenType {
	TOKEN_UNDEFINED,		/* anything not recognized below */
	TOKEN_EOF,				/* end of input */
	TOKEN_CLOSE_PAREN,		/* ')' */
	TOKEN_SEMICOLON,		/* ';' */
	TOKEN_COLON,			/* ':' */
	TOKEN_COMMA,			/* ',' */
	TOKEN_KEYWORD,			/* "function" or "filter" */
	TOKEN_OPEN_PAREN,		/* '(' */
	TOKEN_OPERATOR,			/* + - * / % with an optional trailing '=' */
	TOKEN_IDENTIFIER,		/* bare identifier */
	TOKEN_STRING,			/* single- or double-quoted string */
	TOKEN_PERIOD,			/* '.' */
	TOKEN_OPEN_CURLY,		/* '{' */
	TOKEN_CLOSE_CURLY,		/* '}' */
	TOKEN_EQUAL_SIGN,		/* '=' */
	TOKEN_OPEN_SQUARE,		/* '[' */
	TOKEN_CLOSE_SQUARE,		/* ']' */
	TOKEN_VARIABLE			/* '$' followed by an identifier */
} tokenType;
72 typedef struct {
73 tokenType type;
74 vString * string;
75 vString * scope;
76 unsigned long lineNumber;
77 MIOPos filePosition;
78 int parentKind; /* -1 if none */
79 } tokenInfo;
82 static const char *findValidAccessType (const char *const access)
84 unsigned int i;
85 if (access == ACCESS_UNDEFINED)
86 return ACCESS_UNDEFINED; /* early out to save the for-loop if possible */
87 for (i = 0; i < ARRAY_LENGTH(accessTypes); i++)
89 if (accessTypes[i] == ACCESS_UNDEFINED)
90 continue;
91 if (strcasecmp (access, accessTypes[i]) == 0)
92 return accessTypes[i];
93 i++;
95 return ACCESS_UNDEFINED;
98 static void initPowerShellEntry (tagEntryInfo *const e, const tokenInfo *const token,
99 const powerShellKind kind, const char *const access)
101 initTagEntry (e, vStringValue (token->string));
103 e->lineNumber = token->lineNumber;
104 e->filePosition = token->filePosition;
105 e->kindName = PowerShellKinds[kind].name;
106 e->kind = (char) PowerShellKinds[kind].letter;
108 if (access != NULL)
109 e->extensionFields.access = access;
110 if (vStringLength (token->scope) > 0)
112 int parentKind = token->parentKind;
113 Assert (parentKind >= 0);
115 e->extensionFields.scope[0] = PowerShellKinds[parentKind].name;
116 e->extensionFields.scope[1] = vStringValue (token->scope);
120 static void makeSimplePowerShellTag (const tokenInfo *const token, const powerShellKind kind,
121 const char *const access)
123 if (PowerShellKinds[kind].enabled)
125 tagEntryInfo e;
127 initPowerShellEntry (&e, token, kind, access);
128 makeTagEntry (&e);
132 static void makeFunctionTag (const tokenInfo *const token, const vString *const arglist,
133 const char *const access)
135 if (PowerShellKinds[K_FUNCTION].enabled)
137 tagEntryInfo e;
139 initPowerShellEntry (&e, token, K_FUNCTION, access);
141 if (arglist)
142 e.extensionFields.signature = vStringValue (arglist);
144 makeTagEntry (&e);
148 static tokenInfo *newToken (void)
150 tokenInfo *const token = xMalloc (1, tokenInfo);
152 token->type = TOKEN_UNDEFINED;
153 token->string = vStringNew ();
154 token->scope = vStringNew ();
155 token->lineNumber = getSourceLineNumber ();
156 token->filePosition = getInputFilePosition ();
157 token->parentKind = -1;
159 return token;
162 static void deleteToken (tokenInfo *const token)
164 vStringDelete (token->string);
165 vStringDelete (token->scope);
166 eFree (token);
169 static void copyToken (tokenInfo *const dest, const tokenInfo *const src,
170 boolean scope)
172 dest->lineNumber = src->lineNumber;
173 dest->filePosition = src->filePosition;
174 dest->type = src->type;
175 vStringCopy (dest->string, src->string);
176 dest->parentKind = src->parentKind;
177 if (scope)
178 vStringCopy (dest->scope, src->scope);
181 static void addToScope (tokenInfo *const token, const vString *const extra)
183 if (vStringLength (token->scope) > 0)
184 vStringCatS (token->scope, SCOPE_SEPARATOR);
185 vStringCatS (token->scope, vStringValue (extra));
186 vStringTerminate (token->scope);
189 static boolean isIdentChar (const int c)
191 return (isalnum (c) || c == ':' || c == '_' || c == '-' || c >= 0x80);
/*
 * Consume input up to and including the next occurrence of `c`.
 * Returns `c` when it was found, or EOF when the input ended first.
 */
static int skipToCharacter (const int c)
{
	int current;

	for (;;)
	{
		current = fileGetc ();
		if (current == EOF || current == c)
			break;
	}
	return current;
}
204 static void parseString (vString *const string, const int delimiter)
206 while (TRUE)
208 int c = fileGetc ();
210 if (c == '\\' && (c = fileGetc ()) != EOF)
211 vStringPut (string, (char) c);
212 else if (c == EOF || c == delimiter)
213 break;
214 else
215 vStringPut (string, (char) c);
217 vStringTerminate (string);
220 static void parseIdentifier (vString *const string, const int firstChar)
222 int c = firstChar;
225 vStringPut (string, (char) c);
226 c = fileGetc ();
227 } while (isIdentChar (c));
228 fileUngetc (c);
229 vStringTerminate (string);
232 static boolean isTokenFunction (vString *const name)
234 return (strcasecmp (vStringValue (name), "function") == 0 ||
235 strcasecmp (vStringValue (name), "filter") == 0);
238 static boolean isSpace (int c)
240 return (c == '\t' || c == ' ' || c == '\v' ||
241 c == '\n' || c == '\r' || c == '\f');
/*
 * Starting from the already-read character `c`, keep reading while the
 * current character is whitespace.  Returns the first non-whitespace
 * character, which may be EOF.
 */
static int skipWhitespaces (int c)
{
	int current = c;

	while (isSpace (current))
	{
		current = fileGetc ();
	}
	return current;
}
/*
 * Consume the rest of a '#' line comment, including its line terminator.
 * A "\r\n" pair is consumed as a whole; a lone '\r' also ends the comment.
 * Returns the last character read: '\n', '\r' or EOF.
 */
static int skipSingleComment (void)
{
	int c;

	for (;;)
	{
		c = fileGetc ();
		if (c == '\r')
		{
			const int next = fileGetc ();

			if (next == '\n')
				c = next;
			else
				fileUngetc (next);
		}
		if (c == EOF || c == '\n' || c == '\r')
			break;
	}
	return c;
}
269 static void readToken (tokenInfo *const token)
271 int c;
273 token->type = TOKEN_UNDEFINED;
274 vStringClear (token->string);
276 getNextChar:
278 c = fileGetc ();
279 c = skipWhitespaces (c);
281 token->lineNumber = getSourceLineNumber ();
282 token->filePosition = getInputFilePosition ();
284 switch (c)
286 case EOF: token->type = TOKEN_EOF; break;
287 case '(': token->type = TOKEN_OPEN_PAREN; break;
288 case ')': token->type = TOKEN_CLOSE_PAREN; break;
289 case ';': token->type = TOKEN_SEMICOLON; break;
290 case ',': token->type = TOKEN_COMMA; break;
291 case '.': token->type = TOKEN_PERIOD; break;
292 case ':': token->type = TOKEN_COLON; break;
293 case '{': token->type = TOKEN_OPEN_CURLY; break;
294 case '}': token->type = TOKEN_CLOSE_CURLY; break;
295 case '[': token->type = TOKEN_OPEN_SQUARE; break;
296 case ']': token->type = TOKEN_CLOSE_SQUARE; break;
297 case '=': token->type = TOKEN_EQUAL_SIGN; break;
299 case '\'':
300 case '"':
301 token->type = TOKEN_STRING;
302 parseString (token->string, c);
303 token->lineNumber = getSourceLineNumber ();
304 token->filePosition = getInputFilePosition ();
305 break;
307 case '<':
309 int d = fileGetc ();
310 if (d == '#')
312 /* <# ... #> multiline comment */
315 c = skipToCharacter ('#');
316 if (c != EOF)
318 c = fileGetc ();
319 if (c == '>')
320 break;
321 else
322 fileUngetc (c);
324 } while (c != EOF);
325 goto getNextChar;
327 else
329 fileUngetc (d);
330 token->type = TOKEN_UNDEFINED;
332 break;
335 case '#': /* comment */
336 skipSingleComment ();
337 goto getNextChar;
338 break;
340 case '+':
341 case '-':
342 case '*':
343 case '/':
344 case '%':
346 int d = fileGetc ();
347 if (d != '=')
348 fileUngetc (d);
349 token->type = TOKEN_OPERATOR;
350 break;
353 case '$': /* variable start */
355 int d = fileGetc ();
356 if (! isIdentChar (d))
358 fileUngetc (d);
359 token->type = TOKEN_UNDEFINED;
361 else
363 parseIdentifier (token->string, d);
364 token->type = TOKEN_VARIABLE;
366 break;
369 default:
370 if (! isIdentChar (c))
371 token->type = TOKEN_UNDEFINED;
372 else
374 parseIdentifier (token->string, c);
375 if (isTokenFunction (token->string))
376 token->type = TOKEN_KEYWORD;
377 else
378 token->type = TOKEN_IDENTIFIER;
380 break;
384 static void enterScope (tokenInfo *const parentToken,
385 const vString *const extraScope,
386 const int parentKind);
388 /* strip a possible PowerShell scope specification and convert it to accessType */
389 static const char *parsePowerShellScope (tokenInfo *const token)
391 const char *access = ACCESS_UNDEFINED;
392 const char *const tokenName = vStringValue (token->string);
393 const char *powershellScopeEnd;
395 powershellScopeEnd = strchr (tokenName, ':');
396 if (powershellScopeEnd)
398 size_t powershellScopeLen;
399 vString * powershellScope = vStringNew ();
401 powershellScopeLen = (size_t)(powershellScopeEnd - tokenName);
402 /* extract the scope */
403 vStringNCopyS (powershellScope, tokenName, powershellScopeLen);
404 vStringTerminate (powershellScope);
405 /* cut the resulting scope string from the identifier */
406 memmove (token->string->buffer,
407 /* +1 to skip the leading colon */
408 token->string->buffer + powershellScopeLen + 1,
409 /* +1 for the skipped leading colon and - 1 to include the trailing \0 byte */
410 token->string->length + 1 - powershellScopeLen - 1);
411 token->string->length -= powershellScopeLen + 1;
413 access = findValidAccessType (vStringValue (powershellScope));
415 vStringDelete (powershellScope);
417 return access;
421 /* parse a function
423 * function myfunc($foo, $bar) {}
425 static boolean parseFunction (tokenInfo *const token)
427 boolean readNext = TRUE;
428 tokenInfo *nameFree = NULL;
429 const char *access;
431 readToken (token);
433 if (token->type != TOKEN_IDENTIFIER)
434 return FALSE;
436 access = parsePowerShellScope (token);
438 nameFree = newToken ();
439 copyToken (nameFree, token, TRUE);
440 readToken (token);
442 if (token->type == TOKEN_OPEN_PAREN)
444 vString *arglist = vStringNew ();
445 int depth = 1;
447 vStringPut (arglist, '(');
450 readToken (token);
452 switch (token->type)
454 case TOKEN_OPEN_PAREN: depth++; break;
455 case TOKEN_CLOSE_PAREN: depth--; break;
456 default: break;
458 /* display part */
459 switch (token->type)
461 case TOKEN_CLOSE_CURLY: vStringPut (arglist, '}'); break;
462 case TOKEN_CLOSE_PAREN: vStringPut (arglist, ')'); break;
463 case TOKEN_CLOSE_SQUARE: vStringPut (arglist, ']'); break;
464 case TOKEN_COLON: vStringPut (arglist, ':'); break;
465 case TOKEN_COMMA: vStringCatS (arglist, ", "); break;
466 case TOKEN_EQUAL_SIGN: vStringCatS (arglist, " = "); break;
467 case TOKEN_OPEN_CURLY: vStringPut (arglist, '{'); break;
468 case TOKEN_OPEN_PAREN: vStringPut (arglist, '('); break;
469 case TOKEN_OPEN_SQUARE: vStringPut (arglist, '['); break;
470 case TOKEN_PERIOD: vStringPut (arglist, '.'); break;
471 case TOKEN_SEMICOLON: vStringPut (arglist, ';'); break;
472 case TOKEN_STRING: vStringCatS (arglist, "'...'"); break;
474 case TOKEN_IDENTIFIER:
475 case TOKEN_KEYWORD:
476 case TOKEN_VARIABLE:
478 switch (vStringLast (arglist))
480 case 0:
481 case ' ':
482 case '{':
483 case '(':
484 case '[':
485 case '.':
486 /* no need for a space between those and the identifier */
487 break;
489 default:
490 vStringPut (arglist, ' ');
491 break;
493 if (token->type == TOKEN_VARIABLE)
494 vStringPut (arglist, '$');
495 vStringCat (arglist, token->string);
496 break;
499 default: break;
502 while (token->type != TOKEN_EOF && depth > 0);
504 vStringTerminate (arglist);
506 makeFunctionTag (nameFree, arglist, access);
507 vStringDelete (arglist);
509 readToken (token);
511 else if (token->type == TOKEN_OPEN_CURLY)
512 { /* filters doesn't need to have an arglist */
513 makeFunctionTag (nameFree, NULL, access);
516 if (token->type == TOKEN_OPEN_CURLY)
517 enterScope (token, nameFree->string, K_FUNCTION);
518 else
519 readNext = FALSE;
521 if (nameFree)
522 deleteToken (nameFree);
524 return readNext;
527 /* parses declarations of the form
528 * $var = VALUE
530 static boolean parseVariable (tokenInfo *const token)
532 tokenInfo *name;
533 boolean readNext = TRUE;
534 const char *access;
536 name = newToken ();
537 copyToken (name, token, TRUE);
539 readToken (token);
540 if (token->type == TOKEN_EQUAL_SIGN)
542 if (token->parentKind != K_FUNCTION)
543 { /* ignore local variables (i.e. within a function) */
544 access = parsePowerShellScope (name);
545 makeSimplePowerShellTag (name, K_VARIABLE, access);
546 readNext = TRUE;
549 else
550 readNext = FALSE;
552 deleteToken (name);
554 return readNext;
557 static void enterScope (tokenInfo *const parentToken,
558 const vString *const extraScope,
559 const int parentKind)
561 tokenInfo *token = newToken ();
562 int origParentKind = parentToken->parentKind;
564 copyToken (token, parentToken, TRUE);
566 if (extraScope)
568 addToScope (token, extraScope);
569 token->parentKind = parentKind;
572 readToken (token);
573 while (token->type != TOKEN_EOF &&
574 token->type != TOKEN_CLOSE_CURLY)
576 boolean readNext = TRUE;
578 switch (token->type)
580 case TOKEN_OPEN_CURLY:
581 enterScope (token, NULL, -1);
582 break;
584 case TOKEN_KEYWORD:
585 readNext = parseFunction (token);
586 break;
588 case TOKEN_VARIABLE:
589 readNext = parseVariable (token);
590 break;
592 default: break;
595 if (readNext)
596 readToken (token);
599 copyToken (parentToken, token, FALSE);
600 parentToken->parentKind = origParentKind;
601 deleteToken (token);
604 static void findPowerShellTags (void)
606 tokenInfo *const token = newToken ();
610 enterScope (token, NULL, -1);
612 while (token->type != TOKEN_EOF); /* keep going even with unmatched braces */
614 deleteToken (token);
617 extern parserDefinition* PowerShellParser (void)
619 static const char *const extensions [] = { "ps1", "psm1", NULL };
620 parserDefinition* def = parserNew ("PowerShell");
621 def->kinds = PowerShellKinds;
622 def->kindCount = KIND_COUNT (PowerShellKinds);
623 def->extensions = extensions;
624 def->parser = findPowerShellTags;
625 return def;
628 /* vi:set tabstop=4 shiftwidth=4: */