Use ARRAY_SIZE() in parsers
[geany-mirror.git] / ctags / parsers / powershell.c
blobec0170a667fbaf8cb97e34eb4f8c46433fbf03f9
/*
*   Copyright (c) 2015, Enrico Tröger <enrico.troeger@uvena.de>
*
*   Loosely based on the PHP tags parser since the syntax is somewhat similar
*   regarding variable and function definitions.
*
*   This source code is released for free distribution under the terms of the
*   GNU General Public License.
*
*   This module contains code for generating tags for Windows PowerShell scripts.
*/

/*
*   INCLUDE FILES
*/
16 #include "general.h" /* must always come first */
17 #include "main.h"
18 #include "parse.h"
19 #include "read.h"
20 #include "vstring.h"
21 #include "keyword.h"
22 #include "entry.h"
23 #include "routines.h"
24 #include <string.h>
/* separator inserted between the components of a nested scope name */
#define SCOPE_SEPARATOR "::"

/* value used wherever no access modifier applies */
#define ACCESS_UNDEFINED NULL
/* access modifier names searched by findValidAccessType() */
static const char *const accessTypes[] = {
	ACCESS_UNDEFINED,
	"global",
	"local",
	"script",
	"private"
};
/* Tag kinds produced by this parser; the values index PowerShellKinds[]. */
typedef enum {
	K_FUNCTION,
	K_VARIABLE,
	COUNT_KIND	/* number of kinds, sizes PowerShellKinds[] */
} powerShellKind;
/* Kind table handed to the parser definition, indexed by powerShellKind.
 * NOTE(review): the initializer order is presumably enabled flag, kind letter,
 * name, description -- confirm against the kindOption declaration. */
static kindOption PowerShellKinds[COUNT_KIND] = {
	{ TRUE, 'f', "function",	"functions" },
	{ TRUE, 'v', "variable",	"variables" }
};
/* Token classes assigned by readToken(). */
typedef enum eTokenType {
	TOKEN_UNDEFINED,	/* anything readToken() does not classify */
	TOKEN_EOF,
	TOKEN_CLOSE_PAREN,
	TOKEN_SEMICOLON,
	TOKEN_COLON,
	TOKEN_COMMA,
	TOKEN_KEYWORD,		/* identifier accepted by isTokenFunction() */
	TOKEN_OPEN_PAREN,
	TOKEN_OPERATOR,
	TOKEN_IDENTIFIER,
	TOKEN_STRING,
	TOKEN_PERIOD,
	TOKEN_OPEN_CURLY,
	TOKEN_CLOSE_CURLY,
	TOKEN_EQUAL_SIGN,
	TOKEN_OPEN_SQUARE,
	TOKEN_CLOSE_SQUARE,
	TOKEN_VARIABLE		/* '$' followed by an identifier */
} tokenType;
/* State of the token currently being processed. */
typedef struct {
	tokenType		type;			/* class assigned by readToken() */
	vString *		string;			/* token text (identifier, variable or string content) */
	vString *		scope;			/* enclosing scope name, parts joined with SCOPE_SEPARATOR */
	unsigned long	lineNumber;		/* input line number recorded for the token */
	MIOPos			filePosition;	/* input file position recorded for the token */
	int				parentKind;		/* -1 if none */
} tokenInfo;
81 static const char *findValidAccessType (const char *const access)
83 unsigned int i;
84 if (access == ACCESS_UNDEFINED)
85 return ACCESS_UNDEFINED; /* early out to save the for-loop if possible */
86 for (i = 0; i < ARRAY_SIZE(accessTypes); i++)
88 if (accessTypes[i] == ACCESS_UNDEFINED)
89 continue;
90 if (strcasecmp (access, accessTypes[i]) == 0)
91 return accessTypes[i];
92 i++;
94 return ACCESS_UNDEFINED;
97 static void initPowerShellEntry (tagEntryInfo *const e, const tokenInfo *const token,
98 const powerShellKind kind, const char *const access)
100 initTagEntry (e, vStringValue (token->string), &(PowerShellKinds[kind]));
102 e->lineNumber = token->lineNumber;
103 e->filePosition = token->filePosition;
105 if (access != NULL)
106 e->extensionFields.access = access;
107 if (vStringLength (token->scope) > 0)
109 int parentKind = token->parentKind;
110 Assert (parentKind >= 0);
112 e->extensionFields.scopeKind = &(PowerShellKinds[parentKind]);
113 e->extensionFields.scopeName = vStringValue (token->scope);
117 static void makeSimplePowerShellTag (const tokenInfo *const token, const powerShellKind kind,
118 const char *const access)
120 if (PowerShellKinds[kind].enabled)
122 tagEntryInfo e;
124 initPowerShellEntry (&e, token, kind, access);
125 makeTagEntry (&e);
129 static void makeFunctionTag (const tokenInfo *const token, const vString *const arglist,
130 const char *const access)
132 if (PowerShellKinds[K_FUNCTION].enabled)
134 tagEntryInfo e;
136 initPowerShellEntry (&e, token, K_FUNCTION, access);
138 if (arglist)
139 e.extensionFields.signature = vStringValue (arglist);
141 makeTagEntry (&e);
145 static tokenInfo *newToken (void)
147 tokenInfo *const token = xMalloc (1, tokenInfo);
149 token->type = TOKEN_UNDEFINED;
150 token->string = vStringNew ();
151 token->scope = vStringNew ();
152 token->lineNumber = getInputLineNumber ();
153 token->filePosition = getInputFilePosition ();
154 token->parentKind = -1;
156 return token;
159 static void deleteToken (tokenInfo *const token)
161 vStringDelete (token->string);
162 vStringDelete (token->scope);
163 eFree (token);
166 static void copyToken (tokenInfo *const dest, const tokenInfo *const src,
167 boolean scope)
169 dest->lineNumber = src->lineNumber;
170 dest->filePosition = src->filePosition;
171 dest->type = src->type;
172 vStringCopy (dest->string, src->string);
173 dest->parentKind = src->parentKind;
174 if (scope)
175 vStringCopy (dest->scope, src->scope);
178 static void addToScope (tokenInfo *const token, const vString *const extra)
180 if (vStringLength (token->scope) > 0)
181 vStringCatS (token->scope, SCOPE_SEPARATOR);
182 vStringCatS (token->scope, vStringValue (extra));
183 vStringTerminate (token->scope);
186 static boolean isIdentChar (const int c)
188 return (isalnum (c) || c == ':' || c == '_' || c == '-' || c >= 0x80);
/* Consumes input up to and including the next occurrence of c.
 * Returns c, or EOF if the input ended first. */
static int skipToCharacter (const int c)
{
	int ch;

	while ((ch = getcFromInputFile ()) != EOF && ch != c)
		; /* discard */

	return ch;
}
201 static void parseString (vString *const string, const int delimiter)
203 while (TRUE)
205 int c = getcFromInputFile ();
207 if (c == '\\' && (c = getcFromInputFile ()) != EOF)
208 vStringPut (string, (char) c);
209 else if (c == EOF || c == delimiter)
210 break;
211 else
212 vStringPut (string, (char) c);
214 vStringTerminate (string);
217 static void parseIdentifier (vString *const string, const int firstChar)
219 int c = firstChar;
222 vStringPut (string, (char) c);
223 c = getcFromInputFile ();
224 } while (isIdentChar (c));
225 ungetcToInputFile (c);
226 vStringTerminate (string);
229 static boolean isTokenFunction (vString *const name)
231 return (strcasecmp (vStringValue (name), "function") == 0 ||
232 strcasecmp (vStringValue (name), "filter") == 0);
235 static boolean isSpace (int c)
237 return (c == '\t' || c == ' ' || c == '\v' ||
238 c == '\n' || c == '\r' || c == '\f');
/* Starting with the already-read character c, reads on while the current
 * character is whitespace.  Returns the first non-whitespace character
 * (which may be EOF). */
static int skipWhitespaces (int c)
{
	for (; isSpace (c); c = getcFromInputFile ())
		; /* discard */

	return c;
}
/* Consumes the rest of the current line, including its line ending
 * ("\n", "\r" or "\r\n").  Returns the last character read: '\n', '\r'
 * or EOF. */
static int skipSingleComment (void)
{
	int ch;

	for (;;)
	{
		ch = getcFromInputFile ();
		if (ch == '\r')
		{
			/* swallow the '\n' of a "\r\n" pair, keep anything else */
			const int following = getcFromInputFile ();
			if (following == '\n')
				ch = following;
			else
				ungetcToInputFile (following);
		}

		if (ch == EOF || ch == '\n' || ch == '\r')
			break;
	}
	return ch;
}
/* Reads the next token from the input into token.
 *
 * The token's type and string are reset first; whitespace, "#" line comments
 * and "<# ... #>" block comments are skipped.  The token's line number and
 * file position are recorded after the leading whitespace has been skipped.
 * The scope and parentKind members are left untouched. */
static void readToken (tokenInfo *const token)
{
	int c;

	token->type		= TOKEN_UNDEFINED;
	vStringClear (token->string);

/* re-entry point after a comment has been skipped */
getNextChar:

	c = getcFromInputFile ();
	c = skipWhitespaces (c);

	token->lineNumber   = getInputLineNumber ();
	token->filePosition = getInputFilePosition ();

	switch (c)
	{
		case EOF: token->type = TOKEN_EOF;					break;
		case '(': token->type = TOKEN_OPEN_PAREN;			break;
		case ')': token->type = TOKEN_CLOSE_PAREN;			break;
		case ';': token->type = TOKEN_SEMICOLON;			break;
		case ',': token->type = TOKEN_COMMA;				break;
		case '.': token->type = TOKEN_PERIOD;				break;
		case ':': token->type = TOKEN_COLON;				break;
		case '{': token->type = TOKEN_OPEN_CURLY;			break;
		case '}': token->type = TOKEN_CLOSE_CURLY;			break;
		case '[': token->type = TOKEN_OPEN_SQUARE;			break;
		case ']': token->type = TOKEN_CLOSE_SQUARE;			break;
		case '=': token->type = TOKEN_EQUAL_SIGN;			break;

		case '\'':
		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			/* position is taken again once the whole string has been read */
			token->lineNumber = getInputLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '<':
		{
			int d = getcFromInputFile ();
			if (d == '#')
			{
				/* <# ... #> multiline comment */
				do
				{
					c = skipToCharacter ('#');
					if (c != EOF)
					{
						/* a '#' only ends the comment when '>' follows */
						c = getcFromInputFile ();
						if (c == '>')
							break;
						else
							ungetcToInputFile (c);
					}
				} while (c != EOF);
				goto getNextChar;
			}
			else
			{
				/* a lone '<' is not classified */
				ungetcToInputFile (d);
				token->type = TOKEN_UNDEFINED;
			}
			break;
		}

		case '#': /* comment */
			skipSingleComment ();
			goto getNextChar;
			break;

		case '+':
		case '-':
		case '*':
		case '/':
		case '%':
		{
			/* a directly following '=' is consumed as part of the operator */
			int d = getcFromInputFile ();
			if (d != '=')
				ungetcToInputFile (d);
			token->type = TOKEN_OPERATOR;
			break;
		}

		case '$': /* variable start */
		{
			int d = getcFromInputFile ();
			if (! isIdentChar (d))
			{
				ungetcToInputFile (d);
				token->type = TOKEN_UNDEFINED;
			}
			else
			{
				/* the '$' itself is not stored in the token string */
				parseIdentifier (token->string, d);
				token->type = TOKEN_VARIABLE;
			}
			break;
		}

		default:
			if (! isIdentChar (c))
				token->type = TOKEN_UNDEFINED;
			else
			{
				parseIdentifier (token->string, c);
				if (isTokenFunction (token->string))
					token->type = TOKEN_KEYWORD;
				else
					token->type = TOKEN_IDENTIFIER;
			}
			break;
	}
}
/* forward declaration: parseFunction() and enterScope() call each other */
static void enterScope (tokenInfo *const parentToken,
						const vString *const extraScope,
						const int parentKind);
385 /* strip a possible PowerShell scope specification and convert it to accessType */
386 static const char *parsePowerShellScope (tokenInfo *const token)
388 const char *access = ACCESS_UNDEFINED;
389 const char *const tokenName = vStringValue (token->string);
390 const char *powershellScopeEnd;
392 powershellScopeEnd = strchr (tokenName, ':');
393 if (powershellScopeEnd)
395 size_t powershellScopeLen;
396 vString * powershellScope = vStringNew ();
398 powershellScopeLen = (size_t)(powershellScopeEnd - tokenName);
399 /* extract the scope */
400 vStringNCopyS (powershellScope, tokenName, powershellScopeLen);
401 vStringTerminate (powershellScope);
402 /* cut the resulting scope string from the identifier */
403 memmove (token->string->buffer,
404 /* +1 to skip the leading colon */
405 token->string->buffer + powershellScopeLen + 1,
406 /* +1 for the skipped leading colon and - 1 to include the trailing \0 byte */
407 token->string->length + 1 - powershellScopeLen - 1);
408 token->string->length -= powershellScopeLen + 1;
410 access = findValidAccessType (vStringValue (powershellScope));
412 vStringDelete (powershellScope);
414 return access;
/* parse a function
 *
 * 	function myfunc($foo, $bar) {}
 *
 * Called with token being the function/filter keyword.  Reads the name, an
 * optional parenthesized argument list (rebuilt as the tag signature) and,
 * if a '{' follows, the function body via enterScope().
 *
 * Returns TRUE when the caller should read the next token itself, FALSE when
 * token already holds an unprocessed token. */
static boolean parseFunction (tokenInfo *const token)
{
	boolean readNext = TRUE;
	tokenInfo *nameFree = NULL;
	const char *access;

	readToken (token);

	if (token->type != TOKEN_IDENTIFIER)
		return FALSE;

	/* a scope prefix in the name is stripped and becomes the access field */
	access = parsePowerShellScope (token);

	/* keep the name token, as token is reused while reading on */
	nameFree = newToken ();
	copyToken (nameFree, token, TRUE);
	readToken (token);

	if (token->type == TOKEN_OPEN_PAREN)
	{
		vString *arglist = vStringNew ();
		int depth = 1;

		vStringPut (arglist, '(');
		do
		{
			readToken (token);

			/* track nesting so that the matching ')' ends the list */
			switch (token->type)
			{
				case TOKEN_OPEN_PAREN:  depth++; break;
				case TOKEN_CLOSE_PAREN: depth--; break;
				default: break;
			}
			/* display part */
			switch (token->type)
			{
				case TOKEN_CLOSE_CURLY:		vStringPut (arglist, '}');		break;
				case TOKEN_CLOSE_PAREN:		vStringPut (arglist, ')');		break;
				case TOKEN_CLOSE_SQUARE:	vStringPut (arglist, ']');		break;
				case TOKEN_COLON:			vStringPut (arglist, ':');		break;
				case TOKEN_COMMA:			vStringCatS (arglist, ", ");	break;
				case TOKEN_EQUAL_SIGN:		vStringCatS (arglist, " = ");	break;
				case TOKEN_OPEN_CURLY:		vStringPut (arglist, '{');		break;
				case TOKEN_OPEN_PAREN:		vStringPut (arglist, '(');		break;
				case TOKEN_OPEN_SQUARE:		vStringPut (arglist, '[');		break;
				case TOKEN_PERIOD:			vStringPut (arglist, '.');		break;
				case TOKEN_SEMICOLON:		vStringPut (arglist, ';');		break;
				/* string contents are not reproduced in the signature */
				case TOKEN_STRING:			vStringCatS (arglist, "'...'");	break;

				case TOKEN_IDENTIFIER:
				case TOKEN_KEYWORD:
				case TOKEN_VARIABLE:
				{
					switch (vStringLast (arglist))
					{
						case 0:
						case ' ':
						case '{':
						case '(':
						case '[':
						case '.':
							/* no need for a space between those and the identifier */
							break;

						default:
							vStringPut (arglist, ' ');
							break;
					}
					/* readToken() drops the '$' of variables, so restore it */
					if (token->type == TOKEN_VARIABLE)
						vStringPut (arglist, '$');
					vStringCat (arglist, token->string);
					break;
				}

				default: break;
			}
		}
		while (token->type != TOKEN_EOF && depth > 0);

		vStringTerminate (arglist);

		makeFunctionTag (nameFree, arglist, access);
		vStringDelete (arglist);

		readToken (token);
	}
	else if (token->type == TOKEN_OPEN_CURLY)
	{	/* filters doesn't need to have an arglist */
		makeFunctionTag (nameFree, NULL, access);
	}

	/* parse the body with the function name added to the scope */
	if (token->type == TOKEN_OPEN_CURLY)
		enterScope (token, nameFree->string, K_FUNCTION);
	else
		readNext = FALSE;

	if (nameFree)
		deleteToken (nameFree);

	return readNext;
}
524 /* parses declarations of the form
525 * $var = VALUE
527 static boolean parseVariable (tokenInfo *const token)
529 tokenInfo *name;
530 boolean readNext = TRUE;
531 const char *access;
533 name = newToken ();
534 copyToken (name, token, TRUE);
536 readToken (token);
537 if (token->type == TOKEN_EQUAL_SIGN)
539 if (token->parentKind != K_FUNCTION)
540 { /* ignore local variables (i.e. within a function) */
541 access = parsePowerShellScope (name);
542 makeSimplePowerShellTag (name, K_VARIABLE, access);
543 readNext = TRUE;
546 else
547 readNext = FALSE;
549 deleteToken (name);
551 return readNext;
554 static void enterScope (tokenInfo *const parentToken,
555 const vString *const extraScope,
556 const int parentKind)
558 tokenInfo *token = newToken ();
559 int origParentKind = parentToken->parentKind;
561 copyToken (token, parentToken, TRUE);
563 if (extraScope)
565 addToScope (token, extraScope);
566 token->parentKind = parentKind;
569 readToken (token);
570 while (token->type != TOKEN_EOF &&
571 token->type != TOKEN_CLOSE_CURLY)
573 boolean readNext = TRUE;
575 switch (token->type)
577 case TOKEN_OPEN_CURLY:
578 enterScope (token, NULL, -1);
579 break;
581 case TOKEN_KEYWORD:
582 readNext = parseFunction (token);
583 break;
585 case TOKEN_VARIABLE:
586 readNext = parseVariable (token);
587 break;
589 default: break;
592 if (readNext)
593 readToken (token);
596 copyToken (parentToken, token, FALSE);
597 parentToken->parentKind = origParentKind;
598 deleteToken (token);
601 static void findPowerShellTags (void)
603 tokenInfo *const token = newToken ();
607 enterScope (token, NULL, -1);
609 while (token->type != TOKEN_EOF); /* keep going even with unmatched braces */
611 deleteToken (token);
614 extern parserDefinition* PowerShellParser (void)
616 static const char *const extensions [] = { "ps1", "psm1", NULL };
617 parserDefinition* def = parserNew ("PowerShell");
618 def->kinds = PowerShellKinds;
619 def->kindCount = ARRAY_SIZE (PowerShellKinds);
620 def->extensions = extensions;
621 def->parser = findPowerShellTags;
622 return def;
625 /* vi:set tabstop=4 shiftwidth=4: */