manual: added documentation about replacement of 'untitled.ext' with filename (#1804)
[geany-mirror.git] / ctags / parsers / powershell.c
blob32623a6cf396947c850ba93fd0fb82578bfb6ce6
/*
*   Copyright (c) 2015, Enrico Tröger <enrico.troeger@uvena.de>
*
*   Loosely based on the PHP tags parser since the syntax is somewhat similar
*   regarding variable and function definitions.
*
*   This source code is released for free distribution under the terms of the
*   GNU General Public License.
*
*   This module contains code for generating tags for Windows PowerShell scripts.
*/

/*
*   INCLUDE FILES
*/
16 #include "general.h" /* must always come first */
17 #include "main.h"
18 #include "parse.h"
19 #include "read.h"
20 #include "vstring.h"
21 #include "keyword.h"
22 #include "entry.h"
23 #include "routines.h"
24 #include <string.h>
/* Separator inserted between the components of a tag's scope string. */
#define SCOPE_SEPARATOR "::"

/* Marker for "no access modifier". */
#define ACCESS_UNDEFINED NULL

/*
 * Access (scope modifier) names recognised in front of an identifier,
 * e.g. the "global" in $global:foo.  Slot 0 is the "undefined" marker so
 * lookups must skip NULL entries.
 */
static const char *const accessTypes[] = {
	ACCESS_UNDEFINED,
	"global",
	"local",
	"script",
	"private"
};
/*
 * Tag kinds emitted by this parser.  The values index PowerShellKinds,
 * and COUNT_KIND is the number of kinds.
 */
typedef enum {
	K_FUNCTION,
	K_VARIABLE,
	COUNT_KIND
} powerShellKind;
44 static kindOption PowerShellKinds[COUNT_KIND] = {
45 { true, 'f', "function", "functions" },
46 { true, 'v', "variable", "variables" }
/* Token classes produced by readToken(). */
typedef enum eTokenType {
	TOKEN_UNDEFINED,
	TOKEN_EOF,
	TOKEN_CLOSE_PAREN,
	TOKEN_SEMICOLON,
	TOKEN_COLON,
	TOKEN_COMMA,
	TOKEN_KEYWORD,		/* "function" or "filter", see isTokenFunction() */
	TOKEN_OPEN_PAREN,
	TOKEN_OPERATOR,
	TOKEN_IDENTIFIER,
	TOKEN_STRING,
	TOKEN_PERIOD,
	TOKEN_OPEN_CURLY,
	TOKEN_CLOSE_CURLY,
	TOKEN_EQUAL_SIGN,
	TOKEN_OPEN_SQUARE,
	TOKEN_CLOSE_SQUARE,
	TOKEN_VARIABLE		/* identifier introduced by '$' */
} tokenType;
71 typedef struct {
72 tokenType type;
73 vString * string;
74 vString * scope;
75 unsigned long lineNumber;
76 MIOPos filePosition;
77 int parentKind; /* -1 if none */
78 } tokenInfo;
81 static const char *findValidAccessType (const char *const access)
83 unsigned int i;
84 if (access == ACCESS_UNDEFINED)
85 return ACCESS_UNDEFINED; /* early out to save the for-loop if possible */
86 for (i = 0; i < ARRAY_SIZE(accessTypes); i++)
88 if (accessTypes[i] == ACCESS_UNDEFINED)
89 continue;
90 if (strcasecmp (access, accessTypes[i]) == 0)
91 return accessTypes[i];
92 i++;
94 return ACCESS_UNDEFINED;
97 static void initPowerShellEntry (tagEntryInfo *const e, const tokenInfo *const token,
98 const powerShellKind kind, const char *const access)
100 initTagEntry (e, vStringValue (token->string), &(PowerShellKinds[kind]));
102 e->lineNumber = token->lineNumber;
103 e->filePosition = token->filePosition;
105 if (access != NULL)
106 e->extensionFields.access = access;
107 if (vStringLength (token->scope) > 0)
109 int parentKind = token->parentKind;
110 Assert (parentKind >= 0);
112 e->extensionFields.scopeKind = &(PowerShellKinds[parentKind]);
113 e->extensionFields.scopeName = vStringValue (token->scope);
117 static void makeSimplePowerShellTag (const tokenInfo *const token, const powerShellKind kind,
118 const char *const access)
120 if (PowerShellKinds[kind].enabled)
122 tagEntryInfo e;
124 initPowerShellEntry (&e, token, kind, access);
125 makeTagEntry (&e);
129 static void makeFunctionTag (const tokenInfo *const token, const vString *const arglist,
130 const char *const access)
132 if (PowerShellKinds[K_FUNCTION].enabled)
134 tagEntryInfo e;
136 initPowerShellEntry (&e, token, K_FUNCTION, access);
138 if (arglist)
139 e.extensionFields.signature = vStringValue (arglist);
141 makeTagEntry (&e);
145 static tokenInfo *newToken (void)
147 tokenInfo *const token = xMalloc (1, tokenInfo);
149 token->type = TOKEN_UNDEFINED;
150 token->string = vStringNew ();
151 token->scope = vStringNew ();
152 token->lineNumber = getInputLineNumber ();
153 token->filePosition = getInputFilePosition ();
154 token->parentKind = -1;
156 return token;
159 static void deleteToken (tokenInfo *const token)
161 vStringDelete (token->string);
162 vStringDelete (token->scope);
163 eFree (token);
166 static void copyToken (tokenInfo *const dest, const tokenInfo *const src,
167 bool scope)
169 dest->lineNumber = src->lineNumber;
170 dest->filePosition = src->filePosition;
171 dest->type = src->type;
172 vStringCopy (dest->string, src->string);
173 dest->parentKind = src->parentKind;
174 if (scope)
175 vStringCopy (dest->scope, src->scope);
178 static void addToScope (tokenInfo *const token, const vString *const extra)
180 if (vStringLength (token->scope) > 0)
181 vStringCatS (token->scope, SCOPE_SEPARATOR);
182 vStringCatS (token->scope, vStringValue (extra));
/*
 * Tell whether `c` may be part of an identifier: alphanumerics, ':', '_',
 * '-' and every value from 0x80 upwards.  ':' is accepted so that a scope
 * prefix such as "global:" stays inside the identifier token.
 */
static bool isIdentChar (const int c)
{
	return (isalnum (c) || c == ':' || c == '_' || c == '-' || c >= 0x80);
}
190 static void parseString (vString *const string, const int delimiter)
192 while (true)
194 int c = getcFromInputFile ();
196 if (c == '\\' && (c = getcFromInputFile ()) != EOF)
197 vStringPut (string, (char) c);
198 else if (c == EOF || c == delimiter)
199 break;
200 else
201 vStringPut (string, (char) c);
205 static void parseIdentifier (vString *const string, const int firstChar)
207 int c = firstChar;
210 vStringPut (string, (char) c);
211 c = getcFromInputFile ();
212 } while (isIdentChar (c));
213 ungetcToInputFile (c);
216 static bool isTokenFunction (vString *const name)
218 return (strcasecmp (vStringValue (name), "function") == 0 ||
219 strcasecmp (vStringValue (name), "filter") == 0);
/* Tell whether `c` is tab, space, vertical tab, newline, carriage return or form feed. */
static bool isSpace (int c)
{
	return (c == '\t' || c == ' ' || c == '\v' ||
			c == '\n' || c == '\r' || c == '\f');
}
/*
 * Starting with the already-read character `c`, consume input while it is
 * whitespace.  Returns the first non-whitespace character (possibly EOF).
 */
static int skipWhitespaces (int c)
{
	while (isSpace (c))
		c = getcFromInputFile ();
	return c;
}
/*
 * Consume the rest of a single-line comment, including its line ending.
 * A "\r\n" pair is consumed as one line ending; a lone '\r' also ends the
 * comment.  Returns the last character read: '\n', '\r' or EOF.
 */
static int skipSingleComment (void)
{
	int c;

	do
	{
		c = getcFromInputFile ();
		if (c == '\r')
		{
			int next = getcFromInputFile ();
			if (next != '\n')
				ungetcToInputFile (next);
			else
				c = next;
		}
	} while (c != EOF && c != '\n' && c != '\r');
	return c;
}
253 static void readToken (tokenInfo *const token)
255 int c;
257 token->type = TOKEN_UNDEFINED;
258 vStringClear (token->string);
260 getNextChar:
262 c = getcFromInputFile ();
263 c = skipWhitespaces (c);
265 token->lineNumber = getInputLineNumber ();
266 token->filePosition = getInputFilePosition ();
268 switch (c)
270 case EOF: token->type = TOKEN_EOF; break;
271 case '(': token->type = TOKEN_OPEN_PAREN; break;
272 case ')': token->type = TOKEN_CLOSE_PAREN; break;
273 case ';': token->type = TOKEN_SEMICOLON; break;
274 case ',': token->type = TOKEN_COMMA; break;
275 case '.': token->type = TOKEN_PERIOD; break;
276 case ':': token->type = TOKEN_COLON; break;
277 case '{': token->type = TOKEN_OPEN_CURLY; break;
278 case '}': token->type = TOKEN_CLOSE_CURLY; break;
279 case '[': token->type = TOKEN_OPEN_SQUARE; break;
280 case ']': token->type = TOKEN_CLOSE_SQUARE; break;
281 case '=': token->type = TOKEN_EQUAL_SIGN; break;
283 case '\'':
284 case '"':
285 token->type = TOKEN_STRING;
286 parseString (token->string, c);
287 token->lineNumber = getInputLineNumber ();
288 token->filePosition = getInputFilePosition ();
289 break;
291 case '<':
293 int d = getcFromInputFile ();
294 if (d == '#')
296 /* <# ... #> multiline comment */
299 c = skipToCharacterInInputFile ('#');
300 if (c != EOF)
302 c = getcFromInputFile ();
303 if (c == '>')
304 break;
305 else
306 ungetcToInputFile (c);
308 } while (c != EOF);
309 goto getNextChar;
311 else
313 ungetcToInputFile (d);
314 token->type = TOKEN_UNDEFINED;
316 break;
319 case '#': /* comment */
320 skipSingleComment ();
321 goto getNextChar;
322 break;
324 case '+':
325 case '-':
326 case '*':
327 case '/':
328 case '%':
330 int d = getcFromInputFile ();
331 if (d != '=')
332 ungetcToInputFile (d);
333 token->type = TOKEN_OPERATOR;
334 break;
337 case '$': /* variable start */
339 int d = getcFromInputFile ();
340 if (! isIdentChar (d))
342 ungetcToInputFile (d);
343 token->type = TOKEN_UNDEFINED;
345 else
347 parseIdentifier (token->string, d);
348 token->type = TOKEN_VARIABLE;
350 break;
353 default:
354 if (! isIdentChar (c))
355 token->type = TOKEN_UNDEFINED;
356 else
358 parseIdentifier (token->string, c);
359 if (isTokenFunction (token->string))
360 token->type = TOKEN_KEYWORD;
361 else
362 token->type = TOKEN_IDENTIFIER;
364 break;
368 static void enterScope (tokenInfo *const parentToken,
369 const vString *const extraScope,
370 const int parentKind);
372 /* strip a possible PowerShell scope specification and convert it to accessType */
373 static const char *parsePowerShellScope (tokenInfo *const token)
375 const char *access = ACCESS_UNDEFINED;
376 const char *const tokenName = vStringValue (token->string);
377 const char *powershellScopeEnd;
379 powershellScopeEnd = strchr (tokenName, ':');
380 if (powershellScopeEnd)
382 size_t powershellScopeLen;
383 vString * powershellScope = vStringNew ();
385 powershellScopeLen = (size_t)(powershellScopeEnd - tokenName);
386 /* extract the scope */
387 vStringNCopyS (powershellScope, tokenName, powershellScopeLen);
388 /* cut the resulting scope string from the identifier */
389 memmove (token->string->buffer,
390 /* +1 to skip the leading colon */
391 token->string->buffer + powershellScopeLen + 1,
392 /* +1 for the skipped leading colon and - 1 to include the trailing \0 byte */
393 token->string->length + 1 - powershellScopeLen - 1);
394 token->string->length -= powershellScopeLen + 1;
396 access = findValidAccessType (vStringValue (powershellScope));
398 vStringDelete (powershellScope);
400 return access;
404 /* parse a function
406 * function myfunc($foo, $bar) {}
408 static bool parseFunction (tokenInfo *const token)
410 bool readNext = true;
411 tokenInfo *nameFree = NULL;
412 const char *access;
414 readToken (token);
416 if (token->type != TOKEN_IDENTIFIER)
417 return false;
419 access = parsePowerShellScope (token);
421 nameFree = newToken ();
422 copyToken (nameFree, token, true);
423 readToken (token);
425 if (token->type == TOKEN_OPEN_PAREN)
427 vString *arglist = vStringNew ();
428 int depth = 1;
430 vStringPut (arglist, '(');
433 readToken (token);
435 switch (token->type)
437 case TOKEN_OPEN_PAREN: depth++; break;
438 case TOKEN_CLOSE_PAREN: depth--; break;
439 default: break;
441 /* display part */
442 switch (token->type)
444 case TOKEN_CLOSE_CURLY: vStringPut (arglist, '}'); break;
445 case TOKEN_CLOSE_PAREN: vStringPut (arglist, ')'); break;
446 case TOKEN_CLOSE_SQUARE: vStringPut (arglist, ']'); break;
447 case TOKEN_COLON: vStringPut (arglist, ':'); break;
448 case TOKEN_COMMA: vStringCatS (arglist, ", "); break;
449 case TOKEN_EQUAL_SIGN: vStringCatS (arglist, " = "); break;
450 case TOKEN_OPEN_CURLY: vStringPut (arglist, '{'); break;
451 case TOKEN_OPEN_PAREN: vStringPut (arglist, '('); break;
452 case TOKEN_OPEN_SQUARE: vStringPut (arglist, '['); break;
453 case TOKEN_PERIOD: vStringPut (arglist, '.'); break;
454 case TOKEN_SEMICOLON: vStringPut (arglist, ';'); break;
455 case TOKEN_STRING: vStringCatS (arglist, "'...'"); break;
457 case TOKEN_IDENTIFIER:
458 case TOKEN_KEYWORD:
459 case TOKEN_VARIABLE:
461 switch (vStringLast (arglist))
463 case 0:
464 case ' ':
465 case '{':
466 case '(':
467 case '[':
468 case '.':
469 /* no need for a space between those and the identifier */
470 break;
472 default:
473 vStringPut (arglist, ' ');
474 break;
476 if (token->type == TOKEN_VARIABLE)
477 vStringPut (arglist, '$');
478 vStringCat (arglist, token->string);
479 break;
482 default: break;
485 while (token->type != TOKEN_EOF && depth > 0);
487 makeFunctionTag (nameFree, arglist, access);
488 vStringDelete (arglist);
490 readToken (token);
492 else if (token->type == TOKEN_OPEN_CURLY)
493 { /* filters doesn't need to have an arglist */
494 makeFunctionTag (nameFree, NULL, access);
497 if (token->type == TOKEN_OPEN_CURLY)
498 enterScope (token, nameFree->string, K_FUNCTION);
499 else
500 readNext = false;
502 if (nameFree)
503 deleteToken (nameFree);
505 return readNext;
508 /* parses declarations of the form
509 * $var = VALUE
511 static bool parseVariable (tokenInfo *const token)
513 tokenInfo *name;
514 bool readNext = true;
515 const char *access;
517 name = newToken ();
518 copyToken (name, token, true);
520 readToken (token);
521 if (token->type == TOKEN_EQUAL_SIGN)
523 if (token->parentKind != K_FUNCTION)
524 { /* ignore local variables (i.e. within a function) */
525 access = parsePowerShellScope (name);
526 makeSimplePowerShellTag (name, K_VARIABLE, access);
527 readNext = true;
530 else
531 readNext = false;
533 deleteToken (name);
535 return readNext;
538 static void enterScope (tokenInfo *const parentToken,
539 const vString *const extraScope,
540 const int parentKind)
542 tokenInfo *token = newToken ();
543 int origParentKind = parentToken->parentKind;
545 copyToken (token, parentToken, true);
547 if (extraScope)
549 addToScope (token, extraScope);
550 token->parentKind = parentKind;
553 readToken (token);
554 while (token->type != TOKEN_EOF &&
555 token->type != TOKEN_CLOSE_CURLY)
557 bool readNext = true;
559 switch (token->type)
561 case TOKEN_OPEN_CURLY:
562 enterScope (token, NULL, -1);
563 break;
565 case TOKEN_KEYWORD:
566 readNext = parseFunction (token);
567 break;
569 case TOKEN_VARIABLE:
570 readNext = parseVariable (token);
571 break;
573 default: break;
576 if (readNext)
577 readToken (token);
580 copyToken (parentToken, token, false);
581 parentToken->parentKind = origParentKind;
582 deleteToken (token);
585 static void findPowerShellTags (void)
587 tokenInfo *const token = newToken ();
591 enterScope (token, NULL, -1);
593 while (token->type != TOKEN_EOF); /* keep going even with unmatched braces */
595 deleteToken (token);
598 extern parserDefinition* PowerShellParser (void)
600 static const char *const extensions [] = { "ps1", "psm1", NULL };
601 parserDefinition* def = parserNew ("PowerShell");
602 def->kinds = PowerShellKinds;
603 def->kindCount = ARRAY_SIZE (PowerShellKinds);
604 def->extensions = extensions;
605 def->parser = findPowerShellTags;
606 return def;