2 * Copyright (c) 2015, Enrico Tröger <enrico.troeger@uvena.de>
4 * Loosely based on the PHP tags parser since the syntax is somewhat similar
5 * regarding variable and function definitions.
7 * This source code is released for free distribution under the terms of the
8 * GNU General Public License.
10 * This module contains code for generating tags for Windows PowerShell scripts.
16 #include "general.h" /* must always come first */
25 #define SCOPE_SEPARATOR "::"
28 #define ARRAY_LENGTH(array) (sizeof array / sizeof array[0])
30 #define ACCESS_UNDEFINED NULL
31 static const char *const accessTypes
[] = {
45 static kindOption PowerShellKinds
[COUNT_KIND
] = {
46 { TRUE
, 'f', "function", "functions" },
47 { TRUE
, 'v', "variable", "variables" }
51 typedef enum eTokenType
{
76 unsigned long lineNumber
;
78 int parentKind
; /* -1 if none */
82 static const char *findValidAccessType (const char *const access
)
85 if (access
== ACCESS_UNDEFINED
)
86 return ACCESS_UNDEFINED
; /* early out to save the for-loop if possible */
87 for (i
= 0; i
< ARRAY_LENGTH(accessTypes
); i
++)
89 if (accessTypes
[i
] == ACCESS_UNDEFINED
)
91 if (strcasecmp (access
, accessTypes
[i
]) == 0)
92 return accessTypes
[i
];
95 return ACCESS_UNDEFINED
;
98 static void initPowerShellEntry (tagEntryInfo
*const e
, const tokenInfo
*const token
,
99 const powerShellKind kind
, const char *const access
)
101 initTagEntry (e
, vStringValue (token
->string
));
103 e
->lineNumber
= token
->lineNumber
;
104 e
->filePosition
= token
->filePosition
;
105 e
->kindName
= PowerShellKinds
[kind
].name
;
106 e
->kind
= (char) PowerShellKinds
[kind
].letter
;
109 e
->extensionFields
.access
= access
;
110 if (vStringLength (token
->scope
) > 0)
112 int parentKind
= token
->parentKind
;
113 Assert (parentKind
>= 0);
115 e
->extensionFields
.scope
[0] = PowerShellKinds
[parentKind
].name
;
116 e
->extensionFields
.scope
[1] = vStringValue (token
->scope
);
120 static void makeSimplePowerShellTag (const tokenInfo
*const token
, const powerShellKind kind
,
121 const char *const access
)
123 if (PowerShellKinds
[kind
].enabled
)
127 initPowerShellEntry (&e
, token
, kind
, access
);
132 static void makeFunctionTag (const tokenInfo
*const token
, const vString
*const arglist
,
133 const char *const access
)
135 if (PowerShellKinds
[K_FUNCTION
].enabled
)
139 initPowerShellEntry (&e
, token
, K_FUNCTION
, access
);
142 e
.extensionFields
.signature
= vStringValue (arglist
);
148 static tokenInfo
*newToken (void)
150 tokenInfo
*const token
= xMalloc (1, tokenInfo
);
152 token
->type
= TOKEN_UNDEFINED
;
153 token
->string
= vStringNew ();
154 token
->scope
= vStringNew ();
155 token
->lineNumber
= getSourceLineNumber ();
156 token
->filePosition
= getInputFilePosition ();
157 token
->parentKind
= -1;
162 static void deleteToken (tokenInfo
*const token
)
164 vStringDelete (token
->string
);
165 vStringDelete (token
->scope
);
169 static void copyToken (tokenInfo
*const dest
, const tokenInfo
*const src
,
172 dest
->lineNumber
= src
->lineNumber
;
173 dest
->filePosition
= src
->filePosition
;
174 dest
->type
= src
->type
;
175 vStringCopy (dest
->string
, src
->string
);
176 dest
->parentKind
= src
->parentKind
;
178 vStringCopy (dest
->scope
, src
->scope
);
181 static void addToScope (tokenInfo
*const token
, const vString
*const extra
)
183 if (vStringLength (token
->scope
) > 0)
184 vStringCatS (token
->scope
, SCOPE_SEPARATOR
);
185 vStringCatS (token
->scope
, vStringValue (extra
));
186 vStringTerminate (token
->scope
);
189 static boolean
isIdentChar (const int c
)
191 return (isalnum (c
) || c
== ':' || c
== '_' || c
== '-' || c
>= 0x80);
194 static int skipToCharacter (const int c
)
200 } while (d
!= EOF
&& d
!= c
);
204 static void parseString (vString
*const string
, const int delimiter
)
210 if (c
== '\\' && (c
= fileGetc ()) != EOF
)
211 vStringPut (string
, (char) c
);
212 else if (c
== EOF
|| c
== delimiter
)
215 vStringPut (string
, (char) c
);
217 vStringTerminate (string
);
220 static void parseIdentifier (vString
*const string
, const int firstChar
)
225 vStringPut (string
, (char) c
);
227 } while (isIdentChar (c
));
229 vStringTerminate (string
);
232 static boolean
isTokenFunction (vString
*const name
)
234 return (strcasecmp (vStringValue (name
), "function") == 0 ||
235 strcasecmp (vStringValue (name
), "filter") == 0);
238 static boolean
isSpace (int c
)
240 return (c
== '\t' || c
== ' ' || c
== '\v' ||
241 c
== '\n' || c
== '\r' || c
== '\f');
244 static int skipWhitespaces (int c
)
251 static int skipSingleComment (void)
259 int next
= fileGetc ();
265 } while (c
!= EOF
&& c
!= '\n' && c
!= '\r');
269 static void readToken (tokenInfo
*const token
)
273 token
->type
= TOKEN_UNDEFINED
;
274 vStringClear (token
->string
);
279 c
= skipWhitespaces (c
);
281 token
->lineNumber
= getSourceLineNumber ();
282 token
->filePosition
= getInputFilePosition ();
286 case EOF
: token
->type
= TOKEN_EOF
; break;
287 case '(': token
->type
= TOKEN_OPEN_PAREN
; break;
288 case ')': token
->type
= TOKEN_CLOSE_PAREN
; break;
289 case ';': token
->type
= TOKEN_SEMICOLON
; break;
290 case ',': token
->type
= TOKEN_COMMA
; break;
291 case '.': token
->type
= TOKEN_PERIOD
; break;
292 case ':': token
->type
= TOKEN_COLON
; break;
293 case '{': token
->type
= TOKEN_OPEN_CURLY
; break;
294 case '}': token
->type
= TOKEN_CLOSE_CURLY
; break;
295 case '[': token
->type
= TOKEN_OPEN_SQUARE
; break;
296 case ']': token
->type
= TOKEN_CLOSE_SQUARE
; break;
297 case '=': token
->type
= TOKEN_EQUAL_SIGN
; break;
301 token
->type
= TOKEN_STRING
;
302 parseString (token
->string
, c
);
303 token
->lineNumber
= getSourceLineNumber ();
304 token
->filePosition
= getInputFilePosition ();
312 /* <# ... #> multiline comment */
315 c
= skipToCharacter ('#');
330 token
->type
= TOKEN_UNDEFINED
;
335 case '#': /* comment */
336 skipSingleComment ();
349 token
->type
= TOKEN_OPERATOR
;
353 case '$': /* variable start */
356 if (! isIdentChar (d
))
359 token
->type
= TOKEN_UNDEFINED
;
363 parseIdentifier (token
->string
, d
);
364 token
->type
= TOKEN_VARIABLE
;
370 if (! isIdentChar (c
))
371 token
->type
= TOKEN_UNDEFINED
;
374 parseIdentifier (token
->string
, c
);
375 if (isTokenFunction (token
->string
))
376 token
->type
= TOKEN_KEYWORD
;
378 token
->type
= TOKEN_IDENTIFIER
;
384 static void enterScope (tokenInfo
*const parentToken
,
385 const vString
*const extraScope
,
386 const int parentKind
);
388 /* strip a possible PowerShell scope specification and convert it to accessType */
389 static const char *parsePowerShellScope (tokenInfo
*const token
)
391 const char *access
= ACCESS_UNDEFINED
;
392 const char *const tokenName
= vStringValue (token
->string
);
393 const char *powershellScopeEnd
;
395 powershellScopeEnd
= strchr (tokenName
, ':');
396 if (powershellScopeEnd
)
398 size_t powershellScopeLen
;
399 vString
* powershellScope
= vStringNew ();
401 powershellScopeLen
= (size_t)(powershellScopeEnd
- tokenName
);
402 /* extract the scope */
403 vStringNCopyS (powershellScope
, tokenName
, powershellScopeLen
);
404 vStringTerminate (powershellScope
);
405 /* cut the resulting scope string from the identifier */
406 memmove (token
->string
->buffer
,
407 /* +1 to skip the leading colon */
408 token
->string
->buffer
+ powershellScopeLen
+ 1,
409 /* +1 for the skipped leading colon and - 1 to include the trailing \0 byte */
410 token
->string
->length
+ 1 - powershellScopeLen
- 1);
411 token
->string
->length
-= powershellScopeLen
+ 1;
413 access
= findValidAccessType (vStringValue (powershellScope
));
415 vStringDelete (powershellScope
);
423 * function myfunc($foo, $bar) {}
425 static boolean
parseFunction (tokenInfo
*const token
)
427 boolean readNext
= TRUE
;
428 tokenInfo
*nameFree
= NULL
;
433 if (token
->type
!= TOKEN_IDENTIFIER
)
436 access
= parsePowerShellScope (token
);
438 nameFree
= newToken ();
439 copyToken (nameFree
, token
, TRUE
);
442 if (token
->type
== TOKEN_OPEN_PAREN
)
444 vString
*arglist
= vStringNew ();
447 vStringPut (arglist
, '(');
454 case TOKEN_OPEN_PAREN
: depth
++; break;
455 case TOKEN_CLOSE_PAREN
: depth
--; break;
461 case TOKEN_CLOSE_CURLY
: vStringPut (arglist
, '}'); break;
462 case TOKEN_CLOSE_PAREN
: vStringPut (arglist
, ')'); break;
463 case TOKEN_CLOSE_SQUARE
: vStringPut (arglist
, ']'); break;
464 case TOKEN_COLON
: vStringPut (arglist
, ':'); break;
465 case TOKEN_COMMA
: vStringCatS (arglist
, ", "); break;
466 case TOKEN_EQUAL_SIGN
: vStringCatS (arglist
, " = "); break;
467 case TOKEN_OPEN_CURLY
: vStringPut (arglist
, '{'); break;
468 case TOKEN_OPEN_PAREN
: vStringPut (arglist
, '('); break;
469 case TOKEN_OPEN_SQUARE
: vStringPut (arglist
, '['); break;
470 case TOKEN_PERIOD
: vStringPut (arglist
, '.'); break;
471 case TOKEN_SEMICOLON
: vStringPut (arglist
, ';'); break;
472 case TOKEN_STRING
: vStringCatS (arglist
, "'...'"); break;
474 case TOKEN_IDENTIFIER
:
478 switch (vStringLast (arglist
))
486 /* no need for a space between those and the identifier */
490 vStringPut (arglist
, ' ');
493 if (token
->type
== TOKEN_VARIABLE
)
494 vStringPut (arglist
, '$');
495 vStringCat (arglist
, token
->string
);
502 while (token
->type
!= TOKEN_EOF
&& depth
> 0);
504 vStringTerminate (arglist
);
506 makeFunctionTag (nameFree
, arglist
, access
);
507 vStringDelete (arglist
);
511 else if (token
->type
== TOKEN_OPEN_CURLY
)
512 { /* filters doesn't need to have an arglist */
513 makeFunctionTag (nameFree
, NULL
, access
);
516 if (token
->type
== TOKEN_OPEN_CURLY
)
517 enterScope (token
, nameFree
->string
, K_FUNCTION
);
522 deleteToken (nameFree
);
527 /* parses declarations of the form
530 static boolean
parseVariable (tokenInfo
*const token
)
533 boolean readNext
= TRUE
;
537 copyToken (name
, token
, TRUE
);
540 if (token
->type
== TOKEN_EQUAL_SIGN
)
542 if (token
->parentKind
!= K_FUNCTION
)
543 { /* ignore local variables (i.e. within a function) */
544 access
= parsePowerShellScope (name
);
545 makeSimplePowerShellTag (name
, K_VARIABLE
, access
);
557 static void enterScope (tokenInfo
*const parentToken
,
558 const vString
*const extraScope
,
559 const int parentKind
)
561 tokenInfo
*token
= newToken ();
562 int origParentKind
= parentToken
->parentKind
;
564 copyToken (token
, parentToken
, TRUE
);
568 addToScope (token
, extraScope
);
569 token
->parentKind
= parentKind
;
573 while (token
->type
!= TOKEN_EOF
&&
574 token
->type
!= TOKEN_CLOSE_CURLY
)
576 boolean readNext
= TRUE
;
580 case TOKEN_OPEN_CURLY
:
581 enterScope (token
, NULL
, -1);
585 readNext
= parseFunction (token
);
589 readNext
= parseVariable (token
);
599 copyToken (parentToken
, token
, FALSE
);
600 parentToken
->parentKind
= origParentKind
;
604 static void findPowerShellTags (void)
606 tokenInfo
*const token
= newToken ();
610 enterScope (token
, NULL
, -1);
612 while (token
->type
!= TOKEN_EOF
); /* keep going even with unmatched braces */
617 extern parserDefinition
* PowerShellParser (void)
619 static const char *const extensions
[] = { "ps1", "psm1", NULL
};
620 parserDefinition
* def
= parserNew ("PowerShell");
621 def
->kinds
= PowerShellKinds
;
622 def
->kindCount
= KIND_COUNT (PowerShellKinds
);
623 def
->extensions
= extensions
;
624 def
->parser
= findPowerShellTags
;
628 /* vi:set tabstop=4 shiftwidth=4: */