/*
 *   Copyright (c) 2015, Enrico Tröger <enrico.troeger@uvena.de>
 *
 *   Loosely based on the PHP tags parser since the syntax is somewhat similar
 *   regarding variable and function definitions.
 *
 *   This source code is released for free distribution under the terms of the
 *   GNU General Public License.
 *
 *   This module contains code for generating tags for Windows PowerShell scripts.
 */
16 #include "general.h" /* must always come first */
/* Separator inserted between nested scope names (used by addToScope). */
26 #define SCOPE_SEPARATOR "::"
/* Sentinel for "no access type"; findValidAccessType compares against it by
 * pointer value. */
29 #define ACCESS_UNDEFINED NULL
/* Table of access-type names that findValidAccessType searches with a
 * case-insensitive comparison.
 * NOTE(review): the initializer entries are not part of this extract. */
30 static const char *const accessTypes
[] = {
/* Tag kinds produced by this parser: one record for functions and one for
 * variables.  The table is indexed with K_FUNCTION / K_VARIABLE elsewhere in
 * this file, and the tag makers read each record's `enabled` member.
 * NOTE(review): the leading `true` presumably initialises `enabled`, followed
 * by the kind letter, name and description -- confirm against kindOption. */
44 static kindOption PowerShellKinds
[COUNT_KIND
] = {
45 { true, 'f', "function", "functions" },
46 { true, 'v', "variable", "variables" }
/* Token categories assigned by readToken (the TOKEN_* values used below).
 * NOTE(review): the enumerator list is not part of this extract. */
50 typedef enum eTokenType
{
/* NOTE(review): the two members below presumably belong to the token record
 * (tokenInfo) used throughout this file; the rest of that struct is not part
 * of this extract. */
/* input line on which the token was read */
75 unsigned long lineNumber
;
/* kind of the enclosing scope, used as an index into PowerShellKinds */
77 int parentKind
; /* -1 if none */
/* Looks up `access` in the accessTypes table, ignoring case.
 *
 * Returns the table's own string for a matching entry, or ACCESS_UNDEFINED
 * when `access` is ACCESS_UNDEFINED itself or no match is found.
 *
 * NOTE(review): several source lines of this function are not part of this
 * extract (braces, the declaration of `i`, the statement guarded by the
 * ACCESS_UNDEFINED test, and whatever stands between the matching `return`
 * and the end of the loop) -- confirm the loop body against the full file. */
81 static const char *findValidAccessType (const char *const access
)
84 if (access
== ACCESS_UNDEFINED
)
85 return ACCESS_UNDEFINED
; /* early out to save the for-loop if possible */
86 for (i
= 0; i
< ARRAY_SIZE(accessTypes
); i
++)
/* table slots equal to ACCESS_UNDEFINED are detected here; presumably they
 * are skipped so that they never reach strcasecmp -- TODO confirm */
88 if (accessTypes
[i
] == ACCESS_UNDEFINED
)
/* case-insensitive match: hand back the table's string, not the caller's */
90 if (strcasecmp (access
, accessTypes
[i
]) == 0)
91 return accessTypes
[i
];
/* no entry matched */
94 return ACCESS_UNDEFINED
;
/* Fills the tag entry `e` for `token` as a tag of the given `kind`.
 *
 * The entry is initialised from the token's name and the PowerShellKinds
 * record of `kind`, then receives the token's line number and file position
 * and the `access` extension field.  When the token carries a non-empty
 * scope, the scope name and the kind record of token->parentKind are stored
 * as the entry's scope information.
 *
 * NOTE(review): the source lines directly before the `access` assignment are
 * not part of this extract, so that assignment may be conditional in the
 * full file. */
97 static void initPowerShellEntry (tagEntryInfo
*const e
, const tokenInfo
*const token
,
98 const powerShellKind kind
, const char *const access
)
100 initTagEntry (e
, vStringValue (token
->string
), &(PowerShellKinds
[kind
]));
/* report the tag at the position recorded in the token */
102 e
->lineNumber
= token
->lineNumber
;
103 e
->filePosition
= token
->filePosition
;
106 e
->extensionFields
.access
= access
;
/* scope information is only attached when the token has a scope */
107 if (vStringLength (token
->scope
) > 0)
109 int parentKind
= token
->parentKind
;
/* a scoped token is expected to have a valid parent kind, since it is used
 * as an index into PowerShellKinds below */
110 Assert (parentKind
>= 0);
112 e
->extensionFields
.scopeKind
= &(PowerShellKinds
[parentKind
]);
113 e
->extensionFields
.scopeName
= vStringValue (token
->scope
);
/* Prepares a tag entry of `kind` for `token` with the given `access`, but
 * only when that kind is enabled in PowerShellKinds.
 *
 * NOTE(review): the declaration of `e` and everything after the
 * initPowerShellEntry call (presumably the call that emits the entry) are
 * not part of this extract. */
117 static void makeSimplePowerShellTag (const tokenInfo
*const token
, const powerShellKind kind
,
118 const char *const access
)
120 if (PowerShellKinds
[kind
].enabled
)
124 initPowerShellEntry (&e
, token
, kind
, access
);
/* Prepares a function tag for `token`, but only when the function kind is
 * enabled.  The text of `arglist` becomes the entry's signature field.
 *
 * NOTE(review): parseFunction also calls this with a NULL `arglist`; the
 * source lines around the signature assignment are not part of this extract,
 * so a guard for that case presumably exists in the full file -- confirm.
 * The declaration of `e` and the statements after the signature assignment
 * are likewise not visible here. */
129 static void makeFunctionTag (const tokenInfo
*const token
, const vString
*const arglist
,
130 const char *const access
)
132 if (PowerShellKinds
[K_FUNCTION
].enabled
)
136 initPowerShellEntry (&e
, token
, K_FUNCTION
, access
);
139 e
.extensionFields
.signature
= vStringValue (arglist
);
145 static tokenInfo
*newToken (void)
147 tokenInfo
*const token
= xMalloc (1, tokenInfo
);
149 token
->type
= TOKEN_UNDEFINED
;
150 token
->string
= vStringNew ();
151 token
->scope
= vStringNew ();
152 token
->lineNumber
= getInputLineNumber ();
153 token
->filePosition
= getInputFilePosition ();
154 token
->parentKind
= -1;
/* Releases the `string` and `scope` buffers owned by `token`.
 *
 * NOTE(review): the end of this function is not part of this extract;
 * presumably the token itself is freed there as well -- confirm against the
 * full file. */
159 static void deleteToken (tokenInfo
*const token
)
161 vStringDelete (token
->string
);
162 vStringDelete (token
->scope
);
/* Copies the state of `src` into `dest`: line number, file position, type,
 * name string, parent kind and scope string.
 *
 * NOTE(review): the parameter list continues past `src` (callers in this
 * file pass a third argument, `true` or `false`), but that parameter and the
 * statement that uses it are not part of this extract.  Presumably it
 * controls whether the parent kind and scope are copied -- confirm. */
166 static void copyToken (tokenInfo
*const dest
, const tokenInfo
*const src
,
169 dest
->lineNumber
= src
->lineNumber
;
170 dest
->filePosition
= src
->filePosition
;
171 dest
->type
= src
->type
;
172 vStringCopy (dest
->string
, src
->string
);
173 dest
->parentKind
= src
->parentKind
;
175 vStringCopy (dest
->scope
, src
->scope
);
178 static void addToScope (tokenInfo
*const token
, const vString
*const extra
)
180 if (vStringLength (token
->scope
) > 0)
181 vStringCatS (token
->scope
, SCOPE_SEPARATOR
);
182 vStringCatS (token
->scope
, vStringValue (extra
));
/* Tells whether `c` may be part of an identifier.
 *
 * Accepted are ASCII letters and digits, ':' (the scope qualifier separator
 * that parsePowerShellScope later strips), '_', '-' and every value from
 * 0x80 upwards.  EOF and any other negative value are rejected.
 *
 * The range checks run before isalnum() so that the <ctype.h> function is
 * only ever called with a 7-bit value: its argument must be representable
 * as unsigned char (or be EOF), and for 7-bit input the result does not
 * depend on how the locale classifies high bytes. */
static bool isIdentChar (const int c)
{
	if (c >= 0x80)
		return true;
	if (c < 0)
		return false;
	return (isalnum (c) || c == ':' || c == '_' || c == '-');
}
190 static void parseString (vString
*const string
, const int delimiter
)
194 int c
= getcFromInputFile ();
196 if (c
== '\\' && (c
= getcFromInputFile ()) != EOF
)
197 vStringPut (string
, (char) c
);
198 else if (c
== EOF
|| c
== delimiter
)
201 vStringPut (string
, (char) c
);
205 static void parseIdentifier (vString
*const string
, const int firstChar
)
210 vStringPut (string
, (char) c
);
211 c
= getcFromInputFile ();
212 } while (isIdentChar (c
));
213 ungetcToInputFile (c
);
216 static bool isTokenFunction (vString
*const name
)
218 return (strcasecmp (vStringValue (name
), "function") == 0 ||
219 strcasecmp (vStringValue (name
), "filter") == 0);
/* Tells whether `c` is one of the six ASCII whitespace characters: tab,
 * space, vertical tab, newline, carriage return or form feed.  The check is
 * independent of the locale; EOF is not whitespace. */
static bool isSpace (int c)
{
	switch (c)
	{
		case '\t':
		case ' ':
		case '\v':
		case '\n':
		case '\r':
		case '\f':
			return true;
		default:
			return false;
	}
}
/* Takes the character most recently read and reads further input.
 *
 * NOTE(review): only the read statement is part of this extract.  Presumably
 * it is the body of a loop that runs while `c` is whitespace (see isSpace)
 * and the first non-whitespace character is returned -- confirm against the
 * full file. */
228 static int skipWhitespaces (int c
)
231 c
= getcFromInputFile ();
/* Consumes input characters until a newline, a carriage return or EOF has
 * been read.
 *
 * NOTE(review): the loop head, the condition under which `next` is read and
 * pushed back, and the return statement are not part of this extract. */
235 static int skipSingleComment (void)
240 c
= getcFromInputFile ();
/* one character of look-ahead that is handed back to the input afterwards */
243 int next
= getcFromInputFile ();
245 ungetcToInputFile (next
);
/* stop at the end of the line or of the input */
249 } while (c
!= EOF
&& c
!= '\n' && c
!= '\r');
/* Reads the next token from the input into `token`.
 *
 * The token's type is reset to TOKEN_UNDEFINED and its string is cleared.
 * One character is read and passed through skipWhitespaces, the token's line
 * number and file position are recorded, and the character then selects the
 * token type.  Punctuation maps directly to its TOKEN_* value; strings,
 * comments, operators, '$' variables and identifiers/keywords have dedicated
 * branches.
 *
 * NOTE(review): many source lines of this function (the declaration of `c`,
 * the switch head, several case labels, loop and if/else structure) are not
 * part of this extract; comments below describe only what is visible. */
253 static void readToken (tokenInfo
*const token
)
257 token
->type
= TOKEN_UNDEFINED
;
258 vStringClear (token
->string
);
262 c
= getcFromInputFile ();
263 c
= skipWhitespaces (c
);
/* position of the token, taken after the whitespace has been skipped */
265 token
->lineNumber
= getInputLineNumber ();
266 token
->filePosition
= getInputFilePosition ();
/* single-character tokens */
270 case EOF
: token
->type
= TOKEN_EOF
; break;
271 case '(': token
->type
= TOKEN_OPEN_PAREN
; break;
272 case ')': token
->type
= TOKEN_CLOSE_PAREN
; break;
273 case ';': token
->type
= TOKEN_SEMICOLON
; break;
274 case ',': token
->type
= TOKEN_COMMA
; break;
275 case '.': token
->type
= TOKEN_PERIOD
; break;
276 case ':': token
->type
= TOKEN_COLON
; break;
277 case '{': token
->type
= TOKEN_OPEN_CURLY
; break;
278 case '}': token
->type
= TOKEN_CLOSE_CURLY
; break;
279 case '[': token
->type
= TOKEN_OPEN_SQUARE
; break;
280 case ']': token
->type
= TOKEN_CLOSE_SQUARE
; break;
281 case '=': token
->type
= TOKEN_EQUAL_SIGN
; break;
/* string literal: `c` is handed to parseString as the delimiter, and the
 * token position is then re-read, i.e. it is taken after the string has been
 * consumed (the case labels for this branch are not visible here) */
285 token
->type
= TOKEN_STRING
;
286 parseString (token
->string
, c
);
287 token
->lineNumber
= getInputLineNumber ();
288 token
->filePosition
= getInputFilePosition ();
/* look-ahead character `d` for the branch that recognises block comments */
293 int d
= getcFromInputFile ();
296 /* <# ... #> multiline comment */
/* advance to the next '#' and inspect the character after it; the
 * surrounding loop and tests are not visible here */
299 c
= skipToCharacterInInputFile ('#');
302 c
= getcFromInputFile ();
306 ungetcToInputFile (c
);
/* fallback of this branch: hand the look-ahead back and leave the token
 * undefined */
313 ungetcToInputFile (d
);
314 token
->type
= TOKEN_UNDEFINED
;
319 case '#': /* comment */
320 skipSingleComment ();
/* operator branch: one character of look-ahead is read and pushed back (the
 * case labels and the test on `d` are not visible here) */
330 int d
= getcFromInputFile ();
332 ungetcToInputFile (d
);
333 token
->type
= TOKEN_OPERATOR
;
337 case '$': /* variable start */
339 int d
= getcFromInputFile ();
/* a '$' not followed by an identifier character is not a variable */
340 if (! isIdentChar (d
))
342 ungetcToInputFile (d
);
343 token
->type
= TOKEN_UNDEFINED
;
/* the variable name is stored without its leading '$' */
347 parseIdentifier (token
->string
, d
);
348 token
->type
= TOKEN_VARIABLE
;
/* remaining characters: anything that cannot start an identifier is left
 * undefined */
354 if (! isIdentChar (c
))
355 token
->type
= TOKEN_UNDEFINED
;
/* identifier; "function" and "filter" are reported as keywords */
358 parseIdentifier (token
->string
, c
);
359 if (isTokenFunction (token
->string
))
360 token
->type
= TOKEN_KEYWORD
;
362 token
->type
= TOKEN_IDENTIFIER
;
368 static void enterScope (tokenInfo
*const parentToken
,
369 const vString
*const extraScope
,
370 const int parentKind
);
372 /* strip a possible PowerShell scope specification and convert it to accessType */
373 static const char *parsePowerShellScope (tokenInfo
*const token
)
375 const char *access
= ACCESS_UNDEFINED
;
376 const char *const tokenName
= vStringValue (token
->string
);
377 const char *powershellScopeEnd
;
379 powershellScopeEnd
= strchr (tokenName
, ':');
380 if (powershellScopeEnd
)
382 size_t powershellScopeLen
;
383 vString
* powershellScope
= vStringNew ();
385 powershellScopeLen
= (size_t)(powershellScopeEnd
- tokenName
);
386 /* extract the scope */
387 vStringNCopyS (powershellScope
, tokenName
, powershellScopeLen
);
388 /* cut the resulting scope string from the identifier */
389 memmove (token
->string
->buffer
,
390 /* +1 to skip the leading colon */
391 token
->string
->buffer
+ powershellScopeLen
+ 1,
392 /* +1 for the skipped leading colon and - 1 to include the trailing \0 byte */
393 token
->string
->length
+ 1 - powershellScopeLen
- 1);
394 token
->string
->length
-= powershellScopeLen
+ 1;
396 access
= findValidAccessType (vStringValue (powershellScope
));
398 vStringDelete (powershellScope
);
/* parses a function definition such as
 *
 * 	function myfunc($foo, $bar) {}
 */
/* Handles a function definition; `token` is the parser's current token.
 *
 * Visible flow: the token must be an identifier (the function name).  A
 * possible scope qualifier is stripped from the name and turned into an
 * access type, and the name token is copied to `nameFree`.  If an opening
 * parenthesis follows, the argument list is rebuilt as text in `arglist`
 * while the parenthesis depth is tracked, and a function tag with that
 * signature is made.  If an opening curly brace follows directly instead, a
 * function tag without argument list is made.  When the token is an opening
 * curly brace, the function body is parsed by enterScope with the function
 * name as additional scope and K_FUNCTION as parent kind.  Finally
 * `nameFree` is deleted.
 *
 * NOTE(review): the token reads, loop heads, declarations of `access` and
 * `depth`, the early exit for non-identifiers and the return statement are
 * not part of this extract; presumably `readNext` is returned. */
408 static bool parseFunction (tokenInfo
*const token
)
410 bool readNext
= true;
411 tokenInfo
*nameFree
= NULL
;
/* anything but an identifier cannot be a function name */
416 if (token
->type
!= TOKEN_IDENTIFIER
)
/* turn a qualifier like "global:" into the tag's access type */
419 access
= parsePowerShellScope (token
);
/* keep a private copy of the name; `token` moves on while the argument list
 * is read */
421 nameFree
= newToken ();
422 copyToken (nameFree
, token
, true);
425 if (token
->type
== TOKEN_OPEN_PAREN
)
427 vString
*arglist
= vStringNew ();
430 vStringPut (arglist
, '(');
/* track nesting so that the list ends at the matching parenthesis */
437 case TOKEN_OPEN_PAREN
: depth
++; break;
438 case TOKEN_CLOSE_PAREN
: depth
--; break;
/* append a textual form of the token to the signature */
444 case TOKEN_CLOSE_CURLY
: vStringPut (arglist
, '}'); break;
445 case TOKEN_CLOSE_PAREN
: vStringPut (arglist
, ')'); break;
446 case TOKEN_CLOSE_SQUARE
: vStringPut (arglist
, ']'); break;
447 case TOKEN_COLON
: vStringPut (arglist
, ':'); break;
448 case TOKEN_COMMA
: vStringCatS (arglist
, ", "); break;
449 case TOKEN_EQUAL_SIGN
: vStringCatS (arglist
, " = "); break;
450 case TOKEN_OPEN_CURLY
: vStringPut (arglist
, '{'); break;
451 case TOKEN_OPEN_PAREN
: vStringPut (arglist
, '('); break;
452 case TOKEN_OPEN_SQUARE
: vStringPut (arglist
, '['); break;
453 case TOKEN_PERIOD
: vStringPut (arglist
, '.'); break;
454 case TOKEN_SEMICOLON
: vStringPut (arglist
, ';'); break;
/* string contents are not reproduced in the signature */
455 case TOKEN_STRING
: vStringCatS (arglist
, "'...'"); break;
457 case TOKEN_IDENTIFIER
:
/* the last character written so far decides whether a separating space is
 * needed before the name */
461 switch (vStringLast (arglist
))
469 /* no need for a space between those and the identifier */
473 vStringPut (arglist
, ' ');
/* variable tokens are stored without '$', so it is put back here */
476 if (token
->type
== TOKEN_VARIABLE
)
477 vStringPut (arglist
, '$');
478 vStringCat (arglist
, token
->string
);
/* stop at the matching close parenthesis or at the end of the input */
485 while (token
->type
!= TOKEN_EOF
&& depth
> 0);
487 makeFunctionTag (nameFree
, arglist
, access
);
488 vStringDelete (arglist
);
492 else if (token
->type
== TOKEN_OPEN_CURLY
)
493 { /* filters don't need to have an arglist */
494 makeFunctionTag (nameFree
, NULL
, access
);
/* parse the body with the function name added to the scope */
497 if (token
->type
== TOKEN_OPEN_CURLY
)
498 enterScope (token
, nameFree
->string
, K_FUNCTION
);
503 deleteToken (nameFree
);
/* parses declarations of the form
 * 	$variable = <value>
 */
/* Handles a variable token; `token` is the parser's current token.
 *
 * Visible flow: the variable token is copied to `name`.  If the token is
 * then an equal sign and the enclosing scope is not a function, a possible
 * scope qualifier is stripped from the name and a variable tag is made.
 *
 * NOTE(review): the creation and release of `name`, the declaration of
 * `access`, the token read between the copy and the equal-sign test, and the
 * return statement are not part of this extract; presumably `readNext` is
 * returned. */
511 static bool parseVariable (tokenInfo
*const token
)
514 bool readNext
= true;
518 copyToken (name
, token
, true);
/* only an assignment counts as a declaration */
521 if (token
->type
== TOKEN_EQUAL_SIGN
)
523 if (token
->parentKind
!= K_FUNCTION
)
524 { /* ignore local variables (i.e. within a function) */
525 access
= parsePowerShellScope (name
);
526 makeSimplePowerShellTag (name
, K_VARIABLE
, access
);
/* Parses the contents of one scope (a block).
 *
 * A working token is created as a copy of `parentToken`; `extraScope` is
 * added to its scope and `parentKind` becomes its parent kind.  Tokens are
 * then processed until EOF or a closing curly brace: a nested opening brace
 * recurses into enterScope without extra scope, and function keywords and
 * variables are handed to parseFunction and parseVariable.  At the end the
 * working token's state is copied back into `parentToken` (third copyToken
 * argument `false`) and the caller's original parent kind is restored.
 *
 * NOTE(review): the token reads, the switch head and its remaining case
 * labels, the use of `readNext` and the release of the working token are not
 * part of this extract.  This function is also called with a NULL
 * `extraScope`; the line before the addToScope call is missing, so a NULL
 * guard presumably exists there -- confirm. */
538 static void enterScope (tokenInfo
*const parentToken
,
539 const vString
*const extraScope
,
540 const int parentKind
)
542 tokenInfo
*token
= newToken ();
/* remembered so that it can be restored when the scope is left */
543 int origParentKind
= parentToken
->parentKind
;
545 copyToken (token
, parentToken
, true);
549 addToScope (token
, extraScope
);
550 token
->parentKind
= parentKind
;
/* a closing brace ends this scope; EOF ends everything */
554 while (token
->type
!= TOKEN_EOF
&&
555 token
->type
!= TOKEN_CLOSE_CURLY
)
557 bool readNext
= true;
/* anonymous nested block */
561 case TOKEN_OPEN_CURLY
:
562 enterScope (token
, NULL
, -1);
566 readNext
= parseFunction (token
);
570 readNext
= parseVariable (token
);
/* hand the current position back to the caller */
580 copyToken (parentToken
, token
, false);
581 parentToken
->parentKind
= origParentKind
;
/* Parser entry point (installed as def->parser by PowerShellParser).
 *
 * A token is created and the top-level scope is parsed with enterScope (no
 * extra scope, no parent kind).  The call is repeated until EOF, because
 * enterScope also returns at a closing curly brace.
 *
 * NOTE(review): the loop head and the release of the token are not part of
 * this extract. */
585 static void findPowerShellTags (void)
587 tokenInfo
*const token
= newToken ();
591 enterScope (token
, NULL
, -1);
593 while (token
->type
!= TOKEN_EOF
); /* keep going even with unmatched braces */
598 extern parserDefinition
* PowerShellParser (void)
600 static const char *const extensions
[] = { "ps1", "psm1", NULL
};
601 parserDefinition
* def
= parserNew ("PowerShell");
602 def
->kinds
= PowerShellKinds
;
603 def
->kindCount
= ARRAY_SIZE (PowerShellKinds
);
604 def
->extensions
= extensions
;
605 def
->parser
= findPowerShellTags
;