/*
 *   Copyright (c) 2015, Enrico Tröger <enrico.troeger@uvena.de>
 *
 *   Loosely based on the PHP tags parser since the syntax is somewhat similar
 *   regarding variable and function definitions.
 *
 *   This source code is released for free distribution under the terms of the
 *   GNU General Public License.
 *
 *   This module contains code for generating tags for Windows PowerShell scripts.
 */
16 #include "general.h" /* must always come first */
26 #define SCOPE_SEPARATOR "::"
29 #define ACCESS_UNDEFINED NULL
30 static const char *const accessTypes
[] = {
/* Table of the tag kinds this parser emits, one row per kind:
 * letter 'f' / "function" and letter 'v' / "variable".
 * Other code in this file indexes it as PowerShellKinds[kind] and reads
 * the row's .enabled member before emitting a tag. */
44 static kindOption PowerShellKinds
[COUNT_KIND
] = {
45 { TRUE
, 'f', "function", "functions" },
46 { TRUE
, 'v', "variable", "variables" }
50 typedef enum eTokenType
{
75 unsigned long lineNumber
;
77 int parentKind
; /* -1 if none */
/* Map a scope prefix string onto one of the canonical entries of
 * accessTypes[], comparing case-insensitively with strcasecmp().
 *
 * access: candidate string, or ACCESS_UNDEFINED (NULL).
 * Returns the matching accessTypes[] element (a pointer into the static
 * table, not a copy), or ACCESS_UNDEFINED when `access` is undefined or
 * nothing matches.
 *
 * NOTE(review): the statement guarded by the ACCESS_UNDEFINED test inside
 * the loop, and the end of the loop body, are not visible in this listing;
 * presumably undefined table slots are skipped -- confirm. */
81 static const char *findValidAccessType (const char *const access
)
84 if (access
== ACCESS_UNDEFINED
)
85 return ACCESS_UNDEFINED
; /* early out to save the for-loop if possible */
/* walk every slot of the accessTypes table */
86 for (i
= 0; i
< ARRAY_SIZE(accessTypes
); i
++)
/* a table slot may itself be ACCESS_UNDEFINED; it must not reach strcasecmp */
88 if (accessTypes
[i
] == ACCESS_UNDEFINED
)
90 if (strcasecmp (access
, accessTypes
[i
]) == 0)
91 return accessTypes
[i
];
/* no table entry matched */
94 return ACCESS_UNDEFINED
;
/* Fill the tag entry `e` from `token`.
 *
 * e:      entry to initialise (passed to initTagEntry together with the
 *         token's name and the PowerShellKinds row selected by `kind`).
 * token:  source of the tag name, line number, file position and scope.
 * kind:   index into PowerShellKinds.
 * access: value stored in e->extensionFields.access.
 *
 * When the token carries a non-empty scope string, the scope kind is taken
 * from PowerShellKinds[token->parentKind] and the scope name from the
 * token's scope string.
 *
 * NOTE(review): the line directly before the access assignment is not
 * visible in this listing; confirm whether that assignment is conditional. */
97 static void initPowerShellEntry (tagEntryInfo
*const e
, const tokenInfo
*const token
,
98 const powerShellKind kind
, const char *const access
)
100 initTagEntry (e
, vStringValue (token
->string
), &(PowerShellKinds
[kind
]));
/* report the position recorded in the token */
102 e
->lineNumber
= token
->lineNumber
;
103 e
->filePosition
= token
->filePosition
;
106 e
->extensionFields
.access
= access
;
/* scope information is only attached for tokens that have a scope string */
107 if (vStringLength (token
->scope
) > 0)
109 int parentKind
= token
->parentKind
;
/* parentKind is used as an array index below, so it must not be the -1 "none" value */
110 Assert (parentKind
>= 0);
112 e
->extensionFields
.scopeKind
= &(PowerShellKinds
[parentKind
]);
113 e
->extensionFields
.scopeName
= vStringValue (token
->scope
);
/* Build a tag entry without a signature for `token`.
 *
 * Nothing is done unless the requested kind is enabled in PowerShellKinds.
 * When it is, a local entry `e` is initialised through initPowerShellEntry
 * with the given token, kind and access.
 *
 * NOTE(review): the declaration of `e` and the statement that hands the
 * entry over for output are not visible in this listing. */
117 static void makeSimplePowerShellTag (const tokenInfo
*const token
, const powerShellKind kind
,
118 const char *const access
)
120 if (PowerShellKinds
[kind
].enabled
)
124 initPowerShellEntry (&e
, token
, kind
, access
);
/* Build a function tag entry for `token`, including its signature.
 *
 * Nothing is done unless K_FUNCTION is enabled in PowerShellKinds.
 * When it is, a local entry `e` is initialised through initPowerShellEntry
 * as K_FUNCTION and its extensionFields.signature is pointed at the
 * character data of `arglist` (no copy is made on the visible lines).
 *
 * NOTE(review): parseFunction also calls this with a NULL arglist; the
 * guard around the signature assignment and the statement that emits the
 * entry are not visible in this listing -- confirm both exist. */
129 static void makeFunctionTag (const tokenInfo
*const token
, const vString
*const arglist
,
130 const char *const access
)
132 if (PowerShellKinds
[K_FUNCTION
].enabled
)
136 initPowerShellEntry (&e
, token
, K_FUNCTION
, access
);
139 e
.extensionFields
.signature
= vStringValue (arglist
);
145 static tokenInfo
*newToken (void)
147 tokenInfo
*const token
= xMalloc (1, tokenInfo
);
149 token
->type
= TOKEN_UNDEFINED
;
150 token
->string
= vStringNew ();
151 token
->scope
= vStringNew ();
152 token
->lineNumber
= getInputLineNumber ();
153 token
->filePosition
= getInputFilePosition ();
154 token
->parentKind
= -1;
/* Dispose of a token: releases the vStrings held in token->string and
 * token->scope.
 *
 * NOTE(review): the release of the token structure itself is not visible
 * in this listing -- confirm it follows the two vStringDelete calls. */
159 static void deleteToken (tokenInfo
*const token
)
161 vStringDelete (token
->string
);
162 vStringDelete (token
->scope
);
/* Copy the state of `src` into the already allocated token `dest`:
 * line number, file position, type, name string (by value via vStringCopy),
 * parent kind and scope string.
 *
 * NOTE(review): the parameter list continues on a line that is not visible
 * in this listing; callers pass TRUE or FALSE as a third argument, which
 * presumably decides whether the scope is copied -- confirm. */
166 static void copyToken (tokenInfo
*const dest
, const tokenInfo
*const src
,
169 dest
->lineNumber
= src
->lineNumber
;
170 dest
->filePosition
= src
->filePosition
;
171 dest
->type
= src
->type
;
/* string contents are duplicated, the vString objects stay separate */
172 vStringCopy (dest
->string
, src
->string
);
173 dest
->parentKind
= src
->parentKind
;
175 vStringCopy (dest
->scope
, src
->scope
);
178 static void addToScope (tokenInfo
*const token
, const vString
*const extra
)
180 if (vStringLength (token
->scope
) > 0)
181 vStringCatS (token
->scope
, SCOPE_SEPARATOR
);
182 vStringCatS (token
->scope
, vStringValue (extra
));
183 vStringTerminate (token
->scope
);
186 static boolean
isIdentChar (const int c
)
188 return (isalnum (c
) || c
== ':' || c
== '_' || c
== '-' || c
>= 0x80);
/* Consume input characters with getcFromInputFile() until either the
 * character `c` or EOF has been read.
 *
 * NOTE(review): the declaration of `d` and the return statement are not
 * visible in this listing; presumably the last character read is returned. */
191 static int skipToCharacter (const int c
)
196 d
= getcFromInputFile ();
/* stop on the wanted character or when the input is exhausted */
197 } while (d
!= EOF
&& d
!= c
);
/* Read the body of a quoted string into `string`.
 *
 * string:    receives the characters read; terminated before returning.
 * delimiter: character that, like EOF, is tested for as the end of the string.
 *
 * A backslash followed by any character other than EOF stores that
 * following character instead of the backslash.
 *
 * NOTE(review): the enclosing loop and the statement executed on
 * EOF/delimiter are not visible in this listing (presumably a break). */
201 static void parseString (vString
*const string
, const int delimiter
)
205 int c
= getcFromInputFile ();
/* backslash: take the next character verbatim (it replaces c) */
207 if (c
== '\\' && (c
= getcFromInputFile ()) != EOF
)
208 vStringPut (string
, (char) c
);
209 else if (c
== EOF
|| c
== delimiter
)
212 vStringPut (string
, (char) c
);
214 vStringTerminate (string
);
/* Collect an identifier into `string`.
 *
 * Characters are appended and the next one is read for as long as
 * isIdentChar() accepts it; the first rejected character is pushed back
 * with ungetcToInputFile() and the string is terminated.
 *
 * NOTE(review): the initialisation of `c` is not visible in this listing;
 * presumably it starts out as `firstChar`. */
217 static void parseIdentifier (vString
*const string
, const int firstChar
)
222 vStringPut (string
, (char) c
);
223 c
= getcFromInputFile ();
224 } while (isIdentChar (c
));
/* c is not part of the identifier: leave it for the next reader */
225 ungetcToInputFile (c
);
226 vStringTerminate (string
);
229 static boolean
isTokenFunction (vString
*const name
)
231 return (strcasecmp (vStringValue (name
), "function") == 0 ||
232 strcasecmp (vStringValue (name
), "filter") == 0);
235 static boolean
isSpace (int c
)
237 return (c
== '\t' || c
== ' ' || c
== '\v' ||
238 c
== '\n' || c
== '\r' || c
== '\f');
/* Starting from the already-read character `c`, read further characters
 * with getcFromInputFile().
 *
 * NOTE(review): the loop condition and the return statement are not
 * visible in this listing; presumably input is consumed while isSpace(c)
 * holds and the first non-whitespace character is returned -- confirm. */
241 static int skipWhitespaces (int c
)
244 c
= getcFromInputFile ();
/* Consume the rest of a single-line comment: characters are read until
 * EOF, '\n' or '\r' has been seen.
 *
 * Inside the loop one extra character is read into `next` and handed back
 * with ungetcToInputFile().
 *
 * NOTE(review): the conditions around the look-ahead and the return
 * statement are not visible in this listing; presumably this handles
 * "\r\n" line endings and returns the last character read -- confirm. */
248 static int skipSingleComment (void)
253 c
= getcFromInputFile ();
256 int next
= getcFromInputFile ();
258 ungetcToInputFile (next
);
/* a comment ends at the end of the line or of the input */
262 } while (c
!= EOF
&& c
!= '\n' && c
!= '\r');
/* Read the next token from the input into `token`.
 *
 * The token is reset (type TOKEN_UNDEFINED, empty string), one character
 * is read and passed through skipWhitespaces(), and the current line
 * number and file position are stored in the token. The character then
 * selects the token type:
 *   - EOF and the punctuation characters ( ) ; , . : { } [ ] = map
 *     directly to their TOKEN_* value;
 *   - string literals are read with parseString() using the current
 *     character as delimiter, after which line number and position are
 *     stored again;
 *   - a "<# ... #>" multiline comment is skipped with skipToCharacter('#');
 *   - '#' starts a single-line comment, skipped with skipSingleComment();
 *   - operators yield TOKEN_OPERATOR after peeking at one more character;
 *   - '$' followed by an identifier character yields TOKEN_VARIABLE with
 *     the name (without the '$') in token->string, otherwise TOKEN_UNDEFINED;
 *   - any other identifier yields TOKEN_KEYWORD when isTokenFunction()
 *     accepts it and TOKEN_IDENTIFIER otherwise; a character that cannot
 *     start an identifier yields TOKEN_UNDEFINED.
 *
 * NOTE(review): the switch skeleton, several case labels and the handling
 * after comments are not visible in this listing; the branch descriptions
 * above follow the visible statements and inline comments only. */
266 static void readToken (tokenInfo
*const token
)
270 token
->type
= TOKEN_UNDEFINED
;
271 vStringClear (token
->string
);
275 c
= getcFromInputFile ();
276 c
= skipWhitespaces (c
);
/* position of the first significant character of the token */
278 token
->lineNumber
= getInputLineNumber ();
279 token
->filePosition
= getInputFilePosition ();
/* single-character tokens */
283 case EOF
: token
->type
= TOKEN_EOF
; break;
284 case '(': token
->type
= TOKEN_OPEN_PAREN
; break;
285 case ')': token
->type
= TOKEN_CLOSE_PAREN
; break;
286 case ';': token
->type
= TOKEN_SEMICOLON
; break;
287 case ',': token
->type
= TOKEN_COMMA
; break;
288 case '.': token
->type
= TOKEN_PERIOD
; break;
289 case ':': token
->type
= TOKEN_COLON
; break;
290 case '{': token
->type
= TOKEN_OPEN_CURLY
; break;
291 case '}': token
->type
= TOKEN_CLOSE_CURLY
; break;
292 case '[': token
->type
= TOKEN_OPEN_SQUARE
; break;
293 case ']': token
->type
= TOKEN_CLOSE_SQUARE
; break;
294 case '=': token
->type
= TOKEN_EQUAL_SIGN
; break;
/* string literal: the character just read doubles as the delimiter */
298 token
->type
= TOKEN_STRING
;
299 parseString (token
->string
, c
);
300 token
->lineNumber
= getInputLineNumber ();
301 token
->filePosition
= getInputFilePosition ();
306 int d
= getcFromInputFile ();
309 /* <# ... #> multiline comment */
312 c
= skipToCharacter ('#');
315 c
= getcFromInputFile ();
319 ungetcToInputFile (c
);
326 ungetcToInputFile (d
);
327 token
->type
= TOKEN_UNDEFINED
;
332 case '#': /* comment */
333 skipSingleComment ();
/* operator: the look-ahead character is pushed back again */
343 int d
= getcFromInputFile ();
345 ungetcToInputFile (d
);
346 token
->type
= TOKEN_OPERATOR
;
350 case '$': /* variable start */
352 int d
= getcFromInputFile ();
/* a lone '$' is not a variable */
353 if (! isIdentChar (d
))
355 ungetcToInputFile (d
);
356 token
->type
= TOKEN_UNDEFINED
;
360 parseIdentifier (token
->string
, d
);
361 token
->type
= TOKEN_VARIABLE
;
/* anything else: identifier or keyword, if it starts with an identifier character */
367 if (! isIdentChar (c
))
368 token
->type
= TOKEN_UNDEFINED
;
371 parseIdentifier (token
->string
, c
);
372 if (isTokenFunction (token
->string
))
373 token
->type
= TOKEN_KEYWORD
;
375 token
->type
= TOKEN_IDENTIFIER
;
/* Forward declaration: parseFunction() calls enterScope(), which is
 * defined further down in this file. */
381 static void enterScope (tokenInfo
*const parentToken
,
382 const vString
*const extraScope
,
383 const int parentKind
);
385 /* strip a possible PowerShell scope specification and convert it to accessType */
386 static const char *parsePowerShellScope (tokenInfo
*const token
)
388 const char *access
= ACCESS_UNDEFINED
;
389 const char *const tokenName
= vStringValue (token
->string
);
390 const char *powershellScopeEnd
;
392 powershellScopeEnd
= strchr (tokenName
, ':');
393 if (powershellScopeEnd
)
395 size_t powershellScopeLen
;
396 vString
* powershellScope
= vStringNew ();
398 powershellScopeLen
= (size_t)(powershellScopeEnd
- tokenName
);
399 /* extract the scope */
400 vStringNCopyS (powershellScope
, tokenName
, powershellScopeLen
);
401 vStringTerminate (powershellScope
);
402 /* cut the resulting scope string from the identifier */
403 memmove (token
->string
->buffer
,
404 /* +1 to skip the leading colon */
405 token
->string
->buffer
+ powershellScopeLen
+ 1,
406 /* +1 for the skipped leading colon and - 1 to include the trailing \0 byte */
407 token
->string
->length
+ 1 - powershellScopeLen
- 1);
408 token
->string
->length
-= powershellScopeLen
+ 1;
410 access
= findValidAccessType (vStringValue (powershellScope
));
412 vStringDelete (powershellScope
);
/* parse a function definition, e.g.
 *   function myfunc($foo, $bar) {}
 */
/* Handle the tokens following a function keyword.
 *
 * token: current token; the visible code requires it to be a
 *        TOKEN_IDENTIFIER holding the function name.
 *
 * Steps visible in this listing:
 *   1. a possible scope prefix is stripped from the name with
 *      parsePowerShellScope(), giving the access type;
 *   2. the name token is copied into a temporary token (nameFree);
 *   3. if an opening parenthesis follows, the argument list is rebuilt as
 *      text in `arglist`: punctuation is re-emitted literally, ", " and
 *      " = " get spacing, string tokens are abbreviated to '...', variables
 *      get their '$' back, and `depth` tracks nested parentheses until it
 *      drops to zero or EOF is reached; the function tag is then made with
 *      that signature;
 *   4. if an opening curly brace follows directly, the tag is made with a
 *      NULL argument list;
 *   5. when the current token is an opening curly brace, the body is
 *      parsed via enterScope() with the function name as extra scope and
 *      K_FUNCTION as parent kind;
 *   6. the temporary name token is deleted.
 *
 * NOTE(review): the statements that read the next token, the early-exit
 * path for non-identifiers and the return statement are not visible in
 * this listing; presumably `readNext` is the return value. */
422 static boolean
parseFunction (tokenInfo
*const token
)
424 boolean readNext
= TRUE
;
425 tokenInfo
*nameFree
= NULL
;
430 if (token
->type
!= TOKEN_IDENTIFIER
)
433 access
= parsePowerShellScope (token
);
/* keep the function name; `token` is reused for the tokens that follow */
435 nameFree
= newToken ();
436 copyToken (nameFree
, token
, TRUE
);
439 if (token
->type
== TOKEN_OPEN_PAREN
)
441 vString
*arglist
= vStringNew ();
444 vStringPut (arglist
, '(');
/* nesting level of parentheses inside the argument list */
451 case TOKEN_OPEN_PAREN
: depth
++; break;
452 case TOKEN_CLOSE_PAREN
: depth
--; break;
/* textual form of each token for the signature */
458 case TOKEN_CLOSE_CURLY
: vStringPut (arglist
, '}'); break;
459 case TOKEN_CLOSE_PAREN
: vStringPut (arglist
, ')'); break;
460 case TOKEN_CLOSE_SQUARE
: vStringPut (arglist
, ']'); break;
461 case TOKEN_COLON
: vStringPut (arglist
, ':'); break;
462 case TOKEN_COMMA
: vStringCatS (arglist
, ", "); break;
463 case TOKEN_EQUAL_SIGN
: vStringCatS (arglist
, " = "); break;
464 case TOKEN_OPEN_CURLY
: vStringPut (arglist
, '{'); break;
465 case TOKEN_OPEN_PAREN
: vStringPut (arglist
, '('); break;
466 case TOKEN_OPEN_SQUARE
: vStringPut (arglist
, '['); break;
467 case TOKEN_PERIOD
: vStringPut (arglist
, '.'); break;
468 case TOKEN_SEMICOLON
: vStringPut (arglist
, ';'); break;
469 case TOKEN_STRING
: vStringCatS (arglist
, "'...'"); break;
471 case TOKEN_IDENTIFIER
:
475 switch (vStringLast (arglist
))
483 /* no need for a space between those and the identifier */
487 vStringPut (arglist
, ' ');
/* the tokenizer drops the '$' of variables, so put it back */
490 if (token
->type
== TOKEN_VARIABLE
)
491 vStringPut (arglist
, '$');
492 vStringCat (arglist
, token
->string
);
499 while (token
->type
!= TOKEN_EOF
&& depth
> 0);
501 vStringTerminate (arglist
);
503 makeFunctionTag (nameFree
, arglist
, access
);
504 vStringDelete (arglist
);
508 else if (token
->type
== TOKEN_OPEN_CURLY
)
509 { /* filters doesn't need to have an arglist */
510 makeFunctionTag (nameFree
, NULL
, access
);
/* parse the function body with the function name added to the scope */
513 if (token
->type
== TOKEN_OPEN_CURLY
)
514 enterScope (token
, nameFree
->string
, K_FUNCTION
);
519 deleteToken (nameFree
);
/* parses declarations of the form
 *   $var = VALUE
 */
/* Handle a variable token and emit a variable tag for an assignment.
 *
 * token: current token. Its state is copied into `name` before the visible
 *        code tests whether the current token is an equal sign.
 *
 * A K_VARIABLE tag is made only when an equal sign follows and the token's
 * parent kind is not K_FUNCTION; a scope prefix in the variable name is
 * converted to the tag's access type by parsePowerShellScope().
 *
 * NOTE(review): the creation and deletion of `name`, the read of the
 * following token and the return statement are not visible in this
 * listing; presumably `readNext` is the return value. */
527 static boolean
parseVariable (tokenInfo
*const token
)
530 boolean readNext
= TRUE
;
534 copyToken (name
, token
, TRUE
);
537 if (token
->type
== TOKEN_EQUAL_SIGN
)
539 if (token
->parentKind
!= K_FUNCTION
)
540 { /* ignore local variables (i.e. within a function) */
541 access
= parsePowerShellScope (name
);
542 makeSimplePowerShellTag (name
, K_VARIABLE
, access
);
/* Parse the contents of a scope (the whole input or a brace-delimited
 * block) until EOF or the matching closing curly brace.
 *
 * parentToken: token of the enclosing scope; on exit it receives the
 *              state of the last token read (copied with FALSE as third
 *              copyToken argument) and gets its original parentKind back.
 * extraScope:  name to append to the scope string, or NULL.
 * parentKind:  kind recorded as parent for tokens inside the scope.
 *
 * Work is done on a private copy of the parent token. Inside the loop a
 * nested opening brace recurses with no extra scope and parent kind -1,
 * and function and variable constructs are delegated to parseFunction()
 * and parseVariable(), whose result controls `readNext`.
 *
 * NOTE(review): the guard around the addToScope call (callers pass a NULL
 * extraScope), the case labels for function/variable tokens, the token
 * reads and the deletion of the private token are not visible in this
 * listing -- confirm. */
554 static void enterScope (tokenInfo
*const parentToken
,
555 const vString
*const extraScope
,
556 const int parentKind
)
558 tokenInfo
*token
= newToken ();
/* remembered so the caller's parent kind can be restored on exit */
559 int origParentKind
= parentToken
->parentKind
;
561 copyToken (token
, parentToken
, TRUE
);
565 addToScope (token
, extraScope
);
566 token
->parentKind
= parentKind
;
/* a scope ends at its closing brace or at the end of the input */
570 while (token
->type
!= TOKEN_EOF
&&
571 token
->type
!= TOKEN_CLOSE_CURLY
)
573 boolean readNext
= TRUE
;
577 case TOKEN_OPEN_CURLY
:
578 enterScope (token
, NULL
, -1);
582 readNext
= parseFunction (token
);
586 readNext
= parseVariable (token
);
/* hand the current token state back to the caller */
596 copyToken (parentToken
, token
, FALSE
);
597 parentToken
->parentKind
= origParentKind
;
/* Parser entry point (installed as def->parser by PowerShellParser).
 *
 * Creates a token and parses the input as top-level scope -- no extra
 * scope name, parent kind -1 -- calling enterScope() again until the token
 * reaches TOKEN_EOF, so that a stray closing brace does not end parsing.
 *
 * NOTE(review): the deletion of the token is not visible in this listing. */
601 static void findPowerShellTags (void)
603 tokenInfo
*const token
= newToken ();
607 enterScope (token
, NULL
, -1);
609 while (token
->type
!= TOKEN_EOF
); /* keep going even with unmatched braces */
614 extern parserDefinition
* PowerShellParser (void)
616 static const char *const extensions
[] = { "ps1", "psm1", NULL
};
617 parserDefinition
* def
= parserNew ("PowerShell");
618 def
->kinds
= PowerShellKinds
;
619 def
->kindCount
= ARRAY_SIZE (PowerShellKinds
);
620 def
->extensions
= extensions
;
621 def
->parser
= findPowerShellTags
;
625 /* vi:set tabstop=4 shiftwidth=4: */