2 * Copyright (c) 2013, Colomban Wendling <ban@herbesfolles.org>
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License version 2 or (at your option) any later version.
7 * This module contains code for generating tags for the PHP scripting
10 * The language reference: http://php.net/manual/en/langref.php
16 #include "general.h" /* must always come first */
29 #define isIdentChar(c) (isalnum (c) || (c) == '_' || (c) >= 0x80)
30 #define newToken() (objPoolGet (TokenPool))
31 #define deleteToken(t) (objPoolPut (TokenPool, (t)))
94 typedef int keywordId
; /* to allow KEYWORD_NONE */
123 #define NAMESPACE_SEPARATOR "\\"
124 static scopeSeparator PhpGenericSeparators
[] = {
125 { K_NAMESPACE
, NAMESPACE_SEPARATOR
},
126 { KIND_WILDCARD_INDEX
, "::" },
129 static kindDefinition PhpKinds
[COUNT_KIND
] = {
130 { true, 'c', "class", "classes",
131 ATTACH_SEPARATORS(PhpGenericSeparators
) },
132 { true, 'd', "define", "constant definitions",
133 ATTACH_SEPARATORS(PhpGenericSeparators
)},
134 { true, 'f', "function", "functions",
135 ATTACH_SEPARATORS(PhpGenericSeparators
)},
136 { true, 'i', "interface", "interfaces",
137 ATTACH_SEPARATORS(PhpGenericSeparators
)},
138 { false, 'l', "local", "local variables",
139 ATTACH_SEPARATORS(PhpGenericSeparators
)},
140 { true, 'n', "namespace", "namespaces",
141 ATTACH_SEPARATORS(PhpGenericSeparators
)},
142 { true, 't', "trait", "traits",
143 ATTACH_SEPARATORS(PhpGenericSeparators
)},
144 { true, 'v', "variable", "variables",
145 ATTACH_SEPARATORS(PhpGenericSeparators
)},
146 { true, 'a', "alias", "aliases",
147 ATTACH_SEPARATORS(PhpGenericSeparators
)},
150 static const keywordTable PhpKeywordTable
[] = {
151 /* keyword keyword ID */
152 { "abstract", KEYWORD_abstract
},
153 { "and", KEYWORD_and
},
154 { "as", KEYWORD_as
},
155 { "break", KEYWORD_break
},
156 { "callable", KEYWORD_callable
},
157 { "case", KEYWORD_case
},
158 { "catch", KEYWORD_catch
},
159 { "cfunction", KEYWORD_function
}, /* nobody knows what the hell this is, but it seems to behave much like "function" so bind it to it */
160 { "class", KEYWORD_class
},
161 { "clone", KEYWORD_clone
},
162 { "const", KEYWORD_const
},
163 { "continue", KEYWORD_continue
},
164 { "declare", KEYWORD_declare
},
165 { "define", KEYWORD_define
}, /* this isn't really a keyword but we handle it so it's easier this way */
166 { "default", KEYWORD_default
},
167 { "do", KEYWORD_do
},
168 { "echo", KEYWORD_echo
},
169 { "else", KEYWORD_else
},
170 { "elseif", KEYWORD_elif
},
171 { "enddeclare", KEYWORD_enddeclare
},
172 { "endfor", KEYWORD_endfor
},
173 { "endforeach", KEYWORD_endforeach
},
174 { "endif", KEYWORD_endif
},
175 { "endswitch", KEYWORD_endswitch
},
176 { "endwhile", KEYWORD_endwhile
},
177 { "extends", KEYWORD_extends
},
178 { "final", KEYWORD_final
},
179 { "finally", KEYWORD_finally
},
180 { "for", KEYWORD_for
},
181 { "foreach", KEYWORD_foreach
},
182 { "function", KEYWORD_function
},
183 { "global", KEYWORD_global
},
184 { "goto", KEYWORD_goto
},
185 { "if", KEYWORD_if
},
186 { "implements", KEYWORD_implements
},
187 { "include", KEYWORD_include
},
188 { "include_once", KEYWORD_include_once
},
189 { "instanceof", KEYWORD_instanceof
},
190 { "insteadof", KEYWORD_insteadof
},
191 { "interface", KEYWORD_interface
},
192 { "namespace", KEYWORD_namespace
},
193 { "new", KEYWORD_new
},
194 { "or", KEYWORD_or
},
195 { "print", KEYWORD_print
},
196 { "private", KEYWORD_private
},
197 { "protected", KEYWORD_protected
},
198 { "public", KEYWORD_public
},
199 { "require", KEYWORD_require
},
200 { "require_once", KEYWORD_require_once
},
201 { "return", KEYWORD_return
},
202 { "static", KEYWORD_static
},
203 { "switch", KEYWORD_switch
},
204 { "throw", KEYWORD_throw
},
205 { "trait", KEYWORD_trait
},
206 { "try", KEYWORD_try
},
207 { "use", KEYWORD_use
},
208 { "var", KEYWORD_var
},
209 { "while", KEYWORD_while
},
210 { "xor", KEYWORD_xor
},
211 { "yield", KEYWORD_yield
}
215 typedef enum eTokenType
{
245 unsigned long lineNumber
;
247 int parentKind
; /* -1 if none */
248 bool anonymous
; /* true if token specifies
249 * an anonymous class */
252 static langType Lang_php
;
253 static langType Lang_zephir
;
255 static bool InPhp
= false; /* whether we are between <? ?> */
256 /* whether the next token may be a keyword, e.g. not after "::" or "->" */
257 static bool MayBeKeyword
= true;
259 /* current statement details */
265 /* Current namespace */
266 static vString
*CurrentNamesapce
;
267 /* Cache variable to build the tag's scope. It has no real meaning outside
268 * of initPhpEntry()'s scope. */
269 static vString
*FullScope
;
270 /* The class name specified at "extends" keyword in the current class
271 * definition. Used to resolve "parent" in return type. */
272 static vString
*ParentClass
;
274 static objPool
*TokenPool
= NULL
;
276 static const char *phpScopeSeparatorFor (int kind
, int upperScopeKind
)
278 return scopeSeparatorFor (getInputLanguage(), kind
, upperScopeKind
);
281 static const char *accessToString (const accessType access
)
283 static const char *const names
[COUNT_ACCESS
] = {
290 Assert (access
< COUNT_ACCESS
);
292 return names
[access
];
295 static const char *implToString (const implType impl
)
297 static const char *const names
[COUNT_IMPL
] = {
302 Assert (impl
< COUNT_IMPL
);
307 static void initPhpEntry (tagEntryInfo
*const e
, const tokenInfo
*const token
,
308 const phpKind kind
, const accessType access
)
312 vStringClear (FullScope
);
314 if (vStringLength (CurrentNamesapce
) > 0)
316 parentKind
= K_NAMESPACE
;
317 vStringCat (FullScope
, CurrentNamesapce
);
321 initTagEntry (e
, vStringValue (token
->string
), kind
);
323 e
->lineNumber
= token
->lineNumber
;
324 e
->filePosition
= token
->filePosition
;
326 if (access
!= ACCESS_UNDEFINED
)
327 e
->extensionFields
.access
= accessToString (access
);
328 if (vStringLength (token
->scope
) > 0)
330 parentKind
= token
->parentKind
;
332 if (vStringLength (FullScope
) > 0)
336 sep
= phpScopeSeparatorFor (parentKind
,
338 vStringCatS (FullScope
, sep
);
340 vStringCat (FullScope
, token
->scope
);
342 if (vStringLength (FullScope
) > 0)
344 Assert (parentKind
>= 0);
346 e
->extensionFields
.scopeKindIndex
= parentKind
;
347 e
->extensionFields
.scopeName
= vStringValue (FullScope
);
350 if (token
->anonymous
)
351 markTagExtraBit (e
, XTAG_ANONYMOUS
);
354 static void makePhpTagEntry (tagEntryInfo
*const e
)
357 makeQualifiedTagEntry (e
);
360 static void fillTypeRefField (tagEntryInfo
*const e
,
361 const vString
*const rtype
, const tokenInfo
*const token
)
363 if ((vStringLength (rtype
) == 4)
364 && (strcmp (vStringValue (rtype
), "self") == 0)
365 && vStringLength (token
->scope
) > 0)
367 if (token
->parentKind
== -1)
368 e
->extensionFields
.typeRef
[0] = "unknown";
370 e
->extensionFields
.typeRef
[0] = PhpKinds
[token
->parentKind
].name
;
371 e
->extensionFields
.typeRef
[1] = vStringValue (token
->scope
);
373 else if ((vStringLength (rtype
) == 6)
374 && (strcmp (vStringValue (rtype
), "parent") == 0)
375 && (ParentClass
&& vStringLength (ParentClass
) > 0))
377 e
->extensionFields
.typeRef
[0] = "class";
378 e
->extensionFields
.typeRef
[1] = vStringValue (ParentClass
);
382 e
->extensionFields
.typeRef
[0] = "unknown";
383 e
->extensionFields
.typeRef
[1] = vStringValue (rtype
);
387 static void makeTypedPhpTag (const tokenInfo
*const token
, const phpKind kind
,
388 const accessType access
, vString
* typeName
)
390 if (PhpKinds
[kind
].enabled
)
394 initPhpEntry (&e
, token
, kind
, access
);
396 fillTypeRefField (&e
, typeName
, token
);
397 makePhpTagEntry (&e
);
401 static void makeSimplePhpTag (const tokenInfo
*const token
, const phpKind kind
,
402 const accessType access
)
404 makeTypedPhpTag (token
, kind
, access
, NULL
);
407 static void makeNamespacePhpTag (const tokenInfo
*const token
, const vString
*const name
)
409 if (PhpKinds
[K_NAMESPACE
].enabled
)
413 initTagEntry (&e
, vStringValue (name
), K_NAMESPACE
);
415 e
.lineNumber
= token
->lineNumber
;
416 e
.filePosition
= token
->filePosition
;
418 makePhpTagEntry (&e
);
422 static void makeClassOrIfaceTag (const phpKind kind
, const tokenInfo
*const token
,
423 vString
*const inheritance
, const implType impl
)
425 if (PhpKinds
[kind
].enabled
)
429 initPhpEntry (&e
, token
, kind
, ACCESS_UNDEFINED
);
431 if (impl
!= IMPL_UNDEFINED
)
432 e
.extensionFields
.implementation
= implToString (impl
);
433 if (vStringLength (inheritance
) > 0)
434 e
.extensionFields
.inheritance
= vStringValue (inheritance
);
436 makePhpTagEntry (&e
);
440 static void makeFunctionTag (const tokenInfo
*const token
,
441 const vString
*const arglist
,
442 const vString
*const rtype
,
443 const accessType access
, const implType impl
)
445 if (PhpKinds
[K_FUNCTION
].enabled
)
449 initPhpEntry (&e
, token
, K_FUNCTION
, access
);
451 if (impl
!= IMPL_UNDEFINED
)
452 e
.extensionFields
.implementation
= implToString (impl
);
454 e
.extensionFields
.signature
= vStringValue (arglist
);
456 fillTypeRefField (&e
, rtype
, token
);
458 makePhpTagEntry (&e
);
462 static void *newPoolToken (void *createArg CTAGS_ATTR_UNUSED
)
464 tokenInfo
*token
= xMalloc (1, tokenInfo
);
466 token
->string
= vStringNew ();
467 token
->scope
= vStringNew ();
471 static void clearPoolToken (void *data
)
473 tokenInfo
*token
= data
;
475 token
->type
= TOKEN_UNDEFINED
;
476 token
->keyword
= KEYWORD_NONE
;
477 token
->lineNumber
= getInputLineNumber ();
478 token
->filePosition
= getInputFilePosition ();
479 token
->parentKind
= -1;
480 token
->anonymous
= false;
481 vStringClear (token
->string
);
482 vStringClear (token
->scope
);
485 static void deletePoolToken (void *data
)
487 tokenInfo
*token
= data
;
488 vStringDelete (token
->string
);
489 vStringDelete (token
->scope
);
493 static void copyToken (tokenInfo
*const dest
, const tokenInfo
*const src
,
496 dest
->lineNumber
= src
->lineNumber
;
497 dest
->filePosition
= src
->filePosition
;
498 dest
->type
= src
->type
;
499 dest
->keyword
= src
->keyword
;
500 vStringCopy(dest
->string
, src
->string
);
501 dest
->parentKind
= src
->parentKind
;
503 vStringCopy(dest
->scope
, src
->scope
);
504 dest
->anonymous
= src
->anonymous
;
510 static const char *tokenTypeName (const tokenType type
)
514 case TOKEN_UNDEFINED
: return "undefined";
515 case TOKEN_EOF
: return "EOF";
516 case TOKEN_CHARACTER
: return "character";
517 case TOKEN_CLOSE_PAREN
: return "')'";
518 case TOKEN_SEMICOLON
: return "';'";
519 case TOKEN_COLON
: return "':'";
520 case TOKEN_COMMA
: return "','";
521 case TOKEN_OPEN_PAREN
: return "'('";
522 case TOKEN_OPERATOR
: return "operator";
523 case TOKEN_IDENTIFIER
: return "identifier";
524 case TOKEN_KEYWORD
: return "keyword";
525 case TOKEN_STRING
: return "string";
526 case TOKEN_PERIOD
: return "'.'";
527 case TOKEN_OPEN_CURLY
: return "'{'";
528 case TOKEN_CLOSE_CURLY
: return "'}'";
529 case TOKEN_EQUAL_SIGN
: return "'='";
530 case TOKEN_OPEN_SQUARE
: return "'['";
531 case TOKEN_CLOSE_SQUARE
: return "']'";
532 case TOKEN_VARIABLE
: return "variable";
537 static void printToken (const tokenInfo
*const token
)
539 fprintf (stderr
, "%p:\n\ttype:\t%s\n\tline:\t%lu\n\tscope:\t%s\n", (void *) token
,
540 tokenTypeName (token
->type
),
542 vStringValue (token
->scope
));
545 case TOKEN_IDENTIFIER
:
548 fprintf (stderr
, "\tcontent:\t%s\n", vStringValue (token
->string
));
553 size_t n
= ARRAY_SIZE (PhpKeywordTable
);
556 fprintf (stderr
, "\tkeyword:\t");
557 for (i
= 0; i
< n
; i
++)
559 if (PhpKeywordTable
[i
].id
== token
->keyword
)
561 fprintf (stderr
, "%s\n", PhpKeywordTable
[i
].name
);
566 fprintf (stderr
, "(unknown)\n");
574 static void addToScope (tokenInfo
*const token
, const vString
*const extra
,
575 int kindOfUpperScope
)
577 if (vStringLength (token
->scope
) > 0)
581 sep
= phpScopeSeparatorFor(token
->parentKind
,
583 vStringCatS (token
->scope
, sep
);
585 vStringCat (token
->scope
, extra
);
588 static int skipToCharacter (const int c
)
593 d
= getcFromInputFile ();
594 } while (d
!= EOF
&& d
!= c
);
598 static void parseString (vString
*const string
, const int delimiter
)
602 int c
= getcFromInputFile ();
604 if (c
== '\\' && (c
= getcFromInputFile ()) != EOF
)
605 vStringPut (string
, (char) c
);
606 else if (c
== EOF
|| c
== delimiter
)
609 vStringPut (string
, (char) c
);
613 /* Strips @indent_len characters from lines in @string to get the correct
614 * string value for an indented heredoc (PHP 7.3+).
615 * This doesn't handle invalid values specially and might yield surprising
616 * results with them, but it doesn't really matter as it's invalid anyway. */
617 static void stripHeredocIndent (vString
*const string
, size_t indent_len
)
619 char *str
= vStringValue (string
);
620 size_t str_len
= vStringLength (string
);
622 size_t new_len
= str_len
;
623 bool at_line_start
= true;
632 p_len
= str_len
- (p
- str
);
633 strip_len
= p_len
< indent_len
? p_len
: indent_len
;
634 memmove (p
, p
+ strip_len
, p_len
- strip_len
);
636 new_len
-= strip_len
;
638 /* CRLF is already normalized as LF */
639 at_line_start
= (*p
== '\r' || *p
== '\n');
642 vStringTruncate (string
, new_len
);
645 /* reads a PHP >= 7.3 HereDoc or a NowDoc (the part after the <<<).
646 * <<<[ \t]*(ID|'ID'|"ID")
648 * [ \t]*ID[^:indent-char:];?
651 * 1) starting ID must be immediately followed by a newline;
652 * 2) closing ID is the same as opening one;
653 * 3) closing ID must not be immediately followed by an identifier character;
654 * 4) optional indentation of the closing ID is stripped from body lines,
655 * which lines must have the exact same prefix indentation.
657 * This is slightly relaxed from PHP < 7.3, where the closing ID had to be the
658 * only thing on its line, with the only exception of a semicolon right after
661 * Example of a single valid heredoc:
665 * FOO_this is not an end
667 * # previous line was the end, but the semicolon wasn't required
669 * Another example using indentation and more code after the heredoc:
674 * # the heredoc ends at FOO, and leading tabs are stripped from the body.
675 * # ". 'hello'" is a normal concatenation operator and the string "hello".
677 static void parseHeredoc (vString
*const string
)
681 char delimiter
[64]; /* arbitrary limit, but more is crazy anyway */
686 c
= getcFromInputFile ();
688 while (c
== ' ' || c
== '\t');
690 if (c
== '\'' || c
== '"')
693 c
= getcFromInputFile ();
695 for (len
= 0; len
< ARRAY_SIZE (delimiter
) - 1; len
++)
697 if (! isIdentChar (c
))
699 delimiter
[len
] = (char) c
;
700 c
= getcFromInputFile ();
704 if (len
== 0) /* no delimiter, give up */
708 if (c
!= quote
) /* no closing quote for quoted identifier, give up */
710 c
= getcFromInputFile ();
712 if (c
!= '\r' && c
!= '\n') /* missing newline, give up */
717 c
= getcFromInputFile ();
719 vStringPut (string
, (char) c
);
720 if (c
== '\r' || c
== '\n')
722 /* new line, check for a delimiter right after. No need to handle
723 * CRLF, getcFromInputFile() normalizes it to LF already. */
724 const size_t prev_string_len
= vStringLength (string
) - 1;
725 size_t indent_len
= 0;
727 c
= getcFromInputFile ();
728 while (c
== ' ' || c
== '\t')
730 vStringPut (string
, (char) c
);
731 c
= getcFromInputFile ();
735 for (len
= 0; c
!= 0 && (c
- delimiter
[len
]) == 0; len
++)
736 c
= getcFromInputFile ();
738 if (delimiter
[len
] != 0)
739 ungetcToInputFile (c
);
740 else if (! isIdentChar (c
))
742 /* line start matched the delimiter and has a separator, we're done */
743 ungetcToInputFile (c
);
745 /* strip trailing newline and indent of the end delimiter */
746 vStringTruncate (string
, prev_string_len
);
748 /* strip indent from the value if needed */
750 stripHeredocIndent (string
, indent_len
);
753 /* if we are here it wasn't a delimiter, so put everything in the
755 vStringNCatS (string
, delimiter
, len
);
763 ungetcToInputFile (c
);
766 static void parseIdentifier (vString
*const string
, const int firstChar
)
771 vStringPut (string
, (char) c
);
772 c
= getcFromInputFile ();
773 } while (isIdentChar (c
));
774 ungetcToInputFile (c
);
777 static bool isSpace (int c
)
779 return (c
== '\t' || c
== ' ' || c
== '\v' ||
780 c
== '\n' || c
== '\r' || c
== '\f');
783 static int skipWhitespaces (int c
)
786 c
= getcFromInputFile ();
790 /* <script[:white:]+language[:white:]*=[:white:]*(php|'php'|"php")[:white:]*>
792 * This is ugly, but the whole "<script language=php>" tag is and we can't
793 * really do better without adding a lot of code only for this */
794 static bool isOpenScriptLanguagePhp (int c
)
798 /* <script[:white:]+language[:white:]*= */
800 tolower ((c
= getcFromInputFile ())) != 's' ||
801 tolower ((c
= getcFromInputFile ())) != 'c' ||
802 tolower ((c
= getcFromInputFile ())) != 'r' ||
803 tolower ((c
= getcFromInputFile ())) != 'i' ||
804 tolower ((c
= getcFromInputFile ())) != 'p' ||
805 tolower ((c
= getcFromInputFile ())) != 't' ||
806 ! isSpace ((c
= getcFromInputFile ())) ||
807 tolower ((c
= skipWhitespaces (c
))) != 'l' ||
808 tolower ((c
= getcFromInputFile ())) != 'a' ||
809 tolower ((c
= getcFromInputFile ())) != 'n' ||
810 tolower ((c
= getcFromInputFile ())) != 'g' ||
811 tolower ((c
= getcFromInputFile ())) != 'u' ||
812 tolower ((c
= getcFromInputFile ())) != 'a' ||
813 tolower ((c
= getcFromInputFile ())) != 'g' ||
814 tolower ((c
= getcFromInputFile ())) != 'e' ||
815 (c
= skipWhitespaces (getcFromInputFile ())) != '=')
818 /* (php|'php'|"php")> */
819 c
= skipWhitespaces (getcFromInputFile ());
820 if (c
== '"' || c
== '\'')
823 c
= getcFromInputFile ();
825 if (tolower (c
) != 'p' ||
826 tolower ((c
= getcFromInputFile ())) != 'h' ||
827 tolower ((c
= getcFromInputFile ())) != 'p' ||
828 (quote
!= 0 && (c
= getcFromInputFile ()) != quote
) ||
829 (c
= skipWhitespaces (getcFromInputFile ())) != '>')
835 static int findPhpStart (void)
840 if ((c
= getcFromInputFile ()) == '<')
842 c
= getcFromInputFile ();
843 /* <?, <?= and <?php, but not <?xml */
846 c
= getcFromInputFile ();
849 c
= getcFromInputFile ();
850 /* don't enter PHP mode on "<?xml", yet still support short open tags (<?) */
851 else if (tolower (c
) != 'x' ||
852 tolower ((c
= getcFromInputFile ())) != 'm' ||
853 tolower ((c
= getcFromInputFile ())) != 'l')
858 /* <script language="php"> */
861 ungetcToInputFile (c
);
862 if (isOpenScriptLanguagePhp ('<'))
872 static int skipSingleComment (void)
877 c
= getcFromInputFile ();
878 /* ?> in single-line comments leaves PHP mode */
881 int next
= getcFromInputFile ();
885 ungetcToInputFile (next
);
887 } while (InPhp
&& c
!= EOF
&& c
!= '\n' && c
!= '\r');
891 static void readToken (tokenInfo
*const token
)
894 bool nextMayBeKeyword
= true;
896 token
->type
= TOKEN_UNDEFINED
;
897 token
->keyword
= KEYWORD_NONE
;
898 vStringClear (token
->string
);
909 c
= getcFromInputFile ();
911 c
= skipWhitespaces (c
);
913 token
->lineNumber
= getInputLineNumber ();
914 token
->filePosition
= getInputFilePosition ();
918 case EOF
: token
->type
= TOKEN_EOF
; break;
919 case '(': token
->type
= TOKEN_OPEN_PAREN
; break;
920 case ')': token
->type
= TOKEN_CLOSE_PAREN
; break;
921 case ';': token
->type
= TOKEN_SEMICOLON
; break;
922 case ',': token
->type
= TOKEN_COMMA
; break;
923 case '.': token
->type
= TOKEN_PERIOD
; break;
924 case '{': token
->type
= TOKEN_OPEN_CURLY
; break;
925 case '}': token
->type
= TOKEN_CLOSE_CURLY
; break;
926 case '[': token
->type
= TOKEN_OPEN_SQUARE
; break;
927 case ']': token
->type
= TOKEN_CLOSE_SQUARE
; break;
928 case '&': token
->type
= TOKEN_AMPERSAND
; break;
929 case '\\': token
->type
= TOKEN_BACKSLASH
; break;
933 int d
= getcFromInputFile ();
936 nextMayBeKeyword
= false;
937 token
->type
= TOKEN_OPERATOR
;
941 ungetcToInputFile (d
);
942 token
->type
= TOKEN_COLON
;
949 int d
= getcFromInputFile ();
950 if (d
== '=' || d
== '>')
951 token
->type
= TOKEN_OPERATOR
;
954 ungetcToInputFile (d
);
955 token
->type
= TOKEN_EQUAL_SIGN
;
962 token
->type
= TOKEN_STRING
;
963 parseString (token
->string
, c
);
964 token
->lineNumber
= getInputLineNumber ();
965 token
->filePosition
= getInputFilePosition ();
970 int d
= getcFromInputFile ();
973 /* </script[:white:]*> */
974 if (tolower ((d
= getcFromInputFile ())) == 's' &&
975 tolower ((d
= getcFromInputFile ())) == 'c' &&
976 tolower ((d
= getcFromInputFile ())) == 'r' &&
977 tolower ((d
= getcFromInputFile ())) == 'i' &&
978 tolower ((d
= getcFromInputFile ())) == 'p' &&
979 tolower ((d
= getcFromInputFile ())) == 't' &&
980 (d
= skipWhitespaces (getcFromInputFile ())) == '>')
987 ungetcToInputFile (d
);
988 token
->type
= TOKEN_UNDEFINED
;
991 else if (d
== '<' && (d
= getcFromInputFile ()) == '<')
993 token
->type
= TOKEN_STRING
;
994 parseHeredoc (token
->string
);
998 ungetcToInputFile (d
);
999 token
->type
= TOKEN_UNDEFINED
;
1004 case '#': /* comment */
1005 skipSingleComment ();
1014 int d
= getcFromInputFile ();
1015 if (c
== '-' && d
== '>')
1016 nextMayBeKeyword
= false;
1018 ungetcToInputFile (d
);
1019 token
->type
= TOKEN_OPERATOR
;
1023 case '/': /* division or comment start */
1025 int d
= getcFromInputFile ();
1026 if (d
== '/') /* single-line comment */
1028 skipSingleComment ();
1035 c
= skipToCharacter ('*');
1038 c
= getcFromInputFile ();
1042 ungetcToInputFile (c
);
1044 } while (c
!= EOF
&& c
!= '\0');
1050 ungetcToInputFile (d
);
1051 token
->type
= TOKEN_OPERATOR
;
1056 case '$': /* variable start */
1058 int d
= getcFromInputFile ();
1059 if (! isIdentChar (d
))
1061 ungetcToInputFile (d
);
1062 token
->type
= TOKEN_UNDEFINED
;
1066 parseIdentifier (token
->string
, d
);
1067 token
->type
= TOKEN_VARIABLE
;
1072 case '?': /* maybe the end of the PHP chunk */
1074 int d
= getcFromInputFile ();
1082 ungetcToInputFile (d
);
1083 token
->type
= TOKEN_QMARK
;
1089 if (! isIdentChar (c
))
1090 token
->type
= TOKEN_UNDEFINED
;
1093 parseIdentifier (token
->string
, c
);
1095 token
->keyword
= lookupCaseKeyword (vStringValue (token
->string
), getInputLanguage ());
1097 token
->keyword
= KEYWORD_NONE
;
1099 if (token
->keyword
== KEYWORD_NONE
)
1100 token
->type
= TOKEN_IDENTIFIER
;
1102 token
->type
= TOKEN_KEYWORD
;
1107 if (token
->type
== TOKEN_SEMICOLON
||
1108 token
->type
== TOKEN_OPEN_CURLY
||
1109 token
->type
== TOKEN_CLOSE_CURLY
)
1111 /* reset current statement details on statement end, and when entering
1113 * it is a bit ugly to do this in readToken(), but it makes everything
1115 CurrentStatement
.access
= ACCESS_UNDEFINED
;
1116 CurrentStatement
.impl
= IMPL_UNDEFINED
;
1119 MayBeKeyword
= nextMayBeKeyword
;
1122 static void readQualifiedName (tokenInfo
*const token
, vString
*name
,
1123 tokenInfo
*const lastToken
)
1125 while (token
->type
== TOKEN_IDENTIFIER
|| token
->type
== TOKEN_BACKSLASH
)
1129 if (token
->type
== TOKEN_BACKSLASH
)
1130 vStringPut (name
, '\\');
1132 vStringCat (name
, token
->string
);
1135 copyToken (lastToken
, token
, true);
1140 static void enterScope (tokenInfo
*const parentToken
,
1141 const vString
*const extraScope
,
1142 const int parentKind
);
1144 static void skipOverParens (tokenInfo
*token
)
1146 if (token
->type
== TOKEN_OPEN_PAREN
)
1153 switch (token
->type
)
1155 case TOKEN_OPEN_PAREN
: depth
++; break;
1156 case TOKEN_CLOSE_PAREN
: depth
--; break;
1160 while (token
->type
!= TOKEN_EOF
&& depth
> 0);
1166 /* parses a class or an interface:
1168 * class Foo extends Bar {}
1169 * class Foo extends Bar implements iFoo, iBar {}
1171 * interface iBar extends iFoo {}
1173 * if @name is not NULL, parses an anonymous class with name @name
1175 * new class(1, 2) {}
1176 * new class(1, 2) extends Foo implements iFoo, iBar {} */
1177 static bool parseClassOrIface (tokenInfo
*const token
, const phpKind kind
,
1178 const tokenInfo
*name
)
1180 bool readNext
= true;
1181 implType impl
= CurrentStatement
.impl
;
1182 tokenInfo
*nameFree
= NULL
;
1183 vString
*inheritance
= NULL
;
1184 vString
*parent
= NULL
;
1187 if (name
) /* anonymous class */
1189 /* skip possible construction arguments */
1190 skipOverParens (token
);
1192 else /* normal, named class */
1194 if (token
->type
!= TOKEN_IDENTIFIER
)
1197 name
= nameFree
= newToken ();
1198 copyToken (nameFree
, token
, true);
1203 inheritance
= vStringNew ();
1204 /* read every identifier, keyword and comma, and assume each
1205 * identifier (not keyword) is an inheritance
1206 * (like in "class Foo extends Bar implements iA, iB") */
1207 enum { inheritance_initial
,
1208 inheritance_extends
,
1209 inheritance_implements
1210 } istat
= inheritance_initial
;
1211 while (token
->type
== TOKEN_IDENTIFIER
||
1212 token
->type
== TOKEN_BACKSLASH
||
1213 token
->type
== TOKEN_KEYWORD
||
1214 token
->type
== TOKEN_COMMA
)
1216 if (token
->type
== TOKEN_IDENTIFIER
|| token
->type
== TOKEN_BACKSLASH
)
1218 vString
*qualifiedName
= vStringNew ();
1220 readQualifiedName (token
, qualifiedName
, NULL
);
1221 if (vStringLength (inheritance
) > 0)
1222 vStringPut (inheritance
, ',');
1223 vStringCat (inheritance
, qualifiedName
);
1224 if (istat
== inheritance_extends
&& !parent
)
1225 parent
= qualifiedName
;
1227 vStringDelete (qualifiedName
);
1231 if (token
->type
== TOKEN_KEYWORD
)
1233 if (token
->keyword
== KEYWORD_extends
)
1234 istat
= inheritance_extends
;
1235 else if (token
->keyword
== KEYWORD_implements
)
1236 istat
= inheritance_implements
;
1242 makeClassOrIfaceTag (kind
, name
, inheritance
, impl
);
1244 if (token
->type
== TOKEN_OPEN_CURLY
)
1246 vString
*backup
= ParentClass
;
1247 ParentClass
= parent
;
1248 enterScope (token
, name
->string
, kind
);
1249 ParentClass
= backup
;
1255 deleteToken (nameFree
);
1256 vStringDelete (parent
);
1257 vStringDelete (inheritance
);
1264 static bool parseTrait (tokenInfo
*const token
)
1266 bool readNext
= true;
1270 if (token
->type
!= TOKEN_IDENTIFIER
)
1274 copyToken (name
, token
, true);
1276 makeSimplePhpTag (name
, K_TRAIT
, ACCESS_UNDEFINED
);
1279 if (token
->type
== TOKEN_OPEN_CURLY
)
1280 enterScope (token
, name
->string
, K_TRAIT
);
1291 * if @name is NULL, parses a normal function
1292 * function myfunc($foo, $bar) {}
1293 * function &myfunc($foo, $bar) {}
1294 * function myfunc($foo, $bar) : type {}
1295 * function myfunc($foo, $bar) : ?type {}
1297 * if @name is not NULL, parses an anonymous function with name @name
1298 * $foo = function($foo, $bar) {}
1299 * $foo = function&($foo, $bar) {}
1300 * $foo = function($foo, $bar) use ($x, &$y) {}
1301 * $foo = function($foo, $bar) use ($x, &$y) : type {}
1302 * $foo = function($foo, $bar) use ($x, &$y) : ?type {} */
1303 static bool parseFunction (tokenInfo
*const token
, const tokenInfo
*name
)
1305 bool readNext
= true;
1306 accessType access
= CurrentStatement
.access
;
1307 implType impl
= CurrentStatement
.impl
;
1308 tokenInfo
*nameFree
= NULL
;
1309 vString
*rtype
= NULL
;
1310 vString
*arglist
= NULL
;
1313 /* skip a possible leading ampersand (return by reference) */
1314 if (token
->type
== TOKEN_AMPERSAND
)
1319 if (token
->type
!= TOKEN_IDENTIFIER
&& token
->type
!= TOKEN_KEYWORD
)
1322 name
= nameFree
= newToken ();
1323 copyToken (nameFree
, token
, true);
1327 if (token
->type
== TOKEN_OPEN_PAREN
)
1331 arglist
= vStringNew ();
1332 vStringPut (arglist
, '(');
1337 switch (token
->type
)
1339 case TOKEN_OPEN_PAREN
: depth
++; break;
1340 case TOKEN_CLOSE_PAREN
: depth
--; break;
1344 switch (token
->type
)
1346 case TOKEN_AMPERSAND
: vStringPut (arglist
, '&'); break;
1347 case TOKEN_CLOSE_CURLY
: vStringPut (arglist
, '}'); break;
1348 case TOKEN_CLOSE_PAREN
: vStringPut (arglist
, ')'); break;
1349 case TOKEN_CLOSE_SQUARE
: vStringPut (arglist
, ']'); break;
1350 case TOKEN_COLON
: vStringPut (arglist
, ':'); break;
1351 case TOKEN_COMMA
: vStringCatS (arglist
, ", "); break;
1352 case TOKEN_EQUAL_SIGN
: vStringCatS (arglist
, " = "); break;
1353 case TOKEN_OPEN_CURLY
: vStringPut (arglist
, '{'); break;
1354 case TOKEN_OPEN_PAREN
: vStringPut (arglist
, '('); break;
1355 case TOKEN_OPEN_SQUARE
: vStringPut (arglist
, '['); break;
1356 case TOKEN_PERIOD
: vStringPut (arglist
, '.'); break;
1357 case TOKEN_SEMICOLON
: vStringPut (arglist
, ';'); break;
1358 case TOKEN_BACKSLASH
: vStringPut (arglist
, '\\'); break;
1361 vStringPut (arglist
, '\'');
1362 vStringCat (arglist
, token
->string
);
1363 vStringPut (arglist
, '\'');
1367 case TOKEN_IDENTIFIER
:
1369 case TOKEN_VARIABLE
:
1371 switch (vStringLast (arglist
))
1380 /* no need for a space between those and the identifier */
1384 vStringPut (arglist
, ' ');
1387 if (token
->type
== TOKEN_VARIABLE
)
1388 vStringPut (arglist
, '$');
1389 vStringCat (arglist
, token
->string
);
1396 while (token
->type
!= TOKEN_EOF
&& depth
> 0);
1398 readToken (token
); /* normally it's an open brace or "use" keyword */
1402 if (token
->type
== TOKEN_KEYWORD
&& token
->keyword
== KEYWORD_use
)
1405 skipOverParens (token
);
1408 /* PHP7 return type declaration or if parsing Zephir, gather function return
1409 * type hint to fill typeRef. */
1410 if ((getInputLanguage () == Lang_php
&& token
->type
== TOKEN_COLON
) ||
1411 (getInputLanguage () == Lang_zephir
&& token
->type
== TOKEN_OPERATOR
))
1414 rtype
= vStringNew ();
1417 if (token
->type
== TOKEN_QMARK
)
1420 vStringPut (rtype
, '?');
1423 readQualifiedName (token
, rtype
, NULL
);
1425 if (rtype
&& vStringIsEmpty (rtype
))
1427 vStringDelete (rtype
);
1433 makeFunctionTag (name
, arglist
, rtype
, access
, impl
);
1435 if (token
->type
== TOKEN_OPEN_CURLY
)
1436 enterScope (token
, name
->string
, K_FUNCTION
);
1440 vStringDelete (rtype
);
1441 vStringDelete (arglist
);
1443 deleteToken (nameFree
);
1448 /* parses declarations of the form
1449 * const NAME = VALUE */
1450 static bool parseConstant (tokenInfo
*const token
)
1454 readToken (token
); /* skip const keyword */
1455 if (token
->type
!= TOKEN_IDENTIFIER
&& token
->type
!= TOKEN_KEYWORD
)
1459 copyToken (name
, token
, true);
1462 if (token
->type
== TOKEN_EQUAL_SIGN
)
1463 makeSimplePhpTag (name
, K_DEFINE
, ACCESS_UNDEFINED
);
1467 return token
->type
== TOKEN_EQUAL_SIGN
;
1470 /* parses declarations of the form
1471 * define('NAME', 'VALUE')
1472 * define(NAME, 'VALUE') */
1473 static bool parseDefine (tokenInfo
*const token
)
1477 readToken (token
); /* skip "define" identifier */
1478 if (token
->type
!= TOKEN_OPEN_PAREN
)
1482 if (token
->type
== TOKEN_STRING
||
1483 token
->type
== TOKEN_IDENTIFIER
)
1485 makeSimplePhpTag (token
, K_DEFINE
, ACCESS_UNDEFINED
);
1489 /* skip until the close parenthesis.
1490 * no need to handle nested blocks since they would be invalid
1491 * in this context anyway (the VALUE may only be a scalar, like
1495 while (token
->type
!= TOKEN_EOF
&& depth
> 0)
1497 switch (token
->type
)
1499 case TOKEN_OPEN_PAREN
: depth
++; break;
1500 case TOKEN_CLOSE_PAREN
: depth
--; break;
1509 /* parses declarations of the form
1512 * use Foo\Bar\Class as FooBarClass
1513 * use function Foo\Bar\func
1514 * use function Foo\Bar\func as foobarfunc
1515 * use const Foo\Bar\CONST
1516 * use const Foo\Bar\CONST as FOOBARCONST
1518 * use Foo, Bar as Baz
1519 * use Foo as Test, Bar as Baz
1520 * use Foo\{Bar, Baz as Child, Nested\Other, Even\More as Something} */
1521 static bool parseUse (tokenInfo
*const token
)
1523 bool readNext
= false;
1524 /* we can't know the use type, because class, interface and namespace
1525 * aliases are the same, and the only difference is the referenced name's
1527 const char *refType
= "unknown";
1528 vString
*refName
= vStringNew ();
1529 tokenInfo
*nameToken
= newToken ();
1530 bool grouped
= false;
1532 readToken (token
); /* skip use keyword itself */
1533 if (token
->type
== TOKEN_KEYWORD
&& (token
->keyword
== KEYWORD_function
||
1534 token
->keyword
== KEYWORD_const
))
1536 switch (token
->keyword
)
1538 case KEYWORD_function
: refType
= PhpKinds
[K_FUNCTION
].name
; break;
1539 case KEYWORD_const
: refType
= PhpKinds
[K_DEFINE
].name
; break;
1540 default: break; /* silence compilers */
1548 readQualifiedName (token
, refName
, nameToken
);
1549 grouped
= readNext
= (token
->type
== TOKEN_OPEN_CURLY
);
1553 size_t refNamePrefixLength
= grouped
? vStringLength (refName
) : 0;
1555 /* if it's either not the first name in a comma-separated list, or we
1556 * are in a grouped alias and need to read the leaf name */
1560 /* in case of a trailing comma (or an empty group) */
1561 if (token
->type
== TOKEN_CLOSE_CURLY
)
1563 readQualifiedName (token
, refName
, nameToken
);
1566 if (token
->type
== TOKEN_KEYWORD
&& token
->keyword
== KEYWORD_as
)
1569 copyToken (nameToken
, token
, true);
1573 if (nameToken
->type
== TOKEN_IDENTIFIER
&& PhpKinds
[K_ALIAS
].enabled
)
1577 initPhpEntry (&entry
, nameToken
, K_ALIAS
, ACCESS_UNDEFINED
);
1579 entry
.extensionFields
.typeRef
[0] = refType
;
1580 entry
.extensionFields
.typeRef
[1] = vStringValue (refName
);
1582 makePhpTagEntry (&entry
);
1585 vStringTruncate (refName
, refNamePrefixLength
);
1589 while (token
->type
== TOKEN_COMMA
);
1591 if (grouped
&& token
->type
== TOKEN_CLOSE_CURLY
)
1594 vStringDelete (refName
);
1595 deleteToken (nameToken
);
1597 return (token
->type
== TOKEN_SEMICOLON
);
1600 /* parses declarations of the form
 * $var = VALUE
 * $var; */
/* Handles a variable token that is followed either by `=` or by `;`.
 *
 * token:    the current token; its parentKind selects the kind of tag.
 * typeName: type name handed to makeTypedPhpTag for class-like members
 *           (enterScope's visible call tests vStringIsEmpty on it first).
 *
 * After `=` the kind is K_VARIABLE, or K_LOCAL_VARIABLE when the parent
 * kind is K_FUNCTION.  After `;` a typed K_VARIABLE tag is only made when
 * the parent kind is K_CLASS, K_INTERFACE or K_TRAIT.  The access comes
 * from CurrentStatement.access as it was on entry.
 * NOTE(review): several lines (including the return) are not visible in
 * this extract. */
1603 static bool parseVariable (tokenInfo
*const token
, vString
* typeName
)
1606 bool readNext
= true;
1607 accessType access
= CurrentStatement
.access
;
/* keep a copy of the variable token in `name`; the tags below are made
 * from that copy */
1610 copyToken (name
, token
, true);
1613 if (token
->type
== TOKEN_EQUAL_SIGN
)
1615 phpKind kind
= K_VARIABLE
;
/* variables assigned inside a function are tagged as locals */
1617 if (token
->parentKind
== K_FUNCTION
)
1618 kind
= K_LOCAL_VARIABLE
;
/* `$var = function ...`: parseFunction is given the variable's name
 * token, but only when the chosen kind is enabled */
1621 if (token
->type
== TOKEN_KEYWORD
&&
1622 token
->keyword
== KEYWORD_function
&&
1623 PhpKinds
[kind
].enabled
)
1625 if (parseFunction (token
, name
))
1627 readNext
= (bool) (token
->type
== TOKEN_SEMICOLON
);
/* NOTE(review): presumably the alternative branch of the test above;
 * the line introducing it is not visible in this extract */
1631 makeSimplePhpTag (name
, kind
, access
);
1635 else if (token
->type
== TOKEN_SEMICOLON
)
1637 /* generate tags for variable declarations in classes
1641 * but don't get fooled by stuff like $foo = $bar; */
1642 if (token
->parentKind
== K_CLASS
||
1643 token
->parentKind
== K_INTERFACE
||
1644 token
->parentKind
== K_TRAIT
)
1645 makeTypedPhpTag (name
, K_VARIABLE
, access
, typeName
);
1655 /* parses namespace declarations
1657 * namespace Foo\Bar {}
1659 * namespace Foo\Bar; */
/* Rebuilds CurrentNamesapce from a namespace declaration.
 *
 * The global namespace string is cleared, then every identifier token is
 * appended to it, preceded by the K_NAMESPACE scope separator once the
 * string is no longer empty.  Scanning stops at TOKEN_EOF, at a semicolon
 * or at an opening curly brace.  If the resulting name is not empty a
 * namespace tag is made at the position of nsToken, a copy of the token
 * taken on entry.  An opening curly brace is handed to enterScope.
 * NOTE(review): the return statement is not visible in this extract. */
1662 static bool parseNamespace (tokenInfo
*const token
)
1664 tokenInfo
*nsToken
= newToken ();
/* a new declaration replaces whatever namespace was current before */
1666 vStringClear (CurrentNamesapce
);
1667 copyToken (nsToken
, token
, false);
1672 if (token
->type
== TOKEN_IDENTIFIER
)
/* only put a separator between components, never in front of the first */
1674 if (vStringLength (CurrentNamesapce
) > 0)
1678 sep
= phpScopeSeparatorFor(K_NAMESPACE
,
1680 vStringCatS (CurrentNamesapce
, sep
);
1682 vStringCat (CurrentNamesapce
, token
->string
);
1685 while (token
->type
!= TOKEN_EOF
&&
1686 token
->type
!= TOKEN_SEMICOLON
&&
1687 token
->type
!= TOKEN_OPEN_CURLY
);
/* an empty name produces no tag */
1689 if (vStringLength (CurrentNamesapce
) > 0)
1690 makeNamespacePhpTag (nsToken
, CurrentNamesapce
);
/* braced form: parse the block with no extra scope and -1 as parent kind */
1692 if (token
->type
== TOKEN_OPEN_CURLY
)
1693 enterScope (token
, NULL
, -1);
1695 deleteToken (nsToken
);
/* Parses the contents of one scope and dispatches on each token.
 *
 * parentToken: the caller's token; it is copied into a private token on
 *              entry, and the private token is copied back on exit, after
 *              which parentToken's parentKind is restored.
 * extraScope:  passed to addToScope together with the caller's original
 *              parent kind; callers visible here pass NULL.
 * parentKind:  becomes the parentKind of the private token; callers
 *              visible here pass -1.
 *
 * The loop runs until TOKEN_EOF or a closing curly brace.  Keywords are
 * routed to the matching parse* function, whose result is stored in
 * readNext; access and implementation modifiers are recorded in
 * CurrentStatement; type names are accumulated in typeName and given to
 * parseVariable when a variable token is met. */
1700 static void enterScope (tokenInfo
*const parentToken
,
1701 const vString
*const extraScope
,
1702 const int parentKind
)
1704 tokenInfo
*token
= newToken ();
1705 vString
*typeName
= vStringNew ();
/* saved so it can be put back on parentToken before returning */
1706 int origParentKind
= parentToken
->parentKind
;
1708 copyToken (token
, parentToken
, true);
1712 token
->parentKind
= parentKind
;
1713 addToScope (token
, extraScope
, origParentKind
);
1717 while (token
->type
!= TOKEN_EOF
&&
1718 token
->type
!= TOKEN_CLOSE_CURLY
)
1720 bool readNext
= true;
1722 switch (token
->type
)
/* nested block: recurse with no extra scope and -1 as the parent kind */
1724 case TOKEN_OPEN_CURLY
:
1725 enterScope (token
, NULL
, -1);
1729 switch (token
->keyword
)
1731 /* handle anonymous classes */
1734 if (token
->keyword
!= KEYWORD_class
)
/* a copy of the current token receives a generated "AnonymousClass"
 * name, is flagged anonymous and is passed on as the class name */
1738 tokenInfo
*name
= newToken ();
1740 copyToken (name
, token
, true);
1741 anonGenerate (name
->string
, "AnonymousClass", K_CLASS
);
1742 name
->anonymous
= true;
1743 readNext
= parseClassOrIface (token
, K_CLASS
, name
);
1748 case KEYWORD_class
: readNext
= parseClassOrIface (token
, K_CLASS
, NULL
); break;
1749 case KEYWORD_interface
: readNext
= parseClassOrIface (token
, K_INTERFACE
, NULL
); break;
1750 case KEYWORD_trait
: readNext
= parseTrait (token
); break;
1751 case KEYWORD_function
: readNext
= parseFunction (token
, NULL
); break;
1752 case KEYWORD_const
: readNext
= parseConstant (token
); break;
1753 case KEYWORD_define
: readNext
= parseDefine (token
); break;
1756 /* aliases are only allowed at root scope, but the keyword
1757 * is also used to e.g. "import" traits into a class */
1758 if (vStringLength (token
->scope
) == 0)
1759 readNext
= parseUse (token
);
1762 case KEYWORD_namespace
: readNext
= parseNamespace (token
); break;
/* modifiers are only recorded here; parseVariable, for one, reads
 * CurrentStatement.access when it makes its tag */
1764 case KEYWORD_private
: CurrentStatement
.access
= ACCESS_PRIVATE
; break;
1765 case KEYWORD_protected
: CurrentStatement
.access
= ACCESS_PROTECTED
; break;
1766 case KEYWORD_public
: CurrentStatement
.access
= ACCESS_PUBLIC
; break;
1767 case KEYWORD_var
: CurrentStatement
.access
= ACCESS_PUBLIC
; break;
1769 case KEYWORD_abstract
: CurrentStatement
.impl
= IMPL_ABSTRACT
; break;
/* restart the type name with a leading '?'
 * NOTE(review): the case label for this is not visible in this extract */
1776 vStringClear (typeName
);
1777 vStringPut (typeName
, '?');
/* identifiers are appended to the pending type name */
1780 case TOKEN_IDENTIFIER
:
1781 vStringCat (typeName
, token
->string
);
/* a variable consumes the pending type name, which is cleared afterwards */
1784 case TOKEN_VARIABLE
:
1785 readNext
= parseVariable (token
,
1786 vStringIsEmpty(typeName
)
1789 vStringClear (typeName
);
/* copy the last token read back into the caller's token and restore the
 * caller's original parent kind before freeing the temporaries */
1799 copyToken (parentToken
, token
, false);
1800 parentToken
->parentKind
= origParentKind
;
1801 vStringDelete (typeName
);
1802 deleteToken (token
);
/* Runs a whole parse: resets the file-level parser state, calls enterScope
 * repeatedly until TOKEN_EOF is reached, then frees what was allocated.
 *
 * startsInPhpMode: initial value for InPhp.
 * NOTE(review): presumably called by findPhpTags and findZephirTags, whose
 * bodies are not visible in this extract. */
1805 static void findTags (bool startsInPhpMode
)
1807 tokenInfo
*const token
= newToken ();
1809 InPhp
= startsInPhpMode
;
1810 MayBeKeyword
= true;
/* no access or implementation modifier is pending at the start */
1811 CurrentStatement
.access
= ACCESS_UNDEFINED
;
1812 CurrentStatement
.impl
= IMPL_UNDEFINED
;
1813 CurrentNamesapce
= vStringNew ();
1814 FullScope
= vStringNew ();
1815 Assert (ParentClass
== NULL
);
/* top level: no extra scope, -1 as parent kind */
1819 enterScope (token
, NULL
, -1);
1821 while (token
->type
!= TOKEN_EOF
); /* keep going even with unmatched braces */
1823 vStringDelete (FullScope
);
1824 vStringDelete (CurrentNamesapce
);
1825 deleteToken (token
);
/* Parser callback installed by PhpParser (def->parser).
 * NOTE(review): the body is not visible in this extract. */
1828 static void findPhpTags (void)
/* Parser callback installed by ZephirParser (def->parser).
 * NOTE(review): the body is not visible in this extract. */
1833 static void findZephirTags (void)
/* Creates the token object pool on first use; does nothing when TokenPool
 * already exists.  The pool is given the newPoolToken, deletePoolToken and
 * clearPoolToken callbacks.
 * NOTE(review): 16 is presumably the pool size; confirm with objPoolNew. */
1838 static void initializePool (void)
1840 if (TokenPool
== NULL
)
1841 TokenPool
= objPoolNew (16, newPoolToken
, deletePoolToken
, clearPoolToken
, NULL
);
/* Initialization callback installed by PhpParser: stores the language id
 * assigned to the PHP parser in Lang_php.
 * NOTE(review): further statements may follow; not visible in this extract. */
1844 static void initializePhpParser (const langType language
)
1846 Lang_php
= language
;
/* Initialization callback installed by ZephirParser: stores the language
 * id assigned to the Zephir parser in Lang_zephir.
 * NOTE(review): further statements may follow; not visible in this extract. */
1850 static void initializeZephirParser (const langType language
)
1852 Lang_zephir
= language
;
/* Finalization callback shared by the PHP and Zephir parser definitions:
 * deletes the token pool if one was created.
 *
 * language:    unused (marked CTAGS_ATTR_UNUSED).
 * initialized: not referenced by the lines visible in this extract. */
1856 static void finalize (langType language CTAGS_ATTR_UNUSED
, bool initialized
)
1861 if (TokenPool
!= NULL
)
1863 objPoolDelete (TokenPool
);
/* Builds the parser definition named "PHP": the kinds come from PhpKinds,
 * the keywords from PhpKeywordTable, and the file extensions are php,
 * php3, php4, php5, php7 and phtml.  findPhpTags is installed as the
 * parser, initializePhpParser and finalize as the lifecycle callbacks.
 * NOTE(review): the return statement is not visible in this extract. */
1868 extern parserDefinition
* PhpParser (void)
/* NULL-terminated list of file extensions */
1870 static const char *const extensions
[] = { "php", "php3", "php4", "php5", "php7", "phtml", NULL
};
1871 parserDefinition
* def
= parserNew ("PHP");
1872 def
->kindTable
= PhpKinds
;
1873 def
->kindCount
= ARRAY_SIZE (PhpKinds
);
1874 def
->extensions
= extensions
;
1875 def
->parser
= findPhpTags
;
1876 def
->initialize
= initializePhpParser
;
1877 def
->finalize
= finalize
;
1878 def
->keywordTable
= PhpKeywordTable
;
1879 def
->keywordCount
= ARRAY_SIZE (PhpKeywordTable
);
/* Builds the parser definition named "Zephir" for the "zep" extension.
 * It uses the same kind table (PhpKinds), keyword table (PhpKeywordTable)
 * and finalize callback as the PHP definition, but installs
 * findZephirTags as the parser and initializeZephirParser as initializer.
 * NOTE(review): the return statement is not visible in this extract. */
1883 extern parserDefinition
* ZephirParser (void)
/* NULL-terminated list of file extensions */
1885 static const char *const extensions
[] = { "zep", NULL
};
1886 parserDefinition
* def
= parserNew ("Zephir");
1887 def
->kindTable
= PhpKinds
;
1888 def
->kindCount
= ARRAY_SIZE (PhpKinds
);
1889 def
->extensions
= extensions
;
1890 def
->parser
= findZephirTags
;
1891 def
->initialize
= initializeZephirParser
;
1892 def
->finalize
= finalize
;
1893 def
->keywordTable
= PhpKeywordTable
;
1894 def
->keywordCount
= ARRAY_SIZE (PhpKeywordTable
);