manual: added documentation about replacement of 'untitled.ext' with filename (#1804)
[geany-mirror.git] / ctags / parsers / php.c
blobb29ed4fefa4e0554018127e41ff2bf5394a738e7
/*
 * Copyright (c) 2013, Colomban Wendling <ban@herbesfolles.org>
 *
 * This source code is released for free distribution under the terms of the
 * GNU General Public License version 2 or (at your option) any later version.
 *
 * This module contains code for generating tags for the PHP scripting
 * language.
 */

/*
 * INCLUDE FILES
 */
14 #include "general.h" /* must always come first */
15 #include "parse.h"
16 #include "read.h"
17 #include "vstring.h"
18 #include "keyword.h"
19 #include "entry.h"
20 #include "routines.h"
21 #include "debug.h"
24 #define SCOPE_SEPARATOR "::"
/* Identifiers of the keywords the tokenizer knows about.  These are the IDs
 * stored in PhpKeywordTable and in tokenInfo.keyword. */
enum {
	KEYWORD_abstract = 0,
	KEYWORD_and,
	KEYWORD_as,
	KEYWORD_break,
	KEYWORD_callable,
	KEYWORD_case,
	KEYWORD_catch,
	KEYWORD_class,
	KEYWORD_clone,
	KEYWORD_const,
	KEYWORD_continue,
	KEYWORD_declare,
	KEYWORD_define,
	KEYWORD_default,
	KEYWORD_do,
	KEYWORD_echo,
	KEYWORD_else,
	KEYWORD_elif,
	KEYWORD_enddeclare,
	KEYWORD_endfor,
	KEYWORD_endforeach,
	KEYWORD_endif,
	KEYWORD_endswitch,
	KEYWORD_endwhile,
	KEYWORD_extends,
	KEYWORD_final,
	KEYWORD_finally,
	KEYWORD_for,
	KEYWORD_foreach,
	KEYWORD_function,
	KEYWORD_global,
	KEYWORD_goto,
	KEYWORD_if,
	KEYWORD_implements,
	KEYWORD_include,
	KEYWORD_include_once,
	KEYWORD_instanceof,
	KEYWORD_insteadof,
	KEYWORD_interface,
	KEYWORD_namespace,
	KEYWORD_new,
	KEYWORD_or,
	KEYWORD_print,
	KEYWORD_private,
	KEYWORD_protected,
	KEYWORD_public,
	KEYWORD_require,
	KEYWORD_require_once,
	KEYWORD_return,
	KEYWORD_static,
	KEYWORD_switch,
	KEYWORD_throw,
	KEYWORD_trait,
	KEYWORD_try,
	KEYWORD_use,
	KEYWORD_var,
	KEYWORD_while,
	KEYWORD_xor,
	KEYWORD_yield
};

/* A plain int rather than the enum type, to allow KEYWORD_NONE */
typedef int keywordId;
/* Member visibility recorded for the statement being parsed.
 * COUNT_ACCESS is the number of real values, not a visibility itself. */
typedef enum {
	ACCESS_UNDEFINED = 0,
	ACCESS_PRIVATE,
	ACCESS_PROTECTED,
	ACCESS_PUBLIC,
	COUNT_ACCESS
} accessType;
/* Implementation qualifier recorded for the statement being parsed.
 * COUNT_IMPL is the number of real values. */
typedef enum {
	IMPL_UNDEFINED = 0,
	IMPL_ABSTRACT,
	COUNT_IMPL
} implType;
/* Tag kinds this parser can emit.  The values index the PhpKinds table, so
 * both must list their entries in the same order. */
typedef enum {
	K_CLASS = 0,
	K_DEFINE,
	K_FUNCTION,
	K_INTERFACE,
	K_LOCAL_VARIABLE,
	K_NAMESPACE,
	K_TRAIT,
	K_VARIABLE,
	COUNT_KIND
} phpKind;
116 static kindOption PhpKinds[COUNT_KIND] = {
117 { true, 'c', "class", "classes" },
118 { true, 'd', "define", "constant definitions" },
119 { true, 'f', "function", "functions" },
120 { true, 'i', "interface", "interfaces" },
121 { false, 'l', "local", "local variables" },
122 { true, 'n', "namespace", "namespaces" },
123 { true, 't', "trait", "traits" },
124 { true, 'v', "variable", "variables" }
127 static const keywordTable PhpKeywordTable[] = {
128 /* keyword keyword ID */
129 { "abstract", KEYWORD_abstract },
130 { "and", KEYWORD_and },
131 { "as", KEYWORD_as },
132 { "break", KEYWORD_break },
133 { "callable", KEYWORD_callable },
134 { "case", KEYWORD_case },
135 { "catch", KEYWORD_catch },
136 { "cfunction", KEYWORD_function }, /* nobody knows what the hell this is, but it seems to behave much like "function" so bind it to it */
137 { "class", KEYWORD_class },
138 { "clone", KEYWORD_clone },
139 { "const", KEYWORD_const },
140 { "continue", KEYWORD_continue },
141 { "declare", KEYWORD_declare },
142 { "define", KEYWORD_define }, /* this isn't really a keyword but we handle it so it's easier this way */
143 { "default", KEYWORD_default },
144 { "do", KEYWORD_do },
145 { "echo", KEYWORD_echo },
146 { "else", KEYWORD_else },
147 { "elseif", KEYWORD_elif },
148 { "enddeclare", KEYWORD_enddeclare },
149 { "endfor", KEYWORD_endfor },
150 { "endforeach", KEYWORD_endforeach },
151 { "endif", KEYWORD_endif },
152 { "endswitch", KEYWORD_endswitch },
153 { "endwhile", KEYWORD_endwhile },
154 { "extends", KEYWORD_extends },
155 { "final", KEYWORD_final },
156 { "finally", KEYWORD_finally },
157 { "for", KEYWORD_for },
158 { "foreach", KEYWORD_foreach },
159 { "function", KEYWORD_function },
160 { "global", KEYWORD_global },
161 { "goto", KEYWORD_goto },
162 { "if", KEYWORD_if },
163 { "implements", KEYWORD_implements },
164 { "include", KEYWORD_include },
165 { "include_once", KEYWORD_include_once },
166 { "instanceof", KEYWORD_instanceof },
167 { "insteadof", KEYWORD_insteadof },
168 { "interface", KEYWORD_interface },
169 { "namespace", KEYWORD_namespace },
170 { "new", KEYWORD_new },
171 { "or", KEYWORD_or },
172 { "print", KEYWORD_print },
173 { "private", KEYWORD_private },
174 { "protected", KEYWORD_protected },
175 { "public", KEYWORD_public },
176 { "require", KEYWORD_require },
177 { "require_once", KEYWORD_require_once },
178 { "return", KEYWORD_return },
179 { "static", KEYWORD_static },
180 { "switch", KEYWORD_switch },
181 { "throw", KEYWORD_throw },
182 { "trait", KEYWORD_trait },
183 { "try", KEYWORD_try },
184 { "use", KEYWORD_use },
185 { "var", KEYWORD_var },
186 { "while", KEYWORD_while },
187 { "xor", KEYWORD_xor },
188 { "yield", KEYWORD_yield }
/* Lexical categories produced by readToken() */
typedef enum eTokenType {
	TOKEN_UNDEFINED = 0,
	TOKEN_EOF,
	TOKEN_CHARACTER,
	TOKEN_CLOSE_PAREN,
	TOKEN_SEMICOLON,
	TOKEN_COLON,
	TOKEN_COMMA,
	TOKEN_KEYWORD,
	TOKEN_OPEN_PAREN,
	TOKEN_OPERATOR,
	TOKEN_IDENTIFIER,
	TOKEN_STRING,
	TOKEN_PERIOD,
	TOKEN_OPEN_CURLY,
	TOKEN_CLOSE_CURLY,
	TOKEN_EQUAL_SIGN,
	TOKEN_OPEN_SQUARE,
	TOKEN_CLOSE_SQUARE,
	TOKEN_VARIABLE,
	TOKEN_AMPERSAND
} tokenType;
215 typedef struct {
216 tokenType type;
217 keywordId keyword;
218 vString * string;
219 vString * scope;
220 unsigned long lineNumber;
221 MIOPos filePosition;
222 int parentKind; /* -1 if none */
223 } tokenInfo;
225 static langType Lang_php;
226 static langType Lang_zephir;
228 static bool InPhp = false; /* whether we are between <? ?> */
230 /* current statement details */
231 static struct {
232 accessType access;
233 implType impl;
234 } CurrentStatement;
236 /* Current namespace */
237 static vString *CurrentNamespace;
240 static const char *accessToString (const accessType access)
242 static const char *const names[COUNT_ACCESS] = {
243 "undefined",
244 "private",
245 "protected",
246 "public"
249 Assert (access < COUNT_ACCESS);
251 return names[access];
254 static const char *implToString (const implType impl)
256 static const char *const names[COUNT_IMPL] = {
257 "undefined",
258 "abstract"
261 Assert (impl < COUNT_IMPL);
263 return names[impl];
266 static void initPhpEntry (tagEntryInfo *const e, const tokenInfo *const token,
267 const phpKind kind, const accessType access)
269 static vString *fullScope = NULL;
270 int parentKind = -1;
272 if (fullScope == NULL)
273 fullScope = vStringNew ();
274 else
275 vStringClear (fullScope);
277 if (vStringLength (CurrentNamespace) > 0)
279 vStringCopy (fullScope, CurrentNamespace);
280 parentKind = K_NAMESPACE;
283 initTagEntry (e, vStringValue (token->string), &(PhpKinds[kind]));
285 e->lineNumber = token->lineNumber;
286 e->filePosition = token->filePosition;
288 if (access != ACCESS_UNDEFINED)
289 e->extensionFields.access = accessToString (access);
290 if (vStringLength (token->scope) > 0)
292 parentKind = token->parentKind;
293 if (vStringLength (fullScope) > 0)
294 vStringCatS (fullScope, SCOPE_SEPARATOR);
295 vStringCat (fullScope, token->scope);
297 if (vStringLength (fullScope) > 0)
299 Assert (parentKind >= 0);
301 e->extensionFields.scopeKind = &(PhpKinds[parentKind]);
302 e->extensionFields.scopeName = vStringValue (fullScope);
306 static void makeSimplePhpTag (const tokenInfo *const token, const phpKind kind,
307 const accessType access)
309 if (PhpKinds[kind].enabled)
311 tagEntryInfo e;
313 initPhpEntry (&e, token, kind, access);
314 makeTagEntry (&e);
318 static void makeNamespacePhpTag (const tokenInfo *const token, const vString *const name)
320 if (PhpKinds[K_NAMESPACE].enabled)
322 tagEntryInfo e;
324 initTagEntry (&e, vStringValue (name), &(PhpKinds[K_NAMESPACE]));
326 e.lineNumber = token->lineNumber;
327 e.filePosition = token->filePosition;
329 makeTagEntry (&e);
333 static void makeClassOrIfaceTag (const phpKind kind, const tokenInfo *const token,
334 vString *const inheritance, const implType impl)
336 if (PhpKinds[kind].enabled)
338 tagEntryInfo e;
340 initPhpEntry (&e, token, kind, ACCESS_UNDEFINED);
342 if (impl != IMPL_UNDEFINED)
343 e.extensionFields.implementation = implToString (impl);
344 if (vStringLength (inheritance) > 0)
345 e.extensionFields.inheritance = vStringValue (inheritance);
347 makeTagEntry (&e);
351 static void makeFunctionTag (const tokenInfo *const token,
352 const vString *const arglist,
353 const accessType access, const implType impl)
355 if (PhpKinds[K_FUNCTION].enabled)
357 tagEntryInfo e;
359 initPhpEntry (&e, token, K_FUNCTION, access);
361 if (impl != IMPL_UNDEFINED)
362 e.extensionFields.implementation = implToString (impl);
363 if (arglist)
364 e.extensionFields.signature = vStringValue (arglist);
366 makeTagEntry (&e);
370 static tokenInfo *newToken (void)
372 tokenInfo *const token = xMalloc (1, tokenInfo);
374 token->type = TOKEN_UNDEFINED;
375 token->keyword = KEYWORD_NONE;
376 token->string = vStringNew ();
377 token->scope = vStringNew ();
378 token->lineNumber = getInputLineNumber ();
379 token->filePosition = getInputFilePosition ();
380 token->parentKind = -1;
382 return token;
385 static void deleteToken (tokenInfo *const token)
387 vStringDelete (token->string);
388 vStringDelete (token->scope);
389 eFree (token);
392 static void copyToken (tokenInfo *const dest, const tokenInfo *const src,
393 bool scope)
395 dest->lineNumber = src->lineNumber;
396 dest->filePosition = src->filePosition;
397 dest->type = src->type;
398 dest->keyword = src->keyword;
399 vStringCopy(dest->string, src->string);
400 dest->parentKind = src->parentKind;
401 if (scope)
402 vStringCopy(dest->scope, src->scope);
405 #if 0
406 #include <stdio.h>
408 static const char *tokenTypeName (const tokenType type)
410 switch (type)
412 case TOKEN_UNDEFINED: return "undefined";
413 case TOKEN_EOF: return "EOF";
414 case TOKEN_CHARACTER: return "character";
415 case TOKEN_CLOSE_PAREN: return "')'";
416 case TOKEN_SEMICOLON: return "';'";
417 case TOKEN_COLON: return "':'";
418 case TOKEN_COMMA: return "','";
419 case TOKEN_OPEN_PAREN: return "'('";
420 case TOKEN_OPERATOR: return "operator";
421 case TOKEN_IDENTIFIER: return "identifier";
422 case TOKEN_KEYWORD: return "keyword";
423 case TOKEN_STRING: return "string";
424 case TOKEN_PERIOD: return "'.'";
425 case TOKEN_OPEN_CURLY: return "'{'";
426 case TOKEN_CLOSE_CURLY: return "'}'";
427 case TOKEN_EQUAL_SIGN: return "'='";
428 case TOKEN_OPEN_SQUARE: return "'['";
429 case TOKEN_CLOSE_SQUARE: return "']'";
430 case TOKEN_VARIABLE: return "variable";
432 return NULL;
435 static void printToken (const tokenInfo *const token)
437 fprintf (stderr, "%p:\n\ttype:\t%s\n\tline:\t%lu\n\tscope:\t%s\n", (void *) token,
438 tokenTypeName (token->type),
439 token->lineNumber,
440 vStringValue (token->scope));
441 switch (token->type)
443 case TOKEN_IDENTIFIER:
444 case TOKEN_STRING:
445 case TOKEN_VARIABLE:
446 fprintf (stderr, "\tcontent:\t%s\n", vStringValue (token->string));
447 break;
449 case TOKEN_KEYWORD:
451 size_t n = ARRAY_SIZE (PhpKeywordTable);
452 size_t i;
454 fprintf (stderr, "\tkeyword:\t");
455 for (i = 0; i < n; i++)
457 if (PhpKeywordTable[i].id == token->keyword)
459 fprintf (stderr, "%s\n", PhpKeywordTable[i].name);
460 break;
463 if (i >= n)
464 fprintf (stderr, "(unknown)\n");
467 default: break;
470 #endif
472 static void addToScope (tokenInfo *const token, const vString *const extra)
474 if (vStringLength (token->scope) > 0)
475 vStringCatS (token->scope, SCOPE_SEPARATOR);
476 vStringCatS (token->scope, vStringValue (extra));
/* Whether @c may be part of an identifier: an alphanumeric character, an
 * underscore, or any value from 0x80 up */
static bool isIdentChar (const int c)
{
	if (c == '_' || c >= 0x80)
		return true;

	return isalnum (c) != 0;
}
484 static void parseString (vString *const string, const int delimiter)
486 while (true)
488 int c = getcFromInputFile ();
490 if (c == '\\' && (c = getcFromInputFile ()) != EOF)
491 vStringPut (string, (char) c);
492 else if (c == EOF || c == delimiter)
493 break;
494 else
495 vStringPut (string, (char) c);
499 /* reads an HereDoc or a NowDoc (the part after the <<<).
500 * <<<[ \t]*(ID|'ID'|"ID")
501 * ...
502 * ID;?
504 * note that:
505 * 1) starting ID must be immediately followed by a newline;
506 * 2) closing ID is the same as opening one;
507 * 3) closing ID must be immediately followed by a newline or a semicolon
508 * then a newline.
510 * Example of a *single* valid heredoc:
511 * <<< FOO
512 * something
513 * something else
514 * FOO this is not an end
515 * FOO; this isn't either
516 * FOO; # neither this is
517 * FOO;
518 * # previous line was the end, but the semicolon wasn't required
520 static void parseHeredoc (vString *const string)
522 int c;
523 unsigned int len;
524 char delimiter[64]; /* arbitrary limit, but more is crazy anyway */
525 int quote = 0;
529 c = getcFromInputFile ();
531 while (c == ' ' || c == '\t');
533 if (c == '\'' || c == '"')
535 quote = c;
536 c = getcFromInputFile ();
538 for (len = 0; len < ARRAY_SIZE (delimiter) - 1; len++)
540 if (! isIdentChar (c))
541 break;
542 delimiter[len] = (char) c;
543 c = getcFromInputFile ();
545 delimiter[len] = 0;
547 if (len == 0) /* no delimiter, give up */
548 goto error;
549 if (quote)
551 if (c != quote) /* no closing quote for quoted identifier, give up */
552 goto error;
553 c = getcFromInputFile ();
555 if (c != '\r' && c != '\n') /* missing newline, give up */
556 goto error;
560 c = getcFromInputFile ();
562 if (c != '\r' && c != '\n')
563 vStringPut (string, (char) c);
564 else
566 /* new line, check for a delimiter right after */
567 int nl = c;
568 int extra = EOF;
570 c = getcFromInputFile ();
571 for (len = 0; c != 0 && (c - delimiter[len]) == 0; len++)
572 c = getcFromInputFile ();
574 if (delimiter[len] != 0)
575 ungetcToInputFile (c);
576 else
578 /* line start matched the delimiter, now check whether there
579 * is anything after it */
580 if (c == '\r' || c == '\n')
582 ungetcToInputFile (c);
583 break;
585 else if (c == ';')
587 int d = getcFromInputFile ();
588 if (d == '\r' || d == '\n')
590 /* put back the semicolon since it's not part of the
591 * string. we can't put back the newline, but it's a
592 * whitespace character nobody cares about it anyway */
593 ungetcToInputFile (';');
594 break;
596 else
598 /* put semicolon in the string and continue */
599 extra = ';';
600 ungetcToInputFile (d);
604 /* if we are here it wasn't a delimiter, so put everything in the
605 * string */
606 vStringPut (string, (char) nl);
607 vStringNCatS (string, delimiter, len);
608 if (extra != EOF)
609 vStringPut (string, (char) extra);
612 while (c != EOF);
614 return;
616 error:
617 ungetcToInputFile (c);
620 static void parseIdentifier (vString *const string, const int firstChar)
622 int c = firstChar;
625 vStringPut (string, (char) c);
626 c = getcFromInputFile ();
627 } while (isIdentChar (c));
628 ungetcToInputFile (c);
631 static keywordId analyzeToken (vString *const name, langType language)
633 vString *keyword = vStringNew ();
634 keywordId result;
635 vStringCopyToLower (keyword, name);
636 result = lookupKeyword (vStringValue (keyword), language);
637 vStringDelete (keyword);
638 return result;
/* Whether @c is a tab, space, vertical tab, newline, carriage return or
 * form feed */
static bool isSpace (int c)
{
	switch (c)
	{
		case '\t':
		case ' ':
		case '\v':
		case '\n':
		case '\r':
		case '\f':
			return true;

		default:
			return false;
	}
}
/* Starting with @c, reads input until a non-whitespace character shows up
 * and returns it.  @c itself is returned if it is not whitespace. */
static int skipWhitespaces (int c)
{
	for (; isSpace (c); c = getcFromInputFile ())
		;
	return c;
}
/* <script[:white:]+language[:white:]*=[:white:]*(php|'php'|"php")[:white:]*>
 *
 * This is ugly, but the whole "<script language=php>" tag is and we can't
 * really do better without adding a lot of code only for this.
 *
 * @c is the first character of the candidate tag; the rest is read from the
 * input.  Matching ignores case.  Whatever was read before a mismatch,
 * mismatching character included, is not pushed back. */
static bool isOpenScriptLanguagePhp (int c)
{
	int quote = 0;

	/* <script */
	if (c != '<' ||
	    tolower ((c = getcFromInputFile ())) != 's' ||
	    tolower ((c = getcFromInputFile ())) != 'c' ||
	    tolower ((c = getcFromInputFile ())) != 'r' ||
	    tolower ((c = getcFromInputFile ())) != 'i' ||
	    tolower ((c = getcFromInputFile ())) != 'p' ||
	    tolower ((c = getcFromInputFile ())) != 't')
		return false;

	/* [:white:]+language */
	if (! isSpace ((c = getcFromInputFile ())) ||
	    tolower ((c = skipWhitespaces (c))) != 'l' ||
	    tolower ((c = getcFromInputFile ())) != 'a' ||
	    tolower ((c = getcFromInputFile ())) != 'n' ||
	    tolower ((c = getcFromInputFile ())) != 'g' ||
	    tolower ((c = getcFromInputFile ())) != 'u' ||
	    tolower ((c = getcFromInputFile ())) != 'a' ||
	    tolower ((c = getcFromInputFile ())) != 'g' ||
	    tolower ((c = getcFromInputFile ())) != 'e')
		return false;

	/* [:white:]*= */
	if ((c = skipWhitespaces (getcFromInputFile ())) != '=')
		return false;

	/* optional opening quote */
	c = skipWhitespaces (getcFromInputFile ());
	if (c == '"' || c == '\'')
	{
		quote = c;
		c = getcFromInputFile ();
	}

	/* php, the matching closing quote if one was opened, then > */
	return (tolower (c) == 'p' &&
	        tolower ((c = getcFromInputFile ())) == 'h' &&
	        tolower ((c = getcFromInputFile ())) == 'p' &&
	        (quote == 0 || (c = getcFromInputFile ()) == quote) &&
	        (c = skipWhitespaces (getcFromInputFile ())) == '>');
}
/* Skips input up to and including the next PHP opening tag.
 *
 * Recognized tags are "<?" (unless it starts "<?xml") and
 * <script language="php">.
 *
 * Returns EOF if no opening tag was found, or else the character the caller
 * should start tokenizing with:
 *  - after "<?", the last character read while ruling out "xml" (so the
 *    "php" of "<?php" ends up as a plain identifier token);
 *  - after a <script> tag, a space.  The whole tag has been consumed at that
 *    point, so there is nothing meaningful to hand over, and returning the
 *    character that followed '<' would glue an 's' onto whatever identifier
 *    directly follows the tag. */
static int findPhpStart (void)
{
	int c;

	do
	{
		if ((c = getcFromInputFile ()) == '<')
		{
			c = getcFromInputFile ();
			/* <? and <?php, but not <?xml */
			if (c == '?')
			{
				/* don't enter PHP mode on "<?xml", yet still support short open tags (<?) */
				if (tolower ((c = getcFromInputFile ())) != 'x' ||
				    tolower ((c = getcFromInputFile ())) != 'm' ||
				    tolower ((c = getcFromInputFile ())) != 'l')
				{
					break;
				}
			}
			/* <script language="php"> */
			else
			{
				ungetcToInputFile (c);
				if (isOpenScriptLanguagePhp ('<'))
				{
					/* tag fully consumed: report a blank, which the caller
					 * skips, instead of the stale first letter of "script" */
					c = ' ';
					break;
				}
			}
		}
	}
	while (c != EOF);

	return c;
}
732 static int skipSingleComment (void)
734 int c;
737 c = getcFromInputFile ();
738 if (c == '\r')
740 int next = getcFromInputFile ();
741 if (next != '\n')
742 ungetcToInputFile (next);
743 else
744 c = next;
746 /* ?> in single-line comments leaves PHP mode */
747 else if (c == '?')
749 int next = getcFromInputFile ();
750 if (next == '>')
751 InPhp = false;
752 else
753 ungetcToInputFile (next);
755 } while (InPhp && c != EOF && c != '\n' && c != '\r');
756 return c;
759 static void readToken (tokenInfo *const token)
761 int c;
763 token->type = TOKEN_UNDEFINED;
764 token->keyword = KEYWORD_NONE;
765 vStringClear (token->string);
767 getNextChar:
769 if (! InPhp)
771 c = findPhpStart ();
772 if (c != EOF)
773 InPhp = true;
775 else
776 c = getcFromInputFile ();
778 c = skipWhitespaces (c);
780 token->lineNumber = getInputLineNumber ();
781 token->filePosition = getInputFilePosition ();
783 switch (c)
785 case EOF: token->type = TOKEN_EOF; break;
786 case '(': token->type = TOKEN_OPEN_PAREN; break;
787 case ')': token->type = TOKEN_CLOSE_PAREN; break;
788 case ';': token->type = TOKEN_SEMICOLON; break;
789 case ',': token->type = TOKEN_COMMA; break;
790 case '.': token->type = TOKEN_PERIOD; break;
791 case ':': token->type = TOKEN_COLON; break;
792 case '{': token->type = TOKEN_OPEN_CURLY; break;
793 case '}': token->type = TOKEN_CLOSE_CURLY; break;
794 case '[': token->type = TOKEN_OPEN_SQUARE; break;
795 case ']': token->type = TOKEN_CLOSE_SQUARE; break;
796 case '&': token->type = TOKEN_AMPERSAND; break;
798 case '=':
800 int d = getcFromInputFile ();
801 if (d == '=' || d == '>')
802 token->type = TOKEN_OPERATOR;
803 else
805 ungetcToInputFile (d);
806 token->type = TOKEN_EQUAL_SIGN;
808 break;
811 case '\'':
812 case '"':
813 token->type = TOKEN_STRING;
814 parseString (token->string, c);
815 token->lineNumber = getInputLineNumber ();
816 token->filePosition = getInputFilePosition ();
817 break;
819 case '<':
821 int d = getcFromInputFile ();
822 if (d == '/')
824 /* </script[:white:]*> */
825 if (tolower ((d = getcFromInputFile ())) == 's' &&
826 tolower ((d = getcFromInputFile ())) == 'c' &&
827 tolower ((d = getcFromInputFile ())) == 'r' &&
828 tolower ((d = getcFromInputFile ())) == 'i' &&
829 tolower ((d = getcFromInputFile ())) == 'p' &&
830 tolower ((d = getcFromInputFile ())) == 't' &&
831 (d = skipWhitespaces (getcFromInputFile ())) == '>')
833 InPhp = false;
834 goto getNextChar;
836 else
838 ungetcToInputFile (d);
839 token->type = TOKEN_UNDEFINED;
842 else if (d == '<' && (d = getcFromInputFile ()) == '<')
844 token->type = TOKEN_STRING;
845 parseHeredoc (token->string);
847 else
849 ungetcToInputFile (d);
850 token->type = TOKEN_UNDEFINED;
852 break;
855 case '#': /* comment */
856 skipSingleComment ();
857 goto getNextChar;
858 break;
860 case '+':
861 case '-':
862 case '*':
863 case '%':
865 int d = getcFromInputFile ();
866 if (d != '=')
867 ungetcToInputFile (d);
868 token->type = TOKEN_OPERATOR;
869 break;
872 case '/': /* division or comment start */
874 int d = getcFromInputFile ();
875 if (d == '/') /* single-line comment */
877 skipSingleComment ();
878 goto getNextChar;
880 else if (d == '*')
884 c = skipToCharacterInInputFile ('*');
885 if (c != EOF)
887 c = getcFromInputFile ();
888 if (c == '/')
889 break;
890 else
891 ungetcToInputFile (c);
893 } while (c != EOF && c != '\0');
894 goto getNextChar;
896 else
898 if (d != '=')
899 ungetcToInputFile (d);
900 token->type = TOKEN_OPERATOR;
902 break;
905 case '$': /* variable start */
907 int d = getcFromInputFile ();
908 if (! isIdentChar (d))
910 ungetcToInputFile (d);
911 token->type = TOKEN_UNDEFINED;
913 else
915 parseIdentifier (token->string, d);
916 token->type = TOKEN_VARIABLE;
918 break;
921 case '?': /* maybe the end of the PHP chunk */
923 int d = getcFromInputFile ();
924 if (d == '>')
926 InPhp = false;
927 goto getNextChar;
929 else
931 ungetcToInputFile (d);
932 token->type = TOKEN_UNDEFINED;
934 break;
937 default:
938 if (! isIdentChar (c))
939 token->type = TOKEN_UNDEFINED;
940 else
942 parseIdentifier (token->string, c);
943 token->keyword = analyzeToken (token->string, getSourceLanguage ());
944 if (token->keyword == KEYWORD_NONE)
945 token->type = TOKEN_IDENTIFIER;
946 else
947 token->type = TOKEN_KEYWORD;
949 break;
952 if (token->type == TOKEN_SEMICOLON ||
953 token->type == TOKEN_OPEN_CURLY ||
954 token->type == TOKEN_CLOSE_CURLY)
956 /* reset current statement details on statement end, and when entering
957 * a deeper scope.
958 * it is a bit ugly to do this in readToken(), but it makes everything
959 * a lot simpler. */
960 CurrentStatement.access = ACCESS_UNDEFINED;
961 CurrentStatement.impl = IMPL_UNDEFINED;
965 static void enterScope (tokenInfo *const parentToken,
966 const vString *const extraScope,
967 const int parentKind);
969 /* parses a class or an interface:
970 * class Foo {}
971 * class Foo extends Bar {}
972 * class Foo extends Bar implements iFoo, iBar {}
973 * interface iFoo {}
974 * interface iBar extends iFoo {} */
975 static bool parseClassOrIface (tokenInfo *const token, const phpKind kind)
977 bool readNext = true;
978 implType impl = CurrentStatement.impl;
979 tokenInfo *name;
980 vString *inheritance = NULL;
982 readToken (token);
983 if (token->type != TOKEN_IDENTIFIER)
984 return false;
986 name = newToken ();
987 copyToken (name, token, true);
989 inheritance = vStringNew ();
990 /* skip until the open bracket and assume every identifier (not keyword)
991 * is an inheritance (like in "class Foo extends Bar implements iA, iB") */
994 readToken (token);
996 if (token->type == TOKEN_IDENTIFIER)
998 if (vStringLength (inheritance) > 0)
999 vStringPut (inheritance, ',');
1000 vStringCat (inheritance, token->string);
1003 while (token->type != TOKEN_EOF &&
1004 token->type != TOKEN_OPEN_CURLY);
1006 makeClassOrIfaceTag (kind, name, inheritance, impl);
1008 if (token->type == TOKEN_OPEN_CURLY)
1009 enterScope (token, name->string, K_CLASS);
1010 else
1011 readNext = false;
1013 deleteToken (name);
1014 vStringDelete (inheritance);
1016 return readNext;
1019 /* parses a trait:
1020 * trait Foo {} */
1021 static bool parseTrait (tokenInfo *const token)
1023 bool readNext = true;
1024 tokenInfo *name;
1026 readToken (token);
1027 if (token->type != TOKEN_IDENTIFIER)
1028 return false;
1030 name = newToken ();
1031 copyToken (name, token, true);
1033 makeSimplePhpTag (name, K_TRAIT, ACCESS_UNDEFINED);
1035 readToken (token);
1036 if (token->type == TOKEN_OPEN_CURLY)
1037 enterScope (token, name->string, K_TRAIT);
1038 else
1039 readNext = false;
1041 deleteToken (name);
1043 return readNext;
1046 /* parse a function
1048 * if @name is NULL, parses a normal function
1049 * function myfunc($foo, $bar) {}
1050 * function &myfunc($foo, $bar) {}
1052 * if @name is not NULL, parses an anonymous function with name @name
1053 * $foo = function($foo, $bar) {}
1054 * $foo = function&($foo, $bar) {}
1055 * $foo = function($foo, $bar) use ($x, &$y) {} */
1056 static bool parseFunction (tokenInfo *const token, const tokenInfo *name)
1058 bool readNext = true;
1059 accessType access = CurrentStatement.access;
1060 implType impl = CurrentStatement.impl;
1061 tokenInfo *nameFree = NULL;
1063 readToken (token);
1064 /* skip a possible leading ampersand (return by reference) */
1065 if (token->type == TOKEN_AMPERSAND)
1066 readToken (token);
1068 if (! name)
1070 if (token->type != TOKEN_IDENTIFIER)
1071 return false;
1073 name = nameFree = newToken ();
1074 copyToken (nameFree, token, true);
1075 readToken (token);
1078 if (token->type == TOKEN_OPEN_PAREN)
1080 vString *arglist = vStringNew ();
1081 int depth = 1;
1083 vStringPut (arglist, '(');
1086 readToken (token);
1088 switch (token->type)
1090 case TOKEN_OPEN_PAREN: depth++; break;
1091 case TOKEN_CLOSE_PAREN: depth--; break;
1092 default: break;
1094 /* display part */
1095 switch (token->type)
1097 case TOKEN_AMPERSAND: vStringPut (arglist, '&'); break;
1098 case TOKEN_CLOSE_CURLY: vStringPut (arglist, '}'); break;
1099 case TOKEN_CLOSE_PAREN: vStringPut (arglist, ')'); break;
1100 case TOKEN_CLOSE_SQUARE: vStringPut (arglist, ']'); break;
1101 case TOKEN_COLON: vStringPut (arglist, ':'); break;
1102 case TOKEN_COMMA: vStringCatS (arglist, ", "); break;
1103 case TOKEN_EQUAL_SIGN: vStringCatS (arglist, " = "); break;
1104 case TOKEN_OPEN_CURLY: vStringPut (arglist, '{'); break;
1105 case TOKEN_OPEN_PAREN: vStringPut (arglist, '('); break;
1106 case TOKEN_OPEN_SQUARE: vStringPut (arglist, '['); break;
1107 case TOKEN_PERIOD: vStringPut (arglist, '.'); break;
1108 case TOKEN_SEMICOLON: vStringPut (arglist, ';'); break;
1109 case TOKEN_STRING: vStringCatS (arglist, "'...'"); break;
1111 case TOKEN_IDENTIFIER:
1112 case TOKEN_KEYWORD:
1113 case TOKEN_VARIABLE:
1115 switch (vStringLast (arglist))
1117 case 0:
1118 case ' ':
1119 case '{':
1120 case '(':
1121 case '[':
1122 case '.':
1123 /* no need for a space between those and the identifier */
1124 break;
1126 default:
1127 vStringPut (arglist, ' ');
1128 break;
1130 if (token->type == TOKEN_VARIABLE)
1131 vStringPut (arglist, '$');
1132 vStringCat (arglist, token->string);
1133 break;
1136 default: break;
1139 while (token->type != TOKEN_EOF && depth > 0);
1141 makeFunctionTag (name, arglist, access, impl);
1142 vStringDelete (arglist);
1144 readToken (token); /* normally it's an open brace or "use" keyword */
1147 /* if parsing Zephir, skip function return type hint */
1148 if (getSourceLanguage () == Lang_zephir && token->type == TOKEN_OPERATOR)
1151 readToken (token);
1152 while (token->type != TOKEN_EOF &&
1153 token->type != TOKEN_OPEN_CURLY &&
1154 token->type != TOKEN_CLOSE_CURLY &&
1155 token->type != TOKEN_SEMICOLON);
1158 /* skip use(...) */
1159 if (token->type == TOKEN_KEYWORD && token->keyword == KEYWORD_use)
1161 readToken (token);
1162 if (token->type == TOKEN_OPEN_PAREN)
1164 int depth = 1;
1168 readToken (token);
1169 switch (token->type)
1171 case TOKEN_OPEN_PAREN: depth++; break;
1172 case TOKEN_CLOSE_PAREN: depth--; break;
1173 default: break;
1176 while (token->type != TOKEN_EOF && depth > 0);
1178 readToken (token);
1182 if (token->type == TOKEN_OPEN_CURLY)
1183 enterScope (token, name->string, K_FUNCTION);
1184 else
1185 readNext = false;
1187 if (nameFree)
1188 deleteToken (nameFree);
1190 return readNext;
1193 /* parses declarations of the form
1194 * const NAME = VALUE */
1195 static bool parseConstant (tokenInfo *const token)
1197 tokenInfo *name;
1199 readToken (token); /* skip const keyword */
1200 if (token->type != TOKEN_IDENTIFIER)
1201 return false;
1203 name = newToken ();
1204 copyToken (name, token, true);
1206 readToken (token);
1207 if (token->type == TOKEN_EQUAL_SIGN)
1208 makeSimplePhpTag (name, K_DEFINE, ACCESS_UNDEFINED);
1210 deleteToken (name);
1212 return token->type == TOKEN_EQUAL_SIGN;
1215 /* parses declarations of the form
1216 * define('NAME', 'VALUE')
1217 * define(NAME, 'VALUE) */
1218 static bool parseDefine (tokenInfo *const token)
1220 int depth = 1;
1222 readToken (token); /* skip "define" identifier */
1223 if (token->type != TOKEN_OPEN_PAREN)
1224 return false;
1226 readToken (token);
1227 if (token->type == TOKEN_STRING ||
1228 token->type == TOKEN_IDENTIFIER)
1230 makeSimplePhpTag (token, K_DEFINE, ACCESS_UNDEFINED);
1231 readToken (token);
1234 /* skip until the close parenthesis.
1235 * no need to handle nested blocks since they would be invalid
1236 * in this context anyway (the VALUE may only be a scalar, like
1237 * 42
1238 * (42)
1239 * and alike) */
1240 while (token->type != TOKEN_EOF && depth > 0)
1242 switch (token->type)
1244 case TOKEN_OPEN_PAREN: depth++; break;
1245 case TOKEN_CLOSE_PAREN: depth--; break;
1246 default: break;
1248 readToken (token);
1251 return false;
1254 /* parses declarations of the form
1255 * $var = VALUE
1256 * $var; */
1257 static bool parseVariable (tokenInfo *const token)
1259 tokenInfo *name;
1260 bool readNext = true;
1261 accessType access = CurrentStatement.access;
1263 name = newToken ();
1264 copyToken (name, token, true);
1266 readToken (token);
1267 if (token->type == TOKEN_EQUAL_SIGN)
1269 phpKind kind = K_VARIABLE;
1271 if (token->parentKind == K_FUNCTION)
1272 kind = K_LOCAL_VARIABLE;
1274 readToken (token);
1275 if (token->type == TOKEN_KEYWORD &&
1276 token->keyword == KEYWORD_function &&
1277 PhpKinds[kind].enabled)
1279 if (parseFunction (token, name))
1280 readToken (token);
1281 readNext = (bool) (token->type == TOKEN_SEMICOLON);
1283 else
1285 makeSimplePhpTag (name, kind, access);
1286 readNext = false;
1289 else if (token->type == TOKEN_SEMICOLON)
1291 /* generate tags for variable declarations in classes
1292 * class Foo {
1293 * protected $foo;
1295 * but don't get fooled by stuff like $foo = $bar; */
1296 if (token->parentKind == K_CLASS || token->parentKind == K_INTERFACE)
1297 makeSimplePhpTag (name, K_VARIABLE, access);
1299 else
1300 readNext = false;
1302 deleteToken (name);
1304 return readNext;
1307 /* parses namespace declarations
1308 * namespace Foo {}
1309 * namespace Foo\Bar {}
1310 * namespace Foo;
1311 * namespace Foo\Bar;
1312 * namespace;
1313 * napespace {} */
1314 static bool parseNamespace (tokenInfo *const token)
1316 tokenInfo *nsToken = newToken ();
1318 vStringClear (CurrentNamespace);
1319 copyToken (nsToken, token, false);
1323 readToken (token);
1324 if (token->type == TOKEN_IDENTIFIER)
1326 if (vStringLength (CurrentNamespace) > 0)
1327 vStringPut (CurrentNamespace, '\\');
1328 vStringCat (CurrentNamespace, token->string);
1331 while (token->type != TOKEN_EOF &&
1332 token->type != TOKEN_SEMICOLON &&
1333 token->type != TOKEN_OPEN_CURLY);
1335 if (vStringLength (CurrentNamespace) > 0)
1336 makeNamespacePhpTag (nsToken, CurrentNamespace);
1338 if (token->type == TOKEN_OPEN_CURLY)
1339 enterScope (token, NULL, -1);
1341 deleteToken (nsToken);
1343 return true;
1346 static void enterScope (tokenInfo *const parentToken,
1347 const vString *const extraScope,
1348 const int parentKind)
1350 tokenInfo *token = newToken ();
1351 int origParentKind = parentToken->parentKind;
1353 copyToken (token, parentToken, true);
1355 if (extraScope)
1357 addToScope (token, extraScope);
1358 token->parentKind = parentKind;
1361 readToken (token);
1362 while (token->type != TOKEN_EOF &&
1363 token->type != TOKEN_CLOSE_CURLY)
1365 bool readNext = true;
1367 switch (token->type)
1369 case TOKEN_OPEN_CURLY:
1370 enterScope (token, NULL, -1);
1371 break;
1373 case TOKEN_KEYWORD:
1374 switch (token->keyword)
1376 case KEYWORD_class: readNext = parseClassOrIface (token, K_CLASS); break;
1377 case KEYWORD_interface: readNext = parseClassOrIface (token, K_INTERFACE); break;
1378 case KEYWORD_trait: readNext = parseTrait (token); break;
1379 case KEYWORD_function: readNext = parseFunction (token, NULL); break;
1380 case KEYWORD_const: readNext = parseConstant (token); break;
1381 case KEYWORD_define: readNext = parseDefine (token); break;
1383 case KEYWORD_namespace: readNext = parseNamespace (token); break;
1385 case KEYWORD_private: CurrentStatement.access = ACCESS_PRIVATE; break;
1386 case KEYWORD_protected: CurrentStatement.access = ACCESS_PROTECTED; break;
1387 case KEYWORD_public: CurrentStatement.access = ACCESS_PUBLIC; break;
1388 case KEYWORD_var: CurrentStatement.access = ACCESS_PUBLIC; break;
1390 case KEYWORD_abstract: CurrentStatement.impl = IMPL_ABSTRACT; break;
1392 default: break;
1394 break;
1396 case TOKEN_VARIABLE:
1397 readNext = parseVariable (token);
1398 break;
1400 default: break;
1403 if (readNext)
1404 readToken (token);
1407 copyToken (parentToken, token, false);
1408 parentToken->parentKind = origParentKind;
1409 deleteToken (token);
1412 static void findTags (void)
1414 tokenInfo *const token = newToken ();
1416 CurrentStatement.access = ACCESS_UNDEFINED;
1417 CurrentStatement.impl = IMPL_UNDEFINED;
1418 CurrentNamespace = vStringNew ();
1422 enterScope (token, NULL, -1);
1424 while (token->type != TOKEN_EOF); /* keep going even with unmatched braces */
1426 vStringDelete (CurrentNamespace);
1427 deleteToken (token);
1430 static void findPhpTags (void)
1432 InPhp = false;
1433 findTags ();
1436 static void findZephirTags (void)
1438 InPhp = true;
1439 findTags ();
1442 static void initializePhpParser (const langType language)
1444 Lang_php = language;
1447 static void initializeZephirParser (const langType language)
1449 Lang_zephir = language;
1452 extern parserDefinition* PhpParser (void)
1454 static const char *const extensions [] = { "php", "php3", "php4", "php5", "phtml", NULL };
1455 parserDefinition* def = parserNew ("PHP");
1456 def->kinds = PhpKinds;
1457 def->kindCount = ARRAY_SIZE (PhpKinds);
1458 def->extensions = extensions;
1459 def->parser = findPhpTags;
1460 def->initialize = initializePhpParser;
1461 def->keywordTable = PhpKeywordTable;
1462 def->keywordCount = ARRAY_SIZE (PhpKeywordTable);
1463 return def;
1466 extern parserDefinition* ZephirParser (void)
1468 static const char *const extensions [] = { "zep", NULL };
1469 parserDefinition* def = parserNew ("Zephir");
1470 def->kinds = PhpKinds;
1471 def->kindCount = ARRAY_SIZE (PhpKinds);
1472 def->extensions = extensions;
1473 def->parser = findZephirTags;
1474 def->initialize = initializeZephirParser;
1475 def->keywordTable = PhpKeywordTable;
1476 def->keywordCount = ARRAY_SIZE (PhpKeywordTable);
1477 return def;