Make parser includes closer to uctags and sync parser license header
[geany-mirror.git] / ctags / parsers / php.c
blob107bdfe4c0bab37051b9eaea30390c0146887c8a
1 /*
2 * Copyright (c) 2013, Colomban Wendling <ban@herbesfolles.org>
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License version 2 or (at your option) any later version.
7 * This module contains code for generating tags for the PHP scripting
8 * language.
9 */
/*
 *   INCLUDE FILES
 */
14 #include "general.h" /* must always come first */
15 #include "main.h"
16 #include "parse.h"
17 #include "read.h"
18 #include "vstring.h"
19 #include "keyword.h"
20 #include "entry.h"
21 #include "routines.h"
22 #include "debug.h"
/* separator inserted between the components of a scope name
 * (see initPhpEntry() and addToScope()) */
#define SCOPE_SEPARATOR "::"
/* Identifiers of the keywords recognized by the parser.
 * KEYWORD_NONE marks a token that is not a keyword; the other values are the
 * IDs stored in PhpKeywordTable and registered through addKeyword(). */
typedef enum {
	KEYWORD_NONE = -1,
	KEYWORD_abstract,
	KEYWORD_and,
	KEYWORD_as,
	KEYWORD_break,
	KEYWORD_callable,
	KEYWORD_case,
	KEYWORD_catch,
	KEYWORD_class,
	KEYWORD_clone,
	KEYWORD_const,
	KEYWORD_continue,
	KEYWORD_declare,
	KEYWORD_define, /* not a real keyword, see PhpKeywordTable */
	KEYWORD_default,
	KEYWORD_do,
	KEYWORD_echo,
	KEYWORD_else,
	KEYWORD_elif, /* spelled "elseif" in PhpKeywordTable */
	KEYWORD_enddeclare,
	KEYWORD_endfor,
	KEYWORD_endforeach,
	KEYWORD_endif,
	KEYWORD_endswitch,
	KEYWORD_endwhile,
	KEYWORD_extends,
	KEYWORD_final,
	KEYWORD_finally,
	KEYWORD_for,
	KEYWORD_foreach,
	KEYWORD_function,
	KEYWORD_global,
	KEYWORD_goto,
	KEYWORD_if,
	KEYWORD_implements,
	KEYWORD_include,
	KEYWORD_include_once,
	KEYWORD_instanceof,
	KEYWORD_insteadof,
	KEYWORD_interface,
	KEYWORD_namespace,
	KEYWORD_new,
	KEYWORD_or,
	KEYWORD_print,
	KEYWORD_private,
	KEYWORD_protected,
	KEYWORD_public,
	KEYWORD_require,
	KEYWORD_require_once,
	KEYWORD_return,
	KEYWORD_static,
	KEYWORD_switch,
	KEYWORD_throw,
	KEYWORD_trait,
	KEYWORD_try,
	KEYWORD_use,
	KEYWORD_var,
	KEYWORD_while,
	KEYWORD_xor,
	KEYWORD_yield
} keywordId;
/* Visibility attached to the current statement.
 * COUNT_ACCESS is not a visibility: it is the number of values and sizes the
 * name table in accessToString(). */
typedef enum {
	ACCESS_UNDEFINED,
	ACCESS_PRIVATE,
	ACCESS_PROTECTED,
	ACCESS_PUBLIC,
	COUNT_ACCESS
} accessType;
/* Implementation attribute attached to the current statement.
 * COUNT_IMPL is the number of values and sizes the name table in
 * implToString(). */
typedef enum {
	IMPL_UNDEFINED,
	IMPL_ABSTRACT,
	COUNT_IMPL
} implType;
/* Tag kinds; each value is the index of the matching entry in PhpKinds.
 * COUNT_KIND is the number of kinds. */
typedef enum {
	K_CLASS,
	K_DEFINE,
	K_FUNCTION,
	K_INTERFACE,
	K_LOCAL_VARIABLE,
	K_NAMESPACE,
	K_TRAIT,
	K_VARIABLE,
	COUNT_KIND
} phpKind;
117 static kindOption PhpKinds[COUNT_KIND] = {
118 { TRUE, 'c', "class", "classes" },
119 { TRUE, 'd', "define", "constant definitions" },
120 { TRUE, 'f', "function", "functions" },
121 { TRUE, 'i', "interface", "interfaces" },
122 { FALSE, 'l', "local", "local variables" },
123 { TRUE, 'n', "namespace", "namespaces" },
124 { TRUE, 't', "trait", "traits" },
125 { TRUE, 'v', "variable", "variables" }
/* One row of the keyword table: a keyword spelling and the ID it maps to. */
typedef struct {
	const char *name; /* spelling; looked up after lower-casing, see analyzeToken() */
	keywordId id;     /* ID registered for that spelling */
} keywordDesc;
133 static const keywordDesc PhpKeywordTable[] = {
134 /* keyword keyword ID */
135 { "abstract", KEYWORD_abstract },
136 { "and", KEYWORD_and },
137 { "as", KEYWORD_as },
138 { "break", KEYWORD_break },
139 { "callable", KEYWORD_callable },
140 { "case", KEYWORD_case },
141 { "catch", KEYWORD_catch },
142 { "cfunction", KEYWORD_function }, /* nobody knows what the hell this is, but it seems to behave much like "function" so bind it to it */
143 { "class", KEYWORD_class },
144 { "clone", KEYWORD_clone },
145 { "const", KEYWORD_const },
146 { "continue", KEYWORD_continue },
147 { "declare", KEYWORD_declare },
148 { "define", KEYWORD_define }, /* this isn't really a keyword but we handle it so it's easier this way */
149 { "default", KEYWORD_default },
150 { "do", KEYWORD_do },
151 { "echo", KEYWORD_echo },
152 { "else", KEYWORD_else },
153 { "elseif", KEYWORD_elif },
154 { "enddeclare", KEYWORD_enddeclare },
155 { "endfor", KEYWORD_endfor },
156 { "endforeach", KEYWORD_endforeach },
157 { "endif", KEYWORD_endif },
158 { "endswitch", KEYWORD_endswitch },
159 { "endwhile", KEYWORD_endwhile },
160 { "extends", KEYWORD_extends },
161 { "final", KEYWORD_final },
162 { "finally", KEYWORD_finally },
163 { "for", KEYWORD_for },
164 { "foreach", KEYWORD_foreach },
165 { "function", KEYWORD_function },
166 { "global", KEYWORD_global },
167 { "goto", KEYWORD_goto },
168 { "if", KEYWORD_if },
169 { "implements", KEYWORD_implements },
170 { "include", KEYWORD_include },
171 { "include_once", KEYWORD_include_once },
172 { "instanceof", KEYWORD_instanceof },
173 { "insteadof", KEYWORD_insteadof },
174 { "interface", KEYWORD_interface },
175 { "namespace", KEYWORD_namespace },
176 { "new", KEYWORD_new },
177 { "or", KEYWORD_or },
178 { "print", KEYWORD_print },
179 { "private", KEYWORD_private },
180 { "protected", KEYWORD_protected },
181 { "public", KEYWORD_public },
182 { "require", KEYWORD_require },
183 { "require_once", KEYWORD_require_once },
184 { "return", KEYWORD_return },
185 { "static", KEYWORD_static },
186 { "switch", KEYWORD_switch },
187 { "throw", KEYWORD_throw },
188 { "trait", KEYWORD_trait },
189 { "try", KEYWORD_try },
190 { "use", KEYWORD_use },
191 { "var", KEYWORD_var },
192 { "while", KEYWORD_while },
193 { "xor", KEYWORD_xor },
194 { "yield", KEYWORD_yield }
/* Token classes produced by readToken(). */
typedef enum eTokenType {
	TOKEN_UNDEFINED, /* anything readToken() does not classify */
	TOKEN_EOF,
	TOKEN_CHARACTER,
	TOKEN_CLOSE_PAREN,
	TOKEN_SEMICOLON,
	TOKEN_COLON,
	TOKEN_COMMA,
	TOKEN_KEYWORD, /* identifier found in the keyword table */
	TOKEN_OPEN_PAREN,
	TOKEN_OPERATOR,
	TOKEN_IDENTIFIER,
	TOKEN_STRING, /* quoted string, heredoc or nowdoc */
	TOKEN_PERIOD,
	TOKEN_OPEN_CURLY,
	TOKEN_CLOSE_CURLY,
	TOKEN_EQUAL_SIGN,
	TOKEN_OPEN_SQUARE,
	TOKEN_CLOSE_SQUARE,
	TOKEN_VARIABLE, /* $name; the string holds the name without the '$' */
	TOKEN_AMPERSAND
} tokenType;
/* A token together with the scope it was read in. */
typedef struct {
	tokenType type;           /* class of the token */
	keywordId keyword;        /* keyword ID, KEYWORD_NONE if not a keyword */
	vString * string;         /* token text (identifier, variable name, string content) */
	vString * scope;          /* enclosing scope name, empty if none */
	unsigned long lineNumber; /* line recorded when the token was read */
	MIOPos filePosition;      /* file position recorded when the token was read */
	int parentKind; /* -1 if none */
} tokenInfo;
/* language IDs, stored by initializePhpParser() and initializeZephirParser() */
static langType Lang_php;
static langType Lang_zephir;

static boolean InPhp = FALSE; /* whether we are between <? ?> */

/* current statement details; set while scanning modifiers in enterScope()
 * and reset by readToken() on ';', '{' and '}' */
static struct {
	accessType access;
	implType impl;
} CurrentStatement;

/* Current namespace; allocated and released by findTags(), filled in by
 * parseNamespace() */
static vString *CurrentNamespace;
246 static void buildPhpKeywordHash (const langType language)
248 const size_t count = sizeof (PhpKeywordTable) / sizeof (PhpKeywordTable[0]);
249 size_t i;
250 for (i = 0; i < count ; i++)
252 const keywordDesc* const p = &PhpKeywordTable[i];
253 addKeyword (p->name, language, (int) p->id);
257 static const char *accessToString (const accessType access)
259 static const char *const names[COUNT_ACCESS] = {
260 "undefined",
261 "private",
262 "protected",
263 "public"
266 Assert (access < COUNT_ACCESS);
268 return names[access];
271 static const char *implToString (const implType impl)
273 static const char *const names[COUNT_IMPL] = {
274 "undefined",
275 "abstract"
278 Assert (impl < COUNT_IMPL);
280 return names[impl];
283 static void initPhpEntry (tagEntryInfo *const e, const tokenInfo *const token,
284 const phpKind kind, const accessType access)
286 static vString *fullScope = NULL;
287 int parentKind = -1;
289 if (fullScope == NULL)
290 fullScope = vStringNew ();
291 else
292 vStringClear (fullScope);
294 if (vStringLength (CurrentNamespace) > 0)
296 vStringCopy (fullScope, CurrentNamespace);
297 parentKind = K_NAMESPACE;
300 initTagEntry (e, vStringValue (token->string));
302 e->lineNumber = token->lineNumber;
303 e->filePosition = token->filePosition;
304 e->kindName = PhpKinds[kind].name;
305 e->kind = (char) PhpKinds[kind].letter;
307 if (access != ACCESS_UNDEFINED)
308 e->extensionFields.access = accessToString (access);
309 if (vStringLength (token->scope) > 0)
311 parentKind = token->parentKind;
312 if (vStringLength (fullScope) > 0)
313 vStringCatS (fullScope, SCOPE_SEPARATOR);
314 vStringCat (fullScope, token->scope);
316 if (vStringLength (fullScope) > 0)
318 Assert (parentKind >= 0);
320 vStringTerminate (fullScope);
321 e->extensionFields.scope[0] = PhpKinds[parentKind].name;
322 e->extensionFields.scope[1] = vStringValue (fullScope);
326 static void makeSimplePhpTag (const tokenInfo *const token, const phpKind kind,
327 const accessType access)
329 if (PhpKinds[kind].enabled)
331 tagEntryInfo e;
333 initPhpEntry (&e, token, kind, access);
334 makeTagEntry (&e);
338 static void makeNamespacePhpTag (const tokenInfo *const token, const vString *const name)
340 if (PhpKinds[K_NAMESPACE].enabled)
342 tagEntryInfo e;
344 initTagEntry (&e, vStringValue (name));
346 e.lineNumber = token->lineNumber;
347 e.filePosition = token->filePosition;
348 e.kindName = PhpKinds[K_NAMESPACE].name;
349 e.kind = (char) PhpKinds[K_NAMESPACE].letter;
351 makeTagEntry (&e);
355 static void makeClassOrIfaceTag (const phpKind kind, const tokenInfo *const token,
356 vString *const inheritance, const implType impl)
358 if (PhpKinds[kind].enabled)
360 tagEntryInfo e;
362 initPhpEntry (&e, token, kind, ACCESS_UNDEFINED);
364 if (impl != IMPL_UNDEFINED)
365 e.extensionFields.implementation = implToString (impl);
366 if (vStringLength (inheritance) > 0)
367 e.extensionFields.inheritance = vStringValue (inheritance);
369 makeTagEntry (&e);
373 static void makeFunctionTag (const tokenInfo *const token,
374 const vString *const arglist,
375 const accessType access, const implType impl)
377 if (PhpKinds[K_FUNCTION].enabled)
379 tagEntryInfo e;
381 initPhpEntry (&e, token, K_FUNCTION, access);
383 if (impl != IMPL_UNDEFINED)
384 e.extensionFields.implementation = implToString (impl);
385 if (arglist)
386 e.extensionFields.signature = vStringValue (arglist);
388 makeTagEntry (&e);
392 static tokenInfo *newToken (void)
394 tokenInfo *const token = xMalloc (1, tokenInfo);
396 token->type = TOKEN_UNDEFINED;
397 token->keyword = KEYWORD_NONE;
398 token->string = vStringNew ();
399 token->scope = vStringNew ();
400 token->lineNumber = getSourceLineNumber ();
401 token->filePosition = getInputFilePosition ();
402 token->parentKind = -1;
404 return token;
407 static void deleteToken (tokenInfo *const token)
409 vStringDelete (token->string);
410 vStringDelete (token->scope);
411 eFree (token);
414 static void copyToken (tokenInfo *const dest, const tokenInfo *const src,
415 boolean scope)
417 dest->lineNumber = src->lineNumber;
418 dest->filePosition = src->filePosition;
419 dest->type = src->type;
420 dest->keyword = src->keyword;
421 vStringCopy(dest->string, src->string);
422 dest->parentKind = src->parentKind;
423 if (scope)
424 vStringCopy(dest->scope, src->scope);
427 #if 0
428 #include <stdio.h>
430 static const char *tokenTypeName (const tokenType type)
432 switch (type)
434 case TOKEN_UNDEFINED: return "undefined";
435 case TOKEN_EOF: return "EOF";
436 case TOKEN_CHARACTER: return "character";
437 case TOKEN_CLOSE_PAREN: return "')'";
438 case TOKEN_SEMICOLON: return "';'";
439 case TOKEN_COLON: return "':'";
440 case TOKEN_COMMA: return "','";
441 case TOKEN_OPEN_PAREN: return "'('";
442 case TOKEN_OPERATOR: return "operator";
443 case TOKEN_IDENTIFIER: return "identifier";
444 case TOKEN_KEYWORD: return "keyword";
445 case TOKEN_STRING: return "string";
446 case TOKEN_PERIOD: return "'.'";
447 case TOKEN_OPEN_CURLY: return "'{'";
448 case TOKEN_CLOSE_CURLY: return "'}'";
449 case TOKEN_EQUAL_SIGN: return "'='";
450 case TOKEN_OPEN_SQUARE: return "'['";
451 case TOKEN_CLOSE_SQUARE: return "']'";
452 case TOKEN_VARIABLE: return "variable";
454 return NULL;
457 static void printToken (const tokenInfo *const token)
459 fprintf (stderr, "%p:\n\ttype:\t%s\n\tline:\t%lu\n\tscope:\t%s\n", (void *) token,
460 tokenTypeName (token->type),
461 token->lineNumber,
462 vStringValue (token->scope));
463 switch (token->type)
465 case TOKEN_IDENTIFIER:
466 case TOKEN_STRING:
467 case TOKEN_VARIABLE:
468 fprintf (stderr, "\tcontent:\t%s\n", vStringValue (token->string));
469 break;
471 case TOKEN_KEYWORD:
473 size_t n = sizeof PhpKeywordTable / sizeof PhpKeywordTable[0];
474 size_t i;
476 fprintf (stderr, "\tkeyword:\t");
477 for (i = 0; i < n; i++)
479 if (PhpKeywordTable[i].id == token->keyword)
481 fprintf (stderr, "%s\n", PhpKeywordTable[i].name);
482 break;
485 if (i >= n)
486 fprintf (stderr, "(unknown)\n");
489 default: break;
492 #endif
494 static void addToScope (tokenInfo *const token, const vString *const extra)
496 if (vStringLength (token->scope) > 0)
497 vStringCatS (token->scope, SCOPE_SEPARATOR);
498 vStringCatS (token->scope, vStringValue (extra));
499 vStringTerminate(token->scope);
502 static boolean isIdentChar (const int c)
504 return (isalnum (c) || c == '_' || c >= 0x80);
/* Consumes input up to and including the next occurrence of @c.
 * Returns @c, or EOF if the input ended first. */
static int skipToCharacter (const int c)
{
	for (;;)
	{
		const int current = fileGetc ();

		if (current == EOF || current == c)
			return current;
	}
}
517 static void parseString (vString *const string, const int delimiter)
519 while (TRUE)
521 int c = fileGetc ();
523 if (c == '\\' && (c = fileGetc ()) != EOF)
524 vStringPut (string, (char) c);
525 else if (c == EOF || c == delimiter)
526 break;
527 else
528 vStringPut (string, (char) c);
530 vStringTerminate (string);
533 /* reads an HereDoc or a NowDoc (the part after the <<<).
534 * <<<[ \t]*(ID|'ID'|"ID")
535 * ...
536 * ID;?
538 * note that:
539 * 1) starting ID must be immediately followed by a newline;
540 * 2) closing ID is the same as opening one;
541 * 3) closing ID must be immediately followed by a newline or a semicolon
542 * then a newline.
544 * Example of a *single* valid heredoc:
545 * <<< FOO
546 * something
547 * something else
548 * FOO this is not an end
549 * FOO; this isn't either
550 * FOO; # neither this is
551 * FOO;
552 * # previous line was the end, but the semicolon wasn't required
554 static void parseHeredoc (vString *const string)
556 int c;
557 unsigned int len;
558 char delimiter[64]; /* arbitrary limit, but more is crazy anyway */
559 int quote = 0;
563 c = fileGetc ();
565 while (c == ' ' || c == '\t');
567 if (c == '\'' || c == '"')
569 quote = c;
570 c = fileGetc ();
572 for (len = 0; len < (sizeof delimiter / sizeof delimiter[0]) - 1; len++)
574 if (! isIdentChar (c))
575 break;
576 delimiter[len] = (char) c;
577 c = fileGetc ();
579 delimiter[len] = 0;
581 if (len == 0) /* no delimiter, give up */
582 goto error;
583 if (quote)
585 if (c != quote) /* no closing quote for quoted identifier, give up */
586 goto error;
587 c = fileGetc ();
589 if (c != '\r' && c != '\n') /* missing newline, give up */
590 goto error;
594 c = fileGetc ();
596 if (c != '\r' && c != '\n')
597 vStringPut (string, (char) c);
598 else
600 /* new line, check for a delimiter right after */
601 int nl = c;
602 int extra = EOF;
604 c = fileGetc ();
605 for (len = 0; c != 0 && (c - delimiter[len]) == 0; len++)
606 c = fileGetc ();
608 if (delimiter[len] != 0)
609 fileUngetc (c);
610 else
612 /* line start matched the delimiter, now check whether there
613 * is anything after it */
614 if (c == '\r' || c == '\n')
616 fileUngetc (c);
617 break;
619 else if (c == ';')
621 int d = fileGetc ();
622 if (d == '\r' || d == '\n')
624 /* put back the semicolon since it's not part of the
625 * string. we can't put back the newline, but it's a
626 * whitespace character nobody cares about it anyway */
627 fileUngetc (';');
628 break;
630 else
632 /* put semicolon in the string and continue */
633 extra = ';';
634 fileUngetc (d);
638 /* if we are here it wasn't a delimiter, so put everything in the
639 * string */
640 vStringPut (string, (char) nl);
641 vStringNCatS (string, delimiter, len);
642 if (extra != EOF)
643 vStringPut (string, (char) extra);
646 while (c != EOF);
648 vStringTerminate (string);
650 return;
652 error:
653 fileUngetc (c);
656 static void parseIdentifier (vString *const string, const int firstChar)
658 int c = firstChar;
661 vStringPut (string, (char) c);
662 c = fileGetc ();
663 } while (isIdentChar (c));
664 fileUngetc (c);
665 vStringTerminate (string);
668 static keywordId analyzeToken (vString *const name, langType language)
670 vString *keyword = vStringNew ();
671 keywordId result;
672 vStringCopyToLower (keyword, name);
673 result = lookupKeyword (vStringValue (keyword), language);
674 vStringDelete (keyword);
675 return result;
678 static boolean isSpace (int c)
680 return (c == '\t' || c == ' ' || c == '\v' ||
681 c == '\n' || c == '\r' || c == '\f');
/* Starting from the already read character @c, consumes whitespace and
 * returns the first character that is not whitespace (possibly @c itself,
 * or EOF). */
static int skipWhitespaces (int c)
{
	for (; isSpace (c); c = fileGetc ())
		;
	return c;
}
691 /* <script[:white:]+language[:white:]*=[:white:]*(php|'php'|"php")[:white:]*>
693 * This is ugly, but the whole "<script language=php>" tag is and we can't
694 * really do better without adding a lot of code only for this */
695 static boolean isOpenScriptLanguagePhp (int c)
697 int quote = 0;
699 /* <script[:white:]+language[:white:]*= */
700 if (c != '<' ||
701 tolower ((c = fileGetc ())) != 's' ||
702 tolower ((c = fileGetc ())) != 'c' ||
703 tolower ((c = fileGetc ())) != 'r' ||
704 tolower ((c = fileGetc ())) != 'i' ||
705 tolower ((c = fileGetc ())) != 'p' ||
706 tolower ((c = fileGetc ())) != 't' ||
707 ! isSpace ((c = fileGetc ())) ||
708 tolower ((c = skipWhitespaces (c))) != 'l' ||
709 tolower ((c = fileGetc ())) != 'a' ||
710 tolower ((c = fileGetc ())) != 'n' ||
711 tolower ((c = fileGetc ())) != 'g' ||
712 tolower ((c = fileGetc ())) != 'u' ||
713 tolower ((c = fileGetc ())) != 'a' ||
714 tolower ((c = fileGetc ())) != 'g' ||
715 tolower ((c = fileGetc ())) != 'e' ||
716 (c = skipWhitespaces (fileGetc ())) != '=')
717 return FALSE;
719 /* (php|'php'|"php")> */
720 c = skipWhitespaces (fileGetc ());
721 if (c == '"' || c == '\'')
723 quote = c;
724 c = fileGetc ();
726 if (tolower (c) != 'p' ||
727 tolower ((c = fileGetc ())) != 'h' ||
728 tolower ((c = fileGetc ())) != 'p' ||
729 (quote != 0 && (c = fileGetc ()) != quote) ||
730 (c = skipWhitespaces (fileGetc ())) != '>')
731 return FALSE;
733 return TRUE;
/* Skips input until a PHP open tag has been consumed.
 *
 * Recognized open tags are "<?" (short tags and "<?php", but not "<?xml")
 * and <script language="php">.
 *
 * Returns EOF if the input ended first.  After "<?", returns the first
 * character read past the tag that ruled out "<?xml".  After a <script> tag,
 * returns a space: the character left in the local variable at that point is
 * the one that followed the '<' (the 's'), and returning it would make the
 * caller treat it as the first character of the PHP code. */
static int findPhpStart (void)
{
	int c;

	do
	{
		if ((c = fileGetc ()) == '<')
		{
			c = fileGetc ();
			/* <? and <?php, but not <?xml */
			if (c == '?')
			{
				/* don't enter PHP mode on "<?xml", yet still support short open tags (<?) */
				if (tolower ((c = fileGetc ())) != 'x' ||
				    tolower ((c = fileGetc ())) != 'm' ||
				    tolower ((c = fileGetc ())) != 'l')
				{
					break;
				}
			}
			/* <script language="php"> */
			else
			{
				fileUngetc (c);
				if (isOpenScriptLanguagePhp ('<'))
				{
					/* the whole tag was consumed, hand back neutral
					 * whitespace instead of the stale character */
					c = ' ';
					break;
				}
			}
		}
	}
	while (c != EOF);

	return c;
}
769 static int skipSingleComment (void)
771 int c;
774 c = fileGetc ();
775 if (c == '\r')
777 int next = fileGetc ();
778 if (next != '\n')
779 fileUngetc (next);
780 else
781 c = next;
783 /* ?> in single-line comments leaves PHP mode */
784 else if (c == '?')
786 int next = fileGetc ();
787 if (next == '>')
788 InPhp = FALSE;
789 else
790 fileUngetc (next);
792 } while (InPhp && c != EOF && c != '\n' && c != '\r');
793 return c;
796 static void readToken (tokenInfo *const token)
798 int c;
800 token->type = TOKEN_UNDEFINED;
801 token->keyword = KEYWORD_NONE;
802 vStringClear (token->string);
804 getNextChar:
806 if (! InPhp)
808 c = findPhpStart ();
809 if (c != EOF)
810 InPhp = TRUE;
812 else
813 c = fileGetc ();
815 c = skipWhitespaces (c);
817 token->lineNumber = getSourceLineNumber ();
818 token->filePosition = getInputFilePosition ();
820 switch (c)
822 case EOF: token->type = TOKEN_EOF; break;
823 case '(': token->type = TOKEN_OPEN_PAREN; break;
824 case ')': token->type = TOKEN_CLOSE_PAREN; break;
825 case ';': token->type = TOKEN_SEMICOLON; break;
826 case ',': token->type = TOKEN_COMMA; break;
827 case '.': token->type = TOKEN_PERIOD; break;
828 case ':': token->type = TOKEN_COLON; break;
829 case '{': token->type = TOKEN_OPEN_CURLY; break;
830 case '}': token->type = TOKEN_CLOSE_CURLY; break;
831 case '[': token->type = TOKEN_OPEN_SQUARE; break;
832 case ']': token->type = TOKEN_CLOSE_SQUARE; break;
833 case '&': token->type = TOKEN_AMPERSAND; break;
835 case '=':
837 int d = fileGetc ();
838 if (d == '=' || d == '>')
839 token->type = TOKEN_OPERATOR;
840 else
842 fileUngetc (d);
843 token->type = TOKEN_EQUAL_SIGN;
845 break;
848 case '\'':
849 case '"':
850 token->type = TOKEN_STRING;
851 parseString (token->string, c);
852 token->lineNumber = getSourceLineNumber ();
853 token->filePosition = getInputFilePosition ();
854 break;
856 case '<':
858 int d = fileGetc ();
859 if (d == '/')
861 /* </script[:white:]*> */
862 if (tolower ((d = fileGetc ())) == 's' &&
863 tolower ((d = fileGetc ())) == 'c' &&
864 tolower ((d = fileGetc ())) == 'r' &&
865 tolower ((d = fileGetc ())) == 'i' &&
866 tolower ((d = fileGetc ())) == 'p' &&
867 tolower ((d = fileGetc ())) == 't' &&
868 (d = skipWhitespaces (fileGetc ())) == '>')
870 InPhp = FALSE;
871 goto getNextChar;
873 else
875 fileUngetc (d);
876 token->type = TOKEN_UNDEFINED;
879 else if (d == '<' && (d = fileGetc ()) == '<')
881 token->type = TOKEN_STRING;
882 parseHeredoc (token->string);
884 else
886 fileUngetc (d);
887 token->type = TOKEN_UNDEFINED;
889 break;
892 case '#': /* comment */
893 skipSingleComment ();
894 goto getNextChar;
895 break;
897 case '+':
898 case '-':
899 case '*':
900 case '%':
902 int d = fileGetc ();
903 if (d != '=')
904 fileUngetc (d);
905 token->type = TOKEN_OPERATOR;
906 break;
909 case '/': /* division or comment start */
911 int d = fileGetc ();
912 if (d == '/') /* single-line comment */
914 skipSingleComment ();
915 goto getNextChar;
917 else if (d == '*')
921 c = skipToCharacter ('*');
922 if (c != EOF)
924 c = fileGetc ();
925 if (c == '/')
926 break;
927 else
928 fileUngetc (c);
930 } while (c != EOF && c != '\0');
931 goto getNextChar;
933 else
935 if (d != '=')
936 fileUngetc (d);
937 token->type = TOKEN_OPERATOR;
939 break;
942 case '$': /* variable start */
944 int d = fileGetc ();
945 if (! isIdentChar (d))
947 fileUngetc (d);
948 token->type = TOKEN_UNDEFINED;
950 else
952 parseIdentifier (token->string, d);
953 token->type = TOKEN_VARIABLE;
955 break;
958 case '?': /* maybe the end of the PHP chunk */
960 int d = fileGetc ();
961 if (d == '>')
963 InPhp = FALSE;
964 goto getNextChar;
966 else
968 fileUngetc (d);
969 token->type = TOKEN_UNDEFINED;
971 break;
974 default:
975 if (! isIdentChar (c))
976 token->type = TOKEN_UNDEFINED;
977 else
979 parseIdentifier (token->string, c);
980 token->keyword = analyzeToken (token->string, getSourceLanguage ());
981 if (token->keyword == KEYWORD_NONE)
982 token->type = TOKEN_IDENTIFIER;
983 else
984 token->type = TOKEN_KEYWORD;
986 break;
989 if (token->type == TOKEN_SEMICOLON ||
990 token->type == TOKEN_OPEN_CURLY ||
991 token->type == TOKEN_CLOSE_CURLY)
993 /* reset current statement details on statement end, and when entering
994 * a deeper scope.
995 * it is a bit ugly to do this in readToken(), but it makes everything
996 * a lot simpler. */
997 CurrentStatement.access = ACCESS_UNDEFINED;
998 CurrentStatement.impl = IMPL_UNDEFINED;
/* Forward declaration: the parse*() functions below and enterScope() call
 * each other. */
static void enterScope (tokenInfo *const parentToken,
                        const vString *const extraScope,
                        const int parentKind);
1006 /* parses a class or an interface:
1007 * class Foo {}
1008 * class Foo extends Bar {}
1009 * class Foo extends Bar implements iFoo, iBar {}
1010 * interface iFoo {}
1011 * interface iBar extends iFoo {} */
1012 static boolean parseClassOrIface (tokenInfo *const token, const phpKind kind)
1014 boolean readNext = TRUE;
1015 implType impl = CurrentStatement.impl;
1016 tokenInfo *name;
1017 vString *inheritance = NULL;
1019 readToken (token);
1020 if (token->type != TOKEN_IDENTIFIER)
1021 return FALSE;
1023 name = newToken ();
1024 copyToken (name, token, TRUE);
1026 inheritance = vStringNew ();
1027 /* skip until the open bracket and assume every identifier (not keyword)
1028 * is an inheritance (like in "class Foo extends Bar implements iA, iB") */
1031 readToken (token);
1033 if (token->type == TOKEN_IDENTIFIER)
1035 if (vStringLength (inheritance) > 0)
1036 vStringPut (inheritance, ',');
1037 vStringCat (inheritance, token->string);
1040 while (token->type != TOKEN_EOF &&
1041 token->type != TOKEN_OPEN_CURLY);
1043 makeClassOrIfaceTag (kind, name, inheritance, impl);
1045 if (token->type == TOKEN_OPEN_CURLY)
1046 enterScope (token, name->string, K_CLASS);
1047 else
1048 readNext = FALSE;
1050 deleteToken (name);
1051 vStringDelete (inheritance);
1053 return readNext;
1056 /* parses a trait:
1057 * trait Foo {} */
1058 static boolean parseTrait (tokenInfo *const token)
1060 boolean readNext = TRUE;
1061 tokenInfo *name;
1063 readToken (token);
1064 if (token->type != TOKEN_IDENTIFIER)
1065 return FALSE;
1067 name = newToken ();
1068 copyToken (name, token, TRUE);
1070 makeSimplePhpTag (name, K_TRAIT, ACCESS_UNDEFINED);
1072 readToken (token);
1073 if (token->type == TOKEN_OPEN_CURLY)
1074 enterScope (token, name->string, K_TRAIT);
1075 else
1076 readNext = FALSE;
1078 deleteToken (name);
1080 return readNext;
1083 /* parse a function
1085 * if @name is NULL, parses a normal function
1086 * function myfunc($foo, $bar) {}
1087 * function &myfunc($foo, $bar) {}
1089 * if @name is not NULL, parses an anonymous function with name @name
1090 * $foo = function($foo, $bar) {}
1091 * $foo = function&($foo, $bar) {}
1092 * $foo = function($foo, $bar) use ($x, &$y) {} */
1093 static boolean parseFunction (tokenInfo *const token, const tokenInfo *name)
1095 boolean readNext = TRUE;
1096 accessType access = CurrentStatement.access;
1097 implType impl = CurrentStatement.impl;
1098 tokenInfo *nameFree = NULL;
1100 readToken (token);
1101 /* skip a possible leading ampersand (return by reference) */
1102 if (token->type == TOKEN_AMPERSAND)
1103 readToken (token);
1105 if (! name)
1107 if (token->type != TOKEN_IDENTIFIER)
1108 return FALSE;
1110 name = nameFree = newToken ();
1111 copyToken (nameFree, token, TRUE);
1112 readToken (token);
1115 if (token->type == TOKEN_OPEN_PAREN)
1117 vString *arglist = vStringNew ();
1118 int depth = 1;
1120 vStringPut (arglist, '(');
1123 readToken (token);
1125 switch (token->type)
1127 case TOKEN_OPEN_PAREN: depth++; break;
1128 case TOKEN_CLOSE_PAREN: depth--; break;
1129 default: break;
1131 /* display part */
1132 switch (token->type)
1134 case TOKEN_AMPERSAND: vStringPut (arglist, '&'); break;
1135 case TOKEN_CLOSE_CURLY: vStringPut (arglist, '}'); break;
1136 case TOKEN_CLOSE_PAREN: vStringPut (arglist, ')'); break;
1137 case TOKEN_CLOSE_SQUARE: vStringPut (arglist, ']'); break;
1138 case TOKEN_COLON: vStringPut (arglist, ':'); break;
1139 case TOKEN_COMMA: vStringCatS (arglist, ", "); break;
1140 case TOKEN_EQUAL_SIGN: vStringCatS (arglist, " = "); break;
1141 case TOKEN_OPEN_CURLY: vStringPut (arglist, '{'); break;
1142 case TOKEN_OPEN_PAREN: vStringPut (arglist, '('); break;
1143 case TOKEN_OPEN_SQUARE: vStringPut (arglist, '['); break;
1144 case TOKEN_PERIOD: vStringPut (arglist, '.'); break;
1145 case TOKEN_SEMICOLON: vStringPut (arglist, ';'); break;
1146 case TOKEN_STRING: vStringCatS (arglist, "'...'"); break;
1148 case TOKEN_IDENTIFIER:
1149 case TOKEN_KEYWORD:
1150 case TOKEN_VARIABLE:
1152 switch (vStringLast (arglist))
1154 case 0:
1155 case ' ':
1156 case '{':
1157 case '(':
1158 case '[':
1159 case '.':
1160 /* no need for a space between those and the identifier */
1161 break;
1163 default:
1164 vStringPut (arglist, ' ');
1165 break;
1167 if (token->type == TOKEN_VARIABLE)
1168 vStringPut (arglist, '$');
1169 vStringCat (arglist, token->string);
1170 break;
1173 default: break;
1176 while (token->type != TOKEN_EOF && depth > 0);
1178 vStringTerminate (arglist);
1180 makeFunctionTag (name, arglist, access, impl);
1181 vStringDelete (arglist);
1183 readToken (token); /* normally it's an open brace or "use" keyword */
1186 /* if parsing Zephir, skip function return type hint */
1187 if (getSourceLanguage () == Lang_zephir && token->type == TOKEN_OPERATOR)
1190 readToken (token);
1191 while (token->type != TOKEN_EOF &&
1192 token->type != TOKEN_OPEN_CURLY &&
1193 token->type != TOKEN_CLOSE_CURLY &&
1194 token->type != TOKEN_SEMICOLON);
1197 /* skip use(...) */
1198 if (token->type == TOKEN_KEYWORD && token->keyword == KEYWORD_use)
1200 readToken (token);
1201 if (token->type == TOKEN_OPEN_PAREN)
1203 int depth = 1;
1207 readToken (token);
1208 switch (token->type)
1210 case TOKEN_OPEN_PAREN: depth++; break;
1211 case TOKEN_CLOSE_PAREN: depth--; break;
1212 default: break;
1215 while (token->type != TOKEN_EOF && depth > 0);
1217 readToken (token);
1221 if (token->type == TOKEN_OPEN_CURLY)
1222 enterScope (token, name->string, K_FUNCTION);
1223 else
1224 readNext = FALSE;
1226 if (nameFree)
1227 deleteToken (nameFree);
1229 return readNext;
1232 /* parses declarations of the form
1233 * const NAME = VALUE */
1234 static boolean parseConstant (tokenInfo *const token)
1236 tokenInfo *name;
1238 readToken (token); /* skip const keyword */
1239 if (token->type != TOKEN_IDENTIFIER)
1240 return FALSE;
1242 name = newToken ();
1243 copyToken (name, token, TRUE);
1245 readToken (token);
1246 if (token->type == TOKEN_EQUAL_SIGN)
1247 makeSimplePhpTag (name, K_DEFINE, ACCESS_UNDEFINED);
1249 deleteToken (name);
1251 return token->type == TOKEN_EQUAL_SIGN;
1254 /* parses declarations of the form
1255 * define('NAME', 'VALUE')
1256 * define(NAME, 'VALUE) */
1257 static boolean parseDefine (tokenInfo *const token)
1259 int depth = 1;
1261 readToken (token); /* skip "define" identifier */
1262 if (token->type != TOKEN_OPEN_PAREN)
1263 return FALSE;
1265 readToken (token);
1266 if (token->type == TOKEN_STRING ||
1267 token->type == TOKEN_IDENTIFIER)
1269 makeSimplePhpTag (token, K_DEFINE, ACCESS_UNDEFINED);
1270 readToken (token);
1273 /* skip until the close parenthesis.
1274 * no need to handle nested blocks since they would be invalid
1275 * in this context anyway (the VALUE may only be a scalar, like
1276 * 42
1277 * (42)
1278 * and alike) */
1279 while (token->type != TOKEN_EOF && depth > 0)
1281 switch (token->type)
1283 case TOKEN_OPEN_PAREN: depth++; break;
1284 case TOKEN_CLOSE_PAREN: depth--; break;
1285 default: break;
1287 readToken (token);
1290 return FALSE;
1293 /* parses declarations of the form
1294 * $var = VALUE
1295 * $var; */
1296 static boolean parseVariable (tokenInfo *const token)
1298 tokenInfo *name;
1299 boolean readNext = TRUE;
1300 accessType access = CurrentStatement.access;
1302 name = newToken ();
1303 copyToken (name, token, TRUE);
1305 readToken (token);
1306 if (token->type == TOKEN_EQUAL_SIGN)
1308 phpKind kind = K_VARIABLE;
1310 if (token->parentKind == K_FUNCTION)
1311 kind = K_LOCAL_VARIABLE;
1313 readToken (token);
1314 if (token->type == TOKEN_KEYWORD &&
1315 token->keyword == KEYWORD_function &&
1316 PhpKinds[kind].enabled)
1318 if (parseFunction (token, name))
1319 readToken (token);
1320 readNext = (boolean) (token->type == TOKEN_SEMICOLON);
1322 else
1324 makeSimplePhpTag (name, kind, access);
1325 readNext = FALSE;
1328 else if (token->type == TOKEN_SEMICOLON)
1330 /* generate tags for variable declarations in classes
1331 * class Foo {
1332 * protected $foo;
1334 * but don't get fooled by stuff like $foo = $bar; */
1335 if (token->parentKind == K_CLASS || token->parentKind == K_INTERFACE)
1336 makeSimplePhpTag (name, K_VARIABLE, access);
1338 else
1339 readNext = FALSE;
1341 deleteToken (name);
1343 return readNext;
1346 /* parses namespace declarations
1347 * namespace Foo {}
1348 * namespace Foo\Bar {}
1349 * namespace Foo;
1350 * namespace Foo\Bar;
1351 * namespace;
1352 * napespace {} */
1353 static boolean parseNamespace (tokenInfo *const token)
1355 tokenInfo *nsToken = newToken ();
1357 vStringClear (CurrentNamespace);
1358 copyToken (nsToken, token, FALSE);
1362 readToken (token);
1363 if (token->type == TOKEN_IDENTIFIER)
1365 if (vStringLength (CurrentNamespace) > 0)
1366 vStringPut (CurrentNamespace, '\\');
1367 vStringCat (CurrentNamespace, token->string);
1370 while (token->type != TOKEN_EOF &&
1371 token->type != TOKEN_SEMICOLON &&
1372 token->type != TOKEN_OPEN_CURLY);
1374 vStringTerminate (CurrentNamespace);
1375 if (vStringLength (CurrentNamespace) > 0)
1376 makeNamespacePhpTag (nsToken, CurrentNamespace);
1378 if (token->type == TOKEN_OPEN_CURLY)
1379 enterScope (token, NULL, -1);
1381 deleteToken (nsToken);
1383 return TRUE;
1386 static void enterScope (tokenInfo *const parentToken,
1387 const vString *const extraScope,
1388 const int parentKind)
1390 tokenInfo *token = newToken ();
1391 int origParentKind = parentToken->parentKind;
1393 copyToken (token, parentToken, TRUE);
1395 if (extraScope)
1397 addToScope (token, extraScope);
1398 token->parentKind = parentKind;
1401 readToken (token);
1402 while (token->type != TOKEN_EOF &&
1403 token->type != TOKEN_CLOSE_CURLY)
1405 boolean readNext = TRUE;
1407 switch (token->type)
1409 case TOKEN_OPEN_CURLY:
1410 enterScope (token, NULL, -1);
1411 break;
1413 case TOKEN_KEYWORD:
1414 switch (token->keyword)
1416 case KEYWORD_class: readNext = parseClassOrIface (token, K_CLASS); break;
1417 case KEYWORD_interface: readNext = parseClassOrIface (token, K_INTERFACE); break;
1418 case KEYWORD_trait: readNext = parseTrait (token); break;
1419 case KEYWORD_function: readNext = parseFunction (token, NULL); break;
1420 case KEYWORD_const: readNext = parseConstant (token); break;
1421 case KEYWORD_define: readNext = parseDefine (token); break;
1423 case KEYWORD_namespace: readNext = parseNamespace (token); break;
1425 case KEYWORD_private: CurrentStatement.access = ACCESS_PRIVATE; break;
1426 case KEYWORD_protected: CurrentStatement.access = ACCESS_PROTECTED; break;
1427 case KEYWORD_public: CurrentStatement.access = ACCESS_PUBLIC; break;
1428 case KEYWORD_var: CurrentStatement.access = ACCESS_PUBLIC; break;
1430 case KEYWORD_abstract: CurrentStatement.impl = IMPL_ABSTRACT; break;
1432 default: break;
1434 break;
1436 case TOKEN_VARIABLE:
1437 readNext = parseVariable (token);
1438 break;
1440 default: break;
1443 if (readNext)
1444 readToken (token);
1447 copyToken (parentToken, token, FALSE);
1448 parentToken->parentKind = origParentKind;
1449 deleteToken (token);
1452 static void findTags (void)
1454 tokenInfo *const token = newToken ();
1456 CurrentStatement.access = ACCESS_UNDEFINED;
1457 CurrentStatement.impl = IMPL_UNDEFINED;
1458 CurrentNamespace = vStringNew ();
1462 enterScope (token, NULL, -1);
1464 while (token->type != TOKEN_EOF); /* keep going even with unmatched braces */
1466 vStringDelete (CurrentNamespace);
1467 deleteToken (token);
1470 static void findPhpTags (void)
1472 InPhp = FALSE;
1473 findTags ();
1476 static void findZephirTags (void)
1478 InPhp = TRUE;
1479 findTags ();
1482 static void initializePhpParser (const langType language)
1484 Lang_php = language;
1485 buildPhpKeywordHash (language);
1488 static void initializeZephirParser (const langType language)
1490 Lang_zephir = language;
1491 buildPhpKeywordHash (language);
1494 extern parserDefinition* PhpParser (void)
1496 static const char *const extensions [] = { "php", "php3", "php4", "php5", "phtml", NULL };
1497 parserDefinition* def = parserNew ("PHP");
1498 def->kinds = PhpKinds;
1499 def->kindCount = KIND_COUNT (PhpKinds);
1500 def->extensions = extensions;
1501 def->parser = findPhpTags;
1502 def->initialize = initializePhpParser;
1503 return def;
1506 extern parserDefinition* ZephirParser (void)
1508 static const char *const extensions [] = { "zep", NULL };
1509 parserDefinition* def = parserNew ("Zephir");
1510 def->kinds = PhpKinds;
1511 def->kindCount = KIND_COUNT (PhpKinds);
1512 def->extensions = extensions;
1513 def->parser = findZephirTags;
1514 def->initialize = initializeZephirParser;
1515 return def;
1518 /* vi:set tabstop=4 shiftwidth=4: */