Sync whitespace in parsers
[geany-mirror.git] / ctags / parsers / php.c
blob0394ffea4c3c723275be28944ff1cbb4c88a158c
1 /*
2 * Copyright (c) 2013, Colomban Wendling <ban@herbesfolles.org>
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License version 2 or (at your option) any later version.
7 * This module contains code for generating tags for the PHP scripting
8 * language.
9 */
12 * INCLUDE FILES
14 #include "general.h" /* must always come first */
15 #include "parse.h"
16 #include "read.h"
17 #include "vstring.h"
18 #include "keyword.h"
19 #include "entry.h"
20 #include "routines.h"
21 #include "debug.h"
24 #define SCOPE_SEPARATOR "::"
/* Identifiers for the words the parser treats as keywords.
 * KEYWORD_NONE (-1) means "not a keyword"; the real keywords are numbered
 * consecutively from 0 in the order they are listed here. */
typedef enum {
	KEYWORD_NONE = -1,
	/* a - c */
	KEYWORD_abstract, KEYWORD_and, KEYWORD_as,
	KEYWORD_break,
	KEYWORD_callable, KEYWORD_case, KEYWORD_catch, KEYWORD_class,
	KEYWORD_clone, KEYWORD_const, KEYWORD_continue,
	/* d - e */
	KEYWORD_declare, KEYWORD_define, KEYWORD_default, KEYWORD_do,
	KEYWORD_echo, KEYWORD_else, KEYWORD_elif, KEYWORD_enddeclare,
	KEYWORD_endfor, KEYWORD_endforeach, KEYWORD_endif, KEYWORD_endswitch,
	KEYWORD_endwhile, KEYWORD_extends,
	/* f - i */
	KEYWORD_final, KEYWORD_finally, KEYWORD_for, KEYWORD_foreach,
	KEYWORD_function,
	KEYWORD_global, KEYWORD_goto,
	KEYWORD_if, KEYWORD_implements, KEYWORD_include, KEYWORD_include_once,
	KEYWORD_instanceof, KEYWORD_insteadof, KEYWORD_interface,
	/* n - r */
	KEYWORD_namespace, KEYWORD_new,
	KEYWORD_or,
	KEYWORD_print, KEYWORD_private, KEYWORD_protected, KEYWORD_public,
	KEYWORD_require, KEYWORD_require_once, KEYWORD_return,
	/* s - y */
	KEYWORD_static, KEYWORD_switch,
	KEYWORD_throw, KEYWORD_trait, KEYWORD_try,
	KEYWORD_use,
	KEYWORD_var,
	KEYWORD_while,
	KEYWORD_xor,
	KEYWORD_yield
} keywordId;
/* Visibility attached to the statement being parsed.  COUNT_ACCESS is not a
 * visibility; it is the number of real values and sizes lookup tables. */
typedef enum {
	ACCESS_UNDEFINED = 0,	/* no visibility keyword seen */
	ACCESS_PRIVATE,
	ACCESS_PROTECTED,
	ACCESS_PUBLIC,
	COUNT_ACCESS
} accessType;
/* Implementation attribute attached to the statement being parsed.
 * COUNT_IMPL is the number of real values and sizes lookup tables. */
typedef enum {
	IMPL_UNDEFINED = 0,	/* no implementation keyword seen */
	IMPL_ABSTRACT,
	COUNT_IMPL
} implType;
/* Tag kinds produced by this parser.  The values index the PhpKinds table,
 * so both must list the kinds in the same order. */
typedef enum {
	K_CLASS = 0,
	K_DEFINE,
	K_FUNCTION,
	K_INTERFACE,
	K_LOCAL_VARIABLE,
	K_NAMESPACE,
	K_TRAIT,
	K_VARIABLE,
	COUNT_KIND	/* number of kinds, not a kind itself */
} phpKind;
116 static kindOption PhpKinds[COUNT_KIND] = {
117 { TRUE, 'c', "class", "classes" },
118 { TRUE, 'd', "define", "constant definitions" },
119 { TRUE, 'f', "function", "functions" },
120 { TRUE, 'i', "interface", "interfaces" },
121 { FALSE, 'l', "local", "local variables" },
122 { TRUE, 'n', "namespace", "namespaces" },
123 { TRUE, 't', "trait", "traits" },
124 { TRUE, 'v', "variable", "variables" }
127 static const keywordTable PhpKeywordTable[] = {
128 /* keyword keyword ID */
129 { "abstract", KEYWORD_abstract },
130 { "and", KEYWORD_and },
131 { "as", KEYWORD_as },
132 { "break", KEYWORD_break },
133 { "callable", KEYWORD_callable },
134 { "case", KEYWORD_case },
135 { "catch", KEYWORD_catch },
136 { "cfunction", KEYWORD_function }, /* nobody knows what the hell this is, but it seems to behave much like "function" so bind it to it */
137 { "class", KEYWORD_class },
138 { "clone", KEYWORD_clone },
139 { "const", KEYWORD_const },
140 { "continue", KEYWORD_continue },
141 { "declare", KEYWORD_declare },
142 { "define", KEYWORD_define }, /* this isn't really a keyword but we handle it so it's easier this way */
143 { "default", KEYWORD_default },
144 { "do", KEYWORD_do },
145 { "echo", KEYWORD_echo },
146 { "else", KEYWORD_else },
147 { "elseif", KEYWORD_elif },
148 { "enddeclare", KEYWORD_enddeclare },
149 { "endfor", KEYWORD_endfor },
150 { "endforeach", KEYWORD_endforeach },
151 { "endif", KEYWORD_endif },
152 { "endswitch", KEYWORD_endswitch },
153 { "endwhile", KEYWORD_endwhile },
154 { "extends", KEYWORD_extends },
155 { "final", KEYWORD_final },
156 { "finally", KEYWORD_finally },
157 { "for", KEYWORD_for },
158 { "foreach", KEYWORD_foreach },
159 { "function", KEYWORD_function },
160 { "global", KEYWORD_global },
161 { "goto", KEYWORD_goto },
162 { "if", KEYWORD_if },
163 { "implements", KEYWORD_implements },
164 { "include", KEYWORD_include },
165 { "include_once", KEYWORD_include_once },
166 { "instanceof", KEYWORD_instanceof },
167 { "insteadof", KEYWORD_insteadof },
168 { "interface", KEYWORD_interface },
169 { "namespace", KEYWORD_namespace },
170 { "new", KEYWORD_new },
171 { "or", KEYWORD_or },
172 { "print", KEYWORD_print },
173 { "private", KEYWORD_private },
174 { "protected", KEYWORD_protected },
175 { "public", KEYWORD_public },
176 { "require", KEYWORD_require },
177 { "require_once", KEYWORD_require_once },
178 { "return", KEYWORD_return },
179 { "static", KEYWORD_static },
180 { "switch", KEYWORD_switch },
181 { "throw", KEYWORD_throw },
182 { "trait", KEYWORD_trait },
183 { "try", KEYWORD_try },
184 { "use", KEYWORD_use },
185 { "var", KEYWORD_var },
186 { "while", KEYWORD_while },
187 { "xor", KEYWORD_xor },
188 { "yield", KEYWORD_yield }
/* Lexical categories assigned to a token by readToken(). */
typedef enum eTokenType {
	TOKEN_UNDEFINED = 0,	/* anything the lexer does not classify */
	TOKEN_EOF,
	TOKEN_CHARACTER,
	TOKEN_CLOSE_PAREN,
	TOKEN_SEMICOLON,
	TOKEN_COLON,
	TOKEN_COMMA,
	TOKEN_KEYWORD,		/* identifier found in the keyword table */
	TOKEN_OPEN_PAREN,
	TOKEN_OPERATOR,
	TOKEN_IDENTIFIER,
	TOKEN_STRING,		/* quoted string, heredoc or nowdoc */
	TOKEN_PERIOD,
	TOKEN_OPEN_CURLY,
	TOKEN_CLOSE_CURLY,
	TOKEN_EQUAL_SIGN,
	TOKEN_OPEN_SQUARE,
	TOKEN_CLOSE_SQUARE,
	TOKEN_VARIABLE,		/* "$name"; the stored text excludes the '$' */
	TOKEN_AMPERSAND
} tokenType;
215 typedef struct {
216 tokenType type;
217 keywordId keyword;
218 vString * string;
219 vString * scope;
220 unsigned long lineNumber;
221 MIOPos filePosition;
222 int parentKind; /* -1 if none */
223 } tokenInfo;
225 static langType Lang_php;
226 static langType Lang_zephir;
228 static boolean InPhp = FALSE; /* whether we are between <? ?> */
230 /* current statement details */
231 static struct {
232 accessType access;
233 implType impl;
234 } CurrentStatement;
236 /* Current namespace */
237 static vString *CurrentNamespace;
240 static const char *accessToString (const accessType access)
242 static const char *const names[COUNT_ACCESS] = {
243 "undefined",
244 "private",
245 "protected",
246 "public"
249 Assert (access < COUNT_ACCESS);
251 return names[access];
254 static const char *implToString (const implType impl)
256 static const char *const names[COUNT_IMPL] = {
257 "undefined",
258 "abstract"
261 Assert (impl < COUNT_IMPL);
263 return names[impl];
266 static void initPhpEntry (tagEntryInfo *const e, const tokenInfo *const token,
267 const phpKind kind, const accessType access)
269 static vString *fullScope = NULL;
270 int parentKind = -1;
272 if (fullScope == NULL)
273 fullScope = vStringNew ();
274 else
275 vStringClear (fullScope);
277 if (vStringLength (CurrentNamespace) > 0)
279 vStringCopy (fullScope, CurrentNamespace);
280 parentKind = K_NAMESPACE;
283 initTagEntry (e, vStringValue (token->string), &(PhpKinds[kind]));
285 e->lineNumber = token->lineNumber;
286 e->filePosition = token->filePosition;
288 if (access != ACCESS_UNDEFINED)
289 e->extensionFields.access = accessToString (access);
290 if (vStringLength (token->scope) > 0)
292 parentKind = token->parentKind;
293 if (vStringLength (fullScope) > 0)
294 vStringCatS (fullScope, SCOPE_SEPARATOR);
295 vStringCat (fullScope, token->scope);
297 if (vStringLength (fullScope) > 0)
299 Assert (parentKind >= 0);
301 vStringTerminate (fullScope);
302 e->extensionFields.scopeKind = &(PhpKinds[parentKind]);
303 e->extensionFields.scopeName = vStringValue (fullScope);
307 static void makeSimplePhpTag (const tokenInfo *const token, const phpKind kind,
308 const accessType access)
310 if (PhpKinds[kind].enabled)
312 tagEntryInfo e;
314 initPhpEntry (&e, token, kind, access);
315 makeTagEntry (&e);
319 static void makeNamespacePhpTag (const tokenInfo *const token, const vString *const name)
321 if (PhpKinds[K_NAMESPACE].enabled)
323 tagEntryInfo e;
325 initTagEntry (&e, vStringValue (name), &(PhpKinds[K_NAMESPACE]));
327 e.lineNumber = token->lineNumber;
328 e.filePosition = token->filePosition;
330 makeTagEntry (&e);
334 static void makeClassOrIfaceTag (const phpKind kind, const tokenInfo *const token,
335 vString *const inheritance, const implType impl)
337 if (PhpKinds[kind].enabled)
339 tagEntryInfo e;
341 initPhpEntry (&e, token, kind, ACCESS_UNDEFINED);
343 if (impl != IMPL_UNDEFINED)
344 e.extensionFields.implementation = implToString (impl);
345 if (vStringLength (inheritance) > 0)
346 e.extensionFields.inheritance = vStringValue (inheritance);
348 makeTagEntry (&e);
352 static void makeFunctionTag (const tokenInfo *const token,
353 const vString *const arglist,
354 const accessType access, const implType impl)
356 if (PhpKinds[K_FUNCTION].enabled)
358 tagEntryInfo e;
360 initPhpEntry (&e, token, K_FUNCTION, access);
362 if (impl != IMPL_UNDEFINED)
363 e.extensionFields.implementation = implToString (impl);
364 if (arglist)
365 e.extensionFields.signature = vStringValue (arglist);
367 makeTagEntry (&e);
371 static tokenInfo *newToken (void)
373 tokenInfo *const token = xMalloc (1, tokenInfo);
375 token->type = TOKEN_UNDEFINED;
376 token->keyword = KEYWORD_NONE;
377 token->string = vStringNew ();
378 token->scope = vStringNew ();
379 token->lineNumber = getInputLineNumber ();
380 token->filePosition = getInputFilePosition ();
381 token->parentKind = -1;
383 return token;
386 static void deleteToken (tokenInfo *const token)
388 vStringDelete (token->string);
389 vStringDelete (token->scope);
390 eFree (token);
393 static void copyToken (tokenInfo *const dest, const tokenInfo *const src,
394 boolean scope)
396 dest->lineNumber = src->lineNumber;
397 dest->filePosition = src->filePosition;
398 dest->type = src->type;
399 dest->keyword = src->keyword;
400 vStringCopy(dest->string, src->string);
401 dest->parentKind = src->parentKind;
402 if (scope)
403 vStringCopy(dest->scope, src->scope);
#if 0
#include <stdio.h>

/* Debugging aid: returns a printable name for @type.
 * Every tokenType value has a case, so NULL is only returned for a value
 * outside the enumeration. */
static const char *tokenTypeName (const tokenType type)
{
	switch (type)
	{
		case TOKEN_UNDEFINED:		return "undefined";
		case TOKEN_EOF:				return "EOF";
		case TOKEN_CHARACTER:		return "character";
		case TOKEN_CLOSE_PAREN:		return "')'";
		case TOKEN_SEMICOLON:		return "';'";
		case TOKEN_COLON:			return "':'";
		case TOKEN_COMMA:			return "','";
		case TOKEN_OPEN_PAREN:		return "'('";
		case TOKEN_OPERATOR:		return "operator";
		case TOKEN_IDENTIFIER:		return "identifier";
		case TOKEN_KEYWORD:			return "keyword";
		case TOKEN_STRING:			return "string";
		case TOKEN_PERIOD:			return "'.'";
		case TOKEN_OPEN_CURLY:		return "'{'";
		case TOKEN_CLOSE_CURLY:		return "'}'";
		case TOKEN_EQUAL_SIGN:		return "'='";
		case TOKEN_OPEN_SQUARE:		return "'['";
		case TOKEN_CLOSE_SQUARE:	return "']'";
		case TOKEN_VARIABLE:		return "variable";
		/* without this case printToken() would pass NULL to "%s" */
		case TOKEN_AMPERSAND:		return "'&'";
	}
	return NULL;
}

/* Debugging aid: dumps @token to stderr (address, type, line, scope, and
 * the text or keyword spelling when the type carries one). */
static void printToken (const tokenInfo *const token)
{
	fprintf (stderr, "%p:\n\ttype:\t%s\n\tline:\t%lu\n\tscope:\t%s\n", (void *) token,
			 tokenTypeName (token->type),
			 token->lineNumber,
			 vStringValue (token->scope));
	switch (token->type)
	{
		case TOKEN_IDENTIFIER:
		case TOKEN_STRING:
		case TOKEN_VARIABLE:
			fprintf (stderr, "\tcontent:\t%s\n", vStringValue (token->string));
			break;

		case TOKEN_KEYWORD:
		{
			size_t n = sizeof PhpKeywordTable / sizeof PhpKeywordTable[0];
			size_t i;

			/* reverse lookup: find the spelling bound to this keyword ID */
			fprintf (stderr, "\tkeyword:\t");
			for (i = 0; i < n; i++)
			{
				if (PhpKeywordTable[i].id == token->keyword)
				{
					fprintf (stderr, "%s\n", PhpKeywordTable[i].name);
					break;
				}
			}
			if (i >= n)
				fprintf (stderr, "(unknown)\n");
			break;
		}

		default: break;
	}
}
#endif
473 static void addToScope (tokenInfo *const token, const vString *const extra)
475 if (vStringLength (token->scope) > 0)
476 vStringCatS (token->scope, SCOPE_SEPARATOR);
477 vStringCatS (token->scope, vStringValue (extra));
478 vStringTerminate(token->scope);
481 static boolean isIdentChar (const int c)
483 return (isalnum (c) || c == '_' || c >= 0x80);
/* Consumes input up to and including the next occurrence of @c.
 * Returns @c, or EOF when the input ends first.  At least one character is
 * always read. */
static int skipToCharacter (const int c)
{
	int ch = getcFromInputFile ();

	while (ch != EOF && ch != c)
		ch = getcFromInputFile ();

	return ch;
}
496 static void parseString (vString *const string, const int delimiter)
498 while (TRUE)
500 int c = getcFromInputFile ();
502 if (c == '\\' && (c = getcFromInputFile ()) != EOF)
503 vStringPut (string, (char) c);
504 else if (c == EOF || c == delimiter)
505 break;
506 else
507 vStringPut (string, (char) c);
509 vStringTerminate (string);
512 /* reads an HereDoc or a NowDoc (the part after the <<<).
513 * <<<[ \t]*(ID|'ID'|"ID")
514 * ...
515 * ID;?
517 * note that:
518 * 1) starting ID must be immediately followed by a newline;
519 * 2) closing ID is the same as opening one;
520 * 3) closing ID must be immediately followed by a newline or a semicolon
521 * then a newline.
523 * Example of a *single* valid heredoc:
524 * <<< FOO
525 * something
526 * something else
527 * FOO this is not an end
528 * FOO; this isn't either
529 * FOO; # neither this is
530 * FOO;
531 * # previous line was the end, but the semicolon wasn't required
533 static void parseHeredoc (vString *const string)
535 int c;
536 unsigned int len;
537 char delimiter[64]; /* arbitrary limit, but more is crazy anyway */
538 int quote = 0;
542 c = getcFromInputFile ();
544 while (c == ' ' || c == '\t');
546 if (c == '\'' || c == '"')
548 quote = c;
549 c = getcFromInputFile ();
551 for (len = 0; len < (sizeof delimiter / sizeof delimiter[0]) - 1; len++)
553 if (! isIdentChar (c))
554 break;
555 delimiter[len] = (char) c;
556 c = getcFromInputFile ();
558 delimiter[len] = 0;
560 if (len == 0) /* no delimiter, give up */
561 goto error;
562 if (quote)
564 if (c != quote) /* no closing quote for quoted identifier, give up */
565 goto error;
566 c = getcFromInputFile ();
568 if (c != '\r' && c != '\n') /* missing newline, give up */
569 goto error;
573 c = getcFromInputFile ();
575 if (c != '\r' && c != '\n')
576 vStringPut (string, (char) c);
577 else
579 /* new line, check for a delimiter right after */
580 int nl = c;
581 int extra = EOF;
583 c = getcFromInputFile ();
584 for (len = 0; c != 0 && (c - delimiter[len]) == 0; len++)
585 c = getcFromInputFile ();
587 if (delimiter[len] != 0)
588 ungetcToInputFile (c);
589 else
591 /* line start matched the delimiter, now check whether there
592 * is anything after it */
593 if (c == '\r' || c == '\n')
595 ungetcToInputFile (c);
596 break;
598 else if (c == ';')
600 int d = getcFromInputFile ();
601 if (d == '\r' || d == '\n')
603 /* put back the semicolon since it's not part of the
604 * string. we can't put back the newline, but it's a
605 * whitespace character nobody cares about it anyway */
606 ungetcToInputFile (';');
607 break;
609 else
611 /* put semicolon in the string and continue */
612 extra = ';';
613 ungetcToInputFile (d);
617 /* if we are here it wasn't a delimiter, so put everything in the
618 * string */
619 vStringPut (string, (char) nl);
620 vStringNCatS (string, delimiter, len);
621 if (extra != EOF)
622 vStringPut (string, (char) extra);
625 while (c != EOF);
627 vStringTerminate (string);
629 return;
631 error:
632 ungetcToInputFile (c);
635 static void parseIdentifier (vString *const string, const int firstChar)
637 int c = firstChar;
640 vStringPut (string, (char) c);
641 c = getcFromInputFile ();
642 } while (isIdentChar (c));
643 ungetcToInputFile (c);
644 vStringTerminate (string);
647 static keywordId analyzeToken (vString *const name, langType language)
649 vString *keyword = vStringNew ();
650 keywordId result;
651 vStringCopyToLower (keyword, name);
652 result = lookupKeyword (vStringValue (keyword), language);
653 vStringDelete (keyword);
654 return result;
657 static boolean isSpace (int c)
659 return (c == '\t' || c == ' ' || c == '\v' ||
660 c == '\n' || c == '\r' || c == '\f');
/* Starting with @c, reads input until a non-whitespace character turns up
 * and returns it; returns @c itself when it is not whitespace.  The result
 * is EOF if the input ends first. */
static int skipWhitespaces (int c)
{
	for (; isSpace (c); c = getcFromInputFile ())
		continue;

	return c;
}
670 /* <script[:white:]+language[:white:]*=[:white:]*(php|'php'|"php")[:white:]*>
672 * This is ugly, but the whole "<script language=php>" tag is and we can't
673 * really do better without adding a lot of code only for this */
674 static boolean isOpenScriptLanguagePhp (int c)
676 int quote = 0;
678 /* <script[:white:]+language[:white:]*= */
679 if (c != '<' ||
680 tolower ((c = getcFromInputFile ())) != 's' ||
681 tolower ((c = getcFromInputFile ())) != 'c' ||
682 tolower ((c = getcFromInputFile ())) != 'r' ||
683 tolower ((c = getcFromInputFile ())) != 'i' ||
684 tolower ((c = getcFromInputFile ())) != 'p' ||
685 tolower ((c = getcFromInputFile ())) != 't' ||
686 ! isSpace ((c = getcFromInputFile ())) ||
687 tolower ((c = skipWhitespaces (c))) != 'l' ||
688 tolower ((c = getcFromInputFile ())) != 'a' ||
689 tolower ((c = getcFromInputFile ())) != 'n' ||
690 tolower ((c = getcFromInputFile ())) != 'g' ||
691 tolower ((c = getcFromInputFile ())) != 'u' ||
692 tolower ((c = getcFromInputFile ())) != 'a' ||
693 tolower ((c = getcFromInputFile ())) != 'g' ||
694 tolower ((c = getcFromInputFile ())) != 'e' ||
695 (c = skipWhitespaces (getcFromInputFile ())) != '=')
696 return FALSE;
698 /* (php|'php'|"php")> */
699 c = skipWhitespaces (getcFromInputFile ());
700 if (c == '"' || c == '\'')
702 quote = c;
703 c = getcFromInputFile ();
705 if (tolower (c) != 'p' ||
706 tolower ((c = getcFromInputFile ())) != 'h' ||
707 tolower ((c = getcFromInputFile ())) != 'p' ||
708 (quote != 0 && (c = getcFromInputFile ()) != quote) ||
709 (c = skipWhitespaces (getcFromInputFile ())) != '>')
710 return FALSE;
712 return TRUE;
/* Skips input up to the start of a PHP section: "<?" not followed by "xml"
 * (which also covers "<?php"), or a <script language="php"> tag.
 *
 * Returns the last character read, or EOF when the input ends before any
 * PHP section starts.  After "<?" the returned character may be the first
 * character that follows the open tag. */
static int findPhpStart (void)
{
	int ch;

	for (;;)
	{
		ch = getcFromInputFile ();
		if (ch == '<')
		{
			ch = getcFromInputFile ();
			if (ch != '?')
			{
				/* maybe <script language="php">: hand the character back and
				 * let the tag matcher read it again */
				ungetcToInputFile (ch);
				if (isOpenScriptLanguagePhp ('<'))
					break;
			}
			/* don't enter PHP mode on "<?xml", yet still support short open
			 * tags (<?) */
			else if (tolower ((ch = getcFromInputFile ())) != 'x' ||
					 tolower ((ch = getcFromInputFile ())) != 'm' ||
					 tolower ((ch = getcFromInputFile ())) != 'l')
			{
				break;
			}
		}

		if (ch == EOF)
			break;
	}

	return ch;
}
748 static int skipSingleComment (void)
750 int c;
753 c = getcFromInputFile ();
754 if (c == '\r')
756 int next = getcFromInputFile ();
757 if (next != '\n')
758 ungetcToInputFile (next);
759 else
760 c = next;
762 /* ?> in single-line comments leaves PHP mode */
763 else if (c == '?')
765 int next = getcFromInputFile ();
766 if (next == '>')
767 InPhp = FALSE;
768 else
769 ungetcToInputFile (next);
771 } while (InPhp && c != EOF && c != '\n' && c != '\r');
772 return c;
775 static void readToken (tokenInfo *const token)
777 int c;
779 token->type = TOKEN_UNDEFINED;
780 token->keyword = KEYWORD_NONE;
781 vStringClear (token->string);
783 getNextChar:
785 if (! InPhp)
787 c = findPhpStart ();
788 if (c != EOF)
789 InPhp = TRUE;
791 else
792 c = getcFromInputFile ();
794 c = skipWhitespaces (c);
796 token->lineNumber = getInputLineNumber ();
797 token->filePosition = getInputFilePosition ();
799 switch (c)
801 case EOF: token->type = TOKEN_EOF; break;
802 case '(': token->type = TOKEN_OPEN_PAREN; break;
803 case ')': token->type = TOKEN_CLOSE_PAREN; break;
804 case ';': token->type = TOKEN_SEMICOLON; break;
805 case ',': token->type = TOKEN_COMMA; break;
806 case '.': token->type = TOKEN_PERIOD; break;
807 case ':': token->type = TOKEN_COLON; break;
808 case '{': token->type = TOKEN_OPEN_CURLY; break;
809 case '}': token->type = TOKEN_CLOSE_CURLY; break;
810 case '[': token->type = TOKEN_OPEN_SQUARE; break;
811 case ']': token->type = TOKEN_CLOSE_SQUARE; break;
812 case '&': token->type = TOKEN_AMPERSAND; break;
814 case '=':
816 int d = getcFromInputFile ();
817 if (d == '=' || d == '>')
818 token->type = TOKEN_OPERATOR;
819 else
821 ungetcToInputFile (d);
822 token->type = TOKEN_EQUAL_SIGN;
824 break;
827 case '\'':
828 case '"':
829 token->type = TOKEN_STRING;
830 parseString (token->string, c);
831 token->lineNumber = getInputLineNumber ();
832 token->filePosition = getInputFilePosition ();
833 break;
835 case '<':
837 int d = getcFromInputFile ();
838 if (d == '/')
840 /* </script[:white:]*> */
841 if (tolower ((d = getcFromInputFile ())) == 's' &&
842 tolower ((d = getcFromInputFile ())) == 'c' &&
843 tolower ((d = getcFromInputFile ())) == 'r' &&
844 tolower ((d = getcFromInputFile ())) == 'i' &&
845 tolower ((d = getcFromInputFile ())) == 'p' &&
846 tolower ((d = getcFromInputFile ())) == 't' &&
847 (d = skipWhitespaces (getcFromInputFile ())) == '>')
849 InPhp = FALSE;
850 goto getNextChar;
852 else
854 ungetcToInputFile (d);
855 token->type = TOKEN_UNDEFINED;
858 else if (d == '<' && (d = getcFromInputFile ()) == '<')
860 token->type = TOKEN_STRING;
861 parseHeredoc (token->string);
863 else
865 ungetcToInputFile (d);
866 token->type = TOKEN_UNDEFINED;
868 break;
871 case '#': /* comment */
872 skipSingleComment ();
873 goto getNextChar;
874 break;
876 case '+':
877 case '-':
878 case '*':
879 case '%':
881 int d = getcFromInputFile ();
882 if (d != '=')
883 ungetcToInputFile (d);
884 token->type = TOKEN_OPERATOR;
885 break;
888 case '/': /* division or comment start */
890 int d = getcFromInputFile ();
891 if (d == '/') /* single-line comment */
893 skipSingleComment ();
894 goto getNextChar;
896 else if (d == '*')
900 c = skipToCharacter ('*');
901 if (c != EOF)
903 c = getcFromInputFile ();
904 if (c == '/')
905 break;
906 else
907 ungetcToInputFile (c);
909 } while (c != EOF && c != '\0');
910 goto getNextChar;
912 else
914 if (d != '=')
915 ungetcToInputFile (d);
916 token->type = TOKEN_OPERATOR;
918 break;
921 case '$': /* variable start */
923 int d = getcFromInputFile ();
924 if (! isIdentChar (d))
926 ungetcToInputFile (d);
927 token->type = TOKEN_UNDEFINED;
929 else
931 parseIdentifier (token->string, d);
932 token->type = TOKEN_VARIABLE;
934 break;
937 case '?': /* maybe the end of the PHP chunk */
939 int d = getcFromInputFile ();
940 if (d == '>')
942 InPhp = FALSE;
943 goto getNextChar;
945 else
947 ungetcToInputFile (d);
948 token->type = TOKEN_UNDEFINED;
950 break;
953 default:
954 if (! isIdentChar (c))
955 token->type = TOKEN_UNDEFINED;
956 else
958 parseIdentifier (token->string, c);
959 token->keyword = analyzeToken (token->string, getSourceLanguage ());
960 if (token->keyword == KEYWORD_NONE)
961 token->type = TOKEN_IDENTIFIER;
962 else
963 token->type = TOKEN_KEYWORD;
965 break;
968 if (token->type == TOKEN_SEMICOLON ||
969 token->type == TOKEN_OPEN_CURLY ||
970 token->type == TOKEN_CLOSE_CURLY)
972 /* reset current statement details on statement end, and when entering
973 * a deeper scope.
974 * it is a bit ugly to do this in readToken(), but it makes everything
975 * a lot simpler. */
976 CurrentStatement.access = ACCESS_UNDEFINED;
977 CurrentStatement.impl = IMPL_UNDEFINED;
981 static void enterScope (tokenInfo *const parentToken,
982 const vString *const extraScope,
983 const int parentKind);
985 /* parses a class or an interface:
986 * class Foo {}
987 * class Foo extends Bar {}
988 * class Foo extends Bar implements iFoo, iBar {}
989 * interface iFoo {}
990 * interface iBar extends iFoo {} */
991 static boolean parseClassOrIface (tokenInfo *const token, const phpKind kind)
993 boolean readNext = TRUE;
994 implType impl = CurrentStatement.impl;
995 tokenInfo *name;
996 vString *inheritance = NULL;
998 readToken (token);
999 if (token->type != TOKEN_IDENTIFIER)
1000 return FALSE;
1002 name = newToken ();
1003 copyToken (name, token, TRUE);
1005 inheritance = vStringNew ();
1006 /* skip until the open bracket and assume every identifier (not keyword)
1007 * is an inheritance (like in "class Foo extends Bar implements iA, iB") */
1010 readToken (token);
1012 if (token->type == TOKEN_IDENTIFIER)
1014 if (vStringLength (inheritance) > 0)
1015 vStringPut (inheritance, ',');
1016 vStringCat (inheritance, token->string);
1019 while (token->type != TOKEN_EOF &&
1020 token->type != TOKEN_OPEN_CURLY);
1022 makeClassOrIfaceTag (kind, name, inheritance, impl);
1024 if (token->type == TOKEN_OPEN_CURLY)
1025 enterScope (token, name->string, K_CLASS);
1026 else
1027 readNext = FALSE;
1029 deleteToken (name);
1030 vStringDelete (inheritance);
1032 return readNext;
1035 /* parses a trait:
1036 * trait Foo {} */
1037 static boolean parseTrait (tokenInfo *const token)
1039 boolean readNext = TRUE;
1040 tokenInfo *name;
1042 readToken (token);
1043 if (token->type != TOKEN_IDENTIFIER)
1044 return FALSE;
1046 name = newToken ();
1047 copyToken (name, token, TRUE);
1049 makeSimplePhpTag (name, K_TRAIT, ACCESS_UNDEFINED);
1051 readToken (token);
1052 if (token->type == TOKEN_OPEN_CURLY)
1053 enterScope (token, name->string, K_TRAIT);
1054 else
1055 readNext = FALSE;
1057 deleteToken (name);
1059 return readNext;
1062 /* parse a function
1064 * if @name is NULL, parses a normal function
1065 * function myfunc($foo, $bar) {}
1066 * function &myfunc($foo, $bar) {}
1068 * if @name is not NULL, parses an anonymous function with name @name
1069 * $foo = function($foo, $bar) {}
1070 * $foo = function&($foo, $bar) {}
1071 * $foo = function($foo, $bar) use ($x, &$y) {} */
1072 static boolean parseFunction (tokenInfo *const token, const tokenInfo *name)
1074 boolean readNext = TRUE;
1075 accessType access = CurrentStatement.access;
1076 implType impl = CurrentStatement.impl;
1077 tokenInfo *nameFree = NULL;
1079 readToken (token);
1080 /* skip a possible leading ampersand (return by reference) */
1081 if (token->type == TOKEN_AMPERSAND)
1082 readToken (token);
1084 if (! name)
1086 if (token->type != TOKEN_IDENTIFIER)
1087 return FALSE;
1089 name = nameFree = newToken ();
1090 copyToken (nameFree, token, TRUE);
1091 readToken (token);
1094 if (token->type == TOKEN_OPEN_PAREN)
1096 vString *arglist = vStringNew ();
1097 int depth = 1;
1099 vStringPut (arglist, '(');
1102 readToken (token);
1104 switch (token->type)
1106 case TOKEN_OPEN_PAREN: depth++; break;
1107 case TOKEN_CLOSE_PAREN: depth--; break;
1108 default: break;
1110 /* display part */
1111 switch (token->type)
1113 case TOKEN_AMPERSAND: vStringPut (arglist, '&'); break;
1114 case TOKEN_CLOSE_CURLY: vStringPut (arglist, '}'); break;
1115 case TOKEN_CLOSE_PAREN: vStringPut (arglist, ')'); break;
1116 case TOKEN_CLOSE_SQUARE: vStringPut (arglist, ']'); break;
1117 case TOKEN_COLON: vStringPut (arglist, ':'); break;
1118 case TOKEN_COMMA: vStringCatS (arglist, ", "); break;
1119 case TOKEN_EQUAL_SIGN: vStringCatS (arglist, " = "); break;
1120 case TOKEN_OPEN_CURLY: vStringPut (arglist, '{'); break;
1121 case TOKEN_OPEN_PAREN: vStringPut (arglist, '('); break;
1122 case TOKEN_OPEN_SQUARE: vStringPut (arglist, '['); break;
1123 case TOKEN_PERIOD: vStringPut (arglist, '.'); break;
1124 case TOKEN_SEMICOLON: vStringPut (arglist, ';'); break;
1125 case TOKEN_STRING: vStringCatS (arglist, "'...'"); break;
1127 case TOKEN_IDENTIFIER:
1128 case TOKEN_KEYWORD:
1129 case TOKEN_VARIABLE:
1131 switch (vStringLast (arglist))
1133 case 0:
1134 case ' ':
1135 case '{':
1136 case '(':
1137 case '[':
1138 case '.':
1139 /* no need for a space between those and the identifier */
1140 break;
1142 default:
1143 vStringPut (arglist, ' ');
1144 break;
1146 if (token->type == TOKEN_VARIABLE)
1147 vStringPut (arglist, '$');
1148 vStringCat (arglist, token->string);
1149 break;
1152 default: break;
1155 while (token->type != TOKEN_EOF && depth > 0);
1157 vStringTerminate (arglist);
1159 makeFunctionTag (name, arglist, access, impl);
1160 vStringDelete (arglist);
1162 readToken (token); /* normally it's an open brace or "use" keyword */
1165 /* if parsing Zephir, skip function return type hint */
1166 if (getSourceLanguage () == Lang_zephir && token->type == TOKEN_OPERATOR)
1169 readToken (token);
1170 while (token->type != TOKEN_EOF &&
1171 token->type != TOKEN_OPEN_CURLY &&
1172 token->type != TOKEN_CLOSE_CURLY &&
1173 token->type != TOKEN_SEMICOLON);
1176 /* skip use(...) */
1177 if (token->type == TOKEN_KEYWORD && token->keyword == KEYWORD_use)
1179 readToken (token);
1180 if (token->type == TOKEN_OPEN_PAREN)
1182 int depth = 1;
1186 readToken (token);
1187 switch (token->type)
1189 case TOKEN_OPEN_PAREN: depth++; break;
1190 case TOKEN_CLOSE_PAREN: depth--; break;
1191 default: break;
1194 while (token->type != TOKEN_EOF && depth > 0);
1196 readToken (token);
1200 if (token->type == TOKEN_OPEN_CURLY)
1201 enterScope (token, name->string, K_FUNCTION);
1202 else
1203 readNext = FALSE;
1205 if (nameFree)
1206 deleteToken (nameFree);
1208 return readNext;
1211 /* parses declarations of the form
1212 * const NAME = VALUE */
1213 static boolean parseConstant (tokenInfo *const token)
1215 tokenInfo *name;
1217 readToken (token); /* skip const keyword */
1218 if (token->type != TOKEN_IDENTIFIER)
1219 return FALSE;
1221 name = newToken ();
1222 copyToken (name, token, TRUE);
1224 readToken (token);
1225 if (token->type == TOKEN_EQUAL_SIGN)
1226 makeSimplePhpTag (name, K_DEFINE, ACCESS_UNDEFINED);
1228 deleteToken (name);
1230 return token->type == TOKEN_EQUAL_SIGN;
1233 /* parses declarations of the form
1234 * define('NAME', 'VALUE')
1235 * define(NAME, 'VALUE) */
1236 static boolean parseDefine (tokenInfo *const token)
1238 int depth = 1;
1240 readToken (token); /* skip "define" identifier */
1241 if (token->type != TOKEN_OPEN_PAREN)
1242 return FALSE;
1244 readToken (token);
1245 if (token->type == TOKEN_STRING ||
1246 token->type == TOKEN_IDENTIFIER)
1248 makeSimplePhpTag (token, K_DEFINE, ACCESS_UNDEFINED);
1249 readToken (token);
1252 /* skip until the close parenthesis.
1253 * no need to handle nested blocks since they would be invalid
1254 * in this context anyway (the VALUE may only be a scalar, like
1255 * 42
1256 * (42)
1257 * and alike) */
1258 while (token->type != TOKEN_EOF && depth > 0)
1260 switch (token->type)
1262 case TOKEN_OPEN_PAREN: depth++; break;
1263 case TOKEN_CLOSE_PAREN: depth--; break;
1264 default: break;
1266 readToken (token);
1269 return FALSE;
1272 /* parses declarations of the form
1273 * $var = VALUE
1274 * $var; */
1275 static boolean parseVariable (tokenInfo *const token)
1277 tokenInfo *name;
1278 boolean readNext = TRUE;
1279 accessType access = CurrentStatement.access;
1281 name = newToken ();
1282 copyToken (name, token, TRUE);
1284 readToken (token);
1285 if (token->type == TOKEN_EQUAL_SIGN)
1287 phpKind kind = K_VARIABLE;
1289 if (token->parentKind == K_FUNCTION)
1290 kind = K_LOCAL_VARIABLE;
1292 readToken (token);
1293 if (token->type == TOKEN_KEYWORD &&
1294 token->keyword == KEYWORD_function &&
1295 PhpKinds[kind].enabled)
1297 if (parseFunction (token, name))
1298 readToken (token);
1299 readNext = (boolean) (token->type == TOKEN_SEMICOLON);
1301 else
1303 makeSimplePhpTag (name, kind, access);
1304 readNext = FALSE;
1307 else if (token->type == TOKEN_SEMICOLON)
1309 /* generate tags for variable declarations in classes
1310 * class Foo {
1311 * protected $foo;
1313 * but don't get fooled by stuff like $foo = $bar; */
1314 if (token->parentKind == K_CLASS || token->parentKind == K_INTERFACE)
1315 makeSimplePhpTag (name, K_VARIABLE, access);
1317 else
1318 readNext = FALSE;
1320 deleteToken (name);
1322 return readNext;
1325 /* parses namespace declarations
1326 * namespace Foo {}
1327 * namespace Foo\Bar {}
1328 * namespace Foo;
1329 * namespace Foo\Bar;
1330 * namespace;
1331 * napespace {} */
1332 static boolean parseNamespace (tokenInfo *const token)
1334 tokenInfo *nsToken = newToken ();
1336 vStringClear (CurrentNamespace);
1337 copyToken (nsToken, token, FALSE);
1341 readToken (token);
1342 if (token->type == TOKEN_IDENTIFIER)
1344 if (vStringLength (CurrentNamespace) > 0)
1345 vStringPut (CurrentNamespace, '\\');
1346 vStringCat (CurrentNamespace, token->string);
1349 while (token->type != TOKEN_EOF &&
1350 token->type != TOKEN_SEMICOLON &&
1351 token->type != TOKEN_OPEN_CURLY);
1353 vStringTerminate (CurrentNamespace);
1354 if (vStringLength (CurrentNamespace) > 0)
1355 makeNamespacePhpTag (nsToken, CurrentNamespace);
1357 if (token->type == TOKEN_OPEN_CURLY)
1358 enterScope (token, NULL, -1);
1360 deleteToken (nsToken);
1362 return TRUE;
1365 static void enterScope (tokenInfo *const parentToken,
1366 const vString *const extraScope,
1367 const int parentKind)
1369 tokenInfo *token = newToken ();
1370 int origParentKind = parentToken->parentKind;
1372 copyToken (token, parentToken, TRUE);
1374 if (extraScope)
1376 addToScope (token, extraScope);
1377 token->parentKind = parentKind;
1380 readToken (token);
1381 while (token->type != TOKEN_EOF &&
1382 token->type != TOKEN_CLOSE_CURLY)
1384 boolean readNext = TRUE;
1386 switch (token->type)
1388 case TOKEN_OPEN_CURLY:
1389 enterScope (token, NULL, -1);
1390 break;
1392 case TOKEN_KEYWORD:
1393 switch (token->keyword)
1395 case KEYWORD_class: readNext = parseClassOrIface (token, K_CLASS); break;
1396 case KEYWORD_interface: readNext = parseClassOrIface (token, K_INTERFACE); break;
1397 case KEYWORD_trait: readNext = parseTrait (token); break;
1398 case KEYWORD_function: readNext = parseFunction (token, NULL); break;
1399 case KEYWORD_const: readNext = parseConstant (token); break;
1400 case KEYWORD_define: readNext = parseDefine (token); break;
1402 case KEYWORD_namespace: readNext = parseNamespace (token); break;
1404 case KEYWORD_private: CurrentStatement.access = ACCESS_PRIVATE; break;
1405 case KEYWORD_protected: CurrentStatement.access = ACCESS_PROTECTED; break;
1406 case KEYWORD_public: CurrentStatement.access = ACCESS_PUBLIC; break;
1407 case KEYWORD_var: CurrentStatement.access = ACCESS_PUBLIC; break;
1409 case KEYWORD_abstract: CurrentStatement.impl = IMPL_ABSTRACT; break;
1411 default: break;
1413 break;
1415 case TOKEN_VARIABLE:
1416 readNext = parseVariable (token);
1417 break;
1419 default: break;
1422 if (readNext)
1423 readToken (token);
1426 copyToken (parentToken, token, FALSE);
1427 parentToken->parentKind = origParentKind;
1428 deleteToken (token);
1431 static void findTags (void)
1433 tokenInfo *const token = newToken ();
1435 CurrentStatement.access = ACCESS_UNDEFINED;
1436 CurrentStatement.impl = IMPL_UNDEFINED;
1437 CurrentNamespace = vStringNew ();
1441 enterScope (token, NULL, -1);
1443 while (token->type != TOKEN_EOF); /* keep going even with unmatched braces */
1445 vStringDelete (CurrentNamespace);
1446 deleteToken (token);
1449 static void findPhpTags (void)
1451 InPhp = FALSE;
1452 findTags ();
1455 static void findZephirTags (void)
1457 InPhp = TRUE;
1458 findTags ();
1461 static void initializePhpParser (const langType language)
1463 Lang_php = language;
1466 static void initializeZephirParser (const langType language)
1468 Lang_zephir = language;
1471 extern parserDefinition* PhpParser (void)
1473 static const char *const extensions [] = { "php", "php3", "php4", "php5", "phtml", NULL };
1474 parserDefinition* def = parserNew ("PHP");
1475 def->kinds = PhpKinds;
1476 def->kindCount = ARRAY_SIZE (PhpKinds);
1477 def->extensions = extensions;
1478 def->parser = findPhpTags;
1479 def->initialize = initializePhpParser;
1480 def->keywordTable = PhpKeywordTable;
1481 def->keywordCount = ARRAY_SIZE (PhpKeywordTable);
1482 return def;
1485 extern parserDefinition* ZephirParser (void)
1487 static const char *const extensions [] = { "zep", NULL };
1488 parserDefinition* def = parserNew ("Zephir");
1489 def->kinds = PhpKinds;
1490 def->kindCount = ARRAY_SIZE (PhpKinds);
1491 def->extensions = extensions;
1492 def->parser = findZephirTags;
1493 def->initialize = initializeZephirParser;
1494 def->keywordTable = PhpKeywordTable;
1495 def->keywordCount = ARRAY_SIZE (PhpKeywordTable);
1496 return def;
1499 /* vi:set tabstop=4 shiftwidth=4: */