ctags: Rename Geany-specific tagEntryInfo::arglist to upstream's ::signature
[geany-mirror.git] / ctags / parsers / php.c
blobabe87689252208ec547f2cf1fad5d4a68c05e178
1 /*
2 * Copyright (c) 2013, Colomban Wendling <ban@herbesfolles.org>
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License.
7 * This module contains code for generating tags for the PHP scripting
8 * language.
9 */
12 * INCLUDE FILES
14 #include "general.h" /* must always come first */
15 #include "main.h"
16 #include "parse.h"
17 #include "read.h"
18 #include "vstring.h"
19 #include "keyword.h"
20 #include "entry.h"
23 #define SCOPE_SEPARATOR "::"
/* Identifiers for every word registered in the keyword table.
 * KEYWORD_NONE is -1; the others are numbered consecutively from 0 in the
 * order they appear below, so the order must not be changed. */
typedef enum {
	KEYWORD_NONE = -1,

	/* a - c */
	KEYWORD_abstract, KEYWORD_and, KEYWORD_as,
	KEYWORD_break,
	KEYWORD_callable, KEYWORD_case, KEYWORD_catch, KEYWORD_class,
	KEYWORD_clone, KEYWORD_const, KEYWORD_continue,

	/* d - e */
	KEYWORD_declare, KEYWORD_define, KEYWORD_default, KEYWORD_do,
	KEYWORD_echo, KEYWORD_else, KEYWORD_elif,
	KEYWORD_enddeclare, KEYWORD_endfor, KEYWORD_endforeach, KEYWORD_endif,
	KEYWORD_endswitch, KEYWORD_endwhile, KEYWORD_extends,

	/* f - i */
	KEYWORD_final, KEYWORD_finally, KEYWORD_for, KEYWORD_foreach,
	KEYWORD_function,
	KEYWORD_global, KEYWORD_goto,
	KEYWORD_if, KEYWORD_implements, KEYWORD_include, KEYWORD_include_once,
	KEYWORD_instanceof, KEYWORD_insteadof, KEYWORD_interface,

	/* n - r */
	KEYWORD_namespace, KEYWORD_new,
	KEYWORD_or,
	KEYWORD_print, KEYWORD_private, KEYWORD_protected, KEYWORD_public,
	KEYWORD_require, KEYWORD_require_once, KEYWORD_return,

	/* s - y */
	KEYWORD_static, KEYWORD_switch,
	KEYWORD_throw, KEYWORD_trait, KEYWORD_try,
	KEYWORD_use,
	KEYWORD_var,
	KEYWORD_while,
	KEYWORD_xor,
	KEYWORD_yield
} keywordId;
/* Visibility attached to a tag.  COUNT_ACCESS is not a visibility: it is the
 * number of real values and sizes the name table in accessToString(). */
typedef enum {
	ACCESS_UNDEFINED,	/* no visibility keyword seen */
	ACCESS_PRIVATE,
	ACCESS_PROTECTED,
	ACCESS_PUBLIC,
	COUNT_ACCESS
} accessType;
/* Implementation attribute attached to a tag.  COUNT_IMPL is the number of
 * real values and sizes the name table in implToString(). */
typedef enum {
	IMPL_UNDEFINED,		/* no implementation keyword seen */
	IMPL_ABSTRACT,
	COUNT_IMPL
} implType;
/* Tag kinds produced by this parser.  The values are used as indices into
 * PhpKinds[], so both must list the kinds in the same order.  COUNT_KIND is
 * the number of kinds. */
typedef enum {
	K_CLASS,
	K_DEFINE,
	K_FUNCTION,
	K_INTERFACE,
	K_LOCAL_VARIABLE,
	K_NAMESPACE,
	K_TRAIT,
	K_VARIABLE,
	COUNT_KIND
} phpKind;
115 static kindOption PhpKinds[COUNT_KIND] = {
116 { TRUE, 'c', "class", "classes" },
117 { TRUE, 'd', "define", "constant definitions" },
118 { TRUE, 'f', "function", "functions" },
119 { TRUE, 'i', "interface", "interfaces" },
120 { FALSE, 'l', "local", "local variables" },
121 { TRUE, 'n', "namespace", "namespaces" },
122 { TRUE, 't', "trait", "traits" },
123 { TRUE, 'v', "variable", "variables" }
126 typedef struct {
127 const char *name;
128 keywordId id;
129 } keywordDesc;
131 static const keywordDesc PhpKeywordTable[] = {
132 /* keyword keyword ID */
133 { "abstract", KEYWORD_abstract },
134 { "and", KEYWORD_and },
135 { "as", KEYWORD_as },
136 { "break", KEYWORD_break },
137 { "callable", KEYWORD_callable },
138 { "case", KEYWORD_case },
139 { "catch", KEYWORD_catch },
140 { "cfunction", KEYWORD_function }, /* nobody knows what the hell this is, but it seems to behave much like "function" so bind it to it */
141 { "class", KEYWORD_class },
142 { "clone", KEYWORD_clone },
143 { "const", KEYWORD_const },
144 { "continue", KEYWORD_continue },
145 { "declare", KEYWORD_declare },
146 { "define", KEYWORD_define }, /* this isn't really a keyword but we handle it so it's easier this way */
147 { "default", KEYWORD_default },
148 { "do", KEYWORD_do },
149 { "echo", KEYWORD_echo },
150 { "else", KEYWORD_else },
151 { "elseif", KEYWORD_elif },
152 { "enddeclare", KEYWORD_enddeclare },
153 { "endfor", KEYWORD_endfor },
154 { "endforeach", KEYWORD_endforeach },
155 { "endif", KEYWORD_endif },
156 { "endswitch", KEYWORD_endswitch },
157 { "endwhile", KEYWORD_endwhile },
158 { "extends", KEYWORD_extends },
159 { "final", KEYWORD_final },
160 { "finally", KEYWORD_finally },
161 { "for", KEYWORD_for },
162 { "foreach", KEYWORD_foreach },
163 { "function", KEYWORD_function },
164 { "global", KEYWORD_global },
165 { "goto", KEYWORD_goto },
166 { "if", KEYWORD_if },
167 { "implements", KEYWORD_implements },
168 { "include", KEYWORD_include },
169 { "include_once", KEYWORD_include_once },
170 { "instanceof", KEYWORD_instanceof },
171 { "insteadof", KEYWORD_insteadof },
172 { "interface", KEYWORD_interface },
173 { "namespace", KEYWORD_namespace },
174 { "new", KEYWORD_new },
175 { "or", KEYWORD_or },
176 { "print", KEYWORD_print },
177 { "private", KEYWORD_private },
178 { "protected", KEYWORD_protected },
179 { "public", KEYWORD_public },
180 { "require", KEYWORD_require },
181 { "require_once", KEYWORD_require_once },
182 { "return", KEYWORD_return },
183 { "static", KEYWORD_static },
184 { "switch", KEYWORD_switch },
185 { "throw", KEYWORD_throw },
186 { "trait", KEYWORD_trait },
187 { "try", KEYWORD_try },
188 { "use", KEYWORD_use },
189 { "var", KEYWORD_var },
190 { "while", KEYWORD_while },
191 { "xor", KEYWORD_xor },
192 { "yield", KEYWORD_yield }
/* Lexical categories assigned by readToken().  Values are consecutive from
 * 0 in the order listed. */
typedef enum eTokenType {
	/* special */
	TOKEN_UNDEFINED,
	TOKEN_EOF,
	TOKEN_CHARACTER,

	/* punctuation and words */
	TOKEN_CLOSE_PAREN,
	TOKEN_SEMICOLON,
	TOKEN_COLON,
	TOKEN_COMMA,
	TOKEN_KEYWORD,
	TOKEN_OPEN_PAREN,
	TOKEN_OPERATOR,
	TOKEN_IDENTIFIER,
	TOKEN_STRING,
	TOKEN_PERIOD,
	TOKEN_OPEN_CURLY,
	TOKEN_CLOSE_CURLY,
	TOKEN_EQUAL_SIGN,
	TOKEN_OPEN_SQUARE,
	TOKEN_CLOSE_SQUARE,
	TOKEN_VARIABLE,
	TOKEN_AMPERSAND
} tokenType;
219 typedef struct {
220 tokenType type;
221 keywordId keyword;
222 vString * string;
223 vString * scope;
224 unsigned long lineNumber;
225 MIOPos filePosition;
226 int parentKind; /* -1 if none */
227 } tokenInfo;
229 static langType Lang_php;
230 static langType Lang_zephir;
232 static boolean InPhp = FALSE; /* whether we are between <? ?> */
234 /* current statement details */
235 static struct {
236 accessType access;
237 implType impl;
238 } CurrentStatement;
240 /* Current namespace */
241 static vString *CurrentNamespace;
244 static void buildPhpKeywordHash (const langType language)
246 const size_t count = sizeof (PhpKeywordTable) / sizeof (PhpKeywordTable[0]);
247 size_t i;
248 for (i = 0; i < count ; i++)
250 const keywordDesc* const p = &PhpKeywordTable[i];
251 addKeyword (p->name, language, (int) p->id);
255 static const char *accessToString (const accessType access)
257 static const char *const names[COUNT_ACCESS] = {
258 "undefined",
259 "private",
260 "protected",
261 "public"
264 Assert (access < COUNT_ACCESS);
266 return names[access];
269 static const char *implToString (const implType impl)
271 static const char *const names[COUNT_IMPL] = {
272 "undefined",
273 "abstract"
276 Assert (impl < COUNT_IMPL);
278 return names[impl];
281 static void initPhpEntry (tagEntryInfo *const e, const tokenInfo *const token,
282 const phpKind kind, const accessType access)
284 static vString *fullScope = NULL;
285 int parentKind = -1;
287 if (fullScope == NULL)
288 fullScope = vStringNew ();
289 else
290 vStringClear (fullScope);
292 if (vStringLength (CurrentNamespace) > 0)
294 vStringCopy (fullScope, CurrentNamespace);
295 parentKind = K_NAMESPACE;
298 initTagEntry (e, vStringValue (token->string));
300 e->lineNumber = token->lineNumber;
301 e->filePosition = token->filePosition;
302 e->kindName = PhpKinds[kind].name;
303 e->kind = (char) PhpKinds[kind].letter;
305 if (access != ACCESS_UNDEFINED)
306 e->extensionFields.access = accessToString (access);
307 if (vStringLength (token->scope) > 0)
309 parentKind = token->parentKind;
310 if (vStringLength (fullScope) > 0)
311 vStringCatS (fullScope, SCOPE_SEPARATOR);
312 vStringCat (fullScope, token->scope);
314 if (vStringLength (fullScope) > 0)
316 Assert (parentKind >= 0);
318 vStringTerminate (fullScope);
319 e->extensionFields.scope[0] = PhpKinds[parentKind].name;
320 e->extensionFields.scope[1] = vStringValue (fullScope);
324 static void makeSimplePhpTag (const tokenInfo *const token, const phpKind kind,
325 const accessType access)
327 if (PhpKinds[kind].enabled)
329 tagEntryInfo e;
331 initPhpEntry (&e, token, kind, access);
332 makeTagEntry (&e);
336 static void makeNamespacePhpTag (const tokenInfo *const token, const vString *const name)
338 if (PhpKinds[K_NAMESPACE].enabled)
340 tagEntryInfo e;
342 initTagEntry (&e, vStringValue (name));
344 e.lineNumber = token->lineNumber;
345 e.filePosition = token->filePosition;
346 e.kindName = PhpKinds[K_NAMESPACE].name;
347 e.kind = (char) PhpKinds[K_NAMESPACE].letter;
349 makeTagEntry (&e);
353 static void makeClassOrIfaceTag (const phpKind kind, const tokenInfo *const token,
354 vString *const inheritance, const implType impl)
356 if (PhpKinds[kind].enabled)
358 tagEntryInfo e;
360 initPhpEntry (&e, token, kind, ACCESS_UNDEFINED);
362 if (impl != IMPL_UNDEFINED)
363 e.extensionFields.implementation = implToString (impl);
364 if (vStringLength (inheritance) > 0)
365 e.extensionFields.inheritance = vStringValue (inheritance);
367 makeTagEntry (&e);
371 static void makeFunctionTag (const tokenInfo *const token,
372 const vString *const arglist,
373 const accessType access, const implType impl)
375 if (PhpKinds[K_FUNCTION].enabled)
377 tagEntryInfo e;
379 initPhpEntry (&e, token, K_FUNCTION, access);
381 if (impl != IMPL_UNDEFINED)
382 e.extensionFields.implementation = implToString (impl);
383 if (arglist)
384 e.extensionFields.signature = vStringValue (arglist);
386 makeTagEntry (&e);
390 static tokenInfo *newToken (void)
392 tokenInfo *const token = xMalloc (1, tokenInfo);
394 token->type = TOKEN_UNDEFINED;
395 token->keyword = KEYWORD_NONE;
396 token->string = vStringNew ();
397 token->scope = vStringNew ();
398 token->lineNumber = getSourceLineNumber ();
399 token->filePosition = getInputFilePosition ();
400 token->parentKind = -1;
402 return token;
405 static void deleteToken (tokenInfo *const token)
407 vStringDelete (token->string);
408 vStringDelete (token->scope);
409 eFree (token);
412 static void copyToken (tokenInfo *const dest, const tokenInfo *const src,
413 boolean scope)
415 dest->lineNumber = src->lineNumber;
416 dest->filePosition = src->filePosition;
417 dest->type = src->type;
418 dest->keyword = src->keyword;
419 vStringCopy(dest->string, src->string);
420 dest->parentKind = src->parentKind;
421 if (scope)
422 vStringCopy(dest->scope, src->scope);
#if 0
#include <stdio.h>

/* Debugging aid: returns a printable name for @type, or NULL for a value
 * that is not a tokenType. */
static const char *tokenTypeName (const tokenType type)
{
	switch (type)
	{
		case TOKEN_UNDEFINED:		return "undefined";
		case TOKEN_EOF:				return "EOF";
		case TOKEN_CHARACTER:		return "character";
		case TOKEN_CLOSE_PAREN:		return "')'";
		case TOKEN_SEMICOLON:		return "';'";
		case TOKEN_COLON:			return "':'";
		case TOKEN_COMMA:			return "','";
		case TOKEN_OPEN_PAREN:		return "'('";
		case TOKEN_OPERATOR:		return "operator";
		case TOKEN_IDENTIFIER:		return "identifier";
		case TOKEN_KEYWORD:			return "keyword";
		case TOKEN_STRING:			return "string";
		case TOKEN_PERIOD:			return "'.'";
		case TOKEN_OPEN_CURLY:		return "'{'";
		case TOKEN_CLOSE_CURLY:		return "'}'";
		case TOKEN_EQUAL_SIGN:		return "'='";
		case TOKEN_OPEN_SQUARE:		return "'['";
		case TOKEN_CLOSE_SQUARE:	return "']'";
		case TOKEN_VARIABLE:		return "variable";
		/* was missing: printToken() would have passed NULL to "%s" */
		case TOKEN_AMPERSAND:		return "'&'";
	}
	return NULL;
}

/* Debugging aid: dumps @token to stderr. */
static void printToken (const tokenInfo *const token)
{
	fprintf (stderr, "%p:\n\ttype:\t%s\n\tline:\t%lu\n\tscope:\t%s\n", (void *) token,
			 tokenTypeName (token->type),
			 token->lineNumber,
			 vStringValue (token->scope));
	switch (token->type)
	{
		case TOKEN_IDENTIFIER:
		case TOKEN_STRING:
		case TOKEN_VARIABLE:
			fprintf (stderr, "\tcontent:\t%s\n", vStringValue (token->string));
			break;

		case TOKEN_KEYWORD:
		{
			size_t n = sizeof PhpKeywordTable / sizeof PhpKeywordTable[0];
			size_t i;

			/* reverse lookup of the keyword's spelling */
			fprintf (stderr, "\tkeyword:\t");
			for (i = 0; i < n; i++)
			{
				if (PhpKeywordTable[i].id == token->keyword)
				{
					fprintf (stderr, "%s\n", PhpKeywordTable[i].name);
					break;
				}
			}
			if (i >= n)
				fprintf (stderr, "(unknown)\n");
			break;
		}

		default: break;
	}
}
#endif
492 static void addToScope (tokenInfo *const token, const vString *const extra)
494 if (vStringLength (token->scope) > 0)
495 vStringCatS (token->scope, SCOPE_SEPARATOR);
496 vStringCatS (token->scope, vStringValue (extra));
497 vStringTerminate(token->scope);
500 static boolean isIdentChar (const int c)
502 return (isalnum (c) || c == '_' || c >= 0x80);
/* Consumes input up to and including the next occurrence of @c.
 * Returns @c, or EOF if the input ended first. */
static int skipToCharacter (const int c)
{
	for (;;)
	{
		const int got = fileGetc ();

		if (got == EOF || got == c)
			return got;
	}
}
515 static void parseString (vString *const string, const int delimiter)
517 while (TRUE)
519 int c = fileGetc ();
521 if (c == '\\' && (c = fileGetc ()) != EOF)
522 vStringPut (string, (char) c);
523 else if (c == EOF || c == delimiter)
524 break;
525 else
526 vStringPut (string, (char) c);
528 vStringTerminate (string);
531 /* reads an HereDoc or a NowDoc (the part after the <<<).
532 * <<<[ \t]*(ID|'ID'|"ID")
533 * ...
534 * ID;?
536 * note that:
537 * 1) starting ID must be immediately followed by a newline;
538 * 2) closing ID is the same as opening one;
539 * 3) closing ID must be immediately followed by a newline or a semicolon
540 * then a newline.
542 * Example of a *single* valid heredoc:
543 * <<< FOO
544 * something
545 * something else
546 * FOO this is not an end
547 * FOO; this isn't either
548 * FOO; # neither this is
549 * FOO;
550 * # previous line was the end, but the semicolon wasn't required
552 static void parseHeredoc (vString *const string)
554 int c;
555 unsigned int len;
556 char delimiter[64]; /* arbitrary limit, but more is crazy anyway */
557 int quote = 0;
561 c = fileGetc ();
563 while (c == ' ' || c == '\t');
565 if (c == '\'' || c == '"')
567 quote = c;
568 c = fileGetc ();
570 for (len = 0; len < (sizeof delimiter / sizeof delimiter[0]) - 1; len++)
572 if (! isIdentChar (c))
573 break;
574 delimiter[len] = (char) c;
575 c = fileGetc ();
577 delimiter[len] = 0;
579 if (len == 0) /* no delimiter, give up */
580 goto error;
581 if (quote)
583 if (c != quote) /* no closing quote for quoted identifier, give up */
584 goto error;
585 c = fileGetc ();
587 if (c != '\r' && c != '\n') /* missing newline, give up */
588 goto error;
592 c = fileGetc ();
594 if (c != '\r' && c != '\n')
595 vStringPut (string, (char) c);
596 else
598 /* new line, check for a delimiter right after */
599 int nl = c;
600 int extra = EOF;
602 c = fileGetc ();
603 for (len = 0; c != 0 && (c - delimiter[len]) == 0; len++)
604 c = fileGetc ();
606 if (delimiter[len] != 0)
607 fileUngetc (c);
608 else
610 /* line start matched the delimiter, now check whether there
611 * is anything after it */
612 if (c == '\r' || c == '\n')
614 fileUngetc (c);
615 break;
617 else if (c == ';')
619 int d = fileGetc ();
620 if (d == '\r' || d == '\n')
622 /* put back the semicolon since it's not part of the
623 * string. we can't put back the newline, but it's a
624 * whitespace character nobody cares about it anyway */
625 fileUngetc (';');
626 break;
628 else
630 /* put semicolon in the string and continue */
631 extra = ';';
632 fileUngetc (d);
636 /* if we are here it wasn't a delimiter, so put everything in the
637 * string */
638 vStringPut (string, (char) nl);
639 vStringNCatS (string, delimiter, len);
640 if (extra != EOF)
641 vStringPut (string, (char) extra);
644 while (c != EOF);
646 vStringTerminate (string);
648 return;
650 error:
651 fileUngetc (c);
654 static void parseIdentifier (vString *const string, const int firstChar)
656 int c = firstChar;
659 vStringPut (string, (char) c);
660 c = fileGetc ();
661 } while (isIdentChar (c));
662 fileUngetc (c);
663 vStringTerminate (string);
666 static keywordId analyzeToken (vString *const name, langType language)
668 vString *keyword = vStringNew ();
669 keywordId result;
670 vStringCopyToLower (keyword, name);
671 result = lookupKeyword (vStringValue (keyword), language);
672 vStringDelete (keyword);
673 return result;
676 static boolean isSpace (int c)
678 return (c == '\t' || c == ' ' || c == '\v' ||
679 c == '\n' || c == '\r' || c == '\f');
/* Starting from the already read character @c, consumes input for as long
 * as the current character is a space.  Returns the first character that is
 * not one (possibly @c itself, possibly EOF). */
static int skipWhitespaces (int c)
{
	for (; isSpace (c); c = fileGetc ())
		;
	return c;
}
689 /* <script[:white:]+language[:white:]*=[:white:]*(php|'php'|"php")[:white:]*>
691 * This is ugly, but the whole "<script language=php>" tag is and we can't
692 * really do better without adding a lot of code only for this */
693 static boolean isOpenScriptLanguagePhp (int c)
695 int quote = 0;
697 /* <script[:white:]+language[:white:]*= */
698 if (c != '<' ||
699 tolower ((c = fileGetc ())) != 's' ||
700 tolower ((c = fileGetc ())) != 'c' ||
701 tolower ((c = fileGetc ())) != 'r' ||
702 tolower ((c = fileGetc ())) != 'i' ||
703 tolower ((c = fileGetc ())) != 'p' ||
704 tolower ((c = fileGetc ())) != 't' ||
705 ! isSpace ((c = fileGetc ())) ||
706 tolower ((c = skipWhitespaces (c))) != 'l' ||
707 tolower ((c = fileGetc ())) != 'a' ||
708 tolower ((c = fileGetc ())) != 'n' ||
709 tolower ((c = fileGetc ())) != 'g' ||
710 tolower ((c = fileGetc ())) != 'u' ||
711 tolower ((c = fileGetc ())) != 'a' ||
712 tolower ((c = fileGetc ())) != 'g' ||
713 tolower ((c = fileGetc ())) != 'e' ||
714 (c = skipWhitespaces (fileGetc ())) != '=')
715 return FALSE;
717 /* (php|'php'|"php")> */
718 c = skipWhitespaces (fileGetc ());
719 if (c == '"' || c == '\'')
721 quote = c;
722 c = fileGetc ();
724 if (tolower (c) != 'p' ||
725 tolower ((c = fileGetc ())) != 'h' ||
726 tolower ((c = fileGetc ())) != 'p' ||
727 (quote != 0 && (c = fileGetc ()) != quote) ||
728 (c = skipWhitespaces (fileGetc ())) != '>')
729 return FALSE;
731 return TRUE;
/* Consumes input until the start of a PHP chunk: "<?" not followed by "xml"
 * (any letter case), or a <script language="php"> tag.
 *
 * Returns EOF if no chunk was found, otherwise the last character that was
 * examined -- which is not necessarily the first character of the PHP
 * code. */
static int findPhpStart (void)
{
	int c;

	for (;;)
	{
		c = fileGetc ();
		if (c == '<')
		{
			c = fileGetc ();
			if (c == '?')
			{
				/* <? and <?php open a chunk but <?xml doesn't; anything
				 * else after "<?" is a short open tag */
				if (tolower ((c = fileGetc ())) != 'x' ||
					tolower ((c = fileGetc ())) != 'm' ||
					tolower ((c = fileGetc ())) != 'l')
					break;
			}
			else
			{
				/* maybe <script language="php"> */
				fileUngetc (c);
				if (isOpenScriptLanguagePhp ('<'))
					break;
			}
		}

		if (c == EOF)
			break;
	}

	return c;
}
767 static int skipSingleComment (void)
769 int c;
772 c = fileGetc ();
773 if (c == '\r')
775 int next = fileGetc ();
776 if (next != '\n')
777 fileUngetc (next);
778 else
779 c = next;
781 /* ?> in single-line comments leaves PHP mode */
782 else if (c == '?')
784 int next = fileGetc ();
785 if (next == '>')
786 InPhp = FALSE;
787 else
788 fileUngetc (next);
790 } while (InPhp && c != EOF && c != '\n' && c != '\r');
791 return c;
794 static void readToken (tokenInfo *const token)
796 int c;
798 token->type = TOKEN_UNDEFINED;
799 token->keyword = KEYWORD_NONE;
800 vStringClear (token->string);
802 getNextChar:
804 if (! InPhp)
806 c = findPhpStart ();
807 if (c != EOF)
808 InPhp = TRUE;
810 else
811 c = fileGetc ();
813 c = skipWhitespaces (c);
815 token->lineNumber = getSourceLineNumber ();
816 token->filePosition = getInputFilePosition ();
818 switch (c)
820 case EOF: token->type = TOKEN_EOF; break;
821 case '(': token->type = TOKEN_OPEN_PAREN; break;
822 case ')': token->type = TOKEN_CLOSE_PAREN; break;
823 case ';': token->type = TOKEN_SEMICOLON; break;
824 case ',': token->type = TOKEN_COMMA; break;
825 case '.': token->type = TOKEN_PERIOD; break;
826 case ':': token->type = TOKEN_COLON; break;
827 case '{': token->type = TOKEN_OPEN_CURLY; break;
828 case '}': token->type = TOKEN_CLOSE_CURLY; break;
829 case '[': token->type = TOKEN_OPEN_SQUARE; break;
830 case ']': token->type = TOKEN_CLOSE_SQUARE; break;
831 case '&': token->type = TOKEN_AMPERSAND; break;
833 case '=':
835 int d = fileGetc ();
836 if (d == '=' || d == '>')
837 token->type = TOKEN_OPERATOR;
838 else
840 fileUngetc (d);
841 token->type = TOKEN_EQUAL_SIGN;
843 break;
846 case '\'':
847 case '"':
848 token->type = TOKEN_STRING;
849 parseString (token->string, c);
850 token->lineNumber = getSourceLineNumber ();
851 token->filePosition = getInputFilePosition ();
852 break;
854 case '<':
856 int d = fileGetc ();
857 if (d == '/')
859 /* </script[:white:]*> */
860 if (tolower ((d = fileGetc ())) == 's' &&
861 tolower ((d = fileGetc ())) == 'c' &&
862 tolower ((d = fileGetc ())) == 'r' &&
863 tolower ((d = fileGetc ())) == 'i' &&
864 tolower ((d = fileGetc ())) == 'p' &&
865 tolower ((d = fileGetc ())) == 't' &&
866 (d = skipWhitespaces (fileGetc ())) == '>')
868 InPhp = FALSE;
869 goto getNextChar;
871 else
873 fileUngetc (d);
874 token->type = TOKEN_UNDEFINED;
877 else if (d == '<' && (d = fileGetc ()) == '<')
879 token->type = TOKEN_STRING;
880 parseHeredoc (token->string);
882 else
884 fileUngetc (d);
885 token->type = TOKEN_UNDEFINED;
887 break;
890 case '#': /* comment */
891 skipSingleComment ();
892 goto getNextChar;
893 break;
895 case '+':
896 case '-':
897 case '*':
898 case '%':
900 int d = fileGetc ();
901 if (d != '=')
902 fileUngetc (d);
903 token->type = TOKEN_OPERATOR;
904 break;
907 case '/': /* division or comment start */
909 int d = fileGetc ();
910 if (d == '/') /* single-line comment */
912 skipSingleComment ();
913 goto getNextChar;
915 else if (d == '*')
919 c = skipToCharacter ('*');
920 if (c != EOF)
922 c = fileGetc ();
923 if (c == '/')
924 break;
925 else
926 fileUngetc (c);
928 } while (c != EOF && c != '\0');
929 goto getNextChar;
931 else
933 if (d != '=')
934 fileUngetc (d);
935 token->type = TOKEN_OPERATOR;
937 break;
940 case '$': /* variable start */
942 int d = fileGetc ();
943 if (! isIdentChar (d))
945 fileUngetc (d);
946 token->type = TOKEN_UNDEFINED;
948 else
950 parseIdentifier (token->string, d);
951 token->type = TOKEN_VARIABLE;
953 break;
956 case '?': /* maybe the end of the PHP chunk */
958 int d = fileGetc ();
959 if (d == '>')
961 InPhp = FALSE;
962 goto getNextChar;
964 else
966 fileUngetc (d);
967 token->type = TOKEN_UNDEFINED;
969 break;
972 default:
973 if (! isIdentChar (c))
974 token->type = TOKEN_UNDEFINED;
975 else
977 parseIdentifier (token->string, c);
978 token->keyword = analyzeToken (token->string, getSourceLanguage ());
979 if (token->keyword == KEYWORD_NONE)
980 token->type = TOKEN_IDENTIFIER;
981 else
982 token->type = TOKEN_KEYWORD;
984 break;
987 if (token->type == TOKEN_SEMICOLON ||
988 token->type == TOKEN_OPEN_CURLY ||
989 token->type == TOKEN_CLOSE_CURLY)
991 /* reset current statement details on statement end, and when entering
992 * a deeper scope.
993 * it is a bit ugly to do this in readToken(), but it makes everything
994 * a lot simpler. */
995 CurrentStatement.access = ACCESS_UNDEFINED;
996 CurrentStatement.impl = IMPL_UNDEFINED;
1000 static void enterScope (tokenInfo *const parentToken,
1001 const vString *const extraScope,
1002 const int parentKind);
1004 /* parses a class or an interface:
1005 * class Foo {}
1006 * class Foo extends Bar {}
1007 * class Foo extends Bar implements iFoo, iBar {}
1008 * interface iFoo {}
1009 * interface iBar extends iFoo {} */
1010 static boolean parseClassOrIface (tokenInfo *const token, const phpKind kind)
1012 boolean readNext = TRUE;
1013 implType impl = CurrentStatement.impl;
1014 tokenInfo *name;
1015 vString *inheritance = NULL;
1017 readToken (token);
1018 if (token->type != TOKEN_IDENTIFIER)
1019 return FALSE;
1021 name = newToken ();
1022 copyToken (name, token, TRUE);
1024 inheritance = vStringNew ();
1025 /* skip until the open bracket and assume every identifier (not keyword)
1026 * is an inheritance (like in "class Foo extends Bar implements iA, iB") */
1029 readToken (token);
1031 if (token->type == TOKEN_IDENTIFIER)
1033 if (vStringLength (inheritance) > 0)
1034 vStringPut (inheritance, ',');
1035 vStringCat (inheritance, token->string);
1038 while (token->type != TOKEN_EOF &&
1039 token->type != TOKEN_OPEN_CURLY);
1041 makeClassOrIfaceTag (kind, name, inheritance, impl);
1043 if (token->type == TOKEN_OPEN_CURLY)
1044 enterScope (token, name->string, K_CLASS);
1045 else
1046 readNext = FALSE;
1048 deleteToken (name);
1049 vStringDelete (inheritance);
1051 return readNext;
1054 /* parses a trait:
1055 * trait Foo {} */
1056 static boolean parseTrait (tokenInfo *const token)
1058 boolean readNext = TRUE;
1059 tokenInfo *name;
1061 readToken (token);
1062 if (token->type != TOKEN_IDENTIFIER)
1063 return FALSE;
1065 name = newToken ();
1066 copyToken (name, token, TRUE);
1068 makeSimplePhpTag (name, K_TRAIT, ACCESS_UNDEFINED);
1070 readToken (token);
1071 if (token->type == TOKEN_OPEN_CURLY)
1072 enterScope (token, name->string, K_TRAIT);
1073 else
1074 readNext = FALSE;
1076 deleteToken (name);
1078 return readNext;
1081 /* parse a function
1083 * if @name is NULL, parses a normal function
1084 * function myfunc($foo, $bar) {}
1085 * function &myfunc($foo, $bar) {}
1087 * if @name is not NULL, parses an anonymous function with name @name
1088 * $foo = function($foo, $bar) {}
1089 * $foo = function&($foo, $bar) {}
1090 * $foo = function($foo, $bar) use ($x, &$y) {} */
1091 static boolean parseFunction (tokenInfo *const token, const tokenInfo *name)
1093 boolean readNext = TRUE;
1094 accessType access = CurrentStatement.access;
1095 implType impl = CurrentStatement.impl;
1096 tokenInfo *nameFree = NULL;
1098 readToken (token);
1099 /* skip a possible leading ampersand (return by reference) */
1100 if (token->type == TOKEN_AMPERSAND)
1101 readToken (token);
1103 if (! name)
1105 if (token->type != TOKEN_IDENTIFIER)
1106 return FALSE;
1108 name = nameFree = newToken ();
1109 copyToken (nameFree, token, TRUE);
1110 readToken (token);
1113 if (token->type == TOKEN_OPEN_PAREN)
1115 vString *arglist = vStringNew ();
1116 int depth = 1;
1118 vStringPut (arglist, '(');
1121 readToken (token);
1123 switch (token->type)
1125 case TOKEN_OPEN_PAREN: depth++; break;
1126 case TOKEN_CLOSE_PAREN: depth--; break;
1127 default: break;
1129 /* display part */
1130 switch (token->type)
1132 case TOKEN_AMPERSAND: vStringPut (arglist, '&'); break;
1133 case TOKEN_CLOSE_CURLY: vStringPut (arglist, '}'); break;
1134 case TOKEN_CLOSE_PAREN: vStringPut (arglist, ')'); break;
1135 case TOKEN_CLOSE_SQUARE: vStringPut (arglist, ']'); break;
1136 case TOKEN_COLON: vStringPut (arglist, ':'); break;
1137 case TOKEN_COMMA: vStringCatS (arglist, ", "); break;
1138 case TOKEN_EQUAL_SIGN: vStringCatS (arglist, " = "); break;
1139 case TOKEN_OPEN_CURLY: vStringPut (arglist, '{'); break;
1140 case TOKEN_OPEN_PAREN: vStringPut (arglist, '('); break;
1141 case TOKEN_OPEN_SQUARE: vStringPut (arglist, '['); break;
1142 case TOKEN_PERIOD: vStringPut (arglist, '.'); break;
1143 case TOKEN_SEMICOLON: vStringPut (arglist, ';'); break;
1144 case TOKEN_STRING: vStringCatS (arglist, "'...'"); break;
1146 case TOKEN_IDENTIFIER:
1147 case TOKEN_KEYWORD:
1148 case TOKEN_VARIABLE:
1150 switch (vStringLast (arglist))
1152 case 0:
1153 case ' ':
1154 case '{':
1155 case '(':
1156 case '[':
1157 case '.':
1158 /* no need for a space between those and the identifier */
1159 break;
1161 default:
1162 vStringPut (arglist, ' ');
1163 break;
1165 if (token->type == TOKEN_VARIABLE)
1166 vStringPut (arglist, '$');
1167 vStringCat (arglist, token->string);
1168 break;
1171 default: break;
1174 while (token->type != TOKEN_EOF && depth > 0);
1176 vStringTerminate (arglist);
1178 makeFunctionTag (name, arglist, access, impl);
1179 vStringDelete (arglist);
1181 readToken (token); /* normally it's an open brace or "use" keyword */
1184 /* if parsing Zephir, skip function return type hint */
1185 if (getSourceLanguage () == Lang_zephir && token->type == TOKEN_OPERATOR)
1188 readToken (token);
1189 while (token->type != TOKEN_EOF &&
1190 token->type != TOKEN_OPEN_CURLY &&
1191 token->type != TOKEN_CLOSE_CURLY &&
1192 token->type != TOKEN_SEMICOLON);
1195 /* skip use(...) */
1196 if (token->type == TOKEN_KEYWORD && token->keyword == KEYWORD_use)
1198 readToken (token);
1199 if (token->type == TOKEN_OPEN_PAREN)
1201 int depth = 1;
1205 readToken (token);
1206 switch (token->type)
1208 case TOKEN_OPEN_PAREN: depth++; break;
1209 case TOKEN_CLOSE_PAREN: depth--; break;
1210 default: break;
1213 while (token->type != TOKEN_EOF && depth > 0);
1215 readToken (token);
1219 if (token->type == TOKEN_OPEN_CURLY)
1220 enterScope (token, name->string, K_FUNCTION);
1221 else
1222 readNext = FALSE;
1224 if (nameFree)
1225 deleteToken (nameFree);
1227 return readNext;
1230 /* parses declarations of the form
1231 * const NAME = VALUE */
1232 static boolean parseConstant (tokenInfo *const token)
1234 tokenInfo *name;
1236 readToken (token); /* skip const keyword */
1237 if (token->type != TOKEN_IDENTIFIER)
1238 return FALSE;
1240 name = newToken ();
1241 copyToken (name, token, TRUE);
1243 readToken (token);
1244 if (token->type == TOKEN_EQUAL_SIGN)
1245 makeSimplePhpTag (name, K_DEFINE, ACCESS_UNDEFINED);
1247 deleteToken (name);
1249 return token->type == TOKEN_EQUAL_SIGN;
1252 /* parses declarations of the form
1253 * define('NAME', 'VALUE')
1254 * define(NAME, 'VALUE) */
1255 static boolean parseDefine (tokenInfo *const token)
1257 int depth = 1;
1259 readToken (token); /* skip "define" identifier */
1260 if (token->type != TOKEN_OPEN_PAREN)
1261 return FALSE;
1263 readToken (token);
1264 if (token->type == TOKEN_STRING ||
1265 token->type == TOKEN_IDENTIFIER)
1267 makeSimplePhpTag (token, K_DEFINE, ACCESS_UNDEFINED);
1268 readToken (token);
1271 /* skip until the close parenthesis.
1272 * no need to handle nested blocks since they would be invalid
1273 * in this context anyway (the VALUE may only be a scalar, like
1274 * 42
1275 * (42)
1276 * and alike) */
1277 while (token->type != TOKEN_EOF && depth > 0)
1279 switch (token->type)
1281 case TOKEN_OPEN_PAREN: depth++; break;
1282 case TOKEN_CLOSE_PAREN: depth--; break;
1283 default: break;
1285 readToken (token);
1288 return FALSE;
1291 /* parses declarations of the form
1292 * $var = VALUE
1293 * $var; */
1294 static boolean parseVariable (tokenInfo *const token)
1296 tokenInfo *name;
1297 boolean readNext = TRUE;
1298 accessType access = CurrentStatement.access;
1300 name = newToken ();
1301 copyToken (name, token, TRUE);
1303 readToken (token);
1304 if (token->type == TOKEN_EQUAL_SIGN)
1306 phpKind kind = K_VARIABLE;
1308 if (token->parentKind == K_FUNCTION)
1309 kind = K_LOCAL_VARIABLE;
1311 readToken (token);
1312 if (token->type == TOKEN_KEYWORD &&
1313 token->keyword == KEYWORD_function &&
1314 PhpKinds[kind].enabled)
1316 if (parseFunction (token, name))
1317 readToken (token);
1318 readNext = (boolean) (token->type == TOKEN_SEMICOLON);
1320 else
1322 makeSimplePhpTag (name, kind, access);
1323 readNext = FALSE;
1326 else if (token->type == TOKEN_SEMICOLON)
1328 /* generate tags for variable declarations in classes
1329 * class Foo {
1330 * protected $foo;
1332 * but don't get fooled by stuff like $foo = $bar; */
1333 if (token->parentKind == K_CLASS || token->parentKind == K_INTERFACE)
1334 makeSimplePhpTag (name, K_VARIABLE, access);
1336 else
1337 readNext = FALSE;
1339 deleteToken (name);
1341 return readNext;
1344 /* parses namespace declarations
1345 * namespace Foo {}
1346 * namespace Foo\Bar {}
1347 * namespace Foo;
1348 * namespace Foo\Bar;
1349 * namespace;
1350 * napespace {} */
1351 static boolean parseNamespace (tokenInfo *const token)
1353 tokenInfo *nsToken = newToken ();
1355 vStringClear (CurrentNamespace);
1356 copyToken (nsToken, token, FALSE);
1360 readToken (token);
1361 if (token->type == TOKEN_IDENTIFIER)
1363 if (vStringLength (CurrentNamespace) > 0)
1364 vStringPut (CurrentNamespace, '\\');
1365 vStringCat (CurrentNamespace, token->string);
1368 while (token->type != TOKEN_EOF &&
1369 token->type != TOKEN_SEMICOLON &&
1370 token->type != TOKEN_OPEN_CURLY);
1372 vStringTerminate (CurrentNamespace);
1373 if (vStringLength (CurrentNamespace) > 0)
1374 makeNamespacePhpTag (nsToken, CurrentNamespace);
1376 if (token->type == TOKEN_OPEN_CURLY)
1377 enterScope (token, NULL, -1);
1379 deleteToken (nsToken);
1381 return TRUE;
1384 static void enterScope (tokenInfo *const parentToken,
1385 const vString *const extraScope,
1386 const int parentKind)
1388 tokenInfo *token = newToken ();
1389 int origParentKind = parentToken->parentKind;
1391 copyToken (token, parentToken, TRUE);
1393 if (extraScope)
1395 addToScope (token, extraScope);
1396 token->parentKind = parentKind;
1399 readToken (token);
1400 while (token->type != TOKEN_EOF &&
1401 token->type != TOKEN_CLOSE_CURLY)
1403 boolean readNext = TRUE;
1405 switch (token->type)
1407 case TOKEN_OPEN_CURLY:
1408 enterScope (token, NULL, -1);
1409 break;
1411 case TOKEN_KEYWORD:
1412 switch (token->keyword)
1414 case KEYWORD_class: readNext = parseClassOrIface (token, K_CLASS); break;
1415 case KEYWORD_interface: readNext = parseClassOrIface (token, K_INTERFACE); break;
1416 case KEYWORD_trait: readNext = parseTrait (token); break;
1417 case KEYWORD_function: readNext = parseFunction (token, NULL); break;
1418 case KEYWORD_const: readNext = parseConstant (token); break;
1419 case KEYWORD_define: readNext = parseDefine (token); break;
1421 case KEYWORD_namespace: readNext = parseNamespace (token); break;
1423 case KEYWORD_private: CurrentStatement.access = ACCESS_PRIVATE; break;
1424 case KEYWORD_protected: CurrentStatement.access = ACCESS_PROTECTED; break;
1425 case KEYWORD_public: CurrentStatement.access = ACCESS_PUBLIC; break;
1426 case KEYWORD_var: CurrentStatement.access = ACCESS_PUBLIC; break;
1428 case KEYWORD_abstract: CurrentStatement.impl = IMPL_ABSTRACT; break;
1430 default: break;
1432 break;
1434 case TOKEN_VARIABLE:
1435 readNext = parseVariable (token);
1436 break;
1438 default: break;
1441 if (readNext)
1442 readToken (token);
1445 copyToken (parentToken, token, FALSE);
1446 parentToken->parentKind = origParentKind;
1447 deleteToken (token);
1450 static void findTags (void)
1452 tokenInfo *const token = newToken ();
1454 CurrentStatement.access = ACCESS_UNDEFINED;
1455 CurrentStatement.impl = IMPL_UNDEFINED;
1456 CurrentNamespace = vStringNew ();
1460 enterScope (token, NULL, -1);
1462 while (token->type != TOKEN_EOF); /* keep going even with unmatched braces */
1464 vStringDelete (CurrentNamespace);
1465 deleteToken (token);
1468 static void findPhpTags (void)
1470 InPhp = FALSE;
1471 findTags ();
1474 static void findZephirTags (void)
1476 InPhp = TRUE;
1477 findTags ();
1480 static void initializePhpParser (const langType language)
1482 Lang_php = language;
1483 buildPhpKeywordHash (language);
1486 static void initializeZephirParser (const langType language)
1488 Lang_zephir = language;
1489 buildPhpKeywordHash (language);
1492 extern parserDefinition* PhpParser (void)
1494 static const char *const extensions [] = { "php", "php3", "php4", "php5", "phtml", NULL };
1495 parserDefinition* def = parserNew ("PHP");
1496 def->kinds = PhpKinds;
1497 def->kindCount = KIND_COUNT (PhpKinds);
1498 def->extensions = extensions;
1499 def->parser = findPhpTags;
1500 def->initialize = initializePhpParser;
1501 return def;
1504 extern parserDefinition* ZephirParser (void)
1506 static const char *const extensions [] = { "zep", NULL };
1507 parserDefinition* def = parserNew ("Zephir");
1508 def->kinds = PhpKinds;
1509 def->kindCount = KIND_COUNT (PhpKinds);
1510 def->extensions = extensions;
1511 def->parser = findZephirTags;
1512 def->initialize = initializeZephirParser;
1513 return def;
1516 /* vi:set tabstop=4 shiftwidth=4: */