Rename tagEntryInfo.extensionFields.scope
[geany-mirror.git] / ctags / parsers / php.c
blob1b67e6c75f00760d90bdb473bf936b50e6d6f902
/*
*   Copyright (c) 2013, Colomban Wendling <ban@herbesfolles.org>
*
*   This source code is released for free distribution under the terms of the
*   GNU General Public License version 2 or (at your option) any later version.
*
*   This module contains code for generating tags for the PHP scripting
*   language.
*/
/*
*   INCLUDE FILES
*/
14 #include "general.h" /* must always come first */
15 #include "main.h"
16 #include "parse.h"
17 #include "read.h"
18 #include "vstring.h"
19 #include "keyword.h"
20 #include "entry.h"
21 #include "routines.h"
22 #include "debug.h"
/* text inserted between the components of a nested scope name */
#define SCOPE_SEPARATOR "::"
/* Identifiers of the words registered in PhpKeywordTable.
 * KEYWORD_NONE marks a token that is not a keyword. */
typedef enum {
	KEYWORD_NONE = -1,
	KEYWORD_abstract, KEYWORD_and, KEYWORD_as,
	KEYWORD_break,
	KEYWORD_callable, KEYWORD_case, KEYWORD_catch, KEYWORD_class,
	KEYWORD_clone, KEYWORD_const, KEYWORD_continue,
	KEYWORD_declare, KEYWORD_define, KEYWORD_default, KEYWORD_do,
	KEYWORD_echo, KEYWORD_else, KEYWORD_elif, KEYWORD_enddeclare,
	KEYWORD_endfor, KEYWORD_endforeach, KEYWORD_endif, KEYWORD_endswitch,
	KEYWORD_endwhile, KEYWORD_extends,
	KEYWORD_final, KEYWORD_finally, KEYWORD_for, KEYWORD_foreach,
	KEYWORD_function,
	KEYWORD_global, KEYWORD_goto,
	KEYWORD_if, KEYWORD_implements, KEYWORD_include, KEYWORD_include_once,
	KEYWORD_instanceof, KEYWORD_insteadof, KEYWORD_interface,
	KEYWORD_namespace, KEYWORD_new,
	KEYWORD_or,
	KEYWORD_print, KEYWORD_private, KEYWORD_protected, KEYWORD_public,
	KEYWORD_require, KEYWORD_require_once, KEYWORD_return,
	KEYWORD_static, KEYWORD_switch,
	KEYWORD_throw, KEYWORD_trait, KEYWORD_try,
	KEYWORD_use,
	KEYWORD_var,
	KEYWORD_while,
	KEYWORD_xor,
	KEYWORD_yield
} keywordId;
/* Visibility attached to the current statement; COUNT_ACCESS is the number
 * of real values and sizes the name table in accessToString(). */
typedef enum {
	ACCESS_UNDEFINED,
	ACCESS_PRIVATE,
	ACCESS_PROTECTED,
	ACCESS_PUBLIC,
	COUNT_ACCESS
} accessType;
/* Implementation qualifier of the current statement; COUNT_IMPL sizes the
 * name table in implToString(). */
typedef enum {
	IMPL_UNDEFINED,
	IMPL_ABSTRACT,
	COUNT_IMPL
} implType;
/* Tag kinds; the values index PhpKinds[], so both must stay in the same
 * order. */
typedef enum {
	K_CLASS,
	K_DEFINE,
	K_FUNCTION,
	K_INTERFACE,
	K_LOCAL_VARIABLE,
	K_NAMESPACE,
	K_TRAIT,
	K_VARIABLE,
	COUNT_KIND
} phpKind;
117 static kindOption PhpKinds[COUNT_KIND] = {
118 { TRUE, 'c', "class", "classes" },
119 { TRUE, 'd', "define", "constant definitions" },
120 { TRUE, 'f', "function", "functions" },
121 { TRUE, 'i', "interface", "interfaces" },
122 { FALSE, 'l', "local", "local variables" },
123 { TRUE, 'n', "namespace", "namespaces" },
124 { TRUE, 't', "trait", "traits" },
125 { TRUE, 'v', "variable", "variables" }
128 static const keywordTable PhpKeywordTable[] = {
129 /* keyword keyword ID */
130 { "abstract", KEYWORD_abstract },
131 { "and", KEYWORD_and },
132 { "as", KEYWORD_as },
133 { "break", KEYWORD_break },
134 { "callable", KEYWORD_callable },
135 { "case", KEYWORD_case },
136 { "catch", KEYWORD_catch },
137 { "cfunction", KEYWORD_function }, /* nobody knows what the hell this is, but it seems to behave much like "function" so bind it to it */
138 { "class", KEYWORD_class },
139 { "clone", KEYWORD_clone },
140 { "const", KEYWORD_const },
141 { "continue", KEYWORD_continue },
142 { "declare", KEYWORD_declare },
143 { "define", KEYWORD_define }, /* this isn't really a keyword but we handle it so it's easier this way */
144 { "default", KEYWORD_default },
145 { "do", KEYWORD_do },
146 { "echo", KEYWORD_echo },
147 { "else", KEYWORD_else },
148 { "elseif", KEYWORD_elif },
149 { "enddeclare", KEYWORD_enddeclare },
150 { "endfor", KEYWORD_endfor },
151 { "endforeach", KEYWORD_endforeach },
152 { "endif", KEYWORD_endif },
153 { "endswitch", KEYWORD_endswitch },
154 { "endwhile", KEYWORD_endwhile },
155 { "extends", KEYWORD_extends },
156 { "final", KEYWORD_final },
157 { "finally", KEYWORD_finally },
158 { "for", KEYWORD_for },
159 { "foreach", KEYWORD_foreach },
160 { "function", KEYWORD_function },
161 { "global", KEYWORD_global },
162 { "goto", KEYWORD_goto },
163 { "if", KEYWORD_if },
164 { "implements", KEYWORD_implements },
165 { "include", KEYWORD_include },
166 { "include_once", KEYWORD_include_once },
167 { "instanceof", KEYWORD_instanceof },
168 { "insteadof", KEYWORD_insteadof },
169 { "interface", KEYWORD_interface },
170 { "namespace", KEYWORD_namespace },
171 { "new", KEYWORD_new },
172 { "or", KEYWORD_or },
173 { "print", KEYWORD_print },
174 { "private", KEYWORD_private },
175 { "protected", KEYWORD_protected },
176 { "public", KEYWORD_public },
177 { "require", KEYWORD_require },
178 { "require_once", KEYWORD_require_once },
179 { "return", KEYWORD_return },
180 { "static", KEYWORD_static },
181 { "switch", KEYWORD_switch },
182 { "throw", KEYWORD_throw },
183 { "trait", KEYWORD_trait },
184 { "try", KEYWORD_try },
185 { "use", KEYWORD_use },
186 { "var", KEYWORD_var },
187 { "while", KEYWORD_while },
188 { "xor", KEYWORD_xor },
189 { "yield", KEYWORD_yield }
/* Lexical categories produced by readToken(). */
typedef enum eTokenType {
	TOKEN_UNDEFINED,
	TOKEN_EOF,
	TOKEN_CHARACTER,
	TOKEN_CLOSE_PAREN,
	TOKEN_SEMICOLON,
	TOKEN_COLON,
	TOKEN_COMMA,
	TOKEN_KEYWORD,
	TOKEN_OPEN_PAREN,
	TOKEN_OPERATOR,
	TOKEN_IDENTIFIER,
	TOKEN_STRING,
	TOKEN_PERIOD,
	TOKEN_OPEN_CURLY,
	TOKEN_CLOSE_CURLY,
	TOKEN_EQUAL_SIGN,
	TOKEN_OPEN_SQUARE,
	TOKEN_CLOSE_SQUARE,
	TOKEN_VARIABLE,
	TOKEN_AMPERSAND
} tokenType;
216 typedef struct {
217 tokenType type;
218 keywordId keyword;
219 vString * string;
220 vString * scope;
221 unsigned long lineNumber;
222 MIOPos filePosition;
223 int parentKind; /* -1 if none */
224 } tokenInfo;
226 static langType Lang_php;
227 static langType Lang_zephir;
229 static boolean InPhp = FALSE; /* whether we are between <? ?> */
231 /* current statement details */
232 static struct {
233 accessType access;
234 implType impl;
235 } CurrentStatement;
237 /* Current namespace */
238 static vString *CurrentNamespace;
241 static const char *accessToString (const accessType access)
243 static const char *const names[COUNT_ACCESS] = {
244 "undefined",
245 "private",
246 "protected",
247 "public"
250 Assert (access < COUNT_ACCESS);
252 return names[access];
255 static const char *implToString (const implType impl)
257 static const char *const names[COUNT_IMPL] = {
258 "undefined",
259 "abstract"
262 Assert (impl < COUNT_IMPL);
264 return names[impl];
267 static void initPhpEntry (tagEntryInfo *const e, const tokenInfo *const token,
268 const phpKind kind, const accessType access)
270 static vString *fullScope = NULL;
271 int parentKind = -1;
273 if (fullScope == NULL)
274 fullScope = vStringNew ();
275 else
276 vStringClear (fullScope);
278 if (vStringLength (CurrentNamespace) > 0)
280 vStringCopy (fullScope, CurrentNamespace);
281 parentKind = K_NAMESPACE;
284 initTagEntry (e, vStringValue (token->string));
286 e->lineNumber = token->lineNumber;
287 e->filePosition = token->filePosition;
288 e->kindName = PhpKinds[kind].name;
289 e->kind = (char) PhpKinds[kind].letter;
291 if (access != ACCESS_UNDEFINED)
292 e->extensionFields.access = accessToString (access);
293 if (vStringLength (token->scope) > 0)
295 parentKind = token->parentKind;
296 if (vStringLength (fullScope) > 0)
297 vStringCatS (fullScope, SCOPE_SEPARATOR);
298 vStringCat (fullScope, token->scope);
300 if (vStringLength (fullScope) > 0)
302 Assert (parentKind >= 0);
304 vStringTerminate (fullScope);
305 e->extensionFields.scopeKind = &(PhpKinds[parentKind]);
306 e->extensionFields.scopeName = vStringValue (fullScope);
310 static void makeSimplePhpTag (const tokenInfo *const token, const phpKind kind,
311 const accessType access)
313 if (PhpKinds[kind].enabled)
315 tagEntryInfo e;
317 initPhpEntry (&e, token, kind, access);
318 makeTagEntry (&e);
322 static void makeNamespacePhpTag (const tokenInfo *const token, const vString *const name)
324 if (PhpKinds[K_NAMESPACE].enabled)
326 tagEntryInfo e;
328 initTagEntry (&e, vStringValue (name));
330 e.lineNumber = token->lineNumber;
331 e.filePosition = token->filePosition;
332 e.kindName = PhpKinds[K_NAMESPACE].name;
333 e.kind = (char) PhpKinds[K_NAMESPACE].letter;
335 makeTagEntry (&e);
339 static void makeClassOrIfaceTag (const phpKind kind, const tokenInfo *const token,
340 vString *const inheritance, const implType impl)
342 if (PhpKinds[kind].enabled)
344 tagEntryInfo e;
346 initPhpEntry (&e, token, kind, ACCESS_UNDEFINED);
348 if (impl != IMPL_UNDEFINED)
349 e.extensionFields.implementation = implToString (impl);
350 if (vStringLength (inheritance) > 0)
351 e.extensionFields.inheritance = vStringValue (inheritance);
353 makeTagEntry (&e);
357 static void makeFunctionTag (const tokenInfo *const token,
358 const vString *const arglist,
359 const accessType access, const implType impl)
361 if (PhpKinds[K_FUNCTION].enabled)
363 tagEntryInfo e;
365 initPhpEntry (&e, token, K_FUNCTION, access);
367 if (impl != IMPL_UNDEFINED)
368 e.extensionFields.implementation = implToString (impl);
369 if (arglist)
370 e.extensionFields.signature = vStringValue (arglist);
372 makeTagEntry (&e);
376 static tokenInfo *newToken (void)
378 tokenInfo *const token = xMalloc (1, tokenInfo);
380 token->type = TOKEN_UNDEFINED;
381 token->keyword = KEYWORD_NONE;
382 token->string = vStringNew ();
383 token->scope = vStringNew ();
384 token->lineNumber = getSourceLineNumber ();
385 token->filePosition = getInputFilePosition ();
386 token->parentKind = -1;
388 return token;
391 static void deleteToken (tokenInfo *const token)
393 vStringDelete (token->string);
394 vStringDelete (token->scope);
395 eFree (token);
398 static void copyToken (tokenInfo *const dest, const tokenInfo *const src,
399 boolean scope)
401 dest->lineNumber = src->lineNumber;
402 dest->filePosition = src->filePosition;
403 dest->type = src->type;
404 dest->keyword = src->keyword;
405 vStringCopy(dest->string, src->string);
406 dest->parentKind = src->parentKind;
407 if (scope)
408 vStringCopy(dest->scope, src->scope);
#if 0
#include <stdio.h>

/* Debugging aid: returns a printable name for @type, or NULL if the value
 * is not a known token type. */
static const char *tokenTypeName (const tokenType type)
{
	switch (type)
	{
		case TOKEN_UNDEFINED:		return "undefined";
		case TOKEN_EOF:				return "EOF";
		case TOKEN_CHARACTER:		return "character";
		case TOKEN_CLOSE_PAREN:		return "')'";
		case TOKEN_SEMICOLON:		return "';'";
		case TOKEN_COLON:			return "':'";
		case TOKEN_COMMA:			return "','";
		case TOKEN_OPEN_PAREN:		return "'('";
		case TOKEN_OPERATOR:		return "operator";
		case TOKEN_IDENTIFIER:		return "identifier";
		case TOKEN_KEYWORD:			return "keyword";
		case TOKEN_STRING:			return "string";
		case TOKEN_PERIOD:			return "'.'";
		case TOKEN_OPEN_CURLY:		return "'{'";
		case TOKEN_CLOSE_CURLY:		return "'}'";
		case TOKEN_EQUAL_SIGN:		return "'='";
		case TOKEN_OPEN_SQUARE:		return "'['";
		case TOKEN_CLOSE_SQUARE:	return "']'";
		case TOKEN_VARIABLE:		return "variable";
		/* this case was missing, so '&' tokens yielded NULL, which
		 * printToken() then handed to a %s conversion */
		case TOKEN_AMPERSAND:		return "'&'";
	}
	return NULL;
}

/* Debugging aid: dumps @token (address, type, line, scope, and its content
 * or keyword name where relevant) to stderr. */
static void printToken (const tokenInfo *const token)
{
	fprintf (stderr, "%p:\n\ttype:\t%s\n\tline:\t%lu\n\tscope:\t%s\n", (void *) token,
			 tokenTypeName (token->type),
			 token->lineNumber,
			 vStringValue (token->scope));
	switch (token->type)
	{
		case TOKEN_IDENTIFIER:
		case TOKEN_STRING:
		case TOKEN_VARIABLE:
			fprintf (stderr, "\tcontent:\t%s\n", vStringValue (token->string));
			break;

		case TOKEN_KEYWORD:
		{
			size_t n = sizeof PhpKeywordTable / sizeof PhpKeywordTable[0];
			size_t i;

			/* reverse lookup of the keyword's spelling */
			fprintf (stderr, "\tkeyword:\t");
			for (i = 0; i < n; i++)
			{
				if (PhpKeywordTable[i].id == token->keyword)
				{
					fprintf (stderr, "%s\n", PhpKeywordTable[i].name);
					break;
				}
			}
			if (i >= n)
				fprintf (stderr, "(unknown)\n");
			break;
		}

		default: break;
	}
}
#endif
478 static void addToScope (tokenInfo *const token, const vString *const extra)
480 if (vStringLength (token->scope) > 0)
481 vStringCatS (token->scope, SCOPE_SEPARATOR);
482 vStringCatS (token->scope, vStringValue (extra));
483 vStringTerminate(token->scope);
486 static boolean isIdentChar (const int c)
488 return (isalnum (c) || c == '_' || c >= 0x80);
/* Consumes input up to and including the next occurrence of @c.
 * Returns @c, or EOF if the input ended first. */
static int skipToCharacter (const int c)
{
	for (;;)
	{
		const int got = getcFromInputFile ();

		if (got == EOF || got == c)
			return got;
	}
}
501 static void parseString (vString *const string, const int delimiter)
503 while (TRUE)
505 int c = getcFromInputFile ();
507 if (c == '\\' && (c = getcFromInputFile ()) != EOF)
508 vStringPut (string, (char) c);
509 else if (c == EOF || c == delimiter)
510 break;
511 else
512 vStringPut (string, (char) c);
514 vStringTerminate (string);
517 /* reads an HereDoc or a NowDoc (the part after the <<<).
518 * <<<[ \t]*(ID|'ID'|"ID")
519 * ...
520 * ID;?
522 * note that:
523 * 1) starting ID must be immediately followed by a newline;
524 * 2) closing ID is the same as opening one;
525 * 3) closing ID must be immediately followed by a newline or a semicolon
526 * then a newline.
528 * Example of a *single* valid heredoc:
529 * <<< FOO
530 * something
531 * something else
532 * FOO this is not an end
533 * FOO; this isn't either
534 * FOO; # neither this is
535 * FOO;
536 * # previous line was the end, but the semicolon wasn't required
538 static void parseHeredoc (vString *const string)
540 int c;
541 unsigned int len;
542 char delimiter[64]; /* arbitrary limit, but more is crazy anyway */
543 int quote = 0;
547 c = getcFromInputFile ();
549 while (c == ' ' || c == '\t');
551 if (c == '\'' || c == '"')
553 quote = c;
554 c = getcFromInputFile ();
556 for (len = 0; len < (sizeof delimiter / sizeof delimiter[0]) - 1; len++)
558 if (! isIdentChar (c))
559 break;
560 delimiter[len] = (char) c;
561 c = getcFromInputFile ();
563 delimiter[len] = 0;
565 if (len == 0) /* no delimiter, give up */
566 goto error;
567 if (quote)
569 if (c != quote) /* no closing quote for quoted identifier, give up */
570 goto error;
571 c = getcFromInputFile ();
573 if (c != '\r' && c != '\n') /* missing newline, give up */
574 goto error;
578 c = getcFromInputFile ();
580 if (c != '\r' && c != '\n')
581 vStringPut (string, (char) c);
582 else
584 /* new line, check for a delimiter right after */
585 int nl = c;
586 int extra = EOF;
588 c = getcFromInputFile ();
589 for (len = 0; c != 0 && (c - delimiter[len]) == 0; len++)
590 c = getcFromInputFile ();
592 if (delimiter[len] != 0)
593 ungetcToInputFile (c);
594 else
596 /* line start matched the delimiter, now check whether there
597 * is anything after it */
598 if (c == '\r' || c == '\n')
600 ungetcToInputFile (c);
601 break;
603 else if (c == ';')
605 int d = getcFromInputFile ();
606 if (d == '\r' || d == '\n')
608 /* put back the semicolon since it's not part of the
609 * string. we can't put back the newline, but it's a
610 * whitespace character nobody cares about it anyway */
611 ungetcToInputFile (';');
612 break;
614 else
616 /* put semicolon in the string and continue */
617 extra = ';';
618 ungetcToInputFile (d);
622 /* if we are here it wasn't a delimiter, so put everything in the
623 * string */
624 vStringPut (string, (char) nl);
625 vStringNCatS (string, delimiter, len);
626 if (extra != EOF)
627 vStringPut (string, (char) extra);
630 while (c != EOF);
632 vStringTerminate (string);
634 return;
636 error:
637 ungetcToInputFile (c);
640 static void parseIdentifier (vString *const string, const int firstChar)
642 int c = firstChar;
645 vStringPut (string, (char) c);
646 c = getcFromInputFile ();
647 } while (isIdentChar (c));
648 ungetcToInputFile (c);
649 vStringTerminate (string);
652 static keywordId analyzeToken (vString *const name, langType language)
654 vString *keyword = vStringNew ();
655 keywordId result;
656 vStringCopyToLower (keyword, name);
657 result = lookupKeyword (vStringValue (keyword), language);
658 vStringDelete (keyword);
659 return result;
662 static boolean isSpace (int c)
664 return (c == '\t' || c == ' ' || c == '\v' ||
665 c == '\n' || c == '\r' || c == '\f');
/* Starting with the already-read character @c, consumes input until a
 * non-whitespace character shows up and returns it. */
static int skipWhitespaces (int c)
{
	for (; isSpace (c); c = getcFromInputFile ())
		;
	return c;
}
675 /* <script[:white:]+language[:white:]*=[:white:]*(php|'php'|"php")[:white:]*>
677 * This is ugly, but the whole "<script language=php>" tag is and we can't
678 * really do better without adding a lot of code only for this */
679 static boolean isOpenScriptLanguagePhp (int c)
681 int quote = 0;
683 /* <script[:white:]+language[:white:]*= */
684 if (c != '<' ||
685 tolower ((c = getcFromInputFile ())) != 's' ||
686 tolower ((c = getcFromInputFile ())) != 'c' ||
687 tolower ((c = getcFromInputFile ())) != 'r' ||
688 tolower ((c = getcFromInputFile ())) != 'i' ||
689 tolower ((c = getcFromInputFile ())) != 'p' ||
690 tolower ((c = getcFromInputFile ())) != 't' ||
691 ! isSpace ((c = getcFromInputFile ())) ||
692 tolower ((c = skipWhitespaces (c))) != 'l' ||
693 tolower ((c = getcFromInputFile ())) != 'a' ||
694 tolower ((c = getcFromInputFile ())) != 'n' ||
695 tolower ((c = getcFromInputFile ())) != 'g' ||
696 tolower ((c = getcFromInputFile ())) != 'u' ||
697 tolower ((c = getcFromInputFile ())) != 'a' ||
698 tolower ((c = getcFromInputFile ())) != 'g' ||
699 tolower ((c = getcFromInputFile ())) != 'e' ||
700 (c = skipWhitespaces (getcFromInputFile ())) != '=')
701 return FALSE;
703 /* (php|'php'|"php")> */
704 c = skipWhitespaces (getcFromInputFile ());
705 if (c == '"' || c == '\'')
707 quote = c;
708 c = getcFromInputFile ();
710 if (tolower (c) != 'p' ||
711 tolower ((c = getcFromInputFile ())) != 'h' ||
712 tolower ((c = getcFromInputFile ())) != 'p' ||
713 (quote != 0 && (c = getcFromInputFile ()) != quote) ||
714 (c = skipWhitespaces (getcFromInputFile ())) != '>')
715 return FALSE;
717 return TRUE;
/* Skips non-PHP content until a PHP opening is found: "<?" (but not
 * "<?xml") or a <script language="php"> tag.
 * Returns the last character read, which is EOF if no opening was found. */
static int findPhpStart (void)
{
	int c;

	for (;;)
	{
		c = getcFromInputFile ();
		if (c == '<')
		{
			c = getcFromInputFile ();
			if (c != '?')
			{
				/* maybe <script language="php"> */
				ungetcToInputFile (c);
				if (isOpenScriptLanguagePhp ('<'))
					break;
			}
			/* <? and <?php: don't enter PHP mode on "<?xml", yet still
			 * support short open tags (<?) */
			else if (tolower ((c = getcFromInputFile ())) != 'x' ||
					 tolower ((c = getcFromInputFile ())) != 'm' ||
					 tolower ((c = getcFromInputFile ())) != 'l')
			{
				break;
			}
		}

		if (c == EOF)
			break;
	}

	return c;
}
753 static int skipSingleComment (void)
755 int c;
758 c = getcFromInputFile ();
759 if (c == '\r')
761 int next = getcFromInputFile ();
762 if (next != '\n')
763 ungetcToInputFile (next);
764 else
765 c = next;
767 /* ?> in single-line comments leaves PHP mode */
768 else if (c == '?')
770 int next = getcFromInputFile ();
771 if (next == '>')
772 InPhp = FALSE;
773 else
774 ungetcToInputFile (next);
776 } while (InPhp && c != EOF && c != '\n' && c != '\r');
777 return c;
780 static void readToken (tokenInfo *const token)
782 int c;
784 token->type = TOKEN_UNDEFINED;
785 token->keyword = KEYWORD_NONE;
786 vStringClear (token->string);
788 getNextChar:
790 if (! InPhp)
792 c = findPhpStart ();
793 if (c != EOF)
794 InPhp = TRUE;
796 else
797 c = getcFromInputFile ();
799 c = skipWhitespaces (c);
801 token->lineNumber = getSourceLineNumber ();
802 token->filePosition = getInputFilePosition ();
804 switch (c)
806 case EOF: token->type = TOKEN_EOF; break;
807 case '(': token->type = TOKEN_OPEN_PAREN; break;
808 case ')': token->type = TOKEN_CLOSE_PAREN; break;
809 case ';': token->type = TOKEN_SEMICOLON; break;
810 case ',': token->type = TOKEN_COMMA; break;
811 case '.': token->type = TOKEN_PERIOD; break;
812 case ':': token->type = TOKEN_COLON; break;
813 case '{': token->type = TOKEN_OPEN_CURLY; break;
814 case '}': token->type = TOKEN_CLOSE_CURLY; break;
815 case '[': token->type = TOKEN_OPEN_SQUARE; break;
816 case ']': token->type = TOKEN_CLOSE_SQUARE; break;
817 case '&': token->type = TOKEN_AMPERSAND; break;
819 case '=':
821 int d = getcFromInputFile ();
822 if (d == '=' || d == '>')
823 token->type = TOKEN_OPERATOR;
824 else
826 ungetcToInputFile (d);
827 token->type = TOKEN_EQUAL_SIGN;
829 break;
832 case '\'':
833 case '"':
834 token->type = TOKEN_STRING;
835 parseString (token->string, c);
836 token->lineNumber = getSourceLineNumber ();
837 token->filePosition = getInputFilePosition ();
838 break;
840 case '<':
842 int d = getcFromInputFile ();
843 if (d == '/')
845 /* </script[:white:]*> */
846 if (tolower ((d = getcFromInputFile ())) == 's' &&
847 tolower ((d = getcFromInputFile ())) == 'c' &&
848 tolower ((d = getcFromInputFile ())) == 'r' &&
849 tolower ((d = getcFromInputFile ())) == 'i' &&
850 tolower ((d = getcFromInputFile ())) == 'p' &&
851 tolower ((d = getcFromInputFile ())) == 't' &&
852 (d = skipWhitespaces (getcFromInputFile ())) == '>')
854 InPhp = FALSE;
855 goto getNextChar;
857 else
859 ungetcToInputFile (d);
860 token->type = TOKEN_UNDEFINED;
863 else if (d == '<' && (d = getcFromInputFile ()) == '<')
865 token->type = TOKEN_STRING;
866 parseHeredoc (token->string);
868 else
870 ungetcToInputFile (d);
871 token->type = TOKEN_UNDEFINED;
873 break;
876 case '#': /* comment */
877 skipSingleComment ();
878 goto getNextChar;
879 break;
881 case '+':
882 case '-':
883 case '*':
884 case '%':
886 int d = getcFromInputFile ();
887 if (d != '=')
888 ungetcToInputFile (d);
889 token->type = TOKEN_OPERATOR;
890 break;
893 case '/': /* division or comment start */
895 int d = getcFromInputFile ();
896 if (d == '/') /* single-line comment */
898 skipSingleComment ();
899 goto getNextChar;
901 else if (d == '*')
905 c = skipToCharacter ('*');
906 if (c != EOF)
908 c = getcFromInputFile ();
909 if (c == '/')
910 break;
911 else
912 ungetcToInputFile (c);
914 } while (c != EOF && c != '\0');
915 goto getNextChar;
917 else
919 if (d != '=')
920 ungetcToInputFile (d);
921 token->type = TOKEN_OPERATOR;
923 break;
926 case '$': /* variable start */
928 int d = getcFromInputFile ();
929 if (! isIdentChar (d))
931 ungetcToInputFile (d);
932 token->type = TOKEN_UNDEFINED;
934 else
936 parseIdentifier (token->string, d);
937 token->type = TOKEN_VARIABLE;
939 break;
942 case '?': /* maybe the end of the PHP chunk */
944 int d = getcFromInputFile ();
945 if (d == '>')
947 InPhp = FALSE;
948 goto getNextChar;
950 else
952 ungetcToInputFile (d);
953 token->type = TOKEN_UNDEFINED;
955 break;
958 default:
959 if (! isIdentChar (c))
960 token->type = TOKEN_UNDEFINED;
961 else
963 parseIdentifier (token->string, c);
964 token->keyword = analyzeToken (token->string, getSourceLanguage ());
965 if (token->keyword == KEYWORD_NONE)
966 token->type = TOKEN_IDENTIFIER;
967 else
968 token->type = TOKEN_KEYWORD;
970 break;
973 if (token->type == TOKEN_SEMICOLON ||
974 token->type == TOKEN_OPEN_CURLY ||
975 token->type == TOKEN_CLOSE_CURLY)
977 /* reset current statement details on statement end, and when entering
978 * a deeper scope.
979 * it is a bit ugly to do this in readToken(), but it makes everything
980 * a lot simpler. */
981 CurrentStatement.access = ACCESS_UNDEFINED;
982 CurrentStatement.impl = IMPL_UNDEFINED;
986 static void enterScope (tokenInfo *const parentToken,
987 const vString *const extraScope,
988 const int parentKind);
990 /* parses a class or an interface:
991 * class Foo {}
992 * class Foo extends Bar {}
993 * class Foo extends Bar implements iFoo, iBar {}
994 * interface iFoo {}
995 * interface iBar extends iFoo {} */
996 static boolean parseClassOrIface (tokenInfo *const token, const phpKind kind)
998 boolean readNext = TRUE;
999 implType impl = CurrentStatement.impl;
1000 tokenInfo *name;
1001 vString *inheritance = NULL;
1003 readToken (token);
1004 if (token->type != TOKEN_IDENTIFIER)
1005 return FALSE;
1007 name = newToken ();
1008 copyToken (name, token, TRUE);
1010 inheritance = vStringNew ();
1011 /* skip until the open bracket and assume every identifier (not keyword)
1012 * is an inheritance (like in "class Foo extends Bar implements iA, iB") */
1015 readToken (token);
1017 if (token->type == TOKEN_IDENTIFIER)
1019 if (vStringLength (inheritance) > 0)
1020 vStringPut (inheritance, ',');
1021 vStringCat (inheritance, token->string);
1024 while (token->type != TOKEN_EOF &&
1025 token->type != TOKEN_OPEN_CURLY);
1027 makeClassOrIfaceTag (kind, name, inheritance, impl);
1029 if (token->type == TOKEN_OPEN_CURLY)
1030 enterScope (token, name->string, K_CLASS);
1031 else
1032 readNext = FALSE;
1034 deleteToken (name);
1035 vStringDelete (inheritance);
1037 return readNext;
1040 /* parses a trait:
1041 * trait Foo {} */
1042 static boolean parseTrait (tokenInfo *const token)
1044 boolean readNext = TRUE;
1045 tokenInfo *name;
1047 readToken (token);
1048 if (token->type != TOKEN_IDENTIFIER)
1049 return FALSE;
1051 name = newToken ();
1052 copyToken (name, token, TRUE);
1054 makeSimplePhpTag (name, K_TRAIT, ACCESS_UNDEFINED);
1056 readToken (token);
1057 if (token->type == TOKEN_OPEN_CURLY)
1058 enterScope (token, name->string, K_TRAIT);
1059 else
1060 readNext = FALSE;
1062 deleteToken (name);
1064 return readNext;
1067 /* parse a function
1069 * if @name is NULL, parses a normal function
1070 * function myfunc($foo, $bar) {}
1071 * function &myfunc($foo, $bar) {}
1073 * if @name is not NULL, parses an anonymous function with name @name
1074 * $foo = function($foo, $bar) {}
1075 * $foo = function&($foo, $bar) {}
1076 * $foo = function($foo, $bar) use ($x, &$y) {} */
1077 static boolean parseFunction (tokenInfo *const token, const tokenInfo *name)
1079 boolean readNext = TRUE;
1080 accessType access = CurrentStatement.access;
1081 implType impl = CurrentStatement.impl;
1082 tokenInfo *nameFree = NULL;
1084 readToken (token);
1085 /* skip a possible leading ampersand (return by reference) */
1086 if (token->type == TOKEN_AMPERSAND)
1087 readToken (token);
1089 if (! name)
1091 if (token->type != TOKEN_IDENTIFIER)
1092 return FALSE;
1094 name = nameFree = newToken ();
1095 copyToken (nameFree, token, TRUE);
1096 readToken (token);
1099 if (token->type == TOKEN_OPEN_PAREN)
1101 vString *arglist = vStringNew ();
1102 int depth = 1;
1104 vStringPut (arglist, '(');
1107 readToken (token);
1109 switch (token->type)
1111 case TOKEN_OPEN_PAREN: depth++; break;
1112 case TOKEN_CLOSE_PAREN: depth--; break;
1113 default: break;
1115 /* display part */
1116 switch (token->type)
1118 case TOKEN_AMPERSAND: vStringPut (arglist, '&'); break;
1119 case TOKEN_CLOSE_CURLY: vStringPut (arglist, '}'); break;
1120 case TOKEN_CLOSE_PAREN: vStringPut (arglist, ')'); break;
1121 case TOKEN_CLOSE_SQUARE: vStringPut (arglist, ']'); break;
1122 case TOKEN_COLON: vStringPut (arglist, ':'); break;
1123 case TOKEN_COMMA: vStringCatS (arglist, ", "); break;
1124 case TOKEN_EQUAL_SIGN: vStringCatS (arglist, " = "); break;
1125 case TOKEN_OPEN_CURLY: vStringPut (arglist, '{'); break;
1126 case TOKEN_OPEN_PAREN: vStringPut (arglist, '('); break;
1127 case TOKEN_OPEN_SQUARE: vStringPut (arglist, '['); break;
1128 case TOKEN_PERIOD: vStringPut (arglist, '.'); break;
1129 case TOKEN_SEMICOLON: vStringPut (arglist, ';'); break;
1130 case TOKEN_STRING: vStringCatS (arglist, "'...'"); break;
1132 case TOKEN_IDENTIFIER:
1133 case TOKEN_KEYWORD:
1134 case TOKEN_VARIABLE:
1136 switch (vStringLast (arglist))
1138 case 0:
1139 case ' ':
1140 case '{':
1141 case '(':
1142 case '[':
1143 case '.':
1144 /* no need for a space between those and the identifier */
1145 break;
1147 default:
1148 vStringPut (arglist, ' ');
1149 break;
1151 if (token->type == TOKEN_VARIABLE)
1152 vStringPut (arglist, '$');
1153 vStringCat (arglist, token->string);
1154 break;
1157 default: break;
1160 while (token->type != TOKEN_EOF && depth > 0);
1162 vStringTerminate (arglist);
1164 makeFunctionTag (name, arglist, access, impl);
1165 vStringDelete (arglist);
1167 readToken (token); /* normally it's an open brace or "use" keyword */
1170 /* if parsing Zephir, skip function return type hint */
1171 if (getSourceLanguage () == Lang_zephir && token->type == TOKEN_OPERATOR)
1174 readToken (token);
1175 while (token->type != TOKEN_EOF &&
1176 token->type != TOKEN_OPEN_CURLY &&
1177 token->type != TOKEN_CLOSE_CURLY &&
1178 token->type != TOKEN_SEMICOLON);
1181 /* skip use(...) */
1182 if (token->type == TOKEN_KEYWORD && token->keyword == KEYWORD_use)
1184 readToken (token);
1185 if (token->type == TOKEN_OPEN_PAREN)
1187 int depth = 1;
1191 readToken (token);
1192 switch (token->type)
1194 case TOKEN_OPEN_PAREN: depth++; break;
1195 case TOKEN_CLOSE_PAREN: depth--; break;
1196 default: break;
1199 while (token->type != TOKEN_EOF && depth > 0);
1201 readToken (token);
1205 if (token->type == TOKEN_OPEN_CURLY)
1206 enterScope (token, name->string, K_FUNCTION);
1207 else
1208 readNext = FALSE;
1210 if (nameFree)
1211 deleteToken (nameFree);
1213 return readNext;
1216 /* parses declarations of the form
1217 * const NAME = VALUE */
1218 static boolean parseConstant (tokenInfo *const token)
1220 tokenInfo *name;
1222 readToken (token); /* skip const keyword */
1223 if (token->type != TOKEN_IDENTIFIER)
1224 return FALSE;
1226 name = newToken ();
1227 copyToken (name, token, TRUE);
1229 readToken (token);
1230 if (token->type == TOKEN_EQUAL_SIGN)
1231 makeSimplePhpTag (name, K_DEFINE, ACCESS_UNDEFINED);
1233 deleteToken (name);
1235 return token->type == TOKEN_EQUAL_SIGN;
1238 /* parses declarations of the form
1239 * define('NAME', 'VALUE')
1240 * define(NAME, 'VALUE) */
1241 static boolean parseDefine (tokenInfo *const token)
1243 int depth = 1;
1245 readToken (token); /* skip "define" identifier */
1246 if (token->type != TOKEN_OPEN_PAREN)
1247 return FALSE;
1249 readToken (token);
1250 if (token->type == TOKEN_STRING ||
1251 token->type == TOKEN_IDENTIFIER)
1253 makeSimplePhpTag (token, K_DEFINE, ACCESS_UNDEFINED);
1254 readToken (token);
1257 /* skip until the close parenthesis.
1258 * no need to handle nested blocks since they would be invalid
1259 * in this context anyway (the VALUE may only be a scalar, like
1260 * 42
1261 * (42)
1262 * and alike) */
1263 while (token->type != TOKEN_EOF && depth > 0)
1265 switch (token->type)
1267 case TOKEN_OPEN_PAREN: depth++; break;
1268 case TOKEN_CLOSE_PAREN: depth--; break;
1269 default: break;
1271 readToken (token);
1274 return FALSE;
1277 /* parses declarations of the form
1278 * $var = VALUE
1279 * $var; */
1280 static boolean parseVariable (tokenInfo *const token)
1282 tokenInfo *name;
1283 boolean readNext = TRUE;
1284 accessType access = CurrentStatement.access;
1286 name = newToken ();
1287 copyToken (name, token, TRUE);
1289 readToken (token);
1290 if (token->type == TOKEN_EQUAL_SIGN)
1292 phpKind kind = K_VARIABLE;
1294 if (token->parentKind == K_FUNCTION)
1295 kind = K_LOCAL_VARIABLE;
1297 readToken (token);
1298 if (token->type == TOKEN_KEYWORD &&
1299 token->keyword == KEYWORD_function &&
1300 PhpKinds[kind].enabled)
1302 if (parseFunction (token, name))
1303 readToken (token);
1304 readNext = (boolean) (token->type == TOKEN_SEMICOLON);
1306 else
1308 makeSimplePhpTag (name, kind, access);
1309 readNext = FALSE;
1312 else if (token->type == TOKEN_SEMICOLON)
1314 /* generate tags for variable declarations in classes
1315 * class Foo {
1316 * protected $foo;
1318 * but don't get fooled by stuff like $foo = $bar; */
1319 if (token->parentKind == K_CLASS || token->parentKind == K_INTERFACE)
1320 makeSimplePhpTag (name, K_VARIABLE, access);
1322 else
1323 readNext = FALSE;
1325 deleteToken (name);
1327 return readNext;
1330 /* parses namespace declarations
1331 * namespace Foo {}
1332 * namespace Foo\Bar {}
1333 * namespace Foo;
1334 * namespace Foo\Bar;
1335 * namespace;
1336 * napespace {} */
1337 static boolean parseNamespace (tokenInfo *const token)
1339 tokenInfo *nsToken = newToken ();
1341 vStringClear (CurrentNamespace);
1342 copyToken (nsToken, token, FALSE);
1346 readToken (token);
1347 if (token->type == TOKEN_IDENTIFIER)
1349 if (vStringLength (CurrentNamespace) > 0)
1350 vStringPut (CurrentNamespace, '\\');
1351 vStringCat (CurrentNamespace, token->string);
1354 while (token->type != TOKEN_EOF &&
1355 token->type != TOKEN_SEMICOLON &&
1356 token->type != TOKEN_OPEN_CURLY);
1358 vStringTerminate (CurrentNamespace);
1359 if (vStringLength (CurrentNamespace) > 0)
1360 makeNamespacePhpTag (nsToken, CurrentNamespace);
1362 if (token->type == TOKEN_OPEN_CURLY)
1363 enterScope (token, NULL, -1);
1365 deleteToken (nsToken);
1367 return TRUE;
1370 static void enterScope (tokenInfo *const parentToken,
1371 const vString *const extraScope,
1372 const int parentKind)
1374 tokenInfo *token = newToken ();
1375 int origParentKind = parentToken->parentKind;
1377 copyToken (token, parentToken, TRUE);
1379 if (extraScope)
1381 addToScope (token, extraScope);
1382 token->parentKind = parentKind;
1385 readToken (token);
1386 while (token->type != TOKEN_EOF &&
1387 token->type != TOKEN_CLOSE_CURLY)
1389 boolean readNext = TRUE;
1391 switch (token->type)
1393 case TOKEN_OPEN_CURLY:
1394 enterScope (token, NULL, -1);
1395 break;
1397 case TOKEN_KEYWORD:
1398 switch (token->keyword)
1400 case KEYWORD_class: readNext = parseClassOrIface (token, K_CLASS); break;
1401 case KEYWORD_interface: readNext = parseClassOrIface (token, K_INTERFACE); break;
1402 case KEYWORD_trait: readNext = parseTrait (token); break;
1403 case KEYWORD_function: readNext = parseFunction (token, NULL); break;
1404 case KEYWORD_const: readNext = parseConstant (token); break;
1405 case KEYWORD_define: readNext = parseDefine (token); break;
1407 case KEYWORD_namespace: readNext = parseNamespace (token); break;
1409 case KEYWORD_private: CurrentStatement.access = ACCESS_PRIVATE; break;
1410 case KEYWORD_protected: CurrentStatement.access = ACCESS_PROTECTED; break;
1411 case KEYWORD_public: CurrentStatement.access = ACCESS_PUBLIC; break;
1412 case KEYWORD_var: CurrentStatement.access = ACCESS_PUBLIC; break;
1414 case KEYWORD_abstract: CurrentStatement.impl = IMPL_ABSTRACT; break;
1416 default: break;
1418 break;
1420 case TOKEN_VARIABLE:
1421 readNext = parseVariable (token);
1422 break;
1424 default: break;
1427 if (readNext)
1428 readToken (token);
1431 copyToken (parentToken, token, FALSE);
1432 parentToken->parentKind = origParentKind;
1433 deleteToken (token);
1436 static void findTags (void)
1438 tokenInfo *const token = newToken ();
1440 CurrentStatement.access = ACCESS_UNDEFINED;
1441 CurrentStatement.impl = IMPL_UNDEFINED;
1442 CurrentNamespace = vStringNew ();
1446 enterScope (token, NULL, -1);
1448 while (token->type != TOKEN_EOF); /* keep going even with unmatched braces */
1450 vStringDelete (CurrentNamespace);
1451 deleteToken (token);
1454 static void findPhpTags (void)
1456 InPhp = FALSE;
1457 findTags ();
1460 static void findZephirTags (void)
1462 InPhp = TRUE;
1463 findTags ();
1466 static void initializePhpParser (const langType language)
1468 Lang_php = language;
1471 static void initializeZephirParser (const langType language)
1473 Lang_zephir = language;
1476 extern parserDefinition* PhpParser (void)
1478 static const char *const extensions [] = { "php", "php3", "php4", "php5", "phtml", NULL };
1479 parserDefinition* def = parserNew ("PHP");
1480 def->kinds = PhpKinds;
1481 def->kindCount = ARRAY_SIZE (PhpKinds);
1482 def->extensions = extensions;
1483 def->parser = findPhpTags;
1484 def->initialize = initializePhpParser;
1485 def->keywordTable = PhpKeywordTable;
1486 def->keywordCount = ARRAY_SIZE (PhpKeywordTable);
1487 return def;
1490 extern parserDefinition* ZephirParser (void)
1492 static const char *const extensions [] = { "zep", NULL };
1493 parserDefinition* def = parserNew ("Zephir");
1494 def->kinds = PhpKinds;
1495 def->kindCount = ARRAY_SIZE (PhpKinds);
1496 def->extensions = extensions;
1497 def->parser = findZephirTags;
1498 def->initialize = initializeZephirParser;
1499 def->keywordTable = PhpKeywordTable;
1500 def->keywordCount = ARRAY_SIZE (PhpKeywordTable);
1501 return def;
1504 /* vi:set tabstop=4 shiftwidth=4: */