FreeBasic: Update keywords
[geany-mirror.git] / tagmanager / ctags / php.c
blobb70356347af210a910f31937cc214412cf4d635f
1 /*
2 * Copyright (c) 2013, Colomban Wendling <ban@herbesfolles.org>
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License.
7 * This module contains code for generating tags for the PHP scripting
8 * language.
9 */
12 * INCLUDE FILES
14 #include "general.h" /* must always come first */
15 #include "main.h"
16 #include "parse.h"
17 #include "read.h"
18 #include "vstring.h"
19 #include "keyword.h"
20 #include "entry.h"
/* Separator inserted between the components of a tag's scope string */
#define SCOPE_SEPARATOR "::"
/* Identifiers of the PHP keywords known to this parser.
 * KEYWORD_NONE (-1) is what a token carries when it is not a keyword. */
typedef enum {
	KEYWORD_NONE = -1,
	KEYWORD_abstract,
	KEYWORD_and,
	KEYWORD_as,
	KEYWORD_break,
	KEYWORD_callable,
	KEYWORD_case,
	KEYWORD_catch,
	KEYWORD_class,
	KEYWORD_clone,
	KEYWORD_const,
	KEYWORD_continue,
	KEYWORD_declare,
	KEYWORD_define,
	KEYWORD_default,
	KEYWORD_do,
	KEYWORD_echo,
	KEYWORD_else,
	KEYWORD_elif,
	KEYWORD_enddeclare,
	KEYWORD_endfor,
	KEYWORD_endforeach,
	KEYWORD_endif,
	KEYWORD_endswitch,
	KEYWORD_endwhile,
	KEYWORD_extends,
	KEYWORD_final,
	KEYWORD_finally,
	KEYWORD_for,
	KEYWORD_foreach,
	KEYWORD_function,
	KEYWORD_global,
	KEYWORD_goto,
	KEYWORD_if,
	KEYWORD_implements,
	KEYWORD_include,
	KEYWORD_include_once,
	KEYWORD_instanceof,
	KEYWORD_insteadof,
	KEYWORD_interface,
	KEYWORD_namespace,
	KEYWORD_new,
	KEYWORD_or,
	KEYWORD_print,
	KEYWORD_private,
	KEYWORD_protected,
	KEYWORD_public,
	KEYWORD_require,
	KEYWORD_require_once,
	KEYWORD_return,
	KEYWORD_static,
	KEYWORD_switch,
	KEYWORD_throw,
	KEYWORD_trait,
	KEYWORD_try,
	KEYWORD_use,
	KEYWORD_var,
	KEYWORD_while,
	KEYWORD_xor,
	KEYWORD_yield
} keywordId;
/* Visibility of a class member.  The values index the name table in
 * accessToString(); COUNT_ACCESS is the number of real entries. */
typedef enum {
	ACCESS_UNDEFINED,
	ACCESS_PRIVATE,
	ACCESS_PROTECTED,
	ACCESS_PUBLIC,
	COUNT_ACCESS
} accessType;
/* Implementation qualifier of a class or function.  The values index the
 * name table in implToString(); COUNT_IMPL is the number of real entries. */
typedef enum {
	IMPL_UNDEFINED,
	IMPL_ABSTRACT,
	COUNT_IMPL
} implType;
/* Kinds of tags this parser can emit.  The values are used as indices into
 * PhpKinds[], so the order here must match the order of that table. */
typedef enum {
	K_CLASS,
	K_DEFINE,
	K_FUNCTION,
	K_INTERFACE,
	K_LOCAL_VARIABLE,
	K_NAMESPACE,
	K_TRAIT,
	K_VARIABLE,
	COUNT_KIND
} phpKind;
115 static kindOption PhpKinds[COUNT_KIND] = {
116 { TRUE, 'c', "class", "classes" },
117 { TRUE, 'm', "macro", "constant definitions" },
118 { TRUE, 'f', "function", "functions" },
119 { TRUE, 'i', "interface", "interfaces" },
120 { FALSE, 'l', "local", "local variables" },
121 { TRUE, 'n', "namespace", "namespaces" },
122 { TRUE, 's', "struct", "traits" },
123 { TRUE, 'v', "variable", "variables" }
126 typedef struct {
127 const char *name;
128 keywordId id;
129 } keywordDesc;
131 static const keywordDesc PhpKeywordTable[] = {
132 /* keyword keyword ID */
133 { "abstract", KEYWORD_abstract },
134 { "and", KEYWORD_and },
135 { "as", KEYWORD_as },
136 { "break", KEYWORD_break },
137 { "callable", KEYWORD_callable },
138 { "case", KEYWORD_case },
139 { "catch", KEYWORD_catch },
140 { "cfunction", KEYWORD_function }, /* nobody knows what the hell this is, but it seems to behave much like "function" so bind it to it */
141 { "class", KEYWORD_class },
142 { "clone", KEYWORD_clone },
143 { "const", KEYWORD_const },
144 { "continue", KEYWORD_continue },
145 { "declare", KEYWORD_declare },
146 { "define", KEYWORD_define }, /* this isn't really a keyword but we handle it so it's easier this way */
147 { "default", KEYWORD_default },
148 { "do", KEYWORD_do },
149 { "echo", KEYWORD_echo },
150 { "else", KEYWORD_else },
151 { "elseif", KEYWORD_elif },
152 { "enddeclare", KEYWORD_enddeclare },
153 { "endfor", KEYWORD_endfor },
154 { "endforeach", KEYWORD_endforeach },
155 { "endif", KEYWORD_endif },
156 { "endswitch", KEYWORD_endswitch },
157 { "endwhile", KEYWORD_endwhile },
158 { "extends", KEYWORD_extends },
159 { "final", KEYWORD_final },
160 { "finally", KEYWORD_finally },
161 { "for", KEYWORD_for },
162 { "foreach", KEYWORD_foreach },
163 { "function", KEYWORD_function },
164 { "global", KEYWORD_global },
165 { "goto", KEYWORD_goto },
166 { "if", KEYWORD_if },
167 { "implements", KEYWORD_implements },
168 { "include", KEYWORD_include },
169 { "include_once", KEYWORD_include_once },
170 { "instanceof", KEYWORD_instanceof },
171 { "insteadof", KEYWORD_insteadof },
172 { "interface", KEYWORD_interface },
173 { "namespace", KEYWORD_namespace },
174 { "new", KEYWORD_new },
175 { "or", KEYWORD_or },
176 { "print", KEYWORD_print },
177 { "private", KEYWORD_private },
178 { "protected", KEYWORD_protected },
179 { "public", KEYWORD_public },
180 { "require", KEYWORD_require },
181 { "require_once", KEYWORD_require_once },
182 { "return", KEYWORD_return },
183 { "static", KEYWORD_static },
184 { "switch", KEYWORD_switch },
185 { "throw", KEYWORD_throw },
186 { "trait", KEYWORD_trait },
187 { "try", KEYWORD_try },
188 { "use", KEYWORD_use },
189 { "var", KEYWORD_var },
190 { "while", KEYWORD_while },
191 { "xor", KEYWORD_xor },
192 { "yield", KEYWORD_yield }
/* Lexical categories produced by readToken() */
typedef enum eTokenType {
	TOKEN_UNDEFINED,
	TOKEN_EOF,
	TOKEN_CHARACTER,
	TOKEN_CLOSE_PAREN,
	TOKEN_SEMICOLON,
	TOKEN_COLON,
	TOKEN_COMMA,
	TOKEN_KEYWORD,
	TOKEN_OPEN_PAREN,
	TOKEN_OPERATOR,
	TOKEN_IDENTIFIER,
	TOKEN_STRING,
	TOKEN_PERIOD,
	TOKEN_OPEN_CURLY,
	TOKEN_CLOSE_CURLY,
	TOKEN_EQUAL_SIGN,
	TOKEN_OPEN_SQUARE,
	TOKEN_CLOSE_SQUARE,
	TOKEN_VARIABLE,
	TOKEN_AMPERSAND
} tokenType;
219 typedef struct {
220 tokenType type;
221 keywordId keyword;
222 vString * string;
223 vString * scope;
224 unsigned long lineNumber;
225 MIOPos filePosition;
226 int parentKind; /* -1 if none */
227 } tokenInfo;
229 static langType Lang_php;
231 static boolean InPhp = FALSE; /* whether we are between <? ?> */
233 /* current statement details */
234 static struct {
235 accessType access;
236 implType impl;
237 } CurrentStatement;
239 /* Current namespace */
240 static vString *CurrentNamesapce;
243 static void buildPhpKeywordHash (void)
245 const size_t count = sizeof (PhpKeywordTable) / sizeof (PhpKeywordTable[0]);
246 size_t i;
247 for (i = 0; i < count ; i++)
249 const keywordDesc* const p = &PhpKeywordTable[i];
250 addKeyword (p->name, Lang_php, (int) p->id);
254 static const char *accessToString (const accessType access)
256 static const char *const names[COUNT_ACCESS] = {
257 "undefined",
258 "private",
259 "protected",
260 "public"
263 Assert (access < COUNT_ACCESS);
265 return names[access];
268 static const char *implToString (const implType impl)
270 static const char *const names[COUNT_IMPL] = {
271 "undefined",
272 "abstract"
275 Assert (impl < COUNT_IMPL);
277 return names[impl];
280 static void initPhpEntry (tagEntryInfo *const e, const tokenInfo *const token,
281 const phpKind kind, const accessType access)
283 static vString *fullScope = NULL;
284 int parentKind = -1;
286 if (fullScope == NULL)
287 fullScope = vStringNew ();
288 else
289 vStringClear (fullScope);
291 if (vStringLength (CurrentNamesapce) > 0)
293 vStringCopy (fullScope, CurrentNamesapce);
294 parentKind = K_NAMESPACE;
297 initTagEntry (e, vStringValue (token->string));
299 e->lineNumber = token->lineNumber;
300 e->filePosition = token->filePosition;
301 e->kindName = PhpKinds[kind].name;
302 e->kind = (char) PhpKinds[kind].letter;
304 if (access != ACCESS_UNDEFINED)
305 e->extensionFields.access = accessToString (access);
306 if (vStringLength (token->scope) > 0)
308 parentKind = token->parentKind;
309 if (vStringLength (fullScope) > 0)
310 vStringCatS (fullScope, SCOPE_SEPARATOR);
311 vStringCat (fullScope, token->scope);
313 if (vStringLength (fullScope) > 0)
315 Assert (parentKind >= 0);
317 vStringTerminate (fullScope);
318 e->extensionFields.scope[0] = PhpKinds[parentKind].name;
319 e->extensionFields.scope[1] = vStringValue (fullScope);
323 static void makeSimplePhpTag (const tokenInfo *const token, const phpKind kind,
324 const accessType access)
326 if (PhpKinds[kind].enabled)
328 tagEntryInfo e;
330 initPhpEntry (&e, token, kind, access);
331 makeTagEntry (&e);
335 static void makeNamespacePhpTag (const tokenInfo *const token, const vString *const name)
337 if (PhpKinds[K_NAMESPACE].enabled)
339 tagEntryInfo e;
341 initTagEntry (&e, vStringValue (name));
343 e.lineNumber = token->lineNumber;
344 e.filePosition = token->filePosition;
345 e.kindName = PhpKinds[K_NAMESPACE].name;
346 e.kind = (char) PhpKinds[K_NAMESPACE].letter;
348 makeTagEntry (&e);
352 static void makeClassOrIfaceTag (const phpKind kind, const tokenInfo *const token,
353 vString *const inheritance, const implType impl)
355 if (PhpKinds[kind].enabled)
357 tagEntryInfo e;
359 initPhpEntry (&e, token, kind, ACCESS_UNDEFINED);
361 if (impl != IMPL_UNDEFINED)
362 e.extensionFields.implementation = implToString (impl);
363 if (vStringLength (inheritance) > 0)
364 e.extensionFields.inheritance = vStringValue (inheritance);
366 makeTagEntry (&e);
370 static void makeFunctionTag (const tokenInfo *const token,
371 const vString *const arglist,
372 const accessType access, const implType impl)
374 if (PhpKinds[K_FUNCTION].enabled)
376 tagEntryInfo e;
378 initPhpEntry (&e, token, K_FUNCTION, access);
380 if (impl != IMPL_UNDEFINED)
381 e.extensionFields.implementation = implToString (impl);
382 if (arglist)
383 e.extensionFields.arglist = vStringValue (arglist);
385 makeTagEntry (&e);
389 static tokenInfo *newToken (void)
391 tokenInfo *const token = xMalloc (1, tokenInfo);
393 token->type = TOKEN_UNDEFINED;
394 token->keyword = KEYWORD_NONE;
395 token->string = vStringNew ();
396 token->scope = vStringNew ();
397 token->lineNumber = getSourceLineNumber ();
398 token->filePosition = getInputFilePosition ();
399 token->parentKind = -1;
401 return token;
404 static void deleteToken (tokenInfo *const token)
406 vStringDelete (token->string);
407 vStringDelete (token->scope);
408 eFree (token);
411 static void copyToken (tokenInfo *const dest, const tokenInfo *const src,
412 boolean scope)
414 dest->lineNumber = src->lineNumber;
415 dest->filePosition = src->filePosition;
416 dest->type = src->type;
417 dest->keyword = src->keyword;
418 vStringCopy(dest->string, src->string);
419 dest->parentKind = src->parentKind;
420 if (scope)
421 vStringCopy(dest->scope, src->scope);
#if 0
#include <stdio.h>

/* Debugging aid: returns a printable name for @type.  Every tokenType
 * enumerator has a case; NULL is returned only for out-of-range values. */
static const char *tokenTypeName (const tokenType type)
{
	switch (type)
	{
		case TOKEN_UNDEFINED:		return "undefined";
		case TOKEN_EOF:				return "EOF";
		case TOKEN_CHARACTER:		return "character";
		case TOKEN_CLOSE_PAREN:		return "')'";
		case TOKEN_SEMICOLON:		return "';'";
		case TOKEN_COLON:			return "':'";
		case TOKEN_COMMA:			return "','";
		case TOKEN_OPEN_PAREN:		return "'('";
		case TOKEN_OPERATOR:		return "operator";
		case TOKEN_IDENTIFIER:		return "identifier";
		case TOKEN_KEYWORD:			return "keyword";
		case TOKEN_STRING:			return "string";
		case TOKEN_PERIOD:			return "'.'";
		case TOKEN_OPEN_CURLY:		return "'{'";
		case TOKEN_CLOSE_CURLY:		return "'}'";
		case TOKEN_EQUAL_SIGN:		return "'='";
		case TOKEN_OPEN_SQUARE:		return "'['";
		case TOKEN_CLOSE_SQUARE:	return "']'";
		case TOKEN_VARIABLE:		return "variable";
		/* was missing: printToken() would have passed NULL to "%s" */
		case TOKEN_AMPERSAND:		return "'&'";
	}
	return NULL;
}

/* Debugging aid: dumps @token (type, line, scope and, where relevant, its
 * content or keyword name) to stderr. */
static void printToken (const tokenInfo *const token)
{
	fprintf (stderr, "%p:\n\ttype:\t%s\n\tline:\t%lu\n\tscope:\t%s\n", (void *) token,
			 tokenTypeName (token->type),
			 token->lineNumber,
			 vStringValue (token->scope));
	switch (token->type)
	{
		case TOKEN_IDENTIFIER:
		case TOKEN_STRING:
		case TOKEN_VARIABLE:
			fprintf (stderr, "\tcontent:\t%s\n", vStringValue (token->string));
			break;

		case TOKEN_KEYWORD:
		{
			size_t n = sizeof PhpKeywordTable / sizeof PhpKeywordTable[0];
			size_t i;

			fprintf (stderr, "\tkeyword:\t");
			for (i = 0; i < n; i++)
			{
				if (PhpKeywordTable[i].id == token->keyword)
				{
					fprintf (stderr, "%s\n", PhpKeywordTable[i].name);
					break;
				}
			}
			if (i >= n)
				fprintf (stderr, "(unknown)\n");
			break;
		}

		default: break;
	}
}
#endif
491 static void addToScope (tokenInfo *const token, const vString *const extra)
493 if (vStringLength (token->scope) > 0)
494 vStringCatS (token->scope, SCOPE_SEPARATOR);
495 vStringCatS (token->scope, vStringValue (extra));
496 vStringTerminate(token->scope);
499 static boolean isIdentChar (const int c)
501 return (isalnum (c) || c == '_' || c >= 0x80);
504 static int skipToCharacter (const int c)
506 int d;
509 d = fileGetc ();
510 } while (d != EOF && d != c);
511 return d;
514 static void parseString (vString *const string, const int delimiter)
516 while (TRUE)
518 int c = fileGetc ();
520 if (c == '\\' && (c = fileGetc ()) != EOF)
521 vStringPut (string, (char) c);
522 else if (c == EOF || c == delimiter)
523 break;
524 else
525 vStringPut (string, (char) c);
527 vStringTerminate (string);
530 /* reads an HereDoc or a NowDoc (the part after the <<<).
531 * <<<[ \t]*(ID|'ID'|"ID")
532 * ...
533 * ID;?
535 * note that:
536 * 1) starting ID must be immediately followed by a newline;
537 * 2) closing ID is the same as opening one;
538 * 3) closing ID must be immediately followed by a newline or a semicolon
539 * then a newline.
541 * Example of a *single* valid heredoc:
542 * <<< FOO
543 * something
544 * something else
545 * FOO this is not an end
546 * FOO; this isn't either
547 * FOO; # neither this is
548 * FOO;
549 * # previous line was the end, but the semicolon wasn't required
551 static void parseHeredoc (vString *const string)
553 int c;
554 unsigned int len;
555 char delimiter[64]; /* arbitrary limit, but more is crazy anyway */
556 int quote = 0;
560 c = fileGetc ();
562 while (c == ' ' || c == '\t');
564 if (c == '\'' || c == '"')
566 quote = c;
567 c = fileGetc ();
569 for (len = 0; len < (sizeof delimiter / sizeof delimiter[0]) - 1; len++)
571 if (! isIdentChar (c))
572 break;
573 delimiter[len] = (char) c;
574 c = fileGetc ();
576 delimiter[len] = 0;
578 if (len == 0) /* no delimiter, give up */
579 goto error;
580 if (quote)
582 if (c != quote) /* no closing quote for quoted identifier, give up */
583 goto error;
584 c = fileGetc ();
586 if (c != '\r' && c != '\n') /* missing newline, give up */
587 goto error;
591 c = fileGetc ();
593 if (c != '\r' && c != '\n')
594 vStringPut (string, (char) c);
595 else
597 /* new line, check for a delimiter right after */
598 int nl = c;
599 int extra = EOF;
601 c = fileGetc ();
602 for (len = 0; c != 0 && (c - delimiter[len]) == 0; len++)
603 c = fileGetc ();
605 if (delimiter[len] != 0)
606 fileUngetc (c);
607 else
609 /* line start matched the delimiter, now check whether there
610 * is anything after it */
611 if (c == '\r' || c == '\n')
613 fileUngetc (c);
614 break;
616 else if (c == ';')
618 int d = fileGetc ();
619 if (d == '\r' || d == '\n')
621 /* put back the semicolon since it's not part of the
622 * string. we can't put back the newline, but it's a
623 * whitespace character nobody cares about it anyway */
624 fileUngetc (';');
625 break;
627 else
629 /* put semicolon in the string and continue */
630 extra = ';';
631 fileUngetc (d);
635 /* if we are here it wasn't a delimiter, so put everything in the
636 * string */
637 vStringPut (string, (char) nl);
638 vStringNCatS (string, delimiter, len);
639 if (extra != EOF)
640 vStringPut (string, (char) extra);
643 while (c != EOF);
645 vStringTerminate (string);
647 return;
649 error:
650 fileUngetc (c);
653 static void parseIdentifier (vString *const string, const int firstChar)
655 int c = firstChar;
658 vStringPut (string, (char) c);
659 c = fileGetc ();
660 } while (isIdentChar (c));
661 fileUngetc (c);
662 vStringTerminate (string);
665 static keywordId analyzeToken (vString *const name, langType language)
667 vString *keyword = vStringNew ();
668 keywordId result;
669 vStringCopyToLower (keyword, name);
670 result = lookupKeyword (vStringValue (keyword), language);
671 vStringDelete (keyword);
672 return result;
675 static boolean isSpace (int c)
677 return (c == '\t' || c == ' ' || c == '\v' ||
678 c == '\n' || c == '\r' || c == '\f');
/* Starting from @c, reads input until a non-whitespace character (or EOF)
 * is found and returns it.  Returns @c itself if it is not whitespace. */
static int skipWhitespaces (int c)
{
	while (isSpace (c))
		c = fileGetc ();
	return c;
}
688 /* <script[:white:]+language[:white:]*=[:white:]*(php|'php'|"php")[:white:]*>
690 * This is ugly, but the whole "<script language=php>" tag is and we can't
691 * really do better without adding a lot of code only for this */
692 static boolean isOpenScriptLanguagePhp (int c)
694 int quote = 0;
696 /* <script[:white:]+language[:white:]*= */
697 if (c != '<' ||
698 tolower ((c = fileGetc ())) != 's' ||
699 tolower ((c = fileGetc ())) != 'c' ||
700 tolower ((c = fileGetc ())) != 'r' ||
701 tolower ((c = fileGetc ())) != 'i' ||
702 tolower ((c = fileGetc ())) != 'p' ||
703 tolower ((c = fileGetc ())) != 't' ||
704 ! isSpace ((c = fileGetc ())) ||
705 tolower ((c = skipWhitespaces (c))) != 'l' ||
706 tolower ((c = fileGetc ())) != 'a' ||
707 tolower ((c = fileGetc ())) != 'n' ||
708 tolower ((c = fileGetc ())) != 'g' ||
709 tolower ((c = fileGetc ())) != 'u' ||
710 tolower ((c = fileGetc ())) != 'a' ||
711 tolower ((c = fileGetc ())) != 'g' ||
712 tolower ((c = fileGetc ())) != 'e' ||
713 (c = skipWhitespaces (fileGetc ())) != '=')
714 return FALSE;
716 /* (php|'php'|"php")> */
717 c = skipWhitespaces (fileGetc ());
718 if (c == '"' || c == '\'')
720 quote = c;
721 c = fileGetc ();
723 if (tolower (c) != 'p' ||
724 tolower ((c = fileGetc ())) != 'h' ||
725 tolower ((c = fileGetc ())) != 'p' ||
726 (quote != 0 && (c = fileGetc ()) != quote) ||
727 (c = skipWhitespaces (fileGetc ())) != '>')
728 return FALSE;
730 return TRUE;
733 static int findPhpStart (void)
735 int c;
738 if ((c = fileGetc ()) == '<')
740 c = fileGetc ();
741 /* <? and <?php, but not <?xml */
742 if (c == '?')
744 /* don't enter PHP mode on "<?xml", yet still support short open tags (<?) */
745 if (tolower ((c = fileGetc ())) != 'x' ||
746 tolower ((c = fileGetc ())) != 'm' ||
747 tolower ((c = fileGetc ())) != 'l')
749 break;
752 /* <script language="php"> */
753 else
755 fileUngetc (c);
756 if (isOpenScriptLanguagePhp ('<'))
757 break;
761 while (c != EOF);
763 return c;
766 static int skipSingleComment (void)
768 int c;
771 c = fileGetc ();
772 if (c == '\r')
774 int next = fileGetc ();
775 if (next != '\n')
776 fileUngetc (next);
777 else
778 c = next;
780 /* ?> in single-line comments leaves PHP mode */
781 else if (c == '?')
783 int next = fileGetc ();
784 if (next == '>')
785 InPhp = FALSE;
786 else
787 fileUngetc (next);
789 } while (InPhp && c != EOF && c != '\n' && c != '\r');
790 return c;
793 static void readToken (tokenInfo *const token)
795 int c;
797 token->type = TOKEN_UNDEFINED;
798 token->keyword = KEYWORD_NONE;
799 vStringClear (token->string);
801 getNextChar:
803 if (! InPhp)
805 c = findPhpStart ();
806 if (c != EOF)
807 InPhp = TRUE;
809 else
810 c = fileGetc ();
812 c = skipWhitespaces (c);
814 token->lineNumber = getSourceLineNumber ();
815 token->filePosition = getInputFilePosition ();
817 switch (c)
819 case EOF: token->type = TOKEN_EOF; break;
820 case '(': token->type = TOKEN_OPEN_PAREN; break;
821 case ')': token->type = TOKEN_CLOSE_PAREN; break;
822 case ';': token->type = TOKEN_SEMICOLON; break;
823 case ',': token->type = TOKEN_COMMA; break;
824 case '.': token->type = TOKEN_PERIOD; break;
825 case ':': token->type = TOKEN_COLON; break;
826 case '{': token->type = TOKEN_OPEN_CURLY; break;
827 case '}': token->type = TOKEN_CLOSE_CURLY; break;
828 case '[': token->type = TOKEN_OPEN_SQUARE; break;
829 case ']': token->type = TOKEN_CLOSE_SQUARE; break;
830 case '&': token->type = TOKEN_AMPERSAND; break;
832 case '=':
834 int d = fileGetc ();
835 if (d == '=' || d == '>')
836 token->type = TOKEN_OPERATOR;
837 else
839 fileUngetc (d);
840 token->type = TOKEN_EQUAL_SIGN;
842 break;
845 case '\'':
846 case '"':
847 token->type = TOKEN_STRING;
848 parseString (token->string, c);
849 token->lineNumber = getSourceLineNumber ();
850 token->filePosition = getInputFilePosition ();
851 break;
853 case '<':
855 int d = fileGetc ();
856 if (d == '/')
858 /* </script[:white:]*> */
859 if (tolower ((d = fileGetc ())) == 's' &&
860 tolower ((d = fileGetc ())) == 'c' &&
861 tolower ((d = fileGetc ())) == 'r' &&
862 tolower ((d = fileGetc ())) == 'i' &&
863 tolower ((d = fileGetc ())) == 'p' &&
864 tolower ((d = fileGetc ())) == 't' &&
865 (d = skipWhitespaces (fileGetc ())) == '>')
867 InPhp = FALSE;
868 goto getNextChar;
870 else
872 fileUngetc (d);
873 token->type = TOKEN_UNDEFINED;
876 else if (d == '<' && (d = fileGetc ()) == '<')
878 token->type = TOKEN_STRING;
879 parseHeredoc (token->string);
881 else
883 fileUngetc (d);
884 token->type = TOKEN_UNDEFINED;
886 break;
889 case '#': /* comment */
890 skipSingleComment ();
891 goto getNextChar;
892 break;
894 case '+':
895 case '-':
896 case '*':
897 case '%':
899 int d = fileGetc ();
900 if (d != '=')
901 fileUngetc (d);
902 token->type = TOKEN_OPERATOR;
903 break;
906 case '/': /* division or comment start */
908 int d = fileGetc ();
909 if (d == '/') /* single-line comment */
911 skipSingleComment ();
912 goto getNextChar;
914 else if (d == '*')
918 c = skipToCharacter ('*');
919 if (c != EOF)
921 c = fileGetc ();
922 if (c == '/')
923 break;
924 else
925 fileUngetc (c);
927 } while (c != EOF && c != '\0');
928 goto getNextChar;
930 else
932 if (d != '=')
933 fileUngetc (d);
934 token->type = TOKEN_OPERATOR;
936 break;
939 case '$': /* variable start */
941 int d = fileGetc ();
942 if (! isIdentChar (d))
944 fileUngetc (d);
945 token->type = TOKEN_UNDEFINED;
947 else
949 parseIdentifier (token->string, d);
950 token->type = TOKEN_VARIABLE;
952 break;
955 case '?': /* maybe the end of the PHP chunk */
957 int d = fileGetc ();
958 if (d == '>')
960 InPhp = FALSE;
961 goto getNextChar;
963 else
965 fileUngetc (d);
966 token->type = TOKEN_UNDEFINED;
968 break;
971 default:
972 if (! isIdentChar (c))
973 token->type = TOKEN_UNDEFINED;
974 else
976 parseIdentifier (token->string, c);
977 token->keyword = analyzeToken (token->string, Lang_php);
978 if (token->keyword == KEYWORD_NONE)
979 token->type = TOKEN_IDENTIFIER;
980 else
981 token->type = TOKEN_KEYWORD;
983 break;
986 if (token->type == TOKEN_SEMICOLON ||
987 token->type == TOKEN_OPEN_CURLY ||
988 token->type == TOKEN_CLOSE_CURLY)
990 /* reset current statement details on statement end, and when entering
991 * a deeper scope.
992 * it is a bit ugly to do this in readToken(), but it makes everything
993 * a lot simpler. */
994 CurrentStatement.access = ACCESS_UNDEFINED;
995 CurrentStatement.impl = IMPL_UNDEFINED;
999 static void enterScope (tokenInfo *const parentToken,
1000 const vString *const extraScope,
1001 const int parentKind);
1003 /* parses a class or an interface:
1004 * class Foo {}
1005 * class Foo extends Bar {}
1006 * class Foo extends Bar implements iFoo, iBar {}
1007 * interface iFoo {}
1008 * interface iBar extends iFoo {} */
1009 static boolean parseClassOrIface (tokenInfo *const token, const phpKind kind)
1011 boolean readNext = TRUE;
1012 implType impl = CurrentStatement.impl;
1013 tokenInfo *name;
1014 vString *inheritance = NULL;
1016 readToken (token);
1017 if (token->type != TOKEN_IDENTIFIER)
1018 return FALSE;
1020 name = newToken ();
1021 copyToken (name, token, TRUE);
1023 inheritance = vStringNew ();
1024 /* skip until the open bracket and assume every identifier (not keyword)
1025 * is an inheritance (like in "class Foo extends Bar implements iA, iB") */
1028 readToken (token);
1030 if (token->type == TOKEN_IDENTIFIER)
1032 if (vStringLength (inheritance) > 0)
1033 vStringPut (inheritance, ',');
1034 vStringCat (inheritance, token->string);
1037 while (token->type != TOKEN_EOF &&
1038 token->type != TOKEN_OPEN_CURLY);
1040 makeClassOrIfaceTag (kind, name, inheritance, impl);
1042 if (token->type == TOKEN_OPEN_CURLY)
1043 enterScope (token, name->string, K_CLASS);
1044 else
1045 readNext = FALSE;
1047 deleteToken (name);
1048 vStringDelete (inheritance);
1050 return readNext;
1053 /* parses a trait:
1054 * trait Foo {} */
1055 static boolean parseTrait (tokenInfo *const token)
1057 boolean readNext = TRUE;
1058 tokenInfo *name;
1060 readToken (token);
1061 if (token->type != TOKEN_IDENTIFIER)
1062 return FALSE;
1064 name = newToken ();
1065 copyToken (name, token, TRUE);
1067 makeSimplePhpTag (name, K_TRAIT, ACCESS_UNDEFINED);
1069 readToken (token);
1070 if (token->type == TOKEN_OPEN_CURLY)
1071 enterScope (token, name->string, K_TRAIT);
1072 else
1073 readNext = FALSE;
1075 deleteToken (name);
1077 return readNext;
1080 /* parse a function
1082 * if @name is NULL, parses a normal function
1083 * function myfunc($foo, $bar) {}
1084 * function &myfunc($foo, $bar) {}
1086 * if @name is not NULL, parses an anonymous function with name @name
1087 * $foo = function($foo, $bar) {}
1088 * $foo = function&($foo, $bar) {}
1089 * $foo = function($foo, $bar) use ($x, &$y) {} */
1090 static boolean parseFunction (tokenInfo *const token, const tokenInfo *name)
1092 boolean readNext = TRUE;
1093 accessType access = CurrentStatement.access;
1094 implType impl = CurrentStatement.impl;
1095 tokenInfo *nameFree = NULL;
1097 readToken (token);
1098 /* skip a possible leading ampersand (return by reference) */
1099 if (token->type == TOKEN_AMPERSAND)
1100 readToken (token);
1102 if (! name)
1104 if (token->type != TOKEN_IDENTIFIER)
1105 return FALSE;
1107 name = nameFree = newToken ();
1108 copyToken (nameFree, token, TRUE);
1109 readToken (token);
1112 if (token->type == TOKEN_OPEN_PAREN)
1114 vString *arglist = vStringNew ();
1115 int depth = 1;
1117 vStringPut (arglist, '(');
1120 readToken (token);
1122 switch (token->type)
1124 case TOKEN_OPEN_PAREN: depth++; break;
1125 case TOKEN_CLOSE_PAREN: depth--; break;
1126 default: break;
1128 /* display part */
1129 switch (token->type)
1131 case TOKEN_AMPERSAND: vStringPut (arglist, '&'); break;
1132 case TOKEN_CLOSE_CURLY: vStringPut (arglist, '}'); break;
1133 case TOKEN_CLOSE_PAREN: vStringPut (arglist, ')'); break;
1134 case TOKEN_CLOSE_SQUARE: vStringPut (arglist, ']'); break;
1135 case TOKEN_COLON: vStringPut (arglist, ':'); break;
1136 case TOKEN_COMMA: vStringCatS (arglist, ", "); break;
1137 case TOKEN_EQUAL_SIGN: vStringCatS (arglist, " = "); break;
1138 case TOKEN_OPEN_CURLY: vStringPut (arglist, '{'); break;
1139 case TOKEN_OPEN_PAREN: vStringPut (arglist, '('); break;
1140 case TOKEN_OPEN_SQUARE: vStringPut (arglist, '['); break;
1141 case TOKEN_PERIOD: vStringPut (arglist, '.'); break;
1142 case TOKEN_SEMICOLON: vStringPut (arglist, ';'); break;
1143 case TOKEN_STRING: vStringCatS (arglist, "'...'"); break;
1145 case TOKEN_IDENTIFIER:
1146 case TOKEN_KEYWORD:
1147 case TOKEN_VARIABLE:
1149 switch (vStringLast (arglist))
1151 case 0:
1152 case ' ':
1153 case '{':
1154 case '(':
1155 case '[':
1156 case '.':
1157 /* no need for a space between those and the identifier */
1158 break;
1160 default:
1161 vStringPut (arglist, ' ');
1162 break;
1164 if (token->type == TOKEN_VARIABLE)
1165 vStringPut (arglist, '$');
1166 vStringCat (arglist, token->string);
1167 break;
1170 default: break;
1173 while (token->type != TOKEN_EOF && depth > 0);
1175 vStringTerminate (arglist);
1177 makeFunctionTag (name, arglist, access, impl);
1178 vStringDelete (arglist);
1180 readToken (token); /* normally it's an open brace or "use" keyword */
1183 /* skip use(...) */
1184 if (token->type == TOKEN_KEYWORD && token->keyword == KEYWORD_use)
1186 readToken (token);
1187 if (token->type == TOKEN_OPEN_PAREN)
1189 int depth = 1;
1193 readToken (token);
1194 switch (token->type)
1196 case TOKEN_OPEN_PAREN: depth++; break;
1197 case TOKEN_CLOSE_PAREN: depth--; break;
1198 default: break;
1201 while (token->type != TOKEN_EOF && depth > 0);
1203 readToken (token);
1207 if (token->type == TOKEN_OPEN_CURLY)
1208 enterScope (token, name->string, K_FUNCTION);
1209 else
1210 readNext = FALSE;
1212 if (nameFree)
1213 deleteToken (nameFree);
1215 return readNext;
1218 /* parses declarations of the form
1219 * const NAME = VALUE */
1220 static boolean parseConstant (tokenInfo *const token)
1222 tokenInfo *name;
1224 readToken (token); /* skip const keyword */
1225 if (token->type != TOKEN_IDENTIFIER)
1226 return FALSE;
1228 name = newToken ();
1229 copyToken (name, token, TRUE);
1231 readToken (token);
1232 if (token->type == TOKEN_EQUAL_SIGN)
1233 makeSimplePhpTag (name, K_DEFINE, ACCESS_UNDEFINED);
1235 deleteToken (name);
1237 return token->type == TOKEN_EQUAL_SIGN;
1240 /* parses declarations of the form
1241 * define('NAME', 'VALUE')
1242 * define(NAME, 'VALUE) */
1243 static boolean parseDefine (tokenInfo *const token)
1245 int depth = 1;
1247 readToken (token); /* skip "define" identifier */
1248 if (token->type != TOKEN_OPEN_PAREN)
1249 return FALSE;
1251 readToken (token);
1252 if (token->type == TOKEN_STRING ||
1253 token->type == TOKEN_IDENTIFIER)
1255 makeSimplePhpTag (token, K_DEFINE, ACCESS_UNDEFINED);
1256 readToken (token);
1259 /* skip until the close parenthesis.
1260 * no need to handle nested blocks since they would be invalid
1261 * in this context anyway (the VALUE may only be a scalar, like
1262 * 42
1263 * (42)
1264 * and alike) */
1265 while (token->type != TOKEN_EOF && depth > 0)
1267 switch (token->type)
1269 case TOKEN_OPEN_PAREN: depth++; break;
1270 case TOKEN_CLOSE_PAREN: depth--; break;
1271 default: break;
1273 readToken (token);
1276 return FALSE;
1279 /* parses declarations of the form
1280 * $var = VALUE
1281 * $var; */
1282 static boolean parseVariable (tokenInfo *const token)
1284 tokenInfo *name;
1285 boolean readNext = TRUE;
1286 accessType access = CurrentStatement.access;
1288 name = newToken ();
1289 copyToken (name, token, TRUE);
1291 readToken (token);
1292 if (token->type == TOKEN_EQUAL_SIGN)
1294 phpKind kind = K_VARIABLE;
1296 if (token->parentKind == K_FUNCTION)
1297 kind = K_LOCAL_VARIABLE;
1299 readToken (token);
1300 if (token->type == TOKEN_KEYWORD &&
1301 token->keyword == KEYWORD_function &&
1302 PhpKinds[kind].enabled)
1304 if (parseFunction (token, name))
1305 readToken (token);
1306 readNext = (boolean) (token->type == TOKEN_SEMICOLON);
1308 else
1310 makeSimplePhpTag (name, kind, access);
1311 readNext = FALSE;
1314 else if (token->type == TOKEN_SEMICOLON)
1316 /* generate tags for variable declarations in classes
1317 * class Foo {
1318 * protected $foo;
1320 * but don't get fooled by stuff like $foo = $bar; */
1321 if (token->parentKind == K_CLASS || token->parentKind == K_INTERFACE)
1322 makeSimplePhpTag (name, K_VARIABLE, access);
1324 else
1325 readNext = FALSE;
1327 deleteToken (name);
1329 return readNext;
1332 /* parses namespace declarations
1333 * namespace Foo {}
1334 * namespace Foo\Bar {}
1335 * namespace Foo;
1336 * namespace Foo\Bar;
1337 * namespace;
1338 * napespace {} */
1339 static boolean parseNamespace (tokenInfo *const token)
1341 tokenInfo *nsToken = newToken ();
1343 vStringClear (CurrentNamesapce);
1344 copyToken (nsToken, token, FALSE);
1348 readToken (token);
1349 if (token->type == TOKEN_IDENTIFIER)
1351 if (vStringLength (CurrentNamesapce) > 0)
1352 vStringPut (CurrentNamesapce, '\\');
1353 vStringCat (CurrentNamesapce, token->string);
1356 while (token->type != TOKEN_EOF &&
1357 token->type != TOKEN_SEMICOLON &&
1358 token->type != TOKEN_OPEN_CURLY);
1360 vStringTerminate (CurrentNamesapce);
1361 if (vStringLength (CurrentNamesapce) > 0)
1362 makeNamespacePhpTag (nsToken, CurrentNamesapce);
1364 if (token->type == TOKEN_OPEN_CURLY)
1365 enterScope (token, NULL, -1);
1367 deleteToken (nsToken);
1369 return TRUE;
1372 static void enterScope (tokenInfo *const parentToken,
1373 const vString *const extraScope,
1374 const int parentKind)
1376 tokenInfo *token = newToken ();
1377 int origParentKind = parentToken->parentKind;
1379 copyToken (token, parentToken, TRUE);
1381 if (extraScope)
1383 addToScope (token, extraScope);
1384 token->parentKind = parentKind;
1387 readToken (token);
1388 while (token->type != TOKEN_EOF &&
1389 token->type != TOKEN_CLOSE_CURLY)
1391 boolean readNext = TRUE;
1393 switch (token->type)
1395 case TOKEN_OPEN_CURLY:
1396 enterScope (token, NULL, -1);
1397 break;
1399 case TOKEN_KEYWORD:
1400 switch (token->keyword)
1402 case KEYWORD_class: readNext = parseClassOrIface (token, K_CLASS); break;
1403 case KEYWORD_interface: readNext = parseClassOrIface (token, K_INTERFACE); break;
1404 case KEYWORD_trait: readNext = parseTrait (token); break;
1405 case KEYWORD_function: readNext = parseFunction (token, NULL); break;
1406 case KEYWORD_const: readNext = parseConstant (token); break;
1407 case KEYWORD_define: readNext = parseDefine (token); break;
1409 case KEYWORD_namespace: readNext = parseNamespace (token); break;
1411 case KEYWORD_private: CurrentStatement.access = ACCESS_PRIVATE; break;
1412 case KEYWORD_protected: CurrentStatement.access = ACCESS_PROTECTED; break;
1413 case KEYWORD_public: CurrentStatement.access = ACCESS_PUBLIC; break;
1414 case KEYWORD_var: CurrentStatement.access = ACCESS_PUBLIC; break;
1416 case KEYWORD_abstract: CurrentStatement.impl = IMPL_ABSTRACT; break;
1418 default: break;
1420 break;
1422 case TOKEN_VARIABLE:
1423 readNext = parseVariable (token);
1424 break;
1426 default: break;
1429 if (readNext)
1430 readToken (token);
1433 copyToken (parentToken, token, FALSE);
1434 parentToken->parentKind = origParentKind;
1435 deleteToken (token);
1438 static void findPhpTags (void)
1440 tokenInfo *const token = newToken ();
1442 InPhp = FALSE;
1443 CurrentStatement.access = ACCESS_UNDEFINED;
1444 CurrentStatement.impl = IMPL_UNDEFINED;
1445 CurrentNamesapce = vStringNew ();
1449 enterScope (token, NULL, -1);
1451 while (token->type != TOKEN_EOF); /* keep going even with unmatched braces */
1453 vStringDelete (CurrentNamesapce);
1454 deleteToken (token);
1457 static void initialize (const langType language)
1459 Lang_php = language;
1460 buildPhpKeywordHash ();
1463 extern parserDefinition* PhpParser (void)
1465 static const char *const extensions [] = { "php", "php3", "php4", "php5", "phtml", NULL };
1466 parserDefinition* def = parserNew ("PHP");
1467 def->kinds = PhpKinds;
1468 def->kindCount = KIND_COUNT (PhpKinds);
1469 def->extensions = extensions;
1470 def->parser = findPhpTags;
1471 def->initialize = initialize;
1472 return def;
1475 /* vi:set tabstop=4 shiftwidth=4: */