Fix catch trace bug and inline cost bug
[hiphop-php.git] / hphp / parser / hphp.ll
blob67e27b0c8a34d0cea3662150e32dd954200d935b
1 %{ /* -*- mode: c++ -*- */
2 #include "hphp/parser/scanner.h"
4 // macros for flex
// Token value and location types, both supplied by the HPHP namespace.
5 #define YYSTYPE HPHP::ScannerToken
6 #define YYLTYPE HPHP::Location
// The per-scanner extra slot holds an HPHP::Scanner*; _scanner is shorthand
// for it inside the actions below.
7 #define YY_EXTRA_TYPE HPHP::Scanner*
8 #define _scanner yyextra
// Input is fetched by calling read() on the scanner object.
9 #define YY_INPUT(buf,result,max) _scanner->read(buf,result,max)
// Fatal errors are reported through the scanner object's error(). yyg is
// declared locally, presumably because the yyextra expansion refers to it
// -- TODO confirm against the generated scanner.
10 #define YY_FATAL_ERROR(msg) \
11   do { \
12     struct yyguts_t *yyg = (struct yyguts_t *)yyscanner; \
13     _scanner->error(msg); \
14   } while (0) \
16 #undef YY_READ_BUF_SIZE
17 #undef YY_BUF_SIZE
18 #define YY_READ_BUF_SIZE 1024*128 /* for reading from input */
19 #define YY_BUF_SIZE 1024*64 /* for pattern matching */
// DECLARE_YYCURSOR binds `cursor` (by reference) to the scanner's current
// buffer position and writes the held character (yy_hold_char) back there.
21 #define DECLARE_YYCURSOR \
22   char *&cursor = yyg->yy_c_buf_p; *cursor = yyg->yy_hold_char;
// DECLARE_YYLIMIT sets `limit` to the start of the current buffer plus
// yy_n_chars.
23 #define DECLARE_YYLIMIT \
24   char *limit = YY_CURRENT_BUFFER->yy_ch_buf + yyg->yy_n_chars;
25 #define YYCURSOR  cursor
26 #define YYLIMIT   limit
// RESET_YYCURSOR is the inverse of the write-back in DECLARE_YYCURSOR: it
// saves the character under the cursor into yy_hold_char and replaces it
// with NUL.
27 #define RESET_YYCURSOR yyg->yy_hold_char = *YYCURSOR; *YYCURSOR = '\0';
29 // macros for rules
// RETTOKEN / RETSTEP hand the matched text to the scanner object (through
// setToken or stepPos respectively) and then return token t from the lexer.
// SETTOKEN / STEPPOS make the same calls without returning.
30 #define RETTOKEN(t) do {_scanner->setToken(yytext, yyleng, t); return t;} \
31   while (0)
32 #define RETSTEP(t)  do {_scanner->stepPos(yytext, yyleng, t); return t;} \
33   while (0)
34 #define SETTOKEN(t) _scanner->setToken(yytext, yyleng, t)
35 #define STEPPOS(t)  _scanner->stepPos(yytext, yyleng, t)
// Returns tok only when XHP syntax is enabled; otherwise the word is an
// ordinary T_STRING.
37 #define XHP_ONLY_KEYWORD(tok) do {                           \
38   RETTOKEN(_scanner->isXHPSyntaxEnabled() ? (tok) : T_STRING); \
39 } while (0)
// Returns tok only when HH syntax is enabled; otherwise the word is an
// ordinary T_STRING.
41 #define HH_ONLY_KEYWORD(tok) do {                               \
42   RETTOKEN(_scanner->isHHSyntaxEnabled() ? (tok) : T_STRING); \
43 } while (0)
// True for ASCII letters, underscore, or any value >= 0x7F.
// NOTE(review): a plain (signed) char holding a byte in 0x80-0xff is negative
// and fails the last test; callers presumably pass an unsigned value --
// TODO confirm.
45 #define IS_LABEL_START(c) \
46   (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z') || \
47    (c) == '_' || (c) >= 0x7F)
49 /**
50  * "Next token" types tell us how to treat a token based on the previous
51  * token for the purpose of recognizing XHP tags, XHP class names, XHP
52  * category names, and type lists.
 *   Normal:
 *     no special treatment; this is what getNextTokenType() returns for
 *     every token it does not list explicitly
53  *   XhpTag:
54  *     '<' will be treated as the start of an XHP tag
55  *   XhpTagMaybe:
56  *     '<' will be treated as possibly being the start of an XHP tag;
57  *     we will scan ahead looking at subsequent characters to figure
58  *     out if '<' is definitely the start of an XHP tag
59  *   XhpClassName:
60  *     ':' will be treated as the start of an XHP class name
61  *   XhpCategoryName:
62  *     '%' will be treated as the start of an XHP category name
63  *   TypeListMaybe:
64  *     '<' should be recognized as possibly being the start of a type list;
65  *     this will be resolved by inspecting subsequent tokens
 *
 * Each constant is a distinct bit, so several types can be OR-ed together
 * in one value and tested with '&'.
66  */
67 namespace NextTokenType {
68   static const int Normal = 0x1;
69   static const int XhpTag = 0x2;
70   static const int XhpTagMaybe = 0x4;
71   static const int XhpClassName = 0x8;
72   static const int XhpCategoryName = 0x10;
73   static const int TypeListMaybe = 0x20;
76 static int getNextTokenType(int t) {
  // Maps a token t (the rules below pass the scanner's last token) to a
  // bitmask of NextTokenType flags. The rules test individual bits of the
  // result to decide how a following '<', ':' or '%' should be scanned.
  // Any token without an explicit case yields NextTokenType::Normal.
77   switch (t) {
    // Single-character tokens are listed by their character value; -1 is
    // listed with them (presumably "no previous token" -- TODO confirm).
78     case '=': case '.': case '+': case '-': case '*': case '/': case '%':
79     case '!': case '~': case '&': case '^': case '<': case '>': case '?':
80     case ':': case '[': case '{': case ';': case '@': case -1:
81     case T_LOGICAL_OR:
82     case T_LOGICAL_XOR:
83     case T_LOGICAL_AND:
84     case T_SL:
85     case T_SR:
86     case T_BOOLEAN_OR:
87     case T_BOOLEAN_AND:
88     case T_IS_EQUAL:
89     case T_IS_NOT_EQUAL:
90     case T_IS_IDENTICAL:
91     case T_IS_NOT_IDENTICAL:
92     case T_IS_SMALLER_OR_EQUAL:
93     case T_IS_GREATER_OR_EQUAL:
94     case T_PLUS_EQUAL:
95     case T_MINUS_EQUAL:
96     case T_MUL_EQUAL:
97     case T_DIV_EQUAL:
98     case T_CONCAT_EQUAL:
99     case T_MOD_EQUAL:
100     case T_AND_EQUAL:
101     case T_OR_EQUAL:
102     case T_XOR_EQUAL:
103     case T_SL_EQUAL:
104     case T_SR_EQUAL:
105     case T_ECHO:
106     case T_PRINT:
107     case T_CLONE:
108     case T_EXIT:
109     case T_RETURN:
110     case T_YIELD:
111     case T_AWAIT:
112     case T_NEW:
113     case T_INSTANCEOF:
114     case T_DOUBLE_ARROW:
115     case T_NS_SEPARATOR:
116     case T_INLINE_HTML:
117     case T_INT_CAST:
118     case T_DOUBLE_CAST:
119     case T_STRING_CAST:
120     case T_ARRAY_CAST:
121     case T_OBJECT_CAST:
122     case T_BOOL_CAST:
123     case T_UNSET_CAST:
124     case T_UNRESOLVED_LT:
125     case T_AS:
126       return NextTokenType::XhpTag |
127              NextTokenType::XhpClassName;
    // Same as above, plus '%' may start an XHP category name.
128     case ',': case '(': case '|':
129       return NextTokenType::XhpTag |
130              NextTokenType::XhpClassName |
131              NextTokenType::XhpCategoryName;
    // After these tokens a '<' is only possibly an XHP tag, so the scanner
    // has to look further ahead (see the ST_LT_CHECK rules).
132     case '}':
133       return NextTokenType::XhpTagMaybe |
134              NextTokenType::XhpClassName;
135     case T_INC:
136     case T_DEC:
137       return NextTokenType::XhpTagMaybe;
138     case T_EXTENDS:
139     case T_CLASS:
140     case T_PRIVATE:
141     case T_PROTECTED:
142     case T_PUBLIC:
143     case T_STATIC:
144       return NextTokenType::XhpClassName;
    // After these tokens a '<' may open a type list.
145     case T_STRING:
146     case T_XHP_CHILDREN:
147     case T_XHP_REQUIRED:
148     case T_XHP_ENUM:
149     case T_ARRAY:
150       return NextTokenType::TypeListMaybe;
151     case T_XHP_ATTRIBUTE:
152       return NextTokenType::XhpClassName |
153              NextTokenType::TypeListMaybe;
154     case T_XHP_CATEGORY:
155       return NextTokenType::XhpCategoryName |
156              NextTokenType::TypeListMaybe;
157     default:
158       return NextTokenType::Normal;
159   }
164 %x ST_IN_HTML
165 %x ST_IN_SCRIPTING
166 %x ST_AFTER_HASHBANG
167 %x ST_DOUBLE_QUOTES
168 %x ST_BACKQUOTE
169 %x ST_HEREDOC
170 %x ST_NOWDOC
171 %x ST_END_HEREDOC
172 %x ST_LOOKING_FOR_PROPERTY
173 %x ST_LOOKING_FOR_VARNAME
174 %x ST_LOOKING_FOR_COLON
175 %x ST_VAR_OFFSET
176 %x ST_LT_CHECK
177 %x ST_COMMENT
178 %x ST_DOC_COMMENT
179 %x ST_ONE_LINE_COMMENT
181 %x ST_XHP_IN_TAG
182 %x ST_XHP_END_SINGLETON_TAG
183 %x ST_XHP_END_CLOSE_TAG
184 %x ST_XHP_CHILD
185 %x ST_XHP_COMMENT
187 %option stack
189 LNUM    [0-9]+
190 DNUM    ([0-9]*[\.][0-9]+)|([0-9]+[\.][0-9]*)
191 EXPONENT_DNUM   (({LNUM}|{DNUM})[eE][+-]?{LNUM})
192 HNUM    "0x"[0-9a-fA-F]+
193 LABEL   [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
194 WHITESPACE [ \n\r\t]+
195 TABS_AND_SPACES [ \t]*
196 TOKENS [;:,.\[\]()|^&+\-*/=%!~$<>?@]
197 ANY_CHAR (.|[\n])
198 NEWLINE ("\r"|"\n"|"\r\n")
199 XHPLABEL {LABEL}([:-]{LABEL})*
200 COMMENT_REGEX ("/*"([^\*]|("*"[^/]))*"*/"|("//"|"#")[^\r\n]*{NEWLINE})
201 WHITESPACE_AND_COMMENTS ([ \n\r\t]|({COMMENT_REGEX}))+
204  * LITERAL_DOLLAR matches unescaped $ that aren't followed by a label character
205  * or a { and therefore will be taken literally. The case of literal $ before
206  * a variable or "${" is handled in a rule for each string type
207  */
208 DOUBLE_QUOTES_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$\"\\{]|("\\"{ANY_CHAR})))
209 BACKQUOTE_LITERAL_DOLLAR     ("$"+([^a-zA-Z_\x7f-\xff$`\\{]|("\\"{ANY_CHAR})))
212  * CHARS matches everything up to a variable or "{$"
213  * {'s are matched as long as they aren't followed by a $
214  * The case of { before "{$" is handled in a rule for each string type
216  * For heredocs, matching continues across/after newlines if/when it's known
217  * that the next line doesn't contain a possible ending label
218  */
219 DOUBLE_QUOTES_CHARS ("{"*([^$\"\\{]|("\\"{ANY_CHAR}))|{DOUBLE_QUOTES_LITERAL_DOLLAR})
220 BACKQUOTE_CHARS     ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
224 <ST_IN_SCRIPTING>"exit"                 { RETTOKEN(T_EXIT);}
225 <ST_IN_SCRIPTING>"die"                  { RETTOKEN(T_EXIT);}
226 <ST_IN_SCRIPTING>"function"             { RETTOKEN(T_FUNCTION);}
227 <ST_IN_SCRIPTING>"const"                { RETTOKEN(T_CONST);}
228 <ST_IN_SCRIPTING>"return"               { RETTOKEN(T_RETURN); }
229 <ST_IN_SCRIPTING>"yield"                { RETTOKEN(T_YIELD);}
230 <ST_IN_SCRIPTING>"try"                  { RETTOKEN(T_TRY);}
231 <ST_IN_SCRIPTING>"catch"                { RETTOKEN(T_CATCH);}
232 <ST_IN_SCRIPTING>"finally"              { RETTOKEN(T_FINALLY);}
233 <ST_IN_SCRIPTING>"throw"                { RETTOKEN(T_THROW);}
234 <ST_IN_SCRIPTING>"if"                   { RETTOKEN(T_IF);}
235 <ST_IN_SCRIPTING>"elseif"               { RETTOKEN(T_ELSEIF);}
236 <ST_IN_SCRIPTING>"endif"                { RETTOKEN(T_ENDIF);}
237 <ST_IN_SCRIPTING>"else"                 { RETTOKEN(T_ELSE);}
238 <ST_IN_SCRIPTING>"while"                { RETTOKEN(T_WHILE);}
239 <ST_IN_SCRIPTING>"endwhile"             { RETTOKEN(T_ENDWHILE);}
240 <ST_IN_SCRIPTING>"do"                   { RETTOKEN(T_DO);}
241 <ST_IN_SCRIPTING>"for"                  { RETTOKEN(T_FOR);}
242 <ST_IN_SCRIPTING>"endfor"               { RETTOKEN(T_ENDFOR);}
243 <ST_IN_SCRIPTING>"foreach"              { RETTOKEN(T_FOREACH);}
244 <ST_IN_SCRIPTING>"endforeach"           { RETTOKEN(T_ENDFOREACH);}
245 <ST_IN_SCRIPTING>"declare"              { RETTOKEN(T_DECLARE);}
246 <ST_IN_SCRIPTING>"enddeclare"           { RETTOKEN(T_ENDDECLARE);}
247 <ST_IN_SCRIPTING>"instanceof"           { RETTOKEN(T_INSTANCEOF);}
248 <ST_IN_SCRIPTING>"as"                   { RETTOKEN(T_AS);}
249 <ST_IN_SCRIPTING>"switch"               { RETTOKEN(T_SWITCH);}
250 <ST_IN_SCRIPTING>"endswitch"            { RETTOKEN(T_ENDSWITCH);}
251 <ST_IN_SCRIPTING>"case"                 { RETTOKEN(T_CASE);}
252 <ST_IN_SCRIPTING>"default"              { RETTOKEN(T_DEFAULT);}
253 <ST_IN_SCRIPTING>"break"                { RETTOKEN(T_BREAK);}
254 <ST_IN_SCRIPTING>"continue"             { RETTOKEN(T_CONTINUE);}
255 <ST_IN_SCRIPTING>"goto"                 { RETTOKEN(T_GOTO);}
256 <ST_IN_SCRIPTING>"echo"                 { RETTOKEN(T_ECHO);}
257 <ST_IN_SCRIPTING>"print"                { RETTOKEN(T_PRINT);}
258 <ST_IN_SCRIPTING>"class"                { RETTOKEN(T_CLASS);}
259 <ST_IN_SCRIPTING>"interface"            { RETTOKEN(T_INTERFACE);}
260 <ST_IN_SCRIPTING>"trait"                { RETTOKEN(T_TRAIT);}
261 <ST_IN_SCRIPTING>"insteadof"            { RETTOKEN(T_INSTEADOF);}
262 <ST_IN_SCRIPTING>"extends"              { RETTOKEN(T_EXTENDS);}
263 <ST_IN_SCRIPTING>"implements"           { RETTOKEN(T_IMPLEMENTS);}
264 <ST_IN_SCRIPTING>"attribute"            { XHP_ONLY_KEYWORD(T_XHP_ATTRIBUTE); }
265 <ST_IN_SCRIPTING>"category"             { XHP_ONLY_KEYWORD(T_XHP_CATEGORY); }
266 <ST_IN_SCRIPTING>"children"             { XHP_ONLY_KEYWORD(T_XHP_CHILDREN); }
267 <ST_IN_SCRIPTING>"required"             { XHP_ONLY_KEYWORD(T_XHP_REQUIRED); }
268 <ST_IN_SCRIPTING>"enum"                 { XHP_ONLY_KEYWORD(T_XHP_ENUM); }
270 <ST_IN_SCRIPTING>"->" {
        /* After the object operator, switch to ST_LOOKING_FOR_PROPERTY.
           That state has no keyword rules, so the following label is
           returned as a plain T_STRING even if it spells a keyword. */
271         STEPPOS(T_OBJECT_OPERATOR);
272         yy_push_state(ST_LOOKING_FOR_PROPERTY, yyscanner);
273         return T_OBJECT_OPERATOR;
276 <ST_LOOKING_FOR_PROPERTY>"->" {
277         RETSTEP(T_OBJECT_OPERATOR);
280 <ST_LOOKING_FOR_PROPERTY>{LABEL} {
        /* The property name: report it and return to the state that was
           active before the object operator. */
281         SETTOKEN(T_STRING);
282         yy_pop_state(yyscanner);
283         return T_STRING;
286 <ST_LOOKING_FOR_PROPERTY>{WHITESPACE} {
287         RETSTEP(T_WHITESPACE);
290 <ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
        /* Anything else: give the character back (yyless(0)) and leave the
           state without producing a token, so the previous state rescans
           it. */
291         yyless(0);
292         yy_pop_state(yyscanner);
295 <ST_IN_SCRIPTING>"::"                { RETSTEP(T_DOUBLE_COLON);}
296 <ST_IN_SCRIPTING>"\\"                { RETTOKEN(T_NS_SEPARATOR);}
297 <ST_IN_SCRIPTING>"new"               { RETTOKEN(T_NEW);}
298 <ST_IN_SCRIPTING>"clone"             { RETTOKEN(T_CLONE);}
299 <ST_IN_SCRIPTING>"var"               { RETTOKEN(T_VAR);}
301 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
  /* A parenthesised type name is a cast token unless the previous token is
     T_FUNCTION while HH syntax is enabled. In that case only the opening
     parenthesis is kept (yyless(1)) and the rest is rescanned; presumably
     this keeps a type appearing right after the function keyword from being
     lexed as a cast -- TODO confirm. The six rules that follow apply the
     same logic to the other cast keywords. */
302   if (_scanner->lastToken() != T_FUNCTION || !_scanner->isHHSyntaxEnabled()) {
303     RETSTEP(T_INT_CAST);
304   }
305   yyless(1);
306   RETSTEP('(');
309 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
310   if (_scanner->lastToken() != T_FUNCTION || !_scanner->isHHSyntaxEnabled()) {
311     RETSTEP(T_DOUBLE_CAST);
312   }
313   yyless(1);
314   RETSTEP('(');
317 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
318   if (_scanner->lastToken() != T_FUNCTION || !_scanner->isHHSyntaxEnabled()) {
319     RETSTEP(T_STRING_CAST);
320   }
321   yyless(1);
322   RETSTEP('(');
325 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
326   if (_scanner->lastToken() != T_FUNCTION || !_scanner->isHHSyntaxEnabled()) {
327     RETSTEP(T_ARRAY_CAST);
328   }
329   yyless(1);
330   RETSTEP('(');
333 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
334   if (_scanner->lastToken() != T_FUNCTION || !_scanner->isHHSyntaxEnabled()) {
335     RETSTEP(T_OBJECT_CAST);
336   }
337   yyless(1);
338   RETSTEP('(');
341 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
342   if (_scanner->lastToken() != T_FUNCTION || !_scanner->isHHSyntaxEnabled()) {
343     RETSTEP(T_BOOL_CAST);
344   }
345   yyless(1);
346   RETSTEP('(');
349 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
350   if (_scanner->lastToken() != T_FUNCTION || !_scanner->isHHSyntaxEnabled()) {
351     RETSTEP(T_UNSET_CAST);
352   }
353   yyless(1);
354   RETSTEP('(');
357 <ST_IN_SCRIPTING>"eval"               { RETTOKEN(T_EVAL);}
358 <ST_IN_SCRIPTING>"include"            { RETTOKEN(T_INCLUDE);}
359 <ST_IN_SCRIPTING>"include_once"       { RETTOKEN(T_INCLUDE_ONCE);}
360 <ST_IN_SCRIPTING>"require"            { RETTOKEN(T_REQUIRE);}
361 <ST_IN_SCRIPTING>"require_once"       { RETTOKEN(T_REQUIRE_ONCE);}
362 <ST_IN_SCRIPTING>"namespace"          { RETTOKEN(T_NAMESPACE);}
363 <ST_IN_SCRIPTING>"use"                { RETTOKEN(T_USE);}
364 <ST_IN_SCRIPTING>"global"             { RETTOKEN(T_GLOBAL);}
365 <ST_IN_SCRIPTING>"isset"              { RETTOKEN(T_ISSET);}
366 <ST_IN_SCRIPTING>"empty"              { RETTOKEN(T_EMPTY);}
367 <ST_IN_SCRIPTING>"__halt_compiler"    { RETTOKEN(T_HALT_COMPILER);}
368 <ST_IN_SCRIPTING>"__compiler_halt_offset__" { RETTOKEN(T_COMPILER_HALT_OFFSET);}
369 <ST_IN_SCRIPTING>"static"             { RETTOKEN(T_STATIC);}
370 <ST_IN_SCRIPTING>"abstract"           { RETTOKEN(T_ABSTRACT);}
371 <ST_IN_SCRIPTING>"final"              { RETTOKEN(T_FINAL);}
372 <ST_IN_SCRIPTING>"private"            { RETTOKEN(T_PRIVATE);}
373 <ST_IN_SCRIPTING>"protected"          { RETTOKEN(T_PROTECTED);}
374 <ST_IN_SCRIPTING>"public"             { RETTOKEN(T_PUBLIC);}
375 <ST_IN_SCRIPTING>"unset"              { RETTOKEN(T_UNSET);}
376 <ST_IN_SCRIPTING>"=>"                 { RETSTEP(T_DOUBLE_ARROW);}
377 <ST_IN_SCRIPTING>"list"               { RETTOKEN(T_LIST);}
378 <ST_IN_SCRIPTING>"array"              { RETTOKEN(T_ARRAY);}
379 <ST_IN_SCRIPTING>"++"                 { RETSTEP(T_INC);}
380 <ST_IN_SCRIPTING>"--"                 { RETSTEP(T_DEC);}
381 <ST_IN_SCRIPTING>"==="                { RETSTEP(T_IS_IDENTICAL);}
382 <ST_IN_SCRIPTING>"!=="                { RETSTEP(T_IS_NOT_IDENTICAL);}
383 <ST_IN_SCRIPTING>"=="                 { RETSTEP(T_IS_EQUAL);}
384 <ST_IN_SCRIPTING>"!="|"<>"            { RETSTEP(T_IS_NOT_EQUAL);}
385 <ST_IN_SCRIPTING>"<="                 { RETSTEP(T_IS_SMALLER_OR_EQUAL);}
386 <ST_IN_SCRIPTING>">="                 { RETSTEP(T_IS_GREATER_OR_EQUAL);}
387 <ST_IN_SCRIPTING>"+="                 { RETSTEP(T_PLUS_EQUAL);}
388 <ST_IN_SCRIPTING>"-="                 { RETSTEP(T_MINUS_EQUAL);}
389 <ST_IN_SCRIPTING>"*="                 { RETSTEP(T_MUL_EQUAL);}
390 <ST_IN_SCRIPTING>"/="                 { RETSTEP(T_DIV_EQUAL);}
391 <ST_IN_SCRIPTING>".="                 { RETSTEP(T_CONCAT_EQUAL);}
392 <ST_IN_SCRIPTING>"%="                 { RETSTEP(T_MOD_EQUAL);}
393 <ST_IN_SCRIPTING>"<<="                { RETSTEP(T_SL_EQUAL);}
394 <ST_IN_SCRIPTING>">>="                { RETSTEP(T_SR_EQUAL);}
395 <ST_IN_SCRIPTING>"&="                 { RETSTEP(T_AND_EQUAL);}
396 <ST_IN_SCRIPTING>"|="                 { RETSTEP(T_OR_EQUAL);}
397 <ST_IN_SCRIPTING>"^="                 { RETSTEP(T_XOR_EQUAL);}
398 <ST_IN_SCRIPTING>"||"                 { RETSTEP(T_BOOLEAN_OR);}
399 <ST_IN_SCRIPTING>"&&"                 { RETSTEP(T_BOOLEAN_AND);}
400 <ST_IN_SCRIPTING>"OR"                 { RETTOKEN(T_LOGICAL_OR);}
401 <ST_IN_SCRIPTING>"AND"                { RETTOKEN(T_LOGICAL_AND);}
402 <ST_IN_SCRIPTING>"XOR"                { RETTOKEN(T_LOGICAL_XOR);}
403 <ST_IN_SCRIPTING>"<<"                 { RETSTEP(T_SL);}
405 <ST_IN_SCRIPTING>"shape"              { HH_ONLY_KEYWORD(T_SHAPE); }
406 <ST_IN_SCRIPTING>"type"               { HH_ONLY_KEYWORD(T_UNRESOLVED_TYPE); }
407 <ST_IN_SCRIPTING>"newtype"            { HH_ONLY_KEYWORD(T_UNRESOLVED_NEWTYPE); }
408 <ST_IN_SCRIPTING>"await"              { HH_ONLY_KEYWORD(T_AWAIT);}
409 <ST_IN_SCRIPTING>"async"/{WHITESPACE_AND_COMMENTS}[a-zA-Z0-9_\x7f-\xff] {
  /* Trailing context: the word only matches here when whitespace or
     comments and then a label character follow it. It becomes T_ASYNC
     only when HH syntax is enabled, otherwise T_STRING. */
410   HH_ONLY_KEYWORD(T_ASYNC);
413 <ST_IN_SCRIPTING>"tuple"/("("|{WHITESPACE_AND_COMMENTS}"(") {
  /* Trailing context: only matches when an opening parenthesis follows,
     optionally after whitespace or comments. */
414   HH_ONLY_KEYWORD(T_TUPLE);
417 <ST_IN_SCRIPTING>"?"/":"[a-zA-Z_\x7f-\xff] {
  /* Matches a question mark that is immediately followed by a colon and a
     label-start character. The colon and the character are trailing
     context, so only the question mark is consumed. */
418   int ntt = getNextTokenType(_scanner->lastToken());
419   if (!_scanner->isXHPSyntaxEnabled() ||
420       ((ntt & NextTokenType::XhpClassName) && _scanner->lastToken() != '}')) {
421     RETSTEP('?');
422   }
423   /* If XHP is enabled and "?:" occurs in a place where an XHP class name is
424      not expected or it occurs after "}", drop into the ST_LOOKING_FOR_COLON
425      state to avoid potentially treating ":" as the beginning of an XHP class
426      name */
427   BEGIN(ST_LOOKING_FOR_COLON);
428   RETSTEP('?');
431 <ST_LOOKING_FOR_COLON>":" {
  /* The colon that was seen as trailing context: return it as a plain
     colon and resume normal scanning. */
432   BEGIN(ST_IN_SCRIPTING);
433   RETSTEP(':');
436 <ST_IN_SCRIPTING>"..." {
  /* Without HH syntax there is no T_VARARG token: keep only the first dot
     and let the remaining two be rescanned. */
437   if (!_scanner->isHHSyntaxEnabled()) {
438     yyless(1);
439     RETSTEP('.');
440   }
441   RETTOKEN(T_VARARG);
444 <ST_IN_SCRIPTING>">>" {
  /* This is the shift-right operator unless getLookaheadLtDepth() reports
     at least 2; then only one greater-than sign is returned and the second
     is rescanned. Presumably that lets two nested type lists close back to
     back -- TODO confirm. */
445   if (_scanner->getLookaheadLtDepth() < 2) {
446     RETSTEP(T_SR);
447   }
448   yyless(1);
449   RETSTEP('>');
452 <ST_IN_SCRIPTING>"<"[a-zA-Z_\x7f-\xff] {
  /* A less-than sign directly followed by a label-start character. The
     previous token decides whether it opens an XHP tag, might open one,
     might open a type list, or is the ordinary operator. In every outcome
     the label character is given back with yyless. */
453   if (!_scanner->isXHPSyntaxEnabled()) {
    /* The assert documents the expectation that HH syntax is never enabled
       without XHP syntax. */
454     assert(!_scanner->isHHSyntaxEnabled());
455     yyless(1);
456     RETSTEP('<');
457   }
458   int ntt = getNextTokenType(_scanner->lastToken());
459   if (ntt & NextTokenType::XhpTag) {
460     yyless(1);
461     STEPPOS(T_XHP_TAG_LT);
462     yy_push_state(ST_XHP_IN_TAG, yyscanner);
463     return T_XHP_TAG_LT;
464   }
465   if (ntt & NextTokenType::XhpTagMaybe) {
466     // Shift to state ST_LT_CHECK to do a more extensive check to
467     // determine if this is the beginning of an XHP tag.
    // Nothing is consumed and no token is returned here; the same input is
    // matched again by the ST_LT_CHECK rules.
468     yyless(0);
469     BEGIN(ST_LT_CHECK);
470     break;
471   }
472   yyless(1);
473   if (_scanner->isHHSyntaxEnabled() && (ntt & NextTokenType::TypeListMaybe)) {
474     // Return T_UNRESOLVED_LT; the scanner will inspect subsequent tokens
475     // to resolve this.
476     RETSTEP(T_UNRESOLVED_LT);
477   }
478   RETSTEP('<');
481 <ST_IN_SCRIPTING>"<" {
  /* A less-than sign matched on its own. With HH syntax enabled it may
     still open a type list, depending on the previous token. */
482   if (_scanner->isHHSyntaxEnabled()) {
483     int ntt = getNextTokenType(_scanner->lastToken());
484     if (ntt & NextTokenType::TypeListMaybe) {
485       // Return T_UNRESOLVED_LT; the scanner will inspect subsequent tokens
486       // to resolve this.
487       RETSTEP(T_UNRESOLVED_LT);
488     }
489   }
490   RETSTEP('<');
493 <ST_LT_CHECK>"<"{XHPLABEL}(">"|"/>"|{WHITESPACE_AND_COMMENTS}(">"|"/>"|[a-zA-Z_\x7f-\xff])) {
  /* Extended look-ahead succeeded: the input looks like an XHP tag whose
     name is followed by a tag end or, after whitespace or comments, by a
     tag end or a label-start character. Only the less-than sign is kept
     (yyless(1)); ST_IN_SCRIPTING is restored first so that it is the state
     saved underneath the pushed ST_XHP_IN_TAG. */
494   BEGIN(ST_IN_SCRIPTING);
495   yyless(1);
496   STEPPOS(T_XHP_TAG_LT);
497   yy_push_state(ST_XHP_IN_TAG, yyscanner);
498   return T_XHP_TAG_LT;
501 <ST_LT_CHECK>"<" {
  /* Look-ahead failed: this is an ordinary less-than operator. */
502   BEGIN(ST_IN_SCRIPTING);
503   RETSTEP('<');
506 <ST_IN_SCRIPTING>":"{XHPLABEL}  {
  /* A colon followed by an XHP label is an XHP class name only when XHP
     syntax is enabled and the previous token allows one. Otherwise just
     the colon is kept and the label is rescanned. */
507   if (_scanner->isXHPSyntaxEnabled()) {
508     int ntt = getNextTokenType(_scanner->lastToken());
509     if (ntt & NextTokenType::XhpClassName) {
510       yytext++; yyleng--; // skipping the first colon
511       RETTOKEN(T_XHP_LABEL);
512     }
513   }
514   yyless(1);
515   RETSTEP(':');
518 <ST_IN_SCRIPTING>"%"{XHPLABEL}  {
  /* Same scheme for a percent sign followed by an XHP label: an XHP
     category name when allowed, otherwise the plain percent operator. */
519   if (_scanner->isXHPSyntaxEnabled()) {
520     int ntt = getNextTokenType(_scanner->lastToken());
521     if (ntt & NextTokenType::XhpCategoryName) {
522       yytext++; yyleng--; // skipping "%"
523       RETTOKEN(T_XHP_CATEGORY_LABEL);
524     }
525   }
526   yyless(1);
527   RETSTEP('%');
530 <ST_IN_SCRIPTING>{TOKENS}             {RETSTEP(yytext[0]);}
532 <ST_IN_SCRIPTING>"{" {
        /* Every opening brace pushes a state; the closing-brace rules pop
           one state per brace. */
533         STEPPOS('{');
534         yy_push_state(ST_IN_SCRIPTING, yyscanner);
535         return '{';
538 <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
        /* Dollar-brace interpolation inside a string: the string state is
           saved on the state stack while the variable name is scanned. */
539         STEPPOS(T_DOLLAR_OPEN_CURLY_BRACES);
540         yy_push_state(ST_LOOKING_FOR_VARNAME, yyscanner);
541         return T_DOLLAR_OPEN_CURLY_BRACES;
544 <ST_IN_SCRIPTING>"}"/":"[a-zA-Z_\x7f-\xff] {
        /* Closing brace that is directly followed by a colon and a
           label-start character (trailing context, not consumed). */
545         STEPPOS('}');
546         // We need to be robust against a '}' in PHP code with
547         // no corresponding '{'
548         struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
        /* Only pop when the state stack is not empty. */
549         if (yyg->yy_start_stack_ptr) {
550           yy_pop_state(yyscanner);
551           if (YY_START == ST_IN_SCRIPTING) {
552             /* If XHP is enabled and "}:" occurs (and "}" does not cause us
553                to transition to some state other than ST_IN_SCRIPTING), drop
554                into the ST_LOOKING_FOR_COLON state to avoid potentially
555                treating ":" as the beginning of an XHP class name */
            /* NOTE(review): the comment above mentions XHP being enabled,
               but no isXHPSyntaxEnabled() test is made here -- confirm that
               this is intended. */
556             BEGIN(ST_LOOKING_FOR_COLON);
557           }
558         }
559         return '}';
562 <ST_IN_SCRIPTING>"}" {
563         STEPPOS('}');
564         // We need to be robust against a '}' in PHP code with
565         // no corresponding '{'
566         struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
567         if (yyg->yy_start_stack_ptr) yy_pop_state(yyscanner);
568         return '}';
571 <ST_LOOKING_FOR_VARNAME>{LABEL} {
        /* The name that follows a dollar-brace opener inside a string. */
572         SETTOKEN(T_STRING_VARNAME);
573         // Change state to IN_SCRIPTING; current state will be popped
574         // when we encounter '}'
575         BEGIN(ST_IN_SCRIPTING);
576         return T_STRING_VARNAME;
579 <ST_LOOKING_FOR_VARNAME>{ANY_CHAR} {
        /* Not a label: give the character back and let ST_IN_SCRIPTING
           rescan it. No token is returned from this action. */
580         yyless(0);
581         // Change state to IN_SCRIPTING; current state will be popped
582         // when we encounter '}'
583         BEGIN(ST_IN_SCRIPTING);
586 <ST_IN_SCRIPTING,ST_XHP_IN_TAG>{LNUM} {
        /* Integer literal made of decimal digits. strtoll is called with
           base 0, so a leading zero selects octal. The result is kept in a
           long long (the type strtoll returns): storing it in a long
           truncates it where long is 32 bits wide and can turn an in-range
           literal into a negative value that is then reported as too big.
           On overflow an error is reported; HH files additionally get
           T_HH_ERROR instead of T_LNUMBER. */
587         errno = 0;
588         long long ret = strtoll(yytext, NULL, 0);
589         if (errno == ERANGE || ret < 0) {
590                 _scanner->error("Dec number is too big: %s", yytext);
591                 if (_scanner->isHHFile()) {
592                         RETTOKEN(T_HH_ERROR);
593                 }
594         }
595         RETTOKEN(T_LNUMBER);
598 <ST_IN_SCRIPTING,ST_XHP_IN_TAG>{HNUM} {
        /* Hexadecimal integer literal. The value is parsed as unsigned and
           rejected when it does not fit a signed 64-bit integer. The result
           is kept in the type strtoull returns and compared against
           LLONG_MAX explicitly; this replaces a conversion to long followed
           by a test for a negative value, which depended on the width of
           long and on signed wrap-around. On overflow an error is reported;
           HH files additionally get T_HH_ERROR instead of T_LNUMBER. */
599         errno = 0;
600         unsigned long long ret = strtoull(yytext, NULL, 16);
601         if (errno == ERANGE ||
            ret > static_cast<unsigned long long>(LLONG_MAX)) {
602                 _scanner->error("Hex number is too big: %s", yytext);
603                 if (_scanner->isHHFile()) {
604                         RETTOKEN(T_HH_ERROR);
605                 }
606         }
607         RETTOKEN(T_LNUMBER);
610 <ST_VAR_OFFSET>0|([1-9][0-9]*) { /* Offset could be treated as a long */
        /* Array offset written inside a string: either a single zero or
           digits without a leading zero. The strtoll result is kept in a
           long long; stored in a 32-bit long it could never compare equal
           to LLONG_MAX, so the overflow check below would be dead code on
           such platforms. */
611         errno = 0;
612         long long ret = strtoll(yytext, NULL, 0);
613         if (ret == LLONG_MAX && errno == ERANGE) {
614                 _scanner->error("Offset number is too big: %s", yytext);
615                 if (_scanner->isHHFile()) {
616                         RETTOKEN(T_HH_ERROR);
617                 }
618         }
619         RETTOKEN(T_NUM_STRING);
622 <ST_VAR_OFFSET>{LNUM}|{HNUM} { /* Offset must be treated as a string */
623         RETTOKEN(T_NUM_STRING);
626 <ST_IN_SCRIPTING,ST_XHP_IN_TAG>{DNUM}|{EXPONENT_DNUM} {
627         RETTOKEN(T_DNUMBER);
630 <ST_IN_SCRIPTING>"__CLASS__"            { RETTOKEN(T_CLASS_C); }
631 <ST_IN_SCRIPTING>"__TRAIT__"            { RETTOKEN(T_TRAIT_C); }
632 <ST_IN_SCRIPTING>"__FUNCTION__"         { RETTOKEN(T_FUNC_C); }
633 <ST_IN_SCRIPTING>"__METHOD__"           { RETTOKEN(T_METHOD_C);}
634 <ST_IN_SCRIPTING>"__LINE__"             { RETTOKEN(T_LINE); }
635 <ST_IN_SCRIPTING>"__FILE__"             { RETTOKEN(T_FILE); }
636 <ST_IN_SCRIPTING>"__DIR__"              { RETTOKEN(T_DIR); }
637 <ST_IN_SCRIPTING>"__NAMESPACE__"        { RETTOKEN(T_NS_C); }
639 <INITIAL>"#"[^\n]*"\n" {
        /* A line starting with a hash sign while still in INITIAL is handed
           to setHashBang and returned as inline HTML. ST_IN_SCRIPTING is
           installed first so that it is the state saved underneath the
           pushed ST_AFTER_HASHBANG; a later pop lands in scripting mode. */
640         _scanner->setHashBang(yytext, yyleng, T_INLINE_HTML);
641         BEGIN(ST_IN_SCRIPTING);
642         yy_push_state(ST_AFTER_HASHBANG, yyscanner);
643         return T_INLINE_HTML;
646 <INITIAL>(([^<#]|"<"[^?%s<]){1,400})|"<s"|"<" {
        /* Leading inline HTML, returned in pieces of at most 400 pattern
           repetitions. As above, ST_IN_SCRIPTING is placed underneath the
           pushed ST_IN_HTML so that an open tag can pop back to it. */
647         SETTOKEN(T_INLINE_HTML);
648         BEGIN(ST_IN_SCRIPTING);
649         yy_push_state(ST_IN_HTML, yyscanner);
650         return T_INLINE_HTML;
653 <ST_IN_HTML,ST_AFTER_HASHBANG>(([^<]|"<"[^?%s<]){1,400})|"<s"|"<" {
        /* Further inline HTML. BEGIN replaces only the current state, so
           whatever was saved on the state stack stays there. */
654         SETTOKEN(T_INLINE_HTML);
655         BEGIN(ST_IN_HTML);
656         return T_INLINE_HTML;
659 <INITIAL,ST_IN_HTML,ST_AFTER_HASHBANG>"<?"|("<?php"([ \t]|{NEWLINE}))|"<script"{WHITESPACE}+"language"{WHITESPACE}*"="{WHITESPACE}*("php"|"\"php\""|"\'php\'"){WHITESPACE}*">" {
        /* yyleng > 2 means one of the long forms matched. The two-character
           short tag counts as an open tag only when shortTags() is on;
           otherwise it is passed through as inline HTML. */
660         if (_scanner->shortTags() || yyleng > 2) {
661           SETTOKEN(T_OPEN_TAG);
          /* From INITIAL switch straight to scripting; from the other two
             states pop back to the state saved on the stack. */
662           if (YY_START == INITIAL) {
663             BEGIN(ST_IN_SCRIPTING);
664           } else {
665             yy_pop_state(yyscanner);
666           }
667           return T_OPEN_TAG;
668         } else {
669           SETTOKEN(T_INLINE_HTML);
          /* Make sure the scanner ends up in ST_IN_HTML with a state saved
             beneath it; when already in ST_IN_HTML nothing changes. */
670           if (YY_START == INITIAL) {
671             BEGIN(ST_IN_SCRIPTING);
672             yy_push_state(ST_IN_HTML, yyscanner);
673           } else if (YY_START == ST_AFTER_HASHBANG) {
674             BEGIN(ST_IN_HTML);
675           }
676           return T_INLINE_HTML;
677         }
680 <INITIAL,ST_IN_HTML,ST_AFTER_HASHBANG>"<%="|"<?=" {
        /* Open-tag-with-echo forms. The ASP spelling needs aspTags() and
           the question-mark spelling needs shortTags(); when the relevant
           option is off the text is passed through as inline HTML. An
           accepted tag is reported as T_ECHO. */
681         if ((yytext[1]=='%' && _scanner->aspTags()) ||
682             (yytext[1]=='?' && _scanner->shortTags())) {
683           if (YY_START == INITIAL) {
684             BEGIN(ST_IN_SCRIPTING);
685           } else {
686             yy_pop_state(yyscanner);
687           }
688           RETTOKEN(T_ECHO); //return T_OPEN_TAG_WITH_ECHO;
689         } else {
690           if (YY_START == INITIAL) {
691             BEGIN(ST_IN_SCRIPTING);
692             yy_push_state(ST_IN_HTML, yyscanner);
693           } else if (YY_START == ST_AFTER_HASHBANG) {
694             BEGIN(ST_IN_HTML);
695           }
696           RETTOKEN(T_INLINE_HTML);
697         }
700 <INITIAL,ST_IN_HTML,ST_AFTER_HASHBANG>"<%" {
        /* ASP-style open tag: honoured only when aspTags() is on, otherwise
           inline HTML. State handling mirrors the rule above. */
701         if (_scanner->aspTags()) {
702           if (YY_START == INITIAL) {
703             BEGIN(ST_IN_SCRIPTING);
704           } else {
705             yy_pop_state(yyscanner);
706           }
707           RETTOKEN(T_OPEN_TAG);
708         } else {
709           if (YY_START == INITIAL) {
710             BEGIN(ST_IN_SCRIPTING);
711             yy_push_state(ST_IN_HTML, yyscanner);
712           } else if (YY_START == ST_AFTER_HASHBANG) {
713             BEGIN(ST_IN_HTML);
714           }
715           RETTOKEN(T_INLINE_HTML);
716         }
719 <INITIAL,ST_IN_HTML,ST_AFTER_HASHBANG>"<?hh"([ \t]|{NEWLINE}) {
        /* The hh open tag is accepted only at the very start of the input
           (INITIAL) or directly after a hashbang line. Seen from
           ST_IN_HTML, i.e. after other content, it is an error. On success
           the scanner is marked as scanning an HH file. */
720         if (YY_START == INITIAL) {
721           BEGIN(ST_IN_SCRIPTING);
722         } else if (YY_START == ST_AFTER_HASHBANG) {
723           yy_pop_state(yyscanner);
724         } else {
725           _scanner->error("HH mode: content before <?hh");
726           return T_HH_ERROR;
727         }
728         STEPPOS(T_OPEN_TAG);
729         _scanner->setHHFile();
730         return T_OPEN_TAG;
733 <ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
        /* The second text/length pair (yytext+1, yyleng-1) skips the
           leading dollar sign. */
734         _scanner->setToken(yytext, yyleng, yytext+1, yyleng-1, T_VARIABLE);
735         return T_VARIABLE;
738 <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x7f-\xff] {
        /* Variable followed by a property access inside a string: the last
           three characters (the arrow and the first property character)
           are given back, and ST_LOOKING_FOR_PROPERTY is pushed so they are
           scanned next. setToken runs after yyless, so it sees the
           shortened text. */
739         yyless(yyleng - 3);
740         yy_push_state(ST_LOOKING_FOR_PROPERTY, yyscanner);
741         _scanner->setToken(yytext, yyleng, yytext+1, yyleng-1, T_VARIABLE);
742         return T_VARIABLE;
745 <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
        /* Variable followed by an opening bracket inside a string: the
           bracket is given back and ST_VAR_OFFSET is pushed to scan the
           offset. */
746         yyless(yyleng - 1);
747         yy_push_state(ST_VAR_OFFSET, yyscanner);
748         _scanner->setToken(yytext, yyleng, yytext+1, yyleng-1, T_VARIABLE);
749         return T_VARIABLE;
752 <ST_VAR_OFFSET>"]" {
        /* End of the offset: return to the enclosing string state. */
753         yy_pop_state(yyscanner);
754         return ']';
757 <ST_VAR_OFFSET>{TOKENS}|[{}\"`] {
758         /* Only '[' can be valid, but returning other tokens will allow
759            a more explicit parse error */
760         return yytext[0];
763 <ST_VAR_OFFSET>[ \n\r\t\\\'#] {
764         /* Invalid rule to return a more explicit parse error with proper
765            line number */
766         yyless(0);
767         yy_pop_state(yyscanner);
768         RETSTEP(T_ENCAPSED_AND_WHITESPACE);
771 <ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
772         RETTOKEN(T_STRING);
775 <ST_IN_SCRIPTING,ST_XHP_IN_TAG>{WHITESPACE} {
776         RETSTEP(T_WHITESPACE);
779 <ST_IN_SCRIPTING,ST_XHP_IN_TAG>"#"|"//" {
        /* Start of a one-line comment. The matched text is kept with
           yymore() so that the whole comment accumulates into a single
           T_COMMENT token. */
780         yy_push_state(ST_ONE_LINE_COMMENT, yyscanner);
781         yymore();
784 <ST_ONE_LINE_COMMENT>"?"|"%"|">" {
        /* One of the characters that can form a close tag, seen on its own:
           it is simply part of the comment. */
785         yymore();
788 <ST_ONE_LINE_COMMENT>[^\n\r?%>]*{ANY_CHAR} {
        /* A run of ordinary comment characters plus the one character that
           stopped it. If that character could belong to a close tag it is
           given back so the more specific rules can look at it; otherwise
           the comment is complete and is returned. */
789         switch (yytext[yyleng-1]) {
790         case '?':
791         case '%':
792         case '>':
793                 yyless(yyleng-1);
794                 yymore();
795                 break;
796         default:
797                 STEPPOS(T_COMMENT);
798                 yy_pop_state(yyscanner);
799                 return T_COMMENT;
800         }
803 <ST_ONE_LINE_COMMENT>{NEWLINE} {
804         STEPPOS(T_COMMENT);
805         yy_pop_state(yyscanner);
806         return T_COMMENT;
809 <ST_ONE_LINE_COMMENT>"?>"|"%>" {
        /* A close tag inside a one-line comment. In an HH file this is an
           error. Otherwise the comment ends just before the tag: the token
           excludes the last two characters, which are given back for
           rescanning. The ASP spelling only counts when aspTags() is on;
           if it is off the two characters stay part of the comment. */
810         if (_scanner->isHHFile()) {
811           _scanner->error("HH mode: ?> not allowed");
812           return T_HH_ERROR;
813         }
814         if (_scanner->aspTags() || yytext[yyleng-2] != '%') {
815           _scanner->setToken(yytext, yyleng-2, yytext, yyleng-2, T_COMMENT);
816                 yyless(yyleng-2);
817                 yy_pop_state(yyscanner);
818                 return T_COMMENT;
819         } else {
820                 yymore();
821         }
824 <ST_IN_SCRIPTING,ST_XHP_IN_TAG>"/**"{WHITESPACE} {
        // Doc comment opener: it must be followed by whitespace, otherwise
        // the plain comment opener rule below applies.
825         yy_push_state(ST_DOC_COMMENT, yyscanner);
826         yymore();
829 <ST_IN_SCRIPTING,ST_XHP_IN_TAG>"/*" {
830         yy_push_state(ST_COMMENT, yyscanner);
831         yymore();
834 <ST_COMMENT,ST_DOC_COMMENT>[^*]+ {
        // Comment body without asterisks; the text keeps accumulating.
835         yymore();
838 <ST_DOC_COMMENT>"*/" {
        // End of a doc comment: reported through SETTOKEN, whereas a plain
        // comment below is reported through STEPPOS.
839         SETTOKEN(T_DOC_COMMENT);
840         yy_pop_state(yyscanner);
841         return T_DOC_COMMENT;
844 <ST_COMMENT>"*/" {
845         STEPPOS(T_COMMENT);
846         yy_pop_state(yyscanner);
847         return T_COMMENT;
850 <ST_COMMENT,ST_DOC_COMMENT>"*" {
        // An asterisk that does not start the terminator.
851         yymore();
854 <ST_XHP_COMMENT>[^-]+ {
        // XHP comments follow the same scheme, with a three-character
        // terminator made of two dashes and a greater-than sign.
855         yymore();
858 <ST_XHP_COMMENT>"-->" {
859         STEPPOS(T_COMMENT);
860         yy_pop_state(yyscanner);
861         return T_COMMENT;
864 <ST_XHP_COMMENT>"-" {
865         yymore();
868 <ST_IN_SCRIPTING>"?>"{NEWLINE}? {
        /* Close tag, together with one directly following line terminator.
           Not allowed in HH files. ST_IN_HTML is pushed, so a later open
           tag pops back to scripting. The token is T_CLOSE_TAG when the
           scanner reports full(), otherwise a semicolon. */
869         if (_scanner->isHHFile()) {
870           _scanner->error("HH mode: ?> not allowed");
871           return T_HH_ERROR;
872         }
873         yy_push_state(ST_IN_HTML, yyscanner);
874         if (_scanner->full()) {
875           RETSTEP(T_CLOSE_TAG);
876         } else {
877           RETSTEP(';');
878         }
881 <ST_IN_SCRIPTING>"</script"{WHITESPACE}*">"{NEWLINE}? {
        /* Script-element close tag: handled like the close tag above, but
           without the HH-file check. */
882         yy_push_state(ST_IN_HTML, yyscanner);
883         if (_scanner->full()) {
884           RETSTEP(T_CLOSE_TAG);
885         } else {
886           RETSTEP(';');
887         }
890 <ST_IN_SCRIPTING>"%>"{NEWLINE}? {
        /* ASP-style close tag, honoured only when aspTags() is on. When it
           is off, only the percent sign is kept and returned as a
           single-character token; the rest is rescanned. */
891         if (_scanner->aspTags()) {
892                 yy_push_state(ST_IN_HTML, yyscanner);
893                 if (_scanner->full()) {
894                   RETSTEP(T_CLOSE_TAG);
895                 } else {
896                   RETSTEP(';');
897                 }
898         } else {
899                 yyless(1);
900                 _scanner->setToken(yytext, 1, yytext, 1);
901                 RETSTEP(yytext[0]);
902         }
<ST_IN_SCRIPTING>(b?[\"]{DOUBLE_QUOTES_CHARS}*("{"*|"$"*)[\"]) {
        /* Complete double-quoted literal matched in one go, with an
         * optional one-character prefix in front of the opening quote. */
        const int prefixLen = (yytext[0] != '"') ? 1 : 0;

        /* The body excludes the prefix and both quote characters. */
        const char *body = yytext + prefixLen + 1;
        const int bodyLen = yyleng - prefixLen - 2;

        const std::string value = _scanner->escape(body, bodyLen, '"');
        _scanner->setToken(yytext, yyleng, value.c_str(), value.length());
        return T_CONSTANT_ENCAPSED_STRING;
}
<ST_IN_SCRIPTING>(b?[\']([^\'\\]|("\\"{ANY_CHAR}))*[\']?) {
        /* Single-quoted string literal with an optional one-character
         * prefix. The closing quote is optional in the pattern, so a string
         * left open at the end of the input is matched here too and is
         * reported as T_ENCAPSED_AND_WHITESPACE instead of as a constant. */
        const int bprefix = (yytext[0] != '\'') ? 1 : 0;
        const int bodyStart = bprefix + 1;

        /* Find the terminating quote by walking the body and skipping
         * backslash pairs. Inspecting only the last character is wrong in
         * two cases: for a lone "'" the opening quote is also the last
         * character, and in "'abc\'" the final quote is escaped. */
        int pos = bodyStart;
        while (pos < yyleng && yytext[pos] != '\'') {
                pos += (yytext[pos] == '\\') ? 2 : 1;
        }
        const bool closed = (pos == yyleng - 1);

        /* Drop the closing quote only when there is one, so the body length
         * is never negative and an unterminated body keeps its last
         * character. */
        const int bodyLen = (closed ? yyleng - 1 : yyleng) - bodyStart;

        std::string strval =
          _scanner->escape(yytext + bodyStart, bodyLen, '\'');
        _scanner->setToken(yytext, yyleng, strval.c_str(), strval.length());
        return closed ? T_CONSTANT_ENCAPSED_STRING : T_ENCAPSED_AND_WHITESPACE;
}
<ST_IN_SCRIPTING>b?[\"] {
        /* Opening quote of a double-quoted string that was not matched as a
         * complete literal: the token text is the quote without its
         * optional prefix, and scanning continues in ST_DOUBLE_QUOTES. */
        const int prefixLen = (yytext[0] != '"') ? 1 : 0;
        _scanner->setToken(yytext, yyleng,
                           yytext + prefixLen, yyleng - prefixLen);
        BEGIN(ST_DOUBLE_QUOTES);
        return '"';
}
<ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|[']{LABEL}[']|["]{LABEL}["]){NEWLINE} {
        /* Heredoc / nowdoc opener. Extracts the label (without the optional
         * prefix, the "<<<", leading blanks, surrounding quotes and the line
         * terminator), remembers it on the scanner, and selects ST_NOWDOC
         * for a single-quoted label or ST_HEREDOC otherwise. */
        const int prefixLen = (yytext[0] != '<') ? 1 : 0;
        /* A '\r' just before the last character means a two-character line
         * terminator was matched. */
        const int extraCr = (yytext[yyleng - 2] == '\r') ? 1 : 0;

        char *label = yytext + prefixLen + 3;
        int labelLen = yyleng - prefixLen - 3 - 1 - extraCr;

        /* Skip blanks between "<<<" and the label. */
        for (; (*label == ' ') || (*label == '\t'); ++label) {
                --labelLen;
        }

        /* Strip a pair of surrounding quotes, if any. */
        const char opener = *label;
        if (opener == '\'' || opener == '"') {
                ++label;
                labelLen -= 2;
        }
        BEGIN(opener == '\'' ? ST_NOWDOC : ST_HEREDOC);

        _scanner->setHeredocLabel(label, labelLen);
        _scanner->setToken(yytext, yyleng, label, labelLen);
        return T_START_HEREDOC;
}
<ST_IN_SCRIPTING>[`] {
        /* Opening backquote: step over it and continue scanning in
         * ST_BACKQUOTE. */
        STEPPOS('`');
        BEGIN(ST_BACKQUOTE);
        return '`';
}
<ST_XHP_IN_TAG>{XHPLABEL} {
        /* Tag or attribute name inside an XHP tag. */
        RETTOKEN(T_XHP_LABEL);
}

<ST_XHP_IN_TAG>"=" {
        /* Attribute assignment: returned as the character itself. */
        RETSTEP(yytext[0]);
}

<ST_XHP_IN_TAG>["][^"]*["] {
        /* Quoted attribute value: the token text excludes both quotes. */
        _scanner->setToken(yytext, yyleng, yytext + 1, yyleng - 2);
        return T_XHP_TEXT;
}

<ST_XHP_IN_TAG>[{] {
        /* Start of an embedded expression: scan it in ST_IN_SCRIPTING; the
         * state is pushed so the tag context can be resumed later. */
        STEPPOS('{');
        yy_push_state(ST_IN_SCRIPTING, yyscanner);
        return '{';
}

<ST_XHP_IN_TAG>">" {
        /* End of the opening tag: what follows is child content. */
        STEPPOS(T_XHP_TAG_GT);
        BEGIN(ST_XHP_CHILD);
        return T_XHP_TAG_GT;
}

<ST_XHP_IN_TAG>"/>" {
        /* Self-closing tag: only the '/' is returned here. yyless(1) pushes
         * the '>' back so it is matched in ST_XHP_END_SINGLETON_TAG. */
        BEGIN(ST_XHP_END_SINGLETON_TAG);
        yyless(1);
        return '/';
}
<ST_XHP_IN_TAG>{ANY_CHAR} {
        /* Fallback for anything the other in-tag rules did not match. It
         * exists so that stray characters inside an XHP tag yield a
         * meaningful syntax error; the character is still returned as a
         * token after the error has been reported. */
        const char unexpected = yytext[0];
        STEPPOS(unexpected);
        _scanner->error("Unexpected character in input: '%c' (ASCII=%d)",
                        unexpected, unexpected);
        return unexpected;
}
<ST_XHP_END_SINGLETON_TAG>">" {
        /* The '>' of a self-closing tag: step over it and pop back to the
         * state that was active before the tag was entered. */
        STEPPOS(T_XHP_TAG_GT);
        yy_pop_state(yyscanner);
        return T_XHP_TAG_GT;
}
<ST_XHP_CHILD>"<!--" {
        /* Comment inside child content: scanned in ST_XHP_COMMENT, with
         * yymore() keeping the opener as part of the accumulated text. */
        yy_push_state(ST_XHP_COMMENT, yyscanner);
        yymore();
}

<ST_XHP_CHILD>[^{<]+ {
        /* Literal text up to the next '{' or '<'. */
        RETTOKEN(T_XHP_TEXT);
}

<ST_XHP_CHILD>"{" {
        /* Embedded expression inside child content: scan it in
         * ST_IN_SCRIPTING, pushing the state so child scanning can resume. */
        STEPPOS('{');
        yy_push_state(ST_IN_SCRIPTING, yyscanner);
        return '{';
}
<ST_XHP_CHILD>"</" {
        /* Start of a closing tag: only the '<' is consumed here; yyless(1)
         * pushes the '/' back so it is matched in ST_XHP_END_CLOSE_TAG. */
        BEGIN(ST_XHP_END_CLOSE_TAG);
        yyless(1);
        RETSTEP(T_XHP_TAG_LT);
}

<ST_XHP_END_CLOSE_TAG>"/" {
        /* The slash of the closing tag. */
        RETSTEP('/');
}

<ST_XHP_END_CLOSE_TAG>{XHPLABEL} {
        /* Name of the element being closed. */
        RETTOKEN(T_XHP_LABEL);
}

<ST_XHP_END_CLOSE_TAG>">" {
        /* End of the closing tag: pop back to the state that was active
         * before the element was opened. */
        STEPPOS(T_XHP_TAG_GT);
        yy_pop_state(yyscanner);
        return T_XHP_TAG_GT;
}
<ST_XHP_CHILD>"<" {
        /* A nested element starts inside child content: scan its tag in
         * ST_XHP_IN_TAG, pushing the state so the parent's children can be
         * resumed afterwards. */
        STEPPOS(T_XHP_TAG_LT);
        yy_push_state(ST_XHP_IN_TAG, yyscanner);
        return T_XHP_TAG_LT;
}
1048 <ST_HEREDOC,ST_NOWDOC>{ANY_CHAR} {
  // Hand-written scanner for the body of a heredoc/nowdoc. Instead of
  // relying on flex patterns it walks the flex buffer directly until it
  // finds either the end label at the start of a line or, for heredocs
  // only, the start of an interpolation ("$" followed by a label start or
  // "{", or "{" followed by "$"). Text gathered across buffer refills is
  // kept in docPieces and joined at the end into one
  // T_ENCAPSED_AND_WHITESPACE token.
1049   int refillResult = EOB_ACT_CONTINUE_SCAN;
1050   std::vector<std::string> docPieces;
1051   size_t totalDocSize = 0;
1052   std::string entireDoc;
1053   int docLabelLen = _scanner->getHeredocLabelLen();
1054   bool isHeredoc = (YYSTATE == ST_HEREDOC);
  // DECLARE_YYCURSOR binds `cursor` to flex's buffer position and restores
  // the character flex is holding in yy_hold_char; DECLARE_YYLIMIT computes
  // the end of the valid data in the current buffer.
1055   DECLARE_YYCURSOR;
1056   DECLARE_YYLIMIT;
  // Step back so the loop below re-examines what this rule matched
  // (presumably a single character, given the {ANY_CHAR} pattern).
1058   YYCURSOR--;
1060   // The rules that lead to this state all consume an end-of-line.
1061   bool lookingForEndLabel = true;
  // Outer loop: one iteration per buffer fill. Inner loop: one iteration
  // per character of the current buffer.
1063   while (refillResult == EOB_ACT_CONTINUE_SCAN) {
1064     while (YYCURSOR < YYLIMIT) {
1065       switch (*YYCURSOR++) {
          // A line terminator means the next character starts a new line,
          // which is the only place an end label may begin.
1066         case '\r':
1067           lookingForEndLabel = true;
1068           continue;
1069         case '\n':
1070           lookingForEndLabel = true;
1071           continue;
1072         case '$':
1073           lookingForEndLabel = false;
1074           if (isHeredoc) {
              // One character of lookahead is needed. If it is not in the
              // buffer yet, un-consume the '$' and refill first.
1075             if (YYCURSOR == YYLIMIT) {
1076               --YYCURSOR;
1077               goto doc_scan_get_more_buffer;
1078             }
              // "$name" or "${": stop before the '$' so the text so far is
              // emitted and the '$' is left for the next match.
1079             if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
1080               --YYCURSOR;
1081               goto doc_scan_done;
1082             }
1083           }
1084           continue;
1085         case '{':
1086           lookingForEndLabel = false;
1087           if (isHeredoc) {
              // Same lookahead handling as for '$' above.
1088             if (YYCURSOR == YYLIMIT) {
1089               --YYCURSOR;
1090               goto doc_scan_get_more_buffer;
1091             }
              // "{$": stop before the '{' so it is left for the next match.
1092             if (*YYCURSOR == '$') {
1093               --YYCURSOR;
1094               goto doc_scan_done;
1095             }
1096           }
1097           continue;
1098         case '\\':
1099           lookingForEndLabel = false;
1100           if (isHeredoc) {
1101             if (YYCURSOR == YYLIMIT) {
1102               --YYCURSOR;
1103               goto doc_scan_get_more_buffer;
1104             }
              // Skip the escaped character so it is not interpreted by the
              // cases above. Line terminators are not skipped, so they still
              // reach the '\r' / '\n' cases and mark the start of a line.
1105             if (*YYCURSOR != '\n' && *YYCURSOR != '\r') {
1106               YYCURSOR++;
1107             }
1108           }
1109           continue;
1110         default:
1111           if (lookingForEndLabel) {
1112             lookingForEndLabel = false;
1114             // Check for ending label on this line.
1115             if (!IS_LABEL_START(YYCURSOR[-1])) continue;
1117             // Adjust cursor to the start of the potential label.
1118             // If a label is recognized, we want the cursor pointing at it.
1119             --YYCURSOR;
              // The comparison below reads the label plus up to two more
              // characters (an optional ';' and the line terminator). If
              // that much is not buffered, refill and retry from this same
              // position.
1121             if ((docLabelLen + 2) > (YYLIMIT - YYCURSOR)) {
1122               lookingForEndLabel = true;
1123               goto doc_scan_get_more_buffer;
1124             }
1126             if (!memcmp(YYCURSOR, _scanner->getHeredocLabel(), docLabelLen)) {
1127               const char *end = YYCURSOR + docLabelLen;
1128               if (*end == ';') {
1129                 end++;
1130               }
                // The label only terminates the document when it is followed
                // directly (or after a ';') by a line terminator.
1131               if (*end == '\n' || *end == '\r') {
1132                 BEGIN(ST_END_HEREDOC);
1133                 goto doc_scan_done;
1134               }
1135             }
1136             ++YYCURSOR; // No label found, consume this character.
1137           }
1138           continue;
1139       }
1140     }
1142 doc_scan_get_more_buffer:
1143     // We ran off the end of the buffer, but no end label has been found.
1144     // Save off the string we have so far, re-fill the buffer, and repeat.
1145     yyleng = YYCURSOR - yytext;
1146     docPieces.emplace_back(yytext, yyleng);
    // Refuse documents whose total size could not be held by a std::string.
1147     if (totalDocSize >= entireDoc.max_size() - yyleng) {
1148       _scanner->error("%sdoc too large", isHeredoc ? "Here" : "Now");
1149       return 0;
1150     }
1151     totalDocSize += yyleng;
1153     // yy_get_next_buffer() needs the text pointing at the data we want to keep
1154     // in the buffer, and the cursor pointing off the end. It will move what's
1155     // at yytext (if anything) to the beginning of the buffer and fill the rest
1156     // with new data.
1157     yytext = yytext + yyleng;
1158     yyleng = 0;
1159     YYCURSOR = YYLIMIT + 1;
1160     refillResult = yy_get_next_buffer(yyscanner);
1162     // Point to the beginning of the (possibly new) buffer.
1163     YYCURSOR = yyg->yy_c_buf_p = yytext;
1164     YYLIMIT = YY_CURRENT_BUFFER->yy_ch_buf + yyg->yy_n_chars;
1165   }
  // The refill did not ask us to continue scanning, so the input ended
  // before an end label was seen.
1167   _scanner->error("Unterminated %sdoc at end of file",
1168                   isHeredoc ? "here" : "now");
1169   return 0;
1171 doc_scan_done:
  // Finalize the match: everything from yytext up to the cursor belongs to
  // this token. RESET_YYCURSOR saves the character at the cursor in
  // yy_hold_char and writes a NUL there.
1172   yyleng = YYCURSOR - yytext;
1173   totalDocSize += yyleng;
1174   RESET_YYCURSOR;
1176   if (totalDocSize > 0) {
    // Join the pieces saved during refills with the tail that is still in
    // the buffer.
1177     entireDoc.reserve(totalDocSize);
1179     for (const auto& piece: docPieces) {
1180       entireDoc.append(piece);
1181     }
1183     if (yyleng > 0) {
1184       entireDoc.append(yytext, yyleng);
1185     }
1187     // Newline before label will be subtracted from returned text, but
1188     // raw text will include it, for zend_highlight/strip, tokenizer, etc.
1189     int newline = 0;
1190     bool endLabelFound = (YYSTATE == ST_END_HEREDOC);
1191     if (endLabelFound && (entireDoc.length() > 0)) {
      // Count a trailing "\n" or "\r\n" (one or two characters).
1192       auto it = entireDoc.end();
1193       if (*--it == '\n') {
1194         ++newline;
1195         if ((entireDoc.length() > 1) && (*--it == '\r')) {
1196           ++newline;
1197         }
1198       }
1199     }
1201     if (isHeredoc) {
      // Heredoc text is passed through escape(); nowdoc text (below) is
      // used verbatim.
1202       std::string escapedDoc = _scanner->escape(entireDoc.c_str(),
1203                                                 entireDoc.length() - newline,
1204                                                 0);
1205       _scanner->setToken(entireDoc.c_str(), entireDoc.length(),
1206                          escapedDoc.c_str(), escapedDoc.length());
1207     } else {
1208       _scanner->setToken(entireDoc.c_str(), entireDoc.length(),
1209                          entireDoc.c_str(), entireDoc.length() - newline);
1210     }
1211     return T_ENCAPSED_AND_WHITESPACE;
1212   } else {
1213     // No data before the label means we just go right to ST_END_HEREDOC
1214     // without forming a new token.
    // Nothing is returned on this path; the action simply ends here.
1215   }
<ST_END_HEREDOC>{LABEL} {
        /* The closing label of a heredoc/nowdoc: go back to regular script
         * scanning and report T_END_HEREDOC. */
        BEGIN(ST_IN_SCRIPTING);
        RETSTEP(T_END_HEREDOC);
}
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
        /* Start of a "{$...}" interpolation. Only the '{' forms the token:
         * yyless(1) pushes the '$' back so it is rescanned as script code in
         * the ST_IN_SCRIPTING state pushed here. */
        _scanner->setToken(yytext, 1, yytext, 1);
        yy_push_state(ST_IN_SCRIPTING, yyscanner);
        yyless(1);
        return T_CURLY_OPEN;
}
<ST_DOUBLE_QUOTES>{DOUBLE_QUOTES_CHARS}+ {
        /* Literal text inside a double-quoted string. */
        const std::string text = _scanner->escape(yytext, yyleng, '"');
        _scanner->setToken(yytext, yyleng, text.c_str(), text.length());
        return T_ENCAPSED_AND_WHITESPACE;
}

<ST_DOUBLE_QUOTES>{DOUBLE_QUOTES_CHARS}*("{"{2,}|"$"{2,}|(("{"+|"$"+)[\"])) {
        /* Literal text ending in a run of '{' or '$' characters, possibly
         * followed by the closing quote. The last matched character is
         * pushed back for rescanning; everything before it is literal. */
        yyless(yyleng - 1);
        const std::string text = _scanner->escape(yytext, yyleng, '"');
        _scanner->setToken(yytext, yyleng, text.c_str(), text.length());
        return T_ENCAPSED_AND_WHITESPACE;
}
<ST_BACKQUOTE>{BACKQUOTE_CHARS}+ {
        /* Literal text inside a backquoted string. */
        const std::string text = _scanner->escape(yytext, yyleng, '`');
        _scanner->setToken(yytext, yyleng, text.c_str(), text.length());
        return T_ENCAPSED_AND_WHITESPACE;
}

<ST_BACKQUOTE>{BACKQUOTE_CHARS}*("{"{2,}|"$"{2,}|(("{"+|"$"+)[`])) {
        /* Literal text ending in a run of '{' or '$' characters, possibly
         * followed by the closing backquote. The last matched character is
         * pushed back for rescanning; everything before it is literal. */
        yyless(yyleng - 1);
        const std::string text = _scanner->escape(yytext, yyleng, '`');
        _scanner->setToken(yytext, yyleng, text.c_str(), text.length());
        return T_ENCAPSED_AND_WHITESPACE;
}
<ST_DOUBLE_QUOTES>[\"] {
        /* Closing double quote: resume regular script scanning. */
        BEGIN(ST_IN_SCRIPTING);
        return '"';
}

<ST_BACKQUOTE>[\`] {
        /* Closing backquote: resume regular script scanning. */
        BEGIN(ST_IN_SCRIPTING);
        return '`';
}
<ST_COMMENT,ST_DOC_COMMENT><<EOF>> {
        /* The input ended while still inside a comment: report the error
         * and return 0 to the caller. */
        _scanner->error("Unterminated comment at end of file");
        return 0;
}
<*>{ANY_CHAR} {
        /* Catch-all, active in every start condition, for input no other
         * rule matched. The error is reported, but nothing is returned from
         * this action. */
        const char unexpected = yytext[0];
        _scanner->error("Unexpected character in input: '%c' (ASCII=%d)",
                        unexpected, unexpected);
}
1278 namespace HPHP {
1279   void Scanner::init() {
1280     yylex_init_extra(this, &m_yyscanner);
1281     struct yyguts_t *yyg = (struct yyguts_t *)m_yyscanner;
1282     BEGIN(INITIAL);
1283   }
1285   int Scanner::scan() {
1286     return yylex(m_token, m_loc, m_yyscanner);
1287   }
1289   void Scanner::reset() {
1290     void *yyscanner = (void *)m_yyscanner;
1291     struct yyguts_t *yyg = (struct yyguts_t *)m_yyscanner;
1292     YY_FLUSH_BUFFER;
1293     yylex_destroy(m_yyscanner);
1294   }
1296   static void suppress_unused_errors() {
1297     yyunput(0,0,0);
1298     yy_top_state(0);
1299     suppress_unused_errors();
1300   }
1303 extern "C" {
1304   int yywrap(yyscan_t yyscanner) {
1305     return 1;
1306   }