2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #ifndef incl_HPHP_PARSER_SCANNER_H_
18 #define incl_HPHP_PARSER_SCANNER_H_
27 #include <folly/portability/String.h>
28 #include <folly/portability/Unistd.h>
30 #include "hphp/util/exception.h"
31 #include "hphp/util/portability.h"
32 #include "hphp/parser/location.h"
33 #include "hphp/parser/hphp.tab.hpp"
35 #ifndef YY_TYPEDEF_YY_SIZE_T
36 #define YY_TYPEDEF_YY_SIZE_T
37 typedef size_t yy_size_t
;
41 ///////////////////////////////////////////////////////////////////////////////
55 void setNum(TokenID num
) {
59 void set(TokenID num
, const char* t
) {
64 void set(TokenID num
, const std::string
& t
) {
69 void operator++(TokenID
) {
73 ScannerToken
& operator=(const ScannerToken
& other
) {
75 m_text
= other
.m_text
;
79 const std::string
& text() const {
83 bool same(const char* s
) const {
84 return strcasecmp(m_text
.c_str(), s
) == 0;
87 void setText(const char* t
, int len
) {
88 m_text
= std::string(t
, len
);
91 void setText(const char* t
) {
95 void setText(const std::string
& t
) {
99 void setText(const ScannerToken
& token
) {
100 m_text
= token
.m_text
;
111 void xhpLabel(bool prefix
= true);
112 bool htmlTrim(); // true if non-empty after trimming
113 void xhpDecode(); // xhp supports more entities than html
116 /* Internal token id. */
122 struct LookaheadToken
{
128 struct LookaheadSlab
{
129 static const int SlabSize
= 32;
130 LookaheadToken m_data
[SlabSize
];
133 LookaheadSlab
* m_next
;
137 LookaheadSlab
* m_head
;
138 LookaheadSlab
* m_tail
;
144 LookaheadSlab
* s
= m_head
;
153 return !m_head
|| (m_head
->m_beginPos
== m_head
->m_endPos
);
156 LookaheadSlab
* m_slab
;
158 const LookaheadToken
& operator*() const {
159 return m_slab
->m_data
[m_pos
];
161 LookaheadToken
& operator*() {
162 return m_slab
->m_data
[m_pos
];
164 const LookaheadToken
* operator->() const {
165 return m_slab
->m_data
+ m_pos
;
167 LookaheadToken
* operator->() {
168 return m_slab
->m_data
+ m_pos
;
173 if (m_pos
< m_slab
->m_endPos
) return;
174 m_slab
= m_slab
->m_next
;
176 m_pos
= m_slab
->m_beginPos
;
179 iterator
& operator++() {
183 iterator
operator++(int) {
188 bool operator==(const iterator
& it
) const {
189 if (m_slab
!= it
.m_slab
) return false;
190 if (!m_slab
) return true;
191 return (m_pos
== it
.m_pos
);
197 iterator
appendNew();
200 ///////////////////////////////////////////////////////////////////////////////
204 AllowShortTags
= 0x01, // allow <?
205 AllowAspTags
= 0x02, // allow <% %>
206 ReturnAllTokens
= 0x04, // return comments and whitespaces
207 AllowXHPSyntax
= 0x08, // allow XHP syntax
208 AllowHipHopSyntax
= 0x18, // allow HipHop-specific syntax (which
209 // includes XHP syntax)
213 Scanner(const std::string
& filename
, int type
, bool md5
= false);
214 Scanner(std::istream
&stream
, int type
, const char *fileName
= "",
216 Scanner(const char *source
, int len
, int type
, const char *fileName
= "",
220 const std::string
&getMd5() const {
224 int scanToken(ScannerToken
&t
, Location
&l
);
225 int fetchToken(ScannerToken
&t
, Location
&l
);
226 void nextLookahead(TokenStore::iterator
& pos
);
227 bool tryParseNSType(TokenStore::iterator
& pos
);
228 bool tryParseTypeList(TokenStore::iterator
& pos
);
229 bool tryParseFuncTypeList(TokenStore::iterator
& pos
);
230 bool tryParseNonEmptyLambdaParams(TokenStore::iterator
& pos
);
231 void parseApproxParamDefVal(TokenStore::iterator
& pos
);
234 * Called by parser or tokenizer.
236 int getNextToken(ScannerToken
&t
, Location
&l
);
// Read-only accessor for m_error. The result is a reference to the member
// itself (no copy), so it is only valid while this Scanner is alive.
237 const std::string
&getError() const { return m_error
;}
// Hands back the m_loc pointer as-is; no Location is copied. Although the
// method is const, the returned pointee is non-const, so callers can modify
// the Location through it.
// NOTE(review): whether m_loc may be null at call time is not visible here.
238 Location
*getLocation() const { return m_loc
;}
241 * Implemented in hphp.x, as they need to call yy functions.
248 * Called by lex.yy.cpp for YY_INPUT (see hphp.x)
// NOTE(review): since this backs YY_INPUT, presumably `text` is the buffer to
// fill, `max` is its capacity and `result` receives the number of characters
// produced -- confirm against the definition, which is not visible here.
250 int read(char *text
, yy_size_t
&result
, yy_size_t max
);
251 // Overload for older versions of flex.
// Signature differs from the other read() declaration only in the type of
// `result`: an int reference here instead of a yy_size_t reference.
252 int read(char *text
, int &result
, yy_size_t max
);
255 * Called by scanner rules.
// True when the AllowShortTags bit (0x01, "allow <?") is set in m_type.
257 bool shortTags() const { return (m_type
& AllowShortTags
) == AllowShortTags
;}
// True when the AllowAspTags bit (0x02, "allow <% %>") is set in m_type.
258 bool aspTags() const { return (m_type
& AllowAspTags
) == AllowAspTags
;}
// True when the ReturnAllTokens bit (0x04, "return comments and whitespaces")
// is set in m_type.
259 bool full() const { return (m_type
& ReturnAllTokens
) == ReturnAllTokens
;}
// Returns the current value of m_lastToken unchanged.
// NOTE(review): presumably the id of the token most recently produced by the
// scanner; the code that updates m_lastToken is not visible here -- confirm.
260 int lastToken() const { return m_lastToken
;}
261 void setToken(const char *rawText
, int rawLeng
, int type
= -1) {
262 m_token
->setText(rawText
, rawLeng
);
263 incLoc(rawText
, rawLeng
, type
);
265 void stepPos(const char *rawText
, int rawLeng
, int type
= -1) {
267 m_token
->setText(rawText
, rawLeng
);
269 incLoc(rawText
, rawLeng
, type
);
271 void setToken(const char *rawText
, int rawLeng
,
272 const char *ytext
, int yleng
, int type
= -1) {
274 m_token
->setText(rawText
, rawLeng
);
276 m_token
->setText(ytext
, yleng
);
278 incLoc(rawText
, rawLeng
, type
);
280 // also used for YY_FATAL_ERROR in hphp.x
// Variadic member taking a printf-style format string `fmt`. The (2,3)
// indices count the implicit `this` as argument 1, which makes `fmt`
// argument 2 and puts the first variadic argument at position 3.
// NOTE(review): assumes ATTRIBUTE_PRINTF expands to the compiler's printf
// format attribute -- confirm in hphp/util/portability.h.
281 void error(ATTRIBUTE_PRINTF_STRING
const char* fmt
, ...)
282 ATTRIBUTE_PRINTF(2,3);
// Variadic member taking a printf-style format string `fmt`. The (2,3)
// indices count the implicit `this` as argument 1, which makes `fmt`
// argument 2 and puts the first variadic argument at position 3.
// NOTE(review): assumes ATTRIBUTE_PRINTF expands to the compiler's printf
// format attribute -- confirm in hphp/util/portability.h.
283 void warn(ATTRIBUTE_PRINTF_STRING
const char* fmt
, ...)
284 ATTRIBUTE_PRINTF(2,3);
285 std::string
escape(const char *str
, int len
, char quote_type
) const;
288 * Called by scanner rules for doc comments.
290 void setDocComment(const char *ytext
, int yleng
) {
291 m_docComment
.assign(ytext
, yleng
);
293 void setDocComment(const std::string
& com
) {
296 std::string
detachDocComment() {
297 std::string dc
= m_docComment
;
298 m_docComment
.clear();
303 * Called by scanner rules for HEREDOC/NOWDOC.
305 void setHeredocLabel(const char *label
, int len
) {
306 m_heredocLabel
.assign(label
, len
);
308 int getHeredocLabelLen() const {
309 return m_heredocLabel
.length();
311 const char *getHeredocLabel() const {
312 return m_heredocLabel
.data();
314 void resetHeredoc() {
315 m_heredocLabel
.clear();
319 * Enables HipHop syntax for this file.
325 bool isHHFile() const {
329 bool isXHPSyntaxEnabled() const {
330 return ((m_type
& AllowXHPSyntax
) == AllowXHPSyntax
) || m_isHHFile
;
333 bool isHHSyntaxEnabled() const {
334 return ((m_type
& AllowHipHopSyntax
) == AllowHipHopSyntax
) || m_isHHFile
;
337 int getLookaheadLtDepth() {
338 return m_lookaheadLtDepth
;
342 bool tryParseShapeType(TokenStore::iterator
& pos
);
343 bool tryParseShapeMemberList(TokenStore::iterator
& pos
);
345 bool nextIfToken(TokenStore::iterator
& pos
, int tok
);
349 std::string m_filename
;
351 std::istream
*m_stream
;
352 std::stringstream m_sstream
; // XHP helper
353 const char *m_source
;
368 // These fields are used to temporarily hold pointers to token/location
369 // storage while the lexer is active to facilitate functions such as
370 // setToken() and incLoc()
371 ScannerToken
*m_token
;
375 std::string m_docComment
;
376 std::string m_heredocLabel
;
378 // fields for XHP parsing
380 void incLoc(const char *rawText
, int rawLeng
, int type
);
383 TokenStore m_lookahead
;
384 int m_lookaheadLtDepth
;
387 ///////////////////////////////////////////////////////////////////////////////
390 #endif // incl_HPHP_PARSER_SCANNER_H_