Drop unused tc_dump_helpers_addrs.txt file
[hiphop-php.git] / hphp / parser / scanner.h
blobdeefa12baba52c71e608cf120aade60c316485f4
/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com)  |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/
17 #ifndef incl_HPHP_PARSER_SCANNER_H_
18 #define incl_HPHP_PARSER_SCANNER_H_
20 #include <sstream>
21 #include <cstdint>
22 #include <string>
23 #include <limits>
24 #include <cstdlib>
25 #include <limits.h>
27 #include <folly/portability/String.h>
28 #include <folly/portability/Unistd.h>
30 #include "hphp/util/exception.h"
31 #include "hphp/util/portability.h"
32 #include "hphp/parser/location.h"
33 #include "hphp/parser/hphp.tab.hpp"
// Declare yy_size_t unless something else has already declared it under the
// same guard macro (presumably the flex-generated lexer -- confirm against
// the generated lex.yy.cpp). Scanner::read() below takes yy_size_t arguments,
// so the type has to be visible in this header.
#ifndef YY_TYPEDEF_YY_SIZE_T
#define YY_TYPEDEF_YY_SIZE_T
typedef size_t yy_size_t;
#endif
40 namespace HPHP {
41 ///////////////////////////////////////////////////////////////////////////////
using TokenID = int;

/**
 * A scanner token: a numeric token id, the token's text, and a boolean
 * "check" flag.
 */
struct ScannerToken {
  ScannerToken() = default;

  // Copy construction copies all three members. It is spelled out because a
  // user-declared copy assignment operator (below) makes relying on the
  // implicit copy constructor deprecated.
  ScannerToken(const ScannerToken&) = default;

  /** Sets the id back to 0 and empties the text; m_check is not touched. */
  void reset() {
    m_num = 0;
    m_text.clear();
  }

  /** Returns the token id. */
  TokenID num() const {
    return m_num;
  }

  /** Replaces the token id, leaving the text alone. */
  void setNum(TokenID num) {
    m_num = num;
  }

  /** Sets id and text together; `t` must be a NUL-terminated string. */
  void set(TokenID num, const char* t) {
    m_num = num;
    m_text = t;
  }

  /** Sets id and text together. */
  void set(TokenID num, const std::string& t) {
    m_num = num;
    m_text = t;
  }

  /** Postfix increment of the token id; returns nothing. */
  void operator++(TokenID) {
    ++m_num;
  }

  /**
   * Copies the id and text from `other`.
   *
   * NOTE(review): m_check is not assigned here, so assignment and copy
   * construction differ. Kept as-is because callers may depend on it --
   * confirm whether this is intentional.
   */
  ScannerToken& operator=(const ScannerToken& other) {
    m_num = other.m_num;
    m_text = other.m_text;
    return *this;
  }

  /** Returns the token text. */
  const std::string& text() const {
    return m_text;
  }

  /** Case-insensitive comparison of the token text against `s`. */
  bool same(const char* s) const {
    return strcasecmp(m_text.c_str(), s) == 0;
  }

  /** Sets the text to the first `len` bytes starting at `t`. */
  void setText(const char* t, int len) {
    // assign() writes into the existing buffer instead of building a
    // temporary std::string and move-assigning it.
    m_text.assign(t, len);
  }

  /** Sets the text from a NUL-terminated string. */
  void setText(const char* t) {
    m_text = t;
  }

  void setText(const std::string& t) {
    m_text = t;
  }

  /** Copies only the text of `token`; id and check flag are unchanged. */
  void setText(const ScannerToken& token) {
    m_text = token.m_text;
  }

  /** Returns the check flag. */
  bool check() const {
    return m_check;
  }

  /** Raises the check flag; nothing in this struct lowers it again. */
  void setCheck() {
    m_check = true;
  }

  // Defined outside this header.
  void xhpLabel(bool prefix = true);
  bool htmlTrim(); // true if non-empty after trimming
  void xhpDecode();  // xhp supports more entities than html

protected:
  /* Internal token id. */
  TokenID m_num{0};
  std::string m_text;
  bool m_check{false};
};
122 struct LookaheadToken {
123 ScannerToken token;
124 Location loc;
125 int t;
128 struct LookaheadSlab {
129 static const int SlabSize = 32;
130 LookaheadToken m_data[SlabSize];
131 int m_beginPos;
132 int m_endPos;
133 LookaheadSlab* m_next;
136 struct TokenStore {
137 LookaheadSlab* m_head;
138 LookaheadSlab* m_tail;
139 TokenStore() {
140 m_head = nullptr;
141 m_tail = nullptr;
143 ~TokenStore() {
144 LookaheadSlab* s = m_head;
145 LookaheadSlab* next;
146 while (s) {
147 next = s->m_next;
148 delete s;
149 s = next;
152 bool empty() {
153 return !m_head || (m_head->m_beginPos == m_head->m_endPos);
155 struct iterator {
156 LookaheadSlab* m_slab;
157 int m_pos;
158 const LookaheadToken& operator*() const {
159 return m_slab->m_data[m_pos];
161 LookaheadToken& operator*() {
162 return m_slab->m_data[m_pos];
164 const LookaheadToken* operator->() const {
165 return m_slab->m_data + m_pos;
167 LookaheadToken* operator->() {
168 return m_slab->m_data + m_pos;
170 void next() {
171 if (!m_slab) return;
172 ++m_pos;
173 if (m_pos < m_slab->m_endPos) return;
174 m_slab = m_slab->m_next;
175 if (!m_slab) return;
176 m_pos = m_slab->m_beginPos;
177 return;
179 iterator& operator++() {
180 next();
181 return *this;
183 iterator operator++(int) {
184 iterator it = *this;
185 next();
186 return it;
188 bool operator==(const iterator& it) const {
189 if (m_slab != it.m_slab) return false;
190 if (!m_slab) return true;
191 return (m_pos == it.m_pos);
194 iterator begin();
195 iterator end();
196 void popFront();
197 iterator appendNew();
200 ///////////////////////////////////////////////////////////////////////////////
202 struct Scanner {
203 enum Type {
204 AllowShortTags = 0x01, // allow <?
205 AllowAspTags = 0x02, // allow <% %>
206 ReturnAllTokens = 0x04, // return comments and whitespaces
207 AllowXHPSyntax = 0x08, // allow XHP syntax
208 AllowHipHopSyntax = 0x18, // allow HipHop-specific syntax (which
209 // includes XHP syntax)
212 public:
213 Scanner(const std::string& filename, int type, bool md5 = false);
214 Scanner(std::istream &stream, int type, const char *fileName = "",
215 bool md5 = false);
216 Scanner(const char *source, int len, int type, const char *fileName = "",
217 bool md5 = false);
218 ~Scanner();
220 const std::string &getMd5() const {
221 return m_md5;
224 int scanToken(ScannerToken &t, Location &l);
225 int fetchToken(ScannerToken &t, Location &l);
226 void nextLookahead(TokenStore::iterator& pos);
227 bool tryParseNSType(TokenStore::iterator& pos);
228 bool tryParseTypeList(TokenStore::iterator& pos);
229 bool tryParseFuncTypeList(TokenStore::iterator& pos);
230 bool tryParseNonEmptyLambdaParams(TokenStore::iterator& pos);
231 void parseApproxParamDefVal(TokenStore::iterator& pos);
234 * Called by parser or tokenizer.
236 int getNextToken(ScannerToken &t, Location &l);
237 const std::string &getError() const { return m_error;}
238 Location *getLocation() const { return m_loc;}
241 * Implemented in hphp.x, as they need to call yy functions.
243 void init();
244 void reset();
245 int scan();
248 * Called by lex.yy.cpp for YY_INPUT (see hphp.x)
250 int read(char *text, yy_size_t &result, yy_size_t max);
251 // Overload for older versions of flex.
252 int read(char *text, int &result, yy_size_t max);
255 * Called by scanner rules.
257 bool shortTags() const { return (m_type & AllowShortTags) == AllowShortTags;}
258 bool aspTags() const { return (m_type & AllowAspTags) == AllowAspTags;}
259 bool full() const { return (m_type & ReturnAllTokens) == ReturnAllTokens;}
260 int lastToken() const { return m_lastToken;}
261 void setToken(const char *rawText, int rawLeng, int type = -1) {
262 m_token->setText(rawText, rawLeng);
263 incLoc(rawText, rawLeng, type);
265 void stepPos(const char *rawText, int rawLeng, int type = -1) {
266 if (shortTags()) {
267 m_token->setText(rawText, rawLeng);
269 incLoc(rawText, rawLeng, type);
271 void setToken(const char *rawText, int rawLeng,
272 const char *ytext, int yleng, int type = -1) {
273 if (full()) {
274 m_token->setText(rawText, rawLeng);
275 } else {
276 m_token->setText(ytext, yleng);
278 incLoc(rawText, rawLeng, type);
280 // also used for YY_FATAL_ERROR in hphp.x
281 void error(ATTRIBUTE_PRINTF_STRING const char* fmt, ...)
282 ATTRIBUTE_PRINTF(2,3);
283 void warn(ATTRIBUTE_PRINTF_STRING const char* fmt, ...)
284 ATTRIBUTE_PRINTF(2,3);
285 std::string escape(const char *str, int len, char quote_type) const;
288 * Called by scanner rules for doc comments.
290 void setDocComment(const char *ytext, int yleng) {
291 m_docComment.assign(ytext, yleng);
293 void setDocComment(const std::string& com) {
294 m_docComment = com;
296 std::string detachDocComment() {
297 std::string dc = m_docComment;
298 m_docComment.clear();
299 return dc;
303 * Called by scanner rules for HEREDOC/NOWDOC.
305 void setHeredocLabel(const char *label, int len) {
306 m_heredocLabel.assign(label, len);
308 int getHeredocLabelLen() const {
309 return m_heredocLabel.length();
311 const char *getHeredocLabel() const {
312 return m_heredocLabel.data();
314 void resetHeredoc() {
315 m_heredocLabel.clear();
319 * Enables HipHop syntax for this file.
321 void setHHFile() {
322 m_isHHFile = 1;
325 bool isHHFile() const {
326 return m_isHHFile;
329 bool isXHPSyntaxEnabled() const {
330 return ((m_type & AllowXHPSyntax) == AllowXHPSyntax) || m_isHHFile;
333 bool isHHSyntaxEnabled() const {
334 return ((m_type & AllowHipHopSyntax) == AllowHipHopSyntax) || m_isHHFile;
337 int getLookaheadLtDepth() {
338 return m_lookaheadLtDepth;
341 private:
342 bool tryParseShapeType(TokenStore::iterator& pos);
343 bool tryParseShapeMemberList(TokenStore::iterator& pos);
345 bool nextIfToken(TokenStore::iterator& pos, int tok);
347 void computeMd5();
349 std::string m_filename;
350 bool m_streamOwner;
351 std::istream *m_stream;
352 std::stringstream m_sstream; // XHP helper
353 const char *m_source;
354 int m_len;
355 int m_pos;
356 std::string m_md5;
358 enum State {
359 Start = -1,
360 NoLineFeed,
361 HadLineFeed,
363 State m_state;
365 int m_type;
366 void *m_yyscanner;
368 // These fields are used to temporarily hold pointers to token/location
369 // storage while the lexer is active to facilitate functions such as
370 // setToken() and incLoc()
371 ScannerToken *m_token;
372 Location *m_loc;
374 std::string m_error;
375 std::string m_docComment;
376 std::string m_heredocLabel;
378 // fields for XHP parsing
379 int m_lastToken;
380 void incLoc(const char *rawText, int rawLeng, int type);
381 bool m_isHHFile;
383 TokenStore m_lookahead;
384 int m_lookaheadLtDepth;
387 ///////////////////////////////////////////////////////////////////////////////
390 #endif // incl_HPHP_PARSER_SCANNER_H_