1 // Copyright (c) Facebook, Inc. and its affiliates.
3 // This source code is licensed under the MIT license found in the
4 // LICENSE file in the "hack" directory of this source tree.
11 use newtype::newtype_int;
13 // 1-based line number.
14 newtype_int!(Line, u32, LineMap, LineSet);
16 #[derive(Debug, PartialEq, Eq, Copy, Clone, strum_macros::IntoStaticStr)]
17 pub(crate) enum Token<'a> {
18 // See Lexer::from_slice for regex definitions
19 Global(&'a [u8], Line),
20 Variable(&'a [u8], Line),
21 TripleStrLiteral(&'a [u8], Line),
23 StrLiteral(&'a [u8], Line),
34 Number(&'a [u8], Line),
39 Identifier(&'a [u8], Line),
41 Error(&'a [u8], Line),
45 pub(crate) fn error(&self, err: impl std::fmt::Display) -> anyhow::Error {
46 anyhow!("Error [line {line}]: {err} ({self:?})", line = self.line())
49 pub(crate) fn line(&self) -> Line {
51 Token::CloseBracket(u)
52 | Token::CloseCurly(u)
53 | Token::CloseParen(u)
62 | Token::Identifier(_, u)
66 | Token::OpenBracket(u)
70 | Token::StrLiteral(_, u)
71 | Token::TripleStrLiteral(_, u)
72 | Token::Variable(_, u)
73 | Token::Variadic(u) => *u,
77 pub(crate) fn as_bytes(&self) -> &'a [u8] {
80 | Token::Variable(u, _)
81 | Token::TripleStrLiteral(u, _)
83 | Token::StrLiteral(u, _)
85 | Token::Identifier(u, _)
86 | Token::Error(u, _) => u,
87 Token::Semicolon(_) => b";",
88 Token::Dash(_) => b"-",
89 Token::OpenCurly(_) => b"{",
90 Token::OpenBracket(_) => b"[",
91 Token::OpenParen(_) => b"(",
92 Token::CloseParen(_) => b")",
93 Token::CloseBracket(_) => b"]",
94 Token::CloseCurly(_) => b"}",
95 Token::Equal(_) => b"=",
96 Token::Comma(_) => b",",
99 Token::Colon(_) => b":",
100 Token::Variadic(_) => b"...",
101 Token::Newline(_) => b"\n",
105 /// Only str_literal and triple_str_literal can be parsed into a new tokenizer.
106 /// To create a new tokenizer that still has accurate error reporting, we want to pass the line
107 /// So `into_str_literal_and_line` and `into_triple_str_literal_and_line` return a Result of bytes rep and line # or bail
108 pub(crate) fn into_triple_str_literal_and_line(self) -> Result<(&'a [u8], Line)> {
110 Token::TripleStrLiteral(vec_u8, pos) => Ok((vec_u8, pos)),
111 _ => bail!("Expected a triple str literal, got: {}", self),
115 pub(crate) fn into_global(self) -> Result<&'a [u8]> {
117 Token::Global(vec_u8, _) => Ok(vec_u8),
118 _ => bail!("Expected a global, got: {}", self),
122 pub(crate) fn into_variable(self) -> Result<&'a [u8]> {
124 Token::Variable(vec_u8, _) => Ok(vec_u8),
125 _ => bail!("Expected a variable, got: {}", self),
129 pub(crate) fn into_triple_str_literal(self) -> Result<&'a [u8]> {
131 Token::TripleStrLiteral(vec_u8, _) => Ok(vec_u8),
132 _ => bail!("Expected a triple str literal, got: {}", self),
136 pub(crate) fn into_decl(self) -> Result<&'a [u8]> {
138 Token::Decl(vec_u8, _) => Ok(vec_u8),
139 _ => bail!("Expected a decl, got: {}", self),
143 pub(crate) fn into_str_literal(self) -> Result<&'a [u8]> {
145 Token::StrLiteral(vec_u8, _) => Ok(vec_u8),
146 _ => bail!("Expected a str literal, got: {}", self),
150 pub(crate) fn into_number(self) -> Result<&'a [u8]> {
152 Token::Number(vec_u8, _) => Ok(vec_u8),
153 _ => bail!("Expected a number, got: {}", self),
157 pub(crate) fn into_identifier(self) -> Result<&'a [u8]> {
159 Token::Identifier(vec_u8, _) => Ok(vec_u8),
160 _ => bail!("Expected an identifier, got: {}", self),
164 pub(crate) fn into_semicolon(self) -> Result<&'a [u8]> {
166 Token::Semicolon(_) => Ok(self.as_bytes()),
167 _ => bail!("Expected a semicolon, got: {}", self),
171 pub(crate) fn into_dash(self) -> Result<&'a [u8]> {
173 Token::Dash(_) => Ok(self.as_bytes()),
174 _ => bail!("Expected a dash, got: {}", self),
178 pub(crate) fn into_open_curly(self) -> Result<&'a [u8]> {
180 Token::OpenCurly(_) => Ok(self.as_bytes()),
181 _ => bail!("Expected an open curly, got: {}", self),
185 pub(crate) fn into_open_bracket(self) -> Result<&'a [u8]> {
187 Token::OpenBracket(_) => Ok(self.as_bytes()),
188 _ => bail!("Expected an open bracket, got: {}", self),
192 pub(crate) fn into_open_paren(self) -> Result<&'a [u8]> {
194 Token::OpenParen(_) => Ok(self.as_bytes()),
195 _ => bail!("Expected an open paren, got: {}", self),
199 pub(crate) fn into_close_paren(self) -> Result<&'a [u8]> {
201 Token::CloseParen(_) => Ok(self.as_bytes()),
202 _ => bail!("Expected a close paren, got: {}", self),
206 pub(crate) fn into_close_bracket(self) -> Result<&'a [u8]> {
208 Token::CloseBracket(_) => Ok(self.as_bytes()),
209 _ => bail!("Expected a close bracket, got: {}", self),
213 pub(crate) fn into_close_curly(self) -> Result<&'a [u8]> {
215 Token::CloseCurly(_) => Ok(self.as_bytes()),
216 _ => bail!("Expected a close curly, got: {}", self),
220 pub(crate) fn into_equal(self) -> Result<&'a [u8]> {
222 Token::Equal(_) => Ok(self.as_bytes()),
223 _ => bail!("Expected an equal, got: {}", self),
227 pub(crate) fn into_comma(self) -> Result<&'a [u8]> {
229 Token::Comma(_) => Ok(self.as_bytes()),
230 _ => bail!("Expected a comma, got: {}", self),
234 pub(crate) fn into_lt(self) -> Result<&'a [u8]> {
236 Token::Lt(_) => Ok(self.as_bytes()),
237 _ => bail!("Expected a lt (<), got: {}", self),
241 pub(crate) fn into_gt(self) -> Result<&'a [u8]> {
243 Token::Gt(_) => Ok(self.as_bytes()),
244 _ => bail!("Expected a gt (>), got: {}", self),
248 pub(crate) fn into_colon(self) -> Result<&'a [u8]> {
250 Token::Colon(_) => Ok(self.as_bytes()),
251 _ => bail!("Expected a colon, got: {}", self),
255 pub(crate) fn is_newline(&self) -> bool {
256 matches!(self, Token::Newline(..))
259 pub(crate) fn is_triple_str_literal(&self) -> bool {
260 matches!(self, Token::TripleStrLiteral(..))
263 pub(crate) fn is_decl(&self) -> bool {
264 matches!(self, Token::Decl(..))
267 pub(crate) fn is_str_literal(&self) -> bool {
268 matches!(self, Token::StrLiteral(..))
271 pub(crate) fn is_number(&self) -> bool {
272 matches!(self, Token::Number(..))
275 pub(crate) fn is_identifier(&self) -> bool {
276 matches!(self, Token::Identifier(..))
279 pub(crate) fn is_semicolon(&self) -> bool {
280 matches!(self, Token::Semicolon(_))
283 pub(crate) fn is_dash(&self) -> bool {
284 matches!(self, Token::Dash(_))
287 pub(crate) fn is_open_bracket(&self) -> bool {
288 matches!(self, Token::OpenBracket(_))
291 pub(crate) fn is_close_paren(&self) -> bool {
292 matches!(self, Token::CloseParen(_))
295 pub(crate) fn is_close_bracket(&self) -> bool {
296 matches!(self, Token::CloseBracket(_))
299 pub(crate) fn is_close_curly(&self) -> bool {
300 matches!(self, Token::CloseCurly(_))
303 pub(crate) fn is_equal(&self) -> bool {
304 matches!(self, Token::Equal(_))
307 pub(crate) fn is_lt(&self) -> bool {
308 matches!(self, Token::Lt(_))
311 pub(crate) fn is_gt(&self) -> bool {
312 matches!(self, Token::Gt(_))
315 pub(crate) fn is_variadic(&self) -> bool {
316 matches!(self, Token::Variadic(_))
320 impl fmt::Display for Token<'_> {
321 /// Purpose of this fmt: so that vec of u8 (internal str representation of each token) is printed as a string rather than bytes
322 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
323 let text = std::str::from_utf8(self.as_bytes()).map_err(|_| fmt::Error)?;
324 let variant: &str = (*self).into();
325 let line = self.line();
326 write!(f, r#"{variant}("{text}", line: {line})"#)