hphp/hack/src/utils/escaper.rs

   1 // Copyright (c) 2019, Facebook, Inc.
   2 // All rights reserved.
   3 //
   4 // This source code is licensed under the MIT license found in the
   5 // LICENSE file in the "hack" directory of this source tree.
   6 //
   7 // Implementation of string escaping logic.
   8 // See http://php.net/manual/en/language.types.string.php
   9
  10 use std::{borrow::Cow, error::Error, fmt, io::Write};
  11
  12 use bstr::{BStr, BString};
  13 use bumpalo::Bump;
  14
  /// Error type returned by the unescaping and string-extraction helpers in
  /// this file. Its only payload is a human-readable message.
  #[derive(Debug)]
  pub struct InvalidString {
      /// Description of what was wrong with the input.
      pub msg: String,
  }

  impl fmt::Display for InvalidString {
      /// Writes the stored message verbatim.
      fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
          f.write_str(self.msg.as_str())
      }
  }

  impl Error for InvalidString {
      /// Returns the stored message.
      fn description(&self) -> &str {
          self.msg.as_str()
      }
  }

  impl<'a> From<&'a str> for InvalidString {
      /// Builds an `InvalidString` whose message is an owned copy of `x`.
      fn from(x: &'a str) -> Self {
          let msg = x.to_owned();
          Self { msg }
      }
  }
  39
  /// Minimal byte-sink abstraction: lets the routines in this file append to
  /// any implementor (a standard `Vec<u8>` here) through one interface.
  trait GrowableBytes {
      /// Appends a single byte.
      fn push(&mut self, byte: u8);
      /// Appends every byte of `slice`, in order.
      fn extend_from_slice(&mut self, slice: &[u8]);
  }

  impl GrowableBytes for Vec<u8> {
      fn push(&mut self, byte: u8) {
          // Fully qualified so it is obvious this forwards to the inherent
          // `Vec` method rather than recursing into the trait method.
          Vec::push(self, byte)
      }
      fn extend_from_slice(&mut self, slice: &[u8]) {
          Vec::extend_from_slice(self, slice)
      }
  }
  53
  54 impl GrowableBytes for bumpalo::collections::Vec<'_, u8> {
  55     fn push(&mut self, byte: u8) {
  56         self.push(byte)
  57     }
  58     fn extend_from_slice(&mut self, slice: &[u8]) {
  59         self.extend_from_slice(slice)
  60     }
  61 }
  62
  /// Returns `true` for bytes from `b' '` (0x20) through `b'~'` (0x7e)
  /// inclusive.
  fn is_printable(c: u8) -> bool {
      matches!(c, b' '..=b'~')
  }
  66
  67 pub fn is_lit_printable(c: u8) -> bool {
  68     is_printable(c) && c != b'\\' && c != b'\"'
  69 }
  70
  /// Returns `true` when `c` is an ASCII hexadecimal digit (`0-9`, `a-f`,
  /// `A-F`).
  fn is_hex(c: u8) -> bool {
      c.is_ascii_hexdigit()
  }
  74
  /// Returns `true` when `c` is an ASCII octal digit (`0` through `7`).
  fn is_oct(c: u8) -> bool {
      matches!(c, b'0'..=b'7')
  }
  78
  79 /// This escapes a string using the format understood by the assembler
  80 /// and php serialization. The assembler and php serialization probably
  81 /// don't actually have the same rules but this should safely fit in both.
  82 /// It will escape $ in octal so that it can also be used as a PHP double
  83 /// string.
  84 pub fn escape_char(c: u8) -> Option<Cow<'static, [u8]>> {
  85     match c {
  86         b'\n' => Some((&b"\\n"[..]).into()),
  87         b'\r' => Some((&b"\\r"[..]).into()),
  88         b'\t' => Some((&b"\\t"[..]).into()),
  89         b'\\' => Some((&b"\\\\"[..]).into()),
  90         b'"' => Some((&b"\\\""[..]).into()),
  91         b'$' => None,
  92         c if is_lit_printable(c) => None,
  93         c => {
  94             let mut r = vec![];
  95             write!(r, "\\{:03o}", c).unwrap();
  96             Some(r.into())
  97         }
  98     }
  99 }
 100
 101 /// `impl Into<..>` allows escape to take a String, consider the following,
 102 /// let a = {
 103 ///    let b = String::from("b");
 104 ///     escape(b)
 105 /// };
 106 ///
 107 /// Replacing `escape(b)` by `escape(&b)` leaks a reference of b to outer scope hence
 108 /// compilation error.
 109 pub fn escape<'a>(s: impl Into<Cow<'a, str>>) -> Cow<'a, str> {
 110     escape_by(s.into(), escape_char)
 111 }
 112
 113 pub fn escape_bstr<'a>(s: impl Into<Cow<'a, BStr>>) -> Cow<'a, BStr> {
 114     escape_bstr_by(s.into(), escape_char)
 115 }
 116
 /// Reinterprets a `Cow<str>` as a `Cow<[u8]>` without copying: a borrowed
 /// string stays borrowed and an owned `String` gives up its buffer.
 fn cow_str_to_bytes(s: Cow<'_, str>) -> Cow<'_, [u8]> {
     match s {
         Cow::Borrowed(text) => Cow::Borrowed(text.as_bytes()),
         Cow::Owned(text) => Cow::Owned(text.into_bytes()),
     }
 }
 123
 124 fn cow_bstr_to_bytes(s: Cow<'_, BStr>) -> Cow<'_, [u8]> {
 125     match s {
 126         Cow::Borrowed(s) => s.as_ref().into(),
 127         Cow::Owned(s) => <Vec<u8>>::from(s).into(),
 128     }
 129 }
 130
 131 pub fn escape_by<F>(s: Cow<'_, str>, f: F) -> Cow<'_, str>
 132 where
 133     F: Fn(u8) -> Option<Cow<'static, [u8]>>,
 134 {
 135     let r = escape_byte_by(cow_str_to_bytes(s), f);
 136     match r {
 137         Cow::Borrowed(s) => unsafe { std::str::from_utf8_unchecked(s) }.into(),
 138         Cow::Owned(s) => unsafe { String::from_utf8_unchecked(s) }.into(),
 139     }
 140 }
 141
 142 pub fn escape_bstr_by<'a, F>(s: Cow<'a, BStr>, f: F) -> Cow<'a, BStr>
 143 where
 144     F: Fn(u8) -> Option<Cow<'static, [u8]>>,
 145 {
 146     let r = escape_byte_by(cow_bstr_to_bytes(s), f);
 147     match r {
 148         Cow::Borrowed(s) => <&BStr>::from(s).into(),
 149         Cow::Owned(s) => BString::from(s).into(),
 150     }
 151 }
 152
 /// Core escaping loop shared by `escape_by` and `escape_bstr_by`.
 ///
 /// Calls `f` once per input byte, in order. Nothing is allocated until `f`
 /// first returns a replacement; from then on the output is built in a fresh
 /// buffer. If `f` never returns a replacement the original `cow` is handed
 /// back untouched.
 fn escape_byte_by<F: Fn(u8) -> Option<Cow<'static, [u8]>>>(
     cow: Cow<'_, [u8]>,
     f: F,
 ) -> Cow<'_, [u8]> {
     let bytes = cow.as_ref();
     // `None` until the first byte that needs escaping is seen.
     let mut escaped: Option<Vec<u8>> = None;
     for (idx, &byte) in bytes.iter().enumerate() {
         let replacement = f(byte);
         if let Some(buf) = escaped.as_mut() {
             match replacement {
                 Some(r) => buf.extend_from_slice(&r),
                 None => buf.push(byte),
             }
             continue;
         }
         if let Some(r) = replacement {
             // First escape: start the buffer with the clean prefix seen so far.
             let mut buf = bytes[..idx].to_vec();
             buf.extend_from_slice(&r);
             escaped = Some(buf);
         }
     }
     match escaped {
         Some(buf) => Cow::Owned(buf),
         None => cow,
     }
 }
 177
 178 fn codepoint_to_utf8(n: u32, output: &mut impl GrowableBytes) -> Result<(), InvalidString> {
 179     if n <= 0x7f {
 180         output.push(n as u8);
 181     } else if n <= 0x7ff {
 182         output.push(0xc0 | (n >> 6) as u8);
 183         output.push(0x80 | (n & 0b111111) as u8);
 184     } else if n <= 0x00ffff {
 185         output.push(0xe0 | (n >> 12) as u8);
 186         output.push(0x80 | ((n >> 6) & 0b111111) as u8);
 187         output.push(0x80 | (n & 0x3f) as u8);
 188     } else if n <= 0x10ffff {
 189         output.push(0xf0 | (n >> 18) as u8);
 190         output.push(0x80 | ((n >> 12) & 0b111111) as u8);
 191         output.push(0x80 | ((n >> 6) & 0b111111) as u8);
 192         output.push(0x80 | (n & 0x3f) as u8);
 193     } else {
 194         return Err("UTF-8 codepoint too large".into());
 195     }
 196     Ok(())
 197 }
 198
 199 fn parse_int(s: &[u8], base: u32) -> Result<u32, InvalidString> {
 200     // input `s` can be assumed only contains ascii digits and 'aA' - 'fF',
 201     // it is safe to call from_utf8 here.
 202     let s = match std::str::from_utf8(s) {
 203         Ok(s) => s,
 204         _ => {
 205             return Err("invalid numeric escape".into());
 206         }
 207     };
 208     let s = u32::from_str_radix(s, base);
 209     match s {
 210         Ok(v) => Ok(v),
 211         _ => Err("invalid numeric escape".into()),
 212     }
 213 }
 214
 215 fn parse_numeric_escape(trim_to_byte: bool, s: &[u8], base: u32) -> Result<u8, InvalidString> {
 216     match parse_int(s, base) {
 217         Ok(v) => {
 218             if !trim_to_byte && (v > 255) {
 219                 Err("Invalid UTF-8 code point.".into())
 220             } else {
 221                 Ok(v as u8)
 222             }
 223         }
 224         Err(_) => Err("Invalid UTF-8 code point.".into()),
 225     }
 226 }
 227
 /// Selects which set of escape rules `unescape_literal` applies.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum LiteralKind {
     /// Heredoc rules: like double-quote rules, except that `\"` is copied
     /// through with its backslash.
     LiteralHeredoc,
     /// Double-quoted string rules: `\"` becomes `"`.
     LiteralDoubleQuote,
     /// Long-string rules: additionally recognizes `\a`, `\b` and `\?`, does
     /// not treat `\$` or `\u{...}` as escapes, and emits nothing for `\n` and
     /// `\r`.
     LiteralLongString,
 }
 234
 235 /// Copies `s` into `output`, replacing escape sequences with the characters
 236 /// they represent.
 237 ///
 238 /// The output is NOT guaranteed to be valid UTF-8. While this function will
 239 /// return `Err` in some cases where the input contains an escape sequence
 240 /// specifying an invalid codepoint, it will return invalid UTF-8 in some
 241 /// circumstances (e.g., for invalid UTF-8 encoded as hex or octal byte escapes,
 242 /// or UTF-16 encoded as \u escapes).
 243 fn unescape_literal(
 244     literal_kind: LiteralKind,
 245     s: &str,
 246     output: &mut impl GrowableBytes,
 247 ) -> Result<(), InvalidString> {
 248     struct Scanner<'a> {
 249         s: &'a [u8],
 250         i: usize,
 251     }
 252     impl<'a> Scanner<'a> {
 253         fn new(s: &'a [u8]) -> Self {
 254             Self { s, i: 0 }
 255         }
 256         fn is_empty(&self) -> bool {
 257             self.i >= self.s.len()
 258         }
 259         fn next(&mut self) -> Result<u8, InvalidString> {
 260             if self.i >= self.s.len() {
 261                 return Err("string ended early".into());
 262             }
 263             let r = self.s[self.i];
 264             self.i += 1;
 265             Ok(r)
 266         }
 267         fn take_if(&mut self, f: impl Fn(u8) -> bool, size: usize) -> &'a [u8] {
 268             let l = usize::min(size + self.i, self.s.len());
 269             let mut c = self.i;
 270             while c < l && f(self.s[c]) {
 271                 c += 1;
 272             }
 273             let r = &self.s[self.i..c];
 274             self.i = c;
 275             r
 276         }
 277         fn peek(&self) -> Option<u8> {
 278             if self.i < self.s.len() {
 279                 Some(self.s[self.i])
 280             } else {
 281                 None
 282             }
 283         }
 284         fn back(&mut self) {
 285             if self.i > 0 {
 286                 self.i -= 1;
 287             }
 288         }
 289     }
 290
 291     let mut s = Scanner::new(s.as_bytes());
 292     while !s.is_empty() {
 293         let c = s.next()?;
 294         if c != b'\\' || s.is_empty() {
 295             output.push(c);
 296         } else {
 297             let c = s.next()?;
 298             match c {
 299                 b'a' if literal_kind == LiteralKind::LiteralLongString => output.push(b'\x07'),
 300                 b'b' if literal_kind == LiteralKind::LiteralLongString => output.push(b'\x08'),
 301                 b'\'' => output.extend_from_slice(b"\\\'"),
 302                 b'n' => match literal_kind {
 303                     LiteralKind::LiteralLongString => {}
 304                     _ => output.push(b'\n'),
 305                 },
 306                 b'r' => match literal_kind {
 307                     LiteralKind::LiteralLongString => {}
 308                     _ => output.push(b'\r'),
 309                 },
 310                 b't' => output.push(b'\t'),
 311                 b'v' => output.push(b'\x0b'),
 312                 b'e' => output.push(b'\x1b'),
 313                 b'f' => output.push(b'\x0c'),
 314                 b'\\' => output.push(b'\\'),
 315                 b'?' if literal_kind == LiteralKind::LiteralLongString => output.push(b'\x3f'),
 316                 b'$' if literal_kind != LiteralKind::LiteralLongString => output.push(b'$'),
 317                 b'\"' => match literal_kind {
 318                     LiteralKind::LiteralDoubleQuote | LiteralKind::LiteralLongString => {
 319                         output.push(b'\"')
 320                     }
 321                     _ => output.extend_from_slice(b"\\\""),
 322                 },
 323                 b'u' if literal_kind != LiteralKind::LiteralLongString
 324                     && s.peek() == Some(b'{') =>
 325                 {
 326                     let _ = s.next()?;
 327                     let unicode = s.take_if(|c| c != b'}', 6);
 328                     let n = parse_int(unicode, 16)?;
 329                     codepoint_to_utf8(n, output)?;
 330                     let n = s.next()?;
 331                     if n != b'}' {
 332                         return Err("Invalid UTF-8 escape sequence".into());
 333                     }
 334                 }
 335                 b'x' | b'X' => {
 336                     let hex = s.take_if(is_hex, 2);
 337                     if hex.is_empty() {
 338                         output.push(b'\\');
 339                         output.push(c);
 340                     } else {
 341                         let c = parse_numeric_escape(false, hex, 16)?;
 342                         output.push(c as u8);
 343                     }
 344                 }
 345                 c if is_oct(c) => {
 346                     s.back();
 347                     let oct = s.take_if(is_oct, 3);
 348                     let c = parse_numeric_escape(true, oct, 8)?;
 349                     output.push(c as u8);
 350                 }
 351                 c => {
 352                     output.push(b'\\');
 353                     output.push(c);
 354                 }
 355             }
 356         }
 357     }
 358     Ok(())
 359 }
 360
 361 fn unescape_literal_into_string(
 362     literal_kind: LiteralKind,
 363     s: &str,
 364 ) -> Result<BString, InvalidString> {
 365     let mut output = Vec::with_capacity(s.len());
 366     unescape_literal(literal_kind, s, &mut output)?;
 367     Ok(output.into())
 368 }
 369
 370 fn unescape_literal_into_arena<'a>(
 371     literal_kind: LiteralKind,
 372     s: &str,
 373     arena: &'a Bump,
 374 ) -> Result<&'a BStr, InvalidString> {
 375     let mut output = bumpalo::collections::Vec::with_capacity_in(s.len(), arena);
 376     unescape_literal(literal_kind, s, &mut output)?;
 377     Ok(output.into_bump_slice().into())
 378 }
 379
 380 pub fn unescape_double(s: &str) -> Result<BString, InvalidString> {
 381     unescape_literal_into_string(LiteralKind::LiteralDoubleQuote, s)
 382 }
 383
 384 pub fn unescape_heredoc(s: &str) -> Result<BString, InvalidString> {
 385     unescape_literal_into_string(LiteralKind::LiteralHeredoc, s)
 386 }
 387
 388 pub fn unescape_double_in<'a>(s: &str, arena: &'a Bump) -> Result<&'a BStr, InvalidString> {
 389     unescape_literal_into_arena(LiteralKind::LiteralDoubleQuote, s, arena)
 390 }
 391
 392 pub fn unescape_heredoc_in<'a>(s: &str, arena: &'a Bump) -> Result<&'a BStr, InvalidString> {
 393     unescape_literal_into_arena(LiteralKind::LiteralHeredoc, s, arena)
 394 }
 395
 396 /// Copies `s` into `output`, replacing escape sequences with the characters
 397 /// they represent. The bytes added to `output` will be valid UTF-8.
 398 fn unescape_single_or_nowdoc(
 399     is_nowdoc: bool,
 400     s: &str,
 401     output: &mut impl GrowableBytes,
 402 ) -> Result<(), InvalidString> {
 403     let s = s.as_bytes();
 404     let len = s.len();
 405     let mut idx = 0;
 406     while idx < len {
 407         let c = s[idx];
 408         if is_nowdoc || c != b'\\' {
 409             output.push(c)
 410         } else {
 411             idx += 1;
 412             if !idx < len {
 413                 return Err("string ended early".into());
 414             }
 415             let c = s[idx];
 416             match c {
 417                 b'\'' | b'\\' => output.push(c),
 418                 // unrecognized escapes are just copied over
 419                 _ => {
 420                     output.push(b'\\');
 421                     output.push(c);
 422                 }
 423             }
 424         }
 425         idx += 1;
 426     }
 427     Ok(())
 428 }
 429
 430 fn unescape_single_or_nowdoc_into_string(
 431     is_nowdoc: bool,
 432     s: &str,
 433 ) -> Result<String, InvalidString> {
 434     let mut output = Vec::with_capacity(s.len());
 435     unescape_single_or_nowdoc(is_nowdoc, s, &mut output)?;
 436     // Safety: s is a valid &str, and unescape_single_or_nowdoc copies it into
 437     // output, only adding and removing valid UTF-8 codepoints.
 438     Ok(unsafe { String::from_utf8_unchecked(output) })
 439 }
 440
 441 fn unescape_single_or_nowdoc_into_arena<'a>(
 442     is_nowdoc: bool,
 443     s: &str,
 444     arena: &'a Bump,
 445 ) -> Result<&'a str, InvalidString> {
 446     let mut output = bumpalo::collections::Vec::with_capacity_in(s.len(), arena);
 447     unescape_single_or_nowdoc(is_nowdoc, s, &mut output)?;
 448     // Safety: s is a valid &str, and unescape_single_or_nowdoc copies it into
 449     // output, only adding and removing valid UTF-8 codepoints.
 450     let string = unsafe { bumpalo::collections::String::from_utf8_unchecked(output) };
 451     Ok(string.into_bump_str())
 452 }
 453
 454 pub fn unescape_single(s: &str) -> Result<String, InvalidString> {
 455     unescape_single_or_nowdoc_into_string(false, s)
 456 }
 457
 458 pub fn unescape_nowdoc(s: &str) -> Result<String, InvalidString> {
 459     unescape_single_or_nowdoc_into_string(true, s)
 460 }
 461
 462 pub fn unescape_single_in<'a>(s: &str, arena: &'a Bump) -> Result<&'a str, InvalidString> {
 463     unescape_single_or_nowdoc_into_arena(false, s, arena)
 464 }
 465
 466 pub fn unescape_nowdoc_in<'a>(s: &str, arena: &'a Bump) -> Result<&'a str, InvalidString> {
 467     unescape_single_or_nowdoc_into_arena(true, s, arena)
 468 }
 469
 470 pub fn unescape_long_string(s: &str) -> Result<BString, InvalidString> {
 471     unescape_literal_into_string(LiteralKind::LiteralLongString, s)
 472 }
 473
 474 pub fn unescape_long_string_in<'a>(s: &str, arena: &'a Bump) -> Result<&'a BStr, InvalidString> {
 475     unescape_literal_into_arena(LiteralKind::LiteralLongString, s, arena)
 476 }
 477
 478 pub fn extract_unquoted_string(
 479     content: &str,
 480     start: usize,
 481     len: usize,
 482 ) -> Result<String, InvalidString> {
 483     let substr = content
 484         .get(start..start + len)
 485         .ok_or_else(|| InvalidString::from("out of bounds or sliced at non-codepoint-boundary"))?;
 486     Ok(unquote_str(substr).into())
 487 }
 488
 489 /// Remove single quotes, double quotes, backticks, or heredoc/nowdoc delimiters
 490 /// surrounding a string literal.
 491 pub fn unquote_str(content: &str) -> &str {
 492     let unquoted = unquote_slice(content.as_bytes());
 493     // Safety: content is a valid &str. unquote_slice finds ASCII delimiters and
 494     // removes the prefix and suffix surrounding them. Because it uses ASCII
 495     // delimiters, we know it is slicing at codepoint boundaries.
 496     unsafe { std::str::from_utf8_unchecked(unquoted) }
 497 }
 498
 /// Returns the index of the first occurrence of `needle` in `s`, if any.
 fn find(s: &[u8], needle: u8) -> Option<usize> {
     s.iter().position(|&c| c == needle)
 }
 507
 /// Returns the index of the last occurrence of `needle` in `s`, if any.
 fn rfind(s: &[u8], needle: u8) -> Option<usize> {
     s.iter().rposition(|&c| c == needle)
 }
 518
 519 /// Remove single quotes, double quotes, backticks, or heredoc/nowdoc delimiters
 520 /// surrounding a string literal. If the input slice is valid UTF-8, the output
 521 /// slice will also be valid UTF-8.
 522 pub fn unquote_slice(content: &[u8]) -> &[u8] {
 523     if content.len() < 2 {
 524         content
 525     } else if content.starts_with(b"<<<") {
 526         // The heredoc case
 527         // These types of strings begin with an opening line containing <<<
 528         // followed by a string to use as a terminator (which is optionally
 529         // quoted), and end with a line containing only the terminator.
 530         // We need to drop the opening line and terminator line.
 531         match (find(content, b'\n'), rfind(content, b'\n')) {
 532             (Some(start), Some(end)) => {
 533                 // An empty heredoc, this way, will have start >= end
 534                 if start >= end {
 535                     &[]
 536                 } else {
 537                     &content[start + 1..end]
 538                 }
 539             }
 540             _ => content,
 541         }
 542     } else {
 543         let c1 = content[0];
 544         let c2 = content[content.len() - 1];
 545         if c1 == c2 && (c1 == b'\'' || c1 == b'"' || c1 == b'`') {
 546             &content[1..content.len() - 1]
 547         } else {
 548             content
 549         }
 550     }
 551 }
 552
 #[cfg(test)]
 mod tests {
     use super::*;
     use bstr::B;
     use pretty_assertions::assert_eq; // make assert_eq print huge diffs more human-readable

     // Covers every `unescape_*` entry point. Named so that it does not shadow
     // the `unescape_single_or_nowdoc` function pulled in by `use super::*`.
     #[test]
     fn unescape_test() {
         assert_eq!(unescape_single("").unwrap(), "");
         assert_eq!(unescape_nowdoc("").unwrap(), "");
         assert_eq!(unescape_long_string("").unwrap(), "");
         assert_eq!(unescape_double("").unwrap(), "");
         assert_eq!(unescape_heredoc("").unwrap(), "");

         assert_eq!(
             unescape_single("home \\\\$").unwrap(),
             "home \\$".to_string()
         );
         assert_eq!(unescape_nowdoc("home \\$").unwrap(), "home \\$".to_string());
         assert_eq!(unescape_single("home \\'").unwrap(), "home '".to_string());
         assert_eq!(unescape_nowdoc("home \\'").unwrap(), "home \\'".to_string());
         assert_eq!(unescape_nowdoc("\\`").unwrap(), "\\`");
         assert_eq!(unescape_single("\\a\\\'").unwrap(), "\\a'");
         assert_eq!(unescape_long_string("\\a").unwrap(), "\x07");
         assert_eq!(unescape_long_string("\\v").unwrap(), "\x0b");
         assert_eq!(unescape_long_string("\\\'").unwrap(), "\\\'");
         assert_eq!(unescape_long_string("\\\\").unwrap(), "\\");
         assert_eq!(unescape_long_string("?").unwrap(), "\x3f");
         assert_eq!(unescape_long_string("$").unwrap(), "$");

         assert_eq!(unescape_long_string("\\b").unwrap(), "\x08");
         assert_eq!(unescape_long_string("\\e").unwrap(), "\x1b");
         assert_eq!(unescape_long_string("\\f").unwrap(), "\x0c");
         assert_eq!(unescape_long_string("\\\"").unwrap(), "\"");
         assert_eq!(unescape_long_string("\\`").unwrap(), "\\`");
         assert_eq!(unescape_heredoc("\\\"").unwrap(), "\\\"");
         assert_eq!(unescape_heredoc("\\p").unwrap(), "\\p");
         assert_eq!(unescape_long_string("\\r").unwrap(), "");
         assert_eq!(unescape_double("\\u{b1}").unwrap(), "±");

         assert_eq!(unescape_double("\\x27\\x22").unwrap(), "\'\"");
         assert_eq!(unescape_double("\\X27\\X22").unwrap(), "\'\"");
         assert_eq!(
             unescape_double("\\141\\156\\143\\150\\157\\162").unwrap(),
             "anchor"
         );
         assert_eq!(unescape_long_string("\\xb1").unwrap(), B(&[177u8]));

         let euro = "\u{20AC}"; // as bytes [226, 130, 172]
         assert_eq!(
             unescape_long_string(euro).unwrap(),
             B(&[226u8, 130u8, 172u8])
         );

         let invalid = r#"\u{D800}\u{DF1E}"#;
         assert_eq!(
             unescape_double(invalid).unwrap(),
             B(&[237u8, 160u8, 128u8, 237u8, 188u8, 158u8])
         );
     }

     #[test]
     fn parse_int_test() {
         assert_eq!(parse_int(b"2", 10).unwrap(), 2);
         assert!(parse_int(b"h", 10).is_err());
         assert_eq!(parse_int(b"12", 8).unwrap(), 10);
         assert_eq!(parse_int(b"b1", 16).unwrap(), 177)
     }

     #[test]
     fn escape_char_test() {
         let escape_char_ = |c: u8| -> String {
             let r = escape_char(c)
                 .unwrap_or_else(|| vec![c].into())
                 .into_owned();
             // Only ASCII bytes are passed in below, so a checked conversion
             // cannot fail and no `unsafe` is needed.
             String::from_utf8(r).expect("escape_char output for ASCII input is ASCII")
         };

         assert_eq!(escape_char_(b'a'), "a");
         assert_eq!(escape_char_(b'$'), "$");
         assert_eq!(escape_char_(b'\"'), "\\\"");
         assert_eq!(escape_char_(0), "\\000");
         assert_eq!(escape("house"), "house");
         assert_eq!(escape("\n"), "\\n");
         assert_eq!(escape("red\n\t\r$?"), "red\\n\\t\\r$?");
         assert!(is_oct(b'5'));
         assert!(!is_oct(b'a'));
     }

     #[test]
     fn extract_unquoted_string_test() {
         assert_eq!(extract_unquoted_string("'a'", 0, 3).unwrap(), "a");
         assert_eq!(extract_unquoted_string("\"a\"", 0, 3).unwrap(), "a");
         assert_eq!(extract_unquoted_string("`a`", 0, 3).unwrap(), "a");
         assert_eq!(extract_unquoted_string("", 0, 0).unwrap(), "");
         assert_eq!(extract_unquoted_string("''", 0, 2).unwrap(), "");
         assert_eq!(extract_unquoted_string("'a", 0, 2).unwrap(), "'a");
         assert_eq!(extract_unquoted_string("a", 0, 1).unwrap(), "a");
         assert_eq!(extract_unquoted_string("<<<EOT\n\nEOT", 0, 11).unwrap(), "");
         assert_eq!(
             extract_unquoted_string("<<<EOT\na\nEOT", 0, 12).unwrap(),
             "a"
         );
     }

     #[test]
     fn rfind_test() {
         assert_eq!(rfind(b"", b'a'), None);
         assert_eq!(rfind(b"a", b'a'), Some(0));
         assert_eq!(rfind(b"b", b'a'), None);
         assert_eq!(rfind(b"ba", b'a'), Some(1));
     }

     #[test]
     fn unquote_str_test() {
         assert_eq!(unquote_str(""), "");
         assert_eq!(unquote_str("''"), "");
         assert_eq!(unquote_str("\"\""), "");
         assert_eq!(unquote_str("``"), "");

         assert_eq!(unquote_str("'a'"), "a");
         assert_eq!(unquote_str("\"a\""), "a");
         assert_eq!(unquote_str("`a`"), "a");
         assert_eq!(unquote_str(r#"`a\``"#), r#"a\`"#);

         assert_eq!(unquote_str("<<<EOT\nEOT"), "");
         assert_eq!(unquote_str("<<<EOT\n\nEOT"), "");
         assert_eq!(unquote_str("<<<EOT\n\n\nEOT"), "\n");
         assert_eq!(unquote_str("<<<EOT\na\nEOT"), "a");
         assert_eq!(unquote_str("<<<EOT\n\na\n\nEOT"), "\na\n");

         assert_eq!(unquote_str("'"), "'");
         assert_eq!(unquote_str("\""), "\"");
         assert_eq!(unquote_str("`"), "`");

         assert_eq!(unquote_str("a"), "a");
         assert_eq!(unquote_str("`a"), "`a");
         assert_eq!(unquote_str(" `a`"), " `a`");
         assert_eq!(unquote_str("'a\""), "'a\"");

         assert_eq!(unquote_str("<<<"), "<<<");
         assert_eq!(unquote_str("<<<EOTEOT"), "<<<EOTEOT");
     }
 }