2 * Copyright (c) 2015, Facebook, Inc.
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the "hack" directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
11 (* Implementation of string escaping stuff. Ugggggggh.
12 * See http://php.net/manual/en/language.types.string.php *)
(* Raised by the escaping and unescaping helpers in this file when their
 * input cannot be processed; the payload is a human-readable reason. *)
exception Invalid_string of string
(* True when the character lies in the printable ASCII range, from space
 * (0x20) through tilde (0x7E) inclusive. *)
let is_printable = function
  | ' ' .. '~' -> true
  | _ -> false
(* True when the character can appear verbatim inside an escaped literal:
 * it must satisfy is_printable and be neither a backslash nor a double
 * quote. *)
let is_lit_printable c =
  match c with
  | '\\' | '"' -> false
  | _ -> is_printable c
(* True when the character is an ASCII hexadecimal digit: 0-9, a-f or A-F. *)
let is_hex = function
  | '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' -> true
  | _ -> false
(* True when the character is an ASCII octal digit, 0 through 7. *)
let is_oct = function
  | '0' .. '7' -> true
  | _ -> false
24 (* This escapes a string using the format understood by the assembler
25 * and php serialization. The assembler and php serialization probably
26 * don't actually have the same rules but this should safely fit in both.
27 * It will escape $ in octal so that it can also be used as a PHP double
(* Escape one character, returning its replacement text as a string.
 * Characters accepted by is_lit_printable are returned unchanged as a
 * one-character string; the final catch-all arm formats the character code
 * as a backslash followed by zero-padded three-digit octal.
 * NOTE(review): match arms preceding the first one shown here may exist but
 * are not visible in this excerpt -- confirm against the full file. *)
30 let escape_char = function
38 | c when is_lit_printable c -> String.make 1 c
39 | c -> Printf.sprintf "\\%03o
" (Char.code c)
(* Escape a whole string: f (default: escape_char) is applied to each
 * character of s in order and the resulting strings are appended to a
 * buffer whose initial capacity is the length of s.
 * NOTE(review): the expression producing the final result is not visible
 * in this excerpt; presumably the buffer contents are returned -- confirm. *)
41 let escape ?(f = escape_char) s =
42 let buf = Buffer.create (String.length s) in
43 String.iter (fun c -> Buffer.add_string buf @@ f c) s;
46 (* Convert the codepoint n to UTF-8, appending the encoded bytes to buf.
 * The visible branches emit two bytes when n <= 0x7FF, three bytes when
 * n <= 0xFFFF and four bytes when n <= 0x10FFFF: a lead byte carrying the
 * high bits, followed by continuation bytes of the form 0x80 plus six
 * payload bits each.
 * The trailing raise reports Invalid_string; presumably it is the branch
 * taken for values above 0x10FFFF.
 * NOTE(review): the body of the first branch (n <= 0x7F) and the final
 * else are not visible in this excerpt -- confirm against the full file. *)
47 let codepoint_to_utf8 n buf =
(* add appends a single byte, given as an integer, to buf *)
48 let add i = Buffer.add_char buf (Char.chr i) in
49 if n <= 0x7F then begin
51 end else if n <= 0x7FF then begin
52 add (0xC0 lor ((n lsr 6) ));
53 add (0x80 lor ((n ) land 0x3F));
54 end else if n <= 0x00FFFF then begin
55 add (0xE0 lor ((n lsr 12) ));
56 add (0x80 lor ((n lsr 6) land 0x3F));
57 add (0x80 lor ((n ) land 0x3F));
58 end else if n <= 0x10FFFF then begin
59 add (0xF0 lor ((n lsr 18) ));
60 add (0x80 lor ((n lsr 12) land 0x3F));
61 add (0x80 lor ((n lsr 6) land 0x3F));
62 add (0x80 lor ((n ) land 0x3F));
64 raise (Invalid_string "UTF
-8 codepoint too large
")
69 with _ -> raise (Invalid_string "invalid numeric
escape")
(* Parse the textual numeric escape s with parse_int. When trim_to_byte is
 * set (default false) only the low 8 bits of the parsed value are kept.
 * The handler shown at the end converts any exception it catches into
 * Invalid_string.
 * NOTE(review): the try keyword and the expression that turns v into the
 * result are not visible in this excerpt -- confirm against the full
 * file. *)
70 let parse_numeric_escape ?(trim_to_byte = false) s =
72 let v = parse_int s in
73 let v = if trim_to_byte then v land 0xFF else v in
75 with _ -> raise (Invalid_string "escaped character too large
")
79 | Literal_double_quote
(* Decode backslash escapes in the body s of a literal whose flavour is
 * given by literal_kind, accumulating the output in a buffer.
 *
 * Behaviour established by the visible lines:
 * - a character other than backslash is copied unchanged;
 * - backslash + single quote is kept as that two-character sequence;
 * - n, r, t, v, e, f map to newline, carriage return, tab, 0x0b, 0x1b and
 *   0x0c; an escaped backslash or dollar sign yields that character;
 * - an escaped backtick is unescaped only for Literal_backtick and an
 *   escaped double quote only for Literal_double_quote; otherwise the
 *   backslash is kept;
 * - u followed by an opening brace reads up to six characters before the
 *   closing brace as hexadecimal and appends the codepoint as UTF-8 via
 *   codepoint_to_utf8; a missing closing brace raises Invalid_string;
 * - a hexadecimal escape takes up to two hex digits, an octal escape up to
 *   three octal digits with the value masked to one byte;
 * - any other escape is copied together with its backslash.
 * Reading past the end of s raises Invalid_string.
 *
 * NOTE(review): several lines are not visible in this excerpt, including
 * the binding of idx, the header of the helper used as next (), the
 * enclosing loop, some match arm headers and the final result expression.
 * The arm attributions in the inline notes below are therefore
 * presumptions -- confirm against the full file. *)
82 let unescape_literal literal_kind s =
83 let len = String.length s in
84 let buf = Buffer.create len in
(* body of the character reader: fails at end of input, otherwise returns
 * the current character and advances idx *)
87 if !idx >= len then raise (Invalid_string "string ended early
") else
88 let c = s.[!idx] in (incr idx; c)
91 (* Count how many characters, starting at the current string index.
92 * Will always stop at i=max. *)
93 let rec count_f f ~max i =
94 if i >= max || !idx + i >= len || not (f s.[!idx+i]) then i
95 else count_f f max (i+1)
100 if c <> '\\' then Buffer.add_char buf c else begin
103 | '\'' -> Buffer.add_string buf "\\\'"
104 | 'n' -> Buffer.add_char buf '\n'
105 | 'r' -> Buffer.add_char buf '\r'
106 | 't' -> Buffer.add_char buf '\t'
107 | 'v' -> Buffer.add_char buf '\x0b'
108 | 'e' -> Buffer.add_char buf '\x1b'
109 | 'f' -> Buffer.add_char buf '\x0c'
110 | '\\' -> Buffer.add_char buf '\\'
111 | '$' -> Buffer.add_char buf '$'
(* presumably the backtick arm: unescaped only inside backtick literals *)
113 if literal_kind = Literal_backtick
114 then Buffer.add_char buf '`'
115 else Buffer.add_string buf "\\`
"
(* presumably the double quote arm: unescaped only inside double-quoted
 * literals *)
117 if literal_kind = Literal_double_quote
118 then Buffer.add_char buf '\"'
119 else Buffer.add_string buf "\\\""
(* brace-delimited unicode escape; taken only when an opening brace
 * follows the u *)
120 | 'u' when !idx < len && s.[!idx] = '{' ->
122 let unicode_count = count_f (fun c -> c <> '}') ~max:6 0 in
123 let n = parse_int ("0x
" ^ String.sub s (!idx) unicode_count) in
124 codepoint_to_utf8 n buf;
125 idx := !idx + unicode_count;
126 if next () <> '}' then
127 raise (Invalid_string "Invalid UTF
-8 escape sequence
")
(* presumably the x arm: with no hex digit following, the backslash and x
 * are emitted literally *)
129 let hex_count = count_f is_hex ~max:2 0 in
130 if hex_count = 0 then
131 Buffer.add_string buf "\\x
"
133 let c = parse_numeric_escape ("0x
" ^ String.sub s (!idx) hex_count) in
134 Buffer.add_char buf c;
135 idx := !idx + hex_count
(* presumably the octal arm: at most three digits, value masked to a byte *)
138 let oct_count = count_f is_oct ~max:3 0 in
139 let c = parse_numeric_escape
140 ~trim_to_byte:true ("0o
" ^ String.sub s (!idx) oct_count) in
141 Buffer.add_char buf c;
142 idx := !idx + oct_count
143 (* unrecognized escapes are just copied over *)
145 Buffer.add_char buf '\\';
146 Buffer.add_char buf c
(* Unescape the body of a double-quoted literal by delegating to
 * unescape_literal with the Literal_double_quote flavour. *)
let unescape_double s = s |> unescape_literal Literal_double_quote
(* Unescape the body of a backtick literal by delegating to
 * unescape_literal with the Literal_backtick flavour. *)
let unescape_backtick s = s |> unescape_literal Literal_backtick
(* Unescape the body of a heredoc literal by delegating to
 * unescape_literal with the Literal_heredoc flavour. *)
let unescape_heredoc s = s |> unescape_literal Literal_heredoc
(* Decode the body s of a single-quoted or nowdoc literal, accumulating the
 * output in a buffer.
 * A character other than backslash is copied unchanged. After a backslash,
 * a single quote or a backslash is unescaped only when is_nowdoc is false;
 * every other escape, and every escape when is_nowdoc is true, is copied
 * together with its backslash.
 * Reading past the end of s raises Invalid_string.
 * NOTE(review): the binding of idx, the header of the character reader,
 * the enclosing loop and the final result expression are not visible in
 * this excerpt -- confirm against the full file. *)
162 let unescape_single_or_nowdoc ~is_nowdoc s =
163 let len = String.length s in
164 let buf = Buffer.create len in
(* body of the character reader: fails at end of input, otherwise returns
 * the current character and advances idx *)
167 if !idx >= len then raise (Invalid_string "string ended early
") else
168 let c = s.[!idx] in (incr idx; c)
173 if c <> '\\' then Buffer.add_char buf c else begin
176 | '\'' when not is_nowdoc -> Buffer.add_char buf '\''
177 | '\\' when not is_nowdoc -> Buffer.add_char buf '\\'
178 (* unrecognized escapes are just copied over *)
180 Buffer.add_char buf '\\';
181 Buffer.add_char buf c
(* Unescape the body of a single-quoted literal: delegates to
 * unescape_single_or_nowdoc with is_nowdoc set to false. *)
let unescape_single s = s |> unescape_single_or_nowdoc ~is_nowdoc:false
(* Unescape the body of a nowdoc literal: delegates to
 * unescape_single_or_nowdoc with is_nowdoc set to true. *)
let unescape_nowdoc s = s |> unescape_single_or_nowdoc ~is_nowdoc:true