2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #include "hphp/runtime/base/string-util.h"
18 #include "hphp/util/zend/zend_html.h"
19 #include "hphp/runtime/base/zend-string.h"
20 #include "hphp/runtime/base/zend-url.h"
21 #include "hphp/runtime/base/runtime-error.h"
22 #include "hphp/runtime/base/array_iterator.h"
23 #include "hphp/runtime/base/builtin_functions.h"
26 ///////////////////////////////////////////////////////////////////////////////
/**
 * Lower-cases `input` according to `type`.
 *
 * An empty input is returned unchanged. Otherwise the visible case labels
 * route to string_to_lower (ToLowerType::All), string_to_lower_first
 * (ToLowerType::First) or string_to_lower_words (ToLowerType::Words), each
 * called with input.data() and the input length. The resulting buffer is
 * wrapped in a String constructed with AttachString.
 *
 * NOTE(review): the declaration of `ret`, the switch header and whatever
 * follows each case are not visible in this listing -- confirm against the
 * full file. Presumably AttachString hands ownership of `ret` to the
 * returned String; verify against the String constructor.
 */
29 String
StringUtil::ToLower(CStrRef input
,
30 ToLowerType type
/*= ToLowerType::All */) {
31 if (input
.empty()) return input
;
33 int len
= input
.size();
36 case ToLowerType::All
:
37 ret
= string_to_lower(input
.data(), len
);
39 case ToLowerType::First
:
40 ret
= string_to_lower_first(input
.data(), len
);
42 case ToLowerType::Words
:
43 ret
= string_to_lower_words(input
.data(), len
);
49 return String(ret
, len
, AttachString
);
/**
 * Upper-cases `input` according to `type`.
 *
 * An empty input is returned unchanged. Otherwise the visible case labels
 * route to string_to_upper (ToUpperType::All), string_to_upper_first
 * (ToUpperType::First) or string_to_upper_words (ToUpperType::Words), each
 * called with input.data() and the input length. The resulting buffer is
 * wrapped in a String constructed with AttachString.
 *
 * NOTE(review): the declaration of `ret`, the switch header and whatever
 * follows each case are not visible in this listing -- confirm against the
 * full file.
 */
52 String
StringUtil::ToUpper(CStrRef input
,
53 ToUpperType type
/*= ToUpperType::All */) {
54 if (input
.empty()) return input
;
56 int len
= input
.size();
59 case ToUpperType::All
:
60 ret
= string_to_upper(input
.data(), len
);
62 case ToUpperType::First
:
63 ret
= string_to_upper_first(input
.data(), len
);
65 case ToUpperType::Words
:
66 ret
= string_to_upper_words(input
.data(), len
);
72 return String(ret
, len
, AttachString
);
/**
 * Trims `input` using the characters in `charlist`.
 *
 * An empty input is returned unchanged. Otherwise the work is delegated to
 * string_trim, which receives the input bytes and length, the charlist bytes
 * and length, and `type` converted to int. The result is wrapped in a String
 * constructed with AttachString.
 *
 * NOTE(review): lines between the string_trim call and the final return are
 * not visible in this listing; there may be handling for a null `ret` or an
 * updated `len` -- confirm against the full file.
 */
75 String
StringUtil::Trim(CStrRef input
, TrimType type
/* = TrimType::Both */,
76 CStrRef charlist
/* = k_HPHP_TRIM_CHARLIST */) {
77 if (input
.empty()) return input
;
78 int len
= input
.size();
79 char *ret
= string_trim(input
.data(), len
,
80 charlist
.data(), charlist
.length(),
81 static_cast<int>(type
));
85 return String(ret
, len
, AttachString
);
/**
 * Pads `input` to `final_length` using `pad_string`, on the side(s) selected
 * by `type`.
 *
 * Unlike most siblings in this file there is no early return for an empty
 * input. The work is delegated to string_pad, which receives the input bytes
 * and length, the target length, the pad bytes and length, and `type`
 * converted to int. When string_pad yields a non-null buffer it is wrapped in
 * a String constructed with AttachString.
 *
 * NOTE(review): what is returned when string_pad yields null is not visible
 * in this listing -- confirm against the full file.
 */
88 String
StringUtil::Pad(CStrRef input
, int final_length
,
89 CStrRef pad_string
/* = " " */,
90 PadType type
/* = PadType::Right */) {
91 int len
= input
.size();
92 char *ret
= string_pad(input
.data(), len
, final_length
, pad_string
.data(),
93 pad_string
.size(), static_cast<int>(type
));
94 if (ret
) return String(ret
, len
, AttachString
);
/**
 * Returns `input` reversed.
 *
 * An empty input is returned unchanged. Otherwise the buffer produced by
 * string_reverse(input.data(), len) is wrapped in a String of the same
 * length, constructed with AttachString.
 */
98 String
StringUtil::Reverse(CStrRef input
) {
99 if (input
.empty()) return input
;
100 int len
= input
.size();
101 return String(string_reverse(input
.data(), len
), len
, AttachString
);
/**
 * Repeats `input` `count` times.
 *
 * A warning ("Second argument has to be greater than or equal to 0") can be
 * raised; for a non-empty input the work is delegated to
 * string_repeat(input.data(), len, count) and the result is wrapped in a
 * String constructed with AttachString.
 *
 * NOTE(review): the condition guarding the warning (presumably count < 0),
 * what is returned after it, and the result for an empty input are not
 * visible in this listing -- confirm against the full file.
 */
104 String
StringUtil::Repeat(CStrRef input
, int count
) {
106 raise_warning("Second argument has to be greater than or equal to 0");
112 if (!input
.empty()) {
113 int len
= input
.size();
114 char *ret
= string_repeat(input
.data(), len
, count
);
116 return String(ret
, len
, AttachString
);
/**
 * Returns a shuffled copy of `input`.
 *
 * For a non-empty input the work is delegated to
 * string_shuffle(input.data(), len) and the result is wrapped in a String
 * constructed with AttachString.
 *
 * NOTE(review): the value returned for an empty input is not visible in this
 * listing -- confirm against the full file.
 */
122 String
StringUtil::Shuffle(CStrRef input
) {
123 if (!input
.empty()) {
124 int len
= input
.size();
125 char *ret
= string_shuffle(input
.data(), len
);
127 return String(ret
, len
, AttachString
);
/**
 * Strips tags from `input`, passing `allowable_tags` through to the helper.
 *
 * An empty input is returned unchanged. Otherwise string_strip_tags is called
 * with the input bytes and length, the allowable_tags bytes and length, and a
 * literal `false` as the last argument; the result is wrapped in a String
 * constructed with AttachString.
 *
 * NOTE(review): the meaning of the trailing `false` argument is defined by
 * string_strip_tags, which is not visible here.
 */
133 String
StringUtil::StripHTMLTags(CStrRef input
,
134 CStrRef allowable_tags
/* = "" */) {
135 if (input
.empty()) return input
;
136 int len
= input
.size();
137 char *ret
= string_strip_tags(input
.data(), len
, allowable_tags
.data(),
138 allowable_tags
.size(), false);
139 return String(ret
, len
, AttachString
);
/**
 * Wraps `input` at `width`, inserting `wordbreak`; `cut` is forwarded to the
 * helper.
 *
 * For a non-empty input the work is delegated to string_wordwrap, which
 * receives the input bytes and length, `width`, the wordbreak bytes and
 * length, and `cut`. The result is wrapped in a String constructed with
 * AttachString.
 *
 * NOTE(review): the value returned for an empty input, and any check between
 * the helper call and the return, are not visible in this listing -- confirm
 * against the full file.
 */
142 String
StringUtil::WordWrap(CStrRef input
, int width
,
143 CStrRef wordbreak
/* = "\n" */,
144 bool cut
/* = false */) {
145 if (!input
.empty()) {
146 int len
= input
.size();
147 char *ret
= string_wordwrap(input
.data(), len
, width
, wordbreak
.data(),
148 wordbreak
.size(), cut
);
150 return String(ret
, len
, AttachString
);
157 ///////////////////////////////////////////////////////////////////////////////
/**
 * Splits `input` on `delimiter` and collects the pieces in an Array.
 *
 * What the visible lines show:
 *  - An empty delimiter is reported through
 *    throw_invalid_argument("delimiter: (empty)").
 *  - First branch: pieces input.substr(pos0, pos - pos0) are appended in a
 *    do/while loop that keeps going while input.find(delimiter, pos) is >= 0
 *    and the pre-decremented `limit` is still > 1. Afterwards, if
 *    pos0 <= input.size(), the remainder input.substr(pos0) is appended as
 *    the final piece.
 *  - `limit < 0` branch: (start, length) pairs for every piece, including the
 *    trailing one, are pushed onto `positions`. Only the first
 *    (found + limit) pairs are then turned into substrings, i.e. pieces are
 *    dropped from the end.
 *
 * NOTE(review): the declarations/updates of `pos0` and `found`, the condition
 * of the first branch, the handling of an empty `input`, and the return
 * statements are not visible in this listing -- confirm against the full
 * file. The summary of the negative-limit branch assumes `found` counts the
 * collected pieces.
 */
160 Variant
StringUtil::Explode(CStrRef input
, CStrRef delimiter
,
161 int limit
/* = 0x7FFFFFFF */) {
162 if (delimiter
.empty()) {
163 throw_invalid_argument("delimiter: (empty)");
167 Array
ret(Array::Create());
177 int pos
= input
.find(delimiter
);
181 int len
= delimiter
.size();
184 ret
.append(input
.substr(pos0
, pos
- pos0
));
187 } while ((pos
= input
.find(delimiter
, pos
)) >= 0 && --limit
> 1);
189 if (pos0
<= input
.size()) {
190 ret
.append(input
.substr(pos0
));
193 } else if (limit
< 0) {
194 int pos
= input
.find(delimiter
);
196 vector
<int> positions
;
197 int len
= delimiter
.size();
201 positions
.push_back(pos0
);
202 positions
.push_back(pos
- pos0
);
206 } while ((pos
= input
.find(delimiter
, pos
)) >= 0);
208 if (pos0
<= input
.size()) {
209 positions
.push_back(pos0
);
210 positions
.push_back(input
.size() - pos0
);
// Each piece occupies two slots in `positions`, hence the shift by one.
213 int iMax
= (found
+ limit
) << 1;
214 for (int i
= 0; i
< iMax
; i
+= 2) {
215 ret
.append(input
.substr(positions
[i
], positions
[i
+1]));
217 } // else we have negative limit and delimiter not found
/**
 * Joins the elements of `items` into one String separated by `delim`.
 *
 * What the visible lines show:
 *  - An empty array yields "".
 *  - Raw storage for `size` String objects is obtained with smart_malloc and
 *    each slot is filled by placement-new from iter.second().toString().
 *  - While converting, `len` accumulates every item's size plus one delimiter
 *    per item; one delimiter is then subtracted.
 *  - The result is built in a String created with (len, ReserveString),
 *    written through mutableSlice().ptr with memcpy for delimiters and items.
 *  - An assert checks that exactly `len` bytes were written before
 *    s.setSize(len) is returned.
 *
 * NOTE(review): the declarations of `len`, `i` and `p`, the condition that
 * guards the delimiter copy, and the teardown of `sitems` (destructor calls
 * and freeing the smart_malloc block) are not visible in this listing --
 * confirm against the full file.
 */
225 String
StringUtil::Implode(CArrRef items
, CStrRef delim
) {
226 int size
= items
.size();
227 if (size
== 0) return "";
229 String
* sitems
= (String
*)smart_malloc(size
* sizeof(String
));
231 int lenDelim
= delim
.size();
// Pass 1: stringify every element and compute the total output length.
233 for (ArrayIter
iter(items
); iter
; ++iter
) {
234 new (&sitems
[i
]) String(iter
.second().toString());
235 len
+= sitems
[i
].size() + lenDelim
;
238 len
-= lenDelim
; // always one delimiter less than count of items
241 String s
= String(len
, ReserveString
);
242 char *buffer
= s
.mutableSlice().ptr
;
243 const char *sdelim
= delim
.data();
// Pass 2: copy delimiters and items into the reserved buffer.
245 for (int i
= 0; i
< size
; i
++) {
246 String
&item
= sitems
[i
];
248 memcpy(p
, sdelim
, lenDelim
);
251 int lenItem
= item
.size();
253 memcpy(p
, item
.data(), lenItem
);
259 assert(p
- buffer
== len
);
260 return s
.setSize(len
);
/**
 * Splits `str` into consecutive segments of `split_length` bytes.
 *
 * A `split_length` <= 0 is reported through throw_invalid_argument with the
 * message "The length of each segment must be greater than zero". The
 * visible loop appends str.substr(i, split_length) for i = 0, split_length,
 * 2 * split_length, ... while i < len.
 *
 * NOTE(review): the declaration of `ret`, the body of the
 * `split_length >= len` branch and the return statements are not visible in
 * this listing -- confirm against the full file.
 */
263 Variant
StringUtil::Split(CStrRef str
, int split_length
/* = 1 */) {
264 if (split_length
<= 0) {
265 throw_invalid_argument(
266 "The length of each segment must be greater than zero"
272 int len
= str
.size();
273 if (split_length
>= len
) {
276 for (int i
= 0; i
< len
; i
+= split_length
) {
277 ret
.append(str
.substr(i
, split_length
));
/**
 * Splits `body` into chunks of `chunklen` bytes, with `end` passed to the
 * helper as the chunk terminator.
 *
 * An invalid chunk length is reported through
 * throw_invalid_argument("chunklen: (non-positive)"). In the general case the
 * work is delegated to string_chunk_split, which receives the body bytes and
 * length, the `end` bytes and length, and `chunklen`; the result is wrapped
 * in a String constructed with AttachString.
 *
 * NOTE(review): the condition guarding the invalid-argument report, what is
 * returned after it, and the body of the `chunklen >= len` branch are not
 * visible in this listing -- confirm against the full file.
 */
283 Variant
StringUtil::ChunkSplit(CStrRef body
, int chunklen
/* = 76 */,
284 CStrRef end
/* = "\r\n" */) {
286 throw_invalid_argument("chunklen: (non-positive)");
291 int len
= body
.size();
292 if (chunklen
>= len
) {
296 char *chunked
= string_chunk_split(body
.data(), len
, end
.c_str(),
297 end
.size(), chunklen
);
298 return String(chunked
, len
, AttachString
);
303 ///////////////////////////////////////////////////////////////////////////////
/**
 * Escapes `input` with string_addcslashes, using `charlist` as the set of
 * characters to escape.
 *
 * When `charlist` is null a built-in 10-byte list is substituted. If either
 * the input or the effective character list is empty, `input` is returned
 * unchanged. Otherwise the helper's result is wrapped in a String constructed
 * with AttachString.
 *
 * NOTE(review): the trailing arguments of the string_addcslashes call are not
 * visible in this listing -- confirm against the full file.
 */
306 String
StringUtil::CEncode(CStrRef input
, CStrRef charlist
) {
307 String chars
= charlist
;
308 if (chars
.isNull()) {
// The literal contains an embedded NUL byte, so its length (10) is passed
// explicitly instead of relying on NUL termination.
309 chars
= String("\\\x00\x01..\x1f\x7f..\xff", 10, CopyString
);
311 if (input
.empty() || chars
.empty()) return input
;
312 int len
= input
.size();
313 char *ret
= string_addcslashes(input
.c_str(), len
, chars
.data(),
315 return String(ret
, len
, AttachString
);
/**
 * Reverses C-style escaping of `input` via string_stripcslashes.
 *
 * An empty input is returned unchanged. Otherwise the helper is called with
 * input.c_str() and the input length, and its result is wrapped in a String
 * constructed with AttachString.
 */
318 String
StringUtil::CDecode(CStrRef input
) {
319 if (input
.empty()) return input
;
320 int len
= input
.size();
321 char *ret
= string_stripcslashes(input
.c_str(), len
);
322 return String(ret
, len
, AttachString
);
/**
 * Escapes `input` via string_addslashes.
 *
 * An empty input is returned unchanged. Otherwise the helper is called with
 * input.c_str() and the input length, and its result is wrapped in a String
 * constructed with AttachString.
 */
325 String
StringUtil::SqlEncode(CStrRef input
) {
326 if (input
.empty()) return input
;
327 int len
= input
.size();
328 char *ret
= string_addslashes(input
.c_str(), len
);
329 return String(ret
, len
, AttachString
);
/**
 * Removes slash escaping from `input` via string_stripslashes.
 *
 * An empty input is returned unchanged. Otherwise the helper is called with
 * input.c_str() and the input length, and its result is wrapped in a String
 * constructed with AttachString.
 */
332 String
StringUtil::SqlDecode(CStrRef input
) {
333 if (input
.empty()) return input
;
334 int len
= input
.size();
335 char *ret
= string_stripslashes(input
.c_str(), len
);
336 return String(ret
, len
, AttachString
);
/**
 * Escapes `input` via string_quotemeta.
 *
 * An empty input is returned unchanged. Otherwise the helper is called with
 * input.c_str() and the input length, and its result is wrapped in a String
 * constructed with AttachString.
 */
339 String
StringUtil::RegExEncode(CStrRef input
) {
340 if (input
.empty()) return input
;
341 int len
= input
.size();
342 char *ret
= string_quotemeta(input
.c_str(), len
);
343 return String(ret
, len
, AttachString
);
/**
 * HTML-encodes `input` via string_html_encode.
 *
 * An empty input is returned unchanged. `charset` is compared
 * case-insensitively: "ISO-8859-1" takes a dedicated branch, and any other
 * value that is not "UTF-8" results in NotImplementedException(charset).
 * The helper receives the input bytes and length plus two flags derived from
 * `quoteStyle`: `quoteStyle != QuoteStyle::No` and
 * `quoteStyle == QuoteStyle::Both`. The result is wrapped in a String
 * constructed with AttachString.
 *
 * NOTE(review): the body of the ISO-8859-1 branch, the remaining arguments of
 * string_html_encode (including how `nbsp` is used) and the condition that
 * guards the "too large input" raise_error are not visible in this listing --
 * confirm against the full file.
 */
346 String
StringUtil::HtmlEncode(CStrRef input
, QuoteStyle quoteStyle
,
347 const char *charset
, bool nbsp
) {
348 if (input
.empty()) return input
;
352 if (strcasecmp(charset
, "ISO-8859-1") == 0) {
// strcasecmp returns non-zero on mismatch: anything other than UTF-8 lands
// here and is rejected.
354 } else if (strcasecmp(charset
, "UTF-8")) {
355 throw NotImplementedException(charset
);
358 int len
= input
.size();
359 char *ret
= string_html_encode(input
.data(), len
,
360 quoteStyle
!= QuoteStyle::No
,
361 quoteStyle
== QuoteStyle::Both
,
364 raise_error("HtmlEncode called on too large input (%d)", len
);
366 return String(ret
, len
, AttachString
);
// Helpers for building the two 64-bit words of an AsciiMap at compile time.
// A1 ORs bit (ch & 63) into `v` only when bit 6 of `ch` (value 64) is clear;
// A2 does the same only when that bit is set. Nesting the macros therefore
// yields one bitmask for characters whose value has bit 6 clear and one for
// those that have it set.
369 #define A1(v, ch) ((v)|((ch) & 64 ? 0 : 1uLL<<((ch)&63)))
370 #define A2(v, ch) ((v)|((ch) & 64 ? 1uLL<<((ch)&63) : 0))
// Marks '<', '>', '&', '{', '}' and '@'; no quote characters.
372 static const AsciiMap mapNoQuotes
= {
373 { A1(A1(A1(A1(A1(A1(0, '<'), '>'), '&'), '{'), '}'), '@'),
374 A2(A2(A2(A2(A2(A2(0, '<'), '>'), '&'), '{'), '}'), '@') }
// Same set as mapNoQuotes plus the double quote.
377 static const AsciiMap mapDoubleQuotes
= {
378 { A1(A1(A1(A1(A1(A1(A1(0, '<'), '>'), '&'), '{'), '}'), '@'), '"'),
379 A2(A2(A2(A2(A2(A2(A2(0, '<'), '>'), '&'), '{'), '}'), '@'), '"') }
// Same set as mapDoubleQuotes plus the single quote.
382 static const AsciiMap mapBothQuotes
= {
383 { A1(A1(A1(A1(A1(A1(A1(A1(0, '<'), '>'), '&'), '{'), '}'), '@'), '"'), '\''),
384 A2(A2(A2(A2(A2(A2(A2(A2(0, '<'), '>'), '&'), '{'), '}'), '@'), '"'), '\'') }
// Empty map: no characters marked.
387 static const AsciiMap mapNothing
= {};
/**
 * HTML-encodes `input` via string_html_encode_extra, with the set of escaped
 * ASCII characters chosen by `quoteStyle` and optionally extended by `extra`.
 *
 * What the visible lines show:
 *  - An empty input is returned unchanged.
 *  - `flags` starts as STRING_HTML_ENCODE_UTF8; STRING_HTML_ENCODE_NBSP can
 *    be added, and STRING_HTML_ENCODE_UTF8IZE_REPLACE is added when
 *    RuntimeOption::Utf8izeReplace is set.
 *  - `charset`: an empty string or "UTF-8" (case-insensitive) leaves the
 *    flags alone; "ISO-8859-1" clears STRING_HTML_ENCODE_UTF8; the remaining
 *    path throws NotImplementedException(charset).
 *  - `quoteStyle`: FBUtf8Only and FBUtf8 add STRING_HTML_ENCODE_HIGH; Double
 *    selects mapDoubleQuotes; an unknown style hits
 *    raise_error("Unknown quote style: %d").
 *  - Unless the style is FBUtf8Only, a truthy `extra` is iterated and the
 *    first byte of each element's string value is marked in `tmp.map`.
 *  - The helper receives the input bytes and length, the flags cast to
 *    StringHtmlEncoding, and the map pointer `am`; its result is wrapped in a
 *    String constructed with AttachString.
 *
 * NOTE(review): the last parameter of the signature (presumably `extra`), the
 * declarations of `am` and `tmp`, the condition guarding the NBSP flag, the
 * map chosen for the remaining quote styles, how `tmp` is seeded and wired to
 * `am`, and the condition of the "too large input" raise_error are not
 * visible in this listing -- confirm against the full file.
 */
389 String
StringUtil::HtmlEncodeExtra(CStrRef input
, QuoteStyle quoteStyle
,
390 const char *charset
, bool nbsp
,
392 if (input
.empty()) return input
;
395 int flags
= STRING_HTML_ENCODE_UTF8
;
397 flags
|= STRING_HTML_ENCODE_NBSP
;
399 if (RuntimeOption::Utf8izeReplace
) {
400 flags
|= STRING_HTML_ENCODE_UTF8IZE_REPLACE
;
402 if (!*charset
|| strcasecmp(charset
, "UTF-8") == 0) {
403 } else if (strcasecmp(charset
, "ISO-8859-1") == 0) {
404 flags
&= ~STRING_HTML_ENCODE_UTF8
;
406 throw NotImplementedException(charset
);
412 switch (quoteStyle
) {
413 case QuoteStyle::FBUtf8Only
:
415 flags
|= STRING_HTML_ENCODE_HIGH
;
417 case QuoteStyle::FBUtf8
:
419 flags
|= STRING_HTML_ENCODE_HIGH
;
421 case QuoteStyle::Both
:
424 case QuoteStyle::Double
:
425 am
= &mapDoubleQuotes
;
432 raise_error("Unknown quote style: %d", (int)quoteStyle
);
435 if (quoteStyle
!= QuoteStyle::FBUtf8Only
&& extra
.toBoolean()) {
438 for (ArrayIter
iter(extra
); iter
; ++iter
) {
439 String item
= iter
.second().toString();
440 char c
= item
.data()[0];
// Same bit layout as the A1/A2 macros: word index from bit 6 of the
// character, bit position from its low six bits.
441 tmp
.map
[c
& 64 ? 1 : 0] |= 1uLL << (c
& 63);
445 int len
= input
.size();
446 char *ret
= string_html_encode_extra(input
.data(), len
,
447 (StringHtmlEncoding
)flags
, am
);
449 raise_error("HtmlEncode called on too large input (%d)", len
);
451 return String(ret
, len
, AttachString
);
/**
 * HTML-decodes `input` via string_html_decode.
 *
 * An empty input is returned unchanged. The helper receives the input bytes
 * and length plus two flags derived from `quoteStyle`:
 * `quoteStyle != QuoteStyle::No` and `quoteStyle == QuoteStyle::Both`.
 * According to the in-code comment, the helper returns null exactly when the
 * charset was not recognized, in which case
 * NotImplementedException(charset) is thrown. Otherwise the result is
 * wrapped in a String constructed with AttachString.
 *
 * NOTE(review): the assertion on `charset` that the in-code comment refers
 * to, the remaining arguments of string_html_decode (including how `all` is
 * used) and the null check itself are not visible in this listing -- confirm
 * against the full file.
 */
454 String
StringUtil::HtmlDecode(CStrRef input
, QuoteStyle quoteStyle
,
455 const char *charset
, bool all
) {
456 if (input
.empty()) return input
;
460 int len
= input
.size();
461 char *ret
= string_html_decode(input
.data(), len
,
462 quoteStyle
!= QuoteStyle::No
,
463 quoteStyle
== QuoteStyle::Both
,
466 // null iff charset was not recognized
467 throw NotImplementedException(charset
);
468 // (charset is not null, see assertion above)
471 return String(ret
, len
, AttachString
);
/**
 * Encodes `input` via string_quoted_printable_encode.
 *
 * An empty input is returned unchanged. Otherwise the helper is called with
 * input.data() and the input length, and its result is wrapped in a String
 * constructed with AttachString.
 */
474 String
StringUtil::QuotedPrintableEncode(CStrRef input
) {
475 if (input
.empty()) return input
;
476 int len
= input
.size();
477 char *ret
= string_quoted_printable_encode(input
.data(), len
);
478 return String(ret
, len
, AttachString
);
/**
 * Decodes `input` via string_quoted_printable_decode.
 *
 * An empty input is returned unchanged. Otherwise the helper is called with
 * input.data(), the input length and a literal `false`, and its result is
 * wrapped in a String constructed with AttachString.
 *
 * NOTE(review): the meaning of the trailing `false` argument is defined by
 * string_quoted_printable_decode, which is not visible here.
 */
481 String
StringUtil::QuotedPrintableDecode(CStrRef input
) {
482 if (input
.empty()) return input
;
483 int len
= input
.size();
484 char *ret
= string_quoted_printable_decode(input
.data(), len
, false);
485 return String(ret
, len
, AttachString
);
/**
 * Hex-encodes `input` via string_bin2hex.
 *
 * An empty input is returned unchanged. Otherwise the helper is called with
 * input.data() and the input length, and its result is wrapped in a String
 * constructed with AttachString.
 */
488 String
StringUtil::HexEncode(CStrRef input
) {
489 if (input
.empty()) return input
;
490 int len
= input
.size();
491 char *ret
= string_bin2hex(input
.data(), len
);
492 return String(ret
, len
, AttachString
);
/**
 * Hex-decodes `input` via string_hex2bin.
 *
 * An empty input is returned unchanged. Otherwise the helper is called with
 * input.data() and the input length, and its result is wrapped in a String
 * constructed with AttachString.
 *
 * NOTE(review): no check of the helper's result is visible here; how invalid
 * hex input is reported depends on string_hex2bin -- confirm.
 */
495 String
StringUtil::HexDecode(CStrRef input
) {
496 if (input
.empty()) return input
;
497 int len
= input
.size();
498 char *ret
= string_hex2bin(input
.data(), len
);
499 return String(ret
, len
, AttachString
);
/**
 * Encodes `input` via string_uuencode.
 *
 * An empty input is returned unchanged. Otherwise the helper is called with
 * input.data(), input.size() and `len`, and the returned buffer is wrapped in
 * a String of length `len` constructed with AttachString.
 *
 * NOTE(review): the declaration of `len` is not visible in this listing;
 * presumably it is an out-parameter that receives the encoded length --
 * confirm against the full file.
 */
502 String
StringUtil::UUEncode(CStrRef input
) {
503 if (input
.empty()) return input
;
506 char *encoded
= string_uuencode(input
.data(), input
.size(), len
);
507 return String(encoded
, len
, AttachString
);
/**
 * Decodes `input` via string_uudecode.
 *
 * For a non-empty input the helper is called with input.data(), input.size()
 * and `len`, and the returned buffer is wrapped in a String of length `len`
 * constructed with AttachString.
 *
 * NOTE(review): the declaration of `len`, any check on `decoded` before the
 * return, and the value returned for an empty input are not visible in this
 * listing -- confirm against the full file.
 */
510 String
StringUtil::UUDecode(CStrRef input
) {
511 if (!input
.empty()) {
513 char *decoded
= string_uudecode(input
.data(), input
.size(), len
);
515 return String(decoded
, len
, AttachString
);
/**
 * Base64-encodes `input` via string_base64_encode.
 *
 * There is no early return for an empty input: the helper is always called
 * with input.data() and the input length, and its result is wrapped in a
 * String constructed with AttachString.
 */
521 String
StringUtil::Base64Encode(CStrRef input
) {
522 int len
= input
.size();
523 char *ret
= string_base64_encode(input
.data(), len
);
524 return String(ret
, len
, AttachString
);
/**
 * Base64-decodes `input` via string_base64_decode, forwarding `strict`.
 *
 * There is no early return for an empty input: the helper is always called
 * with input.data(), the input length and `strict`, and its result is wrapped
 * in a String constructed with AttachString.
 *
 * NOTE(review): no check of the helper's result is visible here; what a
 * failed decode yields depends on string_base64_decode -- confirm.
 */
527 String
StringUtil::Base64Decode(CStrRef input
, bool strict
/* = false */) {
528 int len
= input
.size();
529 char *ret
= string_base64_decode(input
.data(), len
, strict
);
530 return String(ret
, len
, AttachString
);
/**
 * URL-encodes `input` using either url_encode or url_raw_encode.
 *
 * Both helpers are called with input.data() and the input length; whichever
 * result is produced is wrapped in a String constructed with AttachString.
 *
 * NOTE(review): the declaration of `ret` and the condition choosing between
 * the two helpers (presumably `encodePlus`) are not visible in this listing
 * -- confirm against the full file.
 */
533 String
StringUtil::UrlEncode(CStrRef input
, bool encodePlus
/* = true */) {
534 int len
= input
.size();
537 ret
= url_encode(input
.data(), len
);
539 ret
= url_raw_encode(input
.data(), len
);
541 return String(ret
, len
, AttachString
);
/**
 * URL-decodes `input` using either url_decode or url_raw_decode.
 *
 * Both helpers are called with input.data() and the input length; whichever
 * result is produced is wrapped in a String constructed with AttachString.
 *
 * NOTE(review): the declaration of `ret` and the condition choosing between
 * the two helpers (presumably `decodePlus`) are not visible in this listing
 * -- confirm against the full file.
 */
544 String
StringUtil::UrlDecode(CStrRef input
, bool decodePlus
/* = true */) {
545 int len
= input
.size();
548 ret
= url_decode(input
.data(), len
);
550 ret
= url_raw_decode(input
.data(), len
);
552 return String(ret
, len
, AttachString
);
555 ///////////////////////////////////////////////////////////////////////////////
/**
 * Formats `value` according to `format` via string_money_format.
 *
 * A non-null result is wrapped in a String constructed with AttachString; a
 * null result yields a default-constructed String.
 */
558 String
StringUtil::MoneyFormat(const char *format
, double value
) {
560 char *formatted
= string_money_format(format
, value
);
561 return formatted
? String(formatted
, AttachString
) : String();
564 ///////////////////////////////////////////////////////////////////////////////
/**
 * Returns a copy of `input` processed by string_translate with the `from`
 * and `to` character lists.
 *
 * An empty input is returned unchanged. Otherwise the input bytes are copied
 * into a String reserved for `len` bytes and string_translate is run in place
 * on that buffer. Only the first min(from.size(), to.size()) entries of the
 * two lists are passed to the helper, so any excess in the longer list is
 * ignored. The result is returned via retstr.setSize(len).
 */
567 String
StringUtil::Translate(CStrRef input
, CStrRef from
, CStrRef to
) {
568 if (input
.empty()) return input
;
570 int len
= input
.size();
571 String
retstr(len
, ReserveString
);
572 char *ret
= retstr
.mutableSlice().ptr
;
573 memcpy(ret
, input
.data(), len
);
574 auto trlen
= std::min(from
.size(), to
.size());
575 string_translate(ret
, len
, from
.data(), to
.data(), trlen
);
576 return retstr
.setSize(len
);
/**
 * Applies string_rot13 to `input`.
 *
 * An empty input is returned unchanged. Otherwise the helper's result is
 * wrapped in a String of length input.size(), constructed with AttachString.
 */
579 String
StringUtil::ROT13(CStrRef input
) {
580 if (input
.empty()) return input
;
581 return String(string_rot13(input
.data(), input
.size()),
582 input
.size(), AttachString
);
/**
 * Returns string_crc32(input.data(), input.size()) as an int64_t.
 *
 * There is no special case for an empty input.
 */
585 int64_t StringUtil::CRC32(CStrRef input
) {
586 return string_crc32(input
.data(), input
.size());
/**
 * Returns the result of string_crypt(input.c_str(), salt) wrapped in a String
 * constructed with AttachString.
 *
 * No length is passed to the String constructor here, unlike most siblings in
 * this file.
 *
 * NOTE(review): presumably the String constructor measures the
 * NUL-terminated result itself; no null check on the helper's result is
 * visible -- confirm both against the full file.
 */
589 String
StringUtil::Crypt(CStrRef input
, const char *salt
/* = "" */) {
590 return String(string_crypt(input
.c_str(), salt
), AttachString
);
/**
 * Computes a digest of `input` via string_md5, forwarding `raw`.
 *
 * The helper is called with input.data(), input.size(), `raw` and `len`; the
 * returned buffer is wrapped in a String of length `len` constructed with
 * AttachString.
 *
 * NOTE(review): the declaration of `len` is not visible in this listing;
 * presumably it is an out-parameter that receives the digest length --
 * confirm against the full file.
 */
593 String
StringUtil::MD5(CStrRef input
, bool raw
/* = false */) {
595 char *ret
= string_md5(input
.data(), input
.size(), raw
, len
);
596 return String(ret
, len
, AttachString
);
/**
 * Computes a digest of `input` via string_sha1, forwarding `raw`.
 *
 * The helper is called with input.data(), input.size(), `raw` and `len`; the
 * returned buffer is wrapped in a String of length `len` constructed with
 * AttachString.
 *
 * NOTE(review): the declaration of `len` is not visible in this listing;
 * presumably it is an out-parameter that receives the digest length --
 * confirm against the full file.
 */
599 String
StringUtil::SHA1(CStrRef input
, bool raw
/* = false */) {
601 char *ret
= string_sha1(input
.data(), input
.size(), raw
, len
);
602 return String(ret
, len
, AttachString
);
605 ///////////////////////////////////////////////////////////////////////////////