Rename files in runtime/base, part 6
[hiphop-php.git] / hphp / runtime / base / string-util.cpp
blobfb912fe99732b50feabd572e025a162e1eebdd03
/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com)     |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/
17 #include "hphp/runtime/base/string-util.h"
18 #include "hphp/util/zend/zend_html.h"
19 #include "hphp/runtime/base/zend-string.h"
20 #include "hphp/runtime/base/zend-url.h"
21 #include "hphp/runtime/base/runtime-error.h"
22 #include "hphp/runtime/base/array_iterator.h"
23 #include "hphp/runtime/base/builtin_functions.h"
25 namespace HPHP {
26 ///////////////////////////////////////////////////////////////////////////////
27 // manipulations
29 String StringUtil::ToLower(CStrRef input,
30 ToLowerType type /*= ToLowerType::All */) {
31 if (input.empty()) return input;
33 int len = input.size();
34 char *ret = nullptr;
35 switch (type) {
36 case ToLowerType::All:
37 ret = string_to_lower(input.data(), len);
38 break;
39 case ToLowerType::First:
40 ret = string_to_lower_first(input.data(), len);
41 break;
42 case ToLowerType::Words:
43 ret = string_to_lower_words(input.data(), len);
44 break;
45 default:
46 assert(false);
47 break;
49 return String(ret, len, AttachString);
52 String StringUtil::ToUpper(CStrRef input,
53 ToUpperType type /*= ToUpperType::All */) {
54 if (input.empty()) return input;
56 int len = input.size();
57 char *ret = nullptr;
58 switch (type) {
59 case ToUpperType::All:
60 ret = string_to_upper(input.data(), len);
61 break;
62 case ToUpperType::First:
63 ret = string_to_upper_first(input.data(), len);
64 break;
65 case ToUpperType::Words:
66 ret = string_to_upper_words(input.data(), len);
67 break;
68 default:
69 assert(false);
70 break;
72 return String(ret, len, AttachString);
75 String StringUtil::Trim(CStrRef input, TrimType type /* = TrimType::Both */,
76 CStrRef charlist /* = k_HPHP_TRIM_CHARLIST */) {
77 if (input.empty()) return input;
78 int len = input.size();
79 char *ret = string_trim(input.data(), len,
80 charlist.data(), charlist.length(),
81 static_cast<int>(type));
82 if (!ret) {
83 return input;
85 return String(ret, len, AttachString);
88 String StringUtil::Pad(CStrRef input, int final_length,
89 CStrRef pad_string /* = " " */,
90 PadType type /* = PadType::Right */) {
91 int len = input.size();
92 char *ret = string_pad(input.data(), len, final_length, pad_string.data(),
93 pad_string.size(), static_cast<int>(type));
94 if (ret) return String(ret, len, AttachString);
95 return String();
98 String StringUtil::Reverse(CStrRef input) {
99 if (input.empty()) return input;
100 int len = input.size();
101 return String(string_reverse(input.data(), len), len, AttachString);
104 String StringUtil::Repeat(CStrRef input, int count) {
105 if (count < 0) {
106 raise_warning("Second argument has to be greater than or equal to 0");
107 return String();
109 if (count == 0) {
110 return "";
112 if (!input.empty()) {
113 int len = input.size();
114 char *ret = string_repeat(input.data(), len, count);
115 if (ret) {
116 return String(ret, len, AttachString);
119 return input;
122 String StringUtil::Shuffle(CStrRef input) {
123 if (!input.empty()) {
124 int len = input.size();
125 char *ret = string_shuffle(input.data(), len);
126 if (ret) {
127 return String(ret, len, AttachString);
130 return input;
133 String StringUtil::StripHTMLTags(CStrRef input,
134 CStrRef allowable_tags /* = "" */) {
135 if (input.empty()) return input;
136 int len = input.size();
137 char *ret = string_strip_tags(input.data(), len, allowable_tags.data(),
138 allowable_tags.size(), false);
139 return String(ret, len, AttachString);
142 String StringUtil::WordWrap(CStrRef input, int width,
143 CStrRef wordbreak /* = "\n" */,
144 bool cut /* = false */) {
145 if (!input.empty()) {
146 int len = input.size();
147 char *ret = string_wordwrap(input.data(), len, width, wordbreak.data(),
148 wordbreak.size(), cut);
149 if (ret) {
150 return String(ret, len, AttachString);
152 return String();
154 return input;
157 ///////////////////////////////////////////////////////////////////////////////
158 // splits/joins
160 Variant StringUtil::Explode(CStrRef input, CStrRef delimiter,
161 int limit /* = 0x7FFFFFFF */) {
162 if (delimiter.empty()) {
163 throw_invalid_argument("delimiter: (empty)");
164 return false;
167 Array ret(Array::Create());
169 if (input.empty()) {
170 if (limit >= 0) {
171 ret.append("");
173 return ret;
176 if (limit > 1) {
177 int pos = input.find(delimiter);
178 if (pos < 0) {
179 ret.append(input);
180 } else {
181 int len = delimiter.size();
182 int pos0 = 0;
183 do {
184 ret.append(input.substr(pos0, pos - pos0));
185 pos += len;
186 pos0 = pos;
187 } while ((pos = input.find(delimiter, pos)) >= 0 && --limit > 1);
189 if (pos0 <= input.size()) {
190 ret.append(input.substr(pos0));
193 } else if (limit < 0) {
194 int pos = input.find(delimiter);
195 if (pos >= 0) {
196 vector<int> positions;
197 int len = delimiter.size();
198 int pos0 = 0;
199 int found = 0;
200 do {
201 positions.push_back(pos0);
202 positions.push_back(pos - pos0);
203 pos += len;
204 pos0 = pos;
205 found++;
206 } while ((pos = input.find(delimiter, pos)) >= 0);
208 if (pos0 <= input.size()) {
209 positions.push_back(pos0);
210 positions.push_back(input.size() - pos0);
211 found++;
213 int iMax = (found + limit) << 1;
214 for (int i = 0; i < iMax; i += 2) {
215 ret.append(input.substr(positions[i], positions[i+1]));
217 } // else we have negative limit and delimiter not found
218 } else {
219 ret.append(input);
222 return ret;
225 String StringUtil::Implode(CArrRef items, CStrRef delim) {
226 int size = items.size();
227 if (size == 0) return "";
229 String* sitems = (String*)smart_malloc(size * sizeof(String));
230 int len = 0;
231 int lenDelim = delim.size();
232 int i = 0;
233 for (ArrayIter iter(items); iter; ++iter) {
234 new (&sitems[i]) String(iter.second().toString());
235 len += sitems[i].size() + lenDelim;
236 i++;
238 len -= lenDelim; // always one delimiter less than count of items
239 assert(i == size);
241 String s = String(len, ReserveString);
242 char *buffer = s.mutableSlice().ptr;
243 const char *sdelim = delim.data();
244 char *p = buffer;
245 for (int i = 0; i < size; i++) {
246 String &item = sitems[i];
247 if (i && lenDelim) {
248 memcpy(p, sdelim, lenDelim);
249 p += lenDelim;
251 int lenItem = item.size();
252 if (lenItem) {
253 memcpy(p, item.data(), lenItem);
254 p += lenItem;
256 sitems[i].~String();
258 smart_free(sitems);
259 assert(p - buffer == len);
260 return s.setSize(len);
263 Variant StringUtil::Split(CStrRef str, int split_length /* = 1 */) {
264 if (split_length <= 0) {
265 throw_invalid_argument(
266 "The length of each segment must be greater than zero"
268 return false;
271 Array ret;
272 int len = str.size();
273 if (split_length >= len) {
274 ret.append(str);
275 } else {
276 for (int i = 0; i < len; i += split_length) {
277 ret.append(str.substr(i, split_length));
280 return ret;
283 Variant StringUtil::ChunkSplit(CStrRef body, int chunklen /* = 76 */,
284 CStrRef end /* = "\r\n" */) {
285 if (chunklen <= 0) {
286 throw_invalid_argument("chunklen: (non-positive)");
287 return false;
290 String ret;
291 int len = body.size();
292 if (chunklen >= len) {
293 ret = body;
294 ret += end;
295 } else {
296 char *chunked = string_chunk_split(body.data(), len, end.c_str(),
297 end.size(), chunklen);
298 return String(chunked, len, AttachString);
300 return ret;
303 ///////////////////////////////////////////////////////////////////////////////
304 // encoding/decoding
306 String StringUtil::CEncode(CStrRef input, CStrRef charlist) {
307 String chars = charlist;
308 if (chars.isNull()) {
309 chars = String("\\\x00\x01..\x1f\x7f..\xff", 10, CopyString);
311 if (input.empty() || chars.empty()) return input;
312 int len = input.size();
313 char *ret = string_addcslashes(input.c_str(), len, chars.data(),
314 chars.size());
315 return String(ret, len, AttachString);
318 String StringUtil::CDecode(CStrRef input) {
319 if (input.empty()) return input;
320 int len = input.size();
321 char *ret = string_stripcslashes(input.c_str(), len);
322 return String(ret, len, AttachString);
325 String StringUtil::SqlEncode(CStrRef input) {
326 if (input.empty()) return input;
327 int len = input.size();
328 char *ret = string_addslashes(input.c_str(), len);
329 return String(ret, len, AttachString);
332 String StringUtil::SqlDecode(CStrRef input) {
333 if (input.empty()) return input;
334 int len = input.size();
335 char *ret = string_stripslashes(input.c_str(), len);
336 return String(ret, len, AttachString);
339 String StringUtil::RegExEncode(CStrRef input) {
340 if (input.empty()) return input;
341 int len = input.size();
342 char *ret = string_quotemeta(input.c_str(), len);
343 return String(ret, len, AttachString);
346 String StringUtil::HtmlEncode(CStrRef input, QuoteStyle quoteStyle,
347 const char *charset, bool nbsp) {
348 if (input.empty()) return input;
350 assert(charset);
351 bool utf8 = true;
352 if (strcasecmp(charset, "ISO-8859-1") == 0) {
353 utf8 = false;
354 } else if (strcasecmp(charset, "UTF-8")) {
355 throw NotImplementedException(charset);
358 int len = input.size();
359 char *ret = string_html_encode(input.data(), len,
360 quoteStyle != QuoteStyle::No,
361 quoteStyle == QuoteStyle::Both,
362 utf8, nbsp);
363 if (!ret) {
364 raise_error("HtmlEncode called on too large input (%d)", len);
366 return String(ret, len, AttachString);
369 #define A1(v, ch) ((v)|((ch) & 64 ? 0 : 1uLL<<((ch)&63)))
370 #define A2(v, ch) ((v)|((ch) & 64 ? 1uLL<<((ch)&63) : 0))
372 static const AsciiMap mapNoQuotes = {
373 { A1(A1(A1(A1(A1(A1(0, '<'), '>'), '&'), '{'), '}'), '@'),
374 A2(A2(A2(A2(A2(A2(0, '<'), '>'), '&'), '{'), '}'), '@') }
377 static const AsciiMap mapDoubleQuotes = {
378 { A1(A1(A1(A1(A1(A1(A1(0, '<'), '>'), '&'), '{'), '}'), '@'), '"'),
379 A2(A2(A2(A2(A2(A2(A2(0, '<'), '>'), '&'), '{'), '}'), '@'), '"') }
382 static const AsciiMap mapBothQuotes = {
383 { A1(A1(A1(A1(A1(A1(A1(A1(0, '<'), '>'), '&'), '{'), '}'), '@'), '"'), '\''),
384 A2(A2(A2(A2(A2(A2(A2(A2(0, '<'), '>'), '&'), '{'), '}'), '@'), '"'), '\'') }
387 static const AsciiMap mapNothing = {};
389 String StringUtil::HtmlEncodeExtra(CStrRef input, QuoteStyle quoteStyle,
390 const char *charset, bool nbsp,
391 Array extra) {
392 if (input.empty()) return input;
394 assert(charset);
395 int flags = STRING_HTML_ENCODE_UTF8;
396 if (nbsp) {
397 flags |= STRING_HTML_ENCODE_NBSP;
399 if (RuntimeOption::Utf8izeReplace) {
400 flags |= STRING_HTML_ENCODE_UTF8IZE_REPLACE;
402 if (!*charset || strcasecmp(charset, "UTF-8") == 0) {
403 } else if (strcasecmp(charset, "ISO-8859-1") == 0) {
404 flags &= ~STRING_HTML_ENCODE_UTF8;
405 } else {
406 throw NotImplementedException(charset);
409 const AsciiMap *am;
410 AsciiMap tmp;
412 switch (quoteStyle) {
413 case QuoteStyle::FBUtf8Only:
414 am = &mapNothing;
415 flags |= STRING_HTML_ENCODE_HIGH;
416 break;
417 case QuoteStyle::FBUtf8:
418 am = &mapBothQuotes;
419 flags |= STRING_HTML_ENCODE_HIGH;
420 break;
421 case QuoteStyle::Both:
422 am = &mapBothQuotes;
423 break;
424 case QuoteStyle::Double:
425 am = &mapDoubleQuotes;
426 break;
427 case QuoteStyle::No:
428 am = &mapNoQuotes;
429 break;
430 default:
431 am = &mapNothing;
432 raise_error("Unknown quote style: %d", (int)quoteStyle);
435 if (quoteStyle != QuoteStyle::FBUtf8Only && extra.toBoolean()) {
436 tmp = *am;
437 am = &tmp;
438 for (ArrayIter iter(extra); iter; ++iter) {
439 String item = iter.second().toString();
440 char c = item.data()[0];
441 tmp.map[c & 64 ? 1 : 0] |= 1uLL << (c & 63);
445 int len = input.size();
446 char *ret = string_html_encode_extra(input.data(), len,
447 (StringHtmlEncoding)flags, am);
448 if (!ret) {
449 raise_error("HtmlEncode called on too large input (%d)", len);
451 return String(ret, len, AttachString);
454 String StringUtil::HtmlDecode(CStrRef input, QuoteStyle quoteStyle,
455 const char *charset, bool all) {
456 if (input.empty()) return input;
458 assert(charset);
460 int len = input.size();
461 char *ret = string_html_decode(input.data(), len,
462 quoteStyle != QuoteStyle::No,
463 quoteStyle == QuoteStyle::Both,
464 charset, all);
465 if (!ret) {
466 // null iff charset was not recognized
467 throw NotImplementedException(charset);
468 // (charset is not null, see assertion above)
471 return String(ret, len, AttachString);
474 String StringUtil::QuotedPrintableEncode(CStrRef input) {
475 if (input.empty()) return input;
476 int len = input.size();
477 char *ret = string_quoted_printable_encode(input.data(), len);
478 return String(ret, len, AttachString);
481 String StringUtil::QuotedPrintableDecode(CStrRef input) {
482 if (input.empty()) return input;
483 int len = input.size();
484 char *ret = string_quoted_printable_decode(input.data(), len, false);
485 return String(ret, len, AttachString);
488 String StringUtil::HexEncode(CStrRef input) {
489 if (input.empty()) return input;
490 int len = input.size();
491 char *ret = string_bin2hex(input.data(), len);
492 return String(ret, len, AttachString);
495 String StringUtil::HexDecode(CStrRef input) {
496 if (input.empty()) return input;
497 int len = input.size();
498 char *ret = string_hex2bin(input.data(), len);
499 return String(ret, len, AttachString);
502 String StringUtil::UUEncode(CStrRef input) {
503 if (input.empty()) return input;
505 int len;
506 char *encoded = string_uuencode(input.data(), input.size(), len);
507 return String(encoded, len, AttachString);
510 String StringUtil::UUDecode(CStrRef input) {
511 if (!input.empty()) {
512 int len;
513 char *decoded = string_uudecode(input.data(), input.size(), len);
514 if (decoded) {
515 return String(decoded, len, AttachString);
518 return String();
521 String StringUtil::Base64Encode(CStrRef input) {
522 int len = input.size();
523 char *ret = string_base64_encode(input.data(), len);
524 return String(ret, len, AttachString);
527 String StringUtil::Base64Decode(CStrRef input, bool strict /* = false */) {
528 int len = input.size();
529 char *ret = string_base64_decode(input.data(), len, strict);
530 return String(ret, len, AttachString);
533 String StringUtil::UrlEncode(CStrRef input, bool encodePlus /* = true */) {
534 int len = input.size();
535 char *ret;
536 if (encodePlus) {
537 ret = url_encode(input.data(), len);
538 } else {
539 ret = url_raw_encode(input.data(), len);
541 return String(ret, len, AttachString);
544 String StringUtil::UrlDecode(CStrRef input, bool decodePlus /* = true */) {
545 int len = input.size();
546 char *ret;
547 if (decodePlus) {
548 ret = url_decode(input.data(), len);
549 } else {
550 ret = url_raw_decode(input.data(), len);
552 return String(ret, len, AttachString);
555 ///////////////////////////////////////////////////////////////////////////////
556 // formatting
558 String StringUtil::MoneyFormat(const char *format, double value) {
559 assert(format);
560 char *formatted = string_money_format(format, value);
561 return formatted ? String(formatted, AttachString) : String();
564 ///////////////////////////////////////////////////////////////////////////////
565 // hashing
567 String StringUtil::Translate(CStrRef input, CStrRef from, CStrRef to) {
568 if (input.empty()) return input;
570 int len = input.size();
571 String retstr(len, ReserveString);
572 char *ret = retstr.mutableSlice().ptr;
573 memcpy(ret, input.data(), len);
574 auto trlen = std::min(from.size(), to.size());
575 string_translate(ret, len, from.data(), to.data(), trlen);
576 return retstr.setSize(len);
579 String StringUtil::ROT13(CStrRef input) {
580 if (input.empty()) return input;
581 return String(string_rot13(input.data(), input.size()),
582 input.size(), AttachString);
585 int64_t StringUtil::CRC32(CStrRef input) {
586 return string_crc32(input.data(), input.size());
589 String StringUtil::Crypt(CStrRef input, const char *salt /* = "" */) {
590 return String(string_crypt(input.c_str(), salt), AttachString);
593 String StringUtil::MD5(CStrRef input, bool raw /* = false */) {
594 int len;
595 char *ret = string_md5(input.data(), input.size(), raw, len);
596 return String(ret, len, AttachString);
599 String StringUtil::SHA1(CStrRef input, bool raw /* = false */) {
600 int len;
601 char *ret = string_sha1(input.data(), input.size(), raw, len);
602 return String(ret, len, AttachString);
605 ///////////////////////////////////////////////////////////////////////////////