2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010- Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #include <test/test_ext_mb.h>
18 #include <runtime/ext/ext_mb.h>
19 #include <runtime/ext/ext_string.h>
20 #include <runtime/ext/ext_array.h>
22 ///////////////////////////////////////////////////////////////////////////////
// Entry point for the mbstring extension tests: sets the internal encoding to
// "UTF-8", then hands each test_mb_* method to RUN_TEST in turn.
// RUN_TEST is a macro defined outside this file; `which` is not referenced
// directly in the visible lines, so it is presumably consumed by that macro
// -- TODO confirm.
// NOTE(review): no return statement or closing brace is visible in this
// listing (the embedded numbering stops at 82 and resumes at 87).
24 bool TestExtMb::RunTests(const std::string
&which
) {
26 f_mb_internal_encoding("UTF-8");
28 RUN_TEST(test_mb_list_encodings
);
29 RUN_TEST(test_mb_list_encodings_alias_names
);
30 RUN_TEST(test_mb_list_mime_names
);
31 RUN_TEST(test_mb_check_encoding
);
32 RUN_TEST(test_mb_convert_case
);
33 RUN_TEST(test_mb_convert_encoding
);
34 RUN_TEST(test_mb_convert_kana
);
35 RUN_TEST(test_mb_convert_variables
);
36 RUN_TEST(test_mb_decode_mimeheader
);
37 RUN_TEST(test_mb_decode_numericentity
);
38 RUN_TEST(test_mb_detect_encoding
);
39 RUN_TEST(test_mb_detect_order
);
40 RUN_TEST(test_mb_encode_mimeheader
);
41 RUN_TEST(test_mb_encode_numericentity
);
42 RUN_TEST(test_mb_ereg_match
);
43 RUN_TEST(test_mb_ereg_replace
);
44 RUN_TEST(test_mb_ereg_search_getpos
);
45 RUN_TEST(test_mb_ereg_search_getregs
);
46 RUN_TEST(test_mb_ereg_search_init
);
47 RUN_TEST(test_mb_ereg_search_pos
);
48 RUN_TEST(test_mb_ereg_search_regs
);
49 RUN_TEST(test_mb_ereg_search_setpos
);
50 RUN_TEST(test_mb_ereg_search
);
51 RUN_TEST(test_mb_ereg
);
52 RUN_TEST(test_mb_eregi_replace
);
53 RUN_TEST(test_mb_eregi
);
54 RUN_TEST(test_mb_get_info
);
55 RUN_TEST(test_mb_http_input
);
56 RUN_TEST(test_mb_http_output
);
57 RUN_TEST(test_mb_internal_encoding
);
58 RUN_TEST(test_mb_language
);
59 RUN_TEST(test_mb_output_handler
);
60 RUN_TEST(test_mb_parse_str
);
61 RUN_TEST(test_mb_preferred_mime_name
);
62 RUN_TEST(test_mb_regex_encoding
);
63 RUN_TEST(test_mb_regex_set_options
);
64 RUN_TEST(test_mb_send_mail
);
65 RUN_TEST(test_mb_split
);
66 RUN_TEST(test_mb_strcut
);
67 RUN_TEST(test_mb_strimwidth
);
68 RUN_TEST(test_mb_stripos
);
69 RUN_TEST(test_mb_stristr
);
70 RUN_TEST(test_mb_strlen
);
71 RUN_TEST(test_mb_strpos
);
72 RUN_TEST(test_mb_strrchr
);
73 RUN_TEST(test_mb_strrichr
);
74 RUN_TEST(test_mb_strripos
);
75 RUN_TEST(test_mb_strrpos
);
76 RUN_TEST(test_mb_strstr
);
77 RUN_TEST(test_mb_strtolower
);
78 RUN_TEST(test_mb_strtoupper
);
79 RUN_TEST(test_mb_strwidth
);
80 RUN_TEST(test_mb_substitute_character
);
81 RUN_TEST(test_mb_substr_count
);
82 RUN_TEST(test_mb_substr
);
87 ///////////////////////////////////////////////////////////////////////////////
// Searches the array returned by f_mb_list_encodings() for "UTF-8" and passes
// VERIFY the condition that the search result is not `false`.
// VERIFY is a test macro defined outside this file.
// NOTE(review): the return statement and closing brace are absent from this
// listing.
89 bool TestExtMb::test_mb_list_encodings() {
90 VERIFY(!same(f_array_search("UTF-8", f_mb_list_encodings()), false));
// Static string keys used by the alias-name and MIME-name tests below to index
// the arrays returned by f_mb_list_encodings_alias_names() and
// f_mb_list_mime_names().
94 static const StaticString
95 s_Quoted_Printable("Quoted-Printable"),
96 s_UUENCODE("UUENCODE");
// Looks up the "Quoted-Printable" entry of f_mb_list_encodings_alias_names()
// and pairs it, via VS, with a one-element vector holding "qprint".
// VS is a test macro defined outside this file (presumably an equality check
// of its two arguments -- TODO confirm).
// NOTE(review): the return statement and closing brace are absent from this
// listing.
98 bool TestExtMb::test_mb_list_encodings_alias_names() {
99 VS(f_mb_list_encodings_alias_names()[s_Quoted_Printable
],
100 CREATE_VECTOR1("qprint"));
// Looks up the "UUENCODE" entry of f_mb_list_mime_names() and pairs it, via
// VS, with the expected MIME name "x-uuencode".
// VS is a test macro defined outside this file (presumably an equality check
// -- TODO confirm).
// NOTE(review): the return statement and closing brace are absent from this
// listing.
104 bool TestExtMb::test_mb_list_mime_names() {
105 VS(f_mb_list_mime_names()[s_UUENCODE
], "x-uuencode");
// Passes VERIFY the result of f_mb_check_encoding() for the byte string
// "Pr\xC3\x9C\xC3\x9Dfung" against the encoding name "UTF-8".
// NOTE(review): the return statement and closing brace are absent from this
// listing.
109 bool TestExtMb::test_mb_check_encoding() {
110 VERIFY(f_mb_check_encoding("Pr\xC3\x9C\xC3\x9D""fung", "UTF-8"));
// Runs one sentence through f_mb_convert_case() twice: first with
// k_MB_CASE_UPPER (expected all upper case), then feeds that result back in
// with k_MB_CASE_TITLE (expected each word capitalised). Both calls name
// "UTF-8" as the encoding.
// VS is a test macro defined outside this file (presumably an equality check
// -- TODO confirm).
// NOTE(review): the return statement and closing brace are absent from this
// listing.
114 bool TestExtMb::test_mb_convert_case() {
115 String str
= "mary had a Little lamb and she loved it so";
116 str
= f_mb_convert_case(str
, k_MB_CASE_UPPER
, "UTF-8");
117 VS(str
, "MARY HAD A LITTLE LAMB AND SHE LOVED IT SO");
118 str
= f_mb_convert_case(str
, k_MB_CASE_TITLE
, "UTF-8");
119 VS(str
, "Mary Had A Little Lamb And She Loved It So");
// Converts "Pr\xC3\x9Cfung" to "ISO-8859-1" with three different source
// encoding arguments -- "UTF-8", the list "UTF-8, JIS", and "auto" -- and
// expects the single-byte form "Pr\xDCfung" each time.
// VS is a test macro defined outside this file (presumably an equality check
// -- TODO confirm).
// NOTE(review): the return statement and closing brace are absent from this
// listing.
123 bool TestExtMb::test_mb_convert_encoding() {
124 String str
= "Pr\xC3\x9C""fung";
126 VS(f_mb_convert_encoding(str
, "ISO-8859-1", "UTF-8"), "Pr\xDC""fung");
127 VS(f_mb_convert_encoding(str
, "ISO-8859-1", "UTF-8, JIS"), "Pr\xDC""fung");
128 VS(f_mb_convert_encoding(str
, "ISO-8859-1", "auto"), "Pr\xDC""fung");
// Passes the plain string "foo" to f_mb_convert_kana() and expects it back
// unchanged.
// VS is a test macro defined outside this file (presumably an equality check
// -- TODO confirm).
// NOTE(review): the return statement and closing brace are absent from this
// listing.
133 bool TestExtMb::test_mb_convert_kana() {
134 VS(f_mb_convert_kana("foo"), "foo");
// Calls f_mb_convert_variables() with target "ISO-8859-1" and source "UTF-8",
// passing `str` by reference plus a vector of further references, then expects
// str, str1 and str2 to each hold the converted form "Pr\xDCfung".
// The result stored in `inputenc` is not checked in the visible lines.
// NOTE(review): the call is cut off after `CREATE_VECTOR2(ref(str1),` -- the
// embedded numbering jumps from 143 to 145, so the remaining argument
// (presumably a reference to str2) and the closing parentheses are missing
// from this listing. The return statement and closing brace are absent too.
138 bool TestExtMb::test_mb_convert_variables() {
139 Variant str
= "Pr\xC3\x9C""fung";
140 Variant str1
= "Pr\xC3\x9C""fung";
141 Variant str2
= "Pr\xC3\x9C""fung";
142 Variant inputenc
= f_mb_convert_variables(5, "ISO-8859-1", "UTF-8", ref(str
),
143 CREATE_VECTOR2(ref(str1
),
145 VS(str
, "Pr\xDC""fung");
146 VS(str1
, "Pr\xDC""fung");
147 VS(str2
, "Pr\xDC""fung");
// Switches the internal encoding to "ISO-8859-1", decodes a UTF-8/base64
// encoded-word Subject header with f_mb_decode_mimeheader(), and expects the
// text in single-byte form ("Pr\xDCfung Pr\xDCfung"). The internal encoding is
// set back to "UTF-8" afterwards so later tests see the value RunTests set.
// NOTE(review): the return statement and closing brace are absent from this
// listing.
151 bool TestExtMb::test_mb_decode_mimeheader() {
152 f_mb_internal_encoding("ISO-8859-1");
153 VS(f_mb_decode_mimeheader("Subject: =?UTF-8?B?UHLDnGZ1bmcgUHLDnGZ1bmc=?=\n"),
154 "Subject: Pr\xDC""fung Pr\xDC""fung");
155 f_mb_internal_encoding("UTF-8");
// Calls f_mb_decode_numericentity() with the conversion map
// (0x0, 0x2FFFF, 0, 0xFFFF) and encoding "UTF-8", expecting the UTF-8 bytes
// E2 80 99, E1 BC 80, C3 A2.
// NOTE(review): the input literal as shown already consists of those same
// three characters rather than numeric entities; it was presumably written as
// numeric character references and decoded when this listing was produced --
// TODO confirm against the original file.
// NOTE(review): the return statement and closing brace are absent from this
// listing.
159 bool TestExtMb::test_mb_decode_numericentity() {
160 Array convmap
= CREATE_VECTOR4(0x0, 0x2FFFF, 0, 0xFFFF);
161 VS(f_mb_decode_numericentity("’ἀâ", convmap
, "UTF-8"),
162 "\xe2\x80\x99\xe1\xbc\x80\xc3\xa2");
// Runs f_mb_detect_encoding() on the byte string "Pr\xC3\x9C\xC3\x9Dfung" with
// four forms of the encoding list: omitted, "auto", a comma-separated string,
// and an array. The expected names are "UTF-8", "UTF-8", "SJIS-win" and
// "EUC-JP" respectively.
// VS is a test macro defined outside this file (presumably an equality check
// -- TODO confirm).
// NOTE(review): the return statement and closing brace are absent from this
// listing.
166 bool TestExtMb::test_mb_detect_encoding() {
167 String str
= "Pr\xC3\x9C\xC3\x9D""fung";
169 /* Detect character encoding with current detect_order */
170 VS(f_mb_detect_encoding(str
), "UTF-8");
172 /* "auto" is expanded to "ASCII,JIS,UTF-8,EUC-JP,SJIS" */
173 VS(f_mb_detect_encoding(str
, "auto"), "UTF-8");
175 /* Specify encoding_list character encoding by comma separated list */
176 VS(f_mb_detect_encoding(str
, "JIS, eucjp-win, sjis-win"), "SJIS-win");
178 /* Use array to specify encoding_list */
179 Array ary
= CREATE_VECTOR3("ASCII", "JIS", "EUC-JP");
180 VS(f_mb_detect_encoding(str
, ary
), "EUC-JP");
// Sets the detection order with f_mb_detect_order(), first from a
// comma-separated string and then from an array, and checks what
// f_mb_detect_encoding() reports for "Pr\xC3\x9C\xC3\x9Dfung" after each call:
// "SJIS-win" when sjis-win precedes UTF-8 in the order, "UTF-8" when UTF-8
// precedes sjis-win. Finally reads the order back and expects the names
// "eucJP-win, UTF-8, SJIS-win".
// The detection order is left at that last value when the visible code ends.
// NOTE(review): the return statement and closing brace are absent from this
// listing.
185 bool TestExtMb::test_mb_detect_order() {
186 String str
= "Pr\xC3\x9C\xC3\x9D""fung";
188 /* Set detection order by enumerated list */
190 f_mb_detect_order("eucjp-win,sjis-win,UTF-8");
191 VS(f_mb_detect_encoding(str
), "SJIS-win");
192 f_mb_detect_order("eucjp-win,UTF-8,sjis-win");
193 VS(f_mb_detect_encoding(str
), "UTF-8");
196 /* Set detection order by array */
198 f_mb_detect_order(CREATE_VECTOR3("eucjp-win", "sjis-win", "UTF-8"));
199 VS(f_mb_detect_encoding(str
), "SJIS-win");
200 f_mb_detect_order(CREATE_VECTOR3("eucjp-win", "UTF-8", "sjis-win"));
201 VS(f_mb_detect_encoding(str
), "UTF-8");
204 /* Display current detection order */
205 VS(f_implode(", ", f_mb_detect_order()), "eucJP-win, UTF-8, SJIS-win");
// Switches the internal encoding to "ISO-8859-1", encodes a Subject header
// containing the byte \xDC with f_mb_encode_mimeheader(), and expects a
// UTF-8/base64 encoded word. The internal encoding is set back to "UTF-8"
// afterwards.
// NOTE(review): the f_mb_encode_mimeheader() call is cut off -- the embedded
// numbering jumps from 212 to 214, so its remaining arguments and closing
// parenthesis are missing from this listing. The return statement and closing
// brace are absent too.
210 bool TestExtMb::test_mb_encode_mimeheader() {
211 f_mb_internal_encoding("ISO-8859-1");
212 VS(f_mb_encode_mimeheader("Subject: Pr\xDC""fung Pr\xDC""fung",
214 "Subject: =?UTF-8?B?UHLDnGZ1bmcgUHLDnGZ1bmc=?=");
215 f_mb_internal_encoding("UTF-8");
// Builds the conversion map (0x0, 0x2FFFF, 0, 0xFFFF) and calls
// f_mb_encode_numericentity() on the UTF-8 bytes E2 80 99, E1 BC 80, C3 A2.
// NOTE(review): the call is cut off -- the embedded numbering jumps from 221
// to 223, so its remaining arguments (presumably `convmap` and an encoding
// name) are missing from this listing.
// NOTE(review): the expected-value literal as shown holds the raw characters
// themselves, not numeric entities; it was presumably written as numeric
// character references and decoded when this listing was produced -- TODO
// confirm against the original file.
// NOTE(review): the return statement and closing brace are absent as well.
219 bool TestExtMb::test_mb_encode_numericentity() {
220 Array convmap
= CREATE_VECTOR4(0x0, 0x2FFFF, 0, 0xFFFF);
221 VS(f_mb_encode_numericentity("\xe2\x80\x99\xe1\xbc\x80\xc3\xa2",
223 "’ἀâ");
// Three f_mb_ereg_match() cases: pattern "a" is expected not to match
// "some apples" but to match "a kiwi", and ".*a" is expected to match
// "some apples". The expectations are consistent with matching being anchored
// at the start of the subject.
// NOTE(review): the return statement and closing brace are absent from this
// listing.
227 bool TestExtMb::test_mb_ereg_match() {
228 VERIFY(!f_mb_ereg_match("a", "some apples"));
229 VERIFY(f_mb_ereg_match("a", "a kiwi"));
230 VERIFY(f_mb_ereg_match(".*a", "some apples"));
// Exercises f_mb_ereg_replace() in three scenarios:
//   1. back-references (\1, \2) replacing " is" with " was", alongside the
//      same replacement done with f_str_replace();
//   2. replacing the word "four" with the value of `num`, expecting "4";
//   3. wrapping a URL in an anchor tag using the whole-match reference \0.
// NOTE(review): `str` is declared twice and `num` is used without a visible
// declaration. The embedded numbering skips lines around each scenario
// (234->236, 239->243, 245->248), so scoping braces and the declaration of
// `num` were presumably lost from this listing -- TODO confirm.
// NOTE(review): the return statement and closing brace are absent as well.
234 bool TestExtMb::test_mb_ereg_replace() {
236 String str
= "This is a test";
237 VS(f_str_replace(" is", " was", str
), "This was a test");
238 VS(f_mb_ereg_replace("( )is", "\\1was", str
), "This was a test");
239 VS(f_mb_ereg_replace("(( )is)", "\\2was", str
), "This was a test");
243 String str
= "This string has four words.";
244 str
= f_mb_ereg_replace("four", num
, str
);
245 VS(str
, "This string has 4 words.");
248 String test
= "http://test.com/test";
249 test
= f_mb_ereg_replace("[[:alpha:]]+://[^<>[:space:]]+[[:alnum:]/]",
250 "<a href=\"\\0\">\\0</a>", test
);
251 VS(test
, "<a href=\"http://test.com/test\">http://test.com/test</a>");
// Initialises a regex search over "Pr\xC3\x9C\xC3\x9Dfung abc p\xC3\x9C" with
// regex encoding "UTF-8", runs one search, and expects the first match to be
// "Pr\xC3\x9C\xC3\x9Dfung" and f_mb_ereg_search_getpos() to report 10
// afterwards. That first match is 10 bytes long, so the position appears to be
// a byte offset.
// NOTE(review): `reg` is used without a visible declaration (the embedded
// numbering jumps from 257 to 259) -- the pattern is missing from this
// listing. The return statement and closing brace are absent too.
256 bool TestExtMb::test_mb_ereg_search_getpos() {
257 String str
= "Pr\xC3\x9C\xC3\x9D""fung abc p\xC3\x9C";
259 f_mb_regex_encoding("UTF-8");
260 f_mb_ereg_search_init(str
, reg
);
261 Variant r
= f_mb_ereg_search();
262 r
= f_mb_ereg_search_getregs(); // get first result
263 VS(r
, CREATE_VECTOR1("Pr\xC3\x9C\xC3\x9D""fung"));
264 VS(f_mb_ereg_search_getpos(), 10);
// Initialises a regex search over "Pr\xC3\x9C\xC3\x9Dfung abc p\xC3\x9C" with
// regex encoding "UTF-8", runs one search, and expects
// f_mb_ereg_search_getregs() to return a one-element vector holding the first
// match, "Pr\xC3\x9C\xC3\x9Dfung".
// NOTE(review): `reg` is used without a visible declaration (the embedded
// numbering jumps from 269 to 271) -- the pattern is missing from this
// listing. The return statement and closing brace are absent too.
268 bool TestExtMb::test_mb_ereg_search_getregs() {
269 String str
= "Pr\xC3\x9C\xC3\x9D""fung abc p\xC3\x9C";
271 f_mb_regex_encoding("UTF-8");
272 f_mb_ereg_search_init(str
, reg
);
273 Variant r
= f_mb_ereg_search();
274 r
= f_mb_ereg_search_getregs(); // get first result
275 VS(r
, CREATE_VECTOR1("Pr\xC3\x9C\xC3\x9D""fung"));
// Passes VERIFY the result of f_mb_ereg_search_init() for the subject
// "abcdefabcdabc" and the pattern "abc".
// NOTE(review): the return statement and closing brace are absent from this
// listing.
279 bool TestExtMb::test_mb_ereg_search_init() {
280 VERIFY(f_mb_ereg_search_init("abcdefabcdabc", "abc"));
// After consuming the first match ("Pr\xC3\x9C\xC3\x9Dfung") of a search over
// "Pr\xC3\x9C\xC3\x9Dfung abc p\xC3\x9C", expects f_mb_ereg_search_pos() to
// return the pair (11, 3). In the subject, "abc" starts at byte 11 and is
// 3 bytes long, so the pair appears to be (byte offset, byte length) of the
// next match.
// NOTE(review): `reg` is used without a visible declaration (the embedded
// numbering jumps from 285 to 287) -- the pattern is missing from this
// listing. The return statement and closing brace are absent too.
284 bool TestExtMb::test_mb_ereg_search_pos() {
285 String str
= "Pr\xC3\x9C\xC3\x9D""fung abc p\xC3\x9C";
287 f_mb_regex_encoding("UTF-8");
288 f_mb_ereg_search_init(str
, reg
);
289 Variant r
= f_mb_ereg_search();
290 r
= f_mb_ereg_search_getregs(); // get first result
291 VS(r
, CREATE_VECTOR1("Pr\xC3\x9C\xC3\x9D""fung"));
292 VS(f_mb_ereg_search_pos(), CREATE_VECTOR2(11, 3));
// After consuming the first match ("Pr\xC3\x9C\xC3\x9Dfung") of a search over
// "Pr\xC3\x9C\xC3\x9Dfung abc p\xC3\x9C", calls f_mb_ereg_search_regs() and
// expects it to return the next match as a one-element vector, "abc".
// NOTE(review): `reg` is used without a visible declaration (the embedded
// numbering jumps from 297 to 299) -- the pattern is missing from this
// listing. The return statement and closing brace are absent too.
296 bool TestExtMb::test_mb_ereg_search_regs() {
297 String str
= "Pr\xC3\x9C\xC3\x9D""fung abc p\xC3\x9C";
299 f_mb_regex_encoding("UTF-8");
300 f_mb_ereg_search_init(str
, reg
);
301 Variant r
= f_mb_ereg_search();
302 r
= f_mb_ereg_search_getregs(); // get first result
303 VS(r
, CREATE_VECTOR1("Pr\xC3\x9C\xC3\x9D""fung"));
304 r
= f_mb_ereg_search_regs(); // get next result
305 VS(r
, CREATE_VECTOR1("abc"));
// After consuming the first match of a search over
// "Pr\xC3\x9C\xC3\x9Dfung abc p\xC3\x9C", moves the search position to 15 with
// f_mb_ereg_search_setpos() and expects the next f_mb_ereg_search_regs() call
// to return "p\xC3\x9C". Byte 15 of the subject is where that final word
// starts, so the position appears to be a byte offset.
// NOTE(review): `reg` is used without a visible declaration (the embedded
// numbering jumps from 310 to 312) -- the pattern is missing from this
// listing. The return statement and closing brace are absent too.
309 bool TestExtMb::test_mb_ereg_search_setpos() {
310 String str
= "Pr\xC3\x9C\xC3\x9D""fung abc p\xC3\x9C";
312 f_mb_regex_encoding("UTF-8");
313 f_mb_ereg_search_init(str
, reg
);
314 Variant r
= f_mb_ereg_search();
315 r
= f_mb_ereg_search_getregs(); // get first result
316 VS(r
, CREATE_VECTOR1("Pr\xC3\x9C\xC3\x9D""fung"));
317 VERIFY(f_mb_ereg_search_setpos(15));
318 r
= f_mb_ereg_search_regs(); // get next result
319 VS(r
, CREATE_VECTOR1("p\xC3\x9C"));
// Initialises a regex search over "Pr\xC3\x9C\xC3\x9Dfung abc p\xC3\x9C" with
// regex encoding "UTF-8", calls f_mb_ereg_search() once, and expects the
// registers of that search to hold the first match, "Pr\xC3\x9C\xC3\x9Dfung".
// The value returned by f_mb_ereg_search() itself is overwritten before it is
// checked.
// NOTE(review): `reg` is used without a visible declaration (the embedded
// numbering jumps from 324 to 326) -- the pattern is missing from this
// listing. The return statement and closing brace are absent too.
323 bool TestExtMb::test_mb_ereg_search() {
324 String str
= "Pr\xC3\x9C\xC3\x9D""fung abc p\xC3\x9C";
326 f_mb_regex_encoding("UTF-8");
327 f_mb_ereg_search_init(str
, reg
);
328 Variant r
= f_mb_ereg_search();
329 r
= f_mb_ereg_search_getregs(); // get first result
330 VS(r
, CREATE_VECTOR1("Pr\xC3\x9C\xC3\x9D""fung"));
// Matches the date "1973-04-30" against a three-group pattern with f_mb_ereg()
// (year-month-day), receiving the captures through `regs`, and expects entry 0
// to be the whole matched string.
// NOTE(review): `regs` is used without a visible declaration, and the embedded
// numbering skips 335 and 338-340, so the declaration and possibly further
// checks on the captured groups are missing from this listing. The return
// statement and closing brace are absent too.
334 bool TestExtMb::test_mb_ereg() {
336 String date
= "1973-04-30";
337 VERIFY(f_mb_ereg("([0-9]{4})-([0-9]{1,2})-([0-9]{1,2})", date
, ref(regs
)));
341 VS(regs
[0], "1973-04-30");
// Uses f_mb_eregi_replace() to wrap the word "suffix" in a <span> element: the
// pattern captures the text after '>' as group 1 and "suffix" as group 2, and
// the replacement re-emits group 1 followed by the wrapped group 2.
// VS is a test macro defined outside this file (presumably an equality check
// -- TODO confirm).
// NOTE(review): the return statement and closing brace are absent from this
// listing.
345 bool TestExtMb::test_mb_eregi_replace() {
346 String pattern
= "(>[^<]*)(suffix)";
347 String replacement
= "\\1<span class=\"search\">\\2</span>";
348 String body
= ">whateversuffix";
349 body
= f_mb_eregi_replace(pattern
, replacement
, body
);
350 VS(body
, ">whatever<span class=\"search\">suffix</span>");
// Passes VERIFY the result of f_mb_eregi() for the lower-case pattern "z"
// against `str`.
// NOTE(review): `str` is used without a visible declaration (the embedded
// numbering jumps from 354 to 356), so the subject string is missing from this
// listing. The return statement and closing brace are absent too.
354 bool TestExtMb::test_mb_eregi() {
356 VERIFY(f_mb_eregi("z", str
));
// Reads the "detect_order" entry of f_mb_get_info(), converts it to an array,
// and passes VERIFY the condition that the array is not empty.
// NOTE(review): the return statement and closing brace are absent from this
// listing.
360 bool TestExtMb::test_mb_get_info() {
361 static const StaticString
s_detect_order("detect_order");
362 VERIFY(!f_mb_get_info()[s_detect_order
].toArray().empty());
// Expects f_mb_http_input() to yield `false` when called with no arguments
// from this test driver; fuller coverage is deferred to TestServer per the
// TODO below.
// NOTE(review): the return statement and closing brace are absent from this
// listing.
366 bool TestExtMb::test_mb_http_input() {
367 // TODO: test this in TestServer
368 VS(f_mb_http_input(), false);
// Expects f_mb_http_output() to yield "pass" when called with no arguments
// from this test driver; fuller coverage is deferred to TestServer per the
// TODO below.
// NOTE(review): the return statement and closing brace are absent from this
// listing.
372 bool TestExtMb::test_mb_http_output() {
373 // TODO: test this in TestServer
374 VS(f_mb_http_output(), "pass");
// Sets the internal encoding to "UTF-8" and expects a subsequent no-argument
// f_mb_internal_encoding() call to report "UTF-8".
// NOTE(review): the return statement and closing brace are absent from this
// listing.
378 bool TestExtMb::test_mb_internal_encoding() {
379 /* Set internal character encoding to UTF-8 */
380 f_mb_internal_encoding("UTF-8");
382 /* Display current internal character encoding */
383 VS(f_mb_internal_encoding(), "UTF-8");
// Expects a no-argument f_mb_language() call to report "uni".
// NOTE(review): the return statement and closing brace are absent from this
// listing.
387 bool TestExtMb::test_mb_language() {
388 VS(f_mb_language(), "uni");
// Placeholder: no checks are performed in the visible lines; coverage is
// deferred to TestServer per the TODO below.
// NOTE(review): the return statement and closing brace are absent from this
// listing.
392 bool TestExtMb::test_mb_output_handler() {
393 // TODO: test this in TestServer
// Parses the query string "first=value&arr[]=foo+bar&arr[]=baz" with
// f_mb_parse_str() into `output`, then expects the `s_first` entry to be
// "value" and the `s_arr` entry to be "baz" (the trailing comment marks the
// latter as a known deviation from PHP).
// NOTE(review): the `static const StaticString` declaration is cut off and
// `output`, `s_first` and `s_arr` have no visible declarations -- the embedded
// numbering jumps from 398 to 402, so those lines are missing from this
// listing. The trailing comment also looks truncated, and the return statement
// and closing brace are absent.
397 bool TestExtMb::test_mb_parse_str() {
398 static const StaticString
402 f_mb_parse_str("first=value&arr[]=foo+bar&arr[]=baz", ref(output
));
403 VS(output
[s_first
], "value");
404 VS(output
[s_arr
], "baz"); // bug in mb_parse_str not following PHP's
// Expects f_mb_preferred_mime_name("sjis-win") to yield "Shift_JIS".
// NOTE(review): the return statement and closing brace are absent from this
// listing.
408 bool TestExtMb::test_mb_preferred_mime_name() {
409 VS(f_mb_preferred_mime_name("sjis-win"), "Shift_JIS");
// Sets the regex encoding to "UTF-8" (the setter's result goes to VERIFY) and
// expects a no-argument f_mb_regex_encoding() call to report "UTF-8".
// NOTE(review): the return statement and closing brace are absent from this
// listing.
413 bool TestExtMb::test_mb_regex_encoding() {
414 VERIFY(f_mb_regex_encoding("UTF-8"));
415 VS(f_mb_regex_encoding(), "UTF-8");
// Expects the current regex options to read "pr", sets them to "pz" (the
// setter's result goes to VERIFY), and expects them to read "pz" afterwards.
// The options are left at "pz" when the visible code ends.
// NOTE(review): the return statement and closing brace are absent from this
// listing.
419 bool TestExtMb::test_mb_regex_set_options() {
420 VS(f_mb_regex_set_options(), "pr");
421 VERIFY(f_mb_regex_set_options("pz"));
422 VS(f_mb_regex_set_options(), "pz");
// Placeholder: the only check is commented out, so f_mb_send_mail() is not
// called in the visible lines.
// NOTE(review): the return statement and closing brace are absent from this
// listing.
426 bool TestExtMb::test_mb_send_mail() {
427 //VERIFY(f_mb_send_mail("hzhao@facebook.com", __func__, "test");
// Splits the date "04/30/1973" with f_mb_split() on the pattern "[/.-]" and
// stores the pieces in `ret`.
// NOTE(review): nothing checks `ret` in the visible lines -- the embedded
// numbering jumps from 433 to 440, so the assertions on the split result are
// missing from this listing. The return statement and closing brace are absent
// too.
431 bool TestExtMb::test_mb_split() {
432 String date
= "04/30/1973";
433 Array ret
= f_mb_split("[/.-]", date
);
// Exercises f_mb_strcut() with (start) and (start, length) arguments, first on
// the ASCII string "abcdef" and then on "\xC3\x9C" followed by "bcdef".
// The expected values in the second group are consistent with offsets and
// lengths counted in bytes: start 2 skips the two-byte \xC3\x9C sequence, and
// length 4 yields that sequence plus "bc".
// VS is a test macro defined outside this file (presumably an equality check
// -- TODO confirm).
// NOTE(review): the return statement and closing brace are absent from this
// listing.
440 bool TestExtMb::test_mb_strcut() {
441 VS(f_mb_strcut("abcdef", 1), "bcdef");
442 VS(f_mb_strcut("abcdef", 1, 3), "bcd");
443 VS(f_mb_strcut("abcdef", 0, 4), "abcd");
444 VS(f_mb_strcut("abcdef", 0, 8), "abcdef");
445 VS(f_mb_strcut("abcdef", -1, 1), "f");
447 VS(f_mb_strcut("\xC3\x9C""bcdef", 2), "bcdef");
448 VS(f_mb_strcut("\xC3\x9C""bcdef", 2, 3), "bcd");
449 VS(f_mb_strcut("\xC3\x9C""bcdef", 0, 4), "\xC3\x9C""bc");
450 VS(f_mb_strcut("\xC3\x9C""bcdef", 0, 8), "\xC3\x9C""bcdef");
451 VS(f_mb_strcut("\xC3\x9C""bcdef", -1, 1), "f");
// Trims two strings to width 6 with the marker "..>" via f_mb_strimwidth().
// In both expected values the marker is included in the width of 6, and in the
// second case the two-byte \xC3\x9C sequence counts as one column.
// NOTE(review): the return statement and closing brace are absent from this
// listing.
456 bool TestExtMb::test_mb_strimwidth() {
457 VS(f_mb_strimwidth("long string", 0, 6, "..>"), "lon..>");
458 VS(f_mb_strimwidth("\xC3\x9C""long string", 0, 6, "..>"), "\xC3\x9C""lo..>");
// Searches case-insensitively for "A" starting at offset 1 with
// f_mb_stripos() and expects position 7 in both subjects. In the second
// subject the two-byte \xC3\x9C sequence sits where the space was, so the
// expected 7 is consistent with positions counted in characters.
// NOTE(review): the return statement and closing brace are absent from this
// listing.
462 bool TestExtMb::test_mb_stripos() {
463 VS(f_mb_stripos("abcdef abcdef", "A", 1), 7);
464 VS(f_mb_stripos("abcdef\xC3\x9C""abcdef", "A", 1), 7);
// Expects f_mb_stristr() to yield `false` when the needle "earth" does not
// occur in "Hello World!". Only this not-found case is covered in the visible
// lines.
// NOTE(review): the return statement and closing brace are absent from this
// listing.
468 bool TestExtMb::test_mb_stristr() {
469 VS(f_mb_stristr("Hello World!", "earth"), false);
// Expects f_mb_strlen() to yield 4 for "test" and 8 for
// "Pr\xC3\x9C\xC3\x9Dfung". The latter is 10 bytes long, so the expected 8 is
// consistent with counting each two-byte sequence as one character.
// NOTE(review): the return statement and closing brace are absent from this
// listing.
473 bool TestExtMb::test_mb_strlen() {
474 VS(f_mb_strlen("test"), 4);
475 VS(f_mb_strlen("Pr\xC3\x9C\xC3\x9D""fung"), 8);
// Searches for "a" from offset 1 with f_mb_strpos() and expects position 7 in
// both subjects (the second has a two-byte \xC3\x9C sequence in place of the
// space). The final case expects `false` for the upper-case needle "A", i.e.
// the search is case-sensitive.
// NOTE(review): the return statement and closing brace are absent from this
// listing.
479 bool TestExtMb::test_mb_strpos() {
480 VS(f_mb_strpos("abcdef abcdef", "a", 1), 7);
481 VS(f_mb_strpos("abcdef\xC3\x9C""abcdef", "a", 1), 7);
482 VS(f_mb_strpos("abcdef\xC3\x9C""abcdef", "A", 1), false);
// Two scenarios for f_mb_strrchr():
//   1. needle "\n" in a three-line string, expecting the tail from the last
//      newline onwards;
//   2. needle "\x9C" (the second byte of the \xC3\x9C sequence). The byte-wise
//      f_strrchr() is expected to find it, while f_mb_strrchr() may yield
//      either `false` or "Line 3"; both outcomes are accepted.
// NOTE(review): `text` is declared twice; the embedded numbering skips lines
// (486->488, 489->492), so the scoping braces that separated the two scenarios
// were presumably lost from this listing -- TODO confirm. The return statement
// and closing brace are absent too.
486 bool TestExtMb::test_mb_strrchr() {
488 String text
= "Line 1\nLine 2\nLine 3";
489 VS(f_mb_strrchr(text
, "\n"), "\nLine 3");
492 String text
= "Line 1\nLine 2\xC3\x9C""Line 3";
493 VS(f_strrchr(text
, "\x9C"), "\x9C""Line 3");
494 // f_mb_strrchr behaves differently in different versions of
495 // libmbfl (https://github.com/facebook/hiphop-php/issues/68)
496 VERIFY(f_mb_strrchr(text
, "\x9C").same(false) ||
497 f_mb_strrchr(text
, "\x9C").same("Line 3"));
// Searches a three-line string for the lower-case needle "l" with
// f_mb_strrichr() and expects "Line 3", i.e. the tail starting at the last
// occurrence, matched case-insensitively against the upper-case "L".
// NOTE(review): the embedded numbering skips 503 and 506-509, so lines
// (possibly scoping braces or further cases) are missing from this listing.
// The return statement and closing brace are absent too.
502 bool TestExtMb::test_mb_strrichr() {
504 String text
= "Line 1\nLine 2\nLine 3";
505 VS(f_mb_strrichr(text
, "l"), "Line 3");
// Expects f_mb_strripos() to report 7 as the last case-insensitive position of
// "A" in both subjects; in the second, a two-byte \xC3\x9C sequence takes the
// place of the space.
// NOTE(review): the return statement and closing brace are absent from this
// listing.
510 bool TestExtMb::test_mb_strripos() {
511 VS(f_mb_strripos("abcdef abcdef", "A"), 7);
512 VS(f_mb_strripos("abcdef\xC3\x9C""abcdef", "A"), 7);
// Expects f_mb_strrpos() to report 7 as the last position of "a" in both
// subjects; in the second, a two-byte \xC3\x9C sequence takes the place of the
// space.
// NOTE(review): the return statement and closing brace are absent from this
// listing.
516 bool TestExtMb::test_mb_strrpos() {
517 VS(f_mb_strrpos("abcdef abcdef", "a"), 7);
518 VS(f_mb_strrpos("abcdef\xC3\x9C""abcdef", "a"), 7);
// Expects f_mb_strstr() to return the part of "name@example.com" from the
// first "@" onwards, needle included.
// NOTE(review): the return statement and closing brace are absent from this
// listing.
522 bool TestExtMb::test_mb_strstr() {
523 String email
= "name@example.com";
524 VS(f_mb_strstr(email
, "@"), "@example.com");
// Lower-cases a mixed-case sentence and the literal "ABC" with
// f_mb_strtolower() and expects fully lower-case results. Only ASCII input is
// covered in the visible lines.
// NOTE(review): the return statement and closing brace are absent from this
// listing.
528 bool TestExtMb::test_mb_strtolower() {
529 String str
= "Mary Had A Little Lamb and She LOVED It So";
530 str
= f_mb_strtolower(str
);
531 VS(str
, "mary had a little lamb and she loved it so");
532 VS(f_mb_strtolower("ABC"), "abc");
// Upper-cases a mixed-case sentence and the literal "abc" with
// f_mb_strtoupper() and expects fully upper-case results. Only ASCII input is
// covered in the visible lines.
// NOTE(review): the return statement and closing brace are absent from this
// listing.
536 bool TestExtMb::test_mb_strtoupper() {
537 String str
= "Mary Had A Little Lamb and She LOVED It So";
538 str
= f_mb_strtoupper(str
);
539 VS(str
, "MARY HAD A LITTLE LAMB AND SHE LOVED IT SO");
540 VS(f_mb_strtoupper("abc"), "ABC");
// Expects f_mb_strwidth() to yield 7 for "Pr\xC3\x9Cfung": the string is
// 8 bytes long, so the two-byte \xC3\x9C sequence counts as a single column.
// NOTE(review): the return statement and closing brace are absent from this
// listing.
544 bool TestExtMb::test_mb_strwidth() {
545 VS(f_mb_strwidth("Pr\xC3\x9C""fung"), 7);
// Sets the substitute character first to the code point 0x3013 and then to the
// string "long", reading the setting back after each call and expecting the
// value just set. The setting is left at "long" when the visible code ends.
// NOTE(review): the return statement and closing brace are absent from this
// listing.
549 bool TestExtMb::test_mb_substitute_character() {
550 /* Set with Unicode U+3013 (GETA MARK) */
551 f_mb_substitute_character(0x3013);
552 VS(f_mb_substitute_character(), 0x3013);
555 f_mb_substitute_character("long");
557 /* Display current setting */
558 VS(f_mb_substitute_character(), "long");
// Counts occurrences of "is" in "This is a test" with f_mb_substr_count()
// (once from a literal, once from a String variable), expecting 2 each time.
// The last check covers overlapping occurrences of "gcdgcd" in "gcdgcdgcd" and
// accepts either 2 or 1.
// NOTE(review): the in-body comment below names f_mb_strrchr although the call
// it annotates is f_mb_substr_count -- it looks copied from the strrchr test;
// confirm whether the referenced libmbfl issue also covers this function.
// NOTE(review): the return statement and closing brace are absent from this
// listing.
562 bool TestExtMb::test_mb_substr_count() {
563 VS(f_mb_substr_count("This is a test", "is"), 2);
564 String text
= "This is a test";
565 VS(f_mb_substr_count(text
, "is"), 2);
567 // different from substr_count
568 // f_mb_strrchr behaves differently in different versions of
569 // libmbfl (https://github.com/facebook/hiphop-php/issues/68)
570 VERIFY(f_mb_substr_count("gcdgcdgcd", "gcdgcd").same(2) ||
571 f_mb_substr_count("gcdgcdgcd", "gcdgcd").same(1));
// Exercises f_mb_substr() with (start) and (start, length) arguments, first on
// the ASCII string "abcdef" and then on "\xC3\x9C" followed by "bcdef".
// The expected values in the second group are consistent with offsets and
// lengths counted in characters: start 1 skips the whole two-byte \xC3\x9C
// sequence, and length 4 yields that sequence plus "bcd".
// VS is a test macro defined outside this file (presumably an equality check
// -- TODO confirm).
// NOTE(review): the return statement and closing brace are absent from this
// listing (the source ends at the last check).
575 bool TestExtMb::test_mb_substr() {
576 VS(f_mb_substr("abcdef", 1), "bcdef");
577 VS(f_mb_substr("abcdef", 1, 3), "bcd");
578 VS(f_mb_substr("abcdef", 0, 4), "abcd");
579 VS(f_mb_substr("abcdef", 0, 8), "abcdef");
580 VS(f_mb_substr("abcdef", -1, 1), "f");
582 VS(f_mb_substr("\xC3\x9C""bcdef", 1), "bcdef");
583 VS(f_mb_substr("\xC3\x9C""bcdef", 1, 3), "bcd");
584 VS(f_mb_substr("\xC3\x9C""bcdef", 0, 4), "\xC3\x9C""bcd");
585 VS(f_mb_substr("\xC3\x9C""bcdef", 0, 8), "\xC3\x9C""bcdef");
586 VS(f_mb_substr("\xC3\x9C""bcdef", -1, 1), "f");