MDL-81616 upgrade: add the 4.4.0 separation line to all upgrade scripts
[moodle.git] / lib / tests / text_test.php
blob148575e8c63b35707baea861db4d5e45e864f3d6
1 <?php
2 // This file is part of Moodle - http://moodle.org/
3 //
4 // Moodle is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // Moodle is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
14 // You should have received a copy of the GNU General Public License
15 // along with Moodle. If not, see <http://www.gnu.org/licenses/>.
17 /**
18 * core_text unit tests.
20 * @package core
21 * @category phpunit
22 * @copyright 2012 Petr Skoda {@link http://skodak.org}
23 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
26 defined('MOODLE_INTERNAL') || die();
29 /**
30 * Unit tests for our utf-8 aware text processing.
32 * @package core
33 * @category phpunit
34 * @copyright 2010 Petr Skoda (http://skodak.org)
35 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
36 * @coversDefaultClass \core_text
39 class text_test extends advanced_testcase {
/**
 * Checks that core_text::parse_charset() maps charset aliases to their canonical names.
 *
 * @covers ::parse_charset()
 */
public function test_parse_charset() {
    // Alias => canonical name. The second entry is an encoding Moodle does not use.
    $aliases = [
        'Cp1250' => 'windows-1250',
        'ms-ansi' => 'windows-1252',
    ];

    foreach ($aliases as $alias => $canonical) {
        $this->assertSame($canonical, core_text::parse_charset($alias));
    }
}
/**
 * Exercises core_text::convert() between UTF-8, legacy single-byte charsets, multi-byte charsets and ASCII.
 *
 * @covers ::convert()
 */
public function test_convert() {
    // Converting an empty string yields an empty string.
    $this->assertSame('', core_text::convert('', 'utf-8', 'utf-8'));

    // The same Czech phrase as UTF-8 text and as ISO-8859-2 / Windows-1250 byte sequences.
    $czech = "Žluťoučký koníček";
    $latin2 = pack("H*", "ae6c75bb6f75e86bfd206b6f6eede8656b");
    $cp1250 = pack("H*", "8e6c759d6f75e86bfd206b6f6eede8656b");
    $this->assertSame($latin2, core_text::convert($czech, 'utf-8', 'iso-8859-2'));
    $this->assertSame($czech, core_text::convert($latin2, 'iso-8859-2', 'utf-8'));
    $this->assertSame($cp1250, core_text::convert($czech, 'utf-8', 'win-1250'));
    $this->assertSame($czech, core_text::convert($cp1250, 'win-1250', 'utf-8'));
    $this->assertSame($latin2, core_text::convert($cp1250, 'win-1250', 'iso-8859-2'));
    $this->assertSame($cp1250, core_text::convert($latin2, 'iso-8859-2', 'win-1250'));
    $this->assertSame($latin2, core_text::convert($latin2, 'iso-8859-2', 'iso-8859-2'));
    $this->assertSame($cp1250, core_text::convert($cp1250, 'win-1250', 'cp1250'));
    $this->assertSame($czech, core_text::convert($czech, 'utf-8', 'utf-8'));

    // Multi-byte round trips: UTF-8 sample => [charset => hex dump of the encoded sample].
    $multibyte = [
        '言語設定' => [
            'EUC-JP' => "b8c0b8ecc0dfc4ea",
            'ISO-2022-JP' => "1b24423840386c405f446a1b2842",
            'SHIFT-JIS' => "8cbe8cea90dd92e8",
        ],
        '简体中文' => [
            'GB2312' => "bcf2cce5d6d0cec4",
            'GB18030' => "bcf2cce5d6d0cec4",
        ],
    ];
    foreach ($multibyte as $sample => $encodings) {
        foreach ($encodings as $charset => $hex) {
            $encoded = pack("H*", $hex);
            $this->assertSame($encoded, core_text::convert($sample, 'utf-8', $charset));
            $this->assertSame($sample, core_text::convert($encoded, $charset, 'utf-8'));
            $this->assertSame($sample, core_text::convert($sample, 'utf-8', 'utf-8'));
        }
    }

    // Conversion to ASCII transliterates; a trailing chr(130) byte is expected to be dropped in a utf-8 to utf-8 pass.
    $this->assertSame('Zlutoucky konicek', core_text::convert($czech, 'utf-8', 'ascii'));
    $this->assertSame($czech, core_text::convert($czech.chr(130), 'utf-8', 'utf-8'));

    $german = "Der eine stößt den Speer zum Mann";
    $germanascii = 'Der eine stosst den Speer zum Mann';
    $this->assertSame($germanascii, core_text::convert($german, 'utf-8', 'ascii'));
    $latin1 = core_text::convert($german, 'utf-8', 'iso-8859-1');
    $this->assertSame($germanascii, core_text::convert($latin1, 'iso-8859-1', 'ascii'));

    $mixed = "A æ Übérmensch på høyeste nivå! И я люблю PHP! есть. アクセシビリティ. fi";
    $this->assertSame("A ae Ubermensch pa hoyeste niva! I a lublu PHP! est'. akuseshibiriti. fi",
        core_text::convert($mixed, 'utf-8', 'ascii'));

    // Check that null argument is allowed.
    $this->assertSame('', core_text::convert(null, 'utf-8', 'ascii'));
}
/**
 * Exercises core_text::substr() on UTF-8 text and on text in legacy and multi-byte charsets.
 *
 * @covers ::substr()
 */
public function test_substr() {
    $czech = "Žluťoučký koníček";
    $this->assertSame($czech, core_text::substr($czech, 0));
    $this->assertSame('luťoučký koníček', core_text::substr($czech, 1));
    $this->assertSame('luť', core_text::substr($czech, 1, 3));
    $this->assertSame($czech, core_text::substr($czech, 0, 100));
    $this->assertSame('če', core_text::substr($czech, -3, 2));

    // The same phrase in single-byte charsets: charset => hex dump of the encoded phrase.
    $singlebyte = [
        'iso-8859-2' => "ae6c75bb6f75e86bfd206b6f6eede8656b",
        'cp1250' => "8e6c759d6f75e86bfd206b6f6eede8656b",
    ];
    foreach ($singlebyte as $charset => $hex) {
        $encoded = pack("H*", $hex);
        $this->assertSame(core_text::convert('luť', 'utf-8', $charset), core_text::substr($encoded, 1, 3, $charset));
        $this->assertSame(core_text::convert($czech, 'utf-8', $charset), core_text::substr($encoded, 0, 100, $charset));
        $this->assertSame(core_text::convert('če', 'utf-8', $charset), core_text::substr($encoded, -3, 2, $charset));
    }

    // Multi-byte charsets: [charset, hex of the whole string, hex expected for the character at offset 1].
    $multibyte = [
        ['EUC-JP', "b8c0b8ecc0dfc4ea", "b8ec"],
        ['ISO-2022-JP', "1b24423840386c405f446a1b2842", "1b2442386c1b2842"],
        ['SHIFT-JIS', "8cbe8cea90dd92e8", "8cea"],
        ['GB2312', "bcf2cce5d6d0cec4", "cce5"],
        ['GB18030', "bcf2cce5d6d0cec4", "cce5"],
    ];
    foreach ($multibyte as [$charset, $wholehex, $charhex]) {
        $whole = pack("H*", $wholehex);
        $single = pack("H*", $charhex);
        $this->assertSame($single, core_text::substr($whole, 1, 1, $charset));
    }

    // Check that null argument is allowed.
    $this->assertSame('', core_text::substr(null, 1, 1));
    $this->assertSame('', core_text::substr(null, 1));
    $this->assertSame('', core_text::substr(null, 1, 1, 'cp1250'));
    $this->assertSame('', core_text::substr(null, 1, null, 'cp1250'));
}
/**
 * Checks that core_text::strlen() counts characters rather than bytes in several charsets.
 *
 * @covers ::strlen()
 */
public function test_strlen() {
    // UTF-8 is used when no charset is given.
    $this->assertSame(17, core_text::strlen("Žluťoučký koníček"));

    // Each entry: [charset, hex dump of the encoded text, expected character count].
    $cases = [
        ['iso-8859-2', "ae6c75bb6f75e86bfd206b6f6eede8656b", 17],
        ['cp1250', "8e6c759d6f75e86bfd206b6f6eede8656b", 17],
        ['EUC-JP', "b8ec", 1],
        ['EUC-JP', "b8c0b8ecc0dfc4ea", 4],
        ['ISO-2022-JP', "1b2442386c1b2842", 1],
        ['ISO-2022-JP', "1b24423840386c405f446a1b2842", 4],
        ['SHIFT-JIS', "8cea", 1],
        ['SHIFT-JIS', "8cbe8cea90dd92e8", 4],
        ['GB2312', "cce5", 1],
        ['GB2312', "bcf2cce5d6d0cec4", 4],
        ['GB18030', "cce5", 1],
        ['GB18030', "bcf2cce5d6d0cec4", 4],
    ];
    foreach ($cases as [$charset, $hex, $length]) {
        $this->assertSame($length, core_text::strlen(pack("H*", $hex), $charset));
    }

    // Check that null argument is allowed.
    $this->assertSame(0, core_text::strlen(null));
    $this->assertSame(0, core_text::strlen(null, 'cp1250'));
}
/**
 * Checks that core_text::str_max_bytes() truncates on character boundaries, never mid-character.
 *
 * @covers ::str_max_bytes()
 */
public function test_str_max_bytes() {
    // Four 3-byte characters, so the string is 12 bytes long.
    $threebyte = '言語設定';
    $this->assertEquals(12, strlen($threebyte));

    // Shrink the limit one byte at a time; whole characters are expected to drop off.
    // Each entry: [byte limit, expected byte length, expected text].
    $steps = [
        [12, 12, '言語設定'],
        [11, 9, '言語設'],
        [10, 9, '言語設'],
        [9, 9, '言語設'],
        [8, 6, '言語'],
    ];
    foreach ($steps as [$limit, $bytes, $text]) {
        $truncated = core_text::str_max_bytes($threebyte, $limit);
        $this->assertEquals($bytes, strlen($truncated));
        $this->assertSame($text, $truncated);
    }

    // A string mixing 3-byte characters with a single-byte one: 13 bytes in total.
    $mixed = '言語設a定';
    $this->assertEquals(13, strlen($mixed));

    $steps = [
        [11, 10, '言語設a'],
        [10, 10, '言語設a'],
        [9, 9, '言語設'],
        [8, 6, '言語'],
        // Test 0 byte case.
        [0, 0, ''],
    ];
    foreach ($steps as [$limit, $bytes, $text]) {
        $truncated = core_text::str_max_bytes($mixed, $limit);
        $this->assertEquals($bytes, strlen($truncated));
        $this->assertSame($text, $truncated);
    }

    // Check that null argument is allowed.
    $this->assertSame('', core_text::str_max_bytes(null, 1));
}
/**
 * Exercises core_text::strtolower() on cased and caseless text in several charsets.
 *
 * @covers ::strtolower()
 */
public function test_strtolower() {
    $mixedcase = "Žluťoučký koníček";
    $lowercase = 'žluťoučký koníček';
    $this->assertSame($lowercase, core_text::strtolower($mixedcase));

    // The same phrase in single-byte charsets: charset => hex dump of the mixed-case phrase.
    $singlebyte = [
        'iso-8859-2' => "ae6c75bb6f75e86bfd206b6f6eede8656b",
        'cp1250' => "8e6c759d6f75e86bfd206b6f6eede8656b",
    ];
    foreach ($singlebyte as $charset => $hex) {
        $encoded = pack("H*", $hex);
        $this->assertSame(core_text::convert($lowercase, 'utf-8', $charset), core_text::strtolower($encoded, $charset));
    }

    // Text in these samples is expected to come back unchanged.
    foreach (['言語設定', '简体中文'] as $caseless) {
        $this->assertSame($caseless, core_text::strtolower($caseless));
    }

    // The same expectation for encoded samples: charset => hex dump.
    $caselessencoded = [
        'ISO-2022-JP' => "1b24423840386c405f446a1b2842",
        'SHIFT-JIS' => "8cbe8cea90dd92e8",
        'GB2312' => "bcf2cce5d6d0cec4",
        'GB18030' => "bcf2cce5d6d0cec4",
    ];
    foreach ($caselessencoded as $charset => $hex) {
        $encoded = pack("H*", $hex);
        $this->assertSame($encoded, core_text::strtolower($encoded, $charset));
    }

    // An integer argument is expected to come back as its string form.
    $number = 1309528800;
    $this->assertSame((string)$number, core_text::strtolower($number));

    // Check that null argument is allowed.
    $this->assertSame('', core_text::strtolower(null));
    $this->assertSame('', core_text::strtolower(null, 'cp1250'));
}
/**
 * Exercises core_text::strtoupper() on cased and caseless text in several charsets.
 *
 * @covers ::strtoupper()
 */
public function test_strtoupper() {
    $mixedcase = "Žluťoučký koníček";
    $uppercase = 'ŽLUŤOUČKÝ KONÍČEK';
    $this->assertSame($uppercase, core_text::strtoupper($mixedcase));

    // The same phrase in single-byte charsets: charset => hex dump of the mixed-case phrase.
    $singlebyte = [
        'iso-8859-2' => "ae6c75bb6f75e86bfd206b6f6eede8656b",
        'cp1250' => "8e6c759d6f75e86bfd206b6f6eede8656b",
    ];
    foreach ($singlebyte as $charset => $hex) {
        $encoded = pack("H*", $hex);
        $this->assertSame(core_text::convert($uppercase, 'utf-8', $charset), core_text::strtoupper($encoded, $charset));
    }

    // Text in these samples is expected to come back unchanged.
    foreach (['言語設定', '简体中文'] as $caseless) {
        $this->assertSame($caseless, core_text::strtoupper($caseless));
    }

    // The same expectation for encoded samples: charset => hex dump.
    $caselessencoded = [
        'ISO-2022-JP' => "1b24423840386c405f446a1b2842",
        'SHIFT-JIS' => "8cbe8cea90dd92e8",
        'GB2312' => "bcf2cce5d6d0cec4",
        'GB18030' => "bcf2cce5d6d0cec4",
    ];
    foreach ($caselessencoded as $charset => $hex) {
        $encoded = pack("H*", $hex);
        $this->assertSame($encoded, core_text::strtoupper($encoded, $charset));
    }

    // Check that null argument is allowed.
    $this->assertSame('', core_text::strtoupper(null));
    $this->assertSame('', core_text::strtoupper(null, 'cp1250'));
}
/**
 * Checks that core_text::strrev() reverses multi-byte strings character by character.
 *
 * @covers ::strrev()
 */
public function test_strrev() {
    // Forward text => the same text reversed.
    $pairs = [
        "Žluťoučký koníček" => "kečínok ýkčuoťulŽ",
        'ŽLUŤOUČKÝ KONÍČEK' => "KEČÍNOK ÝKČUOŤULŽ",
        '言語設定' => '定設語言',
        '简体中文' => '文中体简',
        "Der eine stößt den Speer zum Mann" => "nnaM muz reepS ned tßöts enie reD",
    ];

    foreach ($pairs as $forward => $backward) {
        // Reversal must work in both directions.
        $this->assertSame($backward, core_text::strrev($forward));
        $this->assertSame($forward, core_text::strrev($backward));
        // Reversing twice must give back the starting string.
        $this->assertSame($backward, core_text::strrev(core_text::strrev($backward)));
    }

    // Check that null argument is allowed.
    $this->assertSame('', core_text::strrev(null));
}
/**
 * Checks that core_text::strpos() reports a character offset, not a byte offset.
 *
 * @covers ::strpos()
 */
public function test_strpos() {
    $haystack = "Žluťoučký koníček";
    $this->assertSame(10, core_text::strpos($haystack, 'koníč'));

    // Check that null argument is allowed.
    $this->assertFalse(core_text::strpos(null, 'a'));
}
/**
 * Checks that core_text::strrpos() reports the character offset of the last match.
 *
 * @covers ::strrpos()
 */
public function test_strrpos() {
    $haystack = "Žluťoučký koníček";
    $this->assertSame(11, core_text::strrpos($haystack, 'o'));

    // Check that null argument is allowed.
    $this->assertFalse(core_text::strrpos(null, 'o'));
}
/**
 * Checks the ASCII transliteration performed by core_text::specialtoascii().
 *
 * @covers ::specialtoascii()
 */
public function test_specialtoascii() {
    $czech = "Žluťoučký koníček";
    $this->assertSame('Zlutoucky konicek', core_text::specialtoascii($czech));

    // The same German sentence as UTF-8 and as ISO-8859-1 must give the same ASCII result.
    $german = "Der eine stößt den Speer zum Mann";
    $germanascii = 'Der eine stosst den Speer zum Mann';
    $latin1 = core_text::convert($german, 'utf-8', 'iso-8859-1');
    $this->assertSame($germanascii, core_text::specialtoascii($german));
    $this->assertSame($germanascii, core_text::specialtoascii($latin1, 'iso-8859-1'));

    $accented = 'àáâãäçèéêëìíîïñòóôõöùúûüýÿÀÁÂÃÄÇÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝ';
    $this->assertSame('aaaaaceeeeiiiinooooouuuuyyAAAAACEEEEIIIINOOOOOUUUUY', core_text::specialtoascii($accented));

    // Here the charset is passed as 'utf8', without the hyphen.
    $mixed = 'A æ Übérmensch på høyeste nivå! И я люблю PHP! есть. fi';
    $this->assertSame('A ae Ubermensch pa hoyeste niva! I a lublu PHP! est\'. fi', core_text::specialtoascii($mixed, 'utf8'));

    $scripts = 'キャンパス Αλφαβητικός Κατάλογος Лорем ипсум долор сит амет';
    $this->assertSame('kyanpasu Alphabetikos Katalogos Lorem ipsum dolor sit amet', core_text::specialtoascii($scripts));

    // Check that null argument is allowed.
    $this->assertSame('', core_text::specialtoascii(null));
    $this->assertSame('', core_text::specialtoascii(null, 'ascii'));
}
/**
 * Checks header encoding done by core_text::encode_mimeheader() and moodle_phpmailer::encodeHeader().
 *
 * @covers ::encode_mimeheader()
 * @covers \moodle_phpmailer::encodeHeader()
 */
public function test_encode_mimeheader() {
    global $CFG;
    require_once($CFG->libdir.'/phpmailer/moodle_phpmailer.php');
    $phpmailer = new moodle_phpmailer();

    // Encode short string with non-latin characters.
    $czech = "Žluťoučký koníček";
    $czechencoded = '=?utf-8?B?xb1sdcWlb3XEjWvDvSBrb27DrcSNZWs=?=';
    $this->assertSame($czechencoded, core_text::encode_mimeheader($czech));
    $this->assertSame($czechencoded, $phpmailer->encodeHeader($czech));
    $this->assertSame('"' . $czechencoded . '"', $phpmailer->encodeHeader($czech, 'phrase'));

    // Encode short string without non-latin characters. Make sure the quotes are escaped in quoted email headers.
    $quoted = 'text"with quotes';
    $this->assertSame($quoted, core_text::encode_mimeheader($quoted));
    $this->assertSame($quoted, $phpmailer->encodeHeader($quoted));
    $this->assertSame('"text\\"with quotes"', $phpmailer->encodeHeader($quoted, 'phrase'));

    // Encode long string without non-latin characters.
    $longlatin = 'This is a very long text that still should not be split into several lines in the email headers because '.
        'it does not have any non-latin characters. The "quotes" and \\backslashes should be escaped only if it\'s a part of email address';
    $this->assertSame($longlatin, core_text::encode_mimeheader($longlatin));
    $this->assertSame($longlatin, $phpmailer->encodeHeader($longlatin));
    // Build the expected 'phrase' form by escaping backslashes first, then double quotes.
    $longlatinescaped = preg_replace(['/\\\\/', "/\"/"], ['\\\\\\', '\\"'], $longlatin);
    $this->assertSame('"' . $longlatinescaped . '"', $phpmailer->encodeHeader($longlatin, 'phrase'));

    // Encode long string with non-latin characters.
    // NOTE(review): the expected value is a multi-line literal; its line breaks and the single leading
    // space on each continuation line are part of the value and must be kept exactly as they are.
    $longcyrillic = "Неопознанная ошибка в файле C:\\tmp\\: \"Не пользуйтесь виндоуз\"";
    $longcyrillicencoded = "=?utf-8?B?0J3QtdC+0L/QvtC30L3QsNC90L3QsNGPINC+0YjQuNCx0LrQsCDQsiDRhNCw?=
 =?utf-8?B?0LnQu9C1IEM6XHRtcFw6ICLQndC1INC/0L7Qu9GM0LfRg9C50YLQtdGB?=
 =?utf-8?B?0Ywg0LLQuNC90LTQvtGD0Lci?=";
    $this->assertSame($longcyrillicencoded, $phpmailer->encodeHeader($longcyrillic));
    $this->assertSame('"' . $longcyrillicencoded . '"', $phpmailer->encodeHeader($longcyrillic, 'phrase'));

    // Check that null argument is allowed.
    $this->assertSame('', core_text::encode_mimeheader(null));
}
/**
 * Checks that core_text::entities_to_utf8() decodes hexadecimal, decimal and named HTML entities.
 *
 * @covers ::entities_to_utf8()
 */
public function test_entities_to_utf8() {
    $encoded = "&#x17d;lu&#x165;ou&#x10d;k&#xfd; kon&iacute;&#269;ek&copy;&quot;&amp;&lt;&gt;&sect;&laquo;";
    $decoded = "Žluťoučký koníček©\"&<>§«";
    $this->assertSame($decoded, core_text::entities_to_utf8($encoded));

    // Check that null argument is allowed.
    $this->assertSame('', core_text::entities_to_utf8(null));
}
/**
 * Checks core_text::utf8_to_entities() with each combination of its two optional flags.
 *
 * @covers ::utf8_to_entities()
 */
public function test_utf8_to_entities() {
    // Input that starts with a hexadecimal entity; named entities are expected to be left alone here.
    $hexinput = "&#x17d;luťoučký kon&iacute;ček&copy;&quot;&amp;&lt;&gt;&sect;&laquo;";
    $this->assertSame(
        "&#x17d;lu&#x165;ou&#x10d;k&#xfd; kon&iacute;&#x10d;ek&copy;&quot;&amp;&lt;&gt;&sect;&laquo;",
        core_text::utf8_to_entities($hexinput)
    );
    $this->assertSame(
        "&#381;lu&#357;ou&#269;k&#253; kon&iacute;&#269;ek&copy;&quot;&amp;&lt;&gt;&sect;&laquo;",
        core_text::utf8_to_entities($hexinput, true)
    );

    // Input that starts with a decimal entity, run with the third argument enabled.
    $decinput = "&#381;luťoučký kon&iacute;ček&copy;&quot;&amp;&lt;&gt;&sect;&laquo;";
    $this->assertSame(
        "&#x17d;lu&#x165;ou&#x10d;k&#xfd; kon&#xed;&#x10d;ek&#xa9;\"&<>&#xa7;&#xab;",
        core_text::utf8_to_entities($decinput, false, true)
    );
    $this->assertSame(
        "&#381;lu&#357;ou&#269;k&#253; kon&#237;&#269;ek&#169;\"&<>&#167;&#171;",
        core_text::utf8_to_entities($decinput, true, true)
    );

    // Check that null argument is allowed.
    $this->assertSame('', core_text::utf8_to_entities(null));
    $this->assertSame('', core_text::utf8_to_entities(null, true));
}
/**
 * Checks that core_text::trim_utf8_bom() strips a leading UTF-8 byte order mark but keeps a trailing one.
 *
 * @covers ::trim_utf8_bom()
 */
public function test_trim_utf8_bom() {
    $bom = "\xef\xbb\xbf";
    $text = "Žluťoučký koníček";
    $wrapped = $bom.$text.$bom;
    $this->assertSame($text.$bom, core_text::trim_utf8_bom($wrapped));

    // Check that null argument is allowed.
    $this->assertNull(core_text::trim_utf8_bom(null));
}
/**
 * Checks that core_text::remove_unicode_non_characters() strips Unicode non-characters and nothing else.
 *
 * @covers ::remove_unicode_non_characters()
 */
public function test_remove_unicode_non_characters() {
    // Confirm that texts which don't contain these characters are unchanged.
    $this->assertSame('Frogs!', core_text::remove_unicode_non_characters('Frogs!'));

    // Even if they contain some very scary characters.
    $example = html_entity_decode('A&#xfffd;&#x1d15f;B', ENT_COMPAT);
    $this->assertSame($example, core_text::remove_unicode_non_characters($example));

    // Non-characters are removed wherever they may be, with other characters left.
    $example = html_entity_decode('&#xfffe;A&#xffff;B&#x8fffe;C&#xfdd0;D&#xfffd;E&#xfdd5;', ENT_COMPAT);
    $expected = html_entity_decode('ABCD&#xfffd;E', ENT_COMPAT);
    $this->assertSame($expected, core_text::remove_unicode_non_characters($example));

    // If you only have a non-character, you get empty string.
    $example = html_entity_decode('&#xfffe;', ENT_COMPAT);
    $this->assertSame('', core_text::remove_unicode_non_characters($example));

    // Check that null argument is allowed. This used to call trim_utf8_bom(), so the method
    // under test was never exercised with null.
    // NOTE(review): the expected '' assumes null is treated as an empty string - confirm against
    // core_text::remove_unicode_non_characters().
    $this->assertSame('', core_text::remove_unicode_non_characters(null));
}
/**
 * Checks that core_text::get_encodings() returns an array with more than one entry, including UTF-8.
 *
 * @covers ::get_encodings()
 */
public function test_get_encodings() {
    $encodings = core_text::get_encodings();

    // Dedicated assertions report the actual value on failure, unlike assertTrue() on a boolean expression.
    $this->assertIsArray($encodings);
    $this->assertGreaterThan(1, count($encodings));

    // isset() needs the key to exist and its value to be non-null, so both are checked.
    $this->assertArrayHasKey('UTF-8', $encodings);
    $this->assertNotNull($encodings['UTF-8']);
}
/**
 * Checks that core_text::code2utf8() turns a code point number into its UTF-8 character.
 *
 * @covers ::code2utf8()
 */
public function test_code2utf8() {
    $codepoint = 381;
    $this->assertSame('Ž', core_text::code2utf8($codepoint));
}
/**
 * Checks that core_text::utf8ord() returns the code point of a UTF-8 character.
 *
 * @covers ::utf8ord()
 */
public function test_utf8ord() {
    // Each entry: [expected code point, input character].
    $cases = [
        [ord(''), ''],
        [ord('f'), 'f'],
        [0x03B1, 'α'],
        [0x0439, 'й'],
        [0x2FA1F, '𯨟'],
        [381, 'Ž'],
    ];
    foreach ($cases as [$codepoint, $character]) {
        $this->assertSame($codepoint, core_text::utf8ord($character));
    }

    // Check that null argument is allowed.
    $this->assertSame(ord(''), core_text::utf8ord(null));
}
/**
 * Checks that core_text::strtotitle() capitalises the first letter of each word.
 *
 * @covers ::strtotitle()
 */
public function test_strtotitle() {
    $lowercase = "žluťoučký koníček";
    $titlecase = "Žluťoučký Koníček";
    $this->assertSame($titlecase, core_text::strtotitle($lowercase));

    // Check that null argument is allowed.
    $this->assertNull(core_text::strtotitle(null));
}
/**
 * Checks core_text::strrchr() for both the "from the match" and the "before the match" results.
 *
 * @covers ::strrchr()
 */
public function test_strrchr() {
    $haystack = "Žluťoučký koníček";

    // Without the third argument the part from the match onwards is returned; with it, the part before.
    $this->assertSame('koníček', core_text::strrchr($haystack, 'koní'));
    $this->assertSame('Žluťoučký ', core_text::strrchr($haystack, 'koní', true));

    // A needle that is absent gives false in either mode.
    $this->assertFalse(core_text::strrchr($haystack, 'A'));
    $this->assertFalse(core_text::strrchr($haystack, 'ç', true));

    // Check that null argument is allowed.
    $this->assertFalse(core_text::strrchr(null, 'o'));
}
/**
 * Checks core_text::is_charset_supported() against each charset supplied by the data provider.
 *
 * The charset name is passed through core_text::parse_charset() before it is checked.
 *
 * @dataProvider is_charset_supported_provider()
 * @param string $charset
 * @param bool $expected
 * @covers ::is_charset_supported()
 */
public function test_is_charset_supported(string $charset, bool $expected) {
    $normalised = core_text::parse_charset($charset);
    $supported = core_text::is_charset_supported($normalised);
    $this->assertEquals($expected, $supported);
}
621 * Provider for the test_is_charset_supported()
622 * @return array[]
624 public function is_charset_supported_provider() {
625 return [
626 "Check unsupported windows charset" => [
627 "cp1250", false
629 "Check supported windows charset" => [
630 "cp1252", true