lib/classes/text.php

   1 <?php
   2 // This file is part of Moodle - http://moodle.org/
   3 //
   4 // Moodle is free software: you can redistribute it and/or modify
   5 // it under the terms of the GNU General Public License as published by
   6 // the Free Software Foundation, either version 3 of the License, or
   7 // (at your option) any later version.
   8 //
   9 // Moodle is distributed in the hope that it will be useful,
  10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 // GNU General Public License for more details.
  13 //
  14 // You should have received a copy of the GNU General Public License
  15 // along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
  16
  17 /**
  18  * Defines string apis
  19  *
  20  * @package    core
  21  * @copyright  (C) 2001-3001 Eloy Lafuente (stronk7) {@link http://contiento.com}
  22  * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  23  */
  24
  25 defined('MOODLE_INTERNAL') || die();
  26
  27 /**
   28  * Defines string APIs for manipulating strings
   29  *
   30  * This class is used to manipulate strings under Moodle 1.6 and later. As
   31  * UTF-8 text became mandatory, a pool of safe functions for this encoding
   32  * became necessary. The method names are exactly the same as those of
   33  * their PHP originals.
  34  *
  35  * This class was previously based on Typo3 which has now been removed and uses
  36  * native functions now.
  37  *
  38  * @package   core
  39  * @category  string
  40  * @copyright 1999 onwards Martin Dougiamas  {@link http://moodle.com}
  41  * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  42  */
  43 class core_text {
  44     /** @var string Byte order mark for UTF-8 */
  45     const UTF8_BOM = "\xef\xbb\xbf";
  46
  47     /**
  48      * @var string[] Array of strings representing Unicode non-characters
  49      */
  50     protected static $noncharacters;
  51
  52     /**
  53      * Check whether the charset is supported by mbstring.
  54      * @param string $charset Normalised charset
  55      * @return bool
  56      */
  57     public static function is_charset_supported(string $charset): bool {
  58         static $cache = null;
  59         if (!$cache) {
  60             $cache = array_flip(array_map('strtolower', mb_list_encodings()));
  61         }
  62
  63         if (isset($cache[strtolower($charset)])) {
  64             return true;
  65         }
  66
  67         // We haven't found the charset, check if mb has aliases for the charset.
  68         try {
  69             return mb_encoding_aliases($charset) !== false;
  70         } catch (Throwable $e) {
  71             // A ValueError will be thrown if unsupported.
  72         }
  73
  74         return false;
  75     }
  76
  77     /**
  78      * Reset internal textlib caches.
  79      * @static
  80      * @deprecated since Moodle 4.0. See MDL-53544.
  81      * @todo To be removed in Moodle 4.4 - MDL-71748
  82      */
  83     public static function reset_caches() {
  84         debugging("reset_caches() is deprecated. Typo3 has been removed and caches aren't used anymore.", DEBUG_DEVELOPER);
  85     }
  86
  87     /**
  88      * Standardise charset name
  89      *
  90      * Please note it does not mean the returned charset is actually supported.
  91      *
  92      * @static
  93      * @param string $charset raw charset name
  94      * @return string normalised lowercase charset name
  95      */
  96     public static function parse_charset($charset) {
  97         $charset = strtolower($charset ?? '');
  98
  99         if ($charset === 'utf8' or $charset === 'utf-8') {
 100             return 'utf-8';
 101         }
 102
 103         if (preg_match('/^(cp|win|windows)-?(12[0-9]{2})$/', $charset, $matches)) {
 104             return 'windows-'.$matches[2];
 105         }
 106
 107         if (preg_match('/^iso-8859-[0-9]+$/', $charset, $matches)) {
 108             return $charset;
 109         }
 110
 111         if ($charset === 'euc-jp') {
 112             return 'euc-jp';
 113         }
 114         if ($charset === 'iso-2022-jp') {
 115             return 'iso-2022-jp';
 116         }
 117         if ($charset === 'shift-jis' or $charset === 'shift_jis') {
 118             return 'shift_jis';
 119         }
 120         if ($charset === 'gb2312') {
 121             return 'gb2312';
 122         }
 123         if ($charset === 'gb18030') {
 124             return 'gb18030';
 125         }
 126         if ($charset === 'ms-ansi') {
 127             return 'windows-1252';
 128         }
 129
 130         // We have reached this stage and haven't matched with anything. Return the original.
 131         return $charset;
 132     }
 133
 134     /**
 135      * Converts the text between different encodings. It uses iconv extension with //TRANSLIT parameter.
 136      * If both source and target are utf-8 it tries to fix invalid characters only.
 137      *
 138      * @param string $text
 139      * @param string $fromCS source encoding
 140      * @param string $toCS result encoding
 141      * @return string|bool converted string or false on error
 142      */
 143     public static function convert($text, $fromCS, $toCS='utf-8') {
 144         $fromCS = self::parse_charset($fromCS);
 145         $toCS   = self::parse_charset($toCS);
 146
 147         $text = (string)$text; // we can work only with strings
 148
 149         if ($text === '') {
 150             return '';
 151         }
 152
 153         if ($fromCS === 'utf-8') {
 154             $text = fix_utf8($text);
 155             if ($toCS === 'utf-8') {
 156                 return $text;
 157             }
 158         }
 159
 160         if ($toCS === 'ascii') {
 161             // Try to normalize the conversion a bit if the target is ascii.
 162             return self::specialtoascii($text, $fromCS);
 163         }
 164
 165         // Prevent any error notices, do not use //IGNORE so that we get
 166         // consistent result if iconv fails.
 167         return @iconv($fromCS, $toCS.'//TRANSLIT', $text);
 168     }
 169
 170     /**
 171      * Multibyte safe substr() function, uses mbstring or iconv
 172      *
 173      * @param string $text string to truncate
 174      * @param int $start negative value means from end
 175      * @param int $len maximum length of characters beginning from start
 176      * @param string $charset encoding of the text
 177      * @return string portion of string specified by the $start and $len
 178      */
 179     public static function substr($text, $start, $len=null, $charset='utf-8') {
 180         $charset = self::parse_charset($charset);
 181
 182         // Check whether the charset is supported by mbstring. CP1250 is not supported. Fall back to iconv.
 183         if (self::is_charset_supported($charset)) {
 184             $result = mb_substr($text ?? '', $start, $len, $charset);
 185         } else {
 186             $result = (string)iconv_substr($text ?? '', $start, $len, $charset);
 187         }
 188
 189         return $result;
 190     }
 191
 192     /**
 193      * Truncates a string to no more than a certain number of bytes in a multi-byte safe manner.
 194      * UTF-8 only!
 195      *
 196      * @param string $string String to truncate
 197      * @param int $bytes Maximum length of bytes in the result
 198      * @return string Portion of string specified by $bytes
 199      * @since Moodle 3.1
 200      */
 201     public static function str_max_bytes($string, $bytes) {
 202         return mb_strcut($string ?? '', 0, $bytes, 'UTF-8');
 203     }
 204
 205     /**
 206      * Finds the last occurrence of a character in a string within another.
 207      * UTF-8 ONLY safe mb_strrchr().
 208      *
 209      * @param string $haystack The string from which to get the last occurrence of needle.
 210      * @param string $needle The string to find in haystack.
 211      * @param boolean $part If true, returns the portion before needle, else return the portion after (including needle).
 212      * @return string|false False when not found.
 213      * @since Moodle 2.4.6, 2.5.2, 2.6
 214      */
 215     public static function strrchr($haystack, $needle, $part = false) {
 216         if (is_null($haystack)) {
 217             // Compatibility with behavior in PHP before version 8.1.
 218             return false;
 219         }
 220         return mb_strrchr($haystack, $needle, $part, 'UTF-8');
 221     }
 222
 223     /**
 224      * Multibyte safe strlen() function, uses mbstring or iconv
 225      *
 226      * @param string $text input string
 227      * @param string $charset encoding of the text
 228      * @return int number of characters
 229      */
 230     public static function strlen($text, $charset='utf-8') {
 231         $charset = self::parse_charset($charset);
 232
 233         if (self::is_charset_supported($charset)) {
 234             return mb_strlen($text ?? '', $charset);
 235         }
 236
 237         return iconv_strlen($text ?? '', $charset);
 238     }
 239
 240     /**
 241      * Multibyte safe strtolower() function, uses mbstring.
 242      *
 243      * @param string $text input string
 244      * @param string $charset encoding of the text (may not work for all encodings)
 245      * @return string lower case text
 246      */
 247     public static function strtolower($text, $charset='utf-8') {
 248         $charset = self::parse_charset($charset);
 249
 250         // Confirm mbstring can handle the charset.
 251         if (self::is_charset_supported($charset)) {
 252             return mb_strtolower($text ?? '', $charset);
 253         }
 254
 255         // The mbstring extension cannot handle the charset. Convert to UTF-8.
 256         $convertedtext = self::convert($text, $charset, 'utf-8');
 257         $result = mb_strtolower($convertedtext);
 258         $result = self::convert($result, 'utf-8', $charset);
 259         return $result;
 260     }
 261
 262     /**
 263      * Multibyte safe strtoupper() function, uses mbstring.
 264      *
 265      * @param string $text input string
 266      * @param string $charset encoding of the text (may not work for all encodings)
 267      * @return string upper case text
 268      */
 269     public static function strtoupper($text, $charset='utf-8') {
 270         $charset = self::parse_charset($charset);
 271
 272         // Confirm mbstring can handle the charset.
 273         if (self::is_charset_supported($charset)) {
 274             return mb_strtoupper($text ?? '', $charset);
 275         }
 276
 277         // The mbstring extension cannot handle the charset. Convert to UTF-8.
 278         $convertedtext = self::convert($text, $charset, 'utf-8');
 279         $result = mb_strtoupper($convertedtext);
 280         $result = self::convert($result, 'utf-8', $charset);
 281         return $result;
 282     }
 283
 284     /**
 285      * Find the position of the first occurrence of a substring in a string.
 286      * UTF-8 ONLY safe strpos(), uses mbstring
 287      *
 288      * @param string $haystack the string to search in
 289      * @param string $needle one or more charachters to search for
 290      * @param int $offset offset from begining of string
 291      * @return int the numeric position of the first occurrence of needle in haystack.
 292      */
 293     public static function strpos($haystack, $needle, $offset=0) {
 294         return mb_strpos($haystack ?? '', $needle, $offset, 'UTF-8');
 295     }
 296
 297     /**
 298      * Find the position of the last occurrence of a substring in a string
 299      * UTF-8 ONLY safe strrpos(), uses mbstring
 300      *
 301      * @param string $haystack the string to search in
 302      * @param string $needle one or more charachters to search for
 303      * @return int the numeric position of the last occurrence of needle in haystack
 304      */
 305     public static function strrpos($haystack, $needle) {
 306         if (is_null($haystack)) {
 307             // Compatibility with behavior in PHP before version 8.1.
 308             return false;
 309         }
 310         return mb_strrpos($haystack, $needle, 0, 'UTF-8');
 311     }
 312
 313     /**
 314      * Reverse UTF-8 multibytes character sets (used for RTL languages)
 315      * (We only do this because there is no mb_strrev or iconv_strrev)
 316      *
 317      * @param string $str the multibyte string to reverse
 318      * @return string the reversed multi byte string
 319      */
 320     public static function strrev($str) {
 321         preg_match_all('/./us', $str ?? '', $ar);
 322         return join('', array_reverse($ar[0]));
 323     }
 324
 325     /**
 326      * Try to convert upper unicode characters to plain ascii,
 327      * the returned string may contain unconverted unicode characters.
 328      *
 329      * With the removal of typo3, iconv conversions was found to be the best alternative to Typo3's function.
 330      * However using the standard iconv call
 331      *      iconv($charset, 'ASCII//TRANSLIT//IGNORE', (string) $text);
 332      * resulted in invalid strings with special character from Russian/Japanese. To solve this, the transliterator was
 333      * used but this resulted in empty strings for certain strings in our test. It was decided to use a combo of the 2
 334      * to cover all our bases. Refer MDL-53544 for further information.
 335      *
 336      * @param string $text input string
 337      * @param string $charset encoding of the text
 338      * @return string converted ascii string
 339      */
 340     public static function specialtoascii($text, $charset='utf-8') {
 341         $charset = self::parse_charset($charset);
 342         $oldlevel = error_reporting(E_PARSE);
 343
 344         // Always convert to utf-8, so transliteration can do its work always.
 345         if ($charset !== 'utf-8') {
 346             $text = iconv($charset, 'utf-8'.'//TRANSLIT', $text);
 347         }
 348         $text = transliterator_transliterate('Any-Latin; Latin-ASCII', (string) $text);
 349
 350         // Still, apply iconv because some chars are not handled by transliterate.
 351         $result = iconv('utf-8', 'ASCII//TRANSLIT//IGNORE', (string) $text);
 352
 353         error_reporting($oldlevel);
 354         return $result;
 355     }
 356
 357     /**
 358      * Generate a correct base64 encoded header to be used in MIME mail messages.
 359      * This function seems to be 100% compliant with RFC1342. Credits go to:
 360      * paravoid (http://www.php.net/manual/en/function.mb-encode-mimeheader.php#60283).
 361      *
 362      * @param string $text input string
 363      * @param string $charset encoding of the text
 364      * @return string base64 encoded header
 365      */
 366     public static function encode_mimeheader($text, $charset='utf-8') {
 367         if (empty($text)) {
 368             return (string)$text;
 369         }
 370         // Normalize charset
 371         $charset = self::parse_charset($charset);
 372         // If the text is pure ASCII, we don't need to encode it
 373         if (self::convert($text, $charset, 'ascii') == $text) {
 374             return $text;
 375         }
 376         // Although RFC says that line feed should be \r\n, it seems that
 377         // some mailers double convert \r, so we are going to use \n alone
 378         $linefeed="\n";
 379         // Define start and end of every chunk
 380         $start = "=?$charset?B?";
 381         $end = "?=";
 382         // Accumulate results
 383         $encoded = '';
 384         // Max line length is 75 (including start and end)
 385         $length = 75 - strlen($start) - strlen($end);
 386         // Multi-byte ratio
 387         $multilength = self::strlen($text, $charset);
 388         // Detect if strlen and friends supported
 389         if ($multilength === false) {
 390             if ($charset == 'GB18030' or $charset == 'gb18030') {
 391                 while (strlen($text)) {
 392                     // try to encode first 22 chars - we expect most chars are two bytes long
 393                     if (preg_match('/^(([\x00-\x7f])|([\x81-\xfe][\x40-\x7e])|([\x81-\xfe][\x80-\xfe])|([\x81-\xfe][\x30-\x39]..)){1,22}/m', $text, $matches)) {
 394                         $chunk = $matches[0];
 395                         $encchunk = base64_encode($chunk);
 396                         if (strlen($encchunk) > $length) {
 397                             // find first 11 chars - each char in 4 bytes - worst case scenario
 398                             preg_match('/^(([\x00-\x7f])|([\x81-\xfe][\x40-\x7e])|([\x81-\xfe][\x80-\xfe])|([\x81-\xfe][\x30-\x39]..)){1,11}/m', $text, $matches);
 399                             $chunk = $matches[0];
 400                             $encchunk = base64_encode($chunk);
 401                         }
 402                         $text = substr($text, strlen($chunk));
 403                         $encoded .= ' '.$start.$encchunk.$end.$linefeed;
 404                     } else {
 405                         break;
 406                     }
 407                 }
 408                 $encoded = trim($encoded);
 409                 return $encoded;
 410             } else {
 411                 return false;
 412             }
 413         }
 414         $ratio = $multilength / strlen($text);
 415         // Base64 ratio
 416         $magic = $avglength = floor(3 * $length * $ratio / 4);
 417         // basic infinite loop protection
 418         $maxiterations = strlen($text)*2;
 419         $iteration = 0;
 420         // Iterate over the string in magic chunks
 421         for ($i=0; $i <= $multilength; $i+=$magic) {
 422             if ($iteration++ > $maxiterations) {
 423                 return false; // probably infinite loop
 424             }
 425             $magic = $avglength;
 426             $offset = 0;
 427             // Ensure the chunk fits in length, reducing magic if necessary
 428             do {
 429                 $magic -= $offset;
 430                 $chunk = self::substr($text, $i, $magic, $charset);
 431                 $chunk = base64_encode($chunk);
 432                 $offset++;
 433             } while (strlen($chunk) > $length);
 434             // This chunk doesn't break any multi-byte char. Use it.
 435             if ($chunk)
 436                 $encoded .= ' '.$start.$chunk.$end.$linefeed;
 437         }
 438         // Strip the first space and the last linefeed
 439         $encoded = substr($encoded, 1, -strlen($linefeed));
 440
 441         return $encoded;
 442     }
 443
 444     /**
 445      * Returns HTML entity transliteration table.
 446      * @return array with (html entity => utf-8) elements
 447      */
 448     protected static function get_entities_table() {
 449         static $trans_tbl = null;
 450
 451         // Generate/create $trans_tbl
 452         if (!isset($trans_tbl)) {
 453             if (version_compare(phpversion(), '5.3.4') < 0) {
 454                 $trans_tbl = array();
 455                 foreach (get_html_translation_table(HTML_ENTITIES) as $val=>$key) {
 456                     $trans_tbl[$key] = self::convert($val, 'ISO-8859-1', 'utf-8');
 457                 }
 458
 459             } else if (version_compare(phpversion(), '5.4.0') < 0) {
 460                 $trans_tbl = get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'UTF-8');
 461                 $trans_tbl = array_flip($trans_tbl);
 462
 463             } else {
 464                 $trans_tbl = get_html_translation_table(HTML_ENTITIES, ENT_COMPAT | ENT_HTML401, 'UTF-8');
 465                 $trans_tbl = array_flip($trans_tbl);
 466             }
 467         }
 468
 469         return $trans_tbl;
 470     }
 471
 472     /**
 473      * Converts all the numeric entities &#nnnn; or &#xnnn; to UTF-8
 474      * Original from laurynas dot butkus at gmail at:
 475      * http://php.net/manual/en/function.html-entity-decode.php#75153
 476      * with some custom mods to provide more functionality
 477      *
 478      * @param string $str input string
 479      * @param boolean $htmlent convert also html entities (defaults to true)
 480      * @return string encoded UTF-8 string
 481      */
 482     public static function entities_to_utf8($str, $htmlent=true) {
 483         static $callback1 = null ;
 484         static $callback2 = null ;
 485
 486         if (!$callback1 or !$callback2) {
 487             $callback1 = function($matches) {
 488                 return core_text::code2utf8(hexdec($matches[1]));
 489             };
 490             $callback2 = function($matches) {
 491                 return core_text::code2utf8($matches[1]);
 492             };
 493         }
 494
 495         $result = (string)$str;
 496         $result = preg_replace_callback('/&#x([0-9a-f]+);/i', $callback1, $result);
 497         $result = preg_replace_callback('/&#([0-9]+);/', $callback2, $result);
 498
 499         // Replace literal entities (if desired)
 500         if ($htmlent) {
 501             $trans_tbl = self::get_entities_table();
 502             // It should be safe to search for ascii strings and replace them with utf-8 here.
 503             $result = strtr($result, $trans_tbl);
 504         }
 505         // Return utf8-ised string
 506         return $result;
 507     }
 508
 509     /**
 510      * Converts all Unicode chars > 127 to numeric entities &#nnnn; or &#xnnn;.
 511      *
 512      * @param string $str input string
 513      * @param boolean $dec output decadic only number entities
 514      * @param boolean $nonnum remove all non-numeric entities
 515      * @return string converted string
 516      */
 517     public static function utf8_to_entities($str, $dec=false, $nonnum=false) {
 518         static $callback = null ;
 519
 520         if ($nonnum) {
 521             $str = self::entities_to_utf8($str, true);
 522         }
 523
 524         $result = mb_strtolower(mb_encode_numericentity($str ?? '', [0xa0, 0xffff, 0, 0xffff], 'UTF-8', true));
 525
 526         // We cannot use the decimal equivalent of the above call due to the unit test and our allowance for
 527         // entities to be entered within the provided $str. Refer to the correspond unit test for examples.
 528         if ($dec) {
 529             if (!$callback) {
 530                 $callback = function($matches) {
 531                     return '&#' . hexdec($matches[1]) . ';';
 532                 };
 533             }
 534             $result = preg_replace_callback('/&#x([0-9a-f]+);/i', $callback, $result);
 535         }
 536
 537         return $result;
 538     }
 539
 540     /**
 541      * Removes the BOM from unicode string {@link http://unicode.org/faq/utf_bom.html}
 542      *
 543      * @param string $str input string
 544      * @return string
 545      */
 546     public static function trim_utf8_bom($str) {
 547         if (is_null($str)) {
 548             return null;
 549         }
 550         $bom = self::UTF8_BOM;
 551         if (strpos($str, $bom) === 0) {
 552             return substr($str, strlen($bom));
 553         }
 554         return $str;
 555     }
 556
 557     /**
 558      * There are a number of Unicode non-characters including the byte-order mark (which may appear
 559      * multiple times in a string) and also other ranges. These can cause problems for some
 560      * processing.
 561      *
 562      * This function removes the characters using string replace, so that the rest of the string
 563      * remains unchanged.
 564      *
 565      * @param string $value Input string
 566      * @return string Cleaned string value
 567      * @since Moodle 3.5
 568      */
 569     public static function remove_unicode_non_characters($value) {
 570         // Set up list of all Unicode non-characters for fast replacing.
 571         if (!self::$noncharacters) {
 572             self::$noncharacters = [];
 573             // This list of characters is based on the Unicode standard. It includes the last two
 574             // characters of each code planes 0-16 inclusive...
 575             for ($plane = 0; $plane <= 16; $plane++) {
 576                 $base = ($plane === 0 ? '' : dechex($plane));
 577                 self::$noncharacters[] = html_entity_decode('&#x' . $base . 'fffe;');
 578                 self::$noncharacters[] = html_entity_decode('&#x' . $base . 'ffff;');
 579             }
 580             // ...And the character range U+FDD0 to U+FDEF.
 581             for ($char = 0xfdd0; $char <= 0xfdef; $char++) {
 582                 self::$noncharacters[] = html_entity_decode('&#x' . dechex($char) . ';');
 583             }
 584         }
 585
 586         // Do character replacement.
 587         return str_replace(self::$noncharacters, '', $value);
 588     }
 589
 590     /**
 591      * Returns encoding options for select boxes, utf-8 and platform encoding first
 592      *
 593      * @return array encodings
 594      */
 595     public static function get_encodings() {
 596         $encodings = array();
 597         $encodings['UTF-8'] = 'UTF-8';
 598         $winenc = strtoupper(get_string('localewincharset', 'langconfig'));
 599         if ($winenc != '') {
 600             $encodings[$winenc] = $winenc;
 601         }
 602         $nixenc = strtoupper(get_string('oldcharset', 'langconfig'));
 603         $encodings[$nixenc] = $nixenc;
 604
 605         $listedencodings = mb_list_encodings();
 606         foreach ($listedencodings as $enc) {
 607             $enc = strtoupper($enc);
 608             $encodings[$enc] = $enc;
 609         }
 610         return $encodings;
 611     }
 612
 613     /**
 614      * Returns the utf8 string corresponding to the unicode value
 615      * (from php.net, courtesy - romans@void.lv)
 616      *
 617      * @param  int    $num one unicode value
 618      * @return string the UTF-8 char corresponding to the unicode value
 619      */
 620     public static function code2utf8($num) {
 621         if ($num < 128) {
 622             return chr($num);
 623         }
 624         if ($num < 2048) {
 625             return chr(($num >> 6) + 192) . chr(($num & 63) + 128);
 626         }
 627         if ($num < 65536) {
 628             return chr(($num >> 12) + 224) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
 629         }
 630         if ($num < 2097152) {
 631             return chr(($num >> 18) + 240) . chr((($num >> 12) & 63) + 128) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
 632         }
 633         return '';
 634     }
 635
 636     /**
 637      * Returns the code of the given UTF-8 character
 638      *
 639      * @param  string $utf8char one UTF-8 character
 640      * @return int    the code of the given character
 641      */
 642     public static function utf8ord($utf8char) {
 643         if ($utf8char == '') {
 644             return 0;
 645         }
 646         $ord0 = ord($utf8char[0]);
 647         if ($ord0 >= 0 && $ord0 <= 127) {
 648             return $ord0;
 649         }
 650         $ord1 = ord($utf8char[1]);
 651         if ($ord0 >= 192 && $ord0 <= 223) {
 652             return ($ord0 - 192) * 64 + ($ord1 - 128);
 653         }
 654         $ord2 = ord($utf8char[2]);
 655         if ($ord0 >= 224 && $ord0 <= 239) {
 656             return ($ord0 - 224) * 4096 + ($ord1 - 128) * 64 + ($ord2 - 128);
 657         }
 658         $ord3 = ord($utf8char[3]);
 659         if ($ord0 >= 240 && $ord0 <= 247) {
 660             return ($ord0 - 240) * 262144 + ($ord1 - 128 )* 4096 + ($ord2 - 128) * 64 + ($ord3 - 128);
 661         }
 662         return false;
 663     }
 664
 665     /**
 666      * Makes first letter of each word capital - words must be separated by spaces.
 667      * Use with care, this function does not work properly in many locales!!!
 668      *
 669      * @param string $text input string
 670      * @return string
 671      */
 672     public static function strtotitle($text) {
 673         if (empty($text)) {
 674             return $text;
 675         }
 676
 677         return mb_convert_case($text, MB_CASE_TITLE, 'UTF-8');
 678     }
 679 }