classes/kohana/utf8.php

   1 <?php defined('SYSPATH') or die('No direct script access.');
   2 /**
   3  * A port of [phputf8](http://phputf8.sourceforge.net/) to a unified set
   4  * of files. Provides multi-byte aware replacement string functions.
   5  *
   6  * For UTF-8 support to work correctly, the following requirements must be met:
   7  *
   8  * - PCRE needs to be compiled with UTF-8 support (--enable-utf8)
   9  * - Support for [Unicode properties](http://php.net/manual/reference.pcre.pattern.modifiers.php)
  10  *   is highly recommended (--enable-unicode-properties)
  11  * - UTF-8 conversion will be much more reliable if the
  12  *   [iconv extension](http://php.net/iconv) is loaded
  13  * - The [mbstring extension](http://php.net/mbstring) is highly recommended,
  14  *   but must not be overloading string functions
  15  *
  16  * [!!] This file is licensed differently from the rest of Kohana. As a port of
  17  * [phputf8](http://phputf8.sourceforge.net/), this file is released under the LGPL.
  18  *
  19  * @package    Kohana
  20  * @category   Base
  21  * @author     Kohana Team
  22  * @copyright  (c) 2007-2012 Kohana Team
  23  * @copyright  (c) 2005 Harry Fuecks
  24  * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
  25  */
  26 class Kohana_UTF8 {
  27
  28         /**
  29          * @var  boolean  Does the server support UTF-8 natively?
  30          */
  31         public static $server_utf8 = NULL;
  32
  33         /**
  34          * @var  array  List of called methods that have had their required file included.
  35          */
  36         public static $called = array();
  37
  38         /**
  39          * Recursively cleans arrays, objects, and strings. Removes ASCII control
  40          * codes and converts to the requested charset while silently discarding
  41          * incompatible characters.
  42          *
  43          *     UTF8::clean($_GET); // Clean GET data
  44          *
  45          * [!!] This method requires [Iconv](http://php.net/iconv)
  46          *
  47          * @param   mixed   $var        variable to clean
  48          * @param   string  $charset    character set, defaults to Kohana::$charset
  49          * @return  mixed
  50          * @uses    UTF8::strip_ascii_ctrl
  51          * @uses    UTF8::is_ascii
  52          */
  53         public static function clean($var, $charset = NULL)
  54         {
  55                 if ( ! $charset)
  56                 {
  57                         // Use the application character set
  58                         $charset = Kohana::$charset;
  59                 }
  60
  61                 if (is_array($var) OR is_object($var))
  62                 {
  63                         foreach ($var as $key => $val)
  64                         {
  65                                 // Recursion!
  66                                 $var[UTF8::clean($key)] = UTF8::clean($val);
  67                         }
  68                 }
  69                 elseif (is_string($var) AND $var !== '')
  70                 {
  71                         // Remove control characters
  72                         $var = UTF8::strip_ascii_ctrl($var);
  73
  74                         if ( ! UTF8::is_ascii($var))
  75                         {
  76
  77                                 // Set the mb_substitute_character() value into temporary variable
  78                                 $mb_substitute_character = mb_substitute_character();
  79
  80                                 // Disable substituting illigal characters with the default '?' character
  81                                 mb_substitute_character('none');
  82
  83                                 // mb_convert_encoding is expensive, so it is only used when needed
  84                                 $var = mb_convert_encoding($var, $charset, $charset);
  85
  86                                 // Reset mb_substitute_character() value back to the original setting
  87                                 mb_substitute_character($mb_substitute_character);
  88
  89                         }
  90                 }
  91
  92                 return $var;
  93         }
  94
  95         /**
  96          * Tests whether a string contains only 7-bit ASCII bytes. This is used to
  97          * determine when to use native functions or UTF-8 functions.
  98          *
  99          *     $ascii = UTF8::is_ascii($str);
 100          *
 101          * @param   mixed   $str    string or array of strings to check
 102          * @return  boolean
 103          */
 104         public static function is_ascii($str)
 105         {
 106                 if (is_array($str))
 107                 {
 108                         $str = implode($str);
 109                 }
 110
 111                 return ! preg_match('/[^\x00-\x7F]/S', $str);
 112         }
 113
 114         /**
 115          * Strips out device control codes in the ASCII range.
 116          *
 117          *     $str = UTF8::strip_ascii_ctrl($str);
 118          *
 119          * @param   string  $str    string to clean
 120          * @return  string
 121          */
 122         public static function strip_ascii_ctrl($str)
 123         {
 124                 return preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S', '', $str);
 125         }
 126
 127         /**
 128          * Strips out all non-7bit ASCII bytes.
 129          *
 130          *     $str = UTF8::strip_non_ascii($str);
 131          *
 132          * @param   string  $str    string to clean
 133          * @return  string
 134          */
 135         public static function strip_non_ascii($str)
 136         {
 137                 return preg_replace('/[^\x00-\x7F]+/S', '', $str);
 138         }
 139
 140         /**
 141          * Replaces special/accented UTF-8 characters by ASCII-7 "equivalents".
 142          *
 143          *     $ascii = UTF8::transliterate_to_ascii($utf8);
 144          *
 145          * @author  Andreas Gohr <andi@splitbrain.org>
 146          * @param   string  $str    string to transliterate
 147          * @param   integer $case   -1 lowercase only, +1 uppercase only, 0 both cases
 148          * @return  string
 149          */
 150         public static function transliterate_to_ascii($str, $case = 0)
 151         {
 152                 if ( ! isset(UTF8::$called[__FUNCTION__]))
 153                 {
 154                         require Kohana::find_file('utf8', __FUNCTION__);
 155
 156                         // Function has been called
 157                         UTF8::$called[__FUNCTION__] = TRUE;
 158                 }
 159
 160                 return _transliterate_to_ascii($str, $case);
 161         }
 162
 163         /**
 164          * Returns the length of the given string. This is a UTF8-aware version
 165          * of [strlen](http://php.net/strlen).
 166          *
 167          *     $length = UTF8::strlen($str);
 168          *
 169          * @param   string  $str    string being measured for length
 170          * @return  integer
 171          * @uses    UTF8::$server_utf8
 172          */
 173         public static function strlen($str)
 174         {
 175                 if (UTF8::$server_utf8)
 176                         return mb_strlen($str, Kohana::$charset);
 177
 178                 if ( ! isset(UTF8::$called[__FUNCTION__]))
 179                 {
 180                         require Kohana::find_file('utf8', __FUNCTION__);
 181
 182                         // Function has been called
 183                         UTF8::$called[__FUNCTION__] = TRUE;
 184                 }
 185
 186                 return _strlen($str);
 187         }
 188
 189         /**
 190          * Finds position of first occurrence of a UTF-8 string. This is a
 191          * UTF8-aware version of [strpos](http://php.net/strpos).
 192          *
 193          *     $position = UTF8::strpos($str, $search);
 194          *
 195          * @author  Harry Fuecks <hfuecks@gmail.com>
 196          * @param   string  $str    haystack
 197          * @param   string  $search needle
 198          * @param   integer $offset offset from which character in haystack to start searching
 199          * @return  integer position of needle
 200          * @return  boolean FALSE if the needle is not found
 201          * @uses    UTF8::$server_utf8
 202          */
 203         public static function strpos($str, $search, $offset = 0)
 204         {
 205                 if (UTF8::$server_utf8)
 206                         return mb_strpos($str, $search, $offset, Kohana::$charset);
 207
 208                 if ( ! isset(UTF8::$called[__FUNCTION__]))
 209                 {
 210                         require Kohana::find_file('utf8', __FUNCTION__);
 211
 212                         // Function has been called
 213                         UTF8::$called[__FUNCTION__] = TRUE;
 214                 }
 215
 216                 return _strpos($str, $search, $offset);
 217         }
 218
 219         /**
 220          * Finds position of last occurrence of a char in a UTF-8 string. This is
 221          * a UTF8-aware version of [strrpos](http://php.net/strrpos).
 222          *
 223          *     $position = UTF8::strrpos($str, $search);
 224          *
 225          * @author  Harry Fuecks <hfuecks@gmail.com>
 226          * @param   string  $str    haystack
 227          * @param   string  $search needle
 228          * @param   integer $offset offset from which character in haystack to start searching
 229          * @return  integer position of needle
 230          * @return  boolean FALSE if the needle is not found
 231          * @uses    UTF8::$server_utf8
 232          */
 233         public static function strrpos($str, $search, $offset = 0)
 234         {
 235                 if (UTF8::$server_utf8)
 236                         return mb_strrpos($str, $search, $offset, Kohana::$charset);
 237
 238                 if ( ! isset(UTF8::$called[__FUNCTION__]))
 239                 {
 240                         require Kohana::find_file('utf8', __FUNCTION__);
 241
 242                         // Function has been called
 243                         UTF8::$called[__FUNCTION__] = TRUE;
 244                 }
 245
 246                 return _strrpos($str, $search, $offset);
 247         }
 248
 249         /**
 250          * Returns part of a UTF-8 string. This is a UTF8-aware version
 251          * of [substr](http://php.net/substr).
 252          *
 253          *     $sub = UTF8::substr($str, $offset);
 254          *
 255          * @author  Chris Smith <chris@jalakai.co.uk>
 256          * @param   string  $str    input string
 257          * @param   integer $offset offset
 258          * @param   integer $length length limit
 259          * @return  string
 260          * @uses    UTF8::$server_utf8
 261          * @uses    Kohana::$charset
 262          */
 263         public static function substr($str, $offset, $length = NULL)
 264         {
 265                 if (UTF8::$server_utf8)
 266                         return ($length === NULL)
 267                                 ? mb_substr($str, $offset, mb_strlen($str), Kohana::$charset)
 268                                 : mb_substr($str, $offset, $length, Kohana::$charset);
 269
 270                 if ( ! isset(UTF8::$called[__FUNCTION__]))
 271                 {
 272                         require Kohana::find_file('utf8', __FUNCTION__);
 273
 274                         // Function has been called
 275                         UTF8::$called[__FUNCTION__] = TRUE;
 276                 }
 277
 278                 return _substr($str, $offset, $length);
 279         }
 280
 281         /**
 282          * Replaces text within a portion of a UTF-8 string. This is a UTF8-aware
 283          * version of [substr_replace](http://php.net/substr_replace).
 284          *
 285          *     $str = UTF8::substr_replace($str, $replacement, $offset);
 286          *
 287          * @author  Harry Fuecks <hfuecks@gmail.com>
 288          * @param   string  $str            input string
 289          * @param   string  $replacement    replacement string
 290          * @param   integer $offset         offset
 291          * @return  string
 292          */
 293         public static function substr_replace($str, $replacement, $offset, $length = NULL)
 294         {
 295                 if ( ! isset(UTF8::$called[__FUNCTION__]))
 296                 {
 297                         require Kohana::find_file('utf8', __FUNCTION__);
 298
 299                         // Function has been called
 300                         UTF8::$called[__FUNCTION__] = TRUE;
 301                 }
 302
 303                 return _substr_replace($str, $replacement, $offset, $length);
 304         }
 305
 306         /**
 307          * Makes a UTF-8 string lowercase. This is a UTF8-aware version
 308          * of [strtolower](http://php.net/strtolower).
 309          *
 310          *     $str = UTF8::strtolower($str);
 311          *
 312          * @author  Andreas Gohr <andi@splitbrain.org>
 313          * @param   string  $str    mixed case string
 314          * @return  string
 315          * @uses    UTF8::$server_utf8
 316          */
 317         public static function strtolower($str)
 318         {
 319                 if (UTF8::$server_utf8)
 320                         return mb_strtolower($str, Kohana::$charset);
 321
 322                 if ( ! isset(UTF8::$called[__FUNCTION__]))
 323                 {
 324                         require Kohana::find_file('utf8', __FUNCTION__);
 325
 326                         // Function has been called
 327                         UTF8::$called[__FUNCTION__] = TRUE;
 328                 }
 329
 330                 return _strtolower($str);
 331         }
 332
 333         /**
 334          * Makes a UTF-8 string uppercase. This is a UTF8-aware version
 335          * of [strtoupper](http://php.net/strtoupper).
 336          *
 337          * @author  Andreas Gohr <andi@splitbrain.org>
 338          * @param   string  $str    mixed case string
 339          * @return  string
 340          * @uses    UTF8::$server_utf8
 341          * @uses    Kohana::$charset
 342          */
 343         public static function strtoupper($str)
 344         {
 345                 if (UTF8::$server_utf8)
 346                         return mb_strtoupper($str, Kohana::$charset);
 347
 348                 if ( ! isset(UTF8::$called[__FUNCTION__]))
 349                 {
 350                         require Kohana::find_file('utf8', __FUNCTION__);
 351
 352                         // Function has been called
 353                         UTF8::$called[__FUNCTION__] = TRUE;
 354                 }
 355
 356                 return _strtoupper($str);
 357         }
 358
 359         /**
 360          * Makes a UTF-8 string's first character uppercase. This is a UTF8-aware
 361          * version of [ucfirst](http://php.net/ucfirst).
 362          *
 363          *     $str = UTF8::ucfirst($str);
 364          *
 365          * @author  Harry Fuecks <hfuecks@gmail.com>
 366          * @param   string  $str    mixed case string
 367          * @return  string
 368          */
 369         public static function ucfirst($str)
 370         {
 371                 if ( ! isset(UTF8::$called[__FUNCTION__]))
 372                 {
 373                         require Kohana::find_file('utf8', __FUNCTION__);
 374
 375                         // Function has been called
 376                         UTF8::$called[__FUNCTION__] = TRUE;
 377                 }
 378
 379                 return _ucfirst($str);
 380         }
 381
 382         /**
 383          * Makes the first character of every word in a UTF-8 string uppercase.
 384          * This is a UTF8-aware version of [ucwords](http://php.net/ucwords).
 385          *
 386          *     $str = UTF8::ucwords($str);
 387          *
 388          * @author  Harry Fuecks <hfuecks@gmail.com>
 389          * @param   string  $str    mixed case string
 390          * @return  string
 391          * @uses    UTF8::$server_utf8
 392          */
 393         public static function ucwords($str)
 394         {
 395                 if ( ! isset(UTF8::$called[__FUNCTION__]))
 396                 {
 397                         require Kohana::find_file('utf8', __FUNCTION__);
 398
 399                         // Function has been called
 400                         UTF8::$called[__FUNCTION__] = TRUE;
 401                 }
 402
 403                 return _ucwords($str);
 404         }
 405
 406         /**
 407          * Case-insensitive UTF-8 string comparison. This is a UTF8-aware version
 408          * of [strcasecmp](http://php.net/strcasecmp).
 409          *
 410          *     $compare = UTF8::strcasecmp($str1, $str2);
 411          *
 412          * @author  Harry Fuecks <hfuecks@gmail.com>
 413          * @param   string  $str1   string to compare
 414          * @param   string  $str2   string to compare
 415          * @return  integer less than 0 if str1 is less than str2
 416          * @return  integer greater than 0 if str1 is greater than str2
 417          * @return  integer 0 if they are equal
 418          */
 419         public static function strcasecmp($str1, $str2)
 420         {
 421                 if ( ! isset(UTF8::$called[__FUNCTION__]))
 422                 {
 423                         require Kohana::find_file('utf8', __FUNCTION__);
 424
 425                         // Function has been called
 426                         UTF8::$called[__FUNCTION__] = TRUE;
 427                 }
 428
 429                 return _strcasecmp($str1, $str2);
 430         }
 431
 432         /**
 433          * Returns a string or an array with all occurrences of search in subject
 434          * (ignoring case) and replaced with the given replace value. This is a
 435          * UTF8-aware version of [str_ireplace](http://php.net/str_ireplace).
 436          *
 437          * [!!] This function is very slow compared to the native version. Avoid
 438          * using it when possible.
 439          *
 440          * @author  Harry Fuecks <hfuecks@gmail.com
 441          * @param   string|array    $search     text to replace
 442          * @param   string|array    $replace    replacement text
 443          * @param   string|array    $str        subject text
 444          * @param   integer         $count      number of matched and replaced needles will be returned via this parameter which is passed by reference
 445          * @return  string  if the input was a string
 446          * @return  array   if the input was an array
 447          */
 448         public static function str_ireplace($search, $replace, $str, & $count = NULL)
 449         {
 450                 if ( ! isset(UTF8::$called[__FUNCTION__]))
 451                 {
 452                         require Kohana::find_file('utf8', __FUNCTION__);
 453
 454                         // Function has been called
 455                         UTF8::$called[__FUNCTION__] = TRUE;
 456                 }
 457
 458                 return _str_ireplace($search, $replace, $str, $count);
 459         }
 460
 461         /**
 462          * Case-insensitive UTF-8 version of strstr. Returns all of input string
 463          * from the first occurrence of needle to the end. This is a UTF8-aware
 464          * version of [stristr](http://php.net/stristr).
 465          *
 466          *     $found = UTF8::stristr($str, $search);
 467          *
 468          * @author Harry Fuecks <hfuecks@gmail.com>
 469          * @param   string  $str    input string
 470          * @param   string  $search needle
 471          * @return  string  matched substring if found
 472          * @return  FALSE   if the substring was not found
 473          */
 474         public static function stristr($str, $search)
 475         {
 476                 if ( ! isset(UTF8::$called[__FUNCTION__]))
 477                 {
 478                         require Kohana::find_file('utf8', __FUNCTION__);
 479
 480                         // Function has been called
 481                         UTF8::$called[__FUNCTION__] = TRUE;
 482                 }
 483
 484                 return _stristr($str, $search);
 485         }
 486
 487         /**
 488          * Finds the length of the initial segment matching mask. This is a
 489          * UTF8-aware version of [strspn](http://php.net/strspn).
 490          *
 491          *     $found = UTF8::strspn($str, $mask);
 492          *
 493          * @author Harry Fuecks <hfuecks@gmail.com>
 494          * @param   string  $str    input string
 495          * @param   string  $mask   mask for search
 496          * @param   integer $offset start position of the string to examine
 497          * @param   integer $length length of the string to examine
 498          * @return  integer length of the initial segment that contains characters in the mask
 499          */
 500         public static function strspn($str, $mask, $offset = NULL, $length = NULL)
 501         {
 502                 if ( ! isset(UTF8::$called[__FUNCTION__]))
 503                 {
 504                         require Kohana::find_file('utf8', __FUNCTION__);
 505
 506                         // Function has been called
 507                         UTF8::$called[__FUNCTION__] = TRUE;
 508                 }
 509
 510                 return _strspn($str, $mask, $offset, $length);
 511         }
 512
 513         /**
 514          * Finds the length of the initial segment not matching mask. This is a
 515          * UTF8-aware version of [strcspn](http://php.net/strcspn).
 516          *
 517          *     $found = UTF8::strcspn($str, $mask);
 518          *
 519          * @author  Harry Fuecks <hfuecks@gmail.com>
 520          * @param   string  $str    input string
 521          * @param   string  $mask   mask for search
 522          * @param   integer $offset start position of the string to examine
 523          * @param   integer $length length of the string to examine
 524          * @return  integer length of the initial segment that contains characters not in the mask
 525          */
 526         public static function strcspn($str, $mask, $offset = NULL, $length = NULL)
 527         {
 528                 if ( ! isset(UTF8::$called[__FUNCTION__]))
 529                 {
 530                         require Kohana::find_file('utf8', __FUNCTION__);
 531
 532                         // Function has been called
 533                         UTF8::$called[__FUNCTION__] = TRUE;
 534                 }
 535
 536                 return _strcspn($str, $mask, $offset, $length);
 537         }
 538
 539         /**
 540          * Pads a UTF-8 string to a certain length with another string. This is a
 541          * UTF8-aware version of [str_pad](http://php.net/str_pad).
 542          *
 543          *     $str = UTF8::str_pad($str, $length);
 544          *
 545          * @author  Harry Fuecks <hfuecks@gmail.com>
 546          * @param   string  $str                input string
 547          * @param   integer $final_str_length   desired string length after padding
 548          * @param   string  $pad_str            string to use as padding
 549          * @param   string  $pad_type           padding type: STR_PAD_RIGHT, STR_PAD_LEFT, or STR_PAD_BOTH
 550          * @return  string
 551          */
 552         public static function str_pad($str, $final_str_length, $pad_str = ' ', $pad_type = STR_PAD_RIGHT)
 553         {
 554                 if ( ! isset(UTF8::$called[__FUNCTION__]))
 555                 {
 556                         require Kohana::find_file('utf8', __FUNCTION__);
 557
 558                         // Function has been called
 559                         UTF8::$called[__FUNCTION__] = TRUE;
 560                 }
 561
 562                 return _str_pad($str, $final_str_length, $pad_str, $pad_type);
 563         }
 564
 565         /**
 566          * Converts a UTF-8 string to an array. This is a UTF8-aware version of
 567          * [str_split](http://php.net/str_split).
 568          *
 569          *     $array = UTF8::str_split($str);
 570          *
 571          * @author  Harry Fuecks <hfuecks@gmail.com>
 572          * @param   string  $str            input string
 573          * @param   integer $split_length   maximum length of each chunk
 574          * @return  array
 575          */
 576         public static function str_split($str, $split_length = 1)
 577         {
 578                 if ( ! isset(UTF8::$called[__FUNCTION__]))
 579                 {
 580                         require Kohana::find_file('utf8', __FUNCTION__);
 581
 582                         // Function has been called
 583                         UTF8::$called[__FUNCTION__] = TRUE;
 584                 }
 585
 586                 return _str_split($str, $split_length);
 587         }
 588
 589         /**
 590          * Reverses a UTF-8 string. This is a UTF8-aware version of [strrev](http://php.net/strrev).
 591          *
 592          *     $str = UTF8::strrev($str);
 593          *
 594          * @author  Harry Fuecks <hfuecks@gmail.com>
 595          * @param   string  $str    string to be reversed
 596          * @return  string
 597          */
 598         public static function strrev($str)
 599         {
 600                 if ( ! isset(UTF8::$called[__FUNCTION__]))
 601                 {
 602                         require Kohana::find_file('utf8', __FUNCTION__);
 603
 604                         // Function has been called
 605                         UTF8::$called[__FUNCTION__] = TRUE;
 606                 }
 607
 608                 return _strrev($str);
 609         }
 610
 611         /**
 612          * Strips whitespace (or other UTF-8 characters) from the beginning and
 613          * end of a string. This is a UTF8-aware version of [trim](http://php.net/trim).
 614          *
 615          *     $str = UTF8::trim($str);
 616          *
 617          * @author  Andreas Gohr <andi@splitbrain.org>
 618          * @param   string  $str        input string
 619          * @param   string  $charlist   string of characters to remove
 620          * @return  string
 621          */
 622         public static function trim($str, $charlist = NULL)
 623         {
 624                 if ( ! isset(UTF8::$called[__FUNCTION__]))
 625                 {
 626                         require Kohana::find_file('utf8', __FUNCTION__);
 627
 628                         // Function has been called
 629                         UTF8::$called[__FUNCTION__] = TRUE;
 630                 }
 631
 632                 return _trim($str, $charlist);
 633         }
 634
 635         /**
 636          * Strips whitespace (or other UTF-8 characters) from the beginning of
 637          * a string. This is a UTF8-aware version of [ltrim](http://php.net/ltrim).
 638          *
 639          *     $str = UTF8::ltrim($str);
 640          *
 641          * @author  Andreas Gohr <andi@splitbrain.org>
 642          * @param   string  $str        input string
 643          * @param   string  $charlist   string of characters to remove
 644          * @return  string
 645          */
 646         public static function ltrim($str, $charlist = NULL)
 647         {
 648                 if ( ! isset(UTF8::$called[__FUNCTION__]))
 649                 {
 650                         require Kohana::find_file('utf8', __FUNCTION__);
 651
 652                         // Function has been called
 653                         UTF8::$called[__FUNCTION__] = TRUE;
 654                 }
 655
 656                 return _ltrim($str, $charlist);
 657         }
 658
 659         /**
 660          * Strips whitespace (or other UTF-8 characters) from the end of a string.
 661          * This is a UTF8-aware version of [rtrim](http://php.net/rtrim).
 662          *
 663          *     $str = UTF8::rtrim($str);
 664          *
 665          * @author  Andreas Gohr <andi@splitbrain.org>
 666          * @param   string  $str        input string
 667          * @param   string  $charlist   string of characters to remove
 668          * @return  string
 669          */
 670         public static function rtrim($str, $charlist = NULL)
 671         {
 672                 if ( ! isset(UTF8::$called[__FUNCTION__]))
 673                 {
 674                         require Kohana::find_file('utf8', __FUNCTION__);
 675
 676                         // Function has been called
 677                         UTF8::$called[__FUNCTION__] = TRUE;
 678                 }
 679
 680                 return _rtrim($str, $charlist);
 681         }
 682
 683         /**
 684          * Returns the unicode ordinal for a character. This is a UTF8-aware
 685          * version of [ord](http://php.net/ord).
 686          *
 687          *     $digit = UTF8::ord($character);
 688          *
 689          * @author  Harry Fuecks <hfuecks@gmail.com>
 690          * @param   string  $chr    UTF-8 encoded character
 691          * @return  integer
 692          */
 693         public static function ord($chr)
 694         {
 695                 if ( ! isset(UTF8::$called[__FUNCTION__]))
 696                 {
 697                         require Kohana::find_file('utf8', __FUNCTION__);
 698
 699                         // Function has been called
 700                         UTF8::$called[__FUNCTION__] = TRUE;
 701                 }
 702
 703                 return _ord($chr);
 704         }
 705
 706         /**
 707          * Takes an UTF-8 string and returns an array of ints representing the Unicode characters.
 708          * Astral planes are supported i.e. the ints in the output can be > 0xFFFF.
 709          * Occurrences of the BOM are ignored. Surrogates are not allowed.
 710          *
 711          *     $array = UTF8::to_unicode($str);
 712          *
 713          * The Original Code is Mozilla Communicator client code.
 714          * The Initial Developer of the Original Code is Netscape Communications Corporation.
 715          * Portions created by the Initial Developer are Copyright (C) 1998 the Initial Developer.
 716          * Ported to PHP by Henri Sivonen <hsivonen@iki.fi>, see <http://hsivonen.iki.fi/php-utf8/>
 717          * Slight modifications to fit with phputf8 library by Harry Fuecks <hfuecks@gmail.com>
 718          *
 719          * @param   string  $str    UTF-8 encoded string
 720          * @return  array   unicode code points
 721          * @return  FALSE   if the string is invalid
 722          */
 723         public static function to_unicode($str)
 724         {
 725                 if ( ! isset(UTF8::$called[__FUNCTION__]))
 726                 {
 727                         require Kohana::find_file('utf8', __FUNCTION__);
 728
 729                         // Function has been called
 730                         UTF8::$called[__FUNCTION__] = TRUE;
 731                 }
 732
 733                 return _to_unicode($str);
 734         }
 735
 736         /**
 737          * Takes an array of ints representing the Unicode characters and returns a UTF-8 string.
 738          * Astral planes are supported i.e. the ints in the input can be > 0xFFFF.
 739          * Occurrences of the BOM are ignored. Surrogates are not allowed.
 740          *
 741          *     $str = UTF8::to_unicode($array);
 742          *
 743          * The Original Code is Mozilla Communicator client code.
 744          * The Initial Developer of the Original Code is Netscape Communications Corporation.
 745          * Portions created by the Initial Developer are Copyright (C) 1998 the Initial Developer.
 746          * Ported to PHP by Henri Sivonen <hsivonen@iki.fi>, see http://hsivonen.iki.fi/php-utf8/
 747          * Slight modifications to fit with phputf8 library by Harry Fuecks <hfuecks@gmail.com>.
 748          *
 749          * @param   array   $str    unicode code points representing a string
 750          * @return  string  utf8 string of characters
 751          * @return  boolean FALSE if a code point cannot be found
 752          */
 753         public static function from_unicode($arr)
 754         {
 755                 if ( ! isset(UTF8::$called[__FUNCTION__]))
 756                 {
 757                         require Kohana::find_file('utf8', __FUNCTION__);
 758
 759                         // Function has been called
 760                         UTF8::$called[__FUNCTION__] = TRUE;
 761                 }
 762
 763                 return _from_unicode($arr);
 764         }
 765
 766 } // End UTF8
 767
 768 if (Kohana_UTF8::$server_utf8 === NULL)
 769 {
 770         // Determine if this server supports UTF-8 natively
 771         Kohana_UTF8::$server_utf8 = extension_loaded('mbstring');
 772 }