vendor/mpdf/mpdf/src/Shaper/Indic.php

   1 <?php
   2
   3 namespace Mpdf\Shaper;
   4
   5 use Mpdf\Ucdn;
   6
   7 class Indic
   8 {
   9         /* FROM hb-ot-shape-complex-indic-private.hh */
  10
  11         // indic_category: shaping class of a character. Each value is also the index of its letter in $indic_category_char.
  12         const OT_X = 0; // Presumably "other" (no specific shaping class); 'x' in the category string
  13         const OT_C = 1; // Consonant
  14         const OT_V = 2; // Vowel (starts a vowel-based syllable)
  15         const OT_N = 3; // Nukta
  16         const OT_H = 4; // Halant
  17         const OT_ZWNJ = 5; // U+200C
  18         const OT_ZWJ = 6; // U+200D
  19         const OT_M = 7; /* Matra or Dependent Vowel */
  20         const OT_SM = 8; // Presumably "syllable modifier"; positioned at POS_SMVD
  21         const OT_VD = 9; // Presumably "Vedic sign"; U+0951..U+0954 are forced to this; positioned at POS_SMVD
  22         const OT_A = 10; // Anudatta (U+0952) per the spec, but see set_indic_properties: it is treated as OT_VD
  23         const OT_NBSP = 11; // Presumably no-break space; treated as consonant-like when assigning positions
  24         const OT_DOTTEDCIRCLE = 12; /* U+25CC. Not in the spec, but special in Uniscribe. /Very very/ special! */
  25         const OT_RS = 13; /* Register Shifter, used in Khmer OT spec */
  26         const OT_COENG = 14; // Khmer coeng (U+17D2)
  27         const OT_REPHA = 15; // Repha; non-spacing-mark Rephas are recategorised as OT_N in set_indic_properties
  28
  29         const OT_RA = 16; /* Not explicitly listed in the OT spec, but used in the grammar. */
  30         const OT_CM = 17; // Consonant medial
  31
  32         /* Visual positions in a syllable from left to right. */
  33         /* FROM hb-ot-shape-complex-indic-private.hh */
  34
  35         // indic_position: visual slot of a glyph inside its syllable, ordered left to right (smaller value = further left)
  36         const POS_START = 0;
  37
  38         const POS_RA_TO_BECOME_REPH = 1;
  39         const POS_PRE_M = 2;
  40         const POS_PRE_C = 3;
  41
  42         const POS_BASE_C = 4; // Initial position of consonant-like characters outside Khmer; refined by update_consonant_positions
  43         const POS_AFTER_MAIN = 5;
  44
  45         const POS_ABOVE_C = 6; // Given to Khmer various signs (U+17CB..U+17D3) that are treated like top matras
  46
  47         const POS_BEFORE_SUB = 7; // Given to U+0B01 (Oriya Bindu)
  48         const POS_BELOW_C = 8; // Initial position of Khmer consonant-like characters; also consonants with a 'blwf' substitution
  49         const POS_AFTER_SUB = 9;
  50
  51         const POS_BEFORE_POST = 10;
  52         const POS_POST_C = 11; // Consonants with a 'pref' or 'pstf' substitution
  53         const POS_AFTER_POST = 12;
  54
  55         const POS_FINAL_C = 13;
  56         const POS_SMVD = 14; // Given to OT_SM and OT_VD characters
  57
  58         const POS_END = 15;
  59
  60         /*
  61          * Basic features.
  62          * These features are applied in order, one at a time, after initial_reordering.
  63          */
  64
  65         /*
  66          * Must be in the same order as the indic_features array. Ones starting with _ are F_GLOBAL
  67          * Ones without the _ are only applied where the mask says!
  68          */
  69
  70         const _NUKT = 0; // 'nukt' - names below presumably mirror the OpenType feature tags (here: nukta forms)
  71         const _AKHN = 1; // 'akhn' - presumably akhand ligatures
  72         const RPHF = 2; // 'rphf' - reph forms (see the $GSUBdata['rphf'] lookups)
  73         const _RKRF = 3; // 'rkrf' - presumably rakar forms
  74         const PREF = 4; // 'pref' - pre-base forms (see $GSUBdata['pref'])
  75         const BLWF = 5; // 'blwf' - below-base forms (see $GSUBdata['blwf'])
  76         const HALF = 6; // 'half' - presumably half forms
  77         const ABVF = 7; // 'abvf' - presumably above-base forms
  78         const PSTF = 8; // 'pstf' - post-base forms (see $GSUBdata['pstf'])
  79         const CFAR = 9; // 'cfar' - Khmer only
  80         const _VATU = 10; // 'vatu' - presumably vattu variants
  81         const _CJCT = 11; // 'cjct' - presumably conjunct forms
  82         const INIT = 12; // 'init' - presumably initial forms
  83
  84         // Based on indic_category used to make string to find syllables
  85         // OT_ to string character (using e.g. OT_C from INDIC) hb-ot-shape-complex-indic-private.hh
  86         public static $indic_category_char = [ // Indexed by OT_* category value; one letter per category
  87                 'x', // OT_X
  88                 'C', // OT_C
  89                 'V', // OT_V
  90                 'N', // OT_N
  91                 'H', // OT_H
  92                 'Z', // OT_ZWNJ
  93                 'J', // OT_ZWJ
  94                 'M', // OT_M
  95                 'S', // OT_SM
  96                 'v', // OT_VD
  97                 'A', /* OT_A. Spec gives Anudatta U+0952 as OT_A. However, testing shows that Uniscribe
  98                  * treats U+0951..U+0952 all as OT_VD - see set_indic_properties */
  99                 's', // OT_NBSP
 100                 'D', // OT_DOTTEDCIRCLE
 101                 'F', /* OT_RS: register shifter, Khmer only */
 102                 'G', /* OT_COENG: Khmer only */
 103                 'r', /* OT_REPHA: 0D4E (dot reph) only one in Malayalam */
 104                 'R', // OT_RA
 105                 'm', /* OT_CM: consonant medial, only used in Indic 0A75 in Gurmukhi  (0A00..0A7F)  : also in Lao, Myanmar, Tai Tham, Javanese & Cham  */
 106         ];
 107
 108         public static function set_indic_properties(&$info, $scriptblock)
 109         {
 110                 $u = $info['uni'];
 111                 $type = self::indic_get_categories($u);
 112                 $cat = ($type & 0x7F);
 113                 $pos = ($type >> 8);
 114
 115                 /*
 116                  * Re-assign category
 117                  */
 118
 119                 if ($u == 0x17D1) {
 120                         $cat = self::OT_X;
 121                 }
 122
 123                 if ($cat == self::OT_X && self::in_range($u, 0x17CB, 0x17D3)) { /* Khmer Various signs */
 124                         /* These are like Top Matras. */
 125                         $cat = self::OT_M;
 126                         $pos = self::POS_ABOVE_C;
 127                 }
 128
 129                 if ($u == 0x17C6) {
 130                         $cat = self::OT_N;
 131                 } /* Khmer Bindu doesn't like to be repositioned. */
 132
 133                 if ($u == 0x17D2) {
 134                         $cat = self::OT_COENG;
 135                 } /* Khmer coeng */
 136
 137                 /* The spec says U+0952 is OT_A.        However, testing shows that Uniscribe
 138                  * treats U+0951..U+0952 all as OT_VD.
 139                  * TESTS:
 140                  * U+092E,U+0947,U+0952
 141                  * U+092E,U+0952,U+0947
 142                  * U+092E,U+0947,U+0951
 143                  * U+092E,U+0951,U+0947
 144                  * */
 145                 //if ($u == 0x0952) $cat = self::OT_A;
 146                 if (self::in_range($u, 0x0951, 0x0954)) {
 147                         $cat = self::OT_VD;
 148                 }
 149
 150                 if ($u == 0x200C) {
 151                         $cat = self::OT_ZWNJ;
 152                 } else if ($u == 0x200D) {
 153                         $cat = self::OT_ZWJ;
 154                 } else if ($u == 0x25CC) {
 155                         $cat = self::OT_DOTTEDCIRCLE;
 156                 } else if ($u == 0x0A71) {
 157                         $cat = self::OT_SM;
 158                 } /* GURMUKHI ADDAK.    More like consonant medial. like 0A75. */
 159
 160                 if ($cat == self::OT_REPHA) {
 161                         /* There are two kinds of characters marked as Repha:
 162                          * - The ones that are GenCat=Mn are already positioned visually, ie. after base. (eg. Khmer)
 163                          * - The ones that are GenCat=Lo is encoded logically, ie. beginning of syllable. (eg. Malayalam)
 164                          *
 165                          * We recategorize the first kind to look like a Nukta and attached to the base directly.
 166                          */
 167                         if ($info['general_category'] == Ucdn::UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) {
 168                                 $cat = self::OT_N;
 169                         }
 170                 }
 171
 172                 /*
 173                  * Re-assign position.
 174                  */
 175
 176                 if ((self::FLAG($cat) & (self::FLAG(self::OT_C) | self::FLAG(self::OT_CM) | self::FLAG(self::OT_RA) | self::FLAG(self::OT_V) | self::FLAG(self::OT_NBSP) | self::FLAG(self::OT_DOTTEDCIRCLE)))) { // = CONSONANT_FLAGS like is_consonant
 177                         if ($scriptblock == Ucdn::SCRIPT_KHMER) {
 178                                 $pos = self::POS_BELOW_C;
 179                         } /* Khmer differs from Indic here. */
 180                         else {
 181                                 $pos = self::POS_BASE_C;
 182                         } /* Will recategorize later based on font lookups. */
 183
 184                         if (self::is_ra($u)) {
 185                                 $cat = self::OT_RA;
 186                         }
 187                 } else if ($cat == self::OT_M) {
 188                         $pos = self::matra_position($u, $pos);
 189                 } else if ($cat == self::OT_SM || $cat == self::OT_VD) {
 190                         $pos = self::POS_SMVD;
 191                 }
 192
 193                 if ($u == 0x0B01) {
 194                         $pos = self::POS_BEFORE_SUB;
 195                 } /* Oriya Bindu is BeforeSub in the spec. */
 196
 197                 $info['indic_category'] = $cat;
 198                 $info['indic_position'] = $pos;
 199         }
 200
 201         // syllable_type: stored in the low 4 bits of $info[..]['syllable']; the syllable serial number sits above them
 202         const CONSONANT_SYLLABLE = 0; // Matched by the consonant-syllable pattern
 203         const VOWEL_SYLLABLE = 1; // Matched by the vowel-syllable pattern
 204         const STANDALONE_CLUSTER = 2; // Cluster based on 's' (OT_NBSP) or 'D' (dotted circle); only tried at a word start
 205         const BROKEN_CLUSTER = 3; // Marks without a valid base; insert_dotted_circles adds a dotted circle for these
 206         const NON_INDIC_CLUSTER = 4; // Default when no pattern matches; skipped by initial_reordering_syllable
 207
 208         public static function set_syllables(&$o, $s, &$broken_syllables)
 209         {
 210                 $ptr = 0;
 211                 $syllable_serial = 1;
 212                 $broken_syllables = false;
 213
 214                 while ($ptr < strlen($s)) {
 215                         $match = '';
 216                         $syllable_length = 1;
 217                         $syllable_type = self::NON_INDIC_CLUSTER;
 218                         // CONSONANT_SYLLABLE Consonant syllable
 219                         // From OT spec:
 220                         if (preg_match('/^([CR]m*[N]?(H[ZJ]?|[ZJ]H))*[CR]m*[N]?[A]?(H[ZJ]?|[M]*[N]?[H]?)?[S]?[v]{0,2}/', substr($s, $ptr), $ma)) {
 221                                 // From HarfBuzz:
 222                                 //if (preg_match('/^r?([CR]J?(Z?[N]{0,2})?[ZJ]?H(J[N]?)?){0,4}[CR]J?(Z?[N]{0,2})?A?((([ZJ]?H(J[N]?)?)|HZ)|(HJ)?([ZJ]{0,3}M[N]?(H|JHJR)?){0,4})?(S[Z]?)?[v]{0,2}/', substr($s,$ptr), $ma)) {
 223                                 $syllable_length = strlen($ma[0]);
 224                                 $syllable_type = self::CONSONANT_SYLLABLE;
 225                         } // VOWEL_SYLLABLE Vowel-based syllable
 226                         // From OT spec:
 227                         else if (preg_match('/^(RH|r)?V[N]?([ZJ]?H[CR]m*|J[CR]m*)?([M]*[N]?[H]?)?[S]?[v]{0,2}/', substr($s, $ptr), $ma)) {
 228                                 // From HarfBuzz:
 229                                 //else if (preg_match('/^(RH|r)?V(Z?[N]{0,2})?(J|([ZJ]?H(J[N]?)?[CR]J?(Z?[N]{0,2})?){0,4}((([ZJ]?H(J[N]?)?)|HZ)|(HJ)?([ZJ]{0,3}M[N]?(H|JHJR)?){0,4})?(S[Z]?)?[v]{0,2})/', substr($s,$ptr), $ma)) {
 230                                 $syllable_length = strlen($ma[0]);
 231                                 $syllable_type = self::VOWEL_SYLLABLE;
 232                         } /* Apply only if it's a word start. */
 233                         // STANDALONE_CLUSTER Stand Alone syllable at start of word
 234                         // From OT spec:
 235                         else if (($ptr == 0 ||
 236                                 $o[$ptr - 1]['general_category'] < Ucdn::UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER ||
 237                                 $o[$ptr - 1]['general_category'] > Ucdn::UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK
 238                                 ) && (preg_match('/^(RH|r)?[sD][N]?([ZJ]?H[CR]m*)?([M]*[N]?[H]?)?[S]?[v]{0,2}/', substr($s, $ptr), $ma))) {
 239                                 // From HarfBuzz:
 240                                 // && (preg_match('/^(RH|r)?[sD](Z?[N]{0,2})?(([ZJ]?H(J[N]?)?)[CR]J?(Z?[N]{0,2})?){0,4}((([ZJ]?H(J[N]?)?)|HZ)|(HJ)?([ZJ]{0,3}M[N]?(H|JHJR)?){0,4})?(S[Z]?)?[v]{0,2}/', substr($s,$ptr), $ma)) {
 241                                 $syllable_length = strlen($ma[0]);
 242                                 $syllable_type = self::STANDALONE_CLUSTER;
 243                         } // BROKEN_CLUSTER syllable
 244                         else if (preg_match('/^(RH|r)?[N]?([ZJ]?H[CR])?([M]*[N]?[H]?)?[S]?[v]{0,2}/', substr($s, $ptr), $ma)) {
 245                                 // From HarfBuzz:
 246                                 //else if (preg_match('/^(RH|r)?(Z?[N]{0,2})?(([ZJ]?H(J[N]?)?)[CR]J?(Z?[N]{0,2})?){0,4}((([ZJ]?H(J[N]?)?)|HZ)|(HJ)?([ZJ]{0,3}M[N]?(H|JHJR)?){0,4})(S[Z]?)?[v]{0,2}/', substr($s,$ptr), $ma)) {
 247                                 if (strlen($ma[0])) { // May match blank
 248                                         $syllable_length = strlen($ma[0]);
 249                                         $syllable_type = self::BROKEN_CLUSTER;
 250                                         $broken_syllables = true;
 251                                 }
 252                         }
 253
 254                         for ($i = $ptr; $i < $ptr + $syllable_length; $i++) {
 255                                 $o[$i]['syllable'] = ($syllable_serial << 4) | $syllable_type;
 256                         }
 257                         $ptr += $syllable_length;
 258                         $syllable_serial++;
 259                         if ($syllable_serial == 16) {
 260                                 $syllable_serial = 1;
 261                         }
 262                 }
 263         }
 264
 265         public static function set_syllables_sinhala(&$o, $s, &$broken_syllables)
 266         {
 267                 $ptr = 0;
 268                 $syllable_serial = 1;
 269                 $broken_syllables = false;
 270
 271                 while ($ptr < strlen($s)) {
 272                         $match = '';
 273                         $syllable_length = 1;
 274                         $syllable_type = self::NON_INDIC_CLUSTER;
 275                         // CONSONANT_SYLLABLE Consonant syllable
 276                         // From OT spec:
 277                         if (preg_match('/^([CR]HJ|[CR]JH){0,8}[CR][HM]{0,3}[S]{0,1}/', substr($s, $ptr), $ma)) {
 278                                 $syllable_length = strlen($ma[0]);
 279                                 $syllable_type = self::CONSONANT_SYLLABLE;
 280                         } // VOWEL_SYLLABLE Vowel-based syllable
 281                         // From OT spec:
 282                         else if (preg_match('/^V[S]{0,1}/', substr($s, $ptr), $ma)) {
 283                                 $syllable_length = strlen($ma[0]);
 284                                 $syllable_type = self::VOWEL_SYLLABLE;
 285                         }
 286
 287                         for ($i = $ptr; $i < $ptr + $syllable_length; $i++) {
 288                                 $o[$i]['syllable'] = ($syllable_serial << 4) | $syllable_type;
 289                         }
 290                         $ptr += $syllable_length;
 291                         $syllable_serial++;
 292                         if ($syllable_serial == 16) {
 293                                 $syllable_serial = 1;
 294                         }
 295                 }
 296         }
 297
 298         public static function set_syllables_khmer(&$o, $s, &$broken_syllables)
 299         {
 300                 $ptr = 0;
 301                 $syllable_serial = 1;
 302                 $broken_syllables = false;
 303
 304                 while ($ptr < strlen($s)) {
 305                         $match = '';
 306                         $syllable_length = 1;
 307                         $syllable_type = self::NON_INDIC_CLUSTER;
 308                         // CONSONANT_SYLLABLE Consonant syllable
 309                         if (preg_match('/^r?([CR]J?((Z?F)?[N]{0,2})?[ZJ]?G(JN?)?){0,4}[CR]J?((Z?F)?[N]{0,2})?A?((([ZJ]?G(JN?)?)|GZ)|(GJ)?([ZJ]{0,3}MN?(H|JHJR)?){0,4})?(G([CR]J?((Z?F)?[N]{0,2})?|V))?(SZ?)?[v]{0,2}/', substr($s, $ptr), $ma)) {
 310                                 $syllable_length = strlen($ma[0]);
 311                                 $syllable_type = self::CONSONANT_SYLLABLE;
 312                         } // VOWEL_SYLLABLE Vowel-based syllable
 313                         else if (preg_match('/^(RH|r)?V((Z?F)?[N]{0,2})?(J|([ZJ]?G(JN?)?[CR]J?((Z?F)?[N]{0,2})?){0,4}((([ZJ]?G(JN?)?)|GZ)|(GJ)?([ZJ]{0,3}MN?(H|JHJR)?){0,4})?(G([CR]J?((Z?F)?[N]{0,2})?|V))?(SZ?)?[v]{0,2})/', substr($s, $ptr), $ma)) {
 314                                 $syllable_length = strlen($ma[0]);
 315                                 $syllable_type = self::VOWEL_SYLLABLE;
 316                         } // BROKEN_CLUSTER syllable
 317                         else if (preg_match('/^(RH|r)?((Z?F)?[N]{0,2})?(([ZJ]?G(JN?)?)[CR]J?((Z?F)?[N]{0,2})?){0,4}((([ZJ]?G(JN?)?)|GZ)|(GJ)?([ZJ]{0,3}MN?(H|JHJR)?){0,4})(G([CR]J?((Z?F)?[N]{0,2})?|V))?(SZ?)?[v]{0,2}/', substr($s, $ptr), $ma)) {
 318                                 if (strlen($ma[0])) { // May match blank
 319                                         $syllable_length = strlen($ma[0]);
 320                                         $syllable_type = self::BROKEN_CLUSTER;
 321                                         $broken_syllables = true;
 322                                 }
 323                         }
 324
 325                         for ($i = $ptr; $i < $ptr + $syllable_length; $i++) {
 326                                 $o[$i]['syllable'] = ($syllable_serial << 4) | $syllable_type;
 327                         }
 328                         $ptr += $syllable_length;
 329                         $syllable_serial++;
 330                         if ($syllable_serial == 16) {
 331                                 $syllable_serial = 1;
 332                         }
 333                 }
 334         }
 335
 336         public static function initial_reordering(&$info, $GSUBdata, $broken_syllables, $indic_config, $scriptblock, $is_old_spec, $dottedcircle)
 337         {
 338
 339                 self::update_consonant_positions($info, $GSUBdata);
 340
 341                 if ($broken_syllables && $dottedcircle) {
 342                         self::insert_dotted_circles($info, $dottedcircle);
 343                 }
 344
 345                 $count = count($info);
 346                 if (!$count) {
 347                         return;
 348                 }
 349                 $last = 0;
 350                 $last_syllable = $info[0]['syllable'];
 351                 for ($i = 1; $i < $count; $i++) {
 352                         if ($last_syllable != $info[$i]['syllable']) {
 353                                 self::initial_reordering_syllable($info, $GSUBdata, $indic_config, $scriptblock, $is_old_spec, $last, $i);
 354                                 $last = $i;
 355                                 $last_syllable = $info[$last]['syllable'];
 356                         }
 357                 }
 358                 self::initial_reordering_syllable($info, $GSUBdata, $indic_config, $scriptblock, $is_old_spec, $last, $count);
 359         }
 360
 361         public static function update_consonant_positions(&$info, $GSUBdata)
 362         {
 363                 $count = count($info);
 364                 for ($i = 0; $i < $count; $i++) {
 365                         if ($info[$i]['indic_position'] == self::POS_BASE_C) {
 366                                 $c = $info[$i]['uni'];
 367                                 // If would substitute...
 368                                 if (isset($GSUBdata['pref'][$c])) {
 369                                         $info[$i]['indic_position'] = self::POS_POST_C;
 370                                 } else if (isset($GSUBdata['blwf'][$c])) {
 371                                         $info[$i]['indic_position'] = self::POS_BELOW_C;
 372                                 } else if (isset($GSUBdata['pstf'][$c])) {
 373                                         $info[$i]['indic_position'] = self::POS_POST_C;
 374                                 }
 375                         }
 376                 }
 377         }
 378
 379         public static function insert_dotted_circles(&$info, $dottedcircle)
 380         {
 381                 $idx = 0;
 382                 $last_syllable = 0;
 383                 while ($idx < count($info)) {
 384                         $syllable = $info[$idx]['syllable'];
 385                         $syllable_type = ($syllable & 0x0F);
 386                         if ($last_syllable != $syllable && $syllable_type == self::BROKEN_CLUSTER) {
 387                                 $last_syllable = $syllable;
 388
 389                                 $dottedcircle[0]['syllable'] = $info[$idx]['syllable'];
 390
 391                                 /* Insert dottedcircle after possible Repha. */
 392                                 while ($idx < count($info) && $last_syllable == $info[$idx]['syllable'] && $info[$idx]['indic_category'] == self::OT_REPHA) {
 393                                         $idx++;
 394                                 }
 395                                 array_splice($info, $idx, 0, $dottedcircle);
 396                         } else {
 397                                 $idx++;
 398                         }
 399                 }
 400
 401                 // I am not sue how this code below got in here, since $idx should now be > count($info) and thus invalid.
 402                 // In case I am missing something(!) I'll leave a warning here for now:
 403                 if (isset($info[$idx])) {
 404                         throw new \Mpdf\MpdfException('Unexpected error occured in Indic processing');
 405                 }
 406                 // In case of final bloken cluster...
 407                 //$syllable = $info[$idx]['syllable'];
 408                 //$syllable_type = ($syllable & 0x0F);
 409                 //if ($last_syllable != $syllable && $syllable_type == self::BROKEN_CLUSTER) {
 410                 //      $dottedcircle[0]['syllable'] = $info[$idx]['syllable'];
 411                 //      array_splice($info, $idx, 0, $dottedcircle);
 412                 //}
 413         }
 414
 415         /* Rules from:
 416          * https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx */
 417
 418         public static function initial_reordering_syllable(&$info, $GSUBdata, $indic_config, $scriptblock, $is_old_spec, $start, $end)
 419         {
 420                 /* vowel_syllable: We made the vowels look like consonants. So uses the consonant logic! */
 421                 /* broken_cluster: We already inserted dotted-circles, so just call the standalone_cluster. */
 422                 /* standalone_cluster: We treat NBSP/dotted-circle as if they are consonants, so we should just chain. */
 423
 424                 $syllable_type = ($info[$start]['syllable'] & 0x0F);
 425                 if ($syllable_type == self::NON_INDIC_CLUSTER) {
 426                         return;
 427                 }
 428                 if ($syllable_type == self::BROKEN_CLUSTER || $syllable_type == self::STANDALONE_CLUSTER) {
 429                         //if ($uniscribe_bug_compatible) {
 430                         /* For dotted-circle, this is what Uniscribe does:
 431                          * If dotted-circle is the last glyph, it just does nothing.
 432                          * i.e. It doesn't form Reph. */
 433                         if ($info[$end - 1]['indic_category'] == self::OT_DOTTEDCIRCLE) {
 434                                 return;
 435                         }
 436                 }
 437
 438                 /* 1. Find base consonant:
 439                  *
 440                  * The shaping engine finds the base consonant of the syllable, using the
 441                  * following algorithm: starting from the end of the syllable, move backwards
 442                  * until a consonant is found that does not have a below-base or post-base
 443                  * form (post-base forms have to follow below-base forms), or that is not a
 444                  * pre-base reordering Ra, or arrive at the first consonant. The consonant
 445                  * stopped at will be the base.
 446                  *
 447                  *      o If the syllable starts with Ra + Halant (in a script that has Reph)
 448                  *      and has more than one consonant, Ra is excluded from candidates for
 449                  *      base consonants.
 450                  */
 451
 452                 $base = $end;
 453                 $has_reph = false;
 454                 $limit = $start;
 455
 456                 if ($scriptblock != Ucdn::SCRIPT_KHMER) {
 457                         /* -> If the syllable starts with Ra + Halant (in a script that has Reph)
 458                          *      and has more than one consonant, Ra is excluded from candidates for
 459                          *      base consonants. */
 460                         if (count($GSUBdata['rphf']) /* ?? $indic_plan->mask_array[RPHF] */ && $start + 3 <= $end &&
 461                                 (
 462                                 ($indic_config[4] == self::REPH_MODE_IMPLICIT && !self::is_joiner($info[$start + 2])) ||
 463                                 ($indic_config[4] == self::REPH_MODE_EXPLICIT && $info[$start + 2]['indic_category'] == self::OT_ZWJ)
 464                                 )) {
 465                                 /* See if it matches the 'rphf' feature. */
 466                                 //$glyphs = array($info[$start]['uni'], $info[$start + 1]['uni']);
 467                                 //if ($indic_plan->rphf->would_substitute ($glyphs, count($glyphs), true, face)) {
 468                                 if (isset($GSUBdata['rphf'][$info[$start]['uni']]) && self::is_halant_or_coeng($info[$start + 1])) {
 469                                         $limit += 2;
 470                                         while ($limit < $end && self::is_joiner($info[$limit])) {
 471                                                 $limit++;
 472                                         }
 473                                         $base = $start;
 474                                         $has_reph = true;
 475                                 }
 476                         } else if ($indic_config[4] == self::REPH_MODE_LOG_REPHA && $info[$start]['indic_category'] == self::OT_REPHA) {
 477                                 $limit += 1;
 478                                 while ($limit < $end && self::is_joiner($info[$limit])) {
 479                                         $limit++;
 480                                 }
 481                                 $base = $start;
 482                                 $has_reph = true;
 483                         }
 484                 }
 485
 486                 switch ($indic_config[2]) { // base_pos
 487                         case self::BASE_POS_LAST:
 488                                 /* -> starting from the end of the syllable, move backwards */
 489                                 $i = $end;
 490                                 $seen_below = false;
 491                                 do {
 492                                         $i--;
 493                                         /* -> until a consonant is found */
 494                                         if (self::is_consonant($info[$i])) {
 495                                                 /* -> that does not have a below-base or post-base form
 496                                                  * (post-base forms have to follow below-base forms), */
 497                                                 if ($info[$i]['indic_position'] != self::POS_BELOW_C && ($info[$i]['indic_position'] != self::POS_POST_C || $seen_below)) {
 498                                                         $base = $i;
 499                                                         break;
 500                                                 }
 501                                                 if ($info[$i]['indic_position'] == self::POS_BELOW_C) {
 502                                                         $seen_below = true;
 503                                                 }
 504
 505                                                 /* -> or that is not a pre-base reordering Ra,
 506                                                  *
 507                                                  * IMPLEMENTATION NOTES:
 508                                                  *
 509                                                  * Our pre-base reordering Ra's are marked POS_POST_C, so will be skipped
 510                                                  * by the logic above already.
 511                                                  */
 512
 513                                                 /* -> or arrive at the first consonant. The consonant stopped at will
 514                                                  * be the base. */
 515                                                 $base = $i;
 516                                         } else {
 517                                                 /* A ZWJ after a Halant stops the base search, and requests an explicit
 518                                                  * half form.
 519                                                  * [A ZWJ before a Halant, requests a subjoined form instead, and hence
 520                                                  * search continues. This is particularly important for Bengali
 521                                                  * sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya] */
 522                                                 if ($start < $i && $info[$i]['indic_category'] == self::OT_ZWJ && $info[$i - 1]['indic_category'] == self::OT_H) {
 523                                                         if (!defined("OMIT_INDIC_FIX_1") || OMIT_INDIC_FIX_1 != 1) {
 524                                                                 $base = $i;
 525                                                         } // INDIC_FIX_1
 526                                                         break;
 527                                                 }
 528                                                 // ZKI8
 529                                                 if ($start < $i && $info[$i]['indic_category'] == self::OT_ZWNJ) {
 530                                                         break;
 531                                                 }
 532                                         }
 533                                 } while ($i > $limit);
 534                                 break;
 535
 536                         case self::BASE_POS_FIRST:
 537                                 /* In scripts without half forms (eg. Khmer), the first consonant is always the base. */
 538
 539                                 if (!$has_reph) {
 540                                         $base = $limit;
 541                                 }
 542
 543                                 /* Find the last base consonant that is not blocked by ZWJ.     If there is
 544                                  * a ZWJ right before a base consonant, that would request a subjoined form. */
 545                                 for ($i = $limit; $i < $end; $i++) {
 546                                         if (self::is_consonant($info[$i]) && $info[$i]['indic_position'] == self::POS_BASE_C) {
 547                                                 if ($limit < $i && $info[$i - 1]['indic_category'] == self::OT_ZWJ) {
 548                                                         break;
 549                                                 } else {
 550                                                         $base = $i;
 551                                                 }
 552                                         }
 553                                 }
 554
 555                                 /* Mark all subsequent consonants as below. */
 556                                 for ($i = $base + 1; $i < $end; $i++) {
 557                                         if (self::is_consonant($info[$i]) && $info[$i]['indic_position'] == self::POS_BASE_C) {
 558                                                 $info[$i]['indic_position'] = self::POS_BELOW_C;
 559                                         }
 560                                 }
 561                                 break;
 562                         //default:
 563                         //assert (false);
 564                         /* fallthrough */
 565                 }
 566
 567                 /* -> If the syllable starts with Ra + Halant (in a script that has Reph)
 568                  *      and has more than one consonant, Ra is excluded from candidates for
 569                  *      base consonants.
 570                  *
 571                  *      Only do this for unforced Reph. (ie. not for Ra,H,ZWJ. */
 572                 if ($scriptblock != Ucdn::SCRIPT_KHMER) {
 573                         if ($has_reph && $base == $start && $limit - $base <= 2) {
 574                                 /* Have no other consonant, so Reph is not formed and Ra becomes base. */
 575                                 $has_reph = false;
 576                         }
 577                 }
 578
 579                 /* 2. Decompose and reorder Matras:
 580                  *
 581                  * Each matra and any syllable modifier sign in the cluster are moved to the
 582                  * appropriate position relative to the consonant(s) in the cluster. The
 583                  * shaping engine decomposes two- or three-part matras into their constituent
 584                  * parts before any repositioning. Matra characters are classified by which
 585                  * consonant in a conjunct they have affinity for and are reordered to the
 586                  * following positions:
 587                  *
 588                  *              o Before first half form in the syllable
 589                  *              o After subjoined consonants
 590                  *              o After post-form consonant
 591                  *              o After main consonant (for above marks)
 592                  *
 593                  * IMPLEMENTATION NOTES:
 594                  *
 595                  * The normalize() routine has already decomposed matras for us, so we don't
 596                  * need to worry about that.
 597                  */
 598
 599
 600                 /* 3.   Reorder marks to canonical order:
 601                  *
 602                  * Adjacent nukta and halant or nukta and vedic sign are always repositioned
 603                  * if necessary, so that the nukta is first.
 604                  *
 605                  * IMPLEMENTATION NOTES:
 606                  *
 607                  * Use the combining Class from Unicode categories? to bubble_sort.
 608                  */
 609
 610                 /* Reorder characters */
 611
 612                 for ($i = $start; $i < $base; $i++) {
 613                         $info[$i]['indic_position'] = min(self::POS_PRE_C, $info[$i]['indic_position']);
 614                 }
 615
 616                 if ($base < $end) {
 617                         $info[$base]['indic_position'] = self::POS_BASE_C;
 618                 }
 619
 620                 /* Mark final consonants. A final consonant is one appearing after a matra,
 621                  * ? only in Khmer. */
 622                 for ($i = $base + 1; $i < $end; $i++) {
 623                         if ($info[$i]['indic_category'] == self::OT_M) {
 624                                 for ($j = $i + 1; $j < $end; $j++) {
 625                                         if (self::is_consonant($info[$j])) {
 626                                                 $info[$j]['indic_position'] = self::POS_FINAL_C;
 627                                                 break;
 628                                         }
 629                                 }
 630                                 break;
 631                         }
 632                 }
 633
 634                 /* Handle beginning Ra */
 635                 if ($scriptblock != Ucdn::SCRIPT_KHMER) {
 636                         if ($has_reph) {
 637                                 $info[$start]['indic_position'] = self::POS_RA_TO_BECOME_REPH;
 638                         }
 639                 }
 640
 641
 642                 /* For old-style Indic script tags, move the first post-base Halant after
 643                  * last consonant.      Only do this if there is *not* a Halant after last
 644                  * consonant. Otherwise it becomes messy. */
 645                 if ($is_old_spec) {
 646                         for ($i = $base + 1; $i < $end; $i++) {
 647                                 if ($info[$i]['indic_category'] == self::OT_H) {
 648                                         for ($j = $end - 1; $j > $i; $j--) {
 649                                                 if (self::is_consonant($info[$j]) || $info[$j]['indic_category'] == self::OT_H) {
 650                                                         break;
 651                                                 }
 652                                         }
 653                                         if ($info[$j]['indic_category'] != self::OT_H && $j > $i) {
 654                                                 /* Move Halant to after last consonant. */
 655                                                 self::_move_info_pos($info, $i, $j + 1);
 656                                         }
 657                                         break;
 658                                 }
 659                         }
 660                 }
 661
 662                 /* Attach misc marks to previous char to move with them. */
 663                 $last_pos = self::POS_START;
 664                 for ($i = $start; $i < $end; $i++) {
 665                         if ((self::FLAG($info[$i]['indic_category']) & (self::FLAG(self::OT_ZWJ) | self::FLAG(self::OT_ZWNJ) | self::FLAG(self::OT_N) | self::FLAG(self::OT_RS) | self::FLAG(self::OT_H) | self::FLAG(self::OT_COENG) ))) {
 666                                 $info[$i]['indic_position'] = $last_pos;
 667                                 if ($info[$i]['indic_category'] == self::OT_H && $info[$i]['indic_position'] == self::POS_PRE_M) {
 668                                         /*
 669                                          * Uniscribe doesn't move the Halant with Left Matra.
 670                                          * TEST: U+092B,U+093F,U+094D
 671                                          * We follow.   This is important for the Sinhala
 672                                          * U+0DDA split matra since it decomposes to U+0DD9,U+0DCA
 673                                          * where U+0DD9 is a left matra and U+0DCA is the virama.
 674                                          * We don't want to move the virama with the left matra.
 675                                          * TEST: U+0D9A,U+0DDA
 676                                          */
 677                                         for ($j = $i; $j > $start; $j--) {
 678                                                 if ($info[$j - 1]['indic_position'] != self::POS_PRE_M) {
 679                                                         $info[$i]['indic_position'] = $info[$j - 1]['indic_position'];
 680                                                         break;
 681                                                 }
 682                                         }
 683                                 }
 684                         } else if ($info[$i]['indic_position'] != self::POS_SMVD) {
 685                                 $last_pos = $info[$i]['indic_position'];
 686                         }
 687                 }
 688
 689                 /* Re-attach ZWJ, ZWNJ, and halant to next char, for after-base consonants. */
 690                 $last_halant = $end;
 691                 for ($i = $base + 1; $i < $end; $i++) {
 692                         if (self::is_halant_or_coeng($info[$i])) {
 693                                 $last_halant = $i;
 694                         } else if (self::is_consonant($info[$i])) {
 695                                 for ($j = $last_halant; $j < $i; $j++) {
 696                                         if ($info[$j]['indic_position'] != self::POS_SMVD) {
 697                                                 $info[$j]['indic_position'] = $info[$i]['indic_position'];
 698                                         }
 699                                 }
 700                         }
 701                 }
 702
 703
 704                 if ($scriptblock == Ucdn::SCRIPT_KHMER) {
 705                         /* KHMER_FIX_2 */
 706                         /* Move Coeng+RO (Halant,Ra) sequence before base consonant. */
 707                         for ($i = $base + 1; $i < $end; $i++) {
 708                                 if (self::is_halant_or_coeng($info[$i]) && self::is_ra($info[$i + 1]['uni'])) {
 709                                         $info[$i]['indic_position'] = self::POS_PRE_C;
 710                                         $info[$i + 1]['indic_position'] = self::POS_PRE_C;
 711                                         break;
 712                                 }
 713                         }
 714                 }
 715
 716
 717                 /*
 718                   if (!defined("OMIT_INDIC_FIX_2") || OMIT_INDIC_FIX_2 != 1) {
 719                   // INDIC_FIX_2
 720                   $ZWNJ_found = false;
 721                   $POST_ZWNJ_c_found = false;
 722                   for ($i = $base + 1; $i < $end; $i++) {
 723                   if ($info[$i]['indic_category'] == self::OT_ZWNJ) { $ZWNJ_found = true; }
 724                   else if ($ZWNJ_found && $info[$i]['indic_category'] == self::OT_C) { $POST_ZWNJ_c_found = true; }
 725                   else if ($POST_ZWNJ_c_found && $info[$i]['indic_position'] == self::POS_BEFORE_SUB) { $info[$i]['indic_position'] = self::POS_AFTER_SUB; }
 726                   }
 727                   }
 728                  */
 729
 730                 /* Setup masks now */
 731                 for ($i = $start; $i < $end; $i++) {
 732                         $info[$i]['mask'] = 0;
 733                 }
 734
 735
 736                 if ($scriptblock == Ucdn::SCRIPT_KHMER) {
 737                         /* Find a Coeng+RO (Halant,Ra) sequence and mark it for pre-base processing. */
 738                         $mask = self::FLAG(self::PREF);
 739                         for ($i = $base; $i < $end - 1; $i++) { /* KHMER_FIX_1 From $start (not base) */
 740                                 if (self::is_halant_or_coeng($info[$i]) && self::is_ra($info[$i + 1]['uni'])) {
 741                                         $info[$i]['mask'] |= self::FLAG(self::PREF);
 742                                         $info[$i + 1]['mask'] |= self::FLAG(self::PREF);
 743
 744                                         /* Mark the subsequent stuff with 'cfar'.  Used in Khmer.
 745                                          * Read the feature spec.
 746                                          * This allows distinguishing the following cases with MS Khmer fonts:
 747                                          * U+1784,U+17D2,U+179A,U+17D2,U+1782  [C+Coeng+RO+Coeng+C] => Should activate CFAR
 748                                          * U+1784,U+17D2,U+1782,U+17D2,U+179A  [C+Coeng+C+Coeng+RO] => Should NOT activate CFAR
 749                                          */
 750                                         for ($j = ($i + 2); $j < $end; $j++) {
 751                                                 $info[$j]['mask'] |= self::FLAG(self::CFAR);
 752                                         }
 753
 754                                         break;
 755                                 }
 756                         }
 757                 }
 758
 759
 760
 761                 /* Sit tight, rock 'n roll! */
 762                 self::bubble_sort($info, $start, $end - $start);
 763
 764                 /* Find base again */
 765                 $base = $end;
 766                 for ($i = $start; $i < $end; $i++) {
 767                         if ($info[$i]['indic_position'] == self::POS_BASE_C) {
 768                                 $base = $i;
 769                                 break;
 770                         }
 771                 }
 772
 773                 if ($scriptblock != Ucdn::SCRIPT_KHMER) {
 774                         /* Reph */
 775                         for ($i = $start; $i < $end; $i++) {
 776                                 if ($info[$i]['indic_position'] == self::POS_RA_TO_BECOME_REPH) {
 777                                         $info[$i]['mask'] |= self::FLAG(self::RPHF);
 778                                 }
 779                         }
 780
 781                         /* Pre-base */
 782                         $mask = self::FLAG(self::HALF);
 783                         for ($i = $start; $i < $base; $i++) {
 784                                 $info[$i]['mask'] |= $mask;
 785                         }
 786                 }
 787
 788                 /* Post-base */
 789                 $mask = (self::FLAG(self::BLWF) | self::FLAG(self::ABVF) | self::FLAG(self::PSTF));
 790                 for ($i = $base + 1; $i < $end; $i++) {
 791                         $info[$i]['mask'] |= $mask;
 792                 }
 793
 794
 795                 if ($scriptblock != Ucdn::SCRIPT_KHMER) {
 796                         if (!defined("OMIT_INDIC_FIX_3") || OMIT_INDIC_FIX_3 != 1) {
 797                                 /* INDIC_FIX_3 */
 798                                 /* Find a (pre-base) Consonant, Halant,Ra sequence and mark Halant|Ra for below-base BLWF processing. */
 799                                 // TEST CASE &#x995;&#x9cd;&#x9b0;&#x9cd;&#x995; in FreeSans versus Vrinda
 800                                 if (($base - $start) >= 3) {
 801                                         for ($i = $start; $i < ($base - 2); $i++) {
 802                                                 if (self::is_consonant($info[$i])) {
 803                                                         if (self::is_halant_or_coeng($info[$i + 1]) && self::is_ra($info[$i + 2]['uni'])) {
 804                                                                 // If would substitute Halant+Ra...BLWF
 805                                                                 if (isset($GSUBdata['blwf'][$info[$i + 2]['uni']])) {
 806                                                                         $info[$i + 1]['mask'] |= self::FLAG(self::BLWF);
 807                                                                         $info[$i + 2]['mask'] |= self::FLAG(self::BLWF);
 808                                                                 } /* If would not substitute as blwf, mark Ra+Halant for RPHF using following Halant (if present) */ else if (self::is_halant_or_coeng($info[$i + 3])) {
 809                                                                         $info[$i + 2]['mask'] |= self::FLAG(self::RPHF);
 810                                                                         $info[$i + 3]['mask'] |= self::FLAG(self::RPHF);
 811                                                                 }
 812                                                                 break;
 813                                                         }
 814                                                 }
 815                                         }
 816                                 }
 817                         }
 818                 }
 819
 820
 821
 822                 if ($is_old_spec && $scriptblock == Ucdn::SCRIPT_DEVANAGARI) {
 823                         /* Old-spec eye-lash Ra needs special handling. From the spec:
 824                          * "The feature 'below-base form' is applied to consonants
 825                          * having below-base forms and following the base consonant.
 826                          * The exception is vattu, which may appear below half forms
 827                          * as well as below the base glyph. The feature 'below-base
 828                          * form' will be applied to all such occurrences of Ra as well."
 829                          *
 830                          * Test case: U+0924,U+094D,U+0930,U+094d,U+0915
 831                          * with Sanskrit 2003 font.
 832                          *
 833                          * However, note that Ra,Halant,ZWJ is the correct way to
 834                          * request eyelash form of Ra, so we wouldn't inhibit it
 835                          * in that sequence.
 836                          *
 837                          * Test case: U+0924,U+094D,U+0930,U+094d,U+200D,U+0915
 838                          */
 839                         for ($i = $start; ($i + 1) < $base; $i++) {
 840                                 if ($info[$i]['indic_category'] == self::OT_RA && $info[$i + 1]['indic_category'] == self::OT_H &&
 841                                         ($i + 2 == $base || $info[$i + 2]['indic_category'] != self::OT_ZWJ)) {
 842                                         $info[$i]['mask'] |= self::FLAG(self::BLWF);
 843                                         $info[$i + 1]['mask'] |= self::FLAG(self::BLWF);
 844                                 }
 845                         }
 846                 }
 847
 848                 if ($scriptblock != Ucdn::SCRIPT_KHMER) {
 849                         if (count($GSUBdata['pref']) && $base + 2 < $end) {
 850                                 /* Find a Halant,Ra sequence and mark it for pre-base processing. */
 851                                 for ($i = $base + 1; $i + 1 < $end; $i++) {
 852                                         // If old_spec find Ra-Halant...
 853                                         if ((isset($GSUBdata['pref'][$info[$i + 1]['uni']]) && self::is_halant_or_coeng($info[$i]) && self::is_ra($info[$i + 1]['uni']) ) ||
 854                                                 ($is_old_spec && isset($GSUBdata['pref'][$info[$i]['uni']]) && self::is_halant_or_coeng($info[$i + 1]) && self::is_ra($info[$i]['uni']) )
 855                                         ) {
 856                                                 $info[$i++]['mask'] |= self::FLAG(self::PREF);
 857                                                 $info[$i++]['mask'] |= self::FLAG(self::PREF);
 858                                                 break;
 859                                         }
 860                                 }
 861                         }
 862                 }
 863
 864
 865                 /* Apply ZWJ/ZWNJ effects */
 866                 for ($i = $start + 1; $i < $end; $i++) {
 867                         if (self::is_joiner($info[$i])) {
 868                                 $non_joiner = ($info[$i]['indic_category'] == self::OT_ZWNJ);
 869                                 $j = $i;
 870                                 while ($j > $start) {
 871                                         if (defined("OMIT_INDIC_FIX_4") && OMIT_INDIC_FIX_4 == 1) {
 872                                                 // INDIC_FIX_4 = do nothing - carry on //
 873                                                 // ZWNJ should block H C from forming blwf post-base - need to unmask backwards beyond first consonant arrived at //
 874                                                 if (!self::is_consonant($info[$j])) {
 875                                                         break;
 876                                                 }
 877                                         }
 878                                         $j--;
 879
 880                                         /* ZWJ/ZWNJ should disable CJCT.        They do that by simply
 881                                          * being there, since we don't skip them for the CJCT
 882                                          * feature (ie. F_MANUAL_ZWJ) */
 883
 884                                         /* A ZWNJ disables HALF. */
 885                                         if ($non_joiner) {
 886                                                 $info[$j]['mask'] &= ~(self::FLAG(self::HALF) | self::FLAG(self::BLWF));
 887                                         }
 888                                 }
 889                         }
 890                 }
 891         }
 892
 893         /**
 894          * Walks $info (by reference) and hands each maximal run [start, end) of entries sharing one 'syllable' value to final_reordering_syllable(); an empty $info makes no calls.
 895          */
 896         public static function final_reordering(&$info, $GSUBdata, $indic_config, $scriptblock, $is_old_spec)
 897         {
 898                 $total = count($info);
 899                 $run_start = 0;
 900                 while ($run_start < $total) {
 901                         $run_id = $info[$run_start]['syllable'];
 902                         $run_end = $run_start + 1;
 903                         while ($run_end < $total && $info[$run_end]['syllable'] == $run_id) {
 904                                 $run_end++;
 905                         }
 906                         self::final_reordering_syllable($info, $GSUBdata, $indic_config, $scriptblock, $is_old_spec, $run_start, $run_end);
 907                         $run_start = $run_end;
 908                 }
 909         }
 910
 911         public static function final_reordering_syllable(&$info, $GSUBdata, $indic_config, $scriptblock, $is_old_spec, $start, $end)
 912         {
 913
 914                 /* 4. Final reordering:
 915                  *
 916                  * After the localized forms and basic shaping forms GSUB features have been
 917                  * applied (see below), the shaping engine performs some final glyph
 918                  * reordering before applying all the remaining font features to the entire
 919                  * cluster.
 920                  */
 921
 922                 /* Find base again */
 923                 for ($base = $start; $base < $end; $base++) {
 924                         if ($info[$base]['indic_position'] >= self::POS_BASE_C) {
 925                                 if ($start < $base && $info[$base]['indic_position'] > self::POS_BASE_C) {
 926                                         $base--;
 927                                 }
 928                                 break;
 929                         }
 930                 }
 931                 if ($base == $end && $start < $base && $info[$base - 1]['indic_category'] != self::OT_ZWJ) {
 932                         $base--;
 933                 }
 934                 while ($start < $base && isset($info[$base]) && ($info[$base]['indic_category'] == self::OT_H || $info[$base]['indic_category'] == self::OT_N)) {
 935                         $base--;
 936                 }
 937
 938
 939                 /*      o Reorder matras:
 940                  *
 941                  *      If a pre-base matra character had been reordered before applying basic
 942                  *      features, the glyph can be moved closer to the main consonant based on
 943                  *      whether half-forms had been formed. Actual position for the matra is
 944                  *      defined as "after last standalone halant glyph, after initial matra
 945                  *      position and before the main consonant". If ZWJ or ZWNJ follow this
 946                  *      halant, position is moved after it.
 947                  */
 948
 949
 950                 if ($start + 1 < $end && $start < $base) { /* Otherwise there can't be any pre-base matra characters. */
 951                         /* If we lost track of base, alas, position before last thingy. */
 952                         $new_pos = ($base == $end) ? $base - 2 : $base - 1;
 953
 954                         /* Malayalam / Tamil do not have "half" forms or explicit virama forms.
 955                          * The glyphs formed by 'half' are Chillus or ligated explicit viramas.
 956                          * We want to position matra after them.
 957                          */
 958                         if ($scriptblock != Ucdn::SCRIPT_MALAYALAM && $scriptblock != Ucdn::SCRIPT_TAMIL) {
 959                                 while ($new_pos > $start && !(self::is_one_of($info[$new_pos], (self::FLAG(self::OT_M) | self::FLAG(self::OT_H) | self::FLAG(self::OT_COENG))))) {
 960                                         $new_pos--;
 961                                 }
 962
 963                                 /* If we found no Halant we are done.
 964                                  * Otherwise only proceed if the Halant does
 965                                  * not belong to the Matra itself! */
 966                                 if (self::is_halant_or_coeng($info[$new_pos]) && $info[$new_pos]['indic_position'] != self::POS_PRE_M) {
 967                                         /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */
 968                                         if ($new_pos + 1 < $end && self::is_joiner($info[$new_pos + 1])) {
 969                                                 $new_pos++;
 970                                         }
 971                                 } else {
 972                                         $new_pos = $start;
 973                                 } /* No move. */
 974                         }
 975
 976                         if ($start < $new_pos && $info[$new_pos]['indic_position'] != self::POS_PRE_M) {
 977                                 /* Now go see if there's actually any matras... */
 978                                 for ($i = $new_pos; $i > $start; $i--) {
 979                                         if ($info[$i - 1]['indic_position'] == self::POS_PRE_M) {
 980                                                 $old_pos = $i - 1;
 981                                                 //memmove (&info[$old_pos], &info[$old_pos + 1], ($new_pos - $old_pos) * sizeof ($info[0]));
 982                                                 self::_move_info_pos($info, $old_pos, $new_pos + 1);
 983
 984                                                 if ($old_pos < $base && $base <= $new_pos) { /* Shouldn't actually happen. */
 985                                                         $base--;
 986                                                 }
 987                                                 $new_pos--;
 988                                         }
 989                                 }
 990                         }
 991                 }
 992
 993
 994                 /*      o Reorder reph:
 995                  *
 996                  *      Reph's original position is always at the beginning of the syllable,
 997                  *      (i.e. it is not reordered at the character reordering stage). However,
 998                  *      it will be reordered according to the basic-forms shaping results.
 999                  *      Possible positions for reph, depending on the script, are; after main,
1000                  *      before post-base consonant forms, and after post-base consonant forms.
1001                  */
1002
1003                 /* If there's anything after the Ra that has the REPH pos, it ought to be halant.
1004                  * Which means that the font has failed to ligate the Reph.     In which case, we
1005                  * shouldn't move. */
1006                 if ($start + 1 < $end &&
1007                         $info[$start]['indic_position'] == self::POS_RA_TO_BECOME_REPH && $info[$start + 1]['indic_position'] != self::POS_RA_TO_BECOME_REPH) {
1008                         $reph_pos = $indic_config[3];
1009                         $skip_to_reph_step_5 = false;
1010                         $skip_to_reph_move = false;
1011
1012                         /*      1. If reph should be positioned after post-base consonant forms,
1013                          *      proceed to step 5.
1014                          */
1015                         if ($reph_pos == self::REPH_POS_AFTER_POST) {
1016                                 $skip_to_reph_step_5 = true;
1017                         }
1018
1019                         /*      2. If the reph repositioning class is not after post-base: target
1020                          *      position is after the first explicit halant glyph between the
1021                          *      first post-reph consonant and last main consonant. If ZWJ or ZWNJ
1022                          *      are following this halant, position is moved after it. If such
1023                          *      position is found, this is the target position. Otherwise,
1024                          *      proceed to the next step.
1025                          *
1026                          *      Note: in old-implementation fonts, where classifications were
1027                          *      fixed in shaping engine, there was no case where reph position
1028                          *      will be found on this step.
1029                          */
1030
1031                         if (!$skip_to_reph_step_5) {
1032                                 $new_reph_pos = $start + 1;
1033
1034                                 while ($new_reph_pos < $base && !self::is_halant_or_coeng($info[$new_reph_pos])) {
1035                                         $new_reph_pos++;
1036                                 }
1037
1038                                 if ($new_reph_pos < $base && self::is_halant_or_coeng($info[$new_reph_pos])) {
1039                                         /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */
1040                                         if ($new_reph_pos + 1 < $base && self::is_joiner($info[$new_reph_pos + 1])) {
1041                                                 $new_reph_pos++;
1042                                         }
1043                                         $skip_to_reph_move = true;
1044                                 }
1045                         }
1046
1047                         /*      3. If reph should be repositioned after the main consonant: find the
1048                          *      first consonant not ligated with main, or find the first
1049                          *      consonant that is not a potential pre-base reordering Ra.
1050                          */
1051                         if ($reph_pos == self::REPH_POS_AFTER_MAIN && !$skip_to_reph_move && !$skip_to_reph_step_5) {
1052                                 $new_reph_pos = $base;
1053                                 /* XXX Skip potential pre-base reordering Ra. */
1054                                 while ($new_reph_pos + 1 < $end && $info[$new_reph_pos + 1]['indic_position'] <= self::POS_AFTER_MAIN) {
1055                                         $new_reph_pos++;
1056                                 }
1057                                 if ($new_reph_pos < $end) {
1058                                         $skip_to_reph_move = true;
1059                                 }
1060                         }
1061
1062                         /*      4. If reph should be positioned before post-base consonant, find
1063                          *      first post-base classified consonant not ligated with main. If no
1064                          *      consonant is found, the target position should be before the
1065                          *      first matra, syllable modifier sign or vedic sign.
1066                          */
1067                         /* This is our take on what step 4 is trying to say (and failing, BADLY). */
1068                         if ($reph_pos == self::REPH_POS_AFTER_SUB && !$skip_to_reph_move && !$skip_to_reph_step_5) {
1069                                 $new_reph_pos = $base;
1070                                 while ($new_reph_pos < $end && isset($info[$new_reph_pos + 1]['indic_position']) &&
1071                                 !( self::FLAG($info[$new_reph_pos + 1]['indic_position']) & (self::FLAG(self::POS_POST_C) | self::FLAG(self::POS_AFTER_POST) | self::FLAG(self::POS_SMVD)))) {
1072                                         $new_reph_pos++;
1073                                 }
1074                                 if ($new_reph_pos < $end) {
1075                                         $skip_to_reph_move = true;
1076                                 }
1077                         }
1078
1079                         /*      5. If no consonant is found in steps 3 or 4, move reph to a position
1080                          *              immediately before the first post-base matra, syllable modifier
1081                          *              sign or vedic sign that has a reordering class after the intended
1082                          *              reph position. For example, if the reordering position for reph
1083                          *              is post-main, it will skip above-base matras that also have a
1084                          *              post-main position.
1085                          */
1086                         if (!$skip_to_reph_move) {
1087                                 /* Copied from step 2. */
1088                                 $new_reph_pos = $start + 1;
1089                                 while ($new_reph_pos < $base && !self::is_halant_or_coeng($info[$new_reph_pos])) {
1090                                         $new_reph_pos++;
1091                                 }
1092
1093                                 if ($new_reph_pos < $base && self::is_halant_or_coeng($info[$new_reph_pos])) {
1094                                         /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */
1095                                         if ($new_reph_pos + 1 < $base && self::is_joiner($info[$new_reph_pos + 1])) {
1096                                                 $new_reph_pos++;
1097                                         }
1098                                         $skip_to_reph_move = true;
1099                                 }
1100                         }
1101
1102
1103                         /*      6. Otherwise, reorder reph to the end of the syllable.
1104                          */
1105                         if (!$skip_to_reph_move) {
1106                                 $new_reph_pos = $end - 1;
1107                                 while ($new_reph_pos > $start && $info[$new_reph_pos]['indic_position'] == self::POS_SMVD) {
1108                                         $new_reph_pos--;
1109                                 }
1110
1111                                 /*
1112                                  * If the Reph is to be ending up after a Matra,Halant sequence,
1113                                  * position it before that Halant so it can interact with the Matra.
1114                                  * However, if it's a plain Consonant,Halant we shouldn't do that.
1115                                  * Uniscribe doesn't do this.
1116                                  * TEST: U+0930,U+094D,U+0915,U+094B,U+094D
1117                                  */
1118                                 //if (!$hb_options.uniscribe_bug_compatible && self::is_halant_or_coeng($info[$new_reph_pos])) {
1119                                 if (self::is_halant_or_coeng($info[$new_reph_pos])) {
1120                                         for ($i = $base + 1; $i < $new_reph_pos; $i++) {
1121                                                 if ($info[$i]['indic_category'] == self::OT_M) {
1122                                                         /* Ok, got it. */
1123                                                         $new_reph_pos--;
1124                                                 }
1125                                         }
1126                                 }
1127                         }
1128
1129
1130                         /* Move */
1131                         self::_move_info_pos($info, $start, $new_reph_pos + 1);
1132
1133                         if ($start < $base && $base <= $new_reph_pos) {
1134                                 $base--;
1135                         }
1136                 }
1137
1138
1139                 /*      o Reorder pre-base reordering consonants:
1140                  *
1141                  *      If a pre-base reordering consonant is found, reorder it according to
1142                  *      the following rules:
1143                  */
1144
1145
1146                 if (count($GSUBdata['pref']) && $base + 1 < $end) { /* Otherwise there can't be any pre-base reordering Ra. */
1147                         for ($i = $base + 1; $i < $end; $i++) {
1148                                 if ($info[$i]['mask'] & self::FLAG(self::PREF)) {
1149                                         /*      1. Only reorder a glyph produced by substitution during application
1150                                          *      of the <pref> feature. (Note that a font may shape a Ra consonant with
1151                                          *      the feature generally but block it in certain contexts.)
1152                                          */
1153                                         // ??? Need to TEST if actual substitution has occurred
1154                                         if ($i + 1 == $end || ($info[$i + 1]['mask'] & self::FLAG(self::PREF)) == 0) {
1155                                                 /*
1156                                                  *      2. Try to find a target position the same way as for pre-base matra.
1157                                                  *      If it is found, reorder pre-base consonant glyph.
1158                                                  *
1159                                                  *      3. If position is not found, reorder immediately before main
1160                                                  *      consonant.
1161                                                  */
1162                                                 $new_pos = $base;
1163                                                 /* Malayalam / Tamil do not have "half" forms or explicit virama forms.
1164                                                  * The glyphs formed by 'half' are Chillus or ligated explicit viramas.
1165                                                  * We want to position matra after them.
1166                                                  */
1167                                                 if ($scriptblock != Ucdn::SCRIPT_MALAYALAM && $scriptblock != Ucdn::SCRIPT_TAMIL) {
1168                                                         while ($new_pos > $start &&
1169                                                         !(self::is_one_of($info[$new_pos - 1], self::FLAG(self::OT_M) | self::FLAG(self::OT_H) | self::FLAG(self::OT_COENG)))) {
1170                                                                 $new_pos--;
1171                                                         }
1172
1173                                                         /* In Khmer coeng model, a V,Ra can go *after* matras. If it goes after a
1174                                                          * split matra, it should be reordered to *before* the left part of such matra. */
1175                                                         if ($new_pos > $start && $info[$new_pos - 1]['indic_category'] == self::OT_M) {
1176                                                                 $old_pos = $i;
1177                                                                 for ($i = $base + 1; $i < $old_pos; $i++) {
1178                                                                         if ($info[$i]['indic_category'] == self::OT_M) {
1179                                                                                 $new_pos--;
1180                                                                                 break;
1181                                                                         }
1182                                                                 }
1183                                                         }
1184                                                 }
1185
1186                                                 if ($new_pos > $start && self::is_halant_or_coeng($info[$new_pos - 1])) {
1187                                                         /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */
1188                                                         if ($new_pos < $end && self::is_joiner($info[$new_pos])) {
1189                                                                 $new_pos++;
1190                                                         }
1191                                                 }
1192
1193                                                 $old_pos = $i;
1194                                                 self::_move_info_pos($info, $old_pos, $new_pos);
1195
1196                                                 if ($new_pos <= $base && $base < $old_pos) {
1197                                                         $base++;
1198                                                 }
1199                                         }
1200
1201                                         break;
1202                                 }
1203                         }
1204                 }
1205
1206
1207                 /* Apply 'init' to the Left Matra if it's a word start. */
1208                 if ($info[$start]['indic_position'] == self::POS_PRE_M &&
1209                         ($start == 0 ||
1210                         ($info[$start - 1]['general_category'] < Ucdn::UNICODE_GENERAL_CATEGORY_FORMAT || $info[$start - 1]['general_category'] > Ucdn::UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)
1211                         )) {
1212                         $info[$start]['mask'] |= self::FLAG(self::INIT);
1213                 }
1214
1215
1216                 /*
1217                  * Finish off and go home!
1218                  */
1219         }
1220
/**
 * Move a single entry of $info from index $from to just in front of index $to.
 *
 * The array is modified in place (it is taken by reference); nothing is returned.
 * $to is measured before the entry is taken out, so when $from is smaller than
 * $to the entry ends up at index $to - 1, and when $from is greater than $to it
 * ends up at index $to. With $from equal to $to the array is left as it was.
 *
 * @param array $info list to reorder; array_splice() renumbers its integer keys
 * @param int   $from index of the entry to move
 * @param int   $to   index the entry is inserted in front of
 */
public static function _move_info_pos(&$info, $from, $to)
{
    $moved = [$info[$from]];

    if ($from <= $to) {
        // Insert the copy first so that $from still addresses the original entry.
        array_splice($info, $to, 0, $moved);
        array_splice($info, $from, 1);
        return;
    }

    // Moving towards the front: take the original out, then insert at $to.
    array_splice($info, $from, 1);
    array_splice($info, $to, 0, $moved);
}
1233
/**
 * Code points of the consonant Ra, one per script, used as a lookup set:
 * the key is the code point and the value is always 1.
 */
public static $ra_chars = [
    0x0930 => 1, // Devanagari
    0x09B0 => 1, // Bengali
    0x09F0 => 1, // Bengali (Assamese)
    0x0A30 => 1, // Gurmukhi - No Reph
    0x0AB0 => 1, // Gujarati
    0x0B30 => 1, // Oriya
    0x0BB0 => 1, // Tamil - No Reph
    0x0C30 => 1, // Telugu - Reph formed only with ZWJ
    0x0CB0 => 1, // Kannada
    0x0D30 => 1, // Malayalam - No Reph, Logical Repha
    0x0DBB => 1, // Sinhala - Reph formed only with ZWJ
    0x179A => 1, // Khmer - No Reph, Visual Repha
];
1248
/**
 * Tell whether $u is one of the Ra code points listed in self::$ra_chars.
 *
 * @param int $u code point used as the lookup key
 * @return bool true when self::$ra_chars has an entry for $u, false otherwise
 */
public static function is_ra($u)
{
    return isset(self::$ra_chars[$u]);
}
1256
/**
 * Test whether the glyph's indic category is among the categories in $flags.
 *
 * @param array $info  glyph record; reads 'indic_category' and, when present, 'is_ligature'
 * @param int   $flags bit mask built by OR-ing self::FLAG() values of categories
 * @return bool false for any glyph with a truthy 'is_ligature'; otherwise whether
 *              the bit for the glyph's category is set in $flags
 */
public static function is_one_of($info, $flags)
{
    // If it ligated, all bets are off.
    if (!empty($info['is_ligature'])) {
        return false;
    }

    $category_bit = self::FLAG($info['indic_category']);

    return (bool) ($category_bit & $flags);
}
1264
/**
 * Test whether the glyph's category is OT_ZWJ or OT_ZWNJ.
 *
 * @param array $info glyph record, as accepted by is_one_of()
 * @return bool
 */
public static function is_joiner($info)
{
    $joiner_mask = self::FLAG(self::OT_ZWJ) | self::FLAG(self::OT_ZWNJ);

    return self::is_one_of($info, $joiner_mask);
}
1269
1270         /* Vowels and placeholders treated as if they were consonants. */
1271
/**
 * Test whether the glyph belongs to one of the categories handled as a consonant:
 * OT_C, OT_CM, OT_RA, OT_V, OT_NBSP or OT_DOTTEDCIRCLE.
 *
 * @param array $info glyph record, as accepted by is_one_of()
 * @return bool
 */
public static function is_consonant($info)
{
    $consonant_like = [
        self::OT_C,
        self::OT_CM,
        self::OT_RA,
        self::OT_V,
        self::OT_NBSP,
        self::OT_DOTTEDCIRCLE,
    ];

    // Fold the category numbers into a single bit mask.
    $mask = 0;
    foreach ($consonant_like as $category) {
        $mask |= self::FLAG($category);
    }

    return self::is_one_of($info, $mask);
}
1276
/**
 * Test whether the glyph's category is OT_H or OT_COENG.
 *
 * @param array $info glyph record, as accepted by is_one_of()
 * @return bool
 */
public static function is_halant_or_coeng($info)
{
    $halant_mask = self::FLAG(self::OT_H) | self::FLAG(self::OT_COENG);

    return self::is_one_of($info, $halant_mask);
}
1281
1282         // From hb-private.hh
/**
 * Test whether $u lies in the inclusive range $lo..$hi.
 *
 * When $lo and $hi differ only in a run of low-order bits that are all clear
 * in $lo and all set in $hi (for example 0x0930..0x093F), the check is done by
 * masking those bits out of $u and comparing with $lo. Any other pair of
 * bounds is checked with two ordinary comparisons.
 *
 * @param int $u  value to test
 * @param int $lo lower bound (inclusive)
 * @param int $hi upper bound (inclusive)
 * @return bool
 */
public static function in_range($u, $lo, $hi)
{
    $diff = $lo ^ $hi;

    $is_aligned_block = ($diff & $lo) == 0
        && ($diff & $hi) == $diff
        && ($diff & ($diff + 1)) == 0;

    if (!$is_aligned_block) {
        return $lo <= $u && $u <= $hi;
    }

    return ($u & ~$diff) == $lo;
}
1291
1292         // From hb-private.hh
/**
 * Return the bit mask that has only bit number $x set, i.e. 1 << $x.
 *
 * @param int $x bit number
 * @return int
 */
public static function FLAG($x)
{
    return 1 << $x;
}
1297
1298         // BELOW from hb-ot-shape-complex-indic.cc
1299
1300         /*
1301          * Indic configurations.
1302          */
1303
/*
 * base_position
 */
const BASE_POS_FIRST = 0;
const BASE_POS_LAST = 1;

/*
 * reph_position
 * Each value is the number of the POS_* constant named next to it.
 */
const REPH_POS_DEFAULT = 10;     // POS_BEFORE_POST
const REPH_POS_AFTER_MAIN = 5;   // POS_AFTER_MAIN
const REPH_POS_BEFORE_SUB = 7;   // POS_BEFORE_SUB
const REPH_POS_AFTER_SUB = 9;    // POS_AFTER_SUB
const REPH_POS_BEFORE_POST = 10; // POS_BEFORE_POST
const REPH_POS_AFTER_POST = 12;  // POS_AFTER_POST

/*
 * reph_mode
 */
const REPH_MODE_IMPLICIT = 0;  // Reph formed out of initial Ra,H sequence.
const REPH_MODE_EXPLICIT = 1;  // Reph formed out of initial Ra,H,ZWJ sequence.
const REPH_MODE_VIS_REPHA = 2; // Encoded Repha character, no reordering needed.
const REPH_MODE_LOG_REPHA = 3; // Encoded Repha character, needs reordering.
1323
/**
 * Per-script configuration table; the key is the SCRIPT_ number from UCDN.
 *
 * Every entry is a positional list:
 *   0 - has_old_spec
 *   1 - virama
 *   2 - base_pos
 *   3 - reph_pos
 *   4 - reph_mode
 *
 * NOTE(review): fields 2-4 presumably hold BASE_POS_*, REPH_POS_* and
 * REPH_MODE_* values respectively - confirm against the code that reads them.
 */
public static $indic_configs = [
    //     has_old_spec, virama, base_pos, reph_pos, reph_mode
    9  => [true,  0x094D, 1, 10, 0],
    10 => [true,  0x09CD, 1, 9,  0],
    11 => [true,  0x0A4D, 1, 7,  0],
    12 => [true,  0x0ACD, 1, 10, 0],
    13 => [true,  0x0B4D, 1, 5,  0],
    14 => [true,  0x0BCD, 1, 12, 0],
    15 => [true,  0x0C4D, 1, 12, 1],
    16 => [true,  0x0CCD, 1, 12, 0],
    17 => [true,  0x0D4D, 1, 5,  3],
    18 => [false, 0x0DCA, 0, 5,  1], // Sinhala
    30 => [false, 0x17D2, 0, 10, 2], // Khmer
    84 => [false, 0xA9C0, 1, 10, 0], // Javanese
];
1349
1350
1351
1352         /*
1353
1354           // from "hb-ot-shape-complex-indic-table.cc"
1355
1356
1357           const ISC_A    = 0; //        INDIC_SYLLABIC_CATEGORY_AVAGRAHA                Avagraha
1358           const ISC_Bi = 8; //  INDIC_SYLLABIC_CATEGORY_BINDU                   Bindu
1359           const ISC_C    = 1; //        INDIC_SYLLABIC_CATEGORY_CONSONANT               Consonant
1360           const ISC_CD = 1; //  INDIC_SYLLABIC_CATEGORY_CONSONANT_DEAD          Consonant_Dead
1361           const ISC_CF = 17; // INDIC_SYLLABIC_CATEGORY_CONSONANT_FINAL         Consonant_Final
1362           const ISC_CHL = 1; // INDIC_SYLLABIC_CATEGORY_CONSONANT_HEAD_LETTER   Consonant_Head_Letter
1363           const ISC_CM = 17; // INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL                Consonant_Medial
1364           const ISC_CP = 11; // INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER   Consonant_Placeholder
1365           const ISC_CR = 15; // INDIC_SYLLABIC_CATEGORY_CONSONANT_REPHA         Consonant_Repha
1366           const ISC_CS = 1; //  INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED     Consonant_Subjoined
1367           const ISC_ML = 0; //  INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER        Modifying_Letter
1368           const ISC_N    = 3; //        INDIC_SYLLABIC_CATEGORY_NUKTA                   Nukta
1369           const ISC_x    = 0; //        INDIC_SYLLABIC_CATEGORY_OTHER                   Other
1370           const ISC_RS = 13; // INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER        Register_Shifter
1371           const ISC_TL = 0; //  INDIC_SYLLABIC_CATEGORY_TONE_LETTER             Tone_Letter
1372           const ISC_TM = 3; //  INDIC_SYLLABIC_CATEGORY_TONE_MARK               Tone_Mark
1373           const ISC_V    = 4; //        INDIC_SYLLABIC_CATEGORY_VIRAMA          Virama
1374           const ISC_Vs = 8; //  INDIC_SYLLABIC_CATEGORY_VISARGA         Visarga
1375           const ISC_Vo = 2; //  INDIC_SYLLABIC_CATEGORY_VOWEL                   Vowel
1376           const ISC_M    = 7; //        INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT Vowel_Dependent
1377           const ISC_VI = 2; //  INDIC_SYLLABIC_CATEGORY_VOWEL_INDEPENDENT       Vowel_Independent
1378
1379           const IMC_B    = 8; //        INDIC_MATRA_CATEGORY_BOTTOM                     Bottom
1380           const IMC_BR = 11; // INDIC_MATRA_CATEGORY_BOTTOM_AND_RIGHT   Bottom_And_Right
1381           const IMC_I    = 15; //       INDIC_MATRA_CATEGORY_INVISIBLE          Invisible
1382           const IMC_L    = 3; //        INDIC_MATRA_CATEGORY_LEFT                       Left
1383           const IMC_LR = 11; // INDIC_MATRA_CATEGORY_LEFT_AND_RIGHT             Left_And_Right
1384           const IMC_x    = 15; //       INDIC_MATRA_CATEGORY_NOT_APPLICABLE             Not_Applicable
1385           const IMC_O    = 5; //        INDIC_MATRA_CATEGORY_OVERSTRUCK         Overstruck
1386           const IMC_R    = 11; //       INDIC_MATRA_CATEGORY_RIGHT                      Right
1387           const IMC_T    = 6; //        INDIC_MATRA_CATEGORY_TOP                        Top
1388           const IMC_TB = 8; //  INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM             Top_And_Bottom
1389           const IMC_TBR = 11; //        INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_RIGHT   Top_And_Bottom_And_Right
1390           const IMC_TL = 6; //  INDIC_MATRA_CATEGORY_TOP_AND_LEFT               Top_And_Left
1391           const IMC_TLR = 11; //        INDIC_MATRA_CATEGORY_TOP_AND_LEFT_AND_RIGHT     Top_And_Left_And_Right
1392           const IMC_TR = 11; // INDIC_MATRA_CATEGORY_TOP_AND_RIGHT              Top_And_Right
1393           const IMC_VOL = 2; // INDIC_MATRA_CATEGORY_VISUAL_ORDER_LEFT          Visual_Order_Left
1394
1395           If in original table = _(C,x), that = ISC_C,IMC_x
1396           Value is IMC_x << 8 (or IMC_x * 256) = 3840
1397           plus ISC_C = 1, so = 3841
1398
1399          */
1400
1401         public static $indic_table = [
                     /*
                      * Each row holds the entries for eight consecutive code points; the
                      * comment at the start of a row is the first code point of that row (hex).
                      * Per the encoding note that precedes this table, an entry is
                      * (IMC_* << 8) + ISC_*, e.g. 3841 = (15 << 8) + 1.
                      */
1402                 /* Devanagari  (0900..097F) */
1403
1404                 /* 0900 */ 3848, 3848, 3848, 3848, 3842, 3842, 3842, 3842,
1405                 /* 0908 */ 3842, 3842, 3842, 3842, 3842, 3842, 3842, 3842,
1406                 /* 0910 */ 3842, 3842, 3842, 3842, 3842, 3841, 3841, 3841,
1407                 /* 0918 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1408                 /* 0920 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1409                 /* 0928 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1410                 /* 0930 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1411                 /* 0938 */ 3841, 3841, 1543, 2823, 3843, 3840, 2823, 775,
1412                 /* 0940 */ 2823, 2055, 2055, 2055, 2055, 1543, 1543, 1543,
1413                 /* 0948 */ 1543, 2823, 2823, 2823, 2823, 2052, 775, 2823,
1414                 /* 0950 */ 3840, 3840, 3840, 3840, 3840, 1543, 2055, 2055,
1415                 /* 0958 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1416                 /* 0960 */ 3842, 3842, 2055, 2055, 3840, 3840, 3840, 3840,
1417                 /* 0968 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1418                 /* 0970 */ 3840, 3840, 3842, 3842, 3842, 3842, 3842, 3842,
1419                 /* 0978 */ 3840, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1420                 /* Bengali  (0980..09FF) */
1421
1422                 /* 0980 */ 3840, 3848, 3848, 3848, 3840, 3842, 3842, 3842,
1423                 /* 0988 */ 3842, 3842, 3842, 3842, 3842, 3840, 3840, 3842,
1424                 /* 0990 */ 3842, 3840, 3840, 3842, 3842, 3841, 3841, 3841,
1425                 /* 0998 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1426                 /* 09A0 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1427                 /* 09A8 */ 3841, 3840, 3841, 3841, 3841, 3841, 3841, 3841,
1428                 /* 09B0 */ 3841, 3840, 3841, 3840, 3840, 3840, 3841, 3841,
1429                 /* 09B8 */ 3841, 3841, 3840, 3840, 3843, 3840, 2823, 775,
1430                 /* 09C0 */ 2823, 2055, 2055, 2055, 2055, 3840, 3840, 775,
1431                 /* 09C8 */ 775, 3840, 3840, 2823, 2823, 2052, 3841, 3840,
1432                 /* 09D0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 2823,
1433                 /* 09D8 */ 3840, 3840, 3840, 3840, 3841, 3841, 3840, 3841,
1434                 /* 09E0 */ 3842, 3842, 2055, 2055, 3840, 3840, 3840, 3840,
1435                 /* 09E8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1436                 /* 09F0 */ 3841, 3841, 3840, 3840, 3840, 3840, 3840, 3840,
1437                 /* 09F8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1438                 /* Gurmukhi  (0A00..0A7F) */
1439
1440                 /* 0A00 */ 3840, 3848, 3848, 3848, 3840, 3842, 3842, 3842,
1441                 /* 0A08 */ 3842, 3842, 3842, 3840, 3840, 3840, 3840, 3842,
1442                 /* 0A10 */ 3842, 3840, 3840, 3842, 3842, 3841, 3841, 3841,
1443                 /* 0A18 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1444                 /* 0A20 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1445                 /* 0A28 */ 3841, 3840, 3841, 3841, 3841, 3841, 3841, 3841,
1446                 /* 0A30 */ 3841, 3840, 3841, 3841, 3840, 3841, 3841, 3840,
1447                 /* 0A38 */ 3841, 3841, 3840, 3840, 3843, 3840, 2823, 775,
1448                 /* 0A40 */ 2823, 2055, 2055, 3840, 3840, 3840, 3840, 1543,
1449                 /* 0A48 */ 1543, 3840, 3840, 1543, 1543, 2052, 3840, 3840,
1450                 /* 0A50 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1451                 /* 0A58 */ 3840, 3841, 3841, 3841, 3841, 3840, 3841, 3840,
1452                 /* 0A60 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1453                 /* 0A68 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
                     /* NOTE(review): the two 13841 entries in the next row (U+0A72, U+0A73) do not
                      * fit the (IMC_* << 8) + ISC_* scheme - no other entry in this table exceeds
                      * 3857. Possibly a typo for 3841 or 3851; confirm against the upstream table. */
1454                 /* 0A70 */ 3848, 3840, 13841, 13841, 3840, 3857, 3840, 3840,
1455                 /* 0A78 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1456                 /* Gujarati  (0A80..0AFF) */
1457
1458                 /* 0A80 */ 3840, 3848, 3848, 3848, 3840, 3842, 3842, 3842,
1459                 /* 0A88 */ 3842, 3842, 3842, 3842, 3842, 3842, 3840, 3842,
1460                 /* 0A90 */ 3842, 3842, 3840, 3842, 3842, 3841, 3841, 3841,
1461                 /* 0A98 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1462                 /* 0AA0 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1463                 /* 0AA8 */ 3841, 3840, 3841, 3841, 3841, 3841, 3841, 3841,
1464                 /* 0AB0 */ 3841, 3840, 3841, 3841, 3840, 3841, 3841, 3841,
1465                 /* 0AB8 */ 3841, 3841, 3840, 3840, 3843, 3840, 2823, 775,
1466                 /* 0AC0 */ 2823, 2055, 2055, 2055, 2055, 1543, 3840, 1543,
1467                 /* 0AC8 */ 1543, 2823, 3840, 2823, 2823, 2052, 3840, 3840,
1468                 /* 0AD0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1469                 /* 0AD8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1470                 /* 0AE0 */ 3842, 3842, 2055, 2055, 3840, 3840, 3840, 3840,
1471                 /* 0AE8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1472                 /* 0AF0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1473                 /* 0AF8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1474                 /* Oriya  (0B00..0B7F) */
1475
1476                 /* 0B00 */ 3840, 3848, 3848, 3848, 3840, 3842, 3842, 3842,
1477                 /* 0B08 */ 3842, 3842, 3842, 3842, 3842, 3840, 3840, 3842,
1478                 /* 0B10 */ 3842, 3840, 3840, 3842, 3842, 3841, 3841, 3841,
1479                 /* 0B18 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1480                 /* 0B20 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1481                 /* 0B28 */ 3841, 3840, 3841, 3841, 3841, 3841, 3841, 3841,
1482                 /* 0B30 */ 3841, 3840, 3841, 3841, 3840, 3841, 3841, 3841,
1483                 /* 0B38 */ 3841, 3841, 3840, 3840, 3843, 3840, 2823, 1543,
1484                 /* 0B40 */ 2823, 2055, 2055, 2055, 2055, 3840, 3840, 775,
1485                 /* 0B48 */ 1543, 3840, 3840, 2823, 2823, 2052, 3840, 3840,
1486                 /* 0B50 */ 3840, 3840, 3840, 3840, 3840, 3840, 1543, 2823,
1487                 /* 0B58 */ 3840, 3840, 3840, 3840, 3841, 3841, 3840, 3841,
1488                 /* 0B60 */ 3842, 3842, 2055, 2055, 3840, 3840, 3840, 3840,
1489                 /* 0B68 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1490                 /* 0B70 */ 3840, 3841, 3840, 3840, 3840, 3840, 3840, 3840,
1491                 /* 0B78 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1492                 /* Tamil  (0B80..0BFF) */
1493
1494                 /* 0B80 */ 3840, 3840, 3848, 3840, 3840, 3842, 3842, 3842,
1495                 /* 0B88 */ 3842, 3842, 3842, 3840, 3840, 3840, 3842, 3842,
1496                 /* 0B90 */ 3842, 3840, 3842, 3842, 3842, 3841, 3840, 3840,
1497                 /* 0B98 */ 3840, 3841, 3841, 3840, 3841, 3840, 3841, 3841,
1498                 /* 0BA0 */ 3840, 3840, 3840, 3841, 3841, 3840, 3840, 3840,
1499                 /* 0BA8 */ 3841, 3841, 3841, 3840, 3840, 3840, 3841, 3841,
1500                 /* 0BB0 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1501                 /* 0BB8 */ 3841, 3841, 3840, 3840, 3840, 3840, 2823, 2823,
1502                 /* 0BC0 */ 1543, 2055, 2055, 3840, 3840, 3840, 775, 775,
1503                 /* 0BC8 */ 775, 3840, 2823, 2823, 2823, 1540, 3840, 3840,
1504                 /* 0BD0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 2823,
1505                 /* 0BD8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1506                 /* 0BE0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1507                 /* 0BE8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1508                 /* 0BF0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1509                 /* 0BF8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1510                 /* Telugu  (0C00..0C7F) */
1511
1512                 /* 0C00 */ 3840, 3848, 3848, 3848, 3840, 3842, 3842, 3842,
1513                 /* 0C08 */ 3842, 3842, 3842, 3842, 3842, 3840, 3842, 3842,
1514                 /* 0C10 */ 3842, 3840, 3842, 3842, 3842, 3841, 3841, 3841,
1515                 /* 0C18 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1516                 /* 0C20 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1517                 /* 0C28 */ 3841, 3840, 3841, 3841, 3841, 3841, 3841, 3841,
1518                 /* 0C30 */ 3841, 3841, 3841, 3841, 3840, 3841, 3841, 3841,
1519                 /* 0C38 */ 3841, 3841, 3840, 3840, 3840, 3840, 1543, 1543,
1520                 /* 0C40 */ 1543, 2823, 2823, 2823, 2823, 3840, 1543, 1543,
1521                 /* 0C48 */ 2055, 3840, 1543, 1543, 1543, 1540, 3840, 3840,
1522                 /* 0C50 */ 3840, 3840, 3840, 3840, 3840, 1543, 2055, 3840,
1523                 /* 0C58 */ 3841, 3841, 3840, 3840, 3840, 3840, 3840, 3840,
1524                 /* 0C60 */ 3842, 3842, 2055, 2055, 3840, 3840, 3840, 3840,
1525                 /* 0C68 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1526                 /* 0C70 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1527                 /* 0C78 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1528                 /* Kannada  (0C80..0CFF) */
1529
1530                 /* 0C80 */ 3840, 3840, 3848, 3848, 3840, 3842, 3842, 3842,
1531                 /* 0C88 */ 3842, 3842, 3842, 3842, 3842, 3840, 3842, 3842,
1532                 /* 0C90 */ 3842, 3840, 3842, 3842, 3842, 3841, 3841, 3841,
1533                 /* 0C98 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1534                 /* 0CA0 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1535                 /* 0CA8 */ 3841, 3840, 3841, 3841, 3841, 3841, 3841, 3841,
1536                 /* 0CB0 */ 3841, 3841, 3841, 3841, 3840, 3841, 3841, 3841,
1537                 /* 0CB8 */ 3841, 3841, 3840, 3840, 3843, 3840, 2823, 1543,
1538                 /* 0CC0 */ 2823, 2823, 2823, 2823, 2823, 3840, 1543, 2823,
1539                 /* 0CC8 */ 2823, 3840, 2823, 2823, 1543, 1540, 3840, 3840,
1540                 /* 0CD0 */ 3840, 3840, 3840, 3840, 3840, 2823, 2823, 3840,
1541                 /* 0CD8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3841, 3840,
1542                 /* 0CE0 */ 3842, 3842, 2055, 2055, 3840, 3840, 3840, 3840,
1543                 /* 0CE8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1544                 /* 0CF0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1545                 /* 0CF8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1546                 /* Malayalam  (0D00..0D7F) */
1547
1548                 /* 0D00 */ 3840, 3840, 3848, 3848, 3840, 3842, 3842, 3842,
1549                 /* 0D08 */ 3842, 3842, 3842, 3842, 3842, 3840, 3842, 3842,
1550                 /* 0D10 */ 3842, 3840, 3842, 3842, 3842, 3841, 3841, 3841,
1551                 /* 0D18 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1552                 /* 0D20 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1553                 /* 0D28 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1554                 /* 0D30 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1555                 /* 0D38 */ 3841, 3841, 3841, 3840, 3840, 3840, 2823, 2823,
1556                 /* 0D40 */ 2823, 2823, 2823, 2055, 2055, 3840, 775, 775,
1557                 /* 0D48 */ 775, 3840, 2823, 2823, 2823, 1540, 3855, 3840,
1558                 /* 0D50 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 2823,
1559                 /* 0D58 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1560                 /* 0D60 */ 3842, 3842, 2055, 2055, 3840, 3840, 3840, 3840,
1561                 /* 0D68 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1562                 /* 0D70 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1563                 /* 0D78 */ 3840, 3840, 3841, 3841, 3841, 3841, 3841, 3841,
1564                 /* Sinhala  (0D80..0DFF) */
1565
1566                 /* 0D80 */ 3840, 3840, 3848, 3848, 3840, 3842, 3842, 3842,
1567                 /* 0D88 */ 3842, 3842, 3842, 3842, 3842, 3842, 3842, 3842,
1568                 /* 0D90 */ 3842, 3842, 3842, 3842, 3842, 3842, 3842, 3840,
1569                 /* 0D98 */ 3840, 3840, 3841, 3841, 3841, 3841, 3841, 3841,
1570                 /* 0DA0 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1571                 /* 0DA8 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1572                 /* 0DB0 */ 3841, 3841, 3840, 3841, 3841, 3841, 3841, 3841,
1573                 /* 0DB8 */ 3841, 3841, 3841, 3841, 3840, 3841, 3840, 3840,
1574                 /* 0DC0 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3840,
1575                 /* 0DC8 */ 3840, 3840, 1540, 3840, 3840, 3840, 3840, 2823,
1576                 /* 0DD0 */ 2823, 2823, 1543, 1543, 2055, 3840, 2055, 3840,
1577                 /* 0DD8 */ 2823, 775, 1543, 775, 2823, 2823, 2823, 2823,
1578                 /* 0DE0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1579                 /* 0DE8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1580                 /* 0DF0 */ 3840, 3840, 2823, 2823, 3840, 3840, 3840, 3840,
1581                 /* 0DF8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1582                 /* Vedic Extensions  (1CD0..1CFF) */
1583
1584                 /* 1CD0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1585                 /* 1CD8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1586                 /* 1CE0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1587                 /* 1CE8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1588                 /* 1CF0 */ 3840, 3840, 3848, 3848, 3840, 3840, 3840, 3840,
1589                 /* 1CF8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1590         ];
1591
/**
 * Packed per-code-point data for the Khmer block (U+1780..U+17FF).
 *
 * indic_get_categories() reads this table at index ($u - 0x1780). The
 * literals are written in hex so the two bytes of each entry can be read
 * at a glance; going by the "(ISC_x | (IMC_x << 8))" notes in
 * indic_get_categories(), the low byte is presumably the category and the
 * high byte the position (0x0F meaning "not applicable") - confirm against
 * the HarfBuzz table this was ported from.
 */
public static $khmer_table = [
        /* Khmer  (1780..17FF) */

        /* 1780 */ 0x0F01, 0x0F01, 0x0F01, 0x0F01, 0x0F01, 0x0F01, 0x0F01, 0x0F01,
        /* 1788 */ 0x0F01, 0x0F01, 0x0F01, 0x0F01, 0x0F01, 0x0F01, 0x0F01, 0x0F01,
        /* 1790 */ 0x0F01, 0x0F01, 0x0F01, 0x0F01, 0x0F01, 0x0F01, 0x0F01, 0x0F01,
        /* 1798 */ 0x0F01, 0x0F01, 0x0F01, 0x0F01, 0x0F01, 0x0F01, 0x0F01, 0x0F01,
        /* 17A0 */ 0x0F01, 0x0F01, 0x0F01, 0x0F02, 0x0F02, 0x0F02, 0x0F02, 0x0F02,
        /* 17A8 */ 0x0F02, 0x0F02, 0x0F02, 0x0F02, 0x0F02, 0x0F02, 0x0F02, 0x0F02,
        /* 17B0 */ 0x0F02, 0x0F02, 0x0F02, 0x0F02, 0x0F00, 0x0F00, 0x0B07, 0x0607,
        /* 17B8 */ 0x0607, 0x0607, 0x0607, 0x0807, 0x0807, 0x0807, 0x0607, 0x0B07,
        /* 17C0 */ 0x0B07, 0x0307, 0x0307, 0x0307, 0x0B07, 0x0B07, 0x0F08, 0x0F08,
        /* 17C8 */ 0x0B07, 0x0F0D, 0x0F0D, 0x0F00, 0x0F0F, 0x0F00, 0x0F00, 0x0F00,
        /* 17D0 */ 0x0F00, 0x0604, 0x0F04, 0x0F00, 0x0F00, 0x0F00, 0x0F00, 0x0F00,
        /* 17D8 */ 0x0F00, 0x0F00, 0x0F00, 0x0F00, 0x0F00, 0x0F00, 0x0F00, 0x0F00,
        /* 17E0 */ 0x0F00, 0x0F00, 0x0F00, 0x0F00, 0x0F00, 0x0F00, 0x0F00, 0x0F00,
        /* 17E8 */ 0x0F00, 0x0F00, 0x0F00, 0x0F00, 0x0F00, 0x0F00, 0x0F00, 0x0F00,
        /* 17F0 */ 0x0F00, 0x0F00, 0x0F00, 0x0F00, 0x0F00, 0x0F00, 0x0F00, 0x0F00,
        /* 17F8 */ 0x0F00, 0x0F00, 0x0F00, 0x0F00, 0x0F00, 0x0F00, 0x0F00, 0x0F00,
];
1612
/**
 * Looks up the packed table value for a Unicode code point.
 *
 * Ported from HarfBuzz "hb-ot-shape-complex-indic-table.cc".
 *
 * @param int $u Unicode code point
 * @return int entry from $indic_table / $khmer_table, 3851 for U+00A0 and
 *             U+25CC, or 3840 for any code point that is not listed
 */
public static function indic_get_categories($u)
{
        if (0x0900 <= $u && $u <= 0x0DFF) {
                return self::$indic_table[$u - 0x0900]; // offset 0 for Most "indic"
        }
        if (0x1CD0 <= $u && $u <= 0x1CFF) {
                // The Vedic Extensions rows (1CD0..1CFF) are stored directly after the
                // rows for U+0900..U+0DFF, so they begin at index 0x0DFF - 0x0900 + 1 (= 1280).
                // The previous offset of 1152 is where the Sinhala rows (U+0D80) start, and
                // the previous upper bound of 0x1D00 reached one entry past the Vedic rows.
                return self::$indic_table[$u - 0x1CD0 + (0x0DFF - 0x0900 + 1)];
        }
        if (0x1780 <= $u && $u <= 0x17FF) {
                return self::$khmer_table[$u - 0x1780];  // Khmer
        }
        if ($u == 0x00A0 || $u == 0x25CC) {
                return 3851; // (ISC_CP | (IMC_x << 8))
        }
        return 3840; // (ISC_x | (IMC_x << 8))
}
1633
1634         // BELOW from hb-ot-shape-complex-indic.cc
1635         /*
1636          * Indic shaper.
1637          */
1638
/**
 * Tests whether a code point lies in the 128-entry block that begins at $Base.
 *
 * @param int $u    Unicode code point
 * @param int $Base first code point of a 128-aligned block (e.g. 0x0900)
 * @return bool true when $u, with its low seven bits cleared, equals $Base
 */
public static function IN_HALF_BLOCK($u, $Base)
{
        // Clearing the low 7 bits yields the start of the aligned 128-code-point block holding $u.
        $blockStart = $u & ~0x7F;

        return $blockStart == $Base;
}
1643
/**
 * Tests whether a code point is in the Devanagari block (U+0900..U+097F).
 *
 * @param int $u Unicode code point
 * @return bool
 */
public static function IS_DEVA($u)
{
        $devanagariStart = 0x0900;

        return self::IN_HALF_BLOCK($u, $devanagariStart);
}
1648
/**
 * Tests whether a code point is in the Bengali block (U+0980..U+09FF).
 *
 * @param int $u Unicode code point
 * @return bool
 */
public static function IS_BENG($u)
{
        $bengaliStart = 0x0980;

        return self::IN_HALF_BLOCK($u, $bengaliStart);
}
1653
/**
 * Tests whether a code point is in the Gurmukhi block (U+0A00..U+0A7F).
 *
 * @param int $u Unicode code point
 * @return bool
 */
public static function IS_GURU($u)
{
        $gurmukhiStart = 0x0A00;

        return self::IN_HALF_BLOCK($u, $gurmukhiStart);
}
1658
/**
 * Tests whether a code point is in the Gujarati block (U+0A80..U+0AFF).
 *
 * @param int $u Unicode code point
 * @return bool
 */
public static function IS_GUJR($u)
{
        $gujaratiStart = 0x0A80;

        return self::IN_HALF_BLOCK($u, $gujaratiStart);
}
1663
/**
 * Tests whether a code point is in the Oriya block (U+0B00..U+0B7F).
 *
 * @param int $u Unicode code point
 * @return bool
 */
public static function IS_ORYA($u)
{
        $oriyaStart = 0x0B00;

        return self::IN_HALF_BLOCK($u, $oriyaStart);
}
1668
/**
 * Tests whether a code point is in the Tamil block (U+0B80..U+0BFF).
 *
 * @param int $u Unicode code point
 * @return bool
 */
public static function IS_TAML($u)
{
        $tamilStart = 0x0B80;

        return self::IN_HALF_BLOCK($u, $tamilStart);
}
1673
/**
 * Tests whether a code point is in the Telugu block (U+0C00..U+0C7F).
 *
 * @param int $u Unicode code point
 * @return bool
 */
public static function IS_TELU($u)
{
        $teluguStart = 0x0C00;

        return self::IN_HALF_BLOCK($u, $teluguStart);
}
1678
/**
 * Tests whether a code point is in the Kannada block (U+0C80..U+0CFF).
 *
 * @param int $u Unicode code point
 * @return bool
 */
public static function IS_KNDA($u)
{
        $kannadaStart = 0x0C80;

        return self::IN_HALF_BLOCK($u, $kannadaStart);
}
1683
/**
 * Tests whether a code point is in the Malayalam block (U+0D00..U+0D7F).
 *
 * @param int $u Unicode code point
 * @return bool
 */
public static function IS_MLYM($u)
{
        $malayalamStart = 0x0D00;

        return self::IN_HALF_BLOCK($u, $malayalamStart);
}
1688
/**
 * Tests whether a code point is in the Sinhala block (U+0D80..U+0DFF).
 *
 * @param int $u Unicode code point
 * @return bool
 */
public static function IS_SINH($u)
{
        $sinhalaStart = 0x0D80;

        return self::IN_HALF_BLOCK($u, $sinhalaStart);
}
1693
/**
 * Tests whether a code point is in the Khmer block (U+1780..U+17FF).
 *
 * @param int $u Unicode code point
 * @return bool
 */
public static function IS_KHMR($u)
{
        $khmerStart = 0x1780;

        return self::IN_HALF_BLOCK($u, $khmerStart);
}
1698
/**
 * Position used for a matra on the left side of the base.
 *
 * @param int $u Unicode code point; accepted for symmetry with the other
 *               MATRA_POS_* helpers but not consulted
 * @return int always self::POS_PRE_M
 */
public static function MATRA_POS_LEFT($u)
{
        // The answer does not vary by script, so $u is never examined.
        $position = self::POS_PRE_M;

        return $position;
}
1703
/**
 * Position used for a matra on the right side of the base, chosen by the
 * script block that contains $u.
 *
 * @param int $u Unicode code point of the matra
 * @return int one of self::POS_BEFORE_SUB, self::POS_AFTER_SUB, self::POS_AFTER_POST
 */
public static function MATRA_POS_RIGHT($u)
{
        // Dispatch on the start of the 128-aligned block containing $u
        // (the same test the IS_* helpers perform).
        switch ($u & ~0x7F) {
                case 0x0980: // Bengali
                case 0x0A00: // Gurmukhi
                case 0x0A80: // Gujarati
                case 0x0B00: // Oriya
                case 0x0B80: // Tamil
                case 0x0D00: // Malayalam
                case 0x1780: // Khmer
                        return self::POS_AFTER_POST;

                case 0x0C00: // Telugu
                        if ($u <= 0x0C42) {
                                return self::POS_BEFORE_SUB;
                        }
                        return self::POS_AFTER_SUB;

                case 0x0C80: // Kannada
                        if ($u < 0x0CC3 || $u > 0x0CD6) {
                                return self::POS_BEFORE_SUB;
                        }
                        return self::POS_AFTER_SUB;
        }

        // Devanagari, Sinhala and the default for every other block.
        return self::POS_AFTER_SUB;
}
1720
/**
 * Position used for a matra above the base, chosen by the script block
 * that contains $u.
 *
 * @param int $u Unicode code point of the matra
 * @return int one of self::POS_AFTER_MAIN, self::POS_BEFORE_SUB,
 *             self::POS_AFTER_SUB, self::POS_AFTER_POST
 */
public static function MATRA_POS_TOP($u)
{
        // Dispatch on the start of the 128-aligned block containing $u
        // (the same test the IS_* helpers perform).
        switch ($u & ~0x7F) {
                case 0x0A00: // Gurmukhi - Deviate from spec
                case 0x1780: // Khmer
                        return self::POS_AFTER_POST;

                case 0x0B00: // Oriya
                        return self::POS_AFTER_MAIN;

                case 0x0C00: // Telugu
                case 0x0C80: // Kannada
                        return self::POS_BEFORE_SUB;
        }

        // Devanagari, Gujarati, Tamil, Sinhala and the default for every other block.
        // BENG and MLYM don't have top matras, so they simply take the default.
        return self::POS_AFTER_SUB;
}
1735
/**
 * Position used for a matra below the base, chosen by the script block
 * that contains $u.
 *
 * @param int $u Unicode code point of the matra
 * @return int one of self::POS_BEFORE_SUB, self::POS_AFTER_SUB, self::POS_AFTER_POST
 */
public static function MATRA_POS_BOTTOM($u)
{
        // Dispatch on the start of the 128-aligned block containing $u
        // (the same test the IS_* helpers perform).
        switch ($u & ~0x7F) {
                case 0x0A00: // Gurmukhi
                case 0x0A80: // Gujarati
                case 0x0B80: // Tamil
                case 0x0D00: // Malayalam
                case 0x1780: // Khmer
                        return self::POS_AFTER_POST;

                case 0x0C00: // Telugu
                case 0x0C80: // Kannada
                        return self::POS_BEFORE_SUB;
        }

        // Devanagari, Bengali, Oriya, Sinhala and the default for every other block.
        return self::POS_AFTER_SUB;
}
1752
/**
 * Maps the side a matra attaches on to its script-specific position.
 *
 * @param int $u    Unicode code point of the matra
 * @param int $side self::POS_PRE_C (left), self::POS_POST_C (right),
 *                  self::POS_ABOVE_C (top) or self::POS_BELOW_C (bottom)
 * @return int result of the matching MATRA_POS_* helper, or $side unchanged
 *             when it is none of the four values above
 */
public static function matra_position($u, $side)
{
        // Loose comparison on purpose: it matches what the original switch did.
        if ($side == self::POS_PRE_C) {
                return self::MATRA_POS_LEFT($u);
        }
        if ($side == self::POS_POST_C) {
                return self::MATRA_POS_RIGHT($u);
        }
        if ($side == self::POS_ABOVE_C) {
                return self::MATRA_POS_TOP($u);
        }
        if ($side == self::POS_BELOW_C) {
                return self::MATRA_POS_BOTTOM($u);
        }

        return $side;
}
1767
/**
 * Splits a multi-part vowel matra into its component code points.
 *
 * Taken from (old) HarfBuzz. Newer HarfBuzz uses /src/hb-ucdn/ucdn.c and
 * unicodedata_db.h to decompose every character; strictly one should fully
 * decompose and then recompose, but only the split matras are handled here.
 *
 * @param int $ab Unicode code point
 * @return array|false list of two or three component code points, or false
 *                     when $ab is not one of the split matras listed below
 */
public static function decompose_indic($ab)
{
        switch ($ab) {
                /* bengali */
                case 0x9cb:
                        return [0x9c7, 0x9be];
                case 0x9cc:
                        return [0x9c7, 0x9d7];

                /* oriya */
                case 0xb48:
                        return [0xb47, 0xb56];
                case 0xb4b:
                        return [0xb47, 0xb3e];
                case 0xb4c:
                        return [0xb47, 0xb57];

                /* tamil */
                case 0xbca:
                        return [0xbc6, 0xbbe];
                case 0xbcb:
                        return [0xbc7, 0xbbe];
                case 0xbcc:
                        return [0xbc6, 0xbd7];

                /* telugu */
                case 0xc48:
                        return [0xc46, 0xc56];

                /* kannada */
                case 0xcc0:
                        return [0xcbf, 0xcd5];
                case 0xcc7:
                        return [0xcc6, 0xcd5];
                case 0xcc8:
                        return [0xcc6, 0xcd6];
                case 0xcca:
                        return [0xcc6, 0xcc2];
                case 0xccb:
                        return [0xcc6, 0xcc2, 0xcd5];

                /* malayalam */
                case 0xd4a:
                        return [0xd46, 0xd3e];
                case 0xd4b:
                        return [0xd47, 0xd3e];
                case 0xd4c:
                        return [0xd46, 0xd57];

                /* sinhala */
                // NB Some fonts break with these Sinhala decompositions (although this is the Uniscribe spec).
                // One could check whether the character would be substituted by pstf and decompose only then,
                // e.g. if (isset($GSUBdata['pstf'][$ab])) - that would need $GSUBdata passed in as a parameter.
                case 0xdda:
                        return [0xdd9, 0xdca];
                case 0xddc:
                        return [0xdd9, 0xdcf];
                case 0xddd:
                        return [0xdd9, 0xdcf, 0xdca];
                case 0xdde:
                        return [0xdd9, 0xddf];

                /* khmer */
                case 0x17be:
                        return [0x17c1, 0x17be];
                case 0x17bf:
                        return [0x17c1, 0x17bf];
                case 0x17c0:
                        return [0x17c1, 0x17c0];
                case 0x17c4:
                        return [0x17c1, 0x17c4];
                case 0x17c5:
                        return [0x17c1, 0x17c5];

                /* tibetan - included here although it does not use the Indic shaper in other ways */
                case 0xf73:
                        return [0xf71, 0xf72];
                case 0xf75:
                        return [0xf71, 0xf74];
                case 0xf76:
                        return [0xfb2, 0xf80];
                case 0xf77:
                        return [0xfb2, 0xf81];
                case 0xf78:
                        return [0xfb3, 0xf80];
                case 0xf79:
                        return [0xfb3, 0xf71, 0xf80];
                case 0xf81:
                        return [0xf71, 0xf80];
        }

        // Not a split matra.
        return false;
}
1930
/**
 * Sorts a slice of $arr in place, ascending by each element's
 * 'indic_position' entry, using a bubble sort.
 *
 * @param array $arr   list whose elements are arrays with an 'indic_position' key; modified in place
 * @param int   $start index of the first element of the slice
 * @param int   $len   number of elements in the slice; fewer than 2 leaves $arr untouched
 * @return void
 */
public static function bubble_sort(&$arr, $start, $len)
{
        if ($len < 2) {
                return;
        }

        // Each pass pushes the largest remaining entry to $last + 1, so the next pass stops one earlier.
        for ($last = $start + $len - 2; $last >= $start; $last--) {
                for ($i = $start; $i <= $last; $i++) {
                        $next = $i + 1;
                        // Strict ">" means entries with equal positions keep their relative order.
                        if ($arr[$i]['indic_position'] > $arr[$next]['indic_position']) {
                                $held = $arr[$i];
                                $arr[$i] = $arr[$next];
                                $arr[$next] = $held;
                        }
                }
        }
}
1948 }