9 /* FROM hb-ot-shape-complex-indic-private.hh */
19 const OT_M
= 7; /* Matra or Dependent Vowel */
24 const OT_DOTTEDCIRCLE
= 12; /* Not in the spec, but special in Uniscribe. /Very very/ special! */
25 const OT_RS
= 13; /* Register Shifter, used in Khmer OT spec */
29 const OT_RA
= 16; /* Not explicitly listed in the OT spec, but used in the grammar. */
32 /* Visual positions in a syllable from left to right. */
33 /* FROM hb-ot-shape-complex-indic-private.hh */
38 const POS_RA_TO_BECOME_REPH
= 1;
43 const POS_AFTER_MAIN
= 5;
45 const POS_ABOVE_C
= 6;
47 const POS_BEFORE_SUB
= 7;
48 const POS_BELOW_C
= 8;
49 const POS_AFTER_SUB
= 9;
51 const POS_BEFORE_POST
= 10;
52 const POS_POST_C
= 11;
53 const POS_AFTER_POST
= 12;
55 const POS_FINAL_C
= 13;
62 * These features are applied in order, one at a time, after initial_reordering.
66 * Must be in the same order as the indic_features array. Ones starting with _ are F_GLOBAL
67 * Ones without the _ are only applied where the mask says!
79 const CFAR
= 9; // Khmer only
84 // Based on indic_category used to make string to find syllables
85 // OT_ to string character (using e.g. OT_C from INDIC) hb-ot-shape-complex-indic-private.hh
86 public static $indic_category_char = [
97 'A', /* Spec gives Andutta U+0952 as OT_A. However, testing shows that Uniscribe
98 * treats U+0951..U+0952 all as OT_VD - see set_indic_properties */
101 'F', /* Register shift Khmer only */
102 'G', /* Khmer only */
103 'r', /* 0D4E (dot reph) only one in Malayalam */
105 'm', /* Consonant medial only used in Indic 0A75 in Gurmukhi (0A00..0A7F) : also in Lao, Myanmar, Tai Tham, Javanese & Cham */
/**
 * Derive the Indic shaping category and visual position for one character
 * record and store them in $info['indic_category'] and $info['indic_position'].
 *
 * The category is the low 7 bits of the value returned by
 * self::indic_get_categories(); it is then overridden for a number of special
 * code points (e.g. U+0951..U+0954, U+200D, U+25CC, U+0A71 and the Khmer signs
 * U+17CB..U+17D3). Consonant-like categories get a base/below position, matras
 * are positioned through self::matra_position(), and syllable modifiers / vedic
 * signs get POS_SMVD.
 *
 * NOTE(review): the lines that initialise $u and $pos are not visible in this
 * extract - presumably $u is the code point held in $info; confirm against the
 * full source.
 *
 * @param array $info        Character record, updated in place.
 * @param mixed $scriptblock Script identifier, compared against Ucdn::SCRIPT_KHMER.
 */
108 public static function set_indic_properties(&$info, $scriptblock)
111 $type = self
::indic_get_categories($u);
112 $cat = ($type & 0x7F);
123 if ($cat == self
::OT_X
&& self
::in_range($u, 0x17CB, 0x17D3)) { /* Khmer Various signs */
124 /* These are like Top Matras. */
126 $pos = self
::POS_ABOVE_C
;
131 } /* Khmer Bindu doesn't like to be repositioned. */
134 $cat = self
::OT_COENG
;
137 /* The spec says U+0952 is OT_A. However, testing shows that Uniscribe
138 * treats U+0951..U+0952 all as OT_VD.
140 * U+092E,U+0947,U+0952
141 * U+092E,U+0952,U+0947
142 * U+092E,U+0947,U+0951
143 * U+092E,U+0951,U+0947
145 //if ($u == 0x0952) $cat = self::OT_A;
146 if (self
::in_range($u, 0x0951, 0x0954)) {
151 $cat = self
::OT_ZWNJ
;
152 } else if ($u == 0x200D) {
154 } else if ($u == 0x25CC) {
155 $cat = self
::OT_DOTTEDCIRCLE
;
156 } else if ($u == 0x0A71) {
158 } /* GURMUKHI ADDAK. More like consonant medial. like 0A75. */
160 if ($cat == self
::OT_REPHA
) {
161 /* There are two kinds of characters marked as Repha:
162 * - The ones that are GenCat=Mn are already positioned visually, ie. after base. (eg. Khmer)
163 * - The ones that are GenCat=Lo is encoded logically, ie. beginning of syllable. (eg. Malayalam)
165 * We recategorize the first kind to look like a Nukta and attached to the base directly.
167 if ($info['general_category'] == Ucdn
::UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK
) {
173 * Re-assign position.
176 if ((self
::FLAG($cat) & (self
::FLAG(self
::OT_C
) | self
::FLAG(self
::OT_CM
) | self
::FLAG(self
::OT_RA
) | self
::FLAG(self
::OT_V
) | self
::FLAG(self
::OT_NBSP
) | self
::FLAG(self
::OT_DOTTEDCIRCLE
)))) { // = CONSONANT_FLAGS like is_consonant
177 if ($scriptblock == Ucdn
::SCRIPT_KHMER
) {
178 $pos = self
::POS_BELOW_C
;
179 } /* Khmer differs from Indic here. */
181 $pos = self
::POS_BASE_C
;
182 } /* Will recategorize later based on font lookups. */
184 if (self
::is_ra($u)) {
187 } else if ($cat == self
::OT_M
) {
188 $pos = self
::matra_position($u, $pos);
189 } else if ($cat == self
::OT_SM ||
$cat == self
::OT_VD
) {
190 $pos = self
::POS_SMVD
;
194 $pos = self
::POS_BEFORE_SUB
;
195 } /* Oriya Bindu is BeforeSub in the spec. */
197 $info['indic_category'] = $cat;
198 $info['indic_position'] = $pos;
202 const CONSONANT_SYLLABLE
= 0;
203 const VOWEL_SYLLABLE
= 1;
204 const STANDALONE_CLUSTER
= 2;
205 const BROKEN_CLUSTER
= 3;
206 const NON_INDIC_CLUSTER
= 4;
/**
 * Split a run of characters into Indic syllables and tag every character
 * record with its syllable id.
 *
 * $s holds one category letter per entry of $o (the loop indexes $o by byte
 * offsets into $s). Starting at the current offset, the patterns are tried in
 * this order: consonant syllable, vowel syllable, stand-alone cluster (only at
 * a word start) and broken cluster. If none matches, a single character is
 * consumed as a NON_INDIC_CLUSTER. Each covered record receives
 * $o[$i]['syllable'] = (serial << 4) | type.
 *
 * NOTE(review): the initialisation of $ptr and the increment of
 * $syllable_serial are not visible in this extract; the serial is reset to 1
 * when it reaches 16, which keeps it within 4 bits.
 *
 * @param array  $o                 Character records, updated in place.
 * @param string $s                 Category string, one letter per record.
 * @param bool   $broken_syllables  Set to true when a broken cluster is found.
 */
208 public static function set_syllables(&$o, $s, &$broken_syllables)
211 $syllable_serial = 1;
212 $broken_syllables = false;
214 while ($ptr < strlen($s)) {
// Defaults used when no pattern below matches at this offset.
216 $syllable_length = 1;
217 $syllable_type = self
::NON_INDIC_CLUSTER
;
218 // CONSONANT_SYLLABLE Consonant syllable
220 if (preg_match('/^([CR]m*[N]?(H[ZJ]?|[ZJ]H))*[CR]m*[N]?[A]?(H[ZJ]?|[M]*[N]?[H]?)?[S]?[v]{0,2}/', substr($s, $ptr), $ma)) {
222 //if (preg_match('/^r?([CR]J?(Z?[N]{0,2})?[ZJ]?H(J[N]?)?){0,4}[CR]J?(Z?[N]{0,2})?A?((([ZJ]?H(J[N]?)?)|HZ)|(HJ)?([ZJ]{0,3}M[N]?(H|JHJR)?){0,4})?(S[Z]?)?[v]{0,2}/', substr($s,$ptr), $ma)) {
223 $syllable_length = strlen($ma[0]);
224 $syllable_type = self
::CONSONANT_SYLLABLE
;
225 } // VOWEL_SYLLABLE Vowel-based syllable
227 else if (preg_match('/^(RH|r)?V[N]?([ZJ]?H[CR]m*|J[CR]m*)?([M]*[N]?[H]?)?[S]?[v]{0,2}/', substr($s, $ptr), $ma)) {
229 //else if (preg_match('/^(RH|r)?V(Z?[N]{0,2})?(J|([ZJ]?H(J[N]?)?[CR]J?(Z?[N]{0,2})?){0,4}((([ZJ]?H(J[N]?)?)|HZ)|(HJ)?([ZJ]{0,3}M[N]?(H|JHJR)?){0,4})?(S[Z]?)?[v]{0,2})/', substr($s,$ptr), $ma)) {
230 $syllable_length = strlen($ma[0]);
231 $syllable_type = self
::VOWEL_SYLLABLE
;
232 } /* Apply only if it's a word start. */
233 // STANDALONE_CLUSTER Stand Alone syllable at start of word
// Word start = first record, or previous record has a general category
// outside the LOWERCASE_LETTER..NON_SPACING_MARK range.
235 else if (($ptr == 0 ||
236 $o[$ptr - 1]['general_category'] < Ucdn
::UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER ||
237 $o[$ptr - 1]['general_category'] > Ucdn
::UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK
238 ) && (preg_match('/^(RH|r)?[sD][N]?([ZJ]?H[CR]m*)?([M]*[N]?[H]?)?[S]?[v]{0,2}/', substr($s, $ptr), $ma))) {
240 // && (preg_match('/^(RH|r)?[sD](Z?[N]{0,2})?(([ZJ]?H(J[N]?)?)[CR]J?(Z?[N]{0,2})?){0,4}((([ZJ]?H(J[N]?)?)|HZ)|(HJ)?([ZJ]{0,3}M[N]?(H|JHJR)?){0,4})?(S[Z]?)?[v]{0,2}/', substr($s,$ptr), $ma)) {
241 $syllable_length = strlen($ma[0]);
242 $syllable_type = self
::STANDALONE_CLUSTER
;
243 } // BROKEN_CLUSTER syllable
244 else if (preg_match('/^(RH|r)?[N]?([ZJ]?H[CR])?([M]*[N]?[H]?)?[S]?[v]{0,2}/', substr($s, $ptr), $ma)) {
246 //else if (preg_match('/^(RH|r)?(Z?[N]{0,2})?(([ZJ]?H(J[N]?)?)[CR]J?(Z?[N]{0,2})?){0,4}((([ZJ]?H(J[N]?)?)|HZ)|(HJ)?([ZJ]{0,3}M[N]?(H|JHJR)?){0,4})(S[Z]?)?[v]{0,2}/', substr($s,$ptr), $ma)) {
// Every part of the broken-cluster pattern is optional, so an empty match
// is ignored and the one-character NON_INDIC_CLUSTER default is kept.
247 if (strlen($ma[0])) { // May match blank
248 $syllable_length = strlen($ma[0]);
249 $syllable_type = self
::BROKEN_CLUSTER
;
250 $broken_syllables = true;
// Tag every record of the syllable: serial in the high bits, type in the low 4 bits.
254 for ($i = $ptr; $i < $ptr +
$syllable_length; $i++
) {
255 $o[$i]['syllable'] = ($syllable_serial << 4) |
$syllable_type;
257 $ptr +
= $syllable_length;
259 if ($syllable_serial == 16) {
260 $syllable_serial = 1;
/**
 * Sinhala variant of syllable detection: tags every character record with
 * $o[$i]['syllable'] = (serial << 4) | type.
 *
 * Only two patterns are tried at each offset of $s: a consonant syllable and
 * a vowel syllable. Anything else is consumed one character at a time as a
 * NON_INDIC_CLUSTER. In the lines visible here $broken_syllables is only ever
 * set to false.
 *
 * NOTE(review): the initialisation of $ptr and the increment of
 * $syllable_serial are not visible in this extract.
 *
 * @param array  $o                 Character records, updated in place.
 * @param string $s                 Category string, one letter per record.
 * @param bool   $broken_syllables  Output flag; reset to false.
 */
265 public static function set_syllables_sinhala(&$o, $s, &$broken_syllables)
268 $syllable_serial = 1;
269 $broken_syllables = false;
271 while ($ptr < strlen($s)) {
// Defaults used when neither pattern below matches at this offset.
273 $syllable_length = 1;
274 $syllable_type = self
::NON_INDIC_CLUSTER
;
275 // CONSONANT_SYLLABLE Consonant syllable
277 if (preg_match('/^([CR]HJ|[CR]JH){0,8}[CR][HM]{0,3}[S]{0,1}/', substr($s, $ptr), $ma)) {
278 $syllable_length = strlen($ma[0]);
279 $syllable_type = self
::CONSONANT_SYLLABLE
;
280 } // VOWEL_SYLLABLE Vowel-based syllable
282 else if (preg_match('/^V[S]{0,1}/', substr($s, $ptr), $ma)) {
283 $syllable_length = strlen($ma[0]);
284 $syllable_type = self
::VOWEL_SYLLABLE
;
// Tag every record of the syllable: serial in the high bits, type in the low 4 bits.
287 for ($i = $ptr; $i < $ptr +
$syllable_length; $i++
) {
288 $o[$i]['syllable'] = ($syllable_serial << 4) |
$syllable_type;
290 $ptr +
= $syllable_length;
292 if ($syllable_serial == 16) {
293 $syllable_serial = 1;
/**
 * Khmer variant of syllable detection: tags every character record with
 * $o[$i]['syllable'] = (serial << 4) | type.
 *
 * At each offset of $s the consonant-syllable, vowel-syllable and
 * broken-cluster patterns are tried in that order; the Khmer patterns use the
 * category letters G and F where the generic patterns use H. If none matches,
 * one character is consumed as a NON_INDIC_CLUSTER.
 *
 * NOTE(review): the initialisation of $ptr and the increment of
 * $syllable_serial are not visible in this extract.
 *
 * @param array  $o                 Character records, updated in place.
 * @param string $s                 Category string, one letter per record.
 * @param bool   $broken_syllables  Set to true when a broken cluster is found.
 */
298 public static function set_syllables_khmer(&$o, $s, &$broken_syllables)
301 $syllable_serial = 1;
302 $broken_syllables = false;
304 while ($ptr < strlen($s)) {
// Defaults used when no pattern below matches at this offset.
306 $syllable_length = 1;
307 $syllable_type = self
::NON_INDIC_CLUSTER
;
308 // CONSONANT_SYLLABLE Consonant syllable
309 if (preg_match('/^r?([CR]J?((Z?F)?[N]{0,2})?[ZJ]?G(JN?)?){0,4}[CR]J?((Z?F)?[N]{0,2})?A?((([ZJ]?G(JN?)?)|GZ)|(GJ)?([ZJ]{0,3}MN?(H|JHJR)?){0,4})?(G([CR]J?((Z?F)?[N]{0,2})?|V))?(SZ?)?[v]{0,2}/', substr($s, $ptr), $ma)) {
310 $syllable_length = strlen($ma[0]);
311 $syllable_type = self
::CONSONANT_SYLLABLE
;
312 } // VOWEL_SYLLABLE Vowel-based syllable
313 else if (preg_match('/^(RH|r)?V((Z?F)?[N]{0,2})?(J|([ZJ]?G(JN?)?[CR]J?((Z?F)?[N]{0,2})?){0,4}((([ZJ]?G(JN?)?)|GZ)|(GJ)?([ZJ]{0,3}MN?(H|JHJR)?){0,4})?(G([CR]J?((Z?F)?[N]{0,2})?|V))?(SZ?)?[v]{0,2})/', substr($s, $ptr), $ma)) {
314 $syllable_length = strlen($ma[0]);
315 $syllable_type = self
::VOWEL_SYLLABLE
;
316 } // BROKEN_CLUSTER syllable
317 else if (preg_match('/^(RH|r)?((Z?F)?[N]{0,2})?(([ZJ]?G(JN?)?)[CR]J?((Z?F)?[N]{0,2})?){0,4}((([ZJ]?G(JN?)?)|GZ)|(GJ)?([ZJ]{0,3}MN?(H|JHJR)?){0,4})(G([CR]J?((Z?F)?[N]{0,2})?|V))?(SZ?)?[v]{0,2}/', substr($s, $ptr), $ma)) {
// The broken-cluster pattern can match the empty string; an empty match
// is ignored and the one-character NON_INDIC_CLUSTER default is kept.
318 if (strlen($ma[0])) { // May match blank
319 $syllable_length = strlen($ma[0]);
320 $syllable_type = self
::BROKEN_CLUSTER
;
321 $broken_syllables = true;
// Tag every record of the syllable: serial in the high bits, type in the low 4 bits.
325 for ($i = $ptr; $i < $ptr +
$syllable_length; $i++
) {
326 $o[$i]['syllable'] = ($syllable_serial << 4) |
$syllable_type;
328 $ptr +
= $syllable_length;
330 if ($syllable_serial == 16) {
331 $syllable_serial = 1;
/**
 * Run the initial (pre-GSUB) reordering pass over a whole run of characters.
 *
 * Steps visible here:
 *  1. self::update_consonant_positions() refines consonant positions from the
 *     font's GSUB data.
 *  2. If broken syllables were detected and a dotted-circle record is
 *     available, self::insert_dotted_circles() is called.
 *  3. $info is walked once; every maximal run of records sharing the same
 *     'syllable' value is handed to self::initial_reordering_syllable() as the
 *     half-open range [$last, $i), and the trailing run as [$last, $count).
 *
 * NOTE(review): the initialisation and update of $last, and any guard for an
 * empty $info, are not visible in this extract.
 *
 * @param array $info             Character records, updated in place.
 * @param array $GSUBdata         Font substitution data, passed through.
 * @param mixed $broken_syllables Truthy when syllable detection found a broken cluster.
 * @param array $indic_config     Script configuration, passed through.
 * @param mixed $scriptblock      Script identifier, passed through.
 * @param mixed $is_old_spec      Old-spec flag, passed through.
 * @param mixed $dottedcircle     Dotted-circle record(s) to insert; falsy to skip insertion.
 */
336 public static function initial_reordering(&$info, $GSUBdata, $broken_syllables, $indic_config, $scriptblock, $is_old_spec, $dottedcircle)
339 self
::update_consonant_positions($info, $GSUBdata);
341 if ($broken_syllables && $dottedcircle) {
342 self
::insert_dotted_circles($info, $dottedcircle);
// Counted after the possible insertion above, so inserted records are included.
345 $count = count($info);
350 $last_syllable = $info[0]['syllable'];
351 for ($i = 1; $i < $count; $i++
) {
352 if ($last_syllable != $info[$i]['syllable']) {
353 self
::initial_reordering_syllable($info, $GSUBdata, $indic_config, $scriptblock, $is_old_spec, $last, $i);
355 $last_syllable = $info[$last]['syllable'];
// Process the final syllable, which the loop above never closes.
358 self
::initial_reordering_syllable($info, $GSUBdata, $indic_config, $scriptblock, $is_old_spec, $last, $count);
/**
 * Refine the position of consonants currently marked POS_BASE_C according to
 * which GSUB features the font has an entry for.
 *
 * For each such record the code point $info[$i]['uni'] is looked up, in this
 * priority order:
 *  - $GSUBdata['pref'][$c] set  => POS_POST_C
 *  - $GSUBdata['blwf'][$c] set  => POS_BELOW_C
 *  - $GSUBdata['pstf'][$c] set  => POS_POST_C
 * Records with no entry keep POS_BASE_C.
 *
 * @param array $info     Character records, updated in place.
 * @param array $GSUBdata Font substitution data keyed by feature tag, then code point.
 */
361 public static function update_consonant_positions(&$info, $GSUBdata)
363 $count = count($info);
364 for ($i = 0; $i < $count; $i++
) {
365 if ($info[$i]['indic_position'] == self
::POS_BASE_C
) {
366 $c = $info[$i]['uni'];
367 // If would substitute...
368 if (isset($GSUBdata['pref'][$c])) {
369 $info[$i]['indic_position'] = self
::POS_POST_C
;
370 } else if (isset($GSUBdata['blwf'][$c])) {
371 $info[$i]['indic_position'] = self
::POS_BELOW_C
;
372 } else if (isset($GSUBdata['pstf'][$c])) {
373 $info[$i]['indic_position'] = self
::POS_POST_C
;
/**
 * Insert a dotted-circle record into every broken cluster so that the cluster
 * has a base to attach to.
 *
 * When the walk reaches the first record of a syllable whose type (low 4 bits
 * of 'syllable') is BROKEN_CLUSTER, the syllable value is copied onto
 * $dottedcircle[0], any leading OT_REPHA records of that syllable are skipped,
 * and $dottedcircle is spliced into $info at that index.
 *
 * NOTE(review): the initialisation of $idx and $last_syllable and the index
 * increments are not visible in this extract.
 *
 * @param array $info         Character records; grows as records are inserted.
 * @param array $dottedcircle Record(s) to splice in; element 0 is tagged with the syllable value.
 *
 * @throws \Mpdf\MpdfException If $info[$idx] is still set after the walk has finished.
 */
379 public static function insert_dotted_circles(&$info, $dottedcircle)
// count($info) is re-evaluated each iteration because array_splice() below grows $info.
383 while ($idx < count($info)) {
384 $syllable = $info[$idx]['syllable'];
385 $syllable_type = ($syllable & 0x0F);
386 if ($last_syllable != $syllable && $syllable_type == self
::BROKEN_CLUSTER
) {
387 $last_syllable = $syllable;
389 $dottedcircle[0]['syllable'] = $info[$idx]['syllable'];
391 /* Insert dottedcircle after possible Repha. */
392 while ($idx < count($info) && $last_syllable == $info[$idx]['syllable'] && $info[$idx]['indic_category'] == self
::OT_REPHA
) {
395 array_splice($info, $idx, 0, $dottedcircle);
401 // I am not sure how this code below got in here, since $idx should now be > count($info) and thus invalid.
402 // In case I am missing something(!) I'll leave a warning here for now:
403 if (isset($info[$idx])) {
404 throw new \Mpdf\
MpdfException('Unexpected error occured in Indic processing');
406 // In case of final broken cluster...
407 //$syllable = $info[$idx]['syllable'];
408 //$syllable_type = ($syllable & 0x0F);
409 //if ($last_syllable != $syllable && $syllable_type == self::BROKEN_CLUSTER) {
410 // $dottedcircle[0]['syllable'] = $info[$idx]['syllable'];
411 // array_splice($info, $idx, 0, $dottedcircle);
416 * https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx */
/**
 * Initial (pre-GSUB) reordering of a single syllable, $info[$start] up to but
 * not including $info[$end].
 *
 * Outline of the steps visible in this method:
 *  - The syllable type is read from the low 4 bits of $info[$start]['syllable'];
 *    NON_INDIC_CLUSTER, and broken/stand-alone clusters ending in a dotted
 *    circle, are special-cased.
 *  - The base consonant is located according to $indic_config[2]
 *    (BASE_POS_LAST or BASE_POS_FIRST). For non-Khmer scripts an initial
 *    Ra+Halant (Reph) is excluded from the search according to the reph mode
 *    in $indic_config[4].
 *  - 'indic_position' is assigned: records before the base are capped at
 *    POS_PRE_C, the base gets POS_BASE_C, consonants after a matra get
 *    POS_FINAL_C and an initial Ra may get POS_RA_TO_BECOME_REPH.
 *  - Joiners, nukta, register shifters and halant/coeng inherit the position
 *    of the preceding record; after the base they are re-attached to the
 *    following consonant.
 *  - Every 'mask' is reset to 0, the range is sorted with self::bubble_sort(),
 *    and feature masks (RPHF, HALF, BLWF, ABVF, PSTF, PREF, CFAR) are set;
 *    HALF and BLWF are cleared again where ZWJ/ZWNJ handling requires it.
 *
 * The OMIT_INDIC_FIX_1..4 constants, when defined as 1, switch off individual
 * local fixes.
 *
 * NOTE(review): many lines of this method (including the initialisation of
 * $base, $limit, $has_reph and several closing braces) are not visible in this
 * extract; the outline covers only what is shown.
 *
 * @param array $info         Character records, updated in place.
 * @param array $GSUBdata     Font substitution data keyed by feature tag ('rphf', 'pref', 'blwf', ...), then code point.
 * @param array $indic_config Script configuration; index 2 is the base position mode, index 4 the reph mode.
 * @param mixed $scriptblock  Script identifier, compared against Ucdn::SCRIPT_* constants.
 * @param mixed $is_old_spec  Truthy for old-style Indic script tags.
 * @param int   $start        Index of the first record of the syllable.
 * @param int   $end          Index one past the last record of the syllable.
 */
418 public static function initial_reordering_syllable(&$info, $GSUBdata, $indic_config, $scriptblock, $is_old_spec, $start, $end)
420 /* vowel_syllable: We made the vowels look like consonants. So uses the consonant logic! */
421 /* broken_cluster: We already inserted dotted-circles, so just call the standalone_cluster. */
422 /* standalone_cluster: We treat NBSP/dotted-circle as if they are consonants, so we should just chain. */
424 $syllable_type = ($info[$start]['syllable'] & 0x0F);
425 if ($syllable_type == self
::NON_INDIC_CLUSTER
) {
428 if ($syllable_type == self
::BROKEN_CLUSTER ||
$syllable_type == self
::STANDALONE_CLUSTER
) {
429 //if ($uniscribe_bug_compatible) {
430 /* For dotted-circle, this is what Uniscribe does:
431 * If dotted-circle is the last glyph, it just does nothing.
432 * i.e. It doesn't form Reph. */
433 if ($info[$end - 1]['indic_category'] == self
::OT_DOTTEDCIRCLE
) {
438 /* 1. Find base consonant:
440 * The shaping engine finds the base consonant of the syllable, using the
441 * following algorithm: starting from the end of the syllable, move backwards
442 * until a consonant is found that does not have a below-base or post-base
443 * form (post-base forms have to follow below-base forms), or that is not a
444 * pre-base reordering Ra, or arrive at the first consonant. The consonant
445 * stopped at will be the base.
447 * o If the syllable starts with Ra + Halant (in a script that has Reph)
448 * and has more than one consonant, Ra is excluded from candidates for
456 if ($scriptblock != Ucdn
::SCRIPT_KHMER
) {
457 /* -> If the syllable starts with Ra + Halant (in a script that has Reph)
458 * and has more than one consonant, Ra is excluded from candidates for
459 * base consonants. */
460 if (count($GSUBdata['rphf']) /* ?? $indic_plan->mask_array[RPHF] */ && $start +
3 <= $end &&
462 ($indic_config[4] == self
::REPH_MODE_IMPLICIT
&& !self
::is_joiner($info[$start +
2])) ||
463 ($indic_config[4] == self
::REPH_MODE_EXPLICIT
&& $info[$start +
2]['indic_category'] == self
::OT_ZWJ
)
465 /* See if it matches the 'rphf' feature. */
466 //$glyphs = array($info[$start]['uni'], $info[$start + 1]['uni']);
467 //if ($indic_plan->rphf->would_substitute ($glyphs, count($glyphs), true, face)) {
468 if (isset($GSUBdata['rphf'][$info[$start]['uni']]) && self
::is_halant_or_coeng($info[$start +
1])) {
470 while ($limit < $end && self
::is_joiner($info[$limit])) {
476 } else if ($indic_config[4] == self
::REPH_MODE_LOG_REPHA
&& $info[$start]['indic_category'] == self
::OT_REPHA
) {
478 while ($limit < $end && self
::is_joiner($info[$limit])) {
486 switch ($indic_config[2]) { // base_pos
487 case self
::BASE_POS_LAST
:
488 /* -> starting from the end of the syllable, move backwards */
493 /* -> until a consonant is found */
494 if (self
::is_consonant($info[$i])) {
495 /* -> that does not have a below-base or post-base form
496 * (post-base forms have to follow below-base forms), */
497 if ($info[$i]['indic_position'] != self
::POS_BELOW_C
&& ($info[$i]['indic_position'] != self
::POS_POST_C ||
$seen_below)) {
501 if ($info[$i]['indic_position'] == self
::POS_BELOW_C
) {
505 /* -> or that is not a pre-base reordering Ra,
507 * IMPLEMENTATION NOTES:
509 * Our pre-base reordering Ra's are marked POS_POST_C, so will be skipped
510 * by the logic above already.
513 /* -> or arrive at the first consonant. The consonant stopped at will
517 /* A ZWJ after a Halant stops the base search, and requests an explicit
519 * [A ZWJ before a Halant, requests a subjoined form instead, and hence
520 * search continues. This is particularly important for Bengali
521 * sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya] */
522 if ($start < $i && $info[$i]['indic_category'] == self
::OT_ZWJ
&& $info[$i - 1]['indic_category'] == self
::OT_H
) {
523 if (!defined("OMIT_INDIC_FIX_1") || OMIT_INDIC_FIX_1
!= 1) {
529 if ($start < $i && $info[$i]['indic_category'] == self
::OT_ZWNJ
) {
533 } while ($i > $limit);
536 case self
::BASE_POS_FIRST
:
537 /* In scripts without half forms (eg. Khmer), the first consonant is always the base. */
543 /* Find the last base consonant that is not blocked by ZWJ. If there is
544 * a ZWJ right before a base consonant, that would request a subjoined form. */
545 for ($i = $limit; $i < $end; $i++
) {
546 if (self
::is_consonant($info[$i]) && $info[$i]['indic_position'] == self
::POS_BASE_C
) {
547 if ($limit < $i && $info[$i - 1]['indic_category'] == self
::OT_ZWJ
) {
555 /* Mark all subsequent consonants as below. */
556 for ($i = $base +
1; $i < $end; $i++
) {
557 if (self
::is_consonant($info[$i]) && $info[$i]['indic_position'] == self
::POS_BASE_C
) {
558 $info[$i]['indic_position'] = self
::POS_BELOW_C
;
567 /* -> If the syllable starts with Ra + Halant (in a script that has Reph)
568 * and has more than one consonant, Ra is excluded from candidates for
571 * Only do this for unforced Reph. (ie. not for Ra,H,ZWJ. */
572 if ($scriptblock != Ucdn
::SCRIPT_KHMER
) {
573 if ($has_reph && $base == $start && $limit - $base <= 2) {
574 /* Have no other consonant, so Reph is not formed and Ra becomes base. */
579 /* 2. Decompose and reorder Matras:
581 * Each matra and any syllable modifier sign in the cluster are moved to the
582 * appropriate position relative to the consonant(s) in the cluster. The
583 * shaping engine decomposes two- or three-part matras into their constituent
584 * parts before any repositioning. Matra characters are classified by which
585 * consonant in a conjunct they have affinity for and are reordered to the
586 * following positions:
588 * o Before first half form in the syllable
589 * o After subjoined consonants
590 * o After post-form consonant
591 * o After main consonant (for above marks)
593 * IMPLEMENTATION NOTES:
595 * The normalize() routine has already decomposed matras for us, so we don't
596 * need to worry about that.
600 /* 3. Reorder marks to canonical order:
602 * Adjacent nukta and halant or nukta and vedic sign are always repositioned
603 * if necessary, so that the nukta is first.
605 * IMPLEMENTATION NOTES:
607 * Use the combining Class from Unicode categories? to bubble_sort.
610 /* Reorder characters */
612 for ($i = $start; $i < $base; $i++
) {
613 $info[$i]['indic_position'] = min(self
::POS_PRE_C
, $info[$i]['indic_position']);
617 $info[$base]['indic_position'] = self
::POS_BASE_C
;
620 /* Mark final consonants. A final consonant is one appearing after a matra,
621 * ? only in Khmer. */
622 for ($i = $base +
1; $i < $end; $i++
) {
623 if ($info[$i]['indic_category'] == self
::OT_M
) {
624 for ($j = $i +
1; $j < $end; $j++
) {
625 if (self
::is_consonant($info[$j])) {
626 $info[$j]['indic_position'] = self
::POS_FINAL_C
;
634 /* Handle beginning Ra */
635 if ($scriptblock != Ucdn
::SCRIPT_KHMER
) {
637 $info[$start]['indic_position'] = self
::POS_RA_TO_BECOME_REPH
;
642 /* For old-style Indic script tags, move the first post-base Halant after
643 * last consonant. Only do this if there is *not* a Halant after last
644 * consonant. Otherwise it becomes messy. */
646 for ($i = $base +
1; $i < $end; $i++
) {
647 if ($info[$i]['indic_category'] == self
::OT_H
) {
648 for ($j = $end - 1; $j > $i; $j--) {
649 if (self
::is_consonant($info[$j]) ||
$info[$j]['indic_category'] == self
::OT_H
) {
653 if ($info[$j]['indic_category'] != self
::OT_H
&& $j > $i) {
654 /* Move Halant to after last consonant. */
655 self
::_move_info_pos($info, $i, $j +
1);
662 /* Attach misc marks to previous char to move with them. */
663 $last_pos = self
::POS_START
;
664 for ($i = $start; $i < $end; $i++
) {
665 if ((self
::FLAG($info[$i]['indic_category']) & (self
::FLAG(self
::OT_ZWJ
) | self
::FLAG(self
::OT_ZWNJ
) | self
::FLAG(self
::OT_N
) | self
::FLAG(self
::OT_RS
) | self
::FLAG(self
::OT_H
) | self
::FLAG(self
::OT_COENG
) ))) {
666 $info[$i]['indic_position'] = $last_pos;
667 if ($info[$i]['indic_category'] == self
::OT_H
&& $info[$i]['indic_position'] == self
::POS_PRE_M
) {
669 * Uniscribe doesn't move the Halant with Left Matra.
670 * TEST: U+092B,U+093F,U+094DE
671 * We follow. This is important for the Sinhala
672 * U+0DDA split matra since it decomposes to U+0DD9,U+0DCA
673 * where U+0DD9 is a left matra and U+0DCA is the virama.
674 * We don't want to move the virama with the left matra.
675 * TEST: U+0D9A,U+0DDA
677 for ($j = $i; $j > $start; $j--) {
678 if ($info[$j - 1]['indic_position'] != self
::POS_PRE_M
) {
679 $info[$i]['indic_position'] = $info[$j - 1]['indic_position'];
684 } else if ($info[$i]['indic_position'] != self
::POS_SMVD
) {
685 $last_pos = $info[$i]['indic_position'];
689 /* Re-attach ZWJ, ZWNJ, and halant to next char, for after-base consonants. */
691 for ($i = $base +
1; $i < $end; $i++
) {
692 if (self
::is_halant_or_coeng($info[$i])) {
694 } else if (self
::is_consonant($info[$i])) {
695 for ($j = $last_halant; $j < $i; $j++
) {
696 if ($info[$j]['indic_position'] != self
::POS_SMVD
) {
697 $info[$j]['indic_position'] = $info[$i]['indic_position'];
704 if ($scriptblock == Ucdn
::SCRIPT_KHMER
) {
706 /* Move Coeng+RO (Halant,Ra) sequence before base consonant. */
707 for ($i = $base +
1; $i < $end; $i++
) {
708 if (self
::is_halant_or_coeng($info[$i]) && self
::is_ra($info[$i +
1]['uni'])) {
709 $info[$i]['indic_position'] = self
::POS_PRE_C
;
710 $info[$i +
1]['indic_position'] = self
::POS_PRE_C
;
718 if (!defined("OMIT_INDIC_FIX_2") || OMIT_INDIC_FIX_2 != 1) {
721 $POST_ZWNJ_c_found = false;
722 for ($i = $base + 1; $i < $end; $i++) {
723 if ($info[$i]['indic_category'] == self::OT_ZWNJ) { $ZWNJ_found = true; }
724 else if ($ZWNJ_found && $info[$i]['indic_category'] == self::OT_C) { $POST_ZWNJ_c_found = true; }
725 else if ($POST_ZWNJ_c_found && $info[$i]['indic_position'] == self::POS_BEFORE_SUB) { $info[$i]['indic_position'] = self::POS_AFTER_SUB; }
730 /* Setup masks now */
731 for ($i = $start; $i < $end; $i++
) {
732 $info[$i]['mask'] = 0;
736 if ($scriptblock == Ucdn
::SCRIPT_KHMER
) {
737 /* Find a Coeng+RO (Halant,Ra) sequence and mark it for pre-base processing. */
738 $mask = self
::FLAG(self
::PREF
);
739 for ($i = $base; $i < $end - 1; $i++
) { /* KHMER_FIX_1 From $start (not base) */
740 if (self
::is_halant_or_coeng($info[$i]) && self
::is_ra($info[$i +
1]['uni'])) {
741 $info[$i]['mask'] |
= self
::FLAG(self
::PREF
);
742 $info[$i +
1]['mask'] |
= self
::FLAG(self
::PREF
);
744 /* Mark the subsequent stuff with 'cfar'. Used in Khmer.
745 * Read the feature spec.
746 * This allows distinguishing the following cases with MS Khmer fonts:
747 * U+1784,U+17D2,U+179A,U+17D2,U+1782 [C+Coeng+RO+Coeng+C] => Should activate CFAR
748 * U+1784,U+17D2,U+1782,U+17D2,U+179A [C+Coeng+C+Coeng+RO] => Should NOT activate CFAR
750 for ($j = ($i +
2); $j < $end; $j++
) {
751 $info[$j]['mask'] |
= self
::FLAG(self
::CFAR
);
761 /* Sit tight, rock 'n roll! */
762 self
::bubble_sort($info, $start, $end - $start);
764 /* Find base again */
766 for ($i = $start; $i < $end; $i++
) {
767 if ($info[$i]['indic_position'] == self
::POS_BASE_C
) {
773 if ($scriptblock != Ucdn
::SCRIPT_KHMER
) {
775 for ($i = $start; $i < $end; $i++
) {
776 if ($info[$i]['indic_position'] == self
::POS_RA_TO_BECOME_REPH
) {
777 $info[$i]['mask'] |
= self
::FLAG(self
::RPHF
);
782 $mask = self
::FLAG(self
::HALF
);
783 for ($i = $start; $i < $base; $i++
) {
784 $info[$i]['mask'] |
= $mask;
789 $mask = (self
::FLAG(self
::BLWF
) | self
::FLAG(self
::ABVF
) | self
::FLAG(self
::PSTF
));
790 for ($i = $base +
1; $i < $end; $i++
) {
791 $info[$i]['mask'] |
= $mask;
795 if ($scriptblock != Ucdn
::SCRIPT_KHMER
) {
796 if (!defined("OMIT_INDIC_FIX_3") || OMIT_INDIC_FIX_3
!= 1) {
798 /* Find a (pre-base) Consonant, Halant,Ra sequence and mark Halant|Ra for below-base BLWF processing. */
799 // TEST CASE ক্র্ক in FreeSans versus Vrinda
800 if (($base - $start) >= 3) {
801 for ($i = $start; $i < ($base - 2); $i++
) {
802 if (self
::is_consonant($info[$i])) {
803 if (self
::is_halant_or_coeng($info[$i +
1]) && self
::is_ra($info[$i +
2]['uni'])) {
804 // If would substitute Halant+Ra...BLWF
805 if (isset($GSUBdata['blwf'][$info[$i +
2]['uni']])) {
806 $info[$i +
1]['mask'] |
= self
::FLAG(self
::BLWF
);
807 $info[$i +
2]['mask'] |
= self
::FLAG(self
::BLWF
);
808 } /* If would not substitute as blwf, mark Ra+Halant for RPHF using following Halant (if present) */ else if (self
::is_halant_or_coeng($info[$i +
3])) {
809 $info[$i +
2]['mask'] |
= self
::FLAG(self
::RPHF
);
810 $info[$i +
3]['mask'] |
= self
::FLAG(self
::RPHF
);
822 if ($is_old_spec && $scriptblock == Ucdn
::SCRIPT_DEVANAGARI
) {
823 /* Old-spec eye-lash Ra needs special handling. From the spec:
824 * "The feature 'below-base form' is applied to consonants
825 * having below-base forms and following the base consonant.
826 * The exception is vattu, which may appear below half forms
827 * as well as below the base glyph. The feature 'below-base
828 * form' will be applied to all such occurrences of Ra as well."
830 * Test case: U+0924,U+094D,U+0930,U+094d,U+0915
831 * with Sanskrit 2003 font.
833 * However, note that Ra,Halant,ZWJ is the correct way to
834 * request eyelash form of Ra, so we wouldn't inhibit it
837 * Test case: U+0924,U+094D,U+0930,U+094d,U+200D,U+0915
839 for ($i = $start; ($i +
1) < $base; $i++
) {
840 if ($info[$i]['indic_category'] == self
::OT_RA
&& $info[$i +
1]['indic_category'] == self
::OT_H
&&
841 ($i +
2 == $base ||
$info[$i +
2]['indic_category'] != self
::OT_ZWJ
)) {
842 $info[$i]['mask'] |
= self
::FLAG(self
::BLWF
);
843 $info[$i +
1]['mask'] |
= self
::FLAG(self
::BLWF
);
848 if ($scriptblock != Ucdn
::SCRIPT_KHMER
) {
849 if (count($GSUBdata['pref']) && $base +
2 < $end) {
850 /* Find a Halant,Ra sequence and mark it for pre-base processing. */
851 for ($i = $base +
1; $i +
1 < $end; $i++
) {
852 // If old_spec find Ra-Halant...
853 if ((isset($GSUBdata['pref'][$info[$i +
1]['uni']]) && self
::is_halant_or_coeng($info[$i]) && self
::is_ra($info[$i +
1]['uni']) ) ||
854 ($is_old_spec && isset($GSUBdata['pref'][$info[$i]['uni']]) && self
::is_halant_or_coeng($info[$i +
1]) && self
::is_ra($info[$i]['uni']) )
856 $info[$i++
]['mask'] |
= self
::FLAG(self
::PREF
);
857 $info[$i++
]['mask'] |
= self
::FLAG(self
::PREF
);
865 /* Apply ZWJ/ZWNJ effects */
866 for ($i = $start +
1; $i < $end; $i++
) {
867 if (self
::is_joiner($info[$i])) {
868 $non_joiner = ($info[$i]['indic_category'] == self
::OT_ZWNJ
);
870 while ($j > $start) {
871 if (defined("OMIT_INDIC_FIX_4") && OMIT_INDIC_FIX_4
== 1) {
872 // INDIC_FIX_4 = do nothing - carry on //
873 // ZWNJ should block H C from forming blwf post-base - need to unmask backwards beyond first consonant arrived at //
874 if (!self
::is_consonant($info[$j])) {
880 /* ZWJ/ZWNJ should disable CJCT. They do that by simply
881 * being there, since we don't skip them for the CJCT
882 * feature (ie. F_MANUAL_ZWJ) */
884 /* A ZWNJ disables HALF. */
886 $info[$j]['mask'] &= ~
(self
::FLAG(self
::HALF
) | self
::FLAG(self
::BLWF
));
/**
 * Run the final reordering pass over a whole run of characters.
 *
 * $info is walked once; every maximal run of records sharing the same
 * 'syllable' value is handed to self::final_reordering_syllable() as the
 * half-open range [$last, $i), and the trailing run as [$last, $count).
 *
 * NOTE(review): the initialisation and update of $last, and any guard for an
 * empty $info, are not visible in this extract.
 *
 * @param array $info         Character records, updated in place.
 * @param array $GSUBdata     Font substitution data, passed through.
 * @param array $indic_config Script configuration, passed through.
 * @param mixed $scriptblock  Script identifier, passed through.
 * @param mixed $is_old_spec  Old-spec flag, passed through.
 */
893 public static function final_reordering(&$info, $GSUBdata, $indic_config, $scriptblock, $is_old_spec)
895 $count = count($info);
900 $last_syllable = $info[0]['syllable'];
901 for ($i = 1; $i < $count; $i++
) {
902 if ($last_syllable != $info[$i]['syllable']) {
903 self
::final_reordering_syllable($info, $GSUBdata, $indic_config, $scriptblock, $is_old_spec, $last, $i);
905 $last_syllable = $info[$last]['syllable'];
// Process the final syllable, which the loop above never closes.
908 self
::final_reordering_syllable($info, $GSUBdata, $indic_config, $scriptblock, $is_old_spec, $last, $count);
911 public static function final_reordering_syllable(&$info, $GSUBdata, $indic_config, $scriptblock, $is_old_spec, $start, $end)
914 /* 4. Final reordering:
916 * After the localized forms and basic shaping forms GSUB features have been
917 * applied (see below), the shaping engine performs some final glyph
918 * reordering before applying all the remaining font features to the entire
922 /* Find base again */
923 for ($base = $start; $base < $end; $base++
) {
924 if ($info[$base]['indic_position'] >= self
::POS_BASE_C
) {
925 if ($start < $base && $info[$base]['indic_position'] > self
::POS_BASE_C
) {
931 if ($base == $end && $start < $base && $info[$base - 1]['indic_category'] != self
::OT_ZWJ
) {
934 while ($start < $base && isset($info[$base]) && ($info[$base]['indic_category'] == self
::OT_H ||
$info[$base]['indic_category'] == self
::OT_N
)) {
941 * If a pre-base matra character had been reordered before applying basic
942 * features, the glyph can be moved closer to the main consonant based on
943 * whether half-forms had been formed. Actual position for the matra is
944 * defined as "after last standalone halant glyph, after initial matra
945 * position and before the main consonant". If ZWJ or ZWNJ follow this
946 * halant, position is moved after it.
950 if ($start +
1 < $end && $start < $base) { /* Otherwise there can't be any pre-base matra characters. */
951 /* If we lost track of base, alas, position before last thingy. */
952 $new_pos = ($base == $end) ?
$base - 2 : $base - 1;
954 /* Malayalam / Tamil do not have "half" forms or explicit virama forms.
955 * The glyphs formed by 'half' are Chillus or ligated explicit viramas.
956 * We want to position matra after them.
958 if ($scriptblock != Ucdn
::SCRIPT_MALAYALAM
&& $scriptblock != Ucdn
::SCRIPT_TAMIL
) {
959 while ($new_pos > $start && !(self
::is_one_of($info[$new_pos], (self
::FLAG(self
::OT_M
) | self
::FLAG(self
::OT_H
) | self
::FLAG(self
::OT_COENG
))))) {
963 /* If we found no Halant we are done.
964 * Otherwise only proceed if the Halant does
965 * not belong to the Matra itself! */
966 if (self
::is_halant_or_coeng($info[$new_pos]) && $info[$new_pos]['indic_position'] != self
::POS_PRE_M
) {
967 /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */
968 if ($new_pos +
1 < $end && self
::is_joiner($info[$new_pos +
1])) {
976 if ($start < $new_pos && $info[$new_pos]['indic_position'] != self
::POS_PRE_M
) {
977 /* Now go see if there's actually any matras... */
978 for ($i = $new_pos; $i > $start; $i--) {
979 if ($info[$i - 1]['indic_position'] == self
::POS_PRE_M
) {
981 //memmove (&info[$old_pos], &info[$old_pos + 1], ($new_pos - $old_pos) * sizeof ($info[0]));
982 self
::_move_info_pos($info, $old_pos, $new_pos +
1);
984 if ($old_pos < $base && $base <= $new_pos) { /* Shouldn't actually happen. */
996 * Reph's original position is always at the beginning of the syllable,
997 * (i.e. it is not reordered at the character reordering stage). However,
998 * it will be reordered according to the basic-forms shaping results.
999 * Possible positions for reph, depending on the script, are; after main,
1000 * before post-base consonant forms, and after post-base consonant forms.
1003 /* If there's anything after the Ra that has the REPH pos, it ought to be halant.
1004 * Which means that the font has failed to ligate the Reph. In which case, we
1005 * shouldn't move. */
1006 if ($start +
1 < $end &&
1007 $info[$start]['indic_position'] == self
::POS_RA_TO_BECOME_REPH
&& $info[$start +
1]['indic_position'] != self
::POS_RA_TO_BECOME_REPH
) {
1008 $reph_pos = $indic_config[3];
1009 $skip_to_reph_step_5 = false;
1010 $skip_to_reph_move = false;
1012 /* 1. If reph should be positioned after post-base consonant forms,
1013 * proceed to step 5.
1015 if ($reph_pos == self
::REPH_POS_AFTER_POST
) {
1016 $skip_to_reph_step_5 = true;
1019 /* 2. If the reph repositioning class is not after post-base: target
1020 * position is after the first explicit halant glyph between the
1021 * first post-reph consonant and last main consonant. If ZWJ or ZWNJ
1022 * are following this halant, position is moved after it. If such
1023 * position is found, this is the target position. Otherwise,
1024 * proceed to the next step.
1026 * Note: in old-implementation fonts, where classifications were
1027 * fixed in shaping engine, there was no case where reph position
1028 * will be found on this step.
1031 if (!$skip_to_reph_step_5) {
1032 $new_reph_pos = $start +
1;
1034 while ($new_reph_pos < $base && !self
::is_halant_or_coeng($info[$new_reph_pos])) {
1038 if ($new_reph_pos < $base && self
::is_halant_or_coeng($info[$new_reph_pos])) {
1039 /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */
1040 if ($new_reph_pos +
1 < $base && self
::is_joiner($info[$new_reph_pos +
1])) {
1043 $skip_to_reph_move = true;
1047 /* 3. If reph should be repositioned after the main consonant: find the
1048 * first consonant not ligated with main, or find the first
1049 * consonant that is not a potential pre-base reordering Ra.
1051 if ($reph_pos == self
::REPH_POS_AFTER_MAIN
&& !$skip_to_reph_move && !$skip_to_reph_step_5) {
1052 $new_reph_pos = $base;
1053 /* XXX Skip potential pre-base reordering Ra. */
1054 while ($new_reph_pos +
1 < $end && $info[$new_reph_pos +
1]['indic_position'] <= self
::POS_AFTER_MAIN
) {
1057 if ($new_reph_pos < $end) {
1058 $skip_to_reph_move = true;
1062 /* 4. If reph should be positioned before post-base consonant, find
1063 * first post-base classified consonant not ligated with main. If no
1064 * consonant is found, the target position should be before the
1065 * first matra, syllable modifier sign or vedic sign.
1067 /* This is our take on what step 4 is trying to say (and failing, BADLY). */
1068 if ($reph_pos == self
::REPH_POS_AFTER_SUB
&& !$skip_to_reph_move && !$skip_to_reph_step_5) {
1069 $new_reph_pos = $base;
1070 while ($new_reph_pos < $end && isset($info[$new_reph_pos +
1]['indic_position']) &&
1071 !( self
::FLAG($info[$new_reph_pos +
1]['indic_position']) & (self
::FLAG(self
::POS_POST_C
) | self
::FLAG(self
::POS_AFTER_POST
) | self
::FLAG(self
::POS_SMVD
)))) {
1074 if ($new_reph_pos < $end) {
1075 $skip_to_reph_move = true;
1079 /* 5. If no consonant is found in steps 3 or 4, move reph to a position
1080 * immediately before the first post-base matra, syllable modifier
1081 * sign or vedic sign that has a reordering class after the intended
1082 * reph position. For example, if the reordering position for reph
1083 * is post-main, it will skip above-base matras that also have a
1084 * post-main position.
1086 if (!$skip_to_reph_move) {
1087 /* Copied from step 2. */
1088 $new_reph_pos = $start +
1;
1089 while ($new_reph_pos < $base && !self
::is_halant_or_coeng($info[$new_reph_pos])) {
1093 if ($new_reph_pos < $base && self
::is_halant_or_coeng($info[$new_reph_pos])) {
1094 /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */
1095 if ($new_reph_pos +
1 < $base && self
::is_joiner($info[$new_reph_pos +
1])) {
1098 $skip_to_reph_move = true;
1103 /* 6. Otherwise, reorder reph to the end of the syllable.
1105 if (!$skip_to_reph_move) {
1106 $new_reph_pos = $end - 1;
1107 while ($new_reph_pos > $start && $info[$new_reph_pos]['indic_position'] == self
::POS_SMVD
) {
1112 * If the Reph is to be ending up after a Matra,Halant sequence,
1113 * position it before that Halant so it can interact with the Matra.
1114 * However, if it's a plain Consonant,Halant we shouldn't do that.
1115 * Uniscribe doesn't do this.
1116 * TEST: U+0930,U+094D,U+0915,U+094B,U+094D
1118 //if (!$hb_options.uniscribe_bug_compatible && self::is_halant_or_coeng($info[$new_reph_pos])) {
1119 if (self
::is_halant_or_coeng($info[$new_reph_pos])) {
1120 for ($i = $base +
1; $i < $new_reph_pos; $i++
) {
1121 if ($info[$i]['indic_category'] == self
::OT_M
) {
1131 self
::_move_info_pos($info, $start, $new_reph_pos +
1);
1133 if ($start < $base && $base <= $new_reph_pos) {
1139 /* o Reorder pre-base reordering consonants:
1141 * If a pre-base reordering consonant is found, reorder it according to
1142 * the following rules:
1146 if (count($GSUBdata['pref']) && $base +
1 < $end) { /* Otherwise there can't be any pre-base reordering Ra. */
1147 for ($i = $base +
1; $i < $end; $i++
) {
1148 if ($info[$i]['mask'] & self
::FLAG(self
::PREF
)) {
1149 /* 1. Only reorder a glyph produced by substitution during application
1150 * of the <pref> feature. (Note that a font may shape a Ra consonant with
1151 * the feature generally but block it in certain contexts.)
1153 // ??? Need to TEST if actual substitution has occurred
1154 if ($i +
1 == $end ||
($info[$i +
1]['mask'] & self
::FLAG(self
::PREF
)) == 0) {
1156 * 2. Try to find a target position the same way as for pre-base matra.
1157 * If it is found, reorder pre-base consonant glyph.
1159 * 3. If position is not found, reorder immediately before main
1163 /* Malayalam / Tamil do not have "half" forms or explicit virama forms.
1164 * The glyphs formed by 'half' are Chillus or ligated explicit viramas.
1165 * We want to position matra after them.
1167 if ($scriptblock != Ucdn
::SCRIPT_MALAYALAM
&& $scriptblock != Ucdn
::SCRIPT_TAMIL
) {
1168 while ($new_pos > $start &&
1169 !(self
::is_one_of($info[$new_pos - 1], self
::FLAG(self
::OT_M
) | self
::FLAG(self
::OT_H
) | self
::FLAG(self
::OT_COENG
)))) {
1173 /* In Khmer coeng model, a V,Ra can go *after* matras. If it goes after a
1174 * split matra, it should be reordered to *before* the left part of such matra. */
1175 if ($new_pos > $start && $info[$new_pos - 1]['indic_category'] == self
::OT_M
) {
1177 for ($i = $base +
1; $i < $old_pos; $i++
) {
1178 if ($info[$i]['indic_category'] == self
::OT_M
) {
1186 if ($new_pos > $start && self
::is_halant_or_coeng($info[$new_pos - 1])) {
1187 /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */
1188 if ($new_pos < $end && self
::is_joiner($info[$new_pos])) {
1194 self
::_move_info_pos($info, $old_pos, $new_pos);
1196 if ($new_pos <= $base && $base < $old_pos) {
1207 /* Apply 'init' to the Left Matra if it's a word start. */
1208 if ($info[$start]['indic_position'] == self
::POS_PRE_M
&&
1210 ($info[$start - 1]['general_category'] < Ucdn
::UNICODE_GENERAL_CATEGORY_FORMAT ||
$info[$start - 1]['general_category'] > Ucdn
::UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK
)
1212 $info[$start]['mask'] |
= self
::FLAG(self
::INIT
);
1217 * Finish off and go home!
/**
 * Move a single glyph-info entry inside $info (modified in place).
 *
 * Moving backwards ($from > $to) puts the entry at index $to. Moving
 * forwards puts it immediately before the entry that was at index $to,
 * i.e. at index $to - 1 of the resulting array.
 *
 * @param array $info list of glyph-info entries, re-indexed by array_splice
 * @param int   $from index of the entry to move
 * @param int   $to   insertion index, expressed against the array before the move
 */
public static function _move_info_pos(&$info, $from, $to)
{
    // array_splice() takes the replacement as a list, so wrap the entry.
    $moved = [$info[$from]];

    if ($from > $to) {
        // Removing first leaves every index at or below $to untouched.
        array_splice($info, $from, 1);
        array_splice($info, $to, 0, $moved);
        return;
    }

    // Inserting first leaves index $from untouched, so the original copy
    // can still be removed by its old index afterwards.
    array_splice($info, $to, 0, $moved);
    array_splice($info, $from, 1);
}
1234 public static $ra_chars = [
1235 0x0930 => 1, /* Devanagari */
1236 0x09B0 => 1, /* Bengali */
1237 0x09F0 => 1, /* Bengali (Assamese) */
1238 0x0A30 => 1, /* Gurmukhi */ /* No Reph */
1239 0x0AB0 => 1, /* Gujarati */
1240 0x0B30 => 1, /* Oriya */
1241 0x0BB0 => 1, /* Tamil */ /* No Reph */
1242 0x0C30 => 1, /* Telugu */ /* Reph formed only with ZWJ */
1243 0x0CB0 => 1, /* Kannada */
1244 0x0D30 => 1, /* Malayalam */ /* No Reph, Logical Repha */
1245 0x0DBB => 1, /* Sinhala */ /* Reph formed only with ZWJ */
1246 0x179A => 1, /* Khmer */ /* No Reph, Visual Repha */
/**
 * Tell whether a code point is one of the Ra letters listed in self::$ra_chars.
 *
 * @param int $u Unicode code point
 * @return bool true when $u is a key of self::$ra_chars
 */
public static function is_ra($u)
{
    return isset(self::$ra_chars[$u]);
}
/**
 * Test whether a glyph's indic_category is one of the categories in $flags.
 *
 * @param array $info  glyph-info entry; reads 'indic_category' and, if present, 'is_ligature'
 * @param int   $flags bit mask built by OR-ing self::FLAG(...) values
 * @return bool false for any ligated glyph, otherwise whether the category bit is set in $flags
 */
public static function is_one_of($info, $flags)
{
    /* If it ligated, all bets are off. */
    $ligated = isset($info['is_ligature']) && $info['is_ligature'];
    if ($ligated) {
        return false;
    }

    $categoryBit = self::FLAG($info['indic_category']);

    return ($categoryBit & $flags) ? true : false;
}
/**
 * Tell whether a glyph is categorised as ZWJ or ZWNJ.
 *
 * @param array $info glyph-info entry
 * @return bool result of is_one_of() against the OT_ZWJ | OT_ZWNJ mask
 */
public static function is_joiner($info)
{
    $joinerMask = self::FLAG(self::OT_ZWJ) | self::FLAG(self::OT_ZWNJ);

    return self::is_one_of($info, $joinerMask);
}
/**
 * Tell whether a glyph counts as a consonant for syllable analysis.
 *
 * Vowels and placeholders are treated as if they were consonants: the mask
 * covers OT_C, OT_CM, OT_RA, OT_V, OT_NBSP and OT_DOTTEDCIRCLE.
 *
 * @param array $info glyph-info entry
 * @return bool result of is_one_of() against that mask
 */
public static function is_consonant($info)
{
    $consonantMask = self::FLAG(self::OT_C)
        | self::FLAG(self::OT_CM)
        | self::FLAG(self::OT_RA)
        | self::FLAG(self::OT_V)
        | self::FLAG(self::OT_NBSP)
        | self::FLAG(self::OT_DOTTEDCIRCLE);

    return self::is_one_of($info, $consonantMask);
}
/**
 * Tell whether a glyph is categorised as a halant (OT_H) or a coeng (OT_COENG).
 *
 * @param array $info glyph-info entry
 * @return bool result of is_one_of() against the OT_H | OT_COENG mask
 */
public static function is_halant_or_coeng($info)
{
    $halantMask = self::FLAG(self::OT_H) | self::FLAG(self::OT_COENG);

    return self::is_one_of($info, $halantMask);
}
1282 // From hb-private.hh
/**
 * Inclusive range test: is $lo <= $u <= $hi?
 *
 * When [$lo, $hi] is an aligned power-of-two block (the bits in which $lo and
 * $hi differ are all clear in $lo, all set in $hi, and form a run of low
 * bits), the test is done with a single mask-and-compare. Otherwise it falls
 * back to two ordinary comparisons.
 *
 * @param int $u  value to test
 * @param int $lo lower bound (inclusive)
 * @param int $hi upper bound (inclusive)
 * @return bool
 */
public static function in_range($u, $lo, $hi)
{
    // Bits in which the two bounds differ.
    $span = $lo ^ $hi;

    $isAlignedBlock = ($span & $lo) == 0
        && ($span & $hi) == $span
        && ($span & ($span + 1)) == 0;

    if (!$isAlignedBlock) {
        return $lo <= $u && $u <= $hi;
    }

    // Clearing the varying bits of $u must give exactly the block base.
    return ($u & ~$span) == $lo;
}
1292 // From hb-private.hh
1293 public static function FLAG($x)
1298 // BELOW from hb-ot-shape-complex-indic.cc
1301 * Indic configurations.
1305 const BASE_POS_FIRST
= 0;
1306 const BASE_POS_LAST
= 1;
1309 const REPH_POS_DEFAULT
= 10; // POS_BEFORE_POST,
1311 const REPH_POS_AFTER_MAIN
= 5; // POS_AFTER_MAIN,
1313 const REPH_POS_BEFORE_SUB
= 7; // POS_BEFORE_SUB,
1314 const REPH_POS_AFTER_SUB
= 9; // POS_AFTER_SUB,
1315 const REPH_POS_BEFORE_POST
= 10; // POS_BEFORE_POST,
1316 const REPH_POS_AFTER_POST
= 12; // POS_AFTER_POST
1319 const REPH_MODE_IMPLICIT
= 0; /* Reph formed out of initial Ra,H sequence. */
1320 const REPH_MODE_EXPLICIT
= 1; /* Reph formed out of initial Ra,H,ZWJ sequence. */
1321 const REPH_MODE_VIS_REPHA
= 2; /* Encoded Repha character, no reordering needed. */
1322 const REPH_MODE_LOG_REPHA
= 3; /* Encoded Repha character, needs reordering. */
1325 struct of indic_configs{
1335 public static $indic_configs = [/* index is SCRIPT_number from UCDN */
1336 9 => [true, 0x094D, 1, 10, 0],
1337 10 => [true, 0x09CD, 1, 9, 0],
1338 11 => [true, 0x0A4D, 1, 7, 0],
1339 12 => [true, 0x0ACD, 1, 10, 0],
1340 13 => [true, 0x0B4D, 1, 5, 0],
1341 14 => [true, 0x0BCD, 1, 12, 0],
1342 15 => [true, 0x0C4D, 1, 12, 1],
1343 16 => [true, 0x0CCD, 1, 12, 0],
1344 17 => [true, 0x0D4D, 1, 5, 3],
1345 18 => [false, 0x0DCA, 0, 5, 1], /* Sinhala */
1346 30 => [false, 0x17D2, 0, 10, 2], /* Khmer */
1347 84 => [false, 0xA9C0, 1, 10, 0], /* Javanese */
1354 // from "hb-ot-shape-complex-indic-table.cc"
1357 const ISC_A = 0; // INDIC_SYLLABIC_CATEGORY_AVAGRAHA Avagraha
1358 const ISC_Bi = 8; // INDIC_SYLLABIC_CATEGORY_BINDU Bindu
1359 const ISC_C = 1; // INDIC_SYLLABIC_CATEGORY_CONSONANT Consonant
1360 const ISC_CD = 1; // INDIC_SYLLABIC_CATEGORY_CONSONANT_DEAD Consonant_Dead
1361 const ISC_CF = 17; // INDIC_SYLLABIC_CATEGORY_CONSONANT_FINAL Consonant_Final
1362 const ISC_CHL = 1; // INDIC_SYLLABIC_CATEGORY_CONSONANT_HEAD_LETTER Consonant_Head_Letter
1363 const ISC_CM = 17; // INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL Consonant_Medial
1364 const ISC_CP = 11; // INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER Consonant_Placeholder
1365 const ISC_CR = 15; // INDIC_SYLLABIC_CATEGORY_CONSONANT_REPHA Consonant_Repha
1366 const ISC_CS = 1; // INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED Consonant_Subjoined
1367 const ISC_ML = 0; // INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER Modifying_Letter
1368 const ISC_N = 3; // INDIC_SYLLABIC_CATEGORY_NUKTA Nukta
1369 const ISC_x = 0; // INDIC_SYLLABIC_CATEGORY_OTHER Other
1370 const ISC_RS = 13; // INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER Register_Shifter
1371 const ISC_TL = 0; // INDIC_SYLLABIC_CATEGORY_TONE_LETTER Tone_Letter
1372 const ISC_TM = 3; // INDIC_SYLLABIC_CATEGORY_TONE_MARK Tone_Mark
1373 const ISC_V = 4; // INDIC_SYLLABIC_CATEGORY_VIRAMA Virama
1374 const ISC_Vs = 8; // INDIC_SYLLABIC_CATEGORY_VISARGA Visarga
1375 const ISC_Vo = 2; // INDIC_SYLLABIC_CATEGORY_VOWEL Vowel
1376 const ISC_M = 7; // INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT Vowel_Dependent
1377 const ISC_VI = 2; // INDIC_SYLLABIC_CATEGORY_VOWEL_INDEPENDENT Vowel_Independent
1379 const IMC_B = 8; // INDIC_MATRA_CATEGORY_BOTTOM Bottom
1380 const IMC_BR = 11; // INDIC_MATRA_CATEGORY_BOTTOM_AND_RIGHT Bottom_And_Right
1381 const IMC_I = 15; // INDIC_MATRA_CATEGORY_INVISIBLE Invisible
1382 const IMC_L = 3; // INDIC_MATRA_CATEGORY_LEFT Left
1383 const IMC_LR = 11; // INDIC_MATRA_CATEGORY_LEFT_AND_RIGHT Left_And_Right
1384 const IMC_x = 15; // INDIC_MATRA_CATEGORY_NOT_APPLICABLE Not_Applicable
1385 const IMC_O = 5; // INDIC_MATRA_CATEGORY_OVERSTRUCK Overstruck
1386 const IMC_R = 11; // INDIC_MATRA_CATEGORY_RIGHT Right
1387 const IMC_T = 6; // INDIC_MATRA_CATEGORY_TOP Top
1388 const IMC_TB = 8; // INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM Top_And_Bottom
1389 const IMC_TBR = 11; // INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_RIGHT Top_And_Bottom_And_Right
1390 const IMC_TL = 6; // INDIC_MATRA_CATEGORY_TOP_AND_LEFT Top_And_Left
1391 const IMC_TLR = 11; // INDIC_MATRA_CATEGORY_TOP_AND_LEFT_AND_RIGHT Top_And_Left_And_Right
1392 const IMC_TR = 11; // INDIC_MATRA_CATEGORY_TOP_AND_RIGHT Top_And_Right
1393 const IMC_VOL = 2; // INDIC_MATRA_CATEGORY_VISUAL_ORDER_LEFT Visual_Order_Left
1395 If in original table = _(C,x), that = ISC_C,IMC_x
1396 Value is IMC_x << 8 (or IMC_x * 256) = 3840
1397 plus ISC_C = 1, so = 3841
1401 public static $indic_table = [
1402 /* Devanagari (0900..097F) */
1404 /* 0900 */ 3848, 3848, 3848, 3848, 3842, 3842, 3842, 3842,
1405 /* 0908 */ 3842, 3842, 3842, 3842, 3842, 3842, 3842, 3842,
1406 /* 0910 */ 3842, 3842, 3842, 3842, 3842, 3841, 3841, 3841,
1407 /* 0918 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1408 /* 0920 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1409 /* 0928 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1410 /* 0930 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1411 /* 0938 */ 3841, 3841, 1543, 2823, 3843, 3840, 2823, 775,
1412 /* 0940 */ 2823, 2055, 2055, 2055, 2055, 1543, 1543, 1543,
1413 /* 0948 */ 1543, 2823, 2823, 2823, 2823, 2052, 775, 2823,
1414 /* 0950 */ 3840, 3840, 3840, 3840, 3840, 1543, 2055, 2055,
1415 /* 0958 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1416 /* 0960 */ 3842, 3842, 2055, 2055, 3840, 3840, 3840, 3840,
1417 /* 0968 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1418 /* 0970 */ 3840, 3840, 3842, 3842, 3842, 3842, 3842, 3842,
1419 /* 0978 */ 3840, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1420 /* Bengali (0980..09FF) */
1422 /* 0980 */ 3840, 3848, 3848, 3848, 3840, 3842, 3842, 3842,
1423 /* 0988 */ 3842, 3842, 3842, 3842, 3842, 3840, 3840, 3842,
1424 /* 0990 */ 3842, 3840, 3840, 3842, 3842, 3841, 3841, 3841,
1425 /* 0998 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1426 /* 09A0 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1427 /* 09A8 */ 3841, 3840, 3841, 3841, 3841, 3841, 3841, 3841,
1428 /* 09B0 */ 3841, 3840, 3841, 3840, 3840, 3840, 3841, 3841,
1429 /* 09B8 */ 3841, 3841, 3840, 3840, 3843, 3840, 2823, 775,
1430 /* 09C0 */ 2823, 2055, 2055, 2055, 2055, 3840, 3840, 775,
1431 /* 09C8 */ 775, 3840, 3840, 2823, 2823, 2052, 3841, 3840,
1432 /* 09D0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 2823,
1433 /* 09D8 */ 3840, 3840, 3840, 3840, 3841, 3841, 3840, 3841,
1434 /* 09E0 */ 3842, 3842, 2055, 2055, 3840, 3840, 3840, 3840,
1435 /* 09E8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1436 /* 09F0 */ 3841, 3841, 3840, 3840, 3840, 3840, 3840, 3840,
1437 /* 09F8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1438 /* Gurmukhi (0A00..0A7F) */
1440 /* 0A00 */ 3840, 3848, 3848, 3848, 3840, 3842, 3842, 3842,
1441 /* 0A08 */ 3842, 3842, 3842, 3840, 3840, 3840, 3840, 3842,
1442 /* 0A10 */ 3842, 3840, 3840, 3842, 3842, 3841, 3841, 3841,
1443 /* 0A18 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1444 /* 0A20 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1445 /* 0A28 */ 3841, 3840, 3841, 3841, 3841, 3841, 3841, 3841,
1446 /* 0A30 */ 3841, 3840, 3841, 3841, 3840, 3841, 3841, 3840,
1447 /* 0A38 */ 3841, 3841, 3840, 3840, 3843, 3840, 2823, 775,
1448 /* 0A40 */ 2823, 2055, 2055, 3840, 3840, 3840, 3840, 1543,
1449 /* 0A48 */ 1543, 3840, 3840, 1543, 1543, 2052, 3840, 3840,
1450 /* 0A50 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1451 /* 0A58 */ 3840, 3841, 3841, 3841, 3841, 3840, 3841, 3840,
1452 /* 0A60 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1453 /* 0A68 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1454 /* 0A70 */ 3848, 3840, 13841, 13841, 3840, 3857, 3840, 3840,
1455 /* 0A78 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1456 /* Gujarati (0A80..0AFF) */
1458 /* 0A80 */ 3840, 3848, 3848, 3848, 3840, 3842, 3842, 3842,
1459 /* 0A88 */ 3842, 3842, 3842, 3842, 3842, 3842, 3840, 3842,
1460 /* 0A90 */ 3842, 3842, 3840, 3842, 3842, 3841, 3841, 3841,
1461 /* 0A98 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1462 /* 0AA0 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1463 /* 0AA8 */ 3841, 3840, 3841, 3841, 3841, 3841, 3841, 3841,
1464 /* 0AB0 */ 3841, 3840, 3841, 3841, 3840, 3841, 3841, 3841,
1465 /* 0AB8 */ 3841, 3841, 3840, 3840, 3843, 3840, 2823, 775,
1466 /* 0AC0 */ 2823, 2055, 2055, 2055, 2055, 1543, 3840, 1543,
1467 /* 0AC8 */ 1543, 2823, 3840, 2823, 2823, 2052, 3840, 3840,
1468 /* 0AD0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1469 /* 0AD8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1470 /* 0AE0 */ 3842, 3842, 2055, 2055, 3840, 3840, 3840, 3840,
1471 /* 0AE8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1472 /* 0AF0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1473 /* 0AF8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1474 /* Oriya (0B00..0B7F) */
1476 /* 0B00 */ 3840, 3848, 3848, 3848, 3840, 3842, 3842, 3842,
1477 /* 0B08 */ 3842, 3842, 3842, 3842, 3842, 3840, 3840, 3842,
1478 /* 0B10 */ 3842, 3840, 3840, 3842, 3842, 3841, 3841, 3841,
1479 /* 0B18 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1480 /* 0B20 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1481 /* 0B28 */ 3841, 3840, 3841, 3841, 3841, 3841, 3841, 3841,
1482 /* 0B30 */ 3841, 3840, 3841, 3841, 3840, 3841, 3841, 3841,
1483 /* 0B38 */ 3841, 3841, 3840, 3840, 3843, 3840, 2823, 1543,
1484 /* 0B40 */ 2823, 2055, 2055, 2055, 2055, 3840, 3840, 775,
1485 /* 0B48 */ 1543, 3840, 3840, 2823, 2823, 2052, 3840, 3840,
1486 /* 0B50 */ 3840, 3840, 3840, 3840, 3840, 3840, 1543, 2823,
1487 /* 0B58 */ 3840, 3840, 3840, 3840, 3841, 3841, 3840, 3841,
1488 /* 0B60 */ 3842, 3842, 2055, 2055, 3840, 3840, 3840, 3840,
1489 /* 0B68 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1490 /* 0B70 */ 3840, 3841, 3840, 3840, 3840, 3840, 3840, 3840,
1491 /* 0B78 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1492 /* Tamil (0B80..0BFF) */
1494 /* 0B80 */ 3840, 3840, 3848, 3840, 3840, 3842, 3842, 3842,
1495 /* 0B88 */ 3842, 3842, 3842, 3840, 3840, 3840, 3842, 3842,
1496 /* 0B90 */ 3842, 3840, 3842, 3842, 3842, 3841, 3840, 3840,
1497 /* 0B98 */ 3840, 3841, 3841, 3840, 3841, 3840, 3841, 3841,
1498 /* 0BA0 */ 3840, 3840, 3840, 3841, 3841, 3840, 3840, 3840,
1499 /* 0BA8 */ 3841, 3841, 3841, 3840, 3840, 3840, 3841, 3841,
1500 /* 0BB0 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1501 /* 0BB8 */ 3841, 3841, 3840, 3840, 3840, 3840, 2823, 2823,
1502 /* 0BC0 */ 1543, 2055, 2055, 3840, 3840, 3840, 775, 775,
1503 /* 0BC8 */ 775, 3840, 2823, 2823, 2823, 1540, 3840, 3840,
1504 /* 0BD0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 2823,
1505 /* 0BD8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1506 /* 0BE0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1507 /* 0BE8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1508 /* 0BF0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1509 /* 0BF8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1510 /* Telugu (0C00..0C7F) */
1512 /* 0C00 */ 3840, 3848, 3848, 3848, 3840, 3842, 3842, 3842,
1513 /* 0C08 */ 3842, 3842, 3842, 3842, 3842, 3840, 3842, 3842,
1514 /* 0C10 */ 3842, 3840, 3842, 3842, 3842, 3841, 3841, 3841,
1515 /* 0C18 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1516 /* 0C20 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1517 /* 0C28 */ 3841, 3840, 3841, 3841, 3841, 3841, 3841, 3841,
1518 /* 0C30 */ 3841, 3841, 3841, 3841, 3840, 3841, 3841, 3841,
1519 /* 0C38 */ 3841, 3841, 3840, 3840, 3840, 3840, 1543, 1543,
1520 /* 0C40 */ 1543, 2823, 2823, 2823, 2823, 3840, 1543, 1543,
1521 /* 0C48 */ 2055, 3840, 1543, 1543, 1543, 1540, 3840, 3840,
1522 /* 0C50 */ 3840, 3840, 3840, 3840, 3840, 1543, 2055, 3840,
1523 /* 0C58 */ 3841, 3841, 3840, 3840, 3840, 3840, 3840, 3840,
1524 /* 0C60 */ 3842, 3842, 2055, 2055, 3840, 3840, 3840, 3840,
1525 /* 0C68 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1526 /* 0C70 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1527 /* 0C78 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1528 /* Kannada (0C80..0CFF) */
1530 /* 0C80 */ 3840, 3840, 3848, 3848, 3840, 3842, 3842, 3842,
1531 /* 0C88 */ 3842, 3842, 3842, 3842, 3842, 3840, 3842, 3842,
1532 /* 0C90 */ 3842, 3840, 3842, 3842, 3842, 3841, 3841, 3841,
1533 /* 0C98 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1534 /* 0CA0 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1535 /* 0CA8 */ 3841, 3840, 3841, 3841, 3841, 3841, 3841, 3841,
1536 /* 0CB0 */ 3841, 3841, 3841, 3841, 3840, 3841, 3841, 3841,
1537 /* 0CB8 */ 3841, 3841, 3840, 3840, 3843, 3840, 2823, 1543,
1538 /* 0CC0 */ 2823, 2823, 2823, 2823, 2823, 3840, 1543, 2823,
1539 /* 0CC8 */ 2823, 3840, 2823, 2823, 1543, 1540, 3840, 3840,
1540 /* 0CD0 */ 3840, 3840, 3840, 3840, 3840, 2823, 2823, 3840,
1541 /* 0CD8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3841, 3840,
1542 /* 0CE0 */ 3842, 3842, 2055, 2055, 3840, 3840, 3840, 3840,
1543 /* 0CE8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1544 /* 0CF0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1545 /* 0CF8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1546 /* Malayalam (0D00..0D7F) */
1548 /* 0D00 */ 3840, 3840, 3848, 3848, 3840, 3842, 3842, 3842,
1549 /* 0D08 */ 3842, 3842, 3842, 3842, 3842, 3840, 3842, 3842,
1550 /* 0D10 */ 3842, 3840, 3842, 3842, 3842, 3841, 3841, 3841,
1551 /* 0D18 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1552 /* 0D20 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1553 /* 0D28 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1554 /* 0D30 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1555 /* 0D38 */ 3841, 3841, 3841, 3840, 3840, 3840, 2823, 2823,
1556 /* 0D40 */ 2823, 2823, 2823, 2055, 2055, 3840, 775, 775,
1557 /* 0D48 */ 775, 3840, 2823, 2823, 2823, 1540, 3855, 3840,
1558 /* 0D50 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 2823,
1559 /* 0D58 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1560 /* 0D60 */ 3842, 3842, 2055, 2055, 3840, 3840, 3840, 3840,
1561 /* 0D68 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1562 /* 0D70 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1563 /* 0D78 */ 3840, 3840, 3841, 3841, 3841, 3841, 3841, 3841,
1564 /* Sinhala (0D80..0DFF) */
1566 /* 0D80 */ 3840, 3840, 3848, 3848, 3840, 3842, 3842, 3842,
1567 /* 0D88 */ 3842, 3842, 3842, 3842, 3842, 3842, 3842, 3842,
1568 /* 0D90 */ 3842, 3842, 3842, 3842, 3842, 3842, 3842, 3840,
1569 /* 0D98 */ 3840, 3840, 3841, 3841, 3841, 3841, 3841, 3841,
1570 /* 0DA0 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1571 /* 0DA8 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1572 /* 0DB0 */ 3841, 3841, 3840, 3841, 3841, 3841, 3841, 3841,
1573 /* 0DB8 */ 3841, 3841, 3841, 3841, 3840, 3841, 3840, 3840,
1574 /* 0DC0 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3840,
1575 /* 0DC8 */ 3840, 3840, 1540, 3840, 3840, 3840, 3840, 2823,
1576 /* 0DD0 */ 2823, 2823, 1543, 1543, 2055, 3840, 2055, 3840,
1577 /* 0DD8 */ 2823, 775, 1543, 775, 2823, 2823, 2823, 2823,
1578 /* 0DE0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1579 /* 0DE8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1580 /* 0DF0 */ 3840, 3840, 2823, 2823, 3840, 3840, 3840, 3840,
1581 /* 0DF8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1582 /* Vedic Extensions (1CD0..1CFF) */
1584 /* 1CD0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1585 /* 1CD8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1586 /* 1CE0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1587 /* 1CE8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1588 /* 1CF0 */ 3840, 3840, 3848, 3848, 3840, 3840, 3840, 3840,
1589 /* 1CF8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1592 public static $khmer_table = [
1593 /* Khmer (1780..17FF) */
1595 /* 1780 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1596 /* 1788 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1597 /* 1790 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1598 /* 1798 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
1599 /* 17A0 */ 3841, 3841, 3841, 3842, 3842, 3842, 3842, 3842,
1600 /* 17A8 */ 3842, 3842, 3842, 3842, 3842, 3842, 3842, 3842,
1601 /* 17B0 */ 3842, 3842, 3842, 3842, 3840, 3840, 2823, 1543,
1602 /* 17B8 */ 1543, 1543, 1543, 2055, 2055, 2055, 1543, 2823,
1603 /* 17C0 */ 2823, 775, 775, 775, 2823, 2823, 3848, 3848,
1604 /* 17C8 */ 2823, 3853, 3853, 3840, 3855, 3840, 3840, 3840,
1605 /* 17D0 */ 3840, 1540, 3844, 3840, 3840, 3840, 3840, 3840,
1606 /* 17D8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1607 /* 17E0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1608 /* 17E8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1609 /* 17F0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1610 /* 17F8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
1613 // from "hb-ot-shape-complex-indic-table.cc"
/**
 * Look up the packed category/position value for a code point.
 *
 * The result packs the syllabic category in the low byte and the matra
 * category in the next byte (ISC_* | (IMC_* << 8)).
 *
 * Fixes over the previous version:
 *  - Vedic Extensions were read at offset 1152, but self::$indic_table
 *    holds 1280 entries for U+0900..U+0DFF (ten 128-entry blocks,
 *    Devanagari through Sinhala) before the Vedic rows, so Vedic code points
 *    were returning Sinhala data. The offset is now derived from the range.
 *  - The Vedic range ended at U+1D00, one past the 48 stored entries
 *    (U+1CD0..U+1CFF); it now ends at U+1CFF and U+1D00 gets the default.
 *
 * @param int $u Unicode code point
 * @return int packed value; 3840 (ISC_x | (IMC_x << 8)) when $u is not covered
 */
public static function indic_get_categories($u)
{
    // Main Indic blocks: index 0 of the table is U+0900.
    if (0x0900 <= $u && $u <= 0x0DFF) {
        return self::$indic_table[$u - 0x0900];
    }

    // Vedic Extensions are stored directly after the U+0900..U+0DFF entries.
    if (0x1CD0 <= $u && $u <= 0x1CFF) {
        $vedicOffset = 0x0DFF - 0x0900 + 1; // 1280
        return self::$indic_table[$u - 0x1CD0 + $vedicOffset];
    }

    // Khmer has its own table, starting at U+1780.
    if (0x1780 <= $u && $u <= 0x17FF) {
        return self::$khmer_table[$u - 0x1780];
    }

    // U+00A0 (NBSP) and U+25CC (dotted circle) are consonant placeholders,
    // following upstream HarfBuzz hb_indic_get_categories.
    // NOTE(review): confirm both guards match the shipped file.
    if ($u == 0x00A0 || $u == 0x25CC) {
        return 3851; // (ISC_CP | (IMC_x << 8))
    }

    return 3840; // (ISC_x | (IMC_x << 8))
}
1634 // BELOW from hb-ot-shape-complex-indic.cc
/**
 * Tell whether $u lies in the 128-code-point block that starts at $Base.
 *
 * @param int $u    Unicode code point
 * @param int $Base first code point of the block (low seven bits clear)
 * @return bool true when $u with its low seven bits cleared equals $Base
 */
public static function IN_HALF_BLOCK($u, $Base)
{
    $blockStart = $u & ~0x7F;

    return $blockStart == $Base;
}
/**
 * Devanagari test: is $u in the half-block starting at U+0900?
 *
 * @param int $u Unicode code point
 * @return bool
 */
public static function IS_DEVA($u)
{
    return self::IN_HALF_BLOCK($u, 0x0900);
}
/**
 * Bengali test: is $u in the half-block starting at U+0980?
 *
 * @param int $u Unicode code point
 * @return bool
 */
public static function IS_BENG($u)
{
    return self::IN_HALF_BLOCK($u, 0x0980);
}
/**
 * Gurmukhi test: is $u in the half-block starting at U+0A00?
 *
 * @param int $u Unicode code point
 * @return bool
 */
public static function IS_GURU($u)
{
    return self::IN_HALF_BLOCK($u, 0x0A00);
}
/**
 * Gujarati test: is $u in the half-block starting at U+0A80?
 *
 * @param int $u Unicode code point
 * @return bool
 */
public static function IS_GUJR($u)
{
    return self::IN_HALF_BLOCK($u, 0x0A80);
}
/**
 * Oriya test: is $u in the half-block starting at U+0B00?
 *
 * @param int $u Unicode code point
 * @return bool
 */
public static function IS_ORYA($u)
{
    return self::IN_HALF_BLOCK($u, 0x0B00);
}
/**
 * Tamil test: is $u in the half-block starting at U+0B80?
 *
 * @param int $u Unicode code point
 * @return bool
 */
public static function IS_TAML($u)
{
    return self::IN_HALF_BLOCK($u, 0x0B80);
}
/**
 * Telugu test: is $u in the half-block starting at U+0C00?
 *
 * @param int $u Unicode code point
 * @return bool
 */
public static function IS_TELU($u)
{
    return self::IN_HALF_BLOCK($u, 0x0C00);
}
/**
 * Kannada test: is $u in the half-block starting at U+0C80?
 *
 * @param int $u Unicode code point
 * @return bool
 */
public static function IS_KNDA($u)
{
    return self::IN_HALF_BLOCK($u, 0x0C80);
}
/**
 * Malayalam test: is $u in the half-block starting at U+0D00?
 *
 * @param int $u Unicode code point
 * @return bool
 */
public static function IS_MLYM($u)
{
    return self::IN_HALF_BLOCK($u, 0x0D00);
}
/**
 * Sinhala test: is $u in the half-block starting at U+0D80?
 *
 * @param int $u Unicode code point
 * @return bool
 */
public static function IS_SINH($u)
{
    return self::IN_HALF_BLOCK($u, 0x0D80);
}
/**
 * Khmer test: is $u in the half-block starting at U+1780?
 *
 * @param int $u Unicode code point
 * @return bool
 */
public static function IS_KHMR($u)
{
    return self::IN_HALF_BLOCK($u, 0x1780);
}
/**
 * Position assigned to a left-side matra.
 *
 * @param int $u Unicode code point (not consulted; the result is the same for every script)
 * @return int always self::POS_PRE_M
 */
public static function MATRA_POS_LEFT($u)
{
    return self::POS_PRE_M;
}
/**
 * Position assigned to a right-side matra, chosen by the script block of $u.
 *
 * Scripts are tested in the fixed order Devanagari, Bengali, Gurmukhi,
 * Gujarati, Oriya, Tamil, Telugu, Kannada, Malayalam, Sinhala, Khmer.
 *
 * @param int $u Unicode code point of the matra
 * @return int one of the POS_* constants; POS_AFTER_SUB when no script matches
 */
public static function MATRA_POS_RIGHT($u)
{
    if (self::IS_DEVA($u)) {
        return self::POS_AFTER_SUB;
    }
    if (self::IS_BENG($u)
        || self::IS_GURU($u)
        || self::IS_GUJR($u)
        || self::IS_ORYA($u)
        || self::IS_TAML($u)
    ) {
        return self::POS_AFTER_POST;
    }
    if (self::IS_TELU($u)) {
        // Telugu splits at U+0C42.
        return $u <= 0x0C42 ? self::POS_BEFORE_SUB : self::POS_AFTER_SUB;
    }
    if (self::IS_KNDA($u)) {
        // Only Kannada U+0CC3..U+0CD6 goes after the subjoined forms.
        $outsideWindow = $u < 0x0CC3 || $u > 0x0CD6;
        return $outsideWindow ? self::POS_BEFORE_SUB : self::POS_AFTER_SUB;
    }
    if (self::IS_MLYM($u)) {
        return self::POS_AFTER_POST;
    }
    if (self::IS_SINH($u)) {
        return self::POS_AFTER_SUB;
    }
    if (self::IS_KHMR($u)) {
        return self::POS_AFTER_POST;
    }

    return self::POS_AFTER_SUB; /* default */
}
/**
 * Position assigned to a top matra, chosen by the script block of $u.
 *
 * BENG and MLYM don't have top matras, so they are not tested and fall
 * through to the default.
 *
 * @param int $u Unicode code point of the matra
 * @return int one of the POS_* constants; POS_AFTER_SUB when no script matches
 */
public static function MATRA_POS_TOP($u)
{
    if (self::IS_DEVA($u)) {
        return self::POS_AFTER_SUB;
    }
    if (self::IS_GURU($u)) {
        return self::POS_AFTER_POST; /* Deviate from spec */
    }
    if (self::IS_GUJR($u)) {
        return self::POS_AFTER_SUB;
    }
    if (self::IS_ORYA($u)) {
        return self::POS_AFTER_MAIN;
    }
    if (self::IS_TAML($u)) {
        return self::POS_AFTER_SUB;
    }
    if (self::IS_TELU($u) || self::IS_KNDA($u)) {
        return self::POS_BEFORE_SUB;
    }
    if (self::IS_SINH($u)) {
        return self::POS_AFTER_SUB;
    }
    if (self::IS_KHMR($u)) {
        return self::POS_AFTER_POST;
    }

    return self::POS_AFTER_SUB; /* default */
}
/**
 * Position assigned to a bottom matra, chosen by the script block of $u.
 *
 * Scripts are tested in the fixed order Devanagari, Bengali, Gurmukhi,
 * Gujarati, Oriya, Tamil, Telugu, Kannada, Malayalam, Sinhala, Khmer.
 *
 * @param int $u Unicode code point of the matra
 * @return int one of the POS_* constants; POS_AFTER_SUB when no script matches
 */
public static function MATRA_POS_BOTTOM($u)
{
    if (self::IS_DEVA($u) || self::IS_BENG($u)) {
        return self::POS_AFTER_SUB;
    }
    if (self::IS_GURU($u) || self::IS_GUJR($u)) {
        return self::POS_AFTER_POST;
    }
    if (self::IS_ORYA($u)) {
        return self::POS_AFTER_SUB;
    }
    if (self::IS_TAML($u)) {
        return self::POS_AFTER_POST;
    }
    if (self::IS_TELU($u) || self::IS_KNDA($u)) {
        return self::POS_BEFORE_SUB;
    }
    if (self::IS_MLYM($u)) {
        return self::POS_AFTER_POST;
    }
    if (self::IS_SINH($u)) {
        return self::POS_AFTER_SUB;
    }
    if (self::IS_KHMR($u)) {
        return self::POS_AFTER_POST;
    }

    return self::POS_AFTER_SUB; /* default */
}
/**
 * Translate a generic matra side into the script-specific position for $u.
 *
 * POS_PRE_C, POS_POST_C, POS_ABOVE_C and POS_BELOW_C are dispatched to
 * MATRA_POS_LEFT/RIGHT/TOP/BOTTOM respectively.
 *
 * @param int $u    Unicode code point of the matra
 * @param int $side one of the POS_* constants
 * @return int the position to use for the matra
 */
public static function matra_position($u, $side)
{
    // Loose comparison on purpose: it matches how a switch compares its cases.
    if ($side == self::POS_PRE_C) {
        return self::MATRA_POS_LEFT($u);
    }
    if ($side == self::POS_POST_C) {
        return self::MATRA_POS_RIGHT($u);
    }
    if ($side == self::POS_ABOVE_C) {
        return self::MATRA_POS_TOP($u);
    }
    if ($side == self::POS_BELOW_C) {
        return self::MATRA_POS_BOTTOM($u);
    }

    // NOTE(review): presumably any other side is returned unchanged, as in
    // upstream HarfBuzz matra_position() - confirm against the full file.
    return $side;
}
1768 // vowel matras that have to be split into two parts.
1769 // From Harfbuzz (old)
1770 // New HarfBuzz uses /src/hb-ucdn/ucdn.c and unicodedata_db.h for full method of decomposition for all characters
1771 // Should always fully decompose and then recompose back, but we will just do the split matras
1772 public static function decompose_indic($ab)
1777 * Decompose split matras.
1855 // NB Some fonts break with these Sinhala decomps (although this is Uniscribe spec)
1856 // Can check if character would be substituted by pstf and only decompose if true
1857 // e.g. if (isset($GSUBdata['pstf'][$ab])) - would need to pass $GSUBdata as parameter to this function
1897 /* Tibetan - included here although it does not use the Indic shaper in other ways */
1931 public static function bubble_sort(&$arr, $start, $len)
1936 $k = $start +
$len - 2;
1937 while ($k >= $start) {
1938 for ($j = $start; $j <= $k; $j++
) {
1939 if ($arr[$j]['indic_position'] > $arr[$j +
1]['indic_position']) {
1941 $arr[$j] = $arr[$j +
1];