composer package updates
[openemr.git] / vendor / mpdf / mpdf / src / Shaper / Indic.php
blob19cedda8fa8a5618620884088448b4bf755b94ef
1 <?php
3 namespace Mpdf\Shaper;
5 use Mpdf\Ucdn;
7 class Indic
9 /* FROM hb-ot-shape-complex-indic-private.hh */
11 // indic_category
12 const OT_X = 0;
13 const OT_C = 1;
14 const OT_V = 2;
15 const OT_N = 3;
16 const OT_H = 4;
17 const OT_ZWNJ = 5;
18 const OT_ZWJ = 6;
19 const OT_M = 7; /* Matra or Dependent Vowel */
20 const OT_SM = 8;
21 const OT_VD = 9;
22 const OT_A = 10;
23 const OT_NBSP = 11;
24 const OT_DOTTEDCIRCLE = 12; /* Not in the spec, but special in Uniscribe. /Very very/ special! */
25 const OT_RS = 13; /* Register Shifter, used in Khmer OT spec */
26 const OT_COENG = 14;
27 const OT_REPHA = 15;
29 const OT_RA = 16; /* Not explicitly listed in the OT spec, but used in the grammar. */
30 const OT_CM = 17;
32 /* Visual positions in a syllable from left to right. */
33 /* FROM hb-ot-shape-complex-indic-private.hh */
35 // indic_position
36 const POS_START = 0;
38 const POS_RA_TO_BECOME_REPH = 1;
39 const POS_PRE_M = 2;
40 const POS_PRE_C = 3;
42 const POS_BASE_C = 4;
43 const POS_AFTER_MAIN = 5;
45 const POS_ABOVE_C = 6;
47 const POS_BEFORE_SUB = 7;
48 const POS_BELOW_C = 8;
49 const POS_AFTER_SUB = 9;
51 const POS_BEFORE_POST = 10;
52 const POS_POST_C = 11;
53 const POS_AFTER_POST = 12;
55 const POS_FINAL_C = 13;
56 const POS_SMVD = 14;
58 const POS_END = 15;
61 * Basic features.
62 * These features are applied in order, one at a time, after initial_reordering.
66 * Must be in the same order as the indic_features array. Ones starting with _ are F_GLOBAL
67 * Ones without the _ are only applied where the mask says!
70 const _NUKT = 0;
71 const _AKHN = 1;
72 const RPHF = 2;
73 const _RKRF = 3;
74 const PREF = 4;
75 const BLWF = 5;
76 const HALF = 6;
77 const ABVF = 7;
78 const PSTF = 8;
79 const CFAR = 9; // Khmer only
80 const _VATU = 10;
81 const _CJCT = 11;
82 const INIT = 12;
84 // Based on indic_category used to make string to find syllables
85 // OT_ to string character (using e.g. OT_C from INDIC) hb-ot-shape-complex-indic-private.hh
86 public static $indic_category_char = [
87 'x',
88 'C',
89 'V',
90 'N',
91 'H',
92 'Z',
93 'J',
94 'M',
95 'S',
96 'v',
97 'A', /* Spec gives Andutta U+0952 as OT_A. However, testing shows that Uniscribe
98 * treats U+0951..U+0952 all as OT_VD - see set_indic_properties */
99 's',
100 'D',
101 'F', /* Register shift Khmer only */
102 'G', /* Khmer only */
103 'r', /* 0D4E (dot reph) only one in Malayalam */
104 'R',
105 'm', /* Consonant medial only used in Indic 0A75 in Gurmukhi (0A00..0A7F) : also in Lao, Myanmar, Tai Tham, Javanese & Cham */
/**
 * Derive the Indic shaping category and visual position for one glyph record.
 *
 * The packed value returned by indic_get_categories() carries the category in
 * its low 7 bits and the position from bit 8 upwards. Both are then adjusted
 * by the overrides below and written back to the record.
 *
 * @param array $info        Glyph record; reads 'uni' (and 'general_category' for Repha),
 *                           writes 'indic_category' and 'indic_position'.
 * @param mixed $scriptblock Script identifier, compared against Ucdn::SCRIPT_KHMER.
 */
public static function set_indic_properties(&$info, $scriptblock)
{
    $codepoint = $info['uni'];
    $packed = self::indic_get_categories($codepoint);
    $category = $packed & 0x7F;
    $position = $packed >> 8;

    /*
     * Re-assign category
     */

    // U+17D1 is reset to "other" first so that the Khmer sign rule below picks it up.
    if ($codepoint == 0x17D1) {
        $category = self::OT_X;
    }

    // Khmer various signs behave like top matras.
    if ($category == self::OT_X && self::in_range($codepoint, 0x17CB, 0x17D3)) {
        $category = self::OT_M;
        $position = self::POS_ABOVE_C;
    }

    /* The spec says U+0952 is OT_A. However, testing shows that Uniscribe
     * treats U+0951..U+0952 all as OT_VD.
     * TESTS:
     * U+092E,U+0947,U+0952
     * U+092E,U+0952,U+0947
     * U+092E,U+0947,U+0951
     * U+092E,U+0951,U+0947
     */
    if (self::in_range($codepoint, 0x0951, 0x0954)) {
        $category = self::OT_VD;
    }

    // Overrides keyed on a single code point; none of these overlap the ranges above.
    switch ($codepoint) {
        case 0x17C6: /* Khmer Bindu doesn't like to be repositioned. */
            $category = self::OT_N;
            break;
        case 0x17D2: /* Khmer coeng */
            $category = self::OT_COENG;
            break;
        case 0x200C:
            $category = self::OT_ZWNJ;
            break;
        case 0x200D:
            $category = self::OT_ZWJ;
            break;
        case 0x25CC:
            $category = self::OT_DOTTEDCIRCLE;
            break;
        case 0x0A71: /* GURMUKHI ADDAK. More like consonant medial. like 0A75. */
            $category = self::OT_SM;
            break;
    }

    /* There are two kinds of characters marked as Repha:
     * - The ones that are GenCat=Mn are already positioned visually, ie. after base. (eg. Khmer)
     * - The ones that are GenCat=Lo are encoded logically, ie. beginning of syllable. (eg. Malayalam)
     *
     * The first kind is recategorized to look like a Nukta attached to the base directly.
     */
    if ($category == self::OT_REPHA
        && $info['general_category'] == Ucdn::UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK
    ) {
        $category = self::OT_N;
    }

    /*
     * Re-assign position.
     */

    // Same set of categories that is_consonant treats as consonant-like (CONSONANT_FLAGS).
    $consonantLike = self::FLAG(self::OT_C)
        | self::FLAG(self::OT_CM)
        | self::FLAG(self::OT_RA)
        | self::FLAG(self::OT_V)
        | self::FLAG(self::OT_NBSP)
        | self::FLAG(self::OT_DOTTEDCIRCLE);

    if (self::FLAG($category) & $consonantLike) {
        // Khmer differs from Indic here; for Indic the position is refined later from font lookups.
        $position = ($scriptblock == Ucdn::SCRIPT_KHMER) ? self::POS_BELOW_C : self::POS_BASE_C;

        if (self::is_ra($codepoint)) {
            $category = self::OT_RA;
        }
    } elseif ($category == self::OT_M) {
        $position = self::matra_position($codepoint, $position);
    } elseif ($category == self::OT_SM || $category == self::OT_VD) {
        $position = self::POS_SMVD;
    }

    /* Oriya Bindu is BeforeSub in the spec. */
    if ($codepoint == 0x0B01) {
        $position = self::POS_BEFORE_SUB;
    }

    $info['indic_category'] = $category;
    $info['indic_position'] = $position;
}
201 // syllable_type
202 const CONSONANT_SYLLABLE = 0;
203 const VOWEL_SYLLABLE = 1;
204 const STANDALONE_CLUSTER = 2;
205 const BROKEN_CLUSTER = 3;
206 const NON_INDIC_CLUSTER = 4;
/**
 * Split the category string into syllables and tag every glyph record with its syllable.
 *
 * $s holds one category character per entry of $o (see $indic_category_char). Starting
 * at each unconsumed position, the consonant, vowel, standalone and broken-cluster
 * grammars (taken from the OpenType spec) are tried in that order; when none matches,
 * a single character is consumed as a non-Indic cluster.
 *
 * Each record receives 'syllable' = (serial << 4) | syllable_type, where the serial
 * cycles through 1..15.
 *
 * @param array  $o                Glyph records; 'general_category' of the preceding
 *                                 record is read, 'syllable' is written.
 * @param string $s                Category string, one byte per glyph record.
 * @param bool   $broken_syllables Set to true when at least one broken cluster was found.
 */
public static function set_syllables(&$o, $s, &$broken_syllables)
{
    $ptr = 0;
    $syllable_serial = 1;
    $broken_syllables = false;
    $length = strlen($s); // $s is never modified, so measure it once

    while ($ptr < $length) {
        // Remaining input; computed once and shared by all grammar attempts below.
        $rest = substr($s, $ptr);
        $syllable_length = 1;
        $syllable_type = self::NON_INDIC_CLUSTER;

        if (preg_match('/^([CR]m*[N]?(H[ZJ]?|[ZJ]H))*[CR]m*[N]?[A]?(H[ZJ]?|[M]*[N]?[H]?)?[S]?[v]{0,2}/', $rest, $ma)) {
            // CONSONANT_SYLLABLE Consonant syllable
            $syllable_length = strlen($ma[0]);
            $syllable_type = self::CONSONANT_SYLLABLE;
        } elseif (preg_match('/^(RH|r)?V[N]?([ZJ]?H[CR]m*|J[CR]m*)?([M]*[N]?[H]?)?[S]?[v]{0,2}/', $rest, $ma)) {
            // VOWEL_SYLLABLE Vowel-based syllable
            $syllable_length = strlen($ma[0]);
            $syllable_type = self::VOWEL_SYLLABLE;
        } elseif (($ptr == 0 ||
            $o[$ptr - 1]['general_category'] < Ucdn::UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER ||
            $o[$ptr - 1]['general_category'] > Ucdn::UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK
            ) && preg_match('/^(RH|r)?[sD][N]?([ZJ]?H[CR]m*)?([M]*[N]?[H]?)?[S]?[v]{0,2}/', $rest, $ma)) {
            // STANDALONE_CLUSTER Stand Alone syllable - applies only at the start of a word
            $syllable_length = strlen($ma[0]);
            $syllable_type = self::STANDALONE_CLUSTER;
        } elseif (preg_match('/^(RH|r)?[N]?([ZJ]?H[CR])?([M]*[N]?[H]?)?[S]?[v]{0,2}/', $rest, $ma)) {
            // BROKEN_CLUSTER syllable; every part of the pattern is optional, so it may match blank
            if (strlen($ma[0])) {
                $syllable_length = strlen($ma[0]);
                $syllable_type = self::BROKEN_CLUSTER;
                $broken_syllables = true;
            }
        }

        for ($i = $ptr; $i < $ptr + $syllable_length; $i++) {
            $o[$i]['syllable'] = ($syllable_serial << 4) | $syllable_type;
        }
        $ptr += $syllable_length;

        // Keep the serial within 1..15
        $syllable_serial++;
        if ($syllable_serial == 16) {
            $syllable_serial = 1;
        }
    }
}
/**
 * Sinhala variant of set_syllables(): split the category string into syllables and
 * tag every glyph record with its syllable.
 *
 * Only consonant and vowel syllables are recognised; anything else is consumed one
 * character at a time as a non-Indic cluster. Each record receives
 * 'syllable' = (serial << 4) | syllable_type, where the serial cycles through 1..15.
 *
 * @param array  $o                Glyph records; 'syllable' is written.
 * @param string $s                Category string, one byte per glyph record.
 * @param bool   $broken_syllables Always set to false; this grammar has no broken-cluster rule.
 */
public static function set_syllables_sinhala(&$o, $s, &$broken_syllables)
{
    $ptr = 0;
    $syllable_serial = 1;
    $broken_syllables = false;
    $length = strlen($s); // $s is never modified, so measure it once

    while ($ptr < $length) {
        // Remaining input; computed once and shared by both grammar attempts below.
        $rest = substr($s, $ptr);
        $syllable_length = 1;
        $syllable_type = self::NON_INDIC_CLUSTER;

        if (preg_match('/^([CR]HJ|[CR]JH){0,8}[CR][HM]{0,3}[S]{0,1}/', $rest, $ma)) {
            // CONSONANT_SYLLABLE Consonant syllable
            $syllable_length = strlen($ma[0]);
            $syllable_type = self::CONSONANT_SYLLABLE;
        } elseif (preg_match('/^V[S]{0,1}/', $rest, $ma)) {
            // VOWEL_SYLLABLE Vowel-based syllable
            $syllable_length = strlen($ma[0]);
            $syllable_type = self::VOWEL_SYLLABLE;
        }

        for ($i = $ptr; $i < $ptr + $syllable_length; $i++) {
            $o[$i]['syllable'] = ($syllable_serial << 4) | $syllable_type;
        }
        $ptr += $syllable_length;

        // Keep the serial within 1..15
        $syllable_serial++;
        if ($syllable_serial == 16) {
            $syllable_serial = 1;
        }
    }
}
/**
 * Khmer variant of set_syllables(): split the category string into syllables and
 * tag every glyph record with its syllable.
 *
 * The consonant, vowel and broken-cluster grammars are tried in that order (Coeng 'G'
 * takes the place of Halant, 'F' is the register shifter); when none matches, a single
 * character is consumed as a non-Indic cluster. Each record receives
 * 'syllable' = (serial << 4) | syllable_type, where the serial cycles through 1..15.
 *
 * @param array  $o                Glyph records; 'syllable' is written.
 * @param string $s                Category string, one byte per glyph record.
 * @param bool   $broken_syllables Set to true when at least one broken cluster was found.
 */
public static function set_syllables_khmer(&$o, $s, &$broken_syllables)
{
    $ptr = 0;
    $syllable_serial = 1;
    $broken_syllables = false;
    $length = strlen($s); // $s is never modified, so measure it once

    while ($ptr < $length) {
        // Remaining input; computed once and shared by all grammar attempts below.
        $rest = substr($s, $ptr);
        $syllable_length = 1;
        $syllable_type = self::NON_INDIC_CLUSTER;

        if (preg_match('/^r?([CR]J?((Z?F)?[N]{0,2})?[ZJ]?G(JN?)?){0,4}[CR]J?((Z?F)?[N]{0,2})?A?((([ZJ]?G(JN?)?)|GZ)|(GJ)?([ZJ]{0,3}MN?(H|JHJR)?){0,4})?(G([CR]J?((Z?F)?[N]{0,2})?|V))?(SZ?)?[v]{0,2}/', $rest, $ma)) {
            // CONSONANT_SYLLABLE Consonant syllable
            $syllable_length = strlen($ma[0]);
            $syllable_type = self::CONSONANT_SYLLABLE;
        } elseif (preg_match('/^(RH|r)?V((Z?F)?[N]{0,2})?(J|([ZJ]?G(JN?)?[CR]J?((Z?F)?[N]{0,2})?){0,4}((([ZJ]?G(JN?)?)|GZ)|(GJ)?([ZJ]{0,3}MN?(H|JHJR)?){0,4})?(G([CR]J?((Z?F)?[N]{0,2})?|V))?(SZ?)?[v]{0,2})/', $rest, $ma)) {
            // VOWEL_SYLLABLE Vowel-based syllable
            $syllable_length = strlen($ma[0]);
            $syllable_type = self::VOWEL_SYLLABLE;
        } elseif (preg_match('/^(RH|r)?((Z?F)?[N]{0,2})?(([ZJ]?G(JN?)?)[CR]J?((Z?F)?[N]{0,2})?){0,4}((([ZJ]?G(JN?)?)|GZ)|(GJ)?([ZJ]{0,3}MN?(H|JHJR)?){0,4})(G([CR]J?((Z?F)?[N]{0,2})?|V))?(SZ?)?[v]{0,2}/', $rest, $ma)) {
            // BROKEN_CLUSTER syllable; every part of the pattern is optional, so it may match blank
            if (strlen($ma[0])) {
                $syllable_length = strlen($ma[0]);
                $syllable_type = self::BROKEN_CLUSTER;
                $broken_syllables = true;
            }
        }

        for ($i = $ptr; $i < $ptr + $syllable_length; $i++) {
            $o[$i]['syllable'] = ($syllable_serial << 4) | $syllable_type;
        }
        $ptr += $syllable_length;

        // Keep the serial within 1..15
        $syllable_serial++;
        if ($syllable_serial == 16) {
            $syllable_serial = 1;
        }
    }
}
/**
 * Run the initial reordering pass over the whole glyph buffer.
 *
 * Consonant positions are first refined from the font's GSUB data, dotted circles
 * are inserted into broken clusters when a dotted-circle record is supplied, and
 * then initial_reordering_syllable() is invoked once for each run of consecutive
 * records sharing the same 'syllable' value.
 *
 * @param array $info             Glyph records (modified in place).
 * @param array $GSUBdata         Font GSUB lookup data, passed through to the helpers.
 * @param bool  $broken_syllables Whether syllable detection reported a broken cluster.
 * @param array $indic_config     Script configuration, passed through.
 * @param mixed $scriptblock      Script identifier, passed through.
 * @param bool  $is_old_spec      Old-style script tag flag, passed through.
 * @param mixed $dottedcircle     Dotted-circle record(s) to insert; falsy disables insertion.
 */
public static function initial_reordering(&$info, $GSUBdata, $broken_syllables, $indic_config, $scriptblock, $is_old_spec, $dottedcircle)
{
    self::update_consonant_positions($info, $GSUBdata);

    if ($broken_syllables && $dottedcircle) {
        self::insert_dotted_circles($info, $dottedcircle);
    }

    // Counted after the insertion above, which may have lengthened the buffer.
    $total = count($info);
    if ($total === 0) {
        return;
    }

    $runStart = 0;
    $runSyllable = $info[0]['syllable'];
    for ($pos = 1; $pos <= $total; $pos++) {
        // A run ends at the end of the buffer or where the syllable value changes.
        if ($pos < $total && $runSyllable == $info[$pos]['syllable']) {
            continue;
        }
        self::initial_reordering_syllable($info, $GSUBdata, $indic_config, $scriptblock, $is_old_spec, $runStart, $pos);
        if ($pos < $total) {
            $runStart = $pos;
            $runSyllable = $info[$runStart]['syllable'];
        }
    }
}
/**
 * Refine the position of every record currently marked POS_BASE_C using the font's GSUB data.
 *
 * A code point listed under 'pref' or 'pstf' becomes POS_POST_C, one listed under
 * 'blwf' becomes POS_BELOW_C. The features are consulted in the order pref, blwf,
 * pstf and the first hit wins; records with no hit are left unchanged.
 *
 * @param array $info     Glyph records; 'indic_position' may be rewritten.
 * @param array $GSUBdata Per-feature sets keyed by code point.
 */
public static function update_consonant_positions(&$info, $GSUBdata)
{
    for ($idx = 0, $total = count($info); $idx < $total; $idx++) {
        if ($info[$idx]['indic_position'] != self::POS_BASE_C) {
            continue;
        }

        // If the font would substitute this consonant...
        $codepoint = $info[$idx]['uni'];
        if (isset($GSUBdata['pref'][$codepoint])) {
            $info[$idx]['indic_position'] = self::POS_POST_C;
            continue;
        }
        if (isset($GSUBdata['blwf'][$codepoint])) {
            $info[$idx]['indic_position'] = self::POS_BELOW_C;
            continue;
        }
        if (isset($GSUBdata['pstf'][$codepoint])) {
            $info[$idx]['indic_position'] = self::POS_POST_C;
        }
    }
}
/**
 * Insert a dotted circle into every broken cluster of the glyph buffer.
 *
 * For the first record of each broken-cluster syllable, the dotted-circle record is
 * stamped with that syllable value and spliced in after any leading Repha records
 * of the same syllable.
 *
 * @param array $info         Glyph records (grows in place).
 * @param array $dottedcircle List whose element 0 is the dotted-circle record; the local
 *                            copy has its 'syllable' overwritten before each insertion.
 *
 * @throws \Mpdf\MpdfException If a record still exists at the cursor once the scan has finished.
 */
public static function insert_dotted_circles(&$info, $dottedcircle)
{
    $cursor = 0;
    $handledSyllable = 0;

    // count() is re-evaluated on purpose: the buffer grows with every insertion.
    while ($cursor < count($info)) {
        $current = $info[$cursor]['syllable'];
        $startsBrokenCluster = $handledSyllable != $current
            && ($current & 0x0F) == self::BROKEN_CLUSTER;

        if (!$startsBrokenCluster) {
            $cursor++;
            continue;
        }

        $handledSyllable = $current;
        $dottedcircle[0]['syllable'] = $current;

        /* Insert dottedcircle after possible Repha. */
        while ($cursor < count($info)
            && $handledSyllable == $info[$cursor]['syllable']
            && $info[$cursor]['indic_category'] == self::OT_REPHA
        ) {
            $cursor++;
        }
        array_splice($info, $cursor, 0, $dottedcircle);
    }

    // Legacy guard kept from the original code: the cursor should be past the last
    // record here. An earlier special case for a final broken cluster used to live
    // at this point and was disabled upstream.
    if (isset($info[$cursor])) {
        throw new \Mpdf\MpdfException('Unexpected error occured in Indic processing');
    }
}
415 /* Rules from:
416 * https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx */
/**
 * Perform the initial (pre-GSUB) reordering of one syllable, $info[$start] .. $info[$end - 1].
 *
 * Steps: find the base consonant, assign a visual position to every record, sort the
 * syllable by position (bubble_sort) and finally set the per-record feature masks
 * (RPHF, HALF, BLWF, ABVF, PSTF, PREF, CFAR).
 *
 * Vowel syllables, broken clusters (after dotted-circle insertion) and standalone
 * clusters all share this consonant logic; non-Indic clusters are left untouched.
 *
 * Every read of a neighbouring record ($i + 1, $i + 3) is bounded by $end so that the
 * routine never touches the following syllable or an undefined offset.
 *
 * @param array $info         Glyph records (modified in place).
 * @param array $GSUBdata     Per-feature sets keyed by code point ('rphf', 'pref', 'blwf' are read).
 * @param array $indic_config Script configuration; index 2 is the base position mode,
 *                            index 4 the reph mode.
 * @param mixed $scriptblock  Script identifier (compared against Ucdn::SCRIPT_* constants).
 * @param bool  $is_old_spec  True for old-style Indic script tags.
 * @param int   $start        Index of the first record of the syllable.
 * @param int   $end          Index one past the last record of the syllable.
 */
public static function initial_reordering_syllable(&$info, $GSUBdata, $indic_config, $scriptblock, $is_old_spec, $start, $end)
{
    /* vowel_syllable: We made the vowels look like consonants. So uses the consonant logic! */
    /* broken_cluster: We already inserted dotted-circles, so just call the standalone_cluster. */
    /* standalone_cluster: We treat NBSP/dotted-circle as if they are consonants, so we should just chain. */

    $syllable_type = ($info[$start]['syllable'] & 0x0F);
    if ($syllable_type == self::NON_INDIC_CLUSTER) {
        return;
    }
    if ($syllable_type == self::BROKEN_CLUSTER || $syllable_type == self::STANDALONE_CLUSTER) {
        /* For dotted-circle, this is what Uniscribe does:
         * If dotted-circle is the last glyph, it just does nothing.
         * i.e. It doesn't form Reph. */
        if ($info[$end - 1]['indic_category'] == self::OT_DOTTEDCIRCLE) {
            return;
        }
    }

    /* 1. Find base consonant:
     *
     * The shaping engine finds the base consonant of the syllable, using the
     * following algorithm: starting from the end of the syllable, move backwards
     * until a consonant is found that does not have a below-base or post-base
     * form (post-base forms have to follow below-base forms), or that is not a
     * pre-base reordering Ra, or arrive at the first consonant. The consonant
     * stopped at will be the base.
     *
     * o If the syllable starts with Ra + Halant (in a script that has Reph)
     *   and has more than one consonant, Ra is excluded from candidates for
     *   base consonants.
     */

    $base = $end;
    $has_reph = false;
    $limit = $start;

    if ($scriptblock != Ucdn::SCRIPT_KHMER) {
        /* -> If the syllable starts with Ra + Halant (in a script that has Reph)
         * and has more than one consonant, Ra is excluded from candidates for
         * base consonants. */
        // !empty() instead of count(): also copes with a font that has no 'rphf' entry at all.
        if (!empty($GSUBdata['rphf']) && $start + 3 <= $end &&
            (
                ($indic_config[4] == self::REPH_MODE_IMPLICIT && !self::is_joiner($info[$start + 2])) ||
                ($indic_config[4] == self::REPH_MODE_EXPLICIT && $info[$start + 2]['indic_category'] == self::OT_ZWJ)
            )) {
            /* See if it matches the 'rphf' feature. */
            if (isset($GSUBdata['rphf'][$info[$start]['uni']]) && self::is_halant_or_coeng($info[$start + 1])) {
                $limit += 2;
                while ($limit < $end && self::is_joiner($info[$limit])) {
                    $limit++;
                }
                $base = $start;
                $has_reph = true;
            }
        } elseif ($indic_config[4] == self::REPH_MODE_LOG_REPHA && $info[$start]['indic_category'] == self::OT_REPHA) {
            $limit += 1;
            while ($limit < $end && self::is_joiner($info[$limit])) {
                $limit++;
            }
            $base = $start;
            $has_reph = true;
        }
    }

    switch ($indic_config[2]) { // base_pos
        case self::BASE_POS_LAST:
            /* -> starting from the end of the syllable, move backwards */
            $i = $end;
            $seen_below = false;
            do {
                $i--;
                /* -> until a consonant is found */
                if (self::is_consonant($info[$i])) {
                    /* -> that does not have a below-base or post-base form
                     * (post-base forms have to follow below-base forms), */
                    if ($info[$i]['indic_position'] != self::POS_BELOW_C && ($info[$i]['indic_position'] != self::POS_POST_C || $seen_below)) {
                        $base = $i;
                        break;
                    }
                    if ($info[$i]['indic_position'] == self::POS_BELOW_C) {
                        $seen_below = true;
                    }

                    /* -> or that is not a pre-base reordering Ra,
                     *
                     * IMPLEMENTATION NOTES:
                     *
                     * Our pre-base reordering Ra's are marked POS_POST_C, so will be skipped
                     * by the logic above already.
                     */

                    /* -> or arrive at the first consonant. The consonant stopped at will
                     * be the base. */
                    $base = $i;
                } else {
                    /* A ZWJ after a Halant stops the base search, and requests an explicit
                     * half form.
                     * [A ZWJ before a Halant, requests a subjoined form instead, and hence
                     * search continues. This is particularly important for Bengali
                     * sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya] */
                    if ($start < $i && $info[$i]['indic_category'] == self::OT_ZWJ && $info[$i - 1]['indic_category'] == self::OT_H) {
                        if (!defined("OMIT_INDIC_FIX_1") || OMIT_INDIC_FIX_1 != 1) {
                            $base = $i;
                        } // INDIC_FIX_1
                        break;
                    }
                    // ZKI8
                    if ($start < $i && $info[$i]['indic_category'] == self::OT_ZWNJ) {
                        break;
                    }
                }
            } while ($i > $limit);
            break;

        case self::BASE_POS_FIRST:
            /* In scripts without half forms (eg. Khmer), the first consonant is always the base. */

            if (!$has_reph) {
                $base = $limit;
            }

            /* Find the last base consonant that is not blocked by ZWJ. If there is
             * a ZWJ right before a base consonant, that would request a subjoined form. */
            for ($i = $limit; $i < $end; $i++) {
                if (self::is_consonant($info[$i]) && $info[$i]['indic_position'] == self::POS_BASE_C) {
                    if ($limit < $i && $info[$i - 1]['indic_category'] == self::OT_ZWJ) {
                        break;
                    } else {
                        $base = $i;
                    }
                }
            }

            /* Mark all subsequent consonants as below. */
            for ($i = $base + 1; $i < $end; $i++) {
                if (self::is_consonant($info[$i]) && $info[$i]['indic_position'] == self::POS_BASE_C) {
                    $info[$i]['indic_position'] = self::POS_BELOW_C;
                }
            }
            break;
    }

    /* -> If the syllable starts with Ra + Halant (in a script that has Reph)
     * and has more than one consonant, Ra is excluded from candidates for
     * base consonants.
     *
     * Only do this for unforced Reph. (ie. not for Ra,H,ZWJ.) */
    if ($scriptblock != Ucdn::SCRIPT_KHMER) {
        if ($has_reph && $base == $start && $limit - $base <= 2) {
            /* Have no other consonant, so Reph is not formed and Ra becomes base. */
            $has_reph = false;
        }
    }

    /* 2. Decompose and reorder Matras:
     *
     * Each matra and any syllable modifier sign in the cluster are moved to the
     * appropriate position relative to the consonant(s) in the cluster. The
     * shaping engine decomposes two- or three-part matras into their constituent
     * parts before any repositioning. Matra characters are classified by which
     * consonant in a conjunct they have affinity for and are reordered to the
     * following positions:
     *
     * o Before first half form in the syllable
     * o After subjoined consonants
     * o After post-form consonant
     * o After main consonant (for above marks)
     *
     * IMPLEMENTATION NOTES:
     *
     * The normalize() routine has already decomposed matras for us, so we don't
     * need to worry about that.
     */

    /* 3. Reorder marks to canonical order:
     *
     * Adjacent nukta and halant or nukta and vedic sign are always repositioned
     * if necessary, so that the nukta is first.
     *
     * IMPLEMENTATION NOTES:
     *
     * Use the combining Class from Unicode categories? to bubble_sort.
     */

    /* Reorder characters */

    for ($i = $start; $i < $base; $i++) {
        $info[$i]['indic_position'] = min(self::POS_PRE_C, $info[$i]['indic_position']);
    }

    if ($base < $end) {
        $info[$base]['indic_position'] = self::POS_BASE_C;
    }

    /* Mark final consonants. A final consonant is one appearing after a matra,
     * ? only in Khmer. */
    for ($i = $base + 1; $i < $end; $i++) {
        if ($info[$i]['indic_category'] == self::OT_M) {
            for ($j = $i + 1; $j < $end; $j++) {
                if (self::is_consonant($info[$j])) {
                    $info[$j]['indic_position'] = self::POS_FINAL_C;
                    break;
                }
            }
            break;
        }
    }

    /* Handle beginning Ra */
    if ($scriptblock != Ucdn::SCRIPT_KHMER) {
        if ($has_reph) {
            $info[$start]['indic_position'] = self::POS_RA_TO_BECOME_REPH;
        }
    }

    /* For old-style Indic script tags, move the first post-base Halant after
     * last consonant. Only do this if there is *not* a Halant after last
     * consonant. Otherwise it becomes messy. */
    if ($is_old_spec) {
        for ($i = $base + 1; $i < $end; $i++) {
            if ($info[$i]['indic_category'] == self::OT_H) {
                for ($j = $end - 1; $j > $i; $j--) {
                    if (self::is_consonant($info[$j]) || $info[$j]['indic_category'] == self::OT_H) {
                        break;
                    }
                }
                if ($info[$j]['indic_category'] != self::OT_H && $j > $i) {
                    /* Move Halant to after last consonant. */
                    self::_move_info_pos($info, $i, $j + 1);
                }
                break;
            }
        }
    }

    /* Attach misc marks to previous char to move with them. */
    $last_pos = self::POS_START;
    for ($i = $start; $i < $end; $i++) {
        if ((self::FLAG($info[$i]['indic_category']) & (self::FLAG(self::OT_ZWJ) | self::FLAG(self::OT_ZWNJ) | self::FLAG(self::OT_N) | self::FLAG(self::OT_RS) | self::FLAG(self::OT_H) | self::FLAG(self::OT_COENG)))) {
            $info[$i]['indic_position'] = $last_pos;
            if ($info[$i]['indic_category'] == self::OT_H && $info[$i]['indic_position'] == self::POS_PRE_M) {
                /*
                 * Uniscribe doesn't move the Halant with Left Matra.
                 * TEST: U+092B,U+093F,U+094D
                 * We follow. This is important for the Sinhala
                 * U+0DDA split matra since it decomposes to U+0DD9,U+0DCA
                 * where U+0DD9 is a left matra and U+0DCA is the virama.
                 * We don't want to move the virama with the left matra.
                 * TEST: U+0D9A,U+0DDA
                 */
                for ($j = $i; $j > $start; $j--) {
                    if ($info[$j - 1]['indic_position'] != self::POS_PRE_M) {
                        $info[$i]['indic_position'] = $info[$j - 1]['indic_position'];
                        break;
                    }
                }
            }
        } elseif ($info[$i]['indic_position'] != self::POS_SMVD) {
            $last_pos = $info[$i]['indic_position'];
        }
    }

    /* Re-attach ZWJ, ZWNJ, and halant to next char, for after-base consonants. */
    $last_halant = $end;
    for ($i = $base + 1; $i < $end; $i++) {
        if (self::is_halant_or_coeng($info[$i])) {
            $last_halant = $i;
        } elseif (self::is_consonant($info[$i])) {
            for ($j = $last_halant; $j < $i; $j++) {
                if ($info[$j]['indic_position'] != self::POS_SMVD) {
                    $info[$j]['indic_position'] = $info[$i]['indic_position'];
                }
            }
        }
    }

    if ($scriptblock == Ucdn::SCRIPT_KHMER) {
        /* KHMER_FIX_2 */
        /* Move Coeng+RO (Halant,Ra) sequence before base consonant. */
        // Stop one short of $end: the pair ($i, $i + 1) must lie inside this syllable.
        for ($i = $base + 1; $i + 1 < $end; $i++) {
            if (self::is_halant_or_coeng($info[$i]) && self::is_ra($info[$i + 1]['uni'])) {
                $info[$i]['indic_position'] = self::POS_PRE_C;
                $info[$i + 1]['indic_position'] = self::POS_PRE_C;
                break;
            }
        }
    }

    if (!defined("OMIT_INDIC_FIX_2") || OMIT_INDIC_FIX_2 != 1) {
        // INDIC_FIX_2
        $ZWNJ_found = false;
        $POST_ZWNJ_c_found = false;
        for ($i = $base + 1; $i < $end; $i++) {
            if ($info[$i]['indic_category'] == self::OT_ZWNJ) {
                $ZWNJ_found = true;
            } elseif ($ZWNJ_found && $info[$i]['indic_category'] == self::OT_C) {
                $POST_ZWNJ_c_found = true;
            } elseif ($POST_ZWNJ_c_found && $info[$i]['indic_position'] == self::POS_BEFORE_SUB) {
                $info[$i]['indic_position'] = self::POS_AFTER_SUB;
            }
        }
    }

    /* Setup masks now */
    for ($i = $start; $i < $end; $i++) {
        $info[$i]['mask'] = 0;
    }

    if ($scriptblock == Ucdn::SCRIPT_KHMER) {
        /* Find a Coeng+RO (Halant,Ra) sequence and mark it for pre-base processing. */
        for ($i = $base; $i < $end - 1; $i++) { /* KHMER_FIX_1 From $start (not base) */
            if (self::is_halant_or_coeng($info[$i]) && self::is_ra($info[$i + 1]['uni'])) {
                $info[$i]['mask'] |= self::FLAG(self::PREF);
                $info[$i + 1]['mask'] |= self::FLAG(self::PREF);

                /* Mark the subsequent stuff with 'cfar'. Used in Khmer.
                 * Read the feature spec.
                 * This allows distinguishing the following cases with MS Khmer fonts:
                 * U+1784,U+17D2,U+179A,U+17D2,U+1782 [C+Coeng+RO+Coeng+C] => Should activate CFAR
                 * U+1784,U+17D2,U+1782,U+17D2,U+179A [C+Coeng+C+Coeng+RO] => Should NOT activate CFAR
                 */
                for ($j = ($i + 2); $j < $end; $j++) {
                    $info[$j]['mask'] |= self::FLAG(self::CFAR);
                }

                break;
            }
        }
    }

    /* Sit tight, rock 'n roll! */
    self::bubble_sort($info, $start, $end - $start);

    /* Find base again */
    $base = $end;
    for ($i = $start; $i < $end; $i++) {
        if ($info[$i]['indic_position'] == self::POS_BASE_C) {
            $base = $i;
            break;
        }
    }

    if ($scriptblock != Ucdn::SCRIPT_KHMER) {
        /* Reph */
        for ($i = $start; $i < $end; $i++) {
            if ($info[$i]['indic_position'] == self::POS_RA_TO_BECOME_REPH) {
                $info[$i]['mask'] |= self::FLAG(self::RPHF);
            }
        }

        /* Pre-base */
        $mask = self::FLAG(self::HALF);
        for ($i = $start; $i < $base; $i++) {
            $info[$i]['mask'] |= $mask;
        }
    }

    /* Post-base */
    $mask = (self::FLAG(self::BLWF) | self::FLAG(self::ABVF) | self::FLAG(self::PSTF));
    for ($i = $base + 1; $i < $end; $i++) {
        $info[$i]['mask'] |= $mask;
    }

    if ($scriptblock != Ucdn::SCRIPT_KHMER) {
        if (!defined("OMIT_INDIC_FIX_3") || OMIT_INDIC_FIX_3 != 1) {
            /* INDIC_FIX_3 */
            /* Find a (pre-base) Consonant, Halant,Ra sequence and mark Halant|Ra for below-base BLWF processing. */
            // TEST CASE &#x995;&#x9cd;&#x9b0;&#x9cd;&#x995; in FreeSans versus Vrinda
            if (($base - $start) >= 3) {
                for ($i = $start; $i < ($base - 2); $i++) {
                    if (self::is_consonant($info[$i])) {
                        if (self::is_halant_or_coeng($info[$i + 1]) && self::is_ra($info[$i + 2]['uni'])) {
                            if (isset($GSUBdata['blwf'][$info[$i + 2]['uni']])) {
                                // Would substitute Halant+Ra...BLWF
                                $info[$i + 1]['mask'] |= self::FLAG(self::BLWF);
                                $info[$i + 2]['mask'] |= self::FLAG(self::BLWF);
                            } elseif ($i + 3 < $end && self::is_halant_or_coeng($info[$i + 3])) {
                                /* Would not substitute as blwf: mark Ra+Halant for RPHF using the
                                 * following Halant (if present). The $end check keeps the look-ahead
                                 * inside this syllable when no base was found. */
                                $info[$i + 2]['mask'] |= self::FLAG(self::RPHF);
                                $info[$i + 3]['mask'] |= self::FLAG(self::RPHF);
                            }
                            break;
                        }
                    }
                }
            }
        }
    }

    if ($is_old_spec && $scriptblock == Ucdn::SCRIPT_DEVANAGARI) {
        /* Old-spec eye-lash Ra needs special handling. From the spec:
         * "The feature 'below-base form' is applied to consonants
         * having below-base forms and following the base consonant.
         * The exception is vattu, which may appear below half forms
         * as well as below the base glyph. The feature 'below-base
         * form' will be applied to all such occurrences of Ra as well."
         *
         * Test case: U+0924,U+094D,U+0930,U+094d,U+0915
         * with Sanskrit 2003 font.
         *
         * However, note that Ra,Halant,ZWJ is the correct way to
         * request eyelash form of Ra, so we wouldn't inhibit it
         * in that sequence.
         *
         * Test case: U+0924,U+094D,U+0930,U+094d,U+200D,U+0915
         */
        for ($i = $start; ($i + 1) < $base; $i++) {
            if ($info[$i]['indic_category'] == self::OT_RA && $info[$i + 1]['indic_category'] == self::OT_H &&
                ($i + 2 == $base || $info[$i + 2]['indic_category'] != self::OT_ZWJ)) {
                $info[$i]['mask'] |= self::FLAG(self::BLWF);
                $info[$i + 1]['mask'] |= self::FLAG(self::BLWF);
            }
        }
    }

    if ($scriptblock != Ucdn::SCRIPT_KHMER) {
        // !empty() instead of count(): also copes with a font that has no 'pref' entry at all.
        if (!empty($GSUBdata['pref']) && $base + 2 < $end) {
            /* Find a Halant,Ra sequence and mark it for pre-base processing. */
            for ($i = $base + 1; $i + 1 < $end; $i++) {
                // If old_spec find Ra-Halant...
                if ((isset($GSUBdata['pref'][$info[$i + 1]['uni']]) && self::is_halant_or_coeng($info[$i]) && self::is_ra($info[$i + 1]['uni'])) ||
                    ($is_old_spec && isset($GSUBdata['pref'][$info[$i]['uni']]) && self::is_halant_or_coeng($info[$i + 1]) && self::is_ra($info[$i]['uni']))
                ) {
                    $info[$i]['mask'] |= self::FLAG(self::PREF);
                    $info[$i + 1]['mask'] |= self::FLAG(self::PREF);
                    break;
                }
            }
        }
    }

    /* Apply ZWJ/ZWNJ effects */
    for ($i = $start + 1; $i < $end; $i++) {
        if (self::is_joiner($info[$i])) {
            $non_joiner = ($info[$i]['indic_category'] == self::OT_ZWNJ);
            $j = $i;
            while ($j > $start) {
                if (defined("OMIT_INDIC_FIX_4") && OMIT_INDIC_FIX_4 == 1) {
                    // INDIC_FIX_4 = do nothing - carry on //
                    // ZWNJ should block H C from forming blwf post-base - need to unmask backwards beyond first consonant arrived at //
                    if (!self::is_consonant($info[$j])) {
                        break;
                    }
                }
                $j--;

                /* ZWJ/ZWNJ should disable CJCT. They do that by simply
                 * being there, since we don't skip them for the CJCT
                 * feature (ie. F_MANUAL_ZWJ) */

                /* A ZWNJ disables HALF. */
                if ($non_joiner) {
                    $info[$j]['mask'] &= ~(self::FLAG(self::HALF) | self::FLAG(self::BLWF));
                }
            }
        }
    }
}
/**
 * Run the final reordering pass over the whole glyph buffer.
 *
 * final_reordering_syllable() is invoked once for each run of consecutive records
 * sharing the same 'syllable' value; an empty buffer is a no-op.
 *
 * @param array $info         Glyph records (modified in place).
 * @param array $GSUBdata     Font GSUB lookup data, passed through.
 * @param array $indic_config Script configuration, passed through.
 * @param mixed $scriptblock  Script identifier, passed through.
 * @param bool  $is_old_spec  Old-style script tag flag, passed through.
 */
public static function final_reordering(&$info, $GSUBdata, $indic_config, $scriptblock, $is_old_spec)
{
    $total = count($info);
    if ($total === 0) {
        return;
    }

    $runStart = 0;
    $runSyllable = $info[0]['syllable'];
    for ($pos = 1; $pos <= $total; $pos++) {
        // A run ends at the end of the buffer or where the syllable value changes.
        if ($pos < $total && $runSyllable == $info[$pos]['syllable']) {
            continue;
        }
        self::final_reordering_syllable($info, $GSUBdata, $indic_config, $scriptblock, $is_old_spec, $runStart, $pos);
        if ($pos < $total) {
            $runStart = $pos;
            $runSyllable = $info[$runStart]['syllable'];
        }
    }
}
/**
 * Final reordering of a single syllable (step 4 of Indic shaping), run after the
 * localized-forms and basic shaping GSUB features have been applied.
 *
 * In order, this method: finds the base again, moves pre-base matras closer to the
 * base, moves the reph to its script-specific position, moves a pre-base reordering
 * consonant, and flags a leading left matra with INIT when it starts a word.
 * Ported from HarfBuzz hb-ot-shape-complex-indic.cc.
 *
 * @param array $info         Glyph info records of the run; reordered and re-masked in place.
 * @param array $GSUBdata     GSUB data; only the 'pref' entry is consulted here.
 * @param array $indic_config Per-script configuration row; index 3 holds the reph position.
 * @param int   $scriptblock  Script identifier, compared against Ucdn::SCRIPT_* values.
 * @param bool  $is_old_spec  Not used by this method.
 * @param int   $start        Index of the first record of the syllable.
 * @param int   $end          Index one past the last record of the syllable.
 */
public static function final_reordering_syllable(&$info, $GSUBdata, $indic_config, $scriptblock, $is_old_spec, $start, $end)
{
    /* 4. Final reordering:
     *
     * After the localized forms and basic shaping forms GSUB features have been
     * applied (see below), the shaping engine performs some final glyph
     * reordering before applying all the remaining font features to the entire
     * cluster.
     */

    /* Find base again */
    for ($base = $start; $base < $end; $base++) {
        if ($info[$base]['indic_position'] >= self::POS_BASE_C) {
            if ($start < $base && $info[$base]['indic_position'] > self::POS_BASE_C) {
                $base--;
            }
            break;
        }
    }

    if ($base == $end && $start < $base && $info[$base - 1]['indic_category'] != self::OT_ZWJ) {
        $base--;
    }

    while ($start < $base && isset($info[$base]) && ($info[$base]['indic_category'] == self::OT_H || $info[$base]['indic_category'] == self::OT_N)) {
        $base--;
    }

    /* o Reorder matras:
     *
     * If a pre-base matra character had been reordered before applying basic
     * features, the glyph can be moved closer to the main consonant based on
     * whether half-forms had been formed. Actual position for the matra is
     * defined as "after last standalone halant glyph, after initial matra
     * position and before the main consonant". If ZWJ or ZWNJ follow this
     * halant, position is moved after it.
     */

    if ($start + 1 < $end && $start < $base) { /* Otherwise there can't be any pre-base matra characters. */
        /* If we lost track of base, alas, position before last thingy. */
        $new_pos = ($base == $end) ? $base - 2 : $base - 1;

        /* Malayalam / Tamil do not have "half" forms or explicit virama forms.
         * The glyphs formed by 'half' are Chillus or ligated explicit viramas.
         * We want to position matra after them.
         */
        if ($scriptblock != Ucdn::SCRIPT_MALAYALAM && $scriptblock != Ucdn::SCRIPT_TAMIL) {
            while ($new_pos > $start && !(self::is_one_of($info[$new_pos], (self::FLAG(self::OT_M) | self::FLAG(self::OT_H) | self::FLAG(self::OT_COENG))))) {
                $new_pos--;
            }

            /* If we found no Halant we are done.
             * Otherwise only proceed if the Halant does
             * not belong to the Matra itself! */
            if (self::is_halant_or_coeng($info[$new_pos]) && $info[$new_pos]['indic_position'] != self::POS_PRE_M) {
                /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */
                if ($new_pos + 1 < $end && self::is_joiner($info[$new_pos + 1])) {
                    $new_pos++;
                }
            } else {
                $new_pos = $start; /* No move. */
            }
        }

        if ($start < $new_pos && $info[$new_pos]['indic_position'] != self::POS_PRE_M) {
            /* Now go see if there's actually any matras... */
            for ($i = $new_pos; $i > $start; $i--) {
                if ($info[$i - 1]['indic_position'] == self::POS_PRE_M) {
                    $old_pos = $i - 1;
                    /* Equivalent of the C memmove: take the matra out of $old_pos and
                     * re-insert it so that it ends up at $new_pos. */
                    self::_move_info_pos($info, $old_pos, $new_pos + 1);

                    if ($old_pos < $base && $base <= $new_pos) { /* Shouldn't actually happen. */
                        $base--;
                    }
                    $new_pos--;
                }
            }
        }
    }

    /* o Reorder reph:
     *
     * Reph's original position is always at the beginning of the syllable,
     * (i.e. it is not reordered at the character reordering stage). However,
     * it will be reordered according to the basic-forms shaping results.
     * Possible positions for reph, depending on the script, are; after main,
     * before post-base consonant forms, and after post-base consonant forms.
     */

    /* If there's anything after the Ra that has the REPH pos, it ought to be halant.
     * Which means that the font has failed to ligate the Reph. In which case, we
     * shouldn't move. */
    if ($start + 1 < $end &&
        $info[$start]['indic_position'] == self::POS_RA_TO_BECOME_REPH && $info[$start + 1]['indic_position'] != self::POS_RA_TO_BECOME_REPH) {
        $reph_pos = $indic_config[3];
        /* The two flags below stand in for the "goto reph_step_5" / "goto reph_move"
         * jumps of the C original. */
        $skip_to_reph_step_5 = false;
        $skip_to_reph_move = false;

        /* 1. If reph should be positioned after post-base consonant forms,
         * proceed to step 5.
         */
        if ($reph_pos == self::REPH_POS_AFTER_POST) {
            $skip_to_reph_step_5 = true;
        }

        /* 2. If the reph repositioning class is not after post-base: target
         * position is after the first explicit halant glyph between the
         * first post-reph consonant and last main consonant. If ZWJ or ZWNJ
         * are following this halant, position is moved after it. If such
         * position is found, this is the target position. Otherwise,
         * proceed to the next step.
         *
         * Note: in old-implementation fonts, where classifications were
         * fixed in shaping engine, there was no case where reph position
         * will be found on this step.
         */
        if (!$skip_to_reph_step_5) {
            $new_reph_pos = $start + 1;

            while ($new_reph_pos < $base && !self::is_halant_or_coeng($info[$new_reph_pos])) {
                $new_reph_pos++;
            }

            if ($new_reph_pos < $base && self::is_halant_or_coeng($info[$new_reph_pos])) {
                /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */
                if ($new_reph_pos + 1 < $base && self::is_joiner($info[$new_reph_pos + 1])) {
                    $new_reph_pos++;
                }
                $skip_to_reph_move = true;
            }
        }

        /* 3. If reph should be repositioned after the main consonant: find the
         * first consonant not ligated with main, or find the first
         * consonant that is not a potential pre-base reordering Ra.
         */
        if ($reph_pos == self::REPH_POS_AFTER_MAIN && !$skip_to_reph_move && !$skip_to_reph_step_5) {
            $new_reph_pos = $base;
            /* XXX Skip potential pre-base reordering Ra. */
            while ($new_reph_pos + 1 < $end && $info[$new_reph_pos + 1]['indic_position'] <= self::POS_AFTER_MAIN) {
                $new_reph_pos++;
            }
            if ($new_reph_pos < $end) {
                $skip_to_reph_move = true;
            }
        }

        /* 4. If reph should be positioned before post-base consonant, find
         * first post-base classified consonant not ligated with main. If no
         * consonant is found, the target position should be before the
         * first matra, syllable modifier sign or vedic sign.
         */
        /* This is our take on what step 4 is trying to say (and failing, BADLY). */
        if ($reph_pos == self::REPH_POS_AFTER_SUB && !$skip_to_reph_move && !$skip_to_reph_step_5) {
            $new_reph_pos = $base;
            while ($new_reph_pos < $end && isset($info[$new_reph_pos + 1]['indic_position']) &&
                !(self::FLAG($info[$new_reph_pos + 1]['indic_position']) & (self::FLAG(self::POS_POST_C) | self::FLAG(self::POS_AFTER_POST) | self::FLAG(self::POS_SMVD)))) {
                $new_reph_pos++;
            }
            if ($new_reph_pos < $end) {
                $skip_to_reph_move = true;
            }
        }

        /* 5. If no consonant is found in steps 3 or 4, move reph to a position
         * immediately before the first post-base matra, syllable modifier
         * sign or vedic sign that has a reordering class after the intended
         * reph position. For example, if the reordering position for reph
         * is post-main, it will skip above-base matras that also have a
         * post-main position.
         */
        if (!$skip_to_reph_move) {
            /* Copied from step 2. */
            $new_reph_pos = $start + 1;
            while ($new_reph_pos < $base && !self::is_halant_or_coeng($info[$new_reph_pos])) {
                $new_reph_pos++;
            }

            if ($new_reph_pos < $base && self::is_halant_or_coeng($info[$new_reph_pos])) {
                /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */
                if ($new_reph_pos + 1 < $base && self::is_joiner($info[$new_reph_pos + 1])) {
                    $new_reph_pos++;
                }
                $skip_to_reph_move = true;
            }
        }

        /* 6. Otherwise, reorder reph to the end of the syllable.
         */
        if (!$skip_to_reph_move) {
            $new_reph_pos = $end - 1;
            while ($new_reph_pos > $start && $info[$new_reph_pos]['indic_position'] == self::POS_SMVD) {
                $new_reph_pos--;
            }

            /*
             * If the Reph is to be ending up after a Matra,Halant sequence,
             * position it before that Halant so it can interact with the Matra.
             * However, if it's a plain Consonant,Halant we shouldn't do that.
             * Uniscribe doesn't do this.
             * TEST: U+0930,U+094D,U+0915,U+094B,U+094D
             */
            /* (HarfBuzz guards this with !uniscribe_bug_compatible; here it always applies.) */
            if (self::is_halant_or_coeng($info[$new_reph_pos])) {
                for ($i = $base + 1; $i < $new_reph_pos; $i++) {
                    if ($info[$i]['indic_category'] == self::OT_M) {
                        /* Ok, got it. */
                        $new_reph_pos--;
                    }
                }
            }
        }

        /* Move */
        self::_move_info_pos($info, $start, $new_reph_pos + 1);

        if ($start < $base && $base <= $new_reph_pos) {
            $base--;
        }
    }

    /* o Reorder pre-base reordering consonants:
     *
     * If a pre-base reordering consonant is found, reorder it according to
     * the following rules:
     */

    if (count($GSUBdata['pref']) && $base + 1 < $end) { /* Otherwise there can't be any pre-base reordering Ra. */
        for ($i = $base + 1; $i < $end; $i++) {
            if ($info[$i]['mask'] & self::FLAG(self::PREF)) {
                /* 1. Only reorder a glyph produced by substitution during application
                 * of the <pref> feature. (Note that a font may shape a Ra consonant with
                 * the feature generally but block it in certain contexts.)
                 */
                // ??? Need to TEST if actual substitution has occurred
                if ($i + 1 == $end || ($info[$i + 1]['mask'] & self::FLAG(self::PREF)) == 0) {
                    /*
                     * 2. Try to find a target position the same way as for pre-base matra.
                     * If it is found, reorder pre-base consonant glyph.
                     *
                     * 3. If position is not found, reorder immediately before main
                     * consonant.
                     */
                    $old_pos = $i;
                    $new_pos = $base;
                    /* Malayalam / Tamil do not have "half" forms or explicit virama forms.
                     * The glyphs formed by 'half' are Chillus or ligated explicit viramas.
                     * We want to position matra after them.
                     */
                    if ($scriptblock != Ucdn::SCRIPT_MALAYALAM && $scriptblock != Ucdn::SCRIPT_TAMIL) {
                        while ($new_pos > $start &&
                            !(self::is_one_of($info[$new_pos - 1], self::FLAG(self::OT_M) | self::FLAG(self::OT_H) | self::FLAG(self::OT_COENG)))) {
                            $new_pos--;
                        }

                        /* In Khmer coeng model, a V,Ra can go *after* matras. If it goes after a
                         * split matra, it should be reordered to *before* the left part of such matra. */
                        if ($new_pos > $start && $info[$new_pos - 1]['indic_category'] == self::OT_M) {
                            /* Scan with a separate index: PHP has no block scope, so reusing $i
                             * here (the C original shadows it) would overwrite the outer loop
                             * index and the move below would pick up the matra instead of the
                             * pre-base consonant. */
                            for ($j = $base + 1; $j < $old_pos; $j++) {
                                if ($info[$j]['indic_category'] == self::OT_M) {
                                    $new_pos--;
                                    break;
                                }
                            }
                        }
                    }

                    if ($new_pos > $start && self::is_halant_or_coeng($info[$new_pos - 1])) {
                        /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */
                        if ($new_pos < $end && self::is_joiner($info[$new_pos])) {
                            $new_pos++;
                        }
                    }

                    self::_move_info_pos($info, $old_pos, $new_pos);

                    if ($new_pos <= $base && $base < $old_pos) {
                        $base++;
                    }
                }

                break;
            }
        }
    }

    /* Apply 'init' to the Left Matra if it's a word start. */
    if ($info[$start]['indic_position'] == self::POS_PRE_M &&
        ($start == 0 ||
        ($info[$start - 1]['general_category'] < Ucdn::UNICODE_GENERAL_CATEGORY_FORMAT || $info[$start - 1]['general_category'] > Ucdn::UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)
        )) {
        $info[$start]['mask'] |= self::FLAG(self::INIT);
    }

    /*
     * Finish off and go home!
     */
}
/**
 * Moves one record of $info from index $from so that it sits just before the
 * record that was at index $to; the array is re-indexed by array_splice().
 *
 * @param array $info List of records, modified in place.
 * @param int   $from Index of the record to move.
 * @param int   $to   Insertion index, expressed in the pre-move numbering.
 */
public static function _move_info_pos(&$info, $from, $to)
{
    $moved = [$info[$from]];

    if ($from <= $to) {
        // Insert the copy first so that $from still addresses the original record.
        array_splice($info, $to, 0, $moved);
        array_splice($info, $from, 1);
        return;
    }

    // Moving towards the front: removing first leaves index $to unaffected.
    array_splice($info, $from, 1);
    array_splice($info, $to, 0, $moved);
}
/**
 * Set of code points treated as the letter Ra, one per supported script.
 * Keys are Unicode code points; the value 1 is only a presence marker
 * (is_ra() tests the key with isset()).
 */
public static $ra_chars = [
    0x0930 => 1, /* Devanagari */
    0x09B0 => 1, /* Bengali */
    0x09F0 => 1, /* Bengali (Assamese) */
    0x0A30 => 1, /* Gurmukhi */ /* No Reph */
    0x0AB0 => 1, /* Gujarati */
    0x0B30 => 1, /* Oriya */
    0x0BB0 => 1, /* Tamil */ /* No Reph */
    0x0C30 => 1, /* Telugu */ /* Reph formed only with ZWJ */
    0x0CB0 => 1, /* Kannada */
    0x0D30 => 1, /* Malayalam */ /* No Reph, Logical Repha */
    0x0DBB => 1, /* Sinhala */ /* Reph formed only with ZWJ */
    0x179A => 1, /* Khmer */ /* No Reph, Visual Repha */
];
/**
 * Tells whether a code point is one of the Ra letters listed in $ra_chars.
 *
 * @param int $u Unicode code point.
 * @return bool
 */
public static function is_ra($u)
{
    return isset(self::$ra_chars[$u]);
}
/**
 * Tests whether a glyph's Indic category is contained in a bit set of categories.
 *
 * @param array $info  Glyph info record; reads 'indic_category' and the optional 'is_ligature'.
 * @param int   $flags Bitwise OR of self::FLAG(self::OT_*) values.
 * @return bool Always false for a ligated glyph.
 */
public static function is_one_of($info, $flags)
{
    /* If it ligated, all bets are off. */
    if (!empty($info['is_ligature'])) {
        return false;
    }

    $category_bit = self::FLAG($info['indic_category']);

    return (bool) ($category_bit & $flags);
}
/**
 * Tells whether the glyph is categorised as ZWJ or ZWNJ.
 *
 * @param array $info Glyph info record.
 * @return bool
 */
public static function is_joiner($info)
{
    $joiner_flags = self::FLAG(self::OT_ZWJ) | self::FLAG(self::OT_ZWNJ);

    return self::is_one_of($info, $joiner_flags);
}
/**
 * Tells whether the glyph counts as a consonant for shaping purposes.
 * Vowels and placeholders (NBSP, dotted circle) are treated as if they were consonants.
 *
 * @param array $info Glyph info record.
 * @return bool
 */
public static function is_consonant($info)
{
    $consonant_flags = self::FLAG(self::OT_C)
        | self::FLAG(self::OT_CM)
        | self::FLAG(self::OT_RA)
        | self::FLAG(self::OT_V)
        | self::FLAG(self::OT_NBSP)
        | self::FLAG(self::OT_DOTTEDCIRCLE);

    return self::is_one_of($info, $consonant_flags);
}
/**
 * Tells whether the glyph is categorised as a halant (OT_H) or a coeng (OT_COENG).
 *
 * @param array $info Glyph info record.
 * @return bool
 */
public static function is_halant_or_coeng($info)
{
    $halant_flags = self::FLAG(self::OT_H) | self::FLAG(self::OT_COENG);

    return self::is_one_of($info, $halant_flags);
}
// From hb-private.hh
/**
 * Tests whether $lo <= $u <= $hi.
 *
 * When [$lo, $hi] is an aligned power-of-two sized block (the differing bits of
 * $lo and $hi form a run of low-order ones that are all clear in $lo and all set
 * in $hi), the test is done by masking; otherwise by two plain comparisons.
 *
 * @param int $u  Value to test.
 * @param int $lo Inclusive lower bound.
 * @param int $hi Inclusive upper bound.
 * @return bool
 */
public static function in_range($u, $lo, $hi)
{
    $diff = $lo ^ $hi;

    $is_aligned_block = ($diff & $lo) == 0
        && ($diff & $hi) == $diff
        && ($diff & ($diff + 1)) == 0;

    if (!$is_aligned_block) {
        return $lo <= $u && $u <= $hi;
    }

    return ($u & ~$diff) == $lo;
}
// From hb-private.hh
/**
 * Returns the single-bit mask for a small integer, i.e. 1 shifted left by $x.
 * Used to build bit sets of OT_*, POS_* and feature indices.
 *
 * @param int $x Bit index.
 * @return int
 */
public static function FLAG($x)
{
    $bit = 1 << $x;

    return $bit;
}
1298 // BELOW from hb-ot-shape-complex-indic.cc
1301 * Indic configurations.
// base_position: values stored at index 2 of each $indic_configs row
const BASE_POS_FIRST = 0;
const BASE_POS_LAST = 1;

// reph_position: values stored at index 3 of each $indic_configs row.
// Each value is numerically equal to the POS_* constant named in its trailing comment.
const REPH_POS_DEFAULT = 10; // POS_BEFORE_POST,

const REPH_POS_AFTER_MAIN = 5; // POS_AFTER_MAIN,

const REPH_POS_BEFORE_SUB = 7; // POS_BEFORE_SUB,
const REPH_POS_AFTER_SUB = 9; // POS_AFTER_SUB,
const REPH_POS_BEFORE_POST = 10; // POS_BEFORE_POST,
const REPH_POS_AFTER_POST = 12; // POS_AFTER_POST

// reph_mode: values stored at index 4 of each $indic_configs row
const REPH_MODE_IMPLICIT = 0; /* Reph formed out of initial Ra,H sequence. */
const REPH_MODE_EXPLICIT = 1; /* Reph formed out of initial Ra,H,ZWJ sequence. */
const REPH_MODE_VIS_REPHA = 2; /* Encoded Repha character, no reordering needed. */
const REPH_MODE_LOG_REPHA = 3; /* Encoded Repha character, needs reordering. */
1325 struct of indic_configs{
1326 KEY - script;
1327 0 - has_old_spec;
1328 1 - virama;
1329 2 - base_pos;
1330 3 - reph_pos;
1331 4 - reph_mode;
/**
 * Per-script shaping configuration, keyed by the UCDN script number.
 * Each row is [has_old_spec, virama code point, base_pos, reph_pos, reph_mode];
 * base_pos, reph_pos and reph_mode hold BASE_POS_*, REPH_POS_* and REPH_MODE_* values.
 */
public static $indic_configs = [/* index is SCRIPT_number from UCDN */
    9 => [true, 0x094D, 1, 10, 0],
    10 => [true, 0x09CD, 1, 9, 0],
    11 => [true, 0x0A4D, 1, 7, 0],
    12 => [true, 0x0ACD, 1, 10, 0],
    13 => [true, 0x0B4D, 1, 5, 0],
    14 => [true, 0x0BCD, 1, 12, 0],
    15 => [true, 0x0C4D, 1, 12, 1],
    16 => [true, 0x0CCD, 1, 12, 0],
    17 => [true, 0x0D4D, 1, 5, 3],
    18 => [false, 0x0DCA, 0, 5, 1], /* Sinhala */
    30 => [false, 0x17D2, 0, 10, 2], /* Khmer */
    84 => [false, 0xA9C0, 1, 10, 0], /* Javanese */
];
1354 // from "hb-ot-shape-complex-indic-table.cc"
// Indic syllabic categories. Each value is the category code stored in the low
// byte of an $indic_table / $khmer_table entry.
const ISC_A = 0; // INDIC_SYLLABIC_CATEGORY_AVAGRAHA Avagraha
const ISC_Bi = 8; // INDIC_SYLLABIC_CATEGORY_BINDU Bindu
const ISC_C = 1; // INDIC_SYLLABIC_CATEGORY_CONSONANT Consonant
const ISC_CD = 1; // INDIC_SYLLABIC_CATEGORY_CONSONANT_DEAD Consonant_Dead
const ISC_CF = 17; // INDIC_SYLLABIC_CATEGORY_CONSONANT_FINAL Consonant_Final
const ISC_CHL = 1; // INDIC_SYLLABIC_CATEGORY_CONSONANT_HEAD_LETTER Consonant_Head_Letter
const ISC_CM = 17; // INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL Consonant_Medial
const ISC_CP = 11; // INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER Consonant_Placeholder
const ISC_CR = 15; // INDIC_SYLLABIC_CATEGORY_CONSONANT_REPHA Consonant_Repha
const ISC_CS = 1; // INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED Consonant_Subjoined
const ISC_ML = 0; // INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER Modifying_Letter
const ISC_N = 3; // INDIC_SYLLABIC_CATEGORY_NUKTA Nukta
const ISC_x = 0; // INDIC_SYLLABIC_CATEGORY_OTHER Other
const ISC_RS = 13; // INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER Register_Shifter
const ISC_TL = 0; // INDIC_SYLLABIC_CATEGORY_TONE_LETTER Tone_Letter
const ISC_TM = 3; // INDIC_SYLLABIC_CATEGORY_TONE_MARK Tone_Mark
const ISC_V = 4; // INDIC_SYLLABIC_CATEGORY_VIRAMA Virama
const ISC_Vs = 8; // INDIC_SYLLABIC_CATEGORY_VISARGA Visarga
const ISC_Vo = 2; // INDIC_SYLLABIC_CATEGORY_VOWEL Vowel
const ISC_M = 7; // INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT Vowel_Dependent
const ISC_VI = 2; // INDIC_SYLLABIC_CATEGORY_VOWEL_INDEPENDENT Vowel_Independent

// Indic matra categories. Each value is the position code stored in the high
// byte (value << 8) of an $indic_table / $khmer_table entry.
const IMC_B = 8; // INDIC_MATRA_CATEGORY_BOTTOM Bottom
const IMC_BR = 11; // INDIC_MATRA_CATEGORY_BOTTOM_AND_RIGHT Bottom_And_Right
const IMC_I = 15; // INDIC_MATRA_CATEGORY_INVISIBLE Invisible
const IMC_L = 3; // INDIC_MATRA_CATEGORY_LEFT Left
const IMC_LR = 11; // INDIC_MATRA_CATEGORY_LEFT_AND_RIGHT Left_And_Right
const IMC_x = 15; // INDIC_MATRA_CATEGORY_NOT_APPLICABLE Not_Applicable
const IMC_O = 5; // INDIC_MATRA_CATEGORY_OVERSTRUCK Overstruck
const IMC_R = 11; // INDIC_MATRA_CATEGORY_RIGHT Right
const IMC_T = 6; // INDIC_MATRA_CATEGORY_TOP Top
const IMC_TB = 8; // INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM Top_And_Bottom
const IMC_TBR = 11; // INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_RIGHT Top_And_Bottom_And_Right
const IMC_TL = 6; // INDIC_MATRA_CATEGORY_TOP_AND_LEFT Top_And_Left
const IMC_TLR = 11; // INDIC_MATRA_CATEGORY_TOP_AND_LEFT_AND_RIGHT Top_And_Left_And_Right
const IMC_TR = 11; // INDIC_MATRA_CATEGORY_TOP_AND_RIGHT Top_And_Right
const IMC_VOL = 2; // INDIC_MATRA_CATEGORY_VISUAL_ORDER_LEFT Visual_Order_Left
1395 If in original table = _(C,x), that = ISC_C,IMC_x
1396 Value is IMC_x << 8 (or IMC_x * 256) = 3840
1397 plus ISC_C = 1, so = 3841
/**
 * Category table for U+0900..U+0DFF (ten 128-entry blocks, indices 0..1279)
 * followed by Vedic Extensions U+1CD0..U+1CFF (48 entries, indices 1280..1327).
 *
 * Each entry is (IMC_* << 8) | ISC_*, e.g. _(C,x) = (IMC_x << 8) | ISC_C = 3841.
 *
 * NOTE(review): U+0A72 and U+0A73 previously held 13841, which is not a valid
 * (IMC << 8) | ISC value (the largest valid value is 3857). They are set to 3851,
 * i.e. (IMC_x << 8) | ISC_CP, Consonant_Placeholder. Confirm against the HarfBuzz
 * table this was generated from; 3841 (plain consonant) is the other plausible intent.
 */
public static $indic_table = [
    /* Devanagari (0900..097F) */

    /* 0900 */ 3848, 3848, 3848, 3848, 3842, 3842, 3842, 3842,
    /* 0908 */ 3842, 3842, 3842, 3842, 3842, 3842, 3842, 3842,
    /* 0910 */ 3842, 3842, 3842, 3842, 3842, 3841, 3841, 3841,
    /* 0918 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 0920 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 0928 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 0930 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 0938 */ 3841, 3841, 1543, 2823, 3843, 3840, 2823, 775,
    /* 0940 */ 2823, 2055, 2055, 2055, 2055, 1543, 1543, 1543,
    /* 0948 */ 1543, 2823, 2823, 2823, 2823, 2052, 775, 2823,
    /* 0950 */ 3840, 3840, 3840, 3840, 3840, 1543, 2055, 2055,
    /* 0958 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 0960 */ 3842, 3842, 2055, 2055, 3840, 3840, 3840, 3840,
    /* 0968 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* 0970 */ 3840, 3840, 3842, 3842, 3842, 3842, 3842, 3842,
    /* 0978 */ 3840, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
    /* Bengali (0980..09FF) */

    /* 0980 */ 3840, 3848, 3848, 3848, 3840, 3842, 3842, 3842,
    /* 0988 */ 3842, 3842, 3842, 3842, 3842, 3840, 3840, 3842,
    /* 0990 */ 3842, 3840, 3840, 3842, 3842, 3841, 3841, 3841,
    /* 0998 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 09A0 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 09A8 */ 3841, 3840, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 09B0 */ 3841, 3840, 3841, 3840, 3840, 3840, 3841, 3841,
    /* 09B8 */ 3841, 3841, 3840, 3840, 3843, 3840, 2823, 775,
    /* 09C0 */ 2823, 2055, 2055, 2055, 2055, 3840, 3840, 775,
    /* 09C8 */ 775, 3840, 3840, 2823, 2823, 2052, 3841, 3840,
    /* 09D0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 2823,
    /* 09D8 */ 3840, 3840, 3840, 3840, 3841, 3841, 3840, 3841,
    /* 09E0 */ 3842, 3842, 2055, 2055, 3840, 3840, 3840, 3840,
    /* 09E8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* 09F0 */ 3841, 3841, 3840, 3840, 3840, 3840, 3840, 3840,
    /* 09F8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* Gurmukhi (0A00..0A7F) */

    /* 0A00 */ 3840, 3848, 3848, 3848, 3840, 3842, 3842, 3842,
    /* 0A08 */ 3842, 3842, 3842, 3840, 3840, 3840, 3840, 3842,
    /* 0A10 */ 3842, 3840, 3840, 3842, 3842, 3841, 3841, 3841,
    /* 0A18 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 0A20 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 0A28 */ 3841, 3840, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 0A30 */ 3841, 3840, 3841, 3841, 3840, 3841, 3841, 3840,
    /* 0A38 */ 3841, 3841, 3840, 3840, 3843, 3840, 2823, 775,
    /* 0A40 */ 2823, 2055, 2055, 3840, 3840, 3840, 3840, 1543,
    /* 0A48 */ 1543, 3840, 3840, 1543, 1543, 2052, 3840, 3840,
    /* 0A50 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* 0A58 */ 3840, 3841, 3841, 3841, 3841, 3840, 3841, 3840,
    /* 0A60 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* 0A68 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* 0A70 */ 3848, 3840, 3851, 3851, 3840, 3857, 3840, 3840,
    /* 0A78 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* Gujarati (0A80..0AFF) */

    /* 0A80 */ 3840, 3848, 3848, 3848, 3840, 3842, 3842, 3842,
    /* 0A88 */ 3842, 3842, 3842, 3842, 3842, 3842, 3840, 3842,
    /* 0A90 */ 3842, 3842, 3840, 3842, 3842, 3841, 3841, 3841,
    /* 0A98 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 0AA0 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 0AA8 */ 3841, 3840, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 0AB0 */ 3841, 3840, 3841, 3841, 3840, 3841, 3841, 3841,
    /* 0AB8 */ 3841, 3841, 3840, 3840, 3843, 3840, 2823, 775,
    /* 0AC0 */ 2823, 2055, 2055, 2055, 2055, 1543, 3840, 1543,
    /* 0AC8 */ 1543, 2823, 3840, 2823, 2823, 2052, 3840, 3840,
    /* 0AD0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* 0AD8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* 0AE0 */ 3842, 3842, 2055, 2055, 3840, 3840, 3840, 3840,
    /* 0AE8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* 0AF0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* 0AF8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* Oriya (0B00..0B7F) */

    /* 0B00 */ 3840, 3848, 3848, 3848, 3840, 3842, 3842, 3842,
    /* 0B08 */ 3842, 3842, 3842, 3842, 3842, 3840, 3840, 3842,
    /* 0B10 */ 3842, 3840, 3840, 3842, 3842, 3841, 3841, 3841,
    /* 0B18 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 0B20 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 0B28 */ 3841, 3840, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 0B30 */ 3841, 3840, 3841, 3841, 3840, 3841, 3841, 3841,
    /* 0B38 */ 3841, 3841, 3840, 3840, 3843, 3840, 2823, 1543,
    /* 0B40 */ 2823, 2055, 2055, 2055, 2055, 3840, 3840, 775,
    /* 0B48 */ 1543, 3840, 3840, 2823, 2823, 2052, 3840, 3840,
    /* 0B50 */ 3840, 3840, 3840, 3840, 3840, 3840, 1543, 2823,
    /* 0B58 */ 3840, 3840, 3840, 3840, 3841, 3841, 3840, 3841,
    /* 0B60 */ 3842, 3842, 2055, 2055, 3840, 3840, 3840, 3840,
    /* 0B68 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* 0B70 */ 3840, 3841, 3840, 3840, 3840, 3840, 3840, 3840,
    /* 0B78 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* Tamil (0B80..0BFF) */

    /* 0B80 */ 3840, 3840, 3848, 3840, 3840, 3842, 3842, 3842,
    /* 0B88 */ 3842, 3842, 3842, 3840, 3840, 3840, 3842, 3842,
    /* 0B90 */ 3842, 3840, 3842, 3842, 3842, 3841, 3840, 3840,
    /* 0B98 */ 3840, 3841, 3841, 3840, 3841, 3840, 3841, 3841,
    /* 0BA0 */ 3840, 3840, 3840, 3841, 3841, 3840, 3840, 3840,
    /* 0BA8 */ 3841, 3841, 3841, 3840, 3840, 3840, 3841, 3841,
    /* 0BB0 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 0BB8 */ 3841, 3841, 3840, 3840, 3840, 3840, 2823, 2823,
    /* 0BC0 */ 1543, 2055, 2055, 3840, 3840, 3840, 775, 775,
    /* 0BC8 */ 775, 3840, 2823, 2823, 2823, 1540, 3840, 3840,
    /* 0BD0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 2823,
    /* 0BD8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* 0BE0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* 0BE8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* 0BF0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* 0BF8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* Telugu (0C00..0C7F) */

    /* 0C00 */ 3840, 3848, 3848, 3848, 3840, 3842, 3842, 3842,
    /* 0C08 */ 3842, 3842, 3842, 3842, 3842, 3840, 3842, 3842,
    /* 0C10 */ 3842, 3840, 3842, 3842, 3842, 3841, 3841, 3841,
    /* 0C18 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 0C20 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 0C28 */ 3841, 3840, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 0C30 */ 3841, 3841, 3841, 3841, 3840, 3841, 3841, 3841,
    /* 0C38 */ 3841, 3841, 3840, 3840, 3840, 3840, 1543, 1543,
    /* 0C40 */ 1543, 2823, 2823, 2823, 2823, 3840, 1543, 1543,
    /* 0C48 */ 2055, 3840, 1543, 1543, 1543, 1540, 3840, 3840,
    /* 0C50 */ 3840, 3840, 3840, 3840, 3840, 1543, 2055, 3840,
    /* 0C58 */ 3841, 3841, 3840, 3840, 3840, 3840, 3840, 3840,
    /* 0C60 */ 3842, 3842, 2055, 2055, 3840, 3840, 3840, 3840,
    /* 0C68 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* 0C70 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* 0C78 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* Kannada (0C80..0CFF) */

    /* 0C80 */ 3840, 3840, 3848, 3848, 3840, 3842, 3842, 3842,
    /* 0C88 */ 3842, 3842, 3842, 3842, 3842, 3840, 3842, 3842,
    /* 0C90 */ 3842, 3840, 3842, 3842, 3842, 3841, 3841, 3841,
    /* 0C98 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 0CA0 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 0CA8 */ 3841, 3840, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 0CB0 */ 3841, 3841, 3841, 3841, 3840, 3841, 3841, 3841,
    /* 0CB8 */ 3841, 3841, 3840, 3840, 3843, 3840, 2823, 1543,
    /* 0CC0 */ 2823, 2823, 2823, 2823, 2823, 3840, 1543, 2823,
    /* 0CC8 */ 2823, 3840, 2823, 2823, 1543, 1540, 3840, 3840,
    /* 0CD0 */ 3840, 3840, 3840, 3840, 3840, 2823, 2823, 3840,
    /* 0CD8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3841, 3840,
    /* 0CE0 */ 3842, 3842, 2055, 2055, 3840, 3840, 3840, 3840,
    /* 0CE8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* 0CF0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* 0CF8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* Malayalam (0D00..0D7F) */

    /* 0D00 */ 3840, 3840, 3848, 3848, 3840, 3842, 3842, 3842,
    /* 0D08 */ 3842, 3842, 3842, 3842, 3842, 3840, 3842, 3842,
    /* 0D10 */ 3842, 3840, 3842, 3842, 3842, 3841, 3841, 3841,
    /* 0D18 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 0D20 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 0D28 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 0D30 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 0D38 */ 3841, 3841, 3841, 3840, 3840, 3840, 2823, 2823,
    /* 0D40 */ 2823, 2823, 2823, 2055, 2055, 3840, 775, 775,
    /* 0D48 */ 775, 3840, 2823, 2823, 2823, 1540, 3855, 3840,
    /* 0D50 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 2823,
    /* 0D58 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* 0D60 */ 3842, 3842, 2055, 2055, 3840, 3840, 3840, 3840,
    /* 0D68 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* 0D70 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* 0D78 */ 3840, 3840, 3841, 3841, 3841, 3841, 3841, 3841,
    /* Sinhala (0D80..0DFF) */

    /* 0D80 */ 3840, 3840, 3848, 3848, 3840, 3842, 3842, 3842,
    /* 0D88 */ 3842, 3842, 3842, 3842, 3842, 3842, 3842, 3842,
    /* 0D90 */ 3842, 3842, 3842, 3842, 3842, 3842, 3842, 3840,
    /* 0D98 */ 3840, 3840, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 0DA0 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 0DA8 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 0DB0 */ 3841, 3841, 3840, 3841, 3841, 3841, 3841, 3841,
    /* 0DB8 */ 3841, 3841, 3841, 3841, 3840, 3841, 3840, 3840,
    /* 0DC0 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3840,
    /* 0DC8 */ 3840, 3840, 1540, 3840, 3840, 3840, 3840, 2823,
    /* 0DD0 */ 2823, 2823, 1543, 1543, 2055, 3840, 2055, 3840,
    /* 0DD8 */ 2823, 775, 1543, 775, 2823, 2823, 2823, 2823,
    /* 0DE0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* 0DE8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* 0DF0 */ 3840, 3840, 2823, 2823, 3840, 3840, 3840, 3840,
    /* 0DF8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* Vedic Extensions (1CD0..1CFF) */

    /* 1CD0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* 1CD8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* 1CE0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* 1CE8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* 1CF0 */ 3840, 3840, 3848, 3848, 3840, 3840, 3840, 3840,
    /* 1CF8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
];
/**
 * Category table for the Khmer block U+1780..U+17FF (128 entries, index = code point - 0x1780).
 * Each entry is (IMC_* << 8) | ISC_*, the same encoding as $indic_table.
 */
public static $khmer_table = [
    /* Khmer (1780..17FF) */

    /* 1780 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 1788 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 1790 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 1798 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
    /* 17A0 */ 3841, 3841, 3841, 3842, 3842, 3842, 3842, 3842,
    /* 17A8 */ 3842, 3842, 3842, 3842, 3842, 3842, 3842, 3842,
    /* 17B0 */ 3842, 3842, 3842, 3842, 3840, 3840, 2823, 1543,
    /* 17B8 */ 1543, 1543, 1543, 2055, 2055, 2055, 1543, 2823,
    /* 17C0 */ 2823, 775, 775, 775, 2823, 2823, 3848, 3848,
    /* 17C8 */ 2823, 3853, 3853, 3840, 3855, 3840, 3840, 3840,
    /* 17D0 */ 3840, 1540, 3844, 3840, 3840, 3840, 3840, 3840,
    /* 17D8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* 17E0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* 17E8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* 17F0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
    /* 17F8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
];
// from "hb-ot-shape-complex-indic-table.cc"
/**
 * Looks up the packed category value, (IMC_* << 8) | ISC_*, for a code point.
 *
 * @param int $u Unicode code point.
 * @return int Table entry for Indic, Vedic Extensions and Khmer code points;
 *             3851 (Consonant_Placeholder) for U+00A0 and U+25CC; 3840 (Other) otherwise.
 */
public static function indic_get_categories($u)
{
    if (0x0900 <= $u && $u <= 0x0DFF) {
        return self::$indic_table[$u - 0x0900]; // offset 0 for most "indic"
    }
    /* The Vedic Extensions rows sit directly after the ten 128-entry blocks for
     * U+0900..U+0DFF, i.e. at index 1280. The former offset 1152 pointed at the
     * Sinhala rows, and the former upper bound 0x1D00 was one past the 48 Vedic
     * entries (U+1CD0..U+1CFF). */
    if (0x1CD0 <= $u && $u <= 0x1CFF) {
        return self::$indic_table[$u - 0x1CD0 + 1280]; // offset for Vedic extensions
    }
    if (0x1780 <= $u && $u <= 0x17FF) {
        return self::$khmer_table[$u - 0x1780]; // Khmer
    }
    if ($u == 0x00A0) {
        return 3851; // (ISC_CP | (IMC_x << 8))
    }
    if ($u == 0x25CC) {
        return 3851; // (ISC_CP | (IMC_x << 8))
    }
    return 3840; // (ISC_x | (IMC_x << 8))
}
1634 // BELOW from hb-ot-shape-complex-indic.cc
1636 * Indic shaper.
/**
 * Tells whether $u lies in the 128-code-point block that starts at $Base.
 *
 * @param int $u    Unicode code point.
 * @param int $Base First code point of the block (a multiple of 0x80).
 * @return bool
 */
public static function IN_HALF_BLOCK($u, $Base)
{
    $block_start = $u & ~0x7F;

    return $block_start == $Base;
}
/**
 * Tells whether $u is in the Devanagari block (U+0900..U+097F).
 *
 * @param int $u Unicode code point.
 * @return bool
 */
public static function IS_DEVA($u)
{
    return ($u & ~0x7F) == 0x0900;
}
/**
 * Tells whether $u is in the Bengali block (U+0980..U+09FF).
 *
 * @param int $u Unicode code point.
 * @return bool
 */
public static function IS_BENG($u)
{
    return ($u & ~0x7F) == 0x0980;
}
/**
 * Tells whether $u is in the Gurmukhi block (U+0A00..U+0A7F).
 *
 * @param int $u Unicode code point.
 * @return bool
 */
public static function IS_GURU($u)
{
    return ($u & ~0x7F) == 0x0A00;
}
/**
 * Tells whether $u is in the Gujarati block (U+0A80..U+0AFF).
 *
 * @param int $u Unicode code point.
 * @return bool
 */
public static function IS_GUJR($u)
{
    return ($u & ~0x7F) == 0x0A80;
}
/**
 * Tells whether $u is in the Oriya block (U+0B00..U+0B7F).
 *
 * @param int $u Unicode code point.
 * @return bool
 */
public static function IS_ORYA($u)
{
    return ($u & ~0x7F) == 0x0B00;
}
/**
 * Tells whether $u is in the Tamil block (U+0B80..U+0BFF).
 *
 * @param int $u Unicode code point.
 * @return bool
 */
public static function IS_TAML($u)
{
    return ($u & ~0x7F) == 0x0B80;
}
/**
 * Tells whether $u is in the Telugu block (U+0C00..U+0C7F).
 *
 * @param int $u Unicode code point.
 * @return bool
 */
public static function IS_TELU($u)
{
    return ($u & ~0x7F) == 0x0C00;
}
/**
 * Tells whether $u is in the Kannada block (U+0C80..U+0CFF).
 *
 * @param int $u Unicode code point.
 * @return bool
 */
public static function IS_KNDA($u)
{
    return ($u & ~0x7F) == 0x0C80;
}
/**
 * Tells whether $u is in the Malayalam block (U+0D00..U+0D7F).
 *
 * @param int $u Unicode code point.
 * @return bool
 */
public static function IS_MLYM($u)
{
    return ($u & ~0x7F) == 0x0D00;
}
/**
 * Tells whether $u is in the Sinhala block (U+0D80..U+0DFF).
 *
 * @param int $u Unicode code point.
 * @return bool
 */
public static function IS_SINH($u)
{
    return ($u & ~0x7F) == 0x0D80;
}
/**
 * Tells whether $u is in the Khmer block (U+1780..U+17FF).
 *
 * @param int $u Unicode code point.
 * @return bool
 */
public static function IS_KHMR($u)
{
    return ($u & ~0x7F) == 0x1780;
}
/**
 * Position class for a left-side matra: always POS_PRE_M, whatever the script.
 *
 * @param int $u Unicode code point (not consulted).
 * @return int self::POS_PRE_M
 */
public static function MATRA_POS_LEFT($u)
{
    $position = self::POS_PRE_M;

    return $position;
}
/**
 * Position class for a right-side matra, chosen by the 128-code-point block of $u.
 *
 * @param int $u Unicode code point of the matra.
 * @return int One of the self::POS_* constants.
 */
public static function MATRA_POS_RIGHT($u)
{
    switch ($u & ~0x7F) {
        case 0x0900: /* Devanagari */
            return self::POS_AFTER_SUB;
        case 0x0980: /* Bengali */
        case 0x0A00: /* Gurmukhi */
        case 0x0A80: /* Gujarati */
        case 0x0B00: /* Oriya */
        case 0x0B80: /* Tamil */
            return self::POS_AFTER_POST;
        case 0x0C00: /* Telugu */
            return ($u <= 0x0C42) ? self::POS_BEFORE_SUB : self::POS_AFTER_SUB;
        case 0x0C80: /* Kannada */
            return ($u < 0x0CC3 || $u > 0xCD6) ? self::POS_BEFORE_SUB : self::POS_AFTER_SUB;
        case 0x0D00: /* Malayalam */
            return self::POS_AFTER_POST;
        case 0x0D80: /* Sinhala */
            return self::POS_AFTER_SUB;
        case 0x1780: /* Khmer */
            return self::POS_AFTER_POST;
        default:
            return self::POS_AFTER_SUB; /* default */
    }
}
/**
 * Position class for a top matra, chosen by the 128-code-point block of $u.
 * Bengali and Malayalam have no top matras and fall through to the default.
 *
 * @param int $u Unicode code point of the matra.
 * @return int One of the self::POS_* constants.
 */
public static function MATRA_POS_TOP($u)
{
    switch ($u & ~0x7F) {
        case 0x0900: /* Devanagari */
            return self::POS_AFTER_SUB;
        case 0x0A00: /* Gurmukhi */
            return self::POS_AFTER_POST; /* Deviate from spec */
        case 0x0A80: /* Gujarati */
            return self::POS_AFTER_SUB;
        case 0x0B00: /* Oriya */
            return self::POS_AFTER_MAIN;
        case 0x0B80: /* Tamil */
            return self::POS_AFTER_SUB;
        case 0x0C00: /* Telugu */
        case 0x0C80: /* Kannada */
            return self::POS_BEFORE_SUB;
        case 0x0D80: /* Sinhala */
            return self::POS_AFTER_SUB;
        case 0x1780: /* Khmer */
            return self::POS_AFTER_POST;
        default:
            return self::POS_AFTER_SUB; /* default */
    }
}
/**
 * Position class for a bottom matra, chosen by the 128-code-point block of $u.
 *
 * @param int $u Unicode code point of the matra.
 * @return int One of the self::POS_* constants.
 */
public static function MATRA_POS_BOTTOM($u)
{
    switch ($u & ~0x7F) {
        case 0x0900: /* Devanagari */
        case 0x0980: /* Bengali */
            return self::POS_AFTER_SUB;
        case 0x0A00: /* Gurmukhi */
        case 0x0A80: /* Gujarati */
            return self::POS_AFTER_POST;
        case 0x0B00: /* Oriya */
            return self::POS_AFTER_SUB;
        case 0x0B80: /* Tamil */
            return self::POS_AFTER_POST;
        case 0x0C00: /* Telugu */
        case 0x0C80: /* Kannada */
            return self::POS_BEFORE_SUB;
        case 0x0D00: /* Malayalam */
            return self::POS_AFTER_POST;
        case 0x0D80: /* Sinhala */
            return self::POS_AFTER_SUB;
        case 0x1780: /* Khmer */
            return self::POS_AFTER_POST;
        default:
            return self::POS_AFTER_SUB; /* default */
    }
}
/**
 * Maps a matra's side (left / right / top / bottom, given as POS_PRE_C, POS_POST_C,
 * POS_ABOVE_C or POS_BELOW_C) to its script-specific position class.
 *
 * @param int $u    Unicode code point of the matra.
 * @param int $side One of the self::POS_* constants.
 * @return int Script-specific position for the four sides above; otherwise $side unchanged.
 */
public static function matra_position($u, $side)
{
    if ($side == self::POS_PRE_C) {
        return self::MATRA_POS_LEFT($u);
    }
    if ($side == self::POS_POST_C) {
        return self::MATRA_POS_RIGHT($u);
    }
    if ($side == self::POS_ABOVE_C) {
        return self::MATRA_POS_TOP($u);
    }
    if ($side == self::POS_BELOW_C) {
        return self::MATRA_POS_BOTTOM($u);
    }

    return $side;
}
/**
 * Split a vowel matra that has to be rendered as two (or three) parts.
 *
 * From HarfBuzz (old). New HarfBuzz uses /src/hb-ucdn/ucdn.c and unicodedata_db.h
 * for a full method of decomposition for all characters. Strictly one should
 * always fully decompose and then recompose back, but only the split matras
 * are handled here.
 *
 * @param int $ab Code point to decompose
 * @return int[]|false Component code points in order, or false when $ab has no entry
 */
public static function decompose_indic($ab)
{
    switch ($ab) {
        /*
         * Decompose split matras.
         */

        /* bengali */
        case 0x9cb:
            return [0x9c7, 0x9be];
        case 0x9cc:
            return [0x9c7, 0x9d7];

        /* oriya */
        case 0xb48:
            return [0xb47, 0xb56];
        case 0xb4b:
            return [0xb47, 0xb3e];
        case 0xb4c:
            return [0xb47, 0xb57];

        /* tamil */
        case 0xbca:
            return [0xbc6, 0xbbe];
        case 0xbcb:
            return [0xbc7, 0xbbe];
        case 0xbcc:
            return [0xbc6, 0xbd7];

        /* telugu */
        case 0xc48:
            return [0xc46, 0xc56];

        /* kannada */
        case 0xcc0:
            return [0xcbf, 0xcd5];
        case 0xcc7:
            return [0xcc6, 0xcd5];
        case 0xcc8:
            return [0xcc6, 0xcd6];
        case 0xcca:
            return [0xcc6, 0xcc2];
        case 0xccb:
            return [0xcc6, 0xcc2, 0xcd5];

        /* malayalam */
        case 0xd4a:
            return [0xd46, 0xd3e];
        case 0xd4b:
            return [0xd47, 0xd3e];
        case 0xd4c:
            return [0xd46, 0xd57];

        /* sinhala */
        // NB Some fonts break with these Sinhala decomps (although this is Uniscribe spec)
        // Can check if character would be substituted by pstf and only decompose if true
        // e.g. if (isset($GSUBdata['pstf'][$ab])) - would need to pass $GSUBdata as parameter to this function
        case 0xdda:
            return [0xdd9, 0xdca];
        case 0xddc:
            return [0xdd9, 0xdcf];
        case 0xddd:
            return [0xdd9, 0xdcf, 0xdca];
        case 0xdde:
            return [0xdd9, 0xddf];

        /* khmer */
        // Each of these keeps the original code point as the second part, after 0x17c1.
        case 0x17be:
            return [0x17c1, 0x17be];
        case 0x17bf:
            return [0x17c1, 0x17bf];
        case 0x17c0:
            return [0x17c1, 0x17c0];
        case 0x17c4:
            return [0x17c1, 0x17c4];
        case 0x17c5:
            return [0x17c1, 0x17c5];

        /* tibetan - included here although does not use Indic shaper in other ways */
        case 0xf73:
            return [0xf71, 0xf72];
        case 0xf75:
            return [0xf71, 0xf74];
        case 0xf76:
            return [0xfb2, 0xf80];
        // NOTE(review): 0xf77 yields 0xf81, which itself has an entry below, whereas
        // 0xf79 is listed already expanded to three parts - confirm this is intended.
        case 0xf77:
            return [0xfb2, 0xf81];
        case 0xf78:
            return [0xfb3, 0xf80];
        case 0xf79:
            return [0xfb3, 0xf71, 0xf80];
        case 0xf81:
            return [0xf71, 0xf80];
    }

    // Not a split matra known to this table.
    return false;
}
/**
 * Sort a slice of $arr in place by each element's 'indic_position', ascending.
 *
 * Only the $len elements beginning at index $start are touched. Neighbours
 * are swapped only when the left one is strictly greater, so elements with
 * equal positions keep their relative order.
 *
 * @param array $arr Array whose elements each carry an 'indic_position' key (modified in place)
 * @param int $start Index of the first element of the slice
 * @param int $len Number of elements in the slice
 * @return void
 */
public static function bubble_sort(&$arr, $start, $len)
{
    if ($len < 2) {
        return;
    }

    // $last is the highest left-hand index compared in a pass; it shrinks by one
    // per pass because each pass moves the largest remaining element to the end.
    for ($last = $start + $len - 2; $last >= $start; $last--) {
        for ($i = $start; $i <= $last; $i++) {
            $next = $i + 1;
            if ($arr[$i]['indic_position'] > $arr[$next]['indic_position']) {
                $held = $arr[$i];
                $arr[$i] = $arr[$next];
                $arr[$next] = $held;
            }
        }
    }
}