fix php 5.6 in docker dev env (#1740)
[openemr.git] / vendor / mpdf / mpdf / classes / otl.php
blob0fe88b58d2cb55f474a9a09d644055fbb9bbefd6
1 <?php
// MpdfException is the exception class this file throws when a font's
// OTL data file cannot be read.
require_once __DIR__ . '/../MpdfException.php';

// NOTE(review): not referenced in the visible part of this file; presumably a
// compatibility switch for old-spec script handling - confirm at the point of use.
define('_OTL_OLD_SPEC_COMPAT_1', true);

// Dictionary node / match type codes.
// NOTE(review): presumably consumed when walking the line-breaking
// dictionaries held in otl::$lbdicts - confirm against the code that reads them.
define('_DICT_NODE_TYPE_SPLIT', 0x01);
define('_DICT_NODE_TYPE_LINEAR', 0x02);
define('_DICT_INTERMEDIATE_MATCH', 0x03);
define('_DICT_FINAL_MATCH', 0x04);
12 class otl
// NOTE(review): applyOTL() and the constructor also assign current_fh, GDEFdata,
// GSUB_length, schOTLdata, GSLuCoverage, GSUBLookups and arabGlyphs, none of which
// is declared in this list - confirm whether they are declared elsewhere in the class.

// Owning mPDF instance, injected through the constructor.
public $mpdf;

// Arabic joining tables.
// NOTE(review): presumably filled by arabic_initialise() - confirm.
public $arabLeftJoining;
public $arabRightJoining;

// $arabTransparent extended with the current font's mark glyph class (see applyOTL).
public $arabTransparentJoin;
public $arabTransparent;

// Cached GSUB data keyed by font key and by "<fontkey>.GSUB.<script>.<langsys>".
public $GSUBdata;
public $GPOSdata;

// Key into $GSUBdata for the font/script/langsys combination currently being shaped.
public $GSUBfont;

// 'fontkey' of the mPDF CurrentFont at the time applyOTL() runs.
public $fontkey;

// Raw contents of "<fontkey>.GSUBGPOStables.dat".
public $ttfOTLdata;

// Copied from the mPDF CurrentFont entry of the same name.
public $glyphIDtoUni;

public $_pos;

// Values loaded from "<fontkey>.GDEFdata.php" (cached per font key).
public $GSUB_offset;
public $GPOS_offset;
public $MarkAttachmentType;
public $MarkGlyphSets;

// Searched with strpos() for a character's hex code to decide whether it is a mark.
public $GlyphClassMarks;
public $GlyphClassLigatures;
public $GlyphClassBases;
public $GlyphClassComponents;

public $Ignores;
public $LuCoverage;

// Per-character records (general_category, bidi_type, group, uni, hex, ...) of the
// subchunk currently being processed.
public $OTLdata;

// Ligatures[$posarr lpos] => nc
public $assocLigs;

// assocMarks[$posarr mpos] => array(compID, ligPos)
public $assocMarks;

// One-letter shaper code chosen from the script block:
// I, A, K, T, L, S, M, E or '' (no special shaper).
public $shaper;

// Set to true while the Indic/Khmer/Sinhala, Myanmar and SEA GSUB stages run.
public $restrictToSyllable;

public $lbdicts; // Line-breaking dictionaries

// Initialised to an empty array by the constructor.
public $LuDataCache;

// When true, applyOTL() emits debugging output through _dumpproc().
public $debugOTL = false;
/**
 * Attach this OTL processor to its mPDF document and reset its caches.
 *
 * @param mPDF $mpdf Document object; kept in $this->mpdf.
 */
public function __construct(mPDF $mpdf)
{
    $this->mpdf = $mpdf;

    // Kept after the $this->mpdf assignment in case the initialiser reads it
    // (its body is not visible here).
    $this->arabic_initialise();

    $this->current_fh = '';

    // Line-breaking dictionaries and the lookup-data cache both start out empty.
    $this->LuDataCache = $this->lbdicts = array();
}
86 function applyOTL($str, $useOTL)
88 $this->OTLdata = array();
89 if (trim($str) == '') {
90 return $str;
92 if (!$useOTL) {
93 return $str;
96 // 1. Load GDEF data
97 //==============================
98 $this->fontkey = $this->mpdf->CurrentFont['fontkey'];
99 $this->glyphIDtoUni = $this->mpdf->CurrentFont['glyphIDtoUni'];
100 if (!isset($this->GDEFdata[$this->fontkey])) {
101 include(_MPDF_TTFONTDATAPATH . $this->fontkey . '.GDEFdata.php');
102 $this->GSUB_offset = $this->GDEFdata[$this->fontkey]['GSUB_offset'] = $GSUB_offset;
103 $this->GPOS_offset = $this->GDEFdata[$this->fontkey]['GPOS_offset'] = $GPOS_offset;
104 $this->GSUB_length = $this->GDEFdata[$this->fontkey]['GSUB_length'] = $GSUB_length;
105 $this->MarkAttachmentType = $this->GDEFdata[$this->fontkey]['MarkAttachmentType'] = $MarkAttachmentType;
106 $this->MarkGlyphSets = $this->GDEFdata[$this->fontkey]['MarkGlyphSets'] = $MarkGlyphSets;
107 $this->GlyphClassMarks = $this->GDEFdata[$this->fontkey]['GlyphClassMarks'] = $GlyphClassMarks;
108 $this->GlyphClassLigatures = $this->GDEFdata[$this->fontkey]['GlyphClassLigatures'] = $GlyphClassLigatures;
109 $this->GlyphClassComponents = $this->GDEFdata[$this->fontkey]['GlyphClassComponents'] = $GlyphClassComponents;
110 $this->GlyphClassBases = $this->GDEFdata[$this->fontkey]['GlyphClassBases'] = $GlyphClassBases;
111 } else {
112 $this->GSUB_offset = $this->GDEFdata[$this->fontkey]['GSUB_offset'];
113 $this->GPOS_offset = $this->GDEFdata[$this->fontkey]['GPOS_offset'];
114 $this->GSUB_length = $this->GDEFdata[$this->fontkey]['GSUB_length'];
115 $this->MarkAttachmentType = $this->GDEFdata[$this->fontkey]['MarkAttachmentType'];
116 $this->MarkGlyphSets = $this->GDEFdata[$this->fontkey]['MarkGlyphSets'];
117 $this->GlyphClassMarks = $this->GDEFdata[$this->fontkey]['GlyphClassMarks'];
118 $this->GlyphClassLigatures = $this->GDEFdata[$this->fontkey]['GlyphClassLigatures'];
119 $this->GlyphClassComponents = $this->GDEFdata[$this->fontkey]['GlyphClassComponents'];
120 $this->GlyphClassBases = $this->GDEFdata[$this->fontkey]['GlyphClassBases'];
123 // 2. Prepare string as HEX string and Analyse character properties
124 //=================================================================
125 $earr = $this->mpdf->UTF8StringToArray($str, false);
127 $scriptblock = 0;
128 $scriptblocks = array();
129 $scriptblocks[0] = 0;
130 $vstr = '';
131 $OTLdata = array();
132 $subchunk = 0;
133 $charctr = 0;
134 foreach ($earr as $char) {
135 $ucd_record = UCDN::get_ucd_record($char);
136 $sbl = $ucd_record[6];
138 // Special case - Arabic End of Ayah
139 if ($char == 1757) {
140 $sbl = UCDN::SCRIPT_ARABIC;
143 if ($sbl && $sbl != 40 && $sbl != 102) {
144 if ($scriptblock == 0) {
145 $scriptblock = $sbl;
146 $scriptblocks[$subchunk] = $scriptblock;
147 } else if ($scriptblock > 0 && $scriptblock != $sbl) {
148 // *************************************************
149 // NEW (non-common) Script encountered in this chunk. Start a new subchunk
150 $subchunk++;
151 $scriptblock = $sbl;
152 $charctr = 0;
153 $scriptblocks[$subchunk] = $scriptblock;
157 $OTLdata[$subchunk][$charctr]['general_category'] = $ucd_record[0];
158 $OTLdata[$subchunk][$charctr]['bidi_type'] = $ucd_record[2];
160 //$OTLdata[$subchunk][$charctr]['combining_class'] = $ucd_record[1];
161 //$OTLdata[$subchunk][$charctr]['bidi_type'] = $ucd_record[2];
162 //$OTLdata[$subchunk][$charctr]['mirrored'] = $ucd_record[3];
163 //$OTLdata[$subchunk][$charctr]['east_asian_width'] = $ucd_record[4];
164 //$OTLdata[$subchunk][$charctr]['normalization_check'] = $ucd_record[5];
165 //$OTLdata[$subchunk][$charctr]['script'] = $ucd_record[6];
167 $charasstr = $this->unicode_hex($char);
169 if (strpos($this->GlyphClassMarks, $charasstr) !== false) {
170 $OTLdata[$subchunk][$charctr]['group'] = 'M';
171 } else if ($char == 32 || $char == 12288) {
172 $OTLdata[$subchunk][$charctr]['group'] = 'S';
173 } // 12288 = 0x3000 = CJK space
174 else {
175 $OTLdata[$subchunk][$charctr]['group'] = 'C';
178 $OTLdata[$subchunk][$charctr]['uni'] = $char;
179 $OTLdata[$subchunk][$charctr]['hex'] = $charasstr;
180 $charctr++;
183 /* PROCESS EACH SUBCHUNK WITH DIFFERENT SCRIPTS */
184 for ($sch = 0; $sch <= $subchunk; $sch++) {
185 $this->OTLdata = $OTLdata[$sch];
186 $scriptblock = $scriptblocks[$sch];
188 // 3. Get Appropriate Scripts, and Shaper engine from analysing text and list of available scripts/langsys in font
189 //==============================
190 // Based on actual script block of text, select shaper (and line-breaking dictionaries)
191 if (UCDN::SCRIPT_DEVANAGARI <= $scriptblock && $scriptblock <= UCDN::SCRIPT_MALAYALAM) {
192 $this->shaper = "I";
193 } // INDIC shaper
194 else if ($scriptblock == UCDN::SCRIPT_ARABIC || $scriptblock == UCDN::SCRIPT_SYRIAC) {
195 $this->shaper = "A";
196 } // ARABIC shaper
197 else if ($scriptblock == UCDN::SCRIPT_NKO || $scriptblock == UCDN::SCRIPT_MANDAIC) {
198 $this->shaper = "A";
199 } // ARABIC shaper
200 else if ($scriptblock == UCDN::SCRIPT_KHMER) {
201 $this->shaper = "K";
202 } // KHMER shaper
203 else if ($scriptblock == UCDN::SCRIPT_THAI) {
204 $this->shaper = "T";
205 } // THAI shaper
206 else if ($scriptblock == UCDN::SCRIPT_LAO) {
207 $this->shaper = "L";
208 } // LAO shaper
209 else if ($scriptblock == UCDN::SCRIPT_SINHALA) {
210 $this->shaper = "S";
211 } // SINHALA shaper
212 else if ($scriptblock == UCDN::SCRIPT_MYANMAR) {
213 $this->shaper = "M";
214 } // MYANMAR shaper
215 else if ($scriptblock == UCDN::SCRIPT_NEW_TAI_LUE) {
216 $this->shaper = "E";
217 } // SEA South East Asian shaper
218 else if ($scriptblock == UCDN::SCRIPT_CHAM) {
219 $this->shaper = "E";
220 } // SEA South East Asian shaper
221 else if ($scriptblock == UCDN::SCRIPT_TAI_THAM) {
222 $this->shaper = "E";
223 } // SEA South East Asian shaper
224 else
225 $this->shaper = "";
226 // Get scripttag based on actual text script
227 $scripttag = UCDN::$uni_scriptblock[$scriptblock];
229 $GSUBscriptTag = '';
230 $GSUBlangsys = '';
231 $GPOSscriptTag = '';
232 $GPOSlangsys = '';
233 $is_old_spec = false;
235 $ScriptLang = $this->mpdf->CurrentFont['GSUBScriptLang'];
236 if (count($ScriptLang)) {
237 list($GSUBscriptTag, $is_old_spec) = $this->_getOTLscriptTag($ScriptLang, $scripttag, $scriptblock, $this->shaper, $useOTL, 'GSUB');
238 if ($this->mpdf->fontLanguageOverride && strpos($ScriptLang[$GSUBscriptTag], $this->mpdf->fontLanguageOverride) !== false) {
239 $GSUBlangsys = str_pad($this->mpdf->fontLanguageOverride, 4);
240 } else if ($GSUBscriptTag && isset($ScriptLang[$GSUBscriptTag]) && $ScriptLang[$GSUBscriptTag] != '') {
241 $GSUBlangsys = $this->_getOTLLangTag($this->mpdf->currentLang, $ScriptLang[$GSUBscriptTag]);
244 $ScriptLang = $this->mpdf->CurrentFont['GPOSScriptLang'];
246 // NB If after GSUB, the same script/lang exist for GPOS, just use these...
247 if ($GSUBscriptTag && $GSUBlangsys && isset($ScriptLang[$GSUBscriptTag]) && strpos($ScriptLang[$GSUBscriptTag], $GSUBlangsys) !== false) {
248 $GPOSlangsys = $GSUBlangsys;
249 $GPOSscriptTag = $GSUBscriptTag;
252 // else repeat for GPOS
253 // [Font XBRiyaz has GSUB tables for latn, but not GPOS for latn]
254 else if (count($ScriptLang)) {
255 list($GPOSscriptTag, $dummy) = $this->_getOTLscriptTag($ScriptLang, $scripttag, $scriptblock, $this->shaper, $useOTL, 'GPOS');
256 if ($GPOSscriptTag && $this->mpdf->fontLanguageOverride && strpos($ScriptLang[$GPOSscriptTag], $this->mpdf->fontLanguageOverride) !== false) {
257 $GPOSlangsys = str_pad($this->mpdf->fontLanguageOverride, 4);
258 } else if ($GPOSscriptTag && isset($ScriptLang[$GPOSscriptTag]) && $ScriptLang[$GPOSscriptTag] != '') {
259 $GPOSlangsys = $this->_getOTLLangTag($this->mpdf->currentLang, $ScriptLang[$GPOSscriptTag]);
263 ////////////////////////////////////////////////////////////////
264 // This is just for the font_dump_OTL utility to set script and langsys override
265 if (isset($this->mpdf->overrideOTLsettings) && isset($this->mpdf->overrideOTLsettings[$this->fontkey])) {
266 $GSUBscriptTag = $GPOSscriptTag = $this->mpdf->overrideOTLsettings[$this->fontkey]['script'];
267 $GSUBlangsys = $GPOSlangsys = $this->mpdf->overrideOTLsettings[$this->fontkey]['lang'];
269 ////////////////////////////////////////////////////////////////
271 if (!$GSUBscriptTag && !$GSUBlangsys && !$GPOSscriptTag && !$GPOSlangsys) {
272 // Remove ZWJ and ZWNJ
273 for ($i = 0; $i < count($this->OTLdata); $i++) {
274 if ($this->OTLdata[$i]['uni'] == 8204 || $this->OTLdata[$i]['uni'] == 8205) {
275 array_splice($this->OTLdata, $i, 1);
278 $this->schOTLdata[$sch] = $this->OTLdata;
279 $this->OTLdata = array();
280 continue;
283 // Don't use MYANMAR shaper unless using v2 scripttag
284 if ($this->shaper == 'M' && $GSUBscriptTag != 'mym2') {
285 $this->shaper = '';
288 $GSUBFeatures = (isset($this->mpdf->CurrentFont['GSUBFeatures'][$GSUBscriptTag][$GSUBlangsys]) ? $this->mpdf->CurrentFont['GSUBFeatures'][$GSUBscriptTag][$GSUBlangsys] : false);
289 $GPOSFeatures = (isset($this->mpdf->CurrentFont['GPOSFeatures'][$GPOSscriptTag][$GPOSlangsys]) ? $this->mpdf->CurrentFont['GPOSFeatures'][$GPOSscriptTag][$GPOSlangsys] : false);
291 $this->assocLigs = array(); // Ligatures[$posarr lpos] => nc
292 $this->assocMarks = array(); // assocMarks[$posarr mpos] => array(compID, ligPos)
294 if (!isset($this->GDEFdata[$this->fontkey]['GSUBGPOStables'])) {
295 $this->ttfOTLdata = $this->GDEFdata[$this->fontkey]['GSUBGPOStables'] = file_get_contents(_MPDF_TTFONTDATAPATH . $this->fontkey . '.GSUBGPOStables.dat', 'rb');
296 if (!$this->ttfOTLdata) {
297 throw new MpdfException('Can\'t open file ' . _MPDF_TTFONTDATAPATH . $this->fontkey . '.GSUBGPOStables.dat');
299 } else {
300 $this->ttfOTLdata = $this->GDEFdata[$this->fontkey]['GSUBGPOStables'];
304 if ($this->debugOTL) {
305 $this->_dumpproc('BEGIN', '-', '-', '-', '-', -1, '-', 0);
309 ////////////////////////////////////////////////////////////////
310 ////////////////////////////////////////////////////////////////
311 ///////// LINE BREAKING FOR KHMER, THAI + LAO /////////////////
312 ////////////////////////////////////////////////////////////////
313 ////////////////////////////////////////////////////////////////
314 // Insert U+200B at word boundaries using dictionaries
315 if ($this->mpdf->useDictionaryLBR && ($this->shaper == "K" || $this->shaper == "T" || $this->shaper == "L")) {
316 // Sets $this->OTLdata[$i]['wordend']=true at possible end of word boundaries
317 $this->SEAlineBreaking();
319 // Insert U+200B at word boundaries for Tibetan
320 else if ($this->mpdf->useTibetanLBR && $scriptblock == UCDN::SCRIPT_TIBETAN) {
321 // Sets $this->OTLdata[$i]['wordend']=true at possible end of word boundaries
322 $this->TibetanlineBreaking();
324 ////////////////////////////////////////////////////////////////
325 ////////////////////////////////////////////////////////////////
326 ////////// GSUB /////////////////////////////////
327 ////////////////////////////////////////////////////////////////
328 ////////////////////////////////////////////////////////////////
329 if (($useOTL & 0xFF) && $GSUBscriptTag && $GSUBlangsys && $GSUBFeatures) {
331 // 4. Load GSUB data, Coverage & Lookups
332 //=================================================================
334 $this->GSUBfont = $this->fontkey . '.GSUB.' . $GSUBscriptTag . '.' . $GSUBlangsys;
336 if (!isset($this->GSUBdata[$this->GSUBfont])) {
337 if (file_exists(_MPDF_TTFONTDATAPATH . $this->mpdf->CurrentFont['fontkey'] . '.GSUB.' . $GSUBscriptTag . '.' . $GSUBlangsys . '.php')) {
338 include_once(_MPDF_TTFONTDATAPATH . $this->mpdf->CurrentFont['fontkey'] . '.GSUB.' . $GSUBscriptTag . '.' . $GSUBlangsys . '.php');
339 $this->GSUBdata[$this->GSUBfont]['rtlSUB'] = $rtlSUB;
340 $this->GSUBdata[$this->GSUBfont]['finals'] = $finals;
341 if ($this->shaper == 'I') {
342 $this->GSUBdata[$this->GSUBfont]['rphf'] = $rphf;
343 $this->GSUBdata[$this->GSUBfont]['half'] = $half;
344 $this->GSUBdata[$this->GSUBfont]['pref'] = $pref;
345 $this->GSUBdata[$this->GSUBfont]['blwf'] = $blwf;
346 $this->GSUBdata[$this->GSUBfont]['pstf'] = $pstf;
348 } else {
349 $this->GSUBdata[$this->GSUBfont] = array('rtlSUB' => array(), 'rphf' => array(), 'rphf' => array(),
350 'pref' => array(), 'blwf' => array(), 'pstf' => array(), 'finals' => ''
355 if (!isset($this->GSUBdata[$this->fontkey])) {
356 include(_MPDF_TTFONTDATAPATH . $this->fontkey . '.GSUBdata.php');
357 $this->GSLuCoverage = $this->GSUBdata[$this->fontkey]['GSLuCoverage'] = $GSLuCoverage;
358 } else {
359 $this->GSLuCoverage = $this->GSUBdata[$this->fontkey]['GSLuCoverage'];
362 $this->GSUBLookups = $this->mpdf->CurrentFont['GSUBLookups'];
365 // 5(A). GSUB - Shaper - ARABIC
366 //==============================
367 if ($this->shaper == 'A') {
368 //-----------------------------------------------------------------------------------
369 // a. Apply initial GSUB Lookups (in order specified in lookup list but only selecting from certain tags)
370 //-----------------------------------------------------------------------------------
371 $tags = 'locl ccmp';
372 $omittags = '';
373 $usetags = $tags;
374 if (!empty($this->mpdf->OTLtags)) {
375 $usetags = $this->_applyTagSettings($tags, $GSUBFeatures, $omittags, true);
377 $this->_applyGSUBrules($usetags, $GSUBscriptTag, $GSUBlangsys);
379 //-----------------------------------------------------------------------------------
380 // b. Apply context-specific forms GSUB Lookups (initial, isolated, medial, final)
381 //-----------------------------------------------------------------------------------
382 // Arab and Syriac are the only scripts requiring the special joining - which takes the place of
383 // isol fina medi init rules in GSUB (+ fin2 fin3 med2 in Syriac syrc)
384 $tags = 'isol fina fin2 fin3 medi med2 init';
385 $omittags = '';
386 $usetags = $tags;
387 if (!empty($this->mpdf->OTLtags)) {
388 $usetags = $this->_applyTagSettings($tags, $GSUBFeatures, $omittags, true);
391 $this->arabGlyphs = $this->GSUBdata[$this->GSUBfont]['rtlSUB'];
393 $gcms = explode("| ", $this->GlyphClassMarks);
394 $gcm = array();
395 foreach ($gcms AS $g) {
396 $gcm[hexdec($g)] = 1;
398 $this->arabTransparentJoin = $this->arabTransparent + $gcm;
399 $this->arabic_shaper($usetags, $GSUBscriptTag);
401 //-----------------------------------------------------------------------------------
402 // c. Set Kashida points (after joining occurred - medi, fina, init) but before other substitutions
403 //-----------------------------------------------------------------------------------
404 //if ($scriptblock == UCDN::SCRIPT_ARABIC ) {
405 for ($i = 0; $i < count($this->OTLdata); $i++) {
406 // Put the kashida marker on the character BEFORE which is inserted the kashida
407 // Kashida marker is inverse of priority i.e. Priority 1 => 7, Priority 7 => 1.
408 // Priority 1 User-inserted Kashida 0640 = Tatweel
409 // The user entered a Kashida in a position
410 // Position: Before the user-inserted kashida
411 if ($this->OTLdata[$i]['uni'] == 0x0640) {
412 $this->OTLdata[$i]['GPOSinfo']['kashida'] = 8; // Put before the next character
415 // Priority 2 Seen (0633) FEB3, FEB4; Sad (0635) FEBB, FEBC
416 // Initial or medial form
417 // Connecting to the next character
418 // Position: After the character
419 else if ($this->OTLdata[$i]['uni'] == 0xFEB3 || $this->OTLdata[$i]['uni'] == 0xFEB4 || $this->OTLdata[$i]['uni'] == 0xFEBB || $this->OTLdata[$i]['uni'] == 0xFEBC) {
420 $checkpos = $i + 1;
421 while (isset($this->OTLdata[$checkpos]) && strpos($this->GlyphClassMarks, $this->OTLdata[$checkpos]['hex']) !== false) {
422 $checkpos++;
424 if (isset($this->OTLdata[$checkpos])) {
425 $this->OTLdata[$checkpos]['GPOSinfo']['kashida'] = 7; // Put after marks on next character
429 // Priority 3 Taa Marbutah (0629) FE94; Haa (062D) FEA2; Dal (062F) FEAA
430 // Final form
431 // Connecting to previous character
432 // Position: Before the character
433 else if ($this->OTLdata[$i]['uni'] == 0xFE94 || $this->OTLdata[$i]['uni'] == 0xFEA2 || $this->OTLdata[$i]['uni'] == 0xFEAA) {
434 $this->OTLdata[$i]['GPOSinfo']['kashida'] = 6;
437 // Priority 4 Alef (0627) FE8E; Tah (0637) FEC2; Lam (0644) FEDE; Kaf (0643) FEDA; Gaf (06AF) FB93
438 // Final form
439 // Connecting to previous character
440 // Position: Before the character
441 else if ($this->OTLdata[$i]['uni'] == 0xFE8E || $this->OTLdata[$i]['uni'] == 0xFEC2 || $this->OTLdata[$i]['uni'] == 0xFEDE || $this->OTLdata[$i]['uni'] == 0xFEDA || $this->OTLdata[$i]['uni'] == 0xFB93) {
442 $this->OTLdata[$i]['GPOSinfo']['kashida'] = 5;
445 // Priority 5 RA (0631) FEAE; Ya (064A) FEF2 FEF4; Alef Maqsurah (0649) FEF0 FBE9
446 // Final or Medial form
447 // Connected to preceding medial BAA (0628) = FE92
448 // Position: Before preceding medial Baa
449 // Although not mentioned in spec, added Farsi Yeh (06CC) FBFD FBFF; equivalent to 064A or 0649
450 else if ($this->OTLdata[$i]['uni'] == 0xFEAE || $this->OTLdata[$i]['uni'] == 0xFEF2 || $this->OTLdata[$i]['uni'] == 0xFEF0 || $this->OTLdata[$i]['uni'] == 0xFEF4 || $this->OTLdata[$i]['uni'] == 0xFBE9 || $this->OTLdata[$i]['uni'] == 0xFBFD || $this->OTLdata[$i]['uni'] == 0xFBFF
452 $checkpos = $i - 1;
453 while (isset($this->OTLdata[$checkpos]) && strpos($this->GlyphClassMarks, $this->OTLdata[$checkpos]['hex']) !== false) {
454 $checkpos--;
456 if (isset($this->OTLdata[$checkpos]) && $this->OTLdata[$checkpos]['uni'] == 0xFE92) {
457 $this->OTLdata[$checkpos]['GPOSinfo']['kashida'] = 4; // ******* Before preceding BAA
461 // Priority 6 WAW (0648) FEEE; Ain (0639) FECA; Qaf (0642) FED6; Fa (0641) FED2
462 // Final form
463 // Connecting to previous character
464 // Position: Before the character
465 else if ($this->OTLdata[$i]['uni'] == 0xFEEE || $this->OTLdata[$i]['uni'] == 0xFECA || $this->OTLdata[$i]['uni'] == 0xFED6 || $this->OTLdata[$i]['uni'] == 0xFED2) {
466 $this->OTLdata[$i]['GPOSinfo']['kashida'] = 3;
469 // Priority 7 Other connecting characters
470 // Final form
471 // Connecting to previous character
472 // Position: Before the character
473 /* This isn't in the spec, but using MS WORD as a basis, give a lower priority to the 3 characters already checked
474 in (5) above. Test case:
475 &#x62e;&#x652;&#x631;&#x64e;&#x649;&#x670;
476 &#x641;&#x64e;&#x62a;&#x64f;&#x630;&#x64e;&#x643;&#x651;&#x650;&#x631;
479 if (!isset($this->OTLdata[$i]['GPOSinfo']['kashida'])) {
480 if (strpos($this->GSUBdata[$this->GSUBfont]['finals'], $this->OTLdata[$i]['hex']) !== false) { // ANY OTHER FINAL FORM
481 $this->OTLdata[$i]['GPOSinfo']['kashida'] = 2;
482 } else if (strpos('0FEAE 0FEF0 0FEF2', $this->OTLdata[$i]['hex']) !== false) { // not already included in 5 above
483 $this->OTLdata[$i]['GPOSinfo']['kashida'] = 1;
488 //-----------------------------------------------------------------------------------
489 // d. Apply Presentation Forms GSUB Lookups (+ any discretionary) - Apply one at a time in Feature order
490 //-----------------------------------------------------------------------------------
491 $tags = 'rlig calt liga clig mset';
493 $omittags = 'locl ccmp nukt akhn rphf rkrf pref blwf abvf half pstf cfar vatu cjct init medi fina isol med2 fin2 fin3 ljmo vjmo tjmo';
494 $usetags = $tags;
495 if (!empty($this->mpdf->OTLtags)) {
496 $usetags = $this->_applyTagSettings($tags, $GSUBFeatures, $omittags, false);
499 $ts = explode(' ', $usetags);
500 foreach ($ts AS $ut) { // - Apply one at a time in Feature order
501 $this->_applyGSUBrules($ut, $GSUBscriptTag, $GSUBlangsys);
503 //-----------------------------------------------------------------------------------
504 // e. NOT IN SPEC
505 // If space precedes a mark -> substitute a &nbsp; before the Mark, to prevent line breaking Test:
506 //-----------------------------------------------------------------------------------
507 for ($ptr = 1; $ptr < count($this->OTLdata); $ptr++) {
508 if ($this->OTLdata[$ptr]['general_category'] == UCDN::UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK && $this->OTLdata[$ptr - 1]['uni'] == 32) {
509 $this->OTLdata[$ptr - 1]['uni'] = 0xa0;
510 $this->OTLdata[$ptr - 1]['hex'] = '000A0';
515 // 5(I). GSUB - Shaper - INDIC and SINHALA and KHMER
516 //===================================
517 else if ($this->shaper == 'I' || $this->shaper == 'K' || $this->shaper == 'S') {
518 $this->restrictToSyllable = true;
519 //-----------------------------------------------------------------------------------
520 // a. First decompose/compose split mattras
521 // (normalize) ??????? Nukta/Halant order etc ??????????????????????????????????????????????????????????????????????????
522 //-----------------------------------------------------------------------------------
523 for ($ptr = 0; $ptr < count($this->OTLdata); $ptr++) {
524 $char = $this->OTLdata[$ptr]['uni'];
525 $sub = INDIC::decompose_indic($char);
526 if ($sub) {
527 $newinfo = array();
528 for ($i = 0; $i < count($sub); $i++) {
529 $newinfo[$i] = array();
530 $ucd_record = UCDN::get_ucd_record($sub[$i]);
531 $newinfo[$i]['general_category'] = $ucd_record[0];
532 $newinfo[$i]['bidi_type'] = $ucd_record[2];
533 $charasstr = $this->unicode_hex($sub[$i]);
534 if (strpos($this->GlyphClassMarks, $charasstr) !== false) {
535 $newinfo[$i]['group'] = 'M';
536 } else {
537 $newinfo[$i]['group'] = 'C';
539 $newinfo[$i]['uni'] = $sub[$i];
540 $newinfo[$i]['hex'] = $charasstr;
542 array_splice($this->OTLdata, $ptr, 1, $newinfo);
543 $ptr += count($sub) - 1;
545 /* Only Composition-exclusion exceptions that we want to recompose. */
546 if ($this->shaper == 'I') {
547 if ($char == 0x09AF && isset($this->OTLdata[$ptr + 1]) && $this->OTLdata[$ptr + 1]['uni'] == 0x09BC) {
548 $sub = 0x09DF;
549 $newinfo = array();
550 $newinfo[0] = array();
551 $ucd_record = UCDN::get_ucd_record($sub);
552 $newinfo[0]['general_category'] = $ucd_record[0];
553 $newinfo[0]['bidi_type'] = $ucd_record[2];
554 $newinfo[0]['group'] = 'C';
555 $newinfo[0]['uni'] = $sub;
556 $newinfo[0]['hex'] = $this->unicode_hex($sub);
557 array_splice($this->OTLdata, $ptr, 2, $newinfo);
561 //-----------------------------------------------------------------------------------
562 // b. Analyse characters - group as syllables/clusters (Indic); invalid diacritics; add dotted circle
563 //-----------------------------------------------------------------------------------
564 $indic_category_string = '';
565 foreach ($this->OTLdata AS $eid => $c) {
566 INDIC::set_indic_properties($this->OTLdata[$eid], $scriptblock); // sets ['indic_category'] and ['indic_position']
567 //$c['general_category']
568 //$c['combining_class']
569 //$c['uni'] = $char;
571 $indic_category_string .= INDIC::$indic_category_char[$this->OTLdata[$eid]['indic_category']];
574 $broken_syllables = false;
575 if ($this->shaper == 'I') {
576 INDIC::set_syllables($this->OTLdata, $indic_category_string, $broken_syllables);
577 } else if ($this->shaper == 'S') {
578 INDIC::set_syllables_sinhala($this->OTLdata, $indic_category_string, $broken_syllables);
579 } else if ($this->shaper == 'K') {
580 INDIC::set_syllables_khmer($this->OTLdata, $indic_category_string, $broken_syllables);
582 $indic_category_string = '';
584 //-----------------------------------------------------------------------------------
585 // c. Initial Re-ordering (Indic / Khmer / Sinhala)
586 //-----------------------------------------------------------------------------------
587 // Find base consonant
588 // Decompose/compose and reorder Matras
589 // Reorder marks to canonical order
591 $indic_config = INDIC::$indic_configs[$scriptblock];
592 $dottedcircle = false;
593 if ($broken_syllables) {
594 if ($this->mpdf->_charDefined($this->mpdf->fonts[$this->fontkey]['cw'], 0x25CC)) {
595 $dottedcircle = array();
596 $ucd_record = UCDN::get_ucd_record(0x25CC);
597 $dottedcircle[0]['general_category'] = $ucd_record[0];
598 $dottedcircle[0]['bidi_type'] = $ucd_record[2];
599 $dottedcircle[0]['group'] = 'C';
600 $dottedcircle[0]['uni'] = 0x25CC;
601 $dottedcircle[0]['indic_category'] = INDIC::OT_DOTTEDCIRCLE;
602 $dottedcircle[0]['indic_position'] = INDIC::POS_BASE_C;
604 $dottedcircle[0]['hex'] = '025CC'; // TEMPORARY *****
607 INDIC::initial_reordering($this->OTLdata, $this->GSUBdata[$this->GSUBfont], $broken_syllables, $indic_config, $scriptblock, $is_old_spec, $dottedcircle);
609 //-----------------------------------------------------------------------------------
610 // d. Apply initial and basic shaping forms GSUB Lookups (one at a time)
611 //-----------------------------------------------------------------------------------
612 if ($this->shaper == 'I' || $this->shaper == 'S') {
613 $tags = 'locl ccmp nukt akhn rphf rkrf pref blwf half pstf vatu cjct';
614 } else if ($this->shaper == 'K') {
615 $tags = 'locl ccmp pref blwf abvf pstf cfar';
617 $this->_applyGSUBrulesIndic($tags, $GSUBscriptTag, $GSUBlangsys, $is_old_spec);
619 //-----------------------------------------------------------------------------------
620 // e. Final Re-ordering (Indic / Khmer / Sinhala)
621 //-----------------------------------------------------------------------------------
622 // Reorder matras
623 // Reorder reph
624 // Reorder pre-base reordering consonants:
626 INDIC::final_reordering($this->OTLdata, $this->GSUBdata[$this->GSUBfont], $indic_config, $scriptblock, $is_old_spec);
628 //-----------------------------------------------------------------------------------
629 // f. Apply 'init' feature to first syllable in word (indicated by ['mask']) INDIC::FLAG(INDIC::INIT);
630 //-----------------------------------------------------------------------------------
631 if ($this->shaper == 'I' || $this->shaper == 'S') {
632 $tags = 'init';
633 $this->_applyGSUBrulesIndic($tags, $GSUBscriptTag, $GSUBlangsys, $is_old_spec);
636 //-----------------------------------------------------------------------------------
637 // g. Apply Presentation Forms GSUB Lookups (+ any discretionary)
638 //-----------------------------------------------------------------------------------
639 $tags = 'pres abvs blws psts haln rlig calt liga clig mset';
641 $omittags = 'locl ccmp nukt akhn rphf rkrf pref blwf abvf half pstf cfar vatu cjct init medi fina isol med2 fin2 fin3 ljmo vjmo tjmo';
642 $usetags = $tags;
643 if (!empty($this->mpdf->OTLtags)) {
644 $usetags = $this->_applyTagSettings($tags, $GSUBFeatures, $omittags, false);
646 if ($this->shaper == 'K') { // Features are applied one at a time, working through each codepoint
647 $this->_applyGSUBrulesSingly($usetags, $GSUBscriptTag, $GSUBlangsys);
648 } else {
649 $this->_applyGSUBrules($usetags, $GSUBscriptTag, $GSUBlangsys);
651 $this->restrictToSyllable = false;
655 // 5(M). GSUB - Shaper - MYANMAR (ONLY mym2)
656 //==============================
657 // NB Old style 'mymr' is left to go through the default shaper
658 else if ($this->shaper == 'M') {
659 $this->restrictToSyllable = true;
660 //-----------------------------------------------------------------------------------
661 // a. Analyse characters - group as syllables/clusters (Myanmar); invalid diacritics; add dotted circle
662 //-----------------------------------------------------------------------------------
663 $myanmar_category_string = '';
664 foreach ($this->OTLdata AS $eid => $c) {
665 MYANMAR::set_myanmar_properties($this->OTLdata[$eid]); // sets ['myanmar_category'] and ['myanmar_position']
666 $myanmar_category_string .= MYANMAR::$myanmar_category_char[$this->OTLdata[$eid]['myanmar_category']];
668 $broken_syllables = false;
669 MYANMAR::set_syllables($this->OTLdata, $myanmar_category_string, $broken_syllables);
670 $myanmar_category_string = '';
672 //-----------------------------------------------------------------------------------
673 // b. Re-ordering (Myanmar mym2)
674 //-----------------------------------------------------------------------------------
675 $dottedcircle = false;
676 if ($broken_syllables) {
677 if ($this->mpdf->_charDefined($this->mpdf->fonts[$this->fontkey]['cw'], 0x25CC)) {
678 $dottedcircle = array();
679 $ucd_record = UCDN::get_ucd_record(0x25CC);
680 $dottedcircle[0]['general_category'] = $ucd_record[0];
681 $dottedcircle[0]['bidi_type'] = $ucd_record[2];
682 $dottedcircle[0]['group'] = 'C';
683 $dottedcircle[0]['uni'] = 0x25CC;
684 $dottedcircle[0]['myanmar_category'] = MYANMAR::OT_DOTTEDCIRCLE;
685 $dottedcircle[0]['myanmar_position'] = MYANMAR::POS_BASE_C;
686 $dottedcircle[0]['hex'] = '025CC';
689 MYANMAR::reordering($this->OTLdata, $this->GSUBdata[$this->GSUBfont], $broken_syllables, $dottedcircle);
691 //-----------------------------------------------------------------------------------
692 // c. Apply initial and basic shaping forms GSUB Lookups (one at a time)
693 //-----------------------------------------------------------------------------------
695 $tags = 'locl ccmp rphf pref blwf pstf';
696 $this->_applyGSUBrulesMyanmar($tags, $GSUBscriptTag, $GSUBlangsys);
698 //-----------------------------------------------------------------------------------
699 // d. Apply Presentation Forms GSUB Lookups (+ any discretionary)
700 //-----------------------------------------------------------------------------------
701 $tags = 'pres abvs blws psts haln rlig calt liga clig mset';
702 $omittags = 'locl ccmp nukt akhn rphf rkrf pref blwf abvf half pstf cfar vatu cjct init medi fina isol med2 fin2 fin3 ljmo vjmo tjmo';
703 $usetags = $tags;
704 if (!empty($this->mpdf->OTLtags)) {
705 $usetags = $this->_applyTagSettings($tags, $GSUBFeatures, $omittags, false);
707 $this->_applyGSUBrules($usetags, $GSUBscriptTag, $GSUBlangsys);
708 $this->restrictToSyllable = false;
712 // 5(E). GSUB - Shaper - SEA South East Asian (New Tai Lue, Cham, Tai Tam)
713 //==============================
714 else if ($this->shaper == 'E') {
715 /* HarfBuzz says: If the designer designed the font for the 'DFLT' script,
716 * use the default shaper. Otherwise, use the SEA shaper.
717 * Note that for some simple scripts, there may not be *any*
718 * GSUB/GPOS needed, so there may be no scripts found! */
720 $this->restrictToSyllable = true;
721 //-----------------------------------------------------------------------------------
722 // a. Analyse characters - group as syllables/clusters (Indic); invalid diacritics; add dotted circle
723 //-----------------------------------------------------------------------------------
724 $sea_category_string = '';
725 foreach ($this->OTLdata AS $eid => $c) {
726 SEA::set_sea_properties($this->OTLdata[$eid], $scriptblock); // sets ['sea_category'] and ['sea_position']
727 //$c['general_category']
728 //$c['combining_class']
729 //$c['uni'] = $char;
731 $sea_category_string .= SEA::$sea_category_char[$this->OTLdata[$eid]['sea_category']];
734 $broken_syllables = false;
735 SEA::set_syllables($this->OTLdata, $sea_category_string, $broken_syllables);
736 $sea_category_string = '';
738 //-----------------------------------------------------------------------------------
739 // b. Apply locl and ccmp shaping forms - before initial re-ordering; GSUB Lookups (one at a time)
740 //-----------------------------------------------------------------------------------
741 $tags = 'locl ccmp';
742 $this->_applyGSUBrulesSingly($tags, $GSUBscriptTag, $GSUBlangsys);
744 //-----------------------------------------------------------------------------------
745 // c. Initial Re-ordering
746 //-----------------------------------------------------------------------------------
747 // Find base consonant
748 // Decompose/compose and reorder Matras
749 // Reorder marks to canonical order
751 $dottedcircle = false;
752 if ($broken_syllables) {
753 if ($this->mpdf->_charDefined($this->mpdf->fonts[$this->fontkey]['cw'], 0x25CC)) {
754 $dottedcircle = array();
755 $ucd_record = UCDN::get_ucd_record(0x25CC);
756 $dottedcircle[0]['general_category'] = $ucd_record[0];
757 $dottedcircle[0]['bidi_type'] = $ucd_record[2];
758 $dottedcircle[0]['group'] = 'C';
759 $dottedcircle[0]['uni'] = 0x25CC;
760 $dottedcircle[0]['sea_category'] = SEA::OT_GB;
761 $dottedcircle[0]['sea_position'] = SEA::POS_BASE_C;
763 $dottedcircle[0]['hex'] = '025CC'; // TEMPORARY *****
766 SEA::initial_reordering($this->OTLdata, $this->GSUBdata[$this->GSUBfont], $broken_syllables, $scriptblock, $dottedcircle);
768 //-----------------------------------------------------------------------------------
769 // d. Apply basic shaping forms GSUB Lookups (one at a time)
770 //-----------------------------------------------------------------------------------
771 $tags = 'pref abvf blwf pstf';
772 $this->_applyGSUBrulesSingly($tags, $GSUBscriptTag, $GSUBlangsys);
774 //-----------------------------------------------------------------------------------
775 // e. Final Re-ordering
776 //-----------------------------------------------------------------------------------
778 SEA::final_reordering($this->OTLdata, $this->GSUBdata[$this->GSUBfont], $scriptblock);
780 //-----------------------------------------------------------------------------------
781 // f. Apply Presentation Forms GSUB Lookups (+ any discretionary)
782 //-----------------------------------------------------------------------------------
783 $tags = 'pres abvs blws psts';
785 $omittags = 'locl ccmp nukt akhn rphf rkrf pref blwf abvf half pstf cfar vatu cjct init medi fina isol med2 fin2 fin3 ljmo vjmo tjmo';
786 $usetags = $tags;
787 if (!empty($this->mpdf->OTLtags)) {
788 $usetags = $this->_applyTagSettings($tags, $GSUBFeatures, $omittags, false);
790 $this->_applyGSUBrules($usetags, $GSUBscriptTag, $GSUBlangsys);
791 $this->restrictToSyllable = false;
795 // 5(D). GSUB - Shaper - DEFAULT (including THAI and LAO and MYANMAR v1 [mymr] and TIBETAN)
796 //==============================
797 else { // DEFAULT
798 //-----------------------------------------------------------------------------------
799 // a. First decompose/compose in Thai / Lao - Tibetan
800 //-----------------------------------------------------------------------------------
801 // Decomposition for THAI or LAO
802 /* This function implements the shaping logic documented here:
804 * http://linux.thai.net/~thep/th-otf/shaping.html
806 * The first shaping rule listed there is needed even if the font has Thai
807 * OpenType tables.
810 * The following is NOT specified in the MS OT Thai spec, however, it seems
811 * to be what Uniscribe and other engines implement. According to Eric Muller:
813 * When you have a SARA AM, decompose it in NIKHAHIT + SARA AA, *and* move the
814 * NIKHAHIT backwards over any tone mark (0E48-0E4B).
816 * <0E14, 0E4B, 0E33> -> <0E14, 0E4D, 0E4B, 0E32>
818 * This reordering is legit only when the NIKHAHIT comes from a SARA AM, not
819 * when it's there to start with. The string <0E14, 0E4B, 0E4D> is probably
820 * not what a user wanted, but the rendering is nevertheless nikhahit above
821 * chattawa.
823 * Same for Lao.
825 * Thai Lao
826 * SARA AM: U+0E33 U+0EB3
827 * SARA AA: U+0E32 U+0EB2
828 * Nikhahit: U+0E4D U+0ECD
830 * Testing shows that Uniscribe reorder the following marks:
831 * Thai: <0E31,0E34..0E37,0E47..0E4E>
832 * Lao: <0EB1,0EB4..0EB7,0EC7..0ECE>
834 * Lao versions are the same as Thai + 0x80.
836 if ($this->shaper == 'T' || $this->shaper == 'L') {
837 for ($ptr = 0; $ptr < count($this->OTLdata); $ptr++) {
838 $char = $this->OTLdata[$ptr]['uni'];
839 if (($char & ~0x0080) == 0x0E33) { // if SARA_AM (U+0E33 or U+0EB3)
840 $NIKHAHIT = $char + 0x1A;
841 $SARA_AA = $char - 1;
842 $sub = array($SARA_AA, $NIKHAHIT);
844 $newinfo = array();
845 $ucd_record = UCDN::get_ucd_record($sub[0]);
846 $newinfo[0]['general_category'] = $ucd_record[0];
847 $newinfo[0]['bidi_type'] = $ucd_record[2];
848 $charasstr = $this->unicode_hex($sub[0]);
849 if (strpos($this->GlyphClassMarks, $charasstr) !== false) {
850 $newinfo[0]['group'] = 'M';
851 } else {
852 $newinfo[0]['group'] = 'C';
854 $newinfo[0]['uni'] = $sub[0];
855 $newinfo[0]['hex'] = $charasstr;
856 $this->OTLdata[$ptr] = $newinfo[0]; // Substitute SARA_AM => SARA_AA
858 $ntones = 0; // number of (preceding) tone marks
859 // IS_TONE_MARK ((x) & ~0x0080, 0x0E34 - 0x0E37, 0x0E47 - 0x0E4E, 0x0E31)
860 while (isset($this->OTLdata[$ptr - 1 - $ntones]) && (
861 ($this->OTLdata[$ptr - 1 - $ntones]['uni'] & ~0x0080) == 0x0E31 ||
862 (($this->OTLdata[$ptr - 1 - $ntones]['uni'] & ~0x0080) >= 0x0E34 &&
863 ($this->OTLdata[$ptr - 1 - $ntones]['uni'] & ~0x0080) <= 0x0E37) ||
864 (($this->OTLdata[$ptr - 1 - $ntones]['uni'] & ~0x0080) >= 0x0E47 &&
865 ($this->OTLdata[$ptr - 1 - $ntones]['uni'] & ~0x0080) <= 0x0E4E)
868 $ntones++;
871 $newinfo = array();
872 $ucd_record = UCDN::get_ucd_record($sub[1]);
873 $newinfo[0]['general_category'] = $ucd_record[0];
874 $newinfo[0]['bidi_type'] = $ucd_record[2];
875 $charasstr = $this->unicode_hex($sub[1]);
876 if (strpos($this->GlyphClassMarks, $charasstr) !== false) {
877 $newinfo[0]['group'] = 'M';
878 } else {
879 $newinfo[0]['group'] = 'C';
881 $newinfo[0]['uni'] = $sub[1];
882 $newinfo[0]['hex'] = $charasstr;
883 // Insert NIKAHIT
884 array_splice($this->OTLdata, $ptr - $ntones, 0, $newinfo);
886 $ptr++;
891 if ($scriptblock == UCDN::SCRIPT_TIBETAN) {
892 // =========================
893 // Reordering TIBETAN
894 // =========================
895 // Tibetan does not need to need a shaper generally, as long as characters are presented in the correct order
896 // so we will do one minor change here:
897 // From ICU: If the present character is a number, and the next character is a pre-number combining mark
898 // then the two characters are reordered
899 // From MS OTL spec the following are Digit modifiers (Md): 0F18–0F19, 0F3E–0F3F
900 // Digits: 0F20–0F33
901 // On testing only 0x0F3F (pre-based mark) seems to need re-ordering
902 for ($ptr = 0; $ptr < count($this->OTLdata) - 1; $ptr++) {
903 if (INDIC::in_range($this->OTLdata[$ptr]['uni'], 0x0F20, 0x0F33) && $this->OTLdata[$ptr + 1]['uni'] == 0x0F3F) {
904 $tmp = $this->OTLdata[$ptr + 1];
905 $this->OTLdata[$ptr + 1] = $this->OTLdata[$ptr];
906 $this->OTLdata[$ptr] = $tmp;
911 // =========================
912 // Decomposition for TIBETAN
913 // =========================
914 /* Recommended, but does not seem to change anything...
915 for($ptr=0; $ptr<count($this->OTLdata); $ptr++) {
916 $char = $this->OTLdata[$ptr]['uni'];
917 $sub = INDIC::decompose_indic($char);
918 if ($sub) {
919 $newinfo = array();
920 for($i=0;$i<count($sub);$i++) {
921 $newinfo[$i] = array();
922 $ucd_record = UCDN::get_ucd_record($sub[$i]);
923 $newinfo[$i]['general_category'] = $ucd_record[0];
924 $newinfo[$i]['bidi_type'] = $ucd_record[2];
925 $charasstr = $this->unicode_hex($sub[$i]);
926 if (strpos($this->GlyphClassMarks, $charasstr)!==false) { $newinfo[$i]['group'] = 'M'; }
927 else { $newinfo[$i]['group'] = 'C'; }
928 $newinfo[$i]['uni'] = $sub[$i];
929 $newinfo[$i]['hex'] = $charasstr;
931 array_splice($this->OTLdata, $ptr, 1, $newinfo);
932 $ptr += count($sub)-1;
939 //-----------------------------------------------------------------------------------
940 // b. Apply all GSUB Lookups (in order specified in lookup list)
941 //-----------------------------------------------------------------------------------
942 $tags = 'locl ccmp pref blwf abvf pstf pres abvs blws psts haln rlig calt liga clig mset RQD';
943 // pref blwf abvf pstf required for Tibetan
944 // " RQD" is a non-standard tag in Garuda font - presumably intended to be used by default ? "ReQuireD"
945 // Being a 3 letter tag is non-standard, and does not allow it to be set by font-feature-settings
948 /* ?Add these until shapers witten?
949 Hangul: ljmo vjmo tjmo
952 $omittags = '';
953 $useGSUBtags = $tags;
954 if (!empty($this->mpdf->OTLtags)) {
955 $useGSUBtags = $this->_applyTagSettings($tags, $GSUBFeatures, $omittags, false);
957 // APPLY GSUB rules (as long as not Latin + SmallCaps - but not OTL smcp)
958 if (!(($this->mpdf->textvar & FC_SMALLCAPS) && $scriptblock == UCDN::SCRIPT_LATIN && strpos($useGSUBtags, 'smcp') === false)) {
959 $this->_applyGSUBrules($useGSUBtags, $GSUBscriptTag, $GSUBlangsys);
964 // Shapers - KHMER & THAI & LAO - Replace Word boundary marker with U+200B
965 // Also TIBETAN (no shaper)
966 //=======================================================
967 if (($this->shaper == "K" || $this->shaper == "T" || $this->shaper == "L") || $scriptblock == UCDN::SCRIPT_TIBETAN) {
968 // Set up properties to insert a U+200B character
969 $newinfo = array();
970 //$newinfo[0] = array('general_category' => 1, 'bidi_type' => 14, 'group' => 'S', 'uni' => 0x200B, 'hex' => '0200B');
971 $newinfo[0] = array(
972 'general_category' => UCDN::UNICODE_GENERAL_CATEGORY_FORMAT,
973 'bidi_type' => UCDN::BIDI_CLASS_BN,
974 'group' => 'S', 'uni' => 0x200B, 'hex' => '0200B');
975 // Then insert U+200B at (after) all word end boundaries
976 for ($i = count($this->OTLdata) - 1; $i > 0; $i--) {
977 // Make sure after GSUB that wordend has not been moved - check next char is not in the same syllable
978 if (isset($this->OTLdata[$i]['wordend']) && $this->OTLdata[$i]['wordend'] &&
979 isset($this->OTLdata[$i + 1]['uni']) && (!isset($this->OTLdata[$i + 1]['syllable']) || !isset($this->OTLdata[$i + 1]['syllable']) || $this->OTLdata[$i + 1]['syllable'] != $this->OTLdata[$i]['syllable'])) {
980 array_splice($this->OTLdata, $i + 1, 0, $newinfo);
981 $this->_updateLigatureMarks($i, 1);
982 } else if ($this->OTLdata[$i]['uni'] == 0x2e) { // Word end if Full-stop.
983 array_splice($this->OTLdata, $i + 1, 0, $newinfo);
984 $this->_updateLigatureMarks($i, 1);
990 // Shapers - INDIC & ARABIC & KHMER & SINHALA & MYANMAR - Remove ZWJ and ZWNJ
991 //=======================================================
992 if ($this->shaper == 'I' || $this->shaper == 'S' || $this->shaper == 'A' || $this->shaper == 'K' || $this->shaper == 'M') {
993 // Remove ZWJ and ZWNJ
994 for ($i = 0; $i < count($this->OTLdata); $i++) {
995 if ($this->OTLdata[$i]['uni'] == 8204 || $this->OTLdata[$i]['uni'] == 8205) {
996 array_splice($this->OTLdata, $i, 1);
997 $this->_updateLigatureMarks($i, -1);
1002 //print_r($this->OTLdata); echo '<br />';
1003 //print_r($this->assocMarks); echo '<br />';
1004 //print_r($this->assocLigs); exit;
1005 ////////////////////////////////////////////////////////////////
1006 ////////////////////////////////////////////////////////////////
1007 ////////// GPOS /////////////////////////////////
1008 ////////////////////////////////////////////////////////////////
1009 ////////////////////////////////////////////////////////////////
1011 if (($useOTL & 0xFF) && $GPOSscriptTag && $GPOSlangsys && $GPOSFeatures) {
1012 $this->Entry = array();
1013 $this->Exit = array();
1015 // 6. Load GPOS data, Coverage & Lookups
1016 //=================================================================
1017 if (!isset($this->GPOSdata[$this->fontkey])) {
1018 include(_MPDF_TTFONTDATAPATH . $this->mpdf->CurrentFont['fontkey'] . '.GPOSdata.php');
1019 $this->LuCoverage = $this->GPOSdata[$this->fontkey]['LuCoverage'] = $LuCoverage;
1020 } else {
1021 $this->LuCoverage = $this->GPOSdata[$this->fontkey]['LuCoverage'];
1024 $this->GPOSLookups = $this->mpdf->CurrentFont['GPOSLookups'];
1027 // 7. Select Feature tags to use (incl optional)
1028 //==============================
1029 $tags = 'abvm blwm mark mkmk curs cpsp dist requ'; // Default set
1030 /* 'requ' is not listed in the Microsoft registry of Feature tags
1031 Found in Arial Unicode MS, it repositions the baseline for punctuation in Kannada script */
1033 // ZZZ96
1034 // Set kern to be included by default in non-Latin script (? just when shapers used)
1035 // Kern is used in some fonts to reposition marks etc. and is essential for correct display
1036 //if ($this->shaper) {$tags .= ' kern'; }
1037 if ($scriptblock != UCDN::SCRIPT_LATIN) {
1038 $tags .= ' kern';
1041 $omittags = '';
1042 $usetags = $tags;
1043 if (!empty($this->mpdf->OTLtags)) {
1044 $usetags = $this->_applyTagSettings($tags, $GPOSFeatures, $omittags, false);
1049 // 8. Get GPOS LookupList from Feature tags
1050 //==============================
1051 $LookupList = array();
1052 foreach ($GPOSFeatures AS $tag => $arr) {
1053 if (strpos($usetags, $tag) !== false) {
1054 foreach ($arr AS $lu) {
1055 $LookupList[$lu] = $tag;
1059 ksort($LookupList);
1062 // 9. Apply GPOS Lookups (in order specified in lookup list but selecting from specified tags)
1063 //==============================
1064 // APPLY THE GPOS RULES (as long as not Latin + SmallCaps - but not OTL smcp)
1065 if (!(($this->mpdf->textvar & FC_SMALLCAPS) && $scriptblock == UCDN::SCRIPT_LATIN && strpos($useGSUBtags, 'smcp') === false)) {
1066 $this->_applyGPOSrules($LookupList, $is_old_spec);
1067 // (sets: $this->OTLdata[n]['GPOSinfo'] XPlacement YPlacement XAdvance Entry Exit )
1070 // 10. Process cursive text
1071 //==============================
1072 if (count($this->Entry) || count($this->Exit)) {
1073 // RTL
1074 $incurs = false;
1075 for ($i = (count($this->OTLdata) - 1); $i >= 0; $i--) {
1076 if (isset($this->Entry[$i]) && isset($this->Entry[$i]['Y']) && $this->Entry[$i]['dir'] == 'RTL') {
1077 $nextbase = $i - 1; // Set as next base ignoring marks (next base reading RTL in logical oder
1078 while (isset($this->OTLdata[$nextbase]['hex']) && strpos($this->GlyphClassMarks, $this->OTLdata[$nextbase]['hex']) !== false) {
1079 $nextbase--;
1081 if (isset($this->Exit[$nextbase]) && isset($this->Exit[$nextbase]['Y'])) {
1082 $diff = $this->Entry[$i]['Y'] - $this->Exit[$nextbase]['Y'];
1083 if ($incurs === false) {
1084 $incurs = $diff;
1085 } else {
1086 $incurs += $diff;
1088 for ($j = ($i - 1); $j >= $nextbase; $j--) {
1089 if (isset($this->OTLdata[$j]['GPOSinfo']['YPlacement'])) {
1090 $this->OTLdata[$j]['GPOSinfo']['YPlacement'] += $incurs;
1091 } else {
1092 $this->OTLdata[$j]['GPOSinfo']['YPlacement'] = $incurs;
1095 if (isset($this->Exit[$i]['X']) && isset($this->Entry[$nextbase]['X'])) {
1096 $adj = -($this->Entry[$i]['X'] - $this->Exit[$nextbase]['X']);
1097 // If XAdvance is aplied - in order for PDF to position the Advance correctly need to place it on:
1098 // in RTL - the current glyph or the last of any associated marks
1099 if (isset($this->OTLdata[$nextbase + 1]['GPOSinfo']['XAdvance'])) {
1100 $this->OTLdata[$nextbase + 1]['GPOSinfo']['XAdvance'] += $adj;
1101 } else {
1102 $this->OTLdata[$nextbase + 1]['GPOSinfo']['XAdvance'] = $adj;
1105 } else {
1106 $incurs = false;
1108 } else if (strpos($this->GlyphClassMarks, $this->OTLdata[$i]['hex']) !== false) {
1109 continue;
1110 } // ignore Marks
1111 else {
1112 $incurs = false;
1115 // LTR
1116 $incurs = false;
1117 for ($i = 0; $i < count($this->OTLdata); $i++) {
1118 if (isset($this->Exit[$i]) && isset($this->Exit[$i]['Y']) && $this->Exit[$i]['dir'] == 'LTR') {
1119 $nextbase = $i + 1; // Set as next base ignoring marks
1120 while (strpos($this->GlyphClassMarks, $this->OTLdata[$nextbase]['hex']) !== false) {
1121 $nextbase++;
1123 if (isset($this->Entry[$nextbase]) && isset($this->Entry[$nextbase]['Y'])) {
1125 $diff = $this->Exit[$i]['Y'] - $this->Entry[$nextbase]['Y'];
1126 if ($incurs === false) {
1127 $incurs = $diff;
1128 } else {
1129 $incurs += $diff;
1131 for ($j = ($i + 1); $j <= $nextbase; $j++) {
1132 if (isset($this->OTLdata[$j]['GPOSinfo']['YPlacement'])) {
1133 $this->OTLdata[$j]['GPOSinfo']['YPlacement'] += $incurs;
1134 } else {
1135 $this->OTLdata[$j]['GPOSinfo']['YPlacement'] = $incurs;
1138 if (isset($this->Exit[$i]['X']) && isset($this->Entry[$nextbase]['X'])) {
1139 $adj = -($this->Exit[$i]['X'] - $this->Entry[$nextbase]['X']);
1140 // If XAdvance is aplied - in order for PDF to position the Advance correctly need to place it on:
1141 // in LTR - the next glyph, ignoring marks
1142 if (isset($this->OTLdata[$nextbase]['GPOSinfo']['XAdvance'])) {
1143 $this->OTLdata[$nextbase]['GPOSinfo']['XAdvance'] += $adj;
1144 } else {
1145 $this->OTLdata[$nextbase]['GPOSinfo']['XAdvance'] = $adj;
1148 } else {
1149 $incurs = false;
1151 } else if (strpos($this->GlyphClassMarks, $this->OTLdata[$i]['hex']) !== false) {
1152 continue;
1153 } // ignore Marks
1154 else {
1155 $incurs = false;
1159 } // end GPOS
1161 if ($this->debugOTL) {
1162 $this->_dumpproc('END', '-', '-', '-', '-', 0, '-', 0);
1163 exit;
1166 $this->schOTLdata[$sch] = $this->OTLdata;
1167 $this->OTLdata = array();
1168 } // END foreach subchunk
1169 // 11. Re-assemble and return text string
1170 //==============================
1171 $newGPOSinfo = array();
1172 $newOTLdata = array();
1173 $newchar_data = array();
1174 $newgroup = '';
1175 $e = '';
1176 $ectr = 0;
1178 for ($sch = 0; $sch <= $subchunk; $sch++) {
1179 for ($i = 0; $i < count($this->schOTLdata[$sch]); $i++) {
1180 if (isset($this->schOTLdata[$sch][$i]['GPOSinfo'])) {
1181 $newGPOSinfo[$ectr] = $this->schOTLdata[$sch][$i]['GPOSinfo'];
1183 $newchar_data[$ectr] = array('bidi_class' => $this->schOTLdata[$sch][$i]['bidi_type'], 'uni' => $this->schOTLdata[$sch][$i]['uni']);
1184 $newgroup .= $this->schOTLdata[$sch][$i]['group'];
1185 $e.=code2utf($this->schOTLdata[$sch][$i]['uni']);
1186 if (isset($this->mpdf->CurrentFont['subset'])) {
1187 $this->mpdf->CurrentFont['subset'][$this->schOTLdata[$sch][$i]['uni']] = $this->schOTLdata[$sch][$i]['uni'];
1189 $ectr++;
1192 $this->OTLdata['GPOSinfo'] = $newGPOSinfo;
1193 $this->OTLdata['char_data'] = $newchar_data;
1194 $this->OTLdata['group'] = $newgroup;
1197 // This leaves OTLdata::GPOSinfo, ::bidi_type, & ::group
1199 return $e;
function _applyTagSettings($tags, $Features, $omittags = '', $onlytags = false)
{
	/*
	 * Merges the feature requests held in $this->mpdf->OTLtags into the default
	 * feature list and returns the resulting space-separated tag string.
	 *
	 * $tags      default tag list, used as the starting point
	 * $Features  features indexed by tag; a requested tag is ignored unless it is a key here
	 * $omittags  tags that may never be switched on or off by a request
	 * $onlytags  when true, only tags already present in $tags may be acted upon
	 *
	 * OTLtags keys read: 'Plus' / 'Minus' (font-variant-xx) and
	 * 'FFPlus' / 'FFMinus' (font-feature-settings). They are processed in that order.
	 */
	$anyRequest = !empty($this->mpdf->OTLtags['Plus'])
		|| !empty($this->mpdf->OTLtags['Minus'])
		|| !empty($this->mpdf->OTLtags['FFPlus'])
		|| !empty($this->mpdf->OTLtags['FFMinus']);
	if (!$anyRequest) {
		return $tags;
	}

	// A requested tag is honoured only when it exists in $Features, is not listed
	// in $omittags and - if $onlytags is set - already appears in the default list
	$isUsable = function ($candidate) use ($tags, $Features, $omittags, $onlytags) {
		if (!isset($Features[$candidate]) || strpos($omittags, $candidate) !== false) {
			return false;
		}
		return !$onlytags || strpos($tags, $candidate) !== false;
	};

	$result = $tags;

	// 1. Features to enable - set by font-variant-xx
	$enable = isset($this->mpdf->OTLtags['Plus']) ? $this->mpdf->OTLtags['Plus'] : '';
	preg_match_all('/([a-zA-Z0-9]{4})/', $enable, $found);
	foreach ($found[1] as $name) {
		if ($isUsable($name)) {
			$result .= ' ' . $name;
		}
	}

	// 2. Features to disable - set by font-variant-xx
	$disable = isset($this->mpdf->OTLtags['Minus']) ? $this->mpdf->OTLtags['Minus'] : '';
	preg_match_all('/([a-zA-Z0-9]{4})/', $disable, $found);
	foreach ($found[1] as $name) {
		if ($isUsable($name)) {
			$result = str_replace($name, '', $result);
		}
	}

	// 3. Features to enable - set by font-feature-settings.
	//    The whole match is appended, so a trailing integer is kept (e.g. salt4)
	$ffEnable = isset($this->mpdf->OTLtags['FFPlus']) ? $this->mpdf->OTLtags['FFPlus'] : '';
	preg_match_all('/([a-zA-Z0-9]{4})([\d+]*)/', $ffEnable, $found);
	foreach ($found[1] as $idx => $name) {
		if ($isUsable($name)) {
			$result .= ' ' . $found[0][$idx];
		}
	}

	// 4. Features to disable - set by font-feature-settings
	$ffDisable = isset($this->mpdf->OTLtags['FFMinus']) ? $this->mpdf->OTLtags['FFMinus'] : '';
	preg_match_all('/([a-zA-Z0-9]{4})/', $ffDisable, $found);
	foreach ($found[1] as $name) {
		if ($isUsable($name)) {
			$result = str_replace($name, '', $result);
		}
	}

	return $result;
}
function _applyGSUBrules($usetags, $scriptTag, $langsys)
{
	/*
	 * Applies the GSUB lookups of every feature named in $usetags to $this->OTLdata.
	 * The lookups of all selected features are pooled and run together in
	 * lookup-index order; each lookup makes one pass over the glyph sequence.
	 *
	 * For Indic text the rules should be applied one syllable at a time. That is
	 * implemented in checkContextMatch and checkContextMatchMultiple, which fail to
	 * match outside the current 'syllable' when $this->restrictToSyllable is true.
	 *
	 * $usetags    space-separated feature tags (a tag may carry an integer, e.g. salt4)
	 * $scriptTag  script key into the current font's GSUBFeatures
	 * $langsys    language-system key into the current font's GSUBFeatures
	 */
	$featureTable = $this->mpdf->CurrentFont['GSUBFeatures'][$scriptTag][$langsys];

	// Map lookup index => feature tag for the selected features only
	$lookupToTag = array();
	foreach ($featureTable as $feature => $lookupIds) {
		if (strpos($usetags, $feature) === false) {
			continue;
		}
		foreach ($lookupIds as $lookupId) {
			$lookupToTag[$lookupId] = $feature;
		}
	}
	ksort($lookupToTag);

	foreach ($lookupToTag as $lookupId => $feature) {
		$lookupType = $this->GSUBLookups[$lookupId]['Type'];
		$lookupFlag = $this->GSUBLookups[$lookupId]['Flag'];
		$markSet = $this->GSUBLookups[$lookupId]['MarkFilteringSet'];

		// Integer attached to the tag in $usetags (e.g. the 4 of salt4); 1 when absent
		$variant = preg_match('/' . $feature . '([0-9]{1,2})/', $usetags, $hit) ? $hit[1] : 1;

		// Test each glyph in turn. The sequence may change length while lookups
		// are applied, so the bound is re-read on every iteration.
		for ($pos = 0; $pos < count($this->OTLdata); $pos += $advance) {
			$glyphHex = $this->OTLdata[$pos]['hex'];
			$glyphUni = $this->OTLdata[$pos]['uni'];
			$advance = 1;

			foreach ($this->GSUBLookups[$lookupId]['Subtables'] as $subIdx => $subOffset) {
				// NB Coverage only looks at glyphs for position 1 (esp. 7.3 and 8.3)
				if (!isset($this->GSLuCoverage[$lookupId][$subIdx][$glyphUni])) {
					continue;
				}
				// Get rules from font GSUB subtable
				$advance = $this->_applyGSUBsubtable($lookupId, $subIdx, $pos, $glyphHex, $glyphUni, ($subOffset - $this->GSUB_offset), $lookupType, $lookupFlag, $markSet, $this->GSLuCoverage[$lookupId][$subIdx], 0, $feature, 0, $variant);
				if ($advance) {
					break;
				}
			}

			// Nothing applied: still move on to the next glyph
			if ($advance == 0) {
				$advance = 1;
			}
		}
	}
}
function _applyGSUBrulesSingly($usetags, $scriptTag, $langsys)
{
	/*
	 * Applies GSUB features one at a time, in the order they are listed in $usetags.
	 * For each feature a pass is made over the glyph sequence; at every glyph the
	 * feature's lookups are tried in lookup-index order and the first one that
	 * applies wins.
	 *
	 * $usetags    space-separated feature tags, in application order
	 * $scriptTag  script key into the current font's GSUBFeatures
	 * $langsys    language-system key into the current font's GSUBFeatures
	 *
	 * Fix: the lookup list used to be filtered against the whole $usetags string
	 * instead of the feature of the current pass, so every pass applied the lookups
	 * of all features together and the loop variable was never used. The filter
	 * now selects only the current feature, matching the Myanmar and Indic variants.
	 */
	$GSUBFeatures = $this->mpdf->CurrentFont['GSUBFeatures'][$scriptTag][$langsys];

	foreach (explode(' ', $usetags) as $usetag) {
		// Skip empty fragments (repeated spaces) and features absent from the font
		if ($usetag === '' || !isset($GSUBFeatures[$usetag])) {
			continue;
		}

		// Lookups belonging to this feature only
		$LookupList = array();
		foreach ($GSUBFeatures[$usetag] as $lu) {
			$LookupList[$lu] = $usetag;
		}
		ksort($LookupList);

		$ptr = 0;
		// Test each glyph sequentially; the sequence may change length as lookups are applied
		while ($ptr < count($this->OTLdata)) {
			$currGlyph = $this->OTLdata[$ptr]['hex'];
			$currGID = $this->OTLdata[$ptr]['uni'];
			$shift = 1;

			foreach ($LookupList as $lu => $tag) {
				$Type = $this->GSUBLookups[$lu]['Type'];
				$Flag = $this->GSUBLookups[$lu]['Flag'];
				$MarkFilteringSet = $this->GSUBLookups[$lu]['MarkFilteringSet'];
				// Integer attached to the tag in $usetags (e.g. the 4 of salt4); 1 when absent
				$tagInt = 1;
				if (preg_match('/' . $tag . '([0-9]{1,2})/', $usetags, $m)) {
					$tagInt = $m[1];
				}

				foreach ($this->GSUBLookups[$lu]['Subtables'] as $c => $subtable_offset) {
					// NB Coverage only looks at glyphs for position 1 (esp. 7.3 and 8.3)
					if (isset($this->GSLuCoverage[$lu][$c][$currGID])) {
						// Get rules from font GSUB subtable
						$shift = $this->_applyGSUBsubtable($lu, $c, $ptr, $currGlyph, $currGID, ($subtable_offset - $this->GSUB_offset), $Type, $Flag, $MarkFilteringSet, $this->GSLuCoverage[$lu][$c], 0, $tag, 0, $tagInt);

						if ($shift) {
							// A rule applied: stop trying further subtables and lookups for this glyph
							break 2;
						}
					}
				}
			}

			// Nothing applied: still move on to the next glyph
			if ($shift == 0) {
				$shift = 1;
			}
			$ptr += $shift;
		}
	}
}
function _applyGSUBrulesMyanmar($usetags, $scriptTag, $langsys)
{
	/*
	 * Applies GSUB features for the Myanmar shaper, one feature at a time in the
	 * order given by $usetags (e.g. 'locl ccmp rphf pref blwf pstf'), to all
	 * characters. Within a feature, each lookup makes its own pass over the glyphs.
	 *
	 * All rules should be applied one syllable at a time. That is implemented in
	 * checkContextMatch and checkContextMatchMultiple, which fail to match outside
	 * the scope of the current 'syllable'.
	 *
	 * $usetags    space-separated feature tags, in application order
	 * $scriptTag  script key into the current font's GSUBFeatures
	 * $langsys    language-system key into the current font's GSUBFeatures
	 */
	$featureTable = $this->mpdf->CurrentFont['GSUBFeatures'][$scriptTag][$langsys];

	foreach (explode(' ', $usetags) as $usetag) {

		// Lookups of the feature handled in this pass, ordered by lookup index
		$lookupToTag = array();
		foreach ($featureTable as $feature => $lookupIds) {
			if ($feature != $usetag) {
				continue;
			}
			foreach ($lookupIds as $lookupId) {
				$lookupToTag[$lookupId] = $feature;
			}
		}
		ksort($lookupToTag);

		foreach ($lookupToTag as $lookupId => $feature) {
			$lookupType = $this->GSUBLookups[$lookupId]['Type'];
			$lookupFlag = $this->GSUBLookups[$lookupId]['Flag'];
			$markSet = $this->GSUBLookups[$lookupId]['MarkFilteringSet'];

			// Integer attached to the tag in $usetags; 1 when absent
			$variant = preg_match('/' . $feature . '([0-9]{1,2})/', $usetags, $hit) ? $hit[1] : 1;

			// Test each glyph in turn; the bound is re-read because the sequence may change length
			for ($pos = 0; $pos < count($this->OTLdata); $pos += $advance) {
				$glyphHex = $this->OTLdata[$pos]['hex'];
				$glyphUni = $this->OTLdata[$pos]['uni'];
				$advance = 1;

				foreach ($this->GSUBLookups[$lookupId]['Subtables'] as $subIdx => $subOffset) {
					// NB Coverage only looks at glyphs for position 1 (esp. 7.3 and 8.3)
					if (!isset($this->GSLuCoverage[$lookupId][$subIdx][$glyphUni])) {
						continue;
					}
					// Get rules from font GSUB subtable
					$advance = $this->_applyGSUBsubtable($lookupId, $subIdx, $pos, $glyphHex, $glyphUni, ($subOffset - $this->GSUB_offset), $lookupType, $lookupFlag, $markSet, $this->GSLuCoverage[$lookupId][$subIdx], 0, $usetag, 0, $variant);
					if ($advance) {
						break;
					}
				}

				// Nothing applied: still move on to the next glyph
				if ($advance == 0) {
					$advance = 1;
				}
			}
		}
	}
}
function _applyGSUBrulesIndic($usetags, $scriptTag, $langsys, $is_old_spec)
{
	/*
	 * Applies GSUB features for the Indic shaper, one feature at a time in the
	 * order given by $usetags
	 * (e.g. 'locl ccmp nukt akhn rphf rkrf pref blwf half pstf vatu cjct'; later 'init').
	 *
	 * rphf, pref, blwf, half, pstf, cfar and init are applied to a glyph only when the
	 * matching bit is set in that glyph's ['mask']; the rest are applied to all characters.
	 *
	 * All rules should be applied one syllable at a time. That is implemented in
	 * checkContextMatch and checkContextMatchMultiple, which fail to match outside
	 * the scope of the current 'syllable'.
	 *
	 * $usetags      space-separated feature tags, in application order
	 * $scriptTag    script key into the current font's GSUBFeatures
	 * $langsys      language-system key into the current font's GSUBFeatures
	 * $is_old_spec  passed through to _applyGSUBsubtable; when false, the
	 *               Halant-Consonant compatibility branch below may be used
	 */
	$GSUBFeatures = $this->mpdf->CurrentFont['GSUBFeatures'][$scriptTag][$langsys];

	$tags = explode(' ', $usetags);
	foreach ($tags as $usetag) {

		// Lookups of the feature handled in this pass, ordered by lookup index
		$LookupList = array();
		foreach ($GSUBFeatures as $tag => $arr) {
			if ($tag == $usetag) {
				foreach ($arr as $lu) {
					$LookupList[$lu] = $tag;
				}
			}
		}
		ksort($LookupList);

		foreach ($LookupList as $lu => $tag) {

			$Type = $this->GSUBLookups[$lu]['Type'];
			$Flag = $this->GSUBLookups[$lu]['Flag'];
			$MarkFilteringSet = $this->GSUBLookups[$lu]['MarkFilteringSet'];
			// Integer attached to the tag in $usetags; 1 when absent
			$tagInt = 1;
			if (preg_match('/' . $tag . '([0-9]{1,2})/', $usetags, $m)) {
				$tagInt = $m[1];
			}

			$ptr = 0;
			// Test each glyph sequentially; the sequence may change length as lookups are applied
			while ($ptr < count($this->OTLdata)) {
				$currGlyph = $this->OTLdata[$ptr]['hex'];
				$currGID = $this->OTLdata[$ptr]['uni'];
				$shift = 1;
				foreach ($this->GSUBLookups[$lu]['Subtables'] as $c => $subtable_offset) {
					// NB Coverage only looks at glyphs for position 1 (esp. 7.3 and 8.3)
					if (isset($this->GSLuCoverage[$lu][$c][$currGID])) {
						if (strpos('rphf pref blwf half pstf cfar init', $usetag) !== false) { // only apply when mask indicates
							$mask = 0;
							switch ($usetag) {
								case 'rphf': $mask = (1 << (INDIC::RPHF));
									break;
								case 'pref': $mask = (1 << (INDIC::PREF));
									break;
								case 'blwf': $mask = (1 << (INDIC::BLWF));
									break;
								case 'half': $mask = (1 << (INDIC::HALF));
									break;
								case 'pstf': $mask = (1 << (INDIC::PSTF));
									break;
								case 'cfar': $mask = (1 << (INDIC::CFAR));
									break;
								case 'init': $mask = (1 << (INDIC::INIT));
									break;
							}
							if (!($this->OTLdata[$ptr]['mask'] & $mask)) {
								continue; // feature not flagged for this glyph: try next subtable
							}
						}
						// Get rules from font GSUB subtable
						$shift = $this->_applyGSUBsubtable($lu, $c, $ptr, $currGlyph, $currGID, ($subtable_offset - $this->GSUB_offset), $Type, $Flag, $MarkFilteringSet, $this->GSLuCoverage[$lu][$c], 0, $usetag, $is_old_spec, $tagInt);

						if ($shift) {
							break;
						}
					}
					// Special case for Indic ZZZ99S
					// Check to substitute Halant-Consonant in PREF, BLWF or PSTF
					// i.e. new spec but GSUB tables have Consonant-Halant in Lookups e.g. FreeSerif, which
					// incorrectly just moved old spec tables to new spec. Uniscribe seems to cope with this
					// See also ttffontsuni.php
					// First check if current glyph is a Halant/Virama
					else if (_OTL_OLD_SPEC_COMPAT_1 && $Type == 4 && !$is_old_spec && strpos('0094D 009CD 00A4D 00ACD 00B4D 00BCD 00C4D 00CCD 00D4D', $currGlyph) !== false) {
						// only apply when 'pref blwf pstf' tags, and when mask indicates
						if (strpos('pref blwf pstf', $usetag) !== false) {
							$mask = 0;
							switch ($usetag) {
								case 'pref': $mask = (1 << (INDIC::PREF));
									break;
								case 'blwf': $mask = (1 << (INDIC::BLWF));
									break;
								case 'pstf': $mask = (1 << (INDIC::PSTF));
									break;
							}
							if (!($this->OTLdata[$ptr]['mask'] & $mask)) {
								continue;
							}

							// A Halant at the very end of the sequence has no following consonant
							// to pair with; skip instead of reading past the end of OTLdata
							if (!isset($this->OTLdata[$ptr + 1])) {
								continue;
							}

							$nextGlyph = $this->OTLdata[$ptr + 1]['hex'];
							$nextGID = $this->OTLdata[$ptr + 1]['uni'];
							if (isset($this->GSLuCoverage[$lu][$c][$nextGID])) {

								// Get rules from font GSUB subtable
								$shift = $this->_applyGSUBsubtableSpecial($lu, $c, $ptr, $currGlyph, $currGID, $nextGlyph, $nextGID, ($subtable_offset - $this->GSUB_offset), $Type, $this->GSLuCoverage[$lu][$c]);

								if ($shift) {
									break;
								}
							}
						}
					}
				}
				// Nothing applied: still move on to the next glyph
				if ($shift == 0) {
					$shift = 1;
				}
				$ptr += $shift;
			}
		}
	}
}
function _applyGSUBsubtableSpecial($lookupID, $subtable, $ptr, $currGlyph, $currGID, $nextGlyph, $nextGID, $subtable_offset, $Type, $LuCoverage)
{
	/*
	 * Special case for Indic
	 * Check to substitute Halant-Consonant in PREF, BLWF or PSTF
	 * i.e. new spec but GSUB tables have Consonant-Halant in Lookups e.g. FreeSerif, which
	 * incorrectly just moved old spec tables to new spec. Uniscribe seems to cope with this
	 * See also ttffontsuni.php
	 *
	 * Subtable contains Consonant - Halant
	 * Text string contains Halant ($currGlyph) - Consonant ($nextGlyph)
	 * Halant has already been matched, and the caller has already checked that
	 * $nextGID is in the Coverage table
	 *
	 * Only does: LookupType 4: Ligature Substitution Subtable : n to 1
	 *
	 * $ptr              position of the Halant in $this->OTLdata
	 * $currGID          Unicode value of the Halant
	 * $nextGID          Unicode value of the following Consonant
	 * $subtable_offset  offset of the subtable, as passed to seek()
	 * $LuCoverage       coverage map; $LuCoverage[$nextGID] is the Consonant's coverage index
	 *
	 * Returns 1 when a substitution was made, otherwise 0.
	 *
	 * Fix: LigatureSet offsets and LigatureCount are unsigned 16-bit fields in the
	 * OpenType GSUB table; they were read as signed values, which turns offsets of
	 * 0x8000 or more into negative numbers. They are now read with read_ushort().
	 */
	$this->seek($subtable_offset);
	// Header fields are not needed here but are still read so that the following
	// skip() starts at the LigatureSet offset array
	$this->read_ushort(); // SubstFormat
	$this->read_ushort(); // Coverage offset
	$NextGlyphPos = $LuCoverage[$nextGID];
	$this->read_ushort(); // LigSetCount

	$this->skip($NextGlyphPos * 2);
	$LigSet = $subtable_offset + $this->read_ushort();

	$this->seek($LigSet);
	$LigCount = $this->read_ushort();
	// LigatureSet i.e. all starting with the same Glyph $nextGlyph [Consonant]
	$LigatureOffset = array();
	for ($g = 0; $g < $LigCount; $g++) {
		$LigatureOffset[$g] = $LigSet + $this->read_ushort();
	}
	for ($g = 0; $g < $LigCount; $g++) {
		// Ligature tables
		$this->seek($LigatureOffset[$g]);
		$LigGlyph = $this->read_ushort();
		$substitute = $this->glyphToChar($LigGlyph);
		$CompCount = $this->read_ushort();

		// Only expecting to work with 2:1 (and no ignore characters in between)
		if ($CompCount != 2) {
			return 0;
		}

		$gid = $this->read_ushort();
		$checkGlyph = $this->glyphToChar($gid); // Second component of the ligature

		// NOTE(review): a mismatch ends the search rather than moving on to the next
		// ligature in the set - kept as in the original; confirm this is intended
		if ($currGID != $checkGlyph) {
			break;
		}

		// Positions consumed by the ligature: the Halant and the Consonant
		$GlyphPos = array($ptr, $ptr + 1);

		$shift = $this->GSUBsubstitute($ptr, $substitute, 4, $GlyphPos); // GlyphPos contains positions to set null
		if ($shift) {
			return 1;
		}
	}
	return 0;
}
/**
 * Apply one GSUB lookup subtable to the glyph at position $ptr of $this->OTLdata.
 *
 * Handles LookupType 1 (single), 2 (multiple), 3 (alternate), 4 (ligature),
 * 5 (contextual, formats 1 and 2) and 6 (chaining contextual, formats 1 to 3).
 * Contextual types recurse into this method for every nested lookup record.
 *
 * NOTE: $level carries a default but is followed by required parameters, so the
 * default can never take effect; every caller has to pass all 14 arguments. The
 * signature is left untouched for compatibility.
 *
 * @param mixed  $lookupID         Lookup index; only passed to _dumpproc() for debugging.
 * @param mixed  $subtable         Subtable index; only passed to _dumpproc() for debugging.
 * @param int    $ptr              Index into $this->OTLdata of the current glyph.
 * @param string $currGlyph        Current glyph as used by _checkGCOMignore()
 *                                 (nested calls pass the 'hex' entry of OTLdata).
 * @param mixed  $currGID          Key into $LuCoverage / the class tables
 *                                 (nested calls pass the 'uni' entry of OTLdata).
 * @param int    $subtable_offset  Offset of the subtable, passed to seek().
 * @param int    $Type             GSUB LookupType.
 * @param mixed  $Flag             Lookup flag, used to decide which glyphs are ignored.
 * @param mixed  $MarkFilteringSet Mark filtering set belonging to $Flag.
 * @param array  $LuCoverage       Maps $currGID to its index in the Coverage table.
 * @param int    $level            Only passed to _dumpproc(); nested calls pass 1.
 * @param mixed  $currentTag       Only forwarded to nested calls.
 * @param mixed  $is_old_spec      Only forwarded to nested calls.
 * @param int    $tagInt           For LookupType 3: number of the alternate to use when > 1.
 *
 * @return int 0 when nothing was substituted. Otherwise: 1 for types 1 and 3, the
 *             result of GSUBsubstitute() for type 2, a computed shift for type 4, and
 *             for types 5/6 the result of the last nested lookup applied (or
 *             $InputGlyphCount when OMIT_OTL_FIX_3 is defined as 1).
 *
 * @throws MpdfException For LookupType 5 Format 3 and for unsupported LookupTypes.
 */
function _applyGSUBsubtable($lookupID, $subtable, $ptr, $currGlyph, $currGID, $subtable_offset, $Type, $Flag, $MarkFilteringSet, $LuCoverage, $level = 0, $currentTag, $is_old_spec, $tagInt)
{
    $ignore = $this->_getGCOMignoreString($Flag, $MarkFilteringSet);

    // Lets start
    $this->seek($subtable_offset);
    $SubstFormat = $this->read_ushort();

    ////////////////////////////////////////////////////////////////////////////////
    // LookupType 1: Single Substitution Subtable : 1 to 1
    ////////////////////////////////////////////////////////////////////////////////
    if ($Type == 1) {
        // Flag = Ignore
        if ($this->_checkGCOMignore($Flag, $currGlyph, $MarkFilteringSet)) {
            return 0;
        }
        $CoverageOffset = $subtable_offset + $this->read_ushort();
        $GlyphPos = $LuCoverage[$currGID];

        if ($SubstFormat == 1) {
            // Format 1: Calculated output glyph indices
            $DeltaGlyphID = $this->read_short();
            $this->seek($CoverageOffset);
            $glyphs = $this->_getCoverageGID();
            // The OpenType spec defines this addition as modulo 65536
            $GlyphID = ($glyphs[$GlyphPos] + $DeltaGlyphID) & 0xFFFF;
        } else if ($SubstFormat == 2) {
            // Format 2: Specified output glyph indices
            $this->read_ushort(); // GlyphCount: not needed
            $this->skip($GlyphPos * 2);
            $GlyphID = $this->read_ushort();
        } else {
            // Unknown format: previously continued with an undefined $GlyphID
            return 0;
        }

        $substitute = $this->glyphToChar($GlyphID);
        $shift = $this->GSUBsubstitute($ptr, $substitute, $Type);
        if ($this->debugOTL && $shift) {
            $this->_dumpproc('GSUB', $lookupID, $subtable, $Type, $SubstFormat, $ptr, $currGlyph, $level);
        }
        return $shift ? 1 : 0;
    }

    ////////////////////////////////////////////////////////////////////////////////
    // LookupType 2: Multiple Substitution Subtable : 1 to n
    ////////////////////////////////////////////////////////////////////////////////
    else if ($Type == 2) {
        // Flag = Ignore
        if ($this->_checkGCOMignore($Flag, $currGlyph, $MarkFilteringSet)) {
            return 0;
        }
        $this->read_ushort(); // Coverage offset: not needed, $LuCoverage is used instead
        $GlyphPos = $LuCoverage[$currGID];
        $this->skip(2); // SequenceCount
        $this->skip($GlyphPos * 2);
        // Offsets and counts are unsigned 16-bit values
        $Sequences = $subtable_offset + $this->read_ushort();

        $this->seek($Sequences);
        $GlyphCount = $this->read_ushort();
        $SubstituteGlyphs = array();
        for ($g = 0; $g < $GlyphCount; $g++) {
            $sgid = $this->read_ushort();
            $SubstituteGlyphs[] = $this->glyphToChar($sgid);
        }

        $shift = $this->GSUBsubstitute($ptr, $SubstituteGlyphs, $Type);
        if ($this->debugOTL && $shift) {
            $this->_dumpproc('GSUB', $lookupID, $subtable, $Type, $SubstFormat, $ptr, $currGlyph, $level);
        }
        return $shift ? $shift : 0;
    }

    ////////////////////////////////////////////////////////////////////////////////
    // LookupType 3: Alternate Forms : 1 to 1(n)
    ////////////////////////////////////////////////////////////////////////////////
    else if ($Type == 3) {
        // Flag = Ignore
        if ($this->_checkGCOMignore($Flag, $currGlyph, $MarkFilteringSet)) {
            return 0;
        }
        $this->read_ushort(); // Coverage offset: not needed, $LuCoverage is used instead
        $this->read_ushort(); // AlternateSetCount: not needed

        // The alternate may be set by a CSS3 font-feature value for a tag,
        // e.g. for 'salt' it may be 2. Default is 1, which points to Alternate[0].
        $alt = ($tagInt > 1) ? $tagInt : 1;

        $GlyphPos = $LuCoverage[$currGID];
        $this->skip($GlyphPos * 2);

        $AlternateSets = $subtable_offset + $this->read_ushort();
        $this->seek($AlternateSets);

        $AlternateGlyphCount = $this->read_ushort();
        if ($alt > $AlternateGlyphCount) {
            return 0; // If specified alternate not present, cancel [ or could default $alt = 1 ?]
        }

        $this->skip(($alt - 1) * 2);
        $GlyphID = $this->read_ushort();

        $substitute = $this->glyphToChar($GlyphID);
        $shift = $this->GSUBsubstitute($ptr, $substitute, $Type);
        if ($this->debugOTL && $shift) {
            $this->_dumpproc('GSUB', $lookupID, $subtable, $Type, $SubstFormat, $ptr, $currGlyph, $level);
        }
        return $shift ? 1 : 0;
    }

    ////////////////////////////////////////////////////////////////////////////////
    // LookupType 4: Ligature Substitution Subtable : n to 1
    ////////////////////////////////////////////////////////////////////////////////
    else if ($Type == 4) {
        // Flag = Ignore
        if ($this->_checkGCOMignore($Flag, $currGlyph, $MarkFilteringSet)) {
            return 0;
        }
        $this->read_ushort(); // Coverage offset: not needed, $LuCoverage is used instead
        $FirstGlyphPos = $LuCoverage[$currGID];

        $this->read_ushort(); // LigSetCount: not needed

        $this->skip($FirstGlyphPos * 2);
        $LigSet = $subtable_offset + $this->read_ushort();

        $this->seek($LigSet);
        $LigCount = $this->read_ushort();
        // LigatureSet i.e. all starting with the same first Glyph $currGlyph
        $LigatureOffset = array();
        for ($g = 0; $g < $LigCount; $g++) {
            $LigatureOffset[$g] = $LigSet + $this->read_ushort();
        }

        for ($g = 0; $g < $LigCount; $g++) {
            // Ligature tables
            $this->seek($LigatureOffset[$g]);
            $LigGlyph = $this->read_ushort(); // Output Ligature GlyphID
            $substitute = $this->glyphToChar($LigGlyph);
            $CompCount = $this->read_ushort();

            $spos = $ptr;
            $match = true;
            $GlyphPos = array($spos);
            for ($l = 1; $l < $CompCount; $l++) {
                $gid = $this->read_ushort();
                $checkGlyph = $this->glyphToChar($gid); // Other component/input Glyphs starting at position 2 (arrayindex 1)

                $spos++;
                // Step over glyphs this lookup has to ignore
                while (isset($this->OTLdata[$spos]) && strpos($ignore, $this->OTLdata[$spos]['hex']) !== false) {
                    $spos++;
                }

                if (isset($this->OTLdata[$spos]) && $this->OTLdata[$spos]['uni'] == $checkGlyph) {
                    $GlyphPos[] = $spos;
                } else {
                    $match = false;
                    break;
                }
            }

            if ($match) {
                $shift = $this->GSUBsubstitute($ptr, $substitute, $Type, $GlyphPos); // GlyphPos contains positions to set null
                if ($this->debugOTL && $shift) {
                    $this->_dumpproc('GSUB', $lookupID, $subtable, $Type, $SubstFormat, $ptr, $currGlyph, $level);
                }
                if ($shift) {
                    return ($spos - $ptr + 1 - ($CompCount - 1));
                }
            }
        }
        return 0;
    }

    ////////////////////////////////////////////////////////////////////////////////
    // LookupType 5: Contextual Substitution Subtable
    ////////////////////////////////////////////////////////////////////////////////
    else if ($Type == 5) {
        //===========
        // Format 1: Simple Context Glyph Substitution
        //===========
        if ($SubstFormat == 1) {
            $this->read_ushort(); // Coverage offset: not needed, $LuCoverage is used instead
            $SubRuleSetCount = $this->read_ushort();
            $SubRuleSetOffset = $this->_gsubReadOffsets($SubRuleSetCount, $subtable_offset);

            // SubRuleSet tables: All contexts beginning with the same glyph
            // Select the SubRuleSet required using the position of the glyph in the coverage table
            $GlyphPos = $LuCoverage[$currGID];
            if ($SubRuleSetOffset[$GlyphPos] > 0) {
                $this->seek($SubRuleSetOffset[$GlyphPos]);
                $SubRuleCnt = $this->read_ushort();
                $SubRule = array();
                for ($b = 0; $b < $SubRuleCnt; $b++) {
                    $SubRule[$b] = $SubRuleSetOffset[$GlyphPos] + $this->read_ushort();
                }
                for ($b = 0; $b < $SubRuleCnt; $b++) { // EACH RULE
                    $this->seek($SubRule[$b]);
                    $InputGlyphCount = $this->read_ushort();
                    $SubstCount = $this->read_ushort();

                    $Backtrack = array();
                    $Lookahead = array();
                    $Input = array();
                    $Input[0] = $this->OTLdata[$ptr]['uni'];
                    for ($r = 1; $r < $InputGlyphCount; $r++) {
                        $gid = $this->read_ushort();
                        $Input[$r] = $this->glyphToChar($gid);
                    }
                    $matched = $this->checkContextMatch($Input, $Backtrack, $Lookahead, $ignore, $ptr);
                    if ($matched) {
                        if ($this->debugOTL) {
                            $this->_dumpproc('GSUB', $lookupID, $subtable, $Type, $SubstFormat, $ptr, $currGlyph, $level);
                        }
                        $records = $this->_gsubReadLookupRecords($SubstCount);
                        return $this->_gsubApplyNestedLookups($records, $matched, $InputGlyphCount, $currentTag, $is_old_spec, $tagInt);
                    }
                }
            }
            return 0;
        }

        //===========
        // Format 2: Class-based Context Glyph Substitution
        //===========
        else if ($SubstFormat == 2) {
            $this->read_ushort(); // Coverage offset: not needed
            $InputClassDefOffset = $subtable_offset + $this->read_ushort();
            $SubClassSetCnt = $this->read_ushort();
            $SubClassSetOffset = $this->_gsubReadOffsets($SubClassSetCnt, $subtable_offset);

            $InputClasses = $this->_getClasses($InputClassDefOffset);

            for ($s = 0; $s < $SubClassSetCnt; $s++) { // $SubClassSet is ordered by input class-may be NULL
                // Select $SubClassSet if currGlyph is in First Input Class
                if ($SubClassSetOffset[$s] > 0 && isset($InputClasses[$s][$currGID])) {
                    $this->seek($SubClassSetOffset[$s]);
                    $SubClassRuleCnt = $this->read_ushort();
                    $SubClassRule = array();
                    for ($b = 0; $b < $SubClassRuleCnt; $b++) {
                        $SubClassRule[$b] = $SubClassSetOffset[$s] + $this->read_ushort();
                    }

                    // Class 0 contains all the glyphs NOT in the other classes
                    $class0excl = $this->_gsubClass0Exclusions($InputClasses);

                    for ($b = 0; $b < $SubClassRuleCnt; $b++) { // EACH RULE
                        $this->seek($SubClassRule[$b]);
                        $InputGlyphCount = $this->read_ushort();
                        $SubstCount = $this->read_ushort();
                        $Input = array();
                        for ($r = 1; $r < $InputGlyphCount; $r++) {
                            $Input[$r] = $this->read_ushort();
                        }

                        // Position 0 is the class of the current glyph; the rest come from the rule
                        $inputGlyphs = array(0 => $InputClasses[$s])
                            + $this->_gsubClassGlyphs($Input, $InputClasses, 1, $InputGlyphCount);

                        $backtrackGlyphs = array();
                        $lookaheadGlyphs = array();

                        $matched = $this->checkContextMatchMultipleUni($inputGlyphs, $backtrackGlyphs, $lookaheadGlyphs, $ignore, $ptr, $class0excl);
                        if ($matched) {
                            if ($this->debugOTL) {
                                $this->_dumpproc('GSUB', $lookupID, $subtable, $Type, $SubstFormat, $ptr, $currGlyph, $level);
                            }
                            $records = $this->_gsubReadLookupRecords($SubstCount);
                            return $this->_gsubApplyNestedLookups($records, $matched, $InputGlyphCount, $currentTag, $is_old_spec, $tagInt);
                        }
                    }
                }
            }

            return 0;
        }

        //===========
        // Format 3: Coverage-based Context Glyph Substitution
        //===========
        else if ($SubstFormat == 3) {
            throw new MpdfException("GSUB Lookup Type " . $Type . " Format " . $SubstFormat . " not TESTED YET.");
        }
    }

    ////////////////////////////////////////////////////////////////////////////////
    // LookupType 6: Chaining Contextual Substitution Subtable
    ////////////////////////////////////////////////////////////////////////////////
    else if ($Type == 6) {
        //===========
        // Format 1: Simple Chaining Context Glyph Substitution
        //===========
        if ($SubstFormat == 1) {
            $this->read_ushort(); // Coverage offset: not needed, $LuCoverage is used instead
            $GlyphPos = $LuCoverage[$currGID];
            $this->read_ushort(); // ChainSubRuleSetCount: not needed
            // All of the ChainSubRule tables defining contexts that begin with the same first glyph
            // are grouped together and defined in a ChainSubRuleSet table
            $this->skip($GlyphPos * 2);
            $ChainSubRuleSet = $subtable_offset + $this->read_ushort();
            $this->seek($ChainSubRuleSet);
            $ChainSubRuleCount = $this->read_ushort();

            $ChainSubRule = array();
            for ($s = 0; $s < $ChainSubRuleCount; $s++) {
                $ChainSubRule[$s] = $ChainSubRuleSet + $this->read_ushort();
            }

            for ($s = 0; $s < $ChainSubRuleCount; $s++) {
                $this->seek($ChainSubRule[$s]);

                $BacktrackGlyphCount = $this->read_ushort();
                $Backtrack = array();
                for ($b = 0; $b < $BacktrackGlyphCount; $b++) {
                    $gid = $this->read_ushort();
                    $Backtrack[] = $this->glyphToChar($gid);
                }
                $Input = array();
                $Input[0] = $this->OTLdata[$ptr]['uni'];
                $InputGlyphCount = $this->read_ushort();
                for ($b = 1; $b < $InputGlyphCount; $b++) {
                    $gid = $this->read_ushort();
                    $Input[$b] = $this->glyphToChar($gid);
                }
                $LookaheadGlyphCount = $this->read_ushort();
                $Lookahead = array();
                for ($b = 0; $b < $LookaheadGlyphCount; $b++) {
                    $gid = $this->read_ushort();
                    $Lookahead[] = $this->glyphToChar($gid);
                }

                $matched = $this->checkContextMatch($Input, $Backtrack, $Lookahead, $ignore, $ptr);
                if ($matched) {
                    if ($this->debugOTL) {
                        $this->_dumpproc('GSUB', $lookupID, $subtable, $Type, $SubstFormat, $ptr, $currGlyph, $level);
                    }
                    $SubstCount = $this->read_ushort();
                    $records = $this->_gsubReadLookupRecords($SubstCount);
                    return $this->_gsubApplyNestedLookups($records, $matched, $InputGlyphCount, $currentTag, $is_old_spec, $tagInt);
                }
            }
            return 0;
        }

        //===========
        // Format 2: Class-based Chaining Context Glyph Substitution p257
        //===========
        else if ($SubstFormat == 2) {
            // NB Format 2 specifies fixed class assignments (identical for each position in the
            // backtrack, input, or lookahead sequence) and exclusive classes (a glyph cannot be
            // in more than one class at a time)

            $this->read_ushort(); // Coverage offset: not needed
            $BacktrackClassDefOffset = $subtable_offset + $this->read_ushort();
            $InputClassDefOffset = $subtable_offset + $this->read_ushort();
            $LookaheadClassDefOffset = $subtable_offset + $this->read_ushort();
            $ChainSubClassSetCnt = $this->read_ushort();
            $ChainSubClassSetOffset = $this->_gsubReadOffsets($ChainSubClassSetCnt, $subtable_offset);

            $BacktrackClasses = $this->_getClasses($BacktrackClassDefOffset);
            $InputClasses = $this->_getClasses($InputClassDefOffset);
            $LookaheadClasses = $this->_getClasses($LookaheadClassDefOffset);

            for ($s = 0; $s < $ChainSubClassSetCnt; $s++) { // $ChainSubClassSet is ordered by input class-may be NULL
                // Select $ChainSubClassSet if currGlyph is in First Input Class
                if ($ChainSubClassSetOffset[$s] > 0 && isset($InputClasses[$s][$currGID])) {
                    $this->seek($ChainSubClassSetOffset[$s]);
                    $ChainSubClassRuleCnt = $this->read_ushort();
                    $ChainSubClassRule = array();
                    for ($b = 0; $b < $ChainSubClassRuleCnt; $b++) {
                        $ChainSubClassRule[$b] = $ChainSubClassSetOffset[$s] + $this->read_ushort();
                    }

                    // Class 0 contains all the glyphs NOT in the other classes.
                    // These only depend on the class tables, so build them once per set.
                    $class0excl = $this->_gsubClass0Exclusions($InputClasses);
                    $bclass0excl = $this->_gsubClass0Exclusions($BacktrackClasses);
                    $lclass0excl = $this->_gsubClass0Exclusions($LookaheadClasses);

                    for ($b = 0; $b < $ChainSubClassRuleCnt; $b++) { // EACH RULE
                        $this->seek($ChainSubClassRule[$b]);

                        // Arrays of class indexes ($Input starts at the second position)
                        $Backtrack = array();
                        $Input = array();
                        $Lookahead = array();
                        $BacktrackGlyphCount = $this->read_ushort();
                        for ($r = 0; $r < $BacktrackGlyphCount; $r++) {
                            $Backtrack[$r] = $this->read_ushort();
                        }
                        $InputGlyphCount = $this->read_ushort();
                        for ($r = 1; $r < $InputGlyphCount; $r++) {
                            $Input[$r] = $this->read_ushort();
                        }
                        $LookaheadGlyphCount = $this->read_ushort();
                        for ($r = 0; $r < $LookaheadGlyphCount; $r++) {
                            $Lookahead[$r] = $this->read_ushort();
                        }

                        // Translate class indexes into the glyph arrays of each class.
                        // All three lists are rebuilt from scratch for every rule, so a
                        // shorter rule cannot inherit trailing entries from a longer one.
                        $inputGlyphs = array(0 => $InputClasses[$s])
                            + $this->_gsubClassGlyphs($Input, $InputClasses, 1, $InputGlyphCount);
                        $backtrackGlyphs = $this->_gsubClassGlyphs($Backtrack, $BacktrackClasses, 0, $BacktrackGlyphCount);
                        $lookaheadGlyphs = $this->_gsubClassGlyphs($Lookahead, $LookaheadClasses, 0, $LookaheadGlyphCount);

                        $matched = $this->checkContextMatchMultipleUni($inputGlyphs, $backtrackGlyphs, $lookaheadGlyphs, $ignore, $ptr, $class0excl, $bclass0excl, $lclass0excl);
                        if ($matched) {
                            if ($this->debugOTL) {
                                $this->_dumpproc('GSUB', $lookupID, $subtable, $Type, $SubstFormat, $ptr, $currGlyph, $level);
                            }
                            $SubstCount = $this->read_ushort();
                            $records = $this->_gsubReadLookupRecords($SubstCount);
                            return $this->_gsubApplyNestedLookups($records, $matched, $InputGlyphCount, $currentTag, $is_old_spec, $tagInt);
                        }
                    }
                }
            }

            return 0;
        }

        //===========
        // Format 3: Coverage-based Chaining Context Glyph Substitution p259
        //===========
        else if ($SubstFormat == 3) {
            $CoverageBacktrackOffset = array();
            $CoverageInputOffset = array();
            $CoverageLookaheadOffset = array();

            $BacktrackGlyphCount = $this->read_ushort();
            for ($b = 0; $b < $BacktrackGlyphCount; $b++) {
                $CoverageBacktrackOffset[] = $subtable_offset + $this->read_ushort(); // in glyph sequence order
            }
            $InputGlyphCount = $this->read_ushort();
            for ($b = 0; $b < $InputGlyphCount; $b++) {
                $CoverageInputOffset[] = $subtable_offset + $this->read_ushort(); // in glyph sequence order
            }
            $LookaheadGlyphCount = $this->read_ushort();
            for ($b = 0; $b < $LookaheadGlyphCount; $b++) {
                $CoverageLookaheadOffset[] = $subtable_offset + $this->read_ushort(); // in glyph sequence order
            }
            $SubstCount = $this->read_ushort();
            $save_pos = $this->_pos; // Save the point just after SubstCount

            $CoverageBacktrackGlyphs = array();
            for ($b = 0; $b < $BacktrackGlyphCount; $b++) {
                $this->seek($CoverageBacktrackOffset[$b]);
                $glyphs = $this->_getCoverage();
                $CoverageBacktrackGlyphs[$b] = implode("|", $glyphs);
            }
            $CoverageInputGlyphs = array();
            for ($b = 0; $b < $InputGlyphCount; $b++) {
                $this->seek($CoverageInputOffset[$b]);
                $glyphs = $this->_getCoverage();
                $CoverageInputGlyphs[$b] = implode("|", $glyphs);
            }
            $CoverageLookaheadGlyphs = array();
            for ($b = 0; $b < $LookaheadGlyphCount; $b++) {
                $this->seek($CoverageLookaheadOffset[$b]);
                $glyphs = $this->_getCoverage();
                $CoverageLookaheadGlyphs[$b] = implode("|", $glyphs);
            }

            $matched = $this->checkContextMatchMultiple($CoverageInputGlyphs, $CoverageBacktrackGlyphs, $CoverageLookaheadGlyphs, $ignore, $ptr);
            if ($matched) {
                if ($this->debugOTL) {
                    $this->_dumpproc('GSUB', $lookupID, $subtable, $Type, $SubstFormat, $ptr, $currGlyph, $level);
                }

                $this->seek($save_pos); // Return to just after SubstCount
                $records = $this->_gsubReadLookupRecords($SubstCount);
                return $this->_gsubApplyNestedLookups($records, $matched, $InputGlyphCount, $currentTag, $is_old_spec, $tagInt);
            }

            return 0;
        }
    }

    else {
        throw new MpdfException("GSUB Lookup Type " . $Type . " not supported.");
    }

    // LookupType 5 or 6 with an unknown SubstFormat: nothing substituted
    return 0;
}

/**
 * Read $count 16-bit offsets from the current read position. A zero (NULL) offset
 * is kept as 0; any other offset has $base added to it.
 */
private function _gsubReadOffsets($count, $base)
{
    $offsets = array();
    for ($i = 0; $i < $count; $i++) {
        $offset = $this->read_ushort();
        $offsets[] = ($offset == 0x0000) ? 0 : $base + $offset;
    }
    return $offsets;
}

/**
 * Read $count substitution lookup records from the current read position.
 * Each record is returned as array(SequenceIndex, LookupListIndex).
 */
private function _gsubReadLookupRecords($count)
{
    $records = array();
    for ($p = 0; $p < $count; $p++) {
        $sequenceIndex = $this->read_ushort();
        $lookupListIndex = $this->read_ushort();
        $records[] = array($sequenceIndex, $lookupListIndex);
    }
    return $records;
}

/**
 * Translate class indexes $classIndexes[$from .. $count - 1] into the glyph arrays
 * held in $classes. A class that is not present in $classes yields ''.
 */
private function _gsubClassGlyphs($classIndexes, $classes, $from, $count)
{
    $glyphs = array();
    for ($i = $from; $i < $count; $i++) {
        $classindex = $classIndexes[$i];
        $glyphs[$i] = isset($classes[$classindex]) ? $classes[$classindex] : '';
    }
    return $glyphs;
}

/**
 * Union of the glyph arrays of classes 1 .. count($classes), i.e. the glyphs that
 * are excluded from class 0. Missing or non-array entries are skipped.
 */
private function _gsubClass0Exclusions($classes)
{
    $excluded = array();
    $classCount = count($classes);
    for ($gc = 1; $gc <= $classCount; $gc++) {
        if (isset($classes[$gc]) && is_array($classes[$gc])) {
            $excluded = $excluded + $classes[$gc];
        }
    }
    return $excluded;
}

/**
 * Apply the nested lookups of a matched (chaining) context rule.
 *
 * Every record is array(SequenceIndex, LookupListIndex). Records whose
 * SequenceIndex is not below $InputGlyphCount are skipped. For the others each
 * subtable of the lookup is tried at position $matched[SequenceIndex] until one
 * returns a non-zero shift.
 *
 * @return int Result of the last subtable tried (0 if none was tried), or
 *             $InputGlyphCount when OMIT_OTL_FIX_3 is defined as 1.
 */
private function _gsubApplyNestedLookups($records, $matched, $InputGlyphCount, $currentTag, $is_old_spec, $tagInt)
{
    // Start from 0 so the return value is defined even if no subtable is tried
    $shift = 0;

    foreach ($records as $record) {
        $sequenceIndex = $record[0];
        $lu = $record[1];
        if ($sequenceIndex >= $InputGlyphCount) {
            continue;
        }
        $luType = $this->GSUBLookups[$lu]['Type'];
        $luFlag = $this->GSUBLookups[$lu]['Flag'];
        $luMarkFilteringSet = $this->GSUBLookups[$lu]['MarkFilteringSet'];

        $luptr = $matched[$sequenceIndex];
        $lucurrGlyph = $this->OTLdata[$luptr]['hex'];
        $lucurrGID = $this->OTLdata[$luptr]['uni'];

        foreach ($this->GSUBLookups[$lu]['Subtables'] as $luc => $lusubtable_offset) {
            $shift = $this->_applyGSUBsubtable($lu, $luc, $luptr, $lucurrGlyph, $lucurrGID, ($lusubtable_offset - $this->GSUB_offset), $luType, $luFlag, $luMarkFilteringSet, $this->GSLuCoverage[$lu][$luc], 1, $currentTag, $is_old_spec, $tagInt);
            if ($shift) {
                break;
            }
        }
    }

    if (!defined("OMIT_OTL_FIX_3") || OMIT_OTL_FIX_3 != 1) {
        return isset($shift) ? $shift : 0;
    } /* OTL_FIX_3 */

    return $InputGlyphCount; // should be + matched ignores in Input Sequence
}
/**
 * Re-key $this->assocLigs and $this->assocMarks after the glyph count changed
 * at position $pos.
 *
 * $this->assocMarks[mpos] holds an array with a 'ligPos' entry; $this->assocLigs
 * is keyed by ligature position. For $n > 0 entries at or beyond $pos + $n move
 * up by $n. Otherwise entries beyond $pos move down by -$n.
 *
 * @param int $pos Position at which glyphs were inserted or removed.
 * @param int $n   Number of glyphs added (> 0) or removed (< 0).
 */
function _updateLigatureMarks($pos, $n)
{
    $total = count($this->OTLdata);

    if ($n > 0) {
        // Glyphs added: everything from this position onwards is shifted up
        $firstMoved = $pos + $n;

        // Go from the end backwards so a moved entry never lands on a key
        // that still has to be looked at
        $idx = $total - 1;
        while ($idx >= $firstMoved) {
            if (isset($this->assocLigs[$idx])) {
                $lig = $this->assocLigs[$idx];
                unset($this->assocLigs[$idx]);
                $this->assocLigs[$idx + $n] = $lig;
            }
            $idx--;
        }

        for ($idx = $total - 1; $idx >= 0; $idx--) {
            if (!isset($this->assocMarks[$idx])) {
                continue;
            }
            // The ligature the mark refers to has moved
            if ($this->assocMarks[$idx]['ligPos'] >= $firstMoved) {
                $this->assocMarks[$idx]['ligPos'] += $n;
            }
            // The mark itself has moved
            if ($idx >= $firstMoved) {
                $mark = $this->assocMarks[$idx];
                unset($this->assocMarks[$idx]);
                $this->assocMarks[$idx + $n] = $mark;
            }
        }
    } elseif ($n < 1) {
        // Glyphs removed: pre-existing entries are shifted down
        $removed = -$n;

        for ($idx = $pos + 1; $idx < $total; $idx++) {
            if (isset($this->assocLigs[$idx])) {
                $lig = $this->assocLigs[$idx];
                unset($this->assocLigs[$idx]);
                $this->assocLigs[$idx - $removed] = $lig;
            }
        }

        for ($idx = 0; $idx < $total; $idx++) {
            if (!isset($this->assocMarks[$idx])) {
                continue;
            }
            if ($this->assocMarks[$idx]['ligPos'] >= $pos) {
                $this->assocMarks[$idx]['ligPos'] -= $removed;
            }
            if ($idx > $pos) {
                $mark = $this->assocMarks[$idx];
                unset($this->assocMarks[$idx]);
                $this->assocMarks[$idx - $removed] = $mark;
            }
        }
    }
}
2411 function GSUBsubstitute($pos, $substitute, $Type, $GlyphPos = NULL)
2414 // LookupType 1: Simple Substitution Subtable : 1 to 1
2415 // LookupType 3: Alternate Forms : 1 to 1(n)
2416 if ($Type == 1 || $Type == 3) {
2417 $this->OTLdata[$pos]['uni'] = $substitute;
2418 $this->OTLdata[$pos]['hex'] = $this->unicode_hex($substitute);
2419 return 1;
2421 // LookupType 2: Multiple Substitution Subtable : 1 to n
2422 else if ($Type == 2) {
2423 for ($i = 0; $i < count($substitute); $i++) {
2424 $uni = $substitute[$i];
2425 $newOTLdata[$i] = array();
2426 $newOTLdata[$i]['uni'] = $uni;
2427 $newOTLdata[$i]['hex'] = $this->unicode_hex($uni);
2430 // Get types of new inserted chars - or replicate type of char being replaced
2431 // $bt = UCDN::get_bidi_class($uni);
2432 // if (!$bt) {
2433 $bt = $this->OTLdata[$pos]['bidi_type'];
2434 // }
2436 if (strpos($this->GlyphClassMarks, $newOTLdata[$i]['hex']) !== false) {
2437 $gp = 'M';
2438 } else if ($uni == 32) {
2439 $gp = 'S';
2440 } else {
2441 $gp = 'C';
2444 // Need to update matra_type ??? of new glyphs inserted ???????????????????????????????????????
2446 $newOTLdata[$i]['bidi_type'] = $bt;
2447 $newOTLdata[$i]['group'] = $gp;
2449 // Need to update details of new glyphs inserted
2450 $newOTLdata[$i]['general_category'] = $this->OTLdata[$pos]['general_category'];
2452 if ($this->shaper == 'I' || $this->shaper == 'K' || $this->shaper == 'S') {
2453 $newOTLdata[$i]['indic_category'] = $this->OTLdata[$pos]['indic_category'];
2454 $newOTLdata[$i]['indic_position'] = $this->OTLdata[$pos]['indic_position'];
2455 } else if ($this->shaper == 'M') {
2456 $newOTLdata[$i]['myanmar_category'] = $this->OTLdata[$pos]['myanmar_category'];
2457 $newOTLdata[$i]['myanmar_position'] = $this->OTLdata[$pos]['myanmar_position'];
2459 if (isset($this->OTLdata[$pos]['mask'])) {
2460 $newOTLdata[$i]['mask'] = $this->OTLdata[$pos]['mask'];
2462 if (isset($this->OTLdata[$pos]['syllable'])) {
2463 $newOTLdata[$i]['syllable'] = $this->OTLdata[$pos]['syllable'];
2466 if ($this->shaper == 'K' || $this->shaper == 'T' || $this->shaper == 'L') {
2467 if ($this->OTLdata[$pos]['wordend']) {
2468 $newOTLdata[count($substitute) - 1]['wordend'] = true;
2472 array_splice($this->OTLdata, $pos, 1, $newOTLdata); // Replace 1 with n
2473 // Update position of Ligatures and associated Marks
2474 // count($substitute)-1 is the number of glyphs added
2475 $nadd = count($substitute) - 1;
2476 $this->_updateLigatureMarks($pos, $nadd);
2477 return count($substitute);
2479 // LookupType 4: Ligature Substitution Subtable : n to 1
2480 else if ($Type == 4) {
2481 // Create Ligatures and associated Marks
2482 $firstGlyph = $this->OTLdata[$pos]['hex'];
2484 // If all components of the ligature are marks (and in the same syllable), we call this a mark ligature.
2485 $contains_marks = false;
2486 $contains_nonmarks = false;
2487 if (isset($this->OTLdata[$pos]['syllable'])) {
2488 $current_syllable = $this->OTLdata[$pos]['syllable'];
2489 } else {
2490 $current_syllable = 0;
2492 for ($i = 0; $i < count($GlyphPos); $i++) {
2493 // If subsequent components are not Marks as well - don't ligate
2494 $unistr = $this->OTLdata[$GlyphPos[$i]]['hex'];
2495 if ($this->restrictToSyllable && isset($this->OTLdata[$GlyphPos[$i]]['syllable']) && $this->OTLdata[$GlyphPos[$i]]['syllable'] != $current_syllable) {
2496 return 0;
2498 if (strpos($this->GlyphClassMarks, $unistr) !== false) {
2499 $contains_marks = true;
2500 } else {
2501 $contains_nonmarks = true;
2504 if ($contains_marks && !$contains_nonmarks) {
2505 // Mark Ligature (all components are Marks)
2506 $firstMarkAssoc = '';
2507 if (isset($this->assocMarks[$pos])) {
2508 $firstMarkAssoc = $this->assocMarks[$pos];
2510 // If all components of the ligature are marks, we call this a mark ligature.
2511 for ($i = 1; $i < count($GlyphPos); $i++) {
2513 // If subsequent components are not Marks as well - don't ligate
2514 // $unistr = $this->OTLdata[$GlyphPos[$i]]['hex'];
2515 // if (strpos($this->GlyphClassMarks, $unistr )===false) { return; }
2517 $nextMarkAssoc = '';
2518 if (isset($this->assocMarks[$GlyphPos[$i]])) {
2519 $nextMarkAssoc = $this->assocMarks[$GlyphPos[$i]];
2521 // If first component was attached to a previous ligature component,
2522 // all subsequent components should be attached to the same ligature
2523 // component, otherwise we shouldn't ligate them.
2524 // If first component was NOT attached to a previous ligature component,
2525 // all subsequent components should also NOT be attached to any ligature component,
2526 if ($firstMarkAssoc != $nextMarkAssoc) {
2527 // unless they are attached to the first component itself!
2528 // if (!is_array($nextMarkAssoc) || $nextMarkAssoc['ligPos']!= $pos) { return; }
2529 // Update/Edit - In test with myanmartext font
2530 // &#x1004;&#x103a;&#x1039;&#x1000;&#x1039;&#x1000;&#x103b;&#x103c;&#x103d;&#x1031;&#x102d;
2531 // => Lookup 17 E003 E066B E05A 102D
2532 // E003 and 102D should form a mark ligature, but 102D is already associated with (non-mark) ligature E05A
2533 // So instead of disallowing the mark ligature to form, just dissociate...
2534 if (!is_array($nextMarkAssoc) || $nextMarkAssoc['ligPos'] != $pos) {
2535 unset($this->assocMarks[$GlyphPos[$i]]);
2541 * - If it *is* a mark ligature, we don't allocate a new ligature id, and leave
2542 * the ligature to keep its old ligature id. This will allow it to attach to
2543 * a base ligature in GPOS. Eg. if the sequence is: LAM,LAM,SHADDA,FATHA,HEH,
2544 * and LAM,LAM,HEH form a ligature, they will leave SHADDA and FATHA wit a
2545 * ligature id and component value of 2. Then if SHADDA,FATHA form a ligature
2546 * later, we don't want them to lose their ligature id/component, otherwise
2547 * GPOS will fail to correctly position the mark ligature on top of the
2548 * LAM,LAM,HEH ligature.
2550 // So if is_array($firstMarkAssoc) - the new (Mark) ligature should keep this association
2552 $lastPos = $GlyphPos[(count($GlyphPos) - 1)];
2553 } else {
2555 * - Ligatures cannot be formed across glyphs attached to different components
2556 * of previous ligatures. Eg. the sequence is LAM,SHADDA,LAM,FATHA,HEH, and
2557 * LAM,LAM,HEH form a ligature, leaving SHADDA,FATHA next to eachother.
2558 * However, it would be wrong to ligate that SHADDA,FATHA sequence.
2559 * There is an exception to this: If a ligature tries ligating with marks that
2560 * belong to it itself, go ahead, assuming that the font designer knows what
2561 * they are doing (otherwise it can break Indic stuff when a matra wants to
2562 * ligate with a conjunct...)
2566 * - If a ligature is formed of components that some of which are also ligatures
2567 * themselves, and those ligature components had marks attached to *their*
2568 * components, we have to attach the marks to the new ligature component
2569 * positions! Now *that*'s tricky! And these marks may be following the
2570 * last component of the whole sequence, so we should loop forward looking
2571 * for them and update them.
2573 * Eg. the sequence is LAM,LAM,SHADDA,FATHA,HEH, and the font first forms a
2574 * 'calt' ligature of LAM,HEH, leaving the SHADDA and FATHA with a ligature
2575 * id and component == 1. Now, during 'liga', the LAM and the LAM-HEH ligature
2576 * form a LAM-LAM-HEH ligature. We need to reassign the SHADDA and FATHA to
2577 * the new ligature with a component value of 2.
2579 * This in fact happened to a font... See:
2580 * https://bugzilla.gnome.org/show_bug.cgi?id=437633
2583 $currComp = 0;
2584 for ($i = 0; $i < count($GlyphPos); $i++) {
2585 if ($i > 0 && isset($this->assocLigs[$GlyphPos[$i]])) { // One of the other components is already a ligature
2586 $nc = $this->assocLigs[$GlyphPos[$i]];
2587 } else {
2588 $nc = 1;
2590 // While next char to right is a mark (but not the next matched glyph)
2591 // ?? + also include a Mark Ligature here
2592 $ic = 1;
2593 while ((($i == count($GlyphPos) - 1) || (isset($GlyphPos[$i + 1]) && ($GlyphPos[$i] + $ic) < $GlyphPos[$i + 1])) && isset($this->OTLdata[($GlyphPos[$i] + $ic)]) && strpos($this->GlyphClassMarks, $this->OTLdata[($GlyphPos[$i] + $ic)]['hex']) !== false) {
2594 $newComp = $currComp;
2595 if (isset($this->assocMarks[$GlyphPos[$i] + $ic])) { // One of the inbetween Marks is already associated with a Lig
2596 // OK as long as it is associated with the current Lig
2597 // if ($this->assocMarks[($GlyphPos[$i]+$ic)]['ligPos'] != ($GlyphPos[$i]+$ic)) { die("Problem #1"); }
2598 $newComp += $this->assocMarks[($GlyphPos[$i] + $ic)]['compID'];
2600 $this->assocMarks[($GlyphPos[$i] + $ic)] = array('compID' => $newComp, 'ligPos' => $pos);
2601 $ic++;
2603 $currComp += $nc;
2605 $lastPos = $GlyphPos[(count($GlyphPos) - 1)] + $ic - 1;
2606 $this->assocLigs[$pos] = $currComp; // Number of components in new Ligature
2609 // Now remove the unwanted glyphs and associated metadata
2610 $newOTLdata[0] = array();
2612 // Get types of new inserted chars - or replicate type of char being replaced
2613 // $bt = UCDN::get_bidi_class($substitute);
2614 // if (!$bt) {
2615 $bt = $this->OTLdata[$pos]['bidi_type'];
2616 // }
2618 if (strpos($this->GlyphClassMarks, $this->unicode_hex($substitute)) !== false) {
2619 $gp = 'M';
2620 } else if ($substitute == 32) {
2621 $gp = 'S';
2622 } else {
2623 $gp = 'C';
2626 // Need to update details of new glyphs inserted
2627 $newOTLdata[0]['general_category'] = $this->OTLdata[$pos]['general_category'];
2629 $newOTLdata[0]['bidi_type'] = $bt;
2630 $newOTLdata[0]['group'] = $gp;
2632 // KASHIDA: If forming a ligature when the last component was identified as a kashida point (final form)
2633 // If previous/first component of ligature is a medial form, then keep this as a kashida point
2634 // TEST (Arabic Typesetting) &#x64a;&#x64e;&#x646;&#x62a;&#x64f;&#x645;
2635 $ka = 0;
2636 if (isset($this->OTLdata[$GlyphPos[(count($GlyphPos) - 1)]]['GPOSinfo']['kashida'])) {
2637 $ka = $this->OTLdata[$GlyphPos[(count($GlyphPos) - 1)]]['GPOSinfo']['kashida'];
2639 if ($ka == 1 && isset($this->OTLdata[$pos]['form']) && $this->OTLdata[$pos]['form'] == 3) {
2640 $newOTLdata[0]['GPOSinfo']['kashida'] = $ka;
2643 $newOTLdata[0]['uni'] = $substitute;
2644 $newOTLdata[0]['hex'] = $this->unicode_hex($substitute);
2646 if ($this->shaper == 'I' || $this->shaper == 'K' || $this->shaper == 'S') {
2647 $newOTLdata[0]['indic_category'] = $this->OTLdata[$pos]['indic_category'];
2648 $newOTLdata[0]['indic_position'] = $this->OTLdata[$pos]['indic_position'];
2649 } else if ($this->shaper == 'M') {
2650 $newOTLdata[0]['myanmar_category'] = $this->OTLdata[$pos]['myanmar_category'];
2651 $newOTLdata[0]['myanmar_position'] = $this->OTLdata[$pos]['myanmar_position'];
2653 if (isset($this->OTLdata[$pos]['mask'])) {
2654 $newOTLdata[0]['mask'] = $this->OTLdata[$pos]['mask'];
2656 if (isset($this->OTLdata[$pos]['syllable'])) {
2657 $newOTLdata[0]['syllable'] = $this->OTLdata[$pos]['syllable'];
2660 $newOTLdata[0]['is_ligature'] = true;
2663 array_splice($this->OTLdata, $pos, 1, $newOTLdata);
2665 // GlyphPos contains array of arr_pos to set null - not necessarily contiguous
2666 // +- Remove any assocMarks or assocLigs from the main components (the ones that are deleted)
2667 for ($i = count($GlyphPos) - 1; $i > 0; $i--) {
2668 $gpos = $GlyphPos[$i];
2669 array_splice($this->OTLdata, $gpos, 1);
2670 unset($this->assocLigs[$gpos]);
2671 unset($this->assocMarks[$gpos]);
2673 // $this->assocLigs = array(); // Ligatures[$posarr lpos] => nc
2674 // $this->assocMarks = array(); // assocMarks[$posarr mpos] => array(compID, ligPos)
2675 // Update position of pre-existing Ligatures and associated Marks
2676 // Start after first GlyphPos
2677 // count($GlyphPos)-1 is the number of glyphs removed from string
2678 for ($p = ($GlyphPos[0] + 1); $p < (count($this->OTLdata) + count($GlyphPos) - 1); $p++) {
2679 $nrem = 0; // Number of Glyphs removed at this point in the string
2680 for ($i = 0; $i < count($GlyphPos); $i++) {
2681 if ($i > 0 && $p > $GlyphPos[$i]) {
2682 $nrem++;
2685 if (isset($this->assocLigs[$p])) {
2686 $tmp = $this->assocLigs[$p];
2687 unset($this->assocLigs[$p]);
2688 $this->assocLigs[($p - $nrem)] = $tmp;
2690 if (isset($this->assocMarks[$p])) {
2691 $tmp = $this->assocMarks[$p];
2692 unset($this->assocMarks[$p]);
2693 if ($tmp['ligPos'] > $GlyphPos[0]) {
2694 $tmp['ligPos'] -= $nrem;
2696 $this->assocMarks[($p - $nrem)] = $tmp;
2699 return 1;
2700 } else {
2701 return 0;
2705 ////////////////////////////////////////////////////////////////
2706 ////////////////////////////////////////////////////////////////
2707 ////////// ARABIC /////////////////////////////////
2708 ////////////////////////////////////////////////////////////////
2709 ////////////////////////////////////////////////////////////////
/**
 * Populate the joining-class lookup tables used when shaping Arabic-family
 * scripts (Arabic, Syriac, N'Ko, Mandaic).
 *
 * Each table maps a Unicode code point to 1, so membership is tested with isset():
 *  - $this->arabLeftJoining  : joins to the FOLLOWING letter in logical order
 *                              (i.e. can take an initial/medial form)
 *  - $this->arabRightJoining : joins to the PREVIOUS letter in logical order
 *                              (i.e. can take a final/medial form)
 *  - $this->arabTransparent  : transparent-joining characters (not just vowels)
 *
 * cf. http://unicode.org/Public/UNIDATA/ArabicShaping.txt
 *     http://unicode.org/Public/UNIDATA/extracted/DerivedJoiningType.txt
 */
function arabic_initialise()
{
    // JOIN TO FOLLOWING LETTER IN LOGICAL ORDER (i.e. AS INITIAL/MEDIAL FORM) = Unicode Left-Joining (+ Dual-Joining + Join_Causing 00640)
    $this->arabLeftJoining = array(
        0x0620 => 1, 0x0626 => 1, 0x0628 => 1, 0x062A => 1, 0x062B => 1, 0x062C => 1, 0x062D => 1, 0x062E => 1,
        0x0633 => 1, 0x0634 => 1, 0x0635 => 1, 0x0636 => 1, 0x0637 => 1, 0x0638 => 1, 0x0639 => 1, 0x063A => 1,
        0x063B => 1, 0x063C => 1, 0x063D => 1, 0x063E => 1, 0x063F => 1, 0x0640 => 1, 0x0641 => 1, 0x0642 => 1,
        0x0643 => 1, 0x0644 => 1, 0x0645 => 1, 0x0646 => 1, 0x0647 => 1, 0x0649 => 1, 0x064A => 1, 0x066E => 1,
        0x066F => 1, 0x0678 => 1, 0x0679 => 1, 0x067A => 1, 0x067B => 1, 0x067C => 1, 0x067D => 1, 0x067E => 1,
        0x067F => 1, 0x0680 => 1, 0x0681 => 1, 0x0682 => 1, 0x0683 => 1, 0x0684 => 1, 0x0685 => 1, 0x0686 => 1,
        0x0687 => 1, 0x069A => 1, 0x069B => 1, 0x069C => 1, 0x069D => 1, 0x069E => 1, 0x069F => 1, 0x06A0 => 1,
        0x06A1 => 1, 0x06A2 => 1, 0x06A3 => 1, 0x06A4 => 1, 0x06A5 => 1, 0x06A6 => 1, 0x06A7 => 1, 0x06A8 => 1,
        0x06A9 => 1, 0x06AA => 1, 0x06AB => 1, 0x06AC => 1, 0x06AD => 1, 0x06AE => 1, 0x06AF => 1, 0x06B0 => 1,
        0x06B1 => 1, 0x06B2 => 1, 0x06B3 => 1, 0x06B4 => 1, 0x06B5 => 1, 0x06B6 => 1, 0x06B7 => 1, 0x06B8 => 1,
        0x06B9 => 1, 0x06BA => 1, 0x06BB => 1, 0x06BC => 1, 0x06BD => 1, 0x06BE => 1, 0x06BF => 1, 0x06C1 => 1,
        0x06C2 => 1, 0x06CC => 1, 0x06CE => 1, 0x06D0 => 1, 0x06D1 => 1, 0x06FA => 1, 0x06FB => 1, 0x06FC => 1,
        0x06FF => 1,
        /* Arabic Supplement */
        0x0750 => 1, 0x0751 => 1, 0x0752 => 1, 0x0753 => 1, 0x0754 => 1, 0x0755 => 1, 0x0756 => 1, 0x0757 => 1,
        0x0758 => 1, 0x075C => 1, 0x075D => 1, 0x075E => 1, 0x075F => 1, 0x0760 => 1, 0x0761 => 1, 0x0762 => 1,
        0x0763 => 1, 0x0764 => 1, 0x0765 => 1, 0x0766 => 1, 0x0767 => 1, 0x0768 => 1, 0x0769 => 1, 0x076A => 1,
        0x076D => 1, 0x076E => 1, 0x076F => 1, 0x0770 => 1, 0x0772 => 1, 0x0775 => 1, 0x0776 => 1, 0x0777 => 1,
        0x077A => 1, 0x077B => 1, 0x077C => 1, 0x077D => 1, 0x077E => 1, 0x077F => 1,
        /* Extended Arabic */
        0x08A0 => 1, 0x08A2 => 1, 0x08A3 => 1, 0x08A4 => 1, 0x08A5 => 1, 0x08A6 => 1, 0x08A7 => 1, 0x08A8 => 1,
        0x08A9 => 1,
        /* 'syrc' Syriac */
        0x0712 => 1, 0x0713 => 1, 0x0714 => 1, 0x071A => 1, 0x071B => 1, 0x071C => 1, 0x071D => 1, 0x071F => 1,
        0x0720 => 1, 0x0721 => 1, 0x0722 => 1, 0x0723 => 1, 0x0724 => 1, 0x0725 => 1, 0x0726 => 1, 0x0727 => 1,
        0x0729 => 1, 0x072B => 1, 0x072D => 1, 0x072E => 1, 0x074E => 1, 0x074F => 1,
        /* N'Ko */
        0x07CA => 1, 0x07CB => 1, 0x07CC => 1, 0x07CD => 1, 0x07CE => 1, 0x07CF => 1, 0x07D0 => 1, 0x07D1 => 1,
        0x07D2 => 1, 0x07D3 => 1, 0x07D4 => 1, 0x07D5 => 1, 0x07D6 => 1, 0x07D7 => 1, 0x07D8 => 1, 0x07D9 => 1,
        0x07DA => 1, 0x07DB => 1, 0x07DC => 1, 0x07DD => 1, 0x07DE => 1, 0x07DF => 1, 0x07E0 => 1, 0x07E1 => 1,
        0x07E2 => 1, 0x07E3 => 1, 0x07E4 => 1, 0x07E5 => 1, 0x07E6 => 1, 0x07E7 => 1, 0x07E8 => 1, 0x07E9 => 1,
        0x07EA => 1, 0x07FA => 1,
        /* Mandaic */
        0x0841 => 1, 0x0842 => 1, 0x0843 => 1, 0x0844 => 1, 0x0845 => 1, 0x0847 => 1, 0x0848 => 1, 0x084A => 1,
        0x084B => 1, 0x084C => 1, 0x084D => 1, 0x084E => 1, 0x0850 => 1, 0x0851 => 1, 0x0852 => 1, 0x0853 => 1,
        0x0855 => 1,
        /* ZWJ U+200D */
        0x0200D => 1);

    /* JOIN TO PREVIOUS LETTER IN LOGICAL ORDER (i.e. AS FINAL/MEDIAL FORM) = Unicode Right-Joining (+ Dual-Joining + Join_Causing) */
    $this->arabRightJoining = array(
        0x0620 => 1, 0x0622 => 1, 0x0623 => 1, 0x0624 => 1, 0x0625 => 1, 0x0626 => 1, 0x0627 => 1, 0x0628 => 1,
        0x0629 => 1, 0x062A => 1, 0x062B => 1, 0x062C => 1, 0x062D => 1, 0x062E => 1, 0x062F => 1, 0x0630 => 1,
        0x0631 => 1, 0x0632 => 1, 0x0633 => 1, 0x0634 => 1, 0x0635 => 1, 0x0636 => 1, 0x0637 => 1, 0x0638 => 1,
        0x0639 => 1, 0x063A => 1, 0x063B => 1, 0x063C => 1, 0x063D => 1, 0x063E => 1, 0x063F => 1, 0x0640 => 1,
        0x0641 => 1, 0x0642 => 1, 0x0643 => 1, 0x0644 => 1, 0x0645 => 1, 0x0646 => 1, 0x0647 => 1, 0x0648 => 1,
        0x0649 => 1, 0x064A => 1, 0x066E => 1, 0x066F => 1, 0x0671 => 1, 0x0672 => 1, 0x0673 => 1, 0x0675 => 1,
        0x0676 => 1, 0x0677 => 1, 0x0678 => 1, 0x0679 => 1, 0x067A => 1, 0x067B => 1, 0x067C => 1, 0x067D => 1,
        0x067E => 1, 0x067F => 1, 0x0680 => 1, 0x0681 => 1, 0x0682 => 1, 0x0683 => 1, 0x0684 => 1, 0x0685 => 1,
        0x0686 => 1, 0x0687 => 1, 0x0688 => 1, 0x0689 => 1, 0x068A => 1, 0x068B => 1, 0x068C => 1, 0x068D => 1,
        0x068E => 1, 0x068F => 1, 0x0690 => 1, 0x0691 => 1, 0x0692 => 1, 0x0693 => 1, 0x0694 => 1, 0x0695 => 1,
        0x0696 => 1, 0x0697 => 1, 0x0698 => 1, 0x0699 => 1, 0x069A => 1, 0x069B => 1, 0x069C => 1, 0x069D => 1,
        0x069E => 1, 0x069F => 1, 0x06A0 => 1, 0x06A1 => 1, 0x06A2 => 1, 0x06A3 => 1, 0x06A4 => 1, 0x06A5 => 1,
        0x06A6 => 1, 0x06A7 => 1, 0x06A8 => 1, 0x06A9 => 1, 0x06AA => 1, 0x06AB => 1, 0x06AC => 1, 0x06AD => 1,
        0x06AE => 1, 0x06AF => 1, 0x06B0 => 1, 0x06B1 => 1, 0x06B2 => 1, 0x06B3 => 1, 0x06B4 => 1, 0x06B5 => 1,
        0x06B6 => 1, 0x06B7 => 1, 0x06B8 => 1, 0x06B9 => 1, 0x06BA => 1, 0x06BB => 1, 0x06BC => 1, 0x06BD => 1,
        0x06BE => 1, 0x06BF => 1, 0x06C0 => 1, 0x06C1 => 1, 0x06C2 => 1, 0x06C3 => 1, 0x06C4 => 1, 0x06C5 => 1,
        0x06C6 => 1, 0x06C7 => 1, 0x06C8 => 1, 0x06C9 => 1, 0x06CA => 1, 0x06CB => 1, 0x06CC => 1, 0x06CD => 1,
        0x06CE => 1, 0x06CF => 1, 0x06D0 => 1, 0x06D1 => 1, 0x06D2 => 1, 0x06D3 => 1, 0x06D5 => 1, 0x06EE => 1,
        0x06EF => 1, 0x06FA => 1, 0x06FB => 1, 0x06FC => 1, 0x06FF => 1,
        /* Arabic Supplement */
        0x0750 => 1, 0x0751 => 1, 0x0752 => 1, 0x0753 => 1, 0x0754 => 1, 0x0755 => 1, 0x0756 => 1, 0x0757 => 1,
        0x0758 => 1, 0x0759 => 1, 0x075A => 1, 0x075B => 1, 0x075C => 1, 0x075D => 1, 0x075E => 1, 0x075F => 1,
        0x0760 => 1, 0x0761 => 1, 0x0762 => 1, 0x0763 => 1, 0x0764 => 1, 0x0765 => 1, 0x0766 => 1, 0x0767 => 1,
        0x0768 => 1, 0x0769 => 1, 0x076A => 1, 0x076B => 1, 0x076C => 1, 0x076D => 1, 0x076E => 1, 0x076F => 1,
        0x0770 => 1, 0x0771 => 1, 0x0772 => 1, 0x0773 => 1, 0x0774 => 1, 0x0775 => 1, 0x0776 => 1, 0x0777 => 1,
        0x0778 => 1, 0x0779 => 1, 0x077A => 1, 0x077B => 1, 0x077C => 1, 0x077D => 1, 0x077E => 1, 0x077F => 1,
        /* Extended Arabic */
        0x08A0 => 1, 0x08A2 => 1, 0x08A3 => 1, 0x08A4 => 1, 0x08A5 => 1, 0x08A6 => 1, 0x08A7 => 1, 0x08A8 => 1,
        0x08A9 => 1, 0x08AA => 1, 0x08AB => 1, 0x08AC => 1,
        /* 'syrc' Syriac */
        0x0710 => 1, 0x0712 => 1, 0x0713 => 1, 0x0714 => 1, 0x0715 => 1, 0x0716 => 1, 0x0717 => 1, 0x0718 => 1,
        0x0719 => 1, 0x071A => 1, 0x071B => 1, 0x071C => 1, 0x071D => 1, 0x071E => 1, 0x071F => 1, 0x0720 => 1,
        0x0721 => 1, 0x0722 => 1, 0x0723 => 1, 0x0724 => 1, 0x0725 => 1, 0x0726 => 1, 0x0727 => 1, 0x0728 => 1,
        0x0729 => 1, 0x072A => 1, 0x072B => 1, 0x072C => 1, 0x072D => 1, 0x072E => 1, 0x072F => 1, 0x074D => 1,
        // 0x074F needs an explicit "=> 1": a bare "0x074F," would be stored as a VALUE under an
        // auto-assigned integer key, so isset($this->arabRightJoining[0x074F]) would be false.
        0x074E => 1, 0x074F => 1,
        /* N'Ko */
        0x07CA => 1, 0x07CB => 1, 0x07CC => 1, 0x07CD => 1, 0x07CE => 1, 0x07CF => 1, 0x07D0 => 1, 0x07D1 => 1,
        0x07D2 => 1, 0x07D3 => 1, 0x07D4 => 1, 0x07D5 => 1, 0x07D6 => 1, 0x07D7 => 1, 0x07D8 => 1, 0x07D9 => 1,
        0x07DA => 1, 0x07DB => 1, 0x07DC => 1, 0x07DD => 1, 0x07DE => 1, 0x07DF => 1, 0x07E0 => 1, 0x07E1 => 1,
        0x07E2 => 1, 0x07E3 => 1, 0x07E4 => 1, 0x07E5 => 1, 0x07E6 => 1, 0x07E7 => 1, 0x07E8 => 1, 0x07E9 => 1,
        0x07EA => 1, 0x07FA => 1,
        /* Mandaic */
        0x0841 => 1, 0x0842 => 1, 0x0843 => 1, 0x0844 => 1, 0x0845 => 1, 0x0847 => 1, 0x0848 => 1, 0x084A => 1,
        0x084B => 1, 0x084C => 1, 0x084D => 1, 0x084E => 1, 0x0850 => 1, 0x0851 => 1, 0x0852 => 1, 0x0853 => 1,
        0x0855 => 1,
        0x0840 => 1, 0x0846 => 1, 0x0849 => 1, 0x084F => 1, 0x0854 => 1, /* Right joining */
        /* ZWJ U+200D */
        0x0200D => 1);

    /* VOWELS = TRANSPARENT-JOINING = Unicode Transparent-Joining type (not just vowels) */
    $this->arabTransparent = array(
        0x0610 => 1, 0x0611 => 1, 0x0612 => 1, 0x0613 => 1, 0x0614 => 1, 0x0615 => 1, 0x0616 => 1, 0x0617 => 1,
        0x0618 => 1, 0x0619 => 1, 0x061A => 1, 0x064B => 1, 0x064C => 1, 0x064D => 1, 0x064E => 1, 0x064F => 1,
        0x0650 => 1, 0x0651 => 1, 0x0652 => 1, 0x0653 => 1, 0x0654 => 1, 0x0655 => 1, 0x0656 => 1, 0x0657 => 1,
        0x0658 => 1, 0x0659 => 1, 0x065A => 1, 0x065B => 1, 0x065C => 1, 0x065D => 1, 0x065E => 1, 0x065F => 1,
        0x0670 => 1, 0x06D6 => 1, 0x06D7 => 1, 0x06D8 => 1, 0x06D9 => 1, 0x06DA => 1, 0x06DB => 1, 0x06DC => 1,
        0x06DF => 1, 0x06E0 => 1, 0x06E1 => 1, 0x06E2 => 1, 0x06E3 => 1, 0x06E4 => 1, 0x06E7 => 1, 0x06E8 => 1,
        0x06EA => 1, 0x06EB => 1, 0x06EC => 1, 0x06ED => 1,
        /* Extended Arabic */
        0x08E4 => 1, 0x08E5 => 1, 0x08E6 => 1, 0x08E7 => 1, 0x08E8 => 1, 0x08E9 => 1, 0x08EA => 1, 0x08EB => 1,
        0x08EC => 1, 0x08ED => 1, 0x08EE => 1, 0x08EF => 1, 0x08F0 => 1, 0x08F1 => 1, 0x08F2 => 1, 0x08F3 => 1,
        0x08F4 => 1, 0x08F5 => 1, 0x08F6 => 1, 0x08F7 => 1, 0x08F8 => 1, 0x08F9 => 1, 0x08FA => 1, 0x08FB => 1,
        0x08FC => 1, 0x08FD => 1, 0x08FE => 1,
        /* Arabic ligatures in presentation form (converted in 'ccmp' in e.g. Arial and Times ? need to add others in this range) */
        0xFC5E => 1, 0xFC5F => 1, 0xFC60 => 1, 0xFC61 => 1, 0xFC62 => 1,
        /* 'syrc' Syriac */
        0x070F => 1, 0x0711 => 1, 0x0730 => 1, 0x0731 => 1, 0x0732 => 1, 0x0733 => 1, 0x0734 => 1, 0x0735 => 1,
        0x0736 => 1, 0x0737 => 1, 0x0738 => 1, 0x0739 => 1, 0x073A => 1, 0x073B => 1, 0x073C => 1, 0x073D => 1,
        0x073E => 1, 0x073F => 1, 0x0740 => 1, 0x0741 => 1, 0x0742 => 1, 0x0743 => 1, 0x0744 => 1, 0x0745 => 1,
        0x0746 => 1, 0x0747 => 1, 0x0748 => 1, 0x0749 => 1, 0x074A => 1,
        /* N'Ko */
        0x07EB => 1, 0x07EC => 1, 0x07ED => 1, 0x07EE => 1, 0x07EF => 1, 0x07F0 => 1, 0x07F1 => 1, 0x07F2 => 1,
        0x07F3 => 1,
        /* Mandaic */
        0x0859 => 1, 0x085A => 1, 0x085B => 1,
    );
}
/**
 * Choose the positional form of every glyph in $this->OTLdata for Arabic-family scripts.
 *
 * The run is walked from the last glyph to the first. For each glyph the joining
 * behaviour of its neighbours decides the requested form, and get_arab_glyphs()
 * returns the hex code / form actually substituted. The results are written back
 * to the 'uni', 'hex' and 'form' entries of $this->OTLdata.
 *
 * @param string $usetags   Feature tags in use; passed through to get_arab_glyphs()
 * @param string $scriptTag Script tag; passed through to get_arab_glyphs()
 */
function arabic_shaper($usetags, $scriptTag)
{
    // Collect the hex code of every glyph in the run
    $chars = array();
    $glyphTotal = count($this->OTLdata);
    for ($g = 0; $g < $glyphTotal; $g++) {
        $chars[] = $this->OTLdata[$g]['hex'];
    }

    $output = array();
    $nextChar = null; // last non-transparent character already handled (the one that follows in logical order)

    for ($i = count($chars) - 1; $i >= 0; $i--) {
        $crntChar = $chars[$i];
        $prevChar = ($i > 0) ? hexdec($chars[$i - 1]) : null;

        // If the preceding character is transparent, look further back (at most to $i - 4)
        for ($back = 2; $back <= 4; $back++) {
            if (!$prevChar || !isset($this->arabTransparentJoin[$prevChar]) || !isset($chars[$i - $back])) {
                break;
            }
            $prevChar = hexdec($chars[$i - $back]);
        }

        $prevJoinsLeft = ($prevChar && isset($this->arabLeftJoining[$prevChar]));

        // Transparent character: <final> form only when it sits between two joining letters, else <isolated>
        if ($crntChar && isset($this->arabTransparentJoin[hexdec($crntChar)])) {
            $followerJoinsRight = (isset($chars[$i + 1]) && $chars[$i + 1] && isset($this->arabRightJoining[hexdec($chars[$i + 1])]));
            $transparentForm = ($followerJoinsRight && $prevJoinsLeft) ? 1 : 0;
            $output[] = $this->get_arab_glyphs($crntChar, $transparentForm, $chars, $i, $scriptTag, $usetags);
            continue; // NB $nextChar is deliberately left unchanged
        }

        if (hexdec($crntChar) < 128) {
            // Code points below 128 are passed through unshaped
            $output[] = array($crntChar, 0);
        } else {
            // 0=ISOLATED FORM :: 1=FINAL :: 2=INITIAL :: 3=MEDIAL
            $form = 0;
            if ($prevJoinsLeft) {
                $form += 1;
            }
            if ($nextChar && isset($this->arabRightJoining[hexdec($nextChar)])) {
                $form += 2;
            }
            $output[] = $this->get_arab_glyphs($crntChar, $form, $chars, $i, $scriptTag, $usetags);
        }
        $nextChar = $crntChar;
    }

    // $output was built back-to-front; restore logical order and store the results
    $shaped = array_reverse($output);
    for ($g = 0; $g < count($this->OTLdata); $g++) {
        $hex = $shaped[$g][0];
        $this->OTLdata[$g]['uni'] = hexdec($hex);
        $this->OTLdata[$g]['hex'] = $hex;
        $this->OTLdata[$g]['form'] = $shaped[$g][1]; // Actual form substituted 0=ISOLATED FORM :: 1=FINAL :: 2=INITIAL :: 3=MEDIAL
    }
}
/**
 * Look up the positional-form substitution for one character of an Arabic-family run.
 *
 * @param string $char      Hex code of the character; used as key into $this->arabGlyphs
 * @param int    $type      Requested form: 0=isolated, 1=final, 2=initial, 3=medial
 * @param array  $chars     Hex codes of the whole run (passed by reference, only read here)
 * @param int    $i         Index of $char within $chars
 * @param string $scriptTag Script tag; 'syrc' enables the special handling of Alaph (00710)
 * @param string $usetags   Enabled feature tags; searched for 'isol', 'fina', 'init', 'medi'
 * @return array array(hex code, form used); array($char, 0) when no substitution applies
 */
function get_arab_glyphs($char, $type, &$chars, $i, $scriptTag, $usetags)
{
    // Optional Feature settings (doesn't control Syriac at present):
    // leave the character alone when the feature for the requested form is not enabled
    if (($type === 0 && strpos($usetags, 'isol') === false) || ($type === 1 && strpos($usetags, 'fina') === false) || ($type === 2 && strpos($usetags, 'init') === false) || ($type === 3 && strpos($usetags, 'medi') === false)) {
        return array($char, 0);
    }

    // 0=ISOLATED FORM :: 1=FINAL :: 2=INITIAL :: 3=MEDIAL (:: 4=MED2 :: 5=FIN2 :: 6=FIN3)
    $retk = -1;

    // Alaph 00710 in Syriac
    if ($scriptTag == 'syrc' && $char == '00710') {
        // if there is a preceding (base?) character *** should search back to previous base - ignoring vowels and change $n
        // set $n as the position of the last base; for now we'll just do this:
        $n = $i - 1;
        // NOTE(review): the original comment here reads "if the preceding (base) character cannot be
        // joined to / not in $this->arabLeftJoining", yet the condition tests that it IS in
        // arabLeftJoining. Left unchanged - confirm the intended polarity against the Syriac shaping rules.
        if (isset($chars[$n]) && isset($this->arabLeftJoining[hexdec($chars[$n])])) {
            // if in the middle of Syriac words
            if (isset($chars[$i + 1]) && preg_match('/[\x{0700}-\x{0745}]/u', code2utf(hexdec($chars[$n]))) && preg_match('/[\x{0700}-\x{0745}]/u', code2utf(hexdec($chars[$i + 1]))) && isset($this->arabGlyphs[$char][4])) {
                $retk = 4;
            }
            // if at the end of Syriac words
            else if (!isset($chars[$i + 1]) || !preg_match('/[\x{0700}-\x{0745}]/u', code2utf(hexdec($chars[$i + 1])))) {
                // if preceding base character IS (00715|00716|0072A)
                // The list uses 5-digit codes like '00710' above; the former '0715|0716|072A'
                // could never contain such a code.
                if (strpos('00715|00716|0072A', $chars[$n]) !== false && isset($this->arabGlyphs[$char][6])) {
                    $retk = 6;
                }
                // else if preceding base character is NOT (00715|00716|0072A)
                else if (isset($this->arabGlyphs[$char][5])) {
                    $retk = 5;
                }
            }
        }
        if ($retk != -1) {
            return array($this->arabGlyphs[$char][$retk], $retk);
        }
        return array($char, 0);
    }

    if (($type > 0 || $type === 0) && isset($this->arabGlyphs[$char][$type])) {
        $retk = $type;
    } else if ($type == 3 && isset($this->arabGlyphs[$char][1])) { // if <medial> not defined, but <final>, return <final>
        $retk = 1;
    } else if ($type == 2 && isset($this->arabGlyphs[$char][0])) { // if <initial> not defined, but <isolated>, return <isolated>
        $retk = 0;
    }

    if ($retk == -1) {
        return array($char, 0);
    }

    // If GSUB includes a Backtrack or Lookahead condition (e.g. font ArabicTypesetting)
    // 'prel' is matched walking backwards from $i, 'postl' walking forwards.
    $match = true;
    $ignore = isset($this->arabGlyphs[$char]['ignore'][$retk]) ? $this->arabGlyphs[$char]['ignore'][$retk] : '';
    foreach (array('prel' => -1, 'postl' => 1) as $contextKey => $direction) {
        if (!isset($this->arabGlyphs[$char][$contextKey][$retk]) || !$this->arabGlyphs[$char][$contextKey][$retk]) {
            continue;
        }
        $ig = 1;
        foreach ($this->arabGlyphs[$char][$contextKey][$retk] as $k => $v) { // $k starts 0, 1...
            $idx = $i + $direction * ($ig + $k);
            if (isset($chars[$idx]) && strpos($v, $chars[$idx]) !== false) {
                continue; // context glyph matches directly
            }
            // Skip over ignorable glyphs. The isset() guard stops the scan at the edge of the run;
            // without it strpos() would be called with a missing needle, which on PHP 8 matches
            // at offset 0 and would loop forever.
            while (isset($chars[$idx]) && strpos($ignore, $chars[$idx]) !== false) {
                $ig++;
                $idx = $i + $direction * ($ig + $k);
            }
            if (!isset($chars[$idx]) || strpos($v, $chars[$idx]) === false) {
                $match = false;
            }
        }
    }

    if ($match) {
        return array($this->arabGlyphs[$char][$retk], $retk);
    }
    return array($char, 0);
}
2993 ////////////////////////////////////////////////////////////////
2994 ////////////////////////////////////////////////////////////////
2995 ///////////////// LINE BREAKING ///////////////////////
2996 ////////////////////////////////////////////////////////////////
2997 ////////////////////////////////////////////////////////////////
2998 ////////////////////////////////////////////////////////////////
2999 ///////////// TIBETAN LINE BREAKING ///////////////////
3000 ////////////////////////////////////////////////////////////////
3001 // Sets $this->OTLdata[$i]['wordend']=true at possible end of word boundaries
/**
 * Mark Tibetan line-break opportunities in $this->OTLdata.
 *
 * A glyph whose 'uni' is U+0F0B (Tsheg) or U+0F0D gets 'wordend' = true,
 * unless the glyph that follows it is U+0F0D or U+0F0E.
 */
function TibetanlineBreaking()
{
    $glyphTotal = count($this->OTLdata);
    for ($ptr = 0; $ptr < $glyphTotal; $ptr++) {
        if (!isset($this->OTLdata[$ptr]['uni'])) {
            continue;
        }
        $uni = $this->OTLdata[$ptr]['uni'];
        // Break opportunities at U+0F0B Tsheg or U+0F0D
        if ($uni != 0x0F0B && $uni != 0x0F0D) {
            continue;
        }
        // ... but not when directly followed by U+0F0D or U+0F0E
        if (isset($this->OTLdata[$ptr + 1]['uni'])) {
            $following = $this->OTLdata[$ptr + 1]['uni'];
            if ($following == 0x0F0D || $following == 0x0F0E) {
                continue;
            }
        }
        // Set end of word marker in OTLdata
        $this->OTLdata[$ptr]['wordend'] = true;
    }
}
3016 ////////////////////////////////////////////////////////////////
3017 ////////// SOUTH EAST ASIAN LINE BREAKING /////////////
3018 ////////////////////////////////////////////////////////////////
3019 // South East Asian Linebreaking (Thai, Khmer and Lao) using dictionary of words
3020 // Sets $this->OTLdata[$i]['wordend']=true at possible end of word boundaries
/**
 * Dictionary-based word segmentation for the current shaper's script.
 *
 * Loads (and caches in $this->lbdicts) the dictionary file
 * 'includes/linebrdict<shaper>.dat' if it exists, then scans $this->OTLdata and
 * sets 'wordend' = true on the last glyph of every word found via checkwordmatch().
 *
 * A word may be extended by one repeater/elision character:
 *   0x0E2F  THAI_PAIYANNOI (elision)
 *   0x0E46  THAI_MAIYAMOK  (repeat)
 *   0x0EC6  LAO            (repeat)
 */
function SEAlineBreaking()
{
    $script = $this->shaper;

    // Load Line-breaking dictionary (once per script)
    if (!isset($this->lbdicts[$script])) {
        $dictFile = _MPDF_PATH . 'includes/linebrdict' . $script . '.dat';
        if (file_exists($dictFile)) {
            $this->lbdicts[$script] = file_get_contents($dictFile);
        }
    }
    $dict = &$this->lbdicts[$script];

    $pending = array(); // matches already computed for the next position during lookahead
    $ptr = 0;
    while ($ptr < count($this->OTLdata) - 3) {
        if (count($pending)) {
            $matches = $pending;
            $pending = array();
        } else {
            $matches = $this->checkwordmatch($dict, $ptr);
        }

        $matchTotal = count($matches);
        $skipLowCodes = false;

        if ($matchTotal == 0) {
            // No word starts here: step forward one glyph
            $ptr++;
            $skipLowCodes = true;
        } else if ($matchTotal == 1) {
            $matchpos = $matches[0];
            // Check for repeaters - if so the word ends one glyph later
            if (isset($this->OTLdata[$matchpos + 1]['uni'])) {
                $after = $this->OTLdata[$matchpos + 1]['uni'];
                if ($after == 0x0E2F || $after == 0x0E46 || $after == 0x0EC6) {
                    $matchpos++;
                }
            }
            // Set end of word marker in OTLdata at matchpos
            $this->OTLdata[$matchpos]['wordend'] = true;
            $ptr = $matchpos + 1;
        } else {
            // Multiple matches: starting from the last candidate, pick the first one
            // after which another dictionary word begins
            $chained = false;
            for ($m = $matchTotal - 1; $m >= 0; $m--) {
                $candidate = $matches[$m];
                $onward = $this->checkwordmatch($dict, $candidate + 1);
                if (count($onward)) {
                    $this->OTLdata[$candidate]['wordend'] = true;
                    $ptr = $candidate + 1;
                    $pending = $onward; // reuse on the next pass instead of matching again
                    $chained = true;
                    break;
                }
            }
            if (!$chained) {
                // Fall back to the last candidate (the longest first match)
                $longest = $matches[$matchTotal - 1];
                $this->OTLdata[$longest]['wordend'] = true;
                $ptr = $longest + 1;
                $skipLowCodes = true;
            }
        }

        if ($skipLowCodes) {
            // Move past any characters whose code point is below 0x100 ("ASCII" in the original notes)
            while (isset($this->OTLdata[$ptr]['uni']) && ($this->OTLdata[$ptr]['uni'] >> 8) == 0) {
                $ptr++;
            }
        }
    }
}
/**
 * Walk the binary line-breaking dictionary to find words starting at glyph $ptr.
 *
 * The dictionary is a byte string of nodes; the first byte of a node is its type
 * (the _DICT_* constants defined at the top of this file):
 *
 *   _DICT_NODE_TYPE_SPLIT    (0x01)  followed by a divider byte and a 4-byte big-endian offset:
 *                                    if the text byte is < divider carry on after the offset,
 *                                    otherwise jump to the offset
 *   _DICT_NODE_TYPE_LINEAR   (0x02)  followed by one byte that must equal the text byte
 *   _DICT_INTERMEDIATE_MATCH (0x03)  a word ends here, longer words may follow
 *   _DICT_FINAL_MATCH        (0x04)  a word ends here, nothing longer exists
 *
 * Only the low byte of each glyph's 'uni' value is compared against the dictionary.
 *
 * @param string $dict Dictionary data (passed by reference to avoid copying)
 * @param int    $ptr  Index into $this->OTLdata at which matching starts
 * @return array Indexes of the last glyph of every word found (ascending); empty when none
 */
function checkwordmatch(&$dict, $ptr)
{
    $matches = array();

    // No dictionary was loaded for this script: nothing can match
    if (!is_string($dict) || $dict === '') {
        return $matches;
    }

    $dictptr = 0;
    $ok = true;
    while ($ok) {
        if (!isset($dict[$dictptr])) {
            break; // ran off the end of the dictionary data
        }
        $x = ord($dict[$dictptr]);
        // Past the end of the text the byte compares as 0
        $c = isset($this->OTLdata[$ptr]['uni']) ? ($this->OTLdata[$ptr]['uni'] & 0xFF) : 0;

        if ($x == _DICT_INTERMEDIATE_MATCH) {
            // Do not match if next character in text is a Mark
            // (NB a word is only recorded when a following glyph exists)
            if (isset($this->OTLdata[$ptr]['uni']) && strpos($this->GlyphClassMarks, $this->OTLdata[$ptr]['hex']) === false) {
                $matches[] = $ptr - 1;
            }
            $dictptr++;
        } else if ($x == _DICT_FINAL_MATCH) {
            // Do not match if next character in text is a Mark
            if (isset($this->OTLdata[$ptr]['uni']) && strpos($this->GlyphClassMarks, $this->OTLdata[$ptr]['hex']) === false) {
                $matches[] = $ptr - 1;
            }
            return $matches;
        } else if ($x == _DICT_NODE_TYPE_LINEAR) {
            $dictptr++;
            $m = ord($dict[$dictptr]);
            if ($c != $m) {
                return $matches;
            }
            $ptr++;
            if ($ptr > count($this->OTLdata) - 1) {
                // Reached the end of the text
                $next = ord($dict[$dictptr + 1]);
                if ($next == _DICT_INTERMEDIATE_MATCH || $next == _DICT_FINAL_MATCH) {
                    // Do not match if next character in text is a Mark
                    if (isset($this->OTLdata[$ptr]['uni']) && strpos($this->GlyphClassMarks, $this->OTLdata[$ptr]['hex']) === false) {
                        $matches[] = $ptr - 1;
                    }
                }
                return $matches;
            }
            $dictptr++;
        } else if ($x == _DICT_NODE_TYPE_SPLIT) {
            $dictptr++;
            $d = ord($dict[$dictptr]);
            if ($c < $d) {
                // Skip the divider byte and the 4-byte offset
                $dictptr += 5;
            } else {
                $dictptr++;
                // Unsigned long 32-bit offset
                $offset = (ord($dict[$dictptr]) * 16777216) + (ord($dict[$dictptr + 1]) << 16) + (ord($dict[$dictptr + 2]) << 8) + ord($dict[$dictptr + 3]);
                $dictptr = $offset;
            }
        } else {
            $ok = false; // Something has gone wrong
        }
    }

    return $matches;
}
3172 ////////////////////////////////////////////////////////////////
3173 ////////////////////////////////////////////////////////////////
3174 ////////// GPOS ///////////////////////////////////////
3175 ////////////////////////////////////////////////////////////////
3176 ////////////////////////////////////////////////////////////////
/**
 * Run a list of GPOS lookups over the glyph run held in $this->OTLdata.
 *
 * Each lookup is applied to the whole run in turn. For every glyph position
 * the lookup's subtables are tried in order until one reports that it was
 * applied (a non-zero return from _applyGPOSsubtable()).
 *
 * @param array $LookupList   Map of GPOS lookup index => tag; the tag is passed through to _applyGPOSsubtable()
 * @param bool  $is_old_spec  Passed through unchanged to _applyGPOSsubtable()
 */
function _applyGPOSrules($LookupList, $is_old_spec = false)
{
	foreach ($LookupList as $lookupIndex => $featureTag) {
		$lookupType = $this->GPOSLookups[$lookupIndex]['Type'];
		$lookupFlag = $this->GPOSLookups[$lookupIndex]['Flag'];
		$markFilteringSet = '';
		if (isset($this->GPOSLookups[$lookupIndex]['MarkFilteringSet'])) {
			$markFilteringSet = $this->GPOSLookups[$lookupIndex]['MarkFilteringSet'];
		}

		// Walk the glyph run; $advance is (re)set inside the body and says how
		// many positions to move on once this glyph has been dealt with.
		for ($glyphPos = 0; $glyphPos < count($this->OTLdata); $glyphPos += $advance) {
			$glyphHex = $this->OTLdata[$glyphPos]['hex'];
			$glyphUni = $this->OTLdata[$glyphPos]['uni'];
			$advance = 1;

			foreach ($this->GPOSLookups[$lookupIndex]['Subtables'] as $subtableIndex => $subtableOffset) {
				// NB Coverage only looks at glyphs for position 1 (esp. 7.3 and 8.3).
				// The bidi_type test is only an existence check - its value is not used.
				if (!isset($this->LuCoverage[$lookupIndex][$subtableIndex][$glyphUni]) || !isset($this->OTLdata[$glyphPos]['bidi_type'])) {
					continue;
				}

				// NOTE(review): this rebasing suggests the GPOS table is stored
				// directly after the GSUB table in the data being read - confirm.
				$relativeOffset = $subtableOffset - $this->GPOS_offset + $this->GSUB_length;

				// Get rules from font GPOS subtable
				$advance = $this->_applyGPOSsubtable($lookupIndex, $subtableIndex, $glyphPos, $glyphHex, $glyphUni, $relativeOffset, $lookupType, $lookupFlag, $markFilteringSet, $this->LuCoverage[$lookupIndex][$subtableIndex], $featureTag, 0, $is_old_spec);
				if ($advance) {
					break;
				}
			}

			// No subtable applied (or it reported nothing): always move on by one glyph.
			if ($advance == 0) {
				$advance = 1;
			}
		}
	}
}
3212 //////////////////////////////////////////////////////////////////////////////////
3213 // GPOS Types
3214 // Lookup Type 1: Single Adjustment Positioning Subtable Adjust position of a single glyph
3215 // Lookup Type 2: Pair Adjustment Positioning Subtable Adjust position of a pair of glyphs
3216 // Lookup Type 3: Cursive Attachment Positioning Subtable Attach cursive glyphs
3217 // Lookup Type 4: MarkToBase Attachment Positioning Subtable Attach a combining mark to a base glyph
3218 // Lookup Type 5: MarkToLigature Attachment Positioning Subtable Attach a combining mark to a ligature
3219 // Lookup Type 6: MarkToMark Attachment Positioning Subtable Attach a combining mark to another mark
3220 // Lookup Type 7: Contextual Positioning Subtables Position one or more glyphs in context
3221 // Lookup Type 8: Chaining Contextual Positioning Subtable Position one or more glyphs in chained context
3222 // Lookup Type 9: Extension positioning
3223 //////////////////////////////////////////////////////////////////////////////////
/**
 * Accumulate the adjustments from a GPOS ValueRecord into $this->OTLdata[..]['GPOSinfo'].
 *
 * XAdvance is added to 'XAdvanceL' at the position returned by
 * _getXAdvancePos($basepos) and to 'XAdvanceR' at $basepos itself.
 * XPlacement / YPlacement are added to every position from $basepos up to
 * and including the position returned by _getXAdvancePos($basepos).
 *
 * @param int   $basepos  Index into $this->OTLdata of the glyph being adjusted
 * @param array $Value    ValueRecord; only the keys 'XAdvance', 'XPlacement' and 'YPlacement' are read, each optional
 */
function _applyGPOSvaluerecord($basepos, $Value)
{
	// If current glyph is a mark with a defined width, any XAdvance is considered to REPLACE the character Advance Width
	// Test case <div style="font-family:myanmartext">&#x1004;&#x103a;&#x1039;&#x1000;&#x1039;&#x1000;&#x103b;&#x103c;&#x103d;&#x1031;&#x102d;</div>
	$markWidth = 0;
	if (strpos($this->GlyphClassMarks, $this->OTLdata[$basepos]['hex']) !== false) {
		$charWidth = $this->mpdf->_getCharWidth($this->mpdf->CurrentFont['cw'], $this->OTLdata[$basepos]['uni']);
		$markWidth = round($charWidth * $this->mpdf->CurrentFont['unitsPerEm'] / 1000); // convert back to font design units
	}

	$lastPos = $this->_getXAdvancePos($basepos);

	if (isset($Value['XAdvance']) && ($Value['XAdvance'] - $markWidth) != 0) {
		// However DON'T REPLACE the character Advance Width if Advance Width is negative
		// Test case <div style="font-family: dejavusansmono">&#x440;&#x443;&#x301;&#x441;&#x441;&#x43a;&#x438;&#x439;</div>
		if ($Value['XAdvance'] < 0) {
			$markWidth = 0;
		}
		$advanceDelta = $Value['XAdvance'] - $markWidth;

		// For LTR apply XAdvanceL to the last mark following the base = at $lastPos
		if (isset($this->OTLdata[$lastPos]['GPOSinfo']['XAdvanceL'])) {
			$this->OTLdata[$lastPos]['GPOSinfo']['XAdvanceL'] += $advanceDelta;
		} else {
			$this->OTLdata[$lastPos]['GPOSinfo']['XAdvanceL'] = $advanceDelta;
		}

		// For RTL apply XAdvanceR to base = at $basepos
		if (isset($this->OTLdata[$basepos]['GPOSinfo']['XAdvanceR'])) {
			$this->OTLdata[$basepos]['GPOSinfo']['XAdvanceR'] += $advanceDelta;
		} else {
			$this->OTLdata[$basepos]['GPOSinfo']['XAdvanceR'] = $advanceDelta;
		}
	}

	// Any XPlacement (? and Y Placement) apply to base and marks (from basepos to lastPos)
	for ($i = $basepos; $i <= $lastPos; $i++) {
		foreach (array('XPlacement', 'YPlacement') as $axis) {
			if (!isset($Value[$axis])) {
				continue;
			}
			if (isset($this->OTLdata[$i]['GPOSinfo'][$axis])) {
				$this->OTLdata[$i]['GPOSinfo'][$axis] += $Value[$axis];
			} else {
				$this->OTLdata[$i]['GPOSinfo'][$axis] = $Value[$axis];
			}
		}
	}
}
3277 // If XAdvance is applied to $ptr, then for the PDF to position the Advance correctly it needs to be placed on
3278 // the last of any Marks which immediately follow the current glyph
/**
 * Find the position in $this->OTLdata on which an XAdvance for $pos should be recorded.
 *
 * A glyph counts as a mark when its 'hex' value occurs in $this->GlyphClassMarks.
 *
 * @param int $pos  Index into $this->OTLdata
 * @return int $pos itself when that glyph is a mark; otherwise the index of the
 *             last glyph in the unbroken run of marks directly after $pos
 *             ($pos again if no mark follows)
 */
function _getXAdvancePos($pos)
{
	$marks = $this->GlyphClassMarks;

	// NB Not all fonts have all marks specified in GlyphClassMarks
	// If the current glyph is not a base (but a mark) then ignore this, and apply to the current position
	if (strpos($marks, $this->OTLdata[$pos]['hex']) !== false) {
		return $pos;
	}

	// Step over each directly following glyph for as long as it is a mark
	for ($next = $pos + 1; isset($this->OTLdata[$next]['hex']); $next++) {
		if (strpos($marks, $this->OTLdata[$next]['hex']) === false) {
			break;
		}
		$pos = $next;
	}

	return $pos;
}
3293 function _applyGPOSsubtable($lookupID, $subtable, $ptr, $currGlyph, $currGID, $subtable_offset, $Type, $Flag, $MarkFilteringSet, $LuCoverage, $tag, $level = 0, $is_old_spec)
3295 if (($Flag & 0x0001) == 1) {
3296 $dir = 'RTL';
3297 } // only used for Type 3
3298 else {
3299 $dir = 'LTR';
3301 $ignore = $this->_getGCOMignoreString($Flag, $MarkFilteringSet);
3303 // Lets start
3304 $this->seek($subtable_offset);
3305 $PosFormat = $this->read_ushort();
3307 ////////////////////////////////////////////////////////////////////////////////
3308 // LookupType 1: Single adjustment Adjust position of a single glyph (e.g. SmallCaps/Sups/Subs)
3309 ////////////////////////////////////////////////////////////////////////////////
3310 if ($Type == 1) {
3311 //===========
3312 // Format 1:
3313 //===========
3314 if ($PosFormat == 1) {
3315 $Coverage = $subtable_offset + $this->read_ushort();
3316 $ValueFormat = $this->read_ushort();
3317 $Value = $this->_getValueRecord($ValueFormat);
3319 //===========
3320 // Format 2:
3321 //===========
3322 else if ($PosFormat == 2) {
3323 $Coverage = $subtable_offset + $this->read_ushort();
3324 $ValueFormat = $this->read_ushort();
3325 $ValueCount = $this->read_ushort();
3326 $GlyphPos = $LuCoverage[$currGID];
3327 $this->skip($GlyphPos * 2 * $this->count_bits($ValueFormat));
3328 $Value = $this->_getValueRecord($ValueFormat);
3330 $this->_applyGPOSvaluerecord($ptr, $Value);
3331 if ($this->debugOTL) {
3332 $this->_dumpproc('GPOS', $lookupID, $subtable, $Type, $PosFormat, $ptr, $currGlyph, $level);
3334 return 1;
3337 ////////////////////////////////////////////////////////////////////////////////
3338 // LookupType 2: Pair adjustment Adjust position of a pair of glyphs (Kerning)
3339 ////////////////////////////////////////////////////////////////////////////////
3340 else if ($Type == 2) {
3341 $Coverage = $subtable_offset + $this->read_ushort();
3342 $ValueFormat1 = $this->read_ushort();
3343 $ValueFormat2 = $this->read_ushort();
3344 $sizeOfPair = ( 2 * $this->count_bits($ValueFormat1) ) + ( 2 * $this->count_bits($ValueFormat2) );
3345 //===========
3346 // Format 1:
3347 //===========
3348 if ($PosFormat == 1) {
3349 $PairSetCount = $this->read_ushort();
3350 $PairSetOffset = array();
3351 for ($p = 0; $p < $PairSetCount; $p++) {
3352 $PairSetOffset[] = $subtable_offset + $this->read_ushort();
3354 for ($p = 0; $p < $PairSetCount; $p++) {
3355 if (isset($LuCoverage[$currGID]) && $LuCoverage[$currGID] == $p) {
3356 $this->seek($PairSetOffset[$p]);
3357 //PairSet table
3358 $PairValueCount = $this->read_ushort();
3359 for ($pv = 0; $pv < $PairValueCount; $pv++) {
3360 //PairValueRecord
3361 $gid = $this->read_ushort();
3362 $SecondGlyph = $this->glyphToChar($gid);
3363 $FirstGlyph = $this->OTLdata[$ptr]['uni'];
3365 $checkpos = $ptr;
3366 $checkpos++;
3367 while (isset($this->OTLdata[$checkpos]) && strpos($ignore, $this->OTLdata[$checkpos]['hex']) !== false) {
3368 $checkpos++;
3370 if (isset($this->OTLdata[$checkpos]) && $this->OTLdata[$checkpos]['uni'] == $SecondGlyph) {
3371 $matchedpos = $checkpos;
3372 } else {
3373 $matchedpos = false;
3376 if ($matchedpos !== false) {
3377 $Value1 = $this->_getValueRecord($ValueFormat1);
3378 $Value2 = $this->_getValueRecord($ValueFormat2);
3379 if ($ValueFormat1) {
3380 $this->_applyGPOSvaluerecord($ptr, $Value1);
3382 if ($ValueFormat2) {
3383 $this->_applyGPOSvaluerecord($matchedpos, $Value2);
3384 if ($this->debugOTL) {
3385 $this->_dumpproc('GPOS', $lookupID, $subtable, $Type, $PosFormat, $ptr, $currGlyph, $level);
3387 return $matchedpos - $ptr + 1;
3389 if ($this->debugOTL) {
3390 $this->_dumpproc('GPOS', $lookupID, $subtable, $Type, $PosFormat, $ptr, $currGlyph, $level);
3392 return $matchedpos - $ptr;
3393 } else {
3394 $this->skip($sizeOfPair);
3399 return 0;
3401 //===========
3402 // Format 2:
3403 //===========
3404 else if ($PosFormat == 2) {
3405 $ClassDef1 = $subtable_offset + $this->read_ushort();
3406 $ClassDef2 = $subtable_offset + $this->read_ushort();
3407 $Class1Count = $this->read_ushort();
3408 $Class2Count = $this->read_ushort();
3410 $sizeOfValueRecords = $Class1Count * $Class2Count * $sizeOfPair;
3412 //$this->skip($sizeOfValueRecords ); ???? NOT NEEDED
3413 // NB Class1Count includes Class 0 even though it is not defined by $ClassDef1
3414 // i.e. Class1Count = 5; Class1 will contain array(indices 1-4);
3415 $Class1 = $this->_getClassDefinitionTable($ClassDef1);
3416 $Class2 = $this->_getClassDefinitionTable($ClassDef2);
3417 $FirstGlyph = $this->OTLdata[$ptr]['uni'];
3418 $checkpos = $ptr;
3419 $checkpos++;
3420 while (isset($this->OTLdata[$checkpos]) && strpos($ignore, $this->OTLdata[$checkpos]['hex']) !== false) {
3421 $checkpos++;
3423 if (isset($this->OTLdata[$checkpos])) {
3424 $matchedpos = $checkpos;
3425 } else {
3426 return 0;
3429 $SecondGlyph = $this->OTLdata[$matchedpos]['uni'];
3430 for ($i = 0; $i < $Class1Count; $i++) {
3431 if (isset($Class1[$i]) && count($Class1[$i])) {
3432 $FirstClassPos = array_search($FirstGlyph, $Class1[$i]);
3433 if ($FirstClassPos === false) {
3434 continue;
3435 } else {
3436 for ($j = 0; $j < $Class2Count; $j++) {
3437 if (isset($Class2[$j]) && count($Class2[$j])) {
3439 $SecondClassPos = array_search($SecondGlyph, $Class2[$j]);
3440 if ($SecondClassPos === false) {
3441 continue;
3444 // Get ValueRecord[$i][$j]
3445 $offs = ($i * $Class2Count * $sizeOfPair) + ($j * $sizeOfPair);
3446 $this->seek($subtable_offset + 16 + $offs);
3448 $Value1 = $this->_getValueRecord($ValueFormat1);
3449 $Value2 = $this->_getValueRecord($ValueFormat2);
3450 if ($ValueFormat1) {
3451 $this->_applyGPOSvaluerecord($ptr, $Value1);
3453 if ($ValueFormat2) {
3454 $this->_applyGPOSvaluerecord($matchedpos, $Value2);
3455 if ($this->debugOTL) {
3456 $this->_dumpproc('GPOS', $lookupID, $subtable, $Type, $PosFormat, $ptr, $currGlyph, $level);
3458 return $matchedpos - $ptr + 1;
3460 if ($this->debugOTL) {
3461 $this->_dumpproc('GPOS', $lookupID, $subtable, $Type, $PosFormat, $ptr, $currGlyph, $level);
3463 return $matchedpos - $ptr;
3469 return 0;
3473 ////////////////////////////////////////////////////////////////////////////////
3474 // LookupType 3: Cursive attachment Attach cursive glyphs
3475 ////////////////////////////////////////////////////////////////////////////////
3476 else if ($Type == 3) {
3477 $this->skip(4);
3478 // Need default XAdvance for glyph
3479 $pdfWidth = $this->mpdf->_getCharWidth($this->mpdf->CurrentFont['cw'], hexdec($currGlyph)); // DON'T convert back to design units
3481 $CPos = $LuCoverage[$currGID];
3482 $this->skip($CPos * 4);
3483 $EntryAnchor = $this->read_ushort();
3484 $ExitAnchor = $this->read_ushort();
3485 if ($EntryAnchor != 0) {
3486 $EntryAnchor += $subtable_offset;
3487 list($x, $y) = $this->_getAnchorTable($EntryAnchor);
3488 if ($dir == 'RTL') {
3489 if (round($pdfWidth) == round($x * 1000 / $this->mpdf->CurrentFont['unitsPerEm'])) {
3490 $x = 0;
3491 } else {
3492 $x = $x - ($pdfWidth * $this->mpdf->CurrentFont['unitsPerEm'] / 1000);
3496 $this->Entry[$ptr] = array('X' => $x, 'Y' => $y, 'dir' => $dir);
3498 if ($ExitAnchor != 0) {
3499 $ExitAnchor += $subtable_offset;
3500 list($x, $y) = $this->_getAnchorTable($ExitAnchor);
3501 if ($dir == 'LTR') {
3502 if (round($pdfWidth) == round($x * 1000 / $this->mpdf->CurrentFont['unitsPerEm'])) {
3503 $x = 0;
3504 } else {
3505 $x = $x - ($pdfWidth * $this->mpdf->CurrentFont['unitsPerEm'] / 1000);
3508 $this->Exit[$ptr] = array('X' => $x, 'Y' => $y, 'dir' => $dir);
3510 if ($this->debugOTL) {
3511 $this->_dumpproc('GPOS', $lookupID, $subtable, $Type, $PosFormat, $ptr, $currGlyph, $level);
3513 return 1;
3516 ////////////////////////////////////////////////////////////////////////////////
3517 // LookupType 4: MarkToBase attachment Attach a combining mark to a base glyph
3518 ////////////////////////////////////////////////////////////////////////////////
3519 else if ($Type == 4) {
3520 $MarkCoverage = $subtable_offset + $this->read_ushort();
3521 //$MarkCoverage is already set in $LuCoverage 00065|00073 etc
3522 $BaseCoverage = $subtable_offset + $this->read_ushort();
3523 $ClassCount = $this->read_ushort(); // Number of classes defined for marks = Number of mark glyphs in the MarkCoverage table
3524 $MarkArray = $subtable_offset + $this->read_ushort(); // Offset to MarkArray table
3525 $BaseArray = $subtable_offset + $this->read_ushort(); // Offset to BaseArray table
3527 $this->seek($BaseCoverage);
3528 $BaseGlyphs = implode('|', $this->_getCoverage());
3530 $checkpos = $ptr;
3531 $checkpos--;
3533 // ZZZ93
3534 // In Lohit-Kannada font (old-spec), rules specify a Type 4 GPOS to attach below-forms to base glyph
3535 // the repositioning does not happen in MS Word, and shouldn't happen comparing with other fonts
3536 // ?Why not
3537 // This Fix blocks the GPOS rule if the "mark" is not actually classified as a mark in the GlyphClasses of GDEF
3538 // but only in Indic old-spec.
3539 // Test cases: &#xca8;&#xccd;&#xca8;&#xcc1; and &#xc95;&#xccd;&#xcb0;&#xccc;
3540 if ($this->shaper == 'I' && $is_old_spec && strpos($this->GlyphClassMarks, $this->OTLdata[$ptr]['hex']) === false) {
3541 return;
3545 // "To identify the base glyph that combines with a mark, the text-processing client must look backward in the glyph string from the mark to the preceding base glyph."
3546 while (isset($this->OTLdata[$checkpos]) && strpos($this->GlyphClassMarks, $this->OTLdata[$checkpos]['hex']) !== false) {
3547 $checkpos--;
3550 if (isset($this->OTLdata[$checkpos]) && strpos($BaseGlyphs, $this->OTLdata[$checkpos]['hex']) !== false) {
3551 $matchedpos = $checkpos;
3552 } else {
3553 $matchedpos = false;
3556 if ($matchedpos !== false) {
3558 // Get the relevant MarkRecord
3559 $MarkPos = $LuCoverage[$currGID];
3560 $MarkRecord = $this->_getMarkRecord($MarkArray, $MarkPos); // e.g. Array ( [Class] => 0 [AnchorX] => -549 [AnchorY] => 1548 )
3561 //Mark Class is = $MarkRecord['Class']
3562 // Get the relevant BaseRecord
3563 $this->seek($BaseArray);
3564 $BaseCount = $this->read_ushort();
3565 $BasePos = strpos($BaseGlyphs, $this->OTLdata[$matchedpos]['hex']) / 6;
3567 // Move to the BaseRecord we want
3568 $nSkip = (2 * $BasePos * $ClassCount );
3569 $this->skip($nSkip);
3571 // Read BaseRecord we want for appropriate Class
3572 $nSkip = 2 * $MarkRecord['Class'];
3573 $this->skip($nSkip);
3574 $BaseRecordOffset = $BaseArray + $this->read_ushort();
3575 list($x, $y) = $this->_getAnchorTable($BaseRecordOffset);
3576 $BaseRecord = array('AnchorX' => $x, 'AnchorY' => $y); // e.g. Array ( [AnchorX] => 660 [AnchorY] => 1556 )
3577 // Need default XAdvance for Base glyph
3578 $BaseWidth = $this->mpdf->_getCharWidth($this->mpdf->CurrentFont['cw'], $this->OTLdata[$matchedpos]['uni']) * $this->mpdf->CurrentFont['unitsPerEm'] / 1000; // convert back to font design units
3579 $this->OTLdata[$ptr]['GPOSinfo']['BaseWidth'] = $BaseWidth;
3580 // And any intervening (ignored) characters
3581 if (($ptr - $matchedpos) > 1) {
3582 for ($i = $matchedpos + 1; $i < $ptr; $i++) {
3583 $BaseWidthExtra = $this->mpdf->_getCharWidth($this->mpdf->CurrentFont['cw'], $this->OTLdata[$i]['uni']) * $this->mpdf->CurrentFont['unitsPerEm'] / 1000; // convert back to font design units
3584 $this->OTLdata[$ptr]['GPOSinfo']['BaseWidth'] += $BaseWidthExtra;
3588 // Align to previous Glyph by attachment - so need to add to previous placement values
3589 $prevXPlacement = (isset($this->OTLdata[$matchedpos]['GPOSinfo']['XPlacement']) ? $this->OTLdata[$matchedpos]['GPOSinfo']['XPlacement'] : 0);
3590 $prevYPlacement = (isset($this->OTLdata[$matchedpos]['GPOSinfo']['YPlacement']) ? $this->OTLdata[$matchedpos]['GPOSinfo']['YPlacement'] : 0);
3592 $this->OTLdata[$ptr]['GPOSinfo']['XPlacement'] = $prevXPlacement + $BaseRecord['AnchorX'] - $MarkRecord['AnchorX'];
3593 $this->OTLdata[$ptr]['GPOSinfo']['YPlacement'] = $prevYPlacement + $BaseRecord['AnchorY'] - $MarkRecord['AnchorY'];
3594 if ($this->debugOTL) {
3595 $this->_dumpproc('GPOS', $lookupID, $subtable, $Type, $PosFormat, $ptr, $currGlyph, $level);
3597 return 1;
3599 return 0;
3602 ////////////////////////////////////////////////////////////////////////////////
3603 // LookupType 5: MarkToLigature attachment Attach a combining mark to a ligature
3604 ////////////////////////////////////////////////////////////////////////////////
3605 else if ($Type == 5) {
3606 $MarkCoverage = $subtable_offset + $this->read_ushort();
3607 //$MarkCoverage is already set in $LuCoverage 00065|00073 etc
3608 $LigatureCoverage = $subtable_offset + $this->read_ushort();
3609 $ClassCount = $this->read_ushort(); // Number of classes defined for marks = Number of mark glyphs in the MarkCoverage table
3610 $MarkArray = $subtable_offset + $this->read_ushort(); // Offset to MarkArray table
3611 $LigatureArray = $subtable_offset + $this->read_ushort(); // Offset to LigatureArray table
3613 $this->seek($LigatureCoverage);
3614 $LigatureGlyphs = implode('|', $this->_getCoverage());
3617 $checkpos = $ptr;
3618 $checkpos--;
3620 // "To position a combining mark using a MarkToLigature attachment subtable, the text-processing client must work backward from the mark to the preceding ligature glyph."
3621 while (isset($this->OTLdata[$checkpos]) && strpos($this->GlyphClassMarks, $this->OTLdata[$checkpos]['hex']) !== false) {
3622 $checkpos--;
3625 if (isset($this->OTLdata[$checkpos]) && strpos($LigatureGlyphs, $this->OTLdata[$checkpos]['hex']) !== false) {
3626 $matchedpos = $checkpos;
3627 } else {
3628 $matchedpos = false;
3631 if ($matchedpos !== false) {
3633 // Get the relevant MarkRecord
3634 $MarkPos = $LuCoverage[$currGID];
3635 $MarkRecord = $this->_getMarkRecord($MarkArray, $MarkPos); // e.g. Array ( [Class] => 0 [AnchorX] => -549 [AnchorY] => 1548 )
3636 //Mark Class is = $MarkRecord['Class']
3637 // Get the relevant LigatureRecord
3638 $this->seek($LigatureArray);
3639 $LigatureCount = $this->read_ushort();
3640 $LigaturePos = strpos($LigatureGlyphs, $this->OTLdata[$matchedpos]['hex']) / 6;
3642 // Move to the LigatureAttach table Record we want
3643 $nSkip = (2 * $LigaturePos);
3644 $this->skip($nSkip);
3645 $LigatureAttachOffset = $LigatureArray + $this->read_ushort();
3646 $this->seek($LigatureAttachOffset);
3647 $ComponentCount = $this->read_ushort();
3648 $offsets = array();
3649 for ($comp = 0; $comp < $ComponentCount; $comp++) {
3650 // ComponentRecords
3651 for ($class = 0; $class < $ClassCount; $class++) {
3652 $offsets[$comp][$class] = $this->read_ushort();
3656 // Get the specific component for this mark attachment
3657 if (isset($this->assocLigs[$matchedpos]) && isset($this->assocMarks[$ptr]['ligPos']) && $this->assocMarks[$ptr]['ligPos'] == $matchedpos) {
3658 $component = $this->assocMarks[$ptr]['compID'];
3659 } else {
3660 $component = $ComponentCount - 1;
3663 $offset = $offsets[$component][$MarkRecord['Class']];
3664 if ($offset != 0) {
3665 $LigatureRecordOffset = $offset + $LigatureAttachOffset;
3666 list($x, $y) = $this->_getAnchorTable($LigatureRecordOffset);
3667 $LigatureRecord = array('AnchorX' => $x, 'AnchorY' => $y);
3669 // Need default XAdvance for Ligature glyph
3670 $LigatureWidth = $this->mpdf->_getCharWidth($this->mpdf->CurrentFont['cw'], $this->OTLdata[$matchedpos]['uni']) * $this->mpdf->CurrentFont['unitsPerEm'] / 1000; // convert back to font design units
3671 $this->OTLdata[$ptr]['GPOSinfo']['BaseWidth'] = $LigatureWidth;
3672 // And any intervening (ignored)characters
3673 if (($ptr - $matchedpos) > 1) {
3674 for ($i = $matchedpos + 1; $i < $ptr; $i++) {
3675 $LigatureWidthExtra = $this->mpdf->_getCharWidth($this->mpdf->CurrentFont['cw'], $this->OTLdata[$i]['uni']) * $this->mpdf->CurrentFont['unitsPerEm'] / 1000; // convert back to font design units
3676 $this->OTLdata[$ptr]['GPOSinfo']['BaseWidth'] += $LigatureWidthExtra;
3680 // Align to previous Ligature by attachment - so need to add to previous placement values
3681 if (isset($this->OTLdata[$matchedpos]['GPOSinfo']['XPlacement']))
3682 $prevXPlacement = $this->OTLdata[$matchedpos]['GPOSinfo']['XPlacement'];
3683 else {
3684 $prevXPlacement = 0;
3686 if (isset($this->OTLdata[$matchedpos]['GPOSinfo']['YPlacement'])) {
3687 $prevYPlacement = $this->OTLdata[$matchedpos]['GPOSinfo']['YPlacement'];
3688 } else {
3689 $prevYPlacement = 0;
3692 $this->OTLdata[$ptr]['GPOSinfo']['XPlacement'] = $prevXPlacement + $LigatureRecord['AnchorX'] - $MarkRecord['AnchorX'];
3693 $this->OTLdata[$ptr]['GPOSinfo']['YPlacement'] = $prevYPlacement + $LigatureRecord['AnchorY'] - $MarkRecord['AnchorY'];
3694 if ($this->debugOTL) {
3695 $this->_dumpproc('GPOS', $lookupID, $subtable, $Type, $PosFormat, $ptr, $currGlyph, $level);
3697 return 1;
3700 return 0;
3703 ////////////////////////////////////////////////////////////////////////////////
3704 // LookupType 6: MarkToMark attachment Attach a combining mark to another mark
3705 ////////////////////////////////////////////////////////////////////////////////
3706 else if ($Type == 6) {
3707 $Mark1Coverage = $subtable_offset + $this->read_ushort(); // Combining Mark
3708 //$Mark1Coverage is already set in $LuCoverage 0065|0073 etc
3709 $Mark2Coverage = $subtable_offset + $this->read_ushort(); // Base Mark
3710 $ClassCount = $this->read_ushort(); // Number of classes defined for marks = No. of Combining mark1 glyphs in the MarkCoverage table
3711 $Mark1Array = $subtable_offset + $this->read_ushort(); // Offset to MarkArray table
3712 $Mark2Array = $subtable_offset + $this->read_ushort(); // Offset to Mark2Array table
3713 $this->seek($Mark2Coverage);
3714 $Mark2Glyphs = implode('|', $this->_getCoverage());
3715 $checkpos = $ptr;
3716 $checkpos--;
3717 while (isset($this->OTLdata[$checkpos]) && strpos($ignore, $this->OTLdata[$checkpos]['hex']) !== false) {
3718 $checkpos--;
3720 if (isset($this->OTLdata[$checkpos]) && strpos($Mark2Glyphs, $this->OTLdata[$checkpos]['hex']) !== false) {
3721 $matchedpos = $checkpos;
3722 } else {
3723 $matchedpos = false;
3726 if ($matchedpos !== false) {
3728 // Get the relevant MarkRecord
3729 $Mark1Pos = $LuCoverage[$currGID];
3730 $Mark1Record = $this->_getMarkRecord($Mark1Array, $Mark1Pos); // e.g. Array ( [Class] => 0 [AnchorX] => -549 [AnchorY] => 1548 )
3731 //Mark Class is = $Mark1Record['Class']
3732 // Get the relevant Mark2Record
3733 $this->seek($Mark2Array);
3734 $Mark2Count = $this->read_ushort();
3735 $Mark2Pos = strpos($Mark2Glyphs, $this->OTLdata[$matchedpos]['hex']) / 6;
3737 // Move to the Mark2Record we want
3738 $nSkip = (2 * $Mark2Pos * $ClassCount );
3739 $this->skip($nSkip);
3741 // Read Mark2Record we want for appropriate Class
3742 $nSkip = 2 * $Mark1Record['Class'];
3743 $this->skip($nSkip);
3744 $Mark2RecordOffset = $Mark2Array + $this->read_ushort();
3745 list($x, $y) = $this->_getAnchorTable($Mark2RecordOffset);
3746 $Mark2Record = array('AnchorX' => $x, 'AnchorY' => $y); // e.g. Array ( [AnchorX] => 660 [AnchorY] => 1556 )
3747 // Need default XAdvance for Mark2 glyph
3748 $Mark2Width = $this->mpdf->_getCharWidth($this->mpdf->CurrentFont['cw'], $this->OTLdata[$matchedpos]['uni']) * $this->mpdf->CurrentFont['unitsPerEm'] / 1000; // convert back to font design units
3749 // IF combining marks are set on different components of a ligature glyph, do not apply this rule
3750 // Test: arabictypesetting: &#x625;&#x650;&#x644;&#x64e;&#x649;&#x670;&#x653;
3751 // Test: arabictypesetting: &#x628;&#x651;&#x64e;&#x64a;&#x652;&#x646;&#x64e;&#x643;&#x64f;&#x645;&#x652;
3752 $prevLig = -1;
3753 $thisLig = -1;
3754 $prevComp = -1;
3755 $thisComp = -1;
3756 if (isset($this->assocMarks[$matchedpos])) {
3757 $prevLig = $this->assocMarks[$matchedpos]['ligPos'];
3758 $prevComp = $this->assocMarks[$matchedpos]['compID'];
3760 if (isset($this->assocMarks[$ptr])) {
3761 $thisLig = $this->assocMarks[$ptr]['ligPos'];
3762 $thisComp = $this->assocMarks[$ptr]['compID'];
3765 // However IF Mark2 (first in logical order, i.e. being attached to) is not associated with a base, carry on
3766 // This happens in Indic when the Mark being attached to e.g. [Halant Ma lig] -> MatraU, [U+0B4D + U+B2E as E0F5]-> U+0B41 become E135
3767 if (!defined("OMIT_OTL_FIX_1") || OMIT_OTL_FIX_1 != 1) {
3768 /* OTL_FIX_1 */
3769 if (isset($this->assocMarks[$matchedpos]) && ($prevLig != $thisLig || $prevComp != $thisComp )) {
3770 return 0;
3772 } else {
3773 /* Original code */
3774 if ($prevLig != $thisLig || $prevComp != $thisComp) {
3775 return 0;
3780 if (!defined("OMIT_OTL_FIX_2") || OMIT_OTL_FIX_2 != 1) {
3781 /* OTL_FIX_2 */
3782 if (!isset($this->OTLdata[$matchedpos]['GPOSinfo']['BaseWidth']) || !$this->OTLdata[$matchedpos]['GPOSinfo']['BaseWidth']) {
3783 $this->OTLdata[$ptr]['GPOSinfo']['BaseWidth'] = $Mark2Width;
3787 // ZZZ99Q - Test Case font-family: garuda &#xe19;&#xe49;&#xe33;
3788 if (isset($this->OTLdata[$matchedpos]['GPOSinfo']['BaseWidth']) && $this->OTLdata[$matchedpos]['GPOSinfo']['BaseWidth']) {
3789 $this->OTLdata[$ptr]['GPOSinfo']['BaseWidth'] = $this->OTLdata[$matchedpos]['GPOSinfo']['BaseWidth'];
3792 // Align to previous Mark by attachment - so need to add the previous placement values
3793 $prevXPlacement = (isset($this->OTLdata[$matchedpos]['GPOSinfo']['XPlacement']) ? $this->OTLdata[$matchedpos]['GPOSinfo']['XPlacement'] : 0);
3794 $prevYPlacement = (isset($this->OTLdata[$matchedpos]['GPOSinfo']['YPlacement']) ? $this->OTLdata[$matchedpos]['GPOSinfo']['YPlacement'] : 0);
3795 $this->OTLdata[$ptr]['GPOSinfo']['XPlacement'] = $prevXPlacement + $Mark2Record['AnchorX'] - $Mark1Record['AnchorX'];
3796 $this->OTLdata[$ptr]['GPOSinfo']['YPlacement'] = $prevYPlacement + $Mark2Record['AnchorY'] - $Mark1Record['AnchorY'];
3797 if ($this->debugOTL) {
3798 $this->_dumpproc('GPOS', $lookupID, $subtable, $Type, $PosFormat, $ptr, $currGlyph, $level);
3800 return 1;
3802 return 0;
3805 ////////////////////////////////////////////////////////////////////////////////
3806 // LookupType 7: Context positioning Position one or more glyphs in context
3807 ////////////////////////////////////////////////////////////////////////////////
3808 else if ($Type == 7) {
3809 //===========
3810 // Format 1:
3811 //===========
3812 if ($PosFormat == 1) {
3813 throw new MpdfException("GPOS Lookup Type " . $Type . " Format " . $PosFormat . " not TESTED YET.");
3815 //===========
3816 // Format 2:
3817 //===========
3818 else if ($PosFormat == 2) {
3819 $CoverageTableOffset = $subtable_offset + $this->read_ushort();
3820 $InputClassDefOffset = $subtable_offset + $this->read_ushort();
3821 $PosClassSetCnt = $this->read_ushort();
3822 $PosClassSetOffset = array();
3823 for ($b = 0; $b < $PosClassSetCnt; $b++) {
3824 $offset = $this->read_ushort();
3825 if ($offset == 0x0000) {
3826 $PosClassSetOffset[] = $offset;
3827 } else {
3828 $PosClassSetOffset[] = $subtable_offset + $offset;
3832 $InputClasses = $this->_getClasses($InputClassDefOffset);
3834 for ($s = 0; $s < $PosClassSetCnt; $s++) { // $ChainPosClassSet is ordered by input class-may be NULL
3835 // Select $PosClassSet if currGlyph is in First Input Class
3836 if ($PosClassSetOffset[$s] > 0 && isset($InputClasses[$s][$currGID])) {
3837 $this->seek($PosClassSetOffset[$s]);
3838 $PosClassRuleCnt = $this->read_ushort();
3839 $PosClassRule = array();
3840 for ($b = 0; $b < $PosClassRuleCnt; $b++) {
3841 $PosClassRule[$b] = $PosClassSetOffset[$s] + $this->read_ushort();
3844 for ($b = 0; $b < $PosClassRuleCnt; $b++) { // EACH RULE
3845 $this->seek($PosClassRule[$b]);
3846 $InputGlyphCount = $this->read_ushort();
3847 $PosCount = $this->read_ushort();
3849 $Input = array();
3850 for ($r = 1; $r < $InputGlyphCount; $r++) {
3851 $Input[$r] = $this->read_ushort();
3853 $inputClass = $s;
3855 $inputGlyphs = array();
3856 $inputGlyphs[0] = $InputClasses[$inputClass];
3858 if ($InputGlyphCount > 1) {
3859 // NB starts at 1
3860 for ($gcl = 1; $gcl < $InputGlyphCount; $gcl++) {
3861 $classindex = $Input[$gcl];
3862 if (isset($InputClasses[$classindex])) {
3863 $inputGlyphs[$gcl] = $InputClasses[$classindex];
3864 } else {
3865 $inputGlyphs[$gcl] = '';
3870 // Class 0 contains all the glyphs NOT in the other classes
3871 $class0excl = array();
3872 for ($gc = 1; $gc <= count($InputClasses); $gc++) {
3873 if (is_array($InputClasses[$gc]))
3874 $class0excl = $class0excl + $InputClasses[$gc];
3877 $backtrackGlyphs = array();
3878 $lookaheadGlyphs = array();
3880 $matched = $this->checkContextMatchMultipleUni($inputGlyphs, $backtrackGlyphs, $lookaheadGlyphs, $ignore, $ptr, $class0excl);
3881 if ($matched) {
3882 for ($p = 0; $p < $PosCount; $p++) { // EACH LOOKUP
3883 $SequenceIndex[$p] = $this->read_ushort();
3884 $LookupListIndex[$p] = $this->read_ushort();
3887 for ($p = 0; $p < $PosCount; $p++) {
3888 // Apply $LookupListIndex at $SequenceIndex
3889 if ($SequenceIndex[$p] >= $InputGlyphCount) {
3890 continue;
3892 $lu = $LookupListIndex[$p];
3893 $luType = $this->GPOSLookups[$lu]['Type'];
3894 $luFlag = $this->GPOSLookups[$lu]['Flag'];
3895 $luMarkFilteringSet = $this->GPOSLookups[$lu]['MarkFilteringSet'];
3897 $luptr = $matched[$SequenceIndex[$p]];
3898 $lucurrGlyph = $this->OTLdata[$luptr]['hex'];
3899 $lucurrGID = $this->OTLdata[$luptr]['uni'];
3901 foreach ($this->GPOSLookups[$lu]['Subtables'] AS $luc => $lusubtable_offset) {
3902 $shift = $this->_applyGPOSsubtable($lu, $luc, $luptr, $lucurrGlyph, $lucurrGID, ($lusubtable_offset - $this->GPOS_offset + $this->GSUB_length), $luType, $luFlag, $luMarkFilteringSet, $this->LuCoverage[$lu][$luc], $tag, 1, $is_old_spec);
3903 if ($this->debugOTL && $shift) {
3904 $this->_dumpproc('GPOS', $lookupID, $subtable, $Type, $PosFormat, $ptr, $currGlyph, $level);
3906 if ($shift) {
3907 break;
3912 if (!defined("OMIT_OTL_FIX_3") || OMIT_OTL_FIX_3 != 1) {
3913 return $shift;
3914 } /* OTL_FIX_3 */
3915 else
3916 return $InputGlyphCount; // should be + matched ignores in Input Sequence
3922 return 0;
3924 //===========
3925 // Format 3:
3926 //===========
3927 else if ($PosFormat == 3) {
3928 throw new MpdfException("GPOS Lookup Type " . $Type . " Format " . $PosFormat . " not TESTED YET.");
3929 } else {
3930 throw new MpdfException("GPOS Lookup Type " . $Type . ", Format " . $PosFormat . " not supported.");
3934 ////////////////////////////////////////////////////////////////////////////////
3935 // LookupType 8: Chained Context positioning Position one or more glyphs in chained context
3936 ////////////////////////////////////////////////////////////////////////////////
3937 else if ($Type == 8) {
3938 //===========
3939 // Format 1:
3940 //===========
3941 if ($PosFormat == 1) {
3942 throw new MpdfException("GPOS Lookup Type " . $Type . " Format " . $PosFormat . " not TESTED YET.");
3943 return 0;
3945 //===========
3946 // Format 2:
3947 //===========
3948 else if ($PosFormat == 2) {
3950 $CoverageTableOffset = $subtable_offset + $this->read_ushort();
3951 $BacktrackClassDefOffset = $subtable_offset + $this->read_ushort();
3952 $InputClassDefOffset = $subtable_offset + $this->read_ushort();
3953 $LookaheadClassDefOffset = $subtable_offset + $this->read_ushort();
3954 $ChainPosClassSetCnt = $this->read_ushort();
3955 $ChainPosClassSetOffset = array();
3956 for ($b = 0; $b < $ChainPosClassSetCnt; $b++) {
3957 $offset = $this->read_ushort();
3958 if ($offset == 0x0000) {
3959 $ChainPosClassSetOffset[] = $offset;
3960 } else {
3961 $ChainPosClassSetOffset[] = $subtable_offset + $offset;
3965 $BacktrackClasses = $this->_getClasses($BacktrackClassDefOffset);
3966 $InputClasses = $this->_getClasses($InputClassDefOffset);
3967 $LookaheadClasses = $this->_getClasses($LookaheadClassDefOffset);
3969 for ($s = 0; $s < $ChainPosClassSetCnt; $s++) { // $ChainPosClassSet is ordered by input class-may be NULL
3970 // Select $ChainPosClassSet if currGlyph is in First Input Class
3971 if ($ChainPosClassSetOffset[$s] > 0 && isset($InputClasses[$s][$currGID])) {
3972 $this->seek($ChainPosClassSetOffset[$s]);
3973 $ChainPosClassRuleCnt = $this->read_ushort();
3974 $ChainPosClassRule = array();
3975 for ($b = 0; $b < $ChainPosClassRuleCnt; $b++) {
3976 $ChainPosClassRule[$b] = $ChainPosClassSetOffset[$s] + $this->read_ushort();
3979 for ($b = 0; $b < $ChainPosClassRuleCnt; $b++) { // EACH RULE
3980 $this->seek($ChainPosClassRule[$b]);
3981 $BacktrackGlyphCount = $this->read_ushort();
3982 $Backtrack = array();
3983 for ($r = 0; $r < $BacktrackGlyphCount; $r++) {
3984 $Backtrack[$r] = $this->read_ushort();
3986 $InputGlyphCount = $this->read_ushort();
3987 $Input = array();
3988 for ($r = 1; $r < $InputGlyphCount; $r++) {
3989 $Input[$r] = $this->read_ushort();
3991 $LookaheadGlyphCount = $this->read_ushort();
3992 $Lookahead = array();
3993 for ($r = 0; $r < $LookaheadGlyphCount; $r++) {
3994 $Lookahead[$r] = $this->read_ushort();
3997 $inputClass = $s; //???
3999 $inputGlyphs = array();
4000 $inputGlyphs[0] = $InputClasses[$inputClass];
4002 if ($InputGlyphCount > 1) {
4003 // NB starts at 1
4004 for ($gcl = 1; $gcl < $InputGlyphCount; $gcl++) {
4005 $classindex = $Input[$gcl];
4006 if (isset($InputClasses[$classindex])) {
4007 $inputGlyphs[$gcl] = $InputClasses[$classindex];
4008 } else {
4009 $inputGlyphs[$gcl] = '';
4014 // Class 0 contains all the glyphs NOT in the other classes
4015 $class0excl = array();
4016 for ($gc = 1; $gc <= count($InputClasses); $gc++) {
4017 if (isset($InputClasses[$gc]) && is_array($InputClasses[$gc]))
4018 $class0excl = $class0excl + $InputClasses[$gc];
4021 if ($BacktrackGlyphCount) {
4022 $backtrackGlyphs = array();
4023 for ($gcl = 0; $gcl < $BacktrackGlyphCount; $gcl++) {
4024 $classindex = $Backtrack[$gcl];
4025 if (isset($BacktrackClasses[$classindex])) {
4026 $backtrackGlyphs[$gcl] = $BacktrackClasses[$classindex];
4027 } else {
4028 $backtrackGlyphs[$gcl] = '';
4031 } else {
4032 $backtrackGlyphs = array();
4035 // Class 0 contains all the glyphs NOT in the other classes
4036 $bclass0excl = array();
4037 for ($gc = 1; $gc <= count($BacktrackClasses); $gc++) {
4038 if (isset($BacktrackClasses[$gc]) && is_array($BacktrackClasses[$gc]))
4039 $bclass0excl = $bclass0excl + $BacktrackClasses[$gc];
4042 if ($LookaheadGlyphCount) {
4043 $lookaheadGlyphs = array();
4044 for ($gcl = 0; $gcl < $LookaheadGlyphCount; $gcl++) {
4045 $classindex = $Lookahead[$gcl];
4046 if (isset($LookaheadClasses[$classindex])) {
4047 $lookaheadGlyphs[$gcl] = $LookaheadClasses[$classindex];
4048 } else {
4049 $lookaheadGlyphs[$gcl] = '';
4052 } else {
4053 $lookaheadGlyphs = array();
4056 // Class 0 contains all the glyphs NOT in the other classes
4057 $lclass0excl = array();
4058 for ($gc = 1; $gc <= count($LookaheadClasses); $gc++) {
4059 if (isset($LookaheadClasses[$gc]) && is_array($LookaheadClasses[$gc]))
4060 $lclass0excl = $lclass0excl + $LookaheadClasses[$gc];
4063 $matched = $this->checkContextMatchMultipleUni($inputGlyphs, $backtrackGlyphs, $lookaheadGlyphs, $ignore, $ptr, $class0excl, $bclass0excl, $lclass0excl);
4064 if ($matched) {
4065 $PosCount = $this->read_ushort();
4066 $SequenceIndex = array();
4067 $LookupListIndex = array();
4068 for ($p = 0; $p < $PosCount; $p++) { // EACH LOOKUP
4069 $SequenceIndex[$p] = $this->read_ushort();
4070 $LookupListIndex[$p] = $this->read_ushort();
4073 for ($p = 0; $p < $PosCount; $p++) {
4074 // Apply $LookupListIndex at $SequenceIndex
4075 if ($SequenceIndex[$p] >= $InputGlyphCount) {
4076 continue;
4078 $lu = $LookupListIndex[$p];
4079 $luType = $this->GPOSLookups[$lu]['Type'];
4080 $luFlag = $this->GPOSLookups[$lu]['Flag'];
4081 $luMarkFilteringSet = $this->GPOSLookups[$lu]['MarkFilteringSet'];
4083 $luptr = $matched[$SequenceIndex[$p]];
4084 $lucurrGlyph = $this->OTLdata[$luptr]['hex'];
4085 $lucurrGID = $this->OTLdata[$luptr]['uni'];
4087 foreach ($this->GPOSLookups[$lu]['Subtables'] AS $luc => $lusubtable_offset) {
4088 $shift = $this->_applyGPOSsubtable($lu, $luc, $luptr, $lucurrGlyph, $lucurrGID, ($lusubtable_offset - $this->GPOS_offset + $this->GSUB_length), $luType, $luFlag, $luMarkFilteringSet, $this->LuCoverage[$lu][$luc], $tag, 1, $is_old_spec);
4089 if ($this->debugOTL && $shift) {
4090 $this->_dumpproc('GPOS', $lookupID, $subtable, $Type, $PosFormat, $ptr, $currGlyph, $level);
4092 if ($shift) {
4093 break;
4098 if (!defined("OMIT_OTL_FIX_3") || OMIT_OTL_FIX_3 != 1) {
4099 return $shift;
4100 } /* OTL_FIX_3 */
4101 else
4102 return $InputGlyphCount; // should be + matched ignores in Input Sequence
4108 return 0;
4110 //===========
4111 // Format 3:
4112 //===========
4113 else if ($PosFormat == 3) {
4114 $BacktrackGlyphCount = $this->read_ushort();
4115 for ($b = 0; $b < $BacktrackGlyphCount; $b++) {
4116 $CoverageBacktrackOffset[] = $subtable_offset + $this->read_ushort(); // in glyph sequence order
4118 $InputGlyphCount = $this->read_ushort();
4119 for ($b = 0; $b < $InputGlyphCount; $b++) {
4120 $CoverageInputOffset[] = $subtable_offset + $this->read_ushort(); // in glyph sequence order
4122 $LookaheadGlyphCount = $this->read_ushort();
4123 for ($b = 0; $b < $LookaheadGlyphCount; $b++) {
4124 $CoverageLookaheadOffset[] = $subtable_offset + $this->read_ushort(); // in glyph sequence order
4126 $PosCount = $this->read_ushort();
4127 $save_pos = $this->_pos; // Save the point just after PosCount
4129 $CoverageBacktrackGlyphs = array();
4130 for ($b = 0; $b < $BacktrackGlyphCount; $b++) {
4131 $this->seek($CoverageBacktrackOffset[$b]);
4132 $glyphs = $this->_getCoverage();
4133 $CoverageBacktrackGlyphs[$b] = implode("|", $glyphs);
4135 $CoverageInputGlyphs = array();
4136 for ($b = 0; $b < $InputGlyphCount; $b++) {
4137 $this->seek($CoverageInputOffset[$b]);
4138 $glyphs = $this->_getCoverage();
4139 $CoverageInputGlyphs[$b] = implode("|", $glyphs);
4141 $CoverageLookaheadGlyphs = array();
4142 for ($b = 0; $b < $LookaheadGlyphCount; $b++) {
4143 $this->seek($CoverageLookaheadOffset[$b]);
4144 $glyphs = $this->_getCoverage();
4145 $CoverageLookaheadGlyphs[$b] = implode("|", $glyphs);
4147 $matched = $this->checkContextMatchMultiple($CoverageInputGlyphs, $CoverageBacktrackGlyphs, $CoverageLookaheadGlyphs, $ignore, $ptr);
4148 if ($matched) {
4150 $this->seek($save_pos); // Return to just after PosCount
4151 for ($p = 0; $p < $PosCount; $p++) {
4152 // PosLookupRecord
4153 $PosLookupRecord[$p]['SequenceIndex'] = $this->read_ushort();
4154 $PosLookupRecord[$p]['LookupListIndex'] = $this->read_ushort();
4156 for ($p = 0; $p < $PosCount; $p++) {
4157 // Apply $PosLookupRecord[$p]['LookupListIndex'] at $PosLookupRecord[$p]['SequenceIndex']
4158 if ($PosLookupRecord[$p]['SequenceIndex'] >= $InputGlyphCount) {
4159 continue;
4161 $lu = $PosLookupRecord[$p]['LookupListIndex'];
4162 $luType = $this->GPOSLookups[$lu]['Type'];
4163 $luFlag = $this->GPOSLookups[$lu]['Flag'];
4164 if (isset($this->GPOSLookups[$lu]['MarkFilteringSet'])) {
4165 $luMarkFilteringSet = $this->GPOSLookups[$lu]['MarkFilteringSet'];
4166 } else {
4167 $luMarkFilteringSet = '';
4170 $luptr = $matched[$PosLookupRecord[$p]['SequenceIndex']];
4171 $lucurrGlyph = $this->OTLdata[$luptr]['hex'];
4172 $lucurrGID = $this->OTLdata[$luptr]['uni'];
4174 foreach ($this->GPOSLookups[$lu]['Subtables'] AS $luc => $lusubtable_offset) {
4175 $shift = $this->_applyGPOSsubtable($lu, $luc, $luptr, $lucurrGlyph, $lucurrGID, ($lusubtable_offset - $this->GPOS_offset + $this->GSUB_length), $luType, $luFlag, $luMarkFilteringSet, $this->LuCoverage[$lu][$luc], $tag, 1, $is_old_spec);
4176 if ($this->debugOTL && $shift) {
4177 $this->_dumpproc('GPOS', $lookupID, $subtable, $Type, $PosFormat, $ptr, $currGlyph, $level);
4179 if ($shift) {
4180 break;
4185 } else {
4186 throw new MpdfException("GPOS Lookup Type " . $Type . ", Format " . $PosFormat . " not supported.");
4188 } else {
4189 throw new MpdfException("GPOS Lookup Type " . $Type . " not supported.");
4193 //////////////////////////////////////////////////////////////////////////////////
4194 //////////////////////////////////////////////////////////////////////////////////
4195 // GPOS / GSUB / GCOM (common) functions
4196 //////////////////////////////////////////////////////////////////////////////////
4197 //////////////////////////////////////////////////////////////////////////////////
function checkContextMatch($Input, $Backtrack, $Lookahead, $ignore, $ptr)
{
	// Tests a context rule whose entries are single glyph values (GSUB Format 6.1).
	// Each entry is compared with the 'uni' value of an item in $this->OTLdata.
	//   $Input     - expected values for the input sequence; index 0 (the glyph at $ptr) is
	//                taken as already matched, so comparison starts at index 1
	//   $Backtrack - expected values walking backwards from $ptr
	//   $Lookahead - expected values walking forwards from the last input glyph
	//   $ignore    - string of glyph 'hex' codes to be stepped over while walking
	//   $ptr       - position in $this->OTLdata of the first input glyph
	// Returns false if the rule does not match, otherwise the array of OTLdata positions
	// matched by the input sequence, e.g. (0=>$ptr, 1=>$ptr+3, ...)

	$syllable = isset($this->OTLdata[$ptr]['syllable']) ? $this->OTLdata[$ptr]['syllable'] : 0;

	// BACKTRACK - walk backwards from $ptr
	$pos = $ptr;
	$backtrackCount = count($Backtrack);
	for ($b = 0; $b < $backtrackCount; $b++) {
		// Step back once, then keep stepping over ignorable glyphs
		do {
			$pos--;
		} while (isset($this->OTLdata[$pos]) && strpos($ignore, $this->OTLdata[$pos]['hex']) !== false);

		// Ran off the start of the data - no match
		if (!isset($this->OTLdata[$pos])) {
			return false;
		}
		$glyph = $this->OTLdata[$pos];
		// Outside the scope of the current syllable - no match
		if ($this->restrictToSyllable && isset($glyph['syllable']) && $glyph['syllable'] != $syllable) {
			return false;
		}
		if ($glyph['uni'] != $Backtrack[$b]) {
			return false;
		}
	}

	// INPUT - walk forwards from $ptr, recording each matched position
	$matched = array(0 => $ptr);
	$pos = $ptr;
	$inputCount = count($Input);
	for ($n = 1; $n < $inputCount; $n++) {
		do {
			$pos++;
		} while (isset($this->OTLdata[$pos]) && strpos($ignore, $this->OTLdata[$pos]['hex']) !== false);

		if (!isset($this->OTLdata[$pos])) {
			return false;
		}
		$glyph = $this->OTLdata[$pos];
		if ($this->restrictToSyllable && isset($glyph['syllable']) && $glyph['syllable'] != $syllable) {
			return false;
		}
		if ($glyph['uni'] != $Input[$n]) {
			return false;
		}
		$matched[] = $pos;
	}

	// LOOKAHEAD - carry on forwards from wherever the input sequence ended
	$lookaheadCount = count($Lookahead);
	for ($a = 0; $a < $lookaheadCount; $a++) {
		do {
			$pos++;
		} while (isset($this->OTLdata[$pos]) && strpos($ignore, $this->OTLdata[$pos]['hex']) !== false);

		if (!isset($this->OTLdata[$pos])) {
			return false;
		}
		$glyph = $this->OTLdata[$pos];
		if ($this->restrictToSyllable && isset($glyph['syllable']) && $glyph['syllable'] != $syllable) {
			return false;
		}
		if ($glyph['uni'] != $Lookahead[$a]) {
			return false;
		}
	}

	return $matched;
}
function checkContextMatchMultiple($Input, $Backtrack, $Lookahead, $ignore, $ptr, $class0excl = '', $bclass0excl = '', $lclass0excl = '')
{
	// Tests a context rule whose entries are strings of glyph 'hex' codes
	// - GSUB Format 5.2, 5.3, 6.2, 6.3, GPOS Format 7.2, 7.3, 8.2, 8.3
	//   $Input     - one glyph string per input position; index 0 (the glyph at $ptr) is
	//                taken as already matched, so comparison starts at index 1
	//   $Backtrack - one glyph string per position walking backwards from $ptr
	//   $Lookahead - one glyph string per position walking forwards from the last input glyph
	//   $ignore    - string of glyph 'hex' codes to be stepped over while walking
	//   $ptr       - position in $this->OTLdata of the first input glyph
	//   $class0excl  - string of glyphs in all Input classes except Class 0 (GSUB 5.2, 6.2, GPOS 7.2, 8.2)
	//   $bclass0excl - the same for the Backtrack classes (GSUB 6.2, GPOS 8.2)
	//   $lclass0excl - the same for the Lookahead classes (GSUB 6.2, GPOS 8.2)
	// An empty entry stands for Class 0, which matches any glyph NOT listed in the
	// corresponding exclusion string.
	// Returns false if no match, else an array of OTLdata positions for the input
	// sequence (0=>$ptr, 1=>..., ...)

	$current_syllable = (isset($this->OTLdata[$ptr]['syllable']) ? $this->OTLdata[$ptr]['syllable'] : 0);

	// BACKTRACK
	$checkpos = $ptr;
	$backtrackCount = count($Backtrack);
	for ($i = 0; $i < $backtrackCount; $i++) {
		$checkpos--;
		while (isset($this->OTLdata[$checkpos]) && strpos($ignore, $this->OTLdata[$checkpos]['hex']) !== false) {
			$checkpos--;
		}
		// Ran off the start of the data - no match
		if (!isset($this->OTLdata[$checkpos])) {
			return false;
		}
		// If outside scope of current syllable - return no match
		if ($this->restrictToSyllable && isset($this->OTLdata[$checkpos]['syllable']) && $this->OTLdata[$checkpos]['syllable'] != $current_syllable) {
			return false;
		}
		$hex = $this->OTLdata[$checkpos]['hex'];
		if (!$Backtrack[$i]) {
			// Class 0 specified: matches anything NOT in $bclass0excl.
			// (Previously this case fell through to strpos('', $hex), which never
			// succeeds, so a Class 0 backtrack entry could not match at all.)
			if (strpos($bclass0excl, $hex) !== false) {
				return false;
			}
		} else if (strpos($Backtrack[$i], $hex) === false) {
			return false;
		}
	}

	// INPUT
	$matched = array(0 => $ptr);
	$checkpos = $ptr;
	$inputCount = count($Input);
	for ($i = 1; $i < $inputCount; $i++) { // Start at 1 - already matched the first InputGlyph
		$checkpos++;
		while (isset($this->OTLdata[$checkpos]) && strpos($ignore, $this->OTLdata[$checkpos]['hex']) !== false) {
			$checkpos++;
		}
		// If outside scope of current syllable - return no match
		if ($this->restrictToSyllable && isset($this->OTLdata[$checkpos]['syllable']) && $this->OTLdata[$checkpos]['syllable'] != $current_syllable) {
			return false;
		}
		// If Input Class 0 specified, matches anything NOT in $class0excl
		else if (!$Input[$i] && isset($this->OTLdata[$checkpos]) && strpos($class0excl, $this->OTLdata[$checkpos]['hex']) === false) {
			$matched[] = $checkpos;
		} else if (isset($this->OTLdata[$checkpos]) && strpos($Input[$i], $this->OTLdata[$checkpos]['hex']) !== false) {
			$matched[] = $checkpos;
		} else {
			return false;
		}
	}

	// LOOKAHEAD - continues from the end of the input sequence
	$lookaheadCount = count($Lookahead);
	for ($i = 0; $i < $lookaheadCount; $i++) {
		$checkpos++;
		while (isset($this->OTLdata[$checkpos]) && strpos($ignore, $this->OTLdata[$checkpos]['hex']) !== false) {
			$checkpos++;
		}
		// Ran off the end of the data - no match
		if (!isset($this->OTLdata[$checkpos])) {
			return false;
		}
		// If outside scope of current syllable - return no match
		if ($this->restrictToSyllable && isset($this->OTLdata[$checkpos]['syllable']) && $this->OTLdata[$checkpos]['syllable'] != $current_syllable) {
			return false;
		}
		$hex = $this->OTLdata[$checkpos]['hex'];
		if (!$Lookahead[$i]) {
			// Class 0 specified: matches anything NOT in $lclass0excl (see BACKTRACK above)
			if (strpos($lclass0excl, $hex) !== false) {
				return false;
			}
		} else if (strpos($Lookahead[$i], $hex) === false) {
			return false;
		}
	}

	return $matched;
}
function checkContextMatchMultipleUni($Input, $Backtrack, $Lookahead, $ignore, $ptr, $class0excl = array(), $bclass0excl = array(), $lclass0excl = array())
{
	// Tests a context rule whose entries are arrays of glyphs keyed by the glyph's 'uni' value
	// - GSUB Format 5.2, 5.3, 6.2, 6.3, GPOS Format 7.2, 7.3, 8.2, 8.3
	//   $Input     - one glyph array per input position; index 0 (the glyph at $ptr) is
	//                taken as already matched, so comparison starts at index 1
	//   $Backtrack - one glyph array per position walking backwards from $ptr
	//   $Lookahead - one glyph array per position walking forwards from the last input glyph
	//   $ignore    - string of glyph 'hex' codes to be stepped over while walking
	//   $ptr       - position in $this->OTLdata of the first input glyph
	//   $class0excl  - array of glyphs in all Input classes except Class 0 (GSUB 5.2, 6.2, GPOS 7.2, 8.2)
	//   $bclass0excl - the same for the Backtrack classes (GSUB 6.2, GPOS 8.2)
	//   $lclass0excl - the same for the Lookahead classes (GSUB 6.2, GPOS 8.2)
	// An empty entry stands for Class 0, which matches any glyph NOT present (as a key)
	// in the corresponding exclusion array.
	// Returns false if no match, else an array of OTLdata positions for the input
	// sequence (0=>$ptr, 1=>..., ...)

	$current_syllable = (isset($this->OTLdata[$ptr]['syllable']) ? $this->OTLdata[$ptr]['syllable'] : 0);

	// BACKTRACK
	$checkpos = $ptr;
	$backtrackCount = count($Backtrack);
	for ($i = 0; $i < $backtrackCount; $i++) {
		$checkpos--;
		while (isset($this->OTLdata[$checkpos]) && strpos($ignore, $this->OTLdata[$checkpos]['hex']) !== false) {
			$checkpos--;
		}
		// Ran off the start of the data - no match
		if (!isset($this->OTLdata[$checkpos])) {
			return false;
		}
		// If outside scope of current syllable - return no match
		if ($this->restrictToSyllable && isset($this->OTLdata[$checkpos]['syllable']) && $this->OTLdata[$checkpos]['syllable'] != $current_syllable) {
			return false;
		}
		$uni = $this->OTLdata[$checkpos]['uni'];
		if (!$Backtrack[$i]) {
			// Class 0 specified: matches anything NOT in $bclass0excl.
			// (Previously this case fell through to an isset() test on the empty class,
			// which never succeeds, so a Class 0 backtrack entry could not match at all.)
			if (isset($bclass0excl[$uni])) {
				return false;
			}
		} else if (!isset($Backtrack[$i][$uni])) {
			return false;
		}
	}

	// INPUT
	$matched = array(0 => $ptr);
	$checkpos = $ptr;
	$inputCount = count($Input);
	for ($i = 1; $i < $inputCount; $i++) { // Start at 1 - already matched the first InputGlyph
		$checkpos++;
		while (isset($this->OTLdata[$checkpos]) && strpos($ignore, $this->OTLdata[$checkpos]['hex']) !== false) {
			$checkpos++;
		}
		// If outside scope of current syllable - return no match
		if ($this->restrictToSyllable && isset($this->OTLdata[$checkpos]['syllable']) && $this->OTLdata[$checkpos]['syllable'] != $current_syllable) {
			return false;
		}
		// If Input Class 0 specified, matches anything NOT in $class0excl
		else if (!$Input[$i] && isset($this->OTLdata[$checkpos]) && !isset($class0excl[$this->OTLdata[$checkpos]['uni']])) {
			$matched[] = $checkpos;
		} else if (isset($this->OTLdata[$checkpos]) && isset($Input[$i][$this->OTLdata[$checkpos]['uni']])) {
			$matched[] = $checkpos;
		} else {
			return false;
		}
	}

	// LOOKAHEAD - continues from the end of the input sequence
	$lookaheadCount = count($Lookahead);
	for ($i = 0; $i < $lookaheadCount; $i++) {
		$checkpos++;
		while (isset($this->OTLdata[$checkpos]) && strpos($ignore, $this->OTLdata[$checkpos]['hex']) !== false) {
			$checkpos++;
		}
		// Ran off the end of the data - no match
		if (!isset($this->OTLdata[$checkpos])) {
			return false;
		}
		// If outside scope of current syllable - return no match
		if ($this->restrictToSyllable && isset($this->OTLdata[$checkpos]['syllable']) && $this->OTLdata[$checkpos]['syllable'] != $current_syllable) {
			return false;
		}
		$uni = $this->OTLdata[$checkpos]['uni'];
		if (!$Lookahead[$i]) {
			// Class 0 specified: matches anything NOT in $lclass0excl (see BACKTRACK above)
			if (isset($lclass0excl[$uni])) {
				return false;
			}
		} else if (!isset($Lookahead[$i][$uni])) {
			return false;
		}
	}

	return $matched;
}
function _getClassDefinitionTable($offset)
{
	// Reads the Class Definition table at $offset and returns the glyphs grouped by class:
	//   array(classValue => array(char, char, ...), ...) sorted by class value,
	// where each char is whatever $this->glyphToChar() returns for the glyph ID.
	// Only classes that actually contain glyphs get an entry (no index 0 is forced).
	// Results are cached per font and offset in $this->LuDataCache.

	if (isset($this->LuDataCache[$this->fontkey][$offset])) {
		return $this->LuDataCache[$this->fontkey][$offset];
	}

	$this->seek($offset);
	$ClassFormat = $this->read_ushort();

	// Always start from an empty array: an empty table or an unrecognised format
	// must still hand a real array to ksort() and to the caller.
	$GlyphByClass = array();

	if ($ClassFormat == 1) {
		// Format 1: one class value per consecutive glyph ID, starting at StartGlyph
		$StartGlyph = $this->read_ushort();
		$GlyphCount = $this->read_ushort();
		for ($i = 0; $i < $GlyphCount; $i++) {
			$class = $this->read_ushort();
			$GlyphByClass[$class][] = $this->glyphToChar($StartGlyph + $i);
		}
	} else if ($ClassFormat == 2) {
		// Format 2: records of (startGlyphID, endGlyphID, class) covering glyph ranges
		$rangeCount = $this->read_ushort();
		for ($i = 0; $i < $rangeCount; $i++) {
			$startGlyphID = $this->read_ushort();
			$endGlyphID = $this->read_ushort();
			$class = $this->read_ushort();
			for ($g = $startGlyphID; $g <= $endGlyphID; $g++) {
				$GlyphByClass[$class][] = $this->glyphToChar($g);
			}
		}
	}

	ksort($GlyphByClass);
	$this->LuDataCache[$this->fontkey][$offset] = $GlyphByClass;

	return $GlyphByClass;
}
function count_bits($n)
{
	// Returns how many bits are set in $n.
	// Each pass clears the lowest set bit, so the loop runs once per set bit.
	$bits = 0;
	while ($n) {
		$n = $n & ($n - 1);
		$bits++;
	}
	return $bits;
}
function _getValueRecord($ValueFormat)
{
	// Common ValueRecord reader for GPOS.
	// $ValueFormat is a bit mask saying which fields are present at the current read position.
	// Every field that is present is read (so the read position ends up after the record),
	// but only three are returned: $vra['XPlacement'] $vra['YPlacement'] $vra['XAdvance']
	$vra = array();

	// Adjustments in design units that are kept: horizontal placement, vertical placement,
	// horizontal advance (only used for horizontal writing)
	$kept = array(
		0x0001 => 'XPlacement',
		0x0002 => 'YPlacement',
		0x0004 => 'XAdvance',
	);
	foreach ($kept as $bit => $field) {
		if (($ValueFormat & $bit) == $bit) {
			$vra[$field] = $this->read_short();
		}
	}

	// Vertical adjustment for advance (only used for vertical writing) - read and discarded
	if (($ValueFormat & 0x0008) == 0x0008) {
		$this->read_short();
	}

	// Offsets to Device tables (may be NULL) for horizontal placement, vertical placement,
	// horizontal advance and vertical advance - read and discarded
	foreach (array(0x0010, 0x0020, 0x0040, 0x0080) as $bit) {
		if (($ValueFormat & $bit) == $bit) {
			$this->read_ushort();
		}
	}

	return $vra;
}
function _getAnchorTable($offset = 0)
{
	// Reads an Anchor table and returns array(XCoordinate, YCoordinate).
	// If $offset is non-zero the read position is moved there first; otherwise the table
	// is read from the current position.
	if ($offset) {
		$this->seek($offset);
	}

	// AnchorFormat - read to advance past it; the value itself is not used.
	// Format 2 specifies additional link to contour point; Format 3 additional Device table
	$this->read_ushort();

	$x = $this->read_short();
	$y = $this->read_short();

	return array($x, $y);
}
function _getMarkRecord($offset, $MarkPos)
{
	// Reads record number $MarkPos from the mark array starting at $offset and returns
	// array('Class' => ..., 'AnchorX' => ..., 'AnchorY' => ...)
	$this->seek($offset);

	// MarkCount - read to advance past it; the value itself is not used
	$this->read_ushort();

	// Skip the preceding records (4 bytes each: the two ushorts read below)
	$this->skip($MarkPos * 4);

	$markClass = $this->read_ushort();
	// Anchor table offset is relative to the start of the mark array
	$anchorOffset = $offset + $this->read_ushort();
	$anchor = $this->_getAnchorTable($anchorOffset);

	return array('Class' => $markClass, 'AnchorX' => $anchor[0], 'AnchorY' => $anchor[1]);
}
function _getGCOMignoreString($flag, $MarkFilteringSet)
{
	// Builds the regex fragment matching the run of glyphs a lookup must ignore, from the
	// LookupFlag $flag:
	//   - if any ignore class applies: "((?:(?:" . <glyphs joined with "|"> . "))*)"
	//   - otherwise: "()"
	// for Input - set on secondary Lookup table if in Context, and set Backtrack and Lookahead on Context Lookup
	// Throws MpdfException if the lookup uses a MarkFilteringSet (not supported yet).
	$str = "";

	// Flag & 0xFF?? = MarkAttachmentType
	if ($flag & 0xFF00) {
		// "a lookup must ignore any mark glyphs that are not in the specified mark attachment class"
		// $this->MarkAttachmentType is already adjusted for this i.e. contains all Marks except those in the MarkAttachmentClassDef table
		$MarkAttachmentType = $flag >> 8;
		$str = $this->MarkAttachmentType[$MarkAttachmentType];
	}

	// Flag & 0x0010 = UseMarkFilteringSet
	if ($flag & 0x0010) {
		// Not implemented. When it is (change also in ttfontsuni.php), this should check that
		// $MarkFilteringSet is set and take the glyphs from $this->MarkGlyphSets[$MarkFilteringSet].
		throw new MpdfException("This font [" . $this->fontkey . "] contains MarkGlyphSets - Not tested yet");
	}

	// If Ignore Marks set, supersedes any above
	// Flag & 0x0008 = Ignore Marks - (unless already done with MarkAttachmentType)
	if (($flag & 0x0008) == 0x0008 && ($flag & 0xFF00) == 0) {
		$str = $this->GlyphClassMarks;
	}

	// Flag & 0x0004 = Ignore Ligatures
	if (($flag & 0x0004) == 0x0004) {
		if ($str) {
			$str .= "|";
		}
		$str .= $this->GlyphClassLigatures;
	}

	// Flag & 0x0002 = Ignore BaseGlyphs
	if (($flag & 0x0002) == 0x0002) {
		if ($str) {
			$str .= "|";
		}
		$str .= $this->GlyphClassBases;
	}

	if ($str) {
		return "((?:(?:" . $str . "))*)";
	}
	return "()";
}
function _checkGCOMignore($flag, $glyph, $MarkFilteringSet)
{
	// Returns true if $glyph must be ignored by a lookup with LookupFlag $flag, i.e. if it is
	// found in the glyph string of any class the flag says to ignore; false otherwise.
	//   $MarkFilteringSet - index into $this->MarkGlyphSets, used only when Flag & 0x0010 is set
	//
	// NB strpos() returns 0 when the glyph is the first entry of the string, so every test
	// compares with "!== false" - a plain truth test would miss that first glyph.

	// Nothing to look for (also avoids searching for an empty needle)
	if ($glyph === '' || $glyph === null) {
		return false;
	}

	// Flag & 0x0008 = Ignore Marks - (unless already done with MarkAttachmentType)
	if (($flag & 0x0008) && ($flag & 0xFF00) == 0 && strpos($this->GlyphClassMarks, $glyph) !== false) {
		return true;
	}
	// Flag & 0x0004 = Ignore Ligatures
	if (($flag & 0x0004) && strpos($this->GlyphClassLigatures, $glyph) !== false) {
		return true;
	}
	// Flag & 0x0002 = Ignore BaseGlyphs
	if (($flag & 0x0002) && strpos($this->GlyphClassBases, $glyph) !== false) {
		return true;
	}
	// Flag & 0xFF?? = MarkAttachmentType
	// "a lookup must ignore any mark glyphs that are not in the specified mark attachment class"
	// $this->MarkAttachmentType is already adjusted for this i.e. contains all Marks except those in the MarkAttachmentClassDef table
	if (($flag & 0xFF00) && strpos($this->MarkAttachmentType[($flag >> 8)], $glyph) !== false) {
		return true;
	}
	// Flag & 0x0010 = UseMarkFilteringSet
	if (($flag & 0x0010) && strpos($this->MarkGlyphSets[$MarkFilteringSet], $glyph) !== false) {
		return true;
	}

	return false;
}
4590 ////////////////////////////////////////////////////////////////
4591 ////////////////////////////////////////////////////////////////
4592 ////////// BIDI ALGORITHM ////////////////////////
4593 ////////////////////////////////////////////////////////////////
4594 ////////////////////////////////////////////////////////////////
4595 ////////////////////////////////////////////////////////////////
4596 ////////////////////////////////////////////////////////////////
4597 // These functions are called from mpdf after GSUB/GPOS has taken place
4598 // At this stage the bidi-type is in string form
4599 ////////////////////////////////////////////////////////////////
4600 ////////////////////////////////////////////////////////////////
4602 Bidirectional Character Types
4603 =============================
4604 Type Description General Scope
4605 Strong
4606 L Left-to-Right LRM, most alphabetic, syllabic, Han ideographs, non-European or non-Arabic digits, ...
4607 LRE Left-to-Right Embedding LRE
4608 LRO Left-to-Right Override LRO
4609 R Right-to-Left RLM, Hebrew alphabet, and related punctuation
4610 AL Right-to-Left Arabic Arabic, Thaana, and Syriac alphabets, most punctuation specific to those scripts, ...
4611 RLE Right-to-Left Embedding RLE
4612 RLO Right-to-Left Override RLO
4613 Weak
4614 PDF Pop Directional Format PDF
4615 EN European Number European digits, Eastern Arabic-Indic digits, ...
4616 ES European Number Separator Plus sign, minus sign
4617 ET European Number Terminator Degree sign, currency symbols, ...
4618 AN Arabic Number Arabic-Indic digits, Arabic decimal and thousands separators, ...
4619 CS Common Number Separator Colon, comma, full stop (period), No-break space, ...
4620 NSM Nonspacing Mark Characters marked Mn (Nonspacing_Mark) and Me (Enclosing_Mark) in the Unicode Character Database
4621 BN Boundary Neutral Default ignorables, non-characters, and control characters, other than those explicitly given other types.
4622 Neutral
4623 B Paragraph Separator Paragraph separator, appropriate Newline Functions, higher-level protocol paragraph determination
4624 S Segment Separator Tab
4625 WS Whitespace Space, figure space, line separator, form feed, General Punctuation spaces, ...
4626 ON Other Neutrals All other characters, including OBJECT REPLACEMENT CHARACTER
4629 function _bidiSort($ta, $str = '', $dir, &$chunkOTLdata, $useGPOS)
4632 $pel = 0; // paragraph embedding level
4633 $maxlevel = 0;
4634 $numchars = count($chunkOTLdata['char_data']);
4636 // Set the initial paragraph embedding level
4637 if ($dir == 'rtl') {
4638 $pel = 1;
4639 } else {
4640 $pel = 0;
4644 // X1. Begin by setting the current embedding level to the paragraph embedding level. Set the directional override status to neutral.
4645 // Current Embedding Level
4646 $cel = $pel;
4647 // directional override status (-1 is Neutral)
4648 $dos = -1;
4649 $remember = array();
4651 // Array of characters data
4652 $chardata = Array();
4654 // Process each character iteratively, applying rules X2 through X9. Only embedding levels from 0 to 61 are valid in this phase.
4655 // In the resolution of levels in rules I1 and I2, the maximum embedding level of 62 can be reached.
4656 for ($i = 0; $i < $numchars; ++$i) {
4657 if ($chunkOTLdata['char_data'][$i]['uni'] == 8235) { // RLE
4658 // X2. With each RLE, compute the least greater odd embedding level.
4659 // a. If this new level would be valid, then this embedding code is valid. Remember (push) the current embedding level and override status. Reset the current level to this new level, and reset the override status to neutral.
4660 // b. If the new level would not be valid, then this code is invalid. Do not change the current level or override status.
4661 $next_level = $cel + ($cel % 2) + 1;
4662 if ($next_level < 62) {
4663 $remember[] = array('num' => 8235, 'cel' => $cel, 'dos' => $dos);
4664 $cel = $next_level;
4665 $dos = -1;
4667 } else if ($chunkOTLdata['char_data'][$i]['uni'] == 8234) { // LRE
4668 // X3. With each LRE, compute the least greater even embedding level.
4669 // a. If this new level would be valid, then this embedding code is valid. Remember (push) the current embedding level and override status. Reset the current level to this new level, and reset the override status to neutral.
4670 // b. If the new level would not be valid, then this code is invalid. Do not change the current level or override status.
4671 $next_level = $cel + 2 - ($cel % 2);
4672 if ($next_level < 62) {
4673 $remember[] = array('num' => 8234, 'cel' => $cel, 'dos' => $dos);
4674 $cel = $next_level;
4675 $dos = -1;
4677 } else if ($chunkOTLdata['char_data'][$i]['uni'] == 8238) { // RLO
4678 // X4. With each RLO, compute the least greater odd embedding level.
4679 // a. If this new level would be valid, then this embedding code is valid. Remember (push) the current embedding level and override status. Reset the current level to this new level, and reset the override status to right-to-left.
4680 // b. If the new level would not be valid, then this code is invalid. Do not change the current level or override status.
4681 $next_level = $cel + ($cel % 2) + 1;
4682 if ($next_level < 62) {
4683 $remember[] = array('num' => 8238, 'cel' => $cel, 'dos' => $dos);
4684 $cel = $next_level;
4685 $dos = UCDN::BIDI_CLASS_R;
4687 } else if ($chunkOTLdata['char_data'][$i]['uni'] == 8237) { // LRO
4688 // X5. With each LRO, compute the least greater even embedding level.
4689 // a. If this new level would be valid, then this embedding code is valid. Remember (push) the current embedding level and override status. Reset the current level to this new level, and reset the override status to left-to-right.
4690 // b. If the new level would not be valid, then this code is invalid. Do not change the current level or override status.
4691 $next_level = $cel + 2 - ($cel % 2);
4692 if ($next_level < 62) {
4693 $remember[] = array('num' => 8237, 'cel' => $cel, 'dos' => $dos);
4694 $cel = $next_level;
4695 $dos = UCDN::BIDI_CLASS_L;
4697 } else if ($chunkOTLdata['char_data'][$i]['uni'] == 8236) { // PDF
4698 // X7. With each PDF, determine the matching embedding or override code. If there was a valid matching code, restore (pop) the last remembered (pushed) embedding level and directional override.
4699 if (count($remember)) {
4700 $last = count($remember) - 1;
4701 if (($remember[$last]['num'] == 8235) || ($remember[$last]['num'] == 8234) || ($remember[$last]['num'] == 8238) ||
4702 ($remember[$last]['num'] == 8237)) {
4703 $match = array_pop($remember);
4704 $cel = $match['cel'];
4705 $dos = $match['dos'];
4708 } else if ($chunkOTLdata['char_data'][$i]['uni'] == 10) { // NEW LINE
4709 // Reset to start values
4710 $cel = $pel;
4711 $dos = -1;
4712 $remember = array();
4713 } else {
4714 // X6. For all types besides RLE, LRE, RLO, LRO, and PDF:
4715 // a. Set the level of the current character to the current embedding level.
4716 // b. When the directional override status is not neutral, reset the current character type to directional override status.
4717 if ($dos != -1) {
4718 $chardir = $dos;
4719 } else {
4720 $chardir = $chunkOTLdata['char_data'][$i]['bidi_class'];
4722 // stores string characters and other information
4723 if (isset($chunkOTLdata['GPOSinfo'][$i])) {
4724 $gpos = $chunkOTLdata['GPOSinfo'][$i];
4725 } else
4726 $gpos = '';
4727 $chardata[] = array('char' => $chunkOTLdata['char_data'][$i]['uni'], 'level' => $cel, 'type' => $chardir, 'group' => $chunkOTLdata['group']{$i}, 'GPOSinfo' => $gpos);
4731 $numchars = count($chardata);
4733 // X8. All explicit directional embeddings and overrides are completely terminated at the end of each paragraph.
4734 // Paragraph separators are not included in the embedding.
4735 // X9. Remove all RLE, LRE, RLO, LRO, and PDF codes.
4736 // This is effectively done by only saving other codes to chardata
4737 // X10. Determine the start-of-sequence (sor) and end-of-sequence (eor) types, either L or R, for each isolating run sequence. These depend on the higher of the two levels on either side of the sequence boundary:
4738 // For sor, compare the level of the first character in the sequence with the level of the character preceding it in the paragraph or if there is none, with the paragraph embedding level.
4739 // For eor, compare the level of the last character in the sequence with the level of the character following it in the paragraph or if there is none, with the paragraph embedding level.
4740 // If the higher level is odd, the sor or eor is R; otherwise, it is L.
4742 $prelevel = $pel;
4743 $postlevel = $pel;
4744 $cel = $prelevel; // current embedding level
4745 for ($i = 0; $i < $numchars; ++$i) {
4746 $level = $chardata[$i]['level'];
4747 if ($i == 0) {
4748 $left = $prelevel;
4749 } else {
4750 $left = $chardata[$i - 1]['level'];
4752 if ($i == ($numchars - 1)) {
4753 $right = $postlevel;
4754 } else {
4755 $right = $chardata[$i + 1]['level'];
4757 $chardata[$i]['sor'] = max($left, $level) % 2 ? UCDN::BIDI_CLASS_R : UCDN::BIDI_CLASS_L;
4758 $chardata[$i]['eor'] = max($right, $level) % 2 ? UCDN::BIDI_CLASS_R : UCDN::BIDI_CLASS_L;
4763 // 3.3.3 Resolving Weak Types
4764 // Weak types are now resolved one level run at a time. At level run boundaries where the type of the character on the other side of the boundary is required, the type assigned to sor or eor is used.
4765 // Nonspacing marks are now resolved based on the previous characters.
4766 // W1. Examine each nonspacing mark (NSM) in the level run, and change the type of the NSM to the type of the previous character. If the NSM is at the start of the level run, it will get the type of sor.
4767 for ($i = 0; $i < $numchars; ++$i) {
4768 if ($chardata[$i]['type'] == UCDN::BIDI_CLASS_NSM) {
4769 if ($i == 0 || $chardata[$i]['level'] != $chardata[$i - 1]['level']) {
4770 $chardata[$i]['type'] = $chardata[$i]['sor'];
4771 } else {
4772 $chardata[$i]['type'] = $chardata[($i - 1)]['type'];
4777 // W2. Search backward from each instance of a European number until the first strong type (R, L, AL, or sor) is found. If an AL is found, change the type of the European number to Arabic number.
4778 $prevlevel = -1;
4779 $levcount = 0;
4780 for ($i = 0; $i < $numchars; ++$i) {
4781 if ($chardata[$i]['type'] == UCDN::BIDI_CLASS_EN) {
4782 $found = false;
4783 for ($j = $levcount; $j >= 0; $j--) {
4784 if ($chardata[$j]['type'] == UCDN::BIDI_CLASS_AL) {
4785 $chardata[$i]['type'] = UCDN::BIDI_CLASS_AN;
4786 $found = true;
4787 break;
4788 } else if (($chardata[$j]['type'] == UCDN::BIDI_CLASS_L) || ($chardata[$j]['type'] == UCDN::BIDI_CLASS_R)) {
4789 $found = true;
4790 break;
4794 if ($chardata[$i]['level'] != $prevlevel) {
4795 $levcount = 0;
4796 } else {
4797 ++$levcount;
4799 $prevlevel = $chardata[$i]['level'];
4802 // W3. Change all ALs to R.
4803 for ($i = 0; $i < $numchars; ++$i) {
4804 if ($chardata[$i]['type'] == UCDN::BIDI_CLASS_AL) {
4805 $chardata[$i]['type'] = UCDN::BIDI_CLASS_R;
4809 // W4. A single European separator between two European numbers changes to a European number. A single common separator between two numbers of the same type changes to that type.
4810 for ($i = 1; $i < $numchars; ++$i) {
4811 if (($i + 1) < $numchars && $chardata[($i)]['level'] == $chardata[($i + 1)]['level'] && $chardata[($i)]['level'] == $chardata[($i - 1)]['level']) {
4812 if ($chardata[$i]['type'] == UCDN::BIDI_CLASS_ES && $chardata[($i - 1)]['type'] == UCDN::BIDI_CLASS_EN && $chardata[($i + 1)]['type'] == UCDN::BIDI_CLASS_EN) {
4813 $chardata[$i]['type'] = UCDN::BIDI_CLASS_EN;
4814 } else if ($chardata[$i]['type'] == UCDN::BIDI_CLASS_CS && $chardata[($i - 1)]['type'] == UCDN::BIDI_CLASS_EN && $chardata[($i + 1)]['type'] == UCDN::BIDI_CLASS_EN) {
4815 $chardata[$i]['type'] = UCDN::BIDI_CLASS_EN;
4816 } else if ($chardata[$i]['type'] == UCDN::BIDI_CLASS_CS && $chardata[($i - 1)]['type'] == UCDN::BIDI_CLASS_AN && $chardata[($i + 1)]['type'] == UCDN::BIDI_CLASS_AN) {
4817 $chardata[$i]['type'] = UCDN::BIDI_CLASS_AN;
4822 // W5. A sequence of European terminators adjacent to European numbers changes to all European numbers.
4823 for ($i = 0; $i < $numchars; ++$i) {
4824 if ($chardata[$i]['type'] == UCDN::BIDI_CLASS_ET) {
4825 if ($i > 0 && $chardata[($i - 1)]['type'] == UCDN::BIDI_CLASS_EN && $chardata[($i)]['level'] == $chardata[($i - 1)]['level']) {
4826 $chardata[$i]['type'] = UCDN::BIDI_CLASS_EN;
4827 } else {
4828 $j = $i + 1;
4829 while ($j < $numchars && $chardata[$j]['level'] == $chardata[$i]['level']) {
4830 if ($chardata[$j]['type'] == UCDN::BIDI_CLASS_EN) {
4831 $chardata[$i]['type'] = UCDN::BIDI_CLASS_EN;
4832 break;
4833 } else if ($chardata[$j]['type'] != UCDN::BIDI_CLASS_ET) {
4834 break;
4836 ++$j;
4842 // W6. Otherwise, separators and terminators change to Other Neutral.
4843 for ($i = 0; $i < $numchars; ++$i) {
4844 if (($chardata[$i]['type'] == UCDN::BIDI_CLASS_ET) || ($chardata[$i]['type'] == UCDN::BIDI_CLASS_ES) || ($chardata[$i]['type'] == UCDN::BIDI_CLASS_CS)) {
4845 $chardata[$i]['type'] = UCDN::BIDI_CLASS_ON;
4849 //W7. Search backward from each instance of a European number until the first strong type (R, L, or sor) is found. If an L is found, then change the type of the European number to L.
4850 for ($i = 0; $i < $numchars; ++$i) {
4851 if ($chardata[$i]['type'] == UCDN::BIDI_CLASS_EN) {
4852 if ($i == 0) { // Start of Level run
4853 if ($chardata[$i]['sor'] == UCDN::BIDI_CLASS_L)
4854 $chardata[$i]['type'] = $chardata[$i]['sor'];
4856 else {
4857 for ($j = $i - 1; $j >= 0; $j--) {
4858 if ($chardata[$j]['level'] != $chardata[$i]['level']) { // Level run boundary
4859 if ($chardata[$j + 1]['sor'] == UCDN::BIDI_CLASS_L)
4860 $chardata[$i]['type'] = $chardata[$j + 1]['sor'];
4861 break;
4863 else if ($chardata[$j]['type'] == UCDN::BIDI_CLASS_L) {
4864 $chardata[$i]['type'] = UCDN::BIDI_CLASS_L;
4865 break;
4866 } else if ($chardata[$j]['type'] == UCDN::BIDI_CLASS_R) {
4867 break;
4874 // N1. A sequence of neutrals takes the direction of the surrounding strong text if the text on both sides has the same direction. European and Arabic numbers act as if they were R in terms of their influence on neutrals. Start-of-level-run (sor) and end-of-level-run (eor) are used at level run boundaries.
4875 for ($i = 0; $i < $numchars; ++$i) {
4876 if ($chardata[$i]['type'] == UCDN::BIDI_CLASS_ON || $chardata[$i]['type'] == UCDN::BIDI_CLASS_WS) {
4877 $left = -1;
4878 // LEFT
4879 if ($i == 0) { // first char
4880 $left = $chardata[($i)]['sor'];
4881 } else if ($chardata[($i - 1)]['level'] != $chardata[($i)]['level']) { // run boundary
4882 $left = $chardata[($i)]['sor'];
4883 } else if ($chardata[($i - 1)]['type'] == UCDN::BIDI_CLASS_L) {
4884 $left = UCDN::BIDI_CLASS_L;
4885 } else if ($chardata[($i - 1)]['type'] == UCDN::BIDI_CLASS_R || $chardata[($i - 1)]['type'] == UCDN::BIDI_CLASS_EN || $chardata[($i - 1)]['type'] == UCDN::BIDI_CLASS_AN) {
4886 $left = UCDN::BIDI_CLASS_R;
4888 // RIGHT
4889 $right = -1;
4890 $j = $i;
4891 // move to the right of any following neutrals OR hit a run boundary
4892 while (($chardata[$j]['type'] == UCDN::BIDI_CLASS_ON || $chardata[$j]['type'] == UCDN::BIDI_CLASS_WS) && $j <= ($numchars - 1)) {
4893 if ($j == ($numchars - 1)) { // last char
4894 $right = $chardata[($j)]['eor'];
4895 break;
4896 } else if ($chardata[($j + 1)]['level'] != $chardata[($j)]['level']) { // run boundary
4897 $right = $chardata[($j)]['eor'];
4898 break;
4899 } else if ($chardata[($j + 1)]['type'] == UCDN::BIDI_CLASS_L) {
4900 $right = UCDN::BIDI_CLASS_L;
4901 break;
4902 } else if ($chardata[($j + 1)]['type'] == UCDN::BIDI_CLASS_R || $chardata[($j + 1)]['type'] == UCDN::BIDI_CLASS_EN || $chardata[($j + 1)]['type'] == UCDN::BIDI_CLASS_AN) {
4903 $right = UCDN::BIDI_CLASS_R;
4904 break;
4906 $j++;
4908 if ($left > -1 && $left == $right) {
4909 $chardata[$i]['orig_type'] = $chardata[$i]['type']; // Need to store the original 'WS' for reference in L1 below
4910 $chardata[$i]['type'] = $left;
4915 // N2. Any remaining neutrals take the embedding direction
4916 for ($i = 0; $i < $numchars; ++$i) {
4917 if ($chardata[$i]['type'] == UCDN::BIDI_CLASS_ON || $chardata[$i]['type'] == UCDN::BIDI_CLASS_WS) {
4918 $chardata[$i]['type'] = ($chardata[$i]['level'] % 2) ? UCDN::BIDI_CLASS_R : UCDN::BIDI_CLASS_L;
4919 $chardata[$i]['orig_type'] = $chardata[$i]['type']; // Need to store the original 'WS' for reference in L1 below
4923 // I1. For all characters with an even (left-to-right) embedding direction, those of type R go up one level and those of type AN or EN go up two levels.
4924 // I2. For all characters with an odd (right-to-left) embedding direction, those of type L, EN or AN go up one level.
4925 for ($i = 0; $i < $numchars; ++$i) {
4926 $odd = $chardata[$i]['level'] % 2;
4927 if ($odd) {
4928 if (($chardata[$i]['type'] == UCDN::BIDI_CLASS_L) || ($chardata[$i]['type'] == UCDN::BIDI_CLASS_AN) || ($chardata[$i]['type'] == UCDN::BIDI_CLASS_EN)) {
4929 $chardata[$i]['level'] += 1;
4931 } else {
4932 if ($chardata[$i]['type'] == UCDN::BIDI_CLASS_R) {
4933 $chardata[$i]['level'] += 1;
4934 } else if (($chardata[$i]['type'] == UCDN::BIDI_CLASS_AN) || ($chardata[$i]['type'] == UCDN::BIDI_CLASS_EN)) {
4935 $chardata[$i]['level'] += 2;
4938 $maxlevel = max($chardata[$i]['level'], $maxlevel);
4941 // NB
4942 // Separate into lines at this point************
4944 // L1. On each line, reset the embedding level of the following characters to the paragraph embedding level:
4945 // 1. Segment separators (Tab) 'S',
4946 // 2. Paragraph separators 'B',
4947 // 3. Any sequence of whitespace characters 'WS' preceding a segment separator or paragraph separator, and
4948 // 4. Any sequence of whitespace characters 'WS' at the end of the line.
4949 // The types of characters used here are the original types, not those modified by the previous phase cf N1 and N2*******
4950 // Because a Paragraph Separator breaks lines, there will be at most one per line, at the end of that line.
4952 for ($i = ($numchars - 1); $i > 0; $i--) {
4953 if ($chardata[$i]['type'] == UCDN::BIDI_CLASS_WS || (isset($chardata[$i]['orig_type']) && $chardata[$i]['orig_type'] == UCDN::BIDI_CLASS_WS)) {
4954 $chardata[$i]['level'] = $pel;
4955 } else {
4956 break;
4961 // L2. From the highest level found in the text to the lowest odd level on each line, including intermediate levels not actually present in the text, reverse any contiguous sequence of characters that are at that level or higher.
4962 for ($j = $maxlevel; $j > 0; $j--) {
4963 $ordarray = array();
4964 $revarr = array();
4965 $onlevel = false;
4966 for ($i = 0; $i < $numchars; ++$i) {
4967 if ($chardata[$i]['level'] >= $j) {
4968 $onlevel = true;
4970 // L4. A character is depicted by a mirrored glyph if and only if (a) the resolved directionality of that character is R, and (b) the Bidi_Mirrored property value of that character is true.
4971 if (isset(UCDN::$mirror_pairs[$chardata[$i]['char']]) && $chardata[$i]['type'] == UCDN::BIDI_CLASS_R) {
4972 $chardata[$i]['char'] = UCDN::$mirror_pairs[$chardata[$i]['char']];
4975 $revarr[] = $chardata[$i];
4976 } else {
4977 if ($onlevel) {
4978 $revarr = array_reverse($revarr);
4979 $ordarray = array_merge($ordarray, $revarr);
4980 $revarr = Array();
4981 $onlevel = false;
4983 $ordarray[] = $chardata[$i];
4986 if ($onlevel) {
4987 $revarr = array_reverse($revarr);
4988 $ordarray = array_merge($ordarray, $revarr);
4990 $chardata = $ordarray;
4993 $group = '';
4994 $e = '';
4995 $GPOS = array();
4996 $cctr = 0;
4997 $rtl_content = 0x0;
4998 foreach ($chardata as $cd) {
4999 $e.=code2utf($cd['char']);
5000 $group .= $cd['group'];
5001 if ($useGPOS && is_array($cd['GPOSinfo'])) {
5002 $GPOS[$cctr] = $cd['GPOSinfo'];
5003 $GPOS[$cctr]['wDir'] = ($cd['level'] % 2) ? 'RTL' : 'LTR';
5005 if ($cd['type'] == UCDN::BIDI_CLASS_L) {
5006 $rtl_content |= 1;
5007 } else if ($cd['type'] == UCDN::BIDI_CLASS_R) {
5008 $rtl_content |= 2;
5010 $cctr++;
5014 $chunkOTLdata['group'] = $group;
5015 if ($useGPOS) {
5016 $chunkOTLdata['GPOSinfo'] = $GPOS;
5019 return array($e, $rtl_content);
5022 // **********************************************************************************************
5023 // The following versions of BidiSort work on amalgamated chunks so that the whole paragraph is processed together, in two stages:
5024 // Firstly, set the levels in the OTLdata - called from fn printbuffer() [_bidiPrepare]
5025 // Secondly, re-order - called from fn writeFlowingBlock and FinishFlowingBlock, once the text has already been divided into lines [_bidiReorder]
5026 // **********************************************************************************************
5028 function _bidiPrepare(&$para, $dir)
5031 // Set the initial paragraph embedding level
5032 $pel = 0; // paragraph embedding level
5033 if ($dir == 'rtl') {
5034 $pel = 1;
5037 // X1. Begin by setting the current embedding level to the paragraph embedding level. Set the directional override status to neutral.
5038 // Current Embedding Level
5039 $cel = $pel;
5040 // directional override status (-1 is Neutral)
5041 $dos = -1;
5042 $remember = array();
5043 $controlchars = false;
5044 $strongrtl = false;
5045 $diid = 0; // direction isolate ID
5046 $dictr = 0; // direction isolate counter
5047 // Process each character iteratively, applying rules X2 through X9. Only embedding levels from 0 to 61 are valid in this phase.
5048 // In the resolution of levels in rules I1 and I2, the maximum embedding level of 62 can be reached.
5049 $numchunks = count($para);
5050 for ($nc = 0; $nc < $numchunks; $nc++) {
5051 $chunkOTLdata = & $para[$nc][18];
5053 $numchars = count($chunkOTLdata['char_data']);
5054 for ($i = 0; $i < $numchars; ++$i) {
5055 if ($chunkOTLdata['char_data'][$i]['uni'] == 8235) { // RLE
5056 // X2. With each RLE, compute the least greater odd embedding level.
5057 // a. If this new level would be valid, then this embedding code is valid. Remember (push) the current embedding level and override status. Reset the current level to this new level, and reset the override status to neutral.
5058 // b. If the new level would not be valid, then this code is invalid. Do not change the current level or override status.
5059 $next_level = $cel + ($cel % 2) + 1;
5060 if ($next_level < 62) {
5061 $remember[] = array('num' => 8235, 'cel' => $cel, 'dos' => $dos);
5062 $cel = $next_level;
5063 $dos = -1;
5064 $controlchars = true;
5066 } else if ($chunkOTLdata['char_data'][$i]['uni'] == 8234) { // LRE
5067 // X3. With each LRE, compute the least greater even embedding level.
5068 // a. If this new level would be valid, then this embedding code is valid. Remember (push) the current embedding level and override status. Reset the current level to this new level, and reset the override status to neutral.
5069 // b. If the new level would not be valid, then this code is invalid. Do not change the current level or override status.
5070 $next_level = $cel + 2 - ($cel % 2);
5071 if ($next_level < 62) {
5072 $remember[] = array('num' => 8234, 'cel' => $cel, 'dos' => $dos);
5073 $cel = $next_level;
5074 $dos = -1;
5075 $controlchars = true;
5077 } else if ($chunkOTLdata['char_data'][$i]['uni'] == 8238) { // RLO
5078 // X4. With each RLO, compute the least greater odd embedding level.
5079 // a. If this new level would be valid, then this embedding code is valid. Remember (push) the current embedding level and override status. Reset the current level to this new level, and reset the override status to right-to-left.
5080 // b. If the new level would not be valid, then this code is invalid. Do not change the current level or override status.
5081 $next_level = $cel + ($cel % 2) + 1;
5082 if ($next_level < 62) {
5083 $remember[] = array('num' => 8238, 'cel' => $cel, 'dos' => $dos);
5084 $cel = $next_level;
5085 $dos = UCDN::BIDI_CLASS_R;
5086 $controlchars = true;
5088 } else if ($chunkOTLdata['char_data'][$i]['uni'] == 8237) { // LRO
5089 // X5. With each LRO, compute the least greater even embedding level.
5090 // a. If this new level would be valid, then this embedding code is valid. Remember (push) the current embedding level and override status. Reset the current level to this new level, and reset the override status to left-to-right.
5091 // b. If the new level would not be valid, then this code is invalid. Do not change the current level or override status.
5092 $next_level = $cel + 2 - ($cel % 2);
5093 if ($next_level < 62) {
5094 $remember[] = array('num' => 8237, 'cel' => $cel, 'dos' => $dos);
5095 $cel = $next_level;
5096 $dos = UCDN::BIDI_CLASS_L;
5097 $controlchars = true;
5099 } else if ($chunkOTLdata['char_data'][$i]['uni'] == 8236) { // PDF
5100 // X7. With each PDF, determine the matching embedding or override code. If there was a valid matching code, restore (pop) the last remembered (pushed) embedding level and directional override.
5101 if (count($remember)) {
5102 $last = count($remember) - 1;
5103 if (($remember[$last]['num'] == 8235) || ($remember[$last]['num'] == 8234) || ($remember[$last]['num'] == 8238) ||
5104 ($remember[$last]['num'] == 8237)) {
5105 $match = array_pop($remember);
5106 $cel = $match['cel'];
5107 $dos = $match['dos'];
5110 } else if ($chunkOTLdata['char_data'][$i]['uni'] == 8294 || $chunkOTLdata['char_data'][$i]['uni'] == 8295 ||
5111 $chunkOTLdata['char_data'][$i]['uni'] == 8296) { // LRI // RLI // FSI
5112 // X5a. With each RLI:
5113 // X5b. With each LRI:
5114 // X5c. With each FSI, apply rules P2 and P3 for First Strong character
5115 // Set the RLI/LRI/FSI embedding level to the embedding level of the last entry on the directional status stack.
5116 if ($dos != -1) {
5117 $chardir = $dos;
5118 } else {
5119 $chardir = $chunkOTLdata['char_data'][$i]['bidi_class'];
5121 $chunkOTLdata['char_data'][$i]['level'] = $cel;
5122 $chunkOTLdata['char_data'][$i]['type'] = $chardir;
5123 $chunkOTLdata['char_data'][$i]['diid'] = $diid;
5125 $fsi = '';
5126 // X5c. With each FSI, apply rules P2 and P3 within the isolate run for First Strong character
5127 if ($chunkOTLdata['char_data'][$i]['uni'] == 8296) { // FSI
5128 $lvl = 0;
5129 $nc2 = $nc;
5130 $i2 = $i;
5131 while (!($nc2 == ($numchunks - 1) && $i2 == ((count($para[$nc2][18]['char_data'])) - 1))) { // while not at end of last chunk
5132 $i2++;
5133 if ($i2 >= count($para[$nc2][18]['char_data'])) {
5134 $nc2++;
5135 $i2 = 0;
5137 if ($lvl > 0) {
5138 continue;
5140 if ($para[$nc2][18]['char_data'][$i2]['uni'] == 8294 || $para[$nc2][18]['char_data'][$i2]['uni'] == 8295 || $para[$nc2][18]['char_data'][$i2]['uni'] == 8296) {
5141 $lvl++;
5142 continue;
5144 if ($para[$nc2][18]['char_data'][$i2]['uni'] == 8297) {
5145 $lvl--;
5146 if ($lvl < 0) {
5147 break;
5150 if ($para[$nc2][18]['char_data'][$i2]['bidi_class'] === UCDN::BIDI_CLASS_L || $para[$nc2][18]['char_data'][$i2]['bidi_class'] == UCDN::BIDI_CLASS_AL || $para[$nc2][18]['char_data'][$i2]['bidi_class'] === UCDN::BIDI_CLASS_R) {
5151 $fsi = $para[$nc2][18]['char_data'][$i2]['bidi_class'];
5152 break;
5155 // if fsi not found, fsi is same as paragraph embedding level
5156 if (!$fsi && $fsi !== 0) {
5157 if ($pel == 1) {
5158 $fsi = UCDN::BIDI_CLASS_R;
5159 } else {
5160 $fsi = UCDN::BIDI_CLASS_L;
5165 if ($chunkOTLdata['char_data'][$i]['uni'] == 8294 || $fsi === UCDN::BIDI_CLASS_L) { // LRI or FSI-L
5166 // Compute the least even embedding level greater than the embedding level of the last entry on the directional status stack.
5167 $next_level = $cel + 2 - ($cel % 2);
5168 } else if ($chunkOTLdata['char_data'][$i]['uni'] == 8295 || $fsi == UCDN::BIDI_CLASS_R || $fsi == UCDN::BIDI_CLASS_AL) { // RLI or FSI-R
5169 // Compute the least odd embedding level greater than the embedding level of the last entry on the directional status stack.
5170 $next_level = $cel + ($cel % 2) + 1;
5174 // Increment the isolate count by one, and push an entry consisting of the new embedding level,
5175 // neutral directional override status, and true directional isolate status onto the directional status stack.
5176 $remember[] = array('num' => $chunkOTLdata['char_data'][$i]['uni'], 'cel' => $cel, 'dos' => $dos, 'diid' => $diid);
5177 $cel = $next_level;
5178 $dos = -1;
5179 $diid = ++$dictr; // Set new direction isolate ID after incrementing direction isolate counter
5181 $controlchars = true;
5182 } else if ($chunkOTLdata['char_data'][$i]['uni'] == 8297) { // PDI
5183 // X6a. With each PDI, perform the following steps:
5184 // Pop the last entry from the directional status stack and decrement the isolate count by one.
5185 while (count($remember)) {
5186 $last = count($remember) - 1;
5187 if (($remember[$last]['num'] == 8294) || ($remember[$last]['num'] == 8295) || ($remember[$last]['num'] == 8296)) {
5188 $match = array_pop($remember);
5189 $cel = $match['cel'];
5190 $dos = $match['dos'];
5191 $diid = $match['diid'];
5192 break;
5194 // End/close any open embedding states not explicitly closed during the isolate
5195 else if (($remember[$last]['num'] == 8235) || ($remember[$last]['num'] == 8234) || ($remember[$last]['num'] == 8238) ||
5196 ($remember[$last]['num'] == 8237)) {
5197 $match = array_pop($remember);
5200 // In all cases, set the PDI’s level to the embedding level of the last entry on the directional status stack left after the steps above.
5201 // NB The level assigned to an isolate initiator is always the same as that assigned to the matching PDI.
5202 if ($dos != -1) {
5203 $chardir = $dos;
5204 } else {
5205 $chardir = $chunkOTLdata['char_data'][$i]['bidi_class'];
5207 $chunkOTLdata['char_data'][$i]['level'] = $cel;
5208 $chunkOTLdata['char_data'][$i]['type'] = $chardir;
5209 $chunkOTLdata['char_data'][$i]['diid'] = $diid;
5210 $controlchars = true;
5211 } else if ($chunkOTLdata['char_data'][$i]['uni'] == 10) { // NEW LINE
5212 // Reset to start values
5213 $cel = $pel;
5214 $dos = -1;
5215 $remember = array();
5216 } else {
5217 // X6. For all types besides RLE, LRE, RLO, LRO, and PDF:
5218 // a. Set the level of the current character to the current embedding level.
5219 // b. When the directional override status is not neutral, reset the current character type to directional override status.
5220 if ($dos != -1) {
5221 $chardir = $dos;
5222 } else {
5223 $chardir = $chunkOTLdata['char_data'][$i]['bidi_class'];
5224 if ($chardir == UCDN::BIDI_CLASS_R || $chardir == UCDN::BIDI_CLASS_AL) {
5225 $strongrtl = true;
5228 $chunkOTLdata['char_data'][$i]['level'] = $cel;
5229 $chunkOTLdata['char_data'][$i]['type'] = $chardir;
5230 $chunkOTLdata['char_data'][$i]['diid'] = $diid;
5233 // X8. All explicit directional embeddings and overrides are completely terminated at the end of each paragraph.
5234 // Paragraph separators are not included in the embedding.
5235 // X9. Remove all RLE, LRE, RLO, LRO, and PDF codes.
5236 if ($controlchars) {
5237 $this->removeChar($para[$nc][0], $para[$nc][18], "\xe2\x80\xaa");
5238 $this->removeChar($para[$nc][0], $para[$nc][18], "\xe2\x80\xab");
5239 $this->removeChar($para[$nc][0], $para[$nc][18], "\xe2\x80\xac");
5240 $this->removeChar($para[$nc][0], $para[$nc][18], "\xe2\x80\xad");
5241 $this->removeChar($para[$nc][0], $para[$nc][18], "\xe2\x80\xae");
5242 preg_replace("/\x{202a}-\x{202e}/u", '', $para[$nc][0]);
5246 // Remove any blank chunks made by removing directional codes
5247 $numchunks = count($para);
5248 for ($nc = ($numchunks - 1); $nc >= 0; $nc--) {
5249 if (count($para[$nc][18]['char_data']) == 0) {
5250 array_splice($para, $nc, 1);
5253 if ($dir != 'rtl' && !$strongrtl && !$controlchars) {
5254 return;
5257 $numchunks = count($para);
5259 // X10. Determine the start-of-sequence (sor) and end-of-sequence (eor) types, either L or R, for each isolating run sequence. These depend on the higher of the two levels on either side of the sequence boundary:
5260 // For sor, compare the level of the first character in the sequence with the level of the character preceding it in the paragraph or if there is none, with the paragraph embedding level.
5261 // For eor, compare the level of the last character in the sequence with the level of the character following it in the paragraph or if there is none, with the paragraph embedding level.
5262 // If the higher level is odd, the sor or eor is R; otherwise, it is L.
5264 for ($ir = 0; $ir <= $dictr; $ir++) {
5265 $prelevel = $pel;
5266 $postlevel = $pel;
5267 $firstchar = true;
5268 for ($nc = 0; $nc < $numchunks; $nc++) {
5269 $chardata = & $para[$nc][18]['char_data'];
5270 $numchars = count($chardata);
5271 for ($i = 0; $i < $numchars; ++$i) {
5272 if (!isset($chardata[$i]['diid']) || $chardata[$i]['diid'] != $ir) {
5273 continue;
5274 } // Ignore characters in a different isolate run
5275 $right = $postlevel;
5276 $nc2 = $nc;
5277 $i2 = $i;
5278 while (!($nc2 == ($numchunks - 1) && $i2 == ((count($para[$nc2][18]['char_data'])) - 1))) { // while not at end of last chunk
5279 $i2++;
5280 if ($i2 >= count($para[$nc2][18]['char_data'])) {
5281 $nc2++;
5282 $i2 = 0;
5285 if (isset($para[$nc2][18]['char_data'][$i2]['diid']) && $para[$nc2][18]['char_data'][$i2]['diid'] == $ir) {
5286 $right = $para[$nc2][18]['char_data'][$i2]['level'];
5287 break;
5291 $level = $chardata[$i]['level'];
5292 if ($firstchar || $level != $prelevel) {
5293 $chardata[$i]['sor'] = max($prelevel, $level) % 2 ? UCDN::BIDI_CLASS_R : UCDN::BIDI_CLASS_L;
5295 if (($nc == ($numchunks - 1) && $i == ($numchars - 1)) || $level != $right) {
5296 $chardata[$i]['eor'] = max($right, $level) % 2 ? UCDN::BIDI_CLASS_R : UCDN::BIDI_CLASS_L;
5298 $prelevel = $level;
5299 $firstchar = false;
5305 // 3.3.3 Resolving Weak Types
5306 // Weak types are now resolved one level run at a time. At level run boundaries where the type of the character on the other side of the boundary is required, the type assigned to sor or eor is used.
5307 // Nonspacing marks are now resolved based on the previous characters.
5308 // W1. Examine each nonspacing mark (NSM) in the level run, and change the type of the NSM to the type of the previous character. If the NSM is at the start of the level run, it will get the type of sor.
5309 for ($ir = 0; $ir <= $dictr; $ir++) {
5310 $prevtype = 0;
5311 for ($nc = 0; $nc < $numchunks; $nc++) {
5312 $chardata = & $para[$nc][18]['char_data'];
5313 $numchars = count($chardata);
5314 for ($i = 0; $i < $numchars; ++$i) {
5315 if (!isset($chardata[$i]['diid']) || $chardata[$i]['diid'] != $ir) {
5316 continue;
5317 } // Ignore characters in a different isolate run
5318 if ($chardata[$i]['type'] == UCDN::BIDI_CLASS_NSM) {
5319 if (isset($chardata[$i]['sor'])) {
5320 $chardata[$i]['type'] = $chardata[$i]['sor'];
5321 } else {
5322 $chardata[$i]['type'] = $prevtype;
5325 $prevtype = $chardata[$i]['type'];
5330 // W2. Search backward from each instance of a European number until the first strong type (R, L, AL or sor) is found. If an AL is found, change the type of the European number to Arabic number.
5331 for ($ir = 0; $ir <= $dictr; $ir++) {
5332 $laststrongtype = -1;
5333 for ($nc = 0; $nc < $numchunks; $nc++) {
5334 $chardata = & $para[$nc][18]['char_data'];
5335 $numchars = count($chardata);
5336 for ($i = 0; $i < $numchars; ++$i) {
5337 if (!isset($chardata[$i]['diid']) || $chardata[$i]['diid'] != $ir) {
5338 continue;
5339 } // Ignore characters in a different isolate run
5340 if (isset($chardata[$i]['sor'])) {
5341 $laststrongtype = $chardata[$i]['sor'];
5343 if ($chardata[$i]['type'] == UCDN::BIDI_CLASS_EN && $laststrongtype == UCDN::BIDI_CLASS_AL) {
5344 $chardata[$i]['type'] = UCDN::BIDI_CLASS_AN;
5346 if ($chardata[$i]['type'] == UCDN::BIDI_CLASS_L || $chardata[$i]['type'] == UCDN::BIDI_CLASS_R || $chardata[$i]['type'] == UCDN::BIDI_CLASS_AL) {
5347 $laststrongtype = $chardata[$i]['type'];
5354 // W3. Change all ALs to R.
5355 for ($nc = 0; $nc < $numchunks; $nc++) {
5356 $chardata = & $para[$nc][18]['char_data'];
5357 $numchars = count($chardata);
5358 for ($i = 0; $i < $numchars; ++$i) {
5359 if (isset($chardata[$i]['type']) && $chardata[$i]['type'] == UCDN::BIDI_CLASS_AL) {
5360 $chardata[$i]['type'] = UCDN::BIDI_CLASS_R;
5366 // W4. A single European separator between two European numbers changes to a European number. A single common separator between two numbers of the same type changes to that type.
5367 for ($ir = 0; $ir <= $dictr; $ir++) {
5368 $prevtype = -1;
5369 $nexttype = -1;
5370 for ($nc = 0; $nc < $numchunks; $nc++) {
5371 $chardata = & $para[$nc][18]['char_data'];
5372 $numchars = count($chardata);
5373 for ($i = 0; $i < $numchars; ++$i) {
5374 if (!isset($chardata[$i]['diid']) || $chardata[$i]['diid'] != $ir) {
5375 continue;
5376 } // Ignore characters in a different isolate run
5377 // Get next type
5378 $nexttype = -1;
5379 $nc2 = $nc;
5380 $i2 = $i;
5381 while (!($nc2 == ($numchunks - 1) && $i2 == ((count($para[$nc2][18]['char_data'])) - 1))) { // while not at end of last chunk
5382 $i2++;
5383 if ($i2 >= count($para[$nc2][18]['char_data'])) {
5384 $nc2++;
5385 $i2 = 0;
5388 if (isset($para[$nc2][18]['char_data'][$i2]['diid']) && $para[$nc2][18]['char_data'][$i2]['diid'] == $ir) {
5389 $nexttype = $para[$nc2][18]['char_data'][$i2]['type'];
5390 break;
5394 if (!isset($chardata[$i]['sor']) && !isset($chardata[$i]['eor'])) {
5395 if ($chardata[$i]['type'] == UCDN::BIDI_CLASS_ES && $prevtype == UCDN::BIDI_CLASS_EN && $nexttype == UCDN::BIDI_CLASS_EN) {
5396 $chardata[$i]['type'] = UCDN::BIDI_CLASS_EN;
5397 } else if ($chardata[$i]['type'] == UCDN::BIDI_CLASS_CS && $prevtype == UCDN::BIDI_CLASS_EN && $nexttype == UCDN::BIDI_CLASS_EN) {
5398 $chardata[$i]['type'] = UCDN::BIDI_CLASS_EN;
5399 } else if ($chardata[$i]['type'] == UCDN::BIDI_CLASS_CS && $prevtype == UCDN::BIDI_CLASS_AN && $nexttype == UCDN::BIDI_CLASS_AN) {
5400 $chardata[$i]['type'] = UCDN::BIDI_CLASS_AN;
5403 $prevtype = $chardata[$i]['type'];
5408 // W5. A sequence of European terminators adjacent to European numbers changes to all European numbers.
5409 for ($ir = 0; $ir <= $dictr; $ir++) {
5410 $prevtype = -1;
5411 $nexttype = -1;
5412 for ($nc = 0; $nc < $numchunks; $nc++) {
5413 $chardata = & $para[$nc][18]['char_data'];
5414 $numchars = count($chardata);
5415 for ($i = 0; $i < $numchars; ++$i) {
5416 if (!isset($chardata[$i]['diid']) || $chardata[$i]['diid'] != $ir) {
5417 continue;
5418 } // Ignore characters in a different isolate run
5419 if (isset($chardata[$i]['sor'])) {
5420 $prevtype = $chardata[$i]['sor'];
5423 if ($chardata[$i]['type'] == UCDN::BIDI_CLASS_ET) {
5424 if ($prevtype == UCDN::BIDI_CLASS_EN) {
5425 $chardata[$i]['type'] = UCDN::BIDI_CLASS_EN;
5426 } else if (!isset($chardata[$i]['eor'])) {
5427 $nexttype = -1;
5428 $nc2 = $nc;
5429 $i2 = $i;
5430 while (!($nc2 == ($numchunks - 1) && $i2 == ((count($para[$nc2][18]['char_data'])) - 1))) { // while not at end of last chunk
5431 $i2++;
5432 if ($i2 >= count($para[$nc2][18]['char_data'])) {
5433 $nc2++;
5434 $i2 = 0;
5436 if ($para[$nc2][18]['char_data'][$i2]['diid'] != $ir) {
5437 continue;
5439 $nexttype = $para[$nc2][18]['char_data'][$i2]['type'];
5440 if (isset($para[$nc2][18]['char_data'][$i2]['sor'])) {
5441 break;
5443 if ($nexttype == UCDN::BIDI_CLASS_EN) {
5444 $chardata[$i]['type'] = UCDN::BIDI_CLASS_EN;
5445 break;
5446 } else if ($nexttype != UCDN::BIDI_CLASS_ET) {
5447 break;
5452 $prevtype = $chardata[$i]['type'];
5457 // W6. Otherwise, separators and terminators change to Other Neutral.
5458 for ($nc = 0; $nc < $numchunks; $nc++) {
5459 $chardata = & $para[$nc][18]['char_data'];
5460 $numchars = count($chardata);
5461 for ($i = 0; $i < $numchars; ++$i) {
5462 if (isset($chardata[$i]['type']) && (($chardata[$i]['type'] == UCDN::BIDI_CLASS_ET) || ($chardata[$i]['type'] == UCDN::BIDI_CLASS_ES) || ($chardata[$i]['type'] == UCDN::BIDI_CLASS_CS))) {
5463 $chardata[$i]['type'] = UCDN::BIDI_CLASS_ON;
5468 //W7. Search backward from each instance of a European number until the first strong type (R, L, or sor) is found. If an L is found, then change the type of the European number to L.
5469 for ($ir = 0; $ir <= $dictr; $ir++) {
5470 $laststrongtype = -1;
5471 for ($nc = 0; $nc < $numchunks; $nc++) {
5472 $chardata = & $para[$nc][18]['char_data'];
5473 $numchars = count($chardata);
5474 for ($i = 0; $i < $numchars; ++$i) {
5475 if (!isset($chardata[$i]['diid']) || $chardata[$i]['diid'] != $ir) {
5476 continue;
5477 } // Ignore characters in a different isolate run
5478 if (isset($chardata[$i]['sor'])) {
5479 $laststrongtype = $chardata[$i]['sor'];
5481 if (isset($chardata[$i]['type']) && $chardata[$i]['type'] == UCDN::BIDI_CLASS_EN && $laststrongtype == UCDN::BIDI_CLASS_L) {
5482 $chardata[$i]['type'] = UCDN::BIDI_CLASS_L;
5484 if (isset($chardata[$i]['type']) && ($chardata[$i]['type'] == UCDN::BIDI_CLASS_L || $chardata[$i]['type'] == UCDN::BIDI_CLASS_R || $chardata[$i]['type'] == UCDN::BIDI_CLASS_AL)) {
5485 $laststrongtype = $chardata[$i]['type'];
5491 // N1. A sequence of neutrals takes the direction of the surrounding strong text if the text on both sides has the same direction. European and Arabic numbers act as if they were R in terms of their influence on neutrals. Start-of-level-run (sor) and end-of-level-run (eor) are used at level run boundaries.
5492 for ($ir = 0; $ir <= $dictr; $ir++) {
5493 $laststrongtype = -1;
5494 for ($nc = 0; $nc < $numchunks; $nc++) {
5495 $chardata = & $para[$nc][18]['char_data'];
5496 $numchars = count($chardata);
5497 for ($i = 0; $i < $numchars; ++$i) {
5498 if (!isset($chardata[$i]['diid']) || $chardata[$i]['diid'] != $ir) {
5499 continue;
5500 } // Ignore characters in a different isolate run
5501 if (isset($chardata[$i]['sor'])) {
5502 $laststrongtype = $chardata[$i]['sor'];
5504 if ($chardata[$i]['type'] == UCDN::BIDI_CLASS_ON || $chardata[$i]['type'] == UCDN::BIDI_CLASS_WS) {
5505 $left = -1;
5506 // LEFT
5507 if ($laststrongtype == UCDN::BIDI_CLASS_R || $laststrongtype == UCDN::BIDI_CLASS_EN || $laststrongtype == UCDN::BIDI_CLASS_AN) {
5508 $left = UCDN::BIDI_CLASS_R;
5509 } else if ($laststrongtype == UCDN::BIDI_CLASS_L) {
5510 $left = UCDN::BIDI_CLASS_L;
5512 // RIGHT
5513 $right = -1;
5514 // move to the right of any following neutrals OR hit a run boundary
5516 if (isset($chardata[$i]['eor'])) {
5517 $right = $chardata[$i]['eor'];
5518 } else {
5519 $nexttype = -1;
5520 $nc2 = $nc;
5521 $i2 = $i;
5522 while (!($nc2 == ($numchunks - 1) && $i2 == ((count($para[$nc2][18]['char_data'])) - 1))) { // while not at end of last chunk
5523 $i2++;
5524 if ($i2 >= count($para[$nc2][18]['char_data'])) {
5525 $nc2++;
5526 $i2 = 0;
5528 if (!isset($para[$nc2][18]['char_data'][$i2]['diid']) || $para[$nc2][18]['char_data'][$i2]['diid'] != $ir) {
5529 continue;
5531 $nexttype = $para[$nc2][18]['char_data'][$i2]['type'];
5532 if ($nexttype == UCDN::BIDI_CLASS_R || $nexttype == UCDN::BIDI_CLASS_EN || $nexttype == UCDN::BIDI_CLASS_AN) {
5533 $right = UCDN::BIDI_CLASS_R;
5534 break;
5535 } else if ($nexttype == UCDN::BIDI_CLASS_L) {
5536 $right = UCDN::BIDI_CLASS_L;
5537 break;
5538 } else if (isset($para[$nc2][18]['char_data'][$i2]['eor'])) {
5539 $right = $para[$nc2][18]['char_data'][$i2]['eor'];
5540 break;
5545 if ($left > -1 && $left == $right) {
5546 $chardata[$i]['orig_type'] = $chardata[$i]['type']; // Need to store the original 'WS' for reference in L1 below
5547 $chardata[$i]['type'] = $left;
5549 } else if ($chardata[$i]['type'] == UCDN::BIDI_CLASS_L || $chardata[$i]['type'] == UCDN::BIDI_CLASS_R || $chardata[$i]['type'] == UCDN::BIDI_CLASS_EN || $chardata[$i]['type'] == UCDN::BIDI_CLASS_AN) {
5550 $laststrongtype = $chardata[$i]['type'];
5556 // N2. Any remaining neutrals take the embedding direction
5557 for ($nc = 0; $nc < $numchunks; $nc++) {
5558 $chardata = & $para[$nc][18]['char_data'];
5559 $numchars = count($chardata);
5560 for ($i = 0; $i < $numchars; ++$i) {
5561 if (isset($chardata[$i]['type']) && ($chardata[$i]['type'] == UCDN::BIDI_CLASS_ON || $chardata[$i]['type'] == UCDN::BIDI_CLASS_WS)) {
5562 $chardata[$i]['orig_type'] = $chardata[$i]['type']; // Need to store the original 'WS' for reference in L1 below
5563 $chardata[$i]['type'] = ($chardata[$i]['level'] % 2) ? UCDN::BIDI_CLASS_R : UCDN::BIDI_CLASS_L;
5568 // I1. For all characters with an even (left-to-right) embedding direction, those of type R go up one level and those of type AN or EN go up two levels.
5569 // I2. For all characters with an odd (right-to-left) embedding direction, those of type L, EN or AN go up one level.
5570 for ($nc = 0; $nc < $numchunks; $nc++) {
5571 $chardata = & $para[$nc][18]['char_data'];
5572 $numchars = count($chardata);
5573 for ($i = 0; $i < $numchars; ++$i) {
5574 if (isset($chardata[$i]['level'])) {
5575 $odd = $chardata[$i]['level'] % 2;
5576 if ($odd) {
5577 if (($chardata[$i]['type'] == UCDN::BIDI_CLASS_L) || ($chardata[$i]['type'] == UCDN::BIDI_CLASS_AN) || ($chardata[$i]['type'] == UCDN::BIDI_CLASS_EN)) {
5578 $chardata[$i]['level'] += 1;
5580 } else {
5581 if ($chardata[$i]['type'] == UCDN::BIDI_CLASS_R) {
5582 $chardata[$i]['level'] += 1;
5583 } else if (($chardata[$i]['type'] == UCDN::BIDI_CLASS_AN) || ($chardata[$i]['type'] == UCDN::BIDI_CLASS_EN)) {
5584 $chardata[$i]['level'] += 2;
5591 // Remove Isolate formatters
5592 $numchunks = count($para);
5593 if ($controlchars) {
5594 for ($nc = 0; $nc < $numchunks; $nc++) {
5595 $this->removeChar($para[$nc][0], $para[$nc][18], "\xe2\x81\xa6");
5596 $this->removeChar($para[$nc][0], $para[$nc][18], "\xe2\x81\xa7");
5597 $this->removeChar($para[$nc][0], $para[$nc][18], "\xe2\x81\xa8");
5598 $this->removeChar($para[$nc][0], $para[$nc][18], "\xe2\x81\xa9");
5599 preg_replace("/\x{2066}-\x{2069}/u", '', $para[$nc][0]);
5601 // Remove any blank chunks made by removing directional codes
5602 for ($nc = ($numchunks - 1); $nc >= 0; $nc--) {
5603 if (count($para[$nc][18]['char_data']) == 0) {
5604 array_splice($para, $nc, 1);
/**
 * Reorder one line of text into visual order (bidi rules L1, L2 and L4).
 *
 * Called once the text has been divided into lines. The per-chunk data is
 * flattened into one list of characters, reordered by embedding level and
 * then split back into chunks. The three array arguments are rewritten in
 * place and nothing is returned. If no character has a level above 0 the
 * arguments are left exactly as they were.
 *
 * After reordering, each entry of $cOTLdata holds only 'group' and (when
 * present) 'GPOSinfo'; 'char_data' is not rebuilt.
 *
 * @param array  $chunkorder Identifier of each chunk (by reference)
 * @param array  $content    UTF-8 text of each chunk (by reference)
 * @param array  $cOTLdata   Per-chunk OTL data: 'char_data', 'group' and optionally 'GPOSinfo' (by reference)
 * @param string $blockdir   'rtl' selects paragraph embedding level 1, anything else level 0
 */
function _bidiReorder(&$chunkorder, &$content, &$cOTLdata, $blockdir)
{
    $bidiData = array();

    // First combine into one array (and get the highest level in use)
    $numchunks = count($content);
    $maxlevel = 0;
    for ($nc = 0; $nc < $numchunks; $nc++) {
        $numchars = count($cOTLdata[$nc]['char_data']);
        for ($i = 0; $i < $numchars; ++$i) {
            $carac = array();
            if (isset($cOTLdata[$nc]['GPOSinfo'][$i])) {
                $carac['GPOSinfo'] = $cOTLdata[$nc]['GPOSinfo'][$i];
            }
            $carac['uni'] = $cOTLdata[$nc]['char_data'][$i]['uni'];
            if (isset($cOTLdata[$nc]['char_data'][$i]['type'])) {
                $carac['type'] = $cOTLdata[$nc]['char_data'][$i]['type'];
            }
            if (isset($cOTLdata[$nc]['char_data'][$i]['level'])) {
                $carac['level'] = $cOTLdata[$nc]['char_data'][$i]['level'];
            }
            if (isset($cOTLdata[$nc]['char_data'][$i]['orig_type'])) {
                $carac['orig_type'] = $cOTLdata[$nc]['char_data'][$i]['orig_type'];
            }
            // Square brackets: the curly-brace string offset form is deprecated
            // in PHP 7.4 and is a parse error from PHP 8.0.
            $carac['group'] = $cOTLdata[$nc]['group'][$i];
            $carac['chunkid'] = $chunkorder[$nc]; // gives font id and/or object ID

            $maxlevel = max((isset($carac['level']) ? $carac['level'] : 0), $maxlevel);
            $bidiData[] = $carac;
        }
    }
    if ($maxlevel == 0) {
        return;
    }

    $numchars = count($bidiData);

    // L1. On each line, reset the embedding level of the following characters to the paragraph embedding level:
    //     1. Segment separators (Tab) 'S',
    //     2. Paragraph separators 'B',
    //     3. Any sequence of whitespace characters 'WS' preceding a segment separator or paragraph separator, and
    //     4. Any sequence of whitespace characters 'WS' at the end of the line.
    // The types used here are the original types, not those modified by the previous phase (cf. N1 and N2).
    // Because a Paragraph Separator breaks lines, there will be at most one per line, at the end of that line.
    // Only case 4 (trailing whitespace) is handled by the loop below.
    $pel = ($blockdir == 'rtl') ? 1 : 0; // paragraph embedding level

    // NOTE(review): the loop stops before index 0, so the first character of the line is never reset here.
    for ($i = ($numchars - 1); $i > 0; $i--) {
        $isWS = isset($bidiData[$i]['type']) && $bidiData[$i]['type'] == UCDN::BIDI_CLASS_WS;
        $wasWS = isset($bidiData[$i]['orig_type']) && $bidiData[$i]['orig_type'] == UCDN::BIDI_CLASS_WS;
        if (!$isWS && !$wasWS) {
            break;
        }
        $bidiData[$i]['level'] = $pel;
    }

    // L2. From the highest level found in the text to the lowest odd level on each line, including intermediate
    // levels not actually present in the text, reverse any contiguous sequence of characters that are at that
    // level or higher.
    for ($j = $maxlevel; $j > 0; $j--) {
        $ordarray = array();
        $revarr = array();
        $onlevel = false;
        for ($i = 0; $i < $numchars; ++$i) {
            // 'level' is optional in the source data; a missing level never reaches $j (which is >= 1)
            if (isset($bidiData[$i]['level']) && $bidiData[$i]['level'] >= $j) {
                $onlevel = true;
                // L4. A character is depicted by a mirrored glyph if and only if (a) the resolved directionality
                // of that character is R, and (b) the Bidi_Mirrored property value of that character is true.
                if (isset(UCDN::$mirror_pairs[$bidiData[$i]['uni']]) && isset($bidiData[$i]['type']) && $bidiData[$i]['type'] == UCDN::BIDI_CLASS_R) {
                    $bidiData[$i]['uni'] = UCDN::$mirror_pairs[$bidiData[$i]['uni']];
                }
                $revarr[] = $bidiData[$i];
            } else {
                if ($onlevel) {
                    // End of a run at this level: flush it reversed
                    $ordarray = array_merge($ordarray, array_reverse($revarr));
                    $revarr = array();
                    $onlevel = false;
                }
                $ordarray[] = $bidiData[$i];
            }
        }
        if ($onlevel) {
            $ordarray = array_merge($ordarray, array_reverse($revarr));
        }
        $bidiData = $ordarray;
    }

    // Split the reordered characters back into chunks; a new chunk starts whenever the chunk id changes
    $content = array();
    $cOTLdata = array();
    $chunkorder = array();

    $nc = -1; // New chunk order ID
    $chunkid = -1;
    $cctr = 0; // character counter within the current output chunk

    foreach ($bidiData as $carac) {
        if ($carac['chunkid'] != $chunkid) {
            $nc++;
            $chunkorder[$nc] = $carac['chunkid'];
            $cctr = 0;
            $content[$nc] = '';
            $cOTLdata[$nc]['group'] = '';
        }
        if ($carac['uni'] != 0xFFFC) { // Object replacement character (65532)
            $content[$nc] .= code2utf($carac['uni']);
            $cOTLdata[$nc]['group'] .= $carac['group'];
            if (!empty($carac['GPOSinfo'])) {
                $cOTLdata[$nc]['GPOSinfo'][$cctr] = $carac['GPOSinfo'];
                $cOTLdata[$nc]['GPOSinfo'][$cctr]['wDir'] = (isset($carac['level']) && ($carac['level'] % 2)) ? 'RTL' : 'LTR';
            }
        }
        $chunkid = $carac['chunkid'];
        $cctr++;
    }
}
5735 ////////////////////////////////////////////////////////////////
5736 ////////////////////////////////////////////////////////////////
5737 // These functions are called from mpdf after GSUB/GPOS has taken place
5738 // At this stage the bidi-type is in string form
5739 ////////////////////////////////////////////////////////////////
5740 ////////////////////////////////////////////////////////////////
/**
 * Split a chunk's OTL data in two.
 *
 * $cOTLdata (by reference) is cut back to the entries before $OTLcutoffpos;
 * the entries from $OTLrestartpos onwards are returned, re-based so that
 * they start at index 0. Anything between the two positions is dropped.
 *
 * @param array      $cOTLdata      OTL data with 'group' and optionally 'GPOSinfo' / 'char_data'
 * @param int        $OTLcutoffpos  First position removed from $cOTLdata
 * @param int|string $OTLrestartpos First position kept in the returned data; any empty value means $OTLcutoffpos
 * @return array The split-off data with keys 'GPOSinfo', 'char_data' and 'group'
 */
function splitOTLdata(&$cOTLdata, $OTLcutoffpos, $OTLrestartpos = '')
{
    $restartAt = $OTLrestartpos ? $OTLrestartpos : $OTLcutoffpos;

    $tail = array(
        'GPOSinfo' => array(),
        'char_data' => array(),
        'group' => substr($cOTLdata['group'], $restartAt),
    );
    $cOTLdata['group'] = substr($cOTLdata['group'], 0, $OTLcutoffpos);

    if (!empty($cOTLdata['GPOSinfo'])) {
        foreach ($cOTLdata['GPOSinfo'] as $index => $info) {
            if ($index >= $restartAt) {
                $tail['GPOSinfo'][$index - $restartAt] = $info;
            }
            if ($index >= $OTLcutoffpos) {
                unset($cOTLdata['GPOSinfo'][$index]);
            }
        }
    }

    if (isset($cOTLdata['char_data'])) {
        $tail['char_data'] = array_slice($cOTLdata['char_data'], $restartAt);
        array_splice($cOTLdata['char_data'], $OTLcutoffpos);
    }

    // Sorting is not required for correctness; it just makes the data easier to read when debugging
    if (isset($cOTLdata['GPOSinfo'])) {
        ksort($cOTLdata['GPOSinfo']);
    }
    ksort($tail['GPOSinfo']);

    return $tail;
}
/**
 * Return a copy of the OTL data for $len characters starting at $pos.
 *
 * The input is not modified. GPOSinfo keys are re-based so that position
 * $pos becomes index 0.
 *
 * @param array $OTLdata OTL data with 'group' and optionally 'GPOSinfo' / 'char_data'
 * @param int   $pos     Position of the first character to keep
 * @param int   $len     Number of characters to keep
 * @return array Sliced data with keys 'GPOSinfo', 'char_data' and 'group'
 */
function sliceOTLdata($OTLdata, $pos, $len)
{
    $newOTLdata = array('GPOSinfo' => array(), 'char_data' => array());
    $newOTLdata['group'] = substr($OTLdata['group'], $pos, $len);

    // 'GPOSinfo' is optional (splitOTLdata treats it the same way); !empty()
    // avoids an undefined-index notice when it is absent.
    if (!empty($OTLdata['GPOSinfo'])) {
        $end = $pos + $len;
        foreach ($OTLdata['GPOSinfo'] as $k => $val) {
            if ($k >= $pos && $k < $end) {
                $newOTLdata['GPOSinfo'][($k - $pos)] = $val;
            }
        }
    }

    if (isset($OTLdata['char_data'])) {
        $newOTLdata['char_data'] = array_slice($OTLdata['char_data'], $pos, $len);
    }

    // Not necessary - easier to debug
    if ($newOTLdata['GPOSinfo']) {
        ksort($newOTLdata['GPOSinfo']);
    }

    return $newOTLdata;
}
/**
 * Remove every occurrence of $char (a single character) from $txt and keep
 * the OTL data in step.
 *
 * For each occurrence the matching byte of 'group' and the matching
 * 'char_data' entry are removed, and 'GPOSinfo' keys after the removed
 * position are shifted down by one.
 *
 * @param string $txt      Text to edit (by reference)
 * @param array  $cOTLdata OTL data for $txt (by reference)
 * @param string $char     The character to remove
 */
function removeChar(&$txt, &$cOTLdata, $char)
{
    if ((string) $char === '') {
        return; // nothing to remove; an empty needle could otherwise never be consumed
    }

    // Quote the character so that a regex metacharacter (or the delimiter) is matched literally
    $pattern = '/' . preg_quote($char, '/') . '/';

    while (($pos = mb_strpos($txt, $char, 0, $this->mpdf->mb_enc)) !== false) {
        $cOTLdata['group'] = substr_replace($cOTLdata['group'], '', $pos, 1);

        if (!empty($cOTLdata['GPOSinfo'])) {
            $newGPOSinfo = array();
            foreach ($cOTLdata['GPOSinfo'] as $k => $val) {
                if ($k > $pos) {
                    $newGPOSinfo[($k - 1)] = $val;
                } elseif ($k != $pos) {
                    $newGPOSinfo[$k] = $val;
                }
                // $k == $pos: belongs to the removed character, so it is dropped
            }
            $cOTLdata['GPOSinfo'] = $newGPOSinfo;
        }

        if (isset($cOTLdata['char_data'])) {
            array_splice($cOTLdata['char_data'], $pos, 1);
        }

        // Remove only the first occurrence; the loop picks up any others
        $txt = preg_replace($pattern, '', $txt, 1);
    }
}
/**
 * Replace every non-breaking space (U+00A0) in $txt with an ordinary space
 * and update the matching 'char_data' entries (uni 160 becomes 32).
 *
 * The length of the text in characters does not change, so 'group' and
 * 'GPOSinfo' are left alone.
 *
 * @param string $txt      Text to edit (by reference)
 * @param array  $cOTLdata OTL data for $txt (by reference)
 */
function replaceSpace(&$txt, &$cOTLdata)
{
    $char = chr(194) . chr(160); // NBSP
    while (($pos = mb_strpos($txt, $char, 0, $this->mpdf->mb_enc)) !== false) {
        // char_data may be absent or shorter than the text; only touch an entry that exists
        if (isset($cOTLdata['char_data'][$pos]['uni']) && $cOTLdata['char_data'][$pos]['uni'] == 160) {
            $cOTLdata['char_data'][$pos]['uni'] = 32;
        }
        $txt = preg_replace("/" . $char . "/", ' ', $txt, 1);
    }
}
/**
 * Trim leading and/or trailing spaces from a chunk's OTL data.
 *
 * A character counts as a space when its 'uni' value is 32 or 12288
 * (0x3000, CJK space). 'group', 'GPOSinfo' and 'char_data' are all trimmed,
 * and GPOSinfo keys are re-based after a left trim.
 *
 * @param array $cOTLdata OTL data to trim (by reference)
 * @param bool  $Left     Trim leading spaces
 * @param bool  $Right    Trim trailing spaces
 */
function trimOTLdata(&$cOTLdata, $Left = true, $Right = true)
{
    // 'char_data' is optional: with none there is nothing to measure and nothing is trimmed
    $chars = isset($cOTLdata['char_data']) ? $cOTLdata['char_data'] : array();
    $len = count($chars);

    $nLeft = 0;
    while ($nLeft < $len && ($chars[$nLeft]['uni'] == 32 || $chars[$nLeft]['uni'] == 12288)) {
        $nLeft++;
    }
    $nRight = 0;
    while ($nRight < $len && ($chars[$len - 1 - $nRight]['uni'] == 32 || $chars[$len - 1 - $nRight]['uni'] == 12288)) {
        $nRight++;
    }

    // Trim Right
    if ($Right && $nRight) {
        // The cast keeps 'group' a string where older PHP versions return false from substr()
        $cOTLdata['group'] = (string) substr($cOTLdata['group'], 0, strlen($cOTLdata['group']) - $nRight);
        if (!empty($cOTLdata['GPOSinfo'])) {
            foreach ($cOTLdata['GPOSinfo'] as $k => $val) {
                if ($k >= $len - $nRight) {
                    unset($cOTLdata['GPOSinfo'][$k]);
                }
            }
        }
        if (isset($cOTLdata['char_data'])) {
            array_splice($cOTLdata['char_data'], $len - $nRight);
        }
    }

    // Trim Left
    // If the text was all spaces and the right trim already removed them, this finds nothing left to remove.
    if ($Left && $nLeft) {
        $cOTLdata['group'] = (string) substr($cOTLdata['group'], $nLeft);
        if (!empty($cOTLdata['GPOSinfo'])) {
            $newPOSinfo = array();
            foreach ($cOTLdata['GPOSinfo'] as $k => $val) {
                if ($k >= $nLeft) {
                    $newPOSinfo[$k - $nLeft] = $val;
                }
            }
            $cOTLdata['GPOSinfo'] = $newPOSinfo;
        }
        if (isset($cOTLdata['char_data'])) {
            array_splice($cOTLdata['char_data'], 0, $nLeft);
        }
    }
}
5896 ////////////////////////////////////////////////////////////////
5897 ////////////////////////////////////////////////////////////////
5898 ////////// GENERAL OTL FUNCTIONS /////////////////
5899 ////////////////////////////////////////////////////////////////
5900 ////////////////////////////////////////////////////////////////
/**
 * Look up the character value stored for a glyph ID.
 *
 * $this->glyphIDtoUni is read as a packed string with three bytes per
 * glyph, most significant byte first.
 *
 * @param int $gid Glyph ID
 * @return int The 24-bit value stored for that glyph
 */
function glyphToChar($gid)
{
    $base = $gid * 3;
    $high = ord($this->glyphIDtoUni[$base]);
    $middle = ord($this->glyphIDtoUni[$base + 1]);
    $low = ord($this->glyphIDtoUni[$base + 2]);

    return ($high << 16) + ($middle << 8) + $low;
}
/**
 * Format a number as upper-case hexadecimal, zero-padded on the left to at
 * least five digits (e.g. 65 gives "00041").
 *
 * @param int $unicode_dec Value to format
 * @return string
 */
function unicode_hex($unicode_dec)
{
    return sprintf('%05X', $unicode_dec);
}
/**
 * Move the read position used by read_short() / read_ushort().
 *
 * @param int $pos New absolute position within $this->ttfOTLdata
 */
function seek($pos)
{
    $this->_pos = $pos;
}
/**
 * Advance the read position by a number of bytes without reading them.
 *
 * @param int $delta Offset to add to the current position
 */
function skip($delta)
{
    $this->_pos = $this->_pos + $delta;
}
/**
 * Read a signed 16-bit integer (most significant byte first) from
 * $this->ttfOTLdata at the current position and advance by two bytes.
 *
 * @return int Value in the range -32768..32767
 */
function read_short()
{
    $high = ord($this->ttfOTLdata[$this->_pos]);
    $low = ord($this->ttfOTLdata[$this->_pos + 1]);
    $this->_pos += 2;

    $value = ($high << 8) + $low;
    // Bit 15 set means the 16-bit value is negative
    return ($value & 0x8000) ? $value - 0x10000 : $value;
}
/**
 * Read an unsigned 16-bit integer (most significant byte first) from
 * $this->ttfOTLdata at the current position and advance by two bytes.
 *
 * @return int Value in the range 0..65535
 */
function read_ushort()
{
    $high = ord($this->ttfOTLdata[$this->_pos]);
    $low = ord($this->ttfOTLdata[$this->_pos + 1]);
    $this->_pos += 2;

    return ($high << 8) + $low;
}
/**
 * Read the Coverage table at the current read position and return the
 * covered glyph IDs.
 *
 * Called from Lookup Type 1, Format 1. Apart from returning glyph IDs
 * rather than hex strings this mirrors _getCoverage(); the results are
 * cached separately under the 'GID' key, per font and table offset. On a
 * cache hit nothing is read and the read position does not move.
 *
 * @return array List of glyph IDs in coverage order
 */
function _getCoverageGID()
{
    $offset = $this->_pos;
    if (isset($this->LuDataCache[$this->fontkey]['GID'][$offset])) {
        return $this->LuDataCache[$this->fontkey]['GID'][$offset];
    }

    $glyphs = array();
    $format = $this->read_ushort();
    if ($format == 1) {
        // Format 1: a count followed by that many glyph IDs
        $remaining = $this->read_ushort();
        while ($remaining-- > 0) {
            $glyphs[] = $this->read_ushort();
        }
    } elseif ($format == 2) {
        // Format 2: a count followed by (start, end, start coverage index) records
        $rangeCount = $this->read_ushort();
        for ($n = 0; $n < $rangeCount; $n++) {
            $first = $this->read_ushort();
            $last = $this->read_ushort();
            $this->read_ushort(); // StartCoverageIndex: not needed, but must be consumed
            for ($glyphID = $first; $glyphID <= $last; $glyphID++) {
                $glyphs[] = $glyphID;
            }
        }
    }

    $this->LuDataCache[$this->fontkey]['GID'][$offset] = $glyphs;

    return $glyphs;
}
/**
 * Read the Coverage table at the current read position and return the
 * covered glyphs as hex strings.
 *
 * Each glyph ID is mapped through glyphToChar() and formatted with
 * unicode_hex(). Results are cached per font and table offset; on a cache
 * hit nothing is read and the read position does not move.
 *
 * @return array List of hex strings in coverage order
 */
function _getCoverage()
{
    $offset = $this->_pos;
    if (isset($this->LuDataCache[$this->fontkey][$offset])) {
        return $this->LuDataCache[$this->fontkey][$offset];
    }

    $covered = array();
    $format = $this->read_ushort();
    if ($format == 1) {
        // Format 1: a count followed by that many glyph IDs
        $remaining = $this->read_ushort();
        while ($remaining-- > 0) {
            $glyphID = $this->read_ushort();
            $covered[] = $this->unicode_hex($this->glyphToChar($glyphID));
        }
    } elseif ($format == 2) {
        // Format 2: a count followed by (start, end, start coverage index) records
        $rangeCount = $this->read_ushort();
        for ($n = 0; $n < $rangeCount; $n++) {
            $first = $this->read_ushort();
            $last = $this->read_ushort();
            $this->read_ushort(); // StartCoverageIndex: not needed, but must be consumed
            for ($glyphID = $first; $glyphID <= $last; $glyphID++) {
                $covered[] = $this->unicode_hex($this->glyphToChar($glyphID));
            }
        }
    }

    $this->LuDataCache[$this->fontkey][$offset] = $covered;

    return $covered;
}
/**
 * Read the Class Definition table at $offset.
 *
 * Returns a map of class number => (character value => 1), where the
 * character value comes from glyphToChar(). Class 0 entries and glyphs that
 * map to 0 are left out. Results are cached per font and offset; on a cache
 * hit the read position is not moved.
 *
 * @param int $offset Position of the table within $this->ttfOTLdata
 * @return array
 */
function _getClasses($offset)
{
    if (isset($this->LuDataCache[$this->fontkey][$offset])) {
        return $this->LuDataCache[$this->fontkey][$offset];
    }

    $this->seek($offset);
    $format = $this->read_ushort();

    // First gather (first glyph, last glyph, class) records from either format
    $records = array();
    if ($format == 1) {
        // Format 1: one class value per glyph, for consecutive glyph IDs
        $firstGlyph = $this->read_ushort();
        $glyphCount = $this->read_ushort();
        for ($n = 0; $n < $glyphCount; $n++) {
            $class = $this->read_ushort();
            $records[] = array($firstGlyph + $n, $firstGlyph + $n, $class);
        }
    } elseif ($format == 2) {
        // Format 2: explicit (start, end, class) ranges
        $rangeCount = $this->read_ushort();
        for ($n = 0; $n < $rangeCount; $n++) {
            $first = $this->read_ushort();
            $last = $this->read_ushort();
            $class = $this->read_ushort();
            $records[] = array($first, $last, $class);
        }
    }

    $byClass = array();
    foreach ($records as $record) {
        list($first, $last, $class) = $record;
        // Note: Font FreeSerif , tag "blws"
        // $BacktrackClasses[0] is defined ? a mistake in the font ???
        // Let's ignore for now
        if ($class <= 0) {
            continue;
        }
        for ($gid = $first; $gid <= $last; $gid++) {
            $char = $this->glyphToChar($gid);
            if ($char) {
                $byClass[$class][$char] = 1;
            }
        }
    }

    $this->LuDataCache[$this->fontkey][$offset] = $byClass;

    return $byClass;
}
/**
 * Choose the OpenType script tag to use for a run of text.
 *
 * http://www.microsoft.com/typography/otspec/ttoreg.htm
 * http://www.microsoft.com/typography/otspec/scripttags.htm
 *
 * Values for useOTL
 *
 *   Bit  dn   hn      Value
 *   1    1    0x0001  GSUB/GPOS - Latin scripts
 *   2    2    0x0002  GSUB/GPOS - Cyrillic scripts
 *   3    4    0x0004  GSUB/GPOS - Greek scripts
 *   4    8    0x0008  GSUB/GPOS - CJK scripts (excluding Hangul-Jamo)
 *   5    16   0x0010  (Reserved)
 *   6    32   0x0020  (Reserved)
 *   7    64   0x0040  (Reserved)
 *   8    128  0x0080  GSUB/GPOS - All other scripts (including all RTL scripts, complex scripts with shapers etc)
 *
 * NB If change for RTL - cf. function magic_reverse_dir in mpdf.php to update
 *
 * @param array  $ScriptLang  Script/lang tags supported by the font, keyed by script tag
 * @param string $scripttag   Default tag derived from $scriptblock
 * @param int    $scriptblock Script of the text, from Unicode properties (UCDN::$uni_scriptblock)
 * @param mixed  $shaper      Truthy when a shaper is in use; enables the old-spec tag fallback
 * @param int    $useOTL      Bit mask as described above
 * @param mixed  $mode        Not used by this method
 * @return array array(tag, usesOldSpec): tag is '' when OTL is disabled for the script or no tag is available;
 *               usesOldSpec is true only when an old-spec tag was substituted
 */
function _getOTLscriptTag($ScriptLang, $scripttag, $scriptblock, $shaper, $useOTL, $mode)
{
    // Which useOTL bit governs this script?
    if ($scriptblock == UCDN::SCRIPT_LATIN) {
        $requiredBit = 0x01;
    } elseif ($scriptblock == UCDN::SCRIPT_CYRILLIC) {
        $requiredBit = 0x02;
    } elseif ($scriptblock == UCDN::SCRIPT_GREEK) {
        $requiredBit = 0x04;
    } elseif ($scriptblock >= UCDN::SCRIPT_HIRAGANA && $scriptblock <= UCDN::SCRIPT_YI) {
        $requiredBit = 0x08;
    } else {
        $requiredBit = 0x80;
    }
    if (!($useOTL & $requiredBit)) {
        return array('', false);
    }

    // If availabletags includes scripttag - choose
    if (isset($ScriptLang[$scripttag])) {
        return array($scripttag, false);
    }

    // If INDIC (or Myanmar) and the font lacks the new-version tag, fall back to the old version of the SAME
    // script. A lookup table is used rather than a switch: the switch had no break statements, so a missing
    // old tag fell through to the next script (e.g. 'bng2' could return 'deva').
    if ($shaper) {
        $oldSpecTags = array(
            'bng2' => 'beng',
            'dev2' => 'deva',
            'gjr2' => 'gujr',
            'gur2' => 'guru',
            'knd2' => 'knda',
            'mlm2' => 'mlym',
            'ory2' => 'orya',
            'tml2' => 'taml',
            'tel2' => 'telu',
            'mym2' => 'mymr',
        );
        if (isset($oldSpecTags[$scripttag]) && isset($ScriptLang[$oldSpecTags[$scripttag]])) {
            return array($oldSpecTags[$scripttag], true);
        }
    }

    // choose DFLT if present
    if (isset($ScriptLang['DFLT'])) {
        return array('DFLT', false);
    }
    // else choose dflt if present
    if (isset($ScriptLang['dflt'])) {
        return array('dflt', false);
    }
    // else choose latn if present
    if (isset($ScriptLang['latn'])) {
        return array('latn', false);
    }
    // else return no scriptTag
    return array('', false);
}
/**
 * Choose the OpenType LangSys tag for an IETF language tag.
 *
 * http://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
 * http://www.microsoft.com/typography/otspec/languagetags.htm
 *
 * The language part is looked up in UCDN::$ot_languages first, then the
 * language and country parts joined together; if neither is listed the
 * candidate is "DFLT". If the candidate is not found in $available, "DFLT"
 * is returned when $available contains it, otherwise ''.
 *
 * @param string $ietf      IETF tag, e.g. en-US, und-Arab, sr-Cyrl (cf. config_lang2fonts.php)
 * @param string $available String containing the LangSys tags that may be used
 * @return string The chosen tag, or '' when none applies
 */
function _getOTLLangTag($ietf, $available)
{
    if ($available == '') {
        return '';
    }

    $parts = explode('-', $ietf);
    $lang = strtolower($parts[0]);

    // A two-letter second subtag is a country; a third subtag, when present, takes precedence
    $country = '';
    if (!empty($parts[1]) && strlen($parts[1]) == 2) {
        $country = strtolower($parts[1]);
    }
    if (!empty($parts[2])) {
        $country = strtolower($parts[2]);
    }

    $langsys = "DFLT";
    if ($lang != '') {
        if (isset(UCDN::$ot_languages[$lang])) {
            $langsys = UCDN::$ot_languages[$lang];
        } elseif ($country != '' && isset(UCDN::$ot_languages[$lang . $country])) {
            $langsys = UCDN::$ot_languages[$lang . $country];
        }
    }

    if (strpos($available, $langsys) !== false) {
        return $langsys;
    }

    return (strpos($available, "DFLT") !== false) ? "DFLT" : '';
}
/**
 * Debugging aid: echo an HTML dump of the current state of $this->OTLdata
 * while a lookup is being processed.
 *
 * Prints a heading line, then the 'hex' value and the 'uni' value of every
 * entry (the entry at $ptr in bold) and, for GPOS, the non-empty GPOSinfo
 * of each entry.
 *
 * @param string $GPOSSUB   'GPOS' or another label for the table being processed
 * @param mixed  $lookupID  Lookup ID shown in the heading
 * @param mixed  $subtable  Subtable number shown in the heading
 * @param mixed  $Type      Lookup type shown in the heading
 * @param mixed  $Format    Lookup format shown in the heading
 * @param int    $ptr       Index into $this->OTLdata of the current glyph
 * @param mixed  $currGlyph Current glyph shown in the heading
 * @param int    $level     Nesting level; each level indents the output by 2em
 */
function _dumpproc($GPOSSUB, $lookupID, $subtable, $Type, $Format, $ptr, $currGlyph, $level)
{
    $total = count($this->OTLdata);

    echo '<div style="padding-left: ' . ($level * 2) . 'em;">';
    echo $GPOSSUB . ' LookupID #' . $lookupID . ' Subtable#' . $subtable . ' Type: ' . $Type . ' Format: ' . $Format . '<br />';
    echo '<div style="font-family:monospace">';
    echo 'Glyph position: ' . $ptr . ' Current Glyph: ' . $currGlyph . '<br />';

    // Row of hex values, current position in bold
    for ($idx = 0; $idx < $total; $idx++) {
        $cell = $this->OTLdata[$idx]['hex'] . ' ';
        echo ($idx == $ptr) ? '<b>' . $cell . '</b>' : $cell;
    }
    echo '<br />';

    // Row of 'uni' values padded to five characters, current position in bold
    for ($idx = 0; $idx < $total; $idx++) {
        $cell = str_pad($this->OTLdata[$idx]['uni'], 5) . ' ';
        echo ($idx == $ptr) ? '<b>' . $cell . '</b>' : $cell;
    }
    echo '<br />';

    // Positioning data, only when dumping a GPOS lookup
    if ($GPOSSUB == 'GPOS') {
        for ($idx = 0; $idx < $total; $idx++) {
            if (empty($this->OTLdata[$idx]['GPOSinfo'])) {
                continue;
            }
            echo $this->OTLdata[$idx]['hex'] . ' &#x' . $this->OTLdata[$idx]['hex'] . '; ';
            print_r($this->OTLdata[$idx]['GPOSinfo']);
            echo ' ';
        }
    }

    echo '</div>';
    echo '</div>';
}