2 //============================================================+
3 // File name : unicode_data.php
6 // Last Update : 2010-12-16
7 // Author : Nicola Asuni - Tecnick.com S.r.l - Via Della Pace, 11 - 09044 - Quartucciu (CA) - ITALY - www.tecnick.com - info@tecnick.com
8 // License : GNU-LGPL v3 (http://www.gnu.org/copyleft/lesser.html)
9 // -------------------------------------------------------------------
10 // Copyright (C) 2008-2010 Nicola Asuni - Tecnick.com S.r.l.
12 // This file is part of TCPDF software library.
14 // TCPDF is free software: you can redistribute it and/or modify it
15 // under the terms of the GNU Lesser General Public License as
16 // published by the Free Software Foundation, either version 3 of the
17 // License, or (at your option) any later version.
19 // TCPDF is distributed in the hope that it will be useful, but
20 // WITHOUT ANY WARRANTY; without even the implied warranty of
21 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
22 // See the GNU Lesser General Public License for more details.
24 // You should have received a copy of the GNU Lesser General Public License
25 // along with TCPDF. If not, see <http://www.gnu.org/licenses/>.
27 // See LICENSE.TXT file for more information.
28 // -------------------------------------------------------------------
30 // Description : Unicode data for TCPDF library.
32 //============================================================+
34 // Efthimios Mavrogeorgiadis
39 * Unicode data class for TCPDF library.
40 * @author Nicola Asuni
41 * @package com.tecnick.tcpdf
42 * @since 2.1.000 (2008-01-08)
46 * @class TCPDF_UNICODE_DATA
47 * This is a PHP class containing Unicode data for the TCPDF library.
48 * @package com.tecnick.tcpdf
50 * @author Nicola Asuni - info@tecnick.com
52 class TCPDF_UNICODE_DATA
{
55 * Unicode code for Left-to-Right Mark.
58 public $uni_LRM = 8206; // U+200E
61 * Unicode code for Right-to-Left Mark.
64 public $uni_RLM = 8207; // U+200F
67 * Unicode code for Left-to-Right Embedding.
70 public $uni_LRE = 8234; // U+202A
73 * Unicode code for Right-to-Left Embedding.
76 public $uni_RLE = 8235; // U+202B
79 * Unicode code for Pop Directional Format.
82 public $uni_PDF = 8236; // U+202C
85 * Unicode code for Left-to-Right Override.
88 public $uni_LRO = 8237; // U+202D
91 * Unicode code for Right-to-Left Override.
94 public $uni_RLO = 8238; // U+202E
97 * Pattern to test RTL (Right-To-Left) strings using regular expressions.
100 public $uni_RE_PATTERN_RTL = "/(
102 | \xD7[\x80\x83\x86\x90-\xAA\xB0-\xB4] # R
103 | \xDF[\x80-\xAA\xB4\xB5\xBA] # R
105 | \xEF\xAC[\x9D\x9F\xA0-\xA8\xAA-\xB6\xB8-\xBC\xBE] # R
106 | \xEF\xAD[\x80\x81\x83\x84\x86-\x8F] # R
107 | \xF0\x90\xA0[\x80-\x85\x88\x8A-\xB5\xB7\xB8\xBC\xBF] # R
108 | \xF0\x90\xA4[\x80-\x99] # R
109 | \xF0\x90\xA8[\x80\x90-\x93\x95-\x97\x99-\xB3] # R
110 | \xF0\x90\xA9[\x80-\x87\x90-\x98] # R
111 | \xE2\x80[\xAB\xAE] # RLE & RLO
115 * Pattern to test Arabic strings using regular expressions. Source: http://www.w3.org/International/questions/qa-forms-utf-8
118 public $uni_RE_PATTERN_ARABIC = "/(
119 \xD8[\x80-\x83\x8B\x8D\x9B\x9E\x9F\xA1-\xBA] # AL
120 | \xD9[\x80-\x8A\xAD-\xAF\xB1-\xBF] # AL
121 | \xDA[\x80-\xBF] # AL
122 | \xDB[\x80-\x95\x9D\xA5\xA6\xAE\xAF\xBA-\xBF] # AL
123 | \xDC[\x80-\x8D\x90\x92-\xAF] # AL
124 | \xDD[\x8D-\xAD] # AL
125 | \xDE[\x80-\xA5\xB1] # AL
126 | \xEF\xAD[\x90-\xBF] # AL
127 | \xEF\xAE[\x80-\xB1] # AL
128 | \xEF\xAF[\x93-\xBF] # AL
129 | \xEF[\xB0-\xB3][\x80-\xBF] # AL
130 | \xEF\xB4[\x80-\xBD] # AL
131 | \xEF\xB5[\x90-\xBF] # AL
132 | \xEF\xB6[\x80-\x8F\x92-\xBF] # AL
133 | \xEF\xB7[\x80-\x87\xB0-\xBC] # AL
134 | \xEF\xB9[\xB0-\xB4\xB6-\xBF] # AL
135 | \xEF\xBA[\x80-\xBF] # AL
136 | \xEF\xBB[\x80-\xBC] # AL
137 | \xD9[\xA0-\xA9\xAB\xAC] # AN
141 * Array of Unicode types.
144 public $uni_type = array(
17868 * Mirror unicode characters. For information on bidi mirroring, see UAX #9: Bidirectional Algorithm, at http://www.unicode.org/unicode/reports/tr9/
17871 public $uni_mirror = array (
18226 * Arabic shape substitutions: char code => (isolated, final, initial, medial).
18229 public $uni_arabicsubst = array(
18230 1569=>array(65152),
18231 1570=>array(65153, 65154, 65153, 65154),
18232 1571=>array(65155, 65156, 65155, 65156),
18233 1572=>array(65157, 65158),
18234 1573=>array(65159, 65160, 65159, 65160),
18235 1574=>array(65161, 65162, 65163, 65164),
18236 1575=>array(65165, 65166, 65165, 65166),
18237 1576=>array(65167, 65168, 65169, 65170),
18238 1577=>array(65171, 65172),
18239 1578=>array(65173, 65174, 65175, 65176),
18240 1579=>array(65177, 65178, 65179, 65180),
18241 1580=>array(65181, 65182, 65183, 65184),
18242 1581=>array(65185, 65186, 65187, 65188),
18243 1582=>array(65189, 65190, 65191, 65192),
18244 1583=>array(65193, 65194, 65193, 65194),
18245 1584=>array(65195, 65196, 65195, 65196),
18246 1585=>array(65197, 65198, 65197, 65198),
18247 1586=>array(65199, 65200, 65199, 65200),
18248 1587=>array(65201, 65202, 65203, 65204),
18249 1588=>array(65205, 65206, 65207, 65208),
18250 1589=>array(65209, 65210, 65211, 65212),
18251 1590=>array(65213, 65214, 65215, 65216),
18252 1591=>array(65217, 65218, 65219, 65220),
18253 1592=>array(65221, 65222, 65223, 65224),
18254 1593=>array(65225, 65226, 65227, 65228),
18255 1594=>array(65229, 65230, 65231, 65232),
18256 1601=>array(65233, 65234, 65235, 65236),
18257 1602=>array(65237, 65238, 65239, 65240),
18258 1603=>array(65241, 65242, 65243, 65244),
18259 1604=>array(65245, 65246, 65247, 65248),
18260 1605=>array(65249, 65250, 65251, 65252),
18261 1606=>array(65253, 65254, 65255, 65256),
18262 1607=>array(65257, 65258, 65259, 65260),
18263 1608=>array(65261, 65262, 65261, 65262),
18264 1609=>array(65263, 65264, 64488, 64489),
18265 1610=>array(65265, 65266, 65267, 65268),
18266 1649=>array(64336, 64337),
18267 1655=>array(64477),
18268 1657=>array(64358, 64359, 64360, 64361),
18269 1658=>array(64350, 64351, 64352, 64353),
18270 1659=>array(64338, 64339, 64340, 64341),
18271 1662=>array(64342, 64343, 64344, 64345),
18272 1663=>array(64354, 64355, 64356, 64357),
18273 1664=>array(64346, 64347, 64348, 64349),
18274 1667=>array(64374, 64375, 64376, 64377),
18275 1668=>array(64370, 64371, 64372, 64373),
18276 1670=>array(64378, 64379, 64380, 64381),
18277 1671=>array(64382, 64383, 64384, 64385),
18278 1672=>array(64392, 64393),
18279 1676=>array(64388, 64389),
18280 1677=>array(64386, 64387),
18281 1678=>array(64390, 64391),
18282 1681=>array(64396, 64397),
18283 1688=>array(64394, 64395, 64394, 64395),
18284 1700=>array(64362, 64363, 64364, 64365),
18285 1702=>array(64366, 64367, 64368, 64369),
18286 1705=>array(64398, 64399, 64400, 64401),
18287 1709=>array(64467, 64468, 64469, 64470),
18288 1711=>array(64402, 64403, 64404, 64405),
18289 1713=>array(64410, 64411, 64412, 64413),
18290 1715=>array(64406, 64407, 64408, 64409),
18291 1722=>array(64414, 64415),
18292 1723=>array(64416, 64417, 64418, 64419),
18293 1726=>array(64426, 64427, 64428, 64429),
18294 1728=>array(64420, 64421),
18295 1729=>array(64422, 64423, 64424, 64425),
18296 1733=>array(64480, 64481),
18297 1734=>array(64473, 64474),
18298 1735=>array(64471, 64472),
18299 1736=>array(64475, 64476),
18300 1737=>array(64482, 64483),
18301 1739=>array(64478, 64479),
18302 1740=>array(64508, 64509, 64510, 64511),
18303 1744=>array(64484, 64485, 64486, 64487),
18304 1746=>array(64430, 64431),
18305 1747=>array(64432, 64433)
18309 * Arabic laa (lam-alef) ligature substitutions: alef char code => (isolated, final, initial, medial).
18312 public $uni_laa_array = array (
18313 1570 =>array(65269, 65270, 65269, 65270),
18314 1571 =>array(65271, 65272, 65271, 65272),
18315 1573 =>array(65273, 65274, 65273, 65274),
18316 1575 =>array(65275, 65276, 65275, 65276)
18320 * Array of character substitutions for sequences of two diacritic symbols.
18321 * Putting the combining mark and character in the same glyph allows us to avoid the two marks overlapping each other in an illegible manner.
18322 * second NSM char code => substitution char
18325 public $uni_diacritics = array (
18326 1612=>64606, # Shadda + Dammatan
18327 1613=>64607, # Shadda + Kasratan
18328 1614=>64608, # Shadda + Fatha
18329 1615=>64609, # Shadda + Damma
18330 1616=>64610 # Shadda + Kasra
18334 * Array of character substitutions from Unicode code points to single-byte Latin1 (Windows-1252) character codes.
18337 public $uni_utf8tolatin = array (
18341 376=>159, # Ydieresis
18342 381=>142, # Zcaron2
18343 8226=>149, # bullet3
18344 710=>136, # circumflex
18345 8224=>134, # dagger
18346 8225=>135, # daggerdbl
18347 8230=>133, # ellipsis
18348 8212=>151, # emdash
18349 8211=>150, # endash
18351 8249=>139, # guilsinglleft
18352 8250=>155, # guilsinglright
18354 8240=>137, # perthousand
18355 8222=>132, # quotedblbase
18356 8220=>147, # quotedblleft
18357 8221=>148, # quotedblright
18358 8216=>145, # quoteleft
18359 8217=>146, # quoteright
18360 8218=>130, # quotesinglbase
18363 8482=>153, # trademark
18367 } // --- END OF CLASS ---
18369 //============================================================+
18371 //============================================================+