3 % Copyright
2006 Han The Thanh
, <thanh@@pdftex.org
>
4 % Copyright
2006-2010 Taco Hoekwater
<taco@@luatex.org
>
6 % This file is part of LuaTeX.
8 % LuaTeX is free software
; you can redistribute it and
/or modify it under
9 % the terms of the GNU General Public License as published by the Free
10 % Software Foundation
; either version
2 of the License
, or
(at your
11 % option
) any later version.
13 % LuaTeX is distributed in the hope that it will be useful
, but WITHOUT
14 % ANY WARRANTY
; without even the implied warranty of MERCHANTABILITY or
15 % FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 % License for more details.
18 % You should have received a copy of the GNU General Public License along
19 % with LuaTeX
; if not
, see
<http
://www.gnu.org
/licenses
/>.
/* True when |c| is a decimal digit or one of the upper-case letters A-F;
   lower-case a-f is not accepted.  The argument is expanded more than once,
   so it must be free of side effects.  The value passed to |isdigit| is
   converted to |unsigned char| so that a negative |char| cannot reach it. */
#define isXdigit(c) (isdigit((unsigned char) (c)) || ('A' <= (c) && (c) <= 'F'))

/* Negative sentinels stored in the |code| field of a |glyph_unicode_entry|;
   parenthesised so they stay intact inside larger expressions. */
#define UNI_STRING       (-2)   /* string allocated by |def_tounicode()| */
#define UNI_EXTRA_STRING (-3)   /* string allocated by |set_glyph_unicode()| */

/* AVL tree holding the |glyph_unicode_entry| database; NULL until it is
   created on first use. */
static struct avl_table *glyph_unicode_tree = NULL;
34 static int comp_glyph_unicode_entry
(const void
*pa
, const void
*pb
, void
*p
)
37 return strcmp
(((const glyph_unicode_entry
*) pa
)->name
,
38 ((const glyph_unicode_entry
*) pb
)->name
);
41 static glyph_unicode_entry
*new_glyph_unicode_entry
(void
)
43 glyph_unicode_entry
*e
;
44 e
= xtalloc
(1, glyph_unicode_entry
);
47 e-
>unicode_seq
= NULL;
51 static void destroy_glyph_unicode_entry
(void
*pa
, void
*pb
)
53 glyph_unicode_entry
*e
= (glyph_unicode_entry
*) pa
;
56 if
(e-
>code
== UNI_STRING
) {
57 assert
(e-
>unicode_seq
!= NULL);
58 xfree
(e-
>unicode_seq
);
62 void glyph_unicode_free
(void
)
64 if
(glyph_unicode_tree
!= NULL)
65 avl_destroy
(glyph_unicode_tree
, destroy_glyph_unicode_entry
);
69 void def_tounicode
(str_number glyph
, str_number unistr
)
71 char buf
[SMALL_BUF_SIZE
], *p
, *ph
;
72 char buf2
[SMALL_BUF_SIZE
], *q
;
73 int valid_unistr
; /* 0: invalid
; 1: unicode value
; 2: string
*/
75 glyph_unicode_entry
*gu
, t
;
78 p
= makecstring
(glyph
);
79 assert
(strlen
(p
) < SMALL_BUF_SIZE
);
82 p
= makecstring
(unistr
);
85 p
++; /* ignore leading spaces
*/
87 while
(l
> 0 && p[l - 1] == ' ')
88 l--
; /* ignore trailing spaces
*/
89 valid_unistr
= 1; /* a unicode value is the most common case
*/
90 for
(i
= 0; i
< l
; i
++) {
92 valid_unistr
= 2; /* if a space occurs we treat this entry as a string
*/
93 else if
(!isXdigit
((unsigned char
)p
[i
])) {
98 if
(l
== 0 || valid_unistr
== 0 || strlen
(buf
) == 0 || strcmp
(buf
, notdef
) == 0) {
99 formatted_warning
("tounicode", "invalid parameter(s): %s -> %s", buf
, p
);
102 if
(glyph_unicode_tree
== NULL) {
104 avl_create
(comp_glyph_unicode_entry
, NULL, &avl_xallocator);
105 assert
(glyph_unicode_tree
!= NULL);
108 /* allow overriding existing entries
*/
109 if
((gu
= (glyph_unicode_entry
*) avl_find
(glyph_unicode_tree
, &t)) != NULL) {
110 if
(gu-
>code
== UNI_STRING
) {
111 assert
(gu-
>unicode_seq
!= NULL);
112 xfree
(gu-
>unicode_seq
);
114 } else
{ /* make new entry
*/
115 gu
= new_glyph_unicode_entry
();
116 gu-
>name
= xstrdup
(buf
);
118 if
(valid_unistr
== 2) { /* a string with space
(s
) */
119 /* copy p to buf2
, ignoring spaces
*/
120 for
(q
= buf2
; *p
!= 0; p
++)
124 gu-
>code
= UNI_STRING
;
125 gu-
>unicode_seq
= xstrdup
(buf2
);
127 i
= sscanf
(p
, "%lX", &(gu->code));
130 aa
= avl_probe
(glyph_unicode_tree
, gu
);
137 static long check_unicode_value
(char
*s
, boolean multiple_value
)
139 int l
= (int
) strlen
(s
);
141 long code
= 0; /* anything that is not |UNI_UNDEF| will do
*/
145 if
(multiple_value
&& l % 4 != 0)
147 if
(!multiple_value
&& !(4 <= l && l <= 6))
150 for
(i
= 0; i
< l
; i
++) {
151 if
(!isXdigit
((unsigned char
)s
[i
]))
153 if
(multiple_value
) {
155 if
(sscanf
(s
+ i
- 3, "%4lX", &code) != 1)
157 if
(!((0x0000 <= code
&& code <= 0xD7FF) ||
158 (0xE000 <= code
&& code <= 0xFFFF)))
161 } else
{ /* single value
*/
163 if
(sscanf
(s
, "%lX", &code) != 1)
165 if
(!((0x0000 <= code
&& code <= 0xD7FF) ||
166 (0xE000 <= code
&& code <= 0x10FFFF)))
@ This function sets proper values in |*gp| based on |s|; if it returns with
|gp->code == UNI_EXTRA_STRING|, the caller is responsible for freeing
|gp->unicode_seq| too.
178 static void set_glyph_unicode
(char
*s
, glyph_unicode_entry
* gp
)
180 char buf
[SMALL_BUF_SIZE
], buf2
[SMALL_BUF_SIZE
], *p
;
182 boolean last_component
;
183 glyph_unicode_entry tmp
, *ptmp
;
185 /* skip dummy entries
*/
186 if
(s
== NULL || s
== notdef
)
189 /* strip everything after the first dot
*/
193 strncat
(buf
, s
, (size_t
) (p
- s
));
200 /* check for case of multiple components separated by |'_'|
*/
203 assert
(strlen
(s
) < sizeof
(buf
));
206 p
= strchr
(buf
, '_'
);
210 last_component
= false
;
213 tmp.code
= UNI_UNDEF
;
214 set_glyph_unicode
(s
, &tmp);
216 case UNI_UNDEF
: /* not found
, do nothing
*/
218 case UNI_STRING
: /* s matched an entry with string value in the database
*/
219 assert
(tmp.unicode_seq
!= NULL);
220 assert
(strlen
(buf2
) + strlen
(tmp.unicode_seq
) < sizeof
(buf2
));
221 strcat
(buf2
, tmp.unicode_seq
);
223 case UNI_EXTRA_STRING
: /* s is a multiple value of form
"uniXXXX" */
224 assert
(strlen
(buf2
) + strlen
(tmp.unicode_seq
) < sizeof
(buf2
));
225 strcat
(buf2
, tmp.unicode_seq
);
226 xfree
(tmp.unicode_seq
);
228 default
: /* s matched an entry with numeric value in the
229 database
, or a value derived from
"uXXXX" */
230 assert
(tmp.code
>= 0);
231 strcat
(buf2
, utf16be_str
(tmp.code
));
239 last_component
= true
;
242 gp-
>code
= UNI_EXTRA_STRING
;
243 gp-
>unicode_seq
= xstrdup
(buf2
);
247 /* lookup for glyph name in the database
*/
249 tmp.code
= UNI_UNDEF
;
250 ptmp
= (glyph_unicode_entry
*) avl_find
(glyph_unicode_tree
, &tmp);
252 gp-
>code
= ptmp-
>code
;
253 gp-
>unicode_seq
= ptmp-
>unicode_seq
;
257 /* check for case of
"uniXXXX" (multiple
4-hex-digit values allowed
) */
258 if
(str_prefix
(s
, "uni")) {
259 p
= s
+ strlen
("uni");
260 code
= check_unicode_value
(p
, true
);
261 if
(code
!= UNI_UNDEF
) {
262 if
(strlen
(p
) == 4) /* single value
*/
264 else
{ /* multiple value
*/
265 gp-
>code
= UNI_EXTRA_STRING
;
266 gp-
>unicode_seq
= xstrdup
(p
);
269 return
; /* since the last case cannot happen
*/
272 /* check for case of
"uXXXX" (single value up to
6 hex digits
) */
273 if
(str_prefix
(s
, "u")) {
275 code
= check_unicode_value
(p
, false
);
276 if
(code
!= UNI_UNDEF
) {
284 static void set_cid_glyph_unicode
(long index
, glyph_unicode_entry
* gp
,
285 internal_font_number f
)
288 if
(font_tounicode
(f
) &&
289 (s
= get_charinfo_tounicode
(char_info
(f
, (int
) index
))) != NULL) {
290 gp-
>code
= UNI_EXTRA_STRING
;
291 gp-
>unicode_seq
= xstrdup
(s
);
293 gp-
>code
= index
; /* fallback
*/
299 int write_tounicode
(PDF pdf
, char
**glyph_names
, char
*name
)
301 char buf
[SMALL_BUF_SIZE
], *p
;
302 static char builtin_suffix
[] = "-builtin";
303 short range_size
[257];
304 glyph_unicode_entry gtab
[257];
307 int bfchar_count
, bfrange_count
, subrange_count
;
308 assert
(strlen
(name
) + strlen
(builtin_suffix
) < SMALL_BUF_SIZE
);
309 if
(glyph_unicode_tree
== NULL) {
310 pdf-
>gen_tounicode
= 0;
314 if
((p
= strrchr
(buf
, '.'
)) != NULL && strcmp(p, ".enc") == 0)
315 *p
= 0; /* strip
".enc" from encoding name
*/
317 strcat
(buf
, builtin_suffix
); /* ".enc" not present
, this is a builtin
318 encoding so the name is eg
"cmr10-builtin" */
319 objnum
= pdf_create_obj
(pdf
, obj_type_others
, 0);
320 pdf_begin_obj
(pdf
, objnum
, OBJSTM_NEVER
);
322 pdf_dict_add_streaminfo
(pdf
);
324 pdf_begin_stream
(pdf
);
325 pdf_printf
(pdf
, "%%!PS-Adobe-3.0 Resource-CMap\n"@
/
326 "%%%%DocumentNeededResources: ProcSet (CIDInit)\n"@
/
327 "%%%%IncludeResource: ProcSet (CIDInit)\n"@
/
328 "%%%%BeginResource: CMap (TeX-%s-0)\n"@
/
329 "%%%%Title: (TeX-%s-0 TeX %s 0)\n"@
/
330 "%%%%Version: 1.000\n"@
/
331 "%%%%EndComments\n"@
/
332 "/CIDInit /ProcSet findresource begin\n"@
/
336 "<< /Registry (TeX)\n"@
/
340 "/CMapName /TeX-%s-0 def\n"@
/
341 "/CMapType 2 def\n"@
/
342 "1 begincodespacerange\n"@
/
343 "<00> <FF>\n" "endcodespacerange\n", buf
, buf
, buf
, buf
, buf
);
346 for
(i
= 0; i
< 256; ++i
) {
347 gtab
[i
].code
= UNI_UNDEF
;
348 set_glyph_unicode
(glyph_names
[i
], >ab[i]);
350 gtab
[256].code
= UNI_UNDEF
;
352 /* set |range_size|
*/
353 for
(i
= 0; i
< 256;) {
354 if
(gtab
[i
].code
== UNI_STRING || gtab
[i
].code
== UNI_EXTRA_STRING
) {
355 range_size
[i
] = 1; /* single entry
*/
357 } else if
(gtab
[i
].code
== UNI_UNDEF
) {
358 range_size
[i
] = 0; /* no entry
*/
360 } else
{ /* gtab
[i
].code
>= 0 */
362 while
(i
< 256 && gtab[i + 1].code >= 0 &&
363 gtab
[i
].code
+ 1 == gtab
[i
+ 1].code
)
365 /* at this point i is the last entry of the subrange
*/
366 i
++; /* move i to the next entry
*/
367 range_size
[j
] = (short
) (i
- j
);
371 /* calculate |bfrange_count| and |bfchar_count|
*/
374 for
(i
= 0; i
< 256;) {
375 if
(range_size
[i
] == 1) {
378 } else if
(range_size
[i
] > 1) {
385 /* write out bfrange
*/
388 if
(bfrange_count
> 100)
389 subrange_count
= 100;
391 subrange_count
= bfrange_count
;
392 bfrange_count
-= subrange_count
;
393 pdf_printf
(pdf
, "%i beginbfrange\n", subrange_count
);
394 for
(j
= 0; j
< subrange_count
; j
++) {
395 while
(range_size
[i
] <= 1 && i < 256)
398 pdf_printf
(pdf
, "<%02X> <%02X> <%s>\n", i
, i
+ range_size
[i
] - 1,
399 utf16be_str
(gtab
[i
].code
));
402 pdf_printf
(pdf
, "endbfrange\n");
403 if
(bfrange_count
> 0)
406 /* write out bfchar
*/
409 if
(bfchar_count
> 100)
410 subrange_count
= 100;
412 subrange_count
= bfchar_count
;
413 bfchar_count
-= subrange_count
;
414 pdf_printf
(pdf
, "%i beginbfchar\n", subrange_count
);
415 for
(j
= 0; j
< subrange_count
; j
++) {
417 if
(range_size
[i
] > 1)
419 else if
(range_size
[i
] == 0)
421 else
/* |range_size
[i
] == 1|
*/
424 assert
(i
< 256 && gtab[i].code != UNI_UNDEF);
425 if
(gtab
[i
].code
== UNI_STRING || gtab
[i
].code
== UNI_EXTRA_STRING
) {
426 assert
(gtab
[i
].unicode_seq
!= NULL);
427 pdf_printf
(pdf
, "<%02X> <%s>\n", i
, gtab
[i
].unicode_seq
);
429 pdf_printf
(pdf
, "<%02X> <%s>\n", i
, utf16be_str
(gtab
[i
].code
));
432 pdf_printf
(pdf
, "endbfchar\n");
433 if
(bfchar_count
> 0)
436 /* free strings allocated by |set_glyph_unicode
()|
*/
437 for
(i
= 0; i
< 256; ++i
) {
438 if
(gtab
[i
].code
== UNI_EXTRA_STRING
)
439 xfree
(gtab
[i
].unicode_seq
);
442 pdf_printf
(pdf
, "endcmap\n"
443 "CMapName currentdict /CMap defineresource pop\n"
444 "end\n" "end\n" "%%%%EndResource\n" "%%%%EOF\n");
451 int write_cid_tounicode
(PDF pdf
, fo_entry
* fo
, internal_font_number f
)
454 static int range_size
[65537];
455 static glyph_unicode_entry gtab
[65537];
458 int bfchar_count
, bfrange_count
, subrange_count
;
461 assert
(fo-
>fd-
>fontname
);
462 buf
= xmalloc
((unsigned
) (strlen
(fo-
>fd-
>fontname
) + 8));
463 sprintf
(buf
, "%s-%s",
464 (fo-
>fd-
>subset_tag
!= NULL ? fo-
>fd-
>subset_tag
: "UCS"),
467 objnum
= pdf_create_obj
(pdf
, obj_type_others
, 0);
468 pdf_begin_obj
(pdf
, objnum
, OBJSTM_NEVER
);
470 pdf_dict_add_streaminfo
(pdf
);
472 pdf_begin_stream
(pdf
);
473 pdf_printf
(pdf
, "%%!PS-Adobe-3.0 Resource-CMap\n"@
/
474 "%%%%DocumentNeededResources: ProcSet (CIDInit)\n"@
/
475 "%%%%IncludeResource: ProcSet (CIDInit)\n"@
/
476 "%%%%BeginResource: CMap (TeX-%s-0)\n"@
/
477 "%%%%Title: (TeX-%s-0 TeX %s 0)\n"@
/
478 "%%%%Version: 1.000\n"@
/
479 "%%%%EndComments\n"@
/
480 "/CIDInit /ProcSet findresource begin\n"@
/
484 "<< /Registry (TeX)\n"@
/
488 "/CMapName /TeX-Identity-%s def\n"@
/
489 "/CMapType 2 def\n"@
/
490 "1 begincodespacerange\n"@
/
492 "endcodespacerange\n", buf
, buf
, buf
, buf
, buf
);
495 for
(i
= 0; i
< 65537; ++i
) {
496 gtab
[i
].code
= UNI_UNDEF
;
498 for
(k
= 1; k
<= max_font_id
(); k
++) {
499 if
(k
== f ||
-f
== pdf_font_num
(k
)) {
500 for
(i
= font_bc
(k
); i
<= font_ec
(k
); i
++) {
501 if
(quick_char_exists
(k
, i
) && char_used(k, i)) {
502 j
= char_index
(k
, i
);
503 if
(gtab
[j
].code
== UNI_UNDEF
) {
504 set_cid_glyph_unicode
(i
, >ab[j], f);
511 /* set |range_size|
*/
512 for
(i
= 0; i
< 65536;) {
513 if
(gtab
[i
].code
== UNI_STRING || gtab
[i
].code
== UNI_EXTRA_STRING
) {
514 range_size
[i
] = 1; /* single entry
*/
516 } else if
(gtab
[i
].code
== UNI_UNDEF
) {
517 range_size
[i
] = 0; /* no entry
*/
519 } else
{ /* |gtab
[i
].code
>= 0|
*/
522 while
(i
< 65536 && k<255 && gtab[i + 1].code >= 0 &&
523 gtab
[i
].code
+ 1 == gtab
[i
+ 1].code
) {
526 /* at this point i is the last entry of the subrange
*/
527 i
++; /* move i to the next entry
*/
528 range_size
[j
] = i
- j
;
532 /* calculate |bfrange_count| and |bfchar_count|
*/
535 for
(i
= 0; i
< 65536;) {
536 if
(range_size
[i
] == 1) {
539 } else if
(range_size
[i
] > 1) {
546 /* write out bfrange
*/
549 if
(bfrange_count
> 100)
550 subrange_count
= 100;
552 subrange_count
= bfrange_count
;
553 bfrange_count
-= subrange_count
;
554 pdf_printf
(pdf
, "%i beginbfrange\n", subrange_count
);
555 for
(j
= 0; j
< subrange_count
; j
++) {
556 while
(range_size
[i
] <= 1 && i < 65536)
559 pdf_printf
(pdf
, "<%04X> <%04X> <%s>\n", i
, i
+ range_size
[i
] - 1,
560 utf16be_str
(gtab
[i
].code
));
563 pdf_printf
(pdf
, "endbfrange\n");
564 if
(bfrange_count
> 0)
567 /* write out bfchar
*/
570 if
(bfchar_count
> 100)
571 subrange_count
= 100;
573 subrange_count
= bfchar_count
;
574 bfchar_count
-= subrange_count
;
575 pdf_printf
(pdf
, "%i beginbfchar\n", subrange_count
);
576 for
(j
= 0; j
< subrange_count
; j
++) {
578 if
(range_size
[i
] > 1)
580 else if
(range_size
[i
] == 0)
582 else
/* |range_size
[i
] == 1|
*/
585 assert
(i
< 65536 && gtab[i].code != UNI_UNDEF);
586 if
(gtab
[i
].code
== UNI_STRING || gtab
[i
].code
== UNI_EXTRA_STRING
) {
587 assert
(gtab
[i
].unicode_seq
!= NULL);
588 pdf_printf
(pdf
, "<%04X> <%s>\n", i
, gtab
[i
].unicode_seq
);
590 pdf_printf
(pdf
, "<%04X> <%s>\n", i
, utf16be_str
(gtab
[i
].code
));
593 pdf_printf
(pdf
, "endbfchar\n");
594 if
(bfchar_count
> 0)
597 /* free strings allocated by |set_glyph_unicode
()|
*/
598 for
(i
= 0; i
< 65536; ++i
) {
599 if
(gtab
[i
].code
== UNI_EXTRA_STRING
)
600 xfree
(gtab
[i
].unicode_seq
);
603 pdf_printf
(pdf
, "endcmap\n"
604 "CMapName currentdict /CMap defineresource pop\n"
605 "end\n" "end\n" "%%%%EndResource\n" "%%%%EOF\n");