2 * Copyright © 2014 Google, Inc.
4 * This is part of HarfBuzz, a text shaping library.
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
24 * Google Author(s): Behdad Esfahbod
27 #ifndef HB_OT_CMAP_TABLE_HH
28 #define HB_OT_CMAP_TABLE_HH
30 #include "hb-ot-os2-table.hh"
31 #include "hb-ot-shaper-arabic-pua.hh"
32 #include "hb-open-type.hh"
34 #include "hb-cache.hh"
37 * cmap -- Character to Glyph Index Mapping
38 * https://docs.microsoft.com/en-us/typography/opentype/spec/cmap
40 #define HB_OT_TAG_cmap HB_TAG('c','m','a','p')
45 struct CmapSubtableFormat0
47 bool get_glyph (hb_codepoint_t codepoint
, hb_codepoint_t
*glyph
) const
49 hb_codepoint_t gid
= codepoint
< 256 ? glyphIdArray
[codepoint
] : 0;
56 unsigned get_language () const
61 void collect_unicodes (hb_set_t
*out
) const
63 for (unsigned int i
= 0; i
< 256; i
++)
68 void collect_mapping (hb_set_t
*unicodes
, /* OUT */
69 hb_map_t
*mapping
/* OUT */) const
71 for (unsigned i
= 0; i
< 256; i
++)
74 hb_codepoint_t glyph
= glyphIdArray
[i
];
76 mapping
->set (i
, glyph
);
80 bool sanitize (hb_sanitize_context_t
*c
) const
82 TRACE_SANITIZE (this);
83 return_trace (c
->check_struct (this));
87 HBUINT16 format
; /* Format number is set to 0. */
88 HBUINT16 length
; /* Byte length of this subtable. */
89 HBUINT16 language
; /* Ignore. */
90 HBUINT8 glyphIdArray
[256];/* An array that maps character
91 * code to glyph index values. */
93 DEFINE_SIZE_STATIC (6 + 256);
96 struct CmapSubtableFormat4
100 template<typename Iterator
,
102 hb_requires (hb_is_iterator (Iterator
))>
103 void to_ranges (Iterator it
, Writer
& range_writer
)
105 hb_codepoint_t start_cp
= 0, prev_run_start_cp
= 0, run_start_cp
= 0, end_cp
= 0, last_gid
= 0;
106 int run_length
= 0 , delta
= 0, prev_delta
= 0;
116 const auto& pair
= *it
;
117 start_cp
= pair
.first
;
118 prev_run_start_cp
= start_cp
;
119 run_start_cp
= start_cp
;
121 last_gid
= pair
.second
;
126 delta
= last_gid
- start_cp
;
127 mode
= FIRST_SUB_RANGE
;
132 const auto& pair
= *it
;
133 hb_codepoint_t next_cp
= pair
.first
;
134 hb_codepoint_t next_gid
= pair
.second
;
135 if (next_cp
!= end_cp
+ 1) {
136 // Current range is over, stop processing.
140 if (next_gid
== last_gid
+ 1) {
141 // The current run continues.
149 // A new run is starting, decide if we want to commit the current run.
150 int split_cost
= (mode
== FIRST_SUB_RANGE
) ? 8 : 16;
151 int run_cost
= run_length
* 2;
152 if (run_cost
>= split_cost
) {
153 commit_current_range(start_cp
,
165 mode
= FOLLOWING_SUB_RANGE
;
166 prev_run_start_cp
= run_start_cp
;
167 run_start_cp
= next_cp
;
170 delta
= next_gid
- run_start_cp
;
177 commit_current_range (start_cp
,
187 if (likely (end_cp
!= 0xFFFF)) {
188 range_writer (0xFFFF, 0xFFFF, 1);
193 * Writes the current range as either one or two ranges depending on what is most efficient.
195 template<typename Writer
>
196 void commit_current_range (hb_codepoint_t start
,
197 hb_codepoint_t prev_run_start
,
198 hb_codepoint_t run_start
,
201 int previous_run_delta
,
203 Writer
& range_writer
) {
204 bool should_split
= false;
205 if (start
< run_start
&& run_start
< end
) {
206 int run_cost
= (end
- run_start
+ 1) * 2;
207 if (run_cost
>= split_cost
) {
212 // TODO(grieger): handle case where delta is legitimately 0, mark range offset array instead?
214 if (start
== prev_run_start
)
215 range_writer (start
, run_start
- 1, previous_run_delta
);
217 range_writer (start
, run_start
- 1, 0);
218 range_writer (run_start
, end
, run_delta
);
223 if (start
== run_start
) {
224 // Range is only a run
225 range_writer (start
, end
, run_delta
);
229 // Write only a single non-run range.
230 range_writer (start
, end
, 0);
233 template<typename Iterator
,
234 hb_requires (hb_is_iterator (Iterator
))>
235 unsigned serialize_find_segcount (Iterator it
) {
237 unsigned segcount
= 0;
239 void operator() (hb_codepoint_t start
,
246 to_ranges (+it
, counter
);
247 return counter
.segcount
;
251 template<typename Iterator
,
252 hb_requires (hb_is_iterator (Iterator
))>
253 bool serialize_start_end_delta_arrays (hb_serialize_context_t
*c
,
258 hb_serialize_context_t
*serializer_
;
260 HBUINT16
* start_code_
;
264 Writer(hb_serialize_context_t
*serializer
)
265 : serializer_(serializer
),
267 start_code_(nullptr),
270 void operator() (hb_codepoint_t start
,
273 start_code_
[index_
] = start
;
274 end_code_
[index_
] = end
;
275 id_delta_
[index_
] = delta
;
280 writer
.end_code_
= c
->allocate_size
<HBUINT16
> (HBUINT16::static_size
* segcount
, false);
281 (void) c
->allocate_size
<HBUINT16
> (2); // padding
282 writer
.start_code_
= c
->allocate_size
<HBUINT16
> (HBUINT16::static_size
* segcount
, false);
283 writer
.id_delta_
= c
->allocate_size
<HBINT16
> (HBINT16::static_size
* segcount
, false);
285 if (unlikely (!writer
.end_code_
|| !writer
.start_code_
|| !writer
.id_delta_
)) return false;
287 to_ranges (+it
, writer
);
291 template<typename Iterator
,
292 hb_requires (hb_is_iterator (Iterator
))>
293 HBUINT16
* serialize_rangeoffset_glyid (hb_serialize_context_t
*c
,
300 hb_map_t cp_to_gid
{ it
};
302 HBUINT16
*idRangeOffset
= c
->allocate_size
<HBUINT16
> (HBUINT16::static_size
* segcount
);
303 if (unlikely (!c
->check_success (idRangeOffset
))) return nullptr;
304 if (unlikely ((char *)idRangeOffset
- (char *)idDelta
!= (int) segcount
* (int) HBINT16::static_size
)) return nullptr;
306 for (unsigned i
: + hb_range (segcount
)
307 | hb_filter ([&] (const unsigned _
) { return idDelta
[_
] == 0; }))
309 idRangeOffset
[i
] = 2 * (c
->start_embed
<HBUINT16
> () - idRangeOffset
- i
);
310 for (hb_codepoint_t cp
= startCode
[i
]; cp
<= endCode
[i
]; cp
++)
314 c
->copy
<HBUINT16
> (gid
);
318 return idRangeOffset
;
321 template<typename Iterator
,
322 hb_requires (hb_is_iterator (Iterator
))>
323 void serialize (hb_serialize_context_t
*c
,
328 | hb_filter ([&] (const hb_codepoint_pair_t _
)
329 { return _
.first
<= 0xFFFF; })
332 if (!format4_iter
) return;
334 unsigned table_initpos
= c
->length ();
335 if (unlikely (!c
->extend_min (this))) return;
338 hb_vector_t
<hb_codepoint_pair_t
> cp_to_gid
{
342 //serialize endCode[], startCode[], idDelta[]
343 HBUINT16
* endCode
= c
->start_embed
<HBUINT16
> ();
344 unsigned segcount
= serialize_find_segcount (cp_to_gid
.iter());
345 if (unlikely (!serialize_start_end_delta_arrays (c
, cp_to_gid
.iter(), segcount
)))
348 HBUINT16
*startCode
= endCode
+ segcount
+ 1;
349 HBINT16
*idDelta
= ((HBINT16
*)startCode
) + segcount
;
351 HBUINT16
*idRangeOffset
= serialize_rangeoffset_glyid (c
,
357 if (unlikely (!c
->check_success (idRangeOffset
))) return;
359 this->length
= c
->length () - table_initpos
;
360 if ((long long) this->length
!= (long long) c
->length () - table_initpos
)
362 // Length overflowed. Discard the current object before setting the error condition, otherwise
363 // discard is a noop which prevents the higher level code from reverting the serializer to the
364 // pre-error state in cmap4 overflow handling code.
366 c
->err (HB_SERIALIZE_ERROR_INT_OVERFLOW
);
370 this->segCountX2
= segcount
* 2;
371 this->entrySelector
= hb_max (1u, hb_bit_storage (segcount
)) - 1;
372 this->searchRange
= 2 * (1u << this->entrySelector
);
373 this->rangeShift
= segcount
* 2 > this->searchRange
374 ? 2 * segcount
- this->searchRange
378 unsigned get_language () const
386 accelerator_t (const CmapSubtableFormat4
*subtable
) { init (subtable
); }
388 void init (const CmapSubtableFormat4
*subtable
)
390 segCount
= subtable
->segCountX2
/ 2;
391 endCount
= subtable
->values
.arrayZ
;
392 startCount
= endCount
+ segCount
+ 1;
393 idDelta
= startCount
+ segCount
;
394 idRangeOffset
= idDelta
+ segCount
;
395 glyphIdArray
= idRangeOffset
+ segCount
;
396 glyphIdArrayLength
= (subtable
->length
- 16 - 8 * segCount
) / 2;
399 bool get_glyph (hb_codepoint_t codepoint
, hb_codepoint_t
*glyph
) const
403 int cmp (hb_codepoint_t k
,
404 unsigned distance
) const
406 if (k
> last
) return +1;
407 if (k
< (&last
)[distance
]/*first*/) return -1;
413 const HBUINT16
*found
= hb_bsearch (codepoint
,
416 sizeof (CustomRange
),
417 _hb_cmp_method
<hb_codepoint_t
, CustomRange
, unsigned>,
419 if (unlikely (!found
))
421 unsigned int i
= found
- endCount
;
424 unsigned int rangeOffset
= this->idRangeOffset
[i
];
425 if (rangeOffset
== 0)
426 gid
= codepoint
+ this->idDelta
[i
];
429 /* Somebody has been smoking... */
430 unsigned int index
= rangeOffset
/ 2 + (codepoint
- this->startCount
[i
]) + i
- this->segCount
;
431 if (unlikely (index
>= this->glyphIdArrayLength
))
433 gid
= this->glyphIdArray
[index
];
436 gid
+= this->idDelta
[i
];
445 HB_INTERNAL
static bool get_glyph_func (const void *obj
, hb_codepoint_t codepoint
, hb_codepoint_t
*glyph
)
446 { return ((const accelerator_t
*) obj
)->get_glyph (codepoint
, glyph
); }
448 void collect_unicodes (hb_set_t
*out
) const
450 unsigned int count
= this->segCount
;
451 if (count
&& this->startCount
[count
- 1] == 0xFFFFu
)
452 count
--; /* Skip sentinel segment. */
453 for (unsigned int i
= 0; i
< count
; i
++)
455 hb_codepoint_t start
= this->startCount
[i
];
456 hb_codepoint_t end
= this->endCount
[i
];
457 unsigned int rangeOffset
= this->idRangeOffset
[i
];
458 out
->add_range(start
, end
);
459 if (rangeOffset
== 0)
461 for (hb_codepoint_t codepoint
= start
; codepoint
<= end
; codepoint
++)
463 hb_codepoint_t gid
= (codepoint
+ this->idDelta
[i
]) & 0xFFFFu
;
470 for (hb_codepoint_t codepoint
= start
; codepoint
<= end
; codepoint
++)
472 unsigned int index
= rangeOffset
/ 2 + (codepoint
- this->startCount
[i
]) + i
- this->segCount
;
473 if (unlikely (index
>= this->glyphIdArrayLength
))
475 out
->del_range (codepoint
, end
);
478 hb_codepoint_t gid
= this->glyphIdArray
[index
];
486 void collect_mapping (hb_set_t
*unicodes
, /* OUT */
487 hb_map_t
*mapping
/* OUT */) const
489 // TODO(grieger): optimize similar to collect_unicodes
490 // (ie. use add_range())
491 unsigned count
= this->segCount
;
492 if (count
&& this->startCount
[count
- 1] == 0xFFFFu
)
493 count
--; /* Skip sentinel segment. */
494 for (unsigned i
= 0; i
< count
; i
++)
496 hb_codepoint_t start
= this->startCount
[i
];
497 hb_codepoint_t end
= this->endCount
[i
];
498 unsigned rangeOffset
= this->idRangeOffset
[i
];
499 if (rangeOffset
== 0)
501 for (hb_codepoint_t codepoint
= start
; codepoint
<= end
; codepoint
++)
503 hb_codepoint_t gid
= (codepoint
+ this->idDelta
[i
]) & 0xFFFFu
;
506 unicodes
->add (codepoint
);
507 mapping
->set (codepoint
, gid
);
512 for (hb_codepoint_t codepoint
= start
; codepoint
<= end
; codepoint
++)
514 unsigned index
= rangeOffset
/ 2 + (codepoint
- this->startCount
[i
]) + i
- this->segCount
;
515 if (unlikely (index
>= this->glyphIdArrayLength
))
517 hb_codepoint_t gid
= this->glyphIdArray
[index
];
520 unicodes
->add (codepoint
);
521 mapping
->set (codepoint
, gid
);
527 const HBUINT16
*endCount
;
528 const HBUINT16
*startCount
;
529 const HBUINT16
*idDelta
;
530 const HBUINT16
*idRangeOffset
;
531 const HBUINT16
*glyphIdArray
;
532 unsigned int segCount
;
533 unsigned int glyphIdArrayLength
;
536 bool get_glyph (hb_codepoint_t codepoint
, hb_codepoint_t
*glyph
) const
538 accelerator_t
accel (this);
539 return accel
.get_glyph_func (&accel
, codepoint
, glyph
);
541 void collect_unicodes (hb_set_t
*out
) const
543 accelerator_t
accel (this);
544 accel
.collect_unicodes (out
);
547 void collect_mapping (hb_set_t
*unicodes
, /* OUT */
548 hb_map_t
*mapping
/* OUT */) const
550 accelerator_t
accel (this);
551 accel
.collect_mapping (unicodes
, mapping
);
554 bool sanitize (hb_sanitize_context_t
*c
) const
556 TRACE_SANITIZE (this);
557 if (unlikely (!c
->check_struct (this)))
558 return_trace (false);
561 if (unlikely (!c
->check_range (this, length
)))
563 /* Some broken fonts have too long of a "length" value.
564 * If that is the case, just change the value to truncate
565 * the subtable at the end of the blob. */
566 uint16_t new_length
= (uint16_t) hb_min ((uintptr_t) 65535,
567 (uintptr_t) (c
->end
-
569 if (!c
->try_set (&length
, new_length
))
570 return_trace (false);
573 return_trace (16 + 4 * (unsigned int) segCountX2
<= length
);
579 HBUINT16 format
; /* Format number is set to 4. */
580 HBUINT16 length
; /* This is the length in bytes of the
582 HBUINT16 language
; /* Ignore. */
583 HBUINT16 segCountX2
; /* 2 x segCount. */
584 HBUINT16 searchRange
; /* 2 * (2**floor(log2(segCount))) */
585 HBUINT16 entrySelector
; /* log2(searchRange/2) */
586 HBUINT16 rangeShift
; /* 2 x segCount - searchRange */
588 UnsizedArrayOf
<HBUINT16
>
591 HBUINT16 endCount
[segCount
]; /* End characterCode for each segment,
593 HBUINT16 reservedPad
; /* Set to 0. */
594 HBUINT16 startCount
[segCount
]; /* Start character code for each segment. */
595 HBINT16 idDelta
[segCount
]; /* Delta for all character codes in segment. */
596 HBUINT16 idRangeOffset
[segCount
];/* Offsets into glyphIdArray or 0 */
597 UnsizedArrayOf
<HBUINT16
>
598 glyphIdArray
; /* Glyph index array (arbitrary length) */
602 DEFINE_SIZE_ARRAY (14, values
);
605 struct CmapSubtableLongGroup
607 friend struct CmapSubtableFormat12
;
608 friend struct CmapSubtableFormat13
;
610 friend struct CmapSubtableLongSegmented
;
613 int cmp (hb_codepoint_t codepoint
) const
615 if (codepoint
< startCharCode
) return -1;
616 if (codepoint
> endCharCode
) return +1;
620 bool sanitize (hb_sanitize_context_t
*c
) const
622 TRACE_SANITIZE (this);
623 return_trace (c
->check_struct (this));
627 HBUINT32 startCharCode
; /* First character code in this group. */
628 HBUINT32 endCharCode
; /* Last character code in this group. */
629 HBUINT32 glyphID
; /* Glyph index; interpretation depends on
630 * subtable format. */
632 DEFINE_SIZE_STATIC (12);
634 DECLARE_NULL_NAMESPACE_BYTES (OT
, CmapSubtableLongGroup
);
636 template <typename UINT
>
637 struct CmapSubtableTrimmed
639 bool get_glyph (hb_codepoint_t codepoint
, hb_codepoint_t
*glyph
) const
641 /* Rely on our implicit array bound-checking. */
642 hb_codepoint_t gid
= glyphIdArray
[codepoint
- startCharCode
];
649 unsigned get_language () const
654 void collect_unicodes (hb_set_t
*out
) const
656 hb_codepoint_t start
= startCharCode
;
657 unsigned int count
= glyphIdArray
.len
;
658 for (unsigned int i
= 0; i
< count
; i
++)
660 out
->add (start
+ i
);
663 void collect_mapping (hb_set_t
*unicodes
, /* OUT */
664 hb_map_t
*mapping
/* OUT */) const
666 hb_codepoint_t start_cp
= startCharCode
;
667 unsigned count
= glyphIdArray
.len
;
668 for (unsigned i
= 0; i
< count
; i
++)
671 hb_codepoint_t unicode
= start_cp
+ i
;
672 hb_codepoint_t glyphid
= glyphIdArray
[i
];
673 unicodes
->add (unicode
);
674 mapping
->set (unicode
, glyphid
);
678 bool sanitize (hb_sanitize_context_t
*c
) const
680 TRACE_SANITIZE (this);
681 return_trace (c
->check_struct (this) && glyphIdArray
.sanitize (c
));
685 UINT formatReserved
; /* Subtable format and (maybe) padding. */
686 UINT length
; /* Byte length of this subtable. */
687 UINT language
; /* Ignore. */
688 UINT startCharCode
; /* First character code covered. */
689 ArrayOf
<HBGlyphID16
, UINT
>
690 glyphIdArray
; /* Array of glyph index values for character
691 * codes in the range. */
693 DEFINE_SIZE_ARRAY (5 * sizeof (UINT
), glyphIdArray
);
696 struct CmapSubtableFormat6
: CmapSubtableTrimmed
<HBUINT16
> {};
697 struct CmapSubtableFormat10
: CmapSubtableTrimmed
<HBUINT32
> {};
699 template <typename T
>
700 struct CmapSubtableLongSegmented
704 bool get_glyph (hb_codepoint_t codepoint
, hb_codepoint_t
*glyph
) const
706 hb_codepoint_t gid
= T::group_get_glyph (groups
.bsearch (codepoint
), codepoint
);
713 unsigned get_language () const
718 void collect_unicodes (hb_set_t
*out
, unsigned int num_glyphs
) const
720 for (unsigned int i
= 0; i
< this->groups
.len
; i
++)
722 hb_codepoint_t start
= this->groups
[i
].startCharCode
;
723 hb_codepoint_t end
= hb_min ((hb_codepoint_t
) this->groups
[i
].endCharCode
,
724 (hb_codepoint_t
) HB_UNICODE_MAX
);
725 hb_codepoint_t gid
= this->groups
[i
].glyphID
;
728 /* Intention is: if (hb_is_same (T, CmapSubtableFormat13)) continue; */
729 if (! T::group_get_glyph (this->groups
[i
], end
)) continue;
733 if (unlikely ((unsigned int) gid
>= num_glyphs
)) continue;
734 if (unlikely ((unsigned int) (gid
+ end
- start
) >= num_glyphs
))
735 end
= start
+ (hb_codepoint_t
) num_glyphs
- gid
;
737 out
->add_range (start
, hb_min (end
, 0x10FFFFu
));
741 void collect_mapping (hb_set_t
*unicodes
, /* OUT */
742 hb_map_t
*mapping
, /* OUT */
743 unsigned num_glyphs
) const
745 hb_codepoint_t last_end
= 0;
746 unsigned count
= this->groups
.len
;
747 for (unsigned i
= 0; i
< count
; i
++)
749 hb_codepoint_t start
= this->groups
.arrayZ
[i
].startCharCode
;
750 hb_codepoint_t end
= hb_min ((hb_codepoint_t
) this->groups
.arrayZ
[i
].endCharCode
,
751 (hb_codepoint_t
) HB_UNICODE_MAX
);
752 if (unlikely (start
> end
|| start
< last_end
)) {
753 // Range is not in order and is invalid, skip it.
759 hb_codepoint_t gid
= this->groups
.arrayZ
[i
].glyphID
;
762 if (T::formatNumber
== 13) continue;
766 if (unlikely ((unsigned int) gid
>= num_glyphs
)) continue;
767 if (unlikely ((unsigned int) (gid
+ end
- start
) >= num_glyphs
))
768 end
= start
+ (hb_codepoint_t
) num_glyphs
- gid
;
770 mapping
->alloc (mapping
->get_population () + end
- start
+ 1);
772 unicodes
->add_range (start
, end
);
773 for (unsigned cp
= start
; cp
<= end
; cp
++)
775 mapping
->set (cp
, gid
);
781 bool sanitize (hb_sanitize_context_t
*c
) const
783 TRACE_SANITIZE (this);
784 return_trace (c
->check_struct (this) && groups
.sanitize (c
));
788 HBUINT16 format
; /* Subtable format; set to 12. */
789 HBUINT16 reserved
; /* Reserved; set to 0. */
790 HBUINT32 length
; /* Byte length of this subtable. */
791 HBUINT32 language
; /* Ignore. */
792 SortedArray32Of
<CmapSubtableLongGroup
>
793 groups
; /* Groupings. */
795 DEFINE_SIZE_ARRAY (16, groups
);
798 struct CmapSubtableFormat12
: CmapSubtableLongSegmented
<CmapSubtableFormat12
>
800 static constexpr int increment
= 1;
801 static constexpr int formatNumber
= 12;
803 static hb_codepoint_t
group_get_glyph (const CmapSubtableLongGroup
&group
,
805 { return likely (group
.startCharCode
<= group
.endCharCode
) ?
806 group
.glyphID
+ (u
- group
.startCharCode
) : 0; }
809 template<typename Iterator
,
810 hb_requires (hb_is_iterator (Iterator
))>
811 void serialize (hb_serialize_context_t
*c
,
815 unsigned table_initpos
= c
->length ();
816 if (unlikely (!c
->extend_min (this))) return;
818 hb_codepoint_t startCharCode
= (hb_codepoint_t
) -1, endCharCode
= (hb_codepoint_t
) -1;
819 hb_codepoint_t glyphID
= 0;
821 for (const auto& _
: +it
)
823 if (startCharCode
== (hb_codepoint_t
) -1)
825 startCharCode
= _
.first
;
826 endCharCode
= _
.first
;
829 else if (!_is_gid_consecutive (endCharCode
, startCharCode
, glyphID
, _
.first
, _
.second
))
831 CmapSubtableLongGroup grouprecord
;
832 grouprecord
.startCharCode
= startCharCode
;
833 grouprecord
.endCharCode
= endCharCode
;
834 grouprecord
.glyphID
= glyphID
;
835 c
->copy
<CmapSubtableLongGroup
> (grouprecord
);
837 startCharCode
= _
.first
;
838 endCharCode
= _
.first
;
842 endCharCode
= _
.first
;
845 CmapSubtableLongGroup record
;
846 record
.startCharCode
= startCharCode
;
847 record
.endCharCode
= endCharCode
;
848 record
.glyphID
= glyphID
;
849 c
->copy
<CmapSubtableLongGroup
> (record
);
853 this->length
= c
->length () - table_initpos
;
854 this->groups
.len
= (this->length
- min_size
) / CmapSubtableLongGroup::static_size
;
857 static size_t get_sub_table_size (const hb_sorted_vector_t
<CmapSubtableLongGroup
> &groups_data
)
858 { return 16 + 12 * groups_data
.length
; }
861 static bool _is_gid_consecutive (hb_codepoint_t endCharCode
,
862 hb_codepoint_t startCharCode
,
863 hb_codepoint_t glyphID
,
865 hb_codepoint_t new_gid
)
867 return (cp
- 1 == endCharCode
) &&
868 new_gid
== glyphID
+ (cp
- startCharCode
);
873 struct CmapSubtableFormat13
: CmapSubtableLongSegmented
<CmapSubtableFormat13
>
875 static constexpr int increment
= 0;
876 static constexpr int formatNumber
= 13;
878 static hb_codepoint_t
group_get_glyph (const CmapSubtableLongGroup
&group
,
879 hb_codepoint_t u HB_UNUSED
)
880 { return group
.glyphID
; }
885 GLYPH_VARIANT_NOT_FOUND
= 0,
886 GLYPH_VARIANT_FOUND
= 1,
887 GLYPH_VARIANT_USE_DEFAULT
= 2
890 struct UnicodeValueRange
892 int cmp (const hb_codepoint_t
&codepoint
) const
894 if (codepoint
< startUnicodeValue
) return -1;
895 if (codepoint
> startUnicodeValue
+ additionalCount
) return +1;
899 bool sanitize (hb_sanitize_context_t
*c
) const
901 TRACE_SANITIZE (this);
902 return_trace (c
->check_struct (this));
905 HBUINT24 startUnicodeValue
; /* First value in this range. */
906 HBUINT8 additionalCount
; /* Number of additional values in this
909 DEFINE_SIZE_STATIC (4);
912 struct DefaultUVS
: SortedArray32Of
<UnicodeValueRange
>
914 void collect_unicodes (hb_set_t
*out
) const
916 unsigned int count
= len
;
917 for (unsigned int i
= 0; i
< count
; i
++)
919 hb_codepoint_t first
= arrayZ
[i
].startUnicodeValue
;
920 hb_codepoint_t last
= hb_min ((hb_codepoint_t
) (first
+ arrayZ
[i
].additionalCount
),
921 (hb_codepoint_t
) HB_UNICODE_MAX
);
922 out
->add_range (first
, last
);
926 DefaultUVS
* copy (hb_serialize_context_t
*c
,
927 const hb_set_t
*unicodes
) const
929 auto *out
= c
->start_embed
<DefaultUVS
> ();
930 auto snap
= c
->snapshot ();
934 if (unlikely (!c
->copy
<HBUINT32
> (len
))) return nullptr;
935 unsigned init_len
= c
->length ();
937 if (this->len
> unicodes
->get_population () * hb_bit_storage ((unsigned) this->len
))
939 hb_codepoint_t start
= HB_SET_VALUE_INVALID
;
940 hb_codepoint_t end
= HB_SET_VALUE_INVALID
;
942 for (auto u
: *unicodes
)
944 if (!as_array ().bsearch (u
))
946 if (start
== HB_SET_VALUE_INVALID
)
951 if (end
+ 1 != u
|| end
- start
== 255)
953 UnicodeValueRange rec
;
954 rec
.startUnicodeValue
= start
;
955 rec
.additionalCount
= end
- start
;
956 c
->copy
<UnicodeValueRange
> (rec
);
961 if (start
!= HB_SET_VALUE_INVALID
)
963 UnicodeValueRange rec
;
964 rec
.startUnicodeValue
= start
;
965 rec
.additionalCount
= end
- start
;
966 c
->copy
<UnicodeValueRange
> (rec
);
972 hb_codepoint_t lastCode
= HB_SET_VALUE_INVALID
;
975 for (const UnicodeValueRange
& _
: *this)
977 hb_codepoint_t curEntry
= (hb_codepoint_t
) (_
.startUnicodeValue
- 1);
978 hb_codepoint_t end
= curEntry
+ _
.additionalCount
+ 2;
980 for (; unicodes
->next (&curEntry
) && curEntry
< end
;)
983 if (lastCode
== HB_SET_VALUE_INVALID
)
985 else if (lastCode
+ count
!= curEntry
)
987 UnicodeValueRange rec
;
988 rec
.startUnicodeValue
= lastCode
;
989 rec
.additionalCount
= count
- 1;
990 c
->copy
<UnicodeValueRange
> (rec
);
998 if (lastCode
!= HB_MAP_VALUE_INVALID
)
1000 UnicodeValueRange rec
;
1001 rec
.startUnicodeValue
= lastCode
;
1002 rec
.additionalCount
= count
;
1003 c
->copy
<UnicodeValueRange
> (rec
);
1007 if (c
->length () - init_len
== 0)
1014 if (unlikely (!c
->check_assign (out
->len
,
1015 (c
->length () - init_len
) / UnicodeValueRange::static_size
,
1016 HB_SERIALIZE_ERROR_INT_OVERFLOW
))) return nullptr;
1022 DEFINE_SIZE_ARRAY (4, *this);
1027 int cmp (const hb_codepoint_t
&codepoint
) const
1028 { return unicodeValue
.cmp (codepoint
); }
1030 bool sanitize (hb_sanitize_context_t
*c
) const
1032 TRACE_SANITIZE (this);
1033 return_trace (c
->check_struct (this));
1036 HBUINT24 unicodeValue
; /* Base Unicode value of the UVS */
1037 HBGlyphID16 glyphID
; /* Glyph ID of the UVS */
1039 DEFINE_SIZE_STATIC (5);
1042 struct NonDefaultUVS
: SortedArray32Of
<UVSMapping
>
1044 void collect_unicodes (hb_set_t
*out
) const
1046 for (const auto& a
: as_array ())
1047 out
->add (a
.unicodeValue
);
1050 void collect_mapping (hb_set_t
*unicodes
, /* OUT */
1051 hb_map_t
*mapping
/* OUT */) const
1053 for (const auto& a
: as_array ())
1055 hb_codepoint_t unicode
= a
.unicodeValue
;
1056 hb_codepoint_t glyphid
= a
.glyphID
;
1057 unicodes
->add (unicode
);
1058 mapping
->set (unicode
, glyphid
);
1062 void closure_glyphs (const hb_set_t
*unicodes
,
1063 hb_set_t
*glyphset
) const
1066 | hb_filter (unicodes
, &UVSMapping::unicodeValue
)
1067 | hb_map (&UVSMapping::glyphID
)
1068 | hb_sink (glyphset
)
1072 NonDefaultUVS
* copy (hb_serialize_context_t
*c
,
1073 const hb_set_t
*unicodes
,
1074 const hb_set_t
*glyphs_requested
,
1075 const hb_map_t
*glyph_map
) const
1077 auto *out
= c
->start_embed
<NonDefaultUVS
> ();
1080 | hb_filter ([&] (const UVSMapping
& _
)
1082 return unicodes
->has (_
.unicodeValue
) || glyphs_requested
->has (_
.glyphID
);
1086 if (!it
) return nullptr;
1090 if (unlikely (!c
->copy
<HBUINT32
> (len
))) return nullptr;
1092 for (const UVSMapping
& _
: it
)
1095 mapping
.unicodeValue
= _
.unicodeValue
;
1096 mapping
.glyphID
= glyph_map
->get (_
.glyphID
);
1097 c
->copy
<UVSMapping
> (mapping
);
1104 DEFINE_SIZE_ARRAY (4, *this);
1107 struct VariationSelectorRecord
1109 glyph_variant_t
get_glyph (hb_codepoint_t codepoint
,
1110 hb_codepoint_t
*glyph
,
1111 const void *base
) const
1113 if ((base
+defaultUVS
).bfind (codepoint
))
1114 return GLYPH_VARIANT_USE_DEFAULT
;
1115 const UVSMapping
&nonDefault
= (base
+nonDefaultUVS
).bsearch (codepoint
);
1116 if (nonDefault
.glyphID
)
1118 *glyph
= nonDefault
.glyphID
;
1119 return GLYPH_VARIANT_FOUND
;
1121 return GLYPH_VARIANT_NOT_FOUND
;
1124 VariationSelectorRecord(const VariationSelectorRecord
& other
)
1129 void operator= (const VariationSelectorRecord
& other
)
1131 varSelector
= other
.varSelector
;
1132 HBUINT32 offset
= other
.defaultUVS
;
1133 defaultUVS
= offset
;
1134 offset
= other
.nonDefaultUVS
;
1135 nonDefaultUVS
= offset
;
1138 void collect_unicodes (hb_set_t
*out
, const void *base
) const
1140 (base
+defaultUVS
).collect_unicodes (out
);
1141 (base
+nonDefaultUVS
).collect_unicodes (out
);
1144 void collect_mapping (const void *base
,
1145 hb_set_t
*unicodes
, /* OUT */
1146 hb_map_t
*mapping
/* OUT */) const
1148 (base
+defaultUVS
).collect_unicodes (unicodes
);
1149 (base
+nonDefaultUVS
).collect_mapping (unicodes
, mapping
);
1152 int cmp (const hb_codepoint_t
&variation_selector
) const
1153 { return varSelector
.cmp (variation_selector
); }
1155 bool sanitize (hb_sanitize_context_t
*c
, const void *base
) const
1157 TRACE_SANITIZE (this);
1158 return_trace (c
->check_struct (this) &&
1159 defaultUVS
.sanitize (c
, base
) &&
1160 nonDefaultUVS
.sanitize (c
, base
));
1163 hb_pair_t
<unsigned, unsigned>
1164 copy (hb_serialize_context_t
*c
,
1165 const hb_set_t
*unicodes
,
1166 const hb_set_t
*glyphs_requested
,
1167 const hb_map_t
*glyph_map
,
1168 const void *base
) const
1170 auto snap
= c
->snapshot ();
1171 auto *out
= c
->embed
<VariationSelectorRecord
> (*this);
1172 if (unlikely (!out
)) return hb_pair (0, 0);
1174 out
->defaultUVS
= 0;
1175 out
->nonDefaultUVS
= 0;
1177 unsigned non_default_uvs_objidx
= 0;
1178 if (nonDefaultUVS
!= 0)
1181 if (c
->copy (base
+nonDefaultUVS
, unicodes
, glyphs_requested
, glyph_map
))
1182 non_default_uvs_objidx
= c
->pop_pack ();
1183 else c
->pop_discard ();
1186 unsigned default_uvs_objidx
= 0;
1187 if (defaultUVS
!= 0)
1190 if (c
->copy (base
+defaultUVS
, unicodes
))
1191 default_uvs_objidx
= c
->pop_pack ();
1192 else c
->pop_discard ();
1196 if (!default_uvs_objidx
&& !non_default_uvs_objidx
)
1199 return hb_pair (default_uvs_objidx
, non_default_uvs_objidx
);
1202 HBUINT24 varSelector
; /* Variation selector. */
1203 Offset32To
<DefaultUVS
>
1204 defaultUVS
; /* Offset to Default UVS Table. May be 0. */
1205 Offset32To
<NonDefaultUVS
>
1206 nonDefaultUVS
; /* Offset to Non-Default UVS Table. May be 0. */
1208 DEFINE_SIZE_STATIC (11);
1211 struct CmapSubtableFormat14
1213 glyph_variant_t
get_glyph_variant (hb_codepoint_t codepoint
,
1214 hb_codepoint_t variation_selector
,
1215 hb_codepoint_t
*glyph
) const
1216 { return record
.bsearch (variation_selector
).get_glyph (codepoint
, glyph
, this); }
1218 void collect_variation_selectors (hb_set_t
*out
) const
1220 for (const auto& a
: record
.as_array ())
1221 out
->add (a
.varSelector
);
1223 void collect_variation_unicodes (hb_codepoint_t variation_selector
,
1224 hb_set_t
*out
) const
1225 { record
.bsearch (variation_selector
).collect_unicodes (out
, this); }
1227 void serialize (hb_serialize_context_t
*c
,
1228 const hb_set_t
*unicodes
,
1229 const hb_set_t
*glyphs_requested
,
1230 const hb_map_t
*glyph_map
,
1233 auto snap
= c
->snapshot ();
1234 unsigned table_initpos
= c
->length ();
1235 const char* init_tail
= c
->tail
;
1237 if (unlikely (!c
->extend_min (this))) return;
1240 auto src_tbl
= reinterpret_cast<const CmapSubtableFormat14
*> (base
);
1243 * Some versions of OTS require that offsets are in order. Due to the use
1244 * of push()/pop_pack() serializing the variation records in order results
1245 * in the offsets being in reverse order (first record has the largest
1246 * offset). While this is perfectly valid, it will cause some versions of
1247 * OTS to consider this table bad.
1249 * So to prevent this issue we serialize the variation records in reverse
1250 * order, so that the offsets are ordered from small to large. Since
1251 * variation records are supposed to be in increasing order of varSelector
1252 * we then have to reverse the order of the written variation selector
1253 * records after everything is finalized.
1255 hb_vector_t
<hb_pair_t
<unsigned, unsigned>> obj_indices
;
1256 for (int i
= src_tbl
->record
.len
- 1; i
>= 0; i
--)
1258 hb_pair_t
<unsigned, unsigned> result
= src_tbl
->record
[i
].copy (c
, unicodes
, glyphs_requested
, glyph_map
, base
);
1259 if (result
.first
|| result
.second
)
1260 obj_indices
.push (result
);
1263 if (c
->length () - table_initpos
== CmapSubtableFormat14::min_size
)
1269 if (unlikely (!c
->check_success (!obj_indices
.in_error ())))
1272 int tail_len
= init_tail
- c
->tail
;
1273 c
->check_assign (this->length
, c
->length () - table_initpos
+ tail_len
,
1274 HB_SERIALIZE_ERROR_INT_OVERFLOW
);
1275 c
->check_assign (this->record
.len
,
1276 (c
->length () - table_initpos
- CmapSubtableFormat14::min_size
) /
1277 VariationSelectorRecord::static_size
,
1278 HB_SERIALIZE_ERROR_INT_OVERFLOW
);
1280 /* Correct the incorrect write order by reversing the order of the variation
1282 _reverse_variation_records ();
1284 /* Now that records are in the right order, we can set up the offsets. */
1285 _add_links_to_variation_records (c
, obj_indices
);
1288 void _reverse_variation_records ()
1290 record
.as_array ().reverse ();
1293 void _add_links_to_variation_records (hb_serialize_context_t
*c
,
1294 const hb_vector_t
<hb_pair_t
<unsigned, unsigned>>& obj_indices
)
1296 for (unsigned i
= 0; i
< obj_indices
.length
; i
++)
1299 * Since the record array has been reversed (see comments in copy())
1300 * but obj_indices has not been, the indices at obj_indices[i]
1301 * are for the variation record at record[j].
1303 int j
= obj_indices
.length
- 1 - i
;
1304 c
->add_link (record
[j
].defaultUVS
, obj_indices
[i
].first
);
1305 c
->add_link (record
[j
].nonDefaultUVS
, obj_indices
[i
].second
);
1309 void closure_glyphs (const hb_set_t
*unicodes
,
1310 hb_set_t
*glyphset
) const
1313 | hb_filter (hb_bool
, &VariationSelectorRecord::nonDefaultUVS
)
1314 | hb_map (&VariationSelectorRecord::nonDefaultUVS
)
1315 | hb_map (hb_add (this))
1316 | hb_apply ([=] (const NonDefaultUVS
& _
) { _
.closure_glyphs (unicodes
, glyphset
); })
1320 void collect_unicodes (hb_set_t
*out
) const
1322 for (const VariationSelectorRecord
& _
: record
)
1323 _
.collect_unicodes (out
, this);
1326 void collect_mapping (hb_set_t
*unicodes
, /* OUT */
1327 hb_map_t
*mapping
/* OUT */) const
1329 for (const VariationSelectorRecord
& _
: record
)
1330 _
.collect_mapping (this, unicodes
, mapping
);
1333 bool sanitize (hb_sanitize_context_t
*c
) const
1335 TRACE_SANITIZE (this);
1336 return_trace (c
->check_struct (this) &&
1337 record
.sanitize (c
, this));
1341 HBUINT16 format
; /* Format number is set to 14. */
1342 HBUINT32 length
; /* Byte length of this subtable. */
1343 SortedArray32Of
<VariationSelectorRecord
>
1344 record
; /* Variation selector records; sorted
1345 * in increasing order of `varSelector'. */
1347 DEFINE_SIZE_ARRAY (10, record
);
1352 /* Note: We intentionally do NOT implement subtable formats 2 and 8. */
1354 bool get_glyph (hb_codepoint_t codepoint
,
1355 hb_codepoint_t
*glyph
) const
1358 case 0: return u
.format0
.get_glyph (codepoint
, glyph
);
1359 case 4: return u
.format4
.get_glyph (codepoint
, glyph
);
1360 case 6: return u
.format6
.get_glyph (codepoint
, glyph
);
1361 case 10: return u
.format10
.get_glyph (codepoint
, glyph
);
1362 case 12: return u
.format12
.get_glyph (codepoint
, glyph
);
1363 case 13: return u
.format13
.get_glyph (codepoint
, glyph
);
1365 default: return false;
1368 void collect_unicodes (hb_set_t
*out
, unsigned int num_glyphs
= UINT_MAX
) const
1371 case 0: u
.format0
.collect_unicodes (out
); return;
1372 case 4: u
.format4
.collect_unicodes (out
); return;
1373 case 6: u
.format6
.collect_unicodes (out
); return;
1374 case 10: u
.format10
.collect_unicodes (out
); return;
1375 case 12: u
.format12
.collect_unicodes (out
, num_glyphs
); return;
1376 case 13: u
.format13
.collect_unicodes (out
, num_glyphs
); return;
1382 void collect_mapping (hb_set_t
*unicodes
, /* OUT */
1383 hb_map_t
*mapping
, /* OUT */
1384 unsigned num_glyphs
= UINT_MAX
) const
1387 case 0: u
.format0
.collect_mapping (unicodes
, mapping
); return;
1388 case 4: u
.format4
.collect_mapping (unicodes
, mapping
); return;
1389 case 6: u
.format6
.collect_mapping (unicodes
, mapping
); return;
1390 case 10: u
.format10
.collect_mapping (unicodes
, mapping
); return;
1391 case 12: u
.format12
.collect_mapping (unicodes
, mapping
, num_glyphs
); return;
1392 case 13: u
.format13
.collect_mapping (unicodes
, mapping
, num_glyphs
); return;
1398 unsigned get_language () const
1401 case 0: return u
.format0
.get_language ();
1402 case 4: return u
.format4
.get_language ();
1403 case 6: return u
.format6
.get_language ();
1404 case 10: return u
.format10
.get_language ();
1405 case 12: return u
.format12
.get_language ();
1406 case 13: return u
.format13
.get_language ();
1412 template<typename Iterator
,
1413 hb_requires (hb_is_iterator (Iterator
))>
1414 void serialize (hb_serialize_context_t
*c
,
1417 const hb_subset_plan_t
*plan
,
1421 case 4: return u
.format4
.serialize (c
, it
);
1422 case 12: return u
.format12
.serialize (c
, it
);
1423 case 14: return u
.format14
.serialize (c
, &plan
->unicodes
, &plan
->glyphs_requested
, plan
->glyph_map
, base
);
1428 bool sanitize (hb_sanitize_context_t
*c
) const
1430 TRACE_SANITIZE (this);
1431 if (!u
.format
.sanitize (c
)) return_trace (false);
1434 case 0: return_trace (u
.format0
.sanitize (c
));
1435 case 4: return_trace (u
.format4
.sanitize (c
));
1436 case 6: return_trace (u
.format6
.sanitize (c
));
1437 case 10: return_trace (u
.format10
.sanitize (c
));
1438 case 12: return_trace (u
.format12
.sanitize (c
));
1439 case 13: return_trace (u
.format13
.sanitize (c
));
1440 case 14: return_trace (u
.format14
.sanitize (c
));
1441 default:return_trace (true);
1447 HBUINT16 format
; /* Format identifier */
1448 CmapSubtableFormat0 format0
;
1449 CmapSubtableFormat4 format4
;
1450 CmapSubtableFormat6 format6
;
1451 CmapSubtableFormat10 format10
;
1452 CmapSubtableFormat12 format12
;
1453 CmapSubtableFormat13 format13
;
1454 CmapSubtableFormat14 format14
;
1457 DEFINE_SIZE_UNION (2, format
);
1461 struct EncodingRecord
1463 int cmp (const EncodingRecord
&other
) const
1466 ret
= platformID
.cmp (other
.platformID
);
1467 if (ret
) return ret
;
1468 ret
= encodingID
.cmp (other
.encodingID
);
1469 if (ret
) return ret
;
1473 bool sanitize (hb_sanitize_context_t
*c
, const void *base
) const
1475 TRACE_SANITIZE (this);
1476 return_trace (c
->check_struct (this) &&
1477 subtable
.sanitize (c
, base
));
1480 template<typename Iterator
,
1481 hb_requires (hb_is_iterator (Iterator
))>
1482 EncodingRecord
* copy (hb_serialize_context_t
*c
,
1486 const hb_subset_plan_t
*plan
,
1487 /* INOUT */ unsigned *objidx
) const
1489 TRACE_SERIALIZE (this);
1490 auto snap
= c
->snapshot ();
1491 auto *out
= c
->embed (this);
1492 if (unlikely (!out
)) return_trace (nullptr);
1497 CmapSubtable
*cmapsubtable
= c
->push
<CmapSubtable
> ();
1498 unsigned origin_length
= c
->length ();
1499 cmapsubtable
->serialize (c
, it
, format
, plan
, &(base
+subtable
));
1500 if (c
->length () - origin_length
> 0) *objidx
= c
->pop_pack ();
1501 else c
->pop_discard ();
1507 return_trace (nullptr);
1510 c
->add_link (out
->subtable
, *objidx
);
1514 HBUINT16 platformID
; /* Platform ID. */
1515 HBUINT16 encodingID
; /* Platform-specific encoding ID. */
1516 Offset32To
<CmapSubtable
>
1517 subtable
; /* Byte offset from beginning of table to the subtable for this encoding. */
1519 DEFINE_SIZE_STATIC (8);
1524 struct SubtableUnicodesCache
{
1527 hb_blob_ptr_t
<cmap
> base_blob
;
1529 hb_hashmap_t
<unsigned, hb::unique_ptr
<hb_set_t
>> cached_unicodes
;
1533 static SubtableUnicodesCache
* create (hb_blob_ptr_t
<cmap
> source_table
)
1535 SubtableUnicodesCache
* cache
=
1536 (SubtableUnicodesCache
*) hb_malloc (sizeof(SubtableUnicodesCache
));
1537 new (cache
) SubtableUnicodesCache (source_table
);
1541 static void destroy (void* value
) {
1544 SubtableUnicodesCache
* cache
= (SubtableUnicodesCache
*) value
;
1545 cache
->~SubtableUnicodesCache ();
1549 SubtableUnicodesCache(const void* cmap_base
)
1551 base ((const char*) cmap_base
),
1555 SubtableUnicodesCache(hb_blob_ptr_t
<cmap
> base_blob_
)
1556 : base_blob(base_blob_
),
1557 base ((const char *) base_blob
.get()),
1561 ~SubtableUnicodesCache()
1563 base_blob
.destroy ();
1566 bool same_base(const void* other
) const
1568 return other
== (const void*) base
;
1571 const hb_set_t
* set_for (const EncodingRecord
* record
,
1572 SubtableUnicodesCache
& mutable_cache
) const
1574 if (cached_unicodes
.has ((unsigned) ((const char *) record
- base
)))
1575 return cached_unicodes
.get ((unsigned) ((const char *) record
- base
));
1577 return mutable_cache
.set_for (record
);
1580 const hb_set_t
* set_for (const EncodingRecord
* record
)
1582 if (!cached_unicodes
.has ((unsigned) ((const char *) record
- base
)))
1584 hb_set_t
*s
= hb_set_create ();
1585 if (unlikely (s
->in_error ()))
1586 return hb_set_get_empty ();
1588 (base
+record
->subtable
).collect_unicodes (s
);
1590 if (unlikely (!cached_unicodes
.set ((unsigned) ((const char *) record
- base
), hb::unique_ptr
<hb_set_t
> {s
})))
1591 return hb_set_get_empty ();
1595 return cached_unicodes
.get ((unsigned) ((const char *) record
- base
));
/* Maps U+0000..U+00FF to U+F000..U+F0FF; every other codepoint maps to 0.
 *
 * For symbol-encoded OpenType fonts, we duplicate the
 * U+F000..F0FF range at U+0000..U+00FF.  That's what
 * Windows seems to do, and that's hinted about at:
 * https://docs.microsoft.com/en-us/typography/opentype/spec/recom
 * under "Non-Standard (Symbol) Fonts". */
static inline uint_fast16_t
_hb_symbol_pua_map (unsigned codepoint)
{
  return codepoint <= 0x00FFu ? 0xF000u + codepoint : 0;
}
1617 static constexpr hb_tag_t tableTag
= HB_OT_TAG_cmap
;
1620 static SubtableUnicodesCache
* create_filled_cache(hb_blob_ptr_t
<cmap
> source_table
) {
1621 const cmap
* cmap
= source_table
.get();
1623 + hb_iter (cmap
->encodingRecord
)
1624 | hb_filter ([&](const EncodingRecord
& _
) {
1625 return cmap::filter_encoding_records_for_subset (cmap
, _
);
1629 SubtableUnicodesCache
* cache
= SubtableUnicodesCache::create(source_table
);
1630 for (const EncodingRecord
& _
: it
)
1631 cache
->set_for(&_
); // populate the cache for this encoding record.
1636 template<typename Iterator
, typename EncodingRecIter
,
1637 hb_requires (hb_is_iterator (EncodingRecIter
))>
1638 bool serialize (hb_serialize_context_t
*c
,
1640 EncodingRecIter encodingrec_iter
,
1642 hb_subset_plan_t
*plan
,
1643 bool drop_format_4
= false)
1645 if (unlikely (!c
->extend_min ((*this)))) return false;
1648 unsigned format4objidx
= 0, format12objidx
= 0, format14objidx
= 0;
1649 auto snap
= c
->snapshot ();
1651 SubtableUnicodesCache
local_unicodes_cache (base
);
1652 const SubtableUnicodesCache
* unicodes_cache
= &local_unicodes_cache
;
1654 if (plan
->accelerator
&&
1655 plan
->accelerator
->cmap_cache
&&
1656 plan
->accelerator
->cmap_cache
->same_base (base
))
1657 unicodes_cache
= plan
->accelerator
->cmap_cache
;
1659 for (const EncodingRecord
& _
: encodingrec_iter
)
1664 unsigned format
= (base
+_
.subtable
).u
.format
;
1665 if (format
!= 4 && format
!= 12 && format
!= 14) continue;
1667 const hb_set_t
* unicodes_set
= unicodes_cache
->set_for (&_
, local_unicodes_cache
);
1669 if (!drop_format_4
&& format
== 4)
1671 c
->copy (_
, + it
| hb_filter (*unicodes_set
, hb_first
), 4u, base
, plan
, &format4objidx
);
1672 if (c
->in_error () && c
->only_overflow ())
1674 // cmap4 overflowed, reset and retry serialization without format 4 subtables.
1676 return serialize (c
, it
,
1684 else if (format
== 12)
1690 local_unicodes_cache
,
1691 + it
| hb_map (hb_first
), encodingrec_iter
))
1693 c
->copy (_
, + it
| hb_filter (*unicodes_set
, hb_first
), 12u, base
, plan
, &format12objidx
);
1695 else if (format
== 14) c
->copy (_
, it
, 14u, base
, plan
, &format14objidx
);
1697 c
->check_assign(this->encodingRecord
.len
,
1698 (c
->length () - cmap::min_size
)/EncodingRecord::static_size
,
1699 HB_SERIALIZE_ERROR_INT_OVERFLOW
);
1701 // Fail if format 4 was dropped and there is no cmap12.
1702 return !drop_format_4
|| format12objidx
;
1705 template<typename Iterator
, typename EncodingRecordIterator
,
1706 hb_requires (hb_is_iterator (Iterator
)),
1707 hb_requires (hb_is_iterator (EncodingRecordIterator
))>
1708 bool _can_drop (const EncodingRecord
& cmap12
,
1709 const hb_set_t
& cmap12_unicodes
,
1711 const SubtableUnicodesCache
& unicodes_cache
,
1712 SubtableUnicodesCache
& local_unicodes_cache
,
1713 Iterator subset_unicodes
,
1714 EncodingRecordIterator encoding_records
)
1716 for (auto cp
: + subset_unicodes
| hb_filter (cmap12_unicodes
))
1718 if (cp
>= 0x10000) return false;
1721 unsigned target_platform
;
1722 unsigned target_encoding
;
1723 unsigned target_language
= (base
+cmap12
.subtable
).get_language ();
1725 if (cmap12
.platformID
== 0 && cmap12
.encodingID
== 4)
1727 target_platform
= 0;
1728 target_encoding
= 3;
1729 } else if (cmap12
.platformID
== 3 && cmap12
.encodingID
== 10) {
1730 target_platform
= 3;
1731 target_encoding
= 1;
1736 for (const auto& _
: encoding_records
)
1738 if (_
.platformID
!= target_platform
1739 || _
.encodingID
!= target_encoding
1740 || (base
+_
.subtable
).get_language() != target_language
)
1743 const hb_set_t
* sibling_unicodes
= unicodes_cache
.set_for (&_
, local_unicodes_cache
);
1745 auto cmap12
= + subset_unicodes
| hb_filter (cmap12_unicodes
);
1746 auto sibling
= + subset_unicodes
| hb_filter (*sibling_unicodes
);
1747 for (; cmap12
&& sibling
; cmap12
++, sibling
++)
1749 unsigned a
= *cmap12
;
1750 unsigned b
= *sibling
;
1751 if (a
!= b
) return false;
1754 return !cmap12
&& !sibling
;
1760 void closure_glyphs (const hb_set_t
*unicodes
,
1761 hb_set_t
*glyphset
) const
1763 + hb_iter (encodingRecord
)
1764 | hb_map (&EncodingRecord::subtable
)
1765 | hb_map (hb_add (this))
1766 | hb_filter ([&] (const CmapSubtable
& _
) { return _
.u
.format
== 14; })
1767 | hb_apply ([=] (const CmapSubtable
& _
) { _
.u
.format14
.closure_glyphs (unicodes
, glyphset
); })
1771 bool subset (hb_subset_context_t
*c
) const
1773 TRACE_SUBSET (this);
1775 cmap
*cmap_prime
= c
->serializer
->start_embed
<cmap
> ();
1777 auto encodingrec_iter
=
1778 + hb_iter (encodingRecord
)
1779 | hb_filter ([&](const EncodingRecord
& _
) {
1780 return cmap::filter_encoding_records_for_subset (this, _
);
1784 if (unlikely (!encodingrec_iter
.len ())) return_trace (false);
1786 const EncodingRecord
*unicode_bmp
= nullptr, *unicode_ucs4
= nullptr, *ms_bmp
= nullptr, *ms_ucs4
= nullptr;
1787 bool has_format12
= false;
1789 for (const EncodingRecord
& _
: encodingrec_iter
)
1791 unsigned format
= (this + _
.subtable
).u
.format
;
1792 if (format
== 12) has_format12
= true;
1794 const EncodingRecord
*table
= std::addressof (_
);
1795 if (_
.platformID
== 0 && _
.encodingID
== 3) unicode_bmp
= table
;
1796 else if (_
.platformID
== 0 && _
.encodingID
== 4) unicode_ucs4
= table
;
1797 else if (_
.platformID
== 3 && _
.encodingID
== 1) ms_bmp
= table
;
1798 else if (_
.platformID
== 3 && _
.encodingID
== 10) ms_ucs4
= table
;
1801 if (unlikely (!has_format12
&& !unicode_bmp
&& !ms_bmp
)) return_trace (false);
1802 if (unlikely (has_format12
&& (!unicode_ucs4
&& !ms_ucs4
))) return_trace (false);
1805 + c
->plan
->unicode_to_new_gid_list
.iter ()
1806 | hb_filter ([&] (const hb_codepoint_pair_t _
)
1807 { return (_
.second
!= HB_MAP_VALUE_INVALID
); })
1810 return_trace (cmap_prime
->serialize (c
->serializer
,
1817 const CmapSubtable
*find_best_subtable (bool *symbol
= nullptr) const
1819 if (symbol
) *symbol
= false;
1821 const CmapSubtable
*subtable
;
1824 * Prefer symbol if available.
1825 * https://github.com/harfbuzz/harfbuzz/issues/1918 */
1826 if ((subtable
= this->find_subtable (3, 0)))
1828 if (symbol
) *symbol
= true;
1832 /* 32-bit subtables. */
1833 if ((subtable
= this->find_subtable (3, 10))) return subtable
;
1834 if ((subtable
= this->find_subtable (0, 6))) return subtable
;
1835 if ((subtable
= this->find_subtable (0, 4))) return subtable
;
1837 /* 16-bit subtables. */
1838 if ((subtable
= this->find_subtable (3, 1))) return subtable
;
1839 if ((subtable
= this->find_subtable (0, 3))) return subtable
;
1840 if ((subtable
= this->find_subtable (0, 2))) return subtable
;
1841 if ((subtable
= this->find_subtable (0, 1))) return subtable
;
1842 if ((subtable
= this->find_subtable (0, 0))) return subtable
;
1845 return &Null (CmapSubtable
);
1848 struct accelerator_t
1850 using cache_t
= hb_cache_t
<21, 16, 8, true>;
1852 accelerator_t (hb_face_t
*face
)
1854 this->table
= hb_sanitize_context_t ().reference_table
<cmap
> (face
);
1856 this->subtable
= table
->find_best_subtable (&symbol
);
1857 this->subtable_uvs
= &Null (CmapSubtableFormat14
);
1859 const CmapSubtable
*st
= table
->find_subtable (0, 5);
1860 if (st
&& st
->u
.format
== 14)
1861 subtable_uvs
= &st
->u
.format14
;
1864 this->get_glyph_data
= subtable
;
1865 if (unlikely (symbol
))
1867 switch ((unsigned) face
->table
.OS2
->get_font_page ()) {
1868 case OS2::font_page_t::FONT_PAGE_NONE
:
1869 this->get_glyph_funcZ
= get_glyph_from_symbol
<CmapSubtable
, _hb_symbol_pua_map
>;
1871 #ifndef HB_NO_OT_SHAPER_ARABIC_FALLBACK
1872 case OS2::font_page_t::FONT_PAGE_SIMP_ARABIC
:
1873 this->get_glyph_funcZ
= get_glyph_from_symbol
<CmapSubtable
, _hb_arabic_pua_simp_map
>;
1875 case OS2::font_page_t::FONT_PAGE_TRAD_ARABIC
:
1876 this->get_glyph_funcZ
= get_glyph_from_symbol
<CmapSubtable
, _hb_arabic_pua_trad_map
>;
1880 this->get_glyph_funcZ
= get_glyph_from
<CmapSubtable
>;
1886 switch (subtable
->u
.format
) {
1887 /* Accelerate format 4 and format 12. */
1889 this->get_glyph_funcZ
= get_glyph_from
<CmapSubtable
>;
1892 this->get_glyph_funcZ
= get_glyph_from
<CmapSubtableFormat12
>;
1896 this->format4_accel
.init (&subtable
->u
.format4
);
1897 this->get_glyph_data
= &this->format4_accel
;
1898 this->get_glyph_funcZ
= this->format4_accel
.get_glyph_func
;
1904 ~accelerator_t () { this->table
.destroy (); }
1906 inline bool _cached_get (hb_codepoint_t unicode
,
1907 hb_codepoint_t
*glyph
,
1908 cache_t
*cache
) const
1911 if (cache
&& cache
->get (unicode
, &v
))
1916 bool ret
= this->get_glyph_funcZ (this->get_glyph_data
, unicode
, glyph
);
1919 cache
->set (unicode
, *glyph
);
1923 bool get_nominal_glyph (hb_codepoint_t unicode
,
1924 hb_codepoint_t
*glyph
,
1925 cache_t
*cache
= nullptr) const
1927 if (unlikely (!this->get_glyph_funcZ
)) return 0;
1928 return _cached_get (unicode
, glyph
, cache
);
1931 unsigned int get_nominal_glyphs (unsigned int count
,
1932 const hb_codepoint_t
*first_unicode
,
1933 unsigned int unicode_stride
,
1934 hb_codepoint_t
*first_glyph
,
1935 unsigned int glyph_stride
,
1936 cache_t
*cache
= nullptr) const
1938 if (unlikely (!this->get_glyph_funcZ
)) return 0;
1942 done
< count
&& _cached_get (*first_unicode
, first_glyph
, cache
);
1945 first_unicode
= &StructAtOffsetUnaligned
<hb_codepoint_t
> (first_unicode
, unicode_stride
);
1946 first_glyph
= &StructAtOffsetUnaligned
<hb_codepoint_t
> (first_glyph
, glyph_stride
);
1951 bool get_variation_glyph (hb_codepoint_t unicode
,
1952 hb_codepoint_t variation_selector
,
1953 hb_codepoint_t
*glyph
,
1954 cache_t
*cache
= nullptr) const
1956 switch (this->subtable_uvs
->get_glyph_variant (unicode
,
1960 case GLYPH_VARIANT_NOT_FOUND
: return false;
1961 case GLYPH_VARIANT_FOUND
: return true;
1962 case GLYPH_VARIANT_USE_DEFAULT
: break;
1965 return get_nominal_glyph (unicode
, glyph
, cache
);
1968 void collect_unicodes (hb_set_t
*out
, unsigned int num_glyphs
) const
1969 { subtable
->collect_unicodes (out
, num_glyphs
); }
1970 void collect_mapping (hb_set_t
*unicodes
, hb_map_t
*mapping
,
1971 unsigned num_glyphs
= UINT_MAX
) const
1972 { subtable
->collect_mapping (unicodes
, mapping
, num_glyphs
); }
1973 void collect_variation_selectors (hb_set_t
*out
) const
1974 { subtable_uvs
->collect_variation_selectors (out
); }
1975 void collect_variation_unicodes (hb_codepoint_t variation_selector
,
1976 hb_set_t
*out
) const
1977 { subtable_uvs
->collect_variation_unicodes (variation_selector
, out
); }
1980 typedef bool (*hb_cmap_get_glyph_func_t
) (const void *obj
,
1981 hb_codepoint_t codepoint
,
1982 hb_codepoint_t
*glyph
);
1983 typedef uint_fast16_t (*hb_pua_remap_func_t
) (unsigned);
1985 template <typename Type
>
1986 HB_INTERNAL
static bool get_glyph_from (const void *obj
,
1987 hb_codepoint_t codepoint
,
1988 hb_codepoint_t
*glyph
)
1990 const Type
*typed_obj
= (const Type
*) obj
;
1991 return typed_obj
->get_glyph (codepoint
, glyph
);
1994 template <typename Type
, hb_pua_remap_func_t remap
>
1995 HB_INTERNAL
static bool get_glyph_from_symbol (const void *obj
,
1996 hb_codepoint_t codepoint
,
1997 hb_codepoint_t
*glyph
)
1999 const Type
*typed_obj
= (const Type
*) obj
;
2000 if (likely (typed_obj
->get_glyph (codepoint
, glyph
)))
2003 if (hb_codepoint_t c
= remap (codepoint
))
2004 return typed_obj
->get_glyph (c
, glyph
);
2010 hb_nonnull_ptr_t
<const CmapSubtable
> subtable
;
2011 hb_nonnull_ptr_t
<const CmapSubtableFormat14
> subtable_uvs
;
2013 hb_cmap_get_glyph_func_t get_glyph_funcZ
;
2014 const void *get_glyph_data
;
2016 CmapSubtableFormat4::accelerator_t format4_accel
;
2019 hb_blob_ptr_t
<cmap
> table
;
2024 const CmapSubtable
*find_subtable (unsigned int platform_id
,
2025 unsigned int encoding_id
) const
2028 key
.platformID
= platform_id
;
2029 key
.encodingID
= encoding_id
;
2031 const EncodingRecord
&result
= encodingRecord
.bsearch (key
);
2032 if (!result
.subtable
)
2035 return &(this+result
.subtable
);
2038 const EncodingRecord
*find_encodingrec (unsigned int platform_id
,
2039 unsigned int encoding_id
) const
2042 key
.platformID
= platform_id
;
2043 key
.encodingID
= encoding_id
;
2045 return encodingRecord
.as_array ().bsearch (key
);
2048 bool find_subtable (unsigned format
) const
2051 + hb_iter (encodingRecord
)
2052 | hb_map (&EncodingRecord::subtable
)
2053 | hb_map (hb_add (this))
2054 | hb_filter ([&] (const CmapSubtable
& _
) { return _
.u
.format
== format
; })
2062 bool sanitize (hb_sanitize_context_t
*c
) const
2064 TRACE_SANITIZE (this);
2065 return_trace (c
->check_struct (this) &&
2067 likely (version
== 0) &&
2068 encodingRecord
.sanitize (c
, this));
2073 static bool filter_encoding_records_for_subset(const cmap
* cmap
,
2074 const EncodingRecord
& _
)
2077 (_
.platformID
== 0 && _
.encodingID
== 3) ||
2078 (_
.platformID
== 0 && _
.encodingID
== 4) ||
2079 (_
.platformID
== 3 && _
.encodingID
== 1) ||
2080 (_
.platformID
== 3 && _
.encodingID
== 10) ||
2081 (cmap
+ _
.subtable
).u
.format
== 14;
2085 HBUINT16 version
; /* Table version number (0). */
2086 SortedArray16Of
<EncodingRecord
>
2087 encodingRecord
; /* Encoding tables. */
2089 DEFINE_SIZE_ARRAY (4, encodingRecord
);
2092 struct cmap_accelerator_t
: cmap::accelerator_t
{
2093 cmap_accelerator_t (hb_face_t
*face
) : cmap::accelerator_t (face
) {}
2096 } /* namespace OT */
2099 #endif /* HB_OT_CMAP_TABLE_HH */