2 * Copyright © 2009 Red Hat, Inc.
3 * Copyright © 2011 Codethink Limited
4 * Copyright © 2010,2011,2012 Google, Inc.
6 * This is part of HarfBuzz, a text shaping library.
8 * Permission is hereby granted, without written agreement and without
9 * license or royalty fees, to use, copy, modify, and distribute this
10 * software and its documentation for any purpose, provided that the
11 * above copyright notice and the following two paragraphs appear in
12 * all copies of this software.
14 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
15 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
16 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
17 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
20 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
21 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
22 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
23 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
24 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
26 * Red Hat Author(s): Behdad Esfahbod
27 * Codethink Author(s): Ryan Lortie
28 * Google Author(s): Behdad Esfahbod
33 #include "hb-unicode.hh"
39 * @short_description: Unicode character property access
42 * Unicode functions are used to access Unicode character properties.
43 * With these functions, client programs can query various properties from
44 * the Unicode Character Database for any code point, such as General
45 * Category (gc), Script (sc), Canonical Combining Class (ccc), etc.
47 * Client programs can optionally pass in their own Unicode functions
48 * that implement the same queries. The set of functions available is
49 * defined by the virtual methods in #hb_unicode_funcs_t.
51 * HarfBuzz provides built-in default functions for each method in
52 * #hb_unicode_funcs_t.
60 static hb_unicode_combining_class_t
61 hb_unicode_combining_class_nil (hb_unicode_funcs_t
*ufuncs HB_UNUSED
,
62 hb_codepoint_t unicode HB_UNUSED
,
63 void *user_data HB_UNUSED
)
65 return HB_UNICODE_COMBINING_CLASS_NOT_REORDERED
;
68 #ifndef HB_DISABLE_DEPRECATED
70 hb_unicode_eastasian_width_nil (hb_unicode_funcs_t
*ufuncs HB_UNUSED
,
71 hb_codepoint_t unicode HB_UNUSED
,
72 void *user_data HB_UNUSED
)
78 static hb_unicode_general_category_t
79 hb_unicode_general_category_nil (hb_unicode_funcs_t
*ufuncs HB_UNUSED
,
80 hb_codepoint_t unicode HB_UNUSED
,
81 void *user_data HB_UNUSED
)
83 return HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER
;
87 hb_unicode_mirroring_nil (hb_unicode_funcs_t
*ufuncs HB_UNUSED
,
88 hb_codepoint_t unicode
,
89 void *user_data HB_UNUSED
)
95 hb_unicode_script_nil (hb_unicode_funcs_t
*ufuncs HB_UNUSED
,
96 hb_codepoint_t unicode HB_UNUSED
,
97 void *user_data HB_UNUSED
)
99 return HB_SCRIPT_UNKNOWN
;
103 hb_unicode_compose_nil (hb_unicode_funcs_t
*ufuncs HB_UNUSED
,
104 hb_codepoint_t a HB_UNUSED
,
105 hb_codepoint_t b HB_UNUSED
,
106 hb_codepoint_t
*ab HB_UNUSED
,
107 void *user_data HB_UNUSED
)
113 hb_unicode_decompose_nil (hb_unicode_funcs_t
*ufuncs HB_UNUSED
,
114 hb_codepoint_t ab HB_UNUSED
,
115 hb_codepoint_t
*a HB_UNUSED
,
116 hb_codepoint_t
*b HB_UNUSED
,
117 void *user_data HB_UNUSED
)
123 #ifndef HB_DISABLE_DEPRECATED
125 hb_unicode_decompose_compatibility_nil (hb_unicode_funcs_t
*ufuncs HB_UNUSED
,
126 hb_codepoint_t u HB_UNUSED
,
127 hb_codepoint_t
*decomposed HB_UNUSED
,
128 void *user_data HB_UNUSED
)
134 #if !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_GLIB)
137 #if !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_ICU) && defined(HAVE_ICU_BUILTIN)
142 * hb_unicode_funcs_get_default:
144 * Fetches a pointer to the default Unicode-functions structure that is used
145 * when no functions are explicitly set on #hb_buffer_t.
147 * Return value: (transfer none): a pointer to the #hb_unicode_funcs_t Unicode-functions structure
152 hb_unicode_funcs_get_default ()
154 #if !defined(HB_NO_UNICODE_FUNCS) && !defined(HB_NO_UCD)
155 return hb_ucd_get_unicode_funcs ();
156 #elif !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_GLIB)
157 return hb_glib_get_unicode_funcs ();
158 #elif !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_ICU) && defined(HAVE_ICU_BUILTIN)
159 return hb_icu_get_unicode_funcs ();
161 #define HB_UNICODE_FUNCS_NIL 1
162 return hb_unicode_funcs_get_empty ();
/* Fail the build when Unicode functions were requested (HB_NO_UNICODE_FUNCS
 * is not defined) but HB_UNICODE_FUNCS_NIL is defined. */
#if !defined(HB_NO_UNICODE_FUNCS) && defined(HB_UNICODE_FUNCS_NIL)
#error "Could not find any Unicode functions implementation, you have to provide your own"
#error "Consider building hb-ucd.cc.  If you absolutely want to build without any, define HB_NO_UNICODE_FUNCS."
#endif
172 * hb_unicode_funcs_create:
173 * @parent: (nullable): Parent Unicode-functions structure
175 * Creates a new #hb_unicode_funcs_t structure of Unicode functions.
177 * Return value: (transfer full): The Unicode-functions structure
182 hb_unicode_funcs_create (hb_unicode_funcs_t
*parent
)
184 hb_unicode_funcs_t
*ufuncs
;
186 if (!(ufuncs
= hb_object_create
<hb_unicode_funcs_t
> ()))
187 return hb_unicode_funcs_get_empty ();
190 parent
= hb_unicode_funcs_get_empty ();
192 hb_unicode_funcs_make_immutable (parent
);
193 ufuncs
->parent
= hb_unicode_funcs_reference (parent
);
195 ufuncs
->func
= parent
->func
;
197 /* We can safely copy user_data from parent since we hold a reference
198 * onto it and it's immutable. We should not copy the destroy notifiers
200 ufuncs
->user_data
= parent
->user_data
;
206 DEFINE_NULL_INSTANCE (hb_unicode_funcs_t
) =
208 HB_OBJECT_HEADER_STATIC
,
210 nullptr, /* parent */
212 #define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_##name##_nil,
213 HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
214 #undef HB_UNICODE_FUNC_IMPLEMENT
219 * hb_unicode_funcs_get_empty:
221 * Fetches the singleton empty Unicode-functions structure.
223 * Return value: (transfer full): The empty Unicode-functions structure
228 hb_unicode_funcs_get_empty ()
230 return const_cast<hb_unicode_funcs_t
*> (&Null (hb_unicode_funcs_t
));
234 * hb_unicode_funcs_reference: (skip)
235 * @ufuncs: The Unicode-functions structure
237 * Increases the reference count on a Unicode-functions structure.
239 * Return value: (transfer full): The Unicode-functions structure
244 hb_unicode_funcs_reference (hb_unicode_funcs_t
*ufuncs
)
246 return hb_object_reference (ufuncs
);
250 * hb_unicode_funcs_destroy: (skip)
251 * @ufuncs: The Unicode-functions structure
253 * Decreases the reference count on a Unicode-functions structure. When
254 * the reference count reaches zero, the Unicode-functions structure is
255 * destroyed, freeing all memory.
260 hb_unicode_funcs_destroy (hb_unicode_funcs_t
*ufuncs
)
262 if (!hb_object_destroy (ufuncs
)) return;
264 #define HB_UNICODE_FUNC_IMPLEMENT(name) \
265 if (ufuncs->destroy.name) ufuncs->destroy.name (ufuncs->user_data.name);
266 HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
267 #undef HB_UNICODE_FUNC_IMPLEMENT
269 hb_unicode_funcs_destroy (ufuncs
->parent
);
275 * hb_unicode_funcs_set_user_data: (skip)
276 * @ufuncs: The Unicode-functions structure
277 * @key: The user-data key
278 * @data: A pointer to the user data
279 * @destroy: (nullable): A callback to call when @data is not needed anymore
280 * @replace: Whether to replace an existing data with the same key
282 * Attaches a user-data key/data pair to the specified Unicode-functions structure.
284 * Return value: `true` if success, `false` otherwise
289 hb_unicode_funcs_set_user_data (hb_unicode_funcs_t
*ufuncs
,
290 hb_user_data_key_t
*key
,
292 hb_destroy_func_t destroy
,
295 return hb_object_set_user_data (ufuncs
, key
, data
, destroy
, replace
);
299 * hb_unicode_funcs_get_user_data: (skip)
300 * @ufuncs: The Unicode-functions structure
301 * @key: The user-data key to query
303 * Fetches the user-data associated with the specified key,
304 * attached to the specified Unicode-functions structure.
306 * Return value: (transfer none): A pointer to the user data
311 hb_unicode_funcs_get_user_data (const hb_unicode_funcs_t
*ufuncs
,
312 hb_user_data_key_t
*key
)
314 return hb_object_get_user_data (ufuncs
, key
);
319 * hb_unicode_funcs_make_immutable:
320 * @ufuncs: The Unicode-functions structure
322 * Makes the specified Unicode-functions structure
328 hb_unicode_funcs_make_immutable (hb_unicode_funcs_t
*ufuncs
)
330 if (hb_object_is_immutable (ufuncs
))
333 hb_object_make_immutable (ufuncs
);
337 * hb_unicode_funcs_is_immutable:
338 * @ufuncs: The Unicode-functions structure
340 * Tests whether the specified Unicode-functions structure
343 * Return value: `true` if @ufuncs is immutable, `false` otherwise
348 hb_unicode_funcs_is_immutable (hb_unicode_funcs_t
*ufuncs
)
350 return hb_object_is_immutable (ufuncs
);
354 * hb_unicode_funcs_get_parent:
355 * @ufuncs: The Unicode-functions structure
357 * Fetches the parent of the Unicode-functions structure
360 * Return value: The parent Unicode-functions structure
365 hb_unicode_funcs_get_parent (hb_unicode_funcs_t
*ufuncs
)
367 return ufuncs
->parent
? ufuncs
->parent
: hb_unicode_funcs_get_empty ();
371 #define HB_UNICODE_FUNC_IMPLEMENT(name) \
374 hb_unicode_funcs_set_##name##_func (hb_unicode_funcs_t *ufuncs, \
375 hb_unicode_##name##_func_t func, \
377 hb_destroy_func_t destroy) \
379 if (hb_object_is_immutable (ufuncs)) \
385 destroy (user_data); \
387 user_data = ufuncs->parent->user_data.name; \
390 if (ufuncs->destroy.name) \
391 ufuncs->destroy.name (ufuncs->user_data.name); \
394 ufuncs->func.name = func; \
396 ufuncs->func.name = ufuncs->parent->func.name; \
397 ufuncs->user_data.name = user_data; \
398 ufuncs->destroy.name = destroy; \
403 destroy (user_data); \
406 HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
407 #undef HB_UNICODE_FUNC_IMPLEMENT
410 #define HB_UNICODE_FUNC_IMPLEMENT(return_type, name) \
413 hb_unicode_##name (hb_unicode_funcs_t *ufuncs, \
414 hb_codepoint_t unicode) \
416 return ufuncs->name (unicode); \
418 HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE
419 #undef HB_UNICODE_FUNC_IMPLEMENT
422 * hb_unicode_compose:
423 * @ufuncs: The Unicode-functions structure
424 * @a: The first Unicode code point to compose
425 * @b: The second Unicode code point to compose
426 * @ab: (out): The composition of @a, @b
428 * Fetches the composition of a sequence of two Unicode
431 * Calls the composition function of the specified
432 * Unicode-functions structure @ufuncs.
434 * Return value: `true` if @a and @b composed, `false` otherwise
439 hb_unicode_compose (hb_unicode_funcs_t
*ufuncs
,
444 return ufuncs
->compose (a
, b
, ab
);
448 * hb_unicode_decompose:
449 * @ufuncs: The Unicode-functions structure
450 * @ab: Unicode code point to decompose
451 * @a: (out): The first code point of the decomposition of @ab
452 * @b: (out): The second code point of the decomposition of @ab
454 * Fetches the decomposition of a Unicode code point.
456 * Calls the decomposition function of the specified
457 * Unicode-functions structure @ufuncs.
459 * Return value: `true` if @ab was decomposed, `false` otherwise
464 hb_unicode_decompose (hb_unicode_funcs_t
*ufuncs
,
469 return ufuncs
->decompose (ab
, a
, b
);
472 #ifndef HB_DISABLE_DEPRECATED
474 * hb_unicode_decompose_compatibility:
475 * @ufuncs: The Unicode-functions structure
476 * @u: Code point to decompose
477 * @decomposed: (out): Compatibility decomposition of @u
479 * Fetches the compatibility decomposition of a Unicode
480 * code point. Deprecated.
482 * Return value: length of @decomposed.
488 hb_unicode_decompose_compatibility (hb_unicode_funcs_t
*ufuncs
,
490 hb_codepoint_t
*decomposed
)
492 return ufuncs
->decompose_compatibility (u
, decomposed
);
497 #ifndef HB_NO_OT_SHAPE
498 /* See hb-unicode.hh for details. */
500 _hb_modified_combining_class
[256] =
502 0, /* HB_UNICODE_COMBINING_CLASS_NOT_REORDERED */
503 1, /* HB_UNICODE_COMBINING_CLASS_OVERLAY */
505 7, /* HB_UNICODE_COMBINING_CLASS_NUKTA */
506 8, /* HB_UNICODE_COMBINING_CLASS_KANA_VOICING */
507 9, /* HB_UNICODE_COMBINING_CLASS_VIRAMA */
510 HB_MODIFIED_COMBINING_CLASS_CCC10
,
511 HB_MODIFIED_COMBINING_CLASS_CCC11
,
512 HB_MODIFIED_COMBINING_CLASS_CCC12
,
513 HB_MODIFIED_COMBINING_CLASS_CCC13
,
514 HB_MODIFIED_COMBINING_CLASS_CCC14
,
515 HB_MODIFIED_COMBINING_CLASS_CCC15
,
516 HB_MODIFIED_COMBINING_CLASS_CCC16
,
517 HB_MODIFIED_COMBINING_CLASS_CCC17
,
518 HB_MODIFIED_COMBINING_CLASS_CCC18
,
519 HB_MODIFIED_COMBINING_CLASS_CCC19
,
520 HB_MODIFIED_COMBINING_CLASS_CCC20
,
521 HB_MODIFIED_COMBINING_CLASS_CCC21
,
522 HB_MODIFIED_COMBINING_CLASS_CCC22
,
523 HB_MODIFIED_COMBINING_CLASS_CCC23
,
524 HB_MODIFIED_COMBINING_CLASS_CCC24
,
525 HB_MODIFIED_COMBINING_CLASS_CCC25
,
526 HB_MODIFIED_COMBINING_CLASS_CCC26
,
529 HB_MODIFIED_COMBINING_CLASS_CCC27
,
530 HB_MODIFIED_COMBINING_CLASS_CCC28
,
531 HB_MODIFIED_COMBINING_CLASS_CCC29
,
532 HB_MODIFIED_COMBINING_CLASS_CCC30
,
533 HB_MODIFIED_COMBINING_CLASS_CCC31
,
534 HB_MODIFIED_COMBINING_CLASS_CCC32
,
535 HB_MODIFIED_COMBINING_CLASS_CCC33
,
536 HB_MODIFIED_COMBINING_CLASS_CCC34
,
537 HB_MODIFIED_COMBINING_CLASS_CCC35
,
540 HB_MODIFIED_COMBINING_CLASS_CCC36
,
543 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
544 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
548 HB_MODIFIED_COMBINING_CLASS_CCC84
,
549 85, 86, 87, 88, 89, 90,
550 HB_MODIFIED_COMBINING_CLASS_CCC91
,
551 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102,
554 HB_MODIFIED_COMBINING_CLASS_CCC103
,
556 HB_MODIFIED_COMBINING_CLASS_CCC107
,
557 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
560 HB_MODIFIED_COMBINING_CLASS_CCC118
,
562 HB_MODIFIED_COMBINING_CLASS_CCC122
,
563 123, 124, 125, 126, 127, 128,
566 HB_MODIFIED_COMBINING_CLASS_CCC129
,
567 HB_MODIFIED_COMBINING_CLASS_CCC130
,
569 HB_MODIFIED_COMBINING_CLASS_CCC132
,
570 133, 134, 135, 136, 137, 138, 139,
573 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
574 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
575 160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
576 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
577 180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
578 190, 191, 192, 193, 194, 195, 196, 197, 198, 199,
580 200, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT */
582 202, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW */
583 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213,
584 214, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE */
586 216, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT */
588 218, /* HB_UNICODE_COMBINING_CLASS_BELOW_LEFT */
590 220, /* HB_UNICODE_COMBINING_CLASS_BELOW */
592 222, /* HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT */
594 224, /* HB_UNICODE_COMBINING_CLASS_LEFT */
596 226, /* HB_UNICODE_COMBINING_CLASS_RIGHT */
598 228, /* HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT */
600 230, /* HB_UNICODE_COMBINING_CLASS_ABOVE */
602 232, /* HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT */
603 233, /* HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW */
604 234, /* HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE */
605 235, 236, 237, 238, 239,
606 240, /* HB_UNICODE_COMBINING_CLASS_IOTA_SUBSCRIPT */
607 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
608 255, /* HB_UNICODE_COMBINING_CLASS_INVALID */
616 #ifndef HB_NO_EMOJI_SEQUENCES
618 #include "hb-unicode-emoji-table.hh"
621 _hb_unicode_is_emoji_Extended_Pictographic (hb_codepoint_t cp
)
623 return _hb_emoji_is_Extended_Pictographic (cp
);