make cleanup and some bash-ism removal
[charfbuzz.git] / hb-utf-private.c
blob97eb7798f1dc725aad02cc651ffe859044647582
// C99 port from c++ is protected by a GNU Lesser GPLv3
// Copyright © 2013 Sylvain BERTRAND <sylvain.bertrand@gmail.com>
//                                   <sylware@legeek.net>
4 #include <string.h>
5 #include <stdint.h>
7 #include "hb.h"
8 #include "hb-private.h"
//------------------------------------------------------------------------------
//utf8
// Classify the lead byte of a UTF-8 sequence.
//   Char: value of the first byte; it is evaluated several times, so pass an
//         expression without side effects.
//   Mask: lvalue receiving the mask that extracts the payload bits of the lead
//         byte (0 when the byte cannot start a sequence).
//   Len:  lvalue receiving the sequence length in bytes (1 to 4), or 0 when
//         the byte cannot start a sequence (continuation byte, 0xf8..0xff).
// The body is wrapped in do { } while (0) so that the expansion is exactly one
// statement and stays safe inside an unbraced if/else.
#define HB_UTF8_COMPUTE(Char, Mask, Len) \
  do { \
    if ((Char) < 128) { (Len) = 1; (Mask) = 0x7f; } \
    else if (((Char) & 0xe0) == 0xc0) { (Len) = 2; (Mask) = 0x1f; } \
    else if (((Char) & 0xf0) == 0xe0) { (Len) = 3; (Mask) = 0x0f; } \
    else if (((Char) & 0xf8) == 0xf0) { (Len) = 4; (Mask) = 0x07; } \
    else { (Len) = 0; (Mask) = 0; } \
  } while (0)
19 void *
20 hb_utf_next_utf8(void *text,
21 void *end,
22 hb_codepoint_t *unicode)
24 uint8_t *text_utf8 = text;
25 uint8_t *end_utf8 = end;
26 hb_codepoint_t c = *text_utf8, mask;
27 unsigned len;
29 //TODO check for overlong sequences?
31 HB_UTF8_COMPUTE (c, mask, len);
32 if (!len || (unsigned) (end_utf8 - text_utf8) < len) {
33 *unicode = -1;
34 return text_utf8 + 1;
35 } else {
36 hb_codepoint_t result;
37 unsigned i;
38 result = c & mask;
39 for (i = 1; i < len; i++) {
40 if ((text_utf8[i] & 0xc0) != 0x80) {
41 *unicode = -1;
42 return text_utf8 + 1;
44 result <<= 6;
45 result |= (text_utf8[i] & 0x3f);
47 *unicode = result;
48 return text_utf8 + len;
52 void *
53 hb_utf_prev_utf8(void *text,
54 void *start,
55 hb_codepoint_t *unicode)
57 uint8_t *text_utf8 = text;
58 uint8_t *start_utf8 = start;
60 uint8_t *end = text_utf8--;
61 while (start_utf8 < text_utf8 && (*text_utf8 & 0xc0) == 0x80
62 && end - text_utf8 < 4)
63 text_utf8--;
65 hb_codepoint_t c = *text_utf8, mask;
66 unsigned len;
68 //TODO check for overlong sequences?
70 HB_UTF8_COMPUTE(c, mask, len);
71 if (!len || (unsigned)(end - text_utf8) != len) {
72 *unicode = -1;
73 return end - 1;
74 } else {
75 hb_codepoint_t result;
76 unsigned i;
77 result = c & mask;
78 for (i = 1; i < len; i++) {
79 result <<= 6;
80 result |= (text_utf8[i] & 0x3f);
82 *unicode = result;
83 return text_utf8;
// Advance a UTF-8 text pointer by offset code units (bytes).
// Returns text + offset, computed in units of uint8_t.
void *
hb_utf_ptr_offset_utf8(void *text, unsigned offset)
{
  uint8_t *text_utf8 = text;
  return text_utf8 + offset;
}
// Length of a zero-terminated UTF-8 string in code units (bytes), terminator
// excluded. The size_t result of strlen is narrowed to unsigned on purpose to
// fit the return type.
unsigned
hb_utf_strlen_utf8(void *text)
{
  return (unsigned)strlen(text);
}
// Distance from b to a in UTF-8 code units (bytes), i.e. a - b.
// The pointer difference is converted to unsigned, so a is expected not to
// precede b (a negative difference would wrap around).
unsigned
hb_utf_diff_utf8(void *a, void *b)
{
  const uint8_t *a_utf8 = a;
  const uint8_t *b_utf8 = b;
  return (unsigned)(a_utf8 - b_utf8);
}
//------------------------------------------------------------------------------
//utf16
110 void *
111 hb_utf_next_utf16(void *text,
112 void *end,
113 hb_codepoint_t *unicode)
115 uint16_t *text_utf16 = text;
116 uint16_t *end_utf16 = end;
118 hb_codepoint_t c = *text_utf16++;
120 if (hb_codepoint_in_range(c, 0xd800, 0xdbff)) {
121 //high surrogate
122 hb_codepoint_t l;
123 if (text_utf16 < end_utf16 && ((l = *text_utf16),
124 hb_codepoint_in_range(l, 0xdc00, 0xdfff))) {
125 //low surrogate
126 *unicode = (c << 10) + l - ((0xd800 << 10) - 0x10000 + 0xdc00);
127 text_utf16++;
128 } else
129 *unicode = -1;
130 } else
131 *unicode = c;
132 return text_utf16;
135 void *
136 hb_utf_prev_utf16(void *text,
137 void *start,
138 hb_codepoint_t *unicode)
140 uint16_t *text_utf16 = text;
141 uint16_t *start_utf16 = start;
142 hb_codepoint_t c = *--text_utf16;
144 if (hb_codepoint_in_range(c, 0xdc00, 0xdfff)) {
145 //low surrogate
146 hb_codepoint_t h;
147 if (start_utf16 < text_utf16
148 && ((h = *(text_utf16 - 1)), hb_codepoint_in_range(h, 0xd800, 0xdbff))) {
149 //high surrogate
150 *unicode = (h << 10) + c - ((0xd800 << 10) - 0x10000 + 0xdc00);
151 text_utf16--;
152 } else
153 *unicode = -1;
154 } else
155 *unicode = c;
156 return text_utf16;
// Advance a UTF-16 text pointer by offset code units.
// Returns text + offset, computed in units of uint16_t.
void *
hb_utf_ptr_offset_utf16(void *text, unsigned offset)
{
  uint16_t *text_utf16 = text;
  return text_utf16 + offset;
}
// Length of a zero-terminated UTF-16 string in code units, terminator
// excluded. A surrogate pair therefore counts as two.
unsigned
hb_utf_strlen_utf16(void *text)
{
  const uint16_t *text_utf16 = text;

  unsigned l = 0;
  while (text_utf16[l] != 0)
    l++;
  return l;
}
// Distance from b to a in UTF-16 code units, i.e. a - b.
// The pointer difference is converted to unsigned, so a is expected not to
// precede b (a negative difference would wrap around).
unsigned
hb_utf_diff_utf16(void *a, void *b)
{
  const uint16_t *a_utf16 = a;
  const uint16_t *b_utf16 = b;
  return (unsigned)(a_utf16 - b_utf16);
}
//------------------------------------------------------------------------------
//utf32
185 void *
186 hb_utf_next_utf32(void *text,
187 void *end HB_UNUSED,
188 hb_codepoint_t *unicode)
190 uint32_t *text_utf32 = text;
191 *unicode = *text_utf32++;
192 return text_utf32;
195 void *
196 hb_utf_prev_utf32(void *text,
197 void *start HB_UNUSED,
198 hb_codepoint_t *unicode)
200 uint32_t *text_utf32 = text;
201 *unicode = *--text_utf32;
202 return text_utf32;
// Advance a UTF-32 text pointer by offset code units.
// Returns text + offset, computed in units of uint32_t.
void *
hb_utf_ptr_offset_utf32(void *text, unsigned offset)
{
  uint32_t *text_utf32 = text;
  return text_utf32 + offset;
}
// Length of a zero-terminated UTF-32 string in code units, terminator
// excluded.
unsigned
hb_utf_strlen_utf32(void *text)
{
  const uint32_t *text_utf32 = text;

  unsigned l = 0;
  while (text_utf32[l] != 0)
    l++;
  return l;
}
// Distance from b to a in UTF-32 code units, i.e. a - b.
// The pointer difference is converted to unsigned, so a is expected not to
// precede b (a negative difference would wrap around).
unsigned
hb_utf_diff_utf32(void *a, void *b)
{
  const uint32_t *a_utf32 = a;
  const uint32_t *b_utf32 = b;
  return (unsigned)(a_utf32 - b_utf32);
}