Upgraded GRUB2 to 2.00 release.
[AROS.git] / arch / all-pc / boot / grub2-aros / include / grub / charset.h
blobab9202884419326449f1f27851c66a31728750a7
/*
 *  GRUB  --  GRand Unified Bootloader
 *  Copyright (C) 1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009  Free Software Foundation, Inc.
 *
 *  GRUB is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  GRUB is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with GRUB.  If not, see <http://www.gnu.org/licenses/>.
 */
19 #ifndef GRUB_CHARSET_HEADER
20 #define GRUB_CHARSET_HEADER 1
22 #include <grub/types.h>
/* Masks covering the N most significant bits of a byte.  A byte that starts
   with N one-bits followed by a zero-bit satisfies
   (c & GRUB_UINT8_<N+1>_LEADINGBITS) == GRUB_UINT8_<N>_LEADINGBITS.  */
#define GRUB_UINT8_1_LEADINGBIT 0x80
#define GRUB_UINT8_2_LEADINGBITS 0xc0
#define GRUB_UINT8_3_LEADINGBITS 0xe0
#define GRUB_UINT8_4_LEADINGBITS 0xf0
#define GRUB_UINT8_5_LEADINGBITS 0xf8
#define GRUB_UINT8_6_LEADINGBITS 0xfc
#define GRUB_UINT8_7_LEADINGBITS 0xfe

/* Masks covering the N least significant bits of a byte; used to extract
   the payload bits of a UTF-8 lead or continuation byte.  */
#define GRUB_UINT8_1_TRAILINGBIT 0x01
#define GRUB_UINT8_2_TRAILINGBITS 0x03
#define GRUB_UINT8_3_TRAILINGBITS 0x07
#define GRUB_UINT8_4_TRAILINGBITS 0x0f
#define GRUB_UINT8_5_TRAILINGBITS 0x1f
#define GRUB_UINT8_6_TRAILINGBITS 0x3f
/* Upper bound on the number of UTF-8 bytes needed per UTF-16 word: a single
   word encodes to at most three bytes and a surrogate pair (two words) to
   four, so four bytes per word is always sufficient.  */
#define GRUB_MAX_UTF8_PER_UTF16 4
/* You need at least one UTF-8 byte to have one UTF-16 word.
   You need four UTF-8 bytes to have 2 UTF-16 words (surrogate pairs).
   Hence decoding never yields more UTF-16 words than UTF-8 bytes consumed.
 */
#define GRUB_MAX_UTF16_PER_UTF8 1
/* First code point that no longer fits in a single 16-bit UTF-16 word.  */
#define GRUB_UCS2_LIMIT 0x10000

/* High (first) word of the UTF-16 surrogate pair for CODE: 0xD800 plus the
   upper ten bits of CODE - GRUB_UCS2_LIMIT.  Only meaningful for
   CODE >= GRUB_UCS2_LIMIT; CODE is evaluated once.  */
#define GRUB_UTF16_UPPER_SURROGATE(code) \
  (0xD800 | ((((code) - GRUB_UCS2_LIMIT) >> 10) & 0x3ff))

/* Low (second) word of the UTF-16 surrogate pair for CODE: 0xDC00 plus the
   lower ten bits of CODE - GRUB_UCS2_LIMIT.  Only meaningful for
   CODE >= GRUB_UCS2_LIMIT; CODE is evaluated once.  */
#define GRUB_UTF16_LOWER_SURROGATE(code) \
  (0xDC00 | (((code) - GRUB_UCS2_LIMIT) & 0x3ff))
51 /* Process one character from UTF8 sequence.
52 At beginning set *code = 0, *count = 0. Returns 0 on failure and
53 1 on success. *count holds the number of trailing bytes. */
54 static inline int
55 grub_utf8_process (grub_uint8_t c, grub_uint32_t *code, int *count)
57 if (*count)
59 if ((c & GRUB_UINT8_2_LEADINGBITS) != GRUB_UINT8_1_LEADINGBIT)
61 *count = 0;
62 /* invalid */
63 return 0;
65 else
67 *code <<= 6;
68 *code |= (c & GRUB_UINT8_6_TRAILINGBITS);
69 (*count)--;
70 return 1;
74 if ((c & GRUB_UINT8_1_LEADINGBIT) == 0)
76 *code = c;
77 return 1;
79 if ((c & GRUB_UINT8_3_LEADINGBITS) == GRUB_UINT8_2_LEADINGBITS)
81 *count = 1;
82 *code = c & GRUB_UINT8_5_TRAILINGBITS;
83 return 1;
85 if ((c & GRUB_UINT8_4_LEADINGBITS) == GRUB_UINT8_3_LEADINGBITS)
87 *count = 2;
88 *code = c & GRUB_UINT8_4_TRAILINGBITS;
89 return 1;
91 if ((c & GRUB_UINT8_5_LEADINGBITS) == GRUB_UINT8_4_LEADINGBITS)
93 *count = 3;
94 *code = c & GRUB_UINT8_3_TRAILINGBITS;
95 return 1;
97 return 0;
101 /* Convert a (possibly null-terminated) UTF-8 string of at most SRCSIZE
102 bytes (if SRCSIZE is -1, it is ignored) in length to a UTF-16 string.
103 Return the number of characters converted. DEST must be able to hold
104 at least DESTSIZE characters. If an invalid sequence is found, return -1.
105 If SRCEND is not NULL, then *SRCEND is set to the next byte after the
106 last byte used in SRC. */
107 static inline grub_size_t
108 grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize,
109 const grub_uint8_t *src, grub_size_t srcsize,
110 const grub_uint8_t **srcend)
112 grub_uint16_t *p = dest;
113 int count = 0;
114 grub_uint32_t code = 0;
116 if (srcend)
117 *srcend = src;
119 while (srcsize && destsize)
121 int was_count = count;
122 if (srcsize != (grub_size_t)-1)
123 srcsize--;
124 if (!grub_utf8_process (*src++, &code, &count))
126 code = '?';
127 count = 0;
128 /* Character c may be valid, don't eat it. */
129 if (was_count)
130 src--;
132 if (count != 0)
133 continue;
134 if (code == 0)
135 break;
136 if (destsize < 2 && code >= GRUB_UCS2_LIMIT)
137 break;
138 if (code >= GRUB_UCS2_LIMIT)
140 *p++ = GRUB_UTF16_UPPER_SURROGATE (code);
141 *p++ = GRUB_UTF16_LOWER_SURROGATE (code);
142 destsize -= 2;
144 else
146 *p++ = code;
147 destsize--;
151 if (srcend)
152 *srcend = src;
153 return p - dest;
156 /* Determine the last position where the UTF-8 string [beg, end) can
157 be safely cut. */
158 static inline grub_size_t
159 grub_getend (const char *beg, const char *end)
161 const char *ptr;
162 for (ptr = end - 1; ptr >= beg; ptr--)
163 if ((*ptr & GRUB_UINT8_2_LEADINGBITS) != GRUB_UINT8_1_LEADINGBIT)
164 break;
165 if (ptr < beg)
166 return 0;
167 if ((*ptr & GRUB_UINT8_1_LEADINGBIT) == 0)
168 return ptr + 1 - beg;
169 if ((*ptr & GRUB_UINT8_3_LEADINGBITS) == GRUB_UINT8_2_LEADINGBITS
170 && ptr + 2 <= end)
171 return ptr + 2 - beg;
172 if ((*ptr & GRUB_UINT8_4_LEADINGBITS) == GRUB_UINT8_3_LEADINGBITS
173 && ptr + 3 <= end)
174 return ptr + 3 - beg;
175 if ((*ptr & GRUB_UINT8_5_LEADINGBITS) == GRUB_UINT8_4_LEADINGBITS
176 && ptr + 4 <= end)
177 return ptr + 4 - beg;
178 /* Invalid character or incomplete. Cut before it. */
179 return ptr - beg;
182 /* Convert UTF-16 to UTF-8. */
183 static inline grub_uint8_t *
184 grub_utf16_to_utf8 (grub_uint8_t *dest, const grub_uint16_t *src,
185 grub_size_t size)
187 grub_uint32_t code_high = 0;
189 while (size--)
191 grub_uint32_t code = *src++;
193 if (code_high)
195 if (code >= 0xDC00 && code <= 0xDFFF)
197 /* Surrogate pair. */
198 code = ((code_high - 0xD800) << 10) + (code - 0xDC00) + 0x10000;
200 *dest++ = (code >> 18) | 0xF0;
201 *dest++ = ((code >> 12) & 0x3F) | 0x80;
202 *dest++ = ((code >> 6) & 0x3F) | 0x80;
203 *dest++ = (code & 0x3F) | 0x80;
205 else
207 /* Error... */
208 *dest++ = '?';
209 /* *src may be valid. Don't eat it. */
210 src--;
213 code_high = 0;
215 else
217 if (code <= 0x007F)
218 *dest++ = code;
219 else if (code <= 0x07FF)
221 *dest++ = (code >> 6) | 0xC0;
222 *dest++ = (code & 0x3F) | 0x80;
224 else if (code >= 0xD800 && code <= 0xDBFF)
226 code_high = code;
227 continue;
229 else if (code >= 0xDC00 && code <= 0xDFFF)
231 /* Error... */
232 *dest++ = '?';
234 else if (code < 0x10000)
236 *dest++ = (code >> 12) | 0xE0;
237 *dest++ = ((code >> 6) & 0x3F) | 0x80;
238 *dest++ = (code & 0x3F) | 0x80;
240 else
242 *dest++ = (code >> 18) | 0xF0;
243 *dest++ = ((code >> 12) & 0x3F) | 0x80;
244 *dest++ = ((code >> 6) & 0x3F) | 0x80;
245 *dest++ = (code & 0x3F) | 0x80;
250 return dest;
253 #define GRUB_MAX_UTF8_PER_LATIN1 2
255 /* Convert Latin1 to UTF-8. */
256 static inline grub_uint8_t *
257 grub_latin1_to_utf8 (grub_uint8_t *dest, const grub_uint8_t *src,
258 grub_size_t size)
260 while (size--)
262 if (!(*src & 0x80))
263 *dest++ = *src;
264 else
266 *dest++ = (*src >> 6) | 0xC0;
267 *dest++ = (*src & 0x3F) | 0x80;
269 src++;
272 return dest;
275 /* Convert UCS-4 to UTF-8. */
276 char *grub_ucs4_to_utf8_alloc (const grub_uint32_t *src, grub_size_t size);
279 grub_is_valid_utf8 (const grub_uint8_t *src, grub_size_t srcsize);
281 grub_ssize_t grub_utf8_to_ucs4_alloc (const char *msg,
282 grub_uint32_t **unicode_msg,
283 grub_uint32_t **last_position);
285 /* Returns the number of bytes the string src would occupy is converted
286 to UTF-8, excluding \0. */
287 grub_size_t
288 grub_get_num_of_utf8_bytes (const grub_uint32_t *src, grub_size_t size);
290 /* Converts UCS-4 to UTF-8. Returns the number of bytes effectively written
291 excluding the trailing \0. */
292 grub_size_t
293 grub_ucs4_to_utf8 (const grub_uint32_t *src, grub_size_t size,
294 grub_uint8_t *dest, grub_size_t destsize);
295 grub_size_t grub_utf8_to_ucs4 (grub_uint32_t *dest, grub_size_t destsize,
296 const grub_uint8_t *src, grub_size_t srcsize,
297 const grub_uint8_t **srcend);
298 /* Returns -2 if not enough space, -1 on invalid character. */
299 grub_ssize_t
300 grub_encode_utf8_character (grub_uint8_t *dest, grub_uint8_t *destend,
301 grub_uint32_t code);
303 const grub_uint32_t *
304 grub_unicode_get_comb_start (const grub_uint32_t *str,
305 const grub_uint32_t *cur);
307 #endif