/*
 *  GRUB  --  GRand Unified Bootloader
 *  Copyright (C) 1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009  Free Software Foundation, Inc.
 *
 *  GRUB is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  GRUB is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with GRUB.  If not, see <http://www.gnu.org/licenses/>.
 */
/* Convert a (possibly null-terminated) UTF-8 string of at most SRCSIZE
   bytes (if SRCSIZE is -1, it is ignored) in length to a UTF-16 string.
   Return the number of characters converted. DEST must be able to hold
   at least DESTSIZE characters. If an invalid sequence is found, return -1.
   If SRCEND is not NULL, then *SRCEND is set to the next byte after the
   last byte used in SRC.  */
26 #include <grub/charset.h>
28 #include <grub/misc.h>
/* Convert the UTF-8 bytes read from SRC into UTF-16 code units written
   through DEST.  DEST/DESTSIZE describe the output buffer and SRC/SRCSIZE
   the input; SRCEND is a pointer-to-pointer parameter whose use is not
   visible in the fragments below.

   NOTE(review): this definition is damaged as it stands -- the return
   type, every brace and a number of statements are missing, and stray
   line numbers are fused onto the fragments.  The comments below describe
   only what the surviving fragments show; restore the body from a
   pristine copy before building.  */
31 grub_utf8_to_utf16 (grub_uint16_t
*dest
, grub_size_t destsize
,
32 const grub_uint8_t
*src
, grub_size_t srcsize
,
33 const grub_uint8_t
**srcend
)
/* P is the write cursor into DEST; CODE accumulates the code point being
   decoded.  */
35 grub_uint16_t
*p
= dest
;
37 grub_uint32_t code
= 0;
/* Keep going only while there is both input budget and output space.  */
42 while (srcsize
&& destsize
)
/* Fetch the next input byte and advance SRC.  */
44 grub_uint32_t c
= *src
++;
/* An all-ones SRCSIZE is singled out; the statement this test guards is
   not visible here.  */
45 if (srcsize
!= (grub_size_t
)-1)
/* Continuation-byte test.  Presumably this compares the top two bits of C
   with the pattern 10xxxxxx (grub_is_valid_utf8 spells the parallel test
   as (c & 0xc0) != 0x80) -- confirm against the macro definitions.  */
49 if ((c
& GRUB_UINT8_2_LEADINGBITS
) != GRUB_UINT8_1_LEADINGBIT
)
/* Merge the payload bits of the continuation byte into CODE.  */
57 code
|= (c
& GRUB_UINT8_6_TRAILINGBITS
);
/* Lead-byte classification.  Each branch compares a progressively longer
   run of leading bits and, where an assignment is visible, keeps only the
   remaining trailing bits of C in CODE (5, 4, 3, 2, then 1 bit, going by
   the macro names).  */
66 if ((c
& GRUB_UINT8_1_LEADINGBIT
) == 0)
68 else if ((c
& GRUB_UINT8_3_LEADINGBITS
) == GRUB_UINT8_2_LEADINGBITS
)
71 code
= c
& GRUB_UINT8_5_TRAILINGBITS
;
73 else if ((c
& GRUB_UINT8_4_LEADINGBITS
) == GRUB_UINT8_3_LEADINGBITS
)
76 code
= c
& GRUB_UINT8_4_TRAILINGBITS
;
78 else if ((c
& GRUB_UINT8_5_LEADINGBITS
) == GRUB_UINT8_4_LEADINGBITS
)
81 code
= c
& GRUB_UINT8_3_TRAILINGBITS
;
83 else if ((c
& GRUB_UINT8_6_LEADINGBITS
) == GRUB_UINT8_5_LEADINGBITS
)
86 code
= c
& GRUB_UINT8_2_TRAILINGBITS
;
88 else if ((c
& GRUB_UINT8_7_LEADINGBITS
) == GRUB_UINT8_6_LEADINGBITS
)
91 code
= c
& GRUB_UINT8_1_TRAILINGBIT
;
/* A code point at or above GRUB_UCS2_LIMIT is written as two units (see
   below), so having fewer than two free slots is tested for first.  The
   action taken when this test is true is not visible here.  */
99 if (destsize
< 2 && code
>= GRUB_UCS2_LIMIT
)
/* Emit the pair: the GRUB_UTF16_UPPER_SURROGATE value first, then the
   GRUB_UTF16_LOWER_SURROGATE value.  */
101 if (code
>= GRUB_UCS2_LIMIT
)
103 *p
++ = GRUB_UTF16_UPPER_SURROGATE (code
);
104 *p
++ = GRUB_UTF16_LOWER_SURROGATE (code
);
/* Convert UCS-4 to UTF-8.  */
/* Encode the UCS-4 code points at SRC (SIZE is the length argument) as
   UTF-8 into a buffer obtained from grub_malloc.  The surviving fragments
   show two scans over the input: the first classifies every code point by
   range, then CNT bytes are allocated, then the second scan writes the
   encoded bytes through DEST.

   NOTE(review): this definition is damaged as it stands -- the return
   type, the braces, the declarations of PTR and CNT, and several branch
   bodies are missing, and stray line numbers are fused onto the
   fragments.  Only the two-byte and three-byte encodings are visible;
   confirm against a pristine copy how code points above 0xFFFF are
   handled and whether the grub_malloc result is checked.  */
122 grub_ucs4_to_utf8_alloc (grub_uint32_t
*src
, grub_size_t size
)
124 grub_size_t remaining
;
/* RET receives the allocation; DEST is the cursor used by the writes
   further down.  */
127 grub_uint8_t
*ret
, *dest
;
/* First scan: read each code point and classify it by range.  The size
   contribution of each branch is not visible here.  */
133 grub_uint32_t code
= *ptr
++;
137 else if (code
<= 0x07FF)
/* 0xD800-0xDBFF and 0xDC00-0xDFFF are the two surrogate halves.  */
139 else if ((code
>= 0xDC00 && code
<= 0xDFFF)
140 || (code
>= 0xD800 && code
<= 0xDBFF))
141 /* No surrogates in UCS-4... */
/* Allocate the output buffer of CNT bytes.  */
148 ret
= grub_malloc (cnt
);
/* Second scan: read each code point again and write its encoding.  */
157 grub_uint32_t code
= *ptr
++;
/* Two-byte form: 110xxxxx 10xxxxxx.  */
161 else if (code
<= 0x07FF)
163 *dest
++ = (code
>> 6) | 0xC0;
164 *dest
++ = (code
& 0x3F) | 0x80;
/* Same surrogate ranges as in the first scan; what is written for them
   is not visible here.  */
166 else if ((code
>= 0xDC00 && code
<= 0xDFFF)
167 || (code
>= 0xD800 && code
<= 0xDBFF))
169 /* No surrogates in UCS-4... */
/* Three-byte form: 1110xxxx 10xxxxxx 10xxxxxx.  The first byte is not
   masked, so this is only well-formed for CODE <= 0xFFFF.  */
174 *dest
++ = (code
>> 12) | 0xE0;
175 *dest
++ = ((code
>> 6) & 0x3F) | 0x80;
176 *dest
++ = (code
& 0x3F) | 0x80;
/* Examine the UTF-8 bytes at SRC (SRCSIZE is the length argument, with an
   all-ones value singled out) by testing each byte against the UTF-8
   continuation-byte and lead-byte bit patterns.

   NOTE(review): this definition is damaged as it stands -- the return
   type, the braces, the loop header and every branch body are missing,
   and stray line numbers are fused onto the fragments.  What is returned
   for valid and invalid input is therefore not visible here; restore the
   body from a pristine copy before building.  */
185 grub_is_valid_utf8 (const grub_uint8_t
*src
, grub_size_t srcsize
)
187 grub_uint32_t code
= 0;
/* Fetch the next input byte and advance SRC.  */
192 grub_uint32_t c
= *src
++;
/* An all-ones SRCSIZE is singled out; the statement this test guards is
   not visible here.  */
193 if (srcsize
!= (grub_size_t
)-1)
/* True when C is not a continuation byte (10xxxxxx).  */
197 if ((c
& 0xc0) != 0x80)
/* Lead-byte patterns, shortest first: 0xxxxxxx, 110xxxxx, 1110xxxx,
   11110xxx, 111110xx and 1111110x.
   NOTE(review): the last two are the 5- and 6-byte lead bytes, which
   RFC 3629 excludes from UTF-8 -- confirm that matching them here is
   intended.  */
214 if ((c
& 0x80) == 0x00)
216 else if ((c
& 0xe0) == 0xc0)
221 else if ((c
& 0xf0) == 0xe0)
226 else if ((c
& 0xf8) == 0xf0)
231 else if ((c
& 0xfc) == 0xf8)
236 else if ((c
& 0xfe) == 0xfc)
/* Decode the NUL-terminated UTF-8 string MSG into a buffer obtained from
   grub_malloc.  The buffer is stored in *UNICODE_MSG, and *LAST_POSITION
   is set to *UNICODE_MSG plus the value returned by grub_utf8_to_ucs4.

   NOTE(review): this definition is damaged as it stands -- the return
   type, the braces, the condition guarding the error message and the
   return statement are missing, and stray line numbers are fused onto
   the fragments.  Presumably the caller owns *UNICODE_MSG afterwards;
   verify against the callers.  */
250 grub_utf8_to_ucs4_alloc (const char *msg
, grub_uint32_t
**unicode_msg
,
251 grub_uint32_t
**last_position
)
/* Byte length of MSG, excluding the terminator.  */
253 grub_size_t msg_len
= grub_strlen (msg
);
/* One output element per input byte: every UTF-8 sequence is at least one
   byte long, so N bytes decode to at most N code points.
   NOTE(review): grub_strlen is called a second time although MSG_LEN
   already holds the value, and the multiplication is not checked for
   overflow.  */
255 *unicode_msg
= grub_malloc (grub_strlen (msg
) * sizeof (grub_uint32_t
));
/* Error report; the condition that leads here is not visible.  */
259 grub_printf ("utf8_to_ucs4 ERROR1: %s", msg
);
/* Decode into the new buffer, passing MSG_LEN as the destination
   capacity, -1 as the source size and 0 for the final argument.  MSG_LEN
   is then reused for the result.
   NOTE(review): MSG_LEN is an unsigned grub_size_t; if grub_utf8_to_ucs4
   can return a negative value it would wrap here -- confirm.  */
263 msg_len
= grub_utf8_to_ucs4 (*unicode_msg
, msg_len
,
264 (grub_uint8_t
*) msg
, -1, 0);
/* Point just past the last element accounted for by MSG_LEN.  */
266 *last_position
= *unicode_msg
+ msg_len
;