GRUB-1.98 changes
[grub2/jjazz.git] / lib / charset.c
blobf2e1b036def31604960d36661840f721c11374d3
1 /*
2 * GRUB -- GRand Unified Bootloader
3 * Copyright (C) 1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009 Free Software Foundation, Inc.
5 * GRUB is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
10 * GRUB is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with GRUB. If not, see <http://www.gnu.org/licenses/>.
19 /* Convert a (possibly null-terminated) UTF-8 string of at most SRCSIZE
20 bytes (if SRCSIZE is -1, it is ignored) in length to a UTF-16 string.
21 Return the number of characters converted. DEST must be able to hold
22 at least DESTSIZE characters. If an invalid sequence is found, return -1.
23 If SRCEND is not NULL, then *SRCEND is set to the next byte after the
24 last byte used in SRC. */
26 #include <grub/charset.h>
27 #include <grub/mm.h>
28 #include <grub/misc.h>
30 grub_ssize_t
31 grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize,
32 const grub_uint8_t *src, grub_size_t srcsize,
33 const grub_uint8_t **srcend)
35 grub_uint16_t *p = dest;
36 int count = 0;
37 grub_uint32_t code = 0;
39 if (srcend)
40 *srcend = src;
42 while (srcsize && destsize)
44 grub_uint32_t c = *src++;
45 if (srcsize != (grub_size_t)-1)
46 srcsize--;
47 if (count)
49 if ((c & GRUB_UINT8_2_LEADINGBITS) != GRUB_UINT8_1_LEADINGBIT)
51 /* invalid */
52 return -1;
54 else
56 code <<= 6;
57 code |= (c & GRUB_UINT8_6_TRAILINGBITS);
58 count--;
61 else
63 if (c == 0)
64 break;
66 if ((c & GRUB_UINT8_1_LEADINGBIT) == 0)
67 code = c;
68 else if ((c & GRUB_UINT8_3_LEADINGBITS) == GRUB_UINT8_2_LEADINGBITS)
70 count = 1;
71 code = c & GRUB_UINT8_5_TRAILINGBITS;
73 else if ((c & GRUB_UINT8_4_LEADINGBITS) == GRUB_UINT8_3_LEADINGBITS)
75 count = 2;
76 code = c & GRUB_UINT8_4_TRAILINGBITS;
78 else if ((c & GRUB_UINT8_5_LEADINGBITS) == GRUB_UINT8_4_LEADINGBITS)
80 count = 3;
81 code = c & GRUB_UINT8_3_TRAILINGBITS;
83 else if ((c & GRUB_UINT8_6_LEADINGBITS) == GRUB_UINT8_5_LEADINGBITS)
85 count = 4;
86 code = c & GRUB_UINT8_2_TRAILINGBITS;
88 else if ((c & GRUB_UINT8_7_LEADINGBITS) == GRUB_UINT8_6_LEADINGBITS)
90 count = 5;
91 code = c & GRUB_UINT8_1_TRAILINGBIT;
93 else
94 return -1;
97 if (count == 0)
99 if (destsize < 2 && code >= GRUB_UCS2_LIMIT)
100 break;
101 if (code >= GRUB_UCS2_LIMIT)
103 *p++ = GRUB_UTF16_UPPER_SURROGATE (code);
104 *p++ = GRUB_UTF16_LOWER_SURROGATE (code);
105 destsize -= 2;
107 else
109 *p++ = code;
110 destsize--;
115 if (srcend)
116 *srcend = src;
117 return p - dest;
120 /* Convert UCS-4 to UTF-8. */
121 char *
122 grub_ucs4_to_utf8_alloc (grub_uint32_t *src, grub_size_t size)
124 grub_size_t remaining;
125 grub_uint32_t *ptr;
126 grub_size_t cnt = 0;
127 grub_uint8_t *ret, *dest;
129 remaining = size;
130 ptr = src;
131 while (remaining--)
133 grub_uint32_t code = *ptr++;
135 if (code <= 0x007F)
136 cnt++;
137 else if (code <= 0x07FF)
138 cnt += 2;
139 else if ((code >= 0xDC00 && code <= 0xDFFF)
140 || (code >= 0xD800 && code <= 0xDBFF))
141 /* No surrogates in UCS-4... */
142 cnt++;
143 else
144 cnt += 3;
146 cnt++;
148 ret = grub_malloc (cnt);
149 if (!ret)
150 return 0;
152 dest = ret;
153 remaining = size;
154 ptr = src;
155 while (remaining--)
157 grub_uint32_t code = *ptr++;
159 if (code <= 0x007F)
160 *dest++ = code;
161 else if (code <= 0x07FF)
163 *dest++ = (code >> 6) | 0xC0;
164 *dest++ = (code & 0x3F) | 0x80;
166 else if ((code >= 0xDC00 && code <= 0xDFFF)
167 || (code >= 0xD800 && code <= 0xDBFF))
169 /* No surrogates in UCS-4... */
170 *dest++ = '?';
172 else
174 *dest++ = (code >> 12) | 0xE0;
175 *dest++ = ((code >> 6) & 0x3F) | 0x80;
176 *dest++ = (code & 0x3F) | 0x80;
179 *dest = 0;
181 return (char *) ret;
185 grub_is_valid_utf8 (const grub_uint8_t *src, grub_size_t srcsize)
187 grub_uint32_t code = 0;
188 int count = 0;
190 while (srcsize)
192 grub_uint32_t c = *src++;
193 if (srcsize != (grub_size_t)-1)
194 srcsize--;
195 if (count)
197 if ((c & 0xc0) != 0x80)
199 /* invalid */
200 return 0;
202 else
204 code <<= 6;
205 code |= (c & 0x3f);
206 count--;
209 else
211 if (c == 0)
212 break;
214 if ((c & 0x80) == 0x00)
215 code = c;
216 else if ((c & 0xe0) == 0xc0)
218 count = 1;
219 code = c & 0x1f;
221 else if ((c & 0xf0) == 0xe0)
223 count = 2;
224 code = c & 0x0f;
226 else if ((c & 0xf8) == 0xf0)
228 count = 3;
229 code = c & 0x07;
231 else if ((c & 0xfc) == 0xf8)
233 count = 4;
234 code = c & 0x03;
236 else if ((c & 0xfe) == 0xfc)
238 count = 5;
239 code = c & 0x01;
241 else
242 return 0;
246 return 1;
250 grub_utf8_to_ucs4_alloc (const char *msg, grub_uint32_t **unicode_msg,
251 grub_uint32_t **last_position)
253 grub_size_t msg_len = grub_strlen (msg);
255 *unicode_msg = grub_malloc (grub_strlen (msg) * sizeof (grub_uint32_t));
257 if (!*unicode_msg)
259 grub_printf ("utf8_to_ucs4 ERROR1: %s", msg);
260 return -1;
263 msg_len = grub_utf8_to_ucs4 (*unicode_msg, msg_len,
264 (grub_uint8_t *) msg, -1, 0);
266 *last_position = *unicode_msg + msg_len;
268 return msg_len;