2 * libid3tag - ID3 tag manipulation library
3 * Copyright (C) 2000-2004 Underbit Technologies, Inc.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 * $Id: utf16.c,v 1.9 2004/01/23 09:41:32 rob Exp $
35 * NAME: utf16->length()
36 * DESCRIPTION: return the number of ucs4 chars represented by a utf16 string
/*
 * id3_utf16_length(): takes a read-only UTF-16 string and accumulates a
 * count in `length`, which starts at 0.
 *
 * NOTE(review): this copy is an incomplete listing. The leading numbers
 * (38, 40, 43, 45, 46) are listing line numbers and they skip, so the
 * loop, the statements that update `length`, and the return statement
 * are not visible here. Restore them from the pristine file before
 * changing this function.
 */
38 id3_length_t
id3_utf16_length(id3_utf16_t
const *utf16
)
40 id3_length_t length
= 0;
/* Case 1: utf16[0] lies outside 0xd800..0xdfff, i.e. it is not a surrogate. */
43 if (utf16
[0] < 0xd800 || utf16
[0] > 0xdfff)
/*
 * Case 2: utf16[0] is in 0xd800..0xdbff (high surrogate) and utf16[1] is
 * in 0xdc00..0xdfff (low surrogate). Because of the && ordering, utf16[1]
 * is only read once utf16[0] is known to be a high surrogate. A lone or
 * misordered surrogate satisfies neither test; what happens to it is
 * decided by lines not shown in this copy.
 */
45 else if (utf16
[0] >= 0xd800 && utf16
[0] <= 0xdbff &&
46 utf16
[1] >= 0xdc00 && utf16
[1] <= 0xdfff) {
59 * DESCRIPTION: return the encoding size of a utf16 string
/*
 * id3_utf16_size(): `ptr` starts at the head of the string and the result
 * is (ptr - utf16) + 1. Both pointers are id3_utf16_t const *, so the
 * value is a count of id3_utf16_t elements, not a count of bytes.
 *
 * NOTE(review): listing lines 64-67 are missing from this copy, so the
 * code that advances `ptr` is not visible. Presumably it walks to the
 * terminating 0 and the +1 accounts for that terminator -- confirm
 * against the pristine file.
 */
61 id3_length_t
id3_utf16_size(id3_utf16_t
const *utf16
)
63 id3_utf16_t
const *ptr
= utf16
;
/* Distance moved by ptr, plus one. */
68 return ptr
- utf16
+ 1;
72 * NAME: utf16->ucs4duplicate()
73 * DESCRIPTION: duplicate and decode a utf16 string into ucs4
/*
 * id3_utf16_ucs4duplicate(): sizes a new buffer with id3_utf16_length()
 * and fills it with id3_utf16_decode().
 *
 * NOTE(review): only listing lines 75, 79 and 81 are present in this copy.
 * The declaration of `ucs4`, any check of the malloc() result, and the
 * return statement are not visible. Presumably the buffer is returned and
 * the caller owns it -- confirm against the pristine file.
 */
75 id3_ucs4_t
*id3_utf16_ucs4duplicate(id3_utf16_t
const *utf16
)
/*
 * One id3_ucs4_t per counted character plus one extra element (presumably
 * for the terminator -- confirm). The size product is handed to malloc()
 * without an overflow check on this line.
 */
79 ucs4
= malloc((id3_utf16_length(utf16
) + 1) * sizeof(*ucs4
));
/* Decode the source string straight into the new buffer. */
81 id3_utf16_decode(utf16
, ucs4
);
87 * NAME: utf16->decodechar()
88 * DESCRIPTION: decode a series of utf16 chars into a single ucs4 char
/*
 * id3_utf16_decodechar(): decodes one UCS-4 value from the UTF-16 units
 * at `utf16`, stores it through `ucs4` (visible for the surrogate-pair
 * branch), and returns a unit count measured from the original pointer.
 *
 * NOTE(review): listing lines 93-94, 96, 98 and 104 onward are missing
 * from this copy. The return values are computed as (utf16 - start) + n
 * rather than as the constants 1 and 2, which suggests `utf16` can be
 * advanced past skipped units by code that is not shown -- confirm
 * against the pristine file.
 */
90 id3_length_t
id3_utf16_decodechar(id3_utf16_t
const *utf16
, id3_ucs4_t
*ucs4
)
/* Remember where decoding began so the consumed-unit count can be derived. */
92 id3_utf16_t
const *start
= utf16
;
/* Non-surrogate unit (outside 0xd800..0xdfff): one unit is consumed here. */
95 if (utf16
[0] < 0xd800 || utf16
[0] > 0xdfff) {
97 return utf16
- start
+ 1;
/*
 * High surrogate (0xd800..0xdbff) followed by a low surrogate
 * (0xdc00..0xdfff). utf16[1] is only read after utf16[0] has passed the
 * high-surrogate test.
 */
99 else if (utf16
[0] >= 0xd800 && utf16
[0] <= 0xdbff &&
100 utf16
[1] >= 0xdc00 && utf16
[1] <= 0xdfff) {
/*
 * Combine the low 10 bits of each unit into a 20-bit value (high unit in
 * bits 10..19, low unit in bits 0..9) and add 0x10000.
 */
101 *ucs4
= (((utf16
[0] & 0x03ffL
) << 10) |
102 ((utf16
[1] & 0x03ffL
) << 0)) + 0x00010000L
;
/* Two units are consumed by the pair. */
103 return utf16
- start
+ 2;
111 * NAME: utf16->encodechar()
112 * DESCRIPTION: encode a single ucs4 char into a series of up to 2 utf16 chars
/*
 * id3_utf16_encodechar(): writes one UCS-4 value into `utf16` as UTF-16.
 * Visible branches:
 *   ucs4 < 0x00010000L  -- body not shown in this copy
 *   ucs4 < 0x00110000L  -- two units are stored in utf16[0] and utf16[1]
 * The final statement re-enters this function with
 * ID3_UCS4_REPLACEMENTCHAR; presumably it is reached only when neither
 * branch returned, i.e. for values of 0x110000 and above -- confirm.
 *
 * NOTE(review): listing lines 117-120, 122-123 and 126-131 are missing
 * from this copy, including every return inside the two branches.
 */
114 id3_length_t
id3_utf16_encodechar(id3_utf16_t
*utf16
, id3_ucs4_t ucs4
)
116 if (ucs4
< 0x00010000L
) {
121 else if (ucs4
< 0x00110000L
) {
/*
 * utf16[0] receives bits 10..19 tagged with 0xd800 and utf16[1] receives
 * bits 0..9 tagged with 0xdc00.
 * NOTE(review): these formulas round-trip with id3_utf16_decodechar()
 * (which adds 0x00010000L when decoding) only if 0x10000 has already been
 * subtracted from ucs4. Listing lines 122-123 are not visible -- confirm
 * that they perform that subtraction.
 */
124 utf16
[0] = ((ucs4
>> 10) & 0x3ff) | 0xd800;
125 utf16
[1] = ((ucs4
>> 0) & 0x3ff) | 0xdc00;
/* Encode the replacement character instead of the original value. */
132 return id3_utf16_encodechar(utf16
, ID3_UCS4_REPLACEMENTCHAR
);
136 * NAME: utf16->decode()
137 * DESCRIPTION: decode a complete utf16 string into a ucs4 string
/*
 * id3_utf16_decode(): converts the UTF-16 string at `utf16` into UCS-4
 * values written at `ucs4`, one id3_utf16_decodechar() call at a time.
 *
 * NOTE(review): only listing lines 139 and 142 are present in this copy.
 * The loop around the call, the code that advances `ucs4`, and the
 * termination test are not visible. The function does no bounds checking
 * in the visible line, so the destination presumably has to be sized by
 * the caller -- confirm against the pristine file.
 */
139 void id3_utf16_decode(id3_utf16_t
const *utf16
, id3_ucs4_t
*ucs4
)
/* Advance the input by the number of units the decoder reports consuming. */
142 utf16
+= id3_utf16_decodechar(utf16
, ucs4
);
147 * NAME: utf16->encode()
148 * DESCRIPTION: encode a complete ucs4 string into a utf16 string
/*
 * id3_utf16_encode(): converts the UCS-4 string at `ucs4` into UTF-16
 * units written at `utf16`, one id3_utf16_encodechar() call at a time.
 *
 * NOTE(review): only listing lines 150 and 153 are present in this copy.
 * The loop around the call, the code that advances `ucs4`, and the
 * termination test are not visible. The visible line does no bounds
 * checking, so the destination presumably has to be sized by the caller
 * -- confirm against the pristine file.
 */
150 void id3_utf16_encode(id3_utf16_t
*utf16
, id3_ucs4_t
const *ucs4
)
/* Advance the output by the number of units the encoder reports writing. */
153 utf16
+= id3_utf16_encodechar(utf16
, *ucs4
);
159 * DESCRIPTION: serialize a single utf16 character
/*
 * id3_utf16_put(): stores one 16-bit unit as two bytes at *ptr. The byte
 * layout is chosen by the case labels below, which presumably belong to a
 * switch on `byteorder` -- confirm.
 *
 * NOTE(review): only listing lines 161, 162, 167-169 and 172-174 are
 * present in this copy. The switch header, any guard on `ptr`, the code
 * that advances *ptr, the handling of other byteorder values, and the
 * return value are not visible.
 */
161 id3_length_t
id3_utf16_put(id3_byte_t
**ptr
, id3_utf16_t utf16
,
162 enum id3_utf16_byteorder byteorder
)
/* Big-endian: most significant byte first. */
167 case ID3_UTF16_BYTEORDER_BE
:
168 (*ptr
)[0] = (utf16
>> 8) & 0xff;
169 (*ptr
)[1] = (utf16
>> 0) & 0xff;
/* Little-endian: least significant byte first. */
172 case ID3_UTF16_BYTEORDER_LE
:
173 (*ptr
)[0] = (utf16
>> 0) & 0xff;
174 (*ptr
)[1] = (utf16
>> 8) & 0xff;
186 * DESCRIPTION: deserialize a single utf16 character
/*
 * id3_utf16_get(): returns one id3_utf16_t read through the byte cursor
 * `ptr`, with separate cases for big-endian and little-endian input.
 *
 * NOTE(review): only the signature (listing lines 188-189) and the two
 * case labels (195, 201) are present in this copy. The byte reads, the
 * code that advances *ptr, and the return statement are not visible.
 * Presumably this mirrors id3_utf16_put() and consumes two bytes --
 * confirm against the pristine file.
 */
188 id3_utf16_t
id3_utf16_get(id3_byte_t
const **ptr
,
189 enum id3_utf16_byteorder byteorder
)
195 case ID3_UTF16_BYTEORDER_BE
:
201 case ID3_UTF16_BYTEORDER_LE
:
214 * NAME: utf16->serialize()
215 * DESCRIPTION: serialize a ucs4 string using utf16 encoding
/*
 * id3_utf16_serialize(): encodes the UCS-4 string `ucs4` as UTF-16 and
 * writes it through the byte cursor `ptr` with id3_utf16_put(). `size`
 * starts at 0 and accumulates every id3_utf16_put() return value.
 *
 * NOTE(review): this copy is an incomplete listing. The parameter list
 * ends on a trailing comma after `byteorder`, so at least one further
 * parameter sits on a missing line (219). The loop over `ucs4`, the rest
 * of the switch, the guard on the final put, and the return statement
 * are also not visible.
 */
217 id3_length_t
id3_utf16_serialize(id3_byte_t
**ptr
, id3_ucs4_t
const *ucs4
,
218 enum id3_utf16_byteorder byteorder
,
221 id3_length_t size
= 0;
/* Scratch space for one encoded character (at most two units) and a read cursor. */
222 id3_utf16_t utf16
[2], *out
;
/*
 * When no byte order is specified, emit a byte order mark (0xfeff) first.
 * `byteorder` is passed to id3_utf16_put() unchanged.
 */
224 if (byteorder
== ID3_UTF16_BYTEORDER_ANY
)
225 size
+= id3_utf16_put(ptr
, 0xfeff, byteorder
);
/*
 * Encode the next character into the scratch buffer, reset `out` to its
 * start, and dispatch on the unit count that the encoder returns.
 */
228 switch (id3_utf16_encodechar(out
= utf16
, *ucs4
++)) {
/*
 * case 2 and case 1 sit on consecutive listing lines (229, 230), so case 2
 * falls through into case 1: a two-unit result emits both units and a
 * one-unit result emits one.
 */
229 case 2: size
+= id3_utf16_put(ptr
, *out
++, byteorder
);
230 case 1: size
+= id3_utf16_put(ptr
, *out
++, byteorder
);
/*
 * Emit a 0 unit as terminator.
 * NOTE(review): listing line 235 is missing, so any condition guarding
 * this statement is not visible.
 */
236 size
+= id3_utf16_put(ptr
, 0, byteorder
);
242 * NAME: utf16->deserialize()
243 * DESCRIPTION: deserialize a ucs4 string using utf16 encoding
/*
 * id3_utf16_deserialize(): reads up to `length` bytes of UTF-16 through
 * the byte cursor `ptr` into a temporary id3_utf16_t buffer, then decodes
 * that buffer into a newly allocated UCS-4 string.
 *
 * NOTE(review): this copy is an incomplete listing. The declaration of
 * `ucs4`, the checks on both malloc() results, the second operand and the
 * case labels of the byte-order switch, the initialisation and advance of
 * `utf16ptr`, the terminator store, the release of the temporary buffer,
 * and the return statement are all on missing lines. Restore them from
 * the pristine file before changing this function.
 */
245 id3_ucs4_t
*id3_utf16_deserialize(id3_byte_t
const **ptr
, id3_length_t length
,
246 enum id3_utf16_byteorder byteorder
)
248 id3_byte_t
const *end
;
249 id3_utf16_t
*utf16ptr
, *utf16
;
/*
 * Clear the low bit of `length` so `end` sits a whole number of two-byte
 * units past the start; a trailing odd byte is excluded from the range.
 */
252 end
= *ptr
+ (length
& ~1);
/* Room for length / 2 units plus one extra element (presumably the terminator -- confirm). */
254 utf16
= malloc((length
/ 2 + 1) * sizeof(*utf16
));
/*
 * With no byte order specified and input remaining, inspect the leading
 * bytes. (length & ~1) is even, so at this point a positive remainder
 * means at least two bytes are available, assuming *ptr has not moved
 * since `end` was computed.
 */
258 if (byteorder
== ID3_UTF16_BYTEORDER_ANY
&& end
- *ptr
> 0) {
/*
 * First byte forms the high half of the switch value.
 * NOTE(review): the second operand (line 260) and the case labels are
 * not visible; presumably the cases match the byte order mark in each
 * byte order -- confirm.
 */
259 switch (((*ptr
)[0] << 8) |
262 byteorder
= ID3_UTF16_BYTEORDER_BE
;
267 byteorder
= ID3_UTF16_BYTEORDER_LE
;
/*
 * Read units until the input range is exhausted or a 0 unit has been
 * stored. The assignment inside the condition is intentional: the unit
 * just read is both stored through utf16ptr and tested.
 */
274 while (end
- *ptr
> 0 && (*utf16ptr
= id3_utf16_get(ptr
, byteorder
)))
/*
 * One id3_ucs4_t per counted character plus one extra element. The size
 * product is handed to malloc() without an overflow check on this line.
 */
279 ucs4
= malloc((id3_utf16_length(utf16
) + 1) * sizeof(*ucs4
));
/* Decode the temporary UTF-16 buffer into the result buffer. */
281 id3_utf16_decode(utf16
, ucs4
);