/* ----------------------------------------------------------------------- *
 *
 *   Copyright 1996-2009 The NASM Authors - All Rights Reserved
 *   See the file AUTHORS included with the NASM distribution for
 *   the specific copyright holders.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following
 *   conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above
 *     copyright notice, this list of conditions and the following
 *     disclaimer in the documentation and/or other materials provided
 *     with the distribution.
 *
 *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ----------------------------------------------------------------------- */
/*
 * String transformation functions
 */
/*
 * Convert a string in UTF-8 format to UTF-16LE
 *
 * str: input bytes, expected to be UTF-8
 * len: number of input bytes
 * op:  output buffer, or NULL to only compute the output size
 *
 * Returns the size of the output in bytes, or (size_t)-1 if the input
 * is malformed (stray or missing continuation byte, overlong form,
 * invalid lead byte, truncated sequence), encodes a surrogate, or
 * encodes a value above 0x10ffff.
 */
static size_t utf8_to_16le(uint8_t *str, size_t len, char *op)
{
/* Store one 16-bit code unit, least significant byte first */
#define EMIT(x)                                 \
    do {                                        \
        uint16_t unit_ = (uint16_t)(x);         \
        if (dst) {                              \
            *dst++ = (uint8_t)(unit_ & 0xff);   \
            *dst++ = (uint8_t)(unit_ >> 8);     \
        }                                       \
        outlen++;                               \
    } while (0)

    uint8_t *dst = (uint8_t *)op;
    size_t outlen = 0;          /* 16-bit units produced so far */
    int expect = 0;             /* continuation bytes still required */
    uint8_t c;
    uint32_t v = 0, vmin = 0;   /* value being built; smallest non-overlong value */

    while (len--) {
        c = *str++;

        if (expect) {
            if ((c & 0xc0) != 0x80) {
                /* A continuation byte was required here */
                return (size_t)-1;
            }

            v = (v << 6) | (c & 0x3f);
            if (!--expect) {
                if (v < vmin || v > 0x10ffff ||
                    (v >= 0xd800 && v <= 0xdfff)) {
                    /* Overlong form, out of range, or a surrogate */
                    return (size_t)-1;
                } else if (v > 0xffff) {
                    /* Needs a surrogate pair: high surrogate first */
                    v -= 0x10000;
                    EMIT(0xd800 | (v >> 10));
                    EMIT(0xdc00 | (v & 0x3ff));
                } else {
                    EMIT(v);
                }
            }
            continue;
        }

        if (c < 0x80) {
            EMIT(c);
        } else if (c < 0xc0 || c >= 0xfe) {
            /* Invalid UTF-8 */
            return (size_t)-1;
        } else if (c < 0xe0) {
            v = c & 0x1f;
            expect = 1;
            vmin = 0x80;
        } else if (c < 0xf0) {
            v = c & 0x0f;
            expect = 2;
            vmin = 0x800;
        } else if (c < 0xf8) {
            v = c & 0x07;
            expect = 3;
            vmin = 0x10000;
        } else if (c < 0xfc) {
            v = c & 0x03;
            expect = 4;
            vmin = 0x200000;
        } else {
            v = c & 0x01;
            expect = 5;
            vmin = 0x4000000;
        }
    }

    /* A sequence left unfinished at end of input is an error */
    return expect ? (size_t)-1 : outlen << 1;

#undef EMIT
}
/*
 * Convert a string in UTF-8 format to UTF-16BE
 *
 * str: input bytes, expected to be UTF-8
 * len: number of input bytes
 * op:  output buffer, or NULL to only compute the output size
 *
 * Returns the size of the output in bytes, or (size_t)-1 if the input
 * is malformed (stray or missing continuation byte, overlong form,
 * invalid lead byte, truncated sequence), encodes a surrogate, or
 * encodes a value above 0x10ffff.
 */
static size_t utf8_to_16be(uint8_t *str, size_t len, char *op)
{
/* Store one 16-bit code unit, most significant byte first */
#define EMIT(x)                                 \
    do {                                        \
        uint16_t unit_ = (uint16_t)(x);         \
        if (dst) {                              \
            *dst++ = (uint8_t)(unit_ >> 8);     \
            *dst++ = (uint8_t)(unit_ & 0xff);   \
        }                                       \
        outlen++;                               \
    } while (0)

    uint8_t *dst = (uint8_t *)op;
    size_t outlen = 0;          /* 16-bit units produced so far */
    int expect = 0;             /* continuation bytes still required */
    uint8_t c;
    uint32_t v = 0, vmin = 0;   /* value being built; smallest non-overlong value */

    while (len--) {
        c = *str++;

        if (expect) {
            if ((c & 0xc0) != 0x80) {
                /* A continuation byte was required here */
                return (size_t)-1;
            }

            v = (v << 6) | (c & 0x3f);
            if (!--expect) {
                if (v < vmin || v > 0x10ffff ||
                    (v >= 0xd800 && v <= 0xdfff)) {
                    /* Overlong form, out of range, or a surrogate */
                    return (size_t)-1;
                } else if (v > 0xffff) {
                    /*
                     * Needs a surrogate pair.  The high surrogate
                     * comes first in both UTF-16 byte orders; the byte
                     * order only affects the two bytes inside each
                     * 16-bit unit.
                     */
                    v -= 0x10000;
                    EMIT(0xd800 | (v >> 10));
                    EMIT(0xdc00 | (v & 0x3ff));
                } else {
                    EMIT(v);
                }
            }
            continue;
        }

        if (c < 0x80) {
            EMIT(c);
        } else if (c < 0xc0 || c >= 0xfe) {
            /* Invalid UTF-8 */
            return (size_t)-1;
        } else if (c < 0xe0) {
            v = c & 0x1f;
            expect = 1;
            vmin = 0x80;
        } else if (c < 0xf0) {
            v = c & 0x0f;
            expect = 2;
            vmin = 0x800;
        } else if (c < 0xf8) {
            v = c & 0x07;
            expect = 3;
            vmin = 0x10000;
        } else if (c < 0xfc) {
            v = c & 0x03;
            expect = 4;
            vmin = 0x200000;
        } else {
            v = c & 0x01;
            expect = 5;
            vmin = 0x4000000;
        }
    }

    /* A sequence left unfinished at end of input is an error */
    return expect ? (size_t)-1 : outlen << 1;

#undef EMIT
}
/*
 * Convert a string in UTF-8 format to UTF-32LE
 *
 * str: input bytes, expected to be UTF-8
 * len: number of input bytes
 * op:  output buffer, or NULL to only compute the output size
 *
 * Returns the size of the output in bytes, or (size_t)-1 if the input
 * is malformed (stray or missing continuation byte, overlong form,
 * invalid lead byte, truncated sequence) or encodes a surrogate.
 *
 * Unlike the UTF-16 converters there is no upper limit of 0x10ffff:
 * five- and six-byte sequences are accepted and stored as-is.
 */
static size_t utf8_to_32le(uint8_t *str, size_t len, char *op)
{
/* Store one 32-bit value, least significant byte first */
#define EMIT(x)                                         \
    do {                                                \
        uint32_t unit_ = (uint32_t)(x);                 \
        if (dst) {                                      \
            *dst++ = (uint8_t)(unit_ & 0xff);           \
            *dst++ = (uint8_t)((unit_ >> 8) & 0xff);    \
            *dst++ = (uint8_t)((unit_ >> 16) & 0xff);   \
            *dst++ = (uint8_t)(unit_ >> 24);            \
        }                                               \
        outlen++;                                       \
    } while (0)

    uint8_t *dst = (uint8_t *)op;
    size_t outlen = 0;          /* 32-bit units produced so far */
    int expect = 0;             /* continuation bytes still required */
    uint8_t c;
    uint32_t v = 0, vmin = 0;   /* value being built; smallest non-overlong value */

    while (len--) {
        c = *str++;

        if (expect) {
            if ((c & 0xc0) != 0x80) {
                /* A continuation byte was required here */
                return (size_t)-1;
            }

            v = (v << 6) | (c & 0x3f);
            if (!--expect) {
                if (v < vmin || (v >= 0xd800 && v <= 0xdfff)) {
                    /* Overlong form or a surrogate */
                    return (size_t)-1;
                } else {
                    EMIT(v);
                }
            }
            continue;
        }

        if (c < 0x80) {
            EMIT(c);
        } else if (c < 0xc0 || c >= 0xfe) {
            /* Invalid UTF-8 */
            return (size_t)-1;
        } else if (c < 0xe0) {
            v = c & 0x1f;
            expect = 1;
            vmin = 0x80;
        } else if (c < 0xf0) {
            v = c & 0x0f;
            expect = 2;
            vmin = 0x800;
        } else if (c < 0xf8) {
            v = c & 0x07;
            expect = 3;
            vmin = 0x10000;
        } else if (c < 0xfc) {
            v = c & 0x03;
            expect = 4;
            vmin = 0x200000;
        } else {
            v = c & 0x01;
            expect = 5;
            vmin = 0x4000000;
        }
    }

    /* A sequence left unfinished at end of input is an error */
    return expect ? (size_t)-1 : outlen << 2;

#undef EMIT
}
/*
 * Convert a string in UTF-8 format to UTF-32BE
 *
 * str: input bytes, expected to be UTF-8
 * len: number of input bytes
 * op:  output buffer, or NULL to only compute the output size
 *
 * Returns the size of the output in bytes, or (size_t)-1 if the input
 * is malformed (stray or missing continuation byte, overlong form,
 * invalid lead byte, truncated sequence) or encodes a surrogate.
 *
 * Unlike the UTF-16 converters there is no upper limit of 0x10ffff:
 * five- and six-byte sequences are accepted and stored as-is.
 */
static size_t utf8_to_32be(uint8_t *str, size_t len, char *op)
{
/* Store one 32-bit value, most significant byte first */
#define EMIT(x)                                         \
    do {                                                \
        uint32_t unit_ = (uint32_t)(x);                 \
        if (dst) {                                      \
            *dst++ = (uint8_t)(unit_ >> 24);            \
            *dst++ = (uint8_t)((unit_ >> 16) & 0xff);   \
            *dst++ = (uint8_t)((unit_ >> 8) & 0xff);    \
            *dst++ = (uint8_t)(unit_ & 0xff);           \
        }                                               \
        outlen++;                                       \
    } while (0)

    uint8_t *dst = (uint8_t *)op;
    size_t outlen = 0;          /* 32-bit units produced so far */
    int expect = 0;             /* continuation bytes still required */
    uint8_t c;
    uint32_t v = 0, vmin = 0;   /* value being built; smallest non-overlong value */

    while (len--) {
        c = *str++;

        if (expect) {
            if ((c & 0xc0) != 0x80) {
                /* A continuation byte was required here */
                return (size_t)-1;
            }

            v = (v << 6) | (c & 0x3f);
            if (!--expect) {
                if (v < vmin || (v >= 0xd800 && v <= 0xdfff)) {
                    /* Overlong form or a surrogate */
                    return (size_t)-1;
                } else {
                    EMIT(v);
                }
            }
            continue;
        }

        if (c < 0x80) {
            EMIT(c);
        } else if (c < 0xc0 || c >= 0xfe) {
            /* Invalid UTF-8 */
            return (size_t)-1;
        } else if (c < 0xe0) {
            v = c & 0x1f;
            expect = 1;
            vmin = 0x80;
        } else if (c < 0xf0) {
            v = c & 0x0f;
            expect = 2;
            vmin = 0x800;
        } else if (c < 0xf8) {
            v = c & 0x07;
            expect = 3;
            vmin = 0x10000;
        } else if (c < 0xfc) {
            v = c & 0x03;
            expect = 4;
            vmin = 0x200000;
        } else {
            v = c & 0x01;
            expect = 5;
            vmin = 0x4000000;
        }
    }

    /* A sequence left unfinished at end of input is an error */
    return expect ? (size_t)-1 : outlen << 2;

#undef EMIT
}
/*
 * Common signature of the converters: input bytes, input length, and
 * an output buffer; returns a size_t result.
 */
typedef size_t (*transform_func)(uint8_t *, size_t, char *);
332 * Apply a specific string transform and return it in a nasm_malloc'd
333 * buffer, returning the length. On error, returns (size_t)-1 and no
334 * buffer is allocated.
336 size_t string_transform(char *str
, size_t len
, char **out
, enum strfunc func
)
338 /* This should match enum strfunc in nasm.h */
339 static const transform_func str_transforms
[] = {
347 transform_func transform
= str_transforms
[func
];
349 uint8_t *s
= (uint8_t *)str
;
352 outlen
= transform(s
, len
, NULL
);
353 if (outlen
== (size_t)-1)
356 *out
= buf
= nasm_malloc(outlen
+1);
357 buf
[outlen
] = '\0'; /* Forcibly null-terminate the buffer */
358 return transform(s
, len
, buf
);