BR 2781900: handle common labels while optimizing
[nasm.git] / strfunc.c
blob5929aae5e4d0a528370310a8b9847162458ad3f2
/*
 * strfunc.c
 *
 * String transformation functions
 */
7 #include "nasmlib.h"
8 #include "nasm.h"
/*
 * Convert a string in UTF-8 format to UTF-16LE
 *
 * str - input bytes
 * len - number of input bytes
 * op  - output cursor, or NULL to only measure the output
 *
 * Returns the size of the converted string in bytes (two per emitted
 * 16-bit unit), or (size_t)-1 if the input is not acceptable: a stray
 * or missing continuation byte, an invalid lead byte, an overlong
 * encoding, a surrogate code point, a value above 0x10ffff, or a
 * sequence cut short by the end of the input.
 */
static size_t utf8_to_16le(uint8_t *str, size_t len, char *op)
{
/*
 * Count one 16-bit unit, storing it only when an output buffer was given.
 * WRITESHORT is defined outside this file; it is expected to store the
 * value and advance op -- confirm against nasmlib.h.
 */
#define EMIT(x) do { if (op) { WRITESHORT(op,x); } outlen++; } while(0)

    size_t outlen = 0;          /* 16-bit units produced so far */
    int expect = 0;             /* continuation bytes still outstanding */
    uint8_t c;
    uint32_t v = 0, vmin = 0;   /* value being built, and its lower bound */

    while (len--) {
        c = *str++;

        if (expect) {
            /* Inside a multi-byte sequence: only 10xxxxxx is allowed */
            if ((c & 0xc0) != 0x80)
                return (size_t)-1;

            v = (v << 6) | (c & 0x3f);
            if (!--expect) {
                /*
                 * Sequence complete.  v < vmin means the value would have
                 * fit in a shorter sequence (overlong encoding).
                 */
                if (v < vmin || v > 0x10ffff ||
                    (v >= 0xd800 && v <= 0xdfff)) {
                    return (size_t)-1;
                } else if (v > 0xffff) {
                    /* Above the 16-bit range: emit a surrogate pair */
                    v -= 0x10000;
                    EMIT(0xd800 | (v >> 10));
                    EMIT(0xdc00 | (v & 0x3ff));
                } else {
                    EMIT(v);
                }
            }
            continue;
        }

        /* Start of a new character: classify the lead byte */
        if (c < 0x80) {
            EMIT(c);
        } else if (c < 0xc0 || c >= 0xfe) {
            /* Invalid UTF-8 */
            return (size_t)-1;
        } else if (c < 0xe0) {
            v = c & 0x1f;
            expect = 1;
            vmin = 0x80;
        } else if (c < 0xf0) {
            v = c & 0x0f;
            expect = 2;
            vmin = 0x800;
        } else if (c < 0xf8) {
            v = c & 0x07;
            expect = 3;
            vmin = 0x10000;
        } else if (c < 0xfc) {
            /* 5-byte form: always ends up above 0x10ffff and is rejected */
            v = c & 0x03;
            expect = 4;
            vmin = 0x200000;
        } else {
            /* 6-byte form: always ends up above 0x10ffff and is rejected */
            v = c & 0x01;
            expect = 5;
            vmin = 0x4000000;
        }
    }

    /* A sequence still open at end of input is an error */
    return expect ? (size_t)-1 : outlen << 1;

#undef EMIT
}
/*
 * Convert a string in UTF-8 format to UTF-32LE
 *
 * str - input bytes
 * len - number of input bytes
 * op  - output cursor, or NULL to only measure the output
 *
 * Returns the size of the converted string in bytes (four per emitted
 * 32-bit unit), or (size_t)-1 if the input is not acceptable: a stray
 * or missing continuation byte, an invalid lead byte, an overlong
 * encoding, a surrogate code point, or a sequence cut short by the end
 * of the input.
 *
 * NOTE(review): no upper bound is applied to the decoded value, so the
 * 5- and 6-byte forms (values above 0x10ffff) are passed through
 * unchanged.  Confirm that this is intended.
 */
static size_t utf8_to_32le(uint8_t *str, size_t len, char *op)
{
/*
 * Count one 32-bit unit, storing it only when an output buffer was given.
 * WRITELONG is defined outside this file; it is expected to store the
 * value and advance op -- confirm against nasmlib.h.
 */
#define EMIT(x) do { if (op) { WRITELONG(op,x); } outlen++; } while(0)

    size_t outlen = 0;          /* 32-bit units produced so far */
    int expect = 0;             /* continuation bytes still outstanding */
    uint8_t c;
    uint32_t v = 0, vmin = 0;   /* value being built, and its lower bound */

    while (len--) {
        c = *str++;

        if (expect) {
            /* Inside a multi-byte sequence: only 10xxxxxx is allowed */
            if ((c & 0xc0) != 0x80)
                return (size_t)-1;

            v = (v << 6) | (c & 0x3f);
            if (!--expect) {
                /*
                 * Sequence complete.  v < vmin means the value would have
                 * fit in a shorter sequence (overlong encoding).
                 */
                if (v < vmin || (v >= 0xd800 && v <= 0xdfff))
                    return (size_t)-1;
                EMIT(v);
            }
            continue;
        }

        /* Start of a new character: classify the lead byte */
        if (c < 0x80) {
            EMIT(c);
        } else if (c < 0xc0 || c >= 0xfe) {
            /* Invalid UTF-8 */
            return (size_t)-1;
        } else if (c < 0xe0) {
            v = c & 0x1f;
            expect = 1;
            vmin = 0x80;
        } else if (c < 0xf0) {
            v = c & 0x0f;
            expect = 2;
            vmin = 0x800;
        } else if (c < 0xf8) {
            v = c & 0x07;
            expect = 3;
            vmin = 0x10000;
        } else if (c < 0xfc) {
            v = c & 0x03;
            expect = 4;
            vmin = 0x200000;
        } else {
            v = c & 0x01;
            expect = 5;
            vmin = 0x4000000;
        }
    }

    /* A sequence still open at end of input is an error */
    return expect ? (size_t)-1 : outlen << 2;

#undef EMIT
}
144 typedef size_t (*transform_func)(uint8_t *, size_t, char *);
147 * Apply a specific string transform and return it in a nasm_malloc'd
148 * buffer, returning the length. On error, returns (size_t)-1 and no
149 * buffer is allocated.
151 size_t string_transform(char *str, size_t len, char **out, enum strfunc func)
153 /* This should match enum strfunc in nasm.h */
154 static const transform_func str_transforms[] = {
155 utf8_to_16le,
156 utf8_to_32le,
158 transform_func transform = str_transforms[func];
159 size_t outlen;
160 uint8_t *s = (uint8_t *)str;
161 char *buf;
163 outlen = transform(s, len, NULL);
164 if (outlen == (size_t)-1)
165 return -1;
167 *out = buf = nasm_malloc(outlen+1);
168 buf[outlen] = '\0'; /* Forcibly null-terminate the buffer */
169 return transform(s, len, buf);