output: remove ABSOLUTE handling, OUT_RAWDATA asserts
[nasm.git] / asm / quote.c
blob75a937264c8140e3e37fc74e23160d198fb4b126
1 /* ----------------------------------------------------------------------- *
2 *
3 * Copyright 1996-2016 The NASM Authors - All Rights Reserved
4 * See the file AUTHORS included with the NASM distribution for
5 * the specific copyright holders.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following
9 * conditions are met:
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
19 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
20 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
21 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
30 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 * ----------------------------------------------------------------------- */
35 * quote.c
38 #include "compiler.h"
40 #include <stdlib.h>
42 #include "nasmlib.h"
43 #include "quote.h"
/*
 * Create a quoted copy of the len-byte buffer str (which may contain
 * embedded nulls) and return it as a newly nasm_malloc()'ed,
 * null-terminated string.
 *
 * If the input contains only printable ASCII and does not contain both
 * quote characters, it is wrapped verbatim in '...' (preferred) or "...".
 * Otherwise the `...` syntax is used, with backquote and backslash
 * escaped, the common control characters written as \a \b \t \n \v \f \r
 * \e, and every other non-printable byte written as an octal escape.
 */
char *nasm_quote(const char *str, size_t len)
{
    const char *p, *ep;
    char c, c1, *q, *nstr;
    unsigned char uc;
    bool sq_ok, dq_ok;
    size_t qlen;

    /*
     * First pass: decide whether '...' or "..." can be used, and compute
     * the length of the body in case `...` quoting is required.
     */
    sq_ok = dq_ok = true;
    ep = str + len;
    qlen = 0;                   /* Length if we need `...` quotes */
    for (p = str; p < ep; p++) {
        c = *p;
        switch (c) {
        case '\'':
            sq_ok = false;
            qlen++;
            break;
        case '\"':
            dq_ok = false;
            qlen++;
            break;
        case '`':
        case '\\':
            qlen += 2;
            break;
        default:
            if (c < ' ' || c > '~') {
                /* Non-printable: only `...` can represent it */
                sq_ok = dq_ok = false;
                switch (c) {
                case '\a':
                case '\b':
                case '\t':
                case '\n':
                case '\v':
                case '\f':
                case '\r':
                case 27:
                    qlen += 2;
                    break;
                default:
                    /*
                     * Octal escape. If the next input byte is itself an
                     * octal digit it would be absorbed into a short
                     * escape, so all three digits must be written.
                     */
                    c1 = (p + 1 < ep) ? p[1] : 0;
                    if (c1 >= '0' && c1 <= '7')
                        uc = 0377;      /* Must use the full form */
                    else
                        uc = c;
                    if (uc > 077)
                        qlen++;
                    if (uc > 07)
                        qlen++;
                    qlen += 2;          /* Backslash plus final digit */
                    break;
                }
            } else {
                qlen++;
            }
            break;
        }
    }

    if (sq_ok || dq_ok) {
        /* Use '...' or "..." */
        nstr = nasm_malloc(len + 3);
        nstr[0] = nstr[len + 1] = sq_ok ? '\'' : '\"';
        nstr[len + 2] = '\0';
        if (len > 0)
            memcpy(nstr + 1, str, len);
    } else {
        /* Need to use `...` quoted syntax */
        nstr = nasm_malloc(qlen + 3);
        q = nstr;
        *q++ = '`';
        for (p = str; p < ep; p++) {
            c = *p;
            switch (c) {
            case '`':
            case '\\':
                *q++ = '\\';
                *q++ = c;
                break;
            case '\a':
                *q++ = '\\';
                *q++ = 'a';
                break;
            case '\b':
                *q++ = '\\';
                *q++ = 'b';
                break;
            case '\t':
                *q++ = '\\';
                *q++ = 't';
                break;
            case '\n':
                *q++ = '\\';
                *q++ = 'n';
                break;
            case '\v':
                *q++ = '\\';
                *q++ = 'v';
                break;
            case '\f':
                *q++ = '\\';
                *q++ = 'f';
                break;
            case '\r':
                *q++ = '\\';
                *q++ = 'r';
                break;
            case 27:
                *q++ = '\\';
                *q++ = 'e';
                break;
            default:
                if (c < ' ' || c > '~') {
                    /* Must mirror the length computation in pass one */
                    c1 = (p + 1 < ep) ? p[1] : 0;
                    if (c1 >= '0' && c1 <= '7')
                        uc = 0377;      /* Must use the full form */
                    else
                        uc = c;
                    *q++ = '\\';
                    if (uc > 077)
                        *q++ = ((unsigned char)c >> 6) + '0';
                    if (uc > 07)
                        *q++ = (((unsigned char)c >> 3) & 7) + '0';
                    *q++ = ((unsigned char)c & 7) + '0';
                } else {
                    *q++ = c;
                }
                break;
            }
        }
        *q++ = '`';
        *q++ = '\0';
        /* Both passes must agree: two quotes + terminator + body */
        nasm_assert((size_t)(q - nstr) == qlen + 3);
    }

    return nstr;
}
/*
 * Store the value v at q using the original (up to 31-bit, up to
 * six-byte) UTF-8 encoding, and return the pointer just past the last
 * byte written. A negative v is treated as impossible and nothing is
 * written. The caller must provide room for up to six bytes.
 */
static char *emit_utf8(char *q, int32_t v)
{
    if (v < 0) {
        /* Impossible - do nothing */
    } else if (v <= 0x7f) {
        *q++ = (char)v;
    } else if (v <= 0x000007ff) {
        *q++ = (char)(0xc0 | (v >> 6));
        *q++ = (char)(0x80 | (v & 63));
    } else if (v <= 0x0000ffff) {
        *q++ = (char)(0xe0 | (v >> 12));
        *q++ = (char)(0x80 | ((v >> 6) & 63));
        *q++ = (char)(0x80 | (v & 63));
    } else if (v <= 0x001fffff) {
        *q++ = (char)(0xf0 | (v >> 18));
        *q++ = (char)(0x80 | ((v >> 12) & 63));
        *q++ = (char)(0x80 | ((v >> 6) & 63));
        *q++ = (char)(0x80 | (v & 63));
    } else if (v <= 0x03ffffff) {
        *q++ = (char)(0xf8 | (v >> 24));
        *q++ = (char)(0x80 | ((v >> 18) & 63));
        *q++ = (char)(0x80 | ((v >> 12) & 63));
        *q++ = (char)(0x80 | ((v >> 6) & 63));
        *q++ = (char)(0x80 | (v & 63));
    } else {
        *q++ = (char)(0xfc | (v >> 30));
        *q++ = (char)(0x80 | ((v >> 24) & 63));
        *q++ = (char)(0x80 | ((v >> 18) & 63));
        *q++ = (char)(0x80 | ((v >> 12) & 63));
        *q++ = (char)(0x80 | ((v >> 6) & 63));
        *q++ = (char)(0x80 | (v & 63));
    }
    return q;
}
/*
 * Do an *in-place* dequoting of the specified string, returning the
 * resulting length (the result may contain embedded nulls.)
 *
 * In-place replacement is possible since the unquoted length is always
 * shorter than or equal to the quoted length.
 *
 * If ep is non-NULL, *ep is set to point to the final quote, or to the
 * terminating null if the string is improperly quoted (or empty, or not
 * a quoted string at all.)
 *
 * '...' and "..." strings are copied verbatim. `...` strings support
 * the escapes \a \b \e \f \n \r \t \v, up to three octal digits,
 * \x/\X with up to two hex digits, and \u / \U with up to four / eight
 * hex digits (emitted as UTF-8). Any other escaped character stands
 * for itself. A \U value above 0x7fffffff cannot be encoded and
 * produces no output.
 */
size_t nasm_unquote(char *str, char **ep)
{
    char bq;
    char *p, *q;
    char *escp = NULL;
    char c;
    enum unq_state {
        st_start,
        st_backslash,
        st_hex,
        st_oct,
        st_ucs
    } state;
    int ndig = 0;
    /*
     * Unsigned so that accumulating eight hex digits of a \U escape
     * cannot overflow a signed type (undefined behavior).
     */
    uint32_t nval = 0;

    p = q = str;

    bq = *p++;
    if (!bq) {
        /* Empty input: report the terminating null as the end */
        if (ep)
            *ep = str;
        return 0;
    }

    switch (bq) {
    case '\'':
    case '\"':
        /* '...' or "..." string */
        while ((c = *p) && c != bq) {
            p++;
            *q++ = c;
        }
        *q = '\0';
        break;

    case '`':
        /* `...` string */
        state = st_start;

        while ((c = *p)) {
            p++;
            switch (state) {
            case st_start:
                switch (c) {
                case '\\':
                    state = st_backslash;
                    break;
                case '`':
                    p--;        /* Leave p on the closing quote */
                    goto out;
                default:
                    *q++ = c;
                    break;
                }
                break;

            case st_backslash:
                state = st_start;
                escp = p;       /* Beginning of argument sequence */
                nval = 0;
                switch (c) {
                case 'a':
                    *q++ = 7;
                    break;
                case 'b':
                    *q++ = 8;
                    break;
                case 'e':
                    *q++ = 27;
                    break;
                case 'f':
                    *q++ = 12;
                    break;
                case 'n':
                    *q++ = 10;
                    break;
                case 'r':
                    *q++ = 13;
                    break;
                case 't':
                    *q++ = 9;
                    break;
                case 'u':
                    state = st_ucs;
                    ndig = 4;
                    break;
                case 'U':
                    state = st_ucs;
                    ndig = 8;
                    break;
                case 'v':
                    *q++ = 11;
                    break;
                case 'x':
                case 'X':
                    state = st_hex;
                    ndig = 2;
                    break;
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                    state = st_oct;
                    ndig = 2;   /* Up to two more digits */
                    nval = c - '0';
                    break;
                default:
                    *q++ = c;
                    break;
                }
                break;

            case st_oct:
                if (c >= '0' && c <= '7') {
                    nval = (nval << 3) + (c - '0');
                    if (!--ndig) {
                        *q++ = (char)nval;
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    *q++ = (char)nval;
                    state = st_start;
                }
                break;

            case st_hex:
                if ((c >= '0' && c <= '9') ||
                    (c >= 'A' && c <= 'F') ||
                    (c >= 'a' && c <= 'f')) {
                    nval = (nval << 4) + numvalue(c);
                    if (!--ndig) {
                        *q++ = (char)nval;
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    /* No digits at all: emit the 'x' itself */
                    *q++ = (p > escp) ? (char)nval : escp[-1];
                    state = st_start;
                }
                break;

            case st_ucs:
                if ((c >= '0' && c <= '9') ||
                    (c >= 'A' && c <= 'F') ||
                    (c >= 'a' && c <= 'f')) {
                    nval = (nval << 4) + numvalue(c);
                    if (!--ndig) {
                        if (nval <= 0x7fffffffU)
                            q = emit_utf8(q, (int32_t)nval);
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    if (p > escp) {
                        if (nval <= 0x7fffffffU)
                            q = emit_utf8(q, (int32_t)nval);
                    } else {
                        /* No digits at all: emit the 'u'/'U' itself */
                        *q++ = escp[-1];
                    }
                    state = st_start;
                }
                break;
            }
        }

        /* Hit the null inside the string: flush any pending escape */
        switch (state) {
        case st_start:
        case st_backslash:
            break;
        case st_oct:
            *q++ = (char)nval;
            break;
        case st_hex:
            *q++ = (p > escp) ? (char)nval : escp[-1];
            break;
        case st_ucs:
            if (p > escp) {
                if (nval <= 0x7fffffffU)
                    q = emit_utf8(q, (int32_t)nval);
            } else {
                *q++ = escp[-1];
            }
            break;
        }
    out:
        break;

    default:
        /* Not a quoted string, just return the input... */
        p = q = strchr(str, '\0');
        break;
    }

    if (ep)
        *ep = p;
    return q - str;
}
/*
 * Find the end of a quoted string; returns the pointer to the terminating
 * character (either the ending quote or the null character, if
 * unterminated.) If str does not begin with a quote character, str
 * itself is returned.
 */
char *nasm_skip_string(char *str)
{
    char bq = str[0];
    char *p;

    if (bq == '\'' || bq == '\"') {
        /* '...' or "..." string: no escapes, just find the same quote */
        for (p = str + 1; *p && *p != bq; p++)
            ;
        return p;
    }

    if (bq == '`') {
        /*
         * `...` string. For the purpose of finding the end, the only
         * thing an escape sequence can do is hide the single character
         * following the backslash (which may be a backquote or another
         * backslash), so skipping that one character is sufficient.
         */
        p = str + 1;
        while (*p && *p != '`') {
            if (*p == '\\' && p[1])
                p++;            /* Skip the escaped character */
            p++;
        }
        return p;               /* Closing quote, or null if unterminated */
    }

    return str;                 /* Not a string... */
}