NASM 2.06.01rc1
[nasm/sigaren-mirror.git] / quote.c
blob3aca4403f460ca3447d3f48b68f1346c7748b07e
/* ----------------------------------------------------------------------- *
 *
 *   Copyright 1996-2009 The NASM Authors - All Rights Reserved
 *   See the file AUTHORS included with the NASM distribution for
 *   the specific copyright holders.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following
 *   conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above
 *     copyright notice, this list of conditions and the following
 *     disclaimer in the documentation and/or other materials provided
 *     with the distribution.
 *
 *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ----------------------------------------------------------------------- */
/*
 * quote.c
 */
38 #include "compiler.h"
40 #include <assert.h>
41 #include <stdlib.h>
43 #include "nasmlib.h"
44 #include "quote.h"
/*
 * Numeric value of a digit character: '0'..'9' map to 0..9 and letters
 * of either case map to 10 upward ('a'/'A' is 10).  No validation is
 * done, and the argument is evaluated more than once, so it must be
 * free of side effects.
 */
#define numvalue(c)                         \
    ((c) < 'A' ? (c) - '0' :                \
     (c) < 'a' ? (c) - 'A' + 10 :           \
                 (c) - 'a' + 10)
/*
 * Return the letter of the two-character backslash escape used for the
 * byte uc inside a `...` string, or 0 if the byte has no short escape.
 */
static char quote_short_escape(unsigned char uc)
{
    switch (uc) {
    case 7:
        return 'a';
    case 8:
        return 'b';
    case 9:
        return 't';
    case 10:
        return 'n';
    case 11:
        return 'v';
    case 12:
        return 'f';
    case 13:
        return 'r';
    case 27:
        return 'e';
    default:
        return 0;
    }
}

/*
 * Produce a quoted, NUL-terminated copy of the len bytes starting at
 * str (which may contain embedded NULs).  The result is allocated with
 * nasm_malloc().
 *
 * If every byte is printable and the data lacks at least one of the
 * two plain quote characters, the bytes are copied verbatim between
 * '...' (preferred) or "..." quotes.  Otherwise the `...` form is used,
 * with backslash escapes for backquote, backslash and every byte
 * outside the ' '..'~' range.
 *
 * All byte classification is done on unsigned char values.  Comparing
 * a plain (possibly signed) char against 077 / 07 treats bytes >= 0x80
 * as negative and under-counts the space their octal escapes need,
 * which overruns the allocation in the emission loop.
 */
char *nasm_quote(char *str, size_t len)
{
    char *p, *q, *nstr;
    char *ep = str + len;
    char esc;
    unsigned char uc, next;
    bool sq_ok = true;
    bool dq_ok = true;
    size_t qlen = 0;            /* Payload length if we need `...` quotes */

    /* Pass 1: decide which quote styles are usable and size the output */
    for (p = str; p < ep; p++) {
        uc = (unsigned char)*p;
        switch (uc) {
        case '\'':
            sq_ok = false;
            qlen++;
            break;
        case '\"':
            dq_ok = false;
            qlen++;
            break;
        case '`':
        case '\\':
            qlen += 2;
            break;
        default:
            if (uc >= ' ' && uc <= '~') {
                qlen++;
                break;
            }
            /* Non-printable bytes can only be expressed in `...` form */
            sq_ok = dq_ok = false;
            if (quote_short_escape(uc)) {
                qlen += 2;
                break;
            }
            next = (p + 1 < ep) ? (unsigned char)p[1] : 0;
            if (uc > 077 || (next >= '0' && next <= '7'))
                qlen += 4;      /* Must use the full \ooo form */
            else if (uc > 07)
                qlen += 3;
            else
                qlen += 2;
            break;
        }
    }

    if (sq_ok || dq_ok) {
        /* Use '...' or "..." */
        nstr = nasm_malloc(len + 3);
        nstr[0] = nstr[len + 1] = sq_ok ? '\'' : '\"';
        nstr[len + 2] = '\0';
        memcpy(nstr + 1, str, len);
        return nstr;
    }

    /* Pass 2: need to use `...` quoted syntax */
    nstr = nasm_malloc(qlen + 3);
    q = nstr;
    *q++ = '`';
    for (p = str; p < ep; p++) {
        uc = (unsigned char)*p;
        esc = quote_short_escape(uc);
        if (uc == '`' || uc == '\\') {
            *q++ = '\\';
            *q++ = (char)uc;
        } else if (esc) {
            *q++ = '\\';
            *q++ = esc;
        } else if (uc < ' ' || uc > '~') {
            /*
             * A following octal digit would be absorbed into a short
             * escape when read back, so pad to three digits then.
             */
            next = (p + 1 < ep) ? (unsigned char)p[1] : 0;
            if (next >= '0' && next <= '7')
                q += sprintf(q, "\\%03o", (unsigned int)uc);
            else
                q += sprintf(q, "\\%o", (unsigned int)uc);
        } else {
            *q++ = (char)uc;
        }
    }
    *q++ = '`';
    *q++ = '\0';
    /* The two passes must agree on the encoded length */
    assert((size_t)(q - nstr) == qlen + 3);

    return nstr;
}
/*
 * Store the value v at q as a UTF-8 style byte sequence of one to six
 * bytes and return the pointer just past the last byte written.
 * Negative values write nothing and return q unchanged.
 */
static char *emit_utf8(char *q, int32_t v)
{
    int trail;                  /* Number of continuation bytes */
    int lead;                   /* Marker bits of the leading byte */

    if (v < 0)
        return q;               /* Impossible - do nothing */

    if (v <= 0x7f) {
        *q++ = v;
        return q;
    }

    if (v <= 0x000007ff) {
        lead = 0xc0;
        trail = 1;
    } else if (v <= 0x0000ffff) {
        lead = 0xe0;
        trail = 2;
    } else if (v <= 0x001fffff) {
        lead = 0xf0;
        trail = 3;
    } else if (v <= 0x03ffffff) {
        lead = 0xf8;
        trail = 4;
    } else {
        lead = 0xfc;
        trail = 5;
    }

    /* Leading byte carries the bits above all continuation groups */
    *q++ = lead | (v >> (6 * trail));

    /* Continuation bytes: six bits each, most significant group first */
    while (trail-- > 0)
        *q++ = 0x80 | ((v >> (6 * trail)) & 63);

    return q;
}
/* True if c is a hexadecimal digit. */
static int unq_is_hex(char c)
{
    return (c >= '0' && c <= '9') ||
           (c >= 'A' && c <= 'F') ||
           (c >= 'a' && c <= 'f');
}

/*
 * Emit the accumulated \u / \U value v through emit_utf8().  Values
 * that do not fit in 31 bits have no encoding and produce no output.
 */
static char *unq_emit_ucs(char *q, uint32_t v)
{
    if (v > 0x7fffffff)
        return q;
    return emit_utf8(q, (int32_t)v);
}

/*
 * Do an *in-place* dequoting of the specified string, returning the
 * resulting length (which may contain embedded nulls.)
 *
 * In-place replacement is possible since the unquoted length is always
 * shorter than or equal to the quoted length.  For a quoted input the
 * output is strictly shorter, so a terminating null is stored after
 * the result without touching the closing quote.
 *
 * If ep is non-NULL, *ep points to the final quote, or to the null if
 * improperly quoted (including the empty string.)
 *
 * Input that does not start with ', " or ` is left untouched and its
 * length is returned.
 */
size_t nasm_unquote(char *str, char **ep)
{
    char bq;
    char *p, *q;
    char *escp = NULL;
    char c;
    enum unq_state {
        st_start,
        st_backslash,
        st_hex,
        st_oct,
        st_ucs
    } state;
    int ndig = 0;
    /*
     * Unsigned accumulator: an eight-digit \U escape shifts bits up to
     * bit 31, which is undefined behaviour on a signed 32-bit type.
     */
    uint32_t nval = 0;

    p = q = str;

    bq = *p++;
    if (!bq) {
        /* Empty input: still report where parsing stopped */
        if (ep)
            *ep = str;
        return 0;
    }

    switch (bq) {
    case '\'':
    case '\"':
        /* '...' or "..." string: no escapes, copy up to the end quote */
        while ((c = *p) && c != bq) {
            p++;
            *q++ = c;
        }
        *q = '\0';
        break;

    case '`':
        /* `...` string */
        state = st_start;

        while ((c = *p)) {
            p++;
            switch (state) {
            case st_start:
                switch (c) {
                case '\\':
                    state = st_backslash;
                    break;
                case '`':
                    p--;        /* Leave p on the closing quote */
                    goto out;
                default:
                    *q++ = c;
                    break;
                }
                break;

            case st_backslash:
                state = st_start;
                escp = p;       /* Beginning of argument sequence */
                nval = 0;
                switch (c) {
                case 'a':
                    *q++ = 7;
                    break;
                case 'b':
                    *q++ = 8;
                    break;
                case 'e':
                    *q++ = 27;
                    break;
                case 'f':
                    *q++ = 12;
                    break;
                case 'n':
                    *q++ = 10;
                    break;
                case 'r':
                    *q++ = 13;
                    break;
                case 't':
                    *q++ = 9;
                    break;
                case 'u':
                    state = st_ucs;
                    ndig = 4;
                    break;
                case 'U':
                    state = st_ucs;
                    ndig = 8;
                    break;
                case 'v':
                    *q++ = 11;
                    break;
                case 'x':
                case 'X':
                    state = st_hex;
                    ndig = 2;
                    break;
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                    state = st_oct;
                    ndig = 2;   /* Up to two more digits */
                    nval = (uint32_t)(c - '0');
                    break;
                default:
                    /* Unknown escape: the character stands for itself */
                    *q++ = c;
                    break;
                }
                break;

            case st_oct:
                if (c >= '0' && c <= '7') {
                    nval = (nval << 3) + (uint32_t)(c - '0');
                    if (!--ndig) {
                        *q++ = (char)nval;
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    *q++ = (char)nval;
                    state = st_start;
                }
                break;

            case st_hex:
                if (unq_is_hex(c)) {
                    nval = (nval << 4) + (uint32_t)numvalue(c);
                    if (!--ndig) {
                        *q++ = (char)nval;
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    /* No digits at all: emit the escape letter itself */
                    *q++ = (p > escp) ? (char)nval : escp[-1];
                    state = st_start;
                }
                break;

            case st_ucs:
                if (unq_is_hex(c)) {
                    nval = (nval << 4) + (uint32_t)numvalue(c);
                    if (!--ndig) {
                        q = unq_emit_ucs(q, nval);
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    if (p > escp)
                        q = unq_emit_ucs(q, nval);
                    else
                        *q++ = escp[-1];
                    state = st_start;
                }
                break;
            }
        }

        /* Input ended inside an escape: flush whatever was collected */
        switch (state) {
        case st_start:
        case st_backslash:
            break;
        case st_oct:
            *q++ = (char)nval;
            break;
        case st_hex:
            *q++ = (p > escp) ? (char)nval : escp[-1];
            break;
        case st_ucs:
            if (p > escp)
                q = unq_emit_ucs(q, nval);
            else
                *q++ = escp[-1];
            break;
        }
    out:
        /*
         * Terminate the result like the '...' / "..." case does.  The
         * opening quote was consumed without output and no escape
         * expands, so q < p here and *p is left intact.
         */
        *q = '\0';
        break;

    default:
        /* Not a quoted string, just return the input... */
        p = q = strchr(str, '\0');
        break;
    }

    if (ep)
        *ep = p;
    return q - str;
}
/*
 * Find the end of a quoted string; returns the pointer to the terminating
 * character (either the ending quote or the null character, if unterminated.)
 *
 * If str does not begin with ', " or `, str itself is returned.
 */
char *nasm_skip_string(char *str)
{
    char bq = str[0];
    char *p;
    int escaped = 0;            /* Previous character was an active '\' */

    if (bq == '\'' || bq == '\"') {
        /* '...' or "..." string: no escapes, stop at the matching quote */
        for (p = str + 1; *p && *p != bq; p++)
            ;
        return p;
    }

    if (bq != '`')
        return str;             /* Not a string... */

    /*
     * `...` string.  For the purpose of finding the end, only the
     * character right after a backslash needs to be skipped: whatever
     * escape it starts, a later backslash or backquote is handled the
     * same way as in the unescaped state.
     */
    for (p = str + 1; *p; p++) {
        if (escaped)
            escaped = 0;
        else if (*p == '\\')
            escaped = 1;
        else if (*p == '`')
            return p;           /* Found the end */
    }

    /*
     * Unterminated string: p rests on the null character itself.
     * (Scanning with *p++ would leave p one past the terminator.)
     */
    return p;
}