Update .gitignore, remove bogus dependency
[nasm/autotest.git] / quote.c
blob6bfded3e3df274d6edf2fca950f7d3ad91261af9
/* quote.c	library routines for the Netwide Assembler
 *
 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
 * Julian Hall. All rights reserved. The software is
 * redistributable under the license given in the file "LICENSE"
 * distributed in the NASM archive.
 */
9 #include "compiler.h"
11 #include <assert.h>
12 #include <stdlib.h>
14 #include "nasmlib.h"
15 #include "quote.h"
/*
 * Numeric value of the hexadecimal digit character c.
 *
 * The argument is evaluated more than once, so it must be free of side
 * effects.  No validation is done: the result is only meaningful when c
 * is one of 0-9, A-F or a-f, which callers are expected to check first.
 */
#define numvalue(c)                     \
    ((c) >= 'a' ? (c) - 'a' + 10 :      \
     (c) >= 'A' ? (c) - 'A' + 10 :      \
                  (c) - '0')
/*
 * Build a quoted representation of the len bytes starting at str.
 *
 * If every byte is in the range ' '..'~' and the data contains no single
 * quote, the result is '...'; failing that, if it contains no double
 * quote, the result is "...".  In both cases the bytes are copied
 * verbatim.  Otherwise the result is a `...` string in which backquote
 * and backslash are backslash-escaped, the control characters 7-13 and
 * 27 use their single-letter escapes (\a \b \t \n \v \f \r \e), and any
 * other byte outside ' '..'~' is written as an octal escape.
 *
 * Returns a NUL-terminated buffer obtained from nasm_malloc().
 */
char *nasm_quote(char *str, size_t len)
{
    char c, c1, *p, *q, *nstr, *ep;
    unsigned char uc;
    bool sq_ok, dq_ok;
    size_t qlen;

    sq_ok = dq_ok = true;
    ep = str+len;
    qlen = 0;                   /* Length if we need `...` quotes */
    for (p = str; p < ep; p++) {
        c = *p;
        switch (c) {
        case '\'':
            sq_ok = false;
            qlen++;
            break;
        case '\"':
            dq_ok = false;
            qlen++;
            break;
        case '`':
        case '\\':
            qlen += 2;
            break;
        default:
            /*
             * Classify through unsigned char: where plain char is
             * signed, bytes >= 0x80 are negative and would otherwise be
             * sized as short escapes although they are emitted below as
             * four-character octal escapes.
             */
            uc = (unsigned char)c;
            if (uc < ' ' || uc > '~') {
                sq_ok = dq_ok = false;
                switch (c) {
                case '\a':
                case '\b':
                case '\t':
                case '\n':
                case '\v':
                case '\f':
                case '\r':
                case 27:
                    qlen += 2;
                    break;
                default:
                    c1 = (p+1 < ep) ? p[1] : 0;
                    if (uc > 077 || (c1 >= '0' && c1 <= '7'))
                        qlen += 4;      /* Must use the full form */
                    else if (uc > 07)
                        qlen += 3;
                    else
                        qlen += 2;
                    break;
                }
            } else {
                qlen++;
            }
            break;
        }
    }

    if (sq_ok || dq_ok) {
        /* Use '...' or "..." */
        nstr = nasm_malloc(len+3);
        nstr[0] = nstr[len+1] = sq_ok ? '\'' : '\"';
        nstr[len+2] = '\0';
        memcpy(nstr+1, str, len);
    } else {
        /* Need to use `...` quoted syntax */
        nstr = nasm_malloc(qlen+3);
        q = nstr;
        *q++ = '`';
        for (p = str; p < ep; p++) {
            c = *p;
            switch (c) {
            case '`':
            case '\\':
                *q++ = '\\';
                *q++ = c;
                break;
            case 7:
                *q++ = '\\';
                *q++ = 'a';
                break;
            case 8:
                *q++ = '\\';
                *q++ = 'b';
                break;
            case 9:
                *q++ = '\\';
                *q++ = 't';
                break;
            case 10:
                *q++ = '\\';
                *q++ = 'n';
                break;
            case 11:
                *q++ = '\\';
                *q++ = 'v';
                break;
            case 12:
                *q++ = '\\';
                *q++ = 'f';
                break;
            case 13:
                *q++ = '\\';
                *q++ = 'r';
                break;
            case 27:
                *q++ = '\\';
                *q++ = 'e';
                break;
            default:
                uc = (unsigned char)c;
                if (uc < ' ' || uc > '~') {
                    /*
                     * A following octal digit would be absorbed into a
                     * short escape, so pad to three digits in that case.
                     */
                    c1 = (p+1 < ep) ? p[1] : 0;
                    if (c1 >= '0' && c1 <= '7')
                        q += sprintf(q, "\\%03o", uc);
                    else
                        q += sprintf(q, "\\%o", uc);
                } else {
                    *q++ = c;
                }
                break;
            }
        }
        *q++ = '`';
        *q++ = '\0';
        /* The sizing pass and the emit pass must agree exactly */
        assert((size_t)(q-nstr) == qlen+3);
    }
    return nstr;
}
/*
 * Store the multi-byte encoding of v at q and return the position just
 * past the last byte written.
 *
 * Values up to 0x7f take one byte; larger values take a lead byte
 * followed by one to five continuation bytes of six payload bits each
 * (so up to six bytes in total).  A negative v writes nothing and
 * returns q unchanged.
 */
static char *emit_utf8(char *q, int32_t v)
{
    int trail;                  /* Number of continuation bytes */
    int lead;                   /* Marker bits of the lead byte */

    if (v < 0)
        return q;               /* Impossible - do nothing */

    if (v <= 0x7f) {
        *q++ = (char)v;
        return q;
    }

    if (v <= 0x000007ff) {
        trail = 1;
        lead = 0xc0;
    } else if (v <= 0x0000ffff) {
        trail = 2;
        lead = 0xe0;
    } else if (v <= 0x001fffff) {
        trail = 3;
        lead = 0xf0;
    } else if (v <= 0x03ffffff) {
        trail = 4;
        lead = 0xf8;
    } else {
        trail = 5;
        lead = 0xfc;
    }

    /* Lead byte carries the topmost bits, then six bits per trailer */
    *q++ = (char)(lead | (v >> (6 * trail)));
    while (trail-- > 0)
        *q++ = (char)(0x80 | ((v >> (6 * trail)) & 63));

    return q;
}
/*
 * Do an *in-place* dequoting of the specified string, returning the
 * resulting length (the result may contain embedded nulls).
 *
 * In-place replacement is possible since the unquoted length is always
 * shorter than or equal to the quoted length.
 *
 * '...' and "..." strings only have their delimiters stripped.  In a
 * `...` string a backslash introduces an escape: \a \b \e \f \n \r \t \v
 * give the corresponding control character, \x or \X takes up to two hex
 * digits, \u up to four and \U up to eight hex digits (emitted through
 * emit_utf8), a leading octal digit takes up to two further octal
 * digits, and any other escaped character stands for itself.  A
 * numeric escape with no digits yields its escape letter literally.
 *
 * A string that is not recognisably quoted is left untouched and its
 * length is returned.
 */
size_t nasm_unquote(char *str)
{
    size_t ln;
    char bq, eq;
    char *p, *q, *ep;
    char *escp = NULL;
    char c;
    enum unq_state {
        st_start,
        st_backslash,
        st_hex,
        st_oct,
        st_ucs,
    } state;
    int ndig = 0;
    int32_t nval = 0;

    bq = str[0];
    if (!bq)
        return 0;
    ln = strlen(str);
    eq = str[ln-1];

    /*
     * Require room for both delimiters: for a one-character string the
     * opening quote is also the last character, and ln-2 would wrap
     * around to a huge size_t.
     */
    if (ln >= 2 && (bq == '\'' || bq == '\"') && bq == eq) {
        /* '...' or "..." string */
        memmove(str, str+1, ln-2);
        str[ln-2] = '\0';
        return ln-2;
    }

    /*
     * NOTE(review): a backquote at either end alone is enough to take
     * this path, and the first and last characters are then dropped
     * unconditionally -- confirm that this leniency is intended.
     */
    if (bq == '`' || eq == '`') {
        /* `...` string */
        q = str;
        p = str+1;
        ep = str+ln-1;
        state = st_start;

        while (p < ep) {
            c = *p++;
            switch (state) {
            case st_start:
                if (c == '\\')
                    state = st_backslash;
                else
                    *q++ = c;
                break;

            case st_backslash:
                state = st_start;
                escp = p;       /* Beginning of argument sequence */
                nval = 0;
                switch (c) {
                case 'a':
                    *q++ = 7;
                    break;
                case 'b':
                    *q++ = 8;
                    break;
                case 'e':
                    *q++ = 27;
                    break;
                case 'f':
                    *q++ = 12;
                    break;
                case 'n':
                    *q++ = 10;
                    break;
                case 'r':
                    *q++ = 13;
                    break;
                case 't':
                    *q++ = 9;
                    break;
                case 'u':
                    state = st_ucs;
                    ndig = 4;
                    break;
                case 'U':
                    state = st_ucs;
                    ndig = 8;
                    break;
                case 'v':
                    *q++ = 11;
                    break;
                case 'x':
                case 'X':
                    state = st_hex;
                    ndig = 2;
                    break;
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                    state = st_oct;
                    ndig = 2;   /* Up to two more digits */
                    nval = c - '0';
                    break;
                default:
                    *q++ = c;
                    break;
                }
                break;

            case st_oct:
                if (c >= '0' && c <= '7') {
                    nval = (nval << 3) + (c - '0');
                    if (!--ndig) {
                        *q++ = nval;
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    *q++ = nval;
                    state = st_start;
                }
                break;

            case st_hex:
                if ((c >= '0' && c <= '9') ||
                    (c >= 'A' && c <= 'F') ||
                    (c >= 'a' && c <= 'f')) {
                    nval = (nval << 4) + numvalue(c);
                    /* Emit only once the digit budget is used up */
                    if (!--ndig) {
                        *q++ = nval;
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    /* No digits at all: keep the escape letter itself */
                    *q++ = (p > escp) ? nval : escp[-1];
                    state = st_start;
                }
                break;

            case st_ucs:
                if ((c >= '0' && c <= '9') ||
                    (c >= 'A' && c <= 'F') ||
                    (c >= 'a' && c <= 'f')) {
                    /*
                     * Shift as unsigned: eight digits can reach the sign
                     * bit, and left-shifting into it is undefined for a
                     * signed type.
                     */
                    nval = (int32_t)(((uint32_t)nval << 4) + numvalue(c));
                    if (!--ndig) {
                        q = emit_utf8(q, nval);
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    if (p > escp)
                        q = emit_utf8(q, nval);
                    else
                        *q++ = escp[-1];
                    state = st_start;
                }
                break;
            }
        }

        /* Flush an escape that was cut short by the end of the string */
        switch (state) {
        case st_start:
        case st_backslash:
            break;
        case st_oct:
            *q++ = nval;
            break;
        case st_hex:
            *q++ = (p > escp) ? nval : escp[-1];
            break;
        case st_ucs:
            if (p > escp)
                q = emit_utf8(q, nval);
            else
                *q++ = escp[-1];
            break;
        }
        *q = '\0';
        return q-str;
    }

    /* Otherwise, just return the input... */
    return ln;
}
/*
 * Find the end of a quoted string; returns the pointer to the terminating
 * character (either the ending quote or the null character, if
 * unterminated).
 *
 * '...' and "..." strings end at the next occurrence of the opening
 * quote.  In a `...` string a backslash protects the character after it,
 * so an escaped backquote does not end the string.  If str does not
 * start with a quote character, str itself is returned.
 */
char *nasm_skip_string(char *str)
{
    char bq;
    char *p;
    char c;
    enum unq_state {
        st_start,
        st_backslash,
    } state;

    bq = str[0];
    if (bq == '\'' || bq == '\"') {
        /* '...' or "..." string */
        for (p = str+1; *p && *p != bq; p++)
            ;
        return p;
    } else if (bq == '`') {
        /* `...` string */
        p = str+1;
        state = st_start;

        while ((c = *p++)) {
            switch (state) {
            case st_start:
                switch (c) {
                case '\\':
                    state = st_backslash;
                    break;
                case '`':
                    return p-1; /* Found the end */
                default:
                    break;
                }
                break;

            case st_backslash:
                /*
                 * Note: for the purpose of finding the end of the string,
                 * all successor states to st_backslash are functionally
                 * equivalent to st_start, since either a backslash or
                 * a backquote will force a return to the st_start state.
                 */
                state = st_start;
                break;
            }
        }
        /*
         * Unterminated string: the loop has already stepped past the
         * null character, so back up to point at it.
         */
        return p-1;
    } else {
        return str;             /* Not a string... */
    }
}