Add.
[gsasl.git] / gl / quotearg.c
blobf7f326ac5017475d76046d39962aa0b48f3ad09e
1 /* quotearg.c - quote arguments for output
3 Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006, 2007 Free
4 Software Foundation, Inc.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
20 /* Written by Paul Eggert <eggert@twinsun.com> */
22 #include <config.h>
24 #include "quotearg.h"
26 #include "xalloc.h"
28 #include <ctype.h>
29 #include <errno.h>
30 #include <limits.h>
31 #include <stdbool.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <wchar.h>
35 #include <wctype.h>
37 #include "gettext.h"
/* Translation helpers: _() passes MSGID to gettext, while N_() expands
   to its argument unchanged.  */
#define _(msgid) gettext (msgid)
#define N_(msgid) msgid

#if !HAVE_MBRTOWC
/* Disable multibyte processing entirely.  Since MB_CUR_MAX is 1, the
   other macros are defined only for documentation and to satisfy C
   syntax.  */
# undef MB_CUR_MAX
# define MB_CUR_MAX 1
# undef mbstate_t
# define mbstate_t int
/* Single-byte stand-in: stores *S into *PWC and yields 0 for a null
   byte, 1 otherwise.  N and PS are ignored.  */
# define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
# define iswprint(wc) isprint ((unsigned char) (wc))
# undef HAVE_MBSINIT
#endif

/* Without a usable mbsinit, report every conversion state as initial.  */
#if !defined mbsinit && !HAVE_MBSINIT
# define mbsinit(ps) 1
#endif

/* Fallback for environments whose headers do not define SIZE_MAX.  */
#ifndef SIZE_MAX
# define SIZE_MAX ((size_t) -1)
#endif

/* Number of bits in an int; used to index the per-character bit vector
   in struct quoting_options.  */
#define INT_BITS (sizeof (int) * CHAR_BIT)
64 struct quoting_options
66 /* Basic quoting style. */
67 enum quoting_style style;
69 /* Quote the characters indicated by this bit vector even if the
70 quoting style would not normally require them to be quoted. */
71 unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
/* Names of quoting styles, in the same order as quoting_style_vals.
   The table ends with a null pointer so that it can be scanned
   without a separate element count.  */
char const *const quoting_style_args[] =
{
  "literal",
  "shell",
  "shell-always",
  "c",
  "escape",
  "locale",
  "clocale",
  0
};
87 /* Correspondences to quoting style names. */
88 enum quoting_style const quoting_style_vals[] =
90 literal_quoting_style,
91 shell_quoting_style,
92 shell_always_quoting_style,
93 c_quoting_style,
94 escape_quoting_style,
95 locale_quoting_style,
96 clocale_quoting_style
/* The default quoting options, used by the functions in this file
   whenever they are handed a null options pointer.  Being a static
   object with no initializer, it starts out zero-initialized.  */
static struct quoting_options default_quoting_options;
102 /* Allocate a new set of quoting options, with contents initially identical
103 to O if O is not null, or to the default if O is null.
104 It is the caller's responsibility to free the result. */
105 struct quoting_options *
106 clone_quoting_options (struct quoting_options *o)
108 int e = errno;
109 struct quoting_options *p = xmemdup (o ? o : &default_quoting_options,
110 sizeof *o);
111 errno = e;
112 return p;
115 /* Get the value of O's quoting style. If O is null, use the default. */
116 enum quoting_style
117 get_quoting_style (struct quoting_options *o)
119 return (o ? o : &default_quoting_options)->style;
122 /* In O (or in the default if O is null),
123 set the value of the quoting style to S. */
124 void
125 set_quoting_style (struct quoting_options *o, enum quoting_style s)
127 (o ? o : &default_quoting_options)->style = s;
130 /* In O (or in the default if O is null),
131 set the value of the quoting options for character C to I.
132 Return the old value. Currently, the only values defined for I are
133 0 (the default) and 1 (which means to quote the character even if
134 it would not otherwise be quoted). */
136 set_char_quoting (struct quoting_options *o, char c, int i)
138 unsigned char uc = c;
139 unsigned int *p =
140 (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
141 int shift = uc % INT_BITS;
142 int r = (*p >> shift) & 1;
143 *p ^= ((i & 1) ^ r) << shift;
144 return r;
147 /* MSGID approximates a quotation mark. Return its translation if it
148 has one; otherwise, return either it or "\"", depending on S. */
149 static char const *
150 gettext_quote (char const *msgid, enum quoting_style s)
152 char const *translation = _(msgid);
153 if (translation == msgid && s == clocale_quoting_style)
154 translation = "\"";
155 return translation;
158 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
159 argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
160 non-quoting-style part of O to control quoting.
161 Terminate the output with a null character, and return the written
162 size of the output, not counting the terminating null.
163 If BUFFERSIZE is too small to store the output string, return the
164 value that would have been returned had BUFFERSIZE been large enough.
165 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE.
167 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
168 ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
169 style specified by O, and O may not be null. */
171 static size_t
172 quotearg_buffer_restyled (char *buffer, size_t buffersize,
173 char const *arg, size_t argsize,
174 enum quoting_style quoting_style,
175 struct quoting_options const *o)
177 size_t i;
178 size_t len = 0;
179 char const *quote_string = 0;
180 size_t quote_string_len = 0;
181 bool backslash_escapes = false;
182 bool unibyte_locale = MB_CUR_MAX == 1;
184 #define STORE(c) \
185 do \
187 if (len < buffersize) \
188 buffer[len] = (c); \
189 len++; \
191 while (0)
193 switch (quoting_style)
195 case c_quoting_style:
196 STORE ('"');
197 backslash_escapes = true;
198 quote_string = "\"";
199 quote_string_len = 1;
200 break;
202 case escape_quoting_style:
203 backslash_escapes = true;
204 break;
206 case locale_quoting_style:
207 case clocale_quoting_style:
209 /* TRANSLATORS:
210 Get translations for open and closing quotation marks.
212 The message catalog should translate "`" to a left
213 quotation mark suitable for the locale, and similarly for
214 "'". If the catalog has no translation,
215 locale_quoting_style quotes `like this', and
216 clocale_quoting_style quotes "like this".
218 For example, an American English Unicode locale should
219 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
220 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
221 MARK). A British English Unicode locale should instead
222 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
223 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.
225 If you don't know what to put here, please see
226 <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs>
227 and use glyphs suitable for your language. */
229 char const *left = gettext_quote (N_("`"), quoting_style);
230 char const *right = gettext_quote (N_("'"), quoting_style);
231 for (quote_string = left; *quote_string; quote_string++)
232 STORE (*quote_string);
233 backslash_escapes = true;
234 quote_string = right;
235 quote_string_len = strlen (quote_string);
237 break;
239 case shell_always_quoting_style:
240 STORE ('\'');
241 quote_string = "'";
242 quote_string_len = 1;
243 break;
245 default:
246 break;
249 for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++)
251 unsigned char c;
252 unsigned char esc;
254 if (backslash_escapes
255 && quote_string_len
256 && i + quote_string_len <= argsize
257 && memcmp (arg + i, quote_string, quote_string_len) == 0)
258 STORE ('\\');
260 c = arg[i];
261 switch (c)
263 case '\0':
264 if (backslash_escapes)
266 STORE ('\\');
267 STORE ('0');
268 STORE ('0');
269 c = '0';
271 break;
273 case '?':
274 switch (quoting_style)
276 case shell_quoting_style:
277 goto use_shell_always_quoting_style;
279 case c_quoting_style:
280 if (i + 2 < argsize && arg[i + 1] == '?')
281 switch (arg[i + 2])
283 case '!': case '\'':
284 case '(': case ')': case '-': case '/':
285 case '<': case '=': case '>':
286 /* Escape the second '?' in what would otherwise be
287 a trigraph. */
288 c = arg[i + 2];
289 i += 2;
290 STORE ('?');
291 STORE ('\\');
292 STORE ('?');
293 break;
295 default:
296 break;
298 break;
300 default:
301 break;
303 break;
305 case '\a': esc = 'a'; goto c_escape;
306 case '\b': esc = 'b'; goto c_escape;
307 case '\f': esc = 'f'; goto c_escape;
308 case '\n': esc = 'n'; goto c_and_shell_escape;
309 case '\r': esc = 'r'; goto c_and_shell_escape;
310 case '\t': esc = 't'; goto c_and_shell_escape;
311 case '\v': esc = 'v'; goto c_escape;
312 case '\\': esc = c; goto c_and_shell_escape;
314 c_and_shell_escape:
315 if (quoting_style == shell_quoting_style)
316 goto use_shell_always_quoting_style;
317 c_escape:
318 if (backslash_escapes)
320 c = esc;
321 goto store_escape;
323 break;
325 case '{': case '}': /* sometimes special if isolated */
326 if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1))
327 break;
328 /* Fall through. */
329 case '#': case '~':
330 if (i != 0)
331 break;
332 /* Fall through. */
333 case ' ':
334 case '!': /* special in bash */
335 case '"': case '$': case '&':
336 case '(': case ')': case '*': case ';':
337 case '<':
338 case '=': /* sometimes special in 0th or (with "set -k") later args */
339 case '>': case '[':
340 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
341 case '`': case '|':
342 /* A shell special character. In theory, '$' and '`' could
343 be the first bytes of multibyte characters, which means
344 we should check them with mbrtowc, but in practice this
345 doesn't happen so it's not worth worrying about. */
346 if (quoting_style == shell_quoting_style)
347 goto use_shell_always_quoting_style;
348 break;
350 case '\'':
351 switch (quoting_style)
353 case shell_quoting_style:
354 goto use_shell_always_quoting_style;
356 case shell_always_quoting_style:
357 STORE ('\'');
358 STORE ('\\');
359 STORE ('\'');
360 break;
362 default:
363 break;
365 break;
367 case '%': case '+': case ',': case '-': case '.': case '/':
368 case '0': case '1': case '2': case '3': case '4': case '5':
369 case '6': case '7': case '8': case '9': case ':':
370 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
371 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
372 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
373 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
374 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
375 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
376 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
377 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
378 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
379 /* These characters don't cause problems, no matter what the
380 quoting style is. They cannot start multibyte sequences. */
381 break;
383 default:
384 /* If we have a multibyte sequence, copy it until we reach
385 its end, find an error, or come back to the initial shift
386 state. For C-like styles, if the sequence has
387 unprintable characters, escape the whole sequence, since
388 we can't easily escape single characters within it. */
390 /* Length of multibyte sequence found so far. */
391 size_t m;
393 bool printable;
395 if (unibyte_locale)
397 m = 1;
398 printable = isprint (c) != 0;
400 else
402 mbstate_t mbstate;
403 memset (&mbstate, 0, sizeof mbstate);
405 m = 0;
406 printable = true;
407 if (argsize == SIZE_MAX)
408 argsize = strlen (arg);
412 wchar_t w;
413 size_t bytes = mbrtowc (&w, &arg[i + m],
414 argsize - (i + m), &mbstate);
415 if (bytes == 0)
416 break;
417 else if (bytes == (size_t) -1)
419 printable = false;
420 break;
422 else if (bytes == (size_t) -2)
424 printable = false;
425 while (i + m < argsize && arg[i + m])
426 m++;
427 break;
429 else
431 /* Work around a bug with older shells that "see" a '\'
432 that is really the 2nd byte of a multibyte character.
433 In practice the problem is limited to ASCII
434 chars >= '@' that are shell special chars. */
435 if ('[' == 0x5b && quoting_style == shell_quoting_style)
437 size_t j;
438 for (j = 1; j < bytes; j++)
439 switch (arg[i + m + j])
441 case '[': case '\\': case '^':
442 case '`': case '|':
443 goto use_shell_always_quoting_style;
445 default:
446 break;
450 if (! iswprint (w))
451 printable = false;
452 m += bytes;
455 while (! mbsinit (&mbstate));
458 if (1 < m || (backslash_escapes && ! printable))
460 /* Output a multibyte sequence, or an escaped
461 unprintable unibyte character. */
462 size_t ilim = i + m;
464 for (;;)
466 if (backslash_escapes && ! printable)
468 STORE ('\\');
469 STORE ('0' + (c >> 6));
470 STORE ('0' + ((c >> 3) & 7));
471 c = '0' + (c & 7);
473 if (ilim <= i + 1)
474 break;
475 STORE (c);
476 c = arg[++i];
479 goto store_c;
484 if (! (backslash_escapes
485 && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
486 goto store_c;
488 store_escape:
489 STORE ('\\');
491 store_c:
492 STORE (c);
495 if (i == 0 && quoting_style == shell_quoting_style)
496 goto use_shell_always_quoting_style;
498 if (quote_string)
499 for (; *quote_string; quote_string++)
500 STORE (*quote_string);
502 if (len < buffersize)
503 buffer[len] = '\0';
504 return len;
506 use_shell_always_quoting_style:
507 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
508 shell_always_quoting_style, o);
511 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
512 argument ARG (of size ARGSIZE), using O to control quoting.
513 If O is null, use the default.
514 Terminate the output with a null character, and return the written
515 size of the output, not counting the terminating null.
516 If BUFFERSIZE is too small to store the output string, return the
517 value that would have been returned had BUFFERSIZE been large enough.
518 If ARGSIZE is SIZE_MAX, use the string length of the argument for
519 ARGSIZE. */
520 size_t
521 quotearg_buffer (char *buffer, size_t buffersize,
522 char const *arg, size_t argsize,
523 struct quoting_options const *o)
525 struct quoting_options const *p = o ? o : &default_quoting_options;
526 int e = errno;
527 size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
528 p->style, p);
529 errno = e;
530 return r;
/* Like quotearg_buffer (..., ARG, ARGSIZE, O), but return the quoted
   string in newly allocated storage.  errno is left as it was on
   entry.  */
char *
quotearg_alloc (char const *arg, size_t argsize,
                struct quoting_options const *o)
{
  int saved_errno = errno;

  /* A first pass with an empty buffer only measures the output.  */
  size_t quoted_len = quotearg_buffer (0, 0, arg, argsize, o);
  size_t capacity = quoted_len + 1;
  char *result = xcharalloc (capacity);

  /* The second pass produces the text.  */
  quotearg_buffer (result, capacity, arg, argsize, o);

  errno = saved_errno;
  return result;
}
/* One storage slot: VAL points to a buffer holding SIZE bytes.  */
struct slotvec
{
  size_t size;
  char *val;
};

/* Slot 0 starts out backed by a fixed buffer rather than the heap, so
   that the caller can always quote one small component of a "memory
   exhausted" message in slot 0.  */
static char slot0[256];

/* Number of entries in the vector that SLOTVEC points to.  */
static unsigned int nslots = 1;

/* The initial one-entry vector, and the pointer to the current one.  */
static struct slotvec slotvec0 = {sizeof slot0, slot0};
static struct slotvec *slotvec = &slotvec0;
561 void
562 quotearg_free (void)
564 struct slotvec *sv = slotvec;
565 unsigned int i;
566 for (i = 1; i < nslots; i++)
567 free (sv[i].val);
568 if (sv[0].val != slot0)
570 free (sv[0].val);
571 slotvec0.size = sizeof slot0;
572 slotvec0.val = slot0;
574 if (sv != &slotvec0)
576 free (sv);
577 slotvec = &slotvec0;
579 nslots = 1;
582 /* Use storage slot N to return a quoted version of argument ARG.
583 ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a
584 null-terminated string.
585 OPTIONS specifies the quoting options.
586 The returned value points to static storage that can be
587 reused by the next call to this function with the same value of N.
588 N must be nonnegative. N is deliberately declared with type "int"
589 to allow for future extensions (using negative values). */
590 static char *
591 quotearg_n_options (int n, char const *arg, size_t argsize,
592 struct quoting_options const *options)
594 int e = errno;
596 unsigned int n0 = n;
597 struct slotvec *sv = slotvec;
599 if (n < 0)
600 abort ();
602 if (nslots <= n0)
604 /* FIXME: technically, the type of n1 should be `unsigned int',
605 but that evokes an unsuppressible warning from gcc-4.0.1 and
606 older. If gcc ever provides an option to suppress that warning,
607 revert to the original type, so that the test in xalloc_oversized
608 is once again performed only at compile time. */
609 size_t n1 = n0 + 1;
610 bool preallocated = (sv == &slotvec0);
612 if (xalloc_oversized (n1, sizeof *sv))
613 xalloc_die ();
615 slotvec = sv = xrealloc (preallocated ? NULL : sv, n1 * sizeof *sv);
616 if (preallocated)
617 *sv = slotvec0;
618 memset (sv + nslots, 0, (n1 - nslots) * sizeof *sv);
619 nslots = n1;
623 size_t size = sv[n].size;
624 char *val = sv[n].val;
625 size_t qsize = quotearg_buffer (val, size, arg, argsize, options);
627 if (size <= qsize)
629 sv[n].size = size = qsize + 1;
630 if (val != slot0)
631 free (val);
632 sv[n].val = val = xcharalloc (size);
633 quotearg_buffer (val, size, arg, argsize, options);
636 errno = e;
637 return val;
641 char *
642 quotearg_n (int n, char const *arg)
644 return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options);
/* Quote the null-terminated string ARG into slot 0 using the default
   quoting options, and return the slot's buffer.  */
char *
quotearg (char const *arg)
{
  int const slot = 0;

  return quotearg_n (slot, arg);
}
653 /* Return quoting options for STYLE, with no extra quoting. */
654 static struct quoting_options
655 quoting_options_from_style (enum quoting_style style)
657 struct quoting_options o;
658 o.style = style;
659 memset (o.quote_these_too, 0, sizeof o.quote_these_too);
660 return o;
663 char *
664 quotearg_n_style (int n, enum quoting_style s, char const *arg)
666 struct quoting_options const o = quoting_options_from_style (s);
667 return quotearg_n_options (n, arg, SIZE_MAX, &o);
670 char *
671 quotearg_n_style_mem (int n, enum quoting_style s,
672 char const *arg, size_t argsize)
674 struct quoting_options const o = quoting_options_from_style (s);
675 return quotearg_n_options (n, arg, argsize, &o);
678 char *
679 quotearg_style (enum quoting_style s, char const *arg)
681 return quotearg_n_style (0, s, arg);
684 char *
685 quotearg_char (char const *arg, char ch)
687 struct quoting_options options;
688 options = default_quoting_options;
689 set_char_quoting (&options, ch, 1);
690 return quotearg_n_options (0, arg, SIZE_MAX, &options);
/* Quote the null-terminated string ARG into slot 0 using the default
   quoting options, additionally forcing ':' to be quoted.  */
char *
quotearg_colon (char const *arg)
{
  char const extra = ':';

  return quotearg_char (arg, extra);
}