Add.
[gsasl.git] / gl / quotearg.c
blobc9e89bf28855b613874af41e834599a74611e892
1 /* quotearg.c - quote arguments for output
3 Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006, 2007 Free
4 Software Foundation, Inc.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
20 /* Written by Paul Eggert <eggert@twinsun.com> */
22 #include <config.h>
24 #include "quotearg.h"
26 #include "xalloc.h"
28 #include <ctype.h>
29 #include <errno.h>
30 #include <limits.h>
31 #include <stdbool.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <wchar.h>
36 #include "gettext.h"
37 #define _(msgid) gettext (msgid)
38 #define N_(msgid) msgid
40 #if !HAVE_MBRTOWC
41 /* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the
42 other macros are defined only for documentation and to satisfy C
43 syntax. */
44 # undef MB_CUR_MAX
45 # define MB_CUR_MAX 1
46 # undef mbstate_t
47 # define mbstate_t int
48 # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
49 # define iswprint(wc) isprint ((unsigned char) (wc))
50 # undef HAVE_MBSINIT
51 #endif
53 #if !defined mbsinit && !HAVE_MBSINIT
54 # define mbsinit(ps) 1
55 #endif
57 #include <wctype.h>
59 #ifndef SIZE_MAX
60 # define SIZE_MAX ((size_t) -1)
61 #endif
63 #define INT_BITS (sizeof (int) * CHAR_BIT)
65 struct quoting_options
67 /* Basic quoting style. */
68 enum quoting_style style;
70 /* Quote the characters indicated by this bit vector even if the
71 quoting style would not normally require them to be quoted. */
72 unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
/* Names of quoting styles.  The list ends with a null pointer so that
   code walking the table can find its end without a separate count.  */
char const *const quoting_style_args[] =
{
  "literal",
  "shell",
  "shell-always",
  "c",
  "escape",
  "locale",
  "clocale",
  NULL
};
88 /* Correspondences to quoting style names. */
89 enum quoting_style const quoting_style_vals[] =
91 literal_quoting_style,
92 shell_quoting_style,
93 shell_always_quoting_style,
94 c_quoting_style,
95 escape_quoting_style,
96 locale_quoting_style,
97 clocale_quoting_style
100 /* The default quoting options. */
101 static struct quoting_options default_quoting_options;
103 /* Allocate a new set of quoting options, with contents initially identical
104 to O if O is not null, or to the default if O is null.
105 It is the caller's responsibility to free the result. */
106 struct quoting_options *
107 clone_quoting_options (struct quoting_options *o)
109 int e = errno;
110 struct quoting_options *p = xmemdup (o ? o : &default_quoting_options,
111 sizeof *o);
112 errno = e;
113 return p;
116 /* Get the value of O's quoting style. If O is null, use the default. */
117 enum quoting_style
118 get_quoting_style (struct quoting_options *o)
120 return (o ? o : &default_quoting_options)->style;
123 /* In O (or in the default if O is null),
124 set the value of the quoting style to S. */
125 void
126 set_quoting_style (struct quoting_options *o, enum quoting_style s)
128 (o ? o : &default_quoting_options)->style = s;
131 /* In O (or in the default if O is null),
132 set the value of the quoting options for character C to I.
133 Return the old value. Currently, the only values defined for I are
134 0 (the default) and 1 (which means to quote the character even if
135 it would not otherwise be quoted). */
137 set_char_quoting (struct quoting_options *o, char c, int i)
139 unsigned char uc = c;
140 unsigned int *p =
141 (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
142 int shift = uc % INT_BITS;
143 int r = (*p >> shift) & 1;
144 *p ^= ((i & 1) ^ r) << shift;
145 return r;
148 /* MSGID approximates a quotation mark. Return its translation if it
149 has one; otherwise, return either it or "\"", depending on S. */
150 static char const *
151 gettext_quote (char const *msgid, enum quoting_style s)
153 char const *translation = _(msgid);
154 if (translation == msgid && s == clocale_quoting_style)
155 translation = "\"";
156 return translation;
159 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
160 argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
161 non-quoting-style part of O to control quoting.
162 Terminate the output with a null character, and return the written
163 size of the output, not counting the terminating null.
164 If BUFFERSIZE is too small to store the output string, return the
165 value that would have been returned had BUFFERSIZE been large enough.
166 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE.
168 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
169 ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
170 style specified by O, and O may not be null. */
172 static size_t
173 quotearg_buffer_restyled (char *buffer, size_t buffersize,
174 char const *arg, size_t argsize,
175 enum quoting_style quoting_style,
176 struct quoting_options const *o)
178 size_t i;
179 size_t len = 0;
180 char const *quote_string = 0;
181 size_t quote_string_len = 0;
182 bool backslash_escapes = false;
183 bool unibyte_locale = MB_CUR_MAX == 1;
185 #define STORE(c) \
186 do \
188 if (len < buffersize) \
189 buffer[len] = (c); \
190 len++; \
192 while (0)
194 switch (quoting_style)
196 case c_quoting_style:
197 STORE ('"');
198 backslash_escapes = true;
199 quote_string = "\"";
200 quote_string_len = 1;
201 break;
203 case escape_quoting_style:
204 backslash_escapes = true;
205 break;
207 case locale_quoting_style:
208 case clocale_quoting_style:
210 /* TRANSLATORS:
211 Get translations for open and closing quotation marks.
213 The message catalog should translate "`" to a left
214 quotation mark suitable for the locale, and similarly for
215 "'". If the catalog has no translation,
216 locale_quoting_style quotes `like this', and
217 clocale_quoting_style quotes "like this".
219 For example, an American English Unicode locale should
220 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
221 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
222 MARK). A British English Unicode locale should instead
223 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
224 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.
226 If you don't know what to put here, please see
227 <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs>
228 and use glyphs suitable for your language. */
230 char const *left = gettext_quote (N_("`"), quoting_style);
231 char const *right = gettext_quote (N_("'"), quoting_style);
232 for (quote_string = left; *quote_string; quote_string++)
233 STORE (*quote_string);
234 backslash_escapes = true;
235 quote_string = right;
236 quote_string_len = strlen (quote_string);
238 break;
240 case shell_always_quoting_style:
241 STORE ('\'');
242 quote_string = "'";
243 quote_string_len = 1;
244 break;
246 default:
247 break;
250 for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++)
252 unsigned char c;
253 unsigned char esc;
255 if (backslash_escapes
256 && quote_string_len
257 && i + quote_string_len <= argsize
258 && memcmp (arg + i, quote_string, quote_string_len) == 0)
259 STORE ('\\');
261 c = arg[i];
262 switch (c)
264 case '\0':
265 if (backslash_escapes)
267 STORE ('\\');
268 STORE ('0');
269 STORE ('0');
270 c = '0';
272 break;
274 case '?':
275 switch (quoting_style)
277 case shell_quoting_style:
278 goto use_shell_always_quoting_style;
280 case c_quoting_style:
281 if (i + 2 < argsize && arg[i + 1] == '?')
282 switch (arg[i + 2])
284 case '!': case '\'':
285 case '(': case ')': case '-': case '/':
286 case '<': case '=': case '>':
287 /* Escape the second '?' in what would otherwise be
288 a trigraph. */
289 c = arg[i + 2];
290 i += 2;
291 STORE ('?');
292 STORE ('\\');
293 STORE ('?');
294 break;
296 default:
297 break;
299 break;
301 default:
302 break;
304 break;
306 case '\a': esc = 'a'; goto c_escape;
307 case '\b': esc = 'b'; goto c_escape;
308 case '\f': esc = 'f'; goto c_escape;
309 case '\n': esc = 'n'; goto c_and_shell_escape;
310 case '\r': esc = 'r'; goto c_and_shell_escape;
311 case '\t': esc = 't'; goto c_and_shell_escape;
312 case '\v': esc = 'v'; goto c_escape;
313 case '\\': esc = c; goto c_and_shell_escape;
315 c_and_shell_escape:
316 if (quoting_style == shell_quoting_style)
317 goto use_shell_always_quoting_style;
318 c_escape:
319 if (backslash_escapes)
321 c = esc;
322 goto store_escape;
324 break;
326 case '{': case '}': /* sometimes special if isolated */
327 if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1))
328 break;
329 /* Fall through. */
330 case '#': case '~':
331 if (i != 0)
332 break;
333 /* Fall through. */
334 case ' ':
335 case '!': /* special in bash */
336 case '"': case '$': case '&':
337 case '(': case ')': case '*': case ';':
338 case '<':
339 case '=': /* sometimes special in 0th or (with "set -k") later args */
340 case '>': case '[':
341 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
342 case '`': case '|':
343 /* A shell special character. In theory, '$' and '`' could
344 be the first bytes of multibyte characters, which means
345 we should check them with mbrtowc, but in practice this
346 doesn't happen so it's not worth worrying about. */
347 if (quoting_style == shell_quoting_style)
348 goto use_shell_always_quoting_style;
349 break;
351 case '\'':
352 switch (quoting_style)
354 case shell_quoting_style:
355 goto use_shell_always_quoting_style;
357 case shell_always_quoting_style:
358 STORE ('\'');
359 STORE ('\\');
360 STORE ('\'');
361 break;
363 default:
364 break;
366 break;
368 case '%': case '+': case ',': case '-': case '.': case '/':
369 case '0': case '1': case '2': case '3': case '4': case '5':
370 case '6': case '7': case '8': case '9': case ':':
371 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
372 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
373 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
374 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
375 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
376 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
377 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
378 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
379 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
380 /* These characters don't cause problems, no matter what the
381 quoting style is. They cannot start multibyte sequences. */
382 break;
384 default:
385 /* If we have a multibyte sequence, copy it until we reach
386 its end, find an error, or come back to the initial shift
387 state. For C-like styles, if the sequence has
388 unprintable characters, escape the whole sequence, since
389 we can't easily escape single characters within it. */
391 /* Length of multibyte sequence found so far. */
392 size_t m;
394 bool printable;
396 if (unibyte_locale)
398 m = 1;
399 printable = isprint (c) != 0;
401 else
403 mbstate_t mbstate;
404 memset (&mbstate, 0, sizeof mbstate);
406 m = 0;
407 printable = true;
408 if (argsize == SIZE_MAX)
409 argsize = strlen (arg);
413 wchar_t w;
414 size_t bytes = mbrtowc (&w, &arg[i + m],
415 argsize - (i + m), &mbstate);
416 if (bytes == 0)
417 break;
418 else if (bytes == (size_t) -1)
420 printable = false;
421 break;
423 else if (bytes == (size_t) -2)
425 printable = false;
426 while (i + m < argsize && arg[i + m])
427 m++;
428 break;
430 else
432 /* Work around a bug with older shells that "see" a '\'
433 that is really the 2nd byte of a multibyte character.
434 In practice the problem is limited to ASCII
435 chars >= '@' that are shell special chars. */
436 if ('[' == 0x5b && quoting_style == shell_quoting_style)
438 size_t j;
439 for (j = 1; j < bytes; j++)
440 switch (arg[i + m + j])
442 case '[': case '\\': case '^':
443 case '`': case '|':
444 goto use_shell_always_quoting_style;
446 default:
447 break;
451 if (! iswprint (w))
452 printable = false;
453 m += bytes;
456 while (! mbsinit (&mbstate));
459 if (1 < m || (backslash_escapes && ! printable))
461 /* Output a multibyte sequence, or an escaped
462 unprintable unibyte character. */
463 size_t ilim = i + m;
465 for (;;)
467 if (backslash_escapes && ! printable)
469 STORE ('\\');
470 STORE ('0' + (c >> 6));
471 STORE ('0' + ((c >> 3) & 7));
472 c = '0' + (c & 7);
474 if (ilim <= i + 1)
475 break;
476 STORE (c);
477 c = arg[++i];
480 goto store_c;
485 if (! (backslash_escapes
486 && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
487 goto store_c;
489 store_escape:
490 STORE ('\\');
492 store_c:
493 STORE (c);
496 if (i == 0 && quoting_style == shell_quoting_style)
497 goto use_shell_always_quoting_style;
499 if (quote_string)
500 for (; *quote_string; quote_string++)
501 STORE (*quote_string);
503 if (len < buffersize)
504 buffer[len] = '\0';
505 return len;
507 use_shell_always_quoting_style:
508 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
509 shell_always_quoting_style, o);
512 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
513 argument ARG (of size ARGSIZE), using O to control quoting.
514 If O is null, use the default.
515 Terminate the output with a null character, and return the written
516 size of the output, not counting the terminating null.
517 If BUFFERSIZE is too small to store the output string, return the
518 value that would have been returned had BUFFERSIZE been large enough.
519 If ARGSIZE is SIZE_MAX, use the string length of the argument for
520 ARGSIZE. */
521 size_t
522 quotearg_buffer (char *buffer, size_t buffersize,
523 char const *arg, size_t argsize,
524 struct quoting_options const *o)
526 struct quoting_options const *p = o ? o : &default_quoting_options;
527 int e = errno;
528 size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
529 p->style, p);
530 errno = e;
531 return r;
/* Like quotearg_buffer (..., ARG, ARGSIZE, O), but return the quoted
   string in newly allocated storage.  errno is left as it was on
   entry.  */
char *
quotearg_alloc (char const *arg, size_t argsize,
                struct quoting_options const *o)
{
  int saved_errno = errno;

  /* The first pass has no buffer and only measures the quoted length;
     one more byte is needed for the terminating null.  */
  size_t needed = quotearg_buffer (0, 0, arg, argsize, o) + 1;
  char *result = xcharalloc (needed);

  quotearg_buffer (result, needed, arg, argsize, o);
  errno = saved_errno;
  return result;
}
/* One storage slot: a pointer to a buffer together with the buffer's
   size.  */
struct slotvec
{
  size_t size;
  char *val;
};
555 /* Preallocate a slot 0 buffer, so that the caller can always quote
556 one small component of a "memory exhausted" message in slot 0. */
557 static char slot0[256];
558 static unsigned int nslots = 1;
559 static struct slotvec slotvec0 = {sizeof slot0, slot0};
560 static struct slotvec *slotvec = &slotvec0;
562 void
563 quotearg_free (void)
565 struct slotvec *sv = slotvec;
566 unsigned int i;
567 for (i = 1; i < nslots; i++)
568 free (sv[i].val);
569 if (sv[0].val != slot0)
571 free (sv[0].val);
572 slotvec0.size = sizeof slot0;
573 slotvec0.val = slot0;
575 if (sv != &slotvec0)
577 free (sv);
578 slotvec = &slotvec0;
580 nslots = 1;
583 /* Use storage slot N to return a quoted version of argument ARG.
584 ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a
585 null-terminated string.
586 OPTIONS specifies the quoting options.
587 The returned value points to static storage that can be
588 reused by the next call to this function with the same value of N.
589 N must be nonnegative. N is deliberately declared with type "int"
590 to allow for future extensions (using negative values). */
591 static char *
592 quotearg_n_options (int n, char const *arg, size_t argsize,
593 struct quoting_options const *options)
595 int e = errno;
597 unsigned int n0 = n;
598 struct slotvec *sv = slotvec;
600 if (n < 0)
601 abort ();
603 if (nslots <= n0)
605 /* FIXME: technically, the type of n1 should be `unsigned int',
606 but that evokes an unsuppressible warning from gcc-4.0.1 and
607 older. If gcc ever provides an option to suppress that warning,
608 revert to the original type, so that the test in xalloc_oversized
609 is once again performed only at compile time. */
610 size_t n1 = n0 + 1;
611 bool preallocated = (sv == &slotvec0);
613 if (xalloc_oversized (n1, sizeof *sv))
614 xalloc_die ();
616 slotvec = sv = xrealloc (preallocated ? NULL : sv, n1 * sizeof *sv);
617 if (preallocated)
618 *sv = slotvec0;
619 memset (sv + nslots, 0, (n1 - nslots) * sizeof *sv);
620 nslots = n1;
624 size_t size = sv[n].size;
625 char *val = sv[n].val;
626 size_t qsize = quotearg_buffer (val, size, arg, argsize, options);
628 if (size <= qsize)
630 sv[n].size = size = qsize + 1;
631 if (val != slot0)
632 free (val);
633 sv[n].val = val = xcharalloc (size);
634 quotearg_buffer (val, size, arg, argsize, options);
637 errno = e;
638 return val;
642 char *
643 quotearg_n (int n, char const *arg)
645 return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options);
/* Quote the null-terminated string ARG into slot 0; shorthand for
   quotearg_n (0, ARG).  */
char *
quotearg (char const *arg)
{
  char *quoted = quotearg_n (0, arg);

  return quoted;
}
654 /* Return quoting options for STYLE, with no extra quoting. */
655 static struct quoting_options
656 quoting_options_from_style (enum quoting_style style)
658 struct quoting_options o;
659 o.style = style;
660 memset (o.quote_these_too, 0, sizeof o.quote_these_too);
661 return o;
664 char *
665 quotearg_n_style (int n, enum quoting_style s, char const *arg)
667 struct quoting_options const o = quoting_options_from_style (s);
668 return quotearg_n_options (n, arg, SIZE_MAX, &o);
671 char *
672 quotearg_n_style_mem (int n, enum quoting_style s,
673 char const *arg, size_t argsize)
675 struct quoting_options const o = quoting_options_from_style (s);
676 return quotearg_n_options (n, arg, argsize, &o);
679 char *
680 quotearg_style (enum quoting_style s, char const *arg)
682 return quotearg_n_style (0, s, arg);
685 char *
686 quotearg_char (char const *arg, char ch)
688 struct quoting_options options;
689 options = default_quoting_options;
690 set_char_quoting (&options, ch, 1);
691 return quotearg_n_options (0, arg, SIZE_MAX, &options);
/* Quote the null-terminated string ARG into slot 0 with ':' marked for
   quoting; shorthand for quotearg_char (ARG, ':').  */
char *
quotearg_colon (char const *arg)
{
  char const colon = ':';

  return quotearg_char (arg, colon);
}