1 /* quotearg.c - quote arguments for output
3 Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006, 2007 Free
4 Software Foundation, Inc.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
20 /* Written by Paul Eggert <eggert@twinsun.com> */
37 #define _(msgid) gettext (msgid)
38 #define N_(msgid) msgid
/* Disable multibyte processing entirely.  Since MB_CUR_MAX is 1, the
   other macros are defined only for documentation and to satisfy C
   syntax.  */
47 # define mbstate_t int
48 # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
49 # define iswprint(wc) isprint ((unsigned char) (wc))
53 #if !defined mbsinit && !HAVE_MBSINIT
54 # define mbsinit(ps) 1
60 # define SIZE_MAX ((size_t) -1)
63 #define INT_BITS (sizeof (int) * CHAR_BIT)
65 struct quoting_options
67 /* Basic quoting style. */
68 enum quoting_style style
;
70 /* Quote the characters indicated by this bit vector even if the
71 quoting style would not normally require them to be quoted. */
72 unsigned int quote_these_too
[(UCHAR_MAX
/ INT_BITS
) + 1];
/* NOTE(review): the brace-enclosed initializer that should follow the
   `=' below is not present in this copy of the file, and each line
   carries a stray source line number.  Restore the table from the
   upstream source before building.  Presumably the entries are meant
   to line up one-to-one with quoting_style_vals; confirm.  */
75 /* Names of quoting styles. */
76 char const *const quoting_style_args
[] =
/* NOTE(review): only two entries of this initializer
   (literal_quoting_style and shell_always_quoting_style) are visible
   in this copy; the braces and the remaining entries are absent.
   Restore the full list from the upstream source, keeping it in step
   with the table of style names.  */
88 /* Correspondences to quoting style names. */
89 enum quoting_style
const quoting_style_vals
[] =
91 literal_quoting_style
,
93 shell_always_quoting_style
,
100 /* The default quoting options. */
101 static struct quoting_options default_quoting_options
;
/* NOTE(review): this copy of the function is truncated.  The visible
   part duplicates either O or the default options with xmemdup; the
   remaining xmemdup argument, the rest of the body, and the braces
   are not present here.  Restore them from the upstream source
   before building.  */
103 /* Allocate a new set of quoting options, with contents initially identical
104 to O if O is not null, or to the default if O is null.
105 It is the caller's responsibility to free the result. */
106 struct quoting_options
*
107 clone_quoting_options (struct quoting_options
*o
)
110 struct quoting_options
*p
= xmemdup (o
? o
: &default_quoting_options
,
116 /* Get the value of O's quoting style. If O is null, use the default. */
118 get_quoting_style (struct quoting_options
*o
)
120 return (o
? o
: &default_quoting_options
)->style
;
123 /* In O (or in the default if O is null),
124 set the value of the quoting style to S. */
126 set_quoting_style (struct quoting_options
*o
, enum quoting_style s
)
128 (o
? o
: &default_quoting_options
)->style
= s
;
131 /* In O (or in the default if O is null),
132 set the value of the quoting options for character C to I.
133 Return the old value. Currently, the only values defined for I are
134 0 (the default) and 1 (which means to quote the character even if
135 it would not otherwise be quoted). */
137 set_char_quoting (struct quoting_options
*o
, char c
, int i
)
139 unsigned char uc
= c
;
141 (o
? o
: &default_quoting_options
)->quote_these_too
+ uc
/ INT_BITS
;
142 int shift
= uc
% INT_BITS
;
143 int r
= (*p
>> shift
) & 1;
144 *p
^= ((i
& 1) ^ r
) << shift
;
148 /* MSGID approximates a quotation mark. Return its translation if it
149 has one; otherwise, return either it or "\"", depending on S. */
151 gettext_quote (char const *msgid
, enum quoting_style s
)
153 char const *translation
= _(msgid
);
154 if (translation
== msgid
&& s
== clocale_quoting_style
)
/* NOTE(review): this copy of quotearg_buffer_restyled is badly
   truncated.  Every line carries a stray source line number, and the
   braces, most declarations, what appears to be the STORE macro
   definition (only one continuation line is visible), and many case
   bodies are absent.  Do not edit the logic here; restore the function
   from the upstream source first.

   What the visible fragments show:
   - a first switch on QUOTING_STYLE selects the quote string and
     whether backslash escapes are used;
   - the main loop walks ARG until a null byte (ARGSIZE == SIZE_MAX)
     or until ARGSIZE bytes have been consumed;
   - whenever shell_quoting_style meets a character that needs
     quoting, control jumps to use_shell_always_quoting_style, which
     restarts the whole job with shell_always_quoting_style.

   NOTE(review): the test `1 << (c % INT_BITS)' near the end shifts a
   signed int; when c % INT_BITS equals INT_BITS - 1 the shift
   overflows.  Consider an unsigned constant once the function is
   restored.  */
159 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
160 argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
161 non-quoting-style part of O to control quoting.
162 Terminate the output with a null character, and return the written
163 size of the output, not counting the terminating null.
164 If BUFFERSIZE is too small to store the output string, return the
165 value that would have been returned had BUFFERSIZE been large enough.
166 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE.
168 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
169 ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
170 style specified by O, and O may not be null. */
173 quotearg_buffer_restyled (char *buffer
, size_t buffersize
,
174 char const *arg
, size_t argsize
,
175 enum quoting_style quoting_style
,
176 struct quoting_options
const *o
)
180 char const *quote_string
= 0;
181 size_t quote_string_len
= 0;
182 bool backslash_escapes
= false;
183 bool unibyte_locale
= MB_CUR_MAX
== 1;
188 if (len < buffersize) \
194 switch (quoting_style
)
196 case c_quoting_style
:
198 backslash_escapes
= true;
200 quote_string_len
= 1;
203 case escape_quoting_style
:
204 backslash_escapes
= true;
207 case locale_quoting_style
:
208 case clocale_quoting_style
:
211 Get translations for open and closing quotation marks.
213 The message catalog should translate "`" to a left
214 quotation mark suitable for the locale, and similarly for
215 "'". If the catalog has no translation,
216 locale_quoting_style quotes `like this', and
217 clocale_quoting_style quotes "like this".
219 For example, an American English Unicode locale should
220 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
221 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
222 MARK). A British English Unicode locale should instead
223 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
224 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.
226 If you don't know what to put here, please see
227 <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs>
228 and use glyphs suitable for your language. */
230 char const *left
= gettext_quote (N_("`"), quoting_style
);
231 char const *right
= gettext_quote (N_("'"), quoting_style
);
232 for (quote_string
= left
; *quote_string
; quote_string
++)
233 STORE (*quote_string
);
234 backslash_escapes
= true;
235 quote_string
= right
;
236 quote_string_len
= strlen (quote_string
);
240 case shell_always_quoting_style
:
243 quote_string_len
= 1;
250 for (i
= 0; ! (argsize
== SIZE_MAX
? arg
[i
] == '\0' : i
== argsize
); i
++)
255 if (backslash_escapes
257 && i
+ quote_string_len
<= argsize
258 && memcmp (arg
+ i
, quote_string
, quote_string_len
) == 0)
265 if (backslash_escapes
)
275 switch (quoting_style
)
277 case shell_quoting_style
:
278 goto use_shell_always_quoting_style
;
280 case c_quoting_style
:
281 if (i
+ 2 < argsize
&& arg
[i
+ 1] == '?')
285 case '(': case ')': case '-': case '/':
286 case '<': case '=': case '>':
287 /* Escape the second '?' in what would otherwise be
306 case '\a': esc
= 'a'; goto c_escape
;
307 case '\b': esc
= 'b'; goto c_escape
;
308 case '\f': esc
= 'f'; goto c_escape
;
309 case '\n': esc
= 'n'; goto c_and_shell_escape
;
310 case '\r': esc
= 'r'; goto c_and_shell_escape
;
311 case '\t': esc
= 't'; goto c_and_shell_escape
;
312 case '\v': esc
= 'v'; goto c_escape
;
313 case '\\': esc
= c
; goto c_and_shell_escape
;
316 if (quoting_style
== shell_quoting_style
)
317 goto use_shell_always_quoting_style
;
319 if (backslash_escapes
)
326 case '{': case '}': /* sometimes special if isolated */
327 if (! (argsize
== SIZE_MAX
? arg
[1] == '\0' : argsize
== 1))
335 case '!': /* special in bash */
336 case '"': case '$': case '&':
337 case '(': case ')': case '*': case ';':
339 case '=': /* sometimes special in 0th or (with "set -k") later args */
341 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
343 /* A shell special character. In theory, '$' and '`' could
344 be the first bytes of multibyte characters, which means
345 we should check them with mbrtowc, but in practice this
346 doesn't happen so it's not worth worrying about. */
347 if (quoting_style
== shell_quoting_style
)
348 goto use_shell_always_quoting_style
;
352 switch (quoting_style
)
354 case shell_quoting_style
:
355 goto use_shell_always_quoting_style
;
357 case shell_always_quoting_style
:
368 case '%': case '+': case ',': case '-': case '.': case '/':
369 case '0': case '1': case '2': case '3': case '4': case '5':
370 case '6': case '7': case '8': case '9': case ':':
371 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
372 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
373 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
374 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
375 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
376 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
377 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
378 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
379 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
380 /* These characters don't cause problems, no matter what the
381 quoting style is. They cannot start multibyte sequences. */
385 /* If we have a multibyte sequence, copy it until we reach
386 its end, find an error, or come back to the initial shift
387 state. For C-like styles, if the sequence has
388 unprintable characters, escape the whole sequence, since
389 we can't easily escape single characters within it. */
391 /* Length of multibyte sequence found so far. */
399 printable
= isprint (c
) != 0;
404 memset (&mbstate
, 0, sizeof mbstate
);
408 if (argsize
== SIZE_MAX
)
409 argsize
= strlen (arg
);
414 size_t bytes
= mbrtowc (&w
, &arg
[i
+ m
],
415 argsize
- (i
+ m
), &mbstate
);
418 else if (bytes
== (size_t) -1)
423 else if (bytes
== (size_t) -2)
426 while (i
+ m
< argsize
&& arg
[i
+ m
])
432 /* Work around a bug with older shells that "see" a '\'
433 that is really the 2nd byte of a multibyte character.
434 In practice the problem is limited to ASCII
435 chars >= '@' that are shell special chars. */
436 if ('[' == 0x5b && quoting_style
== shell_quoting_style
)
439 for (j
= 1; j
< bytes
; j
++)
440 switch (arg
[i
+ m
+ j
])
442 case '[': case '\\': case '^':
444 goto use_shell_always_quoting_style
;
456 while (! mbsinit (&mbstate
));
459 if (1 < m
|| (backslash_escapes
&& ! printable
))
461 /* Output a multibyte sequence, or an escaped
462 unprintable unibyte character. */
467 if (backslash_escapes
&& ! printable
)
470 STORE ('0' + (c
>> 6));
471 STORE ('0' + ((c
>> 3) & 7));
485 if (! (backslash_escapes
486 && o
->quote_these_too
[c
/ INT_BITS
] & (1 << (c
% INT_BITS
))))
496 if (i
== 0 && quoting_style
== shell_quoting_style
)
497 goto use_shell_always_quoting_style
;
500 for (; *quote_string
; quote_string
++)
501 STORE (*quote_string
);
503 if (len
< buffersize
)
507 use_shell_always_quoting_style
:
508 return quotearg_buffer_restyled (buffer
, buffersize
, arg
, argsize
,
509 shell_always_quoting_style
, o
);
/* NOTE(review): this copy of quotearg_buffer is truncated.  The header
   comment below is cut off before its closing delimiter, and the
   return type, the braces, the trailing arguments of the
   quotearg_buffer_restyled call, and the rest of the body are absent.
   The visible part substitutes the default quoting options for a null
   O and delegates to quotearg_buffer_restyled.  Restore the remainder
   from the upstream source before building.  */
512 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
513 argument ARG (of size ARGSIZE), using O to control quoting.
514 If O is null, use the default.
515 Terminate the output with a null character, and return the written
516 size of the output, not counting the terminating null.
517 If BUFFERSIZE is too small to store the output string, return the
518 value that would have been returned had BUFFERSIZE been large enough.
519 If ARGSIZE is SIZE_MAX, use the string length of the argument for
522 quotearg_buffer (char *buffer
, size_t buffersize
,
523 char const *arg
, size_t argsize
,
524 struct quoting_options
const *o
)
526 struct quoting_options
const *p
= o
? o
: &default_quoting_options
;
528 size_t r
= quotearg_buffer_restyled (buffer
, buffersize
, arg
, argsize
,
/* NOTE(review): this copy of quotearg_alloc is truncated; the return
   type, the braces, and whatever follows the second quotearg_buffer
   call are absent.  The visible part makes two passes: the first call,
   with a null buffer of size 0, only measures the quoted length; the
   second fills a buffer of that length plus one (for the terminating
   null) obtained from xcharalloc.  Presumably BUF is then returned to
   the caller, who owns it; confirm against the upstream source.  */
534 /* Like quotearg_buffer (..., ARG, ARGSIZE, O), except return newly
535 allocated storage containing the quoted string. */
537 quotearg_alloc (char const *arg
, size_t argsize
,
538 struct quoting_options
const *o
)
541 size_t bufsize
= quotearg_buffer (0, 0, arg
, argsize
, o
) + 1;
542 char *buf
= xcharalloc (bufsize
);
543 quotearg_buffer (buf
, bufsize
, arg
, argsize
, o
);
548 /* A storage slot with size and pointer to a value. */
555 /* Preallocate a slot 0 buffer, so that the caller can always quote
556 one small component of a "memory exhausted" message in slot 0. */
557 static char slot0
[256];
558 static unsigned int nslots
= 1;
559 static struct slotvec slotvec0
= {sizeof slot0
, slot0
};
560 static struct slotvec
*slotvec
= &slotvec0
;
565 struct slotvec
*sv
= slotvec
;
567 for (i
= 1; i
< nslots
; i
++)
569 if (sv
[0].val
!= slot0
)
572 slotvec0
.size
= sizeof slot0
;
573 slotvec0
.val
= slot0
;
/* NOTE(review): this copy of quotearg_n_options is truncated.  The
   return type, the braces, the declaration of n1, and the conditions
   guarding the growth and reallocation steps are absent.  Restore
   them from the upstream source before building.

   What the visible fragments show:
   - the slot vector is grown with xrealloc to n1 entries, after an
     xalloc_oversized check; when the vector is still the preallocated
     slotvec0, NULL is passed so that xrealloc allocates fresh storage
     instead of trying to resize a static object;
   - the newly added entries are zero-filled with memset;
   - ARG is quoted into slot N's current buffer; the slot's size is
     then set to the quoted length plus one, a buffer of that size is
     taken from xcharalloc, and the quoting is repeated into it.
     Presumably this second pass happens only when the first buffer
     was too small; the guarding condition is not visible here.  */
583 /* Use storage slot N to return a quoted version of argument ARG.
584 ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a
585 null-terminated string.
586 OPTIONS specifies the quoting options.
587 The returned value points to static storage that can be
588 reused by the next call to this function with the same value of N.
589 N must be nonnegative. N is deliberately declared with type "int"
590 to allow for future extensions (using negative values). */
592 quotearg_n_options (int n
, char const *arg
, size_t argsize
,
593 struct quoting_options
const *options
)
598 struct slotvec
*sv
= slotvec
;
605 /* FIXME: technically, the type of n1 should be `unsigned int',
606 but that evokes an unsuppressible warning from gcc-4.0.1 and
607 older. If gcc ever provides an option to suppress that warning,
608 revert to the original type, so that the test in xalloc_oversized
609 is once again performed only at compile time. */
611 bool preallocated
= (sv
== &slotvec0
);
613 if (xalloc_oversized (n1
, sizeof *sv
))
616 slotvec
= sv
= xrealloc (preallocated
? NULL
: sv
, n1
* sizeof *sv
);
619 memset (sv
+ nslots
, 0, (n1
- nslots
) * sizeof *sv
);
624 size_t size
= sv
[n
].size
;
625 char *val
= sv
[n
].val
;
626 size_t qsize
= quotearg_buffer (val
, size
, arg
, argsize
, options
);
630 sv
[n
].size
= size
= qsize
+ 1;
633 sv
[n
].val
= val
= xcharalloc (size
);
634 quotearg_buffer (val
, size
, arg
, argsize
, options
);
643 quotearg_n (int n
, char const *arg
)
645 return quotearg_n_options (n
, arg
, SIZE_MAX
, &default_quoting_options
);
/* Quote the null-terminated string ARG with the default quoting
   options, using storage slot 0 for the result.  */
char *
quotearg (char const *arg)
{
  return quotearg_n (0, arg);
}
654 /* Return quoting options for STYLE, with no extra quoting. */
655 static struct quoting_options
656 quoting_options_from_style (enum quoting_style style
)
658 struct quoting_options o
;
660 memset (o
.quote_these_too
, 0, sizeof o
.quote_these_too
);
665 quotearg_n_style (int n
, enum quoting_style s
, char const *arg
)
667 struct quoting_options
const o
= quoting_options_from_style (s
);
668 return quotearg_n_options (n
, arg
, SIZE_MAX
, &o
);
672 quotearg_n_style_mem (int n
, enum quoting_style s
,
673 char const *arg
, size_t argsize
)
675 struct quoting_options
const o
= quoting_options_from_style (s
);
676 return quotearg_n_options (n
, arg
, argsize
, &o
);
680 quotearg_style (enum quoting_style s
, char const *arg
)
682 return quotearg_n_style (0, s
, arg
);
686 quotearg_char (char const *arg
, char ch
)
688 struct quoting_options options
;
689 options
= default_quoting_options
;
690 set_char_quoting (&options
, ch
, 1);
691 return quotearg_n_options (0, arg
, SIZE_MAX
, &options
);
/* Quote the null-terminated string ARG like quotearg does, but also
   force colons to be quoted.  */
char *
quotearg_colon (char const *arg)
{
  return quotearg_char (arg, ':');
}