1 /* quotearg.c - quote arguments for output
3 Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006 Free
4 Software Foundation, Inc.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
20 /* Written by Paul Eggert <eggert@twinsun.com> */
38 #define _(msgid) gettext (msgid)
39 #define N_(msgid) msgid
43 /* BSD/OS 4.1 wchar.h requires FILE and struct tm to be declared. */
51 /* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the
52 other macros are defined only for documentation and to satisfy C
56 # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
57 # define iswprint(wc) isprint ((unsigned char) (wc))
61 #if !defined mbsinit && !HAVE_MBSINIT
62 # define mbsinit(ps) 1
69 # if !defined iswprint && !HAVE_ISWPRINT
70 # define iswprint(wc) 1
75 # define SIZE_MAX ((size_t) -1)
78 #define INT_BITS (sizeof (int) * CHAR_BIT)
80 struct quoting_options
82 /* Basic quoting style. */
83 enum quoting_style style
;
85 /* Quote the characters indicated by this bit vector even if the
86 quoting style would not normally require them to be quoted. */
87 unsigned int quote_these_too
[(UCHAR_MAX
/ INT_BITS
) + 1];
90 /* Names of quoting styles. */
91 char const *const quoting_style_args
[] =
103 /* Correspondences to quoting style names. */
104 enum quoting_style
const quoting_style_vals
[] =
106 literal_quoting_style
,
108 shell_always_quoting_style
,
110 escape_quoting_style
,
111 locale_quoting_style
,
112 clocale_quoting_style
115 /* The default quoting options. */
116 static struct quoting_options default_quoting_options
;
118 /* Allocate a new set of quoting options, with contents initially identical
119 to O if O is not null, or to the default if O is null.
120 It is the caller's responsibility to free the result. */
121 struct quoting_options
*
122 clone_quoting_options (struct quoting_options
*o
)
125 struct quoting_options
*p
= xmalloc (sizeof *p
);
126 *p
= *(o
? o
: &default_quoting_options
);
131 /* Get the value of O's quoting style. If O is null, use the default. */
133 get_quoting_style (struct quoting_options
*o
)
135 return (o
? o
: &default_quoting_options
)->style
;
138 /* In O (or in the default if O is null),
139 set the value of the quoting style to S. */
141 set_quoting_style (struct quoting_options
*o
, enum quoting_style s
)
143 (o
? o
: &default_quoting_options
)->style
= s
;
146 /* In O (or in the default if O is null),
147 set the value of the quoting options for character C to I.
148 Return the old value. Currently, the only values defined for I are
149 0 (the default) and 1 (which means to quote the character even if
150 it would not otherwise be quoted). */
152 set_char_quoting (struct quoting_options
*o
, char c
, int i
)
154 unsigned char uc
= c
;
156 (o
? o
: &default_quoting_options
)->quote_these_too
+ uc
/ INT_BITS
;
157 int shift
= uc
% INT_BITS
;
158 int r
= (*p
>> shift
) & 1;
159 *p
^= ((i
& 1) ^ r
) << shift
;
163 /* MSGID approximates a quotation mark. Return its translation if it
164 has one; otherwise, return either it or "\"", depending on S. */
166 gettext_quote (char const *msgid
, enum quoting_style s
)
168 char const *translation
= _(msgid
);
169 if (translation
== msgid
&& s
== clocale_quoting_style
)
174 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
175 argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
176 non-quoting-style part of O to control quoting.
177 Terminate the output with a null character, and return the written
178 size of the output, not counting the terminating null.
179 If BUFFERSIZE is too small to store the output string, return the
180 value that would have been returned had BUFFERSIZE been large enough.
181 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE.
183 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
184 ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
185 style specified by O, and O may not be null. */
188 quotearg_buffer_restyled (char *buffer
, size_t buffersize
,
189 char const *arg
, size_t argsize
,
190 enum quoting_style quoting_style
,
191 struct quoting_options
const *o
)
195 char const *quote_string
= 0;
196 size_t quote_string_len
= 0;
197 bool backslash_escapes
= false;
198 bool unibyte_locale
= MB_CUR_MAX
== 1;
203 if (len < buffersize) \
209 switch (quoting_style
)
211 case c_quoting_style
:
213 backslash_escapes
= true;
215 quote_string_len
= 1;
218 case escape_quoting_style
:
219 backslash_escapes
= true;
222 case locale_quoting_style
:
223 case clocale_quoting_style
:
226 Get translations for open and closing quotation marks.
228 The message catalog should translate "`" to a left
229 quotation mark suitable for the locale, and similarly for
230 "'". If the catalog has no translation,
231 locale_quoting_style quotes `like this', and
232 clocale_quoting_style quotes "like this".
234 For example, an American English Unicode locale should
235 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
236 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
237 MARK). A British English Unicode locale should instead
238 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
239 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.
241 If you don't know what to put here, please see
242 <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs>
243 and use glyphs suitable for your language. */
245 char const *left
= gettext_quote (N_("`"), quoting_style
);
246 char const *right
= gettext_quote (N_("'"), quoting_style
);
247 for (quote_string
= left
; *quote_string
; quote_string
++)
248 STORE (*quote_string
);
249 backslash_escapes
= true;
250 quote_string
= right
;
251 quote_string_len
= strlen (quote_string
);
255 case shell_always_quoting_style
:
258 quote_string_len
= 1;
265 for (i
= 0; ! (argsize
== SIZE_MAX
? arg
[i
] == '\0' : i
== argsize
); i
++)
270 if (backslash_escapes
272 && i
+ quote_string_len
<= argsize
273 && memcmp (arg
+ i
, quote_string
, quote_string_len
) == 0)
280 if (backslash_escapes
)
290 switch (quoting_style
)
292 case shell_quoting_style
:
293 goto use_shell_always_quoting_style
;
295 case c_quoting_style
:
296 if (i
+ 2 < argsize
&& arg
[i
+ 1] == '?')
300 case '(': case ')': case '-': case '/':
301 case '<': case '=': case '>':
302 /* Escape the second '?' in what would otherwise be
321 case '\a': esc
= 'a'; goto c_escape
;
322 case '\b': esc
= 'b'; goto c_escape
;
323 case '\f': esc
= 'f'; goto c_escape
;
324 case '\n': esc
= 'n'; goto c_and_shell_escape
;
325 case '\r': esc
= 'r'; goto c_and_shell_escape
;
326 case '\t': esc
= 't'; goto c_and_shell_escape
;
327 case '\v': esc
= 'v'; goto c_escape
;
328 case '\\': esc
= c
; goto c_and_shell_escape
;
331 if (quoting_style
== shell_quoting_style
)
332 goto use_shell_always_quoting_style
;
334 if (backslash_escapes
)
341 case '{': case '}': /* sometimes special if isolated */
342 if (! (argsize
== SIZE_MAX
? arg
[1] == '\0' : argsize
== 1))
350 case '!': /* special in bash */
351 case '"': case '$': case '&':
352 case '(': case ')': case '*': case ';':
354 case '=': /* sometimes special in 0th or (with "set -k") later args */
356 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
358 /* A shell special character. In theory, '$' and '`' could
359 be the first bytes of multibyte characters, which means
360 we should check them with mbrtowc, but in practice this
361 doesn't happen so it's not worth worrying about. */
362 if (quoting_style
== shell_quoting_style
)
363 goto use_shell_always_quoting_style
;
367 switch (quoting_style
)
369 case shell_quoting_style
:
370 goto use_shell_always_quoting_style
;
372 case shell_always_quoting_style
:
383 case '%': case '+': case ',': case '-': case '.': case '/':
384 case '0': case '1': case '2': case '3': case '4': case '5':
385 case '6': case '7': case '8': case '9': case ':':
386 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
387 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
388 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
389 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
390 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
391 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
392 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
393 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
394 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
395 /* These characters don't cause problems, no matter what the
396 quoting style is. They cannot start multibyte sequences. */
400 /* If we have a multibyte sequence, copy it until we reach
401 its end, find an error, or come back to the initial shift
402 state. For C-like styles, if the sequence has
403 unprintable characters, escape the whole sequence, since
404 we can't easily escape single characters within it. */
406 /* Length of multibyte sequence found so far. */
414 printable
= isprint (c
) != 0;
419 memset (&mbstate
, 0, sizeof mbstate
);
423 if (argsize
== SIZE_MAX
)
424 argsize
= strlen (arg
);
429 size_t bytes
= mbrtowc (&w
, &arg
[i
+ m
],
430 argsize
- (i
+ m
), &mbstate
);
433 else if (bytes
== (size_t) -1)
438 else if (bytes
== (size_t) -2)
441 while (i
+ m
< argsize
&& arg
[i
+ m
])
447 /* Work around a bug with older shells that "see" a '\'
448 that is really the 2nd byte of a multibyte character.
449 In practice the problem is limited to ASCII
450 chars >= '@' that are shell special chars. */
451 if ('[' == 0x5b && quoting_style
== shell_quoting_style
)
454 for (j
= 1; j
< bytes
; j
++)
455 switch (arg
[i
+ m
+ j
])
457 case '[': case '\\': case '^':
459 goto use_shell_always_quoting_style
;
471 while (! mbsinit (&mbstate
));
474 if (1 < m
|| (backslash_escapes
&& ! printable
))
476 /* Output a multibyte sequence, or an escaped
477 unprintable unibyte character. */
482 if (backslash_escapes
&& ! printable
)
485 STORE ('0' + (c
>> 6));
486 STORE ('0' + ((c
>> 3) & 7));
500 if (! (backslash_escapes
501 && o
->quote_these_too
[c
/ INT_BITS
] & (1 << (c
% INT_BITS
))))
511 if (i
== 0 && quoting_style
== shell_quoting_style
)
512 goto use_shell_always_quoting_style
;
515 for (; *quote_string
; quote_string
++)
516 STORE (*quote_string
);
518 if (len
< buffersize
)
522 use_shell_always_quoting_style
:
523 return quotearg_buffer_restyled (buffer
, buffersize
, arg
, argsize
,
524 shell_always_quoting_style
, o
);
527 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
528 argument ARG (of size ARGSIZE), using O to control quoting.
529 If O is null, use the default.
530 Terminate the output with a null character, and return the written
531 size of the output, not counting the terminating null.
532 If BUFFERSIZE is too small to store the output string, return the
533 value that would have been returned had BUFFERSIZE been large enough.
534 If ARGSIZE is SIZE_MAX, use the string length of the argument for
537 quotearg_buffer (char *buffer
, size_t buffersize
,
538 char const *arg
, size_t argsize
,
539 struct quoting_options
const *o
)
541 struct quoting_options
const *p
= o
? o
: &default_quoting_options
;
543 size_t r
= quotearg_buffer_restyled (buffer
, buffersize
, arg
, argsize
,
549 /* Like quotearg_buffer (..., ARG, ARGSIZE, O), except return newly
550 allocated storage containing the quoted string. */
552 quotearg_alloc (char const *arg
, size_t argsize
,
553 struct quoting_options
const *o
)
556 size_t bufsize
= quotearg_buffer (0, 0, arg
, argsize
, o
) + 1;
557 char *buf
= xmalloc (bufsize
);
558 quotearg_buffer (buf
, bufsize
, arg
, argsize
, o
);
563 /* Use storage slot N to return a quoted version of argument ARG.
564 ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a
565 null-terminated string.
566 OPTIONS specifies the quoting options.
567 The returned value points to static storage that can be
568 reused by the next call to this function with the same value of N.
569 N must be nonnegative. N is deliberately declared with type "int"
570 to allow for future extensions (using negative values). */
572 quotearg_n_options (int n
, char const *arg
, size_t argsize
,
573 struct quoting_options
const *options
)
577 /* Preallocate a slot 0 buffer, so that the caller can always quote
578 one small component of a "memory exhausted" message in slot 0. */
579 static char slot0
[256];
580 static unsigned int nslots
= 1;
587 static struct slotvec slotvec0
= {sizeof slot0
, slot0
};
588 static struct slotvec
*slotvec
= &slotvec0
;
595 /* FIXME: technically, the type of n1 should be `unsigned int',
596 but that evokes an unsuppressible warning from gcc-4.0.1 and
597 older. If gcc ever provides an option to suppress that warning,
598 revert to the original type, so that the test in xalloc_oversized
599 is once again performed only at compile time. */
602 if (xalloc_oversized (n1
, sizeof *slotvec
))
605 if (slotvec
== &slotvec0
)
607 slotvec
= xmalloc (sizeof *slotvec
);
610 slotvec
= xrealloc (slotvec
, n1
* sizeof *slotvec
);
611 memset (slotvec
+ nslots
, 0, (n1
- nslots
) * sizeof *slotvec
);
616 size_t size
= slotvec
[n
].size
;
617 char *val
= slotvec
[n
].val
;
618 size_t qsize
= quotearg_buffer (val
, size
, arg
, argsize
, options
);
622 slotvec
[n
].size
= size
= qsize
+ 1;
625 slotvec
[n
].val
= val
= xmalloc (size
);
626 quotearg_buffer (val
, size
, arg
, argsize
, options
);
635 quotearg_n (int n
, char const *arg
)
637 return quotearg_n_options (n
, arg
, SIZE_MAX
, &default_quoting_options
);
/* Like quotearg_n (0, ARG): quote the null-terminated string ARG
   using storage slot 0.  */
char *
quotearg (char const *arg)
{
  return quotearg_n (0, arg);
}
646 /* Return quoting options for STYLE, with no extra quoting. */
647 static struct quoting_options
648 quoting_options_from_style (enum quoting_style style
)
650 struct quoting_options o
;
652 memset (o
.quote_these_too
, 0, sizeof o
.quote_these_too
);
657 quotearg_n_style (int n
, enum quoting_style s
, char const *arg
)
659 struct quoting_options
const o
= quoting_options_from_style (s
);
660 return quotearg_n_options (n
, arg
, SIZE_MAX
, &o
);
664 quotearg_n_style_mem (int n
, enum quoting_style s
,
665 char const *arg
, size_t argsize
)
667 struct quoting_options
const o
= quoting_options_from_style (s
);
668 return quotearg_n_options (n
, arg
, argsize
, &o
);
672 quotearg_style (enum quoting_style s
, char const *arg
)
674 return quotearg_n_style (0, s
, arg
);
678 quotearg_char (char const *arg
, char ch
)
680 struct quoting_options options
;
681 options
= default_quoting_options
;
682 set_char_quoting (&options
, ch
, 1);
683 return quotearg_n_options (0, arg
, SIZE_MAX
, &options
);
/* Like quotearg_char (ARG, ':'): quote ARG with the default options,
   additionally marking ':' for quoting.  */
char *
quotearg_colon (char const *arg)
{
  return quotearg_char (arg, ':');
}