Fixed Savannah bug #20139 (-mtime -2 includes files matching -mtime 2 but it should...
[findutils.git] / lib / qmark.c
blob8220d5fb42776b308c669bf9228f4e0c56b6e671
1 /* qmark.c -- quote 'dangerous' filenames
3 Copyright (C) 2005 Free Software Foundation, Inc.
4 Derived from coreutils' ls.c:
5 Copyright (C) 85, 88, 90, 91, 1995-2005 Free Software Foundation, Inc.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
20 USA.
23 #ifdef HAVE_CONFIG_H
24 #include <config.h>
25 #endif
27 # include <stddef.h>
28 # include <stdlib.h>
29 #include <ctype.h>
31 #if HAVE_STRING_H || STDC_HEADERS
32 #include <string.h>
33 #else
34 #include <strings.h>
35 #endif
38 /* Get mbstate_t, mbrtowc(), mbsinit(), wcwidth(). */
39 #if HAVE_WCHAR_H
40 # include <wchar.h>
41 #endif
43 #include "printquoted.h"
46 /*
47 This comment, IN_CTYPE_DOMAIN and ISPRINT were borrowed from
48 coreutils at Sun Jun 5 21:17:40 2005 UTC.
50 Jim Meyering writes:
52 "... Some ctype macros are valid only for character codes that
53 isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
54 using /bin/cc or gcc but without giving an ansi option). So, all
55 ctype uses should be through macros like ISPRINT... If
56 STDC_HEADERS is defined, then autoconf has verified that the ctype
57 macros don't need to be guarded with references to isascii. ...
58 Defining isascii to 1 should let any compiler worth its salt
59 eliminate the && through constant folding."
61 Bruno Haible adds:
63 "... Furthermore, isupper(c) etc. have an undefined result if c is
64 outside the range -1 <= c <= 255. One is tempted to write isupper(c)
65 with c being of type `char', but this is wrong if c is an 8-bit
66 character >= 128 which gets sign-extended to a negative value.
67 The macro ISUPPER protects against this as well." */
/* IN_CTYPE_DOMAIN (c) is the guard applied before handing C to a
   <ctype.h> classification macro.  Where autoconf has verified the
   standard headers (STDC_HEADERS), or where no isascii is available
   at all, no guard is needed and it is the constant 1; otherwise it
   is isascii (c).  */
#if !STDC_HEADERS && (defined (isascii) || HAVE_ISASCII)
# define IN_CTYPE_DOMAIN(c) isascii(c)
#else
# define IN_CTYPE_DOMAIN(c) 1
#endif

/* ISPRINT is defined in <sys/euc.h> on at least Solaris2.6 systems,
   so discard any earlier definition before supplying our own guarded
   wrapper around isprint.  */
#undef ISPRINT
#define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c))
/* Turn a plain (possibly signed) char into the corresponding
 * unsigned char value.  Going through a function with a `char'
 * parameter is a bit safer than a bare cast to unsigned char, because
 * it catches some type errors that the cast would silently accept.
 *
 * This code taken from coreutils' system.h header at
 * Sun Jun  5 21:05:21 2005 UTC.
 */
static inline unsigned char
to_uchar (char ch)
{
  /* The initialisation performs the char -> unsigned char conversion. */
  unsigned char converted = ch;

  return converted;
}
/* Single-byte variant: overwrite, in place, every byte of the LEN-byte
 * buffer BUF for which ISPRINT is false with a question mark.
 *
 * The buffer is never shortened or lengthened, so the value returned
 * is always LEN.
 */
static size_t
unibyte_qmark_chars(char *buf, size_t len)
{
  size_t i;

  for (i = 0; i < len; i++)
    {
      /* Classify the byte as an unsigned value; see to_uchar. */
      if (ISPRINT (to_uchar (buf[i])))
        continue;
      buf[i] = '?';
    }

  return len;
}
116 #if HAVE_MBRTOWC
/* Multibyte-aware variant: rewrite the LEN-byte buffer BUF in place so
 * that every unprintable or undecodable character is replaced by a
 * single question mark.
 *
 * When the current locale is single-byte (MB_CUR_MAX <= 1) the work is
 * delegated to unibyte_qmark_chars.  Otherwise the buffer is scanned
 * with a read pointer P and a write pointer Q:
 *
 *   - bytes listed in the switch below are copied unchanged without
 *     consulting the locale;
 *   - everything else is decoded with mbrtowc.  A character for which
 *     wcwidth reports a non-negative width is copied; one with a
 *     negative width, an invalid byte, or a truncated trailing
 *     sequence becomes one '?'.
 *
 * Every byte stored through Q is paid for by at least one byte consumed
 * through P, so Q never overtakes P and the result never grows.
 *
 * Returns the new length of the data in BUF, which may be smaller than
 * LEN because a multi-byte sequence can collapse into a single '?'.
 */
static size_t
multibyte_qmark_chars(char *buf, size_t len)
{
  char const *p = buf;                  /* next byte to read */
  char const *plimit = buf + len;       /* one past the last input byte */
  char *q = buf;                        /* next byte to write */

  if (MB_CUR_MAX <= 1)
    return unibyte_qmark_chars(buf, len);

  while (p < plimit)
    switch (*p)
      {
        case ' ': case '!': case '"': case '#': case '%':
        case '&': case '\'': case '(': case ')': case '*':
        case '+': case ',': case '-': case '.': case '/':
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
        case ':': case ';': case '<': case '=': case '>':
        case '?':
        case 'A': case 'B': case 'C': case 'D': case 'E':
        case 'F': case 'G': case 'H': case 'I': case 'J':
        case 'K': case 'L': case 'M': case 'N': case 'O':
        case 'P': case 'Q': case 'R': case 'S': case 'T':
        case 'U': case 'V': case 'W': case 'X': case 'Y':
        case 'Z':
        case '[': case '\\': case ']': case '^': case '_':
        case 'a': case 'b': case 'c': case 'd': case 'e':
        case 'f': case 'g': case 'h': case 'i': case 'j':
        case 'k': case 'l': case 'm': case 'n': case 'o':
        case 'p': case 'q': case 'r': case 's': case 't':
        case 'u': case 'v': case 'w': case 'x': case 'y':
        case 'z': case '{': case '|': case '}': case '~':
          /* These characters are printable ASCII characters.  */
          *q++ = *p++;
          break;

        default:
          /* If we have a multibyte sequence, copy it until we
             reach its end, replacing each non-printable multibyte
             character with a single question mark.  */
          {
            mbstate_t mbstate;
            memset (&mbstate, 0, sizeof mbstate);
            do
              {
                wchar_t wc;
                size_t bytes;
                int w;

                bytes = mbrtowc (&wc, p, plimit - p, &mbstate);

                if (bytes == (size_t) -1)
                  {
                    /* An invalid multibyte sequence was
                       encountered.  Skip one input byte, and
                       put a question mark.  */
                    p++;
                    *q++ = '?';
                    break;
                  }

                if (bytes == (size_t) -2)
                  {
                    /* An incomplete multibyte character
                       at the end.  Replace it entirely with
                       a question mark.  */
                    p = plimit;
                    *q++ = '?';
                    break;
                  }

                if (bytes == 0)
                  /* A null wide character was encountered.  */
                  bytes = 1;

                w = wcwidth (wc);
                if (w >= 0)
                  {
                    /* A printable multibyte character.
                       Keep it.  */
                    for (; bytes > 0; --bytes)
                      *q++ = *p++;
                  }
                else
                  {
                    /* An unprintable multibyte character.
                       Replace it entirely with a question
                       mark.  */
                    p += bytes;
                    *q++ = '?';
                  }
              }
            /* Stop once the input is exhausted even if the conversion
               state is not initial.  Otherwise mbrtowc would be called
               with zero bytes, report an incomplete character, and a
               spurious '?' would be stored, possibly one byte past the
               end of BUF.  */
            while (p < plimit && ! mbsinit (&mbstate));
          }
          break;
      }

  /* The buffer may have shrunk. */
  len = q - buf;
  return len;
}
222 #endif
/* Replace every dangerous-looking character in the LEN-byte buffer BUF
 * with a question mark, working in place.  This code is taken from the
 * ls.c file in coreutils as at Sun Jun  5 20:51:54 2005 UTC.
 *
 * The multibyte-aware implementation is used when mbrtowc is available
 * (HAVE_MBRTOWC); otherwise the single-byte implementation is used.
 *
 * This function may shrink the buffer.  Either way, the new length
 * is returned.
 */
size_t
qmark_chars(char *buf, size_t len)
{
  size_t new_len;

#if HAVE_MBRTOWC
  new_len = multibyte_qmark_chars(buf, len);
#else
  new_len = unibyte_qmark_chars(buf, len);
#endif

  return new_len;
}