2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License
4 * as published by the Free Software Foundation; either version 2
5 * of the License, or (at your option) any later version.
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
16 * See the COPYING file for license information.
18 * Guillaume Chazarain <guichaz@yahoo.fr>
21 /*******************************************
22 * UTF-8, filenames and mnemonics handling *
23 *******************************************/
25 #include <string.h> /* memcpy(), strlen(), strstr() */
26 #include <stdlib.h> /* getenv() */
29 #include "str_utils.h"
31 /* The returned string should not be freed. */
32 const gchar
*add_mnemonic(const gchar
* str
)
34 static GStaticPrivate result_key
= G_STATIC_PRIVATE_INIT
;
35 static GStaticPrivate size_key
= G_STATIC_PRIVATE_INIT
;
37 gchar
**result
= g_static_private_get(&result_key
);
38 gint
*size
= g_static_private_get(&size_key
);
39 const gchar
*ptr
= NULL
, *end
;
43 /* First time in this thread. */
44 result
= g_new(gchar
*, 1);
45 size
= g_new(gint
, 1);
46 g_static_private_set(&result_key
, result
, g_free
);
47 g_static_private_set(&size_key
, size
, g_free
);
52 for (ptr
= str
; *ptr
!= '\0'; ptr
= g_utf8_next_char(ptr
))
53 if (g_unichar_isalnum(g_utf8_get_char(ptr
)))
56 end
= ptr
+ strlen(ptr
);
60 * No g_unichar_isalnum() were found,
61 * we add the underscore in front.
65 /* + 2: '_' and '\0'. */
66 new_size
= end
- str
+ 2;
67 if (new_size
> *size
) {
71 *result
= g_new(gchar
, size
);
74 memcpy(*result
, str
, ptr
- str
);
75 (*result
)[ptr
- str
] = '_';
76 memcpy(*result
+ (ptr
- str
) + 1, ptr
, end
- ptr
+ 1);
82 * The magic to find a '\0' in a long int is taken from the glibc.
83 * See sysdeps/generic/strlen.c in the glibc sources to have the
85 * Actually it also finds 0x80, that's why we double check when
86 * HAS_ZERO() finds something.
88 static void init_magic(gulong
* himagic
, gulong
* lomagic
, gulong
* magic_bits
)
90 *himagic
= 0x80808080L
;
91 *lomagic
= 0x01010101L
;
92 if (sizeof(gulong
) > 4) {
94 *himagic
= ((*himagic
<< 16) << 16) | *himagic
;
95 *lomagic
= ((*lomagic
<< 16) << 16) | *lomagic
;
96 *magic_bits
= ((0x7efefefeL
<< 16) << 16) | 0xfefefeffL
;
98 *magic_bits
= 0x7efefeffL
;
/*
 * Non-zero when the word longint may contain a zero byte.
 * Relies on a variable named magic_bits being in scope at the point of use.
 * The argument is parenthesized everywhere so that compound expressions
 * such as HAS_ZERO(a | b) expand correctly.
 */
#define HAS_ZERO(longint) ((((longint) + magic_bits) ^ ~(longint)) & ~magic_bits)
103 #define CHECK_BYTE(ptr) \
114 static gboolean
str_is_ascii(const gchar
* str
)
116 gulong himagic
, lomagic
, magic_bits
;
117 gulong
*long_ptr
, long_int
;
127 if (((gulong
) str
& (sizeof(gulong
) - 1)) == 0)
134 long_ptr
= (gulong
*) str
;
135 init_magic(&himagic
, &lomagic
, &magic_bits
);
137 if (sizeof(gulong
) > 4)
139 mask
= ((mask
<< 16) << 16) | mask
;
142 long_int
= *long_ptr
;
143 if (HAS_ZERO(long_int
)) {
144 /* A '\0' has been detected. */
145 const gchar
*char_ptr
= (const gchar
*) long_ptr
;
146 CHECK_BYTE(char_ptr
);
147 CHECK_BYTE(char_ptr
);
148 CHECK_BYTE(char_ptr
);
149 CHECK_BYTE(char_ptr
);
150 if (sizeof(gulong
) > 4) {
152 CHECK_BYTE(char_ptr
);
153 CHECK_BYTE(char_ptr
);
154 CHECK_BYTE(char_ptr
);
155 CHECK_BYTE(char_ptr
);
157 } else if (long_int
& mask
)
166 static gboolean
have_broken_filenames(void)
168 static gboolean initialized
= FALSE
;
169 static gboolean broken
;
174 broken
= getenv("G_BROKEN_FILENAMES") != NULL
;
181 /* The returned string should not be freed. */
182 const gchar
*filename_to_utf8(const gchar
* str
)
184 static GStaticPrivate result_key
= G_STATIC_PRIVATE_INIT
;
188 if (!have_broken_filenames() || g_get_charset(NULL
) || str_is_ascii(str
))
191 result
= g_static_private_get(&result_key
);
192 if (result
== NULL
) {
193 /* First time in this thread. */
194 result
= g_new(gchar
*, 1);
195 g_static_private_set(&result_key
, result
, g_free
);
201 *result
= g_filename_to_utf8(str
, -1, NULL
, NULL
, &err
);
203 g_printerr("%s\n", err
->message
);
214 #define CHECK_UNDERSCORE(ptr) \
217 return nb_underscores; \
225 gint
count_underscores(const gchar
* str
)
227 gint nb_underscores
= 0;
228 gulong himagic
, lomagic
, magic_bits
;
229 gulong
*long_ptr
, long_int
;
236 else if (*str
== '\0')
237 return nb_underscores
;
239 else if (((gulong
) str
& (sizeof(gulong
) - 1)) == 0)
246 long_ptr
= (gulong
*) str
;
247 init_magic(&himagic
, &lomagic
, &magic_bits
);
251 if (sizeof(gulong
) > 4)
253 mask
= ((mask
<< 16) << 16) | mask
;
256 long_int
= *long_ptr
;
257 if (HAS_ZERO(long_int
)) {
258 /* A '\0' has been detected. */
259 const gchar
*char_ptr
= (const gchar
*) long_ptr
;
260 CHECK_UNDERSCORE(char_ptr
);
261 CHECK_UNDERSCORE(char_ptr
);
262 CHECK_UNDERSCORE(char_ptr
);
263 CHECK_UNDERSCORE(char_ptr
);
264 if (sizeof(gulong
) > 4) {
266 CHECK_UNDERSCORE(char_ptr
);
267 CHECK_UNDERSCORE(char_ptr
);
268 CHECK_UNDERSCORE(char_ptr
);
269 CHECK_UNDERSCORE(char_ptr
);
274 masked
= long_int
^ mask
;
275 if (HAS_ZERO(masked
)) {
276 /* A '_' has been detected. */
277 gchar
*char_ptr
= (gchar
*) & mask
;
278 nb_underscores
+= (*char_ptr
++ == 0);
279 nb_underscores
+= (*char_ptr
++ == 0);
280 nb_underscores
+= (*char_ptr
++ == 0);
281 nb_underscores
+= (*char_ptr
++ == 0);
282 if (sizeof(gulong
) > 4) {
284 nb_underscores
+= (*char_ptr
++ == 0);
285 nb_underscores
+= (*char_ptr
++ == 0);
286 nb_underscores
+= (*char_ptr
++ == 0);
287 nb_underscores
+= (*char_ptr
++ == 0);
295 return nb_underscores
;
299 G_INLINE_FUNC gboolean
starts_dotslash(const gchar
* str
)
301 return str
[0] == '.' && str
[1] == '/';
304 static gboolean
is_clean(const gchar
* filename
)
306 if (filename
[0] != '/' && !starts_dotslash(filename
))
309 return strstr(filename
, "//") == NULL
&& strstr(filename
, "/./") == NULL
;
312 static gint
count_errors(gchar
* str
, gint
* len
)
317 * "path1/./path2" is replaced with "path1///path2".
318 * "./" and "//" are counted.
320 while (*str
!= '\0') {
341 static gchar
*remove_double_slash(gchar
* str
, gint new_len
, gboolean absolute
)
343 gchar
*new, *new_ptr
;
345 if (absolute
|| starts_dotslash(str
)) {
346 new = g_new(gchar
, new_len
);
349 new = g_new(gchar
, new_len
+ 2);
355 while (*str
!= '\0') {
356 if (str
[0] != '/' || str
[1] != '/') {
369 gchar
*clean_filename(const gchar
* filename
)
371 gchar
*orig_copy
, *copy
, *new;
372 gint count
= 0, len
= 0;
373 gboolean absolute
, finished
= FALSE
;
375 if (is_clean(filename
))
376 return g_strdup(filename
);
378 /* We work on a copy as we may modify it. */
379 orig_copy
= copy
= g_strdup(filename
);
381 absolute
= (copy
[0] == '/');
382 while (finished
== FALSE
) {
401 /* We keep the last leading '/' for absolute filenames. */
403 else if (starts_dotslash(orig_copy
))
404 /* We keep the last leading './' for relative filenames. */
407 /* Count the '//' and '/./'. */
408 count
= count_errors(copy
, &len
);
411 /* The filename was clean. */
414 if (orig_copy
!= copy
)
415 /* The filename started with "//". */
416 g_memmove(orig_copy
, copy
, len
+ 1);
421 if (starts_dotslash(orig_copy
)) {
422 g_memmove(orig_copy
, copy
, len
+ 1);
425 /* The relative filename just lacked the "./" in the beginning. */
426 new = g_strconcat("./", copy
, NULL
);
433 /* We now have to remove the "//". */
434 new = remove_double_slash(copy
, len
- count
+ 1, absolute
);
441 gchar
*clean_filename_free(gchar
* filename
)
445 if (filename
[0] == '\0' || is_clean(filename
))
448 clean
= clean_filename(filename
);