Wrong initialization for nb_inserted in the deserialization.
[gliv.git] / src / str_utils.c
blob9becb7918a9d7ddd5809b07003713b1ac114a6a6
1 /*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License
4 * as published by the Free Software Foundation; either version 2
5 * of the License, or (at your option) any later version.
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
16 * See the COPYING file for license information.
18 * Guillaume Chazarain <guichaz@yahoo.fr>
21 /*******************************************
22 * UTF-8, filenames and mnemonics handling *
23 *******************************************/
25 #include <string.h> /* memcpy(), strlen(), strstr() */
26 #include <stdlib.h> /* getenv() */
28 #include "gliv.h"
29 #include "str_utils.h"
31 /* The returned string should not be freed. */
32 const gchar *add_mnemonic(const gchar * str)
34 static GStaticPrivate result_key = G_STATIC_PRIVATE_INIT;
35 static GStaticPrivate size_key = G_STATIC_PRIVATE_INIT;
37 gchar **result = g_static_private_get(&result_key);
38 gint *size = g_static_private_get(&size_key);
39 const gchar *ptr = NULL, *end;
40 gint new_size;
42 if (result == NULL) {
43 /* First time in this thread. */
44 result = g_new(gchar *, 1);
45 size = g_new(gint, 1);
46 g_static_private_set(&result_key, result, g_free);
47 g_static_private_set(&size_key, size, g_free);
48 *result = NULL;
49 *size = 0;
52 for (ptr = str; *ptr != '\0'; ptr = g_utf8_next_char(ptr))
53 if (g_unichar_isalnum(g_utf8_get_char(ptr)))
54 break;
56 end = ptr + strlen(ptr);
58 if (*ptr == '\0')
60 * No g_unichar_isalnum() were found,
61 * we add the underscore in front.
63 ptr = str;
65 /* + 2: '_' and '\0'. */
66 new_size = end - str + 2;
67 if (new_size > *size) {
68 *size = new_size;
69 g_free(*result);
71 *result = g_new(gchar, size);
74 memcpy(*result, str, ptr - str);
75 (*result)[ptr - str] = '_';
76 memcpy(*result + (ptr - str) + 1, ptr, end - ptr + 1);
78 return *result;
82 * The magic to find a '\0' in a long int is taken from the glibc.
83 * See sysdeps/generic/strlen.c in the glibc sources to have the
84 * explanation.
85 * Actually it also finds 0x80, that's why we double check when
86 * HAS_ZERO() finds something.
88 static void init_magic(gulong * himagic, gulong * lomagic, gulong * magic_bits)
90 *himagic = 0x80808080L;
91 *lomagic = 0x01010101L;
92 if (sizeof(gulong) > 4) {
93 /* 64-bit */
94 *himagic = ((*himagic << 16) << 16) | *himagic;
95 *lomagic = ((*lomagic << 16) << 16) | *lomagic;
96 *magic_bits = ((0x7efefefeL << 16) << 16) | 0xfefefeffL;
97 } else
98 *magic_bits = 0x7efefeffL;
/*
 * Non-zero when the word `longint` may contain a zero byte.
 * Uses a variable named `magic_bits` that must be in scope at the point
 * of use.  The argument is fully parenthesised so that expressions such
 * as HAS_ZERO(a | b) are evaluated as a whole.
 */
#define HAS_ZERO(longint) \
    ((((longint) + magic_bits) ^ ~(longint)) & ~magic_bits)
/*
 * Examine the byte at `ptr` from inside a function returning gboolean:
 * return TRUE on the terminating '\0', FALSE when the high bit is set,
 * otherwise advance `ptr` to the next byte.
 * `ptr` must be a modifiable lvalue; it is evaluated several times.
 * The high bit is tested with a mask rather than by shifting a possibly
 * negative gchar.
 */
#define CHECK_BYTE(ptr)                 \
    do {                                \
        if (*(ptr) == '\0')             \
            return TRUE;                \
                                        \
        if (*(ptr) & 0x80)              \
            return FALSE;               \
                                        \
        (ptr)++;                        \
    } while (0)
114 static gboolean str_is_ascii(const gchar * str)
116 gulong himagic, lomagic, magic_bits;
117 gulong *long_ptr, long_int;
118 gulong mask;
120 for (;;) {
121 if (*str == '\0')
122 return TRUE;
124 if (*str >> 7)
125 return FALSE;
127 if (((gulong) str & (sizeof(gulong) - 1)) == 0)
128 /* Aligned. */
129 break;
131 str++;
134 long_ptr = (gulong *) str;
135 init_magic(&himagic, &lomagic, &magic_bits);
136 mask = 0x80808080;
137 if (sizeof(gulong) > 4)
138 /* 64-bit */
139 mask = ((mask << 16) << 16) | mask;
141 for (;;) {
142 long_int = *long_ptr;
143 if (HAS_ZERO(long_int)) {
144 /* A '\0' has been detected. */
145 const gchar *char_ptr = (const gchar *) long_ptr;
146 CHECK_BYTE(char_ptr);
147 CHECK_BYTE(char_ptr);
148 CHECK_BYTE(char_ptr);
149 CHECK_BYTE(char_ptr);
150 if (sizeof(gulong) > 4) {
151 /* 64-bit */
152 CHECK_BYTE(char_ptr);
153 CHECK_BYTE(char_ptr);
154 CHECK_BYTE(char_ptr);
155 CHECK_BYTE(char_ptr);
157 } else if (long_int & mask)
158 return FALSE;
160 long_ptr++;
162 return TRUE;
165 /* From glib. */
166 static gboolean have_broken_filenames(void)
168 static gboolean initialized = FALSE;
169 static gboolean broken;
171 if (initialized)
172 return broken;
174 broken = getenv("G_BROKEN_FILENAMES") != NULL;
175 initialized = TRUE;
177 return broken;
181 /* The returned string should not be freed. */
182 const gchar *filename_to_utf8(const gchar * str)
184 static GStaticPrivate result_key = G_STATIC_PRIVATE_INIT;
185 gchar **result;
186 GError *err = NULL;
188 if (!have_broken_filenames() || g_get_charset(NULL) || str_is_ascii(str))
189 return str;
191 result = g_static_private_get(&result_key);
192 if (result == NULL) {
193 /* First time in this thread. */
194 result = g_new(gchar *, 1);
195 g_static_private_set(&result_key, result, g_free);
196 *result = NULL;
199 g_free(*result);
201 *result = g_filename_to_utf8(str, -1, NULL, NULL, &err);
202 if (err != NULL) {
203 g_printerr("%s\n", err->message);
204 g_error_free(err);
206 g_free(*result);
207 *result = NULL;
208 return str;
211 return *result;
/*
 * Examine the byte at `ptr` from inside a function that owns a counter
 * named `nb_underscores`: return the counter on the terminating '\0',
 * increment it on '_', then advance `ptr` to the next byte.
 * `ptr` must be a modifiable lvalue; it is evaluated several times.
 */
#define CHECK_UNDERSCORE(ptr)           \
    do {                                \
        if (*(ptr) == '\0')             \
            return nb_underscores;      \
                                        \
        if (*(ptr) == '_')              \
            nb_underscores++;           \
                                        \
        (ptr)++;                        \
    } while (0)
225 gint count_underscores(const gchar * str)
227 gint nb_underscores = 0;
228 gulong himagic, lomagic, magic_bits;
229 gulong *long_ptr, long_int;
230 gulong mask;
232 for (;;) {
233 if (*str == '_')
234 nb_underscores++;
236 else if (*str == '\0')
237 return nb_underscores;
239 else if (((gulong) str & (sizeof(gulong) - 1)) == 0)
240 /* Aligned. */
241 break;
243 str++;
246 long_ptr = (gulong *) str;
247 init_magic(&himagic, &lomagic, &magic_bits);
249 /* '_' == 0x5F */
250 mask = 0x5F5F5F5F;
251 if (sizeof(gulong) > 4)
252 /* 64-bit */
253 mask = ((mask << 16) << 16) | mask;
255 for (;;) {
256 long_int = *long_ptr;
257 if (HAS_ZERO(long_int)) {
258 /* A '\0' has been detected. */
259 const gchar *char_ptr = (const gchar *) long_ptr;
260 CHECK_UNDERSCORE(char_ptr);
261 CHECK_UNDERSCORE(char_ptr);
262 CHECK_UNDERSCORE(char_ptr);
263 CHECK_UNDERSCORE(char_ptr);
264 if (sizeof(gulong) > 4) {
265 /* 64-bit */
266 CHECK_UNDERSCORE(char_ptr);
267 CHECK_UNDERSCORE(char_ptr);
268 CHECK_UNDERSCORE(char_ptr);
269 CHECK_UNDERSCORE(char_ptr);
271 } else {
272 gulong masked;
274 masked = long_int ^ mask;
275 if (HAS_ZERO(masked)) {
276 /* A '_' has been detected. */
277 gchar *char_ptr = (gchar *) & mask;
278 nb_underscores += (*char_ptr++ == 0);
279 nb_underscores += (*char_ptr++ == 0);
280 nb_underscores += (*char_ptr++ == 0);
281 nb_underscores += (*char_ptr++ == 0);
282 if (sizeof(gulong) > 4) {
283 /* 64-bit */
284 nb_underscores += (*char_ptr++ == 0);
285 nb_underscores += (*char_ptr++ == 0);
286 nb_underscores += (*char_ptr++ == 0);
287 nb_underscores += (*char_ptr++ == 0);
292 long_ptr++;
295 return nb_underscores;
299 G_INLINE_FUNC gboolean starts_dotslash(const gchar * str)
301 return str[0] == '.' && str[1] == '/';
304 static gboolean is_clean(const gchar * filename)
306 if (filename[0] != '/' && !starts_dotslash(filename))
307 return FALSE;
309 return strstr(filename, "//") == NULL && strstr(filename, "/./") == NULL;
312 static gint count_errors(gchar * str, gint * len)
314 gint count = 0;
317 * "path1/./path2" is replaced with "path1///path2".
318 * "./" and "//" are counted.
320 while (*str != '\0') {
321 (*len)++;
322 if (str[0] == '/')
323 switch (str[1]) {
324 case '.':
325 if (str[2] == '/') {
326 str[1] = '/';
327 count++;
329 break;
331 case '/':
332 count++;
335 str++;
338 return count;
341 static gchar *remove_double_slash(gchar * str, gint new_len, gboolean absolute)
343 gchar *new, *new_ptr;
345 if (absolute || starts_dotslash(str)) {
346 new = g_new(gchar, new_len);
347 new_ptr = new;
348 } else {
349 new = g_new(gchar, new_len + 2);
350 new[0] = '.';
351 new[1] = '/';
352 new_ptr = new + 2;
355 while (*str != '\0') {
356 if (str[0] != '/' || str[1] != '/') {
357 *new_ptr = *str;
358 new_ptr++;
361 str++;
364 *new_ptr = '\0';
366 return new;
369 gchar *clean_filename(const gchar * filename)
371 gchar *orig_copy, *copy, *new;
372 gint count = 0, len = 0;
373 gboolean absolute, finished = FALSE;
375 if (is_clean(filename))
376 return g_strdup(filename);
378 /* We work on a copy as we may modify it. */
379 orig_copy = copy = g_strdup(filename);
381 absolute = (copy[0] == '/');
382 while (finished == FALSE) {
383 switch (copy[0]) {
384 case '.':
385 if (copy[1] == '/')
386 copy += 2;
387 else
388 finished = TRUE;
389 break;
391 case '/':
392 copy++;
393 break;
395 default:
396 finished = TRUE;
400 if (absolute)
401 /* We keep the last leading '/' for absolute filenames. */
402 copy--;
403 else if (starts_dotslash(orig_copy))
404 /* We keep the last leading './' for relative filenames. */
405 copy -= 2;
407 /* Count the '//' and '/./'. */
408 count = count_errors(copy, &len);
410 if (count == 0) {
411 /* The filename was clean. */
413 if (absolute) {
414 if (orig_copy != copy)
415 /* The filename started with "//". */
416 g_memmove(orig_copy, copy, len + 1);
418 return orig_copy;
421 if (starts_dotslash(orig_copy)) {
422 g_memmove(orig_copy, copy, len + 1);
423 new = orig_copy;
424 } else {
425 /* The relative filename just lacked the "./" in the beginning. */
426 new = g_strconcat("./", copy, NULL);
427 g_free(orig_copy);
430 return new;
433 /* We now have to remove the "//". */
434 new = remove_double_slash(copy, len - count + 1, absolute);
436 g_free(orig_copy);
438 return new;
441 gchar *clean_filename_free(gchar * filename)
443 gchar *clean;
445 if (filename[0] == '\0' || is_clean(filename))
446 clean = filename;
447 else {
448 clean = clean_filename(filename);
449 g_free(filename);
452 return clean;