1 /* GLIB - Library of useful routines for C programming
2 * Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 * Modified by the GLib Team and others 1997-2000. See the AUTHORS
20 * file for a list of people on the GLib Team. See the ChangeLog
21 * files for a list of changes. These files are distributed with
22 * GLib at ftp://ftp.gtk.org/pub/gtk/.
38 #include <ctype.h> /* For tolower() */
41 /* Needed on BSD/OS X for e.g. strtod_l */
49 /* do not include <unistd.h> here, it may interfere with g_strsignal() */
51 #include "gstrfuncs.h"
54 #include "gprintfint.h"
59 * SECTION:string_utils
60 * @title: String Utility Functions
61 * @short_description: various string-related functions
63 * This section describes a number of utility functions for creating,
64 * duplicating, and manipulating strings.
66 * Note that the functions g_printf(), g_fprintf(), g_sprintf(),
67 * g_snprintf(), g_vprintf(), g_vfprintf(), g_vsprintf() and g_vsnprintf()
68 * are declared in the header `gprintf.h` which is not included in `glib.h`
69 * (otherwise using `glib.h` would drag in `stdio.h`), so you'll have to
70 * explicitly include `<glib/gprintf.h>` in order to use the GLib printf() functions.
73 * ## String precision pitfalls # {#string-precision}
75 * While you may use the printf() functions to format UTF-8 strings,
76 * notice that the precision of a \%Ns parameter is interpreted
77 * as the number of bytes, not characters to print. On top of that,
78 * the GNU libc implementation of the printf() functions has the
79 * "feature" that it checks that the string given for the \%Ns
80 * parameter consists of a whole number of characters in the current
81 * encoding. So, unless you are sure you are always going to be in a
82 * UTF-8 locale or you know your text is restricted to ASCII, avoid
83 * using \%Ns. If your intention is to format strings for a
84 * certain number of columns, then \%Ns is not a correct solution
85 * anyway, since it fails to take wide characters (see g_unichar_iswide()) into account.
88 * Note also that there are various printf() parameters which are platform
89 * dependent. GLib provides platform independent macros for these parameters
90 * which should be used instead. A common example is %G_GUINT64_FORMAT, which
91 * should be used instead of `%llu` or similar parameters for formatting
92 * 64-bit integers. These macros are all named `G_*_FORMAT`; see
93 * [Basic Types][glib-Basic-Types].
100 * Determines whether a character is alphanumeric.
102 * Unlike the standard C library isalnum() function, this only
103 * recognizes standard ASCII letters and ignores the locale,
104 * returning %FALSE for all non-ASCII characters. Also, unlike
105 * the standard library function, this takes a char, not an int,
106 * so don't call it on %EOF, but no need to cast to #guchar before
107 * passing a possibly non-ASCII character in.
109 * Returns: %TRUE if @c is an ASCII alphanumeric character
116 * Determines whether a character is alphabetic (i.e. a letter).
118 * Unlike the standard C library isalpha() function, this only
119 * recognizes standard ASCII letters and ignores the locale,
120 * returning %FALSE for all non-ASCII characters. Also, unlike
121 * the standard library function, this takes a char, not an int,
122 * so don't call it on %EOF, but no need to cast to #guchar before
123 * passing a possibly non-ASCII character in.
125 * Returns: %TRUE if @c is an ASCII alphabetic character
132 * Determines whether a character is a control character.
134 * Unlike the standard C library iscntrl() function, this only
135 * recognizes standard ASCII control characters and ignores the
136 * locale, returning %FALSE for all non-ASCII characters. Also,
137 * unlike the standard library function, this takes a char, not
138 * an int, so don't call it on %EOF, but no need to cast to #guchar
139 * before passing a possibly non-ASCII character in.
141 * Returns: %TRUE if @c is an ASCII control character.
148 * Determines whether a character is a digit (0-9).
150 * Unlike the standard C library isdigit() function, this takes
151 * a char, not an int, so don't call it on %EOF, but no need to
152 * cast to #guchar before passing a possibly non-ASCII character in.
154 * Returns: %TRUE if @c is an ASCII digit.
161 * Determines whether a character is a printing character and not a space.
163 * Unlike the standard C library isgraph() function, this only
164 * recognizes standard ASCII characters and ignores the locale,
165 * returning %FALSE for all non-ASCII characters. Also, unlike
166 * the standard library function, this takes a char, not an int,
167 * so don't call it on %EOF, but no need to cast to #guchar before
168 * passing a possibly non-ASCII character in.
170 * Returns: %TRUE if @c is an ASCII printing character other than space.
177 * Determines whether a character is an ASCII lower case letter.
179 * Unlike the standard C library islower() function, this only
180 * recognizes standard ASCII letters and ignores the locale,
181 * returning %FALSE for all non-ASCII characters. Also, unlike
182 * the standard library function, this takes a char, not an int,
183 * so don't call it on %EOF, but no need to worry about casting
184 * to #guchar before passing a possibly non-ASCII character in.
186 * Returns: %TRUE if @c is an ASCII lower case letter
193 * Determines whether a character is a printing character.
195 * Unlike the standard C library isprint() function, this only
196 * recognizes standard ASCII characters and ignores the locale,
197 * returning %FALSE for all non-ASCII characters. Also, unlike
198 * the standard library function, this takes a char, not an int,
199 * so don't call it on %EOF, but no need to cast to #guchar before
200 * passing a possibly non-ASCII character in.
202 * Returns: %TRUE if @c is an ASCII printing character.
209 * Determines whether a character is a punctuation character.
211 * Unlike the standard C library ispunct() function, this only
212 * recognizes standard ASCII punctuation and ignores the locale,
213 * returning %FALSE for all non-ASCII characters. Also, unlike
214 * the standard library function, this takes a char, not an int,
215 * so don't call it on %EOF, but no need to cast to #guchar before
216 * passing a possibly non-ASCII character in.
218 * Returns: %TRUE if @c is an ASCII punctuation character.
225 * Determines whether a character is a white-space character.
227 * Unlike the standard C library isspace() function, this only
228 * recognizes standard ASCII white-space and ignores the locale,
229 * returning %FALSE for all non-ASCII characters. Also, unlike
230 * the standard library function, this takes a char, not an int,
231 * so don't call it on %EOF, but no need to cast to #guchar before
232 * passing a possibly non-ASCII character in.
234 * Returns: %TRUE if @c is an ASCII white-space character
241 * Determines whether a character is an ASCII upper case letter.
243 * Unlike the standard C library isupper() function, this only
244 * recognizes standard ASCII letters and ignores the locale,
245 * returning %FALSE for all non-ASCII characters. Also, unlike
246 * the standard library function, this takes a char, not an int,
247 * so don't call it on %EOF, but no need to worry about casting
248 * to #guchar before passing a possibly non-ASCII character in.
250 * Returns: %TRUE if @c is an ASCII upper case letter
257 * Determines whether a character is a hexadecimal-digit character.
259 * Unlike the standard C library isxdigit() function, this takes
260 * a char, not an int, so don't call it on %EOF, but no need to
261 * cast to #guchar before passing a possibly non-ASCII character in.
263 * Returns: %TRUE if @c is an ASCII hexadecimal-digit character.
267 * G_ASCII_DTOSTR_BUF_SIZE:
269 * A good size for a buffer to be passed into g_ascii_dtostr().
270 * It is guaranteed to be enough for all output of that function
271 * on systems with 64bit IEEE-compatible doubles.
273 * The typical usage would be something like:
274 * |[<!-- language="C" -->
275 * char buf[G_ASCII_DTOSTR_BUF_SIZE];
277 * fprintf (out, "value=%s\n", g_ascii_dtostr (buf, sizeof (buf), value));
283 * @string: a string to remove the leading and trailing whitespace from
285 * Removes leading and trailing whitespace from a string.
286 * See g_strchomp() and g_strchug().
294 * The standard delimiters, used in g_strdelimit().
/* Per-byte classification table with 256 guint16 entries, one per possible
 * byte value.  Only the first 128 (ASCII) entries are written out; the
 * remaining ones are left to zero-initialisation, as the trailing comment
 * notes.
 * NOTE(review): each entry looks like a bitmask of character-class flags
 * (presumably the GAsciiType values from the header) -- the flag
 * definitions are not visible in this excerpt, confirm there. */
297 static const guint16 ascii_table_data
[256] = {
298 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004,
299 0x004, 0x104, 0x104, 0x004, 0x104, 0x104, 0x004, 0x004,
300 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004,
301 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004,
302 0x140, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
303 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
304 0x459, 0x459, 0x459, 0x459, 0x459, 0x459, 0x459, 0x459,
305 0x459, 0x459, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
306 0x0d0, 0x653, 0x653, 0x653, 0x653, 0x653, 0x653, 0x253,
307 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253,
308 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253,
309 0x253, 0x253, 0x253, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
310 0x0d0, 0x473, 0x473, 0x473, 0x473, 0x473, 0x473, 0x073,
311 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073,
312 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073,
313 0x073, 0x073, 0x073, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x004
314 /* the upper 128 are all zeroes */
/* Constant pointer through which the table above is exposed under the
 * name g_ascii_table. */
317 const guint16
* const g_ascii_table
= ascii_table_data
;
/* USE_XLOCALE is defined only when all five HAVE_* feature macros below
 * are defined, i.e. when newlocale(), uselocale(), strtod_l(),
 * strtoull_l() and strtoll_l() are all reported as available.  The code
 * further down calls those functions on its USE_XLOCALE paths. */
319 #if defined (HAVE_NEWLOCALE) && \
320 defined (HAVE_USELOCALE) && \
321 defined (HAVE_STRTOD_L) && \
322 defined (HAVE_STRTOULL_L) && \
323 defined (HAVE_STRTOLL_L)
324 #define USE_XLOCALE 1
/* One-time creation of a locale_t handle for the "C" locale.
 * The g_once_init_enter()/g_once_init_leave() pair on `initialized` guards
 * the newlocale() call, and the result is kept in the static C_locale.
 * NOTE(review): this looks like the body of get_C_locale(), which is called
 * elsewhere in this file; its signature and return statement are not
 * visible in this excerpt -- confirm. */
331 static gsize initialized
= FALSE
;
332 static locale_t C_locale
= NULL
;
334 if (g_once_init_enter (&initialized
))
/* Build a locale object covering all categories (LC_ALL_MASK), named "C". */
336 C_locale
= newlocale (LC_ALL_MASK
, "C", NULL
);
337 g_once_init_leave (&initialized
, TRUE
);
346 * @str: (nullable): the string to duplicate
348 * Duplicates a string. If @str is %NULL it returns %NULL.
349 * The returned string should be freed with g_free()
350 * when no longer needed.
352 * Returns: a newly-allocated copy of @str
/* Visible core of g_strdup(): measure @str including its terminating nul,
 * allocate that many chars with g_new() and copy the bytes with memcpy().
 * NOTE(review): the %NULL-input branch described in the doc comment and the
 * return statement are not visible in this excerpt. */
355 g_strdup (const gchar
*str
)
/* + 1 so that the terminating nul is allocated and copied as well. */
362 length
= strlen (str
) + 1;
363 new_str
= g_new (char, length
);
364 memcpy (new_str
, str
, length
);
374 * @mem: the memory to copy.
375 * @byte_size: the number of bytes to copy.
377 * Allocates @byte_size bytes of memory, and copies @byte_size bytes into it
378 * from @mem. If @mem is %NULL it returns %NULL.
380 * Returns: a pointer to the newly-allocated copy of the memory, or %NULL if @mem is %NULL.
/* Visible core of g_memdup(): when @mem is non-NULL and @byte_size is
 * non-zero, allocate @byte_size bytes with g_malloc() and copy them from
 * @mem.
 * NOTE(review): the declaration of byte_size, the branch taken otherwise
 * and the return statement are not visible in this excerpt. */
384 g_memdup (gconstpointer mem
,
/* Nothing is allocated for a NULL source or a zero-length request. */
389 if (mem
&& byte_size
!= 0)
391 new_mem
= g_malloc (byte_size
);
392 memcpy (new_mem
, mem
, byte_size
);
402 * @str: the string to duplicate
403 * @n: the maximum number of bytes to copy from @str
405 * Duplicates the first @n bytes of a string, returning a newly-allocated
406 * buffer @n + 1 bytes long which will always be nul-terminated. If @str
407 * is less than @n bytes long the buffer is padded with nuls. If @str is
408 * %NULL it returns %NULL. The returned value should be freed when no longer needed.
411 * To copy a number of characters from a UTF-8 encoded string,
412 * use g_utf8_strncpy() instead.
414 * Returns: a newly-allocated buffer containing the first @n bytes
415 * of @str, nul-terminated
/* Visible core of g_strndup(): allocate @n + 1 bytes and copy at most @n
 * bytes of @str with strncpy(), which nul-pads the destination when @str
 * is shorter than @n.
 * NOTE(review): strncpy() by itself does not terminate the buffer when
 * @str holds @n or more bytes; the explicit terminator store, the %NULL
 * check and the return are presumably on lines not visible here --
 * confirm. */
418 g_strndup (const gchar
*str
,
/* One extra byte beyond @n leaves room for the terminating nul. */
425 new_str
= g_new (gchar
, n
+ 1);
426 strncpy (new_str
, str
, n
);
437 * @length: the length of the new string
438 * @fill_char: the byte to fill the string with
440 * Creates a new string @length bytes long filled with @fill_char.
441 * The returned string should be freed when no longer needed.
443 * Returns: a newly-allocated string filled with @fill_char
/* Visible core of g_strnfill(): allocate @length + 1 bytes and set the
 * first @length of them to @fill_char.
 * NOTE(review): the store of the terminating nul and the return statement
 * are not visible in this excerpt. */
446 g_strnfill (gsize length
,
451 str
= g_new (gchar
, length
+ 1);
/* fill_char is cast to guchar before being handed to memset(). */
452 memset (str
, (guchar
)fill_char
, length
);
460 * @dest: destination buffer.
461 * @src: source string.
463 * Copies a nul-terminated string into the dest buffer, including the
464 * trailing nul, and returns a pointer to the trailing nul byte.
465 * This is useful for concatenating multiple strings together
466 * without having to repeatedly scan for the end.
468 * Returns: a pointer to the trailing nul byte.
/* g_stpcpy(): two alternative bodies are visible.  Both reject a NULL
 * @dest or @src through g_return_val_if_fail() (returning NULL).
 * NOTE(review): the preprocessor lines choosing between the two bodies are
 * not visible in this excerpt; presumably the first is used when the C
 * library provides stpcpy() -- confirm. */
471 g_stpcpy (gchar
*dest
,
475 g_return_val_if_fail (dest
!= NULL
, NULL
);
476 g_return_val_if_fail (src
!= NULL
, NULL
);
/* Variant 1: delegate directly to the C library's stpcpy(). */
477 return stpcpy (dest
, src
);
/* Variant 2: hand-written copy that walks @src with the cursor `s`. */
480 const gchar
*s
= src
;
482 g_return_val_if_fail (dest
!= NULL
, NULL
);
483 g_return_val_if_fail (src
!= NULL
, NULL
);
/* Loop condition: keep going until the nul byte has been consumed.
 * NOTE(review): the loop body and the return are not visible here. */
486 while (*s
++ != '\0');
494 * @format: a standard printf() format string, but notice
495 * [string precision pitfalls][string-precision]
496 * @args: the list of parameters to insert into the format string
498 * Similar to the standard C vsprintf() function but safer, since it
499 * calculates the maximum space required and allocates memory to hold
500 * the result. The returned string should be freed with g_free() when no longer needed.
503 * See also g_vasprintf(), which offers the same functionality, but
504 * additionally returns the length of the allocated string.
506 * Returns: a newly-allocated string holding the result
/* g_strdup_vprintf(): formats @format/@args into a string allocated by
 * g_vasprintf(), which receives the address of `string`.
 * NOTE(review): the declaration of the va_list parameter and the return
 * statement are not visible in this excerpt. */
509 g_strdup_vprintf (const gchar
*format
,
/* Starts out NULL; g_vasprintf() is handed its address to fill in. */
512 gchar
*string
= NULL
;
514 g_vasprintf (&string
, format
, args
);
521 * @format: a standard printf() format string, but notice
522 * [string precision pitfalls][string-precision]
523 * @...: the parameters to insert into the format string
525 * Similar to the standard C sprintf() function but safer, since it
526 * calculates the maximum space required and allocates memory to hold
527 * the result. The returned string should be freed with g_free() when no longer needed.
530 * Returns: a newly-allocated string holding the result
/* g_strdup_printf(): variadic front end that collects its arguments into a
 * va_list and forwards them to g_strdup_vprintf().
 * NOTE(review): the matching va_end() and the return of `buffer` are not
 * visible in this excerpt. */
533 g_strdup_printf (const gchar
*format
,
539 va_start (args
, format
);
540 buffer
= g_strdup_vprintf (format
, args
);
548 * @string1: the first string to add, which must not be %NULL
549 * @...: a %NULL-terminated list of strings to append to the string
551 * Concatenates all of the given strings into one long string. The
552 * returned string should be freed with g_free() when no longer needed.
554 * The variable argument list must end with %NULL. If you forget the %NULL,
555 * g_strconcat() will start appending random memory junk to your string.
557 * Note that this function is usually not the right function to use to
558 * assemble a translated message from pieces, since proper translation
559 * often requires the pieces to be reordered.
561 * Returns: a newly-allocated string containing all the string arguments
/* g_strconcat(): the visible statements walk the variable argument list
 * twice -- once while accumulating the size `l`, and once more while
 * appending each string with g_stpcpy().
 * NOTE(review): the loop heads, the per-string length accumulation, the
 * va_end() calls, the initialisation of `ptr` and the return statement are
 * not visible in this excerpt. */
564 g_strconcat (const gchar
*string1
, ...)
/* Size starts at the length of the first string plus one extra byte
 * (presumably for the terminating nul). */
575 l
= 1 + strlen (string1
);
/* First walk over the variadic arguments. */
576 va_start (args
, string1
);
577 s
= va_arg (args
, gchar
*);
581 s
= va_arg (args
, gchar
*);
/* Allocate the result buffer using the accumulated size. */
585 concat
= g_new (gchar
, l
);
/* g_stpcpy() returns the position of the nul it wrote, so each
 * subsequent append continues from there without rescanning. */
588 ptr
= g_stpcpy (ptr
, string1
);
/* Second walk over the variadic arguments, this time copying. */
589 va_start (args
, string1
);
590 s
= va_arg (args
, gchar
*);
593 ptr
= g_stpcpy (ptr
, s
);
594 s
= va_arg (args
, gchar
*);
603 * @nptr: the string to convert to a numeric value.
604 * @endptr: (out) (transfer none) (optional): if non-%NULL, it returns the
605 * character after the last character used in the conversion.
607 * Converts a string to a #gdouble value.
608 * It calls the standard strtod() function to handle the conversion, but
609 * if the string is not completely converted it attempts the conversion
610 * again with g_ascii_strtod(), and returns the best match.
612 * This function should seldom be used. The normal situation when reading
613 * numbers not for human consumption is to use g_ascii_strtod(). Only when
614 * you know that you must expect both locale formatted and C formatted numbers
615 * should you use this. Make sure that you don't pass strings such as comma
616 * separated lists of values, since the commas may be interpreted as a decimal
617 * point in some locales, causing unexpected results.
619 * Returns: the #gdouble value.
/* g_strtod(): parse @nptr with the locale-aware strtod() first; if that
 * stopped before the end of the string, parse again with g_ascii_strtod()
 * and compare how far each attempt got.
 * NOTE(review): the local declarations, the initialisation of fail_pos_2
 * and the return statements are not visible in this excerpt. */
622 g_strtod (const gchar
*nptr
,
630 g_return_val_if_fail (nptr
!= NULL
, 0);
/* Attempt 1: the C library's strtod(). */
635 val_1
= strtod (nptr
, &fail_pos_1
);
/* Attempt 2 runs only when attempt 1 left unparsed characters. */
637 if (fail_pos_1
&& fail_pos_1
[0] != 0)
638 val_2
= g_ascii_strtod (nptr
, &fail_pos_2
);
/* Take attempt 1 when it consumed the whole string or got at least as
 * far as attempt 2; otherwise the second store below reports attempt 2's
 * stop position instead. */
640 if (!fail_pos_1
|| fail_pos_1
[0] == 0 || fail_pos_1
>= fail_pos_2
)
643 *endptr
= fail_pos_1
;
649 *endptr
= fail_pos_2
;
656 * @nptr: the string to convert to a numeric value.
657 * @endptr: (out) (transfer none) (optional): if non-%NULL, it returns the
658 * character after the last character used in the conversion.
660 * Converts a string to a #gdouble value.
662 * This function behaves like the standard strtod() function
663 * does in the C locale. It does this without actually changing
664 * the current locale, since that would not be thread-safe.
665 * A limitation of the implementation is that this function
666 * will still accept localized versions of infinities and NANs.
668 * This function is typically used when reading configuration
669 * files or other non-user input that should be locale independent.
670 * To handle input from the user you should normally use the
671 * locale-sensitive system strtod() function.
673 * To convert from a #gdouble to a string in a locale-insensitive
674 * way, use g_ascii_dtostr().
676 * If the correct value would cause overflow, plus or minus %HUGE_VAL
677 * is returned (according to the sign of the value), and %ERANGE is
678 * stored in %errno. If the correct value would cause underflow,
679 * zero is returned and %ERANGE is stored in %errno.
681 * This function resets %errno before calling strtod() so that
682 * you can reliably detect overflow and underflow.
684 * Returns: the #gdouble value.
/* g_ascii_strtod(): two implementations are visible.
 *  - One simply calls strtod_l() with the locale from get_C_locale().
 *  - The other scans the number itself using the g_ascii_* classifiers,
 *    remembers where the '.' is, and, when the current locale's decimal
 *    point is not ".", hands strtod() a copy in which the '.' has been
 *    replaced by the locale's decimal point.  The stop position reported
 *    by strtod() is then translated back into @nptr.
 * NOTE(review): the preprocessor lines that select between the two (and
 * several declarations, braces and the final return) are not visible in
 * this excerpt; presumably the first body is the USE_XLOCALE case. */
687 g_ascii_strtod (const gchar
*nptr
,
692 g_return_val_if_fail (nptr
!= NULL
, 0);
/* Variant 1: let the C library parse under an explicit "C" locale. */
696 return strtod_l (nptr
, endptr
, get_C_locale ());
/* Variant 2: manual handling of the decimal point. */
703 struct lconv
*locale_data
;
705 const char *decimal_point
;
706 int decimal_point_len
;
707 const char *p
, *decimal_point_pos
;
708 const char *end
= NULL
; /* Silence gcc */
711 g_return_val_if_fail (nptr
!= NULL
, 0);
/* Ask the C library which decimal point string the current locale uses. */
716 locale_data
= localeconv ();
717 decimal_point
= locale_data
->decimal_point
;
718 decimal_point_len
= strlen (decimal_point
);
/* NOTE(review): alternative hard-coded length; the condition under which
 * this assignment is used instead of the lookup above is not visible. */
721 decimal_point_len
= 1;
724 g_assert (decimal_point_len
!= 0);
/* NULL means "no '.' found in the number (yet)". */
726 decimal_point_pos
= NULL
;
/* Scanning is only needed when the locale's decimal point is anything
 * other than exactly ".". */
729 if (decimal_point
[0] != '.' ||
730 decimal_point
[1] != 0)
733 /* Skip leading space */
734 while (g_ascii_isspace (*p
))
737 /* Skip leading optional sign */
738 if (*p
== '+' || *p
== '-')
/* Second half of the test for a hexadecimal prefix ("x" or "X" in the
 * second position); the first half is not visible in this excerpt. */
742 (p
[1] == 'x' || p
[1] == 'X'))
745 /* HEX - find the (optional) decimal point */
747 while (g_ascii_isxdigit (*p
))
/* Remember where the '.' sits, then step past it. */
751 decimal_point_pos
= p
++;
753 while (g_ascii_isxdigit (*p
))
/* Optional binary exponent: 'p'/'P', optional sign, decimal digits. */
756 if (*p
== 'p' || *p
== 'P')
758 if (*p
== '+' || *p
== '-')
760 while (g_ascii_isdigit (*p
))
/* Decimal number: digits, optional '.', digits, optional exponent. */
765 else if (g_ascii_isdigit (*p
) || *p
== '.')
767 while (g_ascii_isdigit (*p
))
771 decimal_point_pos
= p
++;
773 while (g_ascii_isdigit (*p
))
/* Optional decimal exponent: 'e'/'E', optional sign, decimal digits. */
776 if (*p
== 'e' || *p
== 'E')
778 if (*p
== '+' || *p
== '-')
780 while (g_ascii_isdigit (*p
))
785 /* For the other cases, we need not convert the decimal point */
788 if (decimal_point_pos
)
792 /* We need to convert the '.' to the locale specific decimal point */
/* Size: the scanned text, plus the locale decimal point, plus one byte. */
793 copy
= g_malloc (end
- nptr
+ 1 + decimal_point_len
);
/* Assemble the copy in three pieces: text before the '.', the locale's
 * decimal point, and the text after the '.'. */
796 memcpy (c
, nptr
, decimal_point_pos
- nptr
);
797 c
+= decimal_point_pos
- nptr
;
798 memcpy (c
, decimal_point
, decimal_point_len
);
799 c
+= decimal_point_len
;
800 memcpy (c
, decimal_point_pos
+ 1, end
- (decimal_point_pos
+ 1));
801 c
+= end
- (decimal_point_pos
+ 1);
/* Parse the rewritten copy and capture errno right after the call. */
805 val
= strtod (copy
, &fail_pos
);
806 strtod_errno
= errno
;
/* Translate strtod()'s stop position from the copy back into nptr.  If it
 * lies beyond the decimal point, subtract the extra bytes the locale's
 * decimal point occupies compared with the single '.'. */
810 if (fail_pos
- copy
> decimal_point_pos
- nptr
)
811 fail_pos
= (char *)nptr
+ (fail_pos
- copy
) - (decimal_point_len
- 1);
813 fail_pos
= (char *)nptr
+ (fail_pos
- copy
);
/* No '.' was found: strtod() is still given a nul-terminated copy that
 * contains only the scanned span [nptr, end). */
823 copy
= g_malloc (end
- (char *)nptr
+ 1);
824 memcpy (copy
, nptr
, end
- nptr
);
825 *(copy
+ (end
- (char *)nptr
)) = 0;
828 val
= strtod (copy
, &fail_pos
);
829 strtod_errno
= errno
;
/* Offsets in the copy and in nptr are identical in this case. */
833 fail_pos
= (char *)nptr
+ (fail_pos
- copy
);
/* Nothing needs rewriting: parse the caller's string in place. */
841 val
= strtod (nptr
, &fail_pos
);
842 strtod_errno
= errno
;
/* Hand strtod()'s errno (captured right after the call) to the caller. */
848 errno
= strtod_errno
;
857 * @buffer: A buffer to place the resulting string in
858 * @buf_len: The length of the buffer.
859 * @d: The #gdouble to convert
861 * Converts a #gdouble to a string, using the '.' as decimal point.
864 * This function generates enough precision that converting
865 * the string back using g_ascii_strtod() gives the same machine-number
866 * (on machines with IEEE compatible 64bit doubles). It is
867 * guaranteed that the size of the resulting string will never
868 * be larger than %G_ASCII_DTOSTR_BUF_SIZE bytes, including the terminating
869 * nul character, which is always added.
871 * Returns: The pointer to the buffer with the converted string.
/* g_ascii_dtostr(): thin wrapper that formats @d into @buffer through
 * g_ascii_formatd() with the fixed format "%.17g" (17 significant digits)
 * and returns whatever g_ascii_formatd() returns. */
874 g_ascii_dtostr (gchar
*buffer
,
878 return g_ascii_formatd (buffer
, buf_len
, "%.17g", d
);
/* Save the compiler's diagnostic state and switch off -Wformat-nonliteral
 * for the code that follows.
 * NOTE(review): presumably needed because g_ascii_formatd() passes its
 * caller-supplied format string on to _g_snprintf() -- confirm. */
881 #pragma GCC diagnostic push
882 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
886 * @buffer: A buffer to place the resulting string in
887 * @buf_len: The length of the buffer.
888 * @format: The printf()-style format string to use for
889 * converting.
890 * @d: The #gdouble to convert
892 * Converts a #gdouble to a string, using the '.' as
893 * decimal point. To format the number you pass in
894 * a printf()-style format string. Allowed conversion
895 * specifiers are 'e', 'E', 'f', 'F', 'g' and 'G'.
897 * The returned buffer is guaranteed to be nul-terminated.
899 * If you just want to serialize the value into a
900 * string, use g_ascii_dtostr().
902 * Returns: The pointer to the buffer with the converted string.
/* g_ascii_formatd(): two implementations are visible.
 *  - One temporarily switches the locale to the one from get_C_locale()
 *    with uselocale(), formats with _g_snprintf() and restores the
 *    previous locale.
 *  - The other validates @format, formats in the current locale and then
 *    edits @buffer so that the locale's decimal point becomes a single '.'.
 * NOTE(review): the preprocessor lines selecting between the two, several
 * declarations, the bodies of the scan loops and the return statements are
 * not visible in this excerpt. */
905 g_ascii_formatd (gchar
*buffer
,
/* Variant 1: format under the "C" locale, then put the old one back. */
913 old_locale
= uselocale (get_C_locale ());
914 _g_snprintf (buffer
, buf_len
, format
, d
);
915 uselocale (old_locale
);
/* Variant 2: format first, fix up the decimal point afterwards. */
920 struct lconv
*locale_data
;
922 const char *decimal_point
;
923 int decimal_point_len
;
/* Preconditions: a buffer, a format that starts with '%', and none of the
 * characters ', l or % anywhere after that first character. */
928 g_return_val_if_fail (buffer
!= NULL
, NULL
);
929 g_return_val_if_fail (format
[0] == '%', NULL
);
930 g_return_val_if_fail (strpbrk (format
+ 1, "'l%") == NULL
, NULL
);
/* The conversion specifier is the last character of the format. */
932 format_char
= format
[strlen (format
) - 1];
934 g_return_val_if_fail (format_char
== 'e' || format_char
== 'E' ||
935 format_char
== 'f' || format_char
== 'F' ||
936 format_char
== 'g' || format_char
== 'G',
/* The same three conditions again as plain `if` tests.
 * NOTE(review): what each branch does is not visible here; presumably
 * they bail out even when the g_return_val_if_fail() checks are compiled
 * out -- confirm. */
939 if (format
[0] != '%')
942 if (strpbrk (format
+ 1, "'l%"))
945 if (!(format_char
== 'e' || format_char
== 'E' ||
946 format_char
== 'f' || format_char
== 'F' ||
947 format_char
== 'g' || format_char
== 'G'))
950 _g_snprintf (buffer
, buf_len
, format
, d
);
/* Ask the C library which decimal point string the current locale uses. */
953 locale_data
= localeconv ();
954 decimal_point
= locale_data
->decimal_point
;
955 decimal_point_len
= strlen (decimal_point
);
/* NOTE(review): alternative hard-coded length; the condition under which
 * this assignment is used instead of the lookup above is not visible. */
958 decimal_point_len
= 1;
961 g_assert (decimal_point_len
!= 0);
/* The buffer needs editing only when the locale's decimal point is
 * anything other than exactly ".". */
963 if (decimal_point
[0] != '.' ||
964 decimal_point
[1] != 0)
/* Walk over leading whitespace, an optional sign and the integer digits
 * to reach the place where the decimal point would be. */
968 while (g_ascii_isspace (*p
))
971 if (*p
== '+' || *p
== '-')
974 while (isdigit ((guchar
)*p
))
/* Is the locale's decimal point at this position? */
977 if (strncmp (p
, decimal_point
, decimal_point_len
) == 0)
/* A multi-byte decimal point leaves extra bytes behind once it is reduced
 * to one character: shift the remainder of the string left over them. */
981 if (decimal_point_len
> 1)
983 rest_len
= strlen (p
+ (decimal_point_len
-1));
984 memmove (p
, p
+ (decimal_point_len
-1), rest_len
);
/* Restore the diagnostic state saved by the earlier "push". */
993 #pragma GCC diagnostic pop
/* Character classification and case-conversion macros written as plain
 * comparisons against character constants, so they do not call into
 * <ctype.h>.  The argument is expanded more than once in every macro, so
 * callers should pass expressions without side effects. */
/* True for space, form feed, newline, carriage return, tab, vertical tab. */
995 #define ISSPACE(c) ((c) == ' ' || (c) == '\f' || (c) == '\n' || \
996 (c) == '\r' || (c) == '\t' || (c) == '\v')
/* True for 'A'..'Z'. */
997 #define ISUPPER(c) ((c) >= 'A' && (c) <= 'Z')
/* True for 'a'..'z'. */
998 #define ISLOWER(c) ((c) >= 'a' && (c) <= 'z')
/* True for any letter matched by ISUPPER or ISLOWER. */
999 #define ISALPHA(c) (ISUPPER (c) || ISLOWER (c))
/* Map 'a'..'z' to 'A'..'Z'; every other value is returned unchanged. */
1000 #define TOUPPER(c) (ISLOWER (c) ? (c) - 'a' + 'A' : (c))
/* Map 'A'..'Z' to 'a'..'z'; every other value is returned unchanged. */
1001 #define TOLOWER(c) (ISUPPER (c) ? (c) - 'A' + 'a' : (c))
/* g_parse_long_long(): integer parser shared by g_ascii_strtoull() and
 * g_ascii_strtoll(), both of which call it with (nptr, endptr, base,
 * &negative).  It classifies characters with the ISSPACE/ISALPHA/TOUPPER
 * macros rather than <ctype.h>, accumulates the magnitude in `ui64` and
 * guards each step against exceeding G_MAXUINT64.
 * NOTE(review): the remaining parameter declarations, the main digit loop
 * head, the error/overflow return paths and the sign handling are not
 * visible in this excerpt; presumably the sign is reported through the
 * last argument instead of being applied here -- confirm. */
1006 g_parse_long_long (const gchar
*nptr
,
1007 const gchar
**endptr
,
1011 /* this code is based on the strtol(3) code from GNU libc released under
1012 * the GNU Lesser General Public License.
1014 * Copyright (C) 1991,92,94,95,96,97,98,99,2000,01,02
1015 * Free Software Foundation, Inc.
1021 const gchar
*s
, *save
;
1024 g_return_val_if_fail (nptr
!= NULL
, 0);
1027 if (base
== 1 || base
> 36)
1037 /* Skip white space. */
1038 while (ISSPACE (*s
))
/* An empty (or all-whitespace) string is treated as the unlikely case. */
1041 if (G_UNLIKELY (!*s
))
1044 /* Check for a sign. */
1053 /* Recognize number prefix and if BASE is zero, figure it out ourselves. */
1056 if ((base
== 0 || base
== 16) && TOUPPER (s
[1]) == 'X')
1067 /* Save the pointer so we can check later if anything happened. */
/* cutoff is the largest accumulator value that can still take another
 * digit; cutlim is the largest digit allowed once the accumulator equals
 * cutoff.  Together they keep the result within G_MAXUINT64. */
1069 cutoff
= G_MAXUINT64
/ base
;
1070 cutlim
= G_MAXUINT64
% base
;
/* Convert the current character to its digit value. */
1077 if (c
>= '0' && c
<= '9')
1079 else if (ISALPHA (c
))
/* Letters stand for the values 10..35, regardless of case. */
1080 c
= TOUPPER (c
) - 'A' + 10;
1085 /* Check for overflow. */
1086 if (ui64
> cutoff
|| (ui64
== cutoff
&& c
> cutlim
))
1095 /* Check if anything actually happened. */
1099 /* Store in ENDPTR the address of one character
1100 past the last character we converted. */
1104 if (G_UNLIKELY (overflow
))
1113 /* We must handle a special case here: the base is 0 or 16 and the
1114 first two characters are '0' and 'x', but the rest are not
1115 hexadecimal digits. This is not an error case. We return 0 and
1116 ENDPTR points to the `x`. */
1119 if (save
- nptr
>= 2 && TOUPPER (save
[-1]) == 'X'
1121 *endptr
= &save
[-1];
1123 /* There was no number to convert. */
1128 #endif /* !USE_XLOCALE */
1132 * @nptr: the string to convert to a numeric value.
1133 * @endptr: (out) (transfer none) (optional): if non-%NULL, it returns the
1134 * character after the last character used in the conversion.
1135 * @base: to be used for the conversion, 2..36 or 0
1137 * Converts a string to a #guint64 value.
1138 * This function behaves like the standard strtoull() function
1139 * does in the C locale. It does this without actually
1140 * changing the current locale, since that would not be thread-safe.
1143 * This function is typically used when reading configuration
1144 * files or other non-user input that should be locale independent.
1145 * To handle input from the user you should normally use the
1146 * locale-sensitive system strtoull() function.
1148 * If the correct value would cause overflow, %G_MAXUINT64
1149 * is returned, and `ERANGE` is stored in `errno`.
1150 * If the base is outside the valid range, zero is returned, and
1151 * `EINVAL` is stored in `errno`.
1152 * If the string conversion fails, zero is returned, and @endptr returns
1153 * @nptr (if @endptr is non-%NULL).
1155 * Returns: the #guint64 value or zero on error.
/* g_ascii_strtoull(): two implementations are visible -- a direct call to
 * strtoull_l() under the locale from get_C_locale(), and a fallback built
 * on g_parse_long_long().
 * NOTE(review): the preprocessor lines selecting between them and the
 * local declarations are not visible in this excerpt. */
1160 g_ascii_strtoull (const gchar
*nptr
,
1165 return strtoull_l (nptr
, endptr
, base
, get_C_locale ());
/* Fallback: the helper fills in `negative` alongside the parsed result. */
1170 result
= g_parse_long_long (nptr
, (const gchar
**) endptr
, base
, &negative
);
1172 /* Return the result of the appropriate sign. */
1173 return negative
? -result
: result
;
1179 * @nptr: the string to convert to a numeric value.
1180 * @endptr: (out) (transfer none) (optional): if non-%NULL, it returns the
1181 * character after the last character used in the conversion.
1182 * @base: to be used for the conversion, 2..36 or 0
1184 * Converts a string to a #gint64 value.
1185 * This function behaves like the standard strtoll() function
1186 * does in the C locale. It does this without actually
1187 * changing the current locale, since that would not be thread-safe.
1190 * This function is typically used when reading configuration
1191 * files or other non-user input that should be locale independent.
1192 * To handle input from the user you should normally use the
1193 * locale-sensitive system strtoll() function.
1195 * If the correct value would cause overflow, %G_MAXINT64 or %G_MININT64
1196 * is returned, and `ERANGE` is stored in `errno`.
1197 * If the base is outside the valid range, zero is returned, and
1198 * `EINVAL` is stored in `errno`. If the
1199 * string conversion fails, zero is returned, and @endptr returns @nptr
1200 * (if @endptr is non-%NULL).
1202 * Returns: the #gint64 value or zero on error.
/* g_ascii_strtoll(): two implementations are visible -- a direct call to
 * strtoll_l() under the locale from get_C_locale(), and a fallback that
 * parses with g_parse_long_long() and then range-checks the result against
 * the signed 64-bit limits.
 * NOTE(review): the preprocessor lines selecting between them, the local
 * declarations and the bodies of the two range-check branches are not
 * visible in this excerpt. */
1207 g_ascii_strtoll (const gchar
*nptr
,
1212 return strtoll_l (nptr
, endptr
, base
, get_C_locale ());
/* Fallback: the helper fills in `negative` alongside the parsed result. */
1217 result
= g_parse_long_long (nptr
, (const gchar
**) endptr
, base
, &negative
);
/* A negative input whose parsed result exceeds G_MININT64 (compared as
 * guint64) does not fit in a gint64. */
1219 if (negative
&& result
> (guint64
) G_MININT64
)
/* Likewise a non-negative input whose result exceeds G_MAXINT64. */
1224 else if (!negative
&& result
> (guint64
) G_MAXINT64
)
/* In range: convert to gint64, negated in the first of the two returns. */
1230 return - (gint64
) result
;
1232 return (gint64
) result
;
1238 * @errnum: the system error number. See the standard C %errno documentation
1241 * Returns a string corresponding to the given error code, e.g. "no
1242 * such process". Unlike strerror(), this always returns a string in
1243 * UTF-8 encoding, and the pointer is guaranteed to remain valid for
1244 * the lifetime of the process.
1246 * Note that the string may be translated according to the current locale.
1248 * The value of %errno will not be changed by this function.
1250 * Returns: a UTF-8 string describing the error code. If the error code
1251 * is unknown, it returns a string like "unknown error (<code>)".
/* g_strerror(): returns a message for @errnum, caching results in the
 * static hash table `errors` keyed by the error number.  On a cache miss
 * the text comes from strerror_s(), strerror_r() or strerror() depending
 * on the platform macros, is converted to UTF-8 when g_get_charset()
 * returns FALSE, and is then inserted into the table.  errno is saved on
 * entry and restored at the end.
 * NOTE(review): G_LOCK_DEFINE_STATIC declares a lock for `errors`, but the
 * lock/unlock calls, the declaration of `buf`, several branch heads and
 * the return statement are not visible in this excerpt. */
1254 g_strerror (gint errnum
)
1256 static GHashTable
*errors
;
1257 G_LOCK_DEFINE_STATIC (errors
);
/* Remember the caller's errno; it is written back at the end. */
1259 gint saved_errno
= errno
;
/* Look for an already cached message for this error number. */
1263 msg
= g_hash_table_lookup (errors
, GINT_TO_POINTER (errnum
));
/* Table created with NULL hash and equality functions, keyed by errnum
 * packed into a pointer with GINT_TO_POINTER. */
1266 errors
= g_hash_table_new (NULL
, NULL
);
1273 GError
*error
= NULL
;
1275 #if defined(G_OS_WIN32)
1276 strerror_s (buf
, sizeof (buf
), errnum
);
1278 #elif defined(HAVE_STRERROR_R)
1279 /* Match the condition in strerror_r(3) for glibc */
1280 # if defined(__GLIBC__) && !((_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) && ! _GNU_SOURCE)
/* This branch uses strerror_r()'s return value as the message, so it may
 * point somewhere other than buf. */
1281 msg
= strerror_r (errnum
, buf
, sizeof (buf
));
/* This branch ignores the return value; the message is expected in buf. */
1283 strerror_r (errnum
, buf
, sizeof (buf
));
1285 # endif /* HAVE_STRERROR_R */
/* Neither of the above: copy strerror()'s text into buf. */
1287 g_strlcpy (buf
, strerror (errnum
), sizeof (buf
));
/* g_get_charset() returning FALSE: convert the message to UTF-8. */
1290 if (!g_get_charset (NULL
))
1292 msg
= g_locale_to_utf8 (msg
, -1, NULL
, NULL
, &error
);
/* NOTE(review): the conversion error's message is printed; the guarding
 * condition and any release of `error` are not visible here. */
1294 g_print ("%s\n", error
->message
);
/* No conversion happened and the message is in buf: duplicate it, since
 * the pointer is about to be stored in the table. */
1296 else if (msg
== (const gchar
*)buf
)
1297 msg
= g_strdup (buf
);
1299 g_hash_table_insert (errors
, GINT_TO_POINTER (errnum
), (char *) msg
);
1303 errno
= saved_errno
;
1309 * @signum: the signal number. See the `signal` documentation
1311 * Returns a string describing the given signal, e.g. "Segmentation fault".
1312 * You should use this function in preference to strsignal(), because it
1313 * returns a string in UTF-8 encoding, and since not all platforms support
1314 * the strsignal() function.
1316 * Returns: a UTF-8 string describing the signal. If the signal is unknown,
1317 * it returns "unknown signal (<signum>)".
/* g_strsignal(): obtains a description of @signum from strsignal() when
 * HAVE_STRSIGNAL is defined, converts it to UTF-8 when g_get_charset()
 * returns FALSE, falls back to the text "unknown signal (%d)", and passes
 * the message to g_intern_string().
 * NOTE(review): the condition guarding the fallback, the release of
 * `tofree` and the return statement are not visible in this excerpt. */
1320 g_strsignal (gint signum
)
/* `tofree` tracks a string allocated here (the conversion or the fallback
 * below); it starts out NULL together with `msg`. */
1326 msg
= tofree
= NULL
;
1328 #ifdef HAVE_STRSIGNAL
1329 msg
= strsignal (signum
);
1330 if (!g_get_charset (NULL
))
1331 msg
= tofree
= g_locale_to_utf8 (msg
, -1, NULL
, NULL
, NULL
);
/* Fallback text containing the signal number. */
1335 msg
= tofree
= g_strdup_printf ("unknown signal (%d)", signum
);
/* The value kept in `ret` is the one produced by g_intern_string(). */
1336 ret
= g_intern_string (msg
);
1342 /* Functions g_strlcpy and g_strlcat were originally developed by
1343 * Todd C. Miller <Todd.Miller@courtesan.com> to simplify writing secure code.
1344 * See http://www.openbsd.org/cgi-bin/man.cgi?query=strlcpy
1345 * for more information.
1349 /* Use the native ones, if available; they might be implemented in assembly */
/* g_strlcpy(), native variant: after rejecting a NULL @dest or @src
 * (returning 0), forwards to the C library's strlcpy() and returns its
 * result.
 * NOTE(review): the remaining parameter declarations and the preprocessor
 * condition enabling this variant are not visible in this excerpt. */
1351 g_strlcpy (gchar
*dest
,
1355 g_return_val_if_fail (dest
!= NULL
, 0);
1356 g_return_val_if_fail (src
!= NULL
, 0);
1358 return strlcpy (dest
, src
, dest_size
);
/* g_strlcat(), native variant: after rejecting a NULL @dest or @src
 * (returning 0), forwards to the C library's strlcat() and returns its
 * result.
 * NOTE(review): the remaining parameter declarations and the preprocessor
 * condition enabling this variant are not visible in this excerpt. */
1362 g_strlcat (gchar
*dest
,
1366 g_return_val_if_fail (dest
!= NULL
, 0);
1367 g_return_val_if_fail (src
!= NULL
, 0);
1369 return strlcat (dest
, src
, dest_size
);
/* Everything after this point is the emulation used without strlcpy(). */
1372 #else /* ! HAVE_STRLCPY */
1375 * @dest: destination buffer
1376 * @src: source buffer
1377 * @dest_size: length of @dest in bytes
1379 * Portability wrapper that calls strlcpy() on systems which have it,
1380 * and emulates strlcpy() otherwise. Copies @src to @dest; @dest is
1381 * guaranteed to be nul-terminated; @src must be nul-terminated;
1382 * @dest_size is the buffer size, not the number of bytes to copy.
1384 * At most @dest_size - 1 characters will be copied. Always nul-terminates
1385 * (unless @dest_size is 0). This function does not allocate memory. Unlike
1386 * strncpy(), this function doesn't pad @dest (so it's often faster). It
1387 * returns the size of the attempted result, strlen (src), so if
1388 * @retval >= @dest_size, truncation occurred.
1390 * Caveat: strlcpy() is supposedly more secure than strcpy() or strncpy(),
 * but if you really want to avoid screwups, g_strdup() is an even better
 * idea.
1394 * Returns: length of @src
1397 g_strlcpy (gchar
*dest
,
1402 const gchar
*s
= src
;
1403 gsize n
= dest_size
;
1405 g_return_val_if_fail (dest
!= NULL
, 0);
1406 g_return_val_if_fail (src
!= NULL
, 0);
1408 /* Copy as many bytes as will fit */
1409 if (n
!= 0 && --n
!= 0)
1420 /* If not enough room in dest, add NUL and traverse rest of src */
1429 return s
- src
- 1; /* count does not include NUL */
1434 * @dest: destination buffer, already containing one nul-terminated string
1435 * @src: source buffer
1436 * @dest_size: length of @dest buffer in bytes (not length of existing string
1439 * Portability wrapper that calls strlcat() on systems which have it,
1440 * and emulates it otherwise. Appends nul-terminated @src string to @dest,
1441 * guaranteeing nul-termination for @dest. The total size of @dest won't
1442 * exceed @dest_size.
1444 * At most @dest_size - 1 characters will be copied. Unlike strncat(),
1445 * @dest_size is the full size of dest, not the space left over. This
1446 * function does not allocate memory. It always nul-terminates (unless
1447 * @dest_size == 0 or there were no nul characters in the @dest_size
1448 * characters of dest to start with).
1450 * Caveat: this is supposedly a more secure alternative to strcat() or
1451 * strncat(), but for real security g_strconcat() is harder to mess up.
1453 * Returns: size of attempted result, which is MIN (dest_size, strlen
1454 * (original dest)) + strlen (src), so if retval >= dest_size,
1455 * truncation occurred.
1458 g_strlcat (gchar
*dest
,
1463 const gchar
*s
= src
;
1464 gsize bytes_left
= dest_size
;
1465 gsize dlength
; /* Logically, MIN (strlen (d), dest_size) */
1467 g_return_val_if_fail (dest
!= NULL
, 0);
1468 g_return_val_if_fail (src
!= NULL
, 0);
1470 /* Find the end of dst and adjust bytes left but don't go past end */
1471 while (*d
!= 0 && bytes_left
-- != 0)
1474 bytes_left
= dest_size
- dlength
;
1476 if (bytes_left
== 0)
1477 return dlength
+ strlen (s
);
1481 if (bytes_left
!= 1)
1490 return dlength
+ (s
- src
); /* count does not include NUL */
1492 #endif /* ! HAVE_STRLCPY */
1497 * @len: length of @str in bytes, or -1 if @str is nul-terminated
1499 * Converts all upper case ASCII letters to lower case ASCII letters.
1501 * Returns: a newly-allocated string, with all the upper case
1502 * characters in @str converted to lower case, with semantics that
1503 * exactly match g_ascii_tolower(). (Note that this is unlike the
1504 * old g_strdown(), which modified the string in place.)
1507 g_ascii_strdown (const gchar
*str
,
1512 g_return_val_if_fail (str
!= NULL
, NULL
);
1517 result
= g_strndup (str
, len
);
1518 for (s
= result
; *s
; s
++)
1519 *s
= g_ascii_tolower (*s
);
1527 * @len: length of @str in bytes, or -1 if @str is nul-terminated
1529 * Converts all lower case ASCII letters to upper case ASCII letters.
1531 * Returns: a newly allocated string, with all the lower case
1532 * characters in @str converted to upper case, with semantics that
1533 * exactly match g_ascii_toupper(). (Note that this is unlike the
1534 * old g_strup(), which modified the string in place.)
1537 g_ascii_strup (const gchar
*str
,
1542 g_return_val_if_fail (str
!= NULL
, NULL
);
1547 result
= g_strndup (str
, len
);
1548 for (s
= result
; *s
; s
++)
1549 *s
= g_ascii_toupper (*s
);
1558 * Determines if a string is pure ASCII. A string is pure ASCII if it
1559 * contains no bytes with the high bit set.
1561 * Returns: %TRUE if @str is ASCII
1566 g_str_is_ascii (const gchar
*str
)
1570 for (i
= 0; str
[i
]; i
++)
1579 * @string: the string to convert.
1581 * Converts a string to lower case.
1583 * Returns: the string
1585 * Deprecated:2.2: This function is totally broken for the reasons discussed
1586 * in the g_strncasecmp() docs - use g_ascii_strdown() or g_utf8_strdown()
1590 g_strdown (gchar
*string
)
1594 g_return_val_if_fail (string
!= NULL
, NULL
);
1596 s
= (guchar
*) string
;
1605 return (gchar
*) string
;
1610 * @string: the string to convert
1612 * Converts a string to upper case.
1614 * Returns: the string
1616 * Deprecated:2.2: This function is totally broken for the reasons
1617 * discussed in the g_strncasecmp() docs - use g_ascii_strup()
1618 * or g_utf8_strup() instead.
1621 g_strup (gchar
*string
)
1625 g_return_val_if_fail (string
!= NULL
, NULL
);
1627 s
= (guchar
*) string
;
1636 return (gchar
*) string
;
1641 * @string: the string to reverse
1643 * Reverses all of the bytes in a string. For example,
1644 * `g_strreverse ("abcdef")` will result in "fedcba".
1646 * Note that g_strreverse() doesn't work on UTF-8 strings
1647 * containing multibyte characters. For that purpose, use
1648 * g_utf8_strreverse().
1650 * Returns: the same pointer passed in as @string
1653 g_strreverse (gchar
*string
)
1655 g_return_val_if_fail (string
!= NULL
, NULL
);
1662 t
= string
+ strlen (string
) - 1;
1683 * Convert a character to ASCII lower case.
1685 * Unlike the standard C library tolower() function, this only
1686 * recognizes standard ASCII letters and ignores the locale, returning
1687 * all non-ASCII characters unchanged, even if they are lower case
1688 * letters in a particular character set. Also unlike the standard
1689 * library function, this takes and returns a char, not an int, so
1690 * don't call it on %EOF but no need to worry about casting to #guchar
1691 * before passing a possibly non-ASCII character in.
1693 * Returns: the result of converting @c to lower case. If @c is
1694 * not an ASCII upper case letter, @c is returned unchanged.
1697 g_ascii_tolower (gchar c
)
1699 return g_ascii_isupper (c
) ? c
- 'A' + 'a' : c
;
1706 * Convert a character to ASCII upper case.
1708 * Unlike the standard C library toupper() function, this only
1709 * recognizes standard ASCII letters and ignores the locale, returning
1710 * all non-ASCII characters unchanged, even if they are upper case
1711 * letters in a particular character set. Also unlike the standard
1712 * library function, this takes and returns a char, not an int, so
1713 * don't call it on %EOF but no need to worry about casting to #guchar
1714 * before passing a possibly non-ASCII character in.
1716 * Returns: the result of converting @c to upper case. If @c is not
1717 * an ASCII lower case letter, @c is returned unchanged.
1720 g_ascii_toupper (gchar c
)
1722 return g_ascii_islower (c
) ? c
- 'a' + 'A' : c
;
1726 * g_ascii_digit_value:
1727 * @c: an ASCII character
1729 * Determines the numeric value of a character as a decimal digit.
1730 * Differs from g_unichar_digit_value() because it takes a char, so
1731 * there's no worry about sign extension if characters are signed.
1733 * Returns: If @c is a decimal digit (according to g_ascii_isdigit()),
1734 * its numeric value. Otherwise, -1.
1737 g_ascii_digit_value (gchar c
)
1739 if (g_ascii_isdigit (c
))
1745 * g_ascii_xdigit_value:
1746 * @c: an ASCII character.
1748 * Determines the numeric value of a character as a hexidecimal
1749 * digit. Differs from g_unichar_xdigit_value() because it takes
1750 * a char, so there's no worry about sign extension if characters
1753 * Returns: If @c is a hex digit (according to g_ascii_isxdigit()),
1754 * its numeric value. Otherwise, -1.
1757 g_ascii_xdigit_value (gchar c
)
1759 if (c
>= 'A' && c
<= 'F')
1760 return c
- 'A' + 10;
1761 if (c
>= 'a' && c
<= 'f')
1762 return c
- 'a' + 10;
1763 return g_ascii_digit_value (c
);
1767 * g_ascii_strcasecmp:
1768 * @s1: string to compare with @s2
1769 * @s2: string to compare with @s1
1771 * Compare two strings, ignoring the case of ASCII characters.
1773 * Unlike the BSD strcasecmp() function, this only recognizes standard
1774 * ASCII letters and ignores the locale, treating all non-ASCII
1775 * bytes as if they are not letters.
1777 * This function should be used only on strings that are known to be
1778 * in encodings where the bytes corresponding to ASCII letters always
1779 * represent themselves. This includes UTF-8 and the ISO-8859-*
1780 * charsets, but not for instance double-byte encodings like the
1781 * Windows Codepage 932, where the trailing bytes of double-byte
1782 * characters include all ASCII letters. If you compare two CP932
1783 * strings using this function, you will get false matches.
1785 * Both @s1 and @s2 must be non-%NULL.
1787 * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1788 * or a positive value if @s1 > @s2.
1791 g_ascii_strcasecmp (const gchar
*s1
,
1796 g_return_val_if_fail (s1
!= NULL
, 0);
1797 g_return_val_if_fail (s2
!= NULL
, 0);
1801 c1
= (gint
)(guchar
) TOLOWER (*s1
);
1802 c2
= (gint
)(guchar
) TOLOWER (*s2
);
1808 return (((gint
)(guchar
) *s1
) - ((gint
)(guchar
) *s2
));
1812 * g_ascii_strncasecmp:
1813 * @s1: string to compare with @s2
1814 * @s2: string to compare with @s1
1815 * @n: number of characters to compare
1817 * Compare @s1 and @s2, ignoring the case of ASCII characters and any
1818 * characters after the first @n in each string.
1820 * Unlike the BSD strcasecmp() function, this only recognizes standard
1821 * ASCII letters and ignores the locale, treating all non-ASCII
1822 * characters as if they are not letters.
1824 * The same warning as in g_ascii_strcasecmp() applies: Use this
1825 * function only on strings known to be in encodings where bytes
1826 * corresponding to ASCII letters always represent themselves.
1828 * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1829 * or a positive value if @s1 > @s2.
1832 g_ascii_strncasecmp (const gchar
*s1
,
1838 g_return_val_if_fail (s1
!= NULL
, 0);
1839 g_return_val_if_fail (s2
!= NULL
, 0);
1841 while (n
&& *s1
&& *s2
)
1844 c1
= (gint
)(guchar
) TOLOWER (*s1
);
1845 c2
= (gint
)(guchar
) TOLOWER (*s2
);
1852 return (((gint
) (guchar
) *s1
) - ((gint
) (guchar
) *s2
));
1860 * @s2: a string to compare with @s1
1862 * A case-insensitive string comparison, corresponding to the standard
1863 * strcasecmp() function on platforms which support it.
1865 * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1866 * or a positive value if @s1 > @s2.
1868 * Deprecated:2.2: See g_strncasecmp() for a discussion of why this
1869 * function is deprecated and how to replace it.
1872 g_strcasecmp (const gchar
*s1
,
1875 #ifdef HAVE_STRCASECMP
1876 g_return_val_if_fail (s1
!= NULL
, 0);
1877 g_return_val_if_fail (s2
!= NULL
, 0);
1879 return strcasecmp (s1
, s2
);
1883 g_return_val_if_fail (s1
!= NULL
, 0);
1884 g_return_val_if_fail (s2
!= NULL
, 0);
1888 /* According to A. Cox, some platforms have islower's that
1889 * don't work right on non-uppercase
1891 c1
= isupper ((guchar
)*s1
) ? tolower ((guchar
)*s1
) : *s1
;
1892 c2
= isupper ((guchar
)*s2
) ? tolower ((guchar
)*s2
) : *s2
;
1898 return (((gint
)(guchar
) *s1
) - ((gint
)(guchar
) *s2
));
1905 * @s2: a string to compare with @s1
1906 * @n: the maximum number of characters to compare
1908 * A case-insensitive string comparison, corresponding to the standard
1909 * strncasecmp() function on platforms which support it. It is similar
1910 * to g_strcasecmp() except it only compares the first @n characters of
1913 * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1914 * or a positive value if @s1 > @s2.
1916 * Deprecated:2.2: The problem with g_strncasecmp() is that it does
1917 * the comparison by calling toupper()/tolower(). These functions
1918 * are locale-specific and operate on single bytes. However, it is
1919 * impossible to handle things correctly from an internationalization
1920 * standpoint by operating on bytes, since characters may be multibyte.
1921 * Thus g_strncasecmp() is broken if your string is guaranteed to be
1922 * ASCII, since it is locale-sensitive, and it's broken if your string
1923 * is localized, since it doesn't work on many encodings at all,
1924 * including UTF-8, EUC-JP, etc.
1926 * There are therefore two replacement techniques: g_ascii_strncasecmp(),
1927 * which only works on ASCII and is not locale-sensitive, and
1928 * g_utf8_casefold() followed by strcmp() on the resulting strings,
1929 * which is good for case-insensitive sorting of UTF-8.
1932 g_strncasecmp (const gchar
*s1
,
1936 #ifdef HAVE_STRNCASECMP
1937 return strncasecmp (s1
, s2
, n
);
1941 g_return_val_if_fail (s1
!= NULL
, 0);
1942 g_return_val_if_fail (s2
!= NULL
, 0);
1944 while (n
&& *s1
&& *s2
)
1947 /* According to A. Cox, some platforms have islower's that
1948 * don't work right on non-uppercase
1950 c1
= isupper ((guchar
)*s1
) ? tolower ((guchar
)*s1
) : *s1
;
1951 c2
= isupper ((guchar
)*s2
) ? tolower ((guchar
)*s2
) : *s2
;
1958 return (((gint
) (guchar
) *s1
) - ((gint
) (guchar
) *s2
));
1966 * @string: the string to convert
1967 * @delimiters: (nullable): a string containing the current delimiters,
1968 * or %NULL to use the standard delimiters defined in #G_STR_DELIMITERS
1969 * @new_delimiter: the new delimiter character
1971 * Converts any delimiter characters in @string to @new_delimiter.
1972 * Any characters in @string which are found in @delimiters are
1973 * changed to the @new_delimiter character. Modifies @string in place,
1974 * and returns @string itself, not a copy. The return value is to
1975 * allow nesting such as
1976 * |[<!-- language="C" -->
1977 * g_ascii_strup (g_strdelimit (str, "abc", '?'))
1983 g_strdelimit (gchar
*string
,
1984 const gchar
*delimiters
,
1989 g_return_val_if_fail (string
!= NULL
, NULL
);
1992 delimiters
= G_STR_DELIMITERS
;
1994 for (c
= string
; *c
; c
++)
1996 if (strchr (delimiters
, *c
))
2005 * @string: a nul-terminated array of bytes
2006 * @valid_chars: bytes permitted in @string
2007 * @substitutor: replacement character for disallowed bytes
2009 * For each character in @string, if the character is not in @valid_chars,
2010 * replaces the character with @substitutor. Modifies @string in place,
2011 * and return @string itself, not a copy. The return value is to allow
2013 * |[<!-- language="C" -->
2014 * g_ascii_strup (g_strcanon (str, "abc", '?'))
2020 g_strcanon (gchar
*string
,
2021 const gchar
*valid_chars
,
2026 g_return_val_if_fail (string
!= NULL
, NULL
);
2027 g_return_val_if_fail (valid_chars
!= NULL
, NULL
);
2029 for (c
= string
; *c
; c
++)
2031 if (!strchr (valid_chars
, *c
))
2040 * @source: a string to compress
2042 * Replaces all escaped characters with their one byte equivalent.
2044 * This function does the reverse conversion of g_strescape().
2046 * Returns: a newly-allocated copy of @source with all escaped
 *     characters compressed
2050 g_strcompress (const gchar
*source
)
2052 const gchar
*p
= source
, *octal
;
2056 g_return_val_if_fail (source
!= NULL
, NULL
);
2058 dest
= g_malloc (strlen (source
) + 1);
2069 g_warning ("g_strcompress: trailing \\");
2071 case '0': case '1': case '2': case '3': case '4':
2072 case '5': case '6': case '7':
2075 while ((p
< octal
+ 3) && (*p
>= '0') && (*p
<= '7'))
2077 *q
= (*q
* 8) + (*p
- '0');
2101 default: /* Also handles \" and \\ */
2118 * @source: a string to escape
2119 * @exceptions: (nullable): a string of characters not to escape in @source
2121 * Escapes the special characters '\b', '\f', '\n', '\r', '\t', '\v', '\'
2122 * and '"' in the string @source by inserting a '\' before
2123 * them. Additionally all characters in the range 0x01-0x1F (everything
2124 * below SPACE) and in the range 0x7F-0xFF (all non-ASCII chars) are
2125 * replaced with a '\' followed by their octal representation.
2126 * Characters supplied in @exceptions are not escaped.
2128 * g_strcompress() does the reverse conversion.
2130 * Returns: a newly-allocated copy of @source with certain
2131 * characters escaped. See above.
2134 g_strescape (const gchar
*source
,
2135 const gchar
*exceptions
)
2142 g_return_val_if_fail (source
!= NULL
, NULL
);
2144 p
= (guchar
*) source
;
2145 /* Each source byte needs maximally four destination chars (\777) */
2146 q
= dest
= g_malloc (strlen (source
) * 4 + 1);
2148 memset (excmap
, 0, 256);
2151 guchar
*e
= (guchar
*) exceptions
;
2201 if ((*p
< ' ') || (*p
>= 0177))
2204 *q
++ = '0' + (((*p
) >> 6) & 07);
2205 *q
++ = '0' + (((*p
) >> 3) & 07);
2206 *q
++ = '0' + ((*p
) & 07);
2221 * @string: a string to remove the leading whitespace from
2223 * Removes leading whitespace from a string, by moving the rest
2224 * of the characters forward.
2226 * This function doesn't allocate or reallocate any memory;
2227 * it modifies @string in place. Therefore, it cannot be used on
2228 * statically allocated strings.
2230 * The pointer to @string is returned to allow the nesting of functions.
2232 * Also see g_strchomp() and g_strstrip().
2237 g_strchug (gchar
*string
)
2241 g_return_val_if_fail (string
!= NULL
, NULL
);
2243 for (start
= (guchar
*) string
; *start
&& g_ascii_isspace (*start
); start
++)
2246 memmove (string
, start
, strlen ((gchar
*) start
) + 1);
2253 * @string: a string to remove the trailing whitespace from
2255 * Removes trailing whitespace from a string.
2257 * This function doesn't allocate or reallocate any memory;
2258 * it modifies @string in place. Therefore, it cannot be used
2259 * on statically allocated strings.
2261 * The pointer to @string is returned to allow the nesting of functions.
2263 * Also see g_strchug() and g_strstrip().
2268 g_strchomp (gchar
*string
)
2272 g_return_val_if_fail (string
!= NULL
, NULL
);
2274 len
= strlen (string
);
2277 if (g_ascii_isspace ((guchar
) string
[len
]))
2288 * @string: a string to split
2289 * @delimiter: a string which specifies the places at which to split
2290 * the string. The delimiter is not included in any of the resulting
2291 * strings, unless @max_tokens is reached.
2292 * @max_tokens: the maximum number of pieces to split @string into.
2293 * If this is less than 1, the string is split completely.
2295 * Splits a string into a maximum of @max_tokens pieces, using the given
2296 * @delimiter. If @max_tokens is reached, the remainder of @string is
2297 * appended to the last token.
2299 * As an example, the result of g_strsplit (":a:bc::d:", ":", -1) is a
 * %NULL-terminated vector containing the six strings "", "a", "bc", "", "d"
 * and "".
2303 * As a special case, the result of splitting the empty string "" is an empty
2304 * vector, not a vector containing a single string. The reason for this
 * special case is that being able to represent an empty vector is typically
2306 * more useful than consistent handling of empty elements. If you do need
2307 * to represent empty elements, you'll need to check for the empty string
2308 * before calling g_strsplit().
2310 * Returns: a newly-allocated %NULL-terminated array of strings. Use
2311 * g_strfreev() to free it.
2314 g_strsplit (const gchar
*string
,
2315 const gchar
*delimiter
,
2318 GSList
*string_list
= NULL
, *slist
;
2319 gchar
**str_array
, *s
;
2321 const gchar
*remainder
;
2323 g_return_val_if_fail (string
!= NULL
, NULL
);
2324 g_return_val_if_fail (delimiter
!= NULL
, NULL
);
2325 g_return_val_if_fail (delimiter
[0] != '\0', NULL
);
2328 max_tokens
= G_MAXINT
;
2331 s
= strstr (remainder
, delimiter
);
2334 gsize delimiter_len
= strlen (delimiter
);
2336 while (--max_tokens
&& s
)
2340 len
= s
- remainder
;
2341 string_list
= g_slist_prepend (string_list
,
2342 g_strndup (remainder
, len
));
2344 remainder
= s
+ delimiter_len
;
2345 s
= strstr (remainder
, delimiter
);
2351 string_list
= g_slist_prepend (string_list
, g_strdup (remainder
));
2354 str_array
= g_new (gchar
*, n
+ 1);
2356 str_array
[n
--] = NULL
;
2357 for (slist
= string_list
; slist
; slist
= slist
->next
)
2358 str_array
[n
--] = slist
->data
;
2360 g_slist_free (string_list
);
2367 * @string: The string to be tokenized
2368 * @delimiters: A nul-terminated string containing bytes that are used
2369 * to split the string.
2370 * @max_tokens: The maximum number of tokens to split @string into.
2371 * If this is less than 1, the string is split completely
2373 * Splits @string into a number of tokens not containing any of the characters
 * in @delimiters. A token is the (possibly empty) longest string that does not
2375 * contain any of the characters in @delimiters. If @max_tokens is reached, the
2376 * remainder is appended to the last token.
2378 * For example the result of g_strsplit_set ("abc:def/ghi", ":/", -1) is a
 * %NULL-terminated vector containing the three strings "abc", "def",
 * and "ghi".
2382 * The result of g_strsplit_set (":def/ghi:", ":/", -1) is a %NULL-terminated
2383 * vector containing the four strings "", "def", "ghi", and "".
2385 * As a special case, the result of splitting the empty string "" is an empty
2386 * vector, not a vector containing a single string. The reason for this
 * special case is that being able to represent an empty vector is typically
2388 * more useful than consistent handling of empty elements. If you do need
2389 * to represent empty elements, you'll need to check for the empty string
2390 * before calling g_strsplit_set().
2392 * Note that this function works on bytes not characters, so it can't be used
2393 * to delimit UTF-8 strings for anything but ASCII characters.
2395 * Returns: a newly-allocated %NULL-terminated array of strings. Use
2396 * g_strfreev() to free it.
2401 g_strsplit_set (const gchar
*string
,
2402 const gchar
*delimiters
,
2405 gboolean delim_table
[256];
2406 GSList
*tokens
, *list
;
2409 const gchar
*current
;
2413 g_return_val_if_fail (string
!= NULL
, NULL
);
2414 g_return_val_if_fail (delimiters
!= NULL
, NULL
);
2417 max_tokens
= G_MAXINT
;
2419 if (*string
== '\0')
2421 result
= g_new (char *, 1);
2426 memset (delim_table
, FALSE
, sizeof (delim_table
));
2427 for (s
= delimiters
; *s
!= '\0'; ++s
)
2428 delim_table
[*(guchar
*)s
] = TRUE
;
2433 s
= current
= string
;
2436 if (delim_table
[*(guchar
*)s
] && n_tokens
+ 1 < max_tokens
)
2438 token
= g_strndup (current
, s
- current
);
2439 tokens
= g_slist_prepend (tokens
, token
);
2448 token
= g_strndup (current
, s
- current
);
2449 tokens
= g_slist_prepend (tokens
, token
);
2452 result
= g_new (gchar
*, n_tokens
+ 1);
2454 result
[n_tokens
] = NULL
;
2455 for (list
= tokens
; list
!= NULL
; list
= list
->next
)
2456 result
[--n_tokens
] = list
->data
;
2458 g_slist_free (tokens
);
2466 * A typedef alias for gchar**. This is mostly useful when used together with
2472 * @str_array: (nullable): a %NULL-terminated array of strings to free
2474 * Frees a %NULL-terminated array of strings, as well as each
2475 * string it contains.
2477 * If @str_array is %NULL, this function simply returns.
2480 g_strfreev (gchar
**str_array
)
2486 for (i
= 0; str_array
[i
] != NULL
; i
++)
2487 g_free (str_array
[i
]);
2495 * @str_array: (nullable): a %NULL-terminated array of strings
2497 * Copies %NULL-terminated array of strings. The copy is a deep copy;
2498 * the new array should be freed by first freeing each string, then
2499 * the array itself. g_strfreev() does this for you. If called
2500 * on a %NULL value, g_strdupv() simply returns %NULL.
2502 * Returns: (nullable): a new %NULL-terminated array of strings.
2505 g_strdupv (gchar
**str_array
)
2513 while (str_array
[i
])
2516 retval
= g_new (gchar
*, i
+ 1);
2519 while (str_array
[i
])
2521 retval
[i
] = g_strdup (str_array
[i
]);
2534 * @separator: (nullable): a string to insert between each of the
2536 * @str_array: a %NULL-terminated array of strings to join
2538 * Joins a number of strings together to form one long string, with the
2539 * optional @separator inserted between each of them. The returned string
2540 * should be freed with g_free().
2542 * If @str_array has no items, the return value will be an
2543 * empty string. If @str_array contains a single item, @separator will not
2544 * appear in the resulting string.
2546 * Returns: a newly-allocated string containing all of the strings joined
2547 * together, with @separator between them
2550 g_strjoinv (const gchar
*separator
,
2556 g_return_val_if_fail (str_array
!= NULL
, NULL
);
2558 if (separator
== NULL
)
2565 gsize separator_len
;
2567 separator_len
= strlen (separator
);
2568 /* First part, getting length */
2569 len
= 1 + strlen (str_array
[0]);
2570 for (i
= 1; str_array
[i
] != NULL
; i
++)
2571 len
+= strlen (str_array
[i
]);
2572 len
+= separator_len
* (i
- 1);
2574 /* Second part, building string */
2575 string
= g_new (gchar
, len
);
2576 ptr
= g_stpcpy (string
, *str_array
);
2577 for (i
= 1; str_array
[i
] != NULL
; i
++)
2579 ptr
= g_stpcpy (ptr
, separator
);
2580 ptr
= g_stpcpy (ptr
, str_array
[i
]);
2584 string
= g_strdup ("");
2591 * @separator: (nullable): a string to insert between each of the
2593 * @...: a %NULL-terminated list of strings to join
2595 * Joins a number of strings together to form one long string, with the
2596 * optional @separator inserted between each of them. The returned string
2597 * should be freed with g_free().
2599 * Returns: a newly-allocated string containing all of the strings joined
2600 * together, with @separator between them
2603 g_strjoin (const gchar
*separator
,
2609 gsize separator_len
;
2612 if (separator
== NULL
)
2615 separator_len
= strlen (separator
);
2617 va_start (args
, separator
);
2619 s
= va_arg (args
, gchar
*);
2623 /* First part, getting length */
2624 len
= 1 + strlen (s
);
2626 s
= va_arg (args
, gchar
*);
2629 len
+= separator_len
+ strlen (s
);
2630 s
= va_arg (args
, gchar
*);
2634 /* Second part, building string */
2635 string
= g_new (gchar
, len
);
2637 va_start (args
, separator
);
2639 s
= va_arg (args
, gchar
*);
2640 ptr
= g_stpcpy (string
, s
);
2642 s
= va_arg (args
, gchar
*);
2645 ptr
= g_stpcpy (ptr
, separator
);
2646 ptr
= g_stpcpy (ptr
, s
);
2647 s
= va_arg (args
, gchar
*);
2651 string
= g_strdup ("");
2661 * @haystack: a string
2662 * @haystack_len: the maximum length of @haystack. Note that -1 is
2663 * a valid length, if @haystack is nul-terminated, meaning it will
2664 * search through the whole string.
2665 * @needle: the string to search for
2667 * Searches the string @haystack for the first occurrence
2668 * of the string @needle, limiting the length of the search
2671 * Returns: a pointer to the found occurrence, or
2672 * %NULL if not found.
2675 g_strstr_len (const gchar
*haystack
,
2676 gssize haystack_len
,
2677 const gchar
*needle
)
2679 g_return_val_if_fail (haystack
!= NULL
, NULL
);
2680 g_return_val_if_fail (needle
!= NULL
, NULL
);
2682 if (haystack_len
< 0)
2683 return strstr (haystack
, needle
);
2686 const gchar
*p
= haystack
;
2687 gsize needle_len
= strlen (needle
);
2691 if (needle_len
== 0)
2692 return (gchar
*)haystack
;
2694 if (haystack_len
< needle_len
)
2697 end
= haystack
+ haystack_len
- needle_len
;
2699 while (p
<= end
&& *p
)
2701 for (i
= 0; i
< needle_len
; i
++)
2702 if (p
[i
] != needle
[i
])
2717 * @haystack: a nul-terminated string
2718 * @needle: the nul-terminated string to search for
2720 * Searches the string @haystack for the last occurrence
2721 * of the string @needle.
2723 * Returns: a pointer to the found occurrence, or
2724 * %NULL if not found.
2727 g_strrstr (const gchar
*haystack
,
2728 const gchar
*needle
)
2735 g_return_val_if_fail (haystack
!= NULL
, NULL
);
2736 g_return_val_if_fail (needle
!= NULL
, NULL
);
2738 needle_len
= strlen (needle
);
2739 haystack_len
= strlen (haystack
);
2741 if (needle_len
== 0)
2742 return (gchar
*)haystack
;
2744 if (haystack_len
< needle_len
)
2747 p
= haystack
+ haystack_len
- needle_len
;
2749 while (p
>= haystack
)
2751 for (i
= 0; i
< needle_len
; i
++)
2752 if (p
[i
] != needle
[i
])
2766 * @haystack: a nul-terminated string
2767 * @haystack_len: the maximum length of @haystack
2768 * @needle: the nul-terminated string to search for
2770 * Searches the string @haystack for the last occurrence
2771 * of the string @needle, limiting the length of the search
2774 * Returns: a pointer to the found occurrence, or
2775 * %NULL if not found.
2778 g_strrstr_len (const gchar
*haystack
,
2779 gssize haystack_len
,
2780 const gchar
*needle
)
2782 g_return_val_if_fail (haystack
!= NULL
, NULL
);
2783 g_return_val_if_fail (needle
!= NULL
, NULL
);
2785 if (haystack_len
< 0)
2786 return g_strrstr (haystack
, needle
);
2789 gsize needle_len
= strlen (needle
);
2790 const gchar
*haystack_max
= haystack
+ haystack_len
;
2791 const gchar
*p
= haystack
;
2794 while (p
< haystack_max
&& *p
)
2797 if (p
< haystack
+ needle_len
)
2802 while (p
>= haystack
)
2804 for (i
= 0; i
< needle_len
; i
++)
2805 if (p
[i
] != needle
[i
])
2821 * @str: a nul-terminated string
2822 * @suffix: the nul-terminated suffix to look for
2824 * Looks whether the string @str ends with @suffix.
2826 * Returns: %TRUE if @str end with @suffix, %FALSE otherwise.
2831 g_str_has_suffix (const gchar
*str
,
2832 const gchar
*suffix
)
2837 g_return_val_if_fail (str
!= NULL
, FALSE
);
2838 g_return_val_if_fail (suffix
!= NULL
, FALSE
);
2840 str_len
= strlen (str
);
2841 suffix_len
= strlen (suffix
);
2843 if (str_len
< suffix_len
)
2846 return strcmp (str
+ str_len
- suffix_len
, suffix
) == 0;
2851 * @str: a nul-terminated string
2852 * @prefix: the nul-terminated prefix to look for
2854 * Looks whether the string @str begins with @prefix.
2856 * Returns: %TRUE if @str begins with @prefix, %FALSE otherwise.
2861 g_str_has_prefix (const gchar
*str
,
2862 const gchar
*prefix
)
2864 g_return_val_if_fail (str
!= NULL
, FALSE
);
2865 g_return_val_if_fail (prefix
!= NULL
, FALSE
);
2867 return strncmp (str
, prefix
, strlen (prefix
)) == 0;
2872 * @str_array: a %NULL-terminated array of strings
2874 * Returns the length of the given %NULL-terminated
2875 * string array @str_array.
2877 * Returns: length of @str_array.
2882 g_strv_length (gchar
**str_array
)
2886 g_return_val_if_fail (str_array
!= NULL
, 0);
2888 while (str_array
[i
])
2895 index_add_folded (GPtrArray
*array
,
2901 normal
= g_utf8_normalize (start
, end
- start
, G_NORMALIZE_ALL_COMPOSE
);
2903 /* TODO: Invent time machine. Converse with Mustafa Ataturk... */
2904 if (strstr (normal
, "ı") || strstr (normal
, "İ"))
2909 tmp
= g_string_new (NULL
);
2915 i
= strstr (s
, "ı");
2916 I
= strstr (s
, "İ");
2929 g_string_append_len (tmp
, s
, e
- s
);
2930 g_string_append_c (tmp
, 'i');
2931 s
= g_utf8_next_char (e
);
2934 g_string_append (tmp
, s
);
2936 normal
= g_string_free (tmp
, FALSE
);
2939 g_ptr_array_add (array
, g_utf8_casefold (normal
, -1));
2944 split_words (const gchar
*value
)
2946 const gchar
*start
= NULL
;
2950 result
= g_ptr_array_new ();
2952 for (s
= value
; *s
; s
= g_utf8_next_char (s
))
2954 gunichar c
= g_utf8_get_char (s
);
2958 if (g_unichar_isalnum (c
) || g_unichar_ismark (c
))
2963 if (!g_unichar_isalnum (c
) && !g_unichar_ismark (c
))
2965 index_add_folded (result
, start
, s
);
2972 index_add_folded (result
, start
, s
);
2974 g_ptr_array_add (result
, NULL
);
2976 return (gchar
**) g_ptr_array_free (result
, FALSE
);
2980 * g_str_tokenize_and_fold:
2982 * @translit_locale: (nullable): the language code (like 'de' or
2983 * 'en_GB') from which @string originates
2984 * @ascii_alternates: (out) (transfer full) (array zero-terminated=1): a
2985 * return location for ASCII alternates
2987 * Tokenises @string and performs folding on each token.
2989 * A token is a non-empty sequence of alphanumeric characters in the
2990 * source string, separated by non-alphanumeric characters. An
2991 * "alphanumeric" character for this purpose is one that matches
2992 * g_unichar_isalnum() or g_unichar_ismark().
2994 * Each token is then (Unicode) normalised and case-folded. If
2995 * @ascii_alternates is non-%NULL and some of the returned tokens
2996 * contain non-ASCII characters, ASCII alternatives will be generated.
2998 * The number of ASCII alternatives that are generated and the method
2999 * for doing so is unspecified, but @translit_locale (if specified) may
3000 * improve the transliteration if the language of the source string is
3003 * Returns: (transfer full) (array zero-terminated=1): the folded tokens
3008 g_str_tokenize_and_fold (const gchar
*string
,
3009 const gchar
*translit_locale
,
3010 gchar
***ascii_alternates
)
3014 g_return_val_if_fail (string
!= NULL
, NULL
);
/* Fast path: an all-ASCII input cannot yield alternates.  The caller gets
 * an empty array (a single zeroed slot acting as the NULL terminator) and
 * the local pointer is cleared so the alternates pass below is skipped. */
3016 if (ascii_alternates
&& g_str_is_ascii (string
))
3018 *ascii_alternates
= g_new0 (gchar
*, 0 + 1);
3019 ascii_alternates
= NULL
;
3022 result
= split_words (string
);
/* Only build alternates when the caller asked for them and the fast path
 * above did not already settle the matter. */
3024 if (ascii_alternates
)
3028 n
= g_strv_length (result
);
/* Room for at most one alternate per token plus the NULL terminator.
 * g_new() does not zero the memory, so the terminator is written
 * explicitly once the loop is done. */
3029 *ascii_alternates
= g_new (gchar
*, n
+ 1);
3032 for (i
= 0; i
< n
; i
++)
/* Tokens that are already pure ASCII get no alternate. */
3034 if (!g_str_is_ascii (result
[i
]))
/* Compose the token again before transliterating it to ASCII. */
3040 composed
= g_utf8_normalize (result
[i
], -1, G_NORMALIZE_ALL_COMPOSE
);
3042 ascii
= g_str_to_ascii (composed
, translit_locale
);
3044 /* Only accept strings that are now entirely alnums */
3045 for (k
= 0; ascii
[k
]; k
++)
3046 if (!g_ascii_isalnum (ascii
[k
]))
/* k stopping on the NUL means no character failed the alnum test;
 * the transliteration is stored and j advances. */
3049 if (ascii
[k
] == '\0')
3050 /* Made it to the end... */
3051 (*ascii_alternates
)[j
++] = ascii
;
/* Terminate the array right after the last accepted alternate. */
3059 (*ascii_alternates
)[j
] = NULL
;
3066 * g_str_match_string:
3067 * @search_term: the search term from the user
3068 * @potential_hit: the text that may be a hit
3069 * @accept_alternates: %TRUE to accept ASCII alternates
3071 * Checks if a search conducted for @search_term should match
3074 * This function calls g_str_tokenize_and_fold() on both
3075 * @search_term and @potential_hit. ASCII alternates are never taken
3076 * for @search_term but will be taken for @potential_hit according to
3077 * the value of @accept_alternates.
3079 * A hit occurs when each folded token in @search_term is a prefix of a
3080 * folded token from @potential_hit.
3082 * Depending on how you're performing the search, it will typically be
3083 * faster to call g_str_tokenize_and_fold() on each string in
3084 * your corpus and build an index on the returned folded tokens, then
3085 * call g_str_tokenize_and_fold() on the search term and
3086 * perform lookups into that index.
3088 * As some examples, searching for "fred" would match the potential hit
3089 * "Smith, Fred" and also "Frédéric". Searching for "Fréd" would match
3090 * "Frédéric" but not "Frederic" (due to the one-directional nature of
3091 * accent matching). Searching "fo" would match "Foo" and "Bar Foo
3092 * Baz", but not "SFO" (because no word has "fo" as a prefix).
3094 * Returns: %TRUE if @potential_hit is a hit
3099 g_str_match_string (const gchar
*search_term
,
3100 const gchar
*potential_hit
,
3101 gboolean accept_alternates
)
/* Starts out NULL and is only filled in when alternates are requested
 * from the tokenizer below. */
3103 gchar
**alternates
= NULL
;
3104 gchar
**term_tokens
;
3109 g_return_val_if_fail (search_term
!= NULL
, FALSE
);
3110 g_return_val_if_fail (potential_hit
!= NULL
, FALSE
);
/* No alternates are requested for the search term (NULL out-argument). */
3112 term_tokens
= g_str_tokenize_and_fold (search_term
, NULL
, NULL
);
/* For the potential hit, alternates are requested only when the caller
 * set @accept_alternates. */
3113 hit_tokens
= g_str_tokenize_and_fold (potential_hit
, NULL
, accept_alternates
? &alternates
: NULL
);
/* Check every folded token of the search term in turn. */
3117 for (i
= 0; term_tokens
[i
]; i
++)
/* First look for a folded hit token that starts with the term token. */
3119 for (j
= 0; hit_tokens
[j
]; j
++)
3120 if (g_str_has_prefix (hit_tokens
[j
], term_tokens
[i
]))
/* alternates is only non-NULL when @accept_alternates is TRUE, so the
 * array is walked only under this guard. */
3123 if (accept_alternates
)
3124 for (j
= 0; alternates
[j
]; j
++)
3125 if (g_str_has_prefix (alternates
[j
], term_tokens
[i
]))
/* Release all three vectors.  alternates is still NULL here when
 * alternates were not requested; g_strfreev() is documented to accept
 * NULL. */
3135 g_strfreev (term_tokens
);
3136 g_strfreev (hit_tokens
);
3137 g_strfreev (alternates
);
3144 * @strv: a %NULL-terminated array of strings
3147 * Checks if @strv contains @str. @strv must not be %NULL.
3149 * Returns: %TRUE if @str is an element of @strv, according to g_str_equal().
3154 g_strv_contains (const gchar
* const *strv
,
3157 g_return_val_if_fail (strv
!= NULL
, FALSE
);
3158 g_return_val_if_fail (str
!= NULL
, FALSE
);
3160 for (; *strv
!= NULL
; strv
++)
3162 if (g_str_equal (str
, *strv
))