glib/gstrfuncs.c

   1 /* GLIB - Library of useful routines for C programming
   2  * Copyright (C) 1995-1997  Peter Mattis, Spencer Kimball and Josh MacDonald
   3  *
   4  * This library is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU Lesser General Public
   6  * License as published by the Free Software Foundation; either
   7  * version 2.1 of the License, or (at your option) any later version.
   8  *
   9  * This library is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * Lesser General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Lesser General Public
  15  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  16  */
  17
  18 /*
  19  * Modified by the GLib Team and others 1997-2000.  See the AUTHORS
  20  * file for a list of people on the GLib Team.  See the ChangeLog
  21  * files for a list of changes.  These files are distributed with
  22  * GLib at ftp://ftp.gtk.org/pub/gtk/.
  23  */
  24
  25 /*
  26  * MT safe
  27  */
  28
  29 #include "config.h"
  30
  31 #include <stdarg.h>
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #include <locale.h>
  35 #include <string.h>
  36 #include <locale.h>
  37 #include <errno.h>
  38 #include <ctype.h>              /* For tolower() */
  39
  40 #ifdef HAVE_XLOCALE_H
  41 /* Needed on BSD/OS X for e.g. strtod_l */
  42 #include <xlocale.h>
  43 #endif
  44
  45 #ifdef G_OS_WIN32
  46 #include <windows.h>
  47 #endif
  48
  49 /* do not include <unistd.h> here, it may interfere with g_strsignal() */
  50
  51 #include "gstrfuncs.h"
  52
  53 #include "gprintf.h"
  54 #include "gprintfint.h"
  55 #include "glibintl.h"
  56
  57
  58 /**
  59  * SECTION:string_utils
  60  * @title: String Utility Functions
  61  * @short_description: various string-related functions
  62  *
  63  * This section describes a number of utility functions for creating,
  64  * duplicating, and manipulating strings.
  65  *
  66  * Note that the functions g_printf(), g_fprintf(), g_sprintf(),
  67  * g_vprintf(), g_vfprintf(), g_vsprintf() and g_vasprintf()
  68  * are declared in the header `gprintf.h` which is not included in `glib.h`
  69  * (otherwise using `glib.h` would drag in `stdio.h`), so you'll have to
  70  * explicitly include `<glib/gprintf.h>` in order to use the GLib
  71  * printf() functions.
  72  *
  73  * ## String precision pitfalls # {#string-precision}
  74  *
  75  * While you may use the printf() functions to format UTF-8 strings,
  76  * notice that the precision of a \%Ns parameter is interpreted
  77  * as the number of bytes, not characters to print. On top of that,
  78  * the GNU libc implementation of the printf() functions has the
  79  * "feature" that it checks that the string given for the \%Ns
  80  * parameter consists of a whole number of characters in the current
 * encoding. So, unless you are sure you are always going to be in a
 * UTF-8 locale or you know your text is restricted to ASCII, avoid
  83  * using \%Ns. If your intention is to format strings for a
  84  * certain number of columns, then \%Ns is not a correct solution
  85  * anyway, since it fails to take wide characters (see g_unichar_iswide())
  86  * into account.
  87  *
  88  * Note also that there are various printf() parameters which are platform
  89  * dependent. GLib provides platform independent macros for these parameters
  90  * which should be used instead. A common example is %G_GUINT64_FORMAT, which
  91  * should be used instead of `%llu` or similar parameters for formatting
  92  * 64-bit integers. These macros are all named `G_*_FORMAT`; see
  93  * [Basic Types][glib-Basic-Types].
  94  */
  95
  96 /**
  97  * g_ascii_isalnum:
  98  * @c: any character
  99  *
 100  * Determines whether a character is alphanumeric.
 101  *
 102  * Unlike the standard C library isalnum() function, this only
 103  * recognizes standard ASCII letters and ignores the locale,
 104  * returning %FALSE for all non-ASCII characters. Also, unlike
 105  * the standard library function, this takes a char, not an int,
 106  * so don't call it on %EOF, but no need to cast to #guchar before
 107  * passing a possibly non-ASCII character in.
 108  *
 109  * Returns: %TRUE if @c is an ASCII alphanumeric character
 110  */
 111
 112 /**
 113  * g_ascii_isalpha:
 114  * @c: any character
 115  *
 116  * Determines whether a character is alphabetic (i.e. a letter).
 117  *
 118  * Unlike the standard C library isalpha() function, this only
 119  * recognizes standard ASCII letters and ignores the locale,
 120  * returning %FALSE for all non-ASCII characters. Also, unlike
 121  * the standard library function, this takes a char, not an int,
 122  * so don't call it on %EOF, but no need to cast to #guchar before
 123  * passing a possibly non-ASCII character in.
 124  *
 125  * Returns: %TRUE if @c is an ASCII alphabetic character
 126  */
 127
 128 /**
 129  * g_ascii_iscntrl:
 130  * @c: any character
 131  *
 132  * Determines whether a character is a control character.
 133  *
 134  * Unlike the standard C library iscntrl() function, this only
 135  * recognizes standard ASCII control characters and ignores the
 136  * locale, returning %FALSE for all non-ASCII characters. Also,
 137  * unlike the standard library function, this takes a char, not
 138  * an int, so don't call it on %EOF, but no need to cast to #guchar
 139  * before passing a possibly non-ASCII character in.
 140  *
 141  * Returns: %TRUE if @c is an ASCII control character.
 142  */
 143
 144 /**
 145  * g_ascii_isdigit:
 146  * @c: any character
 147  *
 * Determines whether a character is a digit (0-9).
 *
 * Unlike the standard C library isdigit() function, this takes
 * a char, not an int, so don't call it on %EOF, but no need to
 * cast to #guchar before passing a possibly non-ASCII character in.
 153  *
 154  * Returns: %TRUE if @c is an ASCII digit.
 155  */
 156
 157 /**
 158  * g_ascii_isgraph:
 159  * @c: any character
 160  *
 161  * Determines whether a character is a printing character and not a space.
 162  *
 163  * Unlike the standard C library isgraph() function, this only
 164  * recognizes standard ASCII characters and ignores the locale,
 165  * returning %FALSE for all non-ASCII characters. Also, unlike
 166  * the standard library function, this takes a char, not an int,
 167  * so don't call it on %EOF, but no need to cast to #guchar before
 168  * passing a possibly non-ASCII character in.
 169  *
 170  * Returns: %TRUE if @c is an ASCII printing character other than space.
 171  */
 172
 173 /**
 174  * g_ascii_islower:
 175  * @c: any character
 176  *
 177  * Determines whether a character is an ASCII lower case letter.
 178  *
 179  * Unlike the standard C library islower() function, this only
 180  * recognizes standard ASCII letters and ignores the locale,
 181  * returning %FALSE for all non-ASCII characters. Also, unlike
 182  * the standard library function, this takes a char, not an int,
 183  * so don't call it on %EOF, but no need to worry about casting
 184  * to #guchar before passing a possibly non-ASCII character in.
 185  *
 186  * Returns: %TRUE if @c is an ASCII lower case letter
 187  */
 188
 189 /**
 190  * g_ascii_isprint:
 191  * @c: any character
 192  *
 193  * Determines whether a character is a printing character.
 194  *
 195  * Unlike the standard C library isprint() function, this only
 196  * recognizes standard ASCII characters and ignores the locale,
 197  * returning %FALSE for all non-ASCII characters. Also, unlike
 198  * the standard library function, this takes a char, not an int,
 199  * so don't call it on %EOF, but no need to cast to #guchar before
 200  * passing a possibly non-ASCII character in.
 201  *
 202  * Returns: %TRUE if @c is an ASCII printing character.
 203  */
 204
 205 /**
 206  * g_ascii_ispunct:
 207  * @c: any character
 208  *
 209  * Determines whether a character is a punctuation character.
 210  *
 211  * Unlike the standard C library ispunct() function, this only
 * recognizes standard ASCII punctuation and ignores the locale,
 213  * returning %FALSE for all non-ASCII characters. Also, unlike
 214  * the standard library function, this takes a char, not an int,
 215  * so don't call it on %EOF, but no need to cast to #guchar before
 216  * passing a possibly non-ASCII character in.
 217  *
 218  * Returns: %TRUE if @c is an ASCII punctuation character.
 219  */
 220
 221 /**
 222  * g_ascii_isspace:
 223  * @c: any character
 224  *
 225  * Determines whether a character is a white-space character.
 226  *
 227  * Unlike the standard C library isspace() function, this only
 228  * recognizes standard ASCII white-space and ignores the locale,
 229  * returning %FALSE for all non-ASCII characters. Also, unlike
 230  * the standard library function, this takes a char, not an int,
 231  * so don't call it on %EOF, but no need to cast to #guchar before
 232  * passing a possibly non-ASCII character in.
 233  *
 234  * Returns: %TRUE if @c is an ASCII white-space character
 235  */
 236
 237 /**
 238  * g_ascii_isupper:
 239  * @c: any character
 240  *
 241  * Determines whether a character is an ASCII upper case letter.
 242  *
 243  * Unlike the standard C library isupper() function, this only
 244  * recognizes standard ASCII letters and ignores the locale,
 245  * returning %FALSE for all non-ASCII characters. Also, unlike
 246  * the standard library function, this takes a char, not an int,
 247  * so don't call it on %EOF, but no need to worry about casting
 248  * to #guchar before passing a possibly non-ASCII character in.
 249  *
 250  * Returns: %TRUE if @c is an ASCII upper case letter
 251  */
 252
 253 /**
 254  * g_ascii_isxdigit:
 255  * @c: any character
 256  *
 257  * Determines whether a character is a hexadecimal-digit character.
 258  *
 259  * Unlike the standard C library isxdigit() function, this takes
 260  * a char, not an int, so don't call it on %EOF, but no need to
 261  * cast to #guchar before passing a possibly non-ASCII character in.
 262  *
 263  * Returns: %TRUE if @c is an ASCII hexadecimal-digit character.
 264  */
 265
 266 /**
 267  * G_ASCII_DTOSTR_BUF_SIZE:
 268  *
 269  * A good size for a buffer to be passed into g_ascii_dtostr().
 270  * It is guaranteed to be enough for all output of that function
 271  * on systems with 64bit IEEE-compatible doubles.
 272  *
 273  * The typical usage would be something like:
 274  * |[<!-- language="C" -->
 275  *   char buf[G_ASCII_DTOSTR_BUF_SIZE];
 276  *
 277  *   fprintf (out, "value=%s\n", g_ascii_dtostr (buf, sizeof (buf), value));
 278  * ]|
 279  */
 280
 281 /**
 282  * g_strstrip:
 283  * @string: a string to remove the leading and trailing whitespace from
 284  *
 285  * Removes leading and trailing whitespace from a string.
 286  * See g_strchomp() and g_strchug().
 287  *
 288  * Returns: @string
 289  */
 290
 291 /**
 292  * G_STR_DELIMITERS:
 293  *
 294  * The standard delimiters, used in g_strdelimit().
 295  */
 296
 297 static const guint16 ascii_table_data[256] = {
 298   0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004,
 299   0x004, 0x104, 0x104, 0x004, 0x104, 0x104, 0x004, 0x004,
 300   0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004,
 301   0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004,
 302   0x140, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
 303   0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
 304   0x459, 0x459, 0x459, 0x459, 0x459, 0x459, 0x459, 0x459,
 305   0x459, 0x459, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
 306   0x0d0, 0x653, 0x653, 0x653, 0x653, 0x653, 0x653, 0x253,
 307   0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253,
 308   0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253,
 309   0x253, 0x253, 0x253, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
 310   0x0d0, 0x473, 0x473, 0x473, 0x473, 0x473, 0x473, 0x073,
 311   0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073,
 312   0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073,
 313   0x073, 0x073, 0x073, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x004
 314   /* the upper 128 are all zeroes */
 315 };
 316
 317 const guint16 * const g_ascii_table = ascii_table_data;
 318
 319 #if defined (HAVE_NEWLOCALE) && \
 320     defined (HAVE_USELOCALE) && \
 321     defined (HAVE_STRTOD_L) && \
 322     defined (HAVE_STRTOULL_L) && \
 323     defined (HAVE_STRTOLL_L)
 324 #define USE_XLOCALE 1
 325 #endif
 326
 327 #ifdef USE_XLOCALE
 328 static locale_t
 329 get_C_locale (void)
 330 {
 331   static gsize initialized = FALSE;
 332   static locale_t C_locale = NULL;
 333
 334   if (g_once_init_enter (&initialized))
 335     {
 336       C_locale = newlocale (LC_ALL_MASK, "C", NULL);
 337       g_once_init_leave (&initialized, TRUE);
 338     }
 339
 340   return C_locale;
 341 }
 342 #endif
 343
 344 /**
 345  * g_strdup:
 346  * @str: (nullable): the string to duplicate
 347  *
 348  * Duplicates a string. If @str is %NULL it returns %NULL.
 349  * The returned string should be freed with g_free()
 350  * when no longer needed.
 351  *
 352  * Returns: a newly-allocated copy of @str
 353  */
 354 gchar*
 355 g_strdup (const gchar *str)
 356 {
 357   gchar *new_str;
 358   gsize length;
 359
 360   if (str)
 361     {
 362       length = strlen (str) + 1;
 363       new_str = g_new (char, length);
 364       memcpy (new_str, str, length);
 365     }
 366   else
 367     new_str = NULL;
 368
 369   return new_str;
 370 }
 371
 372 /**
 373  * g_memdup:
 374  * @mem: the memory to copy.
 375  * @byte_size: the number of bytes to copy.
 376  *
 377  * Allocates @byte_size bytes of memory, and copies @byte_size bytes into it
 378  * from @mem. If @mem is %NULL it returns %NULL.
 379  *
 380  * Returns: a pointer to the newly-allocated copy of the memory, or %NULL if @mem
 381  *  is %NULL.
 382  */
 383 gpointer
 384 g_memdup (gconstpointer mem,
 385           guint         byte_size)
 386 {
 387   gpointer new_mem;
 388
 389   if (mem && byte_size != 0)
 390     {
 391       new_mem = g_malloc (byte_size);
 392       memcpy (new_mem, mem, byte_size);
 393     }
 394   else
 395     new_mem = NULL;
 396
 397   return new_mem;
 398 }
 399
 400 /**
 401  * g_strndup:
 402  * @str: the string to duplicate
 403  * @n: the maximum number of bytes to copy from @str
 404  *
 405  * Duplicates the first @n bytes of a string, returning a newly-allocated
 406  * buffer @n + 1 bytes long which will always be nul-terminated. If @str
 407  * is less than @n bytes long the buffer is padded with nuls. If @str is
 408  * %NULL it returns %NULL. The returned value should be freed when no longer
 409  * needed.
 410  *
 411  * To copy a number of characters from a UTF-8 encoded string,
 412  * use g_utf8_strncpy() instead.
 413  *
 414  * Returns: a newly-allocated buffer containing the first @n bytes
 415  *     of @str, nul-terminated
 416  */
 417 gchar*
 418 g_strndup (const gchar *str,
 419            gsize        n)
 420 {
 421   gchar *new_str;
 422
 423   if (str)
 424     {
 425       new_str = g_new (gchar, n + 1);
 426       strncpy (new_str, str, n);
 427       new_str[n] = '\0';
 428     }
 429   else
 430     new_str = NULL;
 431
 432   return new_str;
 433 }
 434
 435 /**
 436  * g_strnfill:
 437  * @length: the length of the new string
 438  * @fill_char: the byte to fill the string with
 439  *
 440  * Creates a new string @length bytes long filled with @fill_char.
 441  * The returned string should be freed when no longer needed.
 442  *
 443  * Returns: a newly-allocated string filled the @fill_char
 444  */
 445 gchar*
 446 g_strnfill (gsize length,
 447             gchar fill_char)
 448 {
 449   gchar *str;
 450
 451   str = g_new (gchar, length + 1);
 452   memset (str, (guchar)fill_char, length);
 453   str[length] = '\0';
 454
 455   return str;
 456 }
 457
 458 /**
 459  * g_stpcpy:
 460  * @dest: destination buffer.
 461  * @src: source string.
 462  *
 463  * Copies a nul-terminated string into the dest buffer, include the
 464  * trailing nul, and return a pointer to the trailing nul byte.
 465  * This is useful for concatenating multiple strings together
 466  * without having to repeatedly scan for the end.
 467  *
 468  * Returns: a pointer to trailing nul byte.
 469  **/
 470 gchar *
 471 g_stpcpy (gchar       *dest,
 472           const gchar *src)
 473 {
 474 #ifdef HAVE_STPCPY
 475   g_return_val_if_fail (dest != NULL, NULL);
 476   g_return_val_if_fail (src != NULL, NULL);
 477   return stpcpy (dest, src);
 478 #else
 479   gchar *d = dest;
 480   const gchar *s = src;
 481
 482   g_return_val_if_fail (dest != NULL, NULL);
 483   g_return_val_if_fail (src != NULL, NULL);
 484   do
 485     *d++ = *s;
 486   while (*s++ != '\0');
 487
 488   return d - 1;
 489 #endif
 490 }
 491
 492 /**
 493  * g_strdup_vprintf:
 494  * @format: a standard printf() format string, but notice
 495  *     [string precision pitfalls][string-precision]
 496  * @args: the list of parameters to insert into the format string
 497  *
 498  * Similar to the standard C vsprintf() function but safer, since it
 499  * calculates the maximum space required and allocates memory to hold
 500  * the result. The returned string should be freed with g_free() when
 501  * no longer needed.
 502  *
 503  * See also g_vasprintf(), which offers the same functionality, but
 504  * additionally returns the length of the allocated string.
 505  *
 506  * Returns: a newly-allocated string holding the result
 507  */
 508 gchar*
 509 g_strdup_vprintf (const gchar *format,
 510                   va_list      args)
 511 {
 512   gchar *string = NULL;
 513
 514   g_vasprintf (&string, format, args);
 515
 516   return string;
 517 }
 518
 519 /**
 520  * g_strdup_printf:
 521  * @format: a standard printf() format string, but notice
 522  *     [string precision pitfalls][string-precision]
 523  * @...: the parameters to insert into the format string
 524  *
 525  * Similar to the standard C sprintf() function but safer, since it
 526  * calculates the maximum space required and allocates memory to hold
 527  * the result. The returned string should be freed with g_free() when no
 528  * longer needed.
 529  *
 530  * Returns: a newly-allocated string holding the result
 531  */
 532 gchar*
 533 g_strdup_printf (const gchar *format,
 534                  ...)
 535 {
 536   gchar *buffer;
 537   va_list args;
 538
 539   va_start (args, format);
 540   buffer = g_strdup_vprintf (format, args);
 541   va_end (args);
 542
 543   return buffer;
 544 }
 545
 546 /**
 547  * g_strconcat:
 548  * @string1: the first string to add, which must not be %NULL
 549  * @...: a %NULL-terminated list of strings to append to the string
 550  *
 551  * Concatenates all of the given strings into one long string. The
 552  * returned string should be freed with g_free() when no longer needed.
 553  *
 554  * The variable argument list must end with %NULL. If you forget the %NULL,
 555  * g_strconcat() will start appending random memory junk to your string.
 556  *
 557  * Note that this function is usually not the right function to use to
 558  * assemble a translated message from pieces, since proper translation
 559  * often requires the pieces to be reordered.
 560  *
 561  * Returns: a newly-allocated string containing all the string arguments
 562  */
 563 gchar*
 564 g_strconcat (const gchar *string1, ...)
 565 {
 566   gsize   l;
 567   va_list args;
 568   gchar   *s;
 569   gchar   *concat;
 570   gchar   *ptr;
 571
 572   if (!string1)
 573     return NULL;
 574
 575   l = 1 + strlen (string1);
 576   va_start (args, string1);
 577   s = va_arg (args, gchar*);
 578   while (s)
 579     {
 580       l += strlen (s);
 581       s = va_arg (args, gchar*);
 582     }
 583   va_end (args);
 584
 585   concat = g_new (gchar, l);
 586   ptr = concat;
 587
 588   ptr = g_stpcpy (ptr, string1);
 589   va_start (args, string1);
 590   s = va_arg (args, gchar*);
 591   while (s)
 592     {
 593       ptr = g_stpcpy (ptr, s);
 594       s = va_arg (args, gchar*);
 595     }
 596   va_end (args);
 597
 598   return concat;
 599 }
 600
 601 /**
 602  * g_strtod:
 603  * @nptr:    the string to convert to a numeric value.
 604  * @endptr:  (out) (transfer none) (optional): if non-%NULL, it returns the
 605  *           character after the last character used in the conversion.
 606  *
 607  * Converts a string to a #gdouble value.
 608  * It calls the standard strtod() function to handle the conversion, but
 609  * if the string is not completely converted it attempts the conversion
 610  * again with g_ascii_strtod(), and returns the best match.
 611  *
 612  * This function should seldom be used. The normal situation when reading
 613  * numbers not for human consumption is to use g_ascii_strtod(). Only when
 614  * you know that you must expect both locale formatted and C formatted numbers
 615  * should you use this. Make sure that you don't pass strings such as comma
 616  * separated lists of values, since the commas may be interpreted as a decimal
 617  * point in some locales, causing unexpected results.
 618  *
 619  * Returns: the #gdouble value.
 620  **/
 621 gdouble
 622 g_strtod (const gchar *nptr,
 623           gchar      **endptr)
 624 {
 625   gchar *fail_pos_1;
 626   gchar *fail_pos_2;
 627   gdouble val_1;
 628   gdouble val_2 = 0;
 629
 630   g_return_val_if_fail (nptr != NULL, 0);
 631
 632   fail_pos_1 = NULL;
 633   fail_pos_2 = NULL;
 634
 635   val_1 = strtod (nptr, &fail_pos_1);
 636
 637   if (fail_pos_1 && fail_pos_1[0] != 0)
 638     val_2 = g_ascii_strtod (nptr, &fail_pos_2);
 639
 640   if (!fail_pos_1 || fail_pos_1[0] == 0 || fail_pos_1 >= fail_pos_2)
 641     {
 642       if (endptr)
 643         *endptr = fail_pos_1;
 644       return val_1;
 645     }
 646   else
 647     {
 648       if (endptr)
 649         *endptr = fail_pos_2;
 650       return val_2;
 651     }
 652 }
 653
 654 /**
 655  * g_ascii_strtod:
 656  * @nptr:    the string to convert to a numeric value.
 657  * @endptr:  (out) (transfer none) (optional): if non-%NULL, it returns the
 658  *           character after the last character used in the conversion.
 659  *
 660  * Converts a string to a #gdouble value.
 661  *
 662  * This function behaves like the standard strtod() function
 663  * does in the C locale. It does this without actually changing
 664  * the current locale, since that would not be thread-safe.
 665  * A limitation of the implementation is that this function
 666  * will still accept localized versions of infinities and NANs.
 667  *
 668  * This function is typically used when reading configuration
 669  * files or other non-user input that should be locale independent.
 670  * To handle input from the user you should normally use the
 671  * locale-sensitive system strtod() function.
 672  *
 673  * To convert from a #gdouble to a string in a locale-insensitive
 674  * way, use g_ascii_dtostr().
 675  *
 676  * If the correct value would cause overflow, plus or minus %HUGE_VAL
 677  * is returned (according to the sign of the value), and %ERANGE is
 678  * stored in %errno. If the correct value would cause underflow,
 679  * zero is returned and %ERANGE is stored in %errno.
 680  *
 681  * This function resets %errno before calling strtod() so that
 682  * you can reliably detect overflow and underflow.
 683  *
 684  * Returns: the #gdouble value.
 685  */
 686 gdouble
 687 g_ascii_strtod (const gchar *nptr,
 688                 gchar      **endptr)
 689 {
 690 #ifdef USE_XLOCALE
 691
 692   g_return_val_if_fail (nptr != NULL, 0);
 693
 694   errno = 0;
 695
 696   return strtod_l (nptr, endptr, get_C_locale ());
 697
 698 #else
 699
 700   gchar *fail_pos;
 701   gdouble val;
 702 #ifndef __BIONIC__
 703   struct lconv *locale_data;
 704 #endif
 705   const char *decimal_point;
 706   gsize decimal_point_len;
 707   const char *p, *decimal_point_pos;
 708   const char *end = NULL; /* Silence gcc */
 709   int strtod_errno;
 710
 711   g_return_val_if_fail (nptr != NULL, 0);
 712
 713   fail_pos = NULL;
 714
 715 #ifndef __BIONIC__
 716   locale_data = localeconv ();
 717   decimal_point = locale_data->decimal_point;
 718   decimal_point_len = strlen (decimal_point);
 719 #else
 720   decimal_point = ".";
 721   decimal_point_len = 1;
 722 #endif
 723
 724   g_assert (decimal_point_len != 0);
 725
 726   decimal_point_pos = NULL;
 727   end = NULL;
 728
 729   if (decimal_point[0] != '.' ||
 730       decimal_point[1] != 0)
 731     {
 732       p = nptr;
 733       /* Skip leading space */
 734       while (g_ascii_isspace (*p))
 735         p++;
 736
 737       /* Skip leading optional sign */
 738       if (*p == '+' || *p == '-')
 739         p++;
 740
 741       if (p[0] == '0' &&
 742           (p[1] == 'x' || p[1] == 'X'))
 743         {
 744           p += 2;
 745           /* HEX - find the (optional) decimal point */
 746
 747           while (g_ascii_isxdigit (*p))
 748             p++;
 749
 750           if (*p == '.')
 751             decimal_point_pos = p++;
 752
 753           while (g_ascii_isxdigit (*p))
 754             p++;
 755
 756           if (*p == 'p' || *p == 'P')
 757             p++;
 758           if (*p == '+' || *p == '-')
 759             p++;
 760           while (g_ascii_isdigit (*p))
 761             p++;
 762
 763           end = p;
 764         }
 765       else if (g_ascii_isdigit (*p) || *p == '.')
 766         {
 767           while (g_ascii_isdigit (*p))
 768             p++;
 769
 770           if (*p == '.')
 771             decimal_point_pos = p++;
 772
 773           while (g_ascii_isdigit (*p))
 774             p++;
 775
 776           if (*p == 'e' || *p == 'E')
 777             p++;
 778           if (*p == '+' || *p == '-')
 779             p++;
 780           while (g_ascii_isdigit (*p))
 781             p++;
 782
 783           end = p;
 784         }
 785       /* For the other cases, we need not convert the decimal point */
 786     }
 787
 788   if (decimal_point_pos)
 789     {
 790       char *copy, *c;
 791
 792       /* We need to convert the '.' to the locale specific decimal point */
 793       copy = g_malloc (end - nptr + 1 + decimal_point_len);
 794
 795       c = copy;
 796       memcpy (c, nptr, decimal_point_pos - nptr);
 797       c += decimal_point_pos - nptr;
 798       memcpy (c, decimal_point, decimal_point_len);
 799       c += decimal_point_len;
 800       memcpy (c, decimal_point_pos + 1, end - (decimal_point_pos + 1));
 801       c += end - (decimal_point_pos + 1);
 802       *c = 0;
 803
 804       errno = 0;
 805       val = strtod (copy, &fail_pos);
 806       strtod_errno = errno;
 807
 808       if (fail_pos)
 809         {
 810           if (fail_pos - copy > decimal_point_pos - nptr)
 811             fail_pos = (char *)nptr + (fail_pos - copy) - (decimal_point_len - 1);
 812           else
 813             fail_pos = (char *)nptr + (fail_pos - copy);
 814         }
 815
 816       g_free (copy);
 817
 818     }
 819   else if (end)
 820     {
 821       char *copy;
 822
 823       copy = g_malloc (end - (char *)nptr + 1);
 824       memcpy (copy, nptr, end - nptr);
 825       *(copy + (end - (char *)nptr)) = 0;
 826
 827       errno = 0;
 828       val = strtod (copy, &fail_pos);
 829       strtod_errno = errno;
 830
 831       if (fail_pos)
 832         {
 833           fail_pos = (char *)nptr + (fail_pos - copy);
 834         }
 835
 836       g_free (copy);
 837     }
 838   else
 839     {
 840       errno = 0;
 841       val = strtod (nptr, &fail_pos);
 842       strtod_errno = errno;
 843     }
 844
 845   if (endptr)
 846     *endptr = fail_pos;
 847
 848   errno = strtod_errno;
 849
 850   return val;
 851 #endif
 852 }
 853
 854
 855 /**
 856  * g_ascii_dtostr:
 857  * @buffer: A buffer to place the resulting string in
 858  * @buf_len: The length of the buffer.
 859  * @d: The #gdouble to convert
 860  *
 861  * Converts a #gdouble to a string, using the '.' as
 862  * decimal point.
 863  *
 864  * This function generates enough precision that converting
 865  * the string back using g_ascii_strtod() gives the same machine-number
 866  * (on machines with IEEE compatible 64bit doubles). It is
 867  * guaranteed that the size of the resulting string will never
 868  * be larger than @G_ASCII_DTOSTR_BUF_SIZE bytes, including the terminating
 869  * nul character, which is always added.
 870  *
 871  * Returns: The pointer to the buffer with the converted string.
 872  **/
 873 gchar *
 874 g_ascii_dtostr (gchar       *buffer,
 875                 gint         buf_len,
 876                 gdouble      d)
 877 {
 878   return g_ascii_formatd (buffer, buf_len, "%.17g", d);
 879 }
 880
 881 #pragma GCC diagnostic push
 882 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
 883
 884 /**
 885  * g_ascii_formatd:
 886  * @buffer: A buffer to place the resulting string in
 887  * @buf_len: The length of the buffer.
 888  * @format: The printf()-style format to use for the
 889  *          code to use for converting.
 890  * @d: The #gdouble to convert
 891  *
 892  * Converts a #gdouble to a string, using the '.' as
 893  * decimal point. To format the number you pass in
 894  * a printf()-style format string. Allowed conversion
 895  * specifiers are 'e', 'E', 'f', 'F', 'g' and 'G'.
 896  *
 897  * The returned buffer is guaranteed to be nul-terminated.
 898  *
 899  * If you just want to want to serialize the value into a
 900  * string, use g_ascii_dtostr().
 901  *
 902  * Returns: The pointer to the buffer with the converted string.
 903  */
 904 gchar *
 905 g_ascii_formatd (gchar       *buffer,
 906                  gint         buf_len,
 907                  const gchar *format,
 908                  gdouble      d)
 909 {
 910 #ifdef USE_XLOCALE
 911   locale_t old_locale;
 912
 913   old_locale = uselocale (get_C_locale ());
 914    _g_snprintf (buffer, buf_len, format, d);
 915   uselocale (old_locale);
 916
 917   return buffer;
 918 #else
 919 #ifndef __BIONIC__
 920   struct lconv *locale_data;
 921 #endif
 922   const char *decimal_point;
 923   gsize decimal_point_len;
 924   gchar *p;
 925   int rest_len;
 926   gchar format_char;
 927
 928   g_return_val_if_fail (buffer != NULL, NULL);
 929   g_return_val_if_fail (format[0] == '%', NULL);
 930   g_return_val_if_fail (strpbrk (format + 1, "'l%") == NULL, NULL);
 931
 932   format_char = format[strlen (format) - 1];
 933
 934   g_return_val_if_fail (format_char == 'e' || format_char == 'E' ||
 935                         format_char == 'f' || format_char == 'F' ||
 936                         format_char == 'g' || format_char == 'G',
 937                         NULL);
 938
 939   if (format[0] != '%')
 940     return NULL;
 941
 942   if (strpbrk (format + 1, "'l%"))
 943     return NULL;
 944
 945   if (!(format_char == 'e' || format_char == 'E' ||
 946         format_char == 'f' || format_char == 'F' ||
 947         format_char == 'g' || format_char == 'G'))
 948     return NULL;
 949
 950   _g_snprintf (buffer, buf_len, format, d);
 951
 952 #ifndef __BIONIC__
 953   locale_data = localeconv ();
 954   decimal_point = locale_data->decimal_point;
 955   decimal_point_len = strlen (decimal_point);
 956 #else
 957   decimal_point = ".";
 958   decimal_point_len = 1;
 959 #endif
 960
 961   g_assert (decimal_point_len != 0);
 962
 963   if (decimal_point[0] != '.' ||
 964       decimal_point[1] != 0)
 965     {
 966       p = buffer;
 967
 968       while (g_ascii_isspace (*p))
 969         p++;
 970
 971       if (*p == '+' || *p == '-')
 972         p++;
 973
 974       while (isdigit ((guchar)*p))
 975         p++;
 976
 977       if (strncmp (p, decimal_point, decimal_point_len) == 0)
 978         {
 979           *p = '.';
 980           p++;
 981           if (decimal_point_len > 1)
 982             {
 983               rest_len = strlen (p + (decimal_point_len - 1));
 984               memmove (p, p + (decimal_point_len - 1), rest_len);
 985               p[rest_len] = 0;
 986             }
 987         }
 988     }
 989
 990   return buffer;
 991 #endif
 992 }
 993 #pragma GCC diagnostic pop
 994
 995 #define ISSPACE(c)              ((c) == ' ' || (c) == '\f' || (c) == '\n' || \
 996                                  (c) == '\r' || (c) == '\t' || (c) == '\v')
 997 #define ISUPPER(c)              ((c) >= 'A' && (c) <= 'Z')
 998 #define ISLOWER(c)              ((c) >= 'a' && (c) <= 'z')
 999 #define ISALPHA(c)              (ISUPPER (c) || ISLOWER (c))
1000 #define TOUPPER(c)              (ISLOWER (c) ? (c) - 'a' + 'A' : (c))
1001 #define TOLOWER(c)              (ISUPPER (c) ? (c) - 'A' + 'a' : (c))
1002
1003 #ifndef USE_XLOCALE
1004
1005 static guint64
1006 g_parse_long_long (const gchar  *nptr,
1007                    const gchar **endptr,
1008                    guint         base,
1009                    gboolean     *negative)
1010 {
1011   /* this code is based on on the strtol(3) code from GNU libc released under
1012    * the GNU Lesser General Public License.
1013    *
1014    * Copyright (C) 1991,92,94,95,96,97,98,99,2000,01,02
1015    *        Free Software Foundation, Inc.
1016    */
1017   gboolean overflow;
1018   guint64 cutoff;
1019   guint64 cutlim;
1020   guint64 ui64;
1021   const gchar *s, *save;
1022   guchar c;
1023
1024   g_return_val_if_fail (nptr != NULL, 0);
1025
1026   *negative = FALSE;
1027   if (base == 1 || base > 36)
1028     {
1029       errno = EINVAL;
1030       if (endptr)
1031         *endptr = nptr;
1032       return 0;
1033     }
1034
1035   save = s = nptr;
1036
1037   /* Skip white space.  */
1038   while (ISSPACE (*s))
1039     ++s;
1040
1041   if (G_UNLIKELY (!*s))
1042     goto noconv;
1043
1044   /* Check for a sign.  */
1045   if (*s == '-')
1046     {
1047       *negative = TRUE;
1048       ++s;
1049     }
1050   else if (*s == '+')
1051     ++s;
1052
1053   /* Recognize number prefix and if BASE is zero, figure it out ourselves.  */
1054   if (*s == '0')
1055     {
1056       if ((base == 0 || base == 16) && TOUPPER (s[1]) == 'X')
1057         {
1058           s += 2;
1059           base = 16;
1060         }
1061       else if (base == 0)
1062         base = 8;
1063     }
1064   else if (base == 0)
1065     base = 10;
1066
1067   /* Save the pointer so we can check later if anything happened.  */
1068   save = s;
1069   cutoff = G_MAXUINT64 / base;
1070   cutlim = G_MAXUINT64 % base;
1071
1072   overflow = FALSE;
1073   ui64 = 0;
1074   c = *s;
1075   for (; c; c = *++s)
1076     {
1077       if (c >= '0' && c <= '9')
1078         c -= '0';
1079       else if (ISALPHA (c))
1080         c = TOUPPER (c) - 'A' + 10;
1081       else
1082         break;
1083       if (c >= base)
1084         break;
1085       /* Check for overflow.  */
1086       if (ui64 > cutoff || (ui64 == cutoff && c > cutlim))
1087         overflow = TRUE;
1088       else
1089         {
1090           ui64 *= base;
1091           ui64 += c;
1092         }
1093     }
1094
1095   /* Check if anything actually happened.  */
1096   if (s == save)
1097     goto noconv;
1098
1099   /* Store in ENDPTR the address of one character
1100      past the last character we converted.  */
1101   if (endptr)
1102     *endptr = s;
1103
1104   if (G_UNLIKELY (overflow))
1105     {
1106       errno = ERANGE;
1107       return G_MAXUINT64;
1108     }
1109
1110   return ui64;
1111
1112  noconv:
1113   /* We must handle a special case here: the base is 0 or 16 and the
1114      first two characters are '0' and 'x', but the rest are no
1115      hexadecimal digits.  This is no error case.  We return 0 and
1116      ENDPTR points to the `x`.  */
1117   if (endptr)
1118     {
1119       if (save - nptr >= 2 && TOUPPER (save[-1]) == 'X'
1120           && save[-2] == '0')
1121         *endptr = &save[-1];
1122       else
1123         /*  There was no number to convert.  */
1124         *endptr = nptr;
1125     }
1126   return 0;
1127 }
1128 #endif /* !USE_XLOCALE */
1129
1130 /**
1131  * g_ascii_strtoull:
1132  * @nptr:    the string to convert to a numeric value.
1133  * @endptr:  (out) (transfer none) (optional): if non-%NULL, it returns the
1134  *           character after the last character used in the conversion.
1135  * @base:    to be used for the conversion, 2..36 or 0
1136  *
1137  * Converts a string to a #guint64 value.
1138  * This function behaves like the standard strtoull() function
1139  * does in the C locale. It does this without actually
1140  * changing the current locale, since that would not be
1141  * thread-safe.
1142  *
1143  * This function is typically used when reading configuration
1144  * files or other non-user input that should be locale independent.
1145  * To handle input from the user you should normally use the
1146  * locale-sensitive system strtoull() function.
1147  *
1148  * If the correct value would cause overflow, %G_MAXUINT64
1149  * is returned, and `ERANGE` is stored in `errno`.
1150  * If the base is outside the valid range, zero is returned, and
1151  * `EINVAL` is stored in `errno`.
1152  * If the string conversion fails, zero is returned, and @endptr returns
1153  * @nptr (if @endptr is non-%NULL).
1154  *
1155  * Returns: the #guint64 value or zero on error.
1156  *
1157  * Since: 2.2
1158  */
1159 guint64
1160 g_ascii_strtoull (const gchar *nptr,
1161                   gchar      **endptr,
1162                   guint        base)
1163 {
1164 #ifdef USE_XLOCALE
1165   return strtoull_l (nptr, endptr, base, get_C_locale ());
1166 #else
1167   gboolean negative;
1168   guint64 result;
1169
1170   result = g_parse_long_long (nptr, (const gchar **) endptr, base, &negative);
1171
1172   /* Return the result of the appropriate sign.  */
1173   return negative ? -result : result;
1174 #endif
1175 }
1176
1177 /**
1178  * g_ascii_strtoll:
1179  * @nptr:    the string to convert to a numeric value.
1180  * @endptr:  (out) (transfer none) (optional): if non-%NULL, it returns the
1181  *           character after the last character used in the conversion.
1182  * @base:    to be used for the conversion, 2..36 or 0
1183  *
1184  * Converts a string to a #gint64 value.
1185  * This function behaves like the standard strtoll() function
1186  * does in the C locale. It does this without actually
1187  * changing the current locale, since that would not be
1188  * thread-safe.
1189  *
1190  * This function is typically used when reading configuration
1191  * files or other non-user input that should be locale independent.
1192  * To handle input from the user you should normally use the
1193  * locale-sensitive system strtoll() function.
1194  *
1195  * If the correct value would cause overflow, %G_MAXINT64 or %G_MININT64
1196  * is returned, and `ERANGE` is stored in `errno`.
1197  * If the base is outside the valid range, zero is returned, and
1198  * `EINVAL` is stored in `errno`. If the
1199  * string conversion fails, zero is returned, and @endptr returns @nptr
1200  * (if @endptr is non-%NULL).
1201  *
1202  * Returns: the #gint64 value or zero on error.
1203  *
1204  * Since: 2.12
1205  */
1206 gint64
1207 g_ascii_strtoll (const gchar *nptr,
1208                  gchar      **endptr,
1209                  guint        base)
1210 {
1211 #ifdef USE_XLOCALE
1212   return strtoll_l (nptr, endptr, base, get_C_locale ());
1213 #else
1214   gboolean negative;
1215   guint64 result;
1216
1217   result = g_parse_long_long (nptr, (const gchar **) endptr, base, &negative);
1218
1219   if (negative && result > (guint64) G_MININT64)
1220     {
1221       errno = ERANGE;
1222       return G_MININT64;
1223     }
1224   else if (!negative && result > (guint64) G_MAXINT64)
1225     {
1226       errno = ERANGE;
1227       return G_MAXINT64;
1228     }
1229   else if (negative)
1230     return - (gint64) result;
1231   else
1232     return (gint64) result;
1233 #endif
1234 }
1235
1236 /**
1237  * g_strerror:
1238  * @errnum: the system error number. See the standard C %errno
1239  *     documentation
1240  *
1241  * Returns a string corresponding to the given error code, e.g. "no
1242  * such process". Unlike strerror(), this always returns a string in
1243  * UTF-8 encoding, and the pointer is guaranteed to remain valid for
1244  * the lifetime of the process.
1245  *
1246  * Note that the string may be translated according to the current locale.
1247  *
1248  * The value of %errno will not be changed by this function. However, it may
1249  * be changed by intermediate function calls, so you should save its value
1250  * as soon as the call returns:
1251  * |[
1252  *   int saved_errno;
1253  *
1254  *   ret = read (blah);
1255  *   saved_errno = errno;
1256  *
1257  *   g_strerror (saved_errno);
1258  * ]|
1259  *
1260  * Returns: a UTF-8 string describing the error code. If the error code
1261  *     is unknown, it returns a string like "unknown error (<code>)".
1262  */
1263 const gchar *
1264 g_strerror (gint errnum)
1265 {
1266   static GHashTable *errors;
1267   G_LOCK_DEFINE_STATIC (errors);
1268   const gchar *msg;
1269   gint saved_errno = errno;
1270
1271   G_LOCK (errors);
1272   if (errors)
1273     msg = g_hash_table_lookup (errors, GINT_TO_POINTER (errnum));
1274   else
1275     {
1276       errors = g_hash_table_new (NULL, NULL);
1277       msg = NULL;
1278     }
1279
1280   if (!msg)
1281     {
1282       gchar buf[1024];
1283       GError *error = NULL;
1284
1285 #if defined(G_OS_WIN32)
1286       strerror_s (buf, sizeof (buf), errnum);
1287       msg = buf;
1288 #elif defined(HAVE_STRERROR_R)
1289       /* Match the condition in strerror_r(3) for glibc */
1290 #  if defined(STRERROR_R_CHAR_P)
1291       msg = strerror_r (errnum, buf, sizeof (buf));
1292 #  else
1293       (void) strerror_r (errnum, buf, sizeof (buf));
1294       msg = buf;
1295 #  endif /* HAVE_STRERROR_R */
1296 #else
1297       g_strlcpy (buf, strerror (errnum), sizeof (buf));
1298       msg = buf;
1299 #endif
1300       if (!g_get_charset (NULL))
1301         {
1302           msg = g_locale_to_utf8 (msg, -1, NULL, NULL, &error);
1303           if (error)
1304             g_print ("%s\n", error->message);
1305         }
1306       else if (msg == (const gchar *)buf)
1307         msg = g_strdup (buf);
1308
1309       g_hash_table_insert (errors, GINT_TO_POINTER (errnum), (char *) msg);
1310     }
1311   G_UNLOCK (errors);
1312
1313   errno = saved_errno;
1314   return msg;
1315 }
1316
1317 /**
1318  * g_strsignal:
1319  * @signum: the signal number. See the `signal` documentation
1320  *
1321  * Returns a string describing the given signal, e.g. "Segmentation fault".
1322  * You should use this function in preference to strsignal(), because it
1323  * returns a string in UTF-8 encoding, and since not all platforms support
1324  * the strsignal() function.
1325  *
1326  * Returns: a UTF-8 string describing the signal. If the signal is unknown,
1327  *     it returns "unknown signal (<signum>)".
1328  */
1329 const gchar *
1330 g_strsignal (gint signum)
1331 {
1332   gchar *msg;
1333   gchar *tofree;
1334   const gchar *ret;
1335
1336   msg = tofree = NULL;
1337
1338 #ifdef HAVE_STRSIGNAL
1339   msg = strsignal (signum);
1340   if (!g_get_charset (NULL))
1341     msg = tofree = g_locale_to_utf8 (msg, -1, NULL, NULL, NULL);
1342 #endif
1343
1344   if (!msg)
1345     msg = tofree = g_strdup_printf ("unknown signal (%d)", signum);
1346   ret = g_intern_string (msg);
1347   g_free (tofree);
1348
1349   return ret;
1350 }
1351
1352 /* Functions g_strlcpy and g_strlcat were originally developed by
1353  * Todd C. Miller <Todd.Miller@courtesan.com> to simplify writing secure code.
1354  * See http://www.openbsd.org/cgi-bin/man.cgi?query=strlcpy
1355  * for more information.
1356  */
1357
1358 #ifdef HAVE_STRLCPY
1359 /* Use the native ones, if available; they might be implemented in assembly */
1360 gsize
1361 g_strlcpy (gchar       *dest,
1362            const gchar *src,
1363            gsize        dest_size)
1364 {
1365   g_return_val_if_fail (dest != NULL, 0);
1366   g_return_val_if_fail (src  != NULL, 0);
1367
1368   return strlcpy (dest, src, dest_size);
1369 }
1370
1371 gsize
1372 g_strlcat (gchar       *dest,
1373            const gchar *src,
1374            gsize        dest_size)
1375 {
1376   g_return_val_if_fail (dest != NULL, 0);
1377   g_return_val_if_fail (src  != NULL, 0);
1378
1379   return strlcat (dest, src, dest_size);
1380 }
1381
1382 #else /* ! HAVE_STRLCPY */
1383 /**
1384  * g_strlcpy:
1385  * @dest: destination buffer
1386  * @src: source buffer
1387  * @dest_size: length of @dest in bytes
1388  *
1389  * Portability wrapper that calls strlcpy() on systems which have it,
1390  * and emulates strlcpy() otherwise. Copies @src to @dest; @dest is
1391  * guaranteed to be nul-terminated; @src must be nul-terminated;
1392  * @dest_size is the buffer size, not the number of bytes to copy.
1393  *
1394  * At most @dest_size - 1 characters will be copied. Always nul-terminates
1395  * (unless @dest_size is 0). This function does not allocate memory. Unlike
1396  * strncpy(), this function doesn't pad @dest (so it's often faster). It
1397  * returns the size of the attempted result, strlen (src), so if
1398  * @retval >= @dest_size, truncation occurred.
1399  *
1400  * Caveat: strlcpy() is supposedly more secure than strcpy() or strncpy(),
1401  * but if you really want to avoid screwups, g_strdup() is an even better
1402  * idea.
1403  *
1404  * Returns: length of @src
1405  */
1406 gsize
1407 g_strlcpy (gchar       *dest,
1408            const gchar *src,
1409            gsize        dest_size)
1410 {
1411   gchar *d = dest;
1412   const gchar *s = src;
1413   gsize n = dest_size;
1414
1415   g_return_val_if_fail (dest != NULL, 0);
1416   g_return_val_if_fail (src  != NULL, 0);
1417
1418   /* Copy as many bytes as will fit */
1419   if (n != 0 && --n != 0)
1420     do
1421       {
1422         gchar c = *s++;
1423
1424         *d++ = c;
1425         if (c == 0)
1426           break;
1427       }
1428     while (--n != 0);
1429
1430   /* If not enough room in dest, add NUL and traverse rest of src */
1431   if (n == 0)
1432     {
1433       if (dest_size != 0)
1434         *d = 0;
1435       while (*s++)
1436         ;
1437     }
1438
1439   return s - src - 1;  /* count does not include NUL */
1440 }
1441
1442 /**
1443  * g_strlcat:
1444  * @dest: destination buffer, already containing one nul-terminated string
1445  * @src: source buffer
1446  * @dest_size: length of @dest buffer in bytes (not length of existing string
1447  *     inside @dest)
1448  *
1449  * Portability wrapper that calls strlcat() on systems which have it,
1450  * and emulates it otherwise. Appends nul-terminated @src string to @dest,
1451  * guaranteeing nul-termination for @dest. The total size of @dest won't
1452  * exceed @dest_size.
1453  *
1454  * At most @dest_size - 1 characters will be copied. Unlike strncat(),
1455  * @dest_size is the full size of dest, not the space left over. This
1456  * function does not allocate memory. It always nul-terminates (unless
1457  * @dest_size == 0 or there were no nul characters in the @dest_size
1458  * characters of dest to start with).
1459  *
1460  * Caveat: this is supposedly a more secure alternative to strcat() or
1461  * strncat(), but for real security g_strconcat() is harder to mess up.
1462  *
1463  * Returns: size of attempted result, which is MIN (dest_size, strlen
1464  *     (original dest)) + strlen (src), so if retval >= dest_size,
1465  *     truncation occurred.
1466  */
1467 gsize
1468 g_strlcat (gchar       *dest,
1469            const gchar *src,
1470            gsize        dest_size)
1471 {
1472   gchar *d = dest;
1473   const gchar *s = src;
1474   gsize bytes_left = dest_size;
1475   gsize dlength;  /* Logically, MIN (strlen (d), dest_size) */
1476
1477   g_return_val_if_fail (dest != NULL, 0);
1478   g_return_val_if_fail (src  != NULL, 0);
1479
1480   /* Find the end of dst and adjust bytes left but don't go past end */
1481   while (*d != 0 && bytes_left-- != 0)
1482     d++;
1483   dlength = d - dest;
1484   bytes_left = dest_size - dlength;
1485
1486   if (bytes_left == 0)
1487     return dlength + strlen (s);
1488
1489   while (*s != 0)
1490     {
1491       if (bytes_left != 1)
1492         {
1493           *d++ = *s;
1494           bytes_left--;
1495         }
1496       s++;
1497     }
1498   *d = 0;
1499
1500   return dlength + (s - src);  /* count does not include NUL */
1501 }
1502 #endif /* ! HAVE_STRLCPY */
1503
1504 /**
1505  * g_ascii_strdown:
1506  * @str: a string
1507  * @len: length of @str in bytes, or -1 if @str is nul-terminated
1508  *
1509  * Converts all upper case ASCII letters to lower case ASCII letters.
1510  *
1511  * Returns: a newly-allocated string, with all the upper case
1512  *     characters in @str converted to lower case, with semantics that
1513  *     exactly match g_ascii_tolower(). (Note that this is unlike the
1514  *     old g_strdown(), which modified the string in place.)
1515  */
1516 gchar*
1517 g_ascii_strdown (const gchar *str,
1518                  gssize       len)
1519 {
1520   gchar *result, *s;
1521
1522   g_return_val_if_fail (str != NULL, NULL);
1523
1524   if (len < 0)
1525     len = (gssize) strlen (str);
1526
1527   result = g_strndup (str, (gsize) len);
1528   for (s = result; *s; s++)
1529     *s = g_ascii_tolower (*s);
1530
1531   return result;
1532 }
1533
1534 /**
1535  * g_ascii_strup:
1536  * @str: a string
1537  * @len: length of @str in bytes, or -1 if @str is nul-terminated
1538  *
1539  * Converts all lower case ASCII letters to upper case ASCII letters.
1540  *
1541  * Returns: a newly allocated string, with all the lower case
1542  *     characters in @str converted to upper case, with semantics that
1543  *     exactly match g_ascii_toupper(). (Note that this is unlike the
1544  *     old g_strup(), which modified the string in place.)
1545  */
1546 gchar*
1547 g_ascii_strup (const gchar *str,
1548                gssize       len)
1549 {
1550   gchar *result, *s;
1551
1552   g_return_val_if_fail (str != NULL, NULL);
1553
1554   if (len < 0)
1555     len = (gssize) strlen (str);
1556
1557   result = g_strndup (str, (gsize) len);
1558   for (s = result; *s; s++)
1559     *s = g_ascii_toupper (*s);
1560
1561   return result;
1562 }
1563
1564 /**
1565  * g_str_is_ascii:
1566  * @str: a string
1567  *
1568  * Determines if a string is pure ASCII. A string is pure ASCII if it
1569  * contains no bytes with the high bit set.
1570  *
1571  * Returns: %TRUE if @str is ASCII
1572  *
1573  * Since: 2.40
1574  */
1575 gboolean
1576 g_str_is_ascii (const gchar *str)
1577 {
1578   gint i;
1579
1580   for (i = 0; str[i]; i++)
1581     if (str[i] & 0x80)
1582       return FALSE;
1583
1584   return TRUE;
1585 }
1586
1587 /**
1588  * g_strdown:
1589  * @string: the string to convert.
1590  *
1591  * Converts a string to lower case.
1592  *
1593  * Returns: the string
1594  *
1595  * Deprecated:2.2: This function is totally broken for the reasons discussed
1596  * in the g_strncasecmp() docs - use g_ascii_strdown() or g_utf8_strdown()
1597  * instead.
1598  **/
1599 gchar*
1600 g_strdown (gchar *string)
1601 {
1602   guchar *s;
1603
1604   g_return_val_if_fail (string != NULL, NULL);
1605
1606   s = (guchar *) string;
1607
1608   while (*s)
1609     {
1610       if (isupper (*s))
1611         *s = tolower (*s);
1612       s++;
1613     }
1614
1615   return (gchar *) string;
1616 }
1617
1618 /**
1619  * g_strup:
1620  * @string: the string to convert
1621  *
1622  * Converts a string to upper case.
1623  *
1624  * Returns: the string
1625  *
1626  * Deprecated:2.2: This function is totally broken for the reasons
1627  *     discussed in the g_strncasecmp() docs - use g_ascii_strup()
1628  *     or g_utf8_strup() instead.
1629  */
1630 gchar*
1631 g_strup (gchar *string)
1632 {
1633   guchar *s;
1634
1635   g_return_val_if_fail (string != NULL, NULL);
1636
1637   s = (guchar *) string;
1638
1639   while (*s)
1640     {
1641       if (islower (*s))
1642         *s = toupper (*s);
1643       s++;
1644     }
1645
1646   return (gchar *) string;
1647 }
1648
1649 /**
1650  * g_strreverse:
1651  * @string: the string to reverse
1652  *
1653  * Reverses all of the bytes in a string. For example,
1654  * `g_strreverse ("abcdef")` will result in "fedcba".
1655  *
1656  * Note that g_strreverse() doesn't work on UTF-8 strings
1657  * containing multibyte characters. For that purpose, use
1658  * g_utf8_strreverse().
1659  *
1660  * Returns: the same pointer passed in as @string
1661  */
1662 gchar*
1663 g_strreverse (gchar *string)
1664 {
1665   g_return_val_if_fail (string != NULL, NULL);
1666
1667   if (*string)
1668     {
1669       gchar *h, *t;
1670
1671       h = string;
1672       t = string + strlen (string) - 1;
1673
1674       while (h < t)
1675         {
1676           gchar c;
1677
1678           c = *h;
1679           *h = *t;
1680           h++;
1681           *t = c;
1682           t--;
1683         }
1684     }
1685
1686   return string;
1687 }
1688
1689 /**
1690  * g_ascii_tolower:
1691  * @c: any character
1692  *
1693  * Convert a character to ASCII lower case.
1694  *
1695  * Unlike the standard C library tolower() function, this only
1696  * recognizes standard ASCII letters and ignores the locale, returning
1697  * all non-ASCII characters unchanged, even if they are lower case
1698  * letters in a particular character set. Also unlike the standard
1699  * library function, this takes and returns a char, not an int, so
1700  * don't call it on %EOF but no need to worry about casting to #guchar
1701  * before passing a possibly non-ASCII character in.
1702  *
1703  * Returns: the result of converting @c to lower case. If @c is
1704  *     not an ASCII upper case letter, @c is returned unchanged.
1705  */
1706 gchar
1707 g_ascii_tolower (gchar c)
1708 {
1709   return g_ascii_isupper (c) ? c - 'A' + 'a' : c;
1710 }
1711
1712 /**
1713  * g_ascii_toupper:
1714  * @c: any character
1715  *
1716  * Convert a character to ASCII upper case.
1717  *
1718  * Unlike the standard C library toupper() function, this only
1719  * recognizes standard ASCII letters and ignores the locale, returning
1720  * all non-ASCII characters unchanged, even if they are upper case
1721  * letters in a particular character set. Also unlike the standard
1722  * library function, this takes and returns a char, not an int, so
1723  * don't call it on %EOF but no need to worry about casting to #guchar
1724  * before passing a possibly non-ASCII character in.
1725  *
1726  * Returns: the result of converting @c to upper case. If @c is not
1727  *    an ASCII lower case letter, @c is returned unchanged.
1728  */
1729 gchar
1730 g_ascii_toupper (gchar c)
1731 {
1732   return g_ascii_islower (c) ? c - 'a' + 'A' : c;
1733 }
1734
1735 /**
1736  * g_ascii_digit_value:
1737  * @c: an ASCII character
1738  *
1739  * Determines the numeric value of a character as a decimal digit.
1740  * Differs from g_unichar_digit_value() because it takes a char, so
1741  * there's no worry about sign extension if characters are signed.
1742  *
1743  * Returns: If @c is a decimal digit (according to g_ascii_isdigit()),
1744  *    its numeric value. Otherwise, -1.
1745  */
1746 int
1747 g_ascii_digit_value (gchar c)
1748 {
1749   if (g_ascii_isdigit (c))
1750     return c - '0';
1751   return -1;
1752 }
1753
1754 /**
1755  * g_ascii_xdigit_value:
1756  * @c: an ASCII character.
1757  *
 * Determines the numeric value of a character as a hexadecimal
1759  * digit. Differs from g_unichar_xdigit_value() because it takes
1760  * a char, so there's no worry about sign extension if characters
1761  * are signed.
1762  *
1763  * Returns: If @c is a hex digit (according to g_ascii_isxdigit()),
1764  *     its numeric value. Otherwise, -1.
1765  */
1766 int
1767 g_ascii_xdigit_value (gchar c)
1768 {
1769   if (c >= 'A' && c <= 'F')
1770     return c - 'A' + 10;
1771   if (c >= 'a' && c <= 'f')
1772     return c - 'a' + 10;
1773   return g_ascii_digit_value (c);
1774 }
1775
1776 /**
1777  * g_ascii_strcasecmp:
1778  * @s1: string to compare with @s2
1779  * @s2: string to compare with @s1
1780  *
1781  * Compare two strings, ignoring the case of ASCII characters.
1782  *
1783  * Unlike the BSD strcasecmp() function, this only recognizes standard
1784  * ASCII letters and ignores the locale, treating all non-ASCII
1785  * bytes as if they are not letters.
1786  *
1787  * This function should be used only on strings that are known to be
1788  * in encodings where the bytes corresponding to ASCII letters always
1789  * represent themselves. This includes UTF-8 and the ISO-8859-*
1790  * charsets, but not for instance double-byte encodings like the
1791  * Windows Codepage 932, where the trailing bytes of double-byte
1792  * characters include all ASCII letters. If you compare two CP932
1793  * strings using this function, you will get false matches.
1794  *
1795  * Both @s1 and @s2 must be non-%NULL.
1796  *
1797  * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1798  *     or a positive value if @s1 > @s2.
1799  */
1800 gint
1801 g_ascii_strcasecmp (const gchar *s1,
1802                     const gchar *s2)
1803 {
1804   gint c1, c2;
1805
1806   g_return_val_if_fail (s1 != NULL, 0);
1807   g_return_val_if_fail (s2 != NULL, 0);
1808
1809   while (*s1 && *s2)
1810     {
1811       c1 = (gint)(guchar) TOLOWER (*s1);
1812       c2 = (gint)(guchar) TOLOWER (*s2);
1813       if (c1 != c2)
1814         return (c1 - c2);
1815       s1++; s2++;
1816     }
1817
1818   return (((gint)(guchar) *s1) - ((gint)(guchar) *s2));
1819 }
1820
1821 /**
1822  * g_ascii_strncasecmp:
1823  * @s1: string to compare with @s2
1824  * @s2: string to compare with @s1
1825  * @n: number of characters to compare
1826  *
1827  * Compare @s1 and @s2, ignoring the case of ASCII characters and any
1828  * characters after the first @n in each string.
1829  *
 * Unlike the BSD strncasecmp() function, this only recognizes standard
1831  * ASCII letters and ignores the locale, treating all non-ASCII
1832  * characters as if they are not letters.
1833  *
1834  * The same warning as in g_ascii_strcasecmp() applies: Use this
1835  * function only on strings known to be in encodings where bytes
1836  * corresponding to ASCII letters always represent themselves.
1837  *
1838  * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1839  *     or a positive value if @s1 > @s2.
1840  */
1841 gint
1842 g_ascii_strncasecmp (const gchar *s1,
1843                      const gchar *s2,
1844                      gsize        n)
1845 {
1846   gint c1, c2;
1847
1848   g_return_val_if_fail (s1 != NULL, 0);
1849   g_return_val_if_fail (s2 != NULL, 0);
1850
1851   while (n && *s1 && *s2)
1852     {
1853       n -= 1;
1854       c1 = (gint)(guchar) TOLOWER (*s1);
1855       c2 = (gint)(guchar) TOLOWER (*s2);
1856       if (c1 != c2)
1857         return (c1 - c2);
1858       s1++; s2++;
1859     }
1860
1861   if (n)
1862     return (((gint) (guchar) *s1) - ((gint) (guchar) *s2));
1863   else
1864     return 0;
1865 }
1866
1867 /**
1868  * g_strcasecmp:
1869  * @s1: a string
1870  * @s2: a string to compare with @s1
1871  *
1872  * A case-insensitive string comparison, corresponding to the standard
1873  * strcasecmp() function on platforms which support it.
1874  *
1875  * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1876  *     or a positive value if @s1 > @s2.
1877  *
1878  * Deprecated:2.2: See g_strncasecmp() for a discussion of why this
1879  *     function is deprecated and how to replace it.
1880  */
1881 gint
1882 g_strcasecmp (const gchar *s1,
1883               const gchar *s2)
1884 {
1885 #ifdef HAVE_STRCASECMP
1886   g_return_val_if_fail (s1 != NULL, 0);
1887   g_return_val_if_fail (s2 != NULL, 0);
1888
1889   return strcasecmp (s1, s2);
1890 #else
1891   gint c1, c2;
1892
1893   g_return_val_if_fail (s1 != NULL, 0);
1894   g_return_val_if_fail (s2 != NULL, 0);
1895
1896   while (*s1 && *s2)
1897     {
1898       /* According to A. Cox, some platforms have islower's that
1899        * don't work right on non-uppercase
1900        */
1901       c1 = isupper ((guchar)*s1) ? tolower ((guchar)*s1) : *s1;
1902       c2 = isupper ((guchar)*s2) ? tolower ((guchar)*s2) : *s2;
1903       if (c1 != c2)
1904         return (c1 - c2);
1905       s1++; s2++;
1906     }
1907
1908   return (((gint)(guchar) *s1) - ((gint)(guchar) *s2));
1909 #endif
1910 }
1911
1912 /**
1913  * g_strncasecmp:
1914  * @s1: a string
1915  * @s2: a string to compare with @s1
1916  * @n: the maximum number of characters to compare
1917  *
1918  * A case-insensitive string comparison, corresponding to the standard
1919  * strncasecmp() function on platforms which support it. It is similar
1920  * to g_strcasecmp() except it only compares the first @n characters of
1921  * the strings.
1922  *
1923  * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1924  *     or a positive value if @s1 > @s2.
1925  *
1926  * Deprecated:2.2: The problem with g_strncasecmp() is that it does
1927  *     the comparison by calling toupper()/tolower(). These functions
1928  *     are locale-specific and operate on single bytes. However, it is
1929  *     impossible to handle things correctly from an internationalization
1930  *     standpoint by operating on bytes, since characters may be multibyte.
1931  *     Thus g_strncasecmp() is broken if your string is guaranteed to be
1932  *     ASCII, since it is locale-sensitive, and it's broken if your string
1933  *     is localized, since it doesn't work on many encodings at all,
1934  *     including UTF-8, EUC-JP, etc.
1935  *
1936  *     There are therefore two replacement techniques: g_ascii_strncasecmp(),
1937  *     which only works on ASCII and is not locale-sensitive, and
1938  *     g_utf8_casefold() followed by strcmp() on the resulting strings,
1939  *     which is good for case-insensitive sorting of UTF-8.
1940  */
1941 gint
1942 g_strncasecmp (const gchar *s1,
1943                const gchar *s2,
1944                guint n)
1945 {
1946 #ifdef HAVE_STRNCASECMP
1947   return strncasecmp (s1, s2, n);
1948 #else
1949   gint c1, c2;
1950
1951   g_return_val_if_fail (s1 != NULL, 0);
1952   g_return_val_if_fail (s2 != NULL, 0);
1953
1954   while (n && *s1 && *s2)
1955     {
1956       n -= 1;
1957       /* According to A. Cox, some platforms have islower's that
1958        * don't work right on non-uppercase
1959        */
1960       c1 = isupper ((guchar)*s1) ? tolower ((guchar)*s1) : *s1;
1961       c2 = isupper ((guchar)*s2) ? tolower ((guchar)*s2) : *s2;
1962       if (c1 != c2)
1963         return (c1 - c2);
1964       s1++; s2++;
1965     }
1966
1967   if (n)
1968     return (((gint) (guchar) *s1) - ((gint) (guchar) *s2));
1969   else
1970     return 0;
1971 #endif
1972 }
1973
1974 /**
1975  * g_strdelimit:
1976  * @string: the string to convert
1977  * @delimiters: (nullable): a string containing the current delimiters,
1978  *     or %NULL to use the standard delimiters defined in #G_STR_DELIMITERS
1979  * @new_delimiter: the new delimiter character
1980  *
1981  * Converts any delimiter characters in @string to @new_delimiter.
1982  * Any characters in @string which are found in @delimiters are
1983  * changed to the @new_delimiter character. Modifies @string in place,
1984  * and returns @string itself, not a copy. The return value is to
1985  * allow nesting such as
1986  * |[<!-- language="C" -->
1987  *   g_ascii_strup (g_strdelimit (str, "abc", '?'))
1988  * ]|
1989  *
1990  * Returns: @string
1991  */
1992 gchar *
1993 g_strdelimit (gchar       *string,
1994               const gchar *delimiters,
1995               gchar        new_delim)
1996 {
1997   gchar *c;
1998
1999   g_return_val_if_fail (string != NULL, NULL);
2000
2001   if (!delimiters)
2002     delimiters = G_STR_DELIMITERS;
2003
2004   for (c = string; *c; c++)
2005     {
2006       if (strchr (delimiters, *c))
2007         *c = new_delim;
2008     }
2009
2010   return string;
2011 }
2012
2013 /**
2014  * g_strcanon:
2015  * @string: a nul-terminated array of bytes
2016  * @valid_chars: bytes permitted in @string
2017  * @substitutor: replacement character for disallowed bytes
2018  *
2019  * For each character in @string, if the character is not in @valid_chars,
2020  * replaces the character with @substitutor. Modifies @string in place,
 * and returns @string itself, not a copy. The return value is to allow
2022  * nesting such as
2023  * |[<!-- language="C" -->
2024  *   g_ascii_strup (g_strcanon (str, "abc", '?'))
2025  * ]|
2026  *
2027  * Returns: @string
2028  */
2029 gchar *
2030 g_strcanon (gchar       *string,
2031             const gchar *valid_chars,
2032             gchar        substitutor)
2033 {
2034   gchar *c;
2035
2036   g_return_val_if_fail (string != NULL, NULL);
2037   g_return_val_if_fail (valid_chars != NULL, NULL);
2038
2039   for (c = string; *c; c++)
2040     {
2041       if (!strchr (valid_chars, *c))
2042         *c = substitutor;
2043     }
2044
2045   return string;
2046 }
2047
2048 /**
2049  * g_strcompress:
2050  * @source: a string to compress
2051  *
2052  * Replaces all escaped characters with their one byte equivalent.
2053  *
2054  * This function does the reverse conversion of g_strescape().
2055  *
2056  * Returns: a newly-allocated copy of @source with all escaped
2057  *     character compressed
2058  */
2059 gchar *
2060 g_strcompress (const gchar *source)
2061 {
2062   const gchar *p = source, *octal;
2063   gchar *dest;
2064   gchar *q;
2065
2066   g_return_val_if_fail (source != NULL, NULL);
2067
2068   dest = g_malloc (strlen (source) + 1);
2069   q = dest;
2070
2071   while (*p)
2072     {
2073       if (*p == '\\')
2074         {
2075           p++;
2076           switch (*p)
2077             {
2078             case '\0':
2079               g_warning ("g_strcompress: trailing \\");
2080               goto out;
2081             case '0':  case '1':  case '2':  case '3':  case '4':
2082             case '5':  case '6':  case '7':
2083               *q = 0;
2084               octal = p;
2085               while ((p < octal + 3) && (*p >= '0') && (*p <= '7'))
2086                 {
2087                   *q = (*q * 8) + (*p - '0');
2088                   p++;
2089                 }
2090               q++;
2091               p--;
2092               break;
2093             case 'b':
2094               *q++ = '\b';
2095               break;
2096             case 'f':
2097               *q++ = '\f';
2098               break;
2099             case 'n':
2100               *q++ = '\n';
2101               break;
2102             case 'r':
2103               *q++ = '\r';
2104               break;
2105             case 't':
2106               *q++ = '\t';
2107               break;
2108             case 'v':
2109               *q++ = '\v';
2110               break;
2111             default:            /* Also handles \" and \\ */
2112               *q++ = *p;
2113               break;
2114             }
2115         }
2116       else
2117         *q++ = *p;
2118       p++;
2119     }
2120 out:
2121   *q = 0;
2122
2123   return dest;
2124 }
2125
2126 /**
2127  * g_strescape:
2128  * @source: a string to escape
2129  * @exceptions: (nullable): a string of characters not to escape in @source
2130  *
2131  * Escapes the special characters '\b', '\f', '\n', '\r', '\t', '\v', '\'
2132  * and '"' in the string @source by inserting a '\' before
2133  * them. Additionally all characters in the range 0x01-0x1F (everything
2134  * below SPACE) and in the range 0x7F-0xFF (all non-ASCII chars) are
2135  * replaced with a '\' followed by their octal representation.
2136  * Characters supplied in @exceptions are not escaped.
2137  *
2138  * g_strcompress() does the reverse conversion.
2139  *
2140  * Returns: a newly-allocated copy of @source with certain
2141  *     characters escaped. See above.
2142  */
2143 gchar *
2144 g_strescape (const gchar *source,
2145              const gchar *exceptions)
2146 {
2147   const guchar *p;
2148   gchar *dest;
2149   gchar *q;
2150   guchar excmap[256];
2151
2152   g_return_val_if_fail (source != NULL, NULL);
2153
2154   p = (guchar *) source;
2155   /* Each source byte needs maximally four destination chars (\777) */
2156   q = dest = g_malloc (strlen (source) * 4 + 1);
2157
2158   memset (excmap, 0, 256);
2159   if (exceptions)
2160     {
2161       guchar *e = (guchar *) exceptions;
2162
2163       while (*e)
2164         {
2165           excmap[*e] = 1;
2166           e++;
2167         }
2168     }
2169
2170   while (*p)
2171     {
2172       if (excmap[*p])
2173         *q++ = *p;
2174       else
2175         {
2176           switch (*p)
2177             {
2178             case '\b':
2179               *q++ = '\\';
2180               *q++ = 'b';
2181               break;
2182             case '\f':
2183               *q++ = '\\';
2184               *q++ = 'f';
2185               break;
2186             case '\n':
2187               *q++ = '\\';
2188               *q++ = 'n';
2189               break;
2190             case '\r':
2191               *q++ = '\\';
2192               *q++ = 'r';
2193               break;
2194             case '\t':
2195               *q++ = '\\';
2196               *q++ = 't';
2197               break;
2198             case '\v':
2199               *q++ = '\\';
2200               *q++ = 'v';
2201               break;
2202             case '\\':
2203               *q++ = '\\';
2204               *q++ = '\\';
2205               break;
2206             case '"':
2207               *q++ = '\\';
2208               *q++ = '"';
2209               break;
2210             default:
2211               if ((*p < ' ') || (*p >= 0177))
2212                 {
2213                   *q++ = '\\';
2214                   *q++ = '0' + (((*p) >> 6) & 07);
2215                   *q++ = '0' + (((*p) >> 3) & 07);
2216                   *q++ = '0' + ((*p) & 07);
2217                 }
2218               else
2219                 *q++ = *p;
2220               break;
2221             }
2222         }
2223       p++;
2224     }
2225   *q = 0;
2226   return dest;
2227 }
2228
2229 /**
2230  * g_strchug:
2231  * @string: a string to remove the leading whitespace from
2232  *
2233  * Removes leading whitespace from a string, by moving the rest
2234  * of the characters forward.
2235  *
2236  * This function doesn't allocate or reallocate any memory;
2237  * it modifies @string in place. Therefore, it cannot be used on
2238  * statically allocated strings.
2239  *
2240  * The pointer to @string is returned to allow the nesting of functions.
2241  *
2242  * Also see g_strchomp() and g_strstrip().
2243  *
2244  * Returns: @string
2245  */
2246 gchar *
2247 g_strchug (gchar *string)
2248 {
2249   guchar *start;
2250
2251   g_return_val_if_fail (string != NULL, NULL);
2252
2253   for (start = (guchar*) string; *start && g_ascii_isspace (*start); start++)
2254     ;
2255
2256   memmove (string, start, strlen ((gchar *) start) + 1);
2257
2258   return string;
2259 }
2260
2261 /**
2262  * g_strchomp:
2263  * @string: a string to remove the trailing whitespace from
2264  *
2265  * Removes trailing whitespace from a string.
2266  *
2267  * This function doesn't allocate or reallocate any memory;
2268  * it modifies @string in place. Therefore, it cannot be used
2269  * on statically allocated strings.
2270  *
2271  * The pointer to @string is returned to allow the nesting of functions.
2272  *
2273  * Also see g_strchug() and g_strstrip().
2274  *
2275  * Returns: @string
2276  */
2277 gchar *
2278 g_strchomp (gchar *string)
2279 {
2280   gsize len;
2281
2282   g_return_val_if_fail (string != NULL, NULL);
2283
2284   len = strlen (string);
2285   while (len--)
2286     {
2287       if (g_ascii_isspace ((guchar) string[len]))
2288         string[len] = '\0';
2289       else
2290         break;
2291     }
2292
2293   return string;
2294 }
2295
2296 /**
2297  * g_strsplit:
2298  * @string: a string to split
2299  * @delimiter: a string which specifies the places at which to split
2300  *     the string. The delimiter is not included in any of the resulting
2301  *     strings, unless @max_tokens is reached.
2302  * @max_tokens: the maximum number of pieces to split @string into.
2303  *     If this is less than 1, the string is split completely.
2304  *
2305  * Splits a string into a maximum of @max_tokens pieces, using the given
2306  * @delimiter. If @max_tokens is reached, the remainder of @string is
2307  * appended to the last token.
2308  *
2309  * As an example, the result of g_strsplit (":a:bc::d:", ":", -1) is a
2310  * %NULL-terminated vector containing the six strings "", "a", "bc", "", "d"
2311  * and "".
2312  *
2313  * As a special case, the result of splitting the empty string "" is an empty
2314  * vector, not a vector containing a single string. The reason for this
 * special case is that being able to represent an empty vector is typically
2316  * more useful than consistent handling of empty elements. If you do need
2317  * to represent empty elements, you'll need to check for the empty string
2318  * before calling g_strsplit().
2319  *
2320  * Returns: a newly-allocated %NULL-terminated array of strings. Use
2321  *    g_strfreev() to free it.
2322  */
2323 gchar**
2324 g_strsplit (const gchar *string,
2325             const gchar *delimiter,
2326             gint         max_tokens)
2327 {
2328   GSList *string_list = NULL, *slist;
2329   gchar **str_array, *s;
2330   guint n = 0;
2331   const gchar *remainder;
2332
2333   g_return_val_if_fail (string != NULL, NULL);
2334   g_return_val_if_fail (delimiter != NULL, NULL);
2335   g_return_val_if_fail (delimiter[0] != '\0', NULL);
2336
2337   if (max_tokens < 1)
2338     max_tokens = G_MAXINT;
2339
2340   remainder = string;
2341   s = strstr (remainder, delimiter);
2342   if (s)
2343     {
2344       gsize delimiter_len = strlen (delimiter);
2345
2346       while (--max_tokens && s)
2347         {
2348           gsize len;
2349
2350           len = s - remainder;
2351           string_list = g_slist_prepend (string_list,
2352                                          g_strndup (remainder, len));
2353           n++;
2354           remainder = s + delimiter_len;
2355           s = strstr (remainder, delimiter);
2356         }
2357     }
2358   if (*string)
2359     {
2360       n++;
2361       string_list = g_slist_prepend (string_list, g_strdup (remainder));
2362     }
2363
2364   str_array = g_new (gchar*, n + 1);
2365
2366   str_array[n--] = NULL;
2367   for (slist = string_list; slist; slist = slist->next)
2368     str_array[n--] = slist->data;
2369
2370   g_slist_free (string_list);
2371
2372   return str_array;
2373 }
2374
2375 /**
2376  * g_strsplit_set:
2377  * @string: The string to be tokenized
2378  * @delimiters: A nul-terminated string containing bytes that are used
2379  *     to split the string.
2380  * @max_tokens: The maximum number of tokens to split @string into.
2381  *     If this is less than 1, the string is split completely
2382  *
2383  * Splits @string into a number of tokens not containing any of the characters
 * in @delimiters. A token is the (possibly empty) longest string that does not
2385  * contain any of the characters in @delimiters. If @max_tokens is reached, the
2386  * remainder is appended to the last token.
2387  *
2388  * For example the result of g_strsplit_set ("abc:def/ghi", ":/", -1) is a
2389  * %NULL-terminated vector containing the three strings "abc", "def",
2390  * and "ghi".
2391  *
2392  * The result of g_strsplit_set (":def/ghi:", ":/", -1) is a %NULL-terminated
2393  * vector containing the four strings "", "def", "ghi", and "".
2394  *
2395  * As a special case, the result of splitting the empty string "" is an empty
2396  * vector, not a vector containing a single string. The reason for this
 * special case is that being able to represent an empty vector is typically
2398  * more useful than consistent handling of empty elements. If you do need
2399  * to represent empty elements, you'll need to check for the empty string
2400  * before calling g_strsplit_set().
2401  *
2402  * Note that this function works on bytes not characters, so it can't be used
2403  * to delimit UTF-8 strings for anything but ASCII characters.
2404  *
2405  * Returns: a newly-allocated %NULL-terminated array of strings. Use
2406  *    g_strfreev() to free it.
2407  *
2408  * Since: 2.4
2409  **/
2410 gchar **
2411 g_strsplit_set (const gchar *string,
2412                 const gchar *delimiters,
2413                 gint         max_tokens)
2414 {
2415   gboolean delim_table[256];
2416   GSList *tokens, *list;
2417   gint n_tokens;
2418   const gchar *s;
2419   const gchar *current;
2420   gchar *token;
2421   gchar **result;
2422
2423   g_return_val_if_fail (string != NULL, NULL);
2424   g_return_val_if_fail (delimiters != NULL, NULL);
2425
2426   if (max_tokens < 1)
2427     max_tokens = G_MAXINT;
2428
2429   if (*string == '\0')
2430     {
2431       result = g_new (char *, 1);
2432       result[0] = NULL;
2433       return result;
2434     }
2435
2436   memset (delim_table, FALSE, sizeof (delim_table));
2437   for (s = delimiters; *s != '\0'; ++s)
2438     delim_table[*(guchar *)s] = TRUE;
2439
2440   tokens = NULL;
2441   n_tokens = 0;
2442
2443   s = current = string;
2444   while (*s != '\0')
2445     {
2446       if (delim_table[*(guchar *)s] && n_tokens + 1 < max_tokens)
2447         {
2448           token = g_strndup (current, s - current);
2449           tokens = g_slist_prepend (tokens, token);
2450           ++n_tokens;
2451
2452           current = s + 1;
2453         }
2454
2455       ++s;
2456     }
2457
2458   token = g_strndup (current, s - current);
2459   tokens = g_slist_prepend (tokens, token);
2460   ++n_tokens;
2461
2462   result = g_new (gchar *, n_tokens + 1);
2463
2464   result[n_tokens] = NULL;
2465   for (list = tokens; list != NULL; list = list->next)
2466     result[--n_tokens] = list->data;
2467
2468   g_slist_free (tokens);
2469
2470   return result;
2471 }
2472
2473 /**
2474  * GStrv:
2475  *
2476  * A typedef alias for gchar**. This is mostly useful when used together with
2477  * g_auto().
2478  */
2479
2480 /**
2481  * g_strfreev:
2482  * @str_array: (nullable): a %NULL-terminated array of strings to free
2483  *
2484  * Frees a %NULL-terminated array of strings, as well as each
2485  * string it contains.
2486  *
2487  * If @str_array is %NULL, this function simply returns.
2488  */
2489 void
2490 g_strfreev (gchar **str_array)
2491 {
2492   if (str_array)
2493     {
2494       int i;
2495
2496       for (i = 0; str_array[i] != NULL; i++)
2497         g_free (str_array[i]);
2498
2499       g_free (str_array);
2500     }
2501 }
2502
2503 /**
2504  * g_strdupv:
2505  * @str_array: (nullable): a %NULL-terminated array of strings
2506  *
 * Copies a %NULL-terminated array of strings. The copy is a deep copy;
2508  * the new array should be freed by first freeing each string, then
2509  * the array itself. g_strfreev() does this for you. If called
2510  * on a %NULL value, g_strdupv() simply returns %NULL.
2511  *
2512  * Returns: (nullable): a new %NULL-terminated array of strings.
2513  */
2514 gchar**
2515 g_strdupv (gchar **str_array)
2516 {
2517   if (str_array)
2518     {
2519       gint i;
2520       gchar **retval;
2521
2522       i = 0;
2523       while (str_array[i])
2524         ++i;
2525
2526       retval = g_new (gchar*, i + 1);
2527
2528       i = 0;
2529       while (str_array[i])
2530         {
2531           retval[i] = g_strdup (str_array[i]);
2532           ++i;
2533         }
2534       retval[i] = NULL;
2535
2536       return retval;
2537     }
2538   else
2539     return NULL;
2540 }
2541
2542 /**
2543  * g_strjoinv:
2544  * @separator: (nullable): a string to insert between each of the
2545  *     strings, or %NULL
2546  * @str_array: a %NULL-terminated array of strings to join
2547  *
2548  * Joins a number of strings together to form one long string, with the
2549  * optional @separator inserted between each of them. The returned string
2550  * should be freed with g_free().
2551  *
2552  * If @str_array has no items, the return value will be an
2553  * empty string. If @str_array contains a single item, @separator will not
2554  * appear in the resulting string.
2555  *
2556  * Returns: a newly-allocated string containing all of the strings joined
2557  *     together, with @separator between them
2558  */
2559 gchar*
2560 g_strjoinv (const gchar  *separator,
2561             gchar       **str_array)
2562 {
2563   gchar *string;
2564   gchar *ptr;
2565
2566   g_return_val_if_fail (str_array != NULL, NULL);
2567
2568   if (separator == NULL)
2569     separator = "";
2570
2571   if (*str_array)
2572     {
2573       gint i;
2574       gsize len;
2575       gsize separator_len;
2576
2577       separator_len = strlen (separator);
2578       /* First part, getting length */
2579       len = 1 + strlen (str_array[0]);
2580       for (i = 1; str_array[i] != NULL; i++)
2581         len += strlen (str_array[i]);
2582       len += separator_len * (i - 1);
2583
2584       /* Second part, building string */
2585       string = g_new (gchar, len);
2586       ptr = g_stpcpy (string, *str_array);
2587       for (i = 1; str_array[i] != NULL; i++)
2588         {
2589           ptr = g_stpcpy (ptr, separator);
2590           ptr = g_stpcpy (ptr, str_array[i]);
2591         }
2592       }
2593   else
2594     string = g_strdup ("");
2595
2596   return string;
2597 }
2598
2599 /**
2600  * g_strjoin:
2601  * @separator: (nullable): a string to insert between each of the
2602  *     strings, or %NULL
2603  * @...: a %NULL-terminated list of strings to join
2604  *
2605  * Joins a number of strings together to form one long string, with the
2606  * optional @separator inserted between each of them. The returned string
2607  * should be freed with g_free().
2608  *
2609  * Returns: a newly-allocated string containing all of the strings joined
2610  *     together, with @separator between them
2611  */
2612 gchar*
2613 g_strjoin (const gchar *separator,
2614            ...)
2615 {
2616   gchar *string, *s;
2617   va_list args;
2618   gsize len;
2619   gsize separator_len;
2620   gchar *ptr;
2621
2622   if (separator == NULL)
2623     separator = "";
2624
2625   separator_len = strlen (separator);
2626
2627   va_start (args, separator);
2628
2629   s = va_arg (args, gchar*);
2630
2631   if (s)
2632     {
2633       /* First part, getting length */
2634       len = 1 + strlen (s);
2635
2636       s = va_arg (args, gchar*);
2637       while (s)
2638         {
2639           len += separator_len + strlen (s);
2640           s = va_arg (args, gchar*);
2641         }
2642       va_end (args);
2643
2644       /* Second part, building string */
2645       string = g_new (gchar, len);
2646
2647       va_start (args, separator);
2648
2649       s = va_arg (args, gchar*);
2650       ptr = g_stpcpy (string, s);
2651
2652       s = va_arg (args, gchar*);
2653       while (s)
2654         {
2655           ptr = g_stpcpy (ptr, separator);
2656           ptr = g_stpcpy (ptr, s);
2657           s = va_arg (args, gchar*);
2658         }
2659     }
2660   else
2661     string = g_strdup ("");
2662
2663   va_end (args);
2664
2665   return string;
2666 }
2667
2668
2669 /**
2670  * g_strstr_len:
2671  * @haystack: a string
2672  * @haystack_len: the maximum length of @haystack. Note that -1 is
2673  *     a valid length, if @haystack is nul-terminated, meaning it will
2674  *     search through the whole string.
2675  * @needle: the string to search for
2676  *
2677  * Searches the string @haystack for the first occurrence
2678  * of the string @needle, limiting the length of the search
2679  * to @haystack_len.
2680  *
2681  * Returns: a pointer to the found occurrence, or
2682  *    %NULL if not found.
2683  */
2684 gchar *
2685 g_strstr_len (const gchar *haystack,
2686               gssize       haystack_len,
2687               const gchar *needle)
2688 {
2689   g_return_val_if_fail (haystack != NULL, NULL);
2690   g_return_val_if_fail (needle != NULL, NULL);
2691
2692   if (haystack_len < 0)
2693     return strstr (haystack, needle);
2694   else
2695     {
2696       const gchar *p = haystack;
2697       gsize needle_len = strlen (needle);
2698       const gchar *end;
2699       gsize i;
2700
2701       if (needle_len == 0)
2702         return (gchar *)haystack;
2703
2704       if (haystack_len < needle_len)
2705         return NULL;
2706
2707       end = haystack + haystack_len - needle_len;
2708
2709       while (p <= end && *p)
2710         {
2711           for (i = 0; i < needle_len; i++)
2712             if (p[i] != needle[i])
2713               goto next;
2714
2715           return (gchar *)p;
2716
2717         next:
2718           p++;
2719         }
2720
2721       return NULL;
2722     }
2723 }
2724
2725 /**
2726  * g_strrstr:
2727  * @haystack: a nul-terminated string
2728  * @needle: the nul-terminated string to search for
2729  *
2730  * Searches the string @haystack for the last occurrence
2731  * of the string @needle.
2732  *
2733  * Returns: a pointer to the found occurrence, or
2734  *    %NULL if not found.
2735  */
2736 gchar *
2737 g_strrstr (const gchar *haystack,
2738            const gchar *needle)
2739 {
2740   gsize i;
2741   gsize needle_len;
2742   gsize haystack_len;
2743   const gchar *p;
2744
2745   g_return_val_if_fail (haystack != NULL, NULL);
2746   g_return_val_if_fail (needle != NULL, NULL);
2747
2748   needle_len = strlen (needle);
2749   haystack_len = strlen (haystack);
2750
2751   if (needle_len == 0)
2752     return (gchar *)haystack;
2753
2754   if (haystack_len < needle_len)
2755     return NULL;
2756
2757   p = haystack + haystack_len - needle_len;
2758
2759   while (p >= haystack)
2760     {
2761       for (i = 0; i < needle_len; i++)
2762         if (p[i] != needle[i])
2763           goto next;
2764
2765       return (gchar *)p;
2766
2767     next:
2768       p--;
2769     }
2770
2771   return NULL;
2772 }
2773
2774 /**
2775  * g_strrstr_len:
2776  * @haystack: a nul-terminated string
2777  * @haystack_len: the maximum length of @haystack
2778  * @needle: the nul-terminated string to search for
2779  *
2780  * Searches the string @haystack for the last occurrence
2781  * of the string @needle, limiting the length of the search
2782  * to @haystack_len.
2783  *
2784  * Returns: a pointer to the found occurrence, or
2785  *    %NULL if not found.
2786  */
2787 gchar *
2788 g_strrstr_len (const gchar *haystack,
2789                gssize        haystack_len,
2790                const gchar *needle)
2791 {
2792   g_return_val_if_fail (haystack != NULL, NULL);
2793   g_return_val_if_fail (needle != NULL, NULL);
2794
2795   if (haystack_len < 0)
2796     return g_strrstr (haystack, needle);
2797   else
2798     {
2799       gsize needle_len = strlen (needle);
2800       const gchar *haystack_max = haystack + haystack_len;
2801       const gchar *p = haystack;
2802       gsize i;
2803
2804       while (p < haystack_max && *p)
2805         p++;
2806
2807       if (p < haystack + needle_len)
2808         return NULL;
2809
2810       p -= needle_len;
2811
2812       while (p >= haystack)
2813         {
2814           for (i = 0; i < needle_len; i++)
2815             if (p[i] != needle[i])
2816               goto next;
2817
2818           return (gchar *)p;
2819
2820         next:
2821           p--;
2822         }
2823
2824       return NULL;
2825     }
2826 }
2827
2828
2829 /**
2830  * g_str_has_suffix:
2831  * @str: a nul-terminated string
2832  * @suffix: the nul-terminated suffix to look for
2833  *
2834  * Looks whether the string @str ends with @suffix.
2835  *
 * Returns: %TRUE if @str ends with @suffix, %FALSE otherwise.
2837  *
2838  * Since: 2.2
2839  */
2840 gboolean
2841 g_str_has_suffix (const gchar *str,
2842                   const gchar *suffix)
2843 {
2844   gsize str_len;
2845   gsize suffix_len;
2846
2847   g_return_val_if_fail (str != NULL, FALSE);
2848   g_return_val_if_fail (suffix != NULL, FALSE);
2849
2850   str_len = strlen (str);
2851   suffix_len = strlen (suffix);
2852
2853   if (str_len < suffix_len)
2854     return FALSE;
2855
2856   return strcmp (str + str_len - suffix_len, suffix) == 0;
2857 }
2858
2859 /**
2860  * g_str_has_prefix:
2861  * @str: a nul-terminated string
2862  * @prefix: the nul-terminated prefix to look for
2863  *
2864  * Looks whether the string @str begins with @prefix.
2865  *
2866  * Returns: %TRUE if @str begins with @prefix, %FALSE otherwise.
2867  *
2868  * Since: 2.2
2869  */
2870 gboolean
2871 g_str_has_prefix (const gchar *str,
2872                   const gchar *prefix)
2873 {
2874   g_return_val_if_fail (str != NULL, FALSE);
2875   g_return_val_if_fail (prefix != NULL, FALSE);
2876
2877   return strncmp (str, prefix, strlen (prefix)) == 0;
2878 }
2879
2880 /**
2881  * g_strv_length:
2882  * @str_array: a %NULL-terminated array of strings
2883  *
2884  * Returns the length of the given %NULL-terminated
2885  * string array @str_array. @str_array must not be %NULL.
2886  *
2887  * Returns: length of @str_array.
2888  *
2889  * Since: 2.6
2890  */
2891 guint
2892 g_strv_length (gchar **str_array)
2893 {
2894   guint i = 0;
2895
2896   g_return_val_if_fail (str_array != NULL, 0);
2897
2898   while (str_array[i])
2899     ++i;
2900
2901   return i;
2902 }
2903
2904 static void
2905 index_add_folded (GPtrArray   *array,
2906                   const gchar *start,
2907                   const gchar *end)
2908 {
2909   gchar *normal;
2910
2911   normal = g_utf8_normalize (start, end - start, G_NORMALIZE_ALL_COMPOSE);
2912
2913   /* TODO: Invent time machine.  Converse with Mustafa Ataturk... */
2914   if (strstr (normal, "ı") || strstr (normal, "İ"))
2915     {
2916       gchar *s = normal;
2917       GString *tmp;
2918
2919       tmp = g_string_new (NULL);
2920
2921       while (*s)
2922         {
2923           gchar *i, *I, *e;
2924
2925           i = strstr (s, "ı");
2926           I = strstr (s, "İ");
2927
2928           if (!i && !I)
2929             break;
2930           else if (i && !I)
2931             e = i;
2932           else if (I && !i)
2933             e = I;
2934           else if (i < I)
2935             e = i;
2936           else
2937             e = I;
2938
2939           g_string_append_len (tmp, s, e - s);
2940           g_string_append_c (tmp, 'i');
2941           s = g_utf8_next_char (e);
2942         }
2943
2944       g_string_append (tmp, s);
2945       g_free (normal);
2946       normal = g_string_free (tmp, FALSE);
2947     }
2948
2949   g_ptr_array_add (array, g_utf8_casefold (normal, -1));
2950   g_free (normal);
2951 }
2952
2953 static gchar **
2954 split_words (const gchar *value)
2955 {
2956   const gchar *start = NULL;
2957   GPtrArray *result;
2958   const gchar *s;
2959
2960   result = g_ptr_array_new ();
2961
2962   for (s = value; *s; s = g_utf8_next_char (s))
2963     {
2964       gunichar c = g_utf8_get_char (s);
2965
2966       if (start == NULL)
2967         {
2968           if (g_unichar_isalnum (c) || g_unichar_ismark (c))
2969             start = s;
2970         }
2971       else
2972         {
2973           if (!g_unichar_isalnum (c) && !g_unichar_ismark (c))
2974             {
2975               index_add_folded (result, start, s);
2976               start = NULL;
2977             }
2978         }
2979     }
2980
2981   if (start)
2982     index_add_folded (result, start, s);
2983
2984   g_ptr_array_add (result, NULL);
2985
2986   return (gchar **) g_ptr_array_free (result, FALSE);
2987 }
2988
2989 /**
2990  * g_str_tokenize_and_fold:
2991  * @string: a string
2992  * @translit_locale: (nullable): the language code (like 'de' or
2993  *   'en_GB') from which @string originates
2994  * @ascii_alternates: (out) (transfer full) (array zero-terminated=1): a
2995  *   return location for ASCII alternates
2996  *
2997  * Tokenises @string and performs folding on each token.
2998  *
2999  * A token is a non-empty sequence of alphanumeric characters in the
3000  * source string, separated by non-alphanumeric characters.  An
3001  * "alphanumeric" character for this purpose is one that matches
3002  * g_unichar_isalnum() or g_unichar_ismark().
3003  *
3004  * Each token is then (Unicode) normalised and case-folded.  If
3005  * @ascii_alternates is non-%NULL and some of the returned tokens
3006  * contain non-ASCII characters, ASCII alternatives will be generated.
3007  *
3008  * The number of ASCII alternatives that are generated and the method
3009  * for doing so is unspecified, but @translit_locale (if specified) may
3010  * improve the transliteration if the language of the source string is
3011  * known.
3012  *
3013  * Returns: (transfer full) (array zero-terminated=1): the folded tokens
3014  *
3015  * Since: 2.40
3016  **/
3017 gchar **
3018 g_str_tokenize_and_fold (const gchar   *string,
3019                          const gchar   *translit_locale,
3020                          gchar       ***ascii_alternates)
3021 {
3022   gchar **result;
3023
3024   g_return_val_if_fail (string != NULL, NULL);
3025
3026   if (ascii_alternates && g_str_is_ascii (string))
3027     {
3028       *ascii_alternates = g_new0 (gchar *, 0 + 1);
3029       ascii_alternates = NULL;
3030     }
3031
3032   result = split_words (string);
3033
3034   if (ascii_alternates)
3035     {
3036       gint i, j, n;
3037
3038       n = g_strv_length (result);
3039       *ascii_alternates = g_new (gchar *, n + 1);
3040       j = 0;
3041
3042       for (i = 0; i < n; i++)
3043         {
3044           if (!g_str_is_ascii (result[i]))
3045             {
3046               gchar *composed;
3047               gchar *ascii;
3048               gint k;
3049
3050               composed = g_utf8_normalize (result[i], -1, G_NORMALIZE_ALL_COMPOSE);
3051
3052               ascii = g_str_to_ascii (composed, translit_locale);
3053
3054               /* Only accept strings that are now entirely alnums */
3055               for (k = 0; ascii[k]; k++)
3056                 if (!g_ascii_isalnum (ascii[k]))
3057                   break;
3058
3059               if (ascii[k] == '\0')
3060                 /* Made it to the end... */
3061                 (*ascii_alternates)[j++] = ascii;
3062               else
3063                 g_free (ascii);
3064
3065               g_free (composed);
3066             }
3067         }
3068
3069       (*ascii_alternates)[j] = NULL;
3070     }
3071
3072   return result;
3073 }
3074
3075 /**
3076  * g_str_match_string:
3077  * @search_term: the search term from the user
3078  * @potential_hit: the text that may be a hit
3079  * @accept_alternates: %TRUE to accept ASCII alternates
3080  *
3081  * Checks if a search conducted for @search_term should match
3082  * @potential_hit.
3083  *
3084  * This function calls g_str_tokenize_and_fold() on both
3085  * @search_term and @potential_hit.  ASCII alternates are never taken
3086  * for @search_term but will be taken for @potential_hit according to
3087  * the value of @accept_alternates.
3088  *
3089  * A hit occurs when each folded token in @search_term is a prefix of a
3090  * folded token from @potential_hit.
3091  *
3092  * Depending on how you're performing the search, it will typically be
3093  * faster to call g_str_tokenize_and_fold() on each string in
3094  * your corpus and build an index on the returned folded tokens, then
3095  * call g_str_tokenize_and_fold() on the search term and
3096  * perform lookups into that index.
3097  *
3098  * As some examples, searching for ‘fred’ would match the potential hit
3099  * ‘Smith, Fred’ and also ‘Frédéric’.  Searching for ‘Fréd’ would match
3100  * ‘Frédéric’ but not ‘Frederic’ (due to the one-directional nature of
3101  * accent matching).  Searching ‘fo’ would match ‘Foo’ and ‘Bar Foo
3102  * Baz’, but not ‘SFO’ (because no word has ‘fo’ as a prefix).
3103  *
3104  * Returns: %TRUE if @potential_hit is a hit
3105  *
3106  * Since: 2.40
3107  **/
3108 gboolean
3109 g_str_match_string (const gchar *search_term,
3110                     const gchar *potential_hit,
3111                     gboolean     accept_alternates)
3112 {
3113   gchar **alternates = NULL;
3114   gchar **term_tokens;
3115   gchar **hit_tokens;
3116   gboolean matched;
3117   gint i, j;
3118
3119   g_return_val_if_fail (search_term != NULL, FALSE);
3120   g_return_val_if_fail (potential_hit != NULL, FALSE);
3121
3122   term_tokens = g_str_tokenize_and_fold (search_term, NULL, NULL);
3123   hit_tokens = g_str_tokenize_and_fold (potential_hit, NULL, accept_alternates ? &alternates : NULL);
3124
3125   matched = TRUE;
3126
3127   for (i = 0; term_tokens[i]; i++)
3128     {
3129       for (j = 0; hit_tokens[j]; j++)
3130         if (g_str_has_prefix (hit_tokens[j], term_tokens[i]))
3131           goto one_matched;
3132
3133       if (accept_alternates)
3134         for (j = 0; alternates[j]; j++)
3135           if (g_str_has_prefix (alternates[j], term_tokens[i]))
3136             goto one_matched;
3137
3138       matched = FALSE;
3139       break;
3140
3141 one_matched:
3142       continue;
3143     }
3144
3145   g_strfreev (term_tokens);
3146   g_strfreev (hit_tokens);
3147   g_strfreev (alternates);
3148
3149   return matched;
3150 }
3151
3152 /**
3153  * g_strv_contains:
3154  * @strv: a %NULL-terminated array of strings
3155  * @str: a string
3156  *
3157  * Checks if @strv contains @str. @strv must not be %NULL.
3158  *
3159  * Returns: %TRUE if @str is an element of @strv, according to g_str_equal().
3160  *
3161  * Since: 2.44
3162  */
3163 gboolean
3164 g_strv_contains (const gchar * const *strv,
3165                  const gchar         *str)
3166 {
3167   g_return_val_if_fail (strv != NULL, FALSE);
3168   g_return_val_if_fail (str != NULL, FALSE);
3169
3170   for (; *strv != NULL; strv++)
3171     {
3172       if (g_str_equal (str, *strv))
3173         return TRUE;
3174     }
3175
3176   return FALSE;
3177 }
3178
3179 static gboolean
3180 str_has_sign (const gchar *str)
3181 {
3182   return str[0] == '-' || str[0] == '+';
3183 }
3184
3185 static gboolean
3186 str_has_hex_prefix (const gchar *str)
3187 {
3188   return str[0] == '0' && g_ascii_tolower (str[1]) == 'x';
3189 }
3190
3191 /**
3192  * g_ascii_string_to_signed:
3193  * @str: a string
3194  * @base: base of a parsed number
3195  * @min: a lower bound (inclusive)
3196  * @max: an upper bound (inclusive)
3197  * @out_num: (out) (optional): a return location for a number
3198  * @error: a return location for #GError
3199  *
3200  * A convenience function for converting a string to a signed number.
3201  *
3202  * This function assumes that @str contains only a number of the given
3203  * @base that is within inclusive bounds limited by @min and @max. If
3204  * this is true, then the converted number is stored in @out_num. An
3205  * empty string is not a valid input. A string with leading or
3206  * trailing whitespace is also an invalid input.
3207  *
3208  * @base can be between 2 and 36 inclusive. Hexadecimal numbers must
3209  * not be prefixed with "0x" or "0X". Such a problem does not exist
3210  * for octal numbers, since they were usually prefixed with a zero
3211  * which does not change the value of the parsed number.
3212  *
3213  * Parsing failures result in an error with the %G_NUMBER_PARSER_ERROR
3214  * domain. If the input is invalid, the error code will be
3215  * %G_NUMBER_PARSER_ERROR_INVALID. If the parsed number is out of
3216  * bounds - %G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS.
3217  *
3218  * See g_ascii_strtoll() if you have more complex needs such as
3219  * parsing a string which starts with a number, but then has other
3220  * characters.
3221  *
3222  * Returns: %TRUE if @str was a number, otherwise %FALSE.
3223  *
3224  * Since: 2.54
3225  */
3226 gboolean
3227 g_ascii_string_to_signed (const gchar  *str,
3228                           guint         base,
3229                           gint64        min,
3230                           gint64        max,
3231                           gint64       *out_num,
3232                           GError      **error)
3233 {
3234   gint64 number;
3235   const gchar *end_ptr = NULL;
3236   gint saved_errno = 0;
3237
3238   g_return_val_if_fail (str != NULL, FALSE);
3239   g_return_val_if_fail (base >= 2 && base <= 36, FALSE);
3240   g_return_val_if_fail (min <= max, FALSE);
3241   g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
3242
3243   if (str[0] == '\0')
3244     {
3245       g_set_error_literal (error,
3246                            G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
3247                            _("Empty string is not a number"));
3248       return FALSE;
3249     }
3250
3251   errno = 0;
3252   number = g_ascii_strtoll (str, (gchar **)&end_ptr, base);
3253   saved_errno = errno;
3254
3255   if (/* We do not allow leading whitespace, but g_ascii_strtoll
3256        * accepts it and just skips it, so we need to check for it
3257        * ourselves.
3258        */
3259       g_ascii_isspace (str[0]) ||
3260       /* We don't support hexadecimal numbers prefixed with 0x or
3261        * 0X.
3262        */
3263       (base == 16 &&
3264        (str_has_sign (str) ? str_has_hex_prefix (str + 1) : str_has_hex_prefix (str))) ||
3265       (saved_errno != 0 && saved_errno != ERANGE) ||
3266       end_ptr == NULL ||
3267       *end_ptr != '\0')
3268     {
3269       g_set_error (error,
3270                    G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
3271                    _("“%s” is not a signed number"), str);
3272       return FALSE;
3273     }
3274   if (saved_errno == ERANGE || number < min || number > max)
3275     {
3276       gchar *min_str = g_strdup_printf ("%" G_GINT64_FORMAT, min);
3277       gchar *max_str = g_strdup_printf ("%" G_GINT64_FORMAT, max);
3278
3279       g_set_error (error,
3280                    G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS,
3281                    _("Number “%s” is out of bounds [%s, %s]"),
3282                    str, min_str, max_str);
3283       g_free (min_str);
3284       g_free (max_str);
3285       return FALSE;
3286     }
3287   if (out_num != NULL)
3288     *out_num = number;
3289   return TRUE;
3290 }
3291
3292 /**
3293  * g_ascii_string_to_unsigned:
3294  * @str: a string
3295  * @base: base of a parsed number
3296  * @min: a lower bound (inclusive)
3297  * @max: an upper bound (inclusive)
3298  * @out_num: (out) (optional): a return location for a number
3299  * @error: a return location for #GError
3300  *
3301  * A convenience function for converting a string to an unsigned number.
3302  *
3303  * This function assumes that @str contains only a number of the given
3304  * @base that is within inclusive bounds limited by @min and @max. If
3305  * this is true, then the converted number is stored in @out_num. An
3306  * empty string is not a valid input. A string with leading or
3307  * trailing whitespace is also an invalid input.
3308  *
3309  * @base can be between 2 and 36 inclusive. Hexadecimal numbers must
3310  * not be prefixed with "0x" or "0X". Such a problem does not exist
3311  * for octal numbers, since they were usually prefixed with a zero
3312  * which does not change the value of the parsed number.
3313  *
3314  * Parsing failures result in an error with the %G_NUMBER_PARSER_ERROR
3315  * domain. If the input is invalid, the error code will be
3316  * %G_NUMBER_PARSER_ERROR_INVALID. If the parsed number is out of
3317  * bounds - %G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS.
3318  *
3319  * See g_ascii_strtoull() if you have more complex needs such as
3320  * parsing a string which starts with a number, but then has other
3321  * characters.
3322  *
3323  * Returns: %TRUE if @str was a number, otherwise %FALSE.
3324  *
3325  * Since: 2.54
3326  */
3327 gboolean
3328 g_ascii_string_to_unsigned (const gchar  *str,
3329                             guint         base,
3330                             guint64       min,
3331                             guint64       max,
3332                             guint64      *out_num,
3333                             GError      **error)
3334 {
3335   guint64 number;
3336   const gchar *end_ptr = NULL;
3337   gint saved_errno = 0;
3338
3339   g_return_val_if_fail (str != NULL, FALSE);
3340   g_return_val_if_fail (base >= 2 && base <= 36, FALSE);
3341   g_return_val_if_fail (min <= max, FALSE);
3342   g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
3343
3344   if (str[0] == '\0')
3345     {
3346       g_set_error_literal (error,
3347                            G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
3348                            _("Empty string is not a number"));
3349       return FALSE;
3350     }
3351
3352   errno = 0;
3353   number = g_ascii_strtoull (str, (gchar **)&end_ptr, base);
3354   saved_errno = errno;
3355
3356   if (/* We do not allow leading whitespace, but g_ascii_strtoull
3357        * accepts it and just skips it, so we need to check for it
3358        * ourselves.
3359        */
3360       g_ascii_isspace (str[0]) ||
3361       /* Unsigned number should have no sign.
3362        */
3363       str_has_sign (str) ||
3364       /* We don't support hexadecimal numbers prefixed with 0x or
3365        * 0X.
3366        */
3367       (base == 16 && str_has_hex_prefix (str)) ||
3368       (saved_errno != 0 && saved_errno != ERANGE) ||
3369       end_ptr == NULL ||
3370       *end_ptr != '\0')
3371     {
3372       g_set_error (error,
3373                    G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
3374                    _("“%s” is not an unsigned number"), str);
3375       return FALSE;
3376     }
3377   if (saved_errno == ERANGE || number < min || number > max)
3378     {
3379       gchar *min_str = g_strdup_printf ("%" G_GUINT64_FORMAT, min);
3380       gchar *max_str = g_strdup_printf ("%" G_GUINT64_FORMAT, max);
3381
3382       g_set_error (error,
3383                    G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS,
3384                    _("Number “%s” is out of bounds [%s, %s]"),
3385                    str, min_str, max_str);
3386       g_free (min_str);
3387       g_free (max_str);
3388       return FALSE;
3389     }
3390   if (out_num != NULL)
3391     *out_num = number;
3392   return TRUE;
3393 }
3394
/* Error-domain quark for the number parsing functions in this file.
 * NOTE(review): G_DEFINE_QUARK presumably expands to the definition of
 * g_number_parser_error_quark(), returning the quark for the string
 * "g-number-parser-error-quark", and %G_NUMBER_PARSER_ERROR presumably
 * resolves to that function; neither the macro nor the header is
 * visible here, so confirm against them.
 */
3395 G_DEFINE_QUARK (g-number-parser-error-quark, g_number_parser_error)