glib/gstrfuncs.c

   1 /* GLIB - Library of useful routines for C programming
   2  * Copyright (C) 1995-1997  Peter Mattis, Spencer Kimball and Josh MacDonald
   3  *
   4  * This library is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU Lesser General Public
   6  * License as published by the Free Software Foundation; either
   7  * version 2.1 of the License, or (at your option) any later version.
   8  *
   9  * This library is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * Lesser General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Lesser General Public
  15  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  16  */
  17
  18 /*
  19  * Modified by the GLib Team and others 1997-2000.  See the AUTHORS
  20  * file for a list of people on the GLib Team.  See the ChangeLog
  21  * files for a list of changes.  These files are distributed with
  22  * GLib at ftp://ftp.gtk.org/pub/gtk/.
  23  */
  24
  25 /*
  26  * MT safe
  27  */
  28
  29 #include "config.h"
  30
  31 #include <stdarg.h>
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #include <locale.h>
  35 #include <string.h>
  36 #include <locale.h>
  37 #include <errno.h>
  38 #include <ctype.h>              /* For tolower() */
  39
  40 #ifdef HAVE_XLOCALE_H
  41 /* Needed on BSD/OS X for e.g. strtod_l */
  42 #include <xlocale.h>
  43 #endif
  44
  45 #ifdef G_OS_WIN32
  46 #include <windows.h>
  47 #endif
  48
  49 /* do not include <unistd.h> here, it may interfere with g_strsignal() */
  50
  51 #include "gstrfuncs.h"
  52
  53 #include "gprintf.h"
  54 #include "gprintfint.h"
  55 #include "glibintl.h"
  56
  57
  58 /**
  59  * SECTION:string_utils
  60  * @title: String Utility Functions
  61  * @short_description: various string-related functions
  62  *
  63  * This section describes a number of utility functions for creating,
  64  * duplicating, and manipulating strings.
  65  *
  66  * Note that the functions g_printf(), g_fprintf(), g_sprintf(),
  67  * g_snprintf(), g_vprintf(), g_vfprintf(), g_vsprintf() and g_vsnprintf()
  68  * are declared in the header `gprintf.h` which is not included in `glib.h`
  69  * (otherwise using `glib.h` would drag in `stdio.h`), so you'll have to
  70  * explicitly include `<glib/gprintf.h>` in order to use the GLib
  71  * printf() functions.
  72  *
  73  * ## String precision pitfalls # {#string-precision}
  74  *
  75  * While you may use the printf() functions to format UTF-8 strings,
  76  * notice that the precision of a \%Ns parameter is interpreted
  77  * as the number of bytes, not characters to print. On top of that,
  78  * the GNU libc implementation of the printf() functions has the
  79  * "feature" that it checks that the string given for the \%Ns
  80  * parameter consists of a whole number of characters in the current
  81  * encoding. So, unless you are sure you are always going to be in a
  82  * UTF-8 locale or you know your text is restricted to ASCII, avoid
  83  * using \%Ns. If your intention is to format strings for a
  84  * certain number of columns, then \%Ns is not a correct solution
  85  * anyway, since it fails to take wide characters (see g_unichar_iswide())
  86  * into account.
  87  *
  88  * Note also that there are various printf() parameters which are platform
  89  * dependent. GLib provides platform independent macros for these parameters
  90  * which should be used instead. A common example is %G_GUINT64_FORMAT, which
  91  * should be used instead of `%llu` or similar parameters for formatting
  92  * 64-bit integers. These macros are all named `G_*_FORMAT`; see
  93  * [Basic Types][glib-Basic-Types].
  94  */
  95
  96 /**
  97  * g_ascii_isalnum:
  98  * @c: any character
  99  *
 100  * Determines whether a character is alphanumeric.
 101  *
 102  * Unlike the standard C library isalnum() function, this only
 103  * recognizes standard ASCII letters and ignores the locale,
 104  * returning %FALSE for all non-ASCII characters. Also, unlike
 105  * the standard library function, this takes a char, not an int,
 106  * so don't call it on %EOF, but no need to cast to #guchar before
 107  * passing a possibly non-ASCII character in.
 108  *
 109  * Returns: %TRUE if @c is an ASCII alphanumeric character
 110  */
 111
 112 /**
 113  * g_ascii_isalpha:
 114  * @c: any character
 115  *
 116  * Determines whether a character is alphabetic (i.e. a letter).
 117  *
 118  * Unlike the standard C library isalpha() function, this only
 119  * recognizes standard ASCII letters and ignores the locale,
 120  * returning %FALSE for all non-ASCII characters. Also, unlike
 121  * the standard library function, this takes a char, not an int,
 122  * so don't call it on %EOF, but no need to cast to #guchar before
 123  * passing a possibly non-ASCII character in.
 124  *
 125  * Returns: %TRUE if @c is an ASCII alphabetic character
 126  */
 127
 128 /**
 129  * g_ascii_iscntrl:
 130  * @c: any character
 131  *
 132  * Determines whether a character is a control character.
 133  *
 134  * Unlike the standard C library iscntrl() function, this only
 135  * recognizes standard ASCII control characters and ignores the
 136  * locale, returning %FALSE for all non-ASCII characters. Also,
 137  * unlike the standard library function, this takes a char, not
 138  * an int, so don't call it on %EOF, but no need to cast to #guchar
 139  * before passing a possibly non-ASCII character in.
 140  *
 141  * Returns: %TRUE if @c is an ASCII control character.
 142  */
 143
 144 /**
 145  * g_ascii_isdigit:
 146  * @c: any character
 147  *
 148  * Determines whether a character is a digit (0-9).
 149  *
 150  * Unlike the standard C library isdigit() function, this takes
 151  * a char, not an int, so don't call it on %EOF, but no need to
 152  * cast to #guchar before passing a possibly non-ASCII character in.
 153  *
 154  * Returns: %TRUE if @c is an ASCII digit.
 155  */
 156
 157 /**
 158  * g_ascii_isgraph:
 159  * @c: any character
 160  *
 161  * Determines whether a character is a printing character and not a space.
 162  *
 163  * Unlike the standard C library isgraph() function, this only
 164  * recognizes standard ASCII characters and ignores the locale,
 165  * returning %FALSE for all non-ASCII characters. Also, unlike
 166  * the standard library function, this takes a char, not an int,
 167  * so don't call it on %EOF, but no need to cast to #guchar before
 168  * passing a possibly non-ASCII character in.
 169  *
 170  * Returns: %TRUE if @c is an ASCII printing character other than space.
 171  */
 172
 173 /**
 174  * g_ascii_islower:
 175  * @c: any character
 176  *
 177  * Determines whether a character is an ASCII lower case letter.
 178  *
 179  * Unlike the standard C library islower() function, this only
 180  * recognizes standard ASCII letters and ignores the locale,
 181  * returning %FALSE for all non-ASCII characters. Also, unlike
 182  * the standard library function, this takes a char, not an int,
 183  * so don't call it on %EOF, but no need to worry about casting
 184  * to #guchar before passing a possibly non-ASCII character in.
 185  *
 186  * Returns: %TRUE if @c is an ASCII lower case letter
 187  */
 188
 189 /**
 190  * g_ascii_isprint:
 191  * @c: any character
 192  *
 193  * Determines whether a character is a printing character.
 194  *
 195  * Unlike the standard C library isprint() function, this only
 196  * recognizes standard ASCII characters and ignores the locale,
 197  * returning %FALSE for all non-ASCII characters. Also, unlike
 198  * the standard library function, this takes a char, not an int,
 199  * so don't call it on %EOF, but no need to cast to #guchar before
 200  * passing a possibly non-ASCII character in.
 201  *
 202  * Returns: %TRUE if @c is an ASCII printing character.
 203  */
 204
 205 /**
 206  * g_ascii_ispunct:
 207  * @c: any character
 208  *
 209  * Determines whether a character is a punctuation character.
 210  *
 211  * Unlike the standard C library ispunct() function, this only
 212  * recognizes standard ASCII punctuation and ignores the locale,
 213  * returning %FALSE for all non-ASCII characters. Also, unlike
 214  * the standard library function, this takes a char, not an int,
 215  * so don't call it on %EOF, but no need to cast to #guchar before
 216  * passing a possibly non-ASCII character in.
 217  *
 218  * Returns: %TRUE if @c is an ASCII punctuation character.
 219  */
 220
 221 /**
 222  * g_ascii_isspace:
 223  * @c: any character
 224  *
 225  * Determines whether a character is a white-space character.
 226  *
 227  * Unlike the standard C library isspace() function, this only
 228  * recognizes standard ASCII white-space and ignores the locale,
 229  * returning %FALSE for all non-ASCII characters. Also, unlike
 230  * the standard library function, this takes a char, not an int,
 231  * so don't call it on %EOF, but no need to cast to #guchar before
 232  * passing a possibly non-ASCII character in.
 233  *
 234  * Returns: %TRUE if @c is an ASCII white-space character
 235  */
 236
 237 /**
 238  * g_ascii_isupper:
 239  * @c: any character
 240  *
 241  * Determines whether a character is an ASCII upper case letter.
 242  *
 243  * Unlike the standard C library isupper() function, this only
 244  * recognizes standard ASCII letters and ignores the locale,
 245  * returning %FALSE for all non-ASCII characters. Also, unlike
 246  * the standard library function, this takes a char, not an int,
 247  * so don't call it on %EOF, but no need to worry about casting
 248  * to #guchar before passing a possibly non-ASCII character in.
 249  *
 250  * Returns: %TRUE if @c is an ASCII upper case letter
 251  */
 252
 253 /**
 254  * g_ascii_isxdigit:
 255  * @c: any character
 256  *
 257  * Determines whether a character is a hexadecimal-digit character.
 258  *
 259  * Unlike the standard C library isxdigit() function, this takes
 260  * a char, not an int, so don't call it on %EOF, but no need to
 261  * cast to #guchar before passing a possibly non-ASCII character in.
 262  *
 263  * Returns: %TRUE if @c is an ASCII hexadecimal-digit character.
 264  */
 265
 266 /**
 267  * G_ASCII_DTOSTR_BUF_SIZE:
 268  *
 269  * A good size for a buffer to be passed into g_ascii_dtostr().
 270  * It is guaranteed to be enough for all output of that function
 271  * on systems with 64bit IEEE-compatible doubles.
 272  *
 273  * The typical usage would be something like:
 274  * |[<!-- language="C" -->
 275  *   char buf[G_ASCII_DTOSTR_BUF_SIZE];
 276  *
 277  *   fprintf (out, "value=%s\n", g_ascii_dtostr (buf, sizeof (buf), value));
 278  * ]|
 279  */
 280
 281 /**
 282  * g_strstrip:
 283  * @string: a string to remove the leading and trailing whitespace from
 284  *
 285  * Removes leading and trailing whitespace from a string.
 286  * See g_strchomp() and g_strchug().
 287  *
 288  * Returns: @string
 289  */
 290
 291 /**
 292  * G_STR_DELIMITERS:
 293  *
 294  * The standard delimiters, used in g_strdelimit().
 295  */
 296
 297 static const guint16 ascii_table_data[256] = {
 298   0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004,
 299   0x004, 0x104, 0x104, 0x004, 0x104, 0x104, 0x004, 0x004,
 300   0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004,
 301   0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004,
 302   0x140, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
 303   0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
 304   0x459, 0x459, 0x459, 0x459, 0x459, 0x459, 0x459, 0x459,
 305   0x459, 0x459, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
 306   0x0d0, 0x653, 0x653, 0x653, 0x653, 0x653, 0x653, 0x253,
 307   0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253,
 308   0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253,
 309   0x253, 0x253, 0x253, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
 310   0x0d0, 0x473, 0x473, 0x473, 0x473, 0x473, 0x473, 0x073,
 311   0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073,
 312   0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073,
 313   0x073, 0x073, 0x073, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x004
 314   /* the upper 128 are all zeroes */
 315 };
 316
 317 const guint16 * const g_ascii_table = ascii_table_data;
 318
 319 #if defined (HAVE_NEWLOCALE) && \
 320     defined (HAVE_USELOCALE) && \
 321     defined (HAVE_STRTOD_L) && \
 322     defined (HAVE_STRTOULL_L) && \
 323     defined (HAVE_STRTOLL_L)
 324 #define USE_XLOCALE 1
 325 #endif
 326
 327 #ifdef USE_XLOCALE
 328 static locale_t
 329 get_C_locale (void)
 330 {
 331   static gsize initialized = FALSE;
 332   static locale_t C_locale = NULL;
 333
 334   if (g_once_init_enter (&initialized))
 335     {
 336       C_locale = newlocale (LC_ALL_MASK, "C", NULL);
 337       g_once_init_leave (&initialized, TRUE);
 338     }
 339
 340   return C_locale;
 341 }
 342 #endif
 343
 344 /**
 345  * g_strdup:
 346  * @str: (nullable): the string to duplicate
 347  *
 348  * Duplicates a string. If @str is %NULL it returns %NULL.
 349  * The returned string should be freed with g_free()
 350  * when no longer needed.
 351  *
 352  * Returns: a newly-allocated copy of @str
 353  */
 354 gchar*
 355 g_strdup (const gchar *str)
 356 {
 357   gchar *new_str;
 358   gsize length;
 359
 360   if (str)
 361     {
 362       length = strlen (str) + 1;
 363       new_str = g_new (char, length);
 364       memcpy (new_str, str, length);
 365     }
 366   else
 367     new_str = NULL;
 368
 369   return new_str;
 370 }
 371
 372 /**
 373  * g_memdup:
 374  * @mem: the memory to copy.
 375  * @byte_size: the number of bytes to copy.
 376  *
 377  * Allocates @byte_size bytes of memory, and copies @byte_size bytes into it
 378  * from @mem. If @mem is %NULL it returns %NULL.
 379  *
 380  * Returns: a pointer to the newly-allocated copy of the memory, or %NULL if @mem
 381  *  is %NULL.
 382  */
 383 gpointer
 384 g_memdup (gconstpointer mem,
 385           guint         byte_size)
 386 {
 387   gpointer new_mem;
 388
 389   if (mem && byte_size != 0)
 390     {
 391       new_mem = g_malloc (byte_size);
 392       memcpy (new_mem, mem, byte_size);
 393     }
 394   else
 395     new_mem = NULL;
 396
 397   return new_mem;
 398 }
 399
 400 /**
 401  * g_strndup:
 402  * @str: the string to duplicate
 403  * @n: the maximum number of bytes to copy from @str
 404  *
 405  * Duplicates the first @n bytes of a string, returning a newly-allocated
 406  * buffer @n + 1 bytes long which will always be nul-terminated. If @str
 407  * is less than @n bytes long the buffer is padded with nuls. If @str is
 408  * %NULL it returns %NULL. The returned value should be freed when no longer
 409  * needed.
 410  *
 411  * To copy a number of characters from a UTF-8 encoded string,
 412  * use g_utf8_strncpy() instead.
 413  *
 414  * Returns: a newly-allocated buffer containing the first @n bytes
 415  *     of @str, nul-terminated
 416  */
 417 gchar*
 418 g_strndup (const gchar *str,
 419            gsize        n)
 420 {
 421   gchar *new_str;
 422
 423   if (str)
 424     {
 425       new_str = g_new (gchar, n + 1);
 426       strncpy (new_str, str, n);
 427       new_str[n] = '\0';
 428     }
 429   else
 430     new_str = NULL;
 431
 432   return new_str;
 433 }
 434
 435 /**
 436  * g_strnfill:
 437  * @length: the length of the new string
 438  * @fill_char: the byte to fill the string with
 439  *
 440  * Creates a new string @length bytes long filled with @fill_char.
 441  * The returned string should be freed when no longer needed.
 442  *
 443  * Returns: a newly-allocated string filled the @fill_char
 444  */
 445 gchar*
 446 g_strnfill (gsize length,
 447             gchar fill_char)
 448 {
 449   gchar *str;
 450
 451   str = g_new (gchar, length + 1);
 452   memset (str, (guchar)fill_char, length);
 453   str[length] = '\0';
 454
 455   return str;
 456 }
 457
 458 /**
 459  * g_stpcpy:
 460  * @dest: destination buffer.
 461  * @src: source string.
 462  *
 463  * Copies a nul-terminated string into the dest buffer, include the
 464  * trailing nul, and return a pointer to the trailing nul byte.
 465  * This is useful for concatenating multiple strings together
 466  * without having to repeatedly scan for the end.
 467  *
 468  * Returns: a pointer to trailing nul byte.
 469  **/
 470 gchar *
 471 g_stpcpy (gchar       *dest,
 472           const gchar *src)
 473 {
 474 #ifdef HAVE_STPCPY
 475   g_return_val_if_fail (dest != NULL, NULL);
 476   g_return_val_if_fail (src != NULL, NULL);
 477   return stpcpy (dest, src);
 478 #else
 479   gchar *d = dest;
 480   const gchar *s = src;
 481
 482   g_return_val_if_fail (dest != NULL, NULL);
 483   g_return_val_if_fail (src != NULL, NULL);
 484   do
 485     *d++ = *s;
 486   while (*s++ != '\0');
 487
 488   return d - 1;
 489 #endif
 490 }
 491
 492 /**
 493  * g_strdup_vprintf:
 494  * @format: a standard printf() format string, but notice
 495  *     [string precision pitfalls][string-precision]
 496  * @args: the list of parameters to insert into the format string
 497  *
 498  * Similar to the standard C vsprintf() function but safer, since it
 499  * calculates the maximum space required and allocates memory to hold
 500  * the result. The returned string should be freed with g_free() when
 501  * no longer needed.
 502  *
 503  * See also g_vasprintf(), which offers the same functionality, but
 504  * additionally returns the length of the allocated string.
 505  *
 506  * Returns: a newly-allocated string holding the result
 507  */
 508 gchar*
 509 g_strdup_vprintf (const gchar *format,
 510                   va_list      args)
 511 {
 512   gchar *string = NULL;
 513
 514   g_vasprintf (&string, format, args);
 515
 516   return string;
 517 }
 518
 519 /**
 520  * g_strdup_printf:
 521  * @format: a standard printf() format string, but notice
 522  *     [string precision pitfalls][string-precision]
 523  * @...: the parameters to insert into the format string
 524  *
 525  * Similar to the standard C sprintf() function but safer, since it
 526  * calculates the maximum space required and allocates memory to hold
 527  * the result. The returned string should be freed with g_free() when no
 528  * longer needed.
 529  *
 530  * Returns: a newly-allocated string holding the result
 531  */
 532 gchar*
 533 g_strdup_printf (const gchar *format,
 534                  ...)
 535 {
 536   gchar *buffer;
 537   va_list args;
 538
 539   va_start (args, format);
 540   buffer = g_strdup_vprintf (format, args);
 541   va_end (args);
 542
 543   return buffer;
 544 }
 545
 546 /**
 547  * g_strconcat:
 548  * @string1: the first string to add, which must not be %NULL
 549  * @...: a %NULL-terminated list of strings to append to the string
 550  *
 551  * Concatenates all of the given strings into one long string. The
 552  * returned string should be freed with g_free() when no longer needed.
 553  *
 554  * The variable argument list must end with %NULL. If you forget the %NULL,
 555  * g_strconcat() will start appending random memory junk to your string.
 556  *
 557  * Note that this function is usually not the right function to use to
 558  * assemble a translated message from pieces, since proper translation
 559  * often requires the pieces to be reordered.
 560  *
 561  * Returns: a newly-allocated string containing all the string arguments
 562  */
 563 gchar*
 564 g_strconcat (const gchar *string1, ...)
 565 {
 566   gsize   l;
 567   va_list args;
 568   gchar   *s;
 569   gchar   *concat;
 570   gchar   *ptr;
 571
 572   if (!string1)
 573     return NULL;
 574
 575   l = 1 + strlen (string1);
 576   va_start (args, string1);
 577   s = va_arg (args, gchar*);
 578   while (s)
 579     {
 580       l += strlen (s);
 581       s = va_arg (args, gchar*);
 582     }
 583   va_end (args);
 584
 585   concat = g_new (gchar, l);
 586   ptr = concat;
 587
 588   ptr = g_stpcpy (ptr, string1);
 589   va_start (args, string1);
 590   s = va_arg (args, gchar*);
 591   while (s)
 592     {
 593       ptr = g_stpcpy (ptr, s);
 594       s = va_arg (args, gchar*);
 595     }
 596   va_end (args);
 597
 598   return concat;
 599 }
 600
 601 /**
 602  * g_strtod:
 603  * @nptr:    the string to convert to a numeric value.
 604  * @endptr:  (out) (transfer none) (optional): if non-%NULL, it returns the
 605  *           character after the last character used in the conversion.
 606  *
 607  * Converts a string to a #gdouble value.
 608  * It calls the standard strtod() function to handle the conversion, but
 609  * if the string is not completely converted it attempts the conversion
 610  * again with g_ascii_strtod(), and returns the best match.
 611  *
 612  * This function should seldom be used. The normal situation when reading
 613  * numbers not for human consumption is to use g_ascii_strtod(). Only when
 614  * you know that you must expect both locale formatted and C formatted numbers
 615  * should you use this. Make sure that you don't pass strings such as comma
 616  * separated lists of values, since the commas may be interpreted as a decimal
 617  * point in some locales, causing unexpected results.
 618  *
 619  * Returns: the #gdouble value.
 620  **/
 621 gdouble
 622 g_strtod (const gchar *nptr,
 623           gchar      **endptr)
 624 {
 625   gchar *fail_pos_1;
 626   gchar *fail_pos_2;
 627   gdouble val_1;
 628   gdouble val_2 = 0;
 629
 630   g_return_val_if_fail (nptr != NULL, 0);
 631
 632   fail_pos_1 = NULL;
 633   fail_pos_2 = NULL;
 634
 635   val_1 = strtod (nptr, &fail_pos_1);
 636
 637   if (fail_pos_1 && fail_pos_1[0] != 0)
 638     val_2 = g_ascii_strtod (nptr, &fail_pos_2);
 639
 640   if (!fail_pos_1 || fail_pos_1[0] == 0 || fail_pos_1 >= fail_pos_2)
 641     {
 642       if (endptr)
 643         *endptr = fail_pos_1;
 644       return val_1;
 645     }
 646   else
 647     {
 648       if (endptr)
 649         *endptr = fail_pos_2;
 650       return val_2;
 651     }
 652 }
 653
 654 /**
 655  * g_ascii_strtod:
 656  * @nptr:    the string to convert to a numeric value.
 657  * @endptr:  (out) (transfer none) (optional): if non-%NULL, it returns the
 658  *           character after the last character used in the conversion.
 659  *
 660  * Converts a string to a #gdouble value.
 661  *
 662  * This function behaves like the standard strtod() function
 663  * does in the C locale. It does this without actually changing
 664  * the current locale, since that would not be thread-safe.
 665  * A limitation of the implementation is that this function
 666  * will still accept localized versions of infinities and NANs.
 667  *
 668  * This function is typically used when reading configuration
 669  * files or other non-user input that should be locale independent.
 670  * To handle input from the user you should normally use the
 671  * locale-sensitive system strtod() function.
 672  *
 673  * To convert from a #gdouble to a string in a locale-insensitive
 674  * way, use g_ascii_dtostr().
 675  *
 676  * If the correct value would cause overflow, plus or minus %HUGE_VAL
 677  * is returned (according to the sign of the value), and %ERANGE is
 678  * stored in %errno. If the correct value would cause underflow,
 679  * zero is returned and %ERANGE is stored in %errno.
 680  *
 681  * This function resets %errno before calling strtod() so that
 682  * you can reliably detect overflow and underflow.
 683  *
 684  * Returns: the #gdouble value.
 685  */
 686 gdouble
 687 g_ascii_strtod (const gchar *nptr,
 688                 gchar      **endptr)
 689 {
 690 #ifdef USE_XLOCALE
 691
 692   g_return_val_if_fail (nptr != NULL, 0);
 693
 694   errno = 0;
 695
 696   return strtod_l (nptr, endptr, get_C_locale ());
 697
 698 #else
 699
 700   gchar *fail_pos;
 701   gdouble val;
 702 #ifndef __BIONIC__
 703   struct lconv *locale_data;
 704 #endif
 705   const char *decimal_point;
 706   int decimal_point_len;
 707   const char *p, *decimal_point_pos;
 708   const char *end = NULL; /* Silence gcc */
 709   int strtod_errno;
 710
 711   g_return_val_if_fail (nptr != NULL, 0);
 712
 713   fail_pos = NULL;
 714
 715 #ifndef __BIONIC__
 716   locale_data = localeconv ();
 717   decimal_point = locale_data->decimal_point;
 718   decimal_point_len = strlen (decimal_point);
 719 #else
 720   decimal_point = ".";
 721   decimal_point_len = 1;
 722 #endif
 723
 724   g_assert (decimal_point_len != 0);
 725
 726   decimal_point_pos = NULL;
 727   end = NULL;
 728
 729   if (decimal_point[0] != '.' ||
 730       decimal_point[1] != 0)
 731     {
 732       p = nptr;
 733       /* Skip leading space */
 734       while (g_ascii_isspace (*p))
 735         p++;
 736
 737       /* Skip leading optional sign */
 738       if (*p == '+' || *p == '-')
 739         p++;
 740
 741       if (p[0] == '0' &&
 742           (p[1] == 'x' || p[1] == 'X'))
 743         {
 744           p += 2;
 745           /* HEX - find the (optional) decimal point */
 746
 747           while (g_ascii_isxdigit (*p))
 748             p++;
 749
 750           if (*p == '.')
 751             decimal_point_pos = p++;
 752
 753           while (g_ascii_isxdigit (*p))
 754             p++;
 755
 756           if (*p == 'p' || *p == 'P')
 757             p++;
 758           if (*p == '+' || *p == '-')
 759             p++;
 760           while (g_ascii_isdigit (*p))
 761             p++;
 762
 763           end = p;
 764         }
 765       else if (g_ascii_isdigit (*p) || *p == '.')
 766         {
 767           while (g_ascii_isdigit (*p))
 768             p++;
 769
 770           if (*p == '.')
 771             decimal_point_pos = p++;
 772
 773           while (g_ascii_isdigit (*p))
 774             p++;
 775
 776           if (*p == 'e' || *p == 'E')
 777             p++;
 778           if (*p == '+' || *p == '-')
 779             p++;
 780           while (g_ascii_isdigit (*p))
 781             p++;
 782
 783           end = p;
 784         }
 785       /* For the other cases, we need not convert the decimal point */
 786     }
 787
 788   if (decimal_point_pos)
 789     {
 790       char *copy, *c;
 791
 792       /* We need to convert the '.' to the locale specific decimal point */
 793       copy = g_malloc (end - nptr + 1 + decimal_point_len);
 794
 795       c = copy;
 796       memcpy (c, nptr, decimal_point_pos - nptr);
 797       c += decimal_point_pos - nptr;
 798       memcpy (c, decimal_point, decimal_point_len);
 799       c += decimal_point_len;
 800       memcpy (c, decimal_point_pos + 1, end - (decimal_point_pos + 1));
 801       c += end - (decimal_point_pos + 1);
 802       *c = 0;
 803
 804       errno = 0;
 805       val = strtod (copy, &fail_pos);
 806       strtod_errno = errno;
 807
 808       if (fail_pos)
 809         {
 810           if (fail_pos - copy > decimal_point_pos - nptr)
 811             fail_pos = (char *)nptr + (fail_pos - copy) - (decimal_point_len - 1);
 812           else
 813             fail_pos = (char *)nptr + (fail_pos - copy);
 814         }
 815
 816       g_free (copy);
 817
 818     }
 819   else if (end)
 820     {
 821       char *copy;
 822
 823       copy = g_malloc (end - (char *)nptr + 1);
 824       memcpy (copy, nptr, end - nptr);
 825       *(copy + (end - (char *)nptr)) = 0;
 826
 827       errno = 0;
 828       val = strtod (copy, &fail_pos);
 829       strtod_errno = errno;
 830
 831       if (fail_pos)
 832         {
 833           fail_pos = (char *)nptr + (fail_pos - copy);
 834         }
 835
 836       g_free (copy);
 837     }
 838   else
 839     {
 840       errno = 0;
 841       val = strtod (nptr, &fail_pos);
 842       strtod_errno = errno;
 843     }
 844
 845   if (endptr)
 846     *endptr = fail_pos;
 847
 848   errno = strtod_errno;
 849
 850   return val;
 851 #endif
 852 }
 853
 854
 855 /**
 856  * g_ascii_dtostr:
 857  * @buffer: A buffer to place the resulting string in
 858  * @buf_len: The length of the buffer.
 859  * @d: The #gdouble to convert
 860  *
 861  * Converts a #gdouble to a string, using the '.' as
 862  * decimal point.
 863  *
 864  * This function generates enough precision that converting
 865  * the string back using g_ascii_strtod() gives the same machine-number
 866  * (on machines with IEEE compatible 64bit doubles). It is
 867  * guaranteed that the size of the resulting string will never
 868  * be larger than @G_ASCII_DTOSTR_BUF_SIZE bytes, including the terminating
 869  * nul character, which is always added.
 870  *
 871  * Returns: The pointer to the buffer with the converted string.
 872  **/
 873 gchar *
 874 g_ascii_dtostr (gchar       *buffer,
 875                 gint         buf_len,
 876                 gdouble      d)
 877 {
 878   return g_ascii_formatd (buffer, buf_len, "%.17g", d);
 879 }
 880
 881 #pragma GCC diagnostic push
 882 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
 883
 884 /**
 885  * g_ascii_formatd:
 886  * @buffer: A buffer to place the resulting string in
 887  * @buf_len: The length of the buffer.
 888  * @format: The printf()-style format to use for the
 889  *          code to use for converting.
 890  * @d: The #gdouble to convert
 891  *
 892  * Converts a #gdouble to a string, using the '.' as
 893  * decimal point. To format the number you pass in
 894  * a printf()-style format string. Allowed conversion
 895  * specifiers are 'e', 'E', 'f', 'F', 'g' and 'G'.
 896  *
 897  * The returned buffer is guaranteed to be nul-terminated.
 898  *
 899  * If you just want to want to serialize the value into a
 900  * string, use g_ascii_dtostr().
 901  *
 902  * Returns: The pointer to the buffer with the converted string.
 903  */
 904 gchar *
 905 g_ascii_formatd (gchar       *buffer,
 906                  gint         buf_len,
 907                  const gchar *format,
 908                  gdouble      d)
 909 {
 910 #ifdef USE_XLOCALE
 911   locale_t old_locale;
 912
 913   old_locale = uselocale (get_C_locale ());
 914    _g_snprintf (buffer, buf_len, format, d);
 915   uselocale (old_locale);
 916
 917   return buffer;
 918 #else
 919 #ifndef __BIONIC__
 920   struct lconv *locale_data;
 921 #endif
 922   const char *decimal_point;
 923   int decimal_point_len;
 924   gchar *p;
 925   int rest_len;
 926   gchar format_char;
 927
 928   g_return_val_if_fail (buffer != NULL, NULL);
 929   g_return_val_if_fail (format[0] == '%', NULL);
 930   g_return_val_if_fail (strpbrk (format + 1, "'l%") == NULL, NULL);
 931
 932   format_char = format[strlen (format) - 1];
 933
 934   g_return_val_if_fail (format_char == 'e' || format_char == 'E' ||
 935                         format_char == 'f' || format_char == 'F' ||
 936                         format_char == 'g' || format_char == 'G',
 937                         NULL);
 938
 939   if (format[0] != '%')
 940     return NULL;
 941
 942   if (strpbrk (format + 1, "'l%"))
 943     return NULL;
 944
 945   if (!(format_char == 'e' || format_char == 'E' ||
 946         format_char == 'f' || format_char == 'F' ||
 947         format_char == 'g' || format_char == 'G'))
 948     return NULL;
 949
 950   _g_snprintf (buffer, buf_len, format, d);
 951
 952 #ifndef __BIONIC__
 953   locale_data = localeconv ();
 954   decimal_point = locale_data->decimal_point;
 955   decimal_point_len = strlen (decimal_point);
 956 #else
 957   decimal_point = ".";
 958   decimal_point_len = 1;
 959 #endif
 960
 961   g_assert (decimal_point_len != 0);
 962
 963   if (decimal_point[0] != '.' ||
 964       decimal_point[1] != 0)
 965     {
 966       p = buffer;
 967
 968       while (g_ascii_isspace (*p))
 969         p++;
 970
 971       if (*p == '+' || *p == '-')
 972         p++;
 973
 974       while (isdigit ((guchar)*p))
 975         p++;
 976
 977       if (strncmp (p, decimal_point, decimal_point_len) == 0)
 978         {
 979           *p = '.';
 980           p++;
 981           if (decimal_point_len > 1)
 982             {
 983               rest_len = strlen (p + (decimal_point_len-1));
 984               memmove (p, p + (decimal_point_len-1), rest_len);
 985               p[rest_len] = 0;
 986             }
 987         }
 988     }
 989
 990   return buffer;
 991 #endif
 992 }
 993 #pragma GCC diagnostic pop
 994
/* Locale-independent character classification and case-conversion helpers
 * that only recognize the ASCII ranges spelled out below. Each macro may
 * expand its argument more than once, so the argument must not have side
 * effects.
 */
#define ISSPACE(c)              ((c) == ' ' || (c) == '\f' || (c) == '\n' || \
                                 (c) == '\r' || (c) == '\t' || (c) == '\v')
#define ISUPPER(c)              ((c) >= 'A' && (c) <= 'Z')
#define ISLOWER(c)              ((c) >= 'a' && (c) <= 'z')
#define ISALPHA(c)              (ISUPPER (c) || ISLOWER (c))
#define TOUPPER(c)              (ISLOWER (c) ? (c) - 'a' + 'A' : (c))
#define TOLOWER(c)              (ISUPPER (c) ? (c) - 'A' + 'a' : (c))
1002
1003 #ifndef USE_XLOCALE
1004
1005 static guint64
1006 g_parse_long_long (const gchar  *nptr,
1007                    const gchar **endptr,
1008                    guint         base,
1009                    gboolean     *negative)
1010 {
1011   /* this code is based on on the strtol(3) code from GNU libc released under
1012    * the GNU Lesser General Public License.
1013    *
1014    * Copyright (C) 1991,92,94,95,96,97,98,99,2000,01,02
1015    *        Free Software Foundation, Inc.
1016    */
1017   gboolean overflow;
1018   guint64 cutoff;
1019   guint64 cutlim;
1020   guint64 ui64;
1021   const gchar *s, *save;
1022   guchar c;
1023
1024   g_return_val_if_fail (nptr != NULL, 0);
1025
1026   *negative = FALSE;
1027   if (base == 1 || base > 36)
1028     {
1029       errno = EINVAL;
1030       if (endptr)
1031         *endptr = nptr;
1032       return 0;
1033     }
1034
1035   save = s = nptr;
1036
1037   /* Skip white space.  */
1038   while (ISSPACE (*s))
1039     ++s;
1040
1041   if (G_UNLIKELY (!*s))
1042     goto noconv;
1043
1044   /* Check for a sign.  */
1045   if (*s == '-')
1046     {
1047       *negative = TRUE;
1048       ++s;
1049     }
1050   else if (*s == '+')
1051     ++s;
1052
1053   /* Recognize number prefix and if BASE is zero, figure it out ourselves.  */
1054   if (*s == '0')
1055     {
1056       if ((base == 0 || base == 16) && TOUPPER (s[1]) == 'X')
1057         {
1058           s += 2;
1059           base = 16;
1060         }
1061       else if (base == 0)
1062         base = 8;
1063     }
1064   else if (base == 0)
1065     base = 10;
1066
1067   /* Save the pointer so we can check later if anything happened.  */
1068   save = s;
1069   cutoff = G_MAXUINT64 / base;
1070   cutlim = G_MAXUINT64 % base;
1071
1072   overflow = FALSE;
1073   ui64 = 0;
1074   c = *s;
1075   for (; c; c = *++s)
1076     {
1077       if (c >= '0' && c <= '9')
1078         c -= '0';
1079       else if (ISALPHA (c))
1080         c = TOUPPER (c) - 'A' + 10;
1081       else
1082         break;
1083       if (c >= base)
1084         break;
1085       /* Check for overflow.  */
1086       if (ui64 > cutoff || (ui64 == cutoff && c > cutlim))
1087         overflow = TRUE;
1088       else
1089         {
1090           ui64 *= base;
1091           ui64 += c;
1092         }
1093     }
1094
1095   /* Check if anything actually happened.  */
1096   if (s == save)
1097     goto noconv;
1098
1099   /* Store in ENDPTR the address of one character
1100      past the last character we converted.  */
1101   if (endptr)
1102     *endptr = s;
1103
1104   if (G_UNLIKELY (overflow))
1105     {
1106       errno = ERANGE;
1107       return G_MAXUINT64;
1108     }
1109
1110   return ui64;
1111
1112  noconv:
1113   /* We must handle a special case here: the base is 0 or 16 and the
1114      first two characters are '0' and 'x', but the rest are no
1115      hexadecimal digits.  This is no error case.  We return 0 and
1116      ENDPTR points to the `x`.  */
1117   if (endptr)
1118     {
1119       if (save - nptr >= 2 && TOUPPER (save[-1]) == 'X'
1120           && save[-2] == '0')
1121         *endptr = &save[-1];
1122       else
1123         /*  There was no number to convert.  */
1124         *endptr = nptr;
1125     }
1126   return 0;
1127 }
1128 #endif /* !USE_XLOCALE */
1129
1130 /**
1131  * g_ascii_strtoull:
1132  * @nptr:    the string to convert to a numeric value.
1133  * @endptr:  (out) (transfer none) (optional): if non-%NULL, it returns the
1134  *           character after the last character used in the conversion.
1135  * @base:    to be used for the conversion, 2..36 or 0
1136  *
1137  * Converts a string to a #guint64 value.
1138  * This function behaves like the standard strtoull() function
1139  * does in the C locale. It does this without actually
1140  * changing the current locale, since that would not be
1141  * thread-safe.
1142  *
1143  * This function is typically used when reading configuration
1144  * files or other non-user input that should be locale independent.
1145  * To handle input from the user you should normally use the
1146  * locale-sensitive system strtoull() function.
1147  *
1148  * If the correct value would cause overflow, %G_MAXUINT64
1149  * is returned, and `ERANGE` is stored in `errno`.
1150  * If the base is outside the valid range, zero is returned, and
1151  * `EINVAL` is stored in `errno`.
1152  * If the string conversion fails, zero is returned, and @endptr returns
1153  * @nptr (if @endptr is non-%NULL).
1154  *
1155  * Returns: the #guint64 value or zero on error.
1156  *
1157  * Since: 2.2
1158  */
1159 guint64
1160 g_ascii_strtoull (const gchar *nptr,
1161                   gchar      **endptr,
1162                   guint        base)
1163 {
1164 #ifdef USE_XLOCALE
1165   return strtoull_l (nptr, endptr, base, get_C_locale ());
1166 #else
1167   gboolean negative;
1168   guint64 result;
1169
1170   result = g_parse_long_long (nptr, (const gchar **) endptr, base, &negative);
1171
1172   /* Return the result of the appropriate sign.  */
1173   return negative ? -result : result;
1174 #endif
1175 }
1176
1177 /**
1178  * g_ascii_strtoll:
1179  * @nptr:    the string to convert to a numeric value.
1180  * @endptr:  (out) (transfer none) (optional): if non-%NULL, it returns the
1181  *           character after the last character used in the conversion.
1182  * @base:    to be used for the conversion, 2..36 or 0
1183  *
1184  * Converts a string to a #gint64 value.
1185  * This function behaves like the standard strtoll() function
1186  * does in the C locale. It does this without actually
1187  * changing the current locale, since that would not be
1188  * thread-safe.
1189  *
1190  * This function is typically used when reading configuration
1191  * files or other non-user input that should be locale independent.
1192  * To handle input from the user you should normally use the
1193  * locale-sensitive system strtoll() function.
1194  *
1195  * If the correct value would cause overflow, %G_MAXINT64 or %G_MININT64
1196  * is returned, and `ERANGE` is stored in `errno`.
1197  * If the base is outside the valid range, zero is returned, and
1198  * `EINVAL` is stored in `errno`. If the
1199  * string conversion fails, zero is returned, and @endptr returns @nptr
1200  * (if @endptr is non-%NULL).
1201  *
1202  * Returns: the #gint64 value or zero on error.
1203  *
1204  * Since: 2.12
1205  */
1206 gint64
1207 g_ascii_strtoll (const gchar *nptr,
1208                  gchar      **endptr,
1209                  guint        base)
1210 {
1211 #ifdef USE_XLOCALE
1212   return strtoll_l (nptr, endptr, base, get_C_locale ());
1213 #else
1214   gboolean negative;
1215   guint64 result;
1216
1217   result = g_parse_long_long (nptr, (const gchar **) endptr, base, &negative);
1218
1219   if (negative && result > (guint64) G_MININT64)
1220     {
1221       errno = ERANGE;
1222       return G_MININT64;
1223     }
1224   else if (!negative && result > (guint64) G_MAXINT64)
1225     {
1226       errno = ERANGE;
1227       return G_MAXINT64;
1228     }
1229   else if (negative)
1230     return - (gint64) result;
1231   else
1232     return (gint64) result;
1233 #endif
1234 }
1235
1236 /**
1237  * g_strerror:
1238  * @errnum: the system error number. See the standard C %errno
1239  *     documentation
1240  *
1241  * Returns a string corresponding to the given error code, e.g. "no
1242  * such process". Unlike strerror(), this always returns a string in
1243  * UTF-8 encoding, and the pointer is guaranteed to remain valid for
1244  * the lifetime of the process.
1245  *
1246  * Note that the string may be translated according to the current locale.
1247  *
1248  * The value of %errno will not be changed by this function.
1249  *
1250  * Returns: a UTF-8 string describing the error code. If the error code
1251  *     is unknown, it returns a string like "unknown error (<code>)".
1252  */
1253 const gchar *
1254 g_strerror (gint errnum)
1255 {
1256   static GHashTable *errors;
1257   G_LOCK_DEFINE_STATIC (errors);
1258   const gchar *msg;
1259   gint saved_errno = errno;
1260
1261   G_LOCK (errors);
1262   if (errors)
1263     msg = g_hash_table_lookup (errors, GINT_TO_POINTER (errnum));
1264   else
1265     {
1266       errors = g_hash_table_new (NULL, NULL);
1267       msg = NULL;
1268     }
1269
1270   if (!msg)
1271     {
1272       gchar buf[1024];
1273       GError *error = NULL;
1274
1275 #if defined(G_OS_WIN32)
1276       strerror_s (buf, sizeof (buf), errnum);
1277       msg = buf;
1278 #elif defined(HAVE_STRERROR_R)
1279       /* Match the condition in strerror_r(3) for glibc */
1280 #  if defined(STRERROR_R_CHAR_P)
1281       msg = strerror_r (errnum, buf, sizeof (buf));
1282 #  else
1283       (void) strerror_r (errnum, buf, sizeof (buf));
1284       msg = buf;
1285 #  endif /* HAVE_STRERROR_R */
1286 #else
1287       g_strlcpy (buf, strerror (errnum), sizeof (buf));
1288       msg = buf;
1289 #endif
1290       if (!g_get_charset (NULL))
1291         {
1292           msg = g_locale_to_utf8 (msg, -1, NULL, NULL, &error);
1293           if (error)
1294             g_print ("%s\n", error->message);
1295         }
1296       else if (msg == (const gchar *)buf)
1297         msg = g_strdup (buf);
1298
1299       g_hash_table_insert (errors, GINT_TO_POINTER (errnum), (char *) msg);
1300     }
1301   G_UNLOCK (errors);
1302
1303   errno = saved_errno;
1304   return msg;
1305 }
1306
1307 /**
1308  * g_strsignal:
1309  * @signum: the signal number. See the `signal` documentation
1310  *
1311  * Returns a string describing the given signal, e.g. "Segmentation fault".
1312  * You should use this function in preference to strsignal(), because it
1313  * returns a string in UTF-8 encoding, and since not all platforms support
1314  * the strsignal() function.
1315  *
1316  * Returns: a UTF-8 string describing the signal. If the signal is unknown,
1317  *     it returns "unknown signal (<signum>)".
1318  */
1319 const gchar *
1320 g_strsignal (gint signum)
1321 {
1322   gchar *msg;
1323   gchar *tofree;
1324   const gchar *ret;
1325
1326   msg = tofree = NULL;
1327
1328 #ifdef HAVE_STRSIGNAL
1329   msg = strsignal (signum);
1330   if (!g_get_charset (NULL))
1331     msg = tofree = g_locale_to_utf8 (msg, -1, NULL, NULL, NULL);
1332 #endif
1333
1334   if (!msg)
1335     msg = tofree = g_strdup_printf ("unknown signal (%d)", signum);
1336   ret = g_intern_string (msg);
1337   g_free (tofree);
1338
1339   return ret;
1340 }
1341
1342 /* Functions g_strlcpy and g_strlcat were originally developed by
1343  * Todd C. Miller <Todd.Miller@courtesan.com> to simplify writing secure code.
1344  * See http://www.openbsd.org/cgi-bin/man.cgi?query=strlcpy
1345  * for more information.
1346  */
1347
1348 #ifdef HAVE_STRLCPY
1349 /* Use the native ones, if available; they might be implemented in assembly */
1350 gsize
1351 g_strlcpy (gchar       *dest,
1352            const gchar *src,
1353            gsize        dest_size)
1354 {
1355   g_return_val_if_fail (dest != NULL, 0);
1356   g_return_val_if_fail (src  != NULL, 0);
1357
1358   return strlcpy (dest, src, dest_size);
1359 }
1360
1361 gsize
1362 g_strlcat (gchar       *dest,
1363            const gchar *src,
1364            gsize        dest_size)
1365 {
1366   g_return_val_if_fail (dest != NULL, 0);
1367   g_return_val_if_fail (src  != NULL, 0);
1368
1369   return strlcat (dest, src, dest_size);
1370 }
1371
1372 #else /* ! HAVE_STRLCPY */
1373 /**
1374  * g_strlcpy:
1375  * @dest: destination buffer
1376  * @src: source buffer
1377  * @dest_size: length of @dest in bytes
1378  *
1379  * Portability wrapper that calls strlcpy() on systems which have it,
1380  * and emulates strlcpy() otherwise. Copies @src to @dest; @dest is
1381  * guaranteed to be nul-terminated; @src must be nul-terminated;
1382  * @dest_size is the buffer size, not the number of bytes to copy.
1383  *
1384  * At most @dest_size - 1 characters will be copied. Always nul-terminates
1385  * (unless @dest_size is 0). This function does not allocate memory. Unlike
1386  * strncpy(), this function doesn't pad @dest (so it's often faster). It
1387  * returns the size of the attempted result, strlen (src), so if
1388  * @retval >= @dest_size, truncation occurred.
1389  *
1390  * Caveat: strlcpy() is supposedly more secure than strcpy() or strncpy(),
1391  * but if you really want to avoid screwups, g_strdup() is an even better
1392  * idea.
1393  *
1394  * Returns: length of @src
1395  */
1396 gsize
1397 g_strlcpy (gchar       *dest,
1398            const gchar *src,
1399            gsize        dest_size)
1400 {
1401   gchar *d = dest;
1402   const gchar *s = src;
1403   gsize n = dest_size;
1404
1405   g_return_val_if_fail (dest != NULL, 0);
1406   g_return_val_if_fail (src  != NULL, 0);
1407
1408   /* Copy as many bytes as will fit */
1409   if (n != 0 && --n != 0)
1410     do
1411       {
1412         gchar c = *s++;
1413
1414         *d++ = c;
1415         if (c == 0)
1416           break;
1417       }
1418     while (--n != 0);
1419
1420   /* If not enough room in dest, add NUL and traverse rest of src */
1421   if (n == 0)
1422     {
1423       if (dest_size != 0)
1424         *d = 0;
1425       while (*s++)
1426         ;
1427     }
1428
1429   return s - src - 1;  /* count does not include NUL */
1430 }
1431
1432 /**
1433  * g_strlcat:
1434  * @dest: destination buffer, already containing one nul-terminated string
1435  * @src: source buffer
1436  * @dest_size: length of @dest buffer in bytes (not length of existing string
1437  *     inside @dest)
1438  *
1439  * Portability wrapper that calls strlcat() on systems which have it,
1440  * and emulates it otherwise. Appends nul-terminated @src string to @dest,
1441  * guaranteeing nul-termination for @dest. The total size of @dest won't
1442  * exceed @dest_size.
1443  *
1444  * At most @dest_size - 1 characters will be copied. Unlike strncat(),
1445  * @dest_size is the full size of dest, not the space left over. This
1446  * function does not allocate memory. It always nul-terminates (unless
1447  * @dest_size == 0 or there were no nul characters in the @dest_size
1448  * characters of dest to start with).
1449  *
1450  * Caveat: this is supposedly a more secure alternative to strcat() or
1451  * strncat(), but for real security g_strconcat() is harder to mess up.
1452  *
1453  * Returns: size of attempted result, which is MIN (dest_size, strlen
1454  *     (original dest)) + strlen (src), so if retval >= dest_size,
1455  *     truncation occurred.
1456  */
1457 gsize
1458 g_strlcat (gchar       *dest,
1459            const gchar *src,
1460            gsize        dest_size)
1461 {
1462   gchar *d = dest;
1463   const gchar *s = src;
1464   gsize bytes_left = dest_size;
1465   gsize dlength;  /* Logically, MIN (strlen (d), dest_size) */
1466
1467   g_return_val_if_fail (dest != NULL, 0);
1468   g_return_val_if_fail (src  != NULL, 0);
1469
1470   /* Find the end of dst and adjust bytes left but don't go past end */
1471   while (*d != 0 && bytes_left-- != 0)
1472     d++;
1473   dlength = d - dest;
1474   bytes_left = dest_size - dlength;
1475
1476   if (bytes_left == 0)
1477     return dlength + strlen (s);
1478
1479   while (*s != 0)
1480     {
1481       if (bytes_left != 1)
1482         {
1483           *d++ = *s;
1484           bytes_left--;
1485         }
1486       s++;
1487     }
1488   *d = 0;
1489
1490   return dlength + (s - src);  /* count does not include NUL */
1491 }
1492 #endif /* ! HAVE_STRLCPY */
1493
1494 /**
1495  * g_ascii_strdown:
1496  * @str: a string
1497  * @len: length of @str in bytes, or -1 if @str is nul-terminated
1498  *
1499  * Converts all upper case ASCII letters to lower case ASCII letters.
1500  *
1501  * Returns: a newly-allocated string, with all the upper case
1502  *     characters in @str converted to lower case, with semantics that
1503  *     exactly match g_ascii_tolower(). (Note that this is unlike the
1504  *     old g_strdown(), which modified the string in place.)
1505  */
1506 gchar*
1507 g_ascii_strdown (const gchar *str,
1508                  gssize       len)
1509 {
1510   gchar *result, *s;
1511
1512   g_return_val_if_fail (str != NULL, NULL);
1513
1514   if (len < 0)
1515     len = strlen (str);
1516
1517   result = g_strndup (str, len);
1518   for (s = result; *s; s++)
1519     *s = g_ascii_tolower (*s);
1520
1521   return result;
1522 }
1523
1524 /**
1525  * g_ascii_strup:
1526  * @str: a string
1527  * @len: length of @str in bytes, or -1 if @str is nul-terminated
1528  *
1529  * Converts all lower case ASCII letters to upper case ASCII letters.
1530  *
1531  * Returns: a newly allocated string, with all the lower case
1532  *     characters in @str converted to upper case, with semantics that
1533  *     exactly match g_ascii_toupper(). (Note that this is unlike the
1534  *     old g_strup(), which modified the string in place.)
1535  */
1536 gchar*
1537 g_ascii_strup (const gchar *str,
1538                gssize       len)
1539 {
1540   gchar *result, *s;
1541
1542   g_return_val_if_fail (str != NULL, NULL);
1543
1544   if (len < 0)
1545     len = strlen (str);
1546
1547   result = g_strndup (str, len);
1548   for (s = result; *s; s++)
1549     *s = g_ascii_toupper (*s);
1550
1551   return result;
1552 }
1553
1554 /**
1555  * g_str_is_ascii:
1556  * @str: a string
1557  *
1558  * Determines if a string is pure ASCII. A string is pure ASCII if it
1559  * contains no bytes with the high bit set.
1560  *
1561  * Returns: %TRUE if @str is ASCII
1562  *
1563  * Since: 2.40
1564  */
1565 gboolean
1566 g_str_is_ascii (const gchar *str)
1567 {
1568   gint i;
1569
1570   for (i = 0; str[i]; i++)
1571     if (str[i] & 0x80)
1572       return FALSE;
1573
1574   return TRUE;
1575 }
1576
1577 /**
1578  * g_strdown:
1579  * @string: the string to convert.
1580  *
1581  * Converts a string to lower case.
1582  *
1583  * Returns: the string
1584  *
1585  * Deprecated:2.2: This function is totally broken for the reasons discussed
1586  * in the g_strncasecmp() docs - use g_ascii_strdown() or g_utf8_strdown()
1587  * instead.
1588  **/
1589 gchar*
1590 g_strdown (gchar *string)
1591 {
1592   guchar *s;
1593
1594   g_return_val_if_fail (string != NULL, NULL);
1595
1596   s = (guchar *) string;
1597
1598   while (*s)
1599     {
1600       if (isupper (*s))
1601         *s = tolower (*s);
1602       s++;
1603     }
1604
1605   return (gchar *) string;
1606 }
1607
1608 /**
1609  * g_strup:
1610  * @string: the string to convert
1611  *
1612  * Converts a string to upper case.
1613  *
1614  * Returns: the string
1615  *
1616  * Deprecated:2.2: This function is totally broken for the reasons
1617  *     discussed in the g_strncasecmp() docs - use g_ascii_strup()
1618  *     or g_utf8_strup() instead.
1619  */
1620 gchar*
1621 g_strup (gchar *string)
1622 {
1623   guchar *s;
1624
1625   g_return_val_if_fail (string != NULL, NULL);
1626
1627   s = (guchar *) string;
1628
1629   while (*s)
1630     {
1631       if (islower (*s))
1632         *s = toupper (*s);
1633       s++;
1634     }
1635
1636   return (gchar *) string;
1637 }
1638
1639 /**
1640  * g_strreverse:
1641  * @string: the string to reverse
1642  *
1643  * Reverses all of the bytes in a string. For example,
1644  * `g_strreverse ("abcdef")` will result in "fedcba".
1645  *
1646  * Note that g_strreverse() doesn't work on UTF-8 strings
1647  * containing multibyte characters. For that purpose, use
1648  * g_utf8_strreverse().
1649  *
1650  * Returns: the same pointer passed in as @string
1651  */
1652 gchar*
1653 g_strreverse (gchar *string)
1654 {
1655   g_return_val_if_fail (string != NULL, NULL);
1656
1657   if (*string)
1658     {
1659       gchar *h, *t;
1660
1661       h = string;
1662       t = string + strlen (string) - 1;
1663
1664       while (h < t)
1665         {
1666           gchar c;
1667
1668           c = *h;
1669           *h = *t;
1670           h++;
1671           *t = c;
1672           t--;
1673         }
1674     }
1675
1676   return string;
1677 }
1678
1679 /**
1680  * g_ascii_tolower:
1681  * @c: any character
1682  *
1683  * Convert a character to ASCII lower case.
1684  *
1685  * Unlike the standard C library tolower() function, this only
1686  * recognizes standard ASCII letters and ignores the locale, returning
1687  * all non-ASCII characters unchanged, even if they are lower case
1688  * letters in a particular character set. Also unlike the standard
1689  * library function, this takes and returns a char, not an int, so
1690  * don't call it on %EOF but no need to worry about casting to #guchar
1691  * before passing a possibly non-ASCII character in.
1692  *
1693  * Returns: the result of converting @c to lower case. If @c is
1694  *     not an ASCII upper case letter, @c is returned unchanged.
1695  */
1696 gchar
1697 g_ascii_tolower (gchar c)
1698 {
1699   return g_ascii_isupper (c) ? c - 'A' + 'a' : c;
1700 }
1701
1702 /**
1703  * g_ascii_toupper:
1704  * @c: any character
1705  *
1706  * Convert a character to ASCII upper case.
1707  *
1708  * Unlike the standard C library toupper() function, this only
1709  * recognizes standard ASCII letters and ignores the locale, returning
1710  * all non-ASCII characters unchanged, even if they are upper case
1711  * letters in a particular character set. Also unlike the standard
1712  * library function, this takes and returns a char, not an int, so
1713  * don't call it on %EOF but no need to worry about casting to #guchar
1714  * before passing a possibly non-ASCII character in.
1715  *
1716  * Returns: the result of converting @c to upper case. If @c is not
1717  *    an ASCII lower case letter, @c is returned unchanged.
1718  */
1719 gchar
1720 g_ascii_toupper (gchar c)
1721 {
1722   return g_ascii_islower (c) ? c - 'a' + 'A' : c;
1723 }
1724
1725 /**
1726  * g_ascii_digit_value:
1727  * @c: an ASCII character
1728  *
1729  * Determines the numeric value of a character as a decimal digit.
1730  * Differs from g_unichar_digit_value() because it takes a char, so
1731  * there's no worry about sign extension if characters are signed.
1732  *
1733  * Returns: If @c is a decimal digit (according to g_ascii_isdigit()),
1734  *    its numeric value. Otherwise, -1.
1735  */
1736 int
1737 g_ascii_digit_value (gchar c)
1738 {
1739   if (g_ascii_isdigit (c))
1740     return c - '0';
1741   return -1;
1742 }
1743
1744 /**
1745  * g_ascii_xdigit_value:
1746  * @c: an ASCII character.
1747  *
1748  * Determines the numeric value of a character as a hexidecimal
1749  * digit. Differs from g_unichar_xdigit_value() because it takes
1750  * a char, so there's no worry about sign extension if characters
1751  * are signed.
1752  *
1753  * Returns: If @c is a hex digit (according to g_ascii_isxdigit()),
1754  *     its numeric value. Otherwise, -1.
1755  */
1756 int
1757 g_ascii_xdigit_value (gchar c)
1758 {
1759   if (c >= 'A' && c <= 'F')
1760     return c - 'A' + 10;
1761   if (c >= 'a' && c <= 'f')
1762     return c - 'a' + 10;
1763   return g_ascii_digit_value (c);
1764 }
1765
1766 /**
1767  * g_ascii_strcasecmp:
1768  * @s1: string to compare with @s2
1769  * @s2: string to compare with @s1
1770  *
1771  * Compare two strings, ignoring the case of ASCII characters.
1772  *
1773  * Unlike the BSD strcasecmp() function, this only recognizes standard
1774  * ASCII letters and ignores the locale, treating all non-ASCII
1775  * bytes as if they are not letters.
1776  *
1777  * This function should be used only on strings that are known to be
1778  * in encodings where the bytes corresponding to ASCII letters always
1779  * represent themselves. This includes UTF-8 and the ISO-8859-*
1780  * charsets, but not for instance double-byte encodings like the
1781  * Windows Codepage 932, where the trailing bytes of double-byte
1782  * characters include all ASCII letters. If you compare two CP932
1783  * strings using this function, you will get false matches.
1784  *
1785  * Both @s1 and @s2 must be non-%NULL.
1786  *
1787  * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1788  *     or a positive value if @s1 > @s2.
1789  */
1790 gint
1791 g_ascii_strcasecmp (const gchar *s1,
1792                     const gchar *s2)
1793 {
1794   gint c1, c2;
1795
1796   g_return_val_if_fail (s1 != NULL, 0);
1797   g_return_val_if_fail (s2 != NULL, 0);
1798
1799   while (*s1 && *s2)
1800     {
1801       c1 = (gint)(guchar) TOLOWER (*s1);
1802       c2 = (gint)(guchar) TOLOWER (*s2);
1803       if (c1 != c2)
1804         return (c1 - c2);
1805       s1++; s2++;
1806     }
1807
1808   return (((gint)(guchar) *s1) - ((gint)(guchar) *s2));
1809 }
1810
1811 /**
1812  * g_ascii_strncasecmp:
1813  * @s1: string to compare with @s2
1814  * @s2: string to compare with @s1
1815  * @n: number of characters to compare
1816  *
1817  * Compare @s1 and @s2, ignoring the case of ASCII characters and any
1818  * characters after the first @n in each string.
1819  *
1820  * Unlike the BSD strcasecmp() function, this only recognizes standard
1821  * ASCII letters and ignores the locale, treating all non-ASCII
1822  * characters as if they are not letters.
1823  *
1824  * The same warning as in g_ascii_strcasecmp() applies: Use this
1825  * function only on strings known to be in encodings where bytes
1826  * corresponding to ASCII letters always represent themselves.
1827  *
1828  * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1829  *     or a positive value if @s1 > @s2.
1830  */
1831 gint
1832 g_ascii_strncasecmp (const gchar *s1,
1833                      const gchar *s2,
1834                      gsize        n)
1835 {
1836   gint c1, c2;
1837
1838   g_return_val_if_fail (s1 != NULL, 0);
1839   g_return_val_if_fail (s2 != NULL, 0);
1840
1841   while (n && *s1 && *s2)
1842     {
1843       n -= 1;
1844       c1 = (gint)(guchar) TOLOWER (*s1);
1845       c2 = (gint)(guchar) TOLOWER (*s2);
1846       if (c1 != c2)
1847         return (c1 - c2);
1848       s1++; s2++;
1849     }
1850
1851   if (n)
1852     return (((gint) (guchar) *s1) - ((gint) (guchar) *s2));
1853   else
1854     return 0;
1855 }
1856
1857 /**
1858  * g_strcasecmp:
1859  * @s1: a string
1860  * @s2: a string to compare with @s1
1861  *
1862  * A case-insensitive string comparison, corresponding to the standard
1863  * strcasecmp() function on platforms which support it.
1864  *
1865  * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1866  *     or a positive value if @s1 > @s2.
1867  *
1868  * Deprecated:2.2: See g_strncasecmp() for a discussion of why this
1869  *     function is deprecated and how to replace it.
1870  */
1871 gint
1872 g_strcasecmp (const gchar *s1,
1873               const gchar *s2)
1874 {
1875 #ifdef HAVE_STRCASECMP
1876   g_return_val_if_fail (s1 != NULL, 0);
1877   g_return_val_if_fail (s2 != NULL, 0);
1878
1879   return strcasecmp (s1, s2);
1880 #else
1881   gint c1, c2;
1882
1883   g_return_val_if_fail (s1 != NULL, 0);
1884   g_return_val_if_fail (s2 != NULL, 0);
1885
1886   while (*s1 && *s2)
1887     {
1888       /* According to A. Cox, some platforms have islower's that
1889        * don't work right on non-uppercase
1890        */
1891       c1 = isupper ((guchar)*s1) ? tolower ((guchar)*s1) : *s1;
1892       c2 = isupper ((guchar)*s2) ? tolower ((guchar)*s2) : *s2;
1893       if (c1 != c2)
1894         return (c1 - c2);
1895       s1++; s2++;
1896     }
1897
1898   return (((gint)(guchar) *s1) - ((gint)(guchar) *s2));
1899 #endif
1900 }
1901
1902 /**
1903  * g_strncasecmp:
1904  * @s1: a string
1905  * @s2: a string to compare with @s1
1906  * @n: the maximum number of characters to compare
1907  *
1908  * A case-insensitive string comparison, corresponding to the standard
1909  * strncasecmp() function on platforms which support it. It is similar
1910  * to g_strcasecmp() except it only compares the first @n characters of
1911  * the strings.
1912  *
1913  * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1914  *     or a positive value if @s1 > @s2.
1915  *
1916  * Deprecated:2.2: The problem with g_strncasecmp() is that it does
1917  *     the comparison by calling toupper()/tolower(). These functions
1918  *     are locale-specific and operate on single bytes. However, it is
1919  *     impossible to handle things correctly from an internationalization
1920  *     standpoint by operating on bytes, since characters may be multibyte.
1921  *     Thus g_strncasecmp() is broken if your string is guaranteed to be
1922  *     ASCII, since it is locale-sensitive, and it's broken if your string
1923  *     is localized, since it doesn't work on many encodings at all,
1924  *     including UTF-8, EUC-JP, etc.
1925  *
1926  *     There are therefore two replacement techniques: g_ascii_strncasecmp(),
1927  *     which only works on ASCII and is not locale-sensitive, and
1928  *     g_utf8_casefold() followed by strcmp() on the resulting strings,
1929  *     which is good for case-insensitive sorting of UTF-8.
1930  */
1931 gint
1932 g_strncasecmp (const gchar *s1,
1933                const gchar *s2,
1934                guint n)
1935 {
1936 #ifdef HAVE_STRNCASECMP
1937   return strncasecmp (s1, s2, n);
1938 #else
1939   gint c1, c2;
1940
1941   g_return_val_if_fail (s1 != NULL, 0);
1942   g_return_val_if_fail (s2 != NULL, 0);
1943
1944   while (n && *s1 && *s2)
1945     {
1946       n -= 1;
1947       /* According to A. Cox, some platforms have islower's that
1948        * don't work right on non-uppercase
1949        */
1950       c1 = isupper ((guchar)*s1) ? tolower ((guchar)*s1) : *s1;
1951       c2 = isupper ((guchar)*s2) ? tolower ((guchar)*s2) : *s2;
1952       if (c1 != c2)
1953         return (c1 - c2);
1954       s1++; s2++;
1955     }
1956
1957   if (n)
1958     return (((gint) (guchar) *s1) - ((gint) (guchar) *s2));
1959   else
1960     return 0;
1961 #endif
1962 }
1963
1964 /**
1965  * g_strdelimit:
1966  * @string: the string to convert
1967  * @delimiters: (nullable): a string containing the current delimiters,
1968  *     or %NULL to use the standard delimiters defined in #G_STR_DELIMITERS
1969  * @new_delimiter: the new delimiter character
1970  *
1971  * Converts any delimiter characters in @string to @new_delimiter.
1972  * Any characters in @string which are found in @delimiters are
1973  * changed to the @new_delimiter character. Modifies @string in place,
1974  * and returns @string itself, not a copy. The return value is to
1975  * allow nesting such as
1976  * |[<!-- language="C" -->
1977  *   g_ascii_strup (g_strdelimit (str, "abc", '?'))
1978  * ]|
1979  *
1980  * Returns: @string
1981  */
1982 gchar *
1983 g_strdelimit (gchar       *string,
1984               const gchar *delimiters,
1985               gchar        new_delim)
1986 {
1987   gchar *c;
1988
1989   g_return_val_if_fail (string != NULL, NULL);
1990
1991   if (!delimiters)
1992     delimiters = G_STR_DELIMITERS;
1993
1994   for (c = string; *c; c++)
1995     {
1996       if (strchr (delimiters, *c))
1997         *c = new_delim;
1998     }
1999
2000   return string;
2001 }
2002
2003 /**
2004  * g_strcanon:
2005  * @string: a nul-terminated array of bytes
2006  * @valid_chars: bytes permitted in @string
2007  * @substitutor: replacement character for disallowed bytes
2008  *
2009  * For each character in @string, if the character is not in @valid_chars,
2010  * replaces the character with @substitutor. Modifies @string in place,
2011  * and return @string itself, not a copy. The return value is to allow
2012  * nesting such as
2013  * |[<!-- language="C" -->
2014  *   g_ascii_strup (g_strcanon (str, "abc", '?'))
2015  * ]|
2016  *
2017  * Returns: @string
2018  */
2019 gchar *
2020 g_strcanon (gchar       *string,
2021             const gchar *valid_chars,
2022             gchar        substitutor)
2023 {
2024   gchar *c;
2025
2026   g_return_val_if_fail (string != NULL, NULL);
2027   g_return_val_if_fail (valid_chars != NULL, NULL);
2028
2029   for (c = string; *c; c++)
2030     {
2031       if (!strchr (valid_chars, *c))
2032         *c = substitutor;
2033     }
2034
2035   return string;
2036 }
2037
2038 /**
2039  * g_strcompress:
2040  * @source: a string to compress
2041  *
2042  * Replaces all escaped characters with their one byte equivalent.
2043  *
2044  * This function does the reverse conversion of g_strescape().
2045  *
2046  * Returns: a newly-allocated copy of @source with all escaped
2047  *     character compressed
2048  */
2049 gchar *
2050 g_strcompress (const gchar *source)
2051 {
2052   const gchar *p = source, *octal;
2053   gchar *dest;
2054   gchar *q;
2055
2056   g_return_val_if_fail (source != NULL, NULL);
2057
2058   dest = g_malloc (strlen (source) + 1);
2059   q = dest;
2060
2061   while (*p)
2062     {
2063       if (*p == '\\')
2064         {
2065           p++;
2066           switch (*p)
2067             {
2068             case '\0':
2069               g_warning ("g_strcompress: trailing \\");
2070               goto out;
2071             case '0':  case '1':  case '2':  case '3':  case '4':
2072             case '5':  case '6':  case '7':
2073               *q = 0;
2074               octal = p;
2075               while ((p < octal + 3) && (*p >= '0') && (*p <= '7'))
2076                 {
2077                   *q = (*q * 8) + (*p - '0');
2078                   p++;
2079                 }
2080               q++;
2081               p--;
2082               break;
2083             case 'b':
2084               *q++ = '\b';
2085               break;
2086             case 'f':
2087               *q++ = '\f';
2088               break;
2089             case 'n':
2090               *q++ = '\n';
2091               break;
2092             case 'r':
2093               *q++ = '\r';
2094               break;
2095             case 't':
2096               *q++ = '\t';
2097               break;
2098             case 'v':
2099               *q++ = '\v';
2100               break;
2101             default:            /* Also handles \" and \\ */
2102               *q++ = *p;
2103               break;
2104             }
2105         }
2106       else
2107         *q++ = *p;
2108       p++;
2109     }
2110 out:
2111   *q = 0;
2112
2113   return dest;
2114 }
2115
2116 /**
2117  * g_strescape:
2118  * @source: a string to escape
2119  * @exceptions: (nullable): a string of characters not to escape in @source
2120  *
2121  * Escapes the special characters '\b', '\f', '\n', '\r', '\t', '\v', '\'
2122  * and '"' in the string @source by inserting a '\' before
2123  * them. Additionally all characters in the range 0x01-0x1F (everything
2124  * below SPACE) and in the range 0x7F-0xFF (all non-ASCII chars) are
2125  * replaced with a '\' followed by their octal representation.
2126  * Characters supplied in @exceptions are not escaped.
2127  *
2128  * g_strcompress() does the reverse conversion.
2129  *
2130  * Returns: a newly-allocated copy of @source with certain
2131  *     characters escaped. See above.
2132  */
2133 gchar *
2134 g_strescape (const gchar *source,
2135              const gchar *exceptions)
2136 {
2137   const guchar *p;
2138   gchar *dest;
2139   gchar *q;
2140   guchar excmap[256];
2141
2142   g_return_val_if_fail (source != NULL, NULL);
2143
2144   p = (guchar *) source;
2145   /* Each source byte needs maximally four destination chars (\777) */
2146   q = dest = g_malloc (strlen (source) * 4 + 1);
2147
2148   memset (excmap, 0, 256);
2149   if (exceptions)
2150     {
2151       guchar *e = (guchar *) exceptions;
2152
2153       while (*e)
2154         {
2155           excmap[*e] = 1;
2156           e++;
2157         }
2158     }
2159
2160   while (*p)
2161     {
2162       if (excmap[*p])
2163         *q++ = *p;
2164       else
2165         {
2166           switch (*p)
2167             {
2168             case '\b':
2169               *q++ = '\\';
2170               *q++ = 'b';
2171               break;
2172             case '\f':
2173               *q++ = '\\';
2174               *q++ = 'f';
2175               break;
2176             case '\n':
2177               *q++ = '\\';
2178               *q++ = 'n';
2179               break;
2180             case '\r':
2181               *q++ = '\\';
2182               *q++ = 'r';
2183               break;
2184             case '\t':
2185               *q++ = '\\';
2186               *q++ = 't';
2187               break;
2188             case '\v':
2189               *q++ = '\\';
2190               *q++ = 'v';
2191               break;
2192             case '\\':
2193               *q++ = '\\';
2194               *q++ = '\\';
2195               break;
2196             case '"':
2197               *q++ = '\\';
2198               *q++ = '"';
2199               break;
2200             default:
2201               if ((*p < ' ') || (*p >= 0177))
2202                 {
2203                   *q++ = '\\';
2204                   *q++ = '0' + (((*p) >> 6) & 07);
2205                   *q++ = '0' + (((*p) >> 3) & 07);
2206                   *q++ = '0' + ((*p) & 07);
2207                 }
2208               else
2209                 *q++ = *p;
2210               break;
2211             }
2212         }
2213       p++;
2214     }
2215   *q = 0;
2216   return dest;
2217 }
2218
2219 /**
2220  * g_strchug:
2221  * @string: a string to remove the leading whitespace from
2222  *
2223  * Removes leading whitespace from a string, by moving the rest
2224  * of the characters forward.
2225  *
2226  * This function doesn't allocate or reallocate any memory;
2227  * it modifies @string in place. Therefore, it cannot be used on
2228  * statically allocated strings.
2229  *
2230  * The pointer to @string is returned to allow the nesting of functions.
2231  *
2232  * Also see g_strchomp() and g_strstrip().
2233  *
2234  * Returns: @string
2235  */
2236 gchar *
2237 g_strchug (gchar *string)
2238 {
2239   guchar *start;
2240
2241   g_return_val_if_fail (string != NULL, NULL);
2242
2243   for (start = (guchar*) string; *start && g_ascii_isspace (*start); start++)
2244     ;
2245
2246   memmove (string, start, strlen ((gchar *) start) + 1);
2247
2248   return string;
2249 }
2250
2251 /**
2252  * g_strchomp:
2253  * @string: a string to remove the trailing whitespace from
2254  *
2255  * Removes trailing whitespace from a string.
2256  *
2257  * This function doesn't allocate or reallocate any memory;
2258  * it modifies @string in place. Therefore, it cannot be used
2259  * on statically allocated strings.
2260  *
2261  * The pointer to @string is returned to allow the nesting of functions.
2262  *
2263  * Also see g_strchug() and g_strstrip().
2264  *
2265  * Returns: @string
2266  */
2267 gchar *
2268 g_strchomp (gchar *string)
2269 {
2270   gsize len;
2271
2272   g_return_val_if_fail (string != NULL, NULL);
2273
2274   len = strlen (string);
2275   while (len--)
2276     {
2277       if (g_ascii_isspace ((guchar) string[len]))
2278         string[len] = '\0';
2279       else
2280         break;
2281     }
2282
2283   return string;
2284 }
2285
2286 /**
2287  * g_strsplit:
2288  * @string: a string to split
2289  * @delimiter: a string which specifies the places at which to split
2290  *     the string. The delimiter is not included in any of the resulting
2291  *     strings, unless @max_tokens is reached.
2292  * @max_tokens: the maximum number of pieces to split @string into.
2293  *     If this is less than 1, the string is split completely.
2294  *
2295  * Splits a string into a maximum of @max_tokens pieces, using the given
2296  * @delimiter. If @max_tokens is reached, the remainder of @string is
2297  * appended to the last token.
2298  *
2299  * As an example, the result of g_strsplit (":a:bc::d:", ":", -1) is a
2300  * %NULL-terminated vector containing the six strings "", "a", "bc", "", "d"
2301  * and "".
2302  *
2303  * As a special case, the result of splitting the empty string "" is an empty
2304  * vector, not a vector containing a single string. The reason for this
2305  * special case is that being able to represent a empty vector is typically
2306  * more useful than consistent handling of empty elements. If you do need
2307  * to represent empty elements, you'll need to check for the empty string
2308  * before calling g_strsplit().
2309  *
2310  * Returns: a newly-allocated %NULL-terminated array of strings. Use
2311  *    g_strfreev() to free it.
2312  */
2313 gchar**
2314 g_strsplit (const gchar *string,
2315             const gchar *delimiter,
2316             gint         max_tokens)
2317 {
2318   GSList *string_list = NULL, *slist;
2319   gchar **str_array, *s;
2320   guint n = 0;
2321   const gchar *remainder;
2322
2323   g_return_val_if_fail (string != NULL, NULL);
2324   g_return_val_if_fail (delimiter != NULL, NULL);
2325   g_return_val_if_fail (delimiter[0] != '\0', NULL);
2326
2327   if (max_tokens < 1)
2328     max_tokens = G_MAXINT;
2329
2330   remainder = string;
2331   s = strstr (remainder, delimiter);
2332   if (s)
2333     {
2334       gsize delimiter_len = strlen (delimiter);
2335
2336       while (--max_tokens && s)
2337         {
2338           gsize len;
2339
2340           len = s - remainder;
2341           string_list = g_slist_prepend (string_list,
2342                                          g_strndup (remainder, len));
2343           n++;
2344           remainder = s + delimiter_len;
2345           s = strstr (remainder, delimiter);
2346         }
2347     }
2348   if (*string)
2349     {
2350       n++;
2351       string_list = g_slist_prepend (string_list, g_strdup (remainder));
2352     }
2353
2354   str_array = g_new (gchar*, n + 1);
2355
2356   str_array[n--] = NULL;
2357   for (slist = string_list; slist; slist = slist->next)
2358     str_array[n--] = slist->data;
2359
2360   g_slist_free (string_list);
2361
2362   return str_array;
2363 }
2364
2365 /**
2366  * g_strsplit_set:
2367  * @string: The string to be tokenized
2368  * @delimiters: A nul-terminated string containing bytes that are used
2369  *     to split the string.
2370  * @max_tokens: The maximum number of tokens to split @string into.
2371  *     If this is less than 1, the string is split completely
2372  *
2373  * Splits @string into a number of tokens not containing any of the characters
2374  * in @delimiter. A token is the (possibly empty) longest string that does not
2375  * contain any of the characters in @delimiters. If @max_tokens is reached, the
2376  * remainder is appended to the last token.
2377  *
2378  * For example the result of g_strsplit_set ("abc:def/ghi", ":/", -1) is a
2379  * %NULL-terminated vector containing the three strings "abc", "def",
2380  * and "ghi".
2381  *
2382  * The result of g_strsplit_set (":def/ghi:", ":/", -1) is a %NULL-terminated
2383  * vector containing the four strings "", "def", "ghi", and "".
2384  *
2385  * As a special case, the result of splitting the empty string "" is an empty
2386  * vector, not a vector containing a single string. The reason for this
2387  * special case is that being able to represent a empty vector is typically
2388  * more useful than consistent handling of empty elements. If you do need
2389  * to represent empty elements, you'll need to check for the empty string
2390  * before calling g_strsplit_set().
2391  *
2392  * Note that this function works on bytes not characters, so it can't be used
2393  * to delimit UTF-8 strings for anything but ASCII characters.
2394  *
2395  * Returns: a newly-allocated %NULL-terminated array of strings. Use
2396  *    g_strfreev() to free it.
2397  *
2398  * Since: 2.4
2399  **/
2400 gchar **
2401 g_strsplit_set (const gchar *string,
2402                 const gchar *delimiters,
2403                 gint         max_tokens)
2404 {
2405   gboolean delim_table[256];
2406   GSList *tokens, *list;
2407   gint n_tokens;
2408   const gchar *s;
2409   const gchar *current;
2410   gchar *token;
2411   gchar **result;
2412
2413   g_return_val_if_fail (string != NULL, NULL);
2414   g_return_val_if_fail (delimiters != NULL, NULL);
2415
2416   if (max_tokens < 1)
2417     max_tokens = G_MAXINT;
2418
2419   if (*string == '\0')
2420     {
2421       result = g_new (char *, 1);
2422       result[0] = NULL;
2423       return result;
2424     }
2425
2426   memset (delim_table, FALSE, sizeof (delim_table));
2427   for (s = delimiters; *s != '\0'; ++s)
2428     delim_table[*(guchar *)s] = TRUE;
2429
2430   tokens = NULL;
2431   n_tokens = 0;
2432
2433   s = current = string;
2434   while (*s != '\0')
2435     {
2436       if (delim_table[*(guchar *)s] && n_tokens + 1 < max_tokens)
2437         {
2438           token = g_strndup (current, s - current);
2439           tokens = g_slist_prepend (tokens, token);
2440           ++n_tokens;
2441
2442           current = s + 1;
2443         }
2444
2445       ++s;
2446     }
2447
2448   token = g_strndup (current, s - current);
2449   tokens = g_slist_prepend (tokens, token);
2450   ++n_tokens;
2451
2452   result = g_new (gchar *, n_tokens + 1);
2453
2454   result[n_tokens] = NULL;
2455   for (list = tokens; list != NULL; list = list->next)
2456     result[--n_tokens] = list->data;
2457
2458   g_slist_free (tokens);
2459
2460   return result;
2461 }
2462
2463 /**
2464  * GStrv:
2465  *
2466  * A typedef alias for gchar**. This is mostly useful when used together with
2467  * g_auto().
2468  */
2469
2470 /**
2471  * g_strfreev:
2472  * @str_array: (nullable): a %NULL-terminated array of strings to free
2473  *
2474  * Frees a %NULL-terminated array of strings, as well as each
2475  * string it contains.
2476  *
2477  * If @str_array is %NULL, this function simply returns.
2478  */
2479 void
2480 g_strfreev (gchar **str_array)
2481 {
2482   if (str_array)
2483     {
2484       int i;
2485
2486       for (i = 0; str_array[i] != NULL; i++)
2487         g_free (str_array[i]);
2488
2489       g_free (str_array);
2490     }
2491 }
2492
2493 /**
2494  * g_strdupv:
2495  * @str_array: (nullable): a %NULL-terminated array of strings
2496  *
2497  * Copies %NULL-terminated array of strings. The copy is a deep copy;
2498  * the new array should be freed by first freeing each string, then
2499  * the array itself. g_strfreev() does this for you. If called
2500  * on a %NULL value, g_strdupv() simply returns %NULL.
2501  *
2502  * Returns: (nullable): a new %NULL-terminated array of strings.
2503  */
2504 gchar**
2505 g_strdupv (gchar **str_array)
2506 {
2507   if (str_array)
2508     {
2509       gint i;
2510       gchar **retval;
2511
2512       i = 0;
2513       while (str_array[i])
2514         ++i;
2515
2516       retval = g_new (gchar*, i + 1);
2517
2518       i = 0;
2519       while (str_array[i])
2520         {
2521           retval[i] = g_strdup (str_array[i]);
2522           ++i;
2523         }
2524       retval[i] = NULL;
2525
2526       return retval;
2527     }
2528   else
2529     return NULL;
2530 }
2531
2532 /**
2533  * g_strjoinv:
2534  * @separator: (nullable): a string to insert between each of the
2535  *     strings, or %NULL
2536  * @str_array: a %NULL-terminated array of strings to join
2537  *
2538  * Joins a number of strings together to form one long string, with the
2539  * optional @separator inserted between each of them. The returned string
2540  * should be freed with g_free().
2541  *
2542  * If @str_array has no items, the return value will be an
2543  * empty string. If @str_array contains a single item, @separator will not
2544  * appear in the resulting string.
2545  *
2546  * Returns: a newly-allocated string containing all of the strings joined
2547  *     together, with @separator between them
2548  */
2549 gchar*
2550 g_strjoinv (const gchar  *separator,
2551             gchar       **str_array)
2552 {
2553   gchar *string;
2554   gchar *ptr;
2555
2556   g_return_val_if_fail (str_array != NULL, NULL);
2557
2558   if (separator == NULL)
2559     separator = "";
2560
2561   if (*str_array)
2562     {
2563       gint i;
2564       gsize len;
2565       gsize separator_len;
2566
2567       separator_len = strlen (separator);
2568       /* First part, getting length */
2569       len = 1 + strlen (str_array[0]);
2570       for (i = 1; str_array[i] != NULL; i++)
2571         len += strlen (str_array[i]);
2572       len += separator_len * (i - 1);
2573
2574       /* Second part, building string */
2575       string = g_new (gchar, len);
2576       ptr = g_stpcpy (string, *str_array);
2577       for (i = 1; str_array[i] != NULL; i++)
2578         {
2579           ptr = g_stpcpy (ptr, separator);
2580           ptr = g_stpcpy (ptr, str_array[i]);
2581         }
2582       }
2583   else
2584     string = g_strdup ("");
2585
2586   return string;
2587 }
2588
2589 /**
2590  * g_strjoin:
2591  * @separator: (nullable): a string to insert between each of the
2592  *     strings, or %NULL
2593  * @...: a %NULL-terminated list of strings to join
2594  *
2595  * Joins a number of strings together to form one long string, with the
2596  * optional @separator inserted between each of them. The returned string
2597  * should be freed with g_free().
2598  *
2599  * Returns: a newly-allocated string containing all of the strings joined
2600  *     together, with @separator between them
2601  */
2602 gchar*
2603 g_strjoin (const gchar *separator,
2604            ...)
2605 {
2606   gchar *string, *s;
2607   va_list args;
2608   gsize len;
2609   gsize separator_len;
2610   gchar *ptr;
2611
2612   if (separator == NULL)
2613     separator = "";
2614
2615   separator_len = strlen (separator);
2616
2617   va_start (args, separator);
2618
2619   s = va_arg (args, gchar*);
2620
2621   if (s)
2622     {
2623       /* First part, getting length */
2624       len = 1 + strlen (s);
2625
2626       s = va_arg (args, gchar*);
2627       while (s)
2628         {
2629           len += separator_len + strlen (s);
2630           s = va_arg (args, gchar*);
2631         }
2632       va_end (args);
2633
2634       /* Second part, building string */
2635       string = g_new (gchar, len);
2636
2637       va_start (args, separator);
2638
2639       s = va_arg (args, gchar*);
2640       ptr = g_stpcpy (string, s);
2641
2642       s = va_arg (args, gchar*);
2643       while (s)
2644         {
2645           ptr = g_stpcpy (ptr, separator);
2646           ptr = g_stpcpy (ptr, s);
2647           s = va_arg (args, gchar*);
2648         }
2649     }
2650   else
2651     string = g_strdup ("");
2652
2653   va_end (args);
2654
2655   return string;
2656 }
2657
2658
2659 /**
2660  * g_strstr_len:
2661  * @haystack: a string
2662  * @haystack_len: the maximum length of @haystack. Note that -1 is
2663  *     a valid length, if @haystack is nul-terminated, meaning it will
2664  *     search through the whole string.
2665  * @needle: the string to search for
2666  *
2667  * Searches the string @haystack for the first occurrence
2668  * of the string @needle, limiting the length of the search
2669  * to @haystack_len.
2670  *
2671  * Returns: a pointer to the found occurrence, or
2672  *    %NULL if not found.
2673  */
2674 gchar *
2675 g_strstr_len (const gchar *haystack,
2676               gssize       haystack_len,
2677               const gchar *needle)
2678 {
2679   g_return_val_if_fail (haystack != NULL, NULL);
2680   g_return_val_if_fail (needle != NULL, NULL);
2681
2682   if (haystack_len < 0)
2683     return strstr (haystack, needle);
2684   else
2685     {
2686       const gchar *p = haystack;
2687       gsize needle_len = strlen (needle);
2688       const gchar *end;
2689       gsize i;
2690
2691       if (needle_len == 0)
2692         return (gchar *)haystack;
2693
2694       if (haystack_len < needle_len)
2695         return NULL;
2696
2697       end = haystack + haystack_len - needle_len;
2698
2699       while (p <= end && *p)
2700         {
2701           for (i = 0; i < needle_len; i++)
2702             if (p[i] != needle[i])
2703               goto next;
2704
2705           return (gchar *)p;
2706
2707         next:
2708           p++;
2709         }
2710
2711       return NULL;
2712     }
2713 }
2714
2715 /**
2716  * g_strrstr:
2717  * @haystack: a nul-terminated string
2718  * @needle: the nul-terminated string to search for
2719  *
2720  * Searches the string @haystack for the last occurrence
2721  * of the string @needle.
2722  *
2723  * Returns: a pointer to the found occurrence, or
2724  *    %NULL if not found.
2725  */
2726 gchar *
2727 g_strrstr (const gchar *haystack,
2728            const gchar *needle)
2729 {
2730   gsize i;
2731   gsize needle_len;
2732   gsize haystack_len;
2733   const gchar *p;
2734
2735   g_return_val_if_fail (haystack != NULL, NULL);
2736   g_return_val_if_fail (needle != NULL, NULL);
2737
2738   needle_len = strlen (needle);
2739   haystack_len = strlen (haystack);
2740
2741   if (needle_len == 0)
2742     return (gchar *)haystack;
2743
2744   if (haystack_len < needle_len)
2745     return NULL;
2746
2747   p = haystack + haystack_len - needle_len;
2748
2749   while (p >= haystack)
2750     {
2751       for (i = 0; i < needle_len; i++)
2752         if (p[i] != needle[i])
2753           goto next;
2754
2755       return (gchar *)p;
2756
2757     next:
2758       p--;
2759     }
2760
2761   return NULL;
2762 }
2763
2764 /**
2765  * g_strrstr_len:
2766  * @haystack: a nul-terminated string
2767  * @haystack_len: the maximum length of @haystack
2768  * @needle: the nul-terminated string to search for
2769  *
2770  * Searches the string @haystack for the last occurrence
2771  * of the string @needle, limiting the length of the search
2772  * to @haystack_len.
2773  *
2774  * Returns: a pointer to the found occurrence, or
2775  *    %NULL if not found.
2776  */
2777 gchar *
2778 g_strrstr_len (const gchar *haystack,
2779                gssize        haystack_len,
2780                const gchar *needle)
2781 {
2782   g_return_val_if_fail (haystack != NULL, NULL);
2783   g_return_val_if_fail (needle != NULL, NULL);
2784
2785   if (haystack_len < 0)
2786     return g_strrstr (haystack, needle);
2787   else
2788     {
2789       gsize needle_len = strlen (needle);
2790       const gchar *haystack_max = haystack + haystack_len;
2791       const gchar *p = haystack;
2792       gsize i;
2793
2794       while (p < haystack_max && *p)
2795         p++;
2796
2797       if (p < haystack + needle_len)
2798         return NULL;
2799
2800       p -= needle_len;
2801
2802       while (p >= haystack)
2803         {
2804           for (i = 0; i < needle_len; i++)
2805             if (p[i] != needle[i])
2806               goto next;
2807
2808           return (gchar *)p;
2809
2810         next:
2811           p--;
2812         }
2813
2814       return NULL;
2815     }
2816 }
2817
2818
2819 /**
2820  * g_str_has_suffix:
2821  * @str: a nul-terminated string
2822  * @suffix: the nul-terminated suffix to look for
2823  *
2824  * Looks whether the string @str ends with @suffix.
2825  *
2826  * Returns: %TRUE if @str end with @suffix, %FALSE otherwise.
2827  *
2828  * Since: 2.2
2829  */
2830 gboolean
2831 g_str_has_suffix (const gchar *str,
2832                   const gchar *suffix)
2833 {
2834   int str_len;
2835   int suffix_len;
2836
2837   g_return_val_if_fail (str != NULL, FALSE);
2838   g_return_val_if_fail (suffix != NULL, FALSE);
2839
2840   str_len = strlen (str);
2841   suffix_len = strlen (suffix);
2842
2843   if (str_len < suffix_len)
2844     return FALSE;
2845
2846   return strcmp (str + str_len - suffix_len, suffix) == 0;
2847 }
2848
2849 /**
2850  * g_str_has_prefix:
2851  * @str: a nul-terminated string
2852  * @prefix: the nul-terminated prefix to look for
2853  *
2854  * Looks whether the string @str begins with @prefix.
2855  *
2856  * Returns: %TRUE if @str begins with @prefix, %FALSE otherwise.
2857  *
2858  * Since: 2.2
2859  */
2860 gboolean
2861 g_str_has_prefix (const gchar *str,
2862                   const gchar *prefix)
2863 {
2864   g_return_val_if_fail (str != NULL, FALSE);
2865   g_return_val_if_fail (prefix != NULL, FALSE);
2866
2867   return strncmp (str, prefix, strlen (prefix)) == 0;
2868 }
2869
2870 /**
2871  * g_strv_length:
2872  * @str_array: a %NULL-terminated array of strings
2873  *
2874  * Returns the length of the given %NULL-terminated
2875  * string array @str_array.
2876  *
2877  * Returns: length of @str_array.
2878  *
2879  * Since: 2.6
2880  */
2881 guint
2882 g_strv_length (gchar **str_array)
2883 {
2884   guint i = 0;
2885
2886   g_return_val_if_fail (str_array != NULL, 0);
2887
2888   while (str_array[i])
2889     ++i;
2890
2891   return i;
2892 }
2893
2894 static void
2895 index_add_folded (GPtrArray   *array,
2896                   const gchar *start,
2897                   const gchar *end)
2898 {
2899   gchar *normal;
2900
2901   normal = g_utf8_normalize (start, end - start, G_NORMALIZE_ALL_COMPOSE);
2902
2903   /* TODO: Invent time machine.  Converse with Mustafa Ataturk... */
2904   if (strstr (normal, "ı") || strstr (normal, "İ"))
2905     {
2906       gchar *s = normal;
2907       GString *tmp;
2908
2909       tmp = g_string_new (NULL);
2910
2911       while (*s)
2912         {
2913           gchar *i, *I, *e;
2914
2915           i = strstr (s, "ı");
2916           I = strstr (s, "İ");
2917
2918           if (!i && !I)
2919             break;
2920           else if (i && !I)
2921             e = i;
2922           else if (I && !i)
2923             e = I;
2924           else if (i < I)
2925             e = i;
2926           else
2927             e = I;
2928
2929           g_string_append_len (tmp, s, e - s);
2930           g_string_append_c (tmp, 'i');
2931           s = g_utf8_next_char (e);
2932         }
2933
2934       g_string_append (tmp, s);
2935       g_free (normal);
2936       normal = g_string_free (tmp, FALSE);
2937     }
2938
2939   g_ptr_array_add (array, g_utf8_casefold (normal, -1));
2940   g_free (normal);
2941 }
2942
2943 static gchar **
2944 split_words (const gchar *value)
2945 {
2946   const gchar *start = NULL;
2947   GPtrArray *result;
2948   const gchar *s;
2949
2950   result = g_ptr_array_new ();
2951
2952   for (s = value; *s; s = g_utf8_next_char (s))
2953     {
2954       gunichar c = g_utf8_get_char (s);
2955
2956       if (start == NULL)
2957         {
2958           if (g_unichar_isalnum (c) || g_unichar_ismark (c))
2959             start = s;
2960         }
2961       else
2962         {
2963           if (!g_unichar_isalnum (c) && !g_unichar_ismark (c))
2964             {
2965               index_add_folded (result, start, s);
2966               start = NULL;
2967             }
2968         }
2969     }
2970
2971   if (start)
2972     index_add_folded (result, start, s);
2973
2974   g_ptr_array_add (result, NULL);
2975
2976   return (gchar **) g_ptr_array_free (result, FALSE);
2977 }
2978
2979 /**
2980  * g_str_tokenize_and_fold:
2981  * @string: a string
2982  * @translit_locale: (nullable): the language code (like 'de' or
2983  *   'en_GB') from which @string originates
2984  * @ascii_alternates: (out) (transfer full) (array zero-terminated=1): a
2985  *   return location for ASCII alternates
2986  *
2987  * Tokenises @string and performs folding on each token.
2988  *
2989  * A token is a non-empty sequence of alphanumeric characters in the
2990  * source string, separated by non-alphanumeric characters.  An
2991  * "alphanumeric" character for this purpose is one that matches
2992  * g_unichar_isalnum() or g_unichar_ismark().
2993  *
2994  * Each token is then (Unicode) normalised and case-folded.  If
2995  * @ascii_alternates is non-%NULL and some of the returned tokens
2996  * contain non-ASCII characters, ASCII alternatives will be generated.
2997  *
2998  * The number of ASCII alternatives that are generated and the method
2999  * for doing so is unspecified, but @translit_locale (if specified) may
3000  * improve the transliteration if the language of the source string is
3001  * known.
3002  *
3003  * Returns: (transfer full) (array zero-terminated=1): the folded tokens
3004  *
3005  * Since: 2.40
3006  **/
3007 gchar **
3008 g_str_tokenize_and_fold (const gchar   *string,
3009                          const gchar   *translit_locale,
3010                          gchar       ***ascii_alternates)
3011 {
3012   gchar **result;
3013
3014   g_return_val_if_fail (string != NULL, NULL);
3015
3016   if (ascii_alternates && g_str_is_ascii (string))
3017     {
3018       *ascii_alternates = g_new0 (gchar *, 0 + 1);
3019       ascii_alternates = NULL;
3020     }
3021
3022   result = split_words (string);
3023
3024   if (ascii_alternates)
3025     {
3026       gint i, j, n;
3027
3028       n = g_strv_length (result);
3029       *ascii_alternates = g_new (gchar *, n + 1);
3030       j = 0;
3031
3032       for (i = 0; i < n; i++)
3033         {
3034           if (!g_str_is_ascii (result[i]))
3035             {
3036               gchar *composed;
3037               gchar *ascii;
3038               gint k;
3039
3040               composed = g_utf8_normalize (result[i], -1, G_NORMALIZE_ALL_COMPOSE);
3041
3042               ascii = g_str_to_ascii (composed, translit_locale);
3043
3044               /* Only accept strings that are now entirely alnums */
3045               for (k = 0; ascii[k]; k++)
3046                 if (!g_ascii_isalnum (ascii[k]))
3047                   break;
3048
3049               if (ascii[k] == '\0')
3050                 /* Made it to the end... */
3051                 (*ascii_alternates)[j++] = ascii;
3052               else
3053                 g_free (ascii);
3054
3055               g_free (composed);
3056             }
3057         }
3058
3059       (*ascii_alternates)[j] = NULL;
3060     }
3061
3062   return result;
3063 }
3064
3065 /**
3066  * g_str_match_string:
3067  * @search_term: the search term from the user
3068  * @potential_hit: the text that may be a hit
3069  * @accept_alternates: %TRUE to accept ASCII alternates
3070  *
3071  * Checks if a search conducted for @search_term should match
3072  * @potential_hit.
3073  *
3074  * This function calls g_str_tokenize_and_fold() on both
3075  * @search_term and @potential_hit.  ASCII alternates are never taken
3076  * for @search_term but will be taken for @potential_hit according to
3077  * the value of @accept_alternates.
3078  *
3079  * A hit occurs when each folded token in @search_term is a prefix of a
3080  * folded token from @potential_hit.
3081  *
3082  * Depending on how you're performing the search, it will typically be
3083  * faster to call g_str_tokenize_and_fold() on each string in
3084  * your corpus and build an index on the returned folded tokens, then
3085  * call g_str_tokenize_and_fold() on the search term and
3086  * perform lookups into that index.
3087  *
3088  * As some examples, searching for "fred" would match the potential hit
3089  * "Smith, Fred" and also "Frédéric".  Searching for "Fréd" would match
3090  * "Frédéric" but not "Frederic" (due to the one-directional nature of
3091  * accent matching).  Searching "fo" would match "Foo" and "Bar Foo
 * Baz", but not "SFO" (because no word has "fo" as a prefix).
3093  *
3094  * Returns: %TRUE if @potential_hit is a hit
3095  *
3096  * Since: 2.40
3097  **/
3098 gboolean
3099 g_str_match_string (const gchar *search_term,
3100                     const gchar *potential_hit,
3101                     gboolean     accept_alternates)
3102 {
3103   gchar **alternates = NULL;
3104   gchar **term_tokens;
3105   gchar **hit_tokens;
3106   gboolean matched;
3107   gint i, j;
3108
3109   g_return_val_if_fail (search_term != NULL, FALSE);
3110   g_return_val_if_fail (potential_hit != NULL, FALSE);
3111
3112   term_tokens = g_str_tokenize_and_fold (search_term, NULL, NULL);
3113   hit_tokens = g_str_tokenize_and_fold (potential_hit, NULL, accept_alternates ? &alternates : NULL);
3114
3115   matched = TRUE;
3116
3117   for (i = 0; term_tokens[i]; i++)
3118     {
3119       for (j = 0; hit_tokens[j]; j++)
3120         if (g_str_has_prefix (hit_tokens[j], term_tokens[i]))
3121           goto one_matched;
3122
3123       if (accept_alternates)
3124         for (j = 0; alternates[j]; j++)
3125           if (g_str_has_prefix (alternates[j], term_tokens[i]))
3126             goto one_matched;
3127
3128       matched = FALSE;
3129       break;
3130
3131 one_matched:
3132       continue;
3133     }
3134
3135   g_strfreev (term_tokens);
3136   g_strfreev (hit_tokens);
3137   g_strfreev (alternates);
3138
3139   return matched;
3140 }
3141
3142 /**
3143  * g_strv_contains:
3144  * @strv: a %NULL-terminated array of strings
3145  * @str: a string
3146  *
3147  * Checks if @strv contains @str. @strv must not be %NULL.
3148  *
3149  * Returns: %TRUE if @str is an element of @strv, according to g_str_equal().
3150  *
3151  * Since: 2.44
3152  */
3153 gboolean
3154 g_strv_contains (const gchar * const *strv,
3155                  const gchar         *str)
3156 {
3157   g_return_val_if_fail (strv != NULL, FALSE);
3158   g_return_val_if_fail (str != NULL, FALSE);
3159
3160   for (; *strv != NULL; strv++)
3161     {
3162       if (g_str_equal (str, *strv))
3163         return TRUE;
3164     }
3165
3166   return FALSE;
3167 }
3168
3169 static gboolean
3170 str_has_sign (const gchar *str)
3171 {
3172   return str[0] == '-' || str[0] == '+';
3173 }
3174
3175 static gboolean
3176 str_has_hex_prefix (const gchar *str)
3177 {
3178   return str[0] == '0' && g_ascii_tolower (str[1]) == 'x';
3179 }
3180
3181 /**
3182  * g_ascii_string_to_signed:
3183  * @str: a string
3184  * @base: base of a parsed number
3185  * @min: a lower bound (inclusive)
3186  * @max: an upper bound (inclusive)
3187  * @out_num: (out) (optional): a return location for a number
3188  * @error: a return location for #GError
3189  *
3190  * A convenience function for converting a string to a signed number.
3191  *
3192  * This function assumes that @str contains only a number of the given
3193  * @base that is within inclusive bounds limited by @min and @max. If
3194  * this is true, then the converted number is stored in @out_num. An
3195  * empty string is not a valid input. A string with leading or
3196  * trailing whitespace is also an invalid input.
3197  *
3198  * @base can be between 2 and 36 inclusive. Hexadecimal numbers must
3199  * not be prefixed with "0x" or "0X". Such a problem does not exist
 * for octal numbers, since they are usually prefixed with a zero,
 * which does not change the value of the parsed number.
3202  *
3203  * Parsing failures result in an error with the %G_NUMBER_PARSER_ERROR
3204  * domain. If the input is invalid, the error code will be
3205  * %G_NUMBER_PARSER_ERROR_INVALID. If the parsed number is out of
3206  * bounds - %G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS.
3207  *
3208  * See g_ascii_strtoll() if you have more complex needs such as
3209  * parsing a string which starts with a number, but then has other
3210  * characters.
3211  *
3212  * Returns: %TRUE if @str was a number, otherwise %FALSE.
3213  *
3214  * Since: 2.54
3215  */
3216 gboolean
3217 g_ascii_string_to_signed (const gchar  *str,
3218                           guint         base,
3219                           gint64        min,
3220                           gint64        max,
3221                           gint64       *out_num,
3222                           GError      **error)
3223 {
3224   gint64 number;
3225   const gchar *end_ptr = NULL;
3226   gint saved_errno = 0;
3227
3228   g_return_val_if_fail (str != NULL, FALSE);
3229   g_return_val_if_fail (base >= 2 && base <= 36, FALSE);
3230   g_return_val_if_fail (min <= max, FALSE);
3231   g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
3232
3233   if (str[0] == '\0')
3234     {
3235       g_set_error_literal (error,
3236                            G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
3237                            _("Empty string is not a number"));
3238       return FALSE;
3239     }
3240
3241   errno = 0;
3242   number = g_ascii_strtoll (str, (gchar **)&end_ptr, base);
3243   saved_errno = errno;
3244
3245   if (/* We do not allow leading whitespace, but g_ascii_strtoll
3246        * accepts it and just skips it, so we need to check for it
3247        * ourselves.
3248        */
3249       g_ascii_isspace (str[0]) ||
3250       /* We don't support hexadecimal numbers prefixed with 0x or
3251        * 0X.
3252        */
3253       (base == 16 &&
3254        (str_has_sign (str) ? str_has_hex_prefix (str + 1) : str_has_hex_prefix (str))) ||
3255       (saved_errno != 0 && saved_errno != ERANGE) ||
3256       end_ptr == NULL ||
3257       *end_ptr != '\0')
3258     {
3259       g_set_error (error,
3260                    G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
3261                    _("“%s” is not a signed number"), str);
3262       return FALSE;
3263     }
3264   if (saved_errno == ERANGE || number < min || number > max)
3265     {
3266       gchar *min_str = g_strdup_printf ("%" G_GINT64_FORMAT, min);
3267       gchar *max_str = g_strdup_printf ("%" G_GINT64_FORMAT, max);
3268
3269       g_set_error (error,
3270                    G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS,
3271                    _("Number “%s” is out of bounds [%s, %s]"),
3272                    str, min_str, max_str);
3273       g_free (min_str);
3274       g_free (max_str);
3275       return FALSE;
3276     }
3277   if (out_num != NULL)
3278     *out_num = number;
3279   return TRUE;
3280 }
3281
3282 /**
3283  * g_ascii_string_to_unsigned:
3284  * @str: a string
3285  * @base: base of a parsed number
3286  * @min: a lower bound (inclusive)
3287  * @max: an upper bound (inclusive)
3288  * @out_num: (out) (optional): a return location for a number
3289  * @error: a return location for #GError
3290  *
3291  * A convenience function for converting a string to an unsigned number.
3292  *
3293  * This function assumes that @str contains only a number of the given
3294  * @base that is within inclusive bounds limited by @min and @max. If
3295  * this is true, then the converted number is stored in @out_num. An
3296  * empty string is not a valid input. A string with leading or
3297  * trailing whitespace is also an invalid input.
3298  *
3299  * @base can be between 2 and 36 inclusive. Hexadecimal numbers must
3300  * not be prefixed with "0x" or "0X". Such a problem does not exist
 * for octal numbers, since they are usually prefixed with a zero,
 * which does not change the value of the parsed number.
3303  *
3304  * Parsing failures result in an error with the %G_NUMBER_PARSER_ERROR
3305  * domain. If the input is invalid, the error code will be
3306  * %G_NUMBER_PARSER_ERROR_INVALID. If the parsed number is out of
3307  * bounds - %G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS.
3308  *
3309  * See g_ascii_strtoull() if you have more complex needs such as
3310  * parsing a string which starts with a number, but then has other
3311  * characters.
3312  *
3313  * Returns: %TRUE if @str was a number, otherwise %FALSE.
3314  *
3315  * Since: 2.54
3316  */
3317 gboolean
3318 g_ascii_string_to_unsigned (const gchar  *str,
3319                             guint         base,
3320                             guint64       min,
3321                             guint64       max,
3322                             guint64      *out_num,
3323                             GError      **error)
3324 {
3325   guint64 number;
3326   const gchar *end_ptr = NULL;
3327   gint saved_errno = 0;
3328
3329   g_return_val_if_fail (str != NULL, FALSE);
3330   g_return_val_if_fail (base >= 2 && base <= 36, FALSE);
3331   g_return_val_if_fail (min <= max, FALSE);
3332   g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
3333
3334   if (str[0] == '\0')
3335     {
3336       g_set_error_literal (error,
3337                            G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
3338                            _("Empty string is not a number"));
3339       return FALSE;
3340     }
3341
3342   errno = 0;
3343   number = g_ascii_strtoull (str, (gchar **)&end_ptr, base);
3344   saved_errno = errno;
3345
3346   if (/* We do not allow leading whitespace, but g_ascii_strtoull
3347        * accepts it and just skips it, so we need to check for it
3348        * ourselves.
3349        */
3350       g_ascii_isspace (str[0]) ||
3351       /* Unsigned number should have no sign.
3352        */
3353       str_has_sign (str) ||
3354       /* We don't support hexadecimal numbers prefixed with 0x or
3355        * 0X.
3356        */
3357       (base == 16 && str_has_hex_prefix (str)) ||
3358       (saved_errno != 0 && saved_errno != ERANGE) ||
3359       end_ptr == NULL ||
3360       *end_ptr != '\0')
3361     {
3362       g_set_error (error,
3363                    G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
3364                    _("“%s” is not an unsigned number"), str);
3365       return FALSE;
3366     }
3367   if (saved_errno == ERANGE || number < min || number > max)
3368     {
3369       gchar *min_str = g_strdup_printf ("%" G_GUINT64_FORMAT, min);
3370       gchar *max_str = g_strdup_printf ("%" G_GUINT64_FORMAT, max);
3371
3372       g_set_error (error,
3373                    G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS,
3374                    _("Number “%s” is out of bounds [%s, %s]"),
3375                    str, min_str, max_str);
3376       g_free (min_str);
3377       g_free (max_str);
3378       return FALSE;
3379     }
3380   if (out_num != NULL)
3381     *out_num = number;
3382   return TRUE;
3383 }
3384
/* Defines the quark for the number-parser error domain, i.e. the
 * domain that g_ascii_string_to_signed() and
 * g_ascii_string_to_unsigned() report through %G_NUMBER_PARSER_ERROR.
 *
 * NOTE(review): G_DEFINE_QUARK is defined outside this file; it
 * presumably generates g_number_parser_error_quark(), returning the
 * quark for the string "g-number-parser-error-quark" - confirm
 * against gquark.h.
 */
G_DEFINE_QUARK (g-number-parser-error-quark, g_number_parser_error)