lib/nl_langinfo.c

   1 /* nl_langinfo() replacement: query locale dependent information.
   2
   3    Copyright (C) 2007-2020 Free Software Foundation, Inc.
   4
   5    This program is free software: you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published by
   7    the Free Software Foundation; either version 3 of the License, or
   8    (at your option) any later version.
   9
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  17
  18 #include <config.h>
  19
  20 /* Specification.  */
  21 #include <langinfo.h>
  22
  23 #include <locale.h>
  24 #include <stdlib.h>
  25 #include <string.h>
  26 #if defined _WIN32 && ! defined __CYGWIN__
  27 # define WIN32_LEAN_AND_MEAN  /* avoid including junk */
  28 # include <windows.h>
  29 # include <stdio.h>
  30 #endif
  31
  32 #if REPLACE_NL_LANGINFO && !NL_LANGINFO_MTSAFE
  33 # if defined _WIN32 && !defined __CYGWIN__
  34
  35 #  define WIN32_LEAN_AND_MEAN  /* avoid including junk */
  36 #  include <windows.h>
  37
  38 # elif HAVE_PTHREAD_API
  39
  40 #  include <pthread.h>
  41 #  if HAVE_THREADS_H && HAVE_WEAK_SYMBOLS
  42 #   include <threads.h>
  43 #   pragma weak thrd_exit
  44 #   define c11_threads_in_use() (thrd_exit != NULL)
  45 #  else
  46 #   define c11_threads_in_use() 0
  47 #  endif
  48
  49 # elif HAVE_THREADS_H
  50
  51 #  include <threads.h>
  52
  53 # endif
  54 #endif
  55
/* nl_langinfo() must be multithread-safe.  To achieve this without using
   thread-local storage:
     1. We use a specific static buffer for each possible argument, so that
        different threads can call nl_langinfo with different arguments
        without interfering.
     2. We use a simple strcpy or memcpy to fill this static buffer.  Filling it
        through, for example, strcpy + strcat would not be guaranteed to leave
        the buffer's contents intact if another thread is currently accessing
        it.  If necessary, the contents are first assembled in a
        stack-allocated buffer.  */
  66
  67 #if !REPLACE_NL_LANGINFO || GNULIB_defined_CODESET
  68 /* Return the codeset of the current locale, if this is easily deducible.
  69    Otherwise, return "".  */
  70 static char *
  71 ctype_codeset (void)
  72 {
  73   static char result[2 + 10 + 1];
  74   char buf[2 + 10 + 1];
  75   char locale[SETLOCALE_NULL_MAX];
  76   char *codeset;
  77   size_t codesetlen;
  78
  79   if (setlocale_null_r (LC_CTYPE, locale, sizeof (locale)))
  80     locale[0] = '\0';
  81
  82   codeset = buf;
  83   codeset[0] = '\0';
  84
  85   if (locale[0])
  86     {
  87       /* If the locale name contains an encoding after the dot, return it.  */
  88       char *dot = strchr (locale, '.');
  89
  90       if (dot)
  91         {
  92           /* Look for the possible @... trailer and remove it, if any.  */
  93           char *codeset_start = dot + 1;
  94           char const *modifier = strchr (codeset_start, '@');
  95
  96           if (! modifier)
  97             codeset = codeset_start;
  98           else
  99             {
 100               codesetlen = modifier - codeset_start;
 101               if (codesetlen < sizeof buf)
 102                 {
 103                   codeset = memcpy (buf, codeset_start, codesetlen);
 104                   codeset[codesetlen] = '\0';
 105                 }
 106             }
 107         }
 108     }
 109
 110 # if defined _WIN32 && ! defined __CYGWIN__
 111   /* If setlocale is successful, it returns the number of the
 112      codepage, as a string.  Otherwise, fall back on Windows API
 113      GetACP, which returns the locale's codepage as a number (although
 114      this doesn't change according to what the 'setlocale' call specified).
 115      Either way, prepend "CP" to make it a valid codeset name.  */
 116   codesetlen = strlen (codeset);
 117   if (0 < codesetlen && codesetlen < sizeof buf - 2)
 118     memmove (buf + 2, codeset, codesetlen + 1);
 119   else
 120     sprintf (buf + 2, "%u", GetACP ());
 121   /* For a locale name such as "French_France.65001", in Windows 10,
 122      setlocale now returns "French_France.utf8" instead.  */
 123   if (strcmp (buf + 2, "65001") == 0 || strcmp (buf + 2, "utf8") == 0)
 124     return (char *) "UTF-8";
 125   else
 126     {
 127       memcpy (buf, "CP", 2);
 128       strcpy (result, buf);
 129       return result;
 130     }
 131 # else
 132   strcpy (result, codeset);
 133   return result;
 134 #endif
 135 }
 136 #endif
 137
 138
 139 #if REPLACE_NL_LANGINFO
 140
 141 /* Override nl_langinfo with support for added nl_item values.  */
 142
 143 # undef nl_langinfo
 144
 145 /* Without locking, on Solaris 11.3, test-nl_langinfo-mt fails, with message
 146    "thread5 disturbed by threadN!", even when threadN invokes only
 147       nl_langinfo (CODESET);
 148       nl_langinfo (CRNCYSTR);
 149    Similarly on Solaris 10.  */
 150
 151 # if !NL_LANGINFO_MTSAFE /* Solaris */
 152
 153 #  define ITEMS (MAXSTRMSG + 1)
 154 #  define MAX_RESULT_LEN 80
 155
 156 static char *
 157 nl_langinfo_unlocked (nl_item item)
 158 {
 159   static char result[ITEMS][MAX_RESULT_LEN];
 160
 161   /* The result of nl_langinfo is in storage that can be overwritten by
 162      other calls to nl_langinfo.  */
 163   char *tmp = nl_langinfo (item);
 164   if (item >= 0 && item < ITEMS && tmp != NULL)
 165     {
 166       size_t tmp_len = strlen (tmp);
 167       if (tmp_len < MAX_RESULT_LEN)
 168         strcpy (result[item], tmp);
 169       else
 170         {
 171           /* Produce a truncated result.  Oh well...  */
 172           result[item][MAX_RESULT_LEN - 1] = '\0';
 173           memcpy (result[item], tmp, MAX_RESULT_LEN - 1);
 174         }
 175       return result[item];
 176     }
 177   else
 178     return tmp;
 179 }
 180
 181 /* Use a lock, so that no two threads can invoke nl_langinfo_unlocked
 182    at the same time.  */
 183
 184 /* Prohibit renaming this symbol.  */
 185 #  undef gl_get_nl_langinfo_lock
 186
 187 #  if defined _WIN32 && !defined __CYGWIN__
 188
 189 extern __declspec(dllimport) CRITICAL_SECTION *gl_get_nl_langinfo_lock (void);
 190
 191 static char *
 192 nl_langinfo_with_lock (nl_item item)
 193 {
 194   CRITICAL_SECTION *lock = gl_get_nl_langinfo_lock ();
 195   char *ret;
 196
 197   EnterCriticalSection (lock);
 198   ret = nl_langinfo_unlocked (item);
 199   LeaveCriticalSection (lock);
 200
 201   return ret;
 202 }
 203
 204 #  elif HAVE_PTHREAD_API
 205
 206 extern
 207 #   if defined _WIN32 || defined __CYGWIN__
 208   __declspec(dllimport)
 209 #   endif
 210   pthread_mutex_t *gl_get_nl_langinfo_lock (void);
 211
 212 #   if HAVE_WEAK_SYMBOLS /* musl libc, FreeBSD, NetBSD, OpenBSD, Haiku */
 213
 214      /* Avoid the need to link with '-lpthread'.  */
 215 #    pragma weak pthread_mutex_lock
 216 #    pragma weak pthread_mutex_unlock
 217
 218      /* Determine whether libpthread is in use.  */
 219 #    pragma weak pthread_mutexattr_gettype
 220      /* See the comments in lock.h.  */
 221 #    define pthread_in_use() \
 222        (pthread_mutexattr_gettype != NULL || c11_threads_in_use ())
 223
 224 #   else
 225 #    define pthread_in_use() 1
 226 #   endif
 227
 228 static char *
 229 nl_langinfo_with_lock (nl_item item)
 230 {
 231   if (pthread_in_use())
 232     {
 233       pthread_mutex_t *lock = gl_get_nl_langinfo_lock ();
 234       char *ret;
 235
 236       if (pthread_mutex_lock (lock))
 237         abort ();
 238       ret = nl_langinfo_unlocked (item);
 239       if (pthread_mutex_unlock (lock))
 240         abort ();
 241
 242       return ret;
 243     }
 244   else
 245     return nl_langinfo_unlocked (item);
 246 }
 247
 248 #  elif HAVE_THREADS_H
 249
 250 extern mtx_t *gl_get_nl_langinfo_lock (void);
 251
 252 static char *
 253 nl_langinfo_with_lock (nl_item item)
 254 {
 255   mtx_t *lock = gl_get_nl_langinfo_lock ();
 256   char *ret;
 257
 258   if (mtx_lock (lock) != thrd_success)
 259     abort ();
 260   ret = nl_langinfo_unlocked (item);
 261   if (mtx_unlock (lock) != thrd_success)
 262     abort ();
 263
 264   return ret;
 265 }
 266
 267 #  endif
 268
 269 # else
 270
 271 /* On other platforms, no lock is needed.  */
 272 #  define nl_langinfo_with_lock nl_langinfo
 273
 274 # endif
 275
 276 char *
 277 rpl_nl_langinfo (nl_item item)
 278 {
 279   switch (item)
 280     {
 281 # if GNULIB_defined_CODESET
 282     case CODESET:
 283       return ctype_codeset ();
 284 # endif
 285 # if GNULIB_defined_T_FMT_AMPM
 286     case T_FMT_AMPM:
 287       return (char *) "%I:%M:%S %p";
 288 # endif
 289 # if GNULIB_defined_ALTMON
 290     case ALTMON_1:
 291     case ALTMON_2:
 292     case ALTMON_3:
 293     case ALTMON_4:
 294     case ALTMON_5:
 295     case ALTMON_6:
 296     case ALTMON_7:
 297     case ALTMON_8:
 298     case ALTMON_9:
 299     case ALTMON_10:
 300     case ALTMON_11:
 301     case ALTMON_12:
 302       /* We don't ship the appropriate localizations with gnulib.  Therefore,
 303          treat ALTMON_i like MON_i.  */
 304       item = item - ALTMON_1 + MON_1;
 305       break;
 306 # endif
 307 # if GNULIB_defined_ERA
 308     case ERA:
 309       /* The format is not standardized.  In glibc it is a sequence of strings
 310          of the form "direction:offset:start_date:end_date:era_name:era_format"
 311          with an empty string at the end.  */
 312       return (char *) "";
 313     case ERA_D_FMT:
 314       /* The %Ex conversion in strftime behaves like %x if the locale does not
 315          have an alternative time format.  */
 316       item = D_FMT;
 317       break;
 318     case ERA_D_T_FMT:
 319       /* The %Ec conversion in strftime behaves like %c if the locale does not
 320          have an alternative time format.  */
 321       item = D_T_FMT;
 322       break;
 323     case ERA_T_FMT:
 324       /* The %EX conversion in strftime behaves like %X if the locale does not
 325          have an alternative time format.  */
 326       item = T_FMT;
 327       break;
 328     case ALT_DIGITS:
 329       /* The format is not standardized.  In glibc it is a sequence of 10
 330          strings, appended in memory.  */
 331       return (char *) "\0\0\0\0\0\0\0\0\0\0";
 332 # endif
 333 # if GNULIB_defined_YESEXPR || !FUNC_NL_LANGINFO_YESEXPR_WORKS
 334     case YESEXPR:
 335       return (char *) "^[yY]";
 336     case NOEXPR:
 337       return (char *) "^[nN]";
 338 # endif
 339     default:
 340       break;
 341     }
 342   return nl_langinfo_with_lock (item);
 343 }
 344
 345 #else
 346
 347 /* Provide nl_langinfo from scratch, either for native MS-Windows, or
 348    for old Unix platforms without locales, such as Linux libc5 or
 349    BeOS.  */
 350
 351 # include <time.h>
 352
 353 char *
 354 nl_langinfo (nl_item item)
 355 {
 356   char buf[100];
 357   struct tm tmm = { 0 };
 358
 359   switch (item)
 360     {
 361     /* nl_langinfo items of the LC_CTYPE category */
 362     case CODESET:
 363       {
 364         char *codeset = ctype_codeset ();
 365         if (*codeset)
 366           return codeset;
 367       }
 368 # ifdef __BEOS__
 369       return (char *) "UTF-8";
 370 # else
 371       return (char *) "ISO-8859-1";
 372 # endif
 373     /* nl_langinfo items of the LC_NUMERIC category */
 374     case RADIXCHAR:
 375       return localeconv () ->decimal_point;
 376     case THOUSEP:
 377       return localeconv () ->thousands_sep;
 378 # ifdef GROUPING
 379     case GROUPING:
 380       return localeconv () ->grouping;
 381 # endif
 382     /* nl_langinfo items of the LC_TIME category.
 383        TODO: Really use the locale.  */
 384     case D_T_FMT:
 385     case ERA_D_T_FMT:
 386       return (char *) "%a %b %e %H:%M:%S %Y";
 387     case D_FMT:
 388     case ERA_D_FMT:
 389       return (char *) "%m/%d/%y";
 390     case T_FMT:
 391     case ERA_T_FMT:
 392       return (char *) "%H:%M:%S";
 393     case T_FMT_AMPM:
 394       return (char *) "%I:%M:%S %p";
 395     case AM_STR:
 396       {
 397         static char result[80];
 398         if (!strftime (buf, sizeof result, "%p", &tmm))
 399           return (char *) "AM";
 400         strcpy (result, buf);
 401         return result;
 402       }
 403     case PM_STR:
 404       {
 405         static char result[80];
 406         tmm.tm_hour = 12;
 407         if (!strftime (buf, sizeof result, "%p", &tmm))
 408           return (char *) "PM";
 409         strcpy (result, buf);
 410         return result;
 411       }
 412     case DAY_1:
 413     case DAY_2:
 414     case DAY_3:
 415     case DAY_4:
 416     case DAY_5:
 417     case DAY_6:
 418     case DAY_7:
 419       {
 420         static char result[7][50];
 421         static char const days[][sizeof "Wednesday"] = {
 422           "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday",
 423           "Friday", "Saturday"
 424         };
 425         tmm.tm_wday = item - DAY_1;
 426         if (!strftime (buf, sizeof result[0], "%A", &tmm))
 427           return (char *) days[item - DAY_1];
 428         strcpy (result[item - DAY_1], buf);
 429         return result[item - DAY_1];
 430       }
 431     case ABDAY_1:
 432     case ABDAY_2:
 433     case ABDAY_3:
 434     case ABDAY_4:
 435     case ABDAY_5:
 436     case ABDAY_6:
 437     case ABDAY_7:
 438       {
 439         static char result[7][30];
 440         static char const abdays[][sizeof "Sun"] = {
 441           "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
 442         };
 443         tmm.tm_wday = item - ABDAY_1;
 444         if (!strftime (buf, sizeof result[0], "%a", &tmm))
 445           return (char *) abdays[item - ABDAY_1];
 446         strcpy (result[item - ABDAY_1], buf);
 447         return result[item - ABDAY_1];
 448       }
 449     {
 450       static char const months[][sizeof "September"] = {
 451         "January", "February", "March", "April", "May", "June", "July",
 452         "September", "October", "November", "December"
 453       };
 454       case MON_1:
 455       case MON_2:
 456       case MON_3:
 457       case MON_4:
 458       case MON_5:
 459       case MON_6:
 460       case MON_7:
 461       case MON_8:
 462       case MON_9:
 463       case MON_10:
 464       case MON_11:
 465       case MON_12:
 466         {
 467           static char result[12][50];
 468           tmm.tm_mon = item - MON_1;
 469           if (!strftime (buf, sizeof result[0], "%B", &tmm))
 470             return (char *) months[item - MON_1];
 471           strcpy (result[item - MON_1], buf);
 472           return result[item - MON_1];
 473         }
 474       case ALTMON_1:
 475       case ALTMON_2:
 476       case ALTMON_3:
 477       case ALTMON_4:
 478       case ALTMON_5:
 479       case ALTMON_6:
 480       case ALTMON_7:
 481       case ALTMON_8:
 482       case ALTMON_9:
 483       case ALTMON_10:
 484       case ALTMON_11:
 485       case ALTMON_12:
 486         {
 487           static char result[12][50];
 488           tmm.tm_mon = item - ALTMON_1;
 489           /* The platforms without nl_langinfo() don't support strftime with
 490              %OB.  We don't even need to try.  */
 491           #if 0
 492           if (!strftime (buf, sizeof result[0], "%OB", &tmm))
 493           #endif
 494             if (!strftime (buf, sizeof result[0], "%B", &tmm))
 495               return (char *) months[item - ALTMON_1];
 496           strcpy (result[item - ALTMON_1], buf);
 497           return result[item - ALTMON_1];
 498         }
 499     }
 500     case ABMON_1:
 501     case ABMON_2:
 502     case ABMON_3:
 503     case ABMON_4:
 504     case ABMON_5:
 505     case ABMON_6:
 506     case ABMON_7:
 507     case ABMON_8:
 508     case ABMON_9:
 509     case ABMON_10:
 510     case ABMON_11:
 511     case ABMON_12:
 512       {
 513         static char result[12][30];
 514         static char const abmonths[][sizeof "Jan"] = {
 515           "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul",
 516           "Sep", "Oct", "Nov", "Dec"
 517         };
 518         tmm.tm_mon = item - ABMON_1;
 519         if (!strftime (buf, sizeof result[0], "%b", &tmm))
 520           return (char *) abmonths[item - ABMON_1];
 521         strcpy (result[item - ABMON_1], buf);
 522         return result[item - ABMON_1];
 523       }
 524     case ERA:
 525       return (char *) "";
 526     case ALT_DIGITS:
 527       return (char *) "\0\0\0\0\0\0\0\0\0\0";
 528     /* nl_langinfo items of the LC_MONETARY category.  */
 529     case CRNCYSTR:
 530       return localeconv () ->currency_symbol;
 531 # ifdef INT_CURR_SYMBOL
 532     case INT_CURR_SYMBOL:
 533       return localeconv () ->int_curr_symbol;
 534     case MON_DECIMAL_POINT:
 535       return localeconv () ->mon_decimal_point;
 536     case MON_THOUSANDS_SEP:
 537       return localeconv () ->mon_thousands_sep;
 538     case MON_GROUPING:
 539       return localeconv () ->mon_grouping;
 540     case POSITIVE_SIGN:
 541       return localeconv () ->positive_sign;
 542     case NEGATIVE_SIGN:
 543       return localeconv () ->negative_sign;
 544     case FRAC_DIGITS:
 545       return & localeconv () ->frac_digits;
 546     case INT_FRAC_DIGITS:
 547       return & localeconv () ->int_frac_digits;
 548     case P_CS_PRECEDES:
 549       return & localeconv () ->p_cs_precedes;
 550     case N_CS_PRECEDES:
 551       return & localeconv () ->n_cs_precedes;
 552     case P_SEP_BY_SPACE:
 553       return & localeconv () ->p_sep_by_space;
 554     case N_SEP_BY_SPACE:
 555       return & localeconv () ->n_sep_by_space;
 556     case P_SIGN_POSN:
 557       return & localeconv () ->p_sign_posn;
 558     case N_SIGN_POSN:
 559       return & localeconv () ->n_sign_posn;
 560 # endif
 561     /* nl_langinfo items of the LC_MESSAGES category
 562        TODO: Really use the locale. */
 563     case YESEXPR:
 564       return (char *) "^[yY]";
 565     case NOEXPR:
 566       return (char *) "^[nN]";
 567     default:
 568       return (char *) "";
 569     }
 570 }
 571
 572 #endif