mpdm_s.c

   1 /*
   2
   3     MPDM - Minimum Profit Data Manager
   4     Copyright (C) 2003/2010 Angel Ortega <angel@triptico.com>
   5
   6     mpdm_s.c - String management
   7
   8     This program is free software; you can redistribute it and/or
   9     modify it under the terms of the GNU General Public License
  10     as published by the Free Software Foundation; either version 2
  11     of the License, or (at your option) any later version.
  12
  13     This program is distributed in the hope that it will be useful,
  14     but WITHOUT ANY WARRANTY; without even the implied warranty of
  15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16     GNU General Public License for more details.
  17
  18     You should have received a copy of the GNU General Public License
  19     along with this program; if not, write to the Free Software
  20     Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  21
  22     http://www.triptico.com
  23
  24 */
  25
  26 #include "config.h"
  27
  28 #include <stdio.h>
  29 #include <stdlib.h>
  30 #include <string.h>
  31 #include <wchar.h>
  32 #include <malloc.h>
  33 #include <locale.h>
  34 #include <wctype.h>
  35
  36 #ifdef CONFOPT_GETTEXT
  37 #include <libintl.h>
  38 #endif
  39
  40 #ifdef CONFOPT_WIN32
  41 #include <windows.h>
  42 #endif
  43
  44 #include "mpdm.h"
  45
  46
  47 /** code **/
  48
  49 void *mpdm_poke_o(void *dst, int *dsize, int *offset, const void *org,
  50                   int osize, int esize)
  51 {
  52     if (org != NULL && osize) {
  53         /* enough room? */
  54         if (*offset + osize > *dsize) {
  55             /* no; enlarge */
  56             *dsize += osize;
  57
  58             dst = realloc(dst, *dsize * esize);
  59         }
  60
  61         memcpy((char *) dst + (*offset * esize), org, osize * esize);
  62         *offset += osize;
  63     }
  64
  65     return dst;
  66 }
  67
  68
  69 void *mpdm_poke(void *dst, int *dsize, const void *org, int osize,
  70                 int esize)
  71 /* pokes (adds) org into dst, which is a dynamic string, making it grow */
  72 {
  73     int offset = *dsize;
  74
  75     return mpdm_poke_o(dst, dsize, &offset, org, osize, esize);
  76 }
  77
  78
  79 wchar_t *mpdm_pokewsn(wchar_t * dst, int *dsize, const wchar_t * str,
  80                       int slen)
  81 /* adds a wide string to dst using mpdm_poke() with size */
  82 {
  83     if (str)
  84         dst = mpdm_poke(dst, dsize, str, slen, sizeof(wchar_t));
  85
  86     return dst;
  87 }
  88
  89
  90 wchar_t *mpdm_pokews(wchar_t * dst, int *dsize, const wchar_t * str)
  91 /* adds a wide string to dst using mpdm_poke() */
  92 {
  93     if (str)
  94         dst = mpdm_pokewsn(dst, dsize, str, wcslen(str));
  95
  96     return dst;
  97 }
  98
  99
 100 wchar_t *mpdm_pokev(wchar_t * dst, int *dsize, const mpdm_t v)
 101 /* adds the string in v to dst using mpdm_poke() */
 102 {
 103     if (v != NULL) {
 104         const wchar_t *ptr = mpdm_string(v);
 105
 106         mpdm_ref(v);
 107         dst = mpdm_pokews(dst, dsize, ptr);
 108         mpdm_unref(v);
 109     }
 110
 111     return dst;
 112 }
 113
 114
 115 wchar_t *mpdm_mbstowcs(const char *str, int *s, int l)
 116 /* converts an mbs to a wcs, but filling invalid chars
 117    with question marks instead of just failing */
 118 {
 119     wchar_t *ptr = NULL;
 120     char tmp[64];               /* really MB_CUR_MAX + 1 */
 121     wchar_t wc;
 122     int n, i, c, t = 0;
 123     char *cstr;
 124
 125     /* allow NULL values for s */
 126     if (s == NULL)
 127         s = &t;
 128
 129     /* if there is a limit, duplicate and break the string */
 130     if (l >= 0) {
 131         cstr = strdup(str);
 132         cstr[l] = '\0';
 133     }
 134     else
 135         cstr = (char *) str;
 136
 137     /* try first a direct conversion with mbstowcs */
 138     if ((*s = mbstowcs(NULL, cstr, 0)) != -1) {
 139         /* direct conversion is possible; do it */
 140         if ((ptr = malloc((*s + 1) * sizeof(wchar_t))) != NULL) {
 141             mbstowcs(ptr, cstr, *s);
 142             ptr[*s] = L'\0';
 143         }
 144     }
 145     else {
 146         /* zero everything */
 147         *s = n = i = 0;
 148
 149         for (;;) {
 150             /* no more characters to process? */
 151             if ((c = cstr[n + i]) == '\0' && i == 0)
 152                 break;
 153
 154             tmp[i++] = c;
 155             tmp[i] = '\0';
 156
 157             /* try to convert */
 158             if (mbstowcs(&wc, tmp, 1) == (size_t) - 1) {
 159                 /* can still be an incomplete multibyte char? */
 160                 if (c != '\0' && i <= (int) MB_CUR_MAX)
 161                     continue;
 162                 else {
 163                     /* too many failing bytes; skip 1 byte */
 164                     wc = L'?';
 165                     i = 1;
 166                 }
 167             }
 168
 169             /* skip used bytes and back again */
 170             n += i;
 171             i = 0;
 172
 173             /* store new char */
 174             if ((ptr = mpdm_poke(ptr, s, &wc, 1, sizeof(wchar_t))) == NULL)
 175                 break;
 176         }
 177
 178         /* null terminate and count one less */
 179         if (ptr != NULL) {
 180             ptr = mpdm_poke(ptr, s, L"", 1, sizeof(wchar_t));
 181             (*s)--;
 182         }
 183     }
 184
 185     /* free the duplicate */
 186     if (cstr != str)
 187         free(cstr);
 188
 189     return ptr;
 190 }
 191
 192
 193 char *mpdm_wcstombs(const wchar_t * str, int *s)
 194 /* converts a wcs to an mbs, but filling invalid chars
 195    with question marks instead of just failing */
 196 {
 197     char *ptr = NULL;
 198     char tmp[64];               /* really MB_CUR_MAX + 1 */
 199     int l, t = 0;
 200
 201     /* allow NULL values for s */
 202     if (s == NULL)
 203         s = &t;
 204
 205     /* try first a direct conversion with wcstombs */
 206     if ((*s = wcstombs(NULL, str, 0)) != -1) {
 207         /* direct conversion is possible; do it and return */
 208         if ((ptr = malloc(*s + 1)) != NULL) {
 209             wcstombs(ptr, str, *s);
 210             ptr[*s] = '\0';
 211         }
 212
 213         return ptr;
 214     }
 215
 216     /* invalid encoding? convert characters one by one */
 217     *s = 0;
 218
 219     while (*str) {
 220         if ((l = wctomb(tmp, *str)) <= 0) {
 221             /* if char couldn't be converted,
 222                write a question mark instead */
 223             l = wctomb(tmp, L'?');
 224         }
 225
 226         tmp[l] = '\0';
 227         if ((ptr = mpdm_poke(ptr, s, tmp, l, 1)) == NULL)
 228             break;
 229
 230         str++;
 231     }
 232
 233     /* null terminate and count one less */
 234     if (ptr != NULL) {
 235         ptr = mpdm_poke(ptr, s, "", 1, 1);
 236         (*s)--;
 237     }
 238
 239     return ptr;
 240 }
 241
 242
 243 mpdm_t mpdm_new_wcs(int flags, const wchar_t * str, int size, int cpy)
 244 /* creates a new string value from a wcs */
 245 {
 246     wchar_t *ptr;
 247
 248     /* a size of -1 means 'calculate it' */
 249     if (size == -1 && str != NULL)
 250         size = wcslen(str);
 251
 252     /* create a copy? */
 253     if (cpy) {
 254         /* free() on destruction */
 255         flags |= MPDM_FREE;
 256
 257         /* allocs */
 258         if ((ptr = malloc((size + 1) * sizeof(wchar_t))) == NULL)
 259             return NULL;
 260
 261         /* if no source, reset to zeroes; otherwise, copy */
 262         if (str == NULL)
 263             memset(ptr, '\0', size * sizeof(wchar_t));
 264         else {
 265             wcsncpy(ptr, str, size);
 266             ptr[size] = L'\0';
 267         }
 268     }
 269     else
 270         ptr = (wchar_t *) str;
 271
 272     /* it's a string */
 273     flags |= MPDM_STRING;
 274
 275     return mpdm_new(flags, ptr, size);
 276 }
 277
 278
 279 mpdm_t mpdm_new_mbstowcs(int flags, const char *str, int l)
 280 /* creates a new string value from an mbs */
 281 {
 282     wchar_t *ptr;
 283     int size;
 284
 285     if ((ptr = mpdm_mbstowcs(str, &size, l)) == NULL)
 286         return NULL;
 287
 288     /* it's a string */
 289     flags |= (MPDM_STRING | MPDM_FREE);
 290
 291     return mpdm_new(flags, ptr, size);
 292 }
 293
 294
 295 mpdm_t mpdm_new_wcstombs(int flags, const wchar_t * str)
 296 /* creates a new mbs value from a wbs */
 297 {
 298     char *ptr;
 299     int size;
 300
 301     ptr = mpdm_wcstombs(str, &size);
 302
 303     flags |= MPDM_FREE;
 304
 305     /* unset the string flag; mbs,s are not 'strings' */
 306     flags &= ~MPDM_STRING;
 307
 308     return mpdm_new(flags, ptr, size);
 309 }
 310
 311
 312 mpdm_t mpdm_new_i(int ival)
 313 /* creates a new string value from an integer */
 314 {
 315     mpdm_t v;
 316
 317     /* create a string value, but without the 'string' */
 318     v = mpdm_new(MPDM_STRING | MPDM_FREE, NULL, 0);
 319     return mpdm_set_ival(v, ival);
 320 }
 321
 322
 323 mpdm_t mpdm_new_r(double rval)
 324 /* creates a new string value from a real number */
 325 {
 326     mpdm_t v;
 327
 328     /* create a string value, but without the 'string' */
 329     v = mpdm_new(MPDM_STRING | MPDM_FREE, NULL, 0);
 330     return mpdm_set_rval(v, rval);
 331 }
 332
 333
 334 /* interface */
 335
 336 /**
 337  * mpdm_string2 - Returns a printable representation of a value (with buffer).
 338  * @v: the value
 339  * @wtmp: the external buffer
 340  *
 341  * Returns a printable representation of a value. For strings, it's
 342  * the value data itself; for any other type, a conversion to string
 343  * is returned instead. If @v is not a string, the @wtmp buffer
 344  * can be used as a placeholder for the string representation.
 345  *
 346  * The reference count value in @v is not touched.
 347  * [Strings]
 348  */
 349 wchar_t *mpdm_string2(const mpdm_t v, wchar_t *wtmp)
 350 {
 351     char tmp[32];
 352     wchar_t *ret;
 353
 354     /* if it's NULL, return a constant */
 355     if (v == NULL)
 356         ret = L"[NULL]";
 357     else
 358         /* if it's a string, return it */
 359     if (v->flags & MPDM_STRING) {
 360
 361         if (v->data == NULL) {
 362             char tmp[128] = "";
 363
 364             /* string but no data? most probably a 'lazy' number */
 365             if (v->flags & MPDM_RVAL) {
 366                 char *prev_locale = setlocale(LC_NUMERIC, "C");
 367
 368                 /* creates the visual representation */
 369                 snprintf(tmp, sizeof(tmp), "%lf", v->rval);
 370
 371                 setlocale(LC_NUMERIC, prev_locale);
 372
 373                 /* manually strip useless zeroes */
 374                 if (strchr(tmp, '.') != NULL) {
 375                     char *ptr;
 376
 377                     for (ptr = tmp + strlen(tmp) - 1; *ptr == '0'; ptr--);
 378
 379                     /* if it's over the ., strip it also */
 380                     if (*ptr != '.')
 381                         ptr++;
 382
 383                     *ptr = '\0';
 384                 }
 385             }
 386             else
 387             if (v->flags & MPDM_IVAL) {
 388                 /* creates the visual representation */
 389                 snprintf(tmp, sizeof(tmp), "%d", v->ival);
 390             }
 391
 392             v->data = (void *)mpdm_mbstowcs(tmp, &v->size, -1);
 393         }
 394
 395         ret = (wchar_t *) v->data;
 396     }
 397     else {
 398         /* otherwise, return a visual representation */
 399         snprintf(tmp, sizeof(tmp), "%p", v);
 400         mbstowcs(wtmp, tmp, sizeof(tmp) * sizeof(wchar_t));
 401
 402         ret = wtmp;
 403     }
 404
 405     return ret;
 406 }
 407
 408
 409 /**
 410  * mpdm_string - Returns a printable representation of a value.
 411  * @v: the value
 412  *
 413  * Returns a printable representation of a value. For strings, it's
 414  * the value data itself; for any other type, a conversion to string
 415  * is returned instead. This value should be used immediately, as it
 416  * can be a pointer to a static buffer.
 417  *
 418  * The reference count value in @v is not touched.
 419  * [Strings]
 420  */
 421 wchar_t *mpdm_string(const mpdm_t v)
 422 {
 423     static wchar_t tmp[32];
 424
 425     return mpdm_string2(v, tmp);
 426 }
 427
 428
 429 /**
 430  * mpdm_cmp - Compares two values.
 431  * @v1: the first value
 432  * @v2: the second value
 433  *
 434  * Compares two values. If both has the MPDM_STRING flag set,
 435  * a comparison using wcscoll() is returned; if both are arrays,
 436  * the size is compared first and, if they have the same number
 437  * elements, each one is compared; otherwise, a simple visual
 438  * representation comparison is done.
 439  * [Strings]
 440  */
 441 int mpdm_cmp(const mpdm_t v1, const mpdm_t v2)
 442 {
 443     int r;
 444
 445     mpdm_ref(v1);
 446     mpdm_ref(v2);
 447
 448     /* same values? */
 449     if (v1 == v2)
 450         r = 0;
 451     else
 452         /* is any value NULL? */
 453     if (v1 == NULL)
 454         r = -1;
 455     else
 456     if (v2 == NULL)
 457         r = 1;
 458     else
 459     if (MPDM_IS_ARRAY(v1) && MPDM_IS_ARRAY(v2)) {
 460         /* compare first the sizes */
 461         if ((r = mpdm_size(v1) - mpdm_size(v2)) == 0) {
 462             int n;
 463
 464             /* they have the same size;
 465                compare each pair of elements */
 466             for (n = 0; n < mpdm_size(v1); n++) {
 467                 if ((r = mpdm_cmp(mpdm_aget(v1, n),
 468                                   mpdm_aget(v2, n))) != 0)
 469                     break;
 470             }
 471         }
 472     }
 473     else {
 474         wchar_t tmp[32];
 475
 476         r = wcscoll(mpdm_string(v1), mpdm_string2(v2, tmp));
 477     }
 478
 479     mpdm_unref(v2);
 480     mpdm_unref(v1);
 481
 482     return r;
 483 }
 484
 485
 486 /**
 487  * mpdm_cmp_s - Compares two values (string version).
 488  * @v1: the first value
 489  * @v2: the second value
 490  *
 491  * Compares two values. If both has the MPDM_STRING flag set,
 492  * a comparison using wcscoll() is returned; if both are arrays,
 493  * the size is compared first and, if they have the same number
 494  * elements, each one is compared; otherwise, a simple visual
 495  * representation comparison is done.
 496  */
 497 int mpdm_cmp_s(const mpdm_t v1, const wchar_t * v2)
 498 {
 499     return mpdm_cmp(v1, MPDM_AS(v2));
 500 }
 501
 502
 503 /**
 504  * mpdm_splice - Creates a new string value from another.
 505  * @v: the original value
 506  * @i: the value to be inserted
 507  * @offset: offset where the substring is to be inserted
 508  * @del: number of characters to delete
 509  *
 510  * Creates a new string value from @v, deleting @del chars at @offset
 511  * and substituting them by @i. If @del is 0, no deletion is done.
 512  * both @offset and @del can be negative; if this is the case, it's
 513  * assumed as counting from the end of @v. If @v is NULL, @i will become
 514  * the new string, and both @offset and @del will be ignored. If @v is
 515  * not NULL and @i is, no insertion process is done (only deletion, if
 516  * applicable).
 517  *
 518  * Returns a two element array, with the new string in the first
 519  * element and the deleted string in the second (with a NULL value
 520  * if @del is 0).
 521  * [Strings]
 522  */
 523 mpdm_t mpdm_splice(const mpdm_t v, const mpdm_t i, int offset, int del)
 524 {
 525     mpdm_t w;
 526     mpdm_t n = NULL;
 527     mpdm_t d = NULL;
 528     int os, ns, r;
 529     int ins = 0;
 530     wchar_t *ptr;
 531
 532     mpdm_ref(v);
 533     mpdm_ref(i);
 534
 535     if (v != NULL) {
 536         os = mpdm_size(v);
 537
 538         /* negative offsets start from the end */
 539         if (offset < 0)
 540             offset = os + 1 - offset;
 541
 542         /* never add further the end */
 543         if (offset > os)
 544             offset = os;
 545
 546         /* negative del counts as 'characters left' */
 547         if (del < 0)
 548             del = os + 1 - offset + del;
 549
 550         /* something to delete? */
 551         if (del > 0) {
 552             /* never delete further the end */
 553             if (offset + del > os)
 554                 del = os - offset;
 555
 556             /* deleted string */
 557             d = MPDM_NS(((wchar_t *) v->data) + offset, del);
 558         }
 559         else
 560             del = 0;
 561
 562         /* something to insert? */
 563         ins = mpdm_size(i);
 564
 565         /* new size and remainder */
 566         ns = os + ins - del;
 567         r = offset + del;
 568
 569         n = MPDM_NS(NULL, ns);
 570
 571         ptr = (wchar_t *) n->data;
 572
 573         /* copy the beginning */
 574         if (offset > 0) {
 575             wcsncpy(ptr, v->data, offset);
 576             ptr += offset;
 577         }
 578
 579         /* copy the text to be inserted */
 580         if (ins > 0) {
 581             wcsncpy(ptr, i->data, ins);
 582             ptr += ins;
 583         }
 584
 585         /* copy the remaining */
 586         os -= r;
 587         if (os > 0) {
 588             wcsncpy(ptr, ((wchar_t *) v->data) + r, os);
 589             ptr += os;
 590         }
 591
 592         /* null terminate */
 593         *ptr = L'\0';
 594     }
 595     else
 596         n = i;
 597
 598     /* creates the output array */
 599     w = MPDM_A(2);
 600
 601     mpdm_ref(w);
 602     mpdm_aset(w, n, 0);
 603     mpdm_aset(w, d, 1);
 604     mpdm_unrefnd(w);
 605
 606     mpdm_unref(i);
 607     mpdm_unref(v);
 608
 609     return w;
 610 }
 611
 612
 613 /**
 614  * mpdm_strcat_sn - Concatenates two strings (string with size version).
 615  * @s1: the first string
 616  * @s2: the second string
 617  * @size: the size of the second string
 618  *
 619  * Returns a new string formed by the concatenation of @s1 and @s2.
 620  * [Strings]
 621  */
 622 mpdm_t mpdm_strcat_sn(const mpdm_t s1, const wchar_t * s2, int size)
 623 {
 624     wchar_t *ptr = NULL;
 625     int s = 0;
 626     mpdm_t r;
 627
 628     if (s1 == NULL && s2 == NULL)
 629         r = NULL;
 630     else {
 631         ptr = mpdm_pokev(ptr, &s, s1);
 632         ptr = mpdm_pokewsn(ptr, &s, s2, size);
 633
 634         ptr = mpdm_poke(ptr, &s, L"", 1, sizeof(wchar_t));
 635         r = MPDM_ENS(ptr, s - 1);
 636     }
 637
 638     return r;
 639 }
 640
 641
 642 /**
 643  * mpdm_strcat_s - Concatenates two strings (string version).
 644  * @s1: the first string
 645  * @s2: the second string
 646  *
 647  * Returns a new string formed by the concatenation of @s1 and @s2.
 648  * [Strings]
 649  */
 650 mpdm_t mpdm_strcat_s(const mpdm_t s1, const wchar_t * s2)
 651 {
 652     return mpdm_strcat_sn(s1, s2, s2 ? wcslen(s2) : 0);
 653 }
 654
 655
 656 /**
 657  * mpdm_strcat - Concatenates two strings.
 658  * @s1: the first string
 659  * @s2: the second string
 660  *
 661  * Returns a new string formed by the concatenation of @s1 and @s2.
 662  * [Strings]
 663  */
 664 mpdm_t mpdm_strcat(const mpdm_t s1, const mpdm_t s2)
 665 {
 666     mpdm_t r;
 667
 668     mpdm_ref(s2);
 669     r = mpdm_strcat_s(s1, s2 ? mpdm_string(s2) : NULL);
 670     mpdm_unref(s2);
 671
 672     return r;
 673 }
 674
 675
 676 /**
 677  * mpdm_ival - Returns a value's data as an integer.
 678  * @v: the value
 679  *
 680  * Returns a value's data as an integer. If the value is a string,
 681  * it's converted via sscanf and returned; non-string values have all
 682  * an ival of 0. The converted integer is cached, so costly string
 683  * conversions are only done once. Values created with the MPDM_IVAL
 684  * flag set have its ival cached from the beginning.
 685  * [Strings]
 686  * [Value Management]
 687  */
 688 int mpdm_ival(mpdm_t v)
 689 {
 690     int i = 0;
 691
 692     mpdm_ref(v);
 693
 694     if (v != NULL) {
 695         /* if there is no cached integer, calculate it */
 696         if (!(v->flags & MPDM_IVAL)) {
 697             /* does it have an rval? */
 698             if (v->flags & MPDM_RVAL)
 699                 i = (int) v->rval;
 700             else
 701             /* if it's a string, calculate it; other
 702                values will have an ival of 0 */
 703             if (v->flags & MPDM_STRING) {
 704                 char tmp[32];
 705                 char *fmt = "%i";
 706
 707                 wcstombs(tmp, (wchar_t *) v->data, sizeof(tmp));
 708                 tmp[sizeof(tmp) - 1] = '\0';
 709
 710                 /* workaround for mingw32: as it doesn't
 711                    correctly parse octal and hexadecimal
 712                    numbers, they are tried as special cases */
 713                 if (tmp[0] == '0') {
 714                     if (tmp[1] == 'b' || tmp[1] == 'B') {
 715                         /* binary number */
 716                         fmt = NULL;
 717                         char *ptr = &tmp[2];
 718
 719                         while (*ptr == '0' || *ptr == '1') {
 720                             i <<= 1;
 721
 722                             if (*ptr == '1')
 723                                 i |= 1;
 724
 725                             ptr++;
 726                         }
 727                     }
 728                     else
 729                     if (tmp[1] == 'x' || tmp[1] == 'X')
 730                         fmt = "%x";
 731                     else
 732                         fmt = "%o";
 733                 }
 734
 735                 if (fmt != NULL)
 736                     sscanf(tmp, fmt, &i);
 737             }
 738
 739             mpdm_set_ival(v, i);
 740         }
 741
 742         i = v->ival;
 743     }
 744
 745     mpdm_unref(v);
 746
 747     return i;
 748 }
 749
 750
 751 /**
 752  * mpdm_rval - Returns a value's data as a real number (double).
 753  * @v: the value
 754  *
 755  * Returns a value's data as a real number (double float). If the value
 756  * is a string, it's converted via sscanf and returned; non-string values
 757  * have all an rval of 0. The converted double is cached, so costly string
 758  * conversions are only done once. Values created with the MPDM_RVAL
 759  * flag set have its rval cached from the beginning.
 760  * [Strings]
 761  * [Value Management]
 762  */
 763 double mpdm_rval(mpdm_t v)
 764 {
 765     double r = 0.0;
 766
 767     mpdm_ref(v);
 768
 769     if (v != NULL) {
 770         /* if there is no cached double, calculate it */
 771         if (!(v->flags & MPDM_RVAL)) {
 772             /* does it have in ival? */
 773             if (v->flags & MPDM_IVAL)
 774                 r = (double) v->ival;
 775             else
 776             /* if it's a string, calculate it; other
 777                values will have an rval of 0.0 */
 778             if (v->flags & MPDM_STRING) {
 779                 char tmp[128];
 780                 char *prev_locale;
 781
 782                 wcstombs(tmp, (wchar_t *) v->data, sizeof(tmp));
 783                 tmp[sizeof(tmp) - 1] = '\0';
 784
 785                 /* if the number starts with 0, it's
 786                    an octal or hexadecimal number; just
 787                    take the integer value and cast it */
 788                 if (tmp[0] == '0' && tmp[1] != '.')
 789                     r = (double) mpdm_ival(v);
 790                 else {
 791                     /* set locale to C for non locale-dependent
 792                        floating point conversion */
 793                     prev_locale = setlocale(LC_NUMERIC, "C");
 794
 795                     /* read */
 796                     sscanf(tmp, "%lf", &r);
 797
 798                     /* set previous locale */
 799                     setlocale(LC_NUMERIC, prev_locale);
 800                 }
 801             }
 802
 803             mpdm_set_rval(v, r);
 804         }
 805
 806         r = v->rval;
 807     }
 808
 809     mpdm_unref(v);
 810
 811     return r;
 812 }
 813
 814
 815 /**
 816  * mpdm_gettext - Translates a string to the current language.
 817  * @str: the string
 818  *
 819  * Translates the @str string to the current language.
 820  *
 821  * This function can still be used even if there is no real gettext
 822  * support() by manually filling the __I18N__ hash.
 823  *
 824  * If the string is found in the current table, the translation is
 825  * returned; otherwise, the same @str value is returned.
 826  * [Strings]
 827  * [Localization]
 828  */
 829 mpdm_t mpdm_gettext(const mpdm_t str)
 830 {
 831     mpdm_t v;
 832     mpdm_t i18n = NULL;
 833
 834     /* gets the cache */
 835     if ((i18n = mpdm_hget_s(mpdm_root(), L"__I18N__")) == NULL)
 836         i18n = mpdm_hset_s(mpdm_root(), L"__I18N__", MPDM_H(0));
 837
 838     mpdm_ref(str);
 839
 840     /* try first the cache */
 841     if ((v = mpdm_hget(i18n, str)) == NULL) {
 842 #ifdef CONFOPT_GETTEXT
 843         char *s;
 844         mpdm_t t;
 845
 846         /* convert to mbs */
 847         t = mpdm_ref(MPDM_2MBS(str->data));
 848
 849         /* ask gettext for it */
 850         s = gettext((char *) t->data);
 851
 852         if (s != t->data)
 853             v = MPDM_MBS(s);
 854         else
 855             v = str;
 856
 857         mpdm_unref(t);
 858
 859 #else                           /* CONFOPT_GETTEXT */
 860
 861         v = str;
 862
 863 #endif                          /* CONFOPT_GETTEXT */
 864
 865         /* store in the cache */
 866         mpdm_hset(i18n, str, v);
 867     }
 868
 869     mpdm_unref(str);
 870
 871     return v;
 872 }
 873
 874
 875 /**
 876  * mpdm_gettext_domain - Sets domain and data directory for translations.
 877  * @dom: the domain (application name)
 878  * @data: directory contaning the .mo files
 879  *
 880  * Sets the domain (application name) and translation data for translating
 881  * strings that will be returned by mpdm_gettext().@data must point to a
 882  * directory containing the .mo (compiled .po) files.
 883  *
 884  * If there is no gettext support, returns 0, or 1 otherwise.
 885  * [Strings]
 886  * [Localization]
 887  */
 888 int mpdm_gettext_domain(const mpdm_t dom, const mpdm_t data)
 889 {
 890     int ret = 0;
 891
 892     mpdm_ref(dom);
 893     mpdm_ref(data);
 894
 895 #ifdef CONFOPT_GETTEXT
 896
 897     mpdm_t dm;
 898     mpdm_t dt;
 899
 900     /* convert both to mbs,s */
 901     dm = mpdm_ref(MPDM_2MBS(dom->data));
 902     dt = mpdm_ref(MPDM_2MBS(data->data));
 903
 904     /* bind and set domain */
 905     bindtextdomain((char *) dm->data, (char *) dt->data);
 906     textdomain((char *) dm->data);
 907
 908     mpdm_hset_s(mpdm_root(), L"__I18N__", MPDM_H(0));
 909
 910     mpdm_unref(dt);
 911     mpdm_unref(dm);
 912
 913     ret = 1;
 914
 915 #endif                          /* CONFOPT_GETTEXT */
 916
 917 #ifdef CONFOPT_WIN32
 918
 919     mpdm_t v;
 920
 921     if ((v = mpdm_hget_s(mpdm_root(), L"ENV")) != NULL &&
 922         mpdm_hget_s(v, L"LANG") == NULL) {
 923         wchar_t *wptr = L"en";
 924
 925         /* MS Windows crappy language constants... */
 926
 927         switch ((GetSystemDefaultLangID() & 0x00ff)) {
 928         case 0x01:
 929             wptr = L"ar";
 930             break;              /* arabic */
 931         case 0x02:
 932             wptr = L"bg";
 933             break;              /* bulgarian */
 934         case 0x03:
 935             wptr = L"ca";
 936             break;              /* catalan */
 937         case 0x04:
 938             wptr = L"zh";
 939             break;              /* chinese */
 940         case 0x05:
 941             wptr = L"cz";
 942             break;              /* czech */
 943         case 0x06:
 944             wptr = L"da";
 945             break;              /* danish */
 946         case 0x07:
 947             wptr = L"de";
 948             break;              /* german */
 949         case 0x08:
 950             wptr = L"el";
 951             break;              /* greek */
 952         case 0x09:
 953             wptr = L"en";
 954             break;              /* english */
 955         case 0x0a:
 956             wptr = L"es";
 957             break;              /* spanish */
 958         case 0x0b:
 959             wptr = L"fi";
 960             break;              /* finnish */
 961         case 0x0c:
 962             wptr = L"fr";
 963             break;              /* french */
 964         case 0x0d:
 965             wptr = L"he";
 966             break;              /* hebrew */
 967         case 0x0e:
 968             wptr = L"hu";
 969             break;              /* hungarian */
 970         case 0x0f:
 971             wptr = L"is";
 972             break;              /* icelandic */
 973         case 0x10:
 974             wptr = L"it";
 975             break;              /* italian */
 976         case 0x11:
 977             wptr = L"jp";
 978             break;              /* japanese */
 979         case 0x12:
 980             wptr = L"ko";
 981             break;              /* korean */
 982         case 0x13:
 983             wptr = L"nl";
 984             break;              /* dutch */
 985         case 0x14:
 986             wptr = L"no";
 987             break;              /* norwegian */
 988         case 0x15:
 989             wptr = L"po";
 990             break;              /* polish */
 991         case 0x16:
 992             wptr = L"pt";
 993             break;              /* portuguese */
 994         case 0x17:
 995             wptr = L"rm";
 996             break;              /* romansh (switzerland) */
 997         case 0x18:
 998             wptr = L"ro";
 999             break;              /* romanian */
1000         case 0x19:
1001             wptr = L"ru";
1002             break;              /* russian */
1003         case 0x1a:
1004             wptr = L"sr";
1005             break;              /* serbian */
1006         case 0x1b:
1007             wptr = L"sk";
1008             break;              /* slovak */
1009         case 0x1c:
1010             wptr = L"sq";
1011             break;              /* albanian */
1012         case 0x1d:
1013             wptr = L"sv";
1014             break;              /* swedish */
1015         }
1016
1017         mpdm_hset_s(v, L"LANG", MPDM_S(wptr));
1018     }
1019
1020 #endif                          /* CONFOPT_WIN32 */
1021
1022     mpdm_unref(data);
1023     mpdm_unref(dom);
1024
1025     return ret;
1026 }
1027
1028
1029 #ifdef CONFOPT_WCWIDTH
1030
1031 int wcwidth(wchar_t);
1032
1033 int mpdm_wcwidth(wchar_t c)
1034 {
1035     return wcwidth(c);
1036 }
1037
1038 #else                           /* CONFOPT_WCWIDTH */
1039
1040 #include "wcwidth.c"
1041
1042 int mpdm_wcwidth(wchar_t c)
1043 {
1044     return mk_wcwidth(c);
1045 }
1046
1047 #endif                          /* CONFOPT_WCWIDTH */
1048
1049
1050 /**
1051  * mpdm_sprintf - Formats a sprintf()-like string.
1052  * @fmt: the string format
1053  * @args: an array of values
1054  *
1055  * Formats a string using the sprintf() format taking the values from @args.
1056  * [Strings]
1057  */
1058 mpdm_t mpdm_sprintf(const mpdm_t fmt, const mpdm_t args)
1059 {
1060     const wchar_t *i = fmt->data;
1061     wchar_t *o = NULL;
1062     int l = 0, n = 0;
1063     wchar_t c;
1064
1065     mpdm_ref(fmt);
1066     mpdm_ref(args);
1067
1068     /* loop all characters */
1069     while ((c = *i++) != L'\0') {
1070         int m = 0;
1071         wchar_t *tptr = NULL;
1072         wchar_t *wptr = NULL;
1073
1074         if (c == L'%') {
1075             /* format directive */
1076             char t_fmt[128];
1077             char tmp[1024];
1078             mpdm_t v;
1079             char *ptr = NULL;
1080
1081             /* transfer the % */
1082             t_fmt[m++] = '%';
1083
1084             /* transform the format to mbs */
1085             while (*i != L'\0' &&
1086                    m < (int) (sizeof(t_fmt) - MB_CUR_MAX - 1) &&
1087                    wcschr(L"-.0123456789", *i) != NULL)
1088                 m += wctomb(&t_fmt[m], *i++);
1089
1090             /* transfer the directive */
1091             m += wctomb(&t_fmt[m], *i++);
1092
1093             t_fmt[m] = '\0';
1094
1095             /* by default, copies the format */
1096             strcpy(tmp, t_fmt);
1097
1098             /* pick next value */
1099             v = mpdm_aget(args, n++);
1100
1101             switch (t_fmt[m - 1]) {
1102             case 'd':
1103             case 'i':
1104             case 'u':
1105             case 'x':
1106             case 'X':
1107             case 'o':
1108
1109                 /* integer value */
1110                 snprintf(tmp, sizeof(tmp) - 1, t_fmt, mpdm_ival(v));
1111                 break;
1112
1113             case 'f':
1114
1115                 /* float (real) value */
1116                 snprintf(tmp, sizeof(tmp) - 1, t_fmt, mpdm_rval(v));
1117                 break;
1118
1119             case 's':
1120
1121                 /* string value */
1122                 ptr = mpdm_wcstombs(mpdm_string(v), NULL);
1123                 snprintf(tmp, sizeof(tmp) - 1, t_fmt, ptr);
1124                 free(ptr);
1125
1126                 break;
1127
1128             case 'c':
1129
1130                 /* char */
1131                 m = 1;
1132                 wptr = &c;
1133                 c = mpdm_ival(v);
1134                 break;
1135
1136             case 'b':
1137
1138                 ptr = tmp;
1139                 unsigned int mask;
1140                 int p = 0;
1141
1142                 mask = 1 << ((sizeof(int) * 8) - 1);
1143                 while (mask) {
1144                     if (mask & (unsigned int) mpdm_ival(v)) {
1145                         *ptr++ = '1';
1146                         p = 1;
1147                     }
1148                     else
1149                     if (p)
1150                         *ptr++ = '0';
1151
1152                     mask >>= 1;
1153                 }
1154
1155                 if (ptr == tmp)
1156                     *ptr++ = '0';
1157
1158                 *ptr = '\0';
1159                 break;
1160
1161             case '%':
1162
1163                 /* percent sign */
1164                 m = 1;
1165                 wptr = &c;
1166                 break;
1167             }
1168
1169             /* transfer */
1170             if (wptr == NULL)
1171                 wptr = tptr = mpdm_mbstowcs(tmp, &m, -1);
1172         }
1173         else {
1174             /* raw character */
1175             m = 1;
1176             wptr = &c;
1177         }
1178
1179         /* transfer */
1180         o = mpdm_poke(o, &l, wptr, m, sizeof(wchar_t));
1181
1182         /* free the temporary buffer, if any */
1183         if (tptr != NULL)
1184             free(tptr);
1185     }
1186
1187     if (o == NULL)
1188         return NULL;
1189
1190     /* null-terminate */
1191     o = mpdm_poke(o, &l, L"", 1, sizeof(wchar_t));
1192
1193     mpdm_unref(args);
1194     mpdm_unref(fmt);
1195
1196     return MPDM_ENS(o, l - 1);
1197 }
1198
1199
1200 /**
1201  * mpdm_ulc - Converts a string to uppercase or lowecase.
1202  * @s: the string
1203  * @u: convert to uppercase (1) or to lowercase (0).
1204  *
1205  * Converts @s to uppercase (for @u == 1) or to lowercase (@u == 0).
1206  * [Strings]
1207  */
1208 mpdm_t mpdm_ulc(const mpdm_t s, int u)
1209 {
1210     mpdm_t r = NULL;
1211     wchar_t *optr;
1212     int i;
1213
1214     mpdm_ref(s);
1215
1216     i = mpdm_size(s);
1217
1218     if ((optr = malloc((i + 1) * sizeof(wchar_t))) != NULL) {
1219         wchar_t *iptr = mpdm_string(s);
1220         int n;
1221
1222         for (n = 0; n < i; n++)
1223             optr[n] = u ? towupper(iptr[n]) : towlower(iptr[n]);
1224
1225         optr[n] = L'\0';
1226         r = MPDM_ENS(optr, i);
1227     }
1228
1229     mpdm_unref(s);
1230
1231     return r;
1232 }
1233
1234
/* scanf working buffers: the set of accepted characters, the set of
   rejected characters and the mark that ends an %S command */
#define SCANF_BUF_SIZE 1024
static wchar_t scanf_yset[SCANF_BUF_SIZE];
static wchar_t scanf_nset[SCANF_BUF_SIZE];
static wchar_t scanf_mark[SCANF_BUF_SIZE];

/* standard scanf commands, each one with the characters it accepts
   (yset; empty means any) and the ones that stop it (nset). The table
   is read-only and private to this file; the strings are literals,
   so they are only reachable through const pointers */
static const struct {
    wchar_t cmd;
    const wchar_t *yset;
    const wchar_t *nset;
} scanf_sets[] = {
    { L's',  L"",                         L" \t"},
    { L'u',  L"0123456789",               L""},
    { L'd',  L"-0123456789",              L""},
    { L'i',  L"-0123456789",              L""},
    { L'f',  L"-0123456789.",             L""},
    { L'x',  L"-0123456789xabcdefABCDEF", L""},
    { L'\0', NULL,                        NULL},
};
1254
1255 /**
1256  * mpdm_sscanf - Extracts data like sscanf().
1257  * @str: the string to be parsed
1258  * @fmt: the string format
1259  * @offset: the character offset to start scanning
1260  *
1261  * Extracts data from a string using a special format pattern, very
1262  * much like the scanf() series of functions in the C library. Apart
1263  * from the standard percent-sign-commands (s, u, d, i, f, x,
1264  * n, [, with optional size and * to ignore), it implements S,
1265  * to match a string of characters upto what follows in the format
1266  * string. Also, the [ set of characters can include other % formats.
1267  *
1268  * Returns an array with the extracted values. If %n is used, the
1269  * position in the scanned string is returned as the value.
1270  * [Strings]
1271  */
1272 mpdm_t mpdm_sscanf(const mpdm_t str, const mpdm_t fmt, int offset)
1273 {
1274     wchar_t *i = (wchar_t *) str->data;
1275     wchar_t *f = (wchar_t *) fmt->data;
1276     mpdm_t r;
1277
1278     mpdm_ref(fmt);
1279     mpdm_ref(str);
1280
1281     i += offset;
1282     r = MPDM_A(0);
1283     mpdm_ref(r);
1284
1285     while (*f) {
1286         if (*f == L'%') {
1287             wchar_t *ptr = NULL;
1288             int size = 0;
1289             wchar_t cmd;
1290             int vsize = 0;
1291             int ignore = 0;
1292             int msize = 0;
1293
1294             /* empty all buffers */
1295             scanf_yset[0] = scanf_nset[0] = scanf_mark[0] = L'\0';
1296
1297             f++;
1298
1299             /* an asterisk? don't return next value */
1300             if (*f == L'*') {
1301                 ignore = 1;
1302                 f++;
1303             }
1304
1305             /* does it have a size? */
1306             while (wcschr(L"0123456789", *f)) {
1307                 vsize *= 10;
1308                 vsize += *f - L'0';
1309                 f++;
1310             }
1311
1312             /* if no size, set it to an arbitrary big limit */
1313             if (!vsize)
1314                 vsize = 0xfffffff;
1315
1316             /* now *f should contain a command */
1317             cmd = *f;
1318             f++;
1319
1320             /* is it a verbatim percent sign? */
1321             if (cmd == L'%') {
1322                 vsize = 1;
1323                 ignore = 1;
1324                 wcscpy(scanf_yset, L"%");
1325             }
1326             else
1327                 /* a position? */
1328             if (cmd == L'n') {
1329                 vsize = 0;
1330                 ignore = 1;
1331                 mpdm_push(r, MPDM_I(i - (wchar_t *) str->data));
1332             }
1333             else
1334                 /* string upto a mark */
1335             if (cmd == L'S') {
1336                 wchar_t *tmp = f;
1337
1338                 /* fill the mark upto another command */
1339                 while (*tmp) {
1340                     if (*tmp == L'%') {
1341                         tmp++;
1342
1343                         /* is it an 'n'? ignore and go on */
1344                         if (*tmp == L'n') {
1345                             tmp++;
1346                             continue;
1347                         }
1348                         else
1349                         if (*tmp == L'%')
1350                             scanf_mark[msize++] = *tmp;
1351                         else
1352                             break;
1353                     }
1354                     else
1355                         scanf_mark[msize++] = *tmp;
1356
1357                     tmp++;
1358                 }
1359
1360                 scanf_mark[msize] = L'\0';
1361             }
1362             else
1363                 /* raw set */
1364             if (cmd == L'[') {
1365                 int n = 0;
1366                 wchar_t *set = scanf_yset;
1367
1368                 /* is it an inverse set? */
1369                 if (*f == L'^') {
1370                     set = scanf_nset;
1371                     f++;
1372                 }
1373
1374                 /* first one is a ]? add it */
1375                 if (*f == L']') {
1376                     set[n++] = *f;
1377                     f++;
1378                 }
1379
1380                 /* now build the set */
1381                 for (; n < SCANF_BUF_SIZE - 1 && *f && *f != L']'; f++) {
1382                     /* is it a range? */
1383                     if (*f == L'-') {
1384                         f++;
1385
1386                         /* start or end? hyphen itself */
1387                         if (n == 0 || *f == L']')
1388                             set[n++] = L'-';
1389                         else {
1390                             /* pick previous char */
1391                             wchar_t c = set[n - 1];
1392
1393                             /* fill */
1394                             while (n < SCANF_BUF_SIZE - 1 && c < *f)
1395                                 set[n++] = ++c;
1396                         }
1397                     }
1398                     else
1399                         /* is it another command? */
1400                     if (*f == L'%') {
1401                         int i;
1402
1403                         f++;
1404                         for (i = 0; scanf_sets[i].cmd; i++) {
1405                             if (*f == scanf_sets[i].cmd) {
1406                                 set[n] = L'\0';
1407                                 wcscat(set, scanf_sets[i].yset);
1408                                 n += wcslen(scanf_sets[i].yset);
1409                                 break;
1410                             }
1411                         }
1412                     }
1413                     else
1414                         set[n++] = *f;
1415                 }
1416
1417                 /* skip the ] */
1418                 f++;
1419
1420                 set[n] = L'\0';
1421             }
1422             else
1423                 /* a standard set? */
1424             {
1425                 int n;
1426
1427                 for (n = 0; scanf_sets[n].cmd != L'\0'; n++) {
1428                     if (cmd == scanf_sets[n].cmd) {
1429                         wcscpy(scanf_yset, scanf_sets[n].yset);
1430                         wcscpy(scanf_nset, scanf_sets[n].nset);
1431                         break;
1432                     }
1433                 }
1434             }
1435
1436             /* now fill the dynamic string */
1437             while (vsize &&
1438                    !wcschr(scanf_nset, *i) &&
1439                    (scanf_yset[0] == L'\0' || wcschr(scanf_yset, *i)) &&
1440                    (msize == 0 || wcsncmp(i, scanf_mark, msize) != 0)) {
1441
1442                 /* only add if not being ignored */
1443                 if (!ignore)
1444                     ptr = mpdm_poke(ptr, &size, i, 1, sizeof(wchar_t));
1445
1446                 i++;
1447                 vsize--;
1448             }
1449
1450             if (!ignore && size) {
1451                 /* null terminate and push */
1452                 ptr = mpdm_poke(ptr, &size, L"", 1, sizeof(wchar_t));
1453                 mpdm_push(r, MPDM_ENS(ptr, size - 1));
1454             }
1455         }
1456         else
1457         if (*f == L' ' || *f == L'\t') {
1458             /* if it's a blank, sync to next non-blank */
1459             f++;
1460
1461             while (*i == L' ' || *i == L'\t')
1462                 i++;
1463         }
1464         else
1465             /* test for literals in the format string */
1466         if (*i == *f) {
1467             i++;
1468             f++;
1469         }
1470         else
1471             break;
1472     }
1473
1474     mpdm_unref(str);
1475     mpdm_unref(fmt);
1476
1477     mpdm_unrefnd(r);
1478
1479     return r;
1480 }
1481
1482
1483 /**
1484  * mpdm_tr - Transliterates a string.
1485  * @str: the strnig
1486  * @s1: characters to be changed
1487  * @s2: characters to replace those in s1
1488  *
1489  * Creates a copy of @str, which will have all characters in @s1
1490  * replaced by those in @s2 matching their position.
1491  */
1492 mpdm_t mpdm_tr(mpdm_t str, mpdm_t s1, mpdm_t s2)
1493 {
1494     mpdm_t r;
1495     wchar_t *ptr;
1496     wchar_t *cs1;
1497     wchar_t *cs2;
1498     wchar_t c;
1499
1500     mpdm_ref(str);
1501     mpdm_ref(s1);
1502     mpdm_ref(s2);
1503
1504     /* create a copy of the string */
1505     r = MPDM_NS((wchar_t *)str->data, mpdm_size(str));
1506     mpdm_ref(r);
1507
1508     ptr = (wchar_t *)r->data;
1509     cs1 = (wchar_t *)s1->data;
1510     cs2 = (wchar_t *)s2->data;
1511
1512     while ((c = *ptr) != L'\0') {
1513         int n;
1514
1515         for (n = 0; cs1[n] != '\0'; n++) {
1516             if (c == cs1[n]) {
1517                 *ptr = cs2[n];
1518                 break;
1519             }
1520         }
1521
1522         ptr++;
1523     }
1524
1525     mpdm_unrefnd(r);
1526     mpdm_unref(s2);
1527     mpdm_unref(s1);
1528     mpdm_unref(str);
1529
1530     return r;
1531 }