mpdm_s.c

   1 /*
   2
   3     MPDM - Minimum Profit Data Manager
   4     Copyright (C) 2003/2010 Angel Ortega <angel@triptico.com>
   5
   6     mpdm_s.c - String management
   7
   8     This program is free software; you can redistribute it and/or
   9     modify it under the terms of the GNU General Public License
  10     as published by the Free Software Foundation; either version 2
  11     of the License, or (at your option) any later version.
  12
  13     This program is distributed in the hope that it will be useful,
  14     but WITHOUT ANY WARRANTY; without even the implied warranty of
  15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16     GNU General Public License for more details.
  17
  18     You should have received a copy of the GNU General Public License
  19     along with this program; if not, write to the Free Software
  20     Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  21
  22     http://www.triptico.com
  23
  24 */
  25
  26 #include "config.h"
  27
  28 #include <stdio.h>
  29 #include <stdlib.h>
  30 #include <string.h>
  31 #include <wchar.h>
  32 #include <locale.h>
  33 #include <wctype.h>
  34
  35 #ifdef CONFOPT_GETTEXT
  36 #include <libintl.h>
  37 #endif
  38
  39 #ifdef CONFOPT_WIN32
  40 #include <windows.h>
  41 #endif
  42
  43 #include "mpdm.h"
  44
  45
  46 /** code **/
  47
  48 void *mpdm_poke_o(void *dst, int *dsize, int *offset, const void *org,
  49                   int osize, int esize)
  50 {
  51     if (org != NULL && osize) {
  52         /* enough room? */
  53         if (*offset + osize > *dsize) {
  54             /* no; enlarge */
  55             *dsize += osize;
  56
  57             dst = realloc(dst, *dsize * esize);
  58         }
  59
  60         memcpy((char *) dst + (*offset * esize), org, osize * esize);
  61         *offset += osize;
  62     }
  63
  64     return dst;
  65 }
  66
  67
  68 void *mpdm_poke(void *dst, int *dsize, const void *org, int osize,
  69                 int esize)
  70 /* pokes (adds) org into dst, which is a dynamic string, making it grow */
  71 {
  72     int offset = *dsize;
  73
  74     return mpdm_poke_o(dst, dsize, &offset, org, osize, esize);
  75 }
  76
  77
  78 wchar_t *mpdm_pokewsn(wchar_t * dst, int *dsize, const wchar_t * str,
  79                       int slen)
  80 /* adds a wide string to dst using mpdm_poke() with size */
  81 {
  82     if (str)
  83         dst = mpdm_poke(dst, dsize, str, slen, sizeof(wchar_t));
  84
  85     return dst;
  86 }
  87
  88
  89 wchar_t *mpdm_pokews(wchar_t * dst, int *dsize, const wchar_t * str)
  90 /* adds a wide string to dst using mpdm_poke() */
  91 {
  92     if (str)
  93         dst = mpdm_pokewsn(dst, dsize, str, wcslen(str));
  94
  95     return dst;
  96 }
  97
  98
  99 wchar_t *mpdm_pokev(wchar_t * dst, int *dsize, const mpdm_t v)
 100 /* adds the string in v to dst using mpdm_poke() */
 101 {
 102     if (v != NULL) {
 103         const wchar_t *ptr = mpdm_string(v);
 104
 105         mpdm_ref(v);
 106         dst = mpdm_pokews(dst, dsize, ptr);
 107         mpdm_unref(v);
 108     }
 109
 110     return dst;
 111 }
 112
 113
 114 wchar_t *mpdm_mbstowcs(const char *str, int *s, int l)
 115 /* converts an mbs to a wcs, but filling invalid chars
 116    with question marks instead of just failing */
 117 {
 118     wchar_t *ptr = NULL;
 119     char tmp[64];               /* really MB_CUR_MAX + 1 */
 120     wchar_t wc;
 121     int n, i, c, t = 0;
 122     char *cstr;
 123
 124     /* allow NULL values for s */
 125     if (s == NULL)
 126         s = &t;
 127
 128     /* if there is a limit, duplicate and break the string */
 129     if (l >= 0) {
 130         cstr = strdup(str);
 131         cstr[l] = '\0';
 132     }
 133     else
 134         cstr = (char *) str;
 135
 136     /* try first a direct conversion with mbstowcs */
 137     if ((*s = mbstowcs(NULL, cstr, 0)) != -1) {
 138         /* direct conversion is possible; do it */
 139         if ((ptr = malloc((*s + 1) * sizeof(wchar_t))) != NULL) {
 140             mbstowcs(ptr, cstr, *s);
 141             ptr[*s] = L'\0';
 142         }
 143     }
 144     else {
 145         /* zero everything */
 146         *s = n = i = 0;
 147
 148         for (;;) {
 149             /* no more characters to process? */
 150             if ((c = cstr[n + i]) == '\0' && i == 0)
 151                 break;
 152
 153             tmp[i++] = c;
 154             tmp[i] = '\0';
 155
 156             /* try to convert */
 157             if (mbstowcs(&wc, tmp, 1) == (size_t) - 1) {
 158                 /* can still be an incomplete multibyte char? */
 159                 if (c != '\0' && i <= (int) MB_CUR_MAX)
 160                     continue;
 161                 else {
 162                     /* too many failing bytes; skip 1 byte */
 163                     wc = L'?';
 164                     i = 1;
 165                 }
 166             }
 167
 168             /* skip used bytes and back again */
 169             n += i;
 170             i = 0;
 171
 172             /* store new char */
 173             if ((ptr = mpdm_poke(ptr, s, &wc, 1, sizeof(wchar_t))) == NULL)
 174                 break;
 175         }
 176
 177         /* null terminate and count one less */
 178         if (ptr != NULL) {
 179             ptr = mpdm_poke(ptr, s, L"", 1, sizeof(wchar_t));
 180             (*s)--;
 181         }
 182     }
 183
 184     /* free the duplicate */
 185     if (cstr != str)
 186         free(cstr);
 187
 188     return ptr;
 189 }
 190
 191
 192 char *mpdm_wcstombs(const wchar_t * str, int *s)
 193 /* converts a wcs to an mbs, but filling invalid chars
 194    with question marks instead of just failing */
 195 {
 196     char *ptr = NULL;
 197     char tmp[64];               /* really MB_CUR_MAX + 1 */
 198     int l, t = 0;
 199
 200     /* allow NULL values for s */
 201     if (s == NULL)
 202         s = &t;
 203
 204     /* try first a direct conversion with wcstombs */
 205     if ((*s = wcstombs(NULL, str, 0)) != -1) {
 206         /* direct conversion is possible; do it and return */
 207         if ((ptr = malloc(*s + 1)) != NULL) {
 208             wcstombs(ptr, str, *s);
 209             ptr[*s] = '\0';
 210         }
 211
 212         return ptr;
 213     }
 214
 215     /* invalid encoding? convert characters one by one */
 216     *s = 0;
 217
 218     while (*str) {
 219         if ((l = wctomb(tmp, *str)) <= 0) {
 220             /* if char couldn't be converted,
 221                write a question mark instead */
 222             l = wctomb(tmp, L'?');
 223         }
 224
 225         tmp[l] = '\0';
 226         if ((ptr = mpdm_poke(ptr, s, tmp, l, 1)) == NULL)
 227             break;
 228
 229         str++;
 230     }
 231
 232     /* null terminate and count one less */
 233     if (ptr != NULL) {
 234         ptr = mpdm_poke(ptr, s, "", 1, 1);
 235         (*s)--;
 236     }
 237
 238     return ptr;
 239 }
 240
 241
 242 mpdm_t mpdm_new_wcs(int flags, const wchar_t * str, int size, int cpy)
 243 /* creates a new string value from a wcs */
 244 {
 245     wchar_t *ptr;
 246
 247     /* a size of -1 means 'calculate it' */
 248     if (size == -1 && str != NULL)
 249         size = wcslen(str);
 250
 251     /* create a copy? */
 252     if (cpy) {
 253         /* free() on destruction */
 254         flags |= MPDM_FREE;
 255
 256         /* allocs */
 257         if ((ptr = malloc((size + 1) * sizeof(wchar_t))) == NULL)
 258             return NULL;
 259
 260         /* if no source, reset to zeroes; otherwise, copy */
 261         if (str == NULL)
 262             memset(ptr, '\0', size * sizeof(wchar_t));
 263         else {
 264             wcsncpy(ptr, str, size);
 265             ptr[size] = L'\0';
 266         }
 267     }
 268     else
 269         ptr = (wchar_t *) str;
 270
 271     /* it's a string */
 272     flags |= MPDM_STRING;
 273
 274     return mpdm_new(flags, ptr, size);
 275 }
 276
 277
 278 mpdm_t mpdm_new_mbstowcs(int flags, const char *str, int l)
 279 /* creates a new string value from an mbs */
 280 {
 281     wchar_t *ptr;
 282     int size;
 283
 284     if ((ptr = mpdm_mbstowcs(str, &size, l)) == NULL)
 285         return NULL;
 286
 287     /* it's a string */
 288     flags |= (MPDM_STRING | MPDM_FREE);
 289
 290     return mpdm_new(flags, ptr, size);
 291 }
 292
 293
 294 mpdm_t mpdm_new_wcstombs(int flags, const wchar_t * str)
 295 /* creates a new mbs value from a wbs */
 296 {
 297     char *ptr;
 298     int size;
 299
 300     ptr = mpdm_wcstombs(str, &size);
 301
 302     flags |= MPDM_FREE;
 303
 304     /* unset the string flag; mbs,s are not 'strings' */
 305     flags &= ~MPDM_STRING;
 306
 307     return mpdm_new(flags, ptr, size);
 308 }
 309
 310
 311 mpdm_t mpdm_new_i(int ival)
 312 /* creates a new string value from an integer */
 313 {
 314     mpdm_t v;
 315     char tmp[32];
 316
 317     /* creates the visual representation */
 318     snprintf(tmp, sizeof(tmp), "%d", ival);
 319
 320     v = MPDM_MBS(tmp);
 321
 322     return mpdm_set_ival(v, ival);
 323 }
 324
 325
 326 mpdm_t mpdm_new_r(double rval)
 327 /* creates a new string value from a real number */
 328 {
 329     mpdm_t v;
 330     char tmp[128];
 331
 332     /* creates the visual representation */
 333     snprintf(tmp, sizeof(tmp), "%lf", rval);
 334
 335     /* manually strip useless zeroes */
 336     if (strchr(tmp, '.') != NULL) {
 337         char *ptr;
 338
 339         for (ptr = tmp + strlen(tmp) - 1; *ptr == '0'; ptr--);
 340
 341         /* if it's over the ., strip it also */
 342         if (*ptr != '.')
 343             ptr++;
 344
 345         *ptr = '\0';
 346     }
 347
 348     v = MPDM_MBS(tmp);
 349
 350     return mpdm_set_rval(v, rval);
 351 }
 352
 353
 354 /* interface */
 355
 356 /**
 357  * mpdm_string2 - Returns a printable representation of a value (with buffer).
 358  * @v: the value
 359  * @wtmp: the external buffer
 360  *
 361  * Returns a printable representation of a value. For strings, it's
 362  * the value data itself; for any other type, a conversion to string
 363  * is returned instead. If @v is not a string, the @wtmp buffer
 364  * can be used as a placeholder for the string representation.
 365  *
 366  * The reference count value in @v is not touched.
 367  * [Strings]
 368  */
 369 wchar_t *mpdm_string2(const mpdm_t v, wchar_t *wtmp)
 370 {
 371     char tmp[32];
 372     wchar_t *ret;
 373
 374     /* if it's NULL, return a constant */
 375     if (v == NULL)
 376         ret = L"[NULL]";
 377     else
 378         /* if it's a string, return it */
 379     if (v->flags & MPDM_STRING)
 380         ret = (wchar_t *) v->data;
 381     else {
 382         /* otherwise, return a visual representation */
 383         snprintf(tmp, sizeof(tmp), "%p", v);
 384         mbstowcs(wtmp, tmp, sizeof(wtmp));
 385
 386         ret = wtmp;
 387     }
 388
 389     return ret;
 390 }
 391
 392
 393 /**
 394  * mpdm_string - Returns a printable representation of a value.
 395  * @v: the value
 396  *
 397  * Returns a printable representation of a value. For strings, it's
 398  * the value data itself; for any other type, a conversion to string
 399  * is returned instead. This value should be used immediately, as it
 400  * can be a pointer to a static buffer.
 401  *
 402  * The reference count value in @v is not touched.
 403  * [Strings]
 404  */
 405 wchar_t *mpdm_string(const mpdm_t v)
 406 {
 407     static wchar_t tmp[32];
 408
 409     return mpdm_string2(v, tmp);
 410 }
 411
 412
 413 /**
 414  * mpdm_cmp - Compares two values.
 415  * @v1: the first value
 416  * @v2: the second value
 417  *
 418  * Compares two values. If both has the MPDM_STRING flag set,
 419  * a comparison using wcscoll() is returned; if both are arrays,
 420  * the size is compared first and, if they have the same number
 421  * elements, each one is compared; otherwise, a simple pointer
 422  * comparison is done.
 423  * [Strings]
 424  */
 425 int mpdm_cmp(const mpdm_t v1, const mpdm_t v2)
 426 {
 427     int r;
 428
 429     mpdm_ref(v1);
 430     mpdm_ref(v2);
 431
 432     /* same values? */
 433     if (v1 == v2)
 434         r = 0;
 435     else
 436         /* is any value NULL? */
 437     if (v1 == NULL)
 438         r = -1;
 439     else
 440     if (v2 == NULL)
 441         r = 1;
 442     else
 443         /* different values, but same content? (unlikely) */
 444     if (v1->data == v2->data)
 445         r = 0;
 446     else
 447     if (MPDM_IS_STRING(v1) && MPDM_IS_STRING(v2))
 448         r = wcscoll((wchar_t *) v1->data, (wchar_t *) v2->data);
 449     else
 450     if (MPDM_IS_ARRAY(v1) && MPDM_IS_ARRAY(v2)) {
 451         /* compare first the sizes */
 452         if ((r = mpdm_size(v1) - mpdm_size(v2)) == 0) {
 453             int n;
 454
 455             /* they have the same size;
 456                compare each pair of elements */
 457             for (n = 0; n < mpdm_size(v1); n++) {
 458                 if ((r = mpdm_cmp(mpdm_aget(v1, n),
 459                                   mpdm_aget(v2, n))) != 0)
 460                     break;
 461             }
 462         }
 463     }
 464     else {
 465         wchar_t tmp[32];
 466
 467         r = wcscoll(mpdm_string(v1), mpdm_string2(v2, tmp));
 468     }
 469
 470     mpdm_unref(v2);
 471     mpdm_unref(v1);
 472
 473     return r;
 474 }
 475
 476
 477 /**
 478  * mpdm_cmp_s - Compares two values (string version).
 479  * @v1: the first value
 480  * @v2: the second value
 481  *
 482  * Compares two values. Compares both values using wcscoll()
 483  * if the first one is a string, or returns 1 otherwise.
 484  */
 485 int mpdm_cmp_s(const mpdm_t v1, const wchar_t * v2)
 486 {
 487     int r;
 488
 489     mpdm_ref(v1);
 490
 491     if (MPDM_IS_STRING(v1))
 492         r = wcscoll((wchar_t *) v1->data, v2);
 493     else
 494         r = (int) ((wchar_t *) v1->data - v2);
 495
 496     mpdm_unref(v1);
 497
 498     return r;
 499 }
 500
 501
 502 /**
 503  * mpdm_splice - Creates a new string value from another.
 504  * @v: the original value
 505  * @i: the value to be inserted
 506  * @offset: offset where the substring is to be inserted
 507  * @del: number of characters to delete
 508  *
 509  * Creates a new string value from @v, deleting @del chars at @offset
 510  * and substituting them by @i. If @del is 0, no deletion is done.
 511  * both @offset and @del can be negative; if this is the case, it's
 512  * assumed as counting from the end of @v. If @v is NULL, @i will become
 513  * the new string, and both @offset and @del will be ignored. If @v is
 514  * not NULL and @i is, no insertion process is done (only deletion, if
 515  * applicable).
 516  *
 517  * Returns a two element array, with the new string in the first
 518  * element and the deleted string in the second (with a NULL value
 519  * if @del is 0).
 520  * [Strings]
 521  */
 522 mpdm_t mpdm_splice(const mpdm_t v, const mpdm_t i, int offset, int del)
 523 {
 524     mpdm_t w;
 525     mpdm_t n = NULL;
 526     mpdm_t d = NULL;
 527     int os, ns, r;
 528     int ins = 0;
 529     wchar_t *ptr;
 530
 531     mpdm_ref(v);
 532     mpdm_ref(i);
 533
 534     if (v != NULL) {
 535         os = mpdm_size(v);
 536
 537         /* negative offsets start from the end */
 538         if (offset < 0)
 539             offset = os + 1 - offset;
 540
 541         /* never add further the end */
 542         if (offset > os)
 543             offset = os;
 544
 545         /* negative del counts as 'characters left' */
 546         if (del < 0)
 547             del = os + 1 - offset + del;
 548
 549         /* something to delete? */
 550         if (del > 0) {
 551             /* never delete further the end */
 552             if (offset + del > os)
 553                 del = os - offset;
 554
 555             /* deleted string */
 556             d = MPDM_NS(((wchar_t *) v->data) + offset, del);
 557         }
 558         else
 559             del = 0;
 560
 561         /* something to insert? */
 562         ins = mpdm_size(i);
 563
 564         /* new size and remainder */
 565         ns = os + ins - del;
 566         r = offset + del;
 567
 568         n = MPDM_NS(NULL, ns);
 569
 570         ptr = (wchar_t *) n->data;
 571
 572         /* copy the beginning */
 573         if (offset > 0) {
 574             wcsncpy(ptr, v->data, offset);
 575             ptr += offset;
 576         }
 577
 578         /* copy the text to be inserted */
 579         if (ins > 0) {
 580             wcsncpy(ptr, i->data, ins);
 581             ptr += ins;
 582         }
 583
 584         /* copy the remaining */
 585         os -= r;
 586         if (os > 0) {
 587             wcsncpy(ptr, ((wchar_t *) v->data) + r, os);
 588             ptr += os;
 589         }
 590
 591         /* null terminate */
 592         *ptr = L'\0';
 593     }
 594     else
 595         n = i;
 596
 597     /* creates the output array */
 598     w = MPDM_A(2);
 599
 600     mpdm_ref(w);
 601     mpdm_aset(w, n, 0);
 602     mpdm_aset(w, d, 1);
 603     mpdm_unrefnd(w);
 604
 605     mpdm_unref(i);
 606     mpdm_unref(v);
 607
 608     return w;
 609 }
 610
 611
 612 /**
 613  * mpdm_strcat_sn - Concatenates two strings (string with size version).
 614  * @s1: the first string
 615  * @s2: the second string
 616  * @size: the size of the second string
 617  *
 618  * Returns a new string formed by the concatenation of @s1 and @s2.
 619  * [Strings]
 620  */
 621 mpdm_t mpdm_strcat_sn(const mpdm_t s1, const wchar_t * s2, int size)
 622 {
 623     wchar_t *ptr = NULL;
 624     int s = 0;
 625     mpdm_t r;
 626
 627     if (s1 == NULL && s2 == NULL)
 628         r = NULL;
 629     else {
 630         ptr = mpdm_pokev(ptr, &s, s1);
 631         ptr = mpdm_pokewsn(ptr, &s, s2, size);
 632
 633         ptr = mpdm_poke(ptr, &s, L"", 1, sizeof(wchar_t));
 634         r = MPDM_ENS(ptr, s - 1);
 635     }
 636
 637     return r;
 638 }
 639
 640
 641 /**
 642  * mpdm_strcat_s - Concatenates two strings (string version).
 643  * @s1: the first string
 644  * @s2: the second string
 645  *
 646  * Returns a new string formed by the concatenation of @s1 and @s2.
 647  * [Strings]
 648  */
 649 mpdm_t mpdm_strcat_s(const mpdm_t s1, const wchar_t * s2)
 650 {
 651     return mpdm_strcat_sn(s1, s2, s2 ? wcslen(s2) : 0);
 652 }
 653
 654
 655 /**
 656  * mpdm_strcat - Concatenates two strings.
 657  * @s1: the first string
 658  * @s2: the second string
 659  *
 660  * Returns a new string formed by the concatenation of @s1 and @s2.
 661  * [Strings]
 662  */
 663 mpdm_t mpdm_strcat(const mpdm_t s1, const mpdm_t s2)
 664 {
 665     mpdm_t r;
 666
 667     mpdm_ref(s2);
 668     r = mpdm_strcat_s(s1, s2 ? mpdm_string(s2) : NULL);
 669     mpdm_unref(s2);
 670
 671     return r;
 672 }
 673
 674
 675 /**
 676  * mpdm_ival - Returns a value's data as an integer.
 677  * @v: the value
 678  *
 679  * Returns a value's data as an integer. If the value is a string,
 680  * it's converted via sscanf and returned; non-string values have all
 681  * an ival of 0. The converted integer is cached, so costly string
 682  * conversions are only done once. Values created with the MPDM_IVAL
 683  * flag set have its ival cached from the beginning.
 684  * [Strings]
 685  * [Value Management]
 686  */
 687 int mpdm_ival(mpdm_t v)
 688 {
 689     int i = 0;
 690
 691     mpdm_ref(v);
 692
 693     if (v != NULL) {
 694         /* if there is no cached integer, calculate it */
 695         if (!(v->flags & MPDM_IVAL)) {
 696             /* if it's a string, calculate it; other
 697                values will have an ival of 0 */
 698             if (v->flags & MPDM_STRING) {
 699                 char tmp[32];
 700                 char *fmt = "%i";
 701
 702                 wcstombs(tmp, (wchar_t *) v->data, sizeof(tmp));
 703                 tmp[sizeof(tmp) - 1] = '\0';
 704
 705                 /* workaround for mingw32: as it doesn't
 706                    correctly parse octal and hexadecimal
 707                    numbers, they are tried as special cases */
 708                 if (tmp[0] == '0') {
 709                     if (tmp[1] == 'b' || tmp[1] == 'B') {
 710                         /* binary number */
 711                         fmt = NULL;
 712                         char *ptr = &tmp[2];
 713
 714                         while (*ptr == '0' || *ptr == '1') {
 715                             i <<= 1;
 716
 717                             if (*ptr == '1')
 718                                 i |= 1;
 719
 720                             ptr++;
 721                         }
 722                     }
 723                     else
 724                     if (tmp[1] == 'x' || tmp[1] == 'X')
 725                         fmt = "%x";
 726                     else
 727                         fmt = "%o";
 728                 }
 729
 730                 if (fmt != NULL)
 731                     sscanf(tmp, fmt, &i);
 732             }
 733
 734             mpdm_set_ival(v, i);
 735         }
 736
 737         i = v->ival;
 738     }
 739
 740     mpdm_unref(v);
 741
 742     return i;
 743 }
 744
 745
 746 /**
 747  * mpdm_rval - Returns a value's data as a real number (double).
 748  * @v: the value
 749  *
 750  * Returns a value's data as a real number (double float). If the value
 751  * is a string, it's converted via sscanf and returned; non-string values
 752  * have all an rval of 0. The converted double is cached, so costly string
 753  * conversions are only done once. Values created with the MPDM_RVAL
 754  * flag set have its rval cached from the beginning.
 755  * [Strings]
 756  * [Value Management]
 757  */
 758 double mpdm_rval(mpdm_t v)
 759 {
 760     double r = 0.0;
 761
 762     mpdm_ref(v);
 763
 764     if (v != NULL) {
 765         /* if there is no cached double, calculate it */
 766         if (!(v->flags & MPDM_RVAL)) {
 767             /* if it's a string, calculate it; other
 768                values will have an rval of 0.0 */
 769             if (v->flags & MPDM_STRING) {
 770                 char tmp[128];
 771                 char *prev_locale;
 772
 773                 wcstombs(tmp, (wchar_t *) v->data, sizeof(tmp));
 774                 tmp[sizeof(tmp) - 1] = '\0';
 775
 776                 /* if the number starts with 0, it's
 777                    an octal or hexadecimal number; just
 778                    take the integer value and cast it */
 779                 if (tmp[0] == '0' && tmp[1] != '.')
 780                     r = (double) mpdm_ival(v);
 781                 else {
 782                     /* set locale to C for non locale-dependent
 783                        floating point conversion */
 784                     prev_locale = setlocale(LC_NUMERIC, "C");
 785
 786                     /* read */
 787                     sscanf(tmp, "%lf", &r);
 788
 789                     /* set previous locale */
 790                     setlocale(LC_NUMERIC, prev_locale);
 791                 }
 792             }
 793
 794             mpdm_set_rval(v, r);
 795         }
 796
 797         r = v->rval;
 798     }
 799
 800     mpdm_unref(v);
 801
 802     return r;
 803 }
 804
 805
 806 /**
 807  * mpdm_gettext - Translates a string to the current language.
 808  * @str: the string
 809  *
 810  * Translates the @str string to the current language.
 811  *
 812  * This function can still be used even if there is no real gettext
 813  * support() by manually filling the __I18N__ hash.
 814  *
 815  * If the string is found in the current table, the translation is
 816  * returned; otherwise, the same @str value is returned.
 817  * [Strings]
 818  * [Localization]
 819  */
 820 mpdm_t mpdm_gettext(const mpdm_t str)
 821 {
 822     mpdm_t v;
 823     mpdm_t i18n = NULL;
 824
 825     /* gets the cache */
 826     if ((i18n = mpdm_hget_s(mpdm_root(), L"__I18N__")) == NULL)
 827         i18n = mpdm_hset_s(mpdm_root(), L"__I18N__", MPDM_H(0));
 828
 829     mpdm_ref(str);
 830
 831     /* try first the cache */
 832     if ((v = mpdm_hget(i18n, str)) == NULL) {
 833 #ifdef CONFOPT_GETTEXT
 834         char *s;
 835         mpdm_t t;
 836
 837         /* convert to mbs */
 838         t = mpdm_ref(MPDM_2MBS(str->data));
 839
 840         /* ask gettext for it */
 841         s = gettext((char *) t->data);
 842
 843         if (s != t->data)
 844             v = MPDM_MBS(s);
 845         else
 846             v = str;
 847
 848         mpdm_unref(t);
 849
 850 #else                           /* CONFOPT_GETTEXT */
 851
 852         v = str;
 853
 854 #endif                          /* CONFOPT_GETTEXT */
 855
 856         /* store in the cache */
 857         mpdm_hset(i18n, str, v);
 858     }
 859
 860     mpdm_unref(str);
 861
 862     return v;
 863 }
 864
 865
 866 /**
 867  * mpdm_gettext_domain - Sets domain and data directory for translations.
 868  * @dom: the domain (application name)
 869  * @data: directory contaning the .mo files
 870  *
 871  * Sets the domain (application name) and translation data for translating
 872  * strings that will be returned by mpdm_gettext().@data must point to a
 873  * directory containing the .mo (compiled .po) files.
 874  *
 875  * If there is no gettext support, returns 0, or 1 otherwise.
 876  * [Strings]
 877  * [Localization]
 878  */
 879 int mpdm_gettext_domain(const mpdm_t dom, const mpdm_t data)
 880 {
 881     int ret = 0;
 882
 883     mpdm_ref(dom);
 884     mpdm_ref(data);
 885
 886 #ifdef CONFOPT_GETTEXT
 887
 888     mpdm_t dm;
 889     mpdm_t dt;
 890
 891     /* convert both to mbs,s */
 892     dm = mpdm_ref(MPDM_2MBS(dom->data));
 893     dt = mpdm_ref(MPDM_2MBS(data->data));
 894
 895     /* bind and set domain */
 896     bindtextdomain((char *) dm->data, (char *) dt->data);
 897     textdomain((char *) dm->data);
 898
 899     mpdm_hset_s(mpdm_root(), L"__I18N__", MPDM_H(0));
 900
 901     mpdm_unref(dt);
 902     mpdm_unref(dm);
 903
 904     ret = 1;
 905
 906 #endif                          /* CONFOPT_GETTEXT */
 907
 908 #ifdef CONFOPT_WIN32
 909
 910     mpdm_t v;
 911
 912     if ((v = mpdm_hget_s(mpdm_root(), L"ENV")) != NULL &&
 913         mpdm_hget_s(v, L"LANG") == NULL) {
 914         wchar_t *wptr = L"en";
 915
 916         /* MS Windows crappy language constants... */
 917
 918         switch ((GetSystemDefaultLangID() & 0x00ff)) {
 919         case 0x01:
 920             wptr = L"ar";
 921             break;              /* arabic */
 922         case 0x02:
 923             wptr = L"bg";
 924             break;              /* bulgarian */
 925         case 0x03:
 926             wptr = L"ca";
 927             break;              /* catalan */
 928         case 0x04:
 929             wptr = L"zh";
 930             break;              /* chinese */
 931         case 0x05:
 932             wptr = L"cz";
 933             break;              /* czech */
 934         case 0x06:
 935             wptr = L"da";
 936             break;              /* danish */
 937         case 0x07:
 938             wptr = L"de";
 939             break;              /* german */
 940         case 0x08:
 941             wptr = L"el";
 942             break;              /* greek */
 943         case 0x09:
 944             wptr = L"en";
 945             break;              /* english */
 946         case 0x0a:
 947             wptr = L"es";
 948             break;              /* spanish */
 949         case 0x0b:
 950             wptr = L"fi";
 951             break;              /* finnish */
 952         case 0x0c:
 953             wptr = L"fr";
 954             break;              /* french */
 955         case 0x0d:
 956             wptr = L"he";
 957             break;              /* hebrew */
 958         case 0x0e:
 959             wptr = L"hu";
 960             break;              /* hungarian */
 961         case 0x0f:
 962             wptr = L"is";
 963             break;              /* icelandic */
 964         case 0x10:
 965             wptr = L"it";
 966             break;              /* italian */
 967         case 0x11:
 968             wptr = L"jp";
 969             break;              /* japanese */
 970         case 0x12:
 971             wptr = L"ko";
 972             break;              /* korean */
 973         case 0x13:
 974             wptr = L"nl";
 975             break;              /* dutch */
 976         case 0x14:
 977             wptr = L"no";
 978             break;              /* norwegian */
 979         case 0x15:
 980             wptr = L"po";
 981             break;              /* polish */
 982         case 0x16:
 983             wptr = L"pt";
 984             break;              /* portuguese */
 985         case 0x17:
 986             wptr = L"rm";
 987             break;              /* romansh (switzerland) */
 988         case 0x18:
 989             wptr = L"ro";
 990             break;              /* romanian */
 991         case 0x19:
 992             wptr = L"ru";
 993             break;              /* russian */
 994         case 0x1a:
 995             wptr = L"sr";
 996             break;              /* serbian */
 997         case 0x1b:
 998             wptr = L"sk";
 999             break;              /* slovak */
1000         case 0x1c:
1001             wptr = L"sq";
1002             break;              /* albanian */
1003         case 0x1d:
1004             wptr = L"sv";
1005             break;              /* swedish */
1006         }
1007
1008         mpdm_hset_s(v, L"LANG", MPDM_S(wptr));
1009     }
1010
1011 #endif                          /* CONFOPT_WIN32 */
1012
1013     mpdm_unref(data);
1014     mpdm_unref(dom);
1015
1016     return ret;
1017 }
1018
1019
1020 #ifdef CONFOPT_WCWIDTH
1021
1022 int wcwidth(wchar_t);
1023
1024 int mpdm_wcwidth(wchar_t c)
1025 {
1026     return wcwidth(c);
1027 }
1028
1029 #else                           /* CONFOPT_WCWIDTH */
1030
1031 #include "wcwidth.c"
1032
1033 int mpdm_wcwidth(wchar_t c)
1034 {
1035     return mk_wcwidth(c);
1036 }
1037
1038 #endif                          /* CONFOPT_WCWIDTH */
1039
1040
1041 /**
1042  * mpdm_sprintf - Formats a sprintf()-like string.
1043  * @fmt: the string format
1044  * @args: an array of values
1045  *
1046  * Formats a string using the sprintf() format taking the values from @args.
1047  * [Strings]
1048  */
1049 mpdm_t mpdm_sprintf(const mpdm_t fmt, const mpdm_t args)
1050 {
1051     const wchar_t *i = fmt->data;
1052     wchar_t *o = NULL;
1053     int l = 0, n = 0;
1054     wchar_t c;
1055
1056     mpdm_ref(fmt);
1057     mpdm_ref(args);
1058
1059     /* loop all characters */
1060     while ((c = *i++) != L'\0') {
1061         int m = 0;
1062         wchar_t *tptr = NULL;
1063         wchar_t *wptr = NULL;
1064
1065         if (c == L'%') {
1066             /* format directive */
1067             char t_fmt[128];
1068             char tmp[1024];
1069             mpdm_t v;
1070             char *ptr = NULL;
1071
1072             /* transfer the % */
1073             t_fmt[m++] = '%';
1074
1075             /* transform the format to mbs */
1076             while (*i != L'\0' &&
1077                    m < (int) (sizeof(t_fmt) - MB_CUR_MAX - 1) &&
1078                    wcschr(L"-.0123456789", *i) != NULL)
1079                 m += wctomb(&t_fmt[m], *i++);
1080
1081             /* transfer the directive */
1082             m += wctomb(&t_fmt[m], *i++);
1083
1084             t_fmt[m] = '\0';
1085
1086             /* by default, copies the format */
1087             strcpy(tmp, t_fmt);
1088
1089             /* pick next value */
1090             v = mpdm_aget(args, n++);
1091
1092             switch (t_fmt[m - 1]) {
1093             case 'd':
1094             case 'i':
1095             case 'u':
1096             case 'x':
1097             case 'X':
1098             case 'o':
1099
1100                 /* integer value */
1101                 snprintf(tmp, sizeof(tmp) - 1, t_fmt, mpdm_ival(v));
1102                 break;
1103
1104             case 'f':
1105
1106                 /* float (real) value */
1107                 snprintf(tmp, sizeof(tmp) - 1, t_fmt, mpdm_rval(v));
1108                 break;
1109
1110             case 's':
1111
1112                 /* string value */
1113                 ptr = mpdm_wcstombs(mpdm_string(v), NULL);
1114                 snprintf(tmp, sizeof(tmp) - 1, t_fmt, ptr);
1115                 free(ptr);
1116
1117                 break;
1118
1119             case 'c':
1120
1121                 /* char */
1122                 m = 1;
1123                 wptr = &c;
1124                 c = mpdm_ival(v);
1125                 break;
1126
1127             case 'b':
1128
1129                 ptr = tmp;
1130                 unsigned int mask;
1131                 int p = 0;
1132
1133                 mask = 1 << ((sizeof(int) * 8) - 1);
1134                 while (mask) {
1135                     if (mask & (unsigned int) mpdm_ival(v)) {
1136                         *ptr++ = '1';
1137                         p = 1;
1138                     }
1139                     else
1140                     if (p)
1141                         *ptr++ = '0';
1142
1143                     mask >>= 1;
1144                 }
1145
1146                 if (ptr == tmp)
1147                     *ptr++ = '0';
1148
1149                 *ptr = '\0';
1150                 break;
1151
1152             case '%':
1153
1154                 /* percent sign */
1155                 m = 1;
1156                 wptr = &c;
1157                 break;
1158             }
1159
1160             /* transfer */
1161             if (wptr == NULL)
1162                 wptr = tptr = mpdm_mbstowcs(tmp, &m, -1);
1163         }
1164         else {
1165             /* raw character */
1166             m = 1;
1167             wptr = &c;
1168         }
1169
1170         /* transfer */
1171         o = mpdm_poke(o, &l, wptr, m, sizeof(wchar_t));
1172
1173         /* free the temporary buffer, if any */
1174         if (tptr != NULL)
1175             free(tptr);
1176     }
1177
1178     if (o == NULL)
1179         return NULL;
1180
1181     /* null-terminate */
1182     o = mpdm_poke(o, &l, L"", 1, sizeof(wchar_t));
1183
1184     mpdm_unref(args);
1185     mpdm_unref(fmt);
1186
1187     return MPDM_ENS(o, l - 1);
1188 }
1189
1190
1191 /**
1192  * mpdm_ulc - Converts a string to uppercase or lowecase.
1193  * @s: the string
1194  * @u: convert to uppercase (1) or to lowercase (0).
1195  *
1196  * Converts @s to uppercase (for @u == 1) or to lowercase (@u == 0).
1197  * [Strings]
1198  */
1199 mpdm_t mpdm_ulc(const mpdm_t s, int u)
1200 {
1201     mpdm_t r = NULL;
1202     wchar_t *optr;
1203     int i;
1204
1205     mpdm_ref(s);
1206
1207     i = mpdm_size(s);
1208
1209     if ((optr = malloc((i + 1) * sizeof(wchar_t))) != NULL) {
1210         wchar_t *iptr = mpdm_string(s);
1211         int n;
1212
1213         for (n = 0; n < i; n++)
1214             optr[n] = u ? towupper(iptr[n]) : towlower(iptr[n]);
1215
1216         optr[n] = L'\0';
1217         r = MPDM_ENS(optr, i);
1218     }
1219
1220     mpdm_unref(s);
1221
1222     return r;
1223 }
1224
1225
/* scanf working buffers (each one holds SCANF_BUF_SIZE wide chars):
   scanf_yset - characters accepted by the current command
                (empty means any character is accepted)
   scanf_nset - characters that stop the current command
   scanf_mark - the mark that ends a %S command */
#define SCANF_BUF_SIZE 1024

static wchar_t scanf_yset[SCANF_BUF_SIZE],
               scanf_nset[SCANF_BUF_SIZE],
               scanf_mark[SCANF_BUF_SIZE];

/* accepted (yset) and stopping (nset) characters for each standard
   command; the list ends at the entry with a zero cmd */
struct {
    wchar_t cmd;
    wchar_t *yset;
    wchar_t *nset;
} scanf_sets[] = {
    /* string: anything up to a blank */
    { L's', L"", L" \t" },

    /* unsigned integer */
    { L'u', L"0123456789", L"" },

    /* signed integers */
    { L'd', L"-0123456789", L"" },
    { L'i', L"-0123456789", L"" },

    /* real number */
    { L'f', L"-0123456789.", L"" },

    /* hexadecimal number */
    { L'x', L"-0123456789xabcdefABCDEF", L"" },

    /* end of list */
    { L'\0', NULL, NULL }
};
1245
1246 /**
1247  * mpdm_sscanf - Extracts data like sscanf().
1248  * @fmt: the string format
1249  * @str: the string to be parsed
1250  * @offset: the character offset to start scanning
1251  *
1252  * Extracts data from a string using a special format pattern, very
1253  * much like the scanf() series of functions in the C library. Apart
1254  * from the standard percent-sign-commands (s, u, d, i, f, x,
1255  * n, [, with optional size and * to ignore), it implements S,
1256  * to match a string of characters upto what follows in the format
1257  * string. Also, the [ set of characters can include other % formats.
1258  *
1259  * Returns an array with the extracted values. If %n is used, the
1260  * position in the scanned string is returned as the value.
1261  * [Strings]
1262  */
1263 mpdm_t mpdm_sscanf(const mpdm_t fmt, const mpdm_t str, int offset)
1264 {
1265     wchar_t *i = (wchar_t *) str->data;
1266     wchar_t *f = (wchar_t *) fmt->data;
1267     mpdm_t r;
1268
1269     mpdm_ref(fmt);
1270     mpdm_ref(str);
1271
1272     i += offset;
1273     r = MPDM_A(0);
1274     mpdm_ref(r);
1275
1276     while (*f) {
1277         if (*f == L'%') {
1278             wchar_t *ptr = NULL;
1279             int size = 0;
1280             wchar_t cmd;
1281             int vsize = 0;
1282             int ignore = 0;
1283             int msize = 0;
1284
1285             /* empty all buffers */
1286             scanf_yset[0] = scanf_nset[0] = scanf_mark[0] = L'\0';
1287
1288             f++;
1289
1290             /* an asterisk? don't return next value */
1291             if (*f == L'*') {
1292                 ignore = 1;
1293                 f++;
1294             }
1295
1296             /* does it have a size? */
1297             while (wcschr(L"0123456789", *f)) {
1298                 vsize *= 10;
1299                 vsize += *f - L'0';
1300                 f++;
1301             }
1302
1303             /* if no size, set it to an arbitrary big limit */
1304             if (!vsize)
1305                 vsize = 0xfffffff;
1306
1307             /* now *f should contain a command */
1308             cmd = *f;
1309             f++;
1310
1311             /* is it a verbatim percent sign? */
1312             if (cmd == L'%') {
1313                 vsize = 1;
1314                 ignore = 1;
1315                 wcscpy(scanf_yset, L"%");
1316             }
1317             else
1318                 /* a position? */
1319             if (cmd == L'n') {
1320                 vsize = 0;
1321                 ignore = 1;
1322                 mpdm_push(r, MPDM_I(i - (wchar_t *) str->data));
1323             }
1324             else
1325                 /* string upto a mark */
1326             if (cmd == L'S') {
1327                 wchar_t *tmp = f;
1328
1329                 /* fill the mark upto another command */
1330                 while (*tmp) {
1331                     if (*tmp == L'%') {
1332                         tmp++;
1333
1334                         /* is it an 'n'? ignore and go on */
1335                         if (*tmp == L'n') {
1336                             tmp++;
1337                             continue;
1338                         }
1339                         else
1340                         if (*tmp == L'%')
1341                             scanf_mark[msize++] = *tmp;
1342                         else
1343                             break;
1344                     }
1345                     else
1346                         scanf_mark[msize++] = *tmp;
1347
1348                     tmp++;
1349                 }
1350
1351                 scanf_mark[msize] = L'\0';
1352             }
1353             else
1354                 /* raw set */
1355             if (cmd == L'[') {
1356                 int n = 0;
1357                 wchar_t *set = scanf_yset;
1358
1359                 /* is it an inverse set? */
1360                 if (*f == L'^') {
1361                     set = scanf_nset;
1362                     f++;
1363                 }
1364
1365                 /* first one is a ]? add it */
1366                 if (*f == L']') {
1367                     set[n++] = *f;
1368                     f++;
1369                 }
1370
1371                 /* now build the set */
1372                 for (; n < SCANF_BUF_SIZE - 1 && *f && *f != L']'; f++) {
1373                     /* is it a range? */
1374                     if (*f == L'-') {
1375                         f++;
1376
1377                         /* start or end? hyphen itself */
1378                         if (n == 0 || *f == L']')
1379                             set[n++] = L'-';
1380                         else {
1381                             /* pick previous char */
1382                             wchar_t c = set[n - 1];
1383
1384                             /* fill */
1385                             while (n < SCANF_BUF_SIZE - 1 && c < *f)
1386                                 set[n++] = ++c;
1387                         }
1388                     }
1389                     else
1390                         /* is it another command? */
1391                     if (*f == L'%') {
1392                         int i;
1393
1394                         f++;
1395                         for (i = 0; scanf_sets[i].cmd; i++) {
1396                             if (*f == scanf_sets[i].cmd) {
1397                                 set[n] = L'\0';
1398                                 wcscat(set, scanf_sets[i].yset);
1399                                 n += wcslen(scanf_sets[i].yset);
1400                                 break;
1401                             }
1402                         }
1403                     }
1404                     else
1405                         set[n++] = *f;
1406                 }
1407
1408                 /* skip the ] */
1409                 f++;
1410
1411                 set[n] = L'\0';
1412             }
1413             else
1414                 /* a standard set? */
1415             {
1416                 int n;
1417
1418                 for (n = 0; scanf_sets[n].cmd != L'\0'; n++) {
1419                     if (cmd == scanf_sets[n].cmd) {
1420                         wcscpy(scanf_yset, scanf_sets[n].yset);
1421                         wcscpy(scanf_nset, scanf_sets[n].nset);
1422                         break;
1423                     }
1424                 }
1425             }
1426
1427             /* now fill the dynamic string */
1428             while (vsize &&
1429                    !wcschr(scanf_nset, *i) &&
1430                    (scanf_yset[0] == L'\0' || wcschr(scanf_yset, *i)) &&
1431                    (msize == 0 || wcsncmp(i, scanf_mark, msize) != 0)) {
1432
1433                 /* only add if not being ignored */
1434                 if (!ignore)
1435                     ptr = mpdm_poke(ptr, &size, i, 1, sizeof(wchar_t));
1436
1437                 i++;
1438                 vsize--;
1439             }
1440
1441             if (!ignore && size) {
1442                 /* null terminate and push */
1443                 ptr = mpdm_poke(ptr, &size, L"", 1, sizeof(wchar_t));
1444                 mpdm_push(r, MPDM_ENS(ptr, size - 1));
1445             }
1446         }
1447         else
1448         if (*f == L' ' || *f == L'\t') {
1449             /* if it's a blank, sync to next non-blank */
1450             f++;
1451
1452             while (*i == L' ' || *i == L'\t')
1453                 i++;
1454         }
1455         else
1456             /* test for literals in the format string */
1457         if (*i == *f) {
1458             i++;
1459             f++;
1460         }
1461         else
1462             break;
1463     }
1464
1465     mpdm_unref(str);
1466     mpdm_unref(fmt);
1467
1468     mpdm_unrefnd(r);
1469
1470     return r;
1471 }