mpdm_s.c

   1 /*
   2
   3     MPDM - Minimum Profit Data Manager
   4     Copyright (C) 2003/2010 Angel Ortega <angel@triptico.com>
   5
   6     mpdm_s.c - String management
   7
   8     This program is free software; you can redistribute it and/or
   9     modify it under the terms of the GNU General Public License
  10     as published by the Free Software Foundation; either version 2
  11     of the License, or (at your option) any later version.
  12
  13     This program is distributed in the hope that it will be useful,
  14     but WITHOUT ANY WARRANTY; without even the implied warranty of
  15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16     GNU General Public License for more details.
  17
  18     You should have received a copy of the GNU General Public License
  19     along with this program; if not, write to the Free Software
  20     Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  21
  22     http://www.triptico.com
  23
  24 */
  25
  26 #include "config.h"
  27
  28 #include <stdio.h>
  29 #include <stdlib.h>
  30 #include <string.h>
  31 #include <wchar.h>
  32 #include <locale.h>
  33 #include <wctype.h>
  34
  35 #ifdef CONFOPT_GETTEXT
  36 #include <libintl.h>
  37 #endif
  38
  39 #ifdef CONFOPT_WIN32
  40 #include <windows.h>
  41 #endif
  42
  43 #include "mpdm.h"
  44
  45
  46 /** code **/
  47
  48 void *mpdm_poke_o(void *dst, int *dsize, int *offset, const void *org,
  49                   int osize, int esize)
  50 {
  51     if (org != NULL && osize) {
  52         /* enough room? */
  53         if (*offset + osize > *dsize) {
  54             /* no; enlarge */
  55             *dsize += osize;
  56
  57             dst = realloc(dst, *dsize * esize);
  58         }
  59
  60         memcpy((char *) dst + (*offset * esize), org, osize * esize);
  61         *offset += osize;
  62     }
  63
  64     return dst;
  65 }
  66
  67
  68 void *mpdm_poke(void *dst, int *dsize, const void *org, int osize,
  69                 int esize)
  70 /* pokes (adds) org into dst, which is a dynamic string, making it grow */
  71 {
  72     int offset = *dsize;
  73
  74     return mpdm_poke_o(dst, dsize, &offset, org, osize, esize);
  75 }
  76
  77
  78 wchar_t *mpdm_pokewsn(wchar_t * dst, int *dsize, const wchar_t * str,
  79                       int slen)
  80 /* adds a wide string to dst using mpdm_poke() with size */
  81 {
  82     if (str)
  83         dst = mpdm_poke(dst, dsize, str, slen, sizeof(wchar_t));
  84
  85     return dst;
  86 }
  87
  88
  89 wchar_t *mpdm_pokews(wchar_t * dst, int *dsize, const wchar_t * str)
  90 /* adds a wide string to dst using mpdm_poke() */
  91 {
  92     if (str)
  93         dst = mpdm_pokewsn(dst, dsize, str, wcslen(str));
  94
  95     return dst;
  96 }
  97
  98
  99 wchar_t *mpdm_pokev(wchar_t * dst, int *dsize, const mpdm_t v)
 100 /* adds the string in v to dst using mpdm_poke() */
 101 {
 102     if (v != NULL) {
 103         const wchar_t *ptr = mpdm_string(v);
 104
 105         mpdm_ref(v);
 106         dst = mpdm_pokews(dst, dsize, ptr);
 107         mpdm_unref(v);
 108     }
 109
 110     return dst;
 111 }
 112
 113
 114 wchar_t *mpdm_mbstowcs(const char *str, int *s, int l)
 115 /* converts an mbs to a wcs, but filling invalid chars
 116    with question marks instead of just failing */
 117 {
 118     wchar_t *ptr = NULL;
 119     char tmp[64];               /* really MB_CUR_MAX + 1 */
 120     wchar_t wc;
 121     int n, i, c, t = 0;
 122     char *cstr;
 123
 124     /* allow NULL values for s */
 125     if (s == NULL)
 126         s = &t;
 127
 128     /* if there is a limit, duplicate and break the string */
 129     if (l >= 0) {
 130         cstr = strdup(str);
 131         cstr[l] = '\0';
 132     }
 133     else
 134         cstr = (char *) str;
 135
 136     /* try first a direct conversion with mbstowcs */
 137     if ((*s = mbstowcs(NULL, cstr, 0)) != -1) {
 138         /* direct conversion is possible; do it */
 139         if ((ptr = malloc((*s + 1) * sizeof(wchar_t))) != NULL) {
 140             mbstowcs(ptr, cstr, *s);
 141             ptr[*s] = L'\0';
 142         }
 143     }
 144     else {
 145         /* zero everything */
 146         *s = n = i = 0;
 147
 148         for (;;) {
 149             /* no more characters to process? */
 150             if ((c = cstr[n + i]) == '\0' && i == 0)
 151                 break;
 152
 153             tmp[i++] = c;
 154             tmp[i] = '\0';
 155
 156             /* try to convert */
 157             if (mbstowcs(&wc, tmp, 1) == (size_t) - 1) {
 158                 /* can still be an incomplete multibyte char? */
 159                 if (c != '\0' && i <= (int) MB_CUR_MAX)
 160                     continue;
 161                 else {
 162                     /* too many failing bytes; skip 1 byte */
 163                     wc = L'?';
 164                     i = 1;
 165                 }
 166             }
 167
 168             /* skip used bytes and back again */
 169             n += i;
 170             i = 0;
 171
 172             /* store new char */
 173             if ((ptr = mpdm_poke(ptr, s, &wc, 1, sizeof(wchar_t))) == NULL)
 174                 break;
 175         }
 176
 177         /* null terminate and count one less */
 178         if (ptr != NULL) {
 179             ptr = mpdm_poke(ptr, s, L"", 1, sizeof(wchar_t));
 180             (*s)--;
 181         }
 182     }
 183
 184     /* free the duplicate */
 185     if (cstr != str)
 186         free(cstr);
 187
 188     return ptr;
 189 }
 190
 191
 192 char *mpdm_wcstombs(const wchar_t * str, int *s)
 193 /* converts a wcs to an mbs, but filling invalid chars
 194    with question marks instead of just failing */
 195 {
 196     char *ptr = NULL;
 197     char tmp[64];               /* really MB_CUR_MAX + 1 */
 198     int l, t = 0;
 199
 200     /* allow NULL values for s */
 201     if (s == NULL)
 202         s = &t;
 203
 204     /* try first a direct conversion with wcstombs */
 205     if ((*s = wcstombs(NULL, str, 0)) != -1) {
 206         /* direct conversion is possible; do it and return */
 207         if ((ptr = malloc(*s + 1)) != NULL) {
 208             wcstombs(ptr, str, *s);
 209             ptr[*s] = '\0';
 210         }
 211
 212         return ptr;
 213     }
 214
 215     /* invalid encoding? convert characters one by one */
 216     *s = 0;
 217
 218     while (*str) {
 219         if ((l = wctomb(tmp, *str)) <= 0) {
 220             /* if char couldn't be converted,
 221                write a question mark instead */
 222             l = wctomb(tmp, L'?');
 223         }
 224
 225         tmp[l] = '\0';
 226         if ((ptr = mpdm_poke(ptr, s, tmp, l, 1)) == NULL)
 227             break;
 228
 229         str++;
 230     }
 231
 232     /* null terminate and count one less */
 233     if (ptr != NULL) {
 234         ptr = mpdm_poke(ptr, s, "", 1, 1);
 235         (*s)--;
 236     }
 237
 238     return ptr;
 239 }
 240
 241
 242 mpdm_t mpdm_new_wcs(int flags, const wchar_t * str, int size, int cpy)
 243 /* creates a new string value from a wcs */
 244 {
 245     wchar_t *ptr;
 246
 247     /* a size of -1 means 'calculate it' */
 248     if (size == -1 && str != NULL)
 249         size = wcslen(str);
 250
 251     /* create a copy? */
 252     if (cpy) {
 253         /* free() on destruction */
 254         flags |= MPDM_FREE;
 255
 256         /* allocs */
 257         if ((ptr = malloc((size + 1) * sizeof(wchar_t))) == NULL)
 258             return NULL;
 259
 260         /* if no source, reset to zeroes; otherwise, copy */
 261         if (str == NULL)
 262             memset(ptr, '\0', size * sizeof(wchar_t));
 263         else {
 264             wcsncpy(ptr, str, size);
 265             ptr[size] = L'\0';
 266         }
 267     }
 268     else
 269         ptr = (wchar_t *) str;
 270
 271     /* it's a string */
 272     flags |= MPDM_STRING;
 273
 274     return mpdm_new(flags, ptr, size);
 275 }
 276
 277
 278 mpdm_t mpdm_new_mbstowcs(int flags, const char *str, int l)
 279 /* creates a new string value from an mbs */
 280 {
 281     wchar_t *ptr;
 282     int size;
 283
 284     if ((ptr = mpdm_mbstowcs(str, &size, l)) == NULL)
 285         return NULL;
 286
 287     /* it's a string */
 288     flags |= (MPDM_STRING | MPDM_FREE);
 289
 290     return mpdm_new(flags, ptr, size);
 291 }
 292
 293
 294 mpdm_t mpdm_new_wcstombs(int flags, const wchar_t * str)
 295 /* creates a new mbs value from a wbs */
 296 {
 297     char *ptr;
 298     int size;
 299
 300     ptr = mpdm_wcstombs(str, &size);
 301
 302     flags |= MPDM_FREE;
 303
 304     /* unset the string flag; mbs,s are not 'strings' */
 305     flags &= ~MPDM_STRING;
 306
 307     return mpdm_new(flags, ptr, size);
 308 }
 309
 310
 311 mpdm_t mpdm_new_i(int ival)
 312 /* creates a new string value from an integer */
 313 {
 314     mpdm_t v;
 315     char tmp[32];
 316
 317     /* creates the visual representation */
 318     snprintf(tmp, sizeof(tmp), "%d", ival);
 319
 320     v = MPDM_MBS(tmp);
 321
 322     return mpdm_set_ival(v, ival);
 323 }
 324
 325
 326 mpdm_t mpdm_new_r(double rval)
 327 /* creates a new string value from a real number */
 328 {
 329     mpdm_t v;
 330     char tmp[128];
 331
 332     /* creates the visual representation */
 333     snprintf(tmp, sizeof(tmp), "%lf", rval);
 334
 335     /* manually strip useless zeroes */
 336     if (strchr(tmp, '.') != NULL) {
 337         char *ptr;
 338
 339         for (ptr = tmp + strlen(tmp) - 1; *ptr == '0'; ptr--);
 340
 341         /* if it's over the ., strip it also */
 342         if (*ptr != '.')
 343             ptr++;
 344
 345         *ptr = '\0';
 346     }
 347
 348     v = MPDM_MBS(tmp);
 349
 350     return mpdm_set_rval(v, rval);
 351 }
 352
 353
 354 /* interface */
 355
 356 /**
 357  * mpdm_string2 - Returns a printable representation of a value (with buffer).
 358  * @v: the value
 359  * @wtmp: the external buffer
 360  *
 361  * Returns a printable representation of a value. For strings, it's
 362  * the value data itself; for any other type, a conversion to string
 363  * is returned instead. If @v is not a string, the @wtmp buffer
 364  * can be used as a placeholder for the string representation.
 365  *
 366  * The reference count value in @v is not touched.
 367  * [Strings]
 368  */
 369 wchar_t *mpdm_string2(const mpdm_t v, wchar_t *wtmp)
 370 {
 371     char tmp[32];
 372     wchar_t *ret;
 373
 374     /* if it's NULL, return a constant */
 375     if (v == NULL)
 376         ret = L"[NULL]";
 377     else
 378         /* if it's a string, return it */
 379     if (v->flags & MPDM_STRING)
 380         ret = (wchar_t *) v->data;
 381     else {
 382         /* otherwise, return a visual representation */
 383         snprintf(tmp, sizeof(tmp), "%p", v);
 384         mbstowcs(wtmp, tmp, sizeof(tmp) * sizeof(wchar_t));
 385
 386         ret = wtmp;
 387     }
 388
 389     return ret;
 390 }
 391
 392
 393 /**
 394  * mpdm_string - Returns a printable representation of a value.
 395  * @v: the value
 396  *
 397  * Returns a printable representation of a value. For strings, it's
 398  * the value data itself; for any other type, a conversion to string
 399  * is returned instead. This value should be used immediately, as it
 400  * can be a pointer to a static buffer.
 401  *
 402  * The reference count value in @v is not touched.
 403  * [Strings]
 404  */
 405 wchar_t *mpdm_string(const mpdm_t v)
 406 {
 407     static wchar_t tmp[32];
 408
 409     return mpdm_string2(v, tmp);
 410 }
 411
 412
 413 /**
 414  * mpdm_cmp - Compares two values.
 415  * @v1: the first value
 416  * @v2: the second value
 417  *
 418  * Compares two values. If both has the MPDM_STRING flag set,
 419  * a comparison using wcscoll() is returned; if both are arrays,
 420  * the size is compared first and, if they have the same number
 421  * elements, each one is compared; otherwise, a simple visual
 422  * representation comparison is done.
 423  * [Strings]
 424  */
 425 int mpdm_cmp(const mpdm_t v1, const mpdm_t v2)
 426 {
 427     int r;
 428
 429     mpdm_ref(v1);
 430     mpdm_ref(v2);
 431
 432     /* same values? */
 433     if (v1 == v2)
 434         r = 0;
 435     else
 436         /* is any value NULL? */
 437     if (v1 == NULL)
 438         r = -1;
 439     else
 440     if (v2 == NULL)
 441         r = 1;
 442     else
 443         /* different values, but same content? (unlikely) */
 444     if (v1->data == v2->data)
 445         r = 0;
 446     else
 447     if (MPDM_IS_STRING(v1) && MPDM_IS_STRING(v2))
 448         r = wcscoll((wchar_t *) v1->data, (wchar_t *) v2->data);
 449     else
 450     if (MPDM_IS_ARRAY(v1) && MPDM_IS_ARRAY(v2)) {
 451         /* compare first the sizes */
 452         if ((r = mpdm_size(v1) - mpdm_size(v2)) == 0) {
 453             int n;
 454
 455             /* they have the same size;
 456                compare each pair of elements */
 457             for (n = 0; n < mpdm_size(v1); n++) {
 458                 if ((r = mpdm_cmp(mpdm_aget(v1, n),
 459                                   mpdm_aget(v2, n))) != 0)
 460                     break;
 461             }
 462         }
 463     }
 464     else {
 465         wchar_t tmp[32];
 466
 467         r = wcscoll(mpdm_string(v1), mpdm_string2(v2, tmp));
 468     }
 469
 470     mpdm_unref(v2);
 471     mpdm_unref(v1);
 472
 473     return r;
 474 }
 475
 476
 477 /**
 478  * mpdm_cmp_s - Compares two values (string version).
 479  * @v1: the first value
 480  * @v2: the second value
 481  *
 482  * Compares two values. If both has the MPDM_STRING flag set,
 483  * a comparison using wcscoll() is returned; if both are arrays,
 484  * the size is compared first and, if they have the same number
 485  * elements, each one is compared; otherwise, a simple visual
 486  * representation comparison is done.
 487  */
 488 int mpdm_cmp_s(const mpdm_t v1, const wchar_t * v2)
 489 {
 490     return mpdm_cmp(v1, MPDM_S(v2));
 491 }
 492
 493
 494 /**
 495  * mpdm_splice - Creates a new string value from another.
 496  * @v: the original value
 497  * @i: the value to be inserted
 498  * @offset: offset where the substring is to be inserted
 499  * @del: number of characters to delete
 500  *
 501  * Creates a new string value from @v, deleting @del chars at @offset
 502  * and substituting them by @i. If @del is 0, no deletion is done.
 503  * both @offset and @del can be negative; if this is the case, it's
 504  * assumed as counting from the end of @v. If @v is NULL, @i will become
 505  * the new string, and both @offset and @del will be ignored. If @v is
 506  * not NULL and @i is, no insertion process is done (only deletion, if
 507  * applicable).
 508  *
 509  * Returns a two element array, with the new string in the first
 510  * element and the deleted string in the second (with a NULL value
 511  * if @del is 0).
 512  * [Strings]
 513  */
 514 mpdm_t mpdm_splice(const mpdm_t v, const mpdm_t i, int offset, int del)
 515 {
 516     mpdm_t w;
 517     mpdm_t n = NULL;
 518     mpdm_t d = NULL;
 519     int os, ns, r;
 520     int ins = 0;
 521     wchar_t *ptr;
 522
 523     mpdm_ref(v);
 524     mpdm_ref(i);
 525
 526     if (v != NULL) {
 527         os = mpdm_size(v);
 528
 529         /* negative offsets start from the end */
 530         if (offset < 0)
 531             offset = os + 1 - offset;
 532
 533         /* never add further the end */
 534         if (offset > os)
 535             offset = os;
 536
 537         /* negative del counts as 'characters left' */
 538         if (del < 0)
 539             del = os + 1 - offset + del;
 540
 541         /* something to delete? */
 542         if (del > 0) {
 543             /* never delete further the end */
 544             if (offset + del > os)
 545                 del = os - offset;
 546
 547             /* deleted string */
 548             d = MPDM_NS(((wchar_t *) v->data) + offset, del);
 549         }
 550         else
 551             del = 0;
 552
 553         /* something to insert? */
 554         ins = mpdm_size(i);
 555
 556         /* new size and remainder */
 557         ns = os + ins - del;
 558         r = offset + del;
 559
 560         n = MPDM_NS(NULL, ns);
 561
 562         ptr = (wchar_t *) n->data;
 563
 564         /* copy the beginning */
 565         if (offset > 0) {
 566             wcsncpy(ptr, v->data, offset);
 567             ptr += offset;
 568         }
 569
 570         /* copy the text to be inserted */
 571         if (ins > 0) {
 572             wcsncpy(ptr, i->data, ins);
 573             ptr += ins;
 574         }
 575
 576         /* copy the remaining */
 577         os -= r;
 578         if (os > 0) {
 579             wcsncpy(ptr, ((wchar_t *) v->data) + r, os);
 580             ptr += os;
 581         }
 582
 583         /* null terminate */
 584         *ptr = L'\0';
 585     }
 586     else
 587         n = i;
 588
 589     /* creates the output array */
 590     w = MPDM_A(2);
 591
 592     mpdm_ref(w);
 593     mpdm_aset(w, n, 0);
 594     mpdm_aset(w, d, 1);
 595     mpdm_unrefnd(w);
 596
 597     mpdm_unref(i);
 598     mpdm_unref(v);
 599
 600     return w;
 601 }
 602
 603
 604 /**
 605  * mpdm_strcat_sn - Concatenates two strings (string with size version).
 606  * @s1: the first string
 607  * @s2: the second string
 608  * @size: the size of the second string
 609  *
 610  * Returns a new string formed by the concatenation of @s1 and @s2.
 611  * [Strings]
 612  */
 613 mpdm_t mpdm_strcat_sn(const mpdm_t s1, const wchar_t * s2, int size)
 614 {
 615     wchar_t *ptr = NULL;
 616     int s = 0;
 617     mpdm_t r;
 618
 619     if (s1 == NULL && s2 == NULL)
 620         r = NULL;
 621     else {
 622         ptr = mpdm_pokev(ptr, &s, s1);
 623         ptr = mpdm_pokewsn(ptr, &s, s2, size);
 624
 625         ptr = mpdm_poke(ptr, &s, L"", 1, sizeof(wchar_t));
 626         r = MPDM_ENS(ptr, s - 1);
 627     }
 628
 629     return r;
 630 }
 631
 632
 633 /**
 634  * mpdm_strcat_s - Concatenates two strings (string version).
 635  * @s1: the first string
 636  * @s2: the second string
 637  *
 638  * Returns a new string formed by the concatenation of @s1 and @s2.
 639  * [Strings]
 640  */
 641 mpdm_t mpdm_strcat_s(const mpdm_t s1, const wchar_t * s2)
 642 {
 643     return mpdm_strcat_sn(s1, s2, s2 ? wcslen(s2) : 0);
 644 }
 645
 646
 647 /**
 648  * mpdm_strcat - Concatenates two strings.
 649  * @s1: the first string
 650  * @s2: the second string
 651  *
 652  * Returns a new string formed by the concatenation of @s1 and @s2.
 653  * [Strings]
 654  */
 655 mpdm_t mpdm_strcat(const mpdm_t s1, const mpdm_t s2)
 656 {
 657     mpdm_t r;
 658
 659     mpdm_ref(s2);
 660     r = mpdm_strcat_s(s1, s2 ? mpdm_string(s2) : NULL);
 661     mpdm_unref(s2);
 662
 663     return r;
 664 }
 665
 666
 667 /**
 668  * mpdm_ival - Returns a value's data as an integer.
 669  * @v: the value
 670  *
 671  * Returns a value's data as an integer. If the value is a string,
 672  * it's converted via sscanf and returned; non-string values have all
 673  * an ival of 0. The converted integer is cached, so costly string
 674  * conversions are only done once. Values created with the MPDM_IVAL
 675  * flag set have its ival cached from the beginning.
 676  * [Strings]
 677  * [Value Management]
 678  */
 679 int mpdm_ival(mpdm_t v)
 680 {
 681     int i = 0;
 682
 683     mpdm_ref(v);
 684
 685     if (v != NULL) {
 686         /* if there is no cached integer, calculate it */
 687         if (!(v->flags & MPDM_IVAL)) {
 688             /* if it's a string, calculate it; other
 689                values will have an ival of 0 */
 690             if (v->flags & MPDM_STRING) {
 691                 char tmp[32];
 692                 char *fmt = "%i";
 693
 694                 wcstombs(tmp, (wchar_t *) v->data, sizeof(tmp));
 695                 tmp[sizeof(tmp) - 1] = '\0';
 696
 697                 /* workaround for mingw32: as it doesn't
 698                    correctly parse octal and hexadecimal
 699                    numbers, they are tried as special cases */
 700                 if (tmp[0] == '0') {
 701                     if (tmp[1] == 'b' || tmp[1] == 'B') {
 702                         /* binary number */
 703                         fmt = NULL;
 704                         char *ptr = &tmp[2];
 705
 706                         while (*ptr == '0' || *ptr == '1') {
 707                             i <<= 1;
 708
 709                             if (*ptr == '1')
 710                                 i |= 1;
 711
 712                             ptr++;
 713                         }
 714                     }
 715                     else
 716                     if (tmp[1] == 'x' || tmp[1] == 'X')
 717                         fmt = "%x";
 718                     else
 719                         fmt = "%o";
 720                 }
 721
 722                 if (fmt != NULL)
 723                     sscanf(tmp, fmt, &i);
 724             }
 725
 726             mpdm_set_ival(v, i);
 727         }
 728
 729         i = v->ival;
 730     }
 731
 732     mpdm_unref(v);
 733
 734     return i;
 735 }
 736
 737
 738 /**
 739  * mpdm_rval - Returns a value's data as a real number (double).
 740  * @v: the value
 741  *
 742  * Returns a value's data as a real number (double float). If the value
 743  * is a string, it's converted via sscanf and returned; non-string values
 744  * have all an rval of 0. The converted double is cached, so costly string
 745  * conversions are only done once. Values created with the MPDM_RVAL
 746  * flag set have its rval cached from the beginning.
 747  * [Strings]
 748  * [Value Management]
 749  */
 750 double mpdm_rval(mpdm_t v)
 751 {
 752     double r = 0.0;
 753
 754     mpdm_ref(v);
 755
 756     if (v != NULL) {
 757         /* if there is no cached double, calculate it */
 758         if (!(v->flags & MPDM_RVAL)) {
 759             /* if it's a string, calculate it; other
 760                values will have an rval of 0.0 */
 761             if (v->flags & MPDM_STRING) {
 762                 char tmp[128];
 763                 char *prev_locale;
 764
 765                 wcstombs(tmp, (wchar_t *) v->data, sizeof(tmp));
 766                 tmp[sizeof(tmp) - 1] = '\0';
 767
 768                 /* if the number starts with 0, it's
 769                    an octal or hexadecimal number; just
 770                    take the integer value and cast it */
 771                 if (tmp[0] == '0' && tmp[1] != '.')
 772                     r = (double) mpdm_ival(v);
 773                 else {
 774                     /* set locale to C for non locale-dependent
 775                        floating point conversion */
 776                     prev_locale = setlocale(LC_NUMERIC, "C");
 777
 778                     /* read */
 779                     sscanf(tmp, "%lf", &r);
 780
 781                     /* set previous locale */
 782                     setlocale(LC_NUMERIC, prev_locale);
 783                 }
 784             }
 785
 786             mpdm_set_rval(v, r);
 787         }
 788
 789         r = v->rval;
 790     }
 791
 792     mpdm_unref(v);
 793
 794     return r;
 795 }
 796
 797
 798 /**
 799  * mpdm_gettext - Translates a string to the current language.
 800  * @str: the string
 801  *
 802  * Translates the @str string to the current language.
 803  *
 804  * This function can still be used even if there is no real gettext
 805  * support() by manually filling the __I18N__ hash.
 806  *
 807  * If the string is found in the current table, the translation is
 808  * returned; otherwise, the same @str value is returned.
 809  * [Strings]
 810  * [Localization]
 811  */
 812 mpdm_t mpdm_gettext(const mpdm_t str)
 813 {
 814     mpdm_t v;
 815     mpdm_t i18n = NULL;
 816
 817     /* gets the cache */
 818     if ((i18n = mpdm_hget_s(mpdm_root(), L"__I18N__")) == NULL)
 819         i18n = mpdm_hset_s(mpdm_root(), L"__I18N__", MPDM_H(0));
 820
 821     mpdm_ref(str);
 822
 823     /* try first the cache */
 824     if ((v = mpdm_hget(i18n, str)) == NULL) {
 825 #ifdef CONFOPT_GETTEXT
 826         char *s;
 827         mpdm_t t;
 828
 829         /* convert to mbs */
 830         t = mpdm_ref(MPDM_2MBS(str->data));
 831
 832         /* ask gettext for it */
 833         s = gettext((char *) t->data);
 834
 835         if (s != t->data)
 836             v = MPDM_MBS(s);
 837         else
 838             v = str;
 839
 840         mpdm_unref(t);
 841
 842 #else                           /* CONFOPT_GETTEXT */
 843
 844         v = str;
 845
 846 #endif                          /* CONFOPT_GETTEXT */
 847
 848         /* store in the cache */
 849         mpdm_hset(i18n, str, v);
 850     }
 851
 852     mpdm_unref(str);
 853
 854     return v;
 855 }
 856
 857
 858 /**
 859  * mpdm_gettext_domain - Sets domain and data directory for translations.
 860  * @dom: the domain (application name)
 861  * @data: directory contaning the .mo files
 862  *
 863  * Sets the domain (application name) and translation data for translating
 864  * strings that will be returned by mpdm_gettext().@data must point to a
 865  * directory containing the .mo (compiled .po) files.
 866  *
 867  * If there is no gettext support, returns 0, or 1 otherwise.
 868  * [Strings]
 869  * [Localization]
 870  */
 871 int mpdm_gettext_domain(const mpdm_t dom, const mpdm_t data)
 872 {
 873     int ret = 0;
 874
 875     mpdm_ref(dom);
 876     mpdm_ref(data);
 877
 878 #ifdef CONFOPT_GETTEXT
 879
 880     mpdm_t dm;
 881     mpdm_t dt;
 882
 883     /* convert both to mbs,s */
 884     dm = mpdm_ref(MPDM_2MBS(dom->data));
 885     dt = mpdm_ref(MPDM_2MBS(data->data));
 886
 887     /* bind and set domain */
 888     bindtextdomain((char *) dm->data, (char *) dt->data);
 889     textdomain((char *) dm->data);
 890
 891     mpdm_hset_s(mpdm_root(), L"__I18N__", MPDM_H(0));
 892
 893     mpdm_unref(dt);
 894     mpdm_unref(dm);
 895
 896     ret = 1;
 897
 898 #endif                          /* CONFOPT_GETTEXT */
 899
 900 #ifdef CONFOPT_WIN32
 901
 902     mpdm_t v;
 903
 904     if ((v = mpdm_hget_s(mpdm_root(), L"ENV")) != NULL &&
 905         mpdm_hget_s(v, L"LANG") == NULL) {
 906         wchar_t *wptr = L"en";
 907
 908         /* MS Windows crappy language constants... */
 909
 910         switch ((GetSystemDefaultLangID() & 0x00ff)) {
 911         case 0x01:
 912             wptr = L"ar";
 913             break;              /* arabic */
 914         case 0x02:
 915             wptr = L"bg";
 916             break;              /* bulgarian */
 917         case 0x03:
 918             wptr = L"ca";
 919             break;              /* catalan */
 920         case 0x04:
 921             wptr = L"zh";
 922             break;              /* chinese */
 923         case 0x05:
 924             wptr = L"cz";
 925             break;              /* czech */
 926         case 0x06:
 927             wptr = L"da";
 928             break;              /* danish */
 929         case 0x07:
 930             wptr = L"de";
 931             break;              /* german */
 932         case 0x08:
 933             wptr = L"el";
 934             break;              /* greek */
 935         case 0x09:
 936             wptr = L"en";
 937             break;              /* english */
 938         case 0x0a:
 939             wptr = L"es";
 940             break;              /* spanish */
 941         case 0x0b:
 942             wptr = L"fi";
 943             break;              /* finnish */
 944         case 0x0c:
 945             wptr = L"fr";
 946             break;              /* french */
 947         case 0x0d:
 948             wptr = L"he";
 949             break;              /* hebrew */
 950         case 0x0e:
 951             wptr = L"hu";
 952             break;              /* hungarian */
 953         case 0x0f:
 954             wptr = L"is";
 955             break;              /* icelandic */
 956         case 0x10:
 957             wptr = L"it";
 958             break;              /* italian */
 959         case 0x11:
 960             wptr = L"jp";
 961             break;              /* japanese */
 962         case 0x12:
 963             wptr = L"ko";
 964             break;              /* korean */
 965         case 0x13:
 966             wptr = L"nl";
 967             break;              /* dutch */
 968         case 0x14:
 969             wptr = L"no";
 970             break;              /* norwegian */
 971         case 0x15:
 972             wptr = L"po";
 973             break;              /* polish */
 974         case 0x16:
 975             wptr = L"pt";
 976             break;              /* portuguese */
 977         case 0x17:
 978             wptr = L"rm";
 979             break;              /* romansh (switzerland) */
 980         case 0x18:
 981             wptr = L"ro";
 982             break;              /* romanian */
 983         case 0x19:
 984             wptr = L"ru";
 985             break;              /* russian */
 986         case 0x1a:
 987             wptr = L"sr";
 988             break;              /* serbian */
 989         case 0x1b:
 990             wptr = L"sk";
 991             break;              /* slovak */
 992         case 0x1c:
 993             wptr = L"sq";
 994             break;              /* albanian */
 995         case 0x1d:
 996             wptr = L"sv";
 997             break;              /* swedish */
 998         }
 999
1000         mpdm_hset_s(v, L"LANG", MPDM_S(wptr));
1001     }
1002
1003 #endif                          /* CONFOPT_WIN32 */
1004
1005     mpdm_unref(data);
1006     mpdm_unref(dom);
1007
1008     return ret;
1009 }
1010
1011
1012 #ifdef CONFOPT_WCWIDTH
1013
1014 int wcwidth(wchar_t);
1015
1016 int mpdm_wcwidth(wchar_t c)
1017 {
1018     return wcwidth(c);
1019 }
1020
1021 #else                           /* CONFOPT_WCWIDTH */
1022
1023 #include "wcwidth.c"
1024
1025 int mpdm_wcwidth(wchar_t c)
1026 {
1027     return mk_wcwidth(c);
1028 }
1029
1030 #endif                          /* CONFOPT_WCWIDTH */
1031
1032
1033 /**
1034  * mpdm_sprintf - Formats a sprintf()-like string.
1035  * @fmt: the string format
1036  * @args: an array of values
1037  *
1038  * Formats a string using the sprintf() format taking the values from @args.
1039  * [Strings]
1040  */
1041 mpdm_t mpdm_sprintf(const mpdm_t fmt, const mpdm_t args)
1042 {
1043     const wchar_t *i = fmt->data;
1044     wchar_t *o = NULL;
1045     int l = 0, n = 0;
1046     wchar_t c;
1047
1048     mpdm_ref(fmt);
1049     mpdm_ref(args);
1050
1051     /* loop all characters */
1052     while ((c = *i++) != L'\0') {
1053         int m = 0;
1054         wchar_t *tptr = NULL;
1055         wchar_t *wptr = NULL;
1056
1057         if (c == L'%') {
1058             /* format directive */
1059             char t_fmt[128];
1060             char tmp[1024];
1061             mpdm_t v;
1062             char *ptr = NULL;
1063
1064             /* transfer the % */
1065             t_fmt[m++] = '%';
1066
1067             /* transform the format to mbs */
1068             while (*i != L'\0' &&
1069                    m < (int) (sizeof(t_fmt) - MB_CUR_MAX - 1) &&
1070                    wcschr(L"-.0123456789", *i) != NULL)
1071                 m += wctomb(&t_fmt[m], *i++);
1072
1073             /* transfer the directive */
1074             m += wctomb(&t_fmt[m], *i++);
1075
1076             t_fmt[m] = '\0';
1077
1078             /* by default, copies the format */
1079             strcpy(tmp, t_fmt);
1080
1081             /* pick next value */
1082             v = mpdm_aget(args, n++);
1083
1084             switch (t_fmt[m - 1]) {
1085             case 'd':
1086             case 'i':
1087             case 'u':
1088             case 'x':
1089             case 'X':
1090             case 'o':
1091
1092                 /* integer value */
1093                 snprintf(tmp, sizeof(tmp) - 1, t_fmt, mpdm_ival(v));
1094                 break;
1095
1096             case 'f':
1097
1098                 /* float (real) value */
1099                 snprintf(tmp, sizeof(tmp) - 1, t_fmt, mpdm_rval(v));
1100                 break;
1101
1102             case 's':
1103
1104                 /* string value */
1105                 ptr = mpdm_wcstombs(mpdm_string(v), NULL);
1106                 snprintf(tmp, sizeof(tmp) - 1, t_fmt, ptr);
1107                 free(ptr);
1108
1109                 break;
1110
1111             case 'c':
1112
1113                 /* char */
1114                 m = 1;
1115                 wptr = &c;
1116                 c = mpdm_ival(v);
1117                 break;
1118
1119             case 'b':
1120
1121                 ptr = tmp;
1122                 unsigned int mask;
1123                 int p = 0;
1124
1125                 mask = 1 << ((sizeof(int) * 8) - 1);
1126                 while (mask) {
1127                     if (mask & (unsigned int) mpdm_ival(v)) {
1128                         *ptr++ = '1';
1129                         p = 1;
1130                     }
1131                     else
1132                     if (p)
1133                         *ptr++ = '0';
1134
1135                     mask >>= 1;
1136                 }
1137
1138                 if (ptr == tmp)
1139                     *ptr++ = '0';
1140
1141                 *ptr = '\0';
1142                 break;
1143
1144             case '%':
1145
1146                 /* percent sign */
1147                 m = 1;
1148                 wptr = &c;
1149                 break;
1150             }
1151
1152             /* transfer */
1153             if (wptr == NULL)
1154                 wptr = tptr = mpdm_mbstowcs(tmp, &m, -1);
1155         }
1156         else {
1157             /* raw character */
1158             m = 1;
1159             wptr = &c;
1160         }
1161
1162         /* transfer */
1163         o = mpdm_poke(o, &l, wptr, m, sizeof(wchar_t));
1164
1165         /* free the temporary buffer, if any */
1166         if (tptr != NULL)
1167             free(tptr);
1168     }
1169
1170     if (o == NULL)
1171         return NULL;
1172
1173     /* null-terminate */
1174     o = mpdm_poke(o, &l, L"", 1, sizeof(wchar_t));
1175
1176     mpdm_unref(args);
1177     mpdm_unref(fmt);
1178
1179     return MPDM_ENS(o, l - 1);
1180 }
1181
1182
1183 /**
1184  * mpdm_ulc - Converts a string to uppercase or lowecase.
1185  * @s: the string
1186  * @u: convert to uppercase (1) or to lowercase (0).
1187  *
1188  * Converts @s to uppercase (for @u == 1) or to lowercase (@u == 0).
1189  * [Strings]
1190  */
1191 mpdm_t mpdm_ulc(const mpdm_t s, int u)
1192 {
1193     mpdm_t r = NULL;
1194     wchar_t *optr;
1195     int i;
1196
1197     mpdm_ref(s);
1198
1199     i = mpdm_size(s);
1200
1201     if ((optr = malloc((i + 1) * sizeof(wchar_t))) != NULL) {
1202         wchar_t *iptr = mpdm_string(s);
1203         int n;
1204
1205         for (n = 0; n < i; n++)
1206             optr[n] = u ? towupper(iptr[n]) : towlower(iptr[n]);
1207
1208         optr[n] = L'\0';
1209         r = MPDM_ENS(optr, i);
1210     }
1211
1212     mpdm_unref(s);
1213
1214     return r;
1215 }
1216
1217
/* scanf working buffers; they are shared by all calls to
   mpdm_sscanf(), that resets them for each directive */
#define SCANF_BUF_SIZE 1024
static wchar_t scanf_yset[SCANF_BUF_SIZE];  /* accepted chars (empty: any) */
static wchar_t scanf_nset[SCANF_BUF_SIZE];  /* chars that stop the scan */
static wchar_t scanf_mark[SCANF_BUF_SIZE];  /* literal that ends a %S */

/* predefined character sets for the standard commands: yset holds
   the accepted characters and nset the ones that stop the scan.
   The table is read-only and private to this file, so it's static
   and const as the string literals it points to. The list is
   terminated by an entry with a null cmd */
static const struct {
    wchar_t cmd;
    const wchar_t *yset;
    const wchar_t *nset;
} scanf_sets[] = {
    { L's',  L"",                         L" \t"},
    { L'u',  L"0123456789",               L""},
    { L'd',  L"-0123456789",              L""},
    { L'i',  L"-0123456789",              L""},
    { L'f',  L"-0123456789.",             L""},
    { L'x',  L"-0123456789xabcdefABCDEF", L""},
    { L'\0', NULL,                        NULL},
};
1237
1238 /**
1239  * mpdm_sscanf - Extracts data like sscanf().
1240  * @fmt: the string format
1241  * @str: the string to be parsed
1242  * @offset: the character offset to start scanning
1243  *
1244  * Extracts data from a string using a special format pattern, very
1245  * much like the scanf() series of functions in the C library. Apart
1246  * from the standard percent-sign-commands (s, u, d, i, f, x,
1247  * n, [, with optional size and * to ignore), it implements S,
1248  * to match a string of characters upto what follows in the format
1249  * string. Also, the [ set of characters can include other % formats.
1250  *
1251  * Returns an array with the extracted values. If %n is used, the
1252  * position in the scanned string is returned as the value.
1253  * [Strings]
1254  */
1255 mpdm_t mpdm_sscanf(const mpdm_t fmt, const mpdm_t str, int offset)
1256 {
1257     wchar_t *i = (wchar_t *) str->data;
1258     wchar_t *f = (wchar_t *) fmt->data;
1259     mpdm_t r;
1260
1261     mpdm_ref(fmt);
1262     mpdm_ref(str);
1263
1264     i += offset;
1265     r = MPDM_A(0);
1266     mpdm_ref(r);
1267
1268     while (*f) {
1269         if (*f == L'%') {
1270             wchar_t *ptr = NULL;
1271             int size = 0;
1272             wchar_t cmd;
1273             int vsize = 0;
1274             int ignore = 0;
1275             int msize = 0;
1276
1277             /* empty all buffers */
1278             scanf_yset[0] = scanf_nset[0] = scanf_mark[0] = L'\0';
1279
1280             f++;
1281
1282             /* an asterisk? don't return next value */
1283             if (*f == L'*') {
1284                 ignore = 1;
1285                 f++;
1286             }
1287
1288             /* does it have a size? */
1289             while (wcschr(L"0123456789", *f)) {
1290                 vsize *= 10;
1291                 vsize += *f - L'0';
1292                 f++;
1293             }
1294
1295             /* if no size, set it to an arbitrary big limit */
1296             if (!vsize)
1297                 vsize = 0xfffffff;
1298
1299             /* now *f should contain a command */
1300             cmd = *f;
1301             f++;
1302
1303             /* is it a verbatim percent sign? */
1304             if (cmd == L'%') {
1305                 vsize = 1;
1306                 ignore = 1;
1307                 wcscpy(scanf_yset, L"%");
1308             }
1309             else
1310                 /* a position? */
1311             if (cmd == L'n') {
1312                 vsize = 0;
1313                 ignore = 1;
1314                 mpdm_push(r, MPDM_I(i - (wchar_t *) str->data));
1315             }
1316             else
1317                 /* string upto a mark */
1318             if (cmd == L'S') {
1319                 wchar_t *tmp = f;
1320
1321                 /* fill the mark upto another command */
1322                 while (*tmp) {
1323                     if (*tmp == L'%') {
1324                         tmp++;
1325
1326                         /* is it an 'n'? ignore and go on */
1327                         if (*tmp == L'n') {
1328                             tmp++;
1329                             continue;
1330                         }
1331                         else
1332                         if (*tmp == L'%')
1333                             scanf_mark[msize++] = *tmp;
1334                         else
1335                             break;
1336                     }
1337                     else
1338                         scanf_mark[msize++] = *tmp;
1339
1340                     tmp++;
1341                 }
1342
1343                 scanf_mark[msize] = L'\0';
1344             }
1345             else
1346                 /* raw set */
1347             if (cmd == L'[') {
1348                 int n = 0;
1349                 wchar_t *set = scanf_yset;
1350
1351                 /* is it an inverse set? */
1352                 if (*f == L'^') {
1353                     set = scanf_nset;
1354                     f++;
1355                 }
1356
1357                 /* first one is a ]? add it */
1358                 if (*f == L']') {
1359                     set[n++] = *f;
1360                     f++;
1361                 }
1362
1363                 /* now build the set */
1364                 for (; n < SCANF_BUF_SIZE - 1 && *f && *f != L']'; f++) {
1365                     /* is it a range? */
1366                     if (*f == L'-') {
1367                         f++;
1368
1369                         /* start or end? hyphen itself */
1370                         if (n == 0 || *f == L']')
1371                             set[n++] = L'-';
1372                         else {
1373                             /* pick previous char */
1374                             wchar_t c = set[n - 1];
1375
1376                             /* fill */
1377                             while (n < SCANF_BUF_SIZE - 1 && c < *f)
1378                                 set[n++] = ++c;
1379                         }
1380                     }
1381                     else
1382                         /* is it another command? */
1383                     if (*f == L'%') {
1384                         int i;
1385
1386                         f++;
1387                         for (i = 0; scanf_sets[i].cmd; i++) {
1388                             if (*f == scanf_sets[i].cmd) {
1389                                 set[n] = L'\0';
1390                                 wcscat(set, scanf_sets[i].yset);
1391                                 n += wcslen(scanf_sets[i].yset);
1392                                 break;
1393                             }
1394                         }
1395                     }
1396                     else
1397                         set[n++] = *f;
1398                 }
1399
1400                 /* skip the ] */
1401                 f++;
1402
1403                 set[n] = L'\0';
1404             }
1405             else
1406                 /* a standard set? */
1407             {
1408                 int n;
1409
1410                 for (n = 0; scanf_sets[n].cmd != L'\0'; n++) {
1411                     if (cmd == scanf_sets[n].cmd) {
1412                         wcscpy(scanf_yset, scanf_sets[n].yset);
1413                         wcscpy(scanf_nset, scanf_sets[n].nset);
1414                         break;
1415                     }
1416                 }
1417             }
1418
1419             /* now fill the dynamic string */
1420             while (vsize &&
1421                    !wcschr(scanf_nset, *i) &&
1422                    (scanf_yset[0] == L'\0' || wcschr(scanf_yset, *i)) &&
1423                    (msize == 0 || wcsncmp(i, scanf_mark, msize) != 0)) {
1424
1425                 /* only add if not being ignored */
1426                 if (!ignore)
1427                     ptr = mpdm_poke(ptr, &size, i, 1, sizeof(wchar_t));
1428
1429                 i++;
1430                 vsize--;
1431             }
1432
1433             if (!ignore && size) {
1434                 /* null terminate and push */
1435                 ptr = mpdm_poke(ptr, &size, L"", 1, sizeof(wchar_t));
1436                 mpdm_push(r, MPDM_ENS(ptr, size - 1));
1437             }
1438         }
1439         else
1440         if (*f == L' ' || *f == L'\t') {
1441             /* if it's a blank, sync to next non-blank */
1442             f++;
1443
1444             while (*i == L' ' || *i == L'\t')
1445                 i++;
1446         }
1447         else
1448             /* test for literals in the format string */
1449         if (*i == *f) {
1450             i++;
1451             f++;
1452         }
1453         else
1454             break;
1455     }
1456
1457     mpdm_unref(str);
1458     mpdm_unref(fmt);
1459
1460     mpdm_unrefnd(r);
1461
1462     return r;
1463 }