Variables removed (see discussion "buildsystem curiosity" on mailing list).
[AROS-Contrib.git] / regina / regutil / regunicode.c
blob7a25fead5f09c607311a8a50d8164a7d91edd376
/* Unicode conversion functions for regutil
 *
 * The contents of this file are subject to the Mozilla Public License
 * Version 1.0 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * The Original Code is regutil.
 *
 * The Initial Developer of the Original Code is Patrick TJ McPhee.
 * Portions created by Patrick McPhee are Copyright © 2003
 * Patrick TJ McPhee. All Rights Reserved.
 *
 * Contributors:
 *
 * $Header: /opt/cvs/Regina/regutil/regunicode.c,v 1.2 2009/11/23 23:24:35 mark Exp $
 */
23 #include "rxproto.h"
25 #ifdef _WIN32
26 #include <windows.h>
27 #include <winnls.h>
28 #else
30 # ifdef HAVE_ICONV_H
31 # include <iconv.h>
33 /* this needs to be redefined if your system uses a different name for
34 * the base unicode code page */
35 # ifndef ICONV_UTF16
36 # define ICONV_UTF16 "UTF-16LE"
37 # endif
39 # endif
41 #define CP_ACP 0
42 #define CP_OEMCP 0
43 #define CP_MACCP 1
44 #define CP_UTF7 7
45 #define CP_UTF8 8
47 #endif
49 /* code pages can be a numeric value, one of the strings defined by IBM, or
50 * MAC (which I include because everything else in this API follows the
51 * windows function so closely, we might as well include it) */
52 static int cvtcp(const char * s)
54 static const struct {
55 const char * s;
56 int cp;
57 } cpgs[] = {
58 {"ACP", CP_ACP},
59 #ifndef _WIN32
60 {"THREAD_ACP", CP_ACP},
61 #endif
62 {"OEMCP", CP_OEMCP},
63 {"MAC", CP_MACCP },
64 {"UTF7", CP_UTF7 },
65 {"UTF8", CP_UTF8 }
67 int cp = -1;
68 register int i;
70 for (i = 0; i < DIM(cpgs); i++) {
71 if (!strcasecmp(cpgs[i].s, s)) {
72 cp = cpgs[i].cp;
73 break;
77 #ifdef _WIN32
78 /* query system to find the current thread's ACP (thread's ACP? Windows!) */
79 if (cp == -1) {
80 if (!strcasecmp(s, "THREAD_ACP")) {
81 cp = GetACP();
84 #endif
86 if (cp == -1) {
87 cp = atoi(s);
88 if (!cp)
89 cp = -1;
92 return cp;
95 #ifdef _WIN32
96 struct mapping_flags_T {
97 RXSTRING str;
98 int flg;
101 /* find the flags in string s */
102 static int getflags(RXSTRING rxs, const struct mapping_flags_T * flgs,
103 int dim_flgs)
105 int flags = 0;
106 register int i;
108 while (rxs.strlength > 0) {
109 for (i = 0; i < dim_flgs; i++) {
110 if (rxs.strlength >= flgs[i].str.strlength &&
111 !memcmp(rxs.strptr, flgs[i].str.strptr, flgs[i].str.strlength)) {
112 flags |= flgs[i].flg;
113 rxs.strptr += flgs[i].str.strlength;
114 rxs.strlength -= flgs[i].str.strlength;
117 /* skip non-spaces -- strictly, I'm supposed to return rc 1004 */
118 i = strcspn(rxs.strptr, " ");
119 rxs.strlength -= i;
120 rxs.strptr += i;
122 /* skip spaces */
123 i = strspn(rxs.strptr, " ");
124 rxs.strlength -= i;
125 rxs.strptr += i;
129 return flags;
131 #elif !defined(HAVE_MBSTOWCS)
133 /* trivial conversions between unicode and latin 1 */
/* Trivial conversion from 16-bit unicode to latin 1.
 *
 * Copies ul code units from us to l1s, keeping only the low eight bits of
 * each one, so characters above U+00FF are not preserved.  l1s must have
 * room for ul bytes.  Returns ul, the number of bytes written. */
static int utol1(unsigned char * l1s, const unsigned short * us, int ul)
{
   int n;

   for (n = 0; n < ul; n++)
      l1s[n] = (unsigned char)(us[n] & 0xff);

   return ul;
}
/* Trivial conversion from latin 1 to 16-bit unicode.
 *
 * Widens each of the ul bytes at l1s into one code unit at us; the
 * numeric values are carried over unchanged.  us must have room for ul
 * units.  Returns ul, the number of units written. */
static int l1tou(unsigned short * us, const unsigned char * l1s, int ul)
{
   int n;

   for (n = 0; n < ul; n++)
      us[n] = l1s[n];

   return ul;
}
155 #endif
/* Conversion from 16-bit unicode to utf8.  utf8 is just a different way
 * of writing the same character numbers.  A byte whose top bit is clear
 * stands for itself.  Otherwise the first byte of a sequence starts with
 * as many 1 bits as there are bytes in the sequence, followed by a 0
 * (110xxxxx, 1110xxxx, 11110xxx), and every following byte has the form
 * 10xxxxxx.  The x bits, read left to right, are the character number.
 *
 *   up to 0x7f      1 byte    0xxxxxxx
 *   up to 0x7ff     2 bytes   110xxxxx 10xxxxxx
 *   up to 0xffff    3 bytes   1110xxxx 10xxxxxx 10xxxxxx
 *   up to 0x10ffff  4 bytes   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 *
 * us holds ul code units.  A high surrogate followed by a low surrogate
 * is combined into one character and written as four bytes; a surrogate
 * without its partner is written as U+FFFD.
 *
 * u8s must have room for 3*ul bytes, which is the most that can be
 * produced.  No terminator is written.  Returns the number of bytes
 * stored in u8s. */
static int utou8(unsigned char *u8s, const unsigned short * us, int ul)
{
   int i, j = 0;

   for (i = 0; i < ul; i++) {
      unsigned long cp = us[i];

      if (cp >= 0xd800 && cp <= 0xdbff && i + 1 < ul &&
          us[i+1] >= 0xdc00 && us[i+1] <= 0xdfff) {
         /* surrogate pair: 10 bits from each half, offset by 0x10000 */
         cp = 0x10000 + ((cp - 0xd800) << 10) + (us[i+1] - 0xdc00UL);
         i++;
      }
      else if (cp >= 0xd800 && cp <= 0xdfff) {
         cp = 0xfffd;
      }

      if (cp < 0x80) {
         u8s[j++] = (unsigned char)cp;
      }
      else if (cp < 0x800) {
         u8s[j++] = (unsigned char)(0xc0 | (cp >> 6));
         u8s[j++] = (unsigned char)(0x80 | (cp & 0x3f));
      }
      else if (cp < 0x10000) {
         u8s[j++] = (unsigned char)(0xe0 | (cp >> 12));
         u8s[j++] = (unsigned char)(0x80 | ((cp >> 6) & 0x3f));
         u8s[j++] = (unsigned char)(0x80 | (cp & 0x3f));
      }
      else {
         u8s[j++] = (unsigned char)(0xf0 | (cp >> 18));
         u8s[j++] = (unsigned char)(0x80 | ((cp >> 12) & 0x3f));
         u8s[j++] = (unsigned char)(0x80 | ((cp >> 6) & 0x3f));
         u8s[j++] = (unsigned char)(0x80 | (cp & 0x3f));
      }
   }

   return j;
}
/* Conversion from utf8 to 16-bit unicode.
 *
 * Decodes the ul bytes at u8s.  One- to three-byte sequences yield one
 * code unit each; a four-byte sequence yields a surrogate pair.  A byte
 * that cannot be decoded -- a stray continuation byte, a sequence cut off
 * by the end of the input, an overlong form, an encoded surrogate or a
 * value above U+10FFFF -- yields U+FFFD and decoding resumes at the next
 * byte.  Nothing beyond u8s[ul-1] is ever read.
 *
 * us must have room for ul code units, which is the most that can be
 * produced.  No terminator is written.  Returns the number of code units
 * stored in us. */
static int u8tou(unsigned short *us, const unsigned char * u8s, int ul)
{
   int i = 0, j = 0;

   while (i < ul) {
      unsigned long cp = u8s[i];
      unsigned long min = 0;      /* smallest value this length may encode */
      int extra = 0;              /* continuation bytes expected */
      int k, ok;

      if (cp < 0x80) {
         us[j++] = (unsigned short)cp;
         i++;
         continue;
      }

      if ((cp & 0xe0) == 0xc0) {
         extra = 1; min = 0x80; cp &= 0x1f;
      }
      else if ((cp & 0xf0) == 0xe0) {
         extra = 2; min = 0x800; cp &= 0x0f;
      }
      else if ((cp & 0xf8) == 0xf0) {
         extra = 3; min = 0x10000; cp &= 0x07;
      }

      /* the whole sequence has to lie inside the input */
      ok = extra > 0 && extra < ul - i;

      for (k = 1; ok && k <= extra; k++) {
         if ((u8s[i+k] & 0xc0) != 0x80)
            ok = 0;
         else
            cp = (cp << 6) | (u8s[i+k] & 0x3fUL);
      }

      if (ok && (cp < min || cp > 0x10ffff || (cp >= 0xd800 && cp <= 0xdfff)))
         ok = 0;

      if (!ok) {
         us[j++] = 0xfffd;
         i++;
         continue;
      }

      if (cp >= 0x10000) {
         cp -= 0x10000;
         us[j++] = (unsigned short)(0xd800 | (cp >> 10));
         us[j++] = (unsigned short)(0xdc00 | (cp & 0x3ff));
      }
      else {
         us[j++] = (unsigned short)cp;
      }

      i += extra + 1;
   }

   return j;
}
/* similarly trivial conversions between unicode and utf7.
 * rfc 2152 describes a set of `directly encoded' characters and a set
 * of `optional direct characters'. This code directly encodes all of
 * them.
 * Everything else is converted to big-endian, and the resulting byte
 * stream is converted to base64. + is used to shift into base64,
 * and any non-base64 character can be used to shift out, however
 * - is handled specially as a shift-out character: - is absorbed.
 * I expect some converters think - is in fact the only shift-out
 * character, so this converter always uses it when going to utf-7.
 */
/* Conversion to `modified base64'.
 *
 * The ul code units at us are treated as a big-endian bit stream and
 * written out six bits at a time using the base64 alphabet.  If the
 * stream does not end on a six-bit boundary the last character is padded
 * with 0 bits; no `=' padding is added -- this is where base64 is
 * modified.
 *
 * mb64s receives the characters followed by a 0 byte.  Returns the number
 * of characters written, not counting the 0 byte. */
static int utomb64(unsigned char * mb64s, const unsigned short * us, int ul)
{
   static const char coderange[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
   unsigned long acc = 0;   /* bits not yet written, right-aligned */
   int bits = 0;            /* how many bits of acc are pending: 0, 2 or 4
                             * between code units */
   int i, j = 0;

   for (i = 0; i < ul; i++) {
      /* keep only the pending bits, then append the next 16 */
      acc &= (1UL << bits) - 1;
      acc = (acc << 16) | (us[i] & 0xffffUL);
      bits += 16;

      while (bits >= 6) {
         bits -= 6;
         mb64s[j++] = coderange[(acc >> bits) & 0x3f];
      }
   }

   /* if we have a partial character, pad it out with 0s */
   if (bits > 0) {
      mb64s[j++] = coderange[(acc << (6 - bits)) & 0x3f];
   }

   mb64s[j] = 0;

   return j;
}

/* Conversion from 16-bit unicode to utf7.
 *
 * Code units that rfc 2152 allows to appear as themselves (the directly
 * encoded and the optional direct characters) are copied.  Every maximal
 * run of other code units is written as `+', the run in modified base64,
 * and `-'.  The `-' is always written, even where rfc 2152 would allow it
 * to be left out.
 *
 * u7s receives the result followed by a 0 byte.  In the worst case that
 * takes 3*ul+3 bytes, and u7s must be at least that large.  Returns the
 * number of bytes written, not counting the 0 byte. */
static int utou7(unsigned char *u7s, const unsigned short * us, int ul)
{
   /* enc[c] is non-zero when c may be copied unchanged.  The table is
    * filled in on first use; 'A' serves as the "already filled" marker. */
   static unsigned char enc[127];
   int i, j;

   if (!enc['A']) {
      static const unsigned char directs[] = "\t\n\r !\"#$%&'()*,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`abcdefghijklmnopqrstuvwxyz{|}";
      unsigned int k;

      for (k = 0; k < sizeof(directs)-1; k++)
         enc[directs[k]] = 1;
   }

   i = j = 0;
   while (i < ul) {
      int l;

      if (us[i] < 126 && enc[us[i]]) {
         u7s[j++] = (unsigned char)us[i++];
         continue;
      }

      u7s[j++] = '+';

      /* determine where the set of shift characters ends */
      for (l = i; l < ul && (us[l] >= 126 || !enc[us[l]]); l++)
         ;

      j += utomb64(u7s+j, us+i, l - i);
      u7s[j++] = '-';

      /* l is an index into us, not a count: resume exactly there */
      i = l;
   }

   u7s[j] = 0;
   return j;
}
/* Map one character of the base64 alphabet to its six-bit value.
 * Returns -1 for any character outside the alphabet. */
static int decode(unsigned char c)
{
   if (c == '+') return 62;
   else if (c == '/') return 63;
   else if ('0' <= c && c <= '9') return c - '0' + 52;
   else if ('A' <= c && c <= 'Z') return c - 'A';
   else if ('a' <= c && c <= 'z') return c - 'a' + 26;
   else return -1;
}

/* decodes a base-64 string until it encounters a character which isn't
 * allowed in base-64, or until ul characters have been read. The index of
 * the character that stopped the decoding is returned in *pnul, unless
 * that character is -, in which case the index of the next character is
 * returned in *pnul. If the input runs out first, *pnul is ul.
 *
 * The decoded bits are taken as big-endian 16-bit code units and stored
 * in us. Returns the number of code units stored. */
static int mb64tou(unsigned short * us, const unsigned char * mb64s, int ul, int * pnul)
{
   unsigned long acc = 0;   /* bits not yet stored, right-aligned */
   int bits = 0;            /* how many bits of acc are pending */
   int i, j = 0;

   for (i = 0; i < ul; i++) {
      int c = decode(mb64s[i]);

      if (c == -1) {
         /* - is absorbed, any other character is left for the caller */
         if (mb64s[i] == '-')
            i++;
         break;
      }

      acc &= (1UL << bits) - 1;
      acc = (acc << 6) | (unsigned long)c;
      bits += 6;

      if (bits >= 16) {
         bits -= 16;
         us[j++] = (unsigned short)((acc >> bits) & 0xffff);
      }
   }

   /* ignore any left-over bits. If they're not 0, the string is not
    * well-formed */

   *pnul = i;

   return j;
}

/* Conversion from utf7 to 16-bit unicode.
 *
 * Bytes outside a shifted section are widened unchanged. A + starts a
 * shifted section, which is decoded by mb64tou(); the pair +- stands for
 * a literal +, as rfc 2152 requires.
 *
 * us receives the code units followed by a 0 unit, so it must have room
 * for ul+1 units. Returns the number of code units stored, not counting
 * the 0 unit. */
static int u7tou(unsigned short *us, const unsigned char * u7s, int ul)
{
   int i = 0, j = 0;
   int nul;

   while (i < ul) {
      if (u7s[i] != '+') {
         us[j++] = u7s[i++];
         continue;
      }

      i++;   /* step over the + */

      if (i < ul && u7s[i] == '-') {
         us[j++] = '+';
         i++;
         continue;
      }

      j += mb64tou(us+j, u7s+i, ul - i, &nul);
      i += nul;
   }

   us[j] = 0;
   return j;
}
437 /* SysToUnicode(string, [codepage], [mappingflags], outstem) */
438 rxfunc(systounicode)
440 int cp;
441 int flags = 0;
442 RXSTRING outs;
443 RXSTRING stemv;
444 static const char text[] = "!TEXT";
446 checkparam(4, 4);
448 /* default code page is the `oem' code page. On most systems, this is
449 * 437, while the `windows' code page is iso-latin 1. It's not a bad idea
450 * to make the two be the same, though. */
451 if (argv[1].strptr == NULL || argv[1].strlength == 0)
452 cp = CP_OEMCP;
453 else {
454 char * s;
455 rxstrdup(s, argv[1]);
456 cp = cvtcp(s);
459 #ifdef _WIN32
460 if (argv[2].strptr && argv[2].strlength) {
461 static const struct mapping_flags_T flgs[] = {
462 {{11, "PRECOMPOSED"}, MB_PRECOMPOSED},
463 {{ 9, "COMPOSITE"}, MB_COMPOSITE},
464 {{17, "ERR_INVALID_CHARS"}, MB_ERR_INVALID_CHARS},
465 {{13, "USEGLYPHCHARS"}, MB_USEGLYPHCHARS}
468 flags = getflags(argv[2], flgs, DIM(flgs));
470 #endif
472 outs.strptr = malloc(argv[0].strlength*2);
474 #ifdef _WIN32
475 outs.strlength = MultiByteToWideChar(cp, flags, argv[0].strptr, argv[0].strlength, (LPWSTR)outs.strptr, argv[0].strlength);
477 if (outs.strlength == 0 && GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
478 outs.strlength = MultiByteToWideChar(cp, flags, argv[0].strptr, argv[0].strlength, NULL, 0);
479 outs.strptr = realloc(outs.strptr, outs.strlength*2);
480 MultiByteToWideChar(cp, flags, argv[0].strptr, argv[0].strlength, (LPWSTR)outs.strptr, outs.strlength);
483 if (outs.strlength == 0) {
484 switch (GetLastError()) {
485 /* these are my fault -- they shouldn't happen */
486 case ERROR_INSUFFICIENT_BUFFER:
487 case ERROR_INVALID_FLAGS:
488 case ERROR_INVALID_PARAMETER:
489 result->strlength = 2;
490 result->strptr[0] = '4';
491 result->strptr[1] = '0';
492 break;
493 case ERROR_NO_UNICODE_TRANSLATION:
494 result->strlength = 2;
495 result->strptr[0] = '8';
496 result->strptr[1] = '7';
497 break;
498 /* this is their fault */
499 default:
500 result->strlength = 2;
501 result->strptr[0] = '4';
502 result->strptr[1] = '7';
503 break;
506 else {
507 result_zero();
509 #else
511 /* perform default conversion using mbsrtowcs(), since it can be set up
512 * portably, in contrast to iconv(). */
513 if (cp == CP_ACP) {
514 # ifdef HAVE_MBSTOWCS
515 wchar_t * output;
517 if (sizeof(wchar_t) != sizeof(short)) {
518 output = alloca(sizeof(wchar_t)*argv[0].strlength);
520 else
521 output = (wchar_t *) outs.strptr;
523 outs.strlength = mbstowcs(output, argv[0].strptr, argv[0].strlength);
525 /* well, mostly portably -- we want 16 byte output, which might
526 * not be the case for wchar_t */
527 if (sizeof(wchar_t) != 2) {
528 register int i;
530 for (i = 0; i < outs.strlength; i++) {
531 ((short *)outs.strptr)[i] = output[i];
535 # else
536 /* unless, of course, the system doesn't support it -- in this case
537 * assume the trivial conversion */
538 outs.strlength = l1tou((unsigned short *)outs.strptr, argv[0].strptr, argv[0].strlength);
539 # endif
542 /* for the same reason, perform utf-7 and utf-8 conversions here, rather
543 * than using iconv() */
544 else if (cp == CP_UTF7) {
545 outs.strlength = u7tou((unsigned short *)outs.strptr, argv[0].strptr, argv[0].strlength);
547 else if (cp == CP_UTF8) {
548 outs.strlength = u8tou((unsigned short *) outs.strptr, argv[0].strptr, argv[0].strlength);
551 /* if requesting a specific code page, we need iconv, or we return an
552 * error */
553 else {
554 # ifdef HAVE_ICONV
555 iconv_t ic;
556 char * cps;
558 rxstrdup(cps, argv[1]);
560 ic = iconv_open(ICONV_UTF16, cps);
562 if (ic == (iconv_t)-1) {
563 result->strlength = 2;
564 result->strptr[0] = '8';
565 result->strptr[1] = '7';
566 outs.strlength = 0;
568 else {
569 char * inbuf = argv[0].strptr, * outbuf = outs.strptr;
570 size_t inlen = argv[0].strlength, outlen = argv[0].strlength*2;
572 iconv(ic, &inbuf, &inlen, &outbuf, &outlen);
574 if (inlen) {
575 outs.strlength = 0;
576 result->strlength = 4;
577 memcpy(result->strptr, "1113", 4);
579 else {
580 outs.strlength = (argv[0].strlength * 2 - outlen)/2;
581 result_zero();
584 iconv_close(ic);
586 # else
587 result->strlength = 2;
588 result->strptr[0] = '8';
589 result->strptr[1] = '7';
590 outs.strlength = 0;
591 # endif
593 #endif
595 /* outstem.!TEXT is the return value */
597 stemv.strptr = alloca(argv[3].strlength + sizeof(text) + 1);
598 memcpy(stemv.strptr, argv[3].strptr, argv[3].strlength);
599 if (stemv.strptr[argv[3].strlength-1] == '.') {
600 stemv.strlength = argv[3].strlength;
602 else {
603 stemv.strptr[argv[3].strlength] = '.';
604 stemv.strlength = argv[3].strlength + 1;
607 memcpy(stemv.strptr+stemv.strlength, text, sizeof(text)-1);
608 stemv.strlength += sizeof(text) - 1;
609 setavar(&stemv, outs.strptr, outs.strlength*2);
611 free(outs.strptr);
613 return 0;
616 rxfunc(sysfromunicode)
618 int cp;
619 int flags = 0;
620 RXSTRING outs;
621 RXSTRING stemv;
622 static const char text[] = "!TEXT", usedd[] = "!USEDDEFAULTCHAR";
623 int usedDefaultChar = 0;
624 char * defchar = NULL;
625 int * pusedDefaultChar = NULL;
627 checkparam(5, 5);
629 if (argv[1].strptr == NULL || argv[1].strlength == 0)
630 cp = CP_OEMCP;
631 else {
632 char * s;
633 rxstrdup(s, argv[1]);
634 cp = cvtcp(s);
637 #ifdef _WIN32
638 if (argv[2].strptr && argv[2].strlength) {
639 static const struct mapping_flags_T flgs[] = {
640 {{14, "COMPOSITECHECK"}, WC_COMPOSITECHECK},
641 {{ 8, "SEPCHARS"}, WC_SEPCHARS},
642 {{ 9, "DISCARDNS"}, WC_DISCARDNS},
643 {{11, "DEFAULTCHAR"}, WC_DEFAULTCHAR}
646 flags = getflags(argv[2], flgs, DIM(flgs));
647 if (flags & WC_DEFAULTCHAR) {
648 pusedDefaultChar = &usedDefaultChar;
652 if (argv[3].strlength && argv[3].strptr) {
653 defchar = (char *)argv[3].strptr;
655 #endif
657 outs.strptr = malloc(argv[0].strlength*2);
659 #ifdef _WIN32
660 outs.strlength = WideCharToMultiByte(cp, flags, (LPWSTR)argv[0].strptr, argv[0].strlength/2, outs.strptr, argv[0].strlength, defchar, pusedDefaultChar);
662 if (outs.strlength == 0 && GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
663 outs.strlength = WideCharToMultiByte(cp, flags, (LPWSTR)argv[0].strptr, argv[0].strlength/2, NULL, 0, NULL, NULL);
664 outs.strptr = realloc(outs.strptr, outs.strlength);
665 WideCharToMultiByte(cp, flags, (LPWSTR)argv[0].strptr, argv[0].strlength/2, outs.strptr, outs.strlength, defchar, &usedDefaultChar);
668 if (outs.strlength == 0) {
669 switch (GetLastError()) {
670 /* these are my fault -- they shouldn't happen */
671 case ERROR_INSUFFICIENT_BUFFER:
672 case ERROR_INVALID_FLAGS:
673 case ERROR_INVALID_PARAMETER:
674 result->strlength = 2;
675 result->strptr[0] = '4';
676 result->strptr[1] = '0';
677 break;
678 case ERROR_NO_UNICODE_TRANSLATION:
679 result->strlength = 2;
680 result->strptr[0] = '8';
681 result->strptr[1] = '7';
682 break;
683 /* this is their fault */
684 default:
685 result->strlength = 2;
686 result->strptr[0] = '4';
687 result->strptr[1] = '7';
688 break;
691 else {
692 result_zero();
695 #else
696 /* perform default conversion using wcsrtombs(), since it can be set up
697 * portably, in contrast to iconv(). */
698 if (cp == CP_ACP) {
699 # ifdef HAVE_MBSTOWCS
700 wchar_t * s;
702 if (sizeof(wchar_t) != 2) {
703 register int i;
704 s = alloca(argv[0].strlength*sizeof(wchar_t));
705 for (i = 0; i < argv[0].strlength/2; i++)
706 s[i] = ((short *)argv[0].strptr)[i];
708 else
709 s = (wchar_t *)argv[0].strptr;
711 outs.strlength = wcstombs(outs.strptr, s, argv[0].strlength/2);
712 # else
713 /* unless, of course, the system doesn't support it -- in this case
714 * assume the trivial conversion */
715 outs.strlength = utol1(outs.strptr, (unsigned short *)argv[0].strptr, argv[0].strlength/2);
716 # endif
719 /* for the same reason, perform utf-7 and utf-8 conversions here, rather
720 * than using iconv() */
721 else if (cp == CP_UTF7) {
722 outs.strlength = utou7(outs.strptr, (unsigned short *)argv[0].strptr, argv[0].strlength/2);
724 else if (cp == CP_UTF8) {
725 outs.strlength = utou8(outs.strptr, (unsigned short *)argv[0].strptr, argv[0].strlength/2);
728 /* if requesting a specific code page, we need iconv, or we return an
729 * error */
730 else {
731 # ifdef HAVE_ICONV
732 iconv_t ic;
733 char * cps;
735 rxstrdup(cps, argv[1]);
737 ic = iconv_open(cps, ICONV_UTF16);
739 if (ic == (iconv_t)-1) {
740 result->strlength = 2;
741 result->strptr[0] = '8';
742 result->strptr[1] = '7';
743 outs.strlength = 0;
745 else {
746 char * inbuf = argv[0].strptr, * outbuf = outs.strptr;
747 size_t inlen = argv[0].strlength, outlen = argv[0].strlength*2;
749 iconv(ic, &inbuf, &inlen, &outbuf, &outlen);
751 if (inlen) {
752 outs.strlength = 0;
753 result->strlength = 4;
754 memcpy(result->strptr, "1113", 4);
756 else {
757 outs.strlength = argv[0].strlength * 2 - outlen;
758 result_zero();
761 iconv_close(ic);
762 result_zero();
764 # else
765 result->strlength = 2;
766 result->strptr[0] = '8';
767 result->strptr[1] = '7';
768 outs.strlength = 0;
769 # endif
771 #endif
773 /* outstem.!TEXT is the return value
774 * outstem.!USEDDEFAULTCHAR is the value of the default character if
775 * applicable */
777 stemv.strptr = alloca(argv[4].strlength + sizeof(usedd) + 1);
778 memcpy(stemv.strptr, argv[4].strptr, argv[4].strlength);
779 if (stemv.strptr[argv[4].strlength-1] == '.') {
780 stemv.strlength = argv[4].strlength;
782 else {
783 stemv.strptr[argv[4].strlength] = '.';
784 stemv.strlength = argv[4].strlength + 1;
787 memcpy(stemv.strptr+stemv.strlength, text, sizeof(text)-1);
788 stemv.strlength += sizeof(text) - 1;
789 setavar(&stemv, outs.strptr, outs.strlength);
791 memcpy(stemv.strptr+stemv.strlength - (sizeof(text) - 1), usedd, sizeof(usedd)-1);
792 stemv.strlength += sizeof(usedd) - sizeof(text);
793 if (usedDefaultChar) {
794 setavar(&stemv, "-", 1);
796 else {
797 setavar(&stemv, "", 0);
800 return 0;