1 /* Copyright (C) 2004-2012 by George Williams */
3 * Redistribution and use in source and binary forms, with or without
4 * modification, are permitted provided that the following conditions are met:
6 * Redistributions of source code must retain the above copyright notice, this
7 * list of conditions and the following disclaimer.
9 * Redistributions in binary form must reproduce the above copyright notice,
10 * this list of conditions and the following disclaimer in the documentation
11 * and/or other materials provided with the distribution.
13 * The name of the author may not be used to endorse or promote products
14 * derived from this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
22 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
24 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
25 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #include <unibasics.h>
38 /* I have written a limited iconv which will convert either to or from unichar_t */
39 /* (either UCS2 or UCS4) */
40 /* it will not convert latin1 to latin2, but latin1->UCS2, UCS2->latin2 */
41 /* it uses the encodings built into libgunicode for systems with no iconv */
42 /* (ie. macs before 10.3, perhaps others) */
49 enum extended_encoding
{ e_jisgbpk
= e_encodingmax
};
/* Byte-order states tracked by this file. */
enum endian {
    end_big,
    end_little,
    end_unknown
};

/* Current byte-order state.  It starts out as end_unknown;
 * gww_iconv_open() tests for that value, and the conversion loops in
 * gww_iconv() compare against end_little to pick the byte layout.
 */
static enum endian endian = end_unknown;
53 static void endian_detector(void) {
54 union { short s
; char c
[2]; } u
;
63 static enum encoding
name_to_enc(const char *encname
) {
64 struct { const char *name
; enum encoding enc
; } map
[] = {
65 { "UCS-2-INTERNAL", e_unicode
},
66 { "UCS2", e_unicode
},
67 { "UCS-2", e_unicode
},
68 { "UCS-2LE", e_unicode
},
69 { "UCS-2BE", e_unicode
},
70 { "UNICODELITTLE", e_unicode
},
71 { "UNICODEBIG", e_unicode
},
72 { "ISO-10646/UCS2", e_unicode
},
73 { "ISO-10646/USC2", e_unicode
}, /* Old typo */
76 { "UCS-4LE", e_ucs4
},
77 { "UCS-4BE", e_ucs4
},
78 { "UCS-4-INTERNAL", e_ucs4
},
79 { "ISO-10646/UCS4", e_ucs4
},
80 { "iso8859-1", e_iso8859_1
},
81 { "iso8859-2", e_iso8859_2
},
82 { "iso8859-3", e_iso8859_3
},
83 { "iso8859-4", e_iso8859_4
},
84 { "iso8859-5", e_iso8859_5
},
85 { "iso8859-6", e_iso8859_6
},
86 { "iso8859-7", e_iso8859_7
},
87 { "iso8859-8", e_iso8859_8
},
88 { "iso8859-9", e_iso8859_9
},
89 { "iso8859-10", e_iso8859_10
},
90 { "iso8859-11", e_iso8859_11
},
91 { "iso8859-13", e_iso8859_13
},
92 { "iso8859-14", e_iso8859_14
},
93 { "iso8859-15", e_iso8859_15
},
94 { "iso-8859-1", e_iso8859_1
},
95 { "iso-8859-2", e_iso8859_2
},
96 { "iso-8859-3", e_iso8859_3
},
97 { "iso-8859-4", e_iso8859_4
},
98 { "iso-8859-5", e_iso8859_5
},
99 { "iso-8859-6", e_iso8859_6
},
100 { "iso-8859-7", e_iso8859_7
},
101 { "iso-8859-8", e_iso8859_8
},
102 { "iso-8859-9", e_iso8859_9
},
103 { "iso-8859-10", e_iso8859_10
},
104 { "iso-8859-11", e_iso8859_11
},
105 { "iso-8859-13", e_iso8859_13
},
106 { "iso-8859-14", e_iso8859_14
},
107 { "iso-8859-15", e_iso8859_15
},
108 { "koi8-r", e_koi8_r
},
109 { "jis201", e_jis201
},
111 { "Macintosh", e_mac
},
112 { "MS-ANSI", e_win
},
113 { "EUC-KR", e_wansung
},
114 { "johab", e_johab
},
115 { "ISO-2022-KR", e_jiskorean
},
116 { "ISO-2022-CN", e_jisgb
},
117 { "EUC-CN", e_jisgbpk
},
119 { "big5hkscs", e_big5hkscs
},
120 { "ISO-2022-JP", e_jis
},
121 { "ISO-2022-JP-2", e_jis2
},
128 for ( i
=0; map
[i
].name
!=NULL
; ++i
)
129 if ( strmatch(map
[i
].name
,encname
)==0 )
130 return( map
[i
].enc
);
135 gww_iconv_t
gww_iconv_open(const char *toenc
,const char *fromenc
) {
136 struct gww_iconv_t stuff
, *ret
;
138 if ( endian
==end_unknown
)
141 stuff
.from
= name_to_enc(fromenc
);
142 stuff
.to
= name_to_enc(toenc
);
143 if ( stuff
.from
==(enum encoding
) -1 || stuff
.to
==(enum encoding
) -1 ) {
144 /*fprintf( stderr, "Unknown encoding\n" );*/
145 return( (iconv_t
)(-1) );
146 } else if ( stuff
.from
!=e_ucs4
&& stuff
.to
!=e_ucs4
) {
147 fprintf( stderr
, "Bad call to gww_iconv_open, neither arg is UCS4\n" );
148 return( (iconv_t
)(-1) );
151 ret
= xmalloc(sizeof(struct gww_iconv_t
));
156 void gww_iconv_close( gww_iconv_t cd
) {
160 size_t gww_iconv( gww_iconv_t _cd
,
161 char **inbuf
, size_t *inlen
,
162 char **outbuf
, size_t *outlen
) {
163 struct gww_iconv_t
*cd
= _cd
;
165 unsigned char *plane
;
168 if ( inbuf
==NULL
|| outbuf
==NULL
|| inlen
==NULL
|| outlen
==NULL
||
169 *inbuf
==NULL
|| *outbuf
==NULL
)
170 return( 0 ); /* Legal, used to reset the state. As we don't do states, irrelevant */
172 if ( cd
->from
<0 || cd
->from
>e_encodingmax
|| cd
->to
<0 || cd
->to
>e_encodingmax
) {
173 fprintf( stderr
, "Garbage encoding passed to gww_iconv()\n" );
174 return( (size_t) -1 );
177 if ( cd
->from
==e_unicode
) {
178 if ( cd
->to
==e_unicode
) {
179 int min
= *inlen
< *outlen
? *inlen
: *outlen
;
181 memcpy(*inbuf
,*outbuf
,min
);
182 char_cnt
= min
/sizeof(short);
183 *inbuf
+= min
; *outbuf
+= min
;
184 *inlen
-= min
; *outlen
-= min
;
185 if ( *inlen
==1 && *outlen
>0 )
186 return( (size_t) -1 ); /* Incomplete multi-byte sequence */
187 } else if ( cd
->to
==e_ucs4
) {
188 int min
= *inlen
/sizeof(short) < *outlen
/sizeof(int32
) ? *inlen
/sizeof(short) : *outlen
/sizeof(int32
);
190 if ( endian
== end_little
) {
191 while ( *inlen
>=sizeof(short) && *outlen
>=sizeof(int32
) ) {
192 highch
= ((unsigned char *) *inbuf
)[1], lowch
= *(unsigned char *) *inbuf
;
193 ((uint8
*) outbuf
)[3] = 0; ((uint8
*) outbuf
)[2] = 0;
194 ((uint8
*) outbuf
)[1] = highch
; ((uint8
*) outbuf
)[0] = lowch
;
195 outbuf
+= sizeof(int32
); inbuf
+= sizeof(short);
196 *outlen
-= sizeof(int32
); *inlen
-= sizeof(short);
199 while ( *inlen
>=sizeof(short) && *outlen
>=sizeof(int32
) ) {
200 highch
= ((unsigned char *) *inbuf
)[0], lowch
= ((unsigned char *) *inbuf
)[1];
201 ((uint8
*) outbuf
)[0] = 0; ((uint8
*) outbuf
)[1] = 0;
202 ((uint8
*) outbuf
)[2] = highch
; ((uint8
*) outbuf
)[3] = lowch
;
203 outbuf
+= sizeof(int32
); inbuf
+= sizeof(short);
204 *outlen
-= sizeof(int32
); *inlen
-= sizeof(short);
208 if ( *inlen
==1 && *outlen
>0 )
209 return( (size_t) -1 ); /* Incomplete multi-byte sequence */
210 } else if ( cd
->to
<e_first2byte
) {
211 struct charmap
*table
= NULL
;
212 table
= alphabets_from_unicode
[cd
->to
];
213 while ( *inlen
>1 && *outlen
>0 ) {
215 if ( endian
== end_little
) {
216 highch
= ((unsigned char *) *inbuf
)[1], lowch
= *(unsigned char *) *inbuf
;
218 highch
= *(unsigned char *) *inbuf
, lowch
= ((unsigned char *) *inbuf
)[1];
220 if ( highch
>=table
->first
&& highch
<=table
->last
&&
221 (plane
= table
->table
[highch
])!=NULL
&&
222 (ch
=plane
[lowch
])!=0 ) {
229 return( (size_t) -1 );
231 } else if ( cd
->to
==e_utf8
) {
232 while ( *inlen
>1 && *outlen
>0 ) {
234 if ( endian
== end_little
) {
235 uch
= (((unsigned char *) *inbuf
)[1]<<8) | (*((unsigned char *) *inbuf
));
237 uch
= (*((unsigned char *) *inbuf
)<<8) | (((unsigned char *) *inbuf
)[1]);
240 *((*outbuf
)++) = uch
;
242 } else if ( uch
<0x800 ) {
244 return( (size_t) -1 );
245 *((*outbuf
)++) = 0xc0 | (uch
>>6);
246 *((*outbuf
)++) = 0x80 | (uch
&0x3f);
248 } else { /* I'm not dealing with */
250 return( (size_t) -1 );
251 *((*outbuf
)++) = 0xe0 | (uch
>>12);
252 *((*outbuf
)++) = 0x80 | ((uch
>>6)&0x3f);
253 *((*outbuf
)++) = 0x80 | (uch
&0x3f);
261 fprintf( stderr
, "Unexpected encoding\n" );
262 return( (size_t) -1 );
264 } else if ( cd
->from
==e_ucs4
) {
265 if ( cd
->to
==e_unicode
) {
266 int min
= *inlen
/sizeof(int32
) < *outlen
/sizeof(int16
) ? *inlen
/sizeof(int32
) : *outlen
/sizeof(int16
);
268 if ( endian
== end_little
) {
269 while ( *inlen
>=sizeof(short) && *outlen
>=sizeof(int32
) ) {
270 highch
= ((unsigned char *) *inbuf
)[1], lowch
= *(unsigned char *) *inbuf
;
271 ((uint8
*) outbuf
)[1] = highch
; ((uint8
*) outbuf
)[0] = lowch
;
272 outbuf
+= sizeof(int16
); inbuf
+= sizeof(int32
);
273 *outlen
-= sizeof(int16
); *inlen
-= sizeof(int32
);
276 while ( *inlen
>=sizeof(short) && *outlen
>=sizeof(int32
) ) {
277 highch
= ((unsigned char *) *inbuf
)[2], lowch
= ((unsigned char *) *inbuf
)[3];
278 ((uint8
*) outbuf
)[0] = highch
; ((uint8
*) outbuf
)[1] = lowch
;
279 outbuf
+= sizeof(int16
); inbuf
+= sizeof(int32
);
280 *outlen
-= sizeof(int16
); *inlen
-= sizeof(int32
);
284 if ( *inlen
>0 && *outlen
>0 )
285 return( (size_t) -1 ); /* Incomplete multi-byte sequence */
286 } else if ( cd
->to
<e_first2byte
) {
287 struct charmap
*table
= NULL
;
288 table
= alphabets_from_unicode
[cd
->to
];
289 while ( *inlen
>1 && *outlen
>0 ) {
291 if ( endian
== end_little
) {
292 highch
= ((unsigned char *) *inbuf
)[1], lowch
= *(unsigned char *) *inbuf
;
294 highch
= ((unsigned char *) *inbuf
)[2], lowch
= ((unsigned char *) *inbuf
)[3];
296 if ( highch
>=table
->first
&& highch
<=table
->last
&&
297 (plane
= table
->table
[highch
])!=NULL
&&
298 (ch
=plane
[lowch
])!=0 ) {
305 return( (size_t) -1 );
307 } else if ( cd
->to
==e_utf8
) {
308 while ( *inlen
>1 && *outlen
>0 ) {
310 if ( endian
== end_little
) {
311 uch
= (((unsigned char *) *inbuf
)[3]<<24) |
312 (((unsigned char *) *inbuf
)[2]<<16) |
313 (((unsigned char *) *inbuf
)[1]<<8) |
314 (*((unsigned char *) *inbuf
));
316 uch
= (*((unsigned char *) *inbuf
)<<24) |
317 (((unsigned char *) *inbuf
)[1]<<16) |
318 (((unsigned char *) *inbuf
)[2]<<8) |
319 (((unsigned char *) *inbuf
)[3]);
322 *((*outbuf
)++) = uch
;
324 } else if ( uch
<0x800 ) {
326 return( (size_t) -1 );
327 *((*outbuf
)++) = 0xc0 | (uch
>>6);
328 *((*outbuf
)++) = 0x80 | (uch
&0x3f);
330 } else if ( uch
< 0x10000 ) {
332 return( (size_t) -1 );
333 *((*outbuf
)++) = 0xe0 | (uch
>>12);
334 *((*outbuf
)++) = 0x80 | ((uch
>>6)&0x3f);
335 *((*outbuf
)++) = 0x80 | (uch
&0x3f);
338 uint32 val
= uch
-0x10000;
339 int u
= ((val
&0xf0000)>>16)+1, z
=(val
&0x0f000)>>12, y
=(val
&0x00fc0)>>6, x
=val
&0x0003f;
341 return( (size_t) -1 );
342 *(*outbuf
)++ = 0xf0 | (u
>>2);
343 *(*outbuf
)++ = 0x80 | ((u
&3)<<4) | z
;
344 *(*outbuf
)++ = 0x80 | y
;
345 *(*outbuf
)++ = 0x80 | x
;
353 fprintf( stderr
, "Unexpected encoding\n" );
354 return( (size_t) -1 );
356 } else if ( cd
->to
==e_unicode
) {
357 const unichar_t
*table
;
358 if ( cd
->from
<e_first2byte
) {
359 table
= unicode_from_alphabets
[cd
->from
];
360 while ( *inlen
>0 && *outlen
>1 ) {
361 unichar_t ch
= table
[ *(unsigned char *) ((*inbuf
)++)];
363 if ( endian
==end_little
) {
364 *((*outbuf
)++) = ch
&0xff;
365 *((*outbuf
)++) = ch
>>8;
367 *((*outbuf
)++) = ch
>>8;
368 *((*outbuf
)++) = ch
&0xff;
370 *outlen
-= sizeof(unichar_t
);
373 } else if ( cd
->from
==e_jis
|| cd
->from
==e_jis2
||
374 cd
->from
==e_jiskorean
|| cd
->from
==e_jisgb
) {
375 table
= cd
->from
==e_jisgb
? unicode_from_gb2312
:
376 cd
->from
==e_jiskorean
? unicode_from_ksc5601
:
377 cd
->from
==e_jis
? unicode_from_jis208
:
379 while ( *inlen
>1 && *outlen
>1 ) {
380 unsigned char *ipt
= (unsigned char *) *inbuf
;
382 if ( *ipt
<0x21 || *ipt
>0x7e || ipt
[1]<0x21 || ipt
[1]>0x7e )
383 return( (size_t) -1 );
384 ch
= (*ipt
-0x21)*94 + (ipt
[1]-0x21);
387 *inbuf
= (char *) ipt
+2;
388 if ( endian
==end_little
) {
389 *((*outbuf
)++) = ch
&0xff;
390 *((*outbuf
)++) = ch
>>8;
392 *((*outbuf
)++) = ch
>>8;
393 *((*outbuf
)++) = ch
&0xff;
395 *outlen
-= sizeof(unichar_t
);
398 if ( *inlen
==1 && *outlen
>0 )
399 return( (size_t) -1 ); /* Incomplete multi-byte sequence */
400 } else if ( cd
->from
==e_wansung
|| cd
->from
==e_jisgbpk
) {
401 table
= cd
->from
==e_jisgbpk
? unicode_from_gb2312
:
402 unicode_from_ksc5601
;
403 while ( *inlen
>0 && *outlen
>1 ) {
404 unsigned char *ipt
= (unsigned char *) *inbuf
;
409 *inbuf
= (char *) ipt
+1;
411 if ( *ipt
<0xa1 || *ipt
>0xfe || ipt
[1]<0xa1 || ipt
[1]>0xfe ||
413 return( (size_t) -1 );
414 ch
= (*ipt
-0xa1)*94 + (ipt
[1]-0xa1);
417 *inbuf
= (char *) ipt
+2;
419 if ( endian
==end_little
) {
420 *((*outbuf
)++) = ch
&0xff;
421 *((*outbuf
)++) = ch
>>8;
423 *((*outbuf
)++) = ch
>>8;
424 *((*outbuf
)++) = ch
&0xff;
426 *outlen
-= sizeof(unichar_t
);
429 } else if ( cd
->from
==e_johab
|| cd
->from
==e_big5
|| cd
->from
==e_big5hkscs
) {
431 if ( cd
->from
==e_big5
) {
433 table
= unicode_from_big5
;
434 } else if ( cd
->from
==e_big5hkscs
) {
436 table
= unicode_from_big5hkscs
;
439 table
= unicode_from_johab
;
441 while ( *inlen
>0 && *outlen
>1 ) {
442 unsigned char *ipt
= (unsigned char *) *inbuf
;
447 *inbuf
= (char *) ipt
+1;
450 return( (size_t) -1 );
451 ch
= (*ipt
<<8) | ipt
[1];
453 return( (size_t) -1 );
457 *inbuf
= (char *) ipt
+2;
459 if ( endian
==end_little
) {
460 *((*outbuf
)++) = ch
&0xff;
461 *((*outbuf
)++) = ch
>>8;
463 *((*outbuf
)++) = ch
>>8;
464 *((*outbuf
)++) = ch
&0xff;
466 *outlen
-= sizeof(unichar_t
);
469 } else if ( cd
->from
==e_sjis
) {
470 while ( *inlen
>0 && *outlen
>1 ) {
471 unsigned char *ipt
= (unsigned char *) *inbuf
;
473 if ( ch1
<127 || ( ch1
>=161 && ch1
<=223 )) {
474 ch
= unicode_from_jis201
[ch1
];
475 *inbuf
= (char *) ipt
+1;
477 } else if ( *inlen
==1 )
478 return( (size_t) -1 );
481 if ( ch1
>= 129 && ch1
<= 159 )
488 else if ( ch2
>127 ) {
495 if ( ch1
-0x21>=94 || ch2
-0x21>=94 )
496 return( (size_t) -1 );
497 ch
= unicode_from_jis208
[(ch1
-0x21)*94+(ch2
-0x21)];
499 *inbuf
= (char *) ipt
+2;
501 if ( endian
==end_little
) {
502 *((*outbuf
)++) = ch
&0xff;
503 *((*outbuf
)++) = ch
>>8;
505 *((*outbuf
)++) = ch
>>8;
506 *((*outbuf
)++) = ch
&0xff;
508 *outlen
-= sizeof(unichar_t
);
511 } else if ( cd
->from
==e_utf8
) {
512 while ( *inlen
>0 && *outlen
>sizeof(unichar_t
) ) {
513 unsigned char *ipt
= (unsigned char *) *inbuf
;
516 *inbuf
= (char *) ipt
+1;
518 } else if ( ch
<=0xdf ) {
519 if ( *inlen
<2 || ipt
[1]<0x80 )
520 return( (size_t) -1 );
521 ch
= ((ch
&0x1f)<<6) | (ipt
[1] &0x3f);
523 *inbuf
= (char *) ipt
+2;
524 } else if ( ch
<=0xef ) {
525 if ( *inlen
<3 || ipt
[1]<0x80 || ipt
[2]<0x80 )
526 return( (size_t) -1 );
527 ch
= ((ch
&0x1f)<<12) | ((ipt
[1] &0x3f)<<6) | (ipt
[2]&0x3f);
529 *inbuf
= (char *) ipt
+3;
532 if ( *inlen
<4 || *outlen
<4 || ipt
[1]<0x80 || ipt
[2]<0x80 || ipt
[3]<0x80 )
533 return( (size_t) -1 );
534 w
= ( ((ch
&0x7)<<2) | ((ipt
[1]&0x30)>>4) )-1;
535 ch
= 0xd800 | (w
<<6) | ((ipt
[1]&0xf)<<2) | ((ipt
[2]&0x30)>>4);
536 if ( endian
==end_little
) {
537 *((*outbuf
)++) = ch
&0xff;
538 *((*outbuf
)++) = ch
>>8;
540 *((*outbuf
)++) = ch
>>8;
541 *((*outbuf
)++) = ch
&0xff;
544 ch
= 0xdc00 | ((ipt
[2]&0xf)<<6) | (ipt
[3]&0x3f);
546 if ( endian
==end_little
) {
547 *((*outbuf
)++) = ch
&0xff;
548 *((*outbuf
)++) = ch
>>8;
550 *((*outbuf
)++) = ch
>>8;
551 *((*outbuf
)++) = ch
&0xff;
553 *outlen
-= sizeof(unichar_t
);
557 fprintf( stderr
, "Unexpected encoding\n" );
558 return( (size_t) -1 );
560 } else if ( cd
->to
==e_ucs4
) {
561 const unichar_t
*table
;
562 if ( cd
->from
<e_first2byte
) {
563 table
= unicode_from_alphabets
[cd
->from
];
564 while ( *inlen
>0 && *outlen
>1 ) {
565 unichar_t ch
= table
[ *(unsigned char *) ((*inbuf
)++)];
567 if ( endian
==end_little
) {
570 *((*outbuf
)++) = ch
&0xff;
571 *((*outbuf
)++) = ch
>>8;
573 *((*outbuf
)++) = ch
>>8;
574 *((*outbuf
)++) = ch
&0xff;
578 *outlen
-= sizeof(unichar_t
);
581 } else if ( cd
->from
==e_jis
|| cd
->from
==e_jis2
||
582 cd
->from
==e_jiskorean
|| cd
->from
==e_jisgb
) {
583 table
= cd
->from
==e_jisgb
? unicode_from_gb2312
:
584 cd
->from
==e_jiskorean
? unicode_from_ksc5601
:
585 cd
->from
==e_jis
? unicode_from_jis208
:
587 while ( *inlen
>1 && *outlen
>1 ) {
588 unsigned char *ipt
= (unsigned char *) *inbuf
;
590 if ( *ipt
<0x21 || *ipt
>0x7e || ipt
[1]<0x21 || ipt
[1]>0x7e )
591 return( (size_t) -1 );
592 ch
= (*ipt
-0x21)*94 + (ipt
[1]-0x21);
595 *inbuf
= (char *) ipt
+2;
596 if ( endian
==end_little
) {
599 *((*outbuf
)++) = ch
&0xff;
600 *((*outbuf
)++) = ch
>>8;
602 *((*outbuf
)++) = ch
>>8;
603 *((*outbuf
)++) = ch
&0xff;
607 *outlen
-= sizeof(unichar_t
);
610 if ( *inlen
==1 && *outlen
>0 )
611 return( (size_t) -1 ); /* Incomplete multi-byte sequence */
612 } else if ( cd
->from
==e_wansung
|| cd
->from
==e_jisgbpk
) {
613 table
= cd
->from
==e_jisgbpk
? unicode_from_gb2312
:
614 unicode_from_ksc5601
;
615 while ( *inlen
>0 && *outlen
>1 ) {
616 unsigned char *ipt
= (unsigned char *) *inbuf
;
621 *inbuf
= (char *) ipt
+1;
623 if ( *ipt
<0xa1 || *ipt
>0xfe || ipt
[1]<0xa1 || ipt
[1]>0xfe ||
625 return( (size_t) -1 );
626 ch
= (*ipt
-0xa1)*94 + (ipt
[1]-0xa1);
629 *inbuf
= (char *) ipt
+2;
631 if ( endian
==end_little
) {
634 *((*outbuf
)++) = ch
&0xff;
635 *((*outbuf
)++) = ch
>>8;
637 *((*outbuf
)++) = ch
>>8;
638 *((*outbuf
)++) = ch
&0xff;
642 *outlen
-= sizeof(unichar_t
);
645 } else if ( cd
->from
==e_johab
|| cd
->from
==e_big5
|| cd
->from
==e_big5hkscs
) {
647 if ( cd
->from
==e_big5
) {
649 table
= unicode_from_big5
;
650 } else if ( cd
->from
==e_big5hkscs
) {
652 table
= unicode_from_big5hkscs
;
655 table
= unicode_from_johab
;
657 while ( *inlen
>0 && *outlen
>1 ) {
658 unsigned char *ipt
= (unsigned char *) *inbuf
;
663 *inbuf
= (char *) ipt
+1;
666 return( (size_t) -1 );
667 ch
= (*ipt
<<8) | ipt
[1];
669 return( (size_t) -1 );
673 *inbuf
= (char *) ipt
+2;
675 if ( endian
==end_little
) {
678 *((*outbuf
)++) = ch
&0xff;
679 *((*outbuf
)++) = ch
>>8;
681 *((*outbuf
)++) = ch
>>8;
682 *((*outbuf
)++) = ch
&0xff;
686 *outlen
-= sizeof(unichar_t
);
689 } else if ( cd
->from
==e_sjis
) {
690 while ( *inlen
>0 && *outlen
>1 ) {
691 unsigned char *ipt
= (unsigned char *) *inbuf
;
693 if ( ch1
<127 || ( ch1
>=161 && ch1
<=223 )) {
694 ch
= unicode_from_jis201
[ch1
];
695 *inbuf
= (char *) ipt
+1;
697 } else if ( *inlen
==1 )
698 return( (size_t) -1 );
701 if ( ch1
>= 129 && ch1
<= 159 )
708 else if ( ch2
>127 ) {
715 if ( ch1
-0x21>=94 || ch2
-0x21>=94 )
716 return( (size_t) -1 );
717 ch
= unicode_from_jis208
[(ch1
-0x21)*94+(ch2
-0x21)];
719 *inbuf
= (char *) ipt
+2;
721 if ( endian
==end_little
) {
724 *((*outbuf
)++) = ch
&0xff;
725 *((*outbuf
)++) = ch
>>8;
727 *((*outbuf
)++) = ch
>>8;
728 *((*outbuf
)++) = ch
&0xff;
732 *outlen
-= sizeof(unichar_t
);
735 } else if ( cd
->from
==e_utf8
) {
736 while ( *inlen
>0 && *outlen
>sizeof(unichar_t
) ) {
737 unsigned char *ipt
= (unsigned char *) *inbuf
;
740 *inbuf
= (char *) ipt
+1;
742 } else if ( ch
<=0xdf ) {
743 if ( *inlen
<2 || ipt
[1]<0x80 )
744 return( (size_t) -1 );
745 ch
= ((ch
&0x1f)<<6) | (ipt
[1] &0x3f);
747 *inbuf
= (char *) ipt
+2;
748 } else if ( ch
<=0xef ) {
749 if ( *inlen
<3 || ipt
[1]<0x80 || ipt
[2]<0x80 )
750 return( (size_t) -1 );
751 ch
= ((ch
&0x1f)<<12) | ((ipt
[1] &0x3f)<<6) | (ipt
[2]&0x3f);
753 *inbuf
= (char *) ipt
+3;
756 w
= ( ((*ipt
&0x7)<<2) | ((ipt
[1]&0x30)>>4) )-1;
757 w
= (w
<<6) | ((ipt
[1]&0xf)<<2) | ((ipt
[2]&0x30)>>4);
758 w2
= ((ipt
[2]&0xf)<<6) | (ipt
[3]&0x3f);
759 ch
= w
*0x400 + w2
+ 0x10000;
760 *inbuf
= (char *) ipt
+4;
762 if ( endian
==end_little
) {
763 *((*outbuf
)++) = ch
&0xff;
764 *((*outbuf
)++) = ch
>>8;
765 *((*outbuf
)++) = ch
>>16;
766 *((*outbuf
)++) = ch
>>24;
768 *((*outbuf
)++) = ch
>>24;
769 *((*outbuf
)++) = ch
>>16;
770 *((*outbuf
)++) = ch
>>8;
771 *((*outbuf
)++) = ch
&0xff;
773 *outlen
-= sizeof(unichar_t
);
777 fprintf( stderr
, "Unexpected encoding\n" );
778 return( (size_t) -1 );
781 fprintf( stderr
, "One of the two encodings must be UCS2 in gww_iconv()\n" );
782 return( (size_t) -1 );
789 if ( cd
->to
==e_ucs4
&& *outlen
>3 ) {
/* Placeholder definition sitting just before the closing
 * `#endif` for HAVE_ICONV_H.
 * NOTE(review): presumably here so the translation unit is not empty when
 * the rest of the file is preprocessed away -- confirm against the
 * matching #if/#else.
 */
static const int a_file_must_define_something = 1;
798 #endif /* HAVE_ICONV_H */