source/lib/iconv.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    minimal iconv implementation
   4    Copyright (C) Andrew Tridgell 2001
   5    Copyright (C) Jelmer Vernooij 2002,2003
   6
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program; if not, write to the Free Software
  19    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  20 */
  21
  22 #include "includes.h"
  23
  24 /*
  25  * We have to use strcasecmp here as the character conversions
  26  * haven't been initialised yet. JRA.
  27  */
  28
  29 #undef strcasecmp
  30
  31 /**
  32  * @file
  33  *
  34  * @brief Samba wrapper/stub for iconv character set conversion.
  35  *
  36  * iconv is the XPG2 interface for converting between character
  37  * encodings.  This file provides a Samba wrapper around it, and also
  38  * a simple reimplementation that is used if the system does not
  39  * implement iconv.
  40  *
  41  * Samba only works with encodings that are supersets of ASCII: ascii
  42  * characters like whitespace can be tested for directly, multibyte
  43  * sequences start with a byte with the high bit set, and strings are
  44  * terminated by a nul byte.
  45  *
  46  * Note that the only function provided by iconv is conversion between
  47  * characters.  It doesn't directly support operations like
  48  * uppercasing or comparison.  We have to convert to UCS-2 and compare
  49  * there.
  50  *
  51  * @sa Samba Developers Guide
  52  **/
  53
/* NOTE(review): static_decl_charset is a macro defined outside this file;
   presumably it declares the init functions of statically linked charset
   modules (it is paired with static_init_charset in
   lazy_initialize_iconv) - confirm. */
static_decl_charset;

/*
 * Forward declarations of the builtin converters defined later in this
 * file.  They all share one signature: an opaque descriptor, then the
 * input pointer and input bytes-left counter, then the output pointer and
 * output bytes-left counter.  The pointers and counters are updated in
 * place by the converters.
 */
static size_t ascii_pull(void *,const char **, size_t *, char **, size_t *);
static size_t ascii_push(void *,const char **, size_t *, char **, size_t *);
static size_t latin1_push(void *,const char **, size_t *, char **, size_t *);
static size_t  utf8_pull(void *,const char **, size_t *, char **, size_t *);
static size_t  utf8_push(void *,const char **, size_t *, char **, size_t *);
static size_t ucs2hex_pull(void *,const char **, size_t *, char **, size_t *);
static size_t ucs2hex_push(void *,const char **, size_t *, char **, size_t *);
static size_t iconv_copy(void *,const char **, size_t *, char **, size_t *);
static size_t iconv_swab  (void *,const char **, size_t *, char **, size_t *);
  65
/*
 * Table of the charsets implemented in this file.  Each entry gives the
 * charset name followed by two converter functions; judging by the
 * function names these are the "pull" converter and then the "push"
 * converter (the struct layout is declared elsewhere - confirm).  The
 * entry with a NULL name terminates the table.
 */
static struct charset_functions builtin_functions[] = {
        /* windows is really neither UCS-2 nor UTF-16 */
        {"UCS-2LE",  iconv_copy, iconv_copy},
        {"UTF-16LE",  iconv_copy, iconv_copy},
        {"UCS-2BE",  iconv_swab, iconv_swab},
        {"UTF-16BE",  iconv_swab, iconv_swab},

        /* we include the UTF-8 alias to cope with differing locale settings */
        {"UTF8",   utf8_pull,  utf8_push},
        {"UTF-8",   utf8_pull,  utf8_push},
        {"ASCII", ascii_pull, ascii_push},
        {"646", ascii_pull, ascii_push},
        {"ISO-8859-1", ascii_pull, latin1_push},
        {"UCS2-HEX", ucs2hex_pull, ucs2hex_push},
        {NULL, NULL, NULL}
};
  82
/* Head of the list of registered charsets: smb_register_charset() adds
   entries to it and find_charset_functions() walks it through ->next. */
static struct charset_functions *charsets = NULL;
  84
  85 static struct charset_functions *find_charset_functions(const char *name)
  86 {
  87         struct charset_functions *c = charsets;
  88
  89         while(c) {
  90                 if (strcasecmp(name, c->name) == 0) {
  91                         return c;
  92                 }
  93                 c = c->next;
  94         }
  95
  96         return NULL;
  97 }
  98
  99 NTSTATUS smb_register_charset(struct charset_functions *funcs)
 100 {
 101         if (!funcs) {
 102                 return NT_STATUS_INVALID_PARAMETER;
 103         }
 104
 105         DEBUG(5, ("Attempting to register new charset %s\n", funcs->name));
 106         /* Check whether we already have this charset... */
 107         if (find_charset_functions(funcs->name)) {
 108                 DEBUG(0, ("Duplicate charset %s, not registering\n", funcs->name));
 109                 return NT_STATUS_OBJECT_NAME_COLLISION;
 110         }
 111
 112         funcs->next = funcs->prev = NULL;
 113         DEBUG(5, ("Registered charset %s\n", funcs->name));
 114         DLIST_ADD(charsets, funcs);
 115         return NT_STATUS_OK;
 116 }
 117
 118 static void lazy_initialize_iconv(void)
 119 {
 120         static BOOL initialized;
 121         int i;
 122
 123         if (!initialized) {
 124                 initialized = True;
 125                 for(i = 0; builtin_functions[i].name; i++)
 126                         smb_register_charset(&builtin_functions[i]);
 127                 static_init_charset;
 128         }
 129 }
 130
 131 #ifdef HAVE_NATIVE_ICONV
 132 /* if there was an error then reset the internal state,
 133    this ensures that we don't have a shift state remaining for
 134    character sets like SJIS */
 135 static size_t sys_iconv(void *cd,
 136                         const char **inbuf, size_t *inbytesleft,
 137                         char **outbuf, size_t *outbytesleft)
 138 {
 139         size_t ret = iconv((iconv_t)cd,
 140                            (char **)inbuf, inbytesleft,
 141                            outbuf, outbytesleft);
 142         if (ret == (size_t)-1) {
 143                 int saved_errno = errno;
 144                 iconv(cd, NULL, NULL, NULL, NULL);
 145                 errno = saved_errno;
 146         }
 147         return ret;
 148 }
 149 #endif
 150
 151 /**
 152  * This is a simple portable iconv() implementaion.
 153  *
 154  * It only knows about a very small number of character sets - just
 155  * enough that Samba works on systems that don't have iconv.
 156  **/
 157 size_t smb_iconv(smb_iconv_t cd,
 158                  const char **inbuf, size_t *inbytesleft,
 159                  char **outbuf, size_t *outbytesleft)
 160 {
 161         char cvtbuf[2048];
 162         char *bufp = cvtbuf;
 163         size_t bufsize;
 164
 165         /* in many cases we can go direct */
 166         if (cd->direct) {
 167                 return cd->direct(cd->cd_direct,
 168                                   inbuf, inbytesleft, outbuf, outbytesleft);
 169         }
 170
 171
 172         /* otherwise we have to do it chunks at a time */
 173         while (*inbytesleft > 0) {
 174                 bufp = cvtbuf;
 175                 bufsize = sizeof(cvtbuf);
 176
 177                 if (cd->pull(cd->cd_pull,
 178                              inbuf, inbytesleft, &bufp, &bufsize) == -1
 179                     && errno != E2BIG) return -1;
 180
 181                 bufp = cvtbuf;
 182                 bufsize = sizeof(cvtbuf) - bufsize;
 183
 184                 if (cd->push(cd->cd_push,
 185                              (const char **)&bufp, &bufsize,
 186                              outbuf, outbytesleft) == -1) return -1;
 187         }
 188
 189         return 0;
 190 }
 191
 192
 193 static BOOL is_utf16(const char *name)
 194 {
 195         return strcasecmp(name, "UCS-2LE") == 0 ||
 196                 strcasecmp(name, "UTF-16LE") == 0;
 197 }
 198
 199 /*
 200   simple iconv_open() wrapper
 201  */
 202 smb_iconv_t smb_iconv_open(const char *tocode, const char *fromcode)
 203 {
 204         smb_iconv_t ret;
 205         struct charset_functions *from, *to;
 206
 207         lazy_initialize_iconv();
 208         from = charsets;
 209         to = charsets;
 210
 211         ret = SMB_MALLOC_P(struct _smb_iconv_t);
 212         if (!ret) {
 213                 errno = ENOMEM;
 214                 return (smb_iconv_t)-1;
 215         }
 216         memset(ret, 0, sizeof(struct _smb_iconv_t));
 217
 218         ret->from_name = SMB_STRDUP(fromcode);
 219         ret->to_name = SMB_STRDUP(tocode);
 220
 221         /* check for the simplest null conversion */
 222         if (strcasecmp(fromcode, tocode) == 0) {
 223                 ret->direct = iconv_copy;
 224                 return ret;
 225         }
 226
 227         /* check if we have a builtin function for this conversion */
 228         from = find_charset_functions(fromcode);
 229         if(from)ret->pull = from->pull;
 230
 231         to = find_charset_functions(tocode);
 232         if(to)ret->push = to->push;
 233
 234         /* check if we can use iconv for this conversion */
 235 #ifdef HAVE_NATIVE_ICONV
 236         if (!ret->pull) {
 237                 ret->cd_pull = iconv_open("UTF-16LE", fromcode);
 238                 if (ret->cd_pull == (iconv_t)-1)
 239                         ret->cd_pull = iconv_open("UCS-2LE", fromcode);
 240                 if (ret->cd_pull != (iconv_t)-1)
 241                         ret->pull = sys_iconv;
 242         }
 243
 244         if (!ret->push) {
 245                 ret->cd_push = iconv_open(tocode, "UTF-16LE");
 246                 if (ret->cd_push == (iconv_t)-1)
 247                         ret->cd_push = iconv_open(tocode, "UCS-2LE");
 248                 if (ret->cd_push != (iconv_t)-1)
 249                         ret->push = sys_iconv;
 250         }
 251 #endif
 252
 253         /* check if there is a module available that can do this conversion */
 254         if (!ret->pull && NT_STATUS_IS_OK(smb_probe_module("charset", fromcode))) {
 255                 if(!(from = find_charset_functions(fromcode)))
 256                         DEBUG(0, ("Module %s doesn't provide charset %s!\n", fromcode, fromcode));
 257                 else
 258                         ret->pull = from->pull;
 259         }
 260
 261         if (!ret->push && NT_STATUS_IS_OK(smb_probe_module("charset", tocode))) {
 262                 if(!(to = find_charset_functions(tocode)))
 263                         DEBUG(0, ("Module %s doesn't provide charset %s!\n", tocode, tocode));
 264                 else
 265                         ret->push = to->push;
 266         }
 267
 268         if (!ret->push || !ret->pull) {
 269                 SAFE_FREE(ret->from_name);
 270                 SAFE_FREE(ret->to_name);
 271                 SAFE_FREE(ret);
 272                 errno = EINVAL;
 273                 return (smb_iconv_t)-1;
 274         }
 275
 276         /* check for conversion to/from ucs2 */
 277         if (is_utf16(fromcode) && to) {
 278                 ret->direct = to->push;
 279                 ret->push = ret->pull = NULL;
 280                 return ret;
 281         }
 282
 283         if (is_utf16(tocode) && from) {
 284                 ret->direct = from->pull;
 285                 ret->push = ret->pull = NULL;
 286                 return ret;
 287         }
 288
 289         /* Check if we can do the conversion direct */
 290 #ifdef HAVE_NATIVE_ICONV
 291         if (is_utf16(fromcode)) {
 292                 ret->direct = sys_iconv;
 293                 ret->cd_direct = ret->cd_push;
 294                 ret->cd_push = NULL;
 295                 return ret;
 296         }
 297         if (is_utf16(tocode)) {
 298                 ret->direct = sys_iconv;
 299                 ret->cd_direct = ret->cd_pull;
 300                 ret->cd_pull = NULL;
 301                 return ret;
 302         }
 303 #endif
 304
 305         return ret;
 306 }
 307
 308 /*
 309   simple iconv_close() wrapper
 310 */
 311 int smb_iconv_close (smb_iconv_t cd)
 312 {
 313 #ifdef HAVE_NATIVE_ICONV
 314         if (cd->cd_direct) iconv_close((iconv_t)cd->cd_direct);
 315         if (cd->cd_pull) iconv_close((iconv_t)cd->cd_pull);
 316         if (cd->cd_push) iconv_close((iconv_t)cd->cd_push);
 317 #endif
 318
 319         SAFE_FREE(cd->from_name);
 320         SAFE_FREE(cd->to_name);
 321
 322         memset(cd, 0, sizeof(*cd));
 323         SAFE_FREE(cd);
 324         return 0;
 325 }
 326
 327
/**********************************************************************
 the following functions implement the builtin character sets in Samba
 and also the "test" character sets that are designed to test
 multi-byte character set support for English users
***********************************************************************/
 333
 334 static size_t ascii_pull(void *cd, const char **inbuf, size_t *inbytesleft,
 335                          char **outbuf, size_t *outbytesleft)
 336 {
 337         while (*inbytesleft >= 1 && *outbytesleft >= 2) {
 338                 (*outbuf)[0] = (*inbuf)[0];
 339                 (*outbuf)[1] = 0;
 340                 (*inbytesleft)  -= 1;
 341                 (*outbytesleft) -= 2;
 342                 (*inbuf)  += 1;
 343                 (*outbuf) += 2;
 344         }
 345
 346         if (*inbytesleft > 0) {
 347                 errno = E2BIG;
 348                 return -1;
 349         }
 350
 351         return 0;
 352 }
 353
 354 static size_t ascii_push(void *cd, const char **inbuf, size_t *inbytesleft,
 355                          char **outbuf, size_t *outbytesleft)
 356 {
 357         int ir_count=0;
 358
 359         while (*inbytesleft >= 2 && *outbytesleft >= 1) {
 360                 (*outbuf)[0] = (*inbuf)[0] & 0x7F;
 361                 if ((*inbuf)[1]) ir_count++;
 362                 (*inbytesleft)  -= 2;
 363                 (*outbytesleft) -= 1;
 364                 (*inbuf)  += 2;
 365                 (*outbuf) += 1;
 366         }
 367
 368         if (*inbytesleft == 1) {
 369                 errno = EINVAL;
 370                 return -1;
 371         }
 372
 373         if (*inbytesleft > 1) {
 374                 errno = E2BIG;
 375                 return -1;
 376         }
 377
 378         return ir_count;
 379 }
 380
 381 static size_t latin1_push(void *cd, const char **inbuf, size_t *inbytesleft,
 382                          char **outbuf, size_t *outbytesleft)
 383 {
 384         int ir_count=0;
 385
 386         while (*inbytesleft >= 2 && *outbytesleft >= 1) {
 387                 (*outbuf)[0] = (*inbuf)[0];
 388                 if ((*inbuf)[1]) ir_count++;
 389                 (*inbytesleft)  -= 2;
 390                 (*outbytesleft) -= 1;
 391                 (*inbuf)  += 2;
 392                 (*outbuf) += 1;
 393         }
 394
 395         if (*inbytesleft == 1) {
 396                 errno = EINVAL;
 397                 return -1;
 398         }
 399
 400         if (*inbytesleft > 1) {
 401                 errno = E2BIG;
 402                 return -1;
 403         }
 404
 405         return ir_count;
 406 }
 407
 408 static size_t ucs2hex_pull(void *cd, const char **inbuf, size_t *inbytesleft,
 409                          char **outbuf, size_t *outbytesleft)
 410 {
 411         while (*inbytesleft >= 1 && *outbytesleft >= 2) {
 412                 unsigned v;
 413
 414                 if ((*inbuf)[0] != '@') {
 415                         /* seven bit ascii case */
 416                         (*outbuf)[0] = (*inbuf)[0];
 417                         (*outbuf)[1] = 0;
 418                         (*inbytesleft)  -= 1;
 419                         (*outbytesleft) -= 2;
 420                         (*inbuf)  += 1;
 421                         (*outbuf) += 2;
 422                         continue;
 423                 }
 424                 /* it's a hex character */
 425                 if (*inbytesleft < 5) {
 426                         errno = EINVAL;
 427                         return -1;
 428                 }
 429
 430                 if (sscanf(&(*inbuf)[1], "%04x", &v) != 1) {
 431                         errno = EILSEQ;
 432                         return -1;
 433                 }
 434
 435                 (*outbuf)[0] = v&0xff;
 436                 (*outbuf)[1] = v>>8;
 437                 (*inbytesleft)  -= 5;
 438                 (*outbytesleft) -= 2;
 439                 (*inbuf)  += 5;
 440                 (*outbuf) += 2;
 441         }
 442
 443         if (*inbytesleft > 0) {
 444                 errno = E2BIG;
 445                 return -1;
 446         }
 447
 448         return 0;
 449 }
 450
 451 static size_t ucs2hex_push(void *cd, const char **inbuf, size_t *inbytesleft,
 452                            char **outbuf, size_t *outbytesleft)
 453 {
 454         while (*inbytesleft >= 2 && *outbytesleft >= 1) {
 455                 char buf[6];
 456
 457                 if ((*inbuf)[1] == 0 &&
 458                     ((*inbuf)[0] & 0x80) == 0 &&
 459                     (*inbuf)[0] != '@') {
 460                         (*outbuf)[0] = (*inbuf)[0];
 461                         (*inbytesleft)  -= 2;
 462                         (*outbytesleft) -= 1;
 463                         (*inbuf)  += 2;
 464                         (*outbuf) += 1;
 465                         continue;
 466                 }
 467                 if (*outbytesleft < 5) {
 468                         errno = E2BIG;
 469                         return -1;
 470                 }
 471                 snprintf(buf, 6, "@%04x", SVAL(*inbuf, 0));
 472                 memcpy(*outbuf, buf, 5);
 473                 (*inbytesleft)  -= 2;
 474                 (*outbytesleft) -= 5;
 475                 (*inbuf)  += 2;
 476                 (*outbuf) += 5;
 477         }
 478
 479         if (*inbytesleft == 1) {
 480                 errno = EINVAL;
 481                 return -1;
 482         }
 483
 484         if (*inbytesleft > 1) {
 485                 errno = E2BIG;
 486                 return -1;
 487         }
 488
 489         return 0;
 490 }
 491
 492 static size_t iconv_swab(void *cd, const char **inbuf, size_t *inbytesleft,
 493                          char **outbuf, size_t *outbytesleft)
 494 {
 495         int n;
 496
 497         n = MIN(*inbytesleft, *outbytesleft);
 498
 499         swab(*inbuf, *outbuf, (n&~1));
 500         if (n&1) {
 501                 (*outbuf)[n-1] = 0;
 502         }
 503
 504         (*inbytesleft) -= n;
 505         (*outbytesleft) -= n;
 506         (*inbuf) += n;
 507         (*outbuf) += n;
 508
 509         if (*inbytesleft > 0) {
 510                 errno = E2BIG;
 511                 return -1;
 512         }
 513
 514         return 0;
 515 }
 516
 517 static size_t iconv_copy(void *cd, const char **inbuf, size_t *inbytesleft,
 518                          char **outbuf, size_t *outbytesleft)
 519 {
 520         int n;
 521
 522         n = MIN(*inbytesleft, *outbytesleft);
 523
 524         memmove(*outbuf, *inbuf, n);
 525
 526         (*inbytesleft) -= n;
 527         (*outbytesleft) -= n;
 528         (*inbuf) += n;
 529         (*outbuf) += n;
 530
 531         if (*inbytesleft > 0) {
 532                 errno = E2BIG;
 533                 return -1;
 534         }
 535
 536         return 0;
 537 }
 538
 539 static size_t utf8_pull(void *cd, const char **inbuf, size_t *inbytesleft,
 540                          char **outbuf, size_t *outbytesleft)
 541 {
 542         size_t in_left=*inbytesleft, out_left=*outbytesleft;
 543         const uint8 *c = (const uint8 *)*inbuf;
 544         uint8 *uc = (uint8 *)*outbuf;
 545
 546         while (in_left >= 1 && out_left >= 2) {
 547                 unsigned int codepoint;
 548
 549                 if ((c[0] & 0x80) == 0) {
 550                         uc[0] = c[0];
 551                         uc[1] = 0;
 552                         c  += 1;
 553                         in_left  -= 1;
 554                         out_left -= 2;
 555                         uc += 2;
 556                         continue;
 557                 }
 558
 559                 if ((c[0] & 0xe0) == 0xc0) {
 560                         if (in_left < 2 ||
 561                             (c[1] & 0xc0) != 0x80) {
 562                                 errno = EILSEQ;
 563                                 goto error;
 564                         }
 565                         codepoint = (c[1]&0x3f) | ((c[0]&0x1f)<<6);
 566                         if (codepoint < 0x80) {
 567                                 /* don't accept UTF-8 characters that are not minimally packed */
 568                                 errno = EILSEQ;
 569                                 goto error;
 570                         }
 571                         uc[1] = codepoint >> 8;
 572                         uc[0] = codepoint & 0xff;
 573                         c  += 2;
 574                         in_left  -= 2;
 575                         out_left -= 2;
 576                         uc += 2;
 577                         continue;
 578                 }
 579
 580                 if ((c[0] & 0xf0) == 0xe0) {
 581                         if (in_left < 3 ||
 582                             (c[1] & 0xc0) != 0x80 ||
 583                             (c[2] & 0xc0) != 0x80) {
 584                                 errno = EILSEQ;
 585                                 goto error;
 586                         }
 587                         codepoint = (c[2]&0x3f) | ((c[1]&0x3f)<<6) | ((c[0]&0xf)<<12);
 588                         if (codepoint < 0x800) {
 589                                 /* don't accept UTF-8 characters that are not minimally packed */
 590                                 errno = EILSEQ;
 591                                 goto error;
 592                         }
 593                         uc[1] = codepoint >> 8;
 594                         uc[0] = codepoint & 0xff;
 595                         c  += 3;
 596                         in_left  -= 3;
 597                         out_left -= 2;
 598                         uc += 2;
 599                         continue;
 600                 }
 601
 602                 if ((c[0] & 0xf8) == 0xf0) {
 603                         if (in_left < 4 ||
 604                             (c[1] & 0xc0) != 0x80 ||
 605                             (c[2] & 0xc0) != 0x80 ||
 606                             (c[3] & 0xc0) != 0x80) {
 607                                 errno = EILSEQ;
 608                                 goto error;
 609                         }
 610                         codepoint =
 611                                 (c[3]&0x3f) |
 612                                 ((c[2]&0x3f)<<6) |
 613                                 ((c[1]&0x3f)<<12) |
 614                                 ((c[0]&0x7)<<18);
 615                         if (codepoint < 0x10000 || codepoint > 0x10ffff) {
 616                                 /* don't accept UTF-8 characters that are not minimally packed */
 617                                 errno = EILSEQ;
 618                                 goto error;
 619                         }
 620
 621                         codepoint -= 0x10000;
 622
 623                         if (out_left < 4) {
 624                                 errno = E2BIG;
 625                                 goto error;
 626                         }
 627
 628                         uc[0] = (codepoint>>10) & 0xFF;
 629                         uc[1] = (codepoint>>18) | 0xd8;
 630                         uc[2] = codepoint & 0xFF;
 631                         uc[3] = ((codepoint>>8) & 0x3) | 0xdc;
 632                         c  += 4;
 633                         in_left  -= 4;
 634                         out_left -= 4;
 635                         uc += 4;
 636                         continue;
 637                 }
 638
 639                 /* we don't handle 5 byte sequences */
 640                 errno = EINVAL;
 641                 goto error;
 642         }
 643
 644         if (in_left > 0) {
 645                 errno = E2BIG;
 646                 goto error;
 647         }
 648
 649         *inbytesleft = in_left;
 650         *outbytesleft = out_left;
 651         *inbuf = (char *)c;
 652         *outbuf = (char *)uc;
 653         return 0;
 654
 655 error:
 656         *inbytesleft = in_left;
 657         *outbytesleft = out_left;
 658         *inbuf = (char *)c;
 659         *outbuf = (char *)uc;
 660         return -1;
 661 }
 662
 663 static size_t utf8_push(void *cd, const char **inbuf, size_t *inbytesleft,
 664                         char **outbuf, size_t *outbytesleft)
 665 {
 666         size_t in_left=*inbytesleft, out_left=*outbytesleft;
 667         uint8 *c = (uint8 *)*outbuf;
 668         const uint8 *uc = (const uint8 *)*inbuf;
 669
 670         while (in_left >= 2 && out_left >= 1) {
 671                 unsigned int codepoint;
 672
 673                 if (uc[1] == 0 && !(uc[0] & 0x80)) {
 674                         /* simplest case */
 675                         c[0] = uc[0];
 676                         in_left  -= 2;
 677                         out_left -= 1;
 678                         uc += 2;
 679                         c  += 1;
 680                         continue;
 681                 }
 682
 683                 if ((uc[1]&0xf8) == 0) {
 684                         /* next simplest case */
 685                         if (out_left < 2) {
 686                                 errno = E2BIG;
 687                                 goto error;
 688                         }
 689                         c[0] = 0xc0 | (uc[0]>>6) | (uc[1]<<2);
 690                         c[1] = 0x80 | (uc[0] & 0x3f);
 691                         in_left  -= 2;
 692                         out_left -= 2;
 693                         uc += 2;
 694                         c  += 2;
 695                         continue;
 696                 }
 697
 698                 if ((uc[1] & 0xfc) == 0xdc) {
 699                         /* its the second part of a 4 byte sequence. Illegal */
 700                         if (in_left < 4) {
 701                                 errno = EINVAL;
 702                         } else {
 703                                 errno = EILSEQ;
 704                         }
 705                         goto error;
 706                 }
 707
 708                 if ((uc[1] & 0xfc) != 0xd8) {
 709                         codepoint = uc[0] | (uc[1]<<8);
 710                         if (out_left < 3) {
 711                                 errno = E2BIG;
 712                                 goto error;
 713                         }
 714                         c[0] = 0xe0 | (codepoint >> 12);
 715                         c[1] = 0x80 | ((codepoint >> 6) & 0x3f);
 716                         c[2] = 0x80 | (codepoint & 0x3f);
 717
 718                         in_left  -= 2;
 719                         out_left -= 3;
 720                         uc  += 2;
 721                         c   += 3;
 722                         continue;
 723                 }
 724
 725                 /* its the first part of a 4 byte sequence */
 726                 if (in_left < 4) {
 727                         errno = EINVAL;
 728                         goto error;
 729                 }
 730                 if ((uc[3] & 0xfc) != 0xdc) {
 731                         errno = EILSEQ;
 732                         goto error;
 733                 }
 734                 codepoint = 0x10000 + (uc[2] | ((uc[3] & 0x3)<<8) |
 735                                        (uc[0]<<10) | ((uc[1] & 0x3)<<18));
 736
 737                 if (out_left < 4) {
 738                         errno = E2BIG;
 739                         goto error;
 740                 }
 741                 c[0] = 0xf0 | (codepoint >> 18);
 742                 c[1] = 0x80 | ((codepoint >> 12) & 0x3f);
 743                 c[2] = 0x80 | ((codepoint >> 6) & 0x3f);
 744                 c[3] = 0x80 | (codepoint & 0x3f);
 745
 746                 in_left  -= 4;
 747                 out_left -= 4;
 748                 uc       += 4;
 749                 c        += 4;
 750         }
 751
 752         if (in_left == 1) {
 753                 errno = EINVAL;
 754                 goto error;
 755         }
 756
 757         if (in_left > 1) {
 758                 errno = E2BIG;
 759                 goto error;
 760         }
 761
 762         *inbytesleft = in_left;
 763         *outbytesleft = out_left;
 764         *inbuf  = (char *)uc;
 765         *outbuf = (char *)c;
 766
 767         return 0;
 768
 769 error:
 770         *inbytesleft = in_left;
 771         *outbytesleft = out_left;
 772         *inbuf  = (char *)uc;
 773         *outbuf = (char *)c;
 774         return -1;
 775 }
 776