iconv/loop.c

   1 /* Conversion loop frame work.
   2    Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
   5
   6    The GNU C Library is free software; you can redistribute it and/or
   7    modify it under the terms of the GNU Lesser General Public
   8    License as published by the Free Software Foundation; either
   9    version 2.1 of the License, or (at your option) any later version.
  10
  11    The GNU C Library is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14    Lesser General Public License for more details.
  15
  16    You should have received a copy of the GNU Lesser General Public
  17    License along with the GNU C Library; if not, write to the Free
  18    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  19    02111-1307 USA.  */
  20
  21 /* This file provides a frame for the reader loop in all conversion modules.
  22    The actual code must (of course) be provided in the actual module source
  23    code but certain actions can be written down generically, with some
  24    customization options which are these:
  25
  26      MIN_NEEDED_INPUT   minimal number of input bytes needed for the next
  27                         conversion.
  28      MIN_NEEDED_OUTPUT  minimal number of bytes produced by the next round
  29                         of conversion.
  30
   31      MAX_NEEDED_INPUT   you guessed it, this is the maximal number of input
   32                         bytes needed.  It defaults to MIN_NEEDED_INPUT.
   33      MAX_NEEDED_OUTPUT  likewise for output bytes.
  34
  35      LOOPFCT            name of the function created.  If not specified
  36                         the name is `loop' but this prevents the use
  37                         of multiple functions in the same file.
  38
  39      BODY               this is supposed to expand to the body of the loop.
  40                         The user must provide this.
  41
   42      EXTRA_LOOP_DECLS   extra arguments passed from conversion loop call.
  43
  44      INIT_PARAMS        code to define and initialize variables from params.
  45      UPDATE_PARAMS      code to store result in params.
  46 */
  47
  48 #include <assert.h>
  49 #include <endian.h>
  50 #include <gconv.h>
  51 #include <stdint.h>
  52 #include <string.h>
  53 #include <wchar.h>
  54 #include <sys/param.h>          /* For MIN.  */
  55 #define __need_size_t
  56 #include <stddef.h>
  57
  58
   59 /* We have to provide support for machines which are not able to handle
   60    unaligned memory accesses.  Some of the character encodings have
   61    representations with a fixed width of 2 or 4 bytes.  But if we cannot
   62    access unaligned memory we still have to read byte-wise.
         get16/get32 read and put16/put32 store a 2- or 4-byte value at ADDR.
         When _STRING_ARCH_unaligned is defined, or DEFINE_UNALIGNED is not,
         they access memory directly through a casted pointer and
         FCTNAME (name) expands to plain NAME.  Otherwise they assemble or
         split the value one byte at a time, following __BYTE_ORDER, and
         FCTNAME (name) expands to NAME_unaligned.  */
   63 #undef FCTNAME2
   64 #if defined _STRING_ARCH_unaligned || !defined DEFINE_UNALIGNED
   65 /* Direct access: either the machine can handle unaligned memory access
         or this is not the DEFINE_UNALIGNED pass.  */
   66 # define get16(addr) *((__const uint16_t *) (addr))
   67 # define get32(addr) *((__const uint32_t *) (addr))
   68
   69 /* We need no special support for writing values either.  */
   70 # define put16(addr, val) *((uint16_t *) (addr)) = (val)
   71 # define put32(addr, val) *((uint32_t *) (addr)) = (val)
   72
   73 # define FCTNAME2(name) name
   74 #else
   75 /* Distinguish between big endian and little endian.  The put macros are
         GNU statement expressions which evaluate VAL once and yield void.
         NOTE(review): unlike the direct versions, get16/get32 here yield an
         `int' (the bytes are promoted before shifting), and get32 shifts
         into the sign bit when the most significant byte is >= 0x80 --
         confirm that callers store the result in an unsigned 32-bit
         variable.  */
   76 # if __BYTE_ORDER == __LITTLE_ENDIAN
   77 #  define get16(addr) \
   78      (((__const unsigned char *) (addr))[1] << 8                              \
   79       | ((__const unsigned char *) (addr))[0])
   80 #  define get32(addr) \
   81      (((((__const unsigned char *) (addr))[3] << 8                            \
   82         | ((__const unsigned char *) (addr))[2]) << 8                         \
   83        | ((__const unsigned char *) (addr))[1]) << 8                          \
   84       | ((__const unsigned char *) (addr))[0])
   85
   86 #  define put16(addr, val) \
   87      ({ uint16_t __val = (val);                                               \
   88         ((unsigned char *) (addr))[0] = __val;                                \
   89         ((unsigned char *) (addr))[1] = __val >> 8;                           \
   90         (void) 0; })
   91 #  define put32(addr, val) \
   92      ({ uint32_t __val = (val);                                               \
   93         ((unsigned char *) (addr))[0] = __val;                                \
   94         __val >>= 8;                                                          \
   95         ((unsigned char *) (addr))[1] = __val;                                \
   96         __val >>= 8;                                                          \
   97         ((unsigned char *) (addr))[2] = __val;                                \
   98         __val >>= 8;                                                          \
   99         ((unsigned char *) (addr))[3] = __val;                                \
  100         (void) 0; })
  101 # else
  102 #  define get16(addr) \
  103      (((__const unsigned char *) (addr))[0] << 8                              \
  104       | ((__const unsigned char *) (addr))[1])
  105 #  define get32(addr) \
  106      (((((__const unsigned char *) (addr))[0] << 8                            \
  107         | ((__const unsigned char *) (addr))[1]) << 8                         \
  108        | ((__const unsigned char *) (addr))[2]) << 8                          \
  109       | ((__const unsigned char *) (addr))[3])
  110
  111 #  define put16(addr, val) \
  112      ({ uint16_t __val = (val);                                               \
  113         ((unsigned char *) (addr))[1] = __val;                                \
  114         ((unsigned char *) (addr))[0] = __val >> 8;                           \
  115         (void) 0; })
  116 #  define put32(addr, val) \
  117      ({ uint32_t __val = (val);                                               \
  118         ((unsigned char *) (addr))[3] = __val;                                \
  119         __val >>= 8;                                                          \
  120         ((unsigned char *) (addr))[2] = __val;                                \
  121         __val >>= 8;                                                          \
  122         ((unsigned char *) (addr))[1] = __val;                                \
  123         __val >>= 8;                                                          \
  124         ((unsigned char *) (addr))[0] = __val;                                \
  125         (void) 0; })
  126 # endif
  127
  128 # define FCTNAME2(name) name##_unaligned
  129 #endif
      /* The extra macro level makes sure an argument which is itself a macro
         (such as LOOPFCT) is expanded before FCTNAME2 may paste the suffix.  */
  130 #define FCTNAME(name) FCTNAME2(name)
 131
 132
  133 /* We need at least one byte for the next round.  The including module
         must define MIN_NEEDED_INPUT; there is no default.  */
  134 #ifndef MIN_NEEDED_INPUT
  135 # error "MIN_NEEDED_INPUT definition missing"
  136 #endif
  137
  138 /* Let's see how many bytes we consume at most.  Unless told otherwise
         this is the same as the minimum.  */
  139 #ifndef MAX_NEEDED_INPUT
  140 # define MAX_NEEDED_INPUT       MIN_NEEDED_INPUT
  141 #endif
  142
  143 /* We produce at least one byte in the next round.  The including module
         must define MIN_NEEDED_OUTPUT; there is no default.  */
  144 #ifndef MIN_NEEDED_OUTPUT
  145 # error "MIN_NEEDED_OUTPUT definition missing"
  146 #endif
  147
  148 /* Let's see how many bytes we produce at most.  Unless told otherwise
         this is the same as the minimum.  */
  149 #ifndef MAX_NEEDED_OUTPUT
  150 # define MAX_NEEDED_OUTPUT      MIN_NEEDED_OUTPUT
  151 #endif
  152
  153 /* Default name for the function.  */
  154 #ifndef LOOPFCT
  155 # define LOOPFCT                loop
  156 #endif
  157
  158 /* Make sure we have a loop body.  */
  159 #ifndef BODY
  160 # error "Definition of BODY missing for function" LOOPFCT
  161 #endif
  162
  163
  164 /* If no arguments have to be passed to the loop function define the macro
  165    as empty.  */
  166 #ifndef EXTRA_LOOP_DECLS
  167 # define EXTRA_LOOP_DECLS
  168 #endif
 169
 170
  171 /* To make it easier for the writers of the modules, we define a macro
  172    to test whether we have to ignore errors.  It reads IRREVERSIBLE and
         FLAGS from the enclosing function; the loop functions in this file
         declare FLAGS only when LOOP_NEED_FLAGS is defined.  The result is
         nonzero if IRREVERSIBLE is not NULL and __GCONV_IGNORE_ERRORS is
         set in FLAGS.  */
  173 #define ignore_errors_p() \
  174   (irreversible != NULL && (flags & __GCONV_IGNORE_ERRORS))
 175
 176
  177 /* Error handling with transliteration/transcription function use and
  178    ignoring of errors.  Note that we cannot use the do while (0) trick
  179    since `break' and `continue' must reach certain points.
         The macro must be used inside the conversion loop and expects
         RESULT, IRREVERSIBLE, STEP, STEP_DATA, INPTRP, INPTR, INEND, OUTPTR
         and (through ignore_errors_p) FLAGS to be in scope.  It works as
         follows:
           - RESULT is set to __GCONV_ILLEGAL_INPUT;
           - if IRREVERSIBLE is NULL the loop is left with `break';
           - each function on the STEP_DATA->__trans list is called in turn
             until one returns something other than __GCONV_ILLEGAL_INPUT,
             in which case the loop continues with that RESULT;
           - otherwise, unless errors are to be ignored, the loop is left
             with `break';
           - otherwise *IRREVERSIBLE is incremented, INCR input bytes are
             skipped and the loop continues.  */
  180 #define STANDARD_ERR_HANDLER(Incr) \
  181   {                                                                           \
  182     struct __gconv_trans_data *trans;                                         \
  183                                                                               \
  184     result = __GCONV_ILLEGAL_INPUT;                                           \
  185                                                                               \
  186     if (irreversible == NULL)                                                 \
  187       /* This means we are in call from __gconv_transliterate.  In this       \
  188          case we are not doing any error recovery ourself.  */                \
  189       break;                                                                  \
  190                                                                               \
  191     /* First try the transliteration methods.  */                             \
  192     for (trans = step_data->__trans; trans != NULL; trans = trans->__next)    \
  193       {                                                                       \
  194         result = DL_CALL_FCT (trans->__trans_fct,                             \
  195                               (step, step_data, trans->__data, *inptrp,       \
  196                                &inptr, inend, &outptr, irreversible));        \
  197         if (result != __GCONV_ILLEGAL_INPUT)                                  \
  198           break;                                                              \
  199       }                                                                       \
  200     /* If any of them recognized the input continue with the loop.  */        \
  201     if (result != __GCONV_ILLEGAL_INPUT)                                      \
  202       continue;                                                               \
  203                                                                               \
  204     /* Next see whether we have to ignore the error.  If not, stop.  */       \
  205     if (! ignore_errors_p ())                                                 \
  206       break;                                                                  \
  207                                                                               \
  208     /* When we come here it means we ignore the character.  */                \
  209     ++*irreversible;                                                          \
  210     inptr += Incr;                                                            \
  211     continue;                                                                 \
  212   }
 213
 214
  215 /* Handling of Unicode 3.1 TAG characters.  Unicode recommends
  216    "If language codes are not relevant to the particular processing
  217     operation, then they should be ignored."
  218    This macro is usually called right before STANDARD_ERR_HANDLER (Incr).
         If CHARACTER is a TAG character, INCR input bytes are skipped (INPTR
         of the enclosing loop is advanced) and the loop continues without
         anything being written; otherwise the macro does nothing.  Like
         STANDARD_ERR_HANDLER it relies on `continue' and therefore must be
         used directly inside the conversion loop.  */
  219 #define UNICODE_TAG_HANDLER(Character, Incr) \
  220   {                                                                           \
  221     /* TAG characters are those in the range U+E0000..U+E007F.  */            \
  222     if (((Character) >> 7) == (0xe0000 >> 7))                                 \
  223       {                                                                       \
  224         inptr += Incr;                                                        \
  225         continue;                                                             \
  226       }                                                                       \
  227   }
 228
 229
  230 /* The function returns the status, as defined in gconv.h.
         It runs BODY repeatedly on the input between *INPTRP and INEND and
         lets it write between *OUTPTRP and OUTEND.  On return *INPTRP and
         *OUTPTRP are set to the positions reached.  The status is
           __GCONV_EMPTY_INPUT       if the loop ended because all input was
                                     consumed and BODY did not change RESULT,
           __GCONV_FULL_OUTPUT       if fewer than MIN_NEEDED_OUTPUT bytes of
                                     room are left before a round,
           __GCONV_INCOMPLETE_INPUT  if fewer than MIN_NEEDED_INPUT bytes of
                                     input are left before a round (tested
                                     only if MIN_NEEDED_INPUT > 1),
         or whatever value BODY stored in RESULT before leaving the loop.
         STATE, FLAGS and DATA are made available to BODY only if
         LOOP_NEED_STATE, LOOP_NEED_FLAGS and LOOP_NEED_DATA, respectively,
         are defined.  INIT_PARAMS and UPDATE_PARAMS, if defined, are expanded
         before the loop and after the pointers have been stored back.  */
  231 static inline int
  232 FCTNAME (LOOPFCT) (struct __gconv_step *step,
  233                    struct __gconv_step_data *step_data,
  234                    const unsigned char **inptrp, const unsigned char *inend,
  235                    unsigned char **outptrp, const unsigned char *outend,
  236                    size_t *irreversible EXTRA_LOOP_DECLS)
  237 {
  238 #ifdef LOOP_NEED_STATE
  239   mbstate_t *state = step_data->__statep;
  240 #endif
  241 #ifdef LOOP_NEED_FLAGS
  242   int flags = step_data->__flags;
  243 #endif
  244 #ifdef LOOP_NEED_DATA
  245   void *data = step->__data;
  246 #endif
  247   int result = __GCONV_EMPTY_INPUT;
  248   const unsigned char *inptr = *inptrp;
  249   unsigned char *outptr = *outptrp;
  250
  251 #ifdef INIT_PARAMS
  252   INIT_PARAMS;
  253 #endif
  254
  255   while (inptr != inend)
  256     {
  257       /* `if' cases for MIN_NEEDED_OUTPUT ==/!= 1 is made to help the
  258          compiler generating better code.  They will be optimized away
  259          since MIN_NEEDED_OUTPUT is always a constant.  */
  260       if ((MIN_NEEDED_OUTPUT != 1
  261            && __builtin_expect (outptr + MIN_NEEDED_OUTPUT > outend, 0))
  262           || (MIN_NEEDED_OUTPUT == 1
  263               && __builtin_expect (outptr >= outend, 0)))
  264         {
  265           /* Overflow in the output buffer.  */
  266           result = __GCONV_FULL_OUTPUT;
  267           break;
  268         }
  269       if (MIN_NEEDED_INPUT > 1
  270           && __builtin_expect (inptr + MIN_NEEDED_INPUT > inend, 0))
  271         {
  272           /* We don't have enough input for another complete input
  273              character.  */
  274           result = __GCONV_INCOMPLETE_INPUT;
  275           break;
  276         }
  277
  278       /* Here comes the body the user provides.  It can stop with
  279          RESULT set to GCONV_INCOMPLETE_INPUT (if the size of the
  280          input characters vary in size), GCONV_ILLEGAL_INPUT, or
  281          GCONV_FULL_OUTPUT (if the output characters vary in size).  */
  282       BODY
  283     }
  284
  285   /* Update the pointers pointed to by the parameters.  */
  286   *inptrp = inptr;
  287   *outptrp = outptr;
  288 #ifdef UPDATE_PARAMS
  289   UPDATE_PARAMS;
  290 #endif
  291
  292   return result;
  293 }
 294
 295
  296 /* Include the file a second time to define the function to handle
  297    unaligned access.  This happens only on the first pass
         (DEFINE_UNALIGNED not defined), only if _STRING_ARCH_unaligned is not
         defined, and only if neither MIN_NEEDED_FROM nor MIN_NEEDED_TO is 1
         and each MAX_NEEDED_* value is a multiple of the matching
         MIN_NEEDED_* value.  MIN/MAX_NEEDED_FROM/TO are not defined in this
         file; presumably the including module provides them.
         The nested pass sees DEFINE_UNALIGNED, so it uses the byte-wise
         access macros and the function gets the `_unaligned' suffix.  Note
         that the nested inclusion runs to the end of this file, i.e. it also
         processes everything below including the #undef's of the
         customization macros.
         NOTE(review): no macro named `unaligned' is defined in this file; the
         #undef looks like a leftover -- confirm.  */
  298 #if !defined DEFINE_UNALIGNED && !defined _STRING_ARCH_unaligned \
  299     && MIN_NEEDED_FROM != 1 && MAX_NEEDED_FROM % MIN_NEEDED_FROM == 0 \
  300     && MIN_NEEDED_TO != 1 && MAX_NEEDED_TO % MIN_NEEDED_TO == 0
  301 # undef get16
  302 # undef get32
  303 # undef put16
  304 # undef put32
  305 # undef unaligned
  306
  307 # define DEFINE_UNALIGNED
  308 # include "loop.c"
  309 # undef DEFINE_UNALIGNED
  310 #endif
 311
 312
 313 #if MAX_NEEDED_INPUT > 1
 314 # define SINGLE(fct) SINGLE2 (fct)
 315 # define SINGLE2(fct) fct##_single
 316 static inline int
 317 SINGLE(LOOPFCT) (struct __gconv_step *step,
 318                  struct __gconv_step_data *step_data,
 319                  const unsigned char **inptrp, const unsigned char *inend,
 320                  unsigned char **outptrp, unsigned char *outend,
 321                  size_t *irreversible EXTRA_LOOP_DECLS)
 322 {
 323   mbstate_t *state = step_data->__statep;
 324 #ifdef LOOP_NEED_FLAGS
 325   int flags = step_data->__flags;
 326 #endif
 327 #ifdef LOOP_NEED_DATA
 328   void *data = step->__data;
 329 #endif
 330   int result = __GCONV_OK;
 331   unsigned char bytebuf[MAX_NEEDED_INPUT];
 332   const unsigned char *inptr = *inptrp;
 333   unsigned char *outptr = *outptrp;
 334   size_t inlen;
 335
 336 #ifdef INIT_PARAMS
 337   INIT_PARAMS;
 338 #endif
 339
 340 #ifdef UNPACK_BYTES
 341   UNPACK_BYTES
 342 #else
 343   /* Add the bytes from the state to the input buffer.  */
 344   for (inlen = 0; inlen < (size_t) (state->__count & 7); ++inlen)
 345     bytebuf[inlen] = state->__value.__wchb[inlen];
 346 #endif
 347
 348   /* Are there enough bytes in the input buffer?  */
 349   if (__builtin_expect (inptr + (MIN_NEEDED_INPUT - inlen) > inend, 0))
 350     {
 351       *inptrp = inend;
 352 #ifdef STORE_REST
 353       inptr = bytebuf;
 354       inptrp = &inptr;
 355       inend = &bytebuf[inlen];
 356
 357       STORE_REST
 358 #else
 359       /* We don't have enough input for another complete input
 360          character.  */
 361       while (inptr < inend)
 362         state->__value.__wchb[inlen++] = *inptr++;
 363 #endif
 364
 365       return __GCONV_INCOMPLETE_INPUT;
 366     }
 367
 368   /* Enough space in output buffer.  */
 369   if ((MIN_NEEDED_OUTPUT != 1 && outptr + MIN_NEEDED_OUTPUT > outend)
 370       || (MIN_NEEDED_OUTPUT == 1 && outptr >= outend))
 371     /* Overflow in the output buffer.  */
 372     return __GCONV_FULL_OUTPUT;
 373
 374   /*  Now add characters from the normal input buffer.  */
 375   do
 376     bytebuf[inlen++] = *inptr++;
 377   while (inlen < MAX_NEEDED_INPUT && inptr < inend);
 378
 379   inptr = bytebuf;
 380   inend = &bytebuf[inlen];
 381
 382   do
 383     {
 384       BODY
 385     }
 386   while (0);
 387
 388   /* Now we either have produced an output character and consumed all the
 389      bytes from the state and at least one more, or the character is still
 390      incomplete, or we have some other error (like illegal input character,
 391      no space in output buffer).  */
 392   if (__builtin_expect (inptr != bytebuf, 1))
 393     {
 394       /* We found a new character.  */
 395       assert (inptr - bytebuf > (state->__count & 7));
 396
 397       *inptrp += inptr - bytebuf - (state->__count & 7);
 398       *outptrp = outptr;
 399
 400       result = __GCONV_OK;
 401
 402       /* Clear the state buffer.  */
 403       state->__count &= ~7;
 404     }
 405   else if (result == __GCONV_INCOMPLETE_INPUT)
 406     {
 407       /* This can only happen if we have less than MAX_NEEDED_INPUT bytes
 408          available.  */
 409       assert (inend != &bytebuf[MAX_NEEDED_INPUT]);
 410
 411       *inptrp += inend - bytebuf - (state->__count & 7);
 412 #ifdef STORE_REST
 413       inptrp = &inptr;
 414
 415       STORE_REST
 416 #else
 417       /* We don't have enough input for another complete input
 418          character.  */
 419       while (inptr < inend)
 420         state->__value.__wchb[inlen++] = *inptr++;
 421 #endif
 422     }
 423
 424   return result;
 425 }
 426 # undef SINGLE
 427 # undef SINGLE2
 428 #endif
 429
 430
  431 /* We remove the macro definitions so that we can include this file again
  432    for the definition of another function.  This covers the customization
         macros supplied by the including module as well as the access macros
         defined at the top of this file.  LOOPFCT is undefined twice, which
         is harmless.
         NOTE(review): STORE_REST is tested in this file but not undefined
         here (UNPACK_BYTES is), and no macro named `unaligned' is defined in
         this file -- confirm whether that is intended.  */
  433 #undef MIN_NEEDED_INPUT
  434 #undef MAX_NEEDED_INPUT
  435 #undef MIN_NEEDED_OUTPUT
  436 #undef MAX_NEEDED_OUTPUT
  437 #undef LOOPFCT
  438 #undef BODY
  439 #undef LOOPFCT
  440 #undef EXTRA_LOOP_DECLS
  441 #undef INIT_PARAMS
  442 #undef UPDATE_PARAMS
  443 #undef UNPACK_BYTES
  444 #undef LOOP_NEED_STATE
  445 #undef LOOP_NEED_FLAGS
  446 #undef LOOP_NEED_DATA
  447 #undef get16
  448 #undef get32
  449 #undef put16
  450 #undef put32
  451 #undef unaligned