Update.
[glibc.git] / iconv / skeleton.c
blob98abc33f6d837bd1782e5deb79b3667199549a8b
1 /* Skeleton for a conversion module.
2 Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
21 /* This file can be included to provide definitions of several things
22 many modules have in common. It can be customized using the following
23 macros:
25 DEFINE_INIT define the default initializer. This requires the
26 following symbol to be defined.
28 CHARSET_NAME string with official name of the coded character
29 set (in all-caps)
31 DEFINE_FINI define the default destructor function.
33 MIN_NEEDED_FROM minimal number of bytes needed for the from-charset.
34 MIN_NEEDED_TO likewise for the to-charset.
36 MAX_NEEDED_FROM maximal number of bytes needed for the from-charset.
37 This macro is optional, it defaults to MIN_NEEDED_FROM.
38 MAX_NEEDED_TO likewise for the to-charset.
40 DEFINE_DIRECTION_OBJECTS
41 two objects will be defined to be used when the
42 `gconv' function must only distinguish two
43 directions. This is implied by DEFINE_INIT.
44 If this macro is not defined the following
45 macro must be available.
47 FROM_DIRECTION this macro is supposed to return a value != 0
48 if we convert from the current character set,
49 otherwise it returns 0.
51 EMIT_SHIFT_TO_INIT this symbol is optional. If it is defined it
52 defines some code which writes out a sequence
53 of characters which bring the current state into
54 the initial state.
56 FROM_LOOP name of the function implementing the conversion
57 from the current characters.
58 TO_LOOP likewise for the other direction
60 ONE_DIRECTION optional. If defined to 1, only one conversion
61 direction is defined instead of two. In this
62 case, FROM_DIRECTION should be defined to 1, and
63 FROM_LOOP and TO_LOOP should have the same value.
65 SAVE_RESET_STATE in case of an error we must reset the state for
66 the rerun so this macro must be defined for
67 stateful encodings. It takes an argument which
68 is nonzero when saving.
70 RESET_INPUT_BUFFER If the input character sets allow this the macro
71 can be defined to reset the input buffer pointers
72 to cover only those characters up to the error.
74 FUNCTION_NAME if not set the conversion function is named `gconv'.
76 PREPARE_LOOP optional code preparing the conversion loop. Can
77 contain variable definitions.
78 END_LOOP also optional, may be used to store information
80 EXTRA_LOOP_ARGS optional macro specifying extra arguments passed
81 to loop function.
84 #include <assert.h>
85 #include <gconv.h>
86 #include <string.h>
87 #define __need_size_t
88 #define __need_NULL
89 #include <stddef.h>
91 #ifndef STATIC_GCONV
92 # include <dlfcn.h>
93 #endif
/* Invoke FUNC with the parenthesized argument list ARGLIST.  Unless the
   including file has already provided its own DL_CALL_FCT, this expands
   to a plain function call.  */
#if !defined DL_CALL_FCT
# define DL_CALL_FCT(func, arglist) func arglist
#endif
99 /* The direction objects. */
100 #if DEFINE_DIRECTION_OBJECTS || DEFINE_INIT
101 static int from_object;
102 static int to_object;
104 # ifndef FROM_DIRECTION
105 # define FROM_DIRECTION (step->__data == &from_object)
106 # endif
107 #else
108 # ifndef FROM_DIRECTION
109 # error "FROM_DIRECTION must be provided if direction objects are not used"
110 # endif
111 #endif
/* Upper bounds on the number of bytes one character occupies.  Both are
   optional for the including file; a missing bound defaults to the
   corresponding minimum.  */
#if !defined MAX_NEEDED_FROM
# define MAX_NEEDED_FROM MIN_NEEDED_FROM
#endif

#if !defined MAX_NEEDED_TO
# define MAX_NEEDED_TO MIN_NEEDED_TO
#endif
/* Macros to read and write 16- and 32-bit values in host byte order at
   addresses which need not be aligned.  They are supposed to be used
   only in code outside the inner loops; the inner loops have other
   definitions which allow optimized access.

   get16u (ADDR) / get32u (ADDR) yield the value stored at ADDR as an
   unsigned 16-/32-bit quantity.  put16u (ADDR, VAL) / put32u (ADDR, VAL)
   store VAL at ADDR.  ADDR may be evaluated more than once, so it must
   not have side effects.  */
#ifdef _STRING_ARCH_unaligned
/* We can handle unaligned memory access, so simply dereference.  The
   expansions are fully parenthesized so that they behave as a single
   operand in any surrounding expression.  */
# define get16u(addr) (*((const uint16_t *) (addr)))
# define get32u(addr) (*((const uint32_t *) (addr)))

/* We need no special support for writing values either.  */
# define put16u(addr, val) (*((uint16_t *) (addr)) = (val))
# define put32u(addr, val) (*((uint32_t *) (addr)) = (val))
#else
/* Assemble and take apart the values byte by byte.  Every byte is
   converted to the unsigned result type before it is shifted: shifting
   the int-promoted byte into the sign bit is undefined and would give
   a negative result which sign-extends when widened.  */
# if __BYTE_ORDER == __LITTLE_ENDIAN
#  define get16u(addr) \
     ((uint16_t) (((const unsigned char *) (addr))[1] << 8                  \
                  | ((const unsigned char *) (addr))[0]))
#  define get32u(addr) \
     (((uint32_t) ((const unsigned char *) (addr))[3] << 24)                \
      | ((uint32_t) ((const unsigned char *) (addr))[2] << 16)              \
      | ((uint32_t) ((const unsigned char *) (addr))[1] << 8)               \
      | (uint32_t) ((const unsigned char *) (addr))[0])

#  define put16u(addr, val) \
     ({ uint16_t __val = (val);                                             \
        ((unsigned char *) (addr))[0] = __val;                              \
        ((unsigned char *) (addr))[1] = __val >> 8;                         \
        (void) 0; })
#  define put32u(addr, val) \
     ({ uint32_t __val = (val);                                             \
        ((unsigned char *) (addr))[0] = __val;                              \
        __val >>= 8;                                                        \
        ((unsigned char *) (addr))[1] = __val;                              \
        __val >>= 8;                                                        \
        ((unsigned char *) (addr))[2] = __val;                              \
        __val >>= 8;                                                        \
        ((unsigned char *) (addr))[3] = __val;                              \
        (void) 0; })
# else
#  define get16u(addr) \
     ((uint16_t) (((const unsigned char *) (addr))[0] << 8                  \
                  | ((const unsigned char *) (addr))[1]))
#  define get32u(addr) \
     (((uint32_t) ((const unsigned char *) (addr))[0] << 24)                \
      | ((uint32_t) ((const unsigned char *) (addr))[1] << 16)              \
      | ((uint32_t) ((const unsigned char *) (addr))[2] << 8)               \
      | (uint32_t) ((const unsigned char *) (addr))[3])

#  define put16u(addr, val) \
     ({ uint16_t __val = (val);                                             \
        ((unsigned char *) (addr))[1] = __val;                              \
        ((unsigned char *) (addr))[0] = __val >> 8;                         \
        (void) 0; })
#  define put32u(addr, val) \
     ({ uint32_t __val = (val);                                             \
        ((unsigned char *) (addr))[3] = __val;                              \
        __val >>= 8;                                                        \
        ((unsigned char *) (addr))[2] = __val;                              \
        __val >>= 8;                                                        \
        ((unsigned char *) (addr))[1] = __val;                              \
        __val >>= 8;                                                        \
        ((unsigned char *) (addr))[0] = __val;                              \
        (void) 0; })
# endif
#endif
/* Default RESET_INPUT_BUFFER for conversions between two fixed width
   character sets.  It is only provided when the including file defined
   neither RESET_INPUT_BUFFER nor SAVE_RESET_STATE.  The number of
   output bytes which were not consumed (outbuf - outerr) is scaled by
   the ratio of the character widths and *inptrp is moved back by the
   result.  */
#if !defined RESET_INPUT_BUFFER && !defined SAVE_RESET_STATE
# if MIN_NEEDED_FROM == MAX_NEEDED_FROM && MIN_NEEDED_TO == MAX_NEEDED_TO
/* The three cases are spelled out separately since the compiler cannot
   know that (outbuf - outerr) is always divisible by MIN_NEEDED_TO.
   The conditions are constants, so only one arm survives.  */
#  define RESET_INPUT_BUFFER \
  (*inptrp -= (MIN_NEEDED_FROM % MIN_NEEDED_TO == 0                         \
               ? (outbuf - outerr) * (MIN_NEEDED_FROM / MIN_NEEDED_TO)      \
               : MIN_NEEDED_TO % MIN_NEEDED_FROM == 0                       \
               ? (outbuf - outerr) / (MIN_NEEDED_TO / MIN_NEEDED_FROM)      \
               : ((outbuf - outerr) / MIN_NEEDED_TO) * MIN_NEEDED_FROM))
# endif
#endif
/* Default initializer, available when DEFINE_INIT is set.  It compares
   the charset names recorded in STEP with CHARSET_NAME, points the
   step's data at the matching direction object and fills in the
   per-character byte requirements for that direction.  */
#if DEFINE_INIT
# ifndef CHARSET_NAME
#  error "CHARSET_NAME not defined"
# endif

extern int gconv_init (struct __gconv_step *step);

/* Returns __GCONV_OK when STEP converts from or to CHARSET_NAME and
   __GCONV_NOCONV (leaving STEP untouched) when neither name matches.  */
int
gconv_init (struct __gconv_step *step)
{
  /* Nonzero when CHARSET_NAME is the source of this step.  */
  const int from_charset = strcmp (step->__from_name, CHARSET_NAME) == 0;

  /* Neither side is our character set: this module cannot help.  */
  if (!from_charset
      && __builtin_expect (strcmp (step->__to_name, CHARSET_NAME) != 0, 0))
    return __GCONV_NOCONV;

  /* In the reverse direction the FROM and TO requirements swap roles.  */
  step->__data = from_charset ? &from_object : &to_object;
  step->__min_needed_from = from_charset ? MIN_NEEDED_FROM : MIN_NEEDED_TO;
  step->__max_needed_from = from_charset ? MAX_NEEDED_FROM : MAX_NEEDED_TO;
  step->__min_needed_to = from_charset ? MIN_NEEDED_TO : MIN_NEEDED_FROM;
  step->__max_needed_to = from_charset ? MAX_NEEDED_TO : MAX_NEEDED_FROM;

  /* A module which provides SAVE_RESET_STATE is a stateful one.  */
# ifdef SAVE_RESET_STATE
  step->__stateful = 1;
# else
  step->__stateful = 0;
# endif

  return __GCONV_OK;
}
#endif
253 /* The default destructor function does nothing at the moment and so
254 we don't define it at all. But we still provide the macro just in
255 case we need it some day. */
256 #if DEFINE_FINI
257 #endif
/* EXTRA_LOOP_ARGS is appended to the argument list of every call of a
   loop function.  If the including file has no additional arguments to
   pass, the macro expands to nothing.  */
#if !defined EXTRA_LOOP_ARGS
# define EXTRA_LOOP_ARGS
#endif
/* Name of the conversion function defined below.  The including file
   may choose it; the default is `gconv'.  */
#if !defined FUNCTION_NAME
# define FUNCTION_NAME gconv
#endif

/* SINGLE (LOOP) names the function which converts a single character
   for the loop function LOOP by appending `_single'.  The extra level
   of indirection makes sure LOOP is macro-expanded before the tokens
   are pasted together.  */
#define SINGLE(loop) SINGLE2 (loop)
#define SINGLE2(loop) loop##_single
277 extern int FUNCTION_NAME (struct __gconv_step *step,
278 struct __gconv_step_data *data,
279 const unsigned char **inptrp,
280 const unsigned char *inend,
281 unsigned char **outbufstart, size_t *irreversible,
282 int do_flush, int consume_incomplete);
284 FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
285 const unsigned char **inptrp, const unsigned char *inend,
286 unsigned char **outbufstart, size_t *irreversible, int do_flush,
287 int consume_incomplete)
289 struct __gconv_step *next_step = step + 1;
290 struct __gconv_step_data *next_data = data + 1;
291 __gconv_fct fct;
292 int status;
294 fct = (data->__flags & __GCONV_IS_LAST) ? NULL : next_step->__fct;
296 /* If the function is called with no input this means we have to reset
297 to the initial state. The possibly partly converted input is
298 dropped. */
299 if (__builtin_expect (do_flush, 0))
301 status = __GCONV_OK;
303 /* This should never happen during error handling. */
304 assert (outbufstart == NULL);
306 #ifdef EMIT_SHIFT_TO_INIT
307 /* Emit the escape sequence to reset the state. */
308 EMIT_SHIFT_TO_INIT;
309 #else
310 /* Clear the state object. There might be bytes in there from
311 previous calls with CONSUME_INCOMPLETE == 1. */
312 memset (data->__statep, '\0', sizeof (*data->__statep));
313 #endif
314 /* Call the steps down the chain if there are any but only if we
315 successfully emitted the escape sequence. This should only
316 fail if the output buffer is full. If the input is invalid
317 it should be discarded since the user wants to start from a
318 clean slate. */
319 if (status == __GCONV_OK && ! (data->__flags & __GCONV_IS_LAST))
320 status = DL_CALL_FCT (fct, (next_step, next_data, NULL, NULL,
321 NULL, irreversible, 1,
322 consume_incomplete));
324 else
326 /* We preserve the initial values of the pointer variables. */
327 const unsigned char *inptr = *inptrp;
328 unsigned char *outbuf = (__builtin_expect (outbufstart == NULL, 1)
329 ? data->__outbuf : *outbufstart);
330 unsigned char *outend = data->__outbufend;
331 unsigned char *outstart;
332 /* This variable is used to count the number of characters we
333 actually converted. */
334 size_t lirreversible = 0;
335 size_t *lirreversiblep = irreversible ? &lirreversible : NULL;
336 #if defined _STRING_ARCH_unaligned \
337 || MIN_NEEDED_FROM == 1 || MAX_NEEDED_FROM % MIN_NEEDED_FROM != 0 \
338 || MIN_NEEDED_TO == 1 || MAX_NEEDED_TO % MIN_NEEDED_TO != 0
339 # define unaligned 0
340 #else
341 int unaligned;
342 # define GEN_unaligned(name) GEN_unaligned2 (name)
343 # define GEN_unaligned2(name) name##_unaligned
344 #endif
346 #ifdef PREPARE_LOOP
347 PREPARE_LOOP
348 #endif
350 #if MAX_NEEDED_FROM > 1 || MAX_NEEDED_TO > 1
351 /* If the function is used to implement the mb*towc*() or wc*tomb*()
352 functions we must test whether any bytes from the last call are
353 stored in the `state' object. */
354 if (((MAX_NEEDED_FROM > 1 && MAX_NEEDED_TO > 1)
355 || (MAX_NEEDED_FROM > 1 && FROM_DIRECTION)
356 || (MAX_NEEDED_TO > 1 && !FROM_DIRECTION))
357 && consume_incomplete && (data->__statep->__count & 7) != 0)
359 /* Yep, we have some bytes left over. Process them now.
360 But this must not happen while we are called from an
361 error handler. */
362 assert (outbufstart == NULL);
364 # if MAX_NEEDED_FROM > 1
365 if (MAX_NEEDED_TO == 1 || FROM_DIRECTION)
366 status = SINGLE(FROM_LOOP) (step, data, inptrp, inend, &outbuf,
367 outend, lirreversiblep
368 EXTRA_LOOP_ARGS);
369 # endif
370 # if MAX_NEEDED_FROM > 1 && MAX_NEEDED_TO > 1 && !ONE_DIRECTION
371 else
372 # endif
373 # if MAX_NEEDED_TO > 1 && !ONE_DIRECTION
374 status = SINGLE(TO_LOOP) (step, data, inptrp, inend, &outbuf,
375 outend, lirreversiblep EXTRA_LOOP_ARGS);
376 # endif
378 if (__builtin_expect (status, __GCONV_OK) != __GCONV_OK)
379 return status;
381 #endif
383 #if !defined _STRING_ARCH_unaligned \
384 && MIN_NEEDED_FROM != 1 && MAX_NEEDED_FROM % MIN_NEEDED_FROM == 0 \
385 && MIN_NEEDED_TO != 1 && MAX_NEEDED_TO % MIN_NEEDED_TO == 0
386 /* The following assumes that encodings, which have a variable length
387 what might unalign a buffer even though it is a aligned in the
388 beginning, either don't have the minimal number of bytes as a divisor
389 of the maximum length or have a minimum length of 1. This is true
390 for all known and supported encodings. */
391 unaligned = ((FROM_DIRECTION
392 && ((uintptr_t) inptr % MIN_NEEDED_FROM != 0
393 || ((data->__flags & __GCONV_IS_LAST)
394 && (uintptr_t) outbuf % MIN_NEEDED_TO != 0)))
395 || (!FROM_DIRECTION
396 && (((data->__flags & __GCONV_IS_LAST)
397 && (uintptr_t) outbuf % MIN_NEEDED_FROM != 0)
398 || (uintptr_t) inptr % MIN_NEEDED_TO != 0)));
399 #endif
401 while (1)
403 struct __gconv_trans_data *trans;
405 /* Remember the start value for this round. */
406 inptr = *inptrp;
407 /* The outbuf buffer is empty. */
408 outstart = outbuf;
410 #ifdef SAVE_RESET_STATE
411 SAVE_RESET_STATE (1);
412 #endif
414 if (__builtin_expect (!unaligned, 1))
416 if (FROM_DIRECTION)
417 /* Run the conversion loop. */
418 status = FROM_LOOP (step, data, inptrp, inend, &outbuf, outend,
419 lirreversiblep EXTRA_LOOP_ARGS);
420 else
421 /* Run the conversion loop. */
422 status = TO_LOOP (step, data, inptrp, inend, &outbuf, outend,
423 lirreversiblep EXTRA_LOOP_ARGS);
425 #if !defined _STRING_ARCH_unaligned \
426 && MIN_NEEDED_FROM != 1 && MAX_NEEDED_FROM % MIN_NEEDED_FROM == 0 \
427 && MIN_NEEDED_TO != 1 && MAX_NEEDED_TO % MIN_NEEDED_TO == 0
428 else
430 if (FROM_DIRECTION)
431 /* Run the conversion loop. */
432 status = GEN_unaligned (FROM_LOOP) (step, data, inptrp, inend,
433 &outbuf, outend,
434 lirreversiblep
435 EXTRA_LOOP_ARGS);
436 else
437 /* Run the conversion loop. */
438 status = GEN_unaligned (TO_LOOP) (step, data, inptrp, inend,
439 &outbuf, outend,
440 lirreversiblep
441 EXTRA_LOOP_ARGS);
443 #endif
445 /* If we were called as part of an error handling module we
446 don't do anything else here. */
447 if (__builtin_expect (outbufstart != NULL, 0))
449 *outbufstart = outbuf;
450 return status;
453 /* Give the transliteration module the chance to store the
454 original text and the result in case it needs a context. */
455 for (trans = data->__trans; trans != NULL; trans = trans->__next)
456 if (trans->__trans_context_fct != NULL)
457 DL_CALL_FCT (trans->__trans_context_fct,
458 (trans->__data, inptr, *inptrp, outstart, outbuf));
460 /* We finished one use of the loops. */
461 ++data->__invocation_counter;
463 /* If this is the last step leave the loop, there is nothing
464 we can do. */
465 if (__builtin_expect (data->__flags & __GCONV_IS_LAST, 0))
467 /* Store information about how many bytes are available. */
468 data->__outbuf = outbuf;
470 /* Remember how many non-identical characters we
471 converted in a irreversible way. */
472 *irreversible += lirreversible;
474 break;
477 /* Write out all output which was produced. */
478 if (__builtin_expect (outbuf > outstart, 1))
480 const unsigned char *outerr = data->__outbuf;
481 int result;
483 result = DL_CALL_FCT (fct, (next_step, next_data, &outerr,
484 outbuf, NULL, irreversible, 0,
485 consume_incomplete));
487 if (result != __GCONV_EMPTY_INPUT)
489 if (__builtin_expect (outerr != outbuf, 0))
491 #ifdef RESET_INPUT_BUFFER
492 RESET_INPUT_BUFFER;
493 #else
494 /* We have a problem with the in on of the functions
495 below. Undo the conversion upto the error point. */
496 size_t nstatus;
498 /* Reload the pointers. */
499 *inptrp = inptr;
500 outbuf = outstart;
502 /* Reset the state. */
503 # ifdef SAVE_RESET_STATE
504 SAVE_RESET_STATE (0);
505 # endif
507 if (__builtin_expect (!unaligned, 1))
509 if (FROM_DIRECTION)
510 /* Run the conversion loop. */
511 nstatus = FROM_LOOP (step, data, inptrp, inend,
512 &outbuf, outerr,
513 lirreversiblep
514 EXTRA_LOOP_ARGS);
515 else
516 /* Run the conversion loop. */
517 nstatus = TO_LOOP (step, data, inptrp, inend,
518 &outbuf, outerr,
519 lirreversiblep
520 EXTRA_LOOP_ARGS);
522 # if !defined _STRING_ARCH_unaligned \
523 && MIN_NEEDED_FROM != 1 && MAX_NEEDED_FROM % MIN_NEEDED_FROM == 0 \
524 && MIN_NEEDED_TO != 1 && MAX_NEEDED_TO % MIN_NEEDED_TO == 0
525 else
527 if (FROM_DIRECTION)
528 /* Run the conversion loop. */
529 nstatus = GEN_unaligned (FROM_LOOP) (step, data,
530 inptrp, inend,
531 &outbuf,
532 outerr,
533 lirreversiblep
534 EXTRA_LOOP_ARGS);
535 else
536 /* Run the conversion loop. */
537 nstatus = GEN_unaligned (TO_LOOP) (step, data,
538 inptrp, inend,
539 &outbuf, outerr,
540 lirreversiblep
541 EXTRA_LOOP_ARGS);
543 # endif
545 /* We must run out of output buffer space in this
546 rerun. */
547 assert (outbuf == outerr);
548 assert (nstatus == __GCONV_FULL_OUTPUT);
550 /* If we haven't consumed a single byte decrement
551 the invocation counter. */
552 if (__builtin_expect (outbuf == outstart, 0))
553 --data->__invocation_counter;
554 #endif /* reset input buffer */
557 /* Change the status. */
558 status = result;
560 else
561 /* All the output is consumed, we can make another run
562 if everything was ok. */
563 if (status == __GCONV_FULL_OUTPUT)
565 status = __GCONV_OK;
566 outbuf = data->__outbuf;
570 if (status != __GCONV_OK)
571 break;
573 /* Reset the output buffer pointer for the next round. */
574 outbuf = data->__outbuf;
577 #ifdef END_LOOP
578 END_LOOP
579 #endif
581 /* If we are supposed to consume all character store now all of the
582 remaining characters in the `state' object. */
583 #if MAX_NEEDED_FROM > 1 || MAX_NEEDED_TO > 1
584 if (((MAX_NEEDED_FROM > 1 && MAX_NEEDED_TO > 1)
585 || (MAX_NEEDED_FROM > 1 && FROM_DIRECTION)
586 || (MAX_NEEDED_TO > 1 && !FROM_DIRECTION))
587 && __builtin_expect (consume_incomplete, 0)
588 && status == __GCONV_INCOMPLETE_INPUT)
590 # ifdef STORE_REST
591 mbstate_t *state = data->__statep;
593 STORE_REST
594 # else
595 size_t cnt;
597 /* Make sure the remaining bytes fit into the state objects
598 buffer. */
599 assert (inend - *inptrp < 4);
601 for (cnt = 0; *inptrp < inend; ++cnt)
602 data->__statep->__value.__wchb[cnt] = *(*inptrp)++;
603 data->__statep->__count &= ~7;
604 data->__statep->__count |= cnt;
605 # endif
607 #endif
610 return status;
/* Forget all customization macros so that this file can be included
   again with different settings.  */
#undef DEFINE_INIT
#undef CHARSET_NAME
#undef DEFINE_FINI
#undef MIN_NEEDED_FROM
#undef MIN_NEEDED_TO
#undef MAX_NEEDED_FROM
#undef MAX_NEEDED_TO
#undef DEFINE_DIRECTION_OBJECTS
#undef FROM_DIRECTION
#undef EMIT_SHIFT_TO_INIT
#undef FROM_LOOP
#undef TO_LOOP
#undef SAVE_RESET_STATE
#undef RESET_INPUT_BUFFER
#undef FUNCTION_NAME
#undef PREPARE_LOOP
#undef END_LOOP
#undef ONE_DIRECTION
#undef STORE_REST
/* EXTRA_LOOP_ARGS is a customization macro as well and gets a default
   definition in this file; a later inclusion with different extra
   arguments must not run into the old definition.  */
#undef EXTRA_LOOP_ARGS
/* The conversion function may define `unaligned' as a macro.  It must
   not survive, otherwise a later inclusion which declares a variable
   of that name would have the name replaced.  */
#undef unaligned