S/390: Disable two UTF conversion instructions
[glibc.git] / sysdeps / s390 / s390-64 / utf16-utf32-z9.c
blob14daf2118fe54b8c26c6daa358a26ac5cf9a031e
1 /* Conversion between UTF-16 and UTF-32 BE/internal.
3 This module uses the Z9-109 variants of the Convert Unicode
4 instructions.
5 Copyright (C) 1997-2009 Free Software Foundation, Inc.
7 Author: Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
8 Based on the work by Ulrich Drepper <drepper@cygnus.com>, 1997.
10 Thanks to Daniel Appich who covered the relevant performance work
11 in his diploma thesis.
13 This is free software; you can redistribute it and/or
14 modify it under the terms of the GNU Lesser General Public
15 License as published by the Free Software Foundation; either
16 version 2.1 of the License, or (at your option) any later version.
18 This is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 Lesser General Public License for more details.
23 You should have received a copy of the GNU Lesser General Public
24 License along with the GNU C Library; if not, write to the Free
25 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307 USA. */
28 #include <dlfcn.h>
29 #include <stdint.h>
30 #include <unistd.h>
31 #include <dl-procinfo.h>
32 #include <gconv.h>
/* UTF-32 big endian byte order mark.  */
#define BOM_UTF32 0x0000feffu

/* UTF-16 big endian byte order mark.  */
#define BOM_UTF16 0xfeff

/* Configuration read by the generic iconv skeleton included at the
   end of this file.  DEFINE_INIT and DEFINE_FINI are 0 because this
   file provides its own gconv_init and gconv_end (presumably this
   tells the skeleton not to generate default ones -- confirm against
   skeleton.c).  */
#define DEFINE_INIT 0
#define DEFINE_FINI 0

/* Sizes in bytes.  The "from" side is UTF-16: one 16-bit unit at
   least, a surrogate pair (two units) at most.  The "to" side is
   UTF-32 and always occupies four bytes.  */
#define MIN_NEEDED_FROM 2
#define MAX_NEEDED_FROM 4
#define MIN_NEEDED_TO 4

/* Names of the two generated loop functions and the expression that
   selects between them; DIR is declared by PREPARE_LOOP.  */
#define FROM_LOOP from_utf16_loop
#define TO_LOOP to_utf16_loop
#define FROM_DIRECTION (dir == from_utf16)
/* Code run before the conversion loop is entered.  It loads the
   per-step settings stored by gconv_init into the locals DIR and
   EMIT_BOM and, on the very first invocation of a non-internal
   conversion that asked for it, writes the byte order mark of the
   target encoding to OUTBUF.  Returns __GCONV_FULL_OUTPUT from the
   enclosing function when the mark does not fit.  */
#define PREPARE_LOOP \
  enum direction dir = ((struct utf16_data *) step->__data)->dir; \
  int emit_bom = ((struct utf16_data *) step->__data)->emit_bom; \
 \
  if (emit_bom && !data->__internal_use \
      && data->__invocation_counter == 0) \
    { \
      if (dir == to_utf16) \
        { \
          /* Emit the UTF-16 Byte Order Mark.  */ \
          if (__builtin_expect (outbuf + 2 > outend, 0)) \
            return __GCONV_FULL_OUTPUT; \
 \
          put16u (outbuf, BOM_UTF16); \
          outbuf += 2; \
        } \
      else \
        { \
          /* Emit the UTF-32 Byte Order Mark.  */ \
          if (__builtin_expect (outbuf + 4 > outend, 0)) \
            return __GCONV_FULL_OUTPUT; \
 \
          put32u (outbuf, BOM_UTF32); \
          outbuf += 4; \
        } \
    }
/* Direction of the transformation.  ILLEGAL_DIR marks a from/to
   pair this module does not handle.  */
enum direction
{
  illegal_dir,
  to_utf16,
  from_utf16
};
83 struct utf16_data
85 enum direction dir;
86 int emit_bom;
90 extern int gconv_init (struct __gconv_step *step);
91 int
92 gconv_init (struct __gconv_step *step)
94 /* Determine which direction. */
95 struct utf16_data *new_data;
96 enum direction dir = illegal_dir;
97 int emit_bom;
98 int result;
100 emit_bom = (__strcasecmp (step->__to_name, "UTF-32//") == 0
101 || __strcasecmp (step->__to_name, "UTF-16//") == 0);
103 if (__strcasecmp (step->__from_name, "UTF-16BE//") == 0
104 && (__strcasecmp (step->__to_name, "UTF-32//") == 0
105 || __strcasecmp (step->__to_name, "UTF-32BE//") == 0
106 || __strcasecmp (step->__to_name, "INTERNAL") == 0))
108 dir = from_utf16;
110 else if ((__strcasecmp (step->__to_name, "UTF-16//") == 0
111 || __strcasecmp (step->__to_name, "UTF-16BE//") == 0)
112 && (__strcasecmp (step->__from_name, "UTF-32BE//") == 0
113 || __strcasecmp (step->__from_name, "INTERNAL") == 0))
115 dir = to_utf16;
118 result = __GCONV_NOCONV;
119 if (dir != illegal_dir)
121 new_data = (struct utf16_data *) malloc (sizeof (struct utf16_data));
123 result = __GCONV_NOMEM;
124 if (new_data != NULL)
126 new_data->dir = dir;
127 new_data->emit_bom = emit_bom;
128 step->__data = new_data;
130 if (dir == from_utf16)
132 step->__min_needed_from = MIN_NEEDED_FROM;
133 step->__max_needed_from = MIN_NEEDED_FROM;
134 step->__min_needed_to = MIN_NEEDED_TO;
135 step->__max_needed_to = MIN_NEEDED_TO;
137 else
139 step->__min_needed_from = MIN_NEEDED_TO;
140 step->__max_needed_from = MIN_NEEDED_TO;
141 step->__min_needed_to = MIN_NEEDED_FROM;
142 step->__max_needed_to = MIN_NEEDED_FROM;
145 step->__stateful = 0;
147 result = __GCONV_OK;
151 return result;
155 extern void gconv_end (struct __gconv_step *data);
156 void
157 gconv_end (struct __gconv_step *data)
159 free (data->__data);
/* The macro for the hardware loop.  This is used for both
   directions.

   INSTRUCTION is the assembler text of the conversion instruction;
   its operands %0 and %1 are the output and input pointers.  The
   pointers and the remaining lengths are pinned to registers 8/9
   (input) and 10/11 (output).  The instruction is re-issued for as
   long as "jo" branches, then the condition code is fetched with
   "ipm" and INPTR/OUTPTR are updated from the registers.

   Must be expanded inside a loop: condition code 1 sets RESULT to
   __GCONV_FULL_OUTPUT and condition code 2 sets it to
   __GCONV_ILLEGAL_INPUT, each followed by a break.  Any other
   condition code falls through to the code after the macro.  */
#define HARDWARE_CONVERT(INSTRUCTION) \
  { \
    register const unsigned char* pInput asm ("8") = inptr; \
    register unsigned long long inlen asm ("9") = inend - inptr; \
    register unsigned char* pOutput asm ("10") = outptr; \
    register unsigned long long outlen asm("11") = outend - outptr; \
    uint64_t cc = 0; \
 \
    asm volatile ("0: " INSTRUCTION "  \n\t" \
                  "   jo     0b        \n\t" \
                  "   ipm %2 \n" \
                  : "+a" (pOutput), "+a" (pInput), "+d" (cc), \
                    "+d" (outlen), "+d" (inlen) \
                  : \
                  : "cc", "memory"); \
 \
    inptr = pInput; \
    outptr = pOutput; \
    /* ipm leaves the condition code above bit 28 of CC.  */ \
    cc >>= 28; \
 \
    if (cc == 1) \
      { \
        result = __GCONV_FULL_OUTPUT; \
        break; \
      } \
    else if (cc == 2) \
      { \
        result = __GCONV_ILLEGAL_INPUT; \
        break; \
      } \
  }
/* Conversion function from UTF-16 to UTF-32 internal/BE.

   One expansion of BODY converts one character: it reads one 16-bit
   unit, or a surrogate pair, at INPTR and stores one 32-bit value at
   OUTPTR.  It leaves the loop with RESULT set to
   __GCONV_INCOMPLETE_INPUT when a high surrogate is not followed by
   two more bytes, and invokes STANDARD_FROM_LOOP_ERR_HANDLER for an
   isolated low surrogate or a high surrogate that is not followed by
   a low surrogate.  */

#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
#define LOOPFCT FROM_LOOP
/* The software routine is copied from utf-16.c (minus bytes
   swapping).  */
#define BODY \
  { \
    /* The hardware instruction currently fails to report an error for \
       isolated low surrogates so we have to disable the instruction \
       until this gets resolved.  */ \
    if (0) /* (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) */ \
      { \
        HARDWARE_CONVERT ("cu24 %0, %1, 1"); \
        if (inptr != inend) \
          { \
            /* Check if the third byte is \
               a valid start of a UTF-16 surrogate.  With exactly \
               three bytes left that byte is inptr[2]; inptr[3] \
               would already lie behind the input.  */ \
            if (inend - inptr == 3 && (inptr[2] & 0xfc) != 0xdc) \
              { \
                STANDARD_FROM_LOOP_ERR_HANDLER (3); \
              } \
 \
            result = __GCONV_INCOMPLETE_INPUT; \
            break; \
          } \
        continue; \
      } \
 \
    uint16_t u1 = get16 (inptr); \
 \
    if (__builtin_expect (u1 < 0xd800, 1) || u1 > 0xdfff) \
      { \
        /* No surrogate.  */ \
        put32 (outptr, u1); \
        inptr += 2; \
      } \
    else \
      { \
        /* An isolated low-surrogate was found.  This has to be \
           considered ill-formed.  */ \
        if (__builtin_expect (u1 >= 0xdc00, 0)) \
          { \
            STANDARD_FROM_LOOP_ERR_HANDLER (2); \
          } \
        /* It's a surrogate character.  At least the first word says \
           it is.  */ \
        if (__builtin_expect (inptr + 4 > inend, 0)) \
          { \
            /* We don't have enough input for another complete input \
               character.  */ \
            result = __GCONV_INCOMPLETE_INPUT; \
            break; \
          } \
 \
        inptr += 2; \
        uint16_t u2 = get16 (inptr); \
        if (__builtin_expect (u2 < 0xdc00, 0) \
            || __builtin_expect (u2 > 0xdfff, 0)) \
          { \
            /* This is no valid second word for a surrogate.  */ \
            inptr -= 2; \
            STANDARD_FROM_LOOP_ERR_HANDLER (2); \
          } \
 \
        put32 (outptr, ((u1 - 0xd7c0) << 10) + (u2 - 0xdc00)); \
        inptr += 2; \
      } \
    outptr += 4; \
  }
266 #define LOOP_NEED_FLAGS
267 #include <iconv/loop.c>
/* Conversion from UTF-32 internal/BE to UTF-16.

   One expansion of BODY converts one character: it reads one 32-bit
   value at INPTR and stores one 16-bit unit, or a surrogate pair, at
   OUTPTR.  Values in the surrogate range 0xd800..0xdfff and values
   above 0x10ffff are handed to STANDARD_TO_LOOP_ERR_HANDLER.  The
   loop is left with RESULT set to __GCONV_FULL_OUTPUT when a
   surrogate pair does not fit into the output buffer.  */

#define MIN_NEEDED_INPUT MIN_NEEDED_TO
#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM
#define LOOPFCT TO_LOOP
/* The software routine is copied from utf-16.c (minus bytes
   swapping).  */
#define BODY \
  { \
    /* NOTE(review): the software path below rejects every surrogate \
       code point.  Confirm that the hardware instruction reports \
       low surrogates (0xdc00..0xdfff) as well; if it does not, this \
       branch has to be disabled like the one for the opposite \
       direction.  */ \
    if (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) \
      { \
        HARDWARE_CONVERT ("cu42 %0, %1"); \
 \
        if (inptr != inend) \
          { \
            result = __GCONV_INCOMPLETE_INPUT; \
            break; \
          } \
        continue; \
      } \
 \
    uint32_t c = get32 (inptr); \
 \
    /* Surrogate code points are not characters and must not be \
       copied into the output, so only 0..0xd7ff and 0xe000..0xffff \
       are accepted here.  */ \
    if (__builtin_expect (c <= 0xd7ff, 1) \
        || (c > 0xdfff && c <= 0xffff)) \
      { \
        /* One UTF-16 code unit (two bytes).  */ \
        put16 (outptr, c); \
      } \
    else if (__builtin_expect (c >= 0x10000, 1) \
             && __builtin_expect (c <= 0x10ffff, 1)) \
      { \
        /* Two UTF-16 code units (four bytes).  */ \
        uint16_t zabcd = ((c & 0x1f0000) >> 16) - 1; \
        uint16_t out; \
 \
        /* Generate a surrogate character.  */ \
        if (__builtin_expect (outptr + 4 > outend, 0)) \
          { \
            /* Overflow in the output buffer.  */ \
            result = __GCONV_FULL_OUTPUT; \
            break; \
          } \
 \
        out = 0xd800; \
        out |= (zabcd & 0xff) << 6; \
        out |= (c >> 10) & 0x3f; \
        put16 (outptr, out); \
        outptr += 2; \
 \
        out = 0xdc00; \
        out |= c & 0x3ff; \
        put16 (outptr, out); \
      } \
    else \
      { \
        STANDARD_TO_LOOP_ERR_HANDLER (4); \
      } \
    outptr += 2; \
    inptr += 4; \
  }
331 #define LOOP_NEED_FLAGS
332 #include <iconv/loop.c>
334 #include <iconv/skeleton.c>