Update copyright dates with scripts/update-copyrights.
[glibc.git] / sysdeps / s390 / s390-64 / utf16-utf32-z9.c
blobf887c34e61aa6b043d1cae7e9e6ce63555e6f55f
1 /* Conversion between UTF-16 and UTF-32 BE/internal.
3 This module uses the Z9-109 variants of the Convert Unicode
4 instructions.
5 Copyright (C) 1997-2015 Free Software Foundation, Inc.
7 Author: Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
8 Based on the work by Ulrich Drepper <drepper@cygnus.com>, 1997.
10 Thanks to Daniel Appich who covered the relevant performance work
11 in his diploma thesis.
13 This is free software; you can redistribute it and/or
14 modify it under the terms of the GNU Lesser General Public
15 License as published by the Free Software Foundation; either
16 version 2.1 of the License, or (at your option) any later version.
18 This is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 Lesser General Public License for more details.
23 You should have received a copy of the GNU Lesser General Public
24 License along with the GNU C Library; if not, see
25 <http://www.gnu.org/licenses/>. */
27 #include <dlfcn.h>
28 #include <stdint.h>
29 #include <unistd.h>
30 #include <dl-procinfo.h>
31 #include <gconv.h>
/* Byte order marks, expressed as big endian code unit values.  */
#define BOM_UTF32	0x0000feffu	/* UTF-32 */
#define BOM_UTF16	0xfeff		/* UTF-16 */

/* Parameters consumed by <iconv/skeleton.c>.  The "FROM" side is
   UTF-16 (2 bytes per code unit, 4 bytes for a surrogate pair), the
   "TO" side is UTF-32 (always 4 bytes).  */
#define DEFINE_INIT		0
#define DEFINE_FINI		0
#define MIN_NEEDED_FROM		2
#define MAX_NEEDED_FROM		4
#define MIN_NEEDED_TO		4
#define FROM_LOOP		from_utf16_loop
#define TO_LOOP			to_utf16_loop
#define FROM_DIRECTION		(dir == from_utf16)
#define ONE_DIRECTION		0

/* Fetch the per-step settings stored by gconv_init and, when a byte
   order mark was requested, write it in front of the output.  The mark
   is only written for non-internal use and while the invocation
   counter is still zero.  If the output buffer cannot hold the mark
   the expansion returns __GCONV_FULL_OUTPUT.  */
#define PREPARE_LOOP \
  enum direction dir = ((struct utf16_data *) step->__data)->dir;	\
  int emit_bom = ((struct utf16_data *) step->__data)->emit_bom;	\
									\
  if (emit_bom && !data->__internal_use && data->__invocation_counter == 0) \
    {									\
      if (dir != to_utf16)						\
	{								\
	  /* The output is UTF-32: emit the four byte mark.  */		\
	  if (__glibc_unlikely (outbuf + 4 > outend))			\
	    return __GCONV_FULL_OUTPUT;					\
									\
	  put32u (outbuf, BOM_UTF32);					\
	  outbuf += 4;							\
	}								\
      else								\
	{								\
	  /* The output is UTF-16: emit the two byte mark.  */		\
	  if (__glibc_unlikely (outbuf + 2 > outend))			\
	    return __GCONV_FULL_OUTPUT;					\
									\
	  put16u (outbuf, BOM_UTF16);					\
	  outbuf += 2;							\
	}								\
    }
/* Which way a conversion step translates.  illegal_dir marks a
   from/to name pair this module does not handle.  */
enum direction
{
  illegal_dir = 0,
  to_utf16 = 1,
  from_utf16 = 2
};
83 struct utf16_data
85 enum direction dir;
86 int emit_bom;
90 extern int gconv_init (struct __gconv_step *step);
91 int
92 gconv_init (struct __gconv_step *step)
94 /* Determine which direction. */
95 struct utf16_data *new_data;
96 enum direction dir = illegal_dir;
97 int emit_bom;
98 int result;
100 emit_bom = (__strcasecmp (step->__to_name, "UTF-32//") == 0
101 || __strcasecmp (step->__to_name, "UTF-16//") == 0);
103 if (__strcasecmp (step->__from_name, "UTF-16BE//") == 0
104 && (__strcasecmp (step->__to_name, "UTF-32//") == 0
105 || __strcasecmp (step->__to_name, "UTF-32BE//") == 0
106 || __strcasecmp (step->__to_name, "INTERNAL") == 0))
108 dir = from_utf16;
110 else if ((__strcasecmp (step->__to_name, "UTF-16//") == 0
111 || __strcasecmp (step->__to_name, "UTF-16BE//") == 0)
112 && (__strcasecmp (step->__from_name, "UTF-32BE//") == 0
113 || __strcasecmp (step->__from_name, "INTERNAL") == 0))
115 dir = to_utf16;
118 result = __GCONV_NOCONV;
119 if (dir != illegal_dir)
121 new_data = (struct utf16_data *) malloc (sizeof (struct utf16_data));
123 result = __GCONV_NOMEM;
124 if (new_data != NULL)
126 new_data->dir = dir;
127 new_data->emit_bom = emit_bom;
128 step->__data = new_data;
130 if (dir == from_utf16)
132 step->__min_needed_from = MIN_NEEDED_FROM;
133 step->__max_needed_from = MIN_NEEDED_FROM;
134 step->__min_needed_to = MIN_NEEDED_TO;
135 step->__max_needed_to = MIN_NEEDED_TO;
137 else
139 step->__min_needed_from = MIN_NEEDED_TO;
140 step->__max_needed_from = MIN_NEEDED_TO;
141 step->__min_needed_to = MIN_NEEDED_FROM;
142 step->__max_needed_to = MIN_NEEDED_FROM;
145 step->__stateful = 0;
147 result = __GCONV_OK;
151 return result;
155 extern void gconv_end (struct __gconv_step *data);
156 void
157 gconv_end (struct __gconv_step *data)
159 free (data->__data);
/* Hardware conversion loop shared by both directions.  INSTRUCTION is
   the assembler text of the convert instruction; its %0 and %1 refer
   to the output and input pointers.

   The input pointer/length are bound to registers 8/9 and the output
   pointer/length to registers 10/11.  The instruction is re-issued
   while "jo" is taken; afterwards the value obtained with "ipm" is
   shifted down by 28 bits and a result of 1 is reported as
   __GCONV_FULL_OUTPUT, a result of 2 as __GCONV_ILLEGAL_INPUT.

   This is deliberately a plain brace block and not a
   do { } while (0): the `break' statements have to leave the
   construct the macro is expanded in.  */
#define HARDWARE_CONVERT(INSTRUCTION)					\
  {									\
    register const unsigned char *hw_in asm ("8") = inptr;		\
    register unsigned long long hw_in_len asm ("9") = inend - inptr;	\
    register unsigned char *hw_out asm ("10") = outptr;			\
    register unsigned long long hw_out_len asm ("11") = outend - outptr; \
    uint64_t cond_code = 0;						\
									\
    asm volatile (".machine push \n\t"					\
		  ".machine \"z9-109\" \n\t"				\
		  "0: " INSTRUCTION " \n\t"				\
		  ".machine pop \n\t"					\
		  " jo 0b \n\t"						\
		  " ipm %2 \n"						\
		  : "+a" (hw_out), "+a" (hw_in), "+d" (cond_code),	\
		    "+d" (hw_out_len), "+d" (hw_in_len)			\
		  :							\
		  : "cc", "memory");					\
									\
    /* Publish how far the instruction got.  */				\
    inptr = hw_in;							\
    outptr = hw_out;							\
    cond_code >>= 28;							\
									\
    if (cond_code == 1)							\
      {									\
	result = __GCONV_FULL_OUTPUT;					\
	break;								\
      }									\
    else if (cond_code == 2)						\
      {									\
	result = __GCONV_ILLEGAL_INPUT;					\
	break;								\
      }									\
  }
199 /* Conversion function from UTF-16 to UTF-32 internal/BE. */
201 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
202 #define MAX_NEEDED_INPUT MAX_NEEDED_FROM
203 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
204 #define LOOPFCT FROM_LOOP
205 /* The software routine is copied from utf-16.c (minus bytes
206 swapping). */
207 #define BODY \
209 /* The hardware instruction currently fails to report an error for \
210 isolated low surrogates so we have to disable the instruction \
211 until this gets resolved. */ \
212 if (0) /* (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) */ \
214 HARDWARE_CONVERT ("cu24 %0, %1, 1"); \
215 if (inptr != inend) \
217 /* Check if the third byte is \
218 a valid start of a UTF-16 surrogate. */ \
219 if (inend - inptr == 3 && (inptr[3] & 0xfc) != 0xdc) \
220 STANDARD_FROM_LOOP_ERR_HANDLER (3); \
222 result = __GCONV_INCOMPLETE_INPUT; \
223 break; \
225 continue; \
228 uint16_t u1 = get16 (inptr); \
230 if (__builtin_expect (u1 < 0xd800, 1) || u1 > 0xdfff) \
232 /* No surrogate. */ \
233 put32 (outptr, u1); \
234 inptr += 2; \
236 else \
238 /* An isolated low-surrogate was found. This has to be \
239 considered ill-formed. */ \
240 if (__glibc_unlikely (u1 >= 0xdc00)) \
242 STANDARD_FROM_LOOP_ERR_HANDLER (2); \
244 /* It's a surrogate character. At least the first word says \
245 it is. */ \
246 if (__glibc_unlikely (inptr + 4 > inend)) \
248 /* We don't have enough input for another complete input \
249 character. */ \
250 result = __GCONV_INCOMPLETE_INPUT; \
251 break; \
254 inptr += 2; \
255 uint16_t u2 = get16 (inptr); \
256 if (__builtin_expect (u2 < 0xdc00, 0) \
257 || __builtin_expect (u2 > 0xdfff, 0)) \
259 /* This is no valid second word for a surrogate. */ \
260 inptr -= 2; \
261 STANDARD_FROM_LOOP_ERR_HANDLER (2); \
264 put32 (outptr, ((u1 - 0xd7c0) << 10) + (u2 - 0xdc00)); \
265 inptr += 2; \
267 outptr += 4; \
269 #define LOOP_NEED_FLAGS
270 #include <iconv/loop.c>
272 /* Conversion from UTF-32 internal/BE to UTF-16. */
274 #define MIN_NEEDED_INPUT MIN_NEEDED_TO
275 #define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
276 #define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM
277 #define LOOPFCT TO_LOOP
278 /* The software routine is copied from utf-16.c (minus bytes
279 swapping). */
280 #define BODY \
282 if (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) \
284 HARDWARE_CONVERT ("cu42 %0, %1"); \
286 if (inptr != inend) \
288 result = __GCONV_INCOMPLETE_INPUT; \
289 break; \
291 continue; \
294 uint32_t c = get32 (inptr); \
296 if (__builtin_expect (c <= 0xd7ff, 1) \
297 || (c >=0xdc00 && c <= 0xffff)) \
299 /* Two UTF-16 chars. */ \
300 put16 (outptr, c); \
302 else if (__builtin_expect (c >= 0x10000, 1) \
303 && __builtin_expect (c <= 0x10ffff, 1)) \
305 /* Four UTF-16 chars. */ \
306 uint16_t zabcd = ((c & 0x1f0000) >> 16) - 1; \
307 uint16_t out; \
309 /* Generate a surrogate character. */ \
310 if (__glibc_unlikely (outptr + 4 > outend)) \
312 /* Overflow in the output buffer. */ \
313 result = __GCONV_FULL_OUTPUT; \
314 break; \
317 out = 0xd800; \
318 out |= (zabcd & 0xff) << 6; \
319 out |= (c >> 10) & 0x3f; \
320 put16 (outptr, out); \
321 outptr += 2; \
323 out = 0xdc00; \
324 out |= c & 0x3ff; \
325 put16 (outptr, out); \
327 else \
329 STANDARD_TO_LOOP_ERR_HANDLER (4); \
331 outptr += 2; \
332 inptr += 4; \
334 #define LOOP_NEED_FLAGS
335 #include <iconv/loop.c>
337 #include <iconv/skeleton.c>