Update copyright notices with scripts/update-copyrights
[glibc.git] / sysdeps / s390 / s390-64 / utf16-utf32-z9.c
blobf7d9e0795795f5414ac1a6823f8fb8ccef73cfd1
1 /* Conversion between UTF-16 and UTF-32 BE/internal.
3 This module uses the Z9-109 variants of the Convert Unicode
4 instructions.
5 Copyright (C) 1997-2014 Free Software Foundation, Inc.
7 Author: Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
8 Based on the work by Ulrich Drepper <drepper@cygnus.com>, 1997.
10 Thanks to Daniel Appich who covered the relevant performance work
11 in his diploma thesis.
13 This is free software; you can redistribute it and/or
14 modify it under the terms of the GNU Lesser General Public
15 License as published by the Free Software Foundation; either
16 version 2.1 of the License, or (at your option) any later version.
18 This is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 Lesser General Public License for more details.
23 You should have received a copy of the GNU Lesser General Public
24 License along with the GNU C Library; if not, see
25 <http://www.gnu.org/licenses/>. */
27 #include <dlfcn.h>
28 #include <stdint.h>
29 #include <unistd.h>
30 #include <dl-procinfo.h>
31 #include <gconv.h>
33 /* UTF-32 big endian byte order mark. */
34 #define BOM_UTF32 0x0000feffu
36 /* UTF-16 big endian byte order mark. */
37 #define BOM_UTF16 0xfeff
/* gconv_init and gconv_end are written by hand further down in this file.
   NOTE(review): presumably these zeros keep <iconv/skeleton.c> from
   generating its default versions -- confirm against skeleton.c. */
39 #define DEFINE_INIT 0
40 #define DEFINE_FINI 0
/* Byte counts per character. The "from" side is UTF-16: one 2-byte unit,
   or 4 bytes for a surrogate pair. The "to" side is UTF-32: always 4. */
41 #define MIN_NEEDED_FROM 2
42 #define MAX_NEEDED_FROM 4
43 #define MIN_NEEDED_TO 4
/* Names of the two conversion loops defined below via LOOPFCT. */
44 #define FROM_LOOP from_utf16_loop
45 #define TO_LOOP to_utf16_loop
/* True when converting away from UTF-16; dir is the local variable set up
   by PREPARE_LOOP. */
46 #define FROM_DIRECTION (dir == from_utf16)
/* Per-call setup code. It reads the direction and the BOM flag that
   gconv_init stored in step->__data. If a BOM is wanted, the conversion is
   not for internal use and this is the first invocation
   (__invocation_counter == 0), it writes the byte order mark of the target
   encoding to outbuf: 2 bytes for UTF-16, 4 bytes for UTF-32. When the mark
   does not fit before outend it returns __GCONV_FULL_OUTPUT from the
   enclosing function.
   NOTE(review): step, data, outbuf and outend are expected to come from
   the function in <iconv/skeleton.c> that expands this macro -- confirm. */
47 #define PREPARE_LOOP \
48 enum direction dir = ((struct utf16_data *) step->__data)->dir; \
49 int emit_bom = ((struct utf16_data *) step->__data)->emit_bom; \
51 if (emit_bom && !data->__internal_use \
52 && data->__invocation_counter == 0) \
53 { \
54 if (dir == to_utf16) \
55 { \
56 /* Emit the UTF-16 Byte Order Mark. */ \
57 if (__builtin_expect (outbuf + 2 > outend, 0)) \
58 return __GCONV_FULL_OUTPUT; \
60 put16u (outbuf, BOM_UTF16); \
61 outbuf += 2; \
62 } \
63 else \
64 { \
65 /* Emit the UTF-32 Byte Order Mark. */ \
66 if (__builtin_expect (outbuf + 4 > outend, 0)) \
67 return __GCONV_FULL_OUTPUT; \
69 put32u (outbuf, BOM_UTF32); \
70 outbuf += 4; \
71 } \
74 /* Direction of the transformation. gconv_init starts from illegal_dir
   and keeps it when the requested charset pair is not one this module
   handles; in that case it reports __GCONV_NOCONV. */
75 enum direction
77 illegal_dir,
78 to_utf16,
79 from_utf16
/* Per-step state. gconv_init allocates it and stores it in step->__data,
   PREPARE_LOOP reads it back, and gconv_end frees it. */
82 struct utf16_data
84 enum direction dir; /* Which way this step converts. */
85 int emit_bom; /* Nonzero if the target name was "UTF-16//" or "UTF-32//". */
89 extern int gconv_init (struct __gconv_step *step);
90 int
91 gconv_init (struct __gconv_step *step)
93 /* Determine which direction. */
94 struct utf16_data *new_data;
95 enum direction dir = illegal_dir;
96 int emit_bom;
97 int result;
99 emit_bom = (__strcasecmp (step->__to_name, "UTF-32//") == 0
100 || __strcasecmp (step->__to_name, "UTF-16//") == 0);
102 if (__strcasecmp (step->__from_name, "UTF-16BE//") == 0
103 && (__strcasecmp (step->__to_name, "UTF-32//") == 0
104 || __strcasecmp (step->__to_name, "UTF-32BE//") == 0
105 || __strcasecmp (step->__to_name, "INTERNAL") == 0))
107 dir = from_utf16;
109 else if ((__strcasecmp (step->__to_name, "UTF-16//") == 0
110 || __strcasecmp (step->__to_name, "UTF-16BE//") == 0)
111 && (__strcasecmp (step->__from_name, "UTF-32BE//") == 0
112 || __strcasecmp (step->__from_name, "INTERNAL") == 0))
114 dir = to_utf16;
117 result = __GCONV_NOCONV;
118 if (dir != illegal_dir)
120 new_data = (struct utf16_data *) malloc (sizeof (struct utf16_data));
122 result = __GCONV_NOMEM;
123 if (new_data != NULL)
125 new_data->dir = dir;
126 new_data->emit_bom = emit_bom;
127 step->__data = new_data;
129 if (dir == from_utf16)
131 step->__min_needed_from = MIN_NEEDED_FROM;
132 step->__max_needed_from = MIN_NEEDED_FROM;
133 step->__min_needed_to = MIN_NEEDED_TO;
134 step->__max_needed_to = MIN_NEEDED_TO;
136 else
138 step->__min_needed_from = MIN_NEEDED_TO;
139 step->__max_needed_from = MIN_NEEDED_TO;
140 step->__min_needed_to = MIN_NEEDED_FROM;
141 step->__max_needed_to = MIN_NEEDED_FROM;
144 step->__stateful = 0;
146 result = __GCONV_OK;
150 return result;
/* Tear down a conversion step: free the memory referenced by
   data->__data, which is where gconv_init stores the struct utf16_data
   it allocates. */
154 extern void gconv_end (struct __gconv_step *data);
155 void
156 gconv_end (struct __gconv_step *data)
158 free (data->__data);
161 /* The macro for the hardware loop. This is used for both
   directions.

   INSTRUCTION is the assembler text of the conversion instruction; its
   operands %0 and %1 are the output and input pointers. The macro expects
   inptr, inend, outptr, outend and result to be in scope and must be
   expanded inside a loop, because it leaves through break.

   The input and output pointers and remaining lengths are bound to
   registers "8" through "11" by the asm declarations below. The
   instruction is re-issued for as long as the jo branch is taken
   (NOTE(review): presumably condition code 3, i.e. the CPU stopped early
   -- confirm in the Principles of Operation). Afterwards ipm captures the
   condition code, inptr/outptr are updated from the registers, and
   cc >>= 28 isolates the condition code:
     1 -> result = __GCONV_FULL_OUTPUT, break
     2 -> result = __GCONV_ILLEGAL_INPUT, break
   Any other value falls through to the code following the macro. */
163 #define HARDWARE_CONVERT(INSTRUCTION) \
165 register const unsigned char* pInput asm ("8") = inptr; \
166 register unsigned long long inlen asm ("9") = inend - inptr; \
167 register unsigned char* pOutput asm ("10") = outptr; \
168 register unsigned long long outlen asm("11") = outend - outptr; \
169 uint64_t cc = 0; \
171 asm volatile (".machine push \n\t" \
172 ".machine \"z9-109\" \n\t" \
173 "0: " INSTRUCTION " \n\t" \
174 ".machine pop \n\t" \
175 " jo 0b \n\t" \
176 " ipm %2 \n" \
177 : "+a" (pOutput), "+a" (pInput), "+d" (cc), \
178 "+d" (outlen), "+d" (inlen) \
180 : "cc", "memory"); \
182 inptr = pInput; \
183 outptr = pOutput; \
184 cc >>= 28; \
186 if (cc == 1) \
188 result = __GCONV_FULL_OUTPUT; \
189 break; \
191 else if (cc == 2) \
193 result = __GCONV_ILLEGAL_INPUT; \
194 break; \
198 /* Conversion function from UTF-16 to UTF-32 internal/BE. */
200 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
201 #define MAX_NEEDED_INPUT MAX_NEEDED_FROM
202 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
203 #define LOOPFCT FROM_LOOP
204 /* The software routine is copied from utf-16.c (minus bytes
205 swapping). */
/* Loop body converting one UTF-16 character at inptr into one 4-byte
   UTF-32 value at outptr.  Expects inptr, inend, outptr, outend and
   result to be in scope and is expanded inside the conversion loop, so
   break and continue act on that loop.

   Software path:
     - a non-surrogate unit is copied and consumes 2 input bytes;
     - a leading low surrogate (0xdc00..0xdfff) is ill-formed;
     - a high surrogate needs 4 input bytes, otherwise the loop stops
       with __GCONV_INCOMPLETE_INPUT; its second unit must be a low
       surrogate, otherwise inptr is moved back to the first unit and
       the error handler is invoked;
     - every converted character advances outptr by 4.  */
#define BODY \
  {                                                                       \
    /* The hardware instruction currently fails to report an error for   \
       isolated low surrogates so we have to disable the instruction     \
       until this gets resolved.  */                                      \
    if (0) /* (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) */                    \
      {                                                                   \
        HARDWARE_CONVERT ("cu24 %0, %1, 1");                              \
        if (inptr != inend)                                               \
          {                                                               \
            /* Check if the third byte is a valid start of a UTF-16      \
               low surrogate.  With exactly three bytes left the last    \
               valid index is 2; index 3 would read past inend.  */      \
            if (inend - inptr == 3 && (inptr[2] & 0xfc) != 0xdc)          \
              STANDARD_FROM_LOOP_ERR_HANDLER (3);                         \
                                                                          \
            result = __GCONV_INCOMPLETE_INPUT;                            \
            break;                                                        \
          }                                                               \
        continue;                                                         \
      }                                                                   \
                                                                          \
    uint16_t u1 = get16 (inptr);                                          \
                                                                          \
    if (__builtin_expect (u1 < 0xd800, 1) || u1 > 0xdfff)                 \
      {                                                                   \
        /* No surrogate.  */                                              \
        put32 (outptr, u1);                                               \
        inptr += 2;                                                       \
      }                                                                   \
    else                                                                  \
      {                                                                   \
        /* An isolated low-surrogate was found.  This has to be          \
           considered ill-formed.  */                                     \
        if (__builtin_expect (u1 >= 0xdc00, 0))                           \
          {                                                               \
            STANDARD_FROM_LOOP_ERR_HANDLER (2);                           \
          }                                                               \
        /* It's a surrogate character.  At least the first word says     \
           it is.  */                                                     \
        if (__builtin_expect (inptr + 4 > inend, 0))                      \
          {                                                               \
            /* We don't have enough input for another complete input     \
               character.  */                                             \
            result = __GCONV_INCOMPLETE_INPUT;                            \
            break;                                                        \
          }                                                               \
                                                                          \
        inptr += 2;                                                       \
        uint16_t u2 = get16 (inptr);                                      \
        if (__builtin_expect (u2 < 0xdc00, 0)                             \
            || __builtin_expect (u2 > 0xdfff, 0))                         \
          {                                                               \
            /* This is no valid second word for a surrogate.  */         \
            inptr -= 2;                                                   \
            STANDARD_FROM_LOOP_ERR_HANDLER (2);                           \
          }                                                               \
                                                                          \
        put32 (outptr, ((u1 - 0xd7c0) << 10) + (u2 - 0xdc00));            \
        inptr += 2;                                                       \
      }                                                                   \
    outptr += 4;                                                          \
  }
268 #define LOOP_NEED_FLAGS
269 #include <iconv/loop.c>
271 /* Conversion from UTF-32 internal/BE to UTF-16. */
273 #define MIN_NEEDED_INPUT MIN_NEEDED_TO
274 #define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
275 #define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM
276 #define LOOPFCT TO_LOOP
277 /* The software routine is copied from utf-16.c (minus bytes
278 swapping). */
/* Loop body converting one 4-byte UTF-32 value at inptr into UTF-16 at
   outptr.  Expects inptr, inend, outptr, outend and result to be in
   scope and is expanded inside the conversion loop, so break and
   continue act on that loop.

   Software path:
     - U+0000..U+D7FF and U+E000..U+FFFF become one 16-bit unit;
     - U+10000..U+10FFFF become a surrogate pair, or stop the loop with
       __GCONV_FULL_OUTPUT when fewer than 4 output bytes remain;
     - everything else, including all surrogate code points
       U+D800..U+DFFF, goes to the error handler.  */
#define BODY \
  {                                                                       \
    /* Surrogate code points are not valid in UTF-32, and the range      \
       check below now rejects all of them.  The hardware path bypasses  \
       that check, so it is disabled in the same way as the cu24 path    \
       of the other direction.                                           \
       NOTE(review): this assumes cu42 lets low surrogates               \
       (0xdc00..0xdfff) through without an error -- confirm against the  \
       Principles of Operation before re-enabling.  */                    \
    if (0) /* (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) */                    \
      {                                                                   \
        HARDWARE_CONVERT ("cu42 %0, %1");                                 \
                                                                          \
        if (inptr != inend)                                               \
          {                                                               \
            result = __GCONV_INCOMPLETE_INPUT;                            \
            break;                                                        \
          }                                                               \
        continue;                                                         \
      }                                                                   \
                                                                          \
    uint32_t c = get32 (inptr);                                           \
                                                                          \
    if (__builtin_expect (c <= 0xd7ff, 1)                                 \
        || (c >= 0xe000 && c <= 0xffff))                                  \
      {                                                                   \
        /* One 16-bit code unit (two bytes).  */                          \
        put16 (outptr, c);                                                \
      }                                                                   \
    else if (__builtin_expect (c >= 0x10000, 1)                           \
             && __builtin_expect (c <= 0x10ffff, 1))                      \
      {                                                                   \
        /* Two 16-bit code units (four bytes).  */                        \
        uint16_t zabcd = ((c & 0x1f0000) >> 16) - 1;                      \
        uint16_t out;                                                     \
                                                                          \
        /* Generate a surrogate character.  */                            \
        if (__builtin_expect (outptr + 4 > outend, 0))                    \
          {                                                               \
            /* Overflow in the output buffer.  */                         \
            result = __GCONV_FULL_OUTPUT;                                 \
            break;                                                        \
          }                                                               \
                                                                          \
        out = 0xd800;                                                     \
        out |= (zabcd & 0xff) << 6;                                       \
        out |= (c >> 10) & 0x3f;                                          \
        put16 (outptr, out);                                              \
        outptr += 2;                                                      \
                                                                          \
        out = 0xdc00;                                                     \
        out |= c & 0x3ff;                                                 \
        put16 (outptr, out);                                              \
      }                                                                   \
    else                                                                  \
      {                                                                   \
        STANDARD_TO_LOOP_ERR_HANDLER (4);                                 \
      }                                                                   \
    outptr += 2;                                                          \
    inptr += 4;                                                           \
  }
333 #define LOOP_NEED_FLAGS
334 #include <iconv/loop.c>
336 #include <iconv/skeleton.c>