libc/string/i386/string.h
/*
 * Copyright (C) 2008 Denys Vlasenko <vda.linux@googlemail.com>
 *
 * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball
 */

#if !defined _STRING_H
#error "Never use <libc-string_i386.h> directly; include <string.h> instead"
#endif

#ifndef _LIBC_STRING_i386_H
#define _LIBC_STRING_i386_H 1

static __always_inline
void *inlined_memset_const_c_count4(void *s, unsigned eax, unsigned count)
{
	int ecx, edi;

	if (count == 0)
		return s;

	/* Very small (2 stores or less) are best done with direct
	 * mov <const>,<mem> instructions (they do not clobber registers) */
	if (count == 1) {
		*(char *)(s + 0) = eax;
		return s;
	}

	/* You wonder why & 0xff is needed? Try memset(p, '\xff', size).
	 * If char is signed, '\xff' == -1! */
	eax = (eax & 0xff) * 0x01010101; /* done at compile time */

	if (count == 2) {
		*(short *)(s + 0) = eax;
		return s;
	}
	if (count == 3) {
		*(short *)(s + 0) = eax;
		*(char *) (s + 2) = eax;
		return s;
	}
	if (count == 1*4 + 0) {
		*(int *)(s + 0) = eax;
		return s;
	}
	if (count == 1*4 + 1) {
		*(int *) (s + 0) = eax;
		*(char *)(s + 4) = eax;
		return s;
	}
	if (count == 1*4 + 2) {
		*(int *)  (s + 0) = eax;
		*(short *)(s + 4) = eax;
		return s;
	}

	/* Small string stores: don't clobber ecx
	 * (clobbers only eax and edi) */
#define small_store(arg) { \
	__asm__ __volatile__( \
		arg \
		: "=&D" (edi) \
		: "a" (eax), "0" (s) \
		: "memory" \
	); \
	return s; \
}
	if (count == 1*4 + 3) small_store("stosl; stosw; stosb");
	if (count == 2*4 + 0) {
		((int *)s)[0] = eax;
		((int *)s)[1] = eax;
		return s;
	}
	if (count == 2*4 + 1) small_store("stosl; stosl; stosb");
	if (count == 2*4 + 2) small_store("stosl; stosl; stosw");
	if (count == 2*4 + 3) small_store("stosl; stosl; stosw; stosb");
	if (count == 3*4 + 0) small_store("stosl; stosl; stosl");
	if (count == 3*4 + 1) small_store("stosl; stosl; stosl; stosb");
	if (count == 3*4 + 2) small_store("stosl; stosl; stosl; stosw");
	if (count == 3*4 + 3) small_store("stosl; stosl; stosl; stosw; stosb");
	if (count == 4*4 + 0) small_store("stosl; stosl; stosl; stosl");
	if (count == 4*4 + 1) small_store("stosl; stosl; stosl; stosl; stosb");
	/* going over 7 bytes is suboptimal */
	/* stosw is a 2-byte insn, so this one takes 6 bytes: */
	if (count == 4*4 + 2) small_store("stosl; stosl; stosl; stosl; stosw");
	/* 7 bytes */
	if (count == 4*4 + 3) small_store("stosl; stosl; stosl; stosl; stosw; stosb");
	/* 5 bytes */
	if (count == 5*4 + 0) small_store("stosl; stosl; stosl; stosl; stosl");
	/* 6 bytes */
	if (count == 5*4 + 1) small_store("stosl; stosl; stosl; stosl; stosl; stosb");
	/* 7 bytes */
	if (count == 5*4 + 2) small_store("stosl; stosl; stosl; stosl; stosl; stosw");
	/* 8 bytes, but oh well... */
	if (count == 5*4 + 3) small_store("stosl; stosl; stosl; stosl; stosl; stosw; stosb");
	/* 6 bytes */
	if (count == 6*4 + 0) small_store("stosl; stosl; stosl; stosl; stosl; stosl");
	/* the rest would be 7+ bytes and is handled below instead */
#undef small_store

	/* Not small, but multiple-of-4 store.
	 * "mov <const>,%ecx; rep; stosl" sequence is 7 bytes */
	__asm__ __volatile__(
		"	rep; stosl\n"
		: "=&c" (ecx), "=&D" (edi)
		: "a" (eax), "0" (count / 4), "1" (s)
		: "memory"
	);
	return s;
}

#if 1 /* -51 bytes on shared i386 build with gcc 4.3.0 */
#define memset(s, c, count) ( \
	( !(__builtin_constant_p(c) && __builtin_constant_p(count)) \
	|| ((count) > (6*4 + 0) && ((count) % 4) != 0) \
	) \
	? memset((s), (c), (count)) \
	: inlined_memset_const_c_count4((s), (c), (count)) \
	)
#endif
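
/* Usage sketch (illustrative only, kept out of the build; buffer and
 * parameter names here are hypothetical): with both c and count constant
 * and count either small or a multiple of 4, the macro above routes the
 * call to the inlined stores; anything else falls through to the real
 * memset(). */
#if 0
static void memset_example(void *p, int c, unsigned n)
{
	char buf[32];
	memset(buf, 0, 12);	/* const c and count, 12 == 3*4: inlined as "stosl; stosl; stosl" */
	memset(buf, 0xff, 7);	/* const count 7 == 1*4 + 3: inlined as "stosl; stosw; stosb" */
	memset(buf, c, 25);	/* c not constant (and 25 > 24 with 25 % 4 != 0): calls libc memset */
	memset(p, 0, n);	/* count not constant: calls libc memset */
}
#endif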

static __always_inline
void *inlined_mempcpy_const_count4(void *d, const void *s, unsigned count)
{
	int ecx;
	char *esi, *edi;

	if (count == 0)
		return d;

	if (count == 1) {
		*(char *)d = *(char *)s;
		return d + 1;
	}
	if (count == 2) {
		*(short *)d = *(short *)s;
		return d + 2;
	}

	/* Small string moves: don't clobber ecx
	 * (clobbers only esi and edi) */
#define small_move(arg) { \
	__asm__ __volatile__( \
		arg \
		: "=&S" (esi), "=&D" (edi) \
		: "0" (s), "1" (d) \
		: "memory" \
	); \
	return edi; \
}
	if (count == 3) small_move("movsw; movsb");
	if (count == 1*4 + 0) {
		*(int *)d = *(int *)s;
		return d + 4;
	}
	if (count == 1*4 + 1) small_move("movsl; movsb");
	if (count == 1*4 + 2) small_move("movsl; movsw");
	if (count == 1*4 + 3) small_move("movsl; movsw; movsb");
	if (count == 2*4 + 0) small_move("movsl; movsl");
	if (count == 2*4 + 1) small_move("movsl; movsl; movsb");
	if (count == 2*4 + 2) small_move("movsl; movsl; movsw");
	if (count == 2*4 + 3) small_move("movsl; movsl; movsw; movsb");
	if (count == 3*4 + 0) small_move("movsl; movsl; movsl");
	if (count == 3*4 + 1) small_move("movsl; movsl; movsl; movsb");
	if (count == 3*4 + 2) small_move("movsl; movsl; movsl; movsw");
	if (count == 3*4 + 3) small_move("movsl; movsl; movsl; movsw; movsb");
	if (count == 4*4 + 0) small_move("movsl; movsl; movsl; movsl");
	if (count == 4*4 + 1) small_move("movsl; movsl; movsl; movsl; movsb");
	/* going over 7 bytes is suboptimal */
	/* movsw is a 2-byte insn, so this one takes 6 bytes: */
	if (count == 4*4 + 2) small_move("movsl; movsl; movsl; movsl; movsw");
	/* 7 bytes */
	if (count == 4*4 + 3) small_move("movsl; movsl; movsl; movsl; movsw; movsb");
	/* 5 bytes */
	if (count == 5*4 + 0) small_move("movsl; movsl; movsl; movsl; movsl");
	/* 6 bytes */
	if (count == 5*4 + 1) small_move("movsl; movsl; movsl; movsl; movsl; movsb");
	/* 7 bytes */
	if (count == 5*4 + 2) small_move("movsl; movsl; movsl; movsl; movsl; movsw");
	/* 8 bytes, but oh well... */
	if (count == 5*4 + 3) small_move("movsl; movsl; movsl; movsl; movsl; movsw; movsb");
	/* 6 bytes */
	if (count == 6*4 + 0) small_move("movsl; movsl; movsl; movsl; movsl; movsl");
	/* the rest would be 7+ bytes and is handled below instead */
#undef small_move

	/* Not small, but multiple-of-4 move.
	 * "mov <const>,%ecx; rep; movsl" sequence is 7 bytes */
	__asm__ __volatile__(
		"	rep; movsl\n"
		: "=&c" (ecx), "=&S" (esi), "=&D" (edi)
		: "0" (count / 4), "1" (s), "2" (d)
		: "memory"
	);
	return edi;
}

static __always_inline
void *inlined_memcpy_const_count4(void *d, const void *s, unsigned count)
{
	inlined_mempcpy_const_count4(d, s, count);
	return d;
}

#if 1 /* +34 bytes on shared i386 build with gcc 4.3.0 */
#define mempcpy(d, s, count) ( \
	( !(__builtin_constant_p(count)) \
	|| ((count) > (6*4 + 0) && ((count) % 4) != 0) \
	) \
	? mempcpy((d), (s), (count)) \
	: inlined_mempcpy_const_count4((d), (s), (count)) \
	)
#define memcpy(d, s, count) ( \
	( !(__builtin_constant_p(count)) \
	|| ((count) > (6*4 + 0) && ((count) % 4) != 0) \
	) \
	? memcpy((d), (s), (count)) \
	: inlined_memcpy_const_count4((d), (s), (count)) \
	)
#endif
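
/* Usage sketch (illustrative only, kept out of the build; names are
 * hypothetical): mempcpy() returns the end of the copy, so constant-size
 * copies can be chained without recomputing offsets, while memcpy()
 * keeps returning the destination. */
#if 0
static void mempcpy_example(void)
{
	char dst[8];
	void *p = dst;
	p = mempcpy(p, "abcd", 4);	/* const count 4: one 4-byte load/store, returns dst + 4 */
	p = mempcpy(p, "efg", 3);	/* const count 3: "movsw; movsb", returns dst + 7 */
	memcpy(p, "", 1);		/* writes the NUL; memcpy returns p, not the end */
}
#endif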

static __always_inline
size_t inlined_strlen(const char *s)
{
	int edi;
	int ecx;
	__asm__ __volatile__(
		"	repne; scasb\n"
	/*	"	notl	%0\n" */
	/*	"	decl	%0\n" */
		: "=c" (ecx), "=&D" (edi)
		: "1" (s), "a" (0), "0" (0xffffffffu)
		/* : no clobbers */
	);
	/* repne; scasb decrements ecx once per byte scanned, including the
	 * terminating NUL, so for a string of length n ecx ends up as
	 * -(n + 2). The length is thus -ecx - 2, the C equivalent of the
	 * commented-out "notl; decl" pair above. */
	return -ecx - 2;
}
#if 0 /* +1108 bytes on shared i386 build with gcc 4.3.0 */
#define strlen(s) inlined_strlen(s)
#endif
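
/* Sanity sketch for the count math (illustrative only, kept out of the
 * build; assumes <assert.h>): scanning "ab" examines 'a', 'b' and the
 * NUL, so ecx goes from 0xffffffff to 0xfffffffc == -4 == -(len + 2),
 * and -ecx - 2 == 2. */
#if 0
static void strlen_example(void)
{
	assert(inlined_strlen("") == 0);	/* 1 byte scanned: ecx == -2 */
	assert(inlined_strlen("ab") == 2);	/* 3 bytes scanned: ecx == -4 */
}
#endif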

static __always_inline
char *inlined_stpcpy(char *dest, const char *src)
{
	char *esi, *edi;
	int eax;
	__asm__ __volatile__(
		"1:	lodsb\n"
		"	stosb\n"
		"	testb	%%al, %%al\n"
		"	jnz	1b\n"
		: "=&S" (esi), "=&D" (edi), "=&a" (eax)
		: "0" (src), "1" (dest)
		: "memory"
	);
	return edi - 1;
}
static __always_inline
char *inlined_strcpy(char *dest, const char *src)
{
	inlined_stpcpy(dest, src);
	return dest;
}
#if 0 /* +562 bytes on shared i386 build with gcc 4.3.0 */
#define stpcpy(dest, src) inlined_stpcpy(dest, src)
#define strcpy(dest, src) inlined_strcpy(dest, src)
#endif
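
/* Usage sketch (illustrative only, kept out of the build; names are
 * hypothetical): stpcpy() returns a pointer to the terminating NUL it
 * wrote, which makes concatenation cheaper than repeated strcat(). */
#if 0
static void stpcpy_example(void)
{
	char path[16];
	char *p = path;
	p = inlined_stpcpy(p, "/usr");	/* p now points at path[4], the NUL */
	p = inlined_stpcpy(p, "/lib");	/* appends in place: path == "/usr/lib" */
}
#endif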

static __always_inline
void *inlined_memchr(const void *s, int c, size_t count)
{
	void *edi;
	int ecx;
	/* Unfortunately, c gets loaded to %eax (wide insn), not %al */
	__asm__ __volatile__(
		"	jecxz	1f\n"
		"	repne; scasb\n"
		"	leal	-1(%%edi), %%edi\n"
		"	je	2f\n"
		"1:\n"
		"	xorl	%%edi, %%edi\n"
		"2:\n"
		: "=&D" (edi), "=&c" (ecx)
		: "a" (c), "0" (s), "1" (count)
		/* : no clobbers */
	);
	return edi;
}
static __always_inline
void *inlined_memchr_const_c(const void *s, int c, size_t count)
{
#if defined __OPTIMIZE__
	void *edi;
	int ecx, eax;
	__asm__ __volatile__(
		"	jecxz	1f\n"
		"	movb	%4, %%al\n" /* const c to %%al */
		"	repne; scasb\n"
		"	leal	-1(%%edi), %%edi\n"
		"	je	2f\n"
		"1:\n"
		"	xorl	%%edi, %%edi\n"
		"2:\n"
		: "=&D" (edi), "=&c" (ecx), "=&a" (eax)
		: "0" (s), "i" (c), "1" (count)
		/* : no clobbers */
	);
	return edi;
#else
	/* With -O0, gcc can't figure out how to encode CONST c
	 * as an immediate operand. Generating slightly bigger code
	 * (usually "movl CONST,%eax", 3 bytes bigger than needed):
	 */
	void *edi;
	int ecx, eax;
	__asm__ __volatile__(
		"	jecxz	1f\n"
		"	repne; scasb\n"
		"	leal	-1(%%edi), %%edi\n"
		"	je	2f\n"
		"1:\n"
		"	xorl	%%edi, %%edi\n"
		"2:\n"
		: "=&D" (edi), "=&c" (ecx), "=&a" (eax)
		: "0" (s), "2" (c), "1" (count)
		/* : no clobbers */
	);
	return edi;
#endif
}

#if 1 /* +2 bytes on shared i386 build with gcc 4.3.0 */
#define memchr(s, c, count) ( \
	__builtin_constant_p(c) \
	? inlined_memchr_const_c(s, (c) & 0xff, count) \
	: inlined_memchr(s, c, count) \
	)
#endif
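
/* Usage sketch (illustrative only, kept out of the build; names are
 * hypothetical): with a constant c the macro picks the variant that can
 * encode c as an immediate; the & 0xff mirrors the signed-char fix-up
 * explained in inlined_memset_const_c_count4() above. */
#if 0
static void memchr_example(const char *buf, int c, size_t n)
{
	void *r1 = memchr(buf, '\xff', n);	/* const c: inlined_memchr_const_c(buf, 0xff, n) */
	void *r2 = memchr(buf, c, n);		/* variable c: inlined_memchr(buf, c, n) */
	(void)r1; (void)r2;
}
#endif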

#endif /* _LIBC_STRING_i386_H */