/* 2014-04-15 Richard Biener <rguenther@suse.de>
   [official-gcc.git] / gcc / testsuite / gcc.target / i386 / sse4_2-pcmpstr.h
   blob 999b5c8edabd926d042aeca3fa2127070a30d806  */
#include <nmmintrin.h>
#include <string.h>
/* Bit values used in the flags word built by cmp_flags.  Each flag
   occupies its own bit.  Within this file only CFLAG, ZFLAG, SFLAG and
   OFLAG are ever set; AFLAG and PFLAG are defined but always left
   clear.  */
#define CFLAG 0x00000001
#define ZFLAG 0x00000002
#define SFLAG 0x00000004
#define OFLAG 0x00000008
#define AFLAG 0x00000010
#define PFLAG 0x00000020
/* Fill the equality matrix: RES[j][i] is set to 1 when X[i] == Y[j] and
   to 0 otherwise, for every i and j below the element count.  The count
   is derived from the element size of X: 16 for 1-byte elements, 8 for
   2-byte elements ((sizeof ^ 3) * 8).  X and Y must be arrays (or
   pointers) of at least that many elements and RES must be indexable as
   RES[j][i] over the same range.  Arguments are evaluated more than
   once, so they must be free of side effects.  Use as a statement,
   followed by a semicolon.  */
#define PCMPSTR_EQ(X, Y, RES) \
  do                                                                    \
    {                                                                   \
      int pcmpstr_size_ = (sizeof (*(X)) ^ 3) * 8;                      \
      int pcmpstr_i_, pcmpstr_j_;                                       \
      for (pcmpstr_i_ = 0; pcmpstr_i_ < pcmpstr_size_; pcmpstr_i_++)    \
        for (pcmpstr_j_ = 0; pcmpstr_j_ < pcmpstr_size_; pcmpstr_j_++)  \
          (RES)[pcmpstr_j_][pcmpstr_i_]                                 \
            = ((X)[pcmpstr_i_] == (Y)[pcmpstr_j_]);                     \
    }                                                                   \
  while (0)
/* Fill the range-comparison matrix.  X is read as consecutive
   (lower, upper) pairs at even/odd indices.  For every Y[j] and every
   pair starting at even index i:
     RES[j][i]     = (Y[j] >= X[i])
     RES[j][i + 1] = (Y[j] <= X[i + 1])
   The element count is derived from the element size of X: 16 for
   1-byte elements, 8 for 2-byte elements.  Comparisons use the element
   type of the arrays passed in, so signed and unsigned arrays give
   signed and unsigned comparisons respectively.  Arguments are
   evaluated more than once, so they must be free of side effects.  Use
   as a statement, followed by a semicolon.  */
#define PCMPSTR_RNG(X, Y, RES) \
  do                                                                    \
    {                                                                   \
      int pcmpstr_size_ = (sizeof (*(X)) ^ 3) * 8;                      \
      int pcmpstr_i_, pcmpstr_j_;                                       \
      for (pcmpstr_j_ = 0; pcmpstr_j_ < pcmpstr_size_; pcmpstr_j_++)    \
        for (pcmpstr_i_ = 0; pcmpstr_i_ < pcmpstr_size_ - 1;            \
             pcmpstr_i_ += 2)                                           \
          {                                                             \
            (RES)[pcmpstr_j_][pcmpstr_i_]                               \
              = ((Y)[pcmpstr_j_] >= (X)[pcmpstr_i_]);                   \
            (RES)[pcmpstr_j_][pcmpstr_i_ + 1]                           \
              = ((Y)[pcmpstr_j_] <= (X)[pcmpstr_i_ + 1]);               \
          }                                                             \
    }                                                                   \
  while (0)
/* Overwrite the entries of the DIM x DIM comparison matrix RES that
   involve elements beyond the operand lengths.  RES is indexed as
   RES[j][i]; column index i is checked against LA and row index j
   against LB.

   - i < LA and j >= LB: the entry is forced to 0.
   - i < LA and j < LB: the entry is left untouched.
   - i >= LA: the entry depends on the aggregation selected by
     MODE & 0x0C:
       _SIDD_CMP_EQUAL_ANY, _SIDD_CMP_RANGES   -> 0
       _SIDD_CMP_EQUAL_EACH                    -> 1 if j >= LB, else 0
       _SIDD_CMP_EQUAL_ORDERED                 -> 1

   Entries outside the DIM x DIM corner are not touched.  */
static void
override_invalid (unsigned char res[16][16], int la, int lb,
                  const int mode, int dim)
{
  int i, j;

  for (j = 0; j < dim; j++)
    for (i = 0; i < dim; i++)
      {
        if (i < la && j >= lb)
          res[j][i] = 0;
        else if (i >= la)
          {
            switch ((mode & 0x0C))
              {
              case _SIDD_CMP_EQUAL_ANY:
              case _SIDD_CMP_RANGES:
                res[j][i] = 0;
                break;
              case _SIDD_CMP_EQUAL_EACH:
                res[j][i] = (j >= lb) ? 1 : 0;
                break;
              case _SIDD_CMP_EQUAL_ORDERED:
                res[j][i] = 1;
                break;
              }
          }
      }
}
/* Build the element-by-element comparison matrix RES for operands A and
   B under MODE.

   MODE & 3 selects how the 128-bit operands are viewed (unsigned bytes,
   unsigned words, signed bytes or signed words).  When MODE & 0x0C is
   _SIDD_CMP_RANGES the matrix is filled by PCMPSTR_RNG, otherwise by
   PCMPSTR_EQ; in both cases A supplies the macro's X argument and B its
   Y argument.  Afterwards override_invalid rewrites the entries that
   lie beyond the lengths LA and LB, using a dimension of 16 for byte
   modes and 8 for word modes.  */
static void
calc_matrix (__m128i a, int la, __m128i b, int lb, const int mode,
             unsigned char res[16][16])
{
  union
    {
      __m128i x;
      signed char sc[16];
      unsigned char uc[16];
      signed short ss[8];
      unsigned short us[8];
    } d, s;

  d.x = a;
  s.x = b;

  /* The macro invocations are kept inside their own braces so that the
     if/else stays well-formed whatever statement form the macros expand
     to.  */
  switch ((mode & 3))
    {
    case _SIDD_UBYTE_OPS:
      if ((mode & 0x0C) == _SIDD_CMP_RANGES)
        {
          PCMPSTR_RNG (d.uc, s.uc, res);
        }
      else
        {
          PCMPSTR_EQ (d.uc, s.uc, res);
        }
      break;
    case _SIDD_UWORD_OPS:
      if ((mode & 0x0C) == _SIDD_CMP_RANGES)
        {
          PCMPSTR_RNG (d.us, s.us, res);
        }
      else
        {
          PCMPSTR_EQ (d.us, s.us, res);
        }
      break;
    case _SIDD_SBYTE_OPS:
      if ((mode & 0x0C) == _SIDD_CMP_RANGES)
        {
          PCMPSTR_RNG (d.sc, s.sc, res);
        }
      else
        {
          PCMPSTR_EQ (d.sc, s.sc, res);
        }
      break;
    case _SIDD_SWORD_OPS:
      if ((mode & 0x0C) == _SIDD_CMP_RANGES)
        {
          PCMPSTR_RNG (d.ss, s.ss, res);
        }
      else
        {
          PCMPSTR_EQ (d.ss, s.ss, res);
        }
      break;
    }

  override_invalid (res, la, lb, mode, (mode & 1) == 0 ? 16 : 8);
}
/* Compute the result bit mask of a string comparison of A (length LA)
   against B (length LB) under MODE.

   Negative lengths are replaced by their absolute value and lengths
   are clamped to the element count DIM (16 for byte modes, 8 for word
   modes).  The comparison matrix is built by calc_matrix and then
   reduced to one bit per element according to MODE & 0x0C:

     _SIDD_CMP_EQUAL_ANY      bit i set if any entry of row i is set
     _SIDD_CMP_RANGES         bit j set if, for some even i, both
                              mtx[j][i] and mtx[j][i + 1] are set
     _SIDD_CMP_EQUAL_EACH     bit i set if the diagonal entry mtx[i][i]
                              is set
     _SIDD_CMP_EQUAL_ORDERED  bit i set if mtx[i + j][j] is set for
                              every j with i + j < DIM

   MODE & 0x30 then selects the polarity: unchanged, all bits inverted,
   or only the bits below the clamped LB inverted.  The value returned
   is limited to the low DIM bits.  */
static int
calc_res (__m128i a, int la, __m128i b, int lb, const int mode)
{
  unsigned char mtx[16][16];
  int i, j, k, dim, res = 0;

  memset (mtx, 0, sizeof (mtx));

  dim = (mode & 1) == 0 ? 16 : 8;

  if (la < 0)
    la = -la;

  if (lb < 0)
    lb = -lb;

  if (la > dim)
    la = dim;

  if (lb > dim)
    lb = dim;

  calc_matrix (a, la, b, lb, mode, mtx);

  switch ((mode & 0x0C))
    {
    case _SIDD_CMP_EQUAL_ANY:
      for (i = 0; i < dim; i++)
        for (j = 0; j < dim; j++)
          if (mtx[i][j])
            res |= (1 << i);
      break;

    case _SIDD_CMP_RANGES:
      for (i = 0; i < dim; i += 2)
        for (j = 0; j < dim; j++)
          if (mtx[j][i] && mtx[j][i + 1])
            res |= (1 << j);
      break;

    case _SIDD_CMP_EQUAL_EACH:
      for (i = 0; i < dim; i++)
        if (mtx[i][i])
          res |= (1 << i);
      break;

    case _SIDD_CMP_EQUAL_ORDERED:
      for (i = 0; i < dim; i++)
        {
          unsigned char val = 1;

          /* Walk the diagonal that starts at row i, column 0.  */
          for (j = 0, k = i; j < dim - i && k < dim; j++, k++)
            val &= mtx[k][j];

          if (val)
            res |= (1 << i);
          else
            res &= ~(1 << i);
        }
      break;
    }

  switch ((mode & 0x30))
    {
    case _SIDD_POSITIVE_POLARITY:
    case _SIDD_MASKED_POSITIVE_POLARITY:
      break;

    case _SIDD_NEGATIVE_POLARITY:
      res ^= -1;
      break;

    case _SIDD_MASKED_NEGATIVE_POLARITY:
      /* Invert only the bits that correspond to elements below LB.  */
      for (i = 0; i < lb; i++)
        if (res & (1 << i))
          res &= ~(1 << i);
        else
          res |= (1 << i);
      break;
    }

  return res & ((dim == 8) ? 0xFF : 0xFFFF);
}
205 static int
206 cmp_flags (__m128i a, int la, __m128i b, int lb,
207 int mode, int res2, int is_implicit)
209 int i;
210 int flags = 0;
211 int is_bytes_mode = (mode & 1) == 0;
212 union
214 __m128i x;
215 unsigned char uc[16];
216 unsigned short us[8];
217 } d, s;
219 d.x = a;
220 s.x = b;
222 /* CF: reset if (RES2 == 0), set otherwise. */
223 if (res2 != 0)
224 flags |= CFLAG;
226 if (is_implicit)
228 /* ZF: set if any byte/word of src xmm operand is null, reset
229 otherwise.
230 SF: set if any byte/word of dst xmm operand is null, reset
231 otherwise. */
233 if (is_bytes_mode)
235 for (i = 0; i < 16; i++)
237 if (s.uc[i] == 0)
238 flags |= ZFLAG;
239 if (d.uc[i] == 0)
240 flags |= SFLAG;
243 else
245 for (i = 0; i < 8; i++)
247 if (s.us[i] == 0)
248 flags |= ZFLAG;
249 if (d.us[i] == 0)
250 flags |= SFLAG;
254 else
256 /* ZF: set if abs value of EDX/RDX < 16 (8), reset otherwise.
257 SF: set if abs value of EAX/RAX < 16 (8), reset otherwise. */
258 int max_ind = is_bytes_mode ? 16 : 8;
260 if (la < 0)
261 la = -la;
262 if (lb < 0)
263 lb = -lb;
265 if (lb < max_ind)
266 flags |= ZFLAG;
267 if (la < max_ind)
268 flags |= SFLAG;
271 /* OF: equal to RES2[0]. */
272 if ((res2 & 0x1))
273 flags |= OFLAG;
275 /* AF: Reset.
276 PF: Reset. */
277 return flags;
/* Return the index of a set bit in the result mask computed by calc_res
   for A (length LA) and B (length LB) under MODE, and store that mask
   in *RES2.

   When MODE & 0x40 is set the highest set bit below the element count
   is reported, otherwise the lowest one.  If no bit is set the element
   count itself (16 for byte modes, 8 for word modes) is returned.  */
static int
cmp_indexed (__m128i a, int la, __m128i b, int lb,
             const int mode, int *res2)
{
  int i, ndx;
  int dim = (mode & 1) == 0 ? 16 : 8;
  int r2;

  r2 = calc_res (a, la, b, lb, mode);

  /* Default answer when the mask is empty.  */
  ndx = dim;
  if ((mode & 0x40))
    {
      for (i = dim - 1; i >= 0; i--)
        if (r2 & (1 << i))
          {
            ndx = i;
            break;
          }
    }
  else
    {
      for (i = 0; i < dim; i++)
        if ((r2 & (1 << i)))
          {
            ndx = i;
            break;
          }
    }

  *res2 = r2;
  return ndx;
}
/* Return the result of comparing A (length LA) with B (length LB) under
   MODE as a 128-bit mask, and store the integer result mask computed by
   calc_res in *RES2.

   When MODE & 0x40 is set, each result bit is expanded to a whole
   element: all ones (-1) if the bit is set, zero otherwise, using word
   elements in word modes and byte elements in byte modes.  Otherwise
   the integer mask is copied into the start of an otherwise zero
   vector: its first short in byte modes, its first char in word modes.
   NOTE(review): that copy goes through a union over the int, so it
   picks up the low-order bits only on a little-endian target such as
   x86 -- confirm if this is ever reused elsewhere.  */
static __m128i
cmp_masked (__m128i a, int la, __m128i b, int lb,
            const int mode, int *res2)
{
  union
    {
      __m128i x;
      char c[16];
      short s[8];
    } ret;
  int i;
  int dim = (mode & 1) == 0 ? 16 : 8;
  union
    {
      int i;
      char c[4];
      short s[2];
    } r2;

  r2.i = calc_res (a, la, b, lb, mode);

  memset (&ret, 0, sizeof (ret));

  if (mode & 0x40)
    {
      for (i = 0; i < dim; i++)
        if (dim == 8)
          ret.s[i] = (r2.i & (1 << i)) ? -1 : 0;
        else
          ret.c[i] = (r2.i & (1 << i)) ? -1 : 0;
    }
  else
    {
      if (dim == 16)
        ret.s[0] = r2.s[0];
      else
        ret.c[0] = r2.c[0];
    }

  *res2 = r2.i;

  return ret.x;
}
/* Return the number of leading non-zero elements of A, i.e. the index
   of the first zero element.  MODE bit 0 selects 8 word elements (set)
   or 16 byte elements (clear).  When no element is zero the element
   count (8 or 16) is returned.  */
static int
calc_str_len (__m128i a, const int mode)
{
  union
    {
      __m128i x;
      char c[16];
      short s[8];
    } s;
  int i;
  int dim = (mode & 1) == 0 ? 16 : 8;

  s.x = a;

  if ((mode & 1))
    {
      for (i = 0; i < dim; i++)
        if (s.s[i] == 0)
          break;
    }
  else
    {
      for (i = 0; i < dim; i++)
        if (s.c[i] == 0)
          break;
    }

  return i;
}
/* Explicit-length, index-returning comparison.  Compares *A (length LA)
   with *B (length LB) under MODE via cmp_indexed and returns the
   resulting index.  When FLAGS is not NULL, *FLAGS receives the flags
   word computed by cmp_flags in explicit-length mode.  */
static inline int
cmp_ei (__m128i *a, int la, __m128i *b, int lb,
        const int mode, int *flags)
{
  int res2;
  int index = cmp_indexed (*a, la, *b, lb, mode, &res2);

  if (flags != NULL)
    *flags = cmp_flags (*a, la, *b, lb, mode, res2, 0);

  return index;
}
/* Implicit-length, index-returning comparison.  The lengths of *A and
   *B are obtained with calc_str_len (index of the first zero element),
   then the operands are compared under MODE via cmp_indexed and the
   resulting index is returned.  When FLAGS is not NULL, *FLAGS receives
   the flags word computed by cmp_flags in implicit-length mode.  */
static inline int
cmp_ii (__m128i *a, __m128i *b, const int mode, int *flags)
{
  int la, lb;
  int res2;
  int index;

  la = calc_str_len (*a, mode);
  lb = calc_str_len (*b, mode);

  index = cmp_indexed (*a, la, *b, lb, mode, &res2);

  if (flags != NULL)
    *flags = cmp_flags (*a, la, *b, lb, mode, res2, 1);

  return index;
}
419 static inline __m128i
420 cmp_em (__m128i *a, int la, __m128i *b, int lb,
421 const int mode, int *flags )
423 int res2;
424 __m128i mask = cmp_masked (*a, la, *b, lb, mode, &res2);
426 if (flags != NULL)
427 *flags = cmp_flags (*a, la, *b, lb, mode, res2, 0);
429 return mask;
432 static inline __m128i
433 cmp_im (__m128i *a, __m128i *b, const int mode, int *flags)
435 int la, lb;
436 int res2;
437 __m128i mask;
439 la = calc_str_len (*a, mode);
440 lb = calc_str_len (*b, mode);
442 mask = cmp_masked (*a, la, *b, lb, mode, &res2);
443 if (flags != NULL)
444 *flags = cmp_flags (*a, la, *b, lb, mode, res2, 1);
446 return mask;