Update concepts branch to revision 131834
[official-gcc.git] / gcc / testsuite / gcc.target / i386 / pr23570.c
blob 1542663fa22cd74ec3499b4fd332e2424c7c2892
1 /* { dg-do compile } */
2 /* { dg-options "-O2 -msse2" } */
/* 16-byte float vector types, declared here with the __vector_size__
   attribute; the visible part of this file includes no header.  __v4sf and
   __m128 have identical declarations.  */
4 typedef float __v4sf __attribute__ ((__vector_size__ (16)));
5 typedef float __m128 __attribute__ ((__vector_size__ (16)));
/* 16-byte vector of long long; used below as the operand type in the casts
   around the pand128/por128 builtins.  */
6 typedef long long __v2di __attribute__ ((__vector_size__ (16)));
/* Returns __builtin_ia32_cmpeqps applied to __A and __B (each cast to
   __v4sf), with the result cast back to __m128.
   NOTE(review): the embedded numbering skips 10 and 12, so the lines that
   held only this function's braces appear to be missing from this copy --
   confirm against the upstream test.  */
8 static __inline __m128
9 _mm_cmpeq_ps (__m128 __A, __m128 __B)
11 return (__m128) __builtin_ia32_cmpeqps ((__v4sf)__A, (__v4sf)__B);
/* Builds a __v4sf compound literal whose elements are the arguments in
   parameter order (__Z, __Y, __X, __W) and returns it cast to __m128.
   __extension__ is applied to the whole returned expression.
   NOTE(review): brace-only lines (embedded numbers 16 and 18) appear to be
   missing from this copy -- confirm against the upstream test.  */
14 static __inline __m128
15 _mm_setr_ps (float __Z, float __Y, float __X, float __W)
17 return __extension__ (__m128)(__v4sf){__Z, __Y, __X, __W };
/* Passes __A and __B, each cast to __v2di, to __builtin_ia32_pand128 and
   returns the result cast to __m128.  Note that in this file the parameters
   and the return value are the float vector type __m128, not an integer
   vector type.
   NOTE(review): brace-only lines (embedded numbers 22 and 24) appear to be
   missing from this copy -- confirm against the upstream test.  */
20 static __inline __m128
21 _mm_and_si128 (__m128 __A, __m128 __B)
23 return (__m128)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B);
/* Passes __A and __B, each cast to __v2di, to __builtin_ia32_por128 and
   returns the result cast to __m128.  As with _mm_and_si128 in this file,
   the interface uses the float vector type __m128.
   NOTE(review): brace-only lines (embedded numbers 28 and 30) appear to be
   missing from this copy -- confirm against the upstream test.  */
26 static __inline __m128
27 _mm_or_si128 (__m128 __A, __m128 __B)
29 return (__m128)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B);
/* Union overlaying one __m128 vector (xmmi) with an array of four ints (si),
   declared with 16-byte alignment.  sse_max_abs_indexf uses it to read a
   single int element out of a vector value.
   NOTE(review): the lines holding the union's braces (embedded numbers 33
   and 36) appear to be missing from this copy -- confirm against upstream.  */
32 typedef union
34 __m128 xmmi;
35 int si[4];
37 __attribute__ ((aligned (16))) um128;
/* File-scope um128 object.  sse_max_abs_indexf declares a local named u as
   well, which shadows this one there; no other use of the name is visible
   in this file.  */
39 um128 u;
/* Part of a compile-only test (see the DejaGnu directives at the top of the
   file): this code only has to be accepted by the compiler.

   Returns ui.si[n], i.e. int element n of the final mim vector viewed
   through the um128 union.  n is not range-checked against the 4-element si
   array in the visible code.

   NOTE(review): the embedded numbering skips 43, 52, 57 and 60, so lines
   holding only braces appear to have been dropped from this copy; any
   nesting mentioned below is inferred -- confirm against the upstream test.
   NOTE(review): m1, mi, mim, n4, step2, step3 and the local u are all read
   without any visible assignment.  Presumably this is left over from
   reducing a larger routine for the bug report; do not "fix" it without
   checking that the test still exercises the original compiler problem.  */
41 static inline int
42 sse_max_abs_indexf (float *v, int step, int n)
44 __m128 m1, mm;
45 __m128 mim, mi, msk;
/* This local u shadows the file-scope um128 object of the same name.  */
46 um128 u, ui;
47 int n4, step2, step3;
/* mm = andps of a 4-float vector literal { 0.0, v[step], v[step2],
   v[step3] } with u.xmmi.  */
48 mm = __builtin_ia32_andps ((__m128) (__v4sf)
49 { 0.0, v[step], v[step2], v[step3] }
50 , u.xmmi);
51 if (n4)
53 int i;
/* The trailing ';' gives this loop an empty body, so the two statements
   that follow are not executed per iteration.  */
54 for (i = 0; i < n4; ++i);
/* msk is the cmpeqps result for m1 and mm; mim becomes (msk AND mi) OR mim
   via the pand128/por128 wrappers above.  */
55 msk = (__m128) _mm_cmpeq_ps (m1, mm);
56 mim = _mm_or_si128 (_mm_and_si128 (msk, mi), mim);
/* Store the vector into the union so one int element can be read back.  */
58 ui.xmmi = (__m128) mim;
59 return ui.si[n];
/* Walks r1 forward over the first n4 floats, where n4 is n with the
   remainder of division by 4 removed ((n / 4) * 4), storing *r2 into each
   element.  In the visible lines r2 is never advanced and nothing is
   written through r2, so despite the name this is a one-way copy of a
   single source element.
   NOTE(review): presumably reduced from a real row-swap routine for the
   bug report -- keep as-is unless the upstream test says otherwise.
   NOTE(review): brace-only lines (embedded numbers 64, 68, 71 and 72)
   appear to be missing from this copy -- confirm against upstream.  */
62 static void
63 sse_swap_rowf (float *r1, float *r2, int n)
65 int n4 = (n / 4) * 4;
/* One-past-the-end pointer for the n4-element prefix of r1.  */
66 float *r14end = r1 + n4;
67 while (r1 < r14end)
69 *r1 = *r2;
70 r1++;
/* The only function in this file with external linkage.  For each i in
   [0, n - 1), with pm starting at m and advancing by nw floats per
   iteration, it:
     - calls sse_max_abs_indexf (pm + i, nw, n - i) to obtain vi;
     - if vi is non-zero, calls sse_swap_rowf on pm and pm + vi * nw and
       swap_index on prow with i and i + vi;
     - calls sse_add_rowf for every j in (i, n), with pt starting at
       pm + nw and advancing by nw floats.
   NOTE(review): swap_index and sse_add_rowf are neither declared nor
   defined in the visible source -- confirm whether that is intentional in
   the upstream test.
   NOTE(review): brace-only lines (embedded numbers 76, 80, 85 and 88, plus
   the closing lines after 90) are not visible in this copy, so the nesting
   above is inferred -- confirm against upstream.  */
74 void
75 ludcompf (float *m, int nw, int *prow, int n)
/* s is initialised but not used in the visible lines.  */
77 int i, s = 0;
78 float *pm;
79 for (i = 0, pm = m; i < n - 1; ++i, pm += nw)
81 int vi = sse_max_abs_indexf (pm + i, nw, n - i);
82 float *pt;
83 int j;
84 if (vi != 0)
86 sse_swap_rowf (pm, pm + vi * nw, nw);
87 swap_index (prow, i, i + vi);
89 for (j = i + 1, pt = pm + nw; j < n; ++j, pt += nw)
90 sse_add_rowf (pt + i + 1, pm + i + 1, -1.0, n - i - 1);