/* { dg-do compile } */
/* { dg-options "-O2 -msse2" } */
/* 16-byte GCC vector types used by the SSE code in this test.
   __v4sf and __m128 are both vectors of float; __v2di is a vector of
   long long.  Each is declared with __vector_size__ (16), so every type
   occupies exactly 16 bytes.  */
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
typedef float __m128 __attribute__ ((__vector_size__ (16)));
typedef long long __v2di __attribute__ ((__vector_size__ (16)));
/* _mm_cmpeq_ps (__A, __B): casts both __m128 operands to __v4sf, passes
   them to __builtin_ia32_cmpeqps, and casts the builtin's result back to
   __m128.
   NOTE(review): the declaration-specifier line and the enclosing braces
   of this definition are not visible in this view -- presumably
   `static __inline __m128` like the sibling wrappers; confirm against the
   original file.  */
9 _mm_cmpeq_ps (__m128 __A
, __m128 __B
)
11 return (__m128
) __builtin_ia32_cmpeqps ((__v4sf
)__A
, (__v4sf
)__B
);
14 static __inline __m128
15 _mm_setr_ps (float __Z
, float __Y
, float __X
, float __W
)
17 return __extension__ (__m128
)(__v4sf
){__Z
, __Y
, __X
, __W
};
20 static __inline __m128
21 _mm_and_si128 (__m128 __A
, __m128 __B
)
23 return (__m128
)__builtin_ia32_pand128 ((__v2di
)__A
, (__v2di
)__B
);
26 static __inline __m128
27 _mm_or_si128 (__m128 __A
, __m128 __B
)
29 return (__m128
)__builtin_ia32_por128 ((__v2di
)__A
, (__v2di
)__B
);
/* Tail of the declaration that introduces the name um128 and requests
   16-byte alignment for it.  NOTE(review): the beginning of this
   declaration is not visible in this view; a later statement accesses a
   member named xmmi on an object that is presumably of this type --
   confirm against the original file.  */
37 __attribute__ ((aligned (16))) um128
;
/* Fragment of sse_max_abs_indexf (float *v, int step, int n).
   Only some statements are visible in this view; the return type, local
   declarations, braces and the return statement are not shown, so these
   notes cover only what can be read here:
   - mm is assigned the result of __builtin_ia32_andps; its first operand
     is the vector { 0.0, v[step], v[step2], v[step3] }, the remaining
     operand(s) are not visible.
   - The statement `for (i = 0; i < n4; ++i);` ends in ';', so as shown it
     has an empty body and the msk/mim statements after it run once, not
     per iteration.  NOTE(review): this looks like a reduced test case;
     confirm the empty loop body is intentional.
   - msk receives _mm_cmpeq_ps (m1, mm); mim becomes (msk AND mi) OR mim
     via _mm_and_si128 and _mm_or_si128.
   - mim is then stored into ui.xmmi.  */
42 sse_max_abs_indexf (float *v
, int step
, int n
)
48 mm
= __builtin_ia32_andps ((__m128
) (__v4sf
)
49 { 0.0, v
[step
], v
[step2
], v
[step3
] }
54 for (i
= 0; i
< n4
; ++i
);
55 msk
= (__m128
) _mm_cmpeq_ps (m1
, mm
);
56 mim
= _mm_or_si128 (_mm_and_si128 (msk
, mi
), mim
);
58 ui
.xmmi
= (__m128
) mim
;
/* Fragment of sse_swap_rowf (float *r1, float *r2, int n).
   The only visible statement initialises r14end to r1 + n4; the
   definition of n4 and the rest of the body are not visible in this
   view.  NOTE(review): the name suggests it exchanges two rows of
   floats -- confirm against the original file.  */
63 sse_swap_rowf (float *r1
, float *r2
, int n
)
66 float *r14end
= r1
+ n4
;
/* Fragment of ludcompf (float *m, int nw, int *prow, int n).
   Visible structure (declarations, braces and any statements on the
   elided lines are not shown):
   - Outer loop: i runs from 0 while i < n - 1; pm starts at m and
     advances by nw each iteration.
   - vi = sse_max_abs_indexf (pm + i, nw, n - i).
   - sse_swap_rowf (pm, pm + vi * nw, nw) and swap_index (prow, i, i + vi)
     follow.  NOTE(review): lines between the vi computation and these
     calls are not visible, so whether the calls are conditional cannot be
     told from here.
   - Inner loop: j runs from i + 1 while j < n; pt starts at pm + nw and
     advances by nw; each iteration calls
     sse_add_rowf (pt + i + 1, pm + i + 1, -1.0, n - i - 1).
   swap_index and sse_add_rowf are not defined in the visible part of the
   file.  NOTE(review): presumably an LU decomposition with row pivoting
   over rows nw floats apart -- confirm against the original file.  */
75 ludcompf (float *m
, int nw
, int *prow
, int n
)
79 for (i
= 0, pm
= m
; i
< n
- 1; ++i
, pm
+= nw
)
81 int vi
= sse_max_abs_indexf (pm
+ i
, nw
, n
- i
);
86 sse_swap_rowf (pm
, pm
+ vi
* nw
, nw
);
87 swap_index (prow
, i
, i
+ vi
);
89 for (j
= i
+ 1, pt
= pm
+ nw
; j
< n
; ++j
, pt
+= nw
)
90 sse_add_rowf (pt
+ i
+ 1, pm
+ i
+ 1, -1.0, n
- i
- 1);