Update concepts branch to revision 131834
[official-gcc.git] / gcc / testsuite / gcc.target / i386 / reload-1.c
blob 8ccfcb55d7fc9da68b87fd11ed6f7625e4944efd
/* { dg-do compile } */
/* { dg-require-effective-target ilp32 } */
/* { dg-options "-O3 -msse2 -fdump-rtl-csa" } */

#include <emmintrin.h>
#include <stdint.h>

/* size_t is taken from the compiler's predefined macro rather than from a
   library header.  */
typedef __SIZE_TYPE__ size_t;
/* 16-byte vector types (GCC vector extension) holding floats and doubles
   respectively; they are passed directly to the SSE/SSE2 intrinsics.  */
typedef float vFloat __attribute__ ((__vector_size__ (16)));
typedef double vDouble __attribute__ ((__vector_size__ (16)));
/* Buffer descriptor handed to bar () by pointer.  */
typedef struct buf
{
  void *data;		/* Storage; bar () accesses it as float *.  */
  unsigned long h;	/* Not referenced by bar ().  */
  unsigned long w;	/* bar () uses the destination's w as its loop bound.  */
  size_t bytes;		/* Not referenced by bar ().  */
} buf;
/* Generic job header; fj embeds one as its first member.

   NOTE(review): the members below name `struct Job' (capital J), a tag
   distinct from `struct job' and not defined in the visible source, so
   these are pointers to an incomplete type.  The spelling is kept as-is:
   this is a compile-only test and the pointers are never dereferenced
   here.  */
typedef struct job
{
  struct Job *next;
  void * info;
  long (*func)(struct Job *job);
  long error;
} job;
27 typedef struct fj
29 job hd;
30 buf src;
31 buf dest;
32 float g;
33 unsigned int flags;
34 } fj;
/* Two 256-entry tables of doubles indexed by bar ().  They have static
   storage and no initializer, so every entry is zero.  */
static const double r[256], t[256];
38 long bar (const buf *src, const buf *dest, float g, unsigned int flags)
40 float *d0 = (float*) src->data;
41 float *d1 = (float*) dest->data;
42 uintptr_t w = dest->w;
43 uintptr_t idx;
44 vFloat p0;
45 static const vFloat m0;
46 static const vDouble p[3], m, b;
47 float *sr = d0;
48 float *dr = d1;
49 for( idx = 0; idx + 8 <= w; idx += 8 )
51 vFloat f0 = _mm_loadu_ps (sr);
52 vFloat f1 = _mm_loadu_ps (sr + 4);
53 sr += 8;
54 vFloat fa0 = _mm_andnot_ps (m0, f0);
55 vFloat fa1 = _mm_andnot_ps (m0, f1);
56 vDouble v0 = _mm_cvtps_pd (fa0);
57 vDouble v1 = _mm_cvtps_pd (_mm_movehl_ps (fa0, fa0));
58 vDouble v2 = _mm_cvtps_pd (fa1);
59 vDouble v3 = _mm_cvtps_pd (_mm_movehl_ps (fa1, fa1));
60 vDouble vi0, vi1, vi2, vi3;
61 __m128i b0, b1, b2, b3;
62 b0 = _mm_packs_epi32 (_mm_packs_epi32 (b0, b1), _mm_packs_epi32 (b2, b3));
63 b1 = _mm_srli_epi64 (b0, 32);
64 unsigned int i0 = _mm_cvtsi128_si32 (b0);
65 unsigned int i2 = _mm_cvtsi128_si32 (b1);
66 v0 -= _mm_loadh_pd (_mm_load_sd (r + (i0 & 0xff)), r + (i0 >> 16));
67 v1 -= _mm_loadh_pd (_mm_load_sd (r + (i2 & 0xff)), r + (i2 >> 16));
68 b0 = _mm_unpackhi_epi64 (b0, b0);
69 b1 = _mm_unpackhi_epi64 (b1, b1);
70 unsigned int i4 = _mm_cvtsi128_si32 (b0);
71 unsigned int i6 = _mm_cvtsi128_si32 (b1);
72 v2 -= _mm_loadh_pd (_mm_load_sd (r + (i4 & 0xff)), r + (i4 >> 16));
73 v3 -= _mm_loadh_pd (_mm_load_sd (r + (i6 & 0xff)), r + (i6 >> 16));
74 v0 = p[0] + (p[1] + p[2] * v0) * v0;
75 v1 = p[0] + (p[1] + p[2] * v1) * v1;
76 v2 = p[0] + (p[1] + p[2] * v2) * v2;
77 v3 = p[0] + (p[1] + p[2] * v3) * v3;
78 vi0 = (vDouble) _mm_slli_epi64 ((__m128i)((vi0 + b) + m), 52);
79 vi1 = (vDouble) _mm_slli_epi64 ((__m128i)((vi1 + b) + m), 52);
80 vi2 = (vDouble) _mm_slli_epi64 ((__m128i)((vi2 + b) + m), 52);
81 vi3 = (vDouble) _mm_slli_epi64 ((__m128i)((vi3 + b) + m), 52);
82 vi0 *= _mm_loadh_pd (_mm_load_sd (t + (i0 & 0xff)), t + (i0 >> 16));
83 vi1 *= _mm_loadh_pd (_mm_load_sd (t + (i2 & 0xff)), t + (i2 >> 16));
84 vi2 *= _mm_loadh_pd (_mm_load_sd (t + (i4 & 0xff)), t + (i4 >> 16));
85 vi3 *= _mm_loadh_pd (_mm_load_sd (t + (i6 & 0xff)), t + (i6 >> 16));
86 v0 *= vi0;
87 v1 *= vi1;
88 v2 *= vi2;
89 v3 *= vi3;
90 vFloat r0 = _mm_movelh_ps (_mm_cvtpd_ps( v0 ), _mm_cvtpd_ps (v1));
91 vFloat r1 = _mm_movelh_ps (_mm_cvtpd_ps( v2 ), _mm_cvtpd_ps (v3));
92 vFloat z0 = _mm_cmpeq_ps (f0, _mm_setzero_ps());
93 vFloat z1 = _mm_cmpeq_ps (f1, _mm_setzero_ps());
94 r0 = _mm_andnot_ps (z0, r0);
95 r1 = _mm_andnot_ps (z1, r1);
96 z0 = _mm_and_ps (z0, p0);
97 z1 = _mm_and_ps (z1, p0);
98 r0 = _mm_or_ps (r0, z0);
99 r1 = _mm_or_ps (r1, z1);
100 _mm_storeu_ps (dr, r0);
101 _mm_storeu_ps (dr + 4, r1);
102 dr += 8;
104 return 0;
107 long foo (job *j )
109 fj *jd = (fj*) j;
110 return bar (&jd->src, &jd->dest, jd->g, jd->flags);
/* { dg-final { scan-rtl-dump-not "deleted 1 dead insns" "csa" } } */
/* { dg-final { cleanup-rtl-dump "csa" } } */