2018-03-08 Richard Biener <rguenther@suse.de>
[official-gcc.git] / gcc / testsuite / gcc.target / powerpc / pr80718.c
blobb001e2f63f974b60dfcd3775118bec53013ae8a2
1 /* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
2 /* { dg-skip-if "" { powerpc*-*-darwin* } } */
3 /* { dg-require-effective-target powerpc_p8vector_ok } */
4 /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
5 /* { dg-options "-mcpu=power8 -O3 -ffast-math" } */
7 /* Taken from the Spec 2006 milc benchmark. Ultimately, GCC wants to generate
8 a DF splat from offsettable memory. The register allocator decided it was
9 better to do the load in the GPR registers and do a move direct, rather than
10 doing a load in the VSX register sets. */
/* Complex number stored as two doubles: real part, then imaginary part.
   NOTE(review): the embedded line numbers skip 13, so the opening-brace line
   appears to be missing from this copy -- confirm against upstream.  */
12 typedef struct
14 double real;
15 double imag;
16 } complex;
/* Second complex type with the same two double members (real, imag) as
   `complex`, declared as a distinct typedef.  */
18 typedef struct
20 double real;
21 double imag;
22 } double_complex;
/* Prototypes for helpers returning `complex` by value.  Only the
   declarations are present in this file; none of them is defined or called
   here.  */
24 complex cmplx (double x, double y);
25 complex cadd (complex * a, complex * b);
26 complex cmul (complex * a, complex * b);
27 complex csub (complex * a, complex * b);
28 complex cdiv (complex * a, complex * b);
29 complex conjg (complex * a);
30 complex ce_itheta (double theta);
/* Prototypes for the `double_complex` counterparts of the helpers above,
   plus dcexp/dclog/dcsqrt.  Declarations only; no definitions are visible
   in this file.  */
32 double_complex dcmplx (double x, double y);
33 double_complex dcadd (double_complex * a, double_complex * b);
34 double_complex dcmul (double_complex * a, double_complex * b);
35 double_complex dcsub (double_complex * a, double_complex * b);
36 double_complex dcdiv (double_complex * a, double_complex * b);
37 double_complex dconjg (double_complex * a);
38 double_complex dcexp (double_complex * a);
39 double_complex dclog (double_complex * a);
40 double_complex dcsqrt (double_complex * z);
41 double_complex dce_itheta (double theta);
/* State record passed to myrand and the other random-number prototypes:
   ten unsigned long fields and one double scale.
   NOTE(review): presumably a pseudo-random generator state (r0..r6 plus
   multiplier/addend) -- the generator itself is not visible here.  */
43 typedef struct
45 unsigned long r0, r1, r2, r3, r4, r5, r6;
46 unsigned long multiplier, addend, ic_state;
47 double scale;
48 } double_prn;
/* Declaration only; takes a pointer to the state record above.  */
50 double myrand (double_prn * prn_pt);
/* 3x3 array of `complex`, indexed e[row][column].  This is the type of the
   first argument of mult_adj_su3_mat_vec, the function under test.  */
52 typedef struct
54 complex e[3][3];
55 } su3_matrix;
/* Array of three `complex` values.  This is the type of the input and
   output vectors of mult_adj_su3_mat_vec, the function under test.  */
57 typedef struct
59 complex c[3];
60 } su3_vector;
/* Three complex members (m01, m02, m12) and four doubles (m00im, m11im,
   m22im, space).
   NOTE(review): the member names suggest a compressed anti-Hermitian 3x3
   matrix with `space` as padding -- not verifiable from this file.  */
62 typedef struct
64 complex m01, m02, m12;
65 double m00im, m11im, m22im;
66 double space;
67 } anti_hermitmat;
/* 2x2 array of `complex`.  */
69 typedef struct
71 complex e[2][2];
72 } su2_matrix;
/* Four su3_vector elements (4 x 3 complex values).  */
73 typedef struct
75 su3_vector d[4];
76 } wilson_vector;
/* Two su3_vector elements, i.e. half as many as wilson_vector.  */
77 typedef struct
79 su3_vector h[2];
80 } half_wilson_vector;
/* Three wilson_vector elements.  */
81 typedef struct
83 wilson_vector c[3];
84 } color_wilson_vector;
/* Four wilson_vector elements.  */
85 typedef struct
87 wilson_vector d[4];
88 } spin_wilson_vector;
/* Four color_wilson_vector elements.  */
89 typedef struct
91 color_wilson_vector d[4];
92 } wilson_matrix;
/* Three spin_wilson_vector elements.  */
93 typedef struct
95 spin_wilson_vector c[3];
96 } wilson_propagator;
/* Prototypes for routines operating on su3_matrix (and anti_hermitmat)
   objects.  Declarations only: none of these is defined or called in this
   file, so their behaviour cannot be confirmed from here.  */
98 void mult_su3_nn (su3_matrix * a, su3_matrix * b, su3_matrix * c);
99 void mult_su3_na (su3_matrix * a, su3_matrix * b, su3_matrix * c);
100 void mult_su3_an (su3_matrix * a, su3_matrix * b, su3_matrix * c);
101 double realtrace_su3 (su3_matrix * a, su3_matrix * b);
102 complex trace_su3 (su3_matrix * a);
103 complex complextrace_su3 (su3_matrix * a, su3_matrix * b);
104 complex det_su3 (su3_matrix * a);
105 void add_su3_matrix (su3_matrix * a, su3_matrix * b, su3_matrix * c);
106 void sub_su3_matrix (su3_matrix * a, su3_matrix * b, su3_matrix * c);
107 void scalar_mult_su3_matrix (su3_matrix * src, double scalar,
108 su3_matrix * dest);
109 void scalar_mult_add_su3_matrix (su3_matrix * src1, su3_matrix * src2,
110 double scalar, su3_matrix * dest);
111 void scalar_mult_sub_su3_matrix (su3_matrix * src1, su3_matrix * src2,
112 double scalar, su3_matrix * dest);
113 void c_scalar_mult_su3mat (su3_matrix * src, complex * scalar,
114 su3_matrix * dest);
115 void c_scalar_mult_add_su3mat (su3_matrix * src1, su3_matrix * src2,
116 complex * scalar, su3_matrix * dest);
117 void c_scalar_mult_sub_su3mat (su3_matrix * src1, su3_matrix * src2,
118 complex * scalar, su3_matrix * dest);
119 void su3_adjoint (su3_matrix * a, su3_matrix * b);
120 void make_anti_hermitian (su3_matrix * m3, anti_hermitmat * ah3);
121 void random_anti_hermitian (anti_hermitmat * mat_antihermit,
122 double_prn * prn_pt);
123 void uncompress_anti_hermitian (anti_hermitmat * mat_anti, su3_matrix * mat);
124 void compress_anti_hermitian (su3_matrix * mat, anti_hermitmat * mat_anti);
125 void clear_su3mat (su3_matrix * dest);
126 void su3mat_copy (su3_matrix * a, su3_matrix * b);
127 void dumpmat (su3_matrix * m);
/* Prototypes for routines operating on su3_vector objects.  Declarations
   only; no definitions are visible in this file.  */
129 void su3_projector (su3_vector * a, su3_vector * b, su3_matrix * c);
130 complex su3_dot (su3_vector * a, su3_vector * b);
131 double su3_rdot (su3_vector * a, su3_vector * b);
132 double magsq_su3vec (su3_vector * a);
133 void su3vec_copy (su3_vector * a, su3_vector * b);
134 void dumpvec (su3_vector * v);
135 void clearvec (su3_vector * v);
/* Prototypes for su3_matrix-times-su3_vector routines.  This group includes
   the prototype of mult_adj_su3_mat_vec, which is the only function in this
   file that also has a definition (at the end of the file).  */
137 void mult_su3_mat_vec (su3_matrix * a, su3_vector * b, su3_vector * c);
138 void mult_su3_mat_vec_sum (su3_matrix * a, su3_vector * b, su3_vector * c);
139 void mult_su3_mat_vec_sum_4dir (su3_matrix * a, su3_vector * b0,
140 su3_vector * b1, su3_vector * b2,
141 su3_vector * b3, su3_vector * c);
142 void mult_su3_mat_vec_nsum (su3_matrix * a, su3_vector * b, su3_vector * c);
143 void mult_adj_su3_mat_vec (su3_matrix * a, su3_vector * b, su3_vector * c);
144 void mult_adj_su3_mat_vec_4dir (su3_matrix * a, su3_vector * b,
145 su3_vector * c);
146 void mult_adj_su3_mat_4vec (su3_matrix * mat, su3_vector * src,
147 su3_vector * dest0, su3_vector * dest1,
148 su3_vector * dest2, su3_vector * dest3);
149 void mult_adj_su3_mat_vec_sum (su3_matrix * a, su3_vector * b,
150 su3_vector * c);
151 void mult_adj_su3_mat_vec_nsum (su3_matrix * a, su3_vector * b,
152 su3_vector * c);
/* Prototypes for su3_vector addition/subtraction routines.  Declarations
   only; no definitions are visible in this file.  */
154 void add_su3_vector (su3_vector * a, su3_vector * b, su3_vector * c);
155 void sub_su3_vector (su3_vector * a, su3_vector * b, su3_vector * c);
156 void sub_four_su3_vecs (su3_vector * a, su3_vector * b1, su3_vector * b2,
157 su3_vector * b3, su3_vector * b4);
/* Prototypes for scalar-multiply routines on su3_vector, wilson_vector and
   half_wilson_vector.  The scalar is passed either as a double or as a
   complex (by pointer, except c_scalar_mult_add_wvec2 which takes it by
   value).  Declarations only; no definitions are visible in this file.  */
159 void scalar_mult_su3_vector (su3_vector * src, double scalar,
160 su3_vector * dest);
161 void scalar_mult_add_su3_vector (su3_vector * src1, su3_vector * src2,
162 double scalar, su3_vector * dest);
163 void scalar_mult_sum_su3_vector (su3_vector * src1, su3_vector * src2,
164 double scalar);
165 void scalar_mult_sub_su3_vector (su3_vector * src1, su3_vector * src2,
166 double scalar, su3_vector * dest);
167 void scalar_mult_wvec (wilson_vector * src, double s, wilson_vector * dest);
168 void scalar_mult_hwvec (half_wilson_vector * src, double s,
169 half_wilson_vector * dest);
170 void scalar_mult_add_wvec (wilson_vector * src1, wilson_vector * src2,
171 double scalar, wilson_vector * dest);
172 void scalar_mult_addtm_wvec (wilson_vector * src1, wilson_vector * src2,
173 double scalar, wilson_vector * dest);
174 void c_scalar_mult_wvec (wilson_vector * src1, complex * phase,
175 wilson_vector * dest);
176 void c_scalar_mult_add_wvec (wilson_vector * src1, wilson_vector * src2,
177 complex * phase, wilson_vector * dest);
178 void c_scalar_mult_add_wvec2 (wilson_vector * src1, wilson_vector * src2,
179 complex s, wilson_vector * dest);
180 void c_scalar_mult_su3vec (su3_vector * src, complex * phase,
181 su3_vector * dest);
182 void c_scalar_mult_add_su3vec (su3_vector * v1, complex * phase,
183 su3_vector * v2);
184 void c_scalar_mult_sub_su3vec (su3_vector * v1, complex * phase,
185 su3_vector * v2);
/* Prototypes for routines taking an su2_matrix.  Declarations only; no
   definitions are visible in this file.  */
187 void left_su2_hit_n (su2_matrix * u, int p, int q, su3_matrix * link);
188 void right_su2_hit_a (su2_matrix * u, int p, int q, su3_matrix * link);
189 void dumpsu2 (su2_matrix * u);
190 void mult_su2_mat_vec_elem_n (su2_matrix * u, complex * x0, complex * x1);
191 void mult_su2_mat_vec_elem_a (su2_matrix * u, complex * x0, complex * x1);
/* Prototypes for su3_matrix times wilson_vector / half_wilson_vector
   routines.  Declarations only; no definitions are visible in this file.  */
193 void mult_mat_wilson_vec (su3_matrix * mat, wilson_vector * src,
194 wilson_vector * dest);
195 void mult_su3_mat_hwvec (su3_matrix * mat, half_wilson_vector * src,
196 half_wilson_vector * dest);
197 void mult_adj_mat_wilson_vec (su3_matrix * mat, wilson_vector * src,
198 wilson_vector * dest);
199 void mult_adj_su3_mat_hwvec (su3_matrix * mat, half_wilson_vector * src,
200 half_wilson_vector * dest);
/* Prototypes for wilson_vector add/subtract, magnitude and dot-product
   routines.  Declarations only; no definitions are visible in this file.  */
202 void add_wilson_vector (wilson_vector * src1, wilson_vector * src2,
203 wilson_vector * dest);
204 void sub_wilson_vector (wilson_vector * src1, wilson_vector * src2,
205 wilson_vector * dest);
206 double magsq_wvec (wilson_vector * src);
207 complex wvec_dot (wilson_vector * src1, wilson_vector * src2);
208 complex wvec2_dot (wilson_vector * src1, wilson_vector * src2);
209 double wvec_rdot (wilson_vector * a, wilson_vector * b);
/* Prototypes for routines converting between wilson_vector and
   half_wilson_vector (wp_shrink*, wp_grow*), the mult_by_gamma family, and
   wilson_vector clear/copy/dump helpers.  Declarations only; no definitions
   are visible in this file.  */
211 void wp_shrink (wilson_vector * src, half_wilson_vector * dest,
212 int dir, int sign);
213 void wp_shrink_4dir (wilson_vector * a, half_wilson_vector * b1,
214 half_wilson_vector * b2, half_wilson_vector * b3,
215 half_wilson_vector * b4, int sign);
216 void wp_grow (half_wilson_vector * src, wilson_vector * dest,
217 int dir, int sign);
218 void wp_grow_add (half_wilson_vector * src, wilson_vector * dest,
219 int dir, int sign);
220 void grow_add_four_wvecs (wilson_vector * a, half_wilson_vector * b1,
221 half_wilson_vector * b2, half_wilson_vector * b3,
222 half_wilson_vector * b4, int sign, int sum);
223 void mult_by_gamma (wilson_vector * src, wilson_vector * dest, int dir);
224 void mult_by_gamma_left (wilson_matrix * src, wilson_matrix * dest, int dir);
225 void mult_by_gamma_right (wilson_matrix * src, wilson_matrix * dest, int dir);
226 void mult_swv_by_gamma_l (spin_wilson_vector * src, spin_wilson_vector * dest,
227 int dir);
228 void mult_swv_by_gamma_r (spin_wilson_vector * src, spin_wilson_vector * dest,
229 int dir);
230 void su3_projector_w (wilson_vector * a, wilson_vector * b, su3_matrix * c);
231 void clear_wvec (wilson_vector * dest);
232 void copy_wvec (wilson_vector * src, wilson_vector * dest);
233 void dump_wilson_vec (wilson_vector * src);
/* Remaining declarations: a random-number prototype taking the double_prn
   state, aliases of int / unsigned int, and byterevn, which takes an array
   of int32type and an element count.  None of these is used by the function
   under test.  */
235 double gaussian_rand_no (double_prn * prn_pt);
236 typedef int int32type;
237 typedef unsigned int u_int32type;
238 void byterevn (int32type w[], int n);
/* Function under test.  For each i in 0..2 it stores into c->c[i] the
   complex sum over j = 0..2 of conj (a->e[j][i]) * b->c[j], i.e. the
   conjugate transpose of A applied to the vector B.  The sum is written out
   term by term through the scalar temporary t; the dg-final directive that
   follows checks that the generated assembly contains no "mtvsrd".
   Do not restructure these statements: the code shape is what the test
   exercises.
   NOTE(review): the embedded line numbers skip 242, 246 and 295-297, so the
   brace-only lines of the function body and the loop appear to be missing
   from this copy -- confirm against the upstream file.  */
240 void
241 mult_adj_su3_mat_vec (su3_matrix * a, su3_vector * b, su3_vector * c)
243 int i;
244 register double t, ar, ai, br, bi, cr, ci;
245 for (i = 0; i < 3; i++)
/* j = 0 term: load a->e[0][i] and b->c[0].  */
247 ar = a->e[0][i].real;
248 ai = a->e[0][i].imag;
250 br = b->c[0].real;
251 bi = b->c[0].imag;
/* Real part of conj (a) * b is ar*br + ai*bi; this initialises cr.  */
253 cr = ar * br;
254 t = ai * bi;
255 cr += t;
/* Imaginary part of conj (a) * b is ar*bi - ai*br; this initialises ci.  */
257 ci = ar * bi;
258 t = ai * br;
259 ci -= t;
/* j = 1 term: load a->e[1][i] and b->c[1], then accumulate.  */
261 ar = a->e[1][i].real;
262 ai = a->e[1][i].imag;
264 br = b->c[1].real;
265 bi = b->c[1].imag;
267 t = ar * br;
268 cr += t;
269 t = ai * bi;
270 cr += t;
272 t = ar * bi;
273 ci += t;
274 t = ai * br;
275 ci -= t;
/* j = 2 term: load a->e[2][i] and b->c[2], then accumulate.  */
277 ar = a->e[2][i].real;
278 ai = a->e[2][i].imag;
280 br = b->c[2].real;
281 bi = b->c[2].imag;
283 t = ar * br;
284 cr += t;
285 t = ai * bi;
286 cr += t;
288 t = ar * bi;
289 ci += t;
290 t = ai * br;
291 ci -= t;
/* Store the accumulated result for element i.  */
293 c->c[i].real = cr;
294 c->c[i].imag = ci;
298 /* { dg-final { scan-assembler-not "mtvsrd" } } */