1 /* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
2 /* { dg-skip-if "" { powerpc*-*-darwin* } } */
3 /* { dg-require-effective-target powerpc_p8vector_ok } */
4 /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
5 /* { dg-options "-mcpu=power8 -O3 -ffast-math" } */
7 /* Taken from the SPEC 2006 milc benchmark. Ultimately, GCC wants to generate
8 a DF splat from offsettable memory. The register allocator decided it was
9 better to do the load in the GPR registers and do a move direct, rather than
10 doing a load in the VSX register sets. */
/* Prototypes for helper routines that take or return `complex` values.
   Only the declarations appear here; the `complex` type and the function
   definitions are not visible in this excerpt.  cmplx takes two doubles,
   ce_itheta takes a single double, and the remaining routines take
   pointers to `complex` operands and return a `complex` by value. */
24 complex cmplx (double x
, double y
);
25 complex cadd (complex * a
, complex * b
);
26 complex cmul (complex * a
, complex * b
);
27 complex csub (complex * a
, complex * b
);
28 complex cdiv (complex * a
, complex * b
);
29 complex conjg (complex * a
);
30 complex ce_itheta (double theta
);
/* Prototypes for the `double_complex` counterparts of the routines above,
   plus dcexp, dclog and dcsqrt.  Every routine returns a `double_complex`
   by value; dcmplx and dce_itheta take doubles, the others take pointers
   to `double_complex` operands.  The type definition and the function
   bodies are not visible in this excerpt. */
32 double_complex
dcmplx (double x
, double y
);
33 double_complex
dcadd (double_complex
* a
, double_complex
* b
);
34 double_complex
dcmul (double_complex
* a
, double_complex
* b
);
35 double_complex
dcsub (double_complex
* a
, double_complex
* b
);
36 double_complex
dcdiv (double_complex
* a
, double_complex
* b
);
37 double_complex
dconjg (double_complex
* a
);
38 double_complex
dcexp (double_complex
* a
);
39 double_complex
dclog (double_complex
* a
);
40 double_complex
dcsqrt (double_complex
* z
);
41 double_complex
dce_itheta (double theta
);
45 unsigned long r0
, r1
, r2
, r3
, r4
, r5
, r6
;
46 unsigned long multiplier
, addend
, ic_state
;
/* Prototype only: takes a pointer to a `double_prn` and returns a double.
   NOTE(review): presumably a pseudo-random number generator driven by the
   state in *prn_pt -- the definition is not visible here; confirm. */
50 double myrand (double_prn
* prn_pt
);
64 complex m01
, m02
, m12
;
65 double m00im
, m11im
, m22im
;
84 } color_wilson_vector
;
91 color_wilson_vector d
[4];
95 spin_wilson_vector c
[3];
/* Prototypes for routines operating on `su3_matrix` objects passed by
   pointer.  Only declarations are visible in this excerpt.  The three-
   pointer routines (a, b, c) return void; realtrace_su3 returns a double;
   trace_su3, complextrace_su3 and det_su3 return a `complex` by value.
   NOTE(review): by the naming, `c` looks like the output operand of the
   void routines -- confirm against the definitions. */
98 void mult_su3_nn (su3_matrix
* a
, su3_matrix
* b
, su3_matrix
* c
);
99 void mult_su3_na (su3_matrix
* a
, su3_matrix
* b
, su3_matrix
* c
);
100 void mult_su3_an (su3_matrix
* a
, su3_matrix
* b
, su3_matrix
* c
);
101 double realtrace_su3 (su3_matrix
* a
, su3_matrix
* b
);
102 complex trace_su3 (su3_matrix
* a
);
103 complex complextrace_su3 (su3_matrix
* a
, su3_matrix
* b
);
104 complex det_su3 (su3_matrix
* a
);
105 void add_su3_matrix (su3_matrix
* a
, su3_matrix
* b
, su3_matrix
* c
);
106 void sub_su3_matrix (su3_matrix
* a
, su3_matrix
* b
, su3_matrix
* c
);
107 void scalar_mult_su3_matrix (su3_matrix
* src
, double scalar
,
109 void scalar_mult_add_su3_matrix (su3_matrix
* src1
, su3_matrix
* src2
,
110 double scalar
, su3_matrix
* dest
);
111 void scalar_mult_sub_su3_matrix (su3_matrix
* src1
, su3_matrix
* src2
,
112 double scalar
, su3_matrix
* dest
);
113 void c_scalar_mult_su3mat (su3_matrix
* src
, complex * scalar
,
115 void c_scalar_mult_add_su3mat (su3_matrix
* src1
, su3_matrix
* src2
,
116 complex * scalar
, su3_matrix
* dest
);
117 void c_scalar_mult_sub_su3mat (su3_matrix
* src1
, su3_matrix
* src2
,
118 complex * scalar
, su3_matrix
* dest
);
/* Prototypes for further void routines on `su3_matrix` and
   `anti_hermitmat` objects, all passed by pointer; random_anti_hermitian
   additionally takes a `double_prn` pointer.  Only declarations are
   visible in this excerpt, so which operand is the source and which the
   destination cannot be determined from here. */
119 void su3_adjoint (su3_matrix
* a
, su3_matrix
* b
);
120 void make_anti_hermitian (su3_matrix
* m3
, anti_hermitmat
* ah3
);
121 void random_anti_hermitian (anti_hermitmat
* mat_antihermit
,
122 double_prn
* prn_pt
);
123 void uncompress_anti_hermitian (anti_hermitmat
* mat_anti
, su3_matrix
* mat
);
124 void compress_anti_hermitian (su3_matrix
* mat
, anti_hermitmat
* mat_anti
);
125 void clear_su3mat (su3_matrix
* dest
);
126 void su3mat_copy (su3_matrix
* a
, su3_matrix
* b
);
127 void dumpmat (su3_matrix
* m
);
129 void su3_projector (su3_vector
* a
, su3_vector
* b
, su3_matrix
* c
);
130 complex su3_dot (su3_vector
* a
, su3_vector
* b
);
131 double su3_rdot (su3_vector
* a
, su3_vector
* b
);
132 double magsq_su3vec (su3_vector
* a
);
133 void su3vec_copy (su3_vector
* a
, su3_vector
* b
);
134 void dumpvec (su3_vector
* v
);
135 void clearvec (su3_vector
* v
);
137 void mult_su3_mat_vec (su3_matrix
* a
, su3_vector
* b
, su3_vector
* c
);
138 void mult_su3_mat_vec_sum (su3_matrix
* a
, su3_vector
* b
, su3_vector
* c
);
139 void mult_su3_mat_vec_sum_4dir (su3_matrix
* a
, su3_vector
* b0
,
140 su3_vector
* b1
, su3_vector
* b2
,
141 su3_vector
* b3
, su3_vector
* c
);
142 void mult_su3_mat_vec_nsum (su3_matrix
* a
, su3_vector
* b
, su3_vector
* c
);
143 void mult_adj_su3_mat_vec (su3_matrix
* a
, su3_vector
* b
, su3_vector
* c
);
144 void mult_adj_su3_mat_vec_4dir (su3_matrix
* a
, su3_vector
* b
,
146 void mult_adj_su3_mat_4vec (su3_matrix
* mat
, su3_vector
* src
,
147 su3_vector
* dest0
, su3_vector
* dest1
,
148 su3_vector
* dest2
, su3_vector
* dest3
);
149 void mult_adj_su3_mat_vec_sum (su3_matrix
* a
, su3_vector
* b
,
151 void mult_adj_su3_mat_vec_nsum (su3_matrix
* a
, su3_vector
* b
,
154 void add_su3_vector (su3_vector
* a
, su3_vector
* b
, su3_vector
* c
);
155 void sub_su3_vector (su3_vector
* a
, su3_vector
* b
, su3_vector
* c
);
156 void sub_four_su3_vecs (su3_vector
* a
, su3_vector
* b1
, su3_vector
* b2
,
157 su3_vector
* b3
, su3_vector
* b4
);
159 void scalar_mult_su3_vector (su3_vector
* src
, double scalar
,
161 void scalar_mult_add_su3_vector (su3_vector
* src1
, su3_vector
* src2
,
162 double scalar
, su3_vector
* dest
);
163 void scalar_mult_sum_su3_vector (su3_vector
* src1
, su3_vector
* src2
,
165 void scalar_mult_sub_su3_vector (su3_vector
* src1
, su3_vector
* src2
,
166 double scalar
, su3_vector
* dest
);
167 void scalar_mult_wvec (wilson_vector
* src
, double s
, wilson_vector
* dest
);
168 void scalar_mult_hwvec (half_wilson_vector
* src
, double s
,
169 half_wilson_vector
* dest
);
170 void scalar_mult_add_wvec (wilson_vector
* src1
, wilson_vector
* src2
,
171 double scalar
, wilson_vector
* dest
);
172 void scalar_mult_addtm_wvec (wilson_vector
* src1
, wilson_vector
* src2
,
173 double scalar
, wilson_vector
* dest
);
174 void c_scalar_mult_wvec (wilson_vector
* src1
, complex * phase
,
175 wilson_vector
* dest
);
176 void c_scalar_mult_add_wvec (wilson_vector
* src1
, wilson_vector
* src2
,
177 complex * phase
, wilson_vector
* dest
);
178 void c_scalar_mult_add_wvec2 (wilson_vector
* src1
, wilson_vector
* src2
,
179 complex s
, wilson_vector
* dest
);
180 void c_scalar_mult_su3vec (su3_vector
* src
, complex * phase
,
182 void c_scalar_mult_add_su3vec (su3_vector
* v1
, complex * phase
,
184 void c_scalar_mult_sub_su3vec (su3_vector
* v1
, complex * phase
,
187 void left_su2_hit_n (su2_matrix
* u
, int p
, int q
, su3_matrix
* link
);
188 void right_su2_hit_a (su2_matrix
* u
, int p
, int q
, su3_matrix
* link
);
189 void dumpsu2 (su2_matrix
* u
);
190 void mult_su2_mat_vec_elem_n (su2_matrix
* u
, complex * x0
, complex * x1
);
191 void mult_su2_mat_vec_elem_a (su2_matrix
* u
, complex * x0
, complex * x1
);
193 void mult_mat_wilson_vec (su3_matrix
* mat
, wilson_vector
* src
,
194 wilson_vector
* dest
);
195 void mult_su3_mat_hwvec (su3_matrix
* mat
, half_wilson_vector
* src
,
196 half_wilson_vector
* dest
);
197 void mult_adj_mat_wilson_vec (su3_matrix
* mat
, wilson_vector
* src
,
198 wilson_vector
* dest
);
199 void mult_adj_su3_mat_hwvec (su3_matrix
* mat
, half_wilson_vector
* src
,
200 half_wilson_vector
* dest
);
202 void add_wilson_vector (wilson_vector
* src1
, wilson_vector
* src2
,
203 wilson_vector
* dest
);
204 void sub_wilson_vector (wilson_vector
* src1
, wilson_vector
* src2
,
205 wilson_vector
* dest
);
206 double magsq_wvec (wilson_vector
* src
);
207 complex wvec_dot (wilson_vector
* src1
, wilson_vector
* src2
);
208 complex wvec2_dot (wilson_vector
* src1
, wilson_vector
* src2
);
209 double wvec_rdot (wilson_vector
* a
, wilson_vector
* b
);
211 void wp_shrink (wilson_vector
* src
, half_wilson_vector
* dest
,
213 void wp_shrink_4dir (wilson_vector
* a
, half_wilson_vector
* b1
,
214 half_wilson_vector
* b2
, half_wilson_vector
* b3
,
215 half_wilson_vector
* b4
, int sign
);
216 void wp_grow (half_wilson_vector
* src
, wilson_vector
* dest
,
218 void wp_grow_add (half_wilson_vector
* src
, wilson_vector
* dest
,
220 void grow_add_four_wvecs (wilson_vector
* a
, half_wilson_vector
* b1
,
221 half_wilson_vector
* b2
, half_wilson_vector
* b3
,
222 half_wilson_vector
* b4
, int sign
, int sum
);
223 void mult_by_gamma (wilson_vector
* src
, wilson_vector
* dest
, int dir
);
224 void mult_by_gamma_left (wilson_matrix
* src
, wilson_matrix
* dest
, int dir
);
225 void mult_by_gamma_right (wilson_matrix
* src
, wilson_matrix
* dest
, int dir
);
226 void mult_swv_by_gamma_l (spin_wilson_vector
* src
, spin_wilson_vector
* dest
,
228 void mult_swv_by_gamma_r (spin_wilson_vector
* src
, spin_wilson_vector
* dest
,
230 void su3_projector_w (wilson_vector
* a
, wilson_vector
* b
, su3_matrix
* c
);
231 void clear_wvec (wilson_vector
* dest
);
232 void copy_wvec (wilson_vector
* src
, wilson_vector
* dest
);
233 void dump_wilson_vec (wilson_vector
* src
);
235 double gaussian_rand_no (double_prn
* prn_pt
);
/* `int32type` is an alias for int and `u_int32type` for unsigned int.
   NOTE(review): the names suggest 32-bit quantities, which relies on the
   target's int width -- confirm.
   byterevn is declared only; it takes an array of int32type and a count n. */
236 typedef int int32type
;
237 typedef unsigned int u_int32type
;
238 void byterevn (int32type w
[], int n
);
241 mult_adj_su3_mat_vec (su3_matrix
* a
, su3_vector
* b
, su3_vector
* c
)
244 register double t
, ar
, ai
, br
, bi
, cr
, ci
;
245 for (i
= 0; i
< 3; i
++)
247 ar
= a
->e
[0][i
].real
;
248 ai
= a
->e
[0][i
].imag
;
261 ar
= a
->e
[1][i
].real
;
262 ai
= a
->e
[1][i
].imag
;
277 ar
= a
->e
[2][i
].real
;
278 ai
= a
->e
[2][i
].imag
;
298 /* { dg-final { scan-assembler-not "mtvsrd" } } */