/* PR rtl-optimization/48830 */
/* Testcase by Hans-Peter Nilsson <hp@gcc.gnu.org> */
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O2 -mcpu=ultrasparc -mvis" } */
/* Fixed-width integer names, declared locally (no header is included in
   the visible part of this file).  uint64_t and uintmax_t are both spelled
   as unsigned long int, so they are 64 bits wide only on an LP64 target;
   the dg-require-effective-target lp64 directive restricts the test to
   such targets.  */
typedef unsigned char uint8_t;
typedef unsigned int uint32_t;
typedef unsigned long int uint64_t;
typedef unsigned long int uintmax_t;

/* GCC vector types; the __vector_size__ argument is the total size in
   bytes.  */

/* 8 bytes of unsigned char, i.e. eight lanes.  */
typedef unsigned char rc_vec_t __attribute__((__vector_size__(8)));

/* 8 bytes of short, i.e. four lanes when short is 16 bits wide.  */
typedef short rc_svec_type_ __attribute__((__vector_size__(8)));

/* 4 bytes of unsigned char, i.e. four lanes.  */
typedef unsigned char rc_vec4_type_ __attribute__((__vector_size__(4)));
17 rc_stat_xsum_acc(const uint8_t *__restrict src1
, int src1_dim
,
18 const uint8_t *__restrict src2
, int src2_dim
,
19 int len
, int height
, uintmax_t sum
[5])
26 int full
= len
/ ((1024) < (1024) ? (1024) : (1024));
27 int rem
= len
% ((1024) < (1024) ? (1024) : (1024));
30 unsigned int rc_gsr_scale_
__attribute__ ((__unused__
)) = 7; unsigned int rc_gsr_align_
__attribute__ ((__unused__
)) = 4; unsigned int rc_gsr_set_
__attribute__ ((__unused__
)) = 0; register unsigned int rc_gsr_fakedep_
__attribute__ ((__unused__
)) = 0; unsigned int rc_gsr_ldinit_
__attribute__ ((__unused__
)) = 0;
31 for (y
= 0; y
< height
; y
++) {
32 rc_vec_t a1
, a2
, a11
, a22
, a12
;
33 int i1
= (y
)*(src1_dim
);
34 int i2
= (y
)*(src2_dim
);
36 ((a1
) = ((rc_vec_t
) {0}));
37 ((a2
) = ((rc_vec_t
) {0}));
38 ((a11
) = ((rc_vec_t
) {0}));
39 ((a22
) = ((rc_vec_t
) {0}));
40 ((a12
) = ((rc_vec_t
) {0}));
41 for (x
= 0; x
< full
; x
++) {
43 for (k
= 0; k
< ((1024) < (1024) ? (1024) : (1024)) /
46 do { rc_vec_t v1
, v2
; ((v1
) = *(const rc_vec_t
*)(&(src1
)[i1
])); ((v2
) = *(const rc_vec_t
*)(&(src2
)[i2
])); ((a1
) = (((union { rc_vec_t v
; uint64_t i
; })(uint64_t)(__builtin_vis_pdist (v1
, ((rc_vec_t
) {0}), (((union { rc_vec_t v
; uint64_t i
; })(uint64_t)(a1
)).i
)))).v
)); ((a2
) = (((union { rc_vec_t v
; uint64_t i
; })(uint64_t)(__builtin_vis_pdist (v2
, ((rc_vec_t
) {0}), (((union { rc_vec_t v
; uint64_t i
; })(uint64_t)(a2
)).i
)))).v
)); do { rc_vec_t s1_
= (v1
); rc_vec_t s2_
= (v1
); rc_vec_t accvin_
= (a11
); rc_vec_t s1lo7_
, s1msb_
, accvout_
; uint32_t maclo_
, machi_
; rc_svec_type_ masklow_
= (rc_svec_type_
){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_
, s1msblo_
, s1lo7hi_
, s1lo7lo_
; rc_svec_type_ s1msbdiv2hi_
, s1msbdiv2lo_
; rc_vec4_type_ s1lo7hi4_
, s1lo7lo4_
, s1msbhi4_
, s1msblo4_
; rc_vec4_type_ s1msbdiv2hi4_
, s1msbdiv2lo4_
, s2hi4_
, s2lo4_
; rc_vec4_type_ accvhi4_
, accvlo4_
; rc_svec_type_ mulhilo7_
, mullolo7_
, mulhimsbdiv2_
, mullomsbdiv2_
; rc_svec_type_ mulhi_
, mullo_
, mulhihi_
, mullohi_
; rc_svec_type_ mulhilo_
, mullolo_
; rc_vec4_type_ zero4_
= (((union { rc_vec4_type_ v
; uint64_t i
; })(uint64_t)(0)).v
); rc_vec_t msb_
= (rc_vec_t
){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_
) = (s1_
) & (msb_
)); ((s1lo7_
) = (s1_
) & (~msb_
)); do { if (rc_gsr_ldinit_
) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_
) || !__builtin_constant_p(2) || !rc_gsr_set_
|| (unsigned) (rc_gsr_align_
) != rc_gsr_align_
|| (unsigned) (2) != rc_gsr_scale_
) { rc_gsr_set_
= 1; rc_gsr_align_
= (rc_gsr_align_
); rc_gsr_scale_
= (2); unsigned int val_
= (rc_gsr_scale_
<< 3) | rc_gsr_align_
; if (__builtin_constant_p (val_
)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec
] "=brm" (s1msb_
), [fakegsr
] "=rm" (rc_gsr_fakedep_
) : "0" (s1msb_
), [gsrval
] "i" (val_
), "1" (rc_gsr_fakedep_
)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec
] "=brm" (s1msb_
), [fakegsr
] "=rm" (rc_gsr_fakedep_
) : "0" (s1msb_
), [gsrval
] "r" (val_
), "1" (rc_gsr_fakedep_
)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec
] "=brm" (s1msb_
) : "0" (s1msb_
), [fakegsr
] "g" (rc_gsr_fakedep_
)); } } while (0); do { typedef union { rc_vec_t v_
; struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) (s1msb_
); (s1msbhi4_
) = hl_
.hilo_
.hi_
; (s1msblo4_
) = hl_
.hilo_
.lo_
; } while (0); s1msbhi_
= __builtin_vis_fexpand(s1msbhi4_
); s1msblo_
= __builtin_vis_fexpand(s1msblo4_
); s1msbdiv2hi4_
= __builtin_vis_fpack16(s1msbhi_
); s1msbdiv2lo4_
= __builtin_vis_fpack16(s1msblo_
); do { typedef union { rc_vec_t v_
; struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) (s2_
); (s2hi4_
) = hl_
.hilo_
.hi_
; (s2lo4_
) = hl_
.hilo_
.lo_
; } while (0); do { typedef union { rc_vec_t v_
; struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) (s1lo7_
); (s1lo7hi4_
) = hl_
.hilo_
.hi_
; (s1lo7lo4_
) = hl_
.hilo_
.lo_
; } while (0); s1msbdiv2hi_
= (rc_svec_type_
)__builtin_vis_fpmerge(s1msbdiv2hi4_
, zero4_
); s1msbdiv2lo_
= (rc_svec_type_
)__builtin_vis_fpmerge(s1msbdiv2lo4_
, zero4_
); s1lo7hi_
= (rc_svec_type_
)__builtin_vis_fpmerge(s1lo7hi4_
, zero4_
); s1lo7lo_
= (rc_svec_type_
)__builtin_vis_fpmerge(s1lo7lo4_
, zero4_
); mulhilo7_
= __builtin_vis_fmul8x16(s2hi4_
, s1lo7hi_
); mullolo7_
= __builtin_vis_fmul8x16(s2lo4_
, s1lo7lo_
); mulhimsbdiv2_
= __builtin_vis_fmul8x16(s2hi4_
, s1msbdiv2hi_
); mullomsbdiv2_
= __builtin_vis_fmul8x16(s2lo4_
, s1msbdiv2lo_
); mulhi_
= mulhilo7_
+ mulhimsbdiv2_
+ mulhimsbdiv2_
; mullo_
= mullolo7_
+ mullomsbdiv2_
+ mullomsbdiv2_
; mulhihi_
= mulhi_
& ~masklow_
; mulhilo_
= mulhi_
& masklow_
; mullohi_
= mullo_
& ~masklow_
; mullolo_
= mullo_
& masklow_
; do { typedef union { rc_vec_t v_
; struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) (accvin_
); (accvhi4_
) = hl_
.hilo_
.hi_
; (accvlo4_
) = hl_
.hilo_
.lo_
; } while (0); maclo_
= __builtin_vis_pdist ((rc_vec_t
)mullolo_
, ((rc_vec_t
) {0}), (((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)(accvlo4_
)).i
)); maclo_
= __builtin_vis_pdist ((rc_vec_t
)mulhilo_
, ((rc_vec_t
) {0}), maclo_
); machi_
= __builtin_vis_pdist ((rc_vec_t
)mullohi_
, ((rc_vec_t
) {0}), (((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)(accvhi4_
)).i
)); machi_
= __builtin_vis_pdist ((rc_vec_t
)mulhihi_
, ((rc_vec_t
) {0}), machi_
); do { typedef union { struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; rc_vec_t v_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) {{((((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)((uint32_t)machi_
)).v
)), ((((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)((uint32_t)maclo_
)).v
))}}; (accvout_
) = hl_
.v_
; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr
] "=brm" (rc_gsr_fakedep_
) : [xdep
] "brm" (accvout_
), "0" (rc_gsr_fakedep_
)); (a11
) = accvout_
; } while (0); do { rc_vec_t s1_
= (v2
); rc_vec_t s2_
= (v2
); rc_vec_t accvin_
= (a22
); rc_vec_t s1lo7_
, s1msb_
, accvout_
; uint32_t maclo_
, machi_
; rc_svec_type_ masklow_
= (rc_svec_type_
){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_
, s1msblo_
, s1lo7hi_
, s1lo7lo_
; rc_svec_type_ s1msbdiv2hi_
, s1msbdiv2lo_
; rc_vec4_type_ s1lo7hi4_
, s1lo7lo4_
, s1msbhi4_
, s1msblo4_
; rc_vec4_type_ s1msbdiv2hi4_
, s1msbdiv2lo4_
, s2hi4_
, s2lo4_
; rc_vec4_type_ accvhi4_
, accvlo4_
; rc_svec_type_ mulhilo7_
, mullolo7_
, mulhimsbdiv2_
, mullomsbdiv2_
; rc_svec_type_ mulhi_
, mullo_
, mulhihi_
, mullohi_
; rc_svec_type_ mulhilo_
, mullolo_
; rc_vec4_type_ zero4_
= (((union { rc_vec4_type_ v
; uint64_t i
; })(uint64_t)(0)).v
); rc_vec_t msb_
= (rc_vec_t
){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_
) = (s1_
) & (msb_
)); ((s1lo7_
) = (s1_
) & (~msb_
)); do { if (rc_gsr_ldinit_
) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_
) || !__builtin_constant_p(2) || !rc_gsr_set_
|| (unsigned) (rc_gsr_align_
) != rc_gsr_align_
|| (unsigned) (2) != rc_gsr_scale_
) { rc_gsr_set_
= 1; rc_gsr_align_
= (rc_gsr_align_
); rc_gsr_scale_
= (2); unsigned int val_
= (rc_gsr_scale_
<< 3) | rc_gsr_align_
; if (__builtin_constant_p (val_
)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec
] "=brm" (s1msb_
), [fakegsr
] "=rm" (rc_gsr_fakedep_
) : "0" (s1msb_
), [gsrval
] "i" (val_
), "1" (rc_gsr_fakedep_
)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec
] "=brm" (s1msb_
), [fakegsr
] "=rm" (rc_gsr_fakedep_
) : "0" (s1msb_
), [gsrval
] "r" (val_
), "1" (rc_gsr_fakedep_
)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec
] "=brm" (s1msb_
) : "0" (s1msb_
), [fakegsr
] "g" (rc_gsr_fakedep_
)); } } while (0); do { typedef union { rc_vec_t v_
; struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) (s1msb_
); (s1msbhi4_
) = hl_
.hilo_
.hi_
; (s1msblo4_
) = hl_
.hilo_
.lo_
; } while (0); s1msbhi_
= __builtin_vis_fexpand(s1msbhi4_
); s1msblo_
= __builtin_vis_fexpand(s1msblo4_
); s1msbdiv2hi4_
= __builtin_vis_fpack16(s1msbhi_
); s1msbdiv2lo4_
= __builtin_vis_fpack16(s1msblo_
); do { typedef union { rc_vec_t v_
; struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) (s2_
); (s2hi4_
) = hl_
.hilo_
.hi_
; (s2lo4_
) = hl_
.hilo_
.lo_
; } while (0); do { typedef union { rc_vec_t v_
; struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) (s1lo7_
); (s1lo7hi4_
) = hl_
.hilo_
.hi_
; (s1lo7lo4_
) = hl_
.hilo_
.lo_
; } while (0); s1msbdiv2hi_
= (rc_svec_type_
)__builtin_vis_fpmerge(s1msbdiv2hi4_
, zero4_
); s1msbdiv2lo_
= (rc_svec_type_
)__builtin_vis_fpmerge(s1msbdiv2lo4_
, zero4_
); s1lo7hi_
= (rc_svec_type_
)__builtin_vis_fpmerge(s1lo7hi4_
, zero4_
); s1lo7lo_
= (rc_svec_type_
)__builtin_vis_fpmerge(s1lo7lo4_
, zero4_
); mulhilo7_
= __builtin_vis_fmul8x16(s2hi4_
, s1lo7hi_
); mullolo7_
= __builtin_vis_fmul8x16(s2lo4_
, s1lo7lo_
); mulhimsbdiv2_
= __builtin_vis_fmul8x16(s2hi4_
, s1msbdiv2hi_
); mullomsbdiv2_
= __builtin_vis_fmul8x16(s2lo4_
, s1msbdiv2lo_
); mulhi_
= mulhilo7_
+ mulhimsbdiv2_
+ mulhimsbdiv2_
; mullo_
= mullolo7_
+ mullomsbdiv2_
+ mullomsbdiv2_
; mulhihi_
= mulhi_
& ~masklow_
; mulhilo_
= mulhi_
& masklow_
; mullohi_
= mullo_
& ~masklow_
; mullolo_
= mullo_
& masklow_
; do { typedef union { rc_vec_t v_
; struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) (accvin_
); (accvhi4_
) = hl_
.hilo_
.hi_
; (accvlo4_
) = hl_
.hilo_
.lo_
; } while (0); maclo_
= __builtin_vis_pdist ((rc_vec_t
)mullolo_
, ((rc_vec_t
) {0}), (((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)(accvlo4_
)).i
)); maclo_
= __builtin_vis_pdist ((rc_vec_t
)mulhilo_
, ((rc_vec_t
) {0}), maclo_
); machi_
= __builtin_vis_pdist ((rc_vec_t
)mullohi_
, ((rc_vec_t
) {0}), (((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)(accvhi4_
)).i
)); machi_
= __builtin_vis_pdist ((rc_vec_t
)mulhihi_
, ((rc_vec_t
) {0}), machi_
); do { typedef union { struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; rc_vec_t v_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) {{((((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)((uint32_t)machi_
)).v
)), ((((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)((uint32_t)maclo_
)).v
))}}; (accvout_
) = hl_
.v_
; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr
] "=brm" (rc_gsr_fakedep_
) : [xdep
] "brm" (accvout_
), "0" (rc_gsr_fakedep_
)); (a22
) = accvout_
; } while (0); do { rc_vec_t s1_
= (v1
); rc_vec_t s2_
= (v2
); rc_vec_t accvin_
= (a12
); rc_vec_t s1lo7_
, s1msb_
, accvout_
; uint32_t maclo_
, machi_
; rc_svec_type_ masklow_
= (rc_svec_type_
){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_
, s1msblo_
, s1lo7hi_
, s1lo7lo_
; rc_svec_type_ s1msbdiv2hi_
, s1msbdiv2lo_
; rc_vec4_type_ s1lo7hi4_
, s1lo7lo4_
, s1msbhi4_
, s1msblo4_
; rc_vec4_type_ s1msbdiv2hi4_
, s1msbdiv2lo4_
, s2hi4_
, s2lo4_
; rc_vec4_type_ accvhi4_
, accvlo4_
; rc_svec_type_ mulhilo7_
, mullolo7_
, mulhimsbdiv2_
, mullomsbdiv2_
; rc_svec_type_ mulhi_
, mullo_
, mulhihi_
, mullohi_
; rc_svec_type_ mulhilo_
, mullolo_
; rc_vec4_type_ zero4_
= (((union { rc_vec4_type_ v
; uint64_t i
; })(uint64_t)(0)).v
); rc_vec_t msb_
= (rc_vec_t
){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_
) = (s1_
) & (msb_
)); ((s1lo7_
) = (s1_
) & (~msb_
)); do { if (rc_gsr_ldinit_
) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_
) || !__builtin_constant_p(2) || !rc_gsr_set_
|| (unsigned) (rc_gsr_align_
) != rc_gsr_align_
|| (unsigned) (2) != rc_gsr_scale_
) { rc_gsr_set_
= 1; rc_gsr_align_
= (rc_gsr_align_
); rc_gsr_scale_
= (2); unsigned int val_
= (rc_gsr_scale_
<< 3) | rc_gsr_align_
; if (__builtin_constant_p (val_
)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec
] "=brm" (s1msb_
), [fakegsr
] "=rm" (rc_gsr_fakedep_
) : "0" (s1msb_
), [gsrval
] "i" (val_
), "1" (rc_gsr_fakedep_
)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec
] "=brm" (s1msb_
), [fakegsr
] "=rm" (rc_gsr_fakedep_
) : "0" (s1msb_
), [gsrval
] "r" (val_
), "1" (rc_gsr_fakedep_
)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec
] "=brm" (s1msb_
) : "0" (s1msb_
), [fakegsr
] "g" (rc_gsr_fakedep_
)); } } while (0); do { typedef union { rc_vec_t v_
; struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) (s1msb_
); (s1msbhi4_
) = hl_
.hilo_
.hi_
; (s1msblo4_
) = hl_
.hilo_
.lo_
; } while (0); s1msbhi_
= __builtin_vis_fexpand(s1msbhi4_
); s1msblo_
= __builtin_vis_fexpand(s1msblo4_
); s1msbdiv2hi4_
= __builtin_vis_fpack16(s1msbhi_
); s1msbdiv2lo4_
= __builtin_vis_fpack16(s1msblo_
); do { typedef union { rc_vec_t v_
; struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) (s2_
); (s2hi4_
) = hl_
.hilo_
.hi_
; (s2lo4_
) = hl_
.hilo_
.lo_
; } while (0); do { typedef union { rc_vec_t v_
; struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) (s1lo7_
); (s1lo7hi4_
) = hl_
.hilo_
.hi_
; (s1lo7lo4_
) = hl_
.hilo_
.lo_
; } while (0); s1msbdiv2hi_
= (rc_svec_type_
)__builtin_vis_fpmerge(s1msbdiv2hi4_
, zero4_
); s1msbdiv2lo_
= (rc_svec_type_
)__builtin_vis_fpmerge(s1msbdiv2lo4_
, zero4_
); s1lo7hi_
= (rc_svec_type_
)__builtin_vis_fpmerge(s1lo7hi4_
, zero4_
); s1lo7lo_
= (rc_svec_type_
)__builtin_vis_fpmerge(s1lo7lo4_
, zero4_
); mulhilo7_
= __builtin_vis_fmul8x16(s2hi4_
, s1lo7hi_
); mullolo7_
= __builtin_vis_fmul8x16(s2lo4_
, s1lo7lo_
); mulhimsbdiv2_
= __builtin_vis_fmul8x16(s2hi4_
, s1msbdiv2hi_
); mullomsbdiv2_
= __builtin_vis_fmul8x16(s2lo4_
, s1msbdiv2lo_
); mulhi_
= mulhilo7_
+ mulhimsbdiv2_
+ mulhimsbdiv2_
; mullo_
= mullolo7_
+ mullomsbdiv2_
+ mullomsbdiv2_
; mulhihi_
= mulhi_
& ~masklow_
; mulhilo_
= mulhi_
& masklow_
; mullohi_
= mullo_
& ~masklow_
; mullolo_
= mullo_
& masklow_
; do { typedef union { rc_vec_t v_
; struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) (accvin_
); (accvhi4_
) = hl_
.hilo_
.hi_
; (accvlo4_
) = hl_
.hilo_
.lo_
; } while (0); maclo_
= __builtin_vis_pdist ((rc_vec_t
)mullolo_
, ((rc_vec_t
) {0}), (((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)(accvlo4_
)).i
)); maclo_
= __builtin_vis_pdist ((rc_vec_t
)mulhilo_
, ((rc_vec_t
) {0}), maclo_
); machi_
= __builtin_vis_pdist ((rc_vec_t
)mullohi_
, ((rc_vec_t
) {0}), (((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)(accvhi4_
)).i
)); machi_
= __builtin_vis_pdist ((rc_vec_t
)mulhihi_
, ((rc_vec_t
) {0}), machi_
); do { typedef union { struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; rc_vec_t v_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) {{((((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)((uint32_t)machi_
)).v
)), ((((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)((uint32_t)maclo_
)).v
))}}; (accvout_
) = hl_
.v_
; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr
] "=brm" (rc_gsr_fakedep_
) : [xdep
] "brm" (accvout_
), "0" (rc_gsr_fakedep_
)); (a12
) = accvout_
; } while (0); (i1
) += 8; (i2
) += 8; } while (0);
49 do { uint32_t t1
, t2
, t11
, t22
, t12
; ((t1
) = (((union { rc_vec_t v
; uint64_t i
; })(uint64_t)(a1
)).i
)); ((t2
) = (((union { rc_vec_t v
; uint64_t i
; })(uint64_t)(a2
)).i
)); do { rc_vec4_type_ accvhi4_
, accvlo4_
; uint64_t machi_
, maclo_
; do { typedef union { rc_vec_t v_
; struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) (a11
); (accvhi4_
) = hl_
.hilo_
.hi_
; (accvlo4_
) = hl_
.hilo_
.lo_
; } while (0); machi_
= (((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)(accvhi4_
)).i
); maclo_
= (((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)(accvlo4_
)).i
); (t11
) = maclo_
+ machi_
* 256; } while (0); do { rc_vec4_type_ accvhi4_
, accvlo4_
; uint64_t machi_
, maclo_
; do { typedef union { rc_vec_t v_
; struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) (a22
); (accvhi4_
) = hl_
.hilo_
.hi_
; (accvlo4_
) = hl_
.hilo_
.lo_
; } while (0); machi_
= (((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)(accvhi4_
)).i
); maclo_
= (((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)(accvlo4_
)).i
); (t22
) = maclo_
+ machi_
* 256; } while (0); do { rc_vec4_type_ accvhi4_
, accvlo4_
; uint64_t machi_
, maclo_
; do { typedef union { rc_vec_t v_
; struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) (a12
); (accvhi4_
) = hl_
.hilo_
.hi_
; (accvlo4_
) = hl_
.hilo_
.lo_
; } while (0); machi_
= (((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)(accvhi4_
)).i
); maclo_
= (((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)(accvlo4_
)).i
); (t12
) = maclo_
+ machi_
* 256; } while (0); ((a1
) = ((rc_vec_t
) {0})); ((a2
) = ((rc_vec_t
) {0})); ((a11
) = ((rc_vec_t
) {0})); ((a22
) = ((rc_vec_t
) {0})); ((a12
) = ((rc_vec_t
) {0})); (s1
) += t1
; (s2
) += t2
; (s11
) += t11
; (s22
) += t22
; (s12
) += t12
; } while (0);
51 for (x
= 0; x
< rem1
; x
++) {
52 do { rc_vec_t v1
, v2
; ((v1
) = *(const rc_vec_t
*)(&(src1
)[i1
])); ((v2
) = *(const rc_vec_t
*)(&(src2
)[i2
])); ((a1
) = (((union { rc_vec_t v
; uint64_t i
; })(uint64_t)(__builtin_vis_pdist (v1
, ((rc_vec_t
) {0}), (((union { rc_vec_t v
; uint64_t i
; })(uint64_t)(a1
)).i
)))).v
)); ((a2
) = (((union { rc_vec_t v
; uint64_t i
; })(uint64_t)(__builtin_vis_pdist (v2
, ((rc_vec_t
) {0}), (((union { rc_vec_t v
; uint64_t i
; })(uint64_t)(a2
)).i
)))).v
)); do { rc_vec_t s1_
= (v1
); rc_vec_t s2_
= (v1
); rc_vec_t accvin_
= (a11
); rc_vec_t s1lo7_
, s1msb_
, accvout_
; uint32_t maclo_
, machi_
; rc_svec_type_ masklow_
= (rc_svec_type_
){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_
, s1msblo_
, s1lo7hi_
, s1lo7lo_
; rc_svec_type_ s1msbdiv2hi_
, s1msbdiv2lo_
; rc_vec4_type_ s1lo7hi4_
, s1lo7lo4_
, s1msbhi4_
, s1msblo4_
; rc_vec4_type_ s1msbdiv2hi4_
, s1msbdiv2lo4_
, s2hi4_
, s2lo4_
; rc_vec4_type_ accvhi4_
, accvlo4_
; rc_svec_type_ mulhilo7_
, mullolo7_
, mulhimsbdiv2_
, mullomsbdiv2_
; rc_svec_type_ mulhi_
, mullo_
, mulhihi_
, mullohi_
; rc_svec_type_ mulhilo_
, mullolo_
; rc_vec4_type_ zero4_
= (((union { rc_vec4_type_ v
; uint64_t i
; })(uint64_t)(0)).v
); rc_vec_t msb_
= (rc_vec_t
){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_
) = (s1_
) & (msb_
)); ((s1lo7_
) = (s1_
) & (~msb_
)); do { if (rc_gsr_ldinit_
) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_
) || !__builtin_constant_p(2) || !rc_gsr_set_
|| (unsigned) (rc_gsr_align_
) != rc_gsr_align_
|| (unsigned) (2) != rc_gsr_scale_
) { rc_gsr_set_
= 1; rc_gsr_align_
= (rc_gsr_align_
); rc_gsr_scale_
= (2); unsigned int val_
= (rc_gsr_scale_
<< 3) | rc_gsr_align_
; if (__builtin_constant_p (val_
)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec
] "=brm" (s1msb_
), [fakegsr
] "=rm" (rc_gsr_fakedep_
) : "0" (s1msb_
), [gsrval
] "i" (val_
), "1" (rc_gsr_fakedep_
)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec
] "=brm" (s1msb_
), [fakegsr
] "=rm" (rc_gsr_fakedep_
) : "0" (s1msb_
), [gsrval
] "r" (val_
), "1" (rc_gsr_fakedep_
)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec
] "=brm" (s1msb_
) : "0" (s1msb_
), [fakegsr
] "g" (rc_gsr_fakedep_
)); } } while (0); do { typedef union { rc_vec_t v_
; struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) (s1msb_
); (s1msbhi4_
) = hl_
.hilo_
.hi_
; (s1msblo4_
) = hl_
.hilo_
.lo_
; } while (0); s1msbhi_
= __builtin_vis_fexpand(s1msbhi4_
); s1msblo_
= __builtin_vis_fexpand(s1msblo4_
); s1msbdiv2hi4_
= __builtin_vis_fpack16(s1msbhi_
); s1msbdiv2lo4_
= __builtin_vis_fpack16(s1msblo_
); do { typedef union { rc_vec_t v_
; struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) (s2_
); (s2hi4_
) = hl_
.hilo_
.hi_
; (s2lo4_
) = hl_
.hilo_
.lo_
; } while (0); do { typedef union { rc_vec_t v_
; struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) (s1lo7_
); (s1lo7hi4_
) = hl_
.hilo_
.hi_
; (s1lo7lo4_
) = hl_
.hilo_
.lo_
; } while (0); s1msbdiv2hi_
= (rc_svec_type_
)__builtin_vis_fpmerge(s1msbdiv2hi4_
, zero4_
); s1msbdiv2lo_
= (rc_svec_type_
)__builtin_vis_fpmerge(s1msbdiv2lo4_
, zero4_
); s1lo7hi_
= (rc_svec_type_
)__builtin_vis_fpmerge(s1lo7hi4_
, zero4_
); s1lo7lo_
= (rc_svec_type_
)__builtin_vis_fpmerge(s1lo7lo4_
, zero4_
); mulhilo7_
= __builtin_vis_fmul8x16(s2hi4_
, s1lo7hi_
); mullolo7_
= __builtin_vis_fmul8x16(s2lo4_
, s1lo7lo_
); mulhimsbdiv2_
= __builtin_vis_fmul8x16(s2hi4_
, s1msbdiv2hi_
); mullomsbdiv2_
= __builtin_vis_fmul8x16(s2lo4_
, s1msbdiv2lo_
); mulhi_
= mulhilo7_
+ mulhimsbdiv2_
+ mulhimsbdiv2_
; mullo_
= mullolo7_
+ mullomsbdiv2_
+ mullomsbdiv2_
; mulhihi_
= mulhi_
& ~masklow_
; mulhilo_
= mulhi_
& masklow_
; mullohi_
= mullo_
& ~masklow_
; mullolo_
= mullo_
& masklow_
; do { typedef union { rc_vec_t v_
; struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) (accvin_
); (accvhi4_
) = hl_
.hilo_
.hi_
; (accvlo4_
) = hl_
.hilo_
.lo_
; } while (0); maclo_
= __builtin_vis_pdist ((rc_vec_t
)mullolo_
, ((rc_vec_t
) {0}), (((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)(accvlo4_
)).i
)); maclo_
= __builtin_vis_pdist ((rc_vec_t
)mulhilo_
, ((rc_vec_t
) {0}), maclo_
); machi_
= __builtin_vis_pdist ((rc_vec_t
)mullohi_
, ((rc_vec_t
) {0}), (((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)(accvhi4_
)).i
)); machi_
= __builtin_vis_pdist ((rc_vec_t
)mulhihi_
, ((rc_vec_t
) {0}), machi_
); do { typedef union { struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; rc_vec_t v_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) {{((((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)((uint32_t)machi_
)).v
)), ((((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)((uint32_t)maclo_
)).v
))}}; (accvout_
) = hl_
.v_
; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr
] "=brm" (rc_gsr_fakedep_
) : [xdep
] "brm" (accvout_
), "0" (rc_gsr_fakedep_
)); (a11
) = accvout_
; } while (0); do { rc_vec_t s1_
= (v2
); rc_vec_t s2_
= (v2
); rc_vec_t accvin_
= (a22
); rc_vec_t s1lo7_
, s1msb_
, accvout_
; uint32_t maclo_
, machi_
; rc_svec_type_ masklow_
= (rc_svec_type_
){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_
, s1msblo_
, s1lo7hi_
, s1lo7lo_
; rc_svec_type_ s1msbdiv2hi_
, s1msbdiv2lo_
; rc_vec4_type_ s1lo7hi4_
, s1lo7lo4_
, s1msbhi4_
, s1msblo4_
; rc_vec4_type_ s1msbdiv2hi4_
, s1msbdiv2lo4_
, s2hi4_
, s2lo4_
; rc_vec4_type_ accvhi4_
, accvlo4_
; rc_svec_type_ mulhilo7_
, mullolo7_
, mulhimsbdiv2_
, mullomsbdiv2_
; rc_svec_type_ mulhi_
, mullo_
, mulhihi_
, mullohi_
; rc_svec_type_ mulhilo_
, mullolo_
; rc_vec4_type_ zero4_
= (((union { rc_vec4_type_ v
; uint64_t i
; })(uint64_t)(0)).v
); rc_vec_t msb_
= (rc_vec_t
){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_
) = (s1_
) & (msb_
)); ((s1lo7_
) = (s1_
) & (~msb_
)); do { if (rc_gsr_ldinit_
) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_
) || !__builtin_constant_p(2) || !rc_gsr_set_
|| (unsigned) (rc_gsr_align_
) != rc_gsr_align_
|| (unsigned) (2) != rc_gsr_scale_
) { rc_gsr_set_
= 1; rc_gsr_align_
= (rc_gsr_align_
); rc_gsr_scale_
= (2); unsigned int val_
= (rc_gsr_scale_
<< 3) | rc_gsr_align_
; if (__builtin_constant_p (val_
)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec
] "=brm" (s1msb_
), [fakegsr
] "=rm" (rc_gsr_fakedep_
) : "0" (s1msb_
), [gsrval
] "i" (val_
), "1" (rc_gsr_fakedep_
)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec
] "=brm" (s1msb_
), [fakegsr
] "=rm" (rc_gsr_fakedep_
) : "0" (s1msb_
), [gsrval
] "r" (val_
), "1" (rc_gsr_fakedep_
)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec
] "=brm" (s1msb_
) : "0" (s1msb_
), [fakegsr
] "g" (rc_gsr_fakedep_
)); } } while (0); do { typedef union { rc_vec_t v_
; struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) (s1msb_
); (s1msbhi4_
) = hl_
.hilo_
.hi_
; (s1msblo4_
) = hl_
.hilo_
.lo_
; } while (0); s1msbhi_
= __builtin_vis_fexpand(s1msbhi4_
); s1msblo_
= __builtin_vis_fexpand(s1msblo4_
); s1msbdiv2hi4_
= __builtin_vis_fpack16(s1msbhi_
); s1msbdiv2lo4_
= __builtin_vis_fpack16(s1msblo_
); do { typedef union { rc_vec_t v_
; struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) (s2_
); (s2hi4_
) = hl_
.hilo_
.hi_
; (s2lo4_
) = hl_
.hilo_
.lo_
; } while (0); do { typedef union { rc_vec_t v_
; struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) (s1lo7_
); (s1lo7hi4_
) = hl_
.hilo_
.hi_
; (s1lo7lo4_
) = hl_
.hilo_
.lo_
; } while (0); s1msbdiv2hi_
= (rc_svec_type_
)__builtin_vis_fpmerge(s1msbdiv2hi4_
, zero4_
); s1msbdiv2lo_
= (rc_svec_type_
)__builtin_vis_fpmerge(s1msbdiv2lo4_
, zero4_
); s1lo7hi_
= (rc_svec_type_
)__builtin_vis_fpmerge(s1lo7hi4_
, zero4_
); s1lo7lo_
= (rc_svec_type_
)__builtin_vis_fpmerge(s1lo7lo4_
, zero4_
); mulhilo7_
= __builtin_vis_fmul8x16(s2hi4_
, s1lo7hi_
); mullolo7_
= __builtin_vis_fmul8x16(s2lo4_
, s1lo7lo_
); mulhimsbdiv2_
= __builtin_vis_fmul8x16(s2hi4_
, s1msbdiv2hi_
); mullomsbdiv2_
= __builtin_vis_fmul8x16(s2lo4_
, s1msbdiv2lo_
); mulhi_
= mulhilo7_
+ mulhimsbdiv2_
+ mulhimsbdiv2_
; mullo_
= mullolo7_
+ mullomsbdiv2_
+ mullomsbdiv2_
; mulhihi_
= mulhi_
& ~masklow_
; mulhilo_
= mulhi_
& masklow_
; mullohi_
= mullo_
& ~masklow_
; mullolo_
= mullo_
& masklow_
; do { typedef union { rc_vec_t v_
; struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) (accvin_
); (accvhi4_
) = hl_
.hilo_
.hi_
; (accvlo4_
) = hl_
.hilo_
.lo_
; } while (0); maclo_
= __builtin_vis_pdist ((rc_vec_t
)mullolo_
, ((rc_vec_t
) {0}), (((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)(accvlo4_
)).i
)); maclo_
= __builtin_vis_pdist ((rc_vec_t
)mulhilo_
, ((rc_vec_t
) {0}), maclo_
); machi_
= __builtin_vis_pdist ((rc_vec_t
)mullohi_
, ((rc_vec_t
) {0}), (((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)(accvhi4_
)).i
)); machi_
= __builtin_vis_pdist ((rc_vec_t
)mulhihi_
, ((rc_vec_t
) {0}), machi_
); do { typedef union { struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; rc_vec_t v_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) {{((((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)((uint32_t)machi_
)).v
)), ((((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)((uint32_t)maclo_
)).v
))}}; (accvout_
) = hl_
.v_
; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr
] "=brm" (rc_gsr_fakedep_
) : [xdep
] "brm" (accvout_
), "0" (rc_gsr_fakedep_
)); (a22
) = accvout_
; } while (0); do { rc_vec_t s1_
= (v1
); rc_vec_t s2_
= (v2
); rc_vec_t accvin_
= (a12
); rc_vec_t s1lo7_
, s1msb_
, accvout_
; uint32_t maclo_
, machi_
; rc_svec_type_ masklow_
= (rc_svec_type_
){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_
, s1msblo_
, s1lo7hi_
, s1lo7lo_
; rc_svec_type_ s1msbdiv2hi_
, s1msbdiv2lo_
; rc_vec4_type_ s1lo7hi4_
, s1lo7lo4_
, s1msbhi4_
, s1msblo4_
; rc_vec4_type_ s1msbdiv2hi4_
, s1msbdiv2lo4_
, s2hi4_
, s2lo4_
; rc_vec4_type_ accvhi4_
, accvlo4_
; rc_svec_type_ mulhilo7_
, mullolo7_
, mulhimsbdiv2_
, mullomsbdiv2_
; rc_svec_type_ mulhi_
, mullo_
, mulhihi_
, mullohi_
; rc_svec_type_ mulhilo_
, mullolo_
; rc_vec4_type_ zero4_
= (((union { rc_vec4_type_ v
; uint64_t i
; })(uint64_t)(0)).v
); rc_vec_t msb_
= (rc_vec_t
){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_
) = (s1_
) & (msb_
)); ((s1lo7_
) = (s1_
) & (~msb_
)); do { if (rc_gsr_ldinit_
) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_
) || !__builtin_constant_p(2) || !rc_gsr_set_
|| (unsigned) (rc_gsr_align_
) != rc_gsr_align_
|| (unsigned) (2) != rc_gsr_scale_
) { rc_gsr_set_
= 1; rc_gsr_align_
= (rc_gsr_align_
); rc_gsr_scale_
= (2); unsigned int val_
= (rc_gsr_scale_
<< 3) | rc_gsr_align_
; if (__builtin_constant_p (val_
)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec
] "=brm" (s1msb_
), [fakegsr
] "=rm" (rc_gsr_fakedep_
) : "0" (s1msb_
), [gsrval
] "i" (val_
), "1" (rc_gsr_fakedep_
)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec
] "=brm" (s1msb_
), [fakegsr
] "=rm" (rc_gsr_fakedep_
) : "0" (s1msb_
), [gsrval
] "r" (val_
), "1" (rc_gsr_fakedep_
)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec
] "=brm" (s1msb_
) : "0" (s1msb_
), [fakegsr
] "g" (rc_gsr_fakedep_
)); } } while (0); do { typedef union { rc_vec_t v_
; struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) (s1msb_
); (s1msbhi4_
) = hl_
.hilo_
.hi_
; (s1msblo4_
) = hl_
.hilo_
.lo_
; } while (0); s1msbhi_
= __builtin_vis_fexpand(s1msbhi4_
); s1msblo_
= __builtin_vis_fexpand(s1msblo4_
); s1msbdiv2hi4_
= __builtin_vis_fpack16(s1msbhi_
); s1msbdiv2lo4_
= __builtin_vis_fpack16(s1msblo_
); do { typedef union { rc_vec_t v_
; struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) (s2_
); (s2hi4_
) = hl_
.hilo_
.hi_
; (s2lo4_
) = hl_
.hilo_
.lo_
; } while (0); do { typedef union { rc_vec_t v_
; struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) (s1lo7_
); (s1lo7hi4_
) = hl_
.hilo_
.hi_
; (s1lo7lo4_
) = hl_
.hilo_
.lo_
; } while (0); s1msbdiv2hi_
= (rc_svec_type_
)__builtin_vis_fpmerge(s1msbdiv2hi4_
, zero4_
); s1msbdiv2lo_
= (rc_svec_type_
)__builtin_vis_fpmerge(s1msbdiv2lo4_
, zero4_
); s1lo7hi_
= (rc_svec_type_
)__builtin_vis_fpmerge(s1lo7hi4_
, zero4_
); s1lo7lo_
= (rc_svec_type_
)__builtin_vis_fpmerge(s1lo7lo4_
, zero4_
); mulhilo7_
= __builtin_vis_fmul8x16(s2hi4_
, s1lo7hi_
); mullolo7_
= __builtin_vis_fmul8x16(s2lo4_
, s1lo7lo_
); mulhimsbdiv2_
= __builtin_vis_fmul8x16(s2hi4_
, s1msbdiv2hi_
); mullomsbdiv2_
= __builtin_vis_fmul8x16(s2lo4_
, s1msbdiv2lo_
); mulhi_
= mulhilo7_
+ mulhimsbdiv2_
+ mulhimsbdiv2_
; mullo_
= mullolo7_
+ mullomsbdiv2_
+ mullomsbdiv2_
; mulhihi_
= mulhi_
& ~masklow_
; mulhilo_
= mulhi_
& masklow_
; mullohi_
= mullo_
& ~masklow_
; mullolo_
= mullo_
& masklow_
; do { typedef union { rc_vec_t v_
; struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) (accvin_
); (accvhi4_
) = hl_
.hilo_
.hi_
; (accvlo4_
) = hl_
.hilo_
.lo_
; } while (0); maclo_
= __builtin_vis_pdist ((rc_vec_t
)mullolo_
, ((rc_vec_t
) {0}), (((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)(accvlo4_
)).i
)); maclo_
= __builtin_vis_pdist ((rc_vec_t
)mulhilo_
, ((rc_vec_t
) {0}), maclo_
); machi_
= __builtin_vis_pdist ((rc_vec_t
)mullohi_
, ((rc_vec_t
) {0}), (((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)(accvhi4_
)).i
)); machi_
= __builtin_vis_pdist ((rc_vec_t
)mulhihi_
, ((rc_vec_t
) {0}), machi_
); do { typedef union { struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; rc_vec_t v_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) {{((((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)((uint32_t)machi_
)).v
)), ((((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)((uint32_t)maclo_
)).v
))}}; (accvout_
) = hl_
.v_
; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr
] "=brm" (rc_gsr_fakedep_
) : [xdep
] "brm" (accvout_
), "0" (rc_gsr_fakedep_
)); (a12
) = accvout_
; } while (0); (i1
) += 8; (i2
) += 8; } while (0);
/* Drain the five vector accumulators into the scalar running sums.
 *
 *  - t1, t2: a1 and a2 are cast to uint64_t (wrapped in a GNU
 *    cast-to-union whose .i member is read back) and stored in uint32_t
 *    temporaries, so only the low 32 bits of each value are kept.
 *  - t11, t22, t12: a11, a22 and a12 are each split, through the local
 *    RC_hl_type_ union, into the first (hi_) and second (lo_) 4-byte
 *    members.  Each member is read as a uint32_t and the pair is combined
 *    as lo + hi * 256; the arithmetic is done in uint64_t and the result
 *    is then stored in a uint32_t temporary.
 *  - a1, a2, a11, a22 and a12 are reset to {0}, and the temporaries are
 *    added to s1, s2, s11, s22 and s12, which are declared outside this
 *    statement.
 *
 * NOTE(review): the "* 256" weighting presumably mirrors how the
 * accumulation code separates high and low product bytes -- confirm
 * against that code before changing it.  This file is a compiler
 * regression test, so the exact expression shape should be preserved.
 */
54 do { uint32_t t1
, t2
, t11
, t22
, t12
; ((t1
) = (((union { rc_vec_t v
; uint64_t i
; })(uint64_t)(a1
)).i
)); ((t2
) = (((union { rc_vec_t v
; uint64_t i
; })(uint64_t)(a2
)).i
)); do { rc_vec4_type_ accvhi4_
, accvlo4_
; uint64_t machi_
, maclo_
; do { typedef union { rc_vec_t v_
; struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) (a11
); (accvhi4_
) = hl_
.hilo_
.hi_
; (accvlo4_
) = hl_
.hilo_
.lo_
; } while (0); machi_
= (((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)(accvhi4_
)).i
); maclo_
= (((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)(accvlo4_
)).i
); (t11
) = maclo_
+ machi_
* 256; } while (0); do { rc_vec4_type_ accvhi4_
, accvlo4_
; uint64_t machi_
, maclo_
; do { typedef union { rc_vec_t v_
; struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) (a22
); (accvhi4_
) = hl_
.hilo_
.hi_
; (accvlo4_
) = hl_
.hilo_
.lo_
; } while (0); machi_
= (((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)(accvhi4_
)).i
); maclo_
= (((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)(accvlo4_
)).i
); (t22
) = maclo_
+ machi_
* 256; } while (0); do { rc_vec4_type_ accvhi4_
, accvlo4_
; uint64_t machi_
, maclo_
; do { typedef union { rc_vec_t v_
; struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) (a12
); (accvhi4_
) = hl_
.hilo_
.hi_
; (accvlo4_
) = hl_
.hilo_
.lo_
; } while (0); machi_
= (((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)(accvhi4_
)).i
); maclo_
= (((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)(accvlo4_
)).i
); (t12
) = maclo_
+ machi_
* 256; } while (0); ((a1
) = ((rc_vec_t
) {0})); ((a2
) = ((rc_vec_t
) {0})); ((a11
) = ((rc_vec_t
) {0})); ((a22
) = ((rc_vec_t
) {0})); ((a12
) = ((rc_vec_t
) {0})); (s1
) += t1
; (s2
) += t2
; (s11
) += t11
; (s22
) += t22
; (s12
) += t12
; } while (0);
/* Fold the vector accumulators a1, a2, a11, a22 and a12 into the scalar
 * totals and clear them.
 *
 * Steps, in the order the statement performs them:
 *  1. t1 and t2 receive a1 and a2 cast to uint64_t (through a GNU
 *     cast-to-union, reading member .i); because the temporaries are
 *     uint32_t, the values are narrowed to their low 32 bits.
 *  2. For each of a11, a22 and a12 the local RC_hl_type_ union exposes
 *     two 4-byte members, hi_ (first) and lo_ (second).  Both are read
 *     as uint32_t into the uint64_t locals machi_ and maclo_, and
 *     maclo_ + machi_ * 256 is stored in t11, t22 and t12 respectively
 *     (narrowed to uint32_t on assignment).
 *  3. All five accumulators are reassigned the compound literal
 *     (rc_vec_t){0}.
 *  4. t1, t2, t11, t22 and t12 are added to s1, s2, s11, s22 and s12;
 *     those totals are declared outside this statement.
 *
 * NOTE(review): the factor 256 presumably reflects a high-byte/low-byte
 * split made where the accumulators are updated -- verify there.  As
 * this file is a compiler regression test, keep the code shape intact.
 */
56 do { uint32_t t1
, t2
, t11
, t22
, t12
; ((t1
) = (((union { rc_vec_t v
; uint64_t i
; })(uint64_t)(a1
)).i
)); ((t2
) = (((union { rc_vec_t v
; uint64_t i
; })(uint64_t)(a2
)).i
)); do { rc_vec4_type_ accvhi4_
, accvlo4_
; uint64_t machi_
, maclo_
; do { typedef union { rc_vec_t v_
; struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) (a11
); (accvhi4_
) = hl_
.hilo_
.hi_
; (accvlo4_
) = hl_
.hilo_
.lo_
; } while (0); machi_
= (((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)(accvhi4_
)).i
); maclo_
= (((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)(accvlo4_
)).i
); (t11
) = maclo_
+ machi_
* 256; } while (0); do { rc_vec4_type_ accvhi4_
, accvlo4_
; uint64_t machi_
, maclo_
; do { typedef union { rc_vec_t v_
; struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) (a22
); (accvhi4_
) = hl_
.hilo_
.hi_
; (accvlo4_
) = hl_
.hilo_
.lo_
; } while (0); machi_
= (((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)(accvhi4_
)).i
); maclo_
= (((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)(accvlo4_
)).i
); (t22
) = maclo_
+ machi_
* 256; } while (0); do { rc_vec4_type_ accvhi4_
, accvlo4_
; uint64_t machi_
, maclo_
; do { typedef union { rc_vec_t v_
; struct { rc_vec4_type_ hi_
, lo_
; } hilo_
; } RC_hl_type_
; RC_hl_type_ hl_
= (RC_hl_type_
) (a12
); (accvhi4_
) = hl_
.hilo_
.hi_
; (accvlo4_
) = hl_
.hilo_
.lo_
; } while (0); machi_
= (((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)(accvhi4_
)).i
); maclo_
= (((union { rc_vec4_type_ v
; uint32_t i
; })(uint32_t)(accvlo4_
)).i
); (t12
) = maclo_
+ machi_
* 256; } while (0); ((a1
) = ((rc_vec_t
) {0})); ((a2
) = ((rc_vec_t
) {0})); ((a11
) = ((rc_vec_t
) {0})); ((a22
) = ((rc_vec_t
) {0})); ((a12
) = ((rc_vec_t
) {0})); (s1
) += t1
; (s2
) += t2
; (s11
) += t11
; (s22
) += t22
; (s12
) += t12
; } while (0);