1 /* { dg-do compile { target { ! ia32 } } } */
2 /* { dg-options "-Ofast -mavx512vl" } */
/* Scalar double case.  A function marked noinline and noclone with return
   type double: it rounds x with __builtin_round into the register variable
   a, which is pinned to xmm16, and then passes a through an empty asm as a
   read-write "+v" operand.
   NOTE(review): the function name, parameter list, braces and return
   statement are not visible in this excerpt -- confirm against the
   original test file.  */
6 __attribute__((noinline
, noclone
)) double
9 register double a
__asm__ ("xmm16") = __builtin_round (x
);
/* Empty asm body; a is both read and written by it.  Presumably this keeps
   the rounded value live in xmm16 -- TODO confirm.  */
10 __asm__ ("" : "+v" (a
));
/* Scalar float case.  A function marked noinline and noclone with return
   type float: it rounds x with __builtin_roundf into the register variable
   a, which is pinned to xmm16, and then passes a through an empty asm as a
   read-write "+v" operand.
   NOTE(review): the function name, parameter list, braces and return
   statement are not visible in this excerpt -- confirm against the
   original test file.  */
14 __attribute__((noinline
, noclone
)) float
17 register float a
__asm__ ("xmm16") = __builtin_roundf (x
);
/* Empty asm body; a is both read and written by it.  Presumably this keeps
   the rounded value live in xmm16 -- TODO confirm.  */
18 __asm__ ("" : "+v" (a
));
/* f3: _mm_round_sd on operands held in xmm16 and xmm17.
   X and Y are copied into the register variables a (xmm16) and b (xmm17);
   both go through an empty asm as "+v" operands, then a is replaced by
   _mm_round_sd (a, b, _MM_FROUND_NINT) and passed through a second empty
   asm.  The function is noinline and noclone and returns __m128d.
   NOTE(review): the opening brace, return statement and closing brace are
   not visible in this excerpt -- confirm against the original test file.  */
22 __attribute__((noinline
, noclone
)) __m128d
23 f3 (__m128d x
, __m128d y
)
25 register __m128d a
__asm__ ("xmm16") = x
, b
__asm__ ("xmm17") = y
;
/* Empty asm reading and writing both a and b.  Presumably this forces the
   inputs into xmm16/xmm17 before the rounding -- TODO confirm.  */
26 __asm__ ("" : "+v" (a
), "+v" (b
));
27 a
= _mm_round_sd (a
, b
, _MM_FROUND_NINT
);
/* Second empty asm, this time on the result only.  */
28 __asm__ ("" : "+v" (a
));
/* f4: _mm_round_ss on operands held in xmm16 and xmm17.
   X and Y are copied into the register variables a (xmm16) and b (xmm17);
   both go through an empty asm as "+v" operands, then a is replaced by
   _mm_round_ss (a, b, _MM_FROUND_NINT) and passed through a second empty
   asm.  The function is noinline and noclone and returns __m128.
   NOTE(review): the opening brace, return statement and closing brace are
   not visible in this excerpt -- confirm against the original test file.  */
32 __attribute__((noinline
, noclone
)) __m128
33 f4 (__m128 x
, __m128 y
)
35 register __m128 a
__asm__ ("xmm16") = x
, b
__asm__ ("xmm17") = y
;
/* Empty asm reading and writing both a and b.  Presumably this forces the
   inputs into xmm16/xmm17 before the rounding -- TODO confirm.  */
36 __asm__ ("" : "+v" (a
), "+v" (b
));
37 a
= _mm_round_ss (a
, b
, _MM_FROUND_NINT
);
/* Second empty asm, this time on the result only.  */
38 __asm__ ("" : "+v" (a
));
/* Packed double, 128-bit case.  A function marked noinline and noclone
   with return type __m128d: x is copied into the register variable a
   (pinned to xmm16), a goes through an empty asm as a "+v" operand, is
   replaced by _mm_round_pd (a, _MM_FROUND_NINT), and goes through a second
   empty asm.
   NOTE(review): the function name, parameter list, braces and return
   statement are not visible in this excerpt -- confirm against the
   original test file.  */
42 __attribute__((noinline
, noclone
)) __m128d
45 register __m128d a
__asm__ ("xmm16") = x
;
/* Empty asm reading and writing a before the rounding.  */
46 __asm__ ("" : "+v" (a
));
47 a
= _mm_round_pd (a
, _MM_FROUND_NINT
);
/* Empty asm reading and writing the rounded result.  */
48 __asm__ ("" : "+v" (a
));
/* Packed float, 128-bit case.  A function marked noinline and noclone
   with return type __m128: x is copied into the register variable a
   (pinned to xmm16), a goes through an empty asm as a "+v" operand, is
   replaced by _mm_round_ps (a, _MM_FROUND_NINT), and goes through a second
   empty asm.
   NOTE(review): the function name, parameter list, braces and return
   statement are not visible in this excerpt -- confirm against the
   original test file.  */
52 __attribute__((noinline
, noclone
)) __m128
55 register __m128 a
__asm__ ("xmm16") = x
;
/* Empty asm reading and writing a before the rounding.  */
56 __asm__ ("" : "+v" (a
));
57 a
= _mm_round_ps (a
, _MM_FROUND_NINT
);
/* Empty asm reading and writing the rounded result.  */
58 __asm__ ("" : "+v" (a
));
/* Packed double, 256-bit case.  A function marked noinline and noclone
   with return type __m256d: x is copied into the register variable a, a
   goes through an empty asm as a "+v" operand, is replaced by
   _mm256_round_pd (a, _MM_FROUND_NINT), and goes through a second empty
   asm.  Note that the register is named "xmm16" here even though a is a
   __m256d.
   NOTE(review): the function name, parameter list, braces and return
   statement are not visible in this excerpt -- confirm against the
   original test file.  */
62 __attribute__((noinline
, noclone
)) __m256d
65 register __m256d a
__asm__ ("xmm16") = x
;
/* Empty asm reading and writing a before the rounding.  */
66 __asm__ ("" : "+v" (a
));
67 a
= _mm256_round_pd (a
, _MM_FROUND_NINT
);
/* Empty asm reading and writing the rounded result.  */
68 __asm__ ("" : "+v" (a
));
/* Packed float, 256-bit case.  A function marked noinline and noclone
   with return type __m256: x is copied into the register variable a, a
   goes through an empty asm as a "+v" operand, is replaced by
   _mm256_round_ps (a, _MM_FROUND_NINT), and goes through a second empty
   asm.  Note that the register is named "xmm16" here even though a is a
   __m256.
   NOTE(review): the function name, parameter list, braces and return
   statement are not visible in this excerpt -- confirm against the
   original test file.  */
72 __attribute__((noinline
, noclone
)) __m256
75 register __m256 a
__asm__ ("xmm16") = x
;
/* Empty asm reading and writing a before the rounding.  */
76 __asm__ ("" : "+v" (a
));
77 a
= _mm256_round_ps (a
, _MM_FROUND_NINT
);
/* Empty asm reading and writing the rounded result.  */
78 __asm__ ("" : "+v" (a
));
/* Instead of vround{sd,ss,pd,ps} this should use vrndscale{sd,ss,pd,ps}
   counterparts, so that [xy]mm1[67] can be referenced directly in the
   instructions.  */
85 /* { dg-final { scan-assembler-times "vrndscalesd\[^\n\r\]*xmm" 2 } } */
86 /* { dg-final { scan-assembler-times "vrndscaless\[^\n\r\]*xmm" 2 } } */
87 /* { dg-final { scan-assembler-times "vrndscalepd\[^\n\r\]*xmm" 1 } } */
88 /* { dg-final { scan-assembler-times "vrndscaleps\[^\n\r\]*xmm" 1 } } */
89 /* { dg-final { scan-assembler-times "vrndscalepd\[^\n\r\]*ymm" 1 } } */
90 /* { dg-final { scan-assembler-times "vrndscaleps\[^\n\r\]*ymm" 1 } } */
91 /* { dg-final { scan-assembler-not "vroundsd\[^\n\r\]*xmm" } } */
92 /* { dg-final { scan-assembler-not "vroundss\[^\n\r\]*xmm" } } */
93 /* { dg-final { scan-assembler-not "vroundpd\[^\n\r\]*xmm" } } */
94 /* { dg-final { scan-assembler-not "vroundps\[^\n\r\]*xmm" } } */
95 /* { dg-final { scan-assembler-not "vroundpd\[^\n\r\]*ymm" } } */
96 /* { dg-final { scan-assembler-not "vroundps\[^\n\r\]*ymm" } } */