/* { dg-options "-O3 -fno-inline -save-temps -fno-vect-cost-model -fno-ipa-icf" } */
4 #pragma GCC target "+nosve"
/* Signed element types, named after their nominal bit width.  */
typedef signed char S8_t;
typedef signed short S16_t;
typedef signed int S32_t;
typedef signed long long S64_t;

/* Restrict-qualified pointers to the signed element types.  The kernels
   take these so the compiler may assume the destination and the two
   sources do not alias.  */
typedef signed char *__restrict__ pS8_t;
typedef signed short *__restrict__ pS16_t;
typedef signed int *__restrict__ pS32_t;
typedef signed long long *__restrict__ pS64_t;

/* Unsigned element types, named after their nominal bit width.  */
typedef unsigned char U8_t;
typedef unsigned short U16_t;
typedef unsigned int U32_t;
typedef unsigned long long U64_t;

/* Restrict-qualified pointers to the unsigned element types.  */
typedef unsigned char *__restrict__ pU8_t;
typedef unsigned short *__restrict__ pU16_t;
typedef unsigned int *__restrict__ pU32_t;
typedef unsigned long long *__restrict__ pU64_t;
29 test_addl_S64_S32_4 (pS64_t a
, pS32_t b
, pS32_t c
)
32 for (i
= 0; i
< 4; i
++)
33 a
[i
] = (S64_t
) b
[i
] + (S64_t
) c
[i
];
35 /* "saddl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" */
36 /* "saddl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s" */
38 /* a = -b + c => a = c - b */
40 test_addl_S64_S32_4_neg0 (pS64_t a
, pS32_t b
, pS32_t c
)
43 for (i
= 0; i
< 4; i
++)
44 a
[i
] = -(S64_t
) b
[i
] + (S64_t
) c
[i
];
46 /* "ssubl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" */
47 /* "ssubl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s" */
49 /* a = b + -c => a = b - c */
51 test_addl_S64_S32_4_neg1 (pS64_t a
, pS32_t b
, pS32_t c
)
54 for (i
= 0; i
< 4; i
++)
55 a
[i
] = (S64_t
) b
[i
] + -(S64_t
) c
[i
];
57 /* "ssubl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" */
58 /* "ssubl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s" */
61 test_addl_S32_S16_8 (pS32_t a
, pS16_t b
, pS16_t c
)
64 for (i
= 0; i
< 8; i
++)
65 a
[i
] = (S32_t
) b
[i
] + (S32_t
) c
[i
];
67 /* { dg-final { scan-assembler "saddl\tv\[0-9\]+\.4s,\ v\[0-9\]+\.4h,\ v\[0-9\]+\.4h" } } */
68 /* { dg-final { scan-assembler "saddl2\tv\[0-9\]+\.4s,\ v\[0-9\]+\.8h,\ v\[0-9\]+\.8h" } } */
71 test_addl_S16_S8_16 (pS16_t a
, pS8_t b
, pS8_t c
)
74 for (i
= 0; i
< 16; i
++)
75 a
[i
] = (S16_t
) b
[i
] + (S16_t
) c
[i
];
77 /* { dg-final { scan-assembler "saddl\tv\[0-9\]+\.8h,\ v\[0-9\]+\.8b,\ v\[0-9\]+\.8b" } } */
78 /* { dg-final { scan-assembler "saddl2\tv\[0-9\]+\.8h,\ v\[0-9\]+\.16b,\ v\[0-9\]+\.16b" } } */
81 test_addl_U64_U32_4 (pU64_t a
, pU32_t b
, pU32_t c
)
84 for (i
= 0; i
< 4; i
++)
85 a
[i
] = (U64_t
) b
[i
] + (U64_t
) c
[i
];
87 /* { dg-final { scan-assembler "uaddl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" } } */
88 /* { dg-final { scan-assembler "uaddl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s" } } */
91 test_addl_U32_U16_8 (pU32_t a
, pU16_t b
, pU16_t c
)
94 for (i
= 0; i
< 8; i
++)
95 a
[i
] = (U32_t
) b
[i
] + (U32_t
) c
[i
];
97 /* { dg-final { scan-assembler "uaddl\tv\[0-9\]+\.4s,\ v\[0-9\]+\.4h,\ v\[0-9\]+\.4h" } } */
98 /* { dg-final { scan-assembler "uaddl2\tv\[0-9\]+\.4s,\ v\[0-9\]+\.8h,\ v\[0-9\]+\.8h" } } */
101 test_addl_U16_U8_16 (pU16_t a
, pU8_t b
, pU8_t c
)
104 for (i
= 0; i
< 16; i
++)
105 a
[i
] = (U16_t
) b
[i
] + (U16_t
) c
[i
];
107 /* { dg-final { scan-assembler "uaddl\tv\[0-9\]+\.8h,\ v\[0-9\]+\.8b,\ v\[0-9\]+\.8b" } } */
108 /* { dg-final { scan-assembler "uaddl2\tv\[0-9\]+\.8h,\ v\[0-9\]+\.16b,\ v\[0-9\]+\.16b" } } */
111 test_subl_S64_S32_4 (pS64_t a
, pS32_t b
, pS32_t c
)
114 for (i
= 0; i
< 4; i
++)
115 a
[i
] = (S64_t
) b
[i
] - (S64_t
) c
[i
];
117 /* "ssubl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" */
118 /* "ssubl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s" */
120 /* a = b - -c => a = b + c */
122 test_subl_S64_S32_4_neg0 (pS64_t a
, pS32_t b
, pS32_t c
)
125 for (i
= 0; i
< 4; i
++)
126 a
[i
] = (S64_t
) b
[i
] - -(S64_t
) c
[i
];
128 /* { dg-final { scan-assembler-times "saddl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" 2 } } */
129 /* { dg-final { scan-assembler-times "saddl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s" 2 } } */
131 /* a = -b - -c => a = c - b */
133 test_subl_S64_S32_4_neg1 (pS64_t a
, pS32_t b
, pS32_t c
)
136 for (i
= 0; i
< 4; i
++)
137 a
[i
] = -(S64_t
) b
[i
] - -(S64_t
) c
[i
];
139 /* "ssubl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" */
140 /* "ssubl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s" */
142 /* a = -(b - c) => a = c - b */
144 test_subl_S64_S32_4_neg2 (pS64_t a
, pS32_t b
, pS32_t c
)
147 for (i
= 0; i
< 4; i
++)
148 a
[i
] = -((S64_t
) b
[i
] - (S64_t
) c
[i
]);
150 /* { dg-final { scan-assembler-times "ssubl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" 5 } } */
151 /* { dg-final { scan-assembler-times "ssubl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s" 5 } } */
154 test_subl_S32_S16_8 (pS32_t a
, pS16_t b
, pS16_t c
)
157 for (i
= 0; i
< 8; i
++)
158 a
[i
] = (S32_t
) b
[i
] - (S32_t
) c
[i
];
160 /* { dg-final { scan-assembler "ssubl\tv\[0-9\]+\.4s,\ v\[0-9\]+\.4h,\ v\[0-9\]+\.4h" } } */
161 /* { dg-final { scan-assembler "ssubl2\tv\[0-9\]+\.4s,\ v\[0-9\]+\.8h,\ v\[0-9\]+\.8h" } } */
164 test_subl_S16_S8_16 (pS16_t a
, pS8_t b
, pS8_t c
)
167 for (i
= 0; i
< 16; i
++)
168 a
[i
] = (S16_t
) b
[i
] - (S16_t
) c
[i
];
170 /* { dg-final { scan-assembler "ssubl\tv\[0-9\]+\.8h,\ v\[0-9\]+\.8b,\ v\[0-9\]+\.8b" } } */
171 /* { dg-final { scan-assembler "ssubl2\tv\[0-9\]+\.8h,\ v\[0-9\]+\.16b,\ v\[0-9\]+\.16b" } } */
174 test_subl_U64_U32_4 (pU64_t a
, pU32_t b
, pU32_t c
)
177 for (i
= 0; i
< 4; i
++)
178 a
[i
] = (U64_t
) b
[i
] - (U64_t
) c
[i
];
180 /* { dg-final { scan-assembler "usubl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" } } */
181 /* { dg-final { scan-assembler "usubl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s" } } */
184 test_subl_U32_U16_8 (pU32_t a
, pU16_t b
, pU16_t c
)
187 for (i
= 0; i
< 8; i
++)
188 a
[i
] = (U32_t
) b
[i
] - (U32_t
) c
[i
];
190 /* { dg-final { scan-assembler "usubl\tv\[0-9\]+\.4s,\ v\[0-9\]+\.4h,\ v\[0-9\]+\.4h" } } */
191 /* { dg-final { scan-assembler "usubl2\tv\[0-9\]+\.4s,\ v\[0-9\]+\.8h,\ v\[0-9\]+\.8h" } } */
194 test_subl_U16_U8_16 (pU16_t a
, pU8_t b
, pU8_t c
)
197 for (i
= 0; i
< 16; i
++)
198 a
[i
] = (U16_t
) b
[i
] - (U16_t
) c
[i
];
200 /* { dg-final { scan-assembler "usubl\tv\[0-9\]+\.8h,\ v\[0-9\]+\.8b,\ v\[0-9\]+\.8b" } } */
201 /* { dg-final { scan-assembler "usubl2\tv\[0-9\]+\.8h,\ v\[0-9\]+\.16b,\ v\[0-9\]+\.16b" } } */
206 S32_t S32_tb
[4] = { 0, 1, 2, 3 };
207 S32_t S32_tc
[4] = { 2, 2, -2, -2 };
210 S16_t S16_tb
[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
211 S16_t S16_tc
[8] = { 2, 2, -2, -2, 2, 2, -2, -2 };
214 S8_t S8_tb
[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
215 S8_t S8_tc
[16] = { 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2 };
217 /* expected output */
219 S64_t addl_rS64
[] = { 2, 3, 0, 1 };
220 S64_t neg_r
[] = { 2, 1, -4, -5 };
221 S32_t addl_rS32
[] = { 2, 3, 0, 1, 6, 7, 4, 5 };
222 S16_t addl_rS16
[] = { 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13 };
223 S64_t subl_rS64
[] = { -2, -1, 4, 5 };
224 S32_t subl_rS32
[] = { -2, -1, 4, 5, 2, 3, 8, 9 };
226 { -2, -1, 4, 5, 2, 3, 8, 9, 6, 7, 12, 13, 10, 11, 16, 17 };
227 U64_t addl_rU64
[] = { 2, 3, 0x100000000, 0x100000001 };
228 U32_t addl_rU32
[] = { 2, 3, 0x10000, 0x10001, 6, 7, 0x10004, 0x10005 };
231 0x0002, 0x0003, 0x0100, 0x0101, 0x0006, 0x0007, 0x0104, 0x0105,
232 0x000a, 0x000b, 0x0108, 0x0109, 0x000e, 0x000f, 0x010c, 0x010d
236 0xfffffffffffffffe, 0xffffffffffffffff,
237 0xffffffff00000004, 0xffffffff00000005
241 0xfffffffe, 0xffffffff, 0xffff0004, 0xffff0005,
242 0x00000002, 0x00000003, 0xffff0008, 0xffff0009
246 0xfffe, 0xffff, 0xff04, 0xff05, 0x0002, 0x0003, 0xff08, 0xff09,
247 0x0006, 0x0007, 0xff0c, 0xff0d, 0x000a, 0x000b, 0xff10, 0xff11
250 #define CHECK(T,N,AS,US) \
253 for (i = 0; i < N; i++) \
254 if ((US##T##_t)S##T##_ta[i] != AS##_##r##US##T[i]) \
259 #define NCHECK(RES) \
262 for (i = 0; i < 4; i++) \
263 if (S64_ta[i] != RES[i]) \
268 #define SCHECK(T,N,AS) CHECK(T,N,AS,S)
269 #define UCHECK(T,N,AS) CHECK(T,N,AS,U)
276 test_addl_S64_S32_4 (S64_ta
, S32_tb
, S32_tc
);
277 SCHECK (64, 4, addl
);
278 test_addl_S32_S16_8 (S32_ta
, S16_tb
, S16_tc
);
279 SCHECK (32, 8, addl
);
280 test_addl_S16_S8_16 (S16_ta
, S8_tb
, S8_tc
);
281 SCHECK (16, 16, addl
);
282 test_subl_S64_S32_4 (S64_ta
, S32_tb
, S32_tc
);
283 SCHECK (64, 4, subl
);
284 test_subl_S32_S16_8 (S32_ta
, S16_tb
, S16_tc
);
285 SCHECK (32, 8, subl
);
286 test_subl_S16_S8_16 (S16_ta
, S8_tb
, S8_tc
);
287 SCHECK (16, 16, subl
);
289 test_addl_U64_U32_4 (S64_ta
, S32_tb
, S32_tc
);
290 UCHECK (64, 4, addl
);
291 test_addl_U32_U16_8 (S32_ta
, S16_tb
, S16_tc
);
292 UCHECK (32, 8, addl
);
293 test_addl_U16_U8_16 (S16_ta
, S8_tb
, S8_tc
);
294 UCHECK (16, 16, addl
);
295 test_subl_U64_U32_4 (S64_ta
, S32_tb
, S32_tc
);
296 UCHECK (64, 4, subl
);
297 test_subl_U32_U16_8 (S32_ta
, S16_tb
, S16_tc
);
298 UCHECK (32, 8, subl
);
299 test_subl_U16_U8_16 (S16_ta
, S8_tb
, S8_tc
);
300 UCHECK (16, 16, subl
);
302 test_addl_S64_S32_4_neg0 (S64_ta
, S32_tb
, S32_tc
);
304 test_addl_S64_S32_4_neg1 (S64_ta
, S32_tb
, S32_tc
);
306 test_subl_S64_S32_4_neg0 (S64_ta
, S32_tb
, S32_tc
);
308 test_subl_S64_S32_4_neg1 (S64_ta
, S32_tb
, S32_tc
);
310 test_subl_S64_S32_4_neg2 (S64_ta
, S32_tb
, S32_tc
);