2 /* { dg-options "-O3 -fno-inline -save-temps -fno-vect-cost-model -fno-ipa-icf" } */
/* Signed element types used by the kernels below.  */
4 typedef signed char S8_t
;
5 typedef signed short S16_t
;
6 typedef signed int S32_t
;
7 typedef signed long long S64_t
;
/* Restrict-qualified pointers to the signed element types.  */
9 typedef signed char *__restrict__ pS8_t
;
10 typedef signed short *__restrict__ pS16_t
;
11 typedef signed int *__restrict__ pS32_t
;
12 typedef signed long long *__restrict__ pS64_t
;
/* Unsigned element types used by the kernels below.  */
14 typedef unsigned char U8_t
;
15 typedef unsigned short U16_t
;
16 typedef unsigned int U32_t
;
17 typedef unsigned long long U64_t
;
/* Restrict-qualified pointers to the unsigned element types.  */
19 typedef unsigned char *__restrict__ pU8_t
;
20 typedef unsigned short *__restrict__ pU16_t
;
21 typedef unsigned int *__restrict__ pU32_t
;
22 typedef unsigned long long *__restrict__ pU64_t
;
/* Element-wise signed add over 4 elements: for each i in [0, 4), a[i] is
   set to b[i] + c[i], with both operands converted to S64_t before the
   addition.  */
27 test_addl_S64_S32_4 (pS64_t a
, pS32_t b
, pS32_t c
)
30 for (i
= 0; i
< 4; i
++)
31 a
[i
] = (S64_t
) b
[i
] + (S64_t
) c
[i
];
33 /* "saddl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" */
34 /* "saddl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s" */
36 /* a = -b + c => a = c - b */
/* Add with a negated first operand over 4 elements: a[i] is set to
   -(S64_t) b[i] + (S64_t) c[i].  The negation applies to the value of b[i]
   after it has been converted to S64_t.  */
38 test_addl_S64_S32_4_neg0 (pS64_t a
, pS32_t b
, pS32_t c
)
41 for (i
= 0; i
< 4; i
++)
42 a
[i
] = -(S64_t
) b
[i
] + (S64_t
) c
[i
];
44 /* "ssubl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" */
45 /* "ssubl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s" */
47 /* a = b + -c => a = b - c */
/* Add with a negated second operand over 4 elements: a[i] is set to
   (S64_t) b[i] + -(S64_t) c[i].  The negation applies to the value of c[i]
   after it has been converted to S64_t.  */
49 test_addl_S64_S32_4_neg1 (pS64_t a
, pS32_t b
, pS32_t c
)
52 for (i
= 0; i
< 4; i
++)
53 a
[i
] = (S64_t
) b
[i
] + -(S64_t
) c
[i
];
55 /* "ssubl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" */
56 /* "ssubl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s" */
/* Element-wise signed add over 8 elements: for each i in [0, 8), a[i] is
   set to b[i] + c[i], with both S16 operands converted to S32_t before the
   addition.  */
59 test_addl_S32_S16_8 (pS32_t a
, pS16_t b
, pS16_t c
)
62 for (i
= 0; i
< 8; i
++)
63 a
[i
] = (S32_t
) b
[i
] + (S32_t
) c
[i
];
65 /* { dg-final { scan-assembler "saddl\tv\[0-9\]+\.4s,\ v\[0-9\]+\.4h,\ v\[0-9\]+\.4h" } } */
66 /* { dg-final { scan-assembler "saddl2\tv\[0-9\]+\.4s,\ v\[0-9\]+\.8h,\ v\[0-9\]+\.8h" } } */
/* Element-wise signed add over 16 elements: for each i in [0, 16), a[i] is
   set to b[i] + c[i], with both S8 operands cast to S16_t before the
   addition.  */
69 test_addl_S16_S8_16 (pS16_t a
, pS8_t b
, pS8_t c
)
72 for (i
= 0; i
< 16; i
++)
73 a
[i
] = (S16_t
) b
[i
] + (S16_t
) c
[i
];
75 /* { dg-final { scan-assembler "saddl\tv\[0-9\]+\.8h,\ v\[0-9\]+\.8b,\ v\[0-9\]+\.8b" } } */
76 /* { dg-final { scan-assembler "saddl2\tv\[0-9\]+\.8h,\ v\[0-9\]+\.16b,\ v\[0-9\]+\.16b" } } */
/* Element-wise unsigned add over 4 elements: for each i in [0, 4), a[i] is
   set to b[i] + c[i], with both U32 operands converted to U64_t before the
   addition.  */
79 test_addl_U64_U32_4 (pU64_t a
, pU32_t b
, pU32_t c
)
82 for (i
= 0; i
< 4; i
++)
83 a
[i
] = (U64_t
) b
[i
] + (U64_t
) c
[i
];
85 /* { dg-final { scan-assembler "uaddl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" } } */
86 /* { dg-final { scan-assembler "uaddl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s" } } */
/* Element-wise unsigned add over 8 elements: for each i in [0, 8), a[i] is
   set to b[i] + c[i], with both U16 operands converted to U32_t before the
   addition.  */
89 test_addl_U32_U16_8 (pU32_t a
, pU16_t b
, pU16_t c
)
92 for (i
= 0; i
< 8; i
++)
93 a
[i
] = (U32_t
) b
[i
] + (U32_t
) c
[i
];
95 /* { dg-final { scan-assembler "uaddl\tv\[0-9\]+\.4s,\ v\[0-9\]+\.4h,\ v\[0-9\]+\.4h" } } */
96 /* { dg-final { scan-assembler "uaddl2\tv\[0-9\]+\.4s,\ v\[0-9\]+\.8h,\ v\[0-9\]+\.8h" } } */
/* Element-wise unsigned add over 16 elements: for each i in [0, 16), a[i]
   is set to b[i] + c[i], with both U8 operands cast to U16_t before the
   addition.  */
99 test_addl_U16_U8_16 (pU16_t a
, pU8_t b
, pU8_t c
)
102 for (i
= 0; i
< 16; i
++)
103 a
[i
] = (U16_t
) b
[i
] + (U16_t
) c
[i
];
105 /* { dg-final { scan-assembler "uaddl\tv\[0-9\]+\.8h,\ v\[0-9\]+\.8b,\ v\[0-9\]+\.8b" } } */
106 /* { dg-final { scan-assembler "uaddl2\tv\[0-9\]+\.8h,\ v\[0-9\]+\.16b,\ v\[0-9\]+\.16b" } } */
/* Element-wise signed subtract over 4 elements: for each i in [0, 4), a[i]
   is set to b[i] - c[i], with both operands converted to S64_t before the
   subtraction.  */
109 test_subl_S64_S32_4 (pS64_t a
, pS32_t b
, pS32_t c
)
112 for (i
= 0; i
< 4; i
++)
113 a
[i
] = (S64_t
) b
[i
] - (S64_t
) c
[i
];
115 /* "ssubl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" */
116 /* "ssubl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s" */
118 /* a = b - -c => a = b + c */
/* Subtract of a negated second operand over 4 elements: a[i] is set to
   (S64_t) b[i] - -(S64_t) c[i].  The negation applies to the value of c[i]
   after it has been converted to S64_t.  */
120 test_subl_S64_S32_4_neg0 (pS64_t a
, pS32_t b
, pS32_t c
)
123 for (i
= 0; i
< 4; i
++)
124 a
[i
] = (S64_t
) b
[i
] - -(S64_t
) c
[i
];
126 /* { dg-final { scan-assembler-times "saddl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" 2 } } */
127 /* { dg-final { scan-assembler-times "saddl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s" 2 } } */
129 /* a = -b - -c => a = c - b */
/* Subtract with both operands negated over 4 elements: a[i] is set to
   -(S64_t) b[i] - -(S64_t) c[i].  Each negation applies to the operand
   after it has been converted to S64_t.  */
131 test_subl_S64_S32_4_neg1 (pS64_t a
, pS32_t b
, pS32_t c
)
134 for (i
= 0; i
< 4; i
++)
135 a
[i
] = -(S64_t
) b
[i
] - -(S64_t
) c
[i
];
137 /* "ssubl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" */
138 /* "ssubl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s" */
140 /* a = -(b - c) => a = c - b */
/* Negated difference over 4 elements: a[i] is set to
   -((S64_t) b[i] - (S64_t) c[i]).  The subtraction is done on the S64_t
   values and the whole difference is then negated.  */
142 test_subl_S64_S32_4_neg2 (pS64_t a
, pS32_t b
, pS32_t c
)
145 for (i
= 0; i
< 4; i
++)
146 a
[i
] = -((S64_t
) b
[i
] - (S64_t
) c
[i
]);
148 /* { dg-final { scan-assembler-times "ssubl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" 5 } } */
149 /* { dg-final { scan-assembler-times "ssubl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s" 5 } } */
/* Element-wise signed subtract over 8 elements: for each i in [0, 8), a[i]
   is set to b[i] - c[i], with both S16 operands converted to S32_t before
   the subtraction.  */
152 test_subl_S32_S16_8 (pS32_t a
, pS16_t b
, pS16_t c
)
155 for (i
= 0; i
< 8; i
++)
156 a
[i
] = (S32_t
) b
[i
] - (S32_t
) c
[i
];
158 /* { dg-final { scan-assembler "ssubl\tv\[0-9\]+\.4s,\ v\[0-9\]+\.4h,\ v\[0-9\]+\.4h" } } */
159 /* { dg-final { scan-assembler "ssubl2\tv\[0-9\]+\.4s,\ v\[0-9\]+\.8h,\ v\[0-9\]+\.8h" } } */
/* Element-wise signed subtract over 16 elements: for each i in [0, 16),
   a[i] is set to b[i] - c[i], with both S8 operands cast to S16_t before
   the subtraction.  */
162 test_subl_S16_S8_16 (pS16_t a
, pS8_t b
, pS8_t c
)
165 for (i
= 0; i
< 16; i
++)
166 a
[i
] = (S16_t
) b
[i
] - (S16_t
) c
[i
];
168 /* { dg-final { scan-assembler "ssubl\tv\[0-9\]+\.8h,\ v\[0-9\]+\.8b,\ v\[0-9\]+\.8b" } } */
169 /* { dg-final { scan-assembler "ssubl2\tv\[0-9\]+\.8h,\ v\[0-9\]+\.16b,\ v\[0-9\]+\.16b" } } */
/* Element-wise unsigned subtract over 4 elements: for each i in [0, 4),
   a[i] is set to b[i] - c[i], with both U32 operands converted to U64_t
   before the subtraction.  */
172 test_subl_U64_U32_4 (pU64_t a
, pU32_t b
, pU32_t c
)
175 for (i
= 0; i
< 4; i
++)
176 a
[i
] = (U64_t
) b
[i
] - (U64_t
) c
[i
];
178 /* { dg-final { scan-assembler "usubl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" } } */
179 /* { dg-final { scan-assembler "usubl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s" } } */
/* Element-wise unsigned subtract over 8 elements: for each i in [0, 8),
   a[i] is set to b[i] - c[i], with both U16 operands converted to U32_t
   before the subtraction.  */
182 test_subl_U32_U16_8 (pU32_t a
, pU16_t b
, pU16_t c
)
185 for (i
= 0; i
< 8; i
++)
186 a
[i
] = (U32_t
) b
[i
] - (U32_t
) c
[i
];
188 /* { dg-final { scan-assembler "usubl\tv\[0-9\]+\.4s,\ v\[0-9\]+\.4h,\ v\[0-9\]+\.4h" } } */
189 /* { dg-final { scan-assembler "usubl2\tv\[0-9\]+\.4s,\ v\[0-9\]+\.8h,\ v\[0-9\]+\.8h" } } */
/* Element-wise unsigned subtract over 16 elements: for each i in [0, 16),
   a[i] is set to b[i] - c[i], with both U8 operands cast to U16_t before
   the subtraction.  */
192 test_subl_U16_U8_16 (pU16_t a
, pU8_t b
, pU8_t c
)
195 for (i
= 0; i
< 16; i
++)
196 a
[i
] = (U16_t
) b
[i
] - (U16_t
) c
[i
];
198 /* { dg-final { scan-assembler "usubl\tv\[0-9\]+\.8h,\ v\[0-9\]+\.8b,\ v\[0-9\]+\.8b" } } */
199 /* { dg-final { scan-assembler "usubl2\tv\[0-9\]+\.8h,\ v\[0-9\]+\.16b,\ v\[0-9\]+\.16b" } } */
/* Input operands, one pair per element width.  Each *_tb array counts up
   from 0; each *_tc array repeats the pattern 2, 2, -2, -2.  */
204 S32_t S32_tb
[4] = { 0, 1, 2, 3 };
205 S32_t S32_tc
[4] = { 2, 2, -2, -2 };
208 S16_t S16_tb
[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
209 S16_t S16_tc
[8] = { 2, 2, -2, -2, 2, 2, -2, -2 };
212 S8_t S8_tb
[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
213 S8_t S8_tc
[16] = { 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2 };
215 /* expected output */
/* Signed expectations: addl_r* hold b[i] + c[i] and subl_r* hold
   b[i] - c[i] for the input arrays of the matching width; neg_r holds
   c[i] - b[i] for the S32 inputs.  */
217 S64_t addl_rS64
[] = { 2, 3, 0, 1 };
218 S64_t neg_r
[] = { 2, 1, -4, -5 };
219 S32_t addl_rS32
[] = { 2, 3, 0, 1, 6, 7, 4, 5 };
220 S16_t addl_rS16
[] = { 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13 };
221 S64_t subl_rS64
[] = { -2, -1, 4, 5 };
222 S32_t subl_rS32
[] = { -2, -1, 4, 5, 2, 3, 8, 9 };
/* NOTE(review): the declarator for the next initializer is not visible in
   this listing.  The values equal S8_tb[i] - S8_tc[i], so it is presumably
   subl_rS16 -- confirm.  */
224 { -2, -1, 4, 5, 2, 3, 8, 9, 6, 7, 12, 13, 10, 11, 16, 17 };
/* Unsigned expectations: the values equal b[i] + c[i] when the input
   elements are read as unsigned values of the source width.  */
225 U64_t addl_rU64
[] = { 2, 3, 0x100000000, 0x100000001 };
226 U32_t addl_rU32
[] = { 2, 3, 0x10000, 0x10001, 6, 7, 0x10004, 0x10005 };
/* NOTE(review): the declarators for the remaining initializer fragments are
   not visible in this listing.  By value they look like the 16-bit unsigned
   add results followed by the 64-, 32- and 16-bit unsigned subtract results
   (presumably addl_rU16, subl_rU64, subl_rU32, subl_rU16) -- confirm.  */
229 0x0002, 0x0003, 0x0100, 0x0101, 0x0006, 0x0007, 0x0104, 0x0105,
230 0x000a, 0x000b, 0x0108, 0x0109, 0x000e, 0x000f, 0x010c, 0x010d
234 0xfffffffffffffffe, 0xffffffffffffffff,
235 0xffffffff00000004, 0xffffffff00000005
239 0xfffffffe, 0xffffffff, 0xffff0004, 0xffff0005,
240 0x00000002, 0x00000003, 0xffff0008, 0xffff0009
244 0xfffe, 0xffff, 0xff04, 0xff05, 0x0002, 0x0003, 0xff08, 0xff09,
245 0x0006, 0x0007, 0xff0c, 0xff0d, 0x000a, 0x000b, 0xff10, 0xff11
/* Result-checking macros.
   CHECK (T, N, AS, US) walks i over [0, N) and tests element i of the
   signed result buffer S<T>_ta, cast to the <US><T>_t type, against element
   i of the expected array <AS>_r<US><T>.
   NCHECK (RES) walks i over [0, 4) and tests S64_ta[i] against RES[i].
   SCHECK and UCHECK forward to CHECK with US set to S and U respectively.
   NOTE(review): the statements run on a mismatch, and the rest of the macro
   bodies, are not visible in this listing.  */
248 #define CHECK(T,N,AS,US) \
251 for (i = 0; i < N; i++) \
252 if ((US##T##_t)S##T##_ta[i] != AS##_##r##US##T[i]) \
257 #define NCHECK(RES) \
260 for (i = 0; i < 4; i++) \
261 if (S64_ta[i] != RES[i]) \
266 #define SCHECK(T,N,AS) CHECK(T,N,AS,S)
267 #define UCHECK(T,N,AS) CHECK(T,N,AS,U)
/* Signed variants: run each add kernel, then each subtract kernel, on the
   inputs of the matching width and compare the result buffer with the
   corresponding addl_rS<T> or subl_rS<T> array via SCHECK.  */
274 test_addl_S64_S32_4 (S64_ta
, S32_tb
, S32_tc
);
275 SCHECK (64, 4, addl
);
276 test_addl_S32_S16_8 (S32_ta
, S16_tb
, S16_tc
);
277 SCHECK (32, 8, addl
);
278 test_addl_S16_S8_16 (S16_ta
, S8_tb
, S8_tc
);
279 SCHECK (16, 16, addl
);
280 test_subl_S64_S32_4 (S64_ta
, S32_tb
, S32_tc
);
281 SCHECK (64, 4, subl
);
282 test_subl_S32_S16_8 (S32_ta
, S16_tb
, S16_tc
);
283 SCHECK (32, 8, subl
);
284 test_subl_S16_S8_16 (S16_ta
, S8_tb
, S8_tc
);
285 SCHECK (16, 16, subl
);
/* Unsigned variants: the unsigned kernels are invoked on the same signed
   input and result buffers as the signed ones.  UCHECK casts each result
   element to the unsigned type before comparing it with the addl_rU<T> or
   subl_rU<T> array.  */
287 test_addl_U64_U32_4 (S64_ta
, S32_tb
, S32_tc
);
288 UCHECK (64, 4, addl
);
289 test_addl_U32_U16_8 (S32_ta
, S16_tb
, S16_tc
);
290 UCHECK (32, 8, addl
);
291 test_addl_U16_U8_16 (S16_ta
, S8_tb
, S8_tc
);
292 UCHECK (16, 16, addl
);
293 test_subl_U64_U32_4 (S64_ta
, S32_tb
, S32_tc
);
294 UCHECK (64, 4, subl
);
295 test_subl_U32_U16_8 (S32_ta
, S16_tb
, S16_tc
);
296 UCHECK (32, 8, subl
);
297 test_subl_U16_U8_16 (S16_ta
, S8_tb
, S8_tc
);
298 UCHECK (16, 16, subl
);
/* Negated-operand variants, all run on the S32 inputs and writing S64_ta.
   NOTE(review): no result comparison follows any of these calls in this
   listing; NCHECK is defined earlier but its uses are not shown here --
   confirm against the full file.  */
300 test_addl_S64_S32_4_neg0 (S64_ta
, S32_tb
, S32_tc
);
302 test_addl_S64_S32_4_neg1 (S64_ta
, S32_tb
, S32_tc
);
304 test_subl_S64_S32_4_neg0 (S64_ta
, S32_tb
, S32_tc
);
306 test_subl_S64_S32_4_neg1 (S64_ta
, S32_tb
, S32_tc
);
308 test_subl_S64_S32_4_neg2 (S64_ta
, S32_tb
, S32_tc
);