2 /* { dg-options "-O3 -fno-inline -save-temps -fno-vect-cost-model -fno-ipa-icf" } */
/* Element-type aliases used by every test kernel in this file.
   S<N>_t / U<N>_t name signed / unsigned char, short, int and long long;
   the <N> in each name is the intended bit width (the widths themselves are
   target-dependent and not enforced here).
   pS<N>_t / pU<N>_t are __restrict__-qualified pointers to the same element
   types, so kernel arguments declared with them are promised not to alias.  */
4 typedef signed char S8_t
;
5 typedef signed short S16_t
;
6 typedef signed int S32_t
;
7 typedef signed long long S64_t
;
8 typedef signed char *__restrict__ pS8_t
;
9 typedef signed short *__restrict__ pS16_t
;
10 typedef signed int *__restrict__ pS32_t
;
11 typedef signed long long *__restrict__ pS64_t
;
12 typedef unsigned char U8_t
;
13 typedef unsigned short U16_t
;
14 typedef unsigned int U32_t
;
15 typedef unsigned long long U64_t
;
16 typedef unsigned char *__restrict__ pU8_t
;
17 typedef unsigned short *__restrict__ pU16_t
;
18 typedef unsigned int *__restrict__ pU32_t
;
19 typedef unsigned long long *__restrict__ pU64_t
;
/* Signed widening multiply-accumulate over 4 elements:
   a[i] += (S64_t) b[i] * (S64_t) c[i] for i = 0..3.
   a points at S64_t accumulators, b and c at S32_t operands; all three are
   __restrict__-qualified through their pointer typedefs.
   The scan-assembler directives that follow look for smlal / smlal2 on .2d.
   NOTE(review): the return type, the braces and the declaration of `i` are
   not present in this text -- confirm against the pristine file.  */
24 test_addS64_tS32_t4 (pS64_t a
, pS32_t b
, pS32_t c
)
27 for (i
= 0; i
< 4; i
++)
28 a
[i
] += (S64_t
) b
[i
] * (S64_t
) c
[i
];
31 /* { dg-final { scan-assembler "smlal\tv\[0-9\]+\.2d" } } */
32 /* { dg-final { scan-assembler "smlal2\tv\[0-9\]+\.2d" } } */
/* Signed widening multiply-accumulate over 8 elements:
   a[i] += (S32_t) b[i] * (S32_t) c[i] for i = 0..7.
   a points at S32_t accumulators, b and c at S16_t operands (all
   __restrict__-qualified through their typedefs).
   The scan-assembler directives that follow look for smlal / smlal2 on .4s.  */
35 test_addS32_tS16_t8 (pS32_t a
, pS16_t b
, pS16_t c
)
38 for (i
= 0; i
< 8; i
++)
39 a
[i
] += (S32_t
) b
[i
] * (S32_t
) c
[i
];
42 /* { dg-final { scan-assembler "smlal\tv\[0-9\]+\.4s" } } */
43 /* { dg-final { scan-assembler "smlal2\tv\[0-9\]+\.4s" } } */
/* Signed widening multiply-accumulate over 16 elements:
   a[i] += (S16_t) b[i] * (S16_t) c[i] for i = 0..15.
   a points at S16_t accumulators, b and c at S8_t operands (all
   __restrict__-qualified through their typedefs).
   Plain (no-negation) member of the S16 add-form group whose smlal / smlal2
   .8h occurrences are counted after the _neg2 variant.  */
46 test_addS16_tS8_t16 (pS16_t a
, pS8_t b
, pS8_t c
)
49 for (i
= 0; i
< 16; i
++)
50 a
[i
] += (S16_t
) b
[i
] * (S16_t
) c
[i
];
/* Add-form variant with both operands negated, 16 elements:
   a[i] += (S16_t) -b[i] * (S16_t) -c[i].
   Unary minus binds tighter than the cast and the multiply, so each operand
   is negated first, then cast to S16_t, then multiplied.  The two negations
   cancel algebraically, leaving a[i] + b[i] * c[i].  */
54 test_addS16_tS8_t16_neg0 (pS16_t a
, pS8_t b
, pS8_t c
)
57 for (i
= 0; i
< 16; i
++)
58 a
[i
] += (S16_t
) -b
[i
] * (S16_t
) -c
[i
];
/* Add-form variant written as a subtraction of a negated product,
   16 elements: a[i] -= (S16_t) b[i] * (S16_t) -c[i].
   Only c[i] is negated (before its cast); subtracting b * (-c) is
   algebraically a[i] + b[i] * c[i].  */
62 test_addS16_tS8_t16_neg1 (pS16_t a
, pS8_t b
, pS8_t c
)
65 for (i
= 0; i
< 16; i
++)
66 a
[i
] -= (S16_t
) b
[i
] * (S16_t
) -c
[i
];
/* Add-form variant written as a subtraction of a negated product,
   16 elements: a[i] -= (S16_t) -b[i] * (S16_t) c[i].
   Only b[i] is negated (before its cast); subtracting (-b) * c is
   algebraically a[i] + b[i] * c[i].
   Last of the four S16 add-form functions; the scan-assembler-times
   directives that follow expect 4 matches each of smlal / smlal2 on .8h.  */
70 test_addS16_tS8_t16_neg2 (pS16_t a
, pS8_t b
, pS8_t c
)
73 for (i
= 0; i
< 16; i
++)
74 a
[i
] -= (S16_t
) -b
[i
] * (S16_t
) c
[i
];
77 /* { dg-final { scan-assembler-times "smlal\tv\[0-9\]+\.8h" 4 } } */
78 /* { dg-final { scan-assembler-times "smlal2\tv\[0-9\]+\.8h" 4 } } */
/* Signed widening multiply-subtract over 4 elements:
   a[i] -= (S64_t) b[i] * (S64_t) c[i] for i = 0..3.
   a points at S64_t accumulators, b and c at S32_t operands (all
   __restrict__-qualified through their typedefs).
   The scan-assembler directives that follow look for smlsl / smlsl2 on .2d.  */
81 test_subS64_tS32_t4 (pS64_t a
, pS32_t b
, pS32_t c
)
84 for (i
= 0; i
< 4; i
++)
85 a
[i
] -= (S64_t
) b
[i
] * (S64_t
) c
[i
];
88 /* { dg-final { scan-assembler "smlsl\tv\[0-9\]+\.2d" } } */
89 /* { dg-final { scan-assembler "smlsl2\tv\[0-9\]+\.2d" } } */
/* Signed widening multiply-subtract over 8 elements:
   a[i] -= (S32_t) b[i] * (S32_t) c[i] for i = 0..7.
   a points at S32_t accumulators, b and c at S16_t operands (all
   __restrict__-qualified through their typedefs).
   The scan-assembler directives that follow look for smlsl / smlsl2 on .4s.  */
92 test_subS32_tS16_t8 (pS32_t a
, pS16_t b
, pS16_t c
)
95 for (i
= 0; i
< 8; i
++)
96 a
[i
] -= (S32_t
) b
[i
] * (S32_t
) c
[i
];
99 /* { dg-final { scan-assembler "smlsl\tv\[0-9\]+\.4s" } } */
100 /* { dg-final { scan-assembler "smlsl2\tv\[0-9\]+\.4s" } } */
/* Signed widening multiply-subtract over 16 elements:
   a[i] -= (S16_t) b[i] * (S16_t) c[i] for i = 0..15.
   a points at S16_t accumulators, b and c at S8_t operands (all
   __restrict__-qualified through their typedefs).
   Plain (no-negation) member of the S16 sub-form group whose smlsl / smlsl2
   .8h occurrences are counted after the _neg3 variant.  */
103 test_subS16_tS8_t16 (pS16_t a
, pS8_t b
, pS8_t c
)
106 for (i
= 0; i
< 16; i
++)
107 a
[i
] -= (S16_t
) b
[i
] * (S16_t
) c
[i
];
/* Sub-form variant written as an addition of a negated product,
   16 elements: a[i] += (S16_t) -b[i] * (S16_t) c[i].
   Only b[i] is negated (before its cast); adding (-b) * c is algebraically
   a[i] - b[i] * c[i].  */
111 test_subS16_tS8_t16_neg0 (pS16_t a
, pS8_t b
, pS8_t c
)
114 for (i
= 0; i
< 16; i
++)
115 a
[i
] += (S16_t
) -b
[i
] * (S16_t
) c
[i
];
/* Sub-form variant written as an addition of a negated product,
   16 elements: a[i] += (S16_t) b[i] * (S16_t) -c[i].
   Only c[i] is negated (before its cast); adding b * (-c) is algebraically
   a[i] - b[i] * c[i].  */
119 test_subS16_tS8_t16_neg1 (pS16_t a
, pS8_t b
, pS8_t c
)
122 for (i
= 0; i
< 16; i
++)
123 a
[i
] += (S16_t
) b
[i
] * (S16_t
) -c
[i
];
/* Sub-form variant that negates the whole product, 16 elements:
   a[i] += -((S16_t) b[i] * (S16_t) c[i]).
   Unlike the _neg0 / _neg1 variants, the minus sign is applied to the
   parenthesised product rather than to one operand; algebraically this is
   a[i] - b[i] * c[i].  */
127 test_subS16_tS8_t16_neg2 (pS16_t a
, pS8_t b
, pS8_t c
)
130 for (i
= 0; i
< 16; i
++)
131 a
[i
] += -((S16_t
) b
[i
] * (S16_t
) c
[i
]);
/* Sub-form variant with both operands negated, 16 elements:
   a[i] -= (S16_t) -b[i] * (S16_t) -c[i].
   Each operand is negated before its cast; the two negations cancel, so the
   statement is algebraically a[i] - b[i] * c[i].
   Last of the five S16 sub-form functions; the scan-assembler-times
   directives that follow expect 5 matches each of smlsl / smlsl2 on .8h.  */
135 test_subS16_tS8_t16_neg3 (pS16_t a
, pS8_t b
, pS8_t c
)
138 for (i
= 0; i
< 16; i
++)
139 a
[i
] -= (S16_t
) -b
[i
] * (S16_t
) -c
[i
];
142 /* { dg-final { scan-assembler-times "smlsl\tv\[0-9\]+\.8h" 5 } } */
143 /* { dg-final { scan-assembler-times "smlsl2\tv\[0-9\]+\.8h" 5 } } */
/* Unsigned widening multiply-accumulate over 4 elements:
   a[i] += (U64_t) b[i] * (U64_t) c[i] for i = 0..3.
   a points at U64_t accumulators, b and c at U32_t operands (all
   __restrict__-qualified through their typedefs).
   The scan-assembler directives that follow look for umlal / umlal2 on .2d.  */
146 test_addU64_tU32_t4 (pU64_t a
, pU32_t b
, pU32_t c
)
149 for (i
= 0; i
< 4; i
++)
150 a
[i
] += (U64_t
) b
[i
] * (U64_t
) c
[i
];
153 /* { dg-final { scan-assembler "umlal\tv\[0-9\]+\.2d" } } */
154 /* { dg-final { scan-assembler "umlal2\tv\[0-9\]+\.2d" } } */
/* Unsigned widening multiply-accumulate over 8 elements:
   a[i] += (U32_t) b[i] * (U32_t) c[i] for i = 0..7.
   a points at U32_t accumulators, b and c at U16_t operands (all
   __restrict__-qualified through their typedefs).
   The scan-assembler directives that follow look for umlal / umlal2 on .4s.  */
157 test_addU32_tU16_t8 (pU32_t a
, pU16_t b
, pU16_t c
)
160 for (i
= 0; i
< 8; i
++)
161 a
[i
] += (U32_t
) b
[i
] * (U32_t
) c
[i
];
164 /* { dg-final { scan-assembler "umlal\tv\[0-9\]+\.4s" } } */
165 /* { dg-final { scan-assembler "umlal2\tv\[0-9\]+\.4s" } } */
/* Unsigned widening multiply-accumulate over 16 elements:
   a[i] += (U16_t) b[i] * (U16_t) c[i] for i = 0..15.
   a points at U16_t accumulators, b and c at U8_t operands (all
   __restrict__-qualified through their typedefs).
   The scan-assembler directives that follow look for umlal / umlal2 on .8h.  */
168 test_addU16_tU8_t16 (pU16_t a
, pU8_t b
, pU8_t c
)
171 for (i
= 0; i
< 16; i
++)
172 a
[i
] += (U16_t
) b
[i
] * (U16_t
) c
[i
];
175 /* { dg-final { scan-assembler "umlal\tv\[0-9\]+\.8h" } } */
176 /* { dg-final { scan-assembler "umlal2\tv\[0-9\]+\.8h" } } */
/* Unsigned widening multiply-subtract over 4 elements:
   a[i] -= (U64_t) b[i] * (U64_t) c[i] for i = 0..3.
   a points at U64_t accumulators, b and c at U32_t operands (all
   __restrict__-qualified through their typedefs).
   The scan-assembler directives that follow look for umlsl / umlsl2 on .2d.  */
179 test_subU64_tU32_t4 (pU64_t a
, pU32_t b
, pU32_t c
)
182 for (i
= 0; i
< 4; i
++)
183 a
[i
] -= (U64_t
) b
[i
] * (U64_t
) c
[i
];
186 /* { dg-final { scan-assembler "umlsl\tv\[0-9\]+\.2d" } } */
187 /* { dg-final { scan-assembler "umlsl2\tv\[0-9\]+\.2d" } } */
/* Unsigned widening multiply-subtract over 8 elements:
   a[i] -= (U32_t) b[i] * (U32_t) c[i] for i = 0..7.
   a points at U32_t accumulators, b and c at U16_t operands (all
   __restrict__-qualified through their typedefs).
   The scan-assembler directives that follow look for umlsl / umlsl2 on .4s.  */
190 test_subU32_tU16_t8 (pU32_t a
, pU16_t b
, pU16_t c
)
193 for (i
= 0; i
< 8; i
++)
194 a
[i
] -= (U32_t
) b
[i
] * (U32_t
) c
[i
];
197 /* { dg-final { scan-assembler "umlsl\tv\[0-9\]+\.4s" } } */
198 /* { dg-final { scan-assembler "umlsl2\tv\[0-9\]+\.4s" } } */
/* Unsigned widening multiply-subtract over 16 elements:
   a[i] -= (U16_t) b[i] * (U16_t) c[i] for i = 0..15.
   a points at U16_t accumulators, b and c at U8_t operands (all
   __restrict__-qualified through their typedefs).
   The scan-assembler directives that follow look for umlsl / umlsl2 on .8h.  */
201 test_subU16_tU8_t16 (pU16_t a
, pU8_t b
, pU8_t c
)
204 for (i
= 0; i
< 16; i
++)
205 a
[i
] -= (U16_t
) b
[i
] * (U16_t
) c
[i
];
208 /* { dg-final { scan-assembler "umlsl\tv\[0-9\]+\.8h" } } */
209 /* { dg-final { scan-assembler "umlsl2\tv\[0-9\]+\.8h" } } */
/* Reference results and input vectors for the run-time part of the test.
   Naming: <add|sub>_r<S|U><N> is the expected accumulator contents after the
   corresponding add / sub kernel; S<N>_ta is an accumulator array and
   S<N>_tb / S<N>_tc are the two multiplicand arrays.  */

/* Signed add expectations: a[i] + b[i] * c[i] with a = 0, 1, 2, ...,
   b = 2, 2, -2, -2, ... and c = 3 (e.g. 0 + 6, 1 + 6, 2 - 6, 3 - 6).  */
212 S64_t add_rS64
[4] = { 6, 7, -4, -3 };
213 S32_t add_rS32
[8] = { 6, 7, -4, -3, 10, 11, 0, 1 };
/* NOTE(review): no declarator is visible for this 16-element initializer;
   presumably it belongs to the S16 add expectation -- confirm.  */
215 { 6, 7, -4, -3, 10, 11, 0, 1, 14, 15, 4, 5, 18, 19, 8, 9 };
/* Signed sub expectations: identical to the initial accumulator values.  */
217 S64_t sub_rS64
[4] = { 0, 1, 2, 3 };
218 S32_t sub_rS32
[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
219 S16_t sub_rS16
[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
/* Unsigned add expectations: the -2 entries of b are reinterpreted as
   unsigned, e.g. 0xfffffffe * 3 + 2 == 0x2fffffffc for the 64-bit case.  */
221 U64_t add_rU64
[4] = { 0x6, 0x7, 0x2fffffffc, 0x2fffffffd };
/* NOTE(review): the next four rows are initializer contents whose
   declarators and braces are not visible; the values are consistent with
   the 32-bit (0xfffe * 3 + a) and 16-bit (0xfe * 3 + a) unsigned add
   expectations -- confirm.  */
225 0x6, 0x7, 0x2fffc, 0x2fffd,
226 0xa, 0xb, 0x30000, 0x30001
231 0x6, 0x7, 0x2fc, 0x2fd, 0xa, 0xb, 0x300, 0x301,
232 0xe, 0xf, 0x304, 0x305, 0x12, 0x13, 0x308, 0x309
/* Unsigned sub expectations: identical to the initial accumulator values.  */
235 U64_t sub_rU64
[4] = { 0, 1, 2, 3 };
236 U32_t sub_rU32
[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
237 U16_t sub_rU16
[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
/* Values equal a[i] - b[i] * c[i] for the inputs below (0 - 6, 1 - 6,
   2 + 6, 3 + 6, ...).  Its use site is not visible in this text.  */
239 S8_t neg_r
[16] = { -6, -5, 8, 9, -2, -1, 12, 13, 2, 3, 16, 17, 6, 7, 20, 21 };
/* 64-bit accumulator with its 32-bit multiplicands.  The arrays hold 16
   entries, but the 64-bit kernels only touch the first 4.  */
241 S64_t S64_ta
[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
242 S32_t S32_tb
[16] = { 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2 };
243 S32_t S32_tc
[16] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 };
/* 32-bit accumulator with its 16-bit multiplicands (kernels use 8).  */
245 S32_t S32_ta
[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
246 S16_t S16_tb
[16] = { 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2 };
247 S16_t S16_tc
[16] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 };
/* 16-bit accumulator with its 8-bit multiplicands (kernels use all 16).  */
249 S16_t S16_ta
[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
250 S8_t S8_tb
[16] = { 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2 };
251 S8_t S8_tc
[16] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 };
/* Result-checking macros.
   CHECK(T,N,AS,US) walks i over 0..N-1 and compares S<T>_ta[i] with
   <AS>_r<US><T>[i], e.g. CHECK(16,16,add,S) compares S16_ta against add_rS16.
   The accumulator is always the signed S<T>_ta array, even when US is U.
   SCHECK / UCHECK are CHECK with US fixed to S / U respectively.
   NCHECK(RES) compares the 16 entries of S16_ta against the array RES.
   NOTE(review): the lines that open each macro body and the action taken on
   a mismatch are not present in this text -- confirm against the pristine
   file.  */
254 #define CHECK(T,N,AS,US) \
257 for (i = 0; i < N; i++) \
258 if (S##T##_ta[i] != AS##_r##US##T[i]) \
263 #define SCHECK(T,N,AS) CHECK(T,N,AS,S)
264 #define UCHECK(T,N,AS) CHECK(T,N,AS,U)
266 #define NCHECK(RES) \
269 for (i = 0; i < 16; i++) \
270 if (S16_ta[i] != RES[i]) \
/* Run-time driver sequence: each kernel is applied to the shared global
   arrays in add-then-sub order, so a sub call undoes the preceding add on
   the same accumulator.
   NOTE(review): the enclosing function header and the checks that follow
   the 64- and 32-bit calls are not present in this text; only the 16-bit
   SCHECK / UCHECK invocations are visible.  */
/* Signed kernels.  */
281 test_addS64_tS32_t4 (S64_ta
, S32_tb
, S32_tc
);
283 test_addS32_tS16_t8 (S32_ta
, S16_tb
, S16_tc
);
285 test_addS16_tS8_t16 (S16_ta
, S8_tb
, S8_tc
);
286 SCHECK (16, 16, add
);
287 test_subS64_tS32_t4 (S64_ta
, S32_tb
, S32_tc
);
289 test_subS32_tS16_t8 (S32_ta
, S16_tb
, S16_tc
);
291 test_subS16_tS8_t16 (S16_ta
, S8_tb
, S8_tc
);
292 SCHECK (16, 16, sub
);
/* Unsigned kernels, run on the same signed arrays (S64_ta, S32_tb, ...), so
   the stored bit patterns are reinterpreted as unsigned inside the callee.  */
294 test_addU64_tU32_t4 (S64_ta
, S32_tb
, S32_tc
);
296 test_addU32_tU16_t8 (S32_ta
, S16_tb
, S16_tc
);
298 test_addU16_tU8_t16 (S16_ta
, S8_tb
, S8_tc
);
299 UCHECK (16, 16, add
);
300 test_subU64_tU32_t4 (S64_ta
, S32_tb
, S32_tc
);
302 test_subU32_tU16_t8 (S32_ta
, S16_tb
, S16_tc
);
304 test_subU16_tU8_t16 (S16_ta
, S8_tb
, S8_tc
);
305 UCHECK (16, 16, sub
);
/* Negated-operand variants on the S16 accumulator, alternating add-form and
   sub-form, ending with an unpaired sub-form call (_neg3).
   NOTE(review): any checks between these calls are not present in this
   text.  */
307 test_addS16_tS8_t16_neg0 (S16_ta
, S8_tb
, S8_tc
);
309 test_subS16_tS8_t16_neg0 (S16_ta
, S8_tb
, S8_tc
);
311 test_addS16_tS8_t16_neg1 (S16_ta
, S8_tb
, S8_tc
);
313 test_subS16_tS8_t16_neg1 (S16_ta
, S8_tb
, S8_tc
);
315 test_addS16_tS8_t16_neg2 (S16_ta
, S8_tb
, S8_tc
);
317 test_subS16_tS8_t16_neg2 (S16_ta
, S8_tb
, S8_tc
);
319 test_subS16_tS8_t16_neg3 (S16_ta
, S8_tb
, S8_tc
);