1 /* { dg-require-effective-target arm_v8_1m_mve_ok } */
2 /* { dg-add-options arm_v8_1m_mve } */
3 /* { dg-additional-options "-O1" } */
4 /* { dg-final { check-function-bodies "**" "" } } */
15 ** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
16 ** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
18 ** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
19 ** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
21 ** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
22 ** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
24 ** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
25 ** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
/* test: two-vector interleaved load/store with an offset taken from the
   `width' parameter.
   Visible statements: rg = vld2q (in), gb = vld2q (in + width), and
   vst2q (out + width, gb).
   None of the expected-assembly lines directly above this fragment uses
   the '!' suffix on its address operand; presumably those lines describe
   this function -- confirm.
   NOTE(review): the return type, the braces and any store of `rg' are
   not visible in this excerpt; check the complete file.  */
29 test(const uint8_t * in
, uint8_t * out
, int width
)
31 uint8x16x2_t rg
= vld2q(in
);
32 uint8x16x2_t gb
= vld2q(in
+ width
);
34 vst2q (out
+ width
, gb
);
40 ** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
41 ** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]!
42 ** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
43 ** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
44 ** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
45 ** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]!
46 ** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
47 ** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
/* test2: two-vector interleaved loads at `in' and at `in + 32'
   (32 uint8_t elements further on).
   In the expected-assembly lines directly above this fragment, the first
   vld21.8 and the first vst21.8 address operands end in '!' (Arm
   writeback syntax); presumably those lines describe this function --
   confirm, since the line naming the function is not visible.
   NOTE(review): the return type, the braces and the stores are not
   visible in this excerpt; check the complete file.  */
51 test2(const uint8_t * in
, uint8_t * out
)
53 uint8x16x2_t rg
= vld2q(in
);
54 uint8x16x2_t gb
= vld2q(in
+ 32);
62 ** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
63 ** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
65 ** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
66 ** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
68 ** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
69 ** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
71 ** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
72 ** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
/* test3: two-vector interleaved loads at `in' and at `in - 32'
   (a negative constant offset of 32 uint8_t elements).
   None of the expected-assembly lines directly above this fragment uses
   the '!' suffix on its address operand; presumably those lines describe
   this function -- confirm.
   NOTE(review): the return type, the braces and the stores are not
   visible in this excerpt; check the complete file.  */
76 test3(const uint8_t * in
, uint8_t * out
)
78 uint8x16x2_t rg
= vld2q(in
);
79 uint8x16x2_t gb
= vld2q(in
- 32);
87 ** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
88 ** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
90 ** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
91 ** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
93 ** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
94 ** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
96 ** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
97 ** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
/* test4: two-vector interleaved load/store with a constant offset of 64
   uint8_t elements.
   Visible statements: rg = vld2q (in), gb = vld2q (in + 64), and
   vst2q (out + 64, gb).
   None of the expected-assembly lines directly above this fragment uses
   the '!' suffix on its address operand; presumably those lines describe
   this function -- confirm.
   NOTE(review): the return type, the braces and any store of `rg' are
   not visible in this excerpt; check the complete file.  */
101 test4(const uint8_t * in
, uint8_t * out
)
103 uint8x16x2_t rg
= vld2q(in
);
104 uint8x16x2_t gb
= vld2q(in
+ 64);
106 vst2q (out
+ 64, gb
);
112 ** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
113 ** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
115 ** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
116 ** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
118 ** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
119 ** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
121 ** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
122 ** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
/* test5: two-vector interleaved load/store with a constant offset of 42
   uint8_t elements.
   Visible statements: rg = vld2q (in), gb = vld2q (in + 42), and
   vst2q (out + 42, gb).
   None of the expected-assembly lines directly above this fragment uses
   the '!' suffix on its address operand; presumably those lines describe
   this function -- confirm.
   NOTE(review): the return type, the braces and any store of `rg' are
   not visible in this excerpt; check the complete file.  */
126 test5(const uint8_t * in
, uint8_t * out
)
128 uint8x16x2_t rg
= vld2q(in
);
129 uint8x16x2_t gb
= vld2q(in
+ 42);
131 vst2q (out
+ 42, gb
);
137 ** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
138 ** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
139 ** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
140 ** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
142 ** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
143 ** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
144 ** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
145 ** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
147 ** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
148 ** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
149 ** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
150 ** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
152 ** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
153 ** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
154 ** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
155 ** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
/* test6: four-vector interleaved load/store with an offset taken from
   the `width' parameter.
   Visible statements: rg = vld4q (in), gb = vld4q (in + width), and
   vst4q (out + width, gb).
   None of the expected-assembly lines directly above this fragment uses
   the '!' suffix on its address operand; presumably those lines describe
   this function -- confirm.
   NOTE(review): the return type, the braces and any store of `rg' are
   not visible in this excerpt; check the complete file.  */
159 test6(const uint8_t * in
, uint8_t * out
, int width
)
161 uint8x16x4_t rg
= vld4q(in
);
162 uint8x16x4_t gb
= vld4q(in
+ width
);
164 vst4q (out
+ width
, gb
);
170 ** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
171 ** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
172 ** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
173 ** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
175 ** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
176 ** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
177 ** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
178 ** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
180 ** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
181 ** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
182 ** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
183 ** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
185 ** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
186 ** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
187 ** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
188 ** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
/* test7: four-vector interleaved load/store with a constant offset of 32
   uint8_t elements.
   Visible statements: rg = vld4q (in), gb = vld4q (in + 32), and
   vst4q (out + 32, gb).
   None of the expected-assembly lines directly above this fragment uses
   the '!' suffix on its address operand; presumably those lines describe
   this function -- confirm.
   NOTE(review): the return type, the braces and any store of `rg' are
   not visible in this excerpt; check the complete file.  */
192 test7(const uint8_t * in
, uint8_t * out
)
194 uint8x16x4_t rg
= vld4q(in
);
195 uint8x16x4_t gb
= vld4q(in
+ 32);
197 vst4q (out
+ 32, gb
);
203 ** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
204 ** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
205 ** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
206 ** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]!
207 ** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
208 ** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
209 ** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
210 ** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
211 ** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
212 ** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
213 ** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
214 ** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]!
215 ** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
216 ** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
217 ** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
218 ** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
/* test8: four-vector interleaved load/store with a constant offset of 64
   uint8_t elements.
   Visible statements: rg = vld4q (in), gb = vld4q (in + 64), and
   vst4q (out + 64, gb).
   In the expected-assembly lines directly above this fragment, the first
   vld43.8 and the first vst43.8 address operands end in '!' (Arm
   writeback syntax); presumably those lines describe this function --
   confirm, since the line naming the function is not visible.
   NOTE(review): the return type, the braces and any store of `rg' are
   not visible in this excerpt; check the complete file.  */
222 test8(const uint8_t * in
, uint8_t * out
)
224 uint8x16x4_t rg
= vld4q(in
);
225 uint8x16x4_t gb
= vld4q(in
+ 64);
227 vst4q (out
+ 64, gb
);
233 ** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
234 ** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
235 ** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
236 ** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
238 ** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
239 ** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
240 ** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
241 ** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
243 ** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
244 ** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
245 ** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
246 ** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
248 ** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
249 ** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
250 ** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
251 ** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
/* test9: four-vector interleaved load/store with a negative constant
   offset of 64 uint8_t elements.
   Visible statements: rg = vld4q (in), gb = vld4q (in - 64), and
   vst4q (out - 64, gb).
   None of the expected-assembly lines directly above this fragment uses
   the '!' suffix on its address operand; presumably those lines describe
   this function -- confirm.
   NOTE(review): the return type, the braces and any store of `rg' are
   not visible in this excerpt; check the complete file.  */
255 test9(const uint8_t * in
, uint8_t * out
)
257 uint8x16x4_t rg
= vld4q(in
);
258 uint8x16x4_t gb
= vld4q(in
- 64);
260 vst4q (out
- 64, gb
);
266 ** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
267 ** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
268 ** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
269 ** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
271 ** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
272 ** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
273 ** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
274 ** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
276 ** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
277 ** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
278 ** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
279 ** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
281 ** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
282 ** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
283 ** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
284 ** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
/* test10: four-vector interleaved load/store with a constant offset of
   42 uint8_t elements.
   Visible statements: rg = vld4q (in), gb = vld4q (in + 42), and
   vst4q (out + 42, gb).
   None of the expected-assembly lines directly above this fragment uses
   the '!' suffix on its address operand; presumably those lines describe
   this function -- confirm.
   NOTE(review): the return type, the braces and any store of `rg' are
   not visible in this excerpt; check the complete file.  */
288 test10(const uint8_t * in
, uint8_t * out
)
290 uint8x16x4_t rg
= vld4q(in
);
291 uint8x16x4_t gb
= vld4q(in
+ 42);
293 vst4q (out
+ 42, gb
);
300 /* { dg-final { scan-assembler-not "__ARM_undef" } } */