1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
3 ; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
5 define void @foo_v4i32_v4i32(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i32> *%src) {
6 ; CHECK-LABEL: foo_v4i32_v4i32:
7 ; CHECK: @ %bb.0: @ %entry
9 ; CHECK-NEXT: sub sp, #8
10 ; CHECK-NEXT: vldrw.u32 q0, [r1]
11 ; CHECK-NEXT: add r3, sp, #4
12 ; CHECK-NEXT: vcmp.s32 gt, q0, zr
13 ; CHECK-NEXT: @ implicit-def: $q0
14 ; CHECK-NEXT: vstr p0, [r3]
15 ; CHECK-NEXT: ldrb.w r1, [sp, #4]
16 ; CHECK-NEXT: lsls r3, r1, #31
18 ; CHECK-NEXT: ldrne r3, [r2]
19 ; CHECK-NEXT: vmovne.32 q0[0], r3
20 ; CHECK-NEXT: lsls r3, r1, #30
22 ; CHECK-NEXT: ldrmi r3, [r2, #4]
23 ; CHECK-NEXT: vmovmi.32 q0[1], r3
24 ; CHECK-NEXT: lsls r3, r1, #29
26 ; CHECK-NEXT: ldrmi r3, [r2, #8]
27 ; CHECK-NEXT: vmovmi.32 q0[2], r3
28 ; CHECK-NEXT: lsls r1, r1, #28
30 ; CHECK-NEXT: ldrmi r1, [r2, #12]
31 ; CHECK-NEXT: vmovmi.32 q0[3], r1
32 ; CHECK-NEXT: mov r1, sp
33 ; CHECK-NEXT: vstr p0, [r1]
34 ; CHECK-NEXT: ldrb.w r1, [sp]
35 ; CHECK-NEXT: lsls r2, r1, #31
37 ; CHECK-NEXT: vmovne r2, s0
38 ; CHECK-NEXT: strne r2, [r0]
39 ; CHECK-NEXT: lsls r2, r1, #30
41 ; CHECK-NEXT: vmovmi r2, s1
42 ; CHECK-NEXT: strmi r2, [r0, #4]
43 ; CHECK-NEXT: lsls r2, r1, #29
45 ; CHECK-NEXT: vmovmi r2, s2
46 ; CHECK-NEXT: strmi r2, [r0, #8]
47 ; CHECK-NEXT: lsls r1, r1, #28
49 ; CHECK-NEXT: vmovmi r1, s3
50 ; CHECK-NEXT: strmi r1, [r0, #12]
51 ; CHECK-NEXT: add sp, #8
54 %0 = load <4 x i32>, <4 x i32>* %mask, align 4
55 %1 = icmp sgt <4 x i32> %0, zeroinitializer
56 %2 = call <4 x i32> @llvm.masked.load.v4i32(<4 x i32>* %src, i32 4, <4 x i1> %1, <4 x i32> undef)
57 call void @llvm.masked.store.v4i32(<4 x i32> %2, <4 x i32>* %dest, i32 4, <4 x i1> %1)
61 define void @foo_sext_v4i32_v4i8(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i8> *%src) {
62 ; CHECK-LABEL: foo_sext_v4i32_v4i8:
63 ; CHECK: @ %bb.0: @ %entry
65 ; CHECK-NEXT: sub sp, #8
66 ; CHECK-NEXT: vldrw.u32 q0, [r1]
67 ; CHECK-NEXT: add r3, sp, #4
68 ; CHECK-NEXT: vcmp.s32 gt, q0, zr
69 ; CHECK-NEXT: @ implicit-def: $q0
70 ; CHECK-NEXT: vstr p0, [r3]
71 ; CHECK-NEXT: ldrb.w r1, [sp, #4]
72 ; CHECK-NEXT: lsls r3, r1, #31
74 ; CHECK-NEXT: ldrbne r3, [r2]
75 ; CHECK-NEXT: vmovne.32 q0[0], r3
76 ; CHECK-NEXT: lsls r3, r1, #30
78 ; CHECK-NEXT: ldrbmi r3, [r2, #1]
79 ; CHECK-NEXT: vmovmi.32 q0[1], r3
80 ; CHECK-NEXT: lsls r3, r1, #29
82 ; CHECK-NEXT: ldrbmi r3, [r2, #2]
83 ; CHECK-NEXT: vmovmi.32 q0[2], r3
84 ; CHECK-NEXT: lsls r1, r1, #28
86 ; CHECK-NEXT: ldrbmi r1, [r2, #3]
87 ; CHECK-NEXT: vmovmi.32 q0[3], r1
88 ; CHECK-NEXT: mov r1, sp
89 ; CHECK-NEXT: vmovlb.s8 q0, q0
90 ; CHECK-NEXT: vstr p0, [r1]
91 ; CHECK-NEXT: vmovlb.s16 q0, q0
92 ; CHECK-NEXT: ldrb.w r1, [sp]
93 ; CHECK-NEXT: lsls r2, r1, #31
95 ; CHECK-NEXT: vmovne r2, s0
96 ; CHECK-NEXT: strne r2, [r0]
97 ; CHECK-NEXT: lsls r2, r1, #30
99 ; CHECK-NEXT: vmovmi r2, s1
100 ; CHECK-NEXT: strmi r2, [r0, #4]
101 ; CHECK-NEXT: lsls r2, r1, #29
103 ; CHECK-NEXT: vmovmi r2, s2
104 ; CHECK-NEXT: strmi r2, [r0, #8]
105 ; CHECK-NEXT: lsls r1, r1, #28
107 ; CHECK-NEXT: vmovmi r1, s3
108 ; CHECK-NEXT: strmi r1, [r0, #12]
109 ; CHECK-NEXT: add sp, #8
112 %0 = load <4 x i32>, <4 x i32>* %mask, align 4
113 %1 = icmp sgt <4 x i32> %0, zeroinitializer
114 %2 = call <4 x i8> @llvm.masked.load.v4i8(<4 x i8>* %src, i32 1, <4 x i1> %1, <4 x i8> undef)
115 %3 = sext <4 x i8> %2 to <4 x i32>
116 call void @llvm.masked.store.v4i32(<4 x i32> %3, <4 x i32>* %dest, i32 4, <4 x i1> %1)
120 define void @foo_sext_v4i32_v4i16(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i16> *%src) {
121 ; CHECK-LABEL: foo_sext_v4i32_v4i16:
122 ; CHECK: @ %bb.0: @ %entry
123 ; CHECK-NEXT: .pad #8
124 ; CHECK-NEXT: sub sp, #8
125 ; CHECK-NEXT: vldrw.u32 q0, [r1]
126 ; CHECK-NEXT: add r3, sp, #4
127 ; CHECK-NEXT: vcmp.s32 gt, q0, zr
128 ; CHECK-NEXT: @ implicit-def: $q0
129 ; CHECK-NEXT: vstr p0, [r3]
130 ; CHECK-NEXT: ldrb.w r1, [sp, #4]
131 ; CHECK-NEXT: lsls r3, r1, #31
133 ; CHECK-NEXT: ldrhne r3, [r2]
134 ; CHECK-NEXT: vmovne.32 q0[0], r3
135 ; CHECK-NEXT: lsls r3, r1, #30
137 ; CHECK-NEXT: ldrhmi r3, [r2, #2]
138 ; CHECK-NEXT: vmovmi.32 q0[1], r3
139 ; CHECK-NEXT: lsls r3, r1, #29
141 ; CHECK-NEXT: ldrhmi r3, [r2, #4]
142 ; CHECK-NEXT: vmovmi.32 q0[2], r3
143 ; CHECK-NEXT: lsls r1, r1, #28
145 ; CHECK-NEXT: ldrhmi r1, [r2, #6]
146 ; CHECK-NEXT: vmovmi.32 q0[3], r1
147 ; CHECK-NEXT: mov r1, sp
148 ; CHECK-NEXT: vmovlb.s16 q0, q0
149 ; CHECK-NEXT: vstr p0, [r1]
150 ; CHECK-NEXT: ldrb.w r1, [sp]
151 ; CHECK-NEXT: lsls r2, r1, #31
153 ; CHECK-NEXT: vmovne r2, s0
154 ; CHECK-NEXT: strne r2, [r0]
155 ; CHECK-NEXT: lsls r2, r1, #30
157 ; CHECK-NEXT: vmovmi r2, s1
158 ; CHECK-NEXT: strmi r2, [r0, #4]
159 ; CHECK-NEXT: lsls r2, r1, #29
161 ; CHECK-NEXT: vmovmi r2, s2
162 ; CHECK-NEXT: strmi r2, [r0, #8]
163 ; CHECK-NEXT: lsls r1, r1, #28
165 ; CHECK-NEXT: vmovmi r1, s3
166 ; CHECK-NEXT: strmi r1, [r0, #12]
167 ; CHECK-NEXT: add sp, #8
170 %0 = load <4 x i32>, <4 x i32>* %mask, align 4
171 %1 = icmp sgt <4 x i32> %0, zeroinitializer
172 %2 = call <4 x i16> @llvm.masked.load.v4i16(<4 x i16>* %src, i32 2, <4 x i1> %1, <4 x i16> undef)
173 %3 = sext <4 x i16> %2 to <4 x i32>
174 call void @llvm.masked.store.v4i32(<4 x i32> %3, <4 x i32>* %dest, i32 4, <4 x i1> %1)
178 define void @foo_zext_v4i32_v4i8(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i8> *%src) {
179 ; CHECK-LABEL: foo_zext_v4i32_v4i8:
180 ; CHECK: @ %bb.0: @ %entry
181 ; CHECK-NEXT: .pad #8
182 ; CHECK-NEXT: sub sp, #8
183 ; CHECK-NEXT: vldrw.u32 q0, [r1]
184 ; CHECK-NEXT: add r3, sp, #4
185 ; CHECK-NEXT: vmov.i32 q1, #0xff
186 ; CHECK-NEXT: vcmp.s32 gt, q0, zr
187 ; CHECK-NEXT: @ implicit-def: $q0
188 ; CHECK-NEXT: vstr p0, [r3]
189 ; CHECK-NEXT: ldrb.w r1, [sp, #4]
190 ; CHECK-NEXT: lsls r3, r1, #31
192 ; CHECK-NEXT: ldrbne r3, [r2]
193 ; CHECK-NEXT: vmovne.32 q0[0], r3
194 ; CHECK-NEXT: lsls r3, r1, #30
196 ; CHECK-NEXT: ldrbmi r3, [r2, #1]
197 ; CHECK-NEXT: vmovmi.32 q0[1], r3
198 ; CHECK-NEXT: lsls r3, r1, #29
200 ; CHECK-NEXT: ldrbmi r3, [r2, #2]
201 ; CHECK-NEXT: vmovmi.32 q0[2], r3
202 ; CHECK-NEXT: lsls r1, r1, #28
204 ; CHECK-NEXT: ldrbmi r1, [r2, #3]
205 ; CHECK-NEXT: vmovmi.32 q0[3], r1
206 ; CHECK-NEXT: mov r1, sp
207 ; CHECK-NEXT: vand q0, q0, q1
208 ; CHECK-NEXT: vstr p0, [r1]
209 ; CHECK-NEXT: ldrb.w r1, [sp]
210 ; CHECK-NEXT: lsls r2, r1, #31
212 ; CHECK-NEXT: vmovne r2, s0
213 ; CHECK-NEXT: strne r2, [r0]
214 ; CHECK-NEXT: lsls r2, r1, #30
216 ; CHECK-NEXT: vmovmi r2, s1
217 ; CHECK-NEXT: strmi r2, [r0, #4]
218 ; CHECK-NEXT: lsls r2, r1, #29
220 ; CHECK-NEXT: vmovmi r2, s2
221 ; CHECK-NEXT: strmi r2, [r0, #8]
222 ; CHECK-NEXT: lsls r1, r1, #28
224 ; CHECK-NEXT: vmovmi r1, s3
225 ; CHECK-NEXT: strmi r1, [r0, #12]
226 ; CHECK-NEXT: add sp, #8
229 %0 = load <4 x i32>, <4 x i32>* %mask, align 4
230 %1 = icmp sgt <4 x i32> %0, zeroinitializer
231 %2 = call <4 x i8> @llvm.masked.load.v4i8(<4 x i8>* %src, i32 1, <4 x i1> %1, <4 x i8> undef)
232 %3 = zext <4 x i8> %2 to <4 x i32>
233 call void @llvm.masked.store.v4i32(<4 x i32> %3, <4 x i32>* %dest, i32 4, <4 x i1> %1)
237 define void @foo_zext_v4i32_v4i16(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i16> *%src) {
238 ; CHECK-LABEL: foo_zext_v4i32_v4i16:
239 ; CHECK: @ %bb.0: @ %entry
240 ; CHECK-NEXT: .pad #8
241 ; CHECK-NEXT: sub sp, #8
242 ; CHECK-NEXT: vldrw.u32 q0, [r1]
243 ; CHECK-NEXT: add r3, sp, #4
244 ; CHECK-NEXT: vcmp.s32 gt, q0, zr
245 ; CHECK-NEXT: @ implicit-def: $q0
246 ; CHECK-NEXT: vstr p0, [r3]
247 ; CHECK-NEXT: ldrb.w r1, [sp, #4]
248 ; CHECK-NEXT: lsls r3, r1, #31
250 ; CHECK-NEXT: ldrhne r3, [r2]
251 ; CHECK-NEXT: vmovne.32 q0[0], r3
252 ; CHECK-NEXT: lsls r3, r1, #30
254 ; CHECK-NEXT: ldrhmi r3, [r2, #2]
255 ; CHECK-NEXT: vmovmi.32 q0[1], r3
256 ; CHECK-NEXT: lsls r3, r1, #29
258 ; CHECK-NEXT: ldrhmi r3, [r2, #4]
259 ; CHECK-NEXT: vmovmi.32 q0[2], r3
260 ; CHECK-NEXT: lsls r1, r1, #28
262 ; CHECK-NEXT: ldrhmi r1, [r2, #6]
263 ; CHECK-NEXT: vmovmi.32 q0[3], r1
264 ; CHECK-NEXT: mov r1, sp
265 ; CHECK-NEXT: vmovlb.u16 q0, q0
266 ; CHECK-NEXT: vstr p0, [r1]
267 ; CHECK-NEXT: ldrb.w r1, [sp]
268 ; CHECK-NEXT: lsls r2, r1, #31
270 ; CHECK-NEXT: vmovne r2, s0
271 ; CHECK-NEXT: strne r2, [r0]
272 ; CHECK-NEXT: lsls r2, r1, #30
274 ; CHECK-NEXT: vmovmi r2, s1
275 ; CHECK-NEXT: strmi r2, [r0, #4]
276 ; CHECK-NEXT: lsls r2, r1, #29
278 ; CHECK-NEXT: vmovmi r2, s2
279 ; CHECK-NEXT: strmi r2, [r0, #8]
280 ; CHECK-NEXT: lsls r1, r1, #28
282 ; CHECK-NEXT: vmovmi r1, s3
283 ; CHECK-NEXT: strmi r1, [r0, #12]
284 ; CHECK-NEXT: add sp, #8
287 %0 = load <4 x i32>, <4 x i32>* %mask, align 4
288 %1 = icmp sgt <4 x i32> %0, zeroinitializer
289 %2 = call <4 x i16> @llvm.masked.load.v4i16(<4 x i16>* %src, i32 2, <4 x i1> %1, <4 x i16> undef)
290 %3 = zext <4 x i16> %2 to <4 x i32>
291 call void @llvm.masked.store.v4i32(<4 x i32> %3, <4 x i32>* %dest, i32 4, <4 x i1> %1)
295 define void @foo_v8i16_v8i16(<8 x i16> *%dest, <8 x i16> *%mask, <8 x i16> *%src) {
296 ; CHECK-LABEL: foo_v8i16_v8i16:
297 ; CHECK: @ %bb.0: @ %entry
298 ; CHECK-NEXT: .pad #16
299 ; CHECK-NEXT: sub sp, #16
300 ; CHECK-NEXT: vldrh.u16 q0, [r1]
301 ; CHECK-NEXT: add r3, sp, #8
302 ; CHECK-NEXT: vcmp.s16 gt, q0, zr
303 ; CHECK-NEXT: @ implicit-def: $q0
304 ; CHECK-NEXT: vstr p0, [r3]
305 ; CHECK-NEXT: ldrb.w r1, [sp, #8]
306 ; CHECK-NEXT: lsls r3, r1, #31
308 ; CHECK-NEXT: ldrhne r3, [r2]
309 ; CHECK-NEXT: vmovne.16 q0[0], r3
310 ; CHECK-NEXT: lsls r3, r1, #30
312 ; CHECK-NEXT: ldrhmi r3, [r2, #2]
313 ; CHECK-NEXT: vmovmi.16 q0[1], r3
314 ; CHECK-NEXT: lsls r3, r1, #29
316 ; CHECK-NEXT: ldrhmi r3, [r2, #4]
317 ; CHECK-NEXT: vmovmi.16 q0[2], r3
318 ; CHECK-NEXT: lsls r3, r1, #28
320 ; CHECK-NEXT: ldrhmi r3, [r2, #6]
321 ; CHECK-NEXT: vmovmi.16 q0[3], r3
322 ; CHECK-NEXT: lsls r3, r1, #27
324 ; CHECK-NEXT: ldrhmi r3, [r2, #8]
325 ; CHECK-NEXT: vmovmi.16 q0[4], r3
326 ; CHECK-NEXT: lsls r3, r1, #26
328 ; CHECK-NEXT: ldrhmi r3, [r2, #10]
329 ; CHECK-NEXT: vmovmi.16 q0[5], r3
330 ; CHECK-NEXT: lsls r3, r1, #25
332 ; CHECK-NEXT: ldrhmi r3, [r2, #12]
333 ; CHECK-NEXT: vmovmi.16 q0[6], r3
334 ; CHECK-NEXT: lsls r1, r1, #24
336 ; CHECK-NEXT: ldrhmi r1, [r2, #14]
337 ; CHECK-NEXT: vmovmi.16 q0[7], r1
338 ; CHECK-NEXT: mov r1, sp
339 ; CHECK-NEXT: vstr p0, [r1]
340 ; CHECK-NEXT: ldrb.w r1, [sp]
341 ; CHECK-NEXT: lsls r2, r1, #31
343 ; CHECK-NEXT: vmovne.u16 r2, q0[0]
344 ; CHECK-NEXT: strhne r2, [r0]
345 ; CHECK-NEXT: lsls r2, r1, #30
347 ; CHECK-NEXT: vmovmi.u16 r2, q0[1]
348 ; CHECK-NEXT: strhmi r2, [r0, #2]
349 ; CHECK-NEXT: lsls r2, r1, #29
351 ; CHECK-NEXT: vmovmi.u16 r2, q0[2]
352 ; CHECK-NEXT: strhmi r2, [r0, #4]
353 ; CHECK-NEXT: lsls r2, r1, #28
355 ; CHECK-NEXT: vmovmi.u16 r2, q0[3]
356 ; CHECK-NEXT: strhmi r2, [r0, #6]
357 ; CHECK-NEXT: lsls r2, r1, #27
359 ; CHECK-NEXT: vmovmi.u16 r2, q0[4]
360 ; CHECK-NEXT: strhmi r2, [r0, #8]
361 ; CHECK-NEXT: lsls r2, r1, #26
363 ; CHECK-NEXT: vmovmi.u16 r2, q0[5]
364 ; CHECK-NEXT: strhmi r2, [r0, #10]
365 ; CHECK-NEXT: lsls r2, r1, #25
367 ; CHECK-NEXT: vmovmi.u16 r2, q0[6]
368 ; CHECK-NEXT: strhmi r2, [r0, #12]
369 ; CHECK-NEXT: lsls r1, r1, #24
371 ; CHECK-NEXT: vmovmi.u16 r1, q0[7]
372 ; CHECK-NEXT: strhmi r1, [r0, #14]
373 ; CHECK-NEXT: add sp, #16
376 %0 = load <8 x i16>, <8 x i16>* %mask, align 2
377 %1 = icmp sgt <8 x i16> %0, zeroinitializer
378 %2 = call <8 x i16> @llvm.masked.load.v8i16(<8 x i16>* %src, i32 2, <8 x i1> %1, <8 x i16> undef)
379 call void @llvm.masked.store.v8i16(<8 x i16> %2, <8 x i16>* %dest, i32 2, <8 x i1> %1)
383 define void @foo_sext_v8i16_v8i8(<8 x i16> *%dest, <8 x i16> *%mask, <8 x i8> *%src) {
384 ; CHECK-LABEL: foo_sext_v8i16_v8i8:
385 ; CHECK: @ %bb.0: @ %entry
386 ; CHECK-NEXT: .pad #16
387 ; CHECK-NEXT: sub sp, #16
388 ; CHECK-NEXT: vldrh.u16 q0, [r1]
389 ; CHECK-NEXT: add r3, sp, #8
390 ; CHECK-NEXT: vcmp.s16 gt, q0, zr
391 ; CHECK-NEXT: @ implicit-def: $q0
392 ; CHECK-NEXT: vstr p0, [r3]
393 ; CHECK-NEXT: ldrb.w r1, [sp, #8]
394 ; CHECK-NEXT: lsls r3, r1, #31
396 ; CHECK-NEXT: ldrbne r3, [r2]
397 ; CHECK-NEXT: vmovne.16 q0[0], r3
398 ; CHECK-NEXT: lsls r3, r1, #30
400 ; CHECK-NEXT: ldrbmi r3, [r2, #1]
401 ; CHECK-NEXT: vmovmi.16 q0[1], r3
402 ; CHECK-NEXT: lsls r3, r1, #29
404 ; CHECK-NEXT: ldrbmi r3, [r2, #2]
405 ; CHECK-NEXT: vmovmi.16 q0[2], r3
406 ; CHECK-NEXT: lsls r3, r1, #28
408 ; CHECK-NEXT: ldrbmi r3, [r2, #3]
409 ; CHECK-NEXT: vmovmi.16 q0[3], r3
410 ; CHECK-NEXT: lsls r3, r1, #27
412 ; CHECK-NEXT: ldrbmi r3, [r2, #4]
413 ; CHECK-NEXT: vmovmi.16 q0[4], r3
414 ; CHECK-NEXT: lsls r3, r1, #26
416 ; CHECK-NEXT: ldrbmi r3, [r2, #5]
417 ; CHECK-NEXT: vmovmi.16 q0[5], r3
418 ; CHECK-NEXT: lsls r3, r1, #25
420 ; CHECK-NEXT: ldrbmi r3, [r2, #6]
421 ; CHECK-NEXT: vmovmi.16 q0[6], r3
422 ; CHECK-NEXT: lsls r1, r1, #24
424 ; CHECK-NEXT: ldrbmi r1, [r2, #7]
425 ; CHECK-NEXT: vmovmi.16 q0[7], r1
426 ; CHECK-NEXT: mov r1, sp
427 ; CHECK-NEXT: vmovlb.s8 q0, q0
428 ; CHECK-NEXT: vstr p0, [r1]
429 ; CHECK-NEXT: ldrb.w r1, [sp]
430 ; CHECK-NEXT: lsls r2, r1, #31
432 ; CHECK-NEXT: vmovne.u16 r2, q0[0]
433 ; CHECK-NEXT: strhne r2, [r0]
434 ; CHECK-NEXT: lsls r2, r1, #30
436 ; CHECK-NEXT: vmovmi.u16 r2, q0[1]
437 ; CHECK-NEXT: strhmi r2, [r0, #2]
438 ; CHECK-NEXT: lsls r2, r1, #29
440 ; CHECK-NEXT: vmovmi.u16 r2, q0[2]
441 ; CHECK-NEXT: strhmi r2, [r0, #4]
442 ; CHECK-NEXT: lsls r2, r1, #28
444 ; CHECK-NEXT: vmovmi.u16 r2, q0[3]
445 ; CHECK-NEXT: strhmi r2, [r0, #6]
446 ; CHECK-NEXT: lsls r2, r1, #27
448 ; CHECK-NEXT: vmovmi.u16 r2, q0[4]
449 ; CHECK-NEXT: strhmi r2, [r0, #8]
450 ; CHECK-NEXT: lsls r2, r1, #26
452 ; CHECK-NEXT: vmovmi.u16 r2, q0[5]
453 ; CHECK-NEXT: strhmi r2, [r0, #10]
454 ; CHECK-NEXT: lsls r2, r1, #25
456 ; CHECK-NEXT: vmovmi.u16 r2, q0[6]
457 ; CHECK-NEXT: strhmi r2, [r0, #12]
458 ; CHECK-NEXT: lsls r1, r1, #24
460 ; CHECK-NEXT: vmovmi.u16 r1, q0[7]
461 ; CHECK-NEXT: strhmi r1, [r0, #14]
462 ; CHECK-NEXT: add sp, #16
465 %0 = load <8 x i16>, <8 x i16>* %mask, align 2
466 %1 = icmp sgt <8 x i16> %0, zeroinitializer
467 %2 = call <8 x i8> @llvm.masked.load.v8i8(<8 x i8>* %src, i32 1, <8 x i1> %1, <8 x i8> undef)
468 %3 = sext <8 x i8> %2 to <8 x i16>
469 call void @llvm.masked.store.v8i16(<8 x i16> %3, <8 x i16>* %dest, i32 2, <8 x i1> %1)
473 define void @foo_zext_v8i16_v8i8(<8 x i16> *%dest, <8 x i16> *%mask, <8 x i8> *%src) {
474 ; CHECK-LABEL: foo_zext_v8i16_v8i8:
475 ; CHECK: @ %bb.0: @ %entry
476 ; CHECK-NEXT: .pad #16
477 ; CHECK-NEXT: sub sp, #16
478 ; CHECK-NEXT: vldrh.u16 q0, [r1]
479 ; CHECK-NEXT: add r3, sp, #8
480 ; CHECK-NEXT: vcmp.s16 gt, q0, zr
481 ; CHECK-NEXT: @ implicit-def: $q0
482 ; CHECK-NEXT: vstr p0, [r3]
483 ; CHECK-NEXT: ldrb.w r1, [sp, #8]
484 ; CHECK-NEXT: lsls r3, r1, #31
486 ; CHECK-NEXT: ldrbne r3, [r2]
487 ; CHECK-NEXT: vmovne.16 q0[0], r3
488 ; CHECK-NEXT: lsls r3, r1, #30
490 ; CHECK-NEXT: ldrbmi r3, [r2, #1]
491 ; CHECK-NEXT: vmovmi.16 q0[1], r3
492 ; CHECK-NEXT: lsls r3, r1, #29
494 ; CHECK-NEXT: ldrbmi r3, [r2, #2]
495 ; CHECK-NEXT: vmovmi.16 q0[2], r3
496 ; CHECK-NEXT: lsls r3, r1, #28
498 ; CHECK-NEXT: ldrbmi r3, [r2, #3]
499 ; CHECK-NEXT: vmovmi.16 q0[3], r3
500 ; CHECK-NEXT: lsls r3, r1, #27
502 ; CHECK-NEXT: ldrbmi r3, [r2, #4]
503 ; CHECK-NEXT: vmovmi.16 q0[4], r3
504 ; CHECK-NEXT: lsls r3, r1, #26
506 ; CHECK-NEXT: ldrbmi r3, [r2, #5]
507 ; CHECK-NEXT: vmovmi.16 q0[5], r3
508 ; CHECK-NEXT: lsls r3, r1, #25
510 ; CHECK-NEXT: ldrbmi r3, [r2, #6]
511 ; CHECK-NEXT: vmovmi.16 q0[6], r3
512 ; CHECK-NEXT: lsls r1, r1, #24
514 ; CHECK-NEXT: ldrbmi r1, [r2, #7]
515 ; CHECK-NEXT: vmovmi.16 q0[7], r1
516 ; CHECK-NEXT: mov r1, sp
517 ; CHECK-NEXT: vmovlb.u8 q0, q0
518 ; CHECK-NEXT: vstr p0, [r1]
519 ; CHECK-NEXT: ldrb.w r1, [sp]
520 ; CHECK-NEXT: lsls r2, r1, #31
522 ; CHECK-NEXT: vmovne.u16 r2, q0[0]
523 ; CHECK-NEXT: strhne r2, [r0]
524 ; CHECK-NEXT: lsls r2, r1, #30
526 ; CHECK-NEXT: vmovmi.u16 r2, q0[1]
527 ; CHECK-NEXT: strhmi r2, [r0, #2]
528 ; CHECK-NEXT: lsls r2, r1, #29
530 ; CHECK-NEXT: vmovmi.u16 r2, q0[2]
531 ; CHECK-NEXT: strhmi r2, [r0, #4]
532 ; CHECK-NEXT: lsls r2, r1, #28
534 ; CHECK-NEXT: vmovmi.u16 r2, q0[3]
535 ; CHECK-NEXT: strhmi r2, [r0, #6]
536 ; CHECK-NEXT: lsls r2, r1, #27
538 ; CHECK-NEXT: vmovmi.u16 r2, q0[4]
539 ; CHECK-NEXT: strhmi r2, [r0, #8]
540 ; CHECK-NEXT: lsls r2, r1, #26
542 ; CHECK-NEXT: vmovmi.u16 r2, q0[5]
543 ; CHECK-NEXT: strhmi r2, [r0, #10]
544 ; CHECK-NEXT: lsls r2, r1, #25
546 ; CHECK-NEXT: vmovmi.u16 r2, q0[6]
547 ; CHECK-NEXT: strhmi r2, [r0, #12]
548 ; CHECK-NEXT: lsls r1, r1, #24
550 ; CHECK-NEXT: vmovmi.u16 r1, q0[7]
551 ; CHECK-NEXT: strhmi r1, [r0, #14]
552 ; CHECK-NEXT: add sp, #16
555 %0 = load <8 x i16>, <8 x i16>* %mask, align 2
556 %1 = icmp sgt <8 x i16> %0, zeroinitializer
557 %2 = call <8 x i8> @llvm.masked.load.v8i8(<8 x i8>* %src, i32 1, <8 x i1> %1, <8 x i8> undef)
558 %3 = zext <8 x i8> %2 to <8 x i16>
559 call void @llvm.masked.store.v8i16(<8 x i16> %3, <8 x i16>* %dest, i32 2, <8 x i1> %1)
563 define void @foo_v16i8_v16i8(<16 x i8> *%dest, <16 x i8> *%mask, <16 x i8> *%src) {
564 ; CHECK-LABEL: foo_v16i8_v16i8:
565 ; CHECK: @ %bb.0: @ %entry
566 ; CHECK-NEXT: .save {r4, r6, r7, lr}
567 ; CHECK-NEXT: push {r4, r6, r7, lr}
568 ; CHECK-NEXT: .setfp r7, sp, #8
569 ; CHECK-NEXT: add r7, sp, #8
570 ; CHECK-NEXT: .pad #32
571 ; CHECK-NEXT: sub sp, #32
572 ; CHECK-NEXT: mov r4, sp
573 ; CHECK-NEXT: bfc r4, #0, #4
574 ; CHECK-NEXT: mov sp, r4
575 ; CHECK-NEXT: vldrb.u8 q0, [r1]
576 ; CHECK-NEXT: add r3, sp, #16
577 ; CHECK-NEXT: sub.w r4, r7, #8
578 ; CHECK-NEXT: vcmp.s8 gt, q0, zr
579 ; CHECK-NEXT: @ implicit-def: $q0
580 ; CHECK-NEXT: vstr p0, [r3]
581 ; CHECK-NEXT: ldrh.w r1, [sp, #16]
582 ; CHECK-NEXT: lsls r3, r1, #31
584 ; CHECK-NEXT: ldrbne r3, [r2]
585 ; CHECK-NEXT: vmovne.8 q0[0], r3
586 ; CHECK-NEXT: lsls r3, r1, #30
588 ; CHECK-NEXT: ldrbmi r3, [r2, #1]
589 ; CHECK-NEXT: vmovmi.8 q0[1], r3
590 ; CHECK-NEXT: lsls r3, r1, #29
592 ; CHECK-NEXT: ldrbmi r3, [r2, #2]
593 ; CHECK-NEXT: vmovmi.8 q0[2], r3
594 ; CHECK-NEXT: lsls r3, r1, #28
596 ; CHECK-NEXT: ldrbmi r3, [r2, #3]
597 ; CHECK-NEXT: vmovmi.8 q0[3], r3
598 ; CHECK-NEXT: lsls r3, r1, #27
600 ; CHECK-NEXT: ldrbmi r3, [r2, #4]
601 ; CHECK-NEXT: vmovmi.8 q0[4], r3
602 ; CHECK-NEXT: lsls r3, r1, #26
604 ; CHECK-NEXT: ldrbmi r3, [r2, #5]
605 ; CHECK-NEXT: vmovmi.8 q0[5], r3
606 ; CHECK-NEXT: lsls r3, r1, #25
608 ; CHECK-NEXT: ldrbmi r3, [r2, #6]
609 ; CHECK-NEXT: vmovmi.8 q0[6], r3
610 ; CHECK-NEXT: lsls r3, r1, #24
612 ; CHECK-NEXT: ldrbmi r3, [r2, #7]
613 ; CHECK-NEXT: vmovmi.8 q0[7], r3
614 ; CHECK-NEXT: lsls r3, r1, #23
616 ; CHECK-NEXT: ldrbmi r3, [r2, #8]
617 ; CHECK-NEXT: vmovmi.8 q0[8], r3
618 ; CHECK-NEXT: lsls r3, r1, #22
620 ; CHECK-NEXT: ldrbmi r3, [r2, #9]
621 ; CHECK-NEXT: vmovmi.8 q0[9], r3
622 ; CHECK-NEXT: lsls r3, r1, #21
624 ; CHECK-NEXT: ldrbmi r3, [r2, #10]
625 ; CHECK-NEXT: vmovmi.8 q0[10], r3
626 ; CHECK-NEXT: lsls r3, r1, #20
628 ; CHECK-NEXT: ldrbmi r3, [r2, #11]
629 ; CHECK-NEXT: vmovmi.8 q0[11], r3
630 ; CHECK-NEXT: lsls r3, r1, #19
632 ; CHECK-NEXT: ldrbmi r3, [r2, #12]
633 ; CHECK-NEXT: vmovmi.8 q0[12], r3
634 ; CHECK-NEXT: lsls r3, r1, #18
636 ; CHECK-NEXT: ldrbmi r3, [r2, #13]
637 ; CHECK-NEXT: vmovmi.8 q0[13], r3
638 ; CHECK-NEXT: lsls r3, r1, #17
640 ; CHECK-NEXT: ldrbmi r3, [r2, #14]
641 ; CHECK-NEXT: vmovmi.8 q0[14], r3
642 ; CHECK-NEXT: lsls r1, r1, #16
644 ; CHECK-NEXT: ldrbmi r1, [r2, #15]
645 ; CHECK-NEXT: vmovmi.8 q0[15], r1
646 ; CHECK-NEXT: mov r1, sp
647 ; CHECK-NEXT: vstr p0, [r1]
648 ; CHECK-NEXT: ldrh.w r1, [sp]
649 ; CHECK-NEXT: lsls r2, r1, #31
651 ; CHECK-NEXT: vmovne.u8 r2, q0[0]
652 ; CHECK-NEXT: strbne r2, [r0]
653 ; CHECK-NEXT: lsls r2, r1, #30
655 ; CHECK-NEXT: vmovmi.u8 r2, q0[1]
656 ; CHECK-NEXT: strbmi r2, [r0, #1]
657 ; CHECK-NEXT: lsls r2, r1, #29
659 ; CHECK-NEXT: vmovmi.u8 r2, q0[2]
660 ; CHECK-NEXT: strbmi r2, [r0, #2]
661 ; CHECK-NEXT: lsls r2, r1, #28
663 ; CHECK-NEXT: vmovmi.u8 r2, q0[3]
664 ; CHECK-NEXT: strbmi r2, [r0, #3]
665 ; CHECK-NEXT: lsls r2, r1, #27
667 ; CHECK-NEXT: vmovmi.u8 r2, q0[4]
668 ; CHECK-NEXT: strbmi r2, [r0, #4]
669 ; CHECK-NEXT: lsls r2, r1, #26
671 ; CHECK-NEXT: vmovmi.u8 r2, q0[5]
672 ; CHECK-NEXT: strbmi r2, [r0, #5]
673 ; CHECK-NEXT: lsls r2, r1, #25
675 ; CHECK-NEXT: vmovmi.u8 r2, q0[6]
676 ; CHECK-NEXT: strbmi r2, [r0, #6]
677 ; CHECK-NEXT: lsls r2, r1, #24
679 ; CHECK-NEXT: vmovmi.u8 r2, q0[7]
680 ; CHECK-NEXT: strbmi r2, [r0, #7]
681 ; CHECK-NEXT: lsls r2, r1, #23
683 ; CHECK-NEXT: vmovmi.u8 r2, q0[8]
684 ; CHECK-NEXT: strbmi r2, [r0, #8]
685 ; CHECK-NEXT: lsls r2, r1, #22
687 ; CHECK-NEXT: vmovmi.u8 r2, q0[9]
688 ; CHECK-NEXT: strbmi r2, [r0, #9]
689 ; CHECK-NEXT: lsls r2, r1, #21
691 ; CHECK-NEXT: vmovmi.u8 r2, q0[10]
692 ; CHECK-NEXT: strbmi r2, [r0, #10]
693 ; CHECK-NEXT: lsls r2, r1, #20
695 ; CHECK-NEXT: vmovmi.u8 r2, q0[11]
696 ; CHECK-NEXT: strbmi r2, [r0, #11]
697 ; CHECK-NEXT: lsls r2, r1, #19
699 ; CHECK-NEXT: vmovmi.u8 r2, q0[12]
700 ; CHECK-NEXT: strbmi r2, [r0, #12]
701 ; CHECK-NEXT: lsls r2, r1, #18
703 ; CHECK-NEXT: vmovmi.u8 r2, q0[13]
704 ; CHECK-NEXT: strbmi r2, [r0, #13]
705 ; CHECK-NEXT: lsls r2, r1, #17
707 ; CHECK-NEXT: vmovmi.u8 r2, q0[14]
708 ; CHECK-NEXT: strbmi r2, [r0, #14]
709 ; CHECK-NEXT: lsls r1, r1, #16
711 ; CHECK-NEXT: vmovmi.u8 r1, q0[15]
712 ; CHECK-NEXT: strbmi r1, [r0, #15]
713 ; CHECK-NEXT: mov sp, r4
714 ; CHECK-NEXT: pop {r4, r6, r7, pc}
716 %0 = load <16 x i8>, <16 x i8>* %mask, align 1
717 %1 = icmp sgt <16 x i8> %0, zeroinitializer
718 %2 = call <16 x i8> @llvm.masked.load.v16i8(<16 x i8>* %src, i32 1, <16 x i1> %1, <16 x i8> undef)
719 call void @llvm.masked.store.v16i8(<16 x i8> %2, <16 x i8>* %dest, i32 1, <16 x i1> %1)
723 define void @foo_trunc_v8i8_v8i16(<8 x i8> *%dest, <8 x i16> *%mask, <8 x i16> *%src) {
724 ; CHECK-LABEL: foo_trunc_v8i8_v8i16:
725 ; CHECK: @ %bb.0: @ %entry
726 ; CHECK-NEXT: .pad #16
727 ; CHECK-NEXT: sub sp, #16
728 ; CHECK-NEXT: vldrh.u16 q0, [r1]
729 ; CHECK-NEXT: add r3, sp, #8
730 ; CHECK-NEXT: vcmp.s16 gt, q0, zr
731 ; CHECK-NEXT: @ implicit-def: $q0
732 ; CHECK-NEXT: vstr p0, [r3]
733 ; CHECK-NEXT: ldrb.w r1, [sp, #8]
734 ; CHECK-NEXT: lsls r3, r1, #31
736 ; CHECK-NEXT: ldrhne r3, [r2]
737 ; CHECK-NEXT: vmovne.16 q0[0], r3
738 ; CHECK-NEXT: lsls r3, r1, #30
740 ; CHECK-NEXT: ldrhmi r3, [r2, #2]
741 ; CHECK-NEXT: vmovmi.16 q0[1], r3
742 ; CHECK-NEXT: lsls r3, r1, #29
744 ; CHECK-NEXT: ldrhmi r3, [r2, #4]
745 ; CHECK-NEXT: vmovmi.16 q0[2], r3
746 ; CHECK-NEXT: lsls r3, r1, #28
748 ; CHECK-NEXT: ldrhmi r3, [r2, #6]
749 ; CHECK-NEXT: vmovmi.16 q0[3], r3
750 ; CHECK-NEXT: lsls r3, r1, #27
752 ; CHECK-NEXT: ldrhmi r3, [r2, #8]
753 ; CHECK-NEXT: vmovmi.16 q0[4], r3
754 ; CHECK-NEXT: lsls r3, r1, #26
756 ; CHECK-NEXT: ldrhmi r3, [r2, #10]
757 ; CHECK-NEXT: vmovmi.16 q0[5], r3
758 ; CHECK-NEXT: lsls r3, r1, #25
760 ; CHECK-NEXT: ldrhmi r3, [r2, #12]
761 ; CHECK-NEXT: vmovmi.16 q0[6], r3
762 ; CHECK-NEXT: lsls r1, r1, #24
764 ; CHECK-NEXT: ldrhmi r1, [r2, #14]
765 ; CHECK-NEXT: vmovmi.16 q0[7], r1
766 ; CHECK-NEXT: mov r1, sp
767 ; CHECK-NEXT: vstr p0, [r1]
768 ; CHECK-NEXT: ldrb.w r1, [sp]
769 ; CHECK-NEXT: lsls r2, r1, #31
771 ; CHECK-NEXT: vmovne.u16 r2, q0[0]
772 ; CHECK-NEXT: strbne r2, [r0]
773 ; CHECK-NEXT: lsls r2, r1, #30
775 ; CHECK-NEXT: vmovmi.u16 r2, q0[1]
776 ; CHECK-NEXT: strbmi r2, [r0, #1]
777 ; CHECK-NEXT: lsls r2, r1, #29
779 ; CHECK-NEXT: vmovmi.u16 r2, q0[2]
780 ; CHECK-NEXT: strbmi r2, [r0, #2]
781 ; CHECK-NEXT: lsls r2, r1, #28
783 ; CHECK-NEXT: vmovmi.u16 r2, q0[3]
784 ; CHECK-NEXT: strbmi r2, [r0, #3]
785 ; CHECK-NEXT: lsls r2, r1, #27
787 ; CHECK-NEXT: vmovmi.u16 r2, q0[4]
788 ; CHECK-NEXT: strbmi r2, [r0, #4]
789 ; CHECK-NEXT: lsls r2, r1, #26
791 ; CHECK-NEXT: vmovmi.u16 r2, q0[5]
792 ; CHECK-NEXT: strbmi r2, [r0, #5]
793 ; CHECK-NEXT: lsls r2, r1, #25
795 ; CHECK-NEXT: vmovmi.u16 r2, q0[6]
796 ; CHECK-NEXT: strbmi r2, [r0, #6]
797 ; CHECK-NEXT: lsls r1, r1, #24
799 ; CHECK-NEXT: vmovmi.u16 r1, q0[7]
800 ; CHECK-NEXT: strbmi r1, [r0, #7]
801 ; CHECK-NEXT: add sp, #16
804 %0 = load <8 x i16>, <8 x i16>* %mask, align 2
805 %1 = icmp sgt <8 x i16> %0, zeroinitializer
806 %2 = call <8 x i16> @llvm.masked.load.v8i16(<8 x i16>* %src, i32 2, <8 x i1> %1, <8 x i16> undef)
807 %3 = trunc <8 x i16> %2 to <8 x i8>
808 call void @llvm.masked.store.v8i8(<8 x i8> %3, <8 x i8>* %dest, i32 1, <8 x i1> %1)
812 define void @foo_trunc_v4i8_v4i32(<4 x i8> *%dest, <4 x i32> *%mask, <4 x i32> *%src) {
813 ; CHECK-LABEL: foo_trunc_v4i8_v4i32:
814 ; CHECK: @ %bb.0: @ %entry
815 ; CHECK-NEXT: .pad #8
816 ; CHECK-NEXT: sub sp, #8
817 ; CHECK-NEXT: vldrw.u32 q0, [r1]
818 ; CHECK-NEXT: add r3, sp, #4
819 ; CHECK-NEXT: vcmp.s32 gt, q0, zr
820 ; CHECK-NEXT: @ implicit-def: $q0
821 ; CHECK-NEXT: vstr p0, [r3]
822 ; CHECK-NEXT: ldrb.w r1, [sp, #4]
823 ; CHECK-NEXT: lsls r3, r1, #31
825 ; CHECK-NEXT: ldrne r3, [r2]
826 ; CHECK-NEXT: vmovne.32 q0[0], r3
827 ; CHECK-NEXT: lsls r3, r1, #30
829 ; CHECK-NEXT: ldrmi r3, [r2, #4]
830 ; CHECK-NEXT: vmovmi.32 q0[1], r3
831 ; CHECK-NEXT: lsls r3, r1, #29
833 ; CHECK-NEXT: ldrmi r3, [r2, #8]
834 ; CHECK-NEXT: vmovmi.32 q0[2], r3
835 ; CHECK-NEXT: lsls r1, r1, #28
837 ; CHECK-NEXT: ldrmi r1, [r2, #12]
838 ; CHECK-NEXT: vmovmi.32 q0[3], r1
839 ; CHECK-NEXT: mov r1, sp
840 ; CHECK-NEXT: vstr p0, [r1]
841 ; CHECK-NEXT: ldrb.w r1, [sp]
842 ; CHECK-NEXT: lsls r2, r1, #31
844 ; CHECK-NEXT: vmovne r2, s0
845 ; CHECK-NEXT: strbne r2, [r0]
846 ; CHECK-NEXT: lsls r2, r1, #30
848 ; CHECK-NEXT: vmovmi r2, s1
849 ; CHECK-NEXT: strbmi r2, [r0, #1]
850 ; CHECK-NEXT: lsls r2, r1, #29
852 ; CHECK-NEXT: vmovmi r2, s2
853 ; CHECK-NEXT: strbmi r2, [r0, #2]
854 ; CHECK-NEXT: lsls r1, r1, #28
856 ; CHECK-NEXT: vmovmi r1, s3
857 ; CHECK-NEXT: strbmi r1, [r0, #3]
858 ; CHECK-NEXT: add sp, #8
861 %0 = load <4 x i32>, <4 x i32>* %mask, align 4
862 %1 = icmp sgt <4 x i32> %0, zeroinitializer
863 %2 = call <4 x i32> @llvm.masked.load.v4i32(<4 x i32>* %src, i32 4, <4 x i1> %1, <4 x i32> undef)
864 %3 = trunc <4 x i32> %2 to <4 x i8>
865 call void @llvm.masked.store.v4i8(<4 x i8> %3, <4 x i8>* %dest, i32 1, <4 x i1> %1)
869 define void @foo_trunc_v4i16_v4i32(<4 x i16> *%dest, <4 x i32> *%mask, <4 x i32> *%src) {
870 ; CHECK-LABEL: foo_trunc_v4i16_v4i32:
871 ; CHECK: @ %bb.0: @ %entry
872 ; CHECK-NEXT: .pad #8
873 ; CHECK-NEXT: sub sp, #8
874 ; CHECK-NEXT: vldrw.u32 q0, [r1]
875 ; CHECK-NEXT: add r3, sp, #4
876 ; CHECK-NEXT: vcmp.s32 gt, q0, zr
877 ; CHECK-NEXT: @ implicit-def: $q0
878 ; CHECK-NEXT: vstr p0, [r3]
879 ; CHECK-NEXT: ldrb.w r1, [sp, #4]
880 ; CHECK-NEXT: lsls r3, r1, #31
882 ; CHECK-NEXT: ldrne r3, [r2]
883 ; CHECK-NEXT: vmovne.32 q0[0], r3
884 ; CHECK-NEXT: lsls r3, r1, #30
886 ; CHECK-NEXT: ldrmi r3, [r2, #4]
887 ; CHECK-NEXT: vmovmi.32 q0[1], r3
888 ; CHECK-NEXT: lsls r3, r1, #29
890 ; CHECK-NEXT: ldrmi r3, [r2, #8]
891 ; CHECK-NEXT: vmovmi.32 q0[2], r3
892 ; CHECK-NEXT: lsls r1, r1, #28
894 ; CHECK-NEXT: ldrmi r1, [r2, #12]
895 ; CHECK-NEXT: vmovmi.32 q0[3], r1
896 ; CHECK-NEXT: mov r1, sp
897 ; CHECK-NEXT: vstr p0, [r1]
898 ; CHECK-NEXT: ldrb.w r1, [sp]
899 ; CHECK-NEXT: lsls r2, r1, #31
901 ; CHECK-NEXT: vmovne r2, s0
902 ; CHECK-NEXT: strhne r2, [r0]
903 ; CHECK-NEXT: lsls r2, r1, #30
905 ; CHECK-NEXT: vmovmi r2, s1
906 ; CHECK-NEXT: strhmi r2, [r0, #2]
907 ; CHECK-NEXT: lsls r2, r1, #29
909 ; CHECK-NEXT: vmovmi r2, s2
910 ; CHECK-NEXT: strhmi r2, [r0, #4]
911 ; CHECK-NEXT: lsls r1, r1, #28
913 ; CHECK-NEXT: vmovmi r1, s3
914 ; CHECK-NEXT: strhmi r1, [r0, #6]
915 ; CHECK-NEXT: add sp, #8
918 %0 = load <4 x i32>, <4 x i32>* %mask, align 4
919 %1 = icmp sgt <4 x i32> %0, zeroinitializer
920 %2 = call <4 x i32> @llvm.masked.load.v4i32(<4 x i32>* %src, i32 4, <4 x i1> %1, <4 x i32> undef)
921 %3 = trunc <4 x i32> %2 to <4 x i16>
922 call void @llvm.masked.store.v4i16(<4 x i16> %3, <4 x i16>* %dest, i32 2, <4 x i1> %1)
926 define void @foo_v4f32_v4f32(<4 x float> *%dest, <4 x i32> *%mask, <4 x float> *%src) {
927 ; CHECK-LABEL: foo_v4f32_v4f32:
928 ; CHECK: @ %bb.0: @ %entry
929 ; CHECK-NEXT: .pad #8
930 ; CHECK-NEXT: sub sp, #8
931 ; CHECK-NEXT: vldrw.u32 q0, [r1]
932 ; CHECK-NEXT: add r3, sp, #4
933 ; CHECK-NEXT: vcmp.s32 gt, q0, zr
934 ; CHECK-NEXT: @ implicit-def: $q0
935 ; CHECK-NEXT: vstr p0, [r3]
936 ; CHECK-NEXT: ldrb.w r1, [sp, #4]
937 ; CHECK-NEXT: lsls r3, r1, #31
939 ; CHECK-NEXT: vldrne s0, [r2]
940 ; CHECK-NEXT: lsls r3, r1, #30
942 ; CHECK-NEXT: vldrmi s1, [r2, #4]
943 ; CHECK-NEXT: lsls r3, r1, #29
945 ; CHECK-NEXT: vldrmi s2, [r2, #8]
946 ; CHECK-NEXT: lsls r1, r1, #28
948 ; CHECK-NEXT: vldrmi s3, [r2, #12]
949 ; CHECK-NEXT: mov r1, sp
950 ; CHECK-NEXT: vstr p0, [r1]
951 ; CHECK-NEXT: ldrb.w r1, [sp]
952 ; CHECK-NEXT: lsls r2, r1, #31
954 ; CHECK-NEXT: vstrne s0, [r0]
955 ; CHECK-NEXT: lsls r2, r1, #30
957 ; CHECK-NEXT: vstrmi s1, [r0, #4]
958 ; CHECK-NEXT: lsls r2, r1, #29
960 ; CHECK-NEXT: vstrmi s2, [r0, #8]
961 ; CHECK-NEXT: lsls r1, r1, #28
963 ; CHECK-NEXT: vstrmi s3, [r0, #12]
964 ; CHECK-NEXT: add sp, #8
967 %0 = load <4 x i32>, <4 x i32>* %mask, align 4
968 %1 = icmp sgt <4 x i32> %0, zeroinitializer
969 %2 = call <4 x float> @llvm.masked.load.v4f32(<4 x float>* %src, i32 4, <4 x i1> %1, <4 x float> undef)
970 call void @llvm.masked.store.v4f32(<4 x float> %2, <4 x float>* %dest, i32 4, <4 x i1> %1)
974 define void @foo_v8f16_v8f16(<8 x half> *%dest, <8 x i16> *%mask, <8 x half> *%src) {
975 ; CHECK-LABEL: foo_v8f16_v8f16:
976 ; CHECK: @ %bb.0: @ %entry
977 ; CHECK-NEXT: .pad #16
978 ; CHECK-NEXT: sub sp, #16
979 ; CHECK-NEXT: vldrh.u16 q0, [r1]
980 ; CHECK-NEXT: add r3, sp, #8
981 ; CHECK-NEXT: vcmp.s16 gt, q0, zr
982 ; CHECK-NEXT: @ implicit-def: $q0
983 ; CHECK-NEXT: vstr p0, [r3]
984 ; CHECK-NEXT: ldrb.w r1, [sp, #8]
985 ; CHECK-NEXT: lsls r3, r1, #31
986 ; CHECK-NEXT: bne .LBB13_18
987 ; CHECK-NEXT: @ %bb.1: @ %else
988 ; CHECK-NEXT: lsls r3, r1, #30
989 ; CHECK-NEXT: bmi .LBB13_19
990 ; CHECK-NEXT: .LBB13_2: @ %else2
991 ; CHECK-NEXT: lsls r3, r1, #29
992 ; CHECK-NEXT: bmi .LBB13_20
993 ; CHECK-NEXT: .LBB13_3: @ %else5
994 ; CHECK-NEXT: lsls r3, r1, #28
995 ; CHECK-NEXT: bmi .LBB13_21
996 ; CHECK-NEXT: .LBB13_4: @ %else8
997 ; CHECK-NEXT: lsls r3, r1, #27
998 ; CHECK-NEXT: bmi .LBB13_22
999 ; CHECK-NEXT: .LBB13_5: @ %else11
1000 ; CHECK-NEXT: lsls r3, r1, #26
1001 ; CHECK-NEXT: bmi .LBB13_23
1002 ; CHECK-NEXT: .LBB13_6: @ %else14
1003 ; CHECK-NEXT: lsls r3, r1, #25
1004 ; CHECK-NEXT: bmi .LBB13_24
1005 ; CHECK-NEXT: .LBB13_7: @ %else17
1006 ; CHECK-NEXT: lsls r1, r1, #24
1007 ; CHECK-NEXT: bpl .LBB13_9
1008 ; CHECK-NEXT: .LBB13_8: @ %cond.load19
1009 ; CHECK-NEXT: vldr.16 s4, [r2, #14]
1010 ; CHECK-NEXT: vmov r1, s4
1011 ; CHECK-NEXT: vmov.16 q0[7], r1
1012 ; CHECK-NEXT: .LBB13_9: @ %else20
1013 ; CHECK-NEXT: mov r1, sp
1014 ; CHECK-NEXT: vstr p0, [r1]
1015 ; CHECK-NEXT: ldrb.w r1, [sp]
1016 ; CHECK-NEXT: lsls r2, r1, #31
1017 ; CHECK-NEXT: bne .LBB13_25
1018 ; CHECK-NEXT: @ %bb.10: @ %else23
1019 ; CHECK-NEXT: lsls r2, r1, #30
1020 ; CHECK-NEXT: bmi .LBB13_26
1021 ; CHECK-NEXT: .LBB13_11: @ %else25
1022 ; CHECK-NEXT: lsls r2, r1, #29
1023 ; CHECK-NEXT: bmi .LBB13_27
1024 ; CHECK-NEXT: .LBB13_12: @ %else27
1025 ; CHECK-NEXT: lsls r2, r1, #28
1026 ; CHECK-NEXT: bmi .LBB13_28
1027 ; CHECK-NEXT: .LBB13_13: @ %else29
1028 ; CHECK-NEXT: lsls r2, r1, #27
1029 ; CHECK-NEXT: bmi .LBB13_29
1030 ; CHECK-NEXT: .LBB13_14: @ %else31
1031 ; CHECK-NEXT: lsls r2, r1, #26
1032 ; CHECK-NEXT: bmi .LBB13_30
1033 ; CHECK-NEXT: .LBB13_15: @ %else33
1034 ; CHECK-NEXT: lsls r2, r1, #25
1035 ; CHECK-NEXT: bmi .LBB13_31
1036 ; CHECK-NEXT: .LBB13_16: @ %else35
1037 ; CHECK-NEXT: lsls r1, r1, #24
1038 ; CHECK-NEXT: bmi .LBB13_32
1039 ; CHECK-NEXT: .LBB13_17: @ %else37
1040 ; CHECK-NEXT: add sp, #16
1042 ; CHECK-NEXT: .LBB13_18: @ %cond.load
1043 ; CHECK-NEXT: vldr.16 s0, [r2]
1044 ; CHECK-NEXT: lsls r3, r1, #30
1045 ; CHECK-NEXT: bpl .LBB13_2
1046 ; CHECK-NEXT: .LBB13_19: @ %cond.load1
1047 ; CHECK-NEXT: vldr.16 s4, [r2, #2]
1048 ; CHECK-NEXT: vmov r3, s4
1049 ; CHECK-NEXT: vmov.16 q0[1], r3
1050 ; CHECK-NEXT: lsls r3, r1, #29
1051 ; CHECK-NEXT: bpl .LBB13_3
1052 ; CHECK-NEXT: .LBB13_20: @ %cond.load4
1053 ; CHECK-NEXT: vldr.16 s4, [r2, #4]
1054 ; CHECK-NEXT: vmov r3, s4
1055 ; CHECK-NEXT: vmov.16 q0[2], r3
1056 ; CHECK-NEXT: lsls r3, r1, #28
1057 ; CHECK-NEXT: bpl .LBB13_4
1058 ; CHECK-NEXT: .LBB13_21: @ %cond.load7
1059 ; CHECK-NEXT: vldr.16 s4, [r2, #6]
1060 ; CHECK-NEXT: vmov r3, s4
1061 ; CHECK-NEXT: vmov.16 q0[3], r3
1062 ; CHECK-NEXT: lsls r3, r1, #27
1063 ; CHECK-NEXT: bpl .LBB13_5
1064 ; CHECK-NEXT: .LBB13_22: @ %cond.load10
1065 ; CHECK-NEXT: vldr.16 s4, [r2, #8]
1066 ; CHECK-NEXT: vmov r3, s4
1067 ; CHECK-NEXT: vmov.16 q0[4], r3
1068 ; CHECK-NEXT: lsls r3, r1, #26
1069 ; CHECK-NEXT: bpl .LBB13_6
1070 ; CHECK-NEXT: .LBB13_23: @ %cond.load13
1071 ; CHECK-NEXT: vldr.16 s4, [r2, #10]
1072 ; CHECK-NEXT: vmov r3, s4
1073 ; CHECK-NEXT: vmov.16 q0[5], r3
1074 ; CHECK-NEXT: lsls r3, r1, #25
1075 ; CHECK-NEXT: bpl .LBB13_7
1076 ; CHECK-NEXT: .LBB13_24: @ %cond.load16
1077 ; CHECK-NEXT: vldr.16 s4, [r2, #12]
1078 ; CHECK-NEXT: vmov r3, s4
1079 ; CHECK-NEXT: vmov.16 q0[6], r3
1080 ; CHECK-NEXT: lsls r1, r1, #24
1081 ; CHECK-NEXT: bmi .LBB13_8
1082 ; CHECK-NEXT: b .LBB13_9
1083 ; CHECK-NEXT: .LBB13_25: @ %cond.store
1084 ; CHECK-NEXT: vstr.16 s0, [r0]
1085 ; CHECK-NEXT: lsls r2, r1, #30
1086 ; CHECK-NEXT: bpl .LBB13_11
1087 ; CHECK-NEXT: .LBB13_26: @ %cond.store24
1088 ; CHECK-NEXT: vmovx.f16 s4, s0
1089 ; CHECK-NEXT: vstr.16 s4, [r0, #2]
1090 ; CHECK-NEXT: lsls r2, r1, #29
1091 ; CHECK-NEXT: bpl .LBB13_12
1092 ; CHECK-NEXT: .LBB13_27: @ %cond.store26
1093 ; CHECK-NEXT: vstr.16 s1, [r0, #4]
1094 ; CHECK-NEXT: lsls r2, r1, #28
1095 ; CHECK-NEXT: bpl .LBB13_13
1096 ; CHECK-NEXT: .LBB13_28: @ %cond.store28
1097 ; CHECK-NEXT: vmovx.f16 s4, s1
1098 ; CHECK-NEXT: vstr.16 s4, [r0, #6]
1099 ; CHECK-NEXT: lsls r2, r1, #27
1100 ; CHECK-NEXT: bpl .LBB13_14
1101 ; CHECK-NEXT: .LBB13_29: @ %cond.store30
1102 ; CHECK-NEXT: vstr.16 s2, [r0, #8]
1103 ; CHECK-NEXT: lsls r2, r1, #26
1104 ; CHECK-NEXT: bpl .LBB13_15
1105 ; CHECK-NEXT: .LBB13_30: @ %cond.store32
1106 ; CHECK-NEXT: vmovx.f16 s4, s2
1107 ; CHECK-NEXT: vstr.16 s4, [r0, #10]
1108 ; CHECK-NEXT: lsls r2, r1, #25
1109 ; CHECK-NEXT: bpl .LBB13_16
1110 ; CHECK-NEXT: .LBB13_31: @ %cond.store34
1111 ; CHECK-NEXT: vstr.16 s3, [r0, #12]
1112 ; CHECK-NEXT: lsls r1, r1, #24
1113 ; CHECK-NEXT: bpl .LBB13_17
1114 ; CHECK-NEXT: .LBB13_32: @ %cond.store36
1115 ; CHECK-NEXT: vmovx.f16 s0, s3
1116 ; CHECK-NEXT: vstr.16 s0, [r0, #14]
1117 ; CHECK-NEXT: add sp, #16
1120 %0 = load <8 x i16>, <8 x i16>* %mask, align 2
1121 %1 = icmp sgt <8 x i16> %0, zeroinitializer
1122 %2 = call <8 x half> @llvm.masked.load.v8f16(<8 x half>* %src, i32 2, <8 x i1> %1, <8 x half> undef)
1123 call void @llvm.masked.store.v8f16(<8 x half> %2, <8 x half>* %dest, i32 2, <8 x i1> %1)
; Declarations of the masked load/store intrinsics exercised by the tests above.
; llvm.masked.store.*(%value, %ptr, %alignment, %mask): stores only the lanes
; whose corresponding <N x i1> mask bit is set; masked-off lanes are untouched.
1127 declare void @llvm.masked.store.v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
1128 declare void @llvm.masked.store.v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>)
1129 declare void @llvm.masked.store.v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>)
1130 declare void @llvm.masked.store.v8f16(<8 x half>, <8 x half>*, i32, <8 x i1>)
1131 declare void @llvm.masked.store.v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>)
; llvm.masked.load.*(%ptr, %alignment, %mask, %passthru): loads only the lanes
; whose mask bit is set; masked-off lanes take the passthru operand's value.
1132 declare <16 x i8> @llvm.masked.load.v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>)
1133 declare <8 x i16> @llvm.masked.load.v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>)
1134 declare <4 x i32> @llvm.masked.load.v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
1135 declare <4 x float> @llvm.masked.load.v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
1136 declare <8 x half> @llvm.masked.load.v8f16(<8 x half>*, i32, <8 x i1>, <8 x half>)
; Narrow (sub-128-bit) vector variants, used to test extending/truncating
; masked memory operations.
1138 declare void @llvm.masked.store.v8i8(<8 x i8>, <8 x i8>*, i32, <8 x i1>)
1139 declare void @llvm.masked.store.v4i8(<4 x i8>, <4 x i8>*, i32, <4 x i1>)
1140 declare void @llvm.masked.store.v4i16(<4 x i16>, <4 x i16>*, i32, <4 x i1>)
1141 declare <4 x i16> @llvm.masked.load.v4i16(<4 x i16>*, i32, <4 x i1>, <4 x i16>)
1142 declare <4 x i8> @llvm.masked.load.v4i8(<4 x i8>*, i32, <4 x i1>, <4 x i8>)
1143 declare <8 x i8> @llvm.masked.load.v8i8(<8 x i8>*, i32, <8 x i1>, <8 x i8>)