1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
3 ; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
; Load <4 x i32> (align 4) from %x + 4 bytes and store it to %y.
; Expected codegen folds the offset into the load: vldrw.u32 q0, [r0, #4].
5 define i8* @ldrwu32_4(i8* %x, i8* %y) {
6 ; CHECK-LABEL: ldrwu32_4:
7 ; CHECK: @ %bb.0: @ %entry
8 ; CHECK-NEXT: vldrw.u32 q0, [r0, #4]
9 ; CHECK-NEXT: vstrw.32 q0, [r1]
12 %z = getelementptr inbounds i8, i8* %x, i32 4
13 %0 = bitcast i8* %z to <4 x i32>*
14 %1 = load <4 x i32>, <4 x i32>* %0, align 4
15 %2 = bitcast i8* %y to <4 x i32>*
16 store <4 x i32> %1, <4 x i32>* %2, align 4
; Load <4 x i32> (align 4) from %x + 3 bytes and store it to %y.
; Expected codegen does not fold the offset: the address is formed first
; (adds r2, r0, #3) and the load uses plain [r2].
20 define i8* @ldrwu32_3(i8* %x, i8* %y) {
21 ; CHECK-LABEL: ldrwu32_3:
22 ; CHECK: @ %bb.0: @ %entry
23 ; CHECK-NEXT: adds r2, r0, #3
24 ; CHECK-NEXT: vldrw.u32 q0, [r2]
25 ; CHECK-NEXT: vstrw.32 q0, [r1]
28 %z = getelementptr inbounds i8, i8* %x, i32 3
29 %0 = bitcast i8* %z to <4 x i32>*
30 %1 = load <4 x i32>, <4 x i32>* %0, align 4
31 %2 = bitcast i8* %y to <4 x i32>*
32 store <4 x i32> %1, <4 x i32>* %2, align 4
; Load <4 x i32> (align 4) from %x - 4 bytes and store it to %y.
; Expected codegen folds the negative offset into the load: vldrw.u32 q0, [r0, #-4].
36 define i8* @ldrwu32_m4(i8* %x, i8* %y) {
37 ; CHECK-LABEL: ldrwu32_m4:
38 ; CHECK: @ %bb.0: @ %entry
39 ; CHECK-NEXT: vldrw.u32 q0, [r0, #-4]
40 ; CHECK-NEXT: vstrw.32 q0, [r1]
43 %z = getelementptr inbounds i8, i8* %x, i32 -4
44 %0 = bitcast i8* %z to <4 x i32>*
45 %1 = load <4 x i32>, <4 x i32>* %0, align 4
46 %2 = bitcast i8* %y to <4 x i32>*
47 store <4 x i32> %1, <4 x i32>* %2, align 4
; Load <4 x i32> (align 4) from %x + 508 bytes and store it to %y.
; Expected codegen does not fold the offset: add.w r2, r0, #508, then load from [r2].
; NOTE(review): autogenerated expectation; whether #508 should be foldable is not
; established by this file -- confirm before hand-editing.
51 define i8* @ldrwu32_508(i8* %x, i8* %y) {
52 ; CHECK-LABEL: ldrwu32_508:
53 ; CHECK: @ %bb.0: @ %entry
54 ; CHECK-NEXT: add.w r2, r0, #508
55 ; CHECK-NEXT: vldrw.u32 q0, [r2]
56 ; CHECK-NEXT: vstrw.32 q0, [r1]
59 %z = getelementptr inbounds i8, i8* %x, i32 508
60 %0 = bitcast i8* %z to <4 x i32>*
61 %1 = load <4 x i32>, <4 x i32>* %0, align 4
62 %2 = bitcast i8* %y to <4 x i32>*
63 store <4 x i32> %1, <4 x i32>* %2, align 4
; Load <4 x i32> (align 4) from %x + 512 bytes and store it to %y.
; Expected codegen does not fold the offset: add.w r2, r0, #512, then load from [r2].
67 define i8* @ldrwu32_512(i8* %x, i8* %y) {
68 ; CHECK-LABEL: ldrwu32_512:
69 ; CHECK: @ %bb.0: @ %entry
70 ; CHECK-NEXT: add.w r2, r0, #512
71 ; CHECK-NEXT: vldrw.u32 q0, [r2]
72 ; CHECK-NEXT: vstrw.32 q0, [r1]
75 %z = getelementptr inbounds i8, i8* %x, i32 512
76 %0 = bitcast i8* %z to <4 x i32>*
77 %1 = load <4 x i32>, <4 x i32>* %0, align 4
78 %2 = bitcast i8* %y to <4 x i32>*
79 store <4 x i32> %1, <4 x i32>* %2, align 4
; Load <4 x i32> (align 4) from %x - 508 bytes and store it to %y.
; Expected codegen does not fold the offset: sub.w r2, r0, #508, then load from [r2].
; NOTE(review): autogenerated expectation; whether #-508 should be foldable is not
; established by this file -- confirm before hand-editing.
83 define i8* @ldrwu32_m508(i8* %x, i8* %y) {
84 ; CHECK-LABEL: ldrwu32_m508:
85 ; CHECK: @ %bb.0: @ %entry
86 ; CHECK-NEXT: sub.w r2, r0, #508
87 ; CHECK-NEXT: vldrw.u32 q0, [r2]
88 ; CHECK-NEXT: vstrw.32 q0, [r1]
91 %z = getelementptr inbounds i8, i8* %x, i32 -508
92 %0 = bitcast i8* %z to <4 x i32>*
93 %1 = load <4 x i32>, <4 x i32>* %0, align 4
94 %2 = bitcast i8* %y to <4 x i32>*
95 store <4 x i32> %1, <4 x i32>* %2, align 4
; Load <4 x i32> (align 4) from %x - 512 bytes and store it to %y.
; Expected codegen does not fold the offset: sub.w r2, r0, #512, then load from [r2].
99 define i8* @ldrwu32_m512(i8* %x, i8* %y) {
100 ; CHECK-LABEL: ldrwu32_m512:
101 ; CHECK: @ %bb.0: @ %entry
102 ; CHECK-NEXT: sub.w r2, r0, #512
103 ; CHECK-NEXT: vldrw.u32 q0, [r2]
104 ; CHECK-NEXT: vstrw.32 q0, [r1]
107 %z = getelementptr inbounds i8, i8* %x, i32 -512
108 %0 = bitcast i8* %z to <4 x i32>*
109 %1 = load <4 x i32>, <4 x i32>* %0, align 4
110 %2 = bitcast i8* %y to <4 x i32>*
111 store <4 x i32> %1, <4 x i32>* %2, align 4
; Load <4 x i16> (align 2) from %x + 4 bytes, zero-extend to <4 x i32>, store to %y.
; Expected codegen is a single widening load with the offset folded
; (vldrh.u32 q0, [r0, #4]) followed directly by the store.
116 define i8* @ldrhu32_4(i8* %x, i8* %y) {
117 ; CHECK-LABEL: ldrhu32_4:
118 ; CHECK: @ %bb.0: @ %entry
119 ; CHECK-NEXT: vldrh.u32 q0, [r0, #4]
120 ; CHECK-NEXT: vstrw.32 q0, [r1]
123 %z = getelementptr inbounds i8, i8* %x, i32 4
124 %0 = bitcast i8* %z to <4 x i16>*
125 %1 = load <4 x i16>, <4 x i16>* %0, align 2
126 %2 = zext <4 x i16> %1 to <4 x i32>
127 %3 = bitcast i8* %y to <4 x i32>*
128 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i16> (align 2) from %x + 3 bytes, zero-extend to <4 x i32>, store to %y.
; Expected codegen does not fold the offset: adds r2, r0, #3, then vldrh.u32 from [r2].
132 define i8* @ldrhu32_3(i8* %x, i8* %y) {
133 ; CHECK-LABEL: ldrhu32_3:
134 ; CHECK: @ %bb.0: @ %entry
135 ; CHECK-NEXT: adds r2, r0, #3
136 ; CHECK-NEXT: vldrh.u32 q0, [r2]
137 ; CHECK-NEXT: vstrw.32 q0, [r1]
140 %z = getelementptr inbounds i8, i8* %x, i32 3
141 %0 = bitcast i8* %z to <4 x i16>*
142 %1 = load <4 x i16>, <4 x i16>* %0, align 2
143 %2 = zext <4 x i16> %1 to <4 x i32>
144 %3 = bitcast i8* %y to <4 x i32>*
145 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i16> (align 2) from %x + 2 bytes, zero-extend to <4 x i32>, store to %y.
; Expected codegen folds the offset into the widening load: vldrh.u32 q0, [r0, #2].
149 define i8* @ldrhu32_2(i8* %x, i8* %y) {
150 ; CHECK-LABEL: ldrhu32_2:
151 ; CHECK: @ %bb.0: @ %entry
152 ; CHECK-NEXT: vldrh.u32 q0, [r0, #2]
153 ; CHECK-NEXT: vstrw.32 q0, [r1]
156 %z = getelementptr inbounds i8, i8* %x, i32 2
157 %0 = bitcast i8* %z to <4 x i16>*
158 %1 = load <4 x i16>, <4 x i16>* %0, align 2
159 %2 = zext <4 x i16> %1 to <4 x i32>
160 %3 = bitcast i8* %y to <4 x i32>*
161 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i16> (align 2) from %x + 254 bytes, zero-extend to <4 x i32>, store to %y.
; Expected codegen does not fold the offset: add.w r2, r0, #254, then vldrh.u32 from [r2].
; NOTE(review): autogenerated expectation; whether #254 should be foldable is not
; established by this file -- confirm before hand-editing.
165 define i8* @ldrhu32_254(i8* %x, i8* %y) {
166 ; CHECK-LABEL: ldrhu32_254:
167 ; CHECK: @ %bb.0: @ %entry
168 ; CHECK-NEXT: add.w r2, r0, #254
169 ; CHECK-NEXT: vldrh.u32 q0, [r2]
170 ; CHECK-NEXT: vstrw.32 q0, [r1]
173 %z = getelementptr inbounds i8, i8* %x, i32 254
174 %0 = bitcast i8* %z to <4 x i16>*
175 %1 = load <4 x i16>, <4 x i16>* %0, align 2
176 %2 = zext <4 x i16> %1 to <4 x i32>
177 %3 = bitcast i8* %y to <4 x i32>*
178 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i16> (align 2) from %x + 256 bytes, zero-extend to <4 x i32>, store to %y.
; Expected codegen does not fold the offset: add.w r2, r0, #256, then vldrh.u32 from [r2].
182 define i8* @ldrhu32_256(i8* %x, i8* %y) {
183 ; CHECK-LABEL: ldrhu32_256:
184 ; CHECK: @ %bb.0: @ %entry
185 ; CHECK-NEXT: add.w r2, r0, #256
186 ; CHECK-NEXT: vldrh.u32 q0, [r2]
187 ; CHECK-NEXT: vstrw.32 q0, [r1]
190 %z = getelementptr inbounds i8, i8* %x, i32 256
191 %0 = bitcast i8* %z to <4 x i16>*
192 %1 = load <4 x i16>, <4 x i16>* %0, align 2
193 %2 = zext <4 x i16> %1 to <4 x i32>
194 %3 = bitcast i8* %y to <4 x i32>*
195 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i16> (align 2) from %x - 254 bytes, zero-extend to <4 x i32>, store to %y.
; Expected codegen does not fold the offset: sub.w r2, r0, #254, then vldrh.u32 from [r2].
; NOTE(review): autogenerated expectation; whether #-254 should be foldable is not
; established by this file -- confirm before hand-editing.
199 define i8* @ldrhu32_m254(i8* %x, i8* %y) {
200 ; CHECK-LABEL: ldrhu32_m254:
201 ; CHECK: @ %bb.0: @ %entry
202 ; CHECK-NEXT: sub.w r2, r0, #254
203 ; CHECK-NEXT: vldrh.u32 q0, [r2]
204 ; CHECK-NEXT: vstrw.32 q0, [r1]
207 %z = getelementptr inbounds i8, i8* %x, i32 -254
208 %0 = bitcast i8* %z to <4 x i16>*
209 %1 = load <4 x i16>, <4 x i16>* %0, align 2
210 %2 = zext <4 x i16> %1 to <4 x i32>
211 %3 = bitcast i8* %y to <4 x i32>*
212 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i16> (align 2) from %x - 256 bytes, zero-extend to <4 x i32>, store to %y.
; Expected codegen does not fold the offset: sub.w r2, r0, #256, then vldrh.u32 from [r2].
216 define i8* @ldrhu32_m256(i8* %x, i8* %y) {
217 ; CHECK-LABEL: ldrhu32_m256:
218 ; CHECK: @ %bb.0: @ %entry
219 ; CHECK-NEXT: sub.w r2, r0, #256
220 ; CHECK-NEXT: vldrh.u32 q0, [r2]
221 ; CHECK-NEXT: vstrw.32 q0, [r1]
224 %z = getelementptr inbounds i8, i8* %x, i32 -256
225 %0 = bitcast i8* %z to <4 x i16>*
226 %1 = load <4 x i16>, <4 x i16>* %0, align 2
227 %2 = zext <4 x i16> %1 to <4 x i32>
228 %3 = bitcast i8* %y to <4 x i32>*
229 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i16> (align 2) from %x + 4 bytes, sign-extend to <4 x i32>, store to %y.
; Expected codegen is a single sign-extending load with the offset folded:
; vldrh.s32 q0, [r0, #4].
234 define i8* @ldrhs32_4(i8* %x, i8* %y) {
235 ; CHECK-LABEL: ldrhs32_4:
236 ; CHECK: @ %bb.0: @ %entry
237 ; CHECK-NEXT: vldrh.s32 q0, [r0, #4]
238 ; CHECK-NEXT: vstrw.32 q0, [r1]
241 %z = getelementptr inbounds i8, i8* %x, i32 4
242 %0 = bitcast i8* %z to <4 x i16>*
243 %1 = load <4 x i16>, <4 x i16>* %0, align 2
244 %2 = sext <4 x i16> %1 to <4 x i32>
245 %3 = bitcast i8* %y to <4 x i32>*
246 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i16> (align 2) from %x + 3 bytes, sign-extend to <4 x i32>, store to %y.
; Expected codegen does not fold the offset: adds r2, r0, #3, then vldrh.s32 from [r2].
250 define i8* @ldrhs32_3(i8* %x, i8* %y) {
251 ; CHECK-LABEL: ldrhs32_3:
252 ; CHECK: @ %bb.0: @ %entry
253 ; CHECK-NEXT: adds r2, r0, #3
254 ; CHECK-NEXT: vldrh.s32 q0, [r2]
255 ; CHECK-NEXT: vstrw.32 q0, [r1]
258 %z = getelementptr inbounds i8, i8* %x, i32 3
259 %0 = bitcast i8* %z to <4 x i16>*
260 %1 = load <4 x i16>, <4 x i16>* %0, align 2
261 %2 = sext <4 x i16> %1 to <4 x i32>
262 %3 = bitcast i8* %y to <4 x i32>*
263 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i16> (align 2) from %x + 2 bytes, sign-extend to <4 x i32>, store to %y.
; Expected codegen folds the offset into the sign-extending load: vldrh.s32 q0, [r0, #2].
267 define i8* @ldrhs32_2(i8* %x, i8* %y) {
268 ; CHECK-LABEL: ldrhs32_2:
269 ; CHECK: @ %bb.0: @ %entry
270 ; CHECK-NEXT: vldrh.s32 q0, [r0, #2]
271 ; CHECK-NEXT: vstrw.32 q0, [r1]
274 %z = getelementptr inbounds i8, i8* %x, i32 2
275 %0 = bitcast i8* %z to <4 x i16>*
276 %1 = load <4 x i16>, <4 x i16>* %0, align 2
277 %2 = sext <4 x i16> %1 to <4 x i32>
278 %3 = bitcast i8* %y to <4 x i32>*
279 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i16> (align 2) from %x + 254 bytes, sign-extend to <4 x i32>, store to %y.
; Expected codegen does not fold the offset: add.w r2, r0, #254, then vldrh.s32 from [r2].
; NOTE(review): autogenerated expectation; whether #254 should be foldable is not
; established by this file -- confirm before hand-editing.
283 define i8* @ldrhs32_254(i8* %x, i8* %y) {
284 ; CHECK-LABEL: ldrhs32_254:
285 ; CHECK: @ %bb.0: @ %entry
286 ; CHECK-NEXT: add.w r2, r0, #254
287 ; CHECK-NEXT: vldrh.s32 q0, [r2]
288 ; CHECK-NEXT: vstrw.32 q0, [r1]
291 %z = getelementptr inbounds i8, i8* %x, i32 254
292 %0 = bitcast i8* %z to <4 x i16>*
293 %1 = load <4 x i16>, <4 x i16>* %0, align 2
294 %2 = sext <4 x i16> %1 to <4 x i32>
295 %3 = bitcast i8* %y to <4 x i32>*
296 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i16> (align 2) from %x + 256 bytes, sign-extend to <4 x i32>, store to %y.
; Expected codegen does not fold the offset: add.w r2, r0, #256, then vldrh.s32 from [r2].
300 define i8* @ldrhs32_256(i8* %x, i8* %y) {
301 ; CHECK-LABEL: ldrhs32_256:
302 ; CHECK: @ %bb.0: @ %entry
303 ; CHECK-NEXT: add.w r2, r0, #256
304 ; CHECK-NEXT: vldrh.s32 q0, [r2]
305 ; CHECK-NEXT: vstrw.32 q0, [r1]
308 %z = getelementptr inbounds i8, i8* %x, i32 256
309 %0 = bitcast i8* %z to <4 x i16>*
310 %1 = load <4 x i16>, <4 x i16>* %0, align 2
311 %2 = sext <4 x i16> %1 to <4 x i32>
312 %3 = bitcast i8* %y to <4 x i32>*
313 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i16> (align 2) from %x - 254 bytes, sign-extend to <4 x i32>, store to %y.
; Expected codegen does not fold the offset: sub.w r2, r0, #254, then vldrh.s32 from [r2].
; NOTE(review): autogenerated expectation; whether #-254 should be foldable is not
; established by this file -- confirm before hand-editing.
317 define i8* @ldrhs32_m254(i8* %x, i8* %y) {
318 ; CHECK-LABEL: ldrhs32_m254:
319 ; CHECK: @ %bb.0: @ %entry
320 ; CHECK-NEXT: sub.w r2, r0, #254
321 ; CHECK-NEXT: vldrh.s32 q0, [r2]
322 ; CHECK-NEXT: vstrw.32 q0, [r1]
325 %z = getelementptr inbounds i8, i8* %x, i32 -254
326 %0 = bitcast i8* %z to <4 x i16>*
327 %1 = load <4 x i16>, <4 x i16>* %0, align 2
328 %2 = sext <4 x i16> %1 to <4 x i32>
329 %3 = bitcast i8* %y to <4 x i32>*
330 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i16> (align 2) from %x - 256 bytes, sign-extend to <4 x i32>, store to %y.
; Expected codegen does not fold the offset: sub.w r2, r0, #256, then vldrh.s32 from [r2].
334 define i8* @ldrhs32_m256(i8* %x, i8* %y) {
335 ; CHECK-LABEL: ldrhs32_m256:
336 ; CHECK: @ %bb.0: @ %entry
337 ; CHECK-NEXT: sub.w r2, r0, #256
338 ; CHECK-NEXT: vldrh.s32 q0, [r2]
339 ; CHECK-NEXT: vstrw.32 q0, [r1]
342 %z = getelementptr inbounds i8, i8* %x, i32 -256
343 %0 = bitcast i8* %z to <4 x i16>*
344 %1 = load <4 x i16>, <4 x i16>* %0, align 2
345 %2 = sext <4 x i16> %1 to <4 x i32>
346 %3 = bitcast i8* %y to <4 x i32>*
347 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <8 x i16> (align 2) from %x + 4 bytes and store it (align 2) to %y.
; Expected codegen folds the offset into the load: vldrh.u16 q0, [r0, #4].
352 define i8* @ldrhu16_4(i8* %x, i8* %y) {
353 ; CHECK-LABEL: ldrhu16_4:
354 ; CHECK: @ %bb.0: @ %entry
355 ; CHECK-NEXT: vldrh.u16 q0, [r0, #4]
356 ; CHECK-NEXT: vstrh.16 q0, [r1]
359 %z = getelementptr inbounds i8, i8* %x, i32 4
360 %0 = bitcast i8* %z to <8 x i16>*
361 %1 = load <8 x i16>, <8 x i16>* %0, align 2
362 %2 = bitcast i8* %y to <8 x i16>*
363 store <8 x i16> %1, <8 x i16>* %2, align 2
; Load <8 x i16> (align 2) from %x + 3 bytes and store it (align 2) to %y.
; Expected codegen does not fold the offset: adds r2, r0, #3, then vldrh.u16 from [r2].
367 define i8* @ldrhu16_3(i8* %x, i8* %y) {
368 ; CHECK-LABEL: ldrhu16_3:
369 ; CHECK: @ %bb.0: @ %entry
370 ; CHECK-NEXT: adds r2, r0, #3
371 ; CHECK-NEXT: vldrh.u16 q0, [r2]
372 ; CHECK-NEXT: vstrh.16 q0, [r1]
375 %z = getelementptr inbounds i8, i8* %x, i32 3
376 %0 = bitcast i8* %z to <8 x i16>*
377 %1 = load <8 x i16>, <8 x i16>* %0, align 2
378 %2 = bitcast i8* %y to <8 x i16>*
379 store <8 x i16> %1, <8 x i16>* %2, align 2
; Load <8 x i16> (align 2) from %x + 2 bytes and store it (align 2) to %y.
; Expected codegen folds the offset into the load: vldrh.u16 q0, [r0, #2].
383 define i8* @ldrhu16_2(i8* %x, i8* %y) {
384 ; CHECK-LABEL: ldrhu16_2:
385 ; CHECK: @ %bb.0: @ %entry
386 ; CHECK-NEXT: vldrh.u16 q0, [r0, #2]
387 ; CHECK-NEXT: vstrh.16 q0, [r1]
390 %z = getelementptr inbounds i8, i8* %x, i32 2
391 %0 = bitcast i8* %z to <8 x i16>*
392 %1 = load <8 x i16>, <8 x i16>* %0, align 2
393 %2 = bitcast i8* %y to <8 x i16>*
394 store <8 x i16> %1, <8 x i16>* %2, align 2
; Load <8 x i16> (align 2) from %x + 254 bytes and store it (align 2) to %y.
; Expected codegen does not fold the offset: add.w r2, r0, #254, then vldrh.u16 from [r2].
; NOTE(review): autogenerated expectation; whether #254 should be foldable is not
; established by this file -- confirm before hand-editing.
398 define i8* @ldrhu16_254(i8* %x, i8* %y) {
399 ; CHECK-LABEL: ldrhu16_254:
400 ; CHECK: @ %bb.0: @ %entry
401 ; CHECK-NEXT: add.w r2, r0, #254
402 ; CHECK-NEXT: vldrh.u16 q0, [r2]
403 ; CHECK-NEXT: vstrh.16 q0, [r1]
406 %z = getelementptr inbounds i8, i8* %x, i32 254
407 %0 = bitcast i8* %z to <8 x i16>*
408 %1 = load <8 x i16>, <8 x i16>* %0, align 2
409 %2 = bitcast i8* %y to <8 x i16>*
410 store <8 x i16> %1, <8 x i16>* %2, align 2
; Load <8 x i16> (align 2) from %x + 256 bytes and store it (align 2) to %y.
; Expected codegen does not fold the offset: add.w r2, r0, #256, then vldrh.u16 from [r2].
414 define i8* @ldrhu16_256(i8* %x, i8* %y) {
415 ; CHECK-LABEL: ldrhu16_256:
416 ; CHECK: @ %bb.0: @ %entry
417 ; CHECK-NEXT: add.w r2, r0, #256
418 ; CHECK-NEXT: vldrh.u16 q0, [r2]
419 ; CHECK-NEXT: vstrh.16 q0, [r1]
422 %z = getelementptr inbounds i8, i8* %x, i32 256
423 %0 = bitcast i8* %z to <8 x i16>*
424 %1 = load <8 x i16>, <8 x i16>* %0, align 2
425 %2 = bitcast i8* %y to <8 x i16>*
426 store <8 x i16> %1, <8 x i16>* %2, align 2
; Load <8 x i16> (align 2) from %x - 254 bytes and store it (align 2) to %y.
; Expected codegen does not fold the offset: sub.w r2, r0, #254, then vldrh.u16 from [r2].
; NOTE(review): autogenerated expectation; whether #-254 should be foldable is not
; established by this file -- confirm before hand-editing.
430 define i8* @ldrhu16_m254(i8* %x, i8* %y) {
431 ; CHECK-LABEL: ldrhu16_m254:
432 ; CHECK: @ %bb.0: @ %entry
433 ; CHECK-NEXT: sub.w r2, r0, #254
434 ; CHECK-NEXT: vldrh.u16 q0, [r2]
435 ; CHECK-NEXT: vstrh.16 q0, [r1]
438 %z = getelementptr inbounds i8, i8* %x, i32 -254
439 %0 = bitcast i8* %z to <8 x i16>*
440 %1 = load <8 x i16>, <8 x i16>* %0, align 2
441 %2 = bitcast i8* %y to <8 x i16>*
442 store <8 x i16> %1, <8 x i16>* %2, align 2
; Load <8 x i16> (align 2) from %x - 256 bytes and store it (align 2) to %y.
; Expected codegen does not fold the offset: sub.w r2, r0, #256, then vldrh.u16 from [r2].
446 define i8* @ldrhu16_m256(i8* %x, i8* %y) {
447 ; CHECK-LABEL: ldrhu16_m256:
448 ; CHECK: @ %bb.0: @ %entry
449 ; CHECK-NEXT: sub.w r2, r0, #256
450 ; CHECK-NEXT: vldrh.u16 q0, [r2]
451 ; CHECK-NEXT: vstrh.16 q0, [r1]
454 %z = getelementptr inbounds i8, i8* %x, i32 -256
455 %0 = bitcast i8* %z to <8 x i16>*
456 %1 = load <8 x i16>, <8 x i16>* %0, align 2
457 %2 = bitcast i8* %y to <8 x i16>*
458 store <8 x i16> %1, <8 x i16>* %2, align 2
; Load <4 x i8> (align 1) from %x + 4 bytes, zero-extend to <4 x i32>, store to %y.
; Expected codegen is a single widening load with the offset folded:
; vldrb.u32 q0, [r0, #4].
463 define i8* @ldrbu32_4(i8* %x, i8* %y) {
464 ; CHECK-LABEL: ldrbu32_4:
465 ; CHECK: @ %bb.0: @ %entry
466 ; CHECK-NEXT: vldrb.u32 q0, [r0, #4]
467 ; CHECK-NEXT: vstrw.32 q0, [r1]
470 %z = getelementptr inbounds i8, i8* %x, i32 4
471 %0 = bitcast i8* %z to <4 x i8>*
472 %1 = load <4 x i8>, <4 x i8>* %0, align 1
473 %2 = zext <4 x i8> %1 to <4 x i32>
474 %3 = bitcast i8* %y to <4 x i32>*
475 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i8> (align 1) from %x + 3 bytes, zero-extend to <4 x i32>, store to %y.
; Expected codegen folds the odd byte offset into the load: vldrb.u32 q0, [r0, #3].
479 define i8* @ldrbu32_3(i8* %x, i8* %y) {
480 ; CHECK-LABEL: ldrbu32_3:
481 ; CHECK: @ %bb.0: @ %entry
482 ; CHECK-NEXT: vldrb.u32 q0, [r0, #3]
483 ; CHECK-NEXT: vstrw.32 q0, [r1]
486 %z = getelementptr inbounds i8, i8* %x, i32 3
487 %0 = bitcast i8* %z to <4 x i8>*
488 %1 = load <4 x i8>, <4 x i8>* %0, align 1
489 %2 = zext <4 x i8> %1 to <4 x i32>
490 %3 = bitcast i8* %y to <4 x i32>*
491 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i8> (align 1) from %x + 127 bytes, zero-extend to <4 x i32>, store to %y.
; Expected codegen does not fold the offset: add.w r2, r0, #127, then vldrb.u32 from [r2].
; NOTE(review): autogenerated expectation; whether #127 should be foldable is not
; established by this file -- confirm before hand-editing.
495 define i8* @ldrbu32_127(i8* %x, i8* %y) {
496 ; CHECK-LABEL: ldrbu32_127:
497 ; CHECK: @ %bb.0: @ %entry
498 ; CHECK-NEXT: add.w r2, r0, #127
499 ; CHECK-NEXT: vldrb.u32 q0, [r2]
500 ; CHECK-NEXT: vstrw.32 q0, [r1]
503 %z = getelementptr inbounds i8, i8* %x, i32 127
504 %0 = bitcast i8* %z to <4 x i8>*
505 %1 = load <4 x i8>, <4 x i8>* %0, align 1
506 %2 = zext <4 x i8> %1 to <4 x i32>
507 %3 = bitcast i8* %y to <4 x i32>*
508 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i8> (align 1) from %x + 128 bytes, zero-extend to <4 x i32>, store to %y.
; Expected codegen does not fold the offset: add.w r2, r0, #128, then vldrb.u32 from [r2].
512 define i8* @ldrbu32_128(i8* %x, i8* %y) {
513 ; CHECK-LABEL: ldrbu32_128:
514 ; CHECK: @ %bb.0: @ %entry
515 ; CHECK-NEXT: add.w r2, r0, #128
516 ; CHECK-NEXT: vldrb.u32 q0, [r2]
517 ; CHECK-NEXT: vstrw.32 q0, [r1]
520 %z = getelementptr inbounds i8, i8* %x, i32 128
521 %0 = bitcast i8* %z to <4 x i8>*
522 %1 = load <4 x i8>, <4 x i8>* %0, align 1
523 %2 = zext <4 x i8> %1 to <4 x i32>
524 %3 = bitcast i8* %y to <4 x i32>*
525 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i8> (align 1) from %x - 127 bytes, zero-extend to <4 x i32>, store to %y.
; Expected codegen does not fold the offset: sub.w r2, r0, #127, then vldrb.u32 from [r2].
; NOTE(review): autogenerated expectation; whether #-127 should be foldable is not
; established by this file -- confirm before hand-editing.
529 define i8* @ldrbu32_m127(i8* %x, i8* %y) {
530 ; CHECK-LABEL: ldrbu32_m127:
531 ; CHECK: @ %bb.0: @ %entry
532 ; CHECK-NEXT: sub.w r2, r0, #127
533 ; CHECK-NEXT: vldrb.u32 q0, [r2]
534 ; CHECK-NEXT: vstrw.32 q0, [r1]
537 %z = getelementptr inbounds i8, i8* %x, i32 -127
538 %0 = bitcast i8* %z to <4 x i8>*
539 %1 = load <4 x i8>, <4 x i8>* %0, align 1
540 %2 = zext <4 x i8> %1 to <4 x i32>
541 %3 = bitcast i8* %y to <4 x i32>*
542 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i8> (align 1) from %x - 128 bytes, zero-extend to <4 x i32>, store to %y.
; Expected codegen does not fold the offset: sub.w r2, r0, #128, then vldrb.u32 from [r2].
546 define i8* @ldrbu32_m128(i8* %x, i8* %y) {
547 ; CHECK-LABEL: ldrbu32_m128:
548 ; CHECK: @ %bb.0: @ %entry
549 ; CHECK-NEXT: sub.w r2, r0, #128
550 ; CHECK-NEXT: vldrb.u32 q0, [r2]
551 ; CHECK-NEXT: vstrw.32 q0, [r1]
554 %z = getelementptr inbounds i8, i8* %x, i32 -128
555 %0 = bitcast i8* %z to <4 x i8>*
556 %1 = load <4 x i8>, <4 x i8>* %0, align 1
557 %2 = zext <4 x i8> %1 to <4 x i32>
558 %3 = bitcast i8* %y to <4 x i32>*
559 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i8> (align 1) from %x + 4 bytes, sign-extend to <4 x i32>, store to %y.
; Expected codegen is a single sign-extending load with the offset folded:
; vldrb.s32 q0, [r0, #4].
564 define i8* @ldrbs32_4(i8* %x, i8* %y) {
565 ; CHECK-LABEL: ldrbs32_4:
566 ; CHECK: @ %bb.0: @ %entry
567 ; CHECK-NEXT: vldrb.s32 q0, [r0, #4]
568 ; CHECK-NEXT: vstrw.32 q0, [r1]
571 %z = getelementptr inbounds i8, i8* %x, i32 4
572 %0 = bitcast i8* %z to <4 x i8>*
573 %1 = load <4 x i8>, <4 x i8>* %0, align 1
574 %2 = sext <4 x i8> %1 to <4 x i32>
575 %3 = bitcast i8* %y to <4 x i32>*
576 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i8> (align 1) from %x + 3 bytes, sign-extend to <4 x i32>, store to %y.
; Expected codegen folds the odd byte offset into the load: vldrb.s32 q0, [r0, #3].
580 define i8* @ldrbs32_3(i8* %x, i8* %y) {
581 ; CHECK-LABEL: ldrbs32_3:
582 ; CHECK: @ %bb.0: @ %entry
583 ; CHECK-NEXT: vldrb.s32 q0, [r0, #3]
584 ; CHECK-NEXT: vstrw.32 q0, [r1]
587 %z = getelementptr inbounds i8, i8* %x, i32 3
588 %0 = bitcast i8* %z to <4 x i8>*
589 %1 = load <4 x i8>, <4 x i8>* %0, align 1
590 %2 = sext <4 x i8> %1 to <4 x i32>
591 %3 = bitcast i8* %y to <4 x i32>*
592 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i8> (align 1) from %x + 127 bytes, sign-extend to <4 x i32>, store to %y.
; Expected codegen does not fold the offset: add.w r2, r0, #127, then vldrb.s32 from [r2].
; NOTE(review): autogenerated expectation; whether #127 should be foldable is not
; established by this file -- confirm before hand-editing.
596 define i8* @ldrbs32_127(i8* %x, i8* %y) {
597 ; CHECK-LABEL: ldrbs32_127:
598 ; CHECK: @ %bb.0: @ %entry
599 ; CHECK-NEXT: add.w r2, r0, #127
600 ; CHECK-NEXT: vldrb.s32 q0, [r2]
601 ; CHECK-NEXT: vstrw.32 q0, [r1]
604 %z = getelementptr inbounds i8, i8* %x, i32 127
605 %0 = bitcast i8* %z to <4 x i8>*
606 %1 = load <4 x i8>, <4 x i8>* %0, align 1
607 %2 = sext <4 x i8> %1 to <4 x i32>
608 %3 = bitcast i8* %y to <4 x i32>*
609 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i8> (align 1) from %x + 128 bytes, sign-extend to <4 x i32>, store to %y.
; Expected codegen does not fold the offset: add.w r2, r0, #128, then vldrb.s32 from [r2].
613 define i8* @ldrbs32_128(i8* %x, i8* %y) {
614 ; CHECK-LABEL: ldrbs32_128:
615 ; CHECK: @ %bb.0: @ %entry
616 ; CHECK-NEXT: add.w r2, r0, #128
617 ; CHECK-NEXT: vldrb.s32 q0, [r2]
618 ; CHECK-NEXT: vstrw.32 q0, [r1]
621 %z = getelementptr inbounds i8, i8* %x, i32 128
622 %0 = bitcast i8* %z to <4 x i8>*
623 %1 = load <4 x i8>, <4 x i8>* %0, align 1
624 %2 = sext <4 x i8> %1 to <4 x i32>
625 %3 = bitcast i8* %y to <4 x i32>*
626 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i8> (align 1) from %x - 127 bytes, sign-extend to <4 x i32>, store to %y.
; Expected codegen does not fold the offset: sub.w r2, r0, #127, then vldrb.s32 from [r2].
; NOTE(review): autogenerated expectation; whether #-127 should be foldable is not
; established by this file -- confirm before hand-editing.
630 define i8* @ldrbs32_m127(i8* %x, i8* %y) {
631 ; CHECK-LABEL: ldrbs32_m127:
632 ; CHECK: @ %bb.0: @ %entry
633 ; CHECK-NEXT: sub.w r2, r0, #127
634 ; CHECK-NEXT: vldrb.s32 q0, [r2]
635 ; CHECK-NEXT: vstrw.32 q0, [r1]
638 %z = getelementptr inbounds i8, i8* %x, i32 -127
639 %0 = bitcast i8* %z to <4 x i8>*
640 %1 = load <4 x i8>, <4 x i8>* %0, align 1
641 %2 = sext <4 x i8> %1 to <4 x i32>
642 %3 = bitcast i8* %y to <4 x i32>*
643 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i8> (align 1) from %x - 128 bytes, sign-extend to <4 x i32>, store to %y.
; Expected codegen does not fold the offset: sub.w r2, r0, #128, then vldrb.s32 from [r2].
647 define i8* @ldrbs32_m128(i8* %x, i8* %y) {
648 ; CHECK-LABEL: ldrbs32_m128:
649 ; CHECK: @ %bb.0: @ %entry
650 ; CHECK-NEXT: sub.w r2, r0, #128
651 ; CHECK-NEXT: vldrb.s32 q0, [r2]
652 ; CHECK-NEXT: vstrw.32 q0, [r1]
655 %z = getelementptr inbounds i8, i8* %x, i32 -128
656 %0 = bitcast i8* %z to <4 x i8>*
657 %1 = load <4 x i8>, <4 x i8>* %0, align 1
658 %2 = sext <4 x i8> %1 to <4 x i32>
659 %3 = bitcast i8* %y to <4 x i32>*
660 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <8 x i8> (align 1) from %x + 4 bytes, zero-extend to <8 x i16>, store to %y.
; Expected codegen is a single widening load with the offset folded:
; vldrb.u16 q0, [r0, #4].
665 define i8* @ldrbu16_4(i8* %x, i8* %y) {
666 ; CHECK-LABEL: ldrbu16_4:
667 ; CHECK: @ %bb.0: @ %entry
668 ; CHECK-NEXT: vldrb.u16 q0, [r0, #4]
669 ; CHECK-NEXT: vstrh.16 q0, [r1]
672 %z = getelementptr inbounds i8, i8* %x, i32 4
673 %0 = bitcast i8* %z to <8 x i8>*
674 %1 = load <8 x i8>, <8 x i8>* %0, align 1
675 %2 = zext <8 x i8> %1 to <8 x i16>
676 %3 = bitcast i8* %y to <8 x i16>*
677 store <8 x i16> %2, <8 x i16>* %3, align 2
; Load <8 x i8> (align 1) from %x + 3 bytes, zero-extend to <8 x i16>, store to %y.
; Expected codegen folds the odd byte offset into the load: vldrb.u16 q0, [r0, #3].
681 define i8* @ldrbu16_3(i8* %x, i8* %y) {
682 ; CHECK-LABEL: ldrbu16_3:
683 ; CHECK: @ %bb.0: @ %entry
684 ; CHECK-NEXT: vldrb.u16 q0, [r0, #3]
685 ; CHECK-NEXT: vstrh.16 q0, [r1]
688 %z = getelementptr inbounds i8, i8* %x, i32 3
689 %0 = bitcast i8* %z to <8 x i8>*
690 %1 = load <8 x i8>, <8 x i8>* %0, align 1
691 %2 = zext <8 x i8> %1 to <8 x i16>
692 %3 = bitcast i8* %y to <8 x i16>*
693 store <8 x i16> %2, <8 x i16>* %3, align 2
; Load <8 x i8> (align 1) from %x + 127 bytes, zero-extend to <8 x i16>, store to %y.
; Expected codegen does not fold the offset: add.w r2, r0, #127, then vldrb.u16 from [r2].
; NOTE(review): autogenerated expectation; whether #127 should be foldable is not
; established by this file -- confirm before hand-editing.
697 define i8* @ldrbu16_127(i8* %x, i8* %y) {
698 ; CHECK-LABEL: ldrbu16_127:
699 ; CHECK: @ %bb.0: @ %entry
700 ; CHECK-NEXT: add.w r2, r0, #127
701 ; CHECK-NEXT: vldrb.u16 q0, [r2]
702 ; CHECK-NEXT: vstrh.16 q0, [r1]
705 %z = getelementptr inbounds i8, i8* %x, i32 127
706 %0 = bitcast i8* %z to <8 x i8>*
707 %1 = load <8 x i8>, <8 x i8>* %0, align 1
708 %2 = zext <8 x i8> %1 to <8 x i16>
709 %3 = bitcast i8* %y to <8 x i16>*
710 store <8 x i16> %2, <8 x i16>* %3, align 2
; Load <8 x i8> (align 1) from %x + 128 bytes, zero-extend to <8 x i16>, store to %y.
; Expected codegen does not fold the offset: add.w r2, r0, #128, then vldrb.u16 from [r2].
714 define i8* @ldrbu16_128(i8* %x, i8* %y) {
715 ; CHECK-LABEL: ldrbu16_128:
716 ; CHECK: @ %bb.0: @ %entry
717 ; CHECK-NEXT: add.w r2, r0, #128
718 ; CHECK-NEXT: vldrb.u16 q0, [r2]
719 ; CHECK-NEXT: vstrh.16 q0, [r1]
722 %z = getelementptr inbounds i8, i8* %x, i32 128
723 %0 = bitcast i8* %z to <8 x i8>*
724 %1 = load <8 x i8>, <8 x i8>* %0, align 1
725 %2 = zext <8 x i8> %1 to <8 x i16>
726 %3 = bitcast i8* %y to <8 x i16>*
727 store <8 x i16> %2, <8 x i16>* %3, align 2
; Load <8 x i8> (align 1) from %x - 127 bytes, zero-extend to <8 x i16>, store to %y.
; Expected codegen does not fold the offset: sub.w r2, r0, #127, then vldrb.u16 from [r2].
; NOTE(review): autogenerated expectation; whether #-127 should be foldable is not
; established by this file -- confirm before hand-editing.
731 define i8* @ldrbu16_m127(i8* %x, i8* %y) {
732 ; CHECK-LABEL: ldrbu16_m127:
733 ; CHECK: @ %bb.0: @ %entry
734 ; CHECK-NEXT: sub.w r2, r0, #127
735 ; CHECK-NEXT: vldrb.u16 q0, [r2]
736 ; CHECK-NEXT: vstrh.16 q0, [r1]
739 %z = getelementptr inbounds i8, i8* %x, i32 -127
740 %0 = bitcast i8* %z to <8 x i8>*
741 %1 = load <8 x i8>, <8 x i8>* %0, align 1
742 %2 = zext <8 x i8> %1 to <8 x i16>
743 %3 = bitcast i8* %y to <8 x i16>*
744 store <8 x i16> %2, <8 x i16>* %3, align 2
; Load <8 x i8> (align 1) from %x - 128 bytes, zero-extend to <8 x i16>, store to %y.
; Expected codegen does not fold the offset: sub.w r2, r0, #128, then vldrb.u16 from [r2].
748 define i8* @ldrbu16_m128(i8* %x, i8* %y) {
749 ; CHECK-LABEL: ldrbu16_m128:
750 ; CHECK: @ %bb.0: @ %entry
751 ; CHECK-NEXT: sub.w r2, r0, #128
752 ; CHECK-NEXT: vldrb.u16 q0, [r2]
753 ; CHECK-NEXT: vstrh.16 q0, [r1]
756 %z = getelementptr inbounds i8, i8* %x, i32 -128
757 %0 = bitcast i8* %z to <8 x i8>*
758 %1 = load <8 x i8>, <8 x i8>* %0, align 1
759 %2 = zext <8 x i8> %1 to <8 x i16>
760 %3 = bitcast i8* %y to <8 x i16>*
761 store <8 x i16> %2, <8 x i16>* %3, align 2
; Load <8 x i8> (align 1) from %x + 4 bytes, sign-extend to <8 x i16>, store to %y.
; Expected codegen is a single sign-extending load with the offset folded:
; vldrb.s16 q0, [r0, #4].
766 define i8* @ldrbs16_4(i8* %x, i8* %y) {
767 ; CHECK-LABEL: ldrbs16_4:
768 ; CHECK: @ %bb.0: @ %entry
769 ; CHECK-NEXT: vldrb.s16 q0, [r0, #4]
770 ; CHECK-NEXT: vstrh.16 q0, [r1]
773 %z = getelementptr inbounds i8, i8* %x, i32 4
774 %0 = bitcast i8* %z to <8 x i8>*
775 %1 = load <8 x i8>, <8 x i8>* %0, align 1
776 %2 = sext <8 x i8> %1 to <8 x i16>
777 %3 = bitcast i8* %y to <8 x i16>*
778 store <8 x i16> %2, <8 x i16>* %3, align 2
; Load <8 x i8> (align 1) from %x + 3 bytes, sign-extend to <8 x i16>, store to %y.
; Expected codegen folds the odd byte offset into the load: vldrb.s16 q0, [r0, #3].
782 define i8* @ldrbs16_3(i8* %x, i8* %y) {
783 ; CHECK-LABEL: ldrbs16_3:
784 ; CHECK: @ %bb.0: @ %entry
785 ; CHECK-NEXT: vldrb.s16 q0, [r0, #3]
786 ; CHECK-NEXT: vstrh.16 q0, [r1]
789 %z = getelementptr inbounds i8, i8* %x, i32 3
790 %0 = bitcast i8* %z to <8 x i8>*
791 %1 = load <8 x i8>, <8 x i8>* %0, align 1
792 %2 = sext <8 x i8> %1 to <8 x i16>
793 %3 = bitcast i8* %y to <8 x i16>*
794 store <8 x i16> %2, <8 x i16>* %3, align 2
; Load <8 x i8> (align 1) from %x + 127 bytes, sign-extend to <8 x i16>, store to %y.
; Expected codegen does not fold the offset: add.w r2, r0, #127, then vldrb.s16 from [r2].
; NOTE(review): autogenerated expectation; whether #127 should be foldable is not
; established by this file -- confirm before hand-editing.
798 define i8* @ldrbs16_127(i8* %x, i8* %y) {
799 ; CHECK-LABEL: ldrbs16_127:
800 ; CHECK: @ %bb.0: @ %entry
801 ; CHECK-NEXT: add.w r2, r0, #127
802 ; CHECK-NEXT: vldrb.s16 q0, [r2]
803 ; CHECK-NEXT: vstrh.16 q0, [r1]
806 %z = getelementptr inbounds i8, i8* %x, i32 127
807 %0 = bitcast i8* %z to <8 x i8>*
808 %1 = load <8 x i8>, <8 x i8>* %0, align 1
809 %2 = sext <8 x i8> %1 to <8 x i16>
810 %3 = bitcast i8* %y to <8 x i16>*
811 store <8 x i16> %2, <8 x i16>* %3, align 2
; Load <8 x i8> (align 1) from %x + 128 bytes, sign-extend to <8 x i16>, store to %y.
; Expected codegen does not fold the offset: add.w r2, r0, #128, then vldrb.s16 from [r2].
815 define i8* @ldrbs16_128(i8* %x, i8* %y) {
816 ; CHECK-LABEL: ldrbs16_128:
817 ; CHECK: @ %bb.0: @ %entry
818 ; CHECK-NEXT: add.w r2, r0, #128
819 ; CHECK-NEXT: vldrb.s16 q0, [r2]
820 ; CHECK-NEXT: vstrh.16 q0, [r1]
823 %z = getelementptr inbounds i8, i8* %x, i32 128
824 %0 = bitcast i8* %z to <8 x i8>*
825 %1 = load <8 x i8>, <8 x i8>* %0, align 1
826 %2 = sext <8 x i8> %1 to <8 x i16>
827 %3 = bitcast i8* %y to <8 x i16>*
828 store <8 x i16> %2, <8 x i16>* %3, align 2
; Load <8 x i8> (align 1) from %x - 127 bytes, sign-extend to <8 x i16>, store to %y.
; Expected codegen does not fold the offset: sub.w r2, r0, #127, then vldrb.s16 from [r2].
; NOTE(review): autogenerated expectation; whether #-127 should be foldable is not
; established by this file -- confirm before hand-editing.
832 define i8* @ldrbs16_m127(i8* %x, i8* %y) {
833 ; CHECK-LABEL: ldrbs16_m127:
834 ; CHECK: @ %bb.0: @ %entry
835 ; CHECK-NEXT: sub.w r2, r0, #127
836 ; CHECK-NEXT: vldrb.s16 q0, [r2]
837 ; CHECK-NEXT: vstrh.16 q0, [r1]
840 %z = getelementptr inbounds i8, i8* %x, i32 -127
841 %0 = bitcast i8* %z to <8 x i8>*
842 %1 = load <8 x i8>, <8 x i8>* %0, align 1
843 %2 = sext <8 x i8> %1 to <8 x i16>
844 %3 = bitcast i8* %y to <8 x i16>*
845 store <8 x i16> %2, <8 x i16>* %3, align 2
; Load <8 x i8> (align 1) from %x - 128 bytes, sign-extend to <8 x i16>, store to %y.
; Expected codegen does not fold the offset: sub.w r2, r0, #128, then vldrb.s16 from [r2].
849 define i8* @ldrbs16_m128(i8* %x, i8* %y) {
850 ; CHECK-LABEL: ldrbs16_m128:
851 ; CHECK: @ %bb.0: @ %entry
852 ; CHECK-NEXT: sub.w r2, r0, #128
853 ; CHECK-NEXT: vldrb.s16 q0, [r2]
854 ; CHECK-NEXT: vstrh.16 q0, [r1]
857 %z = getelementptr inbounds i8, i8* %x, i32 -128
858 %0 = bitcast i8* %z to <8 x i8>*
859 %1 = load <8 x i8>, <8 x i8>* %0, align 1
860 %2 = sext <8 x i8> %1 to <8 x i16>
861 %3 = bitcast i8* %y to <8 x i16>*
862 store <8 x i16> %2, <8 x i16>* %3, align 2
; Load <16 x i8> (align 1) from %x + 4 bytes and store it (align 1) to %y.
; Expected codegen folds the offset into the load: vldrb.u8 q0, [r0, #4].
867 define i8* @ldrbu8_4(i8* %x, i8* %y) {
868 ; CHECK-LABEL: ldrbu8_4:
869 ; CHECK: @ %bb.0: @ %entry
870 ; CHECK-NEXT: vldrb.u8 q0, [r0, #4]
871 ; CHECK-NEXT: vstrb.8 q0, [r1]
874 %z = getelementptr inbounds i8, i8* %x, i32 4
875 %0 = bitcast i8* %z to <16 x i8>*
876 %1 = load <16 x i8>, <16 x i8>* %0, align 1
877 %2 = bitcast i8* %y to <16 x i8>*
878 store <16 x i8> %1, <16 x i8>* %2, align 1
; Load <16 x i8> (align 1) from %x + 3 bytes and store it (align 1) to %y.
; Expected codegen folds the odd byte offset into the load: vldrb.u8 q0, [r0, #3].
882 define i8* @ldrbu8_3(i8* %x, i8* %y) {
883 ; CHECK-LABEL: ldrbu8_3:
884 ; CHECK: @ %bb.0: @ %entry
885 ; CHECK-NEXT: vldrb.u8 q0, [r0, #3]
886 ; CHECK-NEXT: vstrb.8 q0, [r1]
889 %z = getelementptr inbounds i8, i8* %x, i32 3
890 %0 = bitcast i8* %z to <16 x i8>*
891 %1 = load <16 x i8>, <16 x i8>* %0, align 1
892 %2 = bitcast i8* %y to <16 x i8>*
893 store <16 x i8> %1, <16 x i8>* %2, align 1
; Load <16 x i8> (align 1) from %x + 127 bytes and store it (align 1) to %y.
; Expected codegen does not fold the offset: add.w r2, r0, #127, then vldrb.u8 from [r2].
; NOTE(review): autogenerated expectation; whether #127 should be foldable is not
; established by this file -- confirm before hand-editing.
897 define i8* @ldrbu8_127(i8* %x, i8* %y) {
898 ; CHECK-LABEL: ldrbu8_127:
899 ; CHECK: @ %bb.0: @ %entry
900 ; CHECK-NEXT: add.w r2, r0, #127
901 ; CHECK-NEXT: vldrb.u8 q0, [r2]
902 ; CHECK-NEXT: vstrb.8 q0, [r1]
905 %z = getelementptr inbounds i8, i8* %x, i32 127
906 %0 = bitcast i8* %z to <16 x i8>*
907 %1 = load <16 x i8>, <16 x i8>* %0, align 1
908 %2 = bitcast i8* %y to <16 x i8>*
909 store <16 x i8> %1, <16 x i8>* %2, align 1
; Load <16 x i8> (align 1) from %x + 128 bytes and store it (align 1) to %y.
; Expected codegen does not fold the offset: add.w r2, r0, #128, then vldrb.u8 from [r2].
913 define i8* @ldrbu8_128(i8* %x, i8* %y) {
914 ; CHECK-LABEL: ldrbu8_128:
915 ; CHECK: @ %bb.0: @ %entry
916 ; CHECK-NEXT: add.w r2, r0, #128
917 ; CHECK-NEXT: vldrb.u8 q0, [r2]
918 ; CHECK-NEXT: vstrb.8 q0, [r1]
921 %z = getelementptr inbounds i8, i8* %x, i32 128
922 %0 = bitcast i8* %z to <16 x i8>*
923 %1 = load <16 x i8>, <16 x i8>* %0, align 1
924 %2 = bitcast i8* %y to <16 x i8>*
925 store <16 x i8> %1, <16 x i8>* %2, align 1
; Load <16 x i8> (align 1) from %x - 127 bytes and store it (align 1) to %y.
; Expected codegen does not fold the offset: sub.w r2, r0, #127, then vldrb.u8 from [r2].
; NOTE(review): autogenerated expectation; whether #-127 should be foldable is not
; established by this file -- confirm before hand-editing.
929 define i8* @ldrbu8_m127(i8* %x, i8* %y) {
930 ; CHECK-LABEL: ldrbu8_m127:
931 ; CHECK: @ %bb.0: @ %entry
932 ; CHECK-NEXT: sub.w r2, r0, #127
933 ; CHECK-NEXT: vldrb.u8 q0, [r2]
934 ; CHECK-NEXT: vstrb.8 q0, [r1]
937 %z = getelementptr inbounds i8, i8* %x, i32 -127
938 %0 = bitcast i8* %z to <16 x i8>*
939 %1 = load <16 x i8>, <16 x i8>* %0, align 1
940 %2 = bitcast i8* %y to <16 x i8>*
941 store <16 x i8> %1, <16 x i8>* %2, align 1
; Load <16 x i8> (align 1) from %x - 128 bytes and store it (align 1) to %y.
; Expected codegen does not fold the offset: sub.w r2, r0, #128, then vldrb.u8 from [r2].
945 define i8* @ldrbu8_m128(i8* %x, i8* %y) {
946 ; CHECK-LABEL: ldrbu8_m128:
947 ; CHECK: @ %bb.0: @ %entry
948 ; CHECK-NEXT: sub.w r2, r0, #128
949 ; CHECK-NEXT: vldrb.u8 q0, [r2]
950 ; CHECK-NEXT: vstrb.8 q0, [r1]
953 %z = getelementptr inbounds i8, i8* %x, i32 -128
954 %0 = bitcast i8* %z to <16 x i8>*
955 %1 = load <16 x i8>, <16 x i8>* %0, align 1
956 %2 = bitcast i8* %y to <16 x i8>*
957 store <16 x i8> %1, <16 x i8>* %2, align 1
; Floating-point variant: load <4 x float> (align 4) from %x + 4 bytes and store it to %y.
; Expected codegen uses the 32-bit word load with the offset folded:
; vldrw.u32 q0, [r0, #4], then vstrw.32.
962 define i8* @ldrwf32_4(i8* %x, i8* %y) {
963 ; CHECK-LABEL: ldrwf32_4:
964 ; CHECK: @ %bb.0: @ %entry
965 ; CHECK-NEXT: vldrw.u32 q0, [r0, #4]
966 ; CHECK-NEXT: vstrw.32 q0, [r1]
969 %z = getelementptr inbounds i8, i8* %x, i32 4
970 %0 = bitcast i8* %z to <4 x float>*
971 %1 = load <4 x float>, <4 x float>* %0, align 4
972 %2 = bitcast i8* %y to <4 x float>*
973 store <4 x float> %1, <4 x float>* %2, align 4
; Half-precision variant: load <8 x half> (align 2) from %x + 4 bytes and store it to %y.
; Despite the "ldrw" in the name, the expected codegen is the halfword form:
; vldrh.u16 q0, [r0, #4], then vstrh.16.
977 define i8* @ldrwf16_4(i8* %x, i8* %y) {
978 ; CHECK-LABEL: ldrwf16_4:
979 ; CHECK: @ %bb.0: @ %entry
980 ; CHECK-NEXT: vldrh.u16 q0, [r0, #4]
981 ; CHECK-NEXT: vstrh.16 q0, [r1]
984 %z = getelementptr inbounds i8, i8* %x, i32 4
985 %0 = bitcast i8* %z to <8 x half>*
986 %1 = load <8 x half>, <8 x half>* %0, align 2
987 %2 = bitcast i8* %y to <8 x half>*
988 store <8 x half> %1, <8 x half>* %2, align 2
; Under-aligned load: <4 x i32> with align 1 from %x + 3 bytes, stored (align 4) to %y.
; Both endiannesses are expected to load with a byte load, vldrb.u8 q0, [r0, #3].
; The big-endian checks additionally expect vrev32.8 before the vstrw.32.
992 define i8* @ldrwi32_align1(i8* %x, i8* %y) {
993 ; CHECK-LE-LABEL: ldrwi32_align1:
994 ; CHECK-LE: @ %bb.0: @ %entry
995 ; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]
996 ; CHECK-LE-NEXT: vstrw.32 q0, [r1]
997 ; CHECK-LE-NEXT: bx lr
999 ; CHECK-BE-LABEL: ldrwi32_align1:
1000 ; CHECK-BE: @ %bb.0: @ %entry
1001 ; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3]
1002 ; CHECK-BE-NEXT: vrev32.8 q0, q0
1003 ; CHECK-BE-NEXT: vstrw.32 q0, [r1]
1004 ; CHECK-BE-NEXT: bx lr
1006 %z = getelementptr inbounds i8, i8* %x, i32 3
1007 %0 = bitcast i8* %z to <4 x i32>*
1008 %1 = load <4 x i32>, <4 x i32>* %0, align 1
1009 %2 = bitcast i8* %y to <4 x i32>*
1010 store <4 x i32> %1, <4 x i32>* %2, align 4
; Under-aligned load: <8 x i16> with align 1 from %x + 3 bytes, stored (align 2) to %y.
; Both endiannesses are expected to load with a byte load, vldrb.u8 q0, [r0, #3].
; The big-endian checks additionally expect vrev16.8 before the vstrh.16.
1014 define i8* @ldrhi16_align1(i8* %x, i8* %y) {
1015 ; CHECK-LE-LABEL: ldrhi16_align1:
1016 ; CHECK-LE: @ %bb.0: @ %entry
1017 ; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]
1018 ; CHECK-LE-NEXT: vstrh.16 q0, [r1]
1019 ; CHECK-LE-NEXT: bx lr
1021 ; CHECK-BE-LABEL: ldrhi16_align1:
1022 ; CHECK-BE: @ %bb.0: @ %entry
1023 ; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3]
1024 ; CHECK-BE-NEXT: vrev16.8 q0, q0
1025 ; CHECK-BE-NEXT: vstrh.16 q0, [r1]
1026 ; CHECK-BE-NEXT: bx lr
1028 %z = getelementptr inbounds i8, i8* %x, i32 3
1029 %0 = bitcast i8* %z to <8 x i16>*
1030 %1 = load <8 x i16>, <8 x i16>* %0, align 1
1031 %2 = bitcast i8* %y to <8 x i16>*
1032 store <8 x i16> %1, <8 x i16>* %2, align 2
; Under-aligned extending load: <4 x i16> with align 1 from %x + 3 bytes,
; sign-extended to <4 x i32> and stored (align 4) to %y.
; The shared CHECK prefix (used by both RUN lines) expects the 8 source bytes to be
; copied through an 8-byte stack slot: two ldr.w from [r0, #3] and [r0, #7], a strd to
; [sp], then vldrh.s32 from the stack copy before the vstrw.32.
1036 define i8* @ldrhi32_align1(i8* %x, i8* %y) {
1037 ; CHECK-LABEL: ldrhi32_align1:
1038 ; CHECK: @ %bb.0: @ %entry
1039 ; CHECK-NEXT: .pad #8
1040 ; CHECK-NEXT: sub sp, #8
1041 ; CHECK-NEXT: ldr.w r3, [r0, #7]
1042 ; CHECK-NEXT: ldr.w r2, [r0, #3]
1043 ; CHECK-NEXT: strd r2, r3, [sp]
1044 ; CHECK-NEXT: mov r2, sp
1045 ; CHECK-NEXT: vldrh.s32 q0, [r2]
1046 ; CHECK-NEXT: vstrw.32 q0, [r1]
1047 ; CHECK-NEXT: add sp, #8
1050 %z = getelementptr inbounds i8, i8* %x, i32 3
1051 %0 = bitcast i8* %z to <4 x i16>*
1052 %1 = load <4 x i16>, <4 x i16>* %0, align 1
1053 %2 = bitcast i8* %y to <4 x i32>*
1054 %3 = sext <4 x i16> %1 to <4 x i32>
1055 store <4 x i32> %3, <4 x i32>* %2, align 4
; Under-aligned float load: <4 x float> with align 1 from %x + 3 bytes, stored (align 4) to %y.
; Both endiannesses are expected to load with a byte load, vldrb.u8 q0, [r0, #3].
; The big-endian checks additionally expect vrev32.8 before the vstrw.32.
1059 define i8* @ldrf32_align1(i8* %x, i8* %y) {
1060 ; CHECK-LE-LABEL: ldrf32_align1:
1061 ; CHECK-LE: @ %bb.0: @ %entry
1062 ; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]
1063 ; CHECK-LE-NEXT: vstrw.32 q0, [r1]
1064 ; CHECK-LE-NEXT: bx lr
1066 ; CHECK-BE-LABEL: ldrf32_align1:
1067 ; CHECK-BE: @ %bb.0: @ %entry
1068 ; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3]
1069 ; CHECK-BE-NEXT: vrev32.8 q0, q0
1070 ; CHECK-BE-NEXT: vstrw.32 q0, [r1]
1071 ; CHECK-BE-NEXT: bx lr
1073 %z = getelementptr inbounds i8, i8* %x, i32 3
1074 %0 = bitcast i8* %z to <4 x float>*
1075 %1 = load <4 x float>, <4 x float>* %0, align 1
1076 %2 = bitcast i8* %y to <4 x float>*
1077 store <4 x float> %1, <4 x float>* %2, align 4
; Under-aligned half load: <8 x half> with align 1 from %x + 3 bytes, stored (align 2) to %y.
; Both endiannesses are expected to load with a byte load, vldrb.u8 q0, [r0, #3].
; The big-endian checks additionally expect vrev16.8 before the vstrh.16.
1081 define i8* @ldrf16_align1(i8* %x, i8* %y) {
1082 ; CHECK-LE-LABEL: ldrf16_align1:
1083 ; CHECK-LE: @ %bb.0: @ %entry
1084 ; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]
1085 ; CHECK-LE-NEXT: vstrh.16 q0, [r1]
1086 ; CHECK-LE-NEXT: bx lr
1088 ; CHECK-BE-LABEL: ldrf16_align1:
1089 ; CHECK-BE: @ %bb.0: @ %entry
1090 ; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3]
1091 ; CHECK-BE-NEXT: vrev16.8 q0, q0
1092 ; CHECK-BE-NEXT: vstrh.16 q0, [r1]
1093 ; CHECK-BE-NEXT: bx lr
1095 %z = getelementptr inbounds i8, i8* %x, i32 3
1096 %0 = bitcast i8* %z to <8 x half>*
1097 %1 = load <8 x half>, <8 x half>* %0, align 1
1098 %2 = bitcast i8* %y to <8 x half>*
1099 store <8 x half> %1, <8 x half>* %2, align 2
; Over-aligned load: <8 x i16> with align 8 from %x + 4 bytes, stored (align 2) to %y.
; The little-endian checks expect the load to be emitted as a word load
; (vldrw.u32 q0, [r0, #4]); the big-endian checks expect the halfword form
; (vldrh.u16 q0, [r0, #4]). Both store with vstrh.16.
1103 define i8* @ldrh16_align8(i8* %x, i8* %y) {
1104 ; CHECK-LE-LABEL: ldrh16_align8:
1105 ; CHECK-LE: @ %bb.0: @ %entry
1106 ; CHECK-LE-NEXT: vldrw.u32 q0, [r0, #4]
1107 ; CHECK-LE-NEXT: vstrh.16 q0, [r1]
1108 ; CHECK-LE-NEXT: bx lr
1110 ; CHECK-BE-LABEL: ldrh16_align8:
1111 ; CHECK-BE: @ %bb.0: @ %entry
1112 ; CHECK-BE-NEXT: vldrh.u16 q0, [r0, #4]
1113 ; CHECK-BE-NEXT: vstrh.16 q0, [r1]
1114 ; CHECK-BE-NEXT: bx lr
1116 %z = getelementptr inbounds i8, i8* %x, i32 4
1117 %0 = bitcast i8* %z to <8 x i16>*
1118 %1 = load <8 x i16>, <8 x i16>* %0, align 8
1119 %2 = bitcast i8* %y to <8 x i16>*
1120 store <8 x i16> %1, <8 x i16>* %2, align 2
; Store test (note the parameter order is %y, %x): load <4 x i32> from %x and store
; it (align 4) to %y + 4 bytes.
; Expected codegen folds the offset into the store: vstrw.32 q0, [r0, #4].
1128 define i8* @strw32_4(i8* %y, i8* %x) {
1129 ; CHECK-LABEL: strw32_4:
1130 ; CHECK: @ %bb.0: @ %entry
1131 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1132 ; CHECK-NEXT: vstrw.32 q0, [r0, #4]
1135 %z = getelementptr inbounds i8, i8* %y, i32 4
1136 %0 = bitcast i8* %x to <4 x i32>*
1137 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1138 %2 = bitcast i8* %z to <4 x i32>*
1139 store <4 x i32> %1, <4 x i32>* %2, align 4
; Store test (parameter order is %y, %x): load <4 x i32> from %x and store it
; (align 4) to %y + 3 bytes.
; Expected codegen does not fold the offset: after the load, the address is formed
; with adds r1, r0, #3 and the store uses plain [r1].
1143 define i8* @strw32_3(i8* %y, i8* %x) {
1144 ; CHECK-LABEL: strw32_3:
1145 ; CHECK: @ %bb.0: @ %entry
1146 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1147 ; CHECK-NEXT: adds r1, r0, #3
1148 ; CHECK-NEXT: vstrw.32 q0, [r1]
1151 %z = getelementptr inbounds i8, i8* %y, i32 3
1152 %0 = bitcast i8* %x to <4 x i32>*
1153 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1154 %2 = bitcast i8* %z to <4 x i32>*
1155 store <4 x i32> %1, <4 x i32>* %2, align 4
; Store test: <4 x i32> loaded from %x (align 4) is stored to %y - 4 (align 4).
; Expected: the negative offset is folded into the store as [r0, #-4].
1159 define i8* @strw32_m4(i8* %y, i8* %x) {
1160 ; CHECK-LABEL: strw32_m4:
1161 ; CHECK: @ %bb.0: @ %entry
1162 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1163 ; CHECK-NEXT: vstrw.32 q0, [r0, #-4]
1166 %z = getelementptr inbounds i8, i8* %y, i32 -4
1167 %0 = bitcast i8* %x to <4 x i32>*
1168 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1169 %2 = bitcast i8* %z to <4 x i32>*
1170 store <4 x i32> %1, <4 x i32>* %2, align 4
; Store test: <4 x i32> loaded from %x (align 4) is stored to %y + 508 (align 4).
; Expected: the address is formed with add.w into r1 and the store uses [r1].
; NOTE(review): the offset is not folded into the store here, unlike strw32_4 --
; confirm against the vstrw immediate range that this is the intended codegen.
1174 define i8* @strw32_508(i8* %y, i8* %x) {
1175 ; CHECK-LABEL: strw32_508:
1176 ; CHECK: @ %bb.0: @ %entry
1177 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1178 ; CHECK-NEXT: add.w r1, r0, #508
1179 ; CHECK-NEXT: vstrw.32 q0, [r1]
1182 %z = getelementptr inbounds i8, i8* %y, i32 508
1183 %0 = bitcast i8* %x to <4 x i32>*
1184 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1185 %2 = bitcast i8* %z to <4 x i32>*
1186 store <4 x i32> %1, <4 x i32>* %2, align 4
; Store test: <4 x i32> loaded from %x (align 4) is stored to %y + 512 (align 4).
; Expected: the address is formed with add.w into r1 and the store uses [r1]
; without an immediate offset.
1190 define i8* @strw32_512(i8* %y, i8* %x) {
1191 ; CHECK-LABEL: strw32_512:
1192 ; CHECK: @ %bb.0: @ %entry
1193 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1194 ; CHECK-NEXT: add.w r1, r0, #512
1195 ; CHECK-NEXT: vstrw.32 q0, [r1]
1198 %z = getelementptr inbounds i8, i8* %y, i32 512
1199 %0 = bitcast i8* %x to <4 x i32>*
1200 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1201 %2 = bitcast i8* %z to <4 x i32>*
1202 store <4 x i32> %1, <4 x i32>* %2, align 4
; Store test: <4 x i32> loaded from %x (align 4) is stored to %y - 508 (align 4).
; Expected: the address is formed with sub.w into r1 and the store uses [r1].
; NOTE(review): the offset is not folded into the store here, unlike strw32_m4 --
; confirm against the vstrw immediate range that this is the intended codegen.
1206 define i8* @strw32_m508(i8* %y, i8* %x) {
1207 ; CHECK-LABEL: strw32_m508:
1208 ; CHECK: @ %bb.0: @ %entry
1209 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1210 ; CHECK-NEXT: sub.w r1, r0, #508
1211 ; CHECK-NEXT: vstrw.32 q0, [r1]
1214 %z = getelementptr inbounds i8, i8* %y, i32 -508
1215 %0 = bitcast i8* %x to <4 x i32>*
1216 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1217 %2 = bitcast i8* %z to <4 x i32>*
1218 store <4 x i32> %1, <4 x i32>* %2, align 4
; Store test: <4 x i32> loaded from %x (align 4) is stored to %y - 512 (align 4).
; Expected: the address is formed with sub.w into r1 and the store uses [r1]
; without an immediate offset.
1222 define i8* @strw32_m512(i8* %y, i8* %x) {
1223 ; CHECK-LABEL: strw32_m512:
1224 ; CHECK: @ %bb.0: @ %entry
1225 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1226 ; CHECK-NEXT: sub.w r1, r0, #512
1227 ; CHECK-NEXT: vstrw.32 q0, [r1]
1230 %z = getelementptr inbounds i8, i8* %y, i32 -512
1231 %0 = bitcast i8* %x to <4 x i32>*
1232 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1233 %2 = bitcast i8* %z to <4 x i32>*
1234 store <4 x i32> %1, <4 x i32>* %2, align 4
; Store test: <4 x i16> loaded from %x (align 2) is stored to %y + 4 (align 2).
; Expected: vldrh.u32 load, and the offset folded into vstrh.32 as [r0, #4].
1239 define i8* @strh32_4(i8* %y, i8* %x) {
1240 ; CHECK-LABEL: strh32_4:
1241 ; CHECK: @ %bb.0: @ %entry
1242 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1243 ; CHECK-NEXT: vstrh.32 q0, [r0, #4]
1246 %z = getelementptr inbounds i8, i8* %y, i32 4
1247 %0 = bitcast i8* %x to <4 x i16>*
1248 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1249 %2 = bitcast i8* %z to <4 x i16>*
1250 store <4 x i16> %1, <4 x i16>* %2, align 2
; Store test: <4 x i16> loaded from %x (align 2) is stored to %y + 3 (align 2).
; Expected: the odd offset is NOT folded; the address is formed with adds into
; r1 and vstrh.32 uses [r1] without an immediate.
1254 define i8* @strh32_3(i8* %y, i8* %x) {
1255 ; CHECK-LABEL: strh32_3:
1256 ; CHECK: @ %bb.0: @ %entry
1257 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1258 ; CHECK-NEXT: adds r1, r0, #3
1259 ; CHECK-NEXT: vstrh.32 q0, [r1]
1262 %z = getelementptr inbounds i8, i8* %y, i32 3
1263 %0 = bitcast i8* %x to <4 x i16>*
1264 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1265 %2 = bitcast i8* %z to <4 x i16>*
1266 store <4 x i16> %1, <4 x i16>* %2, align 2
; Store test: <4 x i16> loaded from %x (align 2) is stored to %y + 2 (align 2).
; Expected: the offset is folded into vstrh.32 as [r0, #2].
1270 define i8* @strh32_2(i8* %y, i8* %x) {
1271 ; CHECK-LABEL: strh32_2:
1272 ; CHECK: @ %bb.0: @ %entry
1273 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1274 ; CHECK-NEXT: vstrh.32 q0, [r0, #2]
1277 %z = getelementptr inbounds i8, i8* %y, i32 2
1278 %0 = bitcast i8* %x to <4 x i16>*
1279 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1280 %2 = bitcast i8* %z to <4 x i16>*
1281 store <4 x i16> %1, <4 x i16>* %2, align 2
; Store test: <4 x i16> loaded from %x (align 2) is stored to %y + 254 (align 2).
; Expected: the address is formed with add.w into r1 and vstrh.32 uses [r1].
; NOTE(review): the offset is not folded into the store here, unlike strh32_2 --
; confirm against the vstrh immediate range that this is the intended codegen.
1285 define i8* @strh32_254(i8* %y, i8* %x) {
1286 ; CHECK-LABEL: strh32_254:
1287 ; CHECK: @ %bb.0: @ %entry
1288 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1289 ; CHECK-NEXT: add.w r1, r0, #254
1290 ; CHECK-NEXT: vstrh.32 q0, [r1]
1293 %z = getelementptr inbounds i8, i8* %y, i32 254
1294 %0 = bitcast i8* %x to <4 x i16>*
1295 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1296 %2 = bitcast i8* %z to <4 x i16>*
1297 store <4 x i16> %1, <4 x i16>* %2, align 2
; Store test: <4 x i16> loaded from %x (align 2) is stored to %y + 256 (align 2).
; Expected: the address is formed with add.w into r1 and vstrh.32 uses [r1]
; without an immediate offset.
1301 define i8* @strh32_256(i8* %y, i8* %x) {
1302 ; CHECK-LABEL: strh32_256:
1303 ; CHECK: @ %bb.0: @ %entry
1304 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1305 ; CHECK-NEXT: add.w r1, r0, #256
1306 ; CHECK-NEXT: vstrh.32 q0, [r1]
1309 %z = getelementptr inbounds i8, i8* %y, i32 256
1310 %0 = bitcast i8* %x to <4 x i16>*
1311 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1312 %2 = bitcast i8* %z to <4 x i16>*
1313 store <4 x i16> %1, <4 x i16>* %2, align 2
; Store test: <4 x i16> loaded from %x (align 2) is stored to %y - 254 (align 2).
; Expected: the address is formed with sub.w into r1 and vstrh.32 uses [r1].
; NOTE(review): the offset is not folded into the store here, unlike strh32_2 --
; confirm against the vstrh immediate range that this is the intended codegen.
1317 define i8* @strh32_m254(i8* %y, i8* %x) {
1318 ; CHECK-LABEL: strh32_m254:
1319 ; CHECK: @ %bb.0: @ %entry
1320 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1321 ; CHECK-NEXT: sub.w r1, r0, #254
1322 ; CHECK-NEXT: vstrh.32 q0, [r1]
1325 %z = getelementptr inbounds i8, i8* %y, i32 -254
1326 %0 = bitcast i8* %x to <4 x i16>*
1327 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1328 %2 = bitcast i8* %z to <4 x i16>*
1329 store <4 x i16> %1, <4 x i16>* %2, align 2
; Store test: <4 x i16> loaded from %x (align 2) is stored to %y - 256 (align 2).
; Expected: the address is formed with sub.w into r1 and vstrh.32 uses [r1]
; without an immediate offset.
1333 define i8* @strh32_m256(i8* %y, i8* %x) {
1334 ; CHECK-LABEL: strh32_m256:
1335 ; CHECK: @ %bb.0: @ %entry
1336 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1337 ; CHECK-NEXT: sub.w r1, r0, #256
1338 ; CHECK-NEXT: vstrh.32 q0, [r1]
1341 %z = getelementptr inbounds i8, i8* %y, i32 -256
1342 %0 = bitcast i8* %x to <4 x i16>*
1343 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1344 %2 = bitcast i8* %z to <4 x i16>*
1345 store <4 x i16> %1, <4 x i16>* %2, align 2
; Store test: <8 x i16> loaded from %x (align 2) is stored to %y + 4 (align 2).
; Expected: vldrh.u16 load, and the offset folded into vstrh.16 as [r0, #4].
1350 define i8* @strh16_4(i8* %y, i8* %x) {
1351 ; CHECK-LABEL: strh16_4:
1352 ; CHECK: @ %bb.0: @ %entry
1353 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1354 ; CHECK-NEXT: vstrh.16 q0, [r0, #4]
1357 %z = getelementptr inbounds i8, i8* %y, i32 4
1358 %0 = bitcast i8* %x to <8 x i16>*
1359 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1360 %2 = bitcast i8* %z to <8 x i16>*
1361 store <8 x i16> %1, <8 x i16>* %2, align 2
; Store test: <8 x i16> loaded from %x (align 2) is stored to %y + 3 (align 2).
; Expected: the odd offset is NOT folded; the address is formed with adds into
; r1 and vstrh.16 uses [r1] without an immediate.
1365 define i8* @strh16_3(i8* %y, i8* %x) {
1366 ; CHECK-LABEL: strh16_3:
1367 ; CHECK: @ %bb.0: @ %entry
1368 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1369 ; CHECK-NEXT: adds r1, r0, #3
1370 ; CHECK-NEXT: vstrh.16 q0, [r1]
1373 %z = getelementptr inbounds i8, i8* %y, i32 3
1374 %0 = bitcast i8* %x to <8 x i16>*
1375 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1376 %2 = bitcast i8* %z to <8 x i16>*
1377 store <8 x i16> %1, <8 x i16>* %2, align 2
; Store test: <8 x i16> loaded from %x (align 2) is stored to %y + 2 (align 2).
; Expected: the offset is folded into vstrh.16 as [r0, #2].
1381 define i8* @strh16_2(i8* %y, i8* %x) {
1382 ; CHECK-LABEL: strh16_2:
1383 ; CHECK: @ %bb.0: @ %entry
1384 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1385 ; CHECK-NEXT: vstrh.16 q0, [r0, #2]
1388 %z = getelementptr inbounds i8, i8* %y, i32 2
1389 %0 = bitcast i8* %x to <8 x i16>*
1390 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1391 %2 = bitcast i8* %z to <8 x i16>*
1392 store <8 x i16> %1, <8 x i16>* %2, align 2
; Store test: <8 x i16> loaded from %x (align 2) is stored to %y + 254 (align 2).
; Expected: the address is formed with add.w into r1 and vstrh.16 uses [r1].
; NOTE(review): the offset is not folded into the store here, unlike strh16_2 --
; confirm against the vstrh immediate range that this is the intended codegen.
1396 define i8* @strh16_254(i8* %y, i8* %x) {
1397 ; CHECK-LABEL: strh16_254:
1398 ; CHECK: @ %bb.0: @ %entry
1399 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1400 ; CHECK-NEXT: add.w r1, r0, #254
1401 ; CHECK-NEXT: vstrh.16 q0, [r1]
1404 %z = getelementptr inbounds i8, i8* %y, i32 254
1405 %0 = bitcast i8* %x to <8 x i16>*
1406 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1407 %2 = bitcast i8* %z to <8 x i16>*
1408 store <8 x i16> %1, <8 x i16>* %2, align 2
; Store test: <8 x i16> loaded from %x (align 2) is stored to %y + 256 (align 2).
; Expected: the address is formed with add.w into r1 and vstrh.16 uses [r1]
; without an immediate offset.
1412 define i8* @strh16_256(i8* %y, i8* %x) {
1413 ; CHECK-LABEL: strh16_256:
1414 ; CHECK: @ %bb.0: @ %entry
1415 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1416 ; CHECK-NEXT: add.w r1, r0, #256
1417 ; CHECK-NEXT: vstrh.16 q0, [r1]
1420 %z = getelementptr inbounds i8, i8* %y, i32 256
1421 %0 = bitcast i8* %x to <8 x i16>*
1422 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1423 %2 = bitcast i8* %z to <8 x i16>*
1424 store <8 x i16> %1, <8 x i16>* %2, align 2
; Store test: <8 x i16> loaded from %x (align 2) is stored to %y - 254 (align 2).
; Expected: the address is formed with sub.w into r1 and vstrh.16 uses [r1].
; NOTE(review): the offset is not folded into the store here, unlike strh16_2 --
; confirm against the vstrh immediate range that this is the intended codegen.
1428 define i8* @strh16_m254(i8* %y, i8* %x) {
1429 ; CHECK-LABEL: strh16_m254:
1430 ; CHECK: @ %bb.0: @ %entry
1431 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1432 ; CHECK-NEXT: sub.w r1, r0, #254
1433 ; CHECK-NEXT: vstrh.16 q0, [r1]
1436 %z = getelementptr inbounds i8, i8* %y, i32 -254
1437 %0 = bitcast i8* %x to <8 x i16>*
1438 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1439 %2 = bitcast i8* %z to <8 x i16>*
1440 store <8 x i16> %1, <8 x i16>* %2, align 2
; Store test: <8 x i16> loaded from %x (align 2) is stored to %y - 256 (align 2).
; Expected: the address is formed with sub.w into r1 and vstrh.16 uses [r1]
; without an immediate offset.
1444 define i8* @strh16_m256(i8* %y, i8* %x) {
1445 ; CHECK-LABEL: strh16_m256:
1446 ; CHECK: @ %bb.0: @ %entry
1447 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1448 ; CHECK-NEXT: sub.w r1, r0, #256
1449 ; CHECK-NEXT: vstrh.16 q0, [r1]
1452 %z = getelementptr inbounds i8, i8* %y, i32 -256
1453 %0 = bitcast i8* %x to <8 x i16>*
1454 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1455 %2 = bitcast i8* %z to <8 x i16>*
1456 store <8 x i16> %1, <8 x i16>* %2, align 2
; Store test: <4 x i8> loaded from %x (align 1) is stored to %y + 4 (align 1).
; Expected: vldrb.u32 load, and the offset folded into vstrb.32 as [r0, #4].
1461 define i8* @strb32_4(i8* %y, i8* %x) {
1462 ; CHECK-LABEL: strb32_4:
1463 ; CHECK: @ %bb.0: @ %entry
1464 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1465 ; CHECK-NEXT: vstrb.32 q0, [r0, #4]
1468 %z = getelementptr inbounds i8, i8* %y, i32 4
1469 %0 = bitcast i8* %x to <4 x i8>*
1470 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1471 %2 = bitcast i8* %z to <4 x i8>*
1472 store <4 x i8> %1, <4 x i8>* %2, align 1
; Store test: <4 x i8> loaded from %x (align 1) is stored to %y + 3 (align 1).
; Expected: the odd offset is folded into vstrb.32 as [r0, #3].
1476 define i8* @strb32_3(i8* %y, i8* %x) {
1477 ; CHECK-LABEL: strb32_3:
1478 ; CHECK: @ %bb.0: @ %entry
1479 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1480 ; CHECK-NEXT: vstrb.32 q0, [r0, #3]
1483 %z = getelementptr inbounds i8, i8* %y, i32 3
1484 %0 = bitcast i8* %x to <4 x i8>*
1485 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1486 %2 = bitcast i8* %z to <4 x i8>*
1487 store <4 x i8> %1, <4 x i8>* %2, align 1
; Store test: <4 x i8> loaded from %x (align 1) is stored to %y + 127 (align 1).
; Expected: the address is formed with add.w into r1 and vstrb.32 uses [r1].
; NOTE(review): the offset is not folded into the store here, unlike strb32_3 --
; confirm against the vstrb immediate range that this is the intended codegen.
1491 define i8* @strb32_127(i8* %y, i8* %x) {
1492 ; CHECK-LABEL: strb32_127:
1493 ; CHECK: @ %bb.0: @ %entry
1494 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1495 ; CHECK-NEXT: add.w r1, r0, #127
1496 ; CHECK-NEXT: vstrb.32 q0, [r1]
1499 %z = getelementptr inbounds i8, i8* %y, i32 127
1500 %0 = bitcast i8* %x to <4 x i8>*
1501 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1502 %2 = bitcast i8* %z to <4 x i8>*
1503 store <4 x i8> %1, <4 x i8>* %2, align 1
; Store test: <4 x i8> loaded from %x (align 1) is stored to %y + 128 (align 1).
; Expected: the address is formed with add.w into r1 and vstrb.32 uses [r1]
; without an immediate offset.
1507 define i8* @strb32_128(i8* %y, i8* %x) {
1508 ; CHECK-LABEL: strb32_128:
1509 ; CHECK: @ %bb.0: @ %entry
1510 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1511 ; CHECK-NEXT: add.w r1, r0, #128
1512 ; CHECK-NEXT: vstrb.32 q0, [r1]
1515 %z = getelementptr inbounds i8, i8* %y, i32 128
1516 %0 = bitcast i8* %x to <4 x i8>*
1517 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1518 %2 = bitcast i8* %z to <4 x i8>*
1519 store <4 x i8> %1, <4 x i8>* %2, align 1
; Store test: <4 x i8> loaded from %x (align 1) is stored to %y - 127 (align 1).
; Expected: the address is formed with sub.w into r1 and vstrb.32 uses [r1].
; NOTE(review): the offset is not folded into the store here, unlike strb32_3 --
; confirm against the vstrb immediate range that this is the intended codegen.
1523 define i8* @strb32_m127(i8* %y, i8* %x) {
1524 ; CHECK-LABEL: strb32_m127:
1525 ; CHECK: @ %bb.0: @ %entry
1526 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1527 ; CHECK-NEXT: sub.w r1, r0, #127
1528 ; CHECK-NEXT: vstrb.32 q0, [r1]
1531 %z = getelementptr inbounds i8, i8* %y, i32 -127
1532 %0 = bitcast i8* %x to <4 x i8>*
1533 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1534 %2 = bitcast i8* %z to <4 x i8>*
1535 store <4 x i8> %1, <4 x i8>* %2, align 1
; Store test: <4 x i8> loaded from %x (align 1) is stored to %y - 128 (align 1).
; Expected: the address is formed with sub.w into r1 and vstrb.32 uses [r1]
; without an immediate offset.
1539 define i8* @strb32_m128(i8* %y, i8* %x) {
1540 ; CHECK-LABEL: strb32_m128:
1541 ; CHECK: @ %bb.0: @ %entry
1542 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1543 ; CHECK-NEXT: sub.w r1, r0, #128
1544 ; CHECK-NEXT: vstrb.32 q0, [r1]
1547 %z = getelementptr inbounds i8, i8* %y, i32 -128
1548 %0 = bitcast i8* %x to <4 x i8>*
1549 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1550 %2 = bitcast i8* %z to <4 x i8>*
1551 store <4 x i8> %1, <4 x i8>* %2, align 1
; Store test: <8 x i8> loaded from %x (align 1) is stored to %y + 4 (align 1).
; Expected: vldrb.u16 load, and the offset folded into vstrb.16 as [r0, #4].
1556 define i8* @strb16_4(i8* %y, i8* %x) {
1557 ; CHECK-LABEL: strb16_4:
1558 ; CHECK: @ %bb.0: @ %entry
1559 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1560 ; CHECK-NEXT: vstrb.16 q0, [r0, #4]
1563 %z = getelementptr inbounds i8, i8* %y, i32 4
1564 %0 = bitcast i8* %x to <8 x i8>*
1565 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1566 %2 = bitcast i8* %z to <8 x i8>*
1567 store <8 x i8> %1, <8 x i8>* %2, align 1
; Store test: <8 x i8> loaded from %x (align 1) is stored to %y + 3 (align 1).
; Expected: the odd offset is folded into vstrb.16 as [r0, #3].
1571 define i8* @strb16_3(i8* %y, i8* %x) {
1572 ; CHECK-LABEL: strb16_3:
1573 ; CHECK: @ %bb.0: @ %entry
1574 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1575 ; CHECK-NEXT: vstrb.16 q0, [r0, #3]
1578 %z = getelementptr inbounds i8, i8* %y, i32 3
1579 %0 = bitcast i8* %x to <8 x i8>*
1580 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1581 %2 = bitcast i8* %z to <8 x i8>*
1582 store <8 x i8> %1, <8 x i8>* %2, align 1
; Store test: <8 x i8> loaded from %x (align 1) is stored to %y + 127 (align 1).
; Expected: the address is formed with add.w into r1 and vstrb.16 uses [r1].
; NOTE(review): the offset is not folded into the store here, unlike strb16_3 --
; confirm against the vstrb immediate range that this is the intended codegen.
1586 define i8* @strb16_127(i8* %y, i8* %x) {
1587 ; CHECK-LABEL: strb16_127:
1588 ; CHECK: @ %bb.0: @ %entry
1589 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1590 ; CHECK-NEXT: add.w r1, r0, #127
1591 ; CHECK-NEXT: vstrb.16 q0, [r1]
1594 %z = getelementptr inbounds i8, i8* %y, i32 127
1595 %0 = bitcast i8* %x to <8 x i8>*
1596 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1597 %2 = bitcast i8* %z to <8 x i8>*
1598 store <8 x i8> %1, <8 x i8>* %2, align 1
; Store test: <8 x i8> loaded from %x (align 1) is stored to %y + 128 (align 1).
; Expected: the address is formed with add.w into r1 and vstrb.16 uses [r1]
; without an immediate offset.
1602 define i8* @strb16_128(i8* %y, i8* %x) {
1603 ; CHECK-LABEL: strb16_128:
1604 ; CHECK: @ %bb.0: @ %entry
1605 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1606 ; CHECK-NEXT: add.w r1, r0, #128
1607 ; CHECK-NEXT: vstrb.16 q0, [r1]
1610 %z = getelementptr inbounds i8, i8* %y, i32 128
1611 %0 = bitcast i8* %x to <8 x i8>*
1612 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1613 %2 = bitcast i8* %z to <8 x i8>*
1614 store <8 x i8> %1, <8 x i8>* %2, align 1
; Store test: <8 x i8> loaded from %x (align 1) is stored to %y - 127 (align 1).
; Expected: the address is formed with sub.w into r1 and vstrb.16 uses [r1].
; NOTE(review): the offset is not folded into the store here, unlike strb16_3 --
; confirm against the vstrb immediate range that this is the intended codegen.
1618 define i8* @strb16_m127(i8* %y, i8* %x) {
1619 ; CHECK-LABEL: strb16_m127:
1620 ; CHECK: @ %bb.0: @ %entry
1621 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1622 ; CHECK-NEXT: sub.w r1, r0, #127
1623 ; CHECK-NEXT: vstrb.16 q0, [r1]
1626 %z = getelementptr inbounds i8, i8* %y, i32 -127
1627 %0 = bitcast i8* %x to <8 x i8>*
1628 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1629 %2 = bitcast i8* %z to <8 x i8>*
1630 store <8 x i8> %1, <8 x i8>* %2, align 1
; Store test: <8 x i8> loaded from %x (align 1) is stored to %y - 128 (align 1).
; Expected: the address is formed with sub.w into r1 and vstrb.16 uses [r1]
; without an immediate offset.
1634 define i8* @strb16_m128(i8* %y, i8* %x) {
1635 ; CHECK-LABEL: strb16_m128:
1636 ; CHECK: @ %bb.0: @ %entry
1637 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1638 ; CHECK-NEXT: sub.w r1, r0, #128
1639 ; CHECK-NEXT: vstrb.16 q0, [r1]
1642 %z = getelementptr inbounds i8, i8* %y, i32 -128
1643 %0 = bitcast i8* %x to <8 x i8>*
1644 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1645 %2 = bitcast i8* %z to <8 x i8>*
1646 store <8 x i8> %1, <8 x i8>* %2, align 1
; Store test: <16 x i8> loaded from %x (align 1) is stored to %y + 4 (align 1).
; Expected: vldrb.u8 load, and the offset folded into vstrb.8 as [r0, #4].
1651 define i8* @strb8_4(i8* %y, i8* %x) {
1652 ; CHECK-LABEL: strb8_4:
1653 ; CHECK: @ %bb.0: @ %entry
1654 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1655 ; CHECK-NEXT: vstrb.8 q0, [r0, #4]
1658 %z = getelementptr inbounds i8, i8* %y, i32 4
1659 %0 = bitcast i8* %x to <16 x i8>*
1660 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1661 %2 = bitcast i8* %z to <16 x i8>*
1662 store <16 x i8> %1, <16 x i8>* %2, align 1
; Store test: <16 x i8> loaded from %x (align 1) is stored to %y + 3 (align 1).
; Expected: the odd offset is folded into vstrb.8 as [r0, #3].
1666 define i8* @strb8_3(i8* %y, i8* %x) {
1667 ; CHECK-LABEL: strb8_3:
1668 ; CHECK: @ %bb.0: @ %entry
1669 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1670 ; CHECK-NEXT: vstrb.8 q0, [r0, #3]
1673 %z = getelementptr inbounds i8, i8* %y, i32 3
1674 %0 = bitcast i8* %x to <16 x i8>*
1675 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1676 %2 = bitcast i8* %z to <16 x i8>*
1677 store <16 x i8> %1, <16 x i8>* %2, align 1
; Store test: <16 x i8> loaded from %x (align 1) is stored to %y + 127 (align 1).
; Expected: the address is formed with add.w into r1 and vstrb.8 uses [r1].
; NOTE(review): the offset is not folded into the store here, unlike strb8_3 --
; confirm against the vstrb immediate range that this is the intended codegen.
1681 define i8* @strb8_127(i8* %y, i8* %x) {
1682 ; CHECK-LABEL: strb8_127:
1683 ; CHECK: @ %bb.0: @ %entry
1684 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1685 ; CHECK-NEXT: add.w r1, r0, #127
1686 ; CHECK-NEXT: vstrb.8 q0, [r1]
1689 %z = getelementptr inbounds i8, i8* %y, i32 127
1690 %0 = bitcast i8* %x to <16 x i8>*
1691 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1692 %2 = bitcast i8* %z to <16 x i8>*
1693 store <16 x i8> %1, <16 x i8>* %2, align 1
; Store test: <16 x i8> loaded from %x (align 1) is stored to %y + 128 (align 1).
; Expected: the address is formed with add.w into r1 and vstrb.8 uses [r1]
; without an immediate offset.
1697 define i8* @strb8_128(i8* %y, i8* %x) {
1698 ; CHECK-LABEL: strb8_128:
1699 ; CHECK: @ %bb.0: @ %entry
1700 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1701 ; CHECK-NEXT: add.w r1, r0, #128
1702 ; CHECK-NEXT: vstrb.8 q0, [r1]
1705 %z = getelementptr inbounds i8, i8* %y, i32 128
1706 %0 = bitcast i8* %x to <16 x i8>*
1707 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1708 %2 = bitcast i8* %z to <16 x i8>*
1709 store <16 x i8> %1, <16 x i8>* %2, align 1
; Store test: <16 x i8> loaded from %x (align 1) is stored to %y - 127 (align 1).
; Expected: the address is formed with sub.w into r1 and vstrb.8 uses [r1].
; NOTE(review): the offset is not folded into the store here, unlike strb8_3 --
; confirm against the vstrb immediate range that this is the intended codegen.
1713 define i8* @strb8_m127(i8* %y, i8* %x) {
1714 ; CHECK-LABEL: strb8_m127:
1715 ; CHECK: @ %bb.0: @ %entry
1716 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1717 ; CHECK-NEXT: sub.w r1, r0, #127
1718 ; CHECK-NEXT: vstrb.8 q0, [r1]
1721 %z = getelementptr inbounds i8, i8* %y, i32 -127
1722 %0 = bitcast i8* %x to <16 x i8>*
1723 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1724 %2 = bitcast i8* %z to <16 x i8>*
1725 store <16 x i8> %1, <16 x i8>* %2, align 1
; Store test: <16 x i8> loaded from %x (align 1) is stored to %y - 128 (align 1).
; Expected: the address is formed with sub.w into r1 and vstrb.8 uses [r1]
; without an immediate offset.
1729 define i8* @strb8_m128(i8* %y, i8* %x) {
1730 ; CHECK-LABEL: strb8_m128:
1731 ; CHECK: @ %bb.0: @ %entry
1732 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1733 ; CHECK-NEXT: sub.w r1, r0, #128
1734 ; CHECK-NEXT: vstrb.8 q0, [r1]
1737 %z = getelementptr inbounds i8, i8* %y, i32 -128
1738 %0 = bitcast i8* %x to <16 x i8>*
1739 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1740 %2 = bitcast i8* %z to <16 x i8>*
1741 store <16 x i8> %1, <16 x i8>* %2, align 1
; Store test: <4 x float> loaded from %x (align 4) is stored to %y + 4 (align 4).
; Expected: the same instructions as the integer case -- vldrw.u32 load and
; vstrw.32 with the offset folded as [r0, #4].
1746 define i8* @strf32_4(i8* %y, i8* %x) {
1747 ; CHECK-LABEL: strf32_4:
1748 ; CHECK: @ %bb.0: @ %entry
1749 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1750 ; CHECK-NEXT: vstrw.32 q0, [r0, #4]
1753 %z = getelementptr inbounds i8, i8* %y, i32 4
1754 %0 = bitcast i8* %x to <4 x float>*
1755 %1 = load <4 x float>, <4 x float>* %0, align 4
1756 %2 = bitcast i8* %z to <4 x float>*
1757 store <4 x float> %1, <4 x float>* %2, align 4
; Store test: <8 x half> loaded from %x (align 2) is stored to %y + 4 (align 2).
; Expected: vldrh.u16 load and vstrh.16 with the offset folded as [r0, #4].
1761 define i8* @strf16_4(i8* %y, i8* %x) {
1762 ; CHECK-LABEL: strf16_4:
1763 ; CHECK: @ %bb.0: @ %entry
1764 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1765 ; CHECK-NEXT: vstrh.16 q0, [r0, #4]
1768 %z = getelementptr inbounds i8, i8* %y, i32 4
1769 %0 = bitcast i8* %x to <8 x half>*
1770 %1 = load <8 x half>, <8 x half>* %0, align 2
1771 %2 = bitcast i8* %z to <8 x half>*
1772 store <8 x half> %1, <8 x half>* %2, align 2
; Under-aligned store: <4 x i32> loaded from %x (align 4) is stored to %y + 3 with align 1.
; Expected: the store is emitted as vstrb.8 with the offset folded as #3; the
; big-endian run additionally expects a vrev32.8 between the load and the store.
1776 define i8* @strwi32_align1(i8* %y, i8* %x) {
1777 ; CHECK-LE-LABEL: strwi32_align1:
1778 ; CHECK-LE: @ %bb.0: @ %entry
1779 ; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
1780 ; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]
1781 ; CHECK-LE-NEXT: bx lr
1783 ; CHECK-BE-LABEL: strwi32_align1:
1784 ; CHECK-BE: @ %bb.0: @ %entry
1785 ; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
1786 ; CHECK-BE-NEXT: vrev32.8 q0, q0
1787 ; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3]
1788 ; CHECK-BE-NEXT: bx lr
1790 %z = getelementptr inbounds i8, i8* %y, i32 3
1791 %0 = bitcast i8* %x to <4 x i32>*
1792 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1793 %2 = bitcast i8* %z to <4 x i32>*
1794 store <4 x i32> %1, <4 x i32>* %2, align 1
; Under-aligned store: <8 x i16> loaded from %x (align 2) is stored to %y + 3 with align 1.
; Expected: the store is emitted as vstrb.8 with the offset folded as #3; the
; big-endian run additionally expects a vrev16.8 between the load and the store.
1798 define i8* @strhi16_align1(i8* %y, i8* %x) {
1799 ; CHECK-LE-LABEL: strhi16_align1:
1800 ; CHECK-LE: @ %bb.0: @ %entry
1801 ; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
1802 ; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]
1803 ; CHECK-LE-NEXT: bx lr
1805 ; CHECK-BE-LABEL: strhi16_align1:
1806 ; CHECK-BE: @ %bb.0: @ %entry
1807 ; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
1808 ; CHECK-BE-NEXT: vrev16.8 q0, q0
1809 ; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3]
1810 ; CHECK-BE-NEXT: bx lr
1812 %z = getelementptr inbounds i8, i8* %y, i32 3
1813 %0 = bitcast i8* %x to <8 x i16>*
1814 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1815 %2 = bitcast i8* %z to <8 x i16>*
1816 store <8 x i16> %1, <8 x i16>* %2, align 1
; Under-aligned truncating store: <4 x i32> is loaded from %x (align 4), truncated
; to <4 x i16>, and stored to %y + 3 with align 1.
; Expected (same for both endiannesses, single CHECK prefix): 8 bytes of stack are
; reserved, the vector is written there with vstrh.32, read back as two words with
; ldrd, and written to the destination with two str.w at offsets #3 and #7.
1820 define i8* @strhi32_align1(i8* %y, i8* %x) {
1821 ; CHECK-LABEL: strhi32_align1:
1822 ; CHECK: @ %bb.0: @ %entry
1823 ; CHECK-NEXT: .pad #8
1824 ; CHECK-NEXT: sub sp, #8
1825 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1826 ; CHECK-NEXT: mov r1, sp
1827 ; CHECK-NEXT: vstrh.32 q0, [r1]
1828 ; CHECK-NEXT: ldrd r1, r2, [sp]
1829 ; CHECK-NEXT: str.w r1, [r0, #3]
1830 ; CHECK-NEXT: str.w r2, [r0, #7]
1831 ; CHECK-NEXT: add sp, #8
1834 %z = getelementptr inbounds i8, i8* %y, i32 3
1835 %0 = bitcast i8* %x to <4 x i32>*
1836 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1837 %2 = bitcast i8* %z to <4 x i16>*
1838 %3 = trunc <4 x i32> %1 to <4 x i16>
1839 store <4 x i16> %3, <4 x i16>* %2, align 1
; Under-aligned store: <4 x float> loaded from %x (align 4) is stored to %y + 3 with align 1.
; Expected: the store is emitted as vstrb.8 with the offset folded as #3; the
; big-endian run additionally expects a vrev32.8 between the load and the store.
1843 define i8* @strf32_align1(i8* %y, i8* %x) {
1844 ; CHECK-LE-LABEL: strf32_align1:
1845 ; CHECK-LE: @ %bb.0: @ %entry
1846 ; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
1847 ; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]
1848 ; CHECK-LE-NEXT: bx lr
1850 ; CHECK-BE-LABEL: strf32_align1:
1851 ; CHECK-BE: @ %bb.0: @ %entry
1852 ; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
1853 ; CHECK-BE-NEXT: vrev32.8 q0, q0
1854 ; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3]
1855 ; CHECK-BE-NEXT: bx lr
1857 %z = getelementptr inbounds i8, i8* %y, i32 3
1858 %0 = bitcast i8* %x to <4 x float>*
1859 %1 = load <4 x float>, <4 x float>* %0, align 4
1860 %2 = bitcast i8* %z to <4 x float>*
1861 store <4 x float> %1, <4 x float>* %2, align 1
; Under-aligned store: <8 x half> loaded from %x (align 2) is stored to %y + 3 with align 1.
; Expected: the store is emitted as vstrb.8 with the offset folded as #3; the
; big-endian run additionally expects a vrev16.8 between the load and the store.
1865 define i8* @strf16_align1(i8* %y, i8* %x) {
1866 ; CHECK-LE-LABEL: strf16_align1:
1867 ; CHECK-LE: @ %bb.0: @ %entry
1868 ; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
1869 ; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]
1870 ; CHECK-LE-NEXT: bx lr
1872 ; CHECK-BE-LABEL: strf16_align1:
1873 ; CHECK-BE: @ %bb.0: @ %entry
1874 ; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
1875 ; CHECK-BE-NEXT: vrev16.8 q0, q0
1876 ; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3]
1877 ; CHECK-BE-NEXT: bx lr
1879 %z = getelementptr inbounds i8, i8* %y, i32 3
1880 %0 = bitcast i8* %x to <8 x half>*
1881 %1 = load <8 x half>, <8 x half>* %0, align 2
1882 %2 = bitcast i8* %z to <8 x half>*
1883 store <8 x half> %1, <8 x half>* %2, align 1
; Over-aligned store: <8 x i16> loaded from %x (align 2) is stored to %y + 16 with align 8.
; Expected: the little-endian run stores with vstrw.32, the big-endian run stores
; with vstrh.16; both fold the offset as #16.
; NOTE(review): the name says "f16" but the IR operates on <8 x i16>, not
; <8 x half> -- confirm whether the element type or the name is what was intended.
1887 define i8* @strf16_align8(i8* %y, i8* %x) {
1888 ; CHECK-LE-LABEL: strf16_align8:
1889 ; CHECK-LE: @ %bb.0: @ %entry
1890 ; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
1891 ; CHECK-LE-NEXT: vstrw.32 q0, [r0, #16]
1892 ; CHECK-LE-NEXT: bx lr
1894 ; CHECK-BE-LABEL: strf16_align8:
1895 ; CHECK-BE: @ %bb.0: @ %entry
1896 ; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
1897 ; CHECK-BE-NEXT: vstrh.16 q0, [r0, #16]
1898 ; CHECK-BE-NEXT: bx lr
1900 %z = getelementptr inbounds i8, i8* %y, i32 16
1901 %0 = bitcast i8* %x to <8 x i16>*
1902 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1903 %2 = bitcast i8* %z to <8 x i16>*
1904 store <8 x i16> %1, <8 x i16>* %2, align 8