1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
3 ; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
; <4 x i32> load from %x (align 4) stored to %y, with %z = %x + 4 bytes:
; expects the increment folded into vldrw.u32 as a `[r0], #4` writeback.
5 define i8* @ldrwu32_4(i8* %x, i8* %y) {
6 ; CHECK-LABEL: ldrwu32_4:
7 ; CHECK: @ %bb.0: @ %entry
8 ; CHECK-NEXT: vldrw.u32 q0, [r0], #4
9 ; CHECK-NEXT: vstrw.32 q0, [r1]
12 %z = getelementptr inbounds i8, i8* %x, i32 4
13 %0 = bitcast i8* %x to <4 x i32>*
14 %1 = load <4 x i32>, <4 x i32>* %0, align 4
15 %2 = bitcast i8* %y to <4 x i32>*
16 store <4 x i32> %1, <4 x i32>* %2, align 4
; <4 x i32> load (align 4) with %z = %x + 3 bytes: the little-endian run
; expects a vldrb.u8 with `[r0], #3` writeback, the big-endian run a plain
; vldrw.u32 followed by a separate `adds r0, #3`.
20 define i8* @ldrwu32_3(i8* %x, i8* %y) {
21 ; CHECK-LE-LABEL: ldrwu32_3:
22 ; CHECK-LE: @ %bb.0: @ %entry
23 ; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3
24 ; CHECK-LE-NEXT: vstrw.32 q0, [r1]
25 ; CHECK-LE-NEXT: bx lr
27 ; CHECK-BE-LABEL: ldrwu32_3:
28 ; CHECK-BE: @ %bb.0: @ %entry
29 ; CHECK-BE-NEXT: vldrw.u32 q0, [r0]
30 ; CHECK-BE-NEXT: adds r0, #3
31 ; CHECK-BE-NEXT: vstrw.32 q0, [r1]
32 ; CHECK-BE-NEXT: bx lr
34 %z = getelementptr inbounds i8, i8* %x, i32 3
35 %0 = bitcast i8* %x to <4 x i32>*
36 %1 = load <4 x i32>, <4 x i32>* %0, align 4
37 %2 = bitcast i8* %y to <4 x i32>*
38 store <4 x i32> %1, <4 x i32>* %2, align 4
; <4 x i32> load (align 4) with %z = %x - 4 bytes: expects the negative
; increment folded into vldrw.u32 as a `[r0], #-4` writeback.
42 define i8* @ldrwu32_m4(i8* %x, i8* %y) {
43 ; CHECK-LABEL: ldrwu32_m4:
44 ; CHECK: @ %bb.0: @ %entry
45 ; CHECK-NEXT: vldrw.u32 q0, [r0], #-4
46 ; CHECK-NEXT: vstrw.32 q0, [r1]
49 %z = getelementptr inbounds i8, i8* %x, i32 -4
50 %0 = bitcast i8* %x to <4 x i32>*
51 %1 = load <4 x i32>, <4 x i32>* %0, align 4
52 %2 = bitcast i8* %y to <4 x i32>*
53 store <4 x i32> %1, <4 x i32>* %2, align 4
; <4 x i32> load (align 4) with %z = %x + 508 bytes: expects the increment
; still folded into vldrw.u32 as a `[r0], #508` writeback.
57 define i8* @ldrwu32_508(i8* %x, i8* %y) {
58 ; CHECK-LABEL: ldrwu32_508:
59 ; CHECK: @ %bb.0: @ %entry
60 ; CHECK-NEXT: vldrw.u32 q0, [r0], #508
61 ; CHECK-NEXT: vstrw.32 q0, [r1]
64 %z = getelementptr inbounds i8, i8* %x, i32 508
65 %0 = bitcast i8* %x to <4 x i32>*
66 %1 = load <4 x i32>, <4 x i32>* %0, align 4
67 %2 = bitcast i8* %y to <4 x i32>*
68 store <4 x i32> %1, <4 x i32>* %2, align 4
; <4 x i32> load (align 4) with %z = %x + 512 bytes: expects a plain
; vldrw.u32 [r0] and a separate `add.w r0, r0, #512` (increment not folded).
72 define i8* @ldrwu32_512(i8* %x, i8* %y) {
73 ; CHECK-LABEL: ldrwu32_512:
74 ; CHECK: @ %bb.0: @ %entry
75 ; CHECK-NEXT: vldrw.u32 q0, [r0]
76 ; CHECK-NEXT: add.w r0, r0, #512
77 ; CHECK-NEXT: vstrw.32 q0, [r1]
80 %z = getelementptr inbounds i8, i8* %x, i32 512
81 %0 = bitcast i8* %x to <4 x i32>*
82 %1 = load <4 x i32>, <4 x i32>* %0, align 4
83 %2 = bitcast i8* %y to <4 x i32>*
84 store <4 x i32> %1, <4 x i32>* %2, align 4
; <4 x i32> load (align 4) with %z = %x - 508 bytes: expects the increment
; folded into vldrw.u32 as a `[r0], #-508` writeback.
88 define i8* @ldrwu32_m508(i8* %x, i8* %y) {
89 ; CHECK-LABEL: ldrwu32_m508:
90 ; CHECK: @ %bb.0: @ %entry
91 ; CHECK-NEXT: vldrw.u32 q0, [r0], #-508
92 ; CHECK-NEXT: vstrw.32 q0, [r1]
95 %z = getelementptr inbounds i8, i8* %x, i32 -508
96 %0 = bitcast i8* %x to <4 x i32>*
97 %1 = load <4 x i32>, <4 x i32>* %0, align 4
98 %2 = bitcast i8* %y to <4 x i32>*
99 store <4 x i32> %1, <4 x i32>* %2, align 4
; <4 x i32> load (align 4) with %z = %x - 512 bytes: expects a plain
; vldrw.u32 [r0] and a separate `sub.w r0, r0, #512` (increment not folded).
103 define i8* @ldrwu32_m512(i8* %x, i8* %y) {
104 ; CHECK-LABEL: ldrwu32_m512:
105 ; CHECK: @ %bb.0: @ %entry
106 ; CHECK-NEXT: vldrw.u32 q0, [r0]
107 ; CHECK-NEXT: sub.w r0, r0, #512
108 ; CHECK-NEXT: vstrw.32 q0, [r1]
111 %z = getelementptr inbounds i8, i8* %x, i32 -512
112 %0 = bitcast i8* %x to <4 x i32>*
113 %1 = load <4 x i32>, <4 x i32>* %0, align 4
114 %2 = bitcast i8* %y to <4 x i32>*
115 store <4 x i32> %1, <4 x i32>* %2, align 4
; <4 x i16> load (align 2) zero-extended to <4 x i32>, with %z = %x + 4 bytes:
; expects a widening vldrh.u32 with `[r0], #4` writeback.
120 define i8* @ldrhu32_4(i8* %x, i8* %y) {
121 ; CHECK-LABEL: ldrhu32_4:
122 ; CHECK: @ %bb.0: @ %entry
123 ; CHECK-NEXT: vldrh.u32 q0, [r0], #4
124 ; CHECK-NEXT: vstrw.32 q0, [r1]
127 %z = getelementptr inbounds i8, i8* %x, i32 4
128 %0 = bitcast i8* %x to <4 x i16>*
129 %1 = load <4 x i16>, <4 x i16>* %0, align 2
130 %2 = zext <4 x i16> %1 to <4 x i32>
131 %3 = bitcast i8* %y to <4 x i32>*
132 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i16> load (align 2) zero-extended to <4 x i32>, with %z = %x + 3 bytes:
; expects a plain vldrh.u32 [r0] and a separate `adds r0, #3`.
136 define i8* @ldrhu32_3(i8* %x, i8* %y) {
137 ; CHECK-LABEL: ldrhu32_3:
138 ; CHECK: @ %bb.0: @ %entry
139 ; CHECK-NEXT: vldrh.u32 q0, [r0]
140 ; CHECK-NEXT: adds r0, #3
141 ; CHECK-NEXT: vstrw.32 q0, [r1]
144 %z = getelementptr inbounds i8, i8* %x, i32 3
145 %0 = bitcast i8* %x to <4 x i16>*
146 %1 = load <4 x i16>, <4 x i16>* %0, align 2
147 %2 = zext <4 x i16> %1 to <4 x i32>
148 %3 = bitcast i8* %y to <4 x i32>*
149 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i16> load (align 2) zero-extended to <4 x i32>, with %z = %x + 2 bytes:
; expects vldrh.u32 with `[r0], #2` writeback.
153 define i8* @ldrhu32_2(i8* %x, i8* %y) {
154 ; CHECK-LABEL: ldrhu32_2:
155 ; CHECK: @ %bb.0: @ %entry
156 ; CHECK-NEXT: vldrh.u32 q0, [r0], #2
157 ; CHECK-NEXT: vstrw.32 q0, [r1]
160 %z = getelementptr inbounds i8, i8* %x, i32 2
161 %0 = bitcast i8* %x to <4 x i16>*
162 %1 = load <4 x i16>, <4 x i16>* %0, align 2
163 %2 = zext <4 x i16> %1 to <4 x i32>
164 %3 = bitcast i8* %y to <4 x i32>*
165 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i16> load (align 2) zero-extended to <4 x i32>, with %z = %x + 254 bytes:
; expects vldrh.u32 with `[r0], #254` writeback.
169 define i8* @ldrhu32_254(i8* %x, i8* %y) {
170 ; CHECK-LABEL: ldrhu32_254:
171 ; CHECK: @ %bb.0: @ %entry
172 ; CHECK-NEXT: vldrh.u32 q0, [r0], #254
173 ; CHECK-NEXT: vstrw.32 q0, [r1]
176 %z = getelementptr inbounds i8, i8* %x, i32 254
177 %0 = bitcast i8* %x to <4 x i16>*
178 %1 = load <4 x i16>, <4 x i16>* %0, align 2
179 %2 = zext <4 x i16> %1 to <4 x i32>
180 %3 = bitcast i8* %y to <4 x i32>*
181 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i16> load (align 2) zero-extended to <4 x i32>, with %z = %x + 256 bytes:
; expects a plain vldrh.u32 [r0] and a separate `add.w r0, r0, #256`.
185 define i8* @ldrhu32_256(i8* %x, i8* %y) {
186 ; CHECK-LABEL: ldrhu32_256:
187 ; CHECK: @ %bb.0: @ %entry
188 ; CHECK-NEXT: vldrh.u32 q0, [r0]
189 ; CHECK-NEXT: add.w r0, r0, #256
190 ; CHECK-NEXT: vstrw.32 q0, [r1]
193 %z = getelementptr inbounds i8, i8* %x, i32 256
194 %0 = bitcast i8* %x to <4 x i16>*
195 %1 = load <4 x i16>, <4 x i16>* %0, align 2
196 %2 = zext <4 x i16> %1 to <4 x i32>
197 %3 = bitcast i8* %y to <4 x i32>*
198 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i16> load (align 2) zero-extended to <4 x i32>, with %z = %x - 254 bytes:
; expects vldrh.u32 with `[r0], #-254` writeback.
202 define i8* @ldrhu32_m254(i8* %x, i8* %y) {
203 ; CHECK-LABEL: ldrhu32_m254:
204 ; CHECK: @ %bb.0: @ %entry
205 ; CHECK-NEXT: vldrh.u32 q0, [r0], #-254
206 ; CHECK-NEXT: vstrw.32 q0, [r1]
209 %z = getelementptr inbounds i8, i8* %x, i32 -254
210 %0 = bitcast i8* %x to <4 x i16>*
211 %1 = load <4 x i16>, <4 x i16>* %0, align 2
212 %2 = zext <4 x i16> %1 to <4 x i32>
213 %3 = bitcast i8* %y to <4 x i32>*
214 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i16> load (align 2) zero-extended to <4 x i32>, with %z = %x - 256 bytes:
; expects a plain vldrh.u32 [r0] and a separate `sub.w r0, r0, #256`.
218 define i8* @ldrhu32_m256(i8* %x, i8* %y) {
219 ; CHECK-LABEL: ldrhu32_m256:
220 ; CHECK: @ %bb.0: @ %entry
221 ; CHECK-NEXT: vldrh.u32 q0, [r0]
222 ; CHECK-NEXT: sub.w r0, r0, #256
223 ; CHECK-NEXT: vstrw.32 q0, [r1]
226 %z = getelementptr inbounds i8, i8* %x, i32 -256
227 %0 = bitcast i8* %x to <4 x i16>*
228 %1 = load <4 x i16>, <4 x i16>* %0, align 2
229 %2 = zext <4 x i16> %1 to <4 x i32>
230 %3 = bitcast i8* %y to <4 x i32>*
231 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i16> load (align 2) sign-extended to <4 x i32>, with %z = %x + 4 bytes:
; expects a widening vldrh.s32 with `[r0], #4` writeback.
236 define i8* @ldrhs32_4(i8* %x, i8* %y) {
237 ; CHECK-LABEL: ldrhs32_4:
238 ; CHECK: @ %bb.0: @ %entry
239 ; CHECK-NEXT: vldrh.s32 q0, [r0], #4
240 ; CHECK-NEXT: vstrw.32 q0, [r1]
243 %z = getelementptr inbounds i8, i8* %x, i32 4
244 %0 = bitcast i8* %x to <4 x i16>*
245 %1 = load <4 x i16>, <4 x i16>* %0, align 2
246 %2 = sext <4 x i16> %1 to <4 x i32>
247 %3 = bitcast i8* %y to <4 x i32>*
248 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i16> load (align 2) sign-extended to <4 x i32>, with %z = %x + 3 bytes:
; expects a plain vldrh.s32 [r0] and a separate `adds r0, #3`.
252 define i8* @ldrhs32_3(i8* %x, i8* %y) {
253 ; CHECK-LABEL: ldrhs32_3:
254 ; CHECK: @ %bb.0: @ %entry
255 ; CHECK-NEXT: vldrh.s32 q0, [r0]
256 ; CHECK-NEXT: adds r0, #3
257 ; CHECK-NEXT: vstrw.32 q0, [r1]
260 %z = getelementptr inbounds i8, i8* %x, i32 3
261 %0 = bitcast i8* %x to <4 x i16>*
262 %1 = load <4 x i16>, <4 x i16>* %0, align 2
263 %2 = sext <4 x i16> %1 to <4 x i32>
264 %3 = bitcast i8* %y to <4 x i32>*
265 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i16> load (align 2) sign-extended to <4 x i32>, with %z = %x + 2 bytes:
; expects vldrh.s32 with `[r0], #2` writeback.
269 define i8* @ldrhs32_2(i8* %x, i8* %y) {
270 ; CHECK-LABEL: ldrhs32_2:
271 ; CHECK: @ %bb.0: @ %entry
272 ; CHECK-NEXT: vldrh.s32 q0, [r0], #2
273 ; CHECK-NEXT: vstrw.32 q0, [r1]
276 %z = getelementptr inbounds i8, i8* %x, i32 2
277 %0 = bitcast i8* %x to <4 x i16>*
278 %1 = load <4 x i16>, <4 x i16>* %0, align 2
279 %2 = sext <4 x i16> %1 to <4 x i32>
280 %3 = bitcast i8* %y to <4 x i32>*
281 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i16> load (align 2) sign-extended to <4 x i32>, with %z = %x + 254 bytes:
; expects vldrh.s32 with `[r0], #254` writeback.
285 define i8* @ldrhs32_254(i8* %x, i8* %y) {
286 ; CHECK-LABEL: ldrhs32_254:
287 ; CHECK: @ %bb.0: @ %entry
288 ; CHECK-NEXT: vldrh.s32 q0, [r0], #254
289 ; CHECK-NEXT: vstrw.32 q0, [r1]
292 %z = getelementptr inbounds i8, i8* %x, i32 254
293 %0 = bitcast i8* %x to <4 x i16>*
294 %1 = load <4 x i16>, <4 x i16>* %0, align 2
295 %2 = sext <4 x i16> %1 to <4 x i32>
296 %3 = bitcast i8* %y to <4 x i32>*
297 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i16> load (align 2) sign-extended to <4 x i32>, with %z = %x + 256 bytes:
; expects a plain vldrh.s32 [r0] and a separate `add.w r0, r0, #256`.
301 define i8* @ldrhs32_256(i8* %x, i8* %y) {
302 ; CHECK-LABEL: ldrhs32_256:
303 ; CHECK: @ %bb.0: @ %entry
304 ; CHECK-NEXT: vldrh.s32 q0, [r0]
305 ; CHECK-NEXT: add.w r0, r0, #256
306 ; CHECK-NEXT: vstrw.32 q0, [r1]
309 %z = getelementptr inbounds i8, i8* %x, i32 256
310 %0 = bitcast i8* %x to <4 x i16>*
311 %1 = load <4 x i16>, <4 x i16>* %0, align 2
312 %2 = sext <4 x i16> %1 to <4 x i32>
313 %3 = bitcast i8* %y to <4 x i32>*
314 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i16> load (align 2) sign-extended to <4 x i32>, with %z = %x - 254 bytes:
; expects vldrh.s32 with `[r0], #-254` writeback.
318 define i8* @ldrhs32_m254(i8* %x, i8* %y) {
319 ; CHECK-LABEL: ldrhs32_m254:
320 ; CHECK: @ %bb.0: @ %entry
321 ; CHECK-NEXT: vldrh.s32 q0, [r0], #-254
322 ; CHECK-NEXT: vstrw.32 q0, [r1]
325 %z = getelementptr inbounds i8, i8* %x, i32 -254
326 %0 = bitcast i8* %x to <4 x i16>*
327 %1 = load <4 x i16>, <4 x i16>* %0, align 2
328 %2 = sext <4 x i16> %1 to <4 x i32>
329 %3 = bitcast i8* %y to <4 x i32>*
330 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i16> load (align 2) sign-extended to <4 x i32>, with %z = %x - 256 bytes:
; expects a plain vldrh.s32 [r0] and a separate `sub.w r0, r0, #256`.
334 define i8* @ldrhs32_m256(i8* %x, i8* %y) {
335 ; CHECK-LABEL: ldrhs32_m256:
336 ; CHECK: @ %bb.0: @ %entry
337 ; CHECK-NEXT: vldrh.s32 q0, [r0]
338 ; CHECK-NEXT: sub.w r0, r0, #256
339 ; CHECK-NEXT: vstrw.32 q0, [r1]
342 %z = getelementptr inbounds i8, i8* %x, i32 -256
343 %0 = bitcast i8* %x to <4 x i16>*
344 %1 = load <4 x i16>, <4 x i16>* %0, align 2
345 %2 = sext <4 x i16> %1 to <4 x i32>
346 %3 = bitcast i8* %y to <4 x i32>*
347 store <4 x i32> %2, <4 x i32>* %3, align 4
; <8 x i16> load from %x (align 2) stored to %y, with %z = %x + 4 bytes:
; expects vldrh.u16 with `[r0], #4` writeback.
352 define i8* @ldrhu16_4(i8* %x, i8* %y) {
353 ; CHECK-LABEL: ldrhu16_4:
354 ; CHECK: @ %bb.0: @ %entry
355 ; CHECK-NEXT: vldrh.u16 q0, [r0], #4
356 ; CHECK-NEXT: vstrh.16 q0, [r1]
359 %z = getelementptr inbounds i8, i8* %x, i32 4
360 %0 = bitcast i8* %x to <8 x i16>*
361 %1 = load <8 x i16>, <8 x i16>* %0, align 2
362 %2 = bitcast i8* %y to <8 x i16>*
363 store <8 x i16> %1, <8 x i16>* %2, align 2
; <8 x i16> load (align 2) with %z = %x + 3 bytes: the little-endian run
; expects a vldrb.u8 with `[r0], #3` writeback, the big-endian run a plain
; vldrh.u16 followed by a separate `adds r0, #3`.
367 define i8* @ldrhu16_3(i8* %x, i8* %y) {
368 ; CHECK-LE-LABEL: ldrhu16_3:
369 ; CHECK-LE: @ %bb.0: @ %entry
370 ; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3
371 ; CHECK-LE-NEXT: vstrh.16 q0, [r1]
372 ; CHECK-LE-NEXT: bx lr
374 ; CHECK-BE-LABEL: ldrhu16_3:
375 ; CHECK-BE: @ %bb.0: @ %entry
376 ; CHECK-BE-NEXT: vldrh.u16 q0, [r0]
377 ; CHECK-BE-NEXT: adds r0, #3
378 ; CHECK-BE-NEXT: vstrh.16 q0, [r1]
379 ; CHECK-BE-NEXT: bx lr
381 %z = getelementptr inbounds i8, i8* %x, i32 3
382 %0 = bitcast i8* %x to <8 x i16>*
383 %1 = load <8 x i16>, <8 x i16>* %0, align 2
384 %2 = bitcast i8* %y to <8 x i16>*
385 store <8 x i16> %1, <8 x i16>* %2, align 2
; <8 x i16> load (align 2) with %z = %x + 2 bytes: expects vldrh.u16 with
; `[r0], #2` writeback.
389 define i8* @ldrhu16_2(i8* %x, i8* %y) {
390 ; CHECK-LABEL: ldrhu16_2:
391 ; CHECK: @ %bb.0: @ %entry
392 ; CHECK-NEXT: vldrh.u16 q0, [r0], #2
393 ; CHECK-NEXT: vstrh.16 q0, [r1]
396 %z = getelementptr inbounds i8, i8* %x, i32 2
397 %0 = bitcast i8* %x to <8 x i16>*
398 %1 = load <8 x i16>, <8 x i16>* %0, align 2
399 %2 = bitcast i8* %y to <8 x i16>*
400 store <8 x i16> %1, <8 x i16>* %2, align 2
; <8 x i16> load (align 2) with %z = %x + 254 bytes: expects vldrh.u16 with
; `[r0], #254` writeback.
404 define i8* @ldrhu16_254(i8* %x, i8* %y) {
405 ; CHECK-LABEL: ldrhu16_254:
406 ; CHECK: @ %bb.0: @ %entry
407 ; CHECK-NEXT: vldrh.u16 q0, [r0], #254
408 ; CHECK-NEXT: vstrh.16 q0, [r1]
411 %z = getelementptr inbounds i8, i8* %x, i32 254
412 %0 = bitcast i8* %x to <8 x i16>*
413 %1 = load <8 x i16>, <8 x i16>* %0, align 2
414 %2 = bitcast i8* %y to <8 x i16>*
415 store <8 x i16> %1, <8 x i16>* %2, align 2
; <8 x i16> load (align 2) with %z = %x + 256 bytes: expects a plain
; vldrh.u16 [r0] and a separate `add.w r0, r0, #256`.
419 define i8* @ldrhu16_256(i8* %x, i8* %y) {
420 ; CHECK-LABEL: ldrhu16_256:
421 ; CHECK: @ %bb.0: @ %entry
422 ; CHECK-NEXT: vldrh.u16 q0, [r0]
423 ; CHECK-NEXT: add.w r0, r0, #256
424 ; CHECK-NEXT: vstrh.16 q0, [r1]
427 %z = getelementptr inbounds i8, i8* %x, i32 256
428 %0 = bitcast i8* %x to <8 x i16>*
429 %1 = load <8 x i16>, <8 x i16>* %0, align 2
430 %2 = bitcast i8* %y to <8 x i16>*
431 store <8 x i16> %1, <8 x i16>* %2, align 2
; <8 x i16> load (align 2) with %z = %x - 254 bytes: expects vldrh.u16 with
; `[r0], #-254` writeback.
435 define i8* @ldrhu16_m254(i8* %x, i8* %y) {
436 ; CHECK-LABEL: ldrhu16_m254:
437 ; CHECK: @ %bb.0: @ %entry
438 ; CHECK-NEXT: vldrh.u16 q0, [r0], #-254
439 ; CHECK-NEXT: vstrh.16 q0, [r1]
442 %z = getelementptr inbounds i8, i8* %x, i32 -254
443 %0 = bitcast i8* %x to <8 x i16>*
444 %1 = load <8 x i16>, <8 x i16>* %0, align 2
445 %2 = bitcast i8* %y to <8 x i16>*
446 store <8 x i16> %1, <8 x i16>* %2, align 2
; <8 x i16> load (align 2) with %z = %x - 256 bytes: expects a plain
; vldrh.u16 [r0] and a separate `sub.w r0, r0, #256`.
450 define i8* @ldrhu16_m256(i8* %x, i8* %y) {
451 ; CHECK-LABEL: ldrhu16_m256:
452 ; CHECK: @ %bb.0: @ %entry
453 ; CHECK-NEXT: vldrh.u16 q0, [r0]
454 ; CHECK-NEXT: sub.w r0, r0, #256
455 ; CHECK-NEXT: vstrh.16 q0, [r1]
458 %z = getelementptr inbounds i8, i8* %x, i32 -256
459 %0 = bitcast i8* %x to <8 x i16>*
460 %1 = load <8 x i16>, <8 x i16>* %0, align 2
461 %2 = bitcast i8* %y to <8 x i16>*
462 store <8 x i16> %1, <8 x i16>* %2, align 2
; <4 x i8> load (align 1) zero-extended to <4 x i32>, with %z = %x + 4 bytes:
; expects a widening vldrb.u32 with `[r0], #4` writeback.
467 define i8* @ldrbu32_4(i8* %x, i8* %y) {
468 ; CHECK-LABEL: ldrbu32_4:
469 ; CHECK: @ %bb.0: @ %entry
470 ; CHECK-NEXT: vldrb.u32 q0, [r0], #4
471 ; CHECK-NEXT: vstrw.32 q0, [r1]
474 %z = getelementptr inbounds i8, i8* %x, i32 4
475 %0 = bitcast i8* %x to <4 x i8>*
476 %1 = load <4 x i8>, <4 x i8>* %0, align 1
477 %2 = zext <4 x i8> %1 to <4 x i32>
478 %3 = bitcast i8* %y to <4 x i32>*
479 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i8> load (align 1) zero-extended to <4 x i32>, with %z = %x + 3 bytes:
; expects vldrb.u32 with `[r0], #3` writeback.
483 define i8* @ldrbu32_3(i8* %x, i8* %y) {
484 ; CHECK-LABEL: ldrbu32_3:
485 ; CHECK: @ %bb.0: @ %entry
486 ; CHECK-NEXT: vldrb.u32 q0, [r0], #3
487 ; CHECK-NEXT: vstrw.32 q0, [r1]
490 %z = getelementptr inbounds i8, i8* %x, i32 3
491 %0 = bitcast i8* %x to <4 x i8>*
492 %1 = load <4 x i8>, <4 x i8>* %0, align 1
493 %2 = zext <4 x i8> %1 to <4 x i32>
494 %3 = bitcast i8* %y to <4 x i32>*
495 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i8> load (align 1) zero-extended to <4 x i32>, with %z = %x + 127 bytes:
; expects vldrb.u32 with `[r0], #127` writeback.
499 define i8* @ldrbu32_127(i8* %x, i8* %y) {
500 ; CHECK-LABEL: ldrbu32_127:
501 ; CHECK: @ %bb.0: @ %entry
502 ; CHECK-NEXT: vldrb.u32 q0, [r0], #127
503 ; CHECK-NEXT: vstrw.32 q0, [r1]
506 %z = getelementptr inbounds i8, i8* %x, i32 127
507 %0 = bitcast i8* %x to <4 x i8>*
508 %1 = load <4 x i8>, <4 x i8>* %0, align 1
509 %2 = zext <4 x i8> %1 to <4 x i32>
510 %3 = bitcast i8* %y to <4 x i32>*
511 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i8> load (align 1) zero-extended to <4 x i32>, with %z = %x + 128 bytes:
; expects a plain vldrb.u32 [r0] and a separate `adds r0, #128`.
515 define i8* @ldrbu32_128(i8* %x, i8* %y) {
516 ; CHECK-LABEL: ldrbu32_128:
517 ; CHECK: @ %bb.0: @ %entry
518 ; CHECK-NEXT: vldrb.u32 q0, [r0]
519 ; CHECK-NEXT: adds r0, #128
520 ; CHECK-NEXT: vstrw.32 q0, [r1]
523 %z = getelementptr inbounds i8, i8* %x, i32 128
524 %0 = bitcast i8* %x to <4 x i8>*
525 %1 = load <4 x i8>, <4 x i8>* %0, align 1
526 %2 = zext <4 x i8> %1 to <4 x i32>
527 %3 = bitcast i8* %y to <4 x i32>*
528 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i8> load (align 1) zero-extended to <4 x i32>, with %z = %x - 127 bytes:
; expects vldrb.u32 with `[r0], #-127` writeback.
532 define i8* @ldrbu32_m127(i8* %x, i8* %y) {
533 ; CHECK-LABEL: ldrbu32_m127:
534 ; CHECK: @ %bb.0: @ %entry
535 ; CHECK-NEXT: vldrb.u32 q0, [r0], #-127
536 ; CHECK-NEXT: vstrw.32 q0, [r1]
539 %z = getelementptr inbounds i8, i8* %x, i32 -127
540 %0 = bitcast i8* %x to <4 x i8>*
541 %1 = load <4 x i8>, <4 x i8>* %0, align 1
542 %2 = zext <4 x i8> %1 to <4 x i32>
543 %3 = bitcast i8* %y to <4 x i32>*
544 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i8> load (align 1) zero-extended to <4 x i32>, with %z = %x - 128 bytes:
; expects a plain vldrb.u32 [r0] and a separate `subs r0, #128`.
548 define i8* @ldrbu32_m128(i8* %x, i8* %y) {
549 ; CHECK-LABEL: ldrbu32_m128:
550 ; CHECK: @ %bb.0: @ %entry
551 ; CHECK-NEXT: vldrb.u32 q0, [r0]
552 ; CHECK-NEXT: subs r0, #128
553 ; CHECK-NEXT: vstrw.32 q0, [r1]
556 %z = getelementptr inbounds i8, i8* %x, i32 -128
557 %0 = bitcast i8* %x to <4 x i8>*
558 %1 = load <4 x i8>, <4 x i8>* %0, align 1
559 %2 = zext <4 x i8> %1 to <4 x i32>
560 %3 = bitcast i8* %y to <4 x i32>*
561 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i8> load (align 1) sign-extended to <4 x i32>, with %z = %x + 4 bytes:
; expects a widening vldrb.s32 with `[r0], #4` writeback.
566 define i8* @ldrbs32_4(i8* %x, i8* %y) {
567 ; CHECK-LABEL: ldrbs32_4:
568 ; CHECK: @ %bb.0: @ %entry
569 ; CHECK-NEXT: vldrb.s32 q0, [r0], #4
570 ; CHECK-NEXT: vstrw.32 q0, [r1]
573 %z = getelementptr inbounds i8, i8* %x, i32 4
574 %0 = bitcast i8* %x to <4 x i8>*
575 %1 = load <4 x i8>, <4 x i8>* %0, align 1
576 %2 = sext <4 x i8> %1 to <4 x i32>
577 %3 = bitcast i8* %y to <4 x i32>*
578 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i8> load (align 1) sign-extended to <4 x i32>, with %z = %x + 3 bytes:
; expects vldrb.s32 with `[r0], #3` writeback.
582 define i8* @ldrbs32_3(i8* %x, i8* %y) {
583 ; CHECK-LABEL: ldrbs32_3:
584 ; CHECK: @ %bb.0: @ %entry
585 ; CHECK-NEXT: vldrb.s32 q0, [r0], #3
586 ; CHECK-NEXT: vstrw.32 q0, [r1]
589 %z = getelementptr inbounds i8, i8* %x, i32 3
590 %0 = bitcast i8* %x to <4 x i8>*
591 %1 = load <4 x i8>, <4 x i8>* %0, align 1
592 %2 = sext <4 x i8> %1 to <4 x i32>
593 %3 = bitcast i8* %y to <4 x i32>*
594 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i8> load (align 1) sign-extended to <4 x i32>, with %z = %x + 127 bytes:
; expects vldrb.s32 with `[r0], #127` writeback.
598 define i8* @ldrbs32_127(i8* %x, i8* %y) {
599 ; CHECK-LABEL: ldrbs32_127:
600 ; CHECK: @ %bb.0: @ %entry
601 ; CHECK-NEXT: vldrb.s32 q0, [r0], #127
602 ; CHECK-NEXT: vstrw.32 q0, [r1]
605 %z = getelementptr inbounds i8, i8* %x, i32 127
606 %0 = bitcast i8* %x to <4 x i8>*
607 %1 = load <4 x i8>, <4 x i8>* %0, align 1
608 %2 = sext <4 x i8> %1 to <4 x i32>
609 %3 = bitcast i8* %y to <4 x i32>*
610 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i8> load (align 1) sign-extended to <4 x i32>, with %z = %x + 128 bytes:
; expects a plain vldrb.s32 [r0] and a separate `adds r0, #128`.
614 define i8* @ldrbs32_128(i8* %x, i8* %y) {
615 ; CHECK-LABEL: ldrbs32_128:
616 ; CHECK: @ %bb.0: @ %entry
617 ; CHECK-NEXT: vldrb.s32 q0, [r0]
618 ; CHECK-NEXT: adds r0, #128
619 ; CHECK-NEXT: vstrw.32 q0, [r1]
622 %z = getelementptr inbounds i8, i8* %x, i32 128
623 %0 = bitcast i8* %x to <4 x i8>*
624 %1 = load <4 x i8>, <4 x i8>* %0, align 1
625 %2 = sext <4 x i8> %1 to <4 x i32>
626 %3 = bitcast i8* %y to <4 x i32>*
627 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i8> load (align 1) sign-extended to <4 x i32>, with %z = %x - 127 bytes:
; expects vldrb.s32 with `[r0], #-127` writeback.
631 define i8* @ldrbs32_m127(i8* %x, i8* %y) {
632 ; CHECK-LABEL: ldrbs32_m127:
633 ; CHECK: @ %bb.0: @ %entry
634 ; CHECK-NEXT: vldrb.s32 q0, [r0], #-127
635 ; CHECK-NEXT: vstrw.32 q0, [r1]
638 %z = getelementptr inbounds i8, i8* %x, i32 -127
639 %0 = bitcast i8* %x to <4 x i8>*
640 %1 = load <4 x i8>, <4 x i8>* %0, align 1
641 %2 = sext <4 x i8> %1 to <4 x i32>
642 %3 = bitcast i8* %y to <4 x i32>*
643 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i8> load (align 1) sign-extended to <4 x i32>, with %z = %x - 128 bytes:
; expects a plain vldrb.s32 [r0] and a separate `subs r0, #128`.
647 define i8* @ldrbs32_m128(i8* %x, i8* %y) {
648 ; CHECK-LABEL: ldrbs32_m128:
649 ; CHECK: @ %bb.0: @ %entry
650 ; CHECK-NEXT: vldrb.s32 q0, [r0]
651 ; CHECK-NEXT: subs r0, #128
652 ; CHECK-NEXT: vstrw.32 q0, [r1]
655 %z = getelementptr inbounds i8, i8* %x, i32 -128
656 %0 = bitcast i8* %x to <4 x i8>*
657 %1 = load <4 x i8>, <4 x i8>* %0, align 1
658 %2 = sext <4 x i8> %1 to <4 x i32>
659 %3 = bitcast i8* %y to <4 x i32>*
660 store <4 x i32> %2, <4 x i32>* %3, align 4
; <8 x i8> load (align 1) zero-extended to <8 x i16>, with %z = %x + 4 bytes:
; expects a widening vldrb.u16 with `[r0], #4` writeback.
665 define i8* @ldrbu16_4(i8* %x, i8* %y) {
666 ; CHECK-LABEL: ldrbu16_4:
667 ; CHECK: @ %bb.0: @ %entry
668 ; CHECK-NEXT: vldrb.u16 q0, [r0], #4
669 ; CHECK-NEXT: vstrh.16 q0, [r1]
672 %z = getelementptr inbounds i8, i8* %x, i32 4
673 %0 = bitcast i8* %x to <8 x i8>*
674 %1 = load <8 x i8>, <8 x i8>* %0, align 1
675 %2 = zext <8 x i8> %1 to <8 x i16>
676 %3 = bitcast i8* %y to <8 x i16>*
677 store <8 x i16> %2, <8 x i16>* %3, align 2
; <8 x i8> load (align 1) zero-extended to <8 x i16>, with %z = %x + 3 bytes:
; expects vldrb.u16 with `[r0], #3` writeback.
681 define i8* @ldrbu16_3(i8* %x, i8* %y) {
682 ; CHECK-LABEL: ldrbu16_3:
683 ; CHECK: @ %bb.0: @ %entry
684 ; CHECK-NEXT: vldrb.u16 q0, [r0], #3
685 ; CHECK-NEXT: vstrh.16 q0, [r1]
688 %z = getelementptr inbounds i8, i8* %x, i32 3
689 %0 = bitcast i8* %x to <8 x i8>*
690 %1 = load <8 x i8>, <8 x i8>* %0, align 1
691 %2 = zext <8 x i8> %1 to <8 x i16>
692 %3 = bitcast i8* %y to <8 x i16>*
693 store <8 x i16> %2, <8 x i16>* %3, align 2
; <8 x i8> load (align 1) zero-extended to <8 x i16>, with %z = %x + 127 bytes:
; expects vldrb.u16 with `[r0], #127` writeback.
697 define i8* @ldrbu16_127(i8* %x, i8* %y) {
698 ; CHECK-LABEL: ldrbu16_127:
699 ; CHECK: @ %bb.0: @ %entry
700 ; CHECK-NEXT: vldrb.u16 q0, [r0], #127
701 ; CHECK-NEXT: vstrh.16 q0, [r1]
704 %z = getelementptr inbounds i8, i8* %x, i32 127
705 %0 = bitcast i8* %x to <8 x i8>*
706 %1 = load <8 x i8>, <8 x i8>* %0, align 1
707 %2 = zext <8 x i8> %1 to <8 x i16>
708 %3 = bitcast i8* %y to <8 x i16>*
709 store <8 x i16> %2, <8 x i16>* %3, align 2
; <8 x i8> load (align 1) zero-extended to <8 x i16>, with %z = %x + 128 bytes:
; expects a plain vldrb.u16 [r0] and a separate `adds r0, #128`.
713 define i8* @ldrbu16_128(i8* %x, i8* %y) {
714 ; CHECK-LABEL: ldrbu16_128:
715 ; CHECK: @ %bb.0: @ %entry
716 ; CHECK-NEXT: vldrb.u16 q0, [r0]
717 ; CHECK-NEXT: adds r0, #128
718 ; CHECK-NEXT: vstrh.16 q0, [r1]
721 %z = getelementptr inbounds i8, i8* %x, i32 128
722 %0 = bitcast i8* %x to <8 x i8>*
723 %1 = load <8 x i8>, <8 x i8>* %0, align 1
724 %2 = zext <8 x i8> %1 to <8 x i16>
725 %3 = bitcast i8* %y to <8 x i16>*
726 store <8 x i16> %2, <8 x i16>* %3, align 2
; <8 x i8> load (align 1) zero-extended to <8 x i16>, with %z = %x - 127 bytes:
; expects vldrb.u16 with `[r0], #-127` writeback.
730 define i8* @ldrbu16_m127(i8* %x, i8* %y) {
731 ; CHECK-LABEL: ldrbu16_m127:
732 ; CHECK: @ %bb.0: @ %entry
733 ; CHECK-NEXT: vldrb.u16 q0, [r0], #-127
734 ; CHECK-NEXT: vstrh.16 q0, [r1]
737 %z = getelementptr inbounds i8, i8* %x, i32 -127
738 %0 = bitcast i8* %x to <8 x i8>*
739 %1 = load <8 x i8>, <8 x i8>* %0, align 1
740 %2 = zext <8 x i8> %1 to <8 x i16>
741 %3 = bitcast i8* %y to <8 x i16>*
742 store <8 x i16> %2, <8 x i16>* %3, align 2
; <8 x i8> load (align 1) zero-extended to <8 x i16>, with %z = %x - 128 bytes:
; expects a plain vldrb.u16 [r0] and a separate `subs r0, #128`.
746 define i8* @ldrbu16_m128(i8* %x, i8* %y) {
747 ; CHECK-LABEL: ldrbu16_m128:
748 ; CHECK: @ %bb.0: @ %entry
749 ; CHECK-NEXT: vldrb.u16 q0, [r0]
750 ; CHECK-NEXT: subs r0, #128
751 ; CHECK-NEXT: vstrh.16 q0, [r1]
754 %z = getelementptr inbounds i8, i8* %x, i32 -128
755 %0 = bitcast i8* %x to <8 x i8>*
756 %1 = load <8 x i8>, <8 x i8>* %0, align 1
757 %2 = zext <8 x i8> %1 to <8 x i16>
758 %3 = bitcast i8* %y to <8 x i16>*
759 store <8 x i16> %2, <8 x i16>* %3, align 2
; <8 x i8> load (align 1) sign-extended to <8 x i16>, with %z = %x + 4 bytes:
; expects a widening vldrb.s16 with `[r0], #4` writeback.
764 define i8* @ldrbs16_4(i8* %x, i8* %y) {
765 ; CHECK-LABEL: ldrbs16_4:
766 ; CHECK: @ %bb.0: @ %entry
767 ; CHECK-NEXT: vldrb.s16 q0, [r0], #4
768 ; CHECK-NEXT: vstrh.16 q0, [r1]
771 %z = getelementptr inbounds i8, i8* %x, i32 4
772 %0 = bitcast i8* %x to <8 x i8>*
773 %1 = load <8 x i8>, <8 x i8>* %0, align 1
774 %2 = sext <8 x i8> %1 to <8 x i16>
775 %3 = bitcast i8* %y to <8 x i16>*
776 store <8 x i16> %2, <8 x i16>* %3, align 2
; <8 x i8> load (align 1) sign-extended to <8 x i16>, with %z = %x + 3 bytes:
; expects vldrb.s16 with `[r0], #3` writeback.
780 define i8* @ldrbs16_3(i8* %x, i8* %y) {
781 ; CHECK-LABEL: ldrbs16_3:
782 ; CHECK: @ %bb.0: @ %entry
783 ; CHECK-NEXT: vldrb.s16 q0, [r0], #3
784 ; CHECK-NEXT: vstrh.16 q0, [r1]
787 %z = getelementptr inbounds i8, i8* %x, i32 3
788 %0 = bitcast i8* %x to <8 x i8>*
789 %1 = load <8 x i8>, <8 x i8>* %0, align 1
790 %2 = sext <8 x i8> %1 to <8 x i16>
791 %3 = bitcast i8* %y to <8 x i16>*
792 store <8 x i16> %2, <8 x i16>* %3, align 2
; <8 x i8> load (align 1) sign-extended to <8 x i16>, with %z = %x + 127 bytes:
; expects vldrb.s16 with `[r0], #127` writeback.
796 define i8* @ldrbs16_127(i8* %x, i8* %y) {
797 ; CHECK-LABEL: ldrbs16_127:
798 ; CHECK: @ %bb.0: @ %entry
799 ; CHECK-NEXT: vldrb.s16 q0, [r0], #127
800 ; CHECK-NEXT: vstrh.16 q0, [r1]
803 %z = getelementptr inbounds i8, i8* %x, i32 127
804 %0 = bitcast i8* %x to <8 x i8>*
805 %1 = load <8 x i8>, <8 x i8>* %0, align 1
806 %2 = sext <8 x i8> %1 to <8 x i16>
807 %3 = bitcast i8* %y to <8 x i16>*
808 store <8 x i16> %2, <8 x i16>* %3, align 2
; <8 x i8> load (align 1) sign-extended to <8 x i16>, with %z = %x + 128 bytes:
; expects a plain vldrb.s16 [r0] and a separate `adds r0, #128`.
812 define i8* @ldrbs16_128(i8* %x, i8* %y) {
813 ; CHECK-LABEL: ldrbs16_128:
814 ; CHECK: @ %bb.0: @ %entry
815 ; CHECK-NEXT: vldrb.s16 q0, [r0]
816 ; CHECK-NEXT: adds r0, #128
817 ; CHECK-NEXT: vstrh.16 q0, [r1]
820 %z = getelementptr inbounds i8, i8* %x, i32 128
821 %0 = bitcast i8* %x to <8 x i8>*
822 %1 = load <8 x i8>, <8 x i8>* %0, align 1
823 %2 = sext <8 x i8> %1 to <8 x i16>
824 %3 = bitcast i8* %y to <8 x i16>*
825 store <8 x i16> %2, <8 x i16>* %3, align 2
; <8 x i8> load (align 1) sign-extended to <8 x i16>, with %z = %x - 127 bytes:
; expects vldrb.s16 with `[r0], #-127` writeback.
829 define i8* @ldrbs16_m127(i8* %x, i8* %y) {
830 ; CHECK-LABEL: ldrbs16_m127:
831 ; CHECK: @ %bb.0: @ %entry
832 ; CHECK-NEXT: vldrb.s16 q0, [r0], #-127
833 ; CHECK-NEXT: vstrh.16 q0, [r1]
836 %z = getelementptr inbounds i8, i8* %x, i32 -127
837 %0 = bitcast i8* %x to <8 x i8>*
838 %1 = load <8 x i8>, <8 x i8>* %0, align 1
839 %2 = sext <8 x i8> %1 to <8 x i16>
840 %3 = bitcast i8* %y to <8 x i16>*
841 store <8 x i16> %2, <8 x i16>* %3, align 2
; <8 x i8> load (align 1) sign-extended to <8 x i16>, with %z = %x - 128 bytes:
; expects a plain vldrb.s16 [r0] and a separate `subs r0, #128`.
845 define i8* @ldrbs16_m128(i8* %x, i8* %y) {
846 ; CHECK-LABEL: ldrbs16_m128:
847 ; CHECK: @ %bb.0: @ %entry
848 ; CHECK-NEXT: vldrb.s16 q0, [r0]
849 ; CHECK-NEXT: subs r0, #128
850 ; CHECK-NEXT: vstrh.16 q0, [r1]
853 %z = getelementptr inbounds i8, i8* %x, i32 -128
854 %0 = bitcast i8* %x to <8 x i8>*
855 %1 = load <8 x i8>, <8 x i8>* %0, align 1
856 %2 = sext <8 x i8> %1 to <8 x i16>
857 %3 = bitcast i8* %y to <8 x i16>*
858 store <8 x i16> %2, <8 x i16>* %3, align 2
; <16 x i8> load from %x (align 1) stored to %y, with %z = %x + 4 bytes:
; expects vldrb.u8 with `[r0], #4` writeback.
863 define i8* @ldrbu8_4(i8* %x, i8* %y) {
864 ; CHECK-LABEL: ldrbu8_4:
865 ; CHECK: @ %bb.0: @ %entry
866 ; CHECK-NEXT: vldrb.u8 q0, [r0], #4
867 ; CHECK-NEXT: vstrb.8 q0, [r1]
870 %z = getelementptr inbounds i8, i8* %x, i32 4
871 %0 = bitcast i8* %x to <16 x i8>*
872 %1 = load <16 x i8>, <16 x i8>* %0, align 1
873 %2 = bitcast i8* %y to <16 x i8>*
874 store <16 x i8> %1, <16 x i8>* %2, align 1
; <16 x i8> load (align 1) with %z = %x + 3 bytes: expects vldrb.u8 with
; `[r0], #3` writeback.
878 define i8* @ldrbu8_3(i8* %x, i8* %y) {
879 ; CHECK-LABEL: ldrbu8_3:
880 ; CHECK: @ %bb.0: @ %entry
881 ; CHECK-NEXT: vldrb.u8 q0, [r0], #3
882 ; CHECK-NEXT: vstrb.8 q0, [r1]
885 %z = getelementptr inbounds i8, i8* %x, i32 3
886 %0 = bitcast i8* %x to <16 x i8>*
887 %1 = load <16 x i8>, <16 x i8>* %0, align 1
888 %2 = bitcast i8* %y to <16 x i8>*
889 store <16 x i8> %1, <16 x i8>* %2, align 1
; <16 x i8> load (align 1) with %z = %x + 127 bytes: expects vldrb.u8 with
; `[r0], #127` writeback.
893 define i8* @ldrbu8_127(i8* %x, i8* %y) {
894 ; CHECK-LABEL: ldrbu8_127:
895 ; CHECK: @ %bb.0: @ %entry
896 ; CHECK-NEXT: vldrb.u8 q0, [r0], #127
897 ; CHECK-NEXT: vstrb.8 q0, [r1]
900 %z = getelementptr inbounds i8, i8* %x, i32 127
901 %0 = bitcast i8* %x to <16 x i8>*
902 %1 = load <16 x i8>, <16 x i8>* %0, align 1
903 %2 = bitcast i8* %y to <16 x i8>*
904 store <16 x i8> %1, <16 x i8>* %2, align 1
; <16 x i8> load (align 1) with %z = %x + 128 bytes: expects a plain
; vldrb.u8 [r0] and a separate `adds r0, #128`.
908 define i8* @ldrbu8_128(i8* %x, i8* %y) {
909 ; CHECK-LABEL: ldrbu8_128:
910 ; CHECK: @ %bb.0: @ %entry
911 ; CHECK-NEXT: vldrb.u8 q0, [r0]
912 ; CHECK-NEXT: adds r0, #128
913 ; CHECK-NEXT: vstrb.8 q0, [r1]
916 %z = getelementptr inbounds i8, i8* %x, i32 128
917 %0 = bitcast i8* %x to <16 x i8>*
918 %1 = load <16 x i8>, <16 x i8>* %0, align 1
919 %2 = bitcast i8* %y to <16 x i8>*
920 store <16 x i8> %1, <16 x i8>* %2, align 1
; <16 x i8> load (align 1) with %z = %x - 127 bytes: expects vldrb.u8 with
; `[r0], #-127` writeback.
924 define i8* @ldrbu8_m127(i8* %x, i8* %y) {
925 ; CHECK-LABEL: ldrbu8_m127:
926 ; CHECK: @ %bb.0: @ %entry
927 ; CHECK-NEXT: vldrb.u8 q0, [r0], #-127
928 ; CHECK-NEXT: vstrb.8 q0, [r1]
931 %z = getelementptr inbounds i8, i8* %x, i32 -127
932 %0 = bitcast i8* %x to <16 x i8>*
933 %1 = load <16 x i8>, <16 x i8>* %0, align 1
934 %2 = bitcast i8* %y to <16 x i8>*
935 store <16 x i8> %1, <16 x i8>* %2, align 1
; <16 x i8> load (align 1) with %z = %x - 128 bytes: expects a plain
; vldrb.u8 [r0] and a separate `subs r0, #128`.
939 define i8* @ldrbu8_m128(i8* %x, i8* %y) {
940 ; CHECK-LABEL: ldrbu8_m128:
941 ; CHECK: @ %bb.0: @ %entry
942 ; CHECK-NEXT: vldrb.u8 q0, [r0]
943 ; CHECK-NEXT: subs r0, #128
944 ; CHECK-NEXT: vstrb.8 q0, [r1]
947 %z = getelementptr inbounds i8, i8* %x, i32 -128
948 %0 = bitcast i8* %x to <16 x i8>*
949 %1 = load <16 x i8>, <16 x i8>* %0, align 1
950 %2 = bitcast i8* %y to <16 x i8>*
951 store <16 x i8> %1, <16 x i8>* %2, align 1
; <4 x float> load from %x (align 4) stored to %y, with %z = %x + 4 bytes:
; expects the same vldrw.u32 `[r0], #4` writeback form as the integer case.
956 define i8* @ldrwf32_4(i8* %x, i8* %y) {
957 ; CHECK-LABEL: ldrwf32_4:
958 ; CHECK: @ %bb.0: @ %entry
959 ; CHECK-NEXT: vldrw.u32 q0, [r0], #4
960 ; CHECK-NEXT: vstrw.32 q0, [r1]
963 %z = getelementptr inbounds i8, i8* %x, i32 4
964 %0 = bitcast i8* %x to <4 x float>*
965 %1 = load <4 x float>, <4 x float>* %0, align 4
966 %2 = bitcast i8* %y to <4 x float>*
967 store <4 x float> %1, <4 x float>* %2, align 4
; <8 x half> load from %x (align 2) stored to %y, with %z = %x + 4 bytes:
; expects vldrh.u16 with `[r0], #4` writeback.
; NOTE(review): the function name says "ldrw" although the access is 16-bit;
; presumably a naming slip. Renaming would also require updating the label
; assertion, so it is left unchanged here.
971 define i8* @ldrwf16_4(i8* %x, i8* %y) {
972 ; CHECK-LABEL: ldrwf16_4:
973 ; CHECK: @ %bb.0: @ %entry
974 ; CHECK-NEXT: vldrh.u16 q0, [r0], #4
975 ; CHECK-NEXT: vstrh.16 q0, [r1]
978 %z = getelementptr inbounds i8, i8* %x, i32 4
979 %0 = bitcast i8* %x to <8 x half>*
980 %1 = load <8 x half>, <8 x half>* %0, align 2
981 %2 = bitcast i8* %y to <8 x half>*
982 store <8 x half> %1, <8 x half>* %2, align 2
; <4 x i32> load with only align 1, %z = %x + 3 bytes: the little-endian run
; expects a vldrb.u8 with `[r0], #3` writeback; the big-endian run expects a
; plain vldrb.u8, a separate `adds r0, #3`, and a vrev32.8 before the store.
986 define i8* @ldrwi32_align1(i8* %x, i8* %y) {
987 ; CHECK-LE-LABEL: ldrwi32_align1:
988 ; CHECK-LE: @ %bb.0: @ %entry
989 ; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3
990 ; CHECK-LE-NEXT: vstrw.32 q0, [r1]
991 ; CHECK-LE-NEXT: bx lr
993 ; CHECK-BE-LABEL: ldrwi32_align1:
994 ; CHECK-BE: @ %bb.0: @ %entry
995 ; CHECK-BE-NEXT: vldrb.u8 q0, [r0]
996 ; CHECK-BE-NEXT: adds r0, #3
997 ; CHECK-BE-NEXT: vrev32.8 q0, q0
998 ; CHECK-BE-NEXT: vstrw.32 q0, [r1]
999 ; CHECK-BE-NEXT: bx lr
1001 %z = getelementptr inbounds i8, i8* %x, i32 3
1002 %0 = bitcast i8* %x to <4 x i32>*
1003 %1 = load <4 x i32>, <4 x i32>* %0, align 1
1004 %2 = bitcast i8* %y to <4 x i32>*
1005 store <4 x i32> %1, <4 x i32>* %2, align 4
; <8 x i16> load with only align 1, %z = %x + 3 bytes: the little-endian run
; expects a vldrb.u8 with `[r0], #3` writeback; the big-endian run expects a
; plain vldrb.u8, a separate `adds r0, #3`, and a vrev16.8 before the store.
1009 define i8* @ldrhi16_align1(i8* %x, i8* %y) {
1010 ; CHECK-LE-LABEL: ldrhi16_align1:
1011 ; CHECK-LE: @ %bb.0: @ %entry
1012 ; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3
1013 ; CHECK-LE-NEXT: vstrh.16 q0, [r1]
1014 ; CHECK-LE-NEXT: bx lr
1016 ; CHECK-BE-LABEL: ldrhi16_align1:
1017 ; CHECK-BE: @ %bb.0: @ %entry
1018 ; CHECK-BE-NEXT: vldrb.u8 q0, [r0]
1019 ; CHECK-BE-NEXT: adds r0, #3
1020 ; CHECK-BE-NEXT: vrev16.8 q0, q0
1021 ; CHECK-BE-NEXT: vstrh.16 q0, [r1]
1022 ; CHECK-BE-NEXT: bx lr
1024 %z = getelementptr inbounds i8, i8* %x, i32 3
1025 %0 = bitcast i8* %x to <8 x i16>*
1026 %1 = load <8 x i16>, <8 x i16>* %0, align 1
1027 %2 = bitcast i8* %y to <8 x i16>*
1028 store <8 x i16> %1, <8 x i16>* %2, align 2
; <4 x i16> load with only align 1, sign-extended to <4 x i32>, with
; %z = %x + 3 bytes: expects the 8 source bytes to be copied with scalar
; ldr/strd into an 8-byte stack slot and then widened by vldrh.s32 from the
; stack; the pointer increment is a separate `adds r0, #3`.
1032 define i8* @ldrhi32_align1(i8* %x, i8* %y) {
1033 ; CHECK-LABEL: ldrhi32_align1:
1034 ; CHECK: @ %bb.0: @ %entry
1035 ; CHECK-NEXT: .pad #8
1036 ; CHECK-NEXT: sub sp, #8
1037 ; CHECK-NEXT: ldr r3, [r0, #4]
1038 ; CHECK-NEXT: ldr r2, [r0]
1039 ; CHECK-NEXT: adds r0, #3
1040 ; CHECK-NEXT: strd r2, r3, [sp]
1041 ; CHECK-NEXT: mov r2, sp
1042 ; CHECK-NEXT: vldrh.s32 q0, [r2]
1043 ; CHECK-NEXT: vstrw.32 q0, [r1]
1044 ; CHECK-NEXT: add sp, #8
1047 %z = getelementptr inbounds i8, i8* %x, i32 3
1048 %0 = bitcast i8* %x to <4 x i16>*
1049 %1 = load <4 x i16>, <4 x i16>* %0, align 1
1050 %2 = bitcast i8* %y to <4 x i32>*
1051 %3 = sext <4 x i16> %1 to <4 x i32>
1052 store <4 x i32> %3, <4 x i32>* %2, align 4
; <4 x float> load with only align 1, %z = %x + 3 bytes: the little-endian run
; expects a vldrb.u8 with `[r0], #3` writeback; the big-endian run expects a
; plain vldrb.u8, a separate `adds r0, #3`, and a vrev32.8 before the store.
1056 define i8* @ldrf32_align1(i8* %x, i8* %y) {
1057 ; CHECK-LE-LABEL: ldrf32_align1:
1058 ; CHECK-LE: @ %bb.0: @ %entry
1059 ; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3
1060 ; CHECK-LE-NEXT: vstrw.32 q0, [r1]
1061 ; CHECK-LE-NEXT: bx lr
1063 ; CHECK-BE-LABEL: ldrf32_align1:
1064 ; CHECK-BE: @ %bb.0: @ %entry
1065 ; CHECK-BE-NEXT: vldrb.u8 q0, [r0]
1066 ; CHECK-BE-NEXT: adds r0, #3
1067 ; CHECK-BE-NEXT: vrev32.8 q0, q0
1068 ; CHECK-BE-NEXT: vstrw.32 q0, [r1]
1069 ; CHECK-BE-NEXT: bx lr
1071 %z = getelementptr inbounds i8, i8* %x, i32 3
1072 %0 = bitcast i8* %x to <4 x float>*
1073 %1 = load <4 x float>, <4 x float>* %0, align 1
1074 %2 = bitcast i8* %y to <4 x float>*
1075 store <4 x float> %1, <4 x float>* %2, align 4
; <8 x half> load with only align 1, %z = %x + 3 bytes: the little-endian run
; expects a vldrb.u8 with `[r0], #3` writeback; the big-endian run expects a
; plain vldrb.u8, a separate `adds r0, #3`, and a vrev16.8 before the store.
1079 define i8* @ldrf16_align1(i8* %x, i8* %y) {
1080 ; CHECK-LE-LABEL: ldrf16_align1:
1081 ; CHECK-LE: @ %bb.0: @ %entry
1082 ; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3
1083 ; CHECK-LE-NEXT: vstrh.16 q0, [r1]
1084 ; CHECK-LE-NEXT: bx lr
1086 ; CHECK-BE-LABEL: ldrf16_align1:
1087 ; CHECK-BE: @ %bb.0: @ %entry
1088 ; CHECK-BE-NEXT: vldrb.u8 q0, [r0]
1089 ; CHECK-BE-NEXT: adds r0, #3
1090 ; CHECK-BE-NEXT: vrev16.8 q0, q0
1091 ; CHECK-BE-NEXT: vstrh.16 q0, [r1]
1092 ; CHECK-BE-NEXT: bx lr
1094 %z = getelementptr inbounds i8, i8* %x, i32 3
1095 %0 = bitcast i8* %x to <8 x half>*
1096 %1 = load <8 x half>, <8 x half>* %0, align 1
1097 %2 = bitcast i8* %y to <8 x half>*
1098 store <8 x half> %1, <8 x half>* %2, align 2
; <8 x i16> load with align 8, %z = %x + 4 bytes: the little-endian run
; expects vldrw.u32 with `[r0], #4` writeback, the big-endian run vldrh.u16
; with the same writeback.
1102 define i8* @ldrh16_align8(i8* %x, i8* %y) {
1103 ; CHECK-LE-LABEL: ldrh16_align8:
1104 ; CHECK-LE: @ %bb.0: @ %entry
1105 ; CHECK-LE-NEXT: vldrw.u32 q0, [r0], #4
1106 ; CHECK-LE-NEXT: vstrh.16 q0, [r1]
1107 ; CHECK-LE-NEXT: bx lr
1109 ; CHECK-BE-LABEL: ldrh16_align8:
1110 ; CHECK-BE: @ %bb.0: @ %entry
1111 ; CHECK-BE-NEXT: vldrh.u16 q0, [r0], #4
1112 ; CHECK-BE-NEXT: vstrh.16 q0, [r1]
1113 ; CHECK-BE-NEXT: bx lr
1115 %z = getelementptr inbounds i8, i8* %x, i32 4
1116 %0 = bitcast i8* %x to <8 x i16>*
1117 %1 = load <8 x i16>, <8 x i16>* %0, align 8
1118 %2 = bitcast i8* %y to <8 x i16>*
1119 store <8 x i16> %1, <8 x i16>* %2, align 2
; Store-side variant (note the swapped parameter order: %y first, %x second).
; <4 x i32> loaded from %x is stored to %y (align 4), with %z = %y + 4 bytes:
; the little-endian run expects vstrb.8 with `[r0], #4` writeback, the
; big-endian run vstrw.32 with the same writeback.
1127 define i8* @strw32_4(i8* %y, i8* %x) {
1128 ; CHECK-LE-LABEL: strw32_4:
1129 ; CHECK-LE: @ %bb.0: @ %entry
1130 ; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
1131 ; CHECK-LE-NEXT: vstrb.8 q0, [r0], #4
1132 ; CHECK-LE-NEXT: bx lr
1134 ; CHECK-BE-LABEL: strw32_4:
1135 ; CHECK-BE: @ %bb.0: @ %entry
1136 ; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
1137 ; CHECK-BE-NEXT: vstrw.32 q0, [r0], #4
1138 ; CHECK-BE-NEXT: bx lr
1140 %z = getelementptr inbounds i8, i8* %y, i32 4
1141 %0 = bitcast i8* %x to <4 x i32>*
1142 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1143 %2 = bitcast i8* %y to <4 x i32>*
1144 store <4 x i32> %1, <4 x i32>* %2, align 4
; Post-increment store test with an offset that is not a multiple of the
; element size: <4 x i32> is copied from %x to %y while %z = %y + 3 is computed.
; Little-endian expects the +3 folded into a post-indexed vstrb.8; big-endian
; expects a plain vstrw.32 followed by a separate adds.
1148 define i8* @strw32_3(i8* %y, i8* %x) {
1149 ; CHECK-LE-LABEL: strw32_3:
1150 ; CHECK-LE: @ %bb.0: @ %entry
1151 ; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
1152 ; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3
1153 ; CHECK-LE-NEXT: bx lr
1155 ; CHECK-BE-LABEL: strw32_3:
1156 ; CHECK-BE: @ %bb.0: @ %entry
1157 ; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
1158 ; CHECK-BE-NEXT: vstrw.32 q0, [r0]
1159 ; CHECK-BE-NEXT: adds r0, #3
1160 ; CHECK-BE-NEXT: bx lr
1162 %z = getelementptr inbounds i8, i8* %y, i32 3
1163 %0 = bitcast i8* %x to <4 x i32>*
1164 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1165 %2 = bitcast i8* %y to <4 x i32>*
1166 store <4 x i32> %1, <4 x i32>* %2, align 4
; Post-decrement store test: <4 x i32> is copied from %x to %y while
; %z = %y - 4 is computed. Little-endian expects the -4 folded into a
; post-indexed vstrb.8; big-endian expects a post-indexed vstrw.32.
1170 define i8* @strw32_m4(i8* %y, i8* %x) {
1171 ; CHECK-LE-LABEL: strw32_m4:
1172 ; CHECK-LE: @ %bb.0: @ %entry
1173 ; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
1174 ; CHECK-LE-NEXT: vstrb.8 q0, [r0], #-4
1175 ; CHECK-LE-NEXT: bx lr
1177 ; CHECK-BE-LABEL: strw32_m4:
1178 ; CHECK-BE: @ %bb.0: @ %entry
1179 ; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
1180 ; CHECK-BE-NEXT: vstrw.32 q0, [r0], #-4
1181 ; CHECK-BE-NEXT: bx lr
1183 %z = getelementptr inbounds i8, i8* %y, i32 -4
1184 %0 = bitcast i8* %x to <4 x i32>*
1185 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1186 %2 = bitcast i8* %y to <4 x i32>*
1187 store <4 x i32> %1, <4 x i32>* %2, align 4
; Post-increment store test near the upper offset boundary: <4 x i32> is copied
; from %x to %y while %z = %y + 508 is computed. Both endiannesses expect the
; +508 folded into a post-indexed vstrw.32.
; NOTE(review): 508 is presumably the largest foldable word offset (compare
; strw32_512, which is not folded) - confirm against the MVE encoding.
1191 define i8* @strw32_508(i8* %y, i8* %x) {
1192 ; CHECK-LABEL: strw32_508:
1193 ; CHECK: @ %bb.0: @ %entry
1194 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1195 ; CHECK-NEXT: vstrw.32 q0, [r0], #508
1198 %z = getelementptr inbounds i8, i8* %y, i32 508
1199 %0 = bitcast i8* %x to <4 x i32>*
1200 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1201 %2 = bitcast i8* %y to <4 x i32>*
1202 store <4 x i32> %1, <4 x i32>* %2, align 4
; Negative test for folding: <4 x i32> is copied from %x to %y while
; %z = %y + 512 is computed. The expected output keeps a plain vstrw.32 and
; updates the pointer with a separate add.w.
; NOTE(review): presumably 512 is just outside the post-index immediate range
; for word accesses - confirm against the MVE encoding.
1206 define i8* @strw32_512(i8* %y, i8* %x) {
1207 ; CHECK-LABEL: strw32_512:
1208 ; CHECK: @ %bb.0: @ %entry
1209 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1210 ; CHECK-NEXT: vstrw.32 q0, [r0]
1211 ; CHECK-NEXT: add.w r0, r0, #512
1214 %z = getelementptr inbounds i8, i8* %y, i32 512
1215 %0 = bitcast i8* %x to <4 x i32>*
1216 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1217 %2 = bitcast i8* %y to <4 x i32>*
1218 store <4 x i32> %1, <4 x i32>* %2, align 4
; Post-decrement store test near the lower offset boundary: <4 x i32> is copied
; from %x to %y while %z = %y - 508 is computed. Both endiannesses expect the
; -508 folded into a post-indexed vstrw.32.
1222 define i8* @strw32_m508(i8* %y, i8* %x) {
1223 ; CHECK-LABEL: strw32_m508:
1224 ; CHECK: @ %bb.0: @ %entry
1225 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1226 ; CHECK-NEXT: vstrw.32 q0, [r0], #-508
1229 %z = getelementptr inbounds i8, i8* %y, i32 -508
1230 %0 = bitcast i8* %x to <4 x i32>*
1231 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1232 %2 = bitcast i8* %y to <4 x i32>*
1233 store <4 x i32> %1, <4 x i32>* %2, align 4
; Negative test for folding: <4 x i32> is copied from %x to %y while
; %z = %y - 512 is computed. The expected output keeps a plain vstrw.32 and
; updates the pointer with a separate sub.w.
; NOTE(review): presumably -512 is just outside the post-index immediate range
; for word accesses - confirm against the MVE encoding.
1237 define i8* @strw32_m512(i8* %y, i8* %x) {
1238 ; CHECK-LABEL: strw32_m512:
1239 ; CHECK: @ %bb.0: @ %entry
1240 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1241 ; CHECK-NEXT: vstrw.32 q0, [r0]
1242 ; CHECK-NEXT: sub.w r0, r0, #512
1245 %z = getelementptr inbounds i8, i8* %y, i32 -512
1246 %0 = bitcast i8* %x to <4 x i32>*
1247 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1248 %2 = bitcast i8* %y to <4 x i32>*
1249 store <4 x i32> %1, <4 x i32>* %2, align 4
; Post-increment store test on a <4 x i16> vector: the value is copied from %x
; to %y (both align 2) while %z = %y + 4 is computed. The expected output uses
; vldrh.u32 / vstrh.32 and folds the +4 into a post-indexed store.
1254 define i8* @strh32_4(i8* %y, i8* %x) {
1255 ; CHECK-LABEL: strh32_4:
1256 ; CHECK: @ %bb.0: @ %entry
1257 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1258 ; CHECK-NEXT: vstrh.32 q0, [r0], #4
1261 %z = getelementptr inbounds i8, i8* %y, i32 4
1262 %0 = bitcast i8* %x to <4 x i16>*
1263 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1264 %2 = bitcast i8* %y to <4 x i16>*
1265 store <4 x i16> %1, <4 x i16>* %2, align 2
; Negative test for folding on a <4 x i16> vector: the value is copied from %x
; to %y while %z = %y + 3 is computed. The expected output keeps a plain
; vstrh.32 and updates the pointer with a separate adds.
; NOTE(review): presumably because 3 is not a multiple of the 2-byte element
; size - confirm against the MVE encoding.
1269 define i8* @strh32_3(i8* %y, i8* %x) {
1270 ; CHECK-LABEL: strh32_3:
1271 ; CHECK: @ %bb.0: @ %entry
1272 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1273 ; CHECK-NEXT: vstrh.32 q0, [r0]
1274 ; CHECK-NEXT: adds r0, #3
1277 %z = getelementptr inbounds i8, i8* %y, i32 3
1278 %0 = bitcast i8* %x to <4 x i16>*
1279 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1280 %2 = bitcast i8* %y to <4 x i16>*
1281 store <4 x i16> %1, <4 x i16>* %2, align 2
; Post-increment store test on a <4 x i16> vector: the value is copied from %x
; to %y while %z = %y + 2 is computed. The expected output folds the +2 into a
; post-indexed vstrh.32.
1285 define i8* @strh32_2(i8* %y, i8* %x) {
1286 ; CHECK-LABEL: strh32_2:
1287 ; CHECK: @ %bb.0: @ %entry
1288 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1289 ; CHECK-NEXT: vstrh.32 q0, [r0], #2
1292 %z = getelementptr inbounds i8, i8* %y, i32 2
1293 %0 = bitcast i8* %x to <4 x i16>*
1294 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1295 %2 = bitcast i8* %y to <4 x i16>*
1296 store <4 x i16> %1, <4 x i16>* %2, align 2
; Post-increment store test near the upper offset boundary on a <4 x i16>
; vector: %z = %y + 254. The expected output folds the +254 into a post-indexed
; vstrh.32 (compare strh32_256, which is not folded).
1300 define i8* @strh32_254(i8* %y, i8* %x) {
1301 ; CHECK-LABEL: strh32_254:
1302 ; CHECK: @ %bb.0: @ %entry
1303 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1304 ; CHECK-NEXT: vstrh.32 q0, [r0], #254
1307 %z = getelementptr inbounds i8, i8* %y, i32 254
1308 %0 = bitcast i8* %x to <4 x i16>*
1309 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1310 %2 = bitcast i8* %y to <4 x i16>*
1311 store <4 x i16> %1, <4 x i16>* %2, align 2
; Negative test for folding on a <4 x i16> vector: %z = %y + 256. The expected
; output keeps a plain vstrh.32 and updates the pointer with a separate add.w.
; NOTE(review): presumably 256 is just outside the post-index immediate range
; for halfword accesses - confirm against the MVE encoding.
1315 define i8* @strh32_256(i8* %y, i8* %x) {
1316 ; CHECK-LABEL: strh32_256:
1317 ; CHECK: @ %bb.0: @ %entry
1318 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1319 ; CHECK-NEXT: vstrh.32 q0, [r0]
1320 ; CHECK-NEXT: add.w r0, r0, #256
1323 %z = getelementptr inbounds i8, i8* %y, i32 256
1324 %0 = bitcast i8* %x to <4 x i16>*
1325 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1326 %2 = bitcast i8* %y to <4 x i16>*
1327 store <4 x i16> %1, <4 x i16>* %2, align 2
; Post-decrement store test near the lower offset boundary on a <4 x i16>
; vector: %z = %y - 254. The expected output folds the -254 into a post-indexed
; vstrh.32.
1331 define i8* @strh32_m254(i8* %y, i8* %x) {
1332 ; CHECK-LABEL: strh32_m254:
1333 ; CHECK: @ %bb.0: @ %entry
1334 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1335 ; CHECK-NEXT: vstrh.32 q0, [r0], #-254
1338 %z = getelementptr inbounds i8, i8* %y, i32 -254
1339 %0 = bitcast i8* %x to <4 x i16>*
1340 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1341 %2 = bitcast i8* %y to <4 x i16>*
1342 store <4 x i16> %1, <4 x i16>* %2, align 2
; Negative test for folding on a <4 x i16> vector: %z = %y - 256. The expected
; output keeps a plain vstrh.32 and updates the pointer with a separate sub.w.
; NOTE(review): presumably -256 is just outside the post-index immediate range
; for halfword accesses - confirm against the MVE encoding.
1346 define i8* @strh32_m256(i8* %y, i8* %x) {
1347 ; CHECK-LABEL: strh32_m256:
1348 ; CHECK: @ %bb.0: @ %entry
1349 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1350 ; CHECK-NEXT: vstrh.32 q0, [r0]
1351 ; CHECK-NEXT: sub.w r0, r0, #256
1354 %z = getelementptr inbounds i8, i8* %y, i32 -256
1355 %0 = bitcast i8* %x to <4 x i16>*
1356 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1357 %2 = bitcast i8* %y to <4 x i16>*
1358 store <4 x i16> %1, <4 x i16>* %2, align 2
; Post-increment store test on a full <8 x i16> vector: the value is copied
; from %x to %y (both align 2) while %z = %y + 4 is computed.
; Little-endian expects the +4 folded into a post-indexed vstrb.8; big-endian
; expects a post-indexed vstrh.16.
1363 define i8* @strh16_4(i8* %y, i8* %x) {
1364 ; CHECK-LE-LABEL: strh16_4:
1365 ; CHECK-LE: @ %bb.0: @ %entry
1366 ; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
1367 ; CHECK-LE-NEXT: vstrb.8 q0, [r0], #4
1368 ; CHECK-LE-NEXT: bx lr
1370 ; CHECK-BE-LABEL: strh16_4:
1371 ; CHECK-BE: @ %bb.0: @ %entry
1372 ; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
1373 ; CHECK-BE-NEXT: vstrh.16 q0, [r0], #4
1374 ; CHECK-BE-NEXT: bx lr
1376 %z = getelementptr inbounds i8, i8* %y, i32 4
1377 %0 = bitcast i8* %x to <8 x i16>*
1378 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1379 %2 = bitcast i8* %y to <8 x i16>*
1380 store <8 x i16> %1, <8 x i16>* %2, align 2
; Post-increment store test on <8 x i16> with an odd offset: %z = %y + 3.
; Little-endian expects the +3 folded into a post-indexed vstrb.8; big-endian
; expects a plain vstrh.16 followed by a separate adds.
1384 define i8* @strh16_3(i8* %y, i8* %x) {
1385 ; CHECK-LE-LABEL: strh16_3:
1386 ; CHECK-LE: @ %bb.0: @ %entry
1387 ; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
1388 ; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3
1389 ; CHECK-LE-NEXT: bx lr
1391 ; CHECK-BE-LABEL: strh16_3:
1392 ; CHECK-BE: @ %bb.0: @ %entry
1393 ; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
1394 ; CHECK-BE-NEXT: vstrh.16 q0, [r0]
1395 ; CHECK-BE-NEXT: adds r0, #3
1396 ; CHECK-BE-NEXT: bx lr
1398 %z = getelementptr inbounds i8, i8* %y, i32 3
1399 %0 = bitcast i8* %x to <8 x i16>*
1400 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1401 %2 = bitcast i8* %y to <8 x i16>*
1402 store <8 x i16> %1, <8 x i16>* %2, align 2
; Post-increment store test on <8 x i16>: %z = %y + 2.
; Little-endian expects the +2 folded into a post-indexed vstrb.8; big-endian
; expects a post-indexed vstrh.16.
1406 define i8* @strh16_2(i8* %y, i8* %x) {
1407 ; CHECK-LE-LABEL: strh16_2:
1408 ; CHECK-LE: @ %bb.0: @ %entry
1409 ; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
1410 ; CHECK-LE-NEXT: vstrb.8 q0, [r0], #2
1411 ; CHECK-LE-NEXT: bx lr
1413 ; CHECK-BE-LABEL: strh16_2:
1414 ; CHECK-BE: @ %bb.0: @ %entry
1415 ; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
1416 ; CHECK-BE-NEXT: vstrh.16 q0, [r0], #2
1417 ; CHECK-BE-NEXT: bx lr
1419 %z = getelementptr inbounds i8, i8* %y, i32 2
1420 %0 = bitcast i8* %x to <8 x i16>*
1421 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1422 %2 = bitcast i8* %y to <8 x i16>*
1423 store <8 x i16> %1, <8 x i16>* %2, align 2
; Post-increment store test near the upper offset boundary on <8 x i16>:
; %z = %y + 254. Both endiannesses expect the +254 folded into a post-indexed
; vstrh.16 (compare strh16_256, which is not folded).
1427 define i8* @strh16_254(i8* %y, i8* %x) {
1428 ; CHECK-LABEL: strh16_254:
1429 ; CHECK: @ %bb.0: @ %entry
1430 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1431 ; CHECK-NEXT: vstrh.16 q0, [r0], #254
1434 %z = getelementptr inbounds i8, i8* %y, i32 254
1435 %0 = bitcast i8* %x to <8 x i16>*
1436 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1437 %2 = bitcast i8* %y to <8 x i16>*
1438 store <8 x i16> %1, <8 x i16>* %2, align 2
; Negative test for folding on <8 x i16>: %z = %y + 256. The expected output
; keeps a plain vstrh.16 and updates the pointer with a separate add.w.
; NOTE(review): presumably 256 is just outside the post-index immediate range
; for halfword accesses - confirm against the MVE encoding.
1442 define i8* @strh16_256(i8* %y, i8* %x) {
1443 ; CHECK-LABEL: strh16_256:
1444 ; CHECK: @ %bb.0: @ %entry
1445 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1446 ; CHECK-NEXT: vstrh.16 q0, [r0]
1447 ; CHECK-NEXT: add.w r0, r0, #256
1450 %z = getelementptr inbounds i8, i8* %y, i32 256
1451 %0 = bitcast i8* %x to <8 x i16>*
1452 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1453 %2 = bitcast i8* %y to <8 x i16>*
1454 store <8 x i16> %1, <8 x i16>* %2, align 2
; Post-decrement store test near the lower offset boundary on <8 x i16>:
; %z = %y - 254. Both endiannesses expect the -254 folded into a post-indexed
; vstrh.16.
1458 define i8* @strh16_m254(i8* %y, i8* %x) {
1459 ; CHECK-LABEL: strh16_m254:
1460 ; CHECK: @ %bb.0: @ %entry
1461 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1462 ; CHECK-NEXT: vstrh.16 q0, [r0], #-254
1465 %z = getelementptr inbounds i8, i8* %y, i32 -254
1466 %0 = bitcast i8* %x to <8 x i16>*
1467 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1468 %2 = bitcast i8* %y to <8 x i16>*
1469 store <8 x i16> %1, <8 x i16>* %2, align 2
; Negative test for folding on <8 x i16>: %z = %y - 256. The expected output
; keeps a plain vstrh.16 and updates the pointer with a separate sub.w.
; NOTE(review): presumably -256 is just outside the post-index immediate range
; for halfword accesses - confirm against the MVE encoding.
1473 define i8* @strh16_m256(i8* %y, i8* %x) {
1474 ; CHECK-LABEL: strh16_m256:
1475 ; CHECK: @ %bb.0: @ %entry
1476 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1477 ; CHECK-NEXT: vstrh.16 q0, [r0]
1478 ; CHECK-NEXT: sub.w r0, r0, #256
1481 %z = getelementptr inbounds i8, i8* %y, i32 -256
1482 %0 = bitcast i8* %x to <8 x i16>*
1483 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1484 %2 = bitcast i8* %y to <8 x i16>*
1485 store <8 x i16> %1, <8 x i16>* %2, align 2
; Post-increment store test on a <4 x i8> vector: the value is copied from %x
; to %y (both align 1) while %z = %y + 4 is computed. The expected output uses
; vldrb.u32 / vstrb.32 and folds the +4 into a post-indexed store.
1490 define i8* @strb32_4(i8* %y, i8* %x) {
1491 ; CHECK-LABEL: strb32_4:
1492 ; CHECK: @ %bb.0: @ %entry
1493 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1494 ; CHECK-NEXT: vstrb.32 q0, [r0], #4
1497 %z = getelementptr inbounds i8, i8* %y, i32 4
1498 %0 = bitcast i8* %x to <4 x i8>*
1499 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1500 %2 = bitcast i8* %y to <4 x i8>*
1501 store <4 x i8> %1, <4 x i8>* %2, align 1
; Post-increment store test on a <4 x i8> vector with an odd offset:
; %z = %y + 3. The expected output folds the +3 into a post-indexed vstrb.32.
1505 define i8* @strb32_3(i8* %y, i8* %x) {
1506 ; CHECK-LABEL: strb32_3:
1507 ; CHECK: @ %bb.0: @ %entry
1508 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1509 ; CHECK-NEXT: vstrb.32 q0, [r0], #3
1512 %z = getelementptr inbounds i8, i8* %y, i32 3
1513 %0 = bitcast i8* %x to <4 x i8>*
1514 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1515 %2 = bitcast i8* %y to <4 x i8>*
1516 store <4 x i8> %1, <4 x i8>* %2, align 1
; Post-increment store test near the upper offset boundary on a <4 x i8>
; vector: %z = %y + 127. The expected output folds the +127 into a post-indexed
; vstrb.32 (compare strb32_128, which is not folded).
1520 define i8* @strb32_127(i8* %y, i8* %x) {
1521 ; CHECK-LABEL: strb32_127:
1522 ; CHECK: @ %bb.0: @ %entry
1523 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1524 ; CHECK-NEXT: vstrb.32 q0, [r0], #127
1527 %z = getelementptr inbounds i8, i8* %y, i32 127
1528 %0 = bitcast i8* %x to <4 x i8>*
1529 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1530 %2 = bitcast i8* %y to <4 x i8>*
1531 store <4 x i8> %1, <4 x i8>* %2, align 1
; Negative test for folding on a <4 x i8> vector: %z = %y + 128. The expected
; output keeps a plain vstrb.32 and updates the pointer with a separate adds.
; NOTE(review): presumably 128 is just outside the post-index immediate range
; for byte accesses - confirm against the MVE encoding.
1535 define i8* @strb32_128(i8* %y, i8* %x) {
1536 ; CHECK-LABEL: strb32_128:
1537 ; CHECK: @ %bb.0: @ %entry
1538 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1539 ; CHECK-NEXT: vstrb.32 q0, [r0]
1540 ; CHECK-NEXT: adds r0, #128
1543 %z = getelementptr inbounds i8, i8* %y, i32 128
1544 %0 = bitcast i8* %x to <4 x i8>*
1545 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1546 %2 = bitcast i8* %y to <4 x i8>*
1547 store <4 x i8> %1, <4 x i8>* %2, align 1
; Post-decrement store test near the lower offset boundary on a <4 x i8>
; vector: %z = %y - 127. The expected output folds the -127 into a post-indexed
; vstrb.32.
1551 define i8* @strb32_m127(i8* %y, i8* %x) {
1552 ; CHECK-LABEL: strb32_m127:
1553 ; CHECK: @ %bb.0: @ %entry
1554 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1555 ; CHECK-NEXT: vstrb.32 q0, [r0], #-127
1558 %z = getelementptr inbounds i8, i8* %y, i32 -127
1559 %0 = bitcast i8* %x to <4 x i8>*
1560 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1561 %2 = bitcast i8* %y to <4 x i8>*
1562 store <4 x i8> %1, <4 x i8>* %2, align 1
; Negative test for folding on a <4 x i8> vector: %z = %y - 128. The expected
; output keeps a plain vstrb.32 and updates the pointer with a separate subs.
; NOTE(review): presumably -128 is just outside the post-index immediate range
; for byte accesses - confirm against the MVE encoding.
1566 define i8* @strb32_m128(i8* %y, i8* %x) {
1567 ; CHECK-LABEL: strb32_m128:
1568 ; CHECK: @ %bb.0: @ %entry
1569 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1570 ; CHECK-NEXT: vstrb.32 q0, [r0]
1571 ; CHECK-NEXT: subs r0, #128
1574 %z = getelementptr inbounds i8, i8* %y, i32 -128
1575 %0 = bitcast i8* %x to <4 x i8>*
1576 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1577 %2 = bitcast i8* %y to <4 x i8>*
1578 store <4 x i8> %1, <4 x i8>* %2, align 1
; Post-increment store test on an <8 x i8> vector: the value is copied from %x
; to %y (both align 1) while %z = %y + 4 is computed. The expected output uses
; vldrb.u16 / vstrb.16 and folds the +4 into a post-indexed store.
1583 define i8* @strb16_4(i8* %y, i8* %x) {
1584 ; CHECK-LABEL: strb16_4:
1585 ; CHECK: @ %bb.0: @ %entry
1586 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1587 ; CHECK-NEXT: vstrb.16 q0, [r0], #4
1590 %z = getelementptr inbounds i8, i8* %y, i32 4
1591 %0 = bitcast i8* %x to <8 x i8>*
1592 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1593 %2 = bitcast i8* %y to <8 x i8>*
1594 store <8 x i8> %1, <8 x i8>* %2, align 1
; Post-increment store test on an <8 x i8> vector with an odd offset:
; %z = %y + 3. The expected output folds the +3 into a post-indexed vstrb.16.
1598 define i8* @strb16_3(i8* %y, i8* %x) {
1599 ; CHECK-LABEL: strb16_3:
1600 ; CHECK: @ %bb.0: @ %entry
1601 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1602 ; CHECK-NEXT: vstrb.16 q0, [r0], #3
1605 %z = getelementptr inbounds i8, i8* %y, i32 3
1606 %0 = bitcast i8* %x to <8 x i8>*
1607 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1608 %2 = bitcast i8* %y to <8 x i8>*
1609 store <8 x i8> %1, <8 x i8>* %2, align 1
; Post-increment store test near the upper offset boundary on an <8 x i8>
; vector: %z = %y + 127. The expected output folds the +127 into a post-indexed
; vstrb.16 (compare strb16_128, which is not folded).
1613 define i8* @strb16_127(i8* %y, i8* %x) {
1614 ; CHECK-LABEL: strb16_127:
1615 ; CHECK: @ %bb.0: @ %entry
1616 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1617 ; CHECK-NEXT: vstrb.16 q0, [r0], #127
1620 %z = getelementptr inbounds i8, i8* %y, i32 127
1621 %0 = bitcast i8* %x to <8 x i8>*
1622 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1623 %2 = bitcast i8* %y to <8 x i8>*
1624 store <8 x i8> %1, <8 x i8>* %2, align 1
; Negative test for folding on an <8 x i8> vector: %z = %y + 128. The expected
; output keeps a plain vstrb.16 and updates the pointer with a separate adds.
; NOTE(review): presumably 128 is just outside the post-index immediate range
; for byte accesses - confirm against the MVE encoding.
1628 define i8* @strb16_128(i8* %y, i8* %x) {
1629 ; CHECK-LABEL: strb16_128:
1630 ; CHECK: @ %bb.0: @ %entry
1631 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1632 ; CHECK-NEXT: vstrb.16 q0, [r0]
1633 ; CHECK-NEXT: adds r0, #128
1636 %z = getelementptr inbounds i8, i8* %y, i32 128
1637 %0 = bitcast i8* %x to <8 x i8>*
1638 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1639 %2 = bitcast i8* %y to <8 x i8>*
1640 store <8 x i8> %1, <8 x i8>* %2, align 1
; Post-decrement store test near the lower offset boundary on an <8 x i8>
; vector: %z = %y - 127. The expected output folds the -127 into a post-indexed
; vstrb.16.
1644 define i8* @strb16_m127(i8* %y, i8* %x) {
1645 ; CHECK-LABEL: strb16_m127:
1646 ; CHECK: @ %bb.0: @ %entry
1647 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1648 ; CHECK-NEXT: vstrb.16 q0, [r0], #-127
1651 %z = getelementptr inbounds i8, i8* %y, i32 -127
1652 %0 = bitcast i8* %x to <8 x i8>*
1653 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1654 %2 = bitcast i8* %y to <8 x i8>*
1655 store <8 x i8> %1, <8 x i8>* %2, align 1
; Negative test for folding on an <8 x i8> vector: %z = %y - 128. The expected
; output keeps a plain vstrb.16 and updates the pointer with a separate subs.
; NOTE(review): presumably -128 is just outside the post-index immediate range
; for byte accesses - confirm against the MVE encoding.
1659 define i8* @strb16_m128(i8* %y, i8* %x) {
1660 ; CHECK-LABEL: strb16_m128:
1661 ; CHECK: @ %bb.0: @ %entry
1662 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1663 ; CHECK-NEXT: vstrb.16 q0, [r0]
1664 ; CHECK-NEXT: subs r0, #128
1667 %z = getelementptr inbounds i8, i8* %y, i32 -128
1668 %0 = bitcast i8* %x to <8 x i8>*
1669 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1670 %2 = bitcast i8* %y to <8 x i8>*
1671 store <8 x i8> %1, <8 x i8>* %2, align 1
; Post-increment store test on a full <16 x i8> vector: the value is copied
; from %x to %y (both align 1) while %z = %y + 4 is computed. The expected
; output uses vldrb.u8 / vstrb.8 and folds the +4 into a post-indexed store.
1676 define i8* @strb8_4(i8* %y, i8* %x) {
1677 ; CHECK-LABEL: strb8_4:
1678 ; CHECK: @ %bb.0: @ %entry
1679 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1680 ; CHECK-NEXT: vstrb.8 q0, [r0], #4
1683 %z = getelementptr inbounds i8, i8* %y, i32 4
1684 %0 = bitcast i8* %x to <16 x i8>*
1685 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1686 %2 = bitcast i8* %y to <16 x i8>*
1687 store <16 x i8> %1, <16 x i8>* %2, align 1
; Post-increment store test on <16 x i8> with an odd offset: %z = %y + 3.
; The expected output folds the +3 into a post-indexed vstrb.8.
1691 define i8* @strb8_3(i8* %y, i8* %x) {
1692 ; CHECK-LABEL: strb8_3:
1693 ; CHECK: @ %bb.0: @ %entry
1694 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1695 ; CHECK-NEXT: vstrb.8 q0, [r0], #3
1698 %z = getelementptr inbounds i8, i8* %y, i32 3
1699 %0 = bitcast i8* %x to <16 x i8>*
1700 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1701 %2 = bitcast i8* %y to <16 x i8>*
1702 store <16 x i8> %1, <16 x i8>* %2, align 1
; Post-increment store test near the upper offset boundary on <16 x i8>:
; %z = %y + 127. The expected output folds the +127 into a post-indexed vstrb.8
; (compare strb8_128, which is not folded).
1706 define i8* @strb8_127(i8* %y, i8* %x) {
1707 ; CHECK-LABEL: strb8_127:
1708 ; CHECK: @ %bb.0: @ %entry
1709 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1710 ; CHECK-NEXT: vstrb.8 q0, [r0], #127
1713 %z = getelementptr inbounds i8, i8* %y, i32 127
1714 %0 = bitcast i8* %x to <16 x i8>*
1715 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1716 %2 = bitcast i8* %y to <16 x i8>*
1717 store <16 x i8> %1, <16 x i8>* %2, align 1
; Negative test for folding on <16 x i8>: %z = %y + 128. The expected output
; keeps a plain vstrb.8 and updates the pointer with a separate adds.
; NOTE(review): presumably 128 is just outside the post-index immediate range
; for byte accesses - confirm against the MVE encoding.
1721 define i8* @strb8_128(i8* %y, i8* %x) {
1722 ; CHECK-LABEL: strb8_128:
1723 ; CHECK: @ %bb.0: @ %entry
1724 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1725 ; CHECK-NEXT: vstrb.8 q0, [r0]
1726 ; CHECK-NEXT: adds r0, #128
1729 %z = getelementptr inbounds i8, i8* %y, i32 128
1730 %0 = bitcast i8* %x to <16 x i8>*
1731 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1732 %2 = bitcast i8* %y to <16 x i8>*
1733 store <16 x i8> %1, <16 x i8>* %2, align 1
; Post-decrement store test near the lower offset boundary on <16 x i8>:
; %z = %y - 127. The expected output folds the -127 into a post-indexed vstrb.8.
1737 define i8* @strb8_m127(i8* %y, i8* %x) {
1738 ; CHECK-LABEL: strb8_m127:
1739 ; CHECK: @ %bb.0: @ %entry
1740 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1741 ; CHECK-NEXT: vstrb.8 q0, [r0], #-127
1744 %z = getelementptr inbounds i8, i8* %y, i32 -127
1745 %0 = bitcast i8* %x to <16 x i8>*
1746 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1747 %2 = bitcast i8* %y to <16 x i8>*
1748 store <16 x i8> %1, <16 x i8>* %2, align 1
; Negative test for folding on <16 x i8>: %z = %y - 128. The expected output
; keeps a plain vstrb.8 and updates the pointer with a separate subs.
; NOTE(review): presumably -128 is just outside the post-index immediate range
; for byte accesses - confirm against the MVE encoding.
1752 define i8* @strb8_m128(i8* %y, i8* %x) {
1753 ; CHECK-LABEL: strb8_m128:
1754 ; CHECK: @ %bb.0: @ %entry
1755 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1756 ; CHECK-NEXT: vstrb.8 q0, [r0]
1757 ; CHECK-NEXT: subs r0, #128
1760 %z = getelementptr inbounds i8, i8* %y, i32 -128
1761 %0 = bitcast i8* %x to <16 x i8>*
1762 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1763 %2 = bitcast i8* %y to <16 x i8>*
1764 store <16 x i8> %1, <16 x i8>* %2, align 1
; Post-increment store test on a floating-point vector: <4 x float> is copied
; from %x to %y (both align 4) while %z = %y + 4 is computed.
; Little-endian expects the +4 folded into a post-indexed vstrb.8; big-endian
; expects a post-indexed vstrw.32.
1769 define i8* @strf32_4(i8* %y, i8* %x) {
1770 ; CHECK-LE-LABEL: strf32_4:
1771 ; CHECK-LE: @ %bb.0: @ %entry
1772 ; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
1773 ; CHECK-LE-NEXT: vstrb.8 q0, [r0], #4
1774 ; CHECK-LE-NEXT: bx lr
1776 ; CHECK-BE-LABEL: strf32_4:
1777 ; CHECK-BE: @ %bb.0: @ %entry
1778 ; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
1779 ; CHECK-BE-NEXT: vstrw.32 q0, [r0], #4
1780 ; CHECK-BE-NEXT: bx lr
1782 %z = getelementptr inbounds i8, i8* %y, i32 4
1783 %0 = bitcast i8* %x to <4 x float>*
1784 %1 = load <4 x float>, <4 x float>* %0, align 4
1785 %2 = bitcast i8* %y to <4 x float>*
1786 store <4 x float> %1, <4 x float>* %2, align 4
; Post-increment store test on a half-precision vector: <8 x half> is copied
; from %x to %y (both align 2) while %z = %y + 4 is computed.
; Little-endian expects the +4 folded into a post-indexed vstrb.8; big-endian
; expects a post-indexed vstrh.16.
1790 define i8* @strf16_4(i8* %y, i8* %x) {
1791 ; CHECK-LE-LABEL: strf16_4:
1792 ; CHECK-LE: @ %bb.0: @ %entry
1793 ; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
1794 ; CHECK-LE-NEXT: vstrb.8 q0, [r0], #4
1795 ; CHECK-LE-NEXT: bx lr
1797 ; CHECK-BE-LABEL: strf16_4:
1798 ; CHECK-BE: @ %bb.0: @ %entry
1799 ; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
1800 ; CHECK-BE-NEXT: vstrh.16 q0, [r0], #4
1801 ; CHECK-BE-NEXT: bx lr
1803 %z = getelementptr inbounds i8, i8* %y, i32 4
1804 %0 = bitcast i8* %x to <8 x half>*
1805 %1 = load <8 x half>, <8 x half>* %0, align 2
1806 %2 = bitcast i8* %y to <8 x half>*
1807 store <8 x half> %1, <8 x half>* %2, align 2
; Under-aligned post-increment store test: <4 x i32> is loaded from %x with
; align 4 and stored to %y with align 1, while %z = %y + 3 is computed.
; Little-endian expects the +3 folded into a post-indexed vstrb.8; big-endian
; expects a vrev32.8, a plain vstrb.8, and a separate adds.
1811 define i8* @strwi32_align1(i8* %y, i8* %x) {
1812 ; CHECK-LE-LABEL: strwi32_align1:
1813 ; CHECK-LE: @ %bb.0: @ %entry
1814 ; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
1815 ; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3
1816 ; CHECK-LE-NEXT: bx lr
1818 ; CHECK-BE-LABEL: strwi32_align1:
1819 ; CHECK-BE: @ %bb.0: @ %entry
1820 ; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
1821 ; CHECK-BE-NEXT: vrev32.8 q0, q0
1822 ; CHECK-BE-NEXT: vstrb.8 q0, [r0]
1823 ; CHECK-BE-NEXT: adds r0, #3
1824 ; CHECK-BE-NEXT: bx lr
1826 %z = getelementptr inbounds i8, i8* %y, i32 3
1827 %0 = bitcast i8* %x to <4 x i32>*
1828 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1829 %2 = bitcast i8* %y to <4 x i32>*
1830 store <4 x i32> %1, <4 x i32>* %2, align 1
; Under-aligned post-increment store test: <8 x i16> is loaded from %x with
; align 2 and stored to %y with align 1, while %z = %y + 3 is computed.
; Little-endian expects the +3 folded into a post-indexed vstrb.8; big-endian
; expects a vrev16.8, a plain vstrb.8, and a separate adds.
1834 define i8* @strhi16_align1(i8* %y, i8* %x) {
1835 ; CHECK-LE-LABEL: strhi16_align1:
1836 ; CHECK-LE: @ %bb.0: @ %entry
1837 ; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
1838 ; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3
1839 ; CHECK-LE-NEXT: bx lr
1841 ; CHECK-BE-LABEL: strhi16_align1:
1842 ; CHECK-BE: @ %bb.0: @ %entry
1843 ; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
1844 ; CHECK-BE-NEXT: vrev16.8 q0, q0
1845 ; CHECK-BE-NEXT: vstrb.8 q0, [r0]
1846 ; CHECK-BE-NEXT: adds r0, #3
1847 ; CHECK-BE-NEXT: bx lr
1849 %z = getelementptr inbounds i8, i8* %y, i32 3
1850 %0 = bitcast i8* %x to <8 x i16>*
1851 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1852 %2 = bitcast i8* %y to <8 x i16>*
1853 store <8 x i16> %1, <8 x i16>* %2, align 1
; Under-aligned truncating store test: <4 x i32> is loaded from %x, truncated
; to <4 x i16>, and stored to %y with align 1, while %z = %y + 3 is computed.
; The expected output goes through an 8-byte stack slot: vstrh.32 to the stack,
; ldrd back into r1/r2, two scalar str to the destination, then a separate adds
; for the pointer update (no post-indexed form is expected here).
1857 define i8* @strhi32_align1(i8* %y, i8* %x) {
1858 ; CHECK-LABEL: strhi32_align1:
1859 ; CHECK: @ %bb.0: @ %entry
1860 ; CHECK-NEXT: .pad #8
1861 ; CHECK-NEXT: sub sp, #8
1862 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1863 ; CHECK-NEXT: mov r1, sp
1864 ; CHECK-NEXT: vstrh.32 q0, [r1]
1865 ; CHECK-NEXT: ldrd r1, r2, [sp]
1866 ; CHECK-NEXT: str r1, [r0]
1867 ; CHECK-NEXT: str r2, [r0, #4]
1868 ; CHECK-NEXT: adds r0, #3
1869 ; CHECK-NEXT: add sp, #8
1872 %z = getelementptr inbounds i8, i8* %y, i32 3
1873 %0 = bitcast i8* %x to <4 x i32>*
1874 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1875 %2 = bitcast i8* %y to <4 x i16>*
1876 %3 = trunc <4 x i32> %1 to <4 x i16>
1877 store <4 x i16> %3, <4 x i16>* %2, align 1
; Under-aligned post-increment store test on a floating-point vector:
; <4 x float> is loaded from %x with align 4 and stored to %y with align 1,
; while %z = %y + 3 is computed.
; Little-endian expects the +3 folded into a post-indexed vstrb.8; big-endian
; expects a vrev32.8, a plain vstrb.8, and a separate adds.
1881 define i8* @strf32_align1(i8* %y, i8* %x) {
1882 ; CHECK-LE-LABEL: strf32_align1:
1883 ; CHECK-LE: @ %bb.0: @ %entry
1884 ; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
1885 ; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3
1886 ; CHECK-LE-NEXT: bx lr
1888 ; CHECK-BE-LABEL: strf32_align1:
1889 ; CHECK-BE: @ %bb.0: @ %entry
1890 ; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
1891 ; CHECK-BE-NEXT: vrev32.8 q0, q0
1892 ; CHECK-BE-NEXT: vstrb.8 q0, [r0]
1893 ; CHECK-BE-NEXT: adds r0, #3
1894 ; CHECK-BE-NEXT: bx lr
1896 %z = getelementptr inbounds i8, i8* %y, i32 3
1897 %0 = bitcast i8* %x to <4 x float>*
1898 %1 = load <4 x float>, <4 x float>* %0, align 4
1899 %2 = bitcast i8* %y to <4 x float>*
1900 store <4 x float> %1, <4 x float>* %2, align 1
; Under-aligned post-increment store test on a half-precision vector:
; <8 x half> is loaded from %x with align 2 and stored to %y with align 1,
; while %z = %y + 3 is computed.
; Little-endian expects the +3 folded into a post-indexed vstrb.8; big-endian
; expects a vrev16.8, a plain vstrb.8, and a separate adds.
1904 define i8* @strf16_align1(i8* %y, i8* %x) {
1905 ; CHECK-LE-LABEL: strf16_align1:
1906 ; CHECK-LE: @ %bb.0: @ %entry
1907 ; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
1908 ; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3
1909 ; CHECK-LE-NEXT: bx lr
1911 ; CHECK-BE-LABEL: strf16_align1:
1912 ; CHECK-BE: @ %bb.0: @ %entry
1913 ; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
1914 ; CHECK-BE-NEXT: vrev16.8 q0, q0
1915 ; CHECK-BE-NEXT: vstrb.8 q0, [r0]
1916 ; CHECK-BE-NEXT: adds r0, #3
1917 ; CHECK-BE-NEXT: bx lr
1919 %z = getelementptr inbounds i8, i8* %y, i32 3
1920 %0 = bitcast i8* %x to <8 x half>*
1921 %1 = load <8 x half>, <8 x half>* %0, align 2
1922 %2 = bitcast i8* %y to <8 x half>*
1923 store <8 x half> %1, <8 x half>* %2, align 1
; Over-aligned post-increment store test: an <8 x i16> vector is loaded from %x
; with align 2 and stored to %y with align 8, while %z = %y + 16 is computed.
; Little-endian expects the +16 folded into a post-indexed vstrb.8; big-endian
; expects a post-indexed vstrh.16.
; NOTE(review): the name says f16 but the IR operates on <8 x i16>, not
; <8 x half> - confirm whether that is intentional.
1927 define i8* @strf16_align8(i8* %y, i8* %x) {
1928 ; CHECK-LE-LABEL: strf16_align8:
1929 ; CHECK-LE: @ %bb.0: @ %entry
1930 ; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
1931 ; CHECK-LE-NEXT: vstrb.8 q0, [r0], #16
1932 ; CHECK-LE-NEXT: bx lr
1934 ; CHECK-BE-LABEL: strf16_align8:
1935 ; CHECK-BE: @ %bb.0: @ %entry
1936 ; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
1937 ; CHECK-BE-NEXT: vstrh.16 q0, [r0], #16
1938 ; CHECK-BE-NEXT: bx lr
1940 %z = getelementptr inbounds i8, i8* %y, i32 16
1941 %0 = bitcast i8* %x to <8 x i16>*
1942 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1943 %2 = bitcast i8* %y to <8 x i16>*
1944 store <8 x i16> %1, <8 x i16>* %2, align 8