1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
; <4 x i32> load from %x (align 4) copied to %y, with %z = %x + 4 bytes.
; Expected output folds the increment into a post-indexed vldrw.u32 (writeback #4).
4 define i8* @ldrwu32_4(i8* %x, i8* %y) {
5 ; CHECK-LABEL: ldrwu32_4:
6 ; CHECK: @ %bb.0: @ %entry
7 ; CHECK-NEXT: vldrw.u32 q0, [r0], #4
8 ; CHECK-NEXT: vstrw.32 q0, [r1]
11 %z = getelementptr inbounds i8, i8* %x, i32 4
12 %0 = bitcast i8* %x to <4 x i32>*
13 %1 = load <4 x i32>, <4 x i32>* %0, align 4
14 %2 = bitcast i8* %y to <4 x i32>*
15 store <4 x i32> %1, <4 x i32>* %2, align 4
; <4 x i32> load from %x (align 4) copied to %y, with %z = %x + 3 bytes.
; Expected output uses the byte form vldrb.u8 with writeback #3 rather than vldrw.u32.
19 define i8* @ldrwu32_3(i8* %x, i8* %y) {
20 ; CHECK-LABEL: ldrwu32_3:
21 ; CHECK: @ %bb.0: @ %entry
22 ; CHECK-NEXT: vldrb.u8 q0, [r0], #3
23 ; CHECK-NEXT: vstrw.32 q0, [r1]
26 %z = getelementptr inbounds i8, i8* %x, i32 3
27 %0 = bitcast i8* %x to <4 x i32>*
28 %1 = load <4 x i32>, <4 x i32>* %0, align 4
29 %2 = bitcast i8* %y to <4 x i32>*
30 store <4 x i32> %1, <4 x i32>* %2, align 4
; <4 x i32> load from %x (align 4) copied to %y, with %z = %x - 4 bytes.
; Expected output folds the negative step into a post-indexed vldrw.u32 (writeback #-4).
34 define i8* @ldrwu32_m4(i8* %x, i8* %y) {
35 ; CHECK-LABEL: ldrwu32_m4:
36 ; CHECK: @ %bb.0: @ %entry
37 ; CHECK-NEXT: vldrw.u32 q0, [r0], #-4
38 ; CHECK-NEXT: vstrw.32 q0, [r1]
41 %z = getelementptr inbounds i8, i8* %x, i32 -4
42 %0 = bitcast i8* %x to <4 x i32>*
43 %1 = load <4 x i32>, <4 x i32>* %0, align 4
44 %2 = bitcast i8* %y to <4 x i32>*
45 store <4 x i32> %1, <4 x i32>* %2, align 4
; <4 x i32> load from %x (align 4) copied to %y, with %z = %x + 508 bytes.
; Expected output still folds the increment into vldrw.u32 (writeback #508).
49 define i8* @ldrwu32_508(i8* %x, i8* %y) {
50 ; CHECK-LABEL: ldrwu32_508:
51 ; CHECK: @ %bb.0: @ %entry
52 ; CHECK-NEXT: vldrw.u32 q0, [r0], #508
53 ; CHECK-NEXT: vstrw.32 q0, [r1]
56 %z = getelementptr inbounds i8, i8* %x, i32 508
57 %0 = bitcast i8* %x to <4 x i32>*
58 %1 = load <4 x i32>, <4 x i32>* %0, align 4
59 %2 = bitcast i8* %y to <4 x i32>*
60 store <4 x i32> %1, <4 x i32>* %2, align 4
; <4 x i32> load from %x (align 4) copied to %y, with %z = %x + 512 bytes.
; Expected output does not fold the increment: plain vldrw.u32 plus a separate add.w #512.
; NOTE(review): presumably 512 exceeds the post-index immediate range (508 folds in ldrwu32_508) - confirm.
64 define i8* @ldrwu32_512(i8* %x, i8* %y) {
65 ; CHECK-LABEL: ldrwu32_512:
66 ; CHECK: @ %bb.0: @ %entry
67 ; CHECK-NEXT: vldrw.u32 q0, [r0]
68 ; CHECK-NEXT: add.w r0, r0, #512
69 ; CHECK-NEXT: vstrw.32 q0, [r1]
72 %z = getelementptr inbounds i8, i8* %x, i32 512
73 %0 = bitcast i8* %x to <4 x i32>*
74 %1 = load <4 x i32>, <4 x i32>* %0, align 4
75 %2 = bitcast i8* %y to <4 x i32>*
76 store <4 x i32> %1, <4 x i32>* %2, align 4
; <4 x i32> load from %x (align 4) copied to %y, with %z = %x - 508 bytes.
; Expected output folds the negative step into vldrw.u32 (writeback #-508).
80 define i8* @ldrwu32_m508(i8* %x, i8* %y) {
81 ; CHECK-LABEL: ldrwu32_m508:
82 ; CHECK: @ %bb.0: @ %entry
83 ; CHECK-NEXT: vldrw.u32 q0, [r0], #-508
84 ; CHECK-NEXT: vstrw.32 q0, [r1]
87 %z = getelementptr inbounds i8, i8* %x, i32 -508
88 %0 = bitcast i8* %x to <4 x i32>*
89 %1 = load <4 x i32>, <4 x i32>* %0, align 4
90 %2 = bitcast i8* %y to <4 x i32>*
91 store <4 x i32> %1, <4 x i32>* %2, align 4
; <4 x i32> load from %x (align 4) copied to %y, with %z = %x - 512 bytes.
; Expected output does not fold the step: plain vldrw.u32 plus a separate sub.w #512.
; NOTE(review): presumably -512 is outside the post-index immediate range (-508 folds in ldrwu32_m508) - confirm.
95 define i8* @ldrwu32_m512(i8* %x, i8* %y) {
96 ; CHECK-LABEL: ldrwu32_m512:
97 ; CHECK: @ %bb.0: @ %entry
98 ; CHECK-NEXT: vldrw.u32 q0, [r0]
99 ; CHECK-NEXT: sub.w r0, r0, #512
100 ; CHECK-NEXT: vstrw.32 q0, [r1]
103 %z = getelementptr inbounds i8, i8* %x, i32 -512
104 %0 = bitcast i8* %x to <4 x i32>*
105 %1 = load <4 x i32>, <4 x i32>* %0, align 4
106 %2 = bitcast i8* %y to <4 x i32>*
107 store <4 x i32> %1, <4 x i32>* %2, align 4
; Zero-extending <4 x i16> -> <4 x i32> load from %x (align 2), with %z = %x + 4 bytes.
; Expected output folds the increment into a post-indexed vldrh.u32 (writeback #4).
112 define i8* @ldrhu32_4(i8* %x, i8* %y) {
113 ; CHECK-LABEL: ldrhu32_4:
114 ; CHECK: @ %bb.0: @ %entry
115 ; CHECK-NEXT: vldrh.u32 q0, [r0], #4
116 ; CHECK-NEXT: vstrw.32 q0, [r1]
119 %z = getelementptr inbounds i8, i8* %x, i32 4
120 %0 = bitcast i8* %x to <4 x i16>*
121 %1 = load <4 x i16>, <4 x i16>* %0, align 2
122 %2 = zext <4 x i16> %1 to <4 x i32>
123 %3 = bitcast i8* %y to <4 x i32>*
124 store <4 x i32> %2, <4 x i32>* %3, align 4
; Zero-extending <4 x i16> -> <4 x i32> load from %x (align 2), with %z = %x + 3 bytes.
; Expected output keeps the increment separate: plain vldrh.u32 followed by adds r0, #3.
; NOTE(review): presumably an odd offset cannot be encoded for the halfword form - confirm.
128 define i8* @ldrhu32_3(i8* %x, i8* %y) {
129 ; CHECK-LABEL: ldrhu32_3:
130 ; CHECK: @ %bb.0: @ %entry
131 ; CHECK-NEXT: vldrh.u32 q0, [r0]
132 ; CHECK-NEXT: adds r0, #3
133 ; CHECK-NEXT: vstrw.32 q0, [r1]
136 %z = getelementptr inbounds i8, i8* %x, i32 3
137 %0 = bitcast i8* %x to <4 x i16>*
138 %1 = load <4 x i16>, <4 x i16>* %0, align 2
139 %2 = zext <4 x i16> %1 to <4 x i32>
140 %3 = bitcast i8* %y to <4 x i32>*
141 store <4 x i32> %2, <4 x i32>* %3, align 4
; Zero-extending <4 x i16> -> <4 x i32> load from %x (align 2), with %z = %x + 2 bytes.
; Expected output folds the increment into a post-indexed vldrh.u32 (writeback #2).
145 define i8* @ldrhu32_2(i8* %x, i8* %y) {
146 ; CHECK-LABEL: ldrhu32_2:
147 ; CHECK: @ %bb.0: @ %entry
148 ; CHECK-NEXT: vldrh.u32 q0, [r0], #2
149 ; CHECK-NEXT: vstrw.32 q0, [r1]
152 %z = getelementptr inbounds i8, i8* %x, i32 2
153 %0 = bitcast i8* %x to <4 x i16>*
154 %1 = load <4 x i16>, <4 x i16>* %0, align 2
155 %2 = zext <4 x i16> %1 to <4 x i32>
156 %3 = bitcast i8* %y to <4 x i32>*
157 store <4 x i32> %2, <4 x i32>* %3, align 4
; Zero-extending <4 x i16> -> <4 x i32> load from %x (align 2), with %z = %x + 254 bytes.
; Expected output still folds the increment into vldrh.u32 (writeback #254).
161 define i8* @ldrhu32_254(i8* %x, i8* %y) {
162 ; CHECK-LABEL: ldrhu32_254:
163 ; CHECK: @ %bb.0: @ %entry
164 ; CHECK-NEXT: vldrh.u32 q0, [r0], #254
165 ; CHECK-NEXT: vstrw.32 q0, [r1]
168 %z = getelementptr inbounds i8, i8* %x, i32 254
169 %0 = bitcast i8* %x to <4 x i16>*
170 %1 = load <4 x i16>, <4 x i16>* %0, align 2
171 %2 = zext <4 x i16> %1 to <4 x i32>
172 %3 = bitcast i8* %y to <4 x i32>*
173 store <4 x i32> %2, <4 x i32>* %3, align 4
; Zero-extending <4 x i16> -> <4 x i32> load from %x (align 2), with %z = %x + 256 bytes.
; Expected output does not fold the increment: plain vldrh.u32 plus a separate add.w #256.
; NOTE(review): presumably 256 exceeds the halfword post-index range (254 folds in ldrhu32_254) - confirm.
177 define i8* @ldrhu32_256(i8* %x, i8* %y) {
178 ; CHECK-LABEL: ldrhu32_256:
179 ; CHECK: @ %bb.0: @ %entry
180 ; CHECK-NEXT: vldrh.u32 q0, [r0]
181 ; CHECK-NEXT: add.w r0, r0, #256
182 ; CHECK-NEXT: vstrw.32 q0, [r1]
185 %z = getelementptr inbounds i8, i8* %x, i32 256
186 %0 = bitcast i8* %x to <4 x i16>*
187 %1 = load <4 x i16>, <4 x i16>* %0, align 2
188 %2 = zext <4 x i16> %1 to <4 x i32>
189 %3 = bitcast i8* %y to <4 x i32>*
190 store <4 x i32> %2, <4 x i32>* %3, align 4
; Sign-extending <4 x i16> -> <4 x i32> load from %x (align 2), with %z = %x + 4 bytes.
; Expected output folds the increment into a post-indexed vldrh.s32 (writeback #4).
195 define i8* @ldrhs32_4(i8* %x, i8* %y) {
196 ; CHECK-LABEL: ldrhs32_4:
197 ; CHECK: @ %bb.0: @ %entry
198 ; CHECK-NEXT: vldrh.s32 q0, [r0], #4
199 ; CHECK-NEXT: vstrw.32 q0, [r1]
202 %z = getelementptr inbounds i8, i8* %x, i32 4
203 %0 = bitcast i8* %x to <4 x i16>*
204 %1 = load <4 x i16>, <4 x i16>* %0, align 2
205 %2 = sext <4 x i16> %1 to <4 x i32>
206 %3 = bitcast i8* %y to <4 x i32>*
207 store <4 x i32> %2, <4 x i32>* %3, align 4
; Sign-extending <4 x i16> -> <4 x i32> load from %x (align 2), with %z = %x + 3 bytes.
; Expected output keeps the increment separate: plain vldrh.s32 followed by adds r0, #3.
; NOTE(review): presumably an odd offset cannot be encoded for the halfword form - confirm.
211 define i8* @ldrhs32_3(i8* %x, i8* %y) {
212 ; CHECK-LABEL: ldrhs32_3:
213 ; CHECK: @ %bb.0: @ %entry
214 ; CHECK-NEXT: vldrh.s32 q0, [r0]
215 ; CHECK-NEXT: adds r0, #3
216 ; CHECK-NEXT: vstrw.32 q0, [r1]
219 %z = getelementptr inbounds i8, i8* %x, i32 3
220 %0 = bitcast i8* %x to <4 x i16>*
221 %1 = load <4 x i16>, <4 x i16>* %0, align 2
222 %2 = sext <4 x i16> %1 to <4 x i32>
223 %3 = bitcast i8* %y to <4 x i32>*
224 store <4 x i32> %2, <4 x i32>* %3, align 4
; Sign-extending <4 x i16> -> <4 x i32> load from %x (align 2), with %z = %x + 2 bytes.
; Expected output folds the increment into a post-indexed vldrh.s32 (writeback #2).
228 define i8* @ldrhs32_2(i8* %x, i8* %y) {
229 ; CHECK-LABEL: ldrhs32_2:
230 ; CHECK: @ %bb.0: @ %entry
231 ; CHECK-NEXT: vldrh.s32 q0, [r0], #2
232 ; CHECK-NEXT: vstrw.32 q0, [r1]
235 %z = getelementptr inbounds i8, i8* %x, i32 2
236 %0 = bitcast i8* %x to <4 x i16>*
237 %1 = load <4 x i16>, <4 x i16>* %0, align 2
238 %2 = sext <4 x i16> %1 to <4 x i32>
239 %3 = bitcast i8* %y to <4 x i32>*
240 store <4 x i32> %2, <4 x i32>* %3, align 4
; Sign-extending <4 x i16> -> <4 x i32> load from %x (align 2), with %z = %x + 254 bytes.
; Expected output still folds the increment into vldrh.s32 (writeback #254).
244 define i8* @ldrhs32_254(i8* %x, i8* %y) {
245 ; CHECK-LABEL: ldrhs32_254:
246 ; CHECK: @ %bb.0: @ %entry
247 ; CHECK-NEXT: vldrh.s32 q0, [r0], #254
248 ; CHECK-NEXT: vstrw.32 q0, [r1]
251 %z = getelementptr inbounds i8, i8* %x, i32 254
252 %0 = bitcast i8* %x to <4 x i16>*
253 %1 = load <4 x i16>, <4 x i16>* %0, align 2
254 %2 = sext <4 x i16> %1 to <4 x i32>
255 %3 = bitcast i8* %y to <4 x i32>*
256 store <4 x i32> %2, <4 x i32>* %3, align 4
; Sign-extending <4 x i16> -> <4 x i32> load from %x (align 2), with %z = %x + 256 bytes.
; Expected output does not fold the increment: plain vldrh.s32 plus a separate add.w #256.
; NOTE(review): presumably 256 exceeds the halfword post-index range (254 folds in ldrhs32_254) - confirm.
260 define i8* @ldrhs32_256(i8* %x, i8* %y) {
261 ; CHECK-LABEL: ldrhs32_256:
262 ; CHECK: @ %bb.0: @ %entry
263 ; CHECK-NEXT: vldrh.s32 q0, [r0]
264 ; CHECK-NEXT: add.w r0, r0, #256
265 ; CHECK-NEXT: vstrw.32 q0, [r1]
268 %z = getelementptr inbounds i8, i8* %x, i32 256
269 %0 = bitcast i8* %x to <4 x i16>*
270 %1 = load <4 x i16>, <4 x i16>* %0, align 2
271 %2 = sext <4 x i16> %1 to <4 x i32>
272 %3 = bitcast i8* %y to <4 x i32>*
273 store <4 x i32> %2, <4 x i32>* %3, align 4
; <8 x i16> load from %x (align 2) copied to %y, with %z = %x + 4 bytes.
; Expected output folds the increment into a post-indexed vldrh.u16 (writeback #4).
278 define i8* @ldrhu16_4(i8* %x, i8* %y) {
279 ; CHECK-LABEL: ldrhu16_4:
280 ; CHECK: @ %bb.0: @ %entry
281 ; CHECK-NEXT: vldrh.u16 q0, [r0], #4
282 ; CHECK-NEXT: vstrh.16 q0, [r1]
285 %z = getelementptr inbounds i8, i8* %x, i32 4
286 %0 = bitcast i8* %x to <8 x i16>*
287 %1 = load <8 x i16>, <8 x i16>* %0, align 2
288 %2 = bitcast i8* %y to <8 x i16>*
289 store <8 x i16> %1, <8 x i16>* %2, align 2
; <8 x i16> load from %x (align 2) copied to %y, with %z = %x + 3 bytes.
; Expected output uses the byte form vldrb.u8 with writeback #3 rather than vldrh.u16.
293 define i8* @ldrhu16_3(i8* %x, i8* %y) {
294 ; CHECK-LABEL: ldrhu16_3:
295 ; CHECK: @ %bb.0: @ %entry
296 ; CHECK-NEXT: vldrb.u8 q0, [r0], #3
297 ; CHECK-NEXT: vstrh.16 q0, [r1]
300 %z = getelementptr inbounds i8, i8* %x, i32 3
301 %0 = bitcast i8* %x to <8 x i16>*
302 %1 = load <8 x i16>, <8 x i16>* %0, align 2
303 %2 = bitcast i8* %y to <8 x i16>*
304 store <8 x i16> %1, <8 x i16>* %2, align 2
; <8 x i16> load from %x (align 2) copied to %y, with %z = %x + 2 bytes.
; Expected output folds the increment into a post-indexed vldrh.u16 (writeback #2).
308 define i8* @ldrhu16_2(i8* %x, i8* %y) {
309 ; CHECK-LABEL: ldrhu16_2:
310 ; CHECK: @ %bb.0: @ %entry
311 ; CHECK-NEXT: vldrh.u16 q0, [r0], #2
312 ; CHECK-NEXT: vstrh.16 q0, [r1]
315 %z = getelementptr inbounds i8, i8* %x, i32 2
316 %0 = bitcast i8* %x to <8 x i16>*
317 %1 = load <8 x i16>, <8 x i16>* %0, align 2
318 %2 = bitcast i8* %y to <8 x i16>*
319 store <8 x i16> %1, <8 x i16>* %2, align 2
; <8 x i16> load from %x (align 2) copied to %y, with %z = %x + 254 bytes.
; Expected output still folds the increment into vldrh.u16 (writeback #254).
323 define i8* @ldrhu16_254(i8* %x, i8* %y) {
324 ; CHECK-LABEL: ldrhu16_254:
325 ; CHECK: @ %bb.0: @ %entry
326 ; CHECK-NEXT: vldrh.u16 q0, [r0], #254
327 ; CHECK-NEXT: vstrh.16 q0, [r1]
330 %z = getelementptr inbounds i8, i8* %x, i32 254
331 %0 = bitcast i8* %x to <8 x i16>*
332 %1 = load <8 x i16>, <8 x i16>* %0, align 2
333 %2 = bitcast i8* %y to <8 x i16>*
334 store <8 x i16> %1, <8 x i16>* %2, align 2
; <8 x i16> load from %x (align 2) copied to %y, with %z = %x + 256 bytes.
; Expected output does not fold the increment: plain vldrh.u16 plus a separate add.w #256.
; NOTE(review): presumably 256 exceeds the halfword post-index range (254 folds in ldrhu16_254) - confirm.
338 define i8* @ldrhu16_256(i8* %x, i8* %y) {
339 ; CHECK-LABEL: ldrhu16_256:
340 ; CHECK: @ %bb.0: @ %entry
341 ; CHECK-NEXT: vldrh.u16 q0, [r0]
342 ; CHECK-NEXT: add.w r0, r0, #256
343 ; CHECK-NEXT: vstrh.16 q0, [r1]
346 %z = getelementptr inbounds i8, i8* %x, i32 256
347 %0 = bitcast i8* %x to <8 x i16>*
348 %1 = load <8 x i16>, <8 x i16>* %0, align 2
349 %2 = bitcast i8* %y to <8 x i16>*
350 store <8 x i16> %1, <8 x i16>* %2, align 2
; Zero-extending <4 x i8> -> <4 x i32> load from %x (align 1), with %z = %x + 4 bytes.
; Expected output folds the increment into a post-indexed vldrb.u32 (writeback #4).
355 define i8* @ldrbu32_4(i8* %x, i8* %y) {
356 ; CHECK-LABEL: ldrbu32_4:
357 ; CHECK: @ %bb.0: @ %entry
358 ; CHECK-NEXT: vldrb.u32 q0, [r0], #4
359 ; CHECK-NEXT: vstrw.32 q0, [r1]
362 %z = getelementptr inbounds i8, i8* %x, i32 4
363 %0 = bitcast i8* %x to <4 x i8>*
364 %1 = load <4 x i8>, <4 x i8>* %0, align 1
365 %2 = zext <4 x i8> %1 to <4 x i32>
366 %3 = bitcast i8* %y to <4 x i32>*
367 store <4 x i32> %2, <4 x i32>* %3, align 4
; Zero-extending <4 x i8> -> <4 x i32> load from %x (align 1), with %z = %x + 3 bytes.
; Expected output folds the odd increment into a post-indexed vldrb.u32 (writeback #3).
371 define i8* @ldrbu32_3(i8* %x, i8* %y) {
372 ; CHECK-LABEL: ldrbu32_3:
373 ; CHECK: @ %bb.0: @ %entry
374 ; CHECK-NEXT: vldrb.u32 q0, [r0], #3
375 ; CHECK-NEXT: vstrw.32 q0, [r1]
378 %z = getelementptr inbounds i8, i8* %x, i32 3
379 %0 = bitcast i8* %x to <4 x i8>*
380 %1 = load <4 x i8>, <4 x i8>* %0, align 1
381 %2 = zext <4 x i8> %1 to <4 x i32>
382 %3 = bitcast i8* %y to <4 x i32>*
383 store <4 x i32> %2, <4 x i32>* %3, align 4
; Zero-extending <4 x i8> -> <4 x i32> load from %x (align 1), with %z = %x + 127 bytes.
; Expected output still folds the increment into vldrb.u32 (writeback #127).
387 define i8* @ldrbu32_127(i8* %x, i8* %y) {
388 ; CHECK-LABEL: ldrbu32_127:
389 ; CHECK: @ %bb.0: @ %entry
390 ; CHECK-NEXT: vldrb.u32 q0, [r0], #127
391 ; CHECK-NEXT: vstrw.32 q0, [r1]
394 %z = getelementptr inbounds i8, i8* %x, i32 127
395 %0 = bitcast i8* %x to <4 x i8>*
396 %1 = load <4 x i8>, <4 x i8>* %0, align 1
397 %2 = zext <4 x i8> %1 to <4 x i32>
398 %3 = bitcast i8* %y to <4 x i32>*
399 store <4 x i32> %2, <4 x i32>* %3, align 4
; Zero-extending <4 x i8> -> <4 x i32> load from %x (align 1), with %z = %x + 128 bytes.
; Expected output does not fold the increment: plain vldrb.u32 followed by adds r0, #128.
; NOTE(review): presumably 128 exceeds the byte post-index range (127 folds in ldrbu32_127) - confirm.
403 define i8* @ldrbu32_128(i8* %x, i8* %y) {
404 ; CHECK-LABEL: ldrbu32_128:
405 ; CHECK: @ %bb.0: @ %entry
406 ; CHECK-NEXT: vldrb.u32 q0, [r0]
407 ; CHECK-NEXT: adds r0, #128
408 ; CHECK-NEXT: vstrw.32 q0, [r1]
411 %z = getelementptr inbounds i8, i8* %x, i32 128
412 %0 = bitcast i8* %x to <4 x i8>*
413 %1 = load <4 x i8>, <4 x i8>* %0, align 1
414 %2 = zext <4 x i8> %1 to <4 x i32>
415 %3 = bitcast i8* %y to <4 x i32>*
416 store <4 x i32> %2, <4 x i32>* %3, align 4
; Sign-extending <4 x i8> -> <4 x i32> load from %x (align 1), with %z = %x + 4 bytes.
; Expected output folds the increment into a post-indexed vldrb.s32 (writeback #4).
421 define i8* @ldrbs32_4(i8* %x, i8* %y) {
422 ; CHECK-LABEL: ldrbs32_4:
423 ; CHECK: @ %bb.0: @ %entry
424 ; CHECK-NEXT: vldrb.s32 q0, [r0], #4
425 ; CHECK-NEXT: vstrw.32 q0, [r1]
428 %z = getelementptr inbounds i8, i8* %x, i32 4
429 %0 = bitcast i8* %x to <4 x i8>*
430 %1 = load <4 x i8>, <4 x i8>* %0, align 1
431 %2 = sext <4 x i8> %1 to <4 x i32>
432 %3 = bitcast i8* %y to <4 x i32>*
433 store <4 x i32> %2, <4 x i32>* %3, align 4
; Sign-extending <4 x i8> -> <4 x i32> load from %x (align 1), with %z = %x + 3 bytes.
; Expected output folds the odd increment into a post-indexed vldrb.s32 (writeback #3).
437 define i8* @ldrbs32_3(i8* %x, i8* %y) {
438 ; CHECK-LABEL: ldrbs32_3:
439 ; CHECK: @ %bb.0: @ %entry
440 ; CHECK-NEXT: vldrb.s32 q0, [r0], #3
441 ; CHECK-NEXT: vstrw.32 q0, [r1]
444 %z = getelementptr inbounds i8, i8* %x, i32 3
445 %0 = bitcast i8* %x to <4 x i8>*
446 %1 = load <4 x i8>, <4 x i8>* %0, align 1
447 %2 = sext <4 x i8> %1 to <4 x i32>
448 %3 = bitcast i8* %y to <4 x i32>*
449 store <4 x i32> %2, <4 x i32>* %3, align 4
; Sign-extending <4 x i8> -> <4 x i32> load from %x (align 1), with %z = %x + 127 bytes.
; Expected output still folds the increment into vldrb.s32 (writeback #127).
453 define i8* @ldrbs32_127(i8* %x, i8* %y) {
454 ; CHECK-LABEL: ldrbs32_127:
455 ; CHECK: @ %bb.0: @ %entry
456 ; CHECK-NEXT: vldrb.s32 q0, [r0], #127
457 ; CHECK-NEXT: vstrw.32 q0, [r1]
460 %z = getelementptr inbounds i8, i8* %x, i32 127
461 %0 = bitcast i8* %x to <4 x i8>*
462 %1 = load <4 x i8>, <4 x i8>* %0, align 1
463 %2 = sext <4 x i8> %1 to <4 x i32>
464 %3 = bitcast i8* %y to <4 x i32>*
465 store <4 x i32> %2, <4 x i32>* %3, align 4
; Sign-extending <4 x i8> -> <4 x i32> load from %x (align 1), with %z = %x + 128 bytes.
; Expected output does not fold the increment: plain vldrb.s32 followed by adds r0, #128.
; NOTE(review): presumably 128 exceeds the byte post-index range (127 folds in ldrbs32_127) - confirm.
469 define i8* @ldrbs32_128(i8* %x, i8* %y) {
470 ; CHECK-LABEL: ldrbs32_128:
471 ; CHECK: @ %bb.0: @ %entry
472 ; CHECK-NEXT: vldrb.s32 q0, [r0]
473 ; CHECK-NEXT: adds r0, #128
474 ; CHECK-NEXT: vstrw.32 q0, [r1]
477 %z = getelementptr inbounds i8, i8* %x, i32 128
478 %0 = bitcast i8* %x to <4 x i8>*
479 %1 = load <4 x i8>, <4 x i8>* %0, align 1
480 %2 = sext <4 x i8> %1 to <4 x i32>
481 %3 = bitcast i8* %y to <4 x i32>*
482 store <4 x i32> %2, <4 x i32>* %3, align 4
; Zero-extending <8 x i8> -> <8 x i16> load from %x (align 1), with %z = %x + 4 bytes.
; Expected output folds the increment into a post-indexed vldrb.u16 (writeback #4).
487 define i8* @ldrbu16_4(i8* %x, i8* %y) {
488 ; CHECK-LABEL: ldrbu16_4:
489 ; CHECK: @ %bb.0: @ %entry
490 ; CHECK-NEXT: vldrb.u16 q0, [r0], #4
491 ; CHECK-NEXT: vstrh.16 q0, [r1]
494 %z = getelementptr inbounds i8, i8* %x, i32 4
495 %0 = bitcast i8* %x to <8 x i8>*
496 %1 = load <8 x i8>, <8 x i8>* %0, align 1
497 %2 = zext <8 x i8> %1 to <8 x i16>
498 %3 = bitcast i8* %y to <8 x i16>*
499 store <8 x i16> %2, <8 x i16>* %3, align 2
; Zero-extending <8 x i8> -> <8 x i16> load from %x (align 1), with %z = %x + 3 bytes.
; Expected output folds the odd increment into a post-indexed vldrb.u16 (writeback #3).
503 define i8* @ldrbu16_3(i8* %x, i8* %y) {
504 ; CHECK-LABEL: ldrbu16_3:
505 ; CHECK: @ %bb.0: @ %entry
506 ; CHECK-NEXT: vldrb.u16 q0, [r0], #3
507 ; CHECK-NEXT: vstrh.16 q0, [r1]
510 %z = getelementptr inbounds i8, i8* %x, i32 3
511 %0 = bitcast i8* %x to <8 x i8>*
512 %1 = load <8 x i8>, <8 x i8>* %0, align 1
513 %2 = zext <8 x i8> %1 to <8 x i16>
514 %3 = bitcast i8* %y to <8 x i16>*
515 store <8 x i16> %2, <8 x i16>* %3, align 2
; Zero-extending <8 x i8> -> <8 x i16> load from %x (align 1), with %z = %x + 127 bytes.
; Expected output still folds the increment into vldrb.u16 (writeback #127).
519 define i8* @ldrbu16_127(i8* %x, i8* %y) {
520 ; CHECK-LABEL: ldrbu16_127:
521 ; CHECK: @ %bb.0: @ %entry
522 ; CHECK-NEXT: vldrb.u16 q0, [r0], #127
523 ; CHECK-NEXT: vstrh.16 q0, [r1]
526 %z = getelementptr inbounds i8, i8* %x, i32 127
527 %0 = bitcast i8* %x to <8 x i8>*
528 %1 = load <8 x i8>, <8 x i8>* %0, align 1
529 %2 = zext <8 x i8> %1 to <8 x i16>
530 %3 = bitcast i8* %y to <8 x i16>*
531 store <8 x i16> %2, <8 x i16>* %3, align 2
; Zero-extending <8 x i8> -> <8 x i16> load from %x (align 1), with %z = %x + 128 bytes.
; Expected output does not fold the increment: plain vldrb.u16 followed by adds r0, #128.
; NOTE(review): presumably 128 exceeds the byte post-index range (127 folds in ldrbu16_127) - confirm.
535 define i8* @ldrbu16_128(i8* %x, i8* %y) {
536 ; CHECK-LABEL: ldrbu16_128:
537 ; CHECK: @ %bb.0: @ %entry
538 ; CHECK-NEXT: vldrb.u16 q0, [r0]
539 ; CHECK-NEXT: adds r0, #128
540 ; CHECK-NEXT: vstrh.16 q0, [r1]
543 %z = getelementptr inbounds i8, i8* %x, i32 128
544 %0 = bitcast i8* %x to <8 x i8>*
545 %1 = load <8 x i8>, <8 x i8>* %0, align 1
546 %2 = zext <8 x i8> %1 to <8 x i16>
547 %3 = bitcast i8* %y to <8 x i16>*
548 store <8 x i16> %2, <8 x i16>* %3, align 2
; Sign-extending <8 x i8> -> <8 x i16> load from %x (align 1), with %z = %x + 4 bytes.
; Expected output folds the increment into a post-indexed vldrb.s16 (writeback #4).
553 define i8* @ldrbs16_4(i8* %x, i8* %y) {
554 ; CHECK-LABEL: ldrbs16_4:
555 ; CHECK: @ %bb.0: @ %entry
556 ; CHECK-NEXT: vldrb.s16 q0, [r0], #4
557 ; CHECK-NEXT: vstrh.16 q0, [r1]
560 %z = getelementptr inbounds i8, i8* %x, i32 4
561 %0 = bitcast i8* %x to <8 x i8>*
562 %1 = load <8 x i8>, <8 x i8>* %0, align 1
563 %2 = sext <8 x i8> %1 to <8 x i16>
564 %3 = bitcast i8* %y to <8 x i16>*
565 store <8 x i16> %2, <8 x i16>* %3, align 2
; Sign-extending <8 x i8> -> <8 x i16> load from %x (align 1), with %z = %x + 3 bytes.
; Expected output folds the odd increment into a post-indexed vldrb.s16 (writeback #3).
569 define i8* @ldrbs16_3(i8* %x, i8* %y) {
570 ; CHECK-LABEL: ldrbs16_3:
571 ; CHECK: @ %bb.0: @ %entry
572 ; CHECK-NEXT: vldrb.s16 q0, [r0], #3
573 ; CHECK-NEXT: vstrh.16 q0, [r1]
576 %z = getelementptr inbounds i8, i8* %x, i32 3
577 %0 = bitcast i8* %x to <8 x i8>*
578 %1 = load <8 x i8>, <8 x i8>* %0, align 1
579 %2 = sext <8 x i8> %1 to <8 x i16>
580 %3 = bitcast i8* %y to <8 x i16>*
581 store <8 x i16> %2, <8 x i16>* %3, align 2
; Sign-extending <8 x i8> -> <8 x i16> load from %x (align 1), with %z = %x + 127 bytes.
; Expected output still folds the increment into vldrb.s16 (writeback #127).
585 define i8* @ldrbs16_127(i8* %x, i8* %y) {
586 ; CHECK-LABEL: ldrbs16_127:
587 ; CHECK: @ %bb.0: @ %entry
588 ; CHECK-NEXT: vldrb.s16 q0, [r0], #127
589 ; CHECK-NEXT: vstrh.16 q0, [r1]
592 %z = getelementptr inbounds i8, i8* %x, i32 127
593 %0 = bitcast i8* %x to <8 x i8>*
594 %1 = load <8 x i8>, <8 x i8>* %0, align 1
595 %2 = sext <8 x i8> %1 to <8 x i16>
596 %3 = bitcast i8* %y to <8 x i16>*
597 store <8 x i16> %2, <8 x i16>* %3, align 2
; Sign-extending <8 x i8> -> <8 x i16> load from %x (align 1), with %z = %x + 128 bytes.
; Expected output does not fold the increment: plain vldrb.s16 followed by adds r0, #128.
; NOTE(review): presumably 128 exceeds the byte post-index range (127 folds in ldrbs16_127) - confirm.
601 define i8* @ldrbs16_128(i8* %x, i8* %y) {
602 ; CHECK-LABEL: ldrbs16_128:
603 ; CHECK: @ %bb.0: @ %entry
604 ; CHECK-NEXT: vldrb.s16 q0, [r0]
605 ; CHECK-NEXT: adds r0, #128
606 ; CHECK-NEXT: vstrh.16 q0, [r1]
609 %z = getelementptr inbounds i8, i8* %x, i32 128
610 %0 = bitcast i8* %x to <8 x i8>*
611 %1 = load <8 x i8>, <8 x i8>* %0, align 1
612 %2 = sext <8 x i8> %1 to <8 x i16>
613 %3 = bitcast i8* %y to <8 x i16>*
614 store <8 x i16> %2, <8 x i16>* %3, align 2
; <16 x i8> load from %x (align 1) copied to %y, with %z = %x + 4 bytes.
; Expected output folds the increment into a post-indexed vldrb.u8 (writeback #4).
619 define i8* @ldrbu8_4(i8* %x, i8* %y) {
620 ; CHECK-LABEL: ldrbu8_4:
621 ; CHECK: @ %bb.0: @ %entry
622 ; CHECK-NEXT: vldrb.u8 q0, [r0], #4
623 ; CHECK-NEXT: vstrb.8 q0, [r1]
626 %z = getelementptr inbounds i8, i8* %x, i32 4
627 %0 = bitcast i8* %x to <16 x i8>*
628 %1 = load <16 x i8>, <16 x i8>* %0, align 1
629 %2 = bitcast i8* %y to <16 x i8>*
630 store <16 x i8> %1, <16 x i8>* %2, align 1
; <16 x i8> load from %x (align 1) copied to %y, with %z = %x + 3 bytes.
; Expected output folds the odd increment into a post-indexed vldrb.u8 (writeback #3).
634 define i8* @ldrbu8_3(i8* %x, i8* %y) {
635 ; CHECK-LABEL: ldrbu8_3:
636 ; CHECK: @ %bb.0: @ %entry
637 ; CHECK-NEXT: vldrb.u8 q0, [r0], #3
638 ; CHECK-NEXT: vstrb.8 q0, [r1]
641 %z = getelementptr inbounds i8, i8* %x, i32 3
642 %0 = bitcast i8* %x to <16 x i8>*
643 %1 = load <16 x i8>, <16 x i8>* %0, align 1
644 %2 = bitcast i8* %y to <16 x i8>*
645 store <16 x i8> %1, <16 x i8>* %2, align 1
; <16 x i8> load from %x (align 1) copied to %y, with %z = %x + 127 bytes.
; Expected output still folds the increment into vldrb.u8 (writeback #127).
649 define i8* @ldrbu8_127(i8* %x, i8* %y) {
650 ; CHECK-LABEL: ldrbu8_127:
651 ; CHECK: @ %bb.0: @ %entry
652 ; CHECK-NEXT: vldrb.u8 q0, [r0], #127
653 ; CHECK-NEXT: vstrb.8 q0, [r1]
656 %z = getelementptr inbounds i8, i8* %x, i32 127
657 %0 = bitcast i8* %x to <16 x i8>*
658 %1 = load <16 x i8>, <16 x i8>* %0, align 1
659 %2 = bitcast i8* %y to <16 x i8>*
660 store <16 x i8> %1, <16 x i8>* %2, align 1
; <16 x i8> load from %x (align 1) copied to %y, with %z = %x + 128 bytes.
; Expected output does not fold the increment: plain vldrb.u8 followed by adds r0, #128.
; NOTE(review): presumably 128 exceeds the byte post-index range (127 folds in ldrbu8_127) - confirm.
664 define i8* @ldrbu8_128(i8* %x, i8* %y) {
665 ; CHECK-LABEL: ldrbu8_128:
666 ; CHECK: @ %bb.0: @ %entry
667 ; CHECK-NEXT: vldrb.u8 q0, [r0]
668 ; CHECK-NEXT: adds r0, #128
669 ; CHECK-NEXT: vstrb.8 q0, [r1]
672 %z = getelementptr inbounds i8, i8* %x, i32 128
673 %0 = bitcast i8* %x to <16 x i8>*
674 %1 = load <16 x i8>, <16 x i8>* %0, align 1
675 %2 = bitcast i8* %y to <16 x i8>*
676 store <16 x i8> %1, <16 x i8>* %2, align 1
; <4 x float> load from %x (align 4) copied to %y, with %z = %x + 4 bytes.
; Expected output uses the same post-indexed vldrw.u32 (writeback #4) as the integer case.
680 define i8* @ldrwf32_4(i8* %x, i8* %y) {
681 ; CHECK-LABEL: ldrwf32_4:
682 ; CHECK: @ %bb.0: @ %entry
683 ; CHECK-NEXT: vldrw.u32 q0, [r0], #4
684 ; CHECK-NEXT: vstrw.32 q0, [r1]
687 %z = getelementptr inbounds i8, i8* %x, i32 4
688 %0 = bitcast i8* %x to <4 x float>*
689 %1 = load <4 x float>, <4 x float>* %0, align 4
690 %2 = bitcast i8* %y to <4 x float>*
691 store <4 x float> %1, <4 x float>* %2, align 4
; <8 x half> load from %x (align 2) copied to %y, with %z = %x + 4 bytes.
; Expected output folds the increment into a post-indexed vldrh.u16 (writeback #4).
695 define i8* @ldrwf16_4(i8* %x, i8* %y) {
696 ; CHECK-LABEL: ldrwf16_4:
697 ; CHECK: @ %bb.0: @ %entry
698 ; CHECK-NEXT: vldrh.u16 q0, [r0], #4
699 ; CHECK-NEXT: vstrh.16 q0, [r1]
702 %z = getelementptr inbounds i8, i8* %x, i32 4
703 %0 = bitcast i8* %x to <8 x half>*
704 %1 = load <8 x half>, <8 x half>* %0, align 2
705 %2 = bitcast i8* %y to <8 x half>*
706 store <8 x half> %1, <8 x half>* %2, align 2
; Under-aligned case: <4 x i32> load from %x with align 1, with %z = %x + 3 bytes.
; Expected output performs the load as vldrb.u8 with writeback #3; the store stays vstrw.32.
710 define i8* @ldrwi32_align1(i8* %x, i8* %y) {
711 ; CHECK-LABEL: ldrwi32_align1:
712 ; CHECK: @ %bb.0: @ %entry
713 ; CHECK-NEXT: vldrb.u8 q0, [r0], #3
714 ; CHECK-NEXT: vstrw.32 q0, [r1]
717 %z = getelementptr inbounds i8, i8* %x, i32 3
718 %0 = bitcast i8* %x to <4 x i32>*
719 %1 = load <4 x i32>, <4 x i32>* %0, align 1
720 %2 = bitcast i8* %y to <4 x i32>*
721 store <4 x i32> %1, <4 x i32>* %2, align 4
; Under-aligned case: <8 x i16> load from %x with align 1, with %z = %x + 3 bytes.
; Expected output performs the load as vldrb.u8 with writeback #3; the store stays vstrh.16.
725 define i8* @ldrhi16_align1(i8* %x, i8* %y) {
726 ; CHECK-LABEL: ldrhi16_align1:
727 ; CHECK: @ %bb.0: @ %entry
728 ; CHECK-NEXT: vldrb.u8 q0, [r0], #3
729 ; CHECK-NEXT: vstrh.16 q0, [r1]
732 %z = getelementptr inbounds i8, i8* %x, i32 3
733 %0 = bitcast i8* %x to <8 x i16>*
734 %1 = load <8 x i16>, <8 x i16>* %0, align 1
735 %2 = bitcast i8* %y to <8 x i16>*
736 store <8 x i16> %1, <8 x i16>* %2, align 2
; Under-aligned extending case: sign-extending <4 x i16> -> <4 x i32> load from %x with
; align 1, with %z = %x + 3 bytes. Expected output has no post-indexed vector load: the
; 8 source bytes are read with two scalar ldr, spilled to an 8-byte stack slot with strd,
; reloaded from sp with vldrh.s32, and r0 is advanced by a separate adds r0, #3.
740 define i8* @ldrhi32_align1(i8* %x, i8* %y) {
741 ; CHECK-LABEL: ldrhi32_align1:
742 ; CHECK: @ %bb.0: @ %entry
743 ; CHECK-NEXT: .pad #8
744 ; CHECK-NEXT: sub sp, #8
745 ; CHECK-NEXT: ldr r3, [r0, #4]
746 ; CHECK-NEXT: ldr r2, [r0]
747 ; CHECK-NEXT: adds r0, #3
748 ; CHECK-NEXT: strd r2, r3, [sp]
749 ; CHECK-NEXT: mov r2, sp
750 ; CHECK-NEXT: vldrh.s32 q0, [r2]
751 ; CHECK-NEXT: vstrw.32 q0, [r1]
752 ; CHECK-NEXT: add sp, #8
755 %z = getelementptr inbounds i8, i8* %x, i32 3
756 %0 = bitcast i8* %x to <4 x i16>*
757 %1 = load <4 x i16>, <4 x i16>* %0, align 1
758 %2 = bitcast i8* %y to <4 x i32>*
759 %3 = sext <4 x i16> %1 to <4 x i32>
760 store <4 x i32> %3, <4 x i32>* %2, align 4
; Under-aligned case: <4 x float> load from %x with align 1, with %z = %x + 3 bytes.
; Expected output performs the load as vldrb.u8 with writeback #3; the store stays vstrw.32.
764 define i8* @ldrf32_align1(i8* %x, i8* %y) {
765 ; CHECK-LABEL: ldrf32_align1:
766 ; CHECK: @ %bb.0: @ %entry
767 ; CHECK-NEXT: vldrb.u8 q0, [r0], #3
768 ; CHECK-NEXT: vstrw.32 q0, [r1]
771 %z = getelementptr inbounds i8, i8* %x, i32 3
772 %0 = bitcast i8* %x to <4 x float>*
773 %1 = load <4 x float>, <4 x float>* %0, align 1
774 %2 = bitcast i8* %y to <4 x float>*
775 store <4 x float> %1, <4 x float>* %2, align 4
; Under-aligned case: <8 x half> load from %x with align 1, with %z = %x + 3 bytes.
; Expected output performs the load as vldrb.u8 with writeback #3; the store stays vstrh.16.
779 define i8* @ldrf16_align1(i8* %x, i8* %y) {
780 ; CHECK-LABEL: ldrf16_align1:
781 ; CHECK: @ %bb.0: @ %entry
782 ; CHECK-NEXT: vldrb.u8 q0, [r0], #3
783 ; CHECK-NEXT: vstrh.16 q0, [r1]
786 %z = getelementptr inbounds i8, i8* %x, i32 3
787 %0 = bitcast i8* %x to <8 x half>*
788 %1 = load <8 x half>, <8 x half>* %0, align 1
789 %2 = bitcast i8* %y to <8 x half>*
790 store <8 x half> %1, <8 x half>* %2, align 2
; Store-side test: <4 x i32> copied from %x to %y (align 4), with %z = %y + 4 bytes.
; Expected output folds the increment into the store, emitted as vstrb.8 with writeback #4.
798 define i8* @strw32_4(i8* %y, i8* %x) {
799 ; CHECK-LABEL: strw32_4:
800 ; CHECK: @ %bb.0: @ %entry
801 ; CHECK-NEXT: vldrw.u32 q0, [r1]
802 ; CHECK-NEXT: vstrb.8 q0, [r0], #4
805 %z = getelementptr inbounds i8, i8* %y, i32 4
806 %0 = bitcast i8* %x to <4 x i32>*
807 %1 = load <4 x i32>, <4 x i32>* %0, align 4
808 %2 = bitcast i8* %y to <4 x i32>*
809 store <4 x i32> %1, <4 x i32>* %2, align 4
; Store-side test: <4 x i32> copied from %x to %y (align 4), with %z = %y + 3 bytes.
; Expected output folds the odd increment into the store, emitted as vstrb.8 with writeback #3.
813 define i8* @strw32_3(i8* %y, i8* %x) {
814 ; CHECK-LABEL: strw32_3:
815 ; CHECK: @ %bb.0: @ %entry
816 ; CHECK-NEXT: vldrw.u32 q0, [r1]
817 ; CHECK-NEXT: vstrb.8 q0, [r0], #3
820 %z = getelementptr inbounds i8, i8* %y, i32 3
821 %0 = bitcast i8* %x to <4 x i32>*
822 %1 = load <4 x i32>, <4 x i32>* %0, align 4
823 %2 = bitcast i8* %y to <4 x i32>*
824 store <4 x i32> %1, <4 x i32>* %2, align 4
; Store-side test: <4 x i32> copied from %x to %y (align 4), with %z = %y - 4 bytes.
; Expected output folds the negative step into the store, emitted as vstrb.8 with writeback #-4.
828 define i8* @strw32_m4(i8* %y, i8* %x) {
829 ; CHECK-LABEL: strw32_m4:
830 ; CHECK: @ %bb.0: @ %entry
831 ; CHECK-NEXT: vldrw.u32 q0, [r1]
832 ; CHECK-NEXT: vstrb.8 q0, [r0], #-4
835 %z = getelementptr inbounds i8, i8* %y, i32 -4
836 %0 = bitcast i8* %x to <4 x i32>*
837 %1 = load <4 x i32>, <4 x i32>* %0, align 4
838 %2 = bitcast i8* %y to <4 x i32>*
839 store <4 x i32> %1, <4 x i32>* %2, align 4
; Store-side test: <4 x i32> copied from %x to %y (align 4), with %z = %y + 508 bytes.
; Expected output folds the increment into a post-indexed vstrw.32 (writeback #508).
843 define i8* @strw32_508(i8* %y, i8* %x) {
844 ; CHECK-LABEL: strw32_508:
845 ; CHECK: @ %bb.0: @ %entry
846 ; CHECK-NEXT: vldrw.u32 q0, [r1]
847 ; CHECK-NEXT: vstrw.32 q0, [r0], #508
850 %z = getelementptr inbounds i8, i8* %y, i32 508
851 %0 = bitcast i8* %x to <4 x i32>*
852 %1 = load <4 x i32>, <4 x i32>* %0, align 4
853 %2 = bitcast i8* %y to <4 x i32>*
854 store <4 x i32> %1, <4 x i32>* %2, align 4
; Store-side test: <4 x i32> copied from %x to %y (align 4), with %z = %y + 512 bytes.
; Expected output does not fold the increment: plain vstrw.32 plus a separate add.w #512.
; NOTE(review): presumably 512 exceeds the post-index immediate range (508 folds in strw32_508) - confirm.
858 define i8* @strw32_512(i8* %y, i8* %x) {
859 ; CHECK-LABEL: strw32_512:
860 ; CHECK: @ %bb.0: @ %entry
861 ; CHECK-NEXT: vldrw.u32 q0, [r1]
862 ; CHECK-NEXT: vstrw.32 q0, [r0]
863 ; CHECK-NEXT: add.w r0, r0, #512
866 %z = getelementptr inbounds i8, i8* %y, i32 512
867 %0 = bitcast i8* %x to <4 x i32>*
868 %1 = load <4 x i32>, <4 x i32>* %0, align 4
869 %2 = bitcast i8* %y to <4 x i32>*
870 store <4 x i32> %1, <4 x i32>* %2, align 4
; Store-side test: <4 x i32> copied from %x to %y (align 4), with %z = %y - 508 bytes.
; Expected output folds the negative step into a post-indexed vstrw.32 (writeback #-508).
874 define i8* @strw32_m508(i8* %y, i8* %x) {
875 ; CHECK-LABEL: strw32_m508:
876 ; CHECK: @ %bb.0: @ %entry
877 ; CHECK-NEXT: vldrw.u32 q0, [r1]
878 ; CHECK-NEXT: vstrw.32 q0, [r0], #-508
881 %z = getelementptr inbounds i8, i8* %y, i32 -508
882 %0 = bitcast i8* %x to <4 x i32>*
883 %1 = load <4 x i32>, <4 x i32>* %0, align 4
884 %2 = bitcast i8* %y to <4 x i32>*
885 store <4 x i32> %1, <4 x i32>* %2, align 4
; Store-side test: <4 x i32> copied from %x to %y (align 4), with %z = %y - 512 bytes.
; Expected output does not fold the step: plain vstrw.32 plus a separate sub.w #512.
; NOTE(review): presumably -512 is outside the post-index immediate range (-508 folds in strw32_m508) - confirm.
889 define i8* @strw32_m512(i8* %y, i8* %x) {
890 ; CHECK-LABEL: strw32_m512:
891 ; CHECK: @ %bb.0: @ %entry
892 ; CHECK-NEXT: vldrw.u32 q0, [r1]
893 ; CHECK-NEXT: vstrw.32 q0, [r0]
894 ; CHECK-NEXT: sub.w r0, r0, #512
897 %z = getelementptr inbounds i8, i8* %y, i32 -512
898 %0 = bitcast i8* %x to <4 x i32>*
899 %1 = load <4 x i32>, <4 x i32>* %0, align 4
900 %2 = bitcast i8* %y to <4 x i32>*
901 store <4 x i32> %1, <4 x i32>* %2, align 4
; Store-side test: <4 x i16> copied from %x to %y (align 2), with %z = %y + 4 bytes.
; Expected output loads with vldrh.u32 and folds the increment into vstrh.32 (writeback #4).
906 define i8* @strh32_4(i8* %y, i8* %x) {
907 ; CHECK-LABEL: strh32_4:
908 ; CHECK: @ %bb.0: @ %entry
909 ; CHECK-NEXT: vldrh.u32 q0, [r1]
910 ; CHECK-NEXT: vstrh.32 q0, [r0], #4
913 %z = getelementptr inbounds i8, i8* %y, i32 4
914 %0 = bitcast i8* %x to <4 x i16>*
915 %1 = load <4 x i16>, <4 x i16>* %0, align 2
916 %2 = bitcast i8* %y to <4 x i16>*
917 store <4 x i16> %1, <4 x i16>* %2, align 2
; Store-side test: <4 x i16> copied from %x to %y (align 2), with %z = %y + 3 bytes.
; Expected output keeps the increment separate: plain vstrh.32 followed by adds r0, #3.
; NOTE(review): presumably an odd offset cannot be encoded for the halfword form - confirm.
921 define i8* @strh32_3(i8* %y, i8* %x) {
922 ; CHECK-LABEL: strh32_3:
923 ; CHECK: @ %bb.0: @ %entry
924 ; CHECK-NEXT: vldrh.u32 q0, [r1]
925 ; CHECK-NEXT: vstrh.32 q0, [r0]
926 ; CHECK-NEXT: adds r0, #3
929 %z = getelementptr inbounds i8, i8* %y, i32 3
930 %0 = bitcast i8* %x to <4 x i16>*
931 %1 = load <4 x i16>, <4 x i16>* %0, align 2
932 %2 = bitcast i8* %y to <4 x i16>*
933 store <4 x i16> %1, <4 x i16>* %2, align 2
; Store-side test: <4 x i16> copied from %x to %y (align 2), with %z = %y + 2 bytes.
; Expected output folds the increment into a post-indexed vstrh.32 (writeback #2).
937 define i8* @strh32_2(i8* %y, i8* %x) {
938 ; CHECK-LABEL: strh32_2:
939 ; CHECK: @ %bb.0: @ %entry
940 ; CHECK-NEXT: vldrh.u32 q0, [r1]
941 ; CHECK-NEXT: vstrh.32 q0, [r0], #2
944 %z = getelementptr inbounds i8, i8* %y, i32 2
945 %0 = bitcast i8* %x to <4 x i16>*
946 %1 = load <4 x i16>, <4 x i16>* %0, align 2
947 %2 = bitcast i8* %y to <4 x i16>*
948 store <4 x i16> %1, <4 x i16>* %2, align 2
; Store-side test: <4 x i16> copied from %x to %y (align 2), with %z = %y + 254 bytes.
; Expected output still folds the increment into vstrh.32 (writeback #254).
952 define i8* @strh32_254(i8* %y, i8* %x) {
953 ; CHECK-LABEL: strh32_254:
954 ; CHECK: @ %bb.0: @ %entry
955 ; CHECK-NEXT: vldrh.u32 q0, [r1]
956 ; CHECK-NEXT: vstrh.32 q0, [r0], #254
959 %z = getelementptr inbounds i8, i8* %y, i32 254
960 %0 = bitcast i8* %x to <4 x i16>*
961 %1 = load <4 x i16>, <4 x i16>* %0, align 2
962 %2 = bitcast i8* %y to <4 x i16>*
963 store <4 x i16> %1, <4 x i16>* %2, align 2
; Store-side test: <4 x i16> copied from %x to %y (align 2), with %z = %y + 256 bytes.
; Expected output does not fold the increment: plain vstrh.32 plus a separate add.w #256.
; NOTE(review): presumably 256 exceeds the halfword post-index range (254 folds in strh32_254) - confirm.
967 define i8* @strh32_256(i8* %y, i8* %x) {
968 ; CHECK-LABEL: strh32_256:
969 ; CHECK: @ %bb.0: @ %entry
970 ; CHECK-NEXT: vldrh.u32 q0, [r1]
971 ; CHECK-NEXT: vstrh.32 q0, [r0]
972 ; CHECK-NEXT: add.w r0, r0, #256
975 %z = getelementptr inbounds i8, i8* %y, i32 256
976 %0 = bitcast i8* %x to <4 x i16>*
977 %1 = load <4 x i16>, <4 x i16>* %0, align 2
978 %2 = bitcast i8* %y to <4 x i16>*
979 store <4 x i16> %1, <4 x i16>* %2, align 2
; Store-side test: <8 x i16> copied from %x to %y (align 2), with %z = %y + 4 bytes.
; Expected output folds the increment into the store, emitted as vstrb.8 with writeback #4.
984 define i8* @strh16_4(i8* %y, i8* %x) {
985 ; CHECK-LABEL: strh16_4:
986 ; CHECK: @ %bb.0: @ %entry
987 ; CHECK-NEXT: vldrh.u16 q0, [r1]
988 ; CHECK-NEXT: vstrb.8 q0, [r0], #4
991 %z = getelementptr inbounds i8, i8* %y, i32 4
992 %0 = bitcast i8* %x to <8 x i16>*
993 %1 = load <8 x i16>, <8 x i16>* %0, align 2
994 %2 = bitcast i8* %y to <8 x i16>*
995 store <8 x i16> %1, <8 x i16>* %2, align 2
; Store-side test: <8 x i16> copied from %x to %y (align 2), with %z = %y + 3 bytes.
; Expected output folds the odd increment into the store, emitted as vstrb.8 with writeback #3.
999 define i8* @strh16_3(i8* %y, i8* %x) {
1000 ; CHECK-LABEL: strh16_3:
1001 ; CHECK: @ %bb.0: @ %entry
1002 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1003 ; CHECK-NEXT: vstrb.8 q0, [r0], #3
1006 %z = getelementptr inbounds i8, i8* %y, i32 3
1007 %0 = bitcast i8* %x to <8 x i16>*
1008 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1009 %2 = bitcast i8* %y to <8 x i16>*
1010 store <8 x i16> %1, <8 x i16>* %2, align 2
; Store-side test: <8 x i16> copied from %x to %y (align 2), with %z = %y + 2 bytes.
; Expected output folds the increment into the store, emitted as vstrb.8 with writeback #2.
1014 define i8* @strh16_2(i8* %y, i8* %x) {
1015 ; CHECK-LABEL: strh16_2:
1016 ; CHECK: @ %bb.0: @ %entry
1017 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1018 ; CHECK-NEXT: vstrb.8 q0, [r0], #2
1021 %z = getelementptr inbounds i8, i8* %y, i32 2
1022 %0 = bitcast i8* %x to <8 x i16>*
1023 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1024 %2 = bitcast i8* %y to <8 x i16>*
1025 store <8 x i16> %1, <8 x i16>* %2, align 2
; Store-side test: <8 x i16> copied from %x to %y (align 2), with %z = %y + 254 bytes.
; Expected output folds the increment into a post-indexed vstrh.16 (writeback #254).
1029 define i8* @strh16_254(i8* %y, i8* %x) {
1030 ; CHECK-LABEL: strh16_254:
1031 ; CHECK: @ %bb.0: @ %entry
1032 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1033 ; CHECK-NEXT: vstrh.16 q0, [r0], #254
1036 %z = getelementptr inbounds i8, i8* %y, i32 254
1037 %0 = bitcast i8* %x to <8 x i16>*
1038 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1039 %2 = bitcast i8* %y to <8 x i16>*
1040 store <8 x i16> %1, <8 x i16>* %2, align 2
; Store-side test: <8 x i16> copied from %x to %y (align 2), with %z = %y + 256 bytes.
; Expected output does not fold the increment: plain vstrh.16 plus a separate add.w #256.
; NOTE(review): presumably 256 exceeds the halfword post-index range (254 folds in strh16_254) - confirm.
1044 define i8* @strh16_256(i8* %y, i8* %x) {
1045 ; CHECK-LABEL: strh16_256:
1046 ; CHECK: @ %bb.0: @ %entry
1047 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1048 ; CHECK-NEXT: vstrh.16 q0, [r0]
1049 ; CHECK-NEXT: add.w r0, r0, #256
1052 %z = getelementptr inbounds i8, i8* %y, i32 256
1053 %0 = bitcast i8* %x to <8 x i16>*
1054 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1055 %2 = bitcast i8* %y to <8 x i16>*
1056 store <8 x i16> %1, <8 x i16>* %2, align 2
; Store-side test: <4 x i8> copied from %x to %y (align 1), with %z = %y + 4 bytes.
; Expected output loads with vldrb.u32 and folds the increment into vstrb.32 (writeback #4).
1061 define i8* @strb32_4(i8* %y, i8* %x) {
1062 ; CHECK-LABEL: strb32_4:
1063 ; CHECK: @ %bb.0: @ %entry
1064 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1065 ; CHECK-NEXT: vstrb.32 q0, [r0], #4
1068 %z = getelementptr inbounds i8, i8* %y, i32 4
1069 %0 = bitcast i8* %x to <4 x i8>*
1070 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1071 %2 = bitcast i8* %y to <4 x i8>*
1072 store <4 x i8> %1, <4 x i8>* %2, align 1
; Store-side test: <4 x i8> copied from %x to %y (align 1), with %z = %y + 3 bytes.
; Expected output folds the odd increment into a post-indexed vstrb.32 (writeback #3).
1076 define i8* @strb32_3(i8* %y, i8* %x) {
1077 ; CHECK-LABEL: strb32_3:
1078 ; CHECK: @ %bb.0: @ %entry
1079 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1080 ; CHECK-NEXT: vstrb.32 q0, [r0], #3
1083 %z = getelementptr inbounds i8, i8* %y, i32 3
1084 %0 = bitcast i8* %x to <4 x i8>*
1085 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1086 %2 = bitcast i8* %y to <4 x i8>*
1087 store <4 x i8> %1, <4 x i8>* %2, align 1
; Store-side test: <4 x i8> copied from %x to %y (align 1), with %z = %y + 127 bytes.
; Expected output still folds the increment into vstrb.32 (writeback #127).
1091 define i8* @strb32_127(i8* %y, i8* %x) {
1092 ; CHECK-LABEL: strb32_127:
1093 ; CHECK: @ %bb.0: @ %entry
1094 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1095 ; CHECK-NEXT: vstrb.32 q0, [r0], #127
1098 %z = getelementptr inbounds i8, i8* %y, i32 127
1099 %0 = bitcast i8* %x to <4 x i8>*
1100 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1101 %2 = bitcast i8* %y to <4 x i8>*
1102 store <4 x i8> %1, <4 x i8>* %2, align 1
; Store-side test: <4 x i8> copied from %x to %y (align 1), with %z = %y + 128 bytes.
; Expected output does not fold the increment: plain vstrb.32 followed by adds r0, #128.
; NOTE(review): presumably 128 exceeds the byte post-index range (127 folds in strb32_127) - confirm.
1106 define i8* @strb32_128(i8* %y, i8* %x) {
1107 ; CHECK-LABEL: strb32_128:
1108 ; CHECK: @ %bb.0: @ %entry
1109 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1110 ; CHECK-NEXT: vstrb.32 q0, [r0]
1111 ; CHECK-NEXT: adds r0, #128
1114 %z = getelementptr inbounds i8, i8* %y, i32 128
1115 %0 = bitcast i8* %x to <4 x i8>*
1116 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1117 %2 = bitcast i8* %y to <4 x i8>*
1118 store <4 x i8> %1, <4 x i8>* %2, align 1
; Store-side test: <8 x i8> copied from %x to %y (align 1), with %z = %y + 4 bytes.
; Expected output loads with vldrb.u16 and folds the increment into vstrb.16 (writeback #4).
1123 define i8* @strb16_4(i8* %y, i8* %x) {
1124 ; CHECK-LABEL: strb16_4:
1125 ; CHECK: @ %bb.0: @ %entry
1126 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1127 ; CHECK-NEXT: vstrb.16 q0, [r0], #4
1130 %z = getelementptr inbounds i8, i8* %y, i32 4
1131 %0 = bitcast i8* %x to <8 x i8>*
1132 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1133 %2 = bitcast i8* %y to <8 x i8>*
1134 store <8 x i8> %1, <8 x i8>* %2, align 1
; Store-side test: <8 x i8> copied from %x to %y (align 1), with %z = %y + 3 bytes.
; Expected output folds the odd increment into a post-indexed vstrb.16 (writeback #3).
1138 define i8* @strb16_3(i8* %y, i8* %x) {
1139 ; CHECK-LABEL: strb16_3:
1140 ; CHECK: @ %bb.0: @ %entry
1141 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1142 ; CHECK-NEXT: vstrb.16 q0, [r0], #3
1145 %z = getelementptr inbounds i8, i8* %y, i32 3
1146 %0 = bitcast i8* %x to <8 x i8>*
1147 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1148 %2 = bitcast i8* %y to <8 x i8>*
1149 store <8 x i8> %1, <8 x i8>* %2, align 1
; Store-side test: <8 x i8> copied from %x to %y (align 1), with %z = %y + 127 bytes.
; Expected output still folds the increment into vstrb.16 (writeback #127).
1153 define i8* @strb16_127(i8* %y, i8* %x) {
1154 ; CHECK-LABEL: strb16_127:
1155 ; CHECK: @ %bb.0: @ %entry
1156 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1157 ; CHECK-NEXT: vstrb.16 q0, [r0], #127
1160 %z = getelementptr inbounds i8, i8* %y, i32 127
1161 %0 = bitcast i8* %x to <8 x i8>*
1162 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1163 %2 = bitcast i8* %y to <8 x i8>*
1164 store <8 x i8> %1, <8 x i8>* %2, align 1
; Store-side test: <8 x i8> copied from %x to %y (align 1), with %z = %y + 128 bytes.
; Expected output does not fold the increment: plain vstrb.16 followed by adds r0, #128.
; NOTE(review): presumably 128 exceeds the byte post-index range (127 folds in strb16_127) - confirm.
1168 define i8* @strb16_128(i8* %y, i8* %x) {
1169 ; CHECK-LABEL: strb16_128:
1170 ; CHECK: @ %bb.0: @ %entry
1171 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1172 ; CHECK-NEXT: vstrb.16 q0, [r0]
1173 ; CHECK-NEXT: adds r0, #128
1176 %z = getelementptr inbounds i8, i8* %y, i32 128
1177 %0 = bitcast i8* %x to <8 x i8>*
1178 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1179 %2 = bitcast i8* %y to <8 x i8>*
1180 store <8 x i8> %1, <8 x i8>* %2, align 1
; strb8_4: loads a <16 x i8> vector from %x and stores it to %y (both align 1),
; and forms %z = %y + 4 bytes.
; The expected output folds the +4 into the store as a post-indexed
; `vstrb.8 q0, [r0], #4`.
1185 define i8* @strb8_4(i8* %y, i8* %x) {
1186 ; CHECK-LABEL: strb8_4:
1187 ; CHECK: @ %bb.0: @ %entry
1188 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1189 ; CHECK-NEXT: vstrb.8 q0, [r0], #4
1192 %z = getelementptr inbounds i8, i8* %y, i32 4
1193 %0 = bitcast i8* %x to <16 x i8>*
1194 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1195 %2 = bitcast i8* %y to <16 x i8>*
1196 store <16 x i8> %1, <16 x i8>* %2, align 1
; strb8_3: loads a <16 x i8> vector from %x and stores it to %y (both align 1),
; and forms %z = %y + 3 bytes (an odd byte offset).
; The expected output folds the +3 into the store as a post-indexed
; `vstrb.8 q0, [r0], #3`.
1200 define i8* @strb8_3(i8* %y, i8* %x) {
1201 ; CHECK-LABEL: strb8_3:
1202 ; CHECK: @ %bb.0: @ %entry
1203 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1204 ; CHECK-NEXT: vstrb.8 q0, [r0], #3
1207 %z = getelementptr inbounds i8, i8* %y, i32 3
1208 %0 = bitcast i8* %x to <16 x i8>*
1209 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1210 %2 = bitcast i8* %y to <16 x i8>*
1211 store <16 x i8> %1, <16 x i8>* %2, align 1
; strb8_127: loads a <16 x i8> vector from %x and stores it to %y (both align 1),
; and forms %z = %y + 127 bytes.
; The expected output folds the +127 into the store as a post-indexed
; `vstrb.8 q0, [r0], #127`.
1215 define i8* @strb8_127(i8* %y, i8* %x) {
1216 ; CHECK-LABEL: strb8_127:
1217 ; CHECK: @ %bb.0: @ %entry
1218 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1219 ; CHECK-NEXT: vstrb.8 q0, [r0], #127
1222 %z = getelementptr inbounds i8, i8* %y, i32 127
1223 %0 = bitcast i8* %x to <16 x i8>*
1224 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1225 %2 = bitcast i8* %y to <16 x i8>*
1226 store <16 x i8> %1, <16 x i8>* %2, align 1
; strb8_128: loads a <16 x i8> vector from %x and stores it to %y (both align 1),
; and forms %z = %y + 128 bytes.
; The expected output uses a plain `vstrb.8 q0, [r0]` followed by a separate
; `adds r0, #128`; the increment is not folded into the store, unlike the
; +127 case in strb8_127.
; NOTE(review): presumably 128 is just past the post-increment immediate
; range for this store - confirm against the instruction encoding.
1230 define i8* @strb8_128(i8* %y, i8* %x) {
1231 ; CHECK-LABEL: strb8_128:
1232 ; CHECK: @ %bb.0: @ %entry
1233 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1234 ; CHECK-NEXT: vstrb.8 q0, [r0]
1235 ; CHECK-NEXT: adds r0, #128
1238 %z = getelementptr inbounds i8, i8* %y, i32 128
1239 %0 = bitcast i8* %x to <16 x i8>*
1240 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1241 %2 = bitcast i8* %y to <16 x i8>*
1242 store <16 x i8> %1, <16 x i8>* %2, align 1
; strf32_4: loads a <4 x float> vector from %x and stores it to %y (both align 4),
; and forms %z = %y + 4 bytes.
; The expected output loads with `vldrw.u32` and folds the +4 into a
; post-indexed `vstrb.8 q0, [r0], #4`.
; NOTE(review): the expected store is the byte form (vstrb.8) even though the
; IR store is 4-byte aligned - confirm this matches the llc revision under test.
1246 define i8* @strf32_4(i8* %y, i8* %x) {
1247 ; CHECK-LABEL: strf32_4:
1248 ; CHECK: @ %bb.0: @ %entry
1249 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1250 ; CHECK-NEXT: vstrb.8 q0, [r0], #4
1253 %z = getelementptr inbounds i8, i8* %y, i32 4
1254 %0 = bitcast i8* %x to <4 x float>*
1255 %1 = load <4 x float>, <4 x float>* %0, align 4
1256 %2 = bitcast i8* %y to <4 x float>*
1257 store <4 x float> %1, <4 x float>* %2, align 4
; strf16_4: loads an <8 x half> vector from %x and stores it to %y (both align 2),
; and forms %z = %y + 4 bytes.
; The expected output loads with `vldrh.u16` and folds the +4 into a
; post-indexed `vstrb.8 q0, [r0], #4`.
; NOTE(review): the expected store is the byte form (vstrb.8) even though the
; IR store is 2-byte aligned - confirm this matches the llc revision under test.
1261 define i8* @strf16_4(i8* %y, i8* %x) {
1262 ; CHECK-LABEL: strf16_4:
1263 ; CHECK: @ %bb.0: @ %entry
1264 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1265 ; CHECK-NEXT: vstrb.8 q0, [r0], #4
1268 %z = getelementptr inbounds i8, i8* %y, i32 4
1269 %0 = bitcast i8* %x to <8 x half>*
1270 %1 = load <8 x half>, <8 x half>* %0, align 2
1271 %2 = bitcast i8* %y to <8 x half>*
1272 store <8 x half> %1, <8 x half>* %2, align 2
; strwi32_align1: loads a <4 x i32> vector from %x (align 4) and stores it to %y
; with only align 1, and forms %z = %y + 3 bytes.
; The expected output loads with `vldrw.u32` and stores with the byte form,
; folding the +3 into a post-indexed `vstrb.8 q0, [r0], #3`.
1276 define i8* @strwi32_align1(i8* %y, i8* %x) {
1277 ; CHECK-LABEL: strwi32_align1:
1278 ; CHECK: @ %bb.0: @ %entry
1279 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1280 ; CHECK-NEXT: vstrb.8 q0, [r0], #3
1283 %z = getelementptr inbounds i8, i8* %y, i32 3
1284 %0 = bitcast i8* %x to <4 x i32>*
1285 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1286 %2 = bitcast i8* %y to <4 x i32>*
1287 store <4 x i32> %1, <4 x i32>* %2, align 1
; strhi16_align1: loads an <8 x i16> vector from %x (align 2) and stores it to %y
; with only align 1, and forms %z = %y + 3 bytes.
; The expected output loads with `vldrh.u16` and stores with the byte form,
; folding the +3 into a post-indexed `vstrb.8 q0, [r0], #3`.
1291 define i8* @strhi16_align1(i8* %y, i8* %x) {
1292 ; CHECK-LABEL: strhi16_align1:
1293 ; CHECK: @ %bb.0: @ %entry
1294 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1295 ; CHECK-NEXT: vstrb.8 q0, [r0], #3
1298 %z = getelementptr inbounds i8, i8* %y, i32 3
1299 %0 = bitcast i8* %x to <8 x i16>*
1300 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1301 %2 = bitcast i8* %y to <8 x i16>*
1302 store <8 x i16> %1, <8 x i16>* %2, align 1
; strhi32_align1: loads a <4 x i32> vector from %x (align 4), truncates it to
; <4 x i16>, and stores the 8-byte result to %y with only align 1; also forms
; %z = %y + 3 bytes.
; The expected output does not use a post-indexed vector store. It reserves
; 8 bytes of stack, writes the narrowed vector there with `vstrh.32`, reloads
; it as two words with `ldrd`, writes those to [r0] and [r0, #4] with scalar
; `str`, then applies the +3 with a separate `adds r0, #3`.
1306 define i8* @strhi32_align1(i8* %y, i8* %x) {
1307 ; CHECK-LABEL: strhi32_align1:
1308 ; CHECK: @ %bb.0: @ %entry
1309 ; CHECK-NEXT: .pad #8
1310 ; CHECK-NEXT: sub sp, #8
1311 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1312 ; CHECK-NEXT: mov r1, sp
1313 ; CHECK-NEXT: vstrh.32 q0, [r1]
1314 ; CHECK-NEXT: ldrd r1, r2, [sp]
1315 ; CHECK-NEXT: str r1, [r0]
1316 ; CHECK-NEXT: str r2, [r0, #4]
1317 ; CHECK-NEXT: adds r0, #3
1318 ; CHECK-NEXT: add sp, #8
1321 %z = getelementptr inbounds i8, i8* %y, i32 3
1322 %0 = bitcast i8* %x to <4 x i32>*
1323 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1324 %2 = bitcast i8* %y to <4 x i16>*
1325 %3 = trunc <4 x i32> %1 to <4 x i16>
1326 store <4 x i16> %3, <4 x i16>* %2, align 1
; strf32_align1: loads a <4 x float> vector from %x (align 4) and stores it to %y
; with only align 1, and forms %z = %y + 3 bytes.
; The expected output loads with `vldrw.u32` and stores with the byte form,
; folding the +3 into a post-indexed `vstrb.8 q0, [r0], #3`.
1330 define i8* @strf32_align1(i8* %y, i8* %x) {
1331 ; CHECK-LABEL: strf32_align1:
1332 ; CHECK: @ %bb.0: @ %entry
1333 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1334 ; CHECK-NEXT: vstrb.8 q0, [r0], #3
1337 %z = getelementptr inbounds i8, i8* %y, i32 3
1338 %0 = bitcast i8* %x to <4 x float>*
1339 %1 = load <4 x float>, <4 x float>* %0, align 4
1340 %2 = bitcast i8* %y to <4 x float>*
1341 store <4 x float> %1, <4 x float>* %2, align 1
; strf16_align1: loads an <8 x half> vector from %x (align 2) and stores it to %y
; with only align 1, and forms %z = %y + 3 bytes.
; The expected output loads with `vldrh.u16` and stores with the byte form,
; folding the +3 into a post-indexed `vstrb.8 q0, [r0], #3`.
1345 define i8* @strf16_align1(i8* %y, i8* %x) {
1346 ; CHECK-LABEL: strf16_align1:
1347 ; CHECK: @ %bb.0: @ %entry
1348 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1349 ; CHECK-NEXT: vstrb.8 q0, [r0], #3
1352 %z = getelementptr inbounds i8, i8* %y, i32 3
1353 %0 = bitcast i8* %x to <8 x half>*
1354 %1 = load <8 x half>, <8 x half>* %0, align 2
1355 %2 = bitcast i8* %y to <8 x half>*
1356 store <8 x half> %1, <8 x half>* %2, align 1