aarch64: Some tweaks to the early-ra pass
[official-gcc.git] / gcc / testsuite / gcc.target / aarch64 / sme / strided_2.c
blob2e58ae643ec27c40b1bf9a4f4798d02ae0053ceb
1 // { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" }
2 // { dg-final { check-function-bodies "**" "" } }
4 #include <arm_sme.h>
6 #pragma GCC target "+sme2"
8 // This file deliberately contains nonsense code.
11 ** test1:
12 ** ptrue (pn[0-9]+)\.s
13 ** ld1w {z16\.s - z19\.s}, \1/z, \[x1\]
14 ** ld1w {z20\.s - z23\.s}, \1/z, \[x1, #4, mul vl\]
15 ** ld1w {z24\.s - z27\.s}, \1/z, \[x1, #8, mul vl\]
16 ** ld1w {z28\.s - z31\.s}, \1/z, \[x1, #12, mul vl\]
17 ** ptrue [^\n]+
18 ** ld1rqw [^\n]+
19 ** ld1rqw [^\n]+
20 ** sclamp {z16.s - z19.s}, [^\n]+
21 ** sclamp {z20.s - z23.s}, [^\n]+
22 ** sclamp {z24.s - z27.s}, [^\n]+
23 ** sclamp {z28.s - z31.s}, [^\n]+
24 ** st1w {z16\.s, z20\.s, z24\.s, z28\.s}, \1, \[x0\]
25 ** st1w {z17\.s, z21\.s, z25\.s, z29\.s}, \1, \[x0, #4, mul vl\]
26 ** st1w {z18\.s, z22\.s, z26\.s, z30\.s}, \1, \[x0, #8, mul vl\]
27 ** st1w {z19\.s, z23\.s, z27\.s, z31\.s}, \1, \[x0, #12, mul vl\]
28 ** st1w {z16\.s, z20\.s, z24\.s, z28\.s}, \1, \[x0, #16, mul vl\]
29 ** st1w {z17\.s, z21\.s, z25\.s, z29\.s}, \1, \[x0, #20, mul vl\]
30 ** st1w {z18\.s, z22\.s, z26\.s, z30\.s}, \1, \[x0, #24, mul vl\]
31 ** st1w {z19\.s, z23\.s, z27\.s, z31\.s}, \1, \[x0, #28, mul vl\]
32 ** ld1w {z16\.s - z19\.s}, \1/z, \[x3\]
33 ** ld1w {z20\.s - z23\.s}, \1/z, \[x3, #4, mul vl\]
34 ** ld1w {z24\.s - z27\.s}, \1/z, \[x3, #8, mul vl\]
35 ** ld1w {z28\.s - z31\.s}, \1/z, \[x3, #12, mul vl\]
36 ** sclamp {z16.s - z19.s}, [^\n]+
37 ** sclamp {z20.s - z23.s}, [^\n]+
38 ** sclamp {z24.s - z27.s}, [^\n]+
39 ** sclamp {z28.s - z31.s}, [^\n]+
40 ** ...
41 ** ret
/* Code-generation test input: load four 4-vector tuples, clamp them, and
   store them back as tuples that take one vector from each loaded tuple.

   DEST is the store base, SRC1 and SRC3 are the bases of the two rounds of
   tuple loads, and SRC2 supplies the two vectors passed as the remaining
   svclamp operands.  The function is declared __arm_streaming.

   The statement sequence is the thing under test: the function-body
   pattern earlier in this file expects each loaded tuple in consecutive
   registers and each stored tuple in a strided register list, and the
   final directive in the file rejects any vector "mov".  Do not reorder
   or simplify the statements.  */
43 void test1(int32_t *dest, int32_t *src1, int32_t *src2,
44 int32_t *src3) __arm_streaming
/* One predicate, from svptrue_c32, is shared by every tuple load and
   store below.  */
46 svcount_t pg = svptrue_c32();
/* First round: four tuples from SRC1 at vnum 0, 4, 8 and 12.  */
47 svint32x4_t l0 = svld1_vnum_x4(pg, src1, 0);
48 svint32x4_t l1 = svld1_vnum_x4(pg, src1, 4);
49 svint32x4_t l2 = svld1_vnum_x4(pg, src1, 8);
50 svint32x4_t l3 = svld1_vnum_x4(pg, src1, 12);
/* Two single vectors loaded with svld1rq from SRC2 and SRC2 + 4; they are
   reused as the second and third svclamp operands in both rounds.  */
51 svint32_t l4 = svld1rq(svptrue_b32(), src2);
52 svint32_t l5 = svld1rq(svptrue_b32(), src2 + 4);
/* Clamp each tuple in place.  */
53 l0 = svclamp(l0, l4, l5);
54 l1 = svclamp(l1, l4, l5);
55 l2 = svclamp(l2, l4, l5);
56 l3 = svclamp(l3, l4, l5);
/* Store at vnum 0, 4, 8 and 12: the tuple stored at step K is built from
   element K of l0, l1, l2 and l3.  */
57 svst1_vnum(pg, dest, 0,
58 svcreate4(svget4(l0, 0), svget4(l1, 0),
59 svget4(l2, 0), svget4(l3, 0)));
60 svst1_vnum(pg, dest, 4,
61 svcreate4(svget4(l0, 1), svget4(l1, 1),
62 svget4(l2, 1), svget4(l3, 1)));
63 svst1_vnum(pg, dest, 8,
64 svcreate4(svget4(l0, 2), svget4(l1, 2),
65 svget4(l2, 2), svget4(l3, 2)));
66 svst1_vnum(pg, dest, 12,
67 svcreate4(svget4(l0, 3), svget4(l1, 3),
68 svget4(l2, 3), svget4(l3, 3)));
/* Store the same four tuples a second time, at vnum 16, 20, 24 and 28.  */
69 svst1_vnum(pg, dest, 16,
70 svcreate4(svget4(l0, 0), svget4(l1, 0),
71 svget4(l2, 0), svget4(l3, 0)));
72 svst1_vnum(pg, dest, 20,
73 svcreate4(svget4(l0, 1), svget4(l1, 1),
74 svget4(l2, 1), svget4(l3, 1)));
75 svst1_vnum(pg, dest, 24,
76 svcreate4(svget4(l0, 2), svget4(l1, 2),
77 svget4(l2, 2), svget4(l3, 2)));
78 svst1_vnum(pg, dest, 28,
79 svcreate4(svget4(l0, 3), svget4(l1, 3),
80 svget4(l2, 3), svget4(l3, 3)));
/* Second round: overwrite l0..l3 with tuples from SRC3 at the same vnum
   values and clamp them against the same l4 and l5.  */
81 l0 = svld1_vnum_x4(pg, src3, 0);
82 l1 = svld1_vnum_x4(pg, src3, 4);
83 l2 = svld1_vnum_x4(pg, src3, 8);
84 l3 = svld1_vnum_x4(pg, src3, 12);
85 l0 = svclamp(l0, l4, l5);
86 l1 = svclamp(l1, l4, l5);
87 l2 = svclamp(l2, l4, l5);
88 l3 = svclamp(l3, l4, l5);
/* Store the second-round tuples at vnum 32, 36, 40 and 44, built the same
   way as in the first round.  */
89 svst1_vnum(pg, dest, 32,
90 svcreate4(svget4(l0, 0), svget4(l1, 0),
91 svget4(l2, 0), svget4(l3, 0)));
92 svst1_vnum(pg, dest, 36,
93 svcreate4(svget4(l0, 1), svget4(l1, 1),
94 svget4(l2, 1), svget4(l3, 1)));
95 svst1_vnum(pg, dest, 40,
96 svcreate4(svget4(l0, 2), svget4(l1, 2),
97 svget4(l2, 2), svget4(l3, 2)));
98 svst1_vnum(pg, dest, 44,
99 svcreate4(svget4(l0, 3), svget4(l1, 3),
100 svget4(l2, 3), svget4(l3, 3)));
/* And once more at vnum 48, 52, 56 and 60.  */
101 svst1_vnum(pg, dest, 48,
102 svcreate4(svget4(l0, 0), svget4(l1, 0),
103 svget4(l2, 0), svget4(l3, 0)));
104 svst1_vnum(pg, dest, 52,
105 svcreate4(svget4(l0, 1), svget4(l1, 1),
106 svget4(l2, 1), svget4(l3, 1)));
107 svst1_vnum(pg, dest, 56,
108 svcreate4(svget4(l0, 2), svget4(l1, 2),
109 svget4(l2, 2), svget4(l3, 2)));
110 svst1_vnum(pg, dest, 60,
111 svcreate4(svget4(l0, 3), svget4(l1, 3),
112 svget4(l2, 3), svget4(l3, 3)));
115 /* { dg-final { scan-assembler-not {\tmov\tz} } } */