1 // { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" }
2 // { dg-final { check-function-bodies "**" "" } }
6 #pragma GCC target "+sme2"
8 // This file deliberately contains nonsense code.
12 ** ptrue (pn[0-9]+)\.s
13 ** ld1w {z16\.s - z19\.s}, \1/z, \[x1\]
14 ** ld1w {z20\.s - z23\.s}, \1/z, \[x1, #4, mul vl\]
15 ** ld1w {z24\.s - z27\.s}, \1/z, \[x1, #8, mul vl\]
16 ** ld1w {z28\.s - z31\.s}, \1/z, \[x1, #12, mul vl\]
20 ** sclamp {z16.s - z19.s}, [^\n]+
21 ** sclamp {z20.s - z23.s}, [^\n]+
22 ** sclamp {z24.s - z27.s}, [^\n]+
23 ** sclamp {z28.s - z31.s}, [^\n]+
24 ** st1w {z16\.s, z20\.s, z24\.s, z28\.s}, \1, \[x0\]
25 ** st1w {z17\.s, z21\.s, z25\.s, z29\.s}, \1, \[x0, #4, mul vl\]
26 ** st1w {z18\.s, z22\.s, z26\.s, z30\.s}, \1, \[x0, #8, mul vl\]
27 ** st1w {z19\.s, z23\.s, z27\.s, z31\.s}, \1, \[x0, #12, mul vl\]
28 ** st1w {z16\.s, z20\.s, z24\.s, z28\.s}, \1, \[x0, #16, mul vl\]
29 ** st1w {z17\.s, z21\.s, z25\.s, z29\.s}, \1, \[x0, #20, mul vl\]
30 ** st1w {z18\.s, z22\.s, z26\.s, z30\.s}, \1, \[x0, #24, mul vl\]
31 ** st1w {z19\.s, z23\.s, z27\.s, z31\.s}, \1, \[x0, #28, mul vl\]
32 ** ld1w {z16\.s - z19\.s}, \1/z, \[x3\]
33 ** ld1w {z20\.s - z23\.s}, \1/z, \[x3, #4, mul vl\]
34 ** ld1w {z24\.s - z27\.s}, \1/z, \[x3, #8, mul vl\]
35 ** ld1w {z28\.s - z31\.s}, \1/z, \[x3, #12, mul vl\]
36 ** sclamp {z16.s - z19.s}, [^\n]+
37 ** sclamp {z20.s - z23.s}, [^\n]+
38 ** sclamp {z24.s - z27.s}, [^\n]+
39 ** sclamp {z28.s - z31.s}, [^\n]+
// test1: code-generation test body. Takes one destination pointer and three
// source pointers and is declared with the __arm_streaming keyword.
// It loads four x4 vector tuples, clamps them, and stores regrouped tuples to
// dest; it then repeats the load/clamp/store sequence with a second source.
// NOTE(review): the dg-final directive in this file scans for the absence of
// "\tmov\tz"; presumably the svget4/svcreate4 regrouping below is what is
// expected to be done without vector register copies -- TODO confirm.
// NOTE(review): the function's opening and closing braces are not visible in
// this view (the embedded original line numbers skip 45 and 113-114) --
// confirm against the upstream file before editing.
43 void test1(int32_t *dest
, int32_t *src1
, int32_t *src2
,
44 int32_t *src3
) __arm_streaming
// pg: value returned by svptrue_c32(); passed as the first argument to every
// svld1_vnum_x4 and svst1_vnum call below.
46 svcount_t pg
= svptrue_c32();
// l0..l3: four x4 tuples loaded from src1 at vnum offsets 0, 4, 8 and 12.
47 svint32x4_t l0
= svld1_vnum_x4(pg
, src1
, 0);
48 svint32x4_t l1
= svld1_vnum_x4(pg
, src1
, 4);
49 svint32x4_t l2
= svld1_vnum_x4(pg
, src1
, 8);
50 svint32x4_t l3
= svld1_vnum_x4(pg
, src1
, 12);
// l4 and l5: single vectors produced by svld1rq from src2 and src2 + 4, each
// under an svptrue_b32() predicate.  They are the second and third operands
// of every svclamp call in this function (presumably the lower and upper
// bounds -- confirm against the ACLE definition of svclamp).
51 svint32_t l4
= svld1rq(svptrue_b32(), src2
);
52 svint32_t l5
= svld1rq(svptrue_b32(), src2
+ 4);
// Clamp each of the four tuples with the shared l4/l5 operands, assigning the
// result back to the same variable.
53 l0
= svclamp(l0
, l4
, l5
);
54 l1
= svclamp(l1
, l4
, l5
);
55 l2
= svclamp(l2
, l4
, l5
);
56 l3
= svclamp(l3
, l4
, l5
);
// Stores at vnum 0, 4, 8 and 12.  Each store builds a new x4 tuple with
// svcreate4 from the vector at one index (0, 1, 2, 3 in turn) of each of
// l0, l1, l2 and l3, so every stored tuple takes one vector from each of the
// four loaded tuples.
57 svst1_vnum(pg
, dest
, 0,
58 svcreate4(svget4(l0
, 0), svget4(l1
, 0),
59 svget4(l2
, 0), svget4(l3
, 0)));
60 svst1_vnum(pg
, dest
, 4,
61 svcreate4(svget4(l0
, 1), svget4(l1
, 1),
62 svget4(l2
, 1), svget4(l3
, 1)));
63 svst1_vnum(pg
, dest
, 8,
64 svcreate4(svget4(l0
, 2), svget4(l1
, 2),
65 svget4(l2
, 2), svget4(l3
, 2)));
66 svst1_vnum(pg
, dest
, 12,
67 svcreate4(svget4(l0
, 3), svget4(l1
, 3),
68 svget4(l2
, 3), svget4(l3
, 3)));
// Stores at vnum 16, 20, 24 and 28: the same four regrouped tuples (indices
// 0..3) are built and stored again at the next dest offsets.
69 svst1_vnum(pg
, dest
, 16,
70 svcreate4(svget4(l0
, 0), svget4(l1
, 0),
71 svget4(l2
, 0), svget4(l3
, 0)));
72 svst1_vnum(pg
, dest
, 20,
73 svcreate4(svget4(l0
, 1), svget4(l1
, 1),
74 svget4(l2
, 1), svget4(l3
, 1)));
75 svst1_vnum(pg
, dest
, 24,
76 svcreate4(svget4(l0
, 2), svget4(l1
, 2),
77 svget4(l2
, 2), svget4(l3
, 2)));
78 svst1_vnum(pg
, dest
, 28,
79 svcreate4(svget4(l0
, 3), svget4(l1
, 3),
80 svget4(l2
, 3), svget4(l3
, 3)));
// Second round: l0..l3 are overwritten with x4 loads from src3 at the same
// vnum offsets (0, 4, 8, 12).
81 l0
= svld1_vnum_x4(pg
, src3
, 0);
82 l1
= svld1_vnum_x4(pg
, src3
, 4);
83 l2
= svld1_vnum_x4(pg
, src3
, 8);
84 l3
= svld1_vnum_x4(pg
, src3
, 12);
// Clamp the newly loaded tuples with the unchanged l4/l5 operands.
85 l0
= svclamp(l0
, l4
, l5
);
86 l1
= svclamp(l1
, l4
, l5
);
87 l2
= svclamp(l2
, l4
, l5
);
88 l3
= svclamp(l3
, l4
, l5
);
// Stores at vnum 32, 36, 40 and 44, using the same svget4/svcreate4
// regrouping pattern as the first round (indices 0..3).
89 svst1_vnum(pg
, dest
, 32,
90 svcreate4(svget4(l0
, 0), svget4(l1
, 0),
91 svget4(l2
, 0), svget4(l3
, 0)));
92 svst1_vnum(pg
, dest
, 36,
93 svcreate4(svget4(l0
, 1), svget4(l1
, 1),
94 svget4(l2
, 1), svget4(l3
, 1)));
95 svst1_vnum(pg
, dest
, 40,
96 svcreate4(svget4(l0
, 2), svget4(l1
, 2),
97 svget4(l2
, 2), svget4(l3
, 2)));
98 svst1_vnum(pg
, dest
, 44,
99 svcreate4(svget4(l0
, 3), svget4(l1
, 3),
100 svget4(l2
, 3), svget4(l3
, 3)));
// Stores at vnum 48, 52, 56 and 60: the regrouped tuples (indices 0..3) are
// built and stored once more at the final dest offsets.
101 svst1_vnum(pg
, dest
, 48,
102 svcreate4(svget4(l0
, 0), svget4(l1
, 0),
103 svget4(l2
, 0), svget4(l3
, 0)));
104 svst1_vnum(pg
, dest
, 52,
105 svcreate4(svget4(l0
, 1), svget4(l1
, 1),
106 svget4(l2
, 1), svget4(l3
, 1)));
107 svst1_vnum(pg
, dest
, 56,
108 svcreate4(svget4(l0
, 2), svget4(l1
, 2),
109 svget4(l2
, 2), svget4(l3
, 2)));
110 svst1_vnum(pg
, dest
, 60,
111 svcreate4(svget4(l0
, 3), svget4(l1
, 3),
112 svget4(l2
, 3), svget4(l3
, 3)));
115 /* { dg-final { scan-assembler-not {\tmov\tz} } } */