#include "arm-neon-ref.h"
#include "compute-ref-data.h"
/* Expected results.  */
8 VECT_VAR_DECL(expected_vld2_0
,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
9 0xaa, 0xaa, 0xaa, 0xaa };
10 VECT_VAR_DECL(expected_vld2_0
,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
11 VECT_VAR_DECL(expected_vld2_0
,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
12 VECT_VAR_DECL(expected_vld2_0
,uint
,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
13 0xaa, 0xaa, 0xaa, 0xaa };
14 VECT_VAR_DECL(expected_vld2_0
,uint
,16,4) [] = { 0xaaaa, 0xaaaa,
16 VECT_VAR_DECL(expected_vld2_0
,uint
,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
17 VECT_VAR_DECL(expected_vld2_0
,poly
,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
18 0xaa, 0xaa, 0xaa, 0xaa };
19 VECT_VAR_DECL(expected_vld2_0
,poly
,16,4) [] = { 0xaaaa, 0xaaaa,
21 VECT_VAR_DECL(expected_vld2_0
,hfloat
,32,2) [] = { 0xc1800000, 0xc1700000 };
22 VECT_VAR_DECL(expected_vld2_0
,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
23 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
24 VECT_VAR_DECL(expected_vld2_0
,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
25 0xaaaaaaaa, 0xaaaaaaaa };
26 VECT_VAR_DECL(expected_vld2_0
,uint
,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
27 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
28 VECT_VAR_DECL(expected_vld2_0
,uint
,32,4) [] = { 0xfffffff0, 0xfffffff1,
29 0xaaaaaaaa, 0xaaaaaaaa };
30 VECT_VAR_DECL(expected_vld2_0
,poly
,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
31 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
32 VECT_VAR_DECL(expected_vld2_0
,hfloat
,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
33 0xaaaaaaaa, 0xaaaaaaaa };
36 VECT_VAR_DECL(expected_vld2_1
,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
37 0xaa, 0xaa, 0xf0, 0xf1 };
38 VECT_VAR_DECL(expected_vld2_1
,int,16,4) [] = { 0xfff0, 0xfff1, 0xaaaa, 0xaaaa };
39 VECT_VAR_DECL(expected_vld2_1
,int,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
40 VECT_VAR_DECL(expected_vld2_1
,uint
,8,8) [] = { 0xf0, 0xf1, 0xaa, 0xaa,
41 0xaa, 0xaa, 0xaa, 0xaa };
42 VECT_VAR_DECL(expected_vld2_1
,uint
,16,4) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1 };
43 VECT_VAR_DECL(expected_vld2_1
,uint
,32,2) [] = { 0xfffffff0, 0xfffffff1 };
44 VECT_VAR_DECL(expected_vld2_1
,poly
,8,8) [] = { 0xf0, 0xf1, 0xaa, 0xaa,
45 0xaa, 0xaa, 0xaa, 0xaa };
46 VECT_VAR_DECL(expected_vld2_1
,poly
,16,4) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1 };
47 VECT_VAR_DECL(expected_vld2_1
,hfloat
,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
48 VECT_VAR_DECL(expected_vld2_1
,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
49 0xfff0, 0xfff1, 0xaaaa, 0xaaaa };
50 VECT_VAR_DECL(expected_vld2_1
,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
51 0xaaaaaaaa, 0xaaaaaaaa };
52 VECT_VAR_DECL(expected_vld2_1
,uint
,16,8) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1,
53 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
54 VECT_VAR_DECL(expected_vld2_1
,uint
,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
55 0xaaaaaaaa, 0xaaaaaaaa };
56 VECT_VAR_DECL(expected_vld2_1
,poly
,16,8) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1,
57 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
58 VECT_VAR_DECL(expected_vld2_1
,hfloat
,32,4) [] = { 0xc1800000, 0xc1700000,
59 0xaaaaaaaa, 0xaaaaaaaa };
62 VECT_VAR_DECL(expected_vld3_0
,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
63 0xaa, 0xaa, 0xaa, 0xaa };
64 VECT_VAR_DECL(expected_vld3_0
,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
65 VECT_VAR_DECL(expected_vld3_0
,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
66 VECT_VAR_DECL(expected_vld3_0
,uint
,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
67 0xaa, 0xaa, 0xaa, 0xaa };
68 VECT_VAR_DECL(expected_vld3_0
,uint
,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
69 VECT_VAR_DECL(expected_vld3_0
,uint
,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
70 VECT_VAR_DECL(expected_vld3_0
,poly
,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
71 0xaa, 0xaa, 0xaa, 0xaa };
72 VECT_VAR_DECL(expected_vld3_0
,poly
,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
73 VECT_VAR_DECL(expected_vld3_0
,hfloat
,32,2) [] = { 0xc1800000, 0xc1700000 };
74 VECT_VAR_DECL(expected_vld3_0
,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
75 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
76 VECT_VAR_DECL(expected_vld3_0
,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
77 0xaaaaaaaa, 0xaaaaaaaa };
78 VECT_VAR_DECL(expected_vld3_0
,uint
,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
79 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
80 VECT_VAR_DECL(expected_vld3_0
,uint
,32,4) [] = { 0xfffffff0, 0xfffffff1,
81 0xfffffff2, 0xaaaaaaaa };
82 VECT_VAR_DECL(expected_vld3_0
,poly
,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
83 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
84 VECT_VAR_DECL(expected_vld3_0
,hfloat
,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
85 0xaaaaaaaa, 0xaaaaaaaa };
88 VECT_VAR_DECL(expected_vld3_1
,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
89 0xaa, 0xaa, 0xaa, 0xaa };
90 VECT_VAR_DECL(expected_vld3_1
,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1 };
91 VECT_VAR_DECL(expected_vld3_1
,int,32,2) [] = { 0xfffffff2, 0xaaaaaaaa };
92 VECT_VAR_DECL(expected_vld3_1
,uint
,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
93 0xf0, 0xf1, 0xf2, 0xaa };
94 VECT_VAR_DECL(expected_vld3_1
,uint
,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
95 VECT_VAR_DECL(expected_vld3_1
,uint
,32,2) [] = { 0xaaaaaaaa, 0xfffffff0 };
96 VECT_VAR_DECL(expected_vld3_1
,poly
,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
97 0xf0, 0xf1, 0xf2, 0xaa };
98 VECT_VAR_DECL(expected_vld3_1
,poly
,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
99 VECT_VAR_DECL(expected_vld3_1
,hfloat
,32,2) [] = { 0xc1600000, 0xaaaaaaaa };
100 VECT_VAR_DECL(expected_vld3_1
,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
101 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
102 VECT_VAR_DECL(expected_vld3_1
,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
103 0xfffffff0, 0xfffffff1 };
104 VECT_VAR_DECL(expected_vld3_1
,uint
,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
105 0xaaaa, 0xaaaa, 0xaaaa, 0xfff0 };
106 VECT_VAR_DECL(expected_vld3_1
,uint
,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
107 0xaaaaaaaa, 0xaaaaaaaa };
108 VECT_VAR_DECL(expected_vld3_1
,poly
,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
109 0xaaaa, 0xaaaa, 0xaaaa, 0xfff0 };
110 VECT_VAR_DECL(expected_vld3_1
,hfloat
,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
111 0xc1800000, 0xc1700000 };
114 VECT_VAR_DECL(expected_vld3_2
,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
115 0xaa, 0xf0, 0xf1, 0xf2 };
116 VECT_VAR_DECL(expected_vld3_2
,int,16,4) [] = { 0xfff2, 0xaaaa, 0xaaaa, 0xaaaa };
117 VECT_VAR_DECL(expected_vld3_2
,int,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
118 VECT_VAR_DECL(expected_vld3_2
,uint
,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
119 0xaa, 0xaa, 0xaa, 0xaa };
120 VECT_VAR_DECL(expected_vld3_2
,uint
,16,4) [] = { 0xaaaa, 0xfff0, 0xfff1, 0xfff2 };
121 VECT_VAR_DECL(expected_vld3_2
,uint
,32,2) [] = { 0xfffffff1, 0xfffffff2 };
122 VECT_VAR_DECL(expected_vld3_2
,poly
,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
123 0xaa, 0xaa, 0xaa, 0xaa };
124 VECT_VAR_DECL(expected_vld3_2
,poly
,16,4) [] = { 0xaaaa, 0xfff0, 0xfff1, 0xfff2 };
125 VECT_VAR_DECL(expected_vld3_2
,hfloat
,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
126 VECT_VAR_DECL(expected_vld3_2
,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1,
127 0xfff2, 0xaaaa, 0xaaaa, 0xaaaa };
128 VECT_VAR_DECL(expected_vld3_2
,int,32,4) [] = { 0xfffffff2, 0xaaaaaaaa,
129 0xaaaaaaaa, 0xaaaaaaaa };
130 VECT_VAR_DECL(expected_vld3_2
,uint
,16,8) [] = { 0xfff1, 0xfff2, 0xaaaa, 0xaaaa,
131 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
132 VECT_VAR_DECL(expected_vld3_2
,uint
,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
133 0xaaaaaaaa, 0xaaaaaaaa };
134 VECT_VAR_DECL(expected_vld3_2
,poly
,16,8) [] = { 0xfff1, 0xfff2, 0xaaaa, 0xaaaa,
135 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
136 VECT_VAR_DECL(expected_vld3_2
,hfloat
,32,4) [] = { 0xc1600000, 0xaaaaaaaa,
137 0xaaaaaaaa, 0xaaaaaaaa };
140 VECT_VAR_DECL(expected_vld4_0
,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
141 0xaa, 0xaa, 0xaa, 0xaa };
142 VECT_VAR_DECL(expected_vld4_0
,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
143 VECT_VAR_DECL(expected_vld4_0
,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
144 VECT_VAR_DECL(expected_vld4_0
,uint
,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
145 0xaa, 0xaa, 0xaa, 0xaa };
146 VECT_VAR_DECL(expected_vld4_0
,uint
,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
147 VECT_VAR_DECL(expected_vld4_0
,uint
,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
148 VECT_VAR_DECL(expected_vld4_0
,poly
,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
149 0xaa, 0xaa, 0xaa, 0xaa };
150 VECT_VAR_DECL(expected_vld4_0
,poly
,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
151 VECT_VAR_DECL(expected_vld4_0
,hfloat
,32,2) [] = { 0xc1800000, 0xc1700000 };
152 VECT_VAR_DECL(expected_vld4_0
,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
153 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
154 VECT_VAR_DECL(expected_vld4_0
,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
155 0xaaaaaaaa, 0xaaaaaaaa };
156 VECT_VAR_DECL(expected_vld4_0
,uint
,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
157 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
158 VECT_VAR_DECL(expected_vld4_0
,uint
,32,4) [] = { 0xfffffff0, 0xfffffff1,
159 0xfffffff2, 0xfffffff3 };
160 VECT_VAR_DECL(expected_vld4_0
,poly
,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
161 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
162 VECT_VAR_DECL(expected_vld4_0
,hfloat
,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
163 0xaaaaaaaa, 0xaaaaaaaa };
166 VECT_VAR_DECL(expected_vld4_1
,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
167 0xaa, 0xaa, 0xaa, 0xaa };
168 VECT_VAR_DECL(expected_vld4_1
,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
169 VECT_VAR_DECL(expected_vld4_1
,int,32,2) [] = { 0xfffffff2, 0xfffffff3 };
170 VECT_VAR_DECL(expected_vld4_1
,uint
,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
171 0xaa, 0xaa, 0xaa, 0xaa };
172 VECT_VAR_DECL(expected_vld4_1
,uint
,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
173 VECT_VAR_DECL(expected_vld4_1
,uint
,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
174 VECT_VAR_DECL(expected_vld4_1
,poly
,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
175 0xaa, 0xaa, 0xaa, 0xaa };
176 VECT_VAR_DECL(expected_vld4_1
,poly
,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
177 VECT_VAR_DECL(expected_vld4_1
,hfloat
,32,2) [] = { 0xc1600000, 0xc1500000 };
178 VECT_VAR_DECL(expected_vld4_1
,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
179 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
180 VECT_VAR_DECL(expected_vld4_1
,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
181 0xaaaaaaaa, 0xaaaaaaaa };
182 VECT_VAR_DECL(expected_vld4_1
,uint
,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
183 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
184 VECT_VAR_DECL(expected_vld4_1
,uint
,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
185 0xaaaaaaaa, 0xaaaaaaaa };
186 VECT_VAR_DECL(expected_vld4_1
,poly
,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
187 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
188 VECT_VAR_DECL(expected_vld4_1
,hfloat
,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
189 0xaaaaaaaa, 0xaaaaaaaa };
192 VECT_VAR_DECL(expected_vld4_2
,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
193 0xaa, 0xaa, 0xaa, 0xaa };
194 VECT_VAR_DECL(expected_vld4_2
,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
195 VECT_VAR_DECL(expected_vld4_2
,int,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
196 VECT_VAR_DECL(expected_vld4_2
,uint
,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
197 0xaa, 0xaa, 0xaa, 0xaa };
198 VECT_VAR_DECL(expected_vld4_2
,uint
,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
199 VECT_VAR_DECL(expected_vld4_2
,uint
,32,2) [] = { 0xfffffff0, 0xfffffff1 };
200 VECT_VAR_DECL(expected_vld4_2
,poly
,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
201 0xaa, 0xaa, 0xaa, 0xaa };
202 VECT_VAR_DECL(expected_vld4_2
,poly
,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
203 VECT_VAR_DECL(expected_vld4_2
,hfloat
,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
204 VECT_VAR_DECL(expected_vld4_2
,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
205 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
206 VECT_VAR_DECL(expected_vld4_2
,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
207 0xfffffff2, 0xfffffff3 };
208 VECT_VAR_DECL(expected_vld4_2
,uint
,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
209 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
210 VECT_VAR_DECL(expected_vld4_2
,uint
,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
211 0xaaaaaaaa, 0xaaaaaaaa };
212 VECT_VAR_DECL(expected_vld4_2
,poly
,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
213 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
214 VECT_VAR_DECL(expected_vld4_2
,hfloat
,32,4) [] = { 0xc1800000, 0xc1700000,
215 0xc1600000, 0xc1500000 };
218 VECT_VAR_DECL(expected_vld4_3
,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
219 0xf0, 0xf1, 0xf2, 0xf3 };
220 VECT_VAR_DECL(expected_vld4_3
,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
221 VECT_VAR_DECL(expected_vld4_3
,int,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
222 VECT_VAR_DECL(expected_vld4_3
,uint
,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
223 0xaa, 0xaa, 0xaa, 0xaa };
224 VECT_VAR_DECL(expected_vld4_3
,uint
,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
225 VECT_VAR_DECL(expected_vld4_3
,uint
,32,2) [] = { 0xfffffff2, 0xfffffff3 };
226 VECT_VAR_DECL(expected_vld4_3
,poly
,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
227 0xaa, 0xaa, 0xaa, 0xaa };
228 VECT_VAR_DECL(expected_vld4_3
,poly
,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
229 VECT_VAR_DECL(expected_vld4_3
,hfloat
,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
230 VECT_VAR_DECL(expected_vld4_3
,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
231 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
232 VECT_VAR_DECL(expected_vld4_3
,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
233 0xaaaaaaaa, 0xaaaaaaaa };
234 VECT_VAR_DECL(expected_vld4_3
,uint
,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
235 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
236 VECT_VAR_DECL(expected_vld4_3
,uint
,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
237 0xaaaaaaaa, 0xaaaaaaaa };
238 VECT_VAR_DECL(expected_vld4_3
,poly
,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
239 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
240 VECT_VAR_DECL(expected_vld4_3
,hfloat
,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
241 0xaaaaaaaa, 0xaaaaaaaa };
243 /* Declare additional input buffers as needed. */
244 /* Input buffers for vld2_lane */
245 VECT_VAR_DECL_INIT(buffer_vld2_lane
, int, 8, 2);
246 VECT_VAR_DECL_INIT(buffer_vld2_lane
, int, 16, 2);
247 VECT_VAR_DECL_INIT(buffer_vld2_lane
, int, 32, 2);
248 VECT_VAR_DECL_INIT(buffer_vld2_lane
, int, 64, 2);
249 VECT_VAR_DECL_INIT(buffer_vld2_lane
, uint
, 8, 2);
250 VECT_VAR_DECL_INIT(buffer_vld2_lane
, uint
, 16, 2);
251 VECT_VAR_DECL_INIT(buffer_vld2_lane
, uint
, 32, 2);
252 VECT_VAR_DECL_INIT(buffer_vld2_lane
, uint
, 64, 2);
253 VECT_VAR_DECL_INIT(buffer_vld2_lane
, poly
, 8, 2);
254 VECT_VAR_DECL_INIT(buffer_vld2_lane
, poly
, 16, 2);
255 VECT_VAR_DECL_INIT(buffer_vld2_lane
, float, 32, 2);
257 /* Input buffers for vld3_lane */
258 VECT_VAR_DECL_INIT(buffer_vld3_lane
, int, 8, 3);
259 VECT_VAR_DECL_INIT(buffer_vld3_lane
, int, 16, 3);
260 VECT_VAR_DECL_INIT(buffer_vld3_lane
, int, 32, 3);
261 VECT_VAR_DECL_INIT(buffer_vld3_lane
, int, 64, 3);
262 VECT_VAR_DECL_INIT(buffer_vld3_lane
, uint
, 8, 3);
263 VECT_VAR_DECL_INIT(buffer_vld3_lane
, uint
, 16, 3);
264 VECT_VAR_DECL_INIT(buffer_vld3_lane
, uint
, 32, 3);
265 VECT_VAR_DECL_INIT(buffer_vld3_lane
, uint
, 64, 3);
266 VECT_VAR_DECL_INIT(buffer_vld3_lane
, poly
, 8, 3);
267 VECT_VAR_DECL_INIT(buffer_vld3_lane
, poly
, 16, 3);
268 VECT_VAR_DECL_INIT(buffer_vld3_lane
, float, 32, 3);
270 /* Input buffers for vld4_lane */
271 VECT_VAR_DECL_INIT(buffer_vld4_lane
, int, 8, 4);
272 VECT_VAR_DECL_INIT(buffer_vld4_lane
, int, 16, 4);
273 VECT_VAR_DECL_INIT(buffer_vld4_lane
, int, 32, 4);
274 VECT_VAR_DECL_INIT(buffer_vld4_lane
, int, 64, 4);
275 VECT_VAR_DECL_INIT(buffer_vld4_lane
, uint
, 8, 4);
276 VECT_VAR_DECL_INIT(buffer_vld4_lane
, uint
, 16, 4);
277 VECT_VAR_DECL_INIT(buffer_vld4_lane
, uint
, 32, 4);
278 VECT_VAR_DECL_INIT(buffer_vld4_lane
, uint
, 64, 4);
279 VECT_VAR_DECL_INIT(buffer_vld4_lane
, poly
, 8, 4);
280 VECT_VAR_DECL_INIT(buffer_vld4_lane
, poly
, 16, 4);
281 VECT_VAR_DECL_INIT(buffer_vld4_lane
, float, 32, 4);
283 void exec_vldX_lane (void)
/* In this case, input variables are arrays of vectors.  For one element
   type T1/W, vector length N and array size X, declare:
   - "vector":     the array of X vectors produced by vldX_lane,
   - "vector_src": the array of X vectors it is merged into,
   - "result_bis_X": a store buffer holding X * N elements.  */
#define DECL_VLDX_LANE(T1, W, N, X) \
  VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X); \
  VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector_src, T1, W, N, X); \
  VECT_VAR_DECL(result_bis_##X, T1, W, N)[(X) * (N)]
/* We need to use a temporary result buffer (result_bis), because
   the one used for other tests is not large enough.  A subset of the
   result data is moved from result_bis to result, and it is this
   subset which is used to check the actual behaviour.  The
   TEST_EXTRA_CHUNK macro moves another chunk of data from
   result_bis to result.  */
/* We also use another extra input buffer (buffer_src), which we
   fill with 0xAA, and which is used to load a vector from which we
   read a given lane.

   Q:  empty for the 64-bit intrinsics, "q" for the 128-bit ones.
   T1: element type name (int, uint, poly, float).
   T2: intrinsic type suffix letter (s, u, p, f).
   W:  element width in bits.
   N:  number of elements per vector.
   X:  number of vectors handled by the vldX/vstX intrinsics.
   L:  lane index passed to vldX_lane.  */
#define TEST_VLDX_LANE(Q, T1, T2, W, N, X, L) \
  memset (VECT_VAR(buffer_src, T1, W, N), 0xAA, \
          sizeof(VECT_VAR(buffer_src, T1, W, N))); \
  \
  VECT_ARRAY_VAR(vector_src, T1, W, N, X) = \
    vld##X##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N)); \
  \
  VECT_ARRAY_VAR(vector, T1, W, N, X) = \
    /* Use dedicated init buffer, of size X.  */ \
    vld##X##Q##_lane_##T2##W(VECT_VAR(buffer_vld##X##_lane, T1, W, X), \
                             VECT_ARRAY_VAR(vector_src, T1, W, N, X), \
                             L); \
  vst##X##Q##_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \
                      VECT_ARRAY_VAR(vector, T1, W, N, X)); \
  /* Only the first chunk (the size of "result") is checked here.  */ \
  memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \
         sizeof(VECT_VAR(result, T1, W, N)))
/* Overwrite "result" with the contents of "result_bis"[Y]: copy
   sizeof(result) bytes starting at element Y * N of result_bis_X.  */
#define TEST_EXTRA_CHUNK(T1, W, N, X, Y) \
  memcpy(VECT_VAR(result, T1, W, N), \
         &(VECT_VAR(result_bis_##X, T1, W, N)[(Y) * (N)]), \
         sizeof(VECT_VAR(result, T1, W, N)));
/* We need all variants in 64 bits, but there is no 64x2 variant.
   Declares the DECL_VLDX_LANE variables for every element type and
   vector length exercised with array size X.  */
#define DECL_ALL_VLDX_LANE(X) \
  DECL_VLDX_LANE(int, 8, 8, X); \
  DECL_VLDX_LANE(int, 16, 4, X); \
  DECL_VLDX_LANE(int, 32, 2, X); \
  DECL_VLDX_LANE(uint, 8, 8, X); \
  DECL_VLDX_LANE(uint, 16, 4, X); \
  DECL_VLDX_LANE(uint, 32, 2, X); \
  DECL_VLDX_LANE(poly, 8, 8, X); \
  DECL_VLDX_LANE(poly, 16, 4, X); \
  DECL_VLDX_LANE(int, 16, 8, X); \
  DECL_VLDX_LANE(int, 32, 4, X); \
  DECL_VLDX_LANE(uint, 16, 8, X); \
  DECL_VLDX_LANE(uint, 32, 4, X); \
  DECL_VLDX_LANE(poly, 16, 8, X); \
  DECL_VLDX_LANE(float, 32, 2, X); \
  DECL_VLDX_LANE(float, 32, 4, X)
/* Add some padding to try to catch out of bound accesses.
   ARRAY1 declares a one-element array initialised to 42; DUMMY_ARRAY
   declares a zero-initialised array of N * L elements followed by such
   a one-element "V_pad" array.  */
#define ARRAY1(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[1]={42}
#define DUMMY_ARRAY(V, T, W, N, L) \
  VECT_VAR_DECL(V,T,W,N)[(N) * (L)]={0}; \
  ARRAY1(V##_pad,T,W,N)
/* Use the same lanes regardless of the size of the array (X), for
   simplicity.  The last argument of each TEST_VLDX_LANE is the lane
   index used for that element type / vector length.  */
#define TEST_ALL_VLDX_LANE(X) \
  TEST_VLDX_LANE(, int, s, 8, 8, X, 7); \
  TEST_VLDX_LANE(, int, s, 16, 4, X, 2); \
  TEST_VLDX_LANE(, int, s, 32, 2, X, 0); \
  TEST_VLDX_LANE(, uint, u, 8, 8, X, 4); \
  TEST_VLDX_LANE(, uint, u, 16, 4, X, 3); \
  TEST_VLDX_LANE(, uint, u, 32, 2, X, 1); \
  TEST_VLDX_LANE(, poly, p, 8, 8, X, 4); \
  TEST_VLDX_LANE(, poly, p, 16, 4, X, 3); \
  TEST_VLDX_LANE(q, int, s, 16, 8, X, 6); \
  TEST_VLDX_LANE(q, int, s, 32, 4, X, 2); \
  TEST_VLDX_LANE(q, uint, u, 16, 8, X, 5); \
  TEST_VLDX_LANE(q, uint, u, 32, 4, X, 0); \
  TEST_VLDX_LANE(q, poly, p, 16, 8, X, 5); \
  TEST_VLDX_LANE(, float, f, 32, 2, X, 0); \
  TEST_VLDX_LANE(q, float, f, 32, 4, X, 2)
/* Copy chunk Y of every result_bis_X buffer into the matching "result"
   buffer, for all tested element types and vector lengths.  */
#define TEST_ALL_EXTRA_CHUNKS(X, Y) \
  TEST_EXTRA_CHUNK(int, 8, 8, X, Y); \
  TEST_EXTRA_CHUNK(int, 16, 4, X, Y); \
  TEST_EXTRA_CHUNK(int, 32, 2, X, Y); \
  TEST_EXTRA_CHUNK(uint, 8, 8, X, Y); \
  TEST_EXTRA_CHUNK(uint, 16, 4, X, Y); \
  TEST_EXTRA_CHUNK(uint, 32, 2, X, Y); \
  TEST_EXTRA_CHUNK(poly, 8, 8, X, Y); \
  TEST_EXTRA_CHUNK(poly, 16, 4, X, Y); \
  TEST_EXTRA_CHUNK(int, 16, 8, X, Y); \
  TEST_EXTRA_CHUNK(int, 32, 4, X, Y); \
  TEST_EXTRA_CHUNK(uint, 16, 8, X, Y); \
  TEST_EXTRA_CHUNK(uint, 32, 4, X, Y); \
  TEST_EXTRA_CHUNK(poly, 16, 8, X, Y); \
  TEST_EXTRA_CHUNK(float, 32, 2, X, Y); \
  TEST_EXTRA_CHUNK(float, 32, 4, X, Y)
/* vldX_lane supports only a subset of all variants.
   Compare "result" against EXPECTED for each tested variant; float
   variants go through CHECK_FP.  The body is wrapped in a single
   statement so that the macro ends on its own last line.  */
#define CHECK_RESULTS_VLDX_LANE(test_name,EXPECTED,comment) \
  do { \
    CHECK(test_name, int, 8, 8, PRIx8, EXPECTED, comment); \
    CHECK(test_name, int, 16, 4, PRIx16, EXPECTED, comment); \
    CHECK(test_name, int, 32, 2, PRIx32, EXPECTED, comment); \
    CHECK(test_name, uint, 8, 8, PRIx8, EXPECTED, comment); \
    CHECK(test_name, uint, 16, 4, PRIx16, EXPECTED, comment); \
    CHECK(test_name, uint, 32, 2, PRIx32, EXPECTED, comment); \
    CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \
    CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \
    CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment); \
    CHECK(test_name, int, 16, 8, PRIx16, EXPECTED, comment); \
    CHECK(test_name, int, 32, 4, PRIx32, EXPECTED, comment); \
    CHECK(test_name, uint, 16, 8, PRIx16, EXPECTED, comment); \
    CHECK(test_name, uint, 32, 4, PRIx32, EXPECTED, comment); \
    CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \
    CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment); \
  } while (0)
403 /* Declare the temporary buffers / variables. */
404 DECL_ALL_VLDX_LANE(2);
405 DECL_ALL_VLDX_LANE(3);
406 DECL_ALL_VLDX_LANE(4);
408 /* Define dummy input arrays, large enough for x4 vectors. */
409 DUMMY_ARRAY(buffer_src
, int, 8, 8, 4);
410 DUMMY_ARRAY(buffer_src
, int, 16, 4, 4);
411 DUMMY_ARRAY(buffer_src
, int, 32, 2, 4);
412 DUMMY_ARRAY(buffer_src
, uint
, 8, 8, 4);
413 DUMMY_ARRAY(buffer_src
, uint
, 16, 4, 4);
414 DUMMY_ARRAY(buffer_src
, uint
, 32, 2, 4);
415 DUMMY_ARRAY(buffer_src
, poly
, 8, 8, 4);
416 DUMMY_ARRAY(buffer_src
, poly
, 16, 4, 4);
417 DUMMY_ARRAY(buffer_src
, int, 16, 8, 4);
418 DUMMY_ARRAY(buffer_src
, int, 32, 4, 4);
419 DUMMY_ARRAY(buffer_src
, uint
, 16, 8, 4);
420 DUMMY_ARRAY(buffer_src
, uint
, 32, 4, 4);
421 DUMMY_ARRAY(buffer_src
, poly
, 16, 8, 4);
422 DUMMY_ARRAY(buffer_src
, float, 32, 2, 4);
423 DUMMY_ARRAY(buffer_src
, float, 32, 4, 4);
425 /* Check vld2_lane/vld2q_lane. */
427 #define TEST_MSG "VLD2_LANE/VLD2Q_LANE"
428 TEST_ALL_VLDX_LANE(2);
429 CHECK_RESULTS_VLDX_LANE (TEST_MSG
, expected_vld2_0
, " chunk 0");
431 TEST_ALL_EXTRA_CHUNKS(2, 1);
432 CHECK_RESULTS_VLDX_LANE (TEST_MSG
, expected_vld2_1
, " chunk 1");
434 /* Check vld3_lane/vld3q_lane. */
437 #define TEST_MSG "VLD3_LANE/VLD3Q_LANE"
438 TEST_ALL_VLDX_LANE(3);
439 CHECK_RESULTS_VLDX_LANE (TEST_MSG
, expected_vld3_0
, " chunk 0");
441 TEST_ALL_EXTRA_CHUNKS(3, 1);
442 CHECK_RESULTS_VLDX_LANE (TEST_MSG
, expected_vld3_1
, " chunk 1");
444 TEST_ALL_EXTRA_CHUNKS(3, 2);
445 CHECK_RESULTS_VLDX_LANE (TEST_MSG
, expected_vld3_2
, " chunk 2");
447 /* Check vld4_lane/vld4q_lane. */
450 #define TEST_MSG "VLD4_LANE/VLD4Q_LANE"
451 TEST_ALL_VLDX_LANE(4);
452 CHECK_RESULTS_VLDX_LANE (TEST_MSG
, expected_vld4_0
, " chunk 0");
454 TEST_ALL_EXTRA_CHUNKS(4, 1);
455 CHECK_RESULTS_VLDX_LANE (TEST_MSG
, expected_vld4_1
, " chunk 1");
456 TEST_ALL_EXTRA_CHUNKS(4, 2);
458 CHECK_RESULTS_VLDX_LANE (TEST_MSG
, expected_vld4_2
, " chunk 2");
460 TEST_ALL_EXTRA_CHUNKS(4, 3);
461 CHECK_RESULTS_VLDX_LANE (TEST_MSG
, expected_vld4_3
, " chunk 3");