/*
 *  Copyright(c) 2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
/*
 * Add the rounding constant for a right shift by SHAMT to VAL, i.e.
 * 1 << (SHAMT - 1), or nothing when SHAMT is not positive.  The shift
 * itself is left to the caller.  The constant is a long long, so the sum
 * is evaluated in at least 64 bits.  SHAMT is evaluated twice.
 */
#define fVROUND(VAL, SHAMT) \
    ((VAL) + (((SHAMT) > 0) ? (1LL << ((SHAMT) - 1)) : 0))
/*
 * Saturate VAL to the unsigned byte range.  A value that already fits in
 * eight bits is passed through unchanged; otherwise the result is 0 when
 * VAL, viewed as a 32-bit signed integer, is negative, and 0xff when it is
 * not.  VAL is evaluated more than once, so it must be free of side
 * effects.
 *
 * NOTE(review): the "(VAL) :" arm was missing from the reviewed text and
 * has been restored; without it the conditional is not well formed.
 */
#define fVSATUB(VAL) \
    ((((VAL) & 0xffLL) == (VAL)) ? \
        (VAL) : \
        ((((int32_t)(VAL)) < 0) ? 0 : 0xff))
/*
 * Saturate VAL to the unsigned halfword range.  A value that already fits
 * in sixteen bits is passed through unchanged; otherwise the result is 0
 * when VAL, viewed as a 32-bit signed integer, is negative, and 0xffff
 * when it is not.  VAL is evaluated more than once, so it must be free of
 * side effects.
 *
 * NOTE(review): the "(VAL) :" arm was missing from the reviewed text and
 * has been restored; without it the conditional is not well formed.
 */
#define fVSATUH(VAL) \
    ((((VAL) & 0xffffLL) == (VAL)) ? \
        (VAL) : \
        ((((int32_t)(VAL)) < 0) ? 0 : 0xffff))
41 static void test_vasrvuhubrndsat(void)
47 memset(expect
, 0xaa, sizeof(expect
));
48 memset(output
, 0xbb, sizeof(output
));
50 for (int i
= 0; i
< BUFSIZE
/ 2; i
++) {
51 asm("v4 = vmem(%0 + #0)\n\t"
52 "v5 = vmem(%0 + #1)\n\t"
53 "v6 = vmem(%1 + #0)\n\t"
54 "v5.ub = vasr(v5:4.uh, v6.ub):rnd:sat\n\t"
56 : : "r"(p0
), "r"(p1
), "r"(pout
)
57 : "v4", "v5", "v6", "memory");
58 p0
+= sizeof(MMVector
) * 2;
59 p1
+= sizeof(MMVector
);
60 pout
+= sizeof(MMVector
);
62 for (int j
= 0; j
< MAX_VEC_SIZE_BYTES
/ 2; j
++) {
67 shamt
= buffer1
[i
].ub
[2 * j
+ 0] & 0x7;
68 byte0
= fVSATUB(fVROUND(buffer0
[2 * i
+ 0].uh
[j
], shamt
) >> shamt
);
69 shamt
= buffer1
[i
].ub
[2 * j
+ 1] & 0x7;
70 byte1
= fVSATUB(fVROUND(buffer0
[2 * i
+ 1].uh
[j
], shamt
) >> shamt
);
71 expect
[i
].uh
[j
] = (byte1
<< 8) | (byte0
& 0xff);
75 check_output_h(__LINE__
, BUFSIZE
/ 2);
78 static void test_vasrvuhubsat(void)
84 memset(expect
, 0xaa, sizeof(expect
));
85 memset(output
, 0xbb, sizeof(output
));
87 for (int i
= 0; i
< BUFSIZE
/ 2; i
++) {
88 asm("v4 = vmem(%0 + #0)\n\t"
89 "v5 = vmem(%0 + #1)\n\t"
90 "v6 = vmem(%1 + #0)\n\t"
91 "v5.ub = vasr(v5:4.uh, v6.ub):sat\n\t"
93 : : "r"(p0
), "r"(p1
), "r"(pout
)
94 : "v4", "v5", "v6", "memory");
95 p0
+= sizeof(MMVector
) * 2;
96 p1
+= sizeof(MMVector
);
97 pout
+= sizeof(MMVector
);
99 for (int j
= 0; j
< MAX_VEC_SIZE_BYTES
/ 2; j
++) {
104 shamt
= buffer1
[i
].ub
[2 * j
+ 0] & 0x7;
105 byte0
= fVSATUB(buffer0
[2 * i
+ 0].uh
[j
] >> shamt
);
106 shamt
= buffer1
[i
].ub
[2 * j
+ 1] & 0x7;
107 byte1
= fVSATUB(buffer0
[2 * i
+ 1].uh
[j
] >> shamt
);
108 expect
[i
].uh
[j
] = (byte1
<< 8) | (byte0
& 0xff);
112 check_output_h(__LINE__
, BUFSIZE
/ 2);
115 static void test_vasrvwuhrndsat(void)
121 memset(expect
, 0xaa, sizeof(expect
));
122 memset(output
, 0xbb, sizeof(output
));
124 for (int i
= 0; i
< BUFSIZE
/ 2; i
++) {
125 asm("v4 = vmem(%0 + #0)\n\t"
126 "v5 = vmem(%0 + #1)\n\t"
127 "v6 = vmem(%1 + #0)\n\t"
128 "v5.uh = vasr(v5:4.w, v6.uh):rnd:sat\n\t"
130 : : "r"(p0
), "r"(p1
), "r"(pout
)
131 : "v4", "v5", "v6", "memory");
132 p0
+= sizeof(MMVector
) * 2;
133 p1
+= sizeof(MMVector
);
134 pout
+= sizeof(MMVector
);
136 for (int j
= 0; j
< MAX_VEC_SIZE_BYTES
/ 4; j
++) {
141 shamt
= buffer1
[i
].uh
[2 * j
+ 0] & 0xf;
142 half0
= fVSATUH(fVROUND(buffer0
[2 * i
+ 0].w
[j
], shamt
) >> shamt
);
143 shamt
= buffer1
[i
].uh
[2 * j
+ 1] & 0xf;
144 half1
= fVSATUH(fVROUND(buffer0
[2 * i
+ 1].w
[j
], shamt
) >> shamt
);
145 expect
[i
].w
[j
] = (half1
<< 16) | (half0
& 0xffff);
149 check_output_w(__LINE__
, BUFSIZE
/ 2);
152 static void test_vasrvwuhsat(void)
158 memset(expect
, 0xaa, sizeof(expect
));
159 memset(output
, 0xbb, sizeof(output
));
161 for (int i
= 0; i
< BUFSIZE
/ 2; i
++) {
162 asm("v4 = vmem(%0 + #0)\n\t"
163 "v5 = vmem(%0 + #1)\n\t"
164 "v6 = vmem(%1 + #0)\n\t"
165 "v5.uh = vasr(v5:4.w, v6.uh):sat\n\t"
167 : : "r"(p0
), "r"(p1
), "r"(pout
)
168 : "v4", "v5", "v6", "memory");
169 p0
+= sizeof(MMVector
) * 2;
170 p1
+= sizeof(MMVector
);
171 pout
+= sizeof(MMVector
);
173 for (int j
= 0; j
< MAX_VEC_SIZE_BYTES
/ 4; j
++) {
178 shamt
= buffer1
[i
].uh
[2 * j
+ 0] & 0xf;
179 half0
= fVSATUH(buffer0
[2 * i
+ 0].w
[j
] >> shamt
);
180 shamt
= buffer1
[i
].uh
[2 * j
+ 1] & 0xf;
181 half1
= fVSATUH(buffer0
[2 * i
+ 1].w
[j
] >> shamt
);
182 expect
[i
].w
[j
] = (half1
<< 16) | (half0
& 0xffff);
186 check_output_w(__LINE__
, BUFSIZE
/ 2);
189 static void test_vassign_tmp(void)
194 memset(expect
, 0xaa, sizeof(expect
));
195 memset(output
, 0xbb, sizeof(output
));
197 for (int i
= 0; i
< BUFSIZE
; i
++) {
199 * Assign into v12 as .tmp, then use it in the next packet
200 * Should get the new value within the same packet and
201 * the old value in the next packet
203 asm("v3 = vmem(%0 + #0)\n\t"
205 "v12 = vsplat(r1)\n\t"
207 "v13 = vsplat(r1)\n\t"
210 " v4.w = vadd(v12.w, v3.w)\n\t"
212 "v4.w = vadd(v4.w, v12.w)\n\t"
213 "vmem(%1 + #0) = v4\n\t"
214 : : "r"(p0
), "r"(pout
)
215 : "r1", "v3", "v4", "v12", "v13", "memory");
216 p0
+= sizeof(MMVector
);
217 pout
+= sizeof(MMVector
);
219 for (int j
= 0; j
< MAX_VEC_SIZE_BYTES
/ 4; j
++) {
220 expect
[i
].w
[j
] = buffer0
[i
].w
[j
] + 3;
224 check_output_w(__LINE__
, BUFSIZE
);
227 static void test_vcombine_tmp(void)
233 memset(expect
, 0xaa, sizeof(expect
));
234 memset(output
, 0xbb, sizeof(output
));
236 for (int i
= 0; i
< BUFSIZE
; i
++) {
238 * Combine into v13:12 as .tmp, then use it in the next packet
239 * Should get the new value within the same packet and
240 * the old value in the next packet
242 asm("v3 = vmem(%0 + #0)\n\t"
244 "v12 = vsplat(r1)\n\t"
246 "v13 = vsplat(r1)\n\t"
248 "v14 = vsplat(r1)\n\t"
250 "v15 = vsplat(r1)\n\t"
252 " v13:12.tmp = vcombine(v15, v14)\n\t"
253 " v4.w = vadd(v12.w, v3.w)\n\t"
256 "v4.w = vadd(v4.w, v12.w)\n\t"
257 "v4.w = vadd(v4.w, v13.w)\n\t"
258 "v4.w = vadd(v4.w, v16.w)\n\t"
259 "vmem(%2 + #0) = v4\n\t"
260 : : "r"(p0
), "r"(p1
), "r"(pout
)
261 : "r1", "v3", "v4", "v12", "v13", "v14", "v15", "v16", "memory");
262 p0
+= sizeof(MMVector
);
263 p1
+= sizeof(MMVector
);
264 pout
+= sizeof(MMVector
);
266 for (int j
= 0; j
< MAX_VEC_SIZE_BYTES
/ 4; j
++) {
267 expect
[i
].w
[j
] = buffer0
[i
].w
[j
] + 10;
271 check_output_w(__LINE__
, BUFSIZE
);
274 static void test_vmpyuhvs(void)
280 memset(expect
, 0xaa, sizeof(expect
));
281 memset(output
, 0xbb, sizeof(output
));
283 for (int i
= 0; i
< BUFSIZE
; i
++) {
284 asm("v4 = vmem(%0 + #0)\n\t"
285 "v5 = vmem(%1 + #0)\n\t"
286 "v4.uh = vmpy(V4.uh, v5.uh):>>16\n\t"
288 : : "r"(p0
), "r"(p1
), "r"(pout
)
289 : "v4", "v5", "memory");
290 p0
+= sizeof(MMVector
);
291 p1
+= sizeof(MMVector
);
292 pout
+= sizeof(MMVector
);
294 for (int j
= 0; j
< MAX_VEC_SIZE_BYTES
/ 2; j
++) {
295 expect
[i
].uh
[j
] = (buffer0
[i
].uh
[j
] * buffer1
[i
].uh
[j
]) >> 16;
299 check_output_h(__LINE__
, BUFSIZE
);
306 test_vasrvuhubrndsat();
308 test_vasrvwuhrndsat();
316 puts(err
? "FAIL" : "PASS");