tcg: Split out exec/user/guest-base.h
[qemu/ar7.git] / tests / tcg / hexagon / hvx_misc.c
blobd0e64e035f642352914b85521d0e1e7c3eeb2b10
/*
 *  Copyright(c) 2021-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#include <limits.h>

/*
 * Failure indicator: main() prints "FAIL" and exits with status 1 when this
 * is non-zero, and prints "PASS" and exits with status 0 otherwise.
 *
 * It is never assigned in this file.  Presumably the check_output_*() helpers
 * pulled in from hvx_misc.h update it, which would also explain why it is
 * defined before that include -- TODO confirm against hvx_misc.h.
 */
int err;

#include "hvx_misc.h"
28 static void test_load_tmp(void)
30 void *p0 = buffer0;
31 void *p1 = buffer1;
32 void *pout = output;
34 for (int i = 0; i < BUFSIZE; i++) {
36 * Load into v12 as .tmp, then use it in the next packet
37 * Should get the new value within the same packet and
38 * the old value in the next packet
40 asm("v3 = vmem(%0 + #0)\n\t"
41 "r1 = #1\n\t"
42 "v12 = vsplat(r1)\n\t"
43 "{\n\t"
44 " v12.tmp = vmem(%1 + #0)\n\t"
45 " v4.w = vadd(v12.w, v3.w)\n\t"
46 "}\n\t"
47 "v4.w = vadd(v4.w, v12.w)\n\t"
48 "vmem(%2 + #0) = v4\n\t"
49 : : "r"(p0), "r"(p1), "r"(pout)
50 : "r1", "v12", "v3", "v4", "v6", "memory");
51 p0 += sizeof(MMVector);
52 p1 += sizeof(MMVector);
53 pout += sizeof(MMVector);
55 for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
56 expect[i].w[j] = buffer0[i].w[j] + buffer1[i].w[j] + 1;
60 check_output_w(__LINE__, BUFSIZE);
63 static void test_load_cur(void)
65 void *p0 = buffer0;
66 void *pout = output;
68 for (int i = 0; i < BUFSIZE; i++) {
69 asm("{\n\t"
70 " v2.cur = vmem(%0 + #0)\n\t"
71 " vmem(%1 + #0) = v2\n\t"
72 "}\n\t"
73 : : "r"(p0), "r"(pout) : "v2", "memory");
74 p0 += sizeof(MMVector);
75 pout += sizeof(MMVector);
77 for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
78 expect[i].uw[j] = buffer0[i].uw[j];
82 check_output_w(__LINE__, BUFSIZE);
85 static void test_load_aligned(void)
87 /* Aligned loads ignore the low bits of the address */
88 void *p0 = buffer0;
89 void *pout = output;
90 const size_t offset = 13;
92 p0 += offset; /* Create an unaligned address */
93 asm("v2 = vmem(%0 + #0)\n\t"
94 "vmem(%1 + #0) = v2\n\t"
95 : : "r"(p0), "r"(pout) : "v2", "memory");
97 expect[0] = buffer0[0];
99 check_output_w(__LINE__, 1);
102 static void test_load_unaligned(void)
104 void *p0 = buffer0;
105 void *pout = output;
106 const size_t offset = 12;
108 p0 += offset; /* Create an unaligned address */
109 asm("v2 = vmemu(%0 + #0)\n\t"
110 "vmem(%1 + #0) = v2\n\t"
111 : : "r"(p0), "r"(pout) : "v2", "memory");
113 memcpy(expect, &buffer0[0].ub[offset], sizeof(MMVector));
115 check_output_w(__LINE__, 1);
118 static void test_store_aligned(void)
120 /* Aligned stores ignore the low bits of the address */
121 void *p0 = buffer0;
122 void *pout = output;
123 const size_t offset = 13;
125 pout += offset; /* Create an unaligned address */
126 asm("v2 = vmem(%0 + #0)\n\t"
127 "vmem(%1 + #0) = v2\n\t"
128 : : "r"(p0), "r"(pout) : "v2", "memory");
130 expect[0] = buffer0[0];
132 check_output_w(__LINE__, 1);
135 static void test_store_unaligned(void)
137 void *p0 = buffer0;
138 void *pout = output;
139 const size_t offset = 12;
141 pout += offset; /* Create an unaligned address */
142 asm("v2 = vmem(%0 + #0)\n\t"
143 "vmemu(%1 + #0) = v2\n\t"
144 : : "r"(p0), "r"(pout) : "v2", "memory");
146 memcpy(expect, buffer0, 2 * sizeof(MMVector));
147 memcpy(&expect[0].ub[offset], buffer0, sizeof(MMVector));
149 check_output_w(__LINE__, 2);
152 static void test_masked_store(bool invert)
154 void *p0 = buffer0;
155 void *pmask = mask;
156 void *pout = output;
158 memset(expect, 0xff, sizeof(expect));
159 memset(output, 0xff, sizeof(expect));
161 for (int i = 0; i < BUFSIZE; i++) {
162 if (invert) {
163 asm("r4 = #0\n\t"
164 "v4 = vsplat(r4)\n\t"
165 "v5 = vmem(%0 + #0)\n\t"
166 "q0 = vcmp.eq(v4.w, v5.w)\n\t"
167 "v5 = vmem(%1)\n\t"
168 "if (!q0) vmem(%2) = v5\n\t" /* Inverted test */
169 : : "r"(pmask), "r"(p0), "r"(pout)
170 : "r4", "v4", "v5", "q0", "memory");
171 } else {
172 asm("r4 = #0\n\t"
173 "v4 = vsplat(r4)\n\t"
174 "v5 = vmem(%0 + #0)\n\t"
175 "q0 = vcmp.eq(v4.w, v5.w)\n\t"
176 "v5 = vmem(%1)\n\t"
177 "if (q0) vmem(%2) = v5\n\t" /* Non-inverted test */
178 : : "r"(pmask), "r"(p0), "r"(pout)
179 : "r4", "v4", "v5", "q0", "memory");
181 p0 += sizeof(MMVector);
182 pmask += sizeof(MMVector);
183 pout += sizeof(MMVector);
185 for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
186 if (invert) {
187 if (i + j % MASKMOD != 0) {
188 expect[i].w[j] = buffer0[i].w[j];
190 } else {
191 if (i + j % MASKMOD == 0) {
192 expect[i].w[j] = buffer0[i].w[j];
198 check_output_w(__LINE__, BUFSIZE);
201 static void test_new_value_store(void)
203 void *p0 = buffer0;
204 void *pout = output;
206 asm("{\n\t"
207 " v2 = vmem(%0 + #0)\n\t"
208 " vmem(%1 + #0) = v2.new\n\t"
209 "}\n\t"
210 : : "r"(p0), "r"(pout) : "v2", "memory");
212 expect[0] = buffer0[0];
214 check_output_w(__LINE__, 1);
217 static void test_max_temps()
219 void *p0 = buffer0;
220 void *pout = output;
222 asm("v0 = vmem(%0 + #0)\n\t"
223 "v1 = vmem(%0 + #1)\n\t"
224 "v2 = vmem(%0 + #2)\n\t"
225 "v3 = vmem(%0 + #3)\n\t"
226 "v4 = vmem(%0 + #4)\n\t"
227 "{\n\t"
228 " v1:0.w = vadd(v3:2.w, v1:0.w)\n\t"
229 " v2.b = vshuffe(v3.b, v2.b)\n\t"
230 " v3.w = vadd(v1.w, v4.w)\n\t"
231 " v4.tmp = vmem(%0 + #5)\n\t"
232 "}\n\t"
233 "vmem(%1 + #0) = v0\n\t"
234 "vmem(%1 + #1) = v1\n\t"
235 "vmem(%1 + #2) = v2\n\t"
236 "vmem(%1 + #3) = v3\n\t"
237 "vmem(%1 + #4) = v4\n\t"
238 : : "r"(p0), "r"(pout) : "memory");
240 /* The first two vectors come from the vadd-pair instruction */
241 for (int i = 0; i < MAX_VEC_SIZE_BYTES / 4; i++) {
242 expect[0].w[i] = buffer0[0].w[i] + buffer0[2].w[i];
243 expect[1].w[i] = buffer0[1].w[i] + buffer0[3].w[i];
245 /* The third vector comes from the vshuffe instruction */
246 for (int i = 0; i < MAX_VEC_SIZE_BYTES / 2; i++) {
247 expect[2].uh[i] = (buffer0[2].uh[i] & 0xff) |
248 (buffer0[3].uh[i] & 0xff) << 8;
250 /* The fourth vector comes from the vadd-single instruction */
251 for (int i = 0; i < MAX_VEC_SIZE_BYTES / 4; i++) {
252 expect[3].w[i] = buffer0[1].w[i] + buffer0[5].w[i];
255 * The fifth vector comes from the load to v4
256 * make sure the .tmp is dropped
258 expect[4] = buffer0[4];
260 check_output_b(__LINE__, 5);
/*
 * Instantiate the element-wise vector operation tests.  main() calls
 * test_vadd_w(), test_vadd_h(), ... test_vnot() with no arguments, so each
 * line evidently defines a function named test_<first argument>.
 * NOTE(review): the macros are not defined in this file (presumably in
 * hvx_misc.h).  The remaining arguments look like the instruction mnemonic,
 * the asm element suffix, the MMVector field, the element size in bytes and
 * the C operator used for the expected value -- confirm there.
 */
TEST_VEC_OP2(vadd_w, vadd, .w, w, 4, +)
TEST_VEC_OP2(vadd_h, vadd, .h, h, 2, +)
TEST_VEC_OP2(vadd_b, vadd, .b, b, 1, +)
TEST_VEC_OP2(vsub_w, vsub, .w, w, 4, -)
TEST_VEC_OP2(vsub_h, vsub, .h, h, 2, -)
TEST_VEC_OP2(vsub_b, vsub, .b, b, 1, -)
TEST_VEC_OP2(vxor, vxor, , d, 8, ^)
TEST_VEC_OP2(vand, vand, , d, 8, &)
TEST_VEC_OP2(vor, vor, , d, 8, |)
TEST_VEC_OP1(vnot, vnot, , d, 8, ~)

/*
 * Instantiate the vector-predicate operation tests.  main() calls each
 * generated test_<first argument> with a single bool argument: false for
 * the "" variants and true for the "!" variants.
 * NOTE(review): the last macro argument presumably selects the negated
 * form of the instruction -- confirm against the macro definition.
 */
TEST_PRED_OP2(pred_or, or, |, "")
TEST_PRED_OP2(pred_or_n, or, |, "!")
TEST_PRED_OP2(pred_and, and, &, "")
TEST_PRED_OP2(pred_and_n, and, &, "!")
TEST_PRED_OP2(pred_xor, xor, ^, "")
280 static void test_vadduwsat(void)
283 * Test for saturation by adding two numbers that add to more than UINT_MAX
284 * and make sure the result saturates to UINT_MAX
286 const uint32_t x = 0xffff0000;
287 const uint32_t y = 0x000fffff;
289 memset(expect, 0x12, sizeof(MMVector));
290 memset(output, 0x34, sizeof(MMVector));
292 asm volatile ("v10 = vsplat(%0)\n\t"
293 "v11 = vsplat(%1)\n\t"
294 "v21.uw = vadd(v11.uw, v10.uw):sat\n\t"
295 "vmem(%2+#0) = v21\n\t"
296 : /* no outputs */
297 : "r"(x), "r"(y), "r"(output)
298 : "v10", "v11", "v21", "memory");
300 for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
301 expect[0].uw[j] = UINT_MAX;
304 check_output_w(__LINE__, 1);
307 static void test_vsubuwsat_dv(void)
310 * Test for saturation by subtracting two numbers where the result is
311 * negative and make sure the result saturates to zero
313 * vsubuwsat_dv operates on an HVX register pair, so we'll have a
314 * pair of subtractions
315 * w - x < 0
316 * y - z < 0
318 const uint32_t w = 0x000000b7;
319 const uint32_t x = 0xffffff4e;
320 const uint32_t y = 0x31fe88e7;
321 const uint32_t z = 0x7fffff79;
323 memset(expect, 0x12, sizeof(MMVector) * 2);
324 memset(output, 0x34, sizeof(MMVector) * 2);
326 asm volatile ("v16 = vsplat(%0)\n\t"
327 "v17 = vsplat(%1)\n\t"
328 "v26 = vsplat(%2)\n\t"
329 "v27 = vsplat(%3)\n\t"
330 "v25:24.uw = vsub(v17:16.uw, v27:26.uw):sat\n\t"
331 "vmem(%4+#0) = v24\n\t"
332 "vmem(%4+#1) = v25\n\t"
333 : /* no outputs */
334 : "r"(w), "r"(y), "r"(x), "r"(z), "r"(output)
335 : "v16", "v17", "v24", "v25", "v26", "v27", "memory");
337 for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
338 expect[0].uw[j] = 0x00000000;
339 expect[1].uw[j] = 0x00000000;
342 check_output_w(__LINE__, 2);
345 static void test_vshuff(void)
347 /* Test that vshuff works when the two operands are the same register */
348 const uint32_t splat = 0x089be55c;
349 const uint32_t shuff = 0x454fa926;
350 MMVector v0, v1;
352 memset(expect, 0x12, sizeof(MMVector));
353 memset(output, 0x34, sizeof(MMVector));
355 asm volatile("v25 = vsplat(%0)\n\t"
356 "vshuff(v25, v25, %1)\n\t"
357 "vmem(%2 + #0) = v25\n\t"
358 : /* no outputs */
359 : "r"(splat), "r"(shuff), "r"(output)
360 : "v25", "memory");
363 * The semantics of Hexagon are the operands are pass-by-value, so create
364 * two copies of the vsplat result.
366 for (int i = 0; i < MAX_VEC_SIZE_BYTES / 4; i++) {
367 v0.uw[i] = splat;
368 v1.uw[i] = splat;
370 /* Do the vshuff operation */
371 for (int offset = 1; offset < MAX_VEC_SIZE_BYTES; offset <<= 1) {
372 if (shuff & offset) {
373 for (int k = 0; k < MAX_VEC_SIZE_BYTES; k++) {
374 if (!(k & offset)) {
375 uint8_t tmp = v0.ub[k];
376 v0.ub[k] = v1.ub[k + offset];
377 v1.ub[k + offset] = tmp;
382 /* Put the result in the expect buffer for verification */
383 expect[0] = v1;
385 check_output_b(__LINE__, 1);
388 static void test_load_tmp_predicated(void)
390 void *p0 = buffer0;
391 void *p1 = buffer1;
392 void *pout = output;
393 bool pred = true;
395 for (int i = 0; i < BUFSIZE; i++) {
397 * Load into v12 as .tmp with a predicate
398 * When the predicate is true, we get the vector from buffer1[i]
399 * When the predicate is false, we get a vector of all 1's
400 * Regardless of the predicate, the next packet should have
401 * a vector of all 1's
403 asm("v3 = vmem(%0 + #0)\n\t"
404 "r1 = #1\n\t"
405 "v12 = vsplat(r1)\n\t"
406 "p1 = !cmp.eq(%3, #0)\n\t"
407 "{\n\t"
408 " if (p1) v12.tmp = vmem(%1 + #0)\n\t"
409 " v4.w = vadd(v12.w, v3.w)\n\t"
410 "}\n\t"
411 "v4.w = vadd(v4.w, v12.w)\n\t"
412 "vmem(%2 + #0) = v4\n\t"
413 : : "r"(p0), "r"(p1), "r"(pout), "r"(pred)
414 : "r1", "p1", "v12", "v3", "v4", "v6", "memory");
415 p0 += sizeof(MMVector);
416 p1 += sizeof(MMVector);
417 pout += sizeof(MMVector);
419 for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
420 expect[i].w[j] =
421 pred ? buffer0[i].w[j] + buffer1[i].w[j] + 1
422 : buffer0[i].w[j] + 2;
424 pred = !pred;
427 check_output_w(__LINE__, BUFSIZE);
430 static void test_load_cur_predicated(void)
432 bool pred = true;
433 for (int i = 0; i < BUFSIZE; i++) {
434 asm volatile("p0 = !cmp.eq(%3, #0)\n\t"
435 "v3 = vmem(%0+#0)\n\t"
437 * Preload v4 to make sure that the assignment from the
438 * packet below is not being ignored when pred is false.
440 "r0 = #0x01237654\n\t"
441 "v4 = vsplat(r0)\n\t"
442 "{\n\t"
443 " if (p0) v3.cur = vmem(%1+#0)\n\t"
444 " v4 = v3\n\t"
445 "}\n\t"
446 "vmem(%2+#0) = v4\n\t"
448 : "r"(&buffer0[i]), "r"(&buffer1[i]),
449 "r"(&output[i]), "r"(pred)
450 : "r0", "p0", "v3", "v4", "memory");
451 expect[i] = pred ? buffer1[i] : buffer0[i];
452 pred = !pred;
454 check_output_w(__LINE__, BUFSIZE);
457 int main()
459 init_buffers();
461 test_load_tmp();
462 test_load_cur();
463 test_load_aligned();
464 test_load_unaligned();
465 test_store_aligned();
466 test_store_unaligned();
467 test_masked_store(false);
468 test_masked_store(true);
469 test_new_value_store();
470 test_max_temps();
472 test_vadd_w();
473 test_vadd_h();
474 test_vadd_b();
475 test_vsub_w();
476 test_vsub_h();
477 test_vsub_b();
478 test_vxor();
479 test_vand();
480 test_vor();
481 test_vnot();
483 test_pred_or(false);
484 test_pred_or_n(true);
485 test_pred_and(false);
486 test_pred_and_n(true);
487 test_pred_xor(false);
489 test_vadduwsat();
490 test_vsubuwsat_dv();
492 test_vshuff();
494 test_load_tmp_predicated();
495 test_load_cur_predicated();
497 puts(err ? "FAIL" : "PASS");
498 return err ? 1 : 0;