2 * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, see <http://www.gnu.org/licenses/>.
/*
 * Debug trace helper: passes its variadic arguments straight to printf().
 * NOTE(review): only the printf line of the body is visible in this view;
 * whatever wraps or guards it must be confirmed against the full file.
 */
26 #define DEBUG_PRINTF(...) \
29 printf(__VA_ARGS__); \
/*
 * Every test buffer occupies NBYTES bytes.  The element counts are derived
 * from the fixed-width types the buffers are declared with (int16_t,
 * int32_t, int64_t), so they stay correct even on a compiler where
 * short/int/long long are not exactly 16/32/64 bits wide.
 */
#define NBYTES (1 << 8)
#define NHALFS (NBYTES / sizeof(int16_t))
#define NWORDS (NBYTES / sizeof(int32_t))
#define NDOBLS (NBYTES / sizeof(int64_t))
39 int64_t dbuf
[NDOBLS
] __attribute__((aligned(1 << 12))) = {0};
40 int32_t wbuf
[NWORDS
] __attribute__((aligned(1 << 12))) = {0};
41 int16_t hbuf
[NHALFS
] __attribute__((aligned(1 << 12))) = {0};
42 uint8_t bbuf
[NBYTES
] __attribute__((aligned(1 << 12))) = {0};
45 * We use the C preprocessor to deal with the combinations of types
/*
 * INIT(BUF, N) defines a function init_<BUF>() that walks i over 0..N-1.
 * NOTE(review): the loop body is not visible in this view.  The store-check
 * comment further down expects BUF[i] == i for untouched elements, so it
 * presumably fills BUF[i] with i - confirm against the full file.
 */
48 #define INIT(BUF, N) \
49 void init_##BUF(void) \
51 for (int i = 0; i < N; i++) { \
62 * Macros for performing circular load
65 * START start address of buffer
66 * LEN length of buffer (in bytes)
67 * INC address increment (in bytes for IMM, elements for REG)
/*
 * Circular load with an immediate increment, using modifier register M0.
 * SIZE is pasted into the "mem<SIZE>" mnemonic and INC is pasted textually
 * after '#', so INC has to be a constant the assembler can evaluate.
 * RES is written with the loaded value ("=r") and ADDR is updated in place
 * ("+r"); START and LEN are supplied as register inputs.
 * NOTE(review): the instructions that consume START and LEN, and the
 * clobber list, are on lines not visible in this view.
 */
69 #define CIRC_LOAD_IMM(SIZE, RES, ADDR, START, LEN, INC) \
74 "%0 = mem" #SIZE "(%1++#" #INC ":circ(M0))\n\t" \
75 : "=r"(RES), "+r"(ADDR) \
76 : "r"(START), "r"(LEN) \
/*
 * Per-width entry points for the immediate-increment circular load.  Each
 * one forwards to CIRC_LOAD_IMM with the mem<SIZE> suffix filled in.  The
 * tests pair b/ub with 8-bit, h/uh with 16-bit, w with 32-bit and d with
 * 64-bit element types.
 */
#define CIRC_LOAD_IMM_b(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(b, RES, ADDR, START, LEN, INC)

#define CIRC_LOAD_IMM_ub(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(ub, RES, ADDR, START, LEN, INC)

#define CIRC_LOAD_IMM_h(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(h, RES, ADDR, START, LEN, INC)

#define CIRC_LOAD_IMM_uh(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(uh, RES, ADDR, START, LEN, INC)

#define CIRC_LOAD_IMM_w(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(w, RES, ADDR, START, LEN, INC)

#define CIRC_LOAD_IMM_d(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(d, RES, ADDR, START, LEN, INC)
92 * The mreg has the following pieces
93 * mreg[31:28] increment[10:7]
94 * mreg[27:24] K value (used in Hexagon v3 and earlier)
95 * mreg[23:17] increment[6:0]
96 * mreg[16:0] circular buffer length
/*
 * Pack the fields of a circular-addressing modifier register.
 *
 *   inc  increment; bits [10:7] land in mreg[31:28], bits [6:0] in
 *        mreg[23:17]
 *   K    K value, placed in mreg[27:24]
 *   len  circular buffer length, placed in mreg[16:0]
 *
 * Returns the assembled 32-bit register image.
 *
 * The fields are assembled as unsigned values: a negative increment has
 * inc[10:7] set, and shifting those bits into the sign position of a signed
 * int is undefined behaviour.  The bit pattern produced is unchanged.
 */
static int32_t build_mreg(int32_t inc, int32_t K, int32_t len)
{
    uint32_t inc_hi = ((uint32_t)inc & 0x780u) << 21;
    uint32_t k_bits = ((uint32_t)K & 0xfu) << 24;
    uint32_t inc_lo = ((uint32_t)inc & 0x7fu) << 17;
    uint32_t length = (uint32_t)len & 0x1ffffu;

    return (int32_t)(inc_hi | k_bits | inc_lo | length);
}
/*
 * Circular load with a register-held increment ("++I"), using modifier
 * register M1.  The increment and buffer length are packed together by
 * build_mreg(INC, 0, LEN) and passed in as a register input.  RES is written
 * with the loaded value ("=r") and ADDR is updated in place ("+r").
 * NOTE(review): the instructions that load M1/CS1, the remaining inputs and
 * the clobber list are on lines not visible in this view.
 */
106 #define CIRC_LOAD_REG(SIZE, RES, ADDR, START, LEN, INC) \
111 "%0 = mem" #SIZE "(%1++I:circ(M1))\n\t" \
112 : "=r"(RES), "+r"(ADDR) \
113 : "r"(build_mreg((INC), 0, (LEN))), \
/*
 * Per-width entry points for the register-increment circular load.  Each
 * one forwards to CIRC_LOAD_REG with the mem<SIZE> suffix filled in
 * (b/ub, h/uh, w, d).
 */
#define CIRC_LOAD_REG_b(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(b, RES, ADDR, START, LEN, INC)

#define CIRC_LOAD_REG_ub(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(ub, RES, ADDR, START, LEN, INC)

#define CIRC_LOAD_REG_h(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(h, RES, ADDR, START, LEN, INC)

#define CIRC_LOAD_REG_uh(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(uh, RES, ADDR, START, LEN, INC)

#define CIRC_LOAD_REG_w(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(w, RES, ADDR, START, LEN, INC)

#define CIRC_LOAD_REG_d(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(d, RES, ADDR, START, LEN, INC)
130 * Macros for performing circular store
133 * START start address of buffer
134 * LEN length of buffer (in bytes)
135 * INC address increment (in bytes for IMM, elements for REG)
/*
 * Circular store with an immediate increment, using modifier register M0.
 * SIZE is pasted into the "mem<SIZE>" mnemonic and INC is pasted after '#'.
 * The stored value is operand %2 (VAL) with the string PART appended to the
 * register name; the wrappers pass "" or ".H".  START, VAL and LEN are
 * register inputs; r4, m0, cs0 and memory are declared clobbered.
 * NOTE(review): the output operand list and the instructions that use r4,
 * M0 and CS0 are on lines not visible in this view.
 */
137 #define CIRC_STORE_IMM(SIZE, PART, VAL, ADDR, START, LEN, INC) \
142 "mem" #SIZE "(%0++#" #INC ":circ(M0)) = %2" PART "\n\t" \
144 : "r"(START), "r"(VAL), "r"(LEN) \
145 : "r4", "m0", "cs0", "memory")
/*
 * Per-width entry points for the immediate-increment circular store.  All
 * forward to CIRC_STORE_IMM.  The _f variant is a halfword store whose
 * source operand gets the ".H" suffix; every other variant passes an empty
 * suffix.
 */
#define CIRC_STORE_IMM_b(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_IMM(b, "", VAL, ADDR, START, LEN, INC)

#define CIRC_STORE_IMM_h(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_IMM(h, "", VAL, ADDR, START, LEN, INC)

#define CIRC_STORE_IMM_f(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_IMM(h, ".H", VAL, ADDR, START, LEN, INC)

#define CIRC_STORE_IMM_w(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_IMM(w, "", VAL, ADDR, START, LEN, INC)

#define CIRC_STORE_IMM_d(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_IMM(d, "", VAL, ADDR, START, LEN, INC)
/*
 * Immediate-increment circular store whose source is "r5.new", using
 * modifier register M0.  START, VAL and LEN are register inputs; r4, r5,
 * m0, cs0 and memory are declared clobbered.
 * NOTE(review): the instruction that produces r5 for the .new store, and
 * the output operand list, are on lines not visible in this view.
 */
157 #define CIRC_STORE_NEW_IMM(SIZE, VAL, ADDR, START, LEN, INC) \
164 " mem" #SIZE "(%0++#" #INC ":circ(M0)) = r5.new\n\t" \
167 : "r"(START), "r"(VAL), "r"(LEN) \
168 : "r4", "r5", "m0", "cs0", "memory")
/*
 * Per-width entry points for the immediate-increment ".new" circular store
 * (byte, halfword, word).  Each forwards to CIRC_STORE_NEW_IMM.
 */
#define CIRC_STORE_IMM_bnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_IMM(b, VAL, ADDR, START, LEN, INC)

#define CIRC_STORE_IMM_hnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_IMM(h, VAL, ADDR, START, LEN, INC)

#define CIRC_STORE_IMM_wnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_IMM(w, VAL, ADDR, START, LEN, INC)
/*
 * Circular store with a register-held increment ("++I"), using modifier
 * register M1.  The increment and length are packed by
 * build_mreg(INC, 0, LEN).  The stored value is operand %3 with the string
 * PART appended to the register name.  r4, m1, cs1 and memory are declared
 * clobbered.
 * NOTE(review): the output operands, the remaining inputs and the
 * instructions that load M1/CS1 are on lines not visible in this view.
 */
176 #define CIRC_STORE_REG(SIZE, PART, VAL, ADDR, START, LEN, INC) \
181 "mem" #SIZE "(%0++I:circ(M1)) = %3" PART "\n\t" \
183 : "r"(build_mreg((INC), 0, (LEN))), \
186 : "r4", "m1", "cs1", "memory")
/*
 * Per-width entry points for the register-increment circular store.  All
 * forward to CIRC_STORE_REG.  The _f variant is a halfword store whose
 * source operand gets the ".H" suffix; every other variant passes an empty
 * suffix.
 */
#define CIRC_STORE_REG_b(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_REG(b, "", VAL, ADDR, START, LEN, INC)

#define CIRC_STORE_REG_h(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_REG(h, "", VAL, ADDR, START, LEN, INC)

#define CIRC_STORE_REG_f(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_REG(h, ".H", VAL, ADDR, START, LEN, INC)

#define CIRC_STORE_REG_w(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_REG(w, "", VAL, ADDR, START, LEN, INC)

#define CIRC_STORE_REG_d(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_REG(d, "", VAL, ADDR, START, LEN, INC)
/*
 * Register-increment circular store whose source is "r5.new", using
 * modifier register M1.  The increment and length are packed by
 * build_mreg(INC, 0, LEN).  r4, r5, m1, cs1 and memory are declared
 * clobbered.
 * NOTE(review): the instruction that produces r5 for the .new store, the
 * output operands and the remaining inputs are on lines not visible in
 * this view.
 */
198 #define CIRC_STORE_NEW_REG(SIZE, VAL, ADDR, START, LEN, INC) \
205 " mem" #SIZE "(%0++I:circ(M1)) = r5.new\n\t" \
208 : "r"(build_mreg((INC), 0, (LEN))), \
211 : "r4", "r5", "m1", "cs1", "memory")
/*
 * Per-width entry points for the register-increment ".new" circular store
 * (byte, halfword, word).  Each forwards to CIRC_STORE_NEW_REG.
 */
#define CIRC_STORE_REG_bnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_REG(b, VAL, ADDR, START, LEN, INC)

#define CIRC_STORE_REG_hnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_REG(h, VAL, ADDR, START, LEN, INC)

#define CIRC_STORE_REG_wnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_REG(w, VAL, ADDR, START, LEN, INC)
220 /* We'll test increments +1 and -1 */
/*
 * Verify the result of the i-th circular load.
 *
 *   line  source line reported on failure (passed through to __check32)
 *   i     iteration number
 *   res   value that was loaded
 *   inc   increment, in elements
 *   size  length of the circular region, in elements
 *
 * The expected value starts out as i * inc and is adjusted while it is
 * >= size before being compared with res via __check32().
 * NOTE(review): the bodies of the adjustment loops are on lines not visible
 * in this view; presumably they wrap expect into [0, size) - confirm.
 */
221 void __check_load(int line
, int32_t i
, int64_t res
, int32_t inc
, int32_t size
)
223 int32_t expect
= (i
* inc
);
224 while (expect
>= size
) {
230 __check32(line
, res
, expect
);
/* Convenience wrapper that records the caller's line number. */
233 #define check_load(I, RES, INC, SZ) __check_load(__LINE__, I, RES, INC, SZ)
/*
 * TEST_LOAD_IMM(SZ, TYPE, BUF, BUFSIZE, INC, FMT) defines
 * circ_test_load_imm_<SZ>().  The generated function views BUF as an array
 * of TYPE and runs two passes of BUFSIZE circular loads through
 * CIRC_LOAD_IMM_<SZ>: the first with increment +INC, the second with -INC
 * (INC is in bytes).  Every loaded element is checked with check_load(),
 * with the increment converted from bytes to elements by dividing by
 * sizeof(TYPE).  FMT is pasted into the trace format string.
 * NOTE(review): the declarations of `size` and `element`, and any reset of
 * `p` between the passes, are on lines not visible in this view.
 */
235 #define TEST_LOAD_IMM(SZ, TYPE, BUF, BUFSIZE, INC, FMT) \
236 void circ_test_load_imm_##SZ(void) \
238 TYPE *p = (TYPE *)BUF; \
240 for (int i = 0; i < BUFSIZE; i++) { \
242 CIRC_LOAD_IMM_##SZ(element, p, BUF, size * sizeof(TYPE), (INC)); \
243 DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \
245 check_load(i, element, ((INC) / (int)sizeof(TYPE)), size); \
248 for (int i = 0; i < BUFSIZE; i++) { \
250 CIRC_LOAD_IMM_##SZ(element, p, BUF, size * sizeof(TYPE), -(INC)); \
251 DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \
253 check_load(i, element, (-(INC) / (int)sizeof(TYPE)), size); \
257 TEST_LOAD_IMM(b
, int8_t, bbuf
, NBYTES
, 1, d
)
258 TEST_LOAD_IMM(ub
, uint8_t, bbuf
, NBYTES
, 1, d
)
259 TEST_LOAD_IMM(h
, int16_t, hbuf
, NHALFS
, 2, d
)
260 TEST_LOAD_IMM(uh
, uint16_t, hbuf
, NHALFS
, 2, d
)
261 TEST_LOAD_IMM(w
, int32_t, wbuf
, NWORDS
, 4, d
)
262 TEST_LOAD_IMM(d
, int64_t, dbuf
, NDOBLS
, 8, lld
)
/*
 * TEST_LOAD_REG(SZ, TYPE, BUF, BUFSIZE, FMT) defines
 * circ_test_load_reg_<SZ>().  The generated function views BUF as an array
 * of TYPE and runs two passes of BUFSIZE circular loads through
 * CIRC_LOAD_REG_<SZ>: the first with increment +1, the second with -1.
 * Every loaded element is checked with check_load() using the same
 * increment.  FMT is pasted into the trace format string.
 * NOTE(review): the declarations of `size` and `element`, and any reset of
 * `p` between the passes, are on lines not visible in this view.
 */
264 #define TEST_LOAD_REG(SZ, TYPE, BUF, BUFSIZE, FMT) \
265 void circ_test_load_reg_##SZ(void) \
267 TYPE *p = (TYPE *)BUF; \
269 for (int i = 0; i < BUFSIZE; i++) { \
271 CIRC_LOAD_REG_##SZ(element, p, BUF, size * sizeof(TYPE), 1); \
272 DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \
274 check_load(i, element, 1, size); \
277 for (int i = 0; i < BUFSIZE; i++) { \
279 CIRC_LOAD_REG_##SZ(element, p, BUF, size * sizeof(TYPE), -1); \
280 DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \
282 check_load(i, element, -1, size); \
286 TEST_LOAD_REG(b
, int8_t, bbuf
, NBYTES
, d
)
287 TEST_LOAD_REG(ub
, uint8_t, bbuf
, NBYTES
, d
)
288 TEST_LOAD_REG(h
, int16_t, hbuf
, NHALFS
, d
)
289 TEST_LOAD_REG(uh
, uint16_t, hbuf
, NHALFS
, d
)
290 TEST_LOAD_REG(w
, int32_t, wbuf
, NWORDS
, d
)
291 TEST_LOAD_REG(d
, int64_t, dbuf
, NDOBLS
, lld
)
293 /* The circular stores will wrap around somewhere inside the buffer */
/*
 * CIRC_VAL(SZ, TYPE, BUFSIZE) defines circ_val_<SZ>(i, inc, size), which
 * returns the value check_store_<SZ>() expects to find at index i.
 * With mod = BUFSIZE % size and elem = i * inc, every branch returns
 * elem + BUFSIZE - mod, optionally shifted by +size or -size depending on
 * how elem compares with mod and size.
 * NOTE(review): the outer condition that selects between the first pair of
 * returns and the second pair is on a line not visible in this view.
 */
294 #define CIRC_VAL(SZ, TYPE, BUFSIZE) \
295 TYPE circ_val_##SZ(int i, int32_t inc, int32_t size) \
297 int mod = BUFSIZE % size; \
298 int elem = i * inc; \
300 if (-elem <= size - mod) { \
301 return (elem + BUFSIZE - mod); \
303 return (elem + BUFSIZE + size - mod); \
305 } else if (elem < mod) {\
306 return (elem + BUFSIZE - mod); \
308 return (elem + BUFSIZE - size - mod); \
312 CIRC_VAL(b
, uint8_t, NBYTES
)
313 CIRC_VAL(h
, int16_t, NHALFS
)
314 CIRC_VAL(w
, int32_t, NWORDS
)
315 CIRC_VAL(d
, int64_t, NDOBLS
)
318 * Circular stores should only write to the first "size" elements of the buffer;
319 * the remainder of the elements should have BUF[i] == i
/*
 * CHECK_STORE(SZ, BUF, BUFSIZE, FMT) defines check_store_<SZ>(inc, size).
 * For indices 0..size-1 the generated function compares BUF[i] with
 * circ_val_<SZ>(i, inc, size) using check64(), tracing both values first.
 * For indices size..BUFSIZE-1 it checks that BUF[i] still equals i.
 * FMT is pasted into the trace format string.
 */
321 #define CHECK_STORE(SZ, BUF, BUFSIZE, FMT) \
322 void check_store_##SZ(int32_t inc, int32_t size) \
324 for (int i = 0; i < size; i++) { \
325 DEBUG_PRINTF(#BUF "[%3d] = 0x%02" #FMT ", guess = 0x%02" #FMT "\n", \
326 i, BUF[i], circ_val_##SZ(i, inc, size)); \
327 check64(BUF[i], circ_val_##SZ(i, inc, size)); \
329 for (int i = size; i < BUFSIZE; i++) { \
330 check64(BUF[i], i); \
334 CHECK_STORE(b
, bbuf
, NBYTES
, x
)
335 CHECK_STORE(h
, hbuf
, NHALFS
, x
)
336 CHECK_STORE(w
, wbuf
, NWORDS
, x
)
337 CHECK_STORE(d
, dbuf
, NDOBLS
, llx
)
/*
 * CIRC_TEST_STORE_IMM(SZ, CHK, TYPE, BUF, BUFSIZE, SHIFT, INC) defines
 * circ_test_store_imm_<SZ>().  The generated function uses a circular
 * region of 27 elements and runs two passes of BUFSIZE stores through
 * CIRC_STORE_IMM_<SZ>, each storing val << SHIFT.  After the first pass it
 * calls check_store_<CHK>() with the increment INC converted from bytes to
 * elements; after the second pass it does the same with -INC.
 * NOTE(review): the declarations of `p` and `val`, the buffer
 * re-initialisation, and the final argument of the second store are on
 * lines not visible in this view.
 */
339 #define CIRC_TEST_STORE_IMM(SZ, CHK, TYPE, BUF, BUFSIZE, SHIFT, INC) \
340 void circ_test_store_imm_##SZ(void) \
342 uint32_t size = 27; \
346 for (int i = 0; i < BUFSIZE; i++) { \
347 CIRC_STORE_IMM_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), INC); \
350 check_store_##CHK(((INC) / (int)sizeof(TYPE)), size); \
354 for (int i = 0; i < BUFSIZE; i++) { \
355 CIRC_STORE_IMM_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), \
359 check_store_##CHK((-(INC) / (int)sizeof(TYPE)), size); \
362 CIRC_TEST_STORE_IMM(b
, b
, uint8_t, bbuf
, NBYTES
, 0, 1)
363 CIRC_TEST_STORE_IMM(h
, h
, int16_t, hbuf
, NHALFS
, 0, 2)
364 CIRC_TEST_STORE_IMM(f
, h
, int16_t, hbuf
, NHALFS
, 16, 2)
365 CIRC_TEST_STORE_IMM(w
, w
, int32_t, wbuf
, NWORDS
, 0, 4)
366 CIRC_TEST_STORE_IMM(d
, d
, int64_t, dbuf
, NDOBLS
, 0, 8)
367 CIRC_TEST_STORE_IMM(bnew
, b
, uint8_t, bbuf
, NBYTES
, 0, 1)
368 CIRC_TEST_STORE_IMM(hnew
, h
, int16_t, hbuf
, NHALFS
, 0, 2)
369 CIRC_TEST_STORE_IMM(wnew
, w
, int32_t, wbuf
, NWORDS
, 0, 4)
/*
 * CIRC_TEST_STORE_REG(SZ, CHK, TYPE, BUF, BUFSIZE, SHIFT) defines
 * circ_test_store_reg_<SZ>().  The generated function uses a circular
 * region of 19 elements and runs two passes of BUFSIZE stores through
 * CIRC_STORE_REG_<SZ>, each storing val << SHIFT: the first pass with
 * increment +1 followed by check_store_<CHK>(1, size), the second with
 * increment -1 followed by check_store_<CHK>(-1, size).
 * NOTE(review): the declarations of `p` and `val` and the buffer
 * re-initialisation are on lines not visible in this view.
 */
371 #define CIRC_TEST_STORE_REG(SZ, CHK, TYPE, BUF, BUFSIZE, SHIFT) \
372 void circ_test_store_reg_##SZ(void) \
375 uint32_t size = 19; \
378 for (int i = 0; i < BUFSIZE; i++) { \
379 CIRC_STORE_REG_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), 1); \
382 check_store_##CHK(1, size); \
386 for (int i = 0; i < BUFSIZE; i++) { \
387 CIRC_STORE_REG_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), -1); \
390 check_store_##CHK(-1, size); \
393 CIRC_TEST_STORE_REG(b
, b
, uint8_t, bbuf
, NBYTES
, 0)
394 CIRC_TEST_STORE_REG(h
, h
, int16_t, hbuf
, NHALFS
, 0)
395 CIRC_TEST_STORE_REG(f
, h
, int16_t, hbuf
, NHALFS
, 16)
396 CIRC_TEST_STORE_REG(w
, w
, int32_t, wbuf
, NWORDS
, 0)
397 CIRC_TEST_STORE_REG(d
, d
, int64_t, dbuf
, NDOBLS
, 0)
398 CIRC_TEST_STORE_REG(bnew
, b
, uint8_t, bbuf
, NBYTES
, 0)
399 CIRC_TEST_STORE_REG(hnew
, h
, int16_t, hbuf
, NHALFS
, 0)
400 CIRC_TEST_STORE_REG(wnew
, w
, int32_t, wbuf
, NWORDS
, 0)
402 /* Test the old scheme used in Hexagon V3 */
/*
 * Exercise a modifier register that carries a non-zero K field.
 * M1 is built with build_mreg(1, K, size * sizeof(int)) where K = 8, then
 * NWORDS word loads are issued through "memw(%1++I:circ(M1))" and each
 * result is checked with check_load(i, element, 1, size).
 * NOTE(review): the declarations of `p`, `element` and `size`, and the rest
 * of the asm statement, are on lines not visible in this view.
 */
403 static void circ_test_v3(void)
407 /* set high bit in K to test unsigned extract in fcirc */
408 int32_t K
= 8; /* 1024 bytes */
413 for (int i
= 0; i
< NWORDS
; i
++) {
417 "%0 = memw(%1++I:circ(M1))\n\t"
418 : "=r"(element
), "+r"(p
)
419 : "r"(build_mreg(1, K
, size
* sizeof(int)))
421 DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2d\n", i
, p
, element
);
422 check_load(i
, element
, 1, size
);
/*
 * Trace the buffer size and the address of each test buffer.
 * NOTE(review): "%p" formally expects a pointer to void, but the arrays are
 * passed uncast, and the literal "0x" in front of "%p" may duplicate a
 * prefix the C library already prints - consider tidying both.
 */
433 DEBUG_PRINTF("NBYTES = %d\n", NBYTES
);
434 DEBUG_PRINTF("Address of dbuf = 0x%p\n", dbuf
);
435 DEBUG_PRINTF("Address of wbuf = 0x%p\n", wbuf
);
436 DEBUG_PRINTF("Address of hbuf = 0x%p\n", hbuf
);
437 DEBUG_PRINTF("Address of bbuf = 0x%p\n", bbuf
);
/* Circular loads, immediate increment */
439 circ_test_load_imm_b();
440 circ_test_load_imm_ub();
441 circ_test_load_imm_h();
442 circ_test_load_imm_uh();
443 circ_test_load_imm_w();
444 circ_test_load_imm_d();
/* Circular loads, register increment */
446 circ_test_load_reg_b();
447 circ_test_load_reg_ub();
448 circ_test_load_reg_h();
449 circ_test_load_reg_uh();
450 circ_test_load_reg_w();
451 circ_test_load_reg_d();
/* Circular stores, immediate increment (including the .new forms) */
453 circ_test_store_imm_b();
454 circ_test_store_imm_h();
455 circ_test_store_imm_f();
456 circ_test_store_imm_w();
457 circ_test_store_imm_d();
458 circ_test_store_imm_bnew();
459 circ_test_store_imm_hnew();
460 circ_test_store_imm_wnew();
/* Circular stores, register increment (including the .new forms) */
462 circ_test_store_reg_b();
463 circ_test_store_reg_h();
464 circ_test_store_reg_f();
465 circ_test_store_reg_w();
466 circ_test_store_reg_d();
467 circ_test_store_reg_bnew();
468 circ_test_store_reg_hnew();
469 circ_test_store_reg_wnew();
/* Print FAIL when err is non-zero, PASS otherwise. */
473 puts(err
? "FAIL" : "PASS");