2 * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, see <http://www.gnu.org/licenses/>.
/*
 * Make sure that the :mem_noshuf packet attribute is honored.
 * This is important when the addresses overlap.
 * The store instruction in slot 1 effectively executes first,
 * followed by the load instruction in slot 0.
 */
33 #define MEM_NOSHUF32(NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP) \
34 static inline uint32_t NAME(ST_TYPE * p, LD_TYPE * q, ST_TYPE x) \
37 asm volatile("{\n\t" \
38 " " #ST_OP "(%1) = %3\n\t" \
39 " %0 = " #LD_OP "(%2)\n\t" \
42 : "r"(p), "r"(q), "r"(x) \
47 #define MEM_NOSHUF64(NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP) \
48 static inline uint64_t NAME(ST_TYPE * p, LD_TYPE * q, ST_TYPE x) \
51 asm volatile("{\n\t" \
52 " " #ST_OP "(%1) = %3\n\t" \
53 " %0 = " #LD_OP "(%2)\n\t" \
56 : "r"(p), "r"(q), "r"(x) \
61 /* Store byte combinations */
62 MEM_NOSHUF32(mem_noshuf_sb_lb
, int8_t, int8_t, memb
, memb
)
63 MEM_NOSHUF32(mem_noshuf_sb_lub
, int8_t, uint8_t, memb
, memub
)
64 MEM_NOSHUF32(mem_noshuf_sb_lh
, int8_t, int16_t, memb
, memh
)
65 MEM_NOSHUF32(mem_noshuf_sb_luh
, int8_t, uint16_t, memb
, memuh
)
66 MEM_NOSHUF32(mem_noshuf_sb_lw
, int8_t, int32_t, memb
, memw
)
67 MEM_NOSHUF64(mem_noshuf_sb_ld
, int8_t, int64_t, memb
, memd
)
69 /* Store half combinations */
70 MEM_NOSHUF32(mem_noshuf_sh_lb
, int16_t, int8_t, memh
, memb
)
71 MEM_NOSHUF32(mem_noshuf_sh_lub
, int16_t, uint8_t, memh
, memub
)
72 MEM_NOSHUF32(mem_noshuf_sh_lh
, int16_t, int16_t, memh
, memh
)
73 MEM_NOSHUF32(mem_noshuf_sh_luh
, int16_t, uint16_t, memh
, memuh
)
74 MEM_NOSHUF32(mem_noshuf_sh_lw
, int16_t, int32_t, memh
, memw
)
75 MEM_NOSHUF64(mem_noshuf_sh_ld
, int16_t, int64_t, memh
, memd
)
77 /* Store word combinations */
78 MEM_NOSHUF32(mem_noshuf_sw_lb
, int32_t, int8_t, memw
, memb
)
79 MEM_NOSHUF32(mem_noshuf_sw_lub
, int32_t, uint8_t, memw
, memub
)
80 MEM_NOSHUF32(mem_noshuf_sw_lh
, int32_t, int16_t, memw
, memh
)
81 MEM_NOSHUF32(mem_noshuf_sw_luh
, int32_t, uint16_t, memw
, memuh
)
82 MEM_NOSHUF32(mem_noshuf_sw_lw
, int32_t, int32_t, memw
, memw
)
83 MEM_NOSHUF64(mem_noshuf_sw_ld
, int32_t, int64_t, memw
, memd
)
85 /* Store double combinations */
86 MEM_NOSHUF32(mem_noshuf_sd_lb
, int64_t, int8_t, memd
, memb
)
87 MEM_NOSHUF32(mem_noshuf_sd_lub
, int64_t, uint8_t, memd
, memub
)
88 MEM_NOSHUF32(mem_noshuf_sd_lh
, int64_t, int16_t, memd
, memh
)
89 MEM_NOSHUF32(mem_noshuf_sd_luh
, int64_t, uint16_t, memd
, memuh
)
90 MEM_NOSHUF32(mem_noshuf_sd_lw
, int64_t, int32_t, memd
, memw
)
91 MEM_NOSHUF64(mem_noshuf_sd_ld
, int64_t, int64_t, memd
, memd
)
93 static inline int pred_lw_sw(bool pred
, int32_t *p
, int32_t *q
,
97 asm volatile("p0 = cmp.eq(%5, #0)\n\t"
101 " if (!p0) %0 = memw(%2)\n\t"
104 : "r"(p
), "r"(q
), "r"(x
), "r"(y
), "r"(pred
)
109 static inline int pred_lw_sw_pi(bool pred
, int32_t *p
, int32_t *q
,
110 int32_t x
, int32_t y
)
113 asm volatile("p0 = cmp.eq(%5, #0)\n\t"
118 " if (!p0) %0 = memw(r7++#4)\n\t"
121 : "r"(p
), "r"(q
), "r"(x
), "r"(y
), "r"(pred
)
122 : "r7", "p0", "memory");
126 static inline int64_t pred_ld_sd(bool pred
, int64_t *p
, int64_t *q
,
127 int64_t x
, int64_t y
)
130 asm volatile("p0 = cmp.eq(%5, #0)\n\t"
134 " if (!p0) %0 = memd(%2)\n\t"
137 : "r"(p
), "r"(q
), "r"(x
), "r"(y
), "r"(pred
)
142 static inline int64_t pred_ld_sd_pi(bool pred
, int64_t *p
, int64_t *q
,
143 int64_t x
, int64_t y
)
146 asm volatile("p0 = cmp.eq(%5, #0)\n\t"
151 " if (!p0) %0 = memd(r7++#8)\n\t"
154 : "r"(p
), "r"(q
), "r"(x
), "r"(y
), "r"(pred
)
155 : "r7", "p0", "memory");
159 static inline int32_t cancel_sw_lb(bool pred
, int32_t *p
, int8_t *q
, int32_t x
)
162 asm volatile("p0 = cmp.eq(%4, #0)\n\t"
164 " if (!p0) memw(%1) = %3\n\t"
168 : "r"(p
), "r"(q
), "r"(x
), "r"(pred
)
173 static inline int64_t cancel_sw_ld(bool pred
, int32_t *p
, int64_t *q
, int32_t x
)
176 asm volatile("p0 = cmp.eq(%4, #0)\n\t"
178 " if (!p0) memw(%1) = %3\n\t"
182 : "r"(p
), "r"(q
), "r"(x
), "r"(pred
)
205 * Store byte combinations
208 res32
= mem_noshuf_sb_lb(&n
.b
[0], &n
.b
[0], 0x87);
209 check32(res32
, 0xffffff87);
212 res32
= mem_noshuf_sb_lub(&n
.b
[0], &n
.ub
[0], 0x87);
213 check32(res32
, 0x00000087);
216 res32
= mem_noshuf_sb_lh(&n
.b
[0], &n
.h
[0], 0x87);
217 check32(res32
, 0xffffff87);
220 res32
= mem_noshuf_sb_luh(&n
.b
[0], &n
.uh
[0], 0x87);
221 check32(res32
, 0x0000ff87);
224 res32
= mem_noshuf_sb_lw(&n
.b
[0], &n
.w
[0], 0x87);
225 check32(res32
, 0xffffff87);
228 res64
= mem_noshuf_sb_ld(&n
.b
[0], &n
.d
[0], 0x87);
229 check64(res64
, 0xffffffffffffff87LL
);
232 * Store half combinations
235 res32
= mem_noshuf_sh_lb(&n
.h
[0], &n
.b
[0], 0x8787);
236 check32(res32
, 0xffffff87);
239 res32
= mem_noshuf_sh_lub(&n
.h
[0], &n
.ub
[1], 0x8f87);
240 check32(res32
, 0x0000008f);
243 res32
= mem_noshuf_sh_lh(&n
.h
[0], &n
.h
[0], 0x8a87);
244 check32(res32
, 0xffff8a87);
247 res32
= mem_noshuf_sh_luh(&n
.h
[0], &n
.uh
[0], 0x8a87);
248 check32(res32
, 0x8a87);
251 res32
= mem_noshuf_sh_lw(&n
.h
[1], &n
.w
[0], 0x8a87);
252 check32(res32
, 0x8a87ffff);
255 res64
= mem_noshuf_sh_ld(&n
.h
[1], &n
.d
[0], 0x8a87);
256 check64(res64
, 0xffffffff8a87ffffLL
);
259 * Store word combinations
262 res32
= mem_noshuf_sw_lb(&n
.w
[0], &n
.b
[0], 0x12345687);
263 check32(res32
, 0xffffff87);
266 res32
= mem_noshuf_sw_lub(&n
.w
[0], &n
.ub
[0], 0x12345687);
267 check32(res32
, 0x00000087);
270 res32
= mem_noshuf_sw_lh(&n
.w
[0], &n
.h
[0], 0x1234f678);
271 check32(res32
, 0xfffff678);
274 res32
= mem_noshuf_sw_luh(&n
.w
[0], &n
.uh
[0], 0x12345678);
275 check32(res32
, 0x00005678);
278 res32
= mem_noshuf_sw_lw(&n
.w
[0], &n
.w
[0], 0x12345678);
279 check32(res32
, 0x12345678);
282 res64
= mem_noshuf_sw_ld(&n
.w
[0], &n
.d
[0], 0x12345678);
283 check64(res64
, 0xffffffff12345678LL
);
286 * Store double combinations
289 res32
= mem_noshuf_sd_lb(&n
.d
[0], &n
.b
[1], 0x123456789abcdef0);
290 check32(res32
, 0xffffffde);
293 res32
= mem_noshuf_sd_lub(&n
.d
[0], &n
.ub
[1], 0x123456789abcdef0);
294 check32(res32
, 0x000000de);
297 res32
= mem_noshuf_sd_lh(&n
.d
[0], &n
.h
[1], 0x123456789abcdef0);
298 check32(res32
, 0xffff9abc);
301 res32
= mem_noshuf_sd_luh(&n
.d
[0], &n
.uh
[1], 0x123456789abcdef0);
302 check32(res32
, 0x00009abc);
305 res32
= mem_noshuf_sd_lw(&n
.d
[0], &n
.w
[1], 0x123456789abcdef0);
306 check32(res32
, 0x12345678);
309 res64
= mem_noshuf_sd_ld(&n
.d
[0], &n
.d
[0], 0x123456789abcdef0);
310 check64(res64
, 0x123456789abcdef0LL
);
313 * Predicated word stores
316 res32
= cancel_sw_lb(false, &n
.w
[0], &n
.b
[0], 0x12345678);
317 check32(res32
, 0xffffffff);
320 res32
= cancel_sw_lb(true, &n
.w
[0], &n
.b
[0], 0x12345687);
321 check32(res32
, 0xffffff87);
324 * Predicated double stores
327 res64
= cancel_sw_ld(false, &n
.w
[0], &n
.d
[0], 0x12345678);
328 check64(res64
, 0xffffffffffffffffLL
);
331 res64
= cancel_sw_ld(true, &n
.w
[0], &n
.d
[0], 0x12345678);
332 check64(res64
, 0xffffffff12345678LL
);
335 res64
= cancel_sw_ld(false, &n
.w
[1], &n
.d
[0], 0x12345678);
336 check64(res64
, 0xffffffffffffffffLL
);
339 res64
= cancel_sw_ld(true, &n
.w
[1], &n
.d
[0], 0x12345678);
340 check64(res64
, 0x12345678ffffffffLL
);
346 res32
= mem_noshuf_sb_lb(&n
.b
[1], &n
.b
[0], 0x87);
347 check32(res32
, 0xffffffff);
350 res32
= mem_noshuf_sb_lb(&n
.b
[0], &n
.b
[1], 0x87);
351 check32(res32
, 0xffffffff);
354 res32
= mem_noshuf_sh_lh(&n
.h
[1], &n
.h
[0], 0x8787);
355 check32(res32
, 0xffffffff);
358 res32
= mem_noshuf_sh_lh(&n
.h
[0], &n
.h
[1], 0x8787);
359 check32(res32
, 0xffffffff);
362 res32
= mem_noshuf_sw_lw(&n
.w
[0], &n
.w
[1], 0x12345678);
363 check32(res32
, 0xffffffff);
366 res32
= mem_noshuf_sw_lw(&n
.w
[1], &n
.w
[0], 0x12345678);
367 check32(res32
, 0xffffffff);
371 res64
= mem_noshuf_sd_ld(&n
.d
[1], &n
.d
[0], 0x123456789abcdef0LL
);
372 check64(res64
, 0xffffffffffffffffLL
);
376 res64
= mem_noshuf_sd_ld(&n
.d
[0], &n
.d
[1], 0x123456789abcdef0LL
);
377 check64(res64
, 0xffffffffffffffffLL
);
380 res32
= pred_lw_sw(false, &n
.w
[0], &n
.w
[0], 0x12345678, 0xc0ffeeda);
381 check32(res32
, 0x12345678);
382 check32(n
.w
[0], 0xc0ffeeda);
385 res32
= pred_lw_sw(true, &n
.w
[0], &n
.w
[0], 0x12345678, 0xc0ffeeda);
386 check32(res32
, 0xc0ffeeda);
387 check32(n
.w
[0], 0xc0ffeeda);
390 res32
= pred_lw_sw_pi(false, &n
.w
[0], &n
.w
[0], 0x12345678, 0xc0ffeeda);
391 check32(res32
, 0x12345678);
392 check32(n
.w
[0], 0xc0ffeeda);
395 res32
= pred_lw_sw_pi(true, &n
.w
[0], &n
.w
[0], 0x12345678, 0xc0ffeeda);
396 check32(res32
, 0xc0ffeeda);
397 check32(n
.w
[0], 0xc0ffeeda);
400 res64
= pred_ld_sd(false, &n
.d
[0], &n
.d
[0],
401 0x1234567812345678LL
, 0xc0ffeedac0ffeedaLL
);
402 check64(res64
, 0x1234567812345678LL
);
403 check64(n
.d
[0], 0xc0ffeedac0ffeedaLL
);
406 res64
= pred_ld_sd(true, &n
.d
[0], &n
.d
[0],
407 0x1234567812345678LL
, 0xc0ffeedac0ffeedaLL
);
408 check64(res64
, 0xc0ffeedac0ffeedaLL
);
409 check64(n
.d
[0], 0xc0ffeedac0ffeedaLL
);
412 res64
= pred_ld_sd_pi(false, &n
.d
[0], &n
.d
[0],
413 0x1234567812345678LL
, 0xc0ffeedac0ffeedaLL
);
414 check64(res64
, 0x1234567812345678LL
);
415 check64(n
.d
[0], 0xc0ffeedac0ffeedaLL
);
418 res64
= pred_ld_sd_pi(true, &n
.d
[0], &n
.d
[0],
419 0x1234567812345678LL
, 0xc0ffeedac0ffeedaLL
);
420 check64(res64
, 0xc0ffeedac0ffeedaLL
);
421 check64(n
.d
[0], 0xc0ffeedac0ffeedaLL
);
423 puts(err
? "FAIL" : "PASS");