/*
 * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
/*
 * Make sure that the :mem_noshuf packet attribute is honored.
 * This is important when the addresses overlap.
 * The store instruction in slot 1 effectively executes first,
 * followed by the load instruction in slot 0.
 */
27 #define MEM_NOSHUF32(NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP) \
28 static inline unsigned int NAME(ST_TYPE * p, LD_TYPE * q, ST_TYPE x) \
31 asm volatile("{\n\t" \
32 " " #ST_OP "(%1) = %3\n\t" \
33 " %0 = " #LD_OP "(%2)\n\t" \
36 : "r"(p), "r"(q), "r"(x) \
41 #define MEM_NOSHUF64(NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP) \
42 static inline unsigned long long NAME(ST_TYPE * p, LD_TYPE * q, ST_TYPE x) \
44 unsigned long long ret; \
45 asm volatile("{\n\t" \
46 " " #ST_OP "(%1) = %3\n\t" \
47 " %0 = " #LD_OP "(%2)\n\t" \
50 : "r"(p), "r"(q), "r"(x) \
55 /* Store byte combinations */
56 MEM_NOSHUF32(mem_noshuf_sb_lb
, signed char, signed char, memb
, memb
)
57 MEM_NOSHUF32(mem_noshuf_sb_lub
, signed char, unsigned char, memb
, memub
)
58 MEM_NOSHUF32(mem_noshuf_sb_lh
, signed char, signed short, memb
, memh
)
59 MEM_NOSHUF32(mem_noshuf_sb_luh
, signed char, unsigned short, memb
, memuh
)
60 MEM_NOSHUF32(mem_noshuf_sb_lw
, signed char, signed int, memb
, memw
)
61 MEM_NOSHUF64(mem_noshuf_sb_ld
, signed char, signed long long, memb
, memd
)
63 /* Store half combinations */
64 MEM_NOSHUF32(mem_noshuf_sh_lb
, signed short, signed char, memh
, memb
)
65 MEM_NOSHUF32(mem_noshuf_sh_lub
, signed short, unsigned char, memh
, memub
)
66 MEM_NOSHUF32(mem_noshuf_sh_lh
, signed short, signed short, memh
, memh
)
67 MEM_NOSHUF32(mem_noshuf_sh_luh
, signed short, unsigned short, memh
, memuh
)
68 MEM_NOSHUF32(mem_noshuf_sh_lw
, signed short, signed int, memh
, memw
)
69 MEM_NOSHUF64(mem_noshuf_sh_ld
, signed short, signed long long, memh
, memd
)
71 /* Store word combinations */
72 MEM_NOSHUF32(mem_noshuf_sw_lb
, signed int, signed char, memw
, memb
)
73 MEM_NOSHUF32(mem_noshuf_sw_lub
, signed int, unsigned char, memw
, memub
)
74 MEM_NOSHUF32(mem_noshuf_sw_lh
, signed int, signed short, memw
, memh
)
75 MEM_NOSHUF32(mem_noshuf_sw_luh
, signed int, unsigned short, memw
, memuh
)
76 MEM_NOSHUF32(mem_noshuf_sw_lw
, signed int, signed int, memw
, memw
)
77 MEM_NOSHUF64(mem_noshuf_sw_ld
, signed int, signed long long, memw
, memd
)
79 /* Store double combinations */
80 MEM_NOSHUF32(mem_noshuf_sd_lb
, long long, signed char, memd
, memb
)
81 MEM_NOSHUF32(mem_noshuf_sd_lub
, long long, unsigned char, memd
, memub
)
82 MEM_NOSHUF32(mem_noshuf_sd_lh
, long long, signed short, memd
, memh
)
83 MEM_NOSHUF32(mem_noshuf_sd_luh
, long long, unsigned short, memd
, memuh
)
84 MEM_NOSHUF32(mem_noshuf_sd_lw
, long long, signed int, memd
, memw
)
85 MEM_NOSHUF64(mem_noshuf_sd_ld
, long long, signed long long, memd
, memd
)
87 static inline unsigned int cancel_sw_lb(int pred
, int *p
, signed char *q
, int x
)
90 asm volatile("p0 = cmp.eq(%4, #0)\n\t"
92 " if (!p0) memw(%1) = %3\n\t"
96 : "r"(p
), "r"(q
), "r"(x
), "r"(pred
)
102 unsigned long long cancel_sw_ld(int pred
, int *p
, long long *q
, int x
)
105 asm volatile("p0 = cmp.eq(%4, #0)\n\t"
107 " if (!p0) memw(%1) = %3\n\t"
111 : "r"(p
), "r"(q
), "r"(x
), "r"(pred
)
117 signed long long d
[2];
118 unsigned long long ud
[2];
122 unsigned short uh
[8];
124 unsigned char ub
[16];
129 static void check32(int n
, int expect
)
132 printf("ERROR: 0x%08x != 0x%08x\n", n
, expect
);
137 static void check64(long long n
, long long expect
)
140 printf("ERROR: 0x%08llx != 0x%08llx\n", n
, expect
);
149 unsigned long long res64
;
/* Store byte combinations */
155 res32
= mem_noshuf_sb_lb(&n
.b
[0], &n
.b
[0], 0x87);
156 check32(res32
, 0xffffff87);
159 res32
= mem_noshuf_sb_lub(&n
.b
[0], &n
.ub
[0], 0x87);
160 check32(res32
, 0x00000087);
163 res32
= mem_noshuf_sb_lh(&n
.b
[0], &n
.h
[0], 0x87);
164 check32(res32
, 0xffffff87);
167 res32
= mem_noshuf_sb_luh(&n
.b
[0], &n
.uh
[0], 0x87);
168 check32(res32
, 0x0000ff87);
171 res32
= mem_noshuf_sb_lw(&n
.b
[0], &n
.w
[0], 0x87);
172 check32(res32
, 0xffffff87);
175 res64
= mem_noshuf_sb_ld(&n
.b
[0], &n
.d
[0], 0x87);
176 check64(res64
, 0xffffffffffffff87LL
);
/* Store half combinations */
182 res32
= mem_noshuf_sh_lb(&n
.h
[0], &n
.b
[0], 0x8787);
183 check32(res32
, 0xffffff87);
186 res32
= mem_noshuf_sh_lub(&n
.h
[0], &n
.ub
[1], 0x8f87);
187 check32(res32
, 0x0000008f);
190 res32
= mem_noshuf_sh_lh(&n
.h
[0], &n
.h
[0], 0x8a87);
191 check32(res32
, 0xffff8a87);
194 res32
= mem_noshuf_sh_luh(&n
.h
[0], &n
.uh
[0], 0x8a87);
195 check32(res32
, 0x8a87);
198 res32
= mem_noshuf_sh_lw(&n
.h
[1], &n
.w
[0], 0x8a87);
199 check32(res32
, 0x8a87ffff);
202 res64
= mem_noshuf_sh_ld(&n
.h
[1], &n
.d
[0], 0x8a87);
203 check64(res64
, 0xffffffff8a87ffffLL
);
/* Store word combinations */
209 res32
= mem_noshuf_sw_lb(&n
.w
[0], &n
.b
[0], 0x12345687);
210 check32(res32
, 0xffffff87);
213 res32
= mem_noshuf_sw_lub(&n
.w
[0], &n
.ub
[0], 0x12345687);
214 check32(res32
, 0x00000087);
217 res32
= mem_noshuf_sw_lh(&n
.w
[0], &n
.h
[0], 0x1234f678);
218 check32(res32
, 0xfffff678);
221 res32
= mem_noshuf_sw_luh(&n
.w
[0], &n
.uh
[0], 0x12345678);
222 check32(res32
, 0x00005678);
225 res32
= mem_noshuf_sw_lw(&n
.w
[0], &n
.w
[0], 0x12345678);
226 check32(res32
, 0x12345678);
229 res64
= mem_noshuf_sw_ld(&n
.w
[0], &n
.d
[0], 0x12345678);
230 check64(res64
, 0xffffffff12345678LL
);
/* Store double combinations */
236 res32
= mem_noshuf_sd_lb(&n
.d
[0], &n
.b
[1], 0x123456789abcdef0);
237 check32(res32
, 0xffffffde);
240 res32
= mem_noshuf_sd_lub(&n
.d
[0], &n
.ub
[1], 0x123456789abcdef0);
241 check32(res32
, 0x000000de);
244 res32
= mem_noshuf_sd_lh(&n
.d
[0], &n
.h
[1], 0x123456789abcdef0);
245 check32(res32
, 0xffff9abc);
248 res32
= mem_noshuf_sd_luh(&n
.d
[0], &n
.uh
[1], 0x123456789abcdef0);
249 check32(res32
, 0x00009abc);
252 res32
= mem_noshuf_sd_lw(&n
.d
[0], &n
.w
[1], 0x123456789abcdef0);
253 check32(res32
, 0x12345678);
256 res64
= mem_noshuf_sd_ld(&n
.d
[0], &n
.d
[0], 0x123456789abcdef0);
257 check64(res64
, 0x123456789abcdef0LL
);
/* Predicated word stores, 32-bit result (cancel_sw_lb) */
263 res32
= cancel_sw_lb(0, &n
.w
[0], &n
.b
[0], 0x12345678);
264 check32(res32
, 0xffffffff);
267 res32
= cancel_sw_lb(1, &n
.w
[0], &n
.b
[0], 0x12345687);
268 check32(res32
, 0xffffff87);
/* Predicated word stores, 64-bit result (cancel_sw_ld) */
274 res64
= cancel_sw_ld(0, &n
.w
[0], &n
.d
[0], 0x12345678);
275 check64(res64
, 0xffffffffffffffffLL
);
278 res64
= cancel_sw_ld(1, &n
.w
[0], &n
.d
[0], 0x12345678);
279 check64(res64
, 0xffffffff12345678LL
);
282 res64
= cancel_sw_ld(0, &n
.w
[1], &n
.d
[0], 0x12345678);
283 check64(res64
, 0xffffffffffffffffLL
);
286 res64
= cancel_sw_ld(1, &n
.w
[1], &n
.d
[0], 0x12345678);
287 check64(res64
, 0x12345678ffffffffLL
);
293 res32
= mem_noshuf_sb_lb(&n
.b
[1], &n
.b
[0], 0x87);
294 check32(res32
, 0xffffffff);
297 res32
= mem_noshuf_sb_lb(&n
.b
[0], &n
.b
[1], 0x87);
298 check32(res32
, 0xffffffff);
301 res32
= mem_noshuf_sh_lh(&n
.h
[1], &n
.h
[0], 0x8787);
302 check32(res32
, 0xffffffff);
305 res32
= mem_noshuf_sh_lh(&n
.h
[0], &n
.h
[1], 0x8787);
306 check32(res32
, 0xffffffff);
309 res32
= mem_noshuf_sw_lw(&n
.w
[0], &n
.w
[1], 0x12345678);
310 check32(res32
, 0xffffffff);
313 res32
= mem_noshuf_sw_lw(&n
.w
[1], &n
.w
[0], 0x12345678);
314 check32(res32
, 0xffffffff);
318 res64
= mem_noshuf_sd_ld(&n
.d
[1], &n
.d
[0], 0x123456789abcdef0LL
);
319 check64(res64
, 0xffffffffffffffffLL
);
323 res64
= mem_noshuf_sd_ld(&n
.d
[0], &n
.d
[1], 0x123456789abcdef0LL
);
324 check64(res64
, 0xffffffffffffffffLL
);
326 puts(err
? "FAIL" : "PASS");