2 * QEMU TCG support -- s390x vector string instruction support
4 * Copyright (C) 2019 Red Hat Inc
7 * David Hildenbrand <david@redhat.com>
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
12 #include "qemu/osdep.h"
13 #include "qemu-common.h"
18 #include "tcg/tcg-gvec-desc.h"
19 #include "exec/helper-proto.h"
/*
 * Flag every element of @a that is zero.
 *
 * @mask is expected to select, within each element, all bits except the
 * most significant one. The result has the MSB of an element set if that
 * element of @a is zero; all bits covered by @mask are cleared.
 */
static inline uint64_t zero_search(uint64_t a, uint64_t mask)
{
    /* the addition carries into an element's MSB iff one of its low bits is set */
    const uint64_t low_bits_seen = (a & mask) + mask;
    /* fold in the MSBs of @a, so an MSB stays clear only for zero elements */
    const uint64_t nonzero_or_low = low_bits_seen | a | mask;

    return ~nonzero_or_low;
}
/*
 * Flag every element of @a that is not zero.
 *
 * @mask is expected to select, within each element, all bits except the
 * most significant one. The result has the MSB of an element set if that
 * element of @a is non-zero; all bits covered by @mask are cleared.
 */
static inline uint64_t nonzero_search(uint64_t a, uint64_t mask)
{
    /* the addition carries into an element's MSB iff one of its low bits is set */
    const uint64_t low_bits_seen = (a & mask) + mask;
    /* only the bits outside of @mask (the MSBs) are reported */
    const uint64_t msb_only = ~mask;

    return (low_bits_seen | a) & msb_only;
}
/*
 * Convert a pair of 64-bit match bitmaps into a byte offset.
 *
 * @c0 describes the first eight bytes, @c1 the following eight. The
 * position of the first set bit (counted from the most significant bit of
 * @c0) is divided by eight to yield the byte offset of the first match.
 * The caller-visible contract is "16 for no match", which relies on
 * clz64(0) yielding 64 - TODO confirm against the definition of clz64().
 */
static inline int match_index(uint64_t c0, uint64_t c1)
{
    int bit_pos;

    if (c0) {
        bit_pos = clz64(c0);
    } else {
        /* nothing in the first half, continue counting in the second */
        bit_pos = clz64(c1) + 64;
    }
    return bit_pos >> 3;
}
48 * Returns the number of bits composing one element.
50 static uint8_t get_element_bits(uint8_t es
)
52 return (1 << es
) * BITS_PER_BYTE
;
/*
 * Bitmask with the lowest get_element_bits(@es) bits set, i.e. covering
 * exactly one element placed in the least significant position.
 */
static uint64_t get_single_element_mask(uint8_t es)
{
    const int unused_bits = 64 - get_element_bits(es);

    return ~0ull >> unused_bits;
}
/*
 * Bitmask covering one element placed in the least significant position,
 * with the element's most significant bit left out.
 */
static uint64_t get_single_element_lsbs_mask(uint8_t es)
{
    /* one more bit than for the full element mask is shifted out */
    const int dropped_bits = 65 - get_element_bits(es);

    return ~0ull >> dropped_bits;
}
/*
 * Bitmask for all elements of size @es within 64 bits, each with its most
 * significant bit left out. The single-element mask is handed to
 * dup_const(), which presumably replicates it across the 64-bit value -
 * TODO confirm against the definition of dup_const().
 */
static uint64_t get_element_lsbs_mask(uint8_t es)
{
    const uint64_t one_element = get_single_element_lsbs_mask(es);

    return dup_const(es, one_element);
}
/*
 * Shared implementation behind the gvec_vfae* and gvec_vfae_cc* helpers.
 *
 * v1 receives the result; v2 and v3 are each read as two 64-bit halves.
 * in, rt and zs are the flag bits the helpers extract from the descriptor,
 * es is the element size as log2 of the byte count (see
 * get_element_bits()). The return value is what the *_cc helpers store
 * into env->cc_op.
 *
 * NOTE(review): parts of this function are not visible in this view - the
 * declarations of i and first_equal, the initialisation of e0/e1/t0/t1 and
 * whatever conditions guard the in/zs/rt specific statements. Confirm
 * against the complete file before relying on the notes below.
 */
79 static int vfae(void *v1
, const void *v2
, const void *v3
, bool in
,
80 bool rt
, bool zs
, uint8_t es
)
82 const uint64_t mask
= get_element_lsbs_mask(es
);
83 const int bits
= get_element_bits(es
);
84 uint64_t a0
, a1
, b0
, b1
, e0
, e1
, t0
, t1
, z0
, z1
;
/* 16 is the "not found" value produced by match_index() */
85 uint64_t first_zero
= 16;
/* load both operands as two 64-bit halves each */
89 a0
= s390_vec_read_element64(v2
, 0);
90 a1
= s390_vec_read_element64(v2
, 1);
91 b0
= s390_vec_read_element64(v3
, 0);
92 b1
= s390_vec_read_element64(v3
, 1);
95 /* compare against equality with every other element */
96 for (i
= 0; i
< 64; i
+= bits
) {
/*
 * zero_search() on an XOR flags the elements that compare equal; the
 * hits of all iterations are accumulated in e0/e1.
 * NOTE(review): how t0/t1 are derived per iteration is not visible here.
 */
99 e0
|= zero_search(a0
^ t0
, mask
);
100 e0
|= zero_search(a0
^ t1
, mask
);
101 e1
|= zero_search(a1
^ t0
, mask
);
102 e1
|= zero_search(a1
^ t1
, mask
);
104 /* invert the result if requested - invert only the MSBs */
/* NOTE(review): the statements performing the inversion are not visible here */
109 first_equal
= match_index(e0
, e1
);
/* locate the first zero element of v2 (presumably only when zs is set - TODO confirm) */
112 z0
= zero_search(a0
, mask
);
113 z1
= zero_search(a1
, mask
);
114 first_zero
= match_index(z0
, z1
);
/*
 * Move each flagged MSB to the element's lowest bit and multiply by the
 * single-element mask, turning every flagged element into all ones
 * (presumably the rt case - TODO confirm).
 */
118 e0
= (e0
>> (bits
- 1)) * get_single_element_mask(es
);
119 e1
= (e1
>> (bits
- 1)) * get_single_element_mask(es
);
120 s390_vec_write_element64(v1
, 0, e0
);
121 s390_vec_write_element64(v1
, 1, e1
);
/*
 * Store the smaller of the two byte indices into 64-bit element 0 and
 * clear 64-bit element 1 (presumably the !rt case - TODO confirm).
 */
123 s390_vec_write_element64(v1
, 0, MIN(first_equal
, first_zero
));
124 s390_vec_write_element64(v1
, 1, 0);
/* derive the return value from the two indices; 16 means "not found" */
127 if (first_zero
== 16 && first_equal
== 16) {
128 return 3; /* no match */
129 } else if (first_zero
== 16) {
130 return 1; /* matching elements, no match for zero */
131 } else if (first_equal
< first_zero
) {
132 return 2; /* matching elements before match for zero */
134 return 0; /* match for zero */
/*
 * Generates HELPER(gvec_vfae<BITS>): decodes the in/rt/zs flags from bits
 * 3/2/1 of simd_data(desc) and calls vfae() with element size MO_<BITS>,
 * discarding its return value.
 * NOTE(review): the end of the parameter list, the braces and any
 * instantiations of this macro are not visible in this view.
 */
137 #define DEF_VFAE_HELPER(BITS) \
138 void HELPER(gvec_vfae##BITS)(void *v1, const void *v2, const void *v3, \
141 const bool in = extract32(simd_data(desc), 3, 1); \
142 const bool rt = extract32(simd_data(desc), 2, 1); \
143 const bool zs = extract32(simd_data(desc), 1, 1); \
145 vfae(v1, v2, v3, in, rt, zs, MO_##BITS); \
/*
 * Generates HELPER(gvec_vfae_cc<BITS>): same flag decoding as the non-cc
 * variant (in/rt/zs from bits 3/2/1 of simd_data(desc)), but the value
 * returned by vfae() is stored into env->cc_op. Instantiated below for
 * 8, 16 and 32 bit elements.
 * NOTE(review): the braces of the generated function are not visible in
 * this view.
 */
151 #define DEF_VFAE_CC_HELPER(BITS) \
152 void HELPER(gvec_vfae_cc##BITS)(void *v1, const void *v2, const void *v3, \
153 CPUS390XState *env, uint32_t desc) \
155 const bool in = extract32(simd_data(desc), 3, 1); \
156 const bool rt = extract32(simd_data(desc), 2, 1); \
157 const bool zs = extract32(simd_data(desc), 1, 1); \
159 env->cc_op = vfae(v1, v2, v3, in, rt, zs, MO_##BITS); \
161 DEF_VFAE_CC_HELPER(8)
162 DEF_VFAE_CC_HELPER(16)
163 DEF_VFAE_CC_HELPER(32)
/*
 * Shared implementation behind the gvec_vfee* helpers
 * (VECTOR FIND ELEMENT EQUAL).
 *
 * @v1: result vector - 64-bit element 0 receives the byte index of the
 *      first hit (16 if there is none), 64-bit element 1 is cleared
 * @v2: vector that is searched (and, with @zs, scanned for a zero element)
 * @v3: vector @v2 is compared against, element by element
 * @zs: also look for the first zero element in @v2
 * @es: element size as log2 of the byte count (see get_element_bits())
 *
 * Returns the condition code:
 *   0 - a zero element was found before any equal element
 *   1 - an equal element was found, not preceded by a zero element
 *   3 - neither an equal nor a zero element was found
 * Condition code 2 is not defined for this instruction and is never
 * returned.
 */
static int vfee(void *v1, const void *v2, const void *v3, bool zs, uint8_t es)
{
    const uint64_t mask = get_element_lsbs_mask(es);
    uint64_t a0, a1, b0, b1, e0, e1, z0, z1;
    uint64_t first_zero = 16;
    uint64_t first_equal;

    a0 = s390_vec_read_element64(v2, 0);
    a1 = s390_vec_read_element64(v2, 1);
    b0 = s390_vec_read_element64(v3, 0);
    b1 = s390_vec_read_element64(v3, 1);

    /* an element of the XOR is zero exactly where both operands are equal */
    e0 = zero_search(a0 ^ b0, mask);
    e1 = zero_search(a1 ^ b1, mask);
    first_equal = match_index(e0, e1);

    /* without zs, first_zero keeps its "not found" value of 16 */
    if (zs) {
        z0 = zero_search(a0, mask);
        z1 = zero_search(a1, mask);
        first_zero = match_index(z0, z1);
    }

    s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero));
    s390_vec_write_element64(v1, 1, 0);

    if (first_zero == 16 && first_equal == 16) {
        return 3; /* no match */
    } else if (first_zero < first_equal) {
        return 0; /* match for zero */
    }
    /* equal element at an index lower than or equal to the zero element */
    return 1; /* match */
}
/*
 * Generates HELPER(gvec_vfee<BITS>): decodes the zs flag from bit 1 of
 * simd_data(desc) and calls vfee() with element size MO_<BITS>,
 * discarding its return value.
 * NOTE(review): the end of the parameter list, the braces and any
 * instantiations of this macro are not visible in this view.
 */
198 #define DEF_VFEE_HELPER(BITS) \
199 void HELPER(gvec_vfee##BITS)(void *v1, const void *v2, const void *v3, \
202 const bool zs = extract32(simd_data(desc), 1, 1); \
204 vfee(v1, v2, v3, zs, MO_##BITS); \
/*
 * Generates HELPER(gvec_vfee_cc<BITS>): decodes the zs flag from bit 1 of
 * simd_data(desc) and stores the value returned by vfee() into
 * env->cc_op. Instantiated below for 8, 16 and 32 bit elements.
 * NOTE(review): the braces of the generated function are not visible in
 * this view.
 */
210 #define DEF_VFEE_CC_HELPER(BITS) \
211 void HELPER(gvec_vfee_cc##BITS)(void *v1, const void *v2, const void *v3, \
212 CPUS390XState *env, uint32_t desc) \
214 const bool zs = extract32(simd_data(desc), 1, 1); \
216 env->cc_op = vfee(v1, v2, v3, zs, MO_##BITS); \
218 DEF_VFEE_CC_HELPER(8)
219 DEF_VFEE_CC_HELPER(16)
220 DEF_VFEE_CC_HELPER(32)
/*
 * Shared implementation behind the gvec_vfene* and gvec_vfene_cc* helpers.
 *
 * v1 receives the result; v2 and v3 are each read as two 64-bit halves.
 * zs is the flag bit the helpers extract from the descriptor, es is the
 * element size as log2 of the byte count (see get_element_bits()). The
 * return value is what the *_cc helpers store into env->cc_op.
 *
 * NOTE(review): parts of this function are not visible in this view - the
 * comparison that updates "smaller", any condition guarding the zero
 * search and the return statements of the first two branches at the end.
 * Confirm against the complete file.
 */
222 static int vfene(void *v1
, const void *v2
, const void *v3
, bool zs
, uint8_t es
)
224 const uint64_t mask
= get_element_lsbs_mask(es
);
225 uint64_t a0
, a1
, b0
, b1
, e0
, e1
, z0
, z1
;
/* 16 is the "not found" value produced by match_index() */
226 uint64_t first_zero
= 16;
227 uint64_t first_inequal
;
228 bool smaller
= false;
/* load both operands as two 64-bit halves each */
230 a0
= s390_vec_read_element64(v2
, 0);
231 a1
= s390_vec_read_element64(v2
, 1);
232 b0
= s390_vec_read_element64(v3
, 0);
233 b1
= s390_vec_read_element64(v3
, 1);
/* nonzero_search() on an XOR flags the elements that differ */
234 e0
= nonzero_search(a0
^ b0
, mask
);
235 e1
= nonzero_search(a1
^ b1
, mask
);
236 first_inequal
= match_index(e0
, e1
);
238 /* identify the smaller element */
239 if (first_inequal
< 16) {
/* convert the byte index into an element number and re-read that element from both operands */
240 uint8_t enr
= first_inequal
/ (1 << es
);
241 uint32_t a
= s390_vec_read_element(v2
, enr
, es
);
242 uint32_t b
= s390_vec_read_element(v3
, enr
, es
);
/* NOTE(review): the statement comparing a and b is not visible here */
/* locate the first zero element of v2 (presumably only when zs is set - TODO confirm) */
248 z0
= zero_search(a0
, mask
);
249 z1
= zero_search(a1
, mask
);
250 first_zero
= match_index(z0
, z1
);
/* store the smaller of the two byte indices into 64-bit element 0, clear 64-bit element 1 */
253 s390_vec_write_element64(v1
, 0, MIN(first_inequal
, first_zero
));
254 s390_vec_write_element64(v1
, 1, 0);
/* NOTE(review): the values returned by the two branches below are not visible here */
255 if (first_zero
== 16 && first_inequal
== 16) {
257 } else if (first_zero
< first_inequal
) {
/* remaining case: 1 if "smaller" was set, 2 otherwise */
260 return smaller
? 1 : 2;
/*
 * Generates HELPER(gvec_vfene<BITS>): decodes the zs flag from bit 1 of
 * simd_data(desc) and calls vfene() with element size MO_<BITS>,
 * discarding its return value.
 * NOTE(review): the end of the parameter list, the braces and any
 * instantiations of this macro are not visible in this view.
 */
263 #define DEF_VFENE_HELPER(BITS) \
264 void HELPER(gvec_vfene##BITS)(void *v1, const void *v2, const void *v3, \
267 const bool zs = extract32(simd_data(desc), 1, 1); \
269 vfene(v1, v2, v3, zs, MO_##BITS); \
/*
 * Generates HELPER(gvec_vfene_cc<BITS>): decodes the zs flag from bit 1 of
 * simd_data(desc) and stores the value returned by vfene() into
 * env->cc_op. Instantiated below for 8, 16 and 32 bit elements.
 * NOTE(review): the braces of the generated function are not visible in
 * this view.
 */
275 #define DEF_VFENE_CC_HELPER(BITS) \
276 void HELPER(gvec_vfene_cc##BITS)(void *v1, const void *v2, const void *v3, \
277 CPUS390XState *env, uint32_t desc) \
279 const bool zs = extract32(simd_data(desc), 1, 1); \
281 env->cc_op = vfene(v1, v2, v3, zs, MO_##BITS); \
283 DEF_VFENE_CC_HELPER(8)
284 DEF_VFENE_CC_HELPER(16)
285 DEF_VFENE_CC_HELPER(32)
/*
 * Shared implementation behind the gvec_vistr* and gvec_vistr_cc* helpers.
 *
 * Reads v2 as two 64-bit halves, masks each half based on the zero
 * elements found in it and writes both halves to v1. es is the element
 * size as log2 of the byte count (see get_element_bits()). The return
 * value is what the *_cc helpers store into env->cc_op.
 *
 * NOTE(review): parts of this function are not visible in this view - the
 * declaration of z, the conditions guarding the two masking steps and the
 * return statement. Confirm against the complete file.
 */
287 static int vistr(void *v1
, const void *v2
, uint8_t es
)
289 const uint64_t mask
= get_element_lsbs_mask(es
);
290 uint64_t a0
= s390_vec_read_element64(v2
, 0);
291 uint64_t a1
= s390_vec_read_element64(v2
, 1);
/* flag the zero elements of the first half */
295 z
= zero_search(a0
, mask
);
/*
 * Keep only the bits above the first flagged MSB, clearing everything
 * from that element on (assuming clz64() counts leading zero bits -
 * TODO confirm).
 */
297 a0
&= ~(-1ull >> clz64(z
));
/* same treatment for the second half */
301 z
= zero_search(a1
, mask
);
303 a1
&= ~(-1ull >> clz64(z
));
308 s390_vec_write_element64(v1
, 0, a0
);
309 s390_vec_write_element64(v1
, 1, a1
);
/*
 * Generates HELPER(gvec_vistr<BITS>): calls vistr() with element size
 * MO_<BITS>, discarding its return value.
 * NOTE(review): the braces of the generated function and any
 * instantiations of this macro are not visible in this view.
 */
313 #define DEF_VISTR_HELPER(BITS) \
314 void HELPER(gvec_vistr##BITS)(void *v1, const void *v2, uint32_t desc) \
316 vistr(v1, v2, MO_##BITS); \
/*
 * Generates HELPER(gvec_vistr_cc<BITS>): stores the value returned by
 * vistr() into env->cc_op. Instantiated below for 8, 16 and 32 bit
 * elements.
 * NOTE(review): the end of the parameter list and the braces of the
 * generated function are not visible in this view.
 */
322 #define DEF_VISTR_CC_HELPER(BITS) \
323 void HELPER(gvec_vistr_cc##BITS)(void *v1, const void *v2, CPUS390XState *env, \
326 env->cc_op = vistr(v1, v2, MO_##BITS); \
328 DEF_VISTR_CC_HELPER(8)
329 DEF_VISTR_CC_HELPER(16)
330 DEF_VISTR_CC_HELPER(32)
/*
 * Compare data against the limit l under control of the byte c.
 *
 * Three flags are decoded from c: bit 7 ("equal"), bit 6 ("lower") and
 * bit 5 ("higher").
 *
 * NOTE(review): most of the branch structure is not visible in this view;
 * presumably the flag matching the relation between data and l is
 * returned - confirm against the complete file.
 */
332 static bool element_compare(uint32_t data
, uint32_t l
, uint8_t c
)
334 const bool equal
= extract32(c
, 7, 1);
335 const bool lower
= extract32(c
, 6, 1);
336 const bool higher
= extract32(c
, 5, 1);
340 } else if (data
> l
) {
/*
 * Shared implementation behind the gvec_vstrc* helper families.
 *
 * Every element of v2 is tested against pairs of elements from v3, using
 * control bytes read from v4 and element_compare(). v1 receives either a
 * copy of rt_result or the smaller of first_match/first_zero. in and zs
 * are flag bits the helpers extract from the descriptor, rt is passed by
 * the helpers as a literal 0 or 1, es is the element size as log2 of the
 * byte count. The return value is what the *_cc helpers store into
 * env->cc_op.
 *
 * NOTE(review): parts of this function are not visible in this view - the
 * declarations of z0/z1/i/j, the conditions guarding the zs/rt specific
 * statements, the bodies of the early-exit and match branches and the
 * closing braces. Confirm against the complete file before relying on the
 * notes below.
 */
346 static int vstrc(void *v1
, const void *v2
, const void *v3
, const void *v4
,
347 bool in
, bool rt
, bool zs
, uint8_t es
)
349 const uint64_t mask
= get_element_lsbs_mask(es
);
350 uint64_t a0
= s390_vec_read_element64(v2
, 0);
351 uint64_t a1
= s390_vec_read_element64(v2
, 1);
/* 16 is the "not found" value produced by match_index() */
352 int first_zero
= 16, first_match
= 16;
/* starts out zeroed; only matching elements are written below */
353 S390Vector rt_result
= {};
/* locate the first zero element of v2 (presumably only when zs is set - TODO confirm) */
358 z0
= zero_search(a0
, mask
);
359 z1
= zero_search(a1
, mask
);
360 first_zero
= match_index(z0
, z1
);
/* one iteration per element of the 16-byte vector */
363 for (i
= 0; i
< 16 / (1 << es
); i
++) {
364 const uint32_t data
= s390_vec_read_element(v2
, i
, es
);
/* byte offset of element i */
365 const int cur_byte
= i
* (1 << es
);
366 bool any_match
= false;
368 /* if we don't need a bit vector, we can stop early */
369 if (cur_byte
== first_zero
&& !rt
) {
/* v3 and v4 are consumed in pairs of neighbouring elements */
373 for (j
= 0; j
< 16 / (1 << es
); j
+= 2) {
374 const uint32_t l1
= s390_vec_read_element(v3
, j
, es
);
375 const uint32_t l2
= s390_vec_read_element(v3
, j
+ 1, es
);
376 /* we are only interested in the highest byte of each element */
377 const uint8_t c1
= s390_vec_read_element8(v4
, j
* (1 << es
));
378 const uint8_t c2
= s390_vec_read_element8(v4
, (j
+ 1) * (1 << es
));
/* both comparisons of a pair have to succeed */
380 if (element_compare(data
, l1
, c1
) &&
381 element_compare(data
, l2
, c2
)) {
386 /* invert the result if requested */
387 any_match
= in
^ any_match
;
390 /* indicate bit vector if requested */
/* all ones; s390_vec_write_element() presumably truncates to the element size - TODO confirm */
392 const uint64_t val
= -1ull;
394 first_match
= MIN(cur_byte
, first_match
);
395 s390_vec_write_element(&rt_result
, i
, es
, val
);
397 /* stop on the first match */
398 first_match
= cur_byte
;
/* copy the collected bit vector to the destination (presumably the rt case - TODO confirm) */
405 *(S390Vector
*)v1
= rt_result
;
/* store the smaller byte index into 64-bit element 0, clear 64-bit element 1 (presumably the !rt case - TODO confirm) */
407 s390_vec_write_element64(v1
, 0, MIN(first_match
, first_zero
));
408 s390_vec_write_element64(v1
, 1, 0);
/* derive the return value from the two indices; 16 means "not found" */
411 if (first_zero
== 16 && first_match
== 16) {
412 return 3; /* no match */
413 } else if (first_zero
== 16) {
414 return 1; /* matching elements, no match for zero */
415 } else if (first_match
< first_zero
) {
416 return 2; /* matching elements before match for zero */
418 return 0; /* match for zero */
/*
 * Generates HELPER(gvec_vstrc<BITS>): decodes the in and zs flags from
 * bits 3 and 1 of simd_data(desc) and calls vstrc() with rt fixed to 0,
 * discarding its return value.
 * NOTE(review): the braces of the generated function and any
 * instantiations of this macro are not visible in this view.
 */
421 #define DEF_VSTRC_HELPER(BITS) \
422 void HELPER(gvec_vstrc##BITS)(void *v1, const void *v2, const void *v3, \
423 const void *v4, uint32_t desc) \
425 const bool in = extract32(simd_data(desc), 3, 1); \
426 const bool zs = extract32(simd_data(desc), 1, 1); \
428 vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS); \
/*
 * Generates HELPER(gvec_vstrc_rt<BITS>): decodes the in and zs flags from
 * bits 3 and 1 of simd_data(desc) and calls vstrc() with rt fixed to 1,
 * discarding its return value. Instantiated below for 8, 16 and 32 bit
 * elements.
 * NOTE(review): the braces of the generated function are not visible in
 * this view.
 */
434 #define DEF_VSTRC_RT_HELPER(BITS) \
435 void HELPER(gvec_vstrc_rt##BITS)(void *v1, const void *v2, const void *v3, \
436 const void *v4, uint32_t desc) \
438 const bool in = extract32(simd_data(desc), 3, 1); \
439 const bool zs = extract32(simd_data(desc), 1, 1); \
441 vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS); \
443 DEF_VSTRC_RT_HELPER(8)
444 DEF_VSTRC_RT_HELPER(16)
445 DEF_VSTRC_RT_HELPER(32)
/*
 * Generates HELPER(gvec_vstrc_cc<BITS>): decodes the in and zs flags from
 * bits 3 and 1 of simd_data(desc), calls vstrc() with rt fixed to 0 and
 * stores its return value into env->cc_op. Instantiated below for 8, 16
 * and 32 bit elements.
 * NOTE(review): the end of the parameter list and the braces of the
 * generated function are not visible in this view.
 */
447 #define DEF_VSTRC_CC_HELPER(BITS) \
448 void HELPER(gvec_vstrc_cc##BITS)(void *v1, const void *v2, const void *v3, \
449 const void *v4, CPUS390XState *env, \
452 const bool in = extract32(simd_data(desc), 3, 1); \
453 const bool zs = extract32(simd_data(desc), 1, 1); \
455 env->cc_op = vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS); \
457 DEF_VSTRC_CC_HELPER(8)
458 DEF_VSTRC_CC_HELPER(16)
459 DEF_VSTRC_CC_HELPER(32)
/*
 * Generates HELPER(gvec_vstrc_cc_rt<BITS>): decodes the in and zs flags
 * from bits 3 and 1 of simd_data(desc), calls vstrc() with rt fixed to 1
 * and stores its return value into env->cc_op. Instantiated below for 8,
 * 16 and 32 bit elements.
 * NOTE(review): the end of the parameter list and the braces of the
 * generated function are not visible in this view.
 */
461 #define DEF_VSTRC_CC_RT_HELPER(BITS) \
462 void HELPER(gvec_vstrc_cc_rt##BITS)(void *v1, const void *v2, const void *v3, \
463 const void *v4, CPUS390XState *env, \
466 const bool in = extract32(simd_data(desc), 3, 1); \
467 const bool zs = extract32(simd_data(desc), 1, 1); \
469 env->cc_op = vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS); \
471 DEF_VSTRC_CC_RT_HELPER(8)
472 DEF_VSTRC_CC_RT_HELPER(16)
473 DEF_VSTRC_CC_RT_HELPER(32)