s390x/tcg: Implement VECTOR FIND ELEMENT NOT EQUAL
[qemu/ar7.git] / target / s390x / vec_string_helper.c
blob0ee3470112b517e861218edf4201f9f61eab7c6a
/*
 * QEMU TCG support -- s390x vector string instruction support
 *
 * Copyright (C) 2019 Red Hat Inc
 *
 * Authors:
 *   David Hildenbrand <david@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
12 #include "qemu/osdep.h"
13 #include "qemu-common.h"
14 #include "cpu.h"
15 #include "internal.h"
16 #include "vec.h"
17 #include "tcg/tcg.h"
18 #include "tcg/tcg-gvec-desc.h"
19 #include "exec/helper-proto.h"
/*
 * Returns a bit set in the MSB of each element that is zero,
 * as defined by the mask.
 *
 * @a:    64-bit value holding the packed elements to inspect.
 * @mask: all bits of every element except its MSB (e.g.
 *        0x7f7f7f7f7f7f7f7f for byte-sized elements).
 *
 * Adding @mask to the low bits of an element carries into the element's MSB
 * iff any low bit is set; OR-ing in @a covers an MSB that was already set.
 * After also OR-ing in @mask, the only bits that can still be clear are the
 * MSBs of all-zero elements, so inverting yields exactly those MSBs.
 */
static inline uint64_t zero_search(uint64_t a, uint64_t mask)
{
    return ~(((a & mask) + mask) | a | mask);
}
/*
 * Returns a bit set in the MSB of each element that is not zero,
 * as defined by the mask.
 *
 * @a:    64-bit value holding the packed elements to inspect.
 * @mask: all bits of every element except its MSB (e.g.
 *        0x7f7f7f7f7f7f7f7f for byte-sized elements).
 *
 * Adding @mask to the low bits of an element carries into the element's MSB
 * iff any low bit is set; OR-ing in @a covers an MSB that was already set.
 * Masking with ~@mask then keeps only the per-element MSB markers.
 */
static inline uint64_t nonzero_search(uint64_t a, uint64_t mask)
{
    return (((a & mask) + mask) | a) & ~mask;
}
/*
 * Returns the byte offset for the first match, or 16 for no match.
 *
 * @c0: per-element MSB markers for the first 64 bits (bytes 0-7).
 * @c1: per-element MSB markers for the second 64 bits (bytes 8-15).
 *
 * The leading-zero count of the first non-empty marker word is the bit
 * position of the leftmost marker; dividing by 8 turns it into a byte offset.
 * NOTE(review): the "16 for no match" result relies on clz64(0) returning 64
 * -- confirm against the clz64() definition.
 */
static inline int match_index(uint64_t c0, uint64_t c1)
{
    return (c0 ? clz64(c0) : clz64(c1) + 64) >> 3;
}
48 * Returns the number of bits composing one element.
50 static uint8_t get_element_bits(uint8_t es)
52 return (1 << es) * BITS_PER_BYTE;
/*
 * Returns the bitmask for a single element.
 *
 * The result has the low get_element_bits(es) bits set and all other bits
 * clear.
 */
static uint64_t get_single_element_mask(uint8_t es)
{
    return -1ull >> (64 - get_element_bits(es));
}
/*
 * Returns the bitmask for a single element (excluding the MSB).
 *
 * The result has the low (get_element_bits(es) - 1) bits set and all other
 * bits clear.
 */
static uint64_t get_single_element_lsbs_mask(uint8_t es)
{
    return -1ull >> (65 - get_element_bits(es));
}
/*
 * Returns the bitmasks for multiple elements (excluding the MSBs).
 *
 * The single-element mask (without MSB) is handed to dup_const() for the
 * given element size; presumably that replicates it across all elements of
 * a 64-bit value -- confirm against the dup_const() definition.
 */
static uint64_t get_element_lsbs_mask(uint8_t es)
{
    return dup_const(es, get_single_element_lsbs_mask(es));
}
/*
 * Find any element of @v2 that equals any element of @v3 (presumably the
 * worker for VECTOR FIND ANY ELEMENT EQUAL).
 *
 * @v1: destination vector.
 * @v2: vector whose elements are searched.
 * @v3: vector providing the set of elements to match against.
 * @in: invert the per-element match result, i.e. report the elements of @v2
 *      that equal no element of @v3.
 * @rt: if set, write a per-element all-ones/all-zeroes mask to @v1;
 *      otherwise write the byte index of the first hit to 64-bit element 0
 *      of @v1 and zero to 64-bit element 1.
 * @zs: additionally search @v2 for a zero element.
 * @es: log2 of the element size in bytes.
 *
 * Returns a condition code:
 *   0 - match for zero
 *   1 - matching elements, no match for zero
 *   2 - matching elements before match for zero
 *   3 - no match
 */
static int vfae(void *v1, const void *v2, const void *v3, bool in,
                bool rt, bool zs, uint8_t es)
{
    const uint64_t mask = get_element_lsbs_mask(es);
    const int bits = get_element_bits(es);
    uint64_t a0, a1, b0, b1, e0, e1, t0, t1, z0, z1;
    uint64_t first_zero = 16;
    uint64_t first_equal;
    int i;

    a0 = s390_vec_read_element64(v2, 0);
    a1 = s390_vec_read_element64(v2, 1);
    b0 = s390_vec_read_element64(v3, 0);
    b1 = s390_vec_read_element64(v3, 1);
    e0 = 0;
    e1 = 0;
    /*
     * compare against equality with every other element: rotating both
     * halves of v3 by one element per iteration lines every v3 element up
     * with every v2 element position once.
     */
    for (i = 0; i < 64; i += bits) {
        t0 = rol64(b0, i);
        t1 = rol64(b1, i);
        e0 |= zero_search(a0 ^ t0, mask);
        e0 |= zero_search(a0 ^ t1, mask);
        e1 |= zero_search(a1 ^ t0, mask);
        e1 |= zero_search(a1 ^ t1, mask);
    }
    /* invert the result if requested - invert only the MSBs */
    if (in) {
        e0 = ~e0 & ~mask;
        e1 = ~e1 & ~mask;
    }
    first_equal = match_index(e0, e1);

    if (zs) {
        z0 = zero_search(a0, mask);
        z1 = zero_search(a1, mask);
        first_zero = match_index(z0, z1);
    }

    if (rt) {
        /*
         * Move each MSB marker to the LSB of its element, then multiply by
         * an all-ones element to expand the marker to a full-element mask.
         */
        e0 = (e0 >> (bits - 1)) * get_single_element_mask(es);
        e1 = (e1 >> (bits - 1)) * get_single_element_mask(es);
        s390_vec_write_element64(v1, 0, e0);
        s390_vec_write_element64(v1, 1, e1);
    } else {
        s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero));
        s390_vec_write_element64(v1, 1, 0);
    }

    if (first_zero == 16 && first_equal == 16) {
        return 3; /* no match */
    } else if (first_zero == 16) {
        return 1; /* matching elements, no match for zero */
    } else if (first_equal < first_zero) {
        return 2; /* matching elements before match for zero */
    }
    return 0; /* match for zero */
}
137 #define DEF_VFAE_HELPER(BITS) \
138 void HELPER(gvec_vfae##BITS)(void *v1, const void *v2, const void *v3, \
139 uint32_t desc) \
141 const bool in = extract32(simd_data(desc), 3, 1); \
142 const bool rt = extract32(simd_data(desc), 2, 1); \
143 const bool zs = extract32(simd_data(desc), 1, 1); \
145 vfae(v1, v2, v3, in, rt, zs, MO_##BITS); \
147 DEF_VFAE_HELPER(8)
148 DEF_VFAE_HELPER(16)
149 DEF_VFAE_HELPER(32)
151 #define DEF_VFAE_CC_HELPER(BITS) \
152 void HELPER(gvec_vfae_cc##BITS)(void *v1, const void *v2, const void *v3, \
153 CPUS390XState *env, uint32_t desc) \
155 const bool in = extract32(simd_data(desc), 3, 1); \
156 const bool rt = extract32(simd_data(desc), 2, 1); \
157 const bool zs = extract32(simd_data(desc), 1, 1); \
159 env->cc_op = vfae(v1, v2, v3, in, rt, zs, MO_##BITS); \
161 DEF_VFAE_CC_HELPER(8)
162 DEF_VFAE_CC_HELPER(16)
163 DEF_VFAE_CC_HELPER(32)
/*
 * Find the first element of @v2 that equals the element at the same position
 * in @v3 (presumably the worker for VECTOR FIND ELEMENT EQUAL).
 *
 * @v1: destination vector; 64-bit element 0 receives the byte index of the
 *      first hit (equal element or, with @zs, zero element, whichever comes
 *      first; 16 if there is none) and 64-bit element 1 is set to zero.
 * @v2: first source vector; also the one searched for zero with @zs.
 * @v3: second source vector.
 * @zs: additionally search @v2 for a zero element.
 * @es: log2 of the element size in bytes.
 *
 * Returns a condition code:
 *   0 - match for zero
 *   1 - matching elements, no match for zero
 *   2 - matching elements before match for zero
 *   3 - no match
 */
static int vfee(void *v1, const void *v2, const void *v3, bool zs, uint8_t es)
{
    const uint64_t mask = get_element_lsbs_mask(es);
    uint64_t a0, a1, b0, b1, e0, e1, z0, z1;
    uint64_t first_zero = 16;
    uint64_t first_equal;

    a0 = s390_vec_read_element64(v2, 0);
    a1 = s390_vec_read_element64(v2, 1);
    b0 = s390_vec_read_element64(v3, 0);
    b1 = s390_vec_read_element64(v3, 1);
    /* equal elements XOR to zero, so search the XOR for zero elements */
    e0 = zero_search(a0 ^ b0, mask);
    e1 = zero_search(a1 ^ b1, mask);
    first_equal = match_index(e0, e1);

    if (zs) {
        z0 = zero_search(a0, mask);
        z1 = zero_search(a1, mask);
        first_zero = match_index(z0, z1);
    }

    s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero));
    s390_vec_write_element64(v1, 1, 0);
    if (first_zero == 16 && first_equal == 16) {
        return 3; /* no match */
    } else if (first_zero == 16) {
        return 1; /* matching elements, no match for zero */
    } else if (first_equal < first_zero) {
        return 2; /* matching elements before match for zero */
    }
    return 0; /* match for zero */
}
198 #define DEF_VFEE_HELPER(BITS) \
199 void HELPER(gvec_vfee##BITS)(void *v1, const void *v2, const void *v3, \
200 uint32_t desc) \
202 const bool zs = extract32(simd_data(desc), 1, 1); \
204 vfee(v1, v2, v3, zs, MO_##BITS); \
206 DEF_VFEE_HELPER(8)
207 DEF_VFEE_HELPER(16)
208 DEF_VFEE_HELPER(32)
210 #define DEF_VFEE_CC_HELPER(BITS) \
211 void HELPER(gvec_vfee_cc##BITS)(void *v1, const void *v2, const void *v3, \
212 CPUS390XState *env, uint32_t desc) \
214 const bool zs = extract32(simd_data(desc), 1, 1); \
216 env->cc_op = vfee(v1, v2, v3, zs, MO_##BITS); \
218 DEF_VFEE_CC_HELPER(8)
219 DEF_VFEE_CC_HELPER(16)
220 DEF_VFEE_CC_HELPER(32)
/*
 * Find the first element of @v2 that differs from the element at the same
 * position in @v3 (worker for VECTOR FIND ELEMENT NOT EQUAL).
 *
 * @v1: destination vector; 64-bit element 0 receives the byte index of the
 *      first hit (unequal element or, with @zs, zero element, whichever
 *      comes first; 16 if there is none) and 64-bit element 1 is set to zero.
 * @v2: first source vector; also the one searched for zero with @zs.
 * @v3: second source vector.
 * @zs: additionally search @v2 for a zero element.
 * @es: log2 of the element size in bytes.
 *
 * Returns a condition code:
 *   0 - a zero element was found before the first unequal element
 *   1 - first unequal element of @v2 is smaller than the one of @v3
 *   2 - first unequal element of @v2 is not smaller than the one of @v3
 *   3 - neither an unequal nor a zero element was found
 */
static int vfene(void *v1, const void *v2, const void *v3, bool zs, uint8_t es)
{
    const uint64_t mask = get_element_lsbs_mask(es);
    uint64_t a0, a1, b0, b1, e0, e1, z0, z1;
    uint64_t first_zero = 16;
    uint64_t first_inequal;
    bool smaller = false;

    a0 = s390_vec_read_element64(v2, 0);
    a1 = s390_vec_read_element64(v2, 1);
    b0 = s390_vec_read_element64(v3, 0);
    b1 = s390_vec_read_element64(v3, 1);
    /* unequal elements XOR to non-zero, so search the XOR for them */
    e0 = nonzero_search(a0 ^ b0, mask);
    e1 = nonzero_search(a1 ^ b1, mask);
    first_inequal = match_index(e0, e1);

    /* identify the smaller element */
    if (first_inequal < 16) {
        /* convert the byte offset into an element number */
        uint8_t enr = first_inequal / (1 << es);
        uint32_t a = s390_vec_read_element(v2, enr, es);
        uint32_t b = s390_vec_read_element(v3, enr, es);

        smaller = a < b;
    }

    if (zs) {
        z0 = zero_search(a0, mask);
        z1 = zero_search(a1, mask);
        first_zero = match_index(z0, z1);
    }

    s390_vec_write_element64(v1, 0, MIN(first_inequal, first_zero));
    s390_vec_write_element64(v1, 1, 0);
    if (first_zero == 16 && first_inequal == 16) {
        return 3;
    } else if (first_zero < first_inequal) {
        return 0;
    }
    return smaller ? 1 : 2;
}
263 #define DEF_VFENE_HELPER(BITS) \
264 void HELPER(gvec_vfene##BITS)(void *v1, const void *v2, const void *v3, \
265 uint32_t desc) \
267 const bool zs = extract32(simd_data(desc), 1, 1); \
269 vfene(v1, v2, v3, zs, MO_##BITS); \
271 DEF_VFENE_HELPER(8)
272 DEF_VFENE_HELPER(16)
273 DEF_VFENE_HELPER(32)
275 #define DEF_VFENE_CC_HELPER(BITS) \
276 void HELPER(gvec_vfene_cc##BITS)(void *v1, const void *v2, const void *v3, \
277 CPUS390XState *env, uint32_t desc) \
279 const bool zs = extract32(simd_data(desc), 1, 1); \
281 env->cc_op = vfene(v1, v2, v3, zs, MO_##BITS); \
283 DEF_VFENE_CC_HELPER(8)
284 DEF_VFENE_CC_HELPER(16)
285 DEF_VFENE_CC_HELPER(32)