s390x/tcg: Implement VECTOR SELECT
target/s390x/translate_vx.inc.c
/*
 * QEMU TCG support -- s390x vector instruction translation functions
 *
 * Copyright (C) 2019 Red Hat Inc
 *
 * Authors:
 *   David Hildenbrand <david@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

/*
 * For most instructions that use the same element size for reads and
 * writes, we can use real gvec vector expansion, which potentially uses
 * real host vector instructions. As they only work up to 64 bit elements,
 * 128 bit elements (vector is a single element) have to be handled
 * differently. Operations that are too complicated to encode via TCG ops
 * are handled via gvec ool (out-of-line) handlers.
 *
 * As soon as instructions use different element sizes for reads and writes,
 * or access elements "out of their element scope", we expand them manually
 * in fancy loops, as gvec expansion does not deal with actual element
 * numbers and does not support access to other elements.
 *
 * 128 bit elements:
 *  As we only have i32/i64, such elements have to be loaded into two
 *  i64 values and can then be processed e.g. by tcg_gen_add2_i64.
 *
 * Sizes:
 *  On s390x, the operand size (oprsz) and the maximum size (maxsz) are
 *  always 16 (128 bit). What gvec code calls "vece", s390x calls "es",
 *  a.k.a. "element size". These values nicely map to MO_8 ... MO_64. Only
 *  the 128 bit element size has to be treated in a special way (MO_64 + 1).
 *  We will therefore use ES_* instead of MO_* in this file.
 *
 * CC handling:
 *  As gvec ool-helpers can currently not return values (besides via
 *  pointers like vectors or cpu_env), whenever we have to set the CC and
 *  can't conclude the value from the result vector, we will directly
 *  set it in "env->cc_op" and mark it as static via set_cc_static().
 *  Whenever this is done, the helper writes globals (cc_op).
 */
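/*
 * A minimal sketch (illustrative only, not part of the original file) of
 * the 128 bit element handling described above, for a hypothetical 128 bit
 * addition of v2 and v3 into v1 (v1/v2/v3 being register numbers):
 *
 *     TCGv_i64 ah = tcg_temp_new_i64();
 *     TCGv_i64 al = tcg_temp_new_i64();
 *     TCGv_i64 bh = tcg_temp_new_i64();
 *     TCGv_i64 bl = tcg_temp_new_i64();
 *
 *     read_vec_element_i64(ah, v2, 0, ES_64);
 *     read_vec_element_i64(al, v2, 1, ES_64);
 *     read_vec_element_i64(bh, v3, 0, ES_64);
 *     read_vec_element_i64(bl, v3, 1, ES_64);
 *     tcg_gen_add2_i64(al, ah, al, ah, bl, bh);
 *     write_vec_element_i64(ah, v1, 0, ES_64);
 *     write_vec_element_i64(al, v1, 1, ES_64);
 *
 * Element 0 is the leftmost (most significant) doubleword, so it carries
 * the high half of the 128 bit value.
 */
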
#define NUM_VEC_ELEMENT_BYTES(es) (1 << (es))
#define NUM_VEC_ELEMENTS(es) (16 / NUM_VEC_ELEMENT_BYTES(es))
#define NUM_VEC_ELEMENT_BITS(es) (NUM_VEC_ELEMENT_BYTES(es) * BITS_PER_BYTE)

#define ES_8    MO_8
#define ES_16   MO_16
#define ES_32   MO_32
#define ES_64   MO_64
#define ES_128  4

static inline bool valid_vec_element(uint8_t enr, TCGMemOp es)
{
    return !(enr & ~(NUM_VEC_ELEMENTS(es) - 1));
}

static void read_vec_element_i64(TCGv_i64 dst, uint8_t reg, uint8_t enr,
                                 TCGMemOp memop)
{
    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);

    switch (memop) {
    case ES_8:
        tcg_gen_ld8u_i64(dst, cpu_env, offs);
        break;
    case ES_16:
        tcg_gen_ld16u_i64(dst, cpu_env, offs);
        break;
    case ES_32:
        tcg_gen_ld32u_i64(dst, cpu_env, offs);
        break;
    case ES_8 | MO_SIGN:
        tcg_gen_ld8s_i64(dst, cpu_env, offs);
        break;
    case ES_16 | MO_SIGN:
        tcg_gen_ld16s_i64(dst, cpu_env, offs);
        break;
    case ES_32 | MO_SIGN:
        tcg_gen_ld32s_i64(dst, cpu_env, offs);
        break;
    case ES_64:
    case ES_64 | MO_SIGN:
        tcg_gen_ld_i64(dst, cpu_env, offs);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i64(TCGv_i64 src, int reg, uint8_t enr,
                                  TCGMemOp memop)
{
    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);

    switch (memop) {
    case ES_8:
        tcg_gen_st8_i64(src, cpu_env, offs);
        break;
    case ES_16:
        tcg_gen_st16_i64(src, cpu_env, offs);
        break;
    case ES_32:
        tcg_gen_st32_i64(src, cpu_env, offs);
        break;
    case ES_64:
        tcg_gen_st_i64(src, cpu_env, offs);
        break;
    default:
        g_assert_not_reached();
    }
}

static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
                                    uint8_t es)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    /* mask off invalid parts from the element nr */
    tcg_gen_andi_i64(tmp, enr, NUM_VEC_ELEMENTS(es) - 1);

    /* convert it to an element offset relative to cpu_env (vec_reg_offset()) */
    tcg_gen_shli_i64(tmp, tmp, es);
#ifndef HOST_WORDS_BIGENDIAN
    tcg_gen_xori_i64(tmp, tmp, 8 - NUM_VEC_ELEMENT_BYTES(es));
#endif
    tcg_gen_addi_i64(tmp, tmp, vec_full_reg_offset(reg));

    /* generate the final ptr by adding cpu_env */
    tcg_gen_trunc_i64_ptr(ptr, tmp);
    tcg_gen_add_ptr(ptr, ptr, cpu_env);

    tcg_temp_free_i64(tmp);
}

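/*
 * Worked example (illustrative, not from the original source): with
 * es == ES_8, element 0 is the leftmost byte of the vector. The shift
 * above yields offset 0, and on a little-endian host the XOR with
 * (8 - 1) == 7 redirects that to host byte 7 of the first doubleword,
 * matching how vec_reg_offset() flips offsets within each 8 byte half.
 */
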
#define gen_gvec_3_ool(v1, v2, v3, data, fn) \
    tcg_gen_gvec_3_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       vec_full_reg_offset(v3), 16, 16, data, fn)
#define gen_gvec_3_ptr(v1, v2, v3, ptr, data, fn) \
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       vec_full_reg_offset(v3), ptr, 16, 16, data, fn)
#define gen_gvec_4(v1, v2, v3, v4, gen) \
    tcg_gen_gvec_4(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                   vec_full_reg_offset(v3), vec_full_reg_offset(v4), \
                   16, 16, gen)
#define gen_gvec_4_ool(v1, v2, v3, v4, data, fn) \
    tcg_gen_gvec_4_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       vec_full_reg_offset(v3), vec_full_reg_offset(v4), \
                       16, 16, data, fn)
#define gen_gvec_dup_i64(es, v1, c) \
    tcg_gen_gvec_dup_i64(es, vec_full_reg_offset(v1), 16, 16, c)
#define gen_gvec_mov(v1, v2) \
    tcg_gen_gvec_mov(0, vec_full_reg_offset(v1), vec_full_reg_offset(v2), 16, \
                     16)
#define gen_gvec_dup64i(v1, c) \
    tcg_gen_gvec_dup64i(vec_full_reg_offset(v1), 16, 16, c)

static void gen_gvec_dupi(uint8_t es, uint8_t reg, uint64_t c)
{
    switch (es) {
    case ES_8:
        tcg_gen_gvec_dup8i(vec_full_reg_offset(reg), 16, 16, c);
        break;
    case ES_16:
        tcg_gen_gvec_dup16i(vec_full_reg_offset(reg), 16, 16, c);
        break;
    case ES_32:
        tcg_gen_gvec_dup32i(vec_full_reg_offset(reg), 16, 16, c);
        break;
    case ES_64:
        gen_gvec_dup64i(reg, c);
        break;
    default:
        g_assert_not_reached();
    }
}

static void zero_vec(uint8_t reg)
{
    tcg_gen_gvec_dup8i(vec_full_reg_offset(reg), 16, 16, 0);
}

static DisasJumpType op_vge(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    read_vec_element_i64(tmp, get_field(s->fields, v2), enr, es);
    tcg_gen_add_i64(o->addr1, o->addr1, tmp);
    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 0);

    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    write_vec_element_i64(tmp, get_field(s->fields, v1), enr, es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static uint64_t generate_byte_mask(uint8_t mask)
{
    uint64_t r = 0;
    int i;

    for (i = 0; i < 8; i++) {
        if ((mask >> i) & 1) {
            r |= 0xffull << (i * 8);
        }
    }
    return r;
}

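/*
 * Worked example (illustrative): bit i of the mask selects byte i of the
 * result, counting from the least significant byte. So
 * generate_byte_mask(0x01) == 0x00000000000000ffull and
 * generate_byte_mask(0xa5) == 0xff00ff0000ff00ffull.
 */
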
static DisasJumpType op_vgbm(DisasContext *s, DisasOps *o)
{
    const uint16_t i2 = get_field(s->fields, i2);

    if (i2 == (i2 & 0xff) * 0x0101) {
        /*
         * Masks for both 64 bit elements of the vector are the same.
         * Trust tcg to produce a good constant loading.
         */
        gen_gvec_dup64i(get_field(s->fields, v1),
                        generate_byte_mask(i2 & 0xff));
    } else {
        TCGv_i64 t = tcg_temp_new_i64();

        tcg_gen_movi_i64(t, generate_byte_mask(i2 >> 8));
        write_vec_element_i64(t, get_field(s->fields, v1), 0, ES_64);
        tcg_gen_movi_i64(t, generate_byte_mask(i2));
        write_vec_element_i64(t, get_field(s->fields, v1), 1, ES_64);
        tcg_temp_free_i64(t);
    }
    return DISAS_NEXT;
}

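/*
 * For instance (a hedged example, not from the original file): i2 == 0x3c3c
 * takes the fast path above because (0x3c * 0x0101) == 0x3c3c, so both
 * doublewords receive generate_byte_mask(0x3c) == 0x0000ffffffff0000ull
 * via a single dup.
 */
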
static DisasJumpType op_vgm(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m4);
    const uint8_t bits = NUM_VEC_ELEMENT_BITS(es);
    const uint8_t i2 = get_field(s->fields, i2) & (bits - 1);
    const uint8_t i3 = get_field(s->fields, i3) & (bits - 1);
    uint64_t mask = 0;
    int i;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* generate the mask - take care of wrapping */
    for (i = i2; ; i = (i + 1) % bits) {
        mask |= 1ull << (bits - i - 1);
        if (i == i3) {
            break;
        }
    }

    gen_gvec_dupi(es, get_field(s->fields, v1), mask);
    return DISAS_NEXT;
}

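/*
 * Worked example (illustrative): for es == ES_8 (bits == 8), i2 == 6 and
 * i3 == 1, the loop sets bits 6, 7, 0 and 1 (numbered from the leftmost
 * bit), wrapping around, so every byte element becomes 0xc3.
 */
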
static DisasJumpType op_vl(DisasContext *s, DisasOps *o)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_TEQ);
    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
    tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEQ);
    write_vec_element_i64(t0, get_field(s->fields, v1), 0, ES_64);
    write_vec_element_i64(t1, get_field(s->fields, v1), 1, ES_64);
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
    return DISAS_NEXT;
}

static DisasJumpType op_vlr(DisasContext *s, DisasOps *o)
{
    gen_gvec_mov(get_field(s->fields, v1), get_field(s->fields, v2));
    return DISAS_NEXT;
}

static DisasJumpType op_vlrep(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    gen_gvec_dup_i64(es, get_field(s->fields, v1), tmp);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vle(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    write_vec_element_i64(tmp, get_field(s->fields, v1), enr, es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vlei(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_const_i64((int16_t)get_field(s->fields, i2));
    write_vec_element_i64(tmp, get_field(s->fields, v1), enr, es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vlgv(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m4);
    TCGv_ptr ptr;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* fast path if we don't need the register content */
    if (!get_field(s->fields, b2)) {
        uint8_t enr = get_field(s->fields, d2) & (NUM_VEC_ELEMENTS(es) - 1);

        read_vec_element_i64(o->out, get_field(s->fields, v3), enr, es);
        return DISAS_NEXT;
    }

    ptr = tcg_temp_new_ptr();
    get_vec_element_ptr_i64(ptr, get_field(s->fields, v3), o->addr1, es);
    switch (es) {
    case ES_8:
        tcg_gen_ld8u_i64(o->out, ptr, 0);
        break;
    case ES_16:
        tcg_gen_ld16u_i64(o->out, ptr, 0);
        break;
    case ES_32:
        tcg_gen_ld32u_i64(o->out, ptr, 0);
        break;
    case ES_64:
        tcg_gen_ld_i64(o->out, ptr, 0);
        break;
    default:
        g_assert_not_reached();
    }
    tcg_temp_free_ptr(ptr);

    return DISAS_NEXT;
}

static DisasJumpType op_vllez(DisasContext *s, DisasOps *o)
{
    uint8_t es = get_field(s->fields, m3);
    uint8_t enr;
    TCGv_i64 t;

    switch (es) {
    /* rightmost sub-element of leftmost doubleword */
    case ES_8:
        enr = 7;
        break;
    case ES_16:
        enr = 3;
        break;
    case ES_32:
        enr = 1;
        break;
    case ES_64:
        enr = 0;
        break;
    /* leftmost sub-element of leftmost doubleword */
    case 6:
        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
            es = ES_32;
            enr = 0;
            break;
        }
        /* fallthrough */
    default:
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    t = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(t, o->addr1, get_mem_index(s), MO_TE | es);
    zero_vec(get_field(s->fields, v1));
    write_vec_element_i64(t, get_field(s->fields, v1), enr, es);
    tcg_temp_free_i64(t);
    return DISAS_NEXT;
}

static DisasJumpType op_vlm(DisasContext *s, DisasOps *o)
{
    const uint8_t v3 = get_field(s->fields, v3);
    uint8_t v1 = get_field(s->fields, v1);
    TCGv_i64 t0, t1;

    if (v3 < v1 || (v3 - v1 + 1) > 16) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /*
     * Check for possible access exceptions by trying to load the last
     * element. The first element will be checked by the first load below.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();
    gen_addi_and_wrap_i64(s, t0, o->addr1, (v3 - v1) * 16 + 8);
    tcg_gen_qemu_ld_i64(t0, t0, get_mem_index(s), MO_TEQ);

    for (;; v1++) {
        tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEQ);
        write_vec_element_i64(t1, v1, 0, ES_64);
        if (v1 == v3) {
            break;
        }
        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
        tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEQ);
        write_vec_element_i64(t1, v1, 1, ES_64);
        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
    }

    /* Store the last element, loaded first */
    write_vec_element_i64(t0, v1, 1, ES_64);

    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
    return DISAS_NEXT;
}

static DisasJumpType op_vlbb(DisasContext *s, DisasOps *o)
{
    const int64_t block_size = (1ull << (get_field(s->fields, m3) + 6));
    const int v1_offs = vec_full_reg_offset(get_field(s->fields, v1));
    TCGv_ptr a0;
    TCGv_i64 bytes;

    if (get_field(s->fields, m3) > 6) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    bytes = tcg_temp_new_i64();
    a0 = tcg_temp_new_ptr();
    /* calculate the number of bytes until the next block boundary */
    tcg_gen_ori_i64(bytes, o->addr1, -block_size);
    tcg_gen_neg_i64(bytes, bytes);

    tcg_gen_addi_ptr(a0, cpu_env, v1_offs);
    gen_helper_vll(cpu_env, a0, o->addr1, bytes);
    tcg_temp_free_i64(bytes);
    tcg_temp_free_ptr(a0);
    return DISAS_NEXT;
}

static DisasJumpType op_vlvg(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m4);
    TCGv_ptr ptr;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* fast path if we don't need the register content */
    if (!get_field(s->fields, b2)) {
        uint8_t enr = get_field(s->fields, d2) & (NUM_VEC_ELEMENTS(es) - 1);

        write_vec_element_i64(o->in2, get_field(s->fields, v1), enr, es);
        return DISAS_NEXT;
    }

    ptr = tcg_temp_new_ptr();
    get_vec_element_ptr_i64(ptr, get_field(s->fields, v1), o->addr1, es);
    switch (es) {
    case ES_8:
        tcg_gen_st8_i64(o->in2, ptr, 0);
        break;
    case ES_16:
        tcg_gen_st16_i64(o->in2, ptr, 0);
        break;
    case ES_32:
        tcg_gen_st32_i64(o->in2, ptr, 0);
        break;
    case ES_64:
        tcg_gen_st_i64(o->in2, ptr, 0);
        break;
    default:
        g_assert_not_reached();
    }
    tcg_temp_free_ptr(ptr);

    return DISAS_NEXT;
}

static DisasJumpType op_vlvgp(DisasContext *s, DisasOps *o)
{
    write_vec_element_i64(o->in1, get_field(s->fields, v1), 0, ES_64);
    write_vec_element_i64(o->in2, get_field(s->fields, v1), 1, ES_64);
    return DISAS_NEXT;
}

static DisasJumpType op_vll(DisasContext *s, DisasOps *o)
{
    const int v1_offs = vec_full_reg_offset(get_field(s->fields, v1));
    TCGv_ptr a0 = tcg_temp_new_ptr();

    /* convert highest index into an actual length */
    tcg_gen_addi_i64(o->in2, o->in2, 1);
    tcg_gen_addi_ptr(a0, cpu_env, v1_offs);
    gen_helper_vll(cpu_env, a0, o->addr1, o->in2);
    tcg_temp_free_ptr(a0);
    return DISAS_NEXT;
}

static DisasJumpType op_vmr(DisasContext *s, DisasOps *o)
{
    const uint8_t v1 = get_field(s->fields, v1);
    const uint8_t v2 = get_field(s->fields, v2);
    const uint8_t v3 = get_field(s->fields, v3);
    const uint8_t es = get_field(s->fields, m4);
    int dst_idx, src_idx;
    TCGv_i64 tmp;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    if (s->fields->op2 == 0x61) {
        /* iterate backwards to avoid overwriting data we might need later */
        for (dst_idx = NUM_VEC_ELEMENTS(es) - 1; dst_idx >= 0; dst_idx--) {
            src_idx = dst_idx / 2;
            if (dst_idx % 2 == 0) {
                read_vec_element_i64(tmp, v2, src_idx, es);
            } else {
                read_vec_element_i64(tmp, v3, src_idx, es);
            }
            write_vec_element_i64(tmp, v1, dst_idx, es);
        }
    } else {
        /* iterate forward to avoid overwriting data we might need later */
        for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(es); dst_idx++) {
            src_idx = (dst_idx + NUM_VEC_ELEMENTS(es)) / 2;
            if (dst_idx % 2 == 0) {
                read_vec_element_i64(tmp, v2, src_idx, es);
            } else {
                read_vec_element_i64(tmp, v3, src_idx, es);
            }
            write_vec_element_i64(tmp, v1, dst_idx, es);
        }
    }
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

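/*
 * Worked example (illustrative): for VECTOR MERGE HIGH (op2 0x61) with
 * es == ES_32, the loops above produce v1 = (v2[0], v3[0], v2[1], v3[1]);
 * VECTOR MERGE LOW instead yields v1 = (v2[2], v3[2], v2[3], v3[3]).
 */
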
static DisasJumpType op_vpk(DisasContext *s, DisasOps *o)
{
    const uint8_t v1 = get_field(s->fields, v1);
    const uint8_t v2 = get_field(s->fields, v2);
    const uint8_t v3 = get_field(s->fields, v3);
    const uint8_t es = get_field(s->fields, m4);
    static gen_helper_gvec_3 * const vpk[3] = {
        gen_helper_gvec_vpk16,
        gen_helper_gvec_vpk32,
        gen_helper_gvec_vpk64,
    };
    static gen_helper_gvec_3 * const vpks[3] = {
        gen_helper_gvec_vpks16,
        gen_helper_gvec_vpks32,
        gen_helper_gvec_vpks64,
    };
    static gen_helper_gvec_3_ptr * const vpks_cc[3] = {
        gen_helper_gvec_vpks_cc16,
        gen_helper_gvec_vpks_cc32,
        gen_helper_gvec_vpks_cc64,
    };
    static gen_helper_gvec_3 * const vpkls[3] = {
        gen_helper_gvec_vpkls16,
        gen_helper_gvec_vpkls32,
        gen_helper_gvec_vpkls64,
    };
    static gen_helper_gvec_3_ptr * const vpkls_cc[3] = {
        gen_helper_gvec_vpkls_cc16,
        gen_helper_gvec_vpkls_cc32,
        gen_helper_gvec_vpkls_cc64,
    };

    if (es == ES_8 || es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    switch (s->fields->op2) {
    case 0x97:
        if (get_field(s->fields, m5) & 0x1) {
            gen_gvec_3_ptr(v1, v2, v3, cpu_env, 0, vpks_cc[es - 1]);
            set_cc_static(s);
        } else {
            gen_gvec_3_ool(v1, v2, v3, 0, vpks[es - 1]);
        }
        break;
    case 0x95:
        if (get_field(s->fields, m5) & 0x1) {
            gen_gvec_3_ptr(v1, v2, v3, cpu_env, 0, vpkls_cc[es - 1]);
            set_cc_static(s);
        } else {
            gen_gvec_3_ool(v1, v2, v3, 0, vpkls[es - 1]);
        }
        break;
    case 0x94:
        /* If sources and destination don't overlap -> fast path */
        if (v1 != v2 && v1 != v3) {
            const uint8_t src_es = get_field(s->fields, m4);
            const uint8_t dst_es = src_es - 1;
            TCGv_i64 tmp = tcg_temp_new_i64();
            int dst_idx, src_idx;

            for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(dst_es); dst_idx++) {
                src_idx = dst_idx;
                if (src_idx < NUM_VEC_ELEMENTS(src_es)) {
                    read_vec_element_i64(tmp, v2, src_idx, src_es);
                } else {
                    src_idx -= NUM_VEC_ELEMENTS(src_es);
                    read_vec_element_i64(tmp, v3, src_idx, src_es);
                }
                write_vec_element_i64(tmp, v1, dst_idx, dst_es);
            }
            tcg_temp_free_i64(tmp);
        } else {
            gen_gvec_3_ool(v1, v2, v3, 0, vpk[es - 1]);
        }
        break;
    default:
        g_assert_not_reached();
    }
    return DISAS_NEXT;
}

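/*
 * Layout of the fast path above, as an example (illustrative): packing
 * with src_es == ES_16 reads the 8 halfwords of v2 into byte elements
 * 0..7 of v1 and the 8 halfwords of v3 into byte elements 8..15, with
 * write_vec_element_i64() silently truncating each value to its low byte.
 */
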
static DisasJumpType op_vperm(DisasContext *s, DisasOps *o)
{
    gen_gvec_4_ool(get_field(s->fields, v1), get_field(s->fields, v2),
                   get_field(s->fields, v3), get_field(s->fields, v4),
                   0, gen_helper_gvec_vperm);
    return DISAS_NEXT;
}

static DisasJumpType op_vpdi(DisasContext *s, DisasOps *o)
{
    const uint8_t i2 = extract32(get_field(s->fields, m4), 2, 1);
    const uint8_t i3 = extract32(get_field(s->fields, m4), 0, 1);
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();

    read_vec_element_i64(t0, get_field(s->fields, v2), i2, ES_64);
    read_vec_element_i64(t1, get_field(s->fields, v3), i3, ES_64);
    write_vec_element_i64(t0, get_field(s->fields, v1), 0, ES_64);
    write_vec_element_i64(t1, get_field(s->fields, v1), 1, ES_64);
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
    return DISAS_NEXT;
}

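/*
 * Example (illustrative): m4 == 4 sets i2 = 1 and i3 = 0, so v1 becomes
 * (v2[1], v3[0]), i.e. the rightmost doubleword of v2 followed by the
 * leftmost doubleword of v3.
 */
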
static DisasJumpType op_vrep(DisasContext *s, DisasOps *o)
{
    const uint8_t enr = get_field(s->fields, i2);
    const uint8_t es = get_field(s->fields, m4);

    if (es > ES_64 || !valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tcg_gen_gvec_dup_mem(es, vec_full_reg_offset(get_field(s->fields, v1)),
                         vec_reg_offset(get_field(s->fields, v3), enr, es),
                         16, 16);
    return DISAS_NEXT;
}

static DisasJumpType op_vrepi(DisasContext *s, DisasOps *o)
{
    const int64_t data = (int16_t)get_field(s->fields, i2);
    const uint8_t es = get_field(s->fields, m3);

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    gen_gvec_dupi(es, get_field(s->fields, v1), data);
    return DISAS_NEXT;
}

static DisasJumpType op_vsce(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    read_vec_element_i64(tmp, get_field(s->fields, v2), enr, es);
    tcg_gen_add_i64(o->addr1, o->addr1, tmp);
    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 0);

    read_vec_element_i64(tmp, get_field(s->fields, v1), enr, es);
    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static void gen_sel_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 c)
{
    TCGv_i64 t = tcg_temp_new_i64();

    /* bit in c not set -> copy bit from b */
    tcg_gen_andc_i64(t, b, c);
    /* bit in c set -> copy bit from a */
    tcg_gen_and_i64(d, a, c);
    /* merge the results */
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}

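/*
 * In plain C terms (illustrative), this computes the bitwise select
 * d = (a & c) | (b & ~c): each result bit comes from a where the
 * corresponding mask bit in c is 1, and from b where it is 0.
 */
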
static void gen_sel_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b,
                        TCGv_vec c)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_andc_vec(vece, t, b, c);
    tcg_gen_and_vec(vece, d, a, c);
    tcg_gen_or_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}

static DisasJumpType op_vsel(DisasContext *s, DisasOps *o)
{
    static const GVecGen4 gvec_op = {
        .fni8 = gen_sel_i64,
        .fniv = gen_sel_vec,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    gen_gvec_4(get_field(s->fields, v1), get_field(s->fields, v2),
               get_field(s->fields, v3), get_field(s->fields, v4), &gvec_op);
    return DISAS_NEXT;
}
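
/*
 * Rough sketch of what happens with the GVecGen4 above (hedged, based on
 * generic gvec behavior): tcg_gen_gvec_4() expands via .fniv when the TCG
 * backend offers usable host vector operations, and otherwise falls back
 * to the .fni8 i64 expansion; .prefer_i64 tells the expander to prefer the
 * i64 variant over 64 bit host vectors on 64 bit hosts.
 */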