s390x/tcg: Implement VECTOR STORE WITH LENGTH
target/s390x/translate_vx.inc.c
/*
 * QEMU TCG support -- s390x vector instruction translation functions
 *
 * Copyright (C) 2019 Red Hat Inc
 *
 * Authors:
 *   David Hildenbrand <david@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

/*
 * For most instructions that use the same element size for reads and
 * writes, we can use real gvec vector expansion, which potentially uses
 * real host vector instructions. As they only work up to 64 bit elements,
 * 128 bit elements (vector is a single element) have to be handled
 * differently. Operations that are too complicated to encode via TCG ops
 * are handled via gvec ool (out-of-line) handlers.
 *
 * As soon as instructions use different element sizes for reads and writes
 * or access elements "out of their element scope", we expand them manually
 * in fancy loops, as gvec expansion does not deal with actual element
 * numbers and also does not support access to other elements.
 *
 * 128 bit elements:
 *  As we only have i32/i64, such elements have to be loaded into two
 *  i64 values and can then be processed e.g. by tcg_gen_add2_i64 (see the
 *  sketch after this comment).
 *
 * Sizes:
 *  On s390x, the operand size (oprsz) and the maximum size (maxsz) are
 *  always 16 (128 bit). What gvec code calls "vece", s390x calls "es",
 *  a.k.a. "element size". These values nicely map to MO_8 ... MO_64. Only
 *  the 128 bit element size has to be treated in a special way (MO_64 + 1).
 *  We will use ES_* instead of MO_* for this reason in this file.
 *
 * CC handling:
 *  As gvec ool-helpers currently cannot return values (besides via
 *  pointers like vectors or cpu_env), whenever we have to set the CC and
 *  can't conclude the value from the result vector, we directly
 *  set it in "env->cc_op" and mark it as static via set_cc_static().
 *  Whenever this is done, the helper writes globals (cc_op).
 */
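/*
 * A minimal sketch of that 128 bit pattern (illustration only; v1, v2 and
 * v3 are placeholder register numbers, using the element accessors defined
 * below):
 *
 *     TCGv_i64 al = tcg_temp_new_i64(), ah = tcg_temp_new_i64();
 *     TCGv_i64 bl = tcg_temp_new_i64(), bh = tcg_temp_new_i64();
 *
 *     read_vec_element_i64(ah, v2, 0, ES_64);    element 0: high doubleword
 *     read_vec_element_i64(al, v2, 1, ES_64);    element 1: low doubleword
 *     read_vec_element_i64(bh, v3, 0, ES_64);
 *     read_vec_element_i64(bl, v3, 1, ES_64);
 *     tcg_gen_add2_i64(al, ah, al, ah, bl, bh);  128 bit add, carry from
 *                                                the low into the high half
 *     write_vec_element_i64(ah, v1, 0, ES_64);
 *     write_vec_element_i64(al, v1, 1, ES_64);
 */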
#define NUM_VEC_ELEMENT_BYTES(es) (1 << (es))
#define NUM_VEC_ELEMENTS(es) (16 / NUM_VEC_ELEMENT_BYTES(es))
#define NUM_VEC_ELEMENT_BITS(es) (NUM_VEC_ELEMENT_BYTES(es) * BITS_PER_BYTE)

#define ES_8    MO_8
#define ES_16   MO_16
#define ES_32   MO_32
#define ES_64   MO_64
#define ES_128  4

static inline bool valid_vec_element(uint8_t enr, TCGMemOp es)
{
    return !(enr & ~(NUM_VEC_ELEMENTS(es) - 1));
}
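/*
 * E.g. with es = ES_32 a vector holds 16 / 4 = 4 elements, so only element
 * numbers 0..3 are valid; any higher bit set in enr makes
 * valid_vec_element() return false.
 */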
static void read_vec_element_i64(TCGv_i64 dst, uint8_t reg, uint8_t enr,
                                 TCGMemOp memop)
{
    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);

    switch (memop) {
    case ES_8:
        tcg_gen_ld8u_i64(dst, cpu_env, offs);
        break;
    case ES_16:
        tcg_gen_ld16u_i64(dst, cpu_env, offs);
        break;
    case ES_32:
        tcg_gen_ld32u_i64(dst, cpu_env, offs);
        break;
    case ES_8 | MO_SIGN:
        tcg_gen_ld8s_i64(dst, cpu_env, offs);
        break;
    case ES_16 | MO_SIGN:
        tcg_gen_ld16s_i64(dst, cpu_env, offs);
        break;
    case ES_32 | MO_SIGN:
        tcg_gen_ld32s_i64(dst, cpu_env, offs);
        break;
    case ES_64:
    case ES_64 | MO_SIGN:
        tcg_gen_ld_i64(dst, cpu_env, offs);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i64(TCGv_i64 src, int reg, uint8_t enr,
                                  TCGMemOp memop)
{
    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);

    switch (memop) {
    case ES_8:
        tcg_gen_st8_i64(src, cpu_env, offs);
        break;
    case ES_16:
        tcg_gen_st16_i64(src, cpu_env, offs);
        break;
    case ES_32:
        tcg_gen_st32_i64(src, cpu_env, offs);
        break;
    case ES_64:
        tcg_gen_st_i64(src, cpu_env, offs);
        break;
    default:
        g_assert_not_reached();
    }
}

static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
                                    uint8_t es)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    /* mask off invalid parts from the element nr */
    tcg_gen_andi_i64(tmp, enr, NUM_VEC_ELEMENTS(es) - 1);

    /* convert it to an element offset relative to cpu_env (vec_reg_offset()) */
    tcg_gen_shli_i64(tmp, tmp, es);
#ifndef HOST_WORDS_BIGENDIAN
    tcg_gen_xori_i64(tmp, tmp, 8 - NUM_VEC_ELEMENT_BYTES(es));
#endif
    tcg_gen_addi_i64(tmp, tmp, vec_full_reg_offset(reg));

    /* generate the final ptr by adding cpu_env */
    tcg_gen_trunc_i64_ptr(ptr, tmp);
    tcg_gen_add_ptr(ptr, ptr, cpu_env);

    tcg_temp_free_i64(tmp);
}
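/*
 * Worked example for the offset computation above, assuming a little
 * endian host, es = ES_16 and enr = 5: the masked element number is
 * 5 & 7 = 5, shifting left by es gives byte offset 10, and XORing with
 * 8 - 2 = 6 yields 12, i.e. the host byte offset of big-endian element 5
 * within the 16 byte register.
 */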
#define gen_gvec_3_ool(v1, v2, v3, data, fn) \
    tcg_gen_gvec_3_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       vec_full_reg_offset(v3), 16, 16, data, fn)
#define gen_gvec_3_ptr(v1, v2, v3, ptr, data, fn) \
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       vec_full_reg_offset(v3), ptr, 16, 16, data, fn)
#define gen_gvec_4(v1, v2, v3, v4, gen) \
    tcg_gen_gvec_4(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                   vec_full_reg_offset(v3), vec_full_reg_offset(v4), \
                   16, 16, gen)
#define gen_gvec_4_ool(v1, v2, v3, v4, data, fn) \
    tcg_gen_gvec_4_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       vec_full_reg_offset(v3), vec_full_reg_offset(v4), \
                       16, 16, data, fn)
#define gen_gvec_dup_i64(es, v1, c) \
    tcg_gen_gvec_dup_i64(es, vec_full_reg_offset(v1), 16, 16, c)
#define gen_gvec_mov(v1, v2) \
    tcg_gen_gvec_mov(0, vec_full_reg_offset(v1), vec_full_reg_offset(v2), 16, \
                     16)
#define gen_gvec_dup64i(v1, c) \
    tcg_gen_gvec_dup64i(vec_full_reg_offset(v1), 16, 16, c)

static void gen_gvec_dupi(uint8_t es, uint8_t reg, uint64_t c)
{
    switch (es) {
    case ES_8:
        tcg_gen_gvec_dup8i(vec_full_reg_offset(reg), 16, 16, c);
        break;
    case ES_16:
        tcg_gen_gvec_dup16i(vec_full_reg_offset(reg), 16, 16, c);
        break;
    case ES_32:
        tcg_gen_gvec_dup32i(vec_full_reg_offset(reg), 16, 16, c);
        break;
    case ES_64:
        gen_gvec_dup64i(reg, c);
        break;
    default:
        g_assert_not_reached();
    }
}

static void zero_vec(uint8_t reg)
{
    tcg_gen_gvec_dup8i(vec_full_reg_offset(reg), 16, 16, 0);
}
static DisasJumpType op_vge(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    read_vec_element_i64(tmp, get_field(s->fields, v2), enr, es);
    tcg_gen_add_i64(o->addr1, o->addr1, tmp);
    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 0);

    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    write_vec_element_i64(tmp, get_field(s->fields, v1), enr, es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static uint64_t generate_byte_mask(uint8_t mask)
{
    uint64_t r = 0;
    int i;

    for (i = 0; i < 8; i++) {
        if ((mask >> i) & 1) {
            r |= 0xffull << (i * 8);
        }
    }
    return r;
}
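/*
 * E.g. generate_byte_mask(0x81) expands bit 0 to 0x00000000000000ff and
 * bit 7 to 0xff00000000000000, returning 0xff000000000000ffull.
 */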
static DisasJumpType op_vgbm(DisasContext *s, DisasOps *o)
{
    const uint16_t i2 = get_field(s->fields, i2);

    if (i2 == (i2 & 0xff) * 0x0101) {
        /*
         * Masks for both 64 bit elements of the vector are the same.
         * Trust tcg to produce a good constant loading.
         */
        gen_gvec_dup64i(get_field(s->fields, v1),
                        generate_byte_mask(i2 & 0xff));
    } else {
        TCGv_i64 t = tcg_temp_new_i64();

        tcg_gen_movi_i64(t, generate_byte_mask(i2 >> 8));
        write_vec_element_i64(t, get_field(s->fields, v1), 0, ES_64);
        tcg_gen_movi_i64(t, generate_byte_mask(i2));
        write_vec_element_i64(t, get_field(s->fields, v1), 1, ES_64);
        tcg_temp_free_i64(t);
    }
    return DISAS_NEXT;
}

static DisasJumpType op_vgm(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m4);
    const uint8_t bits = NUM_VEC_ELEMENT_BITS(es);
    const uint8_t i2 = get_field(s->fields, i2) & (bits - 1);
    const uint8_t i3 = get_field(s->fields, i3) & (bits - 1);
    uint64_t mask = 0;
    int i;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* generate the mask - take care of wrapping */
    for (i = i2; ; i = (i + 1) % bits) {
        mask |= 1ull << (bits - i - 1);
        if (i == i3) {
            break;
        }
    }

    gen_gvec_dupi(es, get_field(s->fields, v1), mask);
    return DISAS_NEXT;
}
static DisasJumpType op_vl(DisasContext *s, DisasOps *o)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_TEQ);
    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
    tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEQ);
    write_vec_element_i64(t0, get_field(s->fields, v1), 0, ES_64);
    write_vec_element_i64(t1, get_field(s->fields, v1), 1, ES_64);
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
    return DISAS_NEXT;
}

static DisasJumpType op_vlr(DisasContext *s, DisasOps *o)
{
    gen_gvec_mov(get_field(s->fields, v1), get_field(s->fields, v2));
    return DISAS_NEXT;
}
static DisasJumpType op_vlrep(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    gen_gvec_dup_i64(es, get_field(s->fields, v1), tmp);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vle(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    write_vec_element_i64(tmp, get_field(s->fields, v1), enr, es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vlei(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_const_i64((int16_t)get_field(s->fields, i2));
    write_vec_element_i64(tmp, get_field(s->fields, v1), enr, es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}
static DisasJumpType op_vlgv(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m4);
    TCGv_ptr ptr;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* fast path if we don't need the register content */
    if (!get_field(s->fields, b2)) {
        uint8_t enr = get_field(s->fields, d2) & (NUM_VEC_ELEMENTS(es) - 1);

        read_vec_element_i64(o->out, get_field(s->fields, v3), enr, es);
        return DISAS_NEXT;
    }

    ptr = tcg_temp_new_ptr();
    get_vec_element_ptr_i64(ptr, get_field(s->fields, v3), o->addr1, es);
    switch (es) {
    case ES_8:
        tcg_gen_ld8u_i64(o->out, ptr, 0);
        break;
    case ES_16:
        tcg_gen_ld16u_i64(o->out, ptr, 0);
        break;
    case ES_32:
        tcg_gen_ld32u_i64(o->out, ptr, 0);
        break;
    case ES_64:
        tcg_gen_ld_i64(o->out, ptr, 0);
        break;
    default:
        g_assert_not_reached();
    }
    tcg_temp_free_ptr(ptr);

    return DISAS_NEXT;
}
static DisasJumpType op_vllez(DisasContext *s, DisasOps *o)
{
    uint8_t es = get_field(s->fields, m3);
    uint8_t enr;
    TCGv_i64 t;

    switch (es) {
    /* rightmost sub-element of leftmost doubleword */
    case ES_8:
        enr = 7;
        break;
    case ES_16:
        enr = 3;
        break;
    case ES_32:
        enr = 1;
        break;
    case ES_64:
        enr = 0;
        break;
    /* leftmost sub-element of leftmost doubleword */
    case 6:
        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
            es = ES_32;
            enr = 0;
            break;
        }
        /* fallthrough */
    default:
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    t = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(t, o->addr1, get_mem_index(s), MO_TE | es);
    zero_vec(get_field(s->fields, v1));
    write_vec_element_i64(t, get_field(s->fields, v1), enr, es);
    tcg_temp_free_i64(t);
    return DISAS_NEXT;
}
static DisasJumpType op_vlm(DisasContext *s, DisasOps *o)
{
    const uint8_t v3 = get_field(s->fields, v3);
    uint8_t v1 = get_field(s->fields, v1);
    TCGv_i64 t0, t1;

    if (v3 < v1 || (v3 - v1 + 1) > 16) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /*
     * Check for possible access exceptions by trying to load the last
     * element. The first element is then checked first by the loop below.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();
    gen_addi_and_wrap_i64(s, t0, o->addr1, (v3 - v1) * 16 + 8);
    tcg_gen_qemu_ld_i64(t0, t0, get_mem_index(s), MO_TEQ);

    for (;; v1++) {
        tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEQ);
        write_vec_element_i64(t1, v1, 0, ES_64);
        if (v1 == v3) {
            break;
        }
        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
        tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEQ);
        write_vec_element_i64(t1, v1, 1, ES_64);
        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
    }

    /* Store the last element, loaded first */
    write_vec_element_i64(t0, v1, 1, ES_64);

    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
    return DISAS_NEXT;
}
static DisasJumpType op_vlbb(DisasContext *s, DisasOps *o)
{
    const int64_t block_size = (1ull << (get_field(s->fields, m3) + 6));
    const int v1_offs = vec_full_reg_offset(get_field(s->fields, v1));
    TCGv_ptr a0;
    TCGv_i64 bytes;

    if (get_field(s->fields, m3) > 6) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    bytes = tcg_temp_new_i64();
    a0 = tcg_temp_new_ptr();
    /* calculate the number of bytes until the next block boundary */
    tcg_gen_ori_i64(bytes, o->addr1, -block_size);
    tcg_gen_neg_i64(bytes, bytes);

    tcg_gen_addi_ptr(a0, cpu_env, v1_offs);
    gen_helper_vll(cpu_env, a0, o->addr1, bytes);
    tcg_temp_free_i64(bytes);
    tcg_temp_free_ptr(a0);
    return DISAS_NEXT;
}
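/*
 * The two ops above compute bytes = -(addr | -block_size), which equals
 * block_size - (addr & (block_size - 1)). E.g. for a 64 byte block and
 * addr = 0x1010, bytes = 64 - 0x10 = 0x30, so the load stops at the
 * boundary at 0x1040.
 */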
static DisasJumpType op_vlvg(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m4);
    TCGv_ptr ptr;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* fast path if we don't need the register content */
    if (!get_field(s->fields, b2)) {
        uint8_t enr = get_field(s->fields, d2) & (NUM_VEC_ELEMENTS(es) - 1);

        write_vec_element_i64(o->in2, get_field(s->fields, v1), enr, es);
        return DISAS_NEXT;
    }

    ptr = tcg_temp_new_ptr();
    get_vec_element_ptr_i64(ptr, get_field(s->fields, v1), o->addr1, es);
    switch (es) {
    case ES_8:
        tcg_gen_st8_i64(o->in2, ptr, 0);
        break;
    case ES_16:
        tcg_gen_st16_i64(o->in2, ptr, 0);
        break;
    case ES_32:
        tcg_gen_st32_i64(o->in2, ptr, 0);
        break;
    case ES_64:
        tcg_gen_st_i64(o->in2, ptr, 0);
        break;
    default:
        g_assert_not_reached();
    }
    tcg_temp_free_ptr(ptr);

    return DISAS_NEXT;
}

static DisasJumpType op_vlvgp(DisasContext *s, DisasOps *o)
{
    write_vec_element_i64(o->in1, get_field(s->fields, v1), 0, ES_64);
    write_vec_element_i64(o->in2, get_field(s->fields, v1), 1, ES_64);
    return DISAS_NEXT;
}

static DisasJumpType op_vll(DisasContext *s, DisasOps *o)
{
    const int v1_offs = vec_full_reg_offset(get_field(s->fields, v1));
    TCGv_ptr a0 = tcg_temp_new_ptr();

    /* convert highest index into an actual length */
    tcg_gen_addi_i64(o->in2, o->in2, 1);
    tcg_gen_addi_ptr(a0, cpu_env, v1_offs);
    gen_helper_vll(cpu_env, a0, o->addr1, o->in2);
    tcg_temp_free_ptr(a0);
    return DISAS_NEXT;
}
static DisasJumpType op_vmr(DisasContext *s, DisasOps *o)
{
    const uint8_t v1 = get_field(s->fields, v1);
    const uint8_t v2 = get_field(s->fields, v2);
    const uint8_t v3 = get_field(s->fields, v3);
    const uint8_t es = get_field(s->fields, m4);
    int dst_idx, src_idx;
    TCGv_i64 tmp;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    if (s->fields->op2 == 0x61) {
        /* iterate backwards to avoid overwriting data we might need later */
        for (dst_idx = NUM_VEC_ELEMENTS(es) - 1; dst_idx >= 0; dst_idx--) {
            src_idx = dst_idx / 2;
            if (dst_idx % 2 == 0) {
                read_vec_element_i64(tmp, v2, src_idx, es);
            } else {
                read_vec_element_i64(tmp, v3, src_idx, es);
            }
            write_vec_element_i64(tmp, v1, dst_idx, es);
        }
    } else {
        /* iterate forward to avoid overwriting data we might need later */
        for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(es); dst_idx++) {
            src_idx = (dst_idx + NUM_VEC_ELEMENTS(es)) / 2;
            if (dst_idx % 2 == 0) {
                read_vec_element_i64(tmp, v2, src_idx, es);
            } else {
                read_vec_element_i64(tmp, v3, src_idx, es);
            }
            write_vec_element_i64(tmp, v1, dst_idx, es);
        }
    }
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}
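/*
 * Example of the merge patterns above for es = ES_32: the op2 == 0x61 case
 * (merge high) produces v1 = {v2[0], v3[0], v2[1], v3[1]}, while the merge
 * low case produces v1 = {v2[2], v3[2], v2[3], v3[3]}.
 */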
static DisasJumpType op_vpk(DisasContext *s, DisasOps *o)
{
    const uint8_t v1 = get_field(s->fields, v1);
    const uint8_t v2 = get_field(s->fields, v2);
    const uint8_t v3 = get_field(s->fields, v3);
    const uint8_t es = get_field(s->fields, m4);
    static gen_helper_gvec_3 * const vpk[3] = {
        gen_helper_gvec_vpk16,
        gen_helper_gvec_vpk32,
        gen_helper_gvec_vpk64,
    };
    static gen_helper_gvec_3 * const vpks[3] = {
        gen_helper_gvec_vpks16,
        gen_helper_gvec_vpks32,
        gen_helper_gvec_vpks64,
    };
    static gen_helper_gvec_3_ptr * const vpks_cc[3] = {
        gen_helper_gvec_vpks_cc16,
        gen_helper_gvec_vpks_cc32,
        gen_helper_gvec_vpks_cc64,
    };
    static gen_helper_gvec_3 * const vpkls[3] = {
        gen_helper_gvec_vpkls16,
        gen_helper_gvec_vpkls32,
        gen_helper_gvec_vpkls64,
    };
    static gen_helper_gvec_3_ptr * const vpkls_cc[3] = {
        gen_helper_gvec_vpkls_cc16,
        gen_helper_gvec_vpkls_cc32,
        gen_helper_gvec_vpkls_cc64,
    };

    if (es == ES_8 || es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    switch (s->fields->op2) {
    case 0x97:
        if (get_field(s->fields, m5) & 0x1) {
            gen_gvec_3_ptr(v1, v2, v3, cpu_env, 0, vpks_cc[es - 1]);
            set_cc_static(s);
        } else {
            gen_gvec_3_ool(v1, v2, v3, 0, vpks[es - 1]);
        }
        break;
    case 0x95:
        if (get_field(s->fields, m5) & 0x1) {
            gen_gvec_3_ptr(v1, v2, v3, cpu_env, 0, vpkls_cc[es - 1]);
            set_cc_static(s);
        } else {
            gen_gvec_3_ool(v1, v2, v3, 0, vpkls[es - 1]);
        }
        break;
    case 0x94:
        /* If sources and destination don't overlap -> fast path */
        if (v1 != v2 && v1 != v3) {
            const uint8_t src_es = get_field(s->fields, m4);
            const uint8_t dst_es = src_es - 1;
            TCGv_i64 tmp = tcg_temp_new_i64();
            int dst_idx, src_idx;

            for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(dst_es); dst_idx++) {
                src_idx = dst_idx;
                if (src_idx < NUM_VEC_ELEMENTS(src_es)) {
                    read_vec_element_i64(tmp, v2, src_idx, src_es);
                } else {
                    src_idx -= NUM_VEC_ELEMENTS(src_es);
                    read_vec_element_i64(tmp, v3, src_idx, src_es);
                }
                write_vec_element_i64(tmp, v1, dst_idx, dst_es);
            }
            tcg_temp_free_i64(tmp);
        } else {
            gen_gvec_3_ool(v1, v2, v3, 0, vpk[es - 1]);
        }
        break;
    default:
        g_assert_not_reached();
    }
    return DISAS_NEXT;
}
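/*
 * For the VPK fast path above with es = ES_16, the eight halfwords of v2
 * followed by the eight halfwords of v3 are truncated to bytes: byte i of
 * v1 receives the low byte of halfword i taken from v2 (i < 8) or from v3
 * (i >= 8).
 */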
static DisasJumpType op_vperm(DisasContext *s, DisasOps *o)
{
    gen_gvec_4_ool(get_field(s->fields, v1), get_field(s->fields, v2),
                   get_field(s->fields, v3), get_field(s->fields, v4),
                   0, gen_helper_gvec_vperm);
    return DISAS_NEXT;
}

static DisasJumpType op_vpdi(DisasContext *s, DisasOps *o)
{
    const uint8_t i2 = extract32(get_field(s->fields, m4), 2, 1);
    const uint8_t i3 = extract32(get_field(s->fields, m4), 0, 1);
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();

    read_vec_element_i64(t0, get_field(s->fields, v2), i2, ES_64);
    read_vec_element_i64(t1, get_field(s->fields, v3), i3, ES_64);
    write_vec_element_i64(t0, get_field(s->fields, v1), 0, ES_64);
    write_vec_element_i64(t1, get_field(s->fields, v1), 1, ES_64);
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
    return DISAS_NEXT;
}

static DisasJumpType op_vrep(DisasContext *s, DisasOps *o)
{
    const uint8_t enr = get_field(s->fields, i2);
    const uint8_t es = get_field(s->fields, m4);

    if (es > ES_64 || !valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tcg_gen_gvec_dup_mem(es, vec_full_reg_offset(get_field(s->fields, v1)),
                         vec_reg_offset(get_field(s->fields, v3), enr, es),
                         16, 16);
    return DISAS_NEXT;
}

static DisasJumpType op_vrepi(DisasContext *s, DisasOps *o)
{
    const int64_t data = (int16_t)get_field(s->fields, i2);
    const uint8_t es = get_field(s->fields, m3);

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    gen_gvec_dupi(es, get_field(s->fields, v1), data);
    return DISAS_NEXT;
}

static DisasJumpType op_vsce(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    read_vec_element_i64(tmp, get_field(s->fields, v2), enr, es);
    tcg_gen_add_i64(o->addr1, o->addr1, tmp);
    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 0);

    read_vec_element_i64(tmp, get_field(s->fields, v1), enr, es);
    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}
static void gen_sel_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 c)
{
    TCGv_i64 t = tcg_temp_new_i64();

    /* bit in c not set -> copy bit from b */
    tcg_gen_andc_i64(t, b, c);
    /* bit in c set -> copy bit from a */
    tcg_gen_and_i64(d, a, c);
    /* merge the results */
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}
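/*
 * This is a per-bit select: c = 0 takes the bit from b, c = 1 takes it
 * from a. E.g. a = 0xf0, b = 0x0f, c = 0xcc gives (b & ~c) | (a & c) =
 * 0x03 | 0xc0 = 0xc3.
 */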
static void gen_sel_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b,
                        TCGv_vec c)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_andc_vec(vece, t, b, c);
    tcg_gen_and_vec(vece, d, a, c);
    tcg_gen_or_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}

static DisasJumpType op_vsel(DisasContext *s, DisasOps *o)
{
    static const GVecGen4 gvec_op = {
        .fni8 = gen_sel_i64,
        .fniv = gen_sel_vec,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    gen_gvec_4(get_field(s->fields, v1), get_field(s->fields, v2),
               get_field(s->fields, v3), get_field(s->fields, v4), &gvec_op);
    return DISAS_NEXT;
}
static DisasJumpType op_vseg(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m3);
    int idx1, idx2;
    TCGv_i64 tmp;

    switch (es) {
    case ES_8:
        idx1 = 7;
        idx2 = 15;
        break;
    case ES_16:
        idx1 = 3;
        idx2 = 7;
        break;
    case ES_32:
        idx1 = 1;
        idx2 = 3;
        break;
    default:
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    read_vec_element_i64(tmp, get_field(s->fields, v2), idx1, es | MO_SIGN);
    write_vec_element_i64(tmp, get_field(s->fields, v1), 0, ES_64);
    read_vec_element_i64(tmp, get_field(s->fields, v2), idx2, es | MO_SIGN);
    write_vec_element_i64(tmp, get_field(s->fields, v1), 1, ES_64);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}
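/*
 * VSEG example for es = ES_32: elements 1 and 3 of v2 are sign-extended
 * into the two doublewords of v1, e.g. v2 = {0, 0x80000000, 0, 1} yields
 * v1 = {0xffffffff80000000, 0x0000000000000001}.
 */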
static DisasJumpType op_vst(DisasContext *s, DisasOps *o)
{
    TCGv_i64 tmp = tcg_const_i64(16);

    /* Probe write access before actually modifying memory */
    gen_helper_probe_write_access(cpu_env, o->addr1, tmp);

    read_vec_element_i64(tmp, get_field(s->fields, v1), 0, ES_64);
    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ);
    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
    read_vec_element_i64(tmp, get_field(s->fields, v1), 1, ES_64);
    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vste(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    read_vec_element_i64(tmp, get_field(s->fields, v1), enr, es);
    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}
static DisasJumpType op_vstm(DisasContext *s, DisasOps *o)
{
    const uint8_t v3 = get_field(s->fields, v3);
    uint8_t v1 = get_field(s->fields, v1);
    TCGv_i64 tmp;

    if (v3 < v1 || (v3 - v1 + 1) > 16) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* Probe write access before actually modifying memory */
    tmp = tcg_const_i64((v3 - v1 + 1) * 16);
    gen_helper_probe_write_access(cpu_env, o->addr1, tmp);

    for (;; v1++) {
        read_vec_element_i64(tmp, v1, 0, ES_64);
        tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ);
        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
        read_vec_element_i64(tmp, v1, 1, ES_64);
        tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ);
        if (v1 == v3) {
            break;
        }
        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
    }
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}
static DisasJumpType op_vstl(DisasContext *s, DisasOps *o)
{
    const int v1_offs = vec_full_reg_offset(get_field(s->fields, v1));
    TCGv_ptr a0 = tcg_temp_new_ptr();

    /* convert highest index into an actual length */
    tcg_gen_addi_i64(o->in2, o->in2, 1);
    tcg_gen_addi_ptr(a0, cpu_env, v1_offs);
    gen_helper_vstl(cpu_env, a0, o->addr1, o->in2);
    tcg_temp_free_ptr(a0);
    return DISAS_NEXT;
}