s390x/tcg: Implement VECTOR ADD COMPUTE CARRY
qemu/ar7.git: target/s390x/translate_vx.inc.c
/*
 * QEMU TCG support -- s390x vector instruction translation functions
 *
 * Copyright (C) 2019 Red Hat Inc
 *
 * Authors:
 *   David Hildenbrand <david@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

/*
 * For most instructions that use the same element size for reads and
 * writes, we can use real gvec vector expansion, which potentially uses
 * real host vector instructions. As they only work up to 64 bit elements,
 * 128 bit elements (vector is a single element) have to be handled
 * differently. Operations that are too complicated to encode via TCG ops
 * are handled via gvec ool (out-of-line) handlers.
 *
 * As soon as instructions use different element sizes for reads and writes
 * or access elements "out of their element scope" we expand them manually
 * in fancy loops, as gvec expansion does not deal with actual element
 * numbers and does also not support access to other elements.
 *
 * 128 bit elements:
 *  As we only have i32/i64, such elements have to be loaded into two
 *  i64 values and can then be processed e.g. by tcg_gen_add2_i64.
 *
 * Sizes:
 *  On s390x, the operand size (oprsz) and the maximum size (maxsz) are
 *  always 16 (128 bit). What gvec code calls "vece", s390x calls "es",
 *  a.k.a. "element size". These values nicely map to MO_8 ... MO_64. Only
 *  128 bit element size has to be treated in a special way (MO_64 + 1).
 *  We will use ES_* instead of MO_* for this reason in this file.
 *
 * CC handling:
 *  As gvec ool-helpers can currently not return values (besides via
 *  pointers like vectors or cpu_env), whenever we have to set the CC and
 *  can't conclude the value from the result vector, we will directly
 *  set it in "env->cc_op" and mark it as static via set_cc_static().
 *  Whenever this is done, the helper writes globals (cc_op).
 */

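/*
 * For example (cf. op_vpk() below), a handler that needs the CC from an
 * out-of-line helper passes cpu_env and marks the CC as static:
 *
 *     gen_gvec_3_ptr(v1, v2, v3, cpu_env, 0, gen_helper_gvec_vpks_cc16);
 *     set_cc_static(s);
 */
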
#define NUM_VEC_ELEMENT_BYTES(es) (1 << (es))
#define NUM_VEC_ELEMENTS(es) (16 / NUM_VEC_ELEMENT_BYTES(es))
#define NUM_VEC_ELEMENT_BITS(es) (NUM_VEC_ELEMENT_BYTES(es) * BITS_PER_BYTE)

#define ES_8    MO_8
#define ES_16   MO_16
#define ES_32   MO_32
#define ES_64   MO_64
#define ES_128  4

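/*
 * As a worked example of the macros above (the values follow directly from
 * the definitions): for ES_32, NUM_VEC_ELEMENT_BYTES(ES_32) == 4,
 * NUM_VEC_ELEMENTS(ES_32) == 4 and NUM_VEC_ELEMENT_BITS(ES_32) == 32, i.e.
 * a 128 bit vector register holds four 32 bit elements, element number 0
 * being the leftmost one.
 */
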
static inline bool valid_vec_element(uint8_t enr, TCGMemOp es)
{
    return !(enr & ~(NUM_VEC_ELEMENTS(es) - 1));
}

static void read_vec_element_i64(TCGv_i64 dst, uint8_t reg, uint8_t enr,
                                 TCGMemOp memop)
{
    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);

    switch (memop) {
    case ES_8:
        tcg_gen_ld8u_i64(dst, cpu_env, offs);
        break;
    case ES_16:
        tcg_gen_ld16u_i64(dst, cpu_env, offs);
        break;
    case ES_32:
        tcg_gen_ld32u_i64(dst, cpu_env, offs);
        break;
    case ES_8 | MO_SIGN:
        tcg_gen_ld8s_i64(dst, cpu_env, offs);
        break;
    case ES_16 | MO_SIGN:
        tcg_gen_ld16s_i64(dst, cpu_env, offs);
        break;
    case ES_32 | MO_SIGN:
        tcg_gen_ld32s_i64(dst, cpu_env, offs);
        break;
    case ES_64:
    case ES_64 | MO_SIGN:
        tcg_gen_ld_i64(dst, cpu_env, offs);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i64(TCGv_i64 src, int reg, uint8_t enr,
                                  TCGMemOp memop)
{
    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);

    switch (memop) {
    case ES_8:
        tcg_gen_st8_i64(src, cpu_env, offs);
        break;
    case ES_16:
        tcg_gen_st16_i64(src, cpu_env, offs);
        break;
    case ES_32:
        tcg_gen_st32_i64(src, cpu_env, offs);
        break;
    case ES_64:
        tcg_gen_st_i64(src, cpu_env, offs);
        break;
    default:
        g_assert_not_reached();
    }
}

static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
                                    uint8_t es)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    /* mask off invalid parts from the element nr */
    tcg_gen_andi_i64(tmp, enr, NUM_VEC_ELEMENTS(es) - 1);

    /* convert it to an element offset relative to cpu_env (vec_reg_offset()) */
    tcg_gen_shli_i64(tmp, tmp, es);
#ifndef HOST_WORDS_BIGENDIAN
    tcg_gen_xori_i64(tmp, tmp, 8 - NUM_VEC_ELEMENT_BYTES(es));
#endif
    tcg_gen_addi_i64(tmp, tmp, vec_full_reg_offset(reg));

    /* generate the final ptr by adding cpu_env */
    tcg_gen_trunc_i64_ptr(ptr, tmp);
    tcg_gen_add_ptr(ptr, ptr, cpu_env);

    tcg_temp_free_i64(tmp);
}

#define gen_gvec_3(v1, v2, v3, gen) \
    tcg_gen_gvec_3(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                   vec_full_reg_offset(v3), 16, 16, gen)
#define gen_gvec_3_ool(v1, v2, v3, data, fn) \
    tcg_gen_gvec_3_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       vec_full_reg_offset(v3), 16, 16, data, fn)
#define gen_gvec_3_ptr(v1, v2, v3, ptr, data, fn) \
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       vec_full_reg_offset(v3), ptr, 16, 16, data, fn)
#define gen_gvec_4(v1, v2, v3, v4, gen) \
    tcg_gen_gvec_4(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                   vec_full_reg_offset(v3), vec_full_reg_offset(v4), \
                   16, 16, gen)
#define gen_gvec_4_ool(v1, v2, v3, v4, data, fn) \
    tcg_gen_gvec_4_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       vec_full_reg_offset(v3), vec_full_reg_offset(v4), \
                       16, 16, data, fn)
#define gen_gvec_dup_i64(es, v1, c) \
    tcg_gen_gvec_dup_i64(es, vec_full_reg_offset(v1), 16, 16, c)
#define gen_gvec_mov(v1, v2) \
    tcg_gen_gvec_mov(0, vec_full_reg_offset(v1), vec_full_reg_offset(v2), 16, \
                     16)
#define gen_gvec_dup64i(v1, c) \
    tcg_gen_gvec_dup64i(vec_full_reg_offset(v1), 16, 16, c)
#define gen_gvec_fn_3(fn, es, v1, v2, v3) \
    tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                      vec_full_reg_offset(v3), 16, 16)

/*
 * Helper to carry out a 128 bit vector computation using 2 i64 values per
 * vector.
 */
typedef void (*gen_gvec128_3_i64_fn)(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al,
                                     TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh);
static void gen_gvec128_3_i64(gen_gvec128_3_i64_fn fn, uint8_t d, uint8_t a,
                              uint8_t b)
{
    TCGv_i64 dh = tcg_temp_new_i64();
    TCGv_i64 dl = tcg_temp_new_i64();
    TCGv_i64 ah = tcg_temp_new_i64();
    TCGv_i64 al = tcg_temp_new_i64();
    TCGv_i64 bh = tcg_temp_new_i64();
    TCGv_i64 bl = tcg_temp_new_i64();

    read_vec_element_i64(ah, a, 0, ES_64);
    read_vec_element_i64(al, a, 1, ES_64);
    read_vec_element_i64(bh, b, 0, ES_64);
    read_vec_element_i64(bl, b, 1, ES_64);
    fn(dl, dh, al, ah, bl, bh);
    write_vec_element_i64(dh, d, 0, ES_64);
    write_vec_element_i64(dl, d, 1, ES_64);

    tcg_temp_free_i64(dh);
    tcg_temp_free_i64(dl);
    tcg_temp_free_i64(ah);
    tcg_temp_free_i64(al);
    tcg_temp_free_i64(bh);
    tcg_temp_free_i64(bl);
}

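/*
 * A typical caller (cf. op_va() below) expresses a 128 bit operation as a
 * doubleword-pair operation, e.g.:
 *
 *     gen_gvec128_3_i64(tcg_gen_add2_i64, v1, v2, v3);
 */
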
static void gen_gvec_dupi(uint8_t es, uint8_t reg, uint64_t c)
{
    switch (es) {
    case ES_8:
        tcg_gen_gvec_dup8i(vec_full_reg_offset(reg), 16, 16, c);
        break;
    case ES_16:
        tcg_gen_gvec_dup16i(vec_full_reg_offset(reg), 16, 16, c);
        break;
    case ES_32:
        tcg_gen_gvec_dup32i(vec_full_reg_offset(reg), 16, 16, c);
        break;
    case ES_64:
        gen_gvec_dup64i(reg, c);
        break;
    default:
        g_assert_not_reached();
    }
}

static void zero_vec(uint8_t reg)
{
    tcg_gen_gvec_dup8i(vec_full_reg_offset(reg), 16, 16, 0);
}

static DisasJumpType op_vge(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    read_vec_element_i64(tmp, get_field(s->fields, v2), enr, es);
    tcg_gen_add_i64(o->addr1, o->addr1, tmp);
    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 0);

    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    write_vec_element_i64(tmp, get_field(s->fields, v1), enr, es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static uint64_t generate_byte_mask(uint8_t mask)
{
    uint64_t r = 0;
    int i;

    for (i = 0; i < 8; i++) {
        if ((mask >> i) & 1) {
            r |= 0xffull << (i * 8);
        }
    }
    return r;
}

static DisasJumpType op_vgbm(DisasContext *s, DisasOps *o)
{
    const uint16_t i2 = get_field(s->fields, i2);

    if (i2 == (i2 & 0xff) * 0x0101) {
        /*
         * Masks for both 64 bit elements of the vector are the same.
         * Trust tcg to produce a good constant loading.
         */
        gen_gvec_dup64i(get_field(s->fields, v1),
                        generate_byte_mask(i2 & 0xff));
    } else {
        TCGv_i64 t = tcg_temp_new_i64();

        tcg_gen_movi_i64(t, generate_byte_mask(i2 >> 8));
        write_vec_element_i64(t, get_field(s->fields, v1), 0, ES_64);
        tcg_gen_movi_i64(t, generate_byte_mask(i2));
        write_vec_element_i64(t, get_field(s->fields, v1), 1, ES_64);
        tcg_temp_free_i64(t);
    }
    return DISAS_NEXT;
}

static DisasJumpType op_vgm(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m4);
    const uint8_t bits = NUM_VEC_ELEMENT_BITS(es);
    const uint8_t i2 = get_field(s->fields, i2) & (bits - 1);
    const uint8_t i3 = get_field(s->fields, i3) & (bits - 1);
    uint64_t mask = 0;
    int i;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* generate the mask - take care of wrapping */
    for (i = i2; ; i = (i + 1) % bits) {
        mask |= 1ull << (bits - i - 1);
        if (i == i3) {
            break;
        }
    }

    gen_gvec_dupi(es, get_field(s->fields, v1), mask);
    return DISAS_NEXT;
}

static DisasJumpType op_vl(DisasContext *s, DisasOps *o)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_TEQ);
    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
    tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEQ);
    write_vec_element_i64(t0, get_field(s->fields, v1), 0, ES_64);
    write_vec_element_i64(t1, get_field(s->fields, v1), 1, ES_64);
    tcg_temp_free(t0);
    tcg_temp_free(t1);
    return DISAS_NEXT;
}

static DisasJumpType op_vlr(DisasContext *s, DisasOps *o)
{
    gen_gvec_mov(get_field(s->fields, v1), get_field(s->fields, v2));
    return DISAS_NEXT;
}

static DisasJumpType op_vlrep(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    gen_gvec_dup_i64(es, get_field(s->fields, v1), tmp);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vle(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    write_vec_element_i64(tmp, get_field(s->fields, v1), enr, es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vlei(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_const_i64((int16_t)get_field(s->fields, i2));
    write_vec_element_i64(tmp, get_field(s->fields, v1), enr, es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vlgv(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m4);
    TCGv_ptr ptr;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* fast path if we don't need the register content */
    if (!get_field(s->fields, b2)) {
        uint8_t enr = get_field(s->fields, d2) & (NUM_VEC_ELEMENTS(es) - 1);

        read_vec_element_i64(o->out, get_field(s->fields, v3), enr, es);
        return DISAS_NEXT;
    }

    ptr = tcg_temp_new_ptr();
    get_vec_element_ptr_i64(ptr, get_field(s->fields, v3), o->addr1, es);
    switch (es) {
    case ES_8:
        tcg_gen_ld8u_i64(o->out, ptr, 0);
        break;
    case ES_16:
        tcg_gen_ld16u_i64(o->out, ptr, 0);
        break;
    case ES_32:
        tcg_gen_ld32u_i64(o->out, ptr, 0);
        break;
    case ES_64:
        tcg_gen_ld_i64(o->out, ptr, 0);
        break;
    default:
        g_assert_not_reached();
    }
    tcg_temp_free_ptr(ptr);

    return DISAS_NEXT;
}

static DisasJumpType op_vllez(DisasContext *s, DisasOps *o)
{
    uint8_t es = get_field(s->fields, m3);
    uint8_t enr;
    TCGv_i64 t;

    switch (es) {
    /* rightmost sub-element of leftmost doubleword */
    case ES_8:
        enr = 7;
        break;
    case ES_16:
        enr = 3;
        break;
    case ES_32:
        enr = 1;
        break;
    case ES_64:
        enr = 0;
        break;
    /* leftmost sub-element of leftmost doubleword */
    case 6:
        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
            es = ES_32;
            enr = 0;
            break;
        }
        /* fallthrough */
    default:
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    t = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(t, o->addr1, get_mem_index(s), MO_TE | es);
    zero_vec(get_field(s->fields, v1));
    write_vec_element_i64(t, get_field(s->fields, v1), enr, es);
    tcg_temp_free_i64(t);
    return DISAS_NEXT;
}

static DisasJumpType op_vlm(DisasContext *s, DisasOps *o)
{
    const uint8_t v3 = get_field(s->fields, v3);
    uint8_t v1 = get_field(s->fields, v1);
    TCGv_i64 t0, t1;

    if (v3 < v1 || (v3 - v1 + 1) > 16) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /*
     * Check for possible access exceptions by trying to load the last
     * element. The first element will be checked next, as it is loaded
     * first.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();
    gen_addi_and_wrap_i64(s, t0, o->addr1, (v3 - v1) * 16 + 8);
    tcg_gen_qemu_ld_i64(t0, t0, get_mem_index(s), MO_TEQ);

    for (;; v1++) {
        tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEQ);
        write_vec_element_i64(t1, v1, 0, ES_64);
        if (v1 == v3) {
            break;
        }
        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
        tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEQ);
        write_vec_element_i64(t1, v1, 1, ES_64);
        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
    }

    /* Store the last element, loaded first */
    write_vec_element_i64(t0, v1, 1, ES_64);

    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
    return DISAS_NEXT;
}

static DisasJumpType op_vlbb(DisasContext *s, DisasOps *o)
{
    const int64_t block_size = (1ull << (get_field(s->fields, m3) + 6));
    const int v1_offs = vec_full_reg_offset(get_field(s->fields, v1));
    TCGv_ptr a0;
    TCGv_i64 bytes;

    if (get_field(s->fields, m3) > 6) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    bytes = tcg_temp_new_i64();
    a0 = tcg_temp_new_ptr();
    /* calculate the number of bytes until the next block boundary */
    tcg_gen_ori_i64(bytes, o->addr1, -block_size);
    tcg_gen_neg_i64(bytes, bytes);

    tcg_gen_addi_ptr(a0, cpu_env, v1_offs);
    gen_helper_vll(cpu_env, a0, o->addr1, bytes);
    tcg_temp_free_i64(bytes);
    tcg_temp_free_ptr(a0);
    return DISAS_NEXT;
}

static DisasJumpType op_vlvg(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m4);
    TCGv_ptr ptr;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* fast path if we don't need the register content */
    if (!get_field(s->fields, b2)) {
        uint8_t enr = get_field(s->fields, d2) & (NUM_VEC_ELEMENTS(es) - 1);

        write_vec_element_i64(o->in2, get_field(s->fields, v1), enr, es);
        return DISAS_NEXT;
    }

    ptr = tcg_temp_new_ptr();
    get_vec_element_ptr_i64(ptr, get_field(s->fields, v1), o->addr1, es);
    switch (es) {
    case ES_8:
        tcg_gen_st8_i64(o->in2, ptr, 0);
        break;
    case ES_16:
        tcg_gen_st16_i64(o->in2, ptr, 0);
        break;
    case ES_32:
        tcg_gen_st32_i64(o->in2, ptr, 0);
        break;
    case ES_64:
        tcg_gen_st_i64(o->in2, ptr, 0);
        break;
    default:
        g_assert_not_reached();
    }
    tcg_temp_free_ptr(ptr);

    return DISAS_NEXT;
}

static DisasJumpType op_vlvgp(DisasContext *s, DisasOps *o)
{
    write_vec_element_i64(o->in1, get_field(s->fields, v1), 0, ES_64);
    write_vec_element_i64(o->in2, get_field(s->fields, v1), 1, ES_64);
    return DISAS_NEXT;
}

static DisasJumpType op_vll(DisasContext *s, DisasOps *o)
{
    const int v1_offs = vec_full_reg_offset(get_field(s->fields, v1));
    TCGv_ptr a0 = tcg_temp_new_ptr();

    /* convert highest index into an actual length */
    tcg_gen_addi_i64(o->in2, o->in2, 1);
    tcg_gen_addi_ptr(a0, cpu_env, v1_offs);
    gen_helper_vll(cpu_env, a0, o->addr1, o->in2);
    tcg_temp_free_ptr(a0);
    return DISAS_NEXT;
}

static DisasJumpType op_vmr(DisasContext *s, DisasOps *o)
{
    const uint8_t v1 = get_field(s->fields, v1);
    const uint8_t v2 = get_field(s->fields, v2);
    const uint8_t v3 = get_field(s->fields, v3);
    const uint8_t es = get_field(s->fields, m4);
    int dst_idx, src_idx;
    TCGv_i64 tmp;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    if (s->fields->op2 == 0x61) {
        /* iterate backwards to avoid overwriting data we might need later */
        for (dst_idx = NUM_VEC_ELEMENTS(es) - 1; dst_idx >= 0; dst_idx--) {
            src_idx = dst_idx / 2;
            if (dst_idx % 2 == 0) {
                read_vec_element_i64(tmp, v2, src_idx, es);
            } else {
                read_vec_element_i64(tmp, v3, src_idx, es);
            }
            write_vec_element_i64(tmp, v1, dst_idx, es);
        }
    } else {
        /* iterate forward to avoid overwriting data we might need later */
        for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(es); dst_idx++) {
            src_idx = (dst_idx + NUM_VEC_ELEMENTS(es)) / 2;
            if (dst_idx % 2 == 0) {
                read_vec_element_i64(tmp, v2, src_idx, es);
            } else {
                read_vec_element_i64(tmp, v3, src_idx, es);
            }
            write_vec_element_i64(tmp, v1, dst_idx, es);
        }
    }
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vpk(DisasContext *s, DisasOps *o)
{
    const uint8_t v1 = get_field(s->fields, v1);
    const uint8_t v2 = get_field(s->fields, v2);
    const uint8_t v3 = get_field(s->fields, v3);
    const uint8_t es = get_field(s->fields, m4);
    static gen_helper_gvec_3 * const vpk[3] = {
        gen_helper_gvec_vpk16,
        gen_helper_gvec_vpk32,
        gen_helper_gvec_vpk64,
    };
    static gen_helper_gvec_3 * const vpks[3] = {
        gen_helper_gvec_vpks16,
        gen_helper_gvec_vpks32,
        gen_helper_gvec_vpks64,
    };
    static gen_helper_gvec_3_ptr * const vpks_cc[3] = {
        gen_helper_gvec_vpks_cc16,
        gen_helper_gvec_vpks_cc32,
        gen_helper_gvec_vpks_cc64,
    };
    static gen_helper_gvec_3 * const vpkls[3] = {
        gen_helper_gvec_vpkls16,
        gen_helper_gvec_vpkls32,
        gen_helper_gvec_vpkls64,
    };
    static gen_helper_gvec_3_ptr * const vpkls_cc[3] = {
        gen_helper_gvec_vpkls_cc16,
        gen_helper_gvec_vpkls_cc32,
        gen_helper_gvec_vpkls_cc64,
    };

    if (es == ES_8 || es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    switch (s->fields->op2) {
    case 0x97:
        if (get_field(s->fields, m5) & 0x1) {
            gen_gvec_3_ptr(v1, v2, v3, cpu_env, 0, vpks_cc[es - 1]);
            set_cc_static(s);
        } else {
            gen_gvec_3_ool(v1, v2, v3, 0, vpks[es - 1]);
        }
        break;
    case 0x95:
        if (get_field(s->fields, m5) & 0x1) {
            gen_gvec_3_ptr(v1, v2, v3, cpu_env, 0, vpkls_cc[es - 1]);
            set_cc_static(s);
        } else {
            gen_gvec_3_ool(v1, v2, v3, 0, vpkls[es - 1]);
        }
        break;
    case 0x94:
        /* If sources and destination don't overlap -> fast path */
        if (v1 != v2 && v1 != v3) {
            const uint8_t src_es = get_field(s->fields, m4);
            const uint8_t dst_es = src_es - 1;
            TCGv_i64 tmp = tcg_temp_new_i64();
            int dst_idx, src_idx;

            for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(dst_es); dst_idx++) {
                src_idx = dst_idx;
                if (src_idx < NUM_VEC_ELEMENTS(src_es)) {
                    read_vec_element_i64(tmp, v2, src_idx, src_es);
                } else {
                    src_idx -= NUM_VEC_ELEMENTS(src_es);
                    read_vec_element_i64(tmp, v3, src_idx, src_es);
                }
                write_vec_element_i64(tmp, v1, dst_idx, dst_es);
            }
            tcg_temp_free_i64(tmp);
        } else {
            gen_gvec_3_ool(v1, v2, v3, 0, vpk[es - 1]);
        }
        break;
    default:
        g_assert_not_reached();
    }
    return DISAS_NEXT;
}

static DisasJumpType op_vperm(DisasContext *s, DisasOps *o)
{
    gen_gvec_4_ool(get_field(s->fields, v1), get_field(s->fields, v2),
                   get_field(s->fields, v3), get_field(s->fields, v4),
                   0, gen_helper_gvec_vperm);
    return DISAS_NEXT;
}

static DisasJumpType op_vpdi(DisasContext *s, DisasOps *o)
{
    const uint8_t i2 = extract32(get_field(s->fields, m4), 2, 1);
    const uint8_t i3 = extract32(get_field(s->fields, m4), 0, 1);
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();

    read_vec_element_i64(t0, get_field(s->fields, v2), i2, ES_64);
    read_vec_element_i64(t1, get_field(s->fields, v3), i3, ES_64);
    write_vec_element_i64(t0, get_field(s->fields, v1), 0, ES_64);
    write_vec_element_i64(t1, get_field(s->fields, v1), 1, ES_64);
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
    return DISAS_NEXT;
}

static DisasJumpType op_vrep(DisasContext *s, DisasOps *o)
{
    const uint8_t enr = get_field(s->fields, i2);
    const uint8_t es = get_field(s->fields, m4);

    if (es > ES_64 || !valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tcg_gen_gvec_dup_mem(es, vec_full_reg_offset(get_field(s->fields, v1)),
                         vec_reg_offset(get_field(s->fields, v3), enr, es),
                         16, 16);
    return DISAS_NEXT;
}

static DisasJumpType op_vrepi(DisasContext *s, DisasOps *o)
{
    const int64_t data = (int16_t)get_field(s->fields, i2);
    const uint8_t es = get_field(s->fields, m3);

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    gen_gvec_dupi(es, get_field(s->fields, v1), data);
    return DISAS_NEXT;
}

static DisasJumpType op_vsce(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    read_vec_element_i64(tmp, get_field(s->fields, v2), enr, es);
    tcg_gen_add_i64(o->addr1, o->addr1, tmp);
    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 0);

    read_vec_element_i64(tmp, get_field(s->fields, v1), enr, es);
    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static void gen_sel_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 c)
{
    TCGv_i64 t = tcg_temp_new_i64();

    /* bit in c not set -> copy bit from b */
    tcg_gen_andc_i64(t, b, c);
    /* bit in c set -> copy bit from a */
    tcg_gen_and_i64(d, a, c);
    /* merge the results */
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}

static void gen_sel_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b,
                        TCGv_vec c)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_andc_vec(vece, t, b, c);
    tcg_gen_and_vec(vece, d, a, c);
    tcg_gen_or_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}

static DisasJumpType op_vsel(DisasContext *s, DisasOps *o)
{
    static const GVecGen4 gvec_op = {
        .fni8 = gen_sel_i64,
        .fniv = gen_sel_vec,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    gen_gvec_4(get_field(s->fields, v1), get_field(s->fields, v2),
               get_field(s->fields, v3), get_field(s->fields, v4), &gvec_op);
    return DISAS_NEXT;
}

static DisasJumpType op_vseg(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m3);
    int idx1, idx2;
    TCGv_i64 tmp;

    switch (es) {
    case ES_8:
        idx1 = 7;
        idx2 = 15;
        break;
    case ES_16:
        idx1 = 3;
        idx2 = 7;
        break;
    case ES_32:
        idx1 = 1;
        idx2 = 3;
        break;
    default:
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    read_vec_element_i64(tmp, get_field(s->fields, v2), idx1, es | MO_SIGN);
    write_vec_element_i64(tmp, get_field(s->fields, v1), 0, ES_64);
    read_vec_element_i64(tmp, get_field(s->fields, v2), idx2, es | MO_SIGN);
    write_vec_element_i64(tmp, get_field(s->fields, v1), 1, ES_64);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vst(DisasContext *s, DisasOps *o)
{
    TCGv_i64 tmp = tcg_const_i64(16);

    /* Probe write access before actually modifying memory */
    gen_helper_probe_write_access(cpu_env, o->addr1, tmp);

    read_vec_element_i64(tmp, get_field(s->fields, v1), 0, ES_64);
    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ);
    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
    read_vec_element_i64(tmp, get_field(s->fields, v1), 1, ES_64);
    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vste(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    read_vec_element_i64(tmp, get_field(s->fields, v1), enr, es);
    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vstm(DisasContext *s, DisasOps *o)
{
    const uint8_t v3 = get_field(s->fields, v3);
    uint8_t v1 = get_field(s->fields, v1);
    TCGv_i64 tmp;

    if (v3 < v1 || (v3 - v1 + 1) > 16) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* Probe write access before actually modifying memory */
    tmp = tcg_const_i64((v3 - v1 + 1) * 16);
    gen_helper_probe_write_access(cpu_env, o->addr1, tmp);

    for (;; v1++) {
        read_vec_element_i64(tmp, v1, 0, ES_64);
        tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ);
        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
        read_vec_element_i64(tmp, v1, 1, ES_64);
        tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ);
        if (v1 == v3) {
            break;
        }
        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
    }
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vstl(DisasContext *s, DisasOps *o)
{
    const int v1_offs = vec_full_reg_offset(get_field(s->fields, v1));
    TCGv_ptr a0 = tcg_temp_new_ptr();

    /* convert highest index into an actual length */
    tcg_gen_addi_i64(o->in2, o->in2, 1);
    tcg_gen_addi_ptr(a0, cpu_env, v1_offs);
    gen_helper_vstl(cpu_env, a0, o->addr1, o->in2);
    tcg_temp_free_ptr(a0);
    return DISAS_NEXT;
}

static DisasJumpType op_vup(DisasContext *s, DisasOps *o)
{
    const bool logical = s->fields->op2 == 0xd4 || s->fields->op2 == 0xd5;
    const uint8_t v1 = get_field(s->fields, v1);
    const uint8_t v2 = get_field(s->fields, v2);
    const uint8_t src_es = get_field(s->fields, m3);
    const uint8_t dst_es = src_es + 1;
    int dst_idx, src_idx;
    TCGv_i64 tmp;

    if (src_es > ES_32) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    if (s->fields->op2 == 0xd7 || s->fields->op2 == 0xd5) {
        /* iterate backwards to avoid overwriting data we might need later */
        for (dst_idx = NUM_VEC_ELEMENTS(dst_es) - 1; dst_idx >= 0; dst_idx--) {
            src_idx = dst_idx;
            read_vec_element_i64(tmp, v2, src_idx,
                                 src_es | (logical ? 0 : MO_SIGN));
            write_vec_element_i64(tmp, v1, dst_idx, dst_es);
        }
    } else {
        /* iterate forward to avoid overwriting data we might need later */
        for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(dst_es); dst_idx++) {
            src_idx = dst_idx + NUM_VEC_ELEMENTS(src_es) / 2;
            read_vec_element_i64(tmp, v2, src_idx,
                                 src_es | (logical ? 0 : MO_SIGN));
            write_vec_element_i64(tmp, v1, dst_idx, dst_es);
        }
    }
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_va(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m4);

    if (es > ES_128) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    } else if (es == ES_128) {
        gen_gvec128_3_i64(tcg_gen_add2_i64, get_field(s->fields, v1),
                          get_field(s->fields, v2), get_field(s->fields, v3));
        return DISAS_NEXT;
    }
    gen_gvec_fn_3(add, es, get_field(s->fields, v1), get_field(s->fields, v2),
                  get_field(s->fields, v3));
    return DISAS_NEXT;
}

static void gen_acc(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, uint8_t es)
{
    const uint8_t msb_bit_nr = NUM_VEC_ELEMENT_BITS(es) - 1;
    TCGv_i64 msb_mask = tcg_const_i64(dup_const(es, 1ull << msb_bit_nr));
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2 = tcg_temp_new_i64();
    TCGv_i64 t3 = tcg_temp_new_i64();

    /* Calculate the carry into the MSB, ignoring the old MSBs */
    tcg_gen_andc_i64(t1, a, msb_mask);
    tcg_gen_andc_i64(t2, b, msb_mask);
    tcg_gen_add_i64(t1, t1, t2);
    /* Calculate the MSB without any carry into it */
    tcg_gen_xor_i64(t3, a, b);
    /* Calculate the carry out of the MSB in the MSB bit position */
    tcg_gen_and_i64(d, a, b);
    tcg_gen_and_i64(t1, t1, t3);
    tcg_gen_or_i64(d, d, t1);
    /* Isolate and shift the carry into position */
    tcg_gen_and_i64(d, d, msb_mask);
    tcg_gen_shri_i64(d, d, msb_bit_nr);

    tcg_temp_free_i64(t1);
    tcg_temp_free_i64(t2);
    tcg_temp_free_i64(t3);
}

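/*
 * Worked example for ES_8 (per byte): 0x7f + 0x01 = 0x80 does not carry out
 * of the most significant bit, so the result byte is 0; 0xff + 0x01 = 0x100
 * does, so the result byte is 1.
 */
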
static void gen_acc8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    gen_acc(d, a, b, ES_8);
}

static void gen_acc16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    gen_acc(d, a, b, ES_16);
}

static void gen_acc_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_add_i32(t, a, b);
    tcg_gen_setcond_i32(TCG_COND_LTU, d, t, b);
    tcg_temp_free_i32(t);
}

static void gen_acc_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_add_i64(t, a, b);
    tcg_gen_setcond_i64(TCG_COND_LTU, d, t, b);
    tcg_temp_free_i64(t);
}

static void gen_acc2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al,
                         TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
{
    TCGv_i64 th = tcg_temp_new_i64();
    TCGv_i64 tl = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_const_i64(0);

    /* add the lower doublewords, the carry ends up in th */
    tcg_gen_add2_i64(tl, th, al, zero, bl, zero);
    /* add ah, bh and the low carry; dl becomes the carry out of the sum */
    tcg_gen_add2_i64(tl, th, th, zero, ah, zero);
    tcg_gen_add2_i64(tl, dl, tl, th, bh, zero);
    /* the 128 bit result is just that carry, i.e. 0 or 1 */
    tcg_gen_mov_i64(dh, zero);

    tcg_temp_free_i64(th);
    tcg_temp_free_i64(tl);
    tcg_temp_free_i64(zero);
}

static DisasJumpType op_vacc(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m4);
    static const GVecGen3 g[4] = {
        { .fni8 = gen_acc8_i64, },
        { .fni8 = gen_acc16_i64, },
        { .fni4 = gen_acc_i32, },
        { .fni8 = gen_acc_i64, },
    };

    if (es > ES_128) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    } else if (es == ES_128) {
        gen_gvec128_3_i64(gen_acc2_i64, get_field(s->fields, v1),
                          get_field(s->fields, v2), get_field(s->fields, v3));
        return DISAS_NEXT;
    }
    gen_gvec_3(get_field(s->fields, v1), get_field(s->fields, v2),
               get_field(s->fields, v3), &g[es]);
    return DISAS_NEXT;
}