s390x/tcg: Implement VECTOR ADD WITH CARRY
[qemu/ar7.git] target/s390x/translate_vx.inc.c
/*
 * QEMU TCG support -- s390x vector instruction translation functions
 *
 * Copyright (C) 2019 Red Hat Inc
 *
 * Authors:
 *   David Hildenbrand <david@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

/*
 * For most instructions that use the same element size for reads and
 * writes, we can use real gvec vector expansion, which potentially uses
 * real host vector instructions. As they only work up to 64 bit elements,
 * 128 bit elements (vector is a single element) have to be handled
 * differently. Operations that are too complicated to encode via TCG ops
 * are handled via gvec ool (out-of-line) handlers.
 *
 * As soon as instructions use different element sizes for reads and writes
 * or access elements "out of their element scope", we expand them manually
 * in fancy loops, as gvec expansion does not deal with actual element
 * numbers and also does not support access to other elements.
 *
 * 128 bit elements:
 *  As we only have i32/i64, such elements have to be loaded into two
 *  i64 values and can then be processed e.g. by tcg_gen_add2_i64.
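 *
 *  For illustration, a 128 bit addition ends up being expanded roughly as
 *  (see gen_gvec128_3_i64() and op_va() below):
 *
 *    read_vec_element_i64(ah, a, 0, ES_64);
 *    read_vec_element_i64(al, a, 1, ES_64);
 *    read_vec_element_i64(bh, b, 0, ES_64);
 *    read_vec_element_i64(bl, b, 1, ES_64);
 *    tcg_gen_add2_i64(dl, dh, al, ah, bl, bh);
 *    write_vec_element_i64(dh, d, 0, ES_64);
 *    write_vec_element_i64(dl, d, 1, ES_64);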
 *
 * Sizes:
 *  On s390x, the operand size (oprsz) and the maximum size (maxsz) are
 *  always 16 (128 bit). What gvec code calls "vece", s390x calls "es",
 *  a.k.a. "element size". These values nicely map to MO_8 ... MO_64. Only
 *  the 128 bit element size has to be treated in a special way (MO_64 + 1).
 *  We will use ES_* instead of MO_* for this reason in this file.
 *
 * CC handling:
 *  As gvec ool-helpers can currently not return values (besides via
 *  pointers like vectors or cpu_env), whenever we have to set the CC and
 *  can't conclude the value from the result vector, we will directly
 *  set it in "env->cc_op" and mark it as static via set_cc_static().
 *  Whenever this is done, the helper writes globals (cc_op).
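 *
 *  See op_vpk() below for an example: the saturating variants dispatch to
 *  a *_cc ool helper via gen_gvec_3_ptr() and then call set_cc_static().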
 */

#define NUM_VEC_ELEMENT_BYTES(es) (1 << (es))
#define NUM_VEC_ELEMENTS(es) (16 / NUM_VEC_ELEMENT_BYTES(es))
#define NUM_VEC_ELEMENT_BITS(es) (NUM_VEC_ELEMENT_BYTES(es) * BITS_PER_BYTE)

#define ES_8    MO_8
#define ES_16   MO_16
#define ES_32   MO_32
#define ES_64   MO_64
#define ES_128  4

static inline bool valid_vec_element(uint8_t enr, TCGMemOp es)
{
    return !(enr & ~(NUM_VEC_ELEMENTS(es) - 1));
}

static void read_vec_element_i64(TCGv_i64 dst, uint8_t reg, uint8_t enr,
                                 TCGMemOp memop)
{
    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);

    switch (memop) {
    case ES_8:
        tcg_gen_ld8u_i64(dst, cpu_env, offs);
        break;
    case ES_16:
        tcg_gen_ld16u_i64(dst, cpu_env, offs);
        break;
    case ES_32:
        tcg_gen_ld32u_i64(dst, cpu_env, offs);
        break;
    case ES_8 | MO_SIGN:
        tcg_gen_ld8s_i64(dst, cpu_env, offs);
        break;
    case ES_16 | MO_SIGN:
        tcg_gen_ld16s_i64(dst, cpu_env, offs);
        break;
    case ES_32 | MO_SIGN:
        tcg_gen_ld32s_i64(dst, cpu_env, offs);
        break;
    case ES_64:
    case ES_64 | MO_SIGN:
        tcg_gen_ld_i64(dst, cpu_env, offs);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i64(TCGv_i64 src, int reg, uint8_t enr,
                                  TCGMemOp memop)
{
    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);

    switch (memop) {
    case ES_8:
        tcg_gen_st8_i64(src, cpu_env, offs);
        break;
    case ES_16:
        tcg_gen_st16_i64(src, cpu_env, offs);
        break;
    case ES_32:
        tcg_gen_st32_i64(src, cpu_env, offs);
        break;
    case ES_64:
        tcg_gen_st_i64(src, cpu_env, offs);
        break;
    default:
        g_assert_not_reached();
    }
}

static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
                                    uint8_t es)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    /* mask off invalid parts from the element nr */
    tcg_gen_andi_i64(tmp, enr, NUM_VEC_ELEMENTS(es) - 1);

    /* convert it to an element offset relative to cpu_env (vec_reg_offset()) */
    tcg_gen_shli_i64(tmp, tmp, es);
#ifndef HOST_WORDS_BIGENDIAN
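    /*
     * On little-endian hosts, the bytes of each doubleword are stored in
     * reverse order, so the offset within the doubleword has to be
     * adjusted to address the right element.
     */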
    tcg_gen_xori_i64(tmp, tmp, 8 - NUM_VEC_ELEMENT_BYTES(es));
#endif
    tcg_gen_addi_i64(tmp, tmp, vec_full_reg_offset(reg));

    /* generate the final ptr by adding cpu_env */
    tcg_gen_trunc_i64_ptr(ptr, tmp);
    tcg_gen_add_ptr(ptr, ptr, cpu_env);

    tcg_temp_free_i64(tmp);
}

#define gen_gvec_3(v1, v2, v3, gen) \
    tcg_gen_gvec_3(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                   vec_full_reg_offset(v3), 16, 16, gen)
#define gen_gvec_3_ool(v1, v2, v3, data, fn) \
    tcg_gen_gvec_3_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       vec_full_reg_offset(v3), 16, 16, data, fn)
#define gen_gvec_3_ptr(v1, v2, v3, ptr, data, fn) \
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       vec_full_reg_offset(v3), ptr, 16, 16, data, fn)
#define gen_gvec_4(v1, v2, v3, v4, gen) \
    tcg_gen_gvec_4(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                   vec_full_reg_offset(v3), vec_full_reg_offset(v4), \
                   16, 16, gen)
#define gen_gvec_4_ool(v1, v2, v3, v4, data, fn) \
    tcg_gen_gvec_4_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       vec_full_reg_offset(v3), vec_full_reg_offset(v4), \
                       16, 16, data, fn)
#define gen_gvec_dup_i64(es, v1, c) \
    tcg_gen_gvec_dup_i64(es, vec_full_reg_offset(v1), 16, 16, c)
#define gen_gvec_mov(v1, v2) \
    tcg_gen_gvec_mov(0, vec_full_reg_offset(v1), vec_full_reg_offset(v2), 16, \
                     16)
#define gen_gvec_dup64i(v1, c) \
    tcg_gen_gvec_dup64i(vec_full_reg_offset(v1), 16, 16, c)
#define gen_gvec_fn_3(fn, es, v1, v2, v3) \
    tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                      vec_full_reg_offset(v3), 16, 16)

/*
 * Helper to carry out a 128 bit vector computation using 2 i64 values per
 * vector.
 */
typedef void (*gen_gvec128_3_i64_fn)(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al,
                                     TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh);
static void gen_gvec128_3_i64(gen_gvec128_3_i64_fn fn, uint8_t d, uint8_t a,
                              uint8_t b)
{
    TCGv_i64 dh = tcg_temp_new_i64();
    TCGv_i64 dl = tcg_temp_new_i64();
    TCGv_i64 ah = tcg_temp_new_i64();
    TCGv_i64 al = tcg_temp_new_i64();
    TCGv_i64 bh = tcg_temp_new_i64();
    TCGv_i64 bl = tcg_temp_new_i64();

    read_vec_element_i64(ah, a, 0, ES_64);
    read_vec_element_i64(al, a, 1, ES_64);
    read_vec_element_i64(bh, b, 0, ES_64);
    read_vec_element_i64(bl, b, 1, ES_64);
    fn(dl, dh, al, ah, bl, bh);
    write_vec_element_i64(dh, d, 0, ES_64);
    write_vec_element_i64(dl, d, 1, ES_64);

    tcg_temp_free_i64(dh);
    tcg_temp_free_i64(dl);
    tcg_temp_free_i64(ah);
    tcg_temp_free_i64(al);
    tcg_temp_free_i64(bh);
    tcg_temp_free_i64(bl);
}

typedef void (*gen_gvec128_4_i64_fn)(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al,
                                     TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh,
                                     TCGv_i64 cl, TCGv_i64 ch);
static void gen_gvec128_4_i64(gen_gvec128_4_i64_fn fn, uint8_t d, uint8_t a,
                              uint8_t b, uint8_t c)
{
    TCGv_i64 dh = tcg_temp_new_i64();
    TCGv_i64 dl = tcg_temp_new_i64();
    TCGv_i64 ah = tcg_temp_new_i64();
    TCGv_i64 al = tcg_temp_new_i64();
    TCGv_i64 bh = tcg_temp_new_i64();
    TCGv_i64 bl = tcg_temp_new_i64();
    TCGv_i64 ch = tcg_temp_new_i64();
    TCGv_i64 cl = tcg_temp_new_i64();

    read_vec_element_i64(ah, a, 0, ES_64);
    read_vec_element_i64(al, a, 1, ES_64);
    read_vec_element_i64(bh, b, 0, ES_64);
    read_vec_element_i64(bl, b, 1, ES_64);
    read_vec_element_i64(ch, c, 0, ES_64);
    read_vec_element_i64(cl, c, 1, ES_64);
    fn(dl, dh, al, ah, bl, bh, cl, ch);
    write_vec_element_i64(dh, d, 0, ES_64);
    write_vec_element_i64(dl, d, 1, ES_64);

    tcg_temp_free_i64(dh);
    tcg_temp_free_i64(dl);
    tcg_temp_free_i64(ah);
    tcg_temp_free_i64(al);
    tcg_temp_free_i64(bh);
    tcg_temp_free_i64(bl);
    tcg_temp_free_i64(ch);
    tcg_temp_free_i64(cl);
}

static void gen_gvec_dupi(uint8_t es, uint8_t reg, uint64_t c)
{
    switch (es) {
    case ES_8:
        tcg_gen_gvec_dup8i(vec_full_reg_offset(reg), 16, 16, c);
        break;
    case ES_16:
        tcg_gen_gvec_dup16i(vec_full_reg_offset(reg), 16, 16, c);
        break;
    case ES_32:
        tcg_gen_gvec_dup32i(vec_full_reg_offset(reg), 16, 16, c);
        break;
    case ES_64:
        gen_gvec_dup64i(reg, c);
        break;
    default:
        g_assert_not_reached();
    }
}

static void zero_vec(uint8_t reg)
{
    tcg_gen_gvec_dup8i(vec_full_reg_offset(reg), 16, 16, 0);
}

static DisasJumpType op_vge(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    read_vec_element_i64(tmp, get_field(s->fields, v2), enr, es);
    tcg_gen_add_i64(o->addr1, o->addr1, tmp);
    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 0);

    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    write_vec_element_i64(tmp, get_field(s->fields, v1), enr, es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static uint64_t generate_byte_mask(uint8_t mask)
{
    uint64_t r = 0;
    int i;

    for (i = 0; i < 8; i++) {
        if ((mask >> i) & 1) {
            r |= 0xffull << (i * 8);
        }
    }
    return r;
}
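
/*
 * Example: generate_byte_mask(0x81) selects byte lanes 0 and 7 and thus
 * returns 0xff000000000000ffull; op_vgbm() below applies it to each half
 * of the 16 bit immediate.
 */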

static DisasJumpType op_vgbm(DisasContext *s, DisasOps *o)
{
    const uint16_t i2 = get_field(s->fields, i2);

    if (i2 == (i2 & 0xff) * 0x0101) {
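        /*
         * (i2 & 0xff) * 0x0101 replicates the low byte into both bytes,
         * so the check holds iff the two byte masks are identical.
         */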
        /*
         * Masks for both 64 bit elements of the vector are the same.
         * Trust tcg to produce a good constant loading.
         */
        gen_gvec_dup64i(get_field(s->fields, v1),
                        generate_byte_mask(i2 & 0xff));
    } else {
        TCGv_i64 t = tcg_temp_new_i64();

        tcg_gen_movi_i64(t, generate_byte_mask(i2 >> 8));
        write_vec_element_i64(t, get_field(s->fields, v1), 0, ES_64);
        tcg_gen_movi_i64(t, generate_byte_mask(i2));
        write_vec_element_i64(t, get_field(s->fields, v1), 1, ES_64);
        tcg_temp_free_i64(t);
    }
    return DISAS_NEXT;
}

static DisasJumpType op_vgm(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m4);
    const uint8_t bits = NUM_VEC_ELEMENT_BITS(es);
    const uint8_t i2 = get_field(s->fields, i2) & (bits - 1);
    const uint8_t i3 = get_field(s->fields, i3) & (bits - 1);
    uint64_t mask = 0;
    int i;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* generate the mask - take care of wrapping */
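    /*
     * Bits are numbered from the left, so bit i2 maps to position
     * bits - i2 - 1. With i2 > i3 the range wraps: e.g. for ES_8,
     * i2 = 6 and i3 = 1 yield the mask 0b11000011.
     */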
    for (i = i2; ; i = (i + 1) % bits) {
        mask |= 1ull << (bits - i - 1);
        if (i == i3) {
            break;
        }
    }

    gen_gvec_dupi(es, get_field(s->fields, v1), mask);
    return DISAS_NEXT;
}

static DisasJumpType op_vl(DisasContext *s, DisasOps *o)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_TEQ);
    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
    tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEQ);
    write_vec_element_i64(t0, get_field(s->fields, v1), 0, ES_64);
    write_vec_element_i64(t1, get_field(s->fields, v1), 1, ES_64);
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
    return DISAS_NEXT;
}

static DisasJumpType op_vlr(DisasContext *s, DisasOps *o)
{
    gen_gvec_mov(get_field(s->fields, v1), get_field(s->fields, v2));
    return DISAS_NEXT;
}

static DisasJumpType op_vlrep(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    gen_gvec_dup_i64(es, get_field(s->fields, v1), tmp);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vle(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    write_vec_element_i64(tmp, get_field(s->fields, v1), enr, es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vlei(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_const_i64((int16_t)get_field(s->fields, i2));
    write_vec_element_i64(tmp, get_field(s->fields, v1), enr, es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vlgv(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m4);
    TCGv_ptr ptr;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* fast path if we don't need the register content */
    if (!get_field(s->fields, b2)) {
        uint8_t enr = get_field(s->fields, d2) & (NUM_VEC_ELEMENTS(es) - 1);

        read_vec_element_i64(o->out, get_field(s->fields, v3), enr, es);
        return DISAS_NEXT;
    }

    ptr = tcg_temp_new_ptr();
    get_vec_element_ptr_i64(ptr, get_field(s->fields, v3), o->addr1, es);
    switch (es) {
    case ES_8:
        tcg_gen_ld8u_i64(o->out, ptr, 0);
        break;
    case ES_16:
        tcg_gen_ld16u_i64(o->out, ptr, 0);
        break;
    case ES_32:
        tcg_gen_ld32u_i64(o->out, ptr, 0);
        break;
    case ES_64:
        tcg_gen_ld_i64(o->out, ptr, 0);
        break;
    default:
        g_assert_not_reached();
    }
    tcg_temp_free_ptr(ptr);

    return DISAS_NEXT;
}

static DisasJumpType op_vllez(DisasContext *s, DisasOps *o)
{
    uint8_t es = get_field(s->fields, m3);
    uint8_t enr;
    TCGv_i64 t;

    switch (es) {
    /* rightmost sub-element of leftmost doubleword */
    case ES_8:
        enr = 7;
        break;
    case ES_16:
        enr = 3;
        break;
    case ES_32:
        enr = 1;
        break;
    case ES_64:
        enr = 0;
        break;
    /* leftmost sub-element of leftmost doubleword */
    case 6:
        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
            es = ES_32;
            enr = 0;
            break;
        }
        /* fallthrough */
    default:
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    t = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(t, o->addr1, get_mem_index(s), MO_TE | es);
    zero_vec(get_field(s->fields, v1));
    write_vec_element_i64(t, get_field(s->fields, v1), enr, es);
    tcg_temp_free_i64(t);
    return DISAS_NEXT;
}

static DisasJumpType op_vlm(DisasContext *s, DisasOps *o)
{
    const uint8_t v3 = get_field(s->fields, v3);
    uint8_t v1 = get_field(s->fields, v1);
    TCGv_i64 t0, t1;

    if (v3 < v1 || (v3 - v1 + 1) > 16) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /*
     * Check for possible access exceptions by trying to load the last
     * element. The first element will be checked right afterwards, when
     * the loop below loads it first.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();
    gen_addi_and_wrap_i64(s, t0, o->addr1, (v3 - v1) * 16 + 8);
    tcg_gen_qemu_ld_i64(t0, t0, get_mem_index(s), MO_TEQ);

    for (;; v1++) {
        tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEQ);
        write_vec_element_i64(t1, v1, 0, ES_64);
        if (v1 == v3) {
            break;
        }
        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
        tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEQ);
        write_vec_element_i64(t1, v1, 1, ES_64);
        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
    }

    /* Store the last element, loaded first */
    write_vec_element_i64(t0, v1, 1, ES_64);

    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
    return DISAS_NEXT;
}

static DisasJumpType op_vlbb(DisasContext *s, DisasOps *o)
{
    const int64_t block_size = (1ull << (get_field(s->fields, m3) + 6));
    const int v1_offs = vec_full_reg_offset(get_field(s->fields, v1));
    TCGv_ptr a0;
    TCGv_i64 bytes;

    if (get_field(s->fields, m3) > 6) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    bytes = tcg_temp_new_i64();
    a0 = tcg_temp_new_ptr();
    /* calculate the number of bytes until the next block boundary */
    tcg_gen_ori_i64(bytes, o->addr1, -block_size);
    tcg_gen_neg_i64(bytes, bytes);
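    /*
     * As block_size is a power of two, -(addr | -block_size) equals
     * block_size - (addr & (block_size - 1)).
     */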

    tcg_gen_addi_ptr(a0, cpu_env, v1_offs);
    gen_helper_vll(cpu_env, a0, o->addr1, bytes);
    tcg_temp_free_i64(bytes);
    tcg_temp_free_ptr(a0);
    return DISAS_NEXT;
}

static DisasJumpType op_vlvg(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m4);
    TCGv_ptr ptr;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* fast path if we don't need the register content */
    if (!get_field(s->fields, b2)) {
        uint8_t enr = get_field(s->fields, d2) & (NUM_VEC_ELEMENTS(es) - 1);

        write_vec_element_i64(o->in2, get_field(s->fields, v1), enr, es);
        return DISAS_NEXT;
    }

    ptr = tcg_temp_new_ptr();
    get_vec_element_ptr_i64(ptr, get_field(s->fields, v1), o->addr1, es);
    switch (es) {
    case ES_8:
        tcg_gen_st8_i64(o->in2, ptr, 0);
        break;
    case ES_16:
        tcg_gen_st16_i64(o->in2, ptr, 0);
        break;
    case ES_32:
        tcg_gen_st32_i64(o->in2, ptr, 0);
        break;
    case ES_64:
        tcg_gen_st_i64(o->in2, ptr, 0);
        break;
    default:
        g_assert_not_reached();
    }
    tcg_temp_free_ptr(ptr);

    return DISAS_NEXT;
}

static DisasJumpType op_vlvgp(DisasContext *s, DisasOps *o)
{
    write_vec_element_i64(o->in1, get_field(s->fields, v1), 0, ES_64);
    write_vec_element_i64(o->in2, get_field(s->fields, v1), 1, ES_64);
    return DISAS_NEXT;
}

static DisasJumpType op_vll(DisasContext *s, DisasOps *o)
{
    const int v1_offs = vec_full_reg_offset(get_field(s->fields, v1));
    TCGv_ptr a0 = tcg_temp_new_ptr();

    /* convert highest index into an actual length */
    tcg_gen_addi_i64(o->in2, o->in2, 1);
    tcg_gen_addi_ptr(a0, cpu_env, v1_offs);
    gen_helper_vll(cpu_env, a0, o->addr1, o->in2);
    tcg_temp_free_ptr(a0);
    return DISAS_NEXT;
}

static DisasJumpType op_vmr(DisasContext *s, DisasOps *o)
{
    const uint8_t v1 = get_field(s->fields, v1);
    const uint8_t v2 = get_field(s->fields, v2);
    const uint8_t v3 = get_field(s->fields, v3);
    const uint8_t es = get_field(s->fields, m4);
    int dst_idx, src_idx;
    TCGv_i64 tmp;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    if (s->fields->op2 == 0x61) {
        /* iterate backwards to avoid overwriting data we might need later */
        for (dst_idx = NUM_VEC_ELEMENTS(es) - 1; dst_idx >= 0; dst_idx--) {
            src_idx = dst_idx / 2;
            if (dst_idx % 2 == 0) {
                read_vec_element_i64(tmp, v2, src_idx, es);
            } else {
                read_vec_element_i64(tmp, v3, src_idx, es);
            }
            write_vec_element_i64(tmp, v1, dst_idx, es);
        }
    } else {
        /* iterate forward to avoid overwriting data we might need later */
        for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(es); dst_idx++) {
            src_idx = (dst_idx + NUM_VEC_ELEMENTS(es)) / 2;
            if (dst_idx % 2 == 0) {
                read_vec_element_i64(tmp, v2, src_idx, es);
            } else {
                read_vec_element_i64(tmp, v3, src_idx, es);
            }
            write_vec_element_i64(tmp, v1, dst_idx, es);
        }
    }
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vpk(DisasContext *s, DisasOps *o)
{
    const uint8_t v1 = get_field(s->fields, v1);
    const uint8_t v2 = get_field(s->fields, v2);
    const uint8_t v3 = get_field(s->fields, v3);
    const uint8_t es = get_field(s->fields, m4);
    static gen_helper_gvec_3 * const vpk[3] = {
        gen_helper_gvec_vpk16,
        gen_helper_gvec_vpk32,
        gen_helper_gvec_vpk64,
    };
    static gen_helper_gvec_3 * const vpks[3] = {
        gen_helper_gvec_vpks16,
        gen_helper_gvec_vpks32,
        gen_helper_gvec_vpks64,
    };
    static gen_helper_gvec_3_ptr * const vpks_cc[3] = {
        gen_helper_gvec_vpks_cc16,
        gen_helper_gvec_vpks_cc32,
        gen_helper_gvec_vpks_cc64,
    };
    static gen_helper_gvec_3 * const vpkls[3] = {
        gen_helper_gvec_vpkls16,
        gen_helper_gvec_vpkls32,
        gen_helper_gvec_vpkls64,
    };
    static gen_helper_gvec_3_ptr * const vpkls_cc[3] = {
        gen_helper_gvec_vpkls_cc16,
        gen_helper_gvec_vpkls_cc32,
        gen_helper_gvec_vpkls_cc64,
    };

    if (es == ES_8 || es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    switch (s->fields->op2) {
    case 0x97:
        if (get_field(s->fields, m5) & 0x1) {
            gen_gvec_3_ptr(v1, v2, v3, cpu_env, 0, vpks_cc[es - 1]);
            set_cc_static(s);
        } else {
            gen_gvec_3_ool(v1, v2, v3, 0, vpks[es - 1]);
        }
        break;
    case 0x95:
        if (get_field(s->fields, m5) & 0x1) {
            gen_gvec_3_ptr(v1, v2, v3, cpu_env, 0, vpkls_cc[es - 1]);
            set_cc_static(s);
        } else {
            gen_gvec_3_ool(v1, v2, v3, 0, vpkls[es - 1]);
        }
        break;
    case 0x94:
        /* If sources and destination don't overlap -> fast path */
        if (v1 != v2 && v1 != v3) {
            const uint8_t src_es = get_field(s->fields, m4);
            const uint8_t dst_es = src_es - 1;
            TCGv_i64 tmp = tcg_temp_new_i64();
            int dst_idx, src_idx;

            for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(dst_es); dst_idx++) {
                src_idx = dst_idx;
                if (src_idx < NUM_VEC_ELEMENTS(src_es)) {
                    read_vec_element_i64(tmp, v2, src_idx, src_es);
                } else {
                    src_idx -= NUM_VEC_ELEMENTS(src_es);
                    read_vec_element_i64(tmp, v3, src_idx, src_es);
                }
                write_vec_element_i64(tmp, v1, dst_idx, dst_es);
            }
            tcg_temp_free_i64(tmp);
        } else {
            gen_gvec_3_ool(v1, v2, v3, 0, vpk[es - 1]);
        }
        break;
    default:
        g_assert_not_reached();
    }
    return DISAS_NEXT;
}

static DisasJumpType op_vperm(DisasContext *s, DisasOps *o)
{
    gen_gvec_4_ool(get_field(s->fields, v1), get_field(s->fields, v2),
                   get_field(s->fields, v3), get_field(s->fields, v4),
                   0, gen_helper_gvec_vperm);
    return DISAS_NEXT;
}

static DisasJumpType op_vpdi(DisasContext *s, DisasOps *o)
{
    const uint8_t i2 = extract32(get_field(s->fields, m4), 2, 1);
    const uint8_t i3 = extract32(get_field(s->fields, m4), 0, 1);
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();

    read_vec_element_i64(t0, get_field(s->fields, v2), i2, ES_64);
    read_vec_element_i64(t1, get_field(s->fields, v3), i3, ES_64);
    write_vec_element_i64(t0, get_field(s->fields, v1), 0, ES_64);
    write_vec_element_i64(t1, get_field(s->fields, v1), 1, ES_64);
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
    return DISAS_NEXT;
}

static DisasJumpType op_vrep(DisasContext *s, DisasOps *o)
{
    const uint8_t enr = get_field(s->fields, i2);
    const uint8_t es = get_field(s->fields, m4);

    if (es > ES_64 || !valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tcg_gen_gvec_dup_mem(es, vec_full_reg_offset(get_field(s->fields, v1)),
                         vec_reg_offset(get_field(s->fields, v3), enr, es),
                         16, 16);
    return DISAS_NEXT;
}

static DisasJumpType op_vrepi(DisasContext *s, DisasOps *o)
{
    const int64_t data = (int16_t)get_field(s->fields, i2);
    const uint8_t es = get_field(s->fields, m3);

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    gen_gvec_dupi(es, get_field(s->fields, v1), data);
    return DISAS_NEXT;
}

static DisasJumpType op_vsce(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    read_vec_element_i64(tmp, get_field(s->fields, v2), enr, es);
    tcg_gen_add_i64(o->addr1, o->addr1, tmp);
    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 0);

    read_vec_element_i64(tmp, get_field(s->fields, v1), enr, es);
    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static void gen_sel_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 c)
{
    TCGv_i64 t = tcg_temp_new_i64();

    /* bit in c not set -> copy bit from b */
    tcg_gen_andc_i64(t, b, c);
    /* bit in c set -> copy bit from a */
    tcg_gen_and_i64(d, a, c);
    /* merge the results */
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}

static void gen_sel_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b,
                        TCGv_vec c)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_andc_vec(vece, t, b, c);
    tcg_gen_and_vec(vece, d, a, c);
    tcg_gen_or_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}

static DisasJumpType op_vsel(DisasContext *s, DisasOps *o)
{
    static const GVecGen4 gvec_op = {
        .fni8 = gen_sel_i64,
        .fniv = gen_sel_vec,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    gen_gvec_4(get_field(s->fields, v1), get_field(s->fields, v2),
               get_field(s->fields, v3), get_field(s->fields, v4), &gvec_op);
    return DISAS_NEXT;
}

static DisasJumpType op_vseg(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m3);
    int idx1, idx2;
    TCGv_i64 tmp;

    switch (es) {
    case ES_8:
        idx1 = 7;
        idx2 = 15;
        break;
    case ES_16:
        idx1 = 3;
        idx2 = 7;
        break;
    case ES_32:
        idx1 = 1;
        idx2 = 3;
        break;
    default:
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    read_vec_element_i64(tmp, get_field(s->fields, v2), idx1, es | MO_SIGN);
    write_vec_element_i64(tmp, get_field(s->fields, v1), 0, ES_64);
    read_vec_element_i64(tmp, get_field(s->fields, v2), idx2, es | MO_SIGN);
    write_vec_element_i64(tmp, get_field(s->fields, v1), 1, ES_64);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vst(DisasContext *s, DisasOps *o)
{
    TCGv_i64 tmp = tcg_const_i64(16);

    /* Probe write access before actually modifying memory */
    gen_helper_probe_write_access(cpu_env, o->addr1, tmp);

    read_vec_element_i64(tmp, get_field(s->fields, v1), 0, ES_64);
    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ);
    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
    read_vec_element_i64(tmp, get_field(s->fields, v1), 1, ES_64);
    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vste(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    read_vec_element_i64(tmp, get_field(s->fields, v1), enr, es);
    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vstm(DisasContext *s, DisasOps *o)
{
    const uint8_t v3 = get_field(s->fields, v3);
    uint8_t v1 = get_field(s->fields, v1);
    TCGv_i64 tmp;

    if (v3 < v1 || (v3 - v1 + 1) > 16) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* Probe write access before actually modifying memory */
    tmp = tcg_const_i64((v3 - v1 + 1) * 16);
    gen_helper_probe_write_access(cpu_env, o->addr1, tmp);

    for (;; v1++) {
        read_vec_element_i64(tmp, v1, 0, ES_64);
        tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ);
        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
        read_vec_element_i64(tmp, v1, 1, ES_64);
        tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ);
        if (v1 == v3) {
            break;
        }
        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
    }
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vstl(DisasContext *s, DisasOps *o)
{
    const int v1_offs = vec_full_reg_offset(get_field(s->fields, v1));
    TCGv_ptr a0 = tcg_temp_new_ptr();

    /* convert highest index into an actual length */
    tcg_gen_addi_i64(o->in2, o->in2, 1);
    tcg_gen_addi_ptr(a0, cpu_env, v1_offs);
    gen_helper_vstl(cpu_env, a0, o->addr1, o->in2);
    tcg_temp_free_ptr(a0);
    return DISAS_NEXT;
}

static DisasJumpType op_vup(DisasContext *s, DisasOps *o)
{
    const bool logical = s->fields->op2 == 0xd4 || s->fields->op2 == 0xd5;
    const uint8_t v1 = get_field(s->fields, v1);
    const uint8_t v2 = get_field(s->fields, v2);
    const uint8_t src_es = get_field(s->fields, m3);
    const uint8_t dst_es = src_es + 1;
    int dst_idx, src_idx;
    TCGv_i64 tmp;

    if (src_es > ES_32) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    if (s->fields->op2 == 0xd7 || s->fields->op2 == 0xd5) {
        /* iterate backwards to avoid overwriting data we might need later */
        for (dst_idx = NUM_VEC_ELEMENTS(dst_es) - 1; dst_idx >= 0; dst_idx--) {
            src_idx = dst_idx;
            read_vec_element_i64(tmp, v2, src_idx,
                                 src_es | (logical ? 0 : MO_SIGN));
            write_vec_element_i64(tmp, v1, dst_idx, dst_es);
        }
    } else {
        /* iterate forward to avoid overwriting data we might need later */
        for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(dst_es); dst_idx++) {
            src_idx = dst_idx + NUM_VEC_ELEMENTS(src_es) / 2;
            read_vec_element_i64(tmp, v2, src_idx,
                                 src_es | (logical ? 0 : MO_SIGN));
            write_vec_element_i64(tmp, v1, dst_idx, dst_es);
        }
    }
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_va(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m4);

    if (es > ES_128) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    } else if (es == ES_128) {
        gen_gvec128_3_i64(tcg_gen_add2_i64, get_field(s->fields, v1),
                          get_field(s->fields, v2), get_field(s->fields, v3));
        return DISAS_NEXT;
    }
    gen_gvec_fn_3(add, es, get_field(s->fields, v1), get_field(s->fields, v2),
                  get_field(s->fields, v3));
    return DISAS_NEXT;
}

static void gen_acc(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, uint8_t es)
{
    const uint8_t msb_bit_nr = NUM_VEC_ELEMENT_BITS(es) - 1;
    TCGv_i64 msb_mask = tcg_const_i64(dup_const(es, 1ull << msb_bit_nr));
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2 = tcg_temp_new_i64();
    TCGv_i64 t3 = tcg_temp_new_i64();
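
    /*
     * Per element, the carry out of the MSB is the majority of a_msb,
     * b_msb and the carry into the MSB:
     *   carry_out = (a & b) | ((a ^ b) & carry_in)
     * The steps below compute this on all elements in parallel.
     */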
    /* Calculate the carry into the MSB, ignoring the old MSBs */
    tcg_gen_andc_i64(t1, a, msb_mask);
    tcg_gen_andc_i64(t2, b, msb_mask);
    tcg_gen_add_i64(t1, t1, t2);
    /* Calculate the MSB without any carry into it */
    tcg_gen_xor_i64(t3, a, b);
    /* Calculate the carry out of the MSB in the MSB bit position */
    tcg_gen_and_i64(d, a, b);
    tcg_gen_and_i64(t1, t1, t3);
    tcg_gen_or_i64(d, d, t1);
    /* Isolate and shift the carry into position */
    tcg_gen_and_i64(d, d, msb_mask);
    tcg_gen_shri_i64(d, d, msb_bit_nr);

    tcg_temp_free_i64(t1);
    tcg_temp_free_i64(t2);
    tcg_temp_free_i64(t3);
}

static void gen_acc8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    gen_acc(d, a, b, ES_8);
}

static void gen_acc16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    gen_acc(d, a, b, ES_16);
}

static void gen_acc_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_add_i32(t, a, b);
    tcg_gen_setcond_i32(TCG_COND_LTU, d, t, b);
    tcg_temp_free_i32(t);
}

static void gen_acc_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_add_i64(t, a, b);
    tcg_gen_setcond_i64(TCG_COND_LTU, d, t, b);
    tcg_temp_free_i64(t);
}
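
/*
 * Compute the carry out of a 128 bit addition: the resulting value (0 or 1)
 * ends up in the low doubleword (dl), with dh set to 0.
 */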
static void gen_acc2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al,
                         TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
{
    TCGv_i64 th = tcg_temp_new_i64();
    TCGv_i64 tl = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_const_i64(0);

    tcg_gen_add2_i64(tl, th, al, zero, bl, zero);
    tcg_gen_add2_i64(tl, th, th, zero, ah, zero);
    tcg_gen_add2_i64(tl, dl, tl, th, bh, zero);
    tcg_gen_mov_i64(dh, zero);

    tcg_temp_free_i64(th);
    tcg_temp_free_i64(tl);
    tcg_temp_free_i64(zero);
}

static DisasJumpType op_vacc(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m4);
    static const GVecGen3 g[4] = {
        { .fni8 = gen_acc8_i64, },
        { .fni8 = gen_acc16_i64, },
        { .fni4 = gen_acc_i32, },
        { .fni8 = gen_acc_i64, },
    };

    if (es > ES_128) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    } else if (es == ES_128) {
        gen_gvec128_3_i64(gen_acc2_i64, get_field(s->fields, v1),
                          get_field(s->fields, v2), get_field(s->fields, v3));
        return DISAS_NEXT;
    }
    gen_gvec_3(get_field(s->fields, v1), get_field(s->fields, v2),
               get_field(s->fields, v3), &g[es]);
    return DISAS_NEXT;
}

static void gen_ac2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah,
                        TCGv_i64 bl, TCGv_i64 bh, TCGv_i64 cl, TCGv_i64 ch)
{
    TCGv_i64 tl = tcg_temp_new_i64();
    TCGv_i64 th = tcg_const_i64(0);

    /* extract the carry only */
    tcg_gen_extract_i64(tl, cl, 0, 1);
    tcg_gen_add2_i64(dl, dh, al, ah, bl, bh);
    tcg_gen_add2_i64(dl, dh, dl, dh, tl, th);

    tcg_temp_free_i64(tl);
    tcg_temp_free_i64(th);
}

static DisasJumpType op_vac(DisasContext *s, DisasOps *o)
{
    if (get_field(s->fields, m5) != ES_128) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    gen_gvec128_4_i64(gen_ac2_i64, get_field(s->fields, v1),
                      get_field(s->fields, v2), get_field(s->fields, v3),
                      get_field(s->fields, v4));
    return DISAS_NEXT;
}