s390x/tcg: Implement VECTOR ADD
target/s390x/translate_vx.inc.c

/*
 * QEMU TCG support -- s390x vector instruction translation functions
 *
 * Copyright (C) 2019 Red Hat Inc
 *
 * Authors:
 *   David Hildenbrand <david@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

/*
 * For most instructions that use the same element size for reads and
 * writes, we can use real gvec vector expansion, which potentially uses
 * real host vector instructions. As they only work up to 64 bit elements,
 * 128 bit elements (vector is a single element) have to be handled
 * differently. Operations that are too complicated to encode via TCG ops
 * are handled via gvec ool (out-of-line) handlers.
 *
 * As soon as instructions use different element sizes for reads and writes
 * or access elements "out of their element scope" we expand them manually
 * in fancy loops, as gvec expansion does not deal with actual element
 * numbers and also does not support access to other elements.
 *
 * 128 bit elements:
 *  As we only have i32/i64, such elements have to be loaded into two
 *  i64 values and can then be processed e.g. by tcg_gen_add2_i64.
 *
 * Sizes:
 *  On s390x, the operand size (oprsz) and the maximum size (maxsz) are
 *  always 16 (128 bit). What gvec code calls "vece", s390x calls "es",
 *  a.k.a. "element size". These values nicely map to MO_8 ... MO_64. Only
 *  the 128 bit element size has to be treated in a special way (MO_64 + 1).
 *  We will use ES_* instead of MO_* for this reason in this file.
 *
 * CC handling:
 *  As gvec ool-helpers can currently not return values (besides via
 *  pointers like vectors or cpu_env), whenever we have to set the CC and
 *  can't conclude the value from the result vector, we will directly
 *  set it in "env->cc_op" and mark it as static via set_cc_static().
 *  Whenever this is done, the helper writes globals (cc_op).
 */

#define NUM_VEC_ELEMENT_BYTES(es) (1 << (es))
#define NUM_VEC_ELEMENTS(es) (16 / NUM_VEC_ELEMENT_BYTES(es))
#define NUM_VEC_ELEMENT_BITS(es) (NUM_VEC_ELEMENT_BYTES(es) * BITS_PER_BYTE)

#define ES_8    MO_8
#define ES_16   MO_16
#define ES_32   MO_32
#define ES_64   MO_64
#define ES_128  4

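/*
 * Note: ES_128 (MO_64 + 1) is not a real TCGMemOp. It is only used to
 * select the two-i64 code paths (e.g. gen_gvec128_3_i64()) and is never
 * handed to gvec expansion or to memory access routines.
 */
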
static inline bool valid_vec_element(uint8_t enr, TCGMemOp es)
{
    return !(enr & ~(NUM_VEC_ELEMENTS(es) - 1));
}

static void read_vec_element_i64(TCGv_i64 dst, uint8_t reg, uint8_t enr,
                                 TCGMemOp memop)
{
    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);

    switch (memop) {
    case ES_8:
        tcg_gen_ld8u_i64(dst, cpu_env, offs);
        break;
    case ES_16:
        tcg_gen_ld16u_i64(dst, cpu_env, offs);
        break;
    case ES_32:
        tcg_gen_ld32u_i64(dst, cpu_env, offs);
        break;
    case ES_8 | MO_SIGN:
        tcg_gen_ld8s_i64(dst, cpu_env, offs);
        break;
    case ES_16 | MO_SIGN:
        tcg_gen_ld16s_i64(dst, cpu_env, offs);
        break;
    case ES_32 | MO_SIGN:
        tcg_gen_ld32s_i64(dst, cpu_env, offs);
        break;
    case ES_64:
    case ES_64 | MO_SIGN:
        tcg_gen_ld_i64(dst, cpu_env, offs);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i64(TCGv_i64 src, int reg, uint8_t enr,
                                  TCGMemOp memop)
{
    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);

    switch (memop) {
    case ES_8:
        tcg_gen_st8_i64(src, cpu_env, offs);
        break;
    case ES_16:
        tcg_gen_st16_i64(src, cpu_env, offs);
        break;
    case ES_32:
        tcg_gen_st32_i64(src, cpu_env, offs);
        break;
    case ES_64:
        tcg_gen_st_i64(src, cpu_env, offs);
        break;
    default:
        g_assert_not_reached();
    }
}

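/*
 * Worked example for get_vec_element_ptr_i64() below (assuming the vreg
 * layout used by vec_reg_offset(): elements numbered from the most
 * significant end, each 8-byte chunk stored in host byte order): with
 * es = ES_32 on a little-endian host, the XOR with (8 - 4) maps element 0
 * to byte offset 4 and element 1 to byte offset 0 of the first chunk.
 */
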
static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
                                    uint8_t es)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    /* mask off invalid parts from the element nr */
    tcg_gen_andi_i64(tmp, enr, NUM_VEC_ELEMENTS(es) - 1);

    /* convert it to an element offset relative to cpu_env (vec_reg_offset()) */
    tcg_gen_shli_i64(tmp, tmp, es);
#ifndef HOST_WORDS_BIGENDIAN
    tcg_gen_xori_i64(tmp, tmp, 8 - NUM_VEC_ELEMENT_BYTES(es));
#endif
    tcg_gen_addi_i64(tmp, tmp, vec_full_reg_offset(reg));

    /* generate the final ptr by adding cpu_env */
    tcg_gen_trunc_i64_ptr(ptr, tmp);
    tcg_gen_add_ptr(ptr, ptr, cpu_env);

    tcg_temp_free_i64(tmp);
}

#define gen_gvec_3_ool(v1, v2, v3, data, fn) \
    tcg_gen_gvec_3_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       vec_full_reg_offset(v3), 16, 16, data, fn)
#define gen_gvec_3_ptr(v1, v2, v3, ptr, data, fn) \
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       vec_full_reg_offset(v3), ptr, 16, 16, data, fn)
#define gen_gvec_4(v1, v2, v3, v4, gen) \
    tcg_gen_gvec_4(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                   vec_full_reg_offset(v3), vec_full_reg_offset(v4), \
                   16, 16, gen)
#define gen_gvec_4_ool(v1, v2, v3, v4, data, fn) \
    tcg_gen_gvec_4_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       vec_full_reg_offset(v3), vec_full_reg_offset(v4), \
                       16, 16, data, fn)
#define gen_gvec_dup_i64(es, v1, c) \
    tcg_gen_gvec_dup_i64(es, vec_full_reg_offset(v1), 16, 16, c)
#define gen_gvec_mov(v1, v2) \
    tcg_gen_gvec_mov(0, vec_full_reg_offset(v1), vec_full_reg_offset(v2), 16, \
                     16)
#define gen_gvec_dup64i(v1, c) \
    tcg_gen_gvec_dup64i(vec_full_reg_offset(v1), 16, 16, c)
#define gen_gvec_fn_3(fn, es, v1, v2, v3) \
    tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                      vec_full_reg_offset(v3), 16, 16)

/*
 * Helper to carry out a 128 bit vector computation using 2 i64 values per
 * vector.
 */
typedef void (*gen_gvec128_3_i64_fn)(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al,
                                     TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh);
static void gen_gvec128_3_i64(gen_gvec128_3_i64_fn fn, uint8_t d, uint8_t a,
                              uint8_t b)
{
    TCGv_i64 dh = tcg_temp_new_i64();
    TCGv_i64 dl = tcg_temp_new_i64();
    TCGv_i64 ah = tcg_temp_new_i64();
    TCGv_i64 al = tcg_temp_new_i64();
    TCGv_i64 bh = tcg_temp_new_i64();
    TCGv_i64 bl = tcg_temp_new_i64();

    read_vec_element_i64(ah, a, 0, ES_64);
    read_vec_element_i64(al, a, 1, ES_64);
    read_vec_element_i64(bh, b, 0, ES_64);
    read_vec_element_i64(bl, b, 1, ES_64);
    fn(dl, dh, al, ah, bl, bh);
    write_vec_element_i64(dh, d, 0, ES_64);
    write_vec_element_i64(dl, d, 1, ES_64);

    tcg_temp_free_i64(dh);
    tcg_temp_free_i64(dl);
    tcg_temp_free_i64(ah);
    tcg_temp_free_i64(al);
    tcg_temp_free_i64(bh);
    tcg_temp_free_i64(bl);
}

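/*
 * Example: gen_gvec128_3_i64(tcg_gen_add2_i64, v1, v2, v3) performs a full
 * 128 bit addition, as tcg_gen_add2_i64(dl, dh, al, ah, bl, bh) computes
 * dl = al + bl and dh = ah + bh + carry. This is how op_va() implements
 * VECTOR ADD for the 128 bit element size.
 */
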
static void gen_gvec_dupi(uint8_t es, uint8_t reg, uint64_t c)
{
    switch (es) {
    case ES_8:
        tcg_gen_gvec_dup8i(vec_full_reg_offset(reg), 16, 16, c);
        break;
    case ES_16:
        tcg_gen_gvec_dup16i(vec_full_reg_offset(reg), 16, 16, c);
        break;
    case ES_32:
        tcg_gen_gvec_dup32i(vec_full_reg_offset(reg), 16, 16, c);
        break;
    case ES_64:
        gen_gvec_dup64i(reg, c);
        break;
    default:
        g_assert_not_reached();
    }
}

static void zero_vec(uint8_t reg)
{
    tcg_gen_gvec_dup8i(vec_full_reg_offset(reg), 16, 16, 0);
}

static DisasJumpType op_vge(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    read_vec_element_i64(tmp, get_field(s->fields, v2), enr, es);
    tcg_gen_add_i64(o->addr1, o->addr1, tmp);
    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 0);

    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    write_vec_element_i64(tmp, get_field(s->fields, v1), enr, es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

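/*
 * generate_byte_mask() expands an 8 bit mask into a 64 bit byte mask: set
 * bit i selects 0xff for byte i, e.g. generate_byte_mask(0x81) yields
 * 0xff000000000000ffull.
 */
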
static uint64_t generate_byte_mask(uint8_t mask)
{
    uint64_t r = 0;
    int i;

    for (i = 0; i < 8; i++) {
        if ((mask >> i) & 1) {
            r |= 0xffull << (i * 8);
        }
    }
    return r;
}

static DisasJumpType op_vgbm(DisasContext *s, DisasOps *o)
{
    const uint16_t i2 = get_field(s->fields, i2);

    if (i2 == (i2 & 0xff) * 0x0101) {
        /*
         * Masks for both 64 bit elements of the vector are the same.
         * Trust tcg to produce a good constant loading.
         */
        gen_gvec_dup64i(get_field(s->fields, v1),
                        generate_byte_mask(i2 & 0xff));
    } else {
        TCGv_i64 t = tcg_temp_new_i64();

        tcg_gen_movi_i64(t, generate_byte_mask(i2 >> 8));
        write_vec_element_i64(t, get_field(s->fields, v1), 0, ES_64);
        tcg_gen_movi_i64(t, generate_byte_mask(i2));
        write_vec_element_i64(t, get_field(s->fields, v1), 1, ES_64);
        tcg_temp_free_i64(t);
    }
    return DISAS_NEXT;
}

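/*
 * Note for op_vgm() below: the bit range wraps if i2 > i3, e.g. for
 * es = ES_8, i2 = 6 and i3 = 2 the generated mask is 0xe3 (bits 6..7 and
 * 0..2, counted from the leftmost bit), replicated into every element.
 */
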
static DisasJumpType op_vgm(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m4);
    const uint8_t bits = NUM_VEC_ELEMENT_BITS(es);
    const uint8_t i2 = get_field(s->fields, i2) & (bits - 1);
    const uint8_t i3 = get_field(s->fields, i3) & (bits - 1);
    uint64_t mask = 0;
    int i;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* generate the mask - take care of wrapping */
    for (i = i2; ; i = (i + 1) % bits) {
        mask |= 1ull << (bits - i - 1);
        if (i == i3) {
            break;
        }
    }

    gen_gvec_dupi(es, get_field(s->fields, v1), mask);
    return DISAS_NEXT;
}

static DisasJumpType op_vl(DisasContext *s, DisasOps *o)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_TEQ);
    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
    tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEQ);
    write_vec_element_i64(t0, get_field(s->fields, v1), 0, ES_64);
    write_vec_element_i64(t1, get_field(s->fields, v1), 1, ES_64);
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
    return DISAS_NEXT;
}

static DisasJumpType op_vlr(DisasContext *s, DisasOps *o)
{
    gen_gvec_mov(get_field(s->fields, v1), get_field(s->fields, v2));
    return DISAS_NEXT;
}

static DisasJumpType op_vlrep(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    gen_gvec_dup_i64(es, get_field(s->fields, v1), tmp);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vle(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    write_vec_element_i64(tmp, get_field(s->fields, v1), enr, es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vlei(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_const_i64((int16_t)get_field(s->fields, i2));
    write_vec_element_i64(tmp, get_field(s->fields, v1), enr, es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vlgv(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m4);
    TCGv_ptr ptr;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* fast path if we don't need the register content */
    if (!get_field(s->fields, b2)) {
        uint8_t enr = get_field(s->fields, d2) & (NUM_VEC_ELEMENTS(es) - 1);

        read_vec_element_i64(o->out, get_field(s->fields, v3), enr, es);
        return DISAS_NEXT;
    }

    ptr = tcg_temp_new_ptr();
    get_vec_element_ptr_i64(ptr, get_field(s->fields, v3), o->addr1, es);
    switch (es) {
    case ES_8:
        tcg_gen_ld8u_i64(o->out, ptr, 0);
        break;
    case ES_16:
        tcg_gen_ld16u_i64(o->out, ptr, 0);
        break;
    case ES_32:
        tcg_gen_ld32u_i64(o->out, ptr, 0);
        break;
    case ES_64:
        tcg_gen_ld_i64(o->out, ptr, 0);
        break;
    default:
        g_assert_not_reached();
    }
    tcg_temp_free_ptr(ptr);

    return DISAS_NEXT;
}

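/*
 * Note for op_vllez() below: m3 == 6 selects the leftmost (instead of the
 * rightmost) word sub-element and is only accepted when the
 * vector-enhancements facility is available; otherwise it triggers a
 * specification exception like any other invalid element size.
 */
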
static DisasJumpType op_vllez(DisasContext *s, DisasOps *o)
{
    uint8_t es = get_field(s->fields, m3);
    uint8_t enr;
    TCGv_i64 t;

    switch (es) {
    /* rightmost sub-element of leftmost doubleword */
    case ES_8:
        enr = 7;
        break;
    case ES_16:
        enr = 3;
        break;
    case ES_32:
        enr = 1;
        break;
    case ES_64:
        enr = 0;
        break;
    /* leftmost sub-element of leftmost doubleword */
    case 6:
        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
            es = ES_32;
            enr = 0;
            break;
        }
        /* fallthrough */
    default:
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    t = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(t, o->addr1, get_mem_index(s), MO_TE | es);
    zero_vec(get_field(s->fields, v1));
    write_vec_element_i64(t, get_field(s->fields, v1), enr, es);
    tcg_temp_free_i64(t);
    return DISAS_NEXT;
}

static DisasJumpType op_vlm(DisasContext *s, DisasOps *o)
{
    const uint8_t v3 = get_field(s->fields, v3);
    uint8_t v1 = get_field(s->fields, v1);
    TCGv_i64 t0, t1;

    if (v3 < v1 || (v3 - v1 + 1) > 16) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /*
     * Check for possible access exceptions by trying to load the last
     * element. The first element will be checked by the first load in
     * the loop below.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();
    gen_addi_and_wrap_i64(s, t0, o->addr1, (v3 - v1) * 16 + 8);
    tcg_gen_qemu_ld_i64(t0, t0, get_mem_index(s), MO_TEQ);

    for (;; v1++) {
        tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEQ);
        write_vec_element_i64(t1, v1, 0, ES_64);
        if (v1 == v3) {
            break;
        }
        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
        tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEQ);
        write_vec_element_i64(t1, v1, 1, ES_64);
        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
    }

    /* Store the last element, loaded first */
    write_vec_element_i64(t0, v1, 1, ES_64);

    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
    return DISAS_NEXT;
}

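/*
 * Note for op_vlbb() below: for a power-of-two block_size,
 * -(addr | -block_size) == block_size - (addr % block_size). E.g. with
 * block_size = 64 and addr % 64 == 39, 25 bytes remain up to the boundary.
 */
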
static DisasJumpType op_vlbb(DisasContext *s, DisasOps *o)
{
    const int64_t block_size = (1ull << (get_field(s->fields, m3) + 6));
    const int v1_offs = vec_full_reg_offset(get_field(s->fields, v1));
    TCGv_ptr a0;
    TCGv_i64 bytes;

    if (get_field(s->fields, m3) > 6) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    bytes = tcg_temp_new_i64();
    a0 = tcg_temp_new_ptr();
    /* calculate the number of bytes until the next block boundary */
    tcg_gen_ori_i64(bytes, o->addr1, -block_size);
    tcg_gen_neg_i64(bytes, bytes);

    tcg_gen_addi_ptr(a0, cpu_env, v1_offs);
    gen_helper_vll(cpu_env, a0, o->addr1, bytes);
    tcg_temp_free_i64(bytes);
    tcg_temp_free_ptr(a0);
    return DISAS_NEXT;
}

static DisasJumpType op_vlvg(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m4);
    TCGv_ptr ptr;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* fast path if we don't need the register content */
    if (!get_field(s->fields, b2)) {
        uint8_t enr = get_field(s->fields, d2) & (NUM_VEC_ELEMENTS(es) - 1);

        write_vec_element_i64(o->in2, get_field(s->fields, v1), enr, es);
        return DISAS_NEXT;
    }

    ptr = tcg_temp_new_ptr();
    get_vec_element_ptr_i64(ptr, get_field(s->fields, v1), o->addr1, es);
    switch (es) {
    case ES_8:
        tcg_gen_st8_i64(o->in2, ptr, 0);
        break;
    case ES_16:
        tcg_gen_st16_i64(o->in2, ptr, 0);
        break;
    case ES_32:
        tcg_gen_st32_i64(o->in2, ptr, 0);
        break;
    case ES_64:
        tcg_gen_st_i64(o->in2, ptr, 0);
        break;
    default:
        g_assert_not_reached();
    }
    tcg_temp_free_ptr(ptr);

    return DISAS_NEXT;
}

static DisasJumpType op_vlvgp(DisasContext *s, DisasOps *o)
{
    write_vec_element_i64(o->in1, get_field(s->fields, v1), 0, ES_64);
    write_vec_element_i64(o->in2, get_field(s->fields, v1), 1, ES_64);
    return DISAS_NEXT;
}

static DisasJumpType op_vll(DisasContext *s, DisasOps *o)
{
    const int v1_offs = vec_full_reg_offset(get_field(s->fields, v1));
    TCGv_ptr a0 = tcg_temp_new_ptr();

    /* convert highest index into an actual length */
    tcg_gen_addi_i64(o->in2, o->in2, 1);
    tcg_gen_addi_ptr(a0, cpu_env, v1_offs);
    gen_helper_vll(cpu_env, a0, o->addr1, o->in2);
    tcg_temp_free_ptr(a0);
    return DISAS_NEXT;
}

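/*
 * Note for op_vmr() below: op2 == 0x61 is VMRH (merge high, leftmost
 * source halves); 0x60 is VMRL (merge low, rightmost source halves).
 */
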
static DisasJumpType op_vmr(DisasContext *s, DisasOps *o)
{
    const uint8_t v1 = get_field(s->fields, v1);
    const uint8_t v2 = get_field(s->fields, v2);
    const uint8_t v3 = get_field(s->fields, v3);
    const uint8_t es = get_field(s->fields, m4);
    int dst_idx, src_idx;
    TCGv_i64 tmp;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    if (s->fields->op2 == 0x61) {
        /* iterate backwards to avoid overwriting data we might need later */
        for (dst_idx = NUM_VEC_ELEMENTS(es) - 1; dst_idx >= 0; dst_idx--) {
            src_idx = dst_idx / 2;
            if (dst_idx % 2 == 0) {
                read_vec_element_i64(tmp, v2, src_idx, es);
            } else {
                read_vec_element_i64(tmp, v3, src_idx, es);
            }
            write_vec_element_i64(tmp, v1, dst_idx, es);
        }
    } else {
        /* iterate forward to avoid overwriting data we might need later */
        for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(es); dst_idx++) {
            src_idx = (dst_idx + NUM_VEC_ELEMENTS(es)) / 2;
            if (dst_idx % 2 == 0) {
                read_vec_element_i64(tmp, v2, src_idx, es);
            } else {
                read_vec_element_i64(tmp, v3, src_idx, es);
            }
            write_vec_element_i64(tmp, v1, dst_idx, es);
        }
    }
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

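/*
 * op_vpk() below handles VPK (op2 0x94), VPKS (0x97) and VPKLS (0x95):
 * the elements of two source vectors are truncated (or saturated) to half
 * their size and packed into a single destination vector, so es denotes
 * the source element size.
 */
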
static DisasJumpType op_vpk(DisasContext *s, DisasOps *o)
{
    const uint8_t v1 = get_field(s->fields, v1);
    const uint8_t v2 = get_field(s->fields, v2);
    const uint8_t v3 = get_field(s->fields, v3);
    const uint8_t es = get_field(s->fields, m4);
    static gen_helper_gvec_3 * const vpk[3] = {
        gen_helper_gvec_vpk16,
        gen_helper_gvec_vpk32,
        gen_helper_gvec_vpk64,
    };
    static gen_helper_gvec_3 * const vpks[3] = {
        gen_helper_gvec_vpks16,
        gen_helper_gvec_vpks32,
        gen_helper_gvec_vpks64,
    };
    static gen_helper_gvec_3_ptr * const vpks_cc[3] = {
        gen_helper_gvec_vpks_cc16,
        gen_helper_gvec_vpks_cc32,
        gen_helper_gvec_vpks_cc64,
    };
    static gen_helper_gvec_3 * const vpkls[3] = {
        gen_helper_gvec_vpkls16,
        gen_helper_gvec_vpkls32,
        gen_helper_gvec_vpkls64,
    };
    static gen_helper_gvec_3_ptr * const vpkls_cc[3] = {
        gen_helper_gvec_vpkls_cc16,
        gen_helper_gvec_vpkls_cc32,
        gen_helper_gvec_vpkls_cc64,
    };

    if (es == ES_8 || es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    switch (s->fields->op2) {
    case 0x97:
        if (get_field(s->fields, m5) & 0x1) {
            gen_gvec_3_ptr(v1, v2, v3, cpu_env, 0, vpks_cc[es - 1]);
            set_cc_static(s);
        } else {
            gen_gvec_3_ool(v1, v2, v3, 0, vpks[es - 1]);
        }
        break;
    case 0x95:
        if (get_field(s->fields, m5) & 0x1) {
            gen_gvec_3_ptr(v1, v2, v3, cpu_env, 0, vpkls_cc[es - 1]);
            set_cc_static(s);
        } else {
            gen_gvec_3_ool(v1, v2, v3, 0, vpkls[es - 1]);
        }
        break;
    case 0x94:
        /* If sources and destination don't overlap -> fast path */
        if (v1 != v2 && v1 != v3) {
            const uint8_t src_es = get_field(s->fields, m4);
            const uint8_t dst_es = src_es - 1;
            TCGv_i64 tmp = tcg_temp_new_i64();
            int dst_idx, src_idx;

            for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(dst_es); dst_idx++) {
                src_idx = dst_idx;
                if (src_idx < NUM_VEC_ELEMENTS(src_es)) {
                    read_vec_element_i64(tmp, v2, src_idx, src_es);
                } else {
                    src_idx -= NUM_VEC_ELEMENTS(src_es);
                    read_vec_element_i64(tmp, v3, src_idx, src_es);
                }
                write_vec_element_i64(tmp, v1, dst_idx, dst_es);
            }
            tcg_temp_free_i64(tmp);
        } else {
            gen_gvec_3_ool(v1, v2, v3, 0, vpk[es - 1]);
        }
        break;
    default:
        g_assert_not_reached();
    }
    return DISAS_NEXT;
}

static DisasJumpType op_vperm(DisasContext *s, DisasOps *o)
{
    gen_gvec_4_ool(get_field(s->fields, v1), get_field(s->fields, v2),
                   get_field(s->fields, v3), get_field(s->fields, v4),
                   0, gen_helper_gvec_vperm);
    return DISAS_NEXT;
}

static DisasJumpType op_vpdi(DisasContext *s, DisasOps *o)
{
    const uint8_t i2 = extract32(get_field(s->fields, m4), 2, 1);
    const uint8_t i3 = extract32(get_field(s->fields, m4), 0, 1);
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();

    read_vec_element_i64(t0, get_field(s->fields, v2), i2, ES_64);
    read_vec_element_i64(t1, get_field(s->fields, v3), i3, ES_64);
    write_vec_element_i64(t0, get_field(s->fields, v1), 0, ES_64);
    write_vec_element_i64(t1, get_field(s->fields, v1), 1, ES_64);
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
    return DISAS_NEXT;
}

static DisasJumpType op_vrep(DisasContext *s, DisasOps *o)
{
    const uint8_t enr = get_field(s->fields, i2);
    const uint8_t es = get_field(s->fields, m4);

    if (es > ES_64 || !valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tcg_gen_gvec_dup_mem(es, vec_full_reg_offset(get_field(s->fields, v1)),
                         vec_reg_offset(get_field(s->fields, v3), enr, es),
                         16, 16);
    return DISAS_NEXT;
}

static DisasJumpType op_vrepi(DisasContext *s, DisasOps *o)
{
    const int64_t data = (int16_t)get_field(s->fields, i2);
    const uint8_t es = get_field(s->fields, m3);

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    gen_gvec_dupi(es, get_field(s->fields, v1), data);
    return DISAS_NEXT;
}

static DisasJumpType op_vsce(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    read_vec_element_i64(tmp, get_field(s->fields, v2), enr, es);
    tcg_gen_add_i64(o->addr1, o->addr1, tmp);
    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 0);

    read_vec_element_i64(tmp, get_field(s->fields, v1), enr, es);
    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

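/*
 * Bitwise select as used by VECTOR SELECT: d = (a & c) | (b & ~c), i.e.
 * each result bit comes from a where the corresponding bit in c is set,
 * and from b where it is clear.
 */
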
static void gen_sel_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 c)
{
    TCGv_i64 t = tcg_temp_new_i64();

    /* bit in c not set -> copy bit from b */
    tcg_gen_andc_i64(t, b, c);
    /* bit in c set -> copy bit from a */
    tcg_gen_and_i64(d, a, c);
    /* merge the results */
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}

static void gen_sel_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b,
                        TCGv_vec c)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_andc_vec(vece, t, b, c);
    tcg_gen_and_vec(vece, d, a, c);
    tcg_gen_or_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}

static DisasJumpType op_vsel(DisasContext *s, DisasOps *o)
{
    static const GVecGen4 gvec_op = {
        .fni8 = gen_sel_i64,
        .fniv = gen_sel_vec,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    gen_gvec_4(get_field(s->fields, v1), get_field(s->fields, v2),
               get_field(s->fields, v3), get_field(s->fields, v4), &gvec_op);
    return DISAS_NEXT;
}

static DisasJumpType op_vseg(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m3);
    int idx1, idx2;
    TCGv_i64 tmp;

    switch (es) {
    case ES_8:
        idx1 = 7;
        idx2 = 15;
        break;
    case ES_16:
        idx1 = 3;
        idx2 = 7;
        break;
    case ES_32:
        idx1 = 1;
        idx2 = 3;
        break;
    default:
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    read_vec_element_i64(tmp, get_field(s->fields, v2), idx1, es | MO_SIGN);
    write_vec_element_i64(tmp, get_field(s->fields, v1), 0, ES_64);
    read_vec_element_i64(tmp, get_field(s->fields, v2), idx2, es | MO_SIGN);
    write_vec_element_i64(tmp, get_field(s->fields, v1), 1, ES_64);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vst(DisasContext *s, DisasOps *o)
{
    TCGv_i64 tmp = tcg_const_i64(16);

    /* Probe write access before actually modifying memory */
    gen_helper_probe_write_access(cpu_env, o->addr1, tmp);

    read_vec_element_i64(tmp, get_field(s->fields, v1), 0, ES_64);
    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ);
    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
    read_vec_element_i64(tmp, get_field(s->fields, v1), 1, ES_64);
    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vste(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    read_vec_element_i64(tmp, get_field(s->fields, v1), enr, es);
    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vstm(DisasContext *s, DisasOps *o)
{
    const uint8_t v3 = get_field(s->fields, v3);
    uint8_t v1 = get_field(s->fields, v1);
    TCGv_i64 tmp;

    if (v3 < v1 || (v3 - v1 + 1) > 16) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* Probe write access before actually modifying memory */
    tmp = tcg_const_i64((v3 - v1 + 1) * 16);
    gen_helper_probe_write_access(cpu_env, o->addr1, tmp);

    for (;; v1++) {
        read_vec_element_i64(tmp, v1, 0, ES_64);
        tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ);
        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
        read_vec_element_i64(tmp, v1, 1, ES_64);
        tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ);
        if (v1 == v3) {
            break;
        }
        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
    }
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vstl(DisasContext *s, DisasOps *o)
{
    const int v1_offs = vec_full_reg_offset(get_field(s->fields, v1));
    TCGv_ptr a0 = tcg_temp_new_ptr();

    /* convert highest index into an actual length */
    tcg_gen_addi_i64(o->in2, o->in2, 1);
    tcg_gen_addi_ptr(a0, cpu_env, v1_offs);
    gen_helper_vstl(cpu_env, a0, o->addr1, o->in2);
    tcg_temp_free_ptr(a0);
    return DISAS_NEXT;
}

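/*
 * op_vup() below covers VUPH (op2 0xd7), VUPLH (0xd5), VUPL (0xd6) and
 * VUPLL (0xd4): the "high" variants unpack the leftmost half of the
 * source, the "logical" variants zero-extend instead of sign-extending.
 */
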
static DisasJumpType op_vup(DisasContext *s, DisasOps *o)
{
    const bool logical = s->fields->op2 == 0xd4 || s->fields->op2 == 0xd5;
    const uint8_t v1 = get_field(s->fields, v1);
    const uint8_t v2 = get_field(s->fields, v2);
    const uint8_t src_es = get_field(s->fields, m3);
    const uint8_t dst_es = src_es + 1;
    int dst_idx, src_idx;
    TCGv_i64 tmp;

    if (src_es > ES_32) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    if (s->fields->op2 == 0xd7 || s->fields->op2 == 0xd5) {
        /* iterate backwards to avoid overwriting data we might need later */
        for (dst_idx = NUM_VEC_ELEMENTS(dst_es) - 1; dst_idx >= 0; dst_idx--) {
            src_idx = dst_idx;
            read_vec_element_i64(tmp, v2, src_idx,
                                 src_es | (logical ? 0 : MO_SIGN));
            write_vec_element_i64(tmp, v1, dst_idx, dst_es);
        }
    } else {
        /* iterate forward to avoid overwriting data we might need later */
        for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(dst_es); dst_idx++) {
            src_idx = dst_idx + NUM_VEC_ELEMENTS(src_es) / 2;
            read_vec_element_i64(tmp, v2, src_idx,
                                 src_es | (logical ? 0 : MO_SIGN));
            write_vec_element_i64(tmp, v1, dst_idx, dst_es);
        }
    }
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

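/*
 * VECTOR ADD: element sizes up to ES_64 go through gvec expansion (and
 * thus potentially host vector instructions); ES_128 is computed on two
 * i64 pairs via tcg_gen_add2_i64 (see gen_gvec128_3_i64()).
 */
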
static DisasJumpType op_va(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m4);

    if (es > ES_128) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    } else if (es == ES_128) {
        gen_gvec128_3_i64(tcg_gen_add2_i64, get_field(s->fields, v1),
                          get_field(s->fields, v2), get_field(s->fields, v3));
        return DISAS_NEXT;
    }
    gen_gvec_fn_3(add, es, get_field(s->fields, v1), get_field(s->fields, v2),
                  get_field(s->fields, v3));
    return DISAS_NEXT;
}