/*
 * QEMU TCG support -- s390x vector instruction translation functions
 *
 * Copyright (C) 2019 Red Hat Inc
 *
 * Authors:
 *   David Hildenbrand <david@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

/*
 * For most instructions that use the same element size for reads and
 * writes, we can use real gvec vector expansion, which potentially uses
 * real host vector instructions. As they only work up to 64 bit elements,
 * 128 bit elements (vector is a single element) have to be handled
 * differently. Operations that are too complicated to encode via TCG ops
 * are handled via gvec ool (out-of-line) handlers.
 *
 * As soon as instructions use different element sizes for reads and writes
 * or access elements "out of their element scope", we expand them manually
 * in fancy loops, as gvec expansion does not deal with actual element
 * numbers and also does not support access to other elements.
 *
 * 128 bit elements:
 *  As we only have i32/i64, such elements have to be loaded into two
 *  i64 values and can then be processed e.g. by tcg_gen_add2_i64.
 *
 * Sizes:
 *  On s390x, the operand size (oprsz) and the maximum size (maxsz) are
 *  always 16 (128 bit). What gvec code calls "vece", s390x calls "es",
 *  a.k.a. "element size". These values nicely map to MO_8 ... MO_64. Only
 *  the 128 bit element size has to be treated in a special way (MO_64 + 1).
 *  We will use ES_* instead of MO_* for this reason in this file.
 *
 * CC handling:
 *  As gvec ool-helpers can currently not return values (besides via
 *  pointers like vectors or cpu_env), whenever we have to set the CC and
 *  can't conclude the value from the result vector, we will directly
 *  set it in "env->cc_op" and mark it as static via set_cc_static().
 *  Whenever this is done, the helper writes globals (cc_op).
 */

#define NUM_VEC_ELEMENT_BYTES(es) (1 << (es))
#define NUM_VEC_ELEMENTS(es) (16 / NUM_VEC_ELEMENT_BYTES(es))
#define NUM_VEC_ELEMENT_BITS(es) (NUM_VEC_ELEMENT_BYTES(es) * BITS_PER_BYTE)

/* ES_* maps to MO_8 ... MO_64; ES_128 is MO_64 + 1 (see the comment above). */
#define ES_8    MO_8
#define ES_16   MO_16
#define ES_32   MO_32
#define ES_64   MO_64
#define ES_128  4

static inline bool valid_vec_element(uint8_t enr, TCGMemOp es)
{
    return !(enr & ~(NUM_VEC_ELEMENTS(es) - 1));
}

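/*
 * Illustration (derived from the macros above): for es = ES_32,
 * NUM_VEC_ELEMENTS(es) is 4, so only element numbers 0..3 are valid;
 * e.g., valid_vec_element(4, ES_32) returns false.
 */
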
static void read_vec_element_i64(TCGv_i64 dst, uint8_t reg, uint8_t enr,
                                 TCGMemOp memop)
{
    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);

    switch (memop) {
    case ES_8:
        tcg_gen_ld8u_i64(dst, cpu_env, offs);
        break;
    case ES_16:
        tcg_gen_ld16u_i64(dst, cpu_env, offs);
        break;
    case ES_32:
        tcg_gen_ld32u_i64(dst, cpu_env, offs);
        break;
    case ES_8 | MO_SIGN:
        tcg_gen_ld8s_i64(dst, cpu_env, offs);
        break;
    case ES_16 | MO_SIGN:
        tcg_gen_ld16s_i64(dst, cpu_env, offs);
        break;
    case ES_32 | MO_SIGN:
        tcg_gen_ld32s_i64(dst, cpu_env, offs);
        break;
    case ES_64:
    case ES_64 | MO_SIGN:
        tcg_gen_ld_i64(dst, cpu_env, offs);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i64(TCGv_i64 src, int reg, uint8_t enr,
                                  TCGMemOp memop)
{
    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);

    switch (memop) {
    case ES_8:
        tcg_gen_st8_i64(src, cpu_env, offs);
        break;
    case ES_16:
        tcg_gen_st16_i64(src, cpu_env, offs);
        break;
    case ES_32:
        tcg_gen_st32_i64(src, cpu_env, offs);
        break;
    case ES_64:
        tcg_gen_st_i64(src, cpu_env, offs);
        break;
    default:
        g_assert_not_reached();
    }
}

static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
                                    uint8_t es)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    /* mask off invalid parts from the element nr */
    tcg_gen_andi_i64(tmp, enr, NUM_VEC_ELEMENTS(es) - 1);

    /* convert it to an element offset relative to cpu_env (vec_reg_offset()) */
    tcg_gen_shli_i64(tmp, tmp, es);
#ifndef HOST_WORDS_BIGENDIAN
    tcg_gen_xori_i64(tmp, tmp, 8 - NUM_VEC_ELEMENT_BYTES(es));
#endif
    tcg_gen_addi_i64(tmp, tmp, vec_full_reg_offset(reg));

    /* generate the final ptr by adding cpu_env */
    tcg_gen_trunc_i64_ptr(ptr, tmp);
    tcg_gen_add_ptr(ptr, ptr, cpu_env);

    tcg_temp_free_i64(tmp);
}

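/*
 * Note on the HOST_WORDS_BIGENDIAN adjustment above: each 64 bit half of
 * a vector is stored in host endianness, so on little-endian hosts the
 * byte offset within a doubleword has to be mirrored. E.g., for es = ES_32
 * and element number 1, the big-endian byte offset is 1 << 2 = 4; XOR-ing
 * with 8 - NUM_VEC_ELEMENT_BYTES(ES_32) = 4 yields host byte offset 0,
 * which is where a little-endian host keeps that element.
 */
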
#define gen_gvec_3(v1, v2, v3, gen) \
    tcg_gen_gvec_3(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                   vec_full_reg_offset(v3), 16, 16, gen)
#define gen_gvec_3_ool(v1, v2, v3, data, fn) \
    tcg_gen_gvec_3_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       vec_full_reg_offset(v3), 16, 16, data, fn)
#define gen_gvec_3_ptr(v1, v2, v3, ptr, data, fn) \
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       vec_full_reg_offset(v3), ptr, 16, 16, data, fn)
#define gen_gvec_4(v1, v2, v3, v4, gen) \
    tcg_gen_gvec_4(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                   vec_full_reg_offset(v3), vec_full_reg_offset(v4), \
                   16, 16, gen)
#define gen_gvec_4_ool(v1, v2, v3, v4, data, fn) \
    tcg_gen_gvec_4_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       vec_full_reg_offset(v3), vec_full_reg_offset(v4), \
                       16, 16, data, fn)
#define gen_gvec_dup_i64(es, v1, c) \
    tcg_gen_gvec_dup_i64(es, vec_full_reg_offset(v1), 16, 16, c)
#define gen_gvec_mov(v1, v2) \
    tcg_gen_gvec_mov(0, vec_full_reg_offset(v1), vec_full_reg_offset(v2), 16, \
                     16)
#define gen_gvec_dup64i(v1, c) \
    tcg_gen_gvec_dup64i(vec_full_reg_offset(v1), 16, 16, c)
#define gen_gvec_fn_3(fn, es, v1, v2, v3) \
    tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                      vec_full_reg_offset(v3), 16, 16)

/*
 * Helper to carry out a 128 bit vector computation using 2 i64 values per
 * vector.
 */
typedef void (*gen_gvec128_3_i64_fn)(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al,
                                     TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh);
static void gen_gvec128_3_i64(gen_gvec128_3_i64_fn fn, uint8_t d, uint8_t a,
                              uint8_t b)
{
    TCGv_i64 dh = tcg_temp_new_i64();
    TCGv_i64 dl = tcg_temp_new_i64();
    TCGv_i64 ah = tcg_temp_new_i64();
    TCGv_i64 al = tcg_temp_new_i64();
    TCGv_i64 bh = tcg_temp_new_i64();
    TCGv_i64 bl = tcg_temp_new_i64();

    read_vec_element_i64(ah, a, 0, ES_64);
    read_vec_element_i64(al, a, 1, ES_64);
    read_vec_element_i64(bh, b, 0, ES_64);
    read_vec_element_i64(bl, b, 1, ES_64);
    fn(dl, dh, al, ah, bl, bh);
    write_vec_element_i64(dh, d, 0, ES_64);
    write_vec_element_i64(dl, d, 1, ES_64);

    tcg_temp_free_i64(dh);
    tcg_temp_free_i64(dl);
    tcg_temp_free_i64(ah);
    tcg_temp_free_i64(al);
    tcg_temp_free_i64(bh);
    tcg_temp_free_i64(bl);
}

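/*
 * Usage sketch: together with the element accessors above, a full 128 bit
 * operation can be composed from the two-word TCG primitives, e.g.
 *     gen_gvec128_3_i64(tcg_gen_add2_i64, v1, v2, v3);
 * performs a 128 bit addition, as done by op_va() below for ES_128.
 */
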
typedef void (*gen_gvec128_4_i64_fn)(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al,
                                     TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh,
                                     TCGv_i64 cl, TCGv_i64 ch);
static void gen_gvec128_4_i64(gen_gvec128_4_i64_fn fn, uint8_t d, uint8_t a,
                              uint8_t b, uint8_t c)
{
    TCGv_i64 dh = tcg_temp_new_i64();
    TCGv_i64 dl = tcg_temp_new_i64();
    TCGv_i64 ah = tcg_temp_new_i64();
    TCGv_i64 al = tcg_temp_new_i64();
    TCGv_i64 bh = tcg_temp_new_i64();
    TCGv_i64 bl = tcg_temp_new_i64();
    TCGv_i64 ch = tcg_temp_new_i64();
    TCGv_i64 cl = tcg_temp_new_i64();

    read_vec_element_i64(ah, a, 0, ES_64);
    read_vec_element_i64(al, a, 1, ES_64);
    read_vec_element_i64(bh, b, 0, ES_64);
    read_vec_element_i64(bl, b, 1, ES_64);
    read_vec_element_i64(ch, c, 0, ES_64);
    read_vec_element_i64(cl, c, 1, ES_64);
    fn(dl, dh, al, ah, bl, bh, cl, ch);
    write_vec_element_i64(dh, d, 0, ES_64);
    write_vec_element_i64(dl, d, 1, ES_64);

    tcg_temp_free_i64(dh);
    tcg_temp_free_i64(dl);
    tcg_temp_free_i64(ah);
    tcg_temp_free_i64(al);
    tcg_temp_free_i64(bh);
    tcg_temp_free_i64(bl);
    tcg_temp_free_i64(ch);
    tcg_temp_free_i64(cl);
}

static void gen_gvec_dupi(uint8_t es, uint8_t reg, uint64_t c)
{
    switch (es) {
    case ES_8:
        tcg_gen_gvec_dup8i(vec_full_reg_offset(reg), 16, 16, c);
        break;
    case ES_16:
        tcg_gen_gvec_dup16i(vec_full_reg_offset(reg), 16, 16, c);
        break;
    case ES_32:
        tcg_gen_gvec_dup32i(vec_full_reg_offset(reg), 16, 16, c);
        break;
    case ES_64:
        gen_gvec_dup64i(reg, c);
        break;
    default:
        g_assert_not_reached();
    }
}

static void zero_vec(uint8_t reg)
{
    tcg_gen_gvec_dup8i(vec_full_reg_offset(reg), 16, 16, 0);
}

static DisasJumpType op_vge(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    read_vec_element_i64(tmp, get_field(s->fields, v2), enr, es);
    tcg_gen_add_i64(o->addr1, o->addr1, tmp);
    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 0);

    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    write_vec_element_i64(tmp, get_field(s->fields, v1), enr, es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static uint64_t generate_byte_mask(uint8_t mask)
{
    uint64_t r = 0;
    int i;

    for (i = 0; i < 8; i++) {
        if ((mask >> i) & 1) {
            r |= 0xffull << (i * 8);
        }
    }
    return r;
}

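/*
 * Worked example: generate_byte_mask(0x81) sets bytes 0 and 7 and returns
 * 0xff000000000000ffull; mask bit i selects byte i of the result.
 */
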
static DisasJumpType op_vgbm(DisasContext *s, DisasOps *o)
{
    const uint16_t i2 = get_field(s->fields, i2);

    if (i2 == (i2 & 0xff) * 0x0101) {
        /*
         * Masks for both 64 bit elements of the vector are the same.
         * Trust tcg to produce a good constant loading.
         */
        gen_gvec_dup64i(get_field(s->fields, v1),
                        generate_byte_mask(i2 & 0xff));
    } else {
        TCGv_i64 t = tcg_temp_new_i64();

        tcg_gen_movi_i64(t, generate_byte_mask(i2 >> 8));
        write_vec_element_i64(t, get_field(s->fields, v1), 0, ES_64);
        tcg_gen_movi_i64(t, generate_byte_mask(i2));
        write_vec_element_i64(t, get_field(s->fields, v1), 1, ES_64);
        tcg_temp_free_i64(t);
    }
    return DISAS_NEXT;
}

static DisasJumpType op_vgm(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m4);
    const uint8_t bits = NUM_VEC_ELEMENT_BITS(es);
    const uint8_t i2 = get_field(s->fields, i2) & (bits - 1);
    const uint8_t i3 = get_field(s->fields, i3) & (bits - 1);
    uint64_t mask = 0;
    int i;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* generate the mask - take care of wrapping */
    for (i = i2; ; i = (i + 1) % bits) {
        mask |= 1ull << (bits - i - 1);
        if (i == i3) {
            break;
        }
    }

    gen_gvec_dupi(es, get_field(s->fields, v1), mask);
    return DISAS_NEXT;
}

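/*
 * Worked example: for es = ES_8 (bits = 8), i2 = 5 and i3 = 2, the loop
 * wraps around and sets bit positions 5..7 and 0..2 (counted from the MSB),
 * producing the per-element mask 0b11100111 = 0xe7.
 */
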
static DisasJumpType op_vl(DisasContext *s, DisasOps *o)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_TEQ);
    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
    tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEQ);
    write_vec_element_i64(t0, get_field(s->fields, v1), 0, ES_64);
    write_vec_element_i64(t1, get_field(s->fields, v1), 1, ES_64);
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
    return DISAS_NEXT;
}

static DisasJumpType op_vlr(DisasContext *s, DisasOps *o)
{
    gen_gvec_mov(get_field(s->fields, v1), get_field(s->fields, v2));
    return DISAS_NEXT;
}

static DisasJumpType op_vlrep(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    gen_gvec_dup_i64(es, get_field(s->fields, v1), tmp);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vle(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    write_vec_element_i64(tmp, get_field(s->fields, v1), enr, es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vlei(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_const_i64((int16_t)get_field(s->fields, i2));
    write_vec_element_i64(tmp, get_field(s->fields, v1), enr, es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vlgv(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m4);
    TCGv_ptr ptr;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* fast path if we don't need the register content */
    if (!get_field(s->fields, b2)) {
        uint8_t enr = get_field(s->fields, d2) & (NUM_VEC_ELEMENTS(es) - 1);

        read_vec_element_i64(o->out, get_field(s->fields, v3), enr, es);
        return DISAS_NEXT;
    }

    ptr = tcg_temp_new_ptr();
    get_vec_element_ptr_i64(ptr, get_field(s->fields, v3), o->addr1, es);
    switch (es) {
    case ES_8:
        tcg_gen_ld8u_i64(o->out, ptr, 0);
        break;
    case ES_16:
        tcg_gen_ld16u_i64(o->out, ptr, 0);
        break;
    case ES_32:
        tcg_gen_ld32u_i64(o->out, ptr, 0);
        break;
    case ES_64:
        tcg_gen_ld_i64(o->out, ptr, 0);
        break;
    default:
        g_assert_not_reached();
    }
    tcg_temp_free_ptr(ptr);

    return DISAS_NEXT;
}

static DisasJumpType op_vllez(DisasContext *s, DisasOps *o)
{
    uint8_t es = get_field(s->fields, m3);
    uint8_t enr;
    TCGv_i64 t;

    switch (es) {
    /* rightmost sub-element of leftmost doubleword */
    case ES_8:
        enr = 7;
        break;
    case ES_16:
        enr = 3;
        break;
    case ES_32:
        enr = 1;
        break;
    case ES_64:
        enr = 0;
        break;
    /* leftmost sub-element of leftmost doubleword */
    case 6:
        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
            es = ES_32;
            enr = 0;
            break;
        }
        /* fallthrough */
    default:
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    t = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(t, o->addr1, get_mem_index(s), MO_TE | es);
    zero_vec(get_field(s->fields, v1));
    write_vec_element_i64(t, get_field(s->fields, v1), enr, es);
    tcg_temp_free_i64(t);
    return DISAS_NEXT;
}

static DisasJumpType op_vlm(DisasContext *s, DisasOps *o)
{
    const uint8_t v3 = get_field(s->fields, v3);
    uint8_t v1 = get_field(s->fields, v1);
    TCGv_i64 t0, t1;

    if (v3 < v1 || (v3 - v1 + 1) > 16) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /*
     * Check for possible access exceptions by trying to load the last
     * element. The first element will be checked first anyway.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();
    gen_addi_and_wrap_i64(s, t0, o->addr1, (v3 - v1) * 16 + 8);
    tcg_gen_qemu_ld_i64(t0, t0, get_mem_index(s), MO_TEQ);

    for (;; v1++) {
        tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEQ);
        write_vec_element_i64(t1, v1, 0, ES_64);
        if (v1 == v3) {
            break;
        }
        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
        tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEQ);
        write_vec_element_i64(t1, v1, 1, ES_64);
        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
    }

    /* Store the last element, loaded first */
    write_vec_element_i64(t0, v1, 1, ES_64);

    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
    return DISAS_NEXT;
}

static DisasJumpType op_vlbb(DisasContext *s, DisasOps *o)
{
    const int64_t block_size = (1ull << (get_field(s->fields, m3) + 6));
    const int v1_offs = vec_full_reg_offset(get_field(s->fields, v1));
    TCGv_ptr a0;
    TCGv_i64 bytes;

    if (get_field(s->fields, m3) > 6) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    bytes = tcg_temp_new_i64();
    a0 = tcg_temp_new_ptr();
    /* calculate the number of bytes until the next block boundary */
    tcg_gen_ori_i64(bytes, o->addr1, -block_size);
    tcg_gen_neg_i64(bytes, bytes);

    tcg_gen_addi_ptr(a0, cpu_env, v1_offs);
    gen_helper_vll(cpu_env, a0, o->addr1, bytes);
    tcg_temp_free_i64(bytes);
    tcg_temp_free_ptr(a0);
    return DISAS_NEXT;
}

static DisasJumpType op_vlvg(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m4);
    TCGv_ptr ptr;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* fast path if we don't need the register content */
    if (!get_field(s->fields, b2)) {
        uint8_t enr = get_field(s->fields, d2) & (NUM_VEC_ELEMENTS(es) - 1);

        write_vec_element_i64(o->in2, get_field(s->fields, v1), enr, es);
        return DISAS_NEXT;
    }

    ptr = tcg_temp_new_ptr();
    get_vec_element_ptr_i64(ptr, get_field(s->fields, v1), o->addr1, es);
    switch (es) {
    case ES_8:
        tcg_gen_st8_i64(o->in2, ptr, 0);
        break;
    case ES_16:
        tcg_gen_st16_i64(o->in2, ptr, 0);
        break;
    case ES_32:
        tcg_gen_st32_i64(o->in2, ptr, 0);
        break;
    case ES_64:
        tcg_gen_st_i64(o->in2, ptr, 0);
        break;
    default:
        g_assert_not_reached();
    }
    tcg_temp_free_ptr(ptr);

    return DISAS_NEXT;
}

static DisasJumpType op_vlvgp(DisasContext *s, DisasOps *o)
{
    write_vec_element_i64(o->in1, get_field(s->fields, v1), 0, ES_64);
    write_vec_element_i64(o->in2, get_field(s->fields, v1), 1, ES_64);
    return DISAS_NEXT;
}

static DisasJumpType op_vll(DisasContext *s, DisasOps *o)
{
    const int v1_offs = vec_full_reg_offset(get_field(s->fields, v1));
    TCGv_ptr a0 = tcg_temp_new_ptr();

    /* convert highest index into an actual length */
    tcg_gen_addi_i64(o->in2, o->in2, 1);
    tcg_gen_addi_ptr(a0, cpu_env, v1_offs);
    gen_helper_vll(cpu_env, a0, o->addr1, o->in2);
    tcg_temp_free_ptr(a0);
    return DISAS_NEXT;
}

static DisasJumpType op_vmr(DisasContext *s, DisasOps *o)
{
    const uint8_t v1 = get_field(s->fields, v1);
    const uint8_t v2 = get_field(s->fields, v2);
    const uint8_t v3 = get_field(s->fields, v3);
    const uint8_t es = get_field(s->fields, m4);
    int dst_idx, src_idx;
    TCGv_i64 tmp;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    if (s->fields->op2 == 0x61) {
        /* iterate backwards to avoid overwriting data we might need later */
        for (dst_idx = NUM_VEC_ELEMENTS(es) - 1; dst_idx >= 0; dst_idx--) {
            src_idx = dst_idx / 2;
            if (dst_idx % 2 == 0) {
                read_vec_element_i64(tmp, v2, src_idx, es);
            } else {
                read_vec_element_i64(tmp, v3, src_idx, es);
            }
            write_vec_element_i64(tmp, v1, dst_idx, es);
        }
    } else {
        /* iterate forward to avoid overwriting data we might need later */
        for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(es); dst_idx++) {
            src_idx = (dst_idx + NUM_VEC_ELEMENTS(es)) / 2;
            if (dst_idx % 2 == 0) {
                read_vec_element_i64(tmp, v2, src_idx, es);
            } else {
                read_vec_element_i64(tmp, v3, src_idx, es);
            }
            write_vec_element_i64(tmp, v1, dst_idx, es);
        }
    }
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

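/*
 * In other words: VMRH (op2 == 0x61) interleaves the leftmost halves of
 * v2 and v3 (v2[0], v3[0], v2[1], v3[1], ...), while VMRL interleaves the
 * rightmost halves, each pair taken at the given element size.
 */
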
static DisasJumpType op_vpk(DisasContext *s, DisasOps *o)
{
    const uint8_t v1 = get_field(s->fields, v1);
    const uint8_t v2 = get_field(s->fields, v2);
    const uint8_t v3 = get_field(s->fields, v3);
    const uint8_t es = get_field(s->fields, m4);
    static gen_helper_gvec_3 * const vpk[3] = {
        gen_helper_gvec_vpk16,
        gen_helper_gvec_vpk32,
        gen_helper_gvec_vpk64,
    };
    static gen_helper_gvec_3 * const vpks[3] = {
        gen_helper_gvec_vpks16,
        gen_helper_gvec_vpks32,
        gen_helper_gvec_vpks64,
    };
    static gen_helper_gvec_3_ptr * const vpks_cc[3] = {
        gen_helper_gvec_vpks_cc16,
        gen_helper_gvec_vpks_cc32,
        gen_helper_gvec_vpks_cc64,
    };
    static gen_helper_gvec_3 * const vpkls[3] = {
        gen_helper_gvec_vpkls16,
        gen_helper_gvec_vpkls32,
        gen_helper_gvec_vpkls64,
    };
    static gen_helper_gvec_3_ptr * const vpkls_cc[3] = {
        gen_helper_gvec_vpkls_cc16,
        gen_helper_gvec_vpkls_cc32,
        gen_helper_gvec_vpkls_cc64,
    };

    if (es == ES_8 || es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    switch (s->fields->op2) {
    case 0x97:
        if (get_field(s->fields, m5) & 0x1) {
            gen_gvec_3_ptr(v1, v2, v3, cpu_env, 0, vpks_cc[es - 1]);
            set_cc_static(s);
        } else {
            gen_gvec_3_ool(v1, v2, v3, 0, vpks[es - 1]);
        }
        break;
    case 0x95:
        if (get_field(s->fields, m5) & 0x1) {
            gen_gvec_3_ptr(v1, v2, v3, cpu_env, 0, vpkls_cc[es - 1]);
            set_cc_static(s);
        } else {
            gen_gvec_3_ool(v1, v2, v3, 0, vpkls[es - 1]);
        }
        break;
    case 0x94:
        /* If sources and destination don't overlap -> fast path */
        if (v1 != v2 && v1 != v3) {
            const uint8_t src_es = get_field(s->fields, m4);
            const uint8_t dst_es = src_es - 1;
            TCGv_i64 tmp = tcg_temp_new_i64();
            int dst_idx, src_idx;

            for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(dst_es); dst_idx++) {
                src_idx = dst_idx;
                if (src_idx < NUM_VEC_ELEMENTS(src_es)) {
                    read_vec_element_i64(tmp, v2, src_idx, src_es);
                } else {
                    src_idx -= NUM_VEC_ELEMENTS(src_es);
                    read_vec_element_i64(tmp, v3, src_idx, src_es);
                }
                write_vec_element_i64(tmp, v1, dst_idx, dst_es);
            }
            tcg_temp_free_i64(tmp);
        } else {
            gen_gvec_3_ool(v1, v2, v3, 0, vpk[es - 1]);
        }
        break;
    default:
        g_assert_not_reached();
    }
    return DISAS_NEXT;
}

static DisasJumpType op_vperm(DisasContext *s, DisasOps *o)
{
    gen_gvec_4_ool(get_field(s->fields, v1), get_field(s->fields, v2),
                   get_field(s->fields, v3), get_field(s->fields, v4),
                   0, gen_helper_gvec_vperm);
    return DISAS_NEXT;
}

static DisasJumpType op_vpdi(DisasContext *s, DisasOps *o)
{
    const uint8_t i2 = extract32(get_field(s->fields, m4), 2, 1);
    const uint8_t i3 = extract32(get_field(s->fields, m4), 0, 1);
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();

    read_vec_element_i64(t0, get_field(s->fields, v2), i2, ES_64);
    read_vec_element_i64(t1, get_field(s->fields, v3), i3, ES_64);
    write_vec_element_i64(t0, get_field(s->fields, v1), 0, ES_64);
    write_vec_element_i64(t1, get_field(s->fields, v1), 1, ES_64);
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
    return DISAS_NEXT;
}

static DisasJumpType op_vrep(DisasContext *s, DisasOps *o)
{
    const uint8_t enr = get_field(s->fields, i2);
    const uint8_t es = get_field(s->fields, m4);

    if (es > ES_64 || !valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tcg_gen_gvec_dup_mem(es, vec_full_reg_offset(get_field(s->fields, v1)),
                         vec_reg_offset(get_field(s->fields, v3), enr, es),
                         16, 16);
    return DISAS_NEXT;
}

static DisasJumpType op_vrepi(DisasContext *s, DisasOps *o)
{
    const int64_t data = (int16_t)get_field(s->fields, i2);
    const uint8_t es = get_field(s->fields, m3);

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    gen_gvec_dupi(es, get_field(s->fields, v1), data);
    return DISAS_NEXT;
}

static DisasJumpType op_vsce(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    read_vec_element_i64(tmp, get_field(s->fields, v2), enr, es);
    tcg_gen_add_i64(o->addr1, o->addr1, tmp);
    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 0);

    read_vec_element_i64(tmp, get_field(s->fields, v1), enr, es);
    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static void gen_sel_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 c)
{
    TCGv_i64 t = tcg_temp_new_i64();

    /* bit in c not set -> copy bit from b */
    tcg_gen_andc_i64(t, b, c);
    /* bit in c set -> copy bit from a */
    tcg_gen_and_i64(d, a, c);
    /* merge the results */
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}

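/*
 * I.e., this computes the bitwise select d = (a & c) | (b & ~c) used by
 * VSEL; gen_sel_vec() below is the same expansion on host vector types.
 */
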
static void gen_sel_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b,
                        TCGv_vec c)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_andc_vec(vece, t, b, c);
    tcg_gen_and_vec(vece, d, a, c);
    tcg_gen_or_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}

static DisasJumpType op_vsel(DisasContext *s, DisasOps *o)
{
    static const GVecGen4 gvec_op = {
        .fni8 = gen_sel_i64,
        .fniv = gen_sel_vec,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    gen_gvec_4(get_field(s->fields, v1), get_field(s->fields, v2),
               get_field(s->fields, v3), get_field(s->fields, v4), &gvec_op);
    return DISAS_NEXT;
}

static DisasJumpType op_vseg(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m3);
    int idx1, idx2;
    TCGv_i64 tmp;

    switch (es) {
    case ES_8:
        idx1 = 7;
        idx2 = 15;
        break;
    case ES_16:
        idx1 = 3;
        idx2 = 7;
        break;
    case ES_32:
        idx1 = 1;
        idx2 = 3;
        break;
    default:
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    read_vec_element_i64(tmp, get_field(s->fields, v2), idx1, es | MO_SIGN);
    write_vec_element_i64(tmp, get_field(s->fields, v1), 0, ES_64);
    read_vec_element_i64(tmp, get_field(s->fields, v2), idx2, es | MO_SIGN);
    write_vec_element_i64(tmp, get_field(s->fields, v1), 1, ES_64);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vst(DisasContext *s, DisasOps *o)
{
    TCGv_i64 tmp = tcg_const_i64(16);

    /* Probe write access before actually modifying memory */
    gen_helper_probe_write_access(cpu_env, o->addr1, tmp);

    read_vec_element_i64(tmp, get_field(s->fields, v1), 0, ES_64);
    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ);
    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
    read_vec_element_i64(tmp, get_field(s->fields, v1), 1, ES_64);
    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vste(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    read_vec_element_i64(tmp, get_field(s->fields, v1), enr, es);
    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vstm(DisasContext *s, DisasOps *o)
{
    const uint8_t v3 = get_field(s->fields, v3);
    uint8_t v1 = get_field(s->fields, v1);
    TCGv_i64 tmp;

    if (v3 < v1 || (v3 - v1 + 1) > 16) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* Probe write access before actually modifying memory */
    tmp = tcg_const_i64((v3 - v1 + 1) * 16);
    gen_helper_probe_write_access(cpu_env, o->addr1, tmp);

    for (;; v1++) {
        read_vec_element_i64(tmp, v1, 0, ES_64);
        tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ);
        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
        read_vec_element_i64(tmp, v1, 1, ES_64);
        tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ);
        if (v1 == v3) {
            break;
        }
        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
    }
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vstl(DisasContext *s, DisasOps *o)
{
    const int v1_offs = vec_full_reg_offset(get_field(s->fields, v1));
    TCGv_ptr a0 = tcg_temp_new_ptr();

    /* convert highest index into an actual length */
    tcg_gen_addi_i64(o->in2, o->in2, 1);
    tcg_gen_addi_ptr(a0, cpu_env, v1_offs);
    gen_helper_vstl(cpu_env, a0, o->addr1, o->in2);
    tcg_temp_free_ptr(a0);
    return DISAS_NEXT;
}

static DisasJumpType op_vup(DisasContext *s, DisasOps *o)
{
    const bool logical = s->fields->op2 == 0xd4 || s->fields->op2 == 0xd5;
    const uint8_t v1 = get_field(s->fields, v1);
    const uint8_t v2 = get_field(s->fields, v2);
    const uint8_t src_es = get_field(s->fields, m3);
    const uint8_t dst_es = src_es + 1;
    int dst_idx, src_idx;
    TCGv_i64 tmp;

    if (src_es > ES_32) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    if (s->fields->op2 == 0xd7 || s->fields->op2 == 0xd5) {
        /* iterate backwards to avoid overwriting data we might need later */
        for (dst_idx = NUM_VEC_ELEMENTS(dst_es) - 1; dst_idx >= 0; dst_idx--) {
            src_idx = dst_idx;
            read_vec_element_i64(tmp, v2, src_idx,
                                 src_es | (logical ? 0 : MO_SIGN));
            write_vec_element_i64(tmp, v1, dst_idx, dst_es);
        }
    } else {
        /* iterate forward to avoid overwriting data we might need later */
        for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(dst_es); dst_idx++) {
            src_idx = dst_idx + NUM_VEC_ELEMENTS(src_es) / 2;
            read_vec_element_i64(tmp, v2, src_idx,
                                 src_es | (logical ? 0 : MO_SIGN));
            write_vec_element_i64(tmp, v1, dst_idx, dst_es);
        }
    }
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_va(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m4);

    if (es > ES_128) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    } else if (es == ES_128) {
        gen_gvec128_3_i64(tcg_gen_add2_i64, get_field(s->fields, v1),
                          get_field(s->fields, v2), get_field(s->fields, v3));
        return DISAS_NEXT;
    }
    gen_gvec_fn_3(add, es, get_field(s->fields, v1), get_field(s->fields, v2),
                  get_field(s->fields, v3));
    return DISAS_NEXT;
}

static void gen_acc(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, uint8_t es)
{
    const uint8_t msb_bit_nr = NUM_VEC_ELEMENT_BITS(es) - 1;
    TCGv_i64 msb_mask = tcg_const_i64(dup_const(es, 1ull << msb_bit_nr));
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2 = tcg_temp_new_i64();
    TCGv_i64 t3 = tcg_temp_new_i64();

    /* Calculate the carry into the MSB, ignoring the old MSBs */
    tcg_gen_andc_i64(t1, a, msb_mask);
    tcg_gen_andc_i64(t2, b, msb_mask);
    tcg_gen_add_i64(t1, t1, t2);
    /* Calculate the MSB without any carry into it */
    tcg_gen_xor_i64(t3, a, b);
    /* Calculate the carry out of the MSB in the MSB bit position */
    tcg_gen_and_i64(d, a, b);
    tcg_gen_and_i64(t1, t1, t3);
    tcg_gen_or_i64(d, d, t1);
    /* Isolate and shift the carry into position */
    tcg_gen_and_i64(d, d, msb_mask);
    tcg_gen_shri_i64(d, d, msb_bit_nr);

    tcg_temp_free_i64(t1);
    tcg_temp_free_i64(t2);
    tcg_temp_free_i64(t3);
}

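/*
 * The logic above is the classic carry-out formula evaluated per element:
 * carry_out = (a_msb & b_msb) | (carry_into_msb & (a_msb ^ b_msb)). After
 * the masked addition, t1 holds the element sums without the original
 * MSBs, so its MSB position contains exactly the carry into the MSB.
 */
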
static void gen_acc8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    gen_acc(d, a, b, ES_8);
}

static void gen_acc16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    gen_acc(d, a, b, ES_16);
}

static void gen_acc_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_add_i32(t, a, b);
    tcg_gen_setcond_i32(TCG_COND_LTU, d, t, b);
    tcg_temp_free_i32(t);
}

static void gen_acc_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_add_i64(t, a, b);
    tcg_gen_setcond_i64(TCG_COND_LTU, d, t, b);
    tcg_temp_free_i64(t);
}

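/*
 * For 32/64 bit elements the carry can be computed more directly: after
 * t = a + b, the addition carried out iff t < b (unsigned), which
 * setcond(LTU) turns into the 0/1 carry value.
 */
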
static void gen_acc2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al,
                         TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
{
    TCGv_i64 th = tcg_temp_new_i64();
    TCGv_i64 tl = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_const_i64(0);

    tcg_gen_add2_i64(tl, th, al, zero, bl, zero);
    tcg_gen_add2_i64(tl, th, th, zero, ah, zero);
    tcg_gen_add2_i64(tl, dl, tl, th, bh, zero);
    tcg_gen_mov_i64(dh, zero);

    tcg_temp_free_i64(th);
    tcg_temp_free_i64(tl);
    tcg_temp_free_i64(zero);
}

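/*
 * gen_acc2_i64() computes only the carry-out of a 128 bit addition: the
 * three add2 steps propagate the carry of the low doublewords into the
 * sum of the high doublewords, so dl ends up as 0 or 1 and dh is zero.
 */
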
static DisasJumpType op_vacc(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m4);
    static const GVecGen3 g[4] = {
        { .fni8 = gen_acc8_i64, },
        { .fni8 = gen_acc16_i64, },
        { .fni4 = gen_acc_i32, },
        { .fni8 = gen_acc_i64, },
    };

    if (es > ES_128) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    } else if (es == ES_128) {
        gen_gvec128_3_i64(gen_acc2_i64, get_field(s->fields, v1),
                          get_field(s->fields, v2), get_field(s->fields, v3));
        return DISAS_NEXT;
    }
    gen_gvec_3(get_field(s->fields, v1), get_field(s->fields, v2),
               get_field(s->fields, v3), &g[es]);
    return DISAS_NEXT;
}

static void gen_ac2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah,
                        TCGv_i64 bl, TCGv_i64 bh, TCGv_i64 cl, TCGv_i64 ch)
{
    TCGv_i64 tl = tcg_temp_new_i64();
    TCGv_i64 th = tcg_const_i64(0);

    /* extract the carry only */
    tcg_gen_extract_i64(tl, cl, 0, 1);
    tcg_gen_add2_i64(dl, dh, al, ah, bl, bh);
    tcg_gen_add2_i64(dl, dh, dl, dh, tl, th);

    tcg_temp_free_i64(tl);
    tcg_temp_free_i64(th);
}

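/*
 * For VAC, the carry input is the rightmost bit (bit 127) of the carry
 * operand, i.e. the least significant bit of its low doubleword cl, which
 * the extract above isolates before the double-width add2 chain adds it in.
 */
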
static DisasJumpType op_vac(DisasContext *s, DisasOps *o)
{
    if (get_field(s->fields, m5) != ES_128) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    gen_gvec128_4_i64(gen_ac2_i64, get_field(s->fields, v1),
                      get_field(s->fields, v2), get_field(s->fields, v3),
                      get_field(s->fields, v4));
    return DISAS_NEXT;
}