2 * Ingenic XBurst Media eXtension Unit (MXU) translation routines.
4 * Copyright (c) 2004-2005 Jocelyn Mayer
5 * Copyright (c) 2006 Marius Groeger (FPU operations)
6 * Copyright (c) 2006 Thiemo Seufer (MIPS32R2 support)
7 * Copyright (c) 2009 CodeSourcery (MIPS16 and microMIPS support)
8 * Copyright (c) 2012 Jia Liu & Dongxue Zhang (MIPS ASE DSP support)
10 * SPDX-License-Identifier: LGPL-2.1-or-later
14 * "XBurst® Instruction Set Architecture MIPS eXtension/enhanced Unit
15 * Programming Manual", Ingenic Semiconductor Co, Ltd., revision June 2, 2017
18 #include "qemu/osdep.h"
19 #include "translate.h"
23 * AN OVERVIEW OF MXU EXTENSION INSTRUCTION SET
24 * ============================================
27 * MXU (full name: MIPS eXtension/enhanced Unit) is a SIMD extension of MIPS32
28 * instruction set. It is designed to fit the needs of signal, graphical and
29 * video processing applications. MXU instruction set is used in Xburst family
30 * of microprocessors by Ingenic.
32 * MXU unit contains 17 registers called X0-X16. X0 is always zero, and X16 is
33 * the control register.
36 * The notation used in MXU assembler mnemonics
37 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
41 * XRa, XRb, XRc, XRd - MXU registers
42 * Rb, Rc, Rd, Rs, Rt - general purpose MIPS registers
44 * Non-register operands:
46 * aptn1 - 1-bit accumulate add/subtract pattern
47 * aptn2 - 2-bit accumulate add/subtract pattern
48 * eptn2 - 2-bit execute add/subtract pattern
49 * optn2 - 2-bit operand pattern
50 * optn3 - 3-bit operand pattern
51 * sft4 - 4-bit shift amount
52 * strd2 - 2-bit stride amount
56 * Level of parallelism: Operand size:
57 * S - single operation at a time 32 - word
58 * D - two operations in parallel 16 - half word
59 * Q - four operations in parallel 8 - byte
63 * ADD - Add or subtract
64 * ADDC - Add with carry-in
66 * ASUM - Sum together then accumulate (add or subtract)
67 * ASUMC - Sum together then accumulate (add or subtract) with carry-in
68 * AVG - Average between 2 operands
69 * ABD - Absolute difference
71 * AND - Logical bitwise 'and' operation
74 * I2M - Move from GPR register to MXU register
75 * LDD - Load data from memory to XRF
76 * LDI - Load data from memory to XRF (and increase the address base)
77 * LUI - Load unsigned immediate
79 * MULU - Unsigned multiply
80 * MADD - 64-bit operand add 32x32 product
81 * MSUB - 64-bit operand subtract 32x32 product
82 * MAC - Multiply and accumulate (add or subtract)
83 * MAD - Multiply and add or subtract
84 * MAX - Maximum between 2 operands
85 * MIN - Minimum between 2 operands
86 * M2I - Move from MXU register to GPR register
88 * MOVN - Move if non-zero
89 * NOR - Logical bitwise 'nor' operation
90 * OR - Logical bitwise 'or' operation
91 * STD - Store data from XRF to memory
92 * SDI - Store data from XRF to memory (and increase the address base)
93 * SLT - Set on less than comparison
94 * SAD - Sum of absolute differences
95 * SLL - Logical shift left
96 * SLR - Logical shift right
97 * SAR - Arithmetic shift right
100 * SCOP - Calculate x’s scope (-1, means x<0; 0, means x==0; 1, means x>0)
101 * XOR - Logical bitwise 'exclusive or' operation
106 * F - Fixed point multiplication
107 * L - Low part result
109 * V - Variable instead of immediate
110 * W - Combine above L and V
113 * The list of MXU instructions grouped by functionality
114 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
116 * Load/Store instructions Multiplication instructions
117 * ----------------------- ---------------------------
119 * S32LDD XRa, Rb, s12 S32MADD XRa, XRd, Rs, Rt
120 * S32STD XRa, Rb, s12 S32MADDU XRa, XRd, Rs, Rt
121 * S32LDDV XRa, Rb, rc, strd2 S32MSUB XRa, XRd, Rs, Rt
122 * S32STDV XRa, Rb, rc, strd2 S32MSUBU XRa, XRd, Rs, Rt
123 * S32LDI XRa, Rb, s12 S32MUL XRa, XRd, Rs, Rt
124 * S32SDI XRa, Rb, s12 S32MULU XRa, XRd, Rs, Rt
125 * S32LDIV XRa, Rb, rc, strd2 D16MUL XRa, XRb, XRc, XRd, optn2
126 * S32SDIV XRa, Rb, rc, strd2 D16MULE XRa, XRb, XRc, optn2
127 * S32LDDR XRa, Rb, s12 D16MULF XRa, XRb, XRc, optn2
128 * S32STDR XRa, Rb, s12 D16MAC XRa, XRb, XRc, XRd, aptn2, optn2
129 * S32LDDVR XRa, Rb, rc, strd2 D16MACE XRa, XRb, XRc, XRd, aptn2, optn2
130 * S32STDVR XRa, Rb, rc, strd2 D16MACF XRa, XRb, XRc, XRd, aptn2, optn2
131 * S32LDIR XRa, Rb, s12 D16MADL XRa, XRb, XRc, XRd, aptn2, optn2
132 * S32SDIR XRa, Rb, s12 S16MAD XRa, XRb, XRc, XRd, aptn1, optn2
133 * S32LDIVR XRa, Rb, rc, strd2 Q8MUL XRa, XRb, XRc, XRd
134 * S32SDIVR XRa, Rb, rc, strd2 Q8MULSU XRa, XRb, XRc, XRd
135 * S16LDD XRa, Rb, s10, eptn2 Q8MAC XRa, XRb, XRc, XRd, aptn2
136 * S16STD XRa, Rb, s10, eptn2 Q8MACSU XRa, XRb, XRc, XRd, aptn2
137 * S16LDI XRa, Rb, s10, eptn2 Q8MADL XRa, XRb, XRc, XRd, aptn2
138 * S16SDI XRa, Rb, s10, eptn2
139 * S8LDD XRa, Rb, s8, eptn3
140 * S8STD XRa, Rb, s8, eptn3 Addition and subtraction instructions
141 * S8LDI XRa, Rb, s8, eptn3 -------------------------------------
142 * S8SDI XRa, Rb, s8, eptn3
143 * LXW Rd, Rs, Rt, strd2 D32ADD XRa, XRb, XRc, XRd, eptn2
144 * LXH Rd, Rs, Rt, strd2 D32ADDC XRa, XRb, XRc, XRd
145 * LXHU Rd, Rs, Rt, strd2 D32ACC XRa, XRb, XRc, XRd, eptn2
146 * LXB Rd, Rs, Rt, strd2 D32ACCM XRa, XRb, XRc, XRd, eptn2
147 * LXBU Rd, Rs, Rt, strd2 D32ASUM XRa, XRb, XRc, XRd, eptn2
148 * S32CPS XRa, XRb, XRc
149 * Q16ADD XRa, XRb, XRc, XRd, eptn2, optn2
150 * Comparison instructions Q16ACC XRa, XRb, XRc, XRd, eptn2
151 * ----------------------- Q16ACCM XRa, XRb, XRc, XRd, eptn2
152 * D16ASUM XRa, XRb, XRc, XRd, eptn2
153 * S32MAX XRa, XRb, XRc D16CPS XRa, XRb, XRc
154 * S32MIN XRa, XRb, XRc D16AVG XRa, XRb, XRc
155 * S32SLT XRa, XRb, XRc D16AVGR XRa, XRb, XRc
156 * S32MOVZ XRa, XRb, XRc Q8ADD XRa, XRb, XRc, eptn2
157 * S32MOVN XRa, XRb, XRc Q8ADDE XRa, XRb, XRc, XRd, eptn2
158 * D16MAX XRa, XRb, XRc Q8ACCE XRa, XRb, XRc, XRd, eptn2
159 * D16MIN XRa, XRb, XRc Q8ABD XRa, XRb, XRc
160 * D16SLT XRa, XRb, XRc Q8SAD XRa, XRb, XRc, XRd
161 * D16MOVZ XRa, XRb, XRc Q8AVG XRa, XRb, XRc
162 * D16MOVN XRa, XRb, XRc Q8AVGR XRa, XRb, XRc
163 * Q8MAX XRa, XRb, XRc D8SUM XRa, XRb, XRc, XRd
164 * Q8MIN XRa, XRb, XRc D8SUMC XRa, XRb, XRc, XRd
165 * Q8SLT XRa, XRb, XRc
166 * Q8SLTU XRa, XRb, XRc
167 * Q8MOVZ XRa, XRb, XRc Shift instructions
168 * Q8MOVN XRa, XRb, XRc ------------------
170 * D32SLL XRa, XRb, XRc, XRd, sft4
171 * Bitwise instructions D32SLR XRa, XRb, XRc, XRd, sft4
172 * -------------------- D32SAR XRa, XRb, XRc, XRd, sft4
173 * D32SARL XRa, XRb, XRc, sft4
174 * S32NOR XRa, XRb, XRc D32SLLV XRa, XRb, Rb
175 * S32AND XRa, XRb, XRc D32SLRV XRa, XRb, Rb
176 * S32XOR XRa, XRb, XRc D32SARV XRa, XRb, Rb
177 * S32OR XRa, XRb, XRc D32SARW XRa, XRb, XRc, Rb
178 * Q16SLL XRa, XRb, XRc, XRd, sft4
179 * Q16SLR XRa, XRb, XRc, XRd, sft4
180 * Miscellaneous instructions Q16SAR XRa, XRb, XRc, XRd, sft4
181 * ------------------------- Q16SLLV XRa, XRb, Rb
182 * Q16SLRV XRa, XRb, Rb
183 * S32SFL XRa, XRb, XRc, XRd, optn2 Q16SARV XRa, XRb, Rb
184 * S32ALN XRa, XRb, XRc, Rb
185 * S32ALNI XRa, XRb, XRc, s3
186 * S32LUI XRa, s8, optn3 Move instructions
187 * S32EXTR XRa, XRb, Rb, bits5 -----------------
188 * S32EXTRV XRa, XRb, Rs, Rt
189 * Q16SCOP XRa, XRb, XRc, XRd S32M2I XRa, Rb
190 * Q16SAT XRa, XRb, XRc S32I2M XRa, Rb
193 * The opcode organization of MXU instructions
194 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
196 * The bits 31..26 of all MXU instructions are equal to 0x1C (also referred to
197 * as opcode SPECIAL2 in the base MIPS ISA). The organization and meaning of
198 * other bits up to the instruction level is as follows:
203 * ┌─ 000000 ─ OPC_MXU_S32MADD
204 * ├─ 000001 ─ OPC_MXU_S32MADDU
205 * ├─ 000010 ─ <not assigned> (non-MXU OPC_MUL)
208 * ├─ 000011 ─ OPC_MXU__POOL00 ─┬─ 000 ─ OPC_MXU_S32MAX
209 * │ ├─ 001 ─ OPC_MXU_S32MIN
210 * │ ├─ 010 ─ OPC_MXU_D16MAX
211 * │ ├─ 011 ─ OPC_MXU_D16MIN
212 * │ ├─ 100 ─ OPC_MXU_Q8MAX
213 * │ ├─ 101 ─ OPC_MXU_Q8MIN
214 * │ ├─ 110 ─ OPC_MXU_Q8SLT
215 * │ └─ 111 ─ OPC_MXU_Q8SLTU
216 * ├─ 000100 ─ OPC_MXU_S32MSUB
217 * ├─ 000101 ─ OPC_MXU_S32MSUBU 20..18
218 * ├─ 000110 ─ OPC_MXU__POOL01 ─┬─ 000 ─ OPC_MXU_S32SLT
219 * │ ├─ 001 ─ OPC_MXU_D16SLT
220 * │ ├─ 010 ─ OPC_MXU_D16AVG
221 * │ ├─ 011 ─ OPC_MXU_D16AVGR
222 * │ ├─ 100 ─ OPC_MXU_Q8AVG
223 * │ ├─ 101 ─ OPC_MXU_Q8AVGR
224 * │ └─ 111 ─ OPC_MXU_Q8ADD
227 * ├─ 000111 ─ OPC_MXU__POOL02 ─┬─ 000 ─ OPC_MXU_S32CPS
228 * │ ├─ 010 ─ OPC_MXU_D16CPS
229 * │ ├─ 100 ─ OPC_MXU_Q8ABD
230 * │ └─ 110 ─ OPC_MXU_Q16SAT
231 * ├─ 001000 ─ OPC_MXU_D16MUL
233 * ├─ 001001 ─ OPC_MXU__POOL03 ─┬─ 00 ─ OPC_MXU_D16MULF
234 * │ └─ 01 ─ OPC_MXU_D16MULE
235 * ├─ 001010 ─ OPC_MXU_D16MAC
236 * ├─ 001011 ─ OPC_MXU_D16MACF
237 * ├─ 001100 ─ OPC_MXU_D16MADL
238 * ├─ 001101 ─ OPC_MXU_S16MAD
239 * ├─ 001110 ─ OPC_MXU_Q16ADD
240 * ├─ 001111 ─ OPC_MXU_D16MACE 20 (13..10 don't care)
241 * │ ┌─ 0 ─ OPC_MXU_S32LDD
242 * ├─ 010000 ─ OPC_MXU__POOL04 ─┴─ 1 ─ OPC_MXU_S32LDDR
244 * │ 20 (13..10 don't care)
245 * ├─ 010001 ─ OPC_MXU__POOL05 ─┬─ 0 ─ OPC_MXU_S32STD
246 * │ └─ 1 ─ OPC_MXU_S32STDR
249 * ├─ 010010 ─ OPC_MXU__POOL06 ─┬─ 0000 ─ OPC_MXU_S32LDDV
250 * │ └─ 0001 ─ OPC_MXU_S32LDDVR
253 * ├─ 010011 ─ OPC_MXU__POOL07 ─┬─ 0000 ─ OPC_MXU_S32STDV
254 * │ └─ 0001 ─ OPC_MXU_S32STDVR
256 * │ 20 (13..10 don't care)
257 * ├─ 010100 ─ OPC_MXU__POOL08 ─┬─ 0 ─ OPC_MXU_S32LDI
258 * │ └─ 1 ─ OPC_MXU_S32LDIR
260 * │ 20 (13..10 don't care)
261 * ├─ 010101 ─ OPC_MXU__POOL09 ─┬─ 0 ─ OPC_MXU_S32SDI
262 * │ └─ 1 ─ OPC_MXU_S32SDIR
265 * ├─ 010110 ─ OPC_MXU__POOL10 ─┬─ 0000 ─ OPC_MXU_S32LDIV
266 * │ └─ 0001 ─ OPC_MXU_S32LDIVR
269 * ├─ 010111 ─ OPC_MXU__POOL11 ─┬─ 0000 ─ OPC_MXU_S32SDIV
270 * │ └─ 0001 ─ OPC_MXU_S32SDIVR
271 * ├─ 011000 ─ OPC_MXU_D32ADD (catches D32ADDC too)
273 * MXU ├─ 011001 ─ OPC_MXU__POOL12 ─┬─ 00 ─ OPC_MXU_D32ACC
274 * opcodes ─┤ ├─ 01 ─ OPC_MXU_D32ACCM
275 * │ └─ 10 ─ OPC_MXU_D32ASUM
276 * ├─ 011010 ─ <not assigned>
278 * ├─ 011011 ─ OPC_MXU__POOL13 ─┬─ 00 ─ OPC_MXU_Q16ACC
279 * │ ├─ 01 ─ OPC_MXU_Q16ACCM
280 * │ └─ 10 ─ OPC_MXU_D16ASUM
283 * ├─ 011100 ─ OPC_MXU__POOL14 ─┬─ 00 ─ OPC_MXU_Q8ADDE
284 * │ ├─ 01 ─ OPC_MXU_D8SUM
285 * ├─ 011101 ─ OPC_MXU_Q8ACCE └─ 10 ─ OPC_MXU_D8SUMC
286 * ├─ 011110 ─ <not assigned>
287 * ├─ 011111 ─ <not assigned>
288 * ├─ 100000 ─ <not assigned> (overlaps with CLZ)
289 * ├─ 100001 ─ <not assigned> (overlaps with CLO)
290 * ├─ 100010 ─ OPC_MXU_S8LDD
291 * ├─ 100011 ─ OPC_MXU_S8STD 15..14
292 * ├─ 100100 ─ OPC_MXU_S8LDI ┌─ 00 ─ OPC_MXU_S32MUL
293 * ├─ 100101 ─ OPC_MXU_S8SDI ├─ 01 ─ OPC_MXU_S32MULU
294 * │ ├─ 10 ─ OPC_MXU_S32EXTR
295 * ├─ 100110 ─ OPC_MXU__POOL15 ─┴─ 11 ─ OPC_MXU_S32EXTRV
298 * ├─ 100111 ─ OPC_MXU__POOL16 ─┬─ 000 ─ OPC_MXU_D32SARW
299 * │ ├─ 001 ─ OPC_MXU_S32ALN
300 * │ ├─ 010 ─ OPC_MXU_S32ALNI
301 * │ ├─ 011 ─ OPC_MXU_S32LUI
302 * │ ├─ 100 ─ OPC_MXU_S32NOR
303 * │ ├─ 101 ─ OPC_MXU_S32AND
304 * │ ├─ 110 ─ OPC_MXU_S32OR
305 * │ └─ 111 ─ OPC_MXU_S32XOR
308 * ├─ 101000 ─ OPC_MXU__POOL17 ─┬─ 000 ─ OPC_MXU_LXB
309 * │ ├─ 001 ─ OPC_MXU_LXH
310 * ├─ 101001 ─ <not assigned> ├─ 011 ─ OPC_MXU_LXW
311 * ├─ 101010 ─ OPC_MXU_S16LDD ├─ 100 ─ OPC_MXU_LXBU
312 * ├─ 101011 ─ OPC_MXU_S16STD └─ 101 ─ OPC_MXU_LXHU
313 * ├─ 101100 ─ OPC_MXU_S16LDI
314 * ├─ 101101 ─ OPC_MXU_S16SDI
315 * ├─ 101110 ─ OPC_MXU_S32M2I
316 * ├─ 101111 ─ OPC_MXU_S32I2M
317 * ├─ 110000 ─ OPC_MXU_D32SLL
318 * ├─ 110001 ─ OPC_MXU_D32SLR 20..18
319 * ├─ 110010 ─ OPC_MXU_D32SARL ┌─ 000 ─ OPC_MXU_D32SLLV
320 * ├─ 110011 ─ OPC_MXU_D32SAR ├─ 001 ─ OPC_MXU_D32SLRV
321 * ├─ 110100 ─ OPC_MXU_Q16SLL ├─ 011 ─ OPC_MXU_D32SARV
322 * ├─ 110101 ─ OPC_MXU_Q16SLR ├─ 100 ─ OPC_MXU_Q16SLLV
323 * │ ├─ 101 ─ OPC_MXU_Q16SLRV
324 * ├─ 110110 ─ OPC_MXU__POOL18 ─┴─ 111 ─ OPC_MXU_Q16SARV
326 * ├─ 110111 ─ OPC_MXU_Q16SAR
328 * ├─ 111000 ─ OPC_MXU__POOL19 ─┬─ 00 ─ OPC_MXU_Q8MUL
329 * │ └─ 10 ─ OPC_MXU_Q8MULSU
332 * ├─ 111001 ─ OPC_MXU__POOL20 ─┬─ 000 ─ OPC_MXU_Q8MOVZ
333 * │ ├─ 001 ─ OPC_MXU_Q8MOVN
334 * │ ├─ 010 ─ OPC_MXU_D16MOVZ
335 * │ ├─ 011 ─ OPC_MXU_D16MOVN
336 * │ ├─ 100 ─ OPC_MXU_S32MOVZ
337 * │ └─ 101 ─ OPC_MXU_S32MOVN
340 * ├─ 111010 ─ OPC_MXU__POOL21 ─┬─ 00 ─ OPC_MXU_Q8MAC
341 * │ └─ 10 ─ OPC_MXU_Q8MACSU
342 * ├─ 111011 ─ OPC_MXU_Q16SCOP
343 * ├─ 111100 ─ OPC_MXU_Q8MADL
344 * ├─ 111101 ─ OPC_MXU_S32SFL
345 * ├─ 111110 ─ OPC_MXU_Q8SAD
346 * └─ 111111 ─ <not assigned> (overlaps with SDBBP)
351 * "XBurst® Instruction Set Architecture MIPS eXtension/enhanced Unit
352 * Programming Manual", Ingenic Semiconductor Co, Ltd., revision June 2, 2017
/*
 * MXU major opcodes: the 6-bit codes listed in the opcode tree above.
 *
 * Entries named OPC_MXU__POOLnn do not identify an instruction by
 * themselves; they select a pool whose members are distinguished by an
 * additional minor field (see the per-pool enums).
 */
enum {
    OPC_MXU_S32MADD  = 0x00,
    OPC_MXU_S32MADDU = 0x01,
    OPC_MXU__POOL00  = 0x03,
    OPC_MXU_S32MSUB  = 0x04,
    OPC_MXU_S32MSUBU = 0x05,
    OPC_MXU__POOL01  = 0x06,
    OPC_MXU__POOL02  = 0x07,
    OPC_MXU_D16MUL   = 0x08,
    OPC_MXU__POOL03  = 0x09,
    OPC_MXU_D16MAC   = 0x0A,
    OPC_MXU_D16MACF  = 0x0B,
    OPC_MXU_D16MADL  = 0x0C,
    OPC_MXU_S16MAD   = 0x0D,
    OPC_MXU_Q16ADD   = 0x0E,
    OPC_MXU_D16MACE  = 0x0F,
    OPC_MXU__POOL04  = 0x10,
    OPC_MXU__POOL05  = 0x11,
    OPC_MXU__POOL06  = 0x12,
    OPC_MXU__POOL07  = 0x13,
    OPC_MXU__POOL08  = 0x14,
    OPC_MXU__POOL09  = 0x15,
    OPC_MXU__POOL10  = 0x16,
    OPC_MXU__POOL11  = 0x17,
    OPC_MXU_D32ADD   = 0x18,
    OPC_MXU__POOL12  = 0x19,
    OPC_MXU__POOL13  = 0x1B,
    OPC_MXU__POOL14  = 0x1C,
    OPC_MXU_Q8ACCE   = 0x1D,
    OPC_MXU_S8LDD    = 0x22,
    OPC_MXU_S8STD    = 0x23,
    OPC_MXU_S8LDI    = 0x24,
    OPC_MXU_S8SDI    = 0x25,
    OPC_MXU__POOL15  = 0x26,
    OPC_MXU__POOL16  = 0x27,
    OPC_MXU__POOL17  = 0x28,
    OPC_MXU_S16LDD   = 0x2A,
    OPC_MXU_S16STD   = 0x2B,
    OPC_MXU_S16LDI   = 0x2C,
    OPC_MXU_S16SDI   = 0x2D,
    OPC_MXU_S32M2I   = 0x2E,
    OPC_MXU_S32I2M   = 0x2F,
    OPC_MXU_D32SLL   = 0x30,
    OPC_MXU_D32SLR   = 0x31,
    OPC_MXU_D32SARL  = 0x32,
    OPC_MXU_D32SAR   = 0x33,
    OPC_MXU_Q16SLL   = 0x34,
    OPC_MXU_Q16SLR   = 0x35,
    OPC_MXU__POOL18  = 0x36,
    OPC_MXU_Q16SAR   = 0x37,
    OPC_MXU__POOL19  = 0x38,
    OPC_MXU__POOL20  = 0x39,
    OPC_MXU__POOL21  = 0x3A,
    OPC_MXU_Q16SCOP  = 0x3B,
    OPC_MXU_Q8MADL   = 0x3C,
    OPC_MXU_S32SFL   = 0x3D,
    OPC_MXU_Q8SAD    = 0x3E,
};
/*
 * Minor opcodes of the MXU pools. Each enum lists the values of the minor
 * field that selects an instruction inside the pool of the same number;
 * the values mirror the opcode tree in the header comment of this file.
 */

/* MXU pool 00 */
enum {
    OPC_MXU_S32MAX   = 0x00,
    OPC_MXU_S32MIN   = 0x01,
    OPC_MXU_D16MAX   = 0x02,
    OPC_MXU_D16MIN   = 0x03,
    OPC_MXU_Q8MAX    = 0x04,
    OPC_MXU_Q8MIN    = 0x05,
    OPC_MXU_Q8SLT    = 0x06,
    OPC_MXU_Q8SLTU   = 0x07,
};

/* MXU pool 01 */
enum {
    OPC_MXU_S32SLT   = 0x00,
    OPC_MXU_D16SLT   = 0x01,
    OPC_MXU_D16AVG   = 0x02,
    OPC_MXU_D16AVGR  = 0x03,
    OPC_MXU_Q8AVG    = 0x04,
    OPC_MXU_Q8AVGR   = 0x05,
    OPC_MXU_Q8ADD    = 0x07,
};

/* MXU pool 02 */
enum {
    OPC_MXU_S32CPS   = 0x00,
    OPC_MXU_D16CPS   = 0x02,
    OPC_MXU_Q8ABD    = 0x04,
    OPC_MXU_Q16SAT   = 0x06,
};

/* MXU pool 03 */
enum {
    OPC_MXU_D16MULF  = 0x00,
    OPC_MXU_D16MULE  = 0x01,
};

/* MXU pool 04 05 06 07 08 09 10 11 */
enum {
    OPC_MXU_S32LDST  = 0x00,
    OPC_MXU_S32LDSTR = 0x01,
};

/* MXU pool 12 */
enum {
    OPC_MXU_D32ACC   = 0x00,
    OPC_MXU_D32ACCM  = 0x01,
    OPC_MXU_D32ASUM  = 0x02,
};

/* MXU pool 13 */
enum {
    OPC_MXU_Q16ACC   = 0x00,
    OPC_MXU_Q16ACCM  = 0x01,
    OPC_MXU_D16ASUM  = 0x02,
};

/* MXU pool 14 */
enum {
    OPC_MXU_Q8ADDE   = 0x00,
    OPC_MXU_D8SUM    = 0x01,
    OPC_MXU_D8SUMC   = 0x02,
};

/* MXU pool 15 */
enum {
    OPC_MXU_S32MUL   = 0x00,
    OPC_MXU_S32MULU  = 0x01,
    OPC_MXU_S32EXTR  = 0x02,
    OPC_MXU_S32EXTRV = 0x03,
};

/* MXU pool 16 */
enum {
    OPC_MXU_D32SARW  = 0x00,
    OPC_MXU_S32ALN   = 0x01,
    OPC_MXU_S32ALNI  = 0x02,
    OPC_MXU_S32LUI   = 0x03,
    OPC_MXU_S32NOR   = 0x04,
    OPC_MXU_S32AND   = 0x05,
    OPC_MXU_S32OR    = 0x06,
    OPC_MXU_S32XOR   = 0x07,
};

/*
 * MXU pool 17
 *
 * NOTE(review): these enumerators were not visible in the reviewed text;
 * the values are taken from the OPC_MXU__POOL17 branch of the opcode tree.
 */
enum {
    OPC_MXU_LXB      = 0x00,
    OPC_MXU_LXH      = 0x01,
    OPC_MXU_LXW      = 0x03,
    OPC_MXU_LXBU     = 0x04,
    OPC_MXU_LXHU     = 0x05,
};

/* MXU pool 18 */
enum {
    OPC_MXU_D32SLLV  = 0x00,
    OPC_MXU_D32SLRV  = 0x01,
    OPC_MXU_D32SARV  = 0x03,
    OPC_MXU_Q16SLLV  = 0x04,
    OPC_MXU_Q16SLRV  = 0x05,
    OPC_MXU_Q16SARV  = 0x07,
};

/* MXU pool 19 */
enum {
    OPC_MXU_Q8MUL    = 0x00,
    OPC_MXU_Q8MULSU  = 0x02,
};

/* MXU pool 20 */
enum {
    OPC_MXU_Q8MOVZ   = 0x00,
    OPC_MXU_Q8MOVN   = 0x01,
    OPC_MXU_D16MOVZ  = 0x02,
    OPC_MXU_D16MOVN  = 0x03,
    OPC_MXU_S32MOVZ  = 0x04,
    OPC_MXU_S32MOVN  = 0x05,
};

/* MXU pool 21 */
enum {
    OPC_MXU_Q8MAC    = 0x00,
    OPC_MXU_Q8MACSU  = 0x02,
};
/*
 * Symbolic values of the non-register pattern operands described in the
 * "Non-register operands" part of the header comment.
 */

/* 'aptn1': 1-bit accumulate pattern, A = add, S = subtract */
#define MXU_APTN1_A       0
#define MXU_APTN1_S       1

/* 'aptn2': 2-bit accumulate pattern, one letter per lane (A = add, S = sub) */
#define MXU_APTN2_AA      0
#define MXU_APTN2_AS      1
#define MXU_APTN2_SA      2
#define MXU_APTN2_SS      3

/* 'eptn2': 2-bit execute pattern, one letter per lane (A = add, S = sub) */
#define MXU_EPTN2_AA      0
#define MXU_EPTN2_AS      1
#define MXU_EPTN2_SA      2
#define MXU_EPTN2_SS      3

/* 'optn2': 2-bit operand getting pattern, numeric naming */
#define MXU_OPTN2_PTN0    0
#define MXU_OPTN2_PTN1    1
#define MXU_OPTN2_PTN2    2
#define MXU_OPTN2_PTN3    3
/* 'optn2': the same four values under the alternative naming scheme */
#define MXU_OPTN2_WW      0
#define MXU_OPTN2_LW      1
#define MXU_OPTN2_HW      2
#define MXU_OPTN2_XW      3

/* 'optn3': 3-bit operand getting pattern */
#define MXU_OPTN3_PTN0    0
#define MXU_OPTN3_PTN1    1
#define MXU_OPTN3_PTN2    2
#define MXU_OPTN3_PTN3    3
#define MXU_OPTN3_PTN4    4
#define MXU_OPTN3_PTN5    5
#define MXU_OPTN3_PTN6    6
#define MXU_OPTN3_PTN7    7
609 static TCGv mxu_gpr
[NUMBER_OF_MXU_REGISTERS
- 1];
612 static const char mxuregnames
[NUMBER_OF_MXU_REGISTERS
][4] = {
613 "XR1", "XR2", "XR3", "XR4", "XR5", "XR6", "XR7", "XR8",
614 "XR9", "XR10", "XR11", "XR12", "XR13", "XR14", "XR15", "XCR",
617 void mxu_translate_init(void)
619 for (unsigned i
= 0; i
< NUMBER_OF_MXU_REGISTERS
- 1; i
++) {
620 mxu_gpr
[i
] = tcg_global_mem_new(tcg_env
,
621 offsetof(CPUMIPSState
, active_tc
.mxu_gpr
[i
]),
625 mxu_CR
= tcg_global_mem_new(tcg_env
,
626 offsetof(CPUMIPSState
, active_tc
.mxu_cr
),
627 mxuregnames
[NUMBER_OF_MXU_REGISTERS
- 1]);
630 /* MXU General purpose registers moves. */
631 static inline void gen_load_mxu_gpr(TCGv t
, unsigned int reg
)
634 tcg_gen_movi_tl(t
, 0);
635 } else if (reg
<= 15) {
636 tcg_gen_mov_tl(t
, mxu_gpr
[reg
- 1]);
640 static inline void gen_store_mxu_gpr(TCGv t
, unsigned int reg
)
642 if (reg
> 0 && reg
<= 15) {
643 tcg_gen_mov_tl(mxu_gpr
[reg
- 1], t
);
647 static inline void gen_extract_mxu_gpr(TCGv t
, unsigned int reg
,
648 unsigned int ofs
, unsigned int len
)
651 tcg_gen_movi_tl(t
, 0);
652 } else if (reg
<= 15) {
653 tcg_gen_extract_tl(t
, mxu_gpr
[reg
- 1], ofs
, len
);
657 /* MXU control register moves. */
658 static inline void gen_load_mxu_cr(TCGv t
)
660 tcg_gen_mov_tl(t
, mxu_CR
);
663 static inline void gen_store_mxu_cr(TCGv t
)
665 /* TODO: Add handling of RW rules for MXU_CR. */
666 tcg_gen_mov_tl(mxu_CR
, t
);
670 * S32I2M XRa, rb - Register move from GRF to XRF
672 static void gen_mxu_s32i2m(DisasContext
*ctx
)
679 XRa
= extract32(ctx
->opcode
, 6, 5);
680 Rb
= extract32(ctx
->opcode
, 16, 5);
682 gen_load_gpr(t0
, Rb
);
684 gen_store_mxu_gpr(t0
, XRa
);
685 } else if (XRa
== 16) {
686 gen_store_mxu_cr(t0
);
691 * S32M2I XRa, rb - Register move from XRF to GRF
693 static void gen_mxu_s32m2i(DisasContext
*ctx
)
700 XRa
= extract32(ctx
->opcode
, 6, 5);
701 Rb
= extract32(ctx
->opcode
, 16, 5);
704 gen_load_mxu_gpr(t0
, XRa
);
705 } else if (XRa
== 16) {
709 gen_store_gpr(t0
, Rb
);
713 * S8LDD XRa, Rb, s8, optn3 - Load a byte from memory to XRF
715 * S8LDI XRa, Rb, s8, optn3 - Load a byte from memory to XRF,
716 * post modify address register
718 static void gen_mxu_s8ldd(DisasContext
*ctx
, bool postmodify
)
721 uint32_t XRa
, Rb
, s8
, optn3
;
726 XRa
= extract32(ctx
->opcode
, 6, 4);
727 s8
= extract32(ctx
->opcode
, 10, 8);
728 optn3
= extract32(ctx
->opcode
, 18, 3);
729 Rb
= extract32(ctx
->opcode
, 21, 5);
731 gen_load_gpr(t0
, Rb
);
732 tcg_gen_addi_tl(t0
, t0
, (int8_t)s8
);
734 gen_store_gpr(t0
, Rb
);
738 /* XRa[7:0] = tmp8 */
740 tcg_gen_qemu_ld_tl(t1
, t0
, ctx
->mem_idx
, MO_UB
);
741 gen_load_mxu_gpr(t0
, XRa
);
742 tcg_gen_deposit_tl(t0
, t0
, t1
, 0, 8);
744 /* XRa[15:8] = tmp8 */
746 tcg_gen_qemu_ld_tl(t1
, t0
, ctx
->mem_idx
, MO_UB
);
747 gen_load_mxu_gpr(t0
, XRa
);
748 tcg_gen_deposit_tl(t0
, t0
, t1
, 8, 8);
750 /* XRa[23:16] = tmp8 */
752 tcg_gen_qemu_ld_tl(t1
, t0
, ctx
->mem_idx
, MO_UB
);
753 gen_load_mxu_gpr(t0
, XRa
);
754 tcg_gen_deposit_tl(t0
, t0
, t1
, 16, 8);
756 /* XRa[31:24] = tmp8 */
758 tcg_gen_qemu_ld_tl(t1
, t0
, ctx
->mem_idx
, MO_UB
);
759 gen_load_mxu_gpr(t0
, XRa
);
760 tcg_gen_deposit_tl(t0
, t0
, t1
, 24, 8);
762 /* XRa = {8'b0, tmp8, 8'b0, tmp8} */
764 tcg_gen_qemu_ld_tl(t1
, t0
, ctx
->mem_idx
, MO_UB
);
765 tcg_gen_deposit_tl(t0
, t1
, t1
, 16, 16);
767 /* XRa = {tmp8, 8'b0, tmp8, 8'b0} */
769 tcg_gen_qemu_ld_tl(t1
, t0
, ctx
->mem_idx
, MO_UB
);
770 tcg_gen_shli_tl(t1
, t1
, 8);
771 tcg_gen_deposit_tl(t0
, t1
, t1
, 16, 16);
773 /* XRa = {{8{sign of tmp8}}, tmp8, {8{sign of tmp8}}, tmp8} */
775 tcg_gen_qemu_ld_tl(t1
, t0
, ctx
->mem_idx
, MO_SB
);
776 tcg_gen_mov_tl(t0
, t1
);
777 tcg_gen_andi_tl(t0
, t0
, 0xFF00FFFF);
778 tcg_gen_shli_tl(t1
, t1
, 16);
779 tcg_gen_or_tl(t0
, t0
, t1
);
781 /* XRa = {tmp8, tmp8, tmp8, tmp8} */
783 tcg_gen_qemu_ld_tl(t1
, t0
, ctx
->mem_idx
, MO_UB
);
784 tcg_gen_deposit_tl(t1
, t1
, t1
, 8, 8);
785 tcg_gen_deposit_tl(t0
, t1
, t1
, 16, 16);
789 gen_store_mxu_gpr(t0
, XRa
);
793 * S8STD XRa, Rb, s8, optn3 - Store a byte from XRF to memory
795 * S8SDI XRa, Rb, s8, optn3 - Store a byte from XRF to memory,
796 * post modify address register
798 static void gen_mxu_s8std(DisasContext
*ctx
, bool postmodify
)
801 uint32_t XRa
, Rb
, s8
, optn3
;
806 XRa
= extract32(ctx
->opcode
, 6, 4);
807 s8
= extract32(ctx
->opcode
, 10, 8);
808 optn3
= extract32(ctx
->opcode
, 18, 3);
809 Rb
= extract32(ctx
->opcode
, 21, 5);
812 /* reserved, do nothing */
816 gen_load_gpr(t0
, Rb
);
817 tcg_gen_addi_tl(t0
, t0
, (int8_t)s8
);
819 gen_store_gpr(t0
, Rb
);
821 gen_load_mxu_gpr(t1
, XRa
);
824 /* XRa[7:0] => tmp8 */
826 tcg_gen_extract_tl(t1
, t1
, 0, 8);
828 /* XRa[15:8] => tmp8 */
830 tcg_gen_extract_tl(t1
, t1
, 8, 8);
832 /* XRa[23:16] => tmp8 */
834 tcg_gen_extract_tl(t1
, t1
, 16, 8);
836 /* XRa[31:24] => tmp8 */
838 tcg_gen_extract_tl(t1
, t1
, 24, 8);
842 tcg_gen_qemu_st_tl(t1
, t0
, ctx
->mem_idx
, MO_UB
);
846 * S16LDD XRa, Rb, s10, optn2 - Load a halfword from memory to XRF
848 * S16LDI XRa, Rb, s10, optn2 - Load a halfword from memory to XRF,
849 * post modify address register
851 static void gen_mxu_s16ldd(DisasContext
*ctx
, bool postmodify
)
854 uint32_t XRa
, Rb
, optn2
;
860 XRa
= extract32(ctx
->opcode
, 6, 4);
861 s10
= sextract32(ctx
->opcode
, 10, 9) * 2;
862 optn2
= extract32(ctx
->opcode
, 19, 2);
863 Rb
= extract32(ctx
->opcode
, 21, 5);
865 gen_load_gpr(t0
, Rb
);
866 tcg_gen_addi_tl(t0
, t0
, s10
);
868 gen_store_gpr(t0
, Rb
);
872 /* XRa[15:0] = tmp16 */
874 tcg_gen_qemu_ld_tl(t1
, t0
, ctx
->mem_idx
, MO_UW
);
875 gen_load_mxu_gpr(t0
, XRa
);
876 tcg_gen_deposit_tl(t0
, t0
, t1
, 0, 16);
878 /* XRa[31:16] = tmp16 */
880 tcg_gen_qemu_ld_tl(t1
, t0
, ctx
->mem_idx
, MO_UW
);
881 gen_load_mxu_gpr(t0
, XRa
);
882 tcg_gen_deposit_tl(t0
, t0
, t1
, 16, 16);
884 /* XRa = sign_extend(tmp16) */
886 tcg_gen_qemu_ld_tl(t0
, t0
, ctx
->mem_idx
, MO_SW
);
888 /* XRa = {tmp16, tmp16} */
890 tcg_gen_qemu_ld_tl(t1
, t0
, ctx
->mem_idx
, MO_UW
);
891 tcg_gen_deposit_tl(t0
, t1
, t1
, 0, 16);
892 tcg_gen_deposit_tl(t0
, t1
, t1
, 16, 16);
896 gen_store_mxu_gpr(t0
, XRa
);
900 * S16STD XRa, Rb, s8, optn2 - Store a byte from XRF to memory
902 * S16SDI XRa, Rb, s8, optn2 - Store a byte from XRF to memory,
903 * post modify address register
905 static void gen_mxu_s16std(DisasContext
*ctx
, bool postmodify
)
908 uint32_t XRa
, Rb
, optn2
;
914 XRa
= extract32(ctx
->opcode
, 6, 4);
915 s10
= sextract32(ctx
->opcode
, 10, 9) * 2;
916 optn2
= extract32(ctx
->opcode
, 19, 2);
917 Rb
= extract32(ctx
->opcode
, 21, 5);
920 /* reserved, do nothing */
924 gen_load_gpr(t0
, Rb
);
925 tcg_gen_addi_tl(t0
, t0
, s10
);
927 gen_store_gpr(t0
, Rb
);
929 gen_load_mxu_gpr(t1
, XRa
);
932 /* XRa[15:0] => tmp16 */
934 tcg_gen_extract_tl(t1
, t1
, 0, 16);
936 /* XRa[31:16] => tmp16 */
938 tcg_gen_extract_tl(t1
, t1
, 16, 16);
942 tcg_gen_qemu_st_tl(t1
, t0
, ctx
->mem_idx
, MO_UW
);
946 * S32MUL XRa, XRd, rs, rt - Signed 32x32=>64 bit multiplication
947 * of GPR's and stores result into pair of MXU registers.
948 * It strains HI and LO registers.
950 * S32MULU XRa, XRd, rs, rt - Unsigned 32x32=>64 bit multiplication
951 * of GPR's and stores result into pair of MXU registers.
952 * It strains HI and LO registers.
954 static void gen_mxu_s32mul(DisasContext
*ctx
, bool mulu
)
957 uint32_t XRa
, XRd
, rs
, rt
;
962 XRa
= extract32(ctx
->opcode
, 6, 4);
963 XRd
= extract32(ctx
->opcode
, 10, 4);
964 rs
= extract32(ctx
->opcode
, 16, 5);
965 rt
= extract32(ctx
->opcode
, 21, 5);
967 if (unlikely(rs
== 0 || rt
== 0)) {
968 tcg_gen_movi_tl(t0
, 0);
969 tcg_gen_movi_tl(t1
, 0);
971 gen_load_gpr(t0
, rs
);
972 gen_load_gpr(t1
, rt
);
975 tcg_gen_mulu2_tl(t0
, t1
, t0
, t1
);
977 tcg_gen_muls2_tl(t0
, t1
, t0
, t1
);
980 tcg_gen_mov_tl(cpu_HI
[0], t1
);
981 tcg_gen_mov_tl(cpu_LO
[0], t0
);
982 gen_store_mxu_gpr(t1
, XRa
);
983 gen_store_mxu_gpr(t0
, XRd
);
987 * D16MUL XRa, XRb, XRc, XRd, optn2 - Signed 16 bit pattern multiplication
988 * D16MULF XRa, XRb, XRc, optn2 - Signed Q15 fraction pattern multiplication
989 * with rounding and packing result
990 * D16MULE XRa, XRb, XRc, XRd, optn2 - Signed Q15 fraction pattern
991 * multiplication with rounding
993 static void gen_mxu_d16mul(DisasContext
*ctx
, bool fractional
,
997 uint32_t XRa
, XRb
, XRc
, XRd
, optn2
;
1000 t1
= tcg_temp_new();
1001 t2
= tcg_temp_new();
1002 t3
= tcg_temp_new();
1004 XRa
= extract32(ctx
->opcode
, 6, 4);
1005 XRb
= extract32(ctx
->opcode
, 10, 4);
1006 XRc
= extract32(ctx
->opcode
, 14, 4);
1007 XRd
= extract32(ctx
->opcode
, 18, 4);
1008 optn2
= extract32(ctx
->opcode
, 22, 2);
1011 * TODO: XRd field isn't used for D16MULF
1012 * There's no knowledge how this field affect
1013 * instruction decoding/behavior
1016 gen_load_mxu_gpr(t1
, XRb
);
1017 tcg_gen_sextract_tl(t0
, t1
, 0, 16);
1018 tcg_gen_sextract_tl(t1
, t1
, 16, 16);
1019 gen_load_mxu_gpr(t3
, XRc
);
1020 tcg_gen_sextract_tl(t2
, t3
, 0, 16);
1021 tcg_gen_sextract_tl(t3
, t3
, 16, 16);
1024 case MXU_OPTN2_WW
: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
1025 tcg_gen_mul_tl(t3
, t1
, t3
);
1026 tcg_gen_mul_tl(t2
, t0
, t2
);
1028 case MXU_OPTN2_LW
: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
1029 tcg_gen_mul_tl(t3
, t0
, t3
);
1030 tcg_gen_mul_tl(t2
, t0
, t2
);
1032 case MXU_OPTN2_HW
: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
1033 tcg_gen_mul_tl(t3
, t1
, t3
);
1034 tcg_gen_mul_tl(t2
, t1
, t2
);
1036 case MXU_OPTN2_XW
: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
1037 tcg_gen_mul_tl(t3
, t0
, t3
);
1038 tcg_gen_mul_tl(t2
, t1
, t2
);
1042 TCGLabel
*l_done
= gen_new_label();
1043 TCGv rounding
= tcg_temp_new();
1045 tcg_gen_shli_tl(t3
, t3
, 1);
1046 tcg_gen_shli_tl(t2
, t2
, 1);
1047 tcg_gen_andi_tl(rounding
, mxu_CR
, 0x2);
1048 tcg_gen_brcondi_tl(TCG_COND_EQ
, rounding
, 0, l_done
);
1049 if (packed_result
) {
1050 TCGLabel
*l_apply_bias_l
= gen_new_label();
1051 TCGLabel
*l_apply_bias_r
= gen_new_label();
1052 TCGLabel
*l_half_done
= gen_new_label();
1053 TCGv bias
= tcg_temp_new();
1056 * D16MULF supports unbiased rounding aka "bankers rounding",
1057 * "round to even", "convergent rounding"
1059 tcg_gen_andi_tl(bias
, mxu_CR
, 0x4);
1060 tcg_gen_brcondi_tl(TCG_COND_NE
, bias
, 0, l_apply_bias_l
);
1061 tcg_gen_andi_tl(t0
, t3
, 0x1ffff);
1062 tcg_gen_brcondi_tl(TCG_COND_EQ
, t0
, 0x8000, l_half_done
);
1063 gen_set_label(l_apply_bias_l
);
1064 tcg_gen_addi_tl(t3
, t3
, 0x8000);
1065 gen_set_label(l_half_done
);
1066 tcg_gen_brcondi_tl(TCG_COND_NE
, bias
, 0, l_apply_bias_r
);
1067 tcg_gen_andi_tl(t0
, t2
, 0x1ffff);
1068 tcg_gen_brcondi_tl(TCG_COND_EQ
, t0
, 0x8000, l_done
);
1069 gen_set_label(l_apply_bias_r
);
1070 tcg_gen_addi_tl(t2
, t2
, 0x8000);
1072 /* D16MULE doesn't support unbiased rounding */
1073 tcg_gen_addi_tl(t3
, t3
, 0x8000);
1074 tcg_gen_addi_tl(t2
, t2
, 0x8000);
1076 gen_set_label(l_done
);
1078 if (!packed_result
) {
1079 gen_store_mxu_gpr(t3
, XRa
);
1080 gen_store_mxu_gpr(t2
, XRd
);
1082 tcg_gen_andi_tl(t3
, t3
, 0xffff0000);
1083 tcg_gen_shri_tl(t2
, t2
, 16);
1084 tcg_gen_or_tl(t3
, t3
, t2
);
1085 gen_store_mxu_gpr(t3
, XRa
);
1090 * D16MAC XRa, XRb, XRc, XRd, aptn2, optn2
1091 * Signed 16 bit pattern multiply and accumulate
1092 * D16MACF XRa, XRb, XRc, aptn2, optn2
1093 * Signed Q15 fraction pattern multiply accumulate and pack
1094 * D16MACE XRa, XRb, XRc, XRd, aptn2, optn2
1095 * Signed Q15 fraction pattern multiply and accumulate
1097 static void gen_mxu_d16mac(DisasContext
*ctx
, bool fractional
,
1100 TCGv t0
, t1
, t2
, t3
;
1101 uint32_t XRa
, XRb
, XRc
, XRd
, optn2
, aptn2
;
1103 t0
= tcg_temp_new();
1104 t1
= tcg_temp_new();
1105 t2
= tcg_temp_new();
1106 t3
= tcg_temp_new();
1108 XRa
= extract32(ctx
->opcode
, 6, 4);
1109 XRb
= extract32(ctx
->opcode
, 10, 4);
1110 XRc
= extract32(ctx
->opcode
, 14, 4);
1111 XRd
= extract32(ctx
->opcode
, 18, 4);
1112 optn2
= extract32(ctx
->opcode
, 22, 2);
1113 aptn2
= extract32(ctx
->opcode
, 24, 2);
1115 gen_load_mxu_gpr(t1
, XRb
);
1116 tcg_gen_sextract_tl(t0
, t1
, 0, 16);
1117 tcg_gen_sextract_tl(t1
, t1
, 16, 16);
1119 gen_load_mxu_gpr(t3
, XRc
);
1120 tcg_gen_sextract_tl(t2
, t3
, 0, 16);
1121 tcg_gen_sextract_tl(t3
, t3
, 16, 16);
1124 case MXU_OPTN2_WW
: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
1125 tcg_gen_mul_tl(t3
, t1
, t3
);
1126 tcg_gen_mul_tl(t2
, t0
, t2
);
1128 case MXU_OPTN2_LW
: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
1129 tcg_gen_mul_tl(t3
, t0
, t3
);
1130 tcg_gen_mul_tl(t2
, t0
, t2
);
1132 case MXU_OPTN2_HW
: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
1133 tcg_gen_mul_tl(t3
, t1
, t3
);
1134 tcg_gen_mul_tl(t2
, t1
, t2
);
1136 case MXU_OPTN2_XW
: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
1137 tcg_gen_mul_tl(t3
, t0
, t3
);
1138 tcg_gen_mul_tl(t2
, t1
, t2
);
1143 tcg_gen_shli_tl(t3
, t3
, 1);
1144 tcg_gen_shli_tl(t2
, t2
, 1);
1146 gen_load_mxu_gpr(t0
, XRa
);
1147 gen_load_mxu_gpr(t1
, XRd
);
1151 tcg_gen_add_tl(t3
, t0
, t3
);
1152 tcg_gen_add_tl(t2
, t1
, t2
);
1155 tcg_gen_add_tl(t3
, t0
, t3
);
1156 tcg_gen_sub_tl(t2
, t1
, t2
);
1159 tcg_gen_sub_tl(t3
, t0
, t3
);
1160 tcg_gen_add_tl(t2
, t1
, t2
);
1163 tcg_gen_sub_tl(t3
, t0
, t3
);
1164 tcg_gen_sub_tl(t2
, t1
, t2
);
1169 TCGLabel
*l_done
= gen_new_label();
1170 TCGv rounding
= tcg_temp_new();
1172 tcg_gen_andi_tl(rounding
, mxu_CR
, 0x2);
1173 tcg_gen_brcondi_tl(TCG_COND_EQ
, rounding
, 0, l_done
);
1174 if (packed_result
) {
1175 TCGLabel
*l_apply_bias_l
= gen_new_label();
1176 TCGLabel
*l_apply_bias_r
= gen_new_label();
1177 TCGLabel
*l_half_done
= gen_new_label();
1178 TCGv bias
= tcg_temp_new();
1181 * D16MACF supports unbiased rounding aka "bankers rounding",
1182 * "round to even", "convergent rounding"
1184 tcg_gen_andi_tl(bias
, mxu_CR
, 0x4);
1185 tcg_gen_brcondi_tl(TCG_COND_NE
, bias
, 0, l_apply_bias_l
);
1186 tcg_gen_andi_tl(t0
, t3
, 0x1ffff);
1187 tcg_gen_brcondi_tl(TCG_COND_EQ
, t0
, 0x8000, l_half_done
);
1188 gen_set_label(l_apply_bias_l
);
1189 tcg_gen_addi_tl(t3
, t3
, 0x8000);
1190 gen_set_label(l_half_done
);
1191 tcg_gen_brcondi_tl(TCG_COND_NE
, bias
, 0, l_apply_bias_r
);
1192 tcg_gen_andi_tl(t0
, t2
, 0x1ffff);
1193 tcg_gen_brcondi_tl(TCG_COND_EQ
, t0
, 0x8000, l_done
);
1194 gen_set_label(l_apply_bias_r
);
1195 tcg_gen_addi_tl(t2
, t2
, 0x8000);
1197 /* D16MACE doesn't support unbiased rounding */
1198 tcg_gen_addi_tl(t3
, t3
, 0x8000);
1199 tcg_gen_addi_tl(t2
, t2
, 0x8000);
1201 gen_set_label(l_done
);
1204 if (!packed_result
) {
1205 gen_store_mxu_gpr(t3
, XRa
);
1206 gen_store_mxu_gpr(t2
, XRd
);
1208 tcg_gen_andi_tl(t3
, t3
, 0xffff0000);
1209 tcg_gen_shri_tl(t2
, t2
, 16);
1210 tcg_gen_or_tl(t3
, t3
, t2
);
1211 gen_store_mxu_gpr(t3
, XRa
);
1216 * D16MADL XRa, XRb, XRc, XRd, aptn2, optn2 - Double packed
1217 * unsigned 16 bit pattern multiply and add/subtract.
1219 static void gen_mxu_d16madl(DisasContext
*ctx
)
1221 TCGv t0
, t1
, t2
, t3
;
1222 uint32_t XRa
, XRb
, XRc
, XRd
, optn2
, aptn2
;
1224 t0
= tcg_temp_new();
1225 t1
= tcg_temp_new();
1226 t2
= tcg_temp_new();
1227 t3
= tcg_temp_new();
1229 XRa
= extract32(ctx
->opcode
, 6, 4);
1230 XRb
= extract32(ctx
->opcode
, 10, 4);
1231 XRc
= extract32(ctx
->opcode
, 14, 4);
1232 XRd
= extract32(ctx
->opcode
, 18, 4);
1233 optn2
= extract32(ctx
->opcode
, 22, 2);
1234 aptn2
= extract32(ctx
->opcode
, 24, 2);
1236 gen_load_mxu_gpr(t1
, XRb
);
1237 tcg_gen_sextract_tl(t0
, t1
, 0, 16);
1238 tcg_gen_sextract_tl(t1
, t1
, 16, 16);
1240 gen_load_mxu_gpr(t3
, XRc
);
1241 tcg_gen_sextract_tl(t2
, t3
, 0, 16);
1242 tcg_gen_sextract_tl(t3
, t3
, 16, 16);
1245 case MXU_OPTN2_WW
: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
1246 tcg_gen_mul_tl(t3
, t1
, t3
);
1247 tcg_gen_mul_tl(t2
, t0
, t2
);
1249 case MXU_OPTN2_LW
: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
1250 tcg_gen_mul_tl(t3
, t0
, t3
);
1251 tcg_gen_mul_tl(t2
, t0
, t2
);
1253 case MXU_OPTN2_HW
: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
1254 tcg_gen_mul_tl(t3
, t1
, t3
);
1255 tcg_gen_mul_tl(t2
, t1
, t2
);
1257 case MXU_OPTN2_XW
: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
1258 tcg_gen_mul_tl(t3
, t0
, t3
);
1259 tcg_gen_mul_tl(t2
, t1
, t2
);
1262 tcg_gen_extract_tl(t2
, t2
, 0, 16);
1263 tcg_gen_extract_tl(t3
, t3
, 0, 16);
1265 gen_load_mxu_gpr(t1
, XRa
);
1266 tcg_gen_extract_tl(t0
, t1
, 0, 16);
1267 tcg_gen_extract_tl(t1
, t1
, 16, 16);
1271 tcg_gen_add_tl(t3
, t1
, t3
);
1272 tcg_gen_add_tl(t2
, t0
, t2
);
1275 tcg_gen_add_tl(t3
, t1
, t3
);
1276 tcg_gen_sub_tl(t2
, t0
, t2
);
1279 tcg_gen_sub_tl(t3
, t1
, t3
);
1280 tcg_gen_add_tl(t2
, t0
, t2
);
1283 tcg_gen_sub_tl(t3
, t1
, t3
);
1284 tcg_gen_sub_tl(t2
, t0
, t2
);
1288 tcg_gen_andi_tl(t2
, t2
, 0xffff);
1289 tcg_gen_shli_tl(t3
, t3
, 16);
1290 tcg_gen_or_tl(mxu_gpr
[XRd
- 1], t3
, t2
);
1294 * S16MAD XRa, XRb, XRc, XRd, aptn2, optn2 - Single packed
1295 * signed 16 bit pattern multiply and 32-bit add/subtract.
1297 static void gen_mxu_s16mad(DisasContext
*ctx
)
1300 uint32_t XRa
, XRb
, XRc
, XRd
, optn2
, aptn1
, pad
;
1302 t0
= tcg_temp_new();
1303 t1
= tcg_temp_new();
1305 XRa
= extract32(ctx
->opcode
, 6, 4);
1306 XRb
= extract32(ctx
->opcode
, 10, 4);
1307 XRc
= extract32(ctx
->opcode
, 14, 4);
1308 XRd
= extract32(ctx
->opcode
, 18, 4);
1309 optn2
= extract32(ctx
->opcode
, 22, 2);
1310 aptn1
= extract32(ctx
->opcode
, 24, 1);
1311 pad
= extract32(ctx
->opcode
, 25, 1);
1314 /* FIXME check if it influence the result */
1317 gen_load_mxu_gpr(t0
, XRb
);
1318 gen_load_mxu_gpr(t1
, XRc
);
1321 case MXU_OPTN2_WW
: /* XRB.H*XRC.H */
1322 tcg_gen_sextract_tl(t0
, t0
, 16, 16);
1323 tcg_gen_sextract_tl(t1
, t1
, 16, 16);
1325 case MXU_OPTN2_LW
: /* XRB.L*XRC.L */
1326 tcg_gen_sextract_tl(t0
, t0
, 0, 16);
1327 tcg_gen_sextract_tl(t1
, t1
, 0, 16);
1329 case MXU_OPTN2_HW
: /* XRB.H*XRC.L */
1330 tcg_gen_sextract_tl(t0
, t0
, 16, 16);
1331 tcg_gen_sextract_tl(t1
, t1
, 0, 16);
1333 case MXU_OPTN2_XW
: /* XRB.L*XRC.H */
1334 tcg_gen_sextract_tl(t0
, t0
, 0, 16);
1335 tcg_gen_sextract_tl(t1
, t1
, 16, 16);
1338 tcg_gen_mul_tl(t0
, t0
, t1
);
1340 gen_load_mxu_gpr(t1
, XRa
);
1344 tcg_gen_add_tl(t1
, t1
, t0
);
1347 tcg_gen_sub_tl(t1
, t1
, t0
);
1351 gen_store_mxu_gpr(t1
, XRd
);
1355 * Q8MUL XRa, XRb, XRc, XRd - Parallel quad unsigned 8 bit multiply
1356 * Q8MULSU XRa, XRb, XRc, XRd - Parallel quad signed 8 bit multiply
1357 * Q8MAC XRa, XRb, XRc, XRd - Parallel quad unsigned 8 bit multiply
1359 * Q8MACSU XRa, XRb, XRc, XRd - Parallel quad signed 8 bit multiply
1362 static void gen_mxu_q8mul_mac(DisasContext
*ctx
, bool su
, bool mac
)
1364 TCGv t0
, t1
, t2
, t3
, t4
, t5
, t6
, t7
;
1365 uint32_t XRa
, XRb
, XRc
, XRd
, aptn2
;
1367 t0
= tcg_temp_new();
1368 t1
= tcg_temp_new();
1369 t2
= tcg_temp_new();
1370 t3
= tcg_temp_new();
1371 t4
= tcg_temp_new();
1372 t5
= tcg_temp_new();
1373 t6
= tcg_temp_new();
1374 t7
= tcg_temp_new();
1376 XRa
= extract32(ctx
->opcode
, 6, 4);
1377 XRb
= extract32(ctx
->opcode
, 10, 4);
1378 XRc
= extract32(ctx
->opcode
, 14, 4);
1379 XRd
= extract32(ctx
->opcode
, 18, 4);
1380 aptn2
= extract32(ctx
->opcode
, 24, 2);
1382 gen_load_mxu_gpr(t3
, XRb
);
1383 gen_load_mxu_gpr(t7
, XRc
);
1386 /* Q8MULSU / Q8MACSU */
1387 tcg_gen_sextract_tl(t0
, t3
, 0, 8);
1388 tcg_gen_sextract_tl(t1
, t3
, 8, 8);
1389 tcg_gen_sextract_tl(t2
, t3
, 16, 8);
1390 tcg_gen_sextract_tl(t3
, t3
, 24, 8);
1393 tcg_gen_extract_tl(t0
, t3
, 0, 8);
1394 tcg_gen_extract_tl(t1
, t3
, 8, 8);
1395 tcg_gen_extract_tl(t2
, t3
, 16, 8);
1396 tcg_gen_extract_tl(t3
, t3
, 24, 8);
1399 tcg_gen_extract_tl(t4
, t7
, 0, 8);
1400 tcg_gen_extract_tl(t5
, t7
, 8, 8);
1401 tcg_gen_extract_tl(t6
, t7
, 16, 8);
1402 tcg_gen_extract_tl(t7
, t7
, 24, 8);
1404 tcg_gen_mul_tl(t0
, t0
, t4
);
1405 tcg_gen_mul_tl(t1
, t1
, t5
);
1406 tcg_gen_mul_tl(t2
, t2
, t6
);
1407 tcg_gen_mul_tl(t3
, t3
, t7
);
1410 gen_load_mxu_gpr(t4
, XRd
);
1411 gen_load_mxu_gpr(t5
, XRa
);
1412 tcg_gen_extract_tl(t6
, t4
, 0, 16);
1413 tcg_gen_extract_tl(t7
, t4
, 16, 16);
1415 tcg_gen_sub_tl(t0
, t6
, t0
);
1416 tcg_gen_sub_tl(t1
, t7
, t1
);
1418 tcg_gen_add_tl(t0
, t6
, t0
);
1419 tcg_gen_add_tl(t1
, t7
, t1
);
1421 tcg_gen_extract_tl(t6
, t5
, 0, 16);
1422 tcg_gen_extract_tl(t7
, t5
, 16, 16);
1424 tcg_gen_sub_tl(t2
, t6
, t2
);
1425 tcg_gen_sub_tl(t3
, t7
, t3
);
1427 tcg_gen_add_tl(t2
, t6
, t2
);
1428 tcg_gen_add_tl(t3
, t7
, t3
);
1432 tcg_gen_deposit_tl(t0
, t0
, t1
, 16, 16);
1433 tcg_gen_deposit_tl(t1
, t2
, t3
, 16, 16);
1435 gen_store_mxu_gpr(t0
, XRd
);
1436 gen_store_mxu_gpr(t1
, XRa
);
1440 * Q8MADL XRd, XRa, XRb, XRc
1441 * Parallel quad unsigned 8 bit multiply and accumulate.
1442 * e.g. XRd[0..3] = XRa[0..3] + XRb[0..3] * XRc[0..3]
1444 static void gen_mxu_q8madl(DisasContext
*ctx
)
1446 TCGv t0
, t1
, t2
, t3
, t4
, t5
, t6
, t7
;
1447 uint32_t XRa
, XRb
, XRc
, XRd
, aptn2
;
1449 t0
= tcg_temp_new();
1450 t1
= tcg_temp_new();
1451 t2
= tcg_temp_new();
1452 t3
= tcg_temp_new();
1453 t4
= tcg_temp_new();
1454 t5
= tcg_temp_new();
1455 t6
= tcg_temp_new();
1456 t7
= tcg_temp_new();
1458 XRa
= extract32(ctx
->opcode
, 6, 4);
1459 XRb
= extract32(ctx
->opcode
, 10, 4);
1460 XRc
= extract32(ctx
->opcode
, 14, 4);
1461 XRd
= extract32(ctx
->opcode
, 18, 4);
1462 aptn2
= extract32(ctx
->opcode
, 24, 2);
1464 gen_load_mxu_gpr(t3
, XRb
);
1465 gen_load_mxu_gpr(t7
, XRc
);
1467 tcg_gen_extract_tl(t0
, t3
, 0, 8);
1468 tcg_gen_extract_tl(t1
, t3
, 8, 8);
1469 tcg_gen_extract_tl(t2
, t3
, 16, 8);
1470 tcg_gen_extract_tl(t3
, t3
, 24, 8);
1472 tcg_gen_extract_tl(t4
, t7
, 0, 8);
1473 tcg_gen_extract_tl(t5
, t7
, 8, 8);
1474 tcg_gen_extract_tl(t6
, t7
, 16, 8);
1475 tcg_gen_extract_tl(t7
, t7
, 24, 8);
1477 tcg_gen_mul_tl(t0
, t0
, t4
);
1478 tcg_gen_mul_tl(t1
, t1
, t5
);
1479 tcg_gen_mul_tl(t2
, t2
, t6
);
1480 tcg_gen_mul_tl(t3
, t3
, t7
);
1482 gen_load_mxu_gpr(t4
, XRa
);
1483 tcg_gen_extract_tl(t6
, t4
, 0, 8);
1484 tcg_gen_extract_tl(t7
, t4
, 8, 8);
1486 tcg_gen_sub_tl(t0
, t6
, t0
);
1487 tcg_gen_sub_tl(t1
, t7
, t1
);
1489 tcg_gen_add_tl(t0
, t6
, t0
);
1490 tcg_gen_add_tl(t1
, t7
, t1
);
1492 tcg_gen_extract_tl(t6
, t4
, 16, 8);
1493 tcg_gen_extract_tl(t7
, t4
, 24, 8);
1495 tcg_gen_sub_tl(t2
, t6
, t2
);
1496 tcg_gen_sub_tl(t3
, t7
, t3
);
1498 tcg_gen_add_tl(t2
, t6
, t2
);
1499 tcg_gen_add_tl(t3
, t7
, t3
);
1502 tcg_gen_andi_tl(t5
, t0
, 0xff);
1503 tcg_gen_deposit_tl(t5
, t5
, t1
, 8, 8);
1504 tcg_gen_deposit_tl(t5
, t5
, t2
, 16, 8);
1505 tcg_gen_deposit_tl(t5
, t5
, t3
, 24, 8);
1507 gen_store_mxu_gpr(t5
, XRd
);
1511 * S32LDD XRa, Rb, S12 - Load a word from memory to XRF
1512 * S32LDDR XRa, Rb, S12 - Load a word from memory to XRF
1513 * in reversed byte seq.
1514 * S32LDI XRa, Rb, S12 - Load a word from memory to XRF,
1515 * post modify base address GPR.
1516 * S32LDIR XRa, Rb, S12 - Load a word from memory to XRF,
1517 * post modify base address GPR and load in reversed byte seq.
1519 static void gen_mxu_s32ldxx(DisasContext
*ctx
, bool reversed
, bool postinc
)
1522 uint32_t XRa
, Rb
, s12
;
1524 t0
= tcg_temp_new();
1525 t1
= tcg_temp_new();
1527 XRa
= extract32(ctx
->opcode
, 6, 4);
1528 s12
= sextract32(ctx
->opcode
, 10, 10);
1529 Rb
= extract32(ctx
->opcode
, 21, 5);
1531 gen_load_gpr(t0
, Rb
);
1532 tcg_gen_movi_tl(t1
, s12
* 4);
1533 tcg_gen_add_tl(t0
, t0
, t1
);
1535 tcg_gen_qemu_ld_tl(t1
, t0
, ctx
->mem_idx
,
1536 (MO_TESL
^ (reversed
? MO_BSWAP
: 0)) |
1537 ctx
->default_tcg_memop_mask
);
1538 gen_store_mxu_gpr(t1
, XRa
);
1541 gen_store_gpr(t0
, Rb
);
1546 * S32STD XRa, Rb, S12 - Store a word from XRF to memory
1547 * S32STDR XRa, Rb, S12 - Store a word from XRF to memory
1548 * in reversed byte seq.
1549 * S32SDI XRa, Rb, S12 - Store a word from XRF to memory,
1550 * post modify base address GPR.
1551 * S32SDIR XRa, Rb, S12 - Store a word from XRF to memory,
1552 * post modify base address GPR and store in reversed byte seq.
1554 static void gen_mxu_s32stxx(DisasContext
*ctx
, bool reversed
, bool postinc
)
1557 uint32_t XRa
, Rb
, s12
;
1559 t0
= tcg_temp_new();
1560 t1
= tcg_temp_new();
1562 XRa
= extract32(ctx
->opcode
, 6, 4);
1563 s12
= sextract32(ctx
->opcode
, 10, 10);
1564 Rb
= extract32(ctx
->opcode
, 21, 5);
1566 gen_load_gpr(t0
, Rb
);
1567 tcg_gen_movi_tl(t1
, s12
* 4);
1568 tcg_gen_add_tl(t0
, t0
, t1
);
1570 gen_load_mxu_gpr(t1
, XRa
);
1571 tcg_gen_qemu_st_tl(t1
, t0
, ctx
->mem_idx
,
1572 (MO_TESL
^ (reversed
? MO_BSWAP
: 0)) |
1573 ctx
->default_tcg_memop_mask
);
1576 gen_store_gpr(t0
, Rb
);
1581 * S32LDDV XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
1582 * S32LDDVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
1583 * in reversed byte seq.
1584 * S32LDIV XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
1585 * post modify base address GPR.
1586 * S32LDIVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
1587 * post modify base address GPR and load in reversed byte seq.
1589 static void gen_mxu_s32ldxvx(DisasContext
*ctx
, bool reversed
,
1590 bool postinc
, uint32_t strd2
)
1593 uint32_t XRa
, Rb
, Rc
;
1595 t0
= tcg_temp_new();
1596 t1
= tcg_temp_new();
1598 XRa
= extract32(ctx
->opcode
, 6, 4);
1599 Rc
= extract32(ctx
->opcode
, 16, 5);
1600 Rb
= extract32(ctx
->opcode
, 21, 5);
1602 gen_load_gpr(t0
, Rb
);
1603 gen_load_gpr(t1
, Rc
);
1604 tcg_gen_shli_tl(t1
, t1
, strd2
);
1605 tcg_gen_add_tl(t0
, t0
, t1
);
1607 tcg_gen_qemu_ld_tl(t1
, t0
, ctx
->mem_idx
,
1608 (MO_TESL
^ (reversed
? MO_BSWAP
: 0)) |
1609 ctx
->default_tcg_memop_mask
);
1610 gen_store_mxu_gpr(t1
, XRa
);
1613 gen_store_gpr(t0
, Rb
);
1618 * LXW Ra, Rb, Rc, STRD2 - Load a word from memory to GPR
1619 * LXB Ra, Rb, Rc, STRD2 - Load a byte from memory to GPR,
1620 * sign extending to GPR size.
1621 * LXH Ra, Rb, Rc, STRD2 - Load a byte from memory to GPR,
1622 * sign extending to GPR size.
1623 * LXBU Ra, Rb, Rc, STRD2 - Load a halfword from memory to GPR,
1624 * zero extending to GPR size.
1625 * LXHU Ra, Rb, Rc, STRD2 - Load a halfword from memory to GPR,
1626 * zero extending to GPR size.
1628 static void gen_mxu_lxx(DisasContext
*ctx
, uint32_t strd2
, MemOp mop
)
1631 uint32_t Ra
, Rb
, Rc
;
1633 t0
= tcg_temp_new();
1634 t1
= tcg_temp_new();
1636 Ra
= extract32(ctx
->opcode
, 11, 5);
1637 Rc
= extract32(ctx
->opcode
, 16, 5);
1638 Rb
= extract32(ctx
->opcode
, 21, 5);
1640 gen_load_gpr(t0
, Rb
);
1641 gen_load_gpr(t1
, Rc
);
1642 tcg_gen_shli_tl(t1
, t1
, strd2
);
1643 tcg_gen_add_tl(t0
, t0
, t1
);
1645 tcg_gen_qemu_ld_tl(t1
, t0
, ctx
->mem_idx
, mop
| ctx
->default_tcg_memop_mask
);
1646 gen_store_gpr(t1
, Ra
);
1650 * S32STDV XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
1651 * S32STDVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
1652 * in reversed byte seq.
1653 * S32SDIV XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
1654 * post modify base address GPR.
1655 * S32SDIVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
1656 * post modify base address GPR and store in reversed byte seq.
1658 static void gen_mxu_s32stxvx(DisasContext
*ctx
, bool reversed
,
1659 bool postinc
, uint32_t strd2
)
1662 uint32_t XRa
, Rb
, Rc
;
1664 t0
= tcg_temp_new();
1665 t1
= tcg_temp_new();
1667 XRa
= extract32(ctx
->opcode
, 6, 4);
1668 Rc
= extract32(ctx
->opcode
, 16, 5);
1669 Rb
= extract32(ctx
->opcode
, 21, 5);
1671 gen_load_gpr(t0
, Rb
);
1672 gen_load_gpr(t1
, Rc
);
1673 tcg_gen_shli_tl(t1
, t1
, strd2
);
1674 tcg_gen_add_tl(t0
, t0
, t1
);
1676 gen_load_mxu_gpr(t1
, XRa
);
1677 tcg_gen_qemu_st_tl(t1
, t0
, ctx
->mem_idx
,
1678 (MO_TESL
^ (reversed
? MO_BSWAP
: 0)) |
1679 ctx
->default_tcg_memop_mask
);
1682 gen_store_gpr(t0
, Rb
);
1687 * MXU instruction category: logic
1688 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1690 * S32NOR S32AND S32OR S32XOR
1694 * S32NOR XRa, XRb, XRc
1695 * Update XRa with the result of logical bitwise 'nor' operation
1696 * applied to the content of XRb and XRc.
1698 static void gen_mxu_S32NOR(DisasContext
*ctx
)
1700 uint32_t pad
, XRc
, XRb
, XRa
;
1702 pad
= extract32(ctx
->opcode
, 21, 5);
1703 XRc
= extract32(ctx
->opcode
, 14, 4);
1704 XRb
= extract32(ctx
->opcode
, 10, 4);
1705 XRa
= extract32(ctx
->opcode
, 6, 4);
1707 if (unlikely(pad
!= 0)) {
1708 /* opcode padding incorrect -> do nothing */
1709 } else if (unlikely(XRa
== 0)) {
1710 /* destination is zero register -> do nothing */
1711 } else if (unlikely((XRb
== 0) && (XRc
== 0))) {
1712 /* both operands zero registers -> just set destination to all 1s */
1713 tcg_gen_movi_i32(mxu_gpr
[XRa
- 1], 0xFFFFFFFF);
1714 } else if (unlikely(XRb
== 0)) {
1715 /* XRb zero register -> just set destination to the negation of XRc */
1716 tcg_gen_not_i32(mxu_gpr
[XRa
- 1], mxu_gpr
[XRc
- 1]);
1717 } else if (unlikely(XRc
== 0)) {
1718 /* XRa zero register -> just set destination to the negation of XRb */
1719 tcg_gen_not_i32(mxu_gpr
[XRa
- 1], mxu_gpr
[XRb
- 1]);
1720 } else if (unlikely(XRb
== XRc
)) {
1721 /* both operands same -> just set destination to the negation of XRb */
1722 tcg_gen_not_i32(mxu_gpr
[XRa
- 1], mxu_gpr
[XRb
- 1]);
1724 /* the most general case */
1725 tcg_gen_nor_i32(mxu_gpr
[XRa
- 1], mxu_gpr
[XRb
- 1], mxu_gpr
[XRc
- 1]);
1730 * S32AND XRa, XRb, XRc
1731 * Update XRa with the result of logical bitwise 'and' operation
1732 * applied to the content of XRb and XRc.
1734 static void gen_mxu_S32AND(DisasContext
*ctx
)
1736 uint32_t pad
, XRc
, XRb
, XRa
;
1738 pad
= extract32(ctx
->opcode
, 21, 5);
1739 XRc
= extract32(ctx
->opcode
, 14, 4);
1740 XRb
= extract32(ctx
->opcode
, 10, 4);
1741 XRa
= extract32(ctx
->opcode
, 6, 4);
1743 if (unlikely(pad
!= 0)) {
1744 /* opcode padding incorrect -> do nothing */
1745 } else if (unlikely(XRa
== 0)) {
1746 /* destination is zero register -> do nothing */
1747 } else if (unlikely((XRb
== 0) || (XRc
== 0))) {
1748 /* one of operands zero register -> just set destination to all 0s */
1749 tcg_gen_movi_i32(mxu_gpr
[XRa
- 1], 0);
1750 } else if (unlikely(XRb
== XRc
)) {
1751 /* both operands same -> just set destination to one of them */
1752 tcg_gen_mov_i32(mxu_gpr
[XRa
- 1], mxu_gpr
[XRb
- 1]);
1754 /* the most general case */
1755 tcg_gen_and_i32(mxu_gpr
[XRa
- 1], mxu_gpr
[XRb
- 1], mxu_gpr
[XRc
- 1]);
1760 * S32OR XRa, XRb, XRc
1761 * Update XRa with the result of logical bitwise 'or' operation
1762 * applied to the content of XRb and XRc.
1764 static void gen_mxu_S32OR(DisasContext
*ctx
)
1766 uint32_t pad
, XRc
, XRb
, XRa
;
1768 pad
= extract32(ctx
->opcode
, 21, 5);
1769 XRc
= extract32(ctx
->opcode
, 14, 4);
1770 XRb
= extract32(ctx
->opcode
, 10, 4);
1771 XRa
= extract32(ctx
->opcode
, 6, 4);
1773 if (unlikely(pad
!= 0)) {
1774 /* opcode padding incorrect -> do nothing */
1775 } else if (unlikely(XRa
== 0)) {
1776 /* destination is zero register -> do nothing */
1777 } else if (unlikely((XRb
== 0) && (XRc
== 0))) {
1778 /* both operands zero registers -> just set destination to all 0s */
1779 tcg_gen_movi_i32(mxu_gpr
[XRa
- 1], 0);
1780 } else if (unlikely(XRb
== 0)) {
1781 /* XRb zero register -> just set destination to the content of XRc */
1782 tcg_gen_mov_i32(mxu_gpr
[XRa
- 1], mxu_gpr
[XRc
- 1]);
1783 } else if (unlikely(XRc
== 0)) {
1784 /* XRc zero register -> just set destination to the content of XRb */
1785 tcg_gen_mov_i32(mxu_gpr
[XRa
- 1], mxu_gpr
[XRb
- 1]);
1786 } else if (unlikely(XRb
== XRc
)) {
1787 /* both operands same -> just set destination to one of them */
1788 tcg_gen_mov_i32(mxu_gpr
[XRa
- 1], mxu_gpr
[XRb
- 1]);
1790 /* the most general case */
1791 tcg_gen_or_i32(mxu_gpr
[XRa
- 1], mxu_gpr
[XRb
- 1], mxu_gpr
[XRc
- 1]);
1796 * S32XOR XRa, XRb, XRc
1797 * Update XRa with the result of logical bitwise 'xor' operation
1798 * applied to the content of XRb and XRc.
1800 static void gen_mxu_S32XOR(DisasContext
*ctx
)
1802 uint32_t pad
, XRc
, XRb
, XRa
;
1804 pad
= extract32(ctx
->opcode
, 21, 5);
1805 XRc
= extract32(ctx
->opcode
, 14, 4);
1806 XRb
= extract32(ctx
->opcode
, 10, 4);
1807 XRa
= extract32(ctx
->opcode
, 6, 4);
1809 if (unlikely(pad
!= 0)) {
1810 /* opcode padding incorrect -> do nothing */
1811 } else if (unlikely(XRa
== 0)) {
1812 /* destination is zero register -> do nothing */
1813 } else if (unlikely((XRb
== 0) && (XRc
== 0))) {
1814 /* both operands zero registers -> just set destination to all 0s */
1815 tcg_gen_movi_i32(mxu_gpr
[XRa
- 1], 0);
1816 } else if (unlikely(XRb
== 0)) {
1817 /* XRb zero register -> just set destination to the content of XRc */
1818 tcg_gen_mov_i32(mxu_gpr
[XRa
- 1], mxu_gpr
[XRc
- 1]);
1819 } else if (unlikely(XRc
== 0)) {
1820 /* XRc zero register -> just set destination to the content of XRb */
1821 tcg_gen_mov_i32(mxu_gpr
[XRa
- 1], mxu_gpr
[XRb
- 1]);
1822 } else if (unlikely(XRb
== XRc
)) {
1823 /* both operands same -> just set destination to all 0s */
1824 tcg_gen_movi_i32(mxu_gpr
[XRa
- 1], 0);
1826 /* the most general case */
1827 tcg_gen_xor_i32(mxu_gpr
[XRa
- 1], mxu_gpr
[XRb
- 1], mxu_gpr
[XRc
- 1]);
1832 * MXU instruction category: shift
1833 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1835 * D32SLL D32SLR D32SAR D32SARL
1836 * D32SLLV D32SLRV D32SARV D32SARW
1837 * Q16SLL Q16SLR Q16SAR
1838 * Q16SLLV Q16SLRV Q16SARV
1842 * D32SLL XRa, XRd, XRb, XRc, SFT4
1843 * Dual 32-bit shift left from XRb and XRc to SFT4
1844 * bits (0..15). Store to XRa and XRd respectively.
1845 * D32SLR XRa, XRd, XRb, XRc, SFT4
1846 * Dual 32-bit shift logic right from XRb and XRc
1847 * to SFT4 bits (0..15). Store to XRa and XRd respectively.
1848 * D32SAR XRa, XRd, XRb, XRc, SFT4
1849 * Dual 32-bit shift arithmetic right from XRb and XRc
1850 * to SFT4 bits (0..15). Store to XRa and XRd respectively.
1852 static void gen_mxu_d32sxx(DisasContext
*ctx
, bool right
, bool arithmetic
)
1854 uint32_t XRa
, XRb
, XRc
, XRd
, sft4
;
1856 XRa
= extract32(ctx
->opcode
, 6, 4);
1857 XRb
= extract32(ctx
->opcode
, 10, 4);
1858 XRc
= extract32(ctx
->opcode
, 14, 4);
1859 XRd
= extract32(ctx
->opcode
, 18, 4);
1860 sft4
= extract32(ctx
->opcode
, 22, 4);
1862 TCGv t0
= tcg_temp_new();
1863 TCGv t1
= tcg_temp_new();
1865 gen_load_mxu_gpr(t0
, XRb
);
1866 gen_load_mxu_gpr(t1
, XRc
);
1870 tcg_gen_sari_tl(t0
, t0
, sft4
);
1871 tcg_gen_sari_tl(t1
, t1
, sft4
);
1873 tcg_gen_shri_tl(t0
, t0
, sft4
);
1874 tcg_gen_shri_tl(t1
, t1
, sft4
);
1877 tcg_gen_shli_tl(t0
, t0
, sft4
);
1878 tcg_gen_shli_tl(t1
, t1
, sft4
);
1880 gen_store_mxu_gpr(t0
, XRa
);
1881 gen_store_mxu_gpr(t1
, XRd
);
1885 * D32SLLV XRa, XRd, rs
1886 * Dual 32-bit shift left from XRa and XRd to rs[3:0]
1887 * bits. Store back to XRa and XRd respectively.
1888 * D32SLRV XRa, XRd, rs
1889 * Dual 32-bit shift logic right from XRa and XRd to rs[3:0]
1890 * bits. Store back to XRa and XRd respectively.
1891 * D32SARV XRa, XRd, rs
1892 * Dual 32-bit shift arithmetic right from XRa and XRd to rs[3:0]
1893 * bits. Store back to XRa and XRd respectively.
1895 static void gen_mxu_d32sxxv(DisasContext
*ctx
, bool right
, bool arithmetic
)
1897 uint32_t XRa
, XRd
, rs
;
1899 XRa
= extract32(ctx
->opcode
, 10, 4);
1900 XRd
= extract32(ctx
->opcode
, 14, 4);
1901 rs
= extract32(ctx
->opcode
, 21, 5);
1903 TCGv t0
= tcg_temp_new();
1904 TCGv t1
= tcg_temp_new();
1905 TCGv t2
= tcg_temp_new();
1907 gen_load_mxu_gpr(t0
, XRa
);
1908 gen_load_mxu_gpr(t1
, XRd
);
1909 gen_load_gpr(t2
, rs
);
1910 tcg_gen_andi_tl(t2
, t2
, 0x0f);
1914 tcg_gen_sar_tl(t0
, t0
, t2
);
1915 tcg_gen_sar_tl(t1
, t1
, t2
);
1917 tcg_gen_shr_tl(t0
, t0
, t2
);
1918 tcg_gen_shr_tl(t1
, t1
, t2
);
1921 tcg_gen_shl_tl(t0
, t0
, t2
);
1922 tcg_gen_shl_tl(t1
, t1
, t2
);
1924 gen_store_mxu_gpr(t0
, XRa
);
1925 gen_store_mxu_gpr(t1
, XRd
);
1929 * D32SARL XRa, XRb, XRc, SFT4
1930 * Dual shift arithmetic right 32-bit integers in XRb and XRc
1931 * to SFT4 bits (0..15). Pack 16 LSBs of each into XRa.
1933 * D32SARW XRa, XRb, XRc, rb
1934 * Dual shift arithmetic right 32-bit integers in XRb and XRc
1935 * to rb[3:0] bits. Pack 16 LSBs of each into XRa.
1937 static void gen_mxu_d32sarl(DisasContext
*ctx
, bool sarw
)
1939 uint32_t XRa
, XRb
, XRc
, rb
;
1941 XRa
= extract32(ctx
->opcode
, 6, 4);
1942 XRb
= extract32(ctx
->opcode
, 10, 4);
1943 XRc
= extract32(ctx
->opcode
, 14, 4);
1944 rb
= extract32(ctx
->opcode
, 21, 5);
1946 if (unlikely(XRa
== 0)) {
1947 /* destination is zero register -> do nothing */
1949 TCGv t0
= tcg_temp_new();
1950 TCGv t1
= tcg_temp_new();
1951 TCGv t2
= tcg_temp_new();
1954 /* Make SFT4 from rb field */
1955 tcg_gen_movi_tl(t2
, rb
>> 1);
1957 gen_load_gpr(t2
, rb
);
1958 tcg_gen_andi_tl(t2
, t2
, 0x0f);
1960 gen_load_mxu_gpr(t0
, XRb
);
1961 gen_load_mxu_gpr(t1
, XRc
);
1962 tcg_gen_sar_tl(t0
, t0
, t2
);
1963 tcg_gen_sar_tl(t1
, t1
, t2
);
1964 tcg_gen_extract_tl(t2
, t1
, 0, 16);
1965 tcg_gen_deposit_tl(t2
, t2
, t0
, 16, 16);
1966 gen_store_mxu_gpr(t2
, XRa
);
1971 * Q16SLL XRa, XRd, XRb, XRc, SFT4
1972 * Quad 16-bit shift left from XRb and XRc to SFT4
1973 * bits (0..15). Store to XRa and XRd respectively.
1974 * Q16SLR XRa, XRd, XRb, XRc, SFT4
1975 * Quad 16-bit shift logic right from XRb and XRc
1976 * to SFT4 bits (0..15). Store to XRa and XRd respectively.
1977 * Q16SAR XRa, XRd, XRb, XRc, SFT4
1978 * Quad 16-bit shift arithmetic right from XRb and XRc
1979 * to SFT4 bits (0..15). Store to XRa and XRd respectively.
1981 static void gen_mxu_q16sxx(DisasContext
*ctx
, bool right
, bool arithmetic
)
1983 uint32_t XRa
, XRb
, XRc
, XRd
, sft4
;
1985 XRa
= extract32(ctx
->opcode
, 6, 4);
1986 XRb
= extract32(ctx
->opcode
, 10, 4);
1987 XRc
= extract32(ctx
->opcode
, 14, 4);
1988 XRd
= extract32(ctx
->opcode
, 18, 4);
1989 sft4
= extract32(ctx
->opcode
, 22, 4);
1991 TCGv t0
= tcg_temp_new();
1992 TCGv t1
= tcg_temp_new();
1993 TCGv t2
= tcg_temp_new();
1994 TCGv t3
= tcg_temp_new();
1996 gen_load_mxu_gpr(t0
, XRb
);
1997 gen_load_mxu_gpr(t2
, XRc
);
2000 tcg_gen_sextract_tl(t1
, t0
, 16, 16);
2001 tcg_gen_sextract_tl(t0
, t0
, 0, 16);
2002 tcg_gen_sextract_tl(t3
, t2
, 16, 16);
2003 tcg_gen_sextract_tl(t2
, t2
, 0, 16);
2005 tcg_gen_extract_tl(t1
, t0
, 16, 16);
2006 tcg_gen_extract_tl(t0
, t0
, 0, 16);
2007 tcg_gen_extract_tl(t3
, t2
, 16, 16);
2008 tcg_gen_extract_tl(t2
, t2
, 0, 16);
2013 tcg_gen_sari_tl(t0
, t0
, sft4
);
2014 tcg_gen_sari_tl(t1
, t1
, sft4
);
2015 tcg_gen_sari_tl(t2
, t2
, sft4
);
2016 tcg_gen_sari_tl(t3
, t3
, sft4
);
2018 tcg_gen_shri_tl(t0
, t0
, sft4
);
2019 tcg_gen_shri_tl(t1
, t1
, sft4
);
2020 tcg_gen_shri_tl(t2
, t2
, sft4
);
2021 tcg_gen_shri_tl(t3
, t3
, sft4
);
2024 tcg_gen_shli_tl(t0
, t0
, sft4
);
2025 tcg_gen_shli_tl(t1
, t1
, sft4
);
2026 tcg_gen_shli_tl(t2
, t2
, sft4
);
2027 tcg_gen_shli_tl(t3
, t3
, sft4
);
2029 tcg_gen_deposit_tl(t0
, t0
, t1
, 16, 16);
2030 tcg_gen_deposit_tl(t2
, t2
, t3
, 16, 16);
2032 gen_store_mxu_gpr(t0
, XRa
);
2033 gen_store_mxu_gpr(t2
, XRd
);
2037 * Q16SLLV XRa, XRd, rs
2038 * Quad 16-bit shift left from XRa and XRd to rs[3:0]
2039 * bits. Store to XRa and XRd respectively.
2040 * Q16SLRV XRa, XRd, rs
2041 * Quad 16-bit shift logic right from XRa and XRd to rs[3:0]
2042 * bits. Store to XRa and XRd respectively.
2043 * Q16SARV XRa, XRd, rs
2044 * Quad 16-bit shift arithmetic right from XRa and XRd to rs[3:0]
2045 * bits. Store to XRa and XRd respectively.
2047 static void gen_mxu_q16sxxv(DisasContext
*ctx
, bool right
, bool arithmetic
)
2049 uint32_t XRa
, XRd
, rs
;
2051 XRa
= extract32(ctx
->opcode
, 10, 4);
2052 XRd
= extract32(ctx
->opcode
, 14, 4);
2053 rs
= extract32(ctx
->opcode
, 21, 5);
2055 TCGv t0
= tcg_temp_new();
2056 TCGv t1
= tcg_temp_new();
2057 TCGv t2
= tcg_temp_new();
2058 TCGv t3
= tcg_temp_new();
2059 TCGv t5
= tcg_temp_new();
2061 gen_load_mxu_gpr(t0
, XRa
);
2062 gen_load_mxu_gpr(t2
, XRd
);
2063 gen_load_gpr(t5
, rs
);
2064 tcg_gen_andi_tl(t5
, t5
, 0x0f);
2068 tcg_gen_sextract_tl(t1
, t0
, 16, 16);
2069 tcg_gen_sextract_tl(t0
, t0
, 0, 16);
2070 tcg_gen_sextract_tl(t3
, t2
, 16, 16);
2071 tcg_gen_sextract_tl(t2
, t2
, 0, 16);
2073 tcg_gen_extract_tl(t1
, t0
, 16, 16);
2074 tcg_gen_extract_tl(t0
, t0
, 0, 16);
2075 tcg_gen_extract_tl(t3
, t2
, 16, 16);
2076 tcg_gen_extract_tl(t2
, t2
, 0, 16);
2081 tcg_gen_sar_tl(t0
, t0
, t5
);
2082 tcg_gen_sar_tl(t1
, t1
, t5
);
2083 tcg_gen_sar_tl(t2
, t2
, t5
);
2084 tcg_gen_sar_tl(t3
, t3
, t5
);
2086 tcg_gen_shr_tl(t0
, t0
, t5
);
2087 tcg_gen_shr_tl(t1
, t1
, t5
);
2088 tcg_gen_shr_tl(t2
, t2
, t5
);
2089 tcg_gen_shr_tl(t3
, t3
, t5
);
2092 tcg_gen_shl_tl(t0
, t0
, t5
);
2093 tcg_gen_shl_tl(t1
, t1
, t5
);
2094 tcg_gen_shl_tl(t2
, t2
, t5
);
2095 tcg_gen_shl_tl(t3
, t3
, t5
);
2097 tcg_gen_deposit_tl(t0
, t0
, t1
, 16, 16);
2098 tcg_gen_deposit_tl(t2
, t2
, t3
, 16, 16);
2100 gen_store_mxu_gpr(t0
, XRa
);
2101 gen_store_mxu_gpr(t2
, XRd
);
2105 * MXU instruction category max/min/avg
2106 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2108 * S32MAX D16MAX Q8MAX
2109 * S32MIN D16MIN Q8MIN
2110 * S32SLT D16SLT Q8SLT
2114 * S32MOVZ D16MOVZ Q8MOVZ
2115 * S32MOVN D16MOVN Q8MOVN
2119 * S32MAX XRa, XRb, XRc
2120 * Update XRa with the maximum of signed 32-bit integers contained
2123 * S32MIN XRa, XRb, XRc
2124 * Update XRa with the minimum of signed 32-bit integers contained
2127 static void gen_mxu_S32MAX_S32MIN(DisasContext
*ctx
)
2129 uint32_t pad
, opc
, XRc
, XRb
, XRa
;
2131 pad
= extract32(ctx
->opcode
, 21, 5);
2132 opc
= extract32(ctx
->opcode
, 18, 3);
2133 XRc
= extract32(ctx
->opcode
, 14, 4);
2134 XRb
= extract32(ctx
->opcode
, 10, 4);
2135 XRa
= extract32(ctx
->opcode
, 6, 4);
2137 if (unlikely(pad
!= 0)) {
2138 /* opcode padding incorrect -> do nothing */
2139 } else if (unlikely(XRa
== 0)) {
2140 /* destination is zero register -> do nothing */
2141 } else if (unlikely((XRb
== 0) && (XRc
== 0))) {
2142 /* both operands zero registers -> just set destination to zero */
2143 tcg_gen_movi_i32(mxu_gpr
[XRa
- 1], 0);
2144 } else if (unlikely((XRb
== 0) || (XRc
== 0))) {
2145 /* exactly one operand is zero register - find which one is not...*/
2146 uint32_t XRx
= XRb
? XRb
: XRc
;
2147 /* ...and do max/min operation with one operand 0 */
2148 if (opc
== OPC_MXU_S32MAX
) {
2149 tcg_gen_smax_i32(mxu_gpr
[XRa
- 1], mxu_gpr
[XRx
- 1], 0);
2151 tcg_gen_smin_i32(mxu_gpr
[XRa
- 1], mxu_gpr
[XRx
- 1], 0);
2153 } else if (unlikely(XRb
== XRc
)) {
2154 /* both operands same -> just set destination to one of them */
2155 tcg_gen_mov_i32(mxu_gpr
[XRa
- 1], mxu_gpr
[XRb
- 1]);
2157 /* the most general case */
2158 if (opc
== OPC_MXU_S32MAX
) {
2159 tcg_gen_smax_i32(mxu_gpr
[XRa
- 1], mxu_gpr
[XRb
- 1],
2162 tcg_gen_smin_i32(mxu_gpr
[XRa
- 1], mxu_gpr
[XRb
- 1],
2170 * Update XRa with the 16-bit-wise maximums of signed integers
2171 * contained in XRb and XRc.
2174 * Update XRa with the 16-bit-wise minimums of signed integers
2175 * contained in XRb and XRc.
2177 static void gen_mxu_D16MAX_D16MIN(DisasContext
*ctx
)
2179 uint32_t pad
, opc
, XRc
, XRb
, XRa
;
2181 pad
= extract32(ctx
->opcode
, 21, 5);
2182 opc
= extract32(ctx
->opcode
, 18, 3);
2183 XRc
= extract32(ctx
->opcode
, 14, 4);
2184 XRb
= extract32(ctx
->opcode
, 10, 4);
2185 XRa
= extract32(ctx
->opcode
, 6, 4);
2187 if (unlikely(pad
!= 0)) {
2188 /* opcode padding incorrect -> do nothing */
2189 } else if (unlikely(XRa
== 0)) {
2190 /* destination is zero register -> do nothing */
2191 } else if (unlikely((XRb
== 0) && (XRc
== 0))) {
2192 /* both operands zero registers -> just set destination to zero */
2193 tcg_gen_movi_i32(mxu_gpr
[XRa
- 1], 0);
2194 } else if (unlikely((XRb
== 0) || (XRc
== 0))) {
2195 /* exactly one operand is zero register - find which one is not...*/
2196 uint32_t XRx
= XRb
? XRb
: XRc
;
2197 /* ...and do half-word-wise max/min with one operand 0 */
2198 TCGv_i32 t0
= tcg_temp_new();
2199 TCGv_i32 t1
= tcg_constant_i32(0);
2200 TCGv_i32 t2
= tcg_temp_new();
2202 /* the left half-word first */
2203 tcg_gen_andi_i32(t0
, mxu_gpr
[XRx
- 1], 0xFFFF0000);
2204 if (opc
== OPC_MXU_D16MAX
) {
2205 tcg_gen_smax_i32(t2
, t0
, t1
);
2207 tcg_gen_smin_i32(t2
, t0
, t1
);
2210 /* the right half-word */
2211 tcg_gen_andi_i32(t0
, mxu_gpr
[XRx
- 1], 0x0000FFFF);
2212 /* move half-words to the leftmost position */
2213 tcg_gen_shli_i32(t0
, t0
, 16);
2214 /* t0 will be max/min of t0 and t1 */
2215 if (opc
== OPC_MXU_D16MAX
) {
2216 tcg_gen_smax_i32(t0
, t0
, t1
);
2218 tcg_gen_smin_i32(t0
, t0
, t1
);
2220 /* return resulting half-words to its original position */
2221 tcg_gen_shri_i32(t0
, t0
, 16);
2222 /* finally update the destination */
2223 tcg_gen_or_i32(mxu_gpr
[XRa
- 1], t2
, t0
);
2224 } else if (unlikely(XRb
== XRc
)) {
2225 /* both operands same -> just set destination to one of them */
2226 tcg_gen_mov_i32(mxu_gpr
[XRa
- 1], mxu_gpr
[XRb
- 1]);
2228 /* the most general case */
2229 TCGv_i32 t0
= tcg_temp_new();
2230 TCGv_i32 t1
= tcg_temp_new();
2231 TCGv_i32 t2
= tcg_temp_new();
2233 /* the left half-word first */
2234 tcg_gen_andi_i32(t0
, mxu_gpr
[XRb
- 1], 0xFFFF0000);
2235 tcg_gen_andi_i32(t1
, mxu_gpr
[XRc
- 1], 0xFFFF0000);
2236 if (opc
== OPC_MXU_D16MAX
) {
2237 tcg_gen_smax_i32(t2
, t0
, t1
);
2239 tcg_gen_smin_i32(t2
, t0
, t1
);
2242 /* the right half-word */
2243 tcg_gen_andi_i32(t0
, mxu_gpr
[XRb
- 1], 0x0000FFFF);
2244 tcg_gen_andi_i32(t1
, mxu_gpr
[XRc
- 1], 0x0000FFFF);
2245 /* move half-words to the leftmost position */
2246 tcg_gen_shli_i32(t0
, t0
, 16);
2247 tcg_gen_shli_i32(t1
, t1
, 16);
2248 /* t0 will be max/min of t0 and t1 */
2249 if (opc
== OPC_MXU_D16MAX
) {
2250 tcg_gen_smax_i32(t0
, t0
, t1
);
2252 tcg_gen_smin_i32(t0
, t0
, t1
);
2254 /* return resulting half-words to its original position */
2255 tcg_gen_shri_i32(t0
, t0
, 16);
2256 /* finally update the destination */
2257 tcg_gen_or_i32(mxu_gpr
[XRa
- 1], t2
, t0
);
2263 * Update XRa with the 8-bit-wise maximums of signed integers
2264 * contained in XRb and XRc.
2267 * Update XRa with the 8-bit-wise minimums of signed integers
2268 * contained in XRb and XRc.
2270 static void gen_mxu_Q8MAX_Q8MIN(DisasContext
*ctx
)
2272 uint32_t pad
, opc
, XRc
, XRb
, XRa
;
2274 pad
= extract32(ctx
->opcode
, 21, 5);
2275 opc
= extract32(ctx
->opcode
, 18, 3);
2276 XRc
= extract32(ctx
->opcode
, 14, 4);
2277 XRb
= extract32(ctx
->opcode
, 10, 4);
2278 XRa
= extract32(ctx
->opcode
, 6, 4);
2280 if (unlikely(pad
!= 0)) {
2281 /* opcode padding incorrect -> do nothing */
2282 } else if (unlikely(XRa
== 0)) {
2283 /* destination is zero register -> do nothing */
2284 } else if (unlikely((XRb
== 0) && (XRc
== 0))) {
2285 /* both operands zero registers -> just set destination to zero */
2286 tcg_gen_movi_i32(mxu_gpr
[XRa
- 1], 0);
2287 } else if (unlikely((XRb
== 0) || (XRc
== 0))) {
2288 /* exactly one operand is zero register - make it be the first...*/
2289 uint32_t XRx
= XRb
? XRb
: XRc
;
2290 /* ...and do byte-wise max/min with one operand 0 */
2291 TCGv_i32 t0
= tcg_temp_new();
2292 TCGv_i32 t1
= tcg_constant_i32(0);
2293 TCGv_i32 t2
= tcg_temp_new();
2296 /* the leftmost byte (byte 3) first */
2297 tcg_gen_andi_i32(t0
, mxu_gpr
[XRx
- 1], 0xFF000000);
2298 if (opc
== OPC_MXU_Q8MAX
) {
2299 tcg_gen_smax_i32(t2
, t0
, t1
);
2301 tcg_gen_smin_i32(t2
, t0
, t1
);
2305 for (i
= 2; i
>= 0; i
--) {
2306 /* extract the byte */
2307 tcg_gen_andi_i32(t0
, mxu_gpr
[XRx
- 1], 0xFF << (8 * i
));
2308 /* move the byte to the leftmost position */
2309 tcg_gen_shli_i32(t0
, t0
, 8 * (3 - i
));
2310 /* t0 will be max/min of t0 and t1 */
2311 if (opc
== OPC_MXU_Q8MAX
) {
2312 tcg_gen_smax_i32(t0
, t0
, t1
);
2314 tcg_gen_smin_i32(t0
, t0
, t1
);
2316 /* return resulting byte to its original position */
2317 tcg_gen_shri_i32(t0
, t0
, 8 * (3 - i
));
2318 /* finally update the destination */
2319 tcg_gen_or_i32(t2
, t2
, t0
);
2321 gen_store_mxu_gpr(t2
, XRa
);
2322 } else if (unlikely(XRb
== XRc
)) {
2323 /* both operands same -> just set destination to one of them */
2324 tcg_gen_mov_i32(mxu_gpr
[XRa
- 1], mxu_gpr
[XRb
- 1]);
2326 /* the most general case */
2327 TCGv_i32 t0
= tcg_temp_new();
2328 TCGv_i32 t1
= tcg_temp_new();
2329 TCGv_i32 t2
= tcg_temp_new();
2332 /* the leftmost bytes (bytes 3) first */
2333 tcg_gen_andi_i32(t0
, mxu_gpr
[XRb
- 1], 0xFF000000);
2334 tcg_gen_andi_i32(t1
, mxu_gpr
[XRc
- 1], 0xFF000000);
2335 if (opc
== OPC_MXU_Q8MAX
) {
2336 tcg_gen_smax_i32(t2
, t0
, t1
);
2338 tcg_gen_smin_i32(t2
, t0
, t1
);
2342 for (i
= 2; i
>= 0; i
--) {
2343 /* extract corresponding bytes */
2344 tcg_gen_andi_i32(t0
, mxu_gpr
[XRb
- 1], 0xFF << (8 * i
));
2345 tcg_gen_andi_i32(t1
, mxu_gpr
[XRc
- 1], 0xFF << (8 * i
));
2346 /* move the bytes to the leftmost position */
2347 tcg_gen_shli_i32(t0
, t0
, 8 * (3 - i
));
2348 tcg_gen_shli_i32(t1
, t1
, 8 * (3 - i
));
2349 /* t0 will be max/min of t0 and t1 */
2350 if (opc
== OPC_MXU_Q8MAX
) {
2351 tcg_gen_smax_i32(t0
, t0
, t1
);
2353 tcg_gen_smin_i32(t0
, t0
, t1
);
2355 /* return resulting byte to its original position */
2356 tcg_gen_shri_i32(t0
, t0
, 8 * (3 - i
));
2357 /* finally update the destination */
2358 tcg_gen_or_i32(t2
, t2
, t0
);
2360 gen_store_mxu_gpr(t2
, XRa
);
2366 * Update XRa with the signed "set less than" comparison of XRb and XRc
2367 * on per-byte basis.
2368 * a.k.a. XRa[0..3] = XRb[0..3] < XRc[0..3] ? 1 : 0;
2371 * Update XRa with the unsigned "set less than" comparison of XRb and XRc
2372 * on per-byte basis.
2373 * a.k.a. XRa[0..3] = XRb[0..3] < XRc[0..3] ? 1 : 0;
2375 static void gen_mxu_q8slt(DisasContext
*ctx
, bool sltu
)
2377 uint32_t pad
, XRc
, XRb
, XRa
;
2379 pad
= extract32(ctx
->opcode
, 21, 5);
2380 XRc
= extract32(ctx
->opcode
, 14, 4);
2381 XRb
= extract32(ctx
->opcode
, 10, 4);
2382 XRa
= extract32(ctx
->opcode
, 6, 4);
2384 if (unlikely(pad
!= 0)) {
2385 /* opcode padding incorrect -> do nothing */
2386 } else if (unlikely(XRa
== 0)) {
2387 /* destination is zero register -> do nothing */
2388 } else if (unlikely((XRb
== 0) && (XRc
== 0))) {
2389 /* both operands zero registers -> just set destination to zero */
2390 tcg_gen_movi_tl(mxu_gpr
[XRa
- 1], 0);
2391 } else if (unlikely(XRb
== XRc
)) {
2392 /* both operands same registers -> just set destination to zero */
2393 tcg_gen_movi_tl(mxu_gpr
[XRa
- 1], 0);
2395 /* the most general case */
2396 TCGv t0
= tcg_temp_new();
2397 TCGv t1
= tcg_temp_new();
2398 TCGv t2
= tcg_temp_new();
2399 TCGv t3
= tcg_temp_new();
2400 TCGv t4
= tcg_temp_new();
2402 gen_load_mxu_gpr(t3
, XRb
);
2403 gen_load_mxu_gpr(t4
, XRc
);
2404 tcg_gen_movi_tl(t2
, 0);
2406 for (int i
= 0; i
< 4; i
++) {
2408 tcg_gen_extract_tl(t0
, t3
, 8 * i
, 8);
2409 tcg_gen_extract_tl(t1
, t4
, 8 * i
, 8);
2411 tcg_gen_sextract_tl(t0
, t3
, 8 * i
, 8);
2412 tcg_gen_sextract_tl(t1
, t4
, 8 * i
, 8);
2414 tcg_gen_setcond_tl(TCG_COND_LT
, t0
, t0
, t1
);
2415 tcg_gen_deposit_tl(t2
, t2
, t0
, 8 * i
, 8);
2417 gen_store_mxu_gpr(t2
, XRa
);
2423 * Update XRa with the signed "set less than" comparison of XRb and XRc.
2424 * a.k.a. XRa = XRb < XRc ? 1 : 0;
2426 static void gen_mxu_S32SLT(DisasContext
*ctx
)
2428 uint32_t pad
, XRc
, XRb
, XRa
;
2430 pad
= extract32(ctx
->opcode
, 21, 5);
2431 XRc
= extract32(ctx
->opcode
, 14, 4);
2432 XRb
= extract32(ctx
->opcode
, 10, 4);
2433 XRa
= extract32(ctx
->opcode
, 6, 4);
2435 if (unlikely(pad
!= 0)) {
2436 /* opcode padding incorrect -> do nothing */
2437 } else if (unlikely(XRa
== 0)) {
2438 /* destination is zero register -> do nothing */
2439 } else if (unlikely((XRb
== 0) && (XRc
== 0))) {
2440 /* both operands zero registers -> just set destination to zero */
2441 tcg_gen_movi_tl(mxu_gpr
[XRa
- 1], 0);
2442 } else if (unlikely(XRb
== XRc
)) {
2443 /* both operands same registers -> just set destination to zero */
2444 tcg_gen_movi_tl(mxu_gpr
[XRa
- 1], 0);
2446 /* the most general case */
2447 TCGv t0
= tcg_temp_new();
2448 TCGv t1
= tcg_temp_new();
2450 gen_load_mxu_gpr(t0
, XRb
);
2451 gen_load_mxu_gpr(t1
, XRc
);
2452 tcg_gen_setcond_tl(TCG_COND_LT
, mxu_gpr
[XRa
- 1], t0
, t1
);
2458 * Update XRa with the signed "set less than" comparison of XRb and XRc
2459 * on per-word basis.
2460 * a.k.a. XRa[0..1] = XRb[0..1] < XRc[0..1] ? 1 : 0;
2462 static void gen_mxu_D16SLT(DisasContext
*ctx
)
2464 uint32_t pad
, XRc
, XRb
, XRa
;
2466 pad
= extract32(ctx
->opcode
, 21, 5);
2467 XRc
= extract32(ctx
->opcode
, 14, 4);
2468 XRb
= extract32(ctx
->opcode
, 10, 4);
2469 XRa
= extract32(ctx
->opcode
, 6, 4);
2471 if (unlikely(pad
!= 0)) {
2472 /* opcode padding incorrect -> do nothing */
2473 } else if (unlikely(XRa
== 0)) {
2474 /* destination is zero register -> do nothing */
2475 } else if (unlikely((XRb
== 0) && (XRc
== 0))) {
2476 /* both operands zero registers -> just set destination to zero */
2477 tcg_gen_movi_tl(mxu_gpr
[XRa
- 1], 0);
2478 } else if (unlikely(XRb
== XRc
)) {
2479 /* both operands same registers -> just set destination to zero */
2480 tcg_gen_movi_tl(mxu_gpr
[XRa
- 1], 0);
2482 /* the most general case */
2483 TCGv t0
= tcg_temp_new();
2484 TCGv t1
= tcg_temp_new();
2485 TCGv t2
= tcg_temp_new();
2486 TCGv t3
= tcg_temp_new();
2487 TCGv t4
= tcg_temp_new();
2489 gen_load_mxu_gpr(t3
, XRb
);
2490 gen_load_mxu_gpr(t4
, XRc
);
2491 tcg_gen_sextract_tl(t0
, t3
, 16, 16);
2492 tcg_gen_sextract_tl(t1
, t4
, 16, 16);
2493 tcg_gen_setcond_tl(TCG_COND_LT
, t0
, t0
, t1
);
2494 tcg_gen_shli_tl(t2
, t0
, 16);
2495 tcg_gen_sextract_tl(t0
, t3
, 0, 16);
2496 tcg_gen_sextract_tl(t1
, t4
, 0, 16);
2497 tcg_gen_setcond_tl(TCG_COND_LT
, t0
, t0
, t1
);
2498 tcg_gen_or_tl(mxu_gpr
[XRa
- 1], t2
, t0
);
2504 * Update XRa with the signed average of XRb and XRc
2505 * on per-word basis, rounding down.
2506 * a.k.a. XRa[0..1] = (XRb[0..1] + XRc[0..1]) >> 1;
2509 * Update XRa with the signed average of XRb and XRc
2510 * on per-word basis, math rounding 4/5.
2511 * a.k.a. XRa[0..1] = (XRb[0..1] + XRc[0..1] + 1) >> 1;
2513 static void gen_mxu_d16avg(DisasContext
*ctx
, bool round45
)
2515 uint32_t pad
, XRc
, XRb
, XRa
;
2517 pad
= extract32(ctx
->opcode
, 21, 5);
2518 XRc
= extract32(ctx
->opcode
, 14, 4);
2519 XRb
= extract32(ctx
->opcode
, 10, 4);
2520 XRa
= extract32(ctx
->opcode
, 6, 4);
2522 if (unlikely(pad
!= 0)) {
2523 /* opcode padding incorrect -> do nothing */
2524 } else if (unlikely(XRa
== 0)) {
2525 /* destination is zero register -> do nothing */
2526 } else if (unlikely((XRb
== 0) && (XRc
== 0))) {
2527 /* both operands zero registers -> just set destination to zero */
2528 tcg_gen_movi_tl(mxu_gpr
[XRa
- 1], 0);
2529 } else if (unlikely(XRb
== XRc
)) {
2530 /* both operands same registers -> just set destination to same */
2531 tcg_gen_mov_tl(mxu_gpr
[XRa
- 1], mxu_gpr
[XRb
- 1]);
2533 /* the most general case */
2534 TCGv t0
= tcg_temp_new();
2535 TCGv t1
= tcg_temp_new();
2536 TCGv t2
= tcg_temp_new();
2537 TCGv t3
= tcg_temp_new();
2538 TCGv t4
= tcg_temp_new();
2540 gen_load_mxu_gpr(t3
, XRb
);
2541 gen_load_mxu_gpr(t4
, XRc
);
2542 tcg_gen_sextract_tl(t0
, t3
, 16, 16);
2543 tcg_gen_sextract_tl(t1
, t4
, 16, 16);
2544 tcg_gen_add_tl(t0
, t0
, t1
);
2546 tcg_gen_addi_tl(t0
, t0
, 1);
2548 tcg_gen_shli_tl(t2
, t0
, 15);
2549 tcg_gen_andi_tl(t2
, t2
, 0xffff0000);
2550 tcg_gen_sextract_tl(t0
, t3
, 0, 16);
2551 tcg_gen_sextract_tl(t1
, t4
, 0, 16);
2552 tcg_gen_add_tl(t0
, t0
, t1
);
2554 tcg_gen_addi_tl(t0
, t0
, 1);
2556 tcg_gen_shri_tl(t0
, t0
, 1);
2557 tcg_gen_deposit_tl(t2
, t2
, t0
, 0, 16);
2558 gen_store_mxu_gpr(t2
, XRa
);
2564 * Update XRa with the signed average of XRb and XRc
2565 * on per-byte basis, rounding down.
2566 * a.k.a. XRa[0..3] = (XRb[0..3] + XRc[0..3]) >> 1;
2569 * Update XRa with the signed average of XRb and XRc
2570 * on per-word basis, math rounding 4/5.
2571 * a.k.a. XRa[0..3] = (XRb[0..3] + XRc[0..3] + 1) >> 1;
2573 static void gen_mxu_q8avg(DisasContext
*ctx
, bool round45
)
2575 uint32_t pad
, XRc
, XRb
, XRa
;
2577 pad
= extract32(ctx
->opcode
, 21, 5);
2578 XRc
= extract32(ctx
->opcode
, 14, 4);
2579 XRb
= extract32(ctx
->opcode
, 10, 4);
2580 XRa
= extract32(ctx
->opcode
, 6, 4);
2582 if (unlikely(pad
!= 0)) {
2583 /* opcode padding incorrect -> do nothing */
2584 } else if (unlikely(XRa
== 0)) {
2585 /* destination is zero register -> do nothing */
2586 } else if (unlikely((XRb
== 0) && (XRc
== 0))) {
2587 /* both operands zero registers -> just set destination to zero */
2588 tcg_gen_movi_tl(mxu_gpr
[XRa
- 1], 0);
2589 } else if (unlikely(XRb
== XRc
)) {
2590 /* both operands same registers -> just set destination to same */
2591 tcg_gen_mov_tl(mxu_gpr
[XRa
- 1], mxu_gpr
[XRb
- 1]);
2593 /* the most general case */
2594 TCGv t0
= tcg_temp_new();
2595 TCGv t1
= tcg_temp_new();
2596 TCGv t2
= tcg_temp_new();
2597 TCGv t3
= tcg_temp_new();
2598 TCGv t4
= tcg_temp_new();
2600 gen_load_mxu_gpr(t3
, XRb
);
2601 gen_load_mxu_gpr(t4
, XRc
);
2602 tcg_gen_movi_tl(t2
, 0);
2604 for (int i
= 0; i
< 4; i
++) {
2605 tcg_gen_extract_tl(t0
, t3
, 8 * i
, 8);
2606 tcg_gen_extract_tl(t1
, t4
, 8 * i
, 8);
2607 tcg_gen_add_tl(t0
, t0
, t1
);
2609 tcg_gen_addi_tl(t0
, t0
, 1);
2611 tcg_gen_shri_tl(t0
, t0
, 1);
2612 tcg_gen_deposit_tl(t2
, t2
, t0
, 8 * i
, 8);
2614 gen_store_mxu_gpr(t2
, XRa
);
2620 * Quadruple 8-bit packed conditional move where
2621 * XRb contains conditions, XRc what to move and
2622 * XRa is the destination.
2623 * a.k.a. if (XRb[0..3] == 0) { XRa[0..3] = XRc[0..3] }
2626 * Quadruple 8-bit packed conditional move where
2627 * XRb contains conditions, XRc what to move and
2628 * XRa is the destination.
2629 * a.k.a. if (XRb[0..3] != 0) { XRa[0..3] = XRc[0..3] }
2631 static void gen_mxu_q8movzn(DisasContext
*ctx
, TCGCond cond
)
2633 uint32_t XRc
, XRb
, XRa
;
2635 XRa
= extract32(ctx
->opcode
, 6, 4);
2636 XRb
= extract32(ctx
->opcode
, 10, 4);
2637 XRc
= extract32(ctx
->opcode
, 14, 4);
2639 TCGv t0
= tcg_temp_new();
2640 TCGv t1
= tcg_temp_new();
2641 TCGv t2
= tcg_temp_new();
2642 TCGv t3
= tcg_temp_new();
2643 TCGLabel
*l_quarterdone
= gen_new_label();
2644 TCGLabel
*l_halfdone
= gen_new_label();
2645 TCGLabel
*l_quarterrest
= gen_new_label();
2646 TCGLabel
*l_done
= gen_new_label();
2648 gen_load_mxu_gpr(t0
, XRc
);
2649 gen_load_mxu_gpr(t1
, XRb
);
2650 gen_load_mxu_gpr(t2
, XRa
);
2652 tcg_gen_extract_tl(t3
, t1
, 24, 8);
2653 tcg_gen_brcondi_tl(cond
, t3
, 0, l_quarterdone
);
2654 tcg_gen_extract_tl(t3
, t0
, 24, 8);
2655 tcg_gen_deposit_tl(t2
, t2
, t3
, 24, 8);
2657 gen_set_label(l_quarterdone
);
2658 tcg_gen_extract_tl(t3
, t1
, 16, 8);
2659 tcg_gen_brcondi_tl(cond
, t3
, 0, l_halfdone
);
2660 tcg_gen_extract_tl(t3
, t0
, 16, 8);
2661 tcg_gen_deposit_tl(t2
, t2
, t3
, 16, 8);
2663 gen_set_label(l_halfdone
);
2664 tcg_gen_extract_tl(t3
, t1
, 8, 8);
2665 tcg_gen_brcondi_tl(cond
, t3
, 0, l_quarterrest
);
2666 tcg_gen_extract_tl(t3
, t0
, 8, 8);
2667 tcg_gen_deposit_tl(t2
, t2
, t3
, 8, 8);
2669 gen_set_label(l_quarterrest
);
2670 tcg_gen_extract_tl(t3
, t1
, 0, 8);
2671 tcg_gen_brcondi_tl(cond
, t3
, 0, l_done
);
2672 tcg_gen_extract_tl(t3
, t0
, 0, 8);
2673 tcg_gen_deposit_tl(t2
, t2
, t3
, 0, 8);
2675 gen_set_label(l_done
);
2676 gen_store_mxu_gpr(t2
, XRa
);
2681 * Double 16-bit packed conditional move where
2682 * XRb contains conditions, XRc what to move and
2683 * XRa is the destination.
2684 * a.k.a. if (XRb[0..1] == 0) { XRa[0..1] = XRc[0..1] }
2687 * Double 16-bit packed conditional move where
2688 * XRb contains conditions, XRc what to move and
2689 * XRa is the destination.
2690 * a.k.a. if (XRb[0..3] != 0) { XRa[0..1] = XRc[0..1] }
2692 static void gen_mxu_d16movzn(DisasContext
*ctx
, TCGCond cond
)
2694 uint32_t XRc
, XRb
, XRa
;
2696 XRa
= extract32(ctx
->opcode
, 6, 4);
2697 XRb
= extract32(ctx
->opcode
, 10, 4);
2698 XRc
= extract32(ctx
->opcode
, 14, 4);
2700 TCGv t0
= tcg_temp_new();
2701 TCGv t1
= tcg_temp_new();
2702 TCGv t2
= tcg_temp_new();
2703 TCGv t3
= tcg_temp_new();
2704 TCGLabel
*l_halfdone
= gen_new_label();
2705 TCGLabel
*l_done
= gen_new_label();
2707 gen_load_mxu_gpr(t0
, XRc
);
2708 gen_load_mxu_gpr(t1
, XRb
);
2709 gen_load_mxu_gpr(t2
, XRa
);
2711 tcg_gen_extract_tl(t3
, t1
, 16, 16);
2712 tcg_gen_brcondi_tl(cond
, t3
, 0, l_halfdone
);
2713 tcg_gen_extract_tl(t3
, t0
, 16, 16);
2714 tcg_gen_deposit_tl(t2
, t2
, t3
, 16, 16);
2716 gen_set_label(l_halfdone
);
2717 tcg_gen_extract_tl(t3
, t1
, 0, 16);
2718 tcg_gen_brcondi_tl(cond
, t3
, 0, l_done
);
2719 tcg_gen_extract_tl(t3
, t0
, 0, 16);
2720 tcg_gen_deposit_tl(t2
, t2
, t3
, 0, 16);
2722 gen_set_label(l_done
);
2723 gen_store_mxu_gpr(t2
, XRa
);
2728 * Quadruple 32-bit conditional move where
2729 * XRb contains conditions, XRc what to move and
2730 * XRa is the destination.
2731 * a.k.a. if (XRb == 0) { XRa = XRc }
2734 * Single 32-bit conditional move where
2735 * XRb contains conditions, XRc what to move and
2736 * XRa is the destination.
2737 * a.k.a. if (XRb != 0) { XRa = XRc }
2739 static void gen_mxu_s32movzn(DisasContext
*ctx
, TCGCond cond
)
2741 uint32_t XRc
, XRb
, XRa
;
2743 XRa
= extract32(ctx
->opcode
, 6, 4);
2744 XRb
= extract32(ctx
->opcode
, 10, 4);
2745 XRc
= extract32(ctx
->opcode
, 14, 4);
2747 TCGv t0
= tcg_temp_new();
2748 TCGv t1
= tcg_temp_new();
2749 TCGLabel
*l_done
= gen_new_label();
2751 gen_load_mxu_gpr(t0
, XRc
);
2752 gen_load_mxu_gpr(t1
, XRb
);
2754 tcg_gen_brcondi_tl(cond
, t1
, 0, l_done
);
2755 gen_store_mxu_gpr(t0
, XRa
);
2756 gen_set_label(l_done
);
2760 * MXU instruction category: Addition and subtraction
2761 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2769 * Update XRa if XRc < 0 by value of 0 - XRb
2772 static void gen_mxu_S32CPS(DisasContext
*ctx
)
2774 uint32_t pad
, XRc
, XRb
, XRa
;
2776 pad
= extract32(ctx
->opcode
, 21, 5);
2777 XRc
= extract32(ctx
->opcode
, 14, 4);
2778 XRb
= extract32(ctx
->opcode
, 10, 4);
2779 XRa
= extract32(ctx
->opcode
, 6, 4);
2781 if (unlikely(pad
!= 0)) {
2782 /* opcode padding incorrect -> do nothing */
2783 } else if (unlikely(XRa
== 0)) {
2784 /* destination is zero register -> do nothing */
2785 } else if (unlikely(XRb
== 0)) {
2786 /* XRc make no sense 0 - 0 = 0 -> just set destination to zero */
2787 tcg_gen_movi_tl(mxu_gpr
[XRa
- 1], 0);
2788 } else if (unlikely(XRc
== 0)) {
2789 /* condition always false -> just move XRb to XRa */
2790 tcg_gen_mov_tl(mxu_gpr
[XRa
- 1], mxu_gpr
[XRb
- 1]);
2792 /* the most general case */
2793 TCGv t0
= tcg_temp_new();
2794 TCGLabel
*l_not_less
= gen_new_label();
2795 TCGLabel
*l_done
= gen_new_label();
2797 tcg_gen_brcondi_tl(TCG_COND_GE
, mxu_gpr
[XRc
- 1], 0, l_not_less
);
2798 tcg_gen_neg_tl(t0
, mxu_gpr
[XRb
- 1]);
2800 gen_set_label(l_not_less
);
2801 gen_load_mxu_gpr(t0
, XRb
);
2802 gen_set_label(l_done
);
2803 gen_store_mxu_gpr(t0
, XRa
);
2809 * Update XRa[0..1] if XRc[0..1] < 0 by value of 0 - XRb[0..1]
2810 * else XRa[0..1] = XRb[0..1]
2812 static void gen_mxu_D16CPS(DisasContext
*ctx
)
2814 uint32_t pad
, XRc
, XRb
, XRa
;
2816 pad
= extract32(ctx
->opcode
, 21, 5);
2817 XRc
= extract32(ctx
->opcode
, 14, 4);
2818 XRb
= extract32(ctx
->opcode
, 10, 4);
2819 XRa
= extract32(ctx
->opcode
, 6, 4);
2821 if (unlikely(pad
!= 0)) {
2822 /* opcode padding incorrect -> do nothing */
2823 } else if (unlikely(XRa
== 0)) {
2824 /* destination is zero register -> do nothing */
2825 } else if (unlikely(XRb
== 0)) {
2826 /* XRc make no sense 0 - 0 = 0 -> just set destination to zero */
2827 tcg_gen_movi_tl(mxu_gpr
[XRa
- 1], 0);
2828 } else if (unlikely(XRc
== 0)) {
2829 /* condition always false -> just move XRb to XRa */
2830 tcg_gen_mov_tl(mxu_gpr
[XRa
- 1], mxu_gpr
[XRb
- 1]);
2832 /* the most general case */
2833 TCGv t0
= tcg_temp_new();
2834 TCGv t1
= tcg_temp_new();
2835 TCGLabel
*l_done_hi
= gen_new_label();
2836 TCGLabel
*l_not_less_lo
= gen_new_label();
2837 TCGLabel
*l_done_lo
= gen_new_label();
2839 tcg_gen_sextract_tl(t0
, mxu_gpr
[XRc
- 1], 16, 16);
2840 tcg_gen_sextract_tl(t1
, mxu_gpr
[XRb
- 1], 16, 16);
2841 tcg_gen_brcondi_tl(TCG_COND_GE
, t0
, 0, l_done_hi
);
2842 tcg_gen_subfi_tl(t1
, 0, t1
);
2844 gen_set_label(l_done_hi
);
2845 tcg_gen_shli_i32(t1
, t1
, 16);
2847 tcg_gen_sextract_tl(t0
, mxu_gpr
[XRc
- 1], 0, 16);
2848 tcg_gen_brcondi_tl(TCG_COND_GE
, t0
, 0, l_not_less_lo
);
2849 tcg_gen_sextract_tl(t0
, mxu_gpr
[XRb
- 1], 0, 16);
2850 tcg_gen_subfi_tl(t0
, 0, t0
);
2851 tcg_gen_br(l_done_lo
);
2853 gen_set_label(l_not_less_lo
);
2854 tcg_gen_extract_tl(t0
, mxu_gpr
[XRb
- 1], 0, 16);
2856 gen_set_label(l_done_lo
);
2857 tcg_gen_deposit_tl(mxu_gpr
[XRa
- 1], t1
, t0
, 0, 16);
2862 * Q8ABD XRa, XRb, XRc
2863 * Gets absolute difference for quadruple of 8-bit
2864 * packed in XRb to another one in XRc,
2865 * put the result in XRa.
2866 * a.k.a. XRa[0..3] = abs(XRb[0..3] - XRc[0..3]);
2868 static void gen_mxu_Q8ABD(DisasContext
*ctx
)
2870 uint32_t pad
, XRc
, XRb
, XRa
;
2872 pad
= extract32(ctx
->opcode
, 21, 3);
2873 XRc
= extract32(ctx
->opcode
, 14, 4);
2874 XRb
= extract32(ctx
->opcode
, 10, 4);
2875 XRa
= extract32(ctx
->opcode
, 6, 4);
2877 if (unlikely(pad
!= 0)) {
2878 /* opcode padding incorrect -> do nothing */
2879 } else if (unlikely(XRa
== 0)) {
2880 /* destination is zero register -> do nothing */
2881 } else if (unlikely((XRb
== 0) && (XRc
== 0))) {
2882 /* both operands zero registers -> just set destination to zero */
2883 tcg_gen_movi_tl(mxu_gpr
[XRa
- 1], 0);
2885 /* the most general case */
2886 TCGv t0
= tcg_temp_new();
2887 TCGv t1
= tcg_temp_new();
2888 TCGv t2
= tcg_temp_new();
2889 TCGv t3
= tcg_temp_new();
2890 TCGv t4
= tcg_temp_new();
2892 gen_load_mxu_gpr(t3
, XRb
);
2893 gen_load_mxu_gpr(t4
, XRc
);
2894 tcg_gen_movi_tl(t2
, 0);
2896 for (int i
= 0; i
< 4; i
++) {
2897 tcg_gen_extract_tl(t0
, t3
, 8 * i
, 8);
2898 tcg_gen_extract_tl(t1
, t4
, 8 * i
, 8);
2900 tcg_gen_sub_tl(t0
, t0
, t1
);
2901 tcg_gen_abs_tl(t0
, t0
);
2903 tcg_gen_deposit_tl(t2
, t2
, t0
, 8 * i
, 8);
2905 gen_store_mxu_gpr(t2
, XRa
);
2910 * Q8ADD XRa, XRb, XRc, ptn2
2911 * Add/subtract quadruple of 8-bit packed in XRb
2912 * to another one in XRc, put the result in XRa.
2914 static void gen_mxu_Q8ADD(DisasContext
*ctx
)
2916 uint32_t aptn2
, pad
, XRc
, XRb
, XRa
;
2918 aptn2
= extract32(ctx
->opcode
, 24, 2);
2919 pad
= extract32(ctx
->opcode
, 21, 3);
2920 XRc
= extract32(ctx
->opcode
, 14, 4);
2921 XRb
= extract32(ctx
->opcode
, 10, 4);
2922 XRa
= extract32(ctx
->opcode
, 6, 4);
2924 if (unlikely(pad
!= 0)) {
2925 /* opcode padding incorrect -> do nothing */
2926 } else if (unlikely(XRa
== 0)) {
2927 /* destination is zero register -> do nothing */
2928 } else if (unlikely((XRb
== 0) && (XRc
== 0))) {
2929 /* both operands zero registers -> just set destination to zero */
2930 tcg_gen_movi_i32(mxu_gpr
[XRa
- 1], 0);
2932 /* the most general case */
2933 TCGv t0
= tcg_temp_new();
2934 TCGv t1
= tcg_temp_new();
2935 TCGv t2
= tcg_temp_new();
2936 TCGv t3
= tcg_temp_new();
2937 TCGv t4
= tcg_temp_new();
2939 gen_load_mxu_gpr(t3
, XRb
);
2940 gen_load_mxu_gpr(t4
, XRc
);
2942 for (int i
= 0; i
< 4; i
++) {
2943 tcg_gen_andi_tl(t0
, t3
, 0xff);
2944 tcg_gen_andi_tl(t1
, t4
, 0xff);
2948 tcg_gen_sub_tl(t0
, t0
, t1
);
2950 tcg_gen_add_tl(t0
, t0
, t1
);
2954 tcg_gen_sub_tl(t0
, t0
, t1
);
2956 tcg_gen_add_tl(t0
, t0
, t1
);
2960 tcg_gen_shri_tl(t3
, t3
, 8);
2961 tcg_gen_shri_tl(t4
, t4
, 8);
2964 tcg_gen_deposit_tl(t2
, t2
, t0
, 8 * i
, 8);
2966 tcg_gen_andi_tl(t0
, t0
, 0xff);
2967 tcg_gen_mov_tl(t2
, t0
);
2970 gen_store_mxu_gpr(t2
, XRa
);
2975 * Q8ADDE XRa, XRb, XRc, XRd, aptn2
2976 * Add/subtract quadruple of 8-bit packed in XRb
2977 * to another one in XRc, with zero extending
2978 * to 16-bit and put results as packed 16-bit data
2980 * aptn2 manages action add or subtract of pairs of data.
2982 * Q8ACCE XRa, XRb, XRc, XRd, aptn2
2983 * Add/subtract quadruple of 8-bit packed in XRb
2984 * to another one in XRc, with zero extending
2985 * to 16-bit and accumulate results as packed 16-bit data
2987 * aptn2 manages action add or subtract of pairs of data.
2989 static void gen_mxu_q8adde(DisasContext
*ctx
, bool accumulate
)
2991 uint32_t aptn2
, XRd
, XRc
, XRb
, XRa
;
2993 aptn2
= extract32(ctx
->opcode
, 24, 2);
2994 XRd
= extract32(ctx
->opcode
, 18, 4);
2995 XRc
= extract32(ctx
->opcode
, 14, 4);
2996 XRb
= extract32(ctx
->opcode
, 10, 4);
2997 XRa
= extract32(ctx
->opcode
, 6, 4);
2999 if (unlikely((XRb
== 0) && (XRc
== 0))) {
3000 /* both operands zero registers -> just set destination to zero */
3002 tcg_gen_movi_tl(mxu_gpr
[XRa
- 1], 0);
3005 tcg_gen_movi_tl(mxu_gpr
[XRd
- 1], 0);
3008 /* the most general case */
3009 TCGv t0
= tcg_temp_new();
3010 TCGv t1
= tcg_temp_new();
3011 TCGv t2
= tcg_temp_new();
3012 TCGv t3
= tcg_temp_new();
3013 TCGv t4
= tcg_temp_new();
3014 TCGv t5
= tcg_temp_new();
3017 gen_extract_mxu_gpr(t0
, XRb
, 16, 8);
3018 gen_extract_mxu_gpr(t1
, XRc
, 16, 8);
3019 gen_extract_mxu_gpr(t2
, XRb
, 24, 8);
3020 gen_extract_mxu_gpr(t3
, XRc
, 24, 8);
3022 tcg_gen_sub_tl(t0
, t0
, t1
);
3023 tcg_gen_sub_tl(t2
, t2
, t3
);
3025 tcg_gen_add_tl(t0
, t0
, t1
);
3026 tcg_gen_add_tl(t2
, t2
, t3
);
3029 gen_load_mxu_gpr(t5
, XRa
);
3030 tcg_gen_extract_tl(t1
, t5
, 0, 16);
3031 tcg_gen_extract_tl(t3
, t5
, 16, 16);
3032 tcg_gen_add_tl(t0
, t0
, t1
);
3033 tcg_gen_add_tl(t2
, t2
, t3
);
3035 tcg_gen_shli_tl(t2
, t2
, 16);
3036 tcg_gen_extract_tl(t0
, t0
, 0, 16);
3037 tcg_gen_or_tl(t4
, t2
, t0
);
3040 gen_extract_mxu_gpr(t0
, XRb
, 0, 8);
3041 gen_extract_mxu_gpr(t1
, XRc
, 0, 8);
3042 gen_extract_mxu_gpr(t2
, XRb
, 8, 8);
3043 gen_extract_mxu_gpr(t3
, XRc
, 8, 8);
3045 tcg_gen_sub_tl(t0
, t0
, t1
);
3046 tcg_gen_sub_tl(t2
, t2
, t3
);
3048 tcg_gen_add_tl(t0
, t0
, t1
);
3049 tcg_gen_add_tl(t2
, t2
, t3
);
3052 gen_load_mxu_gpr(t5
, XRd
);
3053 tcg_gen_extract_tl(t1
, t5
, 0, 16);
3054 tcg_gen_extract_tl(t3
, t5
, 16, 16);
3055 tcg_gen_add_tl(t0
, t0
, t1
);
3056 tcg_gen_add_tl(t2
, t2
, t3
);
3058 tcg_gen_shli_tl(t2
, t2
, 16);
3059 tcg_gen_extract_tl(t0
, t0
, 0, 16);
3060 tcg_gen_or_tl(t5
, t2
, t0
);
3063 gen_store_mxu_gpr(t4
, XRa
);
3064 gen_store_mxu_gpr(t5
, XRd
);
3069 * D8SUM XRa, XRb, XRc
3070 * Double parallel add of quadruple unsigned 8-bit together
3071 * with zero extending to 16-bit data.
3072 * D8SUMC XRa, XRb, XRc
3073 * Double parallel add of quadruple unsigned 8-bit together
3074 * with zero extending to 16-bit data and adding 2 to each
3077 static void gen_mxu_d8sum(DisasContext
*ctx
, bool sumc
)
3079 uint32_t pad
, pad2
, XRc
, XRb
, XRa
;
3081 pad
= extract32(ctx
->opcode
, 24, 2);
3082 pad2
= extract32(ctx
->opcode
, 18, 4);
3083 XRc
= extract32(ctx
->opcode
, 14, 4);
3084 XRb
= extract32(ctx
->opcode
, 10, 4);
3085 XRa
= extract32(ctx
->opcode
, 6, 4);
3087 if (unlikely(pad
!= 0 || pad2
!= 0)) {
3088 /* opcode padding incorrect -> do nothing */
3089 } else if (unlikely(XRa
== 0)) {
3090 /* destination is zero register -> do nothing */
3091 } else if (unlikely((XRb
== 0) && (XRc
== 0))) {
3092 /* both operands zero registers -> just set destination to zero */
3093 tcg_gen_movi_tl(mxu_gpr
[XRa
- 1], 0);
3095 /* the most general case */
3096 TCGv t0
= tcg_temp_new();
3097 TCGv t1
= tcg_temp_new();
3098 TCGv t2
= tcg_temp_new();
3099 TCGv t3
= tcg_temp_new();
3100 TCGv t4
= tcg_temp_new();
3101 TCGv t5
= tcg_temp_new();
3104 tcg_gen_extract_tl(t0
, mxu_gpr
[XRb
- 1], 0, 8);
3105 tcg_gen_extract_tl(t1
, mxu_gpr
[XRb
- 1], 8, 8);
3106 tcg_gen_extract_tl(t2
, mxu_gpr
[XRb
- 1], 16, 8);
3107 tcg_gen_extract_tl(t3
, mxu_gpr
[XRb
- 1], 24, 8);
3108 tcg_gen_add_tl(t4
, t0
, t1
);
3109 tcg_gen_add_tl(t4
, t4
, t2
);
3110 tcg_gen_add_tl(t4
, t4
, t3
);
3112 tcg_gen_mov_tl(t4
, 0);
3115 tcg_gen_extract_tl(t0
, mxu_gpr
[XRc
- 1], 0, 8);
3116 tcg_gen_extract_tl(t1
, mxu_gpr
[XRc
- 1], 8, 8);
3117 tcg_gen_extract_tl(t2
, mxu_gpr
[XRc
- 1], 16, 8);
3118 tcg_gen_extract_tl(t3
, mxu_gpr
[XRc
- 1], 24, 8);
3119 tcg_gen_add_tl(t5
, t0
, t1
);
3120 tcg_gen_add_tl(t5
, t5
, t2
);
3121 tcg_gen_add_tl(t5
, t5
, t3
);
3123 tcg_gen_mov_tl(t5
, 0);
3127 tcg_gen_addi_tl(t4
, t4
, 2);
3128 tcg_gen_addi_tl(t5
, t5
, 2);
3130 tcg_gen_shli_tl(t4
, t4
, 16);
3132 tcg_gen_or_tl(mxu_gpr
[XRa
- 1], t4
, t5
);
3137 * Q16ADD XRa, XRb, XRc, XRd, aptn2, optn2 - Quad packed
3138 * 16-bit pattern addition.
3140 static void gen_mxu_q16add(DisasContext
*ctx
)
3142 uint32_t aptn2
, optn2
, XRc
, XRb
, XRa
, XRd
;
3144 aptn2
= extract32(ctx
->opcode
, 24, 2);
3145 optn2
= extract32(ctx
->opcode
, 22, 2);
3146 XRd
= extract32(ctx
->opcode
, 18, 4);
3147 XRc
= extract32(ctx
->opcode
, 14, 4);
3148 XRb
= extract32(ctx
->opcode
, 10, 4);
3149 XRa
= extract32(ctx
->opcode
, 6, 4);
3151 TCGv t0
= tcg_temp_new();
3152 TCGv t1
= tcg_temp_new();
3153 TCGv t2
= tcg_temp_new();
3154 TCGv t3
= tcg_temp_new();
3155 TCGv t4
= tcg_temp_new();
3156 TCGv t5
= tcg_temp_new();
3158 gen_load_mxu_gpr(t1
, XRb
);
3159 tcg_gen_extract_tl(t0
, t1
, 0, 16);
3160 tcg_gen_extract_tl(t1
, t1
, 16, 16);
3162 gen_load_mxu_gpr(t3
, XRc
);
3163 tcg_gen_extract_tl(t2
, t3
, 0, 16);
3164 tcg_gen_extract_tl(t3
, t3
, 16, 16);
3167 case MXU_OPTN2_WW
: /* XRB.H+XRC.H == lop, XRB.L+XRC.L == rop */
3168 tcg_gen_mov_tl(t4
, t1
);
3169 tcg_gen_mov_tl(t5
, t0
);
3171 case MXU_OPTN2_LW
: /* XRB.L+XRC.H == lop, XRB.L+XRC.L == rop */
3172 tcg_gen_mov_tl(t4
, t0
);
3173 tcg_gen_mov_tl(t5
, t0
);
3175 case MXU_OPTN2_HW
: /* XRB.H+XRC.H == lop, XRB.H+XRC.L == rop */
3176 tcg_gen_mov_tl(t4
, t1
);
3177 tcg_gen_mov_tl(t5
, t1
);
3179 case MXU_OPTN2_XW
: /* XRB.L+XRC.H == lop, XRB.H+XRC.L == rop */
3180 tcg_gen_mov_tl(t4
, t0
);
3181 tcg_gen_mov_tl(t5
, t1
);
3186 case MXU_APTN2_AA
: /* lop +, rop + */
3187 tcg_gen_add_tl(t0
, t4
, t3
);
3188 tcg_gen_add_tl(t1
, t5
, t2
);
3189 tcg_gen_add_tl(t4
, t4
, t3
);
3190 tcg_gen_add_tl(t5
, t5
, t2
);
3192 case MXU_APTN2_AS
: /* lop +, rop + */
3193 tcg_gen_sub_tl(t0
, t4
, t3
);
3194 tcg_gen_sub_tl(t1
, t5
, t2
);
3195 tcg_gen_add_tl(t4
, t4
, t3
);
3196 tcg_gen_add_tl(t5
, t5
, t2
);
3198 case MXU_APTN2_SA
: /* lop +, rop + */
3199 tcg_gen_add_tl(t0
, t4
, t3
);
3200 tcg_gen_add_tl(t1
, t5
, t2
);
3201 tcg_gen_sub_tl(t4
, t4
, t3
);
3202 tcg_gen_sub_tl(t5
, t5
, t2
);
3204 case MXU_APTN2_SS
: /* lop +, rop + */
3205 tcg_gen_sub_tl(t0
, t4
, t3
);
3206 tcg_gen_sub_tl(t1
, t5
, t2
);
3207 tcg_gen_sub_tl(t4
, t4
, t3
);
3208 tcg_gen_sub_tl(t5
, t5
, t2
);
3212 tcg_gen_shli_tl(t0
, t0
, 16);
3213 tcg_gen_extract_tl(t1
, t1
, 0, 16);
3214 tcg_gen_shli_tl(t4
, t4
, 16);
3215 tcg_gen_extract_tl(t5
, t5
, 0, 16);
3217 tcg_gen_or_tl(mxu_gpr
[XRa
- 1], t4
, t5
);
3218 tcg_gen_or_tl(mxu_gpr
[XRd
- 1], t0
, t1
);
3222 * Q16ACC XRa, XRb, XRc, XRd, aptn2 - Quad packed
3223 * 16-bit addition/subtraction with accumulate.
3225 static void gen_mxu_q16acc(DisasContext
*ctx
)
3227 uint32_t aptn2
, XRc
, XRb
, XRa
, XRd
;
3229 aptn2
= extract32(ctx
->opcode
, 24, 2);
3230 XRd
= extract32(ctx
->opcode
, 18, 4);
3231 XRc
= extract32(ctx
->opcode
, 14, 4);
3232 XRb
= extract32(ctx
->opcode
, 10, 4);
3233 XRa
= extract32(ctx
->opcode
, 6, 4);
3235 TCGv t0
= tcg_temp_new();
3236 TCGv t1
= tcg_temp_new();
3237 TCGv t2
= tcg_temp_new();
3238 TCGv t3
= tcg_temp_new();
3239 TCGv s3
= tcg_temp_new();
3240 TCGv s2
= tcg_temp_new();
3241 TCGv s1
= tcg_temp_new();
3242 TCGv s0
= tcg_temp_new();
3244 gen_load_mxu_gpr(t1
, XRb
);
3245 tcg_gen_extract_tl(t0
, t1
, 0, 16);
3246 tcg_gen_extract_tl(t1
, t1
, 16, 16);
3248 gen_load_mxu_gpr(t3
, XRc
);
3249 tcg_gen_extract_tl(t2
, t3
, 0, 16);
3250 tcg_gen_extract_tl(t3
, t3
, 16, 16);
3253 case MXU_APTN2_AA
: /* lop +, rop + */
3254 tcg_gen_add_tl(s3
, t1
, t3
);
3255 tcg_gen_add_tl(s2
, t0
, t2
);
3256 tcg_gen_add_tl(s1
, t1
, t3
);
3257 tcg_gen_add_tl(s0
, t0
, t2
);
3259 case MXU_APTN2_AS
: /* lop +, rop - */
3260 tcg_gen_sub_tl(s3
, t1
, t3
);
3261 tcg_gen_sub_tl(s2
, t0
, t2
);
3262 tcg_gen_add_tl(s1
, t1
, t3
);
3263 tcg_gen_add_tl(s0
, t0
, t2
);
3265 case MXU_APTN2_SA
: /* lop -, rop + */
3266 tcg_gen_add_tl(s3
, t1
, t3
);
3267 tcg_gen_add_tl(s2
, t0
, t2
);
3268 tcg_gen_sub_tl(s1
, t1
, t3
);
3269 tcg_gen_sub_tl(s0
, t0
, t2
);
3271 case MXU_APTN2_SS
: /* lop -, rop - */
3272 tcg_gen_sub_tl(s3
, t1
, t3
);
3273 tcg_gen_sub_tl(s2
, t0
, t2
);
3274 tcg_gen_sub_tl(s1
, t1
, t3
);
3275 tcg_gen_sub_tl(s0
, t0
, t2
);
3280 tcg_gen_add_tl(t0
, mxu_gpr
[XRa
- 1], s0
);
3281 tcg_gen_extract_tl(t0
, t0
, 0, 16);
3282 tcg_gen_extract_tl(t1
, mxu_gpr
[XRa
- 1], 16, 16);
3283 tcg_gen_add_tl(t1
, t1
, s1
);
3284 tcg_gen_shli_tl(t1
, t1
, 16);
3285 tcg_gen_or_tl(mxu_gpr
[XRa
- 1], t1
, t0
);
3289 tcg_gen_add_tl(t0
, mxu_gpr
[XRd
- 1], s2
);
3290 tcg_gen_extract_tl(t0
, t0
, 0, 16);
3291 tcg_gen_extract_tl(t1
, mxu_gpr
[XRd
- 1], 16, 16);
3292 tcg_gen_add_tl(t1
, t1
, s3
);
3293 tcg_gen_shli_tl(t1
, t1
, 16);
3294 tcg_gen_or_tl(mxu_gpr
[XRd
- 1], t1
, t0
);
3299 * Q16ACCM XRa, XRb, XRc, XRd, aptn2 - Quad packed
3300 * 16-bit accumulate.
3302 static void gen_mxu_q16accm(DisasContext
*ctx
)
3304 uint32_t aptn2
, XRc
, XRb
, XRa
, XRd
;
3306 aptn2
= extract32(ctx
->opcode
, 24, 2);
3307 XRd
= extract32(ctx
->opcode
, 18, 4);
3308 XRc
= extract32(ctx
->opcode
, 14, 4);
3309 XRb
= extract32(ctx
->opcode
, 10, 4);
3310 XRa
= extract32(ctx
->opcode
, 6, 4);
3312 TCGv t0
= tcg_temp_new();
3313 TCGv t1
= tcg_temp_new();
3314 TCGv t2
= tcg_temp_new();
3315 TCGv t3
= tcg_temp_new();
3317 gen_load_mxu_gpr(t2
, XRb
);
3318 gen_load_mxu_gpr(t3
, XRc
);
3321 TCGv a0
= tcg_temp_new();
3322 TCGv a1
= tcg_temp_new();
3324 tcg_gen_extract_tl(t0
, t2
, 0, 16);
3325 tcg_gen_extract_tl(t1
, t2
, 16, 16);
3327 gen_load_mxu_gpr(a1
, XRa
);
3328 tcg_gen_extract_tl(a0
, a1
, 0, 16);
3329 tcg_gen_extract_tl(a1
, a1
, 16, 16);
3332 tcg_gen_sub_tl(a0
, a0
, t0
);
3333 tcg_gen_sub_tl(a1
, a1
, t1
);
3335 tcg_gen_add_tl(a0
, a0
, t0
);
3336 tcg_gen_add_tl(a1
, a1
, t1
);
3338 tcg_gen_extract_tl(a0
, a0
, 0, 16);
3339 tcg_gen_shli_tl(a1
, a1
, 16);
3340 tcg_gen_or_tl(mxu_gpr
[XRa
- 1], a1
, a0
);
3344 TCGv a0
= tcg_temp_new();
3345 TCGv a1
= tcg_temp_new();
3347 tcg_gen_extract_tl(t0
, t3
, 0, 16);
3348 tcg_gen_extract_tl(t1
, t3
, 16, 16);
3350 gen_load_mxu_gpr(a1
, XRd
);
3351 tcg_gen_extract_tl(a0
, a1
, 0, 16);
3352 tcg_gen_extract_tl(a1
, a1
, 16, 16);
3355 tcg_gen_sub_tl(a0
, a0
, t0
);
3356 tcg_gen_sub_tl(a1
, a1
, t1
);
3358 tcg_gen_add_tl(a0
, a0
, t0
);
3359 tcg_gen_add_tl(a1
, a1
, t1
);
3361 tcg_gen_extract_tl(a0
, a0
, 0, 16);
3362 tcg_gen_shli_tl(a1
, a1
, 16);
3363 tcg_gen_or_tl(mxu_gpr
[XRd
- 1], a1
, a0
);
3369 * D16ASUM XRa, XRb, XRc, XRd, aptn2 - Double packed
3370 * 16-bit sign extended addition and accumulate.
3372 static void gen_mxu_d16asum(DisasContext
*ctx
)
3374 uint32_t aptn2
, XRc
, XRb
, XRa
, XRd
;
3376 aptn2
= extract32(ctx
->opcode
, 24, 2);
3377 XRd
= extract32(ctx
->opcode
, 18, 4);
3378 XRc
= extract32(ctx
->opcode
, 14, 4);
3379 XRb
= extract32(ctx
->opcode
, 10, 4);
3380 XRa
= extract32(ctx
->opcode
, 6, 4);
3382 TCGv t0
= tcg_temp_new();
3383 TCGv t1
= tcg_temp_new();
3384 TCGv t2
= tcg_temp_new();
3385 TCGv t3
= tcg_temp_new();
3387 gen_load_mxu_gpr(t2
, XRb
);
3388 gen_load_mxu_gpr(t3
, XRc
);
3391 tcg_gen_sextract_tl(t0
, t2
, 0, 16);
3392 tcg_gen_sextract_tl(t1
, t2
, 16, 16);
3393 tcg_gen_add_tl(t0
, t0
, t1
);
3395 tcg_gen_sub_tl(mxu_gpr
[XRa
- 1], mxu_gpr
[XRa
- 1], t0
);
3397 tcg_gen_add_tl(mxu_gpr
[XRa
- 1], mxu_gpr
[XRa
- 1], t0
);
3402 tcg_gen_sextract_tl(t0
, t3
, 0, 16);
3403 tcg_gen_sextract_tl(t1
, t3
, 16, 16);
3404 tcg_gen_add_tl(t0
, t0
, t1
);
3406 tcg_gen_sub_tl(mxu_gpr
[XRd
- 1], mxu_gpr
[XRd
- 1], t0
);
3408 tcg_gen_add_tl(mxu_gpr
[XRd
- 1], mxu_gpr
[XRd
- 1], t0
);
3414 * D32ADD XRa, XRb, XRc, XRd, aptn2 - Double
3415 * 32 bit pattern addition/subtraction, set carry.
3417 * D32ADDC XRa, XRb, XRc, XRd, aptn2 - Double
3418 * 32 bit pattern addition/subtraction with carry.
3420 static void gen_mxu_d32add(DisasContext
*ctx
)
3422 uint32_t aptn2
, addc
, XRc
, XRb
, XRa
, XRd
;
3424 aptn2
= extract32(ctx
->opcode
, 24, 2);
3425 addc
= extract32(ctx
->opcode
, 22, 2);
3426 XRd
= extract32(ctx
->opcode
, 18, 4);
3427 XRc
= extract32(ctx
->opcode
, 14, 4);
3428 XRb
= extract32(ctx
->opcode
, 10, 4);
3429 XRa
= extract32(ctx
->opcode
, 6, 4);
3431 TCGv t0
= tcg_temp_new();
3432 TCGv t1
= tcg_temp_new();
3433 TCGv t2
= tcg_temp_new();
3434 TCGv cr
= tcg_temp_new();
3436 if (unlikely(addc
> 1)) {
3437 /* opcode incorrect -> do nothing */
3438 } else if (addc
== 1) {
3439 if (unlikely(XRa
== 0 && XRd
== 0)) {
3440 /* destinations are zero register -> do nothing */
3442 /* FIXME ??? What if XRa == XRd ??? */
3443 /* aptn2 is unused here */
3444 gen_load_mxu_gpr(t0
, XRb
);
3445 gen_load_mxu_gpr(t1
, XRc
);
3446 gen_load_mxu_cr(cr
);
3448 tcg_gen_extract_tl(t2
, cr
, 31, 1);
3449 tcg_gen_add_tl(t0
, t0
, t2
);
3450 tcg_gen_add_tl(mxu_gpr
[XRa
- 1], mxu_gpr
[XRa
- 1], t0
);
3453 tcg_gen_extract_tl(t2
, cr
, 30, 1);
3454 tcg_gen_add_tl(t1
, t1
, t2
);
3455 tcg_gen_add_tl(mxu_gpr
[XRd
- 1], mxu_gpr
[XRd
- 1], t1
);
3458 } else if (unlikely(XRa
== 0 && XRd
== 0)) {
3459 /* destinations are zero register -> do nothing */
3462 /* FIXME ??? What if XRa == XRd ??? */
3463 TCGv carry
= tcg_temp_new();
3465 gen_load_mxu_gpr(t0
, XRb
);
3466 gen_load_mxu_gpr(t1
, XRc
);
3467 gen_load_mxu_cr(cr
);
3470 tcg_gen_sub_i32(t2
, t0
, t1
);
3471 tcg_gen_setcond_tl(TCG_COND_GTU
, carry
, t0
, t1
);
3473 tcg_gen_add_i32(t2
, t0
, t1
);
3474 tcg_gen_setcond_tl(TCG_COND_GTU
, carry
, t0
, t2
);
3476 tcg_gen_andi_tl(cr
, cr
, 0x7fffffff);
3477 tcg_gen_shli_tl(carry
, carry
, 31);
3478 tcg_gen_or_tl(cr
, cr
, carry
);
3479 gen_store_mxu_gpr(t2
, XRa
);
3483 tcg_gen_sub_i32(t2
, t0
, t1
);
3484 tcg_gen_setcond_tl(TCG_COND_GTU
, carry
, t0
, t1
);
3486 tcg_gen_add_i32(t2
, t0
, t1
);
3487 tcg_gen_setcond_tl(TCG_COND_GTU
, carry
, t0
, t2
);
3489 tcg_gen_andi_tl(cr
, cr
, 0xbfffffff);
3490 tcg_gen_shli_tl(carry
, carry
, 30);
3491 tcg_gen_or_tl(cr
, cr
, carry
);
3492 gen_store_mxu_gpr(t2
, XRd
);
3494 gen_store_mxu_cr(cr
);
3499 * D32ACC XRa, XRb, XRc, XRd, aptn2 - Double
3500 * 32 bit pattern addition/subtraction and accumulate.
3502 static void gen_mxu_d32acc(DisasContext
*ctx
)
3504 uint32_t aptn2
, XRc
, XRb
, XRa
, XRd
;
3506 aptn2
= extract32(ctx
->opcode
, 24, 2);
3507 XRd
= extract32(ctx
->opcode
, 18, 4);
3508 XRc
= extract32(ctx
->opcode
, 14, 4);
3509 XRb
= extract32(ctx
->opcode
, 10, 4);
3510 XRa
= extract32(ctx
->opcode
, 6, 4);
3512 TCGv t0
= tcg_temp_new();
3513 TCGv t1
= tcg_temp_new();
3514 TCGv t2
= tcg_temp_new();
3516 if (unlikely(XRa
== 0 && XRd
== 0)) {
3517 /* destinations are zero register -> do nothing */
3520 gen_load_mxu_gpr(t0
, XRb
);
3521 gen_load_mxu_gpr(t1
, XRc
);
3524 tcg_gen_sub_tl(t2
, t0
, t1
);
3526 tcg_gen_add_tl(t2
, t0
, t1
);
3528 tcg_gen_add_tl(mxu_gpr
[XRa
- 1], mxu_gpr
[XRa
- 1], t2
);
3532 tcg_gen_sub_tl(t2
, t0
, t1
);
3534 tcg_gen_add_tl(t2
, t0
, t1
);
3536 tcg_gen_add_tl(mxu_gpr
[XRd
- 1], mxu_gpr
[XRd
- 1], t2
);
3542 * D32ACCM XRa, XRb, XRc, XRd, aptn2 - Double
3543 * 32 bit pattern addition/subtraction and accumulate.
3545 static void gen_mxu_d32accm(DisasContext
*ctx
)
3547 uint32_t aptn2
, XRc
, XRb
, XRa
, XRd
;
3549 aptn2
= extract32(ctx
->opcode
, 24, 2);
3550 XRd
= extract32(ctx
->opcode
, 18, 4);
3551 XRc
= extract32(ctx
->opcode
, 14, 4);
3552 XRb
= extract32(ctx
->opcode
, 10, 4);
3553 XRa
= extract32(ctx
->opcode
, 6, 4);
3555 TCGv t0
= tcg_temp_new();
3556 TCGv t1
= tcg_temp_new();
3557 TCGv t2
= tcg_temp_new();
3559 if (unlikely(XRa
== 0 && XRd
== 0)) {
3560 /* destinations are zero register -> do nothing */
3563 gen_load_mxu_gpr(t0
, XRb
);
3564 gen_load_mxu_gpr(t1
, XRc
);
3566 tcg_gen_add_tl(t2
, t0
, t1
);
3568 tcg_gen_sub_tl(mxu_gpr
[XRa
- 1], mxu_gpr
[XRa
- 1], t2
);
3570 tcg_gen_add_tl(mxu_gpr
[XRa
- 1], mxu_gpr
[XRa
- 1], t2
);
3574 tcg_gen_sub_tl(t2
, t0
, t1
);
3576 tcg_gen_sub_tl(mxu_gpr
[XRd
- 1], mxu_gpr
[XRd
- 1], t2
);
3578 tcg_gen_add_tl(mxu_gpr
[XRd
- 1], mxu_gpr
[XRd
- 1], t2
);
3585 * D32ASUM XRa, XRb, XRc, XRd, aptn2 - Double
3586 * 32 bit pattern addition/subtraction.
3588 static void gen_mxu_d32asum(DisasContext
*ctx
)
3590 uint32_t aptn2
, XRc
, XRb
, XRa
, XRd
;
3592 aptn2
= extract32(ctx
->opcode
, 24, 2);
3593 XRd
= extract32(ctx
->opcode
, 18, 4);
3594 XRc
= extract32(ctx
->opcode
, 14, 4);
3595 XRb
= extract32(ctx
->opcode
, 10, 4);
3596 XRa
= extract32(ctx
->opcode
, 6, 4);
3598 TCGv t0
= tcg_temp_new();
3599 TCGv t1
= tcg_temp_new();
3601 if (unlikely(XRa
== 0 && XRd
== 0)) {
3602 /* destinations are zero register -> do nothing */
3605 gen_load_mxu_gpr(t0
, XRb
);
3606 gen_load_mxu_gpr(t1
, XRc
);
3609 tcg_gen_sub_tl(mxu_gpr
[XRa
- 1], mxu_gpr
[XRa
- 1], t0
);
3611 tcg_gen_add_tl(mxu_gpr
[XRa
- 1], mxu_gpr
[XRa
- 1], t0
);
3616 tcg_gen_sub_tl(mxu_gpr
[XRd
- 1], mxu_gpr
[XRd
- 1], t1
);
3618 tcg_gen_add_tl(mxu_gpr
[XRd
- 1], mxu_gpr
[XRd
- 1], t1
);
/*
 *                 MXU instruction category: Miscellaneous
 *                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
3635 * S32EXTR XRa, XRd, rs, bits5
3636 * Extract bits5 bits from 64-bit pair {XRa:XRd}
3637 * starting from rs[4:0] offset and put to the XRa.
3639 static void gen_mxu_s32extr(DisasContext
*ctx
)
3641 TCGv t0
, t1
, t2
, t3
;
3642 uint32_t XRa
, XRd
, rs
, bits5
;
3644 t0
= tcg_temp_new();
3645 t1
= tcg_temp_new();
3646 t2
= tcg_temp_new();
3647 t3
= tcg_temp_new();
3649 XRa
= extract32(ctx
->opcode
, 6, 4);
3650 XRd
= extract32(ctx
->opcode
, 10, 4);
3651 bits5
= extract32(ctx
->opcode
, 16, 5);
3652 rs
= extract32(ctx
->opcode
, 21, 5);
3654 /* {tmp} = {XRa:XRd} >> (64 - rt - bits5); */
3655 /* {XRa} = extract({tmp}, 0, bits5); */
3657 TCGLabel
*l_xra_only
= gen_new_label();
3658 TCGLabel
*l_done
= gen_new_label();
3660 gen_load_mxu_gpr(t0
, XRd
);
3661 gen_load_mxu_gpr(t1
, XRa
);
3662 gen_load_gpr(t2
, rs
);
3663 tcg_gen_andi_tl(t2
, t2
, 0x1f);
3664 tcg_gen_subfi_tl(t2
, 32, t2
);
3665 tcg_gen_brcondi_tl(TCG_COND_GE
, t2
, bits5
, l_xra_only
);
3666 tcg_gen_subfi_tl(t2
, bits5
, t2
);
3667 tcg_gen_subfi_tl(t3
, 32, t2
);
3668 tcg_gen_shr_tl(t0
, t0
, t3
);
3669 tcg_gen_shl_tl(t1
, t1
, t2
);
3670 tcg_gen_or_tl(t0
, t0
, t1
);
3672 gen_set_label(l_xra_only
);
3673 tcg_gen_subi_tl(t2
, t2
, bits5
);
3674 tcg_gen_shr_tl(t0
, t1
, t2
);
3675 gen_set_label(l_done
);
3676 tcg_gen_extract_tl(t0
, t0
, 0, bits5
);
3678 /* unspecified behavior but matches tests on real hardware*/
3679 tcg_gen_movi_tl(t0
, 0);
3681 gen_store_mxu_gpr(t0
, XRa
);
3685 * S32EXTRV XRa, XRd, rs, rt
3686 * Extract rt[4:0] bits from 64-bit pair {XRa:XRd}
3687 * starting from rs[4:0] offset and put to the XRa.
3689 static void gen_mxu_s32extrv(DisasContext
*ctx
)
3691 TCGv t0
, t1
, t2
, t3
, t4
;
3692 uint32_t XRa
, XRd
, rs
, rt
;
3694 t0
= tcg_temp_new();
3695 t1
= tcg_temp_new();
3696 t2
= tcg_temp_new();
3697 t3
= tcg_temp_new();
3698 t4
= tcg_temp_new();
3699 TCGLabel
*l_xra_only
= gen_new_label();
3700 TCGLabel
*l_done
= gen_new_label();
3701 TCGLabel
*l_zero
= gen_new_label();
3702 TCGLabel
*l_extract
= gen_new_label();
3704 XRa
= extract32(ctx
->opcode
, 6, 4);
3705 XRd
= extract32(ctx
->opcode
, 10, 4);
3706 rt
= extract32(ctx
->opcode
, 16, 5);
3707 rs
= extract32(ctx
->opcode
, 21, 5);
3709 /* {tmp} = {XRa:XRd} >> (64 - rs - rt) */
3710 gen_load_mxu_gpr(t0
, XRd
);
3711 gen_load_mxu_gpr(t1
, XRa
);
3712 gen_load_gpr(t2
, rs
);
3713 gen_load_gpr(t4
, rt
);
3714 tcg_gen_brcondi_tl(TCG_COND_EQ
, t4
, 0, l_zero
);
3715 tcg_gen_andi_tl(t2
, t2
, 0x1f);
3716 tcg_gen_subfi_tl(t2
, 32, t2
);
3717 tcg_gen_brcond_tl(TCG_COND_GE
, t2
, t4
, l_xra_only
);
3718 tcg_gen_sub_tl(t2
, t4
, t2
);
3719 tcg_gen_subfi_tl(t3
, 32, t2
);
3720 tcg_gen_shr_tl(t0
, t0
, t3
);
3721 tcg_gen_shl_tl(t1
, t1
, t2
);
3722 tcg_gen_or_tl(t0
, t0
, t1
);
3723 tcg_gen_br(l_extract
);
3725 gen_set_label(l_xra_only
);
3726 tcg_gen_sub_tl(t2
, t2
, t4
);
3727 tcg_gen_shr_tl(t0
, t1
, t2
);
3728 tcg_gen_br(l_extract
);
3730 /* unspecified behavior but matches tests on real hardware*/
3731 gen_set_label(l_zero
);
3732 tcg_gen_movi_tl(t0
, 0);
3735 /* {XRa} = extract({tmp}, 0, rt) */
3736 gen_set_label(l_extract
);
3737 tcg_gen_subfi_tl(t4
, 32, t4
);
3738 tcg_gen_shl_tl(t0
, t0
, t4
);
3739 tcg_gen_shr_tl(t0
, t0
, t4
);
3741 gen_set_label(l_done
);
3742 gen_store_mxu_gpr(t0
, XRa
);
3746 * S32LUI XRa, S8, optn3
3747 * Permutate the immediate S8 value to form a word
3750 static void gen_mxu_s32lui(DisasContext
*ctx
)
3752 uint32_t XRa
, s8
, optn3
, pad
;
3754 XRa
= extract32(ctx
->opcode
, 6, 4);
3755 s8
= extract32(ctx
->opcode
, 10, 8);
3756 pad
= extract32(ctx
->opcode
, 21, 2);
3757 optn3
= extract32(ctx
->opcode
, 23, 3);
3759 if (unlikely(pad
!= 0)) {
3760 /* opcode padding incorrect -> do nothing */
3761 } else if (unlikely(XRa
== 0)) {
3762 /* destination is zero register -> do nothing */
3765 TCGv t0
= tcg_temp_new();
3769 tcg_gen_movi_tl(t0
, s8
);
3772 tcg_gen_movi_tl(t0
, s8
<< 8);
3775 tcg_gen_movi_tl(t0
, s8
<< 16);
3778 tcg_gen_movi_tl(t0
, s8
<< 24);
3781 tcg_gen_movi_tl(t0
, (s8
<< 16) | s8
);
3784 tcg_gen_movi_tl(t0
, (s8
<< 24) | (s8
<< 8));
3787 s16
= (uint16_t)(int16_t)(int8_t)s8
;
3788 tcg_gen_movi_tl(t0
, (s16
<< 16) | s16
);
3791 tcg_gen_movi_tl(t0
, (s8
<< 24) | (s8
<< 16) | (s8
<< 8) | s8
);
3794 gen_store_mxu_gpr(t0
, XRa
);
3799 * Q16SAT XRa, XRb, XRc
3800 * Packs four 16-bit signed integers in XRb and XRc to
3801 * four saturated unsigned 8-bit into XRa.
3804 static void gen_mxu_Q16SAT(DisasContext
*ctx
)
3806 uint32_t pad
, XRc
, XRb
, XRa
;
3808 pad
= extract32(ctx
->opcode
, 21, 3);
3809 XRc
= extract32(ctx
->opcode
, 14, 4);
3810 XRb
= extract32(ctx
->opcode
, 10, 4);
3811 XRa
= extract32(ctx
->opcode
, 6, 4);
3813 if (unlikely(pad
!= 0)) {
3814 /* opcode padding incorrect -> do nothing */
3815 } else if (unlikely(XRa
== 0)) {
3816 /* destination is zero register -> do nothing */
3818 /* the most general case */
3819 TCGv t0
= tcg_temp_new();
3820 TCGv t1
= tcg_temp_new();
3821 TCGv t2
= tcg_temp_new();
3823 tcg_gen_movi_tl(t2
, 0);
3825 TCGLabel
*l_less_hi
= gen_new_label();
3826 TCGLabel
*l_less_lo
= gen_new_label();
3827 TCGLabel
*l_lo
= gen_new_label();
3828 TCGLabel
*l_greater_hi
= gen_new_label();
3829 TCGLabel
*l_greater_lo
= gen_new_label();
3830 TCGLabel
*l_done
= gen_new_label();
3832 tcg_gen_sari_tl(t0
, mxu_gpr
[XRb
- 1], 16);
3833 tcg_gen_brcondi_tl(TCG_COND_LT
, t0
, 0, l_less_hi
);
3834 tcg_gen_brcondi_tl(TCG_COND_GT
, t0
, 255, l_greater_hi
);
3836 gen_set_label(l_less_hi
);
3837 tcg_gen_movi_tl(t0
, 0);
3839 gen_set_label(l_greater_hi
);
3840 tcg_gen_movi_tl(t0
, 255);
3842 gen_set_label(l_lo
);
3843 tcg_gen_shli_tl(t1
, mxu_gpr
[XRb
- 1], 16);
3844 tcg_gen_sari_tl(t1
, t1
, 16);
3845 tcg_gen_brcondi_tl(TCG_COND_LT
, t1
, 0, l_less_lo
);
3846 tcg_gen_brcondi_tl(TCG_COND_GT
, t1
, 255, l_greater_lo
);
3848 gen_set_label(l_less_lo
);
3849 tcg_gen_movi_tl(t1
, 0);
3851 gen_set_label(l_greater_lo
);
3852 tcg_gen_movi_tl(t1
, 255);
3854 gen_set_label(l_done
);
3855 tcg_gen_shli_tl(t2
, t0
, 24);
3856 tcg_gen_shli_tl(t1
, t1
, 16);
3857 tcg_gen_or_tl(t2
, t2
, t1
);
3861 TCGLabel
*l_less_hi
= gen_new_label();
3862 TCGLabel
*l_less_lo
= gen_new_label();
3863 TCGLabel
*l_lo
= gen_new_label();
3864 TCGLabel
*l_greater_hi
= gen_new_label();
3865 TCGLabel
*l_greater_lo
= gen_new_label();
3866 TCGLabel
*l_done
= gen_new_label();
3868 tcg_gen_sari_tl(t0
, mxu_gpr
[XRc
- 1], 16);
3869 tcg_gen_brcondi_tl(TCG_COND_LT
, t0
, 0, l_less_hi
);
3870 tcg_gen_brcondi_tl(TCG_COND_GT
, t0
, 255, l_greater_hi
);
3872 gen_set_label(l_less_hi
);
3873 tcg_gen_movi_tl(t0
, 0);
3875 gen_set_label(l_greater_hi
);
3876 tcg_gen_movi_tl(t0
, 255);
3878 gen_set_label(l_lo
);
3879 tcg_gen_shli_tl(t1
, mxu_gpr
[XRc
- 1], 16);
3880 tcg_gen_sari_tl(t1
, t1
, 16);
3881 tcg_gen_brcondi_tl(TCG_COND_LT
, t1
, 0, l_less_lo
);
3882 tcg_gen_brcondi_tl(TCG_COND_GT
, t1
, 255, l_greater_lo
);
3884 gen_set_label(l_less_lo
);
3885 tcg_gen_movi_tl(t1
, 0);
3887 gen_set_label(l_greater_lo
);
3888 tcg_gen_movi_tl(t1
, 255);
3890 gen_set_label(l_done
);
3891 tcg_gen_shli_tl(t0
, t0
, 8);
3892 tcg_gen_or_tl(t2
, t2
, t0
);
3893 tcg_gen_or_tl(t2
, t2
, t1
);
3895 gen_store_mxu_gpr(t2
, XRa
);
3900 * Q16SCOP XRa, XRd, XRb, XRc
3901 * Determine sign of quad packed 16-bit signed values
3902 * in XRb and XRc put result in XRa and XRd respectively.
3904 static void gen_mxu_q16scop(DisasContext
*ctx
)
3906 uint32_t XRd
, XRc
, XRb
, XRa
;
3908 XRd
= extract32(ctx
->opcode
, 18, 4);
3909 XRc
= extract32(ctx
->opcode
, 14, 4);
3910 XRb
= extract32(ctx
->opcode
, 10, 4);
3911 XRa
= extract32(ctx
->opcode
, 6, 4);
3913 TCGv t0
= tcg_temp_new();
3914 TCGv t1
= tcg_temp_new();
3915 TCGv t2
= tcg_temp_new();
3916 TCGv t3
= tcg_temp_new();
3917 TCGv t4
= tcg_temp_new();
3919 TCGLabel
*l_b_hi_lt
= gen_new_label();
3920 TCGLabel
*l_b_hi_gt
= gen_new_label();
3921 TCGLabel
*l_b_lo
= gen_new_label();
3922 TCGLabel
*l_b_lo_lt
= gen_new_label();
3923 TCGLabel
*l_c_hi
= gen_new_label();
3924 TCGLabel
*l_c_hi_lt
= gen_new_label();
3925 TCGLabel
*l_c_hi_gt
= gen_new_label();
3926 TCGLabel
*l_c_lo
= gen_new_label();
3927 TCGLabel
*l_c_lo_lt
= gen_new_label();
3928 TCGLabel
*l_done
= gen_new_label();
3930 gen_load_mxu_gpr(t0
, XRb
);
3931 gen_load_mxu_gpr(t1
, XRc
);
3933 tcg_gen_sextract_tl(t2
, t0
, 16, 16);
3934 tcg_gen_brcondi_tl(TCG_COND_LT
, t2
, 0, l_b_hi_lt
);
3935 tcg_gen_brcondi_tl(TCG_COND_GT
, t2
, 0, l_b_hi_gt
);
3936 tcg_gen_movi_tl(t3
, 0);
3938 gen_set_label(l_b_hi_lt
);
3939 tcg_gen_movi_tl(t3
, 0xffff0000);
3941 gen_set_label(l_b_hi_gt
);
3942 tcg_gen_movi_tl(t3
, 0x00010000);
3944 gen_set_label(l_b_lo
);
3945 tcg_gen_sextract_tl(t2
, t0
, 0, 16);
3946 tcg_gen_brcondi_tl(TCG_COND_EQ
, t2
, 0, l_c_hi
);
3947 tcg_gen_brcondi_tl(TCG_COND_LT
, t2
, 0, l_b_lo_lt
);
3948 tcg_gen_ori_tl(t3
, t3
, 0x00000001);
3950 gen_set_label(l_b_lo_lt
);
3951 tcg_gen_ori_tl(t3
, t3
, 0x0000ffff);
3954 gen_set_label(l_c_hi
);
3955 tcg_gen_sextract_tl(t2
, t1
, 16, 16);
3956 tcg_gen_brcondi_tl(TCG_COND_LT
, t2
, 0, l_c_hi_lt
);
3957 tcg_gen_brcondi_tl(TCG_COND_GT
, t2
, 0, l_c_hi_gt
);
3958 tcg_gen_movi_tl(t4
, 0);
3960 gen_set_label(l_c_hi_lt
);
3961 tcg_gen_movi_tl(t4
, 0xffff0000);
3963 gen_set_label(l_c_hi_gt
);
3964 tcg_gen_movi_tl(t4
, 0x00010000);
3966 gen_set_label(l_c_lo
);
3967 tcg_gen_sextract_tl(t2
, t1
, 0, 16);
3968 tcg_gen_brcondi_tl(TCG_COND_EQ
, t2
, 0, l_done
);
3969 tcg_gen_brcondi_tl(TCG_COND_LT
, t2
, 0, l_c_lo_lt
);
3970 tcg_gen_ori_tl(t4
, t4
, 0x00000001);
3972 gen_set_label(l_c_lo_lt
);
3973 tcg_gen_ori_tl(t4
, t4
, 0x0000ffff);
3975 gen_set_label(l_done
);
3976 gen_store_mxu_gpr(t3
, XRa
);
3977 gen_store_mxu_gpr(t4
, XRd
);
3981 * S32SFL XRa, XRd, XRb, XRc
3982 * Shuffle bytes according to one of four patterns.
3984 static void gen_mxu_s32sfl(DisasContext
*ctx
)
3986 uint32_t XRd
, XRc
, XRb
, XRa
, ptn2
;
3988 XRd
= extract32(ctx
->opcode
, 18, 4);
3989 XRc
= extract32(ctx
->opcode
, 14, 4);
3990 XRb
= extract32(ctx
->opcode
, 10, 4);
3991 XRa
= extract32(ctx
->opcode
, 6, 4);
3992 ptn2
= extract32(ctx
->opcode
, 24, 2);
3994 TCGv t0
= tcg_temp_new();
3995 TCGv t1
= tcg_temp_new();
3996 TCGv t2
= tcg_temp_new();
3997 TCGv t3
= tcg_temp_new();
3999 gen_load_mxu_gpr(t0
, XRb
);
4000 gen_load_mxu_gpr(t1
, XRc
);
4004 tcg_gen_andi_tl(t2
, t0
, 0xff000000);
4005 tcg_gen_andi_tl(t3
, t1
, 0x000000ff);
4006 tcg_gen_deposit_tl(t3
, t3
, t0
, 8, 8);
4007 tcg_gen_shri_tl(t0
, t0
, 8);
4008 tcg_gen_shri_tl(t1
, t1
, 8);
4009 tcg_gen_deposit_tl(t3
, t3
, t0
, 24, 8);
4010 tcg_gen_deposit_tl(t3
, t3
, t1
, 16, 8);
4011 tcg_gen_shri_tl(t0
, t0
, 8);
4012 tcg_gen_shri_tl(t1
, t1
, 8);
4013 tcg_gen_deposit_tl(t2
, t2
, t0
, 8, 8);
4014 tcg_gen_deposit_tl(t2
, t2
, t1
, 0, 8);
4015 tcg_gen_shri_tl(t1
, t1
, 8);
4016 tcg_gen_deposit_tl(t2
, t2
, t1
, 16, 8);
4019 tcg_gen_andi_tl(t2
, t0
, 0xff000000);
4020 tcg_gen_andi_tl(t3
, t1
, 0x000000ff);
4021 tcg_gen_deposit_tl(t3
, t3
, t0
, 16, 8);
4022 tcg_gen_shri_tl(t0
, t0
, 8);
4023 tcg_gen_shri_tl(t1
, t1
, 8);
4024 tcg_gen_deposit_tl(t2
, t2
, t0
, 16, 8);
4025 tcg_gen_deposit_tl(t2
, t2
, t1
, 0, 8);
4026 tcg_gen_shri_tl(t0
, t0
, 8);
4027 tcg_gen_shri_tl(t1
, t1
, 8);
4028 tcg_gen_deposit_tl(t3
, t3
, t0
, 24, 8);
4029 tcg_gen_deposit_tl(t3
, t3
, t1
, 8, 8);
4030 tcg_gen_shri_tl(t1
, t1
, 8);
4031 tcg_gen_deposit_tl(t2
, t2
, t1
, 8, 8);
4034 tcg_gen_andi_tl(t2
, t0
, 0xff00ff00);
4035 tcg_gen_andi_tl(t3
, t1
, 0x00ff00ff);
4036 tcg_gen_deposit_tl(t3
, t3
, t0
, 8, 8);
4037 tcg_gen_shri_tl(t0
, t0
, 16);
4038 tcg_gen_shri_tl(t1
, t1
, 8);
4039 tcg_gen_deposit_tl(t2
, t2
, t1
, 0, 8);
4040 tcg_gen_deposit_tl(t3
, t3
, t0
, 24, 8);
4041 tcg_gen_shri_tl(t1
, t1
, 16);
4042 tcg_gen_deposit_tl(t2
, t2
, t1
, 16, 8);
4045 tcg_gen_andi_tl(t2
, t0
, 0xffff0000);
4046 tcg_gen_andi_tl(t3
, t1
, 0x0000ffff);
4047 tcg_gen_shri_tl(t1
, t1
, 16);
4048 tcg_gen_deposit_tl(t2
, t2
, t1
, 0, 16);
4049 tcg_gen_deposit_tl(t3
, t3
, t0
, 16, 16);
4053 gen_store_mxu_gpr(t2
, XRa
);
4054 gen_store_mxu_gpr(t3
, XRd
);
4058 * Q8SAD XRa, XRd, XRb, XRc
4059 * Typical SAD operation for motion estimation.
4061 static void gen_mxu_q8sad(DisasContext
*ctx
)
4063 uint32_t XRd
, XRc
, XRb
, XRa
;
4065 XRd
= extract32(ctx
->opcode
, 18, 4);
4066 XRc
= extract32(ctx
->opcode
, 14, 4);
4067 XRb
= extract32(ctx
->opcode
, 10, 4);
4068 XRa
= extract32(ctx
->opcode
, 6, 4);
4070 TCGv t0
= tcg_temp_new();
4071 TCGv t1
= tcg_temp_new();
4072 TCGv t2
= tcg_temp_new();
4073 TCGv t3
= tcg_temp_new();
4074 TCGv t4
= tcg_temp_new();
4075 TCGv t5
= tcg_temp_new();
4077 gen_load_mxu_gpr(t2
, XRb
);
4078 gen_load_mxu_gpr(t3
, XRc
);
4079 gen_load_mxu_gpr(t5
, XRd
);
4080 tcg_gen_movi_tl(t4
, 0);
4082 for (int i
= 0; i
< 4; i
++) {
4083 tcg_gen_andi_tl(t0
, t2
, 0xff);
4084 tcg_gen_andi_tl(t1
, t3
, 0xff);
4085 tcg_gen_sub_tl(t0
, t0
, t1
);
4086 tcg_gen_abs_tl(t0
, t0
);
4087 tcg_gen_add_tl(t4
, t4
, t0
);
4089 tcg_gen_shri_tl(t2
, t2
, 8);
4090 tcg_gen_shri_tl(t3
, t3
, 8);
4093 tcg_gen_add_tl(t5
, t5
, t4
);
4094 gen_store_mxu_gpr(t4
, XRa
);
4095 gen_store_mxu_gpr(t5
, XRd
);
/*
 *                 MXU instruction category: align
 *                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
4106 * S32ALNI XRc, XRb, XRa, optn3
4107 * Arrange bytes from XRb and XRc according to one of five sets of
4108 * rules determined by optn3, and place the result in XRa.
4110 static void gen_mxu_S32ALNI(DisasContext
*ctx
)
4112 uint32_t optn3
, pad
, XRc
, XRb
, XRa
;
4114 optn3
= extract32(ctx
->opcode
, 23, 3);
4115 pad
= extract32(ctx
->opcode
, 21, 2);
4116 XRc
= extract32(ctx
->opcode
, 14, 4);
4117 XRb
= extract32(ctx
->opcode
, 10, 4);
4118 XRa
= extract32(ctx
->opcode
, 6, 4);
4120 if (unlikely(pad
!= 0)) {
4121 /* opcode padding incorrect -> do nothing */
4122 } else if (unlikely(XRa
== 0)) {
4123 /* destination is zero register -> do nothing */
4124 } else if (unlikely((XRb
== 0) && (XRc
== 0))) {
4125 /* both operands zero registers -> just set destination to all 0s */
4126 tcg_gen_movi_i32(mxu_gpr
[XRa
- 1], 0);
4127 } else if (unlikely(XRb
== 0)) {
4128 /* XRb zero register -> just appropriatelly shift XRc into XRa */
4130 case MXU_OPTN3_PTN0
:
4131 tcg_gen_movi_i32(mxu_gpr
[XRa
- 1], 0);
4133 case MXU_OPTN3_PTN1
:
4134 case MXU_OPTN3_PTN2
:
4135 case MXU_OPTN3_PTN3
:
4136 tcg_gen_shri_i32(mxu_gpr
[XRa
- 1], mxu_gpr
[XRc
- 1],
4139 case MXU_OPTN3_PTN4
:
4140 tcg_gen_mov_i32(mxu_gpr
[XRa
- 1], mxu_gpr
[XRc
- 1]);
4143 } else if (unlikely(XRc
== 0)) {
4144 /* XRc zero register -> just appropriatelly shift XRb into XRa */
4146 case MXU_OPTN3_PTN0
:
4147 tcg_gen_mov_i32(mxu_gpr
[XRa
- 1], mxu_gpr
[XRb
- 1]);
4149 case MXU_OPTN3_PTN1
:
4150 case MXU_OPTN3_PTN2
:
4151 case MXU_OPTN3_PTN3
:
4152 tcg_gen_shri_i32(mxu_gpr
[XRa
- 1], mxu_gpr
[XRb
- 1], 8 * optn3
);
4154 case MXU_OPTN3_PTN4
:
4155 tcg_gen_movi_i32(mxu_gpr
[XRa
- 1], 0);
4158 } else if (unlikely(XRb
== XRc
)) {
4159 /* both operands same -> just rotation or moving from any of them */
4161 case MXU_OPTN3_PTN0
:
4162 case MXU_OPTN3_PTN4
:
4163 tcg_gen_mov_i32(mxu_gpr
[XRa
- 1], mxu_gpr
[XRb
- 1]);
4165 case MXU_OPTN3_PTN1
:
4166 case MXU_OPTN3_PTN2
:
4167 case MXU_OPTN3_PTN3
:
4168 tcg_gen_rotli_i32(mxu_gpr
[XRa
- 1], mxu_gpr
[XRb
- 1], 8 * optn3
);
4172 /* the most general case */
4174 case MXU_OPTN3_PTN0
:
4178 /* +---------------+ */
4179 /* | A B C D | E F G H */
4180 /* +-------+-------+ */
4185 tcg_gen_mov_i32(mxu_gpr
[XRa
- 1], mxu_gpr
[XRb
- 1]);
4188 case MXU_OPTN3_PTN1
:
4192 /* +-------------------+ */
4193 /* A | B C D E | F G H */
4194 /* +---------+---------+ */
4199 TCGv_i32 t0
= tcg_temp_new();
4200 TCGv_i32 t1
= tcg_temp_new();
4202 tcg_gen_andi_i32(t0
, mxu_gpr
[XRb
- 1], 0x00FFFFFF);
4203 tcg_gen_shli_i32(t0
, t0
, 8);
4205 tcg_gen_andi_i32(t1
, mxu_gpr
[XRc
- 1], 0xFF000000);
4206 tcg_gen_shri_i32(t1
, t1
, 24);
4208 tcg_gen_or_i32(mxu_gpr
[XRa
- 1], t0
, t1
);
4211 case MXU_OPTN3_PTN2
:
4215 /* +-------------------+ */
4216 /* A B | C D E F | G H */
4217 /* +---------+---------+ */
4222 TCGv_i32 t0
= tcg_temp_new();
4223 TCGv_i32 t1
= tcg_temp_new();
4225 tcg_gen_andi_i32(t0
, mxu_gpr
[XRb
- 1], 0x0000FFFF);
4226 tcg_gen_shli_i32(t0
, t0
, 16);
4228 tcg_gen_andi_i32(t1
, mxu_gpr
[XRc
- 1], 0xFFFF0000);
4229 tcg_gen_shri_i32(t1
, t1
, 16);
4231 tcg_gen_or_i32(mxu_gpr
[XRa
- 1], t0
, t1
);
4234 case MXU_OPTN3_PTN3
:
4238 /* +-------------------+ */
4239 /* A B C | D E F G | H */
4240 /* +---------+---------+ */
4245 TCGv_i32 t0
= tcg_temp_new();
4246 TCGv_i32 t1
= tcg_temp_new();
4248 tcg_gen_andi_i32(t0
, mxu_gpr
[XRb
- 1], 0x000000FF);
4249 tcg_gen_shli_i32(t0
, t0
, 24);
4251 tcg_gen_andi_i32(t1
, mxu_gpr
[XRc
- 1], 0xFFFFFF00);
4252 tcg_gen_shri_i32(t1
, t1
, 8);
4254 tcg_gen_or_i32(mxu_gpr
[XRa
- 1], t0
, t1
);
4257 case MXU_OPTN3_PTN4
:
4261 /* +---------------+ */
4262 /* A B C D | E F G H | */
4263 /* +-------+-------+ */
4268 tcg_gen_mov_i32(mxu_gpr
[XRa
- 1], mxu_gpr
[XRc
- 1]);
4276 * S32ALN XRc, XRb, XRa, rs
4277 * Arrange bytes from XRb and XRc according to one of five sets of
4278 * rules determined by rs[2:0], and place the result in XRa.
4280 static void gen_mxu_S32ALN(DisasContext
*ctx
)
4282 uint32_t rs
, XRc
, XRb
, XRa
;
4284 rs
= extract32(ctx
->opcode
, 21, 5);
4285 XRc
= extract32(ctx
->opcode
, 14, 4);
4286 XRb
= extract32(ctx
->opcode
, 10, 4);
4287 XRa
= extract32(ctx
->opcode
, 6, 4);
4289 if (unlikely(XRa
== 0)) {
4290 /* destination is zero register -> do nothing */
4291 } else if (unlikely((XRb
== 0) && (XRc
== 0))) {
4292 /* both operands zero registers -> just set destination to all 0s */
4293 tcg_gen_movi_tl(mxu_gpr
[XRa
- 1], 0);
4295 /* the most general case */
4296 TCGv t0
= tcg_temp_new();
4297 TCGv t1
= tcg_temp_new();
4298 TCGv t2
= tcg_temp_new();
4299 TCGv t3
= tcg_temp_new();
4300 TCGLabel
*l_exit
= gen_new_label();
4301 TCGLabel
*l_b_only
= gen_new_label();
4302 TCGLabel
*l_c_only
= gen_new_label();
4304 gen_load_mxu_gpr(t0
, XRb
);
4305 gen_load_mxu_gpr(t1
, XRc
);
4306 gen_load_gpr(t2
, rs
);
4307 tcg_gen_andi_tl(t2
, t2
, 0x07);
4309 /* do nothing for undefined cases */
4310 tcg_gen_brcondi_tl(TCG_COND_GE
, t2
, 5, l_exit
);
4312 tcg_gen_brcondi_tl(TCG_COND_EQ
, t2
, 0, l_b_only
);
4313 tcg_gen_brcondi_tl(TCG_COND_EQ
, t2
, 4, l_c_only
);
4315 tcg_gen_shli_tl(t2
, t2
, 3);
4316 tcg_gen_subfi_tl(t3
, 32, t2
);
4318 tcg_gen_shl_tl(t0
, t0
, t2
);
4319 tcg_gen_shr_tl(t1
, t1
, t3
);
4320 tcg_gen_or_tl(mxu_gpr
[XRa
- 1], t0
, t1
);
4323 gen_set_label(l_b_only
);
4324 gen_store_mxu_gpr(t0
, XRa
);
4327 gen_set_label(l_c_only
);
4328 gen_store_mxu_gpr(t1
, XRa
);
4330 gen_set_label(l_exit
);
4335 * S32MADD XRa, XRd, rb, rc
4336 * 32 to 64 bit signed multiply with subsequent add
4337 * result stored in {XRa, XRd} pair, stain HI/LO.
4338 * S32MADDU XRa, XRd, rb, rc
4339 * 32 to 64 bit unsigned multiply with subsequent add
4340 * result stored in {XRa, XRd} pair, stain HI/LO.
4341 * S32MSUB XRa, XRd, rb, rc
4342 * 32 to 64 bit signed multiply with subsequent subtract
4343 * result stored in {XRa, XRd} pair, stain HI/LO.
4344 * S32MSUBU XRa, XRd, rb, rc
4345 * 32 to 64 bit unsigned multiply with subsequent subtract
4346 * result stored in {XRa, XRd} pair, stain HI/LO.
4348 static void gen_mxu_s32madd_sub(DisasContext
*ctx
, bool sub
, bool uns
)
4350 uint32_t XRa
, XRd
, Rb
, Rc
;
4352 XRa
= extract32(ctx
->opcode
, 6, 4);
4353 XRd
= extract32(ctx
->opcode
, 10, 4);
4354 Rb
= extract32(ctx
->opcode
, 16, 5);
4355 Rc
= extract32(ctx
->opcode
, 21, 5);
4357 if (unlikely(Rb
== 0 || Rc
== 0)) {
4358 /* do nothing because x + 0 * y => x */
4359 } else if (unlikely(XRa
== 0 && XRd
== 0)) {
4360 /* do nothing because result just dropped */
4362 TCGv t0
= tcg_temp_new();
4363 TCGv t1
= tcg_temp_new();
4364 TCGv_i64 t2
= tcg_temp_new_i64();
4365 TCGv_i64 t3
= tcg_temp_new_i64();
4367 gen_load_gpr(t0
, Rb
);
4368 gen_load_gpr(t1
, Rc
);
4371 tcg_gen_extu_tl_i64(t2
, t0
);
4372 tcg_gen_extu_tl_i64(t3
, t1
);
4374 tcg_gen_ext_tl_i64(t2
, t0
);
4375 tcg_gen_ext_tl_i64(t3
, t1
);
4377 tcg_gen_mul_i64(t2
, t2
, t3
);
4379 gen_load_mxu_gpr(t0
, XRa
);
4380 gen_load_mxu_gpr(t1
, XRd
);
4382 tcg_gen_concat_tl_i64(t3
, t1
, t0
);
4384 tcg_gen_sub_i64(t3
, t3
, t2
);
4386 tcg_gen_add_i64(t3
, t3
, t2
);
4388 gen_move_low32(t1
, t3
);
4389 gen_move_high32(t0
, t3
);
4391 tcg_gen_mov_tl(cpu_HI
[0], t0
);
4392 tcg_gen_mov_tl(cpu_LO
[0], t1
);
4394 gen_store_mxu_gpr(t1
, XRd
);
4395 gen_store_mxu_gpr(t0
, XRa
);
/*
 * Decoding engine for MXU
 * =======================
 */
4404 static void decode_opc_mxu__pool00(DisasContext
*ctx
)
4406 uint32_t opcode
= extract32(ctx
->opcode
, 18, 3);
4409 case OPC_MXU_S32MAX
:
4410 case OPC_MXU_S32MIN
:
4411 gen_mxu_S32MAX_S32MIN(ctx
);
4413 case OPC_MXU_D16MAX
:
4414 case OPC_MXU_D16MIN
:
4415 gen_mxu_D16MAX_D16MIN(ctx
);
4419 gen_mxu_Q8MAX_Q8MIN(ctx
);
4422 gen_mxu_q8slt(ctx
, false);
4424 case OPC_MXU_Q8SLTU
:
4425 gen_mxu_q8slt(ctx
, true);
4428 MIPS_INVAL("decode_opc_mxu");
4429 gen_reserved_instruction(ctx
);
4434 static bool decode_opc_mxu_s32madd_sub(DisasContext
*ctx
)
4436 uint32_t opcode
= extract32(ctx
->opcode
, 0, 6);
4437 uint32_t pad
= extract32(ctx
->opcode
, 14, 2);
4440 /* MIPS32R1 MADD/MADDU/MSUB/MSUBU are on pad == 0 */
4445 case OPC_MXU_S32MADD
:
4446 gen_mxu_s32madd_sub(ctx
, false, false);
4448 case OPC_MXU_S32MADDU
:
4449 gen_mxu_s32madd_sub(ctx
, false, true);
4451 case OPC_MXU_S32MSUB
:
4452 gen_mxu_s32madd_sub(ctx
, true, false);
4454 case OPC_MXU_S32MSUBU
:
4455 gen_mxu_s32madd_sub(ctx
, true, true);
4463 static void decode_opc_mxu__pool01(DisasContext
*ctx
)
4465 uint32_t opcode
= extract32(ctx
->opcode
, 18, 3);
4468 case OPC_MXU_S32SLT
:
4469 gen_mxu_S32SLT(ctx
);
4471 case OPC_MXU_D16SLT
:
4472 gen_mxu_D16SLT(ctx
);
4474 case OPC_MXU_D16AVG
:
4475 gen_mxu_d16avg(ctx
, false);
4477 case OPC_MXU_D16AVGR
:
4478 gen_mxu_d16avg(ctx
, true);
4481 gen_mxu_q8avg(ctx
, false);
4483 case OPC_MXU_Q8AVGR
:
4484 gen_mxu_q8avg(ctx
, true);
4490 MIPS_INVAL("decode_opc_mxu");
4491 gen_reserved_instruction(ctx
);
4496 static void decode_opc_mxu__pool02(DisasContext
*ctx
)
4498 uint32_t opcode
= extract32(ctx
->opcode
, 18, 3);
4501 case OPC_MXU_S32CPS
:
4502 gen_mxu_S32CPS(ctx
);
4504 case OPC_MXU_D16CPS
:
4505 gen_mxu_D16CPS(ctx
);
4510 case OPC_MXU_Q16SAT
:
4511 gen_mxu_Q16SAT(ctx
);
4514 MIPS_INVAL("decode_opc_mxu");
4515 gen_reserved_instruction(ctx
);
4520 static void decode_opc_mxu__pool03(DisasContext
*ctx
)
4522 uint32_t opcode
= extract32(ctx
->opcode
, 24, 2);
4525 case OPC_MXU_D16MULF
:
4526 gen_mxu_d16mul(ctx
, true, true);
4528 case OPC_MXU_D16MULE
:
4529 gen_mxu_d16mul(ctx
, true, false);
4532 MIPS_INVAL("decode_opc_mxu");
4533 gen_reserved_instruction(ctx
);
4538 static void decode_opc_mxu__pool04(DisasContext
*ctx
)
4540 uint32_t reversed
= extract32(ctx
->opcode
, 20, 1);
4541 uint32_t opcode
= extract32(ctx
->opcode
, 10, 4);
4543 /* Don't care about opcode bits as their meaning is unknown yet */
4546 gen_mxu_s32ldxx(ctx
, reversed
, false);
4551 static void decode_opc_mxu__pool05(DisasContext
*ctx
)
4553 uint32_t reversed
= extract32(ctx
->opcode
, 20, 1);
4554 uint32_t opcode
= extract32(ctx
->opcode
, 10, 4);
4556 /* Don't care about opcode bits as their meaning is unknown yet */
4559 gen_mxu_s32stxx(ctx
, reversed
, false);
4564 static void decode_opc_mxu__pool06(DisasContext
*ctx
)
4566 uint32_t opcode
= extract32(ctx
->opcode
, 10, 4);
4567 uint32_t strd2
= extract32(ctx
->opcode
, 14, 2);
4570 case OPC_MXU_S32LDST
:
4571 case OPC_MXU_S32LDSTR
:
4573 gen_mxu_s32ldxvx(ctx
, opcode
, false, strd2
);
4578 MIPS_INVAL("decode_opc_mxu");
4579 gen_reserved_instruction(ctx
);
4584 static void decode_opc_mxu__pool07(DisasContext
*ctx
)
4586 uint32_t opcode
= extract32(ctx
->opcode
, 10, 4);
4587 uint32_t strd2
= extract32(ctx
->opcode
, 14, 2);
4590 case OPC_MXU_S32LDST
:
4591 case OPC_MXU_S32LDSTR
:
4593 gen_mxu_s32stxvx(ctx
, opcode
, false, strd2
);
4598 MIPS_INVAL("decode_opc_mxu");
4599 gen_reserved_instruction(ctx
);
4604 static void decode_opc_mxu__pool08(DisasContext
*ctx
)
4606 uint32_t reversed
= extract32(ctx
->opcode
, 20, 1);
4607 uint32_t opcode
= extract32(ctx
->opcode
, 10, 4);
4609 /* Don't care about opcode bits as their meaning is unknown yet */
4612 gen_mxu_s32ldxx(ctx
, reversed
, true);
4617 static void decode_opc_mxu__pool09(DisasContext
*ctx
)
4619 uint32_t reversed
= extract32(ctx
->opcode
, 20, 1);
4620 uint32_t opcode
= extract32(ctx
->opcode
, 10, 4);
4622 /* Don't care about opcode bits as their meaning is unknown yet */
4625 gen_mxu_s32stxx(ctx
, reversed
, true);
4630 static void decode_opc_mxu__pool10(DisasContext
*ctx
)
4632 uint32_t opcode
= extract32(ctx
->opcode
, 10, 4);
4633 uint32_t strd2
= extract32(ctx
->opcode
, 14, 2);
4636 case OPC_MXU_S32LDST
:
4637 case OPC_MXU_S32LDSTR
:
4639 gen_mxu_s32ldxvx(ctx
, opcode
, true, strd2
);
4644 MIPS_INVAL("decode_opc_mxu");
4645 gen_reserved_instruction(ctx
);
4650 static void decode_opc_mxu__pool11(DisasContext
*ctx
)
4652 uint32_t opcode
= extract32(ctx
->opcode
, 10, 4);
4653 uint32_t strd2
= extract32(ctx
->opcode
, 14, 2);
4656 case OPC_MXU_S32LDST
:
4657 case OPC_MXU_S32LDSTR
:
4659 gen_mxu_s32stxvx(ctx
, opcode
, true, strd2
);
4664 MIPS_INVAL("decode_opc_mxu");
4665 gen_reserved_instruction(ctx
);
4670 static void decode_opc_mxu__pool12(DisasContext
*ctx
)
4672 uint32_t opcode
= extract32(ctx
->opcode
, 22, 2);
4675 case OPC_MXU_D32ACC
:
4676 gen_mxu_d32acc(ctx
);
4678 case OPC_MXU_D32ACCM
:
4679 gen_mxu_d32accm(ctx
);
4681 case OPC_MXU_D32ASUM
:
4682 gen_mxu_d32asum(ctx
);
4685 MIPS_INVAL("decode_opc_mxu");
4686 gen_reserved_instruction(ctx
);
4691 static void decode_opc_mxu__pool13(DisasContext
*ctx
)
4693 uint32_t opcode
= extract32(ctx
->opcode
, 22, 2);
4696 case OPC_MXU_Q16ACC
:
4697 gen_mxu_q16acc(ctx
);
4699 case OPC_MXU_Q16ACCM
:
4700 gen_mxu_q16accm(ctx
);
4702 case OPC_MXU_D16ASUM
:
4703 gen_mxu_d16asum(ctx
);
4706 MIPS_INVAL("decode_opc_mxu");
4707 gen_reserved_instruction(ctx
);
4712 static void decode_opc_mxu__pool14(DisasContext
*ctx
)
4714 uint32_t opcode
= extract32(ctx
->opcode
, 22, 2);
4717 case OPC_MXU_Q8ADDE
:
4718 gen_mxu_q8adde(ctx
, false);
4721 gen_mxu_d8sum(ctx
, false);
4723 case OPC_MXU_D8SUMC
:
4724 gen_mxu_d8sum(ctx
, true);
4727 MIPS_INVAL("decode_opc_mxu");
4728 gen_reserved_instruction(ctx
);
4733 static void decode_opc_mxu__pool15(DisasContext
*ctx
)
4735 uint32_t opcode
= extract32(ctx
->opcode
, 14, 2);
4738 case OPC_MXU_S32MUL
:
4739 gen_mxu_s32mul(ctx
, false);
4741 case OPC_MXU_S32MULU
:
4742 gen_mxu_s32mul(ctx
, true);
4744 case OPC_MXU_S32EXTR
:
4745 gen_mxu_s32extr(ctx
);
4747 case OPC_MXU_S32EXTRV
:
4748 gen_mxu_s32extrv(ctx
);
4751 MIPS_INVAL("decode_opc_mxu");
4752 gen_reserved_instruction(ctx
);
/*
 * Decode the instructions grouped under MXU pool 16.
 *
 * The sub-opcode is taken with extract32(ctx->opcode, 18, 3), i.e. the
 * 3-bit field starting at bit 18 of the instruction word.  Each listed
 * case forwards to a single generator; D32SARW reuses gen_mxu_d32sarl()
 * with its boolean argument set to true.  The trailing MIPS_INVAL() /
 * gen_reserved_instruction() pair handles values with no case.
 *
 * NOTE(review): the structural lines of this function (braces, the
 * switch header, break and default) are not present in this text and
 * have to be restored before it can compile.
 */
4757 static void decode_opc_mxu__pool16(DisasContext
*ctx
)
4759 uint32_t opcode
= extract32(ctx
->opcode
, 18, 3);
4762 case OPC_MXU_D32SARW
:
4763 gen_mxu_d32sarl(ctx
, true);
4765 case OPC_MXU_S32ALN
:
4766 gen_mxu_S32ALN(ctx
);
4768 case OPC_MXU_S32ALNI
:
4769 gen_mxu_S32ALNI(ctx
);
4771 case OPC_MXU_S32LUI
:
4772 gen_mxu_s32lui(ctx
);
4774 case OPC_MXU_S32NOR
:
4775 gen_mxu_S32NOR(ctx
);
4777 case OPC_MXU_S32AND
:
4778 gen_mxu_S32AND(ctx
);
/*
 * NOTE(review): the embedded numbering jumps from 4778 to 4783 here,
 * which leaves room for one more case/call pair between S32AND and
 * S32XOR.  Presumably this is the S32OR case -- verify against the
 * opcode enum before restoring it.
 */
4783 case OPC_MXU_S32XOR
:
4784 gen_mxu_S32XOR(ctx
);
4787 MIPS_INVAL("decode_opc_mxu");
4788 gen_reserved_instruction(ctx
);
/*
 * Decode the instructions grouped under MXU pool 17.
 *
 * Two fields are read from the instruction word: a 3-bit sub-opcode via
 * extract32(ctx->opcode, 6, 3) and a 2-bit stride value, strd2, via
 * extract32(ctx->opcode, 9, 2).  Every recognised sub-opcode ends in a
 * gen_mxu_lxx() call that passes strd2 through unchanged and differs
 * only in the memory-operation flags: MO_UL, MO_SB, MO_SW, MO_UB and
 * MO_UW, each combined with MO_TE.
 *
 * MIPS_INVAL() / gen_reserved_instruction() appear twice: once before
 * the gen_mxu_lxx() calls and once after them.
 *
 * NOTE(review): the condition guarding the first reserved-instruction
 * path, the case labels in front of each gen_mxu_lxx() call, and the
 * structural lines (braces, switch header, break, default, return) are
 * not present in this text.  Restore them from the upstream file rather
 * than by guesswork.
 */
4793 static void decode_opc_mxu__pool17(DisasContext
*ctx
)
4795 uint32_t opcode
= extract32(ctx
->opcode
, 6, 3);
4796 uint32_t strd2
= extract32(ctx
->opcode
, 9, 2);
/* Early rejection path; presumably guarded by a range check on strd2. */
4799 MIPS_INVAL("decode_opc_mxu");
4800 gen_reserved_instruction(ctx
);
/* Unsigned 32-bit access (MO_UL). */
4806 gen_mxu_lxx(ctx
, strd2
, MO_TE
| MO_UL
);
/* Signed 8-bit access (MO_SB). */
4809 gen_mxu_lxx(ctx
, strd2
, MO_TE
| MO_SB
);
/* Signed 16-bit access (MO_SW). */
4812 gen_mxu_lxx(ctx
, strd2
, MO_TE
| MO_SW
);
/* Unsigned 8-bit access (MO_UB). */
4815 gen_mxu_lxx(ctx
, strd2
, MO_TE
| MO_UB
);
/* Unsigned 16-bit access (MO_UW). */
4818 gen_mxu_lxx(ctx
, strd2
, MO_TE
| MO_UW
);
/* Fallback for sub-opcodes with no handler. */
4821 MIPS_INVAL("decode_opc_mxu");
4822 gen_reserved_instruction(ctx
);
4827 static void decode_opc_mxu__pool18(DisasContext
*ctx
)
4829 uint32_t opcode
= extract32(ctx
->opcode
, 18, 3);
4832 case OPC_MXU_D32SLLV
:
4833 gen_mxu_d32sxxv(ctx
, false, false);
4835 case OPC_MXU_D32SLRV
:
4836 gen_mxu_d32sxxv(ctx
, true, false);
4838 case OPC_MXU_D32SARV
:
4839 gen_mxu_d32sxxv(ctx
, true, true);
4841 case OPC_MXU_Q16SLLV
:
4842 gen_mxu_q16sxxv(ctx
, false, false);
4844 case OPC_MXU_Q16SLRV
:
4845 gen_mxu_q16sxxv(ctx
, true, false);
4847 case OPC_MXU_Q16SARV
:
4848 gen_mxu_q16sxxv(ctx
, true, true);
4851 MIPS_INVAL("decode_opc_mxu");
4852 gen_reserved_instruction(ctx
);
4857 static void decode_opc_mxu__pool19(DisasContext
*ctx
)
4859 uint32_t opcode
= extract32(ctx
->opcode
, 22, 4);
4863 gen_mxu_q8mul_mac(ctx
, false, false);
4865 case OPC_MXU_Q8MULSU
:
4866 gen_mxu_q8mul_mac(ctx
, true, false);
4869 MIPS_INVAL("decode_opc_mxu");
4870 gen_reserved_instruction(ctx
);
4875 static void decode_opc_mxu__pool20(DisasContext
*ctx
)
4877 uint32_t opcode
= extract32(ctx
->opcode
, 18, 3);
4880 case OPC_MXU_Q8MOVZ
:
4881 gen_mxu_q8movzn(ctx
, TCG_COND_NE
);
4883 case OPC_MXU_Q8MOVN
:
4884 gen_mxu_q8movzn(ctx
, TCG_COND_EQ
);
4886 case OPC_MXU_D16MOVZ
:
4887 gen_mxu_d16movzn(ctx
, TCG_COND_NE
);
4889 case OPC_MXU_D16MOVN
:
4890 gen_mxu_d16movzn(ctx
, TCG_COND_EQ
);
4892 case OPC_MXU_S32MOVZ
:
4893 gen_mxu_s32movzn(ctx
, TCG_COND_NE
);
4895 case OPC_MXU_S32MOVN
:
4896 gen_mxu_s32movzn(ctx
, TCG_COND_EQ
);
4899 MIPS_INVAL("decode_opc_mxu");
4900 gen_reserved_instruction(ctx
);
4905 static void decode_opc_mxu__pool21(DisasContext
*ctx
)
4907 uint32_t opcode
= extract32(ctx
->opcode
, 22, 2);
4911 gen_mxu_q8mul_mac(ctx
, false, true);
4913 case OPC_MXU_Q8MACSU
:
4914 gen_mxu_q8mul_mac(ctx
, true, true);
4917 MIPS_INVAL("decode_opc_mxu");
4918 gen_reserved_instruction(ctx
);
4924 bool decode_ase_mxu(DisasContext
*ctx
, uint32_t insn
)
4926 uint32_t opcode
= extract32(insn
, 0, 6);
4928 if (opcode
== OPC_MXU_S32M2I
) {
4929 gen_mxu_s32m2i(ctx
);
4933 if (opcode
== OPC_MXU_S32I2M
) {
4934 gen_mxu_s32i2m(ctx
);
4939 TCGv t_mxu_cr
= tcg_temp_new();
4940 TCGLabel
*l_exit
= gen_new_label();
4942 gen_load_mxu_cr(t_mxu_cr
);
4943 tcg_gen_andi_tl(t_mxu_cr
, t_mxu_cr
, MXU_CR_MXU_EN
);
4944 tcg_gen_brcondi_tl(TCG_COND_NE
, t_mxu_cr
, MXU_CR_MXU_EN
, l_exit
);
4947 case OPC_MXU_S32MADD
:
4948 case OPC_MXU_S32MADDU
:
4949 case OPC_MXU_S32MSUB
:
4950 case OPC_MXU_S32MSUBU
:
4951 return decode_opc_mxu_s32madd_sub(ctx
);
4952 case OPC_MXU__POOL00
:
4953 decode_opc_mxu__pool00(ctx
);
4955 case OPC_MXU_D16MUL
:
4956 gen_mxu_d16mul(ctx
, false, false);
4958 case OPC_MXU_D16MAC
:
4959 gen_mxu_d16mac(ctx
, false, false);
4961 case OPC_MXU_D16MACF
:
4962 gen_mxu_d16mac(ctx
, true, true);
4964 case OPC_MXU_D16MADL
:
4965 gen_mxu_d16madl(ctx
);
4967 case OPC_MXU_S16MAD
:
4968 gen_mxu_s16mad(ctx
);
4970 case OPC_MXU_Q16ADD
:
4971 gen_mxu_q16add(ctx
);
4973 case OPC_MXU_D16MACE
:
4974 gen_mxu_d16mac(ctx
, true, false);
4976 case OPC_MXU__POOL01
:
4977 decode_opc_mxu__pool01(ctx
);
4979 case OPC_MXU__POOL02
:
4980 decode_opc_mxu__pool02(ctx
);
4982 case OPC_MXU__POOL03
:
4983 decode_opc_mxu__pool03(ctx
);
4985 case OPC_MXU__POOL04
:
4986 decode_opc_mxu__pool04(ctx
);
4988 case OPC_MXU__POOL05
:
4989 decode_opc_mxu__pool05(ctx
);
4991 case OPC_MXU__POOL06
:
4992 decode_opc_mxu__pool06(ctx
);
4994 case OPC_MXU__POOL07
:
4995 decode_opc_mxu__pool07(ctx
);
4997 case OPC_MXU__POOL08
:
4998 decode_opc_mxu__pool08(ctx
);
5000 case OPC_MXU__POOL09
:
5001 decode_opc_mxu__pool09(ctx
);
5003 case OPC_MXU__POOL10
:
5004 decode_opc_mxu__pool10(ctx
);
5006 case OPC_MXU__POOL11
:
5007 decode_opc_mxu__pool11(ctx
);
5009 case OPC_MXU_D32ADD
:
5010 gen_mxu_d32add(ctx
);
5012 case OPC_MXU__POOL12
:
5013 decode_opc_mxu__pool12(ctx
);
5015 case OPC_MXU__POOL13
:
5016 decode_opc_mxu__pool13(ctx
);
5018 case OPC_MXU__POOL14
:
5019 decode_opc_mxu__pool14(ctx
);
5021 case OPC_MXU_Q8ACCE
:
5022 gen_mxu_q8adde(ctx
, true);
5025 gen_mxu_s8ldd(ctx
, false);
5028 gen_mxu_s8std(ctx
, false);
5031 gen_mxu_s8ldd(ctx
, true);
5034 gen_mxu_s8std(ctx
, true);
5036 case OPC_MXU__POOL15
:
5037 decode_opc_mxu__pool15(ctx
);
5039 case OPC_MXU__POOL16
:
5040 decode_opc_mxu__pool16(ctx
);
5042 case OPC_MXU__POOL17
:
5043 decode_opc_mxu__pool17(ctx
);
5045 case OPC_MXU_S16LDD
:
5046 gen_mxu_s16ldd(ctx
, false);
5048 case OPC_MXU_S16STD
:
5049 gen_mxu_s16std(ctx
, false);
5051 case OPC_MXU_S16LDI
:
5052 gen_mxu_s16ldd(ctx
, true);
5054 case OPC_MXU_S16SDI
:
5055 gen_mxu_s16std(ctx
, true);
5057 case OPC_MXU_D32SLL
:
5058 gen_mxu_d32sxx(ctx
, false, false);
5060 case OPC_MXU_D32SLR
:
5061 gen_mxu_d32sxx(ctx
, true, false);
5063 case OPC_MXU_D32SARL
:
5064 gen_mxu_d32sarl(ctx
, false);
5066 case OPC_MXU_D32SAR
:
5067 gen_mxu_d32sxx(ctx
, true, true);
5069 case OPC_MXU_Q16SLL
:
5070 gen_mxu_q16sxx(ctx
, false, false);
5072 case OPC_MXU__POOL18
:
5073 decode_opc_mxu__pool18(ctx
);
5075 case OPC_MXU_Q16SLR
:
5076 gen_mxu_q16sxx(ctx
, true, false);
5078 case OPC_MXU_Q16SAR
:
5079 gen_mxu_q16sxx(ctx
, true, true);
5081 case OPC_MXU__POOL19
:
5082 decode_opc_mxu__pool19(ctx
);
5084 case OPC_MXU__POOL20
:
5085 decode_opc_mxu__pool20(ctx
);
5087 case OPC_MXU__POOL21
:
5088 decode_opc_mxu__pool21(ctx
);
5090 case OPC_MXU_Q16SCOP
:
5091 gen_mxu_q16scop(ctx
);
5093 case OPC_MXU_Q8MADL
:
5094 gen_mxu_q8madl(ctx
);
5096 case OPC_MXU_S32SFL
:
5097 gen_mxu_s32sfl(ctx
);
5106 gen_set_label(l_exit
);