target-mips/msa_helper.c

   1 /*
   2  * MIPS SIMD Architecture Module Instruction emulation helpers for QEMU.
   3  *
   4  * Copyright (c) 2014 Imagination Technologies
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19
  20 #include "cpu.h"
  21 #include "exec/helper-proto.h"
  22
  23 /* Data format min and max values */
  24 #define DF_BITS(df) (1 << ((df) + 3))
  25
  26 #define DF_MAX_INT(df)  (int64_t)((1LL << (DF_BITS(df) - 1)) - 1)
  27 #define M_MAX_INT(m)    (int64_t)((1LL << ((m)         - 1)) - 1)
  28
  29 #define DF_MIN_INT(df)  (int64_t)(-(1LL << (DF_BITS(df) - 1)))
  30 #define M_MIN_INT(m)    (int64_t)(-(1LL << ((m)         - 1)))
  31
  32 #define DF_MAX_UINT(df) (uint64_t)(-1ULL >> (64 - DF_BITS(df)))
  33 #define M_MAX_UINT(m)   (uint64_t)(-1ULL >> (64 - (m)))
  34
  35 #define UNSIGNED(x, df) ((x) & DF_MAX_UINT(df))
  36 #define SIGNED(x, df)                                                   \
  37     ((((int64_t)x) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df)))
  38
  39 /* Element-by-element access macros */
  40 #define DF_ELEMENTS(df) (MSA_WRLEN / DF_BITS(df))
  41
  42 static inline void msa_move_v(wr_t *pwd, wr_t *pws)
  43 {
  44     uint32_t i;
  45
  46     for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
  47         pwd->d[i] = pws->d[i];
  48     }
  49 }
  50
  51 #define MSA_FN_IMM8(FUNC, DEST, OPERATION)                              \
  52 void helper_msa_ ## FUNC(CPUMIPSState *env, uint32_t wd, uint32_t ws,   \
  53         uint32_t i8)                                                    \
  54 {                                                                       \
  55     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
  56     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
  57     uint32_t i;                                                         \
  58     for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                        \
  59         DEST = OPERATION;                                               \
  60     }                                                                   \
  61 }
  62
  63 MSA_FN_IMM8(andi_b, pwd->b[i], pws->b[i] & i8)
  64 MSA_FN_IMM8(ori_b, pwd->b[i], pws->b[i] | i8)
  65 MSA_FN_IMM8(nori_b, pwd->b[i], ~(pws->b[i] | i8))
  66 MSA_FN_IMM8(xori_b, pwd->b[i], pws->b[i] ^ i8)
  67
  68 #define BIT_MOVE_IF_NOT_ZERO(dest, arg1, arg2, df) \
  69             UNSIGNED(((dest & (~arg2)) | (arg1 & arg2)), df)
  70 MSA_FN_IMM8(bmnzi_b, pwd->b[i],
  71         BIT_MOVE_IF_NOT_ZERO(pwd->b[i], pws->b[i], i8, DF_BYTE))
  72
  73 #define BIT_MOVE_IF_ZERO(dest, arg1, arg2, df) \
  74             UNSIGNED((dest & arg2) | (arg1 & (~arg2)), df)
  75 MSA_FN_IMM8(bmzi_b, pwd->b[i],
  76         BIT_MOVE_IF_ZERO(pwd->b[i], pws->b[i], i8, DF_BYTE))
  77
  78 #define BIT_SELECT(dest, arg1, arg2, df) \
  79             UNSIGNED((arg1 & (~dest)) | (arg2 & dest), df)
  80 MSA_FN_IMM8(bseli_b, pwd->b[i],
  81         BIT_SELECT(pwd->b[i], pws->b[i], i8, DF_BYTE))
  82
  83 #undef MSA_FN_IMM8
  84
  85 #define SHF_POS(i, imm) (((i) & 0xfc) + (((imm) >> (2 * ((i) & 0x03))) & 0x03))
  86
  87 void helper_msa_shf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
  88                        uint32_t ws, uint32_t imm)
  89 {
  90     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
  91     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
  92     wr_t wx, *pwx = &wx;
  93     uint32_t i;
  94
  95     switch (df) {
  96     case DF_BYTE:
  97         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
  98             pwx->b[i] = pws->b[SHF_POS(i, imm)];
  99         }
 100         break;
 101     case DF_HALF:
 102         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
 103             pwx->h[i] = pws->h[SHF_POS(i, imm)];
 104         }
 105         break;
 106     case DF_WORD:
 107         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
 108             pwx->w[i] = pws->w[SHF_POS(i, imm)];
 109         }
 110         break;
 111     default:
 112         assert(0);
 113     }
 114     msa_move_v(pwd, pwx);
 115 }
 116
 117 static inline int64_t msa_addv_df(uint32_t df, int64_t arg1, int64_t arg2)
 118 {
 119     return arg1 + arg2;
 120 }
 121
 122 static inline int64_t msa_subv_df(uint32_t df, int64_t arg1, int64_t arg2)
 123 {
 124     return arg1 - arg2;
 125 }
 126
 127 static inline int64_t msa_ceq_df(uint32_t df, int64_t arg1, int64_t arg2)
 128 {
 129     return arg1 == arg2 ? -1 : 0;
 130 }
 131
 132 static inline int64_t msa_cle_s_df(uint32_t df, int64_t arg1, int64_t arg2)
 133 {
 134     return arg1 <= arg2 ? -1 : 0;
 135 }
 136
 137 static inline int64_t msa_cle_u_df(uint32_t df, int64_t arg1, int64_t arg2)
 138 {
 139     uint64_t u_arg1 = UNSIGNED(arg1, df);
 140     uint64_t u_arg2 = UNSIGNED(arg2, df);
 141     return u_arg1 <= u_arg2 ? -1 : 0;
 142 }
 143
 144 static inline int64_t msa_clt_s_df(uint32_t df, int64_t arg1, int64_t arg2)
 145 {
 146     return arg1 < arg2 ? -1 : 0;
 147 }
 148
 149 static inline int64_t msa_clt_u_df(uint32_t df, int64_t arg1, int64_t arg2)
 150 {
 151     uint64_t u_arg1 = UNSIGNED(arg1, df);
 152     uint64_t u_arg2 = UNSIGNED(arg2, df);
 153     return u_arg1 < u_arg2 ? -1 : 0;
 154 }
 155
 156 static inline int64_t msa_max_s_df(uint32_t df, int64_t arg1, int64_t arg2)
 157 {
 158     return arg1 > arg2 ? arg1 : arg2;
 159 }
 160
 161 static inline int64_t msa_max_u_df(uint32_t df, int64_t arg1, int64_t arg2)
 162 {
 163     uint64_t u_arg1 = UNSIGNED(arg1, df);
 164     uint64_t u_arg2 = UNSIGNED(arg2, df);
 165     return u_arg1 > u_arg2 ? arg1 : arg2;
 166 }
 167
 168 static inline int64_t msa_min_s_df(uint32_t df, int64_t arg1, int64_t arg2)
 169 {
 170     return arg1 < arg2 ? arg1 : arg2;
 171 }
 172
 173 static inline int64_t msa_min_u_df(uint32_t df, int64_t arg1, int64_t arg2)
 174 {
 175     uint64_t u_arg1 = UNSIGNED(arg1, df);
 176     uint64_t u_arg2 = UNSIGNED(arg2, df);
 177     return u_arg1 < u_arg2 ? arg1 : arg2;
 178 }
 179
 180 #define MSA_BINOP_IMM_DF(helper, func)                                  \
 181 void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df,       \
 182                         uint32_t wd, uint32_t ws, int32_t u5)           \
 183 {                                                                       \
 184     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
 185     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
 186     uint32_t i;                                                         \
 187                                                                         \
 188     switch (df) {                                                       \
 189     case DF_BYTE:                                                       \
 190         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
 191             pwd->b[i] = msa_ ## func ## _df(df, pws->b[i], u5);         \
 192         }                                                               \
 193         break;                                                          \
 194     case DF_HALF:                                                       \
 195         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
 196             pwd->h[i] = msa_ ## func ## _df(df, pws->h[i], u5);         \
 197         }                                                               \
 198         break;                                                          \
 199     case DF_WORD:                                                       \
 200         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
 201             pwd->w[i] = msa_ ## func ## _df(df, pws->w[i], u5);         \
 202         }                                                               \
 203         break;                                                          \
 204     case DF_DOUBLE:                                                     \
 205         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
 206             pwd->d[i] = msa_ ## func ## _df(df, pws->d[i], u5);         \
 207         }                                                               \
 208         break;                                                          \
 209     default:                                                            \
 210         assert(0);                                                      \
 211     }                                                                   \
 212 }
 213
 214 MSA_BINOP_IMM_DF(addvi, addv)
 215 MSA_BINOP_IMM_DF(subvi, subv)
 216 MSA_BINOP_IMM_DF(ceqi, ceq)
 217 MSA_BINOP_IMM_DF(clei_s, cle_s)
 218 MSA_BINOP_IMM_DF(clei_u, cle_u)
 219 MSA_BINOP_IMM_DF(clti_s, clt_s)
 220 MSA_BINOP_IMM_DF(clti_u, clt_u)
 221 MSA_BINOP_IMM_DF(maxi_s, max_s)
 222 MSA_BINOP_IMM_DF(maxi_u, max_u)
 223 MSA_BINOP_IMM_DF(mini_s, min_s)
 224 MSA_BINOP_IMM_DF(mini_u, min_u)
 225 #undef MSA_BINOP_IMM_DF
 226
 227 void helper_msa_ldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
 228                        int32_t s10)
 229 {
 230     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 231     uint32_t i;
 232
 233     switch (df) {
 234     case DF_BYTE:
 235         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
 236             pwd->b[i] = (int8_t)s10;
 237         }
 238         break;
 239     case DF_HALF:
 240         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
 241             pwd->h[i] = (int16_t)s10;
 242         }
 243         break;
 244     case DF_WORD:
 245         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
 246             pwd->w[i] = (int32_t)s10;
 247         }
 248         break;
 249     case DF_DOUBLE:
 250         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
 251             pwd->d[i] = (int64_t)s10;
 252         }
 253        break;
 254     default:
 255         assert(0);
 256     }
 257 }
 258
 259 /* Data format bit position and unsigned values */
 260 #define BIT_POSITION(x, df) ((uint64_t)(x) % DF_BITS(df))
 261
 262 static inline int64_t msa_sll_df(uint32_t df, int64_t arg1, int64_t arg2)
 263 {
 264     int32_t b_arg2 = BIT_POSITION(arg2, df);
 265     return arg1 << b_arg2;
 266 }
 267
 268 static inline int64_t msa_sra_df(uint32_t df, int64_t arg1, int64_t arg2)
 269 {
 270     int32_t b_arg2 = BIT_POSITION(arg2, df);
 271     return arg1 >> b_arg2;
 272 }
 273
 274 static inline int64_t msa_srl_df(uint32_t df, int64_t arg1, int64_t arg2)
 275 {
 276     uint64_t u_arg1 = UNSIGNED(arg1, df);
 277     int32_t b_arg2 = BIT_POSITION(arg2, df);
 278     return u_arg1 >> b_arg2;
 279 }
 280
 281 static inline int64_t msa_bclr_df(uint32_t df, int64_t arg1, int64_t arg2)
 282 {
 283     int32_t b_arg2 = BIT_POSITION(arg2, df);
 284     return UNSIGNED(arg1 & (~(1LL << b_arg2)), df);
 285 }
 286
 287 static inline int64_t msa_bset_df(uint32_t df, int64_t arg1,
 288         int64_t arg2)
 289 {
 290     int32_t b_arg2 = BIT_POSITION(arg2, df);
 291     return UNSIGNED(arg1 | (1LL << b_arg2), df);
 292 }
 293
 294 static inline int64_t msa_bneg_df(uint32_t df, int64_t arg1, int64_t arg2)
 295 {
 296     int32_t b_arg2 = BIT_POSITION(arg2, df);
 297     return UNSIGNED(arg1 ^ (1LL << b_arg2), df);
 298 }
 299
 300 static inline int64_t msa_binsl_df(uint32_t df, int64_t dest, int64_t arg1,
 301                                    int64_t arg2)
 302 {
 303     uint64_t u_arg1 = UNSIGNED(arg1, df);
 304     uint64_t u_dest = UNSIGNED(dest, df);
 305     int32_t sh_d = BIT_POSITION(arg2, df) + 1;
 306     int32_t sh_a = DF_BITS(df) - sh_d;
 307     if (sh_d == DF_BITS(df)) {
 308         return u_arg1;
 309     } else {
 310         return UNSIGNED(UNSIGNED(u_dest << sh_d, df) >> sh_d, df) |
 311                UNSIGNED(UNSIGNED(u_arg1 >> sh_a, df) << sh_a, df);
 312     }
 313 }
 314
 315 static inline int64_t msa_binsr_df(uint32_t df, int64_t dest, int64_t arg1,
 316                                    int64_t arg2)
 317 {
 318     uint64_t u_arg1 = UNSIGNED(arg1, df);
 319     uint64_t u_dest = UNSIGNED(dest, df);
 320     int32_t sh_d = BIT_POSITION(arg2, df) + 1;
 321     int32_t sh_a = DF_BITS(df) - sh_d;
 322     if (sh_d == DF_BITS(df)) {
 323         return u_arg1;
 324     } else {
 325         return UNSIGNED(UNSIGNED(u_dest >> sh_d, df) << sh_d, df) |
 326                UNSIGNED(UNSIGNED(u_arg1 << sh_a, df) >> sh_a, df);
 327     }
 328 }
 329
 330 static inline int64_t msa_sat_s_df(uint32_t df, int64_t arg, uint32_t m)
 331 {
 332     return arg < M_MIN_INT(m+1) ? M_MIN_INT(m+1) :
 333                                   arg > M_MAX_INT(m+1) ? M_MAX_INT(m+1) :
 334                                                          arg;
 335 }
 336
 337 static inline int64_t msa_sat_u_df(uint32_t df, int64_t arg, uint32_t m)
 338 {
 339     uint64_t u_arg = UNSIGNED(arg, df);
 340     return  u_arg < M_MAX_UINT(m+1) ? u_arg :
 341                                       M_MAX_UINT(m+1);
 342 }
 343
 344 static inline int64_t msa_srar_df(uint32_t df, int64_t arg1, int64_t arg2)
 345 {
 346     int32_t b_arg2 = BIT_POSITION(arg2, df);
 347     if (b_arg2 == 0) {
 348         return arg1;
 349     } else {
 350         int64_t r_bit = (arg1 >> (b_arg2 - 1)) & 1;
 351         return (arg1 >> b_arg2) + r_bit;
 352     }
 353 }
 354
 355 static inline int64_t msa_srlr_df(uint32_t df, int64_t arg1, int64_t arg2)
 356 {
 357     uint64_t u_arg1 = UNSIGNED(arg1, df);
 358     int32_t b_arg2 = BIT_POSITION(arg2, df);
 359     if (b_arg2 == 0) {
 360         return u_arg1;
 361     } else {
 362         uint64_t r_bit = (u_arg1 >> (b_arg2 - 1)) & 1;
 363         return (u_arg1 >> b_arg2) + r_bit;
 364     }
 365 }
 366
 367 #define MSA_BINOP_IMMU_DF(helper, func)                                  \
 368 void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df, uint32_t wd, \
 369                        uint32_t ws, uint32_t u5)                        \
 370 {                                                                       \
 371     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
 372     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
 373     uint32_t i;                                                         \
 374                                                                         \
 375     switch (df) {                                                       \
 376     case DF_BYTE:                                                       \
 377         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
 378             pwd->b[i] = msa_ ## func ## _df(df, pws->b[i], u5);         \
 379         }                                                               \
 380         break;                                                          \
 381     case DF_HALF:                                                       \
 382         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
 383             pwd->h[i] = msa_ ## func ## _df(df, pws->h[i], u5);         \
 384         }                                                               \
 385         break;                                                          \
 386     case DF_WORD:                                                       \
 387         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
 388             pwd->w[i] = msa_ ## func ## _df(df, pws->w[i], u5);         \
 389         }                                                               \
 390         break;                                                          \
 391     case DF_DOUBLE:                                                     \
 392         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
 393             pwd->d[i] = msa_ ## func ## _df(df, pws->d[i], u5);         \
 394         }                                                               \
 395         break;                                                          \
 396     default:                                                            \
 397         assert(0);                                                      \
 398     }                                                                   \
 399 }
 400
 401 MSA_BINOP_IMMU_DF(slli, sll)
 402 MSA_BINOP_IMMU_DF(srai, sra)
 403 MSA_BINOP_IMMU_DF(srli, srl)
 404 MSA_BINOP_IMMU_DF(bclri, bclr)
 405 MSA_BINOP_IMMU_DF(bseti, bset)
 406 MSA_BINOP_IMMU_DF(bnegi, bneg)
 407 MSA_BINOP_IMMU_DF(sat_s, sat_s)
 408 MSA_BINOP_IMMU_DF(sat_u, sat_u)
 409 MSA_BINOP_IMMU_DF(srari, srar)
 410 MSA_BINOP_IMMU_DF(srlri, srlr)
 411 #undef MSA_BINOP_IMMU_DF
 412
 413 #define MSA_TEROP_IMMU_DF(helper, func)                                  \
 414 void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df,       \
 415                                   uint32_t wd, uint32_t ws, uint32_t u5) \
 416 {                                                                       \
 417     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
 418     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
 419     uint32_t i;                                                         \
 420                                                                         \
 421     switch (df) {                                                       \
 422     case DF_BYTE:                                                       \
 423         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
 424             pwd->b[i] = msa_ ## func ## _df(df, pwd->b[i], pws->b[i],   \
 425                                             u5);                        \
 426         }                                                               \
 427         break;                                                          \
 428     case DF_HALF:                                                       \
 429         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
 430             pwd->h[i] = msa_ ## func ## _df(df, pwd->h[i], pws->h[i],   \
 431                                             u5);                        \
 432         }                                                               \
 433         break;                                                          \
 434     case DF_WORD:                                                       \
 435         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
 436             pwd->w[i] = msa_ ## func ## _df(df, pwd->w[i], pws->w[i],   \
 437                                             u5);                        \
 438         }                                                               \
 439         break;                                                          \
 440     case DF_DOUBLE:                                                     \
 441         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
 442             pwd->d[i] = msa_ ## func ## _df(df, pwd->d[i], pws->d[i],   \
 443                                             u5);                        \
 444         }                                                               \
 445         break;                                                          \
 446     default:                                                            \
 447         assert(0);                                                      \
 448     }                                                                   \
 449 }
 450
 451 MSA_TEROP_IMMU_DF(binsli, binsl)
 452 MSA_TEROP_IMMU_DF(binsri, binsr)
 453 #undef MSA_TEROP_IMMU_DF
 454
 455 static inline int64_t msa_max_a_df(uint32_t df, int64_t arg1, int64_t arg2)
 456 {
 457     uint64_t abs_arg1 = arg1 >= 0 ? arg1 : -arg1;
 458     uint64_t abs_arg2 = arg2 >= 0 ? arg2 : -arg2;
 459     return abs_arg1 > abs_arg2 ? arg1 : arg2;
 460 }
 461
 462 static inline int64_t msa_min_a_df(uint32_t df, int64_t arg1, int64_t arg2)
 463 {
 464     uint64_t abs_arg1 = arg1 >= 0 ? arg1 : -arg1;
 465     uint64_t abs_arg2 = arg2 >= 0 ? arg2 : -arg2;
 466     return abs_arg1 < abs_arg2 ? arg1 : arg2;
 467 }
 468
 469 static inline int64_t msa_add_a_df(uint32_t df, int64_t arg1, int64_t arg2)
 470 {
 471     uint64_t abs_arg1 = arg1 >= 0 ? arg1 : -arg1;
 472     uint64_t abs_arg2 = arg2 >= 0 ? arg2 : -arg2;
 473     return abs_arg1 + abs_arg2;
 474 }
 475
 476 static inline int64_t msa_adds_a_df(uint32_t df, int64_t arg1, int64_t arg2)
 477 {
 478     uint64_t max_int = (uint64_t)DF_MAX_INT(df);
 479     uint64_t abs_arg1 = arg1 >= 0 ? arg1 : -arg1;
 480     uint64_t abs_arg2 = arg2 >= 0 ? arg2 : -arg2;
 481     if (abs_arg1 > max_int || abs_arg2 > max_int) {
 482         return (int64_t)max_int;
 483     } else {
 484         return (abs_arg1 < max_int - abs_arg2) ? abs_arg1 + abs_arg2 : max_int;
 485     }
 486 }
 487
 488 static inline int64_t msa_adds_s_df(uint32_t df, int64_t arg1, int64_t arg2)
 489 {
 490     int64_t max_int = DF_MAX_INT(df);
 491     int64_t min_int = DF_MIN_INT(df);
 492     if (arg1 < 0) {
 493         return (min_int - arg1 < arg2) ? arg1 + arg2 : min_int;
 494     } else {
 495         return (arg2 < max_int - arg1) ? arg1 + arg2 : max_int;
 496     }
 497 }
 498
 499 static inline uint64_t msa_adds_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
 500 {
 501     uint64_t max_uint = DF_MAX_UINT(df);
 502     uint64_t u_arg1 = UNSIGNED(arg1, df);
 503     uint64_t u_arg2 = UNSIGNED(arg2, df);
 504     return (u_arg1 < max_uint - u_arg2) ? u_arg1 + u_arg2 : max_uint;
 505 }
 506
 507 static inline int64_t msa_ave_s_df(uint32_t df, int64_t arg1, int64_t arg2)
 508 {
 509     /* signed shift */
 510     return (arg1 >> 1) + (arg2 >> 1) + (arg1 & arg2 & 1);
 511 }
 512
 513 static inline uint64_t msa_ave_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
 514 {
 515     uint64_t u_arg1 = UNSIGNED(arg1, df);
 516     uint64_t u_arg2 = UNSIGNED(arg2, df);
 517     /* unsigned shift */
 518     return (u_arg1 >> 1) + (u_arg2 >> 1) + (u_arg1 & u_arg2 & 1);
 519 }
 520
 521 static inline int64_t msa_aver_s_df(uint32_t df, int64_t arg1, int64_t arg2)
 522 {
 523     /* signed shift */
 524     return (arg1 >> 1) + (arg2 >> 1) + ((arg1 | arg2) & 1);
 525 }
 526
 527 static inline uint64_t msa_aver_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
 528 {
 529     uint64_t u_arg1 = UNSIGNED(arg1, df);
 530     uint64_t u_arg2 = UNSIGNED(arg2, df);
 531     /* unsigned shift */
 532     return (u_arg1 >> 1) + (u_arg2 >> 1) + ((u_arg1 | u_arg2) & 1);
 533 }
 534
 535 static inline int64_t msa_subs_s_df(uint32_t df, int64_t arg1, int64_t arg2)
 536 {
 537     int64_t max_int = DF_MAX_INT(df);
 538     int64_t min_int = DF_MIN_INT(df);
 539     if (arg2 > 0) {
 540         return (min_int + arg2 < arg1) ? arg1 - arg2 : min_int;
 541     } else {
 542         return (arg1 < max_int + arg2) ? arg1 - arg2 : max_int;
 543     }
 544 }
 545
 546 static inline int64_t msa_subs_u_df(uint32_t df, int64_t arg1, int64_t arg2)
 547 {
 548     uint64_t u_arg1 = UNSIGNED(arg1, df);
 549     uint64_t u_arg2 = UNSIGNED(arg2, df);
 550     return (u_arg1 > u_arg2) ? u_arg1 - u_arg2 : 0;
 551 }
 552
 553 static inline int64_t msa_subsus_u_df(uint32_t df, int64_t arg1, int64_t arg2)
 554 {
 555     uint64_t u_arg1 = UNSIGNED(arg1, df);
 556     uint64_t max_uint = DF_MAX_UINT(df);
 557     if (arg2 >= 0) {
 558         uint64_t u_arg2 = (uint64_t)arg2;
 559         return (u_arg1 > u_arg2) ?
 560             (int64_t)(u_arg1 - u_arg2) :
 561             0;
 562     } else {
 563         uint64_t u_arg2 = (uint64_t)(-arg2);
 564         return (u_arg1 < max_uint - u_arg2) ?
 565             (int64_t)(u_arg1 + u_arg2) :
 566             (int64_t)max_uint;
 567     }
 568 }
 569
 570 static inline int64_t msa_subsuu_s_df(uint32_t df, int64_t arg1, int64_t arg2)
 571 {
 572     uint64_t u_arg1 = UNSIGNED(arg1, df);
 573     uint64_t u_arg2 = UNSIGNED(arg2, df);
 574     int64_t max_int = DF_MAX_INT(df);
 575     int64_t min_int = DF_MIN_INT(df);
 576     if (u_arg1 > u_arg2) {
 577         return u_arg1 - u_arg2 < (uint64_t)max_int ?
 578             (int64_t)(u_arg1 - u_arg2) :
 579             max_int;
 580     } else {
 581         return u_arg2 - u_arg1 < (uint64_t)(-min_int) ?
 582             (int64_t)(u_arg1 - u_arg2) :
 583             min_int;
 584     }
 585 }
 586
 587 static inline int64_t msa_asub_s_df(uint32_t df, int64_t arg1, int64_t arg2)
 588 {
 589     /* signed compare */
 590     return (arg1 < arg2) ?
 591         (uint64_t)(arg2 - arg1) : (uint64_t)(arg1 - arg2);
 592 }
 593
 594 static inline uint64_t msa_asub_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
 595 {
 596     uint64_t u_arg1 = UNSIGNED(arg1, df);
 597     uint64_t u_arg2 = UNSIGNED(arg2, df);
 598     /* unsigned compare */
 599     return (u_arg1 < u_arg2) ?
 600         (uint64_t)(u_arg2 - u_arg1) : (uint64_t)(u_arg1 - u_arg2);
 601 }
 602
 603 static inline int64_t msa_mulv_df(uint32_t df, int64_t arg1, int64_t arg2)
 604 {
 605     return arg1 * arg2;
 606 }
 607
 608 static inline int64_t msa_div_s_df(uint32_t df, int64_t arg1, int64_t arg2)
 609 {
 610     if (arg1 == DF_MIN_INT(df) && arg2 == -1) {
 611         return DF_MIN_INT(df);
 612     }
 613     return arg2 ? arg1 / arg2 : 0;
 614 }
 615
 616 static inline int64_t msa_div_u_df(uint32_t df, int64_t arg1, int64_t arg2)
 617 {
 618     uint64_t u_arg1 = UNSIGNED(arg1, df);
 619     uint64_t u_arg2 = UNSIGNED(arg2, df);
 620     return u_arg2 ? u_arg1 / u_arg2 : 0;
 621 }
 622
 623 static inline int64_t msa_mod_s_df(uint32_t df, int64_t arg1, int64_t arg2)
 624 {
 625     if (arg1 == DF_MIN_INT(df) && arg2 == -1) {
 626         return 0;
 627     }
 628     return arg2 ? arg1 % arg2 : 0;
 629 }
 630
 631 static inline int64_t msa_mod_u_df(uint32_t df, int64_t arg1, int64_t arg2)
 632 {
 633     uint64_t u_arg1 = UNSIGNED(arg1, df);
 634     uint64_t u_arg2 = UNSIGNED(arg2, df);
 635     return u_arg2 ? u_arg1 % u_arg2 : 0;
 636 }
 637
 638 #define SIGNED_EVEN(a, df) \
 639         ((((int64_t)(a)) << (64 - DF_BITS(df)/2)) >> (64 - DF_BITS(df)/2))
 640
 641 #define UNSIGNED_EVEN(a, df) \
 642         ((((uint64_t)(a)) << (64 - DF_BITS(df)/2)) >> (64 - DF_BITS(df)/2))
 643
 644 #define SIGNED_ODD(a, df) \
 645         ((((int64_t)(a)) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df)/2))
 646
 647 #define UNSIGNED_ODD(a, df) \
 648         ((((uint64_t)(a)) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df)/2))
 649
 650 #define SIGNED_EXTRACT(e, o, a, df)     \
 651     do {                                \
 652         e = SIGNED_EVEN(a, df);         \
 653         o = SIGNED_ODD(a, df);          \
 654     } while (0);
 655
 656 #define UNSIGNED_EXTRACT(e, o, a, df)   \
 657     do {                                \
 658         e = UNSIGNED_EVEN(a, df);       \
 659         o = UNSIGNED_ODD(a, df);        \
 660     } while (0);
 661
 662 static inline int64_t msa_dotp_s_df(uint32_t df, int64_t arg1, int64_t arg2)
 663 {
 664     int64_t even_arg1;
 665     int64_t even_arg2;
 666     int64_t odd_arg1;
 667     int64_t odd_arg2;
 668     SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
 669     SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
 670     return (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2);
 671 }
 672
 673 static inline int64_t msa_dotp_u_df(uint32_t df, int64_t arg1, int64_t arg2)
 674 {
 675     int64_t even_arg1;
 676     int64_t even_arg2;
 677     int64_t odd_arg1;
 678     int64_t odd_arg2;
 679     UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
 680     UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
 681     return (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2);
 682 }
 683
 684 #define CONCATENATE_AND_SLIDE(s, k)             \
 685     do {                                        \
 686         for (i = 0; i < s; i++) {               \
 687             v[i]     = pws->b[s * k + i];       \
 688             v[i + s] = pwd->b[s * k + i];       \
 689         }                                       \
 690         for (i = 0; i < s; i++) {               \
 691             pwd->b[s * k + i] = v[i + n];       \
 692         }                                       \
 693     } while (0)
 694
 695 static inline void msa_sld_df(uint32_t df, wr_t *pwd,
 696                               wr_t *pws, target_ulong rt)
 697 {
 698     uint32_t n = rt % DF_ELEMENTS(df);
 699     uint8_t v[64];
 700     uint32_t i, k;
 701
 702     switch (df) {
 703     case DF_BYTE:
 704         CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_BYTE), 0);
 705         break;
 706     case DF_HALF:
 707         for (k = 0; k < 2; k++) {
 708             CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_HALF), k);
 709         }
 710         break;
 711     case DF_WORD:
 712         for (k = 0; k < 4; k++) {
 713             CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_WORD), k);
 714         }
 715         break;
 716     case DF_DOUBLE:
 717         for (k = 0; k < 8; k++) {
 718             CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_DOUBLE), k);
 719         }
 720         break;
 721     default:
 722         assert(0);
 723     }
 724 }
 725
 726 static inline int64_t msa_hadd_s_df(uint32_t df, int64_t arg1, int64_t arg2)
 727 {
 728     return SIGNED_ODD(arg1, df) + SIGNED_EVEN(arg2, df);
 729 }
 730
 731 static inline int64_t msa_hadd_u_df(uint32_t df, int64_t arg1, int64_t arg2)
 732 {
 733     return UNSIGNED_ODD(arg1, df) + UNSIGNED_EVEN(arg2, df);
 734 }
 735
 736 static inline int64_t msa_hsub_s_df(uint32_t df, int64_t arg1, int64_t arg2)
 737 {
 738     return SIGNED_ODD(arg1, df) - SIGNED_EVEN(arg2, df);
 739 }
 740
 741 static inline int64_t msa_hsub_u_df(uint32_t df, int64_t arg1, int64_t arg2)
 742 {
 743     return UNSIGNED_ODD(arg1, df) - UNSIGNED_EVEN(arg2, df);
 744 }
 745
 746 #define MSA_BINOP_DF(func) \
 747 void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df,         \
 748                                 uint32_t wd, uint32_t ws, uint32_t wt)  \
 749 {                                                                       \
 750     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
 751     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
 752     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                          \
 753     uint32_t i;                                                         \
 754                                                                         \
 755     switch (df) {                                                       \
 756     case DF_BYTE:                                                       \
 757         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
 758             pwd->b[i] = msa_ ## func ## _df(df, pws->b[i], pwt->b[i]);  \
 759         }                                                               \
 760         break;                                                          \
 761     case DF_HALF:                                                       \
 762         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
 763             pwd->h[i] = msa_ ## func ## _df(df, pws->h[i], pwt->h[i]);  \
 764         }                                                               \
 765         break;                                                          \
 766     case DF_WORD:                                                       \
 767         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
 768             pwd->w[i] = msa_ ## func ## _df(df, pws->w[i], pwt->w[i]);  \
 769         }                                                               \
 770         break;                                                          \
 771     case DF_DOUBLE:                                                     \
 772         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
 773             pwd->d[i] = msa_ ## func ## _df(df, pws->d[i], pwt->d[i]);  \
 774         }                                                               \
 775         break;                                                          \
 776     default:                                                            \
 777         assert(0);                                                      \
 778     }                                                                   \
 779 }
 780
 781 MSA_BINOP_DF(sll)
 782 MSA_BINOP_DF(sra)
 783 MSA_BINOP_DF(srl)
 784 MSA_BINOP_DF(bclr)
 785 MSA_BINOP_DF(bset)
 786 MSA_BINOP_DF(bneg)
 787 MSA_BINOP_DF(addv)
 788 MSA_BINOP_DF(subv)
 789 MSA_BINOP_DF(max_s)
 790 MSA_BINOP_DF(max_u)
 791 MSA_BINOP_DF(min_s)
 792 MSA_BINOP_DF(min_u)
 793 MSA_BINOP_DF(max_a)
 794 MSA_BINOP_DF(min_a)
 795 MSA_BINOP_DF(ceq)
 796 MSA_BINOP_DF(clt_s)
 797 MSA_BINOP_DF(clt_u)
 798 MSA_BINOP_DF(cle_s)
 799 MSA_BINOP_DF(cle_u)
 800 MSA_BINOP_DF(add_a)
 801 MSA_BINOP_DF(adds_a)
 802 MSA_BINOP_DF(adds_s)
 803 MSA_BINOP_DF(adds_u)
 804 MSA_BINOP_DF(ave_s)
 805 MSA_BINOP_DF(ave_u)
 806 MSA_BINOP_DF(aver_s)
 807 MSA_BINOP_DF(aver_u)
 808 MSA_BINOP_DF(subs_s)
 809 MSA_BINOP_DF(subs_u)
 810 MSA_BINOP_DF(subsus_u)
 811 MSA_BINOP_DF(subsuu_s)
 812 MSA_BINOP_DF(asub_s)
 813 MSA_BINOP_DF(asub_u)
 814 MSA_BINOP_DF(mulv)
 815 MSA_BINOP_DF(div_s)
 816 MSA_BINOP_DF(div_u)
 817 MSA_BINOP_DF(mod_s)
 818 MSA_BINOP_DF(mod_u)
 819 MSA_BINOP_DF(dotp_s)
 820 MSA_BINOP_DF(dotp_u)
 821 MSA_BINOP_DF(srar)
 822 MSA_BINOP_DF(srlr)
 823 MSA_BINOP_DF(hadd_s)
 824 MSA_BINOP_DF(hadd_u)
 825 MSA_BINOP_DF(hsub_s)
 826 MSA_BINOP_DF(hsub_u)
 827 #undef MSA_BINOP_DF
 828
 829 void helper_msa_sld_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
 830                        uint32_t ws, uint32_t rt)
 831 {
 832     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 833     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 834
 835     msa_sld_df(df, pwd, pws, env->active_tc.gpr[rt]);
 836 }
 837
 838 static inline int64_t msa_maddv_df(uint32_t df, int64_t dest, int64_t arg1,
 839                                    int64_t arg2)
 840 {
 841     return dest + arg1 * arg2;
 842 }
 843
 844 static inline int64_t msa_msubv_df(uint32_t df, int64_t dest, int64_t arg1,
 845                                    int64_t arg2)
 846 {
 847     return dest - arg1 * arg2;
 848 }
 849
 850 static inline int64_t msa_dpadd_s_df(uint32_t df, int64_t dest, int64_t arg1,
 851                                      int64_t arg2)
 852 {
 853     int64_t even_arg1;
 854     int64_t even_arg2;
 855     int64_t odd_arg1;
 856     int64_t odd_arg2;
 857     SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
 858     SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
 859     return dest + (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2);
 860 }
 861
 862 static inline int64_t msa_dpadd_u_df(uint32_t df, int64_t dest, int64_t arg1,
 863                                      int64_t arg2)
 864 {
 865     int64_t even_arg1;
 866     int64_t even_arg2;
 867     int64_t odd_arg1;
 868     int64_t odd_arg2;
 869     UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
 870     UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
 871     return dest + (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2);
 872 }
 873
 874 static inline int64_t msa_dpsub_s_df(uint32_t df, int64_t dest, int64_t arg1,
 875                                      int64_t arg2)
 876 {
 877     int64_t even_arg1;
 878     int64_t even_arg2;
 879     int64_t odd_arg1;
 880     int64_t odd_arg2;
 881     SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
 882     SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
 883     return dest - ((even_arg1 * even_arg2) + (odd_arg1 * odd_arg2));
 884 }
 885
 886 static inline int64_t msa_dpsub_u_df(uint32_t df, int64_t dest, int64_t arg1,
 887                                      int64_t arg2)
 888 {
 889     int64_t even_arg1;
 890     int64_t even_arg2;
 891     int64_t odd_arg1;
 892     int64_t odd_arg2;
 893     UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
 894     UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
 895     return dest - ((even_arg1 * even_arg2) + (odd_arg1 * odd_arg2));
 896 }
 897
 898 #define MSA_TEROP_DF(func) \
 899 void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df, uint32_t wd,   \
 900                           uint32_t ws, uint32_t wt)                     \
 901 {                                                                       \
 902     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
 903     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
 904     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                          \
 905     uint32_t i;                                                         \
 906                                                                         \
 907     switch (df) {                                                       \
 908     case DF_BYTE:                                                       \
 909         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
 910             pwd->b[i] = msa_ ## func ## _df(df, pwd->b[i], pws->b[i],   \
 911                                             pwt->b[i]);                 \
 912         }                                                               \
 913         break;                                                          \
 914     case DF_HALF:                                                       \
 915         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
 916             pwd->h[i] = msa_ ## func ## _df(df, pwd->h[i], pws->h[i],   \
 917                                             pwt->h[i]);                 \
 918         }                                                               \
 919         break;                                                          \
 920     case DF_WORD:                                                       \
 921         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
 922             pwd->w[i] = msa_ ## func ## _df(df, pwd->w[i], pws->w[i],   \
 923                                             pwt->w[i]);                 \
 924         }                                                               \
 925         break;                                                          \
 926     case DF_DOUBLE:                                                     \
 927         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
 928             pwd->d[i] = msa_ ## func ## _df(df, pwd->d[i], pws->d[i],   \
 929                                             pwt->d[i]);                 \
 930         }                                                               \
 931         break;                                                          \
 932     default:                                                            \
 933         assert(0);                                                      \
 934     }                                                                   \
 935 }
 936
 937 MSA_TEROP_DF(maddv)
 938 MSA_TEROP_DF(msubv)
 939 MSA_TEROP_DF(dpadd_s)
 940 MSA_TEROP_DF(dpadd_u)
 941 MSA_TEROP_DF(dpsub_s)
 942 MSA_TEROP_DF(dpsub_u)
 943 MSA_TEROP_DF(binsl)
 944 MSA_TEROP_DF(binsr)
 945 #undef MSA_TEROP_DF
 946
 947 static inline void msa_splat_df(uint32_t df, wr_t *pwd,
 948                                 wr_t *pws, target_ulong rt)
 949 {
 950     uint32_t n = rt % DF_ELEMENTS(df);
 951     uint32_t i;
 952
 953     switch (df) {
 954     case DF_BYTE:
 955         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
 956             pwd->b[i] = pws->b[n];
 957         }
 958         break;
 959     case DF_HALF:
 960         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
 961             pwd->h[i] = pws->h[n];
 962         }
 963         break;
 964     case DF_WORD:
 965         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
 966             pwd->w[i] = pws->w[n];
 967         }
 968         break;
 969     case DF_DOUBLE:
 970         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
 971             pwd->d[i] = pws->d[n];
 972         }
 973        break;
 974     default:
 975         assert(0);
 976     }
 977 }
 978
 979 void helper_msa_splat_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
 980                          uint32_t ws, uint32_t rt)
 981 {
 982     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 983     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 984
 985     msa_splat_df(df, pwd, pws, env->active_tc.gpr[rt]);
 986 }
 987
 988 #define MSA_DO_B MSA_DO(b)
 989 #define MSA_DO_H MSA_DO(h)
 990 #define MSA_DO_W MSA_DO(w)
 991 #define MSA_DO_D MSA_DO(d)
 992
 993 #define MSA_LOOP_B MSA_LOOP(B)
 994 #define MSA_LOOP_H MSA_LOOP(H)
 995 #define MSA_LOOP_W MSA_LOOP(W)
 996 #define MSA_LOOP_D MSA_LOOP(D)
 997
 998 #define MSA_LOOP_COND_B MSA_LOOP_COND(DF_BYTE)
 999 #define MSA_LOOP_COND_H MSA_LOOP_COND(DF_HALF)
1000 #define MSA_LOOP_COND_W MSA_LOOP_COND(DF_WORD)
1001 #define MSA_LOOP_COND_D MSA_LOOP_COND(DF_DOUBLE)
1002
1003 #define MSA_LOOP(DF) \
1004         for (i = 0; i < (MSA_LOOP_COND_ ## DF) ; i++) { \
1005             MSA_DO_ ## DF \
1006         }
1007
/*
 * MSA_FN_DF(FUNC) emits helper_msa_<FUNC>().  The generated function runs
 * the MSA_LOOP_x loop matching df, whose body (the current MSA_DO) may refer
 * to pwd, pws, pwt, pwx, df and i, and which is expected to build the result
 * in the scratch register *pwx.  The scratch register is then copied to wd
 * with msa_move_v().  An unknown df trips the assertion.
 */
#define MSA_FN_DF(FUNC)                                             \
void helper_msa_##FUNC(CPUMIPSState *env, uint32_t df, uint32_t wd, \
        uint32_t ws, uint32_t wt)                                   \
{                                                                   \
    wr_t *pwd = &env->active_fpu.fpr[wd].wr;                        \
    wr_t *pws = &env->active_fpu.fpr[ws].wr;                        \
    wr_t *pwt = &env->active_fpu.fpr[wt].wr;                        \
    wr_t wx;                                                        \
    wr_t *pwx = &wx;                                                \
    uint32_t i;                                                     \
                                                                    \
    if (df == DF_BYTE) {                                            \
        MSA_LOOP_B                                                  \
    } else if (df == DF_HALF) {                                     \
        MSA_LOOP_H                                                  \
    } else if (df == DF_WORD) {                                     \
        MSA_LOOP_W                                                  \
    } else if (df == DF_DOUBLE) {                                   \
        MSA_LOOP_D                                                  \
    } else {                                                        \
        assert(0);                                                  \
    }                                                               \
    msa_move_v(pwd, pwx);                                           \
}
1035
/*
 * Loop bound for the pack/interleave helpers: each iteration handles two
 * result lanes, so only half of the lanes are iterated.
 */
#define MSA_LOOP_COND(DF) \
            (DF_ELEMENTS(DF) / 2)

/*
 * Lane accessors for a vector register pointer.
 *   Rx(pwr, i) - lane i counted from the start of the register
 *   Lx(pwr, i) - lane i counted from the middle of the register
 * where x is the union member (b, h, w or d).  Both arguments are
 * parenthesized so that arbitrary expressions can be passed safely.
 */
#define Rb(pwr, i) ((pwr)->b[(i)])
#define Lb(pwr, i) ((pwr)->b[(i) + DF_ELEMENTS(DF_BYTE) / 2])
#define Rh(pwr, i) ((pwr)->h[(i)])
#define Lh(pwr, i) ((pwr)->h[(i) + DF_ELEMENTS(DF_HALF) / 2])
#define Rw(pwr, i) ((pwr)->w[(i)])
#define Lw(pwr, i) ((pwr)->w[(i) + DF_ELEMENTS(DF_WORD) / 2])
#define Rd(pwr, i) ((pwr)->d[(i)])
#define Ld(pwr, i) ((pwr)->d[(i) + DF_ELEMENTS(DF_DOUBLE) / 2])
1047
1048 #define MSA_DO(DF)                      \
1049     do {                                \
1050         R##DF(pwx, i) = pwt->DF[2*i];   \
1051         L##DF(pwx, i) = pws->DF[2*i];   \
1052     } while (0);
1053 MSA_FN_DF(pckev_df)
1054 #undef MSA_DO
1055
1056 #define MSA_DO(DF)                      \
1057     do {                                \
1058         R##DF(pwx, i) = pwt->DF[2*i+1]; \
1059         L##DF(pwx, i) = pws->DF[2*i+1]; \
1060     } while (0);
1061 MSA_FN_DF(pckod_df)
1062 #undef MSA_DO
1063
1064 #define MSA_DO(DF)                      \
1065     do {                                \
1066         pwx->DF[2*i]   = L##DF(pwt, i); \
1067         pwx->DF[2*i+1] = L##DF(pws, i); \
1068     } while (0);
1069 MSA_FN_DF(ilvl_df)
1070 #undef MSA_DO
1071
1072 #define MSA_DO(DF)                      \
1073     do {                                \
1074         pwx->DF[2*i]   = R##DF(pwt, i); \
1075         pwx->DF[2*i+1] = R##DF(pws, i); \
1076     } while (0);
1077 MSA_FN_DF(ilvr_df)
1078 #undef MSA_DO
1079
1080 #define MSA_DO(DF)                      \
1081     do {                                \
1082         pwx->DF[2*i]   = pwt->DF[2*i];  \
1083         pwx->DF[2*i+1] = pws->DF[2*i];  \
1084     } while (0);
1085 MSA_FN_DF(ilvev_df)
1086 #undef MSA_DO
1087
1088 #define MSA_DO(DF)                          \
1089     do {                                    \
1090         pwx->DF[2*i]   = pwt->DF[2*i+1];    \
1091         pwx->DF[2*i+1] = pws->DF[2*i+1];    \
1092     } while (0);
1093 MSA_FN_DF(ilvod_df)
1094 #undef MSA_DO
1095 #undef MSA_LOOP_COND
1096
1097 #define MSA_LOOP_COND(DF) \
1098             (DF_ELEMENTS(DF))
1099
1100 #define MSA_DO(DF)                                                          \
1101     do {                                                                    \
1102         uint32_t n = DF_ELEMENTS(df);                                       \
1103         uint32_t k = (pwd->DF[i] & 0x3f) % (2 * n);                         \
1104         pwx->DF[i] =                                                        \
1105             (pwd->DF[i] & 0xc0) ? 0 : k < n ? pwt->DF[k] : pws->DF[k - n];  \
1106     } while (0);
1107 MSA_FN_DF(vshf_df)
1108 #undef MSA_DO
1109 #undef MSA_LOOP_COND
1110 #undef MSA_FN_DF