target-arm/translate-a64.c

   1 /*
   2  *  AArch64 translation
   3  *
   4  *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19 #include <stdarg.h>
  20 #include <stdlib.h>
  21 #include <stdio.h>
  22 #include <string.h>
  23 #include <inttypes.h>
  24
  25 #include "cpu.h"
  26 #include "tcg-op.h"
  27 #include "qemu/log.h"
  28 #include "translate.h"
  29 #include "qemu/host-utils.h"
  30
  31 #include "exec/gen-icount.h"
  32
  33 #include "helper.h"
  34 #define GEN_HELPER 1
  35 #include "helper.h"
  36
  37 static TCGv_i64 cpu_X[32];
  38 static TCGv_i64 cpu_pc;
  39 static TCGv_i32 cpu_NF, cpu_ZF, cpu_CF, cpu_VF;
  40
  41 /* Load/store exclusive handling */
  42 static TCGv_i64 cpu_exclusive_addr;
  43 static TCGv_i64 cpu_exclusive_val;
  44 static TCGv_i64 cpu_exclusive_high;
  45 #ifdef CONFIG_USER_ONLY
  46 static TCGv_i64 cpu_exclusive_test;
  47 static TCGv_i32 cpu_exclusive_info;
  48 #endif
  49
  50 static const char *regnames[] = {
  51     "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
  52     "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
  53     "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
  54     "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
  55 };
  56
  57 enum a64_shift_type {
  58     A64_SHIFT_TYPE_LSL = 0,
  59     A64_SHIFT_TYPE_LSR = 1,
  60     A64_SHIFT_TYPE_ASR = 2,
  61     A64_SHIFT_TYPE_ROR = 3
  62 };
  63
  64 /* initialize TCG globals.  */
  65 void a64_translate_init(void)
  66 {
  67     int i;
  68
  69     cpu_pc = tcg_global_mem_new_i64(TCG_AREG0,
  70                                     offsetof(CPUARMState, pc),
  71                                     "pc");
  72     for (i = 0; i < 32; i++) {
  73         cpu_X[i] = tcg_global_mem_new_i64(TCG_AREG0,
  74                                           offsetof(CPUARMState, xregs[i]),
  75                                           regnames[i]);
  76     }
  77
  78     cpu_NF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, NF), "NF");
  79     cpu_ZF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, ZF), "ZF");
  80     cpu_CF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, CF), "CF");
  81     cpu_VF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, VF), "VF");
  82
  83     cpu_exclusive_addr = tcg_global_mem_new_i64(TCG_AREG0,
  84         offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
  85     cpu_exclusive_val = tcg_global_mem_new_i64(TCG_AREG0,
  86         offsetof(CPUARMState, exclusive_val), "exclusive_val");
  87     cpu_exclusive_high = tcg_global_mem_new_i64(TCG_AREG0,
  88         offsetof(CPUARMState, exclusive_high), "exclusive_high");
  89 #ifdef CONFIG_USER_ONLY
  90     cpu_exclusive_test = tcg_global_mem_new_i64(TCG_AREG0,
  91         offsetof(CPUARMState, exclusive_test), "exclusive_test");
  92     cpu_exclusive_info = tcg_global_mem_new_i32(TCG_AREG0,
  93         offsetof(CPUARMState, exclusive_info), "exclusive_info");
  94 #endif
  95 }
  96
  97 void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
  98                             fprintf_function cpu_fprintf, int flags)
  99 {
 100     ARMCPU *cpu = ARM_CPU(cs);
 101     CPUARMState *env = &cpu->env;
 102     uint32_t psr = pstate_read(env);
 103     int i;
 104
 105     cpu_fprintf(f, "PC=%016"PRIx64"  SP=%016"PRIx64"\n",
 106             env->pc, env->xregs[31]);
 107     for (i = 0; i < 31; i++) {
 108         cpu_fprintf(f, "X%02d=%016"PRIx64, i, env->xregs[i]);
 109         if ((i % 4) == 3) {
 110             cpu_fprintf(f, "\n");
 111         } else {
 112             cpu_fprintf(f, " ");
 113         }
 114     }
 115     cpu_fprintf(f, "PSTATE=%08x (flags %c%c%c%c)\n",
 116                 psr,
 117                 psr & PSTATE_N ? 'N' : '-',
 118                 psr & PSTATE_Z ? 'Z' : '-',
 119                 psr & PSTATE_C ? 'C' : '-',
 120                 psr & PSTATE_V ? 'V' : '-');
 121     cpu_fprintf(f, "\n");
 122
 123     if (flags & CPU_DUMP_FPU) {
 124         int numvfpregs = 32;
 125         for (i = 0; i < numvfpregs; i += 2) {
 126             uint64_t vlo = float64_val(env->vfp.regs[i * 2]);
 127             uint64_t vhi = float64_val(env->vfp.regs[(i * 2) + 1]);
 128             cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 " ",
 129                         i, vhi, vlo);
 130             vlo = float64_val(env->vfp.regs[(i + 1) * 2]);
 131             vhi = float64_val(env->vfp.regs[((i + 1) * 2) + 1]);
 132             cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 "\n",
 133                         i + 1, vhi, vlo);
 134         }
 135         cpu_fprintf(f, "FPCR: %08x  FPSR: %08x\n",
 136                     vfp_get_fpcr(env), vfp_get_fpsr(env));
 137     }
 138 }
 139
 140 static int get_mem_index(DisasContext *s)
 141 {
 142 #ifdef CONFIG_USER_ONLY
 143     return 1;
 144 #else
 145     return s->user;
 146 #endif
 147 }
 148
 149 void gen_a64_set_pc_im(uint64_t val)
 150 {
 151     tcg_gen_movi_i64(cpu_pc, val);
 152 }
 153
 154 static void gen_exception(int excp)
 155 {
 156     TCGv_i32 tmp = tcg_temp_new_i32();
 157     tcg_gen_movi_i32(tmp, excp);
 158     gen_helper_exception(cpu_env, tmp);
 159     tcg_temp_free_i32(tmp);
 160 }
 161
 162 static void gen_exception_insn(DisasContext *s, int offset, int excp)
 163 {
 164     gen_a64_set_pc_im(s->pc - offset);
 165     gen_exception(excp);
 166     s->is_jmp = DISAS_EXC;
 167 }
 168
 169 static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
 170 {
 171     /* No direct tb linking with singlestep or deterministic io */
 172     if (s->singlestep_enabled || (s->tb->cflags & CF_LAST_IO)) {
 173         return false;
 174     }
 175
 176     /* Only link tbs from inside the same guest page */
 177     if ((s->tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
 178         return false;
 179     }
 180
 181     return true;
 182 }
 183
 184 static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
 185 {
 186     TranslationBlock *tb;
 187
 188     tb = s->tb;
 189     if (use_goto_tb(s, n, dest)) {
 190         tcg_gen_goto_tb(n);
 191         gen_a64_set_pc_im(dest);
 192         tcg_gen_exit_tb((tcg_target_long)tb + n);
 193         s->is_jmp = DISAS_TB_JUMP;
 194     } else {
 195         gen_a64_set_pc_im(dest);
 196         if (s->singlestep_enabled) {
 197             gen_exception(EXCP_DEBUG);
 198         }
 199         tcg_gen_exit_tb(0);
 200         s->is_jmp = DISAS_JUMP;
 201     }
 202 }
 203
 204 static void unallocated_encoding(DisasContext *s)
 205 {
 206     gen_exception_insn(s, 4, EXCP_UDEF);
 207 }
 208
 209 #define unsupported_encoding(s, insn)                                    \
 210     do {                                                                 \
 211         qemu_log_mask(LOG_UNIMP,                                         \
 212                       "%s:%d: unsupported instruction encoding 0x%08x "  \
 213                       "at pc=%016" PRIx64 "\n",                          \
 214                       __FILE__, __LINE__, insn, s->pc - 4);              \
 215         unallocated_encoding(s);                                         \
 216     } while (0);
 217
 218 static void init_tmp_a64_array(DisasContext *s)
 219 {
 220 #ifdef CONFIG_DEBUG_TCG
 221     int i;
 222     for (i = 0; i < ARRAY_SIZE(s->tmp_a64); i++) {
 223         TCGV_UNUSED_I64(s->tmp_a64[i]);
 224     }
 225 #endif
 226     s->tmp_a64_count = 0;
 227 }
 228
 229 static void free_tmp_a64(DisasContext *s)
 230 {
 231     int i;
 232     for (i = 0; i < s->tmp_a64_count; i++) {
 233         tcg_temp_free_i64(s->tmp_a64[i]);
 234     }
 235     init_tmp_a64_array(s);
 236 }
 237
 238 static TCGv_i64 new_tmp_a64(DisasContext *s)
 239 {
 240     assert(s->tmp_a64_count < TMP_A64_MAX);
 241     return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
 242 }
 243
 244 static TCGv_i64 new_tmp_a64_zero(DisasContext *s)
 245 {
 246     TCGv_i64 t = new_tmp_a64(s);
 247     tcg_gen_movi_i64(t, 0);
 248     return t;
 249 }
 250
 251 /*
 252  * Register access functions
 253  *
 254  * These functions are used for directly accessing a register in where
 255  * changes to the final register value are likely to be made. If you
 256  * need to use a register for temporary calculation (e.g. index type
 257  * operations) use the read_* form.
 258  *
 259  * B1.2.1 Register mappings
 260  *
 261  * In instruction register encoding 31 can refer to ZR (zero register) or
 262  * the SP (stack pointer) depending on context. In QEMU's case we map SP
 263  * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 264  * This is the point of the _sp forms.
 265  */
 266 static TCGv_i64 cpu_reg(DisasContext *s, int reg)
 267 {
 268     if (reg == 31) {
 269         return new_tmp_a64_zero(s);
 270     } else {
 271         return cpu_X[reg];
 272     }
 273 }
 274
 275 /* register access for when 31 == SP */
 276 static TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
 277 {
 278     return cpu_X[reg];
 279 }
 280
 281 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 282  * representing the register contents. This TCGv is an auto-freed
 283  * temporary so it need not be explicitly freed, and may be modified.
 284  */
 285 static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
 286 {
 287     TCGv_i64 v = new_tmp_a64(s);
 288     if (reg != 31) {
 289         if (sf) {
 290             tcg_gen_mov_i64(v, cpu_X[reg]);
 291         } else {
 292             tcg_gen_ext32u_i64(v, cpu_X[reg]);
 293         }
 294     } else {
 295         tcg_gen_movi_i64(v, 0);
 296     }
 297     return v;
 298 }
 299
 300 static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
 301 {
 302     TCGv_i64 v = new_tmp_a64(s);
 303     if (sf) {
 304         tcg_gen_mov_i64(v, cpu_X[reg]);
 305     } else {
 306         tcg_gen_ext32u_i64(v, cpu_X[reg]);
 307     }
 308     return v;
 309 }
 310
 311 /* Return the offset into CPUARMState of an element of specified
 312  * size, 'element' places in from the least significant end of
 313  * the FP/vector register Qn.
 314  */
 315 static inline int vec_reg_offset(int regno, int element, TCGMemOp size)
 316 {
 317     int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
 318 #ifdef HOST_WORDS_BIGENDIAN
 319     /* This is complicated slightly because vfp.regs[2n] is
 320      * still the low half and  vfp.regs[2n+1] the high half
 321      * of the 128 bit vector, even on big endian systems.
 322      * Calculate the offset assuming a fully bigendian 128 bits,
 323      * then XOR to account for the order of the two 64 bit halves.
 324      */
 325     offs += (16 - ((element + 1) * (1 << size)));
 326     offs ^= 8;
 327 #else
 328     offs += element * (1 << size);
 329 #endif
 330     return offs;
 331 }
 332
 333 /* Return the offset into CPUARMState of a slice (from
 334  * the least significant end) of FP register Qn (ie
 335  * Dn, Sn, Hn or Bn).
 336  * (Note that this is not the same mapping as for A32; see cpu.h)
 337  */
 338 static inline int fp_reg_offset(int regno, TCGMemOp size)
 339 {
 340     int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
 341 #ifdef HOST_WORDS_BIGENDIAN
 342     offs += (8 - (1 << size));
 343 #endif
 344     return offs;
 345 }
 346
 347 /* Offset of the high half of the 128 bit vector Qn */
 348 static inline int fp_reg_hi_offset(int regno)
 349 {
 350     return offsetof(CPUARMState, vfp.regs[regno * 2 + 1]);
 351 }
 352
 353 /* Convenience accessors for reading and writing single and double
 354  * FP registers. Writing clears the upper parts of the associated
 355  * 128 bit vector register, as required by the architecture.
 356  * Note that unlike the GP register accessors, the values returned
 357  * by the read functions must be manually freed.
 358  */
 359 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
 360 {
 361     TCGv_i64 v = tcg_temp_new_i64();
 362
 363     tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(reg, MO_64));
 364     return v;
 365 }
 366
 367 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
 368 {
 369     TCGv_i32 v = tcg_temp_new_i32();
 370
 371     tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(reg, MO_32));
 372     return v;
 373 }
 374
 375 static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
 376 {
 377     TCGv_i64 tcg_zero = tcg_const_i64(0);
 378
 379     tcg_gen_st_i64(v, cpu_env, fp_reg_offset(reg, MO_64));
 380     tcg_gen_st_i64(tcg_zero, cpu_env, fp_reg_hi_offset(reg));
 381     tcg_temp_free_i64(tcg_zero);
 382 }
 383
 384 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
 385 {
 386     TCGv_i64 tmp = tcg_temp_new_i64();
 387
 388     tcg_gen_extu_i32_i64(tmp, v);
 389     write_fp_dreg(s, reg, tmp);
 390     tcg_temp_free_i64(tmp);
 391 }
 392
 393 static TCGv_ptr get_fpstatus_ptr(void)
 394 {
 395     TCGv_ptr statusptr = tcg_temp_new_ptr();
 396     int offset;
 397
 398     /* In A64 all instructions (both FP and Neon) use the FPCR;
 399      * there is no equivalent of the A32 Neon "standard FPSCR value"
 400      * and all operations use vfp.fp_status.
 401      */
 402     offset = offsetof(CPUARMState, vfp.fp_status);
 403     tcg_gen_addi_ptr(statusptr, cpu_env, offset);
 404     return statusptr;
 405 }
 406
 407 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 408  * than the 32 bit equivalent.
 409  */
 410 static inline void gen_set_NZ64(TCGv_i64 result)
 411 {
 412     TCGv_i64 flag = tcg_temp_new_i64();
 413
 414     tcg_gen_setcondi_i64(TCG_COND_NE, flag, result, 0);
 415     tcg_gen_trunc_i64_i32(cpu_ZF, flag);
 416     tcg_gen_shri_i64(flag, result, 32);
 417     tcg_gen_trunc_i64_i32(cpu_NF, flag);
 418     tcg_temp_free_i64(flag);
 419 }
 420
 421 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
 422 static inline void gen_logic_CC(int sf, TCGv_i64 result)
 423 {
 424     if (sf) {
 425         gen_set_NZ64(result);
 426     } else {
 427         tcg_gen_trunc_i64_i32(cpu_ZF, result);
 428         tcg_gen_trunc_i64_i32(cpu_NF, result);
 429     }
 430     tcg_gen_movi_i32(cpu_CF, 0);
 431     tcg_gen_movi_i32(cpu_VF, 0);
 432 }
 433
 434 /* dest = T0 + T1; compute C, N, V and Z flags */
 435 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 436 {
 437     if (sf) {
 438         TCGv_i64 result, flag, tmp;
 439         result = tcg_temp_new_i64();
 440         flag = tcg_temp_new_i64();
 441         tmp = tcg_temp_new_i64();
 442
 443         tcg_gen_movi_i64(tmp, 0);
 444         tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
 445
 446         tcg_gen_trunc_i64_i32(cpu_CF, flag);
 447
 448         gen_set_NZ64(result);
 449
 450         tcg_gen_xor_i64(flag, result, t0);
 451         tcg_gen_xor_i64(tmp, t0, t1);
 452         tcg_gen_andc_i64(flag, flag, tmp);
 453         tcg_temp_free_i64(tmp);
 454         tcg_gen_shri_i64(flag, flag, 32);
 455         tcg_gen_trunc_i64_i32(cpu_VF, flag);
 456
 457         tcg_gen_mov_i64(dest, result);
 458         tcg_temp_free_i64(result);
 459         tcg_temp_free_i64(flag);
 460     } else {
 461         /* 32 bit arithmetic */
 462         TCGv_i32 t0_32 = tcg_temp_new_i32();
 463         TCGv_i32 t1_32 = tcg_temp_new_i32();
 464         TCGv_i32 tmp = tcg_temp_new_i32();
 465
 466         tcg_gen_movi_i32(tmp, 0);
 467         tcg_gen_trunc_i64_i32(t0_32, t0);
 468         tcg_gen_trunc_i64_i32(t1_32, t1);
 469         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
 470         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 471         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 472         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 473         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 474         tcg_gen_extu_i32_i64(dest, cpu_NF);
 475
 476         tcg_temp_free_i32(tmp);
 477         tcg_temp_free_i32(t0_32);
 478         tcg_temp_free_i32(t1_32);
 479     }
 480 }
 481
 482 /* dest = T0 - T1; compute C, N, V and Z flags */
 483 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 484 {
 485     if (sf) {
 486         /* 64 bit arithmetic */
 487         TCGv_i64 result, flag, tmp;
 488
 489         result = tcg_temp_new_i64();
 490         flag = tcg_temp_new_i64();
 491         tcg_gen_sub_i64(result, t0, t1);
 492
 493         gen_set_NZ64(result);
 494
 495         tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
 496         tcg_gen_trunc_i64_i32(cpu_CF, flag);
 497
 498         tcg_gen_xor_i64(flag, result, t0);
 499         tmp = tcg_temp_new_i64();
 500         tcg_gen_xor_i64(tmp, t0, t1);
 501         tcg_gen_and_i64(flag, flag, tmp);
 502         tcg_temp_free_i64(tmp);
 503         tcg_gen_shri_i64(flag, flag, 32);
 504         tcg_gen_trunc_i64_i32(cpu_VF, flag);
 505         tcg_gen_mov_i64(dest, result);
 506         tcg_temp_free_i64(flag);
 507         tcg_temp_free_i64(result);
 508     } else {
 509         /* 32 bit arithmetic */
 510         TCGv_i32 t0_32 = tcg_temp_new_i32();
 511         TCGv_i32 t1_32 = tcg_temp_new_i32();
 512         TCGv_i32 tmp;
 513
 514         tcg_gen_trunc_i64_i32(t0_32, t0);
 515         tcg_gen_trunc_i64_i32(t1_32, t1);
 516         tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
 517         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 518         tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
 519         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 520         tmp = tcg_temp_new_i32();
 521         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 522         tcg_temp_free_i32(t0_32);
 523         tcg_temp_free_i32(t1_32);
 524         tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
 525         tcg_temp_free_i32(tmp);
 526         tcg_gen_extu_i32_i64(dest, cpu_NF);
 527     }
 528 }
 529
 530 /* dest = T0 + T1 + CF; do not compute flags. */
 531 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 532 {
 533     TCGv_i64 flag = tcg_temp_new_i64();
 534     tcg_gen_extu_i32_i64(flag, cpu_CF);
 535     tcg_gen_add_i64(dest, t0, t1);
 536     tcg_gen_add_i64(dest, dest, flag);
 537     tcg_temp_free_i64(flag);
 538
 539     if (!sf) {
 540         tcg_gen_ext32u_i64(dest, dest);
 541     }
 542 }
 543
 544 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
 545 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 546 {
 547     if (sf) {
 548         TCGv_i64 result, cf_64, vf_64, tmp;
 549         result = tcg_temp_new_i64();
 550         cf_64 = tcg_temp_new_i64();
 551         vf_64 = tcg_temp_new_i64();
 552         tmp = tcg_const_i64(0);
 553
 554         tcg_gen_extu_i32_i64(cf_64, cpu_CF);
 555         tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
 556         tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
 557         tcg_gen_trunc_i64_i32(cpu_CF, cf_64);
 558         gen_set_NZ64(result);
 559
 560         tcg_gen_xor_i64(vf_64, result, t0);
 561         tcg_gen_xor_i64(tmp, t0, t1);
 562         tcg_gen_andc_i64(vf_64, vf_64, tmp);
 563         tcg_gen_shri_i64(vf_64, vf_64, 32);
 564         tcg_gen_trunc_i64_i32(cpu_VF, vf_64);
 565
 566         tcg_gen_mov_i64(dest, result);
 567
 568         tcg_temp_free_i64(tmp);
 569         tcg_temp_free_i64(vf_64);
 570         tcg_temp_free_i64(cf_64);
 571         tcg_temp_free_i64(result);
 572     } else {
 573         TCGv_i32 t0_32, t1_32, tmp;
 574         t0_32 = tcg_temp_new_i32();
 575         t1_32 = tcg_temp_new_i32();
 576         tmp = tcg_const_i32(0);
 577
 578         tcg_gen_trunc_i64_i32(t0_32, t0);
 579         tcg_gen_trunc_i64_i32(t1_32, t1);
 580         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
 581         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);
 582
 583         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 584         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 585         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 586         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 587         tcg_gen_extu_i32_i64(dest, cpu_NF);
 588
 589         tcg_temp_free_i32(tmp);
 590         tcg_temp_free_i32(t1_32);
 591         tcg_temp_free_i32(t0_32);
 592     }
 593 }
 594
 595 /*
 596  * Load/Store generators
 597  */
 598
 599 /*
 600  * Store from GPR register to memory
 601  */
 602 static void do_gpr_st(DisasContext *s, TCGv_i64 source,
 603                       TCGv_i64 tcg_addr, int size)
 604 {
 605     g_assert(size <= 3);
 606     tcg_gen_qemu_st_i64(source, tcg_addr, get_mem_index(s), MO_TE + size);
 607 }
 608
 609 /*
 610  * Load from memory to GPR register
 611  */
 612 static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
 613                       int size, bool is_signed, bool extend)
 614 {
 615     TCGMemOp memop = MO_TE + size;
 616
 617     g_assert(size <= 3);
 618
 619     if (is_signed) {
 620         memop += MO_SIGN;
 621     }
 622
 623     tcg_gen_qemu_ld_i64(dest, tcg_addr, get_mem_index(s), memop);
 624
 625     if (extend && is_signed) {
 626         g_assert(size < 3);
 627         tcg_gen_ext32u_i64(dest, dest);
 628     }
 629 }
 630
 631 /*
 632  * Store from FP register to memory
 633  */
 634 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
 635 {
 636     /* This writes the bottom N bits of a 128 bit wide vector to memory */
 637     TCGv_i64 tmp = tcg_temp_new_i64();
 638     tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(srcidx, MO_64));
 639     if (size < 4) {
 640         tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TE + size);
 641     } else {
 642         TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
 643         tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TEQ);
 644         tcg_gen_qemu_st64(tmp, tcg_addr, get_mem_index(s));
 645         tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(srcidx));
 646         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
 647         tcg_gen_qemu_st_i64(tmp, tcg_hiaddr, get_mem_index(s), MO_TEQ);
 648         tcg_temp_free_i64(tcg_hiaddr);
 649     }
 650
 651     tcg_temp_free_i64(tmp);
 652 }
 653
 654 /*
 655  * Load from memory to FP register
 656  */
 657 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
 658 {
 659     /* This always zero-extends and writes to a full 128 bit wide vector */
 660     TCGv_i64 tmplo = tcg_temp_new_i64();
 661     TCGv_i64 tmphi;
 662
 663     if (size < 4) {
 664         TCGMemOp memop = MO_TE + size;
 665         tmphi = tcg_const_i64(0);
 666         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
 667     } else {
 668         TCGv_i64 tcg_hiaddr;
 669         tmphi = tcg_temp_new_i64();
 670         tcg_hiaddr = tcg_temp_new_i64();
 671
 672         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), MO_TEQ);
 673         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
 674         tcg_gen_qemu_ld_i64(tmphi, tcg_hiaddr, get_mem_index(s), MO_TEQ);
 675         tcg_temp_free_i64(tcg_hiaddr);
 676     }
 677
 678     tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(destidx, MO_64));
 679     tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(destidx));
 680
 681     tcg_temp_free_i64(tmplo);
 682     tcg_temp_free_i64(tmphi);
 683 }
 684
 685 /*
 686  * Vector load/store helpers.
 687  *
 688  * The principal difference between this and a FP load is that we don't
 689  * zero extend as we are filling a partial chunk of the vector register.
 690  * These functions don't support 128 bit loads/stores, which would be
 691  * normal load/store operations.
 692  */
 693
 694 /* Get value of an element within a vector register */
 695 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
 696                              int element, TCGMemOp memop)
 697 {
 698     int vect_off = vec_reg_offset(srcidx, element, memop & MO_SIZE);
 699     switch (memop) {
 700     case MO_8:
 701         tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
 702         break;
 703     case MO_16:
 704         tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
 705         break;
 706     case MO_32:
 707         tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
 708         break;
 709     case MO_8|MO_SIGN:
 710         tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
 711         break;
 712     case MO_16|MO_SIGN:
 713         tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
 714         break;
 715     case MO_32|MO_SIGN:
 716         tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
 717         break;
 718     case MO_64:
 719     case MO_64|MO_SIGN:
 720         tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
 721         break;
 722     default:
 723         g_assert_not_reached();
 724     }
 725 }
 726
 727 /* Set value of an element within a vector register */
 728 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
 729                               int element, TCGMemOp memop)
 730 {
 731     int vect_off = vec_reg_offset(destidx, element, memop & MO_SIZE);
 732     switch (memop) {
 733     case MO_8:
 734         tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
 735         break;
 736     case MO_16:
 737         tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
 738         break;
 739     case MO_32:
 740         tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
 741         break;
 742     case MO_64:
 743         tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
 744         break;
 745     default:
 746         g_assert_not_reached();
 747     }
 748 }
 749
 750 /* Clear the high 64 bits of a 128 bit vector (in general non-quad
 751  * vector ops all need to do this).
 752  */
 753 static void clear_vec_high(DisasContext *s, int rd)
 754 {
 755     TCGv_i64 tcg_zero = tcg_const_i64(0);
 756
 757     write_vec_element(s, tcg_zero, rd, 1, MO_64);
 758     tcg_temp_free_i64(tcg_zero);
 759 }
 760
 761 /* Store from vector register to memory */
 762 static void do_vec_st(DisasContext *s, int srcidx, int element,
 763                       TCGv_i64 tcg_addr, int size)
 764 {
 765     TCGMemOp memop = MO_TE + size;
 766     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
 767
 768     read_vec_element(s, tcg_tmp, srcidx, element, size);
 769     tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
 770
 771     tcg_temp_free_i64(tcg_tmp);
 772 }
 773
 774 /* Load from memory to vector register */
 775 static void do_vec_ld(DisasContext *s, int destidx, int element,
 776                       TCGv_i64 tcg_addr, int size)
 777 {
 778     TCGMemOp memop = MO_TE + size;
 779     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
 780
 781     tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
 782     write_vec_element(s, tcg_tmp, destidx, element, size);
 783
 784     tcg_temp_free_i64(tcg_tmp);
 785 }
 786
 787 /*
 788  * This utility function is for doing register extension with an
 789  * optional shift. You will likely want to pass a temporary for the
 790  * destination register. See DecodeRegExtend() in the ARM ARM.
 791  */
 792 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
 793                               int option, unsigned int shift)
 794 {
 795     int extsize = extract32(option, 0, 2);
 796     bool is_signed = extract32(option, 2, 1);
 797
 798     if (is_signed) {
 799         switch (extsize) {
 800         case 0:
 801             tcg_gen_ext8s_i64(tcg_out, tcg_in);
 802             break;
 803         case 1:
 804             tcg_gen_ext16s_i64(tcg_out, tcg_in);
 805             break;
 806         case 2:
 807             tcg_gen_ext32s_i64(tcg_out, tcg_in);
 808             break;
 809         case 3:
 810             tcg_gen_mov_i64(tcg_out, tcg_in);
 811             break;
 812         }
 813     } else {
 814         switch (extsize) {
 815         case 0:
 816             tcg_gen_ext8u_i64(tcg_out, tcg_in);
 817             break;
 818         case 1:
 819             tcg_gen_ext16u_i64(tcg_out, tcg_in);
 820             break;
 821         case 2:
 822             tcg_gen_ext32u_i64(tcg_out, tcg_in);
 823             break;
 824         case 3:
 825             tcg_gen_mov_i64(tcg_out, tcg_in);
 826             break;
 827         }
 828     }
 829
 830     if (shift) {
 831         tcg_gen_shli_i64(tcg_out, tcg_out, shift);
 832     }
 833 }
 834
 835 static inline void gen_check_sp_alignment(DisasContext *s)
 836 {
 837     /* The AArch64 architecture mandates that (if enabled via PSTATE
 838      * or SCTLR bits) there is a check that SP is 16-aligned on every
 839      * SP-relative load or store (with an exception generated if it is not).
 840      * In line with general QEMU practice regarding misaligned accesses,
 841      * we omit these checks for the sake of guest program performance.
 842      * This function is provided as a hook so we can more easily add these
 843      * checks in future (possibly as a "favour catching guest program bugs
 844      * over speed" user selectable option).
 845      */
 846 }
 847
 848 /*
 849  * the instruction disassembly implemented here matches
 850  * the instruction encoding classifications in chapter 3 (C3)
 851  * of the ARM Architecture Reference Manual (DDI0487A_a)
 852  */
 853
 854 /* C3.2.7 Unconditional branch (immediate)
 855  *   31  30       26 25                                  0
 856  * +----+-----------+-------------------------------------+
 857  * | op | 0 0 1 0 1 |                 imm26               |
 858  * +----+-----------+-------------------------------------+
 859  */
 860 static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
 861 {
 862     uint64_t addr = s->pc + sextract32(insn, 0, 26) * 4 - 4;
 863
 864     if (insn & (1 << 31)) {
 865         /* C5.6.26 BL Branch with link */
 866         tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
 867     }
 868
 869     /* C5.6.20 B Branch / C5.6.26 BL Branch with link */
 870     gen_goto_tb(s, 0, addr);
 871 }
 872
 873 /* C3.2.1 Compare & branch (immediate)
 874  *   31  30         25  24  23                  5 4      0
 875  * +----+-------------+----+---------------------+--------+
 876  * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
 877  * +----+-------------+----+---------------------+--------+
 878  */
 879 static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
 880 {
 881     unsigned int sf, op, rt;
 882     uint64_t addr;
 883     int label_match;
 884     TCGv_i64 tcg_cmp;
 885
 886     sf = extract32(insn, 31, 1);
 887     op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
 888     rt = extract32(insn, 0, 5);
 889     addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
 890
 891     tcg_cmp = read_cpu_reg(s, rt, sf);
 892     label_match = gen_new_label();
 893
 894     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
 895                         tcg_cmp, 0, label_match);
 896
 897     gen_goto_tb(s, 0, s->pc);
 898     gen_set_label(label_match);
 899     gen_goto_tb(s, 1, addr);
 900 }
 901
 902 /* C3.2.5 Test & branch (immediate)
 903  *   31  30         25  24  23   19 18          5 4    0
 904  * +----+-------------+----+-------+-------------+------+
 905  * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
 906  * +----+-------------+----+-------+-------------+------+
 907  */
 908 static void disas_test_b_imm(DisasContext *s, uint32_t insn)
 909 {
 910     unsigned int bit_pos, op, rt;
 911     uint64_t addr;
 912     int label_match;
 913     TCGv_i64 tcg_cmp;
 914
 915     bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
 916     op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
 917     addr = s->pc + sextract32(insn, 5, 14) * 4 - 4;
 918     rt = extract32(insn, 0, 5);
 919
 920     tcg_cmp = tcg_temp_new_i64();
 921     tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
 922     label_match = gen_new_label();
 923     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
 924                         tcg_cmp, 0, label_match);
 925     tcg_temp_free_i64(tcg_cmp);
 926     gen_goto_tb(s, 0, s->pc);
 927     gen_set_label(label_match);
 928     gen_goto_tb(s, 1, addr);
 929 }
 930
 931 /* C3.2.2 / C5.6.19 Conditional branch (immediate)
 932  *  31           25  24  23                  5   4  3    0
 933  * +---------------+----+---------------------+----+------+
 934  * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
 935  * +---------------+----+---------------------+----+------+
 936  */
 937 static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
 938 {
 939     unsigned int cond;
 940     uint64_t addr;
 941
 942     if ((insn & (1 << 4)) || (insn & (1 << 24))) {
 943         unallocated_encoding(s);
 944         return;
 945     }
 946     addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
 947     cond = extract32(insn, 0, 4);
 948
 949     if (cond < 0x0e) {
 950         /* genuinely conditional branches */
 951         int label_match = gen_new_label();
 952         arm_gen_test_cc(cond, label_match);
 953         gen_goto_tb(s, 0, s->pc);
 954         gen_set_label(label_match);
 955         gen_goto_tb(s, 1, addr);
 956     } else {
 957         /* 0xe and 0xf are both "always" conditions */
 958         gen_goto_tb(s, 0, addr);
 959     }
 960 }
 961
 962 /* C5.6.68 HINT */
 963 static void handle_hint(DisasContext *s, uint32_t insn,
 964                         unsigned int op1, unsigned int op2, unsigned int crm)
 965 {
 966     unsigned int selector = crm << 3 | op2;
 967
 968     if (op1 != 3) {
 969         unallocated_encoding(s);
 970         return;
 971     }
 972
 973     switch (selector) {
 974     case 0: /* NOP */
 975         return;
 976     case 1: /* YIELD */
 977     case 2: /* WFE */
 978     case 3: /* WFI */
 979     case 4: /* SEV */
 980     case 5: /* SEVL */
 981         /* we treat all as NOP at least for now */
 982         return;
 983     default:
 984         /* default specified as NOP equivalent */
 985         return;
 986     }
 987 }
 988
 989 static void gen_clrex(DisasContext *s, uint32_t insn)
 990 {
 991     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
 992 }
 993
 994 /* CLREX, DSB, DMB, ISB */
 995 static void handle_sync(DisasContext *s, uint32_t insn,
 996                         unsigned int op1, unsigned int op2, unsigned int crm)
 997 {
 998     if (op1 != 3) {
 999         unallocated_encoding(s);
1000         return;
1001     }
1002
1003     switch (op2) {
1004     case 2: /* CLREX */
1005         gen_clrex(s, insn);
1006         return;
1007     case 4: /* DSB */
1008     case 5: /* DMB */
1009     case 6: /* ISB */
1010         /* We don't emulate caches so barriers are no-ops */
1011         return;
1012     default:
1013         unallocated_encoding(s);
1014         return;
1015     }
1016 }
1017
1018 /* C5.6.130 MSR (immediate) - move immediate to processor state field */
1019 static void handle_msr_i(DisasContext *s, uint32_t insn,
1020                          unsigned int op1, unsigned int op2, unsigned int crm)
1021 {
1022     unsupported_encoding(s, insn);
1023 }
1024
1025 static void gen_get_nzcv(TCGv_i64 tcg_rt)
1026 {
1027     TCGv_i32 tmp = tcg_temp_new_i32();
1028     TCGv_i32 nzcv = tcg_temp_new_i32();
1029
1030     /* build bit 31, N */
1031     tcg_gen_andi_i32(nzcv, cpu_NF, (1 << 31));
1032     /* build bit 30, Z */
1033     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1034     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1035     /* build bit 29, C */
1036     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1037     /* build bit 28, V */
1038     tcg_gen_shri_i32(tmp, cpu_VF, 31);
1039     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1040     /* generate result */
1041     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1042
1043     tcg_temp_free_i32(nzcv);
1044     tcg_temp_free_i32(tmp);
1045 }
1046
1047 static void gen_set_nzcv(TCGv_i64 tcg_rt)
1048
1049 {
1050     TCGv_i32 nzcv = tcg_temp_new_i32();
1051
1052     /* take NZCV from R[t] */
1053     tcg_gen_trunc_i64_i32(nzcv, tcg_rt);
1054
1055     /* bit 31, N */
1056     tcg_gen_andi_i32(cpu_NF, nzcv, (1 << 31));
1057     /* bit 30, Z */
1058     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1059     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1060     /* bit 29, C */
1061     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1062     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1063     /* bit 28, V */
1064     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1065     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1066     tcg_temp_free_i32(nzcv);
1067 }
1068
1069 /* C5.6.129 MRS - move from system register
1070  * C5.6.131 MSR (register) - move to system register
1071  * C5.6.204 SYS
1072  * C5.6.205 SYSL
1073  * These are all essentially the same insn in 'read' and 'write'
1074  * versions, with varying op0 fields.
1075  */
1076 static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
1077                        unsigned int op0, unsigned int op1, unsigned int op2,
1078                        unsigned int crn, unsigned int crm, unsigned int rt)
1079 {
1080     const ARMCPRegInfo *ri;
1081     TCGv_i64 tcg_rt;
1082
1083     ri = get_arm_cp_reginfo(s->cp_regs,
1084                             ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
1085                                                crn, crm, op0, op1, op2));
1086
1087     if (!ri) {
1088         /* Unknown register */
1089         unallocated_encoding(s);
1090         return;
1091     }
1092
1093     /* Check access permissions */
1094     if (!cp_access_ok(s->current_pl, ri, isread)) {
1095         unallocated_encoding(s);
1096         return;
1097     }
1098
1099     /* Handle special cases first */
1100     switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
1101     case ARM_CP_NOP:
1102         return;
1103     case ARM_CP_NZCV:
1104         tcg_rt = cpu_reg(s, rt);
1105         if (isread) {
1106             gen_get_nzcv(tcg_rt);
1107         } else {
1108             gen_set_nzcv(tcg_rt);
1109         }
1110         return;
1111     default:
1112         break;
1113     }
1114
1115     if (use_icount && (ri->type & ARM_CP_IO)) {
1116         gen_io_start();
1117     }
1118
1119     tcg_rt = cpu_reg(s, rt);
1120
1121     if (isread) {
1122         if (ri->type & ARM_CP_CONST) {
1123             tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
1124         } else if (ri->readfn) {
1125             TCGv_ptr tmpptr;
1126             gen_a64_set_pc_im(s->pc - 4);
1127             tmpptr = tcg_const_ptr(ri);
1128             gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
1129             tcg_temp_free_ptr(tmpptr);
1130         } else {
1131             tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
1132         }
1133     } else {
1134         if (ri->type & ARM_CP_CONST) {
1135             /* If not forbidden by access permissions, treat as WI */
1136             return;
1137         } else if (ri->writefn) {
1138             TCGv_ptr tmpptr;
1139             gen_a64_set_pc_im(s->pc - 4);
1140             tmpptr = tcg_const_ptr(ri);
1141             gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
1142             tcg_temp_free_ptr(tmpptr);
1143         } else {
1144             tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
1145         }
1146     }
1147
1148     if (use_icount && (ri->type & ARM_CP_IO)) {
1149         /* I/O operations must end the TB here (whether read or write) */
1150         gen_io_end();
1151         s->is_jmp = DISAS_UPDATE;
1152     } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
1153         /* We default to ending the TB on a coprocessor register write,
1154          * but allow this to be suppressed by the register definition
1155          * (usually only necessary to work around guest bugs).
1156          */
1157         s->is_jmp = DISAS_UPDATE;
1158     }
1159 }
1160
1161 /* C3.2.4 System
1162  *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
1163  * +---------------------+---+-----+-----+-------+-------+-----+------+
1164  * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
1165  * +---------------------+---+-----+-----+-------+-------+-----+------+
1166  */
1167 static void disas_system(DisasContext *s, uint32_t insn)
1168 {
1169     unsigned int l, op0, op1, crn, crm, op2, rt;
1170     l = extract32(insn, 21, 1);
1171     op0 = extract32(insn, 19, 2);
1172     op1 = extract32(insn, 16, 3);
1173     crn = extract32(insn, 12, 4);
1174     crm = extract32(insn, 8, 4);
1175     op2 = extract32(insn, 5, 3);
1176     rt = extract32(insn, 0, 5);
1177
1178     if (op0 == 0) {
1179         if (l || rt != 31) {
1180             unallocated_encoding(s);
1181             return;
1182         }
1183         switch (crn) {
1184         case 2: /* C5.6.68 HINT */
1185             handle_hint(s, insn, op1, op2, crm);
1186             break;
1187         case 3: /* CLREX, DSB, DMB, ISB */
1188             handle_sync(s, insn, op1, op2, crm);
1189             break;
1190         case 4: /* C5.6.130 MSR (immediate) */
1191             handle_msr_i(s, insn, op1, op2, crm);
1192             break;
1193         default:
1194             unallocated_encoding(s);
1195             break;
1196         }
1197         return;
1198     }
1199     handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
1200 }
1201
1202 /* C3.2.3 Exception generation
1203  *
1204  *  31             24 23 21 20                     5 4   2 1  0
1205  * +-----------------+-----+------------------------+-----+----+
1206  * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
1207  * +-----------------------+------------------------+----------+
1208  */
1209 static void disas_exc(DisasContext *s, uint32_t insn)
1210 {
1211     int opc = extract32(insn, 21, 3);
1212     int op2_ll = extract32(insn, 0, 5);
1213
1214     switch (opc) {
1215     case 0:
1216         /* SVC, HVC, SMC; since we don't support the Virtualization
1217          * or TrustZone extensions these all UNDEF except SVC.
1218          */
1219         if (op2_ll != 1) {
1220             unallocated_encoding(s);
1221             break;
1222         }
1223         gen_exception_insn(s, 0, EXCP_SWI);
1224         break;
1225     case 1:
1226         if (op2_ll != 0) {
1227             unallocated_encoding(s);
1228             break;
1229         }
1230         /* BRK */
1231         gen_exception_insn(s, 0, EXCP_BKPT);
1232         break;
1233     case 2:
1234         if (op2_ll != 0) {
1235             unallocated_encoding(s);
1236             break;
1237         }
1238         /* HLT */
1239         unsupported_encoding(s, insn);
1240         break;
1241     case 5:
1242         if (op2_ll < 1 || op2_ll > 3) {
1243             unallocated_encoding(s);
1244             break;
1245         }
1246         /* DCPS1, DCPS2, DCPS3 */
1247         unsupported_encoding(s, insn);
1248         break;
1249     default:
1250         unallocated_encoding(s);
1251         break;
1252     }
1253 }
1254
1255 /* C3.2.7 Unconditional branch (register)
1256  *  31           25 24   21 20   16 15   10 9    5 4     0
1257  * +---------------+-------+-------+-------+------+-------+
1258  * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
1259  * +---------------+-------+-------+-------+------+-------+
1260  */
1261 static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1262 {
1263     unsigned int opc, op2, op3, rn, op4;
1264
1265     opc = extract32(insn, 21, 4);
1266     op2 = extract32(insn, 16, 5);
1267     op3 = extract32(insn, 10, 6);
1268     rn = extract32(insn, 5, 5);
1269     op4 = extract32(insn, 0, 5);
1270
1271     if (op4 != 0x0 || op3 != 0x0 || op2 != 0x1f) {
1272         unallocated_encoding(s);
1273         return;
1274     }
1275
1276     switch (opc) {
1277     case 0: /* BR */
1278     case 2: /* RET */
1279         break;
1280     case 1: /* BLR */
1281         tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1282         break;
1283     case 4: /* ERET */
1284     case 5: /* DRPS */
1285         if (rn != 0x1f) {
1286             unallocated_encoding(s);
1287         } else {
1288             unsupported_encoding(s, insn);
1289         }
1290         return;
1291     default:
1292         unallocated_encoding(s);
1293         return;
1294     }
1295
1296     tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
1297     s->is_jmp = DISAS_JUMP;
1298 }
1299
1300 /* C3.2 Branches, exception generating and system instructions */
1301 static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
1302 {
1303     switch (extract32(insn, 25, 7)) {
1304     case 0x0a: case 0x0b:
1305     case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
1306         disas_uncond_b_imm(s, insn);
1307         break;
1308     case 0x1a: case 0x5a: /* Compare & branch (immediate) */
1309         disas_comp_b_imm(s, insn);
1310         break;
1311     case 0x1b: case 0x5b: /* Test & branch (immediate) */
1312         disas_test_b_imm(s, insn);
1313         break;
1314     case 0x2a: /* Conditional branch (immediate) */
1315         disas_cond_b_imm(s, insn);
1316         break;
1317     case 0x6a: /* Exception generation / System */
1318         if (insn & (1 << 24)) {
1319             disas_system(s, insn);
1320         } else {
1321             disas_exc(s, insn);
1322         }
1323         break;
1324     case 0x6b: /* Unconditional branch (register) */
1325         disas_uncond_b_reg(s, insn);
1326         break;
1327     default:
1328         unallocated_encoding(s);
1329         break;
1330     }
1331 }
1332
1333 /*
1334  * Load/Store exclusive instructions are implemented by remembering
1335  * the value/address loaded, and seeing if these are the same
1336  * when the store is performed. This is not actually the architecturally
1337  * mandated semantics, but it works for typical guest code sequences
1338  * and avoids having to monitor regular stores.
1339  *
1340  * In system emulation mode only one CPU will be running at once, so
1341  * this sequence is effectively atomic.  In user emulation mode we
1342  * throw an exception and handle the atomic operation elsewhere.
1343  */
1344 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
1345                                TCGv_i64 addr, int size, bool is_pair)
1346 {
1347     TCGv_i64 tmp = tcg_temp_new_i64();
1348     TCGMemOp memop = MO_TE + size;
1349
1350     g_assert(size <= 3);
1351     tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), memop);
1352
1353     if (is_pair) {
1354         TCGv_i64 addr2 = tcg_temp_new_i64();
1355         TCGv_i64 hitmp = tcg_temp_new_i64();
1356
1357         g_assert(size >= 2);
1358         tcg_gen_addi_i64(addr2, addr, 1 << size);
1359         tcg_gen_qemu_ld_i64(hitmp, addr2, get_mem_index(s), memop);
1360         tcg_temp_free_i64(addr2);
1361         tcg_gen_mov_i64(cpu_exclusive_high, hitmp);
1362         tcg_gen_mov_i64(cpu_reg(s, rt2), hitmp);
1363         tcg_temp_free_i64(hitmp);
1364     }
1365
1366     tcg_gen_mov_i64(cpu_exclusive_val, tmp);
1367     tcg_gen_mov_i64(cpu_reg(s, rt), tmp);
1368
1369     tcg_temp_free_i64(tmp);
1370     tcg_gen_mov_i64(cpu_exclusive_addr, addr);
1371 }
1372
1373 #ifdef CONFIG_USER_ONLY
1374 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
1375                                 TCGv_i64 addr, int size, int is_pair)
1376 {
1377     tcg_gen_mov_i64(cpu_exclusive_test, addr);
1378     tcg_gen_movi_i32(cpu_exclusive_info,
1379                      size | is_pair << 2 | (rd << 4) | (rt << 9) | (rt2 << 14));
1380     gen_exception_insn(s, 4, EXCP_STREX);
1381 }
1382 #else
1383 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
1384                                 TCGv_i64 addr, int size, int is_pair)
1385 {
1386     qemu_log_mask(LOG_UNIMP,
1387                   "%s:%d: system mode store_exclusive unsupported "
1388                   "at pc=%016" PRIx64 "\n",
1389                   __FILE__, __LINE__, s->pc - 4);
1390 }
1391 #endif
1392
1393 /* C3.3.6 Load/store exclusive
1394  *
1395  *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
1396  * +-----+-------------+----+---+----+------+----+-------+------+------+
1397  * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
1398  * +-----+-------------+----+---+----+------+----+-------+------+------+
1399  *
1400  *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
1401  *   L: 0 -> store, 1 -> load
1402  *  o2: 0 -> exclusive, 1 -> not
1403  *  o1: 0 -> single register, 1 -> register pair
1404  *  o0: 1 -> load-acquire/store-release, 0 -> not
1405  *
1406  *  o0 == 0 AND o2 == 1 is un-allocated
1407  *  o1 == 1 is un-allocated except for 32 and 64 bit sizes
1408  */
1409 static void disas_ldst_excl(DisasContext *s, uint32_t insn)
1410 {
1411     int rt = extract32(insn, 0, 5);
1412     int rn = extract32(insn, 5, 5);
1413     int rt2 = extract32(insn, 10, 5);
1414     int is_lasr = extract32(insn, 15, 1);
1415     int rs = extract32(insn, 16, 5);
1416     int is_pair = extract32(insn, 21, 1);
1417     int is_store = !extract32(insn, 22, 1);
1418     int is_excl = !extract32(insn, 23, 1);
1419     int size = extract32(insn, 30, 2);
1420     TCGv_i64 tcg_addr;
1421
1422     if ((!is_excl && !is_lasr) ||
1423         (is_pair && size < 2)) {
1424         unallocated_encoding(s);
1425         return;
1426     }
1427
1428     if (rn == 31) {
1429         gen_check_sp_alignment(s);
1430     }
1431     tcg_addr = read_cpu_reg_sp(s, rn, 1);
1432
1433     /* Note that since TCG is single threaded load-acquire/store-release
1434      * semantics require no extra if (is_lasr) { ... } handling.
1435      */
1436
1437     if (is_excl) {
1438         if (!is_store) {
1439             gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
1440         } else {
1441             gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
1442         }
1443     } else {
1444         TCGv_i64 tcg_rt = cpu_reg(s, rt);
1445         if (is_store) {
1446             do_gpr_st(s, tcg_rt, tcg_addr, size);
1447         } else {
1448             do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false);
1449         }
1450         if (is_pair) {
1451             TCGv_i64 tcg_rt2 = cpu_reg(s, rt);
1452             tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
1453             if (is_store) {
1454                 do_gpr_st(s, tcg_rt2, tcg_addr, size);
1455             } else {
1456                 do_gpr_ld(s, tcg_rt2, tcg_addr, size, false, false);
1457             }
1458         }
1459     }
1460 }
1461
1462 /*
1463  * C3.3.5 Load register (literal)
1464  *
1465  *  31 30 29   27  26 25 24 23                5 4     0
1466  * +-----+-------+---+-----+-------------------+-------+
1467  * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
1468  * +-----+-------+---+-----+-------------------+-------+
1469  *
1470  * V: 1 -> vector (simd/fp)
1471  * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
1472  *                   10-> 32 bit signed, 11 -> prefetch
1473  * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
1474  */
1475 static void disas_ld_lit(DisasContext *s, uint32_t insn)
1476 {
1477     int rt = extract32(insn, 0, 5);
1478     int64_t imm = sextract32(insn, 5, 19) << 2;
1479     bool is_vector = extract32(insn, 26, 1);
1480     int opc = extract32(insn, 30, 2);
1481     bool is_signed = false;
1482     int size = 2;
1483     TCGv_i64 tcg_rt, tcg_addr;
1484
1485     if (is_vector) {
1486         if (opc == 3) {
1487             unallocated_encoding(s);
1488             return;
1489         }
1490         size = 2 + opc;
1491     } else {
1492         if (opc == 3) {
1493             /* PRFM (literal) : prefetch */
1494             return;
1495         }
1496         size = 2 + extract32(opc, 0, 1);
1497         is_signed = extract32(opc, 1, 1);
1498     }
1499
1500     tcg_rt = cpu_reg(s, rt);
1501
1502     tcg_addr = tcg_const_i64((s->pc - 4) + imm);
1503     if (is_vector) {
1504         do_fp_ld(s, rt, tcg_addr, size);
1505     } else {
1506         do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false);
1507     }
1508     tcg_temp_free_i64(tcg_addr);
1509 }
1510
1511 /*
1512  * C5.6.80 LDNP (Load Pair - non-temporal hint)
1513  * C5.6.81 LDP (Load Pair - non vector)
1514  * C5.6.82 LDPSW (Load Pair Signed Word - non vector)
1515  * C5.6.176 STNP (Store Pair - non-temporal hint)
1516  * C5.6.177 STP (Store Pair - non vector)
1517  * C6.3.165 LDNP (Load Pair of SIMD&FP - non-temporal hint)
1518  * C6.3.165 LDP (Load Pair of SIMD&FP)
1519  * C6.3.284 STNP (Store Pair of SIMD&FP - non-temporal hint)
1520  * C6.3.284 STP (Store Pair of SIMD&FP)
1521  *
1522  *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
1523  * +-----+-------+---+---+-------+---+-----------------------------+
1524  * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
1525  * +-----+-------+---+---+-------+---+-------+-------+------+------+
1526  *
1527  * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
1528  *      LDPSW                    01
1529  *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
1530  *   V: 0 -> GPR, 1 -> Vector
1531  * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
1532  *      10 -> signed offset, 11 -> pre-index
1533  *   L: 0 -> Store 1 -> Load
1534  *
1535  * Rt, Rt2 = GPR or SIMD registers to be stored
1536  * Rn = general purpose register containing address
1537  * imm7 = signed offset (multiple of 4 or 8 depending on size)
1538  */
1539 static void disas_ldst_pair(DisasContext *s, uint32_t insn)
1540 {
1541     int rt = extract32(insn, 0, 5);
1542     int rn = extract32(insn, 5, 5);
1543     int rt2 = extract32(insn, 10, 5);
1544     int64_t offset = sextract32(insn, 15, 7);
1545     int index = extract32(insn, 23, 2);
1546     bool is_vector = extract32(insn, 26, 1);
1547     bool is_load = extract32(insn, 22, 1);
1548     int opc = extract32(insn, 30, 2);
1549
1550     bool is_signed = false;
1551     bool postindex = false;
1552     bool wback = false;
1553
1554     TCGv_i64 tcg_addr; /* calculated address */
1555     int size;
1556
1557     if (opc == 3) {
1558         unallocated_encoding(s);
1559         return;
1560     }
1561
1562     if (is_vector) {
1563         size = 2 + opc;
1564     } else {
1565         size = 2 + extract32(opc, 1, 1);
1566         is_signed = extract32(opc, 0, 1);
1567         if (!is_load && is_signed) {
1568             unallocated_encoding(s);
1569             return;
1570         }
1571     }
1572
1573     switch (index) {
1574     case 1: /* post-index */
1575         postindex = true;
1576         wback = true;
1577         break;
1578     case 0:
1579         /* signed offset with "non-temporal" hint. Since we don't emulate
1580          * caches we don't care about hints to the cache system about
1581          * data access patterns, and handle this identically to plain
1582          * signed offset.
1583          */
1584         if (is_signed) {
1585             /* There is no non-temporal-hint version of LDPSW */
1586             unallocated_encoding(s);
1587             return;
1588         }
1589         postindex = false;
1590         break;
1591     case 2: /* signed offset, rn not updated */
1592         postindex = false;
1593         break;
1594     case 3: /* pre-index */
1595         postindex = false;
1596         wback = true;
1597         break;
1598     }
1599
1600     offset <<= size;
1601
1602     if (rn == 31) {
1603         gen_check_sp_alignment(s);
1604     }
1605
1606     tcg_addr = read_cpu_reg_sp(s, rn, 1);
1607
1608     if (!postindex) {
1609         tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
1610     }
1611
1612     if (is_vector) {
1613         if (is_load) {
1614             do_fp_ld(s, rt, tcg_addr, size);
1615         } else {
1616             do_fp_st(s, rt, tcg_addr, size);
1617         }
1618     } else {
1619         TCGv_i64 tcg_rt = cpu_reg(s, rt);
1620         if (is_load) {
1621             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false);
1622         } else {
1623             do_gpr_st(s, tcg_rt, tcg_addr, size);
1624         }
1625     }
1626     tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
1627     if (is_vector) {
1628         if (is_load) {
1629             do_fp_ld(s, rt2, tcg_addr, size);
1630         } else {
1631             do_fp_st(s, rt2, tcg_addr, size);
1632         }
1633     } else {
1634         TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
1635         if (is_load) {
1636             do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false);
1637         } else {
1638             do_gpr_st(s, tcg_rt2, tcg_addr, size);
1639         }
1640     }
1641
1642     if (wback) {
1643         if (postindex) {
1644             tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
1645         } else {
1646             tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
1647         }
1648         tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr);
1649     }
1650 }
1651
1652 /*
1653  * C3.3.8 Load/store (immediate post-indexed)
1654  * C3.3.9 Load/store (immediate pre-indexed)
1655  * C3.3.12 Load/store (unscaled immediate)
1656  *
1657  * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
1658  * +----+-------+---+-----+-----+---+--------+-----+------+------+
1659  * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
1660  * +----+-------+---+-----+-----+---+--------+-----+------+------+
1661  *
1662  * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
1663  * V = 0 -> non-vector
1664  * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
1665  * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
1666  */
1667 static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
1668 {
1669     int rt = extract32(insn, 0, 5);
1670     int rn = extract32(insn, 5, 5);
1671     int imm9 = sextract32(insn, 12, 9);
1672     int opc = extract32(insn, 22, 2);
1673     int size = extract32(insn, 30, 2);
1674     int idx = extract32(insn, 10, 2);
1675     bool is_signed = false;
1676     bool is_store = false;
1677     bool is_extended = false;
1678     bool is_vector = extract32(insn, 26, 1);
1679     bool post_index;
1680     bool writeback;
1681
1682     TCGv_i64 tcg_addr;
1683
1684     if (is_vector) {
1685         size |= (opc & 2) << 1;
1686         if (size > 4) {
1687             unallocated_encoding(s);
1688             return;
1689         }
1690         is_store = ((opc & 1) == 0);
1691     } else {
1692         if (size == 3 && opc == 2) {
1693             /* PRFM - prefetch */
1694             return;
1695         }
1696         if (opc == 3 && size > 1) {
1697             unallocated_encoding(s);
1698             return;
1699         }
1700         is_store = (opc == 0);
1701         is_signed = opc & (1<<1);
1702         is_extended = (size < 3) && (opc & 1);
1703     }
1704
1705     switch (idx) {
1706     case 0:
1707         post_index = false;
1708         writeback = false;
1709         break;
1710     case 1:
1711         post_index = true;
1712         writeback = true;
1713         break;
1714     case 3:
1715         post_index = false;
1716         writeback = true;
1717         break;
1718     case 2:
1719         g_assert(false);
1720         break;
1721     }
1722
1723     if (rn == 31) {
1724         gen_check_sp_alignment(s);
1725     }
1726     tcg_addr = read_cpu_reg_sp(s, rn, 1);
1727
1728     if (!post_index) {
1729         tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
1730     }
1731
1732     if (is_vector) {
1733         if (is_store) {
1734             do_fp_st(s, rt, tcg_addr, size);
1735         } else {
1736             do_fp_ld(s, rt, tcg_addr, size);
1737         }
1738     } else {
1739         TCGv_i64 tcg_rt = cpu_reg(s, rt);
1740         if (is_store) {
1741             do_gpr_st(s, tcg_rt, tcg_addr, size);
1742         } else {
1743             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
1744         }
1745     }
1746
1747     if (writeback) {
1748         TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
1749         if (post_index) {
1750             tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
1751         }
1752         tcg_gen_mov_i64(tcg_rn, tcg_addr);
1753     }
1754 }
1755
1756 /*
1757  * C3.3.10 Load/store (register offset)
1758  *
1759  * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
1760  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
1761  * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
1762  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
1763  *
1764  * For non-vector:
1765  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
1766  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
1767  * For vector:
1768  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
1769  *   opc<0>: 0 -> store, 1 -> load
1770  * V: 1 -> vector/simd
1771  * opt: extend encoding (see DecodeRegExtend)
1772  * S: if S=1 then scale (essentially index by sizeof(size))
1773  * Rt: register to transfer into/out of
1774  * Rn: address register or SP for base
1775  * Rm: offset register or ZR for offset
1776  */
1777 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn)
1778 {
1779     int rt = extract32(insn, 0, 5);
1780     int rn = extract32(insn, 5, 5);
1781     int shift = extract32(insn, 12, 1);
1782     int rm = extract32(insn, 16, 5);
1783     int opc = extract32(insn, 22, 2);
1784     int opt = extract32(insn, 13, 3);
1785     int size = extract32(insn, 30, 2);
1786     bool is_signed = false;
1787     bool is_store = false;
1788     bool is_extended = false;
1789     bool is_vector = extract32(insn, 26, 1);
1790
1791     TCGv_i64 tcg_rm;
1792     TCGv_i64 tcg_addr;
1793
1794     if (extract32(opt, 1, 1) == 0) {
1795         unallocated_encoding(s);
1796         return;
1797     }
1798
1799     if (is_vector) {
1800         size |= (opc & 2) << 1;
1801         if (size > 4) {
1802             unallocated_encoding(s);
1803             return;
1804         }
1805         is_store = !extract32(opc, 0, 1);
1806     } else {
1807         if (size == 3 && opc == 2) {
1808             /* PRFM - prefetch */
1809             return;
1810         }
1811         if (opc == 3 && size > 1) {
1812             unallocated_encoding(s);
1813             return;
1814         }
1815         is_store = (opc == 0);
1816         is_signed = extract32(opc, 1, 1);
1817         is_extended = (size < 3) && extract32(opc, 0, 1);
1818     }
1819
1820     if (rn == 31) {
1821         gen_check_sp_alignment(s);
1822     }
1823     tcg_addr = read_cpu_reg_sp(s, rn, 1);
1824
1825     tcg_rm = read_cpu_reg(s, rm, 1);
1826     ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
1827
1828     tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm);
1829
1830     if (is_vector) {
1831         if (is_store) {
1832             do_fp_st(s, rt, tcg_addr, size);
1833         } else {
1834             do_fp_ld(s, rt, tcg_addr, size);
1835         }
1836     } else {
1837         TCGv_i64 tcg_rt = cpu_reg(s, rt);
1838         if (is_store) {
1839             do_gpr_st(s, tcg_rt, tcg_addr, size);
1840         } else {
1841             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
1842         }
1843     }
1844 }
1845
1846 /*
1847  * C3.3.13 Load/store (unsigned immediate)
1848  *
1849  * 31 30 29   27  26 25 24 23 22 21        10 9     5
1850  * +----+-------+---+-----+-----+------------+-------+------+
1851  * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
1852  * +----+-------+---+-----+-----+------------+-------+------+
1853  *
1854  * For non-vector:
1855  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
1856  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
1857  * For vector:
1858  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
1859  *   opc<0>: 0 -> store, 1 -> load
1860  * Rn: base address register (inc SP)
1861  * Rt: target register
1862  */
1863 static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn)
1864 {
1865     int rt = extract32(insn, 0, 5);
1866     int rn = extract32(insn, 5, 5);
1867     unsigned int imm12 = extract32(insn, 10, 12);
1868     bool is_vector = extract32(insn, 26, 1);
1869     int size = extract32(insn, 30, 2);
1870     int opc = extract32(insn, 22, 2);
1871     unsigned int offset;
1872
1873     TCGv_i64 tcg_addr;
1874
1875     bool is_store;
1876     bool is_signed = false;
1877     bool is_extended = false;
1878
1879     if (is_vector) {
1880         size |= (opc & 2) << 1;
1881         if (size > 4) {
1882             unallocated_encoding(s);
1883             return;
1884         }
1885         is_store = !extract32(opc, 0, 1);
1886     } else {
1887         if (size == 3 && opc == 2) {
1888             /* PRFM - prefetch */
1889             return;
1890         }
1891         if (opc == 3 && size > 1) {
1892             unallocated_encoding(s);
1893             return;
1894         }
1895         is_store = (opc == 0);
1896         is_signed = extract32(opc, 1, 1);
1897         is_extended = (size < 3) && extract32(opc, 0, 1);
1898     }
1899
1900     if (rn == 31) {
1901         gen_check_sp_alignment(s);
1902     }
1903     tcg_addr = read_cpu_reg_sp(s, rn, 1);
1904     offset = imm12 << size;
1905     tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
1906
1907     if (is_vector) {
1908         if (is_store) {
1909             do_fp_st(s, rt, tcg_addr, size);
1910         } else {
1911             do_fp_ld(s, rt, tcg_addr, size);
1912         }
1913     } else {
1914         TCGv_i64 tcg_rt = cpu_reg(s, rt);
1915         if (is_store) {
1916             do_gpr_st(s, tcg_rt, tcg_addr, size);
1917         } else {
1918             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
1919         }
1920     }
1921 }
1922
1923 /* Load/store register (immediate forms) */
1924 static void disas_ldst_reg_imm(DisasContext *s, uint32_t insn)
1925 {
1926     switch (extract32(insn, 10, 2)) {
1927     case 0: case 1: case 3:
1928         /* Load/store register (unscaled immediate) */
1929         /* Load/store immediate pre/post-indexed */
1930         disas_ldst_reg_imm9(s, insn);
1931         break;
1932     case 2:
1933         /* Load/store register unprivileged */
1934         unsupported_encoding(s, insn);
1935         break;
1936     default:
1937         unallocated_encoding(s);
1938         break;
1939     }
1940 }
1941
1942 /* Load/store register (all forms) */
1943 static void disas_ldst_reg(DisasContext *s, uint32_t insn)
1944 {
1945     switch (extract32(insn, 24, 2)) {
1946     case 0:
1947         if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
1948             disas_ldst_reg_roffset(s, insn);
1949         } else {
1950             disas_ldst_reg_imm(s, insn);
1951         }
1952         break;
1953     case 1:
1954         disas_ldst_reg_unsigned_imm(s, insn);
1955         break;
1956     default:
1957         unallocated_encoding(s);
1958         break;
1959     }
1960 }
1961
1962 /* C3.3.1 AdvSIMD load/store multiple structures
1963  *
1964  *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
1965  * +---+---+---------------+---+-------------+--------+------+------+------+
1966  * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
1967  * +---+---+---------------+---+-------------+--------+------+------+------+
1968  *
1969  * C3.3.2 AdvSIMD load/store multiple structures (post-indexed)
1970  *
1971  *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
1972  * +---+---+---------------+---+---+---------+--------+------+------+------+
1973  * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
1974  * +---+---+---------------+---+---+---------+--------+------+------+------+
1975  *
1976  * Rt: first (or only) SIMD&FP register to be transferred
1977  * Rn: base address or SP
1978  * Rm (post-index only): post-index register (when !31) or size dependent #imm
1979  */
1980 static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
1981 {
1982     int rt = extract32(insn, 0, 5);
1983     int rn = extract32(insn, 5, 5);
1984     int size = extract32(insn, 10, 2);
1985     int opcode = extract32(insn, 12, 4);
1986     bool is_store = !extract32(insn, 22, 1);
1987     bool is_postidx = extract32(insn, 23, 1);
1988     bool is_q = extract32(insn, 30, 1);
1989     TCGv_i64 tcg_addr, tcg_rn;
1990
1991     int ebytes = 1 << size;
1992     int elements = (is_q ? 128 : 64) / (8 << size);
1993     int rpt;    /* num iterations */
1994     int selem;  /* structure elements */
1995     int r;
1996
1997     if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
1998         unallocated_encoding(s);
1999         return;
2000     }
2001
2002     /* From the shared decode logic */
2003     switch (opcode) {
2004     case 0x0:
2005         rpt = 1;
2006         selem = 4;
2007         break;
2008     case 0x2:
2009         rpt = 4;
2010         selem = 1;
2011         break;
2012     case 0x4:
2013         rpt = 1;
2014         selem = 3;
2015         break;
2016     case 0x6:
2017         rpt = 3;
2018         selem = 1;
2019         break;
2020     case 0x7:
2021         rpt = 1;
2022         selem = 1;
2023         break;
2024     case 0x8:
2025         rpt = 1;
2026         selem = 2;
2027         break;
2028     case 0xa:
2029         rpt = 2;
2030         selem = 1;
2031         break;
2032     default:
2033         unallocated_encoding(s);
2034         return;
2035     }
2036
2037     if (size == 3 && !is_q && selem != 1) {
2038         /* reserved */
2039         unallocated_encoding(s);
2040         return;
2041     }
2042
2043     if (rn == 31) {
2044         gen_check_sp_alignment(s);
2045     }
2046
2047     tcg_rn = cpu_reg_sp(s, rn);
2048     tcg_addr = tcg_temp_new_i64();
2049     tcg_gen_mov_i64(tcg_addr, tcg_rn);
2050
2051     for (r = 0; r < rpt; r++) {
2052         int e;
2053         for (e = 0; e < elements; e++) {
2054             int tt = (rt + r) % 32;
2055             int xs;
2056             for (xs = 0; xs < selem; xs++) {
2057                 if (is_store) {
2058                     do_vec_st(s, tt, e, tcg_addr, size);
2059                 } else {
2060                     do_vec_ld(s, tt, e, tcg_addr, size);
2061
2062                     /* For non-quad operations, setting a slice of the low
2063                      * 64 bits of the register clears the high 64 bits (in
2064                      * the ARM ARM pseudocode this is implicit in the fact
2065                      * that 'rval' is a 64 bit wide variable). We optimize
2066                      * by noticing that we only need to do this the first
2067                      * time we touch a register.
2068                      */
2069                     if (!is_q && e == 0 && (r == 0 || xs == selem - 1)) {
2070                         clear_vec_high(s, tt);
2071                     }
2072                 }
2073                 tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2074                 tt = (tt + 1) % 32;
2075             }
2076         }
2077     }
2078
2079     if (is_postidx) {
2080         int rm = extract32(insn, 16, 5);
2081         if (rm == 31) {
2082             tcg_gen_mov_i64(tcg_rn, tcg_addr);
2083         } else {
2084             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2085         }
2086     }
2087     tcg_temp_free_i64(tcg_addr);
2088 }
2089
2090 /* AdvSIMD load/store single structure */
2091 static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
2092 {
2093     unsupported_encoding(s, insn);
2094 }
2095
2096 /* C3.3 Loads and stores */
2097 static void disas_ldst(DisasContext *s, uint32_t insn)
2098 {
2099     switch (extract32(insn, 24, 6)) {
2100     case 0x08: /* Load/store exclusive */
2101         disas_ldst_excl(s, insn);
2102         break;
2103     case 0x18: case 0x1c: /* Load register (literal) */
2104         disas_ld_lit(s, insn);
2105         break;
2106     case 0x28: case 0x29:
2107     case 0x2c: case 0x2d: /* Load/store pair (all forms) */
2108         disas_ldst_pair(s, insn);
2109         break;
2110     case 0x38: case 0x39:
2111     case 0x3c: case 0x3d: /* Load/store register (all forms) */
2112         disas_ldst_reg(s, insn);
2113         break;
2114     case 0x0c: /* AdvSIMD load/store multiple structures */
2115         disas_ldst_multiple_struct(s, insn);
2116         break;
2117     case 0x0d: /* AdvSIMD load/store single structure */
2118         disas_ldst_single_struct(s, insn);
2119         break;
2120     default:
2121         unallocated_encoding(s);
2122         break;
2123     }
2124 }
2125
2126 /* C3.4.6 PC-rel. addressing
2127  *   31  30   29 28       24 23                5 4    0
2128  * +----+-------+-----------+-------------------+------+
2129  * | op | immlo | 1 0 0 0 0 |       immhi       |  Rd  |
2130  * +----+-------+-----------+-------------------+------+
2131  */
2132 static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
2133 {
2134     unsigned int page, rd;
2135     uint64_t base;
2136     int64_t offset;
2137
2138     page = extract32(insn, 31, 1);
2139     /* SignExtend(immhi:immlo) -> offset */
2140     offset = ((int64_t)sextract32(insn, 5, 19) << 2) | extract32(insn, 29, 2);
2141     rd = extract32(insn, 0, 5);
2142     base = s->pc - 4;
2143
2144     if (page) {
2145         /* ADRP (page based) */
2146         base &= ~0xfff;
2147         offset <<= 12;
2148     }
2149
2150     tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
2151 }
2152
2153 /*
2154  * C3.4.1 Add/subtract (immediate)
2155  *
2156  *  31 30 29 28       24 23 22 21         10 9   5 4   0
2157  * +--+--+--+-----------+-----+-------------+-----+-----+
2158  * |sf|op| S| 1 0 0 0 1 |shift|    imm12    |  Rn | Rd  |
2159  * +--+--+--+-----------+-----+-------------+-----+-----+
2160  *
2161  *    sf: 0 -> 32bit, 1 -> 64bit
2162  *    op: 0 -> add  , 1 -> sub
2163  *     S: 1 -> set flags
2164  * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
2165  */
2166 static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
2167 {
2168     int rd = extract32(insn, 0, 5);
2169     int rn = extract32(insn, 5, 5);
2170     uint64_t imm = extract32(insn, 10, 12);
2171     int shift = extract32(insn, 22, 2);
2172     bool setflags = extract32(insn, 29, 1);
2173     bool sub_op = extract32(insn, 30, 1);
2174     bool is_64bit = extract32(insn, 31, 1);
2175
2176     TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2177     TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
2178     TCGv_i64 tcg_result;
2179
2180     switch (shift) {
2181     case 0x0:
2182         break;
2183     case 0x1:
2184         imm <<= 12;
2185         break;
2186     default:
2187         unallocated_encoding(s);
2188         return;
2189     }
2190
2191     tcg_result = tcg_temp_new_i64();
2192     if (!setflags) {
2193         if (sub_op) {
2194             tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
2195         } else {
2196             tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
2197         }
2198     } else {
2199         TCGv_i64 tcg_imm = tcg_const_i64(imm);
2200         if (sub_op) {
2201             gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2202         } else {
2203             gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2204         }
2205         tcg_temp_free_i64(tcg_imm);
2206     }
2207
2208     if (is_64bit) {
2209         tcg_gen_mov_i64(tcg_rd, tcg_result);
2210     } else {
2211         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
2212     }
2213
2214     tcg_temp_free_i64(tcg_result);
2215 }
2216
/* Replicate an element across a 64 bit value.
 *
 * mask holds the element in its bottom e bits (with higher bits zero);
 * the return value has that element copied into every e-bit wide slot
 * of a 64 bit integer. e must be non-zero; for e >= 64 the input is
 * returned unchanged.
 */
static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
{
    uint64_t pattern = mask;
    unsigned int width;

    assert(e != 0);

    /* Each pass doubles the populated width by OR-ing in a shifted copy */
    for (width = e; width < 64; width *= 2) {
        pattern |= pattern << width;
    }
    return pattern;
}
2230
/* Return a value with the bottom length bits set (where 0 < length <= 64).
 * The range is enforced with an assert because shifting a 64 bit value
 * by 64 would be undefined.
 */
static inline uint64_t bitmask64(unsigned int length)
{
    const uint64_t all_ones = UINT64_MAX;

    assert(length > 0 && length <= 64);
    return all_ones >> (64 - length);
}
2237
/* Simplified variant of pseudocode DecodeBitMasks() for the case where we
 * only require the wmask. Returns false if the imms/immr/immn are a reserved
 * value (ie should cause a guest UNDEF exception), and true if they are
 * valid, in which case the decoded bit pattern is written to result.
 *
 * result: output; only written when the function returns true
 * immn:   the N bit of the encoding (0 or 1)
 * imms:   the 6 bit imms field (element size and run length)
 * immr:   the 6 bit immr field (rotation)
 */
static bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
                                   unsigned int imms, unsigned int immr)
{
    uint64_t mask;
    unsigned e, levels, s, r;
    int len;

    assert(immn < 2 && imms < 64 && immr < 64);

    /* The bit patterns we create here are 64 bit patterns which
     * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
     * 64 bits each. Each element contains the same value: a run
     * of between 1 and e-1 non-zero bits, rotated within the
     * element by between 0 and e-1 bits.
     *
     * The element size and run length are encoded into immn (1 bit)
     * and imms (6 bits) as follows:
     * 64 bit elements: immn = 1, imms = <length of run - 1>
     * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
     * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
     *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
     *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
     *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
     * Notice that immn = 0, imms = 11111x is the only combination
     * not covered by one of the above options; this is reserved.
     * Further, <length of run - 1> all-ones is a reserved pattern.
     *
     * In all cases the rotation is by immr % e (and immr is 6 bits).
     */

    /* First determine the element size */
    len = 31 - clz32((immn << 6) | (~imms & 0x3f));
    if (len < 1) {
        /* This is the immn == 0, imms == 11111x (binary) case */
        return false;
    }
    e = 1 << len;

    levels = e - 1;
    s = imms & levels;
    r = immr & levels;

    if (s == levels) {
        /* <length of run - 1> mustn't be all-ones. */
        return false;
    }

    /* Create the value of one element: s+1 set bits rotated
     * by r within the element (which is e bits wide)...
     */
    mask = bitmask64(s + 1);
    if (r) {
        /* Only rotate for a non-zero amount: with r == 0 and e == 64 the
         * left shift below would be by 64 bits, which is undefined in C.
         */
        mask = (mask >> r) | (mask << (e - r));
        /* Drop the bits the left shift pushed above the element so that
         * bitfield_replicate() gets the "higher bits zero" input it
         * documents.
         */
        mask &= bitmask64(e);
    }
    /* ...then replicate the element over the whole 64 bit value */
    mask = bitfield_replicate(mask, e);
    *result = mask;
    return true;
}
2300
2301 /* C3.4.4 Logical (immediate)
2302  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
2303  * +----+-----+-------------+---+------+------+------+------+
2304  * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
2305  * +----+-----+-------------+---+------+------+------+------+
2306  */
2307 static void disas_logic_imm(DisasContext *s, uint32_t insn)
2308 {
2309     unsigned int sf, opc, is_n, immr, imms, rn, rd;
2310     TCGv_i64 tcg_rd, tcg_rn;
2311     uint64_t wmask;
2312     bool is_and = false;
2313
2314     sf = extract32(insn, 31, 1);
2315     opc = extract32(insn, 29, 2);
2316     is_n = extract32(insn, 22, 1);
2317     immr = extract32(insn, 16, 6);
2318     imms = extract32(insn, 10, 6);
2319     rn = extract32(insn, 5, 5);
2320     rd = extract32(insn, 0, 5);
2321
2322     if (!sf && is_n) {
2323         unallocated_encoding(s);
2324         return;
2325     }
2326
2327     if (opc == 0x3) { /* ANDS */
2328         tcg_rd = cpu_reg(s, rd);
2329     } else {
2330         tcg_rd = cpu_reg_sp(s, rd);
2331     }
2332     tcg_rn = cpu_reg(s, rn);
2333
2334     if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
2335         /* some immediate field values are reserved */
2336         unallocated_encoding(s);
2337         return;
2338     }
2339
2340     if (!sf) {
2341         wmask &= 0xffffffff;
2342     }
2343
2344     switch (opc) {
2345     case 0x3: /* ANDS */
2346     case 0x0: /* AND */
2347         tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
2348         is_and = true;
2349         break;
2350     case 0x1: /* ORR */
2351         tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
2352         break;
2353     case 0x2: /* EOR */
2354         tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
2355         break;
2356     default:
2357         assert(FALSE); /* must handle all above */
2358         break;
2359     }
2360
2361     if (!sf && !is_and) {
2362         /* zero extend final result; we know we can skip this for AND
2363          * since the immediate had the high 32 bits clear.
2364          */
2365         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
2366     }
2367
2368     if (opc == 3) { /* ANDS */
2369         gen_logic_CC(sf, tcg_rd);
2370     }
2371 }
2372
2373 /*
2374  * C3.4.5 Move wide (immediate)
2375  *
2376  *  31 30 29 28         23 22 21 20             5 4    0
2377  * +--+-----+-------------+-----+----------------+------+
2378  * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
2379  * +--+-----+-------------+-----+----------------+------+
2380  *
2381  * sf: 0 -> 32 bit, 1 -> 64 bit
2382  * opc: 00 -> N, 10 -> Z, 11 -> K
2383  * hw: shift/16 (0,16, and sf only 32, 48)
2384  */
2385 static void disas_movw_imm(DisasContext *s, uint32_t insn)
2386 {
2387     int rd = extract32(insn, 0, 5);
2388     uint64_t imm = extract32(insn, 5, 16);
2389     int sf = extract32(insn, 31, 1);
2390     int opc = extract32(insn, 29, 2);
2391     int pos = extract32(insn, 21, 2) << 4;
2392     TCGv_i64 tcg_rd = cpu_reg(s, rd);
2393     TCGv_i64 tcg_imm;
2394
2395     if (!sf && (pos >= 32)) {
2396         unallocated_encoding(s);
2397         return;
2398     }
2399
2400     switch (opc) {
2401     case 0: /* MOVN */
2402     case 2: /* MOVZ */
2403         imm <<= pos;
2404         if (opc == 0) {
2405             imm = ~imm;
2406         }
2407         if (!sf) {
2408             imm &= 0xffffffffu;
2409         }
2410         tcg_gen_movi_i64(tcg_rd, imm);
2411         break;
2412     case 3: /* MOVK */
2413         tcg_imm = tcg_const_i64(imm);
2414         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
2415         tcg_temp_free_i64(tcg_imm);
2416         if (!sf) {
2417             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
2418         }
2419         break;
2420     default:
2421         unallocated_encoding(s);
2422         break;
2423     }
2424 }
2425
2426 /* C3.4.2 Bitfield
2427  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
2428  * +----+-----+-------------+---+------+------+------+------+
2429  * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
2430  * +----+-----+-------------+---+------+------+------+------+
2431  */
2432 static void disas_bitfield(DisasContext *s, uint32_t insn)
2433 {
2434     unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
2435     TCGv_i64 tcg_rd, tcg_tmp;
2436
2437     sf = extract32(insn, 31, 1);
2438     opc = extract32(insn, 29, 2);
2439     n = extract32(insn, 22, 1);
2440     ri = extract32(insn, 16, 6);
2441     si = extract32(insn, 10, 6);
2442     rn = extract32(insn, 5, 5);
2443     rd = extract32(insn, 0, 5);
2444     bitsize = sf ? 64 : 32;
2445
2446     if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
2447         unallocated_encoding(s);
2448         return;
2449     }
2450
2451     tcg_rd = cpu_reg(s, rd);
2452     tcg_tmp = read_cpu_reg(s, rn, sf);
2453
2454     /* OPTME: probably worth recognizing common cases of ext{8,16,32}{u,s} */
2455
2456     if (opc != 1) { /* SBFM or UBFM */
2457         tcg_gen_movi_i64(tcg_rd, 0);
2458     }
2459
2460     /* do the bit move operation */
2461     if (si >= ri) {
2462         /* Wd<s-r:0> = Wn<s:r> */
2463         tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
2464         pos = 0;
2465         len = (si - ri) + 1;
2466     } else {
2467         /* Wd<32+s-r,32-r> = Wn<s:0> */
2468         pos = bitsize - ri;
2469         len = si + 1;
2470     }
2471
2472     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
2473
2474     if (opc == 0) { /* SBFM - sign extend the destination field */
2475         tcg_gen_shli_i64(tcg_rd, tcg_rd, 64 - (pos + len));
2476         tcg_gen_sari_i64(tcg_rd, tcg_rd, 64 - (pos + len));
2477     }
2478
2479     if (!sf) { /* zero extend final result */
2480         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
2481     }
2482 }
2483
2484 /* C3.4.3 Extract
2485  *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
2486  * +----+------+-------------+---+----+------+--------+------+------+
2487  * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
2488  * +----+------+-------------+---+----+------+--------+------+------+
2489  */
2490 static void disas_extract(DisasContext *s, uint32_t insn)
2491 {
2492     unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
2493
2494     sf = extract32(insn, 31, 1);
2495     n = extract32(insn, 22, 1);
2496     rm = extract32(insn, 16, 5);
2497     imm = extract32(insn, 10, 6);
2498     rn = extract32(insn, 5, 5);
2499     rd = extract32(insn, 0, 5);
2500     op21 = extract32(insn, 29, 2);
2501     op0 = extract32(insn, 21, 1);
2502     bitsize = sf ? 64 : 32;
2503
2504     if (sf != n || op21 || op0 || imm >= bitsize) {
2505         unallocated_encoding(s);
2506     } else {
2507         TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
2508
2509         tcg_rd = cpu_reg(s, rd);
2510
2511         if (imm) {
2512             /* OPTME: we can special case rm==rn as a rotate */
2513             tcg_rm = read_cpu_reg(s, rm, sf);
2514             tcg_rn = read_cpu_reg(s, rn, sf);
2515             tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
2516             tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
2517             tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
2518             if (!sf) {
2519                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
2520             }
2521         } else {
2522             /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
2523              * so an extract from bit 0 is a special case.
2524              */
2525             if (sf) {
2526                 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
2527             } else {
2528                 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
2529             }
2530         }
2531
2532     }
2533 }
2534
2535 /* C3.4 Data processing - immediate */
2536 static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
2537 {
2538     switch (extract32(insn, 23, 6)) {
2539     case 0x20: case 0x21: /* PC-rel. addressing */
2540         disas_pc_rel_adr(s, insn);
2541         break;
2542     case 0x22: case 0x23: /* Add/subtract (immediate) */
2543         disas_add_sub_imm(s, insn);
2544         break;
2545     case 0x24: /* Logical (immediate) */
2546         disas_logic_imm(s, insn);
2547         break;
2548     case 0x25: /* Move wide (immediate) */
2549         disas_movw_imm(s, insn);
2550         break;
2551     case 0x26: /* Bitfield */
2552         disas_bitfield(s, insn);
2553         break;
2554     case 0x27: /* Extract */
2555         disas_extract(s, insn);
2556         break;
2557     default:
2558         unallocated_encoding(s);
2559         break;
2560     }
2561 }
2562
2563 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
2564  * Note that it is the caller's responsibility to ensure that the
2565  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
2566  * mandated semantics for out of range shifts.
2567  */
2568 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
2569                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
2570 {
2571     switch (shift_type) {
2572     case A64_SHIFT_TYPE_LSL:
2573         tcg_gen_shl_i64(dst, src, shift_amount);
2574         break;
2575     case A64_SHIFT_TYPE_LSR:
2576         tcg_gen_shr_i64(dst, src, shift_amount);
2577         break;
2578     case A64_SHIFT_TYPE_ASR:
2579         if (!sf) {
2580             tcg_gen_ext32s_i64(dst, src);
2581         }
2582         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
2583         break;
2584     case A64_SHIFT_TYPE_ROR:
2585         if (sf) {
2586             tcg_gen_rotr_i64(dst, src, shift_amount);
2587         } else {
2588             TCGv_i32 t0, t1;
2589             t0 = tcg_temp_new_i32();
2590             t1 = tcg_temp_new_i32();
2591             tcg_gen_trunc_i64_i32(t0, src);
2592             tcg_gen_trunc_i64_i32(t1, shift_amount);
2593             tcg_gen_rotr_i32(t0, t0, t1);
2594             tcg_gen_extu_i32_i64(dst, t0);
2595             tcg_temp_free_i32(t0);
2596             tcg_temp_free_i32(t1);
2597         }
2598         break;
2599     default:
2600         assert(FALSE); /* all shift types should be handled */
2601         break;
2602     }
2603
2604     if (!sf) { /* zero extend final result */
2605         tcg_gen_ext32u_i64(dst, dst);
2606     }
2607 }
2608
2609 /* Shift a TCGv src by immediate, put result in dst.
2610  * The shift amount must be in range (this should always be true as the
2611  * relevant instructions will UNDEF on bad shift immediates).
2612  */
2613 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
2614                           enum a64_shift_type shift_type, unsigned int shift_i)
2615 {
2616     assert(shift_i < (sf ? 64 : 32));
2617
2618     if (shift_i == 0) {
2619         tcg_gen_mov_i64(dst, src);
2620     } else {
2621         TCGv_i64 shift_const;
2622
2623         shift_const = tcg_const_i64(shift_i);
2624         shift_reg(dst, src, sf, shift_type, shift_const);
2625         tcg_temp_free_i64(shift_const);
2626     }
2627 }
2628
2629 /* C3.5.10 Logical (shifted register)
2630  *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
2631  * +----+-----+-----------+-------+---+------+--------+------+------+
2632  * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
2633  * +----+-----+-----------+-------+---+------+--------+------+------+
2634  */
2635 static void disas_logic_reg(DisasContext *s, uint32_t insn)
2636 {
2637     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
2638     unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
2639
2640     sf = extract32(insn, 31, 1);
2641     opc = extract32(insn, 29, 2);
2642     shift_type = extract32(insn, 22, 2);
2643     invert = extract32(insn, 21, 1);
2644     rm = extract32(insn, 16, 5);
2645     shift_amount = extract32(insn, 10, 6);
2646     rn = extract32(insn, 5, 5);
2647     rd = extract32(insn, 0, 5);
2648
2649     if (!sf && (shift_amount & (1 << 5))) {
2650         unallocated_encoding(s);
2651         return;
2652     }
2653
2654     tcg_rd = cpu_reg(s, rd);
2655
2656     if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
2657         /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
2658          * register-register MOV and MVN, so it is worth special casing.
2659          */
2660         tcg_rm = cpu_reg(s, rm);
2661         if (invert) {
2662             tcg_gen_not_i64(tcg_rd, tcg_rm);
2663             if (!sf) {
2664                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
2665             }
2666         } else {
2667             if (sf) {
2668                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
2669             } else {
2670                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
2671             }
2672         }
2673         return;
2674     }
2675
2676     tcg_rm = read_cpu_reg(s, rm, sf);
2677
2678     if (shift_amount) {
2679         shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
2680     }
2681
2682     tcg_rn = cpu_reg(s, rn);
2683
2684     switch (opc | (invert << 2)) {
2685     case 0: /* AND */
2686     case 3: /* ANDS */
2687         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
2688         break;
2689     case 1: /* ORR */
2690         tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
2691         break;
2692     case 2: /* EOR */
2693         tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
2694         break;
2695     case 4: /* BIC */
2696     case 7: /* BICS */
2697         tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
2698         break;
2699     case 5: /* ORN */
2700         tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
2701         break;
2702     case 6: /* EON */
2703         tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
2704         break;
2705     default:
2706         assert(FALSE);
2707         break;
2708     }
2709
2710     if (!sf) {
2711         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
2712     }
2713
2714     if (opc == 3) {
2715         gen_logic_CC(sf, tcg_rd);
2716     }
2717 }
2718
2719 /*
2720  * C3.5.1 Add/subtract (extended register)
2721  *
2722  *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
2723  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
2724  * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
2725  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
2726  *
2727  *  sf: 0 -> 32bit, 1 -> 64bit
2728  *  op: 0 -> add  , 1 -> sub
2729  *   S: 1 -> set flags
2730  * opt: 00
2731  * option: extension type (see DecodeRegExtend)
2732  * imm3: optional shift to Rm
2733  *
2734  * Rd = Rn + LSL(extend(Rm), amount)
2735  */
2736 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
2737 {
2738     int rd = extract32(insn, 0, 5);
2739     int rn = extract32(insn, 5, 5);
2740     int imm3 = extract32(insn, 10, 3);
2741     int option = extract32(insn, 13, 3);
2742     int rm = extract32(insn, 16, 5);
2743     bool setflags = extract32(insn, 29, 1);
2744     bool sub_op = extract32(insn, 30, 1);
2745     bool sf = extract32(insn, 31, 1);
2746
2747     TCGv_i64 tcg_rm, tcg_rn; /* temps */
2748     TCGv_i64 tcg_rd;
2749     TCGv_i64 tcg_result;
2750
2751     if (imm3 > 4) {
2752         unallocated_encoding(s);
2753         return;
2754     }
2755
2756     /* non-flag setting ops may use SP */
2757     if (!setflags) {
2758         tcg_rn = read_cpu_reg_sp(s, rn, sf);
2759         tcg_rd = cpu_reg_sp(s, rd);
2760     } else {
2761         tcg_rn = read_cpu_reg(s, rn, sf);
2762         tcg_rd = cpu_reg(s, rd);
2763     }
2764
2765     tcg_rm = read_cpu_reg(s, rm, sf);
2766     ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
2767
2768     tcg_result = tcg_temp_new_i64();
2769
2770     if (!setflags) {
2771         if (sub_op) {
2772             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
2773         } else {
2774             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
2775         }
2776     } else {
2777         if (sub_op) {
2778             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
2779         } else {
2780             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
2781         }
2782     }
2783
2784     if (sf) {
2785         tcg_gen_mov_i64(tcg_rd, tcg_result);
2786     } else {
2787         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
2788     }
2789
2790     tcg_temp_free_i64(tcg_result);
2791 }
2792
2793 /*
2794  * C3.5.2 Add/subtract (shifted register)
2795  *
2796  *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
2797  * +--+--+--+-----------+-----+--+-------+---------+------+------+
2798  * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
2799  * +--+--+--+-----------+-----+--+-------+---------+------+------+
2800  *
2801  *    sf: 0 -> 32bit, 1 -> 64bit
2802  *    op: 0 -> add  , 1 -> sub
2803  *     S: 1 -> set flags
2804  * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
2805  *  imm6: Shift amount to apply to Rm before the add/sub
2806  */
2807 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
2808 {
2809     int rd = extract32(insn, 0, 5);
2810     int rn = extract32(insn, 5, 5);
2811     int imm6 = extract32(insn, 10, 6);
2812     int rm = extract32(insn, 16, 5);
2813     int shift_type = extract32(insn, 22, 2);
2814     bool setflags = extract32(insn, 29, 1);
2815     bool sub_op = extract32(insn, 30, 1);
2816     bool sf = extract32(insn, 31, 1);
2817
2818     TCGv_i64 tcg_rd = cpu_reg(s, rd);
2819     TCGv_i64 tcg_rn, tcg_rm;
2820     TCGv_i64 tcg_result;
2821
2822     if ((shift_type == 3) || (!sf && (imm6 > 31))) {
2823         unallocated_encoding(s);
2824         return;
2825     }
2826
2827     tcg_rn = read_cpu_reg(s, rn, sf);
2828     tcg_rm = read_cpu_reg(s, rm, sf);
2829
2830     shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
2831
2832     tcg_result = tcg_temp_new_i64();
2833
2834     if (!setflags) {
2835         if (sub_op) {
2836             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
2837         } else {
2838             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
2839         }
2840     } else {
2841         if (sub_op) {
2842             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
2843         } else {
2844             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
2845         }
2846     }
2847
2848     if (sf) {
2849         tcg_gen_mov_i64(tcg_rd, tcg_result);
2850     } else {
2851         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
2852     }
2853
2854     tcg_temp_free_i64(tcg_result);
2855 }
2856
2857 /* C3.5.9 Data-processing (3 source)
2858
2859    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
2860   +--+------+-----------+------+------+----+------+------+------+
2861   |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
2862   +--+------+-----------+------+------+----+------+------+------+
2863
2864  */
2865 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
2866 {
2867     int rd = extract32(insn, 0, 5);
2868     int rn = extract32(insn, 5, 5);
2869     int ra = extract32(insn, 10, 5);
2870     int rm = extract32(insn, 16, 5);
2871     int op_id = (extract32(insn, 29, 3) << 4) |
2872         (extract32(insn, 21, 3) << 1) |
2873         extract32(insn, 15, 1);
2874     bool sf = extract32(insn, 31, 1);
2875     bool is_sub = extract32(op_id, 0, 1);
2876     bool is_high = extract32(op_id, 2, 1);
2877     bool is_signed = false;
2878     TCGv_i64 tcg_op1;
2879     TCGv_i64 tcg_op2;
2880     TCGv_i64 tcg_tmp;
2881
2882     /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
2883     switch (op_id) {
2884     case 0x42: /* SMADDL */
2885     case 0x43: /* SMSUBL */
2886     case 0x44: /* SMULH */
2887         is_signed = true;
2888         break;
2889     case 0x0: /* MADD (32bit) */
2890     case 0x1: /* MSUB (32bit) */
2891     case 0x40: /* MADD (64bit) */
2892     case 0x41: /* MSUB (64bit) */
2893     case 0x4a: /* UMADDL */
2894     case 0x4b: /* UMSUBL */
2895     case 0x4c: /* UMULH */
2896         break;
2897     default:
2898         unallocated_encoding(s);
2899         return;
2900     }
2901
2902     if (is_high) {
2903         TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
2904         TCGv_i64 tcg_rd = cpu_reg(s, rd);
2905         TCGv_i64 tcg_rn = cpu_reg(s, rn);
2906         TCGv_i64 tcg_rm = cpu_reg(s, rm);
2907
2908         if (is_signed) {
2909             tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
2910         } else {
2911             tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
2912         }
2913
2914         tcg_temp_free_i64(low_bits);
2915         return;
2916     }
2917
2918     tcg_op1 = tcg_temp_new_i64();
2919     tcg_op2 = tcg_temp_new_i64();
2920     tcg_tmp = tcg_temp_new_i64();
2921
2922     if (op_id < 0x42) {
2923         tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
2924         tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
2925     } else {
2926         if (is_signed) {
2927             tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
2928             tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
2929         } else {
2930             tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
2931             tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
2932         }
2933     }
2934
2935     if (ra == 31 && !is_sub) {
2936         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
2937         tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
2938     } else {
2939         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
2940         if (is_sub) {
2941             tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
2942         } else {
2943             tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
2944         }
2945     }
2946
2947     if (!sf) {
2948         tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
2949     }
2950
2951     tcg_temp_free_i64(tcg_op1);
2952     tcg_temp_free_i64(tcg_op2);
2953     tcg_temp_free_i64(tcg_tmp);
2954 }
2955
2956 /* C3.5.3 - Add/subtract (with carry)
2957  *  31 30 29 28 27 26 25 24 23 22 21  20  16  15   10  9    5 4   0
2958  * +--+--+--+------------------------+------+---------+------+-----+
2959  * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | opcode2 |  Rn  |  Rd |
2960  * +--+--+--+------------------------+------+---------+------+-----+
2961  *                                            [000000]
2962  */
2963
2964 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
2965 {
2966     unsigned int sf, op, setflags, rm, rn, rd;
2967     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
2968
2969     if (extract32(insn, 10, 6) != 0) {
2970         unallocated_encoding(s);
2971         return;
2972     }
2973
2974     sf = extract32(insn, 31, 1);
2975     op = extract32(insn, 30, 1);
2976     setflags = extract32(insn, 29, 1);
2977     rm = extract32(insn, 16, 5);
2978     rn = extract32(insn, 5, 5);
2979     rd = extract32(insn, 0, 5);
2980
2981     tcg_rd = cpu_reg(s, rd);
2982     tcg_rn = cpu_reg(s, rn);
2983
2984     if (op) {
2985         tcg_y = new_tmp_a64(s);
2986         tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
2987     } else {
2988         tcg_y = cpu_reg(s, rm);
2989     }
2990
2991     if (setflags) {
2992         gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
2993     } else {
2994         gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
2995     }
2996 }
2997
2998 /* C3.5.4 - C3.5.5 Conditional compare (immediate / register)
2999  *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
3000  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3001  * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
3002  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3003  *        [1]                             y                [0]       [0]
3004  */
3005 static void disas_cc(DisasContext *s, uint32_t insn)
3006 {
3007     unsigned int sf, op, y, cond, rn, nzcv, is_imm;
3008     int label_continue = -1;
3009     TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
3010
3011     if (!extract32(insn, 29, 1)) {
3012         unallocated_encoding(s);
3013         return;
3014     }
3015     if (insn & (1 << 10 | 1 << 4)) {
3016         unallocated_encoding(s);
3017         return;
3018     }
3019     sf = extract32(insn, 31, 1);
3020     op = extract32(insn, 30, 1);
3021     is_imm = extract32(insn, 11, 1);
3022     y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
3023     cond = extract32(insn, 12, 4);
3024     rn = extract32(insn, 5, 5);
3025     nzcv = extract32(insn, 0, 4);
3026
3027     if (cond < 0x0e) { /* not always */
3028         int label_match = gen_new_label();
3029         label_continue = gen_new_label();
3030         arm_gen_test_cc(cond, label_match);
3031         /* nomatch: */
3032         tcg_tmp = tcg_temp_new_i64();
3033         tcg_gen_movi_i64(tcg_tmp, nzcv << 28);
3034         gen_set_nzcv(tcg_tmp);
3035         tcg_temp_free_i64(tcg_tmp);
3036         tcg_gen_br(label_continue);
3037         gen_set_label(label_match);
3038     }
3039     /* match, or condition is always */
3040     if (is_imm) {
3041         tcg_y = new_tmp_a64(s);
3042         tcg_gen_movi_i64(tcg_y, y);
3043     } else {
3044         tcg_y = cpu_reg(s, y);
3045     }
3046     tcg_rn = cpu_reg(s, rn);
3047
3048     tcg_tmp = tcg_temp_new_i64();
3049     if (op) {
3050         gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3051     } else {
3052         gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3053     }
3054     tcg_temp_free_i64(tcg_tmp);
3055
3056     if (cond < 0x0e) { /* continue */
3057         gen_set_label(label_continue);
3058     }
3059 }
3060
3061 /* C3.5.6 Conditional select
3062  *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
3063  * +----+----+---+-----------------+------+------+-----+------+------+
3064  * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
3065  * +----+----+---+-----------------+------+------+-----+------+------+
3066  */
3067 static void disas_cond_select(DisasContext *s, uint32_t insn)
3068 {
3069     unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
3070     TCGv_i64 tcg_rd, tcg_src;
3071
3072     if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
3073         /* S == 1 or op2<1> == 1 */
3074         unallocated_encoding(s);
3075         return;
3076     }
3077     sf = extract32(insn, 31, 1);
3078     else_inv = extract32(insn, 30, 1);
3079     rm = extract32(insn, 16, 5);
3080     cond = extract32(insn, 12, 4);
3081     else_inc = extract32(insn, 10, 1);
3082     rn = extract32(insn, 5, 5);
3083     rd = extract32(insn, 0, 5);
3084
3085     if (rd == 31) {
3086         /* silly no-op write; until we use movcond we must special-case
3087          * this to avoid a dead temporary across basic blocks.
3088          */
3089         return;
3090     }
3091
3092     tcg_rd = cpu_reg(s, rd);
3093
3094     if (cond >= 0x0e) { /* condition "always" */
3095         tcg_src = read_cpu_reg(s, rn, sf);
3096         tcg_gen_mov_i64(tcg_rd, tcg_src);
3097     } else {
3098         /* OPTME: we could use movcond here, at the cost of duplicating
3099          * a lot of the arm_gen_test_cc() logic.
3100          */
3101         int label_match = gen_new_label();
3102         int label_continue = gen_new_label();
3103
3104         arm_gen_test_cc(cond, label_match);
3105         /* nomatch: */
3106         tcg_src = cpu_reg(s, rm);
3107
3108         if (else_inv && else_inc) {
3109             tcg_gen_neg_i64(tcg_rd, tcg_src);
3110         } else if (else_inv) {
3111             tcg_gen_not_i64(tcg_rd, tcg_src);
3112         } else if (else_inc) {
3113             tcg_gen_addi_i64(tcg_rd, tcg_src, 1);
3114         } else {
3115             tcg_gen_mov_i64(tcg_rd, tcg_src);
3116         }
3117         if (!sf) {
3118             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3119         }
3120         tcg_gen_br(label_continue);
3121         /* match: */
3122         gen_set_label(label_match);
3123         tcg_src = read_cpu_reg(s, rn, sf);
3124         tcg_gen_mov_i64(tcg_rd, tcg_src);
3125         /* continue: */
3126         gen_set_label(label_continue);
3127     }
3128 }
3129
3130 static void handle_clz(DisasContext *s, unsigned int sf,
3131                        unsigned int rn, unsigned int rd)
3132 {
3133     TCGv_i64 tcg_rd, tcg_rn;
3134     tcg_rd = cpu_reg(s, rd);
3135     tcg_rn = cpu_reg(s, rn);
3136
3137     if (sf) {
3138         gen_helper_clz64(tcg_rd, tcg_rn);
3139     } else {
3140         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3141         tcg_gen_trunc_i64_i32(tcg_tmp32, tcg_rn);
3142         gen_helper_clz(tcg_tmp32, tcg_tmp32);
3143         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3144         tcg_temp_free_i32(tcg_tmp32);
3145     }
3146 }
3147
3148 static void handle_cls(DisasContext *s, unsigned int sf,
3149                        unsigned int rn, unsigned int rd)
3150 {
3151     TCGv_i64 tcg_rd, tcg_rn;
3152     tcg_rd = cpu_reg(s, rd);
3153     tcg_rn = cpu_reg(s, rn);
3154
3155     if (sf) {
3156         gen_helper_cls64(tcg_rd, tcg_rn);
3157     } else {
3158         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3159         tcg_gen_trunc_i64_i32(tcg_tmp32, tcg_rn);
3160         gen_helper_cls32(tcg_tmp32, tcg_tmp32);
3161         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3162         tcg_temp_free_i32(tcg_tmp32);
3163     }
3164 }
3165
3166 static void handle_rbit(DisasContext *s, unsigned int sf,
3167                         unsigned int rn, unsigned int rd)
3168 {
3169     TCGv_i64 tcg_rd, tcg_rn;
3170     tcg_rd = cpu_reg(s, rd);
3171     tcg_rn = cpu_reg(s, rn);
3172
3173     if (sf) {
3174         gen_helper_rbit64(tcg_rd, tcg_rn);
3175     } else {
3176         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3177         tcg_gen_trunc_i64_i32(tcg_tmp32, tcg_rn);
3178         gen_helper_rbit(tcg_tmp32, tcg_tmp32);
3179         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3180         tcg_temp_free_i32(tcg_tmp32);
3181     }
3182 }
3183
3184 /* C5.6.149 REV with sf==1, opcode==3 ("REV64") */
3185 static void handle_rev64(DisasContext *s, unsigned int sf,
3186                          unsigned int rn, unsigned int rd)
3187 {
3188     if (!sf) {
3189         unallocated_encoding(s);
3190         return;
3191     }
3192     tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
3193 }
3194
3195 /* C5.6.149 REV with sf==0, opcode==2
3196  * C5.6.151 REV32 (sf==1, opcode==2)
3197  */
3198 static void handle_rev32(DisasContext *s, unsigned int sf,
3199                          unsigned int rn, unsigned int rd)
3200 {
3201     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3202
3203     if (sf) {
3204         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3205         TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3206
3207         /* bswap32_i64 requires zero high word */
3208         tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
3209         tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
3210         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
3211         tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
3212         tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
3213
3214         tcg_temp_free_i64(tcg_tmp);
3215     } else {
3216         tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
3217         tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
3218     }
3219 }
3220
3221 /* C5.6.150 REV16 (opcode==1) */
3222 static void handle_rev16(DisasContext *s, unsigned int sf,
3223                          unsigned int rn, unsigned int rd)
3224 {
3225     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3226     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3227     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3228
3229     tcg_gen_andi_i64(tcg_tmp, tcg_rn, 0xffff);
3230     tcg_gen_bswap16_i64(tcg_rd, tcg_tmp);
3231
3232     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 16);
3233     tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
3234     tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3235     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 16, 16);
3236
3237     if (sf) {
3238         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
3239         tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
3240         tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3241         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 32, 16);
3242
3243         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 48);
3244         tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3245         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 48, 16);
3246     }
3247
3248     tcg_temp_free_i64(tcg_tmp);
3249 }
3250
3251 /* C3.5.7 Data-processing (1 source)
3252  *   31  30  29  28             21 20     16 15    10 9    5 4    0
3253  * +----+---+---+-----------------+---------+--------+------+------+
3254  * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
3255  * +----+---+---+-----------------+---------+--------+------+------+
3256  */
3257 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
3258 {
3259     unsigned int sf, opcode, rn, rd;
3260
3261     if (extract32(insn, 29, 1) || extract32(insn, 16, 5)) {
3262         unallocated_encoding(s);
3263         return;
3264     }
3265
3266     sf = extract32(insn, 31, 1);
3267     opcode = extract32(insn, 10, 6);
3268     rn = extract32(insn, 5, 5);
3269     rd = extract32(insn, 0, 5);
3270
3271     switch (opcode) {
3272     case 0: /* RBIT */
3273         handle_rbit(s, sf, rn, rd);
3274         break;
3275     case 1: /* REV16 */
3276         handle_rev16(s, sf, rn, rd);
3277         break;
3278     case 2: /* REV32 */
3279         handle_rev32(s, sf, rn, rd);
3280         break;
3281     case 3: /* REV64 */
3282         handle_rev64(s, sf, rn, rd);
3283         break;
3284     case 4: /* CLZ */
3285         handle_clz(s, sf, rn, rd);
3286         break;
3287     case 5: /* CLS */
3288         handle_cls(s, sf, rn, rd);
3289         break;
3290     }
3291 }
3292
3293 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
3294                        unsigned int rm, unsigned int rn, unsigned int rd)
3295 {
3296     TCGv_i64 tcg_n, tcg_m, tcg_rd;
3297     tcg_rd = cpu_reg(s, rd);
3298
3299     if (!sf && is_signed) {
3300         tcg_n = new_tmp_a64(s);
3301         tcg_m = new_tmp_a64(s);
3302         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
3303         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
3304     } else {
3305         tcg_n = read_cpu_reg(s, rn, sf);
3306         tcg_m = read_cpu_reg(s, rm, sf);
3307     }
3308
3309     if (is_signed) {
3310         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
3311     } else {
3312         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
3313     }
3314
3315     if (!sf) { /* zero extend final result */
3316         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3317     }
3318 }
3319
3320 /* C5.6.115 LSLV, C5.6.118 LSRV, C5.6.17 ASRV, C5.6.154 RORV */
3321 static void handle_shift_reg(DisasContext *s,
3322                              enum a64_shift_type shift_type, unsigned int sf,
3323                              unsigned int rm, unsigned int rn, unsigned int rd)
3324 {
3325     TCGv_i64 tcg_shift = tcg_temp_new_i64();
3326     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3327     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3328
3329     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
3330     shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
3331     tcg_temp_free_i64(tcg_shift);
3332 }
3333
3334 /* C3.5.8 Data-processing (2 source)
3335  *   31   30  29 28             21 20  16 15    10 9    5 4    0
3336  * +----+---+---+-----------------+------+--------+------+------+
3337  * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
3338  * +----+---+---+-----------------+------+--------+------+------+
3339  */
3340 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
3341 {
3342     unsigned int sf, rm, opcode, rn, rd;
3343     sf = extract32(insn, 31, 1);
3344     rm = extract32(insn, 16, 5);
3345     opcode = extract32(insn, 10, 6);
3346     rn = extract32(insn, 5, 5);
3347     rd = extract32(insn, 0, 5);
3348
3349     if (extract32(insn, 29, 1)) {
3350         unallocated_encoding(s);
3351         return;
3352     }
3353
3354     switch (opcode) {
3355     case 2: /* UDIV */
3356         handle_div(s, false, sf, rm, rn, rd);
3357         break;
3358     case 3: /* SDIV */
3359         handle_div(s, true, sf, rm, rn, rd);
3360         break;
3361     case 8: /* LSLV */
3362         handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
3363         break;
3364     case 9: /* LSRV */
3365         handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
3366         break;
3367     case 10: /* ASRV */
3368         handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
3369         break;
3370     case 11: /* RORV */
3371         handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
3372         break;
3373     case 16:
3374     case 17:
3375     case 18:
3376     case 19:
3377     case 20:
3378     case 21:
3379     case 22:
3380     case 23: /* CRC32 */
3381         unsupported_encoding(s, insn);
3382         break;
3383     default:
3384         unallocated_encoding(s);
3385         break;
3386     }
3387 }
3388
3389 /* C3.5 Data processing - register */
3390 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
3391 {
3392     switch (extract32(insn, 24, 5)) {
3393     case 0x0a: /* Logical (shifted register) */
3394         disas_logic_reg(s, insn);
3395         break;
3396     case 0x0b: /* Add/subtract */
3397         if (insn & (1 << 21)) { /* (extended register) */
3398             disas_add_sub_ext_reg(s, insn);
3399         } else {
3400             disas_add_sub_reg(s, insn);
3401         }
3402         break;
3403     case 0x1b: /* Data-processing (3 source) */
3404         disas_data_proc_3src(s, insn);
3405         break;
3406     case 0x1a:
3407         switch (extract32(insn, 21, 3)) {
3408         case 0x0: /* Add/subtract (with carry) */
3409             disas_adc_sbc(s, insn);
3410             break;
3411         case 0x2: /* Conditional compare */
3412             disas_cc(s, insn); /* both imm and reg forms */
3413             break;
3414         case 0x4: /* Conditional select */
3415             disas_cond_select(s, insn);
3416             break;
3417         case 0x6: /* Data-processing */
3418             if (insn & (1 << 30)) { /* (1 source) */
3419                 disas_data_proc_1src(s, insn);
3420             } else {            /* (2 source) */
3421                 disas_data_proc_2src(s, insn);
3422             }
3423             break;
3424         default:
3425             unallocated_encoding(s);
3426             break;
3427         }
3428         break;
3429     default:
3430         unallocated_encoding(s);
3431         break;
3432     }
3433 }
3434
3435 /* Convert ARM rounding mode to softfloat */
3436 static inline int arm_rmode_to_sf(int rmode)
3437 {
3438     switch (rmode) {
3439     case FPROUNDING_TIEAWAY:
3440         rmode = float_round_ties_away;
3441         break;
3442     case FPROUNDING_ODD:
3443         /* FIXME: add support for TIEAWAY and ODD */
3444         qemu_log_mask(LOG_UNIMP, "arm: unimplemented rounding mode: %d\n",
3445                       rmode);
3446     case FPROUNDING_TIEEVEN:
3447     default:
3448         rmode = float_round_nearest_even;
3449         break;
3450     case FPROUNDING_POSINF:
3451         rmode = float_round_up;
3452         break;
3453     case FPROUNDING_NEGINF:
3454         rmode = float_round_down;
3455         break;
3456     case FPROUNDING_ZERO:
3457         rmode = float_round_to_zero;
3458         break;
3459     }
3460     return rmode;
3461 }
3462
3463 static void handle_fp_compare(DisasContext *s, bool is_double,
3464                               unsigned int rn, unsigned int rm,
3465                               bool cmp_with_zero, bool signal_all_nans)
3466 {
3467     TCGv_i64 tcg_flags = tcg_temp_new_i64();
3468     TCGv_ptr fpst = get_fpstatus_ptr();
3469
3470     if (is_double) {
3471         TCGv_i64 tcg_vn, tcg_vm;
3472
3473         tcg_vn = read_fp_dreg(s, rn);
3474         if (cmp_with_zero) {
3475             tcg_vm = tcg_const_i64(0);
3476         } else {
3477             tcg_vm = read_fp_dreg(s, rm);
3478         }
3479         if (signal_all_nans) {
3480             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
3481         } else {
3482             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
3483         }
3484         tcg_temp_free_i64(tcg_vn);
3485         tcg_temp_free_i64(tcg_vm);
3486     } else {
3487         TCGv_i32 tcg_vn, tcg_vm;
3488
3489         tcg_vn = read_fp_sreg(s, rn);
3490         if (cmp_with_zero) {
3491             tcg_vm = tcg_const_i32(0);
3492         } else {
3493             tcg_vm = read_fp_sreg(s, rm);
3494         }
3495         if (signal_all_nans) {
3496             gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
3497         } else {
3498             gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
3499         }
3500         tcg_temp_free_i32(tcg_vn);
3501         tcg_temp_free_i32(tcg_vm);
3502     }
3503
3504     tcg_temp_free_ptr(fpst);
3505
3506     gen_set_nzcv(tcg_flags);
3507
3508     tcg_temp_free_i64(tcg_flags);
3509 }
3510
3511 /* C3.6.22 Floating point compare
3512  *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
3513  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
3514  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
3515  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
3516  */
3517 static void disas_fp_compare(DisasContext *s, uint32_t insn)
3518 {
3519     unsigned int mos, type, rm, op, rn, opc, op2r;
3520
3521     mos = extract32(insn, 29, 3);
3522     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
3523     rm = extract32(insn, 16, 5);
3524     op = extract32(insn, 14, 2);
3525     rn = extract32(insn, 5, 5);
3526     opc = extract32(insn, 3, 2);
3527     op2r = extract32(insn, 0, 3);
3528
3529     if (mos || op || op2r || type > 1) {
3530         unallocated_encoding(s);
3531         return;
3532     }
3533
3534     handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2);
3535 }
3536
3537 /* C3.6.23 Floating point conditional compare
3538  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
3539  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
3540  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
3541  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
3542  */
3543 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
3544 {
3545     unsigned int mos, type, rm, cond, rn, op, nzcv;
3546     TCGv_i64 tcg_flags;
3547     int label_continue = -1;
3548
3549     mos = extract32(insn, 29, 3);
3550     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
3551     rm = extract32(insn, 16, 5);
3552     cond = extract32(insn, 12, 4);
3553     rn = extract32(insn, 5, 5);
3554     op = extract32(insn, 4, 1);
3555     nzcv = extract32(insn, 0, 4);
3556
3557     if (mos || type > 1) {
3558         unallocated_encoding(s);
3559         return;
3560     }
3561
3562     if (cond < 0x0e) { /* not always */
3563         int label_match = gen_new_label();
3564         label_continue = gen_new_label();
3565         arm_gen_test_cc(cond, label_match);
3566         /* nomatch: */
3567         tcg_flags = tcg_const_i64(nzcv << 28);
3568         gen_set_nzcv(tcg_flags);
3569         tcg_temp_free_i64(tcg_flags);
3570         tcg_gen_br(label_continue);
3571         gen_set_label(label_match);
3572     }
3573
3574     handle_fp_compare(s, type, rn, rm, false, op);
3575
3576     if (cond < 0x0e) {
3577         gen_set_label(label_continue);
3578     }
3579 }
3580
3581 /* copy src FP register to dst FP register; type specifies single or double */
3582 static void gen_mov_fp2fp(DisasContext *s, int type, int dst, int src)
3583 {
3584     if (type) {
3585         TCGv_i64 v = read_fp_dreg(s, src);
3586         write_fp_dreg(s, dst, v);
3587         tcg_temp_free_i64(v);
3588     } else {
3589         TCGv_i32 v = read_fp_sreg(s, src);
3590         write_fp_sreg(s, dst, v);
3591         tcg_temp_free_i32(v);
3592     }
3593 }
3594
3595 /* C3.6.24 Floating point conditional select
3596  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
3597  * +---+---+---+-----------+------+---+------+------+-----+------+------+
3598  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
3599  * +---+---+---+-----------+------+---+------+------+-----+------+------+
3600  */
3601 static void disas_fp_csel(DisasContext *s, uint32_t insn)
3602 {
3603     unsigned int mos, type, rm, cond, rn, rd;
3604     int label_continue = -1;
3605
3606     mos = extract32(insn, 29, 3);
3607     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
3608     rm = extract32(insn, 16, 5);
3609     cond = extract32(insn, 12, 4);
3610     rn = extract32(insn, 5, 5);
3611     rd = extract32(insn, 0, 5);
3612
3613     if (mos || type > 1) {
3614         unallocated_encoding(s);
3615         return;
3616     }
3617
3618     if (cond < 0x0e) { /* not always */
3619         int label_match = gen_new_label();
3620         label_continue = gen_new_label();
3621         arm_gen_test_cc(cond, label_match);
3622         /* nomatch: */
3623         gen_mov_fp2fp(s, type, rd, rm);
3624         tcg_gen_br(label_continue);
3625         gen_set_label(label_match);
3626     }
3627
3628     gen_mov_fp2fp(s, type, rd, rn);
3629
3630     if (cond < 0x0e) { /* continue */
3631         gen_set_label(label_continue);
3632     }
3633 }
3634
3635 /* C3.6.25 Floating-point data-processing (1 source) - single precision */
3636 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
3637 {
3638     TCGv_ptr fpst;
3639     TCGv_i32 tcg_op;
3640     TCGv_i32 tcg_res;
3641
3642     fpst = get_fpstatus_ptr();
3643     tcg_op = read_fp_sreg(s, rn);
3644     tcg_res = tcg_temp_new_i32();
3645
3646     switch (opcode) {
3647     case 0x0: /* FMOV */
3648         tcg_gen_mov_i32(tcg_res, tcg_op);
3649         break;
3650     case 0x1: /* FABS */
3651         gen_helper_vfp_abss(tcg_res, tcg_op);
3652         break;
3653     case 0x2: /* FNEG */
3654         gen_helper_vfp_negs(tcg_res, tcg_op);
3655         break;
3656     case 0x3: /* FSQRT */
3657         gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
3658         break;
3659     case 0x8: /* FRINTN */
3660     case 0x9: /* FRINTP */
3661     case 0xa: /* FRINTM */
3662     case 0xb: /* FRINTZ */
3663     case 0xc: /* FRINTA */
3664     {
3665         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
3666
3667         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
3668         gen_helper_rints(tcg_res, tcg_op, fpst);
3669
3670         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
3671         tcg_temp_free_i32(tcg_rmode);
3672         break;
3673     }
3674     case 0xe: /* FRINTX */
3675         gen_helper_rints_exact(tcg_res, tcg_op, fpst);
3676         break;
3677     case 0xf: /* FRINTI */
3678         gen_helper_rints(tcg_res, tcg_op, fpst);
3679         break;
3680     default:
3681         abort();
3682     }
3683
3684     write_fp_sreg(s, rd, tcg_res);
3685
3686     tcg_temp_free_ptr(fpst);
3687     tcg_temp_free_i32(tcg_op);
3688     tcg_temp_free_i32(tcg_res);
3689 }
3690
3691 /* C3.6.25 Floating-point data-processing (1 source) - double precision */
3692 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
3693 {
3694     TCGv_ptr fpst;
3695     TCGv_i64 tcg_op;
3696     TCGv_i64 tcg_res;
3697
3698     fpst = get_fpstatus_ptr();
3699     tcg_op = read_fp_dreg(s, rn);
3700     tcg_res = tcg_temp_new_i64();
3701
3702     switch (opcode) {
3703     case 0x0: /* FMOV */
3704         tcg_gen_mov_i64(tcg_res, tcg_op);
3705         break;
3706     case 0x1: /* FABS */
3707         gen_helper_vfp_absd(tcg_res, tcg_op);
3708         break;
3709     case 0x2: /* FNEG */
3710         gen_helper_vfp_negd(tcg_res, tcg_op);
3711         break;
3712     case 0x3: /* FSQRT */
3713         gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
3714         break;
3715     case 0x8: /* FRINTN */
3716     case 0x9: /* FRINTP */
3717     case 0xa: /* FRINTM */
3718     case 0xb: /* FRINTZ */
3719     case 0xc: /* FRINTA */
3720     {
3721         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
3722
3723         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
3724         gen_helper_rintd(tcg_res, tcg_op, fpst);
3725
3726         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
3727         tcg_temp_free_i32(tcg_rmode);
3728         break;
3729     }
3730     case 0xe: /* FRINTX */
3731         gen_helper_rintd_exact(tcg_res, tcg_op, fpst);
3732         break;
3733     case 0xf: /* FRINTI */
3734         gen_helper_rintd(tcg_res, tcg_op, fpst);
3735         break;
3736     default:
3737         abort();
3738     }
3739
3740     write_fp_dreg(s, rd, tcg_res);
3741
3742     tcg_temp_free_ptr(fpst);
3743     tcg_temp_free_i64(tcg_op);
3744     tcg_temp_free_i64(tcg_res);
3745 }
3746
3747 static void handle_fp_fcvt(DisasContext *s, int opcode,
3748                            int rd, int rn, int dtype, int ntype)
3749 {
3750     switch (ntype) {
3751     case 0x0:
3752     {
3753         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
3754         if (dtype == 1) {
3755             /* Single to double */
3756             TCGv_i64 tcg_rd = tcg_temp_new_i64();
3757             gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
3758             write_fp_dreg(s, rd, tcg_rd);
3759             tcg_temp_free_i64(tcg_rd);
3760         } else {
3761             /* Single to half */
3762             TCGv_i32 tcg_rd = tcg_temp_new_i32();
3763             gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, cpu_env);
3764             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
3765             write_fp_sreg(s, rd, tcg_rd);
3766             tcg_temp_free_i32(tcg_rd);
3767         }
3768         tcg_temp_free_i32(tcg_rn);
3769         break;
3770     }
3771     case 0x1:
3772     {
3773         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
3774         TCGv_i32 tcg_rd = tcg_temp_new_i32();
3775         if (dtype == 0) {
3776             /* Double to single */
3777             gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
3778         } else {
3779             /* Double to half */
3780             gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, cpu_env);
3781             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
3782         }
3783         write_fp_sreg(s, rd, tcg_rd);
3784         tcg_temp_free_i32(tcg_rd);
3785         tcg_temp_free_i64(tcg_rn);
3786         break;
3787     }
3788     case 0x3:
3789     {
3790         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
3791         tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
3792         if (dtype == 0) {
3793             /* Half to single */
3794             TCGv_i32 tcg_rd = tcg_temp_new_i32();
3795             gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, cpu_env);
3796             write_fp_sreg(s, rd, tcg_rd);
3797             tcg_temp_free_i32(tcg_rd);
3798         } else {
3799             /* Half to double */
3800             TCGv_i64 tcg_rd = tcg_temp_new_i64();
3801             gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, cpu_env);
3802             write_fp_dreg(s, rd, tcg_rd);
3803             tcg_temp_free_i64(tcg_rd);
3804         }
3805         tcg_temp_free_i32(tcg_rn);
3806         break;
3807     }
3808     default:
3809         abort();
3810     }
3811 }
3812
3813 /* C3.6.25 Floating point data-processing (1 source)
3814  *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
3815  * +---+---+---+-----------+------+---+--------+-----------+------+------+
3816  * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
3817  * +---+---+---+-----------+------+---+--------+-----------+------+------+
3818  */
3819 static void disas_fp_1src(DisasContext *s, uint32_t insn)
3820 {
3821     int type = extract32(insn, 22, 2);
3822     int opcode = extract32(insn, 15, 6);
3823     int rn = extract32(insn, 5, 5);
3824     int rd = extract32(insn, 0, 5);
3825
3826     switch (opcode) {
3827     case 0x4: case 0x5: case 0x7:
3828     {
3829         /* FCVT between half, single and double precision */
3830         int dtype = extract32(opcode, 0, 2);
3831         if (type == 2 || dtype == type) {
3832             unallocated_encoding(s);
3833             return;
3834         }
3835         handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
3836         break;
3837     }
3838     case 0x0 ... 0x3:
3839     case 0x8 ... 0xc:
3840     case 0xe ... 0xf:
3841         /* 32-to-32 and 64-to-64 ops */
3842         switch (type) {
3843         case 0:
3844             handle_fp_1src_single(s, opcode, rd, rn);
3845             break;
3846         case 1:
3847             handle_fp_1src_double(s, opcode, rd, rn);
3848             break;
3849         default:
3850             unallocated_encoding(s);
3851         }
3852         break;
3853     default:
3854         unallocated_encoding(s);
3855         break;
3856     }
3857 }
3858
3859 /* C3.6.26 Floating-point data-processing (2 source) - single precision */
3860 static void handle_fp_2src_single(DisasContext *s, int opcode,
3861                                   int rd, int rn, int rm)
3862 {
3863     TCGv_i32 tcg_op1;
3864     TCGv_i32 tcg_op2;
3865     TCGv_i32 tcg_res;
3866     TCGv_ptr fpst;
3867
3868     tcg_res = tcg_temp_new_i32();
3869     fpst = get_fpstatus_ptr();
3870     tcg_op1 = read_fp_sreg(s, rn);
3871     tcg_op2 = read_fp_sreg(s, rm);
3872
3873     switch (opcode) {
3874     case 0x0: /* FMUL */
3875         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
3876         break;
3877     case 0x1: /* FDIV */
3878         gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
3879         break;
3880     case 0x2: /* FADD */
3881         gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
3882         break;
3883     case 0x3: /* FSUB */
3884         gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
3885         break;
3886     case 0x4: /* FMAX */
3887         gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
3888         break;
3889     case 0x5: /* FMIN */
3890         gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
3891         break;
3892     case 0x6: /* FMAXNM */
3893         gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
3894         break;
3895     case 0x7: /* FMINNM */
3896         gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
3897         break;
3898     case 0x8: /* FNMUL */
3899         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
3900         gen_helper_vfp_negs(tcg_res, tcg_res);
3901         break;
3902     }
3903
3904     write_fp_sreg(s, rd, tcg_res);
3905
3906     tcg_temp_free_ptr(fpst);
3907     tcg_temp_free_i32(tcg_op1);
3908     tcg_temp_free_i32(tcg_op2);
3909     tcg_temp_free_i32(tcg_res);
3910 }
3911
3912 /* C3.6.26 Floating-point data-processing (2 source) - double precision */
3913 static void handle_fp_2src_double(DisasContext *s, int opcode,
3914                                   int rd, int rn, int rm)
3915 {
3916     TCGv_i64 tcg_op1;
3917     TCGv_i64 tcg_op2;
3918     TCGv_i64 tcg_res;
3919     TCGv_ptr fpst;
3920
3921     tcg_res = tcg_temp_new_i64();
3922     fpst = get_fpstatus_ptr();
3923     tcg_op1 = read_fp_dreg(s, rn);
3924     tcg_op2 = read_fp_dreg(s, rm);
3925
3926     switch (opcode) {
3927     case 0x0: /* FMUL */
3928         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
3929         break;
3930     case 0x1: /* FDIV */
3931         gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
3932         break;
3933     case 0x2: /* FADD */
3934         gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
3935         break;
3936     case 0x3: /* FSUB */
3937         gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
3938         break;
3939     case 0x4: /* FMAX */
3940         gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
3941         break;
3942     case 0x5: /* FMIN */
3943         gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
3944         break;
3945     case 0x6: /* FMAXNM */
3946         gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
3947         break;
3948     case 0x7: /* FMINNM */
3949         gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
3950         break;
3951     case 0x8: /* FNMUL */
3952         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
3953         gen_helper_vfp_negd(tcg_res, tcg_res);
3954         break;
3955     }
3956
3957     write_fp_dreg(s, rd, tcg_res);
3958
3959     tcg_temp_free_ptr(fpst);
3960     tcg_temp_free_i64(tcg_op1);
3961     tcg_temp_free_i64(tcg_op2);
3962     tcg_temp_free_i64(tcg_res);
3963 }
3964
3965 /* C3.6.26 Floating point data-processing (2 source)
3966  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
3967  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
3968  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
3969  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
3970  */
3971 static void disas_fp_2src(DisasContext *s, uint32_t insn)
3972 {
3973     int type = extract32(insn, 22, 2);
3974     int rd = extract32(insn, 0, 5);
3975     int rn = extract32(insn, 5, 5);
3976     int rm = extract32(insn, 16, 5);
3977     int opcode = extract32(insn, 12, 4);
3978
3979     if (opcode > 8) {
3980         unallocated_encoding(s);
3981         return;
3982     }
3983
3984     switch (type) {
3985     case 0:
3986         handle_fp_2src_single(s, opcode, rd, rn, rm);
3987         break;
3988     case 1:
3989         handle_fp_2src_double(s, opcode, rd, rn, rm);
3990         break;
3991     default:
3992         unallocated_encoding(s);
3993     }
3994 }
3995
3996 /* C3.6.27 Floating-point data-processing (3 source) - single precision */
3997 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
3998                                   int rd, int rn, int rm, int ra)
3999 {
4000     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
4001     TCGv_i32 tcg_res = tcg_temp_new_i32();
4002     TCGv_ptr fpst = get_fpstatus_ptr();
4003
4004     tcg_op1 = read_fp_sreg(s, rn);
4005     tcg_op2 = read_fp_sreg(s, rm);
4006     tcg_op3 = read_fp_sreg(s, ra);
4007
4008     /* These are fused multiply-add, and must be done as one
4009      * floating point operation with no rounding between the
4010      * multiplication and addition steps.
4011      * NB that doing the negations here as separate steps is
4012      * correct : an input NaN should come out with its sign bit
4013      * flipped if it is a negated-input.
4014      */
4015     if (o1 == true) {
4016         gen_helper_vfp_negs(tcg_op3, tcg_op3);
4017     }
4018
4019     if (o0 != o1) {
4020         gen_helper_vfp_negs(tcg_op1, tcg_op1);
4021     }
4022
4023     gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4024
4025     write_fp_sreg(s, rd, tcg_res);
4026
4027     tcg_temp_free_ptr(fpst);
4028     tcg_temp_free_i32(tcg_op1);
4029     tcg_temp_free_i32(tcg_op2);
4030     tcg_temp_free_i32(tcg_op3);
4031     tcg_temp_free_i32(tcg_res);
4032 }
4033
4034 /* C3.6.27 Floating-point data-processing (3 source) - double precision */
4035 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
4036                                   int rd, int rn, int rm, int ra)
4037 {
4038     TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
4039     TCGv_i64 tcg_res = tcg_temp_new_i64();
4040     TCGv_ptr fpst = get_fpstatus_ptr();
4041
4042     tcg_op1 = read_fp_dreg(s, rn);
4043     tcg_op2 = read_fp_dreg(s, rm);
4044     tcg_op3 = read_fp_dreg(s, ra);
4045
4046     /* These are fused multiply-add, and must be done as one
4047      * floating point operation with no rounding between the
4048      * multiplication and addition steps.
4049      * NB that doing the negations here as separate steps is
4050      * correct : an input NaN should come out with its sign bit
4051      * flipped if it is a negated-input.
4052      */
4053     if (o1 == true) {
4054         gen_helper_vfp_negd(tcg_op3, tcg_op3);
4055     }
4056
4057     if (o0 != o1) {
4058         gen_helper_vfp_negd(tcg_op1, tcg_op1);
4059     }
4060
4061     gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4062
4063     write_fp_dreg(s, rd, tcg_res);
4064
4065     tcg_temp_free_ptr(fpst);
4066     tcg_temp_free_i64(tcg_op1);
4067     tcg_temp_free_i64(tcg_op2);
4068     tcg_temp_free_i64(tcg_op3);
4069     tcg_temp_free_i64(tcg_res);
4070 }
4071
4072 /* C3.6.27 Floating point data-processing (3 source)
4073  *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
4074  * +---+---+---+-----------+------+----+------+----+------+------+------+
4075  * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4076  * +---+---+---+-----------+------+----+------+----+------+------+------+
4077  */
4078 static void disas_fp_3src(DisasContext *s, uint32_t insn)
4079 {
4080     int type = extract32(insn, 22, 2);
4081     int rd = extract32(insn, 0, 5);
4082     int rn = extract32(insn, 5, 5);
4083     int ra = extract32(insn, 10, 5);
4084     int rm = extract32(insn, 16, 5);
4085     bool o0 = extract32(insn, 15, 1);
4086     bool o1 = extract32(insn, 21, 1);
4087
4088     switch (type) {
4089     case 0:
4090         handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
4091         break;
4092     case 1:
4093         handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
4094         break;
4095     default:
4096         unallocated_encoding(s);
4097     }
4098 }
4099
4100 /* C3.6.28 Floating point immediate
4101  *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
4102  * +---+---+---+-----------+------+---+------------+-------+------+------+
4103  * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
4104  * +---+---+---+-----------+------+---+------------+-------+------+------+
4105  */
4106 static void disas_fp_imm(DisasContext *s, uint32_t insn)
4107 {
4108     int rd = extract32(insn, 0, 5);
4109     int imm8 = extract32(insn, 13, 8);
4110     int is_double = extract32(insn, 22, 2);
4111     uint64_t imm;
4112     TCGv_i64 tcg_res;
4113
4114     if (is_double > 1) {
4115         unallocated_encoding(s);
4116         return;
4117     }
4118
4119     /* The imm8 encodes the sign bit, enough bits to represent
4120      * an exponent in the range 01....1xx to 10....0xx,
4121      * and the most significant 4 bits of the mantissa; see
4122      * VFPExpandImm() in the v8 ARM ARM.
4123      */
4124     if (is_double) {
4125         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4126             (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
4127             extract32(imm8, 0, 6);
4128         imm <<= 48;
4129     } else {
4130         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4131             (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
4132             (extract32(imm8, 0, 6) << 3);
4133         imm <<= 16;
4134     }
4135
4136     tcg_res = tcg_const_i64(imm);
4137     write_fp_dreg(s, rd, tcg_res);
4138     tcg_temp_free_i64(tcg_res);
4139 }
4140
4141 /* Handle floating point <=> fixed point conversions. Note that we can
4142  * also deal with fp <=> integer conversions as a special case (scale == 64)
4143  * OPTME: consider handling that special case specially or at least skipping
4144  * the call to scalbn in the helpers for zero shifts.
4145  */
4146 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
4147                            bool itof, int rmode, int scale, int sf, int type)
4148 {
4149     bool is_signed = !(opcode & 1);
4150     bool is_double = type;
4151     TCGv_ptr tcg_fpstatus;
4152     TCGv_i32 tcg_shift;
4153
4154     tcg_fpstatus = get_fpstatus_ptr();
4155
4156     tcg_shift = tcg_const_i32(64 - scale);
4157
4158     if (itof) {
4159         TCGv_i64 tcg_int = cpu_reg(s, rn);
4160         if (!sf) {
4161             TCGv_i64 tcg_extend = new_tmp_a64(s);
4162
4163             if (is_signed) {
4164                 tcg_gen_ext32s_i64(tcg_extend, tcg_int);
4165             } else {
4166                 tcg_gen_ext32u_i64(tcg_extend, tcg_int);
4167             }
4168
4169             tcg_int = tcg_extend;
4170         }
4171
4172         if (is_double) {
4173             TCGv_i64 tcg_double = tcg_temp_new_i64();
4174             if (is_signed) {
4175                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
4176                                      tcg_shift, tcg_fpstatus);
4177             } else {
4178                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
4179                                      tcg_shift, tcg_fpstatus);
4180             }
4181             write_fp_dreg(s, rd, tcg_double);
4182             tcg_temp_free_i64(tcg_double);
4183         } else {
4184             TCGv_i32 tcg_single = tcg_temp_new_i32();
4185             if (is_signed) {
4186                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
4187                                      tcg_shift, tcg_fpstatus);
4188             } else {
4189                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
4190                                      tcg_shift, tcg_fpstatus);
4191             }
4192             write_fp_sreg(s, rd, tcg_single);
4193             tcg_temp_free_i32(tcg_single);
4194         }
4195     } else {
4196         TCGv_i64 tcg_int = cpu_reg(s, rd);
4197         TCGv_i32 tcg_rmode;
4198
4199         if (extract32(opcode, 2, 1)) {
4200             /* There are too many rounding modes to all fit into rmode,
4201              * so FCVTA[US] is a special case.
4202              */
4203             rmode = FPROUNDING_TIEAWAY;
4204         }
4205
4206         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
4207
4208         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4209
4210         if (is_double) {
4211             TCGv_i64 tcg_double = read_fp_dreg(s, rn);
4212             if (is_signed) {
4213                 if (!sf) {
4214                     gen_helper_vfp_tosld(tcg_int, tcg_double,
4215                                          tcg_shift, tcg_fpstatus);
4216                 } else {
4217                     gen_helper_vfp_tosqd(tcg_int, tcg_double,
4218                                          tcg_shift, tcg_fpstatus);
4219                 }
4220             } else {
4221                 if (!sf) {
4222                     gen_helper_vfp_tould(tcg_int, tcg_double,
4223                                          tcg_shift, tcg_fpstatus);
4224                 } else {
4225                     gen_helper_vfp_touqd(tcg_int, tcg_double,
4226                                          tcg_shift, tcg_fpstatus);
4227                 }
4228             }
4229             tcg_temp_free_i64(tcg_double);
4230         } else {
4231             TCGv_i32 tcg_single = read_fp_sreg(s, rn);
4232             if (sf) {
4233                 if (is_signed) {
4234                     gen_helper_vfp_tosqs(tcg_int, tcg_single,
4235                                          tcg_shift, tcg_fpstatus);
4236                 } else {
4237                     gen_helper_vfp_touqs(tcg_int, tcg_single,
4238                                          tcg_shift, tcg_fpstatus);
4239                 }
4240             } else {
4241                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
4242                 if (is_signed) {
4243                     gen_helper_vfp_tosls(tcg_dest, tcg_single,
4244                                          tcg_shift, tcg_fpstatus);
4245                 } else {
4246                     gen_helper_vfp_touls(tcg_dest, tcg_single,
4247                                          tcg_shift, tcg_fpstatus);
4248                 }
4249                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
4250                 tcg_temp_free_i32(tcg_dest);
4251             }
4252             tcg_temp_free_i32(tcg_single);
4253         }
4254
4255         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4256         tcg_temp_free_i32(tcg_rmode);
4257
4258         if (!sf) {
4259             tcg_gen_ext32u_i64(tcg_int, tcg_int);
4260         }
4261     }
4262
4263     tcg_temp_free_ptr(tcg_fpstatus);
4264     tcg_temp_free_i32(tcg_shift);
4265 }
4266
4267 /* C3.6.29 Floating point <-> fixed point conversions
4268  *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
4269  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
4270  * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
4271  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
4272  */
4273 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
4274 {
4275     int rd = extract32(insn, 0, 5);
4276     int rn = extract32(insn, 5, 5);
4277     int scale = extract32(insn, 10, 6);
4278     int opcode = extract32(insn, 16, 3);
4279     int rmode = extract32(insn, 19, 2);
4280     int type = extract32(insn, 22, 2);
4281     bool sbit = extract32(insn, 29, 1);
4282     bool sf = extract32(insn, 31, 1);
4283     bool itof;
4284
4285     if (sbit || (type > 1)
4286         || (!sf && scale < 32)) {
4287         unallocated_encoding(s);
4288         return;
4289     }
4290
4291     switch ((rmode << 3) | opcode) {
4292     case 0x2: /* SCVTF */
4293     case 0x3: /* UCVTF */
4294         itof = true;
4295         break;
4296     case 0x18: /* FCVTZS */
4297     case 0x19: /* FCVTZU */
4298         itof = false;
4299         break;
4300     default:
4301         unallocated_encoding(s);
4302         return;
4303     }
4304
4305     handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
4306 }
4307
4308 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
4309 {
4310     /* FMOV: gpr to or from float, double, or top half of quad fp reg,
4311      * without conversion.
4312      */
4313
4314     if (itof) {
4315         TCGv_i64 tcg_rn = cpu_reg(s, rn);
4316
4317         switch (type) {
4318         case 0:
4319         {
4320             /* 32 bit */
4321             TCGv_i64 tmp = tcg_temp_new_i64();
4322             tcg_gen_ext32u_i64(tmp, tcg_rn);
4323             tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(rd, MO_64));
4324             tcg_gen_movi_i64(tmp, 0);
4325             tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(rd));
4326             tcg_temp_free_i64(tmp);
4327             break;
4328         }
4329         case 1:
4330         {
4331             /* 64 bit */
4332             TCGv_i64 tmp = tcg_const_i64(0);
4333             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(rd, MO_64));
4334             tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(rd));
4335             tcg_temp_free_i64(tmp);
4336             break;
4337         }
4338         case 2:
4339             /* 64 bit to top half. */
4340             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(rd));
4341             break;
4342         }
4343     } else {
4344         TCGv_i64 tcg_rd = cpu_reg(s, rd);
4345
4346         switch (type) {
4347         case 0:
4348             /* 32 bit */
4349             tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(rn, MO_32));
4350             break;
4351         case 1:
4352             /* 64 bit */
4353             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(rn, MO_64));
4354             break;
4355         case 2:
4356             /* 64 bits from top half */
4357             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(rn));
4358             break;
4359         }
4360     }
4361 }
4362
4363 /* C3.6.30 Floating point <-> integer conversions
4364  *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
4365  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
4366  * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
4367  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
4368  */
4369 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
4370 {
4371     int rd = extract32(insn, 0, 5);
4372     int rn = extract32(insn, 5, 5);
4373     int opcode = extract32(insn, 16, 3);
4374     int rmode = extract32(insn, 19, 2);
4375     int type = extract32(insn, 22, 2);
4376     bool sbit = extract32(insn, 29, 1);
4377     bool sf = extract32(insn, 31, 1);
4378
4379     if (sbit) {
4380         unallocated_encoding(s);
4381         return;
4382     }
4383
4384     if (opcode > 5) {
4385         /* FMOV */
4386         bool itof = opcode & 1;
4387
4388         if (rmode >= 2) {
4389             unallocated_encoding(s);
4390             return;
4391         }
4392
4393         switch (sf << 3 | type << 1 | rmode) {
4394         case 0x0: /* 32 bit */
4395         case 0xa: /* 64 bit */
4396         case 0xd: /* 64 bit to top half of quad */
4397             break;
4398         default:
4399             /* all other sf/type/rmode combinations are invalid */
4400             unallocated_encoding(s);
4401             break;
4402         }
4403
4404         handle_fmov(s, rd, rn, type, itof);
4405     } else {
4406         /* actual FP conversions */
4407         bool itof = extract32(opcode, 1, 1);
4408
4409         if (type > 1 || (rmode != 0 && opcode > 1)) {
4410             unallocated_encoding(s);
4411             return;
4412         }
4413
4414         handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
4415     }
4416 }
4417
4418 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
4419  *   31  30  29 28     25 24                          0
4420  * +---+---+---+---------+-----------------------------+
4421  * |   | 0 |   | 1 1 1 1 |                             |
4422  * +---+---+---+---------+-----------------------------+
4423  */
4424 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
4425 {
4426     if (extract32(insn, 24, 1)) {
4427         /* Floating point data-processing (3 source) */
4428         disas_fp_3src(s, insn);
4429     } else if (extract32(insn, 21, 1) == 0) {
4430         /* Floating point to fixed point conversions */
4431         disas_fp_fixed_conv(s, insn);
4432     } else {
4433         switch (extract32(insn, 10, 2)) {
4434         case 1:
4435             /* Floating point conditional compare */
4436             disas_fp_ccomp(s, insn);
4437             break;
4438         case 2:
4439             /* Floating point data-processing (2 source) */
4440             disas_fp_2src(s, insn);
4441             break;
4442         case 3:
4443             /* Floating point conditional select */
4444             disas_fp_csel(s, insn);
4445             break;
4446         case 0:
4447             switch (ctz32(extract32(insn, 12, 4))) {
4448             case 0: /* [15:12] == xxx1 */
4449                 /* Floating point immediate */
4450                 disas_fp_imm(s, insn);
4451                 break;
4452             case 1: /* [15:12] == xx10 */
4453                 /* Floating point compare */
4454                 disas_fp_compare(s, insn);
4455                 break;
4456             case 2: /* [15:12] == x100 */
4457                 /* Floating point data-processing (1 source) */
4458                 disas_fp_1src(s, insn);
4459                 break;
4460             case 3: /* [15:12] == 1000 */
4461                 unallocated_encoding(s);
4462                 break;
4463             default: /* [15:12] == 0000 */
4464                 /* Floating point <-> integer conversions */
4465                 disas_fp_int_conv(s, insn);
4466                 break;
4467             }
4468             break;
4469         }
4470     }
4471 }
4472
4473 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
4474 {
4475     /* Note that this is called with all non-FP cases from
4476      * table C3-6 so it must UNDEF for entries not specifically
4477      * allocated to instructions in that table.
4478      */
4479     unsupported_encoding(s, insn);
4480 }
4481
4482 /* C3.6 Data processing - SIMD and floating point */
4483 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
4484 {
4485     if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
4486         disas_data_proc_fp(s, insn);
4487     } else {
4488         /* SIMD, including crypto */
4489         disas_data_proc_simd(s, insn);
4490     }
4491 }
4492
4493 /* C3.1 A64 instruction index by encoding */
4494 static void disas_a64_insn(CPUARMState *env, DisasContext *s)
4495 {
4496     uint32_t insn;
4497
4498     insn = arm_ldl_code(env, s->pc, s->bswap_code);
4499     s->insn = insn;
4500     s->pc += 4;
4501
4502     switch (extract32(insn, 25, 4)) {
4503     case 0x0: case 0x1: case 0x2: case 0x3: /* UNALLOCATED */
4504         unallocated_encoding(s);
4505         break;
4506     case 0x8: case 0x9: /* Data processing - immediate */
4507         disas_data_proc_imm(s, insn);
4508         break;
4509     case 0xa: case 0xb: /* Branch, exception generation and system insns */
4510         disas_b_exc_sys(s, insn);
4511         break;
4512     case 0x4:
4513     case 0x6:
4514     case 0xc:
4515     case 0xe:      /* Loads and stores */
4516         disas_ldst(s, insn);
4517         break;
4518     case 0x5:
4519     case 0xd:      /* Data processing - register */
4520         disas_data_proc_reg(s, insn);
4521         break;
4522     case 0x7:
4523     case 0xf:      /* Data processing - SIMD and floating point */
4524         disas_data_proc_simd_fp(s, insn);
4525         break;
4526     default:
4527         assert(FALSE); /* all 15 cases should be handled above */
4528         break;
4529     }
4530
4531     /* if we allocated any temporaries, free them here */
4532     free_tmp_a64(s);
4533 }
4534
4535 void gen_intermediate_code_internal_a64(ARMCPU *cpu,
4536                                         TranslationBlock *tb,
4537                                         bool search_pc)
4538 {
4539     CPUState *cs = CPU(cpu);
4540     CPUARMState *env = &cpu->env;
4541     DisasContext dc1, *dc = &dc1;
4542     CPUBreakpoint *bp;
4543     uint16_t *gen_opc_end;
4544     int j, lj;
4545     target_ulong pc_start;
4546     target_ulong next_page_start;
4547     int num_insns;
4548     int max_insns;
4549
4550     pc_start = tb->pc;
4551
4552     dc->tb = tb;
4553
4554     gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
4555
4556     dc->is_jmp = DISAS_NEXT;
4557     dc->pc = pc_start;
4558     dc->singlestep_enabled = cs->singlestep_enabled;
4559     dc->condjmp = 0;
4560
4561     dc->aarch64 = 1;
4562     dc->thumb = 0;
4563     dc->bswap_code = 0;
4564     dc->condexec_mask = 0;
4565     dc->condexec_cond = 0;
4566 #if !defined(CONFIG_USER_ONLY)
4567     dc->user = 0;
4568 #endif
4569     dc->vfp_enabled = 0;
4570     dc->vec_len = 0;
4571     dc->vec_stride = 0;
4572     dc->cp_regs = cpu->cp_regs;
4573     dc->current_pl = arm_current_pl(env);
4574
4575     init_tmp_a64_array(dc);
4576
4577     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
4578     lj = -1;
4579     num_insns = 0;
4580     max_insns = tb->cflags & CF_COUNT_MASK;
4581     if (max_insns == 0) {
4582         max_insns = CF_COUNT_MASK;
4583     }
4584
4585     gen_tb_start();
4586
4587     tcg_clear_temp_count();
4588
4589     do {
4590         if (unlikely(!QTAILQ_EMPTY(&env->breakpoints))) {
4591             QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
4592                 if (bp->pc == dc->pc) {
4593                     gen_exception_insn(dc, 0, EXCP_DEBUG);
4594                     /* Advance PC so that clearing the breakpoint will
4595                        invalidate this TB.  */
4596                     dc->pc += 2;
4597                     goto done_generating;
4598                 }
4599             }
4600         }
4601
4602         if (search_pc) {
4603             j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
4604             if (lj < j) {
4605                 lj++;
4606                 while (lj < j) {
4607                     tcg_ctx.gen_opc_instr_start[lj++] = 0;
4608                 }
4609             }
4610             tcg_ctx.gen_opc_pc[lj] = dc->pc;
4611             tcg_ctx.gen_opc_instr_start[lj] = 1;
4612             tcg_ctx.gen_opc_icount[lj] = num_insns;
4613         }
4614
4615         if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO)) {
4616             gen_io_start();
4617         }
4618
4619         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
4620             tcg_gen_debug_insn_start(dc->pc);
4621         }
4622
4623         disas_a64_insn(env, dc);
4624
4625         if (tcg_check_temp_count()) {
4626             fprintf(stderr, "TCG temporary leak before "TARGET_FMT_lx"\n",
4627                     dc->pc);
4628         }
4629
4630         /* Translation stops when a conditional branch is encountered.
4631          * Otherwise the subsequent code could get translated several times.
4632          * Also stop translation when a page boundary is reached.  This
4633          * ensures prefetch aborts occur at the right place.
4634          */
4635         num_insns++;
4636     } while (!dc->is_jmp && tcg_ctx.gen_opc_ptr < gen_opc_end &&
4637              !cs->singlestep_enabled &&
4638              !singlestep &&
4639              dc->pc < next_page_start &&
4640              num_insns < max_insns);
4641
4642     if (tb->cflags & CF_LAST_IO) {
4643         gen_io_end();
4644     }
4645
4646     if (unlikely(cs->singlestep_enabled) && dc->is_jmp != DISAS_EXC) {
4647         /* Note that this means single stepping WFI doesn't halt the CPU.
4648          * For conditional branch insns this is harmless unreachable code as
4649          * gen_goto_tb() has already handled emitting the debug exception
4650          * (and thus a tb-jump is not possible when singlestepping).
4651          */
4652         assert(dc->is_jmp != DISAS_TB_JUMP);
4653         if (dc->is_jmp != DISAS_JUMP) {
4654             gen_a64_set_pc_im(dc->pc);
4655         }
4656         gen_exception(EXCP_DEBUG);
4657     } else {
4658         switch (dc->is_jmp) {
4659         case DISAS_NEXT:
4660             gen_goto_tb(dc, 1, dc->pc);
4661             break;
4662         default:
4663         case DISAS_UPDATE:
4664             gen_a64_set_pc_im(dc->pc);
4665             /* fall through */
4666         case DISAS_JUMP:
4667             /* indicate that the hash table must be used to find the next TB */
4668             tcg_gen_exit_tb(0);
4669             break;
4670         case DISAS_TB_JUMP:
4671         case DISAS_EXC:
4672         case DISAS_SWI:
4673             break;
4674         case DISAS_WFI:
4675             /* This is a special case because we don't want to just halt the CPU
4676              * if trying to debug across a WFI.
4677              */
4678             gen_helper_wfi(cpu_env);
4679             break;
4680         }
4681     }
4682
4683 done_generating:
4684     gen_tb_end(tb, num_insns);
4685     *tcg_ctx.gen_opc_ptr = INDEX_op_end;
4686
4687 #ifdef DEBUG_DISAS
4688     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
4689         qemu_log("----------------\n");
4690         qemu_log("IN: %s\n", lookup_symbol(pc_start));
4691         log_target_disas(env, pc_start, dc->pc - pc_start,
4692                          dc->thumb | (dc->bswap_code << 1));
4693         qemu_log("\n");
4694     }
4695 #endif
4696     if (search_pc) {
4697         j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
4698         lj++;
4699         while (lj <= j) {
4700             tcg_ctx.gen_opc_instr_start[lj++] = 0;
4701         }
4702     } else {
4703         tb->size = dc->pc - pc_start;
4704         tb->icount = num_insns;
4705     }
4706 }