[Patch 6/7 AArch64] Deprecate *_BY_PIECES_P, move to hookized version
[official-gcc.git] / gcc / config / aarch64 / aarch64.c
blob 9aeac7c2f052a78a05d9cc2882b45a7809673eac
1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2014 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "tree.h"
29 #include "stringpool.h"
30 #include "stor-layout.h"
31 #include "calls.h"
32 #include "varasm.h"
33 #include "regs.h"
34 #include "dominance.h"
35 #include "cfg.h"
36 #include "cfgrtl.h"
37 #include "cfganal.h"
38 #include "lcm.h"
39 #include "cfgbuild.h"
40 #include "cfgcleanup.h"
41 #include "predict.h"
42 #include "basic-block.h"
43 #include "df.h"
44 #include "hard-reg-set.h"
45 #include "output.h"
46 #include "expr.h"
47 #include "reload.h"
48 #include "toplev.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "targhooks.h"
52 #include "ggc.h"
53 #include "hashtab.h"
54 #include "hash-set.h"
55 #include "vec.h"
56 #include "machmode.h"
57 #include "input.h"
58 #include "function.h"
59 #include "tm_p.h"
60 #include "recog.h"
61 #include "langhooks.h"
62 #include "diagnostic-core.h"
63 #include "hash-table.h"
64 #include "tree-ssa-alias.h"
65 #include "internal-fn.h"
66 #include "gimple-fold.h"
67 #include "tree-eh.h"
68 #include "gimple-expr.h"
69 #include "is-a.h"
70 #include "gimple.h"
71 #include "gimplify.h"
72 #include "optabs.h"
73 #include "dwarf2.h"
74 #include "cfgloop.h"
75 #include "tree-vectorizer.h"
76 #include "aarch64-cost-tables.h"
77 #include "dumpfile.h"
78 #include "builtins.h"
79 #include "rtl-iter.h"
81 /* Defined for convenience. */
82 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
84 /* Classifies an address.
86 ADDRESS_REG_IMM
87 A simple base register plus immediate offset.
89 ADDRESS_REG_WB
90 A base register indexed by immediate offset with writeback.
92 ADDRESS_REG_REG
93 A base register indexed by (optionally scaled) register.
95 ADDRESS_REG_UXTW
96 A base register indexed by (optionally scaled) zero-extended register.
98 ADDRESS_REG_SXTW
99 A base register indexed by (optionally scaled) sign-extended register.
101 ADDRESS_LO_SUM
102 A LO_SUM rtx with a base register and "LO12" symbol relocation.
104 ADDRESS_SYMBOLIC:
105 A constant symbolic address, in pc-relative literal pool. */
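/* Editorial illustration (not part of the original comment): the
   classifications above roughly correspond to operand forms such as

     [x1, #16]          ADDRESS_REG_IMM
     [x1, #16]!         ADDRESS_REG_WB   (pre/post-indexed writeback)
     [x1, x2, lsl #3]   ADDRESS_REG_REG
     [x1, w2, uxtw #2]  ADDRESS_REG_UXTW
     [x1, w2, sxtw #2]  ADDRESS_REG_SXTW
     [x1, #:lo12:foo]   ADDRESS_LO_SUM
     a literal-pool ref ADDRESS_SYMBOLIC  */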
107 enum aarch64_address_type {
108 ADDRESS_REG_IMM,
109 ADDRESS_REG_WB,
110 ADDRESS_REG_REG,
111 ADDRESS_REG_UXTW,
112 ADDRESS_REG_SXTW,
113 ADDRESS_LO_SUM,
114 ADDRESS_SYMBOLIC
117 struct aarch64_address_info {
118 enum aarch64_address_type type;
119 rtx base;
120 rtx offset;
121 int shift;
122 enum aarch64_symbol_type symbol_type;
125 struct simd_immediate_info
127 rtx value;
128 int shift;
129 int element_width;
130 bool mvn;
131 bool msl;
134 /* The current code model. */
135 enum aarch64_code_model aarch64_cmodel;
137 #ifdef HAVE_AS_TLS
138 #undef TARGET_HAVE_TLS
139 #define TARGET_HAVE_TLS 1
140 #endif
142 static bool aarch64_lra_p (void);
143 static bool aarch64_composite_type_p (const_tree, machine_mode);
144 static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
145 const_tree,
146 machine_mode *, int *,
147 bool *);
148 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
149 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
150 static void aarch64_override_options_after_change (void);
151 static bool aarch64_vector_mode_supported_p (machine_mode);
152 static unsigned bit_count (unsigned HOST_WIDE_INT);
153 static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
154 const unsigned char *sel);
155 static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
157 /* The processor for which instructions should be scheduled. */
158 enum aarch64_processor aarch64_tune = cortexa53;
160 /* The current tuning set. */
161 const struct tune_params *aarch64_tune_params;
163 /* Mask to specify which instructions we are allowed to generate. */
164 unsigned long aarch64_isa_flags = 0;
166 /* Mask to specify which instruction scheduling options should be used. */
167 unsigned long aarch64_tune_flags = 0;
169 /* Tuning parameters. */
171 #if HAVE_DESIGNATED_INITIALIZERS
172 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
173 #else
174 #define NAMED_PARAM(NAME, VAL) (VAL)
175 #endif
177 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
178 __extension__
179 #endif
181 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
182 __extension__
183 #endif
184 static const struct cpu_addrcost_table generic_addrcost_table =
186 #if HAVE_DESIGNATED_INITIALIZERS
187 .addr_scale_costs =
188 #endif
190 NAMED_PARAM (hi, 0),
191 NAMED_PARAM (si, 0),
192 NAMED_PARAM (di, 0),
193 NAMED_PARAM (ti, 0),
195 NAMED_PARAM (pre_modify, 0),
196 NAMED_PARAM (post_modify, 0),
197 NAMED_PARAM (register_offset, 0),
198 NAMED_PARAM (register_extend, 0),
199 NAMED_PARAM (imm_offset, 0)
202 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
203 __extension__
204 #endif
205 static const struct cpu_addrcost_table cortexa57_addrcost_table =
207 #if HAVE_DESIGNATED_INITIALIZERS
208 .addr_scale_costs =
209 #endif
211 NAMED_PARAM (hi, 1),
212 NAMED_PARAM (si, 0),
213 NAMED_PARAM (di, 0),
214 NAMED_PARAM (ti, 1),
216 NAMED_PARAM (pre_modify, 0),
217 NAMED_PARAM (post_modify, 0),
218 NAMED_PARAM (register_offset, 0),
219 NAMED_PARAM (register_extend, 0),
220 NAMED_PARAM (imm_offset, 0),
223 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
224 __extension__
225 #endif
226 static const struct cpu_regmove_cost generic_regmove_cost =
228 NAMED_PARAM (GP2GP, 1),
229 NAMED_PARAM (GP2FP, 2),
230 NAMED_PARAM (FP2GP, 2),
231 NAMED_PARAM (FP2FP, 2)
234 static const struct cpu_regmove_cost cortexa57_regmove_cost =
236 NAMED_PARAM (GP2GP, 1),
237 /* Avoid the use of slow int<->fp moves for spilling by setting
238 their cost higher than memmov_cost. */
239 NAMED_PARAM (GP2FP, 5),
240 NAMED_PARAM (FP2GP, 5),
241 NAMED_PARAM (FP2FP, 2)
244 static const struct cpu_regmove_cost cortexa53_regmove_cost =
246 NAMED_PARAM (GP2GP, 1),
247 /* Avoid the use of slow int<->fp moves for spilling by setting
248 their cost higher than memmov_cost. */
249 NAMED_PARAM (GP2FP, 5),
250 NAMED_PARAM (FP2GP, 5),
251 NAMED_PARAM (FP2FP, 2)
254 static const struct cpu_regmove_cost thunderx_regmove_cost =
256 NAMED_PARAM (GP2GP, 2),
257 NAMED_PARAM (GP2FP, 2),
258 NAMED_PARAM (FP2GP, 6),
259 NAMED_PARAM (FP2FP, 4)
262 /* Generic costs for vector insn classes. */
263 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
264 __extension__
265 #endif
266 static const struct cpu_vector_cost generic_vector_cost =
268 NAMED_PARAM (scalar_stmt_cost, 1),
269 NAMED_PARAM (scalar_load_cost, 1),
270 NAMED_PARAM (scalar_store_cost, 1),
271 NAMED_PARAM (vec_stmt_cost, 1),
272 NAMED_PARAM (vec_to_scalar_cost, 1),
273 NAMED_PARAM (scalar_to_vec_cost, 1),
274 NAMED_PARAM (vec_align_load_cost, 1),
275 NAMED_PARAM (vec_unalign_load_cost, 1),
276 NAMED_PARAM (vec_unalign_store_cost, 1),
277 NAMED_PARAM (vec_store_cost, 1),
278 NAMED_PARAM (cond_taken_branch_cost, 3),
279 NAMED_PARAM (cond_not_taken_branch_cost, 1)
282 /* Generic costs for vector insn classes. */
283 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
284 __extension__
285 #endif
286 static const struct cpu_vector_cost cortexa57_vector_cost =
288 NAMED_PARAM (scalar_stmt_cost, 1),
289 NAMED_PARAM (scalar_load_cost, 4),
290 NAMED_PARAM (scalar_store_cost, 1),
291 NAMED_PARAM (vec_stmt_cost, 3),
292 NAMED_PARAM (vec_to_scalar_cost, 8),
293 NAMED_PARAM (scalar_to_vec_cost, 8),
294 NAMED_PARAM (vec_align_load_cost, 5),
295 NAMED_PARAM (vec_unalign_load_cost, 5),
296 NAMED_PARAM (vec_unalign_store_cost, 1),
297 NAMED_PARAM (vec_store_cost, 1),
298 NAMED_PARAM (cond_taken_branch_cost, 1),
299 NAMED_PARAM (cond_not_taken_branch_cost, 1)
302 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
303 __extension__
304 #endif
305 static const struct tune_params generic_tunings =
307 &cortexa57_extra_costs,
308 &generic_addrcost_table,
309 &generic_regmove_cost,
310 &generic_vector_cost,
311 NAMED_PARAM (memmov_cost, 4),
312 NAMED_PARAM (issue_rate, 2)
315 static const struct tune_params cortexa53_tunings =
317 &cortexa53_extra_costs,
318 &generic_addrcost_table,
319 &cortexa53_regmove_cost,
320 &generic_vector_cost,
321 NAMED_PARAM (memmov_cost, 4),
322 NAMED_PARAM (issue_rate, 2)
325 static const struct tune_params cortexa57_tunings =
327 &cortexa57_extra_costs,
328 &cortexa57_addrcost_table,
329 &cortexa57_regmove_cost,
330 &cortexa57_vector_cost,
331 NAMED_PARAM (memmov_cost, 4),
332 NAMED_PARAM (issue_rate, 3)
335 static const struct tune_params thunderx_tunings =
337 &thunderx_extra_costs,
338 &generic_addrcost_table,
339 &thunderx_regmove_cost,
340 &generic_vector_cost,
341 NAMED_PARAM (memmov_cost, 6),
342 NAMED_PARAM (issue_rate, 2)
345 /* A processor implementing AArch64. */
346 struct processor
348 const char *const name;
349 enum aarch64_processor core;
350 const char *arch;
351 const unsigned long flags;
352 const struct tune_params *const tune;
355 /* Processor cores implementing AArch64. */
356 static const struct processor all_cores[] =
358 #define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
359 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
360 #include "aarch64-cores.def"
361 #undef AARCH64_CORE
362 {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
363 {NULL, aarch64_none, NULL, 0, NULL}
366 /* Architectures implementing AArch64. */
367 static const struct processor all_architectures[] =
369 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
370 {NAME, CORE, #ARCH, FLAGS, NULL},
371 #include "aarch64-arches.def"
372 #undef AARCH64_ARCH
373 {NULL, aarch64_none, NULL, 0, NULL}
376 /* Target specification. These are populated as command-line arguments
377 are processed, or NULL if not specified. */
378 static const struct processor *selected_arch;
379 static const struct processor *selected_cpu;
380 static const struct processor *selected_tune;
382 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
384 /* An ISA extension in the co-processor and main instruction set space. */
385 struct aarch64_option_extension
387 const char *const name;
388 const unsigned long flags_on;
389 const unsigned long flags_off;
392 /* ISA extensions in AArch64. */
393 static const struct aarch64_option_extension all_extensions[] =
395 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
396 {NAME, FLAGS_ON, FLAGS_OFF},
397 #include "aarch64-option-extensions.def"
398 #undef AARCH64_OPT_EXTENSION
399 {NULL, 0, 0}
402 /* Used to track the size of an address when generating a pre/post
403 increment address. */
404 static machine_mode aarch64_memory_reference_mode;
406 /* Used to force GTY into this file. */
407 static GTY(()) int gty_dummy;
409 /* A table of valid AArch64 "bitmask immediate" values for
410 logical instructions. */
412 #define AARCH64_NUM_BITMASKS 5334
413 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
415 typedef enum aarch64_cond_code
417 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
418 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
419 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
421 aarch64_cc;
423 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
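/* Editorial note: this works because the architectural encoding pairs each
   condition with its inverse in adjacent slots, so flipping bit 0 maps
   EQ (0) <-> NE (1), CS (2) <-> CC (3), GE (10) <-> LT (11), and so on.
   A minimal sketch of its use:

     aarch64_cc inv = AARCH64_INVERSE_CONDITION_CODE (AARCH64_GT);
     gcc_assert (inv == AARCH64_LE);  */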
425 /* The condition codes of the processor, and the inverse function. */
426 static const char * const aarch64_condition_codes[] =
428 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
429 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
432 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
433 unsigned
434 aarch64_dbx_register_number (unsigned regno)
436 if (GP_REGNUM_P (regno))
437 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
438 else if (regno == SP_REGNUM)
439 return AARCH64_DWARF_SP;
440 else if (FP_REGNUM_P (regno))
441 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
443 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
444 equivalent DWARF register. */
445 return DWARF_FRAME_REGISTERS;
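/* Illustrative mapping (assuming the usual AArch64 DWARF numbering used
   here): x0-x30 map to DWARF 0-30, the stack pointer to 31, and the vector
   registers v0-v31 to 64-95; e.g. regno x19 -> 19, v1 -> 65.  */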
448 /* Return TRUE if MODE is any of the large INT modes. */
449 static bool
450 aarch64_vect_struct_mode_p (machine_mode mode)
452 return mode == OImode || mode == CImode || mode == XImode;
455 /* Return TRUE if MODE is any of the vector modes. */
456 static bool
457 aarch64_vector_mode_p (machine_mode mode)
459 return aarch64_vector_mode_supported_p (mode)
460 || aarch64_vect_struct_mode_p (mode);
463 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
464 static bool
465 aarch64_array_mode_supported_p (machine_mode mode,
466 unsigned HOST_WIDE_INT nelems)
468 if (TARGET_SIMD
469 && AARCH64_VALID_SIMD_QREG_MODE (mode)
470 && (nelems >= 2 && nelems <= 4))
471 return true;
473 return false;
476 /* Implement HARD_REGNO_NREGS. */
479 aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
481 switch (aarch64_regno_regclass (regno))
483 case FP_REGS:
484 case FP_LO_REGS:
485 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
486 default:
487 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
489 gcc_unreachable ();
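/* Worked example (editorial): with 64-bit GP registers and 128-bit vector
   registers, TImode (16 bytes) needs two GP registers but only one FP/SIMD
   register, while OImode (32 bytes) needs two FP/SIMD registers.  */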
492 /* Implement HARD_REGNO_MODE_OK. */
495 aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
497 if (GET_MODE_CLASS (mode) == MODE_CC)
498 return regno == CC_REGNUM;
500 if (regno == SP_REGNUM)
501 /* The purpose of comparing with ptr_mode is to support the
502 global register variable associated with the stack pointer
503 register via the syntax of asm ("wsp") in ILP32. */
504 return mode == Pmode || mode == ptr_mode;
506 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
507 return mode == Pmode;
509 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
510 return 1;
512 if (FP_REGNUM_P (regno))
514 if (aarch64_vect_struct_mode_p (mode))
515 return
516 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
517 else
518 return 1;
521 return 0;
524 /* Implement HARD_REGNO_CALLER_SAVE_MODE. */
525 machine_mode
526 aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
527 machine_mode mode)
529 /* Handle modes that fit within single registers. */
530 if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
532 if (GET_MODE_SIZE (mode) >= 4)
533 return mode;
534 else
535 return SImode;
537 /* Fall back to generic for multi-reg and very large modes. */
538 else
539 return choose_hard_reg_mode (regno, nregs, false);
542 /* Return true if calls to DECL should be treated as
543 long-calls (i.e. called via a register). */
544 static bool
545 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
547 return false;
550 /* Return true if calls to symbol-ref SYM should be treated as
551 long-calls (i.e. called via a register). */
552 bool
553 aarch64_is_long_call_p (rtx sym)
555 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
558 /* Return true if the offsets to a zero/sign-extract operation
559 represent an expression that matches an extend operation. The
560 operands represent the parameters from
562 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
563 bool
564 aarch64_is_extend_from_extract (machine_mode mode, rtx mult_imm,
565 rtx extract_imm)
567 HOST_WIDE_INT mult_val, extract_val;
569 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
570 return false;
572 mult_val = INTVAL (mult_imm);
573 extract_val = INTVAL (extract_imm);
575 if (extract_val > 8
576 && extract_val < GET_MODE_BITSIZE (mode)
577 && exact_log2 (extract_val & ~7) > 0
578 && (extract_val & 7) <= 4
579 && mult_val == (1 << (extract_val & 7)))
580 return true;
582 return false;
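/* A hedged example of what the check above accepts: for DImode, an
   extract with EXTRACT_IMM == 34 (i.e. 32 + 2) and MULT_IMM == 4 (1 << 2)
   describes a 32-bit value extended and then shifted left by 2, which
   matches an extend-with-shift operand form.  */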
585 /* Emit an insn that's a simple single-set. Both the operands must be
586 known to be valid. */
587 inline static rtx
588 emit_set_insn (rtx x, rtx y)
590 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
593 /* X and Y are two things to compare using CODE. Emit the compare insn and
594 return the rtx for register 0 in the proper mode. */
596 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
598 machine_mode mode = SELECT_CC_MODE (code, x, y);
599 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
601 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
602 return cc_reg;
605 /* Build the SYMBOL_REF for __tls_get_addr. */
607 static GTY(()) rtx tls_get_addr_libfunc;
610 aarch64_tls_get_addr (void)
612 if (!tls_get_addr_libfunc)
613 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
614 return tls_get_addr_libfunc;
617 /* Return the TLS model to use for ADDR. */
619 static enum tls_model
620 tls_symbolic_operand_type (rtx addr)
622 enum tls_model tls_kind = TLS_MODEL_NONE;
623 rtx sym, addend;
625 if (GET_CODE (addr) == CONST)
627 split_const (addr, &sym, &addend);
628 if (GET_CODE (sym) == SYMBOL_REF)
629 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
631 else if (GET_CODE (addr) == SYMBOL_REF)
632 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
634 return tls_kind;
637 /* We'll allow lo_sums in our legitimate addresses
638 so that combine can take care of combining addresses where
639 necessary, but for generation purposes we'll generate the address
640 as:
641 RTL Absolute
642 tmp = hi (symbol_ref); adrp x1, foo
643 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
646 PIC TLS
647 adrp x1, :got:foo adrp tmp, :tlsgd:foo
648 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
649 bl __tls_get_addr
652 Load TLS symbol, depending on TLS mechanism and TLS access model.
654 Global Dynamic - Traditional TLS:
655 adrp tmp, :tlsgd:imm
656 add dest, tmp, #:tlsgd_lo12:imm
657 bl __tls_get_addr
659 Global Dynamic - TLS Descriptors:
660 adrp dest, :tlsdesc:imm
661 ldr tmp, [dest, #:tlsdesc_lo12:imm]
662 add dest, dest, #:tlsdesc_lo12:imm
663 blr tmp
664 mrs tp, tpidr_el0
665 add dest, dest, tp
667 Initial Exec:
668 mrs tp, tpidr_el0
669 adrp tmp, :gottprel:imm
670 ldr dest, [tmp, #:gottprel_lo12:imm]
671 add dest, dest, tp
673 Local Exec:
674 mrs tp, tpidr_el0
675 add t0, tp, #:tprel_hi12:imm
676 add t0, #:tprel_lo12_nc:imm
679 static void
680 aarch64_load_symref_appropriately (rtx dest, rtx imm,
681 enum aarch64_symbol_type type)
683 switch (type)
685 case SYMBOL_SMALL_ABSOLUTE:
687 /* In ILP32, the mode of dest can be either SImode or DImode. */
688 rtx tmp_reg = dest;
689 machine_mode mode = GET_MODE (dest);
691 gcc_assert (mode == Pmode || mode == ptr_mode);
693 if (can_create_pseudo_p ())
694 tmp_reg = gen_reg_rtx (mode);
696 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
697 emit_insn (gen_add_losym (dest, tmp_reg, imm));
698 return;
701 case SYMBOL_TINY_ABSOLUTE:
702 emit_insn (gen_rtx_SET (Pmode, dest, imm));
703 return;
705 case SYMBOL_SMALL_GOT:
707 /* In ILP32, the mode of dest can be either SImode or DImode,
708 while the got entry is always of SImode size. The mode of
709 dest depends on how dest is used: if dest is assigned to a
710 pointer (e.g. in the memory), it has SImode; it may have
711 DImode if dest is dereferenced to access the memory.
712 This is why we have to handle three different ldr_got_small
713 patterns here (two patterns for ILP32). */
714 rtx tmp_reg = dest;
715 machine_mode mode = GET_MODE (dest);
717 if (can_create_pseudo_p ())
718 tmp_reg = gen_reg_rtx (mode);
720 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
721 if (mode == ptr_mode)
723 if (mode == DImode)
724 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
725 else
726 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
728 else
730 gcc_assert (mode == Pmode);
731 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
734 return;
737 case SYMBOL_SMALL_TLSGD:
739 rtx_insn *insns;
740 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
742 start_sequence ();
743 aarch64_emit_call_insn (gen_tlsgd_small (result, imm));
744 insns = get_insns ();
745 end_sequence ();
747 RTL_CONST_CALL_P (insns) = 1;
748 emit_libcall_block (insns, dest, result, imm);
749 return;
752 case SYMBOL_SMALL_TLSDESC:
754 machine_mode mode = GET_MODE (dest);
755 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
756 rtx tp;
758 gcc_assert (mode == Pmode || mode == ptr_mode);
760 /* In ILP32, the got entry is always of SImode size. Unlike
761 small GOT, the dest is fixed at reg 0. */
762 if (TARGET_ILP32)
763 emit_insn (gen_tlsdesc_small_si (imm));
764 else
765 emit_insn (gen_tlsdesc_small_di (imm));
766 tp = aarch64_load_tp (NULL);
768 if (mode != Pmode)
769 tp = gen_lowpart (mode, tp);
771 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
772 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
773 return;
776 case SYMBOL_SMALL_GOTTPREL:
778 /* In ILP32, the mode of dest can be either SImode or DImode,
779 while the got entry is always of SImode size. The mode of
780 dest depends on how dest is used: if dest is assigned to a
781 pointer (e.g. in the memory), it has SImode; it may have
782 DImode if dest is dereferenced to access the memory.
783 This is why we have to handle three different tlsie_small
784 patterns here (two patterns for ILP32). */
785 machine_mode mode = GET_MODE (dest);
786 rtx tmp_reg = gen_reg_rtx (mode);
787 rtx tp = aarch64_load_tp (NULL);
789 if (mode == ptr_mode)
791 if (mode == DImode)
792 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
793 else
795 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
796 tp = gen_lowpart (mode, tp);
799 else
801 gcc_assert (mode == Pmode);
802 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
805 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
806 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
807 return;
810 case SYMBOL_SMALL_TPREL:
812 rtx tp = aarch64_load_tp (NULL);
813 emit_insn (gen_tlsle_small (dest, tp, imm));
814 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
815 return;
818 case SYMBOL_TINY_GOT:
819 emit_insn (gen_ldr_got_tiny (dest, imm));
820 return;
822 default:
823 gcc_unreachable ();
827 /* Emit a move from SRC to DEST. Assume that the move expanders can
828 handle all moves if !can_create_pseudo_p (). The distinction is
829 important because, unlike emit_move_insn, the move expanders know
830 how to force Pmode objects into the constant pool even when the
831 constant pool address is not itself legitimate. */
832 static rtx
833 aarch64_emit_move (rtx dest, rtx src)
835 return (can_create_pseudo_p ()
836 ? emit_move_insn (dest, src)
837 : emit_move_insn_1 (dest, src));
840 /* Split a 128-bit move operation into two 64-bit move operations,
841 taking care to handle partial overlap of register to register
842 copies. Special cases are needed when moving between GP regs and
843 FP regs. SRC can be a register, constant or memory; DST a register
844 or memory. If either operand is memory it must not have any side
845 effects. */
846 void
847 aarch64_split_128bit_move (rtx dst, rtx src)
849 rtx dst_lo, dst_hi;
850 rtx src_lo, src_hi;
852 machine_mode mode = GET_MODE (dst);
854 gcc_assert (mode == TImode || mode == TFmode);
855 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
856 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
858 if (REG_P (dst) && REG_P (src))
860 int src_regno = REGNO (src);
861 int dst_regno = REGNO (dst);
863 /* Handle FP <-> GP regs. */
864 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
866 src_lo = gen_lowpart (word_mode, src);
867 src_hi = gen_highpart (word_mode, src);
869 if (mode == TImode)
871 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
872 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
874 else
876 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
877 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
879 return;
881 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
883 dst_lo = gen_lowpart (word_mode, dst);
884 dst_hi = gen_highpart (word_mode, dst);
886 if (mode == TImode)
888 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
889 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
891 else
893 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
894 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
896 return;
900 dst_lo = gen_lowpart (word_mode, dst);
901 dst_hi = gen_highpart (word_mode, dst);
902 src_lo = gen_lowpart (word_mode, src);
903 src_hi = gen_highpart_mode (word_mode, mode, src);
905 /* At most one pairing may overlap. */
906 if (reg_overlap_mentioned_p (dst_lo, src_hi))
908 aarch64_emit_move (dst_hi, src_hi);
909 aarch64_emit_move (dst_lo, src_lo);
911 else
913 aarch64_emit_move (dst_lo, src_lo);
914 aarch64_emit_move (dst_hi, src_hi);
918 bool
919 aarch64_split_128bit_move_p (rtx dst, rtx src)
921 return (! REG_P (src)
922 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
925 /* Split a complex SIMD combine. */
927 void
928 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
930 machine_mode src_mode = GET_MODE (src1);
931 machine_mode dst_mode = GET_MODE (dst);
933 gcc_assert (VECTOR_MODE_P (dst_mode));
935 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
937 rtx (*gen) (rtx, rtx, rtx);
939 switch (src_mode)
941 case V8QImode:
942 gen = gen_aarch64_simd_combinev8qi;
943 break;
944 case V4HImode:
945 gen = gen_aarch64_simd_combinev4hi;
946 break;
947 case V2SImode:
948 gen = gen_aarch64_simd_combinev2si;
949 break;
950 case V2SFmode:
951 gen = gen_aarch64_simd_combinev2sf;
952 break;
953 case DImode:
954 gen = gen_aarch64_simd_combinedi;
955 break;
956 case DFmode:
957 gen = gen_aarch64_simd_combinedf;
958 break;
959 default:
960 gcc_unreachable ();
963 emit_insn (gen (dst, src1, src2));
964 return;
968 /* Split a complex SIMD move. */
970 void
971 aarch64_split_simd_move (rtx dst, rtx src)
973 machine_mode src_mode = GET_MODE (src);
974 machine_mode dst_mode = GET_MODE (dst);
976 gcc_assert (VECTOR_MODE_P (dst_mode));
978 if (REG_P (dst) && REG_P (src))
980 rtx (*gen) (rtx, rtx);
982 gcc_assert (VECTOR_MODE_P (src_mode));
984 switch (src_mode)
986 case V16QImode:
987 gen = gen_aarch64_split_simd_movv16qi;
988 break;
989 case V8HImode:
990 gen = gen_aarch64_split_simd_movv8hi;
991 break;
992 case V4SImode:
993 gen = gen_aarch64_split_simd_movv4si;
994 break;
995 case V2DImode:
996 gen = gen_aarch64_split_simd_movv2di;
997 break;
998 case V4SFmode:
999 gen = gen_aarch64_split_simd_movv4sf;
1000 break;
1001 case V2DFmode:
1002 gen = gen_aarch64_split_simd_movv2df;
1003 break;
1004 default:
1005 gcc_unreachable ();
1008 emit_insn (gen (dst, src));
1009 return;
1013 static rtx
1014 aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
1016 if (can_create_pseudo_p ())
1017 return force_reg (mode, value);
1018 else
1020 x = aarch64_emit_move (x, value);
1021 return x;
1026 static rtx
1027 aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
1029 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
1031 rtx high;
1032 /* Load the full offset into a register. This
1033 might be improvable in the future. */
1034 high = GEN_INT (offset);
1035 offset = 0;
1036 high = aarch64_force_temporary (mode, temp, high);
1037 reg = aarch64_force_temporary (mode, temp,
1038 gen_rtx_PLUS (mode, high, reg));
1040 return plus_constant (mode, reg, offset);
1043 void
1044 aarch64_expand_mov_immediate (rtx dest, rtx imm)
1046 machine_mode mode = GET_MODE (dest);
1047 unsigned HOST_WIDE_INT mask;
1048 int i;
1049 bool first;
1050 unsigned HOST_WIDE_INT val;
1051 bool subtargets;
1052 rtx subtarget;
1053 int one_match, zero_match, first_not_ffff_match;
1055 gcc_assert (mode == SImode || mode == DImode);
1057 /* Check on what type of symbol it is. */
1058 if (GET_CODE (imm) == SYMBOL_REF
1059 || GET_CODE (imm) == LABEL_REF
1060 || GET_CODE (imm) == CONST)
1062 rtx mem, base, offset;
1063 enum aarch64_symbol_type sty;
1065 /* If we have (const (plus symbol offset)), separate out the offset
1066 before we start classifying the symbol. */
1067 split_const (imm, &base, &offset);
1069 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
1070 switch (sty)
1072 case SYMBOL_FORCE_TO_MEM:
1073 if (offset != const0_rtx
1074 && targetm.cannot_force_const_mem (mode, imm))
1076 gcc_assert (can_create_pseudo_p ());
1077 base = aarch64_force_temporary (mode, dest, base);
1078 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1079 aarch64_emit_move (dest, base);
1080 return;
1082 mem = force_const_mem (ptr_mode, imm);
1083 gcc_assert (mem);
1084 if (mode != ptr_mode)
1085 mem = gen_rtx_ZERO_EXTEND (mode, mem);
1086 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1087 return;
1089 case SYMBOL_SMALL_TLSGD:
1090 case SYMBOL_SMALL_TLSDESC:
1091 case SYMBOL_SMALL_GOTTPREL:
1092 case SYMBOL_SMALL_GOT:
1093 case SYMBOL_TINY_GOT:
1094 if (offset != const0_rtx)
1096 gcc_assert(can_create_pseudo_p ());
1097 base = aarch64_force_temporary (mode, dest, base);
1098 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1099 aarch64_emit_move (dest, base);
1100 return;
1102 /* FALLTHRU */
1104 case SYMBOL_SMALL_TPREL:
1105 case SYMBOL_SMALL_ABSOLUTE:
1106 case SYMBOL_TINY_ABSOLUTE:
1107 aarch64_load_symref_appropriately (dest, imm, sty);
1108 return;
1110 default:
1111 gcc_unreachable ();
1115 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
1117 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1118 return;
1121 if (!CONST_INT_P (imm))
1123 if (GET_CODE (imm) == HIGH)
1124 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1125 else
1127 rtx mem = force_const_mem (mode, imm);
1128 gcc_assert (mem);
1129 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1132 return;
1135 if (mode == SImode)
1137 /* We know we can't do this in 1 insn, and we must be able to do it
1138 in two; so don't mess around looking for sequences that don't buy
1139 us anything. */
1140 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
1141 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1142 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
1143 return;
1146 /* Remaining cases are all for DImode. */
1148 val = INTVAL (imm);
1149 subtargets = optimize && can_create_pseudo_p ();
1151 one_match = 0;
1152 zero_match = 0;
1153 mask = 0xffff;
1154 first_not_ffff_match = -1;
1156 for (i = 0; i < 64; i += 16, mask <<= 16)
1158 if ((val & mask) == mask)
1159 one_match++;
1160 else
1162 if (first_not_ffff_match < 0)
1163 first_not_ffff_match = i;
1164 if ((val & mask) == 0)
1165 zero_match++;
1169 if (one_match == 2)
1171 /* Set one of the quarters and then insert back into result. */
1172 mask = 0xffffll << first_not_ffff_match;
1173 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1174 emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
1175 GEN_INT ((val >> first_not_ffff_match)
1176 & 0xffff)));
1177 return;
1180 if (zero_match == 2)
1181 goto simple_sequence;
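/* Editorial sketch of the simple cases: a value such as 0x12345678 has two
   zero 16-bit quarters, so it takes the simple_sequence path below and is
   built with a MOVZ of the low 16 bits followed by a MOVK of bits 16-31
   (gen_insv_immdi).  Values with two 0xffff quarters are instead handled
   above by first forcing one quarter to 0xffff and then inserting its real
   value back.  */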
1183 mask = 0x0ffff0000UL;
1184 for (i = 16; i < 64; i += 16, mask <<= 16)
1186 HOST_WIDE_INT comp = mask & ~(mask - 1);
1188 if (aarch64_uimm12_shift (val - (val & mask)))
1190 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1192 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1193 emit_insn (gen_adddi3 (dest, subtarget,
1194 GEN_INT (val - (val & mask))));
1195 return;
1197 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1199 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1201 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1202 GEN_INT ((val + comp) & mask)));
1203 emit_insn (gen_adddi3 (dest, subtarget,
1204 GEN_INT (val - ((val + comp) & mask))));
1205 return;
1207 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1209 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1211 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1212 GEN_INT ((val - comp) | ~mask)));
1213 emit_insn (gen_adddi3 (dest, subtarget,
1214 GEN_INT (val - ((val - comp) | ~mask))));
1215 return;
1217 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1219 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1221 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1222 GEN_INT (val | ~mask)));
1223 emit_insn (gen_adddi3 (dest, subtarget,
1224 GEN_INT (val - (val | ~mask))));
1225 return;
1229 /* See if we can do it by arithmetically combining two
1230 immediates. */
1231 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1233 int j;
1234 mask = 0xffff;
1236 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1237 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1239 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1240 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1241 GEN_INT (aarch64_bitmasks[i])));
1242 emit_insn (gen_adddi3 (dest, subtarget,
1243 GEN_INT (val - aarch64_bitmasks[i])));
1244 return;
1247 for (j = 0; j < 64; j += 16, mask <<= 16)
1249 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1251 emit_insn (gen_rtx_SET (VOIDmode, dest,
1252 GEN_INT (aarch64_bitmasks[i])));
1253 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1254 GEN_INT ((val >> j) & 0xffff)));
1255 return;
1260 /* See if we can do it by logically combining two immediates. */
1261 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1263 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1265 int j;
1267 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1268 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1270 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1271 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1272 GEN_INT (aarch64_bitmasks[i])));
1273 emit_insn (gen_iordi3 (dest, subtarget,
1274 GEN_INT (aarch64_bitmasks[j])));
1275 return;
1278 else if ((val & aarch64_bitmasks[i]) == val)
1280 int j;
1282 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1283 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1286 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1287 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1288 GEN_INT (aarch64_bitmasks[j])));
1289 emit_insn (gen_anddi3 (dest, subtarget,
1290 GEN_INT (aarch64_bitmasks[i])));
1291 return;
1296 if (one_match > zero_match)
1298 /* Set either first three quarters or all but the third. */
1299 mask = 0xffffll << (16 - first_not_ffff_match);
1300 emit_insn (gen_rtx_SET (VOIDmode, dest,
1301 GEN_INT (val | mask | 0xffffffff00000000ull)));
1303 /* Now insert other two quarters. */
1304 for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
1305 i < 64; i += 16, mask <<= 16)
1307 if ((val & mask) != mask)
1308 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1309 GEN_INT ((val >> i) & 0xffff)));
1311 return;
1314 simple_sequence:
1315 first = true;
1316 mask = 0xffff;
1317 for (i = 0; i < 64; i += 16, mask <<= 16)
1319 if ((val & mask) != 0)
1321 if (first)
1323 emit_insn (gen_rtx_SET (VOIDmode, dest,
1324 GEN_INT (val & mask)));
1325 first = false;
1327 else
1328 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1329 GEN_INT ((val >> i) & 0xffff)));
1334 static bool
1335 aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
1336 tree exp ATTRIBUTE_UNUSED)
1338 /* Currently, always true. */
1339 return true;
1342 /* Implement TARGET_PASS_BY_REFERENCE. */
1344 static bool
1345 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1346 machine_mode mode,
1347 const_tree type,
1348 bool named ATTRIBUTE_UNUSED)
1350 HOST_WIDE_INT size;
1351 machine_mode dummymode;
1352 int nregs;
1354 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1355 size = (mode == BLKmode && type)
1356 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1358 /* Aggregates are passed by reference based on their size. */
1359 if (type && AGGREGATE_TYPE_P (type))
1361 size = int_size_in_bytes (type);
1364 /* Variable sized arguments are always passed by reference. */
1365 if (size < 0)
1366 return true;
1368 /* Can this be a candidate to be passed in fp/simd register(s)? */
1369 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1370 &dummymode, &nregs,
1371 NULL))
1372 return false;
1374 /* Arguments which are variable sized or larger than 2 registers are
1375 passed by reference unless they are a homogeneous floating-point
1376 aggregate. */
1377 return size > 2 * UNITS_PER_WORD;
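/* Editorial examples under the AAPCS64 rules implemented above: a 24-byte
   plain struct is passed by reference, a 16-byte struct is passed by value
   in two registers, and a homogeneous aggregate of four floats is a
   candidate for the SIMD/FP registers and so is also passed by value.  */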
1380 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1381 static bool
1382 aarch64_return_in_msb (const_tree valtype)
1384 machine_mode dummy_mode;
1385 int dummy_int;
1387 /* Never happens in little-endian mode. */
1388 if (!BYTES_BIG_ENDIAN)
1389 return false;
1391 /* Only composite types smaller than or equal to 16 bytes can
1392 be potentially returned in registers. */
1393 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1394 || int_size_in_bytes (valtype) <= 0
1395 || int_size_in_bytes (valtype) > 16)
1396 return false;
1398 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1399 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1400 is always passed/returned in the least significant bits of fp/simd
1401 register(s). */
1402 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1403 &dummy_mode, &dummy_int, NULL))
1404 return false;
1406 return true;
1409 /* Implement TARGET_FUNCTION_VALUE.
1410 Define how to find the value returned by a function. */
1412 static rtx
1413 aarch64_function_value (const_tree type, const_tree func,
1414 bool outgoing ATTRIBUTE_UNUSED)
1416 machine_mode mode;
1417 int unsignedp;
1418 int count;
1419 machine_mode ag_mode;
1421 mode = TYPE_MODE (type);
1422 if (INTEGRAL_TYPE_P (type))
1423 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1425 if (aarch64_return_in_msb (type))
1427 HOST_WIDE_INT size = int_size_in_bytes (type);
1429 if (size % UNITS_PER_WORD != 0)
1431 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1432 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1436 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1437 &ag_mode, &count, NULL))
1439 if (!aarch64_composite_type_p (type, mode))
1441 gcc_assert (count == 1 && mode == ag_mode);
1442 return gen_rtx_REG (mode, V0_REGNUM);
1444 else
1446 int i;
1447 rtx par;
1449 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1450 for (i = 0; i < count; i++)
1452 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1453 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1454 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1455 XVECEXP (par, 0, i) = tmp;
1457 return par;
1460 else
1461 return gen_rtx_REG (mode, R0_REGNUM);
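/* Editorial examples: a 128-bit integer result comes back in x0/x1, a
   homogeneous aggregate of two doubles comes back as a PARALLEL over
   v0/v1 (d0/d1), and a plain int comes back in w0 after promotion.  */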
1464 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1465 Return true if REGNO is the number of a hard register in which the values
1466 of called function may come back. */
1468 static bool
1469 aarch64_function_value_regno_p (const unsigned int regno)
1471 /* Maximum of 16 bytes can be returned in the general registers. Examples
1472 of 16-byte return values are: 128-bit integers and 16-byte small
1473 structures (excluding homogeneous floating-point aggregates). */
1474 if (regno == R0_REGNUM || regno == R1_REGNUM)
1475 return true;
1477 /* Up to four fp/simd registers can return a function value, e.g. a
1478 homogeneous floating-point aggregate having four members. */
1479 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1480 return !TARGET_GENERAL_REGS_ONLY;
1482 return false;
1485 /* Implement TARGET_RETURN_IN_MEMORY.
1487 If the type T of the result of a function is such that
1488 void func (T arg)
1489 would require that arg be passed as a value in a register (or set of
1490 registers) according to the parameter passing rules, then the result
1491 is returned in the same registers as would be used for such an
1492 argument. */
1494 static bool
1495 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1497 HOST_WIDE_INT size;
1498 machine_mode ag_mode;
1499 int count;
1501 if (!AGGREGATE_TYPE_P (type)
1502 && TREE_CODE (type) != COMPLEX_TYPE
1503 && TREE_CODE (type) != VECTOR_TYPE)
1504 /* Simple scalar types always returned in registers. */
1505 return false;
1507 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1508 type,
1509 &ag_mode,
1510 &count,
1511 NULL))
1512 return false;
1514 /* Types larger than 2 registers are returned in memory. */
1515 size = int_size_in_bytes (type);
1516 return (size < 0 || size > 2 * UNITS_PER_WORD);
1519 static bool
1520 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
1521 const_tree type, int *nregs)
1523 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1524 return aarch64_vfp_is_call_or_return_candidate (mode,
1525 type,
1526 &pcum->aapcs_vfp_rmode,
1527 nregs,
1528 NULL);
1531 /* Given MODE and TYPE of a function argument, return the alignment in
1532 bits. The idea is to suppress any stronger alignment requested by
1533 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1534 This is a helper function for local use only. */
1536 static unsigned int
1537 aarch64_function_arg_alignment (machine_mode mode, const_tree type)
1539 unsigned int alignment;
1541 if (type)
1543 if (!integer_zerop (TYPE_SIZE (type)))
1545 if (TYPE_MODE (type) == mode)
1546 alignment = TYPE_ALIGN (type);
1547 else
1548 alignment = GET_MODE_ALIGNMENT (mode);
1550 else
1551 alignment = 0;
1553 else
1554 alignment = GET_MODE_ALIGNMENT (mode);
1556 return alignment;
1559 /* Layout a function argument according to the AAPCS64 rules. The rule
1560 numbers refer to the rule numbers in the AAPCS64. */
1562 static void
1563 aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
1564 const_tree type,
1565 bool named ATTRIBUTE_UNUSED)
1567 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1568 int ncrn, nvrn, nregs;
1569 bool allocate_ncrn, allocate_nvrn;
1570 HOST_WIDE_INT size;
1572 /* We need to do this once per argument. */
1573 if (pcum->aapcs_arg_processed)
1574 return;
1576 pcum->aapcs_arg_processed = true;
1578 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
1579 size
1580 = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
1581 UNITS_PER_WORD);
1583 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1584 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1585 mode,
1586 type,
1587 &nregs);
1589 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1590 The following code thus handles passing by SIMD/FP registers first. */
1592 nvrn = pcum->aapcs_nvrn;
1594 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
1595 and homogeneous short-vector aggregates (HVA). */
1596 if (allocate_nvrn)
1598 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1600 pcum->aapcs_nextnvrn = nvrn + nregs;
1601 if (!aarch64_composite_type_p (type, mode))
1603 gcc_assert (nregs == 1);
1604 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1606 else
1608 rtx par;
1609 int i;
1610 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1611 for (i = 0; i < nregs; i++)
1613 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1614 V0_REGNUM + nvrn + i);
1615 tmp = gen_rtx_EXPR_LIST
1616 (VOIDmode, tmp,
1617 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1618 XVECEXP (par, 0, i) = tmp;
1620 pcum->aapcs_reg = par;
1622 return;
1624 else
1626 /* C.3 NSRN is set to 8. */
1627 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1628 goto on_stack;
1632 ncrn = pcum->aapcs_ncrn;
1633 nregs = size / UNITS_PER_WORD;
1635 /* C6 - C9, though the sign and zero extension semantics are
1636 handled elsewhere. This is the case where the argument fits
1637 entirely in general registers. */
1638 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1640 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1642 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1644 /* C.8 if the argument has an alignment of 16 then the NGRN is
1645 rounded up to the next even number. */
1646 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1648 ++ncrn;
1649 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1651 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1652 A reg is still generated for it, but the caller should be smart
1653 enough not to use it. */
1654 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1656 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1658 else
1660 rtx par;
1661 int i;
1663 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1664 for (i = 0; i < nregs; i++)
1666 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1667 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1668 GEN_INT (i * UNITS_PER_WORD));
1669 XVECEXP (par, 0, i) = tmp;
1671 pcum->aapcs_reg = par;
1674 pcum->aapcs_nextncrn = ncrn + nregs;
1675 return;
1678 /* C.11 */
1679 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1681 /* The argument is passed on the stack; record the needed number of words for
1682 this argument and align the total size if necessary. */
1683 on_stack:
1684 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
1685 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1686 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1687 16 / UNITS_PER_WORD);
1688 return;
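/* Editorial example of rule C.8 above: a 16-byte-aligned argument such as
   __int128 that would otherwise start in an odd-numbered core register
   (say x3) is bumped to the next even pair (x4/x5); the skipped register
   is not used for later arguments.  */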
1691 /* Implement TARGET_FUNCTION_ARG. */
1693 static rtx
1694 aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
1695 const_tree type, bool named)
1697 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1698 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1700 if (mode == VOIDmode)
1701 return NULL_RTX;
1703 aarch64_layout_arg (pcum_v, mode, type, named);
1704 return pcum->aapcs_reg;
1707 void
1708 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1709 const_tree fntype ATTRIBUTE_UNUSED,
1710 rtx libname ATTRIBUTE_UNUSED,
1711 const_tree fndecl ATTRIBUTE_UNUSED,
1712 unsigned n_named ATTRIBUTE_UNUSED)
1714 pcum->aapcs_ncrn = 0;
1715 pcum->aapcs_nvrn = 0;
1716 pcum->aapcs_nextncrn = 0;
1717 pcum->aapcs_nextnvrn = 0;
1718 pcum->pcs_variant = ARM_PCS_AAPCS64;
1719 pcum->aapcs_reg = NULL_RTX;
1720 pcum->aapcs_arg_processed = false;
1721 pcum->aapcs_stack_words = 0;
1722 pcum->aapcs_stack_size = 0;
1724 return;
1727 static void
1728 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1729 machine_mode mode,
1730 const_tree type,
1731 bool named)
1733 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1734 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1736 aarch64_layout_arg (pcum_v, mode, type, named);
1737 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1738 != (pcum->aapcs_stack_words != 0));
1739 pcum->aapcs_arg_processed = false;
1740 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1741 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1742 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1743 pcum->aapcs_stack_words = 0;
1744 pcum->aapcs_reg = NULL_RTX;
1748 bool
1749 aarch64_function_arg_regno_p (unsigned regno)
1751 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1752 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1755 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1756 PARM_BOUNDARY bits of alignment, but will be given anything up
1757 to STACK_BOUNDARY bits if the type requires it. This makes sure
1758 that both before and after the layout of each argument, the Next
1759 Stacked Argument Address (NSAA) will have a minimum alignment of
1760 8 bytes. */
1762 static unsigned int
1763 aarch64_function_arg_boundary (machine_mode mode, const_tree type)
1765 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1767 if (alignment < PARM_BOUNDARY)
1768 alignment = PARM_BOUNDARY;
1769 if (alignment > STACK_BOUNDARY)
1770 alignment = STACK_BOUNDARY;
1771 return alignment;
1774 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1776 Return true if an argument passed on the stack should be padded upwards,
1777 i.e. if the least-significant byte of the stack slot has useful data.
1779 Small aggregate types are placed in the lowest memory address.
1781 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1783 bool
1784 aarch64_pad_arg_upward (machine_mode mode, const_tree type)
1786 /* On little-endian targets, the least significant byte of every stack
1787 argument is passed at the lowest byte address of the stack slot. */
1788 if (!BYTES_BIG_ENDIAN)
1789 return true;
1791 /* Otherwise, integral, floating-point and pointer types are padded downward:
1792 the least significant byte of a stack argument is passed at the highest
1793 byte address of the stack slot. */
1794 if (type
1795 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1796 || POINTER_TYPE_P (type))
1797 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1798 return false;
1800 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1801 return true;
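/* Editorial example: on a big-endian target, a short passed on the stack is
   padded downward, i.e. its two significant bytes occupy the highest
   addresses of the 8-byte slot; a small struct, by contrast, is padded
   upward and starts at the slot's lowest address.  */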
1804 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1806 It specifies padding for the last (may also be the only)
1807 element of a block move between registers and memory. If
1808 the block is assumed to be in memory, padding upward means that
1809 the last element is padded after its most significant byte, while
1810 with downward padding the last element is padded on its least
1811 significant byte side.
1813 Small aggregates and small complex types are always padded
1814 upwards.
1816 We don't need to worry about homogeneous floating-point or
1817 short-vector aggregates; their move is not affected by the
1818 padding direction determined here. Regardless of endianness,
1819 each element of such an aggregate is put in the least
1820 significant bits of a fp/simd register.
1822 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1823 register has useful data, and return the opposite if the most
1824 significant byte does. */
1826 bool
1827 aarch64_pad_reg_upward (machine_mode mode, const_tree type,
1828 bool first ATTRIBUTE_UNUSED)
1831 /* Small composite types are always padded upward. */
1832 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1834 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1835 : GET_MODE_SIZE (mode));
1836 if (size < 2 * UNITS_PER_WORD)
1837 return true;
1840 /* Otherwise, use the default padding. */
1841 return !BYTES_BIG_ENDIAN;
1844 static machine_mode
1845 aarch64_libgcc_cmp_return_mode (void)
1847 return SImode;
1850 static bool
1851 aarch64_frame_pointer_required (void)
1853 /* In aarch64_override_options_after_change
1854 flag_omit_leaf_frame_pointer turns off the frame pointer by
1855 default. Turn it back on now if we've not got a leaf
1856 function. */
1857 if (flag_omit_leaf_frame_pointer
1858 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
1859 return true;
1861 return false;
1864 /* Mark the registers that need to be saved by the callee and calculate
1865 the size of the callee-saved registers area and frame record (both FP
1866 and LR may be omitted). */
1867 static void
1868 aarch64_layout_frame (void)
1870 HOST_WIDE_INT offset = 0;
1871 int regno;
1873 if (reload_completed && cfun->machine->frame.laid_out)
1874 return;
1876 #define SLOT_NOT_REQUIRED (-2)
1877 #define SLOT_REQUIRED (-1)
1879 cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER;
1880 cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER;
1882 /* First mark all the registers that really need to be saved... */
1883 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1884 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
1886 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1887 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
1889 /* ... that includes the eh data registers (if needed)... */
1890 if (crtl->calls_eh_return)
1891 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1892 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
1893 = SLOT_REQUIRED;
1895 /* ... and any callee saved register that dataflow says is live. */
1896 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1897 if (df_regs_ever_live_p (regno)
1898 && !call_used_regs[regno])
1899 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
1901 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1902 if (df_regs_ever_live_p (regno)
1903 && !call_used_regs[regno])
1904 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
1906 if (frame_pointer_needed)
1908 /* FP and LR are placed in the linkage record. */
1909 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1910 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
1911 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
1912 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
1913 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1914 offset += 2 * UNITS_PER_WORD;
1917 /* Now assign stack slots for them. */
1918 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1919 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
1921 cfun->machine->frame.reg_offset[regno] = offset;
1922 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
1923 cfun->machine->frame.wb_candidate1 = regno;
1924 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER)
1925 cfun->machine->frame.wb_candidate2 = regno;
1926 offset += UNITS_PER_WORD;
1929 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1930 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
1932 cfun->machine->frame.reg_offset[regno] = offset;
1933 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
1934 cfun->machine->frame.wb_candidate1 = regno;
1935 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER
1936 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
1937 cfun->machine->frame.wb_candidate2 = regno;
1938 offset += UNITS_PER_WORD;
1941 cfun->machine->frame.padding0 =
1942 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1943 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1945 cfun->machine->frame.saved_regs_size = offset;
1947 cfun->machine->frame.hard_fp_offset
1948 = AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size
1949 + get_frame_size ()
1950 + cfun->machine->frame.saved_regs_size,
1951 STACK_BOUNDARY / BITS_PER_UNIT);
1953 cfun->machine->frame.frame_size
1954 = AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset
1955 + crtl->outgoing_args_size,
1956 STACK_BOUNDARY / BITS_PER_UNIT);
1958 cfun->machine->frame.laid_out = true;
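/* Editorial sketch of the resulting layout when the frame pointer is needed:
   the FP/LR pair sits at offset 0 of the callee-save area (and becomes the
   preferred write-back candidate pair), remaining live callee-saved GP and
   FP registers follow at successive 8-byte offsets, and the saved-register
   area is rounded up to STACK_BOUNDARY before locals and outgoing argument
   space are added to form the full frame size.  */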
1961 static bool
1962 aarch64_register_saved_on_entry (int regno)
1964 return cfun->machine->frame.reg_offset[regno] >= 0;
1967 static unsigned
1968 aarch64_next_callee_save (unsigned regno, unsigned limit)
1970 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
1971 regno ++;
1972 return regno;
1975 static void
1976 aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
1977 HOST_WIDE_INT adjustment)
1979 rtx base_rtx = stack_pointer_rtx;
1980 rtx insn, reg, mem;
1982 reg = gen_rtx_REG (mode, regno);
1983 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
1984 plus_constant (Pmode, base_rtx, -adjustment));
1985 mem = gen_rtx_MEM (mode, mem);
1987 insn = emit_move_insn (mem, reg);
1988 RTX_FRAME_RELATED_P (insn) = 1;
1991 static rtx
1992 aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
1993 HOST_WIDE_INT adjustment)
1995 switch (mode)
1997 case DImode:
1998 return gen_storewb_pairdi_di (base, base, reg, reg2,
1999 GEN_INT (-adjustment),
2000 GEN_INT (UNITS_PER_WORD - adjustment));
2001 case DFmode:
2002 return gen_storewb_pairdf_di (base, base, reg, reg2,
2003 GEN_INT (-adjustment),
2004 GEN_INT (UNITS_PER_WORD - adjustment));
2005 default:
2006 gcc_unreachable ();
2010 static void
2011 aarch64_pushwb_pair_reg (machine_mode mode, unsigned regno1,
2012 unsigned regno2, HOST_WIDE_INT adjustment)
2014 rtx_insn *insn;
2015 rtx reg1 = gen_rtx_REG (mode, regno1);
2016 rtx reg2 = gen_rtx_REG (mode, regno2);
2018 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
2019 reg2, adjustment));
2020 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2021 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2022 RTX_FRAME_RELATED_P (insn) = 1;
2025 static rtx
2026 aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
2027 HOST_WIDE_INT adjustment)
2029 switch (mode)
2031 case DImode:
2032 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
2033 GEN_INT (UNITS_PER_WORD));
2034 case DFmode:
2035 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
2036 GEN_INT (UNITS_PER_WORD));
2037 default:
2038 gcc_unreachable ();
2042 static rtx
2043 aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
2044 rtx reg2)
2046 switch (mode)
2048 case DImode:
2049 return gen_store_pairdi (mem1, reg1, mem2, reg2);
2051 case DFmode:
2052 return gen_store_pairdf (mem1, reg1, mem2, reg2);
2054 default:
2055 gcc_unreachable ();
2059 static rtx
2060 aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
2061 rtx mem2)
2063 switch (mode)
2065 case DImode:
2066 return gen_load_pairdi (reg1, mem1, reg2, mem2);
2068 case DFmode:
2069 return gen_load_pairdf (reg1, mem1, reg2, mem2);
2071 default:
2072 gcc_unreachable ();
2077 static void
2078 aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
2079 unsigned start, unsigned limit, bool skip_wb)
2081 rtx_insn *insn;
2082 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
2083 ? gen_frame_mem : gen_rtx_MEM);
2084 unsigned regno;
2085 unsigned regno2;
2087 for (regno = aarch64_next_callee_save (start, limit);
2088 regno <= limit;
2089 regno = aarch64_next_callee_save (regno + 1, limit))
2091 rtx reg, mem;
2092 HOST_WIDE_INT offset;
2094 if (skip_wb
2095 && (regno == cfun->machine->frame.wb_candidate1
2096 || regno == cfun->machine->frame.wb_candidate2))
2097 continue;
2099 reg = gen_rtx_REG (mode, regno);
2100 offset = start_offset + cfun->machine->frame.reg_offset[regno];
2101 mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2102 offset));
2104 regno2 = aarch64_next_callee_save (regno + 1, limit);
2106 if (regno2 <= limit
2107 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2108 == cfun->machine->frame.reg_offset[regno2]))
2111 rtx reg2 = gen_rtx_REG (mode, regno2);
2112 rtx mem2;
2114 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2115 mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2116 offset));
2117 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
2118 reg2));
2120 /* The first part of a frame-related parallel insn is
2121 always assumed to be relevant to the frame
2122 calculations; subsequent parts are only
2123 frame-related if explicitly marked. */
2124 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2125 regno = regno2;
2127 else
2128 insn = emit_move_insn (mem, reg);
2130 RTX_FRAME_RELATED_P (insn) = 1;
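/* For example, when two saved registers occupy adjacent slots (say x19 at
   offset 0 and x20 at offset 8 from START_OFFSET), the pairing check above
   emits a single store-pair (stp) rather than two separate stores.  */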
2134 static void
2135 aarch64_restore_callee_saves (machine_mode mode,
2136 HOST_WIDE_INT start_offset, unsigned start,
2137 unsigned limit, bool skip_wb, rtx *cfi_ops)
2139 rtx base_rtx = stack_pointer_rtx;
2140 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
2141 ? gen_frame_mem : gen_rtx_MEM);
2142 unsigned regno;
2143 unsigned regno2;
2144 HOST_WIDE_INT offset;
2146 for (regno = aarch64_next_callee_save (start, limit);
2147 regno <= limit;
2148 regno = aarch64_next_callee_save (regno + 1, limit))
2150 rtx reg, mem;
2152 if (skip_wb
2153 && (regno == cfun->machine->frame.wb_candidate1
2154 || regno == cfun->machine->frame.wb_candidate2))
2155 continue;
2157 reg = gen_rtx_REG (mode, regno);
2158 offset = start_offset + cfun->machine->frame.reg_offset[regno];
2159 mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2161 regno2 = aarch64_next_callee_save (regno + 1, limit);
2163 if (regno2 <= limit
2164 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2165 == cfun->machine->frame.reg_offset[regno2]))
2167 rtx reg2 = gen_rtx_REG (mode, regno2);
2168 rtx mem2;
2170 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2171 mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2172 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
2174 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
2175 regno = regno2;
2177 else
2178 emit_move_insn (reg, mem);
2179 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
2183 /* AArch64 stack frames generated by this compiler look like:
2185 +-------------------------------+
2187 | incoming stack arguments |
2189 +-------------------------------+
2190 | | <-- incoming stack pointer (aligned)
2191 | callee-allocated save area |
2192 | for register varargs |
2194 +-------------------------------+
2195 | local variables | <-- frame_pointer_rtx
2197 +-------------------------------+
2198 | padding0 | \
2199 +-------------------------------+ |
2200 | callee-saved registers | | frame.saved_regs_size
2201 +-------------------------------+ |
2202 | LR' | |
2203 +-------------------------------+ |
2204 | FP' | / <- hard_frame_pointer_rtx (aligned)
2205 +-------------------------------+
2206 | dynamic allocation |
2207 +-------------------------------+
2208 | padding |
2209 +-------------------------------+
2210 | outgoing stack arguments | <-- arg_pointer
2212 +-------------------------------+
2213 | | <-- stack_pointer_rtx (aligned)
2215 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
2216 but leave frame_pointer_rtx and hard_frame_pointer_rtx
2217 unchanged. */
2219 /* Generate the prologue instructions for entry into a function.
2220 Establish the stack frame by decreasing the stack pointer by a
2221 properly calculated amount and, if necessary, create a frame record
2222 filled with the values of LR and the previous frame pointer. The
2223 current FP is also set up if it is in use. */
2225 void
2226 aarch64_expand_prologue (void)
2228 /* sub sp, sp, #<frame_size>
2229 stp {fp, lr}, [sp, #<frame_size> - 16]
2230 add fp, sp, #<frame_size> - hardfp_offset
2231 stp {cs_reg}, [fp, #-16] etc.
2233 sub sp, sp, <final_adjustment_if_any>
2235 HOST_WIDE_INT frame_size, offset;
2236 HOST_WIDE_INT fp_offset; /* Offset from hard FP to SP. */
2237 HOST_WIDE_INT hard_fp_offset;
2238 rtx_insn *insn;
2240 aarch64_layout_frame ();
2242 offset = frame_size = cfun->machine->frame.frame_size;
2243 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2244 fp_offset = frame_size - hard_fp_offset;
2246 if (flag_stack_usage_info)
2247 current_function_static_stack_size = frame_size;
2249 /* Store pairs and load pairs have a range of only -512 to 504. */
2250 if (offset >= 512)
2252 /* When the frame is large, an initial adjustment is made to the
2253 stack pointer to skip over the callee-allocated save area for
2254 register varargs, the local variable area and/or the callee-saved
2255 register area. This allows the pre-index writeback store-pair
2256 instructions to be used to set up the rest of the stack frame
2257 efficiently. */
2258 offset = hard_fp_offset;
2259 if (offset >= 512)
2260 offset = cfun->machine->frame.saved_regs_size;
2262 frame_size -= (offset + crtl->outgoing_args_size);
2263 fp_offset = 0;
2265 if (frame_size >= 0x1000000)
2267 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2268 emit_move_insn (op0, GEN_INT (-frame_size));
2269 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2271 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2272 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
2273 plus_constant (Pmode, stack_pointer_rtx,
2274 -frame_size)));
2275 RTX_FRAME_RELATED_P (insn) = 1;
2277 else if (frame_size > 0)
2279 int hi_ofs = frame_size & 0xfff000;
2280 int lo_ofs = frame_size & 0x000fff;
2282 if (hi_ofs)
2284 insn = emit_insn (gen_add2_insn
2285 (stack_pointer_rtx, GEN_INT (-hi_ofs)));
2286 RTX_FRAME_RELATED_P (insn) = 1;
2288 if (lo_ofs)
2290 insn = emit_insn (gen_add2_insn
2291 (stack_pointer_rtx, GEN_INT (-lo_ofs)));
2292 RTX_FRAME_RELATED_P (insn) = 1;
2296 else
2297 frame_size = -1;
2299 if (offset > 0)
2301 bool skip_wb = false;
2303 if (frame_pointer_needed)
2305 skip_wb = true;
2307 if (fp_offset)
2309 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2310 GEN_INT (-offset)));
2311 RTX_FRAME_RELATED_P (insn) = 1;
2313 aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
2314 R30_REGNUM, false);
2316 else
2317 aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset);
2319 /* Set up frame pointer to point to the location of the
2320 previous frame pointer on the stack. */
2321 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2322 stack_pointer_rtx,
2323 GEN_INT (fp_offset)));
2324 RTX_FRAME_RELATED_P (insn) = 1;
2325 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
2327 else
2329 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2330 unsigned reg2 = cfun->machine->frame.wb_candidate2;
2332 if (fp_offset
2333 || reg1 == FIRST_PSEUDO_REGISTER
2334 || (reg2 == FIRST_PSEUDO_REGISTER
2335 && offset >= 256))
2337 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2338 GEN_INT (-offset)));
2339 RTX_FRAME_RELATED_P (insn) = 1;
2341 else
2343 machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
2345 skip_wb = true;
2347 if (reg2 == FIRST_PSEUDO_REGISTER)
2348 aarch64_pushwb_single_reg (mode1, reg1, offset);
2349 else
2350 aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset);
2354 aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2355 skip_wb);
2356 aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2357 skip_wb);
2360 /* When offset >= 512,
2361 sub sp, sp, #<outgoing_args_size> */
2362 if (frame_size > -1)
2364 if (crtl->outgoing_args_size > 0)
2366 insn = emit_insn (gen_add2_insn
2367 (stack_pointer_rtx,
2368 GEN_INT (- crtl->outgoing_args_size)));
2369 RTX_FRAME_RELATED_P (insn) = 1;
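/* Illustration of the hi_ofs/lo_ofs split used above: an adjustment of
   0x12345 bytes is emitted as
	sub	sp, sp, #0x12000
	sub	sp, sp, #0x345
   since an add/sub immediate only encodes 12 bits, optionally shifted
   left by 12.  */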
2374 /* Return TRUE if we can use a simple_return insn.
2376 This function checks whether the callee-saved stack is empty, which
2377 means no restore actions are needed. The pro_and_epilogue pass uses
2378 this to check whether shrink-wrapping is feasible. */
2380 bool
2381 aarch64_use_return_insn_p (void)
2383 if (!reload_completed)
2384 return false;
2386 if (crtl->profile)
2387 return false;
2389 aarch64_layout_frame ();
2391 return cfun->machine->frame.frame_size == 0;
2394 /* Generate the epilogue instructions for returning from a function. */
2395 void
2396 aarch64_expand_epilogue (bool for_sibcall)
2398 HOST_WIDE_INT frame_size, offset;
2399 HOST_WIDE_INT fp_offset;
2400 HOST_WIDE_INT hard_fp_offset;
2401 rtx_insn *insn;
2403 aarch64_layout_frame ();
2405 offset = frame_size = cfun->machine->frame.frame_size;
2406 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2407 fp_offset = frame_size - hard_fp_offset;
2409 /* Store pairs and load pairs have a range of only -512 to 504. */
2410 if (offset >= 512)
2412 offset = hard_fp_offset;
2413 if (offset >= 512)
2414 offset = cfun->machine->frame.saved_regs_size;
2416 frame_size -= (offset + crtl->outgoing_args_size);
2417 fp_offset = 0;
2418 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2420 insn = emit_insn (gen_add2_insn
2421 (stack_pointer_rtx,
2422 GEN_INT (crtl->outgoing_args_size)));
2423 RTX_FRAME_RELATED_P (insn) = 1;
2426 else
2427 frame_size = -1;
2429 /* If there were outgoing arguments or we've done dynamic stack
2430 allocation, then restore the stack pointer from the frame
2431 pointer. This is at most one insn and more efficient than using
2432 GCC's internal mechanism. */
2433 if (frame_pointer_needed
2434 && (crtl->outgoing_args_size || cfun->calls_alloca))
2436 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2437 hard_frame_pointer_rtx,
2438 GEN_INT (0)));
2439 offset = offset - fp_offset;
2442 if (offset > 0)
2444 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2445 unsigned reg2 = cfun->machine->frame.wb_candidate2;
2446 bool skip_wb = true;
2447 rtx cfi_ops = NULL;
2449 if (frame_pointer_needed)
2450 fp_offset = 0;
2451 else if (fp_offset
2452 || reg1 == FIRST_PSEUDO_REGISTER
2453 || (reg2 == FIRST_PSEUDO_REGISTER
2454 && offset >= 256))
2455 skip_wb = false;
2457 aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2458 skip_wb, &cfi_ops);
2459 aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2460 skip_wb, &cfi_ops);
2462 if (skip_wb)
2464 machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
2465 rtx rreg1 = gen_rtx_REG (mode1, reg1);
2467 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops);
2468 if (reg2 == FIRST_PSEUDO_REGISTER)
2470 rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset);
2471 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
2472 mem = gen_rtx_MEM (mode1, mem);
2473 insn = emit_move_insn (rreg1, mem);
2475 else
2477 rtx rreg2 = gen_rtx_REG (mode1, reg2);
2479 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops);
2480 insn = emit_insn (aarch64_gen_loadwb_pair
2481 (mode1, stack_pointer_rtx, rreg1,
2482 rreg2, offset));
2485 else
2487 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2488 GEN_INT (offset)));
2491 /* Reset the CFA to be SP + FRAME_SIZE. */
2492 rtx new_cfa = stack_pointer_rtx;
2493 if (frame_size > 0)
2494 new_cfa = plus_constant (Pmode, new_cfa, frame_size);
2495 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
2496 REG_NOTES (insn) = cfi_ops;
2497 RTX_FRAME_RELATED_P (insn) = 1;
2500 if (frame_size > 0)
2502 if (frame_size >= 0x1000000)
2504 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2505 emit_move_insn (op0, GEN_INT (frame_size));
2506 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2508 else
2510 int hi_ofs = frame_size & 0xfff000;
2511 int lo_ofs = frame_size & 0x000fff;
2513 if (hi_ofs && lo_ofs)
2515 insn = emit_insn (gen_add2_insn
2516 (stack_pointer_rtx, GEN_INT (hi_ofs)));
2517 RTX_FRAME_RELATED_P (insn) = 1;
2518 frame_size = lo_ofs;
2520 insn = emit_insn (gen_add2_insn
2521 (stack_pointer_rtx, GEN_INT (frame_size)));
2524 /* Reset the CFA to be SP + 0. */
2525 add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
2526 RTX_FRAME_RELATED_P (insn) = 1;
2529 /* Stack adjustment for exception handler. */
2530 if (crtl->calls_eh_return)
2532 /* We need to unwind the stack by the offset computed by
2533 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
2534 to be SP; letting the CFA move during this adjustment
2535 is just as correct as retaining the CFA from the body
2536 of the function. Therefore, do nothing special. */
2537 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2540 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2541 if (!for_sibcall)
2542 emit_jump_insn (ret_rtx);
2545 /* Return the place to copy the exception unwinding return address to.
2546 This will probably be a stack slot, but could (in theory) be the
2547 return register. */
2549 aarch64_final_eh_return_addr (void)
2551 HOST_WIDE_INT fp_offset;
2553 aarch64_layout_frame ();
2555 fp_offset = cfun->machine->frame.frame_size
2556 - cfun->machine->frame.hard_fp_offset;
2558 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2559 return gen_rtx_REG (DImode, LR_REGNUM);
2561 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2562 result in a store to save LR introduced by builtin_eh_return () being
2563 incorrectly deleted because the alias is not detected.
2564 So in the calculation of the address to copy the exception unwinding
2565 return address to, we distinguish two cases.
2566 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2567 we return a SP-relative location since all the addresses are SP-relative
2568 in this case. This prevents the store from being optimized away.
2569 If the fp_offset is not 0, then the addresses will be FP-relative and
2570 therefore we return a FP-relative location. */
2572 if (frame_pointer_needed)
2574 if (fp_offset)
2575 return gen_frame_mem (DImode,
2576 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2577 else
2578 return gen_frame_mem (DImode,
2579 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2582 /* If FP is not needed, we calculate the location of LR, which would be
2583 at the top of the saved registers block. */
2585 return gen_frame_mem (DImode,
2586 plus_constant (Pmode,
2587 stack_pointer_rtx,
2588 fp_offset
2589 + cfun->machine->frame.saved_regs_size
2590 - 2 * UNITS_PER_WORD));
2593 /* Possibly output code to build up a constant in a register. For
2594 the benefit of the costs infrastructure, returns the number of
2595 instructions which would be emitted. GENERATE inhibits or
2596 enables code generation. */
2598 static int
2599 aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
2601 int insns = 0;
2603 if (aarch64_bitmask_imm (val, DImode))
2605 if (generate)
2606 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2607 insns = 1;
2609 else
2611 int i;
2612 int ncount = 0;
2613 int zcount = 0;
2614 HOST_WIDE_INT valp = val >> 16;
2615 HOST_WIDE_INT valm;
2616 HOST_WIDE_INT tval;
2618 for (i = 16; i < 64; i += 16)
2620 valm = (valp & 0xffff);
2622 if (valm != 0)
2623 ++ zcount;
2625 if (valm != 0xffff)
2626 ++ ncount;
2628 valp >>= 16;
2631 /* zcount contains the number of additional MOVK instructions
2632 required if the constant is built up with an initial MOVZ instruction,
2633 while ncount is the number of MOVK instructions required if starting
2634 with a MOVN instruction. Choose the sequence that yields the fewest
2635 instructions, preferring MOVZ instructions when the two counts are
2636 the same. */
2637 if (ncount < zcount)
2639 if (generate)
2640 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2641 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2642 tval = 0xffff;
2643 insns++;
2645 else
2647 if (generate)
2648 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2649 GEN_INT (val & 0xffff));
2650 tval = 0;
2651 insns++;
2654 val >>= 16;
2656 for (i = 16; i < 64; i += 16)
2658 if ((val & 0xffff) != tval)
2660 if (generate)
2661 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2662 GEN_INT (i),
2663 GEN_INT (val & 0xffff)));
2664 insns++;
2666 val >>= 16;
2669 return insns;
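/* For example, the constant 0xffffffffffff1234 has three all-ones upper
   halfwords, so ncount = 0 < zcount = 3 and the MOVN path is chosen: a
   single move (e.g. "movn <reg>, #0xedcb") materializes the value,
   whereas starting from MOVZ would need one MOVZ plus three MOVKs.  */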
2672 static void
2673 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2675 HOST_WIDE_INT mdelta = delta;
2676 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2677 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2679 if (mdelta < 0)
2680 mdelta = -mdelta;
2682 if (mdelta >= 4096 * 4096)
2684 (void) aarch64_build_constant (scratchreg, delta, true);
2685 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2687 else if (mdelta > 0)
2689 if (mdelta >= 4096)
2691 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2692 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2693 if (delta < 0)
2694 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2695 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2696 else
2697 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2698 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2700 if (mdelta % 4096 != 0)
2702 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2703 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2704 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
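/* For example, DELTA = 5000 (which is below 4096 * 4096) is split above as
   4096 + 904: the scratch register is loaded with 5000 / 4096 = 1, shifted
   left by 12 and added to the destination, and the remainder
   5000 % 4096 = 904 is added with a plain immediate add.  */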
2709 /* Output code to add DELTA to the first argument, and then jump
2710 to FUNCTION. Used for C++ multiple inheritance. */
2711 static void
2712 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2713 HOST_WIDE_INT delta,
2714 HOST_WIDE_INT vcall_offset,
2715 tree function)
2717 /* The this pointer is always in x0. Note that this differs from
2718 Arm where the this pointer may be bumped to r1 if r0 is required
2719 to return a pointer to an aggregate. On AArch64 a result value
2720 pointer will be in x8. */
2721 int this_regno = R0_REGNUM;
2722 rtx this_rtx, temp0, temp1, addr, funexp;
2723 rtx_insn *insn;
2725 reload_completed = 1;
2726 emit_note (NOTE_INSN_PROLOGUE_END);
2728 if (vcall_offset == 0)
2729 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2730 else
2732 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2734 this_rtx = gen_rtx_REG (Pmode, this_regno);
2735 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2736 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2738 addr = this_rtx;
2739 if (delta != 0)
2741 if (delta >= -256 && delta < 256)
2742 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2743 plus_constant (Pmode, this_rtx, delta));
2744 else
2745 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2748 if (Pmode == ptr_mode)
2749 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2750 else
2751 aarch64_emit_move (temp0,
2752 gen_rtx_ZERO_EXTEND (Pmode,
2753 gen_rtx_MEM (ptr_mode, addr)));
2755 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2756 addr = plus_constant (Pmode, temp0, vcall_offset);
2757 else
2759 (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
2760 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2763 if (Pmode == ptr_mode)
2764 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2765 else
2766 aarch64_emit_move (temp1,
2767 gen_rtx_SIGN_EXTEND (Pmode,
2768 gen_rtx_MEM (ptr_mode, addr)));
2770 emit_insn (gen_add2_insn (this_rtx, temp1));
2773 /* Generate a tail call to the target function. */
2774 if (!TREE_USED (function))
2776 assemble_external (function);
2777 TREE_USED (function) = 1;
2779 funexp = XEXP (DECL_RTL (function), 0);
2780 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2781 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2782 SIBLING_CALL_P (insn) = 1;
2784 insn = get_insns ();
2785 shorten_branches (insn);
2786 final_start_function (insn, file, 1);
2787 final (insn, file, 1);
2788 final_end_function ();
2790 /* Stop pretending to be a post-reload pass. */
2791 reload_completed = 0;
2794 static int
2795 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2797 if (GET_CODE (*x) == SYMBOL_REF)
2798 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2800 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2801 TLS offsets, not real symbol references. */
2802 if (GET_CODE (*x) == UNSPEC
2803 && XINT (*x, 1) == UNSPEC_TLS)
2804 return -1;
2806 return 0;
2809 static bool
2810 aarch64_tls_referenced_p (rtx x)
2812 if (!TARGET_HAVE_TLS)
2813 return false;
2815 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2819 static int
2820 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2822 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2823 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2825 if (*imm1 < *imm2)
2826 return -1;
2827 if (*imm1 > *imm2)
2828 return +1;
2829 return 0;
2833 static void
2834 aarch64_build_bitmask_table (void)
2836 unsigned HOST_WIDE_INT mask, imm;
2837 unsigned int log_e, e, s, r;
2838 unsigned int nimms = 0;
2840 for (log_e = 1; log_e <= 6; log_e++)
2842 e = 1 << log_e;
2843 if (e == 64)
2844 mask = ~(HOST_WIDE_INT) 0;
2845 else
2846 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2847 for (s = 1; s < e; s++)
2849 for (r = 0; r < e; r++)
2851 /* set s consecutive bits to 1 (s < 64) */
2852 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2853 /* rotate right by r */
2854 if (r != 0)
2855 imm = ((imm >> r) | (imm << (e - r))) & mask;
2856 /* replicate the constant depending on SIMD size */
2857 switch (log_e) {
2858 case 1: imm |= (imm << 2);
2859 case 2: imm |= (imm << 4);
2860 case 3: imm |= (imm << 8);
2861 case 4: imm |= (imm << 16);
2862 case 5: imm |= (imm << 32);
2863 case 6:
2864 break;
2865 default:
2866 gcc_unreachable ();
2868 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2869 aarch64_bitmasks[nimms++] = imm;
2874 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2875 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2876 aarch64_bitmasks_cmp);
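/* For example, with element size e = 8, s = 3 and r = 1 the loop above
   produces imm = 0x83 (three consecutive ones, 0x07, rotated right by one
   within an 8-bit element), which is then replicated across the 64-bit
   value to give the bitmask immediate 0x8383838383838383.  */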
2880 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2881 a left shift of 0 or 12 bits. */
2882 bool
2883 aarch64_uimm12_shift (HOST_WIDE_INT val)
2885 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2886 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
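/* For example, 0xabc and 0xabc000 (0xabc << 12) are accepted above, while
   0xabc001 is rejected because its set bits do not fit entirely within
   either 12-bit field.  */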
2891 /* Return true if val is an immediate that can be loaded into a
2892 register by a MOVZ instruction. */
2893 static bool
2894 aarch64_movw_imm (HOST_WIDE_INT val, machine_mode mode)
2896 if (GET_MODE_SIZE (mode) > 4)
2898 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2899 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2900 return 1;
2902 else
2904 /* Ignore sign extension. */
2905 val &= (HOST_WIDE_INT) 0xffffffff;
2907 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2908 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
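/* For example, 0x12340000 is accepted for DImode above (all of its set bits
   lie in halfword 16-31, so a single "movz <reg>, #0x1234, lsl 16" loads
   it), whereas 0x12340001 is rejected because its set bits span two
   halfwords.  */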
2912 /* Return true if val is a valid bitmask immediate. */
2913 bool
2914 aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode mode)
2916 if (GET_MODE_SIZE (mode) < 8)
2918 /* Replicate bit pattern. */
2919 val &= (HOST_WIDE_INT) 0xffffffff;
2920 val |= val << 32;
2922 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2923 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
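/* For example, 0x00ff00ff00ff00ff (a run of eight ones replicated in every
   16-bit element) is present in the table and is therefore a valid bitmask
   immediate, whereas 0x1234 is not, since its set bits do not form a single
   rotated run of ones.  */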
2927 /* Return true if val is an immediate that can be loaded into a
2928 register in a single instruction. */
2929 bool
2930 aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
2932 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2933 return 1;
2934 return aarch64_bitmask_imm (val, mode);
2937 static bool
2938 aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2940 rtx base, offset;
2942 if (GET_CODE (x) == HIGH)
2943 return true;
2945 split_const (x, &base, &offset);
2946 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2948 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2949 != SYMBOL_FORCE_TO_MEM)
2950 return true;
2951 else
2952 /* Avoid generating a 64-bit relocation in ILP32; leave it
2953 to aarch64_expand_mov_immediate to handle properly. */
2954 return mode != ptr_mode;
2957 return aarch64_tls_referenced_p (x);
2960 /* Return true if register REGNO is a valid index register.
2961 STRICT_P is true if REG_OK_STRICT is in effect. */
2963 bool
2964 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2966 if (!HARD_REGISTER_NUM_P (regno))
2968 if (!strict_p)
2969 return true;
2971 if (!reg_renumber)
2972 return false;
2974 regno = reg_renumber[regno];
2976 return GP_REGNUM_P (regno);
2979 /* Return true if register REGNO is a valid base register for mode MODE.
2980 STRICT_P is true if REG_OK_STRICT is in effect. */
2982 bool
2983 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2985 if (!HARD_REGISTER_NUM_P (regno))
2987 if (!strict_p)
2988 return true;
2990 if (!reg_renumber)
2991 return false;
2993 regno = reg_renumber[regno];
2996 /* The fake registers will be eliminated to either the stack or
2997 hard frame pointer, both of which are usually valid base registers.
2998 Reload deals with the cases where the eliminated form isn't valid. */
2999 return (GP_REGNUM_P (regno)
3000 || regno == SP_REGNUM
3001 || regno == FRAME_POINTER_REGNUM
3002 || regno == ARG_POINTER_REGNUM);
3005 /* Return true if X is a valid base register for mode MODE.
3006 STRICT_P is true if REG_OK_STRICT is in effect. */
3008 static bool
3009 aarch64_base_register_rtx_p (rtx x, bool strict_p)
3011 if (!strict_p && GET_CODE (x) == SUBREG)
3012 x = SUBREG_REG (x);
3014 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
3017 /* Return true if address offset is a valid index. If it is, fill in INFO
3018 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
3020 static bool
3021 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
3022 machine_mode mode, bool strict_p)
3024 enum aarch64_address_type type;
3025 rtx index;
3026 int shift;
3028 /* (reg:P) */
3029 if ((REG_P (x) || GET_CODE (x) == SUBREG)
3030 && GET_MODE (x) == Pmode)
3032 type = ADDRESS_REG_REG;
3033 index = x;
3034 shift = 0;
3036 /* (sign_extend:DI (reg:SI)) */
3037 else if ((GET_CODE (x) == SIGN_EXTEND
3038 || GET_CODE (x) == ZERO_EXTEND)
3039 && GET_MODE (x) == DImode
3040 && GET_MODE (XEXP (x, 0)) == SImode)
3042 type = (GET_CODE (x) == SIGN_EXTEND)
3043 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3044 index = XEXP (x, 0);
3045 shift = 0;
3047 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3048 else if (GET_CODE (x) == MULT
3049 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3050 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3051 && GET_MODE (XEXP (x, 0)) == DImode
3052 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3053 && CONST_INT_P (XEXP (x, 1)))
3055 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3056 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3057 index = XEXP (XEXP (x, 0), 0);
3058 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3060 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3061 else if (GET_CODE (x) == ASHIFT
3062 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3063 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3064 && GET_MODE (XEXP (x, 0)) == DImode
3065 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3066 && CONST_INT_P (XEXP (x, 1)))
3068 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3069 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3070 index = XEXP (XEXP (x, 0), 0);
3071 shift = INTVAL (XEXP (x, 1));
3073 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3074 else if ((GET_CODE (x) == SIGN_EXTRACT
3075 || GET_CODE (x) == ZERO_EXTRACT)
3076 && GET_MODE (x) == DImode
3077 && GET_CODE (XEXP (x, 0)) == MULT
3078 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3079 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3081 type = (GET_CODE (x) == SIGN_EXTRACT)
3082 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3083 index = XEXP (XEXP (x, 0), 0);
3084 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3085 if (INTVAL (XEXP (x, 1)) != 32 + shift
3086 || INTVAL (XEXP (x, 2)) != 0)
3087 shift = -1;
3089 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3090 (const_int 0xffffffff<<shift)) */
3091 else if (GET_CODE (x) == AND
3092 && GET_MODE (x) == DImode
3093 && GET_CODE (XEXP (x, 0)) == MULT
3094 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3095 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3096 && CONST_INT_P (XEXP (x, 1)))
3098 type = ADDRESS_REG_UXTW;
3099 index = XEXP (XEXP (x, 0), 0);
3100 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3101 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3102 shift = -1;
3104 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3105 else if ((GET_CODE (x) == SIGN_EXTRACT
3106 || GET_CODE (x) == ZERO_EXTRACT)
3107 && GET_MODE (x) == DImode
3108 && GET_CODE (XEXP (x, 0)) == ASHIFT
3109 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3110 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3112 type = (GET_CODE (x) == SIGN_EXTRACT)
3113 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3114 index = XEXP (XEXP (x, 0), 0);
3115 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3116 if (INTVAL (XEXP (x, 1)) != 32 + shift
3117 || INTVAL (XEXP (x, 2)) != 0)
3118 shift = -1;
3120 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3121 (const_int 0xffffffff<<shift)) */
3122 else if (GET_CODE (x) == AND
3123 && GET_MODE (x) == DImode
3124 && GET_CODE (XEXP (x, 0)) == ASHIFT
3125 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3126 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3127 && CONST_INT_P (XEXP (x, 1)))
3129 type = ADDRESS_REG_UXTW;
3130 index = XEXP (XEXP (x, 0), 0);
3131 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3132 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3133 shift = -1;
3135 /* (mult:P (reg:P) (const_int scale)) */
3136 else if (GET_CODE (x) == MULT
3137 && GET_MODE (x) == Pmode
3138 && GET_MODE (XEXP (x, 0)) == Pmode
3139 && CONST_INT_P (XEXP (x, 1)))
3141 type = ADDRESS_REG_REG;
3142 index = XEXP (x, 0);
3143 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3145 /* (ashift:P (reg:P) (const_int shift)) */
3146 else if (GET_CODE (x) == ASHIFT
3147 && GET_MODE (x) == Pmode
3148 && GET_MODE (XEXP (x, 0)) == Pmode
3149 && CONST_INT_P (XEXP (x, 1)))
3151 type = ADDRESS_REG_REG;
3152 index = XEXP (x, 0);
3153 shift = INTVAL (XEXP (x, 1));
3155 else
3156 return false;
3158 if (GET_CODE (index) == SUBREG)
3159 index = SUBREG_REG (index);
3161 if ((shift == 0 ||
3162 (shift > 0 && shift <= 3
3163 && (1 << shift) == GET_MODE_SIZE (mode)))
3164 && REG_P (index)
3165 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3167 info->type = type;
3168 info->offset = index;
3169 info->shift = shift;
3170 return true;
3173 return false;
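/* For example, an index of the form
   (mult:DI (sign_extend:DI (reg:SI)) (const_int 8))
   is classified above as ADDRESS_REG_SXTW with shift 3 for an 8-byte
   access, corresponding to the [base, wN, sxtw 3] addressing form.  */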
3176 bool
3177 aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
3179 return (offset >= -64 * GET_MODE_SIZE (mode)
3180 && offset < 64 * GET_MODE_SIZE (mode)
3181 && offset % GET_MODE_SIZE (mode) == 0);
3184 static inline bool
3185 offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
3186 HOST_WIDE_INT offset)
3188 return offset >= -256 && offset < 256;
3191 static inline bool
3192 offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
3194 return (offset >= 0
3195 && offset < 4096 * GET_MODE_SIZE (mode)
3196 && offset % GET_MODE_SIZE (mode) == 0);
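/* For an 8-byte (DImode) access the three predicates above accept,
   respectively: multiples of 8 in [-512, 504], any offset in [-256, 255],
   and multiples of 8 in [0, 32760].  */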
3199 /* Return true if X is a valid address for machine mode MODE. If it is,
3200 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3201 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3203 static bool
3204 aarch64_classify_address (struct aarch64_address_info *info,
3205 rtx x, machine_mode mode,
3206 RTX_CODE outer_code, bool strict_p)
3208 enum rtx_code code = GET_CODE (x);
3209 rtx op0, op1;
3210 bool allow_reg_index_p =
3211 outer_code != PARALLEL && (GET_MODE_SIZE (mode) != 16
3212 || aarch64_vector_mode_supported_p (mode));
3213 /* Don't support anything other than POST_INC or REG addressing for
3214 AdvSIMD. */
3215 if (aarch64_vect_struct_mode_p (mode)
3216 && (code != POST_INC && code != REG))
3217 return false;
3219 switch (code)
3221 case REG:
3222 case SUBREG:
3223 info->type = ADDRESS_REG_IMM;
3224 info->base = x;
3225 info->offset = const0_rtx;
3226 return aarch64_base_register_rtx_p (x, strict_p);
3228 case PLUS:
3229 op0 = XEXP (x, 0);
3230 op1 = XEXP (x, 1);
3232 if (! strict_p
3233 && REG_P (op0)
3234 && (op0 == virtual_stack_vars_rtx
3235 || op0 == frame_pointer_rtx
3236 || op0 == arg_pointer_rtx)
3237 && CONST_INT_P (op1))
3239 info->type = ADDRESS_REG_IMM;
3240 info->base = op0;
3241 info->offset = op1;
3243 return true;
3246 if (GET_MODE_SIZE (mode) != 0
3247 && CONST_INT_P (op1)
3248 && aarch64_base_register_rtx_p (op0, strict_p))
3250 HOST_WIDE_INT offset = INTVAL (op1);
3252 info->type = ADDRESS_REG_IMM;
3253 info->base = op0;
3254 info->offset = op1;
3256 /* TImode and TFmode values are allowed in both pairs of X
3257 registers and individual Q registers. The available
3258 address modes are:
3259 X,X: 7-bit signed scaled offset
3260 Q: 9-bit signed offset
3261 We conservatively require an offset representable in both modes.
3263 if (mode == TImode || mode == TFmode)
3264 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
3265 && offset_9bit_signed_unscaled_p (mode, offset));
3267 if (outer_code == PARALLEL)
3268 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3269 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
3270 else
3271 return (offset_9bit_signed_unscaled_p (mode, offset)
3272 || offset_12bit_unsigned_scaled_p (mode, offset));
3275 if (allow_reg_index_p)
3277 /* Look for base + (scaled/extended) index register. */
3278 if (aarch64_base_register_rtx_p (op0, strict_p)
3279 && aarch64_classify_index (info, op1, mode, strict_p))
3281 info->base = op0;
3282 return true;
3284 if (aarch64_base_register_rtx_p (op1, strict_p)
3285 && aarch64_classify_index (info, op0, mode, strict_p))
3287 info->base = op1;
3288 return true;
3292 return false;
3294 case POST_INC:
3295 case POST_DEC:
3296 case PRE_INC:
3297 case PRE_DEC:
3298 info->type = ADDRESS_REG_WB;
3299 info->base = XEXP (x, 0);
3300 info->offset = NULL_RTX;
3301 return aarch64_base_register_rtx_p (info->base, strict_p);
3303 case POST_MODIFY:
3304 case PRE_MODIFY:
3305 info->type = ADDRESS_REG_WB;
3306 info->base = XEXP (x, 0);
3307 if (GET_CODE (XEXP (x, 1)) == PLUS
3308 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3309 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3310 && aarch64_base_register_rtx_p (info->base, strict_p))
3312 HOST_WIDE_INT offset;
3313 info->offset = XEXP (XEXP (x, 1), 1);
3314 offset = INTVAL (info->offset);
3316 /* TImode and TFmode values are allowed in both pairs of X
3317 registers and individual Q registers. The available
3318 address modes are:
3319 X,X: 7-bit signed scaled offset
3320 Q: 9-bit signed offset
3321 We conservatively require an offset representable in both modes.
3323 if (mode == TImode || mode == TFmode)
3324 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
3325 && offset_9bit_signed_unscaled_p (mode, offset));
3327 if (outer_code == PARALLEL)
3328 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3329 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
3330 else
3331 return offset_9bit_signed_unscaled_p (mode, offset);
3333 return false;
3335 case CONST:
3336 case SYMBOL_REF:
3337 case LABEL_REF:
3338 /* load literal: pc-relative constant pool entry. Only supported
3339 for SI mode or larger. */
3340 info->type = ADDRESS_SYMBOLIC;
3341 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3343 rtx sym, addend;
3345 split_const (x, &sym, &addend);
3346 return (GET_CODE (sym) == LABEL_REF
3347 || (GET_CODE (sym) == SYMBOL_REF
3348 && CONSTANT_POOL_ADDRESS_P (sym)));
3350 return false;
3352 case LO_SUM:
3353 info->type = ADDRESS_LO_SUM;
3354 info->base = XEXP (x, 0);
3355 info->offset = XEXP (x, 1);
3356 if (allow_reg_index_p
3357 && aarch64_base_register_rtx_p (info->base, strict_p))
3359 rtx sym, offs;
3360 split_const (info->offset, &sym, &offs);
3361 if (GET_CODE (sym) == SYMBOL_REF
3362 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3363 == SYMBOL_SMALL_ABSOLUTE))
3365 /* The symbol and offset must be aligned to the access size. */
3366 unsigned int align;
3367 unsigned int ref_size;
3369 if (CONSTANT_POOL_ADDRESS_P (sym))
3370 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3371 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3373 tree exp = SYMBOL_REF_DECL (sym);
3374 align = TYPE_ALIGN (TREE_TYPE (exp));
3375 align = CONSTANT_ALIGNMENT (exp, align);
3377 else if (SYMBOL_REF_DECL (sym))
3378 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3379 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3380 && SYMBOL_REF_BLOCK (sym) != NULL)
3381 align = SYMBOL_REF_BLOCK (sym)->alignment;
3382 else
3383 align = BITS_PER_UNIT;
3385 ref_size = GET_MODE_SIZE (mode);
3386 if (ref_size == 0)
3387 ref_size = GET_MODE_SIZE (DImode);
3389 return ((INTVAL (offs) & (ref_size - 1)) == 0
3390 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3393 return false;
3395 default:
3396 return false;
3400 bool
3401 aarch64_symbolic_address_p (rtx x)
3403 rtx offset;
3405 split_const (x, &x, &offset);
3406 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3409 /* Classify the base of symbolic expression X, given that X appears in
3410 context CONTEXT. */
3412 enum aarch64_symbol_type
3413 aarch64_classify_symbolic_expression (rtx x,
3414 enum aarch64_symbol_context context)
3416 rtx offset;
3418 split_const (x, &x, &offset);
3419 return aarch64_classify_symbol (x, context);
3423 /* Return TRUE if X is a legitimate address for accessing memory in
3424 mode MODE. */
3425 static bool
3426 aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
3428 struct aarch64_address_info addr;
3430 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3433 /* Return TRUE if X is a legitimate address for accessing memory in
3434 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3435 pair operation. */
3436 bool
3437 aarch64_legitimate_address_p (machine_mode mode, rtx x,
3438 RTX_CODE outer_code, bool strict_p)
3440 struct aarch64_address_info addr;
3442 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3445 /* Return TRUE if rtx X is the immediate constant 0.0. */
3446 bool
3447 aarch64_float_const_zero_rtx_p (rtx x)
3449 REAL_VALUE_TYPE r;
3451 if (GET_MODE (x) == VOIDmode)
3452 return false;
3454 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3455 if (REAL_VALUE_MINUS_ZERO (r))
3456 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3457 return REAL_VALUES_EQUAL (r, dconst0);
3460 /* Return the fixed registers used for condition codes. */
3462 static bool
3463 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3465 *p1 = CC_REGNUM;
3466 *p2 = INVALID_REGNUM;
3467 return true;
3470 /* Emit call insn with PAT and do aarch64-specific handling. */
3472 void
3473 aarch64_emit_call_insn (rtx pat)
3475 rtx insn = emit_call_insn (pat);
3477 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
3478 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
3479 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
3482 machine_mode
3483 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3485 /* All floating point compares return CCFP if it is an equality
3486 comparison, and CCFPE otherwise. */
3487 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3489 switch (code)
3491 case EQ:
3492 case NE:
3493 case UNORDERED:
3494 case ORDERED:
3495 case UNLT:
3496 case UNLE:
3497 case UNGT:
3498 case UNGE:
3499 case UNEQ:
3500 case LTGT:
3501 return CCFPmode;
3503 case LT:
3504 case LE:
3505 case GT:
3506 case GE:
3507 return CCFPEmode;
3509 default:
3510 gcc_unreachable ();
3514 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3515 && y == const0_rtx
3516 && (code == EQ || code == NE || code == LT || code == GE)
3517 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3518 || GET_CODE (x) == NEG))
3519 return CC_NZmode;
3521 /* A compare with a shifted operand. Because of canonicalization,
3522 the comparison will have to be swapped when we emit the assembly
3523 code. */
3524 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3525 && (REG_P (y) || GET_CODE (y) == SUBREG)
3526 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3527 || GET_CODE (x) == LSHIFTRT
3528 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3529 return CC_SWPmode;
3531 /* Similarly for a negated operand, but we can only do this for
3532 equalities. */
3533 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3534 && (REG_P (y) || GET_CODE (y) == SUBREG)
3535 && (code == EQ || code == NE)
3536 && GET_CODE (x) == NEG)
3537 return CC_Zmode;
3539 /* A compare of a mode narrower than SI mode against zero can be done
3540 by extending the value in the comparison. */
3541 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3542 && y == const0_rtx)
3543 /* Only use sign-extension if we really need it. */
3544 return ((code == GT || code == GE || code == LE || code == LT)
3545 ? CC_SESWPmode : CC_ZESWPmode);
3547 /* For everything else, return CCmode. */
3548 return CCmode;
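/* For example, comparing (plus:DI x y) against zero for EQ, NE, LT or GE
   yields CC_NZmode above, which lets the comparison be folded into a
   flag-setting ADDS instead of a separate compare instruction.  */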
3552 aarch64_get_condition_code (rtx x)
3554 machine_mode mode = GET_MODE (XEXP (x, 0));
3555 enum rtx_code comp_code = GET_CODE (x);
3557 if (GET_MODE_CLASS (mode) != MODE_CC)
3558 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3560 switch (mode)
3562 case CCFPmode:
3563 case CCFPEmode:
3564 switch (comp_code)
3566 case GE: return AARCH64_GE;
3567 case GT: return AARCH64_GT;
3568 case LE: return AARCH64_LS;
3569 case LT: return AARCH64_MI;
3570 case NE: return AARCH64_NE;
3571 case EQ: return AARCH64_EQ;
3572 case ORDERED: return AARCH64_VC;
3573 case UNORDERED: return AARCH64_VS;
3574 case UNLT: return AARCH64_LT;
3575 case UNLE: return AARCH64_LE;
3576 case UNGT: return AARCH64_HI;
3577 case UNGE: return AARCH64_PL;
3578 default: return -1;
3580 break;
3582 case CCmode:
3583 switch (comp_code)
3585 case NE: return AARCH64_NE;
3586 case EQ: return AARCH64_EQ;
3587 case GE: return AARCH64_GE;
3588 case GT: return AARCH64_GT;
3589 case LE: return AARCH64_LE;
3590 case LT: return AARCH64_LT;
3591 case GEU: return AARCH64_CS;
3592 case GTU: return AARCH64_HI;
3593 case LEU: return AARCH64_LS;
3594 case LTU: return AARCH64_CC;
3595 default: return -1;
3597 break;
3599 case CC_SWPmode:
3600 case CC_ZESWPmode:
3601 case CC_SESWPmode:
3602 switch (comp_code)
3604 case NE: return AARCH64_NE;
3605 case EQ: return AARCH64_EQ;
3606 case GE: return AARCH64_LE;
3607 case GT: return AARCH64_LT;
3608 case LE: return AARCH64_GE;
3609 case LT: return AARCH64_GT;
3610 case GEU: return AARCH64_LS;
3611 case GTU: return AARCH64_CC;
3612 case LEU: return AARCH64_CS;
3613 case LTU: return AARCH64_HI;
3614 default: return -1;
3616 break;
3618 case CC_NZmode:
3619 switch (comp_code)
3621 case NE: return AARCH64_NE;
3622 case EQ: return AARCH64_EQ;
3623 case GE: return AARCH64_PL;
3624 case LT: return AARCH64_MI;
3625 default: return -1;
3627 break;
3629 case CC_Zmode:
3630 switch (comp_code)
3632 case NE: return AARCH64_NE;
3633 case EQ: return AARCH64_EQ;
3634 default: return -1;
3636 break;
3638 default:
3639 return -1;
3640 break;
3644 bool
3645 aarch64_const_vec_all_same_in_range_p (rtx x,
3646 HOST_WIDE_INT minval,
3647 HOST_WIDE_INT maxval)
3649 HOST_WIDE_INT firstval;
3650 int count, i;
3652 if (GET_CODE (x) != CONST_VECTOR
3653 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
3654 return false;
3656 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
3657 if (firstval < minval || firstval > maxval)
3658 return false;
3660 count = CONST_VECTOR_NUNITS (x);
3661 for (i = 1; i < count; i++)
3662 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
3663 return false;
3665 return true;
3668 bool
3669 aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
3671 return aarch64_const_vec_all_same_in_range_p (x, val, val);
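/* Return the number of bits set in VALUE.  Each iteration of the loop below
   clears the lowest set bit (value &= value - 1), so the loop runs once per
   set bit.  */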
3674 static unsigned
3675 bit_count (unsigned HOST_WIDE_INT value)
3677 unsigned count = 0;
3679 while (value)
3681 count++;
3682 value &= value - 1;
3685 return count;
3688 void
3689 aarch64_print_operand (FILE *f, rtx x, char code)
3691 switch (code)
3693 /* An integer or symbol address without a preceding # sign. */
3694 case 'c':
3695 switch (GET_CODE (x))
3697 case CONST_INT:
3698 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3699 break;
3701 case SYMBOL_REF:
3702 output_addr_const (f, x);
3703 break;
3705 case CONST:
3706 if (GET_CODE (XEXP (x, 0)) == PLUS
3707 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3709 output_addr_const (f, x);
3710 break;
3712 /* Fall through. */
3714 default:
3715 output_operand_lossage ("Unsupported operand for code '%c'", code);
3717 break;
3719 case 'e':
3720 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3722 int n;
3724 if (!CONST_INT_P (x)
3725 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3727 output_operand_lossage ("invalid operand for '%%%c'", code);
3728 return;
3731 switch (n)
3733 case 3:
3734 fputc ('b', f);
3735 break;
3736 case 4:
3737 fputc ('h', f);
3738 break;
3739 case 5:
3740 fputc ('w', f);
3741 break;
3742 default:
3743 output_operand_lossage ("invalid operand for '%%%c'", code);
3744 return;
3747 break;
3749 case 'p':
3751 int n;
3753 /* Print N such that 2^N == X. */
3754 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
3756 output_operand_lossage ("invalid operand for '%%%c'", code);
3757 return;
3760 asm_fprintf (f, "%d", n);
3762 break;
3764 case 'P':
3765 /* Print the number of non-zero bits in X (a const_int). */
3766 if (!CONST_INT_P (x))
3768 output_operand_lossage ("invalid operand for '%%%c'", code);
3769 return;
3772 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3773 break;
3775 case 'H':
3776 /* Print the higher numbered register of a pair (TImode) of regs. */
3777 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
3779 output_operand_lossage ("invalid operand for '%%%c'", code);
3780 return;
3783 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3784 break;
3786 case 'm':
3788 int cond_code;
3789 /* Print a condition (eq, ne, etc). */
3791 /* CONST_TRUE_RTX means always -- that's the default. */
3792 if (x == const_true_rtx)
3793 return;
3795 if (!COMPARISON_P (x))
3797 output_operand_lossage ("invalid operand for '%%%c'", code);
3798 return;
3801 cond_code = aarch64_get_condition_code (x);
3802 gcc_assert (cond_code >= 0);
3803 fputs (aarch64_condition_codes[cond_code], f);
3805 break;
3807 case 'M':
3809 int cond_code;
3810 /* Print the inverse of a condition (eq <-> ne, etc). */
3812 /* CONST_TRUE_RTX means never -- that's the default. */
3813 if (x == const_true_rtx)
3815 fputs ("nv", f);
3816 return;
3819 if (!COMPARISON_P (x))
3821 output_operand_lossage ("invalid operand for '%%%c'", code);
3822 return;
3824 cond_code = aarch64_get_condition_code (x);
3825 gcc_assert (cond_code >= 0);
3826 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3827 (cond_code)], f);
3829 break;
3831 case 'b':
3832 case 'h':
3833 case 's':
3834 case 'd':
3835 case 'q':
3836 /* Print a scalar FP/SIMD register name. */
3837 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3839 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3840 return;
3842 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3843 break;
3845 case 'S':
3846 case 'T':
3847 case 'U':
3848 case 'V':
3849 /* Print an FP/SIMD register name from a register list; 'S' prints the first register, 'T', 'U' and 'V' the following ones. */
3850 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3852 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3853 return;
3855 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3856 break;
3858 case 'X':
3859 /* Print bottom 16 bits of integer constant in hex. */
3860 if (!CONST_INT_P (x))
3862 output_operand_lossage ("invalid operand for '%%%c'", code);
3863 return;
3865 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3866 break;
3868 case 'w':
3869 case 'x':
3870 /* Print a general register name or the zero register (32-bit or
3871 64-bit). */
3872 if (x == const0_rtx
3873 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3875 asm_fprintf (f, "%czr", code);
3876 break;
3879 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3881 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3882 break;
3885 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3887 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3888 break;
3891 /* Fall through */
3893 case 0:
3894 /* Print a normal operand, if it's a general register, then we
3895 assume DImode. */
3896 if (x == NULL)
3898 output_operand_lossage ("missing operand");
3899 return;
3902 switch (GET_CODE (x))
3904 case REG:
3905 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3906 break;
3908 case MEM:
3909 aarch64_memory_reference_mode = GET_MODE (x);
3910 output_address (XEXP (x, 0));
3911 break;
3913 case LABEL_REF:
3914 case SYMBOL_REF:
3915 output_addr_const (asm_out_file, x);
3916 break;
3918 case CONST_INT:
3919 asm_fprintf (f, "%wd", INTVAL (x));
3920 break;
3922 case CONST_VECTOR:
3923 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3925 gcc_assert (
3926 aarch64_const_vec_all_same_in_range_p (x,
3927 HOST_WIDE_INT_MIN,
3928 HOST_WIDE_INT_MAX));
3929 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3931 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3933 fputc ('0', f);
3935 else
3936 gcc_unreachable ();
3937 break;
3939 case CONST_DOUBLE:
3940 /* CONST_DOUBLE can represent a double-width integer.
3941 In this case, the mode of x is VOIDmode. */
3942 if (GET_MODE (x) == VOIDmode)
3943 ; /* Do Nothing. */
3944 else if (aarch64_float_const_zero_rtx_p (x))
3946 fputc ('0', f);
3947 break;
3949 else if (aarch64_float_const_representable_p (x))
3951 #define buf_size 20
3952 char float_buf[buf_size] = {'\0'};
3953 REAL_VALUE_TYPE r;
3954 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3955 real_to_decimal_for_mode (float_buf, &r,
3956 buf_size, buf_size,
3957 1, GET_MODE (x));
3958 asm_fprintf (asm_out_file, "%s", float_buf);
3959 break;
3960 #undef buf_size
3962 output_operand_lossage ("invalid constant");
3963 return;
3964 default:
3965 output_operand_lossage ("invalid operand");
3966 return;
3968 break;
3970 case 'A':
3971 if (GET_CODE (x) == HIGH)
3972 x = XEXP (x, 0);
3974 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3976 case SYMBOL_SMALL_GOT:
3977 asm_fprintf (asm_out_file, ":got:");
3978 break;
3980 case SYMBOL_SMALL_TLSGD:
3981 asm_fprintf (asm_out_file, ":tlsgd:");
3982 break;
3984 case SYMBOL_SMALL_TLSDESC:
3985 asm_fprintf (asm_out_file, ":tlsdesc:");
3986 break;
3988 case SYMBOL_SMALL_GOTTPREL:
3989 asm_fprintf (asm_out_file, ":gottprel:");
3990 break;
3992 case SYMBOL_SMALL_TPREL:
3993 asm_fprintf (asm_out_file, ":tprel:");
3994 break;
3996 case SYMBOL_TINY_GOT:
3997 gcc_unreachable ();
3998 break;
4000 default:
4001 break;
4003 output_addr_const (asm_out_file, x);
4004 break;
4006 case 'L':
4007 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4009 case SYMBOL_SMALL_GOT:
4010 asm_fprintf (asm_out_file, ":lo12:");
4011 break;
4013 case SYMBOL_SMALL_TLSGD:
4014 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
4015 break;
4017 case SYMBOL_SMALL_TLSDESC:
4018 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
4019 break;
4021 case SYMBOL_SMALL_GOTTPREL:
4022 asm_fprintf (asm_out_file, ":gottprel_lo12:");
4023 break;
4025 case SYMBOL_SMALL_TPREL:
4026 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
4027 break;
4029 case SYMBOL_TINY_GOT:
4030 asm_fprintf (asm_out_file, ":got:");
4031 break;
4033 default:
4034 break;
4036 output_addr_const (asm_out_file, x);
4037 break;
4039 case 'G':
4041 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4043 case SYMBOL_SMALL_TPREL:
4044 asm_fprintf (asm_out_file, ":tprel_hi12:");
4045 break;
4046 default:
4047 break;
4049 output_addr_const (asm_out_file, x);
4050 break;
4052 default:
4053 output_operand_lossage ("invalid operand prefix '%%%c'", code);
4054 return;
4058 void
4059 aarch64_print_operand_address (FILE *f, rtx x)
4061 struct aarch64_address_info addr;
4063 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
4064 MEM, true))
4065 switch (addr.type)
4067 case ADDRESS_REG_IMM:
4068 if (addr.offset == const0_rtx)
4069 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
4070 else
4071 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
4072 INTVAL (addr.offset));
4073 return;
4075 case ADDRESS_REG_REG:
4076 if (addr.shift == 0)
4077 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
4078 reg_names [REGNO (addr.offset)]);
4079 else
4080 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
4081 reg_names [REGNO (addr.offset)], addr.shift);
4082 return;
4084 case ADDRESS_REG_UXTW:
4085 if (addr.shift == 0)
4086 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
4087 REGNO (addr.offset) - R0_REGNUM);
4088 else
4089 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
4090 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4091 return;
4093 case ADDRESS_REG_SXTW:
4094 if (addr.shift == 0)
4095 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
4096 REGNO (addr.offset) - R0_REGNUM);
4097 else
4098 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
4099 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4100 return;
4102 case ADDRESS_REG_WB:
4103 switch (GET_CODE (x))
4105 case PRE_INC:
4106 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
4107 GET_MODE_SIZE (aarch64_memory_reference_mode));
4108 return;
4109 case POST_INC:
4110 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
4111 GET_MODE_SIZE (aarch64_memory_reference_mode));
4112 return;
4113 case PRE_DEC:
4114 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
4115 GET_MODE_SIZE (aarch64_memory_reference_mode));
4116 return;
4117 case POST_DEC:
4118 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
4119 GET_MODE_SIZE (aarch64_memory_reference_mode));
4120 return;
4121 case PRE_MODIFY:
4122 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
4123 INTVAL (addr.offset));
4124 return;
4125 case POST_MODIFY:
4126 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
4127 INTVAL (addr.offset));
4128 return;
4129 default:
4130 break;
4132 break;
4134 case ADDRESS_LO_SUM:
4135 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
4136 output_addr_const (f, addr.offset);
4137 asm_fprintf (f, "]");
4138 return;
4140 case ADDRESS_SYMBOLIC:
4141 break;
4144 output_addr_const (f, x);
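/* For example, an ADDRESS_REG_IMM address with base x0 and offset 16 is
   printed above as "[x0, 16]", while a PRE_MODIFY writeback address with
   the same base and offset is printed as "[x0, 16]!".  */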
4147 bool
4148 aarch64_label_mentioned_p (rtx x)
4150 const char *fmt;
4151 int i;
4153 if (GET_CODE (x) == LABEL_REF)
4154 return true;
4156 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4157 referencing instruction, but they are constant offsets, not
4158 symbols. */
4159 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4160 return false;
4162 fmt = GET_RTX_FORMAT (GET_CODE (x));
4163 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4165 if (fmt[i] == 'E')
4167 int j;
4169 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4170 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4171 return 1;
4173 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4174 return 1;
4177 return 0;
4180 /* Implement REGNO_REG_CLASS. */
4182 enum reg_class
4183 aarch64_regno_regclass (unsigned regno)
4185 if (GP_REGNUM_P (regno))
4186 return GENERAL_REGS;
4188 if (regno == SP_REGNUM)
4189 return STACK_REG;
4191 if (regno == FRAME_POINTER_REGNUM
4192 || regno == ARG_POINTER_REGNUM)
4193 return POINTER_REGS;
4195 if (FP_REGNUM_P (regno))
4196 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
4198 return NO_REGS;
4201 static rtx
4202 aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
4204 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
4205 where mask is selected by alignment and size of the offset.
4206 We try to pick as large a range for the offset as possible to
4207 maximize the chance of a CSE. However, for aligned addresses
4208 we limit the range to 4k so that structures with different sized
4209 elements are likely to use the same base. */
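/* Illustrative example, not from the original comment: for an SImode
   access at X + 0x4568 the offset is a multiple of 4, so base_offset
   becomes 0x4000 and the residual 0x568 fits the scaled 12-bit field;
   for the misaligned X + 0x4567, base_offset is rounded to 0x4600 and
   the residual -0x99 stays within the unscaled -256..255 range.  */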
4211 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
4213 HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
4214 HOST_WIDE_INT base_offset;
4216 /* Does it look like we'll need a load/store-pair operation? */
4217 if (GET_MODE_SIZE (mode) > 16
4218 || mode == TImode)
4219 base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
4220 & ~((128 * GET_MODE_SIZE (mode)) - 1));
4221 /* For offsets that aren't a multiple of the access size, the limit is
4222 -256...255. */
4223 else if (offset & (GET_MODE_SIZE (mode) - 1))
4224 base_offset = (offset + 0x100) & ~0x1ff;
4225 else
4226 base_offset = offset & ~0xfff;
4228 if (base_offset == 0)
4229 return x;
4231 offset -= base_offset;
4232 rtx base_reg = gen_reg_rtx (Pmode);
4233 rtx val = force_operand (plus_constant (Pmode, XEXP (x, 0), base_offset),
4234 NULL_RTX);
4235 emit_move_insn (base_reg, val);
4236 x = plus_constant (Pmode, base_reg, offset);
4239 return x;
4242 /* Try a machine-dependent way of reloading an illegitimate address
4243 operand. If we find one, push the reload and return the new rtx. */
4246 aarch64_legitimize_reload_address (rtx *x_p,
4247 machine_mode mode,
4248 int opnum, int type,
4249 int ind_levels ATTRIBUTE_UNUSED)
4251 rtx x = *x_p;
4253 /* Do not allow mem (plus (reg, const)) if vector struct mode. */
4254 if (aarch64_vect_struct_mode_p (mode)
4255 && GET_CODE (x) == PLUS
4256 && REG_P (XEXP (x, 0))
4257 && CONST_INT_P (XEXP (x, 1)))
4259 rtx orig_rtx = x;
4260 x = copy_rtx (x);
4261 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4262 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4263 opnum, (enum reload_type) type);
4264 return x;
4267 /* We must recognize output that we have already generated ourselves. */
4268 if (GET_CODE (x) == PLUS
4269 && GET_CODE (XEXP (x, 0)) == PLUS
4270 && REG_P (XEXP (XEXP (x, 0), 0))
4271 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4272 && CONST_INT_P (XEXP (x, 1)))
4274 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4275 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4276 opnum, (enum reload_type) type);
4277 return x;
4280 /* We wish to handle large displacements off a base register by splitting
4281 the addend across an add and the mem insn. This can cut the number of
4282 extra insns needed from 3 to 1. It is only useful for load/store of a
4283 single register with 12 bit offset field. */
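/* A rough worked example (illustrative, with a scratch register chosen by
   reload): reloading (plus (reg x0) (const_int 0x12344)) for an SImode
   access splits into high = 0x12000 and low = 0x344; the high part becomes
   a single "add xN, x0, #0x12, lsl #12" reload while the low part stays in
   the 12-bit offset field of the ldr/str, instead of materializing the
   whole constant separately.  */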
4284 if (GET_CODE (x) == PLUS
4285 && REG_P (XEXP (x, 0))
4286 && CONST_INT_P (XEXP (x, 1))
4287 && HARD_REGISTER_P (XEXP (x, 0))
4288 && mode != TImode
4289 && mode != TFmode
4290 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4292 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4293 HOST_WIDE_INT low = val & 0xfff;
4294 HOST_WIDE_INT high = val - low;
4295 HOST_WIDE_INT offs;
4296 rtx cst;
4297 machine_mode xmode = GET_MODE (x);
4299 /* In ILP32, xmode can be either DImode or SImode. */
4300 gcc_assert (xmode == DImode || xmode == SImode);
4302 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4303 BLKmode alignment. */
4304 if (GET_MODE_SIZE (mode) == 0)
4305 return NULL_RTX;
4307 offs = low % GET_MODE_SIZE (mode);
4309 /* Align misaligned offset by adjusting high part to compensate. */
4310 if (offs != 0)
4312 if (aarch64_uimm12_shift (high + offs))
4314 /* Align down. */
4315 low = low - offs;
4316 high = high + offs;
4318 else
4320 /* Align up. */
4321 offs = GET_MODE_SIZE (mode) - offs;
4322 low = low + offs;
4323 high = high + (low & 0x1000) - offs;
4324 low &= 0xfff;
4328 /* Check for overflow. */
4329 if (high + low != val)
4330 return NULL_RTX;
4332 cst = GEN_INT (high);
4333 if (!aarch64_uimm12_shift (high))
4334 cst = force_const_mem (xmode, cst);
4336 /* Reload high part into base reg, leaving the low part
4337 in the mem instruction.
4338 Note that replacing this gen_rtx_PLUS with plus_constant is
4339 wrong in this case because we rely on the
4340 (plus (plus reg c1) c2) structure being preserved so that
4341 XEXP (*p, 0) in push_reload below uses the correct term. */
4342 x = gen_rtx_PLUS (xmode,
4343 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4344 GEN_INT (low));
4346 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4347 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4348 opnum, (enum reload_type) type);
4349 return x;
4352 return NULL_RTX;
4356 static reg_class_t
4357 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4358 reg_class_t rclass,
4359 machine_mode mode,
4360 secondary_reload_info *sri)
4362 /* Without the TARGET_SIMD instructions we cannot move a Q register
4363 to a Q register directly. We need a scratch. */
4364 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4365 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4366 && reg_class_subset_p (rclass, FP_REGS))
4368 if (mode == TFmode)
4369 sri->icode = CODE_FOR_aarch64_reload_movtf;
4370 else if (mode == TImode)
4371 sri->icode = CODE_FOR_aarch64_reload_movti;
4372 return NO_REGS;
4375 /* A TFmode or TImode memory access should be handled via an FP_REG
4376 because AArch64 has richer addressing modes for LDR/STR instructions
4377 than LDP/STP instructions. */
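/* For instance (illustrative numbers): a TImode load at offset 1008 can be
   a single "ldr q0, [x0, 1008]", whereas an LDP of two X registers only
   reaches a signed, 8-byte-scaled offset of -512..504 and would need the
   address computed separately.  */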
4378 if (!TARGET_GENERAL_REGS_ONLY && rclass == GENERAL_REGS
4379 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4380 return FP_REGS;
4382 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4383 return GENERAL_REGS;
4385 return NO_REGS;
4388 static bool
4389 aarch64_can_eliminate (const int from, const int to)
4391 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4392 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4394 if (frame_pointer_needed)
4396 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4397 return true;
4398 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4399 return false;
4400 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4401 && !cfun->calls_alloca)
4402 return true;
4403 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4404 return true;
4406 return false;
4409 return true;
4412 HOST_WIDE_INT
4413 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4415 aarch64_layout_frame ();
4417 if (to == HARD_FRAME_POINTER_REGNUM)
4419 if (from == ARG_POINTER_REGNUM)
4420 return cfun->machine->frame.frame_size - crtl->outgoing_args_size;
4422 if (from == FRAME_POINTER_REGNUM)
4423 return (cfun->machine->frame.hard_fp_offset
4424 - cfun->machine->frame.saved_varargs_size);
4427 if (to == STACK_POINTER_REGNUM)
4429 if (from == FRAME_POINTER_REGNUM)
4430 return (cfun->machine->frame.frame_size
4431 - cfun->machine->frame.saved_varargs_size);
4434 return cfun->machine->frame.frame_size;
4437 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4438 previous frame. */
4441 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4443 if (count != 0)
4444 return const0_rtx;
4445 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4449 static void
4450 aarch64_asm_trampoline_template (FILE *f)
4452 if (TARGET_ILP32)
4454 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4455 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4457 else
4459 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4460 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4462 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4463 assemble_aligned_integer (4, const0_rtx);
4464 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4465 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4468 static void
4469 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4471 rtx fnaddr, mem, a_tramp;
4472 const int tramp_code_sz = 16;
4474 /* Don't need to copy the trailing D-words, we fill those in below. */
4475 emit_block_move (m_tramp, assemble_trampoline_template (),
4476 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4477 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4478 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4479 if (GET_MODE (fnaddr) != ptr_mode)
4480 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4481 emit_move_insn (mem, fnaddr);
4483 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4484 emit_move_insn (mem, chain_value);
4486 /* XXX We should really define a "clear_cache" pattern and use
4487 gen_clear_cache(). */
4488 a_tramp = XEXP (m_tramp, 0);
4489 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4490 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4491 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4492 ptr_mode);
4495 static unsigned char
4496 aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
4498 switch (regclass)
4500 case CALLER_SAVE_REGS:
4501 case POINTER_REGS:
4502 case GENERAL_REGS:
4503 case ALL_REGS:
4504 case FP_REGS:
4505 case FP_LO_REGS:
4506 return
4507 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4508 (GET_MODE_SIZE (mode) + 7) / 8;
4509 case STACK_REG:
4510 return 1;
4512 case NO_REGS:
4513 return 0;
4515 default:
4516 break;
4518 gcc_unreachable ();
4521 static reg_class_t
4522 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4524 if (regclass == POINTER_REGS)
4525 return GENERAL_REGS;
4527 if (regclass == STACK_REG)
4529 if (REG_P(x)
4530 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4531 return regclass;
4533 return NO_REGS;
4536 /* If it's an integer immediate that MOVI can't handle, then
4537 FP_REGS is not an option, so we return NO_REGS instead. */
4538 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4539 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4540 return NO_REGS;
4542 /* Register elimination can result in a request for
4543 SP+constant->FP_REGS. We cannot support such operations, which
4544 use SP as source and an FP_REG as destination, so reject them
4545 outright. */
4546 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4548 rtx lhs = XEXP (x, 0);
4550 /* Look through a possible SUBREG introduced by ILP32. */
4551 if (GET_CODE (lhs) == SUBREG)
4552 lhs = SUBREG_REG (lhs);
4554 gcc_assert (REG_P (lhs));
4555 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4556 POINTER_REGS));
4557 return NO_REGS;
4560 return regclass;
4563 void
4564 aarch64_asm_output_labelref (FILE* f, const char *name)
4566 asm_fprintf (f, "%U%s", name);
4569 static void
4570 aarch64_elf_asm_constructor (rtx symbol, int priority)
4572 if (priority == DEFAULT_INIT_PRIORITY)
4573 default_ctor_section_asm_out_constructor (symbol, priority);
4574 else
4576 section *s;
4577 char buf[18];
4578 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4579 s = get_section (buf, SECTION_WRITE, NULL);
4580 switch_to_section (s);
4581 assemble_align (POINTER_SIZE);
4582 assemble_aligned_integer (POINTER_BYTES, symbol);
4586 static void
4587 aarch64_elf_asm_destructor (rtx symbol, int priority)
4589 if (priority == DEFAULT_INIT_PRIORITY)
4590 default_dtor_section_asm_out_destructor (symbol, priority);
4591 else
4593 section *s;
4594 char buf[18];
4595 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4596 s = get_section (buf, SECTION_WRITE, NULL);
4597 switch_to_section (s);
4598 assemble_align (POINTER_SIZE);
4599 assemble_aligned_integer (POINTER_BYTES, symbol);
4603 const char*
4604 aarch64_output_casesi (rtx *operands)
4606 char buf[100];
4607 char label[100];
4608 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
4609 int index;
4610 static const char *const patterns[4][2] =
4613 "ldrb\t%w3, [%0,%w1,uxtw]",
4614 "add\t%3, %4, %w3, sxtb #2"
4617 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4618 "add\t%3, %4, %w3, sxth #2"
4621 "ldr\t%w3, [%0,%w1,uxtw #2]",
4622 "add\t%3, %4, %w3, sxtw #2"
4624 /* We assume that DImode is only generated when not optimizing and
4625 that we don't really need 64-bit address offsets. That would
4626 imply an object file with 8GB of code in a single function! */
4628 "ldr\t%w3, [%0,%w1,uxtw #2]",
4629 "add\t%3, %4, %w3, sxtw #2"
4633 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4635 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4637 gcc_assert (index >= 0 && index <= 3);
4639 /* Need to implement table size reduction, by changing the code below. */
4640 output_asm_insn (patterns[index][0], operands);
4641 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4642 snprintf (buf, sizeof (buf),
4643 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4644 output_asm_insn (buf, operands);
4645 output_asm_insn (patterns[index][1], operands);
4646 output_asm_insn ("br\t%3", operands);
4647 assemble_label (asm_out_file, label);
4648 return "";
4652 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4653 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4654 operator. */
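/* For example (illustrative): aarch64_uxt_size (2, 0x3fc) returns 8 because
   0xff << 2 == 0x3fc, i.e. the operand behaves like a UXTB scaled by 4, and
   aarch64_uxt_size (0, 0xffffffff) returns 32 (a plain UXTW).  */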
4657 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4659 if (shift >= 0 && shift <= 3)
4661 int size;
4662 for (size = 8; size <= 32; size *= 2)
4664 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4665 if (mask == bits << shift)
4666 return size;
4669 return 0;
4672 static bool
4673 aarch64_use_blocks_for_constant_p (machine_mode mode ATTRIBUTE_UNUSED,
4674 const_rtx x ATTRIBUTE_UNUSED)
4676 /* We can't use blocks for constants when we're using a per-function
4677 constant pool. */
4678 return false;
4681 static section *
4682 aarch64_select_rtx_section (machine_mode mode ATTRIBUTE_UNUSED,
4683 rtx x ATTRIBUTE_UNUSED,
4684 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4686 /* Force all constant pool entries into the current function section. */
4687 return function_section (current_function_decl);
4691 /* Costs. */
4693 /* Helper function for rtx cost calculation. Strip a shift expression
4694 from X. Returns the inner operand if successful, or the original
4695 expression on failure. */
4696 static rtx
4697 aarch64_strip_shift (rtx x)
4699 rtx op = x;
4701 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
4702 we can convert both to ROR during final output. */
4703 if ((GET_CODE (op) == ASHIFT
4704 || GET_CODE (op) == ASHIFTRT
4705 || GET_CODE (op) == LSHIFTRT
4706 || GET_CODE (op) == ROTATERT
4707 || GET_CODE (op) == ROTATE)
4708 && CONST_INT_P (XEXP (op, 1)))
4709 return XEXP (op, 0);
4711 if (GET_CODE (op) == MULT
4712 && CONST_INT_P (XEXP (op, 1))
4713 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4714 return XEXP (op, 0);
4716 return x;
4719 /* Helper function for rtx cost calculation. Strip an extend
4720 expression from X. Returns the inner operand if successful, or the
4721 original expression on failure. We deal with a number of possible
4722 canonicalization variations here. */
4723 static rtx
4724 aarch64_strip_extend (rtx x)
4726 rtx op = x;
4728 /* Zero and sign extraction of a widened value. */
4729 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4730 && XEXP (op, 2) == const0_rtx
4731 && GET_CODE (XEXP (op, 0)) == MULT
4732 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4733 XEXP (op, 1)))
4734 return XEXP (XEXP (op, 0), 0);
4736 /* It can also be represented (for zero-extend) as an AND with an
4737 immediate. */
4738 if (GET_CODE (op) == AND
4739 && GET_CODE (XEXP (op, 0)) == MULT
4740 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4741 && CONST_INT_P (XEXP (op, 1))
4742 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4743 INTVAL (XEXP (op, 1))) != 0)
4744 return XEXP (XEXP (op, 0), 0);
4746 /* Now handle extended register, as this may also have an optional
4747 left shift by 1..4. */
4748 if (GET_CODE (op) == ASHIFT
4749 && CONST_INT_P (XEXP (op, 1))
4750 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4751 op = XEXP (op, 0);
4753 if (GET_CODE (op) == ZERO_EXTEND
4754 || GET_CODE (op) == SIGN_EXTEND)
4755 op = XEXP (op, 0);
4757 if (op != x)
4758 return op;
4760 return x;
4763 /* Helper function for rtx cost calculation. Calculate the cost of
4764 a MULT, which may be part of a multiply-accumulate rtx. Return
4765 the calculated cost of the expression, recursing manually in to
4766 operands where needed. */
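/* For example (a sketch, not exhaustive): (plus (mult (reg) (reg)) (reg))
   reaches here with OUTER == PLUS and is costed as a single MADD, while a
   bare (mult (reg) (const_int 4)) is costed as the LSL immediate it will be
   canonicalized into.  */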
4768 static int
4769 aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
4771 rtx op0, op1;
4772 const struct cpu_cost_table *extra_cost
4773 = aarch64_tune_params->insn_extra_cost;
4774 int cost = 0;
4775 bool maybe_fma = (outer == PLUS || outer == MINUS);
4776 machine_mode mode = GET_MODE (x);
4778 gcc_checking_assert (code == MULT);
4780 op0 = XEXP (x, 0);
4781 op1 = XEXP (x, 1);
4783 if (VECTOR_MODE_P (mode))
4784 mode = GET_MODE_INNER (mode);
4786 /* Integer multiply/fma. */
4787 if (GET_MODE_CLASS (mode) == MODE_INT)
4789 /* The multiply will be canonicalized as a shift, cost it as such. */
4790 if (CONST_INT_P (op1)
4791 && exact_log2 (INTVAL (op1)) > 0)
4793 if (speed)
4795 if (maybe_fma)
4796 /* ADD (shifted register). */
4797 cost += extra_cost->alu.arith_shift;
4798 else
4799 /* LSL (immediate). */
4800 cost += extra_cost->alu.shift;
4803 cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
4805 return cost;
4808 /* Integer multiplies or FMAs have zero/sign extending variants. */
4809 if ((GET_CODE (op0) == ZERO_EXTEND
4810 && GET_CODE (op1) == ZERO_EXTEND)
4811 || (GET_CODE (op0) == SIGN_EXTEND
4812 && GET_CODE (op1) == SIGN_EXTEND))
4814 cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4815 + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
4817 if (speed)
4819 if (maybe_fma)
4820 /* MADD/SMADDL/UMADDL. */
4821 cost += extra_cost->mult[0].extend_add;
4822 else
4823 /* MUL/SMULL/UMULL. */
4824 cost += extra_cost->mult[0].extend;
4827 return cost;
4830 /* This is either an integer multiply or an FMA. In both cases
4831 we want to recurse and cost the operands. */
4832 cost += rtx_cost (op0, MULT, 0, speed)
4833 + rtx_cost (op1, MULT, 1, speed);
4835 if (speed)
4837 if (maybe_fma)
4838 /* MADD. */
4839 cost += extra_cost->mult[mode == DImode].add;
4840 else
4841 /* MUL. */
4842 cost += extra_cost->mult[mode == DImode].simple;
4845 return cost;
4847 else
4849 if (speed)
4851 /* Floating-point FMA/FMUL can also support negations of the
4852 operands. */
4853 if (GET_CODE (op0) == NEG)
4854 op0 = XEXP (op0, 0);
4855 if (GET_CODE (op1) == NEG)
4856 op1 = XEXP (op1, 0);
4858 if (maybe_fma)
4859 /* FMADD/FNMADD/FNMSUB/FMSUB. */
4860 cost += extra_cost->fp[mode == DFmode].fma;
4861 else
4862 /* FMUL/FNMUL. */
4863 cost += extra_cost->fp[mode == DFmode].mult;
4866 cost += rtx_cost (op0, MULT, 0, speed)
4867 + rtx_cost (op1, MULT, 1, speed);
4868 return cost;
4872 static int
4873 aarch64_address_cost (rtx x,
4874 machine_mode mode,
4875 addr_space_t as ATTRIBUTE_UNUSED,
4876 bool speed)
4878 enum rtx_code c = GET_CODE (x);
4879 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4880 struct aarch64_address_info info;
4881 int cost = 0;
4882 info.shift = 0;
4884 if (!aarch64_classify_address (&info, x, mode, c, false))
4886 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
4888 /* This is a CONST or SYMBOL ref which will be split
4889 in a different way depending on the code model in use.
4890 Cost it through the generic infrastructure. */
4891 int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
4892 /* Divide through by the cost of one instruction to
4893 bring it to the same units as the address costs. */
4894 cost_symbol_ref /= COSTS_N_INSNS (1);
4895 /* The cost is then the cost of preparing the address,
4896 followed by an immediate (possibly 0) offset. */
4897 return cost_symbol_ref + addr_cost->imm_offset;
4899 else
4901 /* This is most likely a jump table from a case
4902 statement. */
4903 return addr_cost->register_offset;
4907 switch (info.type)
4909 case ADDRESS_LO_SUM:
4910 case ADDRESS_SYMBOLIC:
4911 case ADDRESS_REG_IMM:
4912 cost += addr_cost->imm_offset;
4913 break;
4915 case ADDRESS_REG_WB:
4916 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4917 cost += addr_cost->pre_modify;
4918 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4919 cost += addr_cost->post_modify;
4920 else
4921 gcc_unreachable ();
4923 break;
4925 case ADDRESS_REG_REG:
4926 cost += addr_cost->register_offset;
4927 break;
4929 case ADDRESS_REG_UXTW:
4930 case ADDRESS_REG_SXTW:
4931 cost += addr_cost->register_extend;
4932 break;
4934 default:
4935 gcc_unreachable ();
4939 if (info.shift > 0)
4941 /* For the sake of calculating the cost of the shifted register
4942 component, we can treat same sized modes in the same way. */
4943 switch (GET_MODE_BITSIZE (mode))
4945 case 16:
4946 cost += addr_cost->addr_scale_costs.hi;
4947 break;
4949 case 32:
4950 cost += addr_cost->addr_scale_costs.si;
4951 break;
4953 case 64:
4954 cost += addr_cost->addr_scale_costs.di;
4955 break;
4957 /* We can't tell, or this is a 128-bit vector. */
4958 default:
4959 cost += addr_cost->addr_scale_costs.ti;
4960 break;
4964 return cost;
4967 /* Return true if the RTX X in mode MODE is a zero or sign extract
4968 usable in an ADD or SUB (extended register) instruction. */
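/* The shape being matched is, roughly, an operand such as
   (sign_extract:DI (mult:DI (reg:DI) (const_int 4))
                    (const_int 34) (const_int 0))
   which behaves like the "w-register, sxtw #2" form of an extended-register
   ADD/SUB operand; this example is illustrative rather than quoted from the
   machine description.  */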
4969 static bool
4970 aarch64_rtx_arith_op_extract_p (rtx x, machine_mode mode)
4972 /* Catch add with a sign extract.
4973 This is add_<optab><mode>_multp2. */
4974 if (GET_CODE (x) == SIGN_EXTRACT
4975 || GET_CODE (x) == ZERO_EXTRACT)
4977 rtx op0 = XEXP (x, 0);
4978 rtx op1 = XEXP (x, 1);
4979 rtx op2 = XEXP (x, 2);
4981 if (GET_CODE (op0) == MULT
4982 && CONST_INT_P (op1)
4983 && op2 == const0_rtx
4984 && CONST_INT_P (XEXP (op0, 1))
4985 && aarch64_is_extend_from_extract (mode,
4986 XEXP (op0, 1),
4987 op1))
4989 return true;
4993 return false;
4996 static bool
4997 aarch64_frint_unspec_p (unsigned int u)
4999 switch (u)
5001 case UNSPEC_FRINTZ:
5002 case UNSPEC_FRINTP:
5003 case UNSPEC_FRINTM:
5004 case UNSPEC_FRINTA:
5005 case UNSPEC_FRINTN:
5006 case UNSPEC_FRINTX:
5007 case UNSPEC_FRINTI:
5008 return true;
5010 default:
5011 return false;
5015 /* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
5016 storing it in *COST. Result is true if the total cost of the operation
5017 has now been calculated. */
5018 static bool
5019 aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
5021 rtx inner;
5022 rtx comparator;
5023 enum rtx_code cmpcode;
5025 if (COMPARISON_P (op0))
5027 inner = XEXP (op0, 0);
5028 comparator = XEXP (op0, 1);
5029 cmpcode = GET_CODE (op0);
5031 else
5033 inner = op0;
5034 comparator = const0_rtx;
5035 cmpcode = NE;
5038 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
5040 /* Conditional branch. */
5041 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
5042 return true;
5043 else
5045 if (cmpcode == NE || cmpcode == EQ)
5047 if (comparator == const0_rtx)
5049 /* TBZ/TBNZ/CBZ/CBNZ. */
5050 if (GET_CODE (inner) == ZERO_EXTRACT)
5051 /* TBZ/TBNZ. */
5052 *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
5053 0, speed);
5054 else
5055 /* CBZ/CBNZ. */
5056 *cost += rtx_cost (inner, cmpcode, 0, speed);
5058 return true;
5061 else if (cmpcode == LT || cmpcode == GE)
5063 /* TBZ/TBNZ. */
5064 if (comparator == const0_rtx)
5065 return true;
5069 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
5071 /* It's a conditional operation based on the status flags,
5072 so it must be some flavor of CSEL. */
5074 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
5075 if (GET_CODE (op1) == NEG
5076 || GET_CODE (op1) == NOT
5077 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
5078 op1 = XEXP (op1, 0);
5080 *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
5081 *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
5082 return true;
5085 /* We don't know what this is, cost all operands. */
5086 return false;
5089 /* Calculate the cost of calculating X, storing it in *COST. Result
5090 is true if the total cost of the operation has now been calculated. */
5091 static bool
5092 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
5093 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
5095 rtx op0, op1, op2;
5096 const struct cpu_cost_table *extra_cost
5097 = aarch64_tune_params->insn_extra_cost;
5098 machine_mode mode = GET_MODE (x);
5100 /* By default, assume that everything has equivalent cost to the
5101 cheapest instruction. Any additional costs are applied as a delta
5102 above this default. */
5103 *cost = COSTS_N_INSNS (1);
5105 /* TODO: The cost infrastructure currently does not handle
5106 vector operations. Assume that all vector operations
5107 are equally expensive. */
5108 if (VECTOR_MODE_P (mode))
5110 if (speed)
5111 *cost += extra_cost->vect.alu;
5112 return true;
5115 switch (code)
5117 case SET:
5118 /* The cost depends entirely on the operands to SET. */
5119 *cost = 0;
5120 op0 = SET_DEST (x);
5121 op1 = SET_SRC (x);
5123 switch (GET_CODE (op0))
5125 case MEM:
5126 if (speed)
5128 rtx address = XEXP (op0, 0);
5129 if (GET_MODE_CLASS (mode) == MODE_INT)
5130 *cost += extra_cost->ldst.store;
5131 else if (mode == SFmode)
5132 *cost += extra_cost->ldst.storef;
5133 else if (mode == DFmode)
5134 *cost += extra_cost->ldst.stored;
5136 *cost +=
5137 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5138 0, speed));
5141 *cost += rtx_cost (op1, SET, 1, speed);
5142 return true;
5144 case SUBREG:
5145 if (! REG_P (SUBREG_REG (op0)))
5146 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
5148 /* Fall through. */
5149 case REG:
5150 /* const0_rtx is in general free, but we will use an
5151 instruction to set a register to 0. */
5152 if (REG_P (op1) || op1 == const0_rtx)
5154 /* The cost is 1 per register copied. */
5155 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
5156 / UNITS_PER_WORD;
5157 *cost = COSTS_N_INSNS (n_minus_1 + 1);
5159 else
5160 /* Cost is just the cost of the RHS of the set. */
5161 *cost += rtx_cost (op1, SET, 1, speed);
5162 return true;
5164 case ZERO_EXTRACT:
5165 case SIGN_EXTRACT:
5166 /* Bit-field insertion. Strip any redundant widening of
5167 the RHS to meet the width of the target. */
5168 if (GET_CODE (op1) == SUBREG)
5169 op1 = SUBREG_REG (op1);
5170 if ((GET_CODE (op1) == ZERO_EXTEND
5171 || GET_CODE (op1) == SIGN_EXTEND)
5172 && CONST_INT_P (XEXP (op0, 1))
5173 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
5174 >= INTVAL (XEXP (op0, 1))))
5175 op1 = XEXP (op1, 0);
5177 if (CONST_INT_P (op1))
5179 /* MOV immediate is assumed to always be cheap. */
5180 *cost = COSTS_N_INSNS (1);
5182 else
5184 /* BFM. */
5185 if (speed)
5186 *cost += extra_cost->alu.bfi;
5187 *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
5190 return true;
5192 default:
5193 /* We can't make sense of this, assume default cost. */
5194 *cost = COSTS_N_INSNS (1);
5195 return false;
5197 return false;
5199 case CONST_INT:
5200 /* If an instruction can incorporate a constant within the
5201 instruction, the instruction's expression avoids calling
5202 rtx_cost() on the constant. If rtx_cost() is called on a
5203 constant, then it is usually because the constant must be
5204 moved into a register by one or more instructions.
5206 The exception is constant 0, which can be expressed
5207 as XZR/WZR and is therefore free. The exception to this is
5208 if we have (set (reg) (const0_rtx)) in which case we must cost
5209 the move. However, we can catch that when we cost the SET, so
5210 we don't need to consider that here. */
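/* Illustrative example (exact counts come from aarch64_build_constant):
   a value such as 0x123456789ab needs a MOVZ plus two MOVKs and is
   costed as roughly three instructions, whereas 0xffff needs a single
   MOVZ and is costed as one.  */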
5211 if (x == const0_rtx)
5212 *cost = 0;
5213 else
5215 /* To an approximation, building any other constant is
5216 proportionally expensive to the number of instructions
5217 required to build that constant. This is true whether we
5218 are compiling for SPEED or otherwise. */
5219 *cost = COSTS_N_INSNS (aarch64_build_constant (0,
5220 INTVAL (x),
5221 false));
5223 return true;
5225 case CONST_DOUBLE:
5226 if (speed)
5228 /* mov[df,sf]_aarch64. */
5229 if (aarch64_float_const_representable_p (x))
5230 /* FMOV (scalar immediate). */
5231 *cost += extra_cost->fp[mode == DFmode].fpconst;
5232 else if (!aarch64_float_const_zero_rtx_p (x))
5234 /* This will be a load from memory. */
5235 if (mode == DFmode)
5236 *cost += extra_cost->ldst.loadd;
5237 else
5238 *cost += extra_cost->ldst.loadf;
5240 else
5241 /* Otherwise this is +0.0. We get this using MOVI d0, #0
5242 or MOV v0.s[0], wzr - neither of which are modeled by the
5243 cost tables. Just use the default cost. */
5248 return true;
5250 case MEM:
5251 if (speed)
5253 /* For loads we want the base cost of a load, plus an
5254 approximation for the additional cost of the addressing
5255 mode. */
5256 rtx address = XEXP (x, 0);
5257 if (GET_MODE_CLASS (mode) == MODE_INT)
5258 *cost += extra_cost->ldst.load;
5259 else if (mode == SFmode)
5260 *cost += extra_cost->ldst.loadf;
5261 else if (mode == DFmode)
5262 *cost += extra_cost->ldst.loadd;
5264 *cost +=
5265 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5266 0, speed));
5269 return true;
5271 case NEG:
5272 op0 = XEXP (x, 0);
5274 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5276 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5277 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5279 /* CSETM. */
5280 *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
5281 return true;
5284 /* Cost this as SUB wzr, X. */
5285 op0 = CONST0_RTX (GET_MODE (x));
5286 op1 = XEXP (x, 0);
5287 goto cost_minus;
5290 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
5292 /* Support (neg(fma...)) as a single instruction only if
5293 sign of zeros is unimportant. This matches the decision
5294 making in aarch64.md. */
5295 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
5297 /* FNMADD. */
5298 *cost = rtx_cost (op0, NEG, 0, speed);
5299 return true;
5301 if (speed)
5302 /* FNEG. */
5303 *cost += extra_cost->fp[mode == DFmode].neg;
5304 return false;
5307 return false;
5309 case CLRSB:
5310 case CLZ:
5311 if (speed)
5312 *cost += extra_cost->alu.clz;
5314 return false;
5316 case COMPARE:
5317 op0 = XEXP (x, 0);
5318 op1 = XEXP (x, 1);
5320 if (op1 == const0_rtx
5321 && GET_CODE (op0) == AND)
5323 x = op0;
5324 goto cost_logic;
5327 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
5329 /* TODO: A write to the CC flags possibly costs extra, this
5330 needs encoding in the cost tables. */
5332 /* CC_ZESWPmode supports zero extend for free. */
5333 if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
5334 op0 = XEXP (op0, 0);
5336 /* ANDS. */
5337 if (GET_CODE (op0) == AND)
5339 x = op0;
5340 goto cost_logic;
5343 if (GET_CODE (op0) == PLUS)
5345 /* ADDS (and CMN alias). */
5346 x = op0;
5347 goto cost_plus;
5350 if (GET_CODE (op0) == MINUS)
5352 /* SUBS. */
5353 x = op0;
5354 goto cost_minus;
5357 if (GET_CODE (op1) == NEG)
5359 /* CMN. */
5360 if (speed)
5361 *cost += extra_cost->alu.arith;
5363 *cost += rtx_cost (op0, COMPARE, 0, speed);
5364 *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed);
5365 return true;
5368 /* CMP.
5370 Compare can freely swap the order of operands, and
5371 canonicalization puts the more complex operation first.
5372 But the integer MINUS logic expects the shift/extend
5373 operation in op1. */
5374 if (! (REG_P (op0)
5375 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
5377 op0 = XEXP (x, 1);
5378 op1 = XEXP (x, 0);
5380 goto cost_minus;
5383 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5385 /* FCMP. */
5386 if (speed)
5387 *cost += extra_cost->fp[mode == DFmode].compare;
5389 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
5391 /* FCMP supports constant 0.0 for no extra cost. */
5392 return true;
5394 return false;
5397 return false;
5399 case MINUS:
5401 op0 = XEXP (x, 0);
5402 op1 = XEXP (x, 1);
5404 cost_minus:
5405 /* Detect valid immediates. */
5406 if ((GET_MODE_CLASS (mode) == MODE_INT
5407 || (GET_MODE_CLASS (mode) == MODE_CC
5408 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
5409 && CONST_INT_P (op1)
5410 && aarch64_uimm12_shift (INTVAL (op1)))
5412 *cost += rtx_cost (op0, MINUS, 0, speed);
5414 if (speed)
5415 /* SUB(S) (immediate). */
5416 *cost += extra_cost->alu.arith;
5417 return true;
5421 /* Look for SUB (extended register). */
5422 if (aarch64_rtx_arith_op_extract_p (op1, mode))
5424 if (speed)
5425 *cost += extra_cost->alu.arith_shift;
5427 *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
5428 (enum rtx_code) GET_CODE (op1),
5429 0, speed);
5430 return true;
5433 rtx new_op1 = aarch64_strip_extend (op1);
5435 /* Cost this as an FMA-alike operation. */
5436 if ((GET_CODE (new_op1) == MULT
5437 || GET_CODE (new_op1) == ASHIFT)
5438 && code != COMPARE)
5440 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
5441 (enum rtx_code) code,
5442 speed);
5443 *cost += rtx_cost (op0, MINUS, 0, speed);
5444 return true;
5447 *cost += rtx_cost (new_op1, MINUS, 1, speed);
5449 if (speed)
5451 if (GET_MODE_CLASS (mode) == MODE_INT)
5452 /* SUB(S). */
5453 *cost += extra_cost->alu.arith;
5454 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5455 /* FSUB. */
5456 *cost += extra_cost->fp[mode == DFmode].addsub;
5458 return true;
5461 case PLUS:
5463 rtx new_op0;
5465 op0 = XEXP (x, 0);
5466 op1 = XEXP (x, 1);
5468 cost_plus:
5469 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5470 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5472 /* CSINC. */
5473 *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
5474 *cost += rtx_cost (op1, PLUS, 1, speed);
5475 return true;
5478 if (GET_MODE_CLASS (mode) == MODE_INT
5479 && CONST_INT_P (op1)
5480 && aarch64_uimm12_shift (INTVAL (op1)))
5482 *cost += rtx_cost (op0, PLUS, 0, speed);
5484 if (speed)
5485 /* ADD (immediate). */
5486 *cost += extra_cost->alu.arith;
5487 return true;
5490 /* Look for ADD (extended register). */
5491 if (aarch64_rtx_arith_op_extract_p (op0, mode))
5493 if (speed)
5494 *cost += extra_cost->alu.arith_shift;
5496 *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
5497 (enum rtx_code) GET_CODE (op0),
5498 0, speed);
5499 return true;
5502 /* Strip any extend, leave shifts behind as we will
5503 cost them through mult_cost. */
5504 new_op0 = aarch64_strip_extend (op0);
5506 if (GET_CODE (new_op0) == MULT
5507 || GET_CODE (new_op0) == ASHIFT)
5509 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
5510 speed);
5511 *cost += rtx_cost (op1, PLUS, 1, speed);
5512 return true;
5515 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
5516 + rtx_cost (op1, PLUS, 1, speed));
5518 if (speed)
5520 if (GET_MODE_CLASS (mode) == MODE_INT)
5521 /* ADD. */
5522 *cost += extra_cost->alu.arith;
5523 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5524 /* FADD. */
5525 *cost += extra_cost->fp[mode == DFmode].addsub;
5527 return true;
5530 case BSWAP:
5531 *cost = COSTS_N_INSNS (1);
5533 if (speed)
5534 *cost += extra_cost->alu.rev;
5536 return false;
5538 case IOR:
5539 if (aarch_rev16_p (x))
5541 *cost = COSTS_N_INSNS (1);
5543 if (speed)
5544 *cost += extra_cost->alu.rev;
5546 return true;
5548 /* Fall through. */
5549 case XOR:
5550 case AND:
5551 cost_logic:
5552 op0 = XEXP (x, 0);
5553 op1 = XEXP (x, 1);
5555 if (code == AND
5556 && GET_CODE (op0) == MULT
5557 && CONST_INT_P (XEXP (op0, 1))
5558 && CONST_INT_P (op1)
5559 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
5560 INTVAL (op1)) != 0)
5562 /* This is a UBFM/SBFM. */
5563 *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
5564 if (speed)
5565 *cost += extra_cost->alu.bfx;
5566 return true;
5569 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5571 /* We possibly get the immediate for free, this is not
5572 modelled. */
5573 if (CONST_INT_P (op1)
5574 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
5576 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5578 if (speed)
5579 *cost += extra_cost->alu.logical;
5581 return true;
5583 else
5585 rtx new_op0 = op0;
5587 /* Handle ORN, EON, or BIC. */
5588 if (GET_CODE (op0) == NOT)
5589 op0 = XEXP (op0, 0);
5591 new_op0 = aarch64_strip_shift (op0);
5593 /* If we had a shift on op0 then this is a logical-shift-
5594 by-register/immediate operation. Otherwise, this is just
5595 a logical operation. */
5596 if (speed)
5598 if (new_op0 != op0)
5600 /* Shift by immediate. */
5601 if (CONST_INT_P (XEXP (op0, 1)))
5602 *cost += extra_cost->alu.log_shift;
5603 else
5604 *cost += extra_cost->alu.log_shift_reg;
5606 else
5607 *cost += extra_cost->alu.logical;
5610 /* In both cases we want to cost both operands. */
5611 *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
5612 + rtx_cost (op1, (enum rtx_code) code, 1, speed);
5614 return true;
5617 return false;
5619 case NOT:
5620 /* MVN. */
5621 if (speed)
5622 *cost += extra_cost->alu.logical;
5624 /* The logical instruction could have the shifted register form,
5625 but the cost is the same if the shift is processed as a separate
5626 instruction, so we don't bother with it here. */
5627 return false;
5629 case ZERO_EXTEND:
5631 op0 = XEXP (x, 0);
5632 /* If a value is written in SI mode, then zero extended to DI
5633 mode, the operation will in general be free as a write to
5634 a 'w' register implicitly zeroes the upper bits of an 'x'
5635 register. However, if this is
5637 (set (reg) (zero_extend (reg)))
5639 we must cost the explicit register move. */
5640 if (mode == DImode
5641 && GET_MODE (op0) == SImode
5642 && outer == SET)
5644 int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
5646 if (!op_cost && speed)
5647 /* MOV. */
5648 *cost += extra_cost->alu.extend;
5649 else
5650 /* Free, the cost is that of the SI mode operation. */
5651 *cost = op_cost;
5653 return true;
5655 else if (MEM_P (XEXP (x, 0)))
5657 /* All loads can zero extend to any size for free. */
5658 *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
5659 return true;
5662 /* UXTB/UXTH. */
5663 if (speed)
5664 *cost += extra_cost->alu.extend;
5666 return false;
5668 case SIGN_EXTEND:
5669 if (MEM_P (XEXP (x, 0)))
5671 /* LDRSH. */
5672 if (speed)
5674 rtx address = XEXP (XEXP (x, 0), 0);
5675 *cost += extra_cost->ldst.load_sign_extend;
5677 *cost +=
5678 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5679 0, speed));
5681 return true;
5684 if (speed)
5685 *cost += extra_cost->alu.extend;
5686 return false;
5688 case ASHIFT:
5689 op0 = XEXP (x, 0);
5690 op1 = XEXP (x, 1);
5692 if (CONST_INT_P (op1))
5694 /* LSL (immediate), UBFM, UBFIZ and friends. These are all
5695 aliases. */
5696 if (speed)
5697 *cost += extra_cost->alu.shift;
5699 /* We can incorporate zero/sign extend for free. */
5700 if (GET_CODE (op0) == ZERO_EXTEND
5701 || GET_CODE (op0) == SIGN_EXTEND)
5702 op0 = XEXP (op0, 0);
5704 *cost += rtx_cost (op0, ASHIFT, 0, speed);
5705 return true;
5707 else
5709 /* LSLV. */
5710 if (speed)
5711 *cost += extra_cost->alu.shift_reg;
5713 return false; /* All arguments need to be in registers. */
5716 case ROTATE:
5717 case ROTATERT:
5718 case LSHIFTRT:
5719 case ASHIFTRT:
5720 op0 = XEXP (x, 0);
5721 op1 = XEXP (x, 1);
5723 if (CONST_INT_P (op1))
5725 /* ASR (immediate) and friends. */
5726 if (speed)
5727 *cost += extra_cost->alu.shift;
5729 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5730 return true;
5732 else
5735 /* ASR (register) and friends. */
5736 if (speed)
5737 *cost += extra_cost->alu.shift_reg;
5739 return false; /* All arguments need to be in registers. */
5742 case SYMBOL_REF:
5744 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
5746 /* LDR. */
5747 if (speed)
5748 *cost += extra_cost->ldst.load;
5750 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
5751 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
5753 /* ADRP, followed by ADD. */
5754 *cost += COSTS_N_INSNS (1);
5755 if (speed)
5756 *cost += 2 * extra_cost->alu.arith;
5758 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
5759 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
5761 /* ADR. */
5762 if (speed)
5763 *cost += extra_cost->alu.arith;
5766 if (flag_pic)
5768 /* One extra load instruction, after accessing the GOT. */
5769 *cost += COSTS_N_INSNS (1);
5770 if (speed)
5771 *cost += extra_cost->ldst.load;
5773 return true;
5775 case HIGH:
5776 case LO_SUM:
5777 /* ADRP/ADD (immediate). */
5778 if (speed)
5779 *cost += extra_cost->alu.arith;
5780 return true;
5782 case ZERO_EXTRACT:
5783 case SIGN_EXTRACT:
5784 /* UBFX/SBFX. */
5785 if (speed)
5786 *cost += extra_cost->alu.bfx;
5788 /* We can trust that the immediates used will be correct (there
5789 are no by-register forms), so we need only cost op0. */
5790 *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
5791 return true;
5793 case MULT:
5794 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
5795 /* aarch64_rtx_mult_cost always handles recursion to its
5796 operands. */
5797 return true;
5799 case MOD:
5800 case UMOD:
5801 if (speed)
5803 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5804 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
5805 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
5806 else if (GET_MODE (x) == DFmode)
5807 *cost += (extra_cost->fp[1].mult
5808 + extra_cost->fp[1].div);
5809 else if (GET_MODE (x) == SFmode)
5810 *cost += (extra_cost->fp[0].mult
5811 + extra_cost->fp[0].div);
5813 return false; /* All arguments need to be in registers. */
5815 case DIV:
5816 case UDIV:
5817 case SQRT:
5818 if (speed)
5820 if (GET_MODE_CLASS (mode) == MODE_INT)
5821 /* There is no integer SQRT, so only DIV and UDIV can get
5822 here. */
5823 *cost += extra_cost->mult[mode == DImode].idiv;
5824 else
5825 *cost += extra_cost->fp[mode == DFmode].div;
5827 return false; /* All arguments need to be in registers. */
5829 case IF_THEN_ELSE:
5830 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
5831 XEXP (x, 2), cost, speed);
5833 case EQ:
5834 case NE:
5835 case GT:
5836 case GTU:
5837 case LT:
5838 case LTU:
5839 case GE:
5840 case GEU:
5841 case LE:
5842 case LEU:
5844 return false; /* All arguments must be in registers. */
5846 case FMA:
5847 op0 = XEXP (x, 0);
5848 op1 = XEXP (x, 1);
5849 op2 = XEXP (x, 2);
5851 if (speed)
5852 *cost += extra_cost->fp[mode == DFmode].fma;
5854 /* FMSUB, FNMADD, and FNMSUB are free. */
5855 if (GET_CODE (op0) == NEG)
5856 op0 = XEXP (op0, 0);
5858 if (GET_CODE (op2) == NEG)
5859 op2 = XEXP (op2, 0);
5861 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
5862 and the by-element operand as operand 0. */
5863 if (GET_CODE (op1) == NEG)
5864 op1 = XEXP (op1, 0);
5866 /* Catch vector-by-element operations. The by-element operand can
5867 either be (vec_duplicate (vec_select (x))) or just
5868 (vec_select (x)), depending on whether we are multiplying by
5869 a vector or a scalar.
5871 Canonicalization is not very good in these cases, FMA4 will put the
5872 by-element operand as operand 0, FNMA4 will have it as operand 1. */
5873 if (GET_CODE (op0) == VEC_DUPLICATE)
5874 op0 = XEXP (op0, 0);
5875 else if (GET_CODE (op1) == VEC_DUPLICATE)
5876 op1 = XEXP (op1, 0);
5878 if (GET_CODE (op0) == VEC_SELECT)
5879 op0 = XEXP (op0, 0);
5880 else if (GET_CODE (op1) == VEC_SELECT)
5881 op1 = XEXP (op1, 0);
5883 /* If the remaining parameters are not registers,
5884 get the cost to put them into registers. */
5885 *cost += rtx_cost (op0, FMA, 0, speed);
5886 *cost += rtx_cost (op1, FMA, 1, speed);
5887 *cost += rtx_cost (op2, FMA, 2, speed);
5888 return true;
5890 case FLOAT_EXTEND:
5891 if (speed)
5892 *cost += extra_cost->fp[mode == DFmode].widen;
5893 return false;
5895 case FLOAT_TRUNCATE:
5896 if (speed)
5897 *cost += extra_cost->fp[mode == DFmode].narrow;
5898 return false;
5900 case FIX:
5901 case UNSIGNED_FIX:
5902 x = XEXP (x, 0);
5903 /* Strip the rounding part. They will all be implemented
5904 by the fcvt* family of instructions anyway. */
5905 if (GET_CODE (x) == UNSPEC)
5907 unsigned int uns_code = XINT (x, 1);
5909 if (uns_code == UNSPEC_FRINTA
5910 || uns_code == UNSPEC_FRINTM
5911 || uns_code == UNSPEC_FRINTN
5912 || uns_code == UNSPEC_FRINTP
5913 || uns_code == UNSPEC_FRINTZ)
5914 x = XVECEXP (x, 0, 0);
5917 if (speed)
5918 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
5920 *cost += rtx_cost (x, (enum rtx_code) code, 0, speed);
5921 return true;
5923 case ABS:
5924 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5926 /* FABS and FNEG are analogous. */
5927 if (speed)
5928 *cost += extra_cost->fp[mode == DFmode].neg;
5930 else
5932 /* Integer ABS will either be split to
5933 two arithmetic instructions, or will be an ABS
5934 (scalar), which we don't model. */
5935 *cost = COSTS_N_INSNS (2);
5936 if (speed)
5937 *cost += 2 * extra_cost->alu.arith;
5939 return false;
5941 case SMAX:
5942 case SMIN:
5943 if (speed)
5945 /* FMAXNM/FMINNM/FMAX/FMIN.
5946 TODO: This may not be accurate for all implementations, but
5947 we do not model this in the cost tables. */
5948 *cost += extra_cost->fp[mode == DFmode].addsub;
5950 return false;
5952 case UNSPEC:
5953 /* The floating point round to integer frint* instructions. */
5954 if (aarch64_frint_unspec_p (XINT (x, 1)))
5956 if (speed)
5957 *cost += extra_cost->fp[mode == DFmode].roundint;
5959 return false;
5962 if (XINT (x, 1) == UNSPEC_RBIT)
5964 if (speed)
5965 *cost += extra_cost->alu.rev;
5967 return false;
5969 break;
5971 case TRUNCATE:
5973 /* Decompose <su>muldi3_highpart. */
5974 if (/* (truncate:DI */
5975 mode == DImode
5976 /* (lshiftrt:TI */
5977 && GET_MODE (XEXP (x, 0)) == TImode
5978 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
5979 /* (mult:TI */
5980 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5981 /* (ANY_EXTEND:TI (reg:DI))
5982 (ANY_EXTEND:TI (reg:DI))) */
5983 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
5984 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
5985 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
5986 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
5987 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
5988 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
5989 /* (const_int 64) */
5990 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
5991 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
5993 /* UMULH/SMULH. */
5994 if (speed)
5995 *cost += extra_cost->mult[mode == DImode].extend;
5996 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
5997 MULT, 0, speed);
5998 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
5999 MULT, 1, speed);
6000 return true;
6003 /* Fall through. */
6004 default:
6005 break;
6008 if (dump_file && (dump_flags & TDF_DETAILS))
6009 fprintf (dump_file,
6010 "\nFailed to cost RTX. Assuming default cost.\n");
6012 return true;
6015 /* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
6016 calculated for X. This cost is stored in *COST. Returns true
6017 if the total cost of X was calculated. */
6018 static bool
6019 aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
6020 int param, int *cost, bool speed)
6022 bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
6024 if (dump_file && (dump_flags & TDF_DETAILS))
6026 print_rtl_single (dump_file, x);
6027 fprintf (dump_file, "\n%s cost: %d (%s)\n",
6028 speed ? "Hot" : "Cold",
6029 *cost, result ? "final" : "partial");
6032 return result;
6035 static int
6036 aarch64_register_move_cost (machine_mode mode,
6037 reg_class_t from_i, reg_class_t to_i)
6039 enum reg_class from = (enum reg_class) from_i;
6040 enum reg_class to = (enum reg_class) to_i;
6041 const struct cpu_regmove_cost *regmove_cost
6042 = aarch64_tune_params->regmove_cost;
6044 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
6045 if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
6046 to = GENERAL_REGS;
6048 if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
6049 from = GENERAL_REGS;
6051 /* Moving between a GPR and the stack register costs the same as GP2GP. */
6052 if ((from == GENERAL_REGS && to == STACK_REG)
6053 || (to == GENERAL_REGS && from == STACK_REG))
6054 return regmove_cost->GP2GP;
6056 /* To/From the stack register, we move via the gprs. */
6057 if (to == STACK_REG || from == STACK_REG)
6058 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
6059 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
6061 if (GET_MODE_SIZE (mode) == 16)
6063 /* 128-bit operations on general registers require 2 instructions. */
6064 if (from == GENERAL_REGS && to == GENERAL_REGS)
6065 return regmove_cost->GP2GP * 2;
6066 else if (from == GENERAL_REGS)
6067 return regmove_cost->GP2FP * 2;
6068 else if (to == GENERAL_REGS)
6069 return regmove_cost->FP2GP * 2;
6071 /* When AdvSIMD instructions are disabled it is not possible to move
6072 a 128-bit value directly between Q registers. This is handled in
6073 secondary reload. A general register is used as a scratch to move
6074 the upper DI value and the lower DI value is moved directly,
6075 hence the cost is the sum of three moves. */
6076 if (! TARGET_SIMD)
6077 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
6079 return regmove_cost->FP2FP;
6082 if (from == GENERAL_REGS && to == GENERAL_REGS)
6083 return regmove_cost->GP2GP;
6084 else if (from == GENERAL_REGS)
6085 return regmove_cost->GP2FP;
6086 else if (to == GENERAL_REGS)
6087 return regmove_cost->FP2GP;
6089 return regmove_cost->FP2FP;
6092 static int
6093 aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
6094 reg_class_t rclass ATTRIBUTE_UNUSED,
6095 bool in ATTRIBUTE_UNUSED)
6097 return aarch64_tune_params->memmov_cost;
6100 /* Return the number of instructions that can be issued per cycle. */
6101 static int
6102 aarch64_sched_issue_rate (void)
6104 return aarch64_tune_params->issue_rate;
6107 /* Vectorizer cost model target hooks. */
6109 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6110 static int
6111 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6112 tree vectype,
6113 int misalign ATTRIBUTE_UNUSED)
6115 unsigned elements;
6117 switch (type_of_cost)
6119 case scalar_stmt:
6120 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
6122 case scalar_load:
6123 return aarch64_tune_params->vec_costs->scalar_load_cost;
6125 case scalar_store:
6126 return aarch64_tune_params->vec_costs->scalar_store_cost;
6128 case vector_stmt:
6129 return aarch64_tune_params->vec_costs->vec_stmt_cost;
6131 case vector_load:
6132 return aarch64_tune_params->vec_costs->vec_align_load_cost;
6134 case vector_store:
6135 return aarch64_tune_params->vec_costs->vec_store_cost;
6137 case vec_to_scalar:
6138 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
6140 case scalar_to_vec:
6141 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
6143 case unaligned_load:
6144 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
6146 case unaligned_store:
6147 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
6149 case cond_branch_taken:
6150 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
6152 case cond_branch_not_taken:
6153 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
6155 case vec_perm:
6156 case vec_promote_demote:
6157 return aarch64_tune_params->vec_costs->vec_stmt_cost;
6159 case vec_construct:
6160 elements = TYPE_VECTOR_SUBPARTS (vectype);
6161 return elements / 2 + 1;
6163 default:
6164 gcc_unreachable ();
6168 /* Implement targetm.vectorize.add_stmt_cost. */
6169 static unsigned
6170 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6171 struct _stmt_vec_info *stmt_info, int misalign,
6172 enum vect_cost_model_location where)
6174 unsigned *cost = (unsigned *) data;
6175 unsigned retval = 0;
6177 if (flag_vect_cost_model)
6179 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6180 int stmt_cost =
6181 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
6183 /* Statements in an inner loop relative to the loop being
6184 vectorized are weighted more heavily. The value here is
6185 a function (linear for now) of the loop nest level. */
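/* For instance (illustrative): with the default linear weighting, a
   statement sitting in the innermost loop of a double nest
   (loop_depth == 2) contributes 2 * count * stmt_cost to the body cost
   rather than count * stmt_cost.  */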
6186 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6188 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6189 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
6190 unsigned nest_level = loop_depth (loop);
6192 count *= nest_level;
6195 retval = (unsigned) (count * stmt_cost);
6196 cost[where] += retval;
6199 return retval;
6202 static void initialize_aarch64_code_model (void);
6204 /* Parse the architecture extension string. */
6206 static void
6207 aarch64_parse_extension (char *str)
6209 /* The extension string is parsed left to right. */
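/* For example (illustrative): given the tail "+crypto+nofp" of an
   -mcpu/-march string, the first iteration turns on the crypto feature
   flags and the second clears the fp flags (together with anything listed
   in its flags_off mask).  */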
6210 const struct aarch64_option_extension *opt = NULL;
6212 /* Flag to say whether we are adding or removing an extension. */
6213 int adding_ext = -1;
6215 while (str != NULL && *str != 0)
6217 char *ext;
6218 size_t len;
6220 str++;
6221 ext = strchr (str, '+');
6223 if (ext != NULL)
6224 len = ext - str;
6225 else
6226 len = strlen (str);
6228 if (len >= 2 && strncmp (str, "no", 2) == 0)
6230 adding_ext = 0;
6231 len -= 2;
6232 str += 2;
6234 else if (len > 0)
6235 adding_ext = 1;
6237 if (len == 0)
6239 error ("missing feature modifier after %qs", "+no");
6240 return;
6243 /* Scan over the extensions table trying to find an exact match. */
6244 for (opt = all_extensions; opt->name != NULL; opt++)
6246 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
6248 /* Add or remove the extension. */
6249 if (adding_ext)
6250 aarch64_isa_flags |= opt->flags_on;
6251 else
6252 aarch64_isa_flags &= ~(opt->flags_off);
6253 break;
6257 if (opt->name == NULL)
6259 /* Extension not found in list. */
6260 error ("unknown feature modifier %qs", str);
6261 return;
6264 str = ext;
6267 return;
6270 /* Parse the ARCH string. */
6272 static void
6273 aarch64_parse_arch (void)
6275 char *ext;
6276 const struct processor *arch;
6277 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
6278 size_t len;
6280 strcpy (str, aarch64_arch_string);
6282 ext = strchr (str, '+');
6284 if (ext != NULL)
6285 len = ext - str;
6286 else
6287 len = strlen (str);
6289 if (len == 0)
6291 error ("missing arch name in -march=%qs", str);
6292 return;
6295 /* Loop through the list of supported ARCHs to find a match. */
6296 for (arch = all_architectures; arch->name != NULL; arch++)
6298 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
6300 selected_arch = arch;
6301 aarch64_isa_flags = selected_arch->flags;
6303 if (!selected_cpu)
6304 selected_cpu = &all_cores[selected_arch->core];
6306 if (ext != NULL)
6308 /* ARCH string contains at least one extension. */
6309 aarch64_parse_extension (ext);
6312 if (strcmp (selected_arch->arch, selected_cpu->arch))
6314 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
6315 selected_cpu->name, selected_arch->name);
6318 return;
6322 /* ARCH name not found in list. */
6323 error ("unknown value %qs for -march", str);
6324 return;
6327 /* Parse the CPU string. */
6329 static void
6330 aarch64_parse_cpu (void)
6332 char *ext;
6333 const struct processor *cpu;
6334 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
6335 size_t len;
6337 strcpy (str, aarch64_cpu_string);
6339 ext = strchr (str, '+');
6341 if (ext != NULL)
6342 len = ext - str;
6343 else
6344 len = strlen (str);
6346 if (len == 0)
6348 error ("missing cpu name in -mcpu=%qs", str);
6349 return;
6352 /* Loop through the list of supported CPUs to find a match. */
6353 for (cpu = all_cores; cpu->name != NULL; cpu++)
6355 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
6357 selected_cpu = cpu;
6358 selected_tune = cpu;
6359 aarch64_isa_flags = selected_cpu->flags;
6361 if (ext != NULL)
6363 /* CPU string contains at least one extension. */
6364 aarch64_parse_extension (ext);
6367 return;
6371 /* CPU name not found in list. */
6372 error ("unknown value %qs for -mcpu", str);
6373 return;
6376 /* Parse the TUNE string. */
6378 static void
6379 aarch64_parse_tune (void)
6381 const struct processor *cpu;
6382 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
6383 strcpy (str, aarch64_tune_string);
6385 /* Loop through the list of supported CPUs to find a match. */
6386 for (cpu = all_cores; cpu->name != NULL; cpu++)
6388 if (strcmp (cpu->name, str) == 0)
6390 selected_tune = cpu;
6391 return;
6395 /* CPU name not found in list. */
6396 error ("unknown value %qs for -mtune", str);
6397 return;
6401 /* Implement TARGET_OPTION_OVERRIDE. */
6403 static void
6404 aarch64_override_options (void)
6406 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
6407 If either of -march or -mtune is given, they override their
6408 respective component of -mcpu.
6410 So, first parse AARCH64_CPU_STRING, then the others. Be careful
6411 with -march: if -mcpu is not present on the command line, -march
6412 must set a sensible default CPU. */
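/* For instance (illustrative): "-mcpu=cortex-a57 -mtune=cortex-a53" takes
   the ISA flags from cortex-a57 but the tuning tables from cortex-a53,
   since the -mtune parse below overrides selected_tune.  */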
6413 if (aarch64_cpu_string)
6415 aarch64_parse_cpu ();
6418 if (aarch64_arch_string)
6420 aarch64_parse_arch ();
6423 if (aarch64_tune_string)
6425 aarch64_parse_tune ();
6428 #ifndef HAVE_AS_MABI_OPTION
6429 /* The compiler may have been configured with 2.23.* binutils, which does
6430 not have support for ILP32. */
6431 if (TARGET_ILP32)
6432 error ("Assembler does not support -mabi=ilp32");
6433 #endif
6435 initialize_aarch64_code_model ();
6437 aarch64_build_bitmask_table ();
6439 /* This target defaults to strict volatile bitfields. */
6440 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
6441 flag_strict_volatile_bitfields = 1;
6443 /* If the user did not specify a processor, choose the default
6444 one for them. This will be the CPU set during configuration using
6445 --with-cpu, otherwise it is "generic". */
6446 if (!selected_cpu)
6448 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
6449 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
6452 gcc_assert (selected_cpu);
6454 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
6455 if (!selected_tune)
6456 selected_tune = &all_cores[selected_cpu->core];
6458 aarch64_tune_flags = selected_tune->flags;
6459 aarch64_tune = selected_tune->core;
6460 aarch64_tune_params = selected_tune->tune;
6462 if (aarch64_fix_a53_err835769 == 2)
6464 #ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
6465 aarch64_fix_a53_err835769 = 1;
6466 #else
6467 aarch64_fix_a53_err835769 = 0;
6468 #endif
6471 aarch64_override_options_after_change ();
6474 /* Implement targetm.override_options_after_change. */
6476 static void
6477 aarch64_override_options_after_change (void)
6479 if (flag_omit_frame_pointer)
6480 flag_omit_leaf_frame_pointer = false;
6481 else if (flag_omit_leaf_frame_pointer)
6482 flag_omit_frame_pointer = true;
6485 static struct machine_function *
6486 aarch64_init_machine_status (void)
6488 struct machine_function *machine;
6489 machine = ggc_cleared_alloc<machine_function> ();
6490 return machine;
6493 void
6494 aarch64_init_expanders (void)
6496 init_machine_status = aarch64_init_machine_status;
6499 /* A checking mechanism for the implementation of the various code models. */
6500 static void
6501 initialize_aarch64_code_model (void)
6503 if (flag_pic)
6505 switch (aarch64_cmodel_var)
6507 case AARCH64_CMODEL_TINY:
6508 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
6509 break;
6510 case AARCH64_CMODEL_SMALL:
6511 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
6512 break;
6513 case AARCH64_CMODEL_LARGE:
6514 sorry ("code model %qs with -f%s", "large",
6515 flag_pic > 1 ? "PIC" : "pic");
6516 default:
6517 gcc_unreachable ();
6520 else
6521 aarch64_cmodel = aarch64_cmodel_var;
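/* Illustrative example, added for exposition: compiling with
   -mcmodel=small -fpic leaves aarch64_cmodel_var as AARCH64_CMODEL_SMALL,
   which the switch above rewrites to AARCH64_CMODEL_SMALL_PIC; combining
   -mcmodel=large with -fPIC is rejected via sorry ().  */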
6524 /* Return true if SYMBOL_REF X binds locally. */
6526 static bool
6527 aarch64_symbol_binds_local_p (const_rtx x)
6529 return (SYMBOL_REF_DECL (x)
6530 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
6531 : SYMBOL_REF_LOCAL_P (x));
6534 /* Return true if SYMBOL_REF X is thread local */
6535 static bool
6536 aarch64_tls_symbol_p (rtx x)
6538 if (! TARGET_HAVE_TLS)
6539 return false;
6541 if (GET_CODE (x) != SYMBOL_REF)
6542 return false;
6544 return SYMBOL_REF_TLS_MODEL (x) != 0;
6547 /* Classify a TLS symbol into one of the TLS kinds. */
6548 enum aarch64_symbol_type
6549 aarch64_classify_tls_symbol (rtx x)
6551 enum tls_model tls_kind = tls_symbolic_operand_type (x);
6553 switch (tls_kind)
6555 case TLS_MODEL_GLOBAL_DYNAMIC:
6556 case TLS_MODEL_LOCAL_DYNAMIC:
6557 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
6559 case TLS_MODEL_INITIAL_EXEC:
6560 return SYMBOL_SMALL_GOTTPREL;
6562 case TLS_MODEL_LOCAL_EXEC:
6563 return SYMBOL_SMALL_TPREL;
6565 case TLS_MODEL_EMULATED:
6566 case TLS_MODEL_NONE:
6567 return SYMBOL_FORCE_TO_MEM;
6569 default:
6570 gcc_unreachable ();
6574 /* Return the method that should be used to access SYMBOL_REF or
6575 LABEL_REF X in context CONTEXT. */
6577 enum aarch64_symbol_type
6578 aarch64_classify_symbol (rtx x,
6579 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
6581 if (GET_CODE (x) == LABEL_REF)
6583 switch (aarch64_cmodel)
6585 case AARCH64_CMODEL_LARGE:
6586 return SYMBOL_FORCE_TO_MEM;
6588 case AARCH64_CMODEL_TINY_PIC:
6589 case AARCH64_CMODEL_TINY:
6590 return SYMBOL_TINY_ABSOLUTE;
6592 case AARCH64_CMODEL_SMALL_PIC:
6593 case AARCH64_CMODEL_SMALL:
6594 return SYMBOL_SMALL_ABSOLUTE;
6596 default:
6597 gcc_unreachable ();
6601 if (GET_CODE (x) == SYMBOL_REF)
6603 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
6604 return SYMBOL_FORCE_TO_MEM;
6606 if (aarch64_tls_symbol_p (x))
6607 return aarch64_classify_tls_symbol (x);
6609 switch (aarch64_cmodel)
6611 case AARCH64_CMODEL_TINY:
6612 if (SYMBOL_REF_WEAK (x))
6613 return SYMBOL_FORCE_TO_MEM;
6614 return SYMBOL_TINY_ABSOLUTE;
6616 case AARCH64_CMODEL_SMALL:
6617 if (SYMBOL_REF_WEAK (x))
6618 return SYMBOL_FORCE_TO_MEM;
6619 return SYMBOL_SMALL_ABSOLUTE;
6621 case AARCH64_CMODEL_TINY_PIC:
6622 if (!aarch64_symbol_binds_local_p (x))
6623 return SYMBOL_TINY_GOT;
6624 return SYMBOL_TINY_ABSOLUTE;
6626 case AARCH64_CMODEL_SMALL_PIC:
6627 if (!aarch64_symbol_binds_local_p (x))
6628 return SYMBOL_SMALL_GOT;
6629 return SYMBOL_SMALL_ABSOLUTE;
6631 default:
6632 gcc_unreachable ();
6636 /* By default push everything into the constant pool. */
6637 return SYMBOL_FORCE_TO_MEM;
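/* Worked example, added for exposition: under -mcmodel=small -fPIC, a
   SYMBOL_REF for an external global that does not bind locally is
   classified as SYMBOL_SMALL_GOT (accessed through the GOT), while a
   file-local symbol yields SYMBOL_SMALL_ABSOLUTE; under -mcmodel=large
   every symbol and label is forced into the constant pool
   (SYMBOL_FORCE_TO_MEM).  */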
6640 bool
6641 aarch64_constant_address_p (rtx x)
6643 return (CONSTANT_P (x) && memory_address_p (DImode, x));
6646 bool
6647 aarch64_legitimate_pic_operand_p (rtx x)
6649 if (GET_CODE (x) == SYMBOL_REF
6650 || (GET_CODE (x) == CONST
6651 && GET_CODE (XEXP (x, 0)) == PLUS
6652 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6653 return false;
6655 return true;
6658 /* Return true if X is a floating-point constant that is either
6659 +0.0 or representable in quarter-precision (FMOV immediate) form. */
6660 static bool
6661 aarch64_valid_floating_const (machine_mode mode, rtx x)
6663 if (!CONST_DOUBLE_P (x))
6664 return false;
6666 /* TODO: We could handle moving 0.0 to a TFmode register,
6667 but first we would like to refactor the movtf_aarch64
6668 to be more amenable to splitting moves properly and
6669 correctly gating on TARGET_SIMD. For now, reject all
6670 constants that are not destined for SFmode or DFmode registers. */
6671 if (!(mode == SFmode || mode == DFmode))
6672 return false;
6674 if (aarch64_float_const_zero_rtx_p (x))
6675 return true;
6676 return aarch64_float_const_representable_p (x);
6679 static bool
6680 aarch64_legitimate_constant_p (machine_mode mode, rtx x)
6682 /* Do not allow vector struct mode constants. We could support
6683 0 and -1 easily, but they need support in aarch64-simd.md. */
6684 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
6685 return false;
6687 /* This could probably go away because
6688 we now decompose CONST_INTs according to expand_mov_immediate. */
6689 if ((GET_CODE (x) == CONST_VECTOR
6690 && aarch64_simd_valid_immediate (x, mode, false, NULL))
6691 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
6692 return !targetm.cannot_force_const_mem (mode, x);
6694 if (GET_CODE (x) == HIGH
6695 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6696 return true;
6698 return aarch64_constant_address_p (x);
6702 aarch64_load_tp (rtx target)
6704 if (!target
6705 || GET_MODE (target) != Pmode
6706 || !register_operand (target, Pmode))
6707 target = gen_reg_rtx (Pmode);
6709 /* Can return in any reg. */
6710 emit_insn (gen_aarch64_load_tp_hard (target));
6711 return target;
6714 /* On AAPCS systems, this is the "struct __va_list". */
6715 static GTY(()) tree va_list_type;
6717 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
6718 Return the type to use as __builtin_va_list.
6720 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
6722 struct __va_list
6724 void *__stack;
6725 void *__gr_top;
6726 void *__vr_top;
6727 int __gr_offs;
6728 int __vr_offs;
6729 }; */
6731 static tree
6732 aarch64_build_builtin_va_list (void)
6734 tree va_list_name;
6735 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6737 /* Create the type. */
6738 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
6739 /* Give it the required name. */
6740 va_list_name = build_decl (BUILTINS_LOCATION,
6741 TYPE_DECL,
6742 get_identifier ("__va_list"),
6743 va_list_type);
6744 DECL_ARTIFICIAL (va_list_name) = 1;
6745 TYPE_NAME (va_list_type) = va_list_name;
6746 TYPE_STUB_DECL (va_list_type) = va_list_name;
6748 /* Create the fields. */
6749 f_stack = build_decl (BUILTINS_LOCATION,
6750 FIELD_DECL, get_identifier ("__stack"),
6751 ptr_type_node);
6752 f_grtop = build_decl (BUILTINS_LOCATION,
6753 FIELD_DECL, get_identifier ("__gr_top"),
6754 ptr_type_node);
6755 f_vrtop = build_decl (BUILTINS_LOCATION,
6756 FIELD_DECL, get_identifier ("__vr_top"),
6757 ptr_type_node);
6758 f_groff = build_decl (BUILTINS_LOCATION,
6759 FIELD_DECL, get_identifier ("__gr_offs"),
6760 integer_type_node);
6761 f_vroff = build_decl (BUILTINS_LOCATION,
6762 FIELD_DECL, get_identifier ("__vr_offs"),
6763 integer_type_node);
6765 DECL_ARTIFICIAL (f_stack) = 1;
6766 DECL_ARTIFICIAL (f_grtop) = 1;
6767 DECL_ARTIFICIAL (f_vrtop) = 1;
6768 DECL_ARTIFICIAL (f_groff) = 1;
6769 DECL_ARTIFICIAL (f_vroff) = 1;
6771 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
6772 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
6773 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
6774 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
6775 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
6777 TYPE_FIELDS (va_list_type) = f_stack;
6778 DECL_CHAIN (f_stack) = f_grtop;
6779 DECL_CHAIN (f_grtop) = f_vrtop;
6780 DECL_CHAIN (f_vrtop) = f_groff;
6781 DECL_CHAIN (f_groff) = f_vroff;
6783 /* Compute its layout. */
6784 layout_type (va_list_type);
6786 return va_list_type;
6789 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
6790 static void
6791 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6793 const CUMULATIVE_ARGS *cum;
6794 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6795 tree stack, grtop, vrtop, groff, vroff;
6796 tree t;
6797 int gr_save_area_size;
6798 int vr_save_area_size;
6799 int vr_offset;
6801 cum = &crtl->args.info;
6802 gr_save_area_size
6803 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
6804 vr_save_area_size
6805 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
6807 if (TARGET_GENERAL_REGS_ONLY)
6809 if (cum->aapcs_nvrn > 0)
6810 sorry ("%qs and floating point or vector arguments",
6811 "-mgeneral-regs-only");
6812 vr_save_area_size = 0;
6815 f_stack = TYPE_FIELDS (va_list_type_node);
6816 f_grtop = DECL_CHAIN (f_stack);
6817 f_vrtop = DECL_CHAIN (f_grtop);
6818 f_groff = DECL_CHAIN (f_vrtop);
6819 f_vroff = DECL_CHAIN (f_groff);
6821 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
6822 NULL_TREE);
6823 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
6824 NULL_TREE);
6825 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
6826 NULL_TREE);
6827 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
6828 NULL_TREE);
6829 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
6830 NULL_TREE);
6832 /* Emit code to initialize STACK, which points to the next varargs stack
6833 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
6834 by named arguments. STACK is 8-byte aligned. */
6835 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
6836 if (cum->aapcs_stack_size > 0)
6837 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
6838 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
6839 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6841 /* Emit code to initialize GRTOP, the top of the GR save area.
6842 virtual_incoming_args_rtx should have been 16 byte aligned. */
6843 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
6844 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
6845 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6847 /* Emit code to initialize VRTOP, the top of the VR save area.
6848 This address is gr_save_area_bytes below GRTOP, rounded
6849 down to the next 16-byte boundary. */
6850 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
6851 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
6852 STACK_BOUNDARY / BITS_PER_UNIT);
6854 if (vr_offset)
6855 t = fold_build_pointer_plus_hwi (t, -vr_offset);
6856 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
6857 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6859 /* Emit code to initialize GROFF, the offset from GRTOP of the
6860 next GPR argument. */
6861 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
6862 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
6863 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6865 /* Likewise emit code to initialize VROFF, the offset from FTOP
6866 of the next VR argument. */
6867 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
6868 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
6869 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
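/* Worked example, added for exposition (the prototype is purely
   illustrative):

     int sum (int n, ...);

   With one named integer argument, cum->aapcs_ncrn == 1 and
   cum->aapcs_nvrn == 0, so the code above sets
   __gr_offs = -(8 - 1) * 8 = -56 and __vr_offs = -(8 - 0) * 16 = -128,
   while __gr_top and __vr_top point just past their respective register
   save areas, which aarch64_setup_incoming_varargs below spills to the
   stack.  */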
6872 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
6874 static tree
6875 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6876 gimple_seq *post_p ATTRIBUTE_UNUSED)
6878 tree addr;
6879 bool indirect_p;
6880 bool is_ha; /* is HFA or HVA. */
6881 bool dw_align; /* double-word align. */
6882 machine_mode ag_mode = VOIDmode;
6883 int nregs;
6884 machine_mode mode;
6886 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6887 tree stack, f_top, f_off, off, arg, roundup, on_stack;
6888 HOST_WIDE_INT size, rsize, adjust, align;
6889 tree t, u, cond1, cond2;
6891 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6892 if (indirect_p)
6893 type = build_pointer_type (type);
6895 mode = TYPE_MODE (type);
6897 f_stack = TYPE_FIELDS (va_list_type_node);
6898 f_grtop = DECL_CHAIN (f_stack);
6899 f_vrtop = DECL_CHAIN (f_grtop);
6900 f_groff = DECL_CHAIN (f_vrtop);
6901 f_vroff = DECL_CHAIN (f_groff);
6903 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
6904 f_stack, NULL_TREE);
6905 size = int_size_in_bytes (type);
6906 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
6908 dw_align = false;
6909 adjust = 0;
6910 if (aarch64_vfp_is_call_or_return_candidate (mode,
6911 type,
6912 &ag_mode,
6913 &nregs,
6914 &is_ha))
6916 /* TYPE passed in fp/simd registers. */
6917 if (TARGET_GENERAL_REGS_ONLY)
6918 sorry ("%qs and floating point or vector arguments",
6919 "-mgeneral-regs-only");
6921 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
6922 unshare_expr (valist), f_vrtop, NULL_TREE);
6923 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
6924 unshare_expr (valist), f_vroff, NULL_TREE);
6926 rsize = nregs * UNITS_PER_VREG;
6928 if (is_ha)
6930 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
6931 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
6933 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
6934 && size < UNITS_PER_VREG)
6936 adjust = UNITS_PER_VREG - size;
6939 else
6941 /* TYPE passed in general registers. */
6942 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
6943 unshare_expr (valist), f_grtop, NULL_TREE);
6944 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
6945 unshare_expr (valist), f_groff, NULL_TREE);
6946 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6947 nregs = rsize / UNITS_PER_WORD;
6949 if (align > 8)
6950 dw_align = true;
6952 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6953 && size < UNITS_PER_WORD)
6955 adjust = UNITS_PER_WORD - size;
6959 /* Get a local temporary for the field value. */
6960 off = get_initialized_tmp_var (f_off, pre_p, NULL);
6962 /* Emit code to branch if off >= 0. */
6963 t = build2 (GE_EXPR, boolean_type_node, off,
6964 build_int_cst (TREE_TYPE (off), 0));
6965 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
6967 if (dw_align)
6969 /* Emit: offs = (offs + 15) & -16. */
6970 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6971 build_int_cst (TREE_TYPE (off), 15));
6972 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
6973 build_int_cst (TREE_TYPE (off), -16));
6974 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
6976 else
6977 roundup = NULL;
6979 /* Update ap.__[g|v]r_offs */
6980 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6981 build_int_cst (TREE_TYPE (off), rsize));
6982 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
6984 /* String up. */
6985 if (roundup)
6986 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6988 /* [cond2] if (ap.__[g|v]r_offs > 0) */
6989 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
6990 build_int_cst (TREE_TYPE (f_off), 0));
6991 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
6993 /* String up: make sure the assignment happens before the use. */
6994 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
6995 COND_EXPR_ELSE (cond1) = t;
6997 /* Prepare the trees handling the argument that is passed on the stack;
6998 the top-level node will be stored in ON_STACK. */
6999 arg = get_initialized_tmp_var (stack, pre_p, NULL);
7000 if (align > 8)
7002 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
7003 t = fold_convert (intDI_type_node, arg);
7004 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
7005 build_int_cst (TREE_TYPE (t), 15));
7006 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7007 build_int_cst (TREE_TYPE (t), -16));
7008 t = fold_convert (TREE_TYPE (arg), t);
7009 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
7011 else
7012 roundup = NULL;
7013 /* Advance ap.__stack */
7014 t = fold_convert (intDI_type_node, arg);
7015 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
7016 build_int_cst (TREE_TYPE (t), size + 7));
7017 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7018 build_int_cst (TREE_TYPE (t), -8));
7019 t = fold_convert (TREE_TYPE (arg), t);
7020 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
7021 /* String up roundup and advance. */
7022 if (roundup)
7023 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
7024 /* String up with arg */
7025 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
7026 /* Big-endianness related address adjustment. */
7027 if (BLOCK_REG_PADDING (mode, type, 1) == downward
7028 && size < UNITS_PER_WORD)
7030 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
7031 size_int (UNITS_PER_WORD - size));
7032 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
7035 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
7036 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
7038 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
7039 t = off;
7040 if (adjust)
7041 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
7042 build_int_cst (TREE_TYPE (off), adjust));
7044 t = fold_convert (sizetype, t);
7045 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
7047 if (is_ha)
7049 /* type ha; // treat as "struct {ftype field[n];}"
7050 ... [computing offs]
7051 for (i = 0; i <nregs; ++i, offs += 16)
7052 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
7053 return ha; */
7054 int i;
7055 tree tmp_ha, field_t, field_ptr_t;
7057 /* Declare a local variable. */
7058 tmp_ha = create_tmp_var_raw (type, "ha");
7059 gimple_add_tmp_var (tmp_ha);
7061 /* Establish the base type. */
7062 switch (ag_mode)
7064 case SFmode:
7065 field_t = float_type_node;
7066 field_ptr_t = float_ptr_type_node;
7067 break;
7068 case DFmode:
7069 field_t = double_type_node;
7070 field_ptr_t = double_ptr_type_node;
7071 break;
7072 case TFmode:
7073 field_t = long_double_type_node;
7074 field_ptr_t = long_double_ptr_type_node;
7075 break;
7076 /* Half precision and quad precision are not fully supported yet. Enable
7077 the following code once that support is complete; the correct type
7078 node for __fp16 * still needs to be found. */
7079 #if 0
7080 case HFmode:
7081 field_t = float_type_node;
7082 field_ptr_t = float_ptr_type_node;
7083 break;
7084 #endif
7085 case V2SImode:
7086 case V4SImode:
7088 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
7089 field_t = build_vector_type_for_mode (innertype, ag_mode);
7090 field_ptr_t = build_pointer_type (field_t);
7092 break;
7093 default:
7094 gcc_assert (0);
7097 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area). */
7098 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
7099 addr = t;
7100 t = fold_convert (field_ptr_t, addr);
7101 t = build2 (MODIFY_EXPR, field_t,
7102 build1 (INDIRECT_REF, field_t, tmp_ha),
7103 build1 (INDIRECT_REF, field_t, t));
7105 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
7106 for (i = 1; i < nregs; ++i)
7108 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
7109 u = fold_convert (field_ptr_t, addr);
7110 u = build2 (MODIFY_EXPR, field_t,
7111 build2 (MEM_REF, field_t, tmp_ha,
7112 build_int_cst (field_ptr_t,
7113 (i *
7114 int_size_in_bytes (field_t)))),
7115 build1 (INDIRECT_REF, field_t, u));
7116 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
7119 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
7120 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
7123 COND_EXPR_ELSE (cond2) = t;
7124 addr = fold_convert (build_pointer_type (type), cond1);
7125 addr = build_va_arg_indirect_ref (addr);
7127 if (indirect_p)
7128 addr = build_va_arg_indirect_ref (addr);
7130 return addr;
7133 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
7135 static void
7136 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
7137 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7138 int no_rtl)
7140 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7141 CUMULATIVE_ARGS local_cum;
7142 int gr_saved, vr_saved;
7144 /* The caller has advanced CUM up to, but not beyond, the last named
7145 argument. Advance a local copy of CUM past the last "real" named
7146 argument, to find out how many registers are left over. */
7147 local_cum = *cum;
7148 aarch64_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, true);
7150 /* Find out how many registers we need to save. */
7151 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
7152 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
7154 if (TARGET_GENERAL_REGS_ONLY)
7156 if (local_cum.aapcs_nvrn > 0)
7157 sorry ("%qs and floating point or vector arguments",
7158 "-mgeneral-regs-only");
7159 vr_saved = 0;
7162 if (!no_rtl)
7164 if (gr_saved > 0)
7166 rtx ptr, mem;
7168 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
7169 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
7170 - gr_saved * UNITS_PER_WORD);
7171 mem = gen_frame_mem (BLKmode, ptr);
7172 set_mem_alias_set (mem, get_varargs_alias_set ());
7174 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
7175 mem, gr_saved);
7177 if (vr_saved > 0)
7179 /* We can't use move_block_from_reg, because it will use
7180 the wrong mode, storing D regs only. */
7181 machine_mode mode = TImode;
7182 int off, i;
7184 /* Set OFF to the offset from virtual_incoming_args_rtx of
7185 the first vector register. The VR save area lies below
7186 the GR one, and is aligned to 16 bytes. */
7187 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7188 STACK_BOUNDARY / BITS_PER_UNIT);
7189 off -= vr_saved * UNITS_PER_VREG;
7191 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
7193 rtx ptr, mem;
7195 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
7196 mem = gen_frame_mem (mode, ptr);
7197 set_mem_alias_set (mem, get_varargs_alias_set ());
7198 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
7199 off += UNITS_PER_VREG;
7204 /* We don't save the size into *PRETEND_SIZE because we want to avoid
7205 any complication of having crtl->args.pretend_args_size changed. */
7206 cfun->machine->frame.saved_varargs_size
7207 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7208 STACK_BOUNDARY / BITS_PER_UNIT)
7209 + vr_saved * UNITS_PER_VREG);
7212 static void
7213 aarch64_conditional_register_usage (void)
7215 int i;
7216 if (!TARGET_FLOAT)
7218 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
7220 fixed_regs[i] = 1;
7221 call_used_regs[i] = 1;
7226 /* Walk down the type tree of TYPE counting consecutive base elements.
7227 If *MODEP is VOIDmode, then set it to the first valid floating point
7228 type. If a non-floating point type is found, or if a floating point
7229 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
7230 otherwise return the count in the sub-tree. */
7231 static int
7232 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
7234 machine_mode mode;
7235 HOST_WIDE_INT size;
7237 switch (TREE_CODE (type))
7239 case REAL_TYPE:
7240 mode = TYPE_MODE (type);
7241 if (mode != DFmode && mode != SFmode && mode != TFmode)
7242 return -1;
7244 if (*modep == VOIDmode)
7245 *modep = mode;
7247 if (*modep == mode)
7248 return 1;
7250 break;
7252 case COMPLEX_TYPE:
7253 mode = TYPE_MODE (TREE_TYPE (type));
7254 if (mode != DFmode && mode != SFmode && mode != TFmode)
7255 return -1;
7257 if (*modep == VOIDmode)
7258 *modep = mode;
7260 if (*modep == mode)
7261 return 2;
7263 break;
7265 case VECTOR_TYPE:
7266 /* Use V2SImode and V4SImode as representatives of all 64-bit
7267 and 128-bit vector types. */
7268 size = int_size_in_bytes (type);
7269 switch (size)
7271 case 8:
7272 mode = V2SImode;
7273 break;
7274 case 16:
7275 mode = V4SImode;
7276 break;
7277 default:
7278 return -1;
7281 if (*modep == VOIDmode)
7282 *modep = mode;
7284 /* Vector modes are considered to be opaque: two vectors are
7285 equivalent for the purposes of being homogeneous aggregates
7286 if they are the same size. */
7287 if (*modep == mode)
7288 return 1;
7290 break;
7292 case ARRAY_TYPE:
7294 int count;
7295 tree index = TYPE_DOMAIN (type);
7297 /* Can't handle incomplete types nor sizes that are not
7298 fixed. */
7299 if (!COMPLETE_TYPE_P (type)
7300 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7301 return -1;
7303 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
7304 if (count == -1
7305 || !index
7306 || !TYPE_MAX_VALUE (index)
7307 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
7308 || !TYPE_MIN_VALUE (index)
7309 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
7310 || count < 0)
7311 return -1;
7313 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
7314 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
7316 /* There must be no padding. */
7317 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7318 return -1;
7320 return count;
7323 case RECORD_TYPE:
7325 int count = 0;
7326 int sub_count;
7327 tree field;
7329 /* Can't handle incomplete types nor sizes that are not
7330 fixed. */
7331 if (!COMPLETE_TYPE_P (type)
7332 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7333 return -1;
7335 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7337 if (TREE_CODE (field) != FIELD_DECL)
7338 continue;
7340 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7341 if (sub_count < 0)
7342 return -1;
7343 count += sub_count;
7346 /* There must be no padding. */
7347 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7348 return -1;
7350 return count;
7353 case UNION_TYPE:
7354 case QUAL_UNION_TYPE:
7356 /* These aren't very interesting except in a degenerate case. */
7357 int count = 0;
7358 int sub_count;
7359 tree field;
7361 /* Can't handle incomplete types nor sizes that are not
7362 fixed. */
7363 if (!COMPLETE_TYPE_P (type)
7364 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7365 return -1;
7367 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7369 if (TREE_CODE (field) != FIELD_DECL)
7370 continue;
7372 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7373 if (sub_count < 0)
7374 return -1;
7375 count = count > sub_count ? count : sub_count;
7378 /* There must be no padding. */
7379 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7380 return -1;
7382 return count;
7385 default:
7386 break;
7389 return -1;
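/* Worked example, added for exposition (the struct names are purely
   illustrative):

     struct rgb { float r, g, b; };       // returns 3, *modep == SFmode
     struct mix { float f; double d; };   // returns -1

   The first is a candidate homogeneous floating-point aggregate; the
   second mixes SFmode and DFmode elements and is rejected.  */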
7392 /* Return true if we use LRA instead of the reload pass. */
7393 static bool
7394 aarch64_lra_p (void)
7396 return aarch64_lra_flag;
7399 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
7400 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
7401 array types. The C99 floating-point complex types are also considered
7402 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
7403 types, which are GCC extensions and out of the scope of AAPCS64, are
7404 treated as composite types here as well.
7406 Note that MODE itself is not sufficient in determining whether a type
7407 is such a composite type or not. This is because
7408 stor-layout.c:compute_record_mode may have already changed the MODE
7409 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
7410 structure with only one field may have its MODE set to the mode of the
7411 field. Also an integer mode whose size matches the size of the
7412 RECORD_TYPE type may be used to substitute the original mode
7413 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
7414 solely relied on. */
7416 static bool
7417 aarch64_composite_type_p (const_tree type,
7418 machine_mode mode)
7420 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
7421 return true;
7423 if (mode == BLKmode
7424 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7425 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
7426 return true;
7428 return false;
7431 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
7432 type as described in AAPCS64 \S 4.1.2.
7434 See the comment above aarch64_composite_type_p for the notes on MODE. */
7436 static bool
7437 aarch64_short_vector_p (const_tree type,
7438 machine_mode mode)
7440 HOST_WIDE_INT size = -1;
7442 if (type && TREE_CODE (type) == VECTOR_TYPE)
7443 size = int_size_in_bytes (type);
7444 else if (!aarch64_composite_type_p (type, mode)
7445 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
7446 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
7447 size = GET_MODE_SIZE (mode);
7449 return (size == 8 || size == 16) ? true : false;
7452 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
7453 shall be passed or returned in simd/fp register(s) (providing these
7454 parameter passing registers are available).
7456 Upon successful return, *COUNT returns the number of needed registers,
7457 *BASE_MODE returns the mode of the individual register and when IS_HA
7458 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
7459 floating-point aggregate or a homogeneous short-vector aggregate. */
7461 static bool
7462 aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
7463 const_tree type,
7464 machine_mode *base_mode,
7465 int *count,
7466 bool *is_ha)
7468 machine_mode new_mode = VOIDmode;
7469 bool composite_p = aarch64_composite_type_p (type, mode);
7471 if (is_ha != NULL) *is_ha = false;
7473 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
7474 || aarch64_short_vector_p (type, mode))
7476 *count = 1;
7477 new_mode = mode;
7479 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7481 if (is_ha != NULL) *is_ha = true;
7482 *count = 2;
7483 new_mode = GET_MODE_INNER (mode);
7485 else if (type && composite_p)
7487 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
7489 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
7491 if (is_ha != NULL) *is_ha = true;
7492 *count = ag_count;
7494 else
7495 return false;
7497 else
7498 return false;
7500 *base_mode = new_mode;
7501 return true;
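/* Examples, added for exposition: _Complex double yields *count = 2,
   *base_mode = DFmode and *is_ha = true through the MODE_COMPLEX_FLOAT
   case; a 128-bit short vector such as a V4SFmode value yields *count = 1
   with *base_mode = V4SFmode; an aggregate whose element count exceeds
   HA_MAX_NUM_FLDS is rejected.  */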
7504 /* Implement TARGET_STRUCT_VALUE_RTX. */
7506 static rtx
7507 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
7508 int incoming ATTRIBUTE_UNUSED)
7510 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
7513 /* Implements target hook vector_mode_supported_p. */
7514 static bool
7515 aarch64_vector_mode_supported_p (machine_mode mode)
7517 if (TARGET_SIMD
7518 && (mode == V4SImode || mode == V8HImode
7519 || mode == V16QImode || mode == V2DImode
7520 || mode == V2SImode || mode == V4HImode
7521 || mode == V8QImode || mode == V2SFmode
7522 || mode == V4SFmode || mode == V2DFmode
7523 || mode == V1DFmode))
7524 return true;
7526 return false;
7529 /* Return appropriate SIMD container
7530 for MODE within a vector of WIDTH bits. */
7531 static machine_mode
7532 aarch64_simd_container_mode (machine_mode mode, unsigned width)
7534 gcc_assert (width == 64 || width == 128);
7535 if (TARGET_SIMD)
7537 if (width == 128)
7538 switch (mode)
7540 case DFmode:
7541 return V2DFmode;
7542 case SFmode:
7543 return V4SFmode;
7544 case SImode:
7545 return V4SImode;
7546 case HImode:
7547 return V8HImode;
7548 case QImode:
7549 return V16QImode;
7550 case DImode:
7551 return V2DImode;
7552 default:
7553 break;
7555 else
7556 switch (mode)
7558 case SFmode:
7559 return V2SFmode;
7560 case SImode:
7561 return V2SImode;
7562 case HImode:
7563 return V4HImode;
7564 case QImode:
7565 return V8QImode;
7566 default:
7567 break;
7570 return word_mode;
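/* Examples, added for exposition: SImode in a 128-bit container maps to
   V4SImode and in a 64-bit container to V2SImode; when TARGET_SIMD is not
   enabled the function falls back to word_mode, i.e. no vector container
   is offered.  */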
7573 /* Return 128-bit container as the preferred SIMD mode for MODE. */
7574 static machine_mode
7575 aarch64_preferred_simd_mode (machine_mode mode)
7577 return aarch64_simd_container_mode (mode, 128);
7580 /* Return the bitmask of possible vector sizes for the vectorizer
7581 to iterate over. */
7582 static unsigned int
7583 aarch64_autovectorize_vector_sizes (void)
7585 return (16 | 8);
7588 /* A table to help perform AArch64-specific name mangling for AdvSIMD
7589 vector types in order to conform to the AAPCS64 (see "Procedure
7590 Call Standard for the ARM 64-bit Architecture", Appendix A). To
7591 qualify for emission with the mangled names defined in that document,
7592 a vector type must not only be of the correct mode but also be
7593 composed of AdvSIMD vector element types (e.g.
7594 __builtin_aarch64_simd_qi); these types are registered by
7595 aarch64_init_simd_builtins (). In other words, vector types defined
7596 in other ways, e.g. via the vector_size attribute, will get default
7597 mangled names. */
7598 typedef struct
7600 machine_mode mode;
7601 const char *element_type_name;
7602 const char *mangled_name;
7603 } aarch64_simd_mangle_map_entry;
7605 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
7606 /* 64-bit containerized types. */
7607 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
7608 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
7609 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
7610 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
7611 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
7612 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
7613 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
7614 { DImode, "__builtin_aarch64_simd_di", "11__Int64x1_t" },
7615 { DImode, "__builtin_aarch64_simd_udi", "12__Uint64x1_t" },
7616 { V1DFmode, "__builtin_aarch64_simd_df", "13__Float64x1_t" },
7617 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
7618 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
7619 /* 128-bit containerized types. */
7620 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
7621 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
7622 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
7623 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
7624 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
7625 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
7626 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
7627 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
7628 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
7629 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
7630 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
7631 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
7632 { V2DImode, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
7633 { VOIDmode, NULL, NULL }
7636 /* Implement TARGET_MANGLE_TYPE. */
7638 static const char *
7639 aarch64_mangle_type (const_tree type)
7641 /* The AArch64 ABI documents say that "__va_list" has to be
7642 mangled as if it is in the "std" namespace. */
7643 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
7644 return "St9__va_list";
7646 /* Check the mode of the vector type, and the name of the vector
7647 element type, against the table. */
7648 if (TREE_CODE (type) == VECTOR_TYPE)
7650 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
7652 while (pos->mode != VOIDmode)
7654 tree elt_type = TREE_TYPE (type);
7656 if (pos->mode == TYPE_MODE (type)
7657 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
7658 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
7659 pos->element_type_name))
7660 return pos->mangled_name;
7662 pos++;
7666 /* Use the default mangling. */
7667 return NULL;
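/* Illustrative example, added for exposition: the arm_neon.h type
   int8x8_t has mode V8QImode and element type __builtin_aarch64_simd_qi,
   so the table maps it to "10__Int8x8_t"; a C++ declaration such as

     void f (int8x8_t);

   therefore mangles as _Z1f10__Int8x8_t instead of using the default
   vector_size-style mangling.  */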
7671 /* Return true if the rtx_insn contains a MEM RTX somewhere
7672 in it. */
7674 static bool
7675 has_memory_op (rtx_insn *mem_insn)
7677 subrtx_iterator::array_type array;
7678 FOR_EACH_SUBRTX (iter, array, PATTERN (mem_insn), ALL)
7679 if (MEM_P (*iter))
7680 return true;
7682 return false;
7685 /* Find the first rtx_insn before insn that will generate an assembly
7686 instruction. */
7688 static rtx_insn *
7689 aarch64_prev_real_insn (rtx_insn *insn)
7691 if (!insn)
7692 return NULL;
7696 insn = prev_real_insn (insn);
7698 while (insn && recog_memoized (insn) < 0);
7700 return insn;
7703 static bool
7704 is_madd_op (enum attr_type t1)
7706 unsigned int i;
7707 /* A number of these may be AArch32 only. */
7708 enum attr_type mlatypes[] = {
7709 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
7710 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
7711 TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS, TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
7714 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
7716 if (t1 == mlatypes[i])
7717 return true;
7720 return false;
7723 /* Check if there is a register dependency between a load and the insn
7724 for which we hold recog_data. */
7726 static bool
7727 dep_between_memop_and_curr (rtx memop)
7729 rtx load_reg;
7730 int opno;
7732 gcc_assert (GET_CODE (memop) == SET);
7734 if (!REG_P (SET_DEST (memop)))
7735 return false;
7737 load_reg = SET_DEST (memop);
7738 for (opno = 1; opno < recog_data.n_operands; opno++)
7740 rtx operand = recog_data.operand[opno];
7741 if (REG_P (operand)
7742 && reg_overlap_mentioned_p (load_reg, operand))
7743 return true;
7746 return false;
7750 /* When working around the Cortex-A53 erratum 835769,
7751 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
7752 instruction and has a preceding memory instruction such that a NOP
7753 should be inserted between them. */
7755 bool
7756 aarch64_madd_needs_nop (rtx_insn* insn)
7758 enum attr_type attr_type;
7759 rtx_insn *prev;
7760 rtx body;
7762 if (!aarch64_fix_a53_err835769)
7763 return false;
7765 if (recog_memoized (insn) < 0)
7766 return false;
7768 attr_type = get_attr_type (insn);
7769 if (!is_madd_op (attr_type))
7770 return false;
7772 prev = aarch64_prev_real_insn (insn);
7773 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
7774 Restore recog state to INSN to avoid state corruption. */
7775 extract_constrain_insn_cached (insn);
7777 if (!prev || !has_memory_op (prev))
7778 return false;
7780 body = single_set (prev);
7782 /* If the previous insn is a memory op and there is no dependency between
7783 it and the DImode madd, emit a NOP between them. If body is NULL then we
7784 have a complex memory operation, probably a load/store pair.
7785 Be conservative for now and emit a NOP. */
7786 if (GET_MODE (recog_data.operand[0]) == DImode
7787 && (!body || !dep_between_memop_and_curr (body)))
7788 return true;
7790 return false;
7795 /* Implement FINAL_PRESCAN_INSN. */
7797 void
7798 aarch64_final_prescan_insn (rtx_insn *insn)
7800 if (aarch64_madd_needs_nop (insn))
7801 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
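/* Illustrative assembly sketch, added for exposition: with the erratum
   workaround enabled, a sequence such as

     ldr  x1, [x2]
     madd x0, x3, x4, x5

   is emitted as

     ldr  x1, [x2]
     nop // between mem op and mult-accumulate
     madd x0, x3, x4, x5

   because the DImode multiply-accumulate directly follows a memory
   operation with no register dependency between the two (see
   aarch64_madd_needs_nop above).  */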
7805 /* Return the equivalent letter for size. */
7806 static char
7807 sizetochar (int size)
7809 switch (size)
7811 case 64: return 'd';
7812 case 32: return 's';
7813 case 16: return 'h';
7814 case 8 : return 'b';
7815 default: gcc_unreachable ();
7819 /* Return true iff x is a uniform vector of floating-point
7820 constants, and the constant can be represented in
7821 quarter-precision form. Note that, as aarch64_float_const_representable_p
7822 rejects both +0.0 and -0.0, we reject them here as well. */
7823 static bool
7824 aarch64_vect_float_const_representable_p (rtx x)
7826 int i = 0;
7827 REAL_VALUE_TYPE r0, ri;
7828 rtx x0, xi;
7830 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
7831 return false;
7833 x0 = CONST_VECTOR_ELT (x, 0);
7834 if (!CONST_DOUBLE_P (x0))
7835 return false;
7837 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
7839 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
7841 xi = CONST_VECTOR_ELT (x, i);
7842 if (!CONST_DOUBLE_P (xi))
7843 return false;
7845 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
7846 if (!REAL_VALUES_EQUAL (r0, ri))
7847 return false;
7850 return aarch64_float_const_representable_p (x0);
7853 /* Return true for valid and false for invalid. */
7854 bool
7855 aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
7856 struct simd_immediate_info *info)
7858 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
7859 matches = 1; \
7860 for (i = 0; i < idx; i += (STRIDE)) \
7861 if (!(TEST)) \
7862 matches = 0; \
7863 if (matches) \
7865 immtype = (CLASS); \
7866 elsize = (ELSIZE); \
7867 eshift = (SHIFT); \
7868 emvn = (NEG); \
7869 break; \
7872 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7873 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7874 unsigned char bytes[16];
7875 int immtype = -1, matches;
7876 unsigned int invmask = inverse ? 0xff : 0;
7877 int eshift, emvn;
7879 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7881 if (! (aarch64_simd_imm_zero_p (op, mode)
7882 || aarch64_vect_float_const_representable_p (op)))
7883 return false;
7885 if (info)
7887 info->value = CONST_VECTOR_ELT (op, 0);
7888 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
7889 info->mvn = false;
7890 info->shift = 0;
7893 return true;
7896 /* Splat vector constant out into a byte vector. */
7897 for (i = 0; i < n_elts; i++)
7899 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
7900 it must be laid out in the vector register in reverse order. */
7901 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
7902 unsigned HOST_WIDE_INT elpart;
7903 unsigned int part, parts;
7905 if (CONST_INT_P (el))
7907 elpart = INTVAL (el);
7908 parts = 1;
7910 else if (GET_CODE (el) == CONST_DOUBLE)
7912 elpart = CONST_DOUBLE_LOW (el);
7913 parts = 2;
7915 else
7916 gcc_unreachable ();
7918 for (part = 0; part < parts; part++)
7920 unsigned int byte;
7921 for (byte = 0; byte < innersize; byte++)
7923 bytes[idx++] = (elpart & 0xff) ^ invmask;
7924 elpart >>= BITS_PER_UNIT;
7926 if (GET_CODE (el) == CONST_DOUBLE)
7927 elpart = CONST_DOUBLE_HIGH (el);
7931 /* Sanity check. */
7932 gcc_assert (idx == GET_MODE_SIZE (mode));
7936 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
7937 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
7939 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7940 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7942 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
7943 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7945 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
7946 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
7948 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
7950 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
7952 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
7953 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
7955 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7956 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7958 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
7959 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7961 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
7962 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
7964 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
7966 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
7968 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7969 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7971 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7972 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7974 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
7975 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7977 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
7978 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7980 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
7982 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
7983 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
7985 while (0);
7987 if (immtype == -1)
7988 return false;
7990 if (info)
7992 info->element_width = elsize;
7993 info->mvn = emvn != 0;
7994 info->shift = eshift;
7996 unsigned HOST_WIDE_INT imm = 0;
7998 if (immtype >= 12 && immtype <= 15)
7999 info->msl = true;
8001 /* Un-invert bytes of recognized vector, if necessary. */
8002 if (invmask != 0)
8003 for (i = 0; i < idx; i++)
8004 bytes[i] ^= invmask;
8006 if (immtype == 17)
8008 /* FIXME: Broken on 32-bit H_W_I hosts. */
8009 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8011 for (i = 0; i < 8; i++)
8012 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8013 << (i * BITS_PER_UNIT);
8016 info->value = GEN_INT (imm);
8018 else
8020 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8021 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8023 /* Construct 'abcdefgh' because the assembler cannot handle
8024 generic constants. */
8025 if (info->mvn)
8026 imm = ~imm;
8027 imm = (imm >> info->shift) & 0xff;
8028 info->value = GEN_INT (imm);
8032 return true;
8033 #undef CHECK
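/* Worked example, added for exposition: a V4SImode constant whose
   elements are all 0x00ff0000 splats to the repeating byte pattern
   00 00 ff 00 and matches the elsize-32, shift-16 CHECK above
   (immtype 2), so INFO describes it as the 8-bit value 0xff shifted left
   by 16, i.e. something the assembler can express as a single
   "movi v0.4s, #0xff, lsl #16".  */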
8036 /* Check if immediate shift constants are within range. */
8037 bool
8038 aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
8040 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
8041 if (left)
8042 return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
8043 else
8044 return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
8047 /* Return true if X is a uniform vector where all elements
8048 are either the floating-point constant 0.0 or the
8049 integer constant 0. */
8050 bool
8051 aarch64_simd_imm_zero_p (rtx x, machine_mode mode)
8053 return x == CONST0_RTX (mode);
8056 bool
8057 aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
8059 HOST_WIDE_INT imm = INTVAL (x);
8060 int i;
8062 for (i = 0; i < 8; i++)
8064 unsigned int byte = imm & 0xff;
8065 if (byte != 0xff && byte != 0)
8066 return false;
8067 imm >>= 8;
8070 return true;
8073 bool
8074 aarch64_mov_operand_p (rtx x,
8075 enum aarch64_symbol_context context,
8076 machine_mode mode)
8078 if (GET_CODE (x) == HIGH
8079 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
8080 return true;
8082 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
8083 return true;
8085 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
8086 return true;
8088 return aarch64_classify_symbolic_expression (x, context)
8089 == SYMBOL_TINY_ABSOLUTE;
8092 /* Return a const_int vector of VAL. */
8094 aarch64_simd_gen_const_vector_dup (machine_mode mode, int val)
8096 int nunits = GET_MODE_NUNITS (mode);
8097 rtvec v = rtvec_alloc (nunits);
8098 int i;
8100 for (i = 0; i < nunits; i++)
8101 RTVEC_ELT (v, i) = GEN_INT (val);
8103 return gen_rtx_CONST_VECTOR (mode, v);
8106 /* Check OP is a legal scalar immediate for the MOVI instruction. */
8108 bool
8109 aarch64_simd_scalar_immediate_valid_for_move (rtx op, machine_mode mode)
8111 machine_mode vmode;
8113 gcc_assert (!VECTOR_MODE_P (mode));
8114 vmode = aarch64_preferred_simd_mode (mode);
8115 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
8116 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
8119 /* Construct and return a PARALLEL RTX vector with elements numbering the
8120 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
8121 the vector - from the perspective of the architecture. This does not
8122 line up with GCC's perspective on lane numbers, so we end up with
8123 different masks depending on our target endian-ness. The diagram
8124 below may help. We must draw the distinction when building masks
8125 which select one half of the vector. An instruction selecting
8126 architectural low-lanes for a big-endian target, must be described using
8127 a mask selecting GCC high-lanes.
8129 Big-Endian Little-Endian
8131 GCC 0 1 2 3 3 2 1 0
8132 | x | x | x | x | | x | x | x | x |
8133 Architecture 3 2 1 0 3 2 1 0
8135 Low Mask: { 2, 3 } { 0, 1 }
8136 High Mask: { 0, 1 } { 2, 3 }
8140 aarch64_simd_vect_par_cnst_half (machine_mode mode, bool high)
8142 int nunits = GET_MODE_NUNITS (mode);
8143 rtvec v = rtvec_alloc (nunits / 2);
8144 int high_base = nunits / 2;
8145 int low_base = 0;
8146 int base;
8147 rtx t1;
8148 int i;
8150 if (BYTES_BIG_ENDIAN)
8151 base = high ? low_base : high_base;
8152 else
8153 base = high ? high_base : low_base;
8155 for (i = 0; i < nunits / 2; i++)
8156 RTVEC_ELT (v, i) = GEN_INT (base + i);
8158 t1 = gen_rtx_PARALLEL (mode, v);
8159 return t1;
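/* Worked example, added for exposition: for V4SImode with HIGH == true
   this returns (parallel [2 3]) on little-endian but (parallel [0 1]) on
   big-endian, matching the "High Mask" row of the diagram above.  */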
8162 /* Check OP for validity as a PARALLEL RTX vector with elements
8163 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
8164 from the perspective of the architecture. See the diagram above
8165 aarch64_simd_vect_par_cnst_half for more details. */
8167 bool
8168 aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
8169 bool high)
8171 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high);
8172 HOST_WIDE_INT count_op = XVECLEN (op, 0);
8173 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
8174 int i = 0;
8176 if (!VECTOR_MODE_P (mode))
8177 return false;
8179 if (count_op != count_ideal)
8180 return false;
8182 for (i = 0; i < count_ideal; i++)
8184 rtx elt_op = XVECEXP (op, 0, i);
8185 rtx elt_ideal = XVECEXP (ideal, 0, i);
8187 if (!CONST_INT_P (elt_op)
8188 || INTVAL (elt_ideal) != INTVAL (elt_op))
8189 return false;
8191 return true;
8194 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
8195 HIGH (exclusive). */
8196 void
8197 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8199 HOST_WIDE_INT lane;
8200 gcc_assert (CONST_INT_P (operand));
8201 lane = INTVAL (operand);
8203 if (lane < low || lane >= high)
8204 error ("lane out of range");
8207 /* Emit code to place an AdvSIMD pair result in memory locations (with equal
8208 registers). */
8209 void
8210 aarch64_simd_emit_pair_result_insn (machine_mode mode,
8211 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
8212 rtx op1)
8214 rtx mem = gen_rtx_MEM (mode, destaddr);
8215 rtx tmp1 = gen_reg_rtx (mode);
8216 rtx tmp2 = gen_reg_rtx (mode);
8218 emit_insn (intfn (tmp1, op1, tmp2));
8220 emit_move_insn (mem, tmp1);
8221 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
8222 emit_move_insn (mem, tmp2);
8225 /* Return TRUE if OP is a valid vector addressing mode. */
8226 bool
8227 aarch64_simd_mem_operand_p (rtx op)
8229 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
8230 || REG_P (XEXP (op, 0)));
8233 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
8234 not to early-clobber SRC registers in the process.
8236 We assume that the operands described by SRC and DEST represent a
8237 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
8238 number of components into which the copy has been decomposed. */
8239 void
8240 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
8241 rtx *src, unsigned int count)
8243 unsigned int i;
8245 if (!reg_overlap_mentioned_p (operands[0], operands[1])
8246 || REGNO (operands[0]) < REGNO (operands[1]))
8248 for (i = 0; i < count; i++)
8250 operands[2 * i] = dest[i];
8251 operands[2 * i + 1] = src[i];
8254 else
8256 for (i = 0; i < count; i++)
8258 operands[2 * i] = dest[count - i - 1];
8259 operands[2 * i + 1] = src[count - i - 1];
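/* Illustrative example, added for exposition: when splitting a copy of an
   OImode value held in v0-v1 into v1-v2, the destination overlaps the
   source and has the higher starting REGNO, so the else branch above
   orders the component moves in reverse (v2 <- v1 first, then v1 <- v0)
   to avoid clobbering v1 before it has been read.  */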
8264 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
8265 one of VSTRUCT modes: OI, CI or XI. */
8267 aarch64_simd_attr_length_move (rtx_insn *insn)
8269 machine_mode mode;
8271 extract_insn_cached (insn);
8273 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
8275 mode = GET_MODE (recog_data.operand[0]);
8276 switch (mode)
8278 case OImode:
8279 return 8;
8280 case CImode:
8281 return 12;
8282 case XImode:
8283 return 16;
8284 default:
8285 gcc_unreachable ();
8288 return 4;
8291 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
8292 alignment of a vector to 128 bits. */
8293 static HOST_WIDE_INT
8294 aarch64_simd_vector_alignment (const_tree type)
8296 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
8297 return MIN (align, 128);
8300 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
8301 static bool
8302 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
8304 if (is_packed)
8305 return false;
8307 /* We guarantee alignment for vectors up to 128-bits. */
8308 if (tree_int_cst_compare (TYPE_SIZE (type),
8309 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
8310 return false;
8312 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
8313 return true;
8316 /* If VALS is a vector constant that can be loaded into a register
8317 using DUP, generate instructions to do so and return an RTX to
8318 assign to the register. Otherwise return NULL_RTX. */
8319 static rtx
8320 aarch64_simd_dup_constant (rtx vals)
8322 machine_mode mode = GET_MODE (vals);
8323 machine_mode inner_mode = GET_MODE_INNER (mode);
8324 int n_elts = GET_MODE_NUNITS (mode);
8325 bool all_same = true;
8326 rtx x;
8327 int i;
8329 if (GET_CODE (vals) != CONST_VECTOR)
8330 return NULL_RTX;
8332 for (i = 1; i < n_elts; ++i)
8334 x = CONST_VECTOR_ELT (vals, i);
8335 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
8336 all_same = false;
8339 if (!all_same)
8340 return NULL_RTX;
8342 /* We can load this constant by using DUP and a constant in a
8343 single ARM register. This will be cheaper than a vector
8344 load. */
8345 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
8346 return gen_rtx_VEC_DUPLICATE (mode, x);
8350 /* Generate code to load VALS, which is a PARALLEL containing only
8351 constants (for vec_init) or CONST_VECTOR, efficiently into a
8352 register. Returns an RTX to copy into the register, or NULL_RTX
8353 for a PARALLEL that can not be converted into a CONST_VECTOR. */
8354 static rtx
8355 aarch64_simd_make_constant (rtx vals)
8357 machine_mode mode = GET_MODE (vals);
8358 rtx const_dup;
8359 rtx const_vec = NULL_RTX;
8360 int n_elts = GET_MODE_NUNITS (mode);
8361 int n_const = 0;
8362 int i;
8364 if (GET_CODE (vals) == CONST_VECTOR)
8365 const_vec = vals;
8366 else if (GET_CODE (vals) == PARALLEL)
8368 /* A CONST_VECTOR must contain only CONST_INTs and
8369 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8370 Only store valid constants in a CONST_VECTOR. */
8371 for (i = 0; i < n_elts; ++i)
8373 rtx x = XVECEXP (vals, 0, i);
8374 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
8375 n_const++;
8377 if (n_const == n_elts)
8378 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8380 else
8381 gcc_unreachable ();
8383 if (const_vec != NULL_RTX
8384 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
8385 /* Load using MOVI/MVNI. */
8386 return const_vec;
8387 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
8388 /* Loaded using DUP. */
8389 return const_dup;
8390 else if (const_vec != NULL_RTX)
8391 /* Load from constant pool. We can not take advantage of single-cycle
8392 LD1 because we need a PC-relative addressing mode. */
8393 return const_vec;
8394 else
8395 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8396 We can not construct an initializer. */
8397 return NULL_RTX;
8400 void
8401 aarch64_expand_vector_init (rtx target, rtx vals)
8403 machine_mode mode = GET_MODE (target);
8404 machine_mode inner_mode = GET_MODE_INNER (mode);
8405 int n_elts = GET_MODE_NUNITS (mode);
8406 int n_var = 0, one_var = -1;
8407 bool all_same = true;
8408 rtx x, mem;
8409 int i;
8411 x = XVECEXP (vals, 0, 0);
8412 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8413 n_var = 1, one_var = 0;
8415 for (i = 1; i < n_elts; ++i)
8417 x = XVECEXP (vals, 0, i);
8418 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8419 ++n_var, one_var = i;
8421 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8422 all_same = false;
8425 if (n_var == 0)
8427 rtx constant = aarch64_simd_make_constant (vals);
8428 if (constant != NULL_RTX)
8430 emit_move_insn (target, constant);
8431 return;
8435 /* Splat a single non-constant element if we can. */
8436 if (all_same)
8438 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8439 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
8440 return;
8443 /* One field is non-constant. Load constant then overwrite varying
8444 field. This is more efficient than using the stack. */
8445 if (n_var == 1)
8447 rtx copy = copy_rtx (vals);
8448 rtx index = GEN_INT (one_var);
8449 enum insn_code icode;
8451 /* Load constant part of vector, substitute neighboring value for
8452 varying element. */
8453 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
8454 aarch64_expand_vector_init (target, copy);
8456 /* Insert variable. */
8457 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8458 icode = optab_handler (vec_set_optab, mode);
8459 gcc_assert (icode != CODE_FOR_nothing);
8460 emit_insn (GEN_FCN (icode) (target, x, index));
8461 return;
8464 /* Construct the vector in memory one field at a time
8465 and load the whole vector. */
8466 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
8467 for (i = 0; i < n_elts; i++)
8468 emit_move_insn (adjust_address_nv (mem, inner_mode,
8469 i * GET_MODE_SIZE (inner_mode)),
8470 XVECEXP (vals, 0, i));
8471 emit_move_insn (target, mem);
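/* For example, initializing a V4SI vector from { x, 1, 2, 3 } where only X
   lives in a register takes the n_var == 1 path above: the constant vector
   { 1, 1, 2, 3 } (the varying lane replaced by its neighbour) is
   materialized first, and the variable lane is then inserted through the
   vec_set pattern, which typically ends up as a single INS instruction.
   This avoids spilling the whole initializer to the stack.  */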
8475 static unsigned HOST_WIDE_INT
8476 aarch64_shift_truncation_mask (machine_mode mode)
8478 return
8479 (aarch64_vector_mode_supported_p (mode)
8480 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
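/* Returning 63 for DImode and 31 for SImode lets the middle end omit an
   explicit "& (width - 1)" on scalar shift counts, whereas vector (and
   vector-structure) modes return 0 because the AdvSIMD shift instructions
   do not simply truncate the shift amount.  */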
8483 #ifndef TLS_SECTION_ASM_FLAG
8484 #define TLS_SECTION_ASM_FLAG 'T'
8485 #endif
8487 void
8488 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
8489 tree decl ATTRIBUTE_UNUSED)
8491 char flagchars[10], *f = flagchars;
8493 /* If we have already declared this section, we can use an
8494 abbreviated form to switch back to it -- unless this section is
8495 part of a COMDAT group, in which case GAS requires the full
8496 declaration every time. */
8497 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8498 && (flags & SECTION_DECLARED))
8500 fprintf (asm_out_file, "\t.section\t%s\n", name);
8501 return;
8504 if (!(flags & SECTION_DEBUG))
8505 *f++ = 'a';
8506 if (flags & SECTION_WRITE)
8507 *f++ = 'w';
8508 if (flags & SECTION_CODE)
8509 *f++ = 'x';
8510 if (flags & SECTION_SMALL)
8511 *f++ = 's';
8512 if (flags & SECTION_MERGE)
8513 *f++ = 'M';
8514 if (flags & SECTION_STRINGS)
8515 *f++ = 'S';
8516 if (flags & SECTION_TLS)
8517 *f++ = TLS_SECTION_ASM_FLAG;
8518 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8519 *f++ = 'G';
8520 *f = '\0';
8522 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
8524 if (!(flags & SECTION_NOTYPE))
8526 const char *type;
8527 const char *format;
8529 if (flags & SECTION_BSS)
8530 type = "nobits";
8531 else
8532 type = "progbits";
8534 #ifdef TYPE_OPERAND_FMT
8535 format = "," TYPE_OPERAND_FMT;
8536 #else
8537 format = ",@%s";
8538 #endif
8540 fprintf (asm_out_file, format, type);
8542 if (flags & SECTION_ENTSIZE)
8543 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
8544 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8546 if (TREE_CODE (decl) == IDENTIFIER_NODE)
8547 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
8548 else
8549 fprintf (asm_out_file, ",%s,comdat",
8550 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
8554 putc ('\n', asm_out_file);
8557 /* Select a format to encode pointers in exception handling data. */
8559 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
8561 int type;
8562 switch (aarch64_cmodel)
8564 case AARCH64_CMODEL_TINY:
8565 case AARCH64_CMODEL_TINY_PIC:
8566 case AARCH64_CMODEL_SMALL:
8567 case AARCH64_CMODEL_SMALL_PIC:
8568 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
8569 for everything. */
8570 type = DW_EH_PE_sdata4;
8571 break;
8572 default:
8573 /* No assumptions here. 8-byte relocs required. */
8574 type = DW_EH_PE_sdata8;
8575 break;
8577 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
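/* For instance, under the small code model a global symbol is encoded as
   DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4, i.e. the familiar
   0x9b seen in .eh_frame/.gcc_except_table augmentation data, while the
   other code models fall back to 8-byte (sdata8) relocations.  */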
8580 /* Emit load exclusive. */
8582 static void
8583 aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
8584 rtx mem, rtx model_rtx)
8586 rtx (*gen) (rtx, rtx, rtx);
8588 switch (mode)
8590 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
8591 case HImode: gen = gen_aarch64_load_exclusivehi; break;
8592 case SImode: gen = gen_aarch64_load_exclusivesi; break;
8593 case DImode: gen = gen_aarch64_load_exclusivedi; break;
8594 default:
8595 gcc_unreachable ();
8598 emit_insn (gen (rval, mem, model_rtx));
8601 /* Emit store exclusive. */
8603 static void
8604 aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
8605 rtx rval, rtx mem, rtx model_rtx)
8607 rtx (*gen) (rtx, rtx, rtx, rtx);
8609 switch (mode)
8611 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
8612 case HImode: gen = gen_aarch64_store_exclusivehi; break;
8613 case SImode: gen = gen_aarch64_store_exclusivesi; break;
8614 case DImode: gen = gen_aarch64_store_exclusivedi; break;
8615 default:
8616 gcc_unreachable ();
8619 emit_insn (gen (bval, rval, mem, model_rtx));
8622 /* Emit jump instruction INSN and mark it as very unlikely to be taken. */
8624 static void
8625 aarch64_emit_unlikely_jump (rtx insn)
8627 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
8629 insn = emit_jump_insn (insn);
8630 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
8633 /* Expand a compare and swap pattern. */
8635 void
8636 aarch64_expand_compare_and_swap (rtx operands[])
8638 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
8639 machine_mode mode, cmp_mode;
8640 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
8642 bval = operands[0];
8643 rval = operands[1];
8644 mem = operands[2];
8645 oldval = operands[3];
8646 newval = operands[4];
8647 is_weak = operands[5];
8648 mod_s = operands[6];
8649 mod_f = operands[7];
8650 mode = GET_MODE (mem);
8651 cmp_mode = mode;
8653 /* Normally the succ memory model must be stronger than fail, but in the
8654 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
8655 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
8657 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
8658 && INTVAL (mod_s) == MEMMODEL_RELEASE)
8659 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
8661 switch (mode)
8663 case QImode:
8664 case HImode:
8665 /* For short modes, we're going to perform the comparison in SImode,
8666 so do the zero-extension now. */
8667 cmp_mode = SImode;
8668 rval = gen_reg_rtx (SImode);
8669 oldval = convert_modes (SImode, mode, oldval, true);
8670 /* Fall through. */
8672 case SImode:
8673 case DImode:
8674 /* Force the value into a register if needed. */
8675 if (!aarch64_plus_operand (oldval, mode))
8676 oldval = force_reg (cmp_mode, oldval);
8677 break;
8679 default:
8680 gcc_unreachable ();
8683 switch (mode)
8685 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
8686 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
8687 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
8688 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
8689 default:
8690 gcc_unreachable ();
8693 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
8695 if (mode == QImode || mode == HImode)
8696 emit_move_insn (operands[1], gen_lowpart (mode, rval));
8698 x = gen_rtx_REG (CCmode, CC_REGNUM);
8699 x = gen_rtx_EQ (SImode, x, const0_rtx);
8700 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
8703 /* Split a compare and swap pattern. */
8705 void
8706 aarch64_split_compare_and_swap (rtx operands[])
8708 rtx rval, mem, oldval, newval, scratch;
8709 machine_mode mode;
8710 bool is_weak;
8711 rtx_code_label *label1, *label2;
8712 rtx x, cond;
8714 rval = operands[0];
8715 mem = operands[1];
8716 oldval = operands[2];
8717 newval = operands[3];
8718 is_weak = (operands[4] != const0_rtx);
8719 scratch = operands[7];
8720 mode = GET_MODE (mem);
8722 label1 = NULL;
8723 if (!is_weak)
8725 label1 = gen_label_rtx ();
8726 emit_label (label1);
8728 label2 = gen_label_rtx ();
8730 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
8732 cond = aarch64_gen_compare_reg (NE, rval, oldval);
8733 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8734 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8735 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
8736 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8738 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
8740 if (!is_weak)
8742 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
8743 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8744 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
8745 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8747 else
8749 cond = gen_rtx_REG (CCmode, CC_REGNUM);
8750 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
8751 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
8754 emit_label (label2);
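/* Roughly, for a strong compare-and-swap on a DImode location with SEQ_CST
   ordering, the split above corresponds to a sequence of this shape
   (register numbers illustrative only):

       .Lretry:
         ldaxr   x0, [x1]          ; load-exclusive, acquire
         cmp     x0, x2            ; compare against the expected value
         b.ne    .Ldone            ; mismatch: give up (branch marked unlikely)
         stlxr   w3, x4, [x1]      ; store-exclusive, release
         cbnz    w3, .Lretry       ; store-exclusive failed: retry
       .Ldone:

   A weak compare-and-swap omits the retry loop and instead leaves the
   store-exclusive result in the condition flags for the caller.  */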
8757 /* Split an atomic operation. */
8759 void
8760 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
8761 rtx value, rtx model_rtx, rtx cond)
8763 machine_mode mode = GET_MODE (mem);
8764 machine_mode wmode = (mode == DImode ? DImode : SImode);
8765 rtx_code_label *label;
8766 rtx x;
8768 label = gen_label_rtx ();
8769 emit_label (label);
8771 if (new_out)
8772 new_out = gen_lowpart (wmode, new_out);
8773 if (old_out)
8774 old_out = gen_lowpart (wmode, old_out);
8775 else
8776 old_out = new_out;
8777 value = simplify_gen_subreg (wmode, value, mode, 0);
8779 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
8781 switch (code)
8783 case SET:
8784 new_out = value;
8785 break;
8787 case NOT:
8788 x = gen_rtx_AND (wmode, old_out, value);
8789 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8790 x = gen_rtx_NOT (wmode, new_out);
8791 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8792 break;
8794 case MINUS:
8795 if (CONST_INT_P (value))
8797 value = GEN_INT (-INTVAL (value));
8798 code = PLUS;
8800 /* Fall through. */
8802 default:
8803 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
8804 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8805 break;
8808 aarch64_emit_store_exclusive (mode, cond, mem,
8809 gen_lowpart (mode, new_out), model_rtx);
8811 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8812 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8813 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
8814 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8817 static void
8818 aarch64_print_extension (void)
8820 const struct aarch64_option_extension *opt = NULL;
8822 for (opt = all_extensions; opt->name != NULL; opt++)
8823 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
8824 asm_fprintf (asm_out_file, "+%s", opt->name);
8826 asm_fprintf (asm_out_file, "\n");
8829 static void
8830 aarch64_start_file (void)
8832 if (selected_arch)
8834 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
8835 aarch64_print_extension ();
8837 else if (selected_cpu)
8839 const char *truncated_name
8840 = aarch64_rewrite_selected_cpu (selected_cpu->name);
8841 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
8842 aarch64_print_extension ();
8844 default_file_start();
8847 /* Target hook for c_mode_for_suffix. */
8848 static machine_mode
8849 aarch64_c_mode_for_suffix (char suffix)
8851 if (suffix == 'q')
8852 return TFmode;
8854 return VOIDmode;
8857 /* We can only represent floating point constants which will fit in
8858 "quarter-precision" values. These values are characterised by
8859 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given by:
8862 (-1)^s * (n/16) * 2^r
8864 Where:
8865 's' is the sign bit.
8866 'n' is an integer in the range 16 <= n <= 31.
8867 'r' is an integer in the range -3 <= r <= 4. */
8869 /* Return true iff X can be represented by a quarter-precision
8870 floating point immediate operand. Note, we cannot represent 0.0. */
8871 bool
8872 aarch64_float_const_representable_p (rtx x)
8874 /* This represents our current view of how many bits
8875 make up the mantissa. */
8876 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8877 int exponent;
8878 unsigned HOST_WIDE_INT mantissa, mask;
8879 REAL_VALUE_TYPE r, m;
8880 bool fail;
8882 if (!CONST_DOUBLE_P (x))
8883 return false;
8885 if (GET_MODE (x) == VOIDmode)
8886 return false;
8888 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8890 /* We cannot represent infinities, NaNs or +/-zero. We won't
8891 know if we have +zero until we analyse the mantissa, but we
8892 can reject the other invalid values. */
8893 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
8894 || REAL_VALUE_MINUS_ZERO (r))
8895 return false;
8897 /* Extract exponent. */
8898 r = real_value_abs (&r);
8899 exponent = REAL_EXP (&r);
8901 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8902 highest (sign) bit, with a fixed binary point at bit point_pos.
8903 m1 holds the low part of the mantissa, m2 the high part.
8904 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
8905 bits for the mantissa, this can fail (low bits will be lost). */
8906 real_ldexp (&m, &r, point_pos - exponent);
8907 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
8909 /* If the low part of the mantissa has bits set we cannot represent
8910 the value. */
8911 if (w.elt (0) != 0)
8912 return false;
8913 /* We have rejected the lower HOST_WIDE_INT, so update our
8914 understanding of how many bits lie in the mantissa and
8915 look only at the high HOST_WIDE_INT. */
8916 mantissa = w.elt (1);
8917 point_pos -= HOST_BITS_PER_WIDE_INT;
8919 /* We can only represent values with a mantissa of the form 1.xxxx. */
8920 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8921 if ((mantissa & mask) != 0)
8922 return false;
8924 /* Having filtered unrepresentable values, we may now remove all
8925 but the highest 5 bits. */
8926 mantissa >>= point_pos - 5;
8928 /* We cannot represent the value 0.0, so reject it. This is handled
8929 elsewhere. */
8930 if (mantissa == 0)
8931 return false;
8933 /* Then, as bit 4 is always set, we can mask it off, leaving
8934 the mantissa in the range [0, 15]. */
8935 mantissa &= ~(1 << 4);
8936 gcc_assert (mantissa <= 15);
8938 /* GCC internally does not use IEEE754-like encoding (where normalized
8939 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
8940 Our mantissa values are shifted 4 places to the left relative to
8941 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
8942 by 5 places to correct for GCC's representation. */
8943 exponent = 5 - exponent;
8945 return (exponent >= 0 && exponent <= 7);
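/* Worked example: 1.5 can be written as (-1)^0 * (24/16) * 2^0, so s = 0,
   n = 24 and r = 0 are all within range and the constant is representable
   (and hence usable as an FMOV-immediate operand).  0.0 and 1.0/3.0, by
   contrast, are rejected by the checks above.  */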
8948 char*
8949 aarch64_output_simd_mov_immediate (rtx const_vector,
8950 machine_mode mode,
8951 unsigned width)
8953 bool is_valid;
8954 static char templ[40];
8955 const char *mnemonic;
8956 const char *shift_op;
8957 unsigned int lane_count = 0;
8958 char element_char;
8960 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
8962 /* This will return true to show const_vector is legal for use as
8963 an AdvSIMD MOVI (or, implicitly, MVNI) immediate. It will
8964 also update INFO to show how the immediate should be generated. */
8965 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
8966 gcc_assert (is_valid);
8968 element_char = sizetochar (info.element_width);
8969 lane_count = width / info.element_width;
8971 mode = GET_MODE_INNER (mode);
8972 if (mode == SFmode || mode == DFmode)
8974 gcc_assert (info.shift == 0 && ! info.mvn);
8975 if (aarch64_float_const_zero_rtx_p (info.value))
8976 info.value = GEN_INT (0);
8977 else
8979 #define buf_size 20
8980 REAL_VALUE_TYPE r;
8981 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
8982 char float_buf[buf_size] = {'\0'};
8983 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
8984 #undef buf_size
8986 if (lane_count == 1)
8987 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
8988 else
8989 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
8990 lane_count, element_char, float_buf);
8991 return templ;
8995 mnemonic = info.mvn ? "mvni" : "movi";
8996 shift_op = info.msl ? "msl" : "lsl";
8998 if (lane_count == 1)
8999 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
9000 mnemonic, UINTVAL (info.value));
9001 else if (info.shift)
9002 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
9003 ", %s %d", mnemonic, lane_count, element_char,
9004 UINTVAL (info.value), shift_op, info.shift);
9005 else
9006 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
9007 mnemonic, lane_count, element_char, UINTVAL (info.value));
9008 return templ;
9011 char*
9012 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
9013 machine_mode mode)
9015 machine_mode vmode;
9017 gcc_assert (!VECTOR_MODE_P (mode));
9018 vmode = aarch64_simd_container_mode (mode, 64);
9019 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
9020 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
9023 /* Split operands into moves from op[1] + op[2] into op[0]. */
9025 void
9026 aarch64_split_combinev16qi (rtx operands[3])
9028 unsigned int dest = REGNO (operands[0]);
9029 unsigned int src1 = REGNO (operands[1]);
9030 unsigned int src2 = REGNO (operands[2]);
9031 machine_mode halfmode = GET_MODE (operands[1]);
9032 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
9033 rtx destlo, desthi;
9035 gcc_assert (halfmode == V16QImode);
9037 if (src1 == dest && src2 == dest + halfregs)
9039 /* No-op move. Can't split to nothing; emit something. */
9040 emit_note (NOTE_INSN_DELETED);
9041 return;
9044 /* Preserve register attributes for variable tracking. */
9045 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
9046 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
9047 GET_MODE_SIZE (halfmode));
9049 /* Special case of reversed high/low parts. */
9050 if (reg_overlap_mentioned_p (operands[2], destlo)
9051 && reg_overlap_mentioned_p (operands[1], desthi))
9053 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
9054 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
9055 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
9057 else if (!reg_overlap_mentioned_p (operands[2], destlo))
9059 /* Try to avoid unnecessary moves if part of the result
9060 is in the right place already. */
9061 if (src1 != dest)
9062 emit_move_insn (destlo, operands[1]);
9063 if (src2 != dest + halfregs)
9064 emit_move_insn (desthi, operands[2]);
9066 else
9068 if (src2 != dest + halfregs)
9069 emit_move_insn (desthi, operands[2]);
9070 if (src1 != dest)
9071 emit_move_insn (destlo, operands[1]);
9075 /* vec_perm support. */
9077 #define MAX_VECT_LEN 16
9079 struct expand_vec_perm_d
9081 rtx target, op0, op1;
9082 unsigned char perm[MAX_VECT_LEN];
9083 machine_mode vmode;
9084 unsigned char nelt;
9085 bool one_vector_p;
9086 bool testing_p;
9089 /* Generate a variable permutation. */
9091 static void
9092 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
9094 machine_mode vmode = GET_MODE (target);
9095 bool one_vector_p = rtx_equal_p (op0, op1);
9097 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
9098 gcc_checking_assert (GET_MODE (op0) == vmode);
9099 gcc_checking_assert (GET_MODE (op1) == vmode);
9100 gcc_checking_assert (GET_MODE (sel) == vmode);
9101 gcc_checking_assert (TARGET_SIMD);
9103 if (one_vector_p)
9105 if (vmode == V8QImode)
9107 /* Expand the argument to a V16QI mode by duplicating it. */
9108 rtx pair = gen_reg_rtx (V16QImode);
9109 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
9110 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
9112 else
9114 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
9117 else
9119 rtx pair;
9121 if (vmode == V8QImode)
9123 pair = gen_reg_rtx (V16QImode);
9124 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
9125 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
9127 else
9129 pair = gen_reg_rtx (OImode);
9130 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
9131 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
9136 void
9137 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
9139 machine_mode vmode = GET_MODE (target);
9140 unsigned int nelt = GET_MODE_NUNITS (vmode);
9141 bool one_vector_p = rtx_equal_p (op0, op1);
9142 rtx mask;
9144 /* The TBL instruction does not use a modulo index, so we must take care
9145 of that ourselves. */
9146 mask = aarch64_simd_gen_const_vector_dup (vmode,
9147 one_vector_p ? nelt - 1 : 2 * nelt - 1);
9148 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
9150 /* For big-endian, we also need to reverse the index within the vector
9151 (but not which vector). */
9152 if (BYTES_BIG_ENDIAN)
9154 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
9155 if (!one_vector_p)
9156 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
9157 sel = expand_simple_binop (vmode, XOR, sel, mask,
9158 NULL, 0, OPTAB_LIB_WIDEN);
9160 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
9163 /* Recognize patterns suitable for the TRN instructions. */
9164 static bool
9165 aarch64_evpc_trn (struct expand_vec_perm_d *d)
9167 unsigned int i, odd, mask, nelt = d->nelt;
9168 rtx out, in0, in1, x;
9169 rtx (*gen) (rtx, rtx, rtx);
9170 machine_mode vmode = d->vmode;
9172 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9173 return false;
9175 /* Note that these are little-endian tests.
9176 We correct for big-endian later. */
9177 if (d->perm[0] == 0)
9178 odd = 0;
9179 else if (d->perm[0] == 1)
9180 odd = 1;
9181 else
9182 return false;
9183 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9185 for (i = 0; i < nelt; i += 2)
9187 if (d->perm[i] != i + odd)
9188 return false;
9189 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
9190 return false;
9193 /* Success! */
9194 if (d->testing_p)
9195 return true;
9197 in0 = d->op0;
9198 in1 = d->op1;
9199 if (BYTES_BIG_ENDIAN)
9201 x = in0, in0 = in1, in1 = x;
9202 odd = !odd;
9204 out = d->target;
9206 if (odd)
9208 switch (vmode)
9210 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
9211 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
9212 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
9213 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
9214 case V4SImode: gen = gen_aarch64_trn2v4si; break;
9215 case V2SImode: gen = gen_aarch64_trn2v2si; break;
9216 case V2DImode: gen = gen_aarch64_trn2v2di; break;
9217 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
9218 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
9219 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
9220 default:
9221 return false;
9224 else
9226 switch (vmode)
9228 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
9229 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
9230 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
9231 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
9232 case V4SImode: gen = gen_aarch64_trn1v4si; break;
9233 case V2SImode: gen = gen_aarch64_trn1v2si; break;
9234 case V2DImode: gen = gen_aarch64_trn1v2di; break;
9235 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
9236 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
9237 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
9238 default:
9239 return false;
9243 emit_insn (gen (out, in0, in1));
9244 return true;
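/* For example, on V4SI a two-operand permutation selecting elements
   { 0, 4, 2, 6 } (the even lanes of each input, interleaved) passes the
   test above with odd == 0 and is emitted as trn1, while { 1, 5, 3, 7 }
   matches with odd == 1 and becomes trn2 (operands and ODD are swapped
   first on big-endian).  */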
9247 /* Recognize patterns suitable for the UZP instructions. */
9248 static bool
9249 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
9251 unsigned int i, odd, mask, nelt = d->nelt;
9252 rtx out, in0, in1, x;
9253 rtx (*gen) (rtx, rtx, rtx);
9254 machine_mode vmode = d->vmode;
9256 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9257 return false;
9259 /* Note that these are little-endian tests.
9260 We correct for big-endian later. */
9261 if (d->perm[0] == 0)
9262 odd = 0;
9263 else if (d->perm[0] == 1)
9264 odd = 1;
9265 else
9266 return false;
9267 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9269 for (i = 0; i < nelt; i++)
9271 unsigned elt = (i * 2 + odd) & mask;
9272 if (d->perm[i] != elt)
9273 return false;
9276 /* Success! */
9277 if (d->testing_p)
9278 return true;
9280 in0 = d->op0;
9281 in1 = d->op1;
9282 if (BYTES_BIG_ENDIAN)
9284 x = in0, in0 = in1, in1 = x;
9285 odd = !odd;
9287 out = d->target;
9289 if (odd)
9291 switch (vmode)
9293 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
9294 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
9295 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
9296 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
9297 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
9298 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
9299 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
9300 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
9301 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
9302 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
9303 default:
9304 return false;
9307 else
9309 switch (vmode)
9311 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
9312 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
9313 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
9314 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
9315 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
9316 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
9317 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
9318 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
9319 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
9320 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
9321 default:
9322 return false;
9326 emit_insn (gen (out, in0, in1));
9327 return true;
9330 /* Recognize patterns suitable for the ZIP instructions. */
9331 static bool
9332 aarch64_evpc_zip (struct expand_vec_perm_d *d)
9334 unsigned int i, high, mask, nelt = d->nelt;
9335 rtx out, in0, in1, x;
9336 rtx (*gen) (rtx, rtx, rtx);
9337 machine_mode vmode = d->vmode;
9339 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9340 return false;
9342 /* Note that these are little-endian tests.
9343 We correct for big-endian later. */
9344 high = nelt / 2;
9345 if (d->perm[0] == high)
9346 /* Do Nothing. */
9348 else if (d->perm[0] == 0)
9349 high = 0;
9350 else
9351 return false;
9352 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9354 for (i = 0; i < nelt / 2; i++)
9356 unsigned elt = (i + high) & mask;
9357 if (d->perm[i * 2] != elt)
9358 return false;
9359 elt = (elt + nelt) & mask;
9360 if (d->perm[i * 2 + 1] != elt)
9361 return false;
9364 /* Success! */
9365 if (d->testing_p)
9366 return true;
9368 in0 = d->op0;
9369 in1 = d->op1;
9370 if (BYTES_BIG_ENDIAN)
9372 x = in0, in0 = in1, in1 = x;
9373 high = !high;
9375 out = d->target;
9377 if (high)
9379 switch (vmode)
9381 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
9382 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
9383 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
9384 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
9385 case V4SImode: gen = gen_aarch64_zip2v4si; break;
9386 case V2SImode: gen = gen_aarch64_zip2v2si; break;
9387 case V2DImode: gen = gen_aarch64_zip2v2di; break;
9388 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
9389 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
9390 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
9391 default:
9392 return false;
9395 else
9397 switch (vmode)
9399 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
9400 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
9401 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
9402 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
9403 case V4SImode: gen = gen_aarch64_zip1v4si; break;
9404 case V2SImode: gen = gen_aarch64_zip1v2si; break;
9405 case V2DImode: gen = gen_aarch64_zip1v2di; break;
9406 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
9407 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
9408 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
9409 default:
9410 return false;
9414 emit_insn (gen (out, in0, in1));
9415 return true;
9418 /* Recognize patterns for the EXT insn. */
9420 static bool
9421 aarch64_evpc_ext (struct expand_vec_perm_d *d)
9423 unsigned int i, nelt = d->nelt;
9424 rtx (*gen) (rtx, rtx, rtx, rtx);
9425 rtx offset;
9427 unsigned int location = d->perm[0]; /* Always < nelt. */
9429 /* Check if the extracted indices are increasing by one. */
9430 for (i = 1; i < nelt; i++)
9432 unsigned int required = location + i;
9433 if (d->one_vector_p)
9435 /* We'll pass the same vector in twice, so allow indices to wrap. */
9436 required &= (nelt - 1);
9438 if (d->perm[i] != required)
9439 return false;
9442 switch (d->vmode)
9444 case V16QImode: gen = gen_aarch64_extv16qi; break;
9445 case V8QImode: gen = gen_aarch64_extv8qi; break;
9446 case V4HImode: gen = gen_aarch64_extv4hi; break;
9447 case V8HImode: gen = gen_aarch64_extv8hi; break;
9448 case V2SImode: gen = gen_aarch64_extv2si; break;
9449 case V4SImode: gen = gen_aarch64_extv4si; break;
9450 case V2SFmode: gen = gen_aarch64_extv2sf; break;
9451 case V4SFmode: gen = gen_aarch64_extv4sf; break;
9452 case V2DImode: gen = gen_aarch64_extv2di; break;
9453 case V2DFmode: gen = gen_aarch64_extv2df; break;
9454 default:
9455 return false;
9458 /* Success! */
9459 if (d->testing_p)
9460 return true;
9462 /* The case where (location == 0) is a no-op for both big- and little-endian,
9463 and is removed by the mid-end at optimization levels -O1 and higher. */
9465 if (BYTES_BIG_ENDIAN && (location != 0))
9467 /* After setup, we want the high elements of the first vector (stored
9468 at the LSB end of the register), and the low elements of the second
9469 vector (stored at the MSB end of the register). So swap. */
9470 rtx temp = d->op0;
9471 d->op0 = d->op1;
9472 d->op1 = temp;
9473 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
9474 location = nelt - location;
9477 offset = GEN_INT (location);
9478 emit_insn (gen (d->target, d->op0, d->op1, offset));
9479 return true;
9482 /* Recognize patterns for the REV insns. */
9484 static bool
9485 aarch64_evpc_rev (struct expand_vec_perm_d *d)
9487 unsigned int i, j, diff, nelt = d->nelt;
9488 rtx (*gen) (rtx, rtx);
9490 if (!d->one_vector_p)
9491 return false;
9493 diff = d->perm[0];
9494 switch (diff)
9496 case 7:
9497 switch (d->vmode)
9499 case V16QImode: gen = gen_aarch64_rev64v16qi; break;
9500 case V8QImode: gen = gen_aarch64_rev64v8qi; break;
9501 default:
9502 return false;
9504 break;
9505 case 3:
9506 switch (d->vmode)
9508 case V16QImode: gen = gen_aarch64_rev32v16qi; break;
9509 case V8QImode: gen = gen_aarch64_rev32v8qi; break;
9510 case V8HImode: gen = gen_aarch64_rev64v8hi; break;
9511 case V4HImode: gen = gen_aarch64_rev64v4hi; break;
9512 default:
9513 return false;
9515 break;
9516 case 1:
9517 switch (d->vmode)
9519 case V16QImode: gen = gen_aarch64_rev16v16qi; break;
9520 case V8QImode: gen = gen_aarch64_rev16v8qi; break;
9521 case V8HImode: gen = gen_aarch64_rev32v8hi; break;
9522 case V4HImode: gen = gen_aarch64_rev32v4hi; break;
9523 case V4SImode: gen = gen_aarch64_rev64v4si; break;
9524 case V2SImode: gen = gen_aarch64_rev64v2si; break;
9525 case V4SFmode: gen = gen_aarch64_rev64v4sf; break;
9526 case V2SFmode: gen = gen_aarch64_rev64v2sf; break;
9527 default:
9528 return false;
9530 break;
9531 default:
9532 return false;
9535 for (i = 0; i < nelt ; i += diff + 1)
9536 for (j = 0; j <= diff; j += 1)
9538 /* This is guaranteed to be true as the value of diff
9539 is 7, 3 or 1 and we should have enough elements in the
9540 queue to generate this. Getting a vector mask with a
9541 value of diff other than these values implies that
9542 something is wrong by the time we get here. */
9543 gcc_assert (i + j < nelt);
9544 if (d->perm[i + j] != i + diff - j)
9545 return false;
9548 /* Success! */
9549 if (d->testing_p)
9550 return true;
9552 emit_insn (gen (d->target, d->op0));
9553 return true;
9556 static bool
9557 aarch64_evpc_dup (struct expand_vec_perm_d *d)
9559 rtx (*gen) (rtx, rtx, rtx);
9560 rtx out = d->target;
9561 rtx in0;
9562 machine_mode vmode = d->vmode;
9563 unsigned int i, elt, nelt = d->nelt;
9564 rtx lane;
9566 elt = d->perm[0];
9567 for (i = 1; i < nelt; i++)
9569 if (elt != d->perm[i])
9570 return false;
9573 /* The generic preparation in aarch64_expand_vec_perm_const_1
9574 swaps the operand order and the permute indices if it finds
9575 d->perm[0] to be in the second operand. Thus, we can always
9576 use d->op0 and need not do any extra arithmetic to get the
9577 correct lane number. */
9578 in0 = d->op0;
9579 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
9581 switch (vmode)
9583 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
9584 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
9585 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
9586 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
9587 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
9588 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
9589 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
9590 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
9591 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
9592 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
9593 default:
9594 return false;
9597 emit_insn (gen (out, in0, lane));
9598 return true;
9601 static bool
9602 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
9604 rtx rperm[MAX_VECT_LEN], sel;
9605 machine_mode vmode = d->vmode;
9606 unsigned int i, nelt = d->nelt;
9608 if (d->testing_p)
9609 return true;
9611 /* Generic code will try constant permutation twice. Once with the
9612 original mode and again with the elements lowered to QImode.
9613 So wait and don't do the selector expansion ourselves. */
9614 if (vmode != V8QImode && vmode != V16QImode)
9615 return false;
9617 for (i = 0; i < nelt; ++i)
9619 int nunits = GET_MODE_NUNITS (vmode);
9621 /* If big-endian and two vectors we end up with a weird mixed-endian
9622 mode on NEON. Reverse the index within each word but not the word
9623 itself. */
9624 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
9625 : d->perm[i]);
9627 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
9628 sel = force_reg (vmode, sel);
9630 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
9631 return true;
9634 static bool
9635 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
9637 /* The pattern matching functions above are written to look for a small
9638 number to begin the sequence (0, 1, N/2). If we begin with an index
9639 from the second operand, we can swap the operands. */
9640 if (d->perm[0] >= d->nelt)
9642 unsigned i, nelt = d->nelt;
9643 rtx x;
9645 gcc_assert (nelt == (nelt & -nelt));
9646 for (i = 0; i < nelt; ++i)
9647 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
9649 x = d->op0;
9650 d->op0 = d->op1;
9651 d->op1 = x;
9654 if (TARGET_SIMD)
9656 if (aarch64_evpc_rev (d))
9657 return true;
9658 else if (aarch64_evpc_ext (d))
9659 return true;
9660 else if (aarch64_evpc_dup (d))
9661 return true;
9662 else if (aarch64_evpc_zip (d))
9663 return true;
9664 else if (aarch64_evpc_uzp (d))
9665 return true;
9666 else if (aarch64_evpc_trn (d))
9667 return true;
9668 return aarch64_evpc_tbl (d);
9670 return false;
9673 /* Expand a vec_perm_const pattern. */
9675 bool
9676 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
9678 struct expand_vec_perm_d d;
9679 int i, nelt, which;
9681 d.target = target;
9682 d.op0 = op0;
9683 d.op1 = op1;
9685 d.vmode = GET_MODE (target);
9686 gcc_assert (VECTOR_MODE_P (d.vmode));
9687 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9688 d.testing_p = false;
9690 for (i = which = 0; i < nelt; ++i)
9692 rtx e = XVECEXP (sel, 0, i);
9693 int ei = INTVAL (e) & (2 * nelt - 1);
9694 which |= (ei < nelt ? 1 : 2);
9695 d.perm[i] = ei;
9698 switch (which)
9700 default:
9701 gcc_unreachable ();
9703 case 3:
9704 d.one_vector_p = false;
9705 if (!rtx_equal_p (op0, op1))
9706 break;
9708 /* The elements of PERM do not suggest that only the first operand
9709 is used, but both operands are identical. Allow easier matching
9710 of the permutation by folding the permutation into the single
9711 input vector. */
9712 /* Fall Through. */
9713 case 2:
9714 for (i = 0; i < nelt; ++i)
9715 d.perm[i] &= nelt - 1;
9716 d.op0 = op1;
9717 d.one_vector_p = true;
9718 break;
9720 case 1:
9721 d.op1 = op0;
9722 d.one_vector_p = true;
9723 break;
9726 return aarch64_expand_vec_perm_const_1 (&d);
9729 static bool
9730 aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
9731 const unsigned char *sel)
9733 struct expand_vec_perm_d d;
9734 unsigned int i, nelt, which;
9735 bool ret;
9737 d.vmode = vmode;
9738 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9739 d.testing_p = true;
9740 memcpy (d.perm, sel, nelt);
9742 /* Calculate whether all elements are in one vector. */
9743 for (i = which = 0; i < nelt; ++i)
9745 unsigned char e = d.perm[i];
9746 gcc_assert (e < 2 * nelt);
9747 which |= (e < nelt ? 1 : 2);
9750 /* If all elements are from the second vector, reindex as if from the
9751 first vector. */
9752 if (which == 2)
9753 for (i = 0; i < nelt; ++i)
9754 d.perm[i] -= nelt;
9756 /* Check whether the mask can be applied to a single vector. */
9757 d.one_vector_p = (which != 3);
9759 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
9760 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
9761 if (!d.one_vector_p)
9762 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
9764 start_sequence ();
9765 ret = aarch64_expand_vec_perm_const_1 (&d);
9766 end_sequence ();
9768 return ret;
9771 /* Implement target hook CANNOT_CHANGE_MODE_CLASS. */
9772 bool
9773 aarch64_cannot_change_mode_class (machine_mode from,
9774 machine_mode to,
9775 enum reg_class rclass)
9777 /* Full-reg subregs are allowed on general regs or any class if they are
9778 the same size. */
9779 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
9780 || !reg_classes_intersect_p (FP_REGS, rclass))
9781 return false;
9783 /* Limited combinations of subregs are safe on FPREGs. Particularly,
9784 1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
9785 2. Scalar to Scalar for integer modes or same size float modes.
9786 3. Vector to Vector modes.
9787 4. On little-endian only, Vector-Structure to Vector modes. */
9788 if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
9790 if (aarch64_vector_mode_supported_p (from)
9791 && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
9792 return false;
9794 if (GET_MODE_NUNITS (from) == 1
9795 && GET_MODE_NUNITS (to) == 1
9796 && (GET_MODE_CLASS (from) == MODE_INT
9797 || from == to))
9798 return false;
9800 if (aarch64_vector_mode_supported_p (from)
9801 && aarch64_vector_mode_supported_p (to))
9802 return false;
9804 /* Within a vector structure straddling multiple vector registers
9805 we are in a mixed-endian representation. As such, we can't
9806 easily change modes for BYTES_BIG_ENDIAN. Otherwise, we can
9807 switch between vectors and vector structures cheaply. */
9808 if (!BYTES_BIG_ENDIAN)
9809 if ((aarch64_vector_mode_supported_p (from)
9810 && aarch64_vect_struct_mode_p (to))
9811 || (aarch64_vector_mode_supported_p (to)
9812 && aarch64_vect_struct_mode_p (from)))
9813 return false;
9816 return true;
9819 /* Implement MODES_TIEABLE_P. */
9821 bool
9822 aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
9824 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
9825 return true;
9827 /* We specifically want to allow elements of "structure" modes to
9828 be tieable to the structure. This more general condition allows
9829 other rarer situations too. */
9830 if (TARGET_SIMD
9831 && aarch64_vector_mode_p (mode1)
9832 && aarch64_vector_mode_p (mode2))
9833 return true;
9835 return false;
9838 /* Return a new RTX holding the result of moving POINTER forward by
9839 AMOUNT bytes. */
9841 static rtx
9842 aarch64_move_pointer (rtx pointer, int amount)
9844 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
9846 return adjust_automodify_address (pointer, GET_MODE (pointer),
9847 next, amount);
9850 /* Return a new RTX holding the result of moving POINTER forward by the
9851 size of the mode it points to. */
9853 static rtx
9854 aarch64_progress_pointer (rtx pointer)
9856 HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
9858 return aarch64_move_pointer (pointer, amount);
9861 /* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
9862 MODE bytes. */
9864 static void
9865 aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
9866 machine_mode mode)
9868 rtx reg = gen_reg_rtx (mode);
9870 /* "Cast" the pointers to the correct mode. */
9871 *src = adjust_address (*src, mode, 0);
9872 *dst = adjust_address (*dst, mode, 0);
9873 /* Emit the memcpy. */
9874 emit_move_insn (reg, *src);
9875 emit_move_insn (*dst, reg);
9876 /* Move the pointers forward. */
9877 *src = aarch64_progress_pointer (*src);
9878 *dst = aarch64_progress_pointer (*dst);
9881 /* Expand movmem, as if from a __builtin_memcpy. Return true if
9882 we succeed, otherwise return false. */
9884 bool
9885 aarch64_expand_movmem (rtx *operands)
9887 unsigned int n;
9888 rtx dst = operands[0];
9889 rtx src = operands[1];
9890 rtx base;
9891 bool speed_p = !optimize_function_for_size_p (cfun);
9893 /* When optimizing for size, give a better estimate of the length of a
9894 memcpy call, but use the default otherwise. */
9895 unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
9897 /* We can't do anything smart if the amount to copy is not constant. */
9898 if (!CONST_INT_P (operands[2]))
9899 return false;
9901 n = UINTVAL (operands[2]);
9903 /* Try to keep the number of instructions low. For cases below 16 bytes we
9904 need to make at most two moves. For cases above 16 bytes it will be one
9905 move for each 16 byte chunk, then at most two additional moves. */
9906 if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
9907 return false;
9909 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9910 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
9912 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
9913 src = adjust_automodify_address (src, VOIDmode, base, 0);
9915 /* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a
9916 1-byte chunk. */
9917 if (n < 4)
9919 if (n >= 2)
9921 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
9922 n -= 2;
9925 if (n == 1)
9926 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
9928 return true;
9931 /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second
9932 4-byte chunk, partially overlapping with the previously copied chunk. */
9933 if (n < 8)
9935 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9936 n -= 4;
9937 if (n > 0)
9939 int move = n - 4;
9941 src = aarch64_move_pointer (src, move);
9942 dst = aarch64_move_pointer (dst, move);
9943 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9945 return true;
9948 /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of
9949 them, then (if applicable) an 8-byte chunk. */
9950 while (n >= 8)
9952 if (n / 16)
9954 aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
9955 n -= 16;
9957 else
9959 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
9960 n -= 8;
9964 /* Finish the final bytes of the copy. We can always do this in one
9965 instruction. We either copy the exact amount we need, or partially
9966 overlap with the previous chunk we copied and copy 8 bytes. */
9967 if (n == 0)
9968 return true;
9969 else if (n == 1)
9970 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
9971 else if (n == 2)
9972 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
9973 else if (n == 4)
9974 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9975 else
9977 if (n == 3)
9979 src = aarch64_move_pointer (src, -1);
9980 dst = aarch64_move_pointer (dst, -1);
9981 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9983 else
9985 int move = n - 8;
9987 src = aarch64_move_pointer (src, move);
9988 dst = aarch64_move_pointer (dst, move);
9989 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
9993 return true;
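/* Worked example: a constant 13-byte copy first issues one 8-byte (DImode)
   load/store pair for bytes 0-7, leaving n == 5; since 5 is not 0, 1, 2 or 4
   and not the special 3-byte case, both pointers are then moved back by 3
   bytes and a second 8-byte copy covers bytes 5-12, overlapping the first.
   The whole memcpy is thus two LDR/STR pairs with no residual byte loop.  */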
9996 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
9998 static unsigned HOST_WIDE_INT
9999 aarch64_asan_shadow_offset (void)
10001 return (HOST_WIDE_INT_1 << 36);
10004 static bool
10005 aarch64_use_by_pieces_infrastructure_p (unsigned int size,
10006 unsigned int align,
10007 enum by_pieces_operation op,
10008 bool speed_p)
10010 /* STORE_BY_PIECES can be used when copying a constant string, but
10011 in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
10012 For now we always reject STORE_BY_PIECES here and let the move_by_pieces code copy
10013 the string from read-only memory. */
10014 if (op == STORE_BY_PIECES)
10015 return false;
10017 return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
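/* Concretely: for something like memcpy (dst, "0123456789abcdef", 16),
   rejecting STORE_BY_PIECES means each 64-bit chunk is copied as an
   LDR/STR pair from the string in .rodata (2 insns) rather than being
   rebuilt in a register with a MOV plus three MOVKs before the store
   (roughly 5 insns), matching the reasoning in the comment above.  */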
10020 #undef TARGET_ADDRESS_COST
10021 #define TARGET_ADDRESS_COST aarch64_address_cost
10023 /* This hook determines whether unnamed bitfields affect the alignment
10024 of the containing structure. The hook returns true if the structure
10025 should inherit the alignment requirements of an unnamed bitfield's
10026 type. */
10027 #undef TARGET_ALIGN_ANON_BITFIELD
10028 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
10030 #undef TARGET_ASM_ALIGNED_DI_OP
10031 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
10033 #undef TARGET_ASM_ALIGNED_HI_OP
10034 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
10036 #undef TARGET_ASM_ALIGNED_SI_OP
10037 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
10039 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
10040 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
10041 hook_bool_const_tree_hwi_hwi_const_tree_true
10043 #undef TARGET_ASM_FILE_START
10044 #define TARGET_ASM_FILE_START aarch64_start_file
10046 #undef TARGET_ASM_OUTPUT_MI_THUNK
10047 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
10049 #undef TARGET_ASM_SELECT_RTX_SECTION
10050 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
10052 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
10053 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
10055 #undef TARGET_BUILD_BUILTIN_VA_LIST
10056 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
10058 #undef TARGET_CALLEE_COPIES
10059 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
10061 #undef TARGET_CAN_ELIMINATE
10062 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
10064 #undef TARGET_CANNOT_FORCE_CONST_MEM
10065 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
10067 #undef TARGET_CONDITIONAL_REGISTER_USAGE
10068 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
10070 /* Only the least significant bit is used for initialization guard
10071 variables. */
10072 #undef TARGET_CXX_GUARD_MASK_BIT
10073 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
10075 #undef TARGET_C_MODE_FOR_SUFFIX
10076 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
10078 #ifdef TARGET_BIG_ENDIAN_DEFAULT
10079 #undef TARGET_DEFAULT_TARGET_FLAGS
10080 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
10081 #endif
10083 #undef TARGET_CLASS_MAX_NREGS
10084 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
10086 #undef TARGET_BUILTIN_DECL
10087 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
10089 #undef TARGET_EXPAND_BUILTIN
10090 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
10092 #undef TARGET_EXPAND_BUILTIN_VA_START
10093 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
10095 #undef TARGET_FOLD_BUILTIN
10096 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
10098 #undef TARGET_FUNCTION_ARG
10099 #define TARGET_FUNCTION_ARG aarch64_function_arg
10101 #undef TARGET_FUNCTION_ARG_ADVANCE
10102 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
10104 #undef TARGET_FUNCTION_ARG_BOUNDARY
10105 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
10107 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
10108 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
10110 #undef TARGET_FUNCTION_VALUE
10111 #define TARGET_FUNCTION_VALUE aarch64_function_value
10113 #undef TARGET_FUNCTION_VALUE_REGNO_P
10114 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
10116 #undef TARGET_FRAME_POINTER_REQUIRED
10117 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
10119 #undef TARGET_GIMPLE_FOLD_BUILTIN
10120 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
10122 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
10123 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
10125 #undef TARGET_INIT_BUILTINS
10126 #define TARGET_INIT_BUILTINS aarch64_init_builtins
10128 #undef TARGET_LEGITIMATE_ADDRESS_P
10129 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
10131 #undef TARGET_LEGITIMATE_CONSTANT_P
10132 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
10134 #undef TARGET_LIBGCC_CMP_RETURN_MODE
10135 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
10137 #undef TARGET_LRA_P
10138 #define TARGET_LRA_P aarch64_lra_p
10140 #undef TARGET_MANGLE_TYPE
10141 #define TARGET_MANGLE_TYPE aarch64_mangle_type
10143 #undef TARGET_MEMORY_MOVE_COST
10144 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
10146 #undef TARGET_MUST_PASS_IN_STACK
10147 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
10149 /* This target hook should return true if accesses to volatile bitfields
10150 should use the narrowest mode possible. It should return false if these
10151 accesses should use the bitfield container type. */
10152 #undef TARGET_NARROW_VOLATILE_BITFIELD
10153 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
10155 #undef TARGET_OPTION_OVERRIDE
10156 #define TARGET_OPTION_OVERRIDE aarch64_override_options
10158 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
10159 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
10160 aarch64_override_options_after_change
10162 #undef TARGET_PASS_BY_REFERENCE
10163 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
10165 #undef TARGET_PREFERRED_RELOAD_CLASS
10166 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
10168 #undef TARGET_SECONDARY_RELOAD
10169 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
10171 #undef TARGET_SHIFT_TRUNCATION_MASK
10172 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
10174 #undef TARGET_SETUP_INCOMING_VARARGS
10175 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
10177 #undef TARGET_STRUCT_VALUE_RTX
10178 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
10180 #undef TARGET_REGISTER_MOVE_COST
10181 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
10183 #undef TARGET_RETURN_IN_MEMORY
10184 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
10186 #undef TARGET_RETURN_IN_MSB
10187 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
10189 #undef TARGET_RTX_COSTS
10190 #define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
10192 #undef TARGET_SCHED_ISSUE_RATE
10193 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
10195 #undef TARGET_TRAMPOLINE_INIT
10196 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
10198 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
10199 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
10201 #undef TARGET_VECTOR_MODE_SUPPORTED_P
10202 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
10204 #undef TARGET_ARRAY_MODE_SUPPORTED_P
10205 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
10207 #undef TARGET_VECTORIZE_ADD_STMT_COST
10208 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
10210 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
10211 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
10212 aarch64_builtin_vectorization_cost
10214 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
10215 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
10217 #undef TARGET_VECTORIZE_BUILTINS
10218 #define TARGET_VECTORIZE_BUILTINS
10220 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
10221 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
10222 aarch64_builtin_vectorized_function
10224 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
10225 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
10226 aarch64_autovectorize_vector_sizes
10228 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
10229 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
10230 aarch64_atomic_assign_expand_fenv
10232 /* Section anchor support. */
10234 #undef TARGET_MIN_ANCHOR_OFFSET
10235 #define TARGET_MIN_ANCHOR_OFFSET -256
10237 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
10238 byte offset; we can do much more for larger data types, but have no way
10239 to determine the size of the access. We assume accesses are aligned. */
10240 #undef TARGET_MAX_ANCHOR_OFFSET
10241 #define TARGET_MAX_ANCHOR_OFFSET 4095
10243 #undef TARGET_VECTOR_ALIGNMENT
10244 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
10246 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
10247 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
10248 aarch64_simd_vector_alignment_reachable
10250 /* vec_perm support. */
10252 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
10253 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
10254 aarch64_vectorize_vec_perm_const_ok
10257 #undef TARGET_FIXED_CONDITION_CODE_REGS
10258 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
10260 #undef TARGET_FLAGS_REGNUM
10261 #define TARGET_FLAGS_REGNUM CC_REGNUM
10263 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
10264 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
10266 #undef TARGET_ASAN_SHADOW_OFFSET
10267 #define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset
10269 #undef TARGET_LEGITIMIZE_ADDRESS
10270 #define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address
10272 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
10273 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
10274 aarch64_use_by_pieces_infrastructure_p
10276 struct gcc_target targetm = TARGET_INITIALIZER;
10278 #include "gt-aarch64.h"