Merge from branches/gcc-4_8-branch up to rev 207411.
[official-gcc.git] / gcc-4_8-branch / gcc / config / aarch64 / aarch64.c
blob 0e0203f8250e07cacc7f6cb5fa553d56c16eb830
1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2013 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "tree.h"
29 #include "regs.h"
30 #include "df.h"
31 #include "hard-reg-set.h"
32 #include "output.h"
33 #include "expr.h"
34 #include "reload.h"
35 #include "toplev.h"
36 #include "target.h"
37 #include "target-def.h"
38 #include "targhooks.h"
39 #include "ggc.h"
40 #include "function.h"
41 #include "tm_p.h"
42 #include "recog.h"
43 #include "langhooks.h"
44 #include "diagnostic-core.h"
45 #include "gimple.h"
46 #include "optabs.h"
47 #include "dwarf2.h"
48 #include "cfgloop.h"
49 #include "tree-vectorizer.h"
51 /* Classifies an address.
53 ADDRESS_REG_IMM
54 A simple base register plus immediate offset.
56 ADDRESS_REG_WB
57 A base register indexed by immediate offset with writeback.
59 ADDRESS_REG_REG
60 A base register indexed by (optionally scaled) register.
62 ADDRESS_REG_UXTW
63 A base register indexed by (optionally scaled) zero-extended register.
65 ADDRESS_REG_SXTW
66 A base register indexed by (optionally scaled) sign-extended register.
68 ADDRESS_LO_SUM
69 A LO_SUM rtx with a base register and "LO12" symbol relocation.
71 ADDRESS_SYMBOLIC:
72 A constant symbolic address, in pc-relative literal pool. */
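/* Illustrative assembly forms for the classes above (assumed syntax,
   for exposition only):
     ADDRESS_REG_IMM    ldr x0, [x1, #16]
     ADDRESS_REG_WB     ldr x0, [x1, #16]!   or   ldr x0, [x1], #16
     ADDRESS_REG_REG    ldr x0, [x1, x2, lsl #3]
     ADDRESS_REG_UXTW   ldr x0, [x1, w2, uxtw #3]
     ADDRESS_REG_SXTW   ldr x0, [x1, w2, sxtw #3]
     ADDRESS_LO_SUM     ldr x0, [x1, #:lo12:foo]
     ADDRESS_SYMBOLIC   ldr x0, .Lpool_entry  */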
74 enum aarch64_address_type {
75 ADDRESS_REG_IMM,
76 ADDRESS_REG_WB,
77 ADDRESS_REG_REG,
78 ADDRESS_REG_UXTW,
79 ADDRESS_REG_SXTW,
80 ADDRESS_LO_SUM,
81 ADDRESS_SYMBOLIC
84 struct aarch64_address_info {
85 enum aarch64_address_type type;
86 rtx base;
87 rtx offset;
88 int shift;
89 enum aarch64_symbol_type symbol_type;
92 struct simd_immediate_info
93 {
94 rtx value;
95 int shift;
96 int element_width;
97 bool mvn;
98 bool msl;
101 /* The current code model. */
102 enum aarch64_code_model aarch64_cmodel;
104 #ifdef HAVE_AS_TLS
105 #undef TARGET_HAVE_TLS
106 #define TARGET_HAVE_TLS 1
107 #endif
109 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
110 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
111 const_tree,
112 enum machine_mode *, int *,
113 bool *);
114 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
115 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
116 static void aarch64_override_options_after_change (void);
117 static bool aarch64_vector_mode_supported_p (enum machine_mode);
118 static unsigned bit_count (unsigned HOST_WIDE_INT);
119 static bool aarch64_const_vec_all_same_int_p (rtx,
120 HOST_WIDE_INT, HOST_WIDE_INT);
122 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
123 const unsigned char *sel);
125 /* The processor for which instructions should be scheduled. */
126 enum aarch64_processor aarch64_tune = generic;
128 /* The current tuning set. */
129 const struct tune_params *aarch64_tune_params;
131 /* Mask to specify which instructions we are allowed to generate. */
132 unsigned long aarch64_isa_flags = 0;
134 /* Mask to specify which instruction scheduling options should be used. */
135 unsigned long aarch64_tune_flags = 0;
137 /* Tuning parameters. */
139 #if HAVE_DESIGNATED_INITIALIZERS
140 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
141 #else
142 #define NAMED_PARAM(NAME, VAL) (VAL)
143 #endif
145 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
146 __extension__
147 #endif
148 static const struct cpu_rtx_cost_table generic_rtx_cost_table =
150 NAMED_PARAM (memory_load, COSTS_N_INSNS (1)),
151 NAMED_PARAM (memory_store, COSTS_N_INSNS (0)),
152 NAMED_PARAM (register_shift, COSTS_N_INSNS (1)),
153 NAMED_PARAM (int_divide, COSTS_N_INSNS (6)),
154 NAMED_PARAM (float_divide, COSTS_N_INSNS (2)),
155 NAMED_PARAM (double_divide, COSTS_N_INSNS (6)),
156 NAMED_PARAM (int_multiply, COSTS_N_INSNS (1)),
157 NAMED_PARAM (int_multiply_extend, COSTS_N_INSNS (1)),
158 NAMED_PARAM (int_multiply_add, COSTS_N_INSNS (1)),
159 NAMED_PARAM (int_multiply_extend_add, COSTS_N_INSNS (1)),
160 NAMED_PARAM (float_multiply, COSTS_N_INSNS (0)),
161 NAMED_PARAM (double_multiply, COSTS_N_INSNS (1))
164 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
165 __extension__
166 #endif
167 static const struct cpu_addrcost_table generic_addrcost_table =
169 NAMED_PARAM (pre_modify, 0),
170 NAMED_PARAM (post_modify, 0),
171 NAMED_PARAM (register_offset, 0),
172 NAMED_PARAM (register_extend, 0),
173 NAMED_PARAM (imm_offset, 0)
176 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
177 __extension__
178 #endif
179 static const struct cpu_regmove_cost generic_regmove_cost =
181 NAMED_PARAM (GP2GP, 1),
182 NAMED_PARAM (GP2FP, 2),
183 NAMED_PARAM (FP2GP, 2),
184 /* We currently do not provide direct support for TFmode Q->Q move.
185 Therefore we need to raise the cost above 2 in order to have
186 reload handle the situation. */
187 NAMED_PARAM (FP2FP, 4)
190 /* Generic costs for vector insn classes. */
191 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
192 __extension__
193 #endif
194 static const struct cpu_vector_cost generic_vector_cost =
196 NAMED_PARAM (scalar_stmt_cost, 1),
197 NAMED_PARAM (scalar_load_cost, 1),
198 NAMED_PARAM (scalar_store_cost, 1),
199 NAMED_PARAM (vec_stmt_cost, 1),
200 NAMED_PARAM (vec_to_scalar_cost, 1),
201 NAMED_PARAM (scalar_to_vec_cost, 1),
202 NAMED_PARAM (vec_align_load_cost, 1),
203 NAMED_PARAM (vec_unalign_load_cost, 1),
204 NAMED_PARAM (vec_unalign_store_cost, 1),
205 NAMED_PARAM (vec_store_cost, 1),
206 NAMED_PARAM (cond_taken_branch_cost, 3),
207 NAMED_PARAM (cond_not_taken_branch_cost, 1)
210 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
211 __extension__
212 #endif
213 static const struct tune_params generic_tunings =
215 &generic_rtx_cost_table,
216 &generic_addrcost_table,
217 &generic_regmove_cost,
218 &generic_vector_cost,
219 NAMED_PARAM (memmov_cost, 4)
222 /* A processor implementing AArch64. */
223 struct processor
225 const char *const name;
226 enum aarch64_processor core;
227 const char *arch;
228 const unsigned long flags;
229 const struct tune_params *const tune;
232 /* Processor cores implementing AArch64. */
233 static const struct processor all_cores[] =
235 #define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
236 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
237 #include "aarch64-cores.def"
238 #undef AARCH64_CORE
239 {"generic", generic, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
240 {NULL, aarch64_none, NULL, 0, NULL}
243 /* Architectures implementing AArch64. */
244 static const struct processor all_architectures[] =
246 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
247 {NAME, CORE, #ARCH, FLAGS, NULL},
248 #include "aarch64-arches.def"
249 #undef AARCH64_ARCH
250 {"generic", generic, "8", AARCH64_FL_FOR_ARCH8, NULL},
251 {NULL, aarch64_none, NULL, 0, NULL}
254 /* Target specification. These are populated as command-line arguments
255 are processed, or NULL if not specified. */
256 static const struct processor *selected_arch;
257 static const struct processor *selected_cpu;
258 static const struct processor *selected_tune;
260 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
262 /* An ISA extension in the co-processor and main instruction set space. */
263 struct aarch64_option_extension
265 const char *const name;
266 const unsigned long flags_on;
267 const unsigned long flags_off;
270 /* ISA extensions in AArch64. */
271 static const struct aarch64_option_extension all_extensions[] =
273 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
274 {NAME, FLAGS_ON, FLAGS_OFF},
275 #include "aarch64-option-extensions.def"
276 #undef AARCH64_OPT_EXTENSION
277 {NULL, 0, 0}
280 /* Used to track the size of an address when generating a pre/post
281 increment address. */
282 static enum machine_mode aarch64_memory_reference_mode;
284 /* Used to force GTY into this file. */
285 static GTY(()) int gty_dummy;
287 /* A table of valid AArch64 "bitmask immediate" values for
288 logical instructions. */
290 #define AARCH64_NUM_BITMASKS 5334
291 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
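/* Roughly speaking, a bitmask immediate is a 2-, 4-, 8-, 16-, 32- or
   64-bit element consisting of a (rotated) run of contiguous set bits,
   replicated across the register; e.g. 0x00ff00ff00ff00ff is encodable
   while 0x0000000000001234 is not.  The table above enumerates the
   AARCH64_NUM_BITMASKS distinct 64-bit values of this form.  */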
293 /* Did we set flag_omit_frame_pointer just so
294 aarch64_frame_pointer_required would be called? */
295 static bool faked_omit_frame_pointer;
297 typedef enum aarch64_cond_code
299 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
300 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
301 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
303 aarch64_cc;
305 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
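/* Flipping the low bit yields the inverse condition because the codes
   above are laid out in complementary pairs: EQ (0) <-> NE (1),
   CS (2) <-> CC (3), ..., GT (12) <-> LE (13).  */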
307 /* The condition codes of the processor, and the inverse function. */
308 static const char * const aarch64_condition_codes[] =
310 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
311 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
314 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
315 unsigned
316 aarch64_dbx_register_number (unsigned regno)
318 if (GP_REGNUM_P (regno))
319 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
320 else if (regno == SP_REGNUM)
321 return AARCH64_DWARF_SP;
322 else if (FP_REGNUM_P (regno))
323 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
325 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
326 equivalent DWARF register. */
327 return DWARF_FRAME_REGISTERS;
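/* For example, x5 maps to AARCH64_DWARF_R0 + 5, sp to AARCH64_DWARF_SP
   and v3 to AARCH64_DWARF_V0 + 3; any other register (e.g. the condition
   flags) falls through to DWARF_FRAME_REGISTERS, i.e. "no DWARF
   equivalent".  */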
330 /* Return TRUE if MODE is any of the large INT modes. */
331 static bool
332 aarch64_vect_struct_mode_p (enum machine_mode mode)
334 return mode == OImode || mode == CImode || mode == XImode;
337 /* Return TRUE if MODE is any of the vector modes. */
338 static bool
339 aarch64_vector_mode_p (enum machine_mode mode)
341 return aarch64_vector_mode_supported_p (mode)
342 || aarch64_vect_struct_mode_p (mode);
345 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
346 static bool
347 aarch64_array_mode_supported_p (enum machine_mode mode,
348 unsigned HOST_WIDE_INT nelems)
350 if (TARGET_SIMD
351 && AARCH64_VALID_SIMD_QREG_MODE (mode)
352 && (nelems >= 2 && nelems <= 4))
353 return true;
355 return false;
358 /* Implement HARD_REGNO_NREGS. */
360 int
361 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
363 switch (aarch64_regno_regclass (regno))
365 case FP_REGS:
366 case FP_LO_REGS:
367 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
368 default:
369 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
371 gcc_unreachable ();
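/* For instance, a 16-byte TImode value occupies two X registers but only
   one V register, and a 32-byte OImode SIMD structure occupies two V
   registers (assuming UNITS_PER_WORD of 8 and UNITS_PER_VREG of 16).  */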
374 /* Implement HARD_REGNO_MODE_OK. */
376 int
377 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
379 if (GET_MODE_CLASS (mode) == MODE_CC)
380 return regno == CC_REGNUM;
382 if (regno == SP_REGNUM || regno == FRAME_POINTER_REGNUM
383 || regno == ARG_POINTER_REGNUM)
384 return mode == Pmode;
386 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
387 return 1;
389 if (FP_REGNUM_P (regno))
391 if (aarch64_vect_struct_mode_p (mode))
392 return
393 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
394 else
395 return 1;
398 return 0;
401 /* Return true if calls to DECL should be treated as
402 long-calls (i.e. called via a register). */
403 static bool
404 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
406 return false;
409 /* Return true if calls to symbol-ref SYM should be treated as
410 long-calls (i.e. called via a register). */
411 bool
412 aarch64_is_long_call_p (rtx sym)
414 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
417 /* Return true if the offsets to a zero/sign-extract operation
418 represent an expression that matches an extend operation. The
419 operands represent the parameters from
421 (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */
422 bool
423 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
424 rtx extract_imm)
426 HOST_WIDE_INT mult_val, extract_val;
428 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
429 return false;
431 mult_val = INTVAL (mult_imm);
432 extract_val = INTVAL (extract_imm);
434 if (extract_val > 8
435 && extract_val < GET_MODE_BITSIZE (mode)
436 && exact_log2 (extract_val & ~7) > 0
437 && (extract_val & 7) <= 4
438 && mult_val == (1 << (extract_val & 7)))
439 return true;
441 return false;
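/* Worked example (illustrative): mult_imm == 4 and extract_imm == 34 pass
   the checks above, since the extract width rounds down to 32 bits and the
   remaining 2 bits match the multiplier 1 << 2.  That pattern describes a
   32-bit value extended and shifted left by two, as in an extended-register
   operand like "add x0, x1, w2, uxtw #2".  */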
444 /* Emit an insn that's a simple single-set. Both the operands must be
445 known to be valid. */
446 inline static rtx
447 emit_set_insn (rtx x, rtx y)
449 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
452 /* X and Y are two things to compare using CODE. Emit the compare insn and
453 return the rtx for register 0 in the proper mode. */
454 rtx
455 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
457 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
458 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
460 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
461 return cc_reg;
464 /* Build the SYMBOL_REF for __tls_get_addr. */
466 static GTY(()) rtx tls_get_addr_libfunc;
468 rtx
469 aarch64_tls_get_addr (void)
471 if (!tls_get_addr_libfunc)
472 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
473 return tls_get_addr_libfunc;
476 /* Return the TLS model to use for ADDR. */
478 static enum tls_model
479 tls_symbolic_operand_type (rtx addr)
481 enum tls_model tls_kind = TLS_MODEL_NONE;
482 rtx sym, addend;
484 if (GET_CODE (addr) == CONST)
486 split_const (addr, &sym, &addend);
487 if (GET_CODE (sym) == SYMBOL_REF)
488 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
490 else if (GET_CODE (addr) == SYMBOL_REF)
491 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
493 return tls_kind;
496 /* We'll allow lo_sum's in our legitimate addresses so that
497 combine would take care of combining addresses where
498 necessary, but for generation purposes, we'll generate the address
499 as:
500 RTL Absolute
501 tmp = hi (symbol_ref); adrp x1, foo
502 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
505 PIC TLS
506 adrp x1, :got:foo adrp tmp, :tlsgd:foo
507 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
508 bl __tls_get_addr
511 Load TLS symbol, depending on TLS mechanism and TLS access model.
513 Global Dynamic - Traditional TLS:
514 adrp tmp, :tlsgd:imm
515 add dest, tmp, #:tlsgd_lo12:imm
516 bl __tls_get_addr
518 Global Dynamic - TLS Descriptors:
519 adrp dest, :tlsdesc:imm
520 ldr tmp, [dest, #:tlsdesc_lo12:imm]
521 add dest, dest, #:tlsdesc_lo12:imm
522 blr tmp
523 mrs tp, tpidr_el0
524 add dest, dest, tp
526 Initial Exec:
527 mrs tp, tpidr_el0
528 adrp tmp, :gottprel:imm
529 ldr dest, [tmp, #:gottprel_lo12:imm]
530 add dest, dest, tp
532 Local Exec:
533 mrs tp, tpidr_el0
534 add t0, tp, #:tprel_hi12:imm
535 add t0, #:tprel_lo12_nc:imm
538 static void
539 aarch64_load_symref_appropriately (rtx dest, rtx imm,
540 enum aarch64_symbol_type type)
542 switch (type)
544 case SYMBOL_SMALL_ABSOLUTE:
546 rtx tmp_reg = dest;
547 if (can_create_pseudo_p ())
549 tmp_reg = gen_reg_rtx (Pmode);
552 emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
553 emit_insn (gen_add_losym (dest, tmp_reg, imm));
554 return;
557 case SYMBOL_TINY_ABSOLUTE:
558 emit_insn (gen_rtx_SET (Pmode, dest, imm));
559 return;
561 case SYMBOL_SMALL_GOT:
563 rtx tmp_reg = dest;
564 if (can_create_pseudo_p ())
565 tmp_reg = gen_reg_rtx (Pmode);
566 emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
567 emit_insn (gen_ldr_got_small (dest, tmp_reg, imm));
568 return;
571 case SYMBOL_SMALL_TLSGD:
573 rtx insns;
574 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
576 start_sequence ();
577 emit_call_insn (gen_tlsgd_small (result, imm));
578 insns = get_insns ();
579 end_sequence ();
581 RTL_CONST_CALL_P (insns) = 1;
582 emit_libcall_block (insns, dest, result, imm);
583 return;
586 case SYMBOL_SMALL_TLSDESC:
588 rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
589 rtx tp;
591 emit_insn (gen_tlsdesc_small (imm));
592 tp = aarch64_load_tp (NULL);
593 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
594 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
595 return;
598 case SYMBOL_SMALL_GOTTPREL:
600 rtx tmp_reg = gen_reg_rtx (Pmode);
601 rtx tp = aarch64_load_tp (NULL);
602 emit_insn (gen_tlsie_small (tmp_reg, imm));
603 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
604 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
605 return;
608 case SYMBOL_SMALL_TPREL:
610 rtx tp = aarch64_load_tp (NULL);
611 emit_insn (gen_tlsle_small (dest, tp, imm));
612 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
613 return;
616 case SYMBOL_TINY_GOT:
617 emit_insn (gen_ldr_got_tiny (dest, imm));
618 return;
620 default:
621 gcc_unreachable ();
625 /* Emit a move from SRC to DEST. Assume that the move expanders can
626 handle all moves if !can_create_pseudo_p (). The distinction is
627 important because, unlike emit_move_insn, the move expanders know
628 how to force Pmode objects into the constant pool even when the
629 constant pool address is not itself legitimate. */
630 static rtx
631 aarch64_emit_move (rtx dest, rtx src)
633 return (can_create_pseudo_p ()
634 ? emit_move_insn (dest, src)
635 : emit_move_insn_1 (dest, src));
638 void
639 aarch64_split_128bit_move (rtx dst, rtx src)
641 rtx low_dst;
643 enum machine_mode src_mode = GET_MODE (src);
644 enum machine_mode dst_mode = GET_MODE (dst);
645 int src_regno = REGNO (src);
646 int dst_regno = REGNO (dst);
648 gcc_assert (dst_mode == TImode || dst_mode == TFmode);
650 if (REG_P (dst) && REG_P (src))
652 gcc_assert (src_mode == TImode || src_mode == TFmode);
654 /* Handle r -> w, w -> r. */
655 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
657 switch (src_mode) {
658 case TImode:
659 emit_insn
660 (gen_aarch64_movtilow_di (dst, gen_lowpart (word_mode, src)));
661 emit_insn
662 (gen_aarch64_movtihigh_di (dst, gen_highpart (word_mode, src)));
663 return;
664 case TFmode:
665 emit_insn
666 (gen_aarch64_movtflow_di (dst, gen_lowpart (word_mode, src)));
667 emit_insn
668 (gen_aarch64_movtfhigh_di (dst, gen_highpart (word_mode, src)));
669 return;
670 default:
671 gcc_unreachable ();
674 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
676 switch (src_mode) {
677 case TImode:
678 emit_insn
679 (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst), src));
680 emit_insn
681 (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst), src));
682 return;
683 case TFmode:
684 emit_insn
685 (gen_aarch64_movdi_tflow (gen_lowpart (word_mode, dst), src));
686 emit_insn
687 (gen_aarch64_movdi_tfhigh (gen_highpart (word_mode, dst), src));
688 return;
689 default:
690 gcc_unreachable ();
693 /* Fall through to r -> r cases. */
696 switch (dst_mode) {
697 case TImode:
698 low_dst = gen_lowpart (word_mode, dst);
699 if (REG_P (low_dst)
700 && reg_overlap_mentioned_p (low_dst, src))
702 aarch64_emit_move (gen_highpart (word_mode, dst),
703 gen_highpart_mode (word_mode, TImode, src));
704 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
706 else
708 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
709 aarch64_emit_move (gen_highpart (word_mode, dst),
710 gen_highpart_mode (word_mode, TImode, src));
712 return;
713 case TFmode:
714 emit_move_insn (gen_rtx_REG (DFmode, dst_regno),
715 gen_rtx_REG (DFmode, src_regno));
716 emit_move_insn (gen_rtx_REG (DFmode, dst_regno + 1),
717 gen_rtx_REG (DFmode, src_regno + 1));
718 return;
719 default:
720 gcc_unreachable ();
724 bool
725 aarch64_split_128bit_move_p (rtx dst, rtx src)
727 return (! REG_P (src)
728 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
731 /* Split a complex SIMD combine. */
733 void
734 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
736 enum machine_mode src_mode = GET_MODE (src1);
737 enum machine_mode dst_mode = GET_MODE (dst);
739 gcc_assert (VECTOR_MODE_P (dst_mode));
741 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
743 rtx (*gen) (rtx, rtx, rtx);
745 switch (src_mode)
747 case V8QImode:
748 gen = gen_aarch64_simd_combinev8qi;
749 break;
750 case V4HImode:
751 gen = gen_aarch64_simd_combinev4hi;
752 break;
753 case V2SImode:
754 gen = gen_aarch64_simd_combinev2si;
755 break;
756 case V2SFmode:
757 gen = gen_aarch64_simd_combinev2sf;
758 break;
759 case DImode:
760 gen = gen_aarch64_simd_combinedi;
761 break;
762 case DFmode:
763 gen = gen_aarch64_simd_combinedf;
764 break;
765 default:
766 gcc_unreachable ();
769 emit_insn (gen (dst, src1, src2));
770 return;
774 /* Split a complex SIMD move. */
776 void
777 aarch64_split_simd_move (rtx dst, rtx src)
779 enum machine_mode src_mode = GET_MODE (src);
780 enum machine_mode dst_mode = GET_MODE (dst);
782 gcc_assert (VECTOR_MODE_P (dst_mode));
784 if (REG_P (dst) && REG_P (src))
786 rtx (*gen) (rtx, rtx);
788 gcc_assert (VECTOR_MODE_P (src_mode));
790 switch (src_mode)
792 case V16QImode:
793 gen = gen_aarch64_split_simd_movv16qi;
794 break;
795 case V8HImode:
796 gen = gen_aarch64_split_simd_movv8hi;
797 break;
798 case V4SImode:
799 gen = gen_aarch64_split_simd_movv4si;
800 break;
801 case V2DImode:
802 gen = gen_aarch64_split_simd_movv2di;
803 break;
804 case V4SFmode:
805 gen = gen_aarch64_split_simd_movv4sf;
806 break;
807 case V2DFmode:
808 gen = gen_aarch64_split_simd_movv2df;
809 break;
810 default:
811 gcc_unreachable ();
814 emit_insn (gen (dst, src));
815 return;
819 static rtx
820 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
822 if (can_create_pseudo_p ())
823 return force_reg (mode, value);
824 else
826 x = aarch64_emit_move (x, value);
827 return x;
832 static rtx
833 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
835 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
837 rtx high;
838 /* Load the full offset into a register. This
839 might be improvable in the future. */
840 high = GEN_INT (offset);
841 offset = 0;
842 high = aarch64_force_temporary (mode, temp, high);
843 reg = aarch64_force_temporary (mode, temp,
844 gen_rtx_PLUS (mode, high, reg));
846 return plus_constant (mode, reg, offset);
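/* An ADD/SUB immediate is a 12-bit value, optionally shifted left by
   twelve; an offset such as 0x123456 (for instance) fits neither form,
   so it is loaded into the temporary above and added with a
   register-register add instead.  */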
849 void
850 aarch64_expand_mov_immediate (rtx dest, rtx imm)
852 enum machine_mode mode = GET_MODE (dest);
853 unsigned HOST_WIDE_INT mask;
854 int i;
855 bool first;
856 unsigned HOST_WIDE_INT val;
857 bool subtargets;
858 rtx subtarget;
859 int one_match, zero_match;
861 gcc_assert (mode == SImode || mode == DImode);
863 /* Check on what type of symbol it is. */
864 if (GET_CODE (imm) == SYMBOL_REF
865 || GET_CODE (imm) == LABEL_REF
866 || GET_CODE (imm) == CONST)
868 rtx mem, base, offset;
869 enum aarch64_symbol_type sty;
871 /* If we have (const (plus symbol offset)), separate out the offset
872 before we start classifying the symbol. */
873 split_const (imm, &base, &offset);
875 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
876 switch (sty)
878 case SYMBOL_FORCE_TO_MEM:
879 if (offset != const0_rtx
880 && targetm.cannot_force_const_mem (mode, imm))
882 gcc_assert(can_create_pseudo_p ());
883 base = aarch64_force_temporary (mode, dest, base);
884 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
885 aarch64_emit_move (dest, base);
886 return;
888 mem = force_const_mem (mode, imm);
889 gcc_assert (mem);
890 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
891 return;
893 case SYMBOL_SMALL_TLSGD:
894 case SYMBOL_SMALL_TLSDESC:
895 case SYMBOL_SMALL_GOTTPREL:
896 case SYMBOL_SMALL_GOT:
897 case SYMBOL_TINY_GOT:
898 if (offset != const0_rtx)
900 gcc_assert(can_create_pseudo_p ());
901 base = aarch64_force_temporary (mode, dest, base);
902 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
903 aarch64_emit_move (dest, base);
904 return;
906 /* FALLTHRU */
908 case SYMBOL_SMALL_TPREL:
909 case SYMBOL_SMALL_ABSOLUTE:
910 case SYMBOL_TINY_ABSOLUTE:
911 aarch64_load_symref_appropriately (dest, imm, sty);
912 return;
914 default:
915 gcc_unreachable ();
919 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
921 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
922 return;
925 if (!CONST_INT_P (imm))
927 if (GET_CODE (imm) == HIGH)
928 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
929 else
931 rtx mem = force_const_mem (mode, imm);
932 gcc_assert (mem);
933 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
936 return;
939 if (mode == SImode)
941 /* We know we can't do this in 1 insn, and we must be able to do it
942 in two; so don't mess around looking for sequences that don't buy
943 us anything. */
944 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
945 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
946 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
947 return;
950 /* Remaining cases are all for DImode. */
952 val = INTVAL (imm);
953 subtargets = optimize && can_create_pseudo_p ();
955 one_match = 0;
956 zero_match = 0;
957 mask = 0xffff;
959 for (i = 0; i < 64; i += 16, mask <<= 16)
961 if ((val & mask) == 0)
962 zero_match++;
963 else if ((val & mask) == mask)
964 one_match++;
967 if (one_match == 2)
969 mask = 0xffff;
970 for (i = 0; i < 64; i += 16, mask <<= 16)
972 if ((val & mask) != mask)
974 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
975 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
976 GEN_INT ((val >> i) & 0xffff)));
977 return;
980 gcc_unreachable ();
983 if (zero_match == 2)
984 goto simple_sequence;
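/* For example, val == 0x0000123400005678 has zero_match == 2, and the
   simple sequence at the end of this function emits (in effect)
   "mov dest, #0x5678" followed by "movk dest, #0x1234, lsl #32".  */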
986 mask = 0x0ffff0000UL;
987 for (i = 16; i < 64; i += 16, mask <<= 16)
989 HOST_WIDE_INT comp = mask & ~(mask - 1);
991 if (aarch64_uimm12_shift (val - (val & mask)))
993 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
995 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
996 emit_insn (gen_adddi3 (dest, subtarget,
997 GEN_INT (val - (val & mask))));
998 return;
1000 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1002 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1004 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1005 GEN_INT ((val + comp) & mask)));
1006 emit_insn (gen_adddi3 (dest, subtarget,
1007 GEN_INT (val - ((val + comp) & mask))));
1008 return;
1010 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1012 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1014 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1015 GEN_INT ((val - comp) | ~mask)));
1016 emit_insn (gen_adddi3 (dest, subtarget,
1017 GEN_INT (val - ((val - comp) | ~mask))));
1018 return;
1020 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1022 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1024 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1025 GEN_INT (val | ~mask)));
1026 emit_insn (gen_adddi3 (dest, subtarget,
1027 GEN_INT (val - (val | ~mask))));
1028 return;
1032 /* See if we can do it by arithmetically combining two
1033 immediates. */
1034 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1036 int j;
1037 mask = 0xffff;
1039 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1040 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1042 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1043 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1044 GEN_INT (aarch64_bitmasks[i])));
1045 emit_insn (gen_adddi3 (dest, subtarget,
1046 GEN_INT (val - aarch64_bitmasks[i])));
1047 return;
1050 for (j = 0; j < 64; j += 16, mask <<= 16)
1052 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1054 emit_insn (gen_rtx_SET (VOIDmode, dest,
1055 GEN_INT (aarch64_bitmasks[i])));
1056 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1057 GEN_INT ((val >> j) & 0xffff)));
1058 return;
1063 /* See if we can do it by logically combining two immediates. */
1064 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1066 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1068 int j;
1070 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1071 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1073 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1074 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1075 GEN_INT (aarch64_bitmasks[i])));
1076 emit_insn (gen_iordi3 (dest, subtarget,
1077 GEN_INT (aarch64_bitmasks[j])));
1078 return;
1081 else if ((val & aarch64_bitmasks[i]) == val)
1083 int j;
1085 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1086 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1089 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1090 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1091 GEN_INT (aarch64_bitmasks[j])));
1092 emit_insn (gen_anddi3 (dest, subtarget,
1093 GEN_INT (aarch64_bitmasks[i])));
1094 return;
1099 simple_sequence:
1100 first = true;
1101 mask = 0xffff;
1102 for (i = 0; i < 64; i += 16, mask <<= 16)
1104 if ((val & mask) != 0)
1106 if (first)
1108 emit_insn (gen_rtx_SET (VOIDmode, dest,
1109 GEN_INT (val & mask)));
1110 first = false;
1112 else
1113 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1114 GEN_INT ((val >> i) & 0xffff)));
1119 static bool
1120 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1122 /* Indirect calls are not currently supported. */
1123 if (decl == NULL)
1124 return false;
1126 /* Cannot tail-call to long-calls, since these are outside of the
1127 range of a branch instruction (we could handle this if we added
1128 support for indirect tail-calls). */
1129 if (aarch64_decl_is_long_call_p (decl))
1130 return false;
1132 return true;
1135 /* Implement TARGET_PASS_BY_REFERENCE. */
1137 static bool
1138 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1139 enum machine_mode mode,
1140 const_tree type,
1141 bool named ATTRIBUTE_UNUSED)
1143 HOST_WIDE_INT size;
1144 enum machine_mode dummymode;
1145 int nregs;
1147 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1148 size = (mode == BLKmode && type)
1149 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1151 if (type)
1153 /* Arrays are always passed by reference. */
1154 if (TREE_CODE (type) == ARRAY_TYPE)
1155 return true;
1156 /* Other aggregates based on their size. */
1157 if (AGGREGATE_TYPE_P (type))
1158 size = int_size_in_bytes (type);
1161 /* Variable sized arguments are always passed by reference. */
1162 if (size < 0)
1163 return true;
1165 /* Can this be a candidate to be passed in fp/simd register(s)? */
1166 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1167 &dummymode, &nregs,
1168 NULL))
1169 return false;
1171 /* Arguments which are variable sized or larger than 2 registers are
1172 passed by reference unless they are a homogeneous floating-point
1173 aggregate. */
1174 return size > 2 * UNITS_PER_WORD;
1177 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1178 static bool
1179 aarch64_return_in_msb (const_tree valtype)
1181 enum machine_mode dummy_mode;
1182 int dummy_int;
1184 /* Never happens in little-endian mode. */
1185 if (!BYTES_BIG_ENDIAN)
1186 return false;
1188 /* Only composite types smaller than or equal to 16 bytes can
1189 be potentially returned in registers. */
1190 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1191 || int_size_in_bytes (valtype) <= 0
1192 || int_size_in_bytes (valtype) > 16)
1193 return false;
1195 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1196 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1197 is always passed/returned in the least significant bits of fp/simd
1198 register(s). */
1199 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1200 &dummy_mode, &dummy_int, NULL))
1201 return false;
1203 return true;
1206 /* Implement TARGET_FUNCTION_VALUE.
1207 Define how to find the value returned by a function. */
1209 static rtx
1210 aarch64_function_value (const_tree type, const_tree func,
1211 bool outgoing ATTRIBUTE_UNUSED)
1213 enum machine_mode mode;
1214 int unsignedp;
1215 int count;
1216 enum machine_mode ag_mode;
1218 mode = TYPE_MODE (type);
1219 if (INTEGRAL_TYPE_P (type))
1220 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1222 if (aarch64_return_in_msb (type))
1224 HOST_WIDE_INT size = int_size_in_bytes (type);
1226 if (size % UNITS_PER_WORD != 0)
1228 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1229 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1233 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1234 &ag_mode, &count, NULL))
1236 if (!aarch64_composite_type_p (type, mode))
1238 gcc_assert (count == 1 && mode == ag_mode);
1239 return gen_rtx_REG (mode, V0_REGNUM);
1241 else
1243 int i;
1244 rtx par;
1246 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1247 for (i = 0; i < count; i++)
1249 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1250 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1251 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1252 XVECEXP (par, 0, i) = tmp;
1254 return par;
1257 else
1258 return gen_rtx_REG (mode, R0_REGNUM);
1261 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1262 Return true if REGNO is the number of a hard register in which the values
1263 of called function may come back. */
1265 static bool
1266 aarch64_function_value_regno_p (const unsigned int regno)
1268 /* A maximum of 16 bytes can be returned in the general registers. Examples
1269 of 16-byte return values are: 128-bit integers and 16-byte small
1270 structures (excluding homogeneous floating-point aggregates). */
1271 if (regno == R0_REGNUM || regno == R1_REGNUM)
1272 return true;
1274 /* Up to four fp/simd registers can return a function value, e.g. a
1275 homogeneous floating-point aggregate having four members. */
1276 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1277 return !TARGET_GENERAL_REGS_ONLY;
1279 return false;
1282 /* Implement TARGET_RETURN_IN_MEMORY.
1284 If the type T of the result of a function is such that
1285 void func (T arg)
1286 would require that arg be passed as a value in a register (or set of
1287 registers) according to the parameter passing rules, then the result
1288 is returned in the same registers as would be used for such an
1289 argument. */
1291 static bool
1292 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1294 HOST_WIDE_INT size;
1295 enum machine_mode ag_mode;
1296 int count;
1298 if (!AGGREGATE_TYPE_P (type)
1299 && TREE_CODE (type) != COMPLEX_TYPE
1300 && TREE_CODE (type) != VECTOR_TYPE)
1301 /* Simple scalar types are always returned in registers. */
1302 return false;
1304 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1305 type,
1306 &ag_mode,
1307 &count,
1308 NULL))
1309 return false;
1311 /* Types larger than 2 registers are returned in memory. */
1312 size = int_size_in_bytes (type);
1313 return (size < 0 || size > 2 * UNITS_PER_WORD);
1316 static bool
1317 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1318 const_tree type, int *nregs)
1320 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1321 return aarch64_vfp_is_call_or_return_candidate (mode,
1322 type,
1323 &pcum->aapcs_vfp_rmode,
1324 nregs,
1325 NULL);
1328 /* Given MODE and TYPE of a function argument, return the alignment in
1329 bits. The idea is to suppress any stronger alignment requested by
1330 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1331 This is a helper function for local use only. */
1333 static unsigned int
1334 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1336 unsigned int alignment;
1338 if (type)
1340 if (!integer_zerop (TYPE_SIZE (type)))
1342 if (TYPE_MODE (type) == mode)
1343 alignment = TYPE_ALIGN (type);
1344 else
1345 alignment = GET_MODE_ALIGNMENT (mode);
1347 else
1348 alignment = 0;
1350 else
1351 alignment = GET_MODE_ALIGNMENT (mode);
1353 return alignment;
1356 /* Layout a function argument according to the AAPCS64 rules. The rule
1357 numbers refer to the rule numbers in the AAPCS64. */
1359 static void
1360 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1361 const_tree type,
1362 bool named ATTRIBUTE_UNUSED)
1364 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1365 int ncrn, nvrn, nregs;
1366 bool allocate_ncrn, allocate_nvrn;
1368 /* We need to do this once per argument. */
1369 if (pcum->aapcs_arg_processed)
1370 return;
1372 pcum->aapcs_arg_processed = true;
1374 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1375 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1376 mode,
1377 type,
1378 &nregs);
1380 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1381 The following code thus handles passing by SIMD/FP registers first. */
1383 nvrn = pcum->aapcs_nvrn;
1385 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
1386 and homogeneous short-vector aggregates (HVA). */
1387 if (allocate_nvrn)
1389 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1391 pcum->aapcs_nextnvrn = nvrn + nregs;
1392 if (!aarch64_composite_type_p (type, mode))
1394 gcc_assert (nregs == 1);
1395 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1397 else
1399 rtx par;
1400 int i;
1401 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1402 for (i = 0; i < nregs; i++)
1404 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1405 V0_REGNUM + nvrn + i);
1406 tmp = gen_rtx_EXPR_LIST
1407 (VOIDmode, tmp,
1408 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1409 XVECEXP (par, 0, i) = tmp;
1411 pcum->aapcs_reg = par;
1413 return;
1415 else
1417 /* C.3 NSRN is set to 8. */
1418 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1419 goto on_stack;
1423 ncrn = pcum->aapcs_ncrn;
1424 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1425 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1428 /* C6 - C9, though the sign and zero extension semantics are
1429 handled elsewhere. This is the case where the argument fits
1430 entirely in general registers. */
1431 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1433 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1435 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1437 /* C.8 if the argument has an alignment of 16 then the NGRN is
1438 rounded up to the next even number. */
1439 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1441 ++ncrn;
1442 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1444 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1445 A reg is still generated for it, but the caller should be smart
1446 enough not to use it. */
1447 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1449 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1451 else
1453 rtx par;
1454 int i;
1456 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1457 for (i = 0; i < nregs; i++)
1459 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1460 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1461 GEN_INT (i * UNITS_PER_WORD));
1462 XVECEXP (par, 0, i) = tmp;
1464 pcum->aapcs_reg = par;
1467 pcum->aapcs_nextncrn = ncrn + nregs;
1468 return;
1471 /* C.11 */
1472 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1474 /* The argument is passed on stack; record the needed number of words for
1475 this argument (we can re-use NREGS) and align the total size if
1476 necessary. */
1477 on_stack:
1478 pcum->aapcs_stack_words = nregs;
1479 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1480 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1481 16 / UNITS_PER_WORD) + 1;
1482 return;
1485 /* Implement TARGET_FUNCTION_ARG. */
1487 static rtx
1488 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1489 const_tree type, bool named)
1491 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1492 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1494 if (mode == VOIDmode)
1495 return NULL_RTX;
1497 aarch64_layout_arg (pcum_v, mode, type, named);
1498 return pcum->aapcs_reg;
1501 void
1502 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1503 const_tree fntype ATTRIBUTE_UNUSED,
1504 rtx libname ATTRIBUTE_UNUSED,
1505 const_tree fndecl ATTRIBUTE_UNUSED,
1506 unsigned n_named ATTRIBUTE_UNUSED)
1508 pcum->aapcs_ncrn = 0;
1509 pcum->aapcs_nvrn = 0;
1510 pcum->aapcs_nextncrn = 0;
1511 pcum->aapcs_nextnvrn = 0;
1512 pcum->pcs_variant = ARM_PCS_AAPCS64;
1513 pcum->aapcs_reg = NULL_RTX;
1514 pcum->aapcs_arg_processed = false;
1515 pcum->aapcs_stack_words = 0;
1516 pcum->aapcs_stack_size = 0;
1518 return;
1521 static void
1522 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1523 enum machine_mode mode,
1524 const_tree type,
1525 bool named)
1527 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1528 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1530 aarch64_layout_arg (pcum_v, mode, type, named);
1531 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1532 != (pcum->aapcs_stack_words != 0));
1533 pcum->aapcs_arg_processed = false;
1534 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1535 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1536 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1537 pcum->aapcs_stack_words = 0;
1538 pcum->aapcs_reg = NULL_RTX;
1542 bool
1543 aarch64_function_arg_regno_p (unsigned regno)
1545 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1546 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
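/* Under AAPCS64 the first NUM_ARG_REGS integer arguments are passed in
   x0-x7 and the first NUM_FP_ARG_REGS floating-point/SIMD arguments in
   v0-v7, which is what the test above expresses.  */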
1549 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1550 PARM_BOUNDARY bits of alignment, but will be given anything up
1551 to STACK_BOUNDARY bits if the type requires it. This makes sure
1552 that both before and after the layout of each argument, the Next
1553 Stacked Argument Address (NSAA) will have a minimum alignment of
1554 8 bytes. */
1556 static unsigned int
1557 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1559 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1561 if (alignment < PARM_BOUNDARY)
1562 alignment = PARM_BOUNDARY;
1563 if (alignment > STACK_BOUNDARY)
1564 alignment = STACK_BOUNDARY;
1565 return alignment;
1568 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1570 Return true if an argument passed on the stack should be padded upwards,
1571 i.e. if the least-significant byte of the stack slot has useful data.
1573 Small aggregate types are placed in the lowest memory address.
1575 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1577 bool
1578 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1580 /* On little-endian targets, the least significant byte of every stack
1581 argument is passed at the lowest byte address of the stack slot. */
1582 if (!BYTES_BIG_ENDIAN)
1583 return true;
1585 /* Otherwise, integral types and floating point types are padded downward:
1586 the least significant byte of a stack argument is passed at the highest
1587 byte address of the stack slot. */
1588 if (type
1589 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type))
1590 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1591 return false;
1593 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1594 return true;
1597 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1599 It specifies padding for the last (may also be the only)
1600 element of a block move between registers and memory.
1601 Assuming the block is in memory, padding upward means that
1602 the last element is padded after its most significant byte,
1603 while in downward padding, the last element is padded at
1604 its least significant byte side.
1606 Small aggregates and small complex types are always padded
1607 upwards.
1609 We don't need to worry about homogeneous floating-point or
1610 short-vector aggregates; their move is not affected by the
1611 padding direction determined here. Regardless of endianness,
1612 each element of such an aggregate is put in the least
1613 significant bits of a fp/simd register.
1615 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1616 register has useful data, and return the opposite if the most
1617 significant byte does. */
1619 bool
1620 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1621 bool first ATTRIBUTE_UNUSED)
1624 /* Small composite types are always padded upward. */
1625 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1627 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1628 : GET_MODE_SIZE (mode));
1629 if (size < 2 * UNITS_PER_WORD)
1630 return true;
1633 /* Otherwise, use the default padding. */
1634 return !BYTES_BIG_ENDIAN;
1637 static enum machine_mode
1638 aarch64_libgcc_cmp_return_mode (void)
1640 return SImode;
1643 static bool
1644 aarch64_frame_pointer_required (void)
1646 /* If the function contains dynamic stack allocations, we need to
1647 use the frame pointer to access the static parts of the frame. */
1648 if (cfun->calls_alloca)
1649 return true;
1651 /* We may have turned flag_omit_frame_pointer on in order to have this
1652 function called; if we did, we also set the 'faked_omit_frame_pointer' flag
1653 and we'll check it here.
1654 If we really did set flag_omit_frame_pointer normally, then we return false
1655 (no frame pointer required) in all cases. */
1657 if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
1658 return false;
1659 else if (flag_omit_leaf_frame_pointer)
1660 return !crtl->is_leaf;
1661 return true;
1664 /* Mark the registers that need to be saved by the callee and calculate
1665 the size of the callee-saved registers area and frame record (both FP
1666 and LR may be omitted). */
1667 static void
1668 aarch64_layout_frame (void)
1670 HOST_WIDE_INT offset = 0;
1671 int regno;
1673 if (reload_completed && cfun->machine->frame.laid_out)
1674 return;
1676 cfun->machine->frame.fp_lr_offset = 0;
1678 /* First mark all the registers that really need to be saved... */
1679 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1680 cfun->machine->frame.reg_offset[regno] = -1;
1682 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1683 cfun->machine->frame.reg_offset[regno] = -1;
1685 /* ... that includes the eh data registers (if needed)... */
1686 if (crtl->calls_eh_return)
1687 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1688 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1690 /* ... and any callee saved register that dataflow says is live. */
1691 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1692 if (df_regs_ever_live_p (regno)
1693 && !call_used_regs[regno])
1694 cfun->machine->frame.reg_offset[regno] = 0;
1696 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1697 if (df_regs_ever_live_p (regno)
1698 && !call_used_regs[regno])
1699 cfun->machine->frame.reg_offset[regno] = 0;
1701 if (frame_pointer_needed)
1703 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1704 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1705 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1708 /* Now assign stack slots for them. */
1709 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1710 if (cfun->machine->frame.reg_offset[regno] != -1)
1712 cfun->machine->frame.reg_offset[regno] = offset;
1713 offset += UNITS_PER_WORD;
1716 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1717 if (cfun->machine->frame.reg_offset[regno] != -1)
1719 cfun->machine->frame.reg_offset[regno] = offset;
1720 offset += UNITS_PER_WORD;
1723 if (frame_pointer_needed)
1725 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1726 offset += UNITS_PER_WORD;
1727 cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1730 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1732 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1733 offset += UNITS_PER_WORD;
1734 cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1737 cfun->machine->frame.padding0 =
1738 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1739 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1741 cfun->machine->frame.saved_regs_size = offset;
1742 cfun->machine->frame.laid_out = true;
1745 /* Make the last instruction frame-related and note that it performs
1746 the operation described by FRAME_PATTERN. */
1748 static void
1749 aarch64_set_frame_expr (rtx frame_pattern)
1751 rtx insn;
1753 insn = get_last_insn ();
1754 RTX_FRAME_RELATED_P (insn) = 1;
1755 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1756 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1757 frame_pattern,
1758 REG_NOTES (insn));
1761 static bool
1762 aarch64_register_saved_on_entry (int regno)
1764 return cfun->machine->frame.reg_offset[regno] != -1;
1768 static void
1769 aarch64_save_or_restore_fprs (int start_offset, int increment,
1770 bool restore, rtx base_rtx)
1773 unsigned regno;
1774 unsigned regno2;
1775 rtx insn;
1776 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1779 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1781 if (aarch64_register_saved_on_entry (regno))
1783 rtx mem;
1784 mem = gen_mem_ref (DFmode,
1785 plus_constant (Pmode,
1786 base_rtx,
1787 start_offset));
1789 for (regno2 = regno + 1;
1790 regno2 <= V31_REGNUM
1791 && !aarch64_register_saved_on_entry (regno2);
1792 regno2++)
1794 /* Empty loop. */
1796 if (regno2 <= V31_REGNUM &&
1797 aarch64_register_saved_on_entry (regno2))
1799 rtx mem2;
1800 /* Next highest register to be saved. */
1801 mem2 = gen_mem_ref (DFmode,
1802 plus_constant
1803 (Pmode,
1804 base_rtx,
1805 start_offset + increment));
1806 if (restore == false)
1808 insn = emit_insn
1809 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1810 mem2, gen_rtx_REG (DFmode, regno2)));
1813 else
1815 insn = emit_insn
1816 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1817 gen_rtx_REG (DFmode, regno2), mem2));
1819 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno));
1820 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno2));
1823 /* The first part of a frame-related parallel insn
1824 is always assumed to be relevant to the frame
1825 calculations; subsequent parts are only
1826 frame-related if explicitly marked. */
1827 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1828 1)) = 1;
1829 regno = regno2;
1830 start_offset += increment * 2;
1832 else
1834 if (restore == false)
1835 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1836 else
1838 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1839 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1841 start_offset += increment;
1843 RTX_FRAME_RELATED_P (insn) = 1;
1850 /* Offset from the stack pointer at which the saves and
1851 restores have to happen. */
1852 static void
1853 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1854 bool restore)
1856 rtx insn;
1857 rtx base_rtx = stack_pointer_rtx;
1858 HOST_WIDE_INT start_offset = offset;
1859 HOST_WIDE_INT increment = UNITS_PER_WORD;
1860 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1861 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1862 unsigned regno;
1863 unsigned regno2;
1865 for (regno = R0_REGNUM; regno <= limit; regno++)
1867 if (aarch64_register_saved_on_entry (regno))
1869 rtx mem;
1870 mem = gen_mem_ref (Pmode,
1871 plus_constant (Pmode,
1872 base_rtx,
1873 start_offset));
1875 for (regno2 = regno + 1;
1876 regno2 <= limit
1877 && !aarch64_register_saved_on_entry (regno2);
1878 regno2++)
1880 /* Empty loop. */
1882 if (regno2 <= limit &&
1883 aarch64_register_saved_on_entry (regno2))
1885 rtx mem2;
1886 /* Next highest register to be saved. */
1887 mem2 = gen_mem_ref (Pmode,
1888 plus_constant
1889 (Pmode,
1890 base_rtx,
1891 start_offset + increment));
1892 if (restore == false)
1894 insn = emit_insn
1895 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1896 mem2, gen_rtx_REG (DImode, regno2)));
1899 else
1901 insn = emit_insn
1902 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1903 gen_rtx_REG (DImode, regno2), mem2));
1905 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1906 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1909 /* The first part of a frame-related parallel insn
1910 is always assumed to be relevant to the frame
1911 calculations; subsequent parts are only
1912 frame-related if explicitly marked. */
1913 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1914 1)) = 1;
1915 regno = regno2;
1916 start_offset += increment * 2;
1918 else
1920 if (restore == false)
1921 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1922 else
1924 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1925 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1927 start_offset += increment;
1929 RTX_FRAME_RELATED_P (insn) = 1;
1933 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1937 /* AArch64 stack frames generated by this compiler look like:
1939 +-------------------------------+
1941 | incoming stack arguments |
1943 +-------------------------------+ <-- arg_pointer_rtx
1945 | callee-allocated save area |
1946 | for register varargs |
1948 +-------------------------------+
1950 | local variables |
1952 +-------------------------------+ <-- frame_pointer_rtx
1954 | callee-saved registers |
1956 +-------------------------------+
1957 | LR' |
1958 +-------------------------------+
1959 | FP' |
1960 P +-------------------------------+ <-- hard_frame_pointer_rtx
1961 | dynamic allocation |
1962 +-------------------------------+
1964 | outgoing stack arguments |
1966 +-------------------------------+ <-- stack_pointer_rtx
1968 Dynamic stack allocations such as alloca insert data at point P.
1969 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
1970 hard_frame_pointer_rtx unchanged. */
1972 /* Generate the prologue instructions for entry into a function.
1973 Establish the stack frame by decreasing the stack pointer with a
1974 properly calculated size and, if necessary, create a frame record
1975 filled with the values of LR and previous frame pointer. The
1976 current FP is also set up if it is in use. */
1978 void
1979 aarch64_expand_prologue (void)
1981 /* sub sp, sp, #<frame_size>
1982 stp {fp, lr}, [sp, #<frame_size> - 16]
1983 add fp, sp, #<frame_size> - hardfp_offset
1984 stp {cs_reg}, [fp, #-16] etc.
1986 sub sp, sp, <final_adjustment_if_any>
1988 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
1989 HOST_WIDE_INT frame_size, offset;
1990 HOST_WIDE_INT fp_offset; /* FP offset from SP */
1991 rtx insn;
1993 aarch64_layout_frame ();
1994 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
1995 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
1996 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
1997 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
1998 + crtl->outgoing_args_size);
1999 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2000 STACK_BOUNDARY / BITS_PER_UNIT);
2002 if (flag_stack_usage_info)
2003 current_function_static_stack_size = frame_size;
2005 fp_offset = (offset
2006 - original_frame_size
2007 - cfun->machine->frame.saved_regs_size);
2009 /* Store pairs and load pairs have a range of only -512 to 504. */
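/* (LDP/STP encode a signed 7-bit immediate scaled by the access size;
   for 8-byte registers that is 8 * [-64, 63], i.e. -512 to 504 in steps
   of 8.)  */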
2010 if (offset >= 512)
2012 /* When the frame has a large size, an initial decrease is done on
2013 the stack pointer to jump over the callee-allocated save area for
2014 register varargs, the local variable area and/or the callee-saved
2015 register area. This will allow the pre-index write-back
2016 store pair instructions to be used for setting up the stack frame
2017 efficiently. */
2018 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2019 if (offset >= 512)
2020 offset = cfun->machine->frame.saved_regs_size;
2022 frame_size -= (offset + crtl->outgoing_args_size);
2023 fp_offset = 0;
2025 if (frame_size >= 0x1000000)
2027 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2028 emit_move_insn (op0, GEN_INT (-frame_size));
2029 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2030 aarch64_set_frame_expr (gen_rtx_SET
2031 (Pmode, stack_pointer_rtx,
2032 gen_rtx_PLUS (Pmode,
2033 stack_pointer_rtx,
2034 GEN_INT (-frame_size))));
2036 else if (frame_size > 0)
2038 if ((frame_size & 0xfff) != frame_size)
2040 insn = emit_insn (gen_add2_insn
2041 (stack_pointer_rtx,
2042 GEN_INT (-(frame_size
2043 & ~(HOST_WIDE_INT)0xfff))));
2044 RTX_FRAME_RELATED_P (insn) = 1;
2046 if ((frame_size & 0xfff) != 0)
2048 insn = emit_insn (gen_add2_insn
2049 (stack_pointer_rtx,
2050 GEN_INT (-(frame_size
2051 & (HOST_WIDE_INT)0xfff))));
2052 RTX_FRAME_RELATED_P (insn) = 1;
2056 else
2057 frame_size = -1;
2059 if (offset > 0)
2061 /* If the frame pointer is needed, save it and LR first. Make the
2062 frame pointer point to the location of the old frame pointer
2063 on the stack. */
2064 if (frame_pointer_needed)
2066 rtx mem_fp, mem_lr;
2068 if (fp_offset)
2070 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2071 GEN_INT (-offset)));
2072 RTX_FRAME_RELATED_P (insn) = 1;
2073 aarch64_set_frame_expr (gen_rtx_SET
2074 (Pmode, stack_pointer_rtx,
2075 gen_rtx_MINUS (Pmode,
2076 stack_pointer_rtx,
2077 GEN_INT (offset))));
2078 mem_fp = gen_frame_mem (DImode,
2079 plus_constant (Pmode,
2080 stack_pointer_rtx,
2081 fp_offset));
2082 mem_lr = gen_frame_mem (DImode,
2083 plus_constant (Pmode,
2084 stack_pointer_rtx,
2085 fp_offset
2086 + UNITS_PER_WORD));
2087 insn = emit_insn (gen_store_pairdi (mem_fp,
2088 hard_frame_pointer_rtx,
2089 mem_lr,
2090 gen_rtx_REG (DImode,
2091 LR_REGNUM)));
2093 else
2095 insn = emit_insn (gen_storewb_pairdi_di
2096 (stack_pointer_rtx, stack_pointer_rtx,
2097 hard_frame_pointer_rtx,
2098 gen_rtx_REG (DImode, LR_REGNUM),
2099 GEN_INT (-offset),
2100 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2101 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2104 /* The first part of a frame-related parallel insn is always
2105 assumed to be relevant to the frame calculations;
2106 subsequent parts are only frame-related if explicitly
2107 marked. */
2108 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2109 RTX_FRAME_RELATED_P (insn) = 1;
2111 /* Set up frame pointer to point to the location of the
2112 previous frame pointer on the stack. */
2113 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2114 stack_pointer_rtx,
2115 GEN_INT (fp_offset)));
2116 aarch64_set_frame_expr (gen_rtx_SET
2117 (Pmode, hard_frame_pointer_rtx,
2118 gen_rtx_PLUS (Pmode,
2119 stack_pointer_rtx,
2120 GEN_INT (fp_offset))));
2121 RTX_FRAME_RELATED_P (insn) = 1;
2122 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2123 hard_frame_pointer_rtx));
2125 else
2127 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2128 GEN_INT (-offset)));
2129 RTX_FRAME_RELATED_P (insn) = 1;
2132 aarch64_save_or_restore_callee_save_registers
2133 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2136 /* when offset >= 512,
2137 sub sp, sp, #<outgoing_args_size> */
2138 if (frame_size > -1)
2140 if (crtl->outgoing_args_size > 0)
2142 insn = emit_insn (gen_add2_insn
2143 (stack_pointer_rtx,
2144 GEN_INT (- crtl->outgoing_args_size)));
2145 RTX_FRAME_RELATED_P (insn) = 1;
2150 /* Generate the epilogue instructions for returning from a function. */
2151 void
2152 aarch64_expand_epilogue (bool for_sibcall)
2154 HOST_WIDE_INT original_frame_size, frame_size, offset;
2155 HOST_WIDE_INT fp_offset;
2156 rtx insn;
2157 rtx cfa_reg;
2159 aarch64_layout_frame ();
2160 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2161 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2162 + crtl->outgoing_args_size);
2163 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2164 STACK_BOUNDARY / BITS_PER_UNIT);
2166 fp_offset = (offset
2167 - original_frame_size
2168 - cfun->machine->frame.saved_regs_size);
2170 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2172 /* Store pairs and load pairs have a range of only -512 to 504. */
2173 if (offset >= 512)
2175 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2176 if (offset >= 512)
2177 offset = cfun->machine->frame.saved_regs_size;
2179 frame_size -= (offset + crtl->outgoing_args_size);
2180 fp_offset = 0;
2181 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2183 insn = emit_insn (gen_add2_insn
2184 (stack_pointer_rtx,
2185 GEN_INT (crtl->outgoing_args_size)));
2186 RTX_FRAME_RELATED_P (insn) = 1;
2189 else
2190 frame_size = -1;
2192 /* If there were outgoing arguments or we've done dynamic stack
2193 allocation, then restore the stack pointer from the frame
2194 pointer. This is at most one insn and more efficient than using
2195 GCC's internal mechanism. */
2196 if (frame_pointer_needed
2197 && (crtl->outgoing_args_size || cfun->calls_alloca))
2199 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2200 hard_frame_pointer_rtx,
2201 GEN_INT (- fp_offset)));
2202 RTX_FRAME_RELATED_P (insn) = 1;
2203 /* As SP is set to (FP - fp_offset), according to the rules in
2204 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2205 from the value of SP from now on. */
2206 cfa_reg = stack_pointer_rtx;
2209 aarch64_save_or_restore_callee_save_registers
2210 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2212 /* Restore the frame pointer and lr if the frame pointer is needed. */
2213 if (offset > 0)
2215 if (frame_pointer_needed)
2217 rtx mem_fp, mem_lr;
2219 if (fp_offset)
2221 mem_fp = gen_frame_mem (DImode,
2222 plus_constant (Pmode,
2223 stack_pointer_rtx,
2224 fp_offset));
2225 mem_lr = gen_frame_mem (DImode,
2226 plus_constant (Pmode,
2227 stack_pointer_rtx,
2228 fp_offset
2229 + UNITS_PER_WORD));
2230 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2231 mem_fp,
2232 gen_rtx_REG (DImode,
2233 LR_REGNUM),
2234 mem_lr));
2236 else
2238 insn = emit_insn (gen_loadwb_pairdi_di
2239 (stack_pointer_rtx,
2240 stack_pointer_rtx,
2241 hard_frame_pointer_rtx,
2242 gen_rtx_REG (DImode, LR_REGNUM),
2243 GEN_INT (offset),
2244 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2245 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2246 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2247 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2248 plus_constant (Pmode, cfa_reg,
2249 offset))));
2252 /* The first part of a frame-related parallel insn
2253 is always assumed to be relevant to the frame
2254 calculations; subsequent parts are only
2255 frame-related if explicitly marked. */
2256 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2257 RTX_FRAME_RELATED_P (insn) = 1;
2258 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2259 add_reg_note (insn, REG_CFA_RESTORE,
2260 gen_rtx_REG (DImode, LR_REGNUM));
2262 if (fp_offset)
2264 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2265 GEN_INT (offset)));
2266 RTX_FRAME_RELATED_P (insn) = 1;
2269 else
2271 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2272 GEN_INT (offset)));
2273 RTX_FRAME_RELATED_P (insn) = 1;
2277 /* Stack adjustment for exception handler. */
2278 if (crtl->calls_eh_return)
2280 /* We need to unwind the stack by the offset computed by
2281 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2282 based on SP. Ideally we would update the SP and define the
2283 CFA along the lines of:
2285 SP = SP + EH_RETURN_STACKADJ_RTX
2286 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2288 However the dwarf emitter only understands a constant
2289 register offset.
2291 The solution chosen here is to use the otherwise unused IP0
2292 as a temporary register to hold the current SP value. The
2293 CFA is described using IP0 then SP is modified. */
2295 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2297 insn = emit_move_insn (ip0, stack_pointer_rtx);
2298 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2299 RTX_FRAME_RELATED_P (insn) = 1;
2301 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2303 /* Ensure the assignment to IP0 does not get optimized away. */
2304 emit_use (ip0);
2307 if (frame_size > -1)
2309 if (frame_size >= 0x1000000)
2311 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2312 emit_move_insn (op0, GEN_INT (frame_size));
2313 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2314 aarch64_set_frame_expr (gen_rtx_SET
2315 (Pmode, stack_pointer_rtx,
2316 gen_rtx_PLUS (Pmode,
2317 stack_pointer_rtx,
2318 GEN_INT (frame_size))));
2320 else if (frame_size > 0)
2322 if ((frame_size & 0xfff) != 0)
2324 insn = emit_insn (gen_add2_insn
2325 (stack_pointer_rtx,
2326 GEN_INT ((frame_size
2327 & (HOST_WIDE_INT) 0xfff))));
2328 RTX_FRAME_RELATED_P (insn) = 1;
2330 if ((frame_size & 0xfff) != frame_size)
2332 insn = emit_insn (gen_add2_insn
2333 (stack_pointer_rtx,
2334 GEN_INT ((frame_size
2335 & ~ (HOST_WIDE_INT) 0xfff))));
2336 RTX_FRAME_RELATED_P (insn) = 1;
2340 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2341 gen_rtx_PLUS (Pmode,
2342 stack_pointer_rtx,
2343 GEN_INT (offset))));
2346 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2347 if (!for_sibcall)
2348 emit_jump_insn (ret_rtx);
2351 /* Return the place to copy the exception unwinding return address to.
2352 This will probably be a stack slot, but could (in theory) be the
2353 return register. */
2355 aarch64_final_eh_return_addr (void)
2357 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2358 aarch64_layout_frame ();
2359 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2360 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2361 + crtl->outgoing_args_size);
2362 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2363 STACK_BOUNDARY / BITS_PER_UNIT);
2364 fp_offset = offset
2365 - original_frame_size
2366 - cfun->machine->frame.saved_regs_size;
2368 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2369 return gen_rtx_REG (DImode, LR_REGNUM);
2371 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2372 result in a store to save LR introduced by builtin_eh_return () being
2373 incorrectly deleted because the alias is not detected.
2374 So in the calculation of the address to copy the exception unwinding
2375 return address to, we distinguish two cases.
2376 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2377 we return a SP-relative location since all the addresses are SP-relative
2378 in this case. This prevents the store from being optimized away.
2379 If the fp_offset is not 0, then the addresses will be FP-relative and
2380 therefore we return a FP-relative location. */
2382 if (frame_pointer_needed)
2384 if (fp_offset)
2385 return gen_frame_mem (DImode,
2386 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2387 else
2388 return gen_frame_mem (DImode,
2389 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2392 /* If FP is not needed, we calculate the location of LR, which would be
2393 at the top of the saved registers block. */
2395 return gen_frame_mem (DImode,
2396 plus_constant (Pmode,
2397 stack_pointer_rtx,
2398 fp_offset
2399 + cfun->machine->frame.saved_regs_size
2400 - 2 * UNITS_PER_WORD));
2403 /* Output code to build up a constant in a register. */
2404 static void
2405 aarch64_build_constant (int regnum, HOST_WIDE_INT val)
2407 if (aarch64_bitmask_imm (val, DImode))
2408 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2409 else
2411 int i;
2412 int ncount = 0;
2413 int zcount = 0;
2414 HOST_WIDE_INT valp = val >> 16;
2415 HOST_WIDE_INT valm;
2416 HOST_WIDE_INT tval;
2418 for (i = 16; i < 64; i += 16)
2420 valm = (valp & 0xffff);
2422 if (valm != 0)
2423 ++ zcount;
2425 if (valm != 0xffff)
2426 ++ ncount;
2428 valp >>= 16;
2431 /* zcount contains the number of additional MOVK instructions
2432 required if the constant is built up with an initial MOVZ instruction,
2433 while ncount is the number of MOVK instructions required if starting
2434 with a MOVN instruction. Choose the sequence that yields the
2435 fewest instructions, preferring MOVZ when both counts are
2436 equal. */
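/* For example, for val == 0x0000ffff0000aaaa the three upper 16-bit
   chunks are 0x0000, 0xffff and 0x0000, so zcount == 1 and ncount == 2;
   the MOVZ sequence wins, needing only a MOVZ of 0xaaaa plus one MOVK
   for the 0xffff chunk, whereas a MOVN start would need two MOVKs.  */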
2437 if (ncount < zcount)
2439 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2440 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2441 tval = 0xffff;
2443 else
2445 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2446 GEN_INT (val & 0xffff));
2447 tval = 0;
2450 val >>= 16;
2452 for (i = 16; i < 64; i += 16)
2454 if ((val & 0xffff) != tval)
2455 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2456 GEN_INT (i), GEN_INT (val & 0xffff)));
2457 val >>= 16;
2462 static void
2463 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2465 HOST_WIDE_INT mdelta = delta;
2466 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2467 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2469 if (mdelta < 0)
2470 mdelta = -mdelta;
2472 if (mdelta >= 4096 * 4096)
2474 aarch64_build_constant (scratchreg, delta);
2475 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2477 else if (mdelta > 0)
2479 if (mdelta >= 4096)
2481 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2482 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2483 if (delta < 0)
2484 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2485 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2486 else
2487 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2488 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2490 if (mdelta % 4096 != 0)
2492 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2493 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2494 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2499 /* Output code to add DELTA to the first argument, and then jump
2500 to FUNCTION. Used for C++ multiple inheritance. */
2501 static void
2502 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2503 HOST_WIDE_INT delta,
2504 HOST_WIDE_INT vcall_offset,
2505 tree function)
2507 /* The this pointer is always in x0. Note that this differs from
2508 Arm where the this pointer may be bumped to r1 if r0 is required
2509 to return a pointer to an aggregate. On AArch64 a result value
2510 pointer will be in x8. */
2511 int this_regno = R0_REGNUM;
2512 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2514 reload_completed = 1;
2515 emit_note (NOTE_INSN_PROLOGUE_END);
2517 if (vcall_offset == 0)
2518 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2519 else
2521 gcc_assert ((vcall_offset & 0x7) == 0);
2523 this_rtx = gen_rtx_REG (Pmode, this_regno);
2524 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2525 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2527 addr = this_rtx;
2528 if (delta != 0)
2530 if (delta >= -256 && delta < 256)
2531 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2532 plus_constant (Pmode, this_rtx, delta));
2533 else
2534 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2537 aarch64_emit_move (temp0, gen_rtx_MEM (Pmode, addr));
2539 if (vcall_offset >= -256 && vcall_offset < 32768)
2540 addr = plus_constant (Pmode, temp0, vcall_offset);
2541 else
2543 aarch64_build_constant (IP1_REGNUM, vcall_offset);
2544 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2547 aarch64_emit_move (temp1, gen_rtx_MEM (Pmode,addr));
2548 emit_insn (gen_add2_insn (this_rtx, temp1));
2551 /* Generate a tail call to the target function. */
2552 if (!TREE_USED (function))
2554 assemble_external (function);
2555 TREE_USED (function) = 1;
2557 funexp = XEXP (DECL_RTL (function), 0);
2558 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2559 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2560 SIBLING_CALL_P (insn) = 1;
2562 insn = get_insns ();
2563 shorten_branches (insn);
2564 final_start_function (insn, file, 1);
2565 final (insn, file, 1);
2566 final_end_function ();
2568 /* Stop pretending to be a post-reload pass. */
2569 reload_completed = 0;
2572 static int
2573 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2575 if (GET_CODE (*x) == SYMBOL_REF)
2576 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2578 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2579 TLS offsets, not real symbol references. */
2580 if (GET_CODE (*x) == UNSPEC
2581 && XINT (*x, 1) == UNSPEC_TLS)
2582 return -1;
2584 return 0;
2587 static bool
2588 aarch64_tls_referenced_p (rtx x)
2590 if (!TARGET_HAVE_TLS)
2591 return false;
2593 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2597 static int
2598 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2600 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2601 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2603 if (*imm1 < *imm2)
2604 return -1;
2605 if (*imm1 > *imm2)
2606 return +1;
2607 return 0;
2611 static void
2612 aarch64_build_bitmask_table (void)
2614 unsigned HOST_WIDE_INT mask, imm;
2615 unsigned int log_e, e, s, r;
2616 unsigned int nimms = 0;
2618 for (log_e = 1; log_e <= 6; log_e++)
2620 e = 1 << log_e;
2621 if (e == 64)
2622 mask = ~(HOST_WIDE_INT) 0;
2623 else
2624 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2625 for (s = 1; s < e; s++)
2627 for (r = 0; r < e; r++)
2629 /* set s consecutive bits to 1 (s < 64) */
2630 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2631 /* rotate right by r */
2632 if (r != 0)
2633 imm = ((imm >> r) | (imm << (e - r))) & mask;
2634 /* replicate the constant depending on SIMD size */
2635 switch (log_e) {
2636 case 1: imm |= (imm << 2);
2637 case 2: imm |= (imm << 4);
2638 case 3: imm |= (imm << 8);
2639 case 4: imm |= (imm << 16);
2640 case 5: imm |= (imm << 32);
2641 case 6:
2642 break;
2643 default:
2644 gcc_unreachable ();
2646 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2647 aarch64_bitmasks[nimms++] = imm;
2652 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2653 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2654 aarch64_bitmasks_cmp);
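/* For example, with log_e == 3 (e == 8), s == 3 and r == 1, the run
   0b00000111 rotates to 0x83 and the fall-through replication above
   expands it to the table entry 0x8383838383838383.  */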
2658 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2659 a left shift of 0 or 12 bits. */
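/* For example, 0xabc (shift 0) and 0xabc000 (shift 12) are accepted,
   but 0xabc00 is not, because its non-zero bits straddle both
   positions.  */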
2660 bool
2661 aarch64_uimm12_shift (HOST_WIDE_INT val)
2663 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2664 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2669 /* Return true if val is an immediate that can be loaded into a
2670 register by a MOVZ instruction. */
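/* For DImode this means a 16-bit chunk at bit position 0, 16, 32 or 48,
   e.g. 0x12340000 or 0xabcd000000000000; for SImode only positions 0
   and 16 qualify.  */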
2671 static bool
2672 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2674 if (GET_MODE_SIZE (mode) > 4)
2676 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2677 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2678 return 1;
2680 else
2682 /* Ignore sign extension. */
2683 val &= (HOST_WIDE_INT) 0xffffffff;
2685 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2686 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2690 /* Return true if val is a valid bitmask immediate. */
2691 bool
2692 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2694 if (GET_MODE_SIZE (mode) < 8)
2696 /* Replicate bit pattern. */
2697 val &= (HOST_WIDE_INT) 0xffffffff;
2698 val |= val << 32;
2700 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2701 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2705 /* Return true if val is an immediate that can be loaded into a
2706 register in a single instruction. */
2707 bool
2708 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2710 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2711 return 1;
2712 return aarch64_bitmask_imm (val, mode);
2715 static bool
2716 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2718 rtx base, offset;
2720 if (GET_CODE (x) == HIGH)
2721 return true;
2723 split_const (x, &base, &offset);
2724 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2725 return (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2726 != SYMBOL_FORCE_TO_MEM);
2728 return aarch64_tls_referenced_p (x);
2731 /* Return true if register REGNO is a valid index register.
2732 STRICT_P is true if REG_OK_STRICT is in effect. */
2734 bool
2735 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2737 if (!HARD_REGISTER_NUM_P (regno))
2739 if (!strict_p)
2740 return true;
2742 if (!reg_renumber)
2743 return false;
2745 regno = reg_renumber[regno];
2747 return GP_REGNUM_P (regno);
2750 /* Return true if register REGNO is a valid base register.
2751 STRICT_P is true if REG_OK_STRICT is in effect. */
2753 bool
2754 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2756 if (!HARD_REGISTER_NUM_P (regno))
2758 if (!strict_p)
2759 return true;
2761 if (!reg_renumber)
2762 return false;
2764 regno = reg_renumber[regno];
2767 /* The fake registers will be eliminated to either the stack or
2768 hard frame pointer, both of which are usually valid base registers.
2769 Reload deals with the cases where the eliminated form isn't valid. */
2770 return (GP_REGNUM_P (regno)
2771 || regno == SP_REGNUM
2772 || regno == FRAME_POINTER_REGNUM
2773 || regno == ARG_POINTER_REGNUM);
2776 /* Return true if X is a valid base register.
2777 STRICT_P is true if REG_OK_STRICT is in effect. */
2779 static bool
2780 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2782 if (!strict_p && GET_CODE (x) == SUBREG)
2783 x = SUBREG_REG (x);
2785 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2788 /* Return true if address offset is a valid index. If it is, fill in INFO
2789 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2791 static bool
2792 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2793 enum machine_mode mode, bool strict_p)
2795 enum aarch64_address_type type;
2796 rtx index;
2797 int shift;
2799 /* (reg:P) */
2800 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2801 && GET_MODE (x) == Pmode)
2803 type = ADDRESS_REG_REG;
2804 index = x;
2805 shift = 0;
2807 /* (sign_extend:DI (reg:SI)) */
2808 else if ((GET_CODE (x) == SIGN_EXTEND
2809 || GET_CODE (x) == ZERO_EXTEND)
2810 && GET_MODE (x) == DImode
2811 && GET_MODE (XEXP (x, 0)) == SImode)
2813 type = (GET_CODE (x) == SIGN_EXTEND)
2814 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2815 index = XEXP (x, 0);
2816 shift = 0;
2818 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2819 else if (GET_CODE (x) == MULT
2820 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2821 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2822 && GET_MODE (XEXP (x, 0)) == DImode
2823 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2824 && CONST_INT_P (XEXP (x, 1)))
2826 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2827 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2828 index = XEXP (XEXP (x, 0), 0);
2829 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2831 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2832 else if (GET_CODE (x) == ASHIFT
2833 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2834 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2835 && GET_MODE (XEXP (x, 0)) == DImode
2836 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2837 && CONST_INT_P (XEXP (x, 1)))
2839 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2840 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2841 index = XEXP (XEXP (x, 0), 0);
2842 shift = INTVAL (XEXP (x, 1));
2844 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2845 else if ((GET_CODE (x) == SIGN_EXTRACT
2846 || GET_CODE (x) == ZERO_EXTRACT)
2847 && GET_MODE (x) == DImode
2848 && GET_CODE (XEXP (x, 0)) == MULT
2849 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2850 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2852 type = (GET_CODE (x) == SIGN_EXTRACT)
2853 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2854 index = XEXP (XEXP (x, 0), 0);
2855 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2856 if (INTVAL (XEXP (x, 1)) != 32 + shift
2857 || INTVAL (XEXP (x, 2)) != 0)
2858 shift = -1;
2860 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2861 (const_int 0xffffffff<<shift)) */
2862 else if (GET_CODE (x) == AND
2863 && GET_MODE (x) == DImode
2864 && GET_CODE (XEXP (x, 0)) == MULT
2865 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2866 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2867 && CONST_INT_P (XEXP (x, 1)))
2869 type = ADDRESS_REG_UXTW;
2870 index = XEXP (XEXP (x, 0), 0);
2871 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2872 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2873 shift = -1;
2875 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2876 else if ((GET_CODE (x) == SIGN_EXTRACT
2877 || GET_CODE (x) == ZERO_EXTRACT)
2878 && GET_MODE (x) == DImode
2879 && GET_CODE (XEXP (x, 0)) == ASHIFT
2880 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2881 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2883 type = (GET_CODE (x) == SIGN_EXTRACT)
2884 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2885 index = XEXP (XEXP (x, 0), 0);
2886 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2887 if (INTVAL (XEXP (x, 1)) != 32 + shift
2888 || INTVAL (XEXP (x, 2)) != 0)
2889 shift = -1;
2891 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2892 (const_int 0xffffffff<<shift)) */
2893 else if (GET_CODE (x) == AND
2894 && GET_MODE (x) == DImode
2895 && GET_CODE (XEXP (x, 0)) == ASHIFT
2896 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2897 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2898 && CONST_INT_P (XEXP (x, 1)))
2900 type = ADDRESS_REG_UXTW;
2901 index = XEXP (XEXP (x, 0), 0);
2902 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2903 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2904 shift = -1;
2906 /* (mult:P (reg:P) (const_int scale)) */
2907 else if (GET_CODE (x) == MULT
2908 && GET_MODE (x) == Pmode
2909 && GET_MODE (XEXP (x, 0)) == Pmode
2910 && CONST_INT_P (XEXP (x, 1)))
2912 type = ADDRESS_REG_REG;
2913 index = XEXP (x, 0);
2914 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2916 /* (ashift:P (reg:P) (const_int shift)) */
2917 else if (GET_CODE (x) == ASHIFT
2918 && GET_MODE (x) == Pmode
2919 && GET_MODE (XEXP (x, 0)) == Pmode
2920 && CONST_INT_P (XEXP (x, 1)))
2922 type = ADDRESS_REG_REG;
2923 index = XEXP (x, 0);
2924 shift = INTVAL (XEXP (x, 1));
2926 else
2927 return false;
2929 if (GET_CODE (index) == SUBREG)
2930 index = SUBREG_REG (index);
2932 if ((shift == 0 ||
2933 (shift > 0 && shift <= 3
2934 && (1 << shift) == GET_MODE_SIZE (mode)))
2935 && REG_P (index)
2936 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
2938 info->type = type;
2939 info->offset = index;
2940 info->shift = shift;
2941 return true;
2944 return false;
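/* For example, for an SImode access an index such as
   (mult:DI (reg:DI) (const_int 4)) is classified as ADDRESS_REG_REG
   with shift == 2, corresponding to an address like [x1, x2, lsl 2].  */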
2947 static inline bool
2948 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2950 return (offset >= -64 * GET_MODE_SIZE (mode)
2951 && offset < 64 * GET_MODE_SIZE (mode)
2952 && offset % GET_MODE_SIZE (mode) == 0);
2955 static inline bool
2956 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
2957 HOST_WIDE_INT offset)
2959 return offset >= -256 && offset < 256;
2962 static inline bool
2963 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2965 return (offset >= 0
2966 && offset < 4096 * GET_MODE_SIZE (mode)
2967 && offset % GET_MODE_SIZE (mode) == 0);
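/* For a DImode (8-byte) access the three predicates above accept,
   respectively, multiples of 8 in [-512, 504], any offset in
   [-256, 255], and multiples of 8 in [0, 32760].  */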
2970 /* Return true if X is a valid address for machine mode MODE. If it is,
2971 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
2972 effect. OUTER_CODE is PARALLEL for a load/store pair. */
2974 static bool
2975 aarch64_classify_address (struct aarch64_address_info *info,
2976 rtx x, enum machine_mode mode,
2977 RTX_CODE outer_code, bool strict_p)
2979 enum rtx_code code = GET_CODE (x);
2980 rtx op0, op1;
2981 bool allow_reg_index_p =
2982 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
2984 /* Don't support anything other than POST_INC or REG addressing for
2985 AdvSIMD. */
2986 if (aarch64_vector_mode_p (mode)
2987 && (code != POST_INC && code != REG))
2988 return false;
2990 switch (code)
2992 case REG:
2993 case SUBREG:
2994 info->type = ADDRESS_REG_IMM;
2995 info->base = x;
2996 info->offset = const0_rtx;
2997 return aarch64_base_register_rtx_p (x, strict_p);
2999 case PLUS:
3000 op0 = XEXP (x, 0);
3001 op1 = XEXP (x, 1);
3002 if (GET_MODE_SIZE (mode) != 0
3003 && CONST_INT_P (op1)
3004 && aarch64_base_register_rtx_p (op0, strict_p))
3006 HOST_WIDE_INT offset = INTVAL (op1);
3008 info->type = ADDRESS_REG_IMM;
3009 info->base = op0;
3010 info->offset = op1;
3012 /* TImode and TFmode values are allowed in both pairs of X
3013 registers and individual Q registers. The available
3014 address modes are:
3015 X,X: 7-bit signed scaled offset
3016 Q: 9-bit signed offset
3017 We conservatively require an offset representable in either mode.
3019 if (mode == TImode || mode == TFmode)
3020 return (offset_7bit_signed_scaled_p (mode, offset)
3021 && offset_9bit_signed_unscaled_p (mode, offset));
3023 if (outer_code == PARALLEL)
3024 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3025 && offset_7bit_signed_scaled_p (mode, offset));
3026 else
3027 return (offset_9bit_signed_unscaled_p (mode, offset)
3028 || offset_12bit_unsigned_scaled_p (mode, offset));
3031 if (allow_reg_index_p)
3033 /* Look for base + (scaled/extended) index register. */
3034 if (aarch64_base_register_rtx_p (op0, strict_p)
3035 && aarch64_classify_index (info, op1, mode, strict_p))
3037 info->base = op0;
3038 return true;
3040 if (aarch64_base_register_rtx_p (op1, strict_p)
3041 && aarch64_classify_index (info, op0, mode, strict_p))
3043 info->base = op1;
3044 return true;
3048 return false;
3050 case POST_INC:
3051 case POST_DEC:
3052 case PRE_INC:
3053 case PRE_DEC:
3054 info->type = ADDRESS_REG_WB;
3055 info->base = XEXP (x, 0);
3056 info->offset = NULL_RTX;
3057 return aarch64_base_register_rtx_p (info->base, strict_p);
3059 case POST_MODIFY:
3060 case PRE_MODIFY:
3061 info->type = ADDRESS_REG_WB;
3062 info->base = XEXP (x, 0);
3063 if (GET_CODE (XEXP (x, 1)) == PLUS
3064 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3065 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3066 && aarch64_base_register_rtx_p (info->base, strict_p))
3068 HOST_WIDE_INT offset;
3069 info->offset = XEXP (XEXP (x, 1), 1);
3070 offset = INTVAL (info->offset);
3072 /* TImode and TFmode values are allowed in both pairs of X
3073 registers and individual Q registers. The available
3074 address modes are:
3075 X,X: 7-bit signed scaled offset
3076 Q: 9-bit signed offset
3077 We conservatively require an offset representable in either mode.
3079 if (mode == TImode || mode == TFmode)
3080 return (offset_7bit_signed_scaled_p (mode, offset)
3081 && offset_9bit_signed_unscaled_p (mode, offset));
3083 if (outer_code == PARALLEL)
3084 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3085 && offset_7bit_signed_scaled_p (mode, offset));
3086 else
3087 return offset_9bit_signed_unscaled_p (mode, offset);
3089 return false;
3091 case CONST:
3092 case SYMBOL_REF:
3093 case LABEL_REF:
3094 /* load literal: pc-relative constant pool entry. Only supported
3095 for SI mode or larger. */
3096 info->type = ADDRESS_SYMBOLIC;
3097 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3099 rtx sym, addend;
3101 split_const (x, &sym, &addend);
3102 return (GET_CODE (sym) == LABEL_REF
3103 || (GET_CODE (sym) == SYMBOL_REF
3104 && CONSTANT_POOL_ADDRESS_P (sym)));
3106 return false;
3108 case LO_SUM:
3109 info->type = ADDRESS_LO_SUM;
3110 info->base = XEXP (x, 0);
3111 info->offset = XEXP (x, 1);
3112 if (allow_reg_index_p
3113 && aarch64_base_register_rtx_p (info->base, strict_p))
3115 rtx sym, offs;
3116 split_const (info->offset, &sym, &offs);
3117 if (GET_CODE (sym) == SYMBOL_REF
3118 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3119 == SYMBOL_SMALL_ABSOLUTE))
3121 /* The symbol and offset must be aligned to the access size. */
3122 unsigned int align;
3123 unsigned int ref_size;
3125 if (CONSTANT_POOL_ADDRESS_P (sym))
3126 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3127 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3129 tree exp = SYMBOL_REF_DECL (sym);
3130 align = TYPE_ALIGN (TREE_TYPE (exp));
3131 align = CONSTANT_ALIGNMENT (exp, align);
3133 else if (SYMBOL_REF_DECL (sym))
3134 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3135 else
3136 align = BITS_PER_UNIT;
3138 ref_size = GET_MODE_SIZE (mode);
3139 if (ref_size == 0)
3140 ref_size = GET_MODE_SIZE (DImode);
3142 return ((INTVAL (offs) & (ref_size - 1)) == 0
3143 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3146 return false;
3148 default:
3149 return false;
3153 bool
3154 aarch64_symbolic_address_p (rtx x)
3156 rtx offset;
3158 split_const (x, &x, &offset);
3159 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3162 /* Classify the base of symbolic expression X, given that X appears in
3163 context CONTEXT. */
3165 enum aarch64_symbol_type
3166 aarch64_classify_symbolic_expression (rtx x,
3167 enum aarch64_symbol_context context)
3169 rtx offset;
3171 split_const (x, &x, &offset);
3172 return aarch64_classify_symbol (x, context);
3176 /* Return TRUE if X is a legitimate address for accessing memory in
3177 mode MODE. */
3178 static bool
3179 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3181 struct aarch64_address_info addr;
3183 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3186 /* Return TRUE if X is a legitimate address for accessing memory in
3187 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3188 pair operation. */
3189 bool
3190 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3191 RTX_CODE outer_code, bool strict_p)
3193 struct aarch64_address_info addr;
3195 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3198 /* Return TRUE if rtx X is immediate constant 0.0 */
3199 bool
3200 aarch64_float_const_zero_rtx_p (rtx x)
3202 REAL_VALUE_TYPE r;
3204 if (GET_MODE (x) == VOIDmode)
3205 return false;
3207 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3208 if (REAL_VALUE_MINUS_ZERO (r))
3209 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3210 return REAL_VALUES_EQUAL (r, dconst0);
3213 /* Return the fixed registers used for condition codes. */
3215 static bool
3216 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3218 *p1 = CC_REGNUM;
3219 *p2 = INVALID_REGNUM;
3220 return true;
3223 enum machine_mode
3224 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3226 /* All floating point compares return CCFP if it is an equality
3227 comparison, and CCFPE otherwise. */
3228 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3230 switch (code)
3232 case EQ:
3233 case NE:
3234 case UNORDERED:
3235 case ORDERED:
3236 case UNLT:
3237 case UNLE:
3238 case UNGT:
3239 case UNGE:
3240 case UNEQ:
3241 case LTGT:
3242 return CCFPmode;
3244 case LT:
3245 case LE:
3246 case GT:
3247 case GE:
3248 return CCFPEmode;
3250 default:
3251 gcc_unreachable ();
3255 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3256 && y == const0_rtx
3257 && (code == EQ || code == NE || code == LT || code == GE)
3258 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3259 || GET_CODE (x) == NEG))
3260 return CC_NZmode;
3262 /* A compare with a shifted or negated operand. Because of canonicalization,
3263 the comparison will have to be swapped when we emit the assembly
3264 code. */
3265 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3266 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3267 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3268 || GET_CODE (x) == LSHIFTRT
3269 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND
3270 || GET_CODE (x) == NEG))
3271 return CC_SWPmode;
3273 /* A compare of a mode narrower than SI mode against zero can be done
3274 by extending the value in the comparison. */
3275 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3276 && y == const0_rtx)
3277 /* Only use sign-extension if we really need it. */
3278 return ((code == GT || code == GE || code == LE || code == LT)
3279 ? CC_SESWPmode : CC_ZESWPmode);
3281 /* For everything else, return CCmode. */
3282 return CCmode;
3285 static unsigned
3286 aarch64_get_condition_code (rtx x)
3288 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3289 enum rtx_code comp_code = GET_CODE (x);
3291 if (GET_MODE_CLASS (mode) != MODE_CC)
3292 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3294 switch (mode)
3296 case CCFPmode:
3297 case CCFPEmode:
3298 switch (comp_code)
3300 case GE: return AARCH64_GE;
3301 case GT: return AARCH64_GT;
3302 case LE: return AARCH64_LS;
3303 case LT: return AARCH64_MI;
3304 case NE: return AARCH64_NE;
3305 case EQ: return AARCH64_EQ;
3306 case ORDERED: return AARCH64_VC;
3307 case UNORDERED: return AARCH64_VS;
3308 case UNLT: return AARCH64_LT;
3309 case UNLE: return AARCH64_LE;
3310 case UNGT: return AARCH64_HI;
3311 case UNGE: return AARCH64_PL;
3312 default: gcc_unreachable ();
3314 break;
3316 case CCmode:
3317 switch (comp_code)
3319 case NE: return AARCH64_NE;
3320 case EQ: return AARCH64_EQ;
3321 case GE: return AARCH64_GE;
3322 case GT: return AARCH64_GT;
3323 case LE: return AARCH64_LE;
3324 case LT: return AARCH64_LT;
3325 case GEU: return AARCH64_CS;
3326 case GTU: return AARCH64_HI;
3327 case LEU: return AARCH64_LS;
3328 case LTU: return AARCH64_CC;
3329 default: gcc_unreachable ();
3331 break;
3333 case CC_SWPmode:
3334 case CC_ZESWPmode:
3335 case CC_SESWPmode:
3336 switch (comp_code)
3338 case NE: return AARCH64_NE;
3339 case EQ: return AARCH64_EQ;
3340 case GE: return AARCH64_LE;
3341 case GT: return AARCH64_LT;
3342 case LE: return AARCH64_GE;
3343 case LT: return AARCH64_GT;
3344 case GEU: return AARCH64_LS;
3345 case GTU: return AARCH64_CC;
3346 case LEU: return AARCH64_CS;
3347 case LTU: return AARCH64_HI;
3348 default: gcc_unreachable ();
3350 break;
3352 case CC_NZmode:
3353 switch (comp_code)
3355 case NE: return AARCH64_NE;
3356 case EQ: return AARCH64_EQ;
3357 case GE: return AARCH64_PL;
3358 case LT: return AARCH64_MI;
3359 default: gcc_unreachable ();
3361 break;
3363 default:
3364 gcc_unreachable ();
3365 break;
3369 static unsigned
3370 bit_count (unsigned HOST_WIDE_INT value)
3372 unsigned count = 0;
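/* Kernighan's method: value &= value - 1 clears the lowest set bit,
   so the loop iterates once per set bit.  */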
3374 while (value)
3376 count++;
3377 value &= value - 1;
3380 return count;
3383 void
3384 aarch64_print_operand (FILE *f, rtx x, char code)
3386 switch (code)
3388 case 'e':
3389 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3391 int n;
3393 if (GET_CODE (x) != CONST_INT
3394 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3396 output_operand_lossage ("invalid operand for '%%%c'", code);
3397 return;
3400 switch (n)
3402 case 3:
3403 fputc ('b', f);
3404 break;
3405 case 4:
3406 fputc ('h', f);
3407 break;
3408 case 5:
3409 fputc ('w', f);
3410 break;
3411 default:
3412 output_operand_lossage ("invalid operand for '%%%c'", code);
3413 return;
3416 break;
3418 case 'p':
3420 int n;
3422 /* Print N such that 2^N == X. */
3423 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3425 output_operand_lossage ("invalid operand for '%%%c'", code);
3426 return;
3429 asm_fprintf (f, "%d", n);
3431 break;
3433 case 'P':
3434 /* Print the number of non-zero bits in X (a const_int). */
3435 if (GET_CODE (x) != CONST_INT)
3437 output_operand_lossage ("invalid operand for '%%%c'", code);
3438 return;
3441 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3442 break;
3444 case 'H':
3445 /* Print the higher numbered register of a pair (TImode) of regs. */
3446 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3448 output_operand_lossage ("invalid operand for '%%%c'", code);
3449 return;
3452 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3453 break;
3455 case 'm':
3456 /* Print a condition (eq, ne, etc). */
3458 /* CONST_TRUE_RTX means always -- that's the default. */
3459 if (x == const_true_rtx)
3460 return;
3462 if (!COMPARISON_P (x))
3464 output_operand_lossage ("invalid operand for '%%%c'", code);
3465 return;
3468 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3469 break;
3471 case 'M':
3472 /* Print the inverse of a condition (eq <-> ne, etc). */
3474 /* CONST_TRUE_RTX means never -- that's the default. */
3475 if (x == const_true_rtx)
3477 fputs ("nv", f);
3478 return;
3481 if (!COMPARISON_P (x))
3483 output_operand_lossage ("invalid operand for '%%%c'", code);
3484 return;
3487 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3488 (aarch64_get_condition_code (x))], f);
3489 break;
3491 case 'b':
3492 case 'h':
3493 case 's':
3494 case 'd':
3495 case 'q':
3496 /* Print a scalar FP/SIMD register name. */
3497 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3499 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3500 return;
3502 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3503 break;
3505 case 'S':
3506 case 'T':
3507 case 'U':
3508 case 'V':
3509 /* Print the first FP/SIMD register name in a list. */
3510 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3512 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3513 return;
3515 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3516 break;
3518 case 'X':
3519 /* Print bottom 16 bits of integer constant in hex. */
3520 if (GET_CODE (x) != CONST_INT)
3522 output_operand_lossage ("invalid operand for '%%%c'", code);
3523 return;
3525 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3526 break;
3528 case 'w':
3529 case 'x':
3530 /* Print a general register name or the zero register (32-bit or
3531 64-bit). */
3532 if (x == const0_rtx
3533 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3535 asm_fprintf (f, "%czr", code);
3536 break;
3539 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3541 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3542 break;
3545 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3547 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3548 break;
3551 /* Fall through */
3553 case 0:
3554 /* Print a normal operand. If it's a general register, then we
3555 assume DImode. */
3556 if (x == NULL)
3558 output_operand_lossage ("missing operand");
3559 return;
3562 switch (GET_CODE (x))
3564 case REG:
3565 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3566 break;
3568 case MEM:
3569 aarch64_memory_reference_mode = GET_MODE (x);
3570 output_address (XEXP (x, 0));
3571 break;
3573 case LABEL_REF:
3574 case SYMBOL_REF:
3575 output_addr_const (asm_out_file, x);
3576 break;
3578 case CONST_INT:
3579 asm_fprintf (f, "%wd", INTVAL (x));
3580 break;
3582 case CONST_VECTOR:
3583 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3585 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3586 HOST_WIDE_INT_MIN,
3587 HOST_WIDE_INT_MAX));
3588 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3590 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3592 fputc ('0', f);
3594 else
3595 gcc_unreachable ();
3596 break;
3598 case CONST_DOUBLE:
3599 /* CONST_DOUBLE can represent a double-width integer.
3600 In this case, the mode of x is VOIDmode. */
3601 if (GET_MODE (x) == VOIDmode)
3602 ; /* Do Nothing. */
3603 else if (aarch64_float_const_zero_rtx_p (x))
3605 fputc ('0', f);
3606 break;
3608 else if (aarch64_float_const_representable_p (x))
3610 #define buf_size 20
3611 char float_buf[buf_size] = {'\0'};
3612 REAL_VALUE_TYPE r;
3613 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3614 real_to_decimal_for_mode (float_buf, &r,
3615 buf_size, buf_size,
3616 1, GET_MODE (x));
3617 asm_fprintf (asm_out_file, "%s", float_buf);
3618 break;
3619 #undef buf_size
3621 output_operand_lossage ("invalid constant");
3622 return;
3623 default:
3624 output_operand_lossage ("invalid operand");
3625 return;
3627 break;
3629 case 'A':
3630 if (GET_CODE (x) == HIGH)
3631 x = XEXP (x, 0);
3633 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3635 case SYMBOL_SMALL_GOT:
3636 asm_fprintf (asm_out_file, ":got:");
3637 break;
3639 case SYMBOL_SMALL_TLSGD:
3640 asm_fprintf (asm_out_file, ":tlsgd:");
3641 break;
3643 case SYMBOL_SMALL_TLSDESC:
3644 asm_fprintf (asm_out_file, ":tlsdesc:");
3645 break;
3647 case SYMBOL_SMALL_GOTTPREL:
3648 asm_fprintf (asm_out_file, ":gottprel:");
3649 break;
3651 case SYMBOL_SMALL_TPREL:
3652 asm_fprintf (asm_out_file, ":tprel:");
3653 break;
3655 case SYMBOL_TINY_GOT:
3656 gcc_unreachable ();
3657 break;
3659 default:
3660 break;
3662 output_addr_const (asm_out_file, x);
3663 break;
3665 case 'L':
3666 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3668 case SYMBOL_SMALL_GOT:
3669 asm_fprintf (asm_out_file, ":lo12:");
3670 break;
3672 case SYMBOL_SMALL_TLSGD:
3673 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3674 break;
3676 case SYMBOL_SMALL_TLSDESC:
3677 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3678 break;
3680 case SYMBOL_SMALL_GOTTPREL:
3681 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3682 break;
3684 case SYMBOL_SMALL_TPREL:
3685 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3686 break;
3688 case SYMBOL_TINY_GOT:
3689 asm_fprintf (asm_out_file, ":got:");
3690 break;
3692 default:
3693 break;
3695 output_addr_const (asm_out_file, x);
3696 break;
3698 case 'G':
3700 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3702 case SYMBOL_SMALL_TPREL:
3703 asm_fprintf (asm_out_file, ":tprel_hi12:");
3704 break;
3705 default:
3706 break;
3708 output_addr_const (asm_out_file, x);
3709 break;
3711 default:
3712 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3713 return;
3717 void
3718 aarch64_print_operand_address (FILE *f, rtx x)
3720 struct aarch64_address_info addr;
3722 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3723 MEM, true))
3724 switch (addr.type)
3726 case ADDRESS_REG_IMM:
3727 if (addr.offset == const0_rtx)
3728 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3729 else
3730 asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
3731 INTVAL (addr.offset));
3732 return;
3734 case ADDRESS_REG_REG:
3735 if (addr.shift == 0)
3736 asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3737 reg_names [REGNO (addr.offset)]);
3738 else
3739 asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3740 reg_names [REGNO (addr.offset)], addr.shift);
3741 return;
3743 case ADDRESS_REG_UXTW:
3744 if (addr.shift == 0)
3745 asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
3746 REGNO (addr.offset) - R0_REGNUM);
3747 else
3748 asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
3749 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3750 return;
3752 case ADDRESS_REG_SXTW:
3753 if (addr.shift == 0)
3754 asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
3755 REGNO (addr.offset) - R0_REGNUM);
3756 else
3757 asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
3758 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3759 return;
3761 case ADDRESS_REG_WB:
3762 switch (GET_CODE (x))
3764 case PRE_INC:
3765 asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
3766 GET_MODE_SIZE (aarch64_memory_reference_mode));
3767 return;
3768 case POST_INC:
3769 asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
3770 GET_MODE_SIZE (aarch64_memory_reference_mode));
3771 return;
3772 case PRE_DEC:
3773 asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3774 GET_MODE_SIZE (aarch64_memory_reference_mode));
3775 return;
3776 case POST_DEC:
3777 asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3778 GET_MODE_SIZE (aarch64_memory_reference_mode));
3779 return;
3780 case PRE_MODIFY:
3781 asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3782 INTVAL (addr.offset));
3783 return;
3784 case POST_MODIFY:
3785 asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3786 INTVAL (addr.offset));
3787 return;
3788 default:
3789 break;
3791 break;
3793 case ADDRESS_LO_SUM:
3794 asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3795 output_addr_const (f, addr.offset);
3796 asm_fprintf (f, "]");
3797 return;
3799 case ADDRESS_SYMBOLIC:
3800 break;
3803 output_addr_const (f, x);
3806 bool
3807 aarch64_label_mentioned_p (rtx x)
3809 const char *fmt;
3810 int i;
3812 if (GET_CODE (x) == LABEL_REF)
3813 return true;
3815 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3816 referencing instruction, but they are constant offsets, not
3817 symbols. */
3818 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3819 return false;
3821 fmt = GET_RTX_FORMAT (GET_CODE (x));
3822 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3824 if (fmt[i] == 'E')
3826 int j;
3828 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3829 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3830 return 1;
3832 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3833 return 1;
3836 return 0;
3839 /* Implement REGNO_REG_CLASS. */
3841 enum reg_class
3842 aarch64_regno_regclass (unsigned regno)
3844 if (GP_REGNUM_P (regno))
3845 return CORE_REGS;
3847 if (regno == SP_REGNUM)
3848 return STACK_REG;
3850 if (regno == FRAME_POINTER_REGNUM
3851 || regno == ARG_POINTER_REGNUM)
3852 return CORE_REGS;
3854 if (FP_REGNUM_P (regno))
3855 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
3857 return NO_REGS;
3860 /* Try a machine-dependent way of reloading an illegitimate address
3861 operand. If we find one, push the reload and return the new rtx. */
3864 aarch64_legitimize_reload_address (rtx *x_p,
3865 enum machine_mode mode,
3866 int opnum, int type,
3867 int ind_levels ATTRIBUTE_UNUSED)
3869 rtx x = *x_p;
3871 /* Do not allow mem (plus (reg, const)) if vector mode. */
3872 if (aarch64_vector_mode_p (mode)
3873 && GET_CODE (x) == PLUS
3874 && REG_P (XEXP (x, 0))
3875 && CONST_INT_P (XEXP (x, 1)))
3877 rtx orig_rtx = x;
3878 x = copy_rtx (x);
3879 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3880 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3881 opnum, (enum reload_type) type);
3882 return x;
3885 /* We must recognize output that we have already generated ourselves. */
3886 if (GET_CODE (x) == PLUS
3887 && GET_CODE (XEXP (x, 0)) == PLUS
3888 && REG_P (XEXP (XEXP (x, 0), 0))
3889 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3890 && CONST_INT_P (XEXP (x, 1)))
3892 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3893 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3894 opnum, (enum reload_type) type);
3895 return x;
3898 /* We wish to handle large displacements off a base register by splitting
3899 the addend across an add and the mem insn. This can cut the number of
3900 extra insns needed from 3 to 1. It is only useful for load/store of a
3901 single register with a 12-bit offset field. */
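/* For example, a DImode access at base + 0x12340 splits roughly into
       add  tmp, base, #0x12000
       ldr  x0, [tmp, #0x340]
   since 0x12000 is a shifted 12-bit immediate and 0x340 is a valid
   scaled offset for the load itself (tmp and x0 are illustrative).  */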
3902 if (GET_CODE (x) == PLUS
3903 && REG_P (XEXP (x, 0))
3904 && CONST_INT_P (XEXP (x, 1))
3905 && HARD_REGISTER_P (XEXP (x, 0))
3906 && mode != TImode
3907 && mode != TFmode
3908 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
3910 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
3911 HOST_WIDE_INT low = val & 0xfff;
3912 HOST_WIDE_INT high = val - low;
3913 HOST_WIDE_INT offs;
3914 rtx cst;
3916 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
3917 BLKmode alignment. */
3918 if (GET_MODE_SIZE (mode) == 0)
3919 return NULL_RTX;
3921 offs = low % GET_MODE_SIZE (mode);
3923 /* Align misaligned offset by adjusting high part to compensate. */
3924 if (offs != 0)
3926 if (aarch64_uimm12_shift (high + offs))
3928 /* Align down. */
3929 low = low - offs;
3930 high = high + offs;
3932 else
3934 /* Align up. */
3935 offs = GET_MODE_SIZE (mode) - offs;
3936 low = low + offs;
3937 high = high + (low & 0x1000) - offs;
3938 low &= 0xfff;
3942 /* Check for overflow. */
3943 if (high + low != val)
3944 return NULL_RTX;
3946 cst = GEN_INT (high);
3947 if (!aarch64_uimm12_shift (high))
3948 cst = force_const_mem (Pmode, cst);
3950 /* Reload high part into base reg, leaving the low part
3951 in the mem instruction. */
3952 x = gen_rtx_PLUS (Pmode,
3953 gen_rtx_PLUS (Pmode, XEXP (x, 0), cst),
3954 GEN_INT (low));
3956 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3957 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
3958 opnum, (enum reload_type) type);
3959 return x;
3962 return NULL_RTX;
3966 static reg_class_t
3967 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
3968 reg_class_t rclass,
3969 enum machine_mode mode,
3970 secondary_reload_info *sri)
3972 /* Address expressions of the form PLUS (SP, large_offset) need two
3973 scratch registers, one for the constant, and one for holding a
3974 copy of SP, since SP cannot be used on the RHS of an add-reg
3975 instruction. */
3976 if (mode == DImode
3977 && GET_CODE (x) == PLUS
3978 && XEXP (x, 0) == stack_pointer_rtx
3979 && CONST_INT_P (XEXP (x, 1))
3980 && !aarch64_uimm12_shift (INTVAL (XEXP (x, 1))))
3982 sri->icode = CODE_FOR_reload_sp_immediate;
3983 return NO_REGS;
3986 /* Without the TARGET_SIMD instructions we cannot move a Q register
3987 to a Q register directly. We need a scratch. */
3988 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
3989 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
3990 && reg_class_subset_p (rclass, FP_REGS))
3992 if (mode == TFmode)
3993 sri->icode = CODE_FOR_aarch64_reload_movtf;
3994 else if (mode == TImode)
3995 sri->icode = CODE_FOR_aarch64_reload_movti;
3996 return NO_REGS;
3999 /* A TFmode or TImode memory access should be handled via FP_REGS
4000 because AArch64 has richer addressing modes for LDR/STR instructions
4001 than LDP/STP instructions. */
4002 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4003 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4004 return FP_REGS;
4006 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4007 return CORE_REGS;
4009 return NO_REGS;
4012 static bool
4013 aarch64_can_eliminate (const int from, const int to)
4015 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4016 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4018 if (frame_pointer_needed)
4020 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4021 return true;
4022 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4023 return false;
4024 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4025 && !cfun->calls_alloca)
4026 return true;
4027 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4028 return true;
4029 return false;
4031 else
4033 /* If we decided that we didn't need a leaf frame pointer but then used
4034 LR in the function, then we'll want a frame pointer after all, so
4035 prevent this elimination to ensure a frame pointer is used.
4037 NOTE: the original value of flag_omit_frame_pointer gets trashed
4038 IFF flag_omit_leaf_frame_pointer is true, so we check the value
4039 of faked_omit_frame_pointer here (which is true when we always
4040 wish to keep non-leaf frame pointers but only wish to keep leaf frame
4041 pointers when LR is clobbered). */
4042 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4043 && df_regs_ever_live_p (LR_REGNUM)
4044 && faked_omit_frame_pointer)
4045 return false;
4048 return true;
4051 HOST_WIDE_INT
4052 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4054 HOST_WIDE_INT frame_size;
4055 HOST_WIDE_INT offset;
4057 aarch64_layout_frame ();
4058 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4059 + crtl->outgoing_args_size
4060 + cfun->machine->saved_varargs_size);
4062 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4063 offset = frame_size;
4065 if (to == HARD_FRAME_POINTER_REGNUM)
4067 if (from == ARG_POINTER_REGNUM)
4068 return offset - crtl->outgoing_args_size;
4070 if (from == FRAME_POINTER_REGNUM)
4071 return cfun->machine->frame.saved_regs_size + get_frame_size ();
4074 if (to == STACK_POINTER_REGNUM)
4076 if (from == FRAME_POINTER_REGNUM)
4078 HOST_WIDE_INT elim = crtl->outgoing_args_size
4079 + cfun->machine->frame.saved_regs_size
4080 + get_frame_size ()
4081 - cfun->machine->frame.fp_lr_offset;
4082 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4083 return elim;
4087 return offset;
4091 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4092 previous frame. */
4095 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4097 if (count != 0)
4098 return const0_rtx;
4099 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
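/* Emit the trampoline template.  The two PC-relative ldrs below read the
   doublewords placed at offsets 16 and 24 of the trampoline;
   aarch64_trampoline_init fills those slots with the target function
   address and the static chain value respectively.  */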
4103 static void
4104 aarch64_asm_trampoline_template (FILE *f)
4106 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4107 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4108 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4109 assemble_aligned_integer (4, const0_rtx);
4110 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4111 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4114 unsigned
4115 aarch64_trampoline_size (void)
4117 return 32; /* 3 insns + padding + 2 dwords. */
4120 static void
4121 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4123 rtx fnaddr, mem, a_tramp;
4125 /* Don't need to copy the trailing D-words; we fill those in below. */
4126 emit_block_move (m_tramp, assemble_trampoline_template (),
4127 GEN_INT (TRAMPOLINE_SIZE - 16), BLOCK_OP_NORMAL);
4128 mem = adjust_address (m_tramp, DImode, 16);
4129 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4130 emit_move_insn (mem, fnaddr);
4132 mem = adjust_address (m_tramp, DImode, 24);
4133 emit_move_insn (mem, chain_value);
4135 /* XXX We should really define a "clear_cache" pattern and use
4136 gen_clear_cache(). */
4137 a_tramp = XEXP (m_tramp, 0);
4138 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4139 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
4140 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
4143 static unsigned char
4144 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4146 switch (regclass)
4148 case CORE_REGS:
4149 case POINTER_REGS:
4150 case GENERAL_REGS:
4151 case ALL_REGS:
4152 case FP_REGS:
4153 case FP_LO_REGS:
4154 return
4155 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4156 (GET_MODE_SIZE (mode) + 7) / 8;
4157 case STACK_REG:
4158 return 1;
4160 case NO_REGS:
4161 return 0;
4163 default:
4164 break;
4166 gcc_unreachable ();
4169 static reg_class_t
4170 aarch64_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t regclass)
4172 return ((regclass == POINTER_REGS || regclass == STACK_REG)
4173 ? GENERAL_REGS : regclass);
4176 void
4177 aarch64_asm_output_labelref (FILE* f, const char *name)
4179 asm_fprintf (f, "%U%s", name);
4182 static void
4183 aarch64_elf_asm_constructor (rtx symbol, int priority)
4185 if (priority == DEFAULT_INIT_PRIORITY)
4186 default_ctor_section_asm_out_constructor (symbol, priority);
4187 else
4189 section *s;
4190 char buf[18];
4191 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4192 s = get_section (buf, SECTION_WRITE, NULL);
4193 switch_to_section (s);
4194 assemble_align (POINTER_SIZE);
4195 fputs ("\t.dword\t", asm_out_file);
4196 output_addr_const (asm_out_file, symbol);
4197 fputc ('\n', asm_out_file);
4201 static void
4202 aarch64_elf_asm_destructor (rtx symbol, int priority)
4204 if (priority == DEFAULT_INIT_PRIORITY)
4205 default_dtor_section_asm_out_destructor (symbol, priority);
4206 else
4208 section *s;
4209 char buf[18];
4210 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4211 s = get_section (buf, SECTION_WRITE, NULL);
4212 switch_to_section (s);
4213 assemble_align (POINTER_SIZE);
4214 fputs ("\t.dword\t", asm_out_file);
4215 output_addr_const (asm_out_file, symbol);
4216 fputc ('\n', asm_out_file);
4220 const char*
4221 aarch64_output_casesi (rtx *operands)
4223 char buf[100];
4224 char label[100];
4225 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
4226 int index;
4227 static const char *const patterns[4][2] =
4230 "ldrb\t%w3, [%0,%w1,uxtw]",
4231 "add\t%3, %4, %w3, sxtb #2"
4234 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4235 "add\t%3, %4, %w3, sxth #2"
4238 "ldr\t%w3, [%0,%w1,uxtw #2]",
4239 "add\t%3, %4, %w3, sxtw #2"
4241 /* We assume that DImode is only generated when not optimizing and
4242 that we don't really need 64-bit address offsets. That would
4243 imply an object file with 8GB of code in a single function! */
4245 "ldr\t%w3, [%0,%w1,uxtw #2]",
4246 "add\t%3, %4, %w3, sxtw #2"
4250 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4252 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4254 gcc_assert (index >= 0 && index <= 3);
4256 /* Need to implement table size reduction, by changing the code below. */
4257 output_asm_insn (patterns[index][0], operands);
4258 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4259 snprintf (buf, sizeof (buf),
4260 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4261 output_asm_insn (buf, operands);
4262 output_asm_insn (patterns[index][1], operands);
4263 output_asm_insn ("br\t%3", operands);
4264 assemble_label (asm_out_file, label);
4265 return "";
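/* As an illustration (register and label numbers are arbitrary), for a
   halfword dispatch table (index 1 above) the emitted sequence is:

	ldrh	w3, [x0, w1, uxtw #1]	// scaled load of the table entry
	adr	x4, .Lrtx<N>		// base for the computed branch
	add	x3, x4, w3, sxth #2	// entry * 4 added to the base
	br	x3
   .Lrtx<N>:

   where operand 0 holds the table address, operand 1 the index, and
   operands 3 and 4 are scratch registers.  */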
4269 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4270 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4271 operator. */
4274 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4276 if (shift >= 0 && shift <= 3)
4278 int size;
4279 for (size = 8; size <= 32; size *= 2)
4281 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4282 if (mask == bits << shift)
4283 return size;
4286 return 0;
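/* Worked examples (the values are for illustration only):
     aarch64_uxt_size (0, 0xff)    == 8   -- plain UXTB
     aarch64_uxt_size (1, 0x1fe)   == 8   -- UXTB with a left shift of 1
     aarch64_uxt_size (2, 0x3fffc) == 16  -- UXTH with a left shift of 2
     aarch64_uxt_size (1, 0xff)    == 0   -- mask does not line up with the shift  */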
4289 static bool
4290 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4291 const_rtx x ATTRIBUTE_UNUSED)
4293 /* We can't use blocks for constants when we're using a per-function
4294 constant pool. */
4295 return false;
4298 static section *
4299 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4300 rtx x ATTRIBUTE_UNUSED,
4301 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4303 /* Force all constant pool entries into the current function section. */
4304 return function_section (current_function_decl);
4308 /* Costs. */
4310 /* Helper function for rtx cost calculation. Strip a shift expression
4311 from X. Returns the inner operand if successful, or the original
4312 expression on failure. */
4313 static rtx
4314 aarch64_strip_shift (rtx x)
4316 rtx op = x;
4318 if ((GET_CODE (op) == ASHIFT
4319 || GET_CODE (op) == ASHIFTRT
4320 || GET_CODE (op) == LSHIFTRT)
4321 && CONST_INT_P (XEXP (op, 1)))
4322 return XEXP (op, 0);
4324 if (GET_CODE (op) == MULT
4325 && CONST_INT_P (XEXP (op, 1))
4326 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4327 return XEXP (op, 0);
4329 return x;
4332 /* Helper function for rtx cost calculation. Strip a shift or extend
4333 expression from X. Returns the inner operand if successful, or the
4334 original expression on failure. We deal with a number of possible
4335 canonicalization variations here. */
4336 static rtx
4337 aarch64_strip_shift_or_extend (rtx x)
4339 rtx op = x;
4341 /* Zero and sign extraction of a widened value. */
4342 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4343 && XEXP (op, 2) == const0_rtx
4344 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4345 XEXP (op, 1)))
4346 return XEXP (XEXP (op, 0), 0);
4348 /* It can also be represented (for zero-extend) as an AND with an
4349 immediate. */
4350 if (GET_CODE (op) == AND
4351 && GET_CODE (XEXP (op, 0)) == MULT
4352 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4353 && CONST_INT_P (XEXP (op, 1))
4354 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4355 INTVAL (XEXP (op, 1))) != 0)
4356 return XEXP (XEXP (op, 0), 0);
4358 /* Now handle extended register, as this may also have an optional
4359 left shift by 1..4. */
4360 if (GET_CODE (op) == ASHIFT
4361 && CONST_INT_P (XEXP (op, 1))
4362 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4363 op = XEXP (op, 0);
4365 if (GET_CODE (op) == ZERO_EXTEND
4366 || GET_CODE (op) == SIGN_EXTEND)
4367 op = XEXP (op, 0);
4369 if (op != x)
4370 return op;
4372 return aarch64_strip_shift (x);
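/* Illustrative inputs and results (RTL sketches, not taken from any
   particular test case):
     (ashift (reg:DI x1) (const_int 2))			  -> (reg:DI x1)
     (mult (reg:DI x1) (const_int 8))			  -> (reg:DI x1)
     (zero_extend:DI (reg:SI w1))			  -> (reg:SI w1)
     (ashift (sign_extend:DI (reg:SI w1)) (const_int 3)) -> (reg:SI w1)
   Any other form is returned unchanged.  */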
4375 /* Calculate the cost of calculating X, storing it in *COST. Result
4376 is true if the total cost of the operation has now been calculated. */
4377 static bool
4378 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4379 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4381 rtx op0, op1;
4382 const struct cpu_rtx_cost_table *extra_cost
4383 = aarch64_tune_params->insn_extra_cost;
4385 switch (code)
4387 case SET:
4388 op0 = SET_DEST (x);
4389 op1 = SET_SRC (x);
4391 switch (GET_CODE (op0))
4393 case MEM:
4394 if (speed)
4395 *cost += extra_cost->memory_store;
4397 if (op1 != const0_rtx)
4398 *cost += rtx_cost (op1, SET, 1, speed);
4399 return true;
4401 case SUBREG:
4402 if (! REG_P (SUBREG_REG (op0)))
4403 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4404 /* Fall through. */
4405 case REG:
4406 /* Cost is just the cost of the RHS of the set. */
4407 *cost += rtx_cost (op1, SET, 1, true);
4408 return true;
4410 case ZERO_EXTRACT: /* Bit-field insertion. */
4411 case SIGN_EXTRACT:
4412 /* Strip any redundant widening of the RHS to meet the width of
4413 the target. */
4414 if (GET_CODE (op1) == SUBREG)
4415 op1 = SUBREG_REG (op1);
4416 if ((GET_CODE (op1) == ZERO_EXTEND
4417 || GET_CODE (op1) == SIGN_EXTEND)
4418 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4419 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4420 >= INTVAL (XEXP (op0, 1))))
4421 op1 = XEXP (op1, 0);
4422 *cost += rtx_cost (op1, SET, 1, speed);
4423 return true;
4425 default:
4426 break;
4428 return false;
4430 case MEM:
4431 if (speed)
4432 *cost += extra_cost->memory_load;
4434 return true;
4436 case NEG:
4437 op0 = CONST0_RTX (GET_MODE (x));
4438 op1 = XEXP (x, 0);
4439 goto cost_minus;
4441 case COMPARE:
4442 op0 = XEXP (x, 0);
4443 op1 = XEXP (x, 1);
4445 if (op1 == const0_rtx
4446 && GET_CODE (op0) == AND)
4448 x = op0;
4449 goto cost_logic;
4452 /* Comparisons can work if the order is swapped.
4453 Canonicalization puts the more complex operation first, but
4454 we want it in op1. */
4455 if (! (REG_P (op0)
4456 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4458 op0 = XEXP (x, 1);
4459 op1 = XEXP (x, 0);
4461 goto cost_minus;
4463 case MINUS:
4464 op0 = XEXP (x, 0);
4465 op1 = XEXP (x, 1);
4467 cost_minus:
4468 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4469 || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4470 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4472 if (op0 != const0_rtx)
4473 *cost += rtx_cost (op0, MINUS, 0, speed);
4475 if (CONST_INT_P (op1))
4477 if (!aarch64_uimm12_shift (INTVAL (op1)))
4478 *cost += rtx_cost (op1, MINUS, 1, speed);
4480 else
4482 op1 = aarch64_strip_shift_or_extend (op1);
4483 *cost += rtx_cost (op1, MINUS, 1, speed);
4485 return true;
4488 return false;
4490 case PLUS:
4491 op0 = XEXP (x, 0);
4492 op1 = XEXP (x, 1);
4494 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4496 if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4498 *cost += rtx_cost (op0, PLUS, 0, speed);
4500 else
4502 rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4504 if (new_op0 == op0
4505 && GET_CODE (op0) == MULT)
4507 if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4508 && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4509 || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4510 && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4512 *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4513 speed)
4514 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4515 speed)
4516 + rtx_cost (op1, PLUS, 1, speed));
4517 if (speed)
4518 *cost += extra_cost->int_multiply_extend_add;
4519 return true;
4521 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4522 + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4523 + rtx_cost (op1, PLUS, 1, speed));
4525 if (speed)
4526 *cost += extra_cost->int_multiply_add;
4529 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4530 + rtx_cost (op1, PLUS, 1, speed));
4532 return true;
4535 return false;
4537 case IOR:
4538 case XOR:
4539 case AND:
4540 cost_logic:
4541 op0 = XEXP (x, 0);
4542 op1 = XEXP (x, 1);
4544 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4546 if (CONST_INT_P (op1)
4547 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4549 *cost += rtx_cost (op0, AND, 0, speed);
4551 else
4553 if (GET_CODE (op0) == NOT)
4554 op0 = XEXP (op0, 0);
4555 op0 = aarch64_strip_shift (op0);
4556 *cost += (rtx_cost (op0, AND, 0, speed)
4557 + rtx_cost (op1, AND, 1, speed));
4559 return true;
4561 return false;
4563 case ZERO_EXTEND:
4564 if ((GET_MODE (x) == DImode
4565 && GET_MODE (XEXP (x, 0)) == SImode)
4566 || GET_CODE (XEXP (x, 0)) == MEM)
4568 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4569 return true;
4571 return false;
4573 case SIGN_EXTEND:
4574 if (GET_CODE (XEXP (x, 0)) == MEM)
4576 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4577 return true;
4579 return false;
4581 case ROTATE:
4582 if (!CONST_INT_P (XEXP (x, 1)))
4583 *cost += COSTS_N_INSNS (2);
4584 /* Fall through. */
4585 case ROTATERT:
4586 case LSHIFTRT:
4587 case ASHIFT:
4588 case ASHIFTRT:
4590 /* Shifting by a register often takes an extra cycle. */
4591 if (speed && !CONST_INT_P (XEXP (x, 1)))
4592 *cost += extra_cost->register_shift;
4594 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4595 return true;
4597 case HIGH:
4598 if (!CONSTANT_P (XEXP (x, 0)))
4599 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4600 return true;
4602 case LO_SUM:
4603 if (!CONSTANT_P (XEXP (x, 1)))
4604 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4605 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4606 return true;
4608 case ZERO_EXTRACT:
4609 case SIGN_EXTRACT:
4610 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4611 return true;
4613 case MULT:
4614 op0 = XEXP (x, 0);
4615 op1 = XEXP (x, 1);
4617 *cost = COSTS_N_INSNS (1);
4618 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4620 if (CONST_INT_P (op1)
4621 && exact_log2 (INTVAL (op1)) > 0)
4623 *cost += rtx_cost (op0, ASHIFT, 0, speed);
4624 return true;
4627 if ((GET_CODE (op0) == ZERO_EXTEND
4628 && GET_CODE (op1) == ZERO_EXTEND)
4629 || (GET_CODE (op0) == SIGN_EXTEND
4630 && GET_CODE (op1) == SIGN_EXTEND))
4632 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4633 + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4634 if (speed)
4635 *cost += extra_cost->int_multiply_extend;
4636 return true;
4639 if (speed)
4640 *cost += extra_cost->int_multiply;
4642 else if (speed)
4644 if (GET_MODE (x) == DFmode)
4645 *cost += extra_cost->double_multiply;
4646 else if (GET_MODE (x) == SFmode)
4647 *cost += extra_cost->float_multiply;
4650 return false; /* All arguments need to be in registers. */
4652 case MOD:
4653 case UMOD:
4654 *cost = COSTS_N_INSNS (2);
4655 if (speed)
4657 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4658 *cost += (extra_cost->int_multiply_add
4659 + extra_cost->int_divide);
4660 else if (GET_MODE (x) == DFmode)
4661 *cost += (extra_cost->double_multiply
4662 + extra_cost->double_divide);
4663 else if (GET_MODE (x) == SFmode)
4664 *cost += (extra_cost->float_multiply
4665 + extra_cost->float_divide);
4667 return false; /* All arguments need to be in registers. */
4669 case DIV:
4670 case UDIV:
4671 *cost = COSTS_N_INSNS (1);
4672 if (speed)
4674 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4675 *cost += extra_cost->int_divide;
4676 else if (GET_MODE (x) == DFmode)
4677 *cost += extra_cost->double_divide;
4678 else if (GET_MODE (x) == SFmode)
4679 *cost += extra_cost->float_divide;
4681 return false; /* All arguments need to be in registers. */
4683 default:
4684 break;
4686 return false;
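/* As a worked example (the numeric costs come from the tuning table and are
   not shown here), a SET whose source is

     (plus:DI (mult:DI (sign_extend:DI (reg:SI a))
		       (sign_extend:DI (reg:SI b)))
	      (reg:DI c))

   is recognised in the PLUS case above as a multiply-extend-add (SMADDL):
   its cost is the cost of the three inner registers plus, when optimizing
   for speed, extra_cost->int_multiply_extend_add.  */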
4689 static int
4690 aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4691 enum machine_mode mode ATTRIBUTE_UNUSED,
4692 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4694 enum rtx_code c = GET_CODE (x);
4695 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4697 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4698 return addr_cost->pre_modify;
4700 if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4701 return addr_cost->post_modify;
4703 if (c == PLUS)
4705 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4706 return addr_cost->imm_offset;
4707 else if (GET_CODE (XEXP (x, 0)) == MULT
4708 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4709 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4710 return addr_cost->register_extend;
4712 return addr_cost->register_offset;
4714 else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4715 return addr_cost->imm_offset;
4717 return 0;
4720 static int
4721 aarch64_register_move_cost (enum machine_mode mode,
4722 reg_class_t from, reg_class_t to)
4724 const struct cpu_regmove_cost *regmove_cost
4725 = aarch64_tune_params->regmove_cost;
4727 if (from == GENERAL_REGS && to == GENERAL_REGS)
4728 return regmove_cost->GP2GP;
4729 else if (from == GENERAL_REGS)
4730 return regmove_cost->GP2FP;
4731 else if (to == GENERAL_REGS)
4732 return regmove_cost->FP2GP;
4734 /* When AdvSIMD instructions are disabled it is not possible to move
4735 a 128-bit value directly between Q registers. This is handled in
4736 secondary reload. A general register is used as a scratch to move
4737 the upper DI value and the lower DI value is moved directly,
4738 hence the cost is the sum of three moves. */
4740 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
4741 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4743 return regmove_cost->FP2FP;
4746 static int
4747 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4748 reg_class_t rclass ATTRIBUTE_UNUSED,
4749 bool in ATTRIBUTE_UNUSED)
4751 return aarch64_tune_params->memmov_cost;
4754 /* Vectorizer cost model target hooks. */
4756 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4757 static int
4758 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4759 tree vectype,
4760 int misalign ATTRIBUTE_UNUSED)
4762 unsigned elements;
4764 switch (type_of_cost)
4766 case scalar_stmt:
4767 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
4769 case scalar_load:
4770 return aarch64_tune_params->vec_costs->scalar_load_cost;
4772 case scalar_store:
4773 return aarch64_tune_params->vec_costs->scalar_store_cost;
4775 case vector_stmt:
4776 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4778 case vector_load:
4779 return aarch64_tune_params->vec_costs->vec_align_load_cost;
4781 case vector_store:
4782 return aarch64_tune_params->vec_costs->vec_store_cost;
4784 case vec_to_scalar:
4785 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
4787 case scalar_to_vec:
4788 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
4790 case unaligned_load:
4791 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
4793 case unaligned_store:
4794 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
4796 case cond_branch_taken:
4797 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
4799 case cond_branch_not_taken:
4800 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
4802 case vec_perm:
4803 case vec_promote_demote:
4804 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4806 case vec_construct:
4807 elements = TYPE_VECTOR_SUBPARTS (vectype);
4808 return elements / 2 + 1;
4810 default:
4811 gcc_unreachable ();
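/* For example, constructing a V4SF vector (4 sub-parts) is costed above as
   4 / 2 + 1 == 3, and a V16QI vector as 16 / 2 + 1 == 9; every other answer
   comes straight from the tuning structure.  */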
4815 /* Implement targetm.vectorize.add_stmt_cost. */
4816 static unsigned
4817 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4818 struct _stmt_vec_info *stmt_info, int misalign,
4819 enum vect_cost_model_location where)
4821 unsigned *cost = (unsigned *) data;
4822 unsigned retval = 0;
4824 if (flag_vect_cost_model)
4826 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4827 int stmt_cost =
4828 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
4830 /* Statements in an inner loop relative to the loop being
4831 vectorized are weighted more heavily. The value here is
4832 a function (linear for now) of the loop nest level. */
4833 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4835 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4836 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
4837 unsigned nest_level = loop_depth (loop);
4839 count *= nest_level;
4842 retval = (unsigned) (count * stmt_cost);
4843 cost[where] += retval;
4846 return retval;
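/* Illustration of the weighting above: a statement occurring COUNT times
   with per-copy cost C normally contributes COUNT * C to cost[WHERE].  If
   it lies in an inner loop relative to the loop being vectorized, COUNT is
   first multiplied by that loop's depth, so a statement in a depth-2 inner
   loop adds 2 * COUNT * C to cost[vect_body].  */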
4849 static void initialize_aarch64_code_model (void);
4851 /* Parse the architecture extension string. */
4853 static void
4854 aarch64_parse_extension (char *str)
4856 /* The extension string is parsed left to right. */
4857 const struct aarch64_option_extension *opt = NULL;
4859 /* Flag to say whether we are adding or removing an extension. */
4860 int adding_ext = -1;
4862 while (str != NULL && *str != 0)
4864 char *ext;
4865 size_t len;
4867 str++;
4868 ext = strchr (str, '+');
4870 if (ext != NULL)
4871 len = ext - str;
4872 else
4873 len = strlen (str);
4875 if (len >= 2 && strncmp (str, "no", 2) == 0)
4877 adding_ext = 0;
4878 len -= 2;
4879 str += 2;
4881 else if (len > 0)
4882 adding_ext = 1;
4884 if (len == 0)
4886 error ("missing feature modifier after %qs", "+no");
4887 return;
4890 /* Scan over the extensions table trying to find an exact match. */
4891 for (opt = all_extensions; opt->name != NULL; opt++)
4893 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
4895 /* Add or remove the extension. */
4896 if (adding_ext)
4897 aarch64_isa_flags |= opt->flags_on;
4898 else
4899 aarch64_isa_flags &= ~(opt->flags_off);
4900 break;
4904 if (opt->name == NULL)
4906 /* Extension not found in list. */
4907 error ("unknown feature modifier %qs", str);
4908 return;
4911 str = ext;
4914 return;
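/* As an example (using the "crypto" and "fp" modifiers from
   aarch64-option-extensions.def), the tail of -march=armv8-a+crypto+nofp
   is processed in two passes through the loop above: "+crypto" ORs that
   entry's flags_on into aarch64_isa_flags, then "+nofp" clears the "fp"
   entry's flags_off bits from it.  */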
4917 /* Parse the ARCH string. */
4919 static void
4920 aarch64_parse_arch (void)
4922 char *ext;
4923 const struct processor *arch;
4924 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
4925 size_t len;
4927 strcpy (str, aarch64_arch_string);
4929 ext = strchr (str, '+');
4931 if (ext != NULL)
4932 len = ext - str;
4933 else
4934 len = strlen (str);
4936 if (len == 0)
4938 error ("missing arch name in -march=%qs", str);
4939 return;
4942 /* Loop through the list of supported ARCHs to find a match. */
4943 for (arch = all_architectures; arch->name != NULL; arch++)
4945 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
4947 selected_arch = arch;
4948 aarch64_isa_flags = selected_arch->flags;
4949 selected_cpu = &all_cores[selected_arch->core];
4951 if (ext != NULL)
4953 /* ARCH string contains at least one extension. */
4954 aarch64_parse_extension (ext);
4957 return;
4961 /* ARCH name not found in list. */
4962 error ("unknown value %qs for -march", str);
4963 return;
4966 /* Parse the CPU string. */
4968 static void
4969 aarch64_parse_cpu (void)
4971 char *ext;
4972 const struct processor *cpu;
4973 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
4974 size_t len;
4976 strcpy (str, aarch64_cpu_string);
4978 ext = strchr (str, '+');
4980 if (ext != NULL)
4981 len = ext - str;
4982 else
4983 len = strlen (str);
4985 if (len == 0)
4987 error ("missing cpu name in -mcpu=%qs", str);
4988 return;
4991 /* Loop through the list of supported CPUs to find a match. */
4992 for (cpu = all_cores; cpu->name != NULL; cpu++)
4994 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
4996 selected_cpu = cpu;
4997 aarch64_isa_flags = selected_cpu->flags;
4999 if (ext != NULL)
5001 /* CPU string contains at least one extension. */
5002 aarch64_parse_extension (ext);
5005 return;
5009 /* CPU name not found in list. */
5010 error ("unknown value %qs for -mcpu", str);
5011 return;
5014 /* Parse the TUNE string. */
5016 static void
5017 aarch64_parse_tune (void)
5019 const struct processor *cpu;
5020 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
5021 strcpy (str, aarch64_tune_string);
5023 /* Loop through the list of supported CPUs to find a match. */
5024 for (cpu = all_cores; cpu->name != NULL; cpu++)
5026 if (strcmp (cpu->name, str) == 0)
5028 selected_tune = cpu;
5029 return;
5033 /* CPU name not found in list. */
5034 error ("unknown value %qs for -mtune", str);
5035 return;
5039 /* Implement TARGET_OPTION_OVERRIDE. */
5041 static void
5042 aarch64_override_options (void)
5044 /* -march wins over -mcpu, so when -march is given the CPU is taken from the
5045 selected architecture and any -mcpu setting is ignored; otherwise -march
5046 remains undefined. -mtune can be used with either -march or -mcpu. */
5048 if (aarch64_arch_string)
5050 aarch64_parse_arch ();
5051 aarch64_cpu_string = NULL;
5054 if (aarch64_cpu_string)
5056 aarch64_parse_cpu ();
5057 selected_arch = NULL;
5060 if (aarch64_tune_string)
5062 aarch64_parse_tune ();
5065 initialize_aarch64_code_model ();
5067 aarch64_build_bitmask_table ();
5069 /* This target defaults to strict volatile bitfields. */
5070 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
5071 flag_strict_volatile_bitfields = 1;
5073 /* If the user did not specify a processor, choose the default
5074 one for them. This will be the CPU set during configuration using
5075 --with-cpu, otherwise it is "generic". */
5076 if (!selected_cpu)
5078 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
5079 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
5082 gcc_assert (selected_cpu);
5084 /* The selected cpu may be an architecture, so look up tuning by core ID. */
5085 if (!selected_tune)
5086 selected_tune = &all_cores[selected_cpu->core];
5088 aarch64_tune_flags = selected_tune->flags;
5089 aarch64_tune = selected_tune->core;
5090 aarch64_tune_params = selected_tune->tune;
5092 aarch64_override_options_after_change ();
5095 /* Implement targetm.override_options_after_change. */
5097 static void
5098 aarch64_override_options_after_change (void)
5100 faked_omit_frame_pointer = false;
5102 /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
5103 that aarch64_frame_pointer_required will be called. We need to remember
5104 whether flag_omit_frame_pointer was turned on normally or just faked. */
5106 if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
5108 flag_omit_frame_pointer = true;
5109 faked_omit_frame_pointer = true;
5113 static struct machine_function *
5114 aarch64_init_machine_status (void)
5116 struct machine_function *machine;
5117 machine = ggc_alloc_cleared_machine_function ();
5118 return machine;
5121 void
5122 aarch64_init_expanders (void)
5124 init_machine_status = aarch64_init_machine_status;
5127 /* Select the code model to use, taking flag_pic into account. */
5128 static void
5129 initialize_aarch64_code_model (void)
5131 if (flag_pic)
5133 switch (aarch64_cmodel_var)
5135 case AARCH64_CMODEL_TINY:
5136 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
5137 break;
5138 case AARCH64_CMODEL_SMALL:
5139 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
5140 break;
5141 case AARCH64_CMODEL_LARGE:
5142 sorry ("code model %qs with -f%s", "large",
5143 flag_pic > 1 ? "PIC" : "pic");
5144 default:
5145 gcc_unreachable ();
5148 else
5149 aarch64_cmodel = aarch64_cmodel_var;
5152 /* Return true if SYMBOL_REF X binds locally. */
5154 static bool
5155 aarch64_symbol_binds_local_p (const_rtx x)
5157 return (SYMBOL_REF_DECL (x)
5158 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
5159 : SYMBOL_REF_LOCAL_P (x));
5162 /* Return true if SYMBOL_REF X is thread-local. */
5163 static bool
5164 aarch64_tls_symbol_p (rtx x)
5166 if (! TARGET_HAVE_TLS)
5167 return false;
5169 if (GET_CODE (x) != SYMBOL_REF)
5170 return false;
5172 return SYMBOL_REF_TLS_MODEL (x) != 0;
5175 /* Classify a TLS symbol into one of the TLS kinds. */
5176 enum aarch64_symbol_type
5177 aarch64_classify_tls_symbol (rtx x)
5179 enum tls_model tls_kind = tls_symbolic_operand_type (x);
5181 switch (tls_kind)
5183 case TLS_MODEL_GLOBAL_DYNAMIC:
5184 case TLS_MODEL_LOCAL_DYNAMIC:
5185 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
5187 case TLS_MODEL_INITIAL_EXEC:
5188 return SYMBOL_SMALL_GOTTPREL;
5190 case TLS_MODEL_LOCAL_EXEC:
5191 return SYMBOL_SMALL_TPREL;
5193 case TLS_MODEL_EMULATED:
5194 case TLS_MODEL_NONE:
5195 return SYMBOL_FORCE_TO_MEM;
5197 default:
5198 gcc_unreachable ();
5202 /* Return the method that should be used to access SYMBOL_REF or
5203 LABEL_REF X in context CONTEXT. */
5205 enum aarch64_symbol_type
5206 aarch64_classify_symbol (rtx x,
5207 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
5209 if (GET_CODE (x) == LABEL_REF)
5211 switch (aarch64_cmodel)
5213 case AARCH64_CMODEL_LARGE:
5214 return SYMBOL_FORCE_TO_MEM;
5216 case AARCH64_CMODEL_TINY_PIC:
5217 case AARCH64_CMODEL_TINY:
5218 return SYMBOL_TINY_ABSOLUTE;
5220 case AARCH64_CMODEL_SMALL_PIC:
5221 case AARCH64_CMODEL_SMALL:
5222 return SYMBOL_SMALL_ABSOLUTE;
5224 default:
5225 gcc_unreachable ();
5229 if (GET_CODE (x) == SYMBOL_REF)
5231 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
5232 || CONSTANT_POOL_ADDRESS_P (x))
5233 return SYMBOL_FORCE_TO_MEM;
5235 if (aarch64_tls_symbol_p (x))
5236 return aarch64_classify_tls_symbol (x);
5238 switch (aarch64_cmodel)
5240 case AARCH64_CMODEL_TINY:
5241 if (SYMBOL_REF_WEAK (x))
5242 return SYMBOL_FORCE_TO_MEM;
5243 return SYMBOL_TINY_ABSOLUTE;
5245 case AARCH64_CMODEL_SMALL:
5246 if (SYMBOL_REF_WEAK (x))
5247 return SYMBOL_FORCE_TO_MEM;
5248 return SYMBOL_SMALL_ABSOLUTE;
5250 case AARCH64_CMODEL_TINY_PIC:
5251 if (!aarch64_symbol_binds_local_p (x))
5252 return SYMBOL_TINY_GOT;
5253 return SYMBOL_TINY_ABSOLUTE;
5255 case AARCH64_CMODEL_SMALL_PIC:
5256 if (!aarch64_symbol_binds_local_p (x))
5257 return SYMBOL_SMALL_GOT;
5258 return SYMBOL_SMALL_ABSOLUTE;
5260 default:
5261 gcc_unreachable ();
5265 /* By default push everything into the constant pool. */
5266 return SYMBOL_FORCE_TO_MEM;
5269 bool
5270 aarch64_constant_address_p (rtx x)
5272 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5275 bool
5276 aarch64_legitimate_pic_operand_p (rtx x)
5278 if (GET_CODE (x) == SYMBOL_REF
5279 || (GET_CODE (x) == CONST
5280 && GET_CODE (XEXP (x, 0)) == PLUS
5281 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5282 return false;
5284 return true;
5287 /* Return true if X holds either a quarter-precision or
5288 floating-point +0.0 constant. */
5289 static bool
5290 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5292 if (!CONST_DOUBLE_P (x))
5293 return false;
5295 /* TODO: We could handle moving 0.0 to a TFmode register,
5296 but first we would like to refactor the movtf_aarch64
5297 pattern to be more amenable to splitting moves properly and
5298 to gating correctly on TARGET_SIMD. For now, reject all
5299 constants that are not destined for SFmode or DFmode registers. */
5300 if (!(mode == SFmode || mode == DFmode))
5301 return false;
5303 if (aarch64_float_const_zero_rtx_p (x))
5304 return true;
5305 return aarch64_float_const_representable_p (x);
5308 static bool
5309 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5311 /* Do not allow vector struct mode constants. We could support
5312 0 and -1 easily, but they need support in aarch64-simd.md. */
5313 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5314 return false;
5316 /* This could probably go away because
5317 we now decompose CONST_INTs according to expand_mov_immediate. */
5318 if ((GET_CODE (x) == CONST_VECTOR
5319 && aarch64_simd_valid_immediate (x, mode, false, NULL))
5320 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5321 return !targetm.cannot_force_const_mem (mode, x);
5323 if (GET_CODE (x) == HIGH
5324 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5325 return true;
5327 return aarch64_constant_address_p (x);
5331 aarch64_load_tp (rtx target)
5333 if (!target
5334 || GET_MODE (target) != Pmode
5335 || !register_operand (target, Pmode))
5336 target = gen_reg_rtx (Pmode);
5338 /* Can return in any reg. */
5339 emit_insn (gen_aarch64_load_tp_hard (target));
5340 return target;
5343 /* On AAPCS systems, this is the "struct __va_list". */
5344 static GTY(()) tree va_list_type;
5346 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5347 Return the type to use as __builtin_va_list.
5349 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5351 struct __va_list
5353 void *__stack;
5354 void *__gr_top;
5355 void *__vr_top;
5356 int __gr_offs;
5357 int __vr_offs;
5358 }; */
5360 static tree
5361 aarch64_build_builtin_va_list (void)
5363 tree va_list_name;
5364 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5366 /* Create the type. */
5367 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5368 /* Give it the required name. */
5369 va_list_name = build_decl (BUILTINS_LOCATION,
5370 TYPE_DECL,
5371 get_identifier ("__va_list"),
5372 va_list_type);
5373 DECL_ARTIFICIAL (va_list_name) = 1;
5374 TYPE_NAME (va_list_type) = va_list_name;
5375 TYPE_STUB_DECL (va_list_type) = va_list_name;
5377 /* Create the fields. */
5378 f_stack = build_decl (BUILTINS_LOCATION,
5379 FIELD_DECL, get_identifier ("__stack"),
5380 ptr_type_node);
5381 f_grtop = build_decl (BUILTINS_LOCATION,
5382 FIELD_DECL, get_identifier ("__gr_top"),
5383 ptr_type_node);
5384 f_vrtop = build_decl (BUILTINS_LOCATION,
5385 FIELD_DECL, get_identifier ("__vr_top"),
5386 ptr_type_node);
5387 f_groff = build_decl (BUILTINS_LOCATION,
5388 FIELD_DECL, get_identifier ("__gr_offs"),
5389 integer_type_node);
5390 f_vroff = build_decl (BUILTINS_LOCATION,
5391 FIELD_DECL, get_identifier ("__vr_offs"),
5392 integer_type_node);
5394 DECL_ARTIFICIAL (f_stack) = 1;
5395 DECL_ARTIFICIAL (f_grtop) = 1;
5396 DECL_ARTIFICIAL (f_vrtop) = 1;
5397 DECL_ARTIFICIAL (f_groff) = 1;
5398 DECL_ARTIFICIAL (f_vroff) = 1;
5400 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5401 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5402 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5403 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5404 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5406 TYPE_FIELDS (va_list_type) = f_stack;
5407 DECL_CHAIN (f_stack) = f_grtop;
5408 DECL_CHAIN (f_grtop) = f_vrtop;
5409 DECL_CHAIN (f_vrtop) = f_groff;
5410 DECL_CHAIN (f_groff) = f_vroff;
5412 /* Compute its layout. */
5413 layout_type (va_list_type);
5415 return va_list_type;
5418 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5419 static void
5420 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5422 const CUMULATIVE_ARGS *cum;
5423 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5424 tree stack, grtop, vrtop, groff, vroff;
5425 tree t;
5426 int gr_save_area_size;
5427 int vr_save_area_size;
5428 int vr_offset;
5430 cum = &crtl->args.info;
5431 gr_save_area_size
5432 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5433 vr_save_area_size
5434 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5436 if (TARGET_GENERAL_REGS_ONLY)
5438 if (cum->aapcs_nvrn > 0)
5439 sorry ("%qs and floating point or vector arguments",
5440 "-mgeneral-regs-only");
5441 vr_save_area_size = 0;
5444 f_stack = TYPE_FIELDS (va_list_type_node);
5445 f_grtop = DECL_CHAIN (f_stack);
5446 f_vrtop = DECL_CHAIN (f_grtop);
5447 f_groff = DECL_CHAIN (f_vrtop);
5448 f_vroff = DECL_CHAIN (f_groff);
5450 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5451 NULL_TREE);
5452 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5453 NULL_TREE);
5454 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5455 NULL_TREE);
5456 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5457 NULL_TREE);
5458 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5459 NULL_TREE);
5461 /* Emit code to initialize STACK, which points to the next varargs stack
5462 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5463 by named arguments. STACK is 8-byte aligned. */
5464 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5465 if (cum->aapcs_stack_size > 0)
5466 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5467 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5468 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5470 /* Emit code to initialize GRTOP, the top of the GR save area.
5471 virtual_incoming_args_rtx should have been 16 byte aligned. */
5472 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5473 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5474 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5476 /* Emit code to initialize VRTOP, the top of the VR save area.
5477 This address is gr_save_area_bytes below GRTOP, rounded
5478 down to the next 16-byte boundary. */
5479 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5480 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5481 STACK_BOUNDARY / BITS_PER_UNIT);
5483 if (vr_offset)
5484 t = fold_build_pointer_plus_hwi (t, -vr_offset);
5485 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5486 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5488 /* Emit code to initialize GROFF, the offset from GRTOP of the
5489 next GPR argument. */
5490 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5491 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5492 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5494 /* Likewise emit code to initialize VROFF, the offset from VRTOP
5495 of the next VR argument. */
5496 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5497 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5498 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
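/* Worked example for the code above, assuming the usual AAPCS64 values
   (NUM_ARG_REGS == 8, NUM_FP_ARG_REGS == 8, UNITS_PER_WORD == 8 and
   UNITS_PER_VREG == 16): for  void f (int n, ...)  one core register is
   taken by the named argument, so gr_save_area_size == 7 * 8 == 56 and
   vr_save_area_size == 8 * 16 == 128.  The va_list fields end up as
   __gr_top == the incoming-arguments pointer, __vr_top == __gr_top - 64
   (56 rounded up to 16), __gr_offs == -56 and __vr_offs == -128.  */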
5501 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5503 static tree
5504 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5505 gimple_seq *post_p ATTRIBUTE_UNUSED)
5507 tree addr;
5508 bool indirect_p;
5509 bool is_ha; /* is HFA or HVA. */
5510 bool dw_align; /* double-word align. */
5511 enum machine_mode ag_mode = VOIDmode;
5512 int nregs;
5513 enum machine_mode mode;
5515 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5516 tree stack, f_top, f_off, off, arg, roundup, on_stack;
5517 HOST_WIDE_INT size, rsize, adjust, align;
5518 tree t, u, cond1, cond2;
5520 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5521 if (indirect_p)
5522 type = build_pointer_type (type);
5524 mode = TYPE_MODE (type);
5526 f_stack = TYPE_FIELDS (va_list_type_node);
5527 f_grtop = DECL_CHAIN (f_stack);
5528 f_vrtop = DECL_CHAIN (f_grtop);
5529 f_groff = DECL_CHAIN (f_vrtop);
5530 f_vroff = DECL_CHAIN (f_groff);
5532 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5533 f_stack, NULL_TREE);
5534 size = int_size_in_bytes (type);
5535 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5537 dw_align = false;
5538 adjust = 0;
5539 if (aarch64_vfp_is_call_or_return_candidate (mode,
5540 type,
5541 &ag_mode,
5542 &nregs,
5543 &is_ha))
5545 /* TYPE passed in fp/simd registers. */
5546 if (TARGET_GENERAL_REGS_ONLY)
5547 sorry ("%qs and floating point or vector arguments",
5548 "-mgeneral-regs-only");
5550 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5551 unshare_expr (valist), f_vrtop, NULL_TREE);
5552 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5553 unshare_expr (valist), f_vroff, NULL_TREE);
5555 rsize = nregs * UNITS_PER_VREG;
5557 if (is_ha)
5559 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5560 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5562 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5563 && size < UNITS_PER_VREG)
5565 adjust = UNITS_PER_VREG - size;
5568 else
5570 /* TYPE passed in general registers. */
5571 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5572 unshare_expr (valist), f_grtop, NULL_TREE);
5573 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5574 unshare_expr (valist), f_groff, NULL_TREE);
5575 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5576 nregs = rsize / UNITS_PER_WORD;
5578 if (align > 8)
5579 dw_align = true;
5581 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5582 && size < UNITS_PER_WORD)
5584 adjust = UNITS_PER_WORD - size;
5588 /* Get a local temporary for the field value. */
5589 off = get_initialized_tmp_var (f_off, pre_p, NULL);
5591 /* Emit code to branch if off >= 0. */
5592 t = build2 (GE_EXPR, boolean_type_node, off,
5593 build_int_cst (TREE_TYPE (off), 0));
5594 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5596 if (dw_align)
5598 /* Emit: offs = (offs + 15) & -16. */
5599 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5600 build_int_cst (TREE_TYPE (off), 15));
5601 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5602 build_int_cst (TREE_TYPE (off), -16));
5603 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5605 else
5606 roundup = NULL;
5608 /* Update ap.__[g|v]r_offs */
5609 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5610 build_int_cst (TREE_TYPE (off), rsize));
5611 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5613 /* String up. */
5614 if (roundup)
5615 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5617 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5618 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5619 build_int_cst (TREE_TYPE (f_off), 0));
5620 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5622 /* String up: make sure the assignment happens before the use. */
5623 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5624 COND_EXPR_ELSE (cond1) = t;
5626 /* Prepare the trees handling the argument that is passed on the stack;
5627 the top-level node will be stored in ON_STACK. */
5628 arg = get_initialized_tmp_var (stack, pre_p, NULL);
5629 if (align > 8)
5631 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5632 t = fold_convert (intDI_type_node, arg);
5633 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5634 build_int_cst (TREE_TYPE (t), 15));
5635 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5636 build_int_cst (TREE_TYPE (t), -16));
5637 t = fold_convert (TREE_TYPE (arg), t);
5638 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5640 else
5641 roundup = NULL;
5642 /* Advance ap.__stack */
5643 t = fold_convert (intDI_type_node, arg);
5644 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5645 build_int_cst (TREE_TYPE (t), size + 7));
5646 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5647 build_int_cst (TREE_TYPE (t), -8));
5648 t = fold_convert (TREE_TYPE (arg), t);
5649 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5650 /* String up roundup and advance. */
5651 if (roundup)
5652 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5653 /* String up with arg */
5654 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5655 /* Big-endianness related address adjustment. */
5656 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5657 && size < UNITS_PER_WORD)
5659 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5660 size_int (UNITS_PER_WORD - size));
5661 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5664 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5665 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5667 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5668 t = off;
5669 if (adjust)
5670 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5671 build_int_cst (TREE_TYPE (off), adjust));
5673 t = fold_convert (sizetype, t);
5674 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5676 if (is_ha)
5678 /* type ha; // treat as "struct {ftype field[n];}"
5679 ... [computing offs]
5680 for (i = 0; i < nregs; ++i, offs += 16)
5681 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5682 return ha; */
5683 int i;
5684 tree tmp_ha, field_t, field_ptr_t;
5686 /* Declare a local variable. */
5687 tmp_ha = create_tmp_var_raw (type, "ha");
5688 gimple_add_tmp_var (tmp_ha);
5690 /* Establish the base type. */
5691 switch (ag_mode)
5693 case SFmode:
5694 field_t = float_type_node;
5695 field_ptr_t = float_ptr_type_node;
5696 break;
5697 case DFmode:
5698 field_t = double_type_node;
5699 field_ptr_t = double_ptr_type_node;
5700 break;
5701 case TFmode:
5702 field_t = long_double_type_node;
5703 field_ptr_t = long_double_ptr_type_node;
5704 break;
5705 /* The half-precision and quad-precision types are not fully supported yet.
5706 Enable the following code once that support is complete; the correct
5707 type node for __fp16 * still needs to be found. */
5708 #if 0
5709 case HFmode:
5710 field_t = float_type_node;
5711 field_ptr_t = float_ptr_type_node;
5712 break;
5713 #endif
5714 case V2SImode:
5715 case V4SImode:
5717 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5718 field_t = build_vector_type_for_mode (innertype, ag_mode);
5719 field_ptr_t = build_pointer_type (field_t);
5721 break;
5722 default:
5723 gcc_assert (0);
5726 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
5727 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5728 addr = t;
5729 t = fold_convert (field_ptr_t, addr);
5730 t = build2 (MODIFY_EXPR, field_t,
5731 build1 (INDIRECT_REF, field_t, tmp_ha),
5732 build1 (INDIRECT_REF, field_t, t));
5734 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5735 for (i = 1; i < nregs; ++i)
5737 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5738 u = fold_convert (field_ptr_t, addr);
5739 u = build2 (MODIFY_EXPR, field_t,
5740 build2 (MEM_REF, field_t, tmp_ha,
5741 build_int_cst (field_ptr_t,
5742 (i *
5743 int_size_in_bytes (field_t)))),
5744 build1 (INDIRECT_REF, field_t, u));
5745 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5748 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5749 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5752 COND_EXPR_ELSE (cond2) = t;
5753 addr = fold_convert (build_pointer_type (type), cond1);
5754 addr = build_va_arg_indirect_ref (addr);
5756 if (indirect_p)
5757 addr = build_va_arg_indirect_ref (addr);
5759 return addr;
5762 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
5764 static void
5765 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5766 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5767 int no_rtl)
5769 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5770 CUMULATIVE_ARGS local_cum;
5771 int gr_saved, vr_saved;
5773 /* The caller has advanced CUM up to, but not beyond, the last named
5774 argument. Advance a local copy of CUM past the last "real" named
5775 argument, to find out how many registers are left over. */
5776 local_cum = *cum;
5777 aarch64_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, true);
5779 /* Find out how many registers we need to save. */
5780 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5781 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5783 if (TARGET_GENERAL_REGS_ONLY)
5785 if (local_cum.aapcs_nvrn > 0)
5786 sorry ("%qs and floating point or vector arguments",
5787 "-mgeneral-regs-only");
5788 vr_saved = 0;
5791 if (!no_rtl)
5793 if (gr_saved > 0)
5795 rtx ptr, mem;
5797 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5798 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5799 - gr_saved * UNITS_PER_WORD);
5800 mem = gen_frame_mem (BLKmode, ptr);
5801 set_mem_alias_set (mem, get_varargs_alias_set ());
5803 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5804 mem, gr_saved);
5806 if (vr_saved > 0)
5808 /* We can't use move_block_from_reg, because it will use
5809 the wrong mode, storing D regs only. */
5810 enum machine_mode mode = TImode;
5811 int off, i;
5813 /* Set OFF to the offset from virtual_incoming_args_rtx of
5814 the first vector register. The VR save area lies below
5815 the GR one, and is aligned to 16 bytes. */
5816 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5817 STACK_BOUNDARY / BITS_PER_UNIT);
5818 off -= vr_saved * UNITS_PER_VREG;
5820 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5822 rtx ptr, mem;
5824 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5825 mem = gen_frame_mem (mode, ptr);
5826 set_mem_alias_set (mem, get_varargs_alias_set ());
5827 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5828 off += UNITS_PER_VREG;
5833 /* We don't save the size into *PRETEND_SIZE because we want to avoid
5834 any complication of having crtl->args.pretend_args_size changed. */
5835 cfun->machine->saved_varargs_size
5836 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5837 STACK_BOUNDARY / BITS_PER_UNIT)
5838 + vr_saved * UNITS_PER_VREG);
5841 static void
5842 aarch64_conditional_register_usage (void)
5844 int i;
5845 if (!TARGET_FLOAT)
5847 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
5849 fixed_regs[i] = 1;
5850 call_used_regs[i] = 1;
5855 /* Walk down the type tree of TYPE counting consecutive base elements.
5856 If *MODEP is VOIDmode, then set it to the first valid floating point
5857 type. If a non-floating point type is found, or if a floating point
5858 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5859 otherwise return the count in the sub-tree. */
5860 static int
5861 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5863 enum machine_mode mode;
5864 HOST_WIDE_INT size;
5866 switch (TREE_CODE (type))
5868 case REAL_TYPE:
5869 mode = TYPE_MODE (type);
5870 if (mode != DFmode && mode != SFmode && mode != TFmode)
5871 return -1;
5873 if (*modep == VOIDmode)
5874 *modep = mode;
5876 if (*modep == mode)
5877 return 1;
5879 break;
5881 case COMPLEX_TYPE:
5882 mode = TYPE_MODE (TREE_TYPE (type));
5883 if (mode != DFmode && mode != SFmode && mode != TFmode)
5884 return -1;
5886 if (*modep == VOIDmode)
5887 *modep = mode;
5889 if (*modep == mode)
5890 return 2;
5892 break;
5894 case VECTOR_TYPE:
5895 /* Use V2SImode and V4SImode as representatives of all 64-bit
5896 and 128-bit vector types. */
5897 size = int_size_in_bytes (type);
5898 switch (size)
5900 case 8:
5901 mode = V2SImode;
5902 break;
5903 case 16:
5904 mode = V4SImode;
5905 break;
5906 default:
5907 return -1;
5910 if (*modep == VOIDmode)
5911 *modep = mode;
5913 /* Vector modes are considered to be opaque: two vectors are
5914 equivalent for the purposes of being homogeneous aggregates
5915 if they are the same size. */
5916 if (*modep == mode)
5917 return 1;
5919 break;
5921 case ARRAY_TYPE:
5923 int count;
5924 tree index = TYPE_DOMAIN (type);
5926 /* Can't handle incomplete types. */
5927 if (!COMPLETE_TYPE_P (type))
5928 return -1;
5930 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5931 if (count == -1
5932 || !index
5933 || !TYPE_MAX_VALUE (index)
5934 || !host_integerp (TYPE_MAX_VALUE (index), 1)
5935 || !TYPE_MIN_VALUE (index)
5936 || !host_integerp (TYPE_MIN_VALUE (index), 1)
5937 || count < 0)
5938 return -1;
5940 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
5941 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
5943 /* There must be no padding. */
5944 if (!host_integerp (TYPE_SIZE (type), 1)
5945 || (tree_low_cst (TYPE_SIZE (type), 1)
5946 != count * GET_MODE_BITSIZE (*modep)))
5947 return -1;
5949 return count;
5952 case RECORD_TYPE:
5954 int count = 0;
5955 int sub_count;
5956 tree field;
5958 /* Can't handle incomplete types. */
5959 if (!COMPLETE_TYPE_P (type))
5960 return -1;
5962 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5964 if (TREE_CODE (field) != FIELD_DECL)
5965 continue;
5967 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5968 if (sub_count < 0)
5969 return -1;
5970 count += sub_count;
5973 /* There must be no padding. */
5974 if (!host_integerp (TYPE_SIZE (type), 1)
5975 || (tree_low_cst (TYPE_SIZE (type), 1)
5976 != count * GET_MODE_BITSIZE (*modep)))
5977 return -1;
5979 return count;
5982 case UNION_TYPE:
5983 case QUAL_UNION_TYPE:
5985 /* These aren't very interesting except in a degenerate case. */
5986 int count = 0;
5987 int sub_count;
5988 tree field;
5990 /* Can't handle incomplete types. */
5991 if (!COMPLETE_TYPE_P (type))
5992 return -1;
5994 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5996 if (TREE_CODE (field) != FIELD_DECL)
5997 continue;
5999 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6000 if (sub_count < 0)
6001 return -1;
6002 count = count > sub_count ? count : sub_count;
6005 /* There must be no padding. */
6006 if (!host_integerp (TYPE_SIZE (type), 1)
6007 || (tree_low_cst (TYPE_SIZE (type), 1)
6008 != count * GET_MODE_BITSIZE (*modep)))
6009 return -1;
6011 return count;
6014 default:
6015 break;
6018 return -1;
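/* Examples of the walk above (types invented for illustration):
     struct { double x, y, z; }    -> 3 with *modep == DFmode (an HFA)
     float v[4]                    -> 4 with *modep == SFmode
     _Complex double               -> 2 with *modep == DFmode
     struct { float f; double d; } -> -1 (mismatched element modes)  */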
6021 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
6022 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
6023 array types. The C99 floating-point complex types are also considered
6024 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
6025 types, which are GCC extensions and out of the scope of AAPCS64, are
6026 treated as composite types here as well.
6028 Note that MODE itself is not sufficient in determining whether a type
6029 is such a composite type or not. This is because
6030 stor-layout.c:compute_record_mode may have already changed the MODE
6031 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
6032 structure with only one field may have its MODE set to the mode of the
6033 field. Also an integer mode whose size matches the size of the
6034 RECORD_TYPE type may be used to substitute the original mode
6035 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
6036 solely relied on. */
6038 static bool
6039 aarch64_composite_type_p (const_tree type,
6040 enum machine_mode mode)
6042 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
6043 return true;
6045 if (mode == BLKmode
6046 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6047 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
6048 return true;
6050 return false;
6053 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6054 type as described in AAPCS64 \S 4.1.2.
6056 See the comment above aarch64_composite_type_p for the notes on MODE. */
6058 static bool
6059 aarch64_short_vector_p (const_tree type,
6060 enum machine_mode mode)
6062 HOST_WIDE_INT size = -1;
6064 if (type && TREE_CODE (type) == VECTOR_TYPE)
6065 size = int_size_in_bytes (type);
6066 else if (!aarch64_composite_type_p (type, mode)
6067 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6068 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
6069 size = GET_MODE_SIZE (mode);
6071 return size == 8 || size == 16;
6074 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
6075 shall be passed or returned in simd/fp register(s) (providing these
6076 parameter passing registers are available).
6078 Upon successful return, *COUNT returns the number of needed registers,
6079 *BASE_MODE returns the mode of the individual register and when IS_HA
6080 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6081 floating-point aggregate or a homogeneous short-vector aggregate. */
6083 static bool
6084 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
6085 const_tree type,
6086 enum machine_mode *base_mode,
6087 int *count,
6088 bool *is_ha)
6090 enum machine_mode new_mode = VOIDmode;
6091 bool composite_p = aarch64_composite_type_p (type, mode);
6093 if (is_ha != NULL) *is_ha = false;
6095 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
6096 || aarch64_short_vector_p (type, mode))
6098 *count = 1;
6099 new_mode = mode;
6101 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6103 if (is_ha != NULL) *is_ha = true;
6104 *count = 2;
6105 new_mode = GET_MODE_INNER (mode);
6107 else if (type && composite_p)
6109 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6111 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
6113 if (is_ha != NULL) *is_ha = true;
6114 *count = ag_count;
6116 else
6117 return false;
6119 else
6120 return false;
6122 *base_mode = new_mode;
6123 return true;
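/* For instance, a plain double gives *count == 1 and *base_mode == DFmode;
   a _Complex float gives *count == 2, *base_mode == SFmode and *is_ha set;
   a structure of four floats gives *count == 4 (within the HA_MAX_NUM_FLDS
   limit), while a structure of five floats exceeds the limit and the
   function returns false.  */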
6126 /* Implement TARGET_STRUCT_VALUE_RTX. */
6128 static rtx
6129 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
6130 int incoming ATTRIBUTE_UNUSED)
6132 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
6135 /* Implements target hook vector_mode_supported_p. */
6136 static bool
6137 aarch64_vector_mode_supported_p (enum machine_mode mode)
6139 if (TARGET_SIMD
6140 && (mode == V4SImode || mode == V8HImode
6141 || mode == V16QImode || mode == V2DImode
6142 || mode == V2SImode || mode == V4HImode
6143 || mode == V8QImode || mode == V2SFmode
6144 || mode == V4SFmode || mode == V2DFmode))
6145 return true;
6147 return false;
6150 /* Return appropriate SIMD container
6151 for MODE within a vector of WIDTH bits. */
6152 static enum machine_mode
6153 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
6155 gcc_assert (width == 64 || width == 128);
6156 if (TARGET_SIMD)
6158 if (width == 128)
6159 switch (mode)
6161 case DFmode:
6162 return V2DFmode;
6163 case SFmode:
6164 return V4SFmode;
6165 case SImode:
6166 return V4SImode;
6167 case HImode:
6168 return V8HImode;
6169 case QImode:
6170 return V16QImode;
6171 case DImode:
6172 return V2DImode;
6173 default:
6174 break;
6176 else
6177 switch (mode)
6179 case SFmode:
6180 return V2SFmode;
6181 case SImode:
6182 return V2SImode;
6183 case HImode:
6184 return V4HImode;
6185 case QImode:
6186 return V8QImode;
6187 default:
6188 break;
6191 return word_mode;
6194 /* Return 128-bit container as the preferred SIMD mode for MODE. */
6195 static enum machine_mode
6196 aarch64_preferred_simd_mode (enum machine_mode mode)
6198 return aarch64_simd_container_mode (mode, 128);
6201 /* Return the bitmask of possible vector sizes for the vectorizer
6202 to iterate over. */
6203 static unsigned int
6204 aarch64_autovectorize_vector_sizes (void)
6206 return (16 | 8);
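/* The value above is a bitmask of candidate vector sizes in bytes:
   16 | 8 tells the vectorizer to try 128-bit vectors first and to fall
   back to 64-bit vectors.  */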
6209 /* A table to help perform AArch64-specific name mangling for AdvSIMD
6210 vector types in order to conform to the AAPCS64 (see "Procedure
6211 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6212 qualify for emission with the mangled names defined in that document,
6213 a vector type must not only be of the correct mode but also be
6214 composed of AdvSIMD vector element types (e.g.
6215 __builtin_aarch64_simd_qi); these types are registered by
6216 aarch64_init_simd_builtins (). In other words, vector types defined
6217 in other ways e.g. via vector_size attribute will get default
6218 mangled names. */
6219 typedef struct
6221 enum machine_mode mode;
6222 const char *element_type_name;
6223 const char *mangled_name;
6224 } aarch64_simd_mangle_map_entry;
6226 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
6227 /* 64-bit containerized types. */
6228 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6229 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6230 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6231 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6232 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6233 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6234 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6235 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6236 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6237 /* 128-bit containerized types. */
6238 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6239 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6240 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6241 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6242 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6243 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6244 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6245 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6246 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6247 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6248 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6249 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6250 { VOIDmode, NULL, NULL }
6253 /* Implement TARGET_MANGLE_TYPE. */
6255 static const char *
6256 aarch64_mangle_type (const_tree type)
6258 /* The AArch64 ABI documents say that "__va_list" has to be
6259 mangled as if it is in the "std" namespace.  */
6260 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6261 return "St9__va_list";
6263 /* Check the mode of the vector type, and the name of the vector
6264 element type, against the table. */
6265 if (TREE_CODE (type) == VECTOR_TYPE)
6267 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6269 while (pos->mode != VOIDmode)
6271 tree elt_type = TREE_TYPE (type);
6273 if (pos->mode == TYPE_MODE (type)
6274 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6275 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6276 pos->element_type_name))
6277 return pos->mangled_name;
6279 pos++;
6283 /* Use the default mangling. */
6284 return NULL;
6287 /* Return the equivalent letter for size. */
6288 static char
6289 sizetochar (int size)
6291 switch (size)
6293 case 64: return 'd';
6294 case 32: return 's';
6295 case 16: return 'h';
6296 case 8 : return 'b';
6297 default: gcc_unreachable ();
6301 /* Return true iff X is a uniform vector of floating-point
6302 constants, and the constant can be represented in
6303 quarter-precision form.  Note that, as aarch64_float_const_representable_p
6304 rejects both +0.0 and -0.0, this function also rejects +0.0 and -0.0.  */
6305 static bool
6306 aarch64_vect_float_const_representable_p (rtx x)
6308 int i = 0;
6309 REAL_VALUE_TYPE r0, ri;
6310 rtx x0, xi;
6312 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6313 return false;
6315 x0 = CONST_VECTOR_ELT (x, 0);
6316 if (!CONST_DOUBLE_P (x0))
6317 return false;
6319 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6321 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6323 xi = CONST_VECTOR_ELT (x, i);
6324 if (!CONST_DOUBLE_P (xi))
6325 return false;
6327 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6328 if (!REAL_VALUES_EQUAL (r0, ri))
6329 return false;
6332 return aarch64_float_const_representable_p (x0);
6335 /* Return true if OP is a valid AdvSIMD immediate for MODE, false otherwise; if INFO is nonnull, describe how the immediate can be generated.  */
6336 bool
6337 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
6338 struct simd_immediate_info *info)
6340 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6341 matches = 1; \
6342 for (i = 0; i < idx; i += (STRIDE)) \
6343 if (!(TEST)) \
6344 matches = 0; \
6345 if (matches) \
6347 immtype = (CLASS); \
6348 elsize = (ELSIZE); \
6349 eshift = (SHIFT); \
6350 emvn = (NEG); \
6351 break; \
6354 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6355 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6356 unsigned char bytes[16];
6357 int immtype = -1, matches;
6358 unsigned int invmask = inverse ? 0xff : 0;
6359 int eshift, emvn;
6361 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6363 if (! (aarch64_simd_imm_zero_p (op, mode)
6364 || aarch64_vect_float_const_representable_p (op)))
6365 return false;
6367 if (info)
6369 info->value = CONST_VECTOR_ELT (op, 0);
6370 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
6371 info->mvn = false;
6372 info->shift = 0;
6375 return true;
6378 /* Splat vector constant out into a byte vector. */
6379 for (i = 0; i < n_elts; i++)
6381 rtx el = CONST_VECTOR_ELT (op, i);
6382 unsigned HOST_WIDE_INT elpart;
6383 unsigned int part, parts;
6385 if (GET_CODE (el) == CONST_INT)
6387 elpart = INTVAL (el);
6388 parts = 1;
6390 else if (GET_CODE (el) == CONST_DOUBLE)
6392 elpart = CONST_DOUBLE_LOW (el);
6393 parts = 2;
6395 else
6396 gcc_unreachable ();
6398 for (part = 0; part < parts; part++)
6400 unsigned int byte;
6401 for (byte = 0; byte < innersize; byte++)
6403 bytes[idx++] = (elpart & 0xff) ^ invmask;
6404 elpart >>= BITS_PER_UNIT;
6406 if (GET_CODE (el) == CONST_DOUBLE)
6407 elpart = CONST_DOUBLE_HIGH (el);
6411 /* Sanity check. */
6412 gcc_assert (idx == GET_MODE_SIZE (mode));
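/* Worked example (illustrative, not from the original sources): a V4SImode
   constant whose four elements are all 0x0000ab00 splats to the byte vector
   { 0x00, 0xab, 0x00, 0x00, ... } (least significant byte first).  It is
   matched by the second (4, 32, ...) check below with SHIFT 8 (immtype 1),
   so INFO ends up describing the value 0xab shifted left by 8 in each
   32-bit lane, which a MOVI with an LSL 8 modifier can materialise.  */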
6416 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6417 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6419 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6420 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6422 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6423 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6425 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6426 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6428 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6430 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6432 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6433 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6435 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6436 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6438 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6439 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6441 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6442 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6444 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6446 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6448 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6449 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6451 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6452 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6454 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6455 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6457 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6458 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6460 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6462 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6463 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6465 while (0);
6467 if (immtype == -1)
6468 return false;
6470 if (info)
6472 info->element_width = elsize;
6473 info->mvn = emvn != 0;
6474 info->shift = eshift;
6476 unsigned HOST_WIDE_INT imm = 0;
6478 if (immtype >= 12 && immtype <= 15)
6479 info->msl = true;
6481 /* Un-invert bytes of recognized vector, if necessary. */
6482 if (invmask != 0)
6483 for (i = 0; i < idx; i++)
6484 bytes[i] ^= invmask;
6486 if (immtype == 17)
6488 /* FIXME: Broken on 32-bit H_W_I hosts. */
6489 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6491 for (i = 0; i < 8; i++)
6492 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6493 << (i * BITS_PER_UNIT);
6496 info->value = GEN_INT (imm);
6498 else
6500 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6501 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6503 /* Construct 'abcdefgh' because the assembler cannot handle
6504 generic constants. */
6505 if (info->mvn)
6506 imm = ~imm;
6507 imm = (imm >> info->shift) & 0xff;
6508 info->value = GEN_INT (imm);
6512 return true;
6513 #undef CHECK
6516 static bool
6517 aarch64_const_vec_all_same_int_p (rtx x,
6518 HOST_WIDE_INT minval,
6519 HOST_WIDE_INT maxval)
6521 HOST_WIDE_INT firstval;
6522 int count, i;
6524 if (GET_CODE (x) != CONST_VECTOR
6525 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6526 return false;
6528 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6529 if (firstval < minval || firstval > maxval)
6530 return false;
6532 count = CONST_VECTOR_NUNITS (x);
6533 for (i = 1; i < count; i++)
6534 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6535 return false;
6537 return true;
6540 /* Check if immediate shift constants are within range.  */
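/* For example (illustrative): with 32-bit elements (e.g. V4SImode), a
   left-shift immediate must be a vector of identical values in [0, 31],
   while a right-shift immediate must lie in [1, 32].  */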
6541 bool
6542 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6544 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6545 if (left)
6546 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6547 else
6548 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
6551 /* Return true if X is a uniform vector where all elements
6552 are either the floating-point constant 0.0 or the
6553 integer constant 0. */
6554 bool
6555 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6557 return x == CONST0_RTX (mode);
6560 bool
6561 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6563 HOST_WIDE_INT imm = INTVAL (x);
6564 int i;
6566 for (i = 0; i < 8; i++)
6568 unsigned int byte = imm & 0xff;
6569 if (byte != 0xff && byte != 0)
6570 return false;
6571 imm >>= 8;
6574 return true;
6577 bool
6578 aarch64_mov_operand_p (rtx x,
6579 enum aarch64_symbol_context context,
6580 enum machine_mode mode)
6582 if (GET_CODE (x) == HIGH
6583 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6584 return true;
6586 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
6587 return true;
6589 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
6590 return true;
6592 return aarch64_classify_symbolic_expression (x, context)
6593 == SYMBOL_TINY_ABSOLUTE;
6596 /* Return a const_int vector of VAL. */
6598 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6600 int nunits = GET_MODE_NUNITS (mode);
6601 rtvec v = rtvec_alloc (nunits);
6602 int i;
6604 for (i=0; i < nunits; i++)
6605 RTVEC_ELT (v, i) = GEN_INT (val);
6607 return gen_rtx_CONST_VECTOR (mode, v);
6610 /* Check OP is a legal scalar immediate for the MOVI instruction. */
6612 bool
6613 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
6615 enum machine_mode vmode;
6617 gcc_assert (!VECTOR_MODE_P (mode));
6618 vmode = aarch64_preferred_simd_mode (mode);
6619 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
6620 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
6623 /* Construct and return a PARALLEL RTX vector. */
6625 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6627 int nunits = GET_MODE_NUNITS (mode);
6628 rtvec v = rtvec_alloc (nunits / 2);
6629 int base = high ? nunits / 2 : 0;
6630 rtx t1;
6631 int i;
6633 for (i=0; i < nunits / 2; i++)
6634 RTVEC_ELT (v, i) = GEN_INT (base + i);
6636 t1 = gen_rtx_PARALLEL (mode, v);
6637 return t1;
6640 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6641 HIGH (exclusive). */
6642 void
6643 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6645 HOST_WIDE_INT lane;
6646 gcc_assert (GET_CODE (operand) == CONST_INT);
6647 lane = INTVAL (operand);
6649 if (lane < low || lane >= high)
6650 error ("lane out of range");
6653 void
6654 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6656 gcc_assert (GET_CODE (operand) == CONST_INT);
6657 HOST_WIDE_INT lane = INTVAL (operand);
6659 if (lane < low || lane >= high)
6660 error ("constant out of range");
6663 /* Emit code to reinterpret one AdvSIMD type as another,
6664 without altering bits. */
6665 void
6666 aarch64_simd_reinterpret (rtx dest, rtx src)
6668 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6671 /* Emit code to place an AdvSIMD pair result in memory locations (with equal
6672 registers). */
6673 void
6674 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6675 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6676 rtx op1)
6678 rtx mem = gen_rtx_MEM (mode, destaddr);
6679 rtx tmp1 = gen_reg_rtx (mode);
6680 rtx tmp2 = gen_reg_rtx (mode);
6682 emit_insn (intfn (tmp1, op1, tmp2));
6684 emit_move_insn (mem, tmp1);
6685 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6686 emit_move_insn (mem, tmp2);
6689 /* Return TRUE if OP is a valid vector addressing mode. */
6690 bool
6691 aarch64_simd_mem_operand_p (rtx op)
6693 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6694 || GET_CODE (XEXP (op, 0)) == REG);
6697 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
6698 not to early-clobber SRC registers in the process.
6700 We assume that the operands described by SRC and DEST represent a
6701 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6702 number of components into which the copy has been decomposed. */
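/* Example (illustrative, register numbers hypothetical): when copying a
   two-register value from {V1, V2} into {V2, V3}, the destination overlaps
   the source and has the higher register number, so the moves are emitted
   in reverse order (V3 <- V2 first, then V2 <- V1) to avoid clobbering V2
   before it has been read.  */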
6703 void
6704 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6705 rtx *src, unsigned int count)
6707 unsigned int i;
6709 if (!reg_overlap_mentioned_p (operands[0], operands[1])
6710 || REGNO (operands[0]) < REGNO (operands[1]))
6712 for (i = 0; i < count; i++)
6714 operands[2 * i] = dest[i];
6715 operands[2 * i + 1] = src[i];
6718 else
6720 for (i = 0; i < count; i++)
6722 operands[2 * i] = dest[count - i - 1];
6723 operands[2 * i + 1] = src[count - i - 1];
6728 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6729 one of VSTRUCT modes: OI, CI or XI. */
6731 aarch64_simd_attr_length_move (rtx insn)
6733 enum machine_mode mode;
6735 extract_insn_cached (insn);
6737 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6739 mode = GET_MODE (recog_data.operand[0]);
6740 switch (mode)
6742 case OImode:
6743 return 8;
6744 case CImode:
6745 return 12;
6746 case XImode:
6747 return 16;
6748 default:
6749 gcc_unreachable ();
6752 return 4;
6755 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6756 alignment of a vector to 128 bits. */
6757 static HOST_WIDE_INT
6758 aarch64_simd_vector_alignment (const_tree type)
6760 HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
6761 return MIN (align, 128);
6764 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
6765 static bool
6766 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6768 if (is_packed)
6769 return false;
6771 /* We guarantee alignment for vectors up to 128-bits. */
6772 if (tree_int_cst_compare (TYPE_SIZE (type),
6773 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6774 return false;
6776 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
6777 return true;
6780 /* If VALS is a vector constant that can be loaded into a register
6781 using DUP, generate instructions to do so and return an RTX to
6782 assign to the register. Otherwise return NULL_RTX. */
6783 static rtx
6784 aarch64_simd_dup_constant (rtx vals)
6786 enum machine_mode mode = GET_MODE (vals);
6787 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6788 int n_elts = GET_MODE_NUNITS (mode);
6789 bool all_same = true;
6790 rtx x;
6791 int i;
6793 if (GET_CODE (vals) != CONST_VECTOR)
6794 return NULL_RTX;
6796 for (i = 1; i < n_elts; ++i)
6798 x = CONST_VECTOR_ELT (vals, i);
6799 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
6800 all_same = false;
6803 if (!all_same)
6804 return NULL_RTX;
6806 /* We can load this constant by using DUP and a constant in a
6807 single general-purpose register.  This will be cheaper than a vector
6808 load. */
6809 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
6810 return gen_rtx_VEC_DUPLICATE (mode, x);
6814 /* Generate code to load VALS, which is a PARALLEL containing only
6815 constants (for vec_init) or CONST_VECTOR, efficiently into a
6816 register. Returns an RTX to copy into the register, or NULL_RTX
6817 for a PARALLEL that can not be converted into a CONST_VECTOR. */
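/* Sketch of the decision order below (illustrative examples, not from the
   original sources): a vector such as {3, 3, 3, 3} that is a valid MOVI/MVNI
   immediate is returned as a CONST_VECTOR; an all-equal vector whose value
   is not encodable that way (say four copies of 0x12345678) falls back to
   DUP from a general register; anything else that is still constant (e.g.
   {1, 2, 3, 4}) is loaded from the constant pool.  */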
6818 static rtx
6819 aarch64_simd_make_constant (rtx vals)
6821 enum machine_mode mode = GET_MODE (vals);
6822 rtx const_dup;
6823 rtx const_vec = NULL_RTX;
6824 int n_elts = GET_MODE_NUNITS (mode);
6825 int n_const = 0;
6826 int i;
6828 if (GET_CODE (vals) == CONST_VECTOR)
6829 const_vec = vals;
6830 else if (GET_CODE (vals) == PARALLEL)
6832 /* A CONST_VECTOR must contain only CONST_INTs and
6833 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
6834 Only store valid constants in a CONST_VECTOR. */
6835 for (i = 0; i < n_elts; ++i)
6837 rtx x = XVECEXP (vals, 0, i);
6838 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
6839 n_const++;
6841 if (n_const == n_elts)
6842 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6844 else
6845 gcc_unreachable ();
6847 if (const_vec != NULL_RTX
6848 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
6849 /* Load using MOVI/MVNI. */
6850 return const_vec;
6851 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
6852 /* Loaded using DUP. */
6853 return const_dup;
6854 else if (const_vec != NULL_RTX)
6855 /* Load from constant pool. We can not take advantage of single-cycle
6856 LD1 because we need a PC-relative addressing mode. */
6857 return const_vec;
6858 else
6859 /* A PARALLEL containing something not valid inside CONST_VECTOR.
6860 We can not construct an initializer. */
6861 return NULL_RTX;
6864 void
6865 aarch64_expand_vector_init (rtx target, rtx vals)
6867 enum machine_mode mode = GET_MODE (target);
6868 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6869 int n_elts = GET_MODE_NUNITS (mode);
6870 int n_var = 0, one_var = -1;
6871 bool all_same = true;
6872 rtx x, mem;
6873 int i;
6875 x = XVECEXP (vals, 0, 0);
6876 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6877 n_var = 1, one_var = 0;
6879 for (i = 1; i < n_elts; ++i)
6881 x = XVECEXP (vals, 0, i);
6882 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6883 ++n_var, one_var = i;
6885 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6886 all_same = false;
6889 if (n_var == 0)
6891 rtx constant = aarch64_simd_make_constant (vals);
6892 if (constant != NULL_RTX)
6894 emit_move_insn (target, constant);
6895 return;
6899 /* Splat a single non-constant element if we can. */
6900 if (all_same)
6902 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
6903 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
6904 return;
6907 /* One field is non-constant. Load constant then overwrite varying
6908 field. This is more efficient than using the stack. */
6909 if (n_var == 1)
6911 rtx copy = copy_rtx (vals);
6912 rtx index = GEN_INT (one_var);
6913 enum insn_code icode;
6915 /* Load constant part of vector, substitute neighboring value for
6916 varying element. */
6917 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
6918 aarch64_expand_vector_init (target, copy);
6920 /* Insert variable. */
6921 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
6922 icode = optab_handler (vec_set_optab, mode);
6923 gcc_assert (icode != CODE_FOR_nothing);
6924 emit_insn (GEN_FCN (icode) (target, x, index));
6925 return;
6928 /* Construct the vector in memory one field at a time
6929 and load the whole vector. */
6930 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6931 for (i = 0; i < n_elts; i++)
6932 emit_move_insn (adjust_address_nv (mem, inner_mode,
6933 i * GET_MODE_SIZE (inner_mode)),
6934 XVECEXP (vals, 0, i));
6935 emit_move_insn (target, mem);
6939 static unsigned HOST_WIDE_INT
6940 aarch64_shift_truncation_mask (enum machine_mode mode)
6942 return
6943 (aarch64_vector_mode_supported_p (mode)
6944 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
6947 #ifndef TLS_SECTION_ASM_FLAG
6948 #define TLS_SECTION_ASM_FLAG 'T'
6949 #endif
6951 void
6952 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
6953 tree decl ATTRIBUTE_UNUSED)
6955 char flagchars[10], *f = flagchars;
6957 /* If we have already declared this section, we can use an
6958 abbreviated form to switch back to it -- unless this section is
6959 part of a COMDAT group, in which case GAS requires the full
6960 declaration every time. */
6961 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6962 && (flags & SECTION_DECLARED))
6964 fprintf (asm_out_file, "\t.section\t%s\n", name);
6965 return;
6968 if (!(flags & SECTION_DEBUG))
6969 *f++ = 'a';
6970 if (flags & SECTION_WRITE)
6971 *f++ = 'w';
6972 if (flags & SECTION_CODE)
6973 *f++ = 'x';
6974 if (flags & SECTION_SMALL)
6975 *f++ = 's';
6976 if (flags & SECTION_MERGE)
6977 *f++ = 'M';
6978 if (flags & SECTION_STRINGS)
6979 *f++ = 'S';
6980 if (flags & SECTION_TLS)
6981 *f++ = TLS_SECTION_ASM_FLAG;
6982 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6983 *f++ = 'G';
6984 *f = '\0';
6986 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
6988 if (!(flags & SECTION_NOTYPE))
6990 const char *type;
6991 const char *format;
6993 if (flags & SECTION_BSS)
6994 type = "nobits";
6995 else
6996 type = "progbits";
6998 #ifdef TYPE_OPERAND_FMT
6999 format = "," TYPE_OPERAND_FMT;
7000 #else
7001 format = ",@%s";
7002 #endif
7004 fprintf (asm_out_file, format, type);
7006 if (flags & SECTION_ENTSIZE)
7007 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
7008 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7010 if (TREE_CODE (decl) == IDENTIFIER_NODE)
7011 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
7012 else
7013 fprintf (asm_out_file, ",%s,comdat",
7014 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
7018 putc ('\n', asm_out_file);
7021 /* Select a format to encode pointers in exception handling data. */
7023 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
7025 int type;
7026 switch (aarch64_cmodel)
7028 case AARCH64_CMODEL_TINY:
7029 case AARCH64_CMODEL_TINY_PIC:
7030 case AARCH64_CMODEL_SMALL:
7031 case AARCH64_CMODEL_SMALL_PIC:
7032 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
7033 for everything. */
7034 type = DW_EH_PE_sdata4;
7035 break;
7036 default:
7037 /* No assumptions here. 8-byte relocs required. */
7038 type = DW_EH_PE_sdata8;
7039 break;
7041 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
7044 /* Emit load exclusive. */
7046 static void
7047 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
7048 rtx mem, rtx model_rtx)
7050 rtx (*gen) (rtx, rtx, rtx);
7052 switch (mode)
7054 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
7055 case HImode: gen = gen_aarch64_load_exclusivehi; break;
7056 case SImode: gen = gen_aarch64_load_exclusivesi; break;
7057 case DImode: gen = gen_aarch64_load_exclusivedi; break;
7058 default:
7059 gcc_unreachable ();
7062 emit_insn (gen (rval, mem, model_rtx));
7065 /* Emit store exclusive. */
7067 static void
7068 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
7069 rtx rval, rtx mem, rtx model_rtx)
7071 rtx (*gen) (rtx, rtx, rtx, rtx);
7073 switch (mode)
7075 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
7076 case HImode: gen = gen_aarch64_store_exclusivehi; break;
7077 case SImode: gen = gen_aarch64_store_exclusivesi; break;
7078 case DImode: gen = gen_aarch64_store_exclusivedi; break;
7079 default:
7080 gcc_unreachable ();
7083 emit_insn (gen (bval, rval, mem, model_rtx));
7086 /* Emit jump pattern INSN and mark it as very unlikely to be taken.  */
7088 static void
7089 aarch64_emit_unlikely_jump (rtx insn)
7091 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
7093 insn = emit_jump_insn (insn);
7094 add_reg_note (insn, REG_BR_PROB, very_unlikely);
7097 /* Expand a compare and swap pattern. */
7099 void
7100 aarch64_expand_compare_and_swap (rtx operands[])
7102 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
7103 enum machine_mode mode, cmp_mode;
7104 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
7106 bval = operands[0];
7107 rval = operands[1];
7108 mem = operands[2];
7109 oldval = operands[3];
7110 newval = operands[4];
7111 is_weak = operands[5];
7112 mod_s = operands[6];
7113 mod_f = operands[7];
7114 mode = GET_MODE (mem);
7115 cmp_mode = mode;
7117 /* Normally the succ memory model must be stronger than fail, but in the
7118 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7119 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
7121 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
7122 && INTVAL (mod_s) == MEMMODEL_RELEASE)
7123 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
7125 switch (mode)
7127 case QImode:
7128 case HImode:
7129 /* For short modes, we're going to perform the comparison in SImode,
7130 so do the zero-extension now. */
7131 cmp_mode = SImode;
7132 rval = gen_reg_rtx (SImode);
7133 oldval = convert_modes (SImode, mode, oldval, true);
7134 /* Fall through. */
7136 case SImode:
7137 case DImode:
7138 /* Force the value into a register if needed. */
7139 if (!aarch64_plus_operand (oldval, mode))
7140 oldval = force_reg (cmp_mode, oldval);
7141 break;
7143 default:
7144 gcc_unreachable ();
7147 switch (mode)
7149 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7150 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7151 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7152 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7153 default:
7154 gcc_unreachable ();
7157 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7159 if (mode == QImode || mode == HImode)
7160 emit_move_insn (operands[1], gen_lowpart (mode, rval));
7162 x = gen_rtx_REG (CCmode, CC_REGNUM);
7163 x = gen_rtx_EQ (SImode, x, const0_rtx);
7164 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
7167 /* Split a compare and swap pattern. */
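/* The split sequence emitted below is, roughly (illustrative sketch only):

     label1:                              ; strong variant only
       load-exclusive    rval, [mem]
       compare           rval, oldval
       branch-if-ne      label2           ; marked very unlikely
       store-exclusive   scratch, newval, [mem]
       branch-if-nonzero scratch, label1  ; strong variant: retry on failure
       ;; weak variant: instead just compare SCRATCH against zero
     label2:                                                               */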
7169 void
7170 aarch64_split_compare_and_swap (rtx operands[])
7172 rtx rval, mem, oldval, newval, scratch;
7173 enum machine_mode mode;
7174 bool is_weak;
7175 rtx label1, label2, x, cond;
7177 rval = operands[0];
7178 mem = operands[1];
7179 oldval = operands[2];
7180 newval = operands[3];
7181 is_weak = (operands[4] != const0_rtx);
7182 scratch = operands[7];
7183 mode = GET_MODE (mem);
7185 label1 = NULL_RTX;
7186 if (!is_weak)
7188 label1 = gen_label_rtx ();
7189 emit_label (label1);
7191 label2 = gen_label_rtx ();
7193 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
7195 cond = aarch64_gen_compare_reg (NE, rval, oldval);
7196 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7197 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7198 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7199 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7201 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7203 if (!is_weak)
7205 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7206 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7207 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7208 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7210 else
7212 cond = gen_rtx_REG (CCmode, CC_REGNUM);
7213 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7214 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7217 emit_label (label2);
7220 /* Split an atomic operation. */
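/* For example (illustrative), an atomic add splits into a loop of the form:
     retry:
       load-exclusive    old, [mem]
       new = old + value
       store-exclusive   status, new, [mem]
       branch-if-nonzero status, retry    ; marked very unlikely
   with OLD_OUT/NEW_OUT providing the fetch-and-op or op-and-fetch result.  */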
7222 void
7223 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7224 rtx value, rtx model_rtx, rtx cond)
7226 enum machine_mode mode = GET_MODE (mem);
7227 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7228 rtx label, x;
7230 label = gen_label_rtx ();
7231 emit_label (label);
7233 if (new_out)
7234 new_out = gen_lowpart (wmode, new_out);
7235 if (old_out)
7236 old_out = gen_lowpart (wmode, old_out);
7237 else
7238 old_out = new_out;
7239 value = simplify_gen_subreg (wmode, value, mode, 0);
7241 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7243 switch (code)
7245 case SET:
7246 new_out = value;
7247 break;
7249 case NOT:
7250 x = gen_rtx_AND (wmode, old_out, value);
7251 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7252 x = gen_rtx_NOT (wmode, new_out);
7253 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7254 break;
7256 case MINUS:
7257 if (CONST_INT_P (value))
7259 value = GEN_INT (-INTVAL (value));
7260 code = PLUS;
7262 /* Fall through. */
7264 default:
7265 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7266 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7267 break;
7270 aarch64_emit_store_exclusive (mode, cond, mem,
7271 gen_lowpart (mode, new_out), model_rtx);
7273 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7274 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7275 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7276 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7279 static void
7280 aarch64_print_extension (void)
7282 const struct aarch64_option_extension *opt = NULL;
7284 for (opt = all_extensions; opt->name != NULL; opt++)
7285 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
7286 asm_fprintf (asm_out_file, "+%s", opt->name);
7288 asm_fprintf (asm_out_file, "\n");
7291 static void
7292 aarch64_start_file (void)
7294 if (selected_arch)
7296 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
7297 aarch64_print_extension ();
7299 else if (selected_cpu)
7301 asm_fprintf (asm_out_file, "\t.cpu %s", selected_cpu->name);
7302 aarch64_print_extension ();
7304 default_file_start();
7307 /* Target hook for c_mode_for_suffix. */
7308 static enum machine_mode
7309 aarch64_c_mode_for_suffix (char suffix)
7311 if (suffix == 'q')
7312 return TFmode;
7314 return VOIDmode;
7317 /* We can only represent floating point constants which will fit in
7318 "quarter-precision" values. These values are characterised by
7319 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given by:
7322 (-1)^s * (n/16) * 2^r
7324 Where:
7325 's' is the sign bit.
7326 'n' is an integer in the range 16 <= n <= 31.
7327 'r' is an integer in the range -3 <= r <= 4. */
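/* Illustrative examples of representable values (an added note, not from
   the original sources):
     1.0   = (-1)^0 * (16/16) * 2^0
     -2.5  = (-1)^1 * (20/16) * 2^1
     0.125 = (-1)^0 * (16/16) * 2^-3   (smallest magnitude)
     31.0  = (-1)^0 * (31/16) * 2^4    (largest magnitude)  */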
7329 /* Return true iff X can be represented by a quarter-precision
7330 floating point immediate operand.  Note, we cannot represent 0.0.  */
7331 bool
7332 aarch64_float_const_representable_p (rtx x)
7334 /* This represents our current view of how many bits
7335 make up the mantissa. */
7336 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7337 int exponent;
7338 unsigned HOST_WIDE_INT mantissa, mask;
7339 HOST_WIDE_INT m1, m2;
7340 REAL_VALUE_TYPE r, m;
7342 if (!CONST_DOUBLE_P (x))
7343 return false;
7345 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7347 /* We cannot represent infinities, NaNs or +/-zero. We won't
7348 know if we have +zero until we analyse the mantissa, but we
7349 can reject the other invalid values. */
7350 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7351 || REAL_VALUE_MINUS_ZERO (r))
7352 return false;
7354 /* Extract exponent. */
7355 r = real_value_abs (&r);
7356 exponent = REAL_EXP (&r);
7358 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7359 highest (sign) bit, with a fixed binary point at bit point_pos.
7360 m1 holds the low part of the mantissa, m2 the high part.
7361 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7362 bits for the mantissa, this can fail (low bits will be lost). */
7363 real_ldexp (&m, &r, point_pos - exponent);
7364 REAL_VALUE_TO_INT (&m1, &m2, m);
7366 /* If the low part of the mantissa has bits set we cannot represent
7367 the value. */
7368 if (m1 != 0)
7369 return false;
7370 /* We have rejected the lower HOST_WIDE_INT, so update our
7371 understanding of how many bits lie in the mantissa and
7372 look only at the high HOST_WIDE_INT. */
7373 mantissa = m2;
7374 point_pos -= HOST_BITS_PER_WIDE_INT;
7376 /* We can only represent values with a mantissa of the form 1.xxxx. */
7377 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7378 if ((mantissa & mask) != 0)
7379 return false;
7381 /* Having filtered unrepresentable values, we may now remove all
7382 but the highest 5 bits. */
7383 mantissa >>= point_pos - 5;
7385 /* We cannot represent the value 0.0, so reject it. This is handled
7386 elsewhere. */
7387 if (mantissa == 0)
7388 return false;
7390 /* Then, as bit 4 is always set, we can mask it off, leaving
7391 the mantissa in the range [0, 15]. */
7392 mantissa &= ~(1 << 4);
7393 gcc_assert (mantissa <= 15);
7395 /* GCC internally does not use IEEE754-like encoding (where normalized
7396 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
7397 Our mantissa values are shifted 4 places to the left relative to
7398 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
7399 by 5 places to correct for GCC's representation. */
7400 exponent = 5 - exponent;
7402 return (exponent >= 0 && exponent <= 7);
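/* Added note (illustrative, not from the original sources): the routine
   below builds the textual MOVI/MVNI/FMOV operand string.  For example, a
   V4SImode vector of four copies of 0x0000ab00 would come back from
   aarch64_simd_valid_immediate with value 0xab and shift 8, producing
   roughly "movi\t%0.4s, 0xab, lsl 8"; a V2DFmode vector of 2.0 would
   instead take the FMOV path and print the decimal form of the constant.  */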
7405 char*
7406 aarch64_output_simd_mov_immediate (rtx const_vector,
7407 enum machine_mode mode,
7408 unsigned width)
7410 bool is_valid;
7411 static char templ[40];
7412 const char *mnemonic;
7413 const char *shift_op;
7414 unsigned int lane_count = 0;
7415 char element_char;
7417 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
7419 /* This will return true to show const_vector is legal for use as either
7420 an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
7421 also update INFO to show how the immediate should be generated. */
7422 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
7423 gcc_assert (is_valid);
7425 element_char = sizetochar (info.element_width);
7426 lane_count = width / info.element_width;
7428 mode = GET_MODE_INNER (mode);
7429 if (mode == SFmode || mode == DFmode)
7431 gcc_assert (info.shift == 0 && ! info.mvn);
7432 if (aarch64_float_const_zero_rtx_p (info.value))
7433 info.value = GEN_INT (0);
7434 else
7436 #define buf_size 20
7437 REAL_VALUE_TYPE r;
7438 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
7439 char float_buf[buf_size] = {'\0'};
7440 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
7441 #undef buf_size
7443 if (lane_count == 1)
7444 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
7445 else
7446 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
7447 lane_count, element_char, float_buf);
7448 return templ;
7452 mnemonic = info.mvn ? "mvni" : "movi";
7453 shift_op = info.msl ? "msl" : "lsl";
7455 if (lane_count == 1)
7456 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
7457 mnemonic, UINTVAL (info.value));
7458 else if (info.shift)
7459 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
7460 ", %s %d", mnemonic, lane_count, element_char,
7461 UINTVAL (info.value), shift_op, info.shift);
7462 else
7463 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
7464 mnemonic, lane_count, element_char, UINTVAL (info.value));
7465 return templ;
7468 char*
7469 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
7470 enum machine_mode mode)
7472 enum machine_mode vmode;
7474 gcc_assert (!VECTOR_MODE_P (mode));
7475 vmode = aarch64_simd_container_mode (mode, 64);
7476 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
7477 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
7480 /* Split operands into moves from op[1] + op[2] into op[0]. */
7482 void
7483 aarch64_split_combinev16qi (rtx operands[3])
7485 unsigned int dest = REGNO (operands[0]);
7486 unsigned int src1 = REGNO (operands[1]);
7487 unsigned int src2 = REGNO (operands[2]);
7488 enum machine_mode halfmode = GET_MODE (operands[1]);
7489 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
7490 rtx destlo, desthi;
7492 gcc_assert (halfmode == V16QImode);
7494 if (src1 == dest && src2 == dest + halfregs)
7496 /* No-op move. Can't split to nothing; emit something. */
7497 emit_note (NOTE_INSN_DELETED);
7498 return;
7501 /* Preserve register attributes for variable tracking. */
7502 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
7503 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
7504 GET_MODE_SIZE (halfmode));
7506 /* Special case of reversed high/low parts. */
7507 if (reg_overlap_mentioned_p (operands[2], destlo)
7508 && reg_overlap_mentioned_p (operands[1], desthi))
7510 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7511 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
7512 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7514 else if (!reg_overlap_mentioned_p (operands[2], destlo))
7516 /* Try to avoid unnecessary moves if part of the result
7517 is in the right place already. */
7518 if (src1 != dest)
7519 emit_move_insn (destlo, operands[1]);
7520 if (src2 != dest + halfregs)
7521 emit_move_insn (desthi, operands[2]);
7523 else
7525 if (src2 != dest + halfregs)
7526 emit_move_insn (desthi, operands[2]);
7527 if (src1 != dest)
7528 emit_move_insn (destlo, operands[1]);
7532 /* vec_perm support. */
7534 #define MAX_VECT_LEN 16
7536 struct expand_vec_perm_d
7538 rtx target, op0, op1;
7539 unsigned char perm[MAX_VECT_LEN];
7540 enum machine_mode vmode;
7541 unsigned char nelt;
7542 bool one_vector_p;
7543 bool testing_p;
7546 /* Generate a variable permutation. */
7548 static void
7549 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
7551 enum machine_mode vmode = GET_MODE (target);
7552 bool one_vector_p = rtx_equal_p (op0, op1);
7554 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
7555 gcc_checking_assert (GET_MODE (op0) == vmode);
7556 gcc_checking_assert (GET_MODE (op1) == vmode);
7557 gcc_checking_assert (GET_MODE (sel) == vmode);
7558 gcc_checking_assert (TARGET_SIMD);
7560 if (one_vector_p)
7562 if (vmode == V8QImode)
7564 /* Expand the argument to a V16QI mode by duplicating it. */
7565 rtx pair = gen_reg_rtx (V16QImode);
7566 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
7567 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7569 else
7571 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
7574 else
7576 rtx pair;
7578 if (vmode == V8QImode)
7580 pair = gen_reg_rtx (V16QImode);
7581 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
7582 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7584 else
7586 pair = gen_reg_rtx (OImode);
7587 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
7588 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
7593 void
7594 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7596 enum machine_mode vmode = GET_MODE (target);
7597 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
7598 bool one_vector_p = rtx_equal_p (op0, op1);
7599 rtx rmask[MAX_VECT_LEN], mask;
7601 gcc_checking_assert (!BYTES_BIG_ENDIAN);
7603 /* The TBL instruction does not use a modulo index, so we must take care
7604 of that ourselves. */
7605 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
7606 for (i = 0; i < nelt; ++i)
7607 rmask[i] = mask;
7608 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
7609 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
7611 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
7614 /* Recognize patterns suitable for the TRN instructions. */
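/* Added example (little-endian element numbering, as in the checks below):
   for V4SImode, TRN1 realises the permutation {0, 4, 2, 6} and TRN2
   realises {1, 5, 3, 7}.  */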
7615 static bool
7616 aarch64_evpc_trn (struct expand_vec_perm_d *d)
7618 unsigned int i, odd, mask, nelt = d->nelt;
7619 rtx out, in0, in1, x;
7620 rtx (*gen) (rtx, rtx, rtx);
7621 enum machine_mode vmode = d->vmode;
7623 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7624 return false;
7626 /* Note that these are little-endian tests.
7627 We correct for big-endian later. */
7628 if (d->perm[0] == 0)
7629 odd = 0;
7630 else if (d->perm[0] == 1)
7631 odd = 1;
7632 else
7633 return false;
7634 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7636 for (i = 0; i < nelt; i += 2)
7638 if (d->perm[i] != i + odd)
7639 return false;
7640 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
7641 return false;
7644 /* Success! */
7645 if (d->testing_p)
7646 return true;
7648 in0 = d->op0;
7649 in1 = d->op1;
7650 if (BYTES_BIG_ENDIAN)
7652 x = in0, in0 = in1, in1 = x;
7653 odd = !odd;
7655 out = d->target;
7657 if (odd)
7659 switch (vmode)
7661 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
7662 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
7663 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
7664 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
7665 case V4SImode: gen = gen_aarch64_trn2v4si; break;
7666 case V2SImode: gen = gen_aarch64_trn2v2si; break;
7667 case V2DImode: gen = gen_aarch64_trn2v2di; break;
7668 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
7669 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
7670 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
7671 default:
7672 return false;
7675 else
7677 switch (vmode)
7679 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
7680 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
7681 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
7682 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
7683 case V4SImode: gen = gen_aarch64_trn1v4si; break;
7684 case V2SImode: gen = gen_aarch64_trn1v2si; break;
7685 case V2DImode: gen = gen_aarch64_trn1v2di; break;
7686 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
7687 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
7688 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
7689 default:
7690 return false;
7694 emit_insn (gen (out, in0, in1));
7695 return true;
7698 /* Recognize patterns suitable for the UZP instructions. */
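/* Added example (little-endian element numbering, as in the checks below):
   for V4SImode, UZP1 realises the permutation {0, 2, 4, 6} and UZP2
   realises {1, 3, 5, 7}.  */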
7699 static bool
7700 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
7702 unsigned int i, odd, mask, nelt = d->nelt;
7703 rtx out, in0, in1, x;
7704 rtx (*gen) (rtx, rtx, rtx);
7705 enum machine_mode vmode = d->vmode;
7707 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7708 return false;
7710 /* Note that these are little-endian tests.
7711 We correct for big-endian later. */
7712 if (d->perm[0] == 0)
7713 odd = 0;
7714 else if (d->perm[0] == 1)
7715 odd = 1;
7716 else
7717 return false;
7718 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7720 for (i = 0; i < nelt; i++)
7722 unsigned elt = (i * 2 + odd) & mask;
7723 if (d->perm[i] != elt)
7724 return false;
7727 /* Success! */
7728 if (d->testing_p)
7729 return true;
7731 in0 = d->op0;
7732 in1 = d->op1;
7733 if (BYTES_BIG_ENDIAN)
7735 x = in0, in0 = in1, in1 = x;
7736 odd = !odd;
7738 out = d->target;
7740 if (odd)
7742 switch (vmode)
7744 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
7745 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
7746 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
7747 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
7748 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
7749 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
7750 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
7751 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
7752 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
7753 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
7754 default:
7755 return false;
7758 else
7760 switch (vmode)
7762 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
7763 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
7764 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
7765 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
7766 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
7767 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
7768 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
7769 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
7770 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
7771 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
7772 default:
7773 return false;
7777 emit_insn (gen (out, in0, in1));
7778 return true;
7781 /* Recognize patterns suitable for the ZIP instructions. */
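/* Added example (little-endian element numbering, as in the checks below):
   for V4SImode, ZIP1 realises the permutation {0, 4, 1, 5} and ZIP2
   realises {2, 6, 3, 7}.  */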
7782 static bool
7783 aarch64_evpc_zip (struct expand_vec_perm_d *d)
7785 unsigned int i, high, mask, nelt = d->nelt;
7786 rtx out, in0, in1, x;
7787 rtx (*gen) (rtx, rtx, rtx);
7788 enum machine_mode vmode = d->vmode;
7790 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7791 return false;
7793 /* Note that these are little-endian tests.
7794 We correct for big-endian later. */
7795 high = nelt / 2;
7796 if (d->perm[0] == high)
7797 /* Do Nothing. */
7799 else if (d->perm[0] == 0)
7800 high = 0;
7801 else
7802 return false;
7803 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7805 for (i = 0; i < nelt / 2; i++)
7807 unsigned elt = (i + high) & mask;
7808 if (d->perm[i * 2] != elt)
7809 return false;
7810 elt = (elt + nelt) & mask;
7811 if (d->perm[i * 2 + 1] != elt)
7812 return false;
7815 /* Success! */
7816 if (d->testing_p)
7817 return true;
7819 in0 = d->op0;
7820 in1 = d->op1;
7821 if (BYTES_BIG_ENDIAN)
7823 x = in0, in0 = in1, in1 = x;
7824 high = !high;
7826 out = d->target;
7828 if (high)
7830 switch (vmode)
7832 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
7833 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
7834 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
7835 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
7836 case V4SImode: gen = gen_aarch64_zip2v4si; break;
7837 case V2SImode: gen = gen_aarch64_zip2v2si; break;
7838 case V2DImode: gen = gen_aarch64_zip2v2di; break;
7839 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
7840 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
7841 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
7842 default:
7843 return false;
7846 else
7848 switch (vmode)
7850 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
7851 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
7852 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
7853 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
7854 case V4SImode: gen = gen_aarch64_zip1v4si; break;
7855 case V2SImode: gen = gen_aarch64_zip1v2si; break;
7856 case V2DImode: gen = gen_aarch64_zip1v2di; break;
7857 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
7858 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
7859 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
7860 default:
7861 return false;
7865 emit_insn (gen (out, in0, in1));
7866 return true;
7869 static bool
7870 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
7872 rtx rperm[MAX_VECT_LEN], sel;
7873 enum machine_mode vmode = d->vmode;
7874 unsigned int i, nelt = d->nelt;
7876 /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's
7877 numbering of elements for big-endian, we must reverse the order. */
7878 if (BYTES_BIG_ENDIAN)
7879 return false;
7881 if (d->testing_p)
7882 return true;
7884 /* Generic code will try constant permutation twice. Once with the
7885 original mode and again with the elements lowered to QImode.
7886 So wait and don't do the selector expansion ourselves. */
7887 if (vmode != V8QImode && vmode != V16QImode)
7888 return false;
7890 for (i = 0; i < nelt; ++i)
7891 rperm[i] = GEN_INT (d->perm[i]);
7892 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
7893 sel = force_reg (vmode, sel);
7895 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
7896 return true;
7899 static bool
7900 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
7902 /* The pattern matching functions above are written to look for a small
7903 number to begin the sequence (0, 1, N/2). If we begin with an index
7904 from the second operand, we can swap the operands. */
7905 if (d->perm[0] >= d->nelt)
7907 unsigned i, nelt = d->nelt;
7908 rtx x;
7910 for (i = 0; i < nelt; ++i)
7911 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
7913 x = d->op0;
7914 d->op0 = d->op1;
7915 d->op1 = x;
7918 if (TARGET_SIMD)
7920 if (aarch64_evpc_zip (d))
7921 return true;
7922 else if (aarch64_evpc_uzp (d))
7923 return true;
7924 else if (aarch64_evpc_trn (d))
7925 return true;
7926 return aarch64_evpc_tbl (d);
7928 return false;
7931 /* Expand a vec_perm_const pattern. */
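/* Illustrative example of the WHICH classification below: a V4SImode
   selector {4, 5, 6, 7} over two distinct operands refers only to the
   second vector, so WHICH is 2; the indices are folded to {0, 1, 2, 3} and
   OP1 is treated as the single input for the pattern matchers.  */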
7933 bool
7934 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
7936 struct expand_vec_perm_d d;
7937 int i, nelt, which;
7939 d.target = target;
7940 d.op0 = op0;
7941 d.op1 = op1;
7943 d.vmode = GET_MODE (target);
7944 gcc_assert (VECTOR_MODE_P (d.vmode));
7945 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
7946 d.testing_p = false;
7948 for (i = which = 0; i < nelt; ++i)
7950 rtx e = XVECEXP (sel, 0, i);
7951 int ei = INTVAL (e) & (2 * nelt - 1);
7952 which |= (ei < nelt ? 1 : 2);
7953 d.perm[i] = ei;
7956 switch (which)
7958 default:
7959 gcc_unreachable ();
7961 case 3:
7962 d.one_vector_p = false;
7963 if (!rtx_equal_p (op0, op1))
7964 break;
7966 /* The elements of PERM do not suggest that only the first operand
7967 is used, but both operands are identical. Allow easier matching
7968 of the permutation by folding the permutation into the single
7969 input vector. */
7970 /* Fall Through. */
7971 case 2:
7972 for (i = 0; i < nelt; ++i)
7973 d.perm[i] &= nelt - 1;
7974 d.op0 = op1;
7975 d.one_vector_p = true;
7976 break;
7978 case 1:
7979 d.op1 = op0;
7980 d.one_vector_p = true;
7981 break;
7984 return aarch64_expand_vec_perm_const_1 (&d);
7987 static bool
7988 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
7989 const unsigned char *sel)
7991 struct expand_vec_perm_d d;
7992 unsigned int i, nelt, which;
7993 bool ret;
7995 d.vmode = vmode;
7996 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
7997 d.testing_p = true;
7998 memcpy (d.perm, sel, nelt);
8000 /* Calculate whether all elements are in one vector. */
8001 for (i = which = 0; i < nelt; ++i)
8003 unsigned char e = d.perm[i];
8004 gcc_assert (e < 2 * nelt);
8005 which |= (e < nelt ? 1 : 2);
8008 /* If all elements are from the second vector, reindex as if from the
8009 first vector. */
8010 if (which == 2)
8011 for (i = 0; i < nelt; ++i)
8012 d.perm[i] -= nelt;
8014 /* Check whether the mask can be applied to a single vector. */
8015 d.one_vector_p = (which != 3);
8017 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
8018 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
8019 if (!d.one_vector_p)
8020 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
8022 start_sequence ();
8023 ret = aarch64_expand_vec_perm_const_1 (&d);
8024 end_sequence ();
8026 return ret;
8029 #undef TARGET_ADDRESS_COST
8030 #define TARGET_ADDRESS_COST aarch64_address_cost
8032 /* This hook determines whether unnamed bitfields affect the alignment
8033 of the containing structure. The hook returns true if the structure
8034 should inherit the alignment requirements of an unnamed bitfield's
8035 type. */
8036 #undef TARGET_ALIGN_ANON_BITFIELD
8037 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
8039 #undef TARGET_ASM_ALIGNED_DI_OP
8040 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
8042 #undef TARGET_ASM_ALIGNED_HI_OP
8043 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
8045 #undef TARGET_ASM_ALIGNED_SI_OP
8046 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
8048 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
8049 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
8050 hook_bool_const_tree_hwi_hwi_const_tree_true
8052 #undef TARGET_ASM_FILE_START
8053 #define TARGET_ASM_FILE_START aarch64_start_file
8055 #undef TARGET_ASM_OUTPUT_MI_THUNK
8056 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
8058 #undef TARGET_ASM_SELECT_RTX_SECTION
8059 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
8061 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
8062 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
8064 #undef TARGET_BUILD_BUILTIN_VA_LIST
8065 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
8067 #undef TARGET_CALLEE_COPIES
8068 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
8070 #undef TARGET_CAN_ELIMINATE
8071 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
8073 #undef TARGET_CANNOT_FORCE_CONST_MEM
8074 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
8076 #undef TARGET_CONDITIONAL_REGISTER_USAGE
8077 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
8079 /* Only the least significant bit is used for initialization guard
8080 variables. */
8081 #undef TARGET_CXX_GUARD_MASK_BIT
8082 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
8084 #undef TARGET_C_MODE_FOR_SUFFIX
8085 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
8087 #ifdef TARGET_BIG_ENDIAN_DEFAULT
8088 #undef TARGET_DEFAULT_TARGET_FLAGS
8089 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
8090 #endif
8092 #undef TARGET_CLASS_MAX_NREGS
8093 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
8095 #undef TARGET_BUILTIN_DECL
8096 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
8098 #undef TARGET_EXPAND_BUILTIN
8099 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
8101 #undef TARGET_EXPAND_BUILTIN_VA_START
8102 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
8104 #undef TARGET_FOLD_BUILTIN
8105 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
8107 #undef TARGET_FUNCTION_ARG
8108 #define TARGET_FUNCTION_ARG aarch64_function_arg
8110 #undef TARGET_FUNCTION_ARG_ADVANCE
8111 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
8113 #undef TARGET_FUNCTION_ARG_BOUNDARY
8114 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
8116 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
8117 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
8119 #undef TARGET_FUNCTION_VALUE
8120 #define TARGET_FUNCTION_VALUE aarch64_function_value
8122 #undef TARGET_FUNCTION_VALUE_REGNO_P
8123 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
8125 #undef TARGET_FRAME_POINTER_REQUIRED
8126 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
8128 #undef TARGET_GIMPLE_FOLD_BUILTIN
8129 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
8131 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
8132 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
8134 #undef TARGET_INIT_BUILTINS
8135 #define TARGET_INIT_BUILTINS aarch64_init_builtins
8137 #undef TARGET_LEGITIMATE_ADDRESS_P
8138 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
8140 #undef TARGET_LEGITIMATE_CONSTANT_P
8141 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
8143 #undef TARGET_LIBGCC_CMP_RETURN_MODE
8144 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
8146 #undef TARGET_MANGLE_TYPE
8147 #define TARGET_MANGLE_TYPE aarch64_mangle_type
8149 #undef TARGET_MEMORY_MOVE_COST
8150 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
8152 #undef TARGET_MUST_PASS_IN_STACK
8153 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
8155 /* This target hook should return true if accesses to volatile bitfields
8156 should use the narrowest mode possible. It should return false if these
8157 accesses should use the bitfield container type. */
8158 #undef TARGET_NARROW_VOLATILE_BITFIELD
8159 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
8161 #undef TARGET_OPTION_OVERRIDE
8162 #define TARGET_OPTION_OVERRIDE aarch64_override_options
8164 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
8165 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
8166 aarch64_override_options_after_change
8168 #undef TARGET_PASS_BY_REFERENCE
8169 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
8171 #undef TARGET_PREFERRED_RELOAD_CLASS
8172 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
8174 #undef TARGET_SECONDARY_RELOAD
8175 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
8177 #undef TARGET_SHIFT_TRUNCATION_MASK
8178 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
8180 #undef TARGET_SETUP_INCOMING_VARARGS
8181 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
8183 #undef TARGET_STRUCT_VALUE_RTX
8184 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
8186 #undef TARGET_REGISTER_MOVE_COST
8187 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
8189 #undef TARGET_RETURN_IN_MEMORY
8190 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
8192 #undef TARGET_RETURN_IN_MSB
8193 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
8195 #undef TARGET_RTX_COSTS
8196 #define TARGET_RTX_COSTS aarch64_rtx_costs
8198 #undef TARGET_TRAMPOLINE_INIT
8199 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
8201 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
8202 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
8204 #undef TARGET_VECTOR_MODE_SUPPORTED_P
8205 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
8207 #undef TARGET_ARRAY_MODE_SUPPORTED_P
8208 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
8210 #undef TARGET_VECTORIZE_ADD_STMT_COST
8211 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
8213 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
8214 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
8215 aarch64_builtin_vectorization_cost
8217 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
8218 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
8220 #undef TARGET_VECTORIZE_BUILTINS
8221 #define TARGET_VECTORIZE_BUILTINS
8223 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
8224 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
8225 aarch64_builtin_vectorized_function
8227 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
8228 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
8229 aarch64_autovectorize_vector_sizes
8231 /* Section anchor support. */
8233 #undef TARGET_MIN_ANCHOR_OFFSET
8234 #define TARGET_MIN_ANCHOR_OFFSET -256
8236 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
8237 byte offset; we can do much more for larger data types, but have no way
8238 to determine the size of the access. We assume accesses are aligned. */
8239 #undef TARGET_MAX_ANCHOR_OFFSET
8240 #define TARGET_MAX_ANCHOR_OFFSET 4095
8242 #undef TARGET_VECTOR_ALIGNMENT
8243 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
8245 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
8246 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
8247 aarch64_simd_vector_alignment_reachable
8249 /* vec_perm support. */
8251 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
8252 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
8253 aarch64_vectorize_vec_perm_const_ok
8256 #undef TARGET_FIXED_CONDITION_CODE_REGS
8257 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
8259 struct gcc_target targetm = TARGET_INITIALIZER;
8261 #include "gt-aarch64.h"