[AArch64] Use plus_constant.
[official-gcc.git] / gcc / config / aarch64 / aarch64.c
blob c65d8451b1dcedf7bba1856db35c6c4cc7099a5b
1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2013 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "tree.h"
29 #include "regs.h"
30 #include "df.h"
31 #include "hard-reg-set.h"
32 #include "output.h"
33 #include "expr.h"
34 #include "reload.h"
35 #include "toplev.h"
36 #include "target.h"
37 #include "target-def.h"
38 #include "targhooks.h"
39 #include "ggc.h"
40 #include "function.h"
41 #include "tm_p.h"
42 #include "recog.h"
43 #include "langhooks.h"
44 #include "diagnostic-core.h"
45 #include "gimple.h"
46 #include "optabs.h"
47 #include "dwarf2.h"
48 #include "cfgloop.h"
49 #include "tree-vectorizer.h"
51 /* Defined for convenience. */
52 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
54 /* Classifies an address.
56 ADDRESS_REG_IMM
57 A simple base register plus immediate offset.
59 ADDRESS_REG_WB
60 A base register indexed by immediate offset with writeback.
62 ADDRESS_REG_REG
63 A base register indexed by (optionally scaled) register.
65 ADDRESS_REG_UXTW
66 A base register indexed by (optionally scaled) zero-extended register.
68 ADDRESS_REG_SXTW
69 A base register indexed by (optionally scaled) sign-extended register.
71 ADDRESS_LO_SUM
72 A LO_SUM rtx with a base register and "LO12" symbol relocation.
74 ADDRESS_SYMBOLIC:
75 A constant symbolic address, in pc-relative literal pool. */
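/* Illustrative only (not part of the original source): roughly how these
   classes map onto AArch64 assembly addressing forms, using x0/w1 as the
   assumed base and index registers:
     ADDRESS_REG_IMM    ldr x2, [x0, #16]
     ADDRESS_REG_WB     ldr x2, [x0, #16]!   or   ldr x2, [x0], #16
     ADDRESS_REG_REG    ldr x2, [x0, x1, lsl #3]
     ADDRESS_REG_UXTW   ldr x2, [x0, w1, uxtw #3]
     ADDRESS_REG_SXTW   ldr x2, [x0, w1, sxtw #3]
     ADDRESS_LO_SUM     ldr x2, [x0, #:lo12:sym]
     ADDRESS_SYMBOLIC   ldr x2, some_label   (PC-relative literal)  */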
77 enum aarch64_address_type {
78 ADDRESS_REG_IMM,
79 ADDRESS_REG_WB,
80 ADDRESS_REG_REG,
81 ADDRESS_REG_UXTW,
82 ADDRESS_REG_SXTW,
83 ADDRESS_LO_SUM,
84 ADDRESS_SYMBOLIC
87 struct aarch64_address_info {
88 enum aarch64_address_type type;
89 rtx base;
90 rtx offset;
91 int shift;
92 enum aarch64_symbol_type symbol_type;
95 struct simd_immediate_info
97 rtx value;
98 int shift;
99 int element_width;
100 bool mvn;
101 bool msl;
104 /* The current code model. */
105 enum aarch64_code_model aarch64_cmodel;
107 #ifdef HAVE_AS_TLS
108 #undef TARGET_HAVE_TLS
109 #define TARGET_HAVE_TLS 1
110 #endif
112 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
113 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
114 const_tree,
115 enum machine_mode *, int *,
116 bool *);
117 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
118 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
119 static void aarch64_override_options_after_change (void);
120 static bool aarch64_vector_mode_supported_p (enum machine_mode);
121 static unsigned bit_count (unsigned HOST_WIDE_INT);
122 static bool aarch64_const_vec_all_same_int_p (rtx,
123 HOST_WIDE_INT, HOST_WIDE_INT);
125 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
126 const unsigned char *sel);
128 /* The processor for which instructions should be scheduled. */
129 enum aarch64_processor aarch64_tune = generic;
131 /* The current tuning set. */
132 const struct tune_params *aarch64_tune_params;
134 /* Mask to specify which instructions we are allowed to generate. */
135 unsigned long aarch64_isa_flags = 0;
137 /* Mask to specify which instruction scheduling options should be used. */
138 unsigned long aarch64_tune_flags = 0;
140 /* Tuning parameters. */
142 #if HAVE_DESIGNATED_INITIALIZERS
143 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
144 #else
145 #define NAMED_PARAM(NAME, VAL) (VAL)
146 #endif
148 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
149 __extension__
150 #endif
151 static const struct cpu_rtx_cost_table generic_rtx_cost_table =
153 NAMED_PARAM (memory_load, COSTS_N_INSNS (1)),
154 NAMED_PARAM (memory_store, COSTS_N_INSNS (0)),
155 NAMED_PARAM (register_shift, COSTS_N_INSNS (1)),
156 NAMED_PARAM (int_divide, COSTS_N_INSNS (6)),
157 NAMED_PARAM (float_divide, COSTS_N_INSNS (2)),
158 NAMED_PARAM (double_divide, COSTS_N_INSNS (6)),
159 NAMED_PARAM (int_multiply, COSTS_N_INSNS (1)),
160 NAMED_PARAM (int_multiply_extend, COSTS_N_INSNS (1)),
161 NAMED_PARAM (int_multiply_add, COSTS_N_INSNS (1)),
162 NAMED_PARAM (int_multiply_extend_add, COSTS_N_INSNS (1)),
163 NAMED_PARAM (float_multiply, COSTS_N_INSNS (0)),
164 NAMED_PARAM (double_multiply, COSTS_N_INSNS (1))
167 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
168 __extension__
169 #endif
170 static const struct cpu_addrcost_table generic_addrcost_table =
172 NAMED_PARAM (pre_modify, 0),
173 NAMED_PARAM (post_modify, 0),
174 NAMED_PARAM (register_offset, 0),
175 NAMED_PARAM (register_extend, 0),
176 NAMED_PARAM (imm_offset, 0)
179 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
180 __extension__
181 #endif
182 static const struct cpu_regmove_cost generic_regmove_cost =
184 NAMED_PARAM (GP2GP, 1),
185 NAMED_PARAM (GP2FP, 2),
186 NAMED_PARAM (FP2GP, 2),
187 /* We currently do not provide direct support for TFmode Q->Q move.
188 Therefore we need to raise the cost above 2 in order to have
189 reload handle the situation. */
190 NAMED_PARAM (FP2FP, 4)
193 /* Generic costs for vector insn classes. */
194 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
195 __extension__
196 #endif
197 static const struct cpu_vector_cost generic_vector_cost =
199 NAMED_PARAM (scalar_stmt_cost, 1),
200 NAMED_PARAM (scalar_load_cost, 1),
201 NAMED_PARAM (scalar_store_cost, 1),
202 NAMED_PARAM (vec_stmt_cost, 1),
203 NAMED_PARAM (vec_to_scalar_cost, 1),
204 NAMED_PARAM (scalar_to_vec_cost, 1),
205 NAMED_PARAM (vec_align_load_cost, 1),
206 NAMED_PARAM (vec_unalign_load_cost, 1),
207 NAMED_PARAM (vec_unalign_store_cost, 1),
208 NAMED_PARAM (vec_store_cost, 1),
209 NAMED_PARAM (cond_taken_branch_cost, 3),
210 NAMED_PARAM (cond_not_taken_branch_cost, 1)
213 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
214 __extension__
215 #endif
216 static const struct tune_params generic_tunings =
218 &generic_rtx_cost_table,
219 &generic_addrcost_table,
220 &generic_regmove_cost,
221 &generic_vector_cost,
222 NAMED_PARAM (memmov_cost, 4)
225 /* A processor implementing AArch64. */
226 struct processor
228 const char *const name;
229 enum aarch64_processor core;
230 const char *arch;
231 const unsigned long flags;
232 const struct tune_params *const tune;
235 /* Processor cores implementing AArch64. */
236 static const struct processor all_cores[] =
238 #define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
239 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
240 #include "aarch64-cores.def"
241 #undef AARCH64_CORE
242 {"generic", generic, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
243 {NULL, aarch64_none, NULL, 0, NULL}
246 /* Architectures implementing AArch64. */
247 static const struct processor all_architectures[] =
249 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
250 {NAME, CORE, #ARCH, FLAGS, NULL},
251 #include "aarch64-arches.def"
252 #undef AARCH64_ARCH
253 {"generic", generic, "8", AARCH64_FL_FOR_ARCH8, NULL},
254 {NULL, aarch64_none, NULL, 0, NULL}
257 /* Target specification. These are populated as command-line arguments
258 are processed, or NULL if not specified. */
259 static const struct processor *selected_arch;
260 static const struct processor *selected_cpu;
261 static const struct processor *selected_tune;
263 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
265 /* An ISA extension in the co-processor and main instruction set space. */
266 struct aarch64_option_extension
268 const char *const name;
269 const unsigned long flags_on;
270 const unsigned long flags_off;
273 /* ISA extensions in AArch64. */
274 static const struct aarch64_option_extension all_extensions[] =
276 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
277 {NAME, FLAGS_ON, FLAGS_OFF},
278 #include "aarch64-option-extensions.def"
279 #undef AARCH64_OPT_EXTENSION
280 {NULL, 0, 0}
283 /* Used to track the size of an address when generating a pre/post
284 increment address. */
285 static enum machine_mode aarch64_memory_reference_mode;
287 /* Used to force GTY into this file. */
288 static GTY(()) int gty_dummy;
290 /* A table of valid AArch64 "bitmask immediate" values for
291 logical instructions. */
293 #define AARCH64_NUM_BITMASKS 5334
294 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
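/* Illustrative note (not part of the original source): a "bitmask
   immediate" is a rotated run of ones replicated across the register in
   2, 4, 8, 16, 32 or 64-bit chunks.  For example 0x00ff00ff00ff00ff and
   0x5555555555555555 are encodable, while 0x0000000000001234, all-zeros
   and all-ones are not.  */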
296 /* Did we set flag_omit_frame_pointer just so
297 aarch64_frame_pointer_required would be called? */
298 static bool faked_omit_frame_pointer;
300 typedef enum aarch64_cond_code
302 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
303 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
304 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
306 aarch64_cc;
308 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
310 /* The condition codes of the processor, and the inverse function. */
311 static const char * const aarch64_condition_codes[] =
313 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
314 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
317 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
318 unsigned
319 aarch64_dbx_register_number (unsigned regno)
321 if (GP_REGNUM_P (regno))
322 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
323 else if (regno == SP_REGNUM)
324 return AARCH64_DWARF_SP;
325 else if (FP_REGNUM_P (regno))
326 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
328 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
329 equivalent DWARF register. */
330 return DWARF_FRAME_REGISTERS;
333 /* Return TRUE if MODE is any of the large INT modes. */
334 static bool
335 aarch64_vect_struct_mode_p (enum machine_mode mode)
337 return mode == OImode || mode == CImode || mode == XImode;
340 /* Return TRUE if MODE is any of the vector modes. */
341 static bool
342 aarch64_vector_mode_p (enum machine_mode mode)
344 return aarch64_vector_mode_supported_p (mode)
345 || aarch64_vect_struct_mode_p (mode);
348 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
349 static bool
350 aarch64_array_mode_supported_p (enum machine_mode mode,
351 unsigned HOST_WIDE_INT nelems)
353 if (TARGET_SIMD
354 && AARCH64_VALID_SIMD_QREG_MODE (mode)
355 && (nelems >= 2 && nelems <= 4))
356 return true;
358 return false;
361 /* Implement HARD_REGNO_NREGS. */
364 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
366 switch (aarch64_regno_regclass (regno))
368 case FP_REGS:
369 case FP_LO_REGS:
370 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
371 default:
372 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
374 gcc_unreachable ();
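/* Illustrative example (not part of the original source), assuming the
   usual AArch64 values UNITS_PER_WORD == 8 and UNITS_PER_VREG == 16:
   a 16-byte TImode value occupies 2 general registers but only 1
   FP/SIMD register, while a 32-byte OImode value occupies 2 FP/SIMD
   registers.  */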
377 /* Implement HARD_REGNO_MODE_OK. */
380 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
382 if (GET_MODE_CLASS (mode) == MODE_CC)
383 return regno == CC_REGNUM;
385 if (regno == SP_REGNUM)
386 /* The purpose of comparing with ptr_mode is to support the
387 global register variable associated with the stack pointer
388 register via the syntax of asm ("wsp") in ILP32. */
389 return mode == Pmode || mode == ptr_mode;
391 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
392 return mode == Pmode;
394 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
395 return 1;
397 if (FP_REGNUM_P (regno))
399 if (aarch64_vect_struct_mode_p (mode))
400 return
401 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
402 else
403 return 1;
406 return 0;
409 /* Return true if calls to DECL should be treated as
410 long-calls (i.e. called via a register). */
411 static bool
412 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
414 return false;
417 /* Return true if calls to symbol-ref SYM should be treated as
418 long-calls (i.e. called via a register). */
419 bool
420 aarch64_is_long_call_p (rtx sym)
422 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
425 /* Return true if the offsets to a zero/sign-extract operation
426 represent an expression that matches an extend operation. The
427 operands represent the parameters from
429 (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */
430 bool
431 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
432 rtx extract_imm)
434 HOST_WIDE_INT mult_val, extract_val;
436 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
437 return false;
439 mult_val = INTVAL (mult_imm);
440 extract_val = INTVAL (extract_imm);
442 if (extract_val > 8
443 && extract_val < GET_MODE_BITSIZE (mode)
444 && exact_log2 (extract_val & ~7) > 0
445 && (extract_val & 7) <= 4
446 && mult_val == (1 << (extract_val & 7)))
447 return true;
449 return false;
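/* Worked example (illustrative, not part of the original source): in
   DImode, mult_imm == 4 and extract_imm == 34 pass the checks above
   (34 & ~7 == 32, a power of two; 34 & 7 == 2; 1 << 2 == 4), i.e. the
   extract describes the low 32 bits of the register shifted left by 2,
   matching an extend-and-scale index.  */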
452 /* Emit an insn that's a simple single-set. Both the operands must be
453 known to be valid. */
454 inline static rtx
455 emit_set_insn (rtx x, rtx y)
457 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
460 /* X and Y are two things to compare using CODE. Emit the compare insn and
461 return the rtx for register 0 in the proper mode. */
463 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
465 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
466 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
468 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
469 return cc_reg;
472 /* Build the SYMBOL_REF for __tls_get_addr. */
474 static GTY(()) rtx tls_get_addr_libfunc;
477 aarch64_tls_get_addr (void)
479 if (!tls_get_addr_libfunc)
480 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
481 return tls_get_addr_libfunc;
484 /* Return the TLS model to use for ADDR. */
486 static enum tls_model
487 tls_symbolic_operand_type (rtx addr)
489 enum tls_model tls_kind = TLS_MODEL_NONE;
490 rtx sym, addend;
492 if (GET_CODE (addr) == CONST)
494 split_const (addr, &sym, &addend);
495 if (GET_CODE (sym) == SYMBOL_REF)
496 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
498 else if (GET_CODE (addr) == SYMBOL_REF)
499 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
501 return tls_kind;
504 /* We'll allow lo_sum's in our legitimate addresses so that combine
505 will take care of combining addresses where necessary, but for
506 generation purposes, we'll generate the address
507 as:
508 RTL Absolute
509 tmp = hi (symbol_ref); adrp x1, foo
510 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
513 PIC TLS
514 adrp x1, :got:foo adrp tmp, :tlsgd:foo
515 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
516 bl __tls_get_addr
519 Load TLS symbol, depending on TLS mechanism and TLS access model.
521 Global Dynamic - Traditional TLS:
522 adrp tmp, :tlsgd:imm
523 add dest, tmp, #:tlsgd_lo12:imm
524 bl __tls_get_addr
526 Global Dynamic - TLS Descriptors:
527 adrp dest, :tlsdesc:imm
528 ldr tmp, [dest, #:tlsdesc_lo12:imm]
529 add dest, dest, #:tlsdesc_lo12:imm
530 blr tmp
531 mrs tp, tpidr_el0
532 add dest, dest, tp
534 Initial Exec:
535 mrs tp, tpidr_el0
536 adrp tmp, :gottprel:imm
537 ldr dest, [tmp, #:gottprel_lo12:imm]
538 add dest, dest, tp
540 Local Exec:
541 mrs tp, tpidr_el0
542 add t0, tp, #:tprel_hi12:imm
543 add t0, #:tprel_lo12_nc:imm
546 static void
547 aarch64_load_symref_appropriately (rtx dest, rtx imm,
548 enum aarch64_symbol_type type)
550 switch (type)
552 case SYMBOL_SMALL_ABSOLUTE:
554 /* In ILP32, the mode of dest can be either SImode or DImode. */
555 rtx tmp_reg = dest;
556 enum machine_mode mode = GET_MODE (dest);
558 gcc_assert (mode == Pmode || mode == ptr_mode);
560 if (can_create_pseudo_p ())
561 tmp_reg = gen_reg_rtx (mode);
563 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
564 emit_insn (gen_add_losym (dest, tmp_reg, imm));
565 return;
568 case SYMBOL_TINY_ABSOLUTE:
569 emit_insn (gen_rtx_SET (Pmode, dest, imm));
570 return;
572 case SYMBOL_SMALL_GOT:
574 /* In ILP32, the mode of dest can be either SImode or DImode,
575 while the got entry is always of SImode size. The mode of
576 dest depends on how dest is used: if dest is assigned to a
577 pointer (e.g. in memory), it has SImode; it may have
578 DImode if dest is dereferenced to access the memory.
579 This is why we have to handle three different ldr_got_small
580 patterns here (two patterns for ILP32). */
581 rtx tmp_reg = dest;
582 enum machine_mode mode = GET_MODE (dest);
584 if (can_create_pseudo_p ())
585 tmp_reg = gen_reg_rtx (mode);
587 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
588 if (mode == ptr_mode)
590 if (mode == DImode)
591 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
592 else
593 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
595 else
597 gcc_assert (mode == Pmode);
598 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
601 return;
604 case SYMBOL_SMALL_TLSGD:
606 rtx insns;
607 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
609 start_sequence ();
610 emit_call_insn (gen_tlsgd_small (result, imm));
611 insns = get_insns ();
612 end_sequence ();
614 RTL_CONST_CALL_P (insns) = 1;
615 emit_libcall_block (insns, dest, result, imm);
616 return;
619 case SYMBOL_SMALL_TLSDESC:
621 rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
622 rtx tp;
624 emit_insn (gen_tlsdesc_small (imm));
625 tp = aarch64_load_tp (NULL);
626 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
627 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
628 return;
631 case SYMBOL_SMALL_GOTTPREL:
633 rtx tmp_reg = gen_reg_rtx (Pmode);
634 rtx tp = aarch64_load_tp (NULL);
635 emit_insn (gen_tlsie_small (tmp_reg, imm));
636 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
637 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
638 return;
641 case SYMBOL_SMALL_TPREL:
643 rtx tp = aarch64_load_tp (NULL);
644 emit_insn (gen_tlsle_small (dest, tp, imm));
645 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
646 return;
649 case SYMBOL_TINY_GOT:
650 emit_insn (gen_ldr_got_tiny (dest, imm));
651 return;
653 default:
654 gcc_unreachable ();
658 /* Emit a move from SRC to DEST. Assume that the move expanders can
659 handle all moves if !can_create_pseudo_p (). The distinction is
660 important because, unlike emit_move_insn, the move expanders know
661 how to force Pmode objects into the constant pool even when the
662 constant pool address is not itself legitimate. */
663 static rtx
664 aarch64_emit_move (rtx dest, rtx src)
666 return (can_create_pseudo_p ()
667 ? emit_move_insn (dest, src)
668 : emit_move_insn_1 (dest, src));
671 void
672 aarch64_split_128bit_move (rtx dst, rtx src)
674 rtx low_dst;
676 enum machine_mode src_mode = GET_MODE (src);
677 enum machine_mode dst_mode = GET_MODE (dst);
678 int src_regno = REGNO (src);
679 int dst_regno = REGNO (dst);
681 gcc_assert (dst_mode == TImode || dst_mode == TFmode);
683 if (REG_P (dst) && REG_P (src))
685 gcc_assert (src_mode == TImode || src_mode == TFmode);
687 /* Handle r -> w, w -> r. */
688 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
690 switch (src_mode) {
691 case TImode:
692 emit_insn
693 (gen_aarch64_movtilow_di (dst, gen_lowpart (word_mode, src)));
694 emit_insn
695 (gen_aarch64_movtihigh_di (dst, gen_highpart (word_mode, src)));
696 return;
697 case TFmode:
698 emit_insn
699 (gen_aarch64_movtflow_di (dst, gen_lowpart (word_mode, src)));
700 emit_insn
701 (gen_aarch64_movtfhigh_di (dst, gen_highpart (word_mode, src)));
702 return;
703 default:
704 gcc_unreachable ();
707 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
709 switch (src_mode) {
710 case TImode:
711 emit_insn
712 (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst), src));
713 emit_insn
714 (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst), src));
715 return;
716 case TFmode:
717 emit_insn
718 (gen_aarch64_movdi_tflow (gen_lowpart (word_mode, dst), src));
719 emit_insn
720 (gen_aarch64_movdi_tfhigh (gen_highpart (word_mode, dst), src));
721 return;
722 default:
723 gcc_unreachable ();
726 /* Fall through to r -> r cases. */
729 switch (dst_mode) {
730 case TImode:
731 low_dst = gen_lowpart (word_mode, dst);
732 if (REG_P (low_dst)
733 && reg_overlap_mentioned_p (low_dst, src))
735 aarch64_emit_move (gen_highpart (word_mode, dst),
736 gen_highpart_mode (word_mode, TImode, src));
737 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
739 else
741 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
742 aarch64_emit_move (gen_highpart (word_mode, dst),
743 gen_highpart_mode (word_mode, TImode, src));
745 return;
746 case TFmode:
747 emit_move_insn (gen_rtx_REG (DFmode, dst_regno),
748 gen_rtx_REG (DFmode, src_regno));
749 emit_move_insn (gen_rtx_REG (DFmode, dst_regno + 1),
750 gen_rtx_REG (DFmode, src_regno + 1));
751 return;
752 default:
753 gcc_unreachable ();
757 bool
758 aarch64_split_128bit_move_p (rtx dst, rtx src)
760 return (! REG_P (src)
761 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
764 /* Split a complex SIMD combine. */
766 void
767 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
769 enum machine_mode src_mode = GET_MODE (src1);
770 enum machine_mode dst_mode = GET_MODE (dst);
772 gcc_assert (VECTOR_MODE_P (dst_mode));
774 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
776 rtx (*gen) (rtx, rtx, rtx);
778 switch (src_mode)
780 case V8QImode:
781 gen = gen_aarch64_simd_combinev8qi;
782 break;
783 case V4HImode:
784 gen = gen_aarch64_simd_combinev4hi;
785 break;
786 case V2SImode:
787 gen = gen_aarch64_simd_combinev2si;
788 break;
789 case V2SFmode:
790 gen = gen_aarch64_simd_combinev2sf;
791 break;
792 case DImode:
793 gen = gen_aarch64_simd_combinedi;
794 break;
795 case DFmode:
796 gen = gen_aarch64_simd_combinedf;
797 break;
798 default:
799 gcc_unreachable ();
802 emit_insn (gen (dst, src1, src2));
803 return;
807 /* Split a complex SIMD move. */
809 void
810 aarch64_split_simd_move (rtx dst, rtx src)
812 enum machine_mode src_mode = GET_MODE (src);
813 enum machine_mode dst_mode = GET_MODE (dst);
815 gcc_assert (VECTOR_MODE_P (dst_mode));
817 if (REG_P (dst) && REG_P (src))
819 rtx (*gen) (rtx, rtx);
821 gcc_assert (VECTOR_MODE_P (src_mode));
823 switch (src_mode)
825 case V16QImode:
826 gen = gen_aarch64_split_simd_movv16qi;
827 break;
828 case V8HImode:
829 gen = gen_aarch64_split_simd_movv8hi;
830 break;
831 case V4SImode:
832 gen = gen_aarch64_split_simd_movv4si;
833 break;
834 case V2DImode:
835 gen = gen_aarch64_split_simd_movv2di;
836 break;
837 case V4SFmode:
838 gen = gen_aarch64_split_simd_movv4sf;
839 break;
840 case V2DFmode:
841 gen = gen_aarch64_split_simd_movv2df;
842 break;
843 default:
844 gcc_unreachable ();
847 emit_insn (gen (dst, src));
848 return;
852 static rtx
853 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
855 if (can_create_pseudo_p ())
856 return force_reg (mode, value);
857 else
859 x = aarch64_emit_move (x, value);
860 return x;
865 static rtx
866 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
868 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
870 rtx high;
871 /* Load the full offset into a register. This
872 might be improvable in the future. */
873 high = GEN_INT (offset);
874 offset = 0;
875 high = aarch64_force_temporary (mode, temp, high);
876 reg = aarch64_force_temporary (mode, temp,
877 gen_rtx_PLUS (mode, high, reg));
879 return plus_constant (mode, reg, offset);
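/* Illustrative note (not part of the original source): plus_constant
   simply builds REG plus a folded constant, e.g.
     plus_constant (DImode, reg, 16)  ->  (plus:DI reg (const_int 16))
     plus_constant (DImode, reg, 0)   ->  reg
   which is why the out-of-range path above moves the high part into a
   register first and then clears OFFSET.  */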
882 void
883 aarch64_expand_mov_immediate (rtx dest, rtx imm)
885 enum machine_mode mode = GET_MODE (dest);
886 unsigned HOST_WIDE_INT mask;
887 int i;
888 bool first;
889 unsigned HOST_WIDE_INT val;
890 bool subtargets;
891 rtx subtarget;
892 int one_match, zero_match;
894 gcc_assert (mode == SImode || mode == DImode);
896 /* Check on what type of symbol it is. */
897 if (GET_CODE (imm) == SYMBOL_REF
898 || GET_CODE (imm) == LABEL_REF
899 || GET_CODE (imm) == CONST)
901 rtx mem, base, offset;
902 enum aarch64_symbol_type sty;
904 /* If we have (const (plus symbol offset)), separate out the offset
905 before we start classifying the symbol. */
906 split_const (imm, &base, &offset);
908 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
909 switch (sty)
911 case SYMBOL_FORCE_TO_MEM:
912 if (offset != const0_rtx
913 && targetm.cannot_force_const_mem (mode, imm))
915 gcc_assert(can_create_pseudo_p ());
916 base = aarch64_force_temporary (mode, dest, base);
917 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
918 aarch64_emit_move (dest, base);
919 return;
921 mem = force_const_mem (ptr_mode, imm);
922 gcc_assert (mem);
923 if (mode != ptr_mode)
924 mem = gen_rtx_ZERO_EXTEND (mode, mem);
925 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
926 return;
928 case SYMBOL_SMALL_TLSGD:
929 case SYMBOL_SMALL_TLSDESC:
930 case SYMBOL_SMALL_GOTTPREL:
931 case SYMBOL_SMALL_GOT:
932 case SYMBOL_TINY_GOT:
933 if (offset != const0_rtx)
935 gcc_assert(can_create_pseudo_p ());
936 base = aarch64_force_temporary (mode, dest, base);
937 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
938 aarch64_emit_move (dest, base);
939 return;
941 /* FALLTHRU */
943 case SYMBOL_SMALL_TPREL:
944 case SYMBOL_SMALL_ABSOLUTE:
945 case SYMBOL_TINY_ABSOLUTE:
946 aarch64_load_symref_appropriately (dest, imm, sty);
947 return;
949 default:
950 gcc_unreachable ();
954 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
956 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
957 return;
960 if (!CONST_INT_P (imm))
962 if (GET_CODE (imm) == HIGH)
963 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
964 else
966 rtx mem = force_const_mem (mode, imm);
967 gcc_assert (mem);
968 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
971 return;
974 if (mode == SImode)
976 /* We know we can't do this in 1 insn, and we must be able to do it
977 in two; so don't mess around looking for sequences that don't buy
978 us anything. */
979 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
980 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
981 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
982 return;
985 /* Remaining cases are all for DImode. */
987 val = INTVAL (imm);
988 subtargets = optimize && can_create_pseudo_p ();
990 one_match = 0;
991 zero_match = 0;
992 mask = 0xffff;
994 for (i = 0; i < 64; i += 16, mask <<= 16)
996 if ((val & mask) == 0)
997 zero_match++;
998 else if ((val & mask) == mask)
999 one_match++;
1002 if (one_match == 2)
1004 mask = 0xffff;
1005 for (i = 0; i < 64; i += 16, mask <<= 16)
1007 if ((val & mask) != mask)
1009 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1010 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1011 GEN_INT ((val >> i) & 0xffff)));
1012 return;
1015 gcc_unreachable ();
1018 if (zero_match == 2)
1019 goto simple_sequence;
1021 mask = 0x0ffff0000UL;
1022 for (i = 16; i < 64; i += 16, mask <<= 16)
1024 HOST_WIDE_INT comp = mask & ~(mask - 1);
1026 if (aarch64_uimm12_shift (val - (val & mask)))
1028 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1030 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1031 emit_insn (gen_adddi3 (dest, subtarget,
1032 GEN_INT (val - (val & mask))));
1033 return;
1035 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1037 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1039 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1040 GEN_INT ((val + comp) & mask)));
1041 emit_insn (gen_adddi3 (dest, subtarget,
1042 GEN_INT (val - ((val + comp) & mask))));
1043 return;
1045 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1047 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1049 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1050 GEN_INT ((val - comp) | ~mask)));
1051 emit_insn (gen_adddi3 (dest, subtarget,
1052 GEN_INT (val - ((val - comp) | ~mask))));
1053 return;
1055 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1057 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1059 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1060 GEN_INT (val | ~mask)));
1061 emit_insn (gen_adddi3 (dest, subtarget,
1062 GEN_INT (val - (val | ~mask))));
1063 return;
1067 /* See if we can do it by arithmetically combining two
1068 immediates. */
1069 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1071 int j;
1072 mask = 0xffff;
1074 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1075 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1077 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1078 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1079 GEN_INT (aarch64_bitmasks[i])));
1080 emit_insn (gen_adddi3 (dest, subtarget,
1081 GEN_INT (val - aarch64_bitmasks[i])));
1082 return;
1085 for (j = 0; j < 64; j += 16, mask <<= 16)
1087 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1089 emit_insn (gen_rtx_SET (VOIDmode, dest,
1090 GEN_INT (aarch64_bitmasks[i])));
1091 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1092 GEN_INT ((val >> j) & 0xffff)));
1093 return;
1098 /* See if we can do it by logically combining two immediates. */
1099 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1101 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1103 int j;
1105 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1106 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1108 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1109 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1110 GEN_INT (aarch64_bitmasks[i])));
1111 emit_insn (gen_iordi3 (dest, subtarget,
1112 GEN_INT (aarch64_bitmasks[j])));
1113 return;
1116 else if ((val & aarch64_bitmasks[i]) == val)
1118 int j;
1120 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1121 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1124 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1125 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1126 GEN_INT (aarch64_bitmasks[j])));
1127 emit_insn (gen_anddi3 (dest, subtarget,
1128 GEN_INT (aarch64_bitmasks[i])));
1129 return;
1134 simple_sequence:
1135 first = true;
1136 mask = 0xffff;
1137 for (i = 0; i < 64; i += 16, mask <<= 16)
1139 if ((val & mask) != 0)
1141 if (first)
1143 emit_insn (gen_rtx_SET (VOIDmode, dest,
1144 GEN_INT (val & mask)));
1145 first = false;
1147 else
1148 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1149 GEN_INT ((val >> i) & 0xffff)));
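/* Illustrative example (not part of the original source): for
   val == 0x1234000000005678 the simple sequence above emits roughly
     mov  x0, #0x5678
     movk x0, #0x1234, lsl #48
   i.e. one move for the first non-zero 16-bit chunk and one movk per
   remaining non-zero chunk.  */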
1154 static bool
1155 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1157 /* Indirect calls are not currently supported. */
1158 if (decl == NULL)
1159 return false;
1161 /* Cannot tail-call to long-calls, since these are outside of the
1162 range of a branch instruction (we could handle this if we added
1163 support for indirect tail-calls). */
1164 if (aarch64_decl_is_long_call_p (decl))
1165 return false;
1167 return true;
1170 /* Implement TARGET_PASS_BY_REFERENCE. */
1172 static bool
1173 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1174 enum machine_mode mode,
1175 const_tree type,
1176 bool named ATTRIBUTE_UNUSED)
1178 HOST_WIDE_INT size;
1179 enum machine_mode dummymode;
1180 int nregs;
1182 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1183 size = (mode == BLKmode && type)
1184 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1186 if (type)
1188 /* Arrays always passed by reference. */
1189 if (TREE_CODE (type) == ARRAY_TYPE)
1190 return true;
1191 /* Other aggregates based on their size. */
1192 if (AGGREGATE_TYPE_P (type))
1193 size = int_size_in_bytes (type);
1196 /* Variable sized arguments are always passed by reference. */
1197 if (size < 0)
1198 return true;
1200 /* Can this be a candidate to be passed in fp/simd register(s)? */
1201 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1202 &dummymode, &nregs,
1203 NULL))
1204 return false;
1206 /* Arguments which are variable sized or larger than 2 registers are
1207 passed by reference unless they are a homogeneous floating-point
1208 aggregate. */
1209 return size > 2 * UNITS_PER_WORD;
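/* Illustrative example (not part of the original source), following the
   AAPCS64 rules above: a struct of four doubles is a homogeneous
   floating-point aggregate and so remains a candidate for FP/SIMD
   registers despite being 32 bytes, whereas a struct of three 64-bit
   integers (24 bytes) exceeds two general registers and is passed by
   reference.  */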
1212 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1213 static bool
1214 aarch64_return_in_msb (const_tree valtype)
1216 enum machine_mode dummy_mode;
1217 int dummy_int;
1219 /* Never happens in little-endian mode. */
1220 if (!BYTES_BIG_ENDIAN)
1221 return false;
1223 /* Only composite types smaller than or equal to 16 bytes can
1224 be potentially returned in registers. */
1225 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1226 || int_size_in_bytes (valtype) <= 0
1227 || int_size_in_bytes (valtype) > 16)
1228 return false;
1230 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1231 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1232 is always passed/returned in the least significant bits of fp/simd
1233 register(s). */
1234 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1235 &dummy_mode, &dummy_int, NULL))
1236 return false;
1238 return true;
1241 /* Implement TARGET_FUNCTION_VALUE.
1242 Define how to find the value returned by a function. */
1244 static rtx
1245 aarch64_function_value (const_tree type, const_tree func,
1246 bool outgoing ATTRIBUTE_UNUSED)
1248 enum machine_mode mode;
1249 int unsignedp;
1250 int count;
1251 enum machine_mode ag_mode;
1253 mode = TYPE_MODE (type);
1254 if (INTEGRAL_TYPE_P (type))
1255 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1257 if (aarch64_return_in_msb (type))
1259 HOST_WIDE_INT size = int_size_in_bytes (type);
1261 if (size % UNITS_PER_WORD != 0)
1263 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1264 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1268 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1269 &ag_mode, &count, NULL))
1271 if (!aarch64_composite_type_p (type, mode))
1273 gcc_assert (count == 1 && mode == ag_mode);
1274 return gen_rtx_REG (mode, V0_REGNUM);
1276 else
1278 int i;
1279 rtx par;
1281 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1282 for (i = 0; i < count; i++)
1284 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1285 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1286 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1287 XVECEXP (par, 0, i) = tmp;
1289 return par;
1292 else
1293 return gen_rtx_REG (mode, R0_REGNUM);
1296 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1297 Return true if REGNO is the number of a hard register in which the values
1298 of called function may come back. */
1300 static bool
1301 aarch64_function_value_regno_p (const unsigned int regno)
1303 /* Maximum of 16 bytes can be returned in the general registers. Examples
1304 of 16-byte return values are: 128-bit integers and 16-byte small
1305 structures (excluding homogeneous floating-point aggregates). */
1306 if (regno == R0_REGNUM || regno == R1_REGNUM)
1307 return true;
1309 /* Up to four fp/simd registers can return a function value, e.g. a
1310 homogeneous floating-point aggregate having four members. */
1311 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1312 return !TARGET_GENERAL_REGS_ONLY;
1314 return false;
1317 /* Implement TARGET_RETURN_IN_MEMORY.
1319 If the type T of the result of a function is such that
1320 void func (T arg)
1321 would require that arg be passed as a value in a register (or set of
1322 registers) according to the parameter passing rules, then the result
1323 is returned in the same registers as would be used for such an
1324 argument. */
1326 static bool
1327 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1329 HOST_WIDE_INT size;
1330 enum machine_mode ag_mode;
1331 int count;
1333 if (!AGGREGATE_TYPE_P (type)
1334 && TREE_CODE (type) != COMPLEX_TYPE
1335 && TREE_CODE (type) != VECTOR_TYPE)
1336 /* Simple scalar types always returned in registers. */
1337 return false;
1339 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1340 type,
1341 &ag_mode,
1342 &count,
1343 NULL))
1344 return false;
1346 /* Types larger than 2 registers returned in memory. */
1347 size = int_size_in_bytes (type);
1348 return (size < 0 || size > 2 * UNITS_PER_WORD);
1351 static bool
1352 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1353 const_tree type, int *nregs)
1355 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1356 return aarch64_vfp_is_call_or_return_candidate (mode,
1357 type,
1358 &pcum->aapcs_vfp_rmode,
1359 nregs,
1360 NULL);
1363 /* Given MODE and TYPE of a function argument, return the alignment in
1364 bits. The idea is to suppress any stronger alignment requested by
1365 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1366 This is a helper function for local use only. */
1368 static unsigned int
1369 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1371 unsigned int alignment;
1373 if (type)
1375 if (!integer_zerop (TYPE_SIZE (type)))
1377 if (TYPE_MODE (type) == mode)
1378 alignment = TYPE_ALIGN (type);
1379 else
1380 alignment = GET_MODE_ALIGNMENT (mode);
1382 else
1383 alignment = 0;
1385 else
1386 alignment = GET_MODE_ALIGNMENT (mode);
1388 return alignment;
1391 /* Layout a function argument according to the AAPCS64 rules. The rule
1392 numbers refer to the rule numbers in the AAPCS64. */
1394 static void
1395 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1396 const_tree type,
1397 bool named ATTRIBUTE_UNUSED)
1399 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1400 int ncrn, nvrn, nregs;
1401 bool allocate_ncrn, allocate_nvrn;
1403 /* We need to do this once per argument. */
1404 if (pcum->aapcs_arg_processed)
1405 return;
1407 pcum->aapcs_arg_processed = true;
1409 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1410 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1411 mode,
1412 type,
1413 &nregs);
1415 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1416 The following code thus handles passing by SIMD/FP registers first. */
1418 nvrn = pcum->aapcs_nvrn;
1420 /* C.1 - C.5 for floating point, homogeneous floating-point aggregates (HFA)
1421 and homogeneous short-vector aggregates (HVA). */
1422 if (allocate_nvrn)
1424 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1426 pcum->aapcs_nextnvrn = nvrn + nregs;
1427 if (!aarch64_composite_type_p (type, mode))
1429 gcc_assert (nregs == 1);
1430 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1432 else
1434 rtx par;
1435 int i;
1436 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1437 for (i = 0; i < nregs; i++)
1439 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1440 V0_REGNUM + nvrn + i);
1441 tmp = gen_rtx_EXPR_LIST
1442 (VOIDmode, tmp,
1443 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1444 XVECEXP (par, 0, i) = tmp;
1446 pcum->aapcs_reg = par;
1448 return;
1450 else
1452 /* C.3 NSRN is set to 8. */
1453 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1454 goto on_stack;
1458 ncrn = pcum->aapcs_ncrn;
1459 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1460 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1463 /* C.6 - C.9, though the sign and zero extension semantics are
1464 handled elsewhere. This is the case where the argument fits
1465 entirely in general registers. */
1466 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1468 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1470 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1472 /* C.8 if the argument has an alignment of 16 then the NGRN is
1473 rounded up to the next even number. */
1474 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1476 ++ncrn;
1477 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1479 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1480 A reg is still generated for it, but the caller should be smart
1481 enough not to use it. */
1482 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1484 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1486 else
1488 rtx par;
1489 int i;
1491 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1492 for (i = 0; i < nregs; i++)
1494 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1495 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1496 GEN_INT (i * UNITS_PER_WORD));
1497 XVECEXP (par, 0, i) = tmp;
1499 pcum->aapcs_reg = par;
1502 pcum->aapcs_nextncrn = ncrn + nregs;
1503 return;
1506 /* C.11 */
1507 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1509 /* The argument is passed on the stack; record the needed number of words for
1510 this argument (we can re-use NREGS) and align the total size if
1511 necessary. */
1512 on_stack:
1513 pcum->aapcs_stack_words = nregs;
1514 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1515 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1516 16 / UNITS_PER_WORD) + 1;
1517 return;
1520 /* Implement TARGET_FUNCTION_ARG. */
1522 static rtx
1523 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1524 const_tree type, bool named)
1526 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1527 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1529 if (mode == VOIDmode)
1530 return NULL_RTX;
1532 aarch64_layout_arg (pcum_v, mode, type, named);
1533 return pcum->aapcs_reg;
1536 void
1537 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1538 const_tree fntype ATTRIBUTE_UNUSED,
1539 rtx libname ATTRIBUTE_UNUSED,
1540 const_tree fndecl ATTRIBUTE_UNUSED,
1541 unsigned n_named ATTRIBUTE_UNUSED)
1543 pcum->aapcs_ncrn = 0;
1544 pcum->aapcs_nvrn = 0;
1545 pcum->aapcs_nextncrn = 0;
1546 pcum->aapcs_nextnvrn = 0;
1547 pcum->pcs_variant = ARM_PCS_AAPCS64;
1548 pcum->aapcs_reg = NULL_RTX;
1549 pcum->aapcs_arg_processed = false;
1550 pcum->aapcs_stack_words = 0;
1551 pcum->aapcs_stack_size = 0;
1553 return;
1556 static void
1557 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1558 enum machine_mode mode,
1559 const_tree type,
1560 bool named)
1562 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1563 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1565 aarch64_layout_arg (pcum_v, mode, type, named);
1566 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1567 != (pcum->aapcs_stack_words != 0));
1568 pcum->aapcs_arg_processed = false;
1569 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1570 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1571 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1572 pcum->aapcs_stack_words = 0;
1573 pcum->aapcs_reg = NULL_RTX;
1577 bool
1578 aarch64_function_arg_regno_p (unsigned regno)
1580 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1581 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1584 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1585 PARM_BOUNDARY bits of alignment, but will be given anything up
1586 to STACK_BOUNDARY bits if the type requires it. This makes sure
1587 that both before and after the layout of each argument, the Next
1588 Stacked Argument Address (NSAA) will have a minimum alignment of
1589 8 bytes. */
1591 static unsigned int
1592 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1594 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1596 if (alignment < PARM_BOUNDARY)
1597 alignment = PARM_BOUNDARY;
1598 if (alignment > STACK_BOUNDARY)
1599 alignment = STACK_BOUNDARY;
1600 return alignment;
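/* Illustrative example (not part of the original source), assuming the
   usual AArch64 values PARM_BOUNDARY == 64 and STACK_BOUNDARY == 128:
   a char argument is still given a 64-bit boundary, while a type
   declared with 32-byte alignment is clamped down to 128 bits.  */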
1603 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1605 Return true if an argument passed on the stack should be padded upwards,
1606 i.e. if the least-significant byte of the stack slot has useful data.
1608 Small aggregate types are placed in the lowest memory address.
1610 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1612 bool
1613 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1615 /* On little-endian targets, the least significant byte of every stack
1616 argument is passed at the lowest byte address of the stack slot. */
1617 if (!BYTES_BIG_ENDIAN)
1618 return true;
1620 /* Otherwise, integral, floating-point and pointer types are padded downward:
1621 the least significant byte of a stack argument is passed at the highest
1622 byte address of the stack slot. */
1623 if (type
1624 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1625 || POINTER_TYPE_P (type))
1626 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1627 return false;
1629 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1630 return true;
1633 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1635 It specifies padding for the last (may also be the only)
1636 element of a block move between registers and memory. Assuming
1637 the block is in memory, padding upward means that the last
1638 element is padded after its most significant byte, while in
1639 downward padding, the last element is padded at its least
1640 significant byte side.
1642 Small aggregates and small complex types are always padded
1643 upwards.
1645 We don't need to worry about homogeneous floating-point or
1646 short-vector aggregates; their move is not affected by the
1647 padding direction determined here. Regardless of endianness,
1648 each element of such an aggregate is put in the least
1649 significant bits of a fp/simd register.
1651 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1652 register has useful data, and return the opposite if the most
1653 significant byte does. */
1655 bool
1656 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1657 bool first ATTRIBUTE_UNUSED)
1660 /* Small composite types are always padded upward. */
1661 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1663 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1664 : GET_MODE_SIZE (mode));
1665 if (size < 2 * UNITS_PER_WORD)
1666 return true;
1669 /* Otherwise, use the default padding. */
1670 return !BYTES_BIG_ENDIAN;
1673 static enum machine_mode
1674 aarch64_libgcc_cmp_return_mode (void)
1676 return SImode;
1679 static bool
1680 aarch64_frame_pointer_required (void)
1682 /* If the function contains dynamic stack allocations, we need to
1683 use the frame pointer to access the static parts of the frame. */
1684 if (cfun->calls_alloca)
1685 return true;
1687 /* We may have turned flag_omit_frame_pointer on in order to have this
1688 function called; if we did, we also set the 'faked_omit_frame_pointer' flag
1689 and we'll check it here.
1690 If we really did set flag_omit_frame_pointer normally, then we return false
1691 (no frame pointer required) in all cases. */
1693 if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
1694 return false;
1695 else if (flag_omit_leaf_frame_pointer)
1696 return !crtl->is_leaf;
1697 return true;
1700 /* Mark the registers that need to be saved by the callee and calculate
1701 the size of the callee-saved registers area and frame record (both FP
1702 and LR may be omitted). */
1703 static void
1704 aarch64_layout_frame (void)
1706 HOST_WIDE_INT offset = 0;
1707 int regno;
1709 if (reload_completed && cfun->machine->frame.laid_out)
1710 return;
1712 cfun->machine->frame.fp_lr_offset = 0;
1714 /* First mark all the registers that really need to be saved... */
1715 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1716 cfun->machine->frame.reg_offset[regno] = -1;
1718 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1719 cfun->machine->frame.reg_offset[regno] = -1;
1721 /* ... that includes the eh data registers (if needed)... */
1722 if (crtl->calls_eh_return)
1723 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1724 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1726 /* ... and any callee saved register that dataflow says is live. */
1727 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1728 if (df_regs_ever_live_p (regno)
1729 && !call_used_regs[regno])
1730 cfun->machine->frame.reg_offset[regno] = 0;
1732 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1733 if (df_regs_ever_live_p (regno)
1734 && !call_used_regs[regno])
1735 cfun->machine->frame.reg_offset[regno] = 0;
1737 if (frame_pointer_needed)
1739 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1740 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1741 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1744 /* Now assign stack slots for them. */
1745 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1746 if (cfun->machine->frame.reg_offset[regno] != -1)
1748 cfun->machine->frame.reg_offset[regno] = offset;
1749 offset += UNITS_PER_WORD;
1752 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1753 if (cfun->machine->frame.reg_offset[regno] != -1)
1755 cfun->machine->frame.reg_offset[regno] = offset;
1756 offset += UNITS_PER_WORD;
1759 if (frame_pointer_needed)
1761 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1762 offset += UNITS_PER_WORD;
1763 cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1766 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1768 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1769 offset += UNITS_PER_WORD;
1770 cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1773 cfun->machine->frame.padding0 =
1774 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1775 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1777 cfun->machine->frame.saved_regs_size = offset;
1778 cfun->machine->frame.laid_out = true;
1781 /* Make the last instruction frame-related and note that it performs
1782 the operation described by FRAME_PATTERN. */
1784 static void
1785 aarch64_set_frame_expr (rtx frame_pattern)
1787 rtx insn;
1789 insn = get_last_insn ();
1790 RTX_FRAME_RELATED_P (insn) = 1;
1791 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1792 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1793 frame_pattern,
1794 REG_NOTES (insn));
1797 static bool
1798 aarch64_register_saved_on_entry (int regno)
1800 return cfun->machine->frame.reg_offset[regno] != -1;
1804 static void
1805 aarch64_save_or_restore_fprs (int start_offset, int increment,
1806 bool restore, rtx base_rtx)
1809 unsigned regno;
1810 unsigned regno2;
1811 rtx insn;
1812 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1815 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1817 if (aarch64_register_saved_on_entry (regno))
1819 rtx mem;
1820 mem = gen_mem_ref (DFmode,
1821 plus_constant (Pmode,
1822 base_rtx,
1823 start_offset));
1825 for (regno2 = regno + 1;
1826 regno2 <= V31_REGNUM
1827 && !aarch64_register_saved_on_entry (regno2);
1828 regno2++)
1830 /* Empty loop. */
1832 if (regno2 <= V31_REGNUM &&
1833 aarch64_register_saved_on_entry (regno2))
1835 rtx mem2;
1836 /* Next highest register to be saved. */
1837 mem2 = gen_mem_ref (DFmode,
1838 plus_constant
1839 (Pmode,
1840 base_rtx,
1841 start_offset + increment));
1842 if (restore == false)
1844 insn = emit_insn
1845 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1846 mem2, gen_rtx_REG (DFmode, regno2)));
1849 else
1851 insn = emit_insn
1852 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1853 gen_rtx_REG (DFmode, regno2), mem2));
1855 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno));
1856 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno2));
1859 /* The first part of a frame-related parallel insn
1860 is always assumed to be relevant to the frame
1861 calculations; subsequent parts are only
1862 frame-related if explicitly marked. */
1863 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1864 1)) = 1;
1865 regno = regno2;
1866 start_offset += increment * 2;
1868 else
1870 if (restore == false)
1871 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1872 else
1874 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1875 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1877 start_offset += increment;
1879 RTX_FRAME_RELATED_P (insn) = 1;
1886 /* Offset from the stack pointer of where the saves and
1887 restores have to happen. */
1888 static void
1889 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1890 bool restore)
1892 rtx insn;
1893 rtx base_rtx = stack_pointer_rtx;
1894 HOST_WIDE_INT start_offset = offset;
1895 HOST_WIDE_INT increment = UNITS_PER_WORD;
1896 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1897 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1898 unsigned regno;
1899 unsigned regno2;
1901 for (regno = R0_REGNUM; regno <= limit; regno++)
1903 if (aarch64_register_saved_on_entry (regno))
1905 rtx mem;
1906 mem = gen_mem_ref (Pmode,
1907 plus_constant (Pmode,
1908 base_rtx,
1909 start_offset));
1911 for (regno2 = regno + 1;
1912 regno2 <= limit
1913 && !aarch64_register_saved_on_entry (regno2);
1914 regno2++)
1916 /* Empty loop. */
1918 if (regno2 <= limit &&
1919 aarch64_register_saved_on_entry (regno2))
1921 rtx mem2;
1922 /* Next highest register to be saved. */
1923 mem2 = gen_mem_ref (Pmode,
1924 plus_constant
1925 (Pmode,
1926 base_rtx,
1927 start_offset + increment));
1928 if (restore == false)
1930 insn = emit_insn
1931 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1932 mem2, gen_rtx_REG (DImode, regno2)));
1935 else
1937 insn = emit_insn
1938 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1939 gen_rtx_REG (DImode, regno2), mem2));
1941 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1942 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1945 /* The first part of a frame-related parallel insn
1946 is always assumed to be relevant to the frame
1947 calculations; subsequent parts are only
1948 frame-related if explicitly marked. */
1949 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1950 1)) = 1;
1951 regno = regno2;
1952 start_offset += increment * 2;
1954 else
1956 if (restore == false)
1957 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1958 else
1960 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1961 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1963 start_offset += increment;
1965 RTX_FRAME_RELATED_P (insn) = 1;
1969 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1973 /* AArch64 stack frames generated by this compiler look like:
1975 +-------------------------------+
1977 | incoming stack arguments |
1979 +-------------------------------+ <-- arg_pointer_rtx
1981 | callee-allocated save area |
1982 | for register varargs |
1984 +-------------------------------+
1986 | local variables |
1988 +-------------------------------+ <-- frame_pointer_rtx
1990 | callee-saved registers |
1992 +-------------------------------+
1993 | LR' |
1994 +-------------------------------+
1995 | FP' |
1996 P +-------------------------------+ <-- hard_frame_pointer_rtx
1997 | dynamic allocation |
1998 +-------------------------------+
2000 | outgoing stack arguments |
2002 +-------------------------------+ <-- stack_pointer_rtx
2004 Dynamic stack allocations such as alloca insert data at point P.
2005 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
2006 hard_frame_pointer_rtx unchanged. */
2008 /* Generate the prologue instructions for entry into a function.
2009 Establish the stack frame by decreasing the stack pointer with a
2010 properly calculated size and, if necessary, create a frame record
2011 filled with the values of LR and previous frame pointer. The
2012 current FP is also set up if it is in use. */
2014 void
2015 aarch64_expand_prologue (void)
2017 /* sub sp, sp, #<frame_size>
2018 stp {fp, lr}, [sp, #<frame_size> - 16]
2019 add fp, sp, #<frame_size> - hardfp_offset
2020 stp {cs_reg}, [fp, #-16] etc.
2022 sub sp, sp, <final_adjustment_if_any>
2024 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
2025 HOST_WIDE_INT frame_size, offset;
2026 HOST_WIDE_INT fp_offset; /* FP offset from SP */
2027 rtx insn;
2029 aarch64_layout_frame ();
2030 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2031 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
2032 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
2033 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2034 + crtl->outgoing_args_size);
2035 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2036 STACK_BOUNDARY / BITS_PER_UNIT);
2038 if (flag_stack_usage_info)
2039 current_function_static_stack_size = frame_size;
2041 fp_offset = (offset
2042 - original_frame_size
2043 - cfun->machine->frame.saved_regs_size);
2045 /* Store pairs and load pairs have an offset range of only -512 to 504. */
2046 if (offset >= 512)
2048 /* When the frame is large, the stack pointer is first decreased to
2049 skip over the callee-allocated save area for register varargs,
2050 the local variable area and/or the callee-saved register area.
2051 This allows the pre-index write-back store pair
2052 instructions to be used to set up the stack frame
2053 efficiently. */
2054 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2055 if (offset >= 512)
2056 offset = cfun->machine->frame.saved_regs_size;
2058 frame_size -= (offset + crtl->outgoing_args_size);
2059 fp_offset = 0;
2061 if (frame_size >= 0x1000000)
2063 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2064 emit_move_insn (op0, GEN_INT (-frame_size));
2065 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2066 aarch64_set_frame_expr (gen_rtx_SET
2067 (Pmode, stack_pointer_rtx,
2068 plus_constant (Pmode,
2069 stack_pointer_rtx,
2070 -frame_size)));
2072 else if (frame_size > 0)
2074 if ((frame_size & 0xfff) != frame_size)
2076 insn = emit_insn (gen_add2_insn
2077 (stack_pointer_rtx,
2078 GEN_INT (-(frame_size
2079 & ~(HOST_WIDE_INT)0xfff))));
2080 RTX_FRAME_RELATED_P (insn) = 1;
2082 if ((frame_size & 0xfff) != 0)
2084 insn = emit_insn (gen_add2_insn
2085 (stack_pointer_rtx,
2086 GEN_INT (-(frame_size
2087 & (HOST_WIDE_INT)0xfff))));
2088 RTX_FRAME_RELATED_P (insn) = 1;
2092 else
2093 frame_size = -1;
2095 if (offset > 0)
2097 /* Save the frame pointer and lr if the frame pointer is needed
2098 first. Make the frame pointer point to the location of the
2099 old frame pointer on the stack. */
2100 if (frame_pointer_needed)
2102 rtx mem_fp, mem_lr;
2104 if (fp_offset)
2106 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2107 GEN_INT (-offset)));
2108 RTX_FRAME_RELATED_P (insn) = 1;
2109 aarch64_set_frame_expr (gen_rtx_SET
2110 (Pmode, stack_pointer_rtx,
2111 gen_rtx_MINUS (Pmode,
2112 stack_pointer_rtx,
2113 GEN_INT (offset))));
2114 mem_fp = gen_frame_mem (DImode,
2115 plus_constant (Pmode,
2116 stack_pointer_rtx,
2117 fp_offset));
2118 mem_lr = gen_frame_mem (DImode,
2119 plus_constant (Pmode,
2120 stack_pointer_rtx,
2121 fp_offset
2122 + UNITS_PER_WORD));
2123 insn = emit_insn (gen_store_pairdi (mem_fp,
2124 hard_frame_pointer_rtx,
2125 mem_lr,
2126 gen_rtx_REG (DImode,
2127 LR_REGNUM)));
2129 else
2131 insn = emit_insn (gen_storewb_pairdi_di
2132 (stack_pointer_rtx, stack_pointer_rtx,
2133 hard_frame_pointer_rtx,
2134 gen_rtx_REG (DImode, LR_REGNUM),
2135 GEN_INT (-offset),
2136 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2137 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2140 /* The first part of a frame-related parallel insn is always
2141 assumed to be relevant to the frame calculations;
2142 subsequent parts are only frame-related if explicitly
2143 marked. */
2144 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2145 RTX_FRAME_RELATED_P (insn) = 1;
2147 /* Set up frame pointer to point to the location of the
2148 previous frame pointer on the stack. */
2149 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2150 stack_pointer_rtx,
2151 GEN_INT (fp_offset)));
2152 aarch64_set_frame_expr (gen_rtx_SET
2153 (Pmode, hard_frame_pointer_rtx,
2154 plus_constant (Pmode,
2155 stack_pointer_rtx,
2156 fp_offset)));
2157 RTX_FRAME_RELATED_P (insn) = 1;
2158 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2159 hard_frame_pointer_rtx));
2161 else
2163 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2164 GEN_INT (-offset)));
2165 RTX_FRAME_RELATED_P (insn) = 1;
2168 aarch64_save_or_restore_callee_save_registers
2169 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2172 /* when offset >= 512,
2173 sub sp, sp, #<outgoing_args_size> */
2174 if (frame_size > -1)
2176 if (crtl->outgoing_args_size > 0)
2178 insn = emit_insn (gen_add2_insn
2179 (stack_pointer_rtx,
2180 GEN_INT (- crtl->outgoing_args_size)));
2181 RTX_FRAME_RELATED_P (insn) = 1;
2186 /* Generate the epilogue instructions for returning from a function. */
2187 void
2188 aarch64_expand_epilogue (bool for_sibcall)
2190 HOST_WIDE_INT original_frame_size, frame_size, offset;
2191 HOST_WIDE_INT fp_offset;
2192 rtx insn;
2193 rtx cfa_reg;
2195 aarch64_layout_frame ();
2196 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2197 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2198 + crtl->outgoing_args_size);
2199 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2200 STACK_BOUNDARY / BITS_PER_UNIT);
2202 fp_offset = (offset
2203 - original_frame_size
2204 - cfun->machine->frame.saved_regs_size);
2206 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2208 /* Store pairs and load pairs have an offset range of only -512 to 504. */
2209 if (offset >= 512)
2211 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2212 if (offset >= 512)
2213 offset = cfun->machine->frame.saved_regs_size;
2215 frame_size -= (offset + crtl->outgoing_args_size);
2216 fp_offset = 0;
2217 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2219 insn = emit_insn (gen_add2_insn
2220 (stack_pointer_rtx,
2221 GEN_INT (crtl->outgoing_args_size)));
2222 RTX_FRAME_RELATED_P (insn) = 1;
2225 else
2226 frame_size = -1;
2228 /* If there were outgoing arguments or we've done dynamic stack
2229 allocation, then restore the stack pointer from the frame
2230 pointer. This is at most one insn and more efficient than using
2231 GCC's internal mechanism. */
2232 if (frame_pointer_needed
2233 && (crtl->outgoing_args_size || cfun->calls_alloca))
2235 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2236 hard_frame_pointer_rtx,
2237 GEN_INT (- fp_offset)));
2238 RTX_FRAME_RELATED_P (insn) = 1;
2239 /* As SP is set to (FP - fp_offset), according to the rules in
2240 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2241 from the value of SP from now on. */
2242 cfa_reg = stack_pointer_rtx;
2245 aarch64_save_or_restore_callee_save_registers
2246 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2248 /* Restore the frame pointer and lr if the frame pointer is needed. */
2249 if (offset > 0)
2251 if (frame_pointer_needed)
2253 rtx mem_fp, mem_lr;
2255 if (fp_offset)
2257 mem_fp = gen_frame_mem (DImode,
2258 plus_constant (Pmode,
2259 stack_pointer_rtx,
2260 fp_offset));
2261 mem_lr = gen_frame_mem (DImode,
2262 plus_constant (Pmode,
2263 stack_pointer_rtx,
2264 fp_offset
2265 + UNITS_PER_WORD));
2266 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2267 mem_fp,
2268 gen_rtx_REG (DImode,
2269 LR_REGNUM),
2270 mem_lr));
2272 else
2274 insn = emit_insn (gen_loadwb_pairdi_di
2275 (stack_pointer_rtx,
2276 stack_pointer_rtx,
2277 hard_frame_pointer_rtx,
2278 gen_rtx_REG (DImode, LR_REGNUM),
2279 GEN_INT (offset),
2280 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2281 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2282 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2283 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2284 plus_constant (Pmode, cfa_reg,
2285 offset))));
2288 /* The first part of a frame-related parallel insn
2289 is always assumed to be relevant to the frame
2290 calculations; subsequent parts are only
2291 frame-related if explicitly marked. */
2292 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2293 RTX_FRAME_RELATED_P (insn) = 1;
2294 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2295 add_reg_note (insn, REG_CFA_RESTORE,
2296 gen_rtx_REG (DImode, LR_REGNUM));
2298 if (fp_offset)
2300 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2301 GEN_INT (offset)));
2302 RTX_FRAME_RELATED_P (insn) = 1;
2305 else
2307 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2308 GEN_INT (offset)));
2309 RTX_FRAME_RELATED_P (insn) = 1;
2313 /* Stack adjustment for exception handler. */
2314 if (crtl->calls_eh_return)
2316 /* We need to unwind the stack by the offset computed by
2317 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2318 based on SP. Ideally we would update the SP and define the
2319 CFA along the lines of:
2321 SP = SP + EH_RETURN_STACKADJ_RTX
2322 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2324 However the dwarf emitter only understands a constant
2325 register offset.
2327 The solution chosen here is to use the otherwise unused IP0
2328 as a temporary register to hold the current SP value. The
2329 CFA is described using IP0; then SP is modified. */
2331 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2333 insn = emit_move_insn (ip0, stack_pointer_rtx);
2334 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2335 RTX_FRAME_RELATED_P (insn) = 1;
2337 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2339 /* Ensure the assignment to IP0 does not get optimized away. */
2340 emit_use (ip0);
2343 if (frame_size > -1)
2345 if (frame_size >= 0x1000000)
2347 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2348 emit_move_insn (op0, GEN_INT (frame_size));
2349 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2350 aarch64_set_frame_expr (gen_rtx_SET
2351 (Pmode, stack_pointer_rtx,
2352 plus_constant (Pmode,
2353 stack_pointer_rtx,
2354 frame_size)));
2356 else if (frame_size > 0)
2358 if ((frame_size & 0xfff) != 0)
2360 insn = emit_insn (gen_add2_insn
2361 (stack_pointer_rtx,
2362 GEN_INT ((frame_size
2363 & (HOST_WIDE_INT) 0xfff))));
2364 RTX_FRAME_RELATED_P (insn) = 1;
2366 if ((frame_size & 0xfff) != frame_size)
2368 insn = emit_insn (gen_add2_insn
2369 (stack_pointer_rtx,
2370 GEN_INT ((frame_size
2371 & ~ (HOST_WIDE_INT) 0xfff))));
2372 RTX_FRAME_RELATED_P (insn) = 1;
2376 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2377 plus_constant (Pmode,
2378 stack_pointer_rtx,
2379 offset)));
2382 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2383 if (!for_sibcall)
2384 emit_jump_insn (ret_rtx);
2387 /* Return the place to copy the exception unwinding return address to.
2388 This will probably be a stack slot, but could (in theory) be the
2389 return register. */
2391 aarch64_final_eh_return_addr (void)
2393 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2394 aarch64_layout_frame ();
2395 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2396 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2397 + crtl->outgoing_args_size);
2398 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2399 STACK_BOUNDARY / BITS_PER_UNIT);
2400 fp_offset = offset
2401 - original_frame_size
2402 - cfun->machine->frame.saved_regs_size;
2404 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2405 return gen_rtx_REG (DImode, LR_REGNUM);
2407 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2408 result in a store to save LR introduced by builtin_eh_return () being
2409 incorrectly deleted because the alias is not detected.
2410 So in the calculation of the address to copy the exception unwinding
2411 return address to, we distinguish two cases.
2412 If FP is needed and fp_offset is 0, then SP = FP and hence
2413 we return an SP-relative location, since all the addresses are SP-relative
2414 in this case. This prevents the store from being optimized away.
2415 If fp_offset is not 0, then the addresses will be FP-relative and
2416 therefore we return an FP-relative location. */
2418 if (frame_pointer_needed)
2420 if (fp_offset)
2421 return gen_frame_mem (DImode,
2422 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2423 else
2424 return gen_frame_mem (DImode,
2425 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2428 /* If FP is not needed, we calculate the location of LR, which would be
2429 at the top of the saved registers block. */
2431 return gen_frame_mem (DImode,
2432 plus_constant (Pmode,
2433 stack_pointer_rtx,
2434 fp_offset
2435 + cfun->machine->frame.saved_regs_size
2436 - 2 * UNITS_PER_WORD));
2439 /* Output code to build up a constant in a register. */
2440 static void
2441 aarch64_build_constant (int regnum, HOST_WIDE_INT val)
2443 if (aarch64_bitmask_imm (val, DImode))
2444 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2445 else
2447 int i;
2448 int ncount = 0;
2449 int zcount = 0;
2450 HOST_WIDE_INT valp = val >> 16;
2451 HOST_WIDE_INT valm;
2452 HOST_WIDE_INT tval;
2454 for (i = 16; i < 64; i += 16)
2456 valm = (valp & 0xffff);
2458 if (valm != 0)
2459 ++ zcount;
2461 if (valm != 0xffff)
2462 ++ ncount;
2464 valp >>= 16;
2467 /* zcount contains the number of additional MOVK instructions
2468 required if the constant is built up with an initial MOVZ instruction,
2469 while ncount is the number of MOVK instructions required if starting
2470 with a MOVN instruction. Choose the sequence that needs the fewer
2471 instructions, preferring MOVZ when the two counts are
2472 equal. */
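/* Illustrative example (not from the original sources): for
   val = 0x0000ffffffff1234 the 16-bit chunks above bit 15 are 0xffff,
   0xffff and 0x0000, giving zcount == 2 and ncount == 1.  The MOVN
   sequence therefore wins: the initial move materialises
   0xffffffffffff1234 (the low 16 bits of VAL with all other bits set,
   a MOVN-encodable value), and a single MOVK of 0x0000 into bits
   48..63 finishes the job -- two instructions instead of the three
   that the MOVZ-based sequence would need.  */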
2473 if (ncount < zcount)
2475 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2476 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2477 tval = 0xffff;
2479 else
2481 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2482 GEN_INT (val & 0xffff));
2483 tval = 0;
2486 val >>= 16;
2488 for (i = 16; i < 64; i += 16)
2490 if ((val & 0xffff) != tval)
2491 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2492 GEN_INT (i), GEN_INT (val & 0xffff)));
2493 val >>= 16;
2498 static void
2499 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2501 HOST_WIDE_INT mdelta = delta;
2502 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2503 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2505 if (mdelta < 0)
2506 mdelta = -mdelta;
2508 if (mdelta >= 4096 * 4096)
2510 aarch64_build_constant (scratchreg, delta);
2511 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2513 else if (mdelta > 0)
2515 if (mdelta >= 4096)
2517 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2518 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2519 if (delta < 0)
2520 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2521 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2522 else
2523 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2524 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2526 if (mdelta % 4096 != 0)
2528 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2529 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2530 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
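/* Illustrative example (not from the original sources): for
   delta == 10000 (0x2710) the code above emits three instructions:
   the scratch register is loaded with 10000 / 4096 == 2, that value
   shifted left by 12 (8192) is added to the destination, and the
   remaining 10000 % 4096 == 1808 is added as a plain immediate.  */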
2535 /* Output code to add DELTA to the first argument, and then jump
2536 to FUNCTION. Used for C++ multiple inheritance. */
2537 static void
2538 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2539 HOST_WIDE_INT delta,
2540 HOST_WIDE_INT vcall_offset,
2541 tree function)
2543 /* The this pointer is always in x0. Note that this differs from
2544 Arm where the this pointer may be bumped to r1 if r0 is required
2545 to return a pointer to an aggregate. On AArch64 a result value
2546 pointer will be in x8. */
2547 int this_regno = R0_REGNUM;
2548 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2550 reload_completed = 1;
2551 emit_note (NOTE_INSN_PROLOGUE_END);
2553 if (vcall_offset == 0)
2554 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2555 else
2557 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2559 this_rtx = gen_rtx_REG (Pmode, this_regno);
2560 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2561 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2563 addr = this_rtx;
2564 if (delta != 0)
2566 if (delta >= -256 && delta < 256)
2567 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2568 plus_constant (Pmode, this_rtx, delta));
2569 else
2570 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2573 if (Pmode == ptr_mode)
2574 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2575 else
2576 aarch64_emit_move (temp0,
2577 gen_rtx_ZERO_EXTEND (Pmode,
2578 gen_rtx_MEM (ptr_mode, addr)));
2580 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2581 addr = plus_constant (Pmode, temp0, vcall_offset);
2582 else
2584 aarch64_build_constant (IP1_REGNUM, vcall_offset);
2585 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2588 if (Pmode == ptr_mode)
2589 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode, addr));
2590 else
2591 aarch64_emit_move (temp1,
2592 gen_rtx_SIGN_EXTEND (Pmode,
2593 gen_rtx_MEM (ptr_mode, addr)));
2595 emit_insn (gen_add2_insn (this_rtx, temp1));
2598 /* Generate a tail call to the target function. */
2599 if (!TREE_USED (function))
2601 assemble_external (function);
2602 TREE_USED (function) = 1;
2604 funexp = XEXP (DECL_RTL (function), 0);
2605 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2606 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2607 SIBLING_CALL_P (insn) = 1;
2609 insn = get_insns ();
2610 shorten_branches (insn);
2611 final_start_function (insn, file, 1);
2612 final (insn, file, 1);
2613 final_end_function ();
2615 /* Stop pretending to be a post-reload pass. */
2616 reload_completed = 0;
2619 static int
2620 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2622 if (GET_CODE (*x) == SYMBOL_REF)
2623 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2625 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2626 TLS offsets, not real symbol references. */
2627 if (GET_CODE (*x) == UNSPEC
2628 && XINT (*x, 1) == UNSPEC_TLS)
2629 return -1;
2631 return 0;
2634 static bool
2635 aarch64_tls_referenced_p (rtx x)
2637 if (!TARGET_HAVE_TLS)
2638 return false;
2640 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2644 static int
2645 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2647 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2648 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2650 if (*imm1 < *imm2)
2651 return -1;
2652 if (*imm1 > *imm2)
2653 return +1;
2654 return 0;
2658 static void
2659 aarch64_build_bitmask_table (void)
2661 unsigned HOST_WIDE_INT mask, imm;
2662 unsigned int log_e, e, s, r;
2663 unsigned int nimms = 0;
2665 for (log_e = 1; log_e <= 6; log_e++)
2667 e = 1 << log_e;
2668 if (e == 64)
2669 mask = ~(HOST_WIDE_INT) 0;
2670 else
2671 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2672 for (s = 1; s < e; s++)
2674 for (r = 0; r < e; r++)
2676 /* set s consecutive bits to 1 (s < 64) */
2677 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2678 /* rotate right by r */
2679 if (r != 0)
2680 imm = ((imm >> r) | (imm << (e - r))) & mask;
2681 /* replicate the constant depending on SIMD size */
2682 switch (log_e) {
2683 case 1: imm |= (imm << 2);
2684 case 2: imm |= (imm << 4);
2685 case 3: imm |= (imm << 8);
2686 case 4: imm |= (imm << 16);
2687 case 5: imm |= (imm << 32);
2688 case 6:
2689 break;
2690 default:
2691 gcc_unreachable ();
2693 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2694 aarch64_bitmasks[nimms++] = imm;
2699 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2700 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2701 aarch64_bitmasks_cmp);
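/* Illustrative examples (not from the original sources) of values the
   table built above contains: 0x5555555555555555 (one set bit per
   2-bit element), 0x00ff00ff00ff00ff (eight set bits per 16-bit
   element) and 0xffffffff00000000 (32 consecutive set bits rotated
   within a single 64-bit element).  aarch64_bitmask_imm below simply
   binary-searches this sorted table.  */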
2705 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2706 a left shift of 0 or 12 bits. */
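/* E.g. (illustrative) 0xabc and 0xabc000 are accepted, while 0xabc00
   is rejected because its set bits straddle the two 12-bit windows.  */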
2707 bool
2708 aarch64_uimm12_shift (HOST_WIDE_INT val)
2710 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2711 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val);
2716 /* Return true if val is an immediate that can be loaded into a
2717 register by a MOVZ instruction. */
2718 static bool
2719 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2721 if (GET_MODE_SIZE (mode) > 4)
2723 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2724 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2725 return 1;
2727 else
2729 /* Ignore sign extension. */
2730 val &= (HOST_WIDE_INT) 0xffffffff;
2732 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2733 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2737 /* Return true if val is a valid bitmask immediate. */
2738 bool
2739 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2741 if (GET_MODE_SIZE (mode) < 8)
2743 /* Replicate bit pattern. */
2744 val &= (HOST_WIDE_INT) 0xffffffff;
2745 val |= val << 32;
2747 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2748 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2752 /* Return true if val is an immediate that can be loaded into a
2753 register in a single instruction. */
2754 bool
2755 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2757 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2758 return 1;
2759 return aarch64_bitmask_imm (val, mode);
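/* Illustrative examples (not from the original sources):
   0x0000000012340000 is a single MOVZ (16 bits at shift 16),
   0xffffffffffff1234 is a single MOVN, and 0x00ff00ff00ff00ff is a
   single bitmask-immediate move; a value such as 0x0000000012345678
   needs more than one instruction and is rejected by aarch64_move_imm
   above.  */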
2762 static bool
2763 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2765 rtx base, offset;
2767 if (GET_CODE (x) == HIGH)
2768 return true;
2770 split_const (x, &base, &offset);
2771 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2773 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2774 != SYMBOL_FORCE_TO_MEM)
2775 return true;
2776 else
2777 /* Avoid generating a 64-bit relocation in ILP32; leave it
2778 to aarch64_expand_mov_immediate to handle it properly. */
2779 return mode != ptr_mode;
2782 return aarch64_tls_referenced_p (x);
2785 /* Return true if register REGNO is a valid index register.
2786 STRICT_P is true if REG_OK_STRICT is in effect. */
2788 bool
2789 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2791 if (!HARD_REGISTER_NUM_P (regno))
2793 if (!strict_p)
2794 return true;
2796 if (!reg_renumber)
2797 return false;
2799 regno = reg_renumber[regno];
2801 return GP_REGNUM_P (regno);
2804 /* Return true if register REGNO is a valid base register.
2805 STRICT_P is true if REG_OK_STRICT is in effect. */
2807 bool
2808 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2810 if (!HARD_REGISTER_NUM_P (regno))
2812 if (!strict_p)
2813 return true;
2815 if (!reg_renumber)
2816 return false;
2818 regno = reg_renumber[regno];
2821 /* The fake registers will be eliminated to either the stack or
2822 hard frame pointer, both of which are usually valid base registers.
2823 Reload deals with the cases where the eliminated form isn't valid. */
2824 return (GP_REGNUM_P (regno)
2825 || regno == SP_REGNUM
2826 || regno == FRAME_POINTER_REGNUM
2827 || regno == ARG_POINTER_REGNUM);
2830 /* Return true if X is a valid base register.
2831 STRICT_P is true if REG_OK_STRICT is in effect. */
2833 static bool
2834 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2836 if (!strict_p && GET_CODE (x) == SUBREG)
2837 x = SUBREG_REG (x);
2839 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2842 /* Return true if address offset is a valid index. If it is, fill in INFO
2843 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
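/* Illustrative mapping (not from the original sources) between the RTL
   shapes matched below and AArch64 addressing syntax, for an 8-byte
   access:

     (plus (reg X0) (reg X1))                              [x0,x1]
     (plus (reg X0) (mult (reg X1) (const_int 8)))         [x0,x1,lsl 3]
     (plus (reg X0)
	   (mult (sign_extend (reg W1)) (const_int 8)))    [x0,w1,sxtw 3]

   The shift must either be zero or match the access size, as checked
   at the end of the function.  */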
2845 static bool
2846 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2847 enum machine_mode mode, bool strict_p)
2849 enum aarch64_address_type type;
2850 rtx index;
2851 int shift;
2853 /* (reg:P) */
2854 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2855 && GET_MODE (x) == Pmode)
2857 type = ADDRESS_REG_REG;
2858 index = x;
2859 shift = 0;
2861 /* (sign_extend:DI (reg:SI)) */
2862 else if ((GET_CODE (x) == SIGN_EXTEND
2863 || GET_CODE (x) == ZERO_EXTEND)
2864 && GET_MODE (x) == DImode
2865 && GET_MODE (XEXP (x, 0)) == SImode)
2867 type = (GET_CODE (x) == SIGN_EXTEND)
2868 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2869 index = XEXP (x, 0);
2870 shift = 0;
2872 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2873 else if (GET_CODE (x) == MULT
2874 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2875 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2876 && GET_MODE (XEXP (x, 0)) == DImode
2877 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2878 && CONST_INT_P (XEXP (x, 1)))
2880 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2881 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2882 index = XEXP (XEXP (x, 0), 0);
2883 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2885 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2886 else if (GET_CODE (x) == ASHIFT
2887 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2888 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2889 && GET_MODE (XEXP (x, 0)) == DImode
2890 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2891 && CONST_INT_P (XEXP (x, 1)))
2893 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2894 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2895 index = XEXP (XEXP (x, 0), 0);
2896 shift = INTVAL (XEXP (x, 1));
2898 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2899 else if ((GET_CODE (x) == SIGN_EXTRACT
2900 || GET_CODE (x) == ZERO_EXTRACT)
2901 && GET_MODE (x) == DImode
2902 && GET_CODE (XEXP (x, 0)) == MULT
2903 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2904 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2906 type = (GET_CODE (x) == SIGN_EXTRACT)
2907 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2908 index = XEXP (XEXP (x, 0), 0);
2909 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2910 if (INTVAL (XEXP (x, 1)) != 32 + shift
2911 || INTVAL (XEXP (x, 2)) != 0)
2912 shift = -1;
2914 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2915 (const_int 0xffffffff<<shift)) */
2916 else if (GET_CODE (x) == AND
2917 && GET_MODE (x) == DImode
2918 && GET_CODE (XEXP (x, 0)) == MULT
2919 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2920 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2921 && CONST_INT_P (XEXP (x, 1)))
2923 type = ADDRESS_REG_UXTW;
2924 index = XEXP (XEXP (x, 0), 0);
2925 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2926 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2927 shift = -1;
2929 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2930 else if ((GET_CODE (x) == SIGN_EXTRACT
2931 || GET_CODE (x) == ZERO_EXTRACT)
2932 && GET_MODE (x) == DImode
2933 && GET_CODE (XEXP (x, 0)) == ASHIFT
2934 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2935 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2937 type = (GET_CODE (x) == SIGN_EXTRACT)
2938 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2939 index = XEXP (XEXP (x, 0), 0);
2940 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2941 if (INTVAL (XEXP (x, 1)) != 32 + shift
2942 || INTVAL (XEXP (x, 2)) != 0)
2943 shift = -1;
2945 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2946 (const_int 0xffffffff<<shift)) */
2947 else if (GET_CODE (x) == AND
2948 && GET_MODE (x) == DImode
2949 && GET_CODE (XEXP (x, 0)) == ASHIFT
2950 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2951 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2952 && CONST_INT_P (XEXP (x, 1)))
2954 type = ADDRESS_REG_UXTW;
2955 index = XEXP (XEXP (x, 0), 0);
2956 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2957 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2958 shift = -1;
2960 /* (mult:P (reg:P) (const_int scale)) */
2961 else if (GET_CODE (x) == MULT
2962 && GET_MODE (x) == Pmode
2963 && GET_MODE (XEXP (x, 0)) == Pmode
2964 && CONST_INT_P (XEXP (x, 1)))
2966 type = ADDRESS_REG_REG;
2967 index = XEXP (x, 0);
2968 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2970 /* (ashift:P (reg:P) (const_int shift)) */
2971 else if (GET_CODE (x) == ASHIFT
2972 && GET_MODE (x) == Pmode
2973 && GET_MODE (XEXP (x, 0)) == Pmode
2974 && CONST_INT_P (XEXP (x, 1)))
2976 type = ADDRESS_REG_REG;
2977 index = XEXP (x, 0);
2978 shift = INTVAL (XEXP (x, 1));
2980 else
2981 return false;
2983 if (GET_CODE (index) == SUBREG)
2984 index = SUBREG_REG (index);
2986 if ((shift == 0 ||
2987 (shift > 0 && shift <= 3
2988 && (1 << shift) == GET_MODE_SIZE (mode)))
2989 && REG_P (index)
2990 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
2992 info->type = type;
2993 info->offset = index;
2994 info->shift = shift;
2995 return true;
2998 return false;
3001 static inline bool
3002 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3004 return (offset >= -64 * GET_MODE_SIZE (mode)
3005 && offset < 64 * GET_MODE_SIZE (mode)
3006 && offset % GET_MODE_SIZE (mode) == 0);
3009 static inline bool
3010 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3011 HOST_WIDE_INT offset)
3013 return offset >= -256 && offset < 256;
3016 static inline bool
3017 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3019 return (offset >= 0
3020 && offset < 4096 * GET_MODE_SIZE (mode)
3021 && offset % GET_MODE_SIZE (mode) == 0);
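/* Illustrative summary (not from the original sources): for an 8-byte
   (DImode) access the three predicates above accept, respectively,
   multiples of 8 in [-512, 504], any offset in [-256, 255], and
   multiples of 8 in [0, 32760].  */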
3024 /* Return true if X is a valid address for machine mode MODE. If it is,
3025 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3026 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3028 static bool
3029 aarch64_classify_address (struct aarch64_address_info *info,
3030 rtx x, enum machine_mode mode,
3031 RTX_CODE outer_code, bool strict_p)
3033 enum rtx_code code = GET_CODE (x);
3034 rtx op0, op1;
3035 bool allow_reg_index_p =
3036 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
3038 /* Don't support anything other than POST_INC or REG addressing for
3039 AdvSIMD. */
3040 if (aarch64_vector_mode_p (mode)
3041 && (code != POST_INC && code != REG))
3042 return false;
3044 switch (code)
3046 case REG:
3047 case SUBREG:
3048 info->type = ADDRESS_REG_IMM;
3049 info->base = x;
3050 info->offset = const0_rtx;
3051 return aarch64_base_register_rtx_p (x, strict_p);
3053 case PLUS:
3054 op0 = XEXP (x, 0);
3055 op1 = XEXP (x, 1);
3056 if (GET_MODE_SIZE (mode) != 0
3057 && CONST_INT_P (op1)
3058 && aarch64_base_register_rtx_p (op0, strict_p))
3060 HOST_WIDE_INT offset = INTVAL (op1);
3062 info->type = ADDRESS_REG_IMM;
3063 info->base = op0;
3064 info->offset = op1;
3066 /* TImode and TFmode values are allowed in both pairs of X
3067 registers and individual Q registers. The available
3068 address modes are:
3069 X,X: 7-bit signed scaled offset
3070 Q: 9-bit signed offset
3071 We conservatively require an offset representable in either mode. */
3073 if (mode == TImode || mode == TFmode)
3074 return (offset_7bit_signed_scaled_p (mode, offset)
3075 && offset_9bit_signed_unscaled_p (mode, offset));
3077 if (outer_code == PARALLEL)
3078 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3079 && offset_7bit_signed_scaled_p (mode, offset));
3080 else
3081 return (offset_9bit_signed_unscaled_p (mode, offset)
3082 || offset_12bit_unsigned_scaled_p (mode, offset));
3085 if (allow_reg_index_p)
3087 /* Look for base + (scaled/extended) index register. */
3088 if (aarch64_base_register_rtx_p (op0, strict_p)
3089 && aarch64_classify_index (info, op1, mode, strict_p))
3091 info->base = op0;
3092 return true;
3094 if (aarch64_base_register_rtx_p (op1, strict_p)
3095 && aarch64_classify_index (info, op0, mode, strict_p))
3097 info->base = op1;
3098 return true;
3102 return false;
3104 case POST_INC:
3105 case POST_DEC:
3106 case PRE_INC:
3107 case PRE_DEC:
3108 info->type = ADDRESS_REG_WB;
3109 info->base = XEXP (x, 0);
3110 info->offset = NULL_RTX;
3111 return aarch64_base_register_rtx_p (info->base, strict_p);
3113 case POST_MODIFY:
3114 case PRE_MODIFY:
3115 info->type = ADDRESS_REG_WB;
3116 info->base = XEXP (x, 0);
3117 if (GET_CODE (XEXP (x, 1)) == PLUS
3118 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3119 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3120 && aarch64_base_register_rtx_p (info->base, strict_p))
3122 HOST_WIDE_INT offset;
3123 info->offset = XEXP (XEXP (x, 1), 1);
3124 offset = INTVAL (info->offset);
3126 /* TImode and TFmode values are allowed in both pairs of X
3127 registers and individual Q registers. The available
3128 address modes are:
3129 X,X: 7-bit signed scaled offset
3130 Q: 9-bit signed offset
3131 We conservatively require an offset representable in either mode. */
3133 if (mode == TImode || mode == TFmode)
3134 return (offset_7bit_signed_scaled_p (mode, offset)
3135 && offset_9bit_signed_unscaled_p (mode, offset));
3137 if (outer_code == PARALLEL)
3138 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3139 && offset_7bit_signed_scaled_p (mode, offset));
3140 else
3141 return offset_9bit_signed_unscaled_p (mode, offset);
3143 return false;
3145 case CONST:
3146 case SYMBOL_REF:
3147 case LABEL_REF:
3148 /* load literal: pc-relative constant pool entry. Only supported
3149 for SI mode or larger. */
3150 info->type = ADDRESS_SYMBOLIC;
3151 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3153 rtx sym, addend;
3155 split_const (x, &sym, &addend);
3156 return (GET_CODE (sym) == LABEL_REF
3157 || (GET_CODE (sym) == SYMBOL_REF
3158 && CONSTANT_POOL_ADDRESS_P (sym)));
3160 return false;
3162 case LO_SUM:
3163 info->type = ADDRESS_LO_SUM;
3164 info->base = XEXP (x, 0);
3165 info->offset = XEXP (x, 1);
3166 if (allow_reg_index_p
3167 && aarch64_base_register_rtx_p (info->base, strict_p))
3169 rtx sym, offs;
3170 split_const (info->offset, &sym, &offs);
3171 if (GET_CODE (sym) == SYMBOL_REF
3172 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3173 == SYMBOL_SMALL_ABSOLUTE))
3175 /* The symbol and offset must be aligned to the access size. */
3176 unsigned int align;
3177 unsigned int ref_size;
3179 if (CONSTANT_POOL_ADDRESS_P (sym))
3180 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3181 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3183 tree exp = SYMBOL_REF_DECL (sym);
3184 align = TYPE_ALIGN (TREE_TYPE (exp));
3185 align = CONSTANT_ALIGNMENT (exp, align);
3187 else if (SYMBOL_REF_DECL (sym))
3188 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3189 else
3190 align = BITS_PER_UNIT;
3192 ref_size = GET_MODE_SIZE (mode);
3193 if (ref_size == 0)
3194 ref_size = GET_MODE_SIZE (DImode);
3196 return ((INTVAL (offs) & (ref_size - 1)) == 0
3197 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3200 return false;
3202 default:
3203 return false;
3207 bool
3208 aarch64_symbolic_address_p (rtx x)
3210 rtx offset;
3212 split_const (x, &x, &offset);
3213 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3216 /* Classify the base of symbolic expression X, given that X appears in
3217 context CONTEXT. */
3219 enum aarch64_symbol_type
3220 aarch64_classify_symbolic_expression (rtx x,
3221 enum aarch64_symbol_context context)
3223 rtx offset;
3225 split_const (x, &x, &offset);
3226 return aarch64_classify_symbol (x, context);
3230 /* Return TRUE if X is a legitimate address for accessing memory in
3231 mode MODE. */
3232 static bool
3233 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3235 struct aarch64_address_info addr;
3237 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3240 /* Return TRUE if X is a legitimate address for accessing memory in
3241 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3242 pair operation. */
3243 bool
3244 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3245 RTX_CODE outer_code, bool strict_p)
3247 struct aarch64_address_info addr;
3249 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3252 /* Return TRUE if rtx X is immediate constant 0.0 */
3253 bool
3254 aarch64_float_const_zero_rtx_p (rtx x)
3256 REAL_VALUE_TYPE r;
3258 if (GET_MODE (x) == VOIDmode)
3259 return false;
3261 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3262 if (REAL_VALUE_MINUS_ZERO (r))
3263 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3264 return REAL_VALUES_EQUAL (r, dconst0);
3267 /* Return the fixed registers used for condition codes. */
3269 static bool
3270 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3272 *p1 = CC_REGNUM;
3273 *p2 = INVALID_REGNUM;
3274 return true;
3277 enum machine_mode
3278 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3280 /* All floating point compares return CCFP if it is an equality
3281 comparison, and CCFPE otherwise. */
3282 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3284 switch (code)
3286 case EQ:
3287 case NE:
3288 case UNORDERED:
3289 case ORDERED:
3290 case UNLT:
3291 case UNLE:
3292 case UNGT:
3293 case UNGE:
3294 case UNEQ:
3295 case LTGT:
3296 return CCFPmode;
3298 case LT:
3299 case LE:
3300 case GT:
3301 case GE:
3302 return CCFPEmode;
3304 default:
3305 gcc_unreachable ();
3309 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3310 && y == const0_rtx
3311 && (code == EQ || code == NE || code == LT || code == GE)
3312 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3313 || GET_CODE (x) == NEG))
3314 return CC_NZmode;
3316 /* A compare with a shifted or negated operand. Because of canonicalization,
3317 the comparison will have to be swapped when we emit the assembly
3318 code. */
3319 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3320 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3321 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3322 || GET_CODE (x) == LSHIFTRT
3323 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND
3324 || GET_CODE (x) == NEG))
3325 return CC_SWPmode;
3327 /* A compare of a mode narrower than SI mode against zero can be done
3328 by extending the value in the comparison. */
3329 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3330 && y == const0_rtx)
3331 /* Only use sign-extension if we really need it. */
3332 return ((code == GT || code == GE || code == LE || code == LT)
3333 ? CC_SESWPmode : CC_ZESWPmode);
3335 /* For everything else, return CCmode. */
3336 return CCmode;
3339 static unsigned
3340 aarch64_get_condition_code (rtx x)
3342 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3343 enum rtx_code comp_code = GET_CODE (x);
3345 if (GET_MODE_CLASS (mode) != MODE_CC)
3346 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3348 switch (mode)
3350 case CCFPmode:
3351 case CCFPEmode:
3352 switch (comp_code)
3354 case GE: return AARCH64_GE;
3355 case GT: return AARCH64_GT;
3356 case LE: return AARCH64_LS;
3357 case LT: return AARCH64_MI;
3358 case NE: return AARCH64_NE;
3359 case EQ: return AARCH64_EQ;
3360 case ORDERED: return AARCH64_VC;
3361 case UNORDERED: return AARCH64_VS;
3362 case UNLT: return AARCH64_LT;
3363 case UNLE: return AARCH64_LE;
3364 case UNGT: return AARCH64_HI;
3365 case UNGE: return AARCH64_PL;
3366 default: gcc_unreachable ();
3368 break;
3370 case CCmode:
3371 switch (comp_code)
3373 case NE: return AARCH64_NE;
3374 case EQ: return AARCH64_EQ;
3375 case GE: return AARCH64_GE;
3376 case GT: return AARCH64_GT;
3377 case LE: return AARCH64_LE;
3378 case LT: return AARCH64_LT;
3379 case GEU: return AARCH64_CS;
3380 case GTU: return AARCH64_HI;
3381 case LEU: return AARCH64_LS;
3382 case LTU: return AARCH64_CC;
3383 default: gcc_unreachable ();
3385 break;
3387 case CC_SWPmode:
3388 case CC_ZESWPmode:
3389 case CC_SESWPmode:
3390 switch (comp_code)
3392 case NE: return AARCH64_NE;
3393 case EQ: return AARCH64_EQ;
3394 case GE: return AARCH64_LE;
3395 case GT: return AARCH64_LT;
3396 case LE: return AARCH64_GE;
3397 case LT: return AARCH64_GT;
3398 case GEU: return AARCH64_LS;
3399 case GTU: return AARCH64_CC;
3400 case LEU: return AARCH64_CS;
3401 case LTU: return AARCH64_HI;
3402 default: gcc_unreachable ();
3404 break;
3406 case CC_NZmode:
3407 switch (comp_code)
3409 case NE: return AARCH64_NE;
3410 case EQ: return AARCH64_EQ;
3411 case GE: return AARCH64_PL;
3412 case LT: return AARCH64_MI;
3413 default: gcc_unreachable ();
3415 break;
3417 default:
3418 gcc_unreachable ();
3419 break;
3423 static unsigned
3424 bit_count (unsigned HOST_WIDE_INT value)
3426 unsigned count = 0;
3428 while (value)
3430 count++;
3431 value &= value - 1;
3434 return count;
3437 void
3438 aarch64_print_operand (FILE *f, rtx x, char code)
3440 switch (code)
3442 case 'e':
3443 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3445 int n;
3447 if (GET_CODE (x) != CONST_INT
3448 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3450 output_operand_lossage ("invalid operand for '%%%c'", code);
3451 return;
3454 switch (n)
3456 case 3:
3457 fputc ('b', f);
3458 break;
3459 case 4:
3460 fputc ('h', f);
3461 break;
3462 case 5:
3463 fputc ('w', f);
3464 break;
3465 default:
3466 output_operand_lossage ("invalid operand for '%%%c'", code);
3467 return;
3470 break;
3472 case 'p':
3474 int n;
3476 /* Print N such that 2^N == X. */
3477 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3479 output_operand_lossage ("invalid operand for '%%%c'", code);
3480 return;
3483 asm_fprintf (f, "%d", n);
3485 break;
3487 case 'P':
3488 /* Print the number of non-zero bits in X (a const_int). */
3489 if (GET_CODE (x) != CONST_INT)
3491 output_operand_lossage ("invalid operand for '%%%c'", code);
3492 return;
3495 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3496 break;
3498 case 'H':
3499 /* Print the higher numbered register of a pair (TImode) of regs. */
3500 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3502 output_operand_lossage ("invalid operand for '%%%c'", code);
3503 return;
3506 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3507 break;
3509 case 'm':
3510 /* Print a condition (eq, ne, etc). */
3512 /* CONST_TRUE_RTX means always -- that's the default. */
3513 if (x == const_true_rtx)
3514 return;
3516 if (!COMPARISON_P (x))
3518 output_operand_lossage ("invalid operand for '%%%c'", code);
3519 return;
3522 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3523 break;
3525 case 'M':
3526 /* Print the inverse of a condition (eq <-> ne, etc). */
3528 /* CONST_TRUE_RTX means never -- that's the default. */
3529 if (x == const_true_rtx)
3531 fputs ("nv", f);
3532 return;
3535 if (!COMPARISON_P (x))
3537 output_operand_lossage ("invalid operand for '%%%c'", code);
3538 return;
3541 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3542 (aarch64_get_condition_code (x))], f);
3543 break;
3545 case 'b':
3546 case 'h':
3547 case 's':
3548 case 'd':
3549 case 'q':
3550 /* Print a scalar FP/SIMD register name. */
3551 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3553 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3554 return;
3556 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3557 break;
3559 case 'S':
3560 case 'T':
3561 case 'U':
3562 case 'V':
3563 /* Print the first FP/SIMD register name in a list. */
3564 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3566 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3567 return;
3569 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3570 break;
3572 case 'X':
3573 /* Print bottom 16 bits of integer constant in hex. */
3574 if (GET_CODE (x) != CONST_INT)
3576 output_operand_lossage ("invalid operand for '%%%c'", code);
3577 return;
3579 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3580 break;
3582 case 'w':
3583 case 'x':
3584 /* Print a general register name or the zero register (32-bit or
3585 64-bit). */
3586 if (x == const0_rtx
3587 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3589 asm_fprintf (f, "%czr", code);
3590 break;
3593 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3595 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3596 break;
3599 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3601 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3602 break;
3605 /* Fall through */
3607 case 0:
3608 /* Print a normal operand; if it's a general register, then we
3609 assume DImode. */
3610 if (x == NULL)
3612 output_operand_lossage ("missing operand");
3613 return;
3616 switch (GET_CODE (x))
3618 case REG:
3619 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3620 break;
3622 case MEM:
3623 aarch64_memory_reference_mode = GET_MODE (x);
3624 output_address (XEXP (x, 0));
3625 break;
3627 case LABEL_REF:
3628 case SYMBOL_REF:
3629 output_addr_const (asm_out_file, x);
3630 break;
3632 case CONST_INT:
3633 asm_fprintf (f, "%wd", INTVAL (x));
3634 break;
3636 case CONST_VECTOR:
3637 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3639 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3640 HOST_WIDE_INT_MIN,
3641 HOST_WIDE_INT_MAX));
3642 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3644 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3646 fputc ('0', f);
3648 else
3649 gcc_unreachable ();
3650 break;
3652 case CONST_DOUBLE:
3653 /* CONST_DOUBLE can represent a double-width integer.
3654 In this case, the mode of x is VOIDmode. */
3655 if (GET_MODE (x) == VOIDmode)
3656 ; /* Do Nothing. */
3657 else if (aarch64_float_const_zero_rtx_p (x))
3659 fputc ('0', f);
3660 break;
3662 else if (aarch64_float_const_representable_p (x))
3664 #define buf_size 20
3665 char float_buf[buf_size] = {'\0'};
3666 REAL_VALUE_TYPE r;
3667 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3668 real_to_decimal_for_mode (float_buf, &r,
3669 buf_size, buf_size,
3670 1, GET_MODE (x));
3671 asm_fprintf (asm_out_file, "%s", float_buf);
3672 break;
3673 #undef buf_size
3675 output_operand_lossage ("invalid constant");
3676 return;
3677 default:
3678 output_operand_lossage ("invalid operand");
3679 return;
3681 break;
3683 case 'A':
3684 if (GET_CODE (x) == HIGH)
3685 x = XEXP (x, 0);
3687 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3689 case SYMBOL_SMALL_GOT:
3690 asm_fprintf (asm_out_file, ":got:");
3691 break;
3693 case SYMBOL_SMALL_TLSGD:
3694 asm_fprintf (asm_out_file, ":tlsgd:");
3695 break;
3697 case SYMBOL_SMALL_TLSDESC:
3698 asm_fprintf (asm_out_file, ":tlsdesc:");
3699 break;
3701 case SYMBOL_SMALL_GOTTPREL:
3702 asm_fprintf (asm_out_file, ":gottprel:");
3703 break;
3705 case SYMBOL_SMALL_TPREL:
3706 asm_fprintf (asm_out_file, ":tprel:");
3707 break;
3709 case SYMBOL_TINY_GOT:
3710 gcc_unreachable ();
3711 break;
3713 default:
3714 break;
3716 output_addr_const (asm_out_file, x);
3717 break;
3719 case 'L':
3720 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3722 case SYMBOL_SMALL_GOT:
3723 asm_fprintf (asm_out_file, ":lo12:");
3724 break;
3726 case SYMBOL_SMALL_TLSGD:
3727 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3728 break;
3730 case SYMBOL_SMALL_TLSDESC:
3731 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3732 break;
3734 case SYMBOL_SMALL_GOTTPREL:
3735 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3736 break;
3738 case SYMBOL_SMALL_TPREL:
3739 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3740 break;
3742 case SYMBOL_TINY_GOT:
3743 asm_fprintf (asm_out_file, ":got:");
3744 break;
3746 default:
3747 break;
3749 output_addr_const (asm_out_file, x);
3750 break;
3752 case 'G':
3754 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3756 case SYMBOL_SMALL_TPREL:
3757 asm_fprintf (asm_out_file, ":tprel_hi12:");
3758 break;
3759 default:
3760 break;
3762 output_addr_const (asm_out_file, x);
3763 break;
3765 default:
3766 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3767 return;
3771 void
3772 aarch64_print_operand_address (FILE *f, rtx x)
3774 struct aarch64_address_info addr;
3776 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3777 MEM, true))
3778 switch (addr.type)
3780 case ADDRESS_REG_IMM:
3781 if (addr.offset == const0_rtx)
3782 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3783 else
3784 asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
3785 INTVAL (addr.offset));
3786 return;
3788 case ADDRESS_REG_REG:
3789 if (addr.shift == 0)
3790 asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3791 reg_names [REGNO (addr.offset)]);
3792 else
3793 asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3794 reg_names [REGNO (addr.offset)], addr.shift);
3795 return;
3797 case ADDRESS_REG_UXTW:
3798 if (addr.shift == 0)
3799 asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
3800 REGNO (addr.offset) - R0_REGNUM);
3801 else
3802 asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
3803 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3804 return;
3806 case ADDRESS_REG_SXTW:
3807 if (addr.shift == 0)
3808 asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
3809 REGNO (addr.offset) - R0_REGNUM);
3810 else
3811 asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
3812 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3813 return;
3815 case ADDRESS_REG_WB:
3816 switch (GET_CODE (x))
3818 case PRE_INC:
3819 asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
3820 GET_MODE_SIZE (aarch64_memory_reference_mode));
3821 return;
3822 case POST_INC:
3823 asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
3824 GET_MODE_SIZE (aarch64_memory_reference_mode));
3825 return;
3826 case PRE_DEC:
3827 asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3828 GET_MODE_SIZE (aarch64_memory_reference_mode));
3829 return;
3830 case POST_DEC:
3831 asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3832 GET_MODE_SIZE (aarch64_memory_reference_mode));
3833 return;
3834 case PRE_MODIFY:
3835 asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3836 INTVAL (addr.offset));
3837 return;
3838 case POST_MODIFY:
3839 asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3840 INTVAL (addr.offset));
3841 return;
3842 default:
3843 break;
3845 break;
3847 case ADDRESS_LO_SUM:
3848 asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3849 output_addr_const (f, addr.offset);
3850 asm_fprintf (f, "]");
3851 return;
3853 case ADDRESS_SYMBOLIC:
3854 break;
3857 output_addr_const (f, x);
3860 void
3861 aarch64_function_profiler (FILE *f ATTRIBUTE_UNUSED,
3862 int labelno ATTRIBUTE_UNUSED)
3864 sorry ("function profiling");
3867 bool
3868 aarch64_label_mentioned_p (rtx x)
3870 const char *fmt;
3871 int i;
3873 if (GET_CODE (x) == LABEL_REF)
3874 return true;
3876 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3877 referencing instruction, but they are constant offsets, not
3878 symbols. */
3879 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3880 return false;
3882 fmt = GET_RTX_FORMAT (GET_CODE (x));
3883 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3885 if (fmt[i] == 'E')
3887 int j;
3889 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3890 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3891 return 1;
3893 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3894 return 1;
3897 return 0;
3900 /* Implement REGNO_REG_CLASS. */
3902 enum reg_class
3903 aarch64_regno_regclass (unsigned regno)
3905 if (GP_REGNUM_P (regno))
3906 return CORE_REGS;
3908 if (regno == SP_REGNUM)
3909 return STACK_REG;
3911 if (regno == FRAME_POINTER_REGNUM
3912 || regno == ARG_POINTER_REGNUM)
3913 return CORE_REGS;
3915 if (FP_REGNUM_P (regno))
3916 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
3918 return NO_REGS;
3921 /* Try a machine-dependent way of reloading an illegitimate address
3922 operand. If we find one, push the reload and return the new rtx. */
3925 aarch64_legitimize_reload_address (rtx *x_p,
3926 enum machine_mode mode,
3927 int opnum, int type,
3928 int ind_levels ATTRIBUTE_UNUSED)
3930 rtx x = *x_p;
3932 /* Do not allow mem (plus (reg, const)) if vector mode. */
3933 if (aarch64_vector_mode_p (mode)
3934 && GET_CODE (x) == PLUS
3935 && REG_P (XEXP (x, 0))
3936 && CONST_INT_P (XEXP (x, 1)))
3938 rtx orig_rtx = x;
3939 x = copy_rtx (x);
3940 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3941 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3942 opnum, (enum reload_type) type);
3943 return x;
3946 /* We must recognize output that we have already generated ourselves. */
3947 if (GET_CODE (x) == PLUS
3948 && GET_CODE (XEXP (x, 0)) == PLUS
3949 && REG_P (XEXP (XEXP (x, 0), 0))
3950 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3951 && CONST_INT_P (XEXP (x, 1)))
3953 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3954 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3955 opnum, (enum reload_type) type);
3956 return x;
3959 /* We wish to handle large displacements off a base register by splitting
3960 the addend across an add and the mem insn. This can cut the number of
3961 extra insns needed from 3 to 1. It is only useful for load/store of a
3962 single register with a 12-bit offset field. */
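/* Illustrative example (not from the original sources): for a DImode
   access at (plus (reg X1) (const_int 0x21008)), low becomes 0x8 and
   high 0x21000, which aarch64_uimm12_shift accepts, so the reload adds
   0x21000 into a base register (one ADD with a shifted 12-bit
   immediate) and the memory access keeps the small offset 8.  */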
3963 if (GET_CODE (x) == PLUS
3964 && REG_P (XEXP (x, 0))
3965 && CONST_INT_P (XEXP (x, 1))
3966 && HARD_REGISTER_P (XEXP (x, 0))
3967 && mode != TImode
3968 && mode != TFmode
3969 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
3971 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
3972 HOST_WIDE_INT low = val & 0xfff;
3973 HOST_WIDE_INT high = val - low;
3974 HOST_WIDE_INT offs;
3975 rtx cst;
3976 enum machine_mode xmode = GET_MODE (x);
3978 /* In ILP32, xmode can be either DImode or SImode. */
3979 gcc_assert (xmode == DImode || xmode == SImode);
3981 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
3982 BLKmode alignment. */
3983 if (GET_MODE_SIZE (mode) == 0)
3984 return NULL_RTX;
3986 offs = low % GET_MODE_SIZE (mode);
3988 /* Align misaligned offset by adjusting high part to compensate. */
3989 if (offs != 0)
3991 if (aarch64_uimm12_shift (high + offs))
3993 /* Align down. */
3994 low = low - offs;
3995 high = high + offs;
3997 else
3999 /* Align up. */
4000 offs = GET_MODE_SIZE (mode) - offs;
4001 low = low + offs;
4002 high = high + (low & 0x1000) - offs;
4003 low &= 0xfff;
4007 /* Check for overflow. */
4008 if (high + low != val)
4009 return NULL_RTX;
4011 cst = GEN_INT (high);
4012 if (!aarch64_uimm12_shift (high))
4013 cst = force_const_mem (xmode, cst);
4015 /* Reload high part into base reg, leaving the low part
4016 in the mem instruction. */
4017 x = plus_constant (xmode,
4018 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4019 low);
4021 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4022 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4023 opnum, (enum reload_type) type);
4024 return x;
4027 return NULL_RTX;
4031 static reg_class_t
4032 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4033 reg_class_t rclass,
4034 enum machine_mode mode,
4035 secondary_reload_info *sri)
4037 /* Address expressions of the form PLUS (SP, large_offset) need two
4038 scratch registers, one for the constant, and one for holding a
4039 copy of SP, since SP cannot be used on the RHS of an add-reg
4040 instruction. */
4041 if (mode == DImode
4042 && GET_CODE (x) == PLUS
4043 && XEXP (x, 0) == stack_pointer_rtx
4044 && CONST_INT_P (XEXP (x, 1))
4045 && !aarch64_uimm12_shift (INTVAL (XEXP (x, 1))))
4047 sri->icode = CODE_FOR_reload_sp_immediate;
4048 return NO_REGS;
4051 /* Without the TARGET_SIMD instructions we cannot move a Q register
4052 to a Q register directly. We need a scratch. */
4053 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4054 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4055 && reg_class_subset_p (rclass, FP_REGS))
4057 if (mode == TFmode)
4058 sri->icode = CODE_FOR_aarch64_reload_movtf;
4059 else if (mode == TImode)
4060 sri->icode = CODE_FOR_aarch64_reload_movti;
4061 return NO_REGS;
4064 /* A TFmode or TImode memory access should be handled via FP_REGS
4065 because AArch64 has richer addressing modes for LDR/STR instructions
4066 than LDP/STP instructions. */
4067 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4068 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4069 return FP_REGS;
4071 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4072 return CORE_REGS;
4074 return NO_REGS;
4077 static bool
4078 aarch64_can_eliminate (const int from, const int to)
4080 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4081 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4083 if (frame_pointer_needed)
4085 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4086 return true;
4087 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4088 return false;
4089 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4090 && !cfun->calls_alloca)
4091 return true;
4092 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4093 return true;
4094 return false;
4096 else
4098 /* If we decided that we didn't need a leaf frame pointer but then used
4099 LR in the function, then we'll want a frame pointer after all, so
4100 prevent this elimination to ensure a frame pointer is used.
4102 NOTE: the original value of flag_omit_frame_pointer gets trashed
4103 IFF flag_omit_leaf_frame_pointer is true, so we check the value
4104 of faked_omit_frame_pointer here (which is true when we always
4105 wish to keep non-leaf frame pointers but only wish to keep leaf frame
4106 pointers when LR is clobbered). */
4107 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4108 && df_regs_ever_live_p (LR_REGNUM)
4109 && faked_omit_frame_pointer)
4110 return false;
4113 return true;
4116 HOST_WIDE_INT
4117 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4119 HOST_WIDE_INT frame_size;
4120 HOST_WIDE_INT offset;
4122 aarch64_layout_frame ();
4123 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4124 + crtl->outgoing_args_size
4125 + cfun->machine->saved_varargs_size);
4127 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4128 offset = frame_size;
4130 if (to == HARD_FRAME_POINTER_REGNUM)
4132 if (from == ARG_POINTER_REGNUM)
4133 return offset - crtl->outgoing_args_size;
4135 if (from == FRAME_POINTER_REGNUM)
4136 return cfun->machine->frame.saved_regs_size;
4139 if (to == STACK_POINTER_REGNUM)
4141 if (from == FRAME_POINTER_REGNUM)
4143 HOST_WIDE_INT elim = crtl->outgoing_args_size
4144 + cfun->machine->frame.saved_regs_size
4145 - cfun->machine->frame.fp_lr_offset;
4146 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4147 return elim;
4151 return offset;
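/* A hedged worked example of the offsets above: with 16 bytes of locals,
   16 bytes of saved registers (say FP and LR), no outgoing arguments and
   no varargs save area, frame_size is 32; eliminating ARG_POINTER_REGNUM
   into HARD_FRAME_POINTER_REGNUM then yields 32, while eliminating
   FRAME_POINTER_REGNUM into HARD_FRAME_POINTER_REGNUM yields the 16 bytes
   of saved registers. */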
4155 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4156 previous frame. */
4159 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4161 if (count != 0)
4162 return const0_rtx;
4163 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4167 static void
4168 aarch64_asm_trampoline_template (FILE *f)
4170 if (TARGET_ILP32)
4172 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4173 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4175 else
4177 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4178 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4180 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4181 assemble_aligned_integer (4, const0_rtx);
4182 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4183 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4186 static void
4187 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4189 rtx fnaddr, mem, a_tramp;
4190 const int tramp_code_sz = 16;
4192 /* We don't need to copy the trailing D-words; we fill those in below. */
4193 emit_block_move (m_tramp, assemble_trampoline_template (),
4194 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4195 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4196 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4197 if (GET_MODE (fnaddr) != ptr_mode)
4198 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4199 emit_move_insn (mem, fnaddr);
4201 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4202 emit_move_insn (mem, chain_value);
4204 /* XXX We should really define a "clear_cache" pattern and use
4205 gen_clear_cache(). */
4206 a_tramp = XEXP (m_tramp, 0);
4207 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4208 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4209 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4210 ptr_mode);
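/* A hedged sketch of the completed LP64 trampoline image built by the two
   functions above, assuming IP1 is x17 and the static chain register is
   x18; the struct exists only to picture the layout and is not used by the
   port. */
#if 0
struct aarch64_tramp_image_sketch
{
  unsigned int insns[4];  /* ldr x17, .+16; ldr x18, .+20; br x17; pad.  */
  void *fnaddr;           /* Written at offset 16 by aarch64_trampoline_init.  */
  void *static_chain;     /* Written at offset 24, likewise.  */
};
#endif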
4213 static unsigned char
4214 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4216 switch (regclass)
4218 case CORE_REGS:
4219 case POINTER_REGS:
4220 case GENERAL_REGS:
4221 case ALL_REGS:
4222 case FP_REGS:
4223 case FP_LO_REGS:
4224 return
4225 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4226 (GET_MODE_SIZE (mode) + 7) / 8;
4227 case STACK_REG:
4228 return 1;
4230 case NO_REGS:
4231 return 0;
4233 default:
4234 break;
4236 gcc_unreachable ();
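/* For instance, the expression above evaluates to 1 register for DImode
   ((8 + 7) / 8) and for a 16-byte vector mode such as V4SImode
   ((16 + 15) / 16), but to 2 for TImode or TFmode ((16 + 7) / 8),
   assuming those scalar modes are not treated as vector modes by
   aarch64_vector_mode_p. */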
4239 static reg_class_t
4240 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4242 if (regclass == POINTER_REGS || regclass == STACK_REG)
4243 return GENERAL_REGS;
4245 /* If it's an integer immediate that MOVI can't handle, then
4246 FP_REGS is not an option, so we return NO_REGS instead. */
4247 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4248 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4249 return NO_REGS;
4251 return regclass;
4254 void
4255 aarch64_asm_output_labelref (FILE* f, const char *name)
4257 asm_fprintf (f, "%U%s", name);
4260 static void
4261 aarch64_elf_asm_constructor (rtx symbol, int priority)
4263 if (priority == DEFAULT_INIT_PRIORITY)
4264 default_ctor_section_asm_out_constructor (symbol, priority);
4265 else
4267 section *s;
4268 char buf[18];
4269 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4270 s = get_section (buf, SECTION_WRITE, NULL);
4271 switch_to_section (s);
4272 assemble_align (POINTER_SIZE);
4273 assemble_aligned_integer (POINTER_BYTES, symbol);
4277 static void
4278 aarch64_elf_asm_destructor (rtx symbol, int priority)
4280 if (priority == DEFAULT_INIT_PRIORITY)
4281 default_dtor_section_asm_out_destructor (symbol, priority);
4282 else
4284 section *s;
4285 char buf[18];
4286 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4287 s = get_section (buf, SECTION_WRITE, NULL);
4288 switch_to_section (s);
4289 assemble_align (POINTER_SIZE);
4290 assemble_aligned_integer (POINTER_BYTES, symbol);
4294 const char*
4295 aarch64_output_casesi (rtx *operands)
4297 char buf[100];
4298 char label[100];
4299 rtx diff_vec = PATTERN (next_active_insn (operands[2]));
4300 int index;
4301 static const char *const patterns[4][2] =
4304 "ldrb\t%w3, [%0,%w1,uxtw]",
4305 "add\t%3, %4, %w3, sxtb #2"
4308 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4309 "add\t%3, %4, %w3, sxth #2"
4312 "ldr\t%w3, [%0,%w1,uxtw #2]",
4313 "add\t%3, %4, %w3, sxtw #2"
4315 /* We assume that DImode is only generated when not optimizing and
4316 that we don't really need 64-bit address offsets. That would
4317 imply an object file with 8GB of code in a single function! */
4319 "ldr\t%w3, [%0,%w1,uxtw #2]",
4320 "add\t%3, %4, %w3, sxtw #2"
4324 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4326 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4328 gcc_assert (index >= 0 && index <= 3);
4330 /* Need to implement table size reduction by changing the code below. */
4331 output_asm_insn (patterns[index][0], operands);
4332 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4333 snprintf (buf, sizeof (buf),
4334 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4335 output_asm_insn (buf, operands);
4336 output_asm_insn (patterns[index][1], operands);
4337 output_asm_insn ("br\t%3", operands);
4338 assemble_label (asm_out_file, label);
4339 return "";
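/* As a hedged illustration, for a byte-wide ADDR_DIFF_VEC (index 0 above),
   and assuming operand 0 holds the address of the dispatch table, operand 1
   the zero-based case index and operands 3/4 the two scratch registers
   (shown here as x0, w1, x3 and x4 purely for illustration), the emitted
   sequence looks roughly like:

       ldrb    w3, [x0, w1, uxtw]
       adr     x4, .Lrtx<N>
       add     x3, x4, w3, sxtb #2
       br      x3
   .Lrtx<N>:

   i.e. the table entry, scaled by 4 and added to the address of the local
   label, gives the branch target. */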
4343 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4344 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4345 operator. */
4348 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4350 if (shift >= 0 && shift <= 3)
4352 int size;
4353 for (size = 8; size <= 32; size *= 2)
4355 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4356 if (mask == bits << shift)
4357 return size;
4360 return 0;
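/* A minimal, standalone sketch of the check above, using plain host
   integers rather than HOST_WIDE_INT; e.g. it returns 8 for
   (shift 2, mask 0x3fc) and 16 for (shift 1, mask 0x1fffe), matching the
   UXTB and UXTH forms respectively.  The helper name is illustrative
   only. */
#if 0
static int
uxt_size_sketch (int shift, long long mask)
{
  int size;

  if (shift < 0 || shift > 3)
    return 0;

  /* Try the 8-, 16- and 32-bit masks shifted into place.  */
  for (size = 8; size <= 32; size *= 2)
    if (mask == (((1LL << size) - 1) << shift))
      return size;

  return 0;
}
#endif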
4363 static bool
4364 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4365 const_rtx x ATTRIBUTE_UNUSED)
4367 /* We can't use blocks for constants when we're using a per-function
4368 constant pool. */
4369 return false;
4372 static section *
4373 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4374 rtx x ATTRIBUTE_UNUSED,
4375 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4377 /* Force all constant pool entries into the current function section. */
4378 return function_section (current_function_decl);
4382 /* Costs. */
4384 /* Helper function for rtx cost calculation. Strip a shift expression
4385 from X. Returns the inner operand if successful, or the original
4386 expression on failure. */
4387 static rtx
4388 aarch64_strip_shift (rtx x)
4390 rtx op = x;
4392 if ((GET_CODE (op) == ASHIFT
4393 || GET_CODE (op) == ASHIFTRT
4394 || GET_CODE (op) == LSHIFTRT)
4395 && CONST_INT_P (XEXP (op, 1)))
4396 return XEXP (op, 0);
4398 if (GET_CODE (op) == MULT
4399 && CONST_INT_P (XEXP (op, 1))
4400 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4401 return XEXP (op, 0);
4403 return x;
4406 /* Helper function for rtx cost calculation. Strip a shift or extend
4407 expression from X. Returns the inner operand if successful, or the
4408 original expression on failure. We deal with a number of possible
4409 canonicalization variations here. */
4410 static rtx
4411 aarch64_strip_shift_or_extend (rtx x)
4413 rtx op = x;
4415 /* Zero and sign extraction of a widened value. */
4416 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4417 && XEXP (op, 2) == const0_rtx
4418 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4419 XEXP (op, 1)))
4420 return XEXP (XEXP (op, 0), 0);
4422 /* It can also be represented (for zero-extend) as an AND with an
4423 immediate. */
4424 if (GET_CODE (op) == AND
4425 && GET_CODE (XEXP (op, 0)) == MULT
4426 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4427 && CONST_INT_P (XEXP (op, 1))
4428 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4429 INTVAL (XEXP (op, 1))) != 0)
4430 return XEXP (XEXP (op, 0), 0);
4432 /* Now handle the extended-register form, which may also have an optional
4433 left shift by 1..4. */
4434 if (GET_CODE (op) == ASHIFT
4435 && CONST_INT_P (XEXP (op, 1))
4436 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4437 op = XEXP (op, 0);
4439 if (GET_CODE (op) == ZERO_EXTEND
4440 || GET_CODE (op) == SIGN_EXTEND)
4441 op = XEXP (op, 0);
4443 if (op != x)
4444 return op;
4446 return aarch64_strip_shift (x);
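/* For instance, (mult (reg) (const_int 4)) strips to the inner register;
   so does (and (mult (reg) (const_int 4)) (const_int 1020)), since a mask
   of 1020 (0xff << 2) together with the scale of 4 describes a UXTB scaled
   by 4; and (ashift (zero_extend (reg)) (const_int 2)) likewise strips to
   the register. */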
4449 /* Calculate the cost of calculating X, storing it in *COST. Result
4450 is true if the total cost of the operation has now been calculated. */
4451 static bool
4452 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4453 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4455 rtx op0, op1;
4456 const struct cpu_rtx_cost_table *extra_cost
4457 = aarch64_tune_params->insn_extra_cost;
4459 switch (code)
4461 case SET:
4462 op0 = SET_DEST (x);
4463 op1 = SET_SRC (x);
4465 switch (GET_CODE (op0))
4467 case MEM:
4468 if (speed)
4469 *cost += extra_cost->memory_store;
4471 if (op1 != const0_rtx)
4472 *cost += rtx_cost (op1, SET, 1, speed);
4473 return true;
4475 case SUBREG:
4476 if (! REG_P (SUBREG_REG (op0)))
4477 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4478 /* Fall through. */
4479 case REG:
4480 /* Cost is just the cost of the RHS of the set. */
4481 *cost += rtx_cost (op1, SET, 1, true);
4482 return true;
4484 case ZERO_EXTRACT: /* Bit-field insertion. */
4485 case SIGN_EXTRACT:
4486 /* Strip any redundant widening of the RHS to meet the width of
4487 the target. */
4488 if (GET_CODE (op1) == SUBREG)
4489 op1 = SUBREG_REG (op1);
4490 if ((GET_CODE (op1) == ZERO_EXTEND
4491 || GET_CODE (op1) == SIGN_EXTEND)
4492 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4493 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4494 >= INTVAL (XEXP (op0, 1))))
4495 op1 = XEXP (op1, 0);
4496 *cost += rtx_cost (op1, SET, 1, speed);
4497 return true;
4499 default:
4500 break;
4502 return false;
4504 case MEM:
4505 if (speed)
4506 *cost += extra_cost->memory_load;
4508 return true;
4510 case NEG:
4511 op0 = CONST0_RTX (GET_MODE (x));
4512 op1 = XEXP (x, 0);
4513 goto cost_minus;
4515 case COMPARE:
4516 op0 = XEXP (x, 0);
4517 op1 = XEXP (x, 1);
4519 if (op1 == const0_rtx
4520 && GET_CODE (op0) == AND)
4522 x = op0;
4523 goto cost_logic;
4526 /* Comparisons can work if the order is swapped.
4527 Canonicalization puts the more complex operation first, but
4528 we want it in op1. */
4529 if (! (REG_P (op0)
4530 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4532 op0 = XEXP (x, 1);
4533 op1 = XEXP (x, 0);
4535 goto cost_minus;
4537 case MINUS:
4538 op0 = XEXP (x, 0);
4539 op1 = XEXP (x, 1);
4541 cost_minus:
4542 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4543 || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4544 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4546 if (op0 != const0_rtx)
4547 *cost += rtx_cost (op0, MINUS, 0, speed);
4549 if (CONST_INT_P (op1))
4551 if (!aarch64_uimm12_shift (INTVAL (op1)))
4552 *cost += rtx_cost (op1, MINUS, 1, speed);
4554 else
4556 op1 = aarch64_strip_shift_or_extend (op1);
4557 *cost += rtx_cost (op1, MINUS, 1, speed);
4559 return true;
4562 return false;
4564 case PLUS:
4565 op0 = XEXP (x, 0);
4566 op1 = XEXP (x, 1);
4568 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4570 if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4572 *cost += rtx_cost (op0, PLUS, 0, speed);
4574 else
4576 rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4578 if (new_op0 == op0
4579 && GET_CODE (op0) == MULT)
4581 if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4582 && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4583 || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4584 && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4586 *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4587 speed)
4588 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4589 speed)
4590 + rtx_cost (op1, PLUS, 1, speed));
4591 if (speed)
4592 *cost += extra_cost->int_multiply_extend_add;
4593 return true;
4595 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4596 + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4597 + rtx_cost (op1, PLUS, 1, speed));
4599 if (speed)
4600 *cost += extra_cost->int_multiply_add;
4603 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4604 + rtx_cost (op1, PLUS, 1, speed));
4606 return true;
4609 return false;
4611 case IOR:
4612 case XOR:
4613 case AND:
4614 cost_logic:
4615 op0 = XEXP (x, 0);
4616 op1 = XEXP (x, 1);
4618 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4620 if (CONST_INT_P (op1)
4621 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4623 *cost += rtx_cost (op0, AND, 0, speed);
4625 else
4627 if (GET_CODE (op0) == NOT)
4628 op0 = XEXP (op0, 0);
4629 op0 = aarch64_strip_shift (op0);
4630 *cost += (rtx_cost (op0, AND, 0, speed)
4631 + rtx_cost (op1, AND, 1, speed));
4633 return true;
4635 return false;
4637 case ZERO_EXTEND:
4638 if ((GET_MODE (x) == DImode
4639 && GET_MODE (XEXP (x, 0)) == SImode)
4640 || GET_CODE (XEXP (x, 0)) == MEM)
4642 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4643 return true;
4645 return false;
4647 case SIGN_EXTEND:
4648 if (GET_CODE (XEXP (x, 0)) == MEM)
4650 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4651 return true;
4653 return false;
4655 case ROTATE:
4656 if (!CONST_INT_P (XEXP (x, 1)))
4657 *cost += COSTS_N_INSNS (2);
4658 /* Fall through. */
4659 case ROTATERT:
4660 case LSHIFTRT:
4661 case ASHIFT:
4662 case ASHIFTRT:
4664 /* Shifting by a register often takes an extra cycle. */
4665 if (speed && !CONST_INT_P (XEXP (x, 1)))
4666 *cost += extra_cost->register_shift;
4668 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4669 return true;
4671 case HIGH:
4672 if (!CONSTANT_P (XEXP (x, 0)))
4673 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4674 return true;
4676 case LO_SUM:
4677 if (!CONSTANT_P (XEXP (x, 1)))
4678 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4679 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4680 return true;
4682 case ZERO_EXTRACT:
4683 case SIGN_EXTRACT:
4684 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4685 return true;
4687 case MULT:
4688 op0 = XEXP (x, 0);
4689 op1 = XEXP (x, 1);
4691 *cost = COSTS_N_INSNS (1);
4692 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4694 if (CONST_INT_P (op1)
4695 && exact_log2 (INTVAL (op1)) > 0)
4697 *cost += rtx_cost (op0, ASHIFT, 0, speed);
4698 return true;
4701 if ((GET_CODE (op0) == ZERO_EXTEND
4702 && GET_CODE (op1) == ZERO_EXTEND)
4703 || (GET_CODE (op0) == SIGN_EXTEND
4704 && GET_CODE (op1) == SIGN_EXTEND))
4706 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4707 + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4708 if (speed)
4709 *cost += extra_cost->int_multiply_extend;
4710 return true;
4713 if (speed)
4714 *cost += extra_cost->int_multiply;
4716 else if (speed)
4718 if (GET_MODE (x) == DFmode)
4719 *cost += extra_cost->double_multiply;
4720 else if (GET_MODE (x) == SFmode)
4721 *cost += extra_cost->float_multiply;
4724 return false; /* All arguments need to be in registers. */
4726 case MOD:
4727 case UMOD:
4728 *cost = COSTS_N_INSNS (2);
4729 if (speed)
4731 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4732 *cost += (extra_cost->int_multiply_add
4733 + extra_cost->int_divide);
4734 else if (GET_MODE (x) == DFmode)
4735 *cost += (extra_cost->double_multiply
4736 + extra_cost->double_divide);
4737 else if (GET_MODE (x) == SFmode)
4738 *cost += (extra_cost->float_multiply
4739 + extra_cost->float_divide);
4741 return false; /* All arguments need to be in registers. */
4743 case DIV:
4744 case UDIV:
4745 *cost = COSTS_N_INSNS (1);
4746 if (speed)
4748 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4749 *cost += extra_cost->int_divide;
4750 else if (GET_MODE (x) == DFmode)
4751 *cost += extra_cost->double_divide;
4752 else if (GET_MODE (x) == SFmode)
4753 *cost += extra_cost->float_divide;
4755 return false; /* All arguments need to be in registers. */
4757 default:
4758 break;
4760 return false;
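/* As a concrete illustration of the PLUS handling above: a DImode
   (plus (mult (reg) (reg)) (reg)) is costed as its three register operands
   plus extra_cost->int_multiply_add when optimising for speed, i.e. it is
   treated as a single MADD, whereas (plus (reg) (const_int 12)) costs only
   the register operand, since a 12-bit immediate folds into the add
   itself. */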
4763 static int
4764 aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4765 enum machine_mode mode ATTRIBUTE_UNUSED,
4766 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4768 enum rtx_code c = GET_CODE (x);
4769 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4771 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4772 return addr_cost->pre_modify;
4774 if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4775 return addr_cost->post_modify;
4777 if (c == PLUS)
4779 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4780 return addr_cost->imm_offset;
4781 else if (GET_CODE (XEXP (x, 0)) == MULT
4782 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4783 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4784 return addr_cost->register_extend;
4786 return addr_cost->register_offset;
4788 else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4789 return addr_cost->imm_offset;
4791 return 0;
4794 static int
4795 aarch64_register_move_cost (enum machine_mode mode,
4796 reg_class_t from, reg_class_t to)
4798 const struct cpu_regmove_cost *regmove_cost
4799 = aarch64_tune_params->regmove_cost;
4801 if (from == GENERAL_REGS && to == GENERAL_REGS)
4802 return regmove_cost->GP2GP;
4803 else if (from == GENERAL_REGS)
4804 return regmove_cost->GP2FP;
4805 else if (to == GENERAL_REGS)
4806 return regmove_cost->FP2GP;
4808 /* When AdvSIMD instructions are disabled it is not possible to move
4809 a 128-bit value directly between Q registers. This is handled in
4810 secondary reload. A general register is used as a scratch to move
4811 the upper DI value and the lower DI value is moved directly,
4812 hence the cost is the sum of three moves. */
4814 if (!TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
4815 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4817 return regmove_cost->FP2FP;
4820 static int
4821 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4822 reg_class_t rclass ATTRIBUTE_UNUSED,
4823 bool in ATTRIBUTE_UNUSED)
4825 return aarch64_tune_params->memmov_cost;
4828 /* Vectorizer cost model target hooks. */
4830 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4831 static int
4832 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4833 tree vectype,
4834 int misalign ATTRIBUTE_UNUSED)
4836 unsigned elements;
4838 switch (type_of_cost)
4840 case scalar_stmt:
4841 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
4843 case scalar_load:
4844 return aarch64_tune_params->vec_costs->scalar_load_cost;
4846 case scalar_store:
4847 return aarch64_tune_params->vec_costs->scalar_store_cost;
4849 case vector_stmt:
4850 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4852 case vector_load:
4853 return aarch64_tune_params->vec_costs->vec_align_load_cost;
4855 case vector_store:
4856 return aarch64_tune_params->vec_costs->vec_store_cost;
4858 case vec_to_scalar:
4859 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
4861 case scalar_to_vec:
4862 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
4864 case unaligned_load:
4865 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
4867 case unaligned_store:
4868 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
4870 case cond_branch_taken:
4871 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
4873 case cond_branch_not_taken:
4874 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
4876 case vec_perm:
4877 case vec_promote_demote:
4878 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4880 case vec_construct:
4881 elements = TYPE_VECTOR_SUBPARTS (vectype);
4882 return elements / 2 + 1;
4884 default:
4885 gcc_unreachable ();
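/* For instance, with the formula above a vec_construct of a four-element
   vector is costed at 4 / 2 + 1 = 3 and a two-element vector at
   2 / 2 + 1 = 2. */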
4889 /* Implement targetm.vectorize.add_stmt_cost. */
4890 static unsigned
4891 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4892 struct _stmt_vec_info *stmt_info, int misalign,
4893 enum vect_cost_model_location where)
4895 unsigned *cost = (unsigned *) data;
4896 unsigned retval = 0;
4898 if (flag_vect_cost_model)
4900 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4901 int stmt_cost =
4902 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
4904 /* Statements in an inner loop relative to the loop being
4905 vectorized are weighted more heavily. The value here is
4906 a function (linear for now) of the loop nest level. */
4907 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4909 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4910 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
4911 unsigned nest_level = loop_depth (loop);
4913 count *= nest_level;
4916 retval = (unsigned) (count * stmt_cost);
4917 cost[where] += retval;
4920 return retval;
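/* For example, a vector statement of cost 1 that occurs COUNT times in a
   loop nested one level inside the loop being vectorized (loop_depth of 2,
   assuming the outer loop is at depth 1) contributes 2 * COUNT to the
   vect_body bucket rather than COUNT. */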
4923 static void initialize_aarch64_code_model (void);
4925 /* Parse the architecture extension string. */
4927 static void
4928 aarch64_parse_extension (char *str)
4930 /* The extension string is parsed left to right. */
4931 const struct aarch64_option_extension *opt = NULL;
4933 /* Flag to say whether we are adding or removing an extension. */
4934 int adding_ext = -1;
4936 while (str != NULL && *str != 0)
4938 char *ext;
4939 size_t len;
4941 str++;
4942 ext = strchr (str, '+');
4944 if (ext != NULL)
4945 len = ext - str;
4946 else
4947 len = strlen (str);
4949 if (len >= 2 && strncmp (str, "no", 2) == 0)
4951 adding_ext = 0;
4952 len -= 2;
4953 str += 2;
4955 else if (len > 0)
4956 adding_ext = 1;
4958 if (len == 0)
4960 error ("missing feature modifier after %qs", "+no");
4961 return;
4964 /* Scan over the extensions table trying to find an exact match. */
4965 for (opt = all_extensions; opt->name != NULL; opt++)
4967 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
4969 /* Add or remove the extension. */
4970 if (adding_ext)
4971 aarch64_isa_flags |= opt->flags_on;
4972 else
4973 aarch64_isa_flags &= ~(opt->flags_off);
4974 break;
4978 if (opt->name == NULL)
4980 /* Extension not found in list. */
4981 error ("unknown feature modifier %qs", str);
4982 return;
4985 str = ext;
4988 return;
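/* A free-standing sketch of the tokenisation above, using plain strings in
   place of the all_extensions table; given "+fp+nosimd" it reports
   ("fp", add) followed by ("simd", remove).  The helper name and the use
   of stdio are purely illustrative. */
#if 0
#include <stdio.h>
#include <string.h>

static void
tokenize_extensions_sketch (const char *str)
{
  while (str != NULL && *str != 0)
    {
      const char *next;
      size_t len;
      int adding = 1;

      str++;                            /* Skip the leading '+'.  */
      next = strchr (str, '+');
      len = next ? (size_t) (next - str) : strlen (str);

      /* A "no" prefix means the extension is being removed.  */
      if (len >= 2 && strncmp (str, "no", 2) == 0)
        {
          adding = 0;
          str += 2;
          len -= 2;
        }

      printf ("%s %.*s\n", adding ? "add" : "remove", (int) len, str);
      str = next;
    }
}
#endif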
4991 /* Parse the ARCH string. */
4993 static void
4994 aarch64_parse_arch (void)
4996 char *ext;
4997 const struct processor *arch;
4998 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
4999 size_t len;
5001 strcpy (str, aarch64_arch_string);
5003 ext = strchr (str, '+');
5005 if (ext != NULL)
5006 len = ext - str;
5007 else
5008 len = strlen (str);
5010 if (len == 0)
5012 error ("missing arch name in -march=%qs", str);
5013 return;
5016 /* Loop through the list of supported ARCHs to find a match. */
5017 for (arch = all_architectures; arch->name != NULL; arch++)
5019 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
5021 selected_arch = arch;
5022 aarch64_isa_flags = selected_arch->flags;
5023 selected_cpu = &all_cores[selected_arch->core];
5025 if (ext != NULL)
5027 /* ARCH string contains at least one extension. */
5028 aarch64_parse_extension (ext);
5031 return;
5035 /* ARCH name not found in list. */
5036 error ("unknown value %qs for -march", str);
5037 return;
5040 /* Parse the CPU string. */
5042 static void
5043 aarch64_parse_cpu (void)
5045 char *ext;
5046 const struct processor *cpu;
5047 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
5048 size_t len;
5050 strcpy (str, aarch64_cpu_string);
5052 ext = strchr (str, '+');
5054 if (ext != NULL)
5055 len = ext - str;
5056 else
5057 len = strlen (str);
5059 if (len == 0)
5061 error ("missing cpu name in -mcpu=%qs", str);
5062 return;
5065 /* Loop through the list of supported CPUs to find a match. */
5066 for (cpu = all_cores; cpu->name != NULL; cpu++)
5068 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
5070 selected_cpu = cpu;
5071 aarch64_isa_flags = selected_cpu->flags;
5073 if (ext != NULL)
5075 /* CPU string contains at least one extension. */
5076 aarch64_parse_extension (ext);
5079 return;
5083 /* CPU name not found in list. */
5084 error ("unknown value %qs for -mcpu", str);
5085 return;
5088 /* Parse the TUNE string. */
5090 static void
5091 aarch64_parse_tune (void)
5093 const struct processor *cpu;
5094 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
5095 strcpy (str, aarch64_tune_string);
5097 /* Loop through the list of supported CPUs to find a match. */
5098 for (cpu = all_cores; cpu->name != NULL; cpu++)
5100 if (strcmp (cpu->name, str) == 0)
5102 selected_tune = cpu;
5103 return;
5107 /* CPU name not found in list. */
5108 error ("unknown value %qs for -mtune", str);
5109 return;
5113 /* Implement TARGET_OPTION_OVERRIDE. */
5115 static void
5116 aarch64_override_options (void)
5118 /* -march wins over -mcpu: when -march is given, -mcpu is ignored and the
5119 CPU is derived from the architecture; otherwise the architecture remains
5120 unselected and is implied by -mcpu. -mtune can be used with either. */
5122 if (aarch64_arch_string)
5124 aarch64_parse_arch ();
5125 aarch64_cpu_string = NULL;
5128 if (aarch64_cpu_string)
5130 aarch64_parse_cpu ();
5131 selected_arch = NULL;
5134 if (aarch64_tune_string)
5136 aarch64_parse_tune ();
5139 initialize_aarch64_code_model ();
5141 aarch64_build_bitmask_table ();
5143 /* This target defaults to strict volatile bitfields. */
5144 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
5145 flag_strict_volatile_bitfields = 1;
5147 /* If the user did not specify a processor, choose the default
5148 one for them. This will be the CPU set during configuration using
5149 --with-cpu, otherwise it is "generic". */
5150 if (!selected_cpu)
5152 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
5153 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
5156 gcc_assert (selected_cpu);
5158 /* The selected cpu may be an architecture, so look up tuning by core ID. */
5159 if (!selected_tune)
5160 selected_tune = &all_cores[selected_cpu->core];
5162 aarch64_tune_flags = selected_tune->flags;
5163 aarch64_tune = selected_tune->core;
5164 aarch64_tune_params = selected_tune->tune;
5166 aarch64_override_options_after_change ();
5169 /* Implement targetm.override_options_after_change. */
5171 static void
5172 aarch64_override_options_after_change (void)
5174 faked_omit_frame_pointer = false;
5176 /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
5177 that aarch64_frame_pointer_required will be called. We need to remember
5178 whether flag_omit_frame_pointer was turned on normally or just faked. */
5180 if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
5182 flag_omit_frame_pointer = true;
5183 faked_omit_frame_pointer = true;
5187 static struct machine_function *
5188 aarch64_init_machine_status (void)
5190 struct machine_function *machine;
5191 machine = ggc_alloc_cleared_machine_function ();
5192 return machine;
5195 void
5196 aarch64_init_expanders (void)
5198 init_machine_status = aarch64_init_machine_status;
5201 /* Select and sanity-check the code model to use, taking flag_pic into account. */
5202 static void
5203 initialize_aarch64_code_model (void)
5205 if (flag_pic)
5207 switch (aarch64_cmodel_var)
5209 case AARCH64_CMODEL_TINY:
5210 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
5211 break;
5212 case AARCH64_CMODEL_SMALL:
5213 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
5214 break;
5215 case AARCH64_CMODEL_LARGE:
5216 sorry ("code model %qs with -f%s", "large",
5217 flag_pic > 1 ? "PIC" : "pic");
5218 default:
5219 gcc_unreachable ();
5222 else
5223 aarch64_cmodel = aarch64_cmodel_var;
5226 /* Return true if SYMBOL_REF X binds locally. */
5228 static bool
5229 aarch64_symbol_binds_local_p (const_rtx x)
5231 return (SYMBOL_REF_DECL (x)
5232 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
5233 : SYMBOL_REF_LOCAL_P (x));
5236 /* Return true if SYMBOL_REF X is thread-local. */
5237 static bool
5238 aarch64_tls_symbol_p (rtx x)
5240 if (! TARGET_HAVE_TLS)
5241 return false;
5243 if (GET_CODE (x) != SYMBOL_REF)
5244 return false;
5246 return SYMBOL_REF_TLS_MODEL (x) != 0;
5249 /* Classify a TLS symbol into one of the TLS kinds. */
5250 enum aarch64_symbol_type
5251 aarch64_classify_tls_symbol (rtx x)
5253 enum tls_model tls_kind = tls_symbolic_operand_type (x);
5255 switch (tls_kind)
5257 case TLS_MODEL_GLOBAL_DYNAMIC:
5258 case TLS_MODEL_LOCAL_DYNAMIC:
5259 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
5261 case TLS_MODEL_INITIAL_EXEC:
5262 return SYMBOL_SMALL_GOTTPREL;
5264 case TLS_MODEL_LOCAL_EXEC:
5265 return SYMBOL_SMALL_TPREL;
5267 case TLS_MODEL_EMULATED:
5268 case TLS_MODEL_NONE:
5269 return SYMBOL_FORCE_TO_MEM;
5271 default:
5272 gcc_unreachable ();
5276 /* Return the method that should be used to access SYMBOL_REF or
5277 LABEL_REF X in context CONTEXT. */
5279 enum aarch64_symbol_type
5280 aarch64_classify_symbol (rtx x,
5281 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
5283 if (GET_CODE (x) == LABEL_REF)
5285 switch (aarch64_cmodel)
5287 case AARCH64_CMODEL_LARGE:
5288 return SYMBOL_FORCE_TO_MEM;
5290 case AARCH64_CMODEL_TINY_PIC:
5291 case AARCH64_CMODEL_TINY:
5292 return SYMBOL_TINY_ABSOLUTE;
5294 case AARCH64_CMODEL_SMALL_PIC:
5295 case AARCH64_CMODEL_SMALL:
5296 return SYMBOL_SMALL_ABSOLUTE;
5298 default:
5299 gcc_unreachable ();
5303 if (GET_CODE (x) == SYMBOL_REF)
5305 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
5306 || CONSTANT_POOL_ADDRESS_P (x))
5307 return SYMBOL_FORCE_TO_MEM;
5309 if (aarch64_tls_symbol_p (x))
5310 return aarch64_classify_tls_symbol (x);
5312 switch (aarch64_cmodel)
5314 case AARCH64_CMODEL_TINY:
5315 if (SYMBOL_REF_WEAK (x))
5316 return SYMBOL_FORCE_TO_MEM;
5317 return SYMBOL_TINY_ABSOLUTE;
5319 case AARCH64_CMODEL_SMALL:
5320 if (SYMBOL_REF_WEAK (x))
5321 return SYMBOL_FORCE_TO_MEM;
5322 return SYMBOL_SMALL_ABSOLUTE;
5324 case AARCH64_CMODEL_TINY_PIC:
5325 if (!aarch64_symbol_binds_local_p (x))
5326 return SYMBOL_TINY_GOT;
5327 return SYMBOL_TINY_ABSOLUTE;
5329 case AARCH64_CMODEL_SMALL_PIC:
5330 if (!aarch64_symbol_binds_local_p (x))
5331 return SYMBOL_SMALL_GOT;
5332 return SYMBOL_SMALL_ABSOLUTE;
5334 default:
5335 gcc_unreachable ();
5339 /* By default push everything into the constant pool. */
5340 return SYMBOL_FORCE_TO_MEM;
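/* For example, under AARCH64_CMODEL_SMALL_PIC a reference to an externally
   visible global is classified as SYMBOL_SMALL_GOT, while a file-local
   static binds locally and resolves to SYMBOL_SMALL_ABSOLUTE; under the
   plain small model a weak symbol is forced into the literal pool, since
   its definition may not satisfy the model's addressing assumptions. */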
5343 bool
5344 aarch64_constant_address_p (rtx x)
5346 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5349 bool
5350 aarch64_legitimate_pic_operand_p (rtx x)
5352 if (GET_CODE (x) == SYMBOL_REF
5353 || (GET_CODE (x) == CONST
5354 && GET_CODE (XEXP (x, 0)) == PLUS
5355 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5356 return false;
5358 return true;
5361 /* Return true if X holds either a quarter-precision or
5362 floating-point +0.0 constant. */
5363 static bool
5364 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5366 if (!CONST_DOUBLE_P (x))
5367 return false;
5369 /* TODO: We could handle moving 0.0 to a TFmode register,
5370 but first we would like to refactor the movtf_aarch64
5371 pattern to be more amenable to splitting moves properly and
5372 to gating correctly on TARGET_SIMD. For now, reject all
5373 constants that are not destined for SFmode or DFmode registers. */
5374 if (!(mode == SFmode || mode == DFmode))
5375 return false;
5377 if (aarch64_float_const_zero_rtx_p (x))
5378 return true;
5379 return aarch64_float_const_representable_p (x);
5382 static bool
5383 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5385 /* Do not allow vector struct mode constants. We could support
5386 0 and -1 easily, but they need support in aarch64-simd.md. */
5387 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5388 return false;
5390 /* This could probably go away because
5391 we now decompose CONST_INTs according to expand_mov_immediate. */
5392 if ((GET_CODE (x) == CONST_VECTOR
5393 && aarch64_simd_valid_immediate (x, mode, false, NULL))
5394 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5395 return !targetm.cannot_force_const_mem (mode, x);
5397 if (GET_CODE (x) == HIGH
5398 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5399 return true;
5401 return aarch64_constant_address_p (x);
5405 aarch64_load_tp (rtx target)
5407 if (!target
5408 || GET_MODE (target) != Pmode
5409 || !register_operand (target, Pmode))
5410 target = gen_reg_rtx (Pmode);
5412 /* Can return in any reg. */
5413 emit_insn (gen_aarch64_load_tp_hard (target));
5414 return target;
5417 /* On AAPCS systems, this is the "struct __va_list". */
5418 static GTY(()) tree va_list_type;
5420 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5421 Return the type to use as __builtin_va_list.
5423 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5425 struct __va_list
5427 void *__stack;
5428 void *__gr_top;
5429 void *__vr_top;
5430 int __gr_offs;
5431 int __vr_offs;
5432 }; */
5434 static tree
5435 aarch64_build_builtin_va_list (void)
5437 tree va_list_name;
5438 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5440 /* Create the type. */
5441 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5442 /* Give it the required name. */
5443 va_list_name = build_decl (BUILTINS_LOCATION,
5444 TYPE_DECL,
5445 get_identifier ("__va_list"),
5446 va_list_type);
5447 DECL_ARTIFICIAL (va_list_name) = 1;
5448 TYPE_NAME (va_list_type) = va_list_name;
5449 TYPE_STUB_DECL (va_list_type) = va_list_name;
5451 /* Create the fields. */
5452 f_stack = build_decl (BUILTINS_LOCATION,
5453 FIELD_DECL, get_identifier ("__stack"),
5454 ptr_type_node);
5455 f_grtop = build_decl (BUILTINS_LOCATION,
5456 FIELD_DECL, get_identifier ("__gr_top"),
5457 ptr_type_node);
5458 f_vrtop = build_decl (BUILTINS_LOCATION,
5459 FIELD_DECL, get_identifier ("__vr_top"),
5460 ptr_type_node);
5461 f_groff = build_decl (BUILTINS_LOCATION,
5462 FIELD_DECL, get_identifier ("__gr_offs"),
5463 integer_type_node);
5464 f_vroff = build_decl (BUILTINS_LOCATION,
5465 FIELD_DECL, get_identifier ("__vr_offs"),
5466 integer_type_node);
5468 DECL_ARTIFICIAL (f_stack) = 1;
5469 DECL_ARTIFICIAL (f_grtop) = 1;
5470 DECL_ARTIFICIAL (f_vrtop) = 1;
5471 DECL_ARTIFICIAL (f_groff) = 1;
5472 DECL_ARTIFICIAL (f_vroff) = 1;
5474 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5475 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5476 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5477 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5478 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5480 TYPE_FIELDS (va_list_type) = f_stack;
5481 DECL_CHAIN (f_stack) = f_grtop;
5482 DECL_CHAIN (f_grtop) = f_vrtop;
5483 DECL_CHAIN (f_vrtop) = f_groff;
5484 DECL_CHAIN (f_groff) = f_vroff;
5486 /* Compute its layout. */
5487 layout_type (va_list_type);
5489 return va_list_type;
5492 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5493 static void
5494 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5496 const CUMULATIVE_ARGS *cum;
5497 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5498 tree stack, grtop, vrtop, groff, vroff;
5499 tree t;
5500 int gr_save_area_size;
5501 int vr_save_area_size;
5502 int vr_offset;
5504 cum = &crtl->args.info;
5505 gr_save_area_size
5506 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5507 vr_save_area_size
5508 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5510 if (TARGET_GENERAL_REGS_ONLY)
5512 if (cum->aapcs_nvrn > 0)
5513 sorry ("%qs and floating point or vector arguments",
5514 "-mgeneral-regs-only");
5515 vr_save_area_size = 0;
5518 f_stack = TYPE_FIELDS (va_list_type_node);
5519 f_grtop = DECL_CHAIN (f_stack);
5520 f_vrtop = DECL_CHAIN (f_grtop);
5521 f_groff = DECL_CHAIN (f_vrtop);
5522 f_vroff = DECL_CHAIN (f_groff);
5524 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5525 NULL_TREE);
5526 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5527 NULL_TREE);
5528 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5529 NULL_TREE);
5530 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5531 NULL_TREE);
5532 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5533 NULL_TREE);
5535 /* Emit code to initialize STACK, which points to the next varargs stack
5536 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5537 by named arguments. STACK is 8-byte aligned. */
5538 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5539 if (cum->aapcs_stack_size > 0)
5540 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5541 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5542 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5544 /* Emit code to initialize GRTOP, the top of the GR save area.
5545 virtual_incoming_args_rtx should have been 16-byte aligned. */
5546 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5547 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5548 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5550 /* Emit code to initialize VRTOP, the top of the VR save area.
5551 This address is gr_save_area_bytes below GRTOP, rounded
5552 down to the next 16-byte boundary. */
5553 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5554 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5555 STACK_BOUNDARY / BITS_PER_UNIT);
5557 if (vr_offset)
5558 t = fold_build_pointer_plus_hwi (t, -vr_offset);
5559 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5560 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5562 /* Emit code to initialize GROFF, the offset from GRTOP of the
5563 next GPR argument. */
5564 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5565 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5566 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5568 /* Likewise emit code to initialize VROFF, the offset from VRTOP
5569 of the next VR argument. */
5570 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5571 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5572 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
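/* A hedged worked example of the values set up above: for
   "int f (int a, double b, ...)" on LP64, assuming NUM_ARG_REGS and
   NUM_FP_ARG_REGS are both 8, UNITS_PER_WORD is 8 and UNITS_PER_VREG is 16,
   one core and one vector register are consumed by the named arguments, so
   gr_save_area_size is 56 and vr_save_area_size is 112.  va_start therefore
   sets __gr_offs = -56, __vr_offs = -112, __gr_top to the incoming-arguments
   pointer (the save areas lie below it), __vr_top 64 bytes below __gr_top
   (56 rounded up to a 16-byte boundary), and __stack to the first
   stack-passed vararg slot. */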
5575 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5577 static tree
5578 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5579 gimple_seq *post_p ATTRIBUTE_UNUSED)
5581 tree addr;
5582 bool indirect_p;
5583 bool is_ha; /* is HFA or HVA. */
5584 bool dw_align; /* double-word align. */
5585 enum machine_mode ag_mode = VOIDmode;
5586 int nregs;
5587 enum machine_mode mode;
5589 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5590 tree stack, f_top, f_off, off, arg, roundup, on_stack;
5591 HOST_WIDE_INT size, rsize, adjust, align;
5592 tree t, u, cond1, cond2;
5594 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5595 if (indirect_p)
5596 type = build_pointer_type (type);
5598 mode = TYPE_MODE (type);
5600 f_stack = TYPE_FIELDS (va_list_type_node);
5601 f_grtop = DECL_CHAIN (f_stack);
5602 f_vrtop = DECL_CHAIN (f_grtop);
5603 f_groff = DECL_CHAIN (f_vrtop);
5604 f_vroff = DECL_CHAIN (f_groff);
5606 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5607 f_stack, NULL_TREE);
5608 size = int_size_in_bytes (type);
5609 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5611 dw_align = false;
5612 adjust = 0;
5613 if (aarch64_vfp_is_call_or_return_candidate (mode,
5614 type,
5615 &ag_mode,
5616 &nregs,
5617 &is_ha))
5619 /* TYPE passed in fp/simd registers. */
5620 if (TARGET_GENERAL_REGS_ONLY)
5621 sorry ("%qs and floating point or vector arguments",
5622 "-mgeneral-regs-only");
5624 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5625 unshare_expr (valist), f_vrtop, NULL_TREE);
5626 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5627 unshare_expr (valist), f_vroff, NULL_TREE);
5629 rsize = nregs * UNITS_PER_VREG;
5631 if (is_ha)
5633 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5634 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5636 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5637 && size < UNITS_PER_VREG)
5639 adjust = UNITS_PER_VREG - size;
5642 else
5644 /* TYPE passed in general registers. */
5645 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5646 unshare_expr (valist), f_grtop, NULL_TREE);
5647 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5648 unshare_expr (valist), f_groff, NULL_TREE);
5649 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5650 nregs = rsize / UNITS_PER_WORD;
5652 if (align > 8)
5653 dw_align = true;
5655 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5656 && size < UNITS_PER_WORD)
5658 adjust = UNITS_PER_WORD - size;
5662 /* Get a local temporary for the field value. */
5663 off = get_initialized_tmp_var (f_off, pre_p, NULL);
5665 /* Emit code to branch if off >= 0. */
5666 t = build2 (GE_EXPR, boolean_type_node, off,
5667 build_int_cst (TREE_TYPE (off), 0));
5668 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5670 if (dw_align)
5672 /* Emit: offs = (offs + 15) & -16. */
5673 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5674 build_int_cst (TREE_TYPE (off), 15));
5675 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5676 build_int_cst (TREE_TYPE (off), -16));
5677 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5679 else
5680 roundup = NULL;
5682 /* Update ap.__[g|v]r_offs */
5683 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5684 build_int_cst (TREE_TYPE (off), rsize));
5685 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5687 /* String up. */
5688 if (roundup)
5689 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5691 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5692 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5693 build_int_cst (TREE_TYPE (f_off), 0));
5694 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5696 /* String up: make sure the assignment happens before the use. */
5697 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5698 COND_EXPR_ELSE (cond1) = t;
5700 /* Prepare the trees handling the argument that is passed on the stack;
5701 the top-level node will be stored in ON_STACK. */
5702 arg = get_initialized_tmp_var (stack, pre_p, NULL);
5703 if (align > 8)
5705 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5706 t = fold_convert (intDI_type_node, arg);
5707 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5708 build_int_cst (TREE_TYPE (t), 15));
5709 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5710 build_int_cst (TREE_TYPE (t), -16));
5711 t = fold_convert (TREE_TYPE (arg), t);
5712 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5714 else
5715 roundup = NULL;
5716 /* Advance ap.__stack */
5717 t = fold_convert (intDI_type_node, arg);
5718 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5719 build_int_cst (TREE_TYPE (t), size + 7));
5720 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5721 build_int_cst (TREE_TYPE (t), -8));
5722 t = fold_convert (TREE_TYPE (arg), t);
5723 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5724 /* String up roundup and advance. */
5725 if (roundup)
5726 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5727 /* String up with arg */
5728 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5729 /* Big-endianness related address adjustment. */
5730 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5731 && size < UNITS_PER_WORD)
5733 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5734 size_int (UNITS_PER_WORD - size));
5735 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5738 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5739 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5741 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5742 t = off;
5743 if (adjust)
5744 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5745 build_int_cst (TREE_TYPE (off), adjust));
5747 t = fold_convert (sizetype, t);
5748 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5750 if (is_ha)
5752 /* type ha; // treat as "struct {ftype field[n];}"
5753 ... [computing offs]
5754 for (i = 0; i < nregs; ++i, offs += 16)
5755 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5756 return ha; */
5757 int i;
5758 tree tmp_ha, field_t, field_ptr_t;
5760 /* Declare a local variable. */
5761 tmp_ha = create_tmp_var_raw (type, "ha");
5762 gimple_add_tmp_var (tmp_ha);
5764 /* Establish the base type. */
5765 switch (ag_mode)
5767 case SFmode:
5768 field_t = float_type_node;
5769 field_ptr_t = float_ptr_type_node;
5770 break;
5771 case DFmode:
5772 field_t = double_type_node;
5773 field_ptr_t = double_ptr_type_node;
5774 break;
5775 case TFmode:
5776 field_t = long_double_type_node;
5777 field_ptr_t = long_double_ptr_type_node;
5778 break;
5779 /* Half-precision and quad-precision floats are not fully supported yet.
5780 Enable the following code once that support is complete; we still need
5781 to find the correct type node for __fp16 *. */
5782 #if 0
5783 case HFmode:
5784 field_t = float_type_node;
5785 field_ptr_t = float_ptr_type_node;
5786 break;
5787 #endif
5788 case V2SImode:
5789 case V4SImode:
5791 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5792 field_t = build_vector_type_for_mode (innertype, ag_mode);
5793 field_ptr_t = build_pointer_type (field_t);
5795 break;
5796 default:
5797 gcc_assert (0);
5800 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
5801 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5802 addr = t;
5803 t = fold_convert (field_ptr_t, addr);
5804 t = build2 (MODIFY_EXPR, field_t,
5805 build1 (INDIRECT_REF, field_t, tmp_ha),
5806 build1 (INDIRECT_REF, field_t, t));
5808 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5809 for (i = 1; i < nregs; ++i)
5811 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5812 u = fold_convert (field_ptr_t, addr);
5813 u = build2 (MODIFY_EXPR, field_t,
5814 build2 (MEM_REF, field_t, tmp_ha,
5815 build_int_cst (field_ptr_t,
5816 (i *
5817 int_size_in_bytes (field_t)))),
5818 build1 (INDIRECT_REF, field_t, u));
5819 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5822 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5823 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5826 COND_EXPR_ELSE (cond2) = t;
5827 addr = fold_convert (build_pointer_type (type), cond1);
5828 addr = build_va_arg_indirect_ref (addr);
5830 if (indirect_p)
5831 addr = build_va_arg_indirect_ref (addr);
5833 return addr;
5836 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
5838 static void
5839 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5840 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5841 int no_rtl)
5843 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5844 CUMULATIVE_ARGS local_cum;
5845 int gr_saved, vr_saved;
5847 /* The caller has advanced CUM up to, but not beyond, the last named
5848 argument. Advance a local copy of CUM past the last "real" named
5849 argument, to find out how many registers are left over. */
5850 local_cum = *cum;
5851 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
5853 /* Find out how many registers we need to save. */
5854 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5855 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5857 if (TARGET_GENERAL_REGS_ONLY)
5859 if (local_cum.aapcs_nvrn > 0)
5860 sorry ("%qs and floating point or vector arguments",
5861 "-mgeneral-regs-only");
5862 vr_saved = 0;
5865 if (!no_rtl)
5867 if (gr_saved > 0)
5869 rtx ptr, mem;
5871 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5872 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5873 - gr_saved * UNITS_PER_WORD);
5874 mem = gen_frame_mem (BLKmode, ptr);
5875 set_mem_alias_set (mem, get_varargs_alias_set ());
5877 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5878 mem, gr_saved);
5880 if (vr_saved > 0)
5882 /* We can't use move_block_from_reg, because it will use
5883 the wrong mode, storing D regs only. */
5884 enum machine_mode mode = TImode;
5885 int off, i;
5887 /* Set OFF to the offset from virtual_incoming_args_rtx of
5888 the first vector register. The VR save area lies below
5889 the GR one, and is aligned to 16 bytes. */
5890 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5891 STACK_BOUNDARY / BITS_PER_UNIT);
5892 off -= vr_saved * UNITS_PER_VREG;
5894 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5896 rtx ptr, mem;
5898 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5899 mem = gen_frame_mem (mode, ptr);
5900 set_mem_alias_set (mem, get_varargs_alias_set ());
5901 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5902 off += UNITS_PER_VREG;
5907 /* We don't save the size into *PRETEND_SIZE because we want to avoid
5908 any complication of having crtl->args.pretend_args_size changed. */
5909 cfun->machine->saved_varargs_size
5910 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5911 STACK_BOUNDARY / BITS_PER_UNIT)
5912 + vr_saved * UNITS_PER_VREG);
5915 static void
5916 aarch64_conditional_register_usage (void)
5918 int i;
5919 if (!TARGET_FLOAT)
5921 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
5923 fixed_regs[i] = 1;
5924 call_used_regs[i] = 1;
5929 /* Walk down the type tree of TYPE counting consecutive base elements.
5930 If *MODEP is VOIDmode, then set it to the first valid floating point
5931 type. If a non-floating point type is found, or if a floating point
5932 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5933 otherwise return the count in the sub-tree. */
5934 static int
5935 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5937 enum machine_mode mode;
5938 HOST_WIDE_INT size;
5940 switch (TREE_CODE (type))
5942 case REAL_TYPE:
5943 mode = TYPE_MODE (type);
5944 if (mode != DFmode && mode != SFmode && mode != TFmode)
5945 return -1;
5947 if (*modep == VOIDmode)
5948 *modep = mode;
5950 if (*modep == mode)
5951 return 1;
5953 break;
5955 case COMPLEX_TYPE:
5956 mode = TYPE_MODE (TREE_TYPE (type));
5957 if (mode != DFmode && mode != SFmode && mode != TFmode)
5958 return -1;
5960 if (*modep == VOIDmode)
5961 *modep = mode;
5963 if (*modep == mode)
5964 return 2;
5966 break;
5968 case VECTOR_TYPE:
5969 /* Use V2SImode and V4SImode as representatives of all 64-bit
5970 and 128-bit vector types. */
5971 size = int_size_in_bytes (type);
5972 switch (size)
5974 case 8:
5975 mode = V2SImode;
5976 break;
5977 case 16:
5978 mode = V4SImode;
5979 break;
5980 default:
5981 return -1;
5984 if (*modep == VOIDmode)
5985 *modep = mode;
5987 /* Vector modes are considered to be opaque: two vectors are
5988 equivalent for the purposes of being homogeneous aggregates
5989 if they are the same size. */
5990 if (*modep == mode)
5991 return 1;
5993 break;
5995 case ARRAY_TYPE:
5997 int count;
5998 tree index = TYPE_DOMAIN (type);
6000 /* Can't handle incomplete types. */
6001 if (!COMPLETE_TYPE_P (type))
6002 return -1;
6004 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
6005 if (count == -1
6006 || !index
6007 || !TYPE_MAX_VALUE (index)
6008 || !host_integerp (TYPE_MAX_VALUE (index), 1)
6009 || !TYPE_MIN_VALUE (index)
6010 || !host_integerp (TYPE_MIN_VALUE (index), 1)
6011 || count < 0)
6012 return -1;
6014 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
6015 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
6017 /* There must be no padding. */
6018 if (!host_integerp (TYPE_SIZE (type), 1)
6019 || (tree_low_cst (TYPE_SIZE (type), 1)
6020 != count * GET_MODE_BITSIZE (*modep)))
6021 return -1;
6023 return count;
6026 case RECORD_TYPE:
6028 int count = 0;
6029 int sub_count;
6030 tree field;
6032 /* Can't handle incomplete types. */
6033 if (!COMPLETE_TYPE_P (type))
6034 return -1;
6036 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6038 if (TREE_CODE (field) != FIELD_DECL)
6039 continue;
6041 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6042 if (sub_count < 0)
6043 return -1;
6044 count += sub_count;
6047 /* There must be no padding. */
6048 if (!host_integerp (TYPE_SIZE (type), 1)
6049 || (tree_low_cst (TYPE_SIZE (type), 1)
6050 != count * GET_MODE_BITSIZE (*modep)))
6051 return -1;
6053 return count;
6056 case UNION_TYPE:
6057 case QUAL_UNION_TYPE:
6059 /* These aren't very interesting except in a degenerate case. */
6060 int count = 0;
6061 int sub_count;
6062 tree field;
6064 /* Can't handle incomplete types. */
6065 if (!COMPLETE_TYPE_P (type))
6066 return -1;
6068 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6070 if (TREE_CODE (field) != FIELD_DECL)
6071 continue;
6073 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6074 if (sub_count < 0)
6075 return -1;
6076 count = count > sub_count ? count : sub_count;
6079 /* There must be no padding. */
6080 if (!host_integerp (TYPE_SIZE (type), 1)
6081 || (tree_low_cst (TYPE_SIZE (type), 1)
6082 != count * GET_MODE_BITSIZE (*modep)))
6083 return -1;
6085 return count;
6088 default:
6089 break;
6092 return -1;
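/* Hedged illustrations of what the walk above computes for a few simple
   types, assuming the usual LP64 sizes for float and double; the structs
   below exist only for this example. */
#if 0
struct hfa_two_doubles { double x, y; };        /* returns 2, *modep = DFmode  */
struct hfa_four_floats { float v[4]; };         /* returns 4, *modep = SFmode  */
struct not_an_hfa      { float f; double d; };  /* mixed base types: returns -1 */
#endif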
6095 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
6096 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
6097 array types. The C99 floating-point complex types are also considered
6098 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
6099 types, which are GCC extensions and out of the scope of AAPCS64, are
6100 treated as composite types here as well.
6102 Note that MODE itself is not sufficient in determining whether a type
6103 is such a composite type or not. This is because
6104 stor-layout.c:compute_record_mode may have already changed the MODE
6105 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
6106 structure with only one field may have its MODE set to the mode of the
6107 field. Also an integer mode whose size matches the size of the
6108 RECORD_TYPE type may be used to substitute the original mode
6109 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
6110 solely relied on. */
6112 static bool
6113 aarch64_composite_type_p (const_tree type,
6114 enum machine_mode mode)
6116 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
6117 return true;
6119 if (mode == BLKmode
6120 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6121 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
6122 return true;
6124 return false;
6127 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6128 type as described in AAPCS64 \S 4.1.2.
6130 See the comment above aarch64_composite_type_p for the notes on MODE. */
6132 static bool
6133 aarch64_short_vector_p (const_tree type,
6134 enum machine_mode mode)
6136 HOST_WIDE_INT size = -1;
6138 if (type && TREE_CODE (type) == VECTOR_TYPE)
6139 size = int_size_in_bytes (type);
6140 else if (!aarch64_composite_type_p (type, mode)
6141 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6142 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
6143 size = GET_MODE_SIZE (mode);
6145 return (size == 8 || size == 16) ? true : false;
6148 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
6149 shall be passed or returned in simd/fp register(s) (provided that these
6150 parameter-passing registers are available).
6152 Upon successful return, *COUNT returns the number of needed registers,
6153 *BASE_MODE returns the mode of the individual register and, when IS_HA
6154 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6155 floating-point aggregate or a homogeneous short-vector aggregate. */
6157 static bool
6158 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
6159 const_tree type,
6160 enum machine_mode *base_mode,
6161 int *count,
6162 bool *is_ha)
6164 enum machine_mode new_mode = VOIDmode;
6165 bool composite_p = aarch64_composite_type_p (type, mode);
6167 if (is_ha != NULL) *is_ha = false;
6169 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
6170 || aarch64_short_vector_p (type, mode))
6172 *count = 1;
6173 new_mode = mode;
6175 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6177 if (is_ha != NULL) *is_ha = true;
6178 *count = 2;
6179 new_mode = GET_MODE_INNER (mode);
6181 else if (type && composite_p)
6183 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6185 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
6187 if (is_ha != NULL) *is_ha = true;
6188 *count = ag_count;
6190 else
6191 return false;
6193 else
6194 return false;
6196 *base_mode = new_mode;
6197 return true;
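/* Putting the two routines together: a _Complex double argument yields
   *count = 2 with *base_mode = DFmode and *is_ha set; a struct of three
   floats yields *count = 3 with *base_mode = SFmode and *is_ha set; a bare
   double yields *count = 1 and leaves *is_ha false. */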
6200 /* Implement TARGET_STRUCT_VALUE_RTX. */
6202 static rtx
6203 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
6204 int incoming ATTRIBUTE_UNUSED)
6206 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
6209 /* Implements target hook vector_mode_supported_p. */
6210 static bool
6211 aarch64_vector_mode_supported_p (enum machine_mode mode)
6213 if (TARGET_SIMD
6214 && (mode == V4SImode || mode == V8HImode
6215 || mode == V16QImode || mode == V2DImode
6216 || mode == V2SImode || mode == V4HImode
6217 || mode == V8QImode || mode == V2SFmode
6218 || mode == V4SFmode || mode == V2DFmode))
6219 return true;
6221 return false;
6224 /* Return appropriate SIMD container
6225 for MODE within a vector of WIDTH bits. */
6226 static enum machine_mode
6227 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
6229 gcc_assert (width == 64 || width == 128);
6230 if (TARGET_SIMD)
6232 if (width == 128)
6233 switch (mode)
6235 case DFmode:
6236 return V2DFmode;
6237 case SFmode:
6238 return V4SFmode;
6239 case SImode:
6240 return V4SImode;
6241 case HImode:
6242 return V8HImode;
6243 case QImode:
6244 return V16QImode;
6245 case DImode:
6246 return V2DImode;
6247 default:
6248 break;
6250 else
6251 switch (mode)
6253 case SFmode:
6254 return V2SFmode;
6255 case SImode:
6256 return V2SImode;
6257 case HImode:
6258 return V4HImode;
6259 case QImode:
6260 return V8QImode;
6261 default:
6262 break;
6265 return word_mode;
6268 /* Return 128-bit container as the preferred SIMD mode for MODE. */
6269 static enum machine_mode
6270 aarch64_preferred_simd_mode (enum machine_mode mode)
6272 return aarch64_simd_container_mode (mode, 128);
6275 /* Return the bitmask of possible vector sizes for the vectorizer
6276 to iterate over. */
6277 static unsigned int
6278 aarch64_autovectorize_vector_sizes (void)
6280 return (16 | 8);
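/* The returned mask is in bytes: 16 | 8 tells the vectorizer to iterate
   over 128-bit vectors (the preferred mode above) and then 64-bit ones,
   so for SImode elements it will consider V4SImode and then V2SImode.  */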
6283 /* A table to help perform AArch64-specific name mangling for AdvSIMD
6284 vector types in order to conform to the AAPCS64 (see "Procedure
6285 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6286 qualify for emission with the mangled names defined in that document,
6287 a vector type must not only be of the correct mode but also be
6288 composed of AdvSIMD vector element types (e.g.
6289 __builtin_aarch64_simd_qi); these types are registered by
6290 aarch64_init_simd_builtins (). In other words, vector types defined
6291 in other ways e.g. via vector_size attribute will get default
6292 mangled names. */
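/* For example, the first entry below pairs V8QImode with the element type
   __builtin_aarch64_simd_qi and the string "10__Int8x8_t", i.e. the
   Itanium C++ ABI source-name encoding of the vendor type __Int8x8_t
   (a length prefix of 10 followed by the identifier), which is what
   int8x8_t from <arm_neon.h> mangles to.  */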
6293 typedef struct
6295 enum machine_mode mode;
6296 const char *element_type_name;
6297 const char *mangled_name;
6298 } aarch64_simd_mangle_map_entry;
6300 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
6301 /* 64-bit containerized types. */
6302 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6303 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6304 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6305 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6306 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6307 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6308 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6309 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6310 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6311 /* 128-bit containerized types. */
6312 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6313 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6314 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6315 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6316 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6317 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6318 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6319 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6320 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6321 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6322 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6323 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6324 { VOIDmode, NULL, NULL }
6327 /* Implement TARGET_MANGLE_TYPE. */
6329 static const char *
6330 aarch64_mangle_type (const_tree type)
6332 /* The AArch64 ABI documents say that "__va_list" has to be
6333 mangled as if it is in the "std" namespace. */
6334 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6335 return "St9__va_list";
6337 /* Check the mode of the vector type, and the name of the vector
6338 element type, against the table. */
6339 if (TREE_CODE (type) == VECTOR_TYPE)
6341 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6343 while (pos->mode != VOIDmode)
6345 tree elt_type = TREE_TYPE (type);
6347 if (pos->mode == TYPE_MODE (type)
6348 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6349 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6350 pos->element_type_name))
6351 return pos->mangled_name;
6353 pos++;
6357 /* Use the default mangling. */
6358 return NULL;
6361 /* Return the equivalent letter for size. */
6362 static char
6363 sizetochar (int size)
6365 switch (size)
6367 case 64: return 'd';
6368 case 32: return 's';
6369 case 16: return 'h';
6370 case 8 : return 'b';
6371 default: gcc_unreachable ();
6375 /* Return true iff x is a uniform vector of floating-point
6376 constants, and the constant can be represented in
6377 quarter-precision form. Note, as aarch64_float_const_representable_p
6378 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
6379 static bool
6380 aarch64_vect_float_const_representable_p (rtx x)
6382 int i = 0;
6383 REAL_VALUE_TYPE r0, ri;
6384 rtx x0, xi;
6386 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6387 return false;
6389 x0 = CONST_VECTOR_ELT (x, 0);
6390 if (!CONST_DOUBLE_P (x0))
6391 return false;
6393 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6395 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6397 xi = CONST_VECTOR_ELT (x, i);
6398 if (!CONST_DOUBLE_P (xi))
6399 return false;
6401 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6402 if (!REAL_VALUES_EQUAL (r0, ri))
6403 return false;
6406 return aarch64_float_const_representable_p (x0);
6409 /* Return true if OP is a valid AdvSIMD immediate for MODE, false otherwise; if INFO is nonnull, describe in it how the immediate can be generated. */
6410 bool
6411 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
6412 struct simd_immediate_info *info)
6414 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6415 matches = 1; \
6416 for (i = 0; i < idx; i += (STRIDE)) \
6417 if (!(TEST)) \
6418 matches = 0; \
6419 if (matches) \
6421 immtype = (CLASS); \
6422 elsize = (ELSIZE); \
6423 eshift = (SHIFT); \
6424 emvn = (NEG); \
6425 break; \
6428 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6429 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6430 unsigned char bytes[16];
6431 int immtype = -1, matches;
6432 unsigned int invmask = inverse ? 0xff : 0;
6433 int eshift, emvn;
6435 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6437 if (! (aarch64_simd_imm_zero_p (op, mode)
6438 || aarch64_vect_float_const_representable_p (op)))
6439 return false;
6441 if (info)
6443 info->value = CONST_VECTOR_ELT (op, 0);
6444 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
6445 info->mvn = false;
6446 info->shift = 0;
6449 return true;
6452 /* Splat vector constant out into a byte vector. */
6453 for (i = 0; i < n_elts; i++)
6455 rtx el = CONST_VECTOR_ELT (op, i);
6456 unsigned HOST_WIDE_INT elpart;
6457 unsigned int part, parts;
6459 if (GET_CODE (el) == CONST_INT)
6461 elpart = INTVAL (el);
6462 parts = 1;
6464 else if (GET_CODE (el) == CONST_DOUBLE)
6466 elpart = CONST_DOUBLE_LOW (el);
6467 parts = 2;
6469 else
6470 gcc_unreachable ();
6472 for (part = 0; part < parts; part++)
6474 unsigned int byte;
6475 for (byte = 0; byte < innersize; byte++)
6477 bytes[idx++] = (elpart & 0xff) ^ invmask;
6478 elpart >>= BITS_PER_UNIT;
6480 if (GET_CODE (el) == CONST_DOUBLE)
6481 elpart = CONST_DOUBLE_HIGH (el);
6485 /* Sanity check. */
6486 gcc_assert (idx == GET_MODE_SIZE (mode));
6490 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6491 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6493 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6494 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6496 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6497 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6499 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6500 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6502 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6504 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6506 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6507 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6509 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6510 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6512 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6513 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6515 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6516 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6518 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6520 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6522 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6523 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6525 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6526 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6528 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6529 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6531 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6532 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6534 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6536 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6537 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6539 while (0);
6541 if (immtype == -1)
6542 return false;
6544 if (info)
6546 info->element_width = elsize;
6547 info->mvn = emvn != 0;
6548 info->shift = eshift;
6550 unsigned HOST_WIDE_INT imm = 0;
6552 if (immtype >= 12 && immtype <= 15)
6553 info->msl = true;
6555 /* Un-invert bytes of recognized vector, if necessary. */
6556 if (invmask != 0)
6557 for (i = 0; i < idx; i++)
6558 bytes[i] ^= invmask;
6560 if (immtype == 17)
6562 /* FIXME: Broken on 32-bit H_W_I hosts. */
6563 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6565 for (i = 0; i < 8; i++)
6566 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6567 << (i * BITS_PER_UNIT);
6570 info->value = GEN_INT (imm);
6572 else
6574 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6575 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6577 /* Construct 'abcdefgh' because the assembler cannot handle
6578 generic constants. */
6579 if (info->mvn)
6580 imm = ~imm;
6581 imm = (imm >> info->shift) & 0xff;
6582 info->value = GEN_INT (imm);
6586 return true;
6587 #undef CHECK
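/* Worked example (illustrative): for a V4SImode constant whose elements are
   all 0x0000ab00, the per-element little-endian bytes are
   { 0x00, 0xab, 0x00, 0x00 }, which matches the second CHECK pattern above
   (class 1, element size 32, shift 8, no negation).  The extracted
   immediate is 0xab, and aarch64_output_simd_mov_immediate below would
   emit something of the form "movi <Vd>.4s, 0xab, lsl 8" for it.  */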
6590 static bool
6591 aarch64_const_vec_all_same_int_p (rtx x,
6592 HOST_WIDE_INT minval,
6593 HOST_WIDE_INT maxval)
6595 HOST_WIDE_INT firstval;
6596 int count, i;
6598 if (GET_CODE (x) != CONST_VECTOR
6599 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6600 return false;
6602 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6603 if (firstval < minval || firstval > maxval)
6604 return false;
6606 count = CONST_VECTOR_NUNITS (x);
6607 for (i = 1; i < count; i++)
6608 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6609 return false;
6611 return true;
6614 /* Check that immediate shift constants are within range. */
6615 bool
6616 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6618 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6619 if (left)
6620 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6621 else
6622 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
6625 /* Return true if X is a uniform vector where all elements
6626 are either the floating-point constant 0.0 or the
6627 integer constant 0. */
6628 bool
6629 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6631 return x == CONST0_RTX (mode);
6634 bool
6635 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6637 HOST_WIDE_INT imm = INTVAL (x);
6638 int i;
6640 for (i = 0; i < 8; i++)
6642 unsigned int byte = imm & 0xff;
6643 if (byte != 0xff && byte != 0)
6644 return false;
6645 imm >>= 8;
6648 return true;
6651 bool
6652 aarch64_mov_operand_p (rtx x,
6653 enum aarch64_symbol_context context,
6654 enum machine_mode mode)
6656 if (GET_CODE (x) == HIGH
6657 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6658 return true;
6660 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
6661 return true;
6663 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
6664 return true;
6666 return aarch64_classify_symbolic_expression (x, context)
6667 == SYMBOL_TINY_ABSOLUTE;
6670 /* Return a const_int vector of VAL. */
6671 rtx
6672 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6674 int nunits = GET_MODE_NUNITS (mode);
6675 rtvec v = rtvec_alloc (nunits);
6676 int i;
6678 for (i=0; i < nunits; i++)
6679 RTVEC_ELT (v, i) = GEN_INT (val);
6681 return gen_rtx_CONST_VECTOR (mode, v);
6684 /* Check OP is a legal scalar immediate for the MOVI instruction. */
6686 bool
6687 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
6689 enum machine_mode vmode;
6691 gcc_assert (!VECTOR_MODE_P (mode));
6692 vmode = aarch64_preferred_simd_mode (mode);
6693 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
6694 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
6697 /* Construct and return a PARALLEL RTX vector. */
6698 rtx
6699 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6701 int nunits = GET_MODE_NUNITS (mode);
6702 rtvec v = rtvec_alloc (nunits / 2);
6703 int base = high ? nunits / 2 : 0;
6704 rtx t1;
6705 int i;
6707 for (i=0; i < nunits / 2; i++)
6708 RTVEC_ELT (v, i) = GEN_INT (base + i);
6710 t1 = gen_rtx_PARALLEL (mode, v);
6711 return t1;
6714 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6715 HIGH (exclusive). */
6716 void
6717 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6719 HOST_WIDE_INT lane;
6720 gcc_assert (GET_CODE (operand) == CONST_INT);
6721 lane = INTVAL (operand);
6723 if (lane < low || lane >= high)
6724 error ("lane out of range");
6727 void
6728 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6730 gcc_assert (GET_CODE (operand) == CONST_INT);
6731 HOST_WIDE_INT lane = INTVAL (operand);
6733 if (lane < low || lane >= high)
6734 error ("constant out of range");
6737 /* Emit code to reinterpret one AdvSIMD type as another,
6738 without altering bits. */
6739 void
6740 aarch64_simd_reinterpret (rtx dest, rtx src)
6742 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6745 /* Emit code to place an AdvSIMD pair result in memory locations (with equal
6746 registers). */
6747 void
6748 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6749 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6750 rtx op1)
6752 rtx mem = gen_rtx_MEM (mode, destaddr);
6753 rtx tmp1 = gen_reg_rtx (mode);
6754 rtx tmp2 = gen_reg_rtx (mode);
6756 emit_insn (intfn (tmp1, op1, tmp2));
6758 emit_move_insn (mem, tmp1);
6759 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6760 emit_move_insn (mem, tmp2);
6763 /* Return TRUE if OP is a valid vector addressing mode. */
6764 bool
6765 aarch64_simd_mem_operand_p (rtx op)
6767 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6768 || GET_CODE (XEXP (op, 0)) == REG);
6771 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
6772 not to early-clobber SRC registers in the process.
6774 We assume that the operands described by SRC and DEST represent a
6775 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6776 number of components into which the copy has been decomposed. */
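/* For example, when a 2-register value is copied from {V1, V2} to {V2, V3},
   the destination overlaps the source and has the higher register number,
   so the moves are emitted in reverse order (V3 <- V2 first, then V2 <- V1)
   to avoid clobbering V2 before it has been read.  */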
6777 void
6778 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6779 rtx *src, unsigned int count)
6781 unsigned int i;
6783 if (!reg_overlap_mentioned_p (operands[0], operands[1])
6784 || REGNO (operands[0]) < REGNO (operands[1]))
6786 for (i = 0; i < count; i++)
6788 operands[2 * i] = dest[i];
6789 operands[2 * i + 1] = src[i];
6792 else
6794 for (i = 0; i < count; i++)
6796 operands[2 * i] = dest[count - i - 1];
6797 operands[2 * i + 1] = src[count - i - 1];
6802 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6803 one of VSTRUCT modes: OI, CI or XI. */
6804 int
6805 aarch64_simd_attr_length_move (rtx insn)
6807 enum machine_mode mode;
6809 extract_insn_cached (insn);
6811 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6813 mode = GET_MODE (recog_data.operand[0]);
6814 switch (mode)
6816 case OImode:
6817 return 8;
6818 case CImode:
6819 return 12;
6820 case XImode:
6821 return 16;
6822 default:
6823 gcc_unreachable ();
6826 return 4;
6829 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6830 alignment of a vector to 128 bits. */
6831 static HOST_WIDE_INT
6832 aarch64_simd_vector_alignment (const_tree type)
6834 HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
6835 return MIN (align, 128);
6838 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
6839 static bool
6840 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6842 if (is_packed)
6843 return false;
6845 /* We guarantee alignment for vectors up to 128 bits. */
6846 if (tree_int_cst_compare (TYPE_SIZE (type),
6847 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6848 return false;
6850 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
6851 return true;
6854 /* If VALS is a vector constant that can be loaded into a register
6855 using DUP, generate instructions to do so and return an RTX to
6856 assign to the register. Otherwise return NULL_RTX. */
6857 static rtx
6858 aarch64_simd_dup_constant (rtx vals)
6860 enum machine_mode mode = GET_MODE (vals);
6861 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6862 int n_elts = GET_MODE_NUNITS (mode);
6863 bool all_same = true;
6864 rtx x;
6865 int i;
6867 if (GET_CODE (vals) != CONST_VECTOR)
6868 return NULL_RTX;
6870 for (i = 1; i < n_elts; ++i)
6872 x = CONST_VECTOR_ELT (vals, i);
6873 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
6874 all_same = false;
6877 if (!all_same)
6878 return NULL_RTX;
6880 /* We can load this constant by using DUP and a constant in a
6881 single ARM register. This will be cheaper than a vector
6882 load. */
6883 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
6884 return gen_rtx_VEC_DUPLICATE (mode, x);
6888 /* Generate code to load VALS, which is a PARALLEL containing only
6889 constants (for vec_init) or CONST_VECTOR, efficiently into a
6890 register. Returns an RTX to copy into the register, or NULL_RTX
6891 for a PARALLEL that can not be converted into a CONST_VECTOR. */
6892 static rtx
6893 aarch64_simd_make_constant (rtx vals)
6895 enum machine_mode mode = GET_MODE (vals);
6896 rtx const_dup;
6897 rtx const_vec = NULL_RTX;
6898 int n_elts = GET_MODE_NUNITS (mode);
6899 int n_const = 0;
6900 int i;
6902 if (GET_CODE (vals) == CONST_VECTOR)
6903 const_vec = vals;
6904 else if (GET_CODE (vals) == PARALLEL)
6906 /* A CONST_VECTOR must contain only CONST_INTs and
6907 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
6908 Only store valid constants in a CONST_VECTOR. */
6909 for (i = 0; i < n_elts; ++i)
6911 rtx x = XVECEXP (vals, 0, i);
6912 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
6913 n_const++;
6915 if (n_const == n_elts)
6916 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6918 else
6919 gcc_unreachable ();
6921 if (const_vec != NULL_RTX
6922 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
6923 /* Load using MOVI/MVNI. */
6924 return const_vec;
6925 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
6926 /* Loaded using DUP. */
6927 return const_dup;
6928 else if (const_vec != NULL_RTX)
6929 /* Load from constant pool. We can not take advantage of single-cycle
6930 LD1 because we need a PC-relative addressing mode. */
6931 return const_vec;
6932 else
6933 /* A PARALLEL containing something not valid inside CONST_VECTOR.
6934 We can not construct an initializer. */
6935 return NULL_RTX;
6938 void
6939 aarch64_expand_vector_init (rtx target, rtx vals)
6941 enum machine_mode mode = GET_MODE (target);
6942 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6943 int n_elts = GET_MODE_NUNITS (mode);
6944 int n_var = 0, one_var = -1;
6945 bool all_same = true;
6946 rtx x, mem;
6947 int i;
6949 x = XVECEXP (vals, 0, 0);
6950 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6951 n_var = 1, one_var = 0;
6953 for (i = 1; i < n_elts; ++i)
6955 x = XVECEXP (vals, 0, i);
6956 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6957 ++n_var, one_var = i;
6959 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6960 all_same = false;
6963 if (n_var == 0)
6965 rtx constant = aarch64_simd_make_constant (vals);
6966 if (constant != NULL_RTX)
6968 emit_move_insn (target, constant);
6969 return;
6973 /* Splat a single non-constant element if we can. */
6974 if (all_same)
6976 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
6977 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
6978 return;
6981 /* One field is non-constant. Load constant then overwrite varying
6982 field. This is more efficient than using the stack. */
6983 if (n_var == 1)
6985 rtx copy = copy_rtx (vals);
6986 rtx index = GEN_INT (one_var);
6987 enum insn_code icode;
6989 /* Load constant part of vector, substitute neighboring value for
6990 varying element. */
6991 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
6992 aarch64_expand_vector_init (target, copy);
6994 /* Insert variable. */
6995 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
6996 icode = optab_handler (vec_set_optab, mode);
6997 gcc_assert (icode != CODE_FOR_nothing);
6998 emit_insn (GEN_FCN (icode) (target, x, index));
6999 return;
7002 /* Construct the vector in memory one field at a time
7003 and load the whole vector. */
7004 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7005 for (i = 0; i < n_elts; i++)
7006 emit_move_insn (adjust_address_nv (mem, inner_mode,
7007 i * GET_MODE_SIZE (inner_mode)),
7008 XVECEXP (vals, 0, i));
7009 emit_move_insn (target, mem);
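/* For instance, initialising a V4SImode vector with { x, 1, 2, 3 } (where x
   is not constant) takes the n_var == 1 path above: the constant vector
   { 1, 1, 2, 3 } is materialised first (the varying lane borrows its
   neighbour's value) and then lane 0 is overwritten with x via the vec_set
   pattern.  */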
7013 static unsigned HOST_WIDE_INT
7014 aarch64_shift_truncation_mask (enum machine_mode mode)
7016 return
7017 (aarch64_vector_mode_supported_p (mode)
7018 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
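/* So for scalar shifts the mask is the natural one (31 for SImode, 63 for
   DImode), letting the middle end drop an explicit "& 31"/"& 63" on shift
   counts, while a mask of 0 for vector and vector-structure modes means no
   such truncation may be assumed for SIMD shifts.  */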
7021 #ifndef TLS_SECTION_ASM_FLAG
7022 #define TLS_SECTION_ASM_FLAG 'T'
7023 #endif
7025 void
7026 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
7027 tree decl ATTRIBUTE_UNUSED)
7029 char flagchars[10], *f = flagchars;
7031 /* If we have already declared this section, we can use an
7032 abbreviated form to switch back to it -- unless this section is
7033 part of a COMDAT group, in which case GAS requires the full
7034 declaration every time. */
7035 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7036 && (flags & SECTION_DECLARED))
7038 fprintf (asm_out_file, "\t.section\t%s\n", name);
7039 return;
7042 if (!(flags & SECTION_DEBUG))
7043 *f++ = 'a';
7044 if (flags & SECTION_WRITE)
7045 *f++ = 'w';
7046 if (flags & SECTION_CODE)
7047 *f++ = 'x';
7048 if (flags & SECTION_SMALL)
7049 *f++ = 's';
7050 if (flags & SECTION_MERGE)
7051 *f++ = 'M';
7052 if (flags & SECTION_STRINGS)
7053 *f++ = 'S';
7054 if (flags & SECTION_TLS)
7055 *f++ = TLS_SECTION_ASM_FLAG;
7056 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7057 *f++ = 'G';
7058 *f = '\0';
7060 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
7062 if (!(flags & SECTION_NOTYPE))
7064 const char *type;
7065 const char *format;
7067 if (flags & SECTION_BSS)
7068 type = "nobits";
7069 else
7070 type = "progbits";
7072 #ifdef TYPE_OPERAND_FMT
7073 format = "," TYPE_OPERAND_FMT;
7074 #else
7075 format = ",@%s";
7076 #endif
7078 fprintf (asm_out_file, format, type);
7080 if (flags & SECTION_ENTSIZE)
7081 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
7082 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7084 if (TREE_CODE (decl) == IDENTIFIER_NODE)
7085 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
7086 else
7087 fprintf (asm_out_file, ",%s,comdat",
7088 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
7092 putc ('\n', asm_out_file);
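/* As a rough illustration of the output (the section and group names here
   are made up): a COMDAT text section for an inline function might be
   emitted as

     .section .text._Z3foov,"axG",@progbits,_Z3foov,comdat

   (allocatable + executable + group flags, progbits type, group name,
   comdat), while a previously declared ordinary section is re-entered
   with just ".section <name>".  */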
7095 /* Select a format to encode pointers in exception handling data. */
7096 int
7097 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
7099 int type;
7100 switch (aarch64_cmodel)
7102 case AARCH64_CMODEL_TINY:
7103 case AARCH64_CMODEL_TINY_PIC:
7104 case AARCH64_CMODEL_SMALL:
7105 case AARCH64_CMODEL_SMALL_PIC:
7106 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
7107 for everything. */
7108 type = DW_EH_PE_sdata4;
7109 break;
7110 default:
7111 /* No assumptions here. 8-byte relocs required. */
7112 type = DW_EH_PE_sdata8;
7113 break;
7115 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
7118 /* Emit load exclusive. */
7120 static void
7121 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
7122 rtx mem, rtx model_rtx)
7124 rtx (*gen) (rtx, rtx, rtx);
7126 switch (mode)
7128 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
7129 case HImode: gen = gen_aarch64_load_exclusivehi; break;
7130 case SImode: gen = gen_aarch64_load_exclusivesi; break;
7131 case DImode: gen = gen_aarch64_load_exclusivedi; break;
7132 default:
7133 gcc_unreachable ();
7136 emit_insn (gen (rval, mem, model_rtx));
7139 /* Emit store exclusive. */
7141 static void
7142 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
7143 rtx rval, rtx mem, rtx model_rtx)
7145 rtx (*gen) (rtx, rtx, rtx, rtx);
7147 switch (mode)
7149 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
7150 case HImode: gen = gen_aarch64_store_exclusivehi; break;
7151 case SImode: gen = gen_aarch64_store_exclusivesi; break;
7152 case DImode: gen = gen_aarch64_store_exclusivedi; break;
7153 default:
7154 gcc_unreachable ();
7157 emit_insn (gen (bval, rval, mem, model_rtx));
7160 /* Mark the previous jump instruction as unlikely. */
7162 static void
7163 aarch64_emit_unlikely_jump (rtx insn)
7165 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
7167 insn = emit_jump_insn (insn);
7168 add_reg_note (insn, REG_BR_PROB, very_unlikely);
7171 /* Expand a compare and swap pattern. */
7173 void
7174 aarch64_expand_compare_and_swap (rtx operands[])
7176 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
7177 enum machine_mode mode, cmp_mode;
7178 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
7180 bval = operands[0];
7181 rval = operands[1];
7182 mem = operands[2];
7183 oldval = operands[3];
7184 newval = operands[4];
7185 is_weak = operands[5];
7186 mod_s = operands[6];
7187 mod_f = operands[7];
7188 mode = GET_MODE (mem);
7189 cmp_mode = mode;
7191 /* Normally the succ memory model must be stronger than fail, but in the
7192 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7193 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
7195 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
7196 && INTVAL (mod_s) == MEMMODEL_RELEASE)
7197 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
7199 switch (mode)
7201 case QImode:
7202 case HImode:
7203 /* For short modes, we're going to perform the comparison in SImode,
7204 so do the zero-extension now. */
7205 cmp_mode = SImode;
7206 rval = gen_reg_rtx (SImode);
7207 oldval = convert_modes (SImode, mode, oldval, true);
7208 /* Fall through. */
7210 case SImode:
7211 case DImode:
7212 /* Force the value into a register if needed. */
7213 if (!aarch64_plus_operand (oldval, mode))
7214 oldval = force_reg (cmp_mode, oldval);
7215 break;
7217 default:
7218 gcc_unreachable ();
7221 switch (mode)
7223 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7224 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7225 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7226 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7227 default:
7228 gcc_unreachable ();
7231 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7233 if (mode == QImode || mode == HImode)
7234 emit_move_insn (operands[1], gen_lowpart (mode, rval));
7236 x = gen_rtx_REG (CCmode, CC_REGNUM);
7237 x = gen_rtx_EQ (SImode, x, const0_rtx);
7238 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
7241 /* Split a compare and swap pattern. */
7243 void
7244 aarch64_split_compare_and_swap (rtx operands[])
7246 rtx rval, mem, oldval, newval, scratch;
7247 enum machine_mode mode;
7248 bool is_weak;
7249 rtx label1, label2, x, cond;
7251 rval = operands[0];
7252 mem = operands[1];
7253 oldval = operands[2];
7254 newval = operands[3];
7255 is_weak = (operands[4] != const0_rtx);
7256 scratch = operands[7];
7257 mode = GET_MODE (mem);
7259 label1 = NULL_RTX;
7260 if (!is_weak)
7262 label1 = gen_label_rtx ();
7263 emit_label (label1);
7265 label2 = gen_label_rtx ();
7267 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
7269 cond = aarch64_gen_compare_reg (NE, rval, oldval);
7270 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7271 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7272 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7273 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7275 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7277 if (!is_weak)
7279 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7280 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7281 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7282 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7284 else
7286 cond = gen_rtx_REG (CCmode, CC_REGNUM);
7287 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7288 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7291 emit_label (label2);
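/* Sketch of the resulting sequence for a strong compare-and-swap (actual
   mnemonics depend on the memory model; register names are illustrative):

     .Lretry:
       ldxr   x0, [xmem]         ; load-exclusive
       cmp    x0, xold
       b.ne   .Ldone             ; mismatch: fail
       stxr   w1, xnew, [xmem]   ; store-exclusive, w1 = status
       cbnz   w1, .Lretry        ; lost reservation: retry
     .Ldone:

   A weak compare-and-swap omits the retry loop and instead leaves the
   store-exclusive status reflected in the CC register.  */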
7294 /* Split an atomic operation. */
7296 void
7297 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7298 rtx value, rtx model_rtx, rtx cond)
7300 enum machine_mode mode = GET_MODE (mem);
7301 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7302 rtx label, x;
7304 label = gen_label_rtx ();
7305 emit_label (label);
7307 if (new_out)
7308 new_out = gen_lowpart (wmode, new_out);
7309 if (old_out)
7310 old_out = gen_lowpart (wmode, old_out);
7311 else
7312 old_out = new_out;
7313 value = simplify_gen_subreg (wmode, value, mode, 0);
7315 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7317 switch (code)
7319 case SET:
7320 new_out = value;
7321 break;
7323 case NOT:
7324 x = gen_rtx_AND (wmode, old_out, value);
7325 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7326 x = gen_rtx_NOT (wmode, new_out);
7327 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7328 break;
7330 case MINUS:
7331 if (CONST_INT_P (value))
7333 value = GEN_INT (-INTVAL (value));
7334 code = PLUS;
7336 /* Fall through. */
7338 default:
7339 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7340 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7341 break;
7344 aarch64_emit_store_exclusive (mode, cond, mem,
7345 gen_lowpart (mode, new_out), model_rtx);
7347 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7348 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7349 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7350 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7353 static void
7354 aarch64_print_extension (void)
7356 const struct aarch64_option_extension *opt = NULL;
7358 for (opt = all_extensions; opt->name != NULL; opt++)
7359 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
7360 asm_fprintf (asm_out_file, "+%s", opt->name);
7362 asm_fprintf (asm_out_file, "\n");
7365 static void
7366 aarch64_start_file (void)
7368 if (selected_arch)
7370 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
7371 aarch64_print_extension ();
7373 else if (selected_cpu)
7375 asm_fprintf (asm_out_file, "\t.cpu %s", selected_cpu->name);
7376 aarch64_print_extension ();
7378 default_file_start ();
7381 /* Target hook for c_mode_for_suffix. */
7382 static enum machine_mode
7383 aarch64_c_mode_for_suffix (char suffix)
7385 if (suffix == 'q')
7386 return TFmode;
7388 return VOIDmode;
7391 /* We can only represent floating point constants which will fit in
7392 "quarter-precision" values. These values are characterised by
7393 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given by:
7396 (-1)^s * (n/16) * 2^r
7398 Where:
7399 's' is the sign bit.
7400 'n' is an integer in the range 16 <= n <= 31.
7401 'r' is an integer in the range -3 <= r <= 4. */
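/* Worked examples: 0.25 is representable as (-1)^0 * (16/16) * 2^-2,
   and 31.0 as (-1)^0 * (31/16) * 2^4; the representable magnitudes
   therefore range from (16/16) * 2^-3 = 0.125 up to (31/16) * 2^4 = 31.0,
   always with a leading mantissa bit of 1 (so 0.0 is not representable).  */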
7403 /* Return true iff X can be represented by a quarter-precision
7404 floating point immediate operand. Note, we cannot represent 0.0. */
7405 bool
7406 aarch64_float_const_representable_p (rtx x)
7408 /* This represents our current view of how many bits
7409 make up the mantissa. */
7410 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7411 int exponent;
7412 unsigned HOST_WIDE_INT mantissa, mask;
7413 HOST_WIDE_INT m1, m2;
7414 REAL_VALUE_TYPE r, m;
7416 if (!CONST_DOUBLE_P (x))
7417 return false;
7419 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7421 /* We cannot represent infinities, NaNs or +/-zero. We won't
7422 know if we have +zero until we analyse the mantissa, but we
7423 can reject the other invalid values. */
7424 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7425 || REAL_VALUE_MINUS_ZERO (r))
7426 return false;
7428 /* Extract exponent. */
7429 r = real_value_abs (&r);
7430 exponent = REAL_EXP (&r);
7432 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7433 highest (sign) bit, with a fixed binary point at bit point_pos.
7434 m1 holds the low part of the mantissa, m2 the high part.
7435 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7436 bits for the mantissa, this can fail (low bits will be lost). */
7437 real_ldexp (&m, &r, point_pos - exponent);
7438 REAL_VALUE_TO_INT (&m1, &m2, m);
7440 /* If the low part of the mantissa has bits set we cannot represent
7441 the value. */
7442 if (m1 != 0)
7443 return false;
7444 /* We have rejected the lower HOST_WIDE_INT, so update our
7445 understanding of how many bits lie in the mantissa and
7446 look only at the high HOST_WIDE_INT. */
7447 mantissa = m2;
7448 point_pos -= HOST_BITS_PER_WIDE_INT;
7450 /* We can only represent values with a mantissa of the form 1.xxxx. */
7451 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7452 if ((mantissa & mask) != 0)
7453 return false;
7455 /* Having filtered unrepresentable values, we may now remove all
7456 but the highest 5 bits. */
7457 mantissa >>= point_pos - 5;
7459 /* We cannot represent the value 0.0, so reject it. This is handled
7460 elsewhere. */
7461 if (mantissa == 0)
7462 return false;
7464 /* Then, as bit 4 is always set, we can mask it off, leaving
7465 the mantissa in the range [0, 15]. */
7466 mantissa &= ~(1 << 4);
7467 gcc_assert (mantissa <= 15);
7469 /* GCC internally does not use IEEE754-like encoding (where normalized
7470 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
7471 Our mantissa values are shifted 4 places to the left relative to
7472 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
7473 by 5 places to correct for GCC's representation. */
7474 exponent = 5 - exponent;
7476 return (exponent >= 0 && exponent <= 7);
7479 char*
7480 aarch64_output_simd_mov_immediate (rtx const_vector,
7481 enum machine_mode mode,
7482 unsigned width)
7484 bool is_valid;
7485 static char templ[40];
7486 const char *mnemonic;
7487 const char *shift_op;
7488 unsigned int lane_count = 0;
7489 char element_char;
7491 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
7493 /* This will return true to show const_vector is legal for use as an
7494 AdvSIMD MOVI (or, implicitly, MVNI) instruction immediate. It will
7495 also update INFO to show how the immediate should be generated. */
7496 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
7497 gcc_assert (is_valid);
7499 element_char = sizetochar (info.element_width);
7500 lane_count = width / info.element_width;
7502 mode = GET_MODE_INNER (mode);
7503 if (mode == SFmode || mode == DFmode)
7505 gcc_assert (info.shift == 0 && ! info.mvn);
7506 if (aarch64_float_const_zero_rtx_p (info.value))
7507 info.value = GEN_INT (0);
7508 else
7510 #define buf_size 20
7511 REAL_VALUE_TYPE r;
7512 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
7513 char float_buf[buf_size] = {'\0'};
7514 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
7515 #undef buf_size
7517 if (lane_count == 1)
7518 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
7519 else
7520 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
7521 lane_count, element_char, float_buf);
7522 return templ;
7526 mnemonic = info.mvn ? "mvni" : "movi";
7527 shift_op = info.msl ? "msl" : "lsl";
7529 if (lane_count == 1)
7530 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
7531 mnemonic, UINTVAL (info.value));
7532 else if (info.shift)
7533 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
7534 ", %s %d", mnemonic, lane_count, element_char,
7535 UINTVAL (info.value), shift_op, info.shift);
7536 else
7537 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
7538 mnemonic, lane_count, element_char, UINTVAL (info.value));
7539 return templ;
7542 char*
7543 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
7544 enum machine_mode mode)
7546 enum machine_mode vmode;
7548 gcc_assert (!VECTOR_MODE_P (mode));
7549 vmode = aarch64_simd_container_mode (mode, 64);
7550 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
7551 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
7554 /* Split operands into moves from op[1] + op[2] into op[0]. */
7556 void
7557 aarch64_split_combinev16qi (rtx operands[3])
7559 unsigned int dest = REGNO (operands[0]);
7560 unsigned int src1 = REGNO (operands[1]);
7561 unsigned int src2 = REGNO (operands[2]);
7562 enum machine_mode halfmode = GET_MODE (operands[1]);
7563 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
7564 rtx destlo, desthi;
7566 gcc_assert (halfmode == V16QImode);
7568 if (src1 == dest && src2 == dest + halfregs)
7570 /* No-op move. Can't split to nothing; emit something. */
7571 emit_note (NOTE_INSN_DELETED);
7572 return;
7575 /* Preserve register attributes for variable tracking. */
7576 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
7577 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
7578 GET_MODE_SIZE (halfmode));
7580 /* Special case of reversed high/low parts. */
7581 if (reg_overlap_mentioned_p (operands[2], destlo)
7582 && reg_overlap_mentioned_p (operands[1], desthi))
7584 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7585 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
7586 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7588 else if (!reg_overlap_mentioned_p (operands[2], destlo))
7590 /* Try to avoid unnecessary moves if part of the result
7591 is in the right place already. */
7592 if (src1 != dest)
7593 emit_move_insn (destlo, operands[1]);
7594 if (src2 != dest + halfregs)
7595 emit_move_insn (desthi, operands[2]);
7597 else
7599 if (src2 != dest + halfregs)
7600 emit_move_insn (desthi, operands[2]);
7601 if (src1 != dest)
7602 emit_move_insn (destlo, operands[1]);
7606 /* vec_perm support. */
7608 #define MAX_VECT_LEN 16
7610 struct expand_vec_perm_d
7612 rtx target, op0, op1;
7613 unsigned char perm[MAX_VECT_LEN];
7614 enum machine_mode vmode;
7615 unsigned char nelt;
7616 bool one_vector_p;
7617 bool testing_p;
7620 /* Generate a variable permutation. */
7622 static void
7623 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
7625 enum machine_mode vmode = GET_MODE (target);
7626 bool one_vector_p = rtx_equal_p (op0, op1);
7628 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
7629 gcc_checking_assert (GET_MODE (op0) == vmode);
7630 gcc_checking_assert (GET_MODE (op1) == vmode);
7631 gcc_checking_assert (GET_MODE (sel) == vmode);
7632 gcc_checking_assert (TARGET_SIMD);
7634 if (one_vector_p)
7636 if (vmode == V8QImode)
7638 /* Expand the argument to a V16QI mode by duplicating it. */
7639 rtx pair = gen_reg_rtx (V16QImode);
7640 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
7641 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7643 else
7645 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
7648 else
7650 rtx pair;
7652 if (vmode == V8QImode)
7654 pair = gen_reg_rtx (V16QImode);
7655 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
7656 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7658 else
7660 pair = gen_reg_rtx (OImode);
7661 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
7662 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
7667 void
7668 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7670 enum machine_mode vmode = GET_MODE (target);
7671 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
7672 bool one_vector_p = rtx_equal_p (op0, op1);
7673 rtx rmask[MAX_VECT_LEN], mask;
7675 gcc_checking_assert (!BYTES_BIG_ENDIAN);
7677 /* The TBL instruction does not use a modulo index, so we must take care
7678 of that ourselves. */
7679 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
7680 for (i = 0; i < nelt; ++i)
7681 rmask[i] = mask;
7682 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
7683 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
7685 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
7688 /* Recognize patterns suitable for the TRN instructions. */
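/* For example, with V4SImode and two input vectors, the permutation
   { 0, 4, 2, 6 } selects the even lanes of both inputs and is matched as
   TRN1, while { 1, 5, 3, 7 } selects the odd lanes and is matched as TRN2
   (indices here are in GCC's little-endian numbering).  */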
7689 static bool
7690 aarch64_evpc_trn (struct expand_vec_perm_d *d)
7692 unsigned int i, odd, mask, nelt = d->nelt;
7693 rtx out, in0, in1, x;
7694 rtx (*gen) (rtx, rtx, rtx);
7695 enum machine_mode vmode = d->vmode;
7697 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7698 return false;
7700 /* Note that these are little-endian tests.
7701 We correct for big-endian later. */
7702 if (d->perm[0] == 0)
7703 odd = 0;
7704 else if (d->perm[0] == 1)
7705 odd = 1;
7706 else
7707 return false;
7708 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7710 for (i = 0; i < nelt; i += 2)
7712 if (d->perm[i] != i + odd)
7713 return false;
7714 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
7715 return false;
7718 /* Success! */
7719 if (d->testing_p)
7720 return true;
7722 in0 = d->op0;
7723 in1 = d->op1;
7724 if (BYTES_BIG_ENDIAN)
7726 x = in0, in0 = in1, in1 = x;
7727 odd = !odd;
7729 out = d->target;
7731 if (odd)
7733 switch (vmode)
7735 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
7736 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
7737 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
7738 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
7739 case V4SImode: gen = gen_aarch64_trn2v4si; break;
7740 case V2SImode: gen = gen_aarch64_trn2v2si; break;
7741 case V2DImode: gen = gen_aarch64_trn2v2di; break;
7742 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
7743 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
7744 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
7745 default:
7746 return false;
7749 else
7751 switch (vmode)
7753 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
7754 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
7755 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
7756 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
7757 case V4SImode: gen = gen_aarch64_trn1v4si; break;
7758 case V2SImode: gen = gen_aarch64_trn1v2si; break;
7759 case V2DImode: gen = gen_aarch64_trn1v2di; break;
7760 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
7761 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
7762 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
7763 default:
7764 return false;
7768 emit_insn (gen (out, in0, in1));
7769 return true;
7772 /* Recognize patterns suitable for the UZP instructions. */
7773 static bool
7774 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
7776 unsigned int i, odd, mask, nelt = d->nelt;
7777 rtx out, in0, in1, x;
7778 rtx (*gen) (rtx, rtx, rtx);
7779 enum machine_mode vmode = d->vmode;
7781 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7782 return false;
7784 /* Note that these are little-endian tests.
7785 We correct for big-endian later. */
7786 if (d->perm[0] == 0)
7787 odd = 0;
7788 else if (d->perm[0] == 1)
7789 odd = 1;
7790 else
7791 return false;
7792 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7794 for (i = 0; i < nelt; i++)
7796 unsigned elt = (i * 2 + odd) & mask;
7797 if (d->perm[i] != elt)
7798 return false;
7801 /* Success! */
7802 if (d->testing_p)
7803 return true;
7805 in0 = d->op0;
7806 in1 = d->op1;
7807 if (BYTES_BIG_ENDIAN)
7809 x = in0, in0 = in1, in1 = x;
7810 odd = !odd;
7812 out = d->target;
7814 if (odd)
7816 switch (vmode)
7818 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
7819 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
7820 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
7821 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
7822 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
7823 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
7824 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
7825 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
7826 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
7827 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
7828 default:
7829 return false;
7832 else
7834 switch (vmode)
7836 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
7837 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
7838 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
7839 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
7840 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
7841 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
7842 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
7843 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
7844 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
7845 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
7846 default:
7847 return false;
7851 emit_insn (gen (out, in0, in1));
7852 return true;
7855 /* Recognize patterns suitable for the ZIP instructions. */
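/* For example, with V4SImode and two input vectors, { 0, 4, 1, 5 }
   interleaves the low halves of the inputs and is matched as ZIP1, while
   { 2, 6, 3, 7 } interleaves the high halves and is matched as ZIP2
   (again in little-endian lane numbering).  */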
7856 static bool
7857 aarch64_evpc_zip (struct expand_vec_perm_d *d)
7859 unsigned int i, high, mask, nelt = d->nelt;
7860 rtx out, in0, in1, x;
7861 rtx (*gen) (rtx, rtx, rtx);
7862 enum machine_mode vmode = d->vmode;
7864 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7865 return false;
7867 /* Note that these are little-endian tests.
7868 We correct for big-endian later. */
7869 high = nelt / 2;
7870 if (d->perm[0] == high)
7871 /* Do Nothing. */
7873 else if (d->perm[0] == 0)
7874 high = 0;
7875 else
7876 return false;
7877 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7879 for (i = 0; i < nelt / 2; i++)
7881 unsigned elt = (i + high) & mask;
7882 if (d->perm[i * 2] != elt)
7883 return false;
7884 elt = (elt + nelt) & mask;
7885 if (d->perm[i * 2 + 1] != elt)
7886 return false;
7889 /* Success! */
7890 if (d->testing_p)
7891 return true;
7893 in0 = d->op0;
7894 in1 = d->op1;
7895 if (BYTES_BIG_ENDIAN)
7897 x = in0, in0 = in1, in1 = x;
7898 high = !high;
7900 out = d->target;
7902 if (high)
7904 switch (vmode)
7906 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
7907 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
7908 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
7909 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
7910 case V4SImode: gen = gen_aarch64_zip2v4si; break;
7911 case V2SImode: gen = gen_aarch64_zip2v2si; break;
7912 case V2DImode: gen = gen_aarch64_zip2v2di; break;
7913 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
7914 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
7915 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
7916 default:
7917 return false;
7920 else
7922 switch (vmode)
7924 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
7925 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
7926 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
7927 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
7928 case V4SImode: gen = gen_aarch64_zip1v4si; break;
7929 case V2SImode: gen = gen_aarch64_zip1v2si; break;
7930 case V2DImode: gen = gen_aarch64_zip1v2di; break;
7931 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
7932 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
7933 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
7934 default:
7935 return false;
7939 emit_insn (gen (out, in0, in1));
7940 return true;
7943 static bool
7944 aarch64_evpc_dup (struct expand_vec_perm_d *d)
7946 rtx (*gen) (rtx, rtx, rtx);
7947 rtx out = d->target;
7948 rtx in0;
7949 enum machine_mode vmode = d->vmode;
7950 unsigned int i, elt, nelt = d->nelt;
7951 rtx lane;
7953 /* TODO: This may not be big-endian safe. */
7954 if (BYTES_BIG_ENDIAN)
7955 return false;
7957 elt = d->perm[0];
7958 for (i = 1; i < nelt; i++)
7960 if (elt != d->perm[i])
7961 return false;
7964 /* The generic preparation in aarch64_expand_vec_perm_const_1
7965 swaps the operand order and the permute indices if it finds
7966 d->perm[0] to be in the second operand. Thus, we can always
7967 use d->op0 and need not do any extra arithmetic to get the
7968 correct lane number. */
7969 in0 = d->op0;
7970 lane = GEN_INT (elt);
7972 switch (vmode)
7974 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
7975 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
7976 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
7977 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
7978 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
7979 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
7980 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
7981 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
7982 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
7983 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
7984 default:
7985 return false;
7988 emit_insn (gen (out, in0, lane));
7989 return true;
7992 static bool
7993 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
7995 rtx rperm[MAX_VECT_LEN], sel;
7996 enum machine_mode vmode = d->vmode;
7997 unsigned int i, nelt = d->nelt;
7999 /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's
8000 numbering of elements for big-endian, we must reverse the order. */
8001 if (BYTES_BIG_ENDIAN)
8002 return false;
8004 if (d->testing_p)
8005 return true;
8007 /* Generic code will try constant permutation twice: once with the
8008 original mode and again with the elements lowered to QImode.
8009 So wait and don't do the selector expansion ourselves. */
8010 if (vmode != V8QImode && vmode != V16QImode)
8011 return false;
8013 for (i = 0; i < nelt; ++i)
8014 rperm[i] = GEN_INT (d->perm[i]);
8015 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
8016 sel = force_reg (vmode, sel);
8018 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
8019 return true;
8022 static bool
8023 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
8025 /* The pattern matching functions above are written to look for a small
8026 number to begin the sequence (0, 1, N/2). If we begin with an index
8027 from the second operand, we can swap the operands. */
8028 if (d->perm[0] >= d->nelt)
8030 unsigned i, nelt = d->nelt;
8031 rtx x;
8033 for (i = 0; i < nelt; ++i)
8034 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
8036 x = d->op0;
8037 d->op0 = d->op1;
8038 d->op1 = x;
8041 if (TARGET_SIMD)
8043 if (aarch64_evpc_zip (d))
8044 return true;
8045 else if (aarch64_evpc_uzp (d))
8046 return true;
8047 else if (aarch64_evpc_trn (d))
8048 return true;
8049 else if (aarch64_evpc_dup (d))
8050 return true;
8051 return aarch64_evpc_tbl (d);
8053 return false;
8056 /* Expand a vec_perm_const pattern. */
8058 bool
8059 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
8061 struct expand_vec_perm_d d;
8062 int i, nelt, which;
8064 d.target = target;
8065 d.op0 = op0;
8066 d.op1 = op1;
8068 d.vmode = GET_MODE (target);
8069 gcc_assert (VECTOR_MODE_P (d.vmode));
8070 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8071 d.testing_p = false;
8073 for (i = which = 0; i < nelt; ++i)
8075 rtx e = XVECEXP (sel, 0, i);
8076 int ei = INTVAL (e) & (2 * nelt - 1);
8077 which |= (ei < nelt ? 1 : 2);
8078 d.perm[i] = ei;
8081 switch (which)
8083 default:
8084 gcc_unreachable ();
8086 case 3:
8087 d.one_vector_p = false;
8088 if (!rtx_equal_p (op0, op1))
8089 break;
8091 /* The elements of PERM do not suggest that only the first operand
8092 is used, but both operands are identical. Allow easier matching
8093 of the permutation by folding the permutation into the single
8094 input vector. */
8095 /* Fall Through. */
8096 case 2:
8097 for (i = 0; i < nelt; ++i)
8098 d.perm[i] &= nelt - 1;
8099 d.op0 = op1;
8100 d.one_vector_p = true;
8101 break;
8103 case 1:
8104 d.op1 = op0;
8105 d.one_vector_p = true;
8106 break;
8109 return aarch64_expand_vec_perm_const_1 (&d);
8112 static bool
8113 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
8114 const unsigned char *sel)
8116 struct expand_vec_perm_d d;
8117 unsigned int i, nelt, which;
8118 bool ret;
8120 d.vmode = vmode;
8121 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8122 d.testing_p = true;
8123 memcpy (d.perm, sel, nelt);
8125 /* Calculate whether all elements are in one vector. */
8126 for (i = which = 0; i < nelt; ++i)
8128 unsigned char e = d.perm[i];
8129 gcc_assert (e < 2 * nelt);
8130 which |= (e < nelt ? 1 : 2);
8133 /* If all elements are from the second vector, reindex as if from the
8134 first vector. */
8135 if (which == 2)
8136 for (i = 0; i < nelt; ++i)
8137 d.perm[i] -= nelt;
8139 /* Check whether the mask can be applied to a single vector. */
8140 d.one_vector_p = (which != 3);
8142 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
8143 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
8144 if (!d.one_vector_p)
8145 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
8147 start_sequence ();
8148 ret = aarch64_expand_vec_perm_const_1 (&d);
8149 end_sequence ();
8151 return ret;
8154 #undef TARGET_ADDRESS_COST
8155 #define TARGET_ADDRESS_COST aarch64_address_cost
8157 /* This hook determines whether unnamed bitfields affect the alignment
8158 of the containing structure. The hook returns true if the structure
8159 should inherit the alignment requirements of an unnamed bitfield's
8160 type. */
8161 #undef TARGET_ALIGN_ANON_BITFIELD
8162 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
8164 #undef TARGET_ASM_ALIGNED_DI_OP
8165 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
8167 #undef TARGET_ASM_ALIGNED_HI_OP
8168 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
8170 #undef TARGET_ASM_ALIGNED_SI_OP
8171 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
8173 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
8174 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
8175 hook_bool_const_tree_hwi_hwi_const_tree_true
8177 #undef TARGET_ASM_FILE_START
8178 #define TARGET_ASM_FILE_START aarch64_start_file
8180 #undef TARGET_ASM_OUTPUT_MI_THUNK
8181 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
8183 #undef TARGET_ASM_SELECT_RTX_SECTION
8184 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
8186 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
8187 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
8189 #undef TARGET_BUILD_BUILTIN_VA_LIST
8190 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
8192 #undef TARGET_CALLEE_COPIES
8193 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
8195 #undef TARGET_CAN_ELIMINATE
8196 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
8198 #undef TARGET_CANNOT_FORCE_CONST_MEM
8199 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
8201 #undef TARGET_CONDITIONAL_REGISTER_USAGE
8202 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
8204 /* Only the least significant bit is used for initialization guard
8205 variables. */
8206 #undef TARGET_CXX_GUARD_MASK_BIT
8207 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
8209 #undef TARGET_C_MODE_FOR_SUFFIX
8210 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
8212 #ifdef TARGET_BIG_ENDIAN_DEFAULT
8213 #undef TARGET_DEFAULT_TARGET_FLAGS
8214 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
8215 #endif
8217 #undef TARGET_CLASS_MAX_NREGS
8218 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
8220 #undef TARGET_BUILTIN_DECL
8221 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
8223 #undef TARGET_EXPAND_BUILTIN
8224 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
8226 #undef TARGET_EXPAND_BUILTIN_VA_START
8227 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
8229 #undef TARGET_FOLD_BUILTIN
8230 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
8232 #undef TARGET_FUNCTION_ARG
8233 #define TARGET_FUNCTION_ARG aarch64_function_arg
8235 #undef TARGET_FUNCTION_ARG_ADVANCE
8236 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
8238 #undef TARGET_FUNCTION_ARG_BOUNDARY
8239 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
8241 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
8242 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
8244 #undef TARGET_FUNCTION_VALUE
8245 #define TARGET_FUNCTION_VALUE aarch64_function_value
8247 #undef TARGET_FUNCTION_VALUE_REGNO_P
8248 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
8250 #undef TARGET_FRAME_POINTER_REQUIRED
8251 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
8253 #undef TARGET_GIMPLE_FOLD_BUILTIN
8254 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
8256 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
8257 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
8259 #undef TARGET_INIT_BUILTINS
8260 #define TARGET_INIT_BUILTINS aarch64_init_builtins
8262 #undef TARGET_LEGITIMATE_ADDRESS_P
8263 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
8265 #undef TARGET_LEGITIMATE_CONSTANT_P
8266 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
8268 #undef TARGET_LIBGCC_CMP_RETURN_MODE
8269 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
8271 #undef TARGET_MANGLE_TYPE
8272 #define TARGET_MANGLE_TYPE aarch64_mangle_type
8274 #undef TARGET_MEMORY_MOVE_COST
8275 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
8277 #undef TARGET_MUST_PASS_IN_STACK
8278 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
8280 /* This target hook should return true if accesses to volatile bitfields
8281 should use the narrowest mode possible. It should return false if these
8282 accesses should use the bitfield container type. */
8283 #undef TARGET_NARROW_VOLATILE_BITFIELD
8284 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
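/* Editorial illustration, not part of the original file: with the hook
   returning false, a volatile bit-field access uses the declared
   container type rather than the narrowest covering mode.  */
#if 0
struct device_reg
{
  volatile unsigned int ready : 1;
  volatile unsigned int rest  : 31;
};

unsigned int
poll_ready (struct device_reg *r)
{
  /* Expected to be read with a 32-bit (word) load of the whole
     container, not a byte load covering just the low bits.  */
  return r->ready;
}
#endif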
8286 #undef TARGET_OPTION_OVERRIDE
8287 #define TARGET_OPTION_OVERRIDE aarch64_override_options
8289 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
8290 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
8291 aarch64_override_options_after_change
8293 #undef TARGET_PASS_BY_REFERENCE
8294 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
8296 #undef TARGET_PREFERRED_RELOAD_CLASS
8297 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
8299 #undef TARGET_SECONDARY_RELOAD
8300 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
8302 #undef TARGET_SHIFT_TRUNCATION_MASK
8303 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
8305 #undef TARGET_SETUP_INCOMING_VARARGS
8306 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
8308 #undef TARGET_STRUCT_VALUE_RTX
8309 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
8311 #undef TARGET_REGISTER_MOVE_COST
8312 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
8314 #undef TARGET_RETURN_IN_MEMORY
8315 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
8317 #undef TARGET_RETURN_IN_MSB
8318 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
8320 #undef TARGET_RTX_COSTS
8321 #define TARGET_RTX_COSTS aarch64_rtx_costs
8323 #undef TARGET_TRAMPOLINE_INIT
8324 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
8326 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
8327 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
8329 #undef TARGET_VECTOR_MODE_SUPPORTED_P
8330 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
8332 #undef TARGET_ARRAY_MODE_SUPPORTED_P
8333 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
8335 #undef TARGET_VECTORIZE_ADD_STMT_COST
8336 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
8338 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
8339 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
8340 aarch64_builtin_vectorization_cost
8342 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
8343 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
8345 #undef TARGET_VECTORIZE_BUILTINS
8346 #define TARGET_VECTORIZE_BUILTINS
8348 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
8349 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
8350 aarch64_builtin_vectorized_function
8352 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
8353 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
8354 aarch64_autovectorize_vector_sizes
8356 /* Section anchor support. */
8358 #undef TARGET_MIN_ANCHOR_OFFSET
8359 #define TARGET_MIN_ANCHOR_OFFSET -256
8361 /* Limit the maximum anchor offset to 4k-1, since that is the immediate
8362 offset limit for a byte access; we could do much more for larger data types,
8363 but have no way to determine the size of the access.  We assume accesses are aligned. */
8364 #undef TARGET_MAX_ANCHOR_OFFSET
8365 #define TARGET_MAX_ANCHOR_OFFSET 4095
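/* Editorial illustration, not part of the original file: with
   -fsection-anchors, nearby globals share one anchor address and are
   reached as base-plus-offset accesses; the limits above keep those
   offsets within [-256, 4095], i.e. inside the signed 9-bit unscaled
   and unsigned 12-bit byte-offset addressing ranges.  */
#if 0
int a, b, c;

int
sum_abc (void)
{
  /* Roughly:  adrp x0, .LANCHOR0
               add  x0, x0, :lo12:.LANCHOR0
               ldr  w1, [x0, #<offset of a>]   ... and so on.  */
  return a + b + c;
}
#endif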
8367 #undef TARGET_VECTOR_ALIGNMENT
8368 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
8370 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
8371 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
8372 aarch64_simd_vector_alignment_reachable
8374 /* vec_perm support. */
8376 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
8377 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
8378 aarch64_vectorize_vec_perm_const_ok
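/* Editorial illustration, not part of the original file: the kind of
   constant permutation the hook above is asked about.  A constant
   __builtin_shuffle selector like the reversal below is checked against
   the target's fixed permute patterns, with a TBL table lookup as the
   general fallback.  */
#if 0
typedef int v4si __attribute__ ((vector_size (16)));

v4si
reverse_v4si (v4si x)
{
  return __builtin_shuffle (x, (v4si) { 3, 2, 1, 0 });
}
#endif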
8381 #undef TARGET_FIXED_CONDITION_CODE_REGS
8382 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
8384 struct gcc_target targetm = TARGET_INITIALIZER;
8386 #include "gt-aarch64.h"