/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2013 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "hard-reg-set.h"
#include "target-def.h"
#include "targhooks.h"
#include "langhooks.h"
#include "diagnostic-core.h"
/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC
       A constant symbolic address, in pc-relative literal pool.  */

enum aarch64_address_type {
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};
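/* For illustration (example operands, not an exhaustive list), the
   classifications above correspond to assembly addressing forms like:

     ADDRESS_REG_IMM    [x0, #16]
     ADDRESS_REG_WB     [x0, #16]!  or  [x0], #16
     ADDRESS_REG_REG    [x0, x1]  or  [x0, x1, lsl #3]
     ADDRESS_REG_UXTW   [x0, w1, uxtw #2]
     ADDRESS_REG_SXTW   [x0, w1, sxtw #2]
     ADDRESS_LO_SUM     [x0, #:lo12:foo]
     ADDRESS_SYMBOLIC   a pc-relative literal-pool entry such as .LC0  */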
struct aarch64_address_info {
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};
struct simd_immediate_info
{
  rtx value;
  int shift;
  int element_width;
  bool mvn;
};
/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif
static bool aarch64_composite_type_p (const_tree, enum machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
						     const_tree,
						     enum machine_mode *, int *,
						     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (enum machine_mode);
static unsigned bit_count (unsigned HOST_WIDE_INT);
static bool aarch64_const_vec_all_same_int_p (rtx,
					      HOST_WIDE_INT, HOST_WIDE_INT);
static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
						 const unsigned char *sel);
/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = generic;

/* The current tuning set.  */
const struct tune_params *aarch64_tune_params;

/* Mask to specify which instructions we are allowed to generate.  */
unsigned long aarch64_isa_flags = 0;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;
/* Tuning parameters.  */

#if HAVE_DESIGNATED_INITIALIZERS
#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
#else
#define NAMED_PARAM(NAME, VAL) (VAL)
#endif
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_rtx_cost_table generic_rtx_cost_table =
{
  NAMED_PARAM (memory_load, COSTS_N_INSNS (1)),
  NAMED_PARAM (memory_store, COSTS_N_INSNS (0)),
  NAMED_PARAM (register_shift, COSTS_N_INSNS (1)),
  NAMED_PARAM (int_divide, COSTS_N_INSNS (6)),
  NAMED_PARAM (float_divide, COSTS_N_INSNS (2)),
  NAMED_PARAM (double_divide, COSTS_N_INSNS (6)),
  NAMED_PARAM (int_multiply, COSTS_N_INSNS (1)),
  NAMED_PARAM (int_multiply_extend, COSTS_N_INSNS (1)),
  NAMED_PARAM (int_multiply_add, COSTS_N_INSNS (1)),
  NAMED_PARAM (int_multiply_extend_add, COSTS_N_INSNS (1)),
  NAMED_PARAM (float_multiply, COSTS_N_INSNS (0)),
  NAMED_PARAM (double_multiply, COSTS_N_INSNS (1))
};
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_addrcost_table generic_addrcost_table =
{
  NAMED_PARAM (pre_modify, 0),
  NAMED_PARAM (post_modify, 0),
  NAMED_PARAM (register_offset, 0),
  NAMED_PARAM (register_extend, 0),
  NAMED_PARAM (imm_offset, 0)
};
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_regmove_cost generic_regmove_cost =
{
  NAMED_PARAM (GP2GP, 1),
  NAMED_PARAM (GP2FP, 2),
  NAMED_PARAM (FP2GP, 2),
  /* We currently do not provide direct support for TFmode Q->Q move.
     Therefore we need to raise the cost above 2 in order to have
     reload handle the situation.  */
  NAMED_PARAM (FP2FP, 4)
};
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct tune_params generic_tunings =
{
  &generic_rtx_cost_table,
  &generic_addrcost_table,
  &generic_regmove_cost,
  NAMED_PARAM (memmov_cost, 4)
};
/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};
/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
#include "aarch64-cores.def"
#undef AARCH64_CORE
  {"generic", generic, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8,
   &generic_tunings},
  {NULL, aarch64_none, NULL, 0, NULL}
};
/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, FLAGS, NULL},
#include "aarch64-arches.def"
#undef AARCH64_ARCH
  {"generic", generic, "8", AARCH64_FL_FOR_ARCH8, NULL},
  {NULL, aarch64_none, NULL, 0, NULL}
};
/* Target specification.  These are populated as commandline arguments
   are processed, or NULL if not specified.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

/* ISA extensions in AArch64.  */
static const struct aarch64_option_extension all_extensions[] =
{
#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
  {NAME, FLAGS_ON, FLAGS_OFF},
#include "aarch64-option-extensions.def"
#undef AARCH64_OPT_EXTENSION
  {NULL, 0, 0}
};
/* Used to track the size of an address when generating a pre/post
   increment address.  */
static enum machine_mode aarch64_memory_reference_mode;

/* Used to force GTY into this file.  */
static GTY(()) int gty_dummy;
/* A table of valid AArch64 "bitmask immediate" values for
   logical instructions.  */

#define AARCH64_NUM_BITMASKS  5334
static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
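/* A bitmask immediate is a contiguous run of set bits, rotated by any
   amount and replicated across the register at an element size of 2, 4,
   8, 16, 32 or 64 bits.  For example, 0x00ff00ff00ff00ff (an 8-bit run
   of ones in each 16-bit element) is encodable, while 0 and ~0 are not;
   enumerating every such 64-bit value gives the 5334 table entries.  */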
/* Did we set flag_omit_frame_pointer just so
   aarch64_frame_pointer_required would be called?  */
static bool faked_omit_frame_pointer;
typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;
#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))

/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return AARCH64_DWARF_R0 + regno - R0_REGNUM;
  else if (regno == SP_REGNUM)
    return AARCH64_DWARF_SP;
  else if (FP_REGNUM_P (regno))
    return AARCH64_DWARF_V0 + regno - V0_REGNUM;

  /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
     equivalent DWARF register.  */
  return DWARF_FRAME_REGISTERS;
}
/* Return TRUE if MODE is any of the large INT modes.  */
static bool
aarch64_vect_struct_mode_p (enum machine_mode mode)
{
  return mode == OImode || mode == CImode || mode == XImode;
}
/* Return TRUE if MODE is any of the vector modes.  */
static bool
aarch64_vector_mode_p (enum machine_mode mode)
{
  return aarch64_vector_mode_supported_p (mode)
	 || aarch64_vect_struct_mode_p (mode);
}
/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (enum machine_mode mode,
				unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && AARCH64_VALID_SIMD_QREG_MODE (mode)
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}
/* Implement HARD_REGNO_NREGS.  */

int
aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
{
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
    default:
      return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    }
  gcc_unreachable ();
}
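/* As a worked example: TImode (16 bytes) occupies two X registers but a
   single 16-byte V register, while OImode (32 bytes) needs two V
   registers, given UNITS_PER_WORD == 8 and UNITS_PER_VREG == 16.  */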
/* Implement HARD_REGNO_MODE_OK.  */

int
aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == SP_REGNUM || regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
    return 1;

  if (FP_REGNUM_P (regno))
    {
      if (aarch64_vect_struct_mode_p (mode))
	return
	  (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
      else
	return 1;
    }

  return 0;
}
/* Return true if calls to DECL should be treated as
   long-calls (ie called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (ie called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}
/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

   (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)).  */
bool
aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
				rtx extract_imm)
{
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
    return false;

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

  if (extract_val > 8
      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
    return true;

  return false;
}
/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
  enum machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
  return cc_reg;
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

rtx
aarch64_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;
  rtx sym, addend;

  if (GET_CODE (addr) == CONST)
    {
      split_const (addr, &sym, &addend);
      if (GET_CODE (sym) == SYMBOL_REF)
	tls_kind = SYMBOL_REF_TLS_MODEL (sym);
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}
/* We'll allow lo_sum's in addresses in our legitimate addresses
   so that combine would take care of combining addresses where
   necessary, but for generation purposes, we'll generate the address
   as :
   RTL                               Absolute
   tmp = hi (symbol_ref);            adrp  x1, foo
   dest = lo_sum (tmp, symbol_ref);  add dest, x1, :lo_12:foo

   PIC                               TLS
   adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
   ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
                                     bl   __tls_get_addr

   Load TLS symbol, depending on TLS mechanism and TLS access model.

   Global Dynamic - Traditional TLS:
   adrp tmp, :tlsgd:imm
   add  dest, tmp, #:tlsgd_lo12:imm
   bl   __tls_get_addr

   Global Dynamic - TLS Descriptors:
   adrp dest, :tlsdesc:imm
   ldr  tmp, [dest, #:tlsdesc_lo12:imm]
   add  dest, dest, #:tlsdesc_lo12:imm
   blr  tmp

   Initial Exec:
   mrs  tp, tpidr_el0
   adrp tmp, :gottprel:imm
   ldr  dest, [tmp, #:gottprel_lo12:imm]
   add  dest, dest, tp

   Local Exec:
   mrs  tp, tpidr_el0
   add  t0, tp, #:tprel_hi12:imm
   add  t0, #:tprel_lo12_nc:imm
*/
static void
aarch64_load_symref_appropriately (rtx dest, rtx imm,
				   enum aarch64_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_SMALL_ABSOLUTE:
      {
	rtx tmp_reg = dest;
	if (can_create_pseudo_p ())
	  {
	    tmp_reg = gen_reg_rtx (Pmode);
	  }

	emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
	emit_insn (gen_add_losym (dest, tmp_reg, imm));
	return;
      }

    case SYMBOL_TINY_ABSOLUTE:
      emit_insn (gen_rtx_SET (Pmode, dest, imm));
      return;

    case SYMBOL_SMALL_GOT:
      {
	rtx tmp_reg = dest;
	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (Pmode);
	emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
	emit_insn (gen_ldr_got_small (dest, tmp_reg, imm));
	return;
      }

    case SYMBOL_SMALL_TLSGD:
      {
	rtx insns;
	rtx result = gen_rtx_REG (Pmode, R0_REGNUM);

	start_sequence ();
	emit_call_insn (gen_tlsgd_small (result, imm));
	insns = get_insns ();
	end_sequence ();

	RTL_CONST_CALL_P (insns) = 1;
	emit_libcall_block (insns, dest, result, imm);
	return;
      }

    case SYMBOL_SMALL_TLSDESC:
      {
	rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
	rtx tp;

	emit_insn (gen_tlsdesc_small (imm));
	tp = aarch64_load_tp (NULL);
	emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_SMALL_GOTTPREL:
      {
	rtx tmp_reg = gen_reg_rtx (Pmode);
	rtx tp = aarch64_load_tp (NULL);
	emit_insn (gen_tlsie_small (tmp_reg, imm));
	emit_insn (gen_rtx_SET (Pmode, dest,
				gen_rtx_PLUS (Pmode, tp, tmp_reg)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_SMALL_TPREL:
      {
	rtx tp = aarch64_load_tp (NULL);
	emit_insn (gen_tlsle_small (dest, tp, imm));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    default:
      gcc_unreachable ();
    }
}
/* Emit a move from SRC to DEST.  Assume that the move expanders can
   handle all moves if !can_create_pseudo_p ().  The distinction is
   important because, unlike emit_move_insn, the move expanders know
   how to force Pmode objects into the constant pool even when the
   constant pool address is not itself legitimate.  */
static rtx
aarch64_emit_move (rtx dest, rtx src)
{
  return (can_create_pseudo_p ()
	  ? emit_move_insn (dest, src)
	  : emit_move_insn_1 (dest, src));
}
void
aarch64_split_128bit_move (rtx dst, rtx src)
{
  rtx low_dst;

  enum machine_mode src_mode = GET_MODE (src);
  enum machine_mode dst_mode = GET_MODE (dst);
  int src_regno = REGNO (src);
  int dst_regno = REGNO (dst);

  gcc_assert (dst_mode == TImode || dst_mode == TFmode);

  if (REG_P (dst) && REG_P (src))
    {
      gcc_assert (src_mode == TImode || src_mode == TFmode);

      /* Handle r -> w, w -> r.  */
      if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
	{
	  switch (src_mode)
	    {
	    case TImode:
	      emit_insn
		(gen_aarch64_movtilow_di (dst, gen_lowpart (word_mode, src)));
	      emit_insn
		(gen_aarch64_movtihigh_di (dst, gen_highpart (word_mode, src)));
	      return;
	    case TFmode:
	      emit_insn
		(gen_aarch64_movtflow_di (dst, gen_lowpart (word_mode, src)));
	      emit_insn
		(gen_aarch64_movtfhigh_di (dst, gen_highpart (word_mode, src)));
	      return;
	    default:
	      gcc_unreachable ();
	    }
	}
      else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
	{
	  switch (src_mode)
	    {
	    case TImode:
	      emit_insn
		(gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst), src));
	      emit_insn
		(gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst), src));
	      return;
	    case TFmode:
	      emit_insn
		(gen_aarch64_movdi_tflow (gen_lowpart (word_mode, dst), src));
	      emit_insn
		(gen_aarch64_movdi_tfhigh (gen_highpart (word_mode, dst), src));
	      return;
	    default:
	      gcc_unreachable ();
	    }
	}
      /* Fall through to r -> r cases.  */
    }

  switch (dst_mode)
    {
    case TImode:
      low_dst = gen_lowpart (word_mode, dst);
      if (REG_P (low_dst)
	  && reg_overlap_mentioned_p (low_dst, src))
	{
	  aarch64_emit_move (gen_highpart (word_mode, dst),
			     gen_highpart_mode (word_mode, TImode, src));
	  aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
	}
      else
	{
	  aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
	  aarch64_emit_move (gen_highpart (word_mode, dst),
			     gen_highpart_mode (word_mode, TImode, src));
	}
      return;
    case TFmode:
      emit_move_insn (gen_rtx_REG (DFmode, dst_regno),
		      gen_rtx_REG (DFmode, src_regno));
      emit_move_insn (gen_rtx_REG (DFmode, dst_regno + 1),
		      gen_rtx_REG (DFmode, src_regno + 1));
      return;
    default:
      gcc_unreachable ();
    }
}
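/* For instance, a TImode copy between general-register pairs such as
   x0/x1 and x2/x3 becomes two word_mode moves of the low and high
   halves; the overlap check above orders the two moves so that no
   source half is clobbered before it has been read.  */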
bool
aarch64_split_128bit_move_p (rtx dst, rtx src)
{
  return (! REG_P (src)
	  || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
}
/* Split a complex SIMD combine.  */

void
aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
{
  enum machine_mode src_mode = GET_MODE (src1);
  enum machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src1) && REG_P (src2))
    {
      rtx (*gen) (rtx, rtx, rtx);

      switch (src_mode)
	{
	case V8QImode:
	  gen = gen_aarch64_simd_combinev8qi;
	  break;
	case V4HImode:
	  gen = gen_aarch64_simd_combinev4hi;
	  break;
	case V2SImode:
	  gen = gen_aarch64_simd_combinev2si;
	  break;
	case V2SFmode:
	  gen = gen_aarch64_simd_combinev2sf;
	  break;
	case DImode:
	  gen = gen_aarch64_simd_combinedi;
	  break;
	case DFmode:
	  gen = gen_aarch64_simd_combinedf;
	  break;
	default:
	  gcc_unreachable ();
	}

      emit_insn (gen (dst, src1, src2));
      return;
    }
}
/* Split a complex SIMD move.  */

void
aarch64_split_simd_move (rtx dst, rtx src)
{
  enum machine_mode src_mode = GET_MODE (src);
  enum machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src))
    {
      rtx (*gen) (rtx, rtx);

      gcc_assert (VECTOR_MODE_P (src_mode));

      switch (src_mode)
	{
	case V16QImode:
	  gen = gen_aarch64_split_simd_movv16qi;
	  break;
	case V8HImode:
	  gen = gen_aarch64_split_simd_movv8hi;
	  break;
	case V4SImode:
	  gen = gen_aarch64_split_simd_movv4si;
	  break;
	case V2DImode:
	  gen = gen_aarch64_split_simd_movv2di;
	  break;
	case V4SFmode:
	  gen = gen_aarch64_split_simd_movv4sf;
	  break;
	case V2DFmode:
	  gen = gen_aarch64_split_simd_movv2df;
	  break;
	default:
	  gcc_unreachable ();
	}

      emit_insn (gen (dst, src));
      return;
    }
}
static rtx
aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
{
  if (can_create_pseudo_p ())
    return force_reg (mode, value);
  else
    {
      x = aarch64_emit_move (x, value);
      return x;
    }
}
static rtx
aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg,
		    HOST_WIDE_INT offset)
{
  if (!aarch64_plus_immediate (GEN_INT (offset), mode))
    {
      rtx high;
      /* Load the full offset into a register.  This
	 might be improvable in the future.  */
      high = GEN_INT (offset);
      offset = 0;
      high = aarch64_force_temporary (mode, temp, high);
      reg = aarch64_force_temporary (mode, temp,
				     gen_rtx_PLUS (mode, high, reg));
    }
  return plus_constant (mode, reg, offset);
}
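/* E.g. an offset of 4096 is a valid ADD immediate (a uimm12, optionally
   shifted by 12), so plus_constant folds it directly, whereas an offset
   like 0x123457 must first be built in TEMP and added as a register.  */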
void
aarch64_expand_mov_immediate (rtx dest, rtx imm)
{
  enum machine_mode mode = GET_MODE (dest);
  unsigned HOST_WIDE_INT mask;
  int i;
  bool first;
  unsigned HOST_WIDE_INT val;
  bool subtargets;
  rtx subtarget;
  int one_match, zero_match;

  gcc_assert (mode == SImode || mode == DImode);

  /* Check on what type of symbol it is.  */
  if (GET_CODE (imm) == SYMBOL_REF
      || GET_CODE (imm) == LABEL_REF
      || GET_CODE (imm) == CONST)
    {
      rtx mem, base, offset;
      enum aarch64_symbol_type sty;

      /* If we have (const (plus symbol offset)), separate out the offset
	 before we start classifying the symbol.  */
      split_const (imm, &base, &offset);

      sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
      switch (sty)
	{
	case SYMBOL_FORCE_TO_MEM:
	  if (offset != const0_rtx
	      && targetm.cannot_force_const_mem (mode, imm))
	    {
	      gcc_assert(can_create_pseudo_p ());
	      base = aarch64_force_temporary (mode, dest, base);
	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
	      aarch64_emit_move (dest, base);
	      return;
	    }
	  mem = force_const_mem (mode, imm);
	  gcc_assert (mem);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
	  return;

	case SYMBOL_SMALL_TLSGD:
	case SYMBOL_SMALL_TLSDESC:
	case SYMBOL_SMALL_GOTTPREL:
	case SYMBOL_SMALL_GOT:
	  if (offset != const0_rtx)
	    {
	      gcc_assert(can_create_pseudo_p ());
	      base = aarch64_force_temporary (mode, dest, base);
	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
	      aarch64_emit_move (dest, base);
	      return;
	    }
	  /* FALLTHRU */

	case SYMBOL_SMALL_TPREL:
	case SYMBOL_SMALL_ABSOLUTE:
	case SYMBOL_TINY_ABSOLUTE:
	  aarch64_load_symref_appropriately (dest, imm, sty);
	  return;

	default:
	  gcc_unreachable ();
	}
    }
  if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
      return;
    }

  if (!CONST_INT_P (imm))
    {
      if (GET_CODE (imm) == HIGH)
	emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
      else
	{
	  rtx mem = force_const_mem (mode, imm);
	  gcc_assert (mem);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
	}

      return;
    }

  if (mode == SImode)
    {
      /* We know we can't do this in 1 insn, and we must be able to do it
	 in two; so don't mess around looking for sequences that don't buy
	 us anything.  */
      emit_insn (gen_rtx_SET (VOIDmode, dest,
			      GEN_INT (INTVAL (imm) & 0xffff)));
      emit_insn (gen_insv_immsi (dest, GEN_INT (16),
				 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
      return;
    }
  /* Remaining cases are all for DImode.  */

  val = INTVAL (imm);
  subtargets = optimize && can_create_pseudo_p ();

  one_match = 0;
  zero_match = 0;
  mask = 0xffff;

  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) == 0)
	zero_match++;
      else if ((val & mask) == mask)
	one_match++;
    }

  if (one_match == 2)
    {
      mask = 0xffff;
      for (i = 0; i < 64; i += 16, mask <<= 16)
	{
	  if ((val & mask) != mask)
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
	      emit_insn (gen_insv_immdi (dest, GEN_INT (i),
					 GEN_INT ((val >> i) & 0xffff)));
	      return;
	    }
	}
      gcc_unreachable ();
    }

  if (zero_match == 2)
    goto simple_sequence;
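  /* A sketch of the quarter-matching above: val == 0xffff1234ffff5678
     scans as one_match == 2, so it is emitted as a MOVN producing
     0xffff1234ffffffff (a single non-ones quarter) followed by a MOVK
     of 0x5678; two instructions instead of four.  */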
  mask = 0x0ffff0000UL;
  for (i = 16; i < 64; i += 16, mask <<= 16)
    {
      HOST_WIDE_INT comp = mask & ~(mask - 1);

      if (aarch64_uimm12_shift (val - (val & mask)))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - (val & mask))));
	  return;
	}
      else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT ((val + comp) & mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - ((val + comp) & mask))));
	  return;
	}
      else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT ((val - comp) | ~mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - ((val - comp) | ~mask))));
	  return;
	}
      else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT (val | ~mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - (val | ~mask))));
	  return;
	}
    }
  /* See if we can do it by arithmetically combining two
     immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      int j;
      mask = 0xffff;

      if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
	  || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT (aarch64_bitmasks[i])));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - aarch64_bitmasks[i])));
	  return;
	}

      for (j = 0; j < 64; j += 16, mask <<= 16)
	{
	  if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, dest,
				      GEN_INT (aarch64_bitmasks[i])));
	      emit_insn (gen_insv_immdi (dest, GEN_INT (j),
					 GEN_INT ((val >> j) & 0xffff)));
	      return;
	    }
	}
    }
  /* See if we can do it by logically combining two immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
	{
	  int j;

	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
	    if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
	      {
		subtarget = subtargets ? gen_reg_rtx (mode) : dest;
		emit_insn (gen_rtx_SET (VOIDmode, subtarget,
					GEN_INT (aarch64_bitmasks[i])));
		emit_insn (gen_iordi3 (dest, subtarget,
				       GEN_INT (aarch64_bitmasks[j])));
		return;
	      }
	}
      else if ((val & aarch64_bitmasks[i]) == val)
	{
	  int j;

	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
	    if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
	      {
		subtarget = subtargets ? gen_reg_rtx (mode) : dest;
		emit_insn (gen_rtx_SET (VOIDmode, subtarget,
					GEN_INT (aarch64_bitmasks[j])));
		emit_insn (gen_anddi3 (dest, subtarget,
				       GEN_INT (aarch64_bitmasks[i])));
		return;
	      }
	}
    }
 simple_sequence:
  first = true;
  mask = 0xffff;

  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) != 0)
	{
	  if (first)
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, dest,
				      GEN_INT (val & mask)));
	      first = false;
	    }
	  else
	    emit_insn (gen_insv_immdi (dest, GEN_INT (i),
				       GEN_INT ((val >> i) & 0xffff)));
	}
    }
}
static bool
aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  /* Indirect calls are not currently supported.  */
  if (decl == NULL)
    return false;

  /* Cannot tail-call to long-calls, since these are outside of the
     range of a branch instruction (we could handle this if we added
     support for indirect tail-calls).  */
  if (aarch64_decl_is_long_call_p (decl))
    return false;

  return true;
}
/* Implement TARGET_PASS_BY_REFERENCE.  */

static bool
aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
			   enum machine_mode mode,
			   const_tree type,
			   bool named ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  enum machine_mode dummymode;
  int nregs;

  /* GET_MODE_SIZE (BLKmode) is useless since it is 0.  */
  size = (mode == BLKmode && type)
    ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);

  if (type)
    {
      /* Arrays always passed by reference.  */
      if (TREE_CODE (type) == ARRAY_TYPE)
	return true;
      /* Other aggregates based on their size.  */
      if (AGGREGATE_TYPE_P (type))
	size = int_size_in_bytes (type);
    }

  /* Variable sized arguments are always returned by reference.  */
  if (size < 0)
    return true;

  /* Can this be a candidate to be passed in fp/simd register(s)?  */
  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
					       &dummymode, &nregs, NULL))
    return false;

  /* Arguments which are variable sized or larger than 2 registers are
     passed by reference unless they are a homogenous floating point
     aggregate.  */
  return size > 2 * UNITS_PER_WORD;
}
/* Return TRUE if VALTYPE is padded to its least significant bits.  */
static bool
aarch64_return_in_msb (const_tree valtype)
{
  enum machine_mode dummy_mode;
  int dummy_int;

  /* Never happens in little-endian mode.  */
  if (!BYTES_BIG_ENDIAN)
    return false;

  /* Only composite types smaller than or equal to 16 bytes can
     be potentially returned in registers.  */
  if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
      || int_size_in_bytes (valtype) <= 0
      || int_size_in_bytes (valtype) > 16)
    return false;

  /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
     or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
     is always passed/returned in the least significant bits of fp/simd
     register(s).  */
  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
					       &dummy_mode, &dummy_int, NULL))
    return false;

  return true;
}
/* Implement TARGET_FUNCTION_VALUE.
   Define how to find the value returned by a function.  */

static rtx
aarch64_function_value (const_tree type, const_tree func,
			bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode;
  int unsignedp;
  int count;
  enum machine_mode ag_mode;

  mode = TYPE_MODE (type);
  if (INTEGRAL_TYPE_P (type))
    mode = promote_function_mode (type, mode, &unsignedp, func, 1);

  if (aarch64_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);

      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
					       &ag_mode, &count, NULL))
    {
      if (!aarch64_composite_type_p (type, mode))
	{
	  gcc_assert (count == 1 && mode == ag_mode);
	  return gen_rtx_REG (mode, V0_REGNUM);
	}
      else
	{
	  int i;
	  rtx par;

	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
	  for (i = 0; i < count; i++)
	    {
	      rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				       GEN_INT (i * GET_MODE_SIZE (ag_mode)));
	      XVECEXP (par, 0, i) = tmp;
	    }
	  return par;
	}
    }
  else
    return gen_rtx_REG (mode, R0_REGNUM);
}
/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
   Return true if REGNO is the number of a hard register in which the values
   of called function may come back.  */

static bool
aarch64_function_value_regno_p (const unsigned int regno)
{
  /* Maximum of 16 bytes can be returned in the general registers.  Examples
     of 16-byte return values are: 128-bit integers and 16-byte small
     structures (excluding homogeneous floating-point aggregates).  */
  if (regno == R0_REGNUM || regno == R1_REGNUM)
    return true;

  /* Up to four fp/simd registers can return a function value, e.g. a
     homogeneous floating-point aggregate having four members.  */
  if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
    return !TARGET_GENERAL_REGS_ONLY;

  return false;
}
/* Implement TARGET_RETURN_IN_MEMORY.

   If the type T of the result of a function is such that
     void func (T arg)
   would require that arg be passed as a value in a register (or set of
   registers) according to the parameter passing rules, then the result
   is returned in the same registers as would be used for such an
   argument.  */

static bool
aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  enum machine_mode ag_mode;
  int count;

  if (!AGGREGATE_TYPE_P (type)
      && TREE_CODE (type) != COMPLEX_TYPE
      && TREE_CODE (type) != VECTOR_TYPE)
    /* Simple scalar types always returned in registers.  */
    return false;

  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
					       type,
					       &ag_mode,
					       &count,
					       NULL))
    return false;

  /* Types larger than 2 registers returned in memory.  */
  size = int_size_in_bytes (type);
  return (size < 0 || size > 2 * UNITS_PER_WORD);
}
static bool
aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
			       const_tree type, int *nregs)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  return aarch64_vfp_is_call_or_return_candidate (mode,
						  type,
						  &pcum->aapcs_vfp_rmode,
						  nregs,
						  NULL);
}
/* Given MODE and TYPE of a function argument, return the alignment in
   bits.  The idea is to suppress any stronger alignment requested by
   the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
   This is a helper function for local use only.  */

static unsigned int
aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
{
  unsigned int alignment;

  if (type)
    {
      if (!integer_zerop (TYPE_SIZE (type)))
	{
	  if (TYPE_MODE (type) == mode)
	    alignment = TYPE_ALIGN (type);
	  else
	    alignment = GET_MODE_ALIGNMENT (mode);
	}
      else
	alignment = 0;
    }
  else
    alignment = GET_MODE_ALIGNMENT (mode);

  return alignment;
}
/* Layout a function argument according to the AAPCS64 rules.  The rule
   numbers refer to the rule numbers in the AAPCS64.  */

static void
aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
		    const_tree type,
		    bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int ncrn, nvrn, nregs;
  bool allocate_ncrn, allocate_nvrn;

  /* We need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
  allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
						 mode,
						 type,
						 &nregs);

  /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
     The following code thus handles passing by SIMD/FP registers first.  */

  nvrn = pcum->aapcs_nvrn;

  /* C1 - C5 for floating point, homogenous floating point aggregates (HFA)
     and homogenous short-vector aggregates (HVA).  */
  if (allocate_nvrn)
    {
      if (nvrn + nregs <= NUM_FP_ARG_REGS)
	{
	  pcum->aapcs_nextnvrn = nvrn + nregs;
	  if (!aarch64_composite_type_p (type, mode))
	    {
	      gcc_assert (nregs == 1);
	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
	    }
	  else
	    {
	      rtx par;
	      int i;

	      par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
	      for (i = 0; i < nregs; i++)
		{
		  rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
					 V0_REGNUM + nvrn + i);
		  tmp = gen_rtx_EXPR_LIST
		    (VOIDmode, tmp,
		     GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
		  XVECEXP (par, 0, i) = tmp;
		}
	      pcum->aapcs_reg = par;
	    }
	  return;
	}
      else
	{
	  /* C.3 NSRN is set to 8.  */
	  pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
	  goto on_stack;
	}
    }

  ncrn = pcum->aapcs_ncrn;
  nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
	   + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* C6 - C9.  though the sign and zero extension semantics are
     handled elsewhere.  This is the case where the argument fits
     entirely general registers.  */
  if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
    {
      unsigned int alignment = aarch64_function_arg_alignment (mode, type);

      gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);

      /* C.8 if the argument has an alignment of 16 then the NGRN is
	 rounded up to the next even number.  */
      if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
	{
	  ++ncrn;
	  gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
	}
      /* NREGS can be 0 when e.g. an empty structure is to be passed.
	 A reg is still generated for it, but the caller should be smart
	 enough not to use it.  */
      if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
	{
	  pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
	}
      else
	{
	  rtx par;
	  int i;

	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
	  for (i = 0; i < nregs; i++)
	    {
	      rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				       GEN_INT (i * UNITS_PER_WORD));
	      XVECEXP (par, 0, i) = tmp;
	    }
	  pcum->aapcs_reg = par;
	}

      pcum->aapcs_nextncrn = ncrn + nregs;
      return;
    }

  /* C.11  */
  pcum->aapcs_nextncrn = NUM_ARG_REGS;

  /* The argument is passed on stack; record the needed number of words for
     this argument (we can re-use NREGS) and align the total size if
     necessary.  */
on_stack:
  pcum->aapcs_stack_words = nregs;
  if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
    pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
					       16 / UNITS_PER_WORD) + 1;
  return;
}
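/* To illustrate rule C.8 above: a 16-byte-aligned, 16-byte composite
   arriving with NGRN == 1 skips x1 and is passed in x2/x3; an __int128
   argument gets the same treatment.  */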
/* Implement TARGET_FUNCTION_ARG.  */

static rtx
aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
		      const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);

  if (mode == VOIDmode)
    return NULL_RTX;

  aarch64_layout_arg (pcum_v, mode, type, named);
  return pcum->aapcs_reg;
}
void
aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
			      const_tree fntype ATTRIBUTE_UNUSED,
			      rtx libname ATTRIBUTE_UNUSED,
			      const_tree fndecl ATTRIBUTE_UNUSED,
			      unsigned n_named ATTRIBUTE_UNUSED)
{
  pcum->aapcs_ncrn = 0;
  pcum->aapcs_nvrn = 0;
  pcum->aapcs_nextncrn = 0;
  pcum->aapcs_nextnvrn = 0;
  pcum->pcs_variant = ARM_PCS_AAPCS64;
  pcum->aapcs_reg = NULL_RTX;
  pcum->aapcs_arg_processed = false;
  pcum->aapcs_stack_words = 0;
  pcum->aapcs_stack_size = 0;

  return;
}
static void
aarch64_function_arg_advance (cumulative_args_t pcum_v,
			      enum machine_mode mode,
			      const_tree type,
			      bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  if (pcum->pcs_variant == ARM_PCS_AAPCS64)
    {
      aarch64_layout_arg (pcum_v, mode, type, named);
      gcc_assert ((pcum->aapcs_reg != NULL_RTX)
		  != (pcum->aapcs_stack_words != 0));
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
      pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
      pcum->aapcs_stack_size += pcum->aapcs_stack_words;
      pcum->aapcs_stack_words = 0;
      pcum->aapcs_reg = NULL_RTX;
    }
}
bool
aarch64_function_arg_regno_p (unsigned regno)
{
  return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
	  || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
}
/* Implement FUNCTION_ARG_BOUNDARY.  Every parameter gets at least
   PARM_BOUNDARY bits of alignment, but will be given anything up
   to STACK_BOUNDARY bits if the type requires it.  This makes sure
   that both before and after the layout of each argument, the Next
   Stacked Argument Address (NSAA) will have a minimum alignment of
   8 bytes.  */

static unsigned int
aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
{
  unsigned int alignment = aarch64_function_arg_alignment (mode, type);

  if (alignment < PARM_BOUNDARY)
    alignment = PARM_BOUNDARY;
  if (alignment > STACK_BOUNDARY)
    alignment = STACK_BOUNDARY;
  return alignment;
}
/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).

   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte of the stack slot has useful data.

   Small aggregate types are placed in the lowest memory address.

   The related parameter passing rules are B.4, C.3, C.5 and C.14.  */

bool
aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
{
  /* On little-endian targets, the least significant byte of every stack
     argument is passed at the lowest byte address of the stack slot.  */
  if (!BYTES_BIG_ENDIAN)
    return true;

  /* Otherwise, integral types and floating point types are padded downward:
     the least significant byte of a stack argument is passed at the highest
     byte address of the stack slot.  */
  if (type
      ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type))
      : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
    return false;

  /* Everything else padded upward, i.e. data in first byte of stack slot.  */
  return true;
}
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).

   It specifies padding for the last (may also be the only)
   element of a block move between registers and memory.  If
   assuming the block is in the memory, padding upward means that
   the last element is padded after its highest significant byte,
   while in downward padding, the last element is padded at its
   least significant byte side.

   Small aggregates and small complex types are always padded
   upwards.

   We don't need to worry about homogeneous floating-point or
   short-vector aggregates; their move is not affected by the
   padding direction determined here.  Regardless of endianness,
   each element of such an aggregate is put in the least
   significant bits of a fp/simd register.

   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */

bool
aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
			bool first ATTRIBUTE_UNUSED)
{
  /* Small composite types are always padded upward.  */
  if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
    {
      HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
			    : GET_MODE_SIZE (mode));
      if (size < 2 * UNITS_PER_WORD)
	return true;
    }

  /* Otherwise, use the default padding.  */
  return !BYTES_BIG_ENDIAN;
}
static enum machine_mode
aarch64_libgcc_cmp_return_mode (void)
{
  return SImode;
}
static bool
aarch64_frame_pointer_required (void)
{
  /* If the function contains dynamic stack allocations, we need to
     use the frame pointer to access the static parts of the frame.  */
  if (cfun->calls_alloca)
    return true;

  /* We may have turned flag_omit_frame_pointer on in order to have this
     function called; if we did, we also set the 'faked_omit_frame_pointer' flag
     and we'll check it here.
     If we really did set flag_omit_frame_pointer normally, then we return false
     (no frame pointer required) in all cases.  */

  if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
    return false;
  else if (flag_omit_leaf_frame_pointer)
    return !crtl->is_leaf;
  return true;
}
/* Mark the registers that need to be saved by the callee and calculate
   the size of the callee-saved registers area and frame record (both FP
   and LR may be omitted).  */
static void
aarch64_layout_frame (void)
{
  HOST_WIDE_INT offset = 0;
  int regno;

  if (reload_completed && cfun->machine->frame.laid_out)
    return;

  cfun->machine->frame.fp_lr_offset = 0;

  /* First mark all the registers that really need to be saved...  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = -1;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = -1;

  /* ... that includes the eh data registers (if needed)...  */
  if (crtl->calls_eh_return)
    for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
      cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;

  /* ... and any callee saved register that dataflow says is live.  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
	&& !call_used_regs[regno])
      cfun->machine->frame.reg_offset[regno] = 0;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
	&& !call_used_regs[regno])
      cfun->machine->frame.reg_offset[regno] = 0;

  if (frame_pointer_needed)
    {
      cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
      cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
      cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
    }

  /* Now assign stack slots for them.  */
  for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] != -1)
      {
	cfun->machine->frame.reg_offset[regno] = offset;
	offset += UNITS_PER_WORD;
      }

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] != -1)
      {
	cfun->machine->frame.reg_offset[regno] = offset;
	offset += UNITS_PER_WORD;
      }

  if (frame_pointer_needed)
    {
      cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
      offset += UNITS_PER_WORD;
      cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
    }

  if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
    {
      cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
      offset += UNITS_PER_WORD;
      cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
    }

  cfun->machine->frame.padding0 =
    (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
  offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);

  cfun->machine->frame.saved_regs_size = offset;
  cfun->machine->frame.laid_out = true;
}
/* Make the last instruction frame-related and note that it performs
   the operation described by FRAME_PATTERN.  */

static void
aarch64_set_frame_expr (rtx frame_pattern)
{
  rtx insn;

  insn = get_last_insn ();
  RTX_FRAME_RELATED_P (insn) = 1;
  RTX_FRAME_RELATED_P (frame_pattern) = 1;
  REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
				      frame_pattern,
				      REG_NOTES (insn));
}
static bool
aarch64_register_saved_on_entry (int regno)
{
  return cfun->machine->frame.reg_offset[regno] != -1;
}
static void
aarch64_save_or_restore_fprs (int start_offset, int increment,
			      bool restore, rtx base_rtx)
{
  unsigned regno;
  unsigned regno2;
  rtx insn;
  rtx (*gen_mem_ref)(enum machine_mode, rtx)
    = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    {
      if (aarch64_register_saved_on_entry (regno))
	{
	  rtx mem;
	  mem = gen_mem_ref (DFmode,
			     plus_constant (Pmode,
					    base_rtx,
					    start_offset));

	  for (regno2 = regno + 1;
	       regno2 <= V31_REGNUM
		 && !aarch64_register_saved_on_entry (regno2);
	       regno2++)
	    {
	      /* Empty loop.  */
	    }
	  if (regno2 <= V31_REGNUM &&
	      aarch64_register_saved_on_entry (regno2))
	    {
	      rtx mem2;
	      /* Next highest register to be saved.  */
	      mem2 = gen_mem_ref (DFmode,
				  plus_constant (Pmode,
						 base_rtx,
						 start_offset + increment));
	      if (restore == false)
		{
		  insn = emit_insn
		    ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
					mem2, gen_rtx_REG (DFmode, regno2)));
		}
	      else
		{
		  insn = emit_insn
		    ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
				       gen_rtx_REG (DFmode, regno2), mem2));

		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DFmode, regno));
		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DFmode, regno2));
		}

	      /* The first part of a frame-related parallel insn
		 is always assumed to be relevant to the frame
		 calculations; subsequent parts, are only
		 frame-related if explicitly marked.  */
	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
	      regno = regno2;
	      start_offset += increment * 2;
	    }
	  else
	    {
	      if (restore == false)
		insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
	      else
		{
		  insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DImode, regno));
		}
	      start_offset += increment;
	    }
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
/* Offset from the stack pointer of where the saves and
   restores have to happen.  */
static void
aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
					       bool restore)
{
  rtx insn;
  rtx base_rtx = stack_pointer_rtx;
  HOST_WIDE_INT start_offset = offset;
  HOST_WIDE_INT increment = UNITS_PER_WORD;
  rtx (*gen_mem_ref)(enum machine_mode, rtx)
    = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
  unsigned limit = (frame_pointer_needed)? R28_REGNUM : R30_REGNUM;
  unsigned regno;
  unsigned regno2;

  for (regno = R0_REGNUM; regno <= limit; regno++)
    {
      if (aarch64_register_saved_on_entry (regno))
	{
	  rtx mem;
	  mem = gen_mem_ref (Pmode,
			     plus_constant (Pmode,
					    base_rtx,
					    start_offset));

	  for (regno2 = regno + 1;
	       regno2 <= limit
		 && !aarch64_register_saved_on_entry (regno2);
	       regno2++)
	    {
	      /* Empty loop.  */
	    }
	  if (regno2 <= limit &&
	      aarch64_register_saved_on_entry (regno2))
	    {
	      rtx mem2;
	      /* Next highest register to be saved.  */
	      mem2 = gen_mem_ref (Pmode,
				  plus_constant (Pmode,
						 base_rtx,
						 start_offset + increment));
	      if (restore == false)
		{
		  insn = emit_insn
		    ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
					mem2, gen_rtx_REG (DImode, regno2)));
		}
	      else
		{
		  insn = emit_insn
		    ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
				       gen_rtx_REG (DImode, regno2), mem2));

		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DImode, regno));
		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DImode, regno2));
		}

	      /* The first part of a frame-related parallel insn
		 is always assumed to be relevant to the frame
		 calculations; subsequent parts, are only
		 frame-related if explicitly marked.  */
	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
	      regno = regno2;
	      start_offset += increment * 2;
	    }
	  else
	    {
	      if (restore == false)
		insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
	      else
		{
		  insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DImode, regno));
		}
	      start_offset += increment;
	    }
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
}
/* AArch64 stack frames generated by this compiler look like:

	+-------------------------------+
	|                               |
	|  incoming stack arguments     |
	|                               |
	+-------------------------------+ <-- arg_pointer_rtx
	|                               |
	|  callee-allocated save area   |
	|  for register varargs         |
	|                               |
	+-------------------------------+
	|                               |
	|  local variables              |
	|                               |
	+-------------------------------+ <-- frame_pointer_rtx
	|                               |
	|  callee-saved registers       |
	|                               |
	+-------------------------------+
	|  LR'                          |
	+-------------------------------+
	|  FP'                          |
      P +-------------------------------+ <-- hard_frame_pointer_rtx
	|  dynamic allocation           |
	+-------------------------------+
	|                               |
	|  outgoing stack arguments     |
	|                               |
	+-------------------------------+ <-- stack_pointer_rtx

   Dynamic stack allocations such as alloca insert data at point P.
   They decrease stack_pointer_rtx but leave frame_pointer_rtx and
   hard_frame_pointer_rtx unchanged.  */
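/* A small worked example of this layout (illustrative numbers): with 16
   bytes of locals, 32 bytes of outgoing arguments and only x29/x30
   saved, the rounded frame size is 64 and fp_offset is 32, so the
   prologue below would emit roughly

	sub	sp, sp, #64
	stp	x29, x30, [sp, #32]
	add	x29, sp, #32  */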
/* Generate the prologue instructions for entry into a function.
   Establish the stack frame by decreasing the stack pointer with a
   properly calculated size and, if necessary, create a frame record
   filled with the values of LR and previous frame pointer.  The
   current FP is also set up if it is in use.  */

void
aarch64_expand_prologue (void)
{
  /* sub sp, sp, #<frame_size>
     stp {fp, lr}, [sp, #<frame_size> - 16]
     add fp, sp, #<frame_size> - hardfp_offset
     stp {cs_reg}, [fp, #-16] etc.

     sub sp, sp, <final_adjustment_if_any>
  */
  HOST_WIDE_INT original_frame_size;	/* local variables + vararg save */
  HOST_WIDE_INT frame_size, offset;
  HOST_WIDE_INT fp_offset;		/* FP offset from SP */
  rtx insn;

  aarch64_layout_frame ();
  original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
  gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
	      && (cfun->stdarg || !cfun->machine->saved_varargs_size));
  frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
		+ crtl->outgoing_args_size);
  offset = frame_size = AARCH64_ROUND_UP (frame_size,
					  STACK_BOUNDARY / BITS_PER_UNIT);

  if (flag_stack_usage_info)
    current_function_static_stack_size = frame_size;

  fp_offset = (offset
	       - original_frame_size
	       - cfun->machine->frame.saved_regs_size);

  /* Store pairs and load pairs have a range only -512 to 504.  */
  if (offset >= 512)
    {
      /* When the frame has a large size, an initial decrease is done on
	 the stack pointer to jump over the callee-allocated save area for
	 register varargs, the local variable area and/or the callee-saved
	 register area.  This will allow the pre-index write-back
	 store pair instructions to be used for setting up the stack frame
	 efficiently.  */
      offset = original_frame_size + cfun->machine->frame.saved_regs_size;
      if (offset >= 512)
	offset = cfun->machine->frame.saved_regs_size;

      frame_size -= (offset + crtl->outgoing_args_size);
      fp_offset = 0;

      if (frame_size >= 0x1000000)
	{
	  rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
	  emit_move_insn (op0, GEN_INT (-frame_size));
	  emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
	  aarch64_set_frame_expr (gen_rtx_SET
				  (Pmode, stack_pointer_rtx,
				   gen_rtx_PLUS (Pmode,
						 stack_pointer_rtx,
						 GEN_INT (-frame_size))));
	}
      else if (frame_size > 0)
	{
	  if ((frame_size & 0xfff) != frame_size)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx,
				 GEN_INT (-(frame_size
					    & ~(HOST_WIDE_INT)0xfff))));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	  if ((frame_size & 0xfff) != 0)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx,
				 GEN_INT (-(frame_size
					    & (HOST_WIDE_INT)0xfff))));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}
    }
  else
    frame_size = -1;

  if (offset > 0)
    {
      /* Save the frame pointer and lr if the frame pointer is needed
	 first.  Make the frame pointer point to the location of the
	 old frame pointer on the stack.  */
      if (frame_pointer_needed)
	{
	  rtx mem_fp, mem_lr;

	  if (fp_offset)
	    {
	      insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					       GEN_INT (-offset)));
	      RTX_FRAME_RELATED_P (insn) = 1;
	      aarch64_set_frame_expr (gen_rtx_SET
				      (Pmode, stack_pointer_rtx,
				       gen_rtx_MINUS (Pmode,
						      stack_pointer_rtx,
						      GEN_INT (offset))));
	      mem_fp = gen_frame_mem (DImode,
				      plus_constant (Pmode,
						     stack_pointer_rtx,
						     fp_offset));
	      mem_lr = gen_frame_mem (DImode,
				      plus_constant (Pmode,
						     stack_pointer_rtx,
						     fp_offset
						     + UNITS_PER_WORD));
	      insn = emit_insn (gen_store_pairdi (mem_fp,
						  hard_frame_pointer_rtx,
						  mem_lr,
						  gen_rtx_REG (DImode,
							       LR_REGNUM)));
	    }
	  else
	    {
	      insn = emit_insn (gen_storewb_pairdi_di
				(stack_pointer_rtx, stack_pointer_rtx,
				 hard_frame_pointer_rtx,
				 gen_rtx_REG (DImode, LR_REGNUM),
				 GEN_INT (-offset),
				 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
	    }

	  /* The first part of a frame-related parallel insn is always
	     assumed to be relevant to the frame calculations;
	     subsequent parts, are only frame-related if explicitly
	     marked.  */
	  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
	  RTX_FRAME_RELATED_P (insn) = 1;

	  /* Set up frame pointer to point to the location of the
	     previous frame pointer on the stack.  */
	  insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
					   stack_pointer_rtx,
					   GEN_INT (fp_offset)));
	  aarch64_set_frame_expr (gen_rtx_SET
				  (Pmode, hard_frame_pointer_rtx,
				   gen_rtx_PLUS (Pmode,
						 stack_pointer_rtx,
						 GEN_INT (fp_offset))));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
					   hard_frame_pointer_rtx));
	}
      else
	{
	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					   GEN_INT (-offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      aarch64_save_or_restore_callee_save_registers
	(fp_offset + cfun->machine->frame.hardfp_offset, 0);
    }

  /* when offset >= 512,
     sub sp, sp, #<outgoing_args_size> */
  if (frame_size > -1)
    {
      if (crtl->outgoing_args_size > 0)
	{
	  insn = emit_insn (gen_add2_insn
			    (stack_pointer_rtx,
			     GEN_INT (- crtl->outgoing_args_size)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
/* Generate the epilogue instructions for returning from a function.  */
void
aarch64_expand_epilogue (bool for_sibcall)
{
  HOST_WIDE_INT original_frame_size, frame_size, offset;
  HOST_WIDE_INT fp_offset;
  rtx insn;
  rtx cfa_reg;

  aarch64_layout_frame ();
  original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
  frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
		+ crtl->outgoing_args_size);
  offset = frame_size = AARCH64_ROUND_UP (frame_size,
					  STACK_BOUNDARY / BITS_PER_UNIT);
  fp_offset = (offset
	       - original_frame_size
	       - cfun->machine->frame.saved_regs_size);

  cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;

  /* Store pairs and load pairs have a range only -512 to 504.  */
  if (offset >= 512)
    {
      offset = original_frame_size + cfun->machine->frame.saved_regs_size;
      if (offset >= 512)
	offset = cfun->machine->frame.saved_regs_size;

      frame_size -= (offset + crtl->outgoing_args_size);
      fp_offset = 0;
      if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
	{
	  insn = emit_insn (gen_add2_insn
			    (stack_pointer_rtx,
			     GEN_INT (crtl->outgoing_args_size)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
  else
    frame_size = -1;

  /* If there were outgoing arguments or we've done dynamic stack
     allocation, then restore the stack pointer from the frame
     pointer.  This is at most one insn and more efficient than using
     GCC's internal mechanism.  */
  if (frame_pointer_needed
      && (crtl->outgoing_args_size || cfun->calls_alloca))
    {
      insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
				       hard_frame_pointer_rtx,
				       GEN_INT (- fp_offset)));
      RTX_FRAME_RELATED_P (insn) = 1;
      /* As SP is set to (FP - fp_offset), according to the rules in
	 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
	 from the value of SP from now on.  */
      cfa_reg = stack_pointer_rtx;
    }

  aarch64_save_or_restore_callee_save_registers
    (fp_offset + cfun->machine->frame.hardfp_offset, 1);

  /* Restore the frame pointer and lr if the frame pointer is needed.  */
  if (offset > 0)
    {
      if (frame_pointer_needed)
	{
	  rtx mem_fp, mem_lr;

	  if (fp_offset)
	    {
	      mem_fp = gen_frame_mem (DImode,
				      plus_constant (Pmode,
						     stack_pointer_rtx,
						     fp_offset));
	      mem_lr = gen_frame_mem (DImode,
				      plus_constant (Pmode,
						     stack_pointer_rtx,
						     fp_offset
						     + UNITS_PER_WORD));
	      insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
						 mem_fp,
						 gen_rtx_REG (DImode,
							      LR_REGNUM),
						 mem_lr));
	    }
	  else
	    {
	      insn = emit_insn (gen_loadwb_pairdi_di
				(stack_pointer_rtx,
				 stack_pointer_rtx,
				 hard_frame_pointer_rtx,
				 gen_rtx_REG (DImode, LR_REGNUM),
				 GEN_INT (offset),
				 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
	      add_reg_note (insn, REG_CFA_ADJUST_CFA,
			    (gen_rtx_SET (Pmode, stack_pointer_rtx,
					  plus_constant (Pmode, cfa_reg,
							 offset))));
	    }

	  /* The first part of a frame-related parallel insn
	     is always assumed to be relevant to the frame
	     calculations; subsequent parts, are only
	     frame-related if explicitly marked.  */
	  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
	  add_reg_note (insn, REG_CFA_RESTORE,
			gen_rtx_REG (DImode, LR_REGNUM));

	  if (fp_offset)
	    {
	      insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					       GEN_INT (offset)));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}
      else
	{
	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					   GEN_INT (offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  /* Stack adjustment for exception handler.  */
  if (crtl->calls_eh_return)
    {
      /* We need to unwind the stack by the offset computed by
	 EH_RETURN_STACKADJ_RTX.  However, at this point the CFA is
	 based on SP.  Ideally we would update the SP and define the
	 CFA along the lines of:

	 SP = SP + EH_RETURN_STACKADJ_RTX
	 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)

	 However the dwarf emitter only understands a constant
	 register offset.

	 The solution chosen here is to use the otherwise unused IP0
	 as a temporary register to hold the current SP value.  The
	 CFA is described using IP0 then SP is modified.  */

      rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);

      insn = emit_move_insn (ip0, stack_pointer_rtx);
      add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
      RTX_FRAME_RELATED_P (insn) = 1;

      emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));

      /* Ensure the assignment to IP0 does not get optimized away.  */
      emit_use (ip0);
    }

  if (frame_size > -1)
    {
      if (frame_size >= 0x1000000)
	{
	  rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
	  emit_move_insn (op0, GEN_INT (frame_size));
	  emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
	  aarch64_set_frame_expr (gen_rtx_SET
				  (Pmode, stack_pointer_rtx,
				   gen_rtx_PLUS (Pmode,
						 stack_pointer_rtx,
						 GEN_INT (frame_size))));
	}
      else if (frame_size > 0)
	{
	  if ((frame_size & 0xfff) != 0)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx,
				 GEN_INT ((frame_size
					   & (HOST_WIDE_INT) 0xfff))));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	  if ((frame_size & 0xfff) != frame_size)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx,
				 GEN_INT ((frame_size
					   & ~ (HOST_WIDE_INT) 0xfff))));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}

      aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
					   gen_rtx_PLUS (Pmode,
							 stack_pointer_rtx,
							 GEN_INT (offset))));
    }

  emit_use (gen_rtx_REG (DImode, LR_REGNUM));
  if (!for_sibcall)
    emit_jump_insn (ret_rtx);
}
/* Return the place to copy the exception unwinding return address to.
   This will probably be a stack slot, but could (in theory) be the
   return register.  */
rtx
aarch64_final_eh_return_addr (void)
{
  HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;

  aarch64_layout_frame ();
  original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
  frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
		+ crtl->outgoing_args_size);
  offset = frame_size = AARCH64_ROUND_UP (frame_size,
					  STACK_BOUNDARY / BITS_PER_UNIT);
  fp_offset = offset
    - original_frame_size
    - cfun->machine->frame.saved_regs_size;

  if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
    return gen_rtx_REG (DImode, LR_REGNUM);

  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
     result in a store to save LR introduced by builtin_eh_return () being
     incorrectly deleted because the alias is not detected.
     So in the calculation of the address to copy the exception unwinding
     return address to, we note 2 cases.
     If FP is needed and the fp_offset is 0, it means that SP = FP and hence
     we return a SP-relative location since all the addresses are SP-relative
     in this case.  This prevents the store from being optimized away.
     If the fp_offset is not 0, then the addresses will be FP-relative and
     therefore we return a FP-relative location.  */

  if (frame_pointer_needed)
    {
      if (fp_offset)
	return gen_frame_mem (DImode,
			      plus_constant (Pmode,
					     hard_frame_pointer_rtx,
					     UNITS_PER_WORD));
      else
	return gen_frame_mem (DImode,
			      plus_constant (Pmode,
					     stack_pointer_rtx,
					     UNITS_PER_WORD));
    }

  /* If FP is not needed, we calculate the location of LR, which would be
     at the top of the saved registers block.  */

  return gen_frame_mem (DImode,
			plus_constant (Pmode,
				       stack_pointer_rtx,
				       fp_offset
				       + cfun->machine->frame.saved_regs_size
				       - 2 * UNITS_PER_WORD));
}
/* Output code to build up a constant in a register.  */
static void
aarch64_build_constant (int regnum, HOST_WIDE_INT val)
{
  if (aarch64_bitmask_imm (val, DImode))
    emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
  else
    {
      int i;
      int ncount = 0;
      int zcount = 0;
      HOST_WIDE_INT valp = val >> 16;
      HOST_WIDE_INT valm;
      HOST_WIDE_INT tval;

      for (i = 16; i < 64; i += 16)
	{
	  valm = (valp & 0xffff);

	  if (valm != 0)
	    ++ zcount;

	  if (valm != 0xffff)
	    ++ ncount;

	  valp >>= 16;
	}

      /* zcount contains the number of additional MOVK instructions
	 required if the constant is built up with an initial MOVZ instruction,
	 while ncount is the number of MOVK instructions required if starting
	 with a MOVN instruction.  Choose the sequence that yields the fewest
	 number of instructions, preferring MOVZ instructions when they are
	 both the same.  */
      if (ncount < zcount)
	{
	  emit_move_insn (gen_rtx_REG (Pmode, regnum),
			  GEN_INT ((~val) & 0xffff));
	  tval = 0xffff;
	}
      else
	{
	  emit_move_insn (gen_rtx_REG (Pmode, regnum),
			  GEN_INT (val & 0xffff));
	  tval = 0;
	}

      val >>= 16;

      for (i = 16; i < 64; i += 16)
	{
	  if ((val & 0xffff) != tval)
	    emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
				       GEN_INT (i), GEN_INT (val & 0xffff)));
	  val >>= 16;
	}
    }
}
static void
aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
{
  HOST_WIDE_INT mdelta = delta;
  rtx this_rtx = gen_rtx_REG (Pmode, regnum);
  rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);

  if (mdelta < 0)
    mdelta = -mdelta;

  if (mdelta >= 4096 * 4096)
    {
      aarch64_build_constant (scratchreg, delta);
      emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
    }
  else if (mdelta > 0)
    {
      if (mdelta >= 4096)
	{
	  emit_insn (gen_rtx_SET (Pmode, scratch_rtx,
				  GEN_INT (mdelta / 4096)));
	  rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
	  if (delta < 0)
	    emit_insn (gen_rtx_SET (Pmode, this_rtx,
				    gen_rtx_MINUS (Pmode, this_rtx, shift)));
	  else
	    emit_insn (gen_rtx_SET (Pmode, this_rtx,
				    gen_rtx_PLUS (Pmode, this_rtx, shift)));
	}
      if (mdelta % 4096 != 0)
	{
	  scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
	  emit_insn (gen_rtx_SET (Pmode, this_rtx,
				  gen_rtx_PLUS (Pmode, this_rtx,
						scratch_rtx)));
	}
    }
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */
static void
aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT delta,
			 HOST_WIDE_INT vcall_offset,
			 tree function)
{
  /* The this pointer is always in x0.  Note that this differs from
     Arm where the this pointer may be bumped to r1 if r0 is required
     to return a pointer to an aggregate.  On AArch64 a result value
     pointer will be in x8.  */
  int this_regno = R0_REGNUM;
  rtx this_rtx, temp0, temp1, addr, insn, funexp;

  reload_completed = 1;
  emit_note (NOTE_INSN_PROLOGUE_END);

  if (vcall_offset == 0)
    aarch64_add_constant (this_regno, IP1_REGNUM, delta);
  else
    {
      gcc_assert ((vcall_offset & 0x7) == 0);

      this_rtx = gen_rtx_REG (Pmode, this_regno);
      temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
      temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);

      addr = this_rtx;
      if (delta != 0)
	{
	  if (delta >= -256 && delta < 256)
	    addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
				       plus_constant (Pmode, this_rtx, delta));
	  else
	    aarch64_add_constant (this_regno, IP1_REGNUM, delta);
	}

      aarch64_emit_move (temp0, gen_rtx_MEM (Pmode, addr));

      if (vcall_offset >= -256 && vcall_offset < 32768)
	addr = plus_constant (Pmode, temp0, vcall_offset);
      else
	{
	  aarch64_build_constant (IP1_REGNUM, vcall_offset);
	  addr = gen_rtx_PLUS (Pmode, temp0, temp1);
	}

      aarch64_emit_move (temp1, gen_rtx_MEM (Pmode, addr));
      emit_insn (gen_add2_insn (this_rtx, temp1));
    }

  /* Generate a tail call to the target function.  */
  if (!TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
  SIBLING_CALL_P (insn) = 1;

  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  /* Stop pretending to be a post-reload pass.  */
  reload_completed = 0;
}
static int
aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  if (GET_CODE (*x) == SYMBOL_REF)
    return SYMBOL_REF_TLS_MODEL (*x) != 0;

  /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
     TLS offsets, not real symbol references.  */
  if (GET_CODE (*x) == UNSPEC
      && XINT (*x, 1) == UNSPEC_TLS)
    return -1;

  return 0;
}

static bool
aarch64_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
}
static int
aarch64_bitmasks_cmp (const void *i1, const void *i2)
{
  const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
  const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;

  if (*imm1 < *imm2)
    return -1;
  if (*imm1 > *imm2)
    return +1;
  return 0;
}
static void
aarch64_build_bitmask_table (void)
{
  unsigned HOST_WIDE_INT mask, imm;
  unsigned int log_e, e, s, r;
  unsigned int nimms = 0;

  for (log_e = 1; log_e <= 6; log_e++)
    {
      e = 1 << log_e;
      if (e == 64)
	mask = ~(HOST_WIDE_INT) 0;
      else
	mask = ((HOST_WIDE_INT) 1 << e) - 1;
      for (s = 1; s < e; s++)
	{
	  for (r = 0; r < e; r++)
	    {
	      /* set s consecutive bits to 1 (s < 64) */
	      imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
	      /* rotate right by r */
	      if (r != 0)
		imm = ((imm >> r) | (imm << (e - r))) & mask;
	      /* replicate the constant depending on SIMD size */
	      switch (log_e)
		{
		case 1: imm |= (imm <<  2);
		case 2: imm |= (imm <<  4);
		case 3: imm |= (imm <<  8);
		case 4: imm |= (imm << 16);
		case 5: imm |= (imm << 32);
		case 6:
		  break;
		default:
		  gcc_unreachable ();
		}
	      gcc_assert (nimms < AARCH64_NUM_BITMASKS);
	      aarch64_bitmasks[nimms++] = imm;
	    }
	}
    }

  gcc_assert (nimms == AARCH64_NUM_BITMASKS);
  qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
	 aarch64_bitmasks_cmp);
}
/* Return true if val can be encoded as a 12-bit unsigned immediate with
   a left shift of 0 or 12 bits.  */
bool
aarch64_uimm12_shift (HOST_WIDE_INT val)
{
  return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
	  || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
	  );
}
/* Return true if val is an immediate that can be loaded into a
   register by a MOVZ instruction.  */
static bool
aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (GET_MODE_SIZE (mode) > 4)
    {
      if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
	  || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
	return 1;
    }
  else
    {
      /* Ignore sign extension.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
    }
  return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
	  || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
}
/* Return true if val is a valid bitmask immediate.  */
bool
aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (GET_MODE_SIZE (mode) < 8)
    {
      /* Replicate bit pattern.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
      val |= val << 32;
    }
  return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
		  sizeof (aarch64_bitmasks[0]),
		  aarch64_bitmasks_cmp) != NULL;
}
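/* For example (illustrative): 0x5555555555555555 (element width 2,
   run length 1) and 0x00ff00ff00ff00ff (element width 16, run length 8)
   are valid bitmask immediates and can appear directly in AND/ORR/EOR,
   while 0x12345678 is not.  */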
/* Return true if val is an immediate that can be loaded into a
   register in a single instruction.  */
bool
aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
    return 1;
  return aarch64_bitmask_imm (val, mode);
}
static bool
aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (GET_CODE (x) == HIGH)
    return true;

  split_const (x, &base, &offset);
  if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
    return (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
	    != SYMBOL_FORCE_TO_MEM);

  return aarch64_tls_referenced_p (x);
}
/* Return true if register REGNO is a valid index register.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
aarch64_regno_ok_for_index_p (int regno, bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
	return true;

      if (!reg_renumber)
	return false;

      regno = reg_renumber[regno];
    }
  return GP_REGNUM_P (regno);
}
/* Return true if register REGNO is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
aarch64_regno_ok_for_base_p (int regno, bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
	return true;

      if (!reg_renumber)
	return false;

      regno = reg_renumber[regno];
    }

  /* The fake registers will be eliminated to either the stack or
     hard frame pointer, both of which are usually valid base registers.
     Reload deals with the cases where the eliminated form isn't valid.  */
  return (GP_REGNUM_P (regno)
	  || regno == SP_REGNUM
	  || regno == FRAME_POINTER_REGNUM
	  || regno == ARG_POINTER_REGNUM);
}
/* Return true if X is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

static bool
aarch64_base_register_rtx_p (rtx x, bool strict_p)
{
  if (!strict_p && GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);

  return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
}
/* Return true if address offset is a valid index.  If it is, fill in INFO
   appropriately.  STRICT_P is true if REG_OK_STRICT is in effect.  */

static bool
aarch64_classify_index (struct aarch64_address_info *info, rtx x,
			enum machine_mode mode, bool strict_p)
{
  enum aarch64_address_type type;
  rtx index;
  int shift;

  /* (reg:P) */
  if ((REG_P (x) || GET_CODE (x) == SUBREG)
      && GET_MODE (x) == Pmode)
    {
      type = ADDRESS_REG_REG;
      index = x;
      shift = 0;
    }
  /* (sign_extend:DI (reg:SI)) */
  else if ((GET_CODE (x) == SIGN_EXTEND
	    || GET_CODE (x) == ZERO_EXTEND)
	   && GET_MODE (x) == DImode
	   && GET_MODE (XEXP (x, 0)) == SImode)
    {
      type = (GET_CODE (x) == SIGN_EXTEND)
	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (x, 0);
      shift = 0;
    }
  /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
  else if (GET_CODE (x) == MULT
	   && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
	       || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
	   && GET_MODE (XEXP (x, 0)) == DImode
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = exact_log2 (INTVAL (XEXP (x, 1)));
    }
  /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
  else if (GET_CODE (x) == ASHIFT
	   && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
	       || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
	   && GET_MODE (XEXP (x, 0)) == DImode
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = INTVAL (XEXP (x, 1));
    }
  /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
  else if ((GET_CODE (x) == SIGN_EXTRACT
	    || GET_CODE (x) == ZERO_EXTRACT)
	   && GET_MODE (x) == DImode
	   && GET_CODE (XEXP (x, 0)) == MULT
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
	   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
    {
      type = (GET_CODE (x) == SIGN_EXTRACT)
	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
      if (INTVAL (XEXP (x, 1)) != 32 + shift
	  || INTVAL (XEXP (x, 2)) != 0)
	shift = -1;
    }
  /* (and:DI (mult:DI (reg:DI) (const_int scale))
     (const_int 0xffffffff<<shift)) */
  else if (GET_CODE (x) == AND
	   && GET_MODE (x) == DImode
	   && GET_CODE (XEXP (x, 0)) == MULT
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
	   && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
      if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
	shift = -1;
    }
  /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
  else if ((GET_CODE (x) == SIGN_EXTRACT
	    || GET_CODE (x) == ZERO_EXTRACT)
	   && GET_MODE (x) == DImode
	   && GET_CODE (XEXP (x, 0)) == ASHIFT
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
	   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
    {
      type = (GET_CODE (x) == SIGN_EXTRACT)
	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = INTVAL (XEXP (XEXP (x, 0), 1));
      if (INTVAL (XEXP (x, 1)) != 32 + shift
	  || INTVAL (XEXP (x, 2)) != 0)
	shift = -1;
    }
  /* (and:DI (ashift:DI (reg:DI) (const_int shift))
     (const_int 0xffffffff<<shift)) */
  else if (GET_CODE (x) == AND
	   && GET_MODE (x) == DImode
	   && GET_CODE (XEXP (x, 0)) == ASHIFT
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
	   && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = INTVAL (XEXP (XEXP (x, 0), 1));
      if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
	shift = -1;
    }
  /* (mult:P (reg:P) (const_int scale)) */
  else if (GET_CODE (x) == MULT
	   && GET_MODE (x) == Pmode
	   && GET_MODE (XEXP (x, 0)) == Pmode
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_REG;
      index = XEXP (x, 0);
      shift = exact_log2 (INTVAL (XEXP (x, 1)));
    }
  /* (ashift:P (reg:P) (const_int shift)) */
  else if (GET_CODE (x) == ASHIFT
	   && GET_MODE (x) == Pmode
	   && GET_MODE (XEXP (x, 0)) == Pmode
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_REG;
      index = XEXP (x, 0);
      shift = INTVAL (XEXP (x, 1));
    }
  else
    return false;

  if (GET_CODE (index) == SUBREG)
    index = SUBREG_REG (index);

  if ((shift == 0
       || (shift > 0 && shift <= 3
	   && (1 << shift) == GET_MODE_SIZE (mode)))
      && REG_P (index)
      && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
    {
      info->type = type;
      info->offset = index;
      info->shift = shift;
      return true;
    }

  return false;
}
static inline bool
offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= -64 * GET_MODE_SIZE (mode)
	  && offset < 64 * GET_MODE_SIZE (mode)
	  && offset % GET_MODE_SIZE (mode) == 0);
}

static inline bool
offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT offset)
{
  return offset >= -256 && offset < 256;
}

static inline bool
offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= 0
	  && offset < 4096 * GET_MODE_SIZE (mode)
	  && offset % GET_MODE_SIZE (mode) == 0);
}
/* Return true if X is a valid address for machine mode MODE.  If it is,
   fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
   effect.  OUTER_CODE is PARALLEL for a load/store pair.  */

static bool
aarch64_classify_address (struct aarch64_address_info *info,
			  rtx x, enum machine_mode mode,
			  RTX_CODE outer_code, bool strict_p)
{
  enum rtx_code code = GET_CODE (x);
  rtx op0, op1;
  bool allow_reg_index_p =
    outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;

  /* Don't support anything other than POST_INC or REG addressing for
     AdvSIMD.  */
  if (aarch64_vector_mode_p (mode)
      && (code != POST_INC && code != REG))
    return false;

  switch (code)
    {
    case REG:
    case SUBREG:
      info->type = ADDRESS_REG_IMM;
      info->base = x;
      info->offset = const0_rtx;
      return aarch64_base_register_rtx_p (x, strict_p);

    case PLUS:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      if (GET_MODE_SIZE (mode) != 0
	  && CONST_INT_P (op1)
	  && aarch64_base_register_rtx_p (op0, strict_p))
	{
	  HOST_WIDE_INT offset = INTVAL (op1);

	  info->type = ADDRESS_REG_IMM;
	  info->base = op0;
	  info->offset = op1;

	  /* TImode and TFmode values are allowed in both pairs of X
	     registers and individual Q registers.  The available
	     address modes are:
	     X,X: 7-bit signed scaled offset
	     Q:   9-bit signed offset
	     We conservatively require an offset representable in either mode.
	   */
	  if (mode == TImode || mode == TFmode)
	    return (offset_7bit_signed_scaled_p (mode, offset)
		    && offset_9bit_signed_unscaled_p (mode, offset));

	  if (outer_code == PARALLEL)
	    return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
		    && offset_7bit_signed_scaled_p (mode, offset));
	  else
	    return (offset_9bit_signed_unscaled_p (mode, offset)
		    || offset_12bit_unsigned_scaled_p (mode, offset));
	}

      if (allow_reg_index_p)
	{
	  /* Look for base + (scaled/extended) index register.  */
	  if (aarch64_base_register_rtx_p (op0, strict_p)
	      && aarch64_classify_index (info, op1, mode, strict_p))
	    {
	      info->base = op0;
	      return true;
	    }
	  if (aarch64_base_register_rtx_p (op1, strict_p)
	      && aarch64_classify_index (info, op0, mode, strict_p))
	    {
	      info->base = op1;
	      return true;
	    }
	}

      return false;

    case POST_INC:
    case POST_DEC:
    case PRE_INC:
    case PRE_DEC:
      info->type = ADDRESS_REG_WB;
      info->base = XEXP (x, 0);
      info->offset = NULL_RTX;
      return aarch64_base_register_rtx_p (info->base, strict_p);

    case POST_MODIFY:
    case PRE_MODIFY:
      info->type = ADDRESS_REG_WB;
      info->base = XEXP (x, 0);
      if (GET_CODE (XEXP (x, 1)) == PLUS
	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
	  && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
	  && aarch64_base_register_rtx_p (info->base, strict_p))
	{
	  HOST_WIDE_INT offset;
	  info->offset = XEXP (XEXP (x, 1), 1);
	  offset = INTVAL (info->offset);

	  /* TImode and TFmode values are allowed in both pairs of X
	     registers and individual Q registers.  The available
	     address modes are:
	     X,X: 7-bit signed scaled offset
	     Q:   9-bit signed offset
	     We conservatively require an offset representable in either mode.
	   */
	  if (mode == TImode || mode == TFmode)
	    return (offset_7bit_signed_scaled_p (mode, offset)
		    && offset_9bit_signed_unscaled_p (mode, offset));

	  if (outer_code == PARALLEL)
	    return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
		    && offset_7bit_signed_scaled_p (mode, offset));
	  else
	    return offset_9bit_signed_unscaled_p (mode, offset);
	}
      return false;

    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      /* load literal: pc-relative constant pool entry.  Only supported
         for SI mode or larger.  */
      info->type = ADDRESS_SYMBOLIC;
      if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
	{
	  rtx sym, addend;

	  split_const (x, &sym, &addend);
	  return (GET_CODE (sym) == LABEL_REF
		  || (GET_CODE (sym) == SYMBOL_REF
		      && CONSTANT_POOL_ADDRESS_P (sym)));
	}
      return false;

    case LO_SUM:
      info->type = ADDRESS_LO_SUM;
      info->base = XEXP (x, 0);
      info->offset = XEXP (x, 1);
      if (allow_reg_index_p
	  && aarch64_base_register_rtx_p (info->base, strict_p))
	{
	  rtx sym, offs;
	  split_const (info->offset, &sym, &offs);
	  if (GET_CODE (sym) == SYMBOL_REF
	      && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
		  == SYMBOL_SMALL_ABSOLUTE))
	    {
	      /* The symbol and offset must be aligned to the access size.  */
	      unsigned int align;
	      unsigned int ref_size;

	      if (CONSTANT_POOL_ADDRESS_P (sym))
		align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
	      else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
		{
		  tree exp = SYMBOL_REF_DECL (sym);
		  align = TYPE_ALIGN (TREE_TYPE (exp));
		  align = CONSTANT_ALIGNMENT (exp, align);
		}
	      else if (SYMBOL_REF_DECL (sym))
		align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
	      else
		align = BITS_PER_UNIT;

	      ref_size = GET_MODE_SIZE (mode);
	      if (ref_size == 0)
		ref_size = GET_MODE_SIZE (DImode);

	      return ((INTVAL (offs) & (ref_size - 1)) == 0
		      && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
	    }
	}
      return false;

    default:
      return false;
    }
}
bool
aarch64_symbolic_address_p (rtx x)
{
  rtx offset;

  split_const (x, &x, &offset);
  return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
}

/* Classify the base of symbolic expression X, given that X appears in
   context CONTEXT.  */
enum aarch64_symbol_type
aarch64_classify_symbolic_expression (rtx x,
				      enum aarch64_symbol_context context)
{
  rtx offset;

  split_const (x, &x, &offset);
  return aarch64_classify_symbol (x, context);
}
/* Return TRUE if X is a legitimate address for accessing memory in
   mode MODE.  */
static bool
aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
{
  struct aarch64_address_info addr;

  return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
}

/* Return TRUE if X is a legitimate address for accessing memory in
   mode MODE.  OUTER_CODE will be PARALLEL if this is a load/store
   pair operation.  */
bool
aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
			      RTX_CODE outer_code, bool strict_p)
{
  struct aarch64_address_info addr;

  return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
}
/* Return TRUE if rtx X is immediate constant 0.0 */
bool
aarch64_float_const_zero_rtx_p (rtx x)
{
  REAL_VALUE_TYPE r;

  if (GET_MODE (x) == VOIDmode)
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  if (REAL_VALUE_MINUS_ZERO (r))
    return !HONOR_SIGNED_ZEROS (GET_MODE (x));
  return REAL_VALUES_EQUAL (r, dconst0);
}
/* Return the fixed registers used for condition codes.  */

static bool
aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = CC_REGNUM;
  *p2 = INVALID_REGNUM;
  return true;
}
enum machine_mode
aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
{
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (code)
	{
	case EQ:
	case NE:
	case UNORDERED:
	case ORDERED:
	case UNLT:
	case UNLE:
	case UNGT:
	case UNGE:
	case UNEQ:
	case LTGT:
	  return CCFPmode;

	case LT:
	case LE:
	case GT:
	case GE:
	  return CCFPEmode;

	default:
	  gcc_unreachable ();
	}
    }

  if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
      && y == const0_rtx
      && (code == EQ || code == NE || code == LT || code == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
	  || GET_CODE (x) == NEG))
    return CC_NZmode;

  /* A compare with a shifted operand.  Because of canonicalization,
     the comparison will have to be swapped when we emit the assembly
     code.  */
  if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
      && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == LSHIFTRT
	  || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
    return CC_SWPmode;

  /* A compare of a mode narrower than SI mode against zero can be done
     by extending the value in the comparison.  */
  if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
      && y == const0_rtx)
    /* Only use sign-extension if we really need it.  */
    return ((code == GT || code == GE || code == LE || code == LT)
	    ? CC_SESWPmode : CC_ZESWPmode);

  /* For everything else, return CCmode.  */
  return CCmode;
}
int
aarch64_get_condition_code (rtx x)
{
  enum machine_mode mode = GET_MODE (XEXP (x, 0));
  enum rtx_code comp_code = GET_CODE (x);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));

  switch (mode)
    {
    case CCFPmode:
    case CCFPEmode:
      switch (comp_code)
	{
	case GE: return AARCH64_GE;
	case GT: return AARCH64_GT;
	case LE: return AARCH64_LS;
	case LT: return AARCH64_MI;
	case NE: return AARCH64_NE;
	case EQ: return AARCH64_EQ;
	case ORDERED: return AARCH64_VC;
	case UNORDERED: return AARCH64_VS;
	case UNLT: return AARCH64_LT;
	case UNLE: return AARCH64_LE;
	case UNGT: return AARCH64_HI;
	case UNGE: return AARCH64_PL;
	default: gcc_unreachable ();
	}
      break;

    case CCmode:
      switch (comp_code)
	{
	case NE: return AARCH64_NE;
	case EQ: return AARCH64_EQ;
	case GE: return AARCH64_GE;
	case GT: return AARCH64_GT;
	case LE: return AARCH64_LE;
	case LT: return AARCH64_LT;
	case GEU: return AARCH64_CS;
	case GTU: return AARCH64_HI;
	case LEU: return AARCH64_LS;
	case LTU: return AARCH64_CC;
	default: gcc_unreachable ();
	}
      break;

    case CC_SWPmode:
    case CC_ZESWPmode:
    case CC_SESWPmode:
      switch (comp_code)
	{
	case NE: return AARCH64_NE;
	case EQ: return AARCH64_EQ;
	case GE: return AARCH64_LE;
	case GT: return AARCH64_LT;
	case LE: return AARCH64_GE;
	case LT: return AARCH64_GT;
	case GEU: return AARCH64_LS;
	case GTU: return AARCH64_CC;
	case LEU: return AARCH64_CS;
	case LTU: return AARCH64_HI;
	default: gcc_unreachable ();
	}
      break;

    case CC_NZmode:
      switch (comp_code)
	{
	case NE: return AARCH64_NE;
	case EQ: return AARCH64_EQ;
	case GE: return AARCH64_PL;
	case LT: return AARCH64_MI;
	default: gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
      break;
    }
}

static unsigned
bit_count (unsigned HOST_WIDE_INT value)
{
  unsigned count = 0;

  while (value)
    {
      count++;
      value &= value - 1;
    }

  return count;
}
static void
aarch64_print_operand (FILE *f, rtx x, char code)
{
  switch (code)
    {
    case 'e':
      /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w.  */
      {
	int n;

	if (GET_CODE (x) != CONST_INT
	    || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
	  {
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }

	switch (n)
	  {
	  case 3:
	    fputc ('b', f);
	    break;
	  case 4:
	    fputc ('h', f);
	    break;
	  case 5:
	    fputc ('w', f);
	    break;
	  default:
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }
      }
      break;

    case 'p':
      {
	int n;

	/* Print N such that 2^N == X.  */
	if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
	  {
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }

	asm_fprintf (f, "%d", n);
      }
      break;

    case 'P':
      /* Print the number of non-zero bits in X (a const_int).  */
      if (GET_CODE (x) != CONST_INT)
	{
	  output_operand_lossage ("invalid operand for '%%%c'", code);
	  return;
	}

      asm_fprintf (f, "%u", bit_count (INTVAL (x)));
      break;

    case 'H':
      /* Print the higher numbered register of a pair (TImode) of regs.  */
      if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
	{
	  output_operand_lossage ("invalid operand for '%%%c'", code);
	  return;
	}

      asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
      break;

    case 'm':
      /* Print a condition (eq, ne, etc).  */

      /* CONST_TRUE_RTX means always -- that's the default.  */
      if (x == const_true_rtx)
	return;

      if (!COMPARISON_P (x))
	{
	  output_operand_lossage ("invalid operand for '%%%c'", code);
	  return;
	}

      fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
      break;

    case 'M':
      /* Print the inverse of a condition (eq <-> ne, etc).  */

      /* CONST_TRUE_RTX means never -- that's the default.  */
      if (x == const_true_rtx)
	{
	  fputs ("nv", f);
	  return;
	}

      if (!COMPARISON_P (x))
	{
	  output_operand_lossage ("invalid operand for '%%%c'", code);
	  return;
	}

      fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
				     (aarch64_get_condition_code (x))], f);
      break;

    case 'b':
    case 'h':
    case 's':
    case 'd':
    case 'q':
      /* Print a scalar FP/SIMD register name.  */
      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
	{
	  output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
	  return;
	}
      asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
      break;

    case 'S':
    case 'T':
    case 'U':
    case 'V':
      /* Print the first FP/SIMD register name in a list.  */
      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
	{
	  output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
	  return;
	}
      asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
      break;

    case 'X':
      /* Print bottom 16 bits of integer constant in hex.  */
      if (GET_CODE (x) != CONST_INT)
	{
	  output_operand_lossage ("invalid operand for '%%%c'", code);
	  return;
	}
      asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
      break;

    case 'w':
    case 'x':
      /* Print a general register name or the zero register (32-bit or
         64-bit).  */
      if (x == const0_rtx
	  || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
	{
	  asm_fprintf (f, "%czr", code);
	  break;
	}

      if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
	{
	  asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
	  break;
	}

      if (REG_P (x) && REGNO (x) == SP_REGNUM)
	{
	  asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
	  break;
	}

      /* Fall through */

    case 0:
      /* Print a normal operand, if it's a general register, then we
	 assume DImode.  */
      if (x == NULL)
	{
	  output_operand_lossage ("missing operand");
	  return;
	}

      switch (GET_CODE (x))
	{
	case REG:
	  asm_fprintf (f, "%s", reg_names [REGNO (x)]);
	  break;

	case MEM:
	  aarch64_memory_reference_mode = GET_MODE (x);
	  output_address (XEXP (x, 0));
	  break;

	case LABEL_REF:
	case SYMBOL_REF:
	  output_addr_const (asm_out_file, x);
	  break;

	case CONST_INT:
	  asm_fprintf (f, "%wd", INTVAL (x));
	  break;

	case CONST_VECTOR:
	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
	    {
	      gcc_assert (aarch64_const_vec_all_same_int_p (x,
							    HOST_WIDE_INT_MIN,
							    HOST_WIDE_INT_MAX));
	      asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
	    }
	  else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
	    {
	      fputc ('0', f);
	    }
	  else
	    gcc_unreachable ();
	  break;

	case CONST_DOUBLE:
	  /* CONST_DOUBLE can represent a double-width integer.
	     In this case, the mode of x is VOIDmode.  */
	  if (GET_MODE (x) == VOIDmode)
	    ; /* Do Nothing.  */
	  else if (aarch64_float_const_zero_rtx_p (x))
	    {
	      fputc ('0', f);
	      break;
	    }
	  else if (aarch64_float_const_representable_p (x))
	    {
#define buf_size 20
	      char float_buf[buf_size] = {'\0'};
	      REAL_VALUE_TYPE r;
	      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
	      real_to_decimal_for_mode (float_buf, &r,
					buf_size, buf_size,
					1, GET_MODE (x));
	      asm_fprintf (asm_out_file, "%s", float_buf);
	      break;
#undef buf_size
	    }
	  output_operand_lossage ("invalid constant");
	  return;
	default:
	  output_operand_lossage ("invalid operand");
	  return;
	}
      break;

    case 'A':
      if (GET_CODE (x) == HIGH)
	x = XEXP (x, 0);

      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
	{
	case SYMBOL_SMALL_GOT:
	  asm_fprintf (asm_out_file, ":got:");
	  break;

	case SYMBOL_SMALL_TLSGD:
	  asm_fprintf (asm_out_file, ":tlsgd:");
	  break;

	case SYMBOL_SMALL_TLSDESC:
	  asm_fprintf (asm_out_file, ":tlsdesc:");
	  break;

	case SYMBOL_SMALL_GOTTPREL:
	  asm_fprintf (asm_out_file, ":gottprel:");
	  break;

	case SYMBOL_SMALL_TPREL:
	  asm_fprintf (asm_out_file, ":tprel:");
	  break;

	default:
	  break;
	}
      output_addr_const (asm_out_file, x);
      break;

    case 'L':
      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
	{
	case SYMBOL_SMALL_GOT:
	  asm_fprintf (asm_out_file, ":lo12:");
	  break;

	case SYMBOL_SMALL_TLSGD:
	  asm_fprintf (asm_out_file, ":tlsgd_lo12:");
	  break;

	case SYMBOL_SMALL_TLSDESC:
	  asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
	  break;

	case SYMBOL_SMALL_GOTTPREL:
	  asm_fprintf (asm_out_file, ":gottprel_lo12:");
	  break;

	case SYMBOL_SMALL_TPREL:
	  asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
	  break;

	default:
	  break;
	}
      output_addr_const (asm_out_file, x);
      break;

    case 'G':
      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
	{
	case SYMBOL_SMALL_TPREL:
	  asm_fprintf (asm_out_file, ":tprel_hi12:");
	  break;
	default:
	  break;
	}
      output_addr_const (asm_out_file, x);
      break;

    default:
      output_operand_lossage ("invalid operand prefix '%%%c'", code);
      return;
    }
}
void
aarch64_print_operand_address (FILE *f, rtx x)
{
  struct aarch64_address_info addr;

  if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
				MEM, true))
    switch (addr.type)
      {
      case ADDRESS_REG_IMM:
	if (addr.offset == const0_rtx)
	  asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
	else
	  asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
		       INTVAL (addr.offset));
	return;

      case ADDRESS_REG_REG:
	if (addr.shift == 0)
	  asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
		       reg_names [REGNO (addr.offset)]);
	else
	  asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
		       reg_names [REGNO (addr.offset)], addr.shift);
	return;

      case ADDRESS_REG_UXTW:
	if (addr.shift == 0)
	  asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
		       REGNO (addr.offset) - R0_REGNUM);
	else
	  asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
		       REGNO (addr.offset) - R0_REGNUM, addr.shift);
	return;

      case ADDRESS_REG_SXTW:
	if (addr.shift == 0)
	  asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
		       REGNO (addr.offset) - R0_REGNUM);
	else
	  asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
		       REGNO (addr.offset) - R0_REGNUM, addr.shift);
	return;

      case ADDRESS_REG_WB:
	switch (GET_CODE (x))
	  {
	  case PRE_INC:
	    asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
			 GET_MODE_SIZE (aarch64_memory_reference_mode));
	    return;
	  case POST_INC:
	    asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
			 GET_MODE_SIZE (aarch64_memory_reference_mode));
	    return;
	  case PRE_DEC:
	    asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
			 GET_MODE_SIZE (aarch64_memory_reference_mode));
	    return;
	  case POST_DEC:
	    asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
			 GET_MODE_SIZE (aarch64_memory_reference_mode));
	    return;
	  case PRE_MODIFY:
	    asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
			 INTVAL (addr.offset));
	    return;
	  case POST_MODIFY:
	    asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
			 INTVAL (addr.offset));
	    return;
	  default:
	    break;
	  }
	break;

      case ADDRESS_LO_SUM:
	asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
	output_addr_const (f, addr.offset);
	asm_fprintf (f, "]");
	return;

      case ADDRESS_SYMBOLIC:
	break;
      }

  output_addr_const (f, x);
}
void
aarch64_function_profiler (FILE *f ATTRIBUTE_UNUSED,
			   int labelno ATTRIBUTE_UNUSED)
{
  sorry ("function profiling");
}
bool
aarch64_label_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return true;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
     referencing instruction, but they are constant offsets, not
     symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return false;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
/* Implement REGNO_REG_CLASS.  */

enum reg_class
aarch64_regno_regclass (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return CORE_REGS;

  if (regno == SP_REGNUM)
    return STACK_REG;

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return CORE_REGS;

  if (FP_REGNUM_P (regno))
    return FP_LO_REGNUM_P (regno) ?  FP_LO_REGS : FP_REGS;

  return NO_REGS;
}
/* Try a machine-dependent way of reloading an illegitimate address
   operand.  If we find one, push the reload and return the new rtx.  */

rtx
aarch64_legitimize_reload_address (rtx *x_p,
				   enum machine_mode mode,
				   int opnum, int type,
				   int ind_levels ATTRIBUTE_UNUSED)
{
  rtx x = *x_p;

  /* Do not allow mem (plus (reg, const)) if vector mode.  */
  if (aarch64_vector_mode_p (mode)
      && GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))
    {
      rtx orig_rtx = x;
      x = copy_rtx (x);
      push_reload (orig_rtx, NULL_RTX, x_p, NULL,
		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      return x;
    }

  /* We must recognize output that we have already generated ourselves.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && REG_P (XEXP (XEXP (x, 0), 0))
      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
      && CONST_INT_P (XEXP (x, 1)))
    {
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      return x;
    }

  /* We wish to handle large displacements off a base register by splitting
     the addend across an add and the mem insn.  This can cut the number of
     extra insns needed from 3 to 1.  It is only useful for load/store of a
     single register with 12 bit offset field.  */
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1))
      && HARD_REGISTER_P (XEXP (x, 0))
      && mode != TImode
      && mode != TFmode
      && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
    {
      HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
      HOST_WIDE_INT low = val & 0xfff;
      HOST_WIDE_INT high = val - low;
      HOST_WIDE_INT offs;
      rtx cst;

      /* Reload non-zero BLKmode offsets.  This is because we cannot ascertain
	 BLKmode alignment.  */
      if (GET_MODE_SIZE (mode) == 0)
	return NULL_RTX;

      offs = low % GET_MODE_SIZE (mode);

      /* Align misaligned offset by adjusting high part to compensate.  */
      if (offs != 0)
	{
	  if (aarch64_uimm12_shift (high + offs))
	    {
	      /* Align down.  */
	      low = low - offs;
	      high = high + offs;
	    }
	  else
	    {
	      /* Align up.  */
	      offs = GET_MODE_SIZE (mode) - offs;
	      low = low + offs;
	      high = high + (low & 0x1000) - offs;
	      low &= 0xfff;
	    }
	}

      /* Check for overflow.  */
      if (high + low != val)
	return NULL_RTX;

      cst = GEN_INT (high);
      if (!aarch64_uimm12_shift (high))
	cst = force_const_mem (Pmode, cst);

      /* Reload high part into base reg, leaving the low part
	 in the mem instruction.  */
      x = gen_rtx_PLUS (Pmode,
			gen_rtx_PLUS (Pmode, XEXP (x, 0), cst),
			GEN_INT (low));

      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      return x;
    }

  return NULL_RTX;
}
static reg_class_t
aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
			  reg_class_t rclass,
			  enum machine_mode mode,
			  secondary_reload_info *sri)
{
  /* Address expressions of the form PLUS (SP, large_offset) need two
     scratch registers, one for the constant, and one for holding a
     copy of SP, since SP cannot be used on the RHS of an add-reg
     instruction.  */
  if (mode == DImode
      && GET_CODE (x) == PLUS
      && XEXP (x, 0) == stack_pointer_rtx
      && CONST_INT_P (XEXP (x, 1))
      && !aarch64_uimm12_shift (INTVAL (XEXP (x, 1))))
    {
      sri->icode = CODE_FOR_reload_sp_immediate;
      return NO_REGS;
    }

  /* Without the TARGET_SIMD instructions we cannot move a Q register
     to a Q register directly.  We need a scratch.  */
  if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
      && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
      && reg_class_subset_p (rclass, FP_REGS))
    {
      if (mode == TFmode)
	sri->icode = CODE_FOR_aarch64_reload_movtf;
      else if (mode == TImode)
	sri->icode = CODE_FOR_aarch64_reload_movti;
      return NO_REGS;
    }

  /* A TFmode or TImode memory access should be handled via an FP_REGS
     because AArch64 has richer addressing modes for LDR/STR instructions
     than LDP/STP instructions.  */
  if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
      && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
    return FP_REGS;

  if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
      return CORE_REGS;

  return NO_REGS;
}
static bool
aarch64_can_eliminate (const int from, const int to)
{
  /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
     HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM.  */

  if (frame_pointer_needed)
    {
      if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
	return true;
      if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
	return false;
      if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
	  && !cfun->calls_alloca)
	return true;
      if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
	return true;
      return false;
    }
  else
    {
      /* If we decided that we didn't need a leaf frame pointer but then used
	 LR in the function, then we'll want a frame pointer after all, so
	 prevent this elimination to ensure a frame pointer is used.

	 NOTE: the original value of flag_omit_frame_pointer gets trashed
	 IFF flag_omit_leaf_frame_pointer is true, so we check the value
	 of faked_omit_frame_pointer here (which is true when we always
	 wish to keep non-leaf frame pointers but only wish to keep leaf frame
	 pointers when LR is clobbered).  */
      if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
	  && df_regs_ever_live_p (LR_REGNUM)
	  && faked_omit_frame_pointer)
	return false;
    }

  return true;
}
HOST_WIDE_INT
aarch64_initial_elimination_offset (unsigned from, unsigned to)
{
  HOST_WIDE_INT frame_size;
  HOST_WIDE_INT offset;

  aarch64_layout_frame ();
  frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
		+ crtl->outgoing_args_size
		+ cfun->machine->saved_varargs_size);

  frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
  offset = frame_size;

  if (to == HARD_FRAME_POINTER_REGNUM)
    {
      if (from == ARG_POINTER_REGNUM)
	return offset - crtl->outgoing_args_size;

      if (from == FRAME_POINTER_REGNUM)
	return cfun->machine->frame.saved_regs_size;
    }

  if (to == STACK_POINTER_REGNUM)
    {
      if (from == FRAME_POINTER_REGNUM)
	{
	  HOST_WIDE_INT elim = crtl->outgoing_args_size
			       + cfun->machine->frame.saved_regs_size
			       - cfun->machine->frame.fp_lr_offset;
	  elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
	  return elim;
	}
    }

  return offset;
}
/* Implement RETURN_ADDR_RTX.  We do not support moving back to a
   previous frame.  */

rtx
aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return const0_rtx;
  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
static void
aarch64_asm_trampoline_template (FILE *f)
{
  asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
  asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
  asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
  assemble_aligned_integer (4, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
static int
aarch64_trampoline_size (void)
{
  return 32;  /* 3 insns + padding + 2 dwords.  */
}
static void
aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;

  /* Don't need to copy the trailing D-words, we fill those in below.  */
  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE - 16), BLOCK_OP_NORMAL);
  mem = adjust_address (m_tramp, DImode, 16);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  emit_move_insn (mem, fnaddr);

  mem = adjust_address (m_tramp, DImode, 24);
  emit_move_insn (mem, chain_value);

  /* XXX We should really define a "clear_cache" pattern and use
     gen_clear_cache().  */
  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
		     LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
		     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
}
static unsigned char
aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
{
  switch (regclass)
    {
    case CORE_REGS:
    case POINTER_REGS:
    case GENERAL_REGS:
    case ALL_REGS:
    case FP_REGS:
    case FP_LO_REGS:
      return
	aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
				       (GET_MODE_SIZE (mode) + 7) / 8;
    case STACK_REG:
      return 1;

    case NO_REGS:
      return 0;

    default:
      break;
    }
  gcc_unreachable ();
}
static reg_class_t
aarch64_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t regclass)
{
  return ((regclass == POINTER_REGS || regclass == STACK_REG)
	  ? GENERAL_REGS : regclass);
}
void
aarch64_asm_output_labelref (FILE* f, const char *name)
{
  asm_fprintf (f, "%U%s", name);
}
static void
aarch64_elf_asm_constructor (rtx symbol, int priority)
{
  if (priority == DEFAULT_INIT_PRIORITY)
    default_ctor_section_asm_out_constructor (symbol, priority);
  else
    {
      section *s;
      char buf[18];
      snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
      s = get_section (buf, SECTION_WRITE, NULL);
      switch_to_section (s);
      assemble_align (POINTER_SIZE);
      fputs ("\t.dword\t", asm_out_file);
      output_addr_const (asm_out_file, symbol);
      fputc ('\n', asm_out_file);
    }
}

static void
aarch64_elf_asm_destructor (rtx symbol, int priority)
{
  if (priority == DEFAULT_INIT_PRIORITY)
    default_dtor_section_asm_out_destructor (symbol, priority);
  else
    {
      section *s;
      char buf[18];
      snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
      s = get_section (buf, SECTION_WRITE, NULL);
      switch_to_section (s);
      assemble_align (POINTER_SIZE);
      fputs ("\t.dword\t", asm_out_file);
      output_addr_const (asm_out_file, symbol);
      fputc ('\n', asm_out_file);
    }
}
const char*
aarch64_output_casesi (rtx *operands)
{
  char label[100];
  char buf[128];
  int index;
  rtx diff_vec = PATTERN (next_active_insn (operands[2]));

  static const char *const patterns[4][2] =
  {
    {
      "ldrb\t%w3, [%0,%w1,uxtw]",
      "add\t%3, %4, %w3, sxtb #2"
    },
    {
      "ldrh\t%w3, [%0,%w1,uxtw #1]",
      "add\t%3, %4, %w3, sxth #2"
    },
    {
      "ldr\t%w3, [%0,%w1,uxtw #2]",
      "add\t%3, %4, %w3, sxtw #2"
    },
    /* We assume that DImode is only generated when not optimizing and
       that we don't really need 64-bit address offsets.  That would
       imply an object file with 8GB of code in a single function!  */
    {
      "ldr\t%w3, [%0,%w1,uxtw #2]",
      "add\t%3, %4, %w3, sxtw #2"
    }
  };

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));

  gcc_assert (index >= 0 && index <= 3);

  /* Need to implement table size reduction, by changing the code below.  */
  output_asm_insn (patterns[index][0], operands);
  ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
  snprintf (buf, sizeof (buf),
	    "adr\t%%4, %s", targetm.strip_name_encoding (label));
  output_asm_insn (buf, operands);
  output_asm_insn (patterns[index][1], operands);
  output_asm_insn ("br\t%3", operands);
  assemble_label (asm_out_file, label);
  return "";
}
/* Return size in bits of an arithmetic operand which is shifted/scaled and
   masked such that it is suitable for a UXTB, UXTH, or UXTW extend
   operator.  */

int
aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
{
  if (shift >= 0 && shift <= 3)
    {
      int size;
      for (size = 8; size <= 32; size *= 2)
	{
	  HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
	  if (mask == bits << shift)
	    return size;
	}
    }
  return 0;
}
static bool
aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
				   const_rtx x ATTRIBUTE_UNUSED)
{
  /* We can't use blocks for constants when we're using a per-function
     constant pool.  */
  return false;
}

static section *
aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
			    rtx x ATTRIBUTE_UNUSED,
			    unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  /* Force all constant pool entries into the current function section.  */
  return function_section (current_function_decl);
}
/* Helper function for rtx cost calculation.  Strip a shift expression
   from X.  Returns the inner operand if successful, or the original
   expression on failure.  */
static rtx
aarch64_strip_shift (rtx x)
{
  rtx op = x;

  if ((GET_CODE (op) == ASHIFT
       || GET_CODE (op) == ASHIFTRT
       || GET_CODE (op) == LSHIFTRT)
      && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);

  if (GET_CODE (op) == MULT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
    return XEXP (op, 0);

  return x;
}
/* Helper function for rtx cost calculation.  Strip a shift or extend
   expression from X.  Returns the inner operand if successful, or the
   original expression on failure.  We deal with a number of possible
   canonicalization variations here.  */
static rtx
aarch64_strip_shift_or_extend (rtx x)
{
  rtx op = x;

  /* Zero and sign extraction of a widened value.  */
  if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
      && XEXP (op, 2) == const0_rtx
      && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
					 XEXP (op, 1)))
    return XEXP (XEXP (op, 0), 0);

  /* It can also be represented (for zero-extend) as an AND with an
     immediate.  */
  if (GET_CODE (op) == AND
      && GET_CODE (XEXP (op, 0)) == MULT
      && CONST_INT_P (XEXP (XEXP (op, 0), 1))
      && CONST_INT_P (XEXP (op, 1))
      && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
			   INTVAL (XEXP (op, 1))) != 0)
    return XEXP (XEXP (op, 0), 0);

  /* Now handle extended register, as this may also have an optional
     left shift by 1..4.  */
  if (GET_CODE (op) == ASHIFT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
    op = XEXP (op, 0);

  if (GET_CODE (op) == ZERO_EXTEND
      || GET_CODE (op) == SIGN_EXTEND)
    op = XEXP (op, 0);

  if (op != x)
    return op;

  return aarch64_strip_shift (x);
}
/* Calculate the cost of calculating X, storing it in *COST.  Result
   is true if the total cost of the operation has now been calculated.  */
static bool
aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
		   int param ATTRIBUTE_UNUSED, int *cost, bool speed)
{
  rtx op0, op1;
  const struct cpu_rtx_cost_table *extra_cost
    = aarch64_tune_params->insn_extra_cost;

  switch (code)
    {
    case SET:
      op0 = SET_DEST (x);
      op1 = SET_SRC (x);

      switch (GET_CODE (op0))
	{
	case MEM:
	  if (speed)
	    *cost += extra_cost->memory_store;

	  if (op1 != const0_rtx)
	    *cost += rtx_cost (op1, SET, 1, speed);
	  return true;

	case SUBREG:
	  if (! REG_P (SUBREG_REG (op0)))
	    *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
	  /* Fall through.  */
	case REG:
	  /* Cost is just the cost of the RHS of the set.  */
	  *cost += rtx_cost (op1, SET, 1, true);
	  return true;

	case ZERO_EXTRACT:  /* Bit-field insertion.  */
	case SIGN_EXTRACT:
	  /* Strip any redundant widening of the RHS to meet the width of
	     the target.  */
	  if (GET_CODE (op1) == SUBREG)
	    op1 = SUBREG_REG (op1);
	  if ((GET_CODE (op1) == ZERO_EXTEND
	       || GET_CODE (op1) == SIGN_EXTEND)
	      && GET_CODE (XEXP (op0, 1)) == CONST_INT
	      && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
		  >= INTVAL (XEXP (op0, 1))))
	    op1 = XEXP (op1, 0);
	  *cost += rtx_cost (op1, SET, 1, speed);
	  return true;

	default:
	  break;
	}
      return false;

    case MEM:
      if (speed)
	*cost += extra_cost->memory_load;

      return true;

    case NEG:
      op0 = CONST0_RTX (GET_MODE (x));
      op1 = XEXP (x, 0);
      goto cost_minus;

    case COMPARE:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (op1 == const0_rtx
	  && GET_CODE (op0) == AND)
	{
	  x = op0;
	  goto cost_logic;
	}

      /* Comparisons can work if the order is swapped.
	 Canonicalization puts the more complex operation first, but
	 we want it in op1.  */
      if (! (REG_P (op0)
	     || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
	{
	  op0 = XEXP (x, 1);
	  op1 = XEXP (x, 0);
	}
      goto cost_minus;

    case MINUS:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

    cost_minus:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
	  || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
	      && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
	{
	  if (op0 != const0_rtx)
	    *cost += rtx_cost (op0, MINUS, 0, speed);

	  if (CONST_INT_P (op1))
	    {
	      if (!aarch64_uimm12_shift (INTVAL (op1)))
		*cost += rtx_cost (op1, MINUS, 1, speed);
	    }
	  else
	    {
	      op1 = aarch64_strip_shift_or_extend (op1);
	      *cost += rtx_cost (op1, MINUS, 1, speed);
	    }
	  return true;
	}

      return false;

    case PLUS:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	{
	  if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
	    {
	      *cost += rtx_cost (op0, PLUS, 0, speed);
	    }
	  else
	    {
	      rtx new_op0 = aarch64_strip_shift_or_extend (op0);

	      if (new_op0 == op0
		  && GET_CODE (op0) == MULT)
		{
		  if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
		       && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
		      || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
			  && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
		    {
		      *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
					  speed)
				+ rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
					    speed)
				+ rtx_cost (op1, PLUS, 1, speed));
		      if (speed)
			*cost += extra_cost->int_multiply_extend_add;
		      return true;
		    }
		  *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
			    + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
			    + rtx_cost (op1, PLUS, 1, speed));

		  if (speed)
		    *cost += extra_cost->int_multiply_add;

		  return true;
		}

	      *cost += (rtx_cost (new_op0, PLUS, 0, speed)
			+ rtx_cost (op1, PLUS, 1, speed));
	    }
	  return true;
	}

      return false;

    case IOR:
    case XOR:
    case AND:
    cost_logic:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	{
	  if (CONST_INT_P (op1)
	      && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
	    {
	      *cost += rtx_cost (op0, AND, 0, speed);
	    }
	  else
	    {
	      if (GET_CODE (op0) == NOT)
		op0 = XEXP (op0, 0);
	      op0 = aarch64_strip_shift (op0);
	      *cost += (rtx_cost (op0, AND, 0, speed)
			+ rtx_cost (op1, AND, 1, speed));
	    }
	  return true;
	}
      return false;

    case ZERO_EXTEND:
      if ((GET_MODE (x) == DImode
	   && GET_MODE (XEXP (x, 0)) == SImode)
	  || GET_CODE (XEXP (x, 0)) == MEM)
	{
	  *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
	  return true;
	}
      return false;

    case SIGN_EXTEND:
      if (GET_CODE (XEXP (x, 0)) == MEM)
	{
	  *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
	  return true;
	}
      return false;

    case ROTATE:
      if (!CONST_INT_P (XEXP (x, 1)))
	*cost += COSTS_N_INSNS (2);
      /* Fall through.  */
    case ROTATERT:
    case LSHIFTRT:
    case ASHIFT:
    case ASHIFTRT:

      /* Shifting by a register often takes an extra cycle.  */
      if (speed && !CONST_INT_P (XEXP (x, 1)))
	*cost += extra_cost->register_shift;

      *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
      return true;

    case HIGH:
      if (!CONSTANT_P (XEXP (x, 0)))
	*cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
      return true;

    case LO_SUM:
      if (!CONSTANT_P (XEXP (x, 1)))
	*cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
      *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
      return true;

    case ZERO_EXTRACT:
    case SIGN_EXTRACT:
      *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
      return true;

    case MULT:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      *cost = COSTS_N_INSNS (1);
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	{
	  if (CONST_INT_P (op1)
	      && exact_log2 (INTVAL (op1)) > 0)
	    {
	      *cost += rtx_cost (op0, ASHIFT, 0, speed);
	      return true;
	    }

	  if ((GET_CODE (op0) == ZERO_EXTEND
	       && GET_CODE (op1) == ZERO_EXTEND)
	      || (GET_CODE (op0) == SIGN_EXTEND
		  && GET_CODE (op1) == SIGN_EXTEND))
	    {
	      *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
			+ rtx_cost (XEXP (op1, 0), MULT, 1, speed));
	      if (speed)
		*cost += extra_cost->int_multiply_extend;
	      return true;
	    }

	  if (speed)
	    *cost += extra_cost->int_multiply;
	}
      else if (speed)
	{
	  if (GET_MODE (x) == DFmode)
	    *cost += extra_cost->double_multiply;
	  else if (GET_MODE (x) == SFmode)
	    *cost += extra_cost->float_multiply;
	}

      return false;  /* All arguments need to be in registers.  */

    case MOD:
    case UMOD:
      *cost = COSTS_N_INSNS (2);
      if (speed)
	{
	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	    *cost += (extra_cost->int_multiply_add
		      + extra_cost->int_divide);
	  else if (GET_MODE (x) == DFmode)
	    *cost += (extra_cost->double_multiply
		      + extra_cost->double_divide);
	  else if (GET_MODE (x) == SFmode)
	    *cost += (extra_cost->float_multiply
		      + extra_cost->float_divide);
	}
      return false;  /* All arguments need to be in registers.  */

    case DIV:
    case UDIV:
      *cost = COSTS_N_INSNS (1);
      if (speed)
	{
	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	    *cost += extra_cost->int_divide;
	  else if (GET_MODE (x) == DFmode)
	    *cost += extra_cost->double_divide;
	  else if (GET_MODE (x) == SFmode)
	    *cost += extra_cost->float_divide;
	}
      return false;  /* All arguments need to be in registers.  */

    default:
      break;
    }
  return false;
}
static int
aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
		      enum machine_mode mode ATTRIBUTE_UNUSED,
		      addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
{
  enum rtx_code c = GET_CODE (x);
  const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;

  if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
    return addr_cost->pre_modify;

  if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
    return addr_cost->post_modify;

  if (c == PLUS)
    {
      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	return addr_cost->imm_offset;
      else if (GET_CODE (XEXP (x, 0)) == MULT
	       || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	       || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	return addr_cost->register_extend;

      return addr_cost->register_offset;
    }
  else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
    return addr_cost->imm_offset;

  return 0;
}
static int
aarch64_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
			    reg_class_t from, reg_class_t to)
{
  const struct cpu_regmove_cost *regmove_cost
    = aarch64_tune_params->regmove_cost;

  if (from == GENERAL_REGS && to == GENERAL_REGS)
    return regmove_cost->GP2GP;
  else if (from == GENERAL_REGS)
    return regmove_cost->GP2FP;
  else if (to == GENERAL_REGS)
    return regmove_cost->FP2GP;

  /* When AdvSIMD instructions are disabled it is not possible to move
     a 128-bit value directly between Q registers.  This is handled in
     secondary reload.  A general register is used as a scratch to move
     the upper DI value and the lower DI value is moved directly,
     hence the cost is the sum of three moves. */

  if (! TARGET_SIMD && GET_MODE_SIZE (from) == 128 && GET_MODE_SIZE (to) == 128)
    return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;

  return regmove_cost->FP2FP;
}
static int
aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
			  reg_class_t rclass ATTRIBUTE_UNUSED,
			  bool in ATTRIBUTE_UNUSED)
{
  return aarch64_tune_params->memmov_cost;
}

static void initialize_aarch64_code_model (void);
/* Parse the architecture extension string.  */
static void
aarch64_parse_extension (char *str)
{
  /* The extension string is parsed left to right.  */
  const struct aarch64_option_extension *opt = NULL;

  /* Flag to say whether we are adding or removing an extension.  */
  int adding_ext = -1;

  while (str != NULL && *str != 0)
    {
      const char *ext;
      size_t len;

      str++;
      ext = strchr (str, '+');

      if (ext != NULL)
	len = ext - str;
      else
	len = strlen (str);

      if (len >= 2 && strncmp (str, "no", 2) == 0)
	{
	  adding_ext = 0;
	  len -= 2;
	  str += 2;
	}
      else if (len > 0)
	adding_ext = 1;

      if (len == 0)
	{
	  error ("missing feature modifier after %qs", "+no");
	  return;
	}

      /* Scan over the extensions table trying to find an exact match.  */
      for (opt = all_extensions; opt->name != NULL; opt++)
	{
	  if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
	    {
	      /* Add or remove the extension.  */
	      if (adding_ext)
		aarch64_isa_flags |= opt->flags_on;
	      else
		aarch64_isa_flags &= ~(opt->flags_off);
	      break;
	    }
	}

      if (opt->name == NULL)
	{
	  /* Extension not found in list.  */
	  error ("unknown feature modifier %qs", str);
	  return;
	}

      str = ext;
    }
}
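/* For example, given -march=armv8-a+crypto+nosimd the loop above sees
   the tokens "crypto" (adding_ext = 1, so that extension's flags_on
   bits are ORed into aarch64_isa_flags) and then "nosimd" (the leading
   "no" is stripped, adding_ext = 0, so the flags_off bits are cleared).
   An unrecognised token such as "foo" falls through to the
   opt->name == NULL check and is diagnosed.  */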
/* Parse the ARCH string.  */
static void
aarch64_parse_arch (void)
{
  char *ext;
  const struct processor *arch;
  char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
  size_t len;

  strcpy (str, aarch64_arch_string);

  ext = strchr (str, '+');

  if (ext != NULL)
    len = ext - str;
  else
    len = strlen (str);

  if (len == 0)
    {
      error ("missing arch name in -march=%qs", str);
      return;
    }

  /* Loop through the list of supported ARCHs to find a match.  */
  for (arch = all_architectures; arch->name != NULL; arch++)
    {
      if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
	{
	  selected_arch = arch;
	  aarch64_isa_flags = selected_arch->flags;
	  selected_cpu = &all_cores[selected_arch->core];

	  if (ext != NULL)
	    {
	      /* ARCH string contains at least one extension.  */
	      aarch64_parse_extension (ext);
	    }

	  return;
	}
    }

  /* ARCH name not found in list.  */
  error ("unknown value %qs for -march", str);
  return;
}
/* Parse the CPU string.  */
static void
aarch64_parse_cpu (void)
{
  char *ext;
  const struct processor *cpu;
  char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
  size_t len;

  strcpy (str, aarch64_cpu_string);

  ext = strchr (str, '+');

  if (ext != NULL)
    len = ext - str;
  else
    len = strlen (str);

  if (len == 0)
    {
      error ("missing cpu name in -mcpu=%qs", str);
      return;
    }

  /* Loop through the list of supported CPUs to find a match.  */
  for (cpu = all_cores; cpu->name != NULL; cpu++)
    {
      if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
	{
	  selected_cpu = cpu;
	  aarch64_isa_flags = selected_cpu->flags;

	  if (ext != NULL)
	    {
	      /* CPU string contains at least one extension.  */
	      aarch64_parse_extension (ext);
	    }

	  return;
	}
    }

  /* CPU name not found in list.  */
  error ("unknown value %qs for -mcpu", str);
  return;
}
/* Parse the TUNE string.  */
static void
aarch64_parse_tune (void)
{
  const struct processor *cpu;
  char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
  strcpy (str, aarch64_tune_string);

  /* Loop through the list of supported CPUs to find a match.  */
  for (cpu = all_cores; cpu->name != NULL; cpu++)
    {
      if (strcmp (cpu->name, str) == 0)
	{
	  selected_tune = cpu;
	  return;
	}
    }

  /* CPU name not found in list.  */
  error ("unknown value %qs for -mtune", str);
  return;
}
/* Implement TARGET_OPTION_OVERRIDE.  */
static void
aarch64_override_options (void)
{
  /* march wins over mcpu, so when march is defined, mcpu takes the same value,
     otherwise march remains undefined.  mtune can be used with either march
     or mcpu.  */
  if (aarch64_arch_string)
    {
      aarch64_parse_arch ();
      aarch64_cpu_string = NULL;
    }

  if (aarch64_cpu_string)
    {
      aarch64_parse_cpu ();
      selected_arch = NULL;
    }

  if (aarch64_tune_string)
    {
      aarch64_parse_tune ();
    }

  initialize_aarch64_code_model ();

  aarch64_build_bitmask_table ();

  /* This target defaults to strict volatile bitfields.  */
  if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
    flag_strict_volatile_bitfields = 1;

  /* If the user did not specify a processor, choose the default
     one for them.  This will be the CPU set during configuration using
     --with-cpu, otherwise it is "generic".  */
  if (!selected_cpu)
    {
      selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
      aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
    }

  gcc_assert (selected_cpu);

  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!selected_tune)
    selected_tune = &all_cores[selected_cpu->core];

  aarch64_tune_flags = selected_tune->flags;
  aarch64_tune = selected_tune->core;
  aarch64_tune_params = selected_tune->tune;

  aarch64_override_options_after_change ();
}
/* Implement targetm.override_options_after_change.  */
static void
aarch64_override_options_after_change (void)
{
  faked_omit_frame_pointer = false;

  /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
     that aarch64_frame_pointer_required will be called.  We need to remember
     whether flag_omit_frame_pointer was turned on normally or just faked.  */
  if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
    {
      flag_omit_frame_pointer = true;
      faked_omit_frame_pointer = true;
    }
}

static struct machine_function *
aarch64_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_alloc_cleared_machine_function ();
  return machine;
}

void
aarch64_init_expanders (void)
{
  init_machine_status = aarch64_init_machine_status;
}
/* A checking mechanism for the implementation of the various code models.  */
static void
initialize_aarch64_code_model (void)
{
  if (flag_pic)
    {
      switch (aarch64_cmodel_var)
	{
	case AARCH64_CMODEL_TINY:
	  aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
	  break;
	case AARCH64_CMODEL_SMALL:
	  aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
	  break;
	case AARCH64_CMODEL_LARGE:
	  sorry ("code model %qs with -f%s", "large",
		 flag_pic > 1 ? "PIC" : "pic");
	default:
	  gcc_unreachable ();
	}
    }
  else
    aarch64_cmodel = aarch64_cmodel_var;
}
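/* For example, -mcmodel=small together with -fpic selects
   AARCH64_CMODEL_SMALL_PIC above, while without -fpic the user-selected
   model is used unchanged; the large code model currently has no PIC
   variant, hence the sorry () diagnostic.  */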
/* Return true if SYMBOL_REF X binds locally.  */

static bool
aarch64_symbol_binds_local_p (const_rtx x)
{
  return (SYMBOL_REF_DECL (x)
	  ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
	  : SYMBOL_REF_LOCAL_P (x));
}

/* Return true if SYMBOL_REF X is thread local.  */
static bool
aarch64_tls_symbol_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  if (GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_TLS_MODEL (x) != 0;
}
/* Classify a TLS symbol into one of the TLS kinds.  */
enum aarch64_symbol_type
aarch64_classify_tls_symbol (rtx x)
{
  enum tls_model tls_kind = tls_symbolic_operand_type (x);

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
    case TLS_MODEL_LOCAL_DYNAMIC:
      return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;

    case TLS_MODEL_INITIAL_EXEC:
      return SYMBOL_SMALL_GOTTPREL;

    case TLS_MODEL_LOCAL_EXEC:
      return SYMBOL_SMALL_TPREL;

    case TLS_MODEL_EMULATED:
    case TLS_MODEL_NONE:
      return SYMBOL_FORCE_TO_MEM;

    default:
      gcc_unreachable ();
    }
}
/* Return the method that should be used to access SYMBOL_REF or
   LABEL_REF X in context CONTEXT.  */

enum aarch64_symbol_type
aarch64_classify_symbol (rtx x,
			 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
{
  if (GET_CODE (x) == LABEL_REF)
    {
      switch (aarch64_cmodel)
	{
	case AARCH64_CMODEL_LARGE:
	  return SYMBOL_FORCE_TO_MEM;

	case AARCH64_CMODEL_TINY_PIC:
	case AARCH64_CMODEL_TINY:
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL_PIC:
	case AARCH64_CMODEL_SMALL:
	  return SYMBOL_SMALL_ABSOLUTE;

	default:
	  gcc_unreachable ();
	}
    }

  if (GET_CODE (x) == SYMBOL_REF)
    {
      if (aarch64_cmodel == AARCH64_CMODEL_LARGE
	  || CONSTANT_POOL_ADDRESS_P (x))
	return SYMBOL_FORCE_TO_MEM;

      if (aarch64_tls_symbol_p (x))
	return aarch64_classify_tls_symbol (x);

      switch (aarch64_cmodel)
	{
	case AARCH64_CMODEL_TINY:
	  if (SYMBOL_REF_WEAK (x))
	    return SYMBOL_FORCE_TO_MEM;
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL:
	  if (SYMBOL_REF_WEAK (x))
	    return SYMBOL_FORCE_TO_MEM;
	  return SYMBOL_SMALL_ABSOLUTE;

	case AARCH64_CMODEL_TINY_PIC:
	  if (!aarch64_symbol_binds_local_p (x))
	    return SYMBOL_SMALL_GOT;
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL_PIC:
	  if (!aarch64_symbol_binds_local_p (x))
	    return SYMBOL_SMALL_GOT;
	  return SYMBOL_SMALL_ABSOLUTE;

	default:
	  gcc_unreachable ();
	}
    }

  /* By default push everything into the constant pool.  */
  return SYMBOL_FORCE_TO_MEM;
}
bool
aarch64_constant_address_p (rtx x)
{
  return (CONSTANT_P (x) && memory_address_p (DImode, x));
}

bool
aarch64_legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return false;

  return true;
}
/* Return true if X holds either a quarter-precision or
   floating-point +0.0 constant.  */
static bool
aarch64_valid_floating_const (enum machine_mode mode, rtx x)
{
  if (!CONST_DOUBLE_P (x))
    return false;

  /* TODO: We could handle moving 0.0 to a TFmode register,
     but first we would like to refactor the movtf_aarch64
     to be more amicable to split moves properly and
     correctly gate on TARGET_SIMD.  For now - reject all
     constants which are not to SFmode or DFmode registers.  */
  if (!(mode == SFmode || mode == DFmode))
    return false;

  if (aarch64_float_const_zero_rtx_p (x))
    return true;
  return aarch64_float_const_representable_p (x);
}
static bool
aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
{
  /* Do not allow vector struct mode constants.  We could support
     0 and -1 easily, but they need support in aarch64-simd.md.  */
  if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
    return false;

  /* This could probably go away because
     we now decompose CONST_INTs according to expand_mov_immediate.  */
  if ((GET_CODE (x) == CONST_VECTOR
       && aarch64_simd_valid_immediate (x, mode, false, NULL))
      || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
    return !targetm.cannot_force_const_mem (mode, x);

  if (GET_CODE (x) == HIGH
      && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
    return true;

  return aarch64_constant_address_p (x);
}
static rtx
aarch64_load_tp (rtx target)
{
  if (!target
      || GET_MODE (target) != Pmode
      || !register_operand (target, Pmode))
    target = gen_reg_rtx (Pmode);

  /* Can return in any reg.  */
  emit_insn (gen_aarch64_load_tp_hard (target));
  return target;
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
   Return the type to use as __builtin_va_list.

   AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:

   struct __va_list
   {
     void *__stack;
     void *__gr_top;
     void *__vr_top;
     int   __gr_offs;
     int   __vr_offs;
   };  */

static tree
aarch64_build_builtin_va_list (void)
{
  tree va_list_name;
  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;

  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;

  /* Create the fields.  */
  f_stack = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__stack"),
			ptr_type_node);
  f_grtop = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__gr_top"),
			ptr_type_node);
  f_vrtop = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__vr_top"),
			ptr_type_node);
  f_groff = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__gr_offs"),
			integer_type_node);
  f_vroff = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__vr_offs"),
			integer_type_node);

  DECL_ARTIFICIAL (f_stack) = 1;
  DECL_ARTIFICIAL (f_grtop) = 1;
  DECL_ARTIFICIAL (f_vrtop) = 1;
  DECL_ARTIFICIAL (f_groff) = 1;
  DECL_ARTIFICIAL (f_vroff) = 1;

  DECL_FIELD_CONTEXT (f_stack) = va_list_type;
  DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
  DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
  DECL_FIELD_CONTEXT (f_groff) = va_list_type;
  DECL_FIELD_CONTEXT (f_vroff) = va_list_type;

  TYPE_FIELDS (va_list_type) = f_stack;
  DECL_CHAIN (f_stack) = f_grtop;
  DECL_CHAIN (f_grtop) = f_vrtop;
  DECL_CHAIN (f_vrtop) = f_groff;
  DECL_CHAIN (f_groff) = f_vroff;

  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
{
  const CUMULATIVE_ARGS *cum;
  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
  tree stack, grtop, vrtop, groff, vroff;
  tree t;
  int gr_save_area_size;
  int vr_save_area_size;
  int vr_offset;

  cum = &crtl->args.info;
  gr_save_area_size
    = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
  vr_save_area_size
    = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;

  if (TARGET_GENERAL_REGS_ONLY)
    {
      if (cum->aapcs_nvrn > 0)
	sorry ("%qs and floating point or vector arguments",
	       "-mgeneral-regs-only");
      vr_save_area_size = 0;
    }

  f_stack = TYPE_FIELDS (va_list_type_node);
  f_grtop = DECL_CHAIN (f_stack);
  f_vrtop = DECL_CHAIN (f_grtop);
  f_groff = DECL_CHAIN (f_vrtop);
  f_vroff = DECL_CHAIN (f_groff);

  stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
		  NULL_TREE);
  grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
		  NULL_TREE);
  vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
		  NULL_TREE);
  groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
		  NULL_TREE);
  vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
		  NULL_TREE);

  /* Emit code to initialize STACK, which points to the next varargs stack
     argument.  CUM->AAPCS_STACK_SIZE gives the number of stack words used
     by named arguments.  STACK is 8-byte aligned.  */
  t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
  if (cum->aapcs_stack_size > 0)
    t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
  t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize GRTOP, the top of the GR save area.
     virtual_incoming_args_rtx should have been 16 byte aligned.  */
  t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
  t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize VRTOP, the top of the VR save area.
     This address is gr_save_area_bytes below GRTOP, rounded
     down to the next 16-byte boundary.  */
  t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
  vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
				STACK_BOUNDARY / BITS_PER_UNIT);

  if (vr_offset)
    t = fold_build_pointer_plus_hwi (t, -vr_offset);
  t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize GROFF, the offset from GRTOP of the
     next GPR argument.  */
  t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
	      build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Likewise emit code to initialize VROFF, the offset from FTOP
     of the next VR argument.  */
  t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
	      build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
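/* Worked example: for int f (int n, ...) only one GP register (w0)
   carries a named argument on entry, so the code above stores
   __gr_offs = -(8 - 1) * 8 = -56 and __vr_offs = -(8 - 0) * 16 = -128;
   va_arg then walks these negative offsets back up towards the tops of
   the two register save areas.  (Sketch assuming the usual
   NUM_ARG_REGS == 8, NUM_FP_ARG_REGS == 8 and UNITS_PER_VREG == 16.)  */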
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			      gimple_seq *post_p ATTRIBUTE_UNUSED)
{
  tree addr;
  bool indirect_p;
  bool is_ha;		/* is HFA or HVA.  */
  bool dw_align;	/* double-word align.  */
  enum machine_mode ag_mode = VOIDmode;
  int nregs;
  enum machine_mode mode;

  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
  tree stack, f_top, f_off, off, arg, roundup, on_stack;
  HOST_WIDE_INT size, rsize, adjust, align;
  tree t, u, cond1, cond2;

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);

  mode = TYPE_MODE (type);

  f_stack = TYPE_FIELDS (va_list_type_node);
  f_grtop = DECL_CHAIN (f_stack);
  f_vrtop = DECL_CHAIN (f_grtop);
  f_groff = DECL_CHAIN (f_vrtop);
  f_vroff = DECL_CHAIN (f_groff);

  stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
		  f_stack, NULL_TREE);
  size = int_size_in_bytes (type);
  align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;

  dw_align = false;
  adjust = 0;
  if (aarch64_vfp_is_call_or_return_candidate (mode,
					       type,
					       &ag_mode,
					       &nregs,
					       &is_ha))
    {
      /* TYPE passed in fp/simd registers.  */
      if (TARGET_GENERAL_REGS_ONLY)
	sorry ("%qs and floating point or vector arguments",
	       "-mgeneral-regs-only");

      f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
		      unshare_expr (valist), f_vrtop, NULL_TREE);
      f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
		      unshare_expr (valist), f_vroff, NULL_TREE);

      rsize = nregs * UNITS_PER_VREG;

      if (is_ha)
	{
	  if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
	    adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
	}
      else if (BLOCK_REG_PADDING (mode, type, 1) == downward
	       && size < UNITS_PER_VREG)
	{
	  adjust = UNITS_PER_VREG - size;
	}
    }
  else
    {
      /* TYPE passed in general registers.  */
      f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
		      unshare_expr (valist), f_grtop, NULL_TREE);
      f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
		      unshare_expr (valist), f_groff, NULL_TREE);
      rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
      nregs = rsize / UNITS_PER_WORD;

      if (align > 8)
	dw_align = true;

      if (BLOCK_REG_PADDING (mode, type, 1) == downward
	  && size < UNITS_PER_WORD)
	{
	  adjust = UNITS_PER_WORD - size;
	}
    }
  /* Get a local temporary for the field value.  */
  off = get_initialized_tmp_var (f_off, pre_p, NULL);

  /* Emit code to branch if off >= 0.  */
  t = build2 (GE_EXPR, boolean_type_node, off,
	      build_int_cst (TREE_TYPE (off), 0));
  cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);

  if (dw_align)
    {
      /* Emit: offs = (offs + 15) & -16.  */
      t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
		  build_int_cst (TREE_TYPE (off), 15));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
		  build_int_cst (TREE_TYPE (off), -16));
      roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
    }
  else
    roundup = NULL;

  /* Update ap.__[g|v]r_offs  */
  t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
	      build_int_cst (TREE_TYPE (off), rsize));
  t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);

  /* String up.  */
  if (roundup)
    t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);

  /* [cond2] if (ap.__[g|v]r_offs > 0)  */
  u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
	      build_int_cst (TREE_TYPE (f_off), 0));
  cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);

  /* String up: make sure the assignment happens before the use.  */
  t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
  COND_EXPR_ELSE (cond1) = t;
  /* Prepare the trees handling the argument that is passed on the stack;
     the top level node will store in ON_STACK.  */
  arg = get_initialized_tmp_var (stack, pre_p, NULL);
  if (align > 8)
    {
      /* if (alignof(type) > 8) (arg = arg + 15) & -16;  */
      t = fold_convert (intDI_type_node, arg);
      t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), 15));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -16));
      t = fold_convert (TREE_TYPE (arg), t);
      roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
    }
  else
    roundup = NULL;
  /* Advance ap.__stack  */
  t = fold_convert (intDI_type_node, arg);
  t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
	      build_int_cst (TREE_TYPE (t), size + 7));
  t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
	      build_int_cst (TREE_TYPE (t), -8));
  t = fold_convert (TREE_TYPE (arg), t);
  t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
  /* String up roundup and advance.  */
  if (roundup)
    t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
  /* String up with arg  */
  on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
  /* Big-endianness related address adjustment.  */
  if (BLOCK_REG_PADDING (mode, type, 1) == downward
      && size < UNITS_PER_WORD)
    {
      t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
		  size_int (UNITS_PER_WORD - size));
      on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
    }

  COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
  COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
  /* Adjustment to OFFSET in the case of BIG_ENDIAN.  */
  t = off;
  if (adjust)
    t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
		build_int_cst (TREE_TYPE (off), adjust));

  t = fold_convert (sizetype, t);
  t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);

  if (is_ha)
    {
      /* type ha; // treat as "struct {ftype field[n];}"
	 ... [computing offs]
	 for (i = 0; i < nregs; ++i, offs += 16)
	   ha.field[i] = *((ftype *)(ap.__vr_top + offs));
	 return ha;  */
      int i;
      tree tmp_ha, field_t, field_ptr_t;

      /* Declare a local variable.  */
      tmp_ha = create_tmp_var_raw (type, "ha");
      gimple_add_tmp_var (tmp_ha);

      /* Establish the base type.  */
      switch (ag_mode)
	{
	case SFmode:
	  field_t = float_type_node;
	  field_ptr_t = float_ptr_type_node;
	  break;
	case DFmode:
	  field_t = double_type_node;
	  field_ptr_t = double_ptr_type_node;
	  break;
	case TFmode:
	  field_t = long_double_type_node;
	  field_ptr_t = long_double_ptr_type_node;
	  break;
/* The half precision and quad precision are not fully supported yet.  Enable
   the following code after the support is complete.  Need to find the correct
   type node for __fp16 *.  */
#if 0
	case HFmode:
	  field_t = float_type_node;
	  field_ptr_t = float_ptr_type_node;
	  break;
#endif
	case V2SImode:
	case V4SImode:
	  {
	    tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
	    field_t = build_vector_type_for_mode (innertype, ag_mode);
	    field_ptr_t = build_pointer_type (field_t);
	  }
	  break;
	default:
	  gcc_assert (0);
	}

      /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area  */
      tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
      addr = t;
      t = fold_convert (field_ptr_t, addr);
      t = build2 (MODIFY_EXPR, field_t,
		  build1 (INDIRECT_REF, field_t, tmp_ha),
		  build1 (INDIRECT_REF, field_t, t));

      /* ha.field[i] = *((field_ptr_t)vr_saved_area + i)  */
      for (i = 1; i < nregs; ++i)
	{
	  addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
	  u = fold_convert (field_ptr_t, addr);
	  u = build2 (MODIFY_EXPR, field_t,
		      build2 (MEM_REF, field_t, tmp_ha,
			      build_int_cst (field_ptr_t,
					     (i *
					      int_size_in_bytes (field_t)))),
		      build1 (INDIRECT_REF, field_t, u));
	  t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
	}

      u = fold_convert (TREE_TYPE (f_top), tmp_ha);
      t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
    }

  COND_EXPR_ELSE (cond2) = t;
  addr = fold_convert (build_pointer_type (type), cond1);
  addr = build_va_arg_indirect_ref (addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);

  return addr;
}
/* Implement TARGET_SETUP_INCOMING_VARARGS.  */
static void
aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
				tree type, int *pretend_size ATTRIBUTE_UNUSED,
				int no_rtl)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  CUMULATIVE_ARGS local_cum;
  int gr_saved, vr_saved;

  /* The caller has advanced CUM up to, but not beyond, the last named
     argument.  Advance a local copy of CUM past the last "real" named
     argument, to find out how many registers are left over.  */
  local_cum = *cum;
  aarch64_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, true);

  /* Found out how many registers we need to save.  */
  gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
  vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;

  if (TARGET_GENERAL_REGS_ONLY)
    {
      if (local_cum.aapcs_nvrn > 0)
	sorry ("%qs and floating point or vector arguments",
	       "-mgeneral-regs-only");
      vr_saved = 0;
    }

  if (!no_rtl)
    {
      if (gr_saved > 0)
	{
	  rtx ptr, mem;

	  /* virtual_incoming_args_rtx should have been 16-byte aligned.  */
	  ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
			       - gr_saved * UNITS_PER_WORD);
	  mem = gen_frame_mem (BLKmode, ptr);
	  set_mem_alias_set (mem, get_varargs_alias_set ());

	  move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
			       mem, gr_saved);
	}
      if (vr_saved > 0)
	{
	  /* We can't use move_block_from_reg, because it will use
	     the wrong mode, storing D regs only.  */
	  enum machine_mode mode = TImode;
	  int off, i;

	  /* Set OFF to the offset from virtual_incoming_args_rtx of
	     the first vector register.  The VR save area lies below
	     the GR one, and is aligned to 16 bytes.  */
	  off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
				   STACK_BOUNDARY / BITS_PER_UNIT);
	  off -= vr_saved * UNITS_PER_VREG;

	  for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
	    {
	      rtx ptr, mem;

	      ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
	      mem = gen_frame_mem (mode, ptr);
	      set_mem_alias_set (mem, get_varargs_alias_set ());
	      aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
	      off += UNITS_PER_VREG;
	    }
	}
    }

  /* We don't save the size into *PRETEND_SIZE because we want to avoid
     any complication of having crtl->args.pretend_args_size changed.  */
  cfun->machine->saved_varargs_size
    = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
			 STACK_BOUNDARY / BITS_PER_UNIT)
       + vr_saved * UNITS_PER_VREG);
}
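/* Example: for int f (int n, ...) the local copy of CUM advances past
   one GP register, so gr_saved = 7 and vr_saved = 8.  The code above
   dumps x1-x7 just below virtual_incoming_args_rtx and q0-q7 below
   that, giving saved_varargs_size = ROUND_UP (7 * 8, 16) + 8 * 16 = 192
   bytes.  (Sketch assuming the usual 8 GP / 8 FP argument registers.)  */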
static void
aarch64_conditional_register_usage (void)
{
  int i;
  if (!TARGET_FLOAT)
    {
      for (i = V0_REGNUM; i <= V31_REGNUM; i++)
	{
	  fixed_regs[i] = 1;
	  call_used_regs[i] = 1;
	}
    }
}
/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.  */
static int
aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
{
  enum machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode && mode != TFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode && mode != TFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
	 and 128-bit vector types.  */
      size = int_size_in_bytes (type);
      switch (size)
	{
	case 8:
	  mode = V2SImode;
	  break;
	case 16:
	  mode = V4SImode;
	  break;
	default:
	  return -1;
	}

      if (*modep == VOIDmode)
	*modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
	 equivalent for the purposes of being homogeneous aggregates
	 if they are the same size.  */
      if (*modep == mode)
	return 1;

      break;

    case ARRAY_TYPE:
      {
	int count;
	tree index = TYPE_DOMAIN (type);

	/* Can't handle incomplete types.  */
	if (!COMPLETE_TYPE_P (type))
	  return -1;

	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
	if (count == -1
	    || !index
	    || !TYPE_MAX_VALUE (index)
	    || !host_integerp (TYPE_MAX_VALUE (index), 1)
	    || !TYPE_MIN_VALUE (index)
	    || !host_integerp (TYPE_MIN_VALUE (index), 1)
	    || count < 0)
	  return -1;

	count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
		  - tree_low_cst (TYPE_MIN_VALUE (index), 1));

	/* There must be no padding.  */
	if (!host_integerp (TYPE_SIZE (type), 1)
	    || (tree_low_cst (TYPE_SIZE (type), 1)
		!= count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case RECORD_TYPE:
      {
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types.  */
	if (!COMPLETE_TYPE_P (type))
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count += sub_count;
	  }

	/* There must be no padding.  */
	if (!host_integerp (TYPE_SIZE (type), 1)
	    || (tree_low_cst (TYPE_SIZE (type), 1)
		!= count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
	/* These aren't very interesting except in a degenerate case.  */
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types.  */
	if (!COMPLETE_TYPE_P (type))
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count = count > sub_count ? count : sub_count;
	  }

	/* There must be no padding.  */
	if (!host_integerp (TYPE_SIZE (type), 1)
	    || (tree_low_cst (TYPE_SIZE (type), 1)
		!= count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    default:
      break;
    }

  return -1;
}
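/* For instance, struct { float x, y, z; } yields count 3 with *modep ==
   SFmode (a three-element HFA, passed in s0-s2), whereas
   struct { float f; double d; } fails the *modep match and returns -1,
   so it is passed like any other composite.  */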
/* Return TRUE if the type, as described by TYPE and MODE, is a composite
   type as described in AAPCS64 \S 4.3.  This includes aggregate, union and
   array types.  The C99 floating-point complex types are also considered
   as composite types, according to AAPCS64 \S 7.1.1.  The complex integer
   types, which are GCC extensions and out of the scope of AAPCS64, are
   treated as composite types here as well.

   Note that MODE itself is not sufficient in determining whether a type
   is such a composite type or not.  This is because
   stor-layout.c:compute_record_mode may have already changed the MODE
   (BLKmode) of a RECORD_TYPE TYPE to some other mode.  For example, a
   structure with only one field may have its MODE set to the mode of the
   field.  Also an integer mode whose size matches the size of the
   RECORD_TYPE type may be used to substitute the original mode
   (i.e. BLKmode) in certain circumstances.  In other words, MODE cannot be
   solely relied on.  */

static bool
aarch64_composite_type_p (const_tree type,
			  enum machine_mode mode)
{
  if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
    return true;

  if (mode == BLKmode
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return true;

  return false;
}

/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
   type as described in AAPCS64 \S 4.1.2.

   See the comment above aarch64_composite_type_p for the notes on MODE.  */

static bool
aarch64_short_vector_p (const_tree type,
			enum machine_mode mode)
{
  HOST_WIDE_INT size = -1;

  if (type && TREE_CODE (type) == VECTOR_TYPE)
    size = int_size_in_bytes (type);
  else if (!aarch64_composite_type_p (type, mode)
	   && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	       || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
    size = GET_MODE_SIZE (mode);

  return (size == 8 || size == 16) ? true : false;
}
/* Return TRUE if an argument, whose type is described by TYPE and MODE,
   shall be passed or returned in simd/fp register(s) (providing these
   parameter passing registers are available).

   Upon successful return, *COUNT returns the number of needed registers,
   *BASE_MODE returns the mode of the individual register and when IS_HA
   is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
   floating-point aggregate or a homogeneous short-vector aggregate.  */

static bool
aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
					 const_tree type,
					 enum machine_mode *base_mode,
					 int *count,
					 bool *is_ha)
{
  enum machine_mode new_mode = VOIDmode;
  bool composite_p = aarch64_composite_type_p (type, mode);

  if (is_ha != NULL) *is_ha = false;

  if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
      || aarch64_short_vector_p (type, mode))
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      if (is_ha != NULL) *is_ha = true;
      *count = 2;
      new_mode = GET_MODE_INNER (mode);
    }
  else if (type && composite_p)
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
	{
	  if (is_ha != NULL) *is_ha = true;
	  *count = ag_count;
	}
      else
	return false;
    }
  else
    return false;

  *base_mode = new_mode;
  return true;
}
/* Implement TARGET_STRUCT_VALUE_RTX.  */

static rtx
aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
			  int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
}

/* Implements target hook vector_mode_supported_p.  */
static bool
aarch64_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SIMD
      && (mode == V4SImode  || mode == V8HImode
	  || mode == V16QImode || mode == V2DImode
	  || mode == V2SImode  || mode == V4HImode
	  || mode == V8QImode  || mode == V2SFmode
	  || mode == V4SFmode  || mode == V2DFmode))
    return true;

  return false;
}
/* Return appropriate SIMD container
   for MODE within a vector of WIDTH bits.  */
static enum machine_mode
aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
{
  gcc_assert (width == 64 || width == 128);
  if (TARGET_SIMD)
    {
      if (width == 128)
	switch (mode)
	  {
	  case DFmode:
	    return V2DFmode;
	  case SFmode:
	    return V4SFmode;
	  case SImode:
	    return V4SImode;
	  case HImode:
	    return V8HImode;
	  case QImode:
	    return V16QImode;
	  case DImode:
	    return V2DImode;
	  default:
	    break;
	  }
      else
	switch (mode)
	  {
	  case SFmode:
	    return V2SFmode;
	  case SImode:
	    return V2SImode;
	  case HImode:
	    return V4HImode;
	  case QImode:
	    return V8QImode;
	  default:
	    break;
	  }
    }
  return word_mode;
}

/* Return 128-bit container as the preferred SIMD mode for MODE.  */
static enum machine_mode
aarch64_preferred_simd_mode (enum machine_mode mode)
{
  return aarch64_simd_container_mode (mode, 128);
}
/* Return the bitmask of possible vector sizes for the vectorizer
   to iterate over.  */
static unsigned int
aarch64_autovectorize_vector_sizes (void)
{
  return (16 | 8);
}

/* A table to help perform AArch64-specific name mangling for AdvSIMD
   vector types in order to conform to the AAPCS64 (see "Procedure
   Call Standard for the ARM 64-bit Architecture", Appendix A).  To
   qualify for emission with the mangled names defined in that document,
   a vector type must not only be of the correct mode but also be
   composed of AdvSIMD vector element types (e.g.
   __builtin_aarch64_simd_qi); these types are registered by
   aarch64_init_simd_builtins ().  In other words, vector types defined
   in other ways e.g. via vector_size attribute will get default
   mangled names.  */
typedef struct
{
  enum machine_mode mode;
  const char *element_type_name;
  const char *mangled_name;
} aarch64_simd_mangle_map_entry;

static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
  /* 64-bit containerized types.  */
  { V8QImode,  "__builtin_aarch64_simd_qi",     "10__Int8x8_t" },
  { V8QImode,  "__builtin_aarch64_simd_uqi",    "11__Uint8x8_t" },
  { V4HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x4_t" },
  { V4HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x4_t" },
  { V2SImode,  "__builtin_aarch64_simd_si",     "11__Int32x2_t" },
  { V2SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x2_t" },
  { V2SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x2_t" },
  { V8QImode,  "__builtin_aarch64_simd_poly8",  "11__Poly8x8_t" },
  { V4HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
  /* 128-bit containerized types.  */
  { V16QImode, "__builtin_aarch64_simd_qi",     "11__Int8x16_t" },
  { V16QImode, "__builtin_aarch64_simd_uqi",    "12__Uint8x16_t" },
  { V8HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x8_t" },
  { V8HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x8_t" },
  { V4SImode,  "__builtin_aarch64_simd_si",     "11__Int32x4_t" },
  { V4SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x4_t" },
  { V2DImode,  "__builtin_aarch64_simd_di",     "11__Int64x2_t" },
  { V2DImode,  "__builtin_aarch64_simd_udi",    "12__Uint64x2_t" },
  { V4SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x4_t" },
  { V2DFmode,  "__builtin_aarch64_simd_df",     "13__Float64x2_t" },
  { V16QImode, "__builtin_aarch64_simd_poly8",  "12__Poly8x16_t" },
  { V8HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
  { VOIDmode, NULL, NULL }
};
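/* With the table above, a C++ function taking an int32x4_t (element
   type __builtin_aarch64_simd_si in V4SImode) mangles the parameter as
   "11__Int32x4_t"; e.g. void f (int32x4_t) becomes _Z1f11__Int32x4_t.  */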
/* Implement TARGET_MANGLE_TYPE.  */

static const char *
aarch64_mangle_type (const_tree type)
{
  /* The AArch64 ABI documents say that "__va_list" has to be
     mangled as if it is in the "std" namespace.  */
  if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Check the mode of the vector type, and the name of the vector
     element type, against the table.  */
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;

      while (pos->mode != VOIDmode)
	{
	  tree elt_type = TREE_TYPE (type);

	  if (pos->mode == TYPE_MODE (type)
	      && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
	      && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
			  pos->element_type_name))
	    return pos->mangled_name;

	  pos++;
	}
    }

  /* Use the default mangling.  */
  return NULL;
}
/* Return the equivalent letter for size.  */
static char
sizetochar (int size)
{
  switch (size)
    {
    case 64: return 'd';
    case 32: return 's';
    case 16: return 'h';
    case 8 : return 'b';
    default: gcc_unreachable ();
    }
}
/* Return true iff x is a uniform vector of floating-point
   constants, and the constant can be represented in
   quarter-precision form.  Note, as aarch64_float_const_representable
   rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0.  */
static bool
aarch64_vect_float_const_representable_p (rtx x)
{
  int i = 0;
  REAL_VALUE_TYPE r0, ri;
  rtx x0, xi;

  if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
    return false;

  x0 = CONST_VECTOR_ELT (x, 0);
  if (!CONST_DOUBLE_P (x0))
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);

  for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
    {
      xi = CONST_VECTOR_ELT (x, i);
      if (!CONST_DOUBLE_P (xi))
	return false;

      REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
      if (!REAL_VALUES_EQUAL (r0, ri))
	return false;
    }

  return aarch64_float_const_representable_p (x0);
}
/* Return true for valid and false for invalid.  */
bool
aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
			      struct simd_immediate_info *info)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG)	\
  matches = 1;						\
  for (i = 0; i < idx; i += (STRIDE))			\
    if (!(TEST))					\
      matches = 0;					\
  if (matches)						\
    {							\
      immtype = (CLASS);				\
      elsize = (ELSIZE);				\
      eshift = (SHIFT);					\
      emvn = (NEG);					\
      break;						\
    }

  unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
  unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
  unsigned char bytes[16];
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  int eshift, emvn;

  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      if (! (aarch64_simd_imm_zero_p (op, mode)
	     || aarch64_vect_float_const_representable_p (op)))
	return false;

      if (info)
	{
	  info->value = CONST_VECTOR_ELT (op, 0);
	  info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
	  info->mvn = false;
	  info->shift = 0;
	}

      return true;
    }

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = CONST_VECTOR_ELT (op, i);
      unsigned HOST_WIDE_INT elpart;
      unsigned int part, parts;

      if (GET_CODE (el) == CONST_INT)
	{
	  elpart = INTVAL (el);
	  parts = 1;
	}
      else if (GET_CODE (el) == CONST_DOUBLE)
	{
	  elpart = CONST_DOUBLE_LOW (el);
	  parts = 2;
	}
      else
	gcc_unreachable ();

      for (part = 0; part < parts; part++)
	{
	  unsigned int byte;
	  for (byte = 0; byte < innersize; byte++)
	    {
	      bytes[idx++] = (elpart & 0xff) ^ invmask;
	      elpart >>= BITS_PER_UNIT;
	    }
	  if (GET_CODE (el) == CONST_DOUBLE)
	    elpart = CONST_DOUBLE_HIGH (el);
	}
    }

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));

  do
    {
      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);

      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);

      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);

      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);

      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);

      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);

      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);

      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);

      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);

      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);

      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);

      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);

      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);

      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);

      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 0, 0);

      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 0, 1);

      CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);

      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
	     && bytes[i] == bytes[(i + 8) % idx], 0, 0);
    }
  while (0);

  /* TODO: Currently the assembler cannot handle types 12 to 15.
     And there is no way to specify cmode through the compiler.
     Disable them till there is support in the assembler.  */
  if (immtype == -1
      || (immtype >= 12 && immtype <= 15))
    return false;

  if (info)
    {
      info->element_width = elsize;
      info->mvn = emvn != 0;
      info->shift = eshift;

      unsigned HOST_WIDE_INT imm = 0;

      /* Un-invert bytes of recognized vector, if necessary.  */
      if (invmask != 0)
	for (i = 0; i < idx; i++)
	  bytes[i] ^= invmask;

      if (immtype == 17)
	{
	  /* FIXME: Broken on 32-bit H_W_I hosts.  */
	  gcc_assert (sizeof (HOST_WIDE_INT) == 8);

	  for (i = 0; i < 8; i++)
	    imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
	      << (i * BITS_PER_UNIT);

	  info->value = GEN_INT (imm);
	}
      else
	{
	  for (i = 0; i < elsize / BITS_PER_UNIT; i++)
	    imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

	  /* Construct 'abcdefgh' because the assembler cannot handle
	     generic constants.  */
	  if (info->mvn)
	    imm = ~imm;
	  imm = (imm >> info->shift) & 0xff;
	  info->value = GEN_INT (imm);
	}
    }

  return true;
#undef CHECK
}
static bool
aarch64_const_vec_all_same_int_p (rtx x,
				  HOST_WIDE_INT minval,
				  HOST_WIDE_INT maxval)
{
  HOST_WIDE_INT firstval;
  int count, i;

  if (GET_CODE (x) != CONST_VECTOR
      || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
    return false;

  firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
  if (firstval < minval || firstval > maxval)
    return false;

  count = CONST_VECTOR_NUNITS (x);
  for (i = 1; i < count; i++)
    if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
      return false;

  return true;
}
/* Check if immediate shift constants are within range.  */
bool
aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
{
  int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
  if (left)
    return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
  else
    return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
}
/* Return true if X is a uniform vector where all elements
   are either the floating-point constant 0.0 or the
   integer constant 0.  */
bool
aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
{
  return x == CONST0_RTX (mode);
}
bool
aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT imm = INTVAL (x);
  int i;

  for (i = 0; i < 8; i++)
    {
      unsigned int byte = imm & 0xff;
      if (byte != 0xff && byte != 0)
	return false;
      imm >>= 8;
    }

  return true;
}
,
6452 enum aarch64_symbol_context context
,
6453 enum machine_mode mode
)
6455 if (GET_CODE (x
) == HIGH
6456 && aarch64_valid_symref (XEXP (x
, 0), GET_MODE (XEXP (x
, 0))))
6459 if (CONST_INT_P (x
) && aarch64_move_imm (INTVAL (x
), mode
))
6462 if (GET_CODE (x
) == SYMBOL_REF
&& mode
== DImode
&& CONSTANT_ADDRESS_P (x
))
6465 return aarch64_classify_symbolic_expression (x
, context
)
6466 == SYMBOL_TINY_ABSOLUTE
;
/* Return a const_int vector of VAL.  */
rtx
aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits);
  int i;

  for (i = 0; i < nunits; i++)
    RTVEC_ELT (v, i) = GEN_INT (val);

  return gen_rtx_CONST_VECTOR (mode, v);
}
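/* E.g. aarch64_simd_gen_const_vector_dup (V4SImode, 1) builds
   (const_vector:V4SI [1 1 1 1]), which the caller below uses to test a
   scalar immediate against the vector immediate-move patterns.  */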
/* Check OP is a legal scalar immediate for the MOVI instruction.  */
bool
aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
{
  enum machine_mode vmode;

  gcc_assert (!VECTOR_MODE_P (mode));
  vmode = aarch64_preferred_simd_mode (mode);
  rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
  return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
}
/* Construct and return a PARALLEL RTX vector.  */
rtx
aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits / 2);
  int base = high ? nunits / 2 : 0;
  rtx t1;
  int i;

  for (i = 0; i < nunits / 2; i++)
    RTVEC_ELT (v, i) = GEN_INT (base + i);

  t1 = gen_rtx_PARALLEL (mode, v);
  return t1;
}
/* Bounds-check lanes.  Ensure OPERAND lies between LOW (inclusive) and
   HIGH (exclusive).  */
void
aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  HOST_WIDE_INT lane;
  gcc_assert (GET_CODE (operand) == CONST_INT);
  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    error ("lane out of range");
}

void
aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  gcc_assert (GET_CODE (operand) == CONST_INT);
  HOST_WIDE_INT lane = INTVAL (operand);

  if (lane < low || lane >= high)
    error ("constant out of range");
}
/* Emit code to reinterpret one AdvSIMD type as another,
   without altering bits.  */
void
aarch64_simd_reinterpret (rtx dest, rtx src)
{
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
}

/* Emit code to place a AdvSIMD pair result in memory locations (with equal
   registers).  */
void
aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
				    rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
				    rtx op1)
{
  rtx mem = gen_rtx_MEM (mode, destaddr);
  rtx tmp1 = gen_reg_rtx (mode);
  rtx tmp2 = gen_reg_rtx (mode);

  emit_insn (intfn (tmp1, op1, tmp2));

  emit_move_insn (mem, tmp1);
  mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
  emit_move_insn (mem, tmp2);
}
/* Return TRUE if OP is a valid vector addressing mode.  */
bool
aarch64_simd_mem_operand_p (rtx op)
{
  return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
			|| GET_CODE (XEXP (op, 0)) == REG);
}
/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */
void
aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
				rtx *src, unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[i];
	  operands[2 * i + 1] = src[i];
	}
    }
  else
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[count - i - 1];
	  operands[2 * i + 1] = src[count - i - 1];
	}
    }
}
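/* For example, a two-component copy from q1:q2 into q2:q3 overlaps the
   source registers ahead of it, so the second loop above emits the
   component moves in reverse order (q3 <- q2 first, then q2 <- q1);
   the non-overlapping case keeps the natural order.  */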
/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
   one of VSTRUCT modes: OI, CI or XI.  */
int
aarch64_simd_attr_length_move (rtx insn)
{
  enum machine_mode mode;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
    {
      mode = GET_MODE (recog_data.operand[0]);
      switch (mode)
	{
	case OImode:
	  return 8;
	case CImode:
	  return 12;
	case XImode:
	  return 16;
	default:
	  gcc_unreachable ();
	}
    }
  return 4;
}
/* Implement target hook TARGET_VECTOR_ALIGNMENT.  The AAPCS64 sets the maximum
   alignment of a vector to 128 bits.  */
static HOST_WIDE_INT
aarch64_simd_vector_alignment (const_tree type)
{
  HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
  return MIN (align, 128);
}

/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.  */
static bool
aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
{
  if (is_packed)
    return false;

  /* We guarantee alignment for vectors up to 128-bits.  */
  if (tree_int_cst_compare (TYPE_SIZE (type),
			    bitsize_int (BIGGEST_ALIGNMENT)) > 0)
    return false;

  /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned.  */
  return true;
}
/* If VALS is a vector constant that can be loaded into a register
   using DUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */
static rtx
aarch64_simd_dup_constant (rtx vals)
{
  enum machine_mode mode = GET_MODE (vals);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  bool all_same = true;
  rtx x;
  int i;

  if (GET_CODE (vals) != CONST_VECTOR)
    return NULL_RTX;

  for (i = 1; i < n_elts; ++i)
    {
      x = CONST_VECTOR_ELT (vals, i);
      if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
	all_same = false;
    }

  if (!all_same)
    return NULL_RTX;

  /* We can load this constant by using DUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */
  x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
  return gen_rtx_VEC_DUPLICATE (mode, x);
}
/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that can not be converted into a CONST_VECTOR.  */
static rtx
aarch64_simd_make_constant (rtx vals)
{
  enum machine_mode mode = GET_MODE (vals);
  rtx const_dup;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    n_const++;
	}
      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }
  else
    gcc_unreachable ();

  if (const_vec != NULL_RTX
      && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
    /* Load using MOVI/MVNI.  */
    return const_vec;
  else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
    /* Loaded using DUP.  */
    return const_dup;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  We can not take advantage of single-cycle
       LD1 because we need a PC-relative addressing mode.  */
    return const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We can not construct an initializer.  */
    return NULL_RTX;
}
void
aarch64_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;
  rtx x, mem;
  int i;

  x = XVECEXP (vals, 0, 0);
  if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
    n_var = 1, one_var = 0;

  for (i = 1; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
	++n_var, one_var = i;

      if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = aarch64_simd_make_constant (vals);
      if (constant != NULL_RTX)
	{
	  emit_move_insn (target, constant);
	  return;
	}
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same)
    {
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);
      rtx index = GEN_INT (one_var);
      enum insn_code icode;

      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
      aarch64_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
      icode = optab_handler (vec_set_optab, mode);
      gcc_assert (icode != CODE_FOR_nothing);
      emit_insn (GEN_FCN (icode) (target, x, index));
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
static unsigned HOST_WIDE_INT
aarch64_shift_truncation_mask (enum machine_mode mode)
{
  return
    (aarch64_vector_mode_supported_p (mode)
     || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
}
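/* So for SImode this returns 31 and for DImode 63: the scalar shift
   instructions truncate the shift amount modulo the register width, so
   the middle end may drop an explicit masking operation.  Vector modes
   return 0 (no truncation guarantee), since AdvSIMD shifts do not wrap
   this way.  */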
#ifndef TLS_SECTION_ASM_FLAG
#define TLS_SECTION_ASM_FLAG 'T'
#endif

void
aarch64_elf_asm_named_section (const char *name, unsigned int flags,
			       tree decl ATTRIBUTE_UNUSED)
{
  const char *type;
  char flagchars[10], *f = flagchars;

  /* If we have already declared this section, we can use an
     abbreviated form to switch back to it -- unless this section is
     part of a COMDAT groups, in which case GAS requires the full
     declaration every time.  */
  if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
      && (flags & SECTION_DECLARED))
    {
      fprintf (asm_out_file, "\t.section\t%s\n", name);
      return;
    }

  if (!(flags & SECTION_DEBUG))
    *f++ = 'a';
  if (flags & SECTION_WRITE)
    *f++ = 'w';
  if (flags & SECTION_CODE)
    *f++ = 'x';
  if (flags & SECTION_SMALL)
    *f++ = 's';
  if (flags & SECTION_MERGE)
    *f++ = 'M';
  if (flags & SECTION_STRINGS)
    *f++ = 'S';
  if (flags & SECTION_TLS)
    *f++ = TLS_SECTION_ASM_FLAG;
  if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
    *f++ = 'G';
  *f = '\0';

  fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);

  if (!(flags & SECTION_NOTYPE))
    {
      const char *format;

      if (flags & SECTION_BSS)
	type = "nobits";
      else
	type = "progbits";

#ifdef TYPE_OPERAND_FMT
      format = "," TYPE_OPERAND_FMT;
#else
      format = ",@%s";
#endif

      fprintf (asm_out_file, format, type);

      if (flags & SECTION_ENTSIZE)
	fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
      if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
	{
	  if (TREE_CODE (decl) == IDENTIFIER_NODE)
	    fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
	  else
	    fprintf (asm_out_file, ",%s,comdat",
		     IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
	}
    }

  putc ('\n', asm_out_file);
}
/* Select a format to encode pointers in exception handling data.  */
int
aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
{
  int type;
  switch (aarch64_cmodel)
    {
    case AARCH64_CMODEL_TINY:
    case AARCH64_CMODEL_TINY_PIC:
    case AARCH64_CMODEL_SMALL:
    case AARCH64_CMODEL_SMALL_PIC:
      /* text+got+data < 4Gb.  4-byte signed relocs are sufficient
	 for everything.  */
      type = DW_EH_PE_sdata4;
      break;
    default:
      /* No assumptions here.  8-byte relocs required.  */
      type = DW_EH_PE_sdata8;
      break;
    }
  return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
}
/* Emit load exclusive.  */

static void
aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
			     rtx mem, rtx model_rtx)
{
  rtx (*gen) (rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_aarch64_load_exclusiveqi; break;
    case HImode: gen = gen_aarch64_load_exclusivehi; break;
    case SImode: gen = gen_aarch64_load_exclusivesi; break;
    case DImode: gen = gen_aarch64_load_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, model_rtx));
}
/* Emit store exclusive.  */

static void
aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
			      rtx rval, rtx mem, rtx model_rtx)
{
  rtx (*gen) (rtx, rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_aarch64_store_exclusiveqi; break;
    case HImode: gen = gen_aarch64_store_exclusivehi; break;
    case SImode: gen = gen_aarch64_store_exclusivesi; break;
    case DImode: gen = gen_aarch64_store_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (bval, rval, mem, model_rtx));
}
/* Mark the previous jump instruction as unlikely.  */

static void
aarch64_emit_unlikely_jump (rtx insn)
{
  rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);

  insn = emit_jump_insn (insn);
  add_reg_note (insn, REG_BR_PROB, very_unlikely);
}
/* Expand a compare and swap pattern.  */

void
aarch64_expand_compare_and_swap (rtx operands[])
{
  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  enum machine_mode mode, cmp_mode;
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);
  cmp_mode = mode;

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */
  if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
      && INTVAL (mod_s) == MEMMODEL_RELEASE)
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case QImode:
    case HImode:
      /* For short modes, we're going to perform the comparison in SImode,
	 so do the zero-extension now.  */
      cmp_mode = SImode;
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* Fall through.  */

    case SImode:
    case DImode:
      /* Force the value into a register if needed.  */
      if (!aarch64_plus_operand (oldval, mode))
	oldval = force_reg (cmp_mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  switch (mode)
    {
    case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
    case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
    case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
    case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  x = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_EQ (SImode, x, const0_rtx);
  emit_insn (gen_rtx_SET (VOIDmode, bval, x));
}
/* Split a compare and swap pattern.  */

void
aarch64_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, scratch;
  enum machine_mode mode;
  bool is_weak;
  rtx label1, label2, x, cond;

  rval = operands[0];
  mem = operands[1];
  oldval = operands[2];
  newval = operands[3];
  is_weak = (operands[4] != const0_rtx);
  scratch = operands[7];
  mode = GET_MODE (mem);

  label1 = NULL_RTX;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);

  cond = aarch64_gen_compare_reg (NE, rval, oldval);
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
  aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));

  aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
      aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
    }
  else
    {
      cond = gen_rtx_REG (CCmode, CC_REGNUM);
      x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, cond, x));
    }

  emit_label (label2);
}
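/* For illustration only (this comment is an addition, not emitted code):
   for a 32-bit strong compare-and-swap with relaxed ordering, the split
   form above corresponds roughly to

	.L1:	ldxr	w0, [x1]	// load exclusive
		cmp	w0, w2		// compare against oldval
		b.ne	.L2		// mismatch: fail without storing
		stxr	w3, w4, [x1]	// store exclusive; w3 = status
		cbnz	w3, .L1		// lost exclusivity: retry
	.L2:

   with the acquire/release variants (LDAXR/STLXR) selected according to
   the memory model passed in operands[5].  */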
/* Split an atomic operation.  */

void
aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
			 rtx value, rtx model_rtx, rtx cond)
{
  enum machine_mode mode = GET_MODE (mem);
  enum machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx label, x;

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
	{
	  value = GEN_INT (-INTVAL (value));
	  code = PLUS;
	}
      /* Fall through.  */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;
    }

  aarch64_emit_store_exclusive (mode, cond, mem,
				gen_lowpart (mode, new_out), model_rtx);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
  aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
}
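/* Note (illustrative): the NOT arm above computes ~(old & value), i.e. the
   NAND semantics required by the __atomic_*_nand builtins, while the MINUS
   arm folds a constant subtrahend into a PLUS of its negation so that the
   add-immediate form can be used inside the load/store-exclusive loop.  */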
static void
aarch64_print_extension (void)
{
  const struct aarch64_option_extension *opt = NULL;

  for (opt = all_extensions; opt->name != NULL; opt++)
    if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
      asm_fprintf (asm_out_file, "+%s", opt->name);

  asm_fprintf (asm_out_file, "\n");
}
static void
aarch64_start_file (void)
{
  if (selected_arch)
    {
      asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
      aarch64_print_extension ();
    }
  else if (selected_cpu)
    {
      asm_fprintf (asm_out_file, "\t.cpu %s", selected_cpu->name);
      aarch64_print_extension ();
    }
  default_file_start();
}
/* Target hook for c_mode_for_suffix.  */
static enum machine_mode
aarch64_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;

  return VOIDmode;
}
/* We can only represent floating point constants which will fit in
   "quarter-precision" values.  These values are characterised by
   a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given by:

   (-1)^s * (n/16) * 2^r

   where:
	's' is the sign bit.
	'n' is an integer in the range 16 <= n <= 31.
	'r' is an integer in the range -3 <= r <= 4.  */
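/* Worked examples (illustrative): 0.25 = (-1)^0 * (16/16) * 2^-2 and
   31.0 = (-1)^0 * (31/16) * 2^4 are representable (31.0 being the largest
   such value), whereas 0.1 is not, since no n in [16, 31] and r in
   [-3, 4] satisfy (n/16) * 2^r = 0.1.  */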
/* Return true iff X can be represented by a quarter-precision
   floating point immediate operand X.  Note, we cannot represent 0.0.  */
bool
aarch64_float_const_representable_p (rtx x)
{
  /* This represents our current view of how many bits
     make up the mantissa.  */
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  int exponent;
  unsigned HOST_WIDE_INT mantissa, mask;
  HOST_WIDE_INT m1, m2;
  REAL_VALUE_TYPE r, m;

  if (!CONST_DOUBLE_P (x))
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* We cannot represent infinities, NaNs or +/-zero.  We won't
     know if we have +zero until we analyse the mantissa, but we
     can reject the other invalid values.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
      || REAL_VALUE_MINUS_ZERO (r))
    return false;

  /* Extract exponent.  */
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);

  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     m1 holds the low part of the mantissa, m2 the high part.
     WARNING: If we ever have a representation using more than 2 * H_W_I - 1
     bits for the mantissa, this can fail (low bits will be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  REAL_VALUE_TO_INT (&m1, &m2, m);

  /* If the low part of the mantissa has bits set we cannot represent
     the value.  */
  if (m1 != 0)
    return false;
  /* We have rejected the lower HOST_WIDE_INT, so update our
     understanding of how many bits lie in the mantissa and
     look only at the high HOST_WIDE_INT.  */
  mantissa = m2;
  point_pos -= HOST_BITS_PER_WIDE_INT;

  /* We can only represent values with a mantissa of the form 1.xxxx.  */
  mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return false;

  /* Having filtered unrepresentable values, we may now remove all
     but the highest 5 bits.  */
  mantissa >>= point_pos - 5;

  /* We cannot represent the value 0.0, so reject it.  This is handled
     elsewhere.  */
  if (mantissa == 0)
    return false;

  /* Then, as bit 4 is always set, we can mask it off, leaving
     the mantissa in the range [0, 15].  */
  mantissa &= ~(1 << 4);
  gcc_assert (mantissa <= 15);

  /* GCC internally does not use IEEE754-like encoding (where normalized
     significands are in the range [1, 2).  GCC uses [0.5, 1) (see real.c).
     Our mantissa values are shifted 4 places to the left relative to
     normalized IEEE754 so we must modify the exponent returned by REAL_EXP
     by 5 places to correct for GCC's representation.  */
  exponent = 5 - exponent;

  return (exponent >= 0 && exponent <= 7);
}
char*
aarch64_output_simd_mov_immediate (rtx const_vector,
				   enum machine_mode mode,
				   unsigned width)
{
  bool is_valid;
  static char templ[40];
  const char *mnemonic;
  unsigned int lane_count = 0;
  char element_char;

  struct simd_immediate_info info;

  /* This will return true to show const_vector is legal for use as either
     a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
     also update INFO to show how the immediate should be generated.  */
  is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
  gcc_assert (is_valid);

  element_char = sizetochar (info.element_width);
  lane_count = width / info.element_width;

  mode = GET_MODE_INNER (mode);
  if (mode == SFmode || mode == DFmode)
    {
      gcc_assert (info.shift == 0 && ! info.mvn);
      if (aarch64_float_const_zero_rtx_p (info.value))
	info.value = GEN_INT (0);
      else
	{
#define buf_size 20
	  REAL_VALUE_TYPE r;
	  REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
	  char float_buf[buf_size] = {'\0'};
	  real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
#undef buf_size

	  if (lane_count == 1)
	    snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
	  else
	    snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
		      lane_count, element_char, float_buf);
	  return templ;
	}
    }

  mnemonic = info.mvn ? "mvni" : "movi";

  if (lane_count == 1)
    snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
	      mnemonic, UINTVAL (info.value));
  else if (info.shift)
    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
	      ", lsl %d", mnemonic, lane_count, element_char,
	      UINTVAL (info.value), info.shift);
  else
    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
	      mnemonic, lane_count, element_char, UINTVAL (info.value));
  return templ;
}
char*
aarch64_output_scalar_simd_mov_immediate (rtx immediate,
					  enum machine_mode mode)
{
  enum machine_mode vmode;

  gcc_assert (!VECTOR_MODE_P (mode));
  vmode = aarch64_simd_container_mode (mode, 64);
  rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
  return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
}
/* Split operands into moves from op[1] + op[2] into op[0].  */

void
aarch64_split_combinev16qi (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  enum machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
  rtx destlo, desthi;

  gcc_assert (halfmode == V16QImode);

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
			       GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  */
  if (reg_overlap_mentioned_p (operands[2], destlo)
      && reg_overlap_mentioned_p (operands[1], desthi))
    {
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
    }
  else if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
	 is in the right place already.  */
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
    }
}
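/* Note (illustrative): the three XORs in the reversed-overlap case above
   are the classic in-place exchange a ^= b; b ^= a; a ^= b, swapping the
   two source registers without requiring a scratch register.  */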
/* vec_perm support.  */

#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  enum machine_mode vmode;
  unsigned char nelt;
  bool one_vector_p;
  bool testing_p;
};
/* Generate a variable permutation.  */

static void
aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_SIMD);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
	{
	  /* Expand the argument to a V16QI mode by duplicating it.  */
	  rtx pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
	  emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
	}
      else
	{
	  emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
	}
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
	{
	  pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
	  emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
	}
      else
	{
	  pair = gen_reg_rtx (OImode);
	  emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
	  emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
	}
    }
}
void
aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx rmask[MAX_VECT_LEN], mask;

  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The TBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  for (i = 0; i < nelt; ++i)
    rmask[i] = mask;
  mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  aarch64_expand_vec_perm_1 (target, op0, op1, sel);
}
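/* For example (illustrative): with a single V8QImode input, the selector
   is ANDed with 7, so an out-of-range index such as 9 selects element
   9 & 7 = 1.  This supplies the modulo behaviour that TBL itself, which
   yields zero for out-of-range indices, does not provide.  */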
/* Recognize patterns suitable for the TRN instructions.  */
static bool
aarch64_evpc_trn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
	return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_trn2v16qi; break;
	case V8QImode: gen = gen_aarch64_trn2v8qi; break;
	case V8HImode: gen = gen_aarch64_trn2v8hi; break;
	case V4HImode: gen = gen_aarch64_trn2v4hi; break;
	case V4SImode: gen = gen_aarch64_trn2v4si; break;
	case V2SImode: gen = gen_aarch64_trn2v2si; break;
	case V2DImode: gen = gen_aarch64_trn2v2di; break;
	case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
	case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
	case V2DFmode: gen = gen_aarch64_trn2v2df; break;
	default:
	  return false;
	}
    }
  else
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_trn1v16qi; break;
	case V8QImode: gen = gen_aarch64_trn1v8qi; break;
	case V8HImode: gen = gen_aarch64_trn1v8hi; break;
	case V4HImode: gen = gen_aarch64_trn1v4hi; break;
	case V4SImode: gen = gen_aarch64_trn1v4si; break;
	case V2SImode: gen = gen_aarch64_trn1v2si; break;
	case V2DImode: gen = gen_aarch64_trn1v2di; break;
	case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
	case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
	case V2DFmode: gen = gen_aarch64_trn1v2df; break;
	default:
	  return false;
	}
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
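/* For example (illustrative): on V4SImode, TRN1 matches the permutation
   {0, 4, 2, 6} and TRN2 matches {1, 5, 3, 7}.  */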
/* Recognize patterns suitable for the UZP instructions.  */
static bool
aarch64_evpc_uzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt = (i * 2 + odd) & mask;
      if (d->perm[i] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
	case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
	case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
	case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
	case V4SImode: gen = gen_aarch64_uzp2v4si; break;
	case V2SImode: gen = gen_aarch64_uzp2v2si; break;
	case V2DImode: gen = gen_aarch64_uzp2v2di; break;
	case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
	case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
	case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
	default:
	  return false;
	}
    }
  else
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
	case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
	case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
	case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
	case V4SImode: gen = gen_aarch64_uzp1v4si; break;
	case V2SImode: gen = gen_aarch64_uzp1v2si; break;
	case V2DImode: gen = gen_aarch64_uzp1v2di; break;
	case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
	case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
	case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
	default:
	  return false;
	}
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
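/* For example (illustrative): on V4SImode, UZP1 matches the permutation
   {0, 2, 4, 6} and UZP2 matches {1, 3, 5, 7}.  */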
/* Recognize patterns suitable for the ZIP instructions.  */
static bool
aarch64_evpc_zip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  high = nelt / 2;
  if (d->perm[0] == high)
    /* Do Nothing.  */
    ;
  else if (d->perm[0] == 0)
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt = (i + high) & mask;
      if (d->perm[i * 2] != elt)
	return false;
      elt = (elt + nelt) & mask;
      if (d->perm[i * 2 + 1] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      high = !high;
    }
  out = d->target;

  if (high)
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_zip2v16qi; break;
	case V8QImode: gen = gen_aarch64_zip2v8qi; break;
	case V8HImode: gen = gen_aarch64_zip2v8hi; break;
	case V4HImode: gen = gen_aarch64_zip2v4hi; break;
	case V4SImode: gen = gen_aarch64_zip2v4si; break;
	case V2SImode: gen = gen_aarch64_zip2v2si; break;
	case V2DImode: gen = gen_aarch64_zip2v2di; break;
	case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
	case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
	case V2DFmode: gen = gen_aarch64_zip2v2df; break;
	default:
	  return false;
	}
    }
  else
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_zip1v16qi; break;
	case V8QImode: gen = gen_aarch64_zip1v8qi; break;
	case V8HImode: gen = gen_aarch64_zip1v8hi; break;
	case V4HImode: gen = gen_aarch64_zip1v4hi; break;
	case V4SImode: gen = gen_aarch64_zip1v4si; break;
	case V2SImode: gen = gen_aarch64_zip1v2si; break;
	case V2DImode: gen = gen_aarch64_zip1v2di; break;
	case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
	case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
	case V2DFmode: gen = gen_aarch64_zip1v2df; break;
	default:
	  return false;
	}
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
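/* For example (illustrative): on V4SImode, ZIP1 matches the permutation
   {0, 4, 1, 5} and ZIP2 matches {2, 6, 3, 7}.  */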
static bool
aarch64_evpc_tbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  enum machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  /* TODO: ARM's TBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;
      rtx x;

      for (i = 0; i < nelt; ++i)
	d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);

      x = d->op0;
      d->op0 = d->op1;
      d->op1 = x;
    }

  if (TARGET_SIMD)
    {
      if (aarch64_evpc_zip (d))
	return true;
      else if (aarch64_evpc_uzp (d))
	return true;
      else if (aarch64_evpc_trn (d))
	return true;
      return aarch64_evpc_tbl (d);
    }
  return false;
}
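/* For example (illustrative): on V4SImode the permutation {5, 1, 7, 3}
   begins with an index into the second operand; after swapping the
   operands it is rewritten as {1, 5, 3, 7}, which aarch64_evpc_trn then
   recognizes as TRN2.  */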
/* Expand a vec_perm_const pattern.  */

bool
aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
	break;

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* Fall Through.  */
    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return aarch64_expand_vec_perm_const_1 (&d);
}
static bool
aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
				     const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Calculate whether all elements are in one vector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* If all elements are from the second vector, reindex as if from the
     first vector.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to a single vector.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = aarch64_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST aarch64_address_cost
/* This hook determines whether unnamed bitfields affect the alignment
   of the containing structure.  The hook returns true if the structure
   should inherit the alignment requirements of an unnamed bitfield's
   type.  */
#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
  hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START aarch64_start_file

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk

#undef TARGET_ASM_SELECT_RTX_SECTION
#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list

#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE aarch64_can_eliminate

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
/* Only the least significant bit is used for initialization guard
   variables.  */
#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix

#ifdef TARGET_BIG_ENDIAN_DEFAULT
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
#endif
#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL aarch64_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN aarch64_fold_builtin

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG aarch64_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance

#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE aarch64_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS aarch64_init_builtins

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE aarch64_mangle_type

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

/* This target hook should return true if accesses to volatile bitfields
   should use the narrowest mode possible.  It should return false if these
   accesses should use the bitfield container type.  */
#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE aarch64_override_options

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  aarch64_override_options_after_change

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

/* Section anchor support.  */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256

/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access.  We assume accesses are aligned.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support.  */

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  aarch64_vectorize_vec_perm_const_ok

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-aarch64.h"