gcc/config/aarch64/aarch64.c
1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2014 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "tree.h"
29 #include "stringpool.h"
30 #include "stor-layout.h"
31 #include "calls.h"
32 #include "varasm.h"
33 #include "regs.h"
34 #include "df.h"
35 #include "hard-reg-set.h"
36 #include "output.h"
37 #include "expr.h"
38 #include "reload.h"
39 #include "toplev.h"
40 #include "target.h"
41 #include "target-def.h"
42 #include "targhooks.h"
43 #include "ggc.h"
44 #include "function.h"
45 #include "tm_p.h"
46 #include "recog.h"
47 #include "langhooks.h"
48 #include "diagnostic-core.h"
49 #include "pointer-set.h"
50 #include "hash-table.h"
51 #include "vec.h"
52 #include "basic-block.h"
53 #include "tree-ssa-alias.h"
54 #include "internal-fn.h"
55 #include "gimple-fold.h"
56 #include "tree-eh.h"
57 #include "gimple-expr.h"
58 #include "is-a.h"
59 #include "gimple.h"
60 #include "gimplify.h"
61 #include "optabs.h"
62 #include "dwarf2.h"
63 #include "cfgloop.h"
64 #include "tree-vectorizer.h"
65 #include "config/arm/aarch-cost-tables.h"
67 /* Defined for convenience. */
68 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
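/* For example, with the default LP64 ABI POINTER_SIZE is 64, so
   POINTER_BYTES is 64 / 8 = 8; under ILP32 it is 32 / 8 = 4.  */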
70 /* Classifies an address.
72 ADDRESS_REG_IMM
73 A simple base register plus immediate offset.
75 ADDRESS_REG_WB
76 A base register indexed by immediate offset with writeback.
78 ADDRESS_REG_REG
79 A base register indexed by (optionally scaled) register.
81 ADDRESS_REG_UXTW
82 A base register indexed by (optionally scaled) zero-extended register.
84 ADDRESS_REG_SXTW
85 A base register indexed by (optionally scaled) sign-extended register.
87 ADDRESS_LO_SUM
88 A LO_SUM rtx with a base register and "LO12" symbol relocation.
90 ADDRESS_SYMBOLIC
91 A constant symbolic address, in pc-relative literal pool. */
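/* Roughly, these classes correspond to the following assembly forms
   (illustrative examples, shown as 64-bit loads):

     ADDRESS_REG_IMM    ldr x0, [x1, #16]
     ADDRESS_REG_WB     ldr x0, [x1, #16]!    or    ldr x0, [x1], #16
     ADDRESS_REG_REG    ldr x0, [x1, x2, lsl #3]
     ADDRESS_REG_UXTW   ldr x0, [x1, w2, uxtw #3]
     ADDRESS_REG_SXTW   ldr x0, [x1, w2, sxtw #3]
     ADDRESS_LO_SUM     ldr x0, [x1, #:lo12:sym]
     ADDRESS_SYMBOLIC   ldr x0, .Lliteral_pool_entry  */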
93 enum aarch64_address_type {
94 ADDRESS_REG_IMM,
95 ADDRESS_REG_WB,
96 ADDRESS_REG_REG,
97 ADDRESS_REG_UXTW,
98 ADDRESS_REG_SXTW,
99 ADDRESS_LO_SUM,
100 ADDRESS_SYMBOLIC
103 struct aarch64_address_info {
104 enum aarch64_address_type type;
105 rtx base;
106 rtx offset;
107 int shift;
108 enum aarch64_symbol_type symbol_type;
111 struct simd_immediate_info
113 rtx value;
114 int shift;
115 int element_width;
116 bool mvn;
117 bool msl;
120 /* The current code model. */
121 enum aarch64_code_model aarch64_cmodel;
123 #ifdef HAVE_AS_TLS
124 #undef TARGET_HAVE_TLS
125 #define TARGET_HAVE_TLS 1
126 #endif
128 static bool aarch64_lra_p (void);
129 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
130 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
131 const_tree,
132 enum machine_mode *, int *,
133 bool *);
134 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
135 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
136 static void aarch64_override_options_after_change (void);
137 static bool aarch64_vector_mode_supported_p (enum machine_mode);
138 static unsigned bit_count (unsigned HOST_WIDE_INT);
139 static bool aarch64_const_vec_all_same_int_p (rtx,
140 HOST_WIDE_INT, HOST_WIDE_INT);
142 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
143 const unsigned char *sel);
145 /* The processor for which instructions should be scheduled. */
146 enum aarch64_processor aarch64_tune = cortexa53;
148 /* The current tuning set. */
149 const struct tune_params *aarch64_tune_params;
151 /* Mask to specify which instructions we are allowed to generate. */
152 unsigned long aarch64_isa_flags = 0;
154 /* Mask to specify which instruction scheduling options should be used. */
155 unsigned long aarch64_tune_flags = 0;
157 /* Tuning parameters. */
159 #if HAVE_DESIGNATED_INITIALIZERS
160 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
161 #else
162 #define NAMED_PARAM(NAME, VAL) (VAL)
163 #endif
165 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
166 __extension__
167 #endif
169 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
170 __extension__
171 #endif
172 static const struct cpu_addrcost_table generic_addrcost_table =
174 NAMED_PARAM (pre_modify, 0),
175 NAMED_PARAM (post_modify, 0),
176 NAMED_PARAM (register_offset, 0),
177 NAMED_PARAM (register_extend, 0),
178 NAMED_PARAM (imm_offset, 0)
181 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
182 __extension__
183 #endif
184 static const struct cpu_regmove_cost generic_regmove_cost =
186 NAMED_PARAM (GP2GP, 1),
187 NAMED_PARAM (GP2FP, 2),
188 NAMED_PARAM (FP2GP, 2),
189 /* We currently do not provide direct support for TFmode Q->Q move.
190 Therefore we need to raise the cost above 2 in order to have
191 reload handle the situation. */
192 NAMED_PARAM (FP2FP, 4)
195 /* Generic costs for vector insn classes. */
196 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
197 __extension__
198 #endif
199 static const struct cpu_vector_cost generic_vector_cost =
201 NAMED_PARAM (scalar_stmt_cost, 1),
202 NAMED_PARAM (scalar_load_cost, 1),
203 NAMED_PARAM (scalar_store_cost, 1),
204 NAMED_PARAM (vec_stmt_cost, 1),
205 NAMED_PARAM (vec_to_scalar_cost, 1),
206 NAMED_PARAM (scalar_to_vec_cost, 1),
207 NAMED_PARAM (vec_align_load_cost, 1),
208 NAMED_PARAM (vec_unalign_load_cost, 1),
209 NAMED_PARAM (vec_unalign_store_cost, 1),
210 NAMED_PARAM (vec_store_cost, 1),
211 NAMED_PARAM (cond_taken_branch_cost, 3),
212 NAMED_PARAM (cond_not_taken_branch_cost, 1)
215 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
216 __extension__
217 #endif
218 static const struct tune_params generic_tunings =
220 &cortexa57_extra_costs,
221 &generic_addrcost_table,
222 &generic_regmove_cost,
223 &generic_vector_cost,
224 NAMED_PARAM (memmov_cost, 4),
225 NAMED_PARAM (issue_rate, 2)
228 static const struct tune_params cortexa53_tunings =
230 &cortexa53_extra_costs,
231 &generic_addrcost_table,
232 &generic_regmove_cost,
233 &generic_vector_cost,
234 NAMED_PARAM (memmov_cost, 4),
235 NAMED_PARAM (issue_rate, 2)
238 static const struct tune_params cortexa57_tunings =
240 &cortexa57_extra_costs,
241 &generic_addrcost_table,
242 &generic_regmove_cost,
243 &generic_vector_cost,
244 NAMED_PARAM (memmov_cost, 4),
245 NAMED_PARAM (issue_rate, 3)
248 /* A processor implementing AArch64. */
249 struct processor
251 const char *const name;
252 enum aarch64_processor core;
253 const char *arch;
254 const unsigned long flags;
255 const struct tune_params *const tune;
258 /* Processor cores implementing AArch64. */
259 static const struct processor all_cores[] =
261 #define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
262 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
263 #include "aarch64-cores.def"
264 #undef AARCH64_CORE
265 {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
266 {NULL, aarch64_none, NULL, 0, NULL}
269 /* Architectures implementing AArch64. */
270 static const struct processor all_architectures[] =
272 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
273 {NAME, CORE, #ARCH, FLAGS, NULL},
274 #include "aarch64-arches.def"
275 #undef AARCH64_ARCH
276 {NULL, aarch64_none, NULL, 0, NULL}
279 /* Target specification.  These are populated as command-line arguments
280 are processed, or NULL if not specified. */
281 static const struct processor *selected_arch;
282 static const struct processor *selected_cpu;
283 static const struct processor *selected_tune;
285 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
287 /* An ISA extension in the co-processor and main instruction set space. */
288 struct aarch64_option_extension
290 const char *const name;
291 const unsigned long flags_on;
292 const unsigned long flags_off;
295 /* ISA extensions in AArch64. */
296 static const struct aarch64_option_extension all_extensions[] =
298 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
299 {NAME, FLAGS_ON, FLAGS_OFF},
300 #include "aarch64-option-extensions.def"
301 #undef AARCH64_OPT_EXTENSION
302 {NULL, 0, 0}
305 /* Used to track the size of an address when generating a pre/post
306 increment address. */
307 static enum machine_mode aarch64_memory_reference_mode;
309 /* Used to force GTY into this file. */
310 static GTY(()) int gty_dummy;
312 /* A table of valid AArch64 "bitmask immediate" values for
313 logical instructions. */
315 #define AARCH64_NUM_BITMASKS 5334
316 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
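/* A "bitmask immediate" is the class of constant accepted by the AND, ORR
   and EOR immediate forms: a power-of-two sized element (2, 4, 8, 16, 32
   or 64 bits) containing a single rotated run of contiguous set bits,
   replicated across the register.  For example 0x5555555555555555,
   0x00ff00ff00ff00ff and 0x0000ffff0000ffff are all valid; there are
   5334 distinct 64-bit values in total, hence the table size above.  */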
318 typedef enum aarch64_cond_code
320 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
321 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
322 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
324 aarch64_cc;
326 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
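/* The enumeration above is ordered so that each condition code and its
   inverse differ only in the low bit, which is what this macro exploits.
   For example AARCH64_INVERSE_CONDITION_CODE (AARCH64_EQ) == AARCH64_NE
   (0 ^ 1) and AARCH64_INVERSE_CONDITION_CODE (AARCH64_GE) == AARCH64_LT
   (10 ^ 1 == 11).  */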
328 /* The condition codes of the processor, and the inverse function. */
329 static const char * const aarch64_condition_codes[] =
331 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
332 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
335 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
336 unsigned
337 aarch64_dbx_register_number (unsigned regno)
339 if (GP_REGNUM_P (regno))
340 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
341 else if (regno == SP_REGNUM)
342 return AARCH64_DWARF_SP;
343 else if (FP_REGNUM_P (regno))
344 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
346 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
347 equivalent DWARF register. */
348 return DWARF_FRAME_REGISTERS;
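/* Concretely, this follows the AArch64 DWARF numbering: x0-x30 map to
   0-30, the stack pointer to 31 and v0-v31 to 64-95; anything else
   (e.g. the condition flags) has no DWARF equivalent.  */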
351 /* Return TRUE if MODE is any of the large INT modes. */
352 static bool
353 aarch64_vect_struct_mode_p (enum machine_mode mode)
355 return mode == OImode || mode == CImode || mode == XImode;
358 /* Return TRUE if MODE is any of the vector modes. */
359 static bool
360 aarch64_vector_mode_p (enum machine_mode mode)
362 return aarch64_vector_mode_supported_p (mode)
363 || aarch64_vect_struct_mode_p (mode);
366 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
367 static bool
368 aarch64_array_mode_supported_p (enum machine_mode mode,
369 unsigned HOST_WIDE_INT nelems)
371 if (TARGET_SIMD
372 && AARCH64_VALID_SIMD_QREG_MODE (mode)
373 && (nelems >= 2 && nelems <= 4))
374 return true;
376 return false;
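/* For example, this allows a 2, 3 or 4 element array of 128-bit vectors
   (V4SImode, V2DImode, etc.), the shape used by the ld2/ld3/ld4
   structure loads, to be represented in a single scalar integer mode
   rather than BLKmode.  */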
379 /* Implement HARD_REGNO_NREGS. */
382 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
384 switch (aarch64_regno_regclass (regno))
386 case FP_REGS:
387 case FP_LO_REGS:
388 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
389 default:
390 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
392 gcc_unreachable ();
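/* For instance, with UNITS_PER_WORD == 8 and UNITS_PER_VREG == 16, a
   16-byte TImode value needs (16 + 7) / 8 == 2 general registers but
   only one FP/SIMD register, while a 32-byte OImode SIMD structure
   needs (32 + 15) / 16 == 2 FP/SIMD registers.  */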
395 /* Implement HARD_REGNO_MODE_OK. */
398 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
400 if (GET_MODE_CLASS (mode) == MODE_CC)
401 return regno == CC_REGNUM;
403 if (regno == SP_REGNUM)
404 /* The purpose of comparing with ptr_mode is to support the
405 global register variable associated with the stack pointer
406 register via the syntax of asm ("wsp") in ILP32. */
407 return mode == Pmode || mode == ptr_mode;
409 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
410 return mode == Pmode;
412 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
413 return 1;
415 if (FP_REGNUM_P (regno))
417 if (aarch64_vect_struct_mode_p (mode))
418 return
419 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
420 else
421 return 1;
424 return 0;
427 /* Return true if calls to DECL should be treated as
428 long-calls (i.e. called via a register). */
429 static bool
430 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
432 return false;
435 /* Return true if calls to symbol-ref SYM should be treated as
436 long-calls (i.e. called via a register). */
437 bool
438 aarch64_is_long_call_p (rtx sym)
440 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
443 /* Return true if the offsets to a zero/sign-extract operation
444 represent an expression that matches an extend operation. The
445 operands represent the parameters from
447 (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */
448 bool
449 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
450 rtx extract_imm)
452 HOST_WIDE_INT mult_val, extract_val;
454 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
455 return false;
457 mult_val = INTVAL (mult_imm);
458 extract_val = INTVAL (extract_imm);
460 if (extract_val > 8
461 && extract_val < GET_MODE_BITSIZE (mode)
462 && exact_log2 (extract_val & ~7) > 0
463 && (extract_val & 7) <= 4
464 && mult_val == (1 << (extract_val & 7)))
465 return true;
467 return false;
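/* As a worked example, in DImode a (zero_extract (mult reg 4) 34 0)
   satisfies the checks above: 34 > 8, 34 < 64, 34 & ~7 == 32 is a power
   of two, 34 & 7 == 2 <= 4 and the multiplier is 1 << 2 == 4.  It
   therefore describes a 32-bit register zero-extended and shifted left
   by two, the shape of extended-register operands such as
   "add x0, x1, w2, uxtw #2".  */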
470 /* Emit an insn that's a simple single-set. Both the operands must be
471 known to be valid. */
472 inline static rtx
473 emit_set_insn (rtx x, rtx y)
475 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
478 /* X and Y are two things to compare using CODE. Emit the compare insn and
479 return the rtx for register 0 in the proper mode. */
481 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
483 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
484 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
486 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
487 return cc_reg;
490 /* Build the SYMBOL_REF for __tls_get_addr. */
492 static GTY(()) rtx tls_get_addr_libfunc;
495 aarch64_tls_get_addr (void)
497 if (!tls_get_addr_libfunc)
498 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
499 return tls_get_addr_libfunc;
502 /* Return the TLS model to use for ADDR. */
504 static enum tls_model
505 tls_symbolic_operand_type (rtx addr)
507 enum tls_model tls_kind = TLS_MODEL_NONE;
508 rtx sym, addend;
510 if (GET_CODE (addr) == CONST)
512 split_const (addr, &sym, &addend);
513 if (GET_CODE (sym) == SYMBOL_REF)
514 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
516 else if (GET_CODE (addr) == SYMBOL_REF)
517 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
519 return tls_kind;
522 /* We'll allow lo_sums in our legitimate addresses so that combine
523 can take care of combining addresses where necessary, but for
524 generation purposes we'll generate the address as:
526 RTL Absolute
527 tmp = hi (symbol_ref); adrp x1, foo
528 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
531 PIC TLS
532 adrp x1, :got:foo adrp tmp, :tlsgd:foo
533 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
534 bl __tls_get_addr
537 Load TLS symbol, depending on TLS mechanism and TLS access model.
539 Global Dynamic - Traditional TLS:
540 adrp tmp, :tlsgd:imm
541 add dest, tmp, #:tlsgd_lo12:imm
542 bl __tls_get_addr
544 Global Dynamic - TLS Descriptors:
545 adrp dest, :tlsdesc:imm
546 ldr tmp, [dest, #:tlsdesc_lo12:imm]
547 add dest, dest, #:tlsdesc_lo12:imm
548 blr tmp
549 mrs tp, tpidr_el0
550 add dest, dest, tp
552 Initial Exec:
553 mrs tp, tpidr_el0
554 adrp tmp, :gottprel:imm
555 ldr dest, [tmp, #:gottprel_lo12:imm]
556 add dest, dest, tp
558 Local Exec:
559 mrs tp, tpidr_el0
560 add t0, tp, #:tprel_hi12:imm
561 add t0, #:tprel_lo12_nc:imm
564 static void
565 aarch64_load_symref_appropriately (rtx dest, rtx imm,
566 enum aarch64_symbol_type type)
568 switch (type)
570 case SYMBOL_SMALL_ABSOLUTE:
572 /* In ILP32, the mode of dest can be either SImode or DImode. */
573 rtx tmp_reg = dest;
574 enum machine_mode mode = GET_MODE (dest);
576 gcc_assert (mode == Pmode || mode == ptr_mode);
578 if (can_create_pseudo_p ())
579 tmp_reg = gen_reg_rtx (mode);
581 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
582 emit_insn (gen_add_losym (dest, tmp_reg, imm));
583 return;
586 case SYMBOL_TINY_ABSOLUTE:
587 emit_insn (gen_rtx_SET (Pmode, dest, imm));
588 return;
590 case SYMBOL_SMALL_GOT:
592 /* In ILP32, the mode of dest can be either SImode or DImode,
593 while the got entry is always of SImode size. The mode of
594 dest depends on how dest is used: if dest is assigned to a
595 pointer (e.g. in the memory), it has SImode; it may have
596 DImode if dest is dereferenced to access the memory.
597 This is why we have to handle three different ldr_got_small
598 patterns here (two patterns for ILP32). */
599 rtx tmp_reg = dest;
600 enum machine_mode mode = GET_MODE (dest);
602 if (can_create_pseudo_p ())
603 tmp_reg = gen_reg_rtx (mode);
605 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
606 if (mode == ptr_mode)
608 if (mode == DImode)
609 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
610 else
611 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
613 else
615 gcc_assert (mode == Pmode);
616 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
619 return;
622 case SYMBOL_SMALL_TLSGD:
624 rtx insns;
625 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
627 start_sequence ();
628 emit_call_insn (gen_tlsgd_small (result, imm));
629 insns = get_insns ();
630 end_sequence ();
632 RTL_CONST_CALL_P (insns) = 1;
633 emit_libcall_block (insns, dest, result, imm);
634 return;
637 case SYMBOL_SMALL_TLSDESC:
639 rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
640 rtx tp;
642 emit_insn (gen_tlsdesc_small (imm));
643 tp = aarch64_load_tp (NULL);
644 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
645 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
646 return;
649 case SYMBOL_SMALL_GOTTPREL:
651 rtx tmp_reg = gen_reg_rtx (Pmode);
652 rtx tp = aarch64_load_tp (NULL);
653 emit_insn (gen_tlsie_small (tmp_reg, imm));
654 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
655 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
656 return;
659 case SYMBOL_SMALL_TPREL:
661 rtx tp = aarch64_load_tp (NULL);
662 emit_insn (gen_tlsle_small (dest, tp, imm));
663 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
664 return;
667 case SYMBOL_TINY_GOT:
668 emit_insn (gen_ldr_got_tiny (dest, imm));
669 return;
671 default:
672 gcc_unreachable ();
676 /* Emit a move from SRC to DEST. Assume that the move expanders can
677 handle all moves if !can_create_pseudo_p (). The distinction is
678 important because, unlike emit_move_insn, the move expanders know
679 how to force Pmode objects into the constant pool even when the
680 constant pool address is not itself legitimate. */
681 static rtx
682 aarch64_emit_move (rtx dest, rtx src)
684 return (can_create_pseudo_p ()
685 ? emit_move_insn (dest, src)
686 : emit_move_insn_1 (dest, src));
689 /* Split a 128-bit move operation into two 64-bit move operations,
690 taking care to handle partial overlap of register to register
691 copies. Special cases are needed when moving between GP regs and
692 FP regs. SRC can be a register, constant or memory; DST a register
693 or memory. If either operand is memory it must not have any side
694 effects. */
695 void
696 aarch64_split_128bit_move (rtx dst, rtx src)
698 rtx dst_lo, dst_hi;
699 rtx src_lo, src_hi;
701 enum machine_mode mode = GET_MODE (dst);
703 gcc_assert (mode == TImode || mode == TFmode);
704 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
705 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
707 if (REG_P (dst) && REG_P (src))
709 int src_regno = REGNO (src);
710 int dst_regno = REGNO (dst);
712 /* Handle FP <-> GP regs. */
713 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
715 src_lo = gen_lowpart (word_mode, src);
716 src_hi = gen_highpart (word_mode, src);
718 if (mode == TImode)
720 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
721 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
723 else
725 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
726 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
728 return;
730 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
732 dst_lo = gen_lowpart (word_mode, dst);
733 dst_hi = gen_highpart (word_mode, dst);
735 if (mode == TImode)
737 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
738 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
740 else
742 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
743 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
745 return;
749 dst_lo = gen_lowpart (word_mode, dst);
750 dst_hi = gen_highpart (word_mode, dst);
751 src_lo = gen_lowpart (word_mode, src);
752 src_hi = gen_highpart_mode (word_mode, mode, src);
754 /* At most one pairing may overlap. */
755 if (reg_overlap_mentioned_p (dst_lo, src_hi))
757 aarch64_emit_move (dst_hi, src_hi);
758 aarch64_emit_move (dst_lo, src_lo);
760 else
762 aarch64_emit_move (dst_lo, src_lo);
763 aarch64_emit_move (dst_hi, src_hi);
767 bool
768 aarch64_split_128bit_move_p (rtx dst, rtx src)
770 return (! REG_P (src)
771 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
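/* In other words, only a copy where both source and destination are
   FP/SIMD registers can stay as a single 128-bit register move; every
   other TImode/TFmode move is split into two 64-bit operations.  */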
774 /* Split a complex SIMD combine. */
776 void
777 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
779 enum machine_mode src_mode = GET_MODE (src1);
780 enum machine_mode dst_mode = GET_MODE (dst);
782 gcc_assert (VECTOR_MODE_P (dst_mode));
784 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
786 rtx (*gen) (rtx, rtx, rtx);
788 switch (src_mode)
790 case V8QImode:
791 gen = gen_aarch64_simd_combinev8qi;
792 break;
793 case V4HImode:
794 gen = gen_aarch64_simd_combinev4hi;
795 break;
796 case V2SImode:
797 gen = gen_aarch64_simd_combinev2si;
798 break;
799 case V2SFmode:
800 gen = gen_aarch64_simd_combinev2sf;
801 break;
802 case DImode:
803 gen = gen_aarch64_simd_combinedi;
804 break;
805 case DFmode:
806 gen = gen_aarch64_simd_combinedf;
807 break;
808 default:
809 gcc_unreachable ();
812 emit_insn (gen (dst, src1, src2));
813 return;
817 /* Split a complex SIMD move. */
819 void
820 aarch64_split_simd_move (rtx dst, rtx src)
822 enum machine_mode src_mode = GET_MODE (src);
823 enum machine_mode dst_mode = GET_MODE (dst);
825 gcc_assert (VECTOR_MODE_P (dst_mode));
827 if (REG_P (dst) && REG_P (src))
829 rtx (*gen) (rtx, rtx);
831 gcc_assert (VECTOR_MODE_P (src_mode));
833 switch (src_mode)
835 case V16QImode:
836 gen = gen_aarch64_split_simd_movv16qi;
837 break;
838 case V8HImode:
839 gen = gen_aarch64_split_simd_movv8hi;
840 break;
841 case V4SImode:
842 gen = gen_aarch64_split_simd_movv4si;
843 break;
844 case V2DImode:
845 gen = gen_aarch64_split_simd_movv2di;
846 break;
847 case V4SFmode:
848 gen = gen_aarch64_split_simd_movv4sf;
849 break;
850 case V2DFmode:
851 gen = gen_aarch64_split_simd_movv2df;
852 break;
853 default:
854 gcc_unreachable ();
857 emit_insn (gen (dst, src));
858 return;
862 static rtx
863 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
865 if (can_create_pseudo_p ())
866 return force_reg (mode, value);
867 else
869 x = aarch64_emit_move (x, value);
870 return x;
875 static rtx
876 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
878 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
880 rtx high;
881 /* Load the full offset into a register. This
882 might be improvable in the future. */
883 high = GEN_INT (offset);
884 offset = 0;
885 high = aarch64_force_temporary (mode, temp, high);
886 reg = aarch64_force_temporary (mode, temp,
887 gen_rtx_PLUS (mode, high, reg));
889 return plus_constant (mode, reg, offset);
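/* An add/sub immediate on AArch64 encodes a 12-bit unsigned value,
   optionally shifted left by 12 bits, so for example an offset of
   0x123456 cannot be added directly; the code above loads it into TEMP
   and adds the two registers instead.  */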
892 void
893 aarch64_expand_mov_immediate (rtx dest, rtx imm)
895 enum machine_mode mode = GET_MODE (dest);
896 unsigned HOST_WIDE_INT mask;
897 int i;
898 bool first;
899 unsigned HOST_WIDE_INT val;
900 bool subtargets;
901 rtx subtarget;
902 int one_match, zero_match;
904 gcc_assert (mode == SImode || mode == DImode);
906 /* Check on what type of symbol it is. */
907 if (GET_CODE (imm) == SYMBOL_REF
908 || GET_CODE (imm) == LABEL_REF
909 || GET_CODE (imm) == CONST)
911 rtx mem, base, offset;
912 enum aarch64_symbol_type sty;
914 /* If we have (const (plus symbol offset)), separate out the offset
915 before we start classifying the symbol. */
916 split_const (imm, &base, &offset);
918 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
919 switch (sty)
921 case SYMBOL_FORCE_TO_MEM:
922 if (offset != const0_rtx
923 && targetm.cannot_force_const_mem (mode, imm))
925 gcc_assert (can_create_pseudo_p ());
926 base = aarch64_force_temporary (mode, dest, base);
927 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
928 aarch64_emit_move (dest, base);
929 return;
931 mem = force_const_mem (ptr_mode, imm);
932 gcc_assert (mem);
933 if (mode != ptr_mode)
934 mem = gen_rtx_ZERO_EXTEND (mode, mem);
935 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
936 return;
938 case SYMBOL_SMALL_TLSGD:
939 case SYMBOL_SMALL_TLSDESC:
940 case SYMBOL_SMALL_GOTTPREL:
941 case SYMBOL_SMALL_GOT:
942 case SYMBOL_TINY_GOT:
943 if (offset != const0_rtx)
945 gcc_assert(can_create_pseudo_p ());
946 base = aarch64_force_temporary (mode, dest, base);
947 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
948 aarch64_emit_move (dest, base);
949 return;
951 /* FALLTHRU */
953 case SYMBOL_SMALL_TPREL:
954 case SYMBOL_SMALL_ABSOLUTE:
955 case SYMBOL_TINY_ABSOLUTE:
956 aarch64_load_symref_appropriately (dest, imm, sty);
957 return;
959 default:
960 gcc_unreachable ();
964 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
966 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
967 return;
970 if (!CONST_INT_P (imm))
972 if (GET_CODE (imm) == HIGH)
973 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
974 else
976 rtx mem = force_const_mem (mode, imm);
977 gcc_assert (mem);
978 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
981 return;
984 if (mode == SImode)
986 /* We know we can't do this in 1 insn, and we must be able to do it
987 in two; so don't mess around looking for sequences that don't buy
988 us anything. */
989 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
990 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
991 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
992 return;
995 /* Remaining cases are all for DImode. */
997 val = INTVAL (imm);
998 subtargets = optimize && can_create_pseudo_p ();
1000 one_match = 0;
1001 zero_match = 0;
1002 mask = 0xffff;
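/* Count how many of the four 16-bit halfwords of VAL are all zeros and
   how many are all ones.  Two all-ones halfwords mean the constant can
   be built as a single MOVN (leaving three halfwords all ones) followed
   by one MOVK; two all-zero halfwords mean the plain MOVZ/MOVK sequence
   at simple_sequence needs at most two instructions.  */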
1004 for (i = 0; i < 64; i += 16, mask <<= 16)
1006 if ((val & mask) == 0)
1007 zero_match++;
1008 else if ((val & mask) == mask)
1009 one_match++;
1012 if (one_match == 2)
1014 mask = 0xffff;
1015 for (i = 0; i < 64; i += 16, mask <<= 16)
1017 if ((val & mask) != mask)
1019 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1020 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1021 GEN_INT ((val >> i) & 0xffff)));
1022 return;
1025 gcc_unreachable ();
1028 if (zero_match == 2)
1029 goto simple_sequence;
1031 mask = 0x0ffff0000UL;
1032 for (i = 16; i < 64; i += 16, mask <<= 16)
1034 HOST_WIDE_INT comp = mask & ~(mask - 1);
1036 if (aarch64_uimm12_shift (val - (val & mask)))
1038 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1040 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1041 emit_insn (gen_adddi3 (dest, subtarget,
1042 GEN_INT (val - (val & mask))));
1043 return;
1045 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1047 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1049 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1050 GEN_INT ((val + comp) & mask)));
1051 emit_insn (gen_adddi3 (dest, subtarget,
1052 GEN_INT (val - ((val + comp) & mask))));
1053 return;
1055 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1057 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1059 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1060 GEN_INT ((val - comp) | ~mask)));
1061 emit_insn (gen_adddi3 (dest, subtarget,
1062 GEN_INT (val - ((val - comp) | ~mask))));
1063 return;
1065 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1067 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1069 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1070 GEN_INT (val | ~mask)));
1071 emit_insn (gen_adddi3 (dest, subtarget,
1072 GEN_INT (val - (val | ~mask))));
1073 return;
1077 /* See if we can do it by arithmetically combining two
1078 immediates. */
1079 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1081 int j;
1082 mask = 0xffff;
1084 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1085 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1087 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1088 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1089 GEN_INT (aarch64_bitmasks[i])));
1090 emit_insn (gen_adddi3 (dest, subtarget,
1091 GEN_INT (val - aarch64_bitmasks[i])));
1092 return;
1095 for (j = 0; j < 64; j += 16, mask <<= 16)
1097 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1099 emit_insn (gen_rtx_SET (VOIDmode, dest,
1100 GEN_INT (aarch64_bitmasks[i])));
1101 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1102 GEN_INT ((val >> j) & 0xffff)));
1103 return;
1108 /* See if we can do it by logically combining two immediates. */
1109 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1111 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1113 int j;
1115 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1116 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1118 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1119 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1120 GEN_INT (aarch64_bitmasks[i])));
1121 emit_insn (gen_iordi3 (dest, subtarget,
1122 GEN_INT (aarch64_bitmasks[j])));
1123 return;
1126 else if ((val & aarch64_bitmasks[i]) == val)
1128 int j;
1130 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1131 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1134 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1135 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1136 GEN_INT (aarch64_bitmasks[j])));
1137 emit_insn (gen_anddi3 (dest, subtarget,
1138 GEN_INT (aarch64_bitmasks[i])));
1139 return;
1144 simple_sequence:
1145 first = true;
1146 mask = 0xffff;
1147 for (i = 0; i < 64; i += 16, mask <<= 16)
1149 if ((val & mask) != 0)
1151 if (first)
1153 emit_insn (gen_rtx_SET (VOIDmode, dest,
1154 GEN_INT (val & mask)));
1155 first = false;
1157 else
1158 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1159 GEN_INT ((val >> i) & 0xffff)));
1164 static bool
1165 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1167 /* Indirect calls are not currently supported. */
1168 if (decl == NULL)
1169 return false;
1171 /* Cannot tail-call to long-calls, since these are outside of the
1172 range of a branch instruction (we could handle this if we added
1173 support for indirect tail-calls. */
1174 if (aarch64_decl_is_long_call_p (decl))
1175 return false;
1177 return true;
1180 /* Implement TARGET_PASS_BY_REFERENCE. */
1182 static bool
1183 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1184 enum machine_mode mode,
1185 const_tree type,
1186 bool named ATTRIBUTE_UNUSED)
1188 HOST_WIDE_INT size;
1189 enum machine_mode dummymode;
1190 int nregs;
1192 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1193 size = (mode == BLKmode && type)
1194 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1196 /* Aggregates are passed by reference based on their size. */
1197 if (type && AGGREGATE_TYPE_P (type))
1199 size = int_size_in_bytes (type);
1202 /* Variable sized arguments are always returned by reference. */
1203 if (size < 0)
1204 return true;
1206 /* Can this be a candidate to be passed in fp/simd register(s)? */
1207 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1208 &dummymode, &nregs,
1209 NULL))
1210 return false;
1212 /* Arguments which are variable sized or larger than 2 registers are
1213 passed by reference unless they are a homogeneous floating-point
1214 aggregate. */
1215 return size > 2 * UNITS_PER_WORD;
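/* For example, a plain structure of three pointers (24 bytes) is passed
   by reference, whereas a homogeneous aggregate of four doubles,
   although 32 bytes, is an fp/simd candidate and is therefore passed by
   value (in v0-v3 when registers are available).  */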
1218 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1219 static bool
1220 aarch64_return_in_msb (const_tree valtype)
1222 enum machine_mode dummy_mode;
1223 int dummy_int;
1225 /* Never happens in little-endian mode. */
1226 if (!BYTES_BIG_ENDIAN)
1227 return false;
1229 /* Only composite types smaller than or equal to 16 bytes can
1230 be potentially returned in registers. */
1231 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1232 || int_size_in_bytes (valtype) <= 0
1233 || int_size_in_bytes (valtype) > 16)
1234 return false;
1236 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1237 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1238 is always passed/returned in the least significant bits of fp/simd
1239 register(s). */
1240 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1241 &dummy_mode, &dummy_int, NULL))
1242 return false;
1244 return true;
1247 /* Implement TARGET_FUNCTION_VALUE.
1248 Define how to find the value returned by a function. */
1250 static rtx
1251 aarch64_function_value (const_tree type, const_tree func,
1252 bool outgoing ATTRIBUTE_UNUSED)
1254 enum machine_mode mode;
1255 int unsignedp;
1256 int count;
1257 enum machine_mode ag_mode;
1259 mode = TYPE_MODE (type);
1260 if (INTEGRAL_TYPE_P (type))
1261 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1263 if (aarch64_return_in_msb (type))
1265 HOST_WIDE_INT size = int_size_in_bytes (type);
1267 if (size % UNITS_PER_WORD != 0)
1269 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1270 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1274 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1275 &ag_mode, &count, NULL))
1277 if (!aarch64_composite_type_p (type, mode))
1279 gcc_assert (count == 1 && mode == ag_mode);
1280 return gen_rtx_REG (mode, V0_REGNUM);
1282 else
1284 int i;
1285 rtx par;
1287 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1288 for (i = 0; i < count; i++)
1290 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1291 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1292 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1293 XVECEXP (par, 0, i) = tmp;
1295 return par;
1298 else
1299 return gen_rtx_REG (mode, R0_REGNUM);
1302 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1303 Return true if REGNO is the number of a hard register in which the values
1304 of called function may come back. */
1306 static bool
1307 aarch64_function_value_regno_p (const unsigned int regno)
1309 /* Maximum of 16 bytes can be returned in the general registers. Examples
1310 of 16-byte return values are: 128-bit integers and 16-byte small
1311 structures (excluding homogeneous floating-point aggregates). */
1312 if (regno == R0_REGNUM || regno == R1_REGNUM)
1313 return true;
1315 /* Up to four fp/simd registers can return a function value, e.g. a
1316 homogeneous floating-point aggregate having four members. */
1317 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1318 return !TARGET_GENERAL_REGS_ONLY;
1320 return false;
1323 /* Implement TARGET_RETURN_IN_MEMORY.
1325 If the type T of the result of a function is such that
1326 void func (T arg)
1327 would require that arg be passed as a value in a register (or set of
1328 registers) according to the parameter passing rules, then the result
1329 is returned in the same registers as would be used for such an
1330 argument. */
1332 static bool
1333 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1335 HOST_WIDE_INT size;
1336 enum machine_mode ag_mode;
1337 int count;
1339 if (!AGGREGATE_TYPE_P (type)
1340 && TREE_CODE (type) != COMPLEX_TYPE
1341 && TREE_CODE (type) != VECTOR_TYPE)
1342 /* Simple scalar types are always returned in registers. */
1343 return false;
1345 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1346 type,
1347 &ag_mode,
1348 &count,
1349 NULL))
1350 return false;
1352 /* Types larger than 2 registers are returned in memory. */
1353 size = int_size_in_bytes (type);
1354 return (size < 0 || size > 2 * UNITS_PER_WORD);
1357 static bool
1358 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1359 const_tree type, int *nregs)
1361 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1362 return aarch64_vfp_is_call_or_return_candidate (mode,
1363 type,
1364 &pcum->aapcs_vfp_rmode,
1365 nregs,
1366 NULL);
1369 /* Given MODE and TYPE of a function argument, return the alignment in
1370 bits. The idea is to suppress any stronger alignment requested by
1371 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1372 This is a helper function for local use only. */
1374 static unsigned int
1375 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1377 unsigned int alignment;
1379 if (type)
1381 if (!integer_zerop (TYPE_SIZE (type)))
1383 if (TYPE_MODE (type) == mode)
1384 alignment = TYPE_ALIGN (type);
1385 else
1386 alignment = GET_MODE_ALIGNMENT (mode);
1388 else
1389 alignment = 0;
1391 else
1392 alignment = GET_MODE_ALIGNMENT (mode);
1394 return alignment;
1397 /* Layout a function argument according to the AAPCS64 rules. The rule
1398 numbers refer to the rule numbers in the AAPCS64. */
1400 static void
1401 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1402 const_tree type,
1403 bool named ATTRIBUTE_UNUSED)
1405 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1406 int ncrn, nvrn, nregs;
1407 bool allocate_ncrn, allocate_nvrn;
1409 /* We need to do this once per argument. */
1410 if (pcum->aapcs_arg_processed)
1411 return;
1413 pcum->aapcs_arg_processed = true;
1415 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1416 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1417 mode,
1418 type,
1419 &nregs);
1421 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1422 The following code thus handles passing by SIMD/FP registers first. */
1424 nvrn = pcum->aapcs_nvrn;
1426 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
1427 and homogeneous short-vector aggregates (HVA). */
1428 if (allocate_nvrn)
1430 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1432 pcum->aapcs_nextnvrn = nvrn + nregs;
1433 if (!aarch64_composite_type_p (type, mode))
1435 gcc_assert (nregs == 1);
1436 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1438 else
1440 rtx par;
1441 int i;
1442 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1443 for (i = 0; i < nregs; i++)
1445 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1446 V0_REGNUM + nvrn + i);
1447 tmp = gen_rtx_EXPR_LIST
1448 (VOIDmode, tmp,
1449 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1450 XVECEXP (par, 0, i) = tmp;
1452 pcum->aapcs_reg = par;
1454 return;
1456 else
1458 /* C.3 NSRN is set to 8. */
1459 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1460 goto on_stack;
1464 ncrn = pcum->aapcs_ncrn;
1465 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1466 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1469 /* C6 - C9, though the sign and zero extension semantics are
1470 handled elsewhere. This is the case where the argument fits
1471 entirely in general registers. */
1472 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1474 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1476 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1478 /* C.8 if the argument has an alignment of 16 then the NGRN is
1479 rounded up to the next even number. */
1480 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1482 ++ncrn;
1483 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1485 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1486 A reg is still generated for it, but the caller should be smart
1487 enough not to use it. */
1488 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1490 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1492 else
1494 rtx par;
1495 int i;
1497 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1498 for (i = 0; i < nregs; i++)
1500 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1501 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1502 GEN_INT (i * UNITS_PER_WORD));
1503 XVECEXP (par, 0, i) = tmp;
1505 pcum->aapcs_reg = par;
1508 pcum->aapcs_nextncrn = ncrn + nregs;
1509 return;
1512 /* C.11 */
1513 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1515 /* The argument is passed on stack; record the needed number of words for
1516 this argument (we can re-use NREGS) and align the total size if
1517 necessary. */
1518 on_stack:
1519 pcum->aapcs_stack_words = nregs;
1520 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1521 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1522 16 / UNITS_PER_WORD) + 1;
1523 return;
1526 /* Implement TARGET_FUNCTION_ARG. */
1528 static rtx
1529 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1530 const_tree type, bool named)
1532 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1533 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1535 if (mode == VOIDmode)
1536 return NULL_RTX;
1538 aarch64_layout_arg (pcum_v, mode, type, named);
1539 return pcum->aapcs_reg;
1542 void
1543 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1544 const_tree fntype ATTRIBUTE_UNUSED,
1545 rtx libname ATTRIBUTE_UNUSED,
1546 const_tree fndecl ATTRIBUTE_UNUSED,
1547 unsigned n_named ATTRIBUTE_UNUSED)
1549 pcum->aapcs_ncrn = 0;
1550 pcum->aapcs_nvrn = 0;
1551 pcum->aapcs_nextncrn = 0;
1552 pcum->aapcs_nextnvrn = 0;
1553 pcum->pcs_variant = ARM_PCS_AAPCS64;
1554 pcum->aapcs_reg = NULL_RTX;
1555 pcum->aapcs_arg_processed = false;
1556 pcum->aapcs_stack_words = 0;
1557 pcum->aapcs_stack_size = 0;
1559 return;
1562 static void
1563 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1564 enum machine_mode mode,
1565 const_tree type,
1566 bool named)
1568 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1569 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1571 aarch64_layout_arg (pcum_v, mode, type, named);
1572 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1573 != (pcum->aapcs_stack_words != 0));
1574 pcum->aapcs_arg_processed = false;
1575 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1576 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1577 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1578 pcum->aapcs_stack_words = 0;
1579 pcum->aapcs_reg = NULL_RTX;
1583 bool
1584 aarch64_function_arg_regno_p (unsigned regno)
1586 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1587 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1590 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1591 PARM_BOUNDARY bits of alignment, but will be given anything up
1592 to STACK_BOUNDARY bits if the type requires it. This makes sure
1593 that both before and after the layout of each argument, the Next
1594 Stacked Argument Address (NSAA) will have a minimum alignment of
1595 8 bytes. */
1597 static unsigned int
1598 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1600 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1602 if (alignment < PARM_BOUNDARY)
1603 alignment = PARM_BOUNDARY;
1604 if (alignment > STACK_BOUNDARY)
1605 alignment = STACK_BOUNDARY;
1606 return alignment;
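/* With PARM_BOUNDARY of 64 and STACK_BOUNDARY of 128 this means, for
   example, that a lone char argument still gets a 64-bit aligned slot
   while a 16-byte aligned aggregate is capped at 128-bit alignment.  */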
1609 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1611 Return true if an argument passed on the stack should be padded upwards,
1612 i.e. if the least-significant byte of the stack slot has useful data.
1614 Small aggregate types are placed in the lowest memory address.
1616 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1618 bool
1619 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1621 /* On little-endian targets, the least significant byte of every stack
1622 argument is passed at the lowest byte address of the stack slot. */
1623 if (!BYTES_BIG_ENDIAN)
1624 return true;
1626 /* Otherwise, integral, floating-point and pointer types are padded downward:
1627 the least significant byte of a stack argument is passed at the highest
1628 byte address of the stack slot. */
1629 if (type
1630 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1631 || POINTER_TYPE_P (type))
1632 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1633 return false;
1635 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1636 return true;
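/* For example, on a big-endian target a 3-byte structure occupies the
   three lowest-addressed bytes of its stack slot (padded upward),
   whereas an int argument occupies the highest-addressed four bytes of
   its 8-byte slot (padded downward).  */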
1639 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1641 It specifies padding for the last (may also be the only)
1642 element of a block move between registers and memory. Assuming
1643 the block is in memory, padding upward means that the last element
1644 is padded after its most significant byte, while with downward
1645 padding the last element is padded at its least significant byte
1646 side.
1648 Small aggregates and small complex types are always padded
1649 upwards.
1651 We don't need to worry about homogeneous floating-point or
1652 short-vector aggregates; their move is not affected by the
1653 padding direction determined here. Regardless of endianness,
1654 each element of such an aggregate is put in the least
1655 significant bits of a fp/simd register.
1657 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1658 register has useful data, and return the opposite if the most
1659 significant byte does. */
1661 bool
1662 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1663 bool first ATTRIBUTE_UNUSED)
1666 /* Small composite types are always padded upward. */
1667 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1669 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1670 : GET_MODE_SIZE (mode));
1671 if (size < 2 * UNITS_PER_WORD)
1672 return true;
1675 /* Otherwise, use the default padding. */
1676 return !BYTES_BIG_ENDIAN;
1679 static enum machine_mode
1680 aarch64_libgcc_cmp_return_mode (void)
1682 return SImode;
1685 static bool
1686 aarch64_frame_pointer_required (void)
1688 /* If the function contains dynamic stack allocations, we need to
1689 use the frame pointer to access the static parts of the frame. */
1690 if (cfun->calls_alloca)
1691 return true;
1693 /* In aarch64_override_options_after_change
1694 flag_omit_leaf_frame_pointer turns off the frame pointer by
1695 default. Turn it back on now if we've not got a leaf
1696 function. */
1697 if (flag_omit_leaf_frame_pointer
1698 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
1699 return true;
1701 return false;
1704 /* Mark the registers that need to be saved by the callee and calculate
1705 the size of the callee-saved registers area and frame record (both FP
1706 and LR may be omitted). */
1707 static void
1708 aarch64_layout_frame (void)
1710 HOST_WIDE_INT offset = 0;
1711 int regno;
1713 if (reload_completed && cfun->machine->frame.laid_out)
1714 return;
1716 cfun->machine->frame.fp_lr_offset = 0;
1718 /* First mark all the registers that really need to be saved... */
1719 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1720 cfun->machine->frame.reg_offset[regno] = -1;
1722 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1723 cfun->machine->frame.reg_offset[regno] = -1;
1725 /* ... that includes the eh data registers (if needed)... */
1726 if (crtl->calls_eh_return)
1727 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1728 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1730 /* ... and any callee saved register that dataflow says is live. */
1731 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1732 if (df_regs_ever_live_p (regno)
1733 && !call_used_regs[regno])
1734 cfun->machine->frame.reg_offset[regno] = 0;
1736 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1737 if (df_regs_ever_live_p (regno)
1738 && !call_used_regs[regno])
1739 cfun->machine->frame.reg_offset[regno] = 0;
1741 if (frame_pointer_needed)
1743 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1744 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1745 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1748 /* Now assign stack slots for them. */
1749 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1750 if (cfun->machine->frame.reg_offset[regno] != -1)
1752 cfun->machine->frame.reg_offset[regno] = offset;
1753 offset += UNITS_PER_WORD;
1756 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1757 if (cfun->machine->frame.reg_offset[regno] != -1)
1759 cfun->machine->frame.reg_offset[regno] = offset;
1760 offset += UNITS_PER_WORD;
1763 if (frame_pointer_needed)
1765 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1766 offset += UNITS_PER_WORD;
1767 cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1770 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1772 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1773 offset += UNITS_PER_WORD;
1774 cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1777 cfun->machine->frame.padding0 =
1778 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1779 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1781 cfun->machine->frame.saved_regs_size = offset;
1782 cfun->machine->frame.laid_out = true;
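/* As a worked example (assuming the frame pointer is in use and x19, x20
   and d8 are live across a call): x19 gets offset 0, x20 offset 8, d8
   offset 16, x29 offset 24 and x30 offset 32; the 40-byte total is then
   rounded up to 48 (padding0 == 8), with fp_lr_offset == 16.  */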
1785 /* Make the last instruction frame-related and note that it performs
1786 the operation described by FRAME_PATTERN. */
1788 static void
1789 aarch64_set_frame_expr (rtx frame_pattern)
1791 rtx insn;
1793 insn = get_last_insn ();
1794 RTX_FRAME_RELATED_P (insn) = 1;
1795 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1796 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1797 frame_pattern,
1798 REG_NOTES (insn));
1801 static bool
1802 aarch64_register_saved_on_entry (int regno)
1804 return cfun->machine->frame.reg_offset[regno] != -1;
1808 static void
1809 aarch64_save_or_restore_fprs (int start_offset, int increment,
1810 bool restore, rtx base_rtx)
1813 unsigned regno;
1814 unsigned regno2;
1815 rtx insn;
1816 rtx (*gen_mem_ref)(enum machine_mode, rtx)
1817 = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1820 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1822 if (aarch64_register_saved_on_entry (regno))
1824 rtx mem;
1825 mem = gen_mem_ref (DFmode,
1826 plus_constant (Pmode,
1827 base_rtx,
1828 start_offset));
1830 for (regno2 = regno + 1;
1831 regno2 <= V31_REGNUM
1832 && !aarch64_register_saved_on_entry (regno2);
1833 regno2++)
1835 /* Empty loop. */
1837 if (regno2 <= V31_REGNUM &&
1838 aarch64_register_saved_on_entry (regno2))
1840 rtx mem2;
1841 /* Next highest register to be saved. */
1842 mem2 = gen_mem_ref (DFmode,
1843 plus_constant
1844 (Pmode,
1845 base_rtx,
1846 start_offset + increment));
1847 if (restore == false)
1849 insn = emit_insn
1850 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1851 mem2, gen_rtx_REG (DFmode, regno2)));
1854 else
1856 insn = emit_insn
1857 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1858 gen_rtx_REG (DFmode, regno2), mem2));
1860 add_reg_note (insn, REG_CFA_RESTORE,
1861 gen_rtx_REG (DFmode, regno));
1862 add_reg_note (insn, REG_CFA_RESTORE,
1863 gen_rtx_REG (DFmode, regno2));
1866 /* The first part of a frame-related parallel insn
1867 is always assumed to be relevant to the frame
1868 calculations; subsequent parts are only
1869 frame-related if explicitly marked. */
1870 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1871 regno = regno2;
1872 start_offset += increment * 2;
1874 else
1876 if (restore == false)
1877 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1878 else
1880 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1881 add_reg_note (insn, REG_CFA_RESTORE,
1882 gen_rtx_REG (DImode, regno));
1884 start_offset += increment;
1886 RTX_FRAME_RELATED_P (insn) = 1;
1893 /* OFFSET is the offset from the stack pointer at which the saves and
1894 restores have to happen. */
1895 static void
1896 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1897 bool restore)
1899 rtx insn;
1900 rtx base_rtx = stack_pointer_rtx;
1901 HOST_WIDE_INT start_offset = offset;
1902 HOST_WIDE_INT increment = UNITS_PER_WORD;
1903 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1904 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1905 unsigned regno;
1906 unsigned regno2;
1908 for (regno = R0_REGNUM; regno <= limit; regno++)
1910 if (aarch64_register_saved_on_entry (regno))
1912 rtx mem;
1913 mem = gen_mem_ref (Pmode,
1914 plus_constant (Pmode,
1915 base_rtx,
1916 start_offset));
1918 for (regno2 = regno + 1;
1919 regno2 <= limit
1920 && !aarch64_register_saved_on_entry (regno2);
1921 regno2++)
1923 /* Empty loop. */
1925 if (regno2 <= limit &&
1926 aarch64_register_saved_on_entry (regno2))
1928 rtx mem2;
1929 /* Next highest register to be saved. */
1930 mem2 = gen_mem_ref (Pmode,
1931 plus_constant
1932 (Pmode,
1933 base_rtx,
1934 start_offset + increment));
1935 if (restore == false)
1937 insn = emit_insn
1938 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1939 mem2, gen_rtx_REG (DImode, regno2)));
1942 else
1944 insn = emit_insn
1945 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1946 gen_rtx_REG (DImode, regno2), mem2));
1948 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1949 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1952 /* The first part of a frame-related parallel insn
1953 is always assumed to be relevant to the frame
1954 calculations; subsequent parts are only
1955 frame-related if explicitly marked. */
1956 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1957 1)) = 1;
1958 regno = regno2;
1959 start_offset += increment * 2;
1961 else
1963 if (restore == false)
1964 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1965 else
1967 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1968 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1970 start_offset += increment;
1972 RTX_FRAME_RELATED_P (insn) = 1;
1976 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1980 /* AArch64 stack frames generated by this compiler look like:
1982 +-------------------------------+
1984 | incoming stack arguments |
1986 +-------------------------------+ <-- arg_pointer_rtx
1988 | callee-allocated save area |
1989 | for register varargs |
1991 +-------------------------------+ <-- frame_pointer_rtx
1993 | local variables |
1995 +-------------------------------+
1996 | padding0 | \
1997 +-------------------------------+ |
1998 | | |
1999 | | |
2000 | callee-saved registers | | frame.saved_regs_size
2001 | | |
2002 +-------------------------------+ |
2003 | LR' | |
2004 +-------------------------------+ |
2005 | FP' | /
2006 P +-------------------------------+ <-- hard_frame_pointer_rtx
2007 | dynamic allocation |
2008 +-------------------------------+
2010 | outgoing stack arguments |
2012 +-------------------------------+ <-- stack_pointer_rtx
2014 Dynamic stack allocations such as alloca insert data at point P.
2015 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
2016 hard_frame_pointer_rtx unchanged. */
2018 /* Generate the prologue instructions for entry into a function.
2019 Establish the stack frame by decreasing the stack pointer with a
2020 properly calculated size and, if necessary, create a frame record
2021 filled with the values of LR and previous frame pointer. The
2022 current FP is also set up if it is in use. */
2024 void
2025 aarch64_expand_prologue (void)
2027 /* sub sp, sp, #<frame_size>
2028 stp {fp, lr}, [sp, #<frame_size> - 16]
2029 add fp, sp, #<frame_size> - hardfp_offset
2030 stp {cs_reg}, [fp, #-16] etc.
2032 sub sp, sp, <final_adjustment_if_any>
2034 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
2035 HOST_WIDE_INT frame_size, offset;
2036 HOST_WIDE_INT fp_offset; /* FP offset from SP */
2037 rtx insn;
2039 aarch64_layout_frame ();
2040 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2041 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
2042 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
2043 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2044 + crtl->outgoing_args_size);
2045 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2046 STACK_BOUNDARY / BITS_PER_UNIT);
2048 if (flag_stack_usage_info)
2049 current_function_static_stack_size = frame_size;
2051 fp_offset = (offset
2052 - original_frame_size
2053 - cfun->machine->frame.saved_regs_size);
2055 /* Store-pair and load-pair instructions have an offset range of only -512 to 504. */
2056 if (offset >= 512)
2058 /* When the frame is large, the stack pointer is first decreased to
2059 skip over the callee-allocated save area for
2060 register varargs, the local variable area and/or the callee-saved
2061 register area. This allows the pre-indexed write-back
2062 store pair instructions to be used for setting up the stack frame
2063 efficiently. */
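/* Illustrative note (added for exposition, not from the original source):
   for 64-bit registers the STP/LDP immediate is a signed 7-bit value
   scaled by the 8-byte access size, i.e. byte offsets -512, -504, ...,
   496, 504.  Any frame offset of 512 or more therefore has to take the
   splitting path described above.  */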
2064 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2065 if (offset >= 512)
2066 offset = cfun->machine->frame.saved_regs_size;
2068 frame_size -= (offset + crtl->outgoing_args_size);
2069 fp_offset = 0;
2071 if (frame_size >= 0x1000000)
2073 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2074 emit_move_insn (op0, GEN_INT (-frame_size));
2075 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2076 aarch64_set_frame_expr (gen_rtx_SET
2077 (Pmode, stack_pointer_rtx,
2078 plus_constant (Pmode,
2079 stack_pointer_rtx,
2080 -frame_size)));
2082 else if (frame_size > 0)
2084 if ((frame_size & 0xfff) != frame_size)
2086 insn = emit_insn (gen_add2_insn
2087 (stack_pointer_rtx,
2088 GEN_INT (-(frame_size
2089 & ~(HOST_WIDE_INT)0xfff))));
2090 RTX_FRAME_RELATED_P (insn) = 1;
2092 if ((frame_size & 0xfff) != 0)
2094 insn = emit_insn (gen_add2_insn
2095 (stack_pointer_rtx,
2096 GEN_INT (-(frame_size
2097 & (HOST_WIDE_INT)0xfff))));
2098 RTX_FRAME_RELATED_P (insn) = 1;
2102 else
2103 frame_size = -1;
2105 if (offset > 0)
2107 /* If the frame pointer is needed, save the frame pointer and LR
2108 first. Make the frame pointer point to the location of the
2109 old frame pointer on the stack. */
2110 if (frame_pointer_needed)
2112 rtx mem_fp, mem_lr;
2114 if (fp_offset)
2116 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2117 GEN_INT (-offset)));
2118 RTX_FRAME_RELATED_P (insn) = 1;
2119 aarch64_set_frame_expr (gen_rtx_SET
2120 (Pmode, stack_pointer_rtx,
2121 gen_rtx_MINUS (Pmode,
2122 stack_pointer_rtx,
2123 GEN_INT (offset))));
2124 mem_fp = gen_frame_mem (DImode,
2125 plus_constant (Pmode,
2126 stack_pointer_rtx,
2127 fp_offset));
2128 mem_lr = gen_frame_mem (DImode,
2129 plus_constant (Pmode,
2130 stack_pointer_rtx,
2131 fp_offset
2132 + UNITS_PER_WORD));
2133 insn = emit_insn (gen_store_pairdi (mem_fp,
2134 hard_frame_pointer_rtx,
2135 mem_lr,
2136 gen_rtx_REG (DImode,
2137 LR_REGNUM)));
2139 else
2141 insn = emit_insn (gen_storewb_pairdi_di
2142 (stack_pointer_rtx, stack_pointer_rtx,
2143 hard_frame_pointer_rtx,
2144 gen_rtx_REG (DImode, LR_REGNUM),
2145 GEN_INT (-offset),
2146 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2147 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2150 /* The first part of a frame-related parallel insn is always
2151 assumed to be relevant to the frame calculations;
2152 subsequent parts are only frame-related if explicitly
2153 marked. */
2154 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2155 RTX_FRAME_RELATED_P (insn) = 1;
2157 /* Set up frame pointer to point to the location of the
2158 previous frame pointer on the stack. */
2159 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2160 stack_pointer_rtx,
2161 GEN_INT (fp_offset)));
2162 aarch64_set_frame_expr (gen_rtx_SET
2163 (Pmode, hard_frame_pointer_rtx,
2164 plus_constant (Pmode,
2165 stack_pointer_rtx,
2166 fp_offset)));
2167 RTX_FRAME_RELATED_P (insn) = 1;
2168 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2169 hard_frame_pointer_rtx));
2171 else
2173 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2174 GEN_INT (-offset)));
2175 RTX_FRAME_RELATED_P (insn) = 1;
2178 aarch64_save_or_restore_callee_save_registers
2179 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2182 /* When offset >= 512,
2183 sub sp, sp, #<outgoing_args_size> */
2184 if (frame_size > -1)
2186 if (crtl->outgoing_args_size > 0)
2188 insn = emit_insn (gen_add2_insn
2189 (stack_pointer_rtx,
2190 GEN_INT (- crtl->outgoing_args_size)));
2191 RTX_FRAME_RELATED_P (insn) = 1;
2196 /* Generate the epilogue instructions for returning from a function. */
2197 void
2198 aarch64_expand_epilogue (bool for_sibcall)
2200 HOST_WIDE_INT original_frame_size, frame_size, offset;
2201 HOST_WIDE_INT fp_offset;
2202 rtx insn;
2203 rtx cfa_reg;
2205 aarch64_layout_frame ();
2206 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2207 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2208 + crtl->outgoing_args_size);
2209 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2210 STACK_BOUNDARY / BITS_PER_UNIT);
2212 fp_offset = (offset
2213 - original_frame_size
2214 - cfun->machine->frame.saved_regs_size);
2216 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2218 /* Store pairs and load pairs have an offset range of only -512 to 504. */
2219 if (offset >= 512)
2221 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2222 if (offset >= 512)
2223 offset = cfun->machine->frame.saved_regs_size;
2225 frame_size -= (offset + crtl->outgoing_args_size);
2226 fp_offset = 0;
2227 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2229 insn = emit_insn (gen_add2_insn
2230 (stack_pointer_rtx,
2231 GEN_INT (crtl->outgoing_args_size)));
2232 RTX_FRAME_RELATED_P (insn) = 1;
2235 else
2236 frame_size = -1;
2238 /* If there were outgoing arguments or we've done dynamic stack
2239 allocation, then restore the stack pointer from the frame
2240 pointer. This is at most one insn and more efficient than using
2241 GCC's internal mechanism. */
2242 if (frame_pointer_needed
2243 && (crtl->outgoing_args_size || cfun->calls_alloca))
2245 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2246 hard_frame_pointer_rtx,
2247 GEN_INT (- fp_offset)));
2248 RTX_FRAME_RELATED_P (insn) = 1;
2249 /* As SP is set to (FP - fp_offset), according to the rules in
2250 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2251 from the value of SP from now on. */
2252 cfa_reg = stack_pointer_rtx;
2255 aarch64_save_or_restore_callee_save_registers
2256 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2258 /* Restore the frame pointer and lr if the frame pointer is needed. */
2259 if (offset > 0)
2261 if (frame_pointer_needed)
2263 rtx mem_fp, mem_lr;
2265 if (fp_offset)
2267 mem_fp = gen_frame_mem (DImode,
2268 plus_constant (Pmode,
2269 stack_pointer_rtx,
2270 fp_offset));
2271 mem_lr = gen_frame_mem (DImode,
2272 plus_constant (Pmode,
2273 stack_pointer_rtx,
2274 fp_offset
2275 + UNITS_PER_WORD));
2276 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2277 mem_fp,
2278 gen_rtx_REG (DImode,
2279 LR_REGNUM),
2280 mem_lr));
2282 else
2284 insn = emit_insn (gen_loadwb_pairdi_di
2285 (stack_pointer_rtx,
2286 stack_pointer_rtx,
2287 hard_frame_pointer_rtx,
2288 gen_rtx_REG (DImode, LR_REGNUM),
2289 GEN_INT (offset),
2290 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2291 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2292 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2293 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2294 plus_constant (Pmode, cfa_reg,
2295 offset))));
2298 /* The first part of a frame-related parallel insn
2299 is always assumed to be relevant to the frame
2300 calculations; subsequent parts are only
2301 frame-related if explicitly marked. */
2302 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2303 RTX_FRAME_RELATED_P (insn) = 1;
2304 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2305 add_reg_note (insn, REG_CFA_RESTORE,
2306 gen_rtx_REG (DImode, LR_REGNUM));
2308 if (fp_offset)
2310 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2311 GEN_INT (offset)));
2312 RTX_FRAME_RELATED_P (insn) = 1;
2315 else
2317 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2318 GEN_INT (offset)));
2319 RTX_FRAME_RELATED_P (insn) = 1;
2323 /* Stack adjustment for exception handler. */
2324 if (crtl->calls_eh_return)
2326 /* We need to unwind the stack by the offset computed by
2327 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2328 based on SP. Ideally we would update the SP and define the
2329 CFA along the lines of:
2331 SP = SP + EH_RETURN_STACKADJ_RTX
2332 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2334 However the dwarf emitter only understands a constant
2335 register offset.
2337 The solution chosen here is to use the otherwise unused IP0
2338 as a temporary register to hold the current SP value. The
2339 CFA is described using IP0 then SP is modified. */
2341 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2343 insn = emit_move_insn (ip0, stack_pointer_rtx);
2344 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2345 RTX_FRAME_RELATED_P (insn) = 1;
2347 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2349 /* Ensure the assignment to IP0 does not get optimized away. */
2350 emit_use (ip0);
2353 if (frame_size > -1)
2355 if (frame_size >= 0x1000000)
2357 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2358 emit_move_insn (op0, GEN_INT (frame_size));
2359 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2360 aarch64_set_frame_expr (gen_rtx_SET
2361 (Pmode, stack_pointer_rtx,
2362 plus_constant (Pmode,
2363 stack_pointer_rtx,
2364 frame_size)));
2366 else if (frame_size > 0)
2368 if ((frame_size & 0xfff) != 0)
2370 insn = emit_insn (gen_add2_insn
2371 (stack_pointer_rtx,
2372 GEN_INT ((frame_size
2373 & (HOST_WIDE_INT) 0xfff))));
2374 RTX_FRAME_RELATED_P (insn) = 1;
2376 if ((frame_size & 0xfff) != frame_size)
2378 insn = emit_insn (gen_add2_insn
2379 (stack_pointer_rtx,
2380 GEN_INT ((frame_size
2381 & ~ (HOST_WIDE_INT) 0xfff))));
2382 RTX_FRAME_RELATED_P (insn) = 1;
2386 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2387 plus_constant (Pmode,
2388 stack_pointer_rtx,
2389 offset)));
2392 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2393 if (!for_sibcall)
2394 emit_jump_insn (ret_rtx);
2397 /* Return the place to copy the exception unwinding return address to.
2398 This will probably be a stack slot, but could (in theory) be the
2399 return register. */
2401 aarch64_final_eh_return_addr (void)
2403 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2404 aarch64_layout_frame ();
2405 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2406 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2407 + crtl->outgoing_args_size);
2408 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2409 STACK_BOUNDARY / BITS_PER_UNIT);
2410 fp_offset = offset
2411 - original_frame_size
2412 - cfun->machine->frame.saved_regs_size;
2414 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2415 return gen_rtx_REG (DImode, LR_REGNUM);
2417 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2418 result in a store to save LR introduced by builtin_eh_return () being
2419 incorrectly deleted because the alias is not detected.
2420 So in the calculation of the address to copy the exception unwinding
2421 return address to, we note 2 cases.
2422 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2423 we return a SP-relative location since all the addresses are SP-relative
2424 in this case. This prevents the store from being optimized away.
2425 If the fp_offset is not 0, then the addresses will be FP-relative and
2426 therefore we return a FP-relative location. */
2428 if (frame_pointer_needed)
2430 if (fp_offset)
2431 return gen_frame_mem (DImode,
2432 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2433 else
2434 return gen_frame_mem (DImode,
2435 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2438 /* If FP is not needed, we calculate the location of LR, which would be
2439 at the top of the saved registers block. */
2441 return gen_frame_mem (DImode,
2442 plus_constant (Pmode,
2443 stack_pointer_rtx,
2444 fp_offset
2445 + cfun->machine->frame.saved_regs_size
2446 - 2 * UNITS_PER_WORD));
2449 /* Output code to build up a constant in a register. */
2450 static void
2451 aarch64_build_constant (int regnum, HOST_WIDE_INT val)
2453 if (aarch64_bitmask_imm (val, DImode))
2454 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2455 else
2457 int i;
2458 int ncount = 0;
2459 int zcount = 0;
2460 HOST_WIDE_INT valp = val >> 16;
2461 HOST_WIDE_INT valm;
2462 HOST_WIDE_INT tval;
2464 for (i = 16; i < 64; i += 16)
2466 valm = (valp & 0xffff);
2468 if (valm != 0)
2469 ++ zcount;
2471 if (valm != 0xffff)
2472 ++ ncount;
2474 valp >>= 16;
2477 /* zcount contains the number of additional MOVK instructions
2478 required if the constant is built up with an initial MOVZ instruction,
2479 while ncount is the number of MOVK instructions required if starting
2480 with a MOVN instruction. Choose the sequence that yields the fewer
2481 instructions, preferring MOVZ instructions when the two counts
2482 are the same. */
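/* Worked example (editor's illustration, not from the original source):
   for VAL = 0xffffffffffff1234 the three upper 16-bit chunks are all
   0xffff, so ncount = 0 and zcount = 3.  Starting with MOVN therefore
   needs no trailing MOVK at all, whereas starting with MOVZ would need
   three, so the MOVN sequence below is chosen.  */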
2483 if (ncount < zcount)
2485 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2486 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2487 tval = 0xffff;
2489 else
2491 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2492 GEN_INT (val & 0xffff));
2493 tval = 0;
2496 val >>= 16;
2498 for (i = 16; i < 64; i += 16)
2500 if ((val & 0xffff) != tval)
2501 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2502 GEN_INT (i), GEN_INT (val & 0xffff)));
2503 val >>= 16;
2508 static void
2509 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2511 HOST_WIDE_INT mdelta = delta;
2512 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2513 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2515 if (mdelta < 0)
2516 mdelta = -mdelta;
2518 if (mdelta >= 4096 * 4096)
2520 aarch64_build_constant (scratchreg, delta);
2521 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2523 else if (mdelta > 0)
2525 if (mdelta >= 4096)
2527 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2528 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2529 if (delta < 0)
2530 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2531 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2532 else
2533 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2534 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2536 if (mdelta % 4096 != 0)
2538 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2539 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2540 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
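/* Worked example (editor's illustration, not from the original source):
   for DELTA = 5000 the code above emits a move of 5000 / 4096 = 1 into
   the scratch register, adds it shifted left by 12 (+4096), and then
   adds the remaining 5000 % 4096 = 904 as a plain immediate.  Deltas of
   magnitude 4096 * 4096 or more are instead built with
   aarch64_build_constant and added in one go.  */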
2545 /* Output code to add DELTA to the first argument, and then jump
2546 to FUNCTION. Used for C++ multiple inheritance. */
2547 static void
2548 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2549 HOST_WIDE_INT delta,
2550 HOST_WIDE_INT vcall_offset,
2551 tree function)
2553 /* The this pointer is always in x0. Note that this differs from
2554 Arm where the this pointer may be bumped to r1 if r0 is required
2555 to return a pointer to an aggregate. On AArch64 a result value
2556 pointer will be in x8. */
2557 int this_regno = R0_REGNUM;
2558 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2560 reload_completed = 1;
2561 emit_note (NOTE_INSN_PROLOGUE_END);
2563 if (vcall_offset == 0)
2564 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2565 else
2567 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2569 this_rtx = gen_rtx_REG (Pmode, this_regno);
2570 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2571 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2573 addr = this_rtx;
2574 if (delta != 0)
2576 if (delta >= -256 && delta < 256)
2577 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2578 plus_constant (Pmode, this_rtx, delta));
2579 else
2580 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2583 if (Pmode == ptr_mode)
2584 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2585 else
2586 aarch64_emit_move (temp0,
2587 gen_rtx_ZERO_EXTEND (Pmode,
2588 gen_rtx_MEM (ptr_mode, addr)));
2590 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2591 addr = plus_constant (Pmode, temp0, vcall_offset);
2592 else
2594 aarch64_build_constant (IP1_REGNUM, vcall_offset);
2595 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2598 if (Pmode == ptr_mode)
2599 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2600 else
2601 aarch64_emit_move (temp1,
2602 gen_rtx_SIGN_EXTEND (Pmode,
2603 gen_rtx_MEM (ptr_mode, addr)));
2605 emit_insn (gen_add2_insn (this_rtx, temp1));
2608 /* Generate a tail call to the target function. */
2609 if (!TREE_USED (function))
2611 assemble_external (function);
2612 TREE_USED (function) = 1;
2614 funexp = XEXP (DECL_RTL (function), 0);
2615 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2616 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2617 SIBLING_CALL_P (insn) = 1;
2619 insn = get_insns ();
2620 shorten_branches (insn);
2621 final_start_function (insn, file, 1);
2622 final (insn, file, 1);
2623 final_end_function ();
2625 /* Stop pretending to be a post-reload pass. */
2626 reload_completed = 0;
2629 static int
2630 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2632 if (GET_CODE (*x) == SYMBOL_REF)
2633 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2635 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2636 TLS offsets, not real symbol references. */
2637 if (GET_CODE (*x) == UNSPEC
2638 && XINT (*x, 1) == UNSPEC_TLS)
2639 return -1;
2641 return 0;
2644 static bool
2645 aarch64_tls_referenced_p (rtx x)
2647 if (!TARGET_HAVE_TLS)
2648 return false;
2650 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2654 static int
2655 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2657 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2658 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2660 if (*imm1 < *imm2)
2661 return -1;
2662 if (*imm1 > *imm2)
2663 return +1;
2664 return 0;
2668 static void
2669 aarch64_build_bitmask_table (void)
2671 unsigned HOST_WIDE_INT mask, imm;
2672 unsigned int log_e, e, s, r;
2673 unsigned int nimms = 0;
2675 for (log_e = 1; log_e <= 6; log_e++)
2677 e = 1 << log_e;
2678 if (e == 64)
2679 mask = ~(HOST_WIDE_INT) 0;
2680 else
2681 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2682 for (s = 1; s < e; s++)
2684 for (r = 0; r < e; r++)
2686 /* set s consecutive bits to 1 (s < 64) */
2687 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2688 /* rotate right by r */
2689 if (r != 0)
2690 imm = ((imm >> r) | (imm << (e - r))) & mask;
2691 /* replicate the constant depending on SIMD size */
2692 switch (log_e) {
2693 case 1: imm |= (imm << 2);
2694 case 2: imm |= (imm << 4);
2695 case 3: imm |= (imm << 8);
2696 case 4: imm |= (imm << 16);
2697 case 5: imm |= (imm << 32);
2698 case 6:
2699 break;
2700 default:
2701 gcc_unreachable ();
2703 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2704 aarch64_bitmasks[nimms++] = imm;
2709 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2710 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2711 aarch64_bitmasks_cmp);
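/* Illustrative entry (editor's note, not from the original source):
   with element size e = 8, run length s = 3 and rotation r = 1, the run
   0b00000111 rotates to 0b10000011 = 0x83, and the fall-through
   replication above widens it to the table entry 0x8383838383838383.
   The loops generate one entry per (e, s, r) triple, 5334 in all, which
   the qsort call keeps sorted for the bsearch in aarch64_bitmask_imm.  */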
2715 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2716 a left shift of 0 or 12 bits. */
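/* For instance (editor's illustration, not from the original source):
   0x123 and 0xabc000 (= 0xabc << 12) are accepted, while 0x1001 is
   rejected because its set bits straddle the two 12-bit windows.  */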
2717 bool
2718 aarch64_uimm12_shift (HOST_WIDE_INT val)
2720 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2721 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2726 /* Return true if val is an immediate that can be loaded into a
2727 register by a MOVZ instruction. */
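/* For instance (editor's illustration, not from the original source):
   0x12340000 (one 16-bit chunk at bit 16) and, for DImode,
   0xabcd00000000 (chunk at bit 32) are accepted, while 0x12345678
   spans two chunks and is rejected.  */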
2728 static bool
2729 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2731 if (GET_MODE_SIZE (mode) > 4)
2733 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2734 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2735 return 1;
2737 else
2739 /* Ignore sign extension. */
2740 val &= (HOST_WIDE_INT) 0xffffffff;
2742 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2743 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2747 /* Return true if val is a valid bitmask immediate. */
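/* For instance (editor's illustration, not from the original source):
   0x00ff00ff00ff00ff is a valid bitmask immediate (an 8-bit run of ones
   replicated every 16 bits), while 0x1234 is not a rotated, replicated
   run and the bsearch below fails.  For modes narrower than 8 bytes the
   32-bit pattern is replicated first, e.g. SImode 0x0000ffff is looked
   up as 0x0000ffff0000ffff.  */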
2748 bool
2749 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2751 if (GET_MODE_SIZE (mode) < 8)
2753 /* Replicate bit pattern. */
2754 val &= (HOST_WIDE_INT) 0xffffffff;
2755 val |= val << 32;
2757 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2758 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2762 /* Return true if val is an immediate that can be loaded into a
2763 register in a single instruction. */
2764 bool
2765 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2767 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2768 return 1;
2769 return aarch64_bitmask_imm (val, mode);
2772 static bool
2773 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2775 rtx base, offset;
2777 if (GET_CODE (x) == HIGH)
2778 return true;
2780 split_const (x, &base, &offset);
2781 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2783 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2784 != SYMBOL_FORCE_TO_MEM)
2785 return true;
2786 else
2787 /* Avoid generating a 64-bit relocation in ILP32; leave it
2788 to aarch64_expand_mov_immediate to handle it properly. */
2789 return mode != ptr_mode;
2792 return aarch64_tls_referenced_p (x);
2795 /* Return true if register REGNO is a valid index register.
2796 STRICT_P is true if REG_OK_STRICT is in effect. */
2798 bool
2799 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2801 if (!HARD_REGISTER_NUM_P (regno))
2803 if (!strict_p)
2804 return true;
2806 if (!reg_renumber)
2807 return false;
2809 regno = reg_renumber[regno];
2811 return GP_REGNUM_P (regno);
2814 /* Return true if register REGNO is a valid base register.
2815 STRICT_P is true if REG_OK_STRICT is in effect. */
2817 bool
2818 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2820 if (!HARD_REGISTER_NUM_P (regno))
2822 if (!strict_p)
2823 return true;
2825 if (!reg_renumber)
2826 return false;
2828 regno = reg_renumber[regno];
2831 /* The fake registers will be eliminated to either the stack or
2832 hard frame pointer, both of which are usually valid base registers.
2833 Reload deals with the cases where the eliminated form isn't valid. */
2834 return (GP_REGNUM_P (regno)
2835 || regno == SP_REGNUM
2836 || regno == FRAME_POINTER_REGNUM
2837 || regno == ARG_POINTER_REGNUM);
2840 /* Return true if X is a valid base register.
2841 STRICT_P is true if REG_OK_STRICT is in effect. */
2843 static bool
2844 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2846 if (!strict_p && GET_CODE (x) == SUBREG)
2847 x = SUBREG_REG (x);
2849 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2852 /* Return true if address offset is a valid index. If it is, fill in INFO
2853 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
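/* For example (editor's illustration, not from the original source):
   for an SImode access the index
     (mult:DI (sign_extend:DI (reg:SI)) (const_int 4))
   classifies as ADDRESS_REG_SXTW with shift = 2, which is accepted
   because (1 << 2) matches the 4-byte access size and is later printed
   as an operand of the form [base, wN, sxtw 2].  */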
2855 static bool
2856 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2857 enum machine_mode mode, bool strict_p)
2859 enum aarch64_address_type type;
2860 rtx index;
2861 int shift;
2863 /* (reg:P) */
2864 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2865 && GET_MODE (x) == Pmode)
2867 type = ADDRESS_REG_REG;
2868 index = x;
2869 shift = 0;
2871 /* (sign_extend:DI (reg:SI)) */
2872 else if ((GET_CODE (x) == SIGN_EXTEND
2873 || GET_CODE (x) == ZERO_EXTEND)
2874 && GET_MODE (x) == DImode
2875 && GET_MODE (XEXP (x, 0)) == SImode)
2877 type = (GET_CODE (x) == SIGN_EXTEND)
2878 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2879 index = XEXP (x, 0);
2880 shift = 0;
2882 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2883 else if (GET_CODE (x) == MULT
2884 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2885 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2886 && GET_MODE (XEXP (x, 0)) == DImode
2887 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2888 && CONST_INT_P (XEXP (x, 1)))
2890 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2891 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2892 index = XEXP (XEXP (x, 0), 0);
2893 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2895 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2896 else if (GET_CODE (x) == ASHIFT
2897 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2898 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2899 && GET_MODE (XEXP (x, 0)) == DImode
2900 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2901 && CONST_INT_P (XEXP (x, 1)))
2903 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2904 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2905 index = XEXP (XEXP (x, 0), 0);
2906 shift = INTVAL (XEXP (x, 1));
2908 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2909 else if ((GET_CODE (x) == SIGN_EXTRACT
2910 || GET_CODE (x) == ZERO_EXTRACT)
2911 && GET_MODE (x) == DImode
2912 && GET_CODE (XEXP (x, 0)) == MULT
2913 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2914 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2916 type = (GET_CODE (x) == SIGN_EXTRACT)
2917 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2918 index = XEXP (XEXP (x, 0), 0);
2919 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2920 if (INTVAL (XEXP (x, 1)) != 32 + shift
2921 || INTVAL (XEXP (x, 2)) != 0)
2922 shift = -1;
2924 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2925 (const_int 0xffffffff<<shift)) */
2926 else if (GET_CODE (x) == AND
2927 && GET_MODE (x) == DImode
2928 && GET_CODE (XEXP (x, 0)) == MULT
2929 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2930 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2931 && CONST_INT_P (XEXP (x, 1)))
2933 type = ADDRESS_REG_UXTW;
2934 index = XEXP (XEXP (x, 0), 0);
2935 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2936 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2937 shift = -1;
2939 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2940 else if ((GET_CODE (x) == SIGN_EXTRACT
2941 || GET_CODE (x) == ZERO_EXTRACT)
2942 && GET_MODE (x) == DImode
2943 && GET_CODE (XEXP (x, 0)) == ASHIFT
2944 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2945 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2947 type = (GET_CODE (x) == SIGN_EXTRACT)
2948 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2949 index = XEXP (XEXP (x, 0), 0);
2950 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2951 if (INTVAL (XEXP (x, 1)) != 32 + shift
2952 || INTVAL (XEXP (x, 2)) != 0)
2953 shift = -1;
2955 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2956 (const_int 0xffffffff<<shift)) */
2957 else if (GET_CODE (x) == AND
2958 && GET_MODE (x) == DImode
2959 && GET_CODE (XEXP (x, 0)) == ASHIFT
2960 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2961 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2962 && CONST_INT_P (XEXP (x, 1)))
2964 type = ADDRESS_REG_UXTW;
2965 index = XEXP (XEXP (x, 0), 0);
2966 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2967 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2968 shift = -1;
2970 /* (mult:P (reg:P) (const_int scale)) */
2971 else if (GET_CODE (x) == MULT
2972 && GET_MODE (x) == Pmode
2973 && GET_MODE (XEXP (x, 0)) == Pmode
2974 && CONST_INT_P (XEXP (x, 1)))
2976 type = ADDRESS_REG_REG;
2977 index = XEXP (x, 0);
2978 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2980 /* (ashift:P (reg:P) (const_int shift)) */
2981 else if (GET_CODE (x) == ASHIFT
2982 && GET_MODE (x) == Pmode
2983 && GET_MODE (XEXP (x, 0)) == Pmode
2984 && CONST_INT_P (XEXP (x, 1)))
2986 type = ADDRESS_REG_REG;
2987 index = XEXP (x, 0);
2988 shift = INTVAL (XEXP (x, 1));
2990 else
2991 return false;
2993 if (GET_CODE (index) == SUBREG)
2994 index = SUBREG_REG (index);
2996 if ((shift == 0 ||
2997 (shift > 0 && shift <= 3
2998 && (1 << shift) == GET_MODE_SIZE (mode)))
2999 && REG_P (index)
3000 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3002 info->type = type;
3003 info->offset = index;
3004 info->shift = shift;
3005 return true;
3008 return false;
3011 static inline bool
3012 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3014 return (offset >= -64 * GET_MODE_SIZE (mode)
3015 && offset < 64 * GET_MODE_SIZE (mode)
3016 && offset % GET_MODE_SIZE (mode) == 0);
3019 static inline bool
3020 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3021 HOST_WIDE_INT offset)
3023 return offset >= -256 && offset < 256;
3026 static inline bool
3027 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3029 return (offset >= 0
3030 && offset < 4096 * GET_MODE_SIZE (mode)
3031 && offset % GET_MODE_SIZE (mode) == 0);
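/* Illustrative ranges for DImode (editor's note, not from the original
   source): the 7-bit signed scaled form covers multiples of 8 in
   [-512, 504], the 9-bit signed unscaled form covers any offset in
   [-256, 255], and the 12-bit unsigned scaled form covers multiples of
   8 in [0, 32760].  */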
3034 /* Return true if X is a valid address for machine mode MODE. If it is,
3035 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3036 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3038 static bool
3039 aarch64_classify_address (struct aarch64_address_info *info,
3040 rtx x, enum machine_mode mode,
3041 RTX_CODE outer_code, bool strict_p)
3043 enum rtx_code code = GET_CODE (x);
3044 rtx op0, op1;
3045 bool allow_reg_index_p =
3046 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
3048 /* Don't support anything other than POST_INC or REG addressing for
3049 AdvSIMD. */
3050 if (aarch64_vector_mode_p (mode)
3051 && (code != POST_INC && code != REG))
3052 return false;
3054 switch (code)
3056 case REG:
3057 case SUBREG:
3058 info->type = ADDRESS_REG_IMM;
3059 info->base = x;
3060 info->offset = const0_rtx;
3061 return aarch64_base_register_rtx_p (x, strict_p);
3063 case PLUS:
3064 op0 = XEXP (x, 0);
3065 op1 = XEXP (x, 1);
3066 if (GET_MODE_SIZE (mode) != 0
3067 && CONST_INT_P (op1)
3068 && aarch64_base_register_rtx_p (op0, strict_p))
3070 HOST_WIDE_INT offset = INTVAL (op1);
3072 info->type = ADDRESS_REG_IMM;
3073 info->base = op0;
3074 info->offset = op1;
3076 /* TImode and TFmode values are allowed both in pairs of X
3077 registers and in individual Q registers. The available
3078 address modes are:
3079 X,X: 7-bit signed scaled offset
3080 Q: 9-bit signed offset
3081 We conservatively require an offset representable in both modes.
3083 if (mode == TImode || mode == TFmode)
3084 return (offset_7bit_signed_scaled_p (mode, offset)
3085 && offset_9bit_signed_unscaled_p (mode, offset));
3087 if (outer_code == PARALLEL)
3088 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3089 && offset_7bit_signed_scaled_p (mode, offset));
3090 else
3091 return (offset_9bit_signed_unscaled_p (mode, offset)
3092 || offset_12bit_unsigned_scaled_p (mode, offset));
3095 if (allow_reg_index_p)
3097 /* Look for base + (scaled/extended) index register. */
3098 if (aarch64_base_register_rtx_p (op0, strict_p)
3099 && aarch64_classify_index (info, op1, mode, strict_p))
3101 info->base = op0;
3102 return true;
3104 if (aarch64_base_register_rtx_p (op1, strict_p)
3105 && aarch64_classify_index (info, op0, mode, strict_p))
3107 info->base = op1;
3108 return true;
3112 return false;
3114 case POST_INC:
3115 case POST_DEC:
3116 case PRE_INC:
3117 case PRE_DEC:
3118 info->type = ADDRESS_REG_WB;
3119 info->base = XEXP (x, 0);
3120 info->offset = NULL_RTX;
3121 return aarch64_base_register_rtx_p (info->base, strict_p);
3123 case POST_MODIFY:
3124 case PRE_MODIFY:
3125 info->type = ADDRESS_REG_WB;
3126 info->base = XEXP (x, 0);
3127 if (GET_CODE (XEXP (x, 1)) == PLUS
3128 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3129 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3130 && aarch64_base_register_rtx_p (info->base, strict_p))
3132 HOST_WIDE_INT offset;
3133 info->offset = XEXP (XEXP (x, 1), 1);
3134 offset = INTVAL (info->offset);
3136 /* TImode and TFmode values are allowed both in pairs of X
3137 registers and in individual Q registers. The available
3138 address modes are:
3139 X,X: 7-bit signed scaled offset
3140 Q: 9-bit signed offset
3141 We conservatively require an offset representable in both modes.
3143 if (mode == TImode || mode == TFmode)
3144 return (offset_7bit_signed_scaled_p (mode, offset)
3145 && offset_9bit_signed_unscaled_p (mode, offset));
3147 if (outer_code == PARALLEL)
3148 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3149 && offset_7bit_signed_scaled_p (mode, offset));
3150 else
3151 return offset_9bit_signed_unscaled_p (mode, offset);
3153 return false;
3155 case CONST:
3156 case SYMBOL_REF:
3157 case LABEL_REF:
3158 /* load literal: pc-relative constant pool entry. Only supported
3159 for SI mode or larger. */
3160 info->type = ADDRESS_SYMBOLIC;
3161 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3163 rtx sym, addend;
3165 split_const (x, &sym, &addend);
3166 return (GET_CODE (sym) == LABEL_REF
3167 || (GET_CODE (sym) == SYMBOL_REF
3168 && CONSTANT_POOL_ADDRESS_P (sym)));
3170 return false;
3172 case LO_SUM:
3173 info->type = ADDRESS_LO_SUM;
3174 info->base = XEXP (x, 0);
3175 info->offset = XEXP (x, 1);
3176 if (allow_reg_index_p
3177 && aarch64_base_register_rtx_p (info->base, strict_p))
3179 rtx sym, offs;
3180 split_const (info->offset, &sym, &offs);
3181 if (GET_CODE (sym) == SYMBOL_REF
3182 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3183 == SYMBOL_SMALL_ABSOLUTE))
3185 /* The symbol and offset must be aligned to the access size. */
3186 unsigned int align;
3187 unsigned int ref_size;
3189 if (CONSTANT_POOL_ADDRESS_P (sym))
3190 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3191 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3193 tree exp = SYMBOL_REF_DECL (sym);
3194 align = TYPE_ALIGN (TREE_TYPE (exp));
3195 align = CONSTANT_ALIGNMENT (exp, align);
3197 else if (SYMBOL_REF_DECL (sym))
3198 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3199 else
3200 align = BITS_PER_UNIT;
3202 ref_size = GET_MODE_SIZE (mode);
3203 if (ref_size == 0)
3204 ref_size = GET_MODE_SIZE (DImode);
3206 return ((INTVAL (offs) & (ref_size - 1)) == 0
3207 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3210 return false;
3212 default:
3213 return false;
3217 bool
3218 aarch64_symbolic_address_p (rtx x)
3220 rtx offset;
3222 split_const (x, &x, &offset);
3223 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3226 /* Classify the base of symbolic expression X, given that X appears in
3227 context CONTEXT. */
3229 enum aarch64_symbol_type
3230 aarch64_classify_symbolic_expression (rtx x,
3231 enum aarch64_symbol_context context)
3233 rtx offset;
3235 split_const (x, &x, &offset);
3236 return aarch64_classify_symbol (x, context);
3240 /* Return TRUE if X is a legitimate address for accessing memory in
3241 mode MODE. */
3242 static bool
3243 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3245 struct aarch64_address_info addr;
3247 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3250 /* Return TRUE if X is a legitimate address for accessing memory in
3251 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3252 pair operation. */
3253 bool
3254 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3255 RTX_CODE outer_code, bool strict_p)
3257 struct aarch64_address_info addr;
3259 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3262 /* Return TRUE if rtx X is immediate constant 0.0 */
3263 bool
3264 aarch64_float_const_zero_rtx_p (rtx x)
3266 REAL_VALUE_TYPE r;
3268 if (GET_MODE (x) == VOIDmode)
3269 return false;
3271 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3272 if (REAL_VALUE_MINUS_ZERO (r))
3273 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3274 return REAL_VALUES_EQUAL (r, dconst0);
3277 /* Return the fixed registers used for condition codes. */
3279 static bool
3280 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3282 *p1 = CC_REGNUM;
3283 *p2 = INVALID_REGNUM;
3284 return true;
3287 enum machine_mode
3288 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3290 /* All floating point compares return CCFP if it is an equality
3291 comparison, and CCFPE otherwise. */
3292 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3294 switch (code)
3296 case EQ:
3297 case NE:
3298 case UNORDERED:
3299 case ORDERED:
3300 case UNLT:
3301 case UNLE:
3302 case UNGT:
3303 case UNGE:
3304 case UNEQ:
3305 case LTGT:
3306 return CCFPmode;
3308 case LT:
3309 case LE:
3310 case GT:
3311 case GE:
3312 return CCFPEmode;
3314 default:
3315 gcc_unreachable ();
3319 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3320 && y == const0_rtx
3321 && (code == EQ || code == NE || code == LT || code == GE)
3322 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3323 || GET_CODE (x) == NEG))
3324 return CC_NZmode;
3326 /* A compare with a shifted operand. Because of canonicalization,
3327 the comparison will have to be swapped when we emit the assembly
3328 code. */
3329 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3330 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3331 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3332 || GET_CODE (x) == LSHIFTRT
3333 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3334 return CC_SWPmode;
3336 /* Similarly for a negated operand, but we can only do this for
3337 equalities. */
3338 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3339 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3340 && (code == EQ || code == NE)
3341 && GET_CODE (x) == NEG)
3342 return CC_Zmode;
3344 /* A compare of a mode narrower than SI mode against zero can be done
3345 by extending the value in the comparison. */
3346 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3347 && y == const0_rtx)
3348 /* Only use sign-extension if we really need it. */
3349 return ((code == GT || code == GE || code == LE || code == LT)
3350 ? CC_SESWPmode : CC_ZESWPmode);
3352 /* For everything else, return CCmode. */
3353 return CCmode;
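/* Illustrative cases (editor's note, not from the original source):
   (compare (ashift:DI x (const_int 3)) y) selects CC_SWPmode, since the
   shifted operand ends up on the left and the emitted condition must be
   swapped, while (compare (plus:SI a b) (const_int 0)) under EQ/NE/LT/GE
   selects CC_NZmode so the flags set by an ADDS/SUBS can be reused.  */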
3356 static unsigned
3357 aarch64_get_condition_code (rtx x)
3359 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3360 enum rtx_code comp_code = GET_CODE (x);
3362 if (GET_MODE_CLASS (mode) != MODE_CC)
3363 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3365 switch (mode)
3367 case CCFPmode:
3368 case CCFPEmode:
3369 switch (comp_code)
3371 case GE: return AARCH64_GE;
3372 case GT: return AARCH64_GT;
3373 case LE: return AARCH64_LS;
3374 case LT: return AARCH64_MI;
3375 case NE: return AARCH64_NE;
3376 case EQ: return AARCH64_EQ;
3377 case ORDERED: return AARCH64_VC;
3378 case UNORDERED: return AARCH64_VS;
3379 case UNLT: return AARCH64_LT;
3380 case UNLE: return AARCH64_LE;
3381 case UNGT: return AARCH64_HI;
3382 case UNGE: return AARCH64_PL;
3383 default: gcc_unreachable ();
3385 break;
3387 case CCmode:
3388 switch (comp_code)
3390 case NE: return AARCH64_NE;
3391 case EQ: return AARCH64_EQ;
3392 case GE: return AARCH64_GE;
3393 case GT: return AARCH64_GT;
3394 case LE: return AARCH64_LE;
3395 case LT: return AARCH64_LT;
3396 case GEU: return AARCH64_CS;
3397 case GTU: return AARCH64_HI;
3398 case LEU: return AARCH64_LS;
3399 case LTU: return AARCH64_CC;
3400 default: gcc_unreachable ();
3402 break;
3404 case CC_SWPmode:
3405 case CC_ZESWPmode:
3406 case CC_SESWPmode:
3407 switch (comp_code)
3409 case NE: return AARCH64_NE;
3410 case EQ: return AARCH64_EQ;
3411 case GE: return AARCH64_LE;
3412 case GT: return AARCH64_LT;
3413 case LE: return AARCH64_GE;
3414 case LT: return AARCH64_GT;
3415 case GEU: return AARCH64_LS;
3416 case GTU: return AARCH64_CC;
3417 case LEU: return AARCH64_CS;
3418 case LTU: return AARCH64_HI;
3419 default: gcc_unreachable ();
3421 break;
3423 case CC_NZmode:
3424 switch (comp_code)
3426 case NE: return AARCH64_NE;
3427 case EQ: return AARCH64_EQ;
3428 case GE: return AARCH64_PL;
3429 case LT: return AARCH64_MI;
3430 default: gcc_unreachable ();
3432 break;
3434 case CC_Zmode:
3435 switch (comp_code)
3437 case NE: return AARCH64_NE;
3438 case EQ: return AARCH64_EQ;
3439 default: gcc_unreachable ();
3441 break;
3443 default:
3444 gcc_unreachable ();
3445 break;
3449 static unsigned
3450 bit_count (unsigned HOST_WIDE_INT value)
3452 unsigned count = 0;
3454 while (value)
3456 count++;
3457 value &= value - 1;
3460 return count;
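/* Editor's note (not from the original source): this is the classic
   value &= value - 1 trick, which clears one set bit per iteration; for
   example 0xb0 = 0b10110000 takes exactly three iterations.  */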
3463 void
3464 aarch64_print_operand (FILE *f, rtx x, char code)
3466 switch (code)
3468 /* An integer or symbol address without a preceding # sign. */
3469 case 'c':
3470 switch (GET_CODE (x))
3472 case CONST_INT:
3473 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3474 break;
3476 case SYMBOL_REF:
3477 output_addr_const (f, x);
3478 break;
3480 case CONST:
3481 if (GET_CODE (XEXP (x, 0)) == PLUS
3482 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3484 output_addr_const (f, x);
3485 break;
3487 /* Fall through. */
3489 default:
3490 output_operand_lossage ("Unsupported operand for code '%c'", code);
3492 break;
3494 case 'e':
3495 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3497 int n;
3499 if (GET_CODE (x) != CONST_INT
3500 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3502 output_operand_lossage ("invalid operand for '%%%c'", code);
3503 return;
3506 switch (n)
3508 case 3:
3509 fputc ('b', f);
3510 break;
3511 case 4:
3512 fputc ('h', f);
3513 break;
3514 case 5:
3515 fputc ('w', f);
3516 break;
3517 default:
3518 output_operand_lossage ("invalid operand for '%%%c'", code);
3519 return;
3522 break;
3524 case 'p':
3526 int n;
3528 /* Print N such that 2^N == X. */
3529 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3531 output_operand_lossage ("invalid operand for '%%%c'", code);
3532 return;
3535 asm_fprintf (f, "%d", n);
3537 break;
3539 case 'P':
3540 /* Print the number of non-zero bits in X (a const_int). */
3541 if (GET_CODE (x) != CONST_INT)
3543 output_operand_lossage ("invalid operand for '%%%c'", code);
3544 return;
3547 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3548 break;
3550 case 'H':
3551 /* Print the higher numbered register of a pair (TImode) of regs. */
3552 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3554 output_operand_lossage ("invalid operand for '%%%c'", code);
3555 return;
3558 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3559 break;
3561 case 'm':
3562 /* Print a condition (eq, ne, etc). */
3564 /* CONST_TRUE_RTX means always -- that's the default. */
3565 if (x == const_true_rtx)
3566 return;
3568 if (!COMPARISON_P (x))
3570 output_operand_lossage ("invalid operand for '%%%c'", code);
3571 return;
3574 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3575 break;
3577 case 'M':
3578 /* Print the inverse of a condition (eq <-> ne, etc). */
3580 /* CONST_TRUE_RTX means never -- that's the default. */
3581 if (x == const_true_rtx)
3583 fputs ("nv", f);
3584 return;
3587 if (!COMPARISON_P (x))
3589 output_operand_lossage ("invalid operand for '%%%c'", code);
3590 return;
3593 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3594 (aarch64_get_condition_code (x))], f);
3595 break;
3597 case 'b':
3598 case 'h':
3599 case 's':
3600 case 'd':
3601 case 'q':
3602 /* Print a scalar FP/SIMD register name. */
3603 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3605 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3606 return;
3608 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3609 break;
3611 case 'S':
3612 case 'T':
3613 case 'U':
3614 case 'V':
3615 /* Print the first FP/SIMD register name in a list. */
3616 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3618 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3619 return;
3621 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3622 break;
3624 case 'X':
3625 /* Print bottom 16 bits of integer constant in hex. */
3626 if (GET_CODE (x) != CONST_INT)
3628 output_operand_lossage ("invalid operand for '%%%c'", code);
3629 return;
3631 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3632 break;
3634 case 'w':
3635 case 'x':
3636 /* Print a general register name or the zero register (32-bit or
3637 64-bit). */
3638 if (x == const0_rtx
3639 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3641 asm_fprintf (f, "%czr", code);
3642 break;
3645 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3647 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3648 break;
3651 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3653 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3654 break;
3657 /* Fall through */
3659 case 0:
3660 /* Print a normal operand; if it's a general register, then we
3661 assume DImode. */
3662 if (x == NULL)
3664 output_operand_lossage ("missing operand");
3665 return;
3668 switch (GET_CODE (x))
3670 case REG:
3671 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3672 break;
3674 case MEM:
3675 aarch64_memory_reference_mode = GET_MODE (x);
3676 output_address (XEXP (x, 0));
3677 break;
3679 case LABEL_REF:
3680 case SYMBOL_REF:
3681 output_addr_const (asm_out_file, x);
3682 break;
3684 case CONST_INT:
3685 asm_fprintf (f, "%wd", INTVAL (x));
3686 break;
3688 case CONST_VECTOR:
3689 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3691 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3692 HOST_WIDE_INT_MIN,
3693 HOST_WIDE_INT_MAX));
3694 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3696 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3698 fputc ('0', f);
3700 else
3701 gcc_unreachable ();
3702 break;
3704 case CONST_DOUBLE:
3705 /* CONST_DOUBLE can represent a double-width integer.
3706 In this case, the mode of x is VOIDmode. */
3707 if (GET_MODE (x) == VOIDmode)
3708 ; /* Do Nothing. */
3709 else if (aarch64_float_const_zero_rtx_p (x))
3711 fputc ('0', f);
3712 break;
3714 else if (aarch64_float_const_representable_p (x))
3716 #define buf_size 20
3717 char float_buf[buf_size] = {'\0'};
3718 REAL_VALUE_TYPE r;
3719 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3720 real_to_decimal_for_mode (float_buf, &r,
3721 buf_size, buf_size,
3722 1, GET_MODE (x));
3723 asm_fprintf (asm_out_file, "%s", float_buf);
3724 break;
3725 #undef buf_size
3727 output_operand_lossage ("invalid constant");
3728 return;
3729 default:
3730 output_operand_lossage ("invalid operand");
3731 return;
3733 break;
3735 case 'A':
3736 if (GET_CODE (x) == HIGH)
3737 x = XEXP (x, 0);
3739 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3741 case SYMBOL_SMALL_GOT:
3742 asm_fprintf (asm_out_file, ":got:");
3743 break;
3745 case SYMBOL_SMALL_TLSGD:
3746 asm_fprintf (asm_out_file, ":tlsgd:");
3747 break;
3749 case SYMBOL_SMALL_TLSDESC:
3750 asm_fprintf (asm_out_file, ":tlsdesc:");
3751 break;
3753 case SYMBOL_SMALL_GOTTPREL:
3754 asm_fprintf (asm_out_file, ":gottprel:");
3755 break;
3757 case SYMBOL_SMALL_TPREL:
3758 asm_fprintf (asm_out_file, ":tprel:");
3759 break;
3761 case SYMBOL_TINY_GOT:
3762 gcc_unreachable ();
3763 break;
3765 default:
3766 break;
3768 output_addr_const (asm_out_file, x);
3769 break;
3771 case 'L':
3772 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3774 case SYMBOL_SMALL_GOT:
3775 asm_fprintf (asm_out_file, ":lo12:");
3776 break;
3778 case SYMBOL_SMALL_TLSGD:
3779 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3780 break;
3782 case SYMBOL_SMALL_TLSDESC:
3783 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3784 break;
3786 case SYMBOL_SMALL_GOTTPREL:
3787 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3788 break;
3790 case SYMBOL_SMALL_TPREL:
3791 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3792 break;
3794 case SYMBOL_TINY_GOT:
3795 asm_fprintf (asm_out_file, ":got:");
3796 break;
3798 default:
3799 break;
3801 output_addr_const (asm_out_file, x);
3802 break;
3804 case 'G':
3806 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3808 case SYMBOL_SMALL_TPREL:
3809 asm_fprintf (asm_out_file, ":tprel_hi12:");
3810 break;
3811 default:
3812 break;
3814 output_addr_const (asm_out_file, x);
3815 break;
3817 default:
3818 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3819 return;
3823 void
3824 aarch64_print_operand_address (FILE *f, rtx x)
3826 struct aarch64_address_info addr;
3828 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3829 MEM, true))
3830 switch (addr.type)
3832 case ADDRESS_REG_IMM:
3833 if (addr.offset == const0_rtx)
3834 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3835 else
3836 asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
3837 INTVAL (addr.offset));
3838 return;
3840 case ADDRESS_REG_REG:
3841 if (addr.shift == 0)
3842 asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3843 reg_names [REGNO (addr.offset)]);
3844 else
3845 asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3846 reg_names [REGNO (addr.offset)], addr.shift);
3847 return;
3849 case ADDRESS_REG_UXTW:
3850 if (addr.shift == 0)
3851 asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
3852 REGNO (addr.offset) - R0_REGNUM);
3853 else
3854 asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
3855 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3856 return;
3858 case ADDRESS_REG_SXTW:
3859 if (addr.shift == 0)
3860 asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
3861 REGNO (addr.offset) - R0_REGNUM);
3862 else
3863 asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
3864 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3865 return;
3867 case ADDRESS_REG_WB:
3868 switch (GET_CODE (x))
3870 case PRE_INC:
3871 asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
3872 GET_MODE_SIZE (aarch64_memory_reference_mode));
3873 return;
3874 case POST_INC:
3875 asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
3876 GET_MODE_SIZE (aarch64_memory_reference_mode));
3877 return;
3878 case PRE_DEC:
3879 asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3880 GET_MODE_SIZE (aarch64_memory_reference_mode));
3881 return;
3882 case POST_DEC:
3883 asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3884 GET_MODE_SIZE (aarch64_memory_reference_mode));
3885 return;
3886 case PRE_MODIFY:
3887 asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3888 INTVAL (addr.offset));
3889 return;
3890 case POST_MODIFY:
3891 asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3892 INTVAL (addr.offset));
3893 return;
3894 default:
3895 break;
3897 break;
3899 case ADDRESS_LO_SUM:
3900 asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3901 output_addr_const (f, addr.offset);
3902 asm_fprintf (f, "]");
3903 return;
3905 case ADDRESS_SYMBOLIC:
3906 break;
3909 output_addr_const (f, x);
3912 bool
3913 aarch64_label_mentioned_p (rtx x)
3915 const char *fmt;
3916 int i;
3918 if (GET_CODE (x) == LABEL_REF)
3919 return true;
3921 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3922 referencing instruction, but they are constant offsets, not
3923 symbols. */
3924 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3925 return false;
3927 fmt = GET_RTX_FORMAT (GET_CODE (x));
3928 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3930 if (fmt[i] == 'E')
3932 int j;
3934 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3935 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3936 return 1;
3938 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3939 return 1;
3942 return 0;
3945 /* Implement REGNO_REG_CLASS. */
3947 enum reg_class
3948 aarch64_regno_regclass (unsigned regno)
3950 if (GP_REGNUM_P (regno))
3951 return CORE_REGS;
3953 if (regno == SP_REGNUM)
3954 return STACK_REG;
3956 if (regno == FRAME_POINTER_REGNUM
3957 || regno == ARG_POINTER_REGNUM)
3958 return POINTER_REGS;
3960 if (FP_REGNUM_P (regno))
3961 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
3963 return NO_REGS;
3966 /* Try a machine-dependent way of reloading an illegitimate address
3967 operand. If we find one, push the reload and return the new rtx. */
3970 aarch64_legitimize_reload_address (rtx *x_p,
3971 enum machine_mode mode,
3972 int opnum, int type,
3973 int ind_levels ATTRIBUTE_UNUSED)
3975 rtx x = *x_p;
3977 /* Do not allow mem (plus (reg, const)) if vector mode. */
3978 if (aarch64_vector_mode_p (mode)
3979 && GET_CODE (x) == PLUS
3980 && REG_P (XEXP (x, 0))
3981 && CONST_INT_P (XEXP (x, 1)))
3983 rtx orig_rtx = x;
3984 x = copy_rtx (x);
3985 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3986 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3987 opnum, (enum reload_type) type);
3988 return x;
3991 /* We must recognize output that we have already generated ourselves. */
3992 if (GET_CODE (x) == PLUS
3993 && GET_CODE (XEXP (x, 0)) == PLUS
3994 && REG_P (XEXP (XEXP (x, 0), 0))
3995 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3996 && CONST_INT_P (XEXP (x, 1)))
3998 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3999 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4000 opnum, (enum reload_type) type);
4001 return x;
4004 /* We wish to handle large displacements off a base register by splitting
4005 the addend across an add and the mem insn. This can cut the number of
4006 extra insns needed from 3 to 1. It is only useful for load/store of a
4007 single register with a 12-bit offset field. */
4008 if (GET_CODE (x) == PLUS
4009 && REG_P (XEXP (x, 0))
4010 && CONST_INT_P (XEXP (x, 1))
4011 && HARD_REGISTER_P (XEXP (x, 0))
4012 && mode != TImode
4013 && mode != TFmode
4014 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4016 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4017 HOST_WIDE_INT low = val & 0xfff;
4018 HOST_WIDE_INT high = val - low;
4019 HOST_WIDE_INT offs;
4020 rtx cst;
4021 enum machine_mode xmode = GET_MODE (x);
4023 /* In ILP32, xmode can be either DImode or SImode. */
4024 gcc_assert (xmode == DImode || xmode == SImode);
4026 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4027 BLKmode alignment. */
4028 if (GET_MODE_SIZE (mode) == 0)
4029 return NULL_RTX;
4031 offs = low % GET_MODE_SIZE (mode);
4033 /* Align misaligned offset by adjusting high part to compensate. */
4034 if (offs != 0)
4036 if (aarch64_uimm12_shift (high + offs))
4038 /* Align down. */
4039 low = low - offs;
4040 high = high + offs;
4042 else
4044 /* Align up. */
4045 offs = GET_MODE_SIZE (mode) - offs;
4046 low = low + offs;
4047 high = high + (low & 0x1000) - offs;
4048 low &= 0xfff;
4052 /* Check for overflow. */
4053 if (high + low != val)
4054 return NULL_RTX;
4056 cst = GEN_INT (high);
4057 if (!aarch64_uimm12_shift (high))
4058 cst = force_const_mem (xmode, cst);
4060 /* Reload high part into base reg, leaving the low part
4061 in the mem instruction.
4062 Note that replacing this gen_rtx_PLUS with plus_constant is
4063 wrong in this case because we rely on the
4064 (plus (plus reg c1) c2) structure being preserved so that
4065 XEXP (*p, 0) in push_reload below uses the correct term. */
4066 x = gen_rtx_PLUS (xmode,
4067 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4068 GEN_INT (low));
4070 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4071 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4072 opnum, (enum reload_type) type);
4073 return x;
4076 return NULL_RTX;
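/* Worked example for the large-displacement path above (editor's
   illustration, not from the original source): a DImode access at
   base + 0x13008 splits into high = 0x13000 and low = 0x8; 0x13000 is a
   12-bit immediate shifted by 12, so the reload becomes one ADD of
   0x13000 into a base register plus a load/store with offset 8, instead
   of materialising the full constant separately.  */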
4080 static reg_class_t
4081 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4082 reg_class_t rclass,
4083 enum machine_mode mode,
4084 secondary_reload_info *sri)
4086 /* Without the TARGET_SIMD instructions we cannot move a Q register
4087 to a Q register directly. We need a scratch. */
4088 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4089 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4090 && reg_class_subset_p (rclass, FP_REGS))
4092 if (mode == TFmode)
4093 sri->icode = CODE_FOR_aarch64_reload_movtf;
4094 else if (mode == TImode)
4095 sri->icode = CODE_FOR_aarch64_reload_movti;
4096 return NO_REGS;
4099 /* A TFmode or TImode memory access should be handled via an FP_REG
4100 because AArch64 has richer addressing modes for LDR/STR instructions
4101 than LDP/STP instructions. */
4102 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4103 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4104 return FP_REGS;
4106 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4107 return CORE_REGS;
4109 return NO_REGS;
4112 static bool
4113 aarch64_can_eliminate (const int from, const int to)
4115 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4116 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4118 if (frame_pointer_needed)
4120 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4121 return true;
4122 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4123 return false;
4124 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4125 && !cfun->calls_alloca)
4126 return true;
4127 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4128 return true;
4130 return false;
4133 return true;
4136 HOST_WIDE_INT
4137 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4139 HOST_WIDE_INT frame_size;
4140 HOST_WIDE_INT offset;
4142 aarch64_layout_frame ();
4143 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4144 + crtl->outgoing_args_size
4145 + cfun->machine->saved_varargs_size);
4147 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4148 offset = frame_size;
4150 if (to == HARD_FRAME_POINTER_REGNUM)
4152 if (from == ARG_POINTER_REGNUM)
4153 return offset - crtl->outgoing_args_size;
4155 if (from == FRAME_POINTER_REGNUM)
4156 return cfun->machine->frame.saved_regs_size + get_frame_size ();
4159 if (to == STACK_POINTER_REGNUM)
4161 if (from == FRAME_POINTER_REGNUM)
4163 HOST_WIDE_INT elim = crtl->outgoing_args_size
4164 + cfun->machine->frame.saved_regs_size
4165 + get_frame_size ()
4166 - cfun->machine->frame.fp_lr_offset;
4167 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4168 return elim;
4172 return offset;
4176 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4177 previous frame. */
4180 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4182 if (count != 0)
4183 return const0_rtx;
4184 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4188 static void
4189 aarch64_asm_trampoline_template (FILE *f)
4191 if (TARGET_ILP32)
4193 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4194 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4196 else
4198 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4199 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4201 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4202 assemble_aligned_integer (4, const0_rtx);
4203 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4204 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4207 static void
4208 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4210 rtx fnaddr, mem, a_tramp;
4211 const int tramp_code_sz = 16;
4213 /* Don't need to copy the trailing D-words, we fill those in below. */
4214 emit_block_move (m_tramp, assemble_trampoline_template (),
4215 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4216 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4217 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4218 if (GET_MODE (fnaddr) != ptr_mode)
4219 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4220 emit_move_insn (mem, fnaddr);
4222 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4223 emit_move_insn (mem, chain_value);
4225 /* XXX We should really define a "clear_cache" pattern and use
4226 gen_clear_cache(). */
4227 a_tramp = XEXP (m_tramp, 0);
4228 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4229 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4230 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4231 ptr_mode);
4234 static unsigned char
4235 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4237 switch (regclass)
4239 case CORE_REGS:
4240 case POINTER_REGS:
4241 case GENERAL_REGS:
4242 case ALL_REGS:
4243 case FP_REGS:
4244 case FP_LO_REGS:
4245 return
4246 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4247 (GET_MODE_SIZE (mode) + 7) / 8;
4248 case STACK_REG:
4249 return 1;
4251 case NO_REGS:
4252 return 0;
4254 default:
4255 break;
4257 gcc_unreachable ();
4260 static reg_class_t
4261 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4263 if (regclass == POINTER_REGS)
4264 return GENERAL_REGS;
4266 if (regclass == STACK_REG)
4268 if (REG_P(x)
4269 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4270 return regclass;
4272 return NO_REGS;
4275 /* If it's an integer immediate that MOVI can't handle, then
4276 FP_REGS is not an option, so we return NO_REGS instead. */
4277 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4278 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4279 return NO_REGS;
4281 /* Register elimination can result in a request for
4282 SP+constant->FP_REGS. We cannot support such operations, which
4283 use SP as the source and an FP_REG as the destination, so reject
4284 them outright. */
4285 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4287 rtx lhs = XEXP (x, 0);
4289 /* Look through a possible SUBREG introduced by ILP32. */
4290 if (GET_CODE (lhs) == SUBREG)
4291 lhs = SUBREG_REG (lhs);
4293 gcc_assert (REG_P (lhs));
4294 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4295 POINTER_REGS));
4296 return NO_REGS;
4299 return regclass;
4302 void
4303 aarch64_asm_output_labelref (FILE* f, const char *name)
4305 asm_fprintf (f, "%U%s", name);
4308 static void
4309 aarch64_elf_asm_constructor (rtx symbol, int priority)
4311 if (priority == DEFAULT_INIT_PRIORITY)
4312 default_ctor_section_asm_out_constructor (symbol, priority);
4313 else
4315 section *s;
4316 char buf[18];
4317 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4318 s = get_section (buf, SECTION_WRITE, NULL);
4319 switch_to_section (s);
4320 assemble_align (POINTER_SIZE);
4321 assemble_aligned_integer (POINTER_BYTES, symbol);
4325 static void
4326 aarch64_elf_asm_destructor (rtx symbol, int priority)
4328 if (priority == DEFAULT_INIT_PRIORITY)
4329 default_dtor_section_asm_out_destructor (symbol, priority);
4330 else
4332 section *s;
4333 char buf[18];
4334 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4335 s = get_section (buf, SECTION_WRITE, NULL);
4336 switch_to_section (s);
4337 assemble_align (POINTER_SIZE);
4338 assemble_aligned_integer (POINTER_BYTES, symbol);
4342 const char*
4343 aarch64_output_casesi (rtx *operands)
4345 char buf[100];
4346 char label[100];
4347 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
4348 int index;
4349 static const char *const patterns[4][2] =
4352 "ldrb\t%w3, [%0,%w1,uxtw]",
4353 "add\t%3, %4, %w3, sxtb #2"
4356 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4357 "add\t%3, %4, %w3, sxth #2"
4360 "ldr\t%w3, [%0,%w1,uxtw #2]",
4361 "add\t%3, %4, %w3, sxtw #2"
4363 /* We assume that DImode is only generated when not optimizing and
4364 that we don't really need 64-bit address offsets. That would
4365 imply an object file with 8GB of code in a single function! */
4367 "ldr\t%w3, [%0,%w1,uxtw #2]",
4368 "add\t%3, %4, %w3, sxtw #2"
4372 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4374 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4376 gcc_assert (index >= 0 && index <= 3);
4378 /* Need to implement table size reduction, by changing the code below. */
4379 output_asm_insn (patterns[index][0], operands);
4380 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4381 snprintf (buf, sizeof (buf),
4382 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4383 output_asm_insn (buf, operands);
4384 output_asm_insn (patterns[index][1], operands);
4385 output_asm_insn ("br\t%3", operands);
4386 assemble_label (asm_out_file, label);
4387 return "";
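/* As an illustrative sketch (register numbers are hypothetical, not taken
   from the original source), a QImode dispatch table whose base is in x0
   and whose index is in w1, with x3/x4 as the scratch operands, would emit:

	ldrb	w3, [x0,w1,uxtw]
	adr	x4, .Lrtx<N>
	add	x3, x4, w3, sxtb #2
	br	x3
   .Lrtx<N>:

   i.e. the byte loaded from the table is a scaled offset from the label
   emitted immediately after the indirect branch.  */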
4391 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4392 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4393 operator. */
4396 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4398 if (shift >= 0 && shift <= 3)
4400 int size;
4401 for (size = 8; size <= 32; size *= 2)
4403 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4404 if (mask == bits << shift)
4405 return size;
4408 return 0;
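/* A few worked examples of the check above (values chosen purely for
   illustration):

     aarch64_uxt_size (0, 0xff)          == 8    UXTB
     aarch64_uxt_size (1, 0x1fe)         == 8    UXTB, shifted left by 1
     aarch64_uxt_size (0, 0xffff)        == 16   UXTH
     aarch64_uxt_size (2, 0x3fffffffc)   == 32   UXTW, shifted left by 2

   Any other shift/mask combination yields 0.  */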
4411 static bool
4412 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4413 const_rtx x ATTRIBUTE_UNUSED)
4415 /* We can't use blocks for constants when we're using a per-function
4416 constant pool. */
4417 return false;
4420 static section *
4421 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4422 rtx x ATTRIBUTE_UNUSED,
4423 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4425 /* Force all constant pool entries into the current function section. */
4426 return function_section (current_function_decl);
4430 /* Costs. */
4432 /* Helper function for rtx cost calculation. Strip a shift expression
4433 from X. Returns the inner operand if successful, or the original
4434 expression on failure. */
4435 static rtx
4436 aarch64_strip_shift (rtx x)
4438 rtx op = x;
4440 if ((GET_CODE (op) == ASHIFT
4441 || GET_CODE (op) == ASHIFTRT
4442 || GET_CODE (op) == LSHIFTRT)
4443 && CONST_INT_P (XEXP (op, 1)))
4444 return XEXP (op, 0);
4446 if (GET_CODE (op) == MULT
4447 && CONST_INT_P (XEXP (op, 1))
4448 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4449 return XEXP (op, 0);
4451 return x;
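/* For example (a sketch, not from the original source):

     (ashift (reg:DI x) (const_int 3))   -> (reg:DI x)
     (mult (reg:DI x) (const_int 8))     -> (reg:DI x)
     (mult (reg:DI x) (const_int 12))    -> unchanged, 12 is not a power of 2.  */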
4454 /* Helper function for rtx cost calculation. Strip a shift or extend
4455 expression from X. Returns the inner operand if successful, or the
4456 original expression on failure. We deal with a number of possible
4457 canonicalization variations here. */
4458 static rtx
4459 aarch64_strip_shift_or_extend (rtx x)
4461 rtx op = x;
4463 /* Zero and sign extraction of a widened value. */
4464 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4465 && XEXP (op, 2) == const0_rtx
4466 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4467 XEXP (op, 1)))
4468 return XEXP (XEXP (op, 0), 0);
4470 /* It can also be represented (for zero-extend) as an AND with an
4471 immediate. */
4472 if (GET_CODE (op) == AND
4473 && GET_CODE (XEXP (op, 0)) == MULT
4474 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4475 && CONST_INT_P (XEXP (op, 1))
4476 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4477 INTVAL (XEXP (op, 1))) != 0)
4478 return XEXP (XEXP (op, 0), 0);
4480 /* Now handle extended register, as this may also have an optional
4481 left shift by 1..4. */
4482 if (GET_CODE (op) == ASHIFT
4483 && CONST_INT_P (XEXP (op, 1))
4484 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4485 op = XEXP (op, 0);
4487 if (GET_CODE (op) == ZERO_EXTEND
4488 || GET_CODE (op) == SIGN_EXTEND)
4489 op = XEXP (op, 0);
4491 if (op != x)
4492 return op;
4494 return aarch64_strip_shift (x);
4497 /* Calculate the cost of calculating X, storing it in *COST. Result
4498 is true if the total cost of the operation has now been calculated. */
4499 static bool
4500 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4501 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4503 rtx op0, op1;
4504 const struct cpu_cost_table *extra_cost
4505 = aarch64_tune_params->insn_extra_cost;
4507 switch (code)
4509 case SET:
4510 op0 = SET_DEST (x);
4511 op1 = SET_SRC (x);
4513 switch (GET_CODE (op0))
4515 case MEM:
4516 if (speed)
4517 *cost += extra_cost->ldst.store;
4519 if (op1 != const0_rtx)
4520 *cost += rtx_cost (op1, SET, 1, speed);
4521 return true;
4523 case SUBREG:
4524 if (! REG_P (SUBREG_REG (op0)))
4525 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4526 /* Fall through. */
4527 case REG:
4528 /* Cost is just the cost of the RHS of the set. */
4529 *cost += rtx_cost (op1, SET, 1, true);
4530 return true;
4532 case ZERO_EXTRACT: /* Bit-field insertion. */
4533 case SIGN_EXTRACT:
4534 /* Strip any redundant widening of the RHS to meet the width of
4535 the target. */
4536 if (GET_CODE (op1) == SUBREG)
4537 op1 = SUBREG_REG (op1);
4538 if ((GET_CODE (op1) == ZERO_EXTEND
4539 || GET_CODE (op1) == SIGN_EXTEND)
4540 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4541 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4542 >= INTVAL (XEXP (op0, 1))))
4543 op1 = XEXP (op1, 0);
4544 *cost += rtx_cost (op1, SET, 1, speed);
4545 return true;
4547 default:
4548 break;
4550 return false;
4552 case MEM:
4553 if (speed)
4554 *cost += extra_cost->ldst.load;
4556 return true;
4558 case NEG:
4559 op0 = CONST0_RTX (GET_MODE (x));
4560 op1 = XEXP (x, 0);
4561 goto cost_minus;
4563 case COMPARE:
4564 op0 = XEXP (x, 0);
4565 op1 = XEXP (x, 1);
4567 if (op1 == const0_rtx
4568 && GET_CODE (op0) == AND)
4570 x = op0;
4571 goto cost_logic;
4574 /* Comparisons can work if the order is swapped.
4575 Canonicalization puts the more complex operation first, but
4576 we want it in op1. */
4577 if (! (REG_P (op0)
4578 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4580 op0 = XEXP (x, 1);
4581 op1 = XEXP (x, 0);
4583 goto cost_minus;
4585 case MINUS:
4586 op0 = XEXP (x, 0);
4587 op1 = XEXP (x, 1);
4589 cost_minus:
4590 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4591 || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4592 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4594 if (op0 != const0_rtx)
4595 *cost += rtx_cost (op0, MINUS, 0, speed);
4597 if (CONST_INT_P (op1))
4599 if (!aarch64_uimm12_shift (INTVAL (op1)))
4600 *cost += rtx_cost (op1, MINUS, 1, speed);
4602 else
4604 op1 = aarch64_strip_shift_or_extend (op1);
4605 *cost += rtx_cost (op1, MINUS, 1, speed);
4607 return true;
4610 return false;
4612 case PLUS:
4613 op0 = XEXP (x, 0);
4614 op1 = XEXP (x, 1);
4616 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4618 if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4620 *cost += rtx_cost (op0, PLUS, 0, speed);
4622 else
4624 rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4626 if (new_op0 == op0
4627 && GET_CODE (op0) == MULT)
4629 if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4630 && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4631 || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4632 && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4634 *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4635 speed)
4636 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4637 speed)
4638 + rtx_cost (op1, PLUS, 1, speed));
4639 if (speed)
4640 *cost +=
4641 extra_cost->mult[GET_MODE (x) == DImode].extend_add;
4642 return true;
4645 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4646 + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4647 + rtx_cost (op1, PLUS, 1, speed));
4649 if (speed)
4650 *cost += extra_cost->mult[GET_MODE (x) == DImode].add;
4652 return true;
4655 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4656 + rtx_cost (op1, PLUS, 1, speed));
4658 return true;
4661 return false;
4663 case IOR:
4664 case XOR:
4665 case AND:
4666 cost_logic:
4667 op0 = XEXP (x, 0);
4668 op1 = XEXP (x, 1);
4670 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4672 if (CONST_INT_P (op1)
4673 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4675 *cost += rtx_cost (op0, AND, 0, speed);
4677 else
4679 if (GET_CODE (op0) == NOT)
4680 op0 = XEXP (op0, 0);
4681 op0 = aarch64_strip_shift (op0);
4682 *cost += (rtx_cost (op0, AND, 0, speed)
4683 + rtx_cost (op1, AND, 1, speed));
4685 return true;
4687 return false;
4689 case ZERO_EXTEND:
4690 if ((GET_MODE (x) == DImode
4691 && GET_MODE (XEXP (x, 0)) == SImode)
4692 || GET_CODE (XEXP (x, 0)) == MEM)
4694 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4695 return true;
4697 return false;
4699 case SIGN_EXTEND:
4700 if (GET_CODE (XEXP (x, 0)) == MEM)
4702 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4703 return true;
4705 return false;
4707 case ROTATE:
4708 if (!CONST_INT_P (XEXP (x, 1)))
4709 *cost += COSTS_N_INSNS (2);
4710 /* Fall through. */
4711 case ROTATERT:
4712 case LSHIFTRT:
4713 case ASHIFT:
4714 case ASHIFTRT:
4716 /* Shifting by a register often takes an extra cycle. */
4717 if (speed && !CONST_INT_P (XEXP (x, 1)))
4718 *cost += extra_cost->alu.arith_shift_reg;
4720 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4721 return true;
4723 case HIGH:
4724 if (!CONSTANT_P (XEXP (x, 0)))
4725 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4726 return true;
4728 case LO_SUM:
4729 if (!CONSTANT_P (XEXP (x, 1)))
4730 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4731 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4732 return true;
4734 case ZERO_EXTRACT:
4735 case SIGN_EXTRACT:
4736 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4737 return true;
4739 case MULT:
4740 op0 = XEXP (x, 0);
4741 op1 = XEXP (x, 1);
4743 *cost = COSTS_N_INSNS (1);
4744 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4746 if (CONST_INT_P (op1)
4747 && exact_log2 (INTVAL (op1)) > 0)
4749 *cost += rtx_cost (op0, ASHIFT, 0, speed);
4750 return true;
4753 if ((GET_CODE (op0) == ZERO_EXTEND
4754 && GET_CODE (op1) == ZERO_EXTEND)
4755 || (GET_CODE (op0) == SIGN_EXTEND
4756 && GET_CODE (op1) == SIGN_EXTEND))
4758 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4759 + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4760 if (speed)
4761 *cost += extra_cost->mult[GET_MODE (x) == DImode].extend;
4762 return true;
4765 if (speed)
4766 *cost += extra_cost->mult[GET_MODE (x) == DImode].simple;
4768 else if (speed)
4770 if (GET_MODE (x) == DFmode)
4771 *cost += extra_cost->fp[1].mult;
4772 else if (GET_MODE (x) == SFmode)
4773 *cost += extra_cost->fp[0].mult;
4776 return false; /* All arguments need to be in registers. */
4778 case MOD:
4779 case UMOD:
4780 *cost = COSTS_N_INSNS (2);
4781 if (speed)
4783 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4784 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
4785 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
4786 else if (GET_MODE (x) == DFmode)
4787 *cost += (extra_cost->fp[1].mult
4788 + extra_cost->fp[1].div);
4789 else if (GET_MODE (x) == SFmode)
4790 *cost += (extra_cost->fp[0].mult
4791 + extra_cost->fp[0].div);
4793 return false; /* All arguments need to be in registers. */
4795 case DIV:
4796 case UDIV:
4797 *cost = COSTS_N_INSNS (1);
4798 if (speed)
4800 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4801 *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv;
4802 else if (GET_MODE (x) == DFmode)
4803 *cost += extra_cost->fp[1].div;
4804 else if (GET_MODE (x) == SFmode)
4805 *cost += extra_cost->fp[0].div;
4807 return false; /* All arguments need to be in registers. */
4809 default:
4810 break;
4812 return false;
4815 static int
4816 aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4817 enum machine_mode mode ATTRIBUTE_UNUSED,
4818 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4820 enum rtx_code c = GET_CODE (x);
4821 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4823 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4824 return addr_cost->pre_modify;
4826 if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4827 return addr_cost->post_modify;
4829 if (c == PLUS)
4831 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4832 return addr_cost->imm_offset;
4833 else if (GET_CODE (XEXP (x, 0)) == MULT
4834 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4835 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4836 return addr_cost->register_extend;
4838 return addr_cost->register_offset;
4840 else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4841 return addr_cost->imm_offset;
4843 return 0;
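/* A sketch of how the cases above map onto typical addresses (RTL shapes
   only; register numbers are illustrative):

     (pre_modify ...)                                 -> pre_modify
     (plus (reg x0) (const_int 16))                   -> imm_offset
     (plus (mult (reg x1) (const_int 8)) (reg x0))    -> register_extend
     (plus (reg x0) (reg x1))                         -> register_offset
     (symbol_ref ...)                                 -> imm_offset  */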
4846 static int
4847 aarch64_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4848 reg_class_t from, reg_class_t to)
4850 const struct cpu_regmove_cost *regmove_cost
4851 = aarch64_tune_params->regmove_cost;
4853 /* The cost of moving between a GPR and the stack register is the same as GP2GP. */
4854 if ((from == GENERAL_REGS && to == STACK_REG)
4855 || (to == GENERAL_REGS && from == STACK_REG))
4856 return regmove_cost->GP2GP;
4858 /* To/From the stack register, we move via the gprs. */
4859 if (to == STACK_REG || from == STACK_REG)
4860 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
4861 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
4863 if (from == GENERAL_REGS && to == GENERAL_REGS)
4864 return regmove_cost->GP2GP;
4865 else if (from == GENERAL_REGS)
4866 return regmove_cost->GP2FP;
4867 else if (to == GENERAL_REGS)
4868 return regmove_cost->FP2GP;
4870 /* When AdvSIMD instructions are disabled it is not possible to move
4871 a 128-bit value directly between Q registers. This is handled in
4872 secondary reload. A general register is used as a scratch to move
4873 the upper DI value and the lower DI value is moved directly,
4874 hence the cost is the sum of three moves. */
4876 if (! TARGET_SIMD && GET_MODE_SIZE (from) == 128 && GET_MODE_SIZE (to) == 128)
4877 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4879 return regmove_cost->FP2FP;
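/* For example (a sketch of the recursion above), a move between FP_REGS
   and STACK_REG is costed as a move via the general registers:

     cost (FP_REGS, STACK_REG)
       = cost (FP_REGS, GENERAL_REGS) + cost (GENERAL_REGS, STACK_REG)
       = FP2GP + GP2GP.  */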
4882 static int
4883 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4884 reg_class_t rclass ATTRIBUTE_UNUSED,
4885 bool in ATTRIBUTE_UNUSED)
4887 return aarch64_tune_params->memmov_cost;
4890 /* Return the number of instructions that can be issued per cycle. */
4891 static int
4892 aarch64_sched_issue_rate (void)
4894 return aarch64_tune_params->issue_rate;
4897 /* Vectorizer cost model target hooks. */
4899 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4900 static int
4901 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4902 tree vectype,
4903 int misalign ATTRIBUTE_UNUSED)
4905 unsigned elements;
4907 switch (type_of_cost)
4909 case scalar_stmt:
4910 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
4912 case scalar_load:
4913 return aarch64_tune_params->vec_costs->scalar_load_cost;
4915 case scalar_store:
4916 return aarch64_tune_params->vec_costs->scalar_store_cost;
4918 case vector_stmt:
4919 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4921 case vector_load:
4922 return aarch64_tune_params->vec_costs->vec_align_load_cost;
4924 case vector_store:
4925 return aarch64_tune_params->vec_costs->vec_store_cost;
4927 case vec_to_scalar:
4928 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
4930 case scalar_to_vec:
4931 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
4933 case unaligned_load:
4934 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
4936 case unaligned_store:
4937 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
4939 case cond_branch_taken:
4940 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
4942 case cond_branch_not_taken:
4943 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
4945 case vec_perm:
4946 case vec_promote_demote:
4947 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4949 case vec_construct:
4950 elements = TYPE_VECTOR_SUBPARTS (vectype);
4951 return elements / 2 + 1;
4953 default:
4954 gcc_unreachable ();
4958 /* Implement targetm.vectorize.add_stmt_cost. */
4959 static unsigned
4960 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4961 struct _stmt_vec_info *stmt_info, int misalign,
4962 enum vect_cost_model_location where)
4964 unsigned *cost = (unsigned *) data;
4965 unsigned retval = 0;
4967 if (flag_vect_cost_model)
4969 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4970 int stmt_cost =
4971 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
4973 /* Statements in an inner loop relative to the loop being
4974 vectorized are weighted more heavily. The value here is
4975 a function (linear for now) of the loop nest level. */
4976 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4978 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4979 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
4980 unsigned nest_level = loop_depth (loop);
4982 count *= nest_level;
4985 retval = (unsigned) (count * stmt_cost);
4986 cost[where] += retval;
4989 return retval;
4992 static void initialize_aarch64_code_model (void);
4994 /* Parse the architecture extension string. */
4996 static void
4997 aarch64_parse_extension (char *str)
4999 /* The extension string is parsed left to right. */
5000 const struct aarch64_option_extension *opt = NULL;
5002 /* Flag to say whether we are adding or removing an extension. */
5003 int adding_ext = -1;
5005 while (str != NULL && *str != 0)
5007 char *ext;
5008 size_t len;
5010 str++;
5011 ext = strchr (str, '+');
5013 if (ext != NULL)
5014 len = ext - str;
5015 else
5016 len = strlen (str);
5018 if (len >= 2 && strncmp (str, "no", 2) == 0)
5020 adding_ext = 0;
5021 len -= 2;
5022 str += 2;
5024 else if (len > 0)
5025 adding_ext = 1;
5027 if (len == 0)
5029 error ("missing feature modifier after %qs", "+no");
5030 return;
5033 /* Scan over the extensions table trying to find an exact match. */
5034 for (opt = all_extensions; opt->name != NULL; opt++)
5036 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
5038 /* Add or remove the extension. */
5039 if (adding_ext)
5040 aarch64_isa_flags |= opt->flags_on;
5041 else
5042 aarch64_isa_flags &= ~(opt->flags_off);
5043 break;
5047 if (opt->name == NULL)
5049 /* Extension not found in list. */
5050 error ("unknown feature modifier %qs", str);
5051 return;
5054 str = ext;
5057 return;
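/* For example (a sketch; the feature names are used only for illustration),
   parsing the tail "+fp+nosimd" of an -march or -mcpu string proceeds left
   to right: "fp" has no "no" prefix, so its flags_on bits are ORed into
   aarch64_isa_flags; "nosimd" strips the "no" prefix, matches "simd" and
   clears that entry's flags_off bits.  An empty modifier such as a bare
   "+no" is rejected with the "missing feature modifier" error.  */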
5060 /* Parse the ARCH string. */
5062 static void
5063 aarch64_parse_arch (void)
5065 char *ext;
5066 const struct processor *arch;
5067 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
5068 size_t len;
5070 strcpy (str, aarch64_arch_string);
5072 ext = strchr (str, '+');
5074 if (ext != NULL)
5075 len = ext - str;
5076 else
5077 len = strlen (str);
5079 if (len == 0)
5081 error ("missing arch name in -march=%qs", str);
5082 return;
5085 /* Loop through the list of supported ARCHs to find a match. */
5086 for (arch = all_architectures; arch->name != NULL; arch++)
5088 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
5090 selected_arch = arch;
5091 aarch64_isa_flags = selected_arch->flags;
5093 if (!selected_cpu)
5094 selected_cpu = &all_cores[selected_arch->core];
5096 if (ext != NULL)
5098 /* ARCH string contains at least one extension. */
5099 aarch64_parse_extension (ext);
5102 if (strcmp (selected_arch->arch, selected_cpu->arch))
5104 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
5105 selected_cpu->name, selected_arch->name);
5108 return;
5112 /* ARCH name not found in list. */
5113 error ("unknown value %qs for -march", str);
5114 return;
5117 /* Parse the CPU string. */
5119 static void
5120 aarch64_parse_cpu (void)
5122 char *ext;
5123 const struct processor *cpu;
5124 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
5125 size_t len;
5127 strcpy (str, aarch64_cpu_string);
5129 ext = strchr (str, '+');
5131 if (ext != NULL)
5132 len = ext - str;
5133 else
5134 len = strlen (str);
5136 if (len == 0)
5138 error ("missing cpu name in -mcpu=%qs", str);
5139 return;
5142 /* Loop through the list of supported CPUs to find a match. */
5143 for (cpu = all_cores; cpu->name != NULL; cpu++)
5145 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
5147 selected_cpu = cpu;
5148 selected_tune = cpu;
5149 aarch64_isa_flags = selected_cpu->flags;
5151 if (ext != NULL)
5153 /* CPU string contains at least one extension. */
5154 aarch64_parse_extension (ext);
5157 return;
5161 /* CPU name not found in list. */
5162 error ("unknown value %qs for -mcpu", str);
5163 return;
5166 /* Parse the TUNE string. */
5168 static void
5169 aarch64_parse_tune (void)
5171 const struct processor *cpu;
5172 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
5173 strcpy (str, aarch64_tune_string);
5175 /* Loop through the list of supported CPUs to find a match. */
5176 for (cpu = all_cores; cpu->name != NULL; cpu++)
5178 if (strcmp (cpu->name, str) == 0)
5180 selected_tune = cpu;
5181 return;
5185 /* CPU name not found in list. */
5186 error ("unknown value %qs for -mtune", str);
5187 return;
5191 /* Implement TARGET_OPTION_OVERRIDE. */
5193 static void
5194 aarch64_override_options (void)
5196 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
5197 If either of -march or -mtune is given, they override their
5198 respective component of -mcpu.
5200 So, first parse AARCH64_CPU_STRING, then the others, be careful
5201 with -march as, if -mcpu is not present on the command line, march
5202 must set a sensible default CPU. */
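/* For example (an illustrative sketch; the CPU names are hypothetical):

     -mcpu=foo               architecture, ISA flags and tuning all
                             come from foo.
     -mcpu=foo -mtune=bar    architecture and ISA flags from foo,
                             tuning from bar.
     -march=armv8-a          only the architecture and ISA flags are
                             set here; if no -mcpu was given, the CPU
                             defaults to the architecture's core.  */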
5203 if (aarch64_cpu_string)
5205 aarch64_parse_cpu ();
5208 if (aarch64_arch_string)
5210 aarch64_parse_arch ();
5213 if (aarch64_tune_string)
5215 aarch64_parse_tune ();
5218 #ifndef HAVE_AS_MABI_OPTION
5219 /* The compiler may have been configured with 2.23.* binutils, which does
5220 not have support for ILP32. */
5221 if (TARGET_ILP32)
5222 error ("Assembler does not support -mabi=ilp32");
5223 #endif
5225 initialize_aarch64_code_model ();
5227 aarch64_build_bitmask_table ();
5229 /* This target defaults to strict volatile bitfields. */
5230 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
5231 flag_strict_volatile_bitfields = 1;
5233 /* If the user did not specify a processor, choose the default
5234 one for them. This will be the CPU set during configuration using
5235 --with-cpu, otherwise it is "generic". */
5236 if (!selected_cpu)
5238 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
5239 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
5242 gcc_assert (selected_cpu);
5244 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
5245 if (!selected_tune)
5246 selected_tune = &all_cores[selected_cpu->core];
5248 aarch64_tune_flags = selected_tune->flags;
5249 aarch64_tune = selected_tune->core;
5250 aarch64_tune_params = selected_tune->tune;
5252 aarch64_override_options_after_change ();
5255 /* Implement targetm.override_options_after_change. */
5257 static void
5258 aarch64_override_options_after_change (void)
5260 if (flag_omit_frame_pointer)
5261 flag_omit_leaf_frame_pointer = false;
5262 else if (flag_omit_leaf_frame_pointer)
5263 flag_omit_frame_pointer = true;
5266 static struct machine_function *
5267 aarch64_init_machine_status (void)
5269 struct machine_function *machine;
5270 machine = ggc_alloc_cleared_machine_function ();
5271 return machine;
5274 void
5275 aarch64_init_expanders (void)
5277 init_machine_status = aarch64_init_machine_status;
5280 /* A checking mechanism for the implementation of the various code models. */
5281 static void
5282 initialize_aarch64_code_model (void)
5284 if (flag_pic)
5286 switch (aarch64_cmodel_var)
5288 case AARCH64_CMODEL_TINY:
5289 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
5290 break;
5291 case AARCH64_CMODEL_SMALL:
5292 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
5293 break;
5294 case AARCH64_CMODEL_LARGE:
5295 sorry ("code model %qs with -f%s", "large",
5296 flag_pic > 1 ? "PIC" : "pic");
5297 default:
5298 gcc_unreachable ();
5301 else
5302 aarch64_cmodel = aarch64_cmodel_var;
5305 /* Return true if SYMBOL_REF X binds locally. */
5307 static bool
5308 aarch64_symbol_binds_local_p (const_rtx x)
5310 return (SYMBOL_REF_DECL (x)
5311 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
5312 : SYMBOL_REF_LOCAL_P (x));
5315 /* Return true if SYMBOL_REF X is thread local */
5316 static bool
5317 aarch64_tls_symbol_p (rtx x)
5319 if (! TARGET_HAVE_TLS)
5320 return false;
5322 if (GET_CODE (x) != SYMBOL_REF)
5323 return false;
5325 return SYMBOL_REF_TLS_MODEL (x) != 0;
5328 /* Classify a TLS symbol into one of the TLS kinds. */
5329 enum aarch64_symbol_type
5330 aarch64_classify_tls_symbol (rtx x)
5332 enum tls_model tls_kind = tls_symbolic_operand_type (x);
5334 switch (tls_kind)
5336 case TLS_MODEL_GLOBAL_DYNAMIC:
5337 case TLS_MODEL_LOCAL_DYNAMIC:
5338 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
5340 case TLS_MODEL_INITIAL_EXEC:
5341 return SYMBOL_SMALL_GOTTPREL;
5343 case TLS_MODEL_LOCAL_EXEC:
5344 return SYMBOL_SMALL_TPREL;
5346 case TLS_MODEL_EMULATED:
5347 case TLS_MODEL_NONE:
5348 return SYMBOL_FORCE_TO_MEM;
5350 default:
5351 gcc_unreachable ();
5355 /* Return the method that should be used to access SYMBOL_REF or
5356 LABEL_REF X in context CONTEXT. */
5358 enum aarch64_symbol_type
5359 aarch64_classify_symbol (rtx x,
5360 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
5362 if (GET_CODE (x) == LABEL_REF)
5364 switch (aarch64_cmodel)
5366 case AARCH64_CMODEL_LARGE:
5367 return SYMBOL_FORCE_TO_MEM;
5369 case AARCH64_CMODEL_TINY_PIC:
5370 case AARCH64_CMODEL_TINY:
5371 return SYMBOL_TINY_ABSOLUTE;
5373 case AARCH64_CMODEL_SMALL_PIC:
5374 case AARCH64_CMODEL_SMALL:
5375 return SYMBOL_SMALL_ABSOLUTE;
5377 default:
5378 gcc_unreachable ();
5382 if (GET_CODE (x) == SYMBOL_REF)
5384 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
5385 return SYMBOL_FORCE_TO_MEM;
5387 if (aarch64_tls_symbol_p (x))
5388 return aarch64_classify_tls_symbol (x);
5390 switch (aarch64_cmodel)
5392 case AARCH64_CMODEL_TINY:
5393 if (SYMBOL_REF_WEAK (x))
5394 return SYMBOL_FORCE_TO_MEM;
5395 return SYMBOL_TINY_ABSOLUTE;
5397 case AARCH64_CMODEL_SMALL:
5398 if (SYMBOL_REF_WEAK (x))
5399 return SYMBOL_FORCE_TO_MEM;
5400 return SYMBOL_SMALL_ABSOLUTE;
5402 case AARCH64_CMODEL_TINY_PIC:
5403 if (!aarch64_symbol_binds_local_p (x))
5404 return SYMBOL_TINY_GOT;
5405 return SYMBOL_TINY_ABSOLUTE;
5407 case AARCH64_CMODEL_SMALL_PIC:
5408 if (!aarch64_symbol_binds_local_p (x))
5409 return SYMBOL_SMALL_GOT;
5410 return SYMBOL_SMALL_ABSOLUTE;
5412 default:
5413 gcc_unreachable ();
5417 /* By default push everything into the constant pool. */
5418 return SYMBOL_FORCE_TO_MEM;
5421 bool
5422 aarch64_constant_address_p (rtx x)
5424 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5427 bool
5428 aarch64_legitimate_pic_operand_p (rtx x)
5430 if (GET_CODE (x) == SYMBOL_REF
5431 || (GET_CODE (x) == CONST
5432 && GET_CODE (XEXP (x, 0)) == PLUS
5433 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5434 return false;
5436 return true;
5439 /* Return true if X holds either a quarter-precision constant or a
5440 floating-point +0.0 constant. */
5441 static bool
5442 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5444 if (!CONST_DOUBLE_P (x))
5445 return false;
5447 /* TODO: We could handle moving 0.0 to a TFmode register,
5448 but first we would like to refactor movtf_aarch64
5449 to be more amenable to splitting moves properly and to
5450 gating correctly on TARGET_SIMD. For now, reject all
5451 constants that are not destined for SFmode or DFmode registers. */
5452 if (!(mode == SFmode || mode == DFmode))
5453 return false;
5455 if (aarch64_float_const_zero_rtx_p (x))
5456 return true;
5457 return aarch64_float_const_representable_p (x);
5460 static bool
5461 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5463 /* Do not allow vector struct mode constants. We could support
5464 0 and -1 easily, but they need support in aarch64-simd.md. */
5465 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5466 return false;
5468 /* This could probably go away because
5469 we now decompose CONST_INTs according to expand_mov_immediate. */
5470 if ((GET_CODE (x) == CONST_VECTOR
5471 && aarch64_simd_valid_immediate (x, mode, false, NULL))
5472 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5473 return !targetm.cannot_force_const_mem (mode, x);
5475 if (GET_CODE (x) == HIGH
5476 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5477 return true;
5479 return aarch64_constant_address_p (x);
5483 aarch64_load_tp (rtx target)
5485 if (!target
5486 || GET_MODE (target) != Pmode
5487 || !register_operand (target, Pmode))
5488 target = gen_reg_rtx (Pmode);
5490 /* Can return in any reg. */
5491 emit_insn (gen_aarch64_load_tp_hard (target));
5492 return target;
5495 /* On AAPCS systems, this is the "struct __va_list". */
5496 static GTY(()) tree va_list_type;
5498 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5499 Return the type to use as __builtin_va_list.
5501 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5503 struct __va_list
5505 void *__stack;
5506 void *__gr_top;
5507 void *__vr_top;
5508 int __gr_offs;
5509 int __vr_offs;
5510 }; */
5512 static tree
5513 aarch64_build_builtin_va_list (void)
5515 tree va_list_name;
5516 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5518 /* Create the type. */
5519 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5520 /* Give it the required name. */
5521 va_list_name = build_decl (BUILTINS_LOCATION,
5522 TYPE_DECL,
5523 get_identifier ("__va_list"),
5524 va_list_type);
5525 DECL_ARTIFICIAL (va_list_name) = 1;
5526 TYPE_NAME (va_list_type) = va_list_name;
5527 TYPE_STUB_DECL (va_list_type) = va_list_name;
5529 /* Create the fields. */
5530 f_stack = build_decl (BUILTINS_LOCATION,
5531 FIELD_DECL, get_identifier ("__stack"),
5532 ptr_type_node);
5533 f_grtop = build_decl (BUILTINS_LOCATION,
5534 FIELD_DECL, get_identifier ("__gr_top"),
5535 ptr_type_node);
5536 f_vrtop = build_decl (BUILTINS_LOCATION,
5537 FIELD_DECL, get_identifier ("__vr_top"),
5538 ptr_type_node);
5539 f_groff = build_decl (BUILTINS_LOCATION,
5540 FIELD_DECL, get_identifier ("__gr_offs"),
5541 integer_type_node);
5542 f_vroff = build_decl (BUILTINS_LOCATION,
5543 FIELD_DECL, get_identifier ("__vr_offs"),
5544 integer_type_node);
5546 DECL_ARTIFICIAL (f_stack) = 1;
5547 DECL_ARTIFICIAL (f_grtop) = 1;
5548 DECL_ARTIFICIAL (f_vrtop) = 1;
5549 DECL_ARTIFICIAL (f_groff) = 1;
5550 DECL_ARTIFICIAL (f_vroff) = 1;
5552 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5553 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5554 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5555 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5556 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5558 TYPE_FIELDS (va_list_type) = f_stack;
5559 DECL_CHAIN (f_stack) = f_grtop;
5560 DECL_CHAIN (f_grtop) = f_vrtop;
5561 DECL_CHAIN (f_vrtop) = f_groff;
5562 DECL_CHAIN (f_groff) = f_vroff;
5564 /* Compute its layout. */
5565 layout_type (va_list_type);
5567 return va_list_type;
5570 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5571 static void
5572 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5574 const CUMULATIVE_ARGS *cum;
5575 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5576 tree stack, grtop, vrtop, groff, vroff;
5577 tree t;
5578 int gr_save_area_size;
5579 int vr_save_area_size;
5580 int vr_offset;
5582 cum = &crtl->args.info;
5583 gr_save_area_size
5584 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5585 vr_save_area_size
5586 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5588 if (TARGET_GENERAL_REGS_ONLY)
5590 if (cum->aapcs_nvrn > 0)
5591 sorry ("%qs and floating point or vector arguments",
5592 "-mgeneral-regs-only");
5593 vr_save_area_size = 0;
5596 f_stack = TYPE_FIELDS (va_list_type_node);
5597 f_grtop = DECL_CHAIN (f_stack);
5598 f_vrtop = DECL_CHAIN (f_grtop);
5599 f_groff = DECL_CHAIN (f_vrtop);
5600 f_vroff = DECL_CHAIN (f_groff);
5602 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5603 NULL_TREE);
5604 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5605 NULL_TREE);
5606 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5607 NULL_TREE);
5608 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5609 NULL_TREE);
5610 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5611 NULL_TREE);
5613 /* Emit code to initialize STACK, which points to the next varargs stack
5614 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5615 by named arguments. STACK is 8-byte aligned. */
5616 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5617 if (cum->aapcs_stack_size > 0)
5618 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5619 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5620 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5622 /* Emit code to initialize GRTOP, the top of the GR save area.
5623 virtual_incoming_args_rtx should have been 16 byte aligned. */
5624 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5625 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5626 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5628 /* Emit code to initialize VRTOP, the top of the VR save area.
5629 This address is gr_save_area_bytes below GRTOP, rounded
5630 down to the next 16-byte boundary. */
5631 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5632 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5633 STACK_BOUNDARY / BITS_PER_UNIT);
5635 if (vr_offset)
5636 t = fold_build_pointer_plus_hwi (t, -vr_offset);
5637 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5638 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5640 /* Emit code to initialize GROFF, the offset from GRTOP of the
5641 next GPR argument. */
5642 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5643 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5644 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5646 /* Likewise emit code to initialize VROFF, the offset from VRTOP
5647 of the next VR argument. */
5648 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5649 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5650 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5653 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5655 static tree
5656 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5657 gimple_seq *post_p ATTRIBUTE_UNUSED)
5659 tree addr;
5660 bool indirect_p;
5661 bool is_ha; /* is HFA or HVA. */
5662 bool dw_align; /* double-word align. */
5663 enum machine_mode ag_mode = VOIDmode;
5664 int nregs;
5665 enum machine_mode mode;
5667 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5668 tree stack, f_top, f_off, off, arg, roundup, on_stack;
5669 HOST_WIDE_INT size, rsize, adjust, align;
5670 tree t, u, cond1, cond2;
5672 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5673 if (indirect_p)
5674 type = build_pointer_type (type);
5676 mode = TYPE_MODE (type);
5678 f_stack = TYPE_FIELDS (va_list_type_node);
5679 f_grtop = DECL_CHAIN (f_stack);
5680 f_vrtop = DECL_CHAIN (f_grtop);
5681 f_groff = DECL_CHAIN (f_vrtop);
5682 f_vroff = DECL_CHAIN (f_groff);
5684 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5685 f_stack, NULL_TREE);
5686 size = int_size_in_bytes (type);
5687 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5689 dw_align = false;
5690 adjust = 0;
5691 if (aarch64_vfp_is_call_or_return_candidate (mode,
5692 type,
5693 &ag_mode,
5694 &nregs,
5695 &is_ha))
5697 /* TYPE passed in fp/simd registers. */
5698 if (TARGET_GENERAL_REGS_ONLY)
5699 sorry ("%qs and floating point or vector arguments",
5700 "-mgeneral-regs-only");
5702 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5703 unshare_expr (valist), f_vrtop, NULL_TREE);
5704 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5705 unshare_expr (valist), f_vroff, NULL_TREE);
5707 rsize = nregs * UNITS_PER_VREG;
5709 if (is_ha)
5711 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5712 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5714 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5715 && size < UNITS_PER_VREG)
5717 adjust = UNITS_PER_VREG - size;
5720 else
5722 /* TYPE passed in general registers. */
5723 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5724 unshare_expr (valist), f_grtop, NULL_TREE);
5725 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5726 unshare_expr (valist), f_groff, NULL_TREE);
5727 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5728 nregs = rsize / UNITS_PER_WORD;
5730 if (align > 8)
5731 dw_align = true;
5733 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5734 && size < UNITS_PER_WORD)
5736 adjust = UNITS_PER_WORD - size;
5740 /* Get a local temporary for the field value. */
5741 off = get_initialized_tmp_var (f_off, pre_p, NULL);
5743 /* Emit code to branch if off >= 0. */
5744 t = build2 (GE_EXPR, boolean_type_node, off,
5745 build_int_cst (TREE_TYPE (off), 0));
5746 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5748 if (dw_align)
5750 /* Emit: offs = (offs + 15) & -16. */
5751 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5752 build_int_cst (TREE_TYPE (off), 15));
5753 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5754 build_int_cst (TREE_TYPE (off), -16));
5755 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5757 else
5758 roundup = NULL;
5760 /* Update ap.__[g|v]r_offs */
5761 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5762 build_int_cst (TREE_TYPE (off), rsize));
5763 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5765 /* String up. */
5766 if (roundup)
5767 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5769 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5770 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5771 build_int_cst (TREE_TYPE (f_off), 0));
5772 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5774 /* String up: make sure the assignment happens before the use. */
5775 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5776 COND_EXPR_ELSE (cond1) = t;
5778 /* Prepare the trees handling the argument that is passed on the stack;
5779 the top level node will store in ON_STACK. */
5780 arg = get_initialized_tmp_var (stack, pre_p, NULL);
5781 if (align > 8)
5783 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5784 t = fold_convert (intDI_type_node, arg);
5785 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5786 build_int_cst (TREE_TYPE (t), 15));
5787 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5788 build_int_cst (TREE_TYPE (t), -16));
5789 t = fold_convert (TREE_TYPE (arg), t);
5790 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5792 else
5793 roundup = NULL;
5794 /* Advance ap.__stack */
5795 t = fold_convert (intDI_type_node, arg);
5796 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5797 build_int_cst (TREE_TYPE (t), size + 7));
5798 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5799 build_int_cst (TREE_TYPE (t), -8));
5800 t = fold_convert (TREE_TYPE (arg), t);
5801 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5802 /* String up roundup and advance. */
5803 if (roundup)
5804 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5805 /* String up with arg */
5806 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5807 /* Big-endianness related address adjustment. */
5808 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5809 && size < UNITS_PER_WORD)
5811 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5812 size_int (UNITS_PER_WORD - size));
5813 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5816 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5817 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5819 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5820 t = off;
5821 if (adjust)
5822 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5823 build_int_cst (TREE_TYPE (off), adjust));
5825 t = fold_convert (sizetype, t);
5826 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5828 if (is_ha)
5830 /* type ha; // treat as "struct {ftype field[n];}"
5831 ... [computing offs]
5832 for (i = 0; i <nregs; ++i, offs += 16)
5833 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5834 return ha; */
5835 int i;
5836 tree tmp_ha, field_t, field_ptr_t;
5838 /* Declare a local variable. */
5839 tmp_ha = create_tmp_var_raw (type, "ha");
5840 gimple_add_tmp_var (tmp_ha);
5842 /* Establish the base type. */
5843 switch (ag_mode)
5845 case SFmode:
5846 field_t = float_type_node;
5847 field_ptr_t = float_ptr_type_node;
5848 break;
5849 case DFmode:
5850 field_t = double_type_node;
5851 field_ptr_t = double_ptr_type_node;
5852 break;
5853 case TFmode:
5854 field_t = long_double_type_node;
5855 field_ptr_t = long_double_ptr_type_node;
5856 break;
5857 /* Half-precision and quad-precision types are not fully supported yet.
5858 Enable the following code once that support is complete; the correct
5859 type node for __fp16 * still needs to be found. */
5860 #if 0
5861 case HFmode:
5862 field_t = float_type_node;
5863 field_ptr_t = float_ptr_type_node;
5864 break;
5865 #endif
5866 case V2SImode:
5867 case V4SImode:
5869 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5870 field_t = build_vector_type_for_mode (innertype, ag_mode);
5871 field_ptr_t = build_pointer_type (field_t);
5873 break;
5874 default:
5875 gcc_assert (0);
5878 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
5879 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5880 addr = t;
5881 t = fold_convert (field_ptr_t, addr);
5882 t = build2 (MODIFY_EXPR, field_t,
5883 build1 (INDIRECT_REF, field_t, tmp_ha),
5884 build1 (INDIRECT_REF, field_t, t));
5886 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5887 for (i = 1; i < nregs; ++i)
5889 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5890 u = fold_convert (field_ptr_t, addr);
5891 u = build2 (MODIFY_EXPR, field_t,
5892 build2 (MEM_REF, field_t, tmp_ha,
5893 build_int_cst (field_ptr_t,
5894 (i *
5895 int_size_in_bytes (field_t)))),
5896 build1 (INDIRECT_REF, field_t, u));
5897 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5900 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5901 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5904 COND_EXPR_ELSE (cond2) = t;
5905 addr = fold_convert (build_pointer_type (type), cond1);
5906 addr = build_va_arg_indirect_ref (addr);
5908 if (indirect_p)
5909 addr = build_va_arg_indirect_ref (addr);
5911 return addr;
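/* In outline, the tree built above behaves like the following pseudo-C
   (a sketch only; "top" and "offs" stand for the __gr_/__vr_ fields
   selected above):

     if (offs >= 0)
       addr = on_stack;        /* named register area already exhausted */
     else
       {
	 offs += rsize;        /* possibly rounded up to 16 bytes first */
	 if (offs > 0)
	   addr = on_stack;    /* this argument overflowed the save area */
	 else
	   addr = top + offs;  /* plus any big-endian adjustment */
       }  */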
5914 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
5916 static void
5917 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5918 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5919 int no_rtl)
5921 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5922 CUMULATIVE_ARGS local_cum;
5923 int gr_saved, vr_saved;
5925 /* The caller has advanced CUM up to, but not beyond, the last named
5926 argument. Advance a local copy of CUM past the last "real" named
5927 argument, to find out how many registers are left over. */
5928 local_cum = *cum;
5929 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
5931 /* Find out how many registers we need to save. */
5932 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5933 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5935 if (TARGET_GENERAL_REGS_ONLY)
5937 if (local_cum.aapcs_nvrn > 0)
5938 sorry ("%qs and floating point or vector arguments",
5939 "-mgeneral-regs-only");
5940 vr_saved = 0;
5943 if (!no_rtl)
5945 if (gr_saved > 0)
5947 rtx ptr, mem;
5949 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5950 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5951 - gr_saved * UNITS_PER_WORD);
5952 mem = gen_frame_mem (BLKmode, ptr);
5953 set_mem_alias_set (mem, get_varargs_alias_set ());
5955 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5956 mem, gr_saved);
5958 if (vr_saved > 0)
5960 /* We can't use move_block_from_reg, because it will use
5961 the wrong mode, storing D regs only. */
5962 enum machine_mode mode = TImode;
5963 int off, i;
5965 /* Set OFF to the offset from virtual_incoming_args_rtx of
5966 the first vector register. The VR save area lies below
5967 the GR one, and is aligned to 16 bytes. */
5968 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5969 STACK_BOUNDARY / BITS_PER_UNIT);
5970 off -= vr_saved * UNITS_PER_VREG;
5972 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5974 rtx ptr, mem;
5976 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5977 mem = gen_frame_mem (mode, ptr);
5978 set_mem_alias_set (mem, get_varargs_alias_set ());
5979 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5980 off += UNITS_PER_VREG;
5985 /* We don't save the size into *PRETEND_SIZE because we want to avoid
5986 any complication of having crtl->args.pretend_args_size changed. */
5987 cfun->machine->saved_varargs_size
5988 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5989 STACK_BOUNDARY / BITS_PER_UNIT)
5990 + vr_saved * UNITS_PER_VREG);
5993 static void
5994 aarch64_conditional_register_usage (void)
5996 int i;
5997 if (!TARGET_FLOAT)
5999 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
6001 fixed_regs[i] = 1;
6002 call_used_regs[i] = 1;
6007 /* Walk down the type tree of TYPE counting consecutive base elements.
6008 If *MODEP is VOIDmode, then set it to the first valid floating point
6009 type. If a non-floating point type is found, or if a floating point
6010 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6011 otherwise return the count in the sub-tree. */
6012 static int
6013 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
6015 enum machine_mode mode;
6016 HOST_WIDE_INT size;
6018 switch (TREE_CODE (type))
6020 case REAL_TYPE:
6021 mode = TYPE_MODE (type);
6022 if (mode != DFmode && mode != SFmode && mode != TFmode)
6023 return -1;
6025 if (*modep == VOIDmode)
6026 *modep = mode;
6028 if (*modep == mode)
6029 return 1;
6031 break;
6033 case COMPLEX_TYPE:
6034 mode = TYPE_MODE (TREE_TYPE (type));
6035 if (mode != DFmode && mode != SFmode && mode != TFmode)
6036 return -1;
6038 if (*modep == VOIDmode)
6039 *modep = mode;
6041 if (*modep == mode)
6042 return 2;
6044 break;
6046 case VECTOR_TYPE:
6047 /* Use V2SImode and V4SImode as representatives of all 64-bit
6048 and 128-bit vector types. */
6049 size = int_size_in_bytes (type);
6050 switch (size)
6052 case 8:
6053 mode = V2SImode;
6054 break;
6055 case 16:
6056 mode = V4SImode;
6057 break;
6058 default:
6059 return -1;
6062 if (*modep == VOIDmode)
6063 *modep = mode;
6065 /* Vector modes are considered to be opaque: two vectors are
6066 equivalent for the purposes of being homogeneous aggregates
6067 if they are the same size. */
6068 if (*modep == mode)
6069 return 1;
6071 break;
6073 case ARRAY_TYPE:
6075 int count;
6076 tree index = TYPE_DOMAIN (type);
6078 /* Can't handle incomplete types. */
6079 if (!COMPLETE_TYPE_P (type))
6080 return -1;
6082 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
6083 if (count == -1
6084 || !index
6085 || !TYPE_MAX_VALUE (index)
6086 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6087 || !TYPE_MIN_VALUE (index)
6088 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6089 || count < 0)
6090 return -1;
6092 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6093 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6095 /* There must be no padding. */
6096 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6097 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6098 != count * GET_MODE_BITSIZE (*modep)))
6099 return -1;
6101 return count;
6104 case RECORD_TYPE:
6106 int count = 0;
6107 int sub_count;
6108 tree field;
6110 /* Can't handle incomplete types. */
6111 if (!COMPLETE_TYPE_P (type))
6112 return -1;
6114 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6116 if (TREE_CODE (field) != FIELD_DECL)
6117 continue;
6119 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6120 if (sub_count < 0)
6121 return -1;
6122 count += sub_count;
6125 /* There must be no padding. */
6126 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6127 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6128 != count * GET_MODE_BITSIZE (*modep)))
6129 return -1;
6131 return count;
6134 case UNION_TYPE:
6135 case QUAL_UNION_TYPE:
6137 /* These aren't very interesting except in a degenerate case. */
6138 int count = 0;
6139 int sub_count;
6140 tree field;
6142 /* Can't handle incomplete types. */
6143 if (!COMPLETE_TYPE_P (type))
6144 return -1;
6146 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6148 if (TREE_CODE (field) != FIELD_DECL)
6149 continue;
6151 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6152 if (sub_count < 0)
6153 return -1;
6154 count = count > sub_count ? count : sub_count;
6157 /* There must be no padding. */
6158 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6159 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6160 != count * GET_MODE_BITSIZE (*modep)))
6161 return -1;
6163 return count;
6166 default:
6167 break;
6170 return -1;
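/* Some worked examples of the walk above (the types are purely
   illustrative):

     struct { float x, y, z; }       -> 3, *modep == SFmode
     _Complex double                 -> 2, *modep == DFmode
     struct { float f; double d; }   -> -1 (mixed base types)
     float [4]                       -> 4, *modep == SFmode  */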
6173 /* Return true if we use LRA instead of reload pass. */
6174 static bool
6175 aarch64_lra_p (void)
6177 return aarch64_lra_flag;
6180 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
6181 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
6182 array types. The C99 floating-point complex types are also considered
6183 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
6184 types, which are GCC extensions and out of the scope of AAPCS64, are
6185 treated as composite types here as well.
6187 Note that MODE itself is not sufficient in determining whether a type
6188 is such a composite type or not. This is because
6189 stor-layout.c:compute_record_mode may have already changed the MODE
6190 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
6191 structure with only one field may have its MODE set to the mode of the
6192 field. Also an integer mode whose size matches the size of the
6193 RECORD_TYPE type may be used to substitute the original mode
6194 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
6195 solely relied on. */
6197 static bool
6198 aarch64_composite_type_p (const_tree type,
6199 enum machine_mode mode)
6201 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
6202 return true;
6204 if (mode == BLKmode
6205 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6206 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
6207 return true;
6209 return false;
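/* For example (illustrative only): a struct containing a single double may
   have its TYPE_MODE set to DFmode by compute_record_mode, yet it is still
   a composite type by the TYPE check above, whereas a bare double
   (DFmode with no aggregate TYPE) is not.  */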
6212 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6213 type as described in AAPCS64 \S 4.1.2.
6215 See the comment above aarch64_composite_type_p for the notes on MODE. */
6217 static bool
6218 aarch64_short_vector_p (const_tree type,
6219 enum machine_mode mode)
6221 HOST_WIDE_INT size = -1;
6223 if (type && TREE_CODE (type) == VECTOR_TYPE)
6224 size = int_size_in_bytes (type);
6225 else if (!aarch64_composite_type_p (type, mode)
6226 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6227 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
6228 size = GET_MODE_SIZE (mode);
6230 return (size == 8 || size == 16) ? true : false;
6233 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
6234 shall be passed or returned in simd/fp register(s) (providing these
6235 parameter passing registers are available).
6237 Upon successful return, *COUNT returns the number of needed registers,
6238 *BASE_MODE returns the mode of the individual register and, when IS_HA
6239 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6240 floating-point aggregate or a homogeneous short-vector aggregate. */
6242 static bool
6243 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
6244 const_tree type,
6245 enum machine_mode *base_mode,
6246 int *count,
6247 bool *is_ha)
6249 enum machine_mode new_mode = VOIDmode;
6250 bool composite_p = aarch64_composite_type_p (type, mode);
6252 if (is_ha != NULL) *is_ha = false;
6254 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
6255 || aarch64_short_vector_p (type, mode))
6257 *count = 1;
6258 new_mode = mode;
6260 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6262 if (is_ha != NULL) *is_ha = true;
6263 *count = 2;
6264 new_mode = GET_MODE_INNER (mode);
6266 else if (type && composite_p)
6268 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6270 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
6272 if (is_ha != NULL) *is_ha = true;
6273 *count = ag_count;
6275 else
6276 return false;
6278 else
6279 return false;
6281 *base_mode = new_mode;
6282 return true;
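/* Illustrative sketch (not compiled; the struct names are made up) of what
   the candidate check above accepts and rejects, assuming HA_MAX_NUM_FLDS
   is 4 as in the AAPCS64 homogeneous-aggregate rules.  */
#if 0
struct hfa3_illus  { float a, b, c; };     /* HFA: *base_mode = SFmode, *count = 3, *is_ha = true */
struct mixed_illus { float a; double b; }; /* rejected: member modes differ                       */
struct big_illus   { double d[5]; };       /* rejected: more than HA_MAX_NUM_FLDS members         */
#endif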
6285 /* Implement TARGET_STRUCT_VALUE_RTX. */
6287 static rtx
6288 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
6289 int incoming ATTRIBUTE_UNUSED)
6291 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
6294 /* Implements target hook vector_mode_supported_p. */
6295 static bool
6296 aarch64_vector_mode_supported_p (enum machine_mode mode)
6298 if (TARGET_SIMD
6299 && (mode == V4SImode || mode == V8HImode
6300 || mode == V16QImode || mode == V2DImode
6301 || mode == V2SImode || mode == V4HImode
6302 || mode == V8QImode || mode == V2SFmode
6303 || mode == V4SFmode || mode == V2DFmode))
6304 return true;
6306 return false;
6309 /* Return appropriate SIMD container
6310 for MODE within a vector of WIDTH bits. */
6311 static enum machine_mode
6312 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
6314 gcc_assert (width == 64 || width == 128);
6315 if (TARGET_SIMD)
6317 if (width == 128)
6318 switch (mode)
6320 case DFmode:
6321 return V2DFmode;
6322 case SFmode:
6323 return V4SFmode;
6324 case SImode:
6325 return V4SImode;
6326 case HImode:
6327 return V8HImode;
6328 case QImode:
6329 return V16QImode;
6330 case DImode:
6331 return V2DImode;
6332 default:
6333 break;
6335 else
6336 switch (mode)
6338 case SFmode:
6339 return V2SFmode;
6340 case SImode:
6341 return V2SImode;
6342 case HImode:
6343 return V4HImode;
6344 case QImode:
6345 return V8QImode;
6346 default:
6347 break;
6350 return word_mode;
6353 /* Return 128-bit container as the preferred SIMD mode for MODE. */
6354 static enum machine_mode
6355 aarch64_preferred_simd_mode (enum machine_mode mode)
6357 return aarch64_simd_container_mode (mode, 128);
6360 /* Return the bitmask of possible vector sizes for the vectorizer
6361 to iterate over. */
6362 static unsigned int
6363 aarch64_autovectorize_vector_sizes (void)
6365 return (16 | 8);
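/* Summary of the mapping performed by aarch64_simd_container_mode above
   (illustrative only); modes with no 64-bit form, and all modes when
   !TARGET_SIMD, fall back to word_mode:

      element   128-bit container   64-bit container
      QImode    V16QImode           V8QImode
      HImode    V8HImode            V4HImode
      SImode    V4SImode            V2SImode
      DImode    V2DImode            word_mode
      SFmode    V4SFmode            V2SFmode
      DFmode    V2DFmode            word_mode                          */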
6368 /* A table to help perform AArch64-specific name mangling for AdvSIMD
6369 vector types in order to conform to the AAPCS64 (see "Procedure
6370 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6371 qualify for emission with the mangled names defined in that document,
6372 a vector type must not only be of the correct mode but also be
6373 composed of AdvSIMD vector element types (e.g.
6374 __builtin_aarch64_simd_qi); these types are registered by
6375 aarch64_init_simd_builtins (). In other words, vector types defined
6376 in other ways e.g. via vector_size attribute will get default
6377 mangled names. */
6378 typedef struct
6380 enum machine_mode mode;
6381 const char *element_type_name;
6382 const char *mangled_name;
6383 } aarch64_simd_mangle_map_entry;
6385 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
6386 /* 64-bit containerized types. */
6387 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6388 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6389 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6390 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6391 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6392 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6393 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6394 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6395 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6396 /* 128-bit containerized types. */
6397 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6398 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6399 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6400 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6401 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6402 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6403 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6404 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6405 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6406 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6407 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6408 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6409 { V2DImode, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
6410 { VOIDmode, NULL, NULL }
6413 /* Implement TARGET_MANGLE_TYPE. */
6415 static const char *
6416 aarch64_mangle_type (const_tree type)
6418 /* The AArch64 ABI documents say that "__va_list" has to be
6419 mangled as if it is in the "std" namespace. */
6420 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6421 return "St9__va_list";
6423 /* Check the mode of the vector type, and the name of the vector
6424 element type, against the table. */
6425 if (TREE_CODE (type) == VECTOR_TYPE)
6427 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6429 while (pos->mode != VOIDmode)
6431 tree elt_type = TREE_TYPE (type);
6433 if (pos->mode == TYPE_MODE (type)
6434 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6435 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6436 pos->element_type_name))
6437 return pos->mangled_name;
6439 pos++;
6443 /* Use the default mangling. */
6444 return NULL;
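/* Usage sketch (illustrative): with the table above, a C++ declaration
   such as

       void f (int8x8_t);     // <arm_neon.h> type, mode V8QImode

   is expected to mangle as _Z1f10__Int8x8_t, whereas a type defined with
   plain __attribute__ ((vector_size (8))) falls back to the default
   mangling because its element type is not one of the AdvSIMD builtin
   element types registered by aarch64_init_simd_builtins.  */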
6447 /* Return the equivalent letter for size. */
6448 static char
6449 sizetochar (int size)
6451 switch (size)
6453 case 64: return 'd';
6454 case 32: return 's';
6455 case 16: return 'h';
6456 case 8 : return 'b';
6457 default: gcc_unreachable ();
6461 /* Return true iff X is a uniform vector of floating-point
6462 constants, and the constant can be represented in
6463 quarter-precision form.  Note, as aarch64_float_const_representable_p
6464 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
6465 static bool
6466 aarch64_vect_float_const_representable_p (rtx x)
6468 int i = 0;
6469 REAL_VALUE_TYPE r0, ri;
6470 rtx x0, xi;
6472 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6473 return false;
6475 x0 = CONST_VECTOR_ELT (x, 0);
6476 if (!CONST_DOUBLE_P (x0))
6477 return false;
6479 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6481 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6483 xi = CONST_VECTOR_ELT (x, i);
6484 if (!CONST_DOUBLE_P (xi))
6485 return false;
6487 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6488 if (!REAL_VALUES_EQUAL (r0, ri))
6489 return false;
6492 return aarch64_float_const_representable_p (x0);
6495 /* Return true for valid and false for invalid. */
6496 bool
6497 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
6498 struct simd_immediate_info *info)
6500 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6501 matches = 1; \
6502 for (i = 0; i < idx; i += (STRIDE)) \
6503 if (!(TEST)) \
6504 matches = 0; \
6505 if (matches) \
6507 immtype = (CLASS); \
6508 elsize = (ELSIZE); \
6509 eshift = (SHIFT); \
6510 emvn = (NEG); \
6511 break; \
6514 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6515 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6516 unsigned char bytes[16];
6517 int immtype = -1, matches;
6518 unsigned int invmask = inverse ? 0xff : 0;
6519 int eshift, emvn;
6521 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6523 if (! (aarch64_simd_imm_zero_p (op, mode)
6524 || aarch64_vect_float_const_representable_p (op)))
6525 return false;
6527 if (info)
6529 info->value = CONST_VECTOR_ELT (op, 0);
6530 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
6531 info->mvn = false;
6532 info->shift = 0;
6535 return true;
6538 /* Splat vector constant out into a byte vector. */
6539 for (i = 0; i < n_elts; i++)
6541 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
6542 it must be laid out in the vector register in reverse order. */
6543 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
6544 unsigned HOST_WIDE_INT elpart;
6545 unsigned int part, parts;
6547 if (GET_CODE (el) == CONST_INT)
6549 elpart = INTVAL (el);
6550 parts = 1;
6552 else if (GET_CODE (el) == CONST_DOUBLE)
6554 elpart = CONST_DOUBLE_LOW (el);
6555 parts = 2;
6557 else
6558 gcc_unreachable ();
6560 for (part = 0; part < parts; part++)
6562 unsigned int byte;
6563 for (byte = 0; byte < innersize; byte++)
6565 bytes[idx++] = (elpart & 0xff) ^ invmask;
6566 elpart >>= BITS_PER_UNIT;
6568 if (GET_CODE (el) == CONST_DOUBLE)
6569 elpart = CONST_DOUBLE_HIGH (el);
6573 /* Sanity check. */
6574 gcc_assert (idx == GET_MODE_SIZE (mode));
6578 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6579 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6581 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6582 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6584 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6585 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6587 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6588 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6590 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6592 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6594 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6595 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6597 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6598 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6600 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6601 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6603 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6604 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6606 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6608 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6610 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6611 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6613 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6614 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6616 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6617 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6619 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6620 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6622 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6624 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6625 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6627 while (0);
6629 if (immtype == -1)
6630 return false;
6632 if (info)
6634 info->element_width = elsize;
6635 info->mvn = emvn != 0;
6636 info->shift = eshift;
6638 unsigned HOST_WIDE_INT imm = 0;
6640 if (immtype >= 12 && immtype <= 15)
6641 info->msl = true;
6643 /* Un-invert bytes of recognized vector, if necessary. */
6644 if (invmask != 0)
6645 for (i = 0; i < idx; i++)
6646 bytes[i] ^= invmask;
6648 if (immtype == 17)
6650 /* FIXME: Broken on 32-bit H_W_I hosts. */
6651 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6653 for (i = 0; i < 8; i++)
6654 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6655 << (i * BITS_PER_UNIT);
6658 info->value = GEN_INT (imm);
6660 else
6662 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6663 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6665 /* Construct 'abcdefgh' because the assembler cannot handle
6666 generic constants. */
6667 if (info->mvn)
6668 imm = ~imm;
6669 imm = (imm >> info->shift) & 0xff;
6670 info->value = GEN_INT (imm);
6674 return true;
6675 #undef CHECK
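/* Worked example for the recognizer above (illustrative): a V4SImode
   constant whose every element is 0x0000ab00 matches the 32-bit
   "byte 1 only" pattern, giving info->element_width = 32,
   info->shift = 8, info->mvn = false and info->value = 0xab, i.e. a
   single MOVI with LSL #8.  An element such as 0x00ab00cd has two
   non-zero bytes in different positions and is rejected.  */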
6678 static bool
6679 aarch64_const_vec_all_same_int_p (rtx x,
6680 HOST_WIDE_INT minval,
6681 HOST_WIDE_INT maxval)
6683 HOST_WIDE_INT firstval;
6684 int count, i;
6686 if (GET_CODE (x) != CONST_VECTOR
6687 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6688 return false;
6690 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6691 if (firstval < minval || firstval > maxval)
6692 return false;
6694 count = CONST_VECTOR_NUNITS (x);
6695 for (i = 1; i < count; i++)
6696 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6697 return false;
6699 return true;
6702 /* Check if immediate shift constants are within range. */
6703 bool
6704 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6706 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6707 if (left)
6708 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6709 else
6710 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
6713 /* Return true if X is a uniform vector where all elements
6714 are either the floating-point constant 0.0 or the
6715 integer constant 0. */
6716 bool
6717 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6719 return x == CONST0_RTX (mode);
6722 bool
6723 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6725 HOST_WIDE_INT imm = INTVAL (x);
6726 int i;
6728 for (i = 0; i < 8; i++)
6730 unsigned int byte = imm & 0xff;
6731 if (byte != 0xff && byte != 0)
6732 return false;
6733 imm >>= 8;
6736 return true;
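/* Illustrative examples for the scalar check above: 64-bit values whose
   bytes are each 0x00 or 0xff, e.g. 0xff00ff00ff00ff00 or
   0x00000000ffffffff, are accepted; 0x00000000000000ab is rejected
   because its low byte is neither 0x00 nor 0xff.  */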
6739 bool
6740 aarch64_mov_operand_p (rtx x,
6741 enum aarch64_symbol_context context,
6742 enum machine_mode mode)
6744 if (GET_CODE (x) == HIGH
6745 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6746 return true;
6748 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
6749 return true;
6751 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
6752 return true;
6754 return aarch64_classify_symbolic_expression (x, context)
6755 == SYMBOL_TINY_ABSOLUTE;
6758 /* Return a const_int vector of VAL. */
6760 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6762 int nunits = GET_MODE_NUNITS (mode);
6763 rtvec v = rtvec_alloc (nunits);
6764 int i;
6766 for (i=0; i < nunits; i++)
6767 RTVEC_ELT (v, i) = GEN_INT (val);
6769 return gen_rtx_CONST_VECTOR (mode, v);
6772 /* Check OP is a legal scalar immediate for the MOVI instruction. */
6774 bool
6775 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
6777 enum machine_mode vmode;
6779 gcc_assert (!VECTOR_MODE_P (mode));
6780 vmode = aarch64_preferred_simd_mode (mode);
6781 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
6782 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
6785 /* Construct and return a PARALLEL RTX vector. */
6787 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6789 int nunits = GET_MODE_NUNITS (mode);
6790 rtvec v = rtvec_alloc (nunits / 2);
6791 int base = high ? nunits / 2 : 0;
6792 rtx t1;
6793 int i;
6795 for (i=0; i < nunits / 2; i++)
6796 RTVEC_ELT (v, i) = GEN_INT (base + i);
6798 t1 = gen_rtx_PARALLEL (mode, v);
6799 return t1;
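/* Example (illustrative): for V4SImode, HIGH == false yields
   (parallel [(const_int 0) (const_int 1)]) and HIGH == true yields
   (parallel [(const_int 2) (const_int 3)]).  */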
6802 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6803 HIGH (exclusive). */
6804 void
6805 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6807 HOST_WIDE_INT lane;
6808 gcc_assert (GET_CODE (operand) == CONST_INT);
6809 lane = INTVAL (operand);
6811 if (lane < low || lane >= high)
6812 error ("lane out of range");
6815 void
6816 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6818 gcc_assert (GET_CODE (operand) == CONST_INT);
6819 HOST_WIDE_INT lane = INTVAL (operand);
6821 if (lane < low || lane >= high)
6822 error ("constant out of range");
6825 /* Emit code to reinterpret one AdvSIMD type as another,
6826 without altering bits. */
6827 void
6828 aarch64_simd_reinterpret (rtx dest, rtx src)
6830 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6833 /* Emit code to place an AdvSIMD pair result in memory locations (with equal
6834 registers). */
6835 void
6836 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6837 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6838 rtx op1)
6840 rtx mem = gen_rtx_MEM (mode, destaddr);
6841 rtx tmp1 = gen_reg_rtx (mode);
6842 rtx tmp2 = gen_reg_rtx (mode);
6844 emit_insn (intfn (tmp1, op1, tmp2));
6846 emit_move_insn (mem, tmp1);
6847 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6848 emit_move_insn (mem, tmp2);
6851 /* Return TRUE if OP is a valid vector addressing mode. */
6852 bool
6853 aarch64_simd_mem_operand_p (rtx op)
6855 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6856 || GET_CODE (XEXP (op, 0)) == REG);
6859 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
6860 not to early-clobber SRC registers in the process.
6862 We assume that the operands described by SRC and DEST represent a
6863 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6864 number of components into which the copy has been decomposed. */
6865 void
6866 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6867 rtx *src, unsigned int count)
6869 unsigned int i;
6871 if (!reg_overlap_mentioned_p (operands[0], operands[1])
6872 || REGNO (operands[0]) < REGNO (operands[1]))
6874 for (i = 0; i < count; i++)
6876 operands[2 * i] = dest[i];
6877 operands[2 * i + 1] = src[i];
6880 else
6882 for (i = 0; i < count; i++)
6884 operands[2 * i] = dest[count - i - 1];
6885 operands[2 * i + 1] = src[count - i - 1];
6890 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6891 one of VSTRUCT modes: OI, CI or XI. */
6893 aarch64_simd_attr_length_move (rtx insn)
6895 enum machine_mode mode;
6897 extract_insn_cached (insn);
6899 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6901 mode = GET_MODE (recog_data.operand[0]);
6902 switch (mode)
6904 case OImode:
6905 return 8;
6906 case CImode:
6907 return 12;
6908 case XImode:
6909 return 16;
6910 default:
6911 gcc_unreachable ();
6914 return 4;
6917 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6918 alignment of a vector to 128 bits. */
6919 static HOST_WIDE_INT
6920 aarch64_simd_vector_alignment (const_tree type)
6922 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
6923 return MIN (align, 128);
6926 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
6927 static bool
6928 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6930 if (is_packed)
6931 return false;
6933 /* We guarantee alignment for vectors up to 128-bits. */
6934 if (tree_int_cst_compare (TYPE_SIZE (type),
6935 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6936 return false;
6938 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
6939 return true;
6942 /* If VALS is a vector constant that can be loaded into a register
6943 using DUP, generate instructions to do so and return an RTX to
6944 assign to the register. Otherwise return NULL_RTX. */
6945 static rtx
6946 aarch64_simd_dup_constant (rtx vals)
6948 enum machine_mode mode = GET_MODE (vals);
6949 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6950 int n_elts = GET_MODE_NUNITS (mode);
6951 bool all_same = true;
6952 rtx x;
6953 int i;
6955 if (GET_CODE (vals) != CONST_VECTOR)
6956 return NULL_RTX;
6958 for (i = 1; i < n_elts; ++i)
6960 x = CONST_VECTOR_ELT (vals, i);
6961 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
6962 all_same = false;
6965 if (!all_same)
6966 return NULL_RTX;
6968 /* We can load this constant by using DUP and a constant in a
6969 single ARM register. This will be cheaper than a vector
6970 load. */
6971 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
6972 return gen_rtx_VEC_DUPLICATE (mode, x);
6976 /* Generate code to load VALS, which is a PARALLEL containing only
6977 constants (for vec_init) or CONST_VECTOR, efficiently into a
6978 register. Returns an RTX to copy into the register, or NULL_RTX
6979 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
6980 static rtx
6981 aarch64_simd_make_constant (rtx vals)
6983 enum machine_mode mode = GET_MODE (vals);
6984 rtx const_dup;
6985 rtx const_vec = NULL_RTX;
6986 int n_elts = GET_MODE_NUNITS (mode);
6987 int n_const = 0;
6988 int i;
6990 if (GET_CODE (vals) == CONST_VECTOR)
6991 const_vec = vals;
6992 else if (GET_CODE (vals) == PARALLEL)
6994 /* A CONST_VECTOR must contain only CONST_INTs and
6995 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
6996 Only store valid constants in a CONST_VECTOR. */
6997 for (i = 0; i < n_elts; ++i)
6999 rtx x = XVECEXP (vals, 0, i);
7000 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
7001 n_const++;
7003 if (n_const == n_elts)
7004 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
7006 else
7007 gcc_unreachable ();
7009 if (const_vec != NULL_RTX
7010 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
7011 /* Load using MOVI/MVNI. */
7012 return const_vec;
7013 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
7014 /* Loaded using DUP. */
7015 return const_dup;
7016 else if (const_vec != NULL_RTX)
7017 /* Load from constant pool. We cannot take advantage of single-cycle
7018 LD1 because we need a PC-relative addressing mode. */
7019 return const_vec;
7020 else
7021 /* A PARALLEL containing something not valid inside CONST_VECTOR.
7022 We cannot construct an initializer. */
7023 return NULL_RTX;
7026 void
7027 aarch64_expand_vector_init (rtx target, rtx vals)
7029 enum machine_mode mode = GET_MODE (target);
7030 enum machine_mode inner_mode = GET_MODE_INNER (mode);
7031 int n_elts = GET_MODE_NUNITS (mode);
7032 int n_var = 0, one_var = -1;
7033 bool all_same = true;
7034 rtx x, mem;
7035 int i;
7037 x = XVECEXP (vals, 0, 0);
7038 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7039 n_var = 1, one_var = 0;
7041 for (i = 1; i < n_elts; ++i)
7043 x = XVECEXP (vals, 0, i);
7044 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7045 ++n_var, one_var = i;
7047 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
7048 all_same = false;
7051 if (n_var == 0)
7053 rtx constant = aarch64_simd_make_constant (vals);
7054 if (constant != NULL_RTX)
7056 emit_move_insn (target, constant);
7057 return;
7061 /* Splat a single non-constant element if we can. */
7062 if (all_same)
7064 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
7065 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
7066 return;
7069 /* One field is non-constant. Load constant then overwrite varying
7070 field. This is more efficient than using the stack. */
7071 if (n_var == 1)
7073 rtx copy = copy_rtx (vals);
7074 rtx index = GEN_INT (one_var);
7075 enum insn_code icode;
7077 /* Load constant part of vector, substitute neighboring value for
7078 varying element. */
7079 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
7080 aarch64_expand_vector_init (target, copy);
7082 /* Insert variable. */
7083 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
7084 icode = optab_handler (vec_set_optab, mode);
7085 gcc_assert (icode != CODE_FOR_nothing);
7086 emit_insn (GEN_FCN (icode) (target, x, index));
7087 return;
7090 /* Construct the vector in memory one field at a time
7091 and load the whole vector. */
7092 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7093 for (i = 0; i < n_elts; i++)
7094 emit_move_insn (adjust_address_nv (mem, inner_mode,
7095 i * GET_MODE_SIZE (inner_mode)),
7096 XVECEXP (vals, 0, i));
7097 emit_move_insn (target, mem);
7101 static unsigned HOST_WIDE_INT
7102 aarch64_shift_truncation_mask (enum machine_mode mode)
7104 return
7105 (aarch64_vector_mode_supported_p (mode)
7106 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
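/* Worked example (illustrative): for scalar SImode and DImode the hook
   above returns 31 and 63 respectively (the hardware truncates shift
   amounts), while for vector and vector-structure modes it returns 0,
   i.e. no truncation may be assumed.  */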
7109 #ifndef TLS_SECTION_ASM_FLAG
7110 #define TLS_SECTION_ASM_FLAG 'T'
7111 #endif
7113 void
7114 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
7115 tree decl ATTRIBUTE_UNUSED)
7117 char flagchars[10], *f = flagchars;
7119 /* If we have already declared this section, we can use an
7120 abbreviated form to switch back to it -- unless this section is
7121 part of a COMDAT group, in which case GAS requires the full
7122 declaration every time. */
7123 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7124 && (flags & SECTION_DECLARED))
7126 fprintf (asm_out_file, "\t.section\t%s\n", name);
7127 return;
7130 if (!(flags & SECTION_DEBUG))
7131 *f++ = 'a';
7132 if (flags & SECTION_WRITE)
7133 *f++ = 'w';
7134 if (flags & SECTION_CODE)
7135 *f++ = 'x';
7136 if (flags & SECTION_SMALL)
7137 *f++ = 's';
7138 if (flags & SECTION_MERGE)
7139 *f++ = 'M';
7140 if (flags & SECTION_STRINGS)
7141 *f++ = 'S';
7142 if (flags & SECTION_TLS)
7143 *f++ = TLS_SECTION_ASM_FLAG;
7144 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7145 *f++ = 'G';
7146 *f = '\0';
7148 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
7150 if (!(flags & SECTION_NOTYPE))
7152 const char *type;
7153 const char *format;
7155 if (flags & SECTION_BSS)
7156 type = "nobits";
7157 else
7158 type = "progbits";
7160 #ifdef TYPE_OPERAND_FMT
7161 format = "," TYPE_OPERAND_FMT;
7162 #else
7163 format = ",@%s";
7164 #endif
7166 fprintf (asm_out_file, format, type);
7168 if (flags & SECTION_ENTSIZE)
7169 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
7170 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7172 if (TREE_CODE (decl) == IDENTIFIER_NODE)
7173 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
7174 else
7175 fprintf (asm_out_file, ",%s,comdat",
7176 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
7180 putc ('\n', asm_out_file);
7183 /* Select a format to encode pointers in exception handling data. */
7185 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
7187 int type;
7188 switch (aarch64_cmodel)
7190 case AARCH64_CMODEL_TINY:
7191 case AARCH64_CMODEL_TINY_PIC:
7192 case AARCH64_CMODEL_SMALL:
7193 case AARCH64_CMODEL_SMALL_PIC:
7194 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
7195 for everything. */
7196 type = DW_EH_PE_sdata4;
7197 break;
7198 default:
7199 /* No assumptions here. 8-byte relocs required. */
7200 type = DW_EH_PE_sdata8;
7201 break;
7203 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
7206 /* Emit load exclusive. */
7208 static void
7209 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
7210 rtx mem, rtx model_rtx)
7212 rtx (*gen) (rtx, rtx, rtx);
7214 switch (mode)
7216 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
7217 case HImode: gen = gen_aarch64_load_exclusivehi; break;
7218 case SImode: gen = gen_aarch64_load_exclusivesi; break;
7219 case DImode: gen = gen_aarch64_load_exclusivedi; break;
7220 default:
7221 gcc_unreachable ();
7224 emit_insn (gen (rval, mem, model_rtx));
7227 /* Emit store exclusive. */
7229 static void
7230 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
7231 rtx rval, rtx mem, rtx model_rtx)
7233 rtx (*gen) (rtx, rtx, rtx, rtx);
7235 switch (mode)
7237 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
7238 case HImode: gen = gen_aarch64_store_exclusivehi; break;
7239 case SImode: gen = gen_aarch64_store_exclusivesi; break;
7240 case DImode: gen = gen_aarch64_store_exclusivedi; break;
7241 default:
7242 gcc_unreachable ();
7245 emit_insn (gen (bval, rval, mem, model_rtx));
7248 /* Mark the previous jump instruction as unlikely. */
7250 static void
7251 aarch64_emit_unlikely_jump (rtx insn)
7253 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
7255 insn = emit_jump_insn (insn);
7256 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
7259 /* Expand a compare and swap pattern. */
7261 void
7262 aarch64_expand_compare_and_swap (rtx operands[])
7264 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
7265 enum machine_mode mode, cmp_mode;
7266 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
7268 bval = operands[0];
7269 rval = operands[1];
7270 mem = operands[2];
7271 oldval = operands[3];
7272 newval = operands[4];
7273 is_weak = operands[5];
7274 mod_s = operands[6];
7275 mod_f = operands[7];
7276 mode = GET_MODE (mem);
7277 cmp_mode = mode;
7279 /* Normally the succ memory model must be stronger than fail, but in the
7280 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7281 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
7283 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
7284 && INTVAL (mod_s) == MEMMODEL_RELEASE)
7285 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
7287 switch (mode)
7289 case QImode:
7290 case HImode:
7291 /* For short modes, we're going to perform the comparison in SImode,
7292 so do the zero-extension now. */
7293 cmp_mode = SImode;
7294 rval = gen_reg_rtx (SImode);
7295 oldval = convert_modes (SImode, mode, oldval, true);
7296 /* Fall through. */
7298 case SImode:
7299 case DImode:
7300 /* Force the value into a register if needed. */
7301 if (!aarch64_plus_operand (oldval, mode))
7302 oldval = force_reg (cmp_mode, oldval);
7303 break;
7305 default:
7306 gcc_unreachable ();
7309 switch (mode)
7311 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7312 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7313 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7314 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7315 default:
7316 gcc_unreachable ();
7319 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7321 if (mode == QImode || mode == HImode)
7322 emit_move_insn (operands[1], gen_lowpart (mode, rval));
7324 x = gen_rtx_REG (CCmode, CC_REGNUM);
7325 x = gen_rtx_EQ (SImode, x, const0_rtx);
7326 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
7329 /* Split a compare and swap pattern. */
7331 void
7332 aarch64_split_compare_and_swap (rtx operands[])
7334 rtx rval, mem, oldval, newval, scratch;
7335 enum machine_mode mode;
7336 bool is_weak;
7337 rtx label1, label2, x, cond;
7339 rval = operands[0];
7340 mem = operands[1];
7341 oldval = operands[2];
7342 newval = operands[3];
7343 is_weak = (operands[4] != const0_rtx);
7344 scratch = operands[7];
7345 mode = GET_MODE (mem);
7347 label1 = NULL_RTX;
7348 if (!is_weak)
7350 label1 = gen_label_rtx ();
7351 emit_label (label1);
7353 label2 = gen_label_rtx ();
7355 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
7357 cond = aarch64_gen_compare_reg (NE, rval, oldval);
7358 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7359 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7360 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7361 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7363 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7365 if (!is_weak)
7367 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7368 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7369 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7370 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7372 else
7374 cond = gen_rtx_REG (CCmode, CC_REGNUM);
7375 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7376 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7379 emit_label (label2);
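/* Shape of the strong compare-and-swap sequence emitted above
   (illustrative pseudo-assembly; register names are made up and the
   acquire/release forms of the exclusives depend on the memory model
   operand):

       .Lretry:
         ldxr   w0, [x1]        ; load-exclusive the current value
         cmp    w0, w2          ; compare with the expected value
         b.ne   .Ldone          ; mismatch -> CAS fails
         stxr   w3, w4, [x1]    ; store-exclusive the new value
         cbnz   w3, .Lretry     ; reservation lost -> try again
       .Ldone:                                                        */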
7382 /* Split an atomic operation. */
7384 void
7385 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7386 rtx value, rtx model_rtx, rtx cond)
7388 enum machine_mode mode = GET_MODE (mem);
7389 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7390 rtx label, x;
7392 label = gen_label_rtx ();
7393 emit_label (label);
7395 if (new_out)
7396 new_out = gen_lowpart (wmode, new_out);
7397 if (old_out)
7398 old_out = gen_lowpart (wmode, old_out);
7399 else
7400 old_out = new_out;
7401 value = simplify_gen_subreg (wmode, value, mode, 0);
7403 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7405 switch (code)
7407 case SET:
7408 new_out = value;
7409 break;
7411 case NOT:
7412 x = gen_rtx_AND (wmode, old_out, value);
7413 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7414 x = gen_rtx_NOT (wmode, new_out);
7415 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7416 break;
7418 case MINUS:
7419 if (CONST_INT_P (value))
7421 value = GEN_INT (-INTVAL (value));
7422 code = PLUS;
7424 /* Fall through. */
7426 default:
7427 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7428 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7429 break;
7432 aarch64_emit_store_exclusive (mode, cond, mem,
7433 gen_lowpart (mode, new_out), model_rtx);
7435 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7436 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7437 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7438 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7441 static void
7442 aarch64_print_extension (void)
7444 const struct aarch64_option_extension *opt = NULL;
7446 for (opt = all_extensions; opt->name != NULL; opt++)
7447 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
7448 asm_fprintf (asm_out_file, "+%s", opt->name);
7450 asm_fprintf (asm_out_file, "\n");
7453 static void
7454 aarch64_start_file (void)
7456 if (selected_arch)
7458 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
7459 aarch64_print_extension ();
7461 else if (selected_cpu)
7463 const char *truncated_name
7464 = aarch64_rewrite_selected_cpu (selected_cpu->name);
7465 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
7466 aarch64_print_extension ();
7468 default_file_start();
7471 /* Target hook for c_mode_for_suffix. */
7472 static enum machine_mode
7473 aarch64_c_mode_for_suffix (char suffix)
7475 if (suffix == 'q')
7476 return TFmode;
7478 return VOIDmode;
7481 /* We can only represent floating point constants which will fit in
7482 "quarter-precision" values. These values are characterised by
7483 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given by:
7486 (-1)^s * (n/16) * 2^r
7488 Where:
7489 's' is the sign bit.
7490 'n' is an integer in the range 16 <= n <= 31.
7491 'r' is an integer in the range -3 <= r <= 4. */
7493 /* Return true iff X can be represented by a quarter-precision
7494 floating point immediate operand.  Note, we cannot represent 0.0. */
7495 bool
7496 aarch64_float_const_representable_p (rtx x)
7498 /* This represents our current view of how many bits
7499 make up the mantissa. */
7500 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7501 int exponent;
7502 unsigned HOST_WIDE_INT mantissa, mask;
7503 HOST_WIDE_INT m1, m2;
7504 REAL_VALUE_TYPE r, m;
7506 if (!CONST_DOUBLE_P (x))
7507 return false;
7509 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7511 /* We cannot represent infinities, NaNs or +/-zero. We won't
7512 know if we have +zero until we analyse the mantissa, but we
7513 can reject the other invalid values. */
7514 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7515 || REAL_VALUE_MINUS_ZERO (r))
7516 return false;
7518 /* Extract exponent. */
7519 r = real_value_abs (&r);
7520 exponent = REAL_EXP (&r);
7522 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7523 highest (sign) bit, with a fixed binary point at bit point_pos.
7524 m1 holds the low part of the mantissa, m2 the high part.
7525 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7526 bits for the mantissa, this can fail (low bits will be lost). */
7527 real_ldexp (&m, &r, point_pos - exponent);
7528 REAL_VALUE_TO_INT (&m1, &m2, m);
7530 /* If the low part of the mantissa has bits set we cannot represent
7531 the value. */
7532 if (m1 != 0)
7533 return false;
7534 /* We have rejected the lower HOST_WIDE_INT, so update our
7535 understanding of how many bits lie in the mantissa and
7536 look only at the high HOST_WIDE_INT. */
7537 mantissa = m2;
7538 point_pos -= HOST_BITS_PER_WIDE_INT;
7540 /* We can only represent values with a mantissa of the form 1.xxxx. */
7541 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7542 if ((mantissa & mask) != 0)
7543 return false;
7545 /* Having filtered unrepresentable values, we may now remove all
7546 but the highest 5 bits. */
7547 mantissa >>= point_pos - 5;
7549 /* We cannot represent the value 0.0, so reject it. This is handled
7550 elsewhere. */
7551 if (mantissa == 0)
7552 return false;
7554 /* Then, as bit 4 is always set, we can mask it off, leaving
7555 the mantissa in the range [0, 15]. */
7556 mantissa &= ~(1 << 4);
7557 gcc_assert (mantissa <= 15);
7559 /* GCC internally does not use IEEE754-like encoding (where normalized
7560 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
7561 Our mantissa values are shifted 4 places to the left relative to
7562 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
7563 by 5 places to correct for GCC's representation. */
7564 exponent = 5 - exponent;
7566 return (exponent >= 0 && exponent <= 7);
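/* Worked examples for the check above (illustrative), using the
   (-1)^s * (n/16) * 2^r form described earlier:
     1.0   = (16/16) * 2^0     -> representable
     0.25  = (16/16) * 2^-2    -> representable
     31.0  = (31/16) * 2^4     -> representable (largest magnitude)
     0.1   has no exact (n/16) * 2^r form with 16 <= n <= 31 and
           -3 <= r <= 4        -> rejected.  */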
7569 char*
7570 aarch64_output_simd_mov_immediate (rtx const_vector,
7571 enum machine_mode mode,
7572 unsigned width)
7574 bool is_valid;
7575 static char templ[40];
7576 const char *mnemonic;
7577 const char *shift_op;
7578 unsigned int lane_count = 0;
7579 char element_char;
7581 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
7583 /* This will return true to show CONST_VECTOR is legal for use as an
7584 AdvSIMD MOVI (or, implicitly, MVNI) instruction immediate.  It will
7585 also update INFO to show how the immediate should be generated. */
7586 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
7587 gcc_assert (is_valid);
7589 element_char = sizetochar (info.element_width);
7590 lane_count = width / info.element_width;
7592 mode = GET_MODE_INNER (mode);
7593 if (mode == SFmode || mode == DFmode)
7595 gcc_assert (info.shift == 0 && ! info.mvn);
7596 if (aarch64_float_const_zero_rtx_p (info.value))
7597 info.value = GEN_INT (0);
7598 else
7600 #define buf_size 20
7601 REAL_VALUE_TYPE r;
7602 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
7603 char float_buf[buf_size] = {'\0'};
7604 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
7605 #undef buf_size
7607 if (lane_count == 1)
7608 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
7609 else
7610 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
7611 lane_count, element_char, float_buf);
7612 return templ;
7616 mnemonic = info.mvn ? "mvni" : "movi";
7617 shift_op = info.msl ? "msl" : "lsl";
7619 if (lane_count == 1)
7620 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
7621 mnemonic, UINTVAL (info.value));
7622 else if (info.shift)
7623 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
7624 ", %s %d", mnemonic, lane_count, element_char,
7625 UINTVAL (info.value), shift_op, info.shift);
7626 else
7627 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
7628 mnemonic, lane_count, element_char, UINTVAL (info.value));
7629 return templ;
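/* Usage sketch (illustrative; the exact text depends on the immediate):
   for a V4SImode constant of four copies of 0x0000ab00 the template
   returned is along the lines of

       movi\t%0.4s, 0xab, lsl 8

   while floating-point vectors accepted by
   aarch64_float_const_representable_p are emitted as an fmov with a
   decimal immediate.  */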
7632 char*
7633 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
7634 enum machine_mode mode)
7636 enum machine_mode vmode;
7638 gcc_assert (!VECTOR_MODE_P (mode));
7639 vmode = aarch64_simd_container_mode (mode, 64);
7640 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
7641 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
7644 /* Split operands into moves from op[1] + op[2] into op[0]. */
7646 void
7647 aarch64_split_combinev16qi (rtx operands[3])
7649 unsigned int dest = REGNO (operands[0]);
7650 unsigned int src1 = REGNO (operands[1]);
7651 unsigned int src2 = REGNO (operands[2]);
7652 enum machine_mode halfmode = GET_MODE (operands[1]);
7653 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
7654 rtx destlo, desthi;
7656 gcc_assert (halfmode == V16QImode);
7658 if (src1 == dest && src2 == dest + halfregs)
7660 /* No-op move. Can't split to nothing; emit something. */
7661 emit_note (NOTE_INSN_DELETED);
7662 return;
7665 /* Preserve register attributes for variable tracking. */
7666 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
7667 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
7668 GET_MODE_SIZE (halfmode));
7670 /* Special case of reversed high/low parts. */
7671 if (reg_overlap_mentioned_p (operands[2], destlo)
7672 && reg_overlap_mentioned_p (operands[1], desthi))
7674 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7675 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
7676 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7678 else if (!reg_overlap_mentioned_p (operands[2], destlo))
7680 /* Try to avoid unnecessary moves if part of the result
7681 is in the right place already. */
7682 if (src1 != dest)
7683 emit_move_insn (destlo, operands[1]);
7684 if (src2 != dest + halfregs)
7685 emit_move_insn (desthi, operands[2]);
7687 else
7689 if (src2 != dest + halfregs)
7690 emit_move_insn (desthi, operands[2]);
7691 if (src1 != dest)
7692 emit_move_insn (destlo, operands[1]);
7696 /* vec_perm support. */
7698 #define MAX_VECT_LEN 16
7700 struct expand_vec_perm_d
7702 rtx target, op0, op1;
7703 unsigned char perm[MAX_VECT_LEN];
7704 enum machine_mode vmode;
7705 unsigned char nelt;
7706 bool one_vector_p;
7707 bool testing_p;
7710 /* Generate a variable permutation. */
7712 static void
7713 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
7715 enum machine_mode vmode = GET_MODE (target);
7716 bool one_vector_p = rtx_equal_p (op0, op1);
7718 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
7719 gcc_checking_assert (GET_MODE (op0) == vmode);
7720 gcc_checking_assert (GET_MODE (op1) == vmode);
7721 gcc_checking_assert (GET_MODE (sel) == vmode);
7722 gcc_checking_assert (TARGET_SIMD);
7724 if (one_vector_p)
7726 if (vmode == V8QImode)
7728 /* Expand the argument to a V16QI mode by duplicating it. */
7729 rtx pair = gen_reg_rtx (V16QImode);
7730 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
7731 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7733 else
7735 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
7738 else
7740 rtx pair;
7742 if (vmode == V8QImode)
7744 pair = gen_reg_rtx (V16QImode);
7745 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
7746 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7748 else
7750 pair = gen_reg_rtx (OImode);
7751 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
7752 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
7757 void
7758 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7760 enum machine_mode vmode = GET_MODE (target);
7761 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
7762 bool one_vector_p = rtx_equal_p (op0, op1);
7763 rtx rmask[MAX_VECT_LEN], mask;
7765 gcc_checking_assert (!BYTES_BIG_ENDIAN);
7767 /* The TBL instruction does not use a modulo index, so we must take care
7768 of that ourselves. */
7769 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
7770 for (i = 0; i < nelt; ++i)
7771 rmask[i] = mask;
7772 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
7773 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
7775 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
7778 /* Recognize patterns suitable for the TRN instructions. */
7779 static bool
7780 aarch64_evpc_trn (struct expand_vec_perm_d *d)
7782 unsigned int i, odd, mask, nelt = d->nelt;
7783 rtx out, in0, in1, x;
7784 rtx (*gen) (rtx, rtx, rtx);
7785 enum machine_mode vmode = d->vmode;
7787 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7788 return false;
7790 /* Note that these are little-endian tests.
7791 We correct for big-endian later. */
7792 if (d->perm[0] == 0)
7793 odd = 0;
7794 else if (d->perm[0] == 1)
7795 odd = 1;
7796 else
7797 return false;
7798 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7800 for (i = 0; i < nelt; i += 2)
7802 if (d->perm[i] != i + odd)
7803 return false;
7804 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
7805 return false;
7808 /* Success! */
7809 if (d->testing_p)
7810 return true;
7812 in0 = d->op0;
7813 in1 = d->op1;
7814 if (BYTES_BIG_ENDIAN)
7816 x = in0, in0 = in1, in1 = x;
7817 odd = !odd;
7819 out = d->target;
7821 if (odd)
7823 switch (vmode)
7825 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
7826 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
7827 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
7828 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
7829 case V4SImode: gen = gen_aarch64_trn2v4si; break;
7830 case V2SImode: gen = gen_aarch64_trn2v2si; break;
7831 case V2DImode: gen = gen_aarch64_trn2v2di; break;
7832 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
7833 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
7834 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
7835 default:
7836 return false;
7839 else
7841 switch (vmode)
7843 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
7844 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
7845 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
7846 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
7847 case V4SImode: gen = gen_aarch64_trn1v4si; break;
7848 case V2SImode: gen = gen_aarch64_trn1v2si; break;
7849 case V2DImode: gen = gen_aarch64_trn1v2di; break;
7850 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
7851 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
7852 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
7853 default:
7854 return false;
7858 emit_insn (gen (out, in0, in1));
7859 return true;
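/* Worked example (little-endian, illustrative): for V4SImode with two
   input vectors the index patterns recognized above are
       { 0, 4, 2, 6 } -> TRN1   (odd == 0)
       { 1, 5, 3, 7 } -> TRN2   (odd == 1)
   where indices 4..7 select elements of the second operand.  */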
7862 /* Recognize patterns suitable for the UZP instructions. */
7863 static bool
7864 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
7866 unsigned int i, odd, mask, nelt = d->nelt;
7867 rtx out, in0, in1, x;
7868 rtx (*gen) (rtx, rtx, rtx);
7869 enum machine_mode vmode = d->vmode;
7871 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7872 return false;
7874 /* Note that these are little-endian tests.
7875 We correct for big-endian later. */
7876 if (d->perm[0] == 0)
7877 odd = 0;
7878 else if (d->perm[0] == 1)
7879 odd = 1;
7880 else
7881 return false;
7882 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7884 for (i = 0; i < nelt; i++)
7886 unsigned elt = (i * 2 + odd) & mask;
7887 if (d->perm[i] != elt)
7888 return false;
7891 /* Success! */
7892 if (d->testing_p)
7893 return true;
7895 in0 = d->op0;
7896 in1 = d->op1;
7897 if (BYTES_BIG_ENDIAN)
7899 x = in0, in0 = in1, in1 = x;
7900 odd = !odd;
7902 out = d->target;
7904 if (odd)
7906 switch (vmode)
7908 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
7909 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
7910 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
7911 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
7912 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
7913 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
7914 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
7915 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
7916 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
7917 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
7918 default:
7919 return false;
7922 else
7924 switch (vmode)
7926 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
7927 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
7928 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
7929 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
7930 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
7931 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
7932 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
7933 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
7934 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
7935 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
7936 default:
7937 return false;
7941 emit_insn (gen (out, in0, in1));
7942 return true;
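/* Worked example (little-endian, illustrative): for V4SImode with two
   input vectors the index patterns recognized above are
       { 0, 2, 4, 6 } -> UZP1   (odd == 0)
       { 1, 3, 5, 7 } -> UZP2   (odd == 1).  */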
7945 /* Recognize patterns suitable for the ZIP instructions. */
7946 static bool
7947 aarch64_evpc_zip (struct expand_vec_perm_d *d)
7949 unsigned int i, high, mask, nelt = d->nelt;
7950 rtx out, in0, in1, x;
7951 rtx (*gen) (rtx, rtx, rtx);
7952 enum machine_mode vmode = d->vmode;
7954 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7955 return false;
7957 /* Note that these are little-endian tests.
7958 We correct for big-endian later. */
7959 high = nelt / 2;
7960 if (d->perm[0] == high)
7961 /* Do Nothing. */
7963 else if (d->perm[0] == 0)
7964 high = 0;
7965 else
7966 return false;
7967 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7969 for (i = 0; i < nelt / 2; i++)
7971 unsigned elt = (i + high) & mask;
7972 if (d->perm[i * 2] != elt)
7973 return false;
7974 elt = (elt + nelt) & mask;
7975 if (d->perm[i * 2 + 1] != elt)
7976 return false;
7979 /* Success! */
7980 if (d->testing_p)
7981 return true;
7983 in0 = d->op0;
7984 in1 = d->op1;
7985 if (BYTES_BIG_ENDIAN)
7987 x = in0, in0 = in1, in1 = x;
7988 high = !high;
7990 out = d->target;
7992 if (high)
7994 switch (vmode)
7996 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
7997 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
7998 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
7999 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
8000 case V4SImode: gen = gen_aarch64_zip2v4si; break;
8001 case V2SImode: gen = gen_aarch64_zip2v2si; break;
8002 case V2DImode: gen = gen_aarch64_zip2v2di; break;
8003 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
8004 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
8005 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
8006 default:
8007 return false;
8010 else
8012 switch (vmode)
8014 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
8015 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
8016 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
8017 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
8018 case V4SImode: gen = gen_aarch64_zip1v4si; break;
8019 case V2SImode: gen = gen_aarch64_zip1v2si; break;
8020 case V2DImode: gen = gen_aarch64_zip1v2di; break;
8021 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
8022 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
8023 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
8024 default:
8025 return false;
8029 emit_insn (gen (out, in0, in1));
8030 return true;
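/* Worked example (little-endian, illustrative): for V4SImode with two
   input vectors the index patterns recognized above are
       { 0, 4, 1, 5 } -> ZIP1   (high == 0)
       { 2, 6, 3, 7 } -> ZIP2   (high == nelt / 2).  */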
8033 static bool
8034 aarch64_evpc_dup (struct expand_vec_perm_d *d)
8036 rtx (*gen) (rtx, rtx, rtx);
8037 rtx out = d->target;
8038 rtx in0;
8039 enum machine_mode vmode = d->vmode;
8040 unsigned int i, elt, nelt = d->nelt;
8041 rtx lane;
8043 /* TODO: This may not be big-endian safe. */
8044 if (BYTES_BIG_ENDIAN)
8045 return false;
8047 elt = d->perm[0];
8048 for (i = 1; i < nelt; i++)
8050 if (elt != d->perm[i])
8051 return false;
8054 /* The generic preparation in aarch64_expand_vec_perm_const_1
8055 swaps the operand order and the permute indices if it finds
8056 d->perm[0] to be in the second operand. Thus, we can always
8057 use d->op0 and need not do any extra arithmetic to get the
8058 correct lane number. */
8059 in0 = d->op0;
8060 lane = GEN_INT (elt);
8062 switch (vmode)
8064 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
8065 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
8066 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
8067 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
8068 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
8069 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
8070 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
8071 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
8072 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
8073 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
8074 default:
8075 return false;
8078 emit_insn (gen (out, in0, lane));
8079 return true;
8082 static bool
8083 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
8085 rtx rperm[MAX_VECT_LEN], sel;
8086 enum machine_mode vmode = d->vmode;
8087 unsigned int i, nelt = d->nelt;
8089 /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's
8090 numbering of elements for big-endian, we must reverse the order. */
8091 if (BYTES_BIG_ENDIAN)
8092 return false;
8094 if (d->testing_p)
8095 return true;
8097 /* Generic code will try constant permutation twice: once with the
8098 original mode and again with the elements lowered to QImode.
8099 So wait and don't do the selector expansion ourselves. */
8100 if (vmode != V8QImode && vmode != V16QImode)
8101 return false;
8103 for (i = 0; i < nelt; ++i)
8104 rperm[i] = GEN_INT (d->perm[i]);
8105 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
8106 sel = force_reg (vmode, sel);
8108 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
8109 return true;
8112 static bool
8113 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
8115 /* The pattern matching functions above are written to look for a small
8116 number to begin the sequence (0, 1, N/2). If we begin with an index
8117 from the second operand, we can swap the operands. */
8118 if (d->perm[0] >= d->nelt)
8120 unsigned i, nelt = d->nelt;
8121 rtx x;
8123 for (i = 0; i < nelt; ++i)
8124 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
8126 x = d->op0;
8127 d->op0 = d->op1;
8128 d->op1 = x;
8131 if (TARGET_SIMD)
8133 if (aarch64_evpc_zip (d))
8134 return true;
8135 else if (aarch64_evpc_uzp (d))
8136 return true;
8137 else if (aarch64_evpc_trn (d))
8138 return true;
8139 else if (aarch64_evpc_dup (d))
8140 return true;
8141 return aarch64_evpc_tbl (d);
8143 return false;
8146 /* Expand a vec_perm_const pattern. */
8148 bool
8149 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
8151 struct expand_vec_perm_d d;
8152 int i, nelt, which;
8154 d.target = target;
8155 d.op0 = op0;
8156 d.op1 = op1;
8158 d.vmode = GET_MODE (target);
8159 gcc_assert (VECTOR_MODE_P (d.vmode));
8160 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8161 d.testing_p = false;
8163 for (i = which = 0; i < nelt; ++i)
8165 rtx e = XVECEXP (sel, 0, i);
8166 int ei = INTVAL (e) & (2 * nelt - 1);
8167 which |= (ei < nelt ? 1 : 2);
8168 d.perm[i] = ei;
8171 switch (which)
8173 default:
8174 gcc_unreachable ();
8176 case 3:
8177 d.one_vector_p = false;
8178 if (!rtx_equal_p (op0, op1))
8179 break;
8181 /* The elements of PERM do not suggest that only the first operand
8182 is used, but both operands are identical. Allow easier matching
8183 of the permutation by folding the permutation into the single
8184 input vector. */
8185 /* Fall Through. */
8186 case 2:
8187 for (i = 0; i < nelt; ++i)
8188 d.perm[i] &= nelt - 1;
8189 d.op0 = op1;
8190 d.one_vector_p = true;
8191 break;
8193 case 1:
8194 d.op1 = op0;
8195 d.one_vector_p = true;
8196 break;
8199 return aarch64_expand_vec_perm_const_1 (&d);
8202 static bool
8203 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
8204 const unsigned char *sel)
8206 struct expand_vec_perm_d d;
8207 unsigned int i, nelt, which;
8208 bool ret;
8210 d.vmode = vmode;
8211 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8212 d.testing_p = true;
8213 memcpy (d.perm, sel, nelt);
8215 /* Calculate whether all elements are in one vector. */
8216 for (i = which = 0; i < nelt; ++i)
8218 unsigned char e = d.perm[i];
8219 gcc_assert (e < 2 * nelt);
8220 which |= (e < nelt ? 1 : 2);
8223 /* If all elements are from the second vector, reindex as if from the
8224 first vector. */
8225 if (which == 2)
8226 for (i = 0; i < nelt; ++i)
8227 d.perm[i] -= nelt;
8229 /* Check whether the mask can be applied to a single vector. */
8230 d.one_vector_p = (which != 3);
8232 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
8233 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
8234 if (!d.one_vector_p)
8235 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
8237 start_sequence ();
8238 ret = aarch64_expand_vec_perm_const_1 (&d);
8239 end_sequence ();
8241 return ret;
8244 /* Implement target hook CANNOT_CHANGE_MODE_CLASS.  */
8245 bool
8246 aarch64_cannot_change_mode_class (enum machine_mode from,
8247                                   enum machine_mode to,
8248                                   enum reg_class rclass)
8249 {
8250   /* Full-reg subregs are allowed on general regs or any class if they are
8251      the same size.  */
8252   if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
8253       || !reg_classes_intersect_p (FP_REGS, rclass))
8254     return false;
8255 
8256   /* Limited combinations of subregs are safe on FPREGs.  In particular:
8257      1. Vector mode to scalar mode, where one lane of the vector is accessed.
8258      2. Scalar to scalar, for integer modes or same-size float modes.
8259      3. Vector mode to vector mode.  */
8260   if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
8261     {
8262       if (aarch64_vector_mode_supported_p (from)
8263           && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
8264         return false;
8265 
8266       if (GET_MODE_NUNITS (from) == 1
8267           && GET_MODE_NUNITS (to) == 1
8268           && (GET_MODE_CLASS (from) == MODE_INT
8269               || from == to))
8270         return false;
8271 
8272       if (aarch64_vector_mode_supported_p (from)
8273           && aarch64_vector_mode_supported_p (to))
8274         return false;
8275     }
8276 
8277   return true;
8278 }
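/* Illustrative applications of the rules above, assuming the usual AArch64
   mode sizes, for a value allocated to FP_REGS:

     V4SImode -> SImode   : allowed (rule 1; one 4-byte lane of a 16-byte
                            vector is accessed), so the hook returns false.
     V4SImode -> V2SImode : allowed (rule 3; both are supported vector
                            modes), so the hook returns false.
     TFmode   -> DImode   : a 16-byte scalar narrowed to 8 bytes matches
                            none of the rules, so the hook returns true.

   Same-size mode changes, and classes that do not overlap FP_REGS, are
   accepted by the first check and never reach these rules.  */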
8280 #undef TARGET_ADDRESS_COST
8281 #define TARGET_ADDRESS_COST aarch64_address_cost
8283 /* This hook determines whether unnamed bitfields affect the alignment
8284    of the containing structure.  The hook returns true if the structure
8285    should inherit the alignment requirements of an unnamed bitfield's
8286    type.  */
8287 #undef TARGET_ALIGN_ANON_BITFIELD
8288 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
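/* A minimal sketch of what the hook above means at the C level (the struct
   is hypothetical and the layout assumes the usual AArch64 type sizes):

     struct s { char c; int : 4; };

   With the hook returning true, the unnamed bitfield contributes the
   alignment of its declared type (int), so struct s is 4-byte aligned
   rather than taking only the alignment of its named members.  */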
8290 #undef TARGET_ASM_ALIGNED_DI_OP
8291 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
8293 #undef TARGET_ASM_ALIGNED_HI_OP
8294 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
8296 #undef TARGET_ASM_ALIGNED_SI_OP
8297 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
8299 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
8300 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
8301 hook_bool_const_tree_hwi_hwi_const_tree_true
8303 #undef TARGET_ASM_FILE_START
8304 #define TARGET_ASM_FILE_START aarch64_start_file
8306 #undef TARGET_ASM_OUTPUT_MI_THUNK
8307 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
8309 #undef TARGET_ASM_SELECT_RTX_SECTION
8310 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
8312 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
8313 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
8315 #undef TARGET_BUILD_BUILTIN_VA_LIST
8316 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
8318 #undef TARGET_CALLEE_COPIES
8319 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
8321 #undef TARGET_CAN_ELIMINATE
8322 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
8324 #undef TARGET_CANNOT_FORCE_CONST_MEM
8325 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
8327 #undef TARGET_CONDITIONAL_REGISTER_USAGE
8328 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
8330 /* Only the least significant bit is used for initialization guard
8331 variables. */
8332 #undef TARGET_CXX_GUARD_MASK_BIT
8333 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
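/* Roughly, with the hook above returning true, code that initializes a
   function-local static tests only bit 0 of the 64-bit guard variable; a
   hedged sketch of the logic the C++ front end emits:

     if ((guard & 1) == 0)
       {
         if (__cxa_guard_acquire (&guard))
           {
             ...construct the object...
             __cxa_guard_release (&guard);
           }
       }
 */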
8335 #undef TARGET_C_MODE_FOR_SUFFIX
8336 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
8338 #ifdef TARGET_BIG_ENDIAN_DEFAULT
8339 #undef TARGET_DEFAULT_TARGET_FLAGS
8340 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
8341 #endif
8343 #undef TARGET_CLASS_MAX_NREGS
8344 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
8346 #undef TARGET_BUILTIN_DECL
8347 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
8349 #undef TARGET_EXPAND_BUILTIN
8350 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
8352 #undef TARGET_EXPAND_BUILTIN_VA_START
8353 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
8355 #undef TARGET_FOLD_BUILTIN
8356 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
8358 #undef TARGET_FUNCTION_ARG
8359 #define TARGET_FUNCTION_ARG aarch64_function_arg
8361 #undef TARGET_FUNCTION_ARG_ADVANCE
8362 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
8364 #undef TARGET_FUNCTION_ARG_BOUNDARY
8365 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
8367 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
8368 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
8370 #undef TARGET_FUNCTION_VALUE
8371 #define TARGET_FUNCTION_VALUE aarch64_function_value
8373 #undef TARGET_FUNCTION_VALUE_REGNO_P
8374 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
8376 #undef TARGET_FRAME_POINTER_REQUIRED
8377 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
8379 #undef TARGET_GIMPLE_FOLD_BUILTIN
8380 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
8382 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
8383 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
8385 #undef TARGET_INIT_BUILTINS
8386 #define TARGET_INIT_BUILTINS aarch64_init_builtins
8388 #undef TARGET_LEGITIMATE_ADDRESS_P
8389 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
8391 #undef TARGET_LEGITIMATE_CONSTANT_P
8392 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
8394 #undef TARGET_LIBGCC_CMP_RETURN_MODE
8395 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
8397 #undef TARGET_LRA_P
8398 #define TARGET_LRA_P aarch64_lra_p
8400 #undef TARGET_MANGLE_TYPE
8401 #define TARGET_MANGLE_TYPE aarch64_mangle_type
8403 #undef TARGET_MEMORY_MOVE_COST
8404 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
8406 #undef TARGET_MUST_PASS_IN_STACK
8407 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
8409 /* This target hook should return true if accesses to volatile bitfields
8410 should use the narrowest mode possible. It should return false if these
8411 accesses should use the bitfield container type. */
8412 #undef TARGET_NARROW_VOLATILE_BITFIELD
8413 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
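/* An illustrative consequence of the hook above (the struct is
   hypothetical):

     struct s { volatile unsigned int f : 8; } *p;
     unsigned int x = p->f;

   With the hook returning false, the read of p->f is done with a 32-bit
   access to the "unsigned int" container rather than with the narrowest
   (single-byte) access.  */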
8415 #undef TARGET_OPTION_OVERRIDE
8416 #define TARGET_OPTION_OVERRIDE aarch64_override_options
8418 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
8419 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
8420 aarch64_override_options_after_change
8422 #undef TARGET_PASS_BY_REFERENCE
8423 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
8425 #undef TARGET_PREFERRED_RELOAD_CLASS
8426 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
8428 #undef TARGET_SECONDARY_RELOAD
8429 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
8431 #undef TARGET_SHIFT_TRUNCATION_MASK
8432 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
8434 #undef TARGET_SETUP_INCOMING_VARARGS
8435 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
8437 #undef TARGET_STRUCT_VALUE_RTX
8438 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
8440 #undef TARGET_REGISTER_MOVE_COST
8441 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
8443 #undef TARGET_RETURN_IN_MEMORY
8444 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
8446 #undef TARGET_RETURN_IN_MSB
8447 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
8449 #undef TARGET_RTX_COSTS
8450 #define TARGET_RTX_COSTS aarch64_rtx_costs
8452 #undef TARGET_SCHED_ISSUE_RATE
8453 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
8455 #undef TARGET_TRAMPOLINE_INIT
8456 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
8458 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
8459 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
8461 #undef TARGET_VECTOR_MODE_SUPPORTED_P
8462 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
8464 #undef TARGET_ARRAY_MODE_SUPPORTED_P
8465 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
8467 #undef TARGET_VECTORIZE_ADD_STMT_COST
8468 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
8470 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
8471 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
8472 aarch64_builtin_vectorization_cost
8474 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
8475 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
8477 #undef TARGET_VECTORIZE_BUILTINS
8478 #define TARGET_VECTORIZE_BUILTINS
8480 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
8481 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
8482 aarch64_builtin_vectorized_function
8484 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
8485 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
8486 aarch64_autovectorize_vector_sizes
8488 /* Section anchor support. */
8490 #undef TARGET_MIN_ANCHOR_OFFSET
8491 #define TARGET_MIN_ANCHOR_OFFSET -256
8493 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
8494 byte offset; we can do much more for larger data types, but have no way
8495 to determine the size of the access. We assume accesses are aligned. */
8496 #undef TARGET_MAX_ANCHOR_OFFSET
8497 #define TARGET_MAX_ANCHOR_OFFSET 4095
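/* An illustrative reading of the anchor range, assuming the base A64
   addressing forms: -256 is the lower bound of the 9-bit signed unscaled
   offset (LDUR/STUR), while 4095 is the largest unsigned immediate byte
   offset (e.g. "ldrb w0, [x1, #4095]").  Wider aligned accesses could reach
   further through scaled offsets, but as the comment above notes, the
   anchor code cannot know the access size in advance.  */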
8499 #undef TARGET_VECTOR_ALIGNMENT
8500 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
8502 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
8503 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
8504 aarch64_simd_vector_alignment_reachable
8506 /* vec_perm support. */
8508 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
8509 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
8510 aarch64_vectorize_vec_perm_const_ok
8513 #undef TARGET_FIXED_CONDITION_CODE_REGS
8514 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
8516 struct gcc_target targetm = TARGET_INITIALIZER;
8518 #include "gt-aarch64.h"